ai4r 1.2 → 1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. data/README.rdoc +12 -25
  2. data/examples/decision_trees/id3_example.rb +6 -9
  3. data/examples/decision_trees/results.txt +2 -0
  4. data/examples/genetic_algorithm/genetic_algorithm_example.rb +11 -13
  5. data/examples/neural_network/xor_example.rb +25 -0
  6. data/lib/ai4r.rb +10 -0
  7. data/lib/ai4r/classifiers/classifier.rb +46 -0
  8. data/lib/ai4r/classifiers/id3.rb +27 -58
  9. data/lib/ai4r/classifiers/one_r.rb +19 -58
  10. data/lib/ai4r/classifiers/prism.rb +21 -57
  11. data/lib/ai4r/classifiers/zero_r.rb +16 -48
  12. data/lib/ai4r/clusterers/bisecting_k_means.rb +115 -0
  13. data/lib/ai4r/clusterers/clusterer.rb +55 -0
  14. data/lib/ai4r/clusterers/k_means.rb +164 -0
  15. data/lib/ai4r/data/data_set.rb +250 -0
  16. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +19 -19
  17. data/lib/ai4r/neural_network/backpropagation.rb +23 -24
  18. data/site/build/site/en/broken-links.xml +2 -0
  19. data/site/build/site/en/downloads.html +200 -0
  20. data/site/build/site/en/downloads.pdf +151 -0
  21. data/site/build/site/en/forum.html +197 -0
  22. data/site/build/site/en/forum.pdf +151 -0
  23. data/site/build/site/en/geneticAlgorithms.html +591 -0
  24. data/site/build/site/en/geneticAlgorithms.pdf +934 -0
  25. data/site/build/site/en/images/ai4r-logo.png +0 -0
  26. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  27. data/site/build/site/en/images/c.png +0 -0
  28. data/site/build/site/en/images/c_wbn.png +0 -0
  29. data/site/build/site/en/images/c_wn.png +0 -0
  30. data/site/build/site/en/images/ero.gif +0 -0
  31. data/site/build/site/en/images/europe2.png +0 -0
  32. data/site/build/site/en/images/europe3.png +0 -0
  33. data/site/build/site/en/images/fitness.png +0 -0
  34. data/site/build/site/en/images/genetic_algorithms_example.png +0 -0
  35. data/site/build/site/en/images/instruction_arrow.png +0 -0
  36. data/site/build/site/en/images/jadeferret.png +0 -0
  37. data/site/build/site/en/images/my_email.png +0 -0
  38. data/site/build/site/en/images/neural_network_example.png +0 -0
  39. data/site/build/site/en/images/rubyforge.png +0 -0
  40. data/site/build/site/en/images/s.png +0 -0
  41. data/site/build/site/en/images/s_wbn.png +0 -0
  42. data/site/build/site/en/images/s_wn.png +0 -0
  43. data/site/build/site/en/images/sigmoid.png +0 -0
  44. data/site/build/site/en/images/t.png +0 -0
  45. data/site/build/site/en/images/t_wbn.png +0 -0
  46. data/site/build/site/en/images/t_wn.png +0 -0
  47. data/site/build/site/en/index.html +336 -0
  48. data/site/build/site/en/index.pdf +508 -0
  49. data/site/build/site/en/linkmap.html +263 -0
  50. data/site/build/site/en/linkmap.pdf +94 -0
  51. data/site/build/site/en/locationmap.xml +72 -0
  52. data/site/build/site/en/machineLearning.html +339 -0
  53. data/site/build/site/en/machineLearning.pdf +337 -0
  54. data/site/build/site/en/neuralNetworks.html +484 -0
  55. data/site/build/site/en/neuralNetworks.pdf +604 -0
  56. data/site/build/site/en/skin/CommonMessages_de.xml +23 -0
  57. data/site/build/site/en/skin/CommonMessages_en_US.xml +23 -0
  58. data/site/build/site/en/skin/CommonMessages_es.xml +23 -0
  59. data/site/build/site/en/skin/CommonMessages_fr.xml +23 -0
  60. data/site/build/site/en/skin/basic.css +166 -0
  61. data/site/build/site/en/skin/breadcrumbs-optimized.js +90 -0
  62. data/site/build/site/en/skin/breadcrumbs.js +237 -0
  63. data/site/build/site/en/skin/fontsize.js +166 -0
  64. data/site/build/site/en/skin/getBlank.js +40 -0
  65. data/site/build/site/en/skin/getMenu.js +45 -0
  66. data/site/build/site/en/skin/images/README.txt +1 -0
  67. data/site/build/site/en/skin/images/add.jpg +0 -0
  68. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  69. data/site/build/site/en/skin/images/chapter.gif +0 -0
  70. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  71. data/site/build/site/en/skin/images/current.gif +0 -0
  72. data/site/build/site/en/skin/images/error.png +0 -0
  73. data/site/build/site/en/skin/images/external-link.gif +0 -0
  74. data/site/build/site/en/skin/images/fix.jpg +0 -0
  75. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  76. data/site/build/site/en/skin/images/hack.jpg +0 -0
  77. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  78. data/site/build/site/en/skin/images/info.png +0 -0
  79. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  80. data/site/build/site/en/skin/images/label.gif +0 -0
  81. data/site/build/site/en/skin/images/page.gif +0 -0
  82. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  83. data/site/build/site/en/skin/images/poddoc.png +0 -0
  84. data/site/build/site/en/skin/images/printer.gif +0 -0
  85. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  86. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  87. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  88. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  89. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  90. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  91. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  92. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  93. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  94. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  95. data/site/build/site/en/skin/images/remove.jpg +0 -0
  96. data/site/build/site/en/skin/images/rss.png +0 -0
  97. data/site/build/site/en/skin/images/spacer.gif +0 -0
  98. data/site/build/site/en/skin/images/success.png +0 -0
  99. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  100. data/site/build/site/en/skin/images/update.jpg +0 -0
  101. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  102. data/site/build/site/en/skin/images/vcss.png +0 -0
  103. data/site/build/site/en/skin/images/warning.png +0 -0
  104. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  105. data/site/build/site/en/skin/menu.js +48 -0
  106. data/site/build/site/en/skin/note.txt +50 -0
  107. data/site/build/site/en/skin/print.css +54 -0
  108. data/site/build/site/en/skin/profile.css +163 -0
  109. data/site/build/site/en/skin/prototype.js +1257 -0
  110. data/site/build/site/en/skin/screen.css +587 -0
  111. data/site/build/site/en/svn.html +252 -0
  112. data/site/build/site/en/svn.pdf +306 -0
  113. data/site/build/site/en/wholesite.pdf +1915 -0
  114. data/site/build/tmp/brokenlinks.xml +2 -0
  115. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  116. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  117. data/site/build/tmp/locationmap.xml +14 -14
  118. data/site/build/tmp/output.xmap +23 -23
  119. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
  120. data/site/build/tmp/projfilters.properties +41 -41
  121. data/site/build/webapp/WEB-INF/logs/core.log +593 -679
  122. data/site/build/webapp/WEB-INF/logs/error.log +362 -279
  123. data/site/build/webapp/WEB-INF/logs/sitemap.log +368 -1015
  124. data/site/src/documentation/content/xdocs/index.xml +18 -10
  125. data/site/src/documentation/content/xdocs/machineLearning.xml +4 -3
  126. data/site/src/documentation/content/xdocs/site.xml +2 -1
  127. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  128. data/test/classifiers/id3_test.rb +45 -44
  129. data/test/classifiers/one_r_test.rb +19 -17
  130. data/test/classifiers/prism_test.rb +22 -20
  131. data/test/classifiers/zero_r_test.rb +15 -12
  132. data/test/clusterers/bisecting_k_means_test.rb +59 -0
  133. data/test/clusterers/k_means_test.rb +93 -0
  134. data/test/data/data_set_test.rb +92 -0
  135. metadata +252 -128
  136. data/lib/ai4r/classifiers/classifier_helper.rb +0 -54
  137. data/site/src/documentation/content/xdocs/forum.html +0 -9
  138. data/site/src/documentation/resources/images/Thumbs.db +0 -0
  139. data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
@@ -1,26 +1,12 @@
1
1
  = Introduction
2
2
 
3
- AI4R is a collection of ruby algorithms implementations, covering several Artificial intelligence fields,
4
- and simple practical examples using them. It implements:
5
-
6
- * Genetic algorithms (AI4R::GeneticAlgorithm::GeneticSearch)
7
-
8
- * Neural networks (AI4R::NeuralNetwork::Backpropagation)
9
-
10
- * ID3 Decision Trees (AI4R::Classifiers::ID3)
11
-
12
- * PRISM (J. Cendrowska, 1987) (AI4R::Classifiers::Prism)
13
-
14
- * OneR (AKA One Attribute Rule, 1R) (AI4R::Classifiers::OneR)
15
-
16
- * ZeroR (AI4R::Classifiers::ZeroR)
3
+ This project aims to produce ruby implementations of
4
+ algorithms covering several Artificial intelligence fields.
17
5
 
18
6
  = Where can I find the latest code and info on this project?
19
7
 
20
8
  http://ai4r.rubyforge.org
21
9
 
22
- http://ai4r.jadeferret.com
23
-
24
10
  = How to install
25
11
 
26
12
  1. Install the gem:
@@ -29,18 +15,19 @@ http://ai4r.jadeferret.com
29
15
 
30
16
  2. Include require statements in your code:
31
17
 
32
- require "rubygems"
33
- require "ai4r/classifiers/id3"en
34
- require "ai4r/classifiers/prism"
35
- require "ai4r/classifiers/one_r"
36
- require "ai4r/classifiers/zero_r"
37
- require "ai4r/neural_network/backpropagation"
38
- require "ai4r/genetic_algorithm/genetic_algorithm"
18
+ require "rubygems"
19
+ require "ai4r/classifiers/id3"
20
+ require "ai4r/classifiers/prism"
21
+ require "ai4r/classifiers/one_r"
22
+ require "ai4r/classifiers/zero_r"
23
+ require "ai4r/neural_network/backpropagation"
24
+ require "ai4r/genetic_algorithm/genetic_algorithm"
39
25
 
40
26
  = Feedback
41
27
 
42
- If you have some constructive comments about this project, please do send those
43
- to me: Sergio Fierens (sergio(dot)fierens(at)gmail(dot)com)
28
+ If you have questions or constructive comments about this project,
29
+ please post them in the forum. If you do not want to make them public,
30
+ send them to me: Sergio Fierens (sergio(dot)fierens(at)gmail(dot)com)
44
31
 
45
32
  = Warranty
46
33
 
@@ -7,25 +7,22 @@
7
7
  # the Mozilla Public License version 1.1 as published by the
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
- #require File.dirname(__FILE__) + '/../../lib/decision_tree/id3'
11
10
  require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
12
- require 'csv'
13
11
 
14
12
  # Load data from data_set.csv
15
- data_set = []
16
- CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
17
- data_set << row
18
- end
19
- data_labels = data_set.shift
13
+ data_filename = "#{File.dirname(__FILE__)}/data_set.csv"
14
+ data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
20
15
 
21
16
  # Build ID3 tree
22
- id3 = Ai4r::Classifiers::ID3.new.build(data_set, data_labels)
17
+ id3 = Ai4r::Classifiers::ID3.new.build(data_set)
23
18
 
24
19
  # Show rules
25
20
  puts "Discovered rules are:"
26
- puts id3.to_s
21
+ puts id3.get_rules
22
+ puts
27
23
 
28
24
  # Try to predict some values
25
+ puts "Prediction samples:"
29
26
  puts "['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => " + id3.eval(['Moron Sur (GBA)','4','[86 m2 - 100 m2]'])
30
27
  puts "['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => " + id3.eval(['Moron Sur (GBA)','3','[101 m2 - 125 m2]'])
31
28
  puts "['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => " + id3.eval(['Recoleta (CABA)','3','[86 m2 - 100 m2]',])
@@ -23,6 +23,8 @@ elsif size=='[126 m2 - 160 m2]' and zone=='Moron Sur (GBA)' then price='[56K-75K
23
23
  elsif size=='[126 m2 - 160 m2]' and zone=='Recoleta (CABA)' then price='[200K-275K]'
24
24
  elsif size=='[126 m2 - 160 m2]' and zone=='Tigre (GBA)' then price='>275K'
25
25
  else raise 'There was not enough information during training to do a proper induction for this data element' end
26
+
27
+ Prediction samples:
26
28
  ['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => [46K-55K]
27
29
  ['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => [76K-100K]
28
30
  ['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => [126K-150K]
@@ -8,32 +8,30 @@
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
10
  require File.dirname(__FILE__) + '/../../lib/ai4r/genetic_algorithm/genetic_algorithm'
11
+ require File.dirname(__FILE__) + '/../../lib/ai4r/data/data_set'
11
12
  require 'csv'
12
13
 
13
14
  # Load data from data_set.csv
14
- data_set = []
15
- CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/travel_cost.csv", 'r')) do |row|
16
- data_set << row
17
- end
18
- data_labels = data_set.shift
19
- data_set.collect! do |column|
20
- column.collect { |element| element.to_f}
21
- end
15
+ data_filename = "#{File.dirname(__FILE__)}/travel_cost.csv"
16
+ data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
17
+ data_set.data_items.collect! {|column| column.collect {|element| element.to_f}}
22
18
 
23
19
  Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
24
20
 
25
21
  puts "Some random selected tours costs: "
26
22
  3.times do
27
23
  c = Ai4r::GeneticAlgorithm::Chromosome.seed
28
- puts "COST #{-1 * c.fitness} TOUR: #{c.data.collect{ |c| data_labels[c]} * ', '}"
24
+ puts "COST #{-1 * c.fitness} TOUR: "+
25
+ "#{c.data.collect{|c| data_set.data_labels[c]} * ', '}"
29
26
  end
30
27
 
31
28
  puts "Beginning genetic search, please wait... "
32
29
  search = Ai4r::GeneticAlgorithm::GeneticSearch.new(800, 100)
33
30
  result = search.run
34
- puts "BEST COST FOUND #{-1 * result.fitness} TOUR: #{result.data.collect{ |c| data_labels[c]} * ', '}"
31
+ puts "COST #{-1 * result.fitness} TOUR: "+
32
+ "#{result.data.collect{|c| data_set.data_labels[c]} * ', '}"
35
33
 
36
- # $7611.99 TOUR: Moscow, Kiev, Warsaw, Hamburg, Berlin, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
37
- # $7659.81 TOUR: Moscow, Kiev, Warsaw, Vienna, Munich, Berlin, Hamburg, Brussels, Dublin, London, Paris, Milan, Rome, Barcelona, Madrid
38
- # $7596.74 TOUR: Moscow, Kiev, Warsaw, Berlin, Hamburg, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London Dublin
34
+ # $7611.99 TOUR: Moscow, Kiev, Warsaw, Hamburg, Berlin, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
35
+ # $7659.81 TOUR: Moscow, Kiev, Warsaw, Vienna, Munich, Berlin, Hamburg, Brussels, Dublin, London, Paris, Milan, Rome, Barcelona, Madrid
36
+ # $7596.74 TOUR: Moscow, Kiev, Warsaw, Berlin, Hamburg, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
39
37
 
@@ -0,0 +1,25 @@
1
+ require File.dirname(__FILE__) + '/training_patterns'
2
+ require File.dirname(__FILE__) + '/patterns_with_noise'
3
+ require File.dirname(__FILE__) + '/patterns_with_base_noise'
4
+ require File.dirname(__FILE__) + '/../../lib/ai4r/neural_network/backpropagation'
5
+
6
+ examples = [
7
+ [[0, 0], [0, 1]],
8
+ [[0, 1], [1, 0]],
9
+ [[1, 0], [1, 0]],
10
+ [[1, 1], [0, 0]]
11
+ ]
12
+
13
+ net = Ai4r::NeuralNetwork::Backpropagation.new([2, 1, 2, 1])
14
+
15
+ i=0
16
+ 200.times {
17
+ examples.each do |ex|
18
+ 2000.times {net.train(ex[0], [ex[1].first])}
19
+ end
20
+ puts(i=i+1)
21
+ }
22
+
23
+ examples.each do |ex|
24
+ print ex[0], ' => ', net.eval(ex[0]).inspect, ', should be ', ex[1].first, "\n"
25
+ end
@@ -0,0 +1,10 @@
1
+ require "ai4r/clusterers/clusterer"
2
+ require "ai4r/clusterers/k_means"
3
+ require "ai4r/clusterers/bisecting_k_means"
4
+ require "ai4r/classifiers/classifier"
5
+ require "ai4r/classifiers/id3"
6
+ require "ai4r/classifiers/prism"
7
+ require "ai4r/classifiers/one_r"
8
+ require "ai4r/classifiers/zero_r"
9
+ require "ai4r/neural_network/backpropagation"
10
+ require "ai4r/genetic_algorithm/genetic_algorithm"
@@ -0,0 +1,46 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ module Ai4r
11
+ module Classifiers
12
+
13
+ # The only purpose of this class is to define a common API for classifiers.
14
+ # All methods in this class must be implemented in subclasses.
15
+ class Classifier
16
+
17
+ # Build a new classifier, using data examples found in data_set.
18
+ def build(data_set)
19
+ raise NotImplementedError
20
+ end
21
+
22
+ # You can evaluate new data, predicting its class.
23
+ # e.g.
24
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
25
+ def eval(data)
26
+ raise NotImplementedError
27
+ end
28
+
29
+ # This method returns the generated rules in ruby code.
30
+ # e.g.
31
+ #
32
+ # classifier.get_rules
33
+ # # => marketing_target='Y'
34
+ #
35
+ # It is a nice way to inspect induction results, and also to execute them:
36
+ # marketing_target = nil
37
+ # eval classifier.get_rules
38
+ # puts marketing_target
39
+ # # => 'Y'
40
+ def get_rules
41
+ raise NotImplementedError
42
+ end
43
+
44
+ end
45
+ end
46
+ end
@@ -8,7 +8,8 @@
8
8
  # the Mozilla Public License version 1.1 as published by the
9
9
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
10
10
 
11
- require File.dirname(__FILE__) + '/classifier_helper'
11
+ require File.dirname(__FILE__) + '/../data/data_set'
12
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
12
13
 
13
14
  module Ai4r
14
15
 
@@ -44,9 +45,9 @@ module Ai4r
44
45
  # ['Chicago', '>80', 'F', 'Y']
45
46
  # ]
46
47
  #
47
- # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
48
+ # id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
48
49
  #
49
- # id3.to_s
50
+ # id3.get_rules
50
51
  # # => if age_range=='<30' then marketing_target='Y'
51
52
  # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
52
53
  # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
@@ -69,14 +70,14 @@ module Ai4r
69
70
  # end
70
71
  # data_labels = data_set.shift
71
72
  #
72
- # id3 = DecisionTree::ID3.new(data_set, data_labels)
73
+ # id3 = Ai4r::Classifiers::ID3.new(data_set, data_labels)
73
74
  #
74
75
  # = A nice tip for data evaluation
75
76
  #
76
- # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
77
+ # id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
77
78
  # age_range = '<30'
78
79
  # marketing_target = nil
79
- # eval id3.to_s
80
+ # eval id3.get_rules
80
81
  # puts marketing_target
81
82
  # # => 'Y'
82
83
  # = More about ID3 and decision trees
@@ -87,50 +88,17 @@ module Ai4r
87
88
  # = About the project
88
89
  # Author:: Sergio Fierens
89
90
  # License:: MPL 1.1
90
-
91
- class ID3
91
+ # Url:: http://ai4r.rubyforge.org/
92
+ class ID3 < Classifier
92
93
 
93
- attr_reader :data_labels
94
- include ClassifierHelper
94
+ attr_reader :data_set
95
95
 
96
- # Create a new decision tree. If your data is classified with N attributed
97
- # and M examples, then your data examples must have the following format:
98
- #
99
- # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CATEGORY_VAL1],
100
- # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CATEGORY_VAL2],
101
- # ...
102
- # [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
103
- # ]
104
- #
105
- # e.g.
106
- # [ ['New York', '<30', 'M', 'Y'],
107
- # ['Chicago', '<30', 'M', 'Y'],
108
- # ['Chicago', '<30', 'F', 'Y'],
109
- # ['New York', '<30', 'M', 'Y'],
110
- # ['New York', '<30', 'M', 'Y'],
111
- # ['Chicago', '[30-50)', 'M', 'Y'],
112
- # ['New York', '[30-50)', 'F', 'N'],
113
- # ['Chicago', '[30-50)', 'F', 'Y'],
114
- # ['New York', '[30-50)', 'F', 'N'],
115
- # ['Chicago', '[50-80]', 'M', 'N'],
116
- # ['New York', '[50-80]', 'F', 'N'],
117
- # ['New York', '[50-80]', 'M', 'N'],
118
- # ['Chicago', '[50-80]', 'M', 'N'],
119
- # ['New York', '[50-80]', 'F', 'N'],
120
- # ['Chicago', '>80', 'F', 'Y']
121
- # ]
122
- #
123
- # Data labels must have the following format:
124
- # [ 'city', 'age_range', 'gender', 'marketing_target' ]
125
- #
126
- # If you do not provide labels for you data, the following labels will
127
- # be created by default:
128
- # [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY' ]
129
- #
130
- def build(data_examples, data_labels=nil)
131
- check_data_examples(data_examples)
132
- @data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
133
- preprocess_data(data_examples)
96
+ # Create a new ID3 classifier. You must provide a DataSet instance
97
+ # as parameter.
98
+ def build(data_set)
99
+ data_set.check_not_empty
100
+ @data_set = data_set
101
+ preprocess_data(@data_set.data_items)
134
102
  return self
135
103
  end
136
104
 
@@ -144,7 +112,7 @@ module Ai4r
144
112
  # This method returns the generated rules in ruby code.
145
113
  # e.g.
146
114
  #
147
- # id3.to_s
115
+ # id3.get_rules
148
116
  # # => if age_range=='<30' then marketing_target='Y'
149
117
  # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
150
118
  # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
@@ -155,10 +123,11 @@ module Ai4r
155
123
  # It is a nice way to inspect induction results, and also to execute them:
156
124
  # age_range = '<30'
157
125
  # marketing_target = nil
158
- # eval id3.to_s
126
+ # eval id3.get_rules
159
127
  # puts marketing_target
160
128
  # # => 'Y'
161
- def to_s
129
+ def get_rules
130
+ #return "Empty ID3 tree" if !@tree
162
131
  rules = @tree.get_rules
163
132
  rules = rules.collect do |rule|
164
133
  "#{rule[0..-2].join(' and ')} then #{rule.last}"
@@ -175,15 +144,15 @@ module Ai4r
175
144
  def build_node(data_examples, flag_att = [])
176
145
  return ErrorNode.new if data_examples.length == 0
177
146
  domain = domain(data_examples)
178
- return CategoryNode.new(@data_labels.last, domain.last[0]) if domain.last.length == 1
147
+ return CategoryNode.new(@data_set.data_labels.last, domain.last[0]) if domain.last.length == 1
179
148
  min_entropy_index = min_entropy_index(data_examples, domain, flag_att)
180
149
  flag_att << min_entropy_index
181
150
  split_data_examples = split_data_examples(data_examples, domain, min_entropy_index)
182
- return CategoryNode.new(@data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
151
+ return CategoryNode.new(@data_set.data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
183
152
  nodes = split_data_examples.collect do |partial_data_examples|
184
153
  build_node(partial_data_examples, flag_att)
185
154
  end
186
- return EvaluationNode.new(@data_labels, min_entropy_index, domain[min_entropy_index], nodes)
155
+ return EvaluationNode.new(@data_set.data_labels, min_entropy_index, domain[min_entropy_index], nodes)
187
156
  end
188
157
 
189
158
  private
@@ -247,7 +216,7 @@ module Ai4r
247
216
  def domain(data_examples)
248
217
  #return build_domains(data_examples)
249
218
  domain = []
250
- @data_labels.length.times { domain << [] }
219
+ @data_set.data_labels.length.times { domain << [] }
251
220
  data_examples.each do |data|
252
221
  data.each_index do |i|
253
222
  domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
@@ -297,7 +266,7 @@ module Ai4r
297
266
  LOG2 = Math.log(2)
298
267
  end
299
268
 
300
- class EvaluationNode
269
+ class EvaluationNode #:nodoc: all
301
270
 
302
271
  attr_reader :index, :values, :nodes
303
272
 
@@ -330,7 +299,7 @@ module Ai4r
330
299
 
331
300
  end
332
301
 
333
- class CategoryNode
302
+ class CategoryNode #:nodoc: all
334
303
  def initialize(label, value)
335
304
  @label = label
336
305
  @value = value
@@ -343,7 +312,7 @@ module Ai4r
343
312
  end
344
313
  end
345
314
 
346
- class ErrorNode
315
+ class ErrorNode #:nodoc: all
347
316
  def value(data)
348
317
  raise "There was not enough information during training to do a proper induction for this data element."
349
318
  end
@@ -8,7 +8,8 @@
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
10
  require 'set'
11
- require File.dirname(__FILE__) + '/classifier_helper'
11
+ require File.dirname(__FILE__) + '/../data/data_set'
12
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
12
13
 
13
14
  module Ai4r
14
15
  module Classifiers
@@ -19,58 +20,25 @@ module Ai4r
19
20
  # attribute to use to classify data that makes
20
21
  # fewest prediction errors.
21
22
  # It generates rules based on a single attribute.
22
- class OneR
23
+ class OneR < Classifier
23
24
 
24
- attr_accessor :data_labels, :rule
25
- include ClassifierHelper
25
+ attr_reader :data_set, :rule
26
26
 
27
- # Build a new OneR classifier. If your data is classified with N attributed
28
- # and M examples, then your data examples must have the following format:
29
- #
30
- # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
31
- # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
32
- # ...
33
- # [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
34
- # ]
35
- #
36
- # e.g.
37
- # [ ['New York', '<30', 'M', 'Y'],
38
- # ['Chicago', '<30', 'M', 'Y'],
39
- # ['Chicago', '<30', 'F', 'Y'],
40
- # ['New York', '<30', 'M', 'Y'],
41
- # ['New York', '<30', 'M', 'Y'],
42
- # ['Chicago', '[30-50)', 'M', 'Y'],
43
- # ['New York', '[30-50)', 'F', 'N'],
44
- # ['Chicago', '[30-50)', 'F', 'Y'],
45
- # ['New York', '[30-50)', 'F', 'N'],
46
- # ['Chicago', '[50-80]', 'M', 'N'],
47
- # ['New York', '[50-80]', 'F', 'N'],
48
- # ['New York', '[50-80]', 'M', 'N'],
49
- # ['Chicago', '[50-80]', 'M', 'N'],
50
- # ['New York', '[50-80]', 'F', 'N'],
51
- # ['Chicago', '>80', 'F', 'Y']
52
- # ]
53
- #
54
- # Data labels must have the following format:
55
- # [ 'city', 'age_range', 'gender', 'marketing_target' ]
56
- #
57
- # If you do not provide labels for you data, the following labels will
58
- # be created by default:
59
- # [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
60
- #
61
- def build(data_examples, data_labels = nil)
62
- check_data_examples(data_examples)
63
- @data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
64
- if (num_attributes(data_examples) == 1)
65
- @zero_r = ZeroR.new.build(data_examples, data_labels)
27
+ # Build a new OneR classifier. You must provide a DataSet instance
28
+ # as parameter.
29
+ def build(data_set)
30
+ data_set.check_not_empty
31
+ @data_set = data_set
32
+ if (data_set.num_attributes == 1)
33
+ @zero_r = ZeroR.new.build(data_set)
66
34
  return self;
67
35
  else
68
36
  @zero_r = nil;
69
37
  end
70
- domains = build_domains(data_examples)
38
+ domains = @data_set.build_domains
71
39
  @rule = nil
72
40
  domains[1...-1].each_index do |attr_index|
73
- rule = build_rule(data_examples, attr_index, domains)
41
+ rule = build_rule(@data_set.data_items, attr_index, domains)
74
42
  @rule = rule if !@rule || rule[:correct] > @rule[:correct]
75
43
  end
76
44
  return self
@@ -88,7 +56,7 @@ module Ai4r
88
56
  # This method returns the generated rules in ruby code.
89
57
  # e.g.
90
58
  #
91
- # classifier.to_s
59
+ # classifier.get_rules
92
60
  # # => if age_range == '<30' then marketing_target = 'Y'
93
61
  # elsif age_range == '[30-50)' then marketing_target = 'N'
94
62
  # elsif age_range == '[50-80]' then marketing_target = 'N'
@@ -96,14 +64,14 @@ module Ai4r
96
64
  #
97
65
  # It is a nice way to inspect induction results, and also to execute them:
98
66
  # marketing_target = nil
99
- # eval classifier.to_s
67
+ # eval classifier.get_rules
100
68
  # puts marketing_target
101
69
  # # => 'Y'
102
- def to_s
103
- return @zero_r.to_s if @zero_r
70
+ def get_rules
71
+ return @zero_r.get_rules if @zero_r
104
72
  sentences = []
105
- attr_label = @data_labels[@rule[:attr_index]]
106
- class_label = @data_labels.last
73
+ attr_label = @data_set.data_labels[@rule[:attr_index]]
74
+ class_label = @data_set.data_labels.last
107
75
  @rule[:rule].each_pair do |attr_value, class_value|
108
76
  sentences << "#{attr_label} == '#{attr_value}' then #{class_label} = '#{class_value}'"
109
77
  end
@@ -111,13 +79,6 @@ module Ai4r
111
79
  end
112
80
 
113
81
  protected
114
- def build_domains(data_examples)
115
- domains = Array.new(num_attributes(data_examples)) { Set.new }
116
- data_examples.each do |data|
117
- data.each_index {|attr_index| domains[attr_index] << data[attr_index]}
118
- end
119
- return domains
120
- end
121
82
 
122
83
  def build_rule(data_examples, attr_index, domains)
123
84
  domain = domains[attr_index]