ai4r 1.2 → 1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. data/README.rdoc +12 -25
  2. data/examples/decision_trees/id3_example.rb +6 -9
  3. data/examples/decision_trees/results.txt +2 -0
  4. data/examples/genetic_algorithm/genetic_algorithm_example.rb +11 -13
  5. data/examples/neural_network/xor_example.rb +25 -0
  6. data/lib/ai4r.rb +10 -0
  7. data/lib/ai4r/classifiers/classifier.rb +46 -0
  8. data/lib/ai4r/classifiers/id3.rb +27 -58
  9. data/lib/ai4r/classifiers/one_r.rb +19 -58
  10. data/lib/ai4r/classifiers/prism.rb +21 -57
  11. data/lib/ai4r/classifiers/zero_r.rb +16 -48
  12. data/lib/ai4r/clusterers/bisecting_k_means.rb +115 -0
  13. data/lib/ai4r/clusterers/clusterer.rb +55 -0
  14. data/lib/ai4r/clusterers/k_means.rb +164 -0
  15. data/lib/ai4r/data/data_set.rb +250 -0
  16. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +19 -19
  17. data/lib/ai4r/neural_network/backpropagation.rb +23 -24
  18. data/site/build/site/en/broken-links.xml +2 -0
  19. data/site/build/site/en/downloads.html +200 -0
  20. data/site/build/site/en/downloads.pdf +151 -0
  21. data/site/build/site/en/forum.html +197 -0
  22. data/site/build/site/en/forum.pdf +151 -0
  23. data/site/build/site/en/geneticAlgorithms.html +591 -0
  24. data/site/build/site/en/geneticAlgorithms.pdf +934 -0
  25. data/site/build/site/en/images/ai4r-logo.png +0 -0
  26. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  27. data/site/build/site/en/images/c.png +0 -0
  28. data/site/build/site/en/images/c_wbn.png +0 -0
  29. data/site/build/site/en/images/c_wn.png +0 -0
  30. data/site/build/site/en/images/ero.gif +0 -0
  31. data/site/build/site/en/images/europe2.png +0 -0
  32. data/site/build/site/en/images/europe3.png +0 -0
  33. data/site/build/site/en/images/fitness.png +0 -0
  34. data/site/build/site/en/images/genetic_algorithms_example.png +0 -0
  35. data/site/build/site/en/images/instruction_arrow.png +0 -0
  36. data/site/build/site/en/images/jadeferret.png +0 -0
  37. data/site/build/site/en/images/my_email.png +0 -0
  38. data/site/build/site/en/images/neural_network_example.png +0 -0
  39. data/site/build/site/en/images/rubyforge.png +0 -0
  40. data/site/build/site/en/images/s.png +0 -0
  41. data/site/build/site/en/images/s_wbn.png +0 -0
  42. data/site/build/site/en/images/s_wn.png +0 -0
  43. data/site/build/site/en/images/sigmoid.png +0 -0
  44. data/site/build/site/en/images/t.png +0 -0
  45. data/site/build/site/en/images/t_wbn.png +0 -0
  46. data/site/build/site/en/images/t_wn.png +0 -0
  47. data/site/build/site/en/index.html +336 -0
  48. data/site/build/site/en/index.pdf +508 -0
  49. data/site/build/site/en/linkmap.html +263 -0
  50. data/site/build/site/en/linkmap.pdf +94 -0
  51. data/site/build/site/en/locationmap.xml +72 -0
  52. data/site/build/site/en/machineLearning.html +339 -0
  53. data/site/build/site/en/machineLearning.pdf +337 -0
  54. data/site/build/site/en/neuralNetworks.html +484 -0
  55. data/site/build/site/en/neuralNetworks.pdf +604 -0
  56. data/site/build/site/en/skin/CommonMessages_de.xml +23 -0
  57. data/site/build/site/en/skin/CommonMessages_en_US.xml +23 -0
  58. data/site/build/site/en/skin/CommonMessages_es.xml +23 -0
  59. data/site/build/site/en/skin/CommonMessages_fr.xml +23 -0
  60. data/site/build/site/en/skin/basic.css +166 -0
  61. data/site/build/site/en/skin/breadcrumbs-optimized.js +90 -0
  62. data/site/build/site/en/skin/breadcrumbs.js +237 -0
  63. data/site/build/site/en/skin/fontsize.js +166 -0
  64. data/site/build/site/en/skin/getBlank.js +40 -0
  65. data/site/build/site/en/skin/getMenu.js +45 -0
  66. data/site/build/site/en/skin/images/README.txt +1 -0
  67. data/site/build/site/en/skin/images/add.jpg +0 -0
  68. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  69. data/site/build/site/en/skin/images/chapter.gif +0 -0
  70. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  71. data/site/build/site/en/skin/images/current.gif +0 -0
  72. data/site/build/site/en/skin/images/error.png +0 -0
  73. data/site/build/site/en/skin/images/external-link.gif +0 -0
  74. data/site/build/site/en/skin/images/fix.jpg +0 -0
  75. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  76. data/site/build/site/en/skin/images/hack.jpg +0 -0
  77. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  78. data/site/build/site/en/skin/images/info.png +0 -0
  79. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  80. data/site/build/site/en/skin/images/label.gif +0 -0
  81. data/site/build/site/en/skin/images/page.gif +0 -0
  82. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  83. data/site/build/site/en/skin/images/poddoc.png +0 -0
  84. data/site/build/site/en/skin/images/printer.gif +0 -0
  85. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  86. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  87. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  88. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  89. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  90. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  91. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  92. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  93. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  94. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  95. data/site/build/site/en/skin/images/remove.jpg +0 -0
  96. data/site/build/site/en/skin/images/rss.png +0 -0
  97. data/site/build/site/en/skin/images/spacer.gif +0 -0
  98. data/site/build/site/en/skin/images/success.png +0 -0
  99. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  100. data/site/build/site/en/skin/images/update.jpg +0 -0
  101. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  102. data/site/build/site/en/skin/images/vcss.png +0 -0
  103. data/site/build/site/en/skin/images/warning.png +0 -0
  104. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  105. data/site/build/site/en/skin/menu.js +48 -0
  106. data/site/build/site/en/skin/note.txt +50 -0
  107. data/site/build/site/en/skin/print.css +54 -0
  108. data/site/build/site/en/skin/profile.css +163 -0
  109. data/site/build/site/en/skin/prototype.js +1257 -0
  110. data/site/build/site/en/skin/screen.css +587 -0
  111. data/site/build/site/en/svn.html +252 -0
  112. data/site/build/site/en/svn.pdf +306 -0
  113. data/site/build/site/en/wholesite.pdf +1915 -0
  114. data/site/build/tmp/brokenlinks.xml +2 -0
  115. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  116. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  117. data/site/build/tmp/locationmap.xml +14 -14
  118. data/site/build/tmp/output.xmap +23 -23
  119. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
  120. data/site/build/tmp/projfilters.properties +41 -41
  121. data/site/build/webapp/WEB-INF/logs/core.log +593 -679
  122. data/site/build/webapp/WEB-INF/logs/error.log +362 -279
  123. data/site/build/webapp/WEB-INF/logs/sitemap.log +368 -1015
  124. data/site/src/documentation/content/xdocs/index.xml +18 -10
  125. data/site/src/documentation/content/xdocs/machineLearning.xml +4 -3
  126. data/site/src/documentation/content/xdocs/site.xml +2 -1
  127. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  128. data/test/classifiers/id3_test.rb +45 -44
  129. data/test/classifiers/one_r_test.rb +19 -17
  130. data/test/classifiers/prism_test.rb +22 -20
  131. data/test/classifiers/zero_r_test.rb +15 -12
  132. data/test/clusterers/bisecting_k_means_test.rb +59 -0
  133. data/test/clusterers/k_means_test.rb +93 -0
  134. data/test/data/data_set_test.rb +92 -0
  135. metadata +252 -128
  136. data/lib/ai4r/classifiers/classifier_helper.rb +0 -54
  137. data/site/src/documentation/content/xdocs/forum.html +0 -9
  138. data/site/src/documentation/resources/images/Thumbs.db +0 -0
  139. data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
@@ -1,26 +1,12 @@
1
1
  = Introduction
2
2
 
3
- AI4R is a collection of ruby algorithms implementations, covering several Artificial intelligence fields,
4
- and simple practical examples using them. It implements:
5
-
6
- * Genetic algorithms (AI4R::GeneticAlgorithm::GeneticSearch)
7
-
8
- * Neural networks (AI4R::NeuralNetwork::Backpropagation)
9
-
10
- * ID3 Decision Trees (AI4R::Classifiers::ID3)
11
-
12
- * PRISM (J. Cendrowska, 1987) (AI4R::Classifiers::Prism)
13
-
14
- * OneR (AKA One Attribute Rule, 1R) (AI4R::Classifiers::OneR)
15
-
16
- * ZeroR (AI4R::Classifiers::ZeroR)
3
+ This project aims to produce ruby implementations of
4
+ algorithms covering several Artificial intelligence fields.
17
5
 
18
6
  = Where can I find the latest code and info on this project?
19
7
 
20
8
  http://ai4r.rubyforge.org
21
9
 
22
- http://ai4r.jadeferret.com
23
-
24
10
  = How to install
25
11
 
26
12
  1. Install the gem:
@@ -29,18 +15,19 @@ http://ai4r.jadeferret.com
29
15
 
30
16
  2. Include require statements in your code:
31
17
 
32
- require "rubygems"
33
- require "ai4r/classifiers/id3"en
34
- require "ai4r/classifiers/prism"
35
- require "ai4r/classifiers/one_r"
36
- require "ai4r/classifiers/zero_r"
37
- require "ai4r/neural_network/backpropagation"
38
- require "ai4r/genetic_algorithm/genetic_algorithm"
18
+ require "rubygems"
19
+ require "ai4r/classifiers/id3"
20
+ require "ai4r/classifiers/prism"
21
+ require "ai4r/classifiers/one_r"
22
+ require "ai4r/classifiers/zero_r"
23
+ require "ai4r/neural_network/backpropagation"
24
+ require "ai4r/genetic_algorithm/genetic_algorithm"
39
25
 
40
26
  = Feedback
41
27
 
42
- If you have some constructive comments about this project, please do send those
43
- to me: Sergio Fierens (sergio(dot)fierens(at)gmail(dot)com)
28
+ If you have questions or constructive comments about this project,
29
+ please post them in the forum. If you do not want to make them public,
30
+ send them to me: Sergio Fierens (sergio(dot)fierens(at)gmail(dot)com)
44
31
 
45
32
  = Warranty
46
33
 
@@ -7,25 +7,22 @@
7
7
  # the Mozilla Public License version 1.1 as published by the
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
- #require File.dirname(__FILE__) + '/../../lib/decision_tree/id3'
11
10
  require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
12
- require 'csv'
13
11
 
14
12
  # Load data from data_set.csv
15
- data_set = []
16
- CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
17
- data_set << row
18
- end
19
- data_labels = data_set.shift
13
+ data_filename = "#{File.dirname(__FILE__)}/data_set.csv"
14
+ data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
20
15
 
21
16
  # Build ID3 tree
22
- id3 = Ai4r::Classifiers::ID3.new.build(data_set, data_labels)
17
+ id3 = Ai4r::Classifiers::ID3.new.build(data_set)
23
18
 
24
19
  # Show rules
25
20
  puts "Discovered rules are:"
26
- puts id3.to_s
21
+ puts id3.get_rules
22
+ puts
27
23
 
28
24
  # Try to predict some values
25
+ puts "Prediction samples:"
29
26
  puts "['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => " + id3.eval(['Moron Sur (GBA)','4','[86 m2 - 100 m2]'])
30
27
  puts "['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => " + id3.eval(['Moron Sur (GBA)','3','[101 m2 - 125 m2]'])
31
28
  puts "['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => " + id3.eval(['Recoleta (CABA)','3','[86 m2 - 100 m2]',])
@@ -23,6 +23,8 @@ elsif size=='[126 m2 - 160 m2]' and zone=='Moron Sur (GBA)' then price='[56K-75K
23
23
  elsif size=='[126 m2 - 160 m2]' and zone=='Recoleta (CABA)' then price='[200K-275K]'
24
24
  elsif size=='[126 m2 - 160 m2]' and zone=='Tigre (GBA)' then price='>275K'
25
25
  else raise 'There was not enough information during training to do a proper induction for this data element' end
26
+
27
+ Prediction samples:
26
28
  ['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => [46K-55K]
27
29
  ['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => [76K-100K]
28
30
  ['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => [126K-150K]
@@ -8,32 +8,30 @@
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
10
  require File.dirname(__FILE__) + '/../../lib/ai4r/genetic_algorithm/genetic_algorithm'
11
+ require File.dirname(__FILE__) + '/../../lib/ai4r/data/data_set'
11
12
  require 'csv'
12
13
 
13
14
  # Load data from data_set.csv
14
- data_set = []
15
- CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/travel_cost.csv", 'r')) do |row|
16
- data_set << row
17
- end
18
- data_labels = data_set.shift
19
- data_set.collect! do |column|
20
- column.collect { |element| element.to_f}
21
- end
15
+ data_filename = "#{File.dirname(__FILE__)}/travel_cost.csv"
16
+ data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
17
+ data_set.data_items.collect! {|column| column.collect {|element| element.to_f}}
22
18
 
23
19
  Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
24
20
 
25
21
  puts "Some random selected tours costs: "
26
22
  3.times do
27
23
  c = Ai4r::GeneticAlgorithm::Chromosome.seed
28
- puts "COST #{-1 * c.fitness} TOUR: #{c.data.collect{ |c| data_labels[c]} * ', '}"
24
+ puts "COST #{-1 * c.fitness} TOUR: "+
25
+ "#{c.data.collect{|c| data_set.data_labels[c]} * ', '}"
29
26
  end
30
27
 
31
28
  puts "Beginning genetic search, please wait... "
32
29
  search = Ai4r::GeneticAlgorithm::GeneticSearch.new(800, 100)
33
30
  result = search.run
34
- puts "BEST COST FOUND #{-1 * result.fitness} TOUR: #{result.data.collect{ |c| data_labels[c]} * ', '}"
31
+ puts "COST #{-1 * result.fitness} TOUR: "+
32
+ "#{result.data.collect{|c| data_set.data_labels[c]} * ', '}"
35
33
 
36
- # $7611.99 TOUR: Moscow, Kiev, Warsaw, Hamburg, Berlin, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
37
- # $7659.81 TOUR: Moscow, Kiev, Warsaw, Vienna, Munich, Berlin, Hamburg, Brussels, Dublin, London, Paris, Milan, Rome, Barcelona, Madrid
38
- # $7596.74 TOUR: Moscow, Kiev, Warsaw, Berlin, Hamburg, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London Dublin
34
+ # $7611.99 TOUR: Moscow, Kiev, Warsaw, Hamburg, Berlin, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
35
+ # $7659.81 TOUR: Moscow, Kiev, Warsaw, Vienna, Munich, Berlin, Hamburg, Brussels, Dublin, London, Paris, Milan, Rome, Barcelona, Madrid
36
+ # $7596.74 TOUR: Moscow, Kiev, Warsaw, Berlin, Hamburg, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London Dublin
39
37
 
@@ -0,0 +1,25 @@
1
+ require File.dirname(__FILE__) + '/training_patterns'
2
+ require File.dirname(__FILE__) + '/patterns_with_noise'
3
+ require File.dirname(__FILE__) + '/patterns_with_base_noise'
4
+ require File.dirname(__FILE__) + '/../../lib/ai4r/neural_network/backpropagation'
5
+
6
+ examples = [
7
+ [[0, 0], [0, 1]],
8
+ [[0, 1], [1, 0]],
9
+ [[1, 0], [1, 0]],
10
+ [[1, 1], [0, 0]]
11
+ ]
12
+
13
+ net = Ai4r::NeuralNetwork::Backpropagation.new([2, 1, 2, 1])
14
+
15
+ i=0
16
+ 200.times {
17
+ examples.each do |ex|
18
+ 2000.times {net.train(ex[0], [ex[1].first])}
19
+ end
20
+ puts(i=i+1)
21
+ }
22
+
23
+ examples.each do |ex|
24
+ print ex[0], ' => ', net.eval(ex[0]).inspect, ', should be ', ex[1].first, "\n"
25
+ end
@@ -0,0 +1,10 @@
1
+ require "ai4r/clusterers/clusterer"
2
+ require "ai4r/clusterers/k_means"
3
+ require "ai4r/clusterers/bisecting_k_means"
4
+ require "ai4r/classifiers/classifier"
5
+ require "ai4r/classifiers/id3"
6
+ require "ai4r/classifiers/prism"
7
+ require "ai4r/classifiers/one_r"
8
+ require "ai4r/classifiers/zero_r"
9
+ require "ai4r/neural_network/backpropagation"
10
+ require "ai4r/genetic_algorithm/genetic_algorithm"
@@ -0,0 +1,46 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ module Ai4r
11
+ module Classifiers
12
+
13
+ # The only purpose of this class is to define a common API for classifiers.
14
+ # All methods in this class must be implemented in subclasses.
15
+ class Classifier
16
+
17
+ # Build a new classifier, using data examples found in data_set.
18
+ def build(data_set)
19
+ raise NotImplementedError
20
+ end
21
+
22
+ # You can evaluate new data, predicting its class.
23
+ # e.g.
24
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
25
+ def eval(data)
26
+ raise NotImplementedError
27
+ end
28
+
29
+ # This method returns the generated rules in ruby code.
30
+ # e.g.
31
+ #
32
+ # classifier.get_rules
33
+ # # => marketing_target='Y'
34
+ #
35
+ # It is a nice way to inspect induction results, and also to execute them:
36
+ # marketing_target = nil
37
+ # eval classifier.get_rules
38
+ # puts marketing_target
39
+ # # => 'Y'
40
+ def get_rules
41
+ raise NotImplementedError
42
+ end
43
+
44
+ end
45
+ end
46
+ end
@@ -8,7 +8,8 @@
8
8
  # the Mozilla Public License version 1.1 as published by the
9
9
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
10
10
 
11
- require File.dirname(__FILE__) + '/classifier_helper'
11
+ require File.dirname(__FILE__) + '/../data/data_set'
12
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
12
13
 
13
14
  module Ai4r
14
15
 
@@ -44,9 +45,9 @@ module Ai4r
44
45
  # ['Chicago', '>80', 'F', 'Y']
45
46
  # ]
46
47
  #
47
- # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
48
+ # id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
48
49
  #
49
- # id3.to_s
50
+ # id3.get_rules
50
51
  # # => if age_range=='<30' then marketing_target='Y'
51
52
  # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
52
53
  # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
@@ -69,14 +70,14 @@ module Ai4r
69
70
  # end
70
71
  # data_labels = data_set.shift
71
72
  #
72
- # id3 = DecisionTree::ID3.new(data_set, data_labels)
73
+ # id3 = Ai4r::Classifiers::ID3.new(data_set, data_labels)
73
74
  #
74
75
  # = A nice tip for data evaluation
75
76
  #
76
- # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
77
+ # id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
77
78
  # age_range = '<30'
78
79
  # marketing_target = nil
79
- # eval id3.to_s
80
+ # eval id3.get_rules
80
81
  # puts marketing_target
81
82
  # # => 'Y'
82
83
  # = More about ID3 and decision trees
@@ -87,50 +88,17 @@ module Ai4r
87
88
  # = About the project
88
89
  # Author:: Sergio Fierens
89
90
  # License:: MPL 1.1
90
-
91
- class ID3
91
+ # Url:: http://ai4r.rubyforge.org/
92
+ class ID3 < Classifier
92
93
 
93
- attr_reader :data_labels
94
- include ClassifierHelper
94
+ attr_reader :data_set
95
95
 
96
- # Create a new decision tree. If your data is classified with N attributed
97
- # and M examples, then your data examples must have the following format:
98
- #
99
- # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CATEGORY_VAL1],
100
- # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CATEGORY_VAL2],
101
- # ...
102
- # [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
103
- # ]
104
- #
105
- # e.g.
106
- # [ ['New York', '<30', 'M', 'Y'],
107
- # ['Chicago', '<30', 'M', 'Y'],
108
- # ['Chicago', '<30', 'F', 'Y'],
109
- # ['New York', '<30', 'M', 'Y'],
110
- # ['New York', '<30', 'M', 'Y'],
111
- # ['Chicago', '[30-50)', 'M', 'Y'],
112
- # ['New York', '[30-50)', 'F', 'N'],
113
- # ['Chicago', '[30-50)', 'F', 'Y'],
114
- # ['New York', '[30-50)', 'F', 'N'],
115
- # ['Chicago', '[50-80]', 'M', 'N'],
116
- # ['New York', '[50-80]', 'F', 'N'],
117
- # ['New York', '[50-80]', 'M', 'N'],
118
- # ['Chicago', '[50-80]', 'M', 'N'],
119
- # ['New York', '[50-80]', 'F', 'N'],
120
- # ['Chicago', '>80', 'F', 'Y']
121
- # ]
122
- #
123
- # Data labels must have the following format:
124
- # [ 'city', 'age_range', 'gender', 'marketing_target' ]
125
- #
126
- # If you do not provide labels for you data, the following labels will
127
- # be created by default:
128
- # [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY' ]
129
- #
130
- def build(data_examples, data_labels=nil)
131
- check_data_examples(data_examples)
132
- @data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
133
- preprocess_data(data_examples)
96
+ # Create a new ID3 classifier. You must provide a DataSet instance
97
+ # as parameter.
98
+ def build(data_set)
99
+ data_set.check_not_empty
100
+ @data_set = data_set
101
+ preprocess_data(@data_set.data_items)
134
102
  return self
135
103
  end
136
104
 
@@ -144,7 +112,7 @@ module Ai4r
144
112
  # This method returns the generated rules in ruby code.
145
113
  # e.g.
146
114
  #
147
- # id3.to_s
115
+ # id3.get_rules
148
116
  # # => if age_range=='<30' then marketing_target='Y'
149
117
  # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
150
118
  # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
@@ -155,10 +123,11 @@ module Ai4r
155
123
  # It is a nice way to inspect induction results, and also to execute them:
156
124
  # age_range = '<30'
157
125
  # marketing_target = nil
158
- # eval id3.to_s
126
+ # eval id3.get_rules
159
127
  # puts marketing_target
160
128
  # # => 'Y'
161
- def to_s
129
+ def get_rules
130
+ #return "Empty ID3 tree" if !@tree
162
131
  rules = @tree.get_rules
163
132
  rules = rules.collect do |rule|
164
133
  "#{rule[0..-2].join(' and ')} then #{rule.last}"
@@ -175,15 +144,15 @@ module Ai4r
175
144
  def build_node(data_examples, flag_att = [])
176
145
  return ErrorNode.new if data_examples.length == 0
177
146
  domain = domain(data_examples)
178
- return CategoryNode.new(@data_labels.last, domain.last[0]) if domain.last.length == 1
147
+ return CategoryNode.new(@data_set.data_labels.last, domain.last[0]) if domain.last.length == 1
179
148
  min_entropy_index = min_entropy_index(data_examples, domain, flag_att)
180
149
  flag_att << min_entropy_index
181
150
  split_data_examples = split_data_examples(data_examples, domain, min_entropy_index)
182
- return CategoryNode.new(@data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
151
+ return CategoryNode.new(@data_set.data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
183
152
  nodes = split_data_examples.collect do |partial_data_examples|
184
153
  build_node(partial_data_examples, flag_att)
185
154
  end
186
- return EvaluationNode.new(@data_labels, min_entropy_index, domain[min_entropy_index], nodes)
155
+ return EvaluationNode.new(@data_set.data_labels, min_entropy_index, domain[min_entropy_index], nodes)
187
156
  end
188
157
 
189
158
  private
@@ -247,7 +216,7 @@ module Ai4r
247
216
  def domain(data_examples)
248
217
  #return build_domains(data_examples)
249
218
  domain = []
250
- @data_labels.length.times { domain << [] }
219
+ @data_set.data_labels.length.times { domain << [] }
251
220
  data_examples.each do |data|
252
221
  data.each_index do |i|
253
222
  domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
@@ -297,7 +266,7 @@ module Ai4r
297
266
  LOG2 = Math.log(2)
298
267
  end
299
268
 
300
- class EvaluationNode
269
+ class EvaluationNode #:nodoc: all
301
270
 
302
271
  attr_reader :index, :values, :nodes
303
272
 
@@ -330,7 +299,7 @@ module Ai4r
330
299
 
331
300
  end
332
301
 
333
- class CategoryNode
302
+ class CategoryNode #:nodoc: all
334
303
  def initialize(label, value)
335
304
  @label = label
336
305
  @value = value
@@ -343,7 +312,7 @@ module Ai4r
343
312
  end
344
313
  end
345
314
 
346
- class ErrorNode
315
+ class ErrorNode #:nodoc: all
347
316
  def value(data)
348
317
  raise "There was not enough information during training to do a proper induction for this data element."
349
318
  end
@@ -8,7 +8,8 @@
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
10
  require 'set'
11
- require File.dirname(__FILE__) + '/classifier_helper'
11
+ require File.dirname(__FILE__) + '/../data/data_set'
12
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
12
13
 
13
14
  module Ai4r
14
15
  module Classifiers
@@ -19,58 +20,25 @@ module Ai4r
19
20
  # attribute to use to classify data that makes
20
21
  # fewest prediction errors.
21
22
  # It generates rules based on a single attribute.
22
- class OneR
23
+ class OneR < Classifier
23
24
 
24
- attr_accessor :data_labels, :rule
25
- include ClassifierHelper
25
+ attr_reader :data_set, :rule
26
26
 
27
- # Build a new OneR classifier. If your data is classified with N attributed
28
- # and M examples, then your data examples must have the following format:
29
- #
30
- # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
31
- # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
32
- # ...
33
- # [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
34
- # ]
35
- #
36
- # e.g.
37
- # [ ['New York', '<30', 'M', 'Y'],
38
- # ['Chicago', '<30', 'M', 'Y'],
39
- # ['Chicago', '<30', 'F', 'Y'],
40
- # ['New York', '<30', 'M', 'Y'],
41
- # ['New York', '<30', 'M', 'Y'],
42
- # ['Chicago', '[30-50)', 'M', 'Y'],
43
- # ['New York', '[30-50)', 'F', 'N'],
44
- # ['Chicago', '[30-50)', 'F', 'Y'],
45
- # ['New York', '[30-50)', 'F', 'N'],
46
- # ['Chicago', '[50-80]', 'M', 'N'],
47
- # ['New York', '[50-80]', 'F', 'N'],
48
- # ['New York', '[50-80]', 'M', 'N'],
49
- # ['Chicago', '[50-80]', 'M', 'N'],
50
- # ['New York', '[50-80]', 'F', 'N'],
51
- # ['Chicago', '>80', 'F', 'Y']
52
- # ]
53
- #
54
- # Data labels must have the following format:
55
- # [ 'city', 'age_range', 'gender', 'marketing_target' ]
56
- #
57
- # If you do not provide labels for you data, the following labels will
58
- # be created by default:
59
- # [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
60
- #
61
- def build(data_examples, data_labels = nil)
62
- check_data_examples(data_examples)
63
- @data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
64
- if (num_attributes(data_examples) == 1)
65
- @zero_r = ZeroR.new.build(data_examples, data_labels)
27
+ # Build a new OneR classifier. You must provide a DataSet instance
28
+ # as parameter.
29
+ def build(data_set)
30
+ data_set.check_not_empty
31
+ @data_set = data_set
32
+ if (data_set.num_attributes == 1)
33
+ @zero_r = ZeroR.new.build(data_set)
66
34
  return self;
67
35
  else
68
36
  @zero_r = nil;
69
37
  end
70
- domains = build_domains(data_examples)
38
+ domains = @data_set.build_domains
71
39
  @rule = nil
72
40
  domains[1...-1].each_index do |attr_index|
73
- rule = build_rule(data_examples, attr_index, domains)
41
+ rule = build_rule(@data_set.data_items, attr_index, domains)
74
42
  @rule = rule if !@rule || rule[:correct] > @rule[:correct]
75
43
  end
76
44
  return self
@@ -88,7 +56,7 @@ module Ai4r
88
56
  # This method returns the generated rules in ruby code.
89
57
  # e.g.
90
58
  #
91
- # classifier.to_s
59
+ # classifier.get_rules
92
60
  # # => if age_range == '<30' then marketing_target = 'Y'
93
61
  # elsif age_range == '[30-50)' then marketing_target = 'N'
94
62
  # elsif age_range == '[50-80]' then marketing_target = 'N'
@@ -96,14 +64,14 @@ module Ai4r
96
64
  #
97
65
  # It is a nice way to inspect induction results, and also to execute them:
98
66
  # marketing_target = nil
99
- # eval classifier.to_s
67
+ # eval classifier.get_rules
100
68
  # puts marketing_target
101
69
  # # => 'Y'
102
- def to_s
103
- return @zero_r.to_s if @zero_r
70
+ def get_rules
71
+ return @zero_r.get_rules if @zero_r
104
72
  sentences = []
105
- attr_label = @data_labels[@rule[:attr_index]]
106
- class_label = @data_labels.last
73
+ attr_label = @data_set.data_labels[@rule[:attr_index]]
74
+ class_label = @data_set.data_labels.last
107
75
  @rule[:rule].each_pair do |attr_value, class_value|
108
76
  sentences << "#{attr_label} == '#{attr_value}' then #{class_label} = '#{class_value}'"
109
77
  end
@@ -111,13 +79,6 @@ module Ai4r
111
79
  end
112
80
 
113
81
  protected
114
- def build_domains(data_examples)
115
- domains = Array.new(num_attributes(data_examples)) { Set.new }
116
- data_examples.each do |data|
117
- data.each_index {|attr_index| domains[attr_index] << data[attr_index]}
118
- end
119
- return domains
120
- end
121
82
 
122
83
  def build_rule(data_examples, attr_index, domains)
123
84
  domain = domains[attr_index]