ai4r 1.1 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (140) hide show
  1. data/README.rdoc +21 -20
  2. data/examples/decision_trees/id3_example.rb +3 -2
  3. data/examples/genetic_algorithm/genetic_algorithm_example.rb +6 -6
  4. data/examples/neural_network/backpropagation_example.rb +2 -2
  5. data/lib/ai4r/classifiers/classifier_helper.rb +54 -0
  6. data/lib/ai4r/classifiers/id3.rb +356 -0
  7. data/lib/ai4r/classifiers/one_r.rb +148 -0
  8. data/lib/ai4r/classifiers/prism.rb +231 -0
  9. data/lib/ai4r/classifiers/zero_r.rb +104 -0
  10. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +272 -0
  11. data/lib/ai4r/neural_network/backpropagation.rb +271 -0
  12. data/site/build/tmp/locationmap.xml +14 -14
  13. data/site/build/tmp/output.xmap +23 -23
  14. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
  15. data/site/build/tmp/plugins-1.xml +0 -11
  16. data/site/build/tmp/plugins-2.xml +54 -0
  17. data/site/build/tmp/projfilters.properties +41 -41
  18. data/site/build/webapp/WEB-INF/logs/core.log +681 -788
  19. data/site/build/webapp/WEB-INF/logs/error.log +281 -248
  20. data/site/build/webapp/WEB-INF/logs/sitemap.log +1015 -0
  21. data/site/src/documentation/content/xdocs/forum.html +9 -0
  22. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +82 -68
  23. data/site/src/documentation/content/xdocs/index.xml +47 -18
  24. data/site/src/documentation/content/xdocs/machineLearning.xml +10 -9
  25. data/site/src/documentation/content/xdocs/neuralNetworks.xml +60 -36
  26. data/site/src/documentation/content/xdocs/site.xml +8 -5
  27. data/site/src/documentation/content/xdocs/svn.xml +11 -1
  28. data/site/src/documentation/resources/images/Thumbs.db +0 -0
  29. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  30. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  31. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  32. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  33. data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
  34. data/site/src/documentation/skinconf.xml +18 -18
  35. data/test/classifiers/id3_test.rb +206 -0
  36. data/test/classifiers/one_r_test.rb +62 -0
  37. data/test/classifiers/prism_test.rb +83 -0
  38. data/test/classifiers/zero_r_test.rb +48 -0
  39. data/test/genetic_algorithm/chromosome_test.rb +41 -38
  40. data/test/genetic_algorithm/genetic_algorithm_test.rb +64 -61
  41. data/test/neural_network/backpropagation_test.rb +20 -18
  42. metadata +109 -199
  43. data/lib/decision_tree/id3.rb +0 -354
  44. data/lib/genetic_algorithm/genetic_algorithm.rb +0 -268
  45. data/lib/neural_network/backpropagation.rb +0 -264
  46. data/site/build/site/en/broken-links.xml +0 -2
  47. data/site/build/site/en/downloads.html +0 -187
  48. data/site/build/site/en/downloads.pdf +0 -151
  49. data/site/build/site/en/geneticAlgorithms.html +0 -564
  50. data/site/build/site/en/geneticAlgorithms.pdf +0 -911
  51. data/site/build/site/en/images/ai4r-logo.png +0 -0
  52. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  53. data/site/build/site/en/images/c.png +0 -0
  54. data/site/build/site/en/images/c_wbn.png +0 -0
  55. data/site/build/site/en/images/c_wn.png +0 -0
  56. data/site/build/site/en/images/ero.gif +0 -0
  57. data/site/build/site/en/images/europe2.png +0 -0
  58. data/site/build/site/en/images/europe3.png +0 -0
  59. data/site/build/site/en/images/fitness.png +0 -0
  60. data/site/build/site/en/images/instruction_arrow.png +0 -0
  61. data/site/build/site/en/images/my_email.png +0 -0
  62. data/site/build/site/en/images/rubyforge.png +0 -0
  63. data/site/build/site/en/images/s.png +0 -0
  64. data/site/build/site/en/images/s_wbn.png +0 -0
  65. data/site/build/site/en/images/s_wn.png +0 -0
  66. data/site/build/site/en/images/sigmoid.png +0 -0
  67. data/site/build/site/en/images/t.png +0 -0
  68. data/site/build/site/en/images/t_wbn.png +0 -0
  69. data/site/build/site/en/images/t_wn.png +0 -0
  70. data/site/build/site/en/index.html +0 -258
  71. data/site/build/site/en/index.pdf +0 -306
  72. data/site/build/site/en/linkmap.html +0 -231
  73. data/site/build/site/en/linkmap.pdf +0 -94
  74. data/site/build/site/en/locationmap.xml +0 -72
  75. data/site/build/site/en/machineLearning.html +0 -325
  76. data/site/build/site/en/machineLearning.pdf +0 -337
  77. data/site/build/site/en/neuralNetworks.html +0 -446
  78. data/site/build/site/en/neuralNetworks.pdf +0 -604
  79. data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
  80. data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
  81. data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
  82. data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
  83. data/site/build/site/en/skin/basic.css +0 -166
  84. data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
  85. data/site/build/site/en/skin/breadcrumbs.js +0 -237
  86. data/site/build/site/en/skin/fontsize.js +0 -166
  87. data/site/build/site/en/skin/getBlank.js +0 -40
  88. data/site/build/site/en/skin/getMenu.js +0 -45
  89. data/site/build/site/en/skin/images/README.txt +0 -1
  90. data/site/build/site/en/skin/images/add.jpg +0 -0
  91. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  92. data/site/build/site/en/skin/images/chapter.gif +0 -0
  93. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  94. data/site/build/site/en/skin/images/current.gif +0 -0
  95. data/site/build/site/en/skin/images/error.png +0 -0
  96. data/site/build/site/en/skin/images/external-link.gif +0 -0
  97. data/site/build/site/en/skin/images/fix.jpg +0 -0
  98. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  99. data/site/build/site/en/skin/images/hack.jpg +0 -0
  100. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  101. data/site/build/site/en/skin/images/info.png +0 -0
  102. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  103. data/site/build/site/en/skin/images/label.gif +0 -0
  104. data/site/build/site/en/skin/images/page.gif +0 -0
  105. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  106. data/site/build/site/en/skin/images/poddoc.png +0 -0
  107. data/site/build/site/en/skin/images/printer.gif +0 -0
  108. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  109. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  110. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  111. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  112. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  113. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  114. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  115. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  116. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  117. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  118. data/site/build/site/en/skin/images/remove.jpg +0 -0
  119. data/site/build/site/en/skin/images/rss.png +0 -0
  120. data/site/build/site/en/skin/images/spacer.gif +0 -0
  121. data/site/build/site/en/skin/images/success.png +0 -0
  122. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  123. data/site/build/site/en/skin/images/update.jpg +0 -0
  124. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  125. data/site/build/site/en/skin/images/vcss.png +0 -0
  126. data/site/build/site/en/skin/images/warning.png +0 -0
  127. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  128. data/site/build/site/en/skin/menu.js +0 -48
  129. data/site/build/site/en/skin/note.txt +0 -50
  130. data/site/build/site/en/skin/print.css +0 -54
  131. data/site/build/site/en/skin/profile.css +0 -163
  132. data/site/build/site/en/skin/prototype.js +0 -1257
  133. data/site/build/site/en/skin/screen.css +0 -587
  134. data/site/build/site/en/svn.html +0 -223
  135. data/site/build/site/en/svn.pdf +0 -239
  136. data/site/build/site/en/wholesite.pdf +0 -1686
  137. data/site/build/tmp/brokenlinks.xml +0 -2
  138. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  139. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  140. data/test/decision_tree/id3_test.rb +0 -209
@@ -1,40 +1,41 @@
1
1
  = Introduction
2
2
 
3
- This project aims to produce ruby implementations of
4
- algorithms covering several Artificial intelligence fields, including:
3
+ AI4R is a collection of Ruby algorithm implementations, covering several artificial intelligence fields,
4
+ and simple practical examples using them. It implements:
5
5
 
6
- * Machine Learning (DecisionTree::ID3)
7
-
8
- Decision Trees using an implementation of ID3 algorithm.
9
-
10
- * Genetic algorithms (GeneticAlgorithm::GeneticSearch)
11
-
12
- Implementation of GeneticSearch and Chromosome classes. The GeneticSearch is a generic class, and can be used to solved any kind of problems. The GeneticSearch class performs a stochastic search of the solution of a given problem.
13
-
14
- * Neural network (NeuralNetwork::Backpropagation)
15
-
16
- Implementation of neural networks using the Backpropagation supervised learning technique.
6
+ * Genetic algorithms (Ai4r::GeneticAlgorithm::GeneticSearch)
7
+
8
+ * Neural networks (Ai4r::NeuralNetwork::Backpropagation)
9
+
10
+ * ID3 Decision Trees (Ai4r::Classifiers::ID3)
17
11
 
18
- * Bayesian networks
12
+ * PRISM (J. Cendrowska, 1987) (Ai4r::Classifiers::Prism)
13
+
14
+ * OneR (AKA One Attribute Rule, 1R) (Ai4r::Classifiers::OneR)
19
15
 
20
- TODO
16
+ * ZeroR (Ai4r::Classifiers::ZeroR)
21
17
 
22
18
  = Where can I find the latest code and info on this project?
23
19
 
24
20
  http://ai4r.rubyforge.org
25
21
 
22
+ http://ai4r.jadeferret.com
23
+
26
24
  = How to install
27
25
 
28
26
  1. Install the gem:
29
27
 
30
- gem install http://rubyforge.org/frs/download.php/32923/ai4r-1.0.gem
28
+ gem install ai4r
31
29
 
32
30
  2. Include require statements in your code:
33
31
 
34
- require "rubygems"
35
- require "decision_tree/id3"
36
- require "neural_network/backpropagation"
37
- require "genetic_algorithm/genetic_algorithm"
32
+ require "rubygems"
33
+ require "ai4r/classifiers/id3"
34
+ require "ai4r/classifiers/prism"
35
+ require "ai4r/classifiers/one_r"
36
+ require "ai4r/classifiers/zero_r"
37
+ require "ai4r/neural_network/backpropagation"
38
+ require "ai4r/genetic_algorithm/genetic_algorithm"
38
39
 
39
40
  = Feedback
40
41
 
@@ -7,7 +7,8 @@
7
7
  # the Mozilla Public License version 1.1 as published by the
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
- require File.dirname(__FILE__) + '/../../lib/decision_tree/id3'
10
+ #require File.dirname(__FILE__) + '/../../lib/decision_tree/id3'
11
+ require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
11
12
  require 'csv'
12
13
 
13
14
  # Load data from data_set.csv
@@ -18,7 +19,7 @@ end
18
19
  data_labels = data_set.shift
19
20
 
20
21
  # Build ID3 tree
21
- id3 = DecisionTree::ID3.new(data_set, data_labels)
22
+ id3 = Ai4r::Classifiers::ID3.new.build(data_set, data_labels)
22
23
 
23
24
  # Show rules
24
25
  puts "Discovered rules are:"
@@ -7,7 +7,7 @@
7
7
  # the Mozilla Public License version 1.1 as published by the
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
- require File.dirname(__FILE__) + '/../../lib/genetic_algorithm/genetic_algorithm'
10
+ require File.dirname(__FILE__) + '/../../lib/ai4r/genetic_algorithm/genetic_algorithm'
11
11
  require 'csv'
12
12
 
13
13
  # Load data from data_set.csv
@@ -20,18 +20,18 @@ data_set.collect! do |column|
20
20
  column.collect { |element| element.to_f}
21
21
  end
22
22
 
23
- GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
23
+ Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
24
24
 
25
25
  puts "Some random selected tours costs: "
26
26
  3.times do
27
- c = GeneticAlgorithm::Chromosome.seed
28
- puts "COST #{c.fitness} TOUR: #{c.data.collect{ |c| data_labels[c]} * ', '}"
27
+ c = Ai4r::GeneticAlgorithm::Chromosome.seed
28
+ puts "COST #{-1 * c.fitness} TOUR: #{c.data.collect{ |c| data_labels[c]} * ', '}"
29
29
  end
30
30
 
31
31
  puts "Beginning genetic search, please wait... "
32
- search = GeneticAlgorithm::GeneticSearch.new(800, 100)
32
+ search = Ai4r::GeneticAlgorithm::GeneticSearch.new(800, 100)
33
33
  result = search.run
34
- puts "BEST COST FOUND #{result.fitness} TOUR: #{result.data.collect{ |c| data_labels[c]} * ', '}"
34
+ puts "BEST COST FOUND #{-1 * result.fitness} TOUR: #{result.data.collect{ |c| data_labels[c]} * ', '}"
35
35
 
36
36
  # $7611.99 TOUR: Moscow, Kiev, Warsaw, Hamburg, Berlin, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
37
37
  # $7659.81 TOUR: Moscow, Kiev, Warsaw, Vienna, Munich, Berlin, Hamburg, Brussels, Dublin, London, Paris, Milan, Rome, Barcelona, Madrid
@@ -10,12 +10,12 @@
10
10
  require File.dirname(__FILE__) + '/training_patterns'
11
11
  require File.dirname(__FILE__) + '/patterns_with_noise'
12
12
  require File.dirname(__FILE__) + '/patterns_with_base_noise'
13
- require File.dirname(__FILE__) + '/../../lib/neural_network/backpropagation'
13
+ require File.dirname(__FILE__) + '/../../lib/ai4r/neural_network/backpropagation'
14
14
  require 'benchmark'
15
15
 
16
16
  times = Benchmark.measure do
17
17
 
18
- net = NeuralNetwork::Backpropagation.new([256, 3])
18
+ net = Ai4r::NeuralNetwork::Backpropagation.new([256, 3])
19
19
 
20
20
  tr_input = TRIANGLE.flatten.collect { |input| input.to_f / 10}
21
21
  sq_input = SQUARE.flatten.collect { |input| input.to_f / 10}
@@ -0,0 +1,54 @@
1
+ require 'set'
2
+
3
+ module Ai4r
4
+
5
+ module Classifiers
6
+
7
+ NUMERIC_CLASS_TYPE = 1
8
+ NOMINAL_CLASS_TYPE = 2
9
+
10
+ module ClassifierHelper
11
+
12
+ def default_data_labels(data_examples)
13
+ data_labels = []
14
+ data_examples[0][0..-2].each_index do |i|
15
+ data_labels[i] = "attribute_#{i+1}"
16
+ end
17
+ data_labels[data_labels.length]="class_value"
18
+ return data_labels
19
+ end
20
+
21
+ def check_data_examples(data_examples)
22
+ if !data_examples || data_examples.empty?
23
+ raise ArgumentError,"Examples data set must not be empty."
24
+ elsif !data_examples.first.is_a?(Array)
25
+ raise ArgumentError,"Unkown format for example data."
26
+ end
27
+ end
28
+
29
+ # Returns attributes number, including class attribute
30
+ def num_attributes(data_examples)
31
+ return 0 if !data_examples || data_examples.empty? || !data_examples.first.is_a?(Array)
32
+ return data_examples.first.size
33
+ end
34
+
35
+ # Returns an array with the domain of each attribute (Set instance
36
+ # containing all possible values)
37
+ # Return example:
38
+ # => [#<Set: {"New York", "Chicago"}>,
39
+ # #<Set: {"<30", "[30-50)", "[50-80]", ">80"}>,
40
+ # #<Set: {"M", "F"}>,
41
+ # #<Set: {"Y", "N"}>]
42
+ def build_domains(data_examples)
43
+ domains = Array.new(num_attributes(data_examples)) { Set.new }
44
+ data_examples.each do |data|
45
+ data.each_index {|attr_index| domains[attr_index] << data[attr_index]}
46
+ end
47
+ return domains
48
+ end
49
+
50
+ end
51
+
52
+ end
53
+
54
+ end
@@ -0,0 +1,356 @@
1
+ # Author:: Sergio Fierens (Implementation, Quinlan is
2
+ # the creator of the algorithm)
3
+ # License:: MPL 1.1
4
+ # Project:: ai4r
5
+ # Url:: http://ai4r.rubyforge.org/
6
+ #
7
+ # You can redistribute it and/or modify it under the terms of
8
+ # the Mozilla Public License version 1.1 as published by the
9
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
10
+
11
+ require File.dirname(__FILE__) + '/classifier_helper'
12
+
13
+ module Ai4r
14
+
15
+ module Classifiers
16
+
17
+ # = Introduction
18
+ # This is an implementation of the ID3 algorithm (Quinlan)
19
+ # Given a set of preclassified examples, it builds a top-down
20
+ # induction of decision tree, biased by the information gain and
21
+ # entropy measure.
22
+ #
23
+ # * http://en.wikipedia.org/wiki/Decision_tree
24
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
25
+ #
26
+ # = How to use it
27
+ #
28
+ # DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
29
+ #
30
+ # DATA_SET = [ ['New York', '<30', 'M', 'Y'],
31
+ # ['Chicago', '<30', 'M', 'Y'],
32
+ # ['Chicago', '<30', 'F', 'Y'],
33
+ # ['New York', '<30', 'M', 'Y'],
34
+ # ['New York', '<30', 'M', 'Y'],
35
+ # ['Chicago', '[30-50)', 'M', 'Y'],
36
+ # ['New York', '[30-50)', 'F', 'N'],
37
+ # ['Chicago', '[30-50)', 'F', 'Y'],
38
+ # ['New York', '[30-50)', 'F', 'N'],
39
+ # ['Chicago', '[50-80]', 'M', 'N'],
40
+ # ['New York', '[50-80]', 'F', 'N'],
41
+ # ['New York', '[50-80]', 'M', 'N'],
42
+ # ['Chicago', '[50-80]', 'M', 'N'],
43
+ # ['New York', '[50-80]', 'F', 'N'],
44
+ # ['Chicago', '>80', 'F', 'Y']
45
+ # ]
46
+ #
47
+ # id3 = Ai4r::Classifiers::ID3.new.build(DATA_SET, DATA_LABELS)
48
+ #
49
+ # id3.to_s
50
+ # # => if age_range=='<30' then marketing_target='Y'
51
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
52
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
53
+ # elsif age_range=='[50-80]' then marketing_target='N'
54
+ # elsif age_range=='>80' then marketing_target='Y'
55
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
56
+ #
57
+ # id3.eval(['New York', '<30', 'M'])
58
+ # # => 'Y'
59
+ #
60
+ # = A better way to load the data
61
+ #
62
+ # In real life you will use many more training examples, with more
63
+ # attributes. Consider moving your data to an external CSV (comma-separated
64
+ # values) file.
65
+ #
66
+ # data_set = []
67
+ # CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
68
+ # data_set << row
69
+ # end
70
+ # data_labels = data_set.shift
71
+ #
72
+ # id3 = Ai4r::Classifiers::ID3.new.build(data_set, data_labels)
73
+ #
74
+ # = A nice tip for data evaluation
75
+ #
76
+ # id3 = Ai4r::Classifiers::ID3.new.build(DATA_SET, DATA_LABELS)
77
+ # age_range = '<30'
78
+ # marketing_target = nil
79
+ # eval id3.to_s
80
+ # puts marketing_target
81
+ # # => 'Y'
82
+ # = More about ID3 and decision trees
83
+ #
84
+ # * http://en.wikipedia.org/wiki/Decision_tree
85
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
86
+ #
87
+ # = About the project
88
+ # Author:: Sergio Fierens
89
+ # License:: MPL 1.1
90
+
91
+ class ID3
92
+
93
+ attr_reader :data_labels
94
+ include ClassifierHelper
95
+
96
+ # Create a new decision tree. If your data is classified with N attributes
97
+ # and M examples, then your data examples must have the following format:
98
+ #
99
+ # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CATEGORY_VAL1],
100
+ # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CATEGORY_VAL2],
101
+ # ...
102
+ # [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
103
+ # ]
104
+ #
105
+ # e.g.
106
+ # [ ['New York', '<30', 'M', 'Y'],
107
+ # ['Chicago', '<30', 'M', 'Y'],
108
+ # ['Chicago', '<30', 'F', 'Y'],
109
+ # ['New York', '<30', 'M', 'Y'],
110
+ # ['New York', '<30', 'M', 'Y'],
111
+ # ['Chicago', '[30-50)', 'M', 'Y'],
112
+ # ['New York', '[30-50)', 'F', 'N'],
113
+ # ['Chicago', '[30-50)', 'F', 'Y'],
114
+ # ['New York', '[30-50)', 'F', 'N'],
115
+ # ['Chicago', '[50-80]', 'M', 'N'],
116
+ # ['New York', '[50-80]', 'F', 'N'],
117
+ # ['New York', '[50-80]', 'M', 'N'],
118
+ # ['Chicago', '[50-80]', 'M', 'N'],
119
+ # ['New York', '[50-80]', 'F', 'N'],
120
+ # ['Chicago', '>80', 'F', 'Y']
121
+ # ]
122
+ #
123
+ # Data labels must have the following format:
124
+ # [ 'city', 'age_range', 'gender', 'marketing_target' ]
125
+ #
126
+ # If you do not provide labels for your data, the following labels will
127
+ # be created by default:
128
+ # [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
129
+ #
130
+ def build(data_examples, data_labels=nil)
131
+ check_data_examples(data_examples)
132
+ @data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
133
+ preprocess_data(data_examples)
134
+ return self
135
+ end
136
+
137
+ # You can evaluate new data, predicting its category.
138
+ # e.g.
139
+ # id3.eval(['New York', '<30', 'F']) # => 'Y'
140
+ def eval(data)
141
+ @tree.value(data) if @tree
142
+ end
143
+
144
+ # This method returns the generated rules in ruby code.
145
+ # e.g.
146
+ #
147
+ # id3.to_s
148
+ # # => if age_range=='<30' then marketing_target='Y'
149
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
150
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
151
+ # elsif age_range=='[50-80]' then marketing_target='N'
152
+ # elsif age_range=='>80' then marketing_target='Y'
153
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
154
+ #
155
+ # It is a nice way to inspect induction results, and also to execute them:
156
+ # age_range = '<30'
157
+ # marketing_target = nil
158
+ # eval id3.to_s
159
+ # puts marketing_target
160
+ # # => 'Y'
161
+ def to_s
162
+ rules = @tree.get_rules
163
+ rules = rules.collect do |rule|
164
+ "#{rule[0..-2].join(' and ')} then #{rule.last}"
165
+ end
166
+ return "if #{rules.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
167
+ end
168
+
169
+ private
170
+ def preprocess_data(data_examples)
171
+ @tree = build_node(data_examples)
172
+ end
173
+
174
+ private
175
+ def build_node(data_examples, flag_att = [])
176
+ return ErrorNode.new if data_examples.length == 0
177
+ domain = domain(data_examples)
178
+ return CategoryNode.new(@data_labels.last, domain.last[0]) if domain.last.length == 1
179
+ min_entropy_index = min_entropy_index(data_examples, domain, flag_att)
180
+ flag_att << min_entropy_index
181
+ split_data_examples = split_data_examples(data_examples, domain, min_entropy_index)
182
+ return CategoryNode.new(@data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
183
+ nodes = split_data_examples.collect do |partial_data_examples|
184
+ build_node(partial_data_examples, flag_att)
185
+ end
186
+ return EvaluationNode.new(@data_labels, min_entropy_index, domain[min_entropy_index], nodes)
187
+ end
188
+
189
+ private
190
+ def self.sum(values)
191
+ values.inject( 0 ) { |sum,x| sum+x }
192
+ end
193
+
194
+ private
195
+ def self.log2(z)
196
+ return 0.0 if z == 0
197
+ Math.log(z)/LOG2
198
+ end
199
+
200
+ private
201
+ def most_freq(examples, domain)
202
+ freqs = []
203
+ domain.last.length.times { freqs << 0}
204
+ examples.each do |example|
205
+ cat_index = domain.last.index(example.last)
206
+ freq = freqs[cat_index] + 1
207
+ freqs[cat_index] = freq
208
+ end
209
+ max_freq = freqs.max
210
+ max_freq_index = freqs.index(max_freq)
211
+ domain.last[max_freq_index]
212
+ end
213
+
214
+ private
215
+ def split_data_examples(data_examples, domain, att_index)
216
+ data_examples_array = []
217
+ att_value_examples = {}
218
+ data_examples.each do |example|
219
+ example_set = att_value_examples[example[att_index]]
220
+ example_set = [] if !example_set
221
+ example_set << example
222
+ att_value_examples.store(example[att_index], example_set)
223
+ end
224
+ att_value_examples.each_pair do |att_value, example_set|
225
+ att_value_index = domain[att_index].index(att_value)
226
+ data_examples_array[att_value_index] = example_set
227
+ end
228
+ return data_examples_array
229
+ end
230
+
231
+ private
232
+ def min_entropy_index(data_examples, domain, flag_att=[])
233
+ min_entropy = nil
234
+ min_index = 0
235
+ domain[0..-2].each_index do |index|
236
+ freq_grid = freq_grid(index, data_examples, domain)
237
+ entropy = entropy(freq_grid, data_examples.length)
238
+ if (!min_entropy || entropy < min_entropy) && !flag_att.include?(index)
239
+ min_entropy = entropy
240
+ min_index = index
241
+ end
242
+ end
243
+ return min_index
244
+ end
245
+
246
+ private
247
+ def domain(data_examples)
248
+ #return build_domains(data_examples)
249
+ domain = []
250
+ @data_labels.length.times { domain << [] }
251
+ data_examples.each do |data|
252
+ data.each_index do |i|
253
+ domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
254
+ end
255
+ end
256
+ return domain
257
+ end
258
+
259
+ private
260
+ def freq_grid(att_index, data_examples, domain)
261
+ #Initialize empty grid
262
+ grid_element = []
263
+ domain.last.length.times { grid_element << 0}
264
+ grid = []
265
+ domain[att_index].length.times { grid << grid_element.clone }
266
+ #Fill grid with frequencies
267
+ data_examples.each do |example|
268
+ att_val = example[att_index]
269
+ att_val_index = domain[att_index].index(att_val)
270
+ category = example.last
271
+ category_index = domain.last.index(category)
272
+ freq = grid[att_val_index][category_index] + 1
273
+ grid[att_val_index][category_index] = freq
274
+ end
275
+ return grid
276
+ end
277
+
278
+ private
279
+ def entropy(freq_grid, total_examples)
280
+ #Calc entropy of each element
281
+ entropy = 0
282
+ freq_grid.each do |att_freq|
283
+ att_total_freq = ID3.sum(att_freq)
284
+ partial_entropy = 0
285
+ if att_total_freq != 0
286
+ att_freq.each do |freq|
287
+ prop = freq.to_f/att_total_freq
288
+ partial_entropy += (-1*prop*ID3.log2(prop))
289
+ end
290
+ end
291
+ entropy += (att_total_freq.to_f/total_examples) * partial_entropy
292
+ end
293
+ return entropy
294
+ end
295
+
296
+ private
297
+ LOG2 = Math.log(2)
298
+ end
299
+
300
+ class EvaluationNode
301
+
302
+ attr_reader :index, :values, :nodes
303
+
304
+ def initialize(data_labels, index, values, nodes)
305
+ @index = index
306
+ @values = values
307
+ @nodes = nodes
308
+ @data_labels = data_labels
309
+ end
310
+
311
+ def value(data)
312
+ value = data[@index]
313
+ return rule_not_found if !@values.include?(value)
314
+ return nodes[@values.index(value)].value(data)
315
+ end
316
+
317
+ def get_rules
318
+ rule_set = []
319
+ @nodes.each_index do |child_node_index|
320
+ my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
321
+ child_node = @nodes[child_node_index]
322
+ child_node_rules = child_node.get_rules
323
+ child_node_rules.each do |child_rule|
324
+ child_rule.unshift(my_rule)
325
+ end
326
+ rule_set += child_node_rules
327
+ end
328
+ return rule_set
329
+ end
330
+
331
+ end
332
+
333
+ class CategoryNode
334
+ def initialize(label, value)
335
+ @label = label
336
+ @value = value
337
+ end
338
+ def value(data)
339
+ return @value
340
+ end
341
+ def get_rules
342
+ return [["#{@label}='#{@value}'"]]
343
+ end
344
+ end
345
+
346
+ class ErrorNode
347
+ def value(data)
348
+ raise "There was not enough information during training to do a proper induction for this data element."
349
+ end
350
+ def get_rules
351
+ return []
352
+ end
353
+ end
354
+
355
+ end
356
+ end