ai4r 1.1 → 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. data/README.rdoc +21 -20
  2. data/examples/decision_trees/id3_example.rb +3 -2
  3. data/examples/genetic_algorithm/genetic_algorithm_example.rb +6 -6
  4. data/examples/neural_network/backpropagation_example.rb +2 -2
  5. data/lib/ai4r/classifiers/classifier_helper.rb +54 -0
  6. data/lib/ai4r/classifiers/id3.rb +356 -0
  7. data/lib/ai4r/classifiers/one_r.rb +148 -0
  8. data/lib/ai4r/classifiers/prism.rb +231 -0
  9. data/lib/ai4r/classifiers/zero_r.rb +104 -0
  10. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +272 -0
  11. data/lib/ai4r/neural_network/backpropagation.rb +271 -0
  12. data/site/build/tmp/locationmap.xml +14 -14
  13. data/site/build/tmp/output.xmap +23 -23
  14. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
  15. data/site/build/tmp/plugins-1.xml +0 -11
  16. data/site/build/tmp/plugins-2.xml +54 -0
  17. data/site/build/tmp/projfilters.properties +41 -41
  18. data/site/build/webapp/WEB-INF/logs/core.log +681 -788
  19. data/site/build/webapp/WEB-INF/logs/error.log +281 -248
  20. data/site/build/webapp/WEB-INF/logs/sitemap.log +1015 -0
  21. data/site/src/documentation/content/xdocs/forum.html +9 -0
  22. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +82 -68
  23. data/site/src/documentation/content/xdocs/index.xml +47 -18
  24. data/site/src/documentation/content/xdocs/machineLearning.xml +10 -9
  25. data/site/src/documentation/content/xdocs/neuralNetworks.xml +60 -36
  26. data/site/src/documentation/content/xdocs/site.xml +8 -5
  27. data/site/src/documentation/content/xdocs/svn.xml +11 -1
  28. data/site/src/documentation/resources/images/Thumbs.db +0 -0
  29. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  30. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  31. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  32. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  33. data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
  34. data/site/src/documentation/skinconf.xml +18 -18
  35. data/test/classifiers/id3_test.rb +206 -0
  36. data/test/classifiers/one_r_test.rb +62 -0
  37. data/test/classifiers/prism_test.rb +83 -0
  38. data/test/classifiers/zero_r_test.rb +48 -0
  39. data/test/genetic_algorithm/chromosome_test.rb +41 -38
  40. data/test/genetic_algorithm/genetic_algorithm_test.rb +64 -61
  41. data/test/neural_network/backpropagation_test.rb +20 -18
  42. metadata +109 -199
  43. data/lib/decision_tree/id3.rb +0 -354
  44. data/lib/genetic_algorithm/genetic_algorithm.rb +0 -268
  45. data/lib/neural_network/backpropagation.rb +0 -264
  46. data/site/build/site/en/broken-links.xml +0 -2
  47. data/site/build/site/en/downloads.html +0 -187
  48. data/site/build/site/en/downloads.pdf +0 -151
  49. data/site/build/site/en/geneticAlgorithms.html +0 -564
  50. data/site/build/site/en/geneticAlgorithms.pdf +0 -911
  51. data/site/build/site/en/images/ai4r-logo.png +0 -0
  52. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  53. data/site/build/site/en/images/c.png +0 -0
  54. data/site/build/site/en/images/c_wbn.png +0 -0
  55. data/site/build/site/en/images/c_wn.png +0 -0
  56. data/site/build/site/en/images/ero.gif +0 -0
  57. data/site/build/site/en/images/europe2.png +0 -0
  58. data/site/build/site/en/images/europe3.png +0 -0
  59. data/site/build/site/en/images/fitness.png +0 -0
  60. data/site/build/site/en/images/instruction_arrow.png +0 -0
  61. data/site/build/site/en/images/my_email.png +0 -0
  62. data/site/build/site/en/images/rubyforge.png +0 -0
  63. data/site/build/site/en/images/s.png +0 -0
  64. data/site/build/site/en/images/s_wbn.png +0 -0
  65. data/site/build/site/en/images/s_wn.png +0 -0
  66. data/site/build/site/en/images/sigmoid.png +0 -0
  67. data/site/build/site/en/images/t.png +0 -0
  68. data/site/build/site/en/images/t_wbn.png +0 -0
  69. data/site/build/site/en/images/t_wn.png +0 -0
  70. data/site/build/site/en/index.html +0 -258
  71. data/site/build/site/en/index.pdf +0 -306
  72. data/site/build/site/en/linkmap.html +0 -231
  73. data/site/build/site/en/linkmap.pdf +0 -94
  74. data/site/build/site/en/locationmap.xml +0 -72
  75. data/site/build/site/en/machineLearning.html +0 -325
  76. data/site/build/site/en/machineLearning.pdf +0 -337
  77. data/site/build/site/en/neuralNetworks.html +0 -446
  78. data/site/build/site/en/neuralNetworks.pdf +0 -604
  79. data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
  80. data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
  81. data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
  82. data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
  83. data/site/build/site/en/skin/basic.css +0 -166
  84. data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
  85. data/site/build/site/en/skin/breadcrumbs.js +0 -237
  86. data/site/build/site/en/skin/fontsize.js +0 -166
  87. data/site/build/site/en/skin/getBlank.js +0 -40
  88. data/site/build/site/en/skin/getMenu.js +0 -45
  89. data/site/build/site/en/skin/images/README.txt +0 -1
  90. data/site/build/site/en/skin/images/add.jpg +0 -0
  91. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  92. data/site/build/site/en/skin/images/chapter.gif +0 -0
  93. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  94. data/site/build/site/en/skin/images/current.gif +0 -0
  95. data/site/build/site/en/skin/images/error.png +0 -0
  96. data/site/build/site/en/skin/images/external-link.gif +0 -0
  97. data/site/build/site/en/skin/images/fix.jpg +0 -0
  98. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  99. data/site/build/site/en/skin/images/hack.jpg +0 -0
  100. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  101. data/site/build/site/en/skin/images/info.png +0 -0
  102. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  103. data/site/build/site/en/skin/images/label.gif +0 -0
  104. data/site/build/site/en/skin/images/page.gif +0 -0
  105. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  106. data/site/build/site/en/skin/images/poddoc.png +0 -0
  107. data/site/build/site/en/skin/images/printer.gif +0 -0
  108. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  109. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  110. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  111. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  112. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  113. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  114. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  115. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  116. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  117. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  118. data/site/build/site/en/skin/images/remove.jpg +0 -0
  119. data/site/build/site/en/skin/images/rss.png +0 -0
  120. data/site/build/site/en/skin/images/spacer.gif +0 -0
  121. data/site/build/site/en/skin/images/success.png +0 -0
  122. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  123. data/site/build/site/en/skin/images/update.jpg +0 -0
  124. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  125. data/site/build/site/en/skin/images/vcss.png +0 -0
  126. data/site/build/site/en/skin/images/warning.png +0 -0
  127. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  128. data/site/build/site/en/skin/menu.js +0 -48
  129. data/site/build/site/en/skin/note.txt +0 -50
  130. data/site/build/site/en/skin/print.css +0 -54
  131. data/site/build/site/en/skin/profile.css +0 -163
  132. data/site/build/site/en/skin/prototype.js +0 -1257
  133. data/site/build/site/en/skin/screen.css +0 -587
  134. data/site/build/site/en/svn.html +0 -223
  135. data/site/build/site/en/svn.pdf +0 -239
  136. data/site/build/site/en/wholesite.pdf +0 -1686
  137. data/site/build/tmp/brokenlinks.xml +0 -2
  138. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  139. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  140. data/test/decision_tree/id3_test.rb +0 -209
@@ -1,40 +1,41 @@
1
1
  = Introduction
2
2
 
3
- This project aims to produce ruby implementations of
4
- algorithms covering several Artificial intelligence fields, including:
3
+ AI4R is a collection of Ruby algorithm implementations, covering several artificial intelligence fields,
4
+ and simple practical examples using them. It implements:
5
5
 
6
- * Machine Learning (DecisionTree::ID3)
7
-
8
- Decision Trees using an implementation of ID3 algorithm.
9
-
10
- * Genetic algorithms (GeneticAlgorithm::GeneticSearch)
11
-
12
- Implementation of GeneticSearch and Chromosome classes. The GeneticSearch is a generic class, and can be used to solved any kind of problems. The GeneticSearch class performs a stochastic search of the solution of a given problem.
13
-
14
- * Neural network (NeuralNetwork::Backpropagation)
15
-
16
- Implementation of neural networks using the Backpropagation supervised learning technique.
6
+ * Genetic algorithms (Ai4r::GeneticAlgorithm::GeneticSearch)
7
+
8
+ * Neural networks (Ai4r::NeuralNetwork::Backpropagation)
9
+
10
+ * ID3 Decision Trees (Ai4r::Classifiers::ID3)
17
11
 
18
- * Bayesian networks
12
+ * PRISM (J. Cendrowska, 1987) (Ai4r::Classifiers::Prism)
13
+
14
+ * OneR (AKA One Attribute Rule, 1R) (Ai4r::Classifiers::OneR)
19
15
 
20
- TODO
16
+ * ZeroR (Ai4r::Classifiers::ZeroR)
21
17
 
22
18
  = Where can I find the latest code and info on this project?
23
19
 
24
20
  http://ai4r.rubyforge.org
25
21
 
22
+ http://ai4r.jadeferret.com
23
+
26
24
  = How to install
27
25
 
28
26
  1. Install the gem:
29
27
 
30
- gem install http://rubyforge.org/frs/download.php/32923/ai4r-1.0.gem
28
+ gem install ai4r
31
29
 
32
30
  2. Include require statements in your code:
33
31
 
34
- require "rubygems"
35
- require "decision_tree/id3"
36
- require "neural_network/backpropagation"
37
- require "genetic_algorithm/genetic_algorithm"
32
+ require "rubygems"
33
+ require "ai4r/classifiers/id3"
34
+ require "ai4r/classifiers/prism"
35
+ require "ai4r/classifiers/one_r"
36
+ require "ai4r/classifiers/zero_r"
37
+ require "ai4r/neural_network/backpropagation"
38
+ require "ai4r/genetic_algorithm/genetic_algorithm"
38
39
 
39
40
  = Feedback
40
41
 
@@ -7,7 +7,8 @@
7
7
  # the Mozilla Public License version 1.1 as published by the
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
- require File.dirname(__FILE__) + '/../../lib/decision_tree/id3'
10
+ #require File.dirname(__FILE__) + '/../../lib/decision_tree/id3'
11
+ require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
11
12
  require 'csv'
12
13
 
13
14
  # Load data from data_set.csv
@@ -18,7 +19,7 @@ end
18
19
  data_labels = data_set.shift
19
20
 
20
21
  # Build ID3 tree
21
- id3 = DecisionTree::ID3.new(data_set, data_labels)
22
+ id3 = Ai4r::Classifiers::ID3.new.build(data_set, data_labels)
22
23
 
23
24
  # Show rules
24
25
  puts "Discovered rules are:"
@@ -7,7 +7,7 @@
7
7
  # the Mozilla Public License version 1.1 as published by the
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
- require File.dirname(__FILE__) + '/../../lib/genetic_algorithm/genetic_algorithm'
10
+ require File.dirname(__FILE__) + '/../../lib/ai4r/genetic_algorithm/genetic_algorithm'
11
11
  require 'csv'
12
12
 
13
13
  # Load data from data_set.csv
@@ -20,18 +20,18 @@ data_set.collect! do |column|
20
20
  column.collect { |element| element.to_f}
21
21
  end
22
22
 
23
- GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
23
+ Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
24
24
 
25
25
  puts "Some random selected tours costs: "
26
26
  3.times do
27
- c = GeneticAlgorithm::Chromosome.seed
28
- puts "COST #{c.fitness} TOUR: #{c.data.collect{ |c| data_labels[c]} * ', '}"
27
+ c = Ai4r::GeneticAlgorithm::Chromosome.seed
28
+ puts "COST #{-1 * c.fitness} TOUR: #{c.data.collect{ |c| data_labels[c]} * ', '}"
29
29
  end
30
30
 
31
31
  puts "Beginning genetic search, please wait... "
32
- search = GeneticAlgorithm::GeneticSearch.new(800, 100)
32
+ search = Ai4r::GeneticAlgorithm::GeneticSearch.new(800, 100)
33
33
  result = search.run
34
- puts "BEST COST FOUND #{result.fitness} TOUR: #{result.data.collect{ |c| data_labels[c]} * ', '}"
34
+ puts "BEST COST FOUND #{-1 * result.fitness} TOUR: #{result.data.collect{ |c| data_labels[c]} * ', '}"
35
35
 
36
36
  # $7611.99 TOUR: Moscow, Kiev, Warsaw, Hamburg, Berlin, Vienna, Munich, Milan, Rome, Barcelona, Madrid, Paris, Brussels, London, Dublin
37
37
  # $7659.81 TOUR: Moscow, Kiev, Warsaw, Vienna, Munich, Berlin, Hamburg, Brussels, Dublin, London, Paris, Milan, Rome, Barcelona, Madrid
@@ -10,12 +10,12 @@
10
10
  require File.dirname(__FILE__) + '/training_patterns'
11
11
  require File.dirname(__FILE__) + '/patterns_with_noise'
12
12
  require File.dirname(__FILE__) + '/patterns_with_base_noise'
13
- require File.dirname(__FILE__) + '/../../lib/neural_network/backpropagation'
13
+ require File.dirname(__FILE__) + '/../../lib/ai4r/neural_network/backpropagation'
14
14
  require 'benchmark'
15
15
 
16
16
  times = Benchmark.measure do
17
17
 
18
- net = NeuralNetwork::Backpropagation.new([256, 3])
18
+ net = Ai4r::NeuralNetwork::Backpropagation.new([256, 3])
19
19
 
20
20
  tr_input = TRIANGLE.flatten.collect { |input| input.to_f / 10}
21
21
  sq_input = SQUARE.flatten.collect { |input| input.to_f / 10}
@@ -0,0 +1,54 @@
1
+ require 'set'
2
+
3
+ module Ai4r
4
+
5
+ module Classifiers
6
+
7
+ NUMERIC_CLASS_TYPE = 1
8
+ NOMINAL_CLASS_TYPE = 2
9
+
10
+ module ClassifierHelper
11
+
12
+ def default_data_labels(data_examples)
13
+ data_labels = []
14
+ data_examples[0][0..-2].each_index do |i|
15
+ data_labels[i] = "attribute_#{i+1}"
16
+ end
17
+ data_labels[data_labels.length]="class_value"
18
+ return data_labels
19
+ end
20
+
21
+ def check_data_examples(data_examples)
22
+ if !data_examples || data_examples.empty?
23
+ raise ArgumentError,"Examples data set must not be empty."
24
+ elsif !data_examples.first.is_a?(Array)
25
+ raise ArgumentError,"Unkown format for example data."
26
+ end
27
+ end
28
+
29
+ # Returns the number of attributes, including the class attribute
30
+ def num_attributes(data_examples)
31
+ return 0 if !data_examples || data_examples.empty? || !data_examples.first.is_a?(Array)
32
+ return data_examples.first.size
33
+ end
34
+
35
+ # Returns an array with the domain of each attribute (Set instance
36
+ # containing all possible values)
37
+ # Return example:
38
+ # => [#<Set: {"New York", "Chicago"}>,
39
+ # #<Set: {"<30", "[30-50)", "[50-80]", ">80"}>,
40
+ # #<Set: {"M", "F"}>,
41
+ # #<Set: {"Y", "N"}>]
42
+ def build_domains(data_examples)
43
+ domains = Array.new(num_attributes(data_examples)) { Set.new }
44
+ data_examples.each do |data|
45
+ data.each_index {|attr_index| domains[attr_index] << data[attr_index]}
46
+ end
47
+ return domains
48
+ end
49
+
50
+ end
51
+
52
+ end
53
+
54
+ end
@@ -0,0 +1,356 @@
1
+ # Author:: Sergio Fierens (Implementation, Quinlan is
2
+ # the creator of the algorithm)
3
+ # License:: MPL 1.1
4
+ # Project:: ai4r
5
+ # Url:: http://ai4r.rubyforge.org/
6
+ #
7
+ # You can redistribute it and/or modify it under the terms of
8
+ # the Mozilla Public License version 1.1 as published by the
9
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
10
+
11
+ require File.dirname(__FILE__) + '/classifier_helper'
12
+
13
+ module Ai4r
14
+
15
+ module Classifiers
16
+
17
+ # = Introduction
18
+ # This is an implementation of the ID3 algorithm (Quinlan)
19
+ # Given a set of preclassified examples, it builds a decision tree
20
+ # by top-down induction, guided by the information gain and
21
+ # entropy measures.
22
+ #
23
+ # * http://en.wikipedia.org/wiki/Decision_tree
24
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
25
+ #
26
+ # = How to use it
27
+ #
28
+ # DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
29
+ #
30
+ # DATA_SET = [ ['New York', '<30', 'M', 'Y'],
31
+ # ['Chicago', '<30', 'M', 'Y'],
32
+ # ['Chicago', '<30', 'F', 'Y'],
33
+ # ['New York', '<30', 'M', 'Y'],
34
+ # ['New York', '<30', 'M', 'Y'],
35
+ # ['Chicago', '[30-50)', 'M', 'Y'],
36
+ # ['New York', '[30-50)', 'F', 'N'],
37
+ # ['Chicago', '[30-50)', 'F', 'Y'],
38
+ # ['New York', '[30-50)', 'F', 'N'],
39
+ # ['Chicago', '[50-80]', 'M', 'N'],
40
+ # ['New York', '[50-80]', 'F', 'N'],
41
+ # ['New York', '[50-80]', 'M', 'N'],
42
+ # ['Chicago', '[50-80]', 'M', 'N'],
43
+ # ['New York', '[50-80]', 'F', 'N'],
44
+ # ['Chicago', '>80', 'F', 'Y']
45
+ # ]
46
+ #
47
+ # id3 = Ai4r::Classifiers::ID3.new.build(DATA_SET, DATA_LABELS)
48
+ #
49
+ # id3.to_s
50
+ # # => if age_range=='<30' then marketing_target='Y'
51
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
52
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
53
+ # elsif age_range=='[50-80]' then marketing_target='N'
54
+ # elsif age_range=='>80' then marketing_target='Y'
55
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
56
+ #
57
+ # id3.eval(['New York', '<30', 'M'])
58
+ # # => 'Y'
59
+ #
60
+ # = A better way to load the data
61
+ #
62
+ # In real life you will use many more training examples, with more
63
+ # attributes. Consider moving your data to an external CSV (comma-separated
64
+ # values) file.
65
+ #
66
+ # data_set = []
67
+ # CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
68
+ # data_set << row
69
+ # end
70
+ # data_labels = data_set.shift
71
+ #
72
+ # id3 = Ai4r::Classifiers::ID3.new.build(data_set, data_labels)
73
+ #
74
+ # = A nice tip for data evaluation
75
+ #
76
+ # id3 = Ai4r::Classifiers::ID3.new.build(DATA_SET, DATA_LABELS)
77
+ # age_range = '<30'
78
+ # marketing_target = nil
79
+ # eval id3.to_s
80
+ # puts marketing_target
81
+ # # => 'Y'
82
+ # = More about ID3 and decision trees
83
+ #
84
+ # * http://en.wikipedia.org/wiki/Decision_tree
85
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
86
+ #
87
+ # = About the project
88
+ # Author:: Sergio Fierens
89
+ # License:: MPL 1.1
90
+
91
+ class ID3
92
+
93
+ attr_reader :data_labels
94
+ include ClassifierHelper
95
+
96
+ # Create a new decision tree. If your data is classified with N attributes
97
+ # and M examples, then your data examples must have the following format:
98
+ #
99
+ # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CATEGORY_VAL1],
100
+ # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CATEGORY_VAL2],
101
+ # ...
102
+ # [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
103
+ # ]
104
+ #
105
+ # e.g.
106
+ # [ ['New York', '<30', 'M', 'Y'],
107
+ # ['Chicago', '<30', 'M', 'Y'],
108
+ # ['Chicago', '<30', 'F', 'Y'],
109
+ # ['New York', '<30', 'M', 'Y'],
110
+ # ['New York', '<30', 'M', 'Y'],
111
+ # ['Chicago', '[30-50)', 'M', 'Y'],
112
+ # ['New York', '[30-50)', 'F', 'N'],
113
+ # ['Chicago', '[30-50)', 'F', 'Y'],
114
+ # ['New York', '[30-50)', 'F', 'N'],
115
+ # ['Chicago', '[50-80]', 'M', 'N'],
116
+ # ['New York', '[50-80]', 'F', 'N'],
117
+ # ['New York', '[50-80]', 'M', 'N'],
118
+ # ['Chicago', '[50-80]', 'M', 'N'],
119
+ # ['New York', '[50-80]', 'F', 'N'],
120
+ # ['Chicago', '>80', 'F', 'Y']
121
+ # ]
122
+ #
123
+ # Data labels must have the following format:
124
+ # [ 'city', 'age_range', 'gender', 'marketing_target' ]
125
+ #
126
+ # If you do not provide labels for your data, the following labels will
127
+ # be created by default:
128
+ # [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
129
+ #
130
+ def build(data_examples, data_labels=nil)
131
+ check_data_examples(data_examples)
132
+ @data_labels = (data_labels) ? data_labels : default_data_labels(data_examples)
133
+ preprocess_data(data_examples)
134
+ return self
135
+ end
136
+
137
+ # You can evaluate new data, predicting its category.
138
+ # e.g.
139
+ # id3.eval(['New York', '<30', 'F']) # => 'Y'
140
+ def eval(data)
141
+ @tree.value(data) if @tree
142
+ end
143
+
144
+ # This method returns the generated rules in ruby code.
145
+ # e.g.
146
+ #
147
+ # id3.to_s
148
+ # # => if age_range=='<30' then marketing_target='Y'
149
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
150
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
151
+ # elsif age_range=='[50-80]' then marketing_target='N'
152
+ # elsif age_range=='>80' then marketing_target='Y'
153
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
154
+ #
155
+ # It is a nice way to inspect induction results, and also to execute them:
156
+ # age_range = '<30'
157
+ # marketing_target = nil
158
+ # eval id3.to_s
159
+ # puts marketing_target
160
+ # # => 'Y'
161
+ def to_s
162
+ rules = @tree.get_rules
163
+ rules = rules.collect do |rule|
164
+ "#{rule[0..-2].join(' and ')} then #{rule.last}"
165
+ end
166
+ return "if #{rules.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
167
+ end
168
+
169
+ private
170
+ def preprocess_data(data_examples)
171
+ @tree = build_node(data_examples)
172
+ end
173
+
174
+ private
175
+ def build_node(data_examples, flag_att = [])
176
+ return ErrorNode.new if data_examples.length == 0
177
+ domain = domain(data_examples)
178
+ return CategoryNode.new(@data_labels.last, domain.last[0]) if domain.last.length == 1
179
+ min_entropy_index = min_entropy_index(data_examples, domain, flag_att)
180
+ flag_att << min_entropy_index
181
+ split_data_examples = split_data_examples(data_examples, domain, min_entropy_index)
182
+ return CategoryNode.new(@data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
183
+ nodes = split_data_examples.collect do |partial_data_examples|
184
+ build_node(partial_data_examples, flag_att)
185
+ end
186
+ return EvaluationNode.new(@data_labels, min_entropy_index, domain[min_entropy_index], nodes)
187
+ end
188
+
189
+ private
190
+ def self.sum(values)
191
+ values.inject( 0 ) { |sum,x| sum+x }
192
+ end
193
+
194
+ private
195
+ def self.log2(z)
196
+ return 0.0 if z == 0
197
+ Math.log(z)/LOG2
198
+ end
199
+
200
+ private
201
+ def most_freq(examples, domain)
202
+ freqs = []
203
+ domain.last.length.times { freqs << 0}
204
+ examples.each do |example|
205
+ cat_index = domain.last.index(example.last)
206
+ freq = freqs[cat_index] + 1
207
+ freqs[cat_index] = freq
208
+ end
209
+ max_freq = freqs.max
210
+ max_freq_index = freqs.index(max_freq)
211
+ domain.last[max_freq_index]
212
+ end
213
+
214
+ private
215
+ def split_data_examples(data_examples, domain, att_index)
216
+ data_examples_array = []
217
+ att_value_examples = {}
218
+ data_examples.each do |example|
219
+ example_set = att_value_examples[example[att_index]]
220
+ example_set = [] if !example_set
221
+ example_set << example
222
+ att_value_examples.store(example[att_index], example_set)
223
+ end
224
+ att_value_examples.each_pair do |att_value, example_set|
225
+ att_value_index = domain[att_index].index(att_value)
226
+ data_examples_array[att_value_index] = example_set
227
+ end
228
+ return data_examples_array
229
+ end
230
+
231
+ private
232
+ def min_entropy_index(data_examples, domain, flag_att=[])
233
+ min_entropy = nil
234
+ min_index = 0
235
+ domain[0..-2].each_index do |index|
236
+ freq_grid = freq_grid(index, data_examples, domain)
237
+ entropy = entropy(freq_grid, data_examples.length)
238
+ if (!min_entropy || entropy < min_entropy) && !flag_att.include?(index)
239
+ min_entropy = entropy
240
+ min_index = index
241
+ end
242
+ end
243
+ return min_index
244
+ end
245
+
246
+ private
247
+ def domain(data_examples)
248
+ #return build_domains(data_examples)
249
+ domain = []
250
+ @data_labels.length.times { domain << [] }
251
+ data_examples.each do |data|
252
+ data.each_index do |i|
253
+ domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
254
+ end
255
+ end
256
+ return domain
257
+ end
258
+
259
+ private
260
+ def freq_grid(att_index, data_examples, domain)
261
+ #Initialize empty grid
262
+ grid_element = []
263
+ domain.last.length.times { grid_element << 0}
264
+ grid = []
265
+ domain[att_index].length.times { grid << grid_element.clone }
266
+ #Fill grid with frequencies
267
+ data_examples.each do |example|
268
+ att_val = example[att_index]
269
+ att_val_index = domain[att_index].index(att_val)
270
+ category = example.last
271
+ category_index = domain.last.index(category)
272
+ freq = grid[att_val_index][category_index] + 1
273
+ grid[att_val_index][category_index] = freq
274
+ end
275
+ return grid
276
+ end
277
+
278
+ private
279
+ def entropy(freq_grid, total_examples)
280
+ #Calc entropy of each element
281
+ entropy = 0
282
+ freq_grid.each do |att_freq|
283
+ att_total_freq = ID3.sum(att_freq)
284
+ partial_entropy = 0
285
+ if att_total_freq != 0
286
+ att_freq.each do |freq|
287
+ prop = freq.to_f/att_total_freq
288
+ partial_entropy += (-1*prop*ID3.log2(prop))
289
+ end
290
+ end
291
+ entropy += (att_total_freq.to_f/total_examples) * partial_entropy
292
+ end
293
+ return entropy
294
+ end
295
+
296
+ private
297
+ LOG2 = Math.log(2)
298
+ end
299
+
300
+ class EvaluationNode
301
+
302
+ attr_reader :index, :values, :nodes
303
+
304
+ def initialize(data_labels, index, values, nodes)
305
+ @index = index
306
+ @values = values
307
+ @nodes = nodes
308
+ @data_labels = data_labels
309
+ end
310
+
311
+ def value(data)
312
+ value = data[@index]
313
+ return rule_not_found if !@values.include?(value)
314
+ return nodes[@values.index(value)].value(data)
315
+ end
316
+
317
+ def get_rules
318
+ rule_set = []
319
+ @nodes.each_index do |child_node_index|
320
+ my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
321
+ child_node = @nodes[child_node_index]
322
+ child_node_rules = child_node.get_rules
323
+ child_node_rules.each do |child_rule|
324
+ child_rule.unshift(my_rule)
325
+ end
326
+ rule_set += child_node_rules
327
+ end
328
+ return rule_set
329
+ end
330
+
331
+ end
332
+
333
+ class CategoryNode
334
+ def initialize(label, value)
335
+ @label = label
336
+ @value = value
337
+ end
338
+ def value(data)
339
+ return @value
340
+ end
341
+ def get_rules
342
+ return [["#{@label}='#{@value}'"]]
343
+ end
344
+ end
345
+
346
+ class ErrorNode
347
+ def value(data)
348
+ raise "There was not enough information during training to do a proper induction for this data element."
349
+ end
350
+ def get_rules
351
+ return []
352
+ end
353
+ end
354
+
355
+ end
356
+ end