nirvdrum-ai4r 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. data/.gitignore +1 -0
  2. data/.rakeTasks +7 -0
  3. data/README.rdoc +56 -0
  4. data/Rakefile.rb +42 -0
  5. data/VERSION +1 -0
  6. data/ai4r.gemspec +221 -0
  7. data/change_log +49 -0
  8. data/examples/classifiers/id3_data.csv +121 -0
  9. data/examples/classifiers/id3_example.rb +29 -0
  10. data/examples/classifiers/naive_bayes_data.csv +11 -0
  11. data/examples/classifiers/naive_bayes_example.rb +16 -0
  12. data/examples/classifiers/results.txt +31 -0
  13. data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
  14. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  15. data/examples/neural_network/backpropagation_example.rb +67 -0
  16. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  17. data/examples/neural_network/patterns_with_noise.rb +66 -0
  18. data/examples/neural_network/training_patterns.rb +68 -0
  19. data/examples/neural_network/xor_example.rb +35 -0
  20. data/examples/som/som_data.rb +156 -0
  21. data/examples/som/som_multi_node_example.rb +22 -0
  22. data/examples/som/som_single_example.rb +24 -0
  23. data/lib/ai4r.rb +32 -0
  24. data/lib/ai4r/classifiers/classifier.rb +59 -0
  25. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  26. data/lib/ai4r/classifiers/id3.rb +326 -0
  27. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  28. data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
  29. data/lib/ai4r/classifiers/one_r.rb +110 -0
  30. data/lib/ai4r/classifiers/prism.rb +197 -0
  31. data/lib/ai4r/classifiers/zero_r.rb +73 -0
  32. data/lib/ai4r/clusterers/average_linkage.rb +59 -0
  33. data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
  34. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  35. data/lib/ai4r/clusterers/clusterer.rb +61 -0
  36. data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
  37. data/lib/ai4r/clusterers/diana.rb +139 -0
  38. data/lib/ai4r/clusterers/k_means.rb +126 -0
  39. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  40. data/lib/ai4r/clusterers/single_linkage.rb +194 -0
  41. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  42. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  43. data/lib/ai4r/data/data_set.rb +266 -0
  44. data/lib/ai4r/data/parameterizable.rb +64 -0
  45. data/lib/ai4r/data/proximity.rb +100 -0
  46. data/lib/ai4r/data/statistics.rb +77 -0
  47. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  48. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
  49. data/lib/ai4r/neural_network/backpropagation.rb +293 -0
  50. data/lib/ai4r/neural_network/hopfield.rb +149 -0
  51. data/lib/ai4r/som/layer.rb +68 -0
  52. data/lib/ai4r/som/node.rb +96 -0
  53. data/lib/ai4r/som/som.rb +155 -0
  54. data/lib/ai4r/som/two_phase_layer.rb +90 -0
  55. data/site/forrest.properties +152 -0
  56. data/site/forrest.properties.dispatcher.properties +25 -0
  57. data/site/forrest.properties.xml +29 -0
  58. data/site/src/documentation/README.txt +7 -0
  59. data/site/src/documentation/classes/CatalogManager.properties +62 -0
  60. data/site/src/documentation/content/locationmap.xml +72 -0
  61. data/site/src/documentation/content/xdocs/downloads.html +9 -0
  62. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +294 -0
  63. data/site/src/documentation/content/xdocs/index.xml +155 -0
  64. data/site/src/documentation/content/xdocs/machineLearning.xml +131 -0
  65. data/site/src/documentation/content/xdocs/neuralNetworks.xml +270 -0
  66. data/site/src/documentation/content/xdocs/site.xml +54 -0
  67. data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
  68. data/site/src/documentation/content/xdocs/tabs.xml +35 -0
  69. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  70. data/site/src/documentation/resources/images/c.png +0 -0
  71. data/site/src/documentation/resources/images/c_wbn.png +0 -0
  72. data/site/src/documentation/resources/images/c_wn.png +0 -0
  73. data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
  74. data/site/src/documentation/resources/images/ero.gif +0 -0
  75. data/site/src/documentation/resources/images/europe2.png +0 -0
  76. data/site/src/documentation/resources/images/europe3.png +0 -0
  77. data/site/src/documentation/resources/images/fitness.png +0 -0
  78. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  79. data/site/src/documentation/resources/images/icon-a.png +0 -0
  80. data/site/src/documentation/resources/images/icon-b.png +0 -0
  81. data/site/src/documentation/resources/images/icon.png +0 -0
  82. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  83. data/site/src/documentation/resources/images/my_email.png +0 -0
  84. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  85. data/site/src/documentation/resources/images/project-logo.png +0 -0
  86. data/site/src/documentation/resources/images/rubyforge.png +0 -0
  87. data/site/src/documentation/resources/images/s.png +0 -0
  88. data/site/src/documentation/resources/images/s_wbn.png +0 -0
  89. data/site/src/documentation/resources/images/s_wn.png +0 -0
  90. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  91. data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
  92. data/site/src/documentation/resources/images/t.png +0 -0
  93. data/site/src/documentation/resources/images/t_wbn.png +0 -0
  94. data/site/src/documentation/resources/images/t_wn.png +0 -0
  95. data/site/src/documentation/resources/schema/catalog.xcat +29 -0
  96. data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
  97. data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
  98. data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
  99. data/site/src/documentation/sitemap.xmap +66 -0
  100. data/site/src/documentation/skinconf.xml +418 -0
  101. data/site/src/documentation/translations/langcode.xml +29 -0
  102. data/site/src/documentation/translations/languages_de.xml +24 -0
  103. data/site/src/documentation/translations/languages_en.xml +24 -0
  104. data/site/src/documentation/translations/languages_es.xml +22 -0
  105. data/site/src/documentation/translations/languages_fr.xml +24 -0
  106. data/site/src/documentation/translations/languages_nl.xml +24 -0
  107. data/site/src/documentation/translations/menu.xml +33 -0
  108. data/site/src/documentation/translations/menu_af.xml +33 -0
  109. data/site/src/documentation/translations/menu_de.xml +33 -0
  110. data/site/src/documentation/translations/menu_es.xml +33 -0
  111. data/site/src/documentation/translations/menu_fr.xml +33 -0
  112. data/site/src/documentation/translations/menu_it.xml +33 -0
  113. data/site/src/documentation/translations/menu_nl.xml +33 -0
  114. data/site/src/documentation/translations/menu_no.xml +33 -0
  115. data/site/src/documentation/translations/menu_ru.xml +33 -0
  116. data/site/src/documentation/translations/menu_sk.xml +33 -0
  117. data/site/src/documentation/translations/tabs.xml +22 -0
  118. data/site/src/documentation/translations/tabs_de.xml +22 -0
  119. data/site/src/documentation/translations/tabs_es.xml +22 -0
  120. data/site/src/documentation/translations/tabs_fr.xml +22 -0
  121. data/site/src/documentation/translations/tabs_nl.xml +22 -0
  122. data/test/classifiers/hyperpipes_test.rb +84 -0
  123. data/test/classifiers/id3_test.rb +208 -0
  124. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  125. data/test/classifiers/naive_bayes_test.rb +43 -0
  126. data/test/classifiers/one_r_test.rb +62 -0
  127. data/test/classifiers/prism_test.rb +85 -0
  128. data/test/classifiers/zero_r_test.rb +50 -0
  129. data/test/clusterers/average_linkage_test.rb +51 -0
  130. data/test/clusterers/bisecting_k_means_test.rb +66 -0
  131. data/test/clusterers/centroid_linkage_test.rb +53 -0
  132. data/test/clusterers/complete_linkage_test.rb +57 -0
  133. data/test/clusterers/diana_test.rb +69 -0
  134. data/test/clusterers/k_means_test.rb +100 -0
  135. data/test/clusterers/median_linkage_test.rb +53 -0
  136. data/test/clusterers/single_linkage_test.rb +122 -0
  137. data/test/clusterers/ward_linkage_test.rb +53 -0
  138. data/test/clusterers/weighted_average_linkage_test.rb +53 -0
  139. data/test/data/data_set.csv +121 -0
  140. data/test/data/data_set_test.rb +96 -0
  141. data/test/data/proximity_test.rb +81 -0
  142. data/test/data/statistics_data_set.csv +5 -0
  143. data/test/data/statistics_test.rb +65 -0
  144. data/test/experiment/classifier_evaluator_test.rb +76 -0
  145. data/test/genetic_algorithm/chromosome_test.rb +58 -0
  146. data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
  147. data/test/neural_network/backpropagation_test.rb +69 -0
  148. data/test/neural_network/hopfield_test.rb +72 -0
  149. data/test/som/som_test.rb +97 -0
  150. metadata +238 -0
@@ -0,0 +1,156 @@
1
+ # data is from the iris dataset (http://archive.ics.uci.edu/ml/datasets/Iris)
2
+ # it is the full dataset, removing the last column
3
+ # website provides additional information on the dataset itself (attributes, class distribution, etc)
4
+
5
+ SOM_DATA = [
6
+ [5.1, 3.5, 1.4, 0.2],
7
+ [4.9, 3.0, 1.4, 0.2],
8
+ [4.7, 3.2, 1.3, 0.2],
9
+ [4.6, 3.1, 1.5, 0.2],
10
+ [5.0, 3.6, 1.4, 0.2],
11
+ [5.4, 3.9, 1.7, 0.4],
12
+ [4.6, 3.4, 1.4, 0.3],
13
+ [5.0, 3.4, 1.5, 0.2],
14
+ [4.4, 2.9, 1.4, 0.2],
15
+ [4.9, 3.1, 1.5, 0.1],
16
+ [5.4, 3.7, 1.5, 0.2],
17
+ [4.8, 3.4, 1.6, 0.2],
18
+ [4.8, 3.0, 1.4, 0.1],
19
+ [4.3, 3.0, 1.1, 0.1],
20
+ [5.8, 4.0, 1.2, 0.2],
21
+ [5.7, 4.4, 1.5, 0.4],
22
+ [5.4, 3.9, 1.3, 0.4],
23
+ [5.1, 3.5, 1.4, 0.3],
24
+ [5.7, 3.8, 1.7, 0.3],
25
+ [5.1, 3.8, 1.5, 0.3],
26
+ [5.4, 3.4, 1.7, 0.2],
27
+ [5.1, 3.7, 1.5, 0.4],
28
+ [4.6, 3.6, 1.0, 0.2],
29
+ [5.1, 3.3, 1.7, 0.5],
30
+ [4.8, 3.4, 1.9, 0.2],
31
+ [5.0, 3.0, 1.6, 0.2],
32
+ [5.0, 3.4, 1.6, 0.4],
33
+ [5.2, 3.5, 1.5, 0.2],
34
+ [5.2, 3.4, 1.4, 0.2],
35
+ [4.7, 3.2, 1.6, 0.2],
36
+ [4.8, 3.1, 1.6, 0.2],
37
+ [5.4, 3.4, 1.5, 0.4],
38
+ [5.2, 4.1, 1.5, 0.1],
39
+ [5.5, 4.2, 1.4, 0.2],
40
+ [4.9, 3.1, 1.5, 0.1],
41
+ [5.0, 3.2, 1.2, 0.2],
42
+ [5.5, 3.5, 1.3, 0.2],
43
+ [4.9, 3.1, 1.5, 0.1],
44
+ [4.4, 3.0, 1.3, 0.2],
45
+ [5.1, 3.4, 1.5, 0.2],
46
+ [5.0, 3.5, 1.3, 0.3],
47
+ [4.5, 2.3, 1.3, 0.3],
48
+ [4.4, 3.2, 1.3, 0.2],
49
+ [5.0, 3.5, 1.6, 0.6],
50
+ [5.1, 3.8, 1.9, 0.4],
51
+ [4.8, 3.0, 1.4, 0.3],
52
+ [5.1, 3.8, 1.6, 0.2],
53
+ [4.6, 3.2, 1.4, 0.2],
54
+ [5.3, 3.7, 1.5, 0.2],
55
+ [5.0, 3.3, 1.4, 0.2],
56
+ [7.0, 3.2, 4.7, 1.4],
57
+ [6.4, 3.2, 4.5, 1.5],
58
+ [6.9, 3.1, 4.9, 1.5],
59
+ [5.5, 2.3, 4.0, 1.3],
60
+ [6.5, 2.8, 4.6, 1.5],
61
+ [5.7, 2.8, 4.5, 1.3],
62
+ [6.3, 3.3, 4.7, 1.6],
63
+ [4.9, 2.4, 3.3, 1.0],
64
+ [6.6, 2.9, 4.6, 1.3],
65
+ [5.2, 2.7, 3.9, 1.4],
66
+ [5.0, 2.0, 3.5, 1.0],
67
+ [5.9, 3.0, 4.2, 1.5],
68
+ [6.0, 2.2, 4.0, 1.0],
69
+ [6.1, 2.9, 4.7, 1.4],
70
+ [5.6, 2.9, 3.6, 1.3],
71
+ [6.7, 3.1, 4.4, 1.4],
72
+ [5.6, 3.0, 4.5, 1.5],
73
+ [5.8, 2.7, 4.1, 1.0],
74
+ [6.2, 2.2, 4.5, 1.5],
75
+ [5.6, 2.5, 3.9, 1.1],
76
+ [5.9, 3.2, 4.8, 1.8],
77
+ [6.1, 2.8, 4.0, 1.3],
78
+ [6.3, 2.5, 4.9, 1.5],
79
+ [6.1, 2.8, 4.7, 1.2],
80
+ [6.4, 2.9, 4.3, 1.3],
81
+ [6.6, 3.0, 4.4, 1.4],
82
+ [6.8, 2.8, 4.8, 1.4],
83
+ [6.7, 3.0, 5.0, 1.7],
84
+ [6.0, 2.9, 4.5, 1.5],
85
+ [5.7, 2.6, 3.5, 1.0],
86
+ [5.5, 2.4, 3.8, 1.1],
87
+ [5.5, 2.4, 3.7, 1.0],
88
+ [5.8, 2.7, 3.9, 1.2],
89
+ [6.0, 2.7, 5.1, 1.6],
90
+ [5.4, 3.0, 4.5, 1.5],
91
+ [6.0, 3.4, 4.5, 1.6],
92
+ [6.7, 3.1, 4.7, 1.5],
93
+ [6.3, 2.3, 4.4, 1.3],
94
+ [5.6, 3.0, 4.1, 1.3],
95
+ [5.5, 2.5, 4.0, 1.3],
96
+ [5.5, 2.6, 4.4, 1.2],
97
+ [6.1, 3.0, 4.6, 1.4],
98
+ [5.8, 2.6, 4.0, 1.2],
99
+ [5.0, 2.3, 3.3, 1.0],
100
+ [5.6, 2.7, 4.2, 1.3],
101
+ [5.7, 3.0, 4.2, 1.2],
102
+ [5.7, 2.9, 4.2, 1.3],
103
+ [6.2, 2.9, 4.3, 1.3],
104
+ [5.1, 2.5, 3.0, 1.1],
105
+ [5.7, 2.8, 4.1, 1.3],
106
+ [6.3, 3.3, 6.0, 2.5],
107
+ [5.8, 2.7, 5.1, 1.9],
108
+ [7.1, 3.0, 5.9, 2.1],
109
+ [6.3, 2.9, 5.6, 1.8],
110
+ [6.5, 3.0, 5.8, 2.2],
111
+ [7.6, 3.0, 6.6, 2.1],
112
+ [4.9, 2.5, 4.5, 1.7],
113
+ [7.3, 2.9, 6.3, 1.8],
114
+ [6.7, 2.5, 5.8, 1.8],
115
+ [7.2, 3.6, 6.1, 2.5],
116
+ [6.5, 3.2, 5.1, 2.0],
117
+ [6.4, 2.7, 5.3, 1.9],
118
+ [6.8, 3.0, 5.5, 2.1],
119
+ [5.7, 2.5, 5.0, 2.0],
120
+ [5.8, 2.8, 5.1, 2.4],
121
+ [6.4, 3.2, 5.3, 2.3],
122
+ [6.5, 3.0, 5.5, 1.8],
123
+ [7.7, 3.8, 6.7, 2.2],
124
+ [7.7, 2.6, 6.9, 2.3],
125
+ [6.0, 2.2, 5.0, 1.5],
126
+ [6.9, 3.2, 5.7, 2.3],
127
+ [5.6, 2.8, 4.9, 2.0],
128
+ [7.7, 2.8, 6.7, 2.0],
129
+ [6.3, 2.7, 4.9, 1.8],
130
+ [6.7, 3.3, 5.7, 2.1],
131
+ [7.2, 3.2, 6.0, 1.8],
132
+ [6.2, 2.8, 4.8, 1.8],
133
+ [6.1, 3.0, 4.9, 1.8],
134
+ [6.4, 2.8, 5.6, 2.1],
135
+ [7.2, 3.0, 5.8, 1.6],
136
+ [7.4, 2.8, 6.1, 1.9],
137
+ [7.9, 3.8, 6.4, 2.0],
138
+ [6.4, 2.8, 5.6, 2.2],
139
+ [6.3, 2.8, 5.1, 1.5],
140
+ [6.1, 2.6, 5.6, 1.4],
141
+ [7.7, 3.0, 6.1, 2.3],
142
+ [6.3, 3.4, 5.6, 2.4],
143
+ [6.4, 3.1, 5.5, 1.8],
144
+ [6.0, 3.0, 4.8, 1.8],
145
+ [6.9, 3.1, 5.4, 2.1],
146
+ [6.7, 3.1, 5.6, 2.4],
147
+ [6.9, 3.1, 5.1, 2.3],
148
+ [5.8, 2.7, 5.1, 1.9],
149
+ [6.8, 3.2, 5.9, 2.3],
150
+ [6.7, 3.3, 5.7, 2.5],
151
+ [6.7, 3.0, 5.2, 2.3],
152
+ [6.3, 2.5, 5.0, 1.9],
153
+ [6.5, 3.0, 5.2, 2.0],
154
+ [6.2, 3.4, 5.4, 2.3],
155
+ [5.9, 3.0, 5.1, 1.8],
156
+ ]
@@ -0,0 +1,22 @@
1
+ # this example shows the impact of the size of a som on the global error distance
2
+ require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
3
+ require File.dirname(__FILE__) + '/som_data'
4
+ require 'benchmark'
5
+
6
# Builds and trains ten maps, one per size from 3 nodes (the smallest size
# tried here) up to 12, and prints the global error distance before and after
# training together with the time the training took.
3.upto(12) do |node_count|
  puts "Nodes: #{node_count}"
  layer = Ai4r::Som::TwoPhaseLayer.new(node_count)
  som = Ai4r::Som::Som.new(4, 8, layer)
  som.initiate_map

  puts "global error distance: #{som.global_error(SOM_DATA)}"
  puts "\ntraining the som\n"

  # Benchmark.measure returns the timing report printed below.
  times = Benchmark.measure { som.train(SOM_DATA) }

  puts "Elapsed time for training: #{times}"
  puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
end
@@ -0,0 +1,24 @@
1
+ require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
2
+ require File.dirname(__FILE__) + '/som_data'
3
+ require 'benchmark'
4
+
5
# Trains a single map built on a TwoPhaseLayer of size 10 and prints the node
# weights and the global error distance before and after training.
som = Ai4r::Som::Som.new(4, 8, Ai4r::Som::TwoPhaseLayer.new(10))
som.initiate_map

# Dumps the weight vector of every node, one per line.
show_weights = lambda do
  som.nodes.each { |node| p node.weights }
end

show_weights.call

puts "global error distance: #{som.global_error(SOM_DATA)}"
puts "\ntraining the som\n"

times = Benchmark.measure { som.train(SOM_DATA) }

show_weights.call

puts "Elapsed time for training: #{times}"
puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
@@ -0,0 +1,32 @@
1
+ # Data
2
+ require File.dirname(__FILE__) + "/ai4r/data/data_set"
3
+ require File.dirname(__FILE__) + "/ai4r/data/statistics"
4
+ require File.dirname(__FILE__) + "/ai4r/data/proximity"
5
+ require File.dirname(__FILE__) + "/ai4r/data/parameterizable"
6
+ # Clusterers
7
+ require File.dirname(__FILE__) + "/ai4r/clusterers/clusterer"
8
+ require File.dirname(__FILE__) + "/ai4r/clusterers/k_means"
9
+ require File.dirname(__FILE__) + "/ai4r/clusterers/bisecting_k_means"
10
+ require File.dirname(__FILE__) + "/ai4r/clusterers/single_linkage"
11
+ require File.dirname(__FILE__) + "/ai4r/clusterers/complete_linkage"
12
+ require File.dirname(__FILE__) + "/ai4r/clusterers/average_linkage"
13
+ require File.dirname(__FILE__) + "/ai4r/clusterers/weighted_average_linkage"
14
+ require File.dirname(__FILE__) + "/ai4r/clusterers/centroid_linkage"
15
+ require File.dirname(__FILE__) + "/ai4r/clusterers/median_linkage"
16
+ require File.dirname(__FILE__) + "/ai4r/clusterers/ward_linkage"
17
+ require File.dirname(__FILE__) + "/ai4r/clusterers/diana"
18
+ # Classifiers
19
+ require File.dirname(__FILE__) + "/ai4r/classifiers/classifier"
20
+ require File.dirname(__FILE__) + "/ai4r/classifiers/id3"
21
+ require File.dirname(__FILE__) + "/ai4r/classifiers/prism"
22
+ require File.dirname(__FILE__) + "/ai4r/classifiers/one_r"
23
+ require File.dirname(__FILE__) + "/ai4r/classifiers/zero_r"
24
+ require File.dirname(__FILE__) + "/ai4r/classifiers/hyperpipes"
25
+ require File.dirname(__FILE__) + "/ai4r/classifiers/naive_bayes"
26
+ # Neural networks
27
+ require File.dirname(__FILE__) + "/ai4r/neural_network/backpropagation"
28
+ require File.dirname(__FILE__) + "/ai4r/neural_network/hopfield"
29
+ # Genetic Algorithms
30
+ require File.dirname(__FILE__) + "/ai4r/genetic_algorithm/genetic_algorithm"
31
+ # SOM
32
+ require File.dirname(__FILE__) + "/ai4r/som/som"
@@ -0,0 +1,59 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/parameterizable'
11
+
12
+ module Ai4r
13
+ module Classifiers
14
+
15
+ # This class defines a common API for classifiers.
16
+ # All methods in this class must be implemented in subclasses.
17
class Classifier

  include Ai4r::Data::Parameterizable

  # Trains the classifier from the examples found in data_set.
  # The last attribute of every item is taken as the class of that item.
  #
  # Abstract: this base implementation always raises NotImplementedError.
  # Note that NotImplementedError is a ScriptError, so a bare +rescue+
  # (which only catches StandardError) will not intercept it.
  def build(data_set)
    raise NotImplementedError
  end

  # Predicts the class of a new data item, e.g.
  #
  #   classifier.eval(['New York', '<30', 'F'])  # => 'Y'
  #
  # Abstract: this base implementation always raises NotImplementedError.
  def eval(data)
    raise NotImplementedError
  end

  # Returns the induced rules as a string of ruby code, e.g.
  #
  #   classifier.get_rules
  #     # =>  if age_range=='<30' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
  #           elsif age_range=='[50-80]' then marketing_target='N'
  #           elsif age_range=='>80' then marketing_target='Y'
  #           else raise 'There was not enough information during training to do a proper induction for this data element' end
  #
  # The string can be inspected, or executed with the attribute names bound
  # as local variables:
  #
  #   age_range = '<30'
  #   city = 'New York'
  #   marketing_target = nil
  #   eval classifier.get_rules
  #   puts marketing_target
  #     # =>  'Y'
  #
  # Abstract: this base implementation always raises NotImplementedError.
  def get_rules
    raise NotImplementedError
  end

end
58
+ end
59
+ end
@@ -0,0 +1,118 @@
1
+ # Author:: Sergio Fierens (Implementation only)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require 'set'
11
+ require File.dirname(__FILE__) + '/../data/data_set'
12
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
13
+
14
+ module Ai4r
15
+ module Classifiers
16
+
17
+ include Ai4r::Data
18
+
19
+ # = Introduction
20
+ #
21
+ # A fast classifier algorithm, created by Lucio de Souza Coelho
22
+ # and Len Trigg.
23
+ class Hyperpipes < Classifier
24
+
25
+ attr_reader :data_set, :pipes
26
+
27
+ # Build a new Hyperpipes classifier. You must provide a DataSet instance
28
+ # as parameter. The last attribute of each item is considered as
29
+ # the item class.
30
def build(data_set)
  data_set.check_not_empty
  @data_set = data_set
  @domains = data_set.build_domains

  # One fresh pipe per class value (the last domain holds the class values) ...
  @pipes = {}
  @domains.last.each do |category|
    @pipes[category] = build_pipe(@data_set)
  end
  # ... then every training item widens the pipe of its own class.
  @data_set.data_items.each do |item|
    update_pipe(@pipes[item.last], item)
  end

  self
end
41
+
42
+ # You can evaluate new data, predicting its class.
43
+ # e.g.
44
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
45
def eval(data)
  # Every attribute of every pipe casts one vote for the pipe's category
  # when the given value falls inside what that pipe saw during training:
  # inside [min, max] for numeric values, among the seen values otherwise.
  votes = Hash.new(0)
  @pipes.each do |category, pipe|
    pipe.each_with_index do |bounds, i|
      value = data[i]
      hit = if value.is_a?(Numeric)
              value >= bounds[:min] && value <= bounds[:max]
            else
              bounds[value]
            end
      votes[category] += 1 if hit
    end
  end
  # The category with most votes wins; on a tie the first one counted wins.
  # When no pipe matched at all there is no winner: return nil instead of
  # failing with NoMethodError on nil.
  winner = votes.to_a.max { |x, y| x.last <=> y.last }
  winner && winner.first
end
58
+
59
+ # This method returns the generated rules in ruby code.
60
+ # e.g.
61
+ #
62
+ # classifier.get_rules
63
+ # # => if age_range == '<30' then marketing_target = 'Y'
64
+ # elsif age_range == '[30-50)' then marketing_target = 'N'
65
+ # elsif age_range == '[50-80]' then marketing_target = 'N'
66
+ # end
67
+ #
68
+ # It is a nice way to inspect induction results, and also to execute them:
69
+ # marketing_target = nil
70
+ # eval classifier.get_rules
71
+ # puts marketing_target
72
+ # # => 'Y'
73
def get_rules
  # The first training item tells which attributes are numeric.
  sample = @data_set.data_items.first
  names = @data_set.data_labels.map { |label| label.to_s }

  lines = ["votes = Hash.new {0}"]
  @pipes.each do |category, pipe|
    pipe.each_with_index do |bounds, i|
      # Numeric attributes become a range check, the others a lookup in
      # the literal hash of values seen during training.
      condition =
        if sample[i].is_a?(Numeric)
          "if #{names[i]} >= #{bounds[:min]} && #{names[i]} <= #{bounds[:max]}"
        else
          "if #{bounds.inspect}[#{names[i]}]"
        end
      lines << "votes['#{category}'] += 1 " + condition
    end
  end
  # Last generated line: assign the most voted category to the class label.
  lines << "#{names.last} = votes.to_a.max {|x, y| x.last <=> y.last}.first"
  lines.join("\n")
end
92
+
93
+ protected
94
+
95
# Builds an empty pipe: one slot per attribute of the first data item,
# leaving out its last attribute (the class).
# Numeric attributes get an inverted range (min = +Infinity, max = -Infinity)
# so that the first observed value always replaces both bounds; any other
# attribute gets a hash that answers false for values not seen yet.
def build_pipe(data_set)
  attributes = data_set.data_items.first[0...-1]
  attributes.map do |att|
    att.is_a?(Numeric) ? { :min => 1.0 / 0, :max => -1.0 / 0 } : Hash.new(false)
  end
end
104
+
105
# Widens pipe so that it covers data_item. The last attribute of the item
# (its class) is ignored. Numeric values stretch the [min, max] range of
# their slot; any other value is recorded as seen.
def update_pipe(pipe, data_item)
  data_item[0...-1].each_with_index do |att, i|
    slot = pipe[i]
    unless att.is_a?(Numeric)
      slot[att] = true
      next
    end
    slot[:min] = att if att < slot[:min]
    slot[:max] = att if att > slot[:max]
  end
end
115
+
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,326 @@
1
+ # Author:: Sergio Fierens (Implementation, Quinlan is
2
+ # the creator of the algorithm)
3
+ # License:: MPL 1.1
4
+ # Project:: ai4r
5
+ # Url:: http://ai4r.rubyforge.org/
6
+ #
7
+ # You can redistribute it and/or modify it under the terms of
8
+ # the Mozilla Public License version 1.1 as published by the
9
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
10
+
11
+ require File.dirname(__FILE__) + '/../data/data_set'
12
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
13
+
14
+ module Ai4r
15
+
16
+ module Classifiers
17
+
18
+ # = Introduction
19
+ # This is an implementation of the ID3 algorithm (Quinlan)
20
+ # Given a set of preclassified examples, it builds a decision tree by
21
+ # top-down induction, choosing each split by the information gain and
22
+ # entropy measure.
23
+ #
24
+ # * http://en.wikipedia.org/wiki/Decision_tree
25
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
26
+ #
27
+ # = How to use it
28
+ #
29
+ # DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
30
+ #
31
+ # DATA_ITEMS = [
32
+ # ['New York', '<30', 'M', 'Y'],
33
+ # ['Chicago', '<30', 'M', 'Y'],
34
+ # ['Chicago', '<30', 'F', 'Y'],
35
+ # ['New York', '<30', 'M', 'Y'],
36
+ # ['New York', '<30', 'M', 'Y'],
37
+ # ['Chicago', '[30-50)', 'M', 'Y'],
38
+ # ['New York', '[30-50)', 'F', 'N'],
39
+ # ['Chicago', '[30-50)', 'F', 'Y'],
40
+ # ['New York', '[30-50)', 'F', 'N'],
41
+ # ['Chicago', '[50-80]', 'M', 'N'],
42
+ # ['New York', '[50-80]', 'F', 'N'],
43
+ # ['New York', '[50-80]', 'M', 'N'],
44
+ # ['Chicago', '[50-80]', 'M', 'N'],
45
+ # ['New York', '[50-80]', 'F', 'N'],
46
+ # ['Chicago', '>80', 'F', 'Y']
47
+ # ]
48
+ #
49
+ # data_set = DataSet.new(:data_items=>DATA_ITEMS, :data_labels=>DATA_LABELS)
50
+ # id3 = Ai4r::Classifiers::ID3.new.build(data_set)
51
+ #
52
+ # id3.get_rules
53
+ # # => if age_range=='<30' then marketing_target='Y'
54
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
55
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
56
+ # elsif age_range=='[50-80]' then marketing_target='N'
57
+ # elsif age_range=='>80' then marketing_target='Y'
58
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
59
+ #
60
+ # id3.eval(['New York', '<30', 'M'])
61
+ # # => 'Y'
62
+ #
63
+ # = A better way to load the data
64
+ #
65
+ # In real life you will use a lot more training examples, with more
66
+ # attributes. Consider moving your data to an external CSV (comma-separated
67
+ # values) file.
68
+ #
69
+ # data_file = "#{File.dirname(__FILE__)}/data_set.csv"
70
+ # data_set = DataSet.load_csv_with_labels data_file
71
+ # id3 = Ai4r::Classifiers::ID3.new.build(data_set)
72
+ #
73
+ # = A nice tip for data evaluation
74
+ #
75
+ # id3 = Ai4r::Classifiers::ID3.new.build(data_set)
76
+ #
77
+ # age_range = '<30'
78
+ # marketing_target = nil
79
+ # eval id3.get_rules
80
+ # puts marketing_target
81
+ # # => 'Y'
82
+ #
83
+ # = More about ID3 and decision trees
84
+ #
85
+ # * http://en.wikipedia.org/wiki/Decision_tree
86
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
87
+ #
88
+ # = About the project
89
+ # Author:: Sergio Fierens
90
+ # License:: MPL 1.1
91
+ # Url:: http://ai4r.rubyforge.org/
92
+ class ID3 < Classifier
93
+
94
+ attr_reader :data_set
95
+
96
+ # Create a new ID3 classifier. You must provide a DataSet instance
97
+ # as parameter. The last attribute of each item is considered as the
98
+ # item class.
99
def build(data_set)
  # check_not_empty runs before any state is touched.
  data_set.check_not_empty
  @data_set = data_set
  preprocess_data(data_set.data_items)
  self
end
105
+
106
+ # You can evaluate new data, predicting its category.
107
+ # e.g.
108
+ # id3.eval(['New York', '<30', 'F']) # => 'Y'
109
def eval(data)
  # Without a tree (build has not run yet) the answer is nil.
  return nil unless @tree
  @tree.value(data)
end
112
+
113
+ # This method returns the generated rules in ruby code.
114
+ # e.g.
115
+ #
116
+ # id3.get_rules
117
+ # # => if age_range=='<30' then marketing_target='Y'
118
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
119
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
120
+ # elsif age_range=='[50-80]' then marketing_target='N'
121
+ # elsif age_range=='>80' then marketing_target='Y'
122
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
123
+ #
124
+ # It is a nice way to inspect induction results, and also to execute them:
125
+ # age_range = '<30'
126
+ # marketing_target = nil
127
+ # eval id3.get_rules
128
+ # puts marketing_target
129
+ # # => 'Y'
130
def get_rules
  # Each rule produced by the tree is a list of conditions followed by the
  # final assignment of the class label.
  clauses = @tree.get_rules.collect do |rule|
    conditions = rule[0..-2]
    # A tree made of a single leaf (all training examples share one class)
    # yields rules without conditions; emit an always-true test so that the
    # generated code remains valid ruby.
    test = conditions.empty? ? 'true' : conditions.join(' and ')
    "#{test} then #{rule.last}"
  end
  "if #{clauses.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
end
138
+
139
+ private
140
# Builds the decision tree for data_examples and keeps its root in @tree.
# Returns the root node.
def preprocess_data(data_examples)
  root = build_node(data_examples)
  @tree = root
end
143
+
144
+ private
145
# Recursively builds the (sub)tree for data_examples.
#
# flag_att lists the indexes of the attributes already used for a split on
# the path from the root to this node. It is never modified: each child
# receives its own extended copy, so an attribute consumed inside one branch
# remains available to the sibling branches.
#
# Returns
# * an ErrorNode when there are no examples,
# * a CategoryNode when all examples share one class, or when splitting on
#   the chosen attribute does not separate the examples (the most frequent
#   class is used then),
# * an EvaluationNode with one child per value of the chosen attribute
#   otherwise.
def build_node(data_examples, flag_att = [])
  return ErrorNode.new if data_examples.length == 0
  domains = domain(data_examples)
  class_label = @data_set.data_labels.last
  return CategoryNode.new(class_label, domains.last[0]) if domains.last.length == 1
  split_index = min_entropy_index(data_examples, domains, flag_att)
  # Fresh list instead of flag_att << split_index: the caller's array is
  # shared with the sibling subtrees and must stay untouched.
  used_attributes = flag_att + [split_index]
  partitions = split_data_examples(data_examples, domains, split_index)
  return CategoryNode.new(class_label, most_freq(data_examples, domains)) if partitions.length == 1
  children = partitions.collect do |subset|
    build_node(subset, used_attributes)
  end
  EvaluationNode.new(@data_set.data_labels, split_index, domains[split_index], children)
end
158
+
159
+ private
160
# Adds up all the elements of values, starting from 0 (so an empty
# collection sums to 0).
# NOTE: a bare +private+ does not apply to methods defined with +def self.+;
# this method is called as ID3.sum from the instance methods.
def self.sum(values)
  total = 0
  values.each { |value| total += value }
  total
end
163
+
164
+ private
165
# Base 2 logarithm of z, with log2(0) defined as 0.0 instead of -Infinity.
# Relies on the LOG2 constant (natural logarithm of 2) of this class.
def self.log2(z)
  z == 0 ? 0.0 : Math.log(z) / LOG2
end
169
+
170
+ private
171
# Returns the class value that appears most often among examples.
# domain.last lists the known class values; when several are equally
# frequent the one listed first wins.
def most_freq(examples, domain)
  categories = domain.last
  counts = Array.new(categories.length, 0)
  examples.each do |example|
    counts[categories.index(example.last)] += 1
  end
  categories[counts.index(counts.max)]
end
183
+
184
+ private
185
# Partitions data_examples by the value of the attribute at att_index.
# The result is an array of example lists, positioned like the values in
# domain[att_index]; positions of values that do not occur stay nil (or
# are missing at the tail).
def split_data_examples(data_examples, domain, att_index)
  by_value = data_examples.group_by { |example| example[att_index] }
  partitions = []
  by_value.each do |att_value, subset|
    partitions[domain[att_index].index(att_value)] = subset
  end
  partitions
end
200
+
201
+ private
202
# Returns the index of the attribute with the lowest entropy with respect
# to the class, leaving out the indexes listed in flag_att. The last
# domain (the class itself) is never a candidate. The first attribute wins
# a tie, and 0 is returned when every attribute is flagged.
def min_entropy_index(data_examples, domain, flag_att=[])
  best_entropy = nil
  best_index = 0
  total = data_examples.length
  (0...domain[0..-2].length).each do |candidate|
    score = entropy(freq_grid(candidate, data_examples, domain), total)
    next if flag_att.include?(candidate)
    next unless best_entropy.nil? || score < best_entropy
    best_entropy = score
    best_index = candidate
  end
  best_index
end
215
+
216
+ private
217
# Collects, for every label of the data set, the distinct values found in
# data_examples, in order of first appearance. Values located beyond the
# number of labels are ignored.
def domain(data_examples)
  columns = Array.new(@data_set.data_labels.length) { [] }
  data_examples.each do |example|
    example.each_with_index do |value, position|
      next if position >= columns.length
      seen = columns[position]
      seen << value unless seen.include?(value)
    end
  end
  columns
end
228
+
229
+ private
230
# Builds the frequency grid of one attribute against the class:
# grid[v][c] is the number of examples whose attribute att_index has the
# v-th value of domain[att_index] and whose class is the c-th value of
# domain.last.
def freq_grid(att_index, data_examples, domain)
  att_values = domain[att_index]
  categories = domain.last
  # Each row is created separately, so rows never share counters.
  grid = Array.new(att_values.length) { Array.new(categories.length, 0) }
  data_examples.each do |example|
    row = att_values.index(example[att_index])
    column = categories.index(example.last)
    grid[row][column] += 1
  end
  grid
end
247
+
248
+ private
249
# Computes the entropy of the class after a split, from a frequency grid
# holding one row of class counts per attribute value. Each row contributes
# its own entropy weighted by the share of the total_examples it holds;
# rows without examples contribute nothing.
def entropy(freq_grid, total_examples)
  freq_grid.inject(0) do |weighted, class_counts|
    row_total = ID3.sum(class_counts)
    row_entropy =
      if row_total == 0
        0
      else
        class_counts.inject(0) do |partial, count|
          share = count.to_f / row_total
          partial + (-1 * share * ID3.log2(share))
        end
      end
    weighted + (row_total.to_f / total_examples) * row_entropy
  end
end
265
+
266
+ private
267
+ LOG2 = Math.log(2)
268
+ end
269
+
270
# Inner node of the decision tree: tests one attribute and delegates to the
# child node associated with the value found.
class EvaluationNode #:nodoc: all

  attr_reader :index, :values, :nodes

  # data_labels:: names of all the attributes, used to write the rules
  # index::       position of the attribute tested by this node
  # values::      attribute values seen during training
  # nodes::       child nodes, positioned like values
  def initialize(data_labels, index, values, nodes)
    @index = index
    @values = values
    @nodes = nodes
    @data_labels = data_labels
  end

  # Returns the class predicted for data by following the branch of the
  # value data holds for the tested attribute.
  # Raises a RuntimeError when that value was never seen during training
  # (this used to fail with a NameError on an undefined helper).
  def value(data)
    branch = @values.index(data[@index])
    if branch.nil?
      raise "There was not enough information during training to do a proper induction for this data element."
    end
    nodes[branch].value(data)
  end

  # Returns the rules of all the children, each one prefixed with the
  # condition that leads to that child.
  def get_rules
    rule_set = []
    @nodes.each_with_index do |child_node, child_index|
      condition = "#{@data_labels[@index]}=='#{@values[child_index]}'"
      child_node.get_rules.each do |child_rule|
        rule_set << child_rule.unshift(condition)
      end
    end
    rule_set
  end

end
302
+
303
# Leaf of the decision tree: always answers the same class value.
class CategoryNode #:nodoc: all
  # label:: name of the class attribute, used to write the rule
  # value:: class value this leaf stands for
  def initialize(label, value)
    @label, @value = label, value
  end

  # Returns the class value of this leaf, whatever data is.
  def value(data)
    @value
  end

  # Returns a single rule made of the assignment of the class value.
  def get_rules
    assignment = "#{@label}='#{@value}'"
    [[assignment]]
  end
end
315
+
316
# Node standing for a branch without training examples.
class ErrorNode #:nodoc: all
  # Always raises a RuntimeError: nothing can be predicted from here.
  def value(data)
    raise RuntimeError, "There was not enough information during training to do a proper induction for this data element."
  end

  # An error node contributes no rules.
  def get_rules
    []
  end
end
324
+
325
+ end
326
+ end