nirvdrum-ai4r 1.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (150) hide show
  1. data/.gitignore +1 -0
  2. data/.rakeTasks +7 -0
  3. data/README.rdoc +56 -0
  4. data/Rakefile.rb +42 -0
  5. data/VERSION +1 -0
  6. data/ai4r.gemspec +221 -0
  7. data/change_log +49 -0
  8. data/examples/classifiers/id3_data.csv +121 -0
  9. data/examples/classifiers/id3_example.rb +29 -0
  10. data/examples/classifiers/naive_bayes_data.csv +11 -0
  11. data/examples/classifiers/naive_bayes_example.rb +16 -0
  12. data/examples/classifiers/results.txt +31 -0
  13. data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
  14. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  15. data/examples/neural_network/backpropagation_example.rb +67 -0
  16. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  17. data/examples/neural_network/patterns_with_noise.rb +66 -0
  18. data/examples/neural_network/training_patterns.rb +68 -0
  19. data/examples/neural_network/xor_example.rb +35 -0
  20. data/examples/som/som_data.rb +156 -0
  21. data/examples/som/som_multi_node_example.rb +22 -0
  22. data/examples/som/som_single_example.rb +24 -0
  23. data/lib/ai4r.rb +32 -0
  24. data/lib/ai4r/classifiers/classifier.rb +59 -0
  25. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  26. data/lib/ai4r/classifiers/id3.rb +326 -0
  27. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  28. data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
  29. data/lib/ai4r/classifiers/one_r.rb +110 -0
  30. data/lib/ai4r/classifiers/prism.rb +197 -0
  31. data/lib/ai4r/classifiers/zero_r.rb +73 -0
  32. data/lib/ai4r/clusterers/average_linkage.rb +59 -0
  33. data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
  34. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  35. data/lib/ai4r/clusterers/clusterer.rb +61 -0
  36. data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
  37. data/lib/ai4r/clusterers/diana.rb +139 -0
  38. data/lib/ai4r/clusterers/k_means.rb +126 -0
  39. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  40. data/lib/ai4r/clusterers/single_linkage.rb +194 -0
  41. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  42. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  43. data/lib/ai4r/data/data_set.rb +266 -0
  44. data/lib/ai4r/data/parameterizable.rb +64 -0
  45. data/lib/ai4r/data/proximity.rb +100 -0
  46. data/lib/ai4r/data/statistics.rb +77 -0
  47. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  48. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
  49. data/lib/ai4r/neural_network/backpropagation.rb +293 -0
  50. data/lib/ai4r/neural_network/hopfield.rb +149 -0
  51. data/lib/ai4r/som/layer.rb +68 -0
  52. data/lib/ai4r/som/node.rb +96 -0
  53. data/lib/ai4r/som/som.rb +155 -0
  54. data/lib/ai4r/som/two_phase_layer.rb +90 -0
  55. data/site/forrest.properties +152 -0
  56. data/site/forrest.properties.dispatcher.properties +25 -0
  57. data/site/forrest.properties.xml +29 -0
  58. data/site/src/documentation/README.txt +7 -0
  59. data/site/src/documentation/classes/CatalogManager.properties +62 -0
  60. data/site/src/documentation/content/locationmap.xml +72 -0
  61. data/site/src/documentation/content/xdocs/downloads.html +9 -0
  62. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +294 -0
  63. data/site/src/documentation/content/xdocs/index.xml +155 -0
  64. data/site/src/documentation/content/xdocs/machineLearning.xml +131 -0
  65. data/site/src/documentation/content/xdocs/neuralNetworks.xml +270 -0
  66. data/site/src/documentation/content/xdocs/site.xml +54 -0
  67. data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
  68. data/site/src/documentation/content/xdocs/tabs.xml +35 -0
  69. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  70. data/site/src/documentation/resources/images/c.png +0 -0
  71. data/site/src/documentation/resources/images/c_wbn.png +0 -0
  72. data/site/src/documentation/resources/images/c_wn.png +0 -0
  73. data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
  74. data/site/src/documentation/resources/images/ero.gif +0 -0
  75. data/site/src/documentation/resources/images/europe2.png +0 -0
  76. data/site/src/documentation/resources/images/europe3.png +0 -0
  77. data/site/src/documentation/resources/images/fitness.png +0 -0
  78. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  79. data/site/src/documentation/resources/images/icon-a.png +0 -0
  80. data/site/src/documentation/resources/images/icon-b.png +0 -0
  81. data/site/src/documentation/resources/images/icon.png +0 -0
  82. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  83. data/site/src/documentation/resources/images/my_email.png +0 -0
  84. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  85. data/site/src/documentation/resources/images/project-logo.png +0 -0
  86. data/site/src/documentation/resources/images/rubyforge.png +0 -0
  87. data/site/src/documentation/resources/images/s.png +0 -0
  88. data/site/src/documentation/resources/images/s_wbn.png +0 -0
  89. data/site/src/documentation/resources/images/s_wn.png +0 -0
  90. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  91. data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
  92. data/site/src/documentation/resources/images/t.png +0 -0
  93. data/site/src/documentation/resources/images/t_wbn.png +0 -0
  94. data/site/src/documentation/resources/images/t_wn.png +0 -0
  95. data/site/src/documentation/resources/schema/catalog.xcat +29 -0
  96. data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
  97. data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
  98. data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
  99. data/site/src/documentation/sitemap.xmap +66 -0
  100. data/site/src/documentation/skinconf.xml +418 -0
  101. data/site/src/documentation/translations/langcode.xml +29 -0
  102. data/site/src/documentation/translations/languages_de.xml +24 -0
  103. data/site/src/documentation/translations/languages_en.xml +24 -0
  104. data/site/src/documentation/translations/languages_es.xml +22 -0
  105. data/site/src/documentation/translations/languages_fr.xml +24 -0
  106. data/site/src/documentation/translations/languages_nl.xml +24 -0
  107. data/site/src/documentation/translations/menu.xml +33 -0
  108. data/site/src/documentation/translations/menu_af.xml +33 -0
  109. data/site/src/documentation/translations/menu_de.xml +33 -0
  110. data/site/src/documentation/translations/menu_es.xml +33 -0
  111. data/site/src/documentation/translations/menu_fr.xml +33 -0
  112. data/site/src/documentation/translations/menu_it.xml +33 -0
  113. data/site/src/documentation/translations/menu_nl.xml +33 -0
  114. data/site/src/documentation/translations/menu_no.xml +33 -0
  115. data/site/src/documentation/translations/menu_ru.xml +33 -0
  116. data/site/src/documentation/translations/menu_sk.xml +33 -0
  117. data/site/src/documentation/translations/tabs.xml +22 -0
  118. data/site/src/documentation/translations/tabs_de.xml +22 -0
  119. data/site/src/documentation/translations/tabs_es.xml +22 -0
  120. data/site/src/documentation/translations/tabs_fr.xml +22 -0
  121. data/site/src/documentation/translations/tabs_nl.xml +22 -0
  122. data/test/classifiers/hyperpipes_test.rb +84 -0
  123. data/test/classifiers/id3_test.rb +208 -0
  124. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  125. data/test/classifiers/naive_bayes_test.rb +43 -0
  126. data/test/classifiers/one_r_test.rb +62 -0
  127. data/test/classifiers/prism_test.rb +85 -0
  128. data/test/classifiers/zero_r_test.rb +50 -0
  129. data/test/clusterers/average_linkage_test.rb +51 -0
  130. data/test/clusterers/bisecting_k_means_test.rb +66 -0
  131. data/test/clusterers/centroid_linkage_test.rb +53 -0
  132. data/test/clusterers/complete_linkage_test.rb +57 -0
  133. data/test/clusterers/diana_test.rb +69 -0
  134. data/test/clusterers/k_means_test.rb +100 -0
  135. data/test/clusterers/median_linkage_test.rb +53 -0
  136. data/test/clusterers/single_linkage_test.rb +122 -0
  137. data/test/clusterers/ward_linkage_test.rb +53 -0
  138. data/test/clusterers/weighted_average_linkage_test.rb +53 -0
  139. data/test/data/data_set.csv +121 -0
  140. data/test/data/data_set_test.rb +96 -0
  141. data/test/data/proximity_test.rb +81 -0
  142. data/test/data/statistics_data_set.csv +5 -0
  143. data/test/data/statistics_test.rb +65 -0
  144. data/test/experiment/classifier_evaluator_test.rb +76 -0
  145. data/test/genetic_algorithm/chromosome_test.rb +58 -0
  146. data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
  147. data/test/neural_network/backpropagation_test.rb +69 -0
  148. data/test/neural_network/hopfield_test.rb +72 -0
  149. data/test/som/som_test.rb +97 -0
  150. metadata +238 -0
@@ -0,0 +1,156 @@
1
+ # data is from the iris dataset (http://archive.ics.uci.edu/ml/datasets/Iris)
2
+ # it is the full dataset, removing the last column
3
+ # website provides additional information on the dataset itself (attributes, class distribution, etc)
4
+
5
+ SOM_DATA = [
6
+ [5.1, 3.5, 1.4, 0.2],
7
+ [4.9, 3.0, 1.4, 0.2],
8
+ [4.7, 3.2, 1.3, 0.2],
9
+ [4.6, 3.1, 1.5, 0.2],
10
+ [5.0, 3.6, 1.4, 0.2],
11
+ [5.4, 3.9, 1.7, 0.4],
12
+ [4.6, 3.4, 1.4, 0.3],
13
+ [5.0, 3.4, 1.5, 0.2],
14
+ [4.4, 2.9, 1.4, 0.2],
15
+ [4.9, 3.1, 1.5, 0.1],
16
+ [5.4, 3.7, 1.5, 0.2],
17
+ [4.8, 3.4, 1.6, 0.2],
18
+ [4.8, 3.0, 1.4, 0.1],
19
+ [4.3, 3.0, 1.1, 0.1],
20
+ [5.8, 4.0, 1.2, 0.2],
21
+ [5.7, 4.4, 1.5, 0.4],
22
+ [5.4, 3.9, 1.3, 0.4],
23
+ [5.1, 3.5, 1.4, 0.3],
24
+ [5.7, 3.8, 1.7, 0.3],
25
+ [5.1, 3.8, 1.5, 0.3],
26
+ [5.4, 3.4, 1.7, 0.2],
27
+ [5.1, 3.7, 1.5, 0.4],
28
+ [4.6, 3.6, 1.0, 0.2],
29
+ [5.1, 3.3, 1.7, 0.5],
30
+ [4.8, 3.4, 1.9, 0.2],
31
+ [5.0, 3.0, 1.6, 0.2],
32
+ [5.0, 3.4, 1.6, 0.4],
33
+ [5.2, 3.5, 1.5, 0.2],
34
+ [5.2, 3.4, 1.4, 0.2],
35
+ [4.7, 3.2, 1.6, 0.2],
36
+ [4.8, 3.1, 1.6, 0.2],
37
+ [5.4, 3.4, 1.5, 0.4],
38
+ [5.2, 4.1, 1.5, 0.1],
39
+ [5.5, 4.2, 1.4, 0.2],
40
+ [4.9, 3.1, 1.5, 0.1],
41
+ [5.0, 3.2, 1.2, 0.2],
42
+ [5.5, 3.5, 1.3, 0.2],
43
+ [4.9, 3.1, 1.5, 0.1],
44
+ [4.4, 3.0, 1.3, 0.2],
45
+ [5.1, 3.4, 1.5, 0.2],
46
+ [5.0, 3.5, 1.3, 0.3],
47
+ [4.5, 2.3, 1.3, 0.3],
48
+ [4.4, 3.2, 1.3, 0.2],
49
+ [5.0, 3.5, 1.6, 0.6],
50
+ [5.1, 3.8, 1.9, 0.4],
51
+ [4.8, 3.0, 1.4, 0.3],
52
+ [5.1, 3.8, 1.6, 0.2],
53
+ [4.6, 3.2, 1.4, 0.2],
54
+ [5.3, 3.7, 1.5, 0.2],
55
+ [5.0, 3.3, 1.4, 0.2],
56
+ [7.0, 3.2, 4.7, 1.4],
57
+ [6.4, 3.2, 4.5, 1.5],
58
+ [6.9, 3.1, 4.9, 1.5],
59
+ [5.5, 2.3, 4.0, 1.3],
60
+ [6.5, 2.8, 4.6, 1.5],
61
+ [5.7, 2.8, 4.5, 1.3],
62
+ [6.3, 3.3, 4.7, 1.6],
63
+ [4.9, 2.4, 3.3, 1.0],
64
+ [6.6, 2.9, 4.6, 1.3],
65
+ [5.2, 2.7, 3.9, 1.4],
66
+ [5.0, 2.0, 3.5, 1.0],
67
+ [5.9, 3.0, 4.2, 1.5],
68
+ [6.0, 2.2, 4.0, 1.0],
69
+ [6.1, 2.9, 4.7, 1.4],
70
+ [5.6, 2.9, 3.6, 1.3],
71
+ [6.7, 3.1, 4.4, 1.4],
72
+ [5.6, 3.0, 4.5, 1.5],
73
+ [5.8, 2.7, 4.1, 1.0],
74
+ [6.2, 2.2, 4.5, 1.5],
75
+ [5.6, 2.5, 3.9, 1.1],
76
+ [5.9, 3.2, 4.8, 1.8],
77
+ [6.1, 2.8, 4.0, 1.3],
78
+ [6.3, 2.5, 4.9, 1.5],
79
+ [6.1, 2.8, 4.7, 1.2],
80
+ [6.4, 2.9, 4.3, 1.3],
81
+ [6.6, 3.0, 4.4, 1.4],
82
+ [6.8, 2.8, 4.8, 1.4],
83
+ [6.7, 3.0, 5.0, 1.7],
84
+ [6.0, 2.9, 4.5, 1.5],
85
+ [5.7, 2.6, 3.5, 1.0],
86
+ [5.5, 2.4, 3.8, 1.1],
87
+ [5.5, 2.4, 3.7, 1.0],
88
+ [5.8, 2.7, 3.9, 1.2],
89
+ [6.0, 2.7, 5.1, 1.6],
90
+ [5.4, 3.0, 4.5, 1.5],
91
+ [6.0, 3.4, 4.5, 1.6],
92
+ [6.7, 3.1, 4.7, 1.5],
93
+ [6.3, 2.3, 4.4, 1.3],
94
+ [5.6, 3.0, 4.1, 1.3],
95
+ [5.5, 2.5, 4.0, 1.3],
96
+ [5.5, 2.6, 4.4, 1.2],
97
+ [6.1, 3.0, 4.6, 1.4],
98
+ [5.8, 2.6, 4.0, 1.2],
99
+ [5.0, 2.3, 3.3, 1.0],
100
+ [5.6, 2.7, 4.2, 1.3],
101
+ [5.7, 3.0, 4.2, 1.2],
102
+ [5.7, 2.9, 4.2, 1.3],
103
+ [6.2, 2.9, 4.3, 1.3],
104
+ [5.1, 2.5, 3.0, 1.1],
105
+ [5.7, 2.8, 4.1, 1.3],
106
+ [6.3, 3.3, 6.0, 2.5],
107
+ [5.8, 2.7, 5.1, 1.9],
108
+ [7.1, 3.0, 5.9, 2.1],
109
+ [6.3, 2.9, 5.6, 1.8],
110
+ [6.5, 3.0, 5.8, 2.2],
111
+ [7.6, 3.0, 6.6, 2.1],
112
+ [4.9, 2.5, 4.5, 1.7],
113
+ [7.3, 2.9, 6.3, 1.8],
114
+ [6.7, 2.5, 5.8, 1.8],
115
+ [7.2, 3.6, 6.1, 2.5],
116
+ [6.5, 3.2, 5.1, 2.0],
117
+ [6.4, 2.7, 5.3, 1.9],
118
+ [6.8, 3.0, 5.5, 2.1],
119
+ [5.7, 2.5, 5.0, 2.0],
120
+ [5.8, 2.8, 5.1, 2.4],
121
+ [6.4, 3.2, 5.3, 2.3],
122
+ [6.5, 3.0, 5.5, 1.8],
123
+ [7.7, 3.8, 6.7, 2.2],
124
+ [7.7, 2.6, 6.9, 2.3],
125
+ [6.0, 2.2, 5.0, 1.5],
126
+ [6.9, 3.2, 5.7, 2.3],
127
+ [5.6, 2.8, 4.9, 2.0],
128
+ [7.7, 2.8, 6.7, 2.0],
129
+ [6.3, 2.7, 4.9, 1.8],
130
+ [6.7, 3.3, 5.7, 2.1],
131
+ [7.2, 3.2, 6.0, 1.8],
132
+ [6.2, 2.8, 4.8, 1.8],
133
+ [6.1, 3.0, 4.9, 1.8],
134
+ [6.4, 2.8, 5.6, 2.1],
135
+ [7.2, 3.0, 5.8, 1.6],
136
+ [7.4, 2.8, 6.1, 1.9],
137
+ [7.9, 3.8, 6.4, 2.0],
138
+ [6.4, 2.8, 5.6, 2.2],
139
+ [6.3, 2.8, 5.1, 1.5],
140
+ [6.1, 2.6, 5.6, 1.4],
141
+ [7.7, 3.0, 6.1, 2.3],
142
+ [6.3, 3.4, 5.6, 2.4],
143
+ [6.4, 3.1, 5.5, 1.8],
144
+ [6.0, 3.0, 4.8, 1.8],
145
+ [6.9, 3.1, 5.4, 2.1],
146
+ [6.7, 3.1, 5.6, 2.4],
147
+ [6.9, 3.1, 5.1, 2.3],
148
+ [5.8, 2.7, 5.1, 1.9],
149
+ [6.8, 3.2, 5.9, 2.3],
150
+ [6.7, 3.3, 5.7, 2.5],
151
+ [6.7, 3.0, 5.2, 2.3],
152
+ [6.3, 2.5, 5.0, 1.9],
153
+ [6.5, 3.0, 5.2, 2.0],
154
+ [6.2, 3.4, 5.4, 2.3],
155
+ [5.9, 3.0, 5.1, 1.8],
156
+ ]
@@ -0,0 +1,22 @@
1
+ # this example shows the impact of the size of a som on the global error distance
2
+ require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
3
+ require File.dirname(__FILE__) + '/som_data'
4
+ require 'benchmark'
5
+
6
# Runs the same experiment for ten map sizes, 3 through 12 nodes
# (3 being the minimum number of nodes).
# NOTE(review): the meaning of the leading 4 and 8 arguments to Som.new is
# not visible from this script - confirm against Ai4r::Som::Som#initialize.
3.upto(12) do |node_count|
  puts "Nodes: #{node_count}"

  som = Ai4r::Som::Som.new(4, 8, Ai4r::Som::TwoPhaseLayer.new(node_count))
  som.initiate_map

  # Error before training, for comparison with the value printed at the end.
  puts "global error distance: #{som.global_error(SOM_DATA)}"
  puts "\ntraining the som\n"

  times = Benchmark.measure { som.train(SOM_DATA) }

  puts "Elapsed time for training: #{times}"
  puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
end
@@ -0,0 +1,24 @@
1
+ require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
2
+ require File.dirname(__FILE__) + '/som_data'
3
+ require 'benchmark'
4
+
5
# Builds one SOM, prints every node's weights and the global error before
# training, trains on SOM_DATA, then prints the weights, the elapsed time and
# the global error again.
# NOTE(review): the meaning of the leading 4 and 8 arguments to Som.new is
# not visible from this script - confirm against Ai4r::Som::Som#initialize.
som = Ai4r::Som::Som.new(4, 8, Ai4r::Som::TwoPhaseLayer.new(10))
som.initiate_map

# Weights before training.
som.nodes.each { |node| p node.weights }

puts "global error distance: #{som.global_error(SOM_DATA)}"
puts "\ntraining the som\n"

times = Benchmark.measure { som.train(SOM_DATA) }

# Weights after training.
som.nodes.each { |node| p node.weights }

puts "Elapsed time for training: #{times}"
puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
@@ -0,0 +1,32 @@
1
+ # Data
2
+ require File.dirname(__FILE__) + "/ai4r/data/data_set"
3
+ require File.dirname(__FILE__) + "/ai4r/data/statistics"
4
+ require File.dirname(__FILE__) + "/ai4r/data/proximity"
5
+ require File.dirname(__FILE__) + "/ai4r/data/parameterizable"
6
+ # Clusterers
7
+ require File.dirname(__FILE__) + "/ai4r/clusterers/clusterer"
8
+ require File.dirname(__FILE__) + "/ai4r/clusterers/k_means"
9
+ require File.dirname(__FILE__) + "/ai4r/clusterers/bisecting_k_means"
10
+ require File.dirname(__FILE__) + "/ai4r/clusterers/single_linkage"
11
+ require File.dirname(__FILE__) + "/ai4r/clusterers/complete_linkage"
12
+ require File.dirname(__FILE__) + "/ai4r/clusterers/average_linkage"
13
+ require File.dirname(__FILE__) + "/ai4r/clusterers/weighted_average_linkage"
14
+ require File.dirname(__FILE__) + "/ai4r/clusterers/centroid_linkage"
15
+ require File.dirname(__FILE__) + "/ai4r/clusterers/median_linkage"
16
+ require File.dirname(__FILE__) + "/ai4r/clusterers/ward_linkage"
17
+ require File.dirname(__FILE__) + "/ai4r/clusterers/diana"
18
+ # Classifiers
19
+ require File.dirname(__FILE__) + "/ai4r/classifiers/classifier"
20
+ require File.dirname(__FILE__) + "/ai4r/classifiers/id3"
21
+ require File.dirname(__FILE__) + "/ai4r/classifiers/prism"
22
+ require File.dirname(__FILE__) + "/ai4r/classifiers/one_r"
23
+ require File.dirname(__FILE__) + "/ai4r/classifiers/zero_r"
24
+ require File.dirname(__FILE__) + "/ai4r/classifiers/hyperpipes"
25
+ require File.dirname(__FILE__) + "/ai4r/classifiers/naive_bayes"
26
+ # Neural networks
27
+ require File.dirname(__FILE__) + "/ai4r/neural_network/backpropagation"
28
+ require File.dirname(__FILE__) + "/ai4r/neural_network/hopfield"
29
+ # Genetic Algorithms
30
+ require File.dirname(__FILE__) + "/ai4r/genetic_algorithm/genetic_algorithm"
31
+ # SOM
32
+ require File.dirname(__FILE__) + "/ai4r/som/som"
@@ -0,0 +1,59 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/parameterizable'
11
+
12
+ module Ai4r
13
+ module Classifiers
14
+
15
+ # This class defines a common API for classifiers.
16
+ # All methods in this class must be implemented in subclasses.
17
class Classifier

  include Ai4r::Data::Parameterizable

  # Trains the classifier from the examples held in data_set.
  # The last attribute of every item is treated as that item's class.
  # Abstract here: always raises NotImplementedError.
  def build(data_set)
    raise(NotImplementedError)
  end

  # Predicts the class of a new data item.
  # e.g.
  #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
  # Abstract here: always raises NotImplementedError.
  def eval(data)
    raise(NotImplementedError)
  end

  # Returns the induced rules as a string of ruby code.
  # e.g.
  #
  #   classifier.get_rules
  #     # =>  if age_range=='<30' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
  #           elsif age_range=='[50-80]' then marketing_target='N'
  #           elsif age_range=='>80' then marketing_target='Y'
  #           else raise 'There was not enough information during training to do a proper induction for this data element' end
  #
  # Besides being a handy way to inspect the induction results, the string
  # can be executed:
  #   age_range = '<30'
  #   city='New York'
  #   marketing_target = nil
  #   eval classifier.get_rules
  #   puts marketing_target
  #     # =>  'Y'
  # Abstract here: always raises NotImplementedError.
  def get_rules
    raise(NotImplementedError)
  end

end
58
+ end
59
+ end
@@ -0,0 +1,118 @@
1
+ # Author:: Sergio Fierens (Implementation only)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require 'set'
11
+ require File.dirname(__FILE__) + '/../data/data_set'
12
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
13
+
14
+ module Ai4r
15
+ module Classifiers
16
+
17
+ include Ai4r::Data
18
+
19
+ # = Introduction
20
+ #
21
+ # A fast classifier algorithm, created by Lucio de Souza Coelho
22
+ # and Len Trigg.
23
+ class Hyperpipes < Classifier
24
+
25
+ attr_reader :data_set, :pipes
26
+
27
+ # Build a new Hyperpipes classifier. You must provide a DataSet instance
28
+ # as parameter. The last attribute of each item is considered as
29
+ # the item class.
30
# Trains the classifier: creates one empty pipe per category found in the
# last domain of the data set, then widens the pipe of each item's category
# with that item's attribute values. Returns self so calls can be chained.
def build(data_set)
  data_set.check_not_empty
  @data_set = data_set
  @domains = data_set.build_domains

  @pipes = {}
  @domains.last.each do |category|
    @pipes[category] = build_pipe(@data_set)
  end
  @data_set.data_items.each do |item|
    update_pipe(@pipes[item.last], item)
  end

  self
end
41
+
42
+ # You can evaluate new data, predicting its class.
43
+ # e.g.
44
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
45
# Predicts the category of +data+ by voting: a category gets one vote for
# every attribute of +data+ that falls inside that category's pipe (within
# [:min, :max] for numeric attributes, a previously seen value otherwise).
#
# Returns the category with the most votes; on a tie the category that comes
# first in @pipes wins. Returns nil when no attribute matched any pipe
# (previously this case crashed with NoMethodError on nil).
#
# e.g.
#   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
def eval(data)
  votes = Hash.new { 0 }
  @pipes.each do |category, pipe|
    pipe.each_with_index do |bounds, i|
      matched = if data[i].is_a?(Numeric)
        data[i] >= bounds[:min] && data[i] <= bounds[:max]
      else
        bounds[data[i]]
      end
      votes[category] += 1 if matched
    end
  end
  # votes only holds categories that received at least one vote, so it can be
  # empty; max then returns nil and there is no winner to report.
  winner = votes.to_a.max { |x, y| x.last <=> y.last }
  winner && winner.first
end
58
+
59
+ # This method returns the generated rules in ruby code.
60
+ # e.g.
61
+ #
62
+ # classifier.get_rules
63
+ # # => votes = Hash.new {0}
64
+ # votes['Y'] += 1 if age >= 18 && age <= 30
65
+ # votes['Y'] += 1 if {"m"=>true}[gender]
66
+ # marketing_target = votes.to_a.max {|x, y| x.last <=> y.last}.first
67
+ #
68
+ # It is a nice way to inspect induction results, and also to execute them:
69
+ # marketing_target = nil
70
+ # eval classifier.get_rules
71
+ # puts marketing_target
72
+ # # => 'Y'
73
# Renders the trained pipes as ruby source: one "votes[...] += 1 if ..."
# statement per category and attribute, followed by an assignment of the
# winning category to a variable named after the last data label.
# Whether an attribute is treated as numeric is decided by looking at the
# first data item.
def get_rules
  sample = @data_set.data_items.first
  names = @data_set.data_labels.collect { |label| label.to_s }

  lines = ["votes = Hash.new {0}"]
  @pipes.each do |category, pipe|
    pipe.each_with_index do |bounds, i|
      condition = if sample[i].is_a?(Numeric)
        "if #{names[i]} >= #{bounds[:min]} && #{names[i]} <= #{bounds[:max]}"
      else
        "if #{bounds.inspect}[#{names[i]}]"
      end
      lines << "votes['#{category}'] += 1 " + condition
    end
  end
  lines << "#{names.last} = votes.to_a.max {|x, y| x.last <=> y.last}.first"

  lines.join("\n")
end
92
+
93
+ protected
94
+
95
# Creates an empty pipe shaped after the first data item (its last element,
# the class, is skipped): numeric attributes get an inverted
# {:min=>Infinity, :max=>-Infinity} range, every other attribute gets a hash
# whose default value is false.
def build_pipe(data_set)
  attributes = data_set.data_items.first[0..-2]
  attributes.map do |att|
    att.is_a?(Numeric) ? {:min=>1.0/0, :max=>-1.0/0} : Hash.new(false)
  end
end
104
+
105
# Widens +pipe+ so that it covers +data_item+ (the last element, the class,
# is skipped): numeric attributes stretch the slot's :min/:max bounds, other
# attributes are recorded as seen values.
def update_pipe(pipe, data_item)
  data_item[0..-2].each_with_index do |att, i|
    slot = pipe[i]
    unless att.is_a?(Numeric)
      slot[att] = true
      next
    end
    slot[:min] = att if att < slot[:min]
    slot[:max] = att if att > slot[:max]
  end
end
115
+
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,326 @@
1
+ # Author:: Sergio Fierens (Implementation, Quinlan is
2
+ # the creator of the algorithm)
3
+ # License:: MPL 1.1
4
+ # Project:: ai4r
5
+ # Url:: http://ai4r.rubyforge.org/
6
+ #
7
+ # You can redistribute it and/or modify it under the terms of
8
+ # the Mozilla Public License version 1.1 as published by the
9
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
10
+
11
+ require File.dirname(__FILE__) + '/../data/data_set'
12
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
13
+
14
+ module Ai4r
15
+
16
+ module Classifiers
17
+
18
+ # = Introduction
19
+ # This is an implementation of the ID3 algorithm (Quinlan)
20
+ # Given a set of preclassified examples, it builds a top-down
21
+ # induction of decision tree, biased by the information gain and
22
+ # entropy measure.
23
+ #
24
+ # * http://en.wikipedia.org/wiki/Decision_tree
25
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
26
+ #
27
+ # = How to use it
28
+ #
29
+ # DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
30
+ #
31
+ # DATA_ITEMS = [
32
+ # ['New York', '<30', 'M', 'Y'],
33
+ # ['Chicago', '<30', 'M', 'Y'],
34
+ # ['Chicago', '<30', 'F', 'Y'],
35
+ # ['New York', '<30', 'M', 'Y'],
36
+ # ['New York', '<30', 'M', 'Y'],
37
+ # ['Chicago', '[30-50)', 'M', 'Y'],
38
+ # ['New York', '[30-50)', 'F', 'N'],
39
+ # ['Chicago', '[30-50)', 'F', 'Y'],
40
+ # ['New York', '[30-50)', 'F', 'N'],
41
+ # ['Chicago', '[50-80]', 'M', 'N'],
42
+ # ['New York', '[50-80]', 'F', 'N'],
43
+ # ['New York', '[50-80]', 'M', 'N'],
44
+ # ['Chicago', '[50-80]', 'M', 'N'],
45
+ # ['New York', '[50-80]', 'F', 'N'],
46
+ # ['Chicago', '>80', 'F', 'Y']
47
+ # ]
48
+ #
49
+ # data_set = DataSet.new(:data_items=>DATA_ITEMS, :data_labels=>DATA_LABELS)
50
+ # id3 = Ai4r::Classifiers::ID3.new.build(data_set)
51
+ #
52
+ # id3.get_rules
53
+ # # => if age_range=='<30' then marketing_target='Y'
54
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
55
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
56
+ # elsif age_range=='[50-80]' then marketing_target='N'
57
+ # elsif age_range=='>80' then marketing_target='Y'
58
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
59
+ #
60
+ # id3.eval(['New York', '<30', 'M'])
61
+ # # => 'Y'
62
+ #
63
+ # = A better way to load the data
64
+ #
65
+ # In real life you will use a lot more training examples, with more
66
+ # attributes. Consider moving your data to an external CSV (comma-separated
67
+ # values) file.
68
+ #
69
+ # data_file = "#{File.dirname(__FILE__)}/data_set.csv"
70
+ # data_set = DataSet.load_csv_with_labels data_file
71
+ # id3 = Ai4r::Classifiers::ID3.new.build(data_set)
72
+ #
73
+ # = A nice tip for data evaluation
74
+ #
75
+ # id3 = Ai4r::Classifiers::ID3.new.build(data_set)
76
+ #
77
+ # age_range = '<30'
78
+ # marketing_target = nil
79
+ # eval id3.get_rules
80
+ # puts marketing_target
81
+ # # => 'Y'
82
+ #
83
+ # = More about ID3 and decision trees
84
+ #
85
+ # * http://en.wikipedia.org/wiki/Decision_tree
86
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
87
+ #
88
+ # = About the project
89
+ # Author:: Sergio Fierens
90
+ # License:: MPL 1.1
91
+ # Url:: http://ai4r.rubyforge.org/
92
+ class ID3 < Classifier
93
+
94
+ attr_reader :data_set
95
+
96
+ # Create a new ID3 classifier. You must provide a DataSet instance
97
+ # as parameter. The last attribute of each item is considered as the
98
+ # item class.
99
# Trains the classifier: checks that the data set is not empty, stores it,
# and builds the decision tree from its items. Returns self so calls can be
# chained.
def build(data_set)
  data_set.check_not_empty
  @data_set = data_set
  preprocess_data(data_set.data_items)
  self
end
105
+
106
+ # You can evaluate new data, predicting its category.
107
+ # e.g.
108
+ # id3.eval(['New York', '<30', 'F']) # => 'Y'
109
# Predicts the category of +data+ by walking the decision tree.
# Returns nil when no tree has been built yet.
def eval(data)
  return nil unless @tree
  @tree.value(data)
end
112
+
113
+ # This method returns the generated rules in ruby code.
114
+ # e.g.
115
+ #
116
+ # id3.get_rules
117
+ # # => if age_range=='<30' then marketing_target='Y'
118
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
119
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
120
+ # elsif age_range=='[50-80]' then marketing_target='N'
121
+ # elsif age_range=='>80' then marketing_target='Y'
122
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
123
+ #
124
+ # It is a nice way to inspect induction results, and also to execute them:
125
+ # age_range = '<30'
126
+ # marketing_target = nil
127
+ # eval id3.get_rules
128
+ # puts marketing_target
129
+ # # => 'Y'
130
# Renders the tree as one ruby if/elsif/else expression. Each rule returned
# by the tree is an array whose last element is the conclusion and whose
# other elements are conditions; the conditions are joined with "and".
# The trailing else branch raises for inputs not covered by any rule.
def get_rules
  clauses = @tree.get_rules.map do |rule|
    conditions = rule[0...-1]
    conclusion = rule.last
    "#{conditions.join(' and ')} then #{conclusion}"
  end
  "if #{clauses.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
end
138
+
139
+ private
140
# Builds the decision tree for data_examples and stores its root in @tree.
def preprocess_data(data_examples)
  root = build_node(data_examples)
  @tree = root
end
143
+
144
+ private
145
# Recursively builds the (sub)tree for data_examples.
#
# data_examples:: the training items that reach this node
# flag_att::      indexes of the attributes already used on the path from
#                 the root to this node; they are not chosen again
#
# Returns an ErrorNode when there are no examples, a CategoryNode when the
# examples share one category or the chosen attribute cannot split them any
# further, and an EvaluationNode otherwise.
def build_node(data_examples, flag_att = [])
  return ErrorNode.new if data_examples.length == 0
  domains = domain(data_examples)
  category_label = @data_set.data_labels.last
  return CategoryNode.new(category_label, domains.last[0]) if domains.last.length == 1
  split_index = min_entropy_index(data_examples, domains, flag_att)
  # Work on a copy: appending to flag_att itself would leak the attributes
  # used inside one branch into its sibling branches (and into the caller's
  # array), wrongly preventing those siblings from splitting on them.
  used_attributes = flag_att + [split_index]
  partitions = split_data_examples(data_examples, domains, split_index)
  return CategoryNode.new(category_label, most_freq(data_examples, domains)) if partitions.length == 1
  nodes = partitions.collect do |partial_data_examples|
    build_node(partial_data_examples, used_attributes)
  end
  EvaluationNode.new(@data_set.data_labels, split_index, domains[split_index], nodes)
end
158
+
159
+ private
160
# Adds up all elements of values, starting from 0.
def self.sum(values)
  total = 0
  values.each { |value| total += value }
  total
end
163
+
164
+ private
165
# Base-2 logarithm of z, with log2(0) defined as 0.0 so that zero
# frequencies contribute nothing to an entropy sum.
def self.log2(z)
  z == 0 ? 0.0 : Math.log(z) / LOG2
end
169
+
170
+ private
171
# Returns the category (a value of domain.last) that occurs most often as the
# last element of the examples. On a tie the category listed first in
# domain.last wins.
def most_freq(examples, domain)
  categories = domain.last
  tally = Array.new(categories.length, 0)
  examples.each do |example|
    tally[categories.index(example.last)] += 1
  end
  categories[tally.index(tally.max)]
end
183
+
184
+ private
185
# Partitions data_examples by their value of the attribute at att_index.
# The result is an array of example arrays, positioned to match the order of
# the values in domain[att_index].
def split_data_examples(data_examples, domain, att_index)
  attribute_values = domain[att_index]

  grouped = {}
  data_examples.each do |example|
    (grouped[example[att_index]] ||= []) << example
  end

  partitions = []
  grouped.each do |att_value, members|
    partitions[attribute_values.index(att_value)] = members
  end
  partitions
end
200
+
201
+ private
202
# Returns the index of the attribute (every column of domain except the
# last) whose split gives the lowest entropy, ignoring the indexes listed in
# flag_att. Ties keep the lowest index. Returns 0 when no attribute is
# eligible.
def min_entropy_index(data_examples, domain, flag_att=[])
  best_entropy = nil
  best_index = 0
  0.upto(domain.length - 2) do |index|
    grid = freq_grid(index, data_examples, domain)
    candidate = entropy(grid, data_examples.length)
    next if flag_att.include?(index)
    if best_entropy.nil? || candidate < best_entropy
      best_entropy = candidate
      best_index = index
    end
  end
  best_index
end
215
+
216
+ private
217
# Collects, for each column named in @data_set.data_labels, the distinct
# values found in data_examples in order of first appearance. Columns beyond
# the number of labels are ignored.
def domain(data_examples)
  width = @data_set.data_labels.length
  seen = Array.new(width) { [] }
  data_examples.each do |row|
    row.each_with_index do |value, column|
      next if column >= width
      seen[column] << value unless seen[column].include?(value)
    end
  end
  seen
end
228
+
229
+ private
230
# Builds a frequency grid for the attribute at att_index: one row per value
# in domain[att_index], one column per category in domain.last, each cell
# counting the examples with that value and that category.
def freq_grid(att_index, data_examples, domain)
  att_values = domain[att_index]
  categories = domain.last
  # Start from an all-zero grid.
  grid = Array.new(att_values.length) { Array.new(categories.length, 0) }
  # Count every example into its cell.
  data_examples.each do |example|
    row = att_values.index(example[att_index])
    column = categories.index(example.last)
    grid[row][column] += 1
  end
  grid
end
247
+
248
+ private
249
# Computes the weighted entropy of a frequency grid: for each row the entropy
# of its category distribution, weighted by the share of total_examples that
# falls in that row. Rows with no examples contribute 0.
def entropy(freq_grid, total_examples)
  freq_grid.inject(0) do |accumulated, att_freq|
    att_total_freq = ID3.sum(att_freq)
    partial_entropy = 0
    unless att_total_freq == 0
      att_freq.each do |freq|
        prop = freq.to_f/att_total_freq
        partial_entropy += (-1*prop*ID3.log2(prop))
      end
    end
    accumulated + (att_total_freq.to_f/total_examples) * partial_entropy
  end
end
265
+
266
+ private
267
+ LOG2 = Math.log(2)
268
+ end
269
+
270
# Inner node of the decision tree: tests the attribute at +index+ and hands
# the item over to the child node matching that attribute's value.
class EvaluationNode #:nodoc: all

  attr_reader :index, :values, :nodes

  # data_labels:: labels of all columns (used to render rules)
  # index::       position of the attribute this node tests
  # values::      attribute values known to this node
  # nodes::       child nodes, nodes[i] being the child for values[i]
  def initialize(data_labels, index, values, nodes)
    @index = index
    @values = values
    @nodes = nodes
    @data_labels = data_labels
  end

  # Returns the category predicted for +data+ by the matching child node.
  # Raises a RuntimeError when the attribute value was never seen at this
  # node during training (previously this path hit an undefined
  # +rule_not_found+ and surfaced as a NameError).
  def value(data)
    position = @values.index(data[@index])
    return rule_not_found if position.nil?
    @nodes[position].value(data)
  end

  # Returns one rule per leaf below this node. Each rule is an array of
  # strings: this node's condition is prepended to every child rule.
  def get_rules
    rule_set = []
    @nodes.each_index do |child_node_index|
      my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
      child_node_rules = @nodes[child_node_index].get_rules
      child_node_rules.each { |child_rule| child_rule.unshift(my_rule) }
      rule_set += child_node_rules
    end
    rule_set
  end

  private

  # Signals that no branch matches, with the same message ErrorNode uses.
  def rule_not_found
    raise "There was not enough information during training to do a proper induction for this data element."
  end

end
302
+
303
# Leaf of the decision tree: always answers with a fixed category.
class CategoryNode #:nodoc: all

  # label:: name of the category column (used to render rules)
  # value:: the category this leaf predicts
  def initialize(label, value)
    @label, @value = label, value
  end

  # Returns the stored category; +data+ is not inspected.
  def value(data)
    @value
  end

  # Returns a single rule holding only the assignment of the category.
  def get_rules
    assignment = "#{@label}='#{@value}'"
    [[assignment]]
  end

end
315
+
316
# Leaf used where training provided no examples: it cannot predict anything.
class ErrorNode #:nodoc: all

  # Always raises a RuntimeError; +data+ is not inspected.
  def value(data)
    raise "There was not enough information during training to do a proper induction for this data element."
  end

  # Contributes no rules.
  def get_rules
    []
  end

end
324
+
325
+ end
326
+ end