nirvdrum-ai4r 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/.rakeTasks +7 -0
- data/README.rdoc +56 -0
- data/Rakefile.rb +42 -0
- data/VERSION +1 -0
- data/ai4r.gemspec +221 -0
- data/change_log +49 -0
- data/examples/classifiers/id3_data.csv +121 -0
- data/examples/classifiers/id3_example.rb +29 -0
- data/examples/classifiers/naive_bayes_data.csv +11 -0
- data/examples/classifiers/naive_bayes_example.rb +16 -0
- data/examples/classifiers/results.txt +31 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
- data/examples/genetic_algorithm/travel_cost.csv +16 -0
- data/examples/neural_network/backpropagation_example.rb +67 -0
- data/examples/neural_network/patterns_with_base_noise.rb +68 -0
- data/examples/neural_network/patterns_with_noise.rb +66 -0
- data/examples/neural_network/training_patterns.rb +68 -0
- data/examples/neural_network/xor_example.rb +35 -0
- data/examples/som/som_data.rb +156 -0
- data/examples/som/som_multi_node_example.rb +22 -0
- data/examples/som/som_single_example.rb +24 -0
- data/lib/ai4r.rb +32 -0
- data/lib/ai4r/classifiers/classifier.rb +59 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
- data/lib/ai4r/classifiers/id3.rb +326 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
- data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
- data/lib/ai4r/classifiers/one_r.rb +110 -0
- data/lib/ai4r/classifiers/prism.rb +197 -0
- data/lib/ai4r/classifiers/zero_r.rb +73 -0
- data/lib/ai4r/clusterers/average_linkage.rb +59 -0
- data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
- data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
- data/lib/ai4r/clusterers/clusterer.rb +61 -0
- data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
- data/lib/ai4r/clusterers/diana.rb +139 -0
- data/lib/ai4r/clusterers/k_means.rb +126 -0
- data/lib/ai4r/clusterers/median_linkage.rb +61 -0
- data/lib/ai4r/clusterers/single_linkage.rb +194 -0
- data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
- data/lib/ai4r/data/data_set.rb +266 -0
- data/lib/ai4r/data/parameterizable.rb +64 -0
- data/lib/ai4r/data/proximity.rb +100 -0
- data/lib/ai4r/data/statistics.rb +77 -0
- data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
- data/lib/ai4r/neural_network/backpropagation.rb +293 -0
- data/lib/ai4r/neural_network/hopfield.rb +149 -0
- data/lib/ai4r/som/layer.rb +68 -0
- data/lib/ai4r/som/node.rb +96 -0
- data/lib/ai4r/som/som.rb +155 -0
- data/lib/ai4r/som/two_phase_layer.rb +90 -0
- data/site/forrest.properties +152 -0
- data/site/forrest.properties.dispatcher.properties +25 -0
- data/site/forrest.properties.xml +29 -0
- data/site/src/documentation/README.txt +7 -0
- data/site/src/documentation/classes/CatalogManager.properties +62 -0
- data/site/src/documentation/content/locationmap.xml +72 -0
- data/site/src/documentation/content/xdocs/downloads.html +9 -0
- data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +294 -0
- data/site/src/documentation/content/xdocs/index.xml +155 -0
- data/site/src/documentation/content/xdocs/machineLearning.xml +131 -0
- data/site/src/documentation/content/xdocs/neuralNetworks.xml +270 -0
- data/site/src/documentation/content/xdocs/site.xml +54 -0
- data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
- data/site/src/documentation/content/xdocs/tabs.xml +35 -0
- data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
- data/site/src/documentation/resources/images/c.png +0 -0
- data/site/src/documentation/resources/images/c_wbn.png +0 -0
- data/site/src/documentation/resources/images/c_wn.png +0 -0
- data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
- data/site/src/documentation/resources/images/ero.gif +0 -0
- data/site/src/documentation/resources/images/europe2.png +0 -0
- data/site/src/documentation/resources/images/europe3.png +0 -0
- data/site/src/documentation/resources/images/fitness.png +0 -0
- data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
- data/site/src/documentation/resources/images/icon-a.png +0 -0
- data/site/src/documentation/resources/images/icon-b.png +0 -0
- data/site/src/documentation/resources/images/icon.png +0 -0
- data/site/src/documentation/resources/images/jadeferret.png +0 -0
- data/site/src/documentation/resources/images/my_email.png +0 -0
- data/site/src/documentation/resources/images/neural_network_example.png +0 -0
- data/site/src/documentation/resources/images/project-logo.png +0 -0
- data/site/src/documentation/resources/images/rubyforge.png +0 -0
- data/site/src/documentation/resources/images/s.png +0 -0
- data/site/src/documentation/resources/images/s_wbn.png +0 -0
- data/site/src/documentation/resources/images/s_wn.png +0 -0
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
- data/site/src/documentation/resources/images/t.png +0 -0
- data/site/src/documentation/resources/images/t_wbn.png +0 -0
- data/site/src/documentation/resources/images/t_wn.png +0 -0
- data/site/src/documentation/resources/schema/catalog.xcat +29 -0
- data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
- data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
- data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
- data/site/src/documentation/sitemap.xmap +66 -0
- data/site/src/documentation/skinconf.xml +418 -0
- data/site/src/documentation/translations/langcode.xml +29 -0
- data/site/src/documentation/translations/languages_de.xml +24 -0
- data/site/src/documentation/translations/languages_en.xml +24 -0
- data/site/src/documentation/translations/languages_es.xml +22 -0
- data/site/src/documentation/translations/languages_fr.xml +24 -0
- data/site/src/documentation/translations/languages_nl.xml +24 -0
- data/site/src/documentation/translations/menu.xml +33 -0
- data/site/src/documentation/translations/menu_af.xml +33 -0
- data/site/src/documentation/translations/menu_de.xml +33 -0
- data/site/src/documentation/translations/menu_es.xml +33 -0
- data/site/src/documentation/translations/menu_fr.xml +33 -0
- data/site/src/documentation/translations/menu_it.xml +33 -0
- data/site/src/documentation/translations/menu_nl.xml +33 -0
- data/site/src/documentation/translations/menu_no.xml +33 -0
- data/site/src/documentation/translations/menu_ru.xml +33 -0
- data/site/src/documentation/translations/menu_sk.xml +33 -0
- data/site/src/documentation/translations/tabs.xml +22 -0
- data/site/src/documentation/translations/tabs_de.xml +22 -0
- data/site/src/documentation/translations/tabs_es.xml +22 -0
- data/site/src/documentation/translations/tabs_fr.xml +22 -0
- data/site/src/documentation/translations/tabs_nl.xml +22 -0
- data/test/classifiers/hyperpipes_test.rb +84 -0
- data/test/classifiers/id3_test.rb +208 -0
- data/test/classifiers/multilayer_perceptron_test.rb +79 -0
- data/test/classifiers/naive_bayes_test.rb +43 -0
- data/test/classifiers/one_r_test.rb +62 -0
- data/test/classifiers/prism_test.rb +85 -0
- data/test/classifiers/zero_r_test.rb +50 -0
- data/test/clusterers/average_linkage_test.rb +51 -0
- data/test/clusterers/bisecting_k_means_test.rb +66 -0
- data/test/clusterers/centroid_linkage_test.rb +53 -0
- data/test/clusterers/complete_linkage_test.rb +57 -0
- data/test/clusterers/diana_test.rb +69 -0
- data/test/clusterers/k_means_test.rb +100 -0
- data/test/clusterers/median_linkage_test.rb +53 -0
- data/test/clusterers/single_linkage_test.rb +122 -0
- data/test/clusterers/ward_linkage_test.rb +53 -0
- data/test/clusterers/weighted_average_linkage_test.rb +53 -0
- data/test/data/data_set.csv +121 -0
- data/test/data/data_set_test.rb +96 -0
- data/test/data/proximity_test.rb +81 -0
- data/test/data/statistics_data_set.csv +5 -0
- data/test/data/statistics_test.rb +65 -0
- data/test/experiment/classifier_evaluator_test.rb +76 -0
- data/test/genetic_algorithm/chromosome_test.rb +58 -0
- data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
- data/test/neural_network/backpropagation_test.rb +69 -0
- data/test/neural_network/hopfield_test.rb +72 -0
- data/test/som/som_test.rb +97 -0
- metadata +238 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# data is from the iris dataset (http://archive.ics.uci.edu/ml/datasets/Iris)
|
|
2
|
+
# it is the full dataset with the last column removed
|
|
3
|
+
# website provides additional information on the dataset itself (attributes, class distribution, etc)
|
|
4
|
+
|
|
5
|
+
SOM_DATA = [
|
|
6
|
+
[5.1, 3.5, 1.4, 0.2],
|
|
7
|
+
[4.9, 3.0, 1.4, 0.2],
|
|
8
|
+
[4.7, 3.2, 1.3, 0.2],
|
|
9
|
+
[4.6, 3.1, 1.5, 0.2],
|
|
10
|
+
[5.0, 3.6, 1.4, 0.2],
|
|
11
|
+
[5.4, 3.9, 1.7, 0.4],
|
|
12
|
+
[4.6, 3.4, 1.4, 0.3],
|
|
13
|
+
[5.0, 3.4, 1.5, 0.2],
|
|
14
|
+
[4.4, 2.9, 1.4, 0.2],
|
|
15
|
+
[4.9, 3.1, 1.5, 0.1],
|
|
16
|
+
[5.4, 3.7, 1.5, 0.2],
|
|
17
|
+
[4.8, 3.4, 1.6, 0.2],
|
|
18
|
+
[4.8, 3.0, 1.4, 0.1],
|
|
19
|
+
[4.3, 3.0, 1.1, 0.1],
|
|
20
|
+
[5.8, 4.0, 1.2, 0.2],
|
|
21
|
+
[5.7, 4.4, 1.5, 0.4],
|
|
22
|
+
[5.4, 3.9, 1.3, 0.4],
|
|
23
|
+
[5.1, 3.5, 1.4, 0.3],
|
|
24
|
+
[5.7, 3.8, 1.7, 0.3],
|
|
25
|
+
[5.1, 3.8, 1.5, 0.3],
|
|
26
|
+
[5.4, 3.4, 1.7, 0.2],
|
|
27
|
+
[5.1, 3.7, 1.5, 0.4],
|
|
28
|
+
[4.6, 3.6, 1.0, 0.2],
|
|
29
|
+
[5.1, 3.3, 1.7, 0.5],
|
|
30
|
+
[4.8, 3.4, 1.9, 0.2],
|
|
31
|
+
[5.0, 3.0, 1.6, 0.2],
|
|
32
|
+
[5.0, 3.4, 1.6, 0.4],
|
|
33
|
+
[5.2, 3.5, 1.5, 0.2],
|
|
34
|
+
[5.2, 3.4, 1.4, 0.2],
|
|
35
|
+
[4.7, 3.2, 1.6, 0.2],
|
|
36
|
+
[4.8, 3.1, 1.6, 0.2],
|
|
37
|
+
[5.4, 3.4, 1.5, 0.4],
|
|
38
|
+
[5.2, 4.1, 1.5, 0.1],
|
|
39
|
+
[5.5, 4.2, 1.4, 0.2],
|
|
40
|
+
[4.9, 3.1, 1.5, 0.1],
|
|
41
|
+
[5.0, 3.2, 1.2, 0.2],
|
|
42
|
+
[5.5, 3.5, 1.3, 0.2],
|
|
43
|
+
[4.9, 3.1, 1.5, 0.1],
|
|
44
|
+
[4.4, 3.0, 1.3, 0.2],
|
|
45
|
+
[5.1, 3.4, 1.5, 0.2],
|
|
46
|
+
[5.0, 3.5, 1.3, 0.3],
|
|
47
|
+
[4.5, 2.3, 1.3, 0.3],
|
|
48
|
+
[4.4, 3.2, 1.3, 0.2],
|
|
49
|
+
[5.0, 3.5, 1.6, 0.6],
|
|
50
|
+
[5.1, 3.8, 1.9, 0.4],
|
|
51
|
+
[4.8, 3.0, 1.4, 0.3],
|
|
52
|
+
[5.1, 3.8, 1.6, 0.2],
|
|
53
|
+
[4.6, 3.2, 1.4, 0.2],
|
|
54
|
+
[5.3, 3.7, 1.5, 0.2],
|
|
55
|
+
[5.0, 3.3, 1.4, 0.2],
|
|
56
|
+
[7.0, 3.2, 4.7, 1.4],
|
|
57
|
+
[6.4, 3.2, 4.5, 1.5],
|
|
58
|
+
[6.9, 3.1, 4.9, 1.5],
|
|
59
|
+
[5.5, 2.3, 4.0, 1.3],
|
|
60
|
+
[6.5, 2.8, 4.6, 1.5],
|
|
61
|
+
[5.7, 2.8, 4.5, 1.3],
|
|
62
|
+
[6.3, 3.3, 4.7, 1.6],
|
|
63
|
+
[4.9, 2.4, 3.3, 1.0],
|
|
64
|
+
[6.6, 2.9, 4.6, 1.3],
|
|
65
|
+
[5.2, 2.7, 3.9, 1.4],
|
|
66
|
+
[5.0, 2.0, 3.5, 1.0],
|
|
67
|
+
[5.9, 3.0, 4.2, 1.5],
|
|
68
|
+
[6.0, 2.2, 4.0, 1.0],
|
|
69
|
+
[6.1, 2.9, 4.7, 1.4],
|
|
70
|
+
[5.6, 2.9, 3.6, 1.3],
|
|
71
|
+
[6.7, 3.1, 4.4, 1.4],
|
|
72
|
+
[5.6, 3.0, 4.5, 1.5],
|
|
73
|
+
[5.8, 2.7, 4.1, 1.0],
|
|
74
|
+
[6.2, 2.2, 4.5, 1.5],
|
|
75
|
+
[5.6, 2.5, 3.9, 1.1],
|
|
76
|
+
[5.9, 3.2, 4.8, 1.8],
|
|
77
|
+
[6.1, 2.8, 4.0, 1.3],
|
|
78
|
+
[6.3, 2.5, 4.9, 1.5],
|
|
79
|
+
[6.1, 2.8, 4.7, 1.2],
|
|
80
|
+
[6.4, 2.9, 4.3, 1.3],
|
|
81
|
+
[6.6, 3.0, 4.4, 1.4],
|
|
82
|
+
[6.8, 2.8, 4.8, 1.4],
|
|
83
|
+
[6.7, 3.0, 5.0, 1.7],
|
|
84
|
+
[6.0, 2.9, 4.5, 1.5],
|
|
85
|
+
[5.7, 2.6, 3.5, 1.0],
|
|
86
|
+
[5.5, 2.4, 3.8, 1.1],
|
|
87
|
+
[5.5, 2.4, 3.7, 1.0],
|
|
88
|
+
[5.8, 2.7, 3.9, 1.2],
|
|
89
|
+
[6.0, 2.7, 5.1, 1.6],
|
|
90
|
+
[5.4, 3.0, 4.5, 1.5],
|
|
91
|
+
[6.0, 3.4, 4.5, 1.6],
|
|
92
|
+
[6.7, 3.1, 4.7, 1.5],
|
|
93
|
+
[6.3, 2.3, 4.4, 1.3],
|
|
94
|
+
[5.6, 3.0, 4.1, 1.3],
|
|
95
|
+
[5.5, 2.5, 4.0, 1.3],
|
|
96
|
+
[5.5, 2.6, 4.4, 1.2],
|
|
97
|
+
[6.1, 3.0, 4.6, 1.4],
|
|
98
|
+
[5.8, 2.6, 4.0, 1.2],
|
|
99
|
+
[5.0, 2.3, 3.3, 1.0],
|
|
100
|
+
[5.6, 2.7, 4.2, 1.3],
|
|
101
|
+
[5.7, 3.0, 4.2, 1.2],
|
|
102
|
+
[5.7, 2.9, 4.2, 1.3],
|
|
103
|
+
[6.2, 2.9, 4.3, 1.3],
|
|
104
|
+
[5.1, 2.5, 3.0, 1.1],
|
|
105
|
+
[5.7, 2.8, 4.1, 1.3],
|
|
106
|
+
[6.3, 3.3, 6.0, 2.5],
|
|
107
|
+
[5.8, 2.7, 5.1, 1.9],
|
|
108
|
+
[7.1, 3.0, 5.9, 2.1],
|
|
109
|
+
[6.3, 2.9, 5.6, 1.8],
|
|
110
|
+
[6.5, 3.0, 5.8, 2.2],
|
|
111
|
+
[7.6, 3.0, 6.6, 2.1],
|
|
112
|
+
[4.9, 2.5, 4.5, 1.7],
|
|
113
|
+
[7.3, 2.9, 6.3, 1.8],
|
|
114
|
+
[6.7, 2.5, 5.8, 1.8],
|
|
115
|
+
[7.2, 3.6, 6.1, 2.5],
|
|
116
|
+
[6.5, 3.2, 5.1, 2.0],
|
|
117
|
+
[6.4, 2.7, 5.3, 1.9],
|
|
118
|
+
[6.8, 3.0, 5.5, 2.1],
|
|
119
|
+
[5.7, 2.5, 5.0, 2.0],
|
|
120
|
+
[5.8, 2.8, 5.1, 2.4],
|
|
121
|
+
[6.4, 3.2, 5.3, 2.3],
|
|
122
|
+
[6.5, 3.0, 5.5, 1.8],
|
|
123
|
+
[7.7, 3.8, 6.7, 2.2],
|
|
124
|
+
[7.7, 2.6, 6.9, 2.3],
|
|
125
|
+
[6.0, 2.2, 5.0, 1.5],
|
|
126
|
+
[6.9, 3.2, 5.7, 2.3],
|
|
127
|
+
[5.6, 2.8, 4.9, 2.0],
|
|
128
|
+
[7.7, 2.8, 6.7, 2.0],
|
|
129
|
+
[6.3, 2.7, 4.9, 1.8],
|
|
130
|
+
[6.7, 3.3, 5.7, 2.1],
|
|
131
|
+
[7.2, 3.2, 6.0, 1.8],
|
|
132
|
+
[6.2, 2.8, 4.8, 1.8],
|
|
133
|
+
[6.1, 3.0, 4.9, 1.8],
|
|
134
|
+
[6.4, 2.8, 5.6, 2.1],
|
|
135
|
+
[7.2, 3.0, 5.8, 1.6],
|
|
136
|
+
[7.4, 2.8, 6.1, 1.9],
|
|
137
|
+
[7.9, 3.8, 6.4, 2.0],
|
|
138
|
+
[6.4, 2.8, 5.6, 2.2],
|
|
139
|
+
[6.3, 2.8, 5.1, 1.5],
|
|
140
|
+
[6.1, 2.6, 5.6, 1.4],
|
|
141
|
+
[7.7, 3.0, 6.1, 2.3],
|
|
142
|
+
[6.3, 3.4, 5.6, 2.4],
|
|
143
|
+
[6.4, 3.1, 5.5, 1.8],
|
|
144
|
+
[6.0, 3.0, 4.8, 1.8],
|
|
145
|
+
[6.9, 3.1, 5.4, 2.1],
|
|
146
|
+
[6.7, 3.1, 5.6, 2.4],
|
|
147
|
+
[6.9, 3.1, 5.1, 2.3],
|
|
148
|
+
[5.8, 2.7, 5.1, 1.9],
|
|
149
|
+
[6.8, 3.2, 5.9, 2.3],
|
|
150
|
+
[6.7, 3.3, 5.7, 2.5],
|
|
151
|
+
[6.7, 3.0, 5.2, 2.3],
|
|
152
|
+
[6.3, 2.5, 5.0, 1.9],
|
|
153
|
+
[6.5, 3.0, 5.2, 2.0],
|
|
154
|
+
[6.2, 3.4, 5.4, 2.3],
|
|
155
|
+
[5.9, 3.0, 5.1, 1.8],
|
|
156
|
+
]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# this example shows the impact of the size of a som on the global error distance
|
|
2
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
|
|
3
|
+
require File.dirname(__FILE__) + '/som_data'
|
|
4
|
+
require 'benchmark'
|
|
5
|
+
|
|
6
|
+
# Build, measure and train ten maps, one per layer size from 3 (the minimum
# number of nodes) up to 12, printing the global error distance before and
# after training together with the time the training took.
3.upto(12) do |node_count|
  puts "Nodes: #{node_count}"
  som = Ai4r::Som::Som.new(4, 8, Ai4r::Som::TwoPhaseLayer.new(node_count))
  som.initiate_map

  puts "global error distance: #{som.global_error(SOM_DATA)}"
  puts "\ntraining the som\n"

  times = Benchmark.measure { som.train(SOM_DATA) }

  puts "Elapsed time for training: #{times}"
  puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
|
|
2
|
+
require File.dirname(__FILE__) + '/som_data'
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
|
|
5
|
+
# Create one map, print its node weights and global error distance, train it
# on SOM_DATA, then print the weights, the elapsed training time and the
# global error distance again.
som = Ai4r::Som::Som.new(4, 8, Ai4r::Som::TwoPhaseLayer.new(10))
som.initiate_map

# Node weights before training.
som.nodes.each { |node| p node.weights }

puts "global error distance: #{som.global_error(SOM_DATA)}"
puts "\ntraining the som\n"

times = Benchmark.measure { som.train(SOM_DATA) }

# Node weights after training.
som.nodes.each { |node| p node.weights }

puts "Elapsed time for training: #{times}"
puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
|
data/lib/ai4r.rb
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Data
|
|
2
|
+
require File.dirname(__FILE__) + "/ai4r/data/data_set"
|
|
3
|
+
require File.dirname(__FILE__) + "/ai4r/data/statistics"
|
|
4
|
+
require File.dirname(__FILE__) + "/ai4r/data/proximity"
|
|
5
|
+
require File.dirname(__FILE__) + "/ai4r/data/parameterizable"
|
|
6
|
+
# Clusterers
|
|
7
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/clusterer"
|
|
8
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/k_means"
|
|
9
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/bisecting_k_means"
|
|
10
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/single_linkage"
|
|
11
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/complete_linkage"
|
|
12
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/average_linkage"
|
|
13
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/weighted_average_linkage"
|
|
14
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/centroid_linkage"
|
|
15
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/median_linkage"
|
|
16
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/ward_linkage"
|
|
17
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/diana"
|
|
18
|
+
# Classifiers
|
|
19
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/classifier"
|
|
20
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/id3"
|
|
21
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/prism"
|
|
22
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/one_r"
|
|
23
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/zero_r"
|
|
24
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/hyperpipes"
|
|
25
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/naive_bayes"
|
|
26
|
+
# Neural networks
|
|
27
|
+
require File.dirname(__FILE__) + "/ai4r/neural_network/backpropagation"
|
|
28
|
+
require File.dirname(__FILE__) + "/ai4r/neural_network/hopfield"
|
|
29
|
+
# Genetic Algorithms
|
|
30
|
+
require File.dirname(__FILE__) + "/ai4r/genetic_algorithm/genetic_algorithm"
|
|
31
|
+
# SOM
|
|
32
|
+
require File.dirname(__FILE__) + "/ai4r/som/som"
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Author:: Sergio Fierens
|
|
2
|
+
# License:: MPL 1.1
|
|
3
|
+
# Project:: ai4r
|
|
4
|
+
# Url:: http://ai4r.rubyforge.org
|
|
5
|
+
#
|
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
9
|
+
|
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
|
11
|
+
|
|
12
|
+
module Ai4r
|
|
13
|
+
module Classifiers
|
|
14
|
+
|
|
15
|
+
# This class defines a common API for classifiers.
# All methods in this class must be implemented in subclasses: the versions
# defined here only raise NotImplementedError, naming the concrete class and
# the method that is missing.
class Classifier

  include Ai4r::Data::Parameterizable

  # Build a new classifier, using data examples found in data_set.
  # The last attribute of each item is considered as the
  # item class.
  #
  # Raises NotImplementedError unless overridden by the subclass.
  def build(data_set)
    raise NotImplementedError, "#{self.class.name} must implement build(data_set)"
  end

  # You can evaluate new data, predicting its class.
  # e.g.
  #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
  #
  # Raises NotImplementedError unless overridden by the subclass.
  def eval(data)
    raise NotImplementedError, "#{self.class.name} must implement eval(data)"
  end

  # This method returns the generated rules in ruby code.
  # e.g.
  #
  #   classifier.get_rules
  #     # =>  if age_range=='<30' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
  #           elsif age_range=='[50-80]' then marketing_target='N'
  #           elsif age_range=='>80' then marketing_target='Y'
  #           else raise 'There was not enough information during training to do a proper induction for this data element' end
  #
  # It is a nice way to inspect induction results, and also to execute them:
  #     age_range = '<30'
  #     city='New York'
  #     marketing_target = nil
  #     eval classifier.get_rules
  #     puts marketing_target
  #       # =>  'Y'
  #
  # Raises NotImplementedError unless overridden by the subclass.
  def get_rules
    raise NotImplementedError, "#{self.class.name} must implement get_rules"
  end

end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Author:: Sergio Fierens (Implementation only)
|
|
2
|
+
# License:: MPL 1.1
|
|
3
|
+
# Project:: ai4r
|
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
5
|
+
#
|
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
9
|
+
|
|
10
|
+
require 'set'
|
|
11
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
|
12
|
+
require File.dirname(__FILE__) + '/../classifiers/classifier'
|
|
13
|
+
|
|
14
|
+
module Ai4r
|
|
15
|
+
module Classifiers
|
|
16
|
+
|
|
17
|
+
include Ai4r::Data
|
|
18
|
+
|
|
19
|
+
# = Introduction
#
# A fast classifier algorithm, created by Lucio de Souza Coelho
# and Len Trigg.
#
# For every category found in the training data the classifier keeps a
# "pipe": one entry per attribute, holding either the numeric :min/:max
# bounds or the set of non-numeric values observed for that category.
# An item is assigned to the category whose pipe matches the largest
# number of its attribute values.
class Hyperpipes < Classifier

  attr_reader :data_set, :pipes

  # Build a new Hyperpipes classifier. You must provide a DataSet instance
  # as parameter. The last attribute of each item is considered as
  # the item class.
  #
  # Returns self, so the call can be chained after +new+.
  def build(data_set)
    data_set.check_not_empty
    @data_set = data_set
    @domains = data_set.build_domains

    # One empty pipe per category, then widen it with every training item
    # that belongs to that category.
    @pipes = {}
    @domains.last.each { |category| @pipes[category] = build_pipe(@data_set) }
    @data_set.data_items.each { |item| update_pipe(@pipes[item.last], item) }

    self
  end

  # You can evaluate new data, predicting its class.
  # e.g.
  #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
  #
  # Every category starts with zero votes and gets one vote per attribute
  # of +data+ that falls inside its pipe. The category with most votes is
  # returned; on a tie the first one found while iterating wins. Because
  # every category is seeded with zero votes, data that matches no pipe at
  # all yields a category instead of failing on an empty vote table.
  def eval(data)
    votes = {}
    @pipes.each do |category, pipe|
      votes[category] = 0
      pipe.each_with_index do |bounds, i|
        value = data[i]
        matched = if value.is_a?(Numeric)
                    value >= bounds[:min] && value <= bounds[:max]
                  else
                    bounds[value]
                  end
        votes[category] += 1 if matched
      end
    end
    votes.to_a.max { |x, y| x.last <=> y.last }.first
  end

  # This method returns the generated rules in ruby code: a vote table
  # initialization, one voting statement per category and attribute, and a
  # final assignment of the winning category to a variable named after the
  # last data label.
  # e.g. (bounds and values depend on the training data)
  #
  #   classifier.get_rules
  #     # =>  votes = Hash.new {0}
  #           votes['Y'] += 1 if age >= 18 && age <= 30
  #           votes['Y'] += 1 if {"M"=>true}[gender]
  #           votes['N'] += 1 if age >= 25 && age <= 80
  #           votes['N'] += 1 if {"F"=>true, "M"=>true}[gender]
  #           marketing_target = votes.to_a.max {|x, y| x.last <=> y.last}.first
  #
  # It is a nice way to inspect induction results, and also to execute them
  # (one local variable per attribute label must be defined first):
  #     age = 28
  #     gender = 'M'
  #     marketing_target = nil
  #     eval classifier.get_rules
  #     puts marketing_target
  #
  # Whether an attribute gets a numeric or a membership test is decided by
  # the type of that attribute in the first training item.
  def get_rules
    sample = @data_set.data_items.first
    labels = @data_set.data_labels.collect { |label| label.to_s }

    rules = ["votes = Hash.new {0}"]
    @pipes.each do |category, pipe|
      pipe.each_with_index do |bounds, i|
        condition = if sample[i].is_a?(Numeric)
                      "#{labels[i]} >= #{bounds[:min]} && #{labels[i]} <= #{bounds[:max]}"
                    else
                      "#{bounds.inspect}[#{labels[i]}]"
                    end
        rules << "votes['#{category}'] += 1 if #{condition}"
      end
    end
    rules << "#{labels.last} = votes.to_a.max {|x, y| x.last <=> y.last}.first"
    rules.join("\n")
  end

  protected

  # Creates an empty pipe: one entry per attribute (the last column, the
  # category, is excluded). The first data item decides the entry type:
  # numeric attributes get inverted infinite bounds so that the first
  # update always narrows them, other attributes get a hash answering
  # false for values never seen.
  def build_pipe(data_set)
    data_set.data_items.first[0...-1].collect do |att|
      if att.is_a? Numeric
        {:min=>1.0/0, :max=>-1.0/0}
      else
        Hash.new(false)
      end
    end
  end

  # Widens +pipe+ so that it covers +data_item+: numeric attributes extend
  # the :min/:max bounds, other attributes are recorded as seen values.
  def update_pipe(pipe, data_item)
    data_item[0...-1].each_with_index do |att, i|
      if att.is_a? Numeric
        pipe[i][:min] = att if att < pipe[i][:min]
        pipe[i][:max] = att if att > pipe[i][:max]
      else
        pipe[i][att] = true
      end
    end
  end

end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
# Author:: Sergio Fierens (Implementation, Quinlan is
|
|
2
|
+
# the creator of the algorithm)
|
|
3
|
+
# License:: MPL 1.1
|
|
4
|
+
# Project:: ai4r
|
|
5
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
6
|
+
#
|
|
7
|
+
# You can redistribute it and/or modify it under the terms of
|
|
8
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
9
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
10
|
+
|
|
11
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
|
12
|
+
require File.dirname(__FILE__) + '/../classifiers/classifier'
|
|
13
|
+
|
|
14
|
+
module Ai4r
|
|
15
|
+
|
|
16
|
+
module Classifiers
|
|
17
|
+
|
|
18
|
+
# = Introduction
|
|
19
|
+
# This is an implementation of the ID3 algorithm (Quinlan)
|
|
20
|
+
# Given a set of preclassified examples, it builds a top-down
|
|
21
|
+
# induction of decision tree, biased by the information gain and
|
|
22
|
+
# entropy measure.
|
|
23
|
+
#
|
|
24
|
+
# * http://en.wikipedia.org/wiki/Decision_tree
|
|
25
|
+
# * http://en.wikipedia.org/wiki/ID3_algorithm
|
|
26
|
+
#
|
|
27
|
+
# = How to use it
|
|
28
|
+
#
|
|
29
|
+
# DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
|
30
|
+
#
|
|
31
|
+
# DATA_ITEMS = [
|
|
32
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
33
|
+
# ['Chicago', '<30', 'M', 'Y'],
|
|
34
|
+
# ['Chicago', '<30', 'F', 'Y'],
|
|
35
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
36
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
37
|
+
# ['Chicago', '[30-50)', 'M', 'Y'],
|
|
38
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
39
|
+
# ['Chicago', '[30-50)', 'F', 'Y'],
|
|
40
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
41
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
42
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
43
|
+
# ['New York', '[50-80]', 'M', 'N'],
|
|
44
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
45
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
46
|
+
# ['Chicago', '>80', 'F', 'Y']
|
|
47
|
+
# ]
|
|
48
|
+
#
|
|
49
|
+
# data_set = DataSet.new(:data_items=>DATA_ITEMS, :data_labels=>DATA_LABELS)
|
|
50
|
+
# id3 = Ai4r::Classifiers::ID3.new.build(data_set)
|
|
51
|
+
#
|
|
52
|
+
# id3.get_rules
|
|
53
|
+
# # => if age_range=='<30' then marketing_target='Y'
|
|
54
|
+
# elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
|
|
55
|
+
# elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
|
|
56
|
+
# elsif age_range=='[50-80]' then marketing_target='N'
|
|
57
|
+
# elsif age_range=='>80' then marketing_target='Y'
|
|
58
|
+
# else raise 'There was not enough information during training to do a proper induction for this data element' end
|
|
59
|
+
#
|
|
60
|
+
# id3.eval(['New York', '<30', 'M'])
|
|
61
|
+
# # => 'Y'
|
|
62
|
+
#
|
|
63
|
+
# = A better way to load the data
|
|
64
|
+
#
|
|
65
|
+
# In real life you will use a lot more training examples, with more
|
|
66
|
+
# attributes. Consider moving your data to an external CSV (comma-separated
|
|
67
|
+
# values) file.
|
|
68
|
+
#
|
|
69
|
+
# data_file = "#{File.dirname(__FILE__)}/data_set.csv"
|
|
70
|
+
# data_set = DataSet.load_csv_with_labels data_file
|
|
71
|
+
# id3 = Ai4r::Classifiers::ID3.new.build(data_set)
|
|
72
|
+
#
|
|
73
|
+
# = A nice tip for data evaluation
|
|
74
|
+
#
|
|
75
|
+
# id3 = Ai4r::Classifiers::ID3.new.build(data_set)
|
|
76
|
+
#
|
|
77
|
+
# age_range = '<30'
|
|
78
|
+
# marketing_target = nil
|
|
79
|
+
# eval id3.get_rules
|
|
80
|
+
# puts marketing_target
|
|
81
|
+
# # => 'Y'
|
|
82
|
+
#
|
|
83
|
+
# = More about ID3 and decision trees
|
|
84
|
+
#
|
|
85
|
+
# * http://en.wikipedia.org/wiki/Decision_tree
|
|
86
|
+
# * http://en.wikipedia.org/wiki/ID3_algorithm
|
|
87
|
+
#
|
|
88
|
+
# = About the project
|
|
89
|
+
# Author:: Sergio Fierens
|
|
90
|
+
# License:: MPL 1.1
|
|
91
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
92
|
+
class ID3 < Classifier
|
|
93
|
+
|
|
94
|
+
attr_reader :data_set
|
|
95
|
+
|
|
96
|
+
# Trains the ID3 classifier. You must provide a DataSet instance as
# parameter; the last attribute of each item is considered as the
# item class.
#
# The data set is checked for emptiness, stored, and its items are used
# to build the decision tree. Returns self so calls can be chained:
#   id3 = Ai4r::Classifiers::ID3.new.build(data_set)
def build(data_set)
  data_set.check_not_empty
  @data_set = data_set
  preprocess_data(data_set.data_items)
  self
end
|
|
105
|
+
|
|
106
|
+
# Predicts the category of a new data item by walking the decision tree.
# e.g.
#   id3.eval(['New York',  '<30', 'F'])  # => 'Y'
#
# Returns nil when no tree has been built yet.
def eval(data)
  return nil unless @tree
  @tree.value(data)
end
|
|
112
|
+
|
|
113
|
+
# Returns the induced decision tree as a string of ruby code: one
# if/elsif branch per rule of the tree, followed by an else branch that
# raises when no rule applies.
# e.g.
#
#   id3.get_rules
#     # =>  if age_range=='<30' then marketing_target='Y'
#           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
#           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
#           elsif age_range=='[50-80]' then marketing_target='N'
#           elsif age_range=='>80' then marketing_target='Y'
#           else raise 'There was not enough information during training to do a proper induction for this data element' end
#
# It is a nice way to inspect induction results, and also to execute them:
#     age_range = '<30'
#     marketing_target = nil
#     eval id3.get_rules
#     puts marketing_target
#       # =>  'Y'
#
# Each rule delivered by the tree is an array whose last element is the
# conclusion and whose preceding elements are the conditions.
def get_rules
  clauses = @tree.get_rules.collect do |rule|
    conditions = rule[0...-1]
    "#{conditions.join(' and ')} then #{rule.last}"
  end
  "if #{clauses.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
end
|
|
138
|
+
|
|
139
|
+
private
|
|
140
|
+
# Builds the decision tree for the given examples and keeps its root
# node in @tree. Returns that root node.
def preprocess_data(data_examples)
  root = build_node(data_examples)
  @tree = root
end
|
|
143
|
+
|
|
144
|
+
private
|
|
145
|
+
# Recursively builds the decision (sub)tree for +data_examples+.
#
# data_examples:: array of data items, category in the last position.
# flag_att::      indexes of the attributes already used on the path from
#                 the root to this node. It is never modified: every level
#                 hands an extended copy to its children, so an attribute
#                 consumed in one branch stays available to its sibling
#                 branches.
#
# Returns
# * an ErrorNode when there are no examples,
# * a CategoryNode when all examples share one category, or when the
#   selected attribute has a single value among the examples (the most
#   frequent category is used then),
# * otherwise an EvaluationNode on the selected attribute, with one child
#   per attribute value.
def build_node(data_examples, flag_att = [])
  return ErrorNode.new if data_examples.length == 0

  attribute_domains = domain(data_examples)
  category_label = @data_set.data_labels.last
  categories = attribute_domains.last
  return CategoryNode.new(category_label, categories[0]) if categories.length == 1

  best_index = min_entropy_index(data_examples, attribute_domains, flag_att)
  # Path-local copy instead of `flag_att << best_index`, which leaked the
  # choice made in one branch into every branch built afterwards.
  used_attributes = flag_att + [best_index]

  partitions = split_data_examples(data_examples, attribute_domains, best_index)
  if partitions.length == 1
    return CategoryNode.new(category_label, most_freq(data_examples, attribute_domains))
  end

  children = partitions.collect do |partial_data_examples|
    build_node(partial_data_examples, used_attributes)
  end
  EvaluationNode.new(@data_set.data_labels, best_index, attribute_domains[best_index], children)
end
|
|
158
|
+
|
|
159
|
+
private
|
|
160
|
+
# Adds up all the elements of +values+, starting from 0.
# Returns 0 for an empty collection.
def self.sum(values)
  total = 0
  values.each { |value| total = total + value }
  total
end
|
|
163
|
+
|
|
164
|
+
private
|
|
165
|
+
# Base 2 logarithm of +z+, computed as Math.log(z) divided by the LOG2
# constant. By convention 0.0 is returned for z == 0, instead of
# evaluating the logarithm of zero.
def self.log2(z)
  z == 0 ? 0.0 : Math.log(z) / LOG2
end
|
|
169
|
+
|
|
170
|
+
private
|
|
171
|
+
# Returns the category that appears most often among +examples+.
#
# examples:: data items, category in the last position.
# domain::   per-attribute value lists; the last list holds the categories.
#
# When several categories are equally frequent, the one listed first in
# the category domain wins.
def most_freq(examples, domain)
  categories = domain.last
  counts = Array.new(categories.length, 0)
  examples.each do |example|
    counts[categories.index(example.last)] += 1
  end
  categories[counts.index(counts.max)]
end
|
|
183
|
+
|
|
184
|
+
private
|
|
185
|
+
# Partitions the examples by the value they hold at att_index.
#
# data_examples:: rows to partition.
# domain::        per-column value lists; domain[att_index] fixes the order
#                 of the returned partitions.
# att_index::     column used for the split.
#
# Returns an array in which the i-th entry contains, in their original
# order, the examples whose attribute value equals domain[att_index][i].
def split_data_examples(data_examples, domain, att_index)
  partitions = []
  grouped = data_examples.group_by { |row| row[att_index] }
  grouped.each_pair do |att_value, rows|
    partitions[domain[att_index].index(att_value)] = rows
  end
  partitions
end
|
|
200
|
+
|
|
201
|
+
private
|
|
202
|
+
# Picks the attribute whose split yields the lowest entropy.
#
# data_examples:: rows under consideration.
# domain::        per-column value lists; the last entry (the category
#                 column) is never a candidate.
# flag_att::      indexes of attributes that must not be chosen.
#
# Returns the index of the best remaining attribute; on equal entropy the
# lowest index wins. Returns 0 when every attribute is excluded or there
# are no attributes at all.
def min_entropy_index(data_examples, domain, flag_att = [])
  total = data_examples.length
  best_index = 0
  best_entropy = nil
  (0...(domain.length - 1)).each do |index|
    # Excluded attributes can never win, so skip them before paying for a
    # frequency grid and an entropy computation.
    next if flag_att.include?(index)

    candidate = entropy(freq_grid(index, data_examples, domain), total)
    if best_entropy.nil? || candidate < best_entropy
      best_entropy = candidate
      best_index = index
    end
  end
  best_index
end
|
|
215
|
+
|
|
216
|
+
private
|
|
217
|
+
# Collects the distinct values found in each column of the examples.
#
# data_examples:: rows to scan.
#
# Returns one list per label in @data_set.data_labels; each list holds the
# column's values in the order they were first seen. Row elements beyond
# the number of labels are ignored.
def domain(data_examples)
  columns = Array.new(@data_set.data_labels.length) { [] }
  data_examples.each do |row|
    row.each_with_index do |value, col|
      next unless col < columns.length
      seen = columns[col]
      seen << value unless seen.include?(value)
    end
  end
  columns
end
|
|
228
|
+
|
|
229
|
+
private
|
|
230
|
+
# Counts how often each (attribute value, category) pair occurs.
#
# att_index::     column of the attribute being examined.
# data_examples:: rows to count; the last element of a row is its category.
# domain::        per-column value lists; domain.last lists the categories.
#
# Returns a grid where grid[v][c] is the number of examples whose attribute
# value is domain[att_index][v] and whose category is domain.last[c].
def freq_grid(att_index, data_examples, domain)
  # Start from an all-zero grid: one row per attribute value,
  # one column per category.
  category_count = domain.last.length
  grid = Array.new(domain[att_index].length) { Array.new(category_count, 0) }

  # Fill the grid with frequencies.
  data_examples.each do |row|
    value_pos = domain[att_index].index(row[att_index])
    category_pos = domain.last.index(row.last)
    grid[value_pos][category_pos] += 1
  end
  grid
end
|
|
247
|
+
|
|
248
|
+
private
|
|
249
|
+
# Weighted entropy of a split described by a frequency grid.
#
# freq_grid::      one row of category counts per attribute value.
# total_examples:: number of examples the grid was built from.
#
# Each row's entropy (sum of -p * log2(p) over its categories) is weighted
# by the fraction of examples that fall in that row. Rows whose counts add
# up to zero contribute nothing.
def entropy(freq_grid, total_examples)
  freq_grid.inject(0) do |weighted_sum, class_counts|
    row_total = ID3.sum(class_counts)
    row_entropy = 0
    unless row_total == 0
      class_counts.each do |count|
        ratio = count.to_f / row_total
        row_entropy += (-1 * ratio * ID3.log2(ratio))
      end
    end
    weighted_sum + (row_total.to_f / total_examples) * row_entropy
  end
end
|
|
265
|
+
|
|
266
|
+
private
|
|
267
|
+
# Natural logarithm of 2; log2 divides Math.log results by it to obtain
# base-2 logarithms.
LOG2 = Math.log(2)
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
# Internal tree node that tests one attribute and delegates to the child
# node associated with the attribute's value.
class EvaluationNode #:nodoc: all

  attr_reader :index, :values, :nodes

  # data_labels:: labels of the data set columns (used when printing rules).
  # index::       column of the attribute this node tests.
  # values::      attribute values known to this node.
  # nodes::       child nodes, parallel to values (nodes[i] handles values[i]).
  def initialize(data_labels, index, values, nodes)
    @index = index
    @values = values
    @nodes = nodes
    @data_labels = data_labels
  end

  # Evaluates data by following the child that matches data[@index].
  #
  # Raises a RuntimeError when the value was never seen for this attribute
  # during training, with the same message ErrorNode uses. The previous code
  # called an undefined helper here, which surfaced as a NameError instead.
  def value(data)
    branch = @values.index(data[@index])
    if branch.nil?
      raise "There was not enough information during training to do a proper induction for this data element."
    end
    nodes[branch].value(data)
  end

  # Returns every rule of the subtree as an array of condition strings, each
  # one prefixed with this node's own "label=='value'" condition.
  def get_rules
    rule_set = []
    @nodes.each_with_index do |child_node, position|
      condition = "#{@data_labels[@index]}=='#{@values[position]}'"
      child_node.get_rules.each do |child_rule|
        rule_set << child_rule.unshift(condition)
      end
    end
    rule_set
  end

end
|
|
302
|
+
|
|
303
|
+
# Leaf node that always answers with a fixed category.
class CategoryNode #:nodoc: all
  # label:: name of the category column.
  # value:: category this leaf stands for.
  def initialize(label, value)
    @label, @value = label, value
  end

  # Returns the stored category; data is not inspected.
  def value(data)
    @value
  end

  # Returns a single rule holding the "label='value'" conclusion.
  def get_rules
    conclusion = "#{@label}='#{@value}'"
    [[conclusion]]
  end
end
|
|
315
|
+
|
|
316
|
+
# Node used where the tree has no training data to decide from.
class ErrorNode #:nodoc: all
  # Always raises a RuntimeError; data is not inspected.
  def value(data)
    message = "There was not enough information during training to do a proper induction for this data element."
    raise RuntimeError, message
  end

  # An error node contributes no rules.
  def get_rules
    Array.new
  end
end
|
|
324
|
+
|
|
325
|
+
end
|
|
326
|
+
end
|