ai4r 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (196) hide show
  1. data/README.rdoc +48 -0
  2. data/examples/decision_trees/data_set.csv +121 -0
  3. data/examples/decision_trees/id3_example.rb +31 -0
  4. data/examples/decision_trees/results.txt +29 -0
  5. data/examples/genetic_algorithm/genetic_algorithm_example.rb +39 -0
  6. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  7. data/examples/neural_network/backpropagation_example.rb +65 -0
  8. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  9. data/examples/neural_network/patterns_with_noise.rb +66 -0
  10. data/examples/neural_network/training_patterns.rb +68 -0
  11. data/lib/decision_tree/id3.rb +354 -0
  12. data/lib/genetic_algorithm/genetic_algorithm.rb +268 -0
  13. data/lib/neural_network/backpropagation.rb +259 -0
  14. data/site/build/site/en/broken-links.xml +2 -0
  15. data/site/build/site/en/downloads.html +187 -0
  16. data/site/build/site/en/downloads.pdf +151 -0
  17. data/site/build/site/en/geneticAlgorithms.html +564 -0
  18. data/site/build/site/en/geneticAlgorithms.pdf +911 -0
  19. data/site/build/site/en/images/ai4r-logo.png +0 -0
  20. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  21. data/site/build/site/en/images/c.png +0 -0
  22. data/site/build/site/en/images/c_wbn.png +0 -0
  23. data/site/build/site/en/images/c_wn.png +0 -0
  24. data/site/build/site/en/images/ero.gif +0 -0
  25. data/site/build/site/en/images/europe2.png +0 -0
  26. data/site/build/site/en/images/europe3.png +0 -0
  27. data/site/build/site/en/images/fitness.png +0 -0
  28. data/site/build/site/en/images/instruction_arrow.png +0 -0
  29. data/site/build/site/en/images/my_email.png +0 -0
  30. data/site/build/site/en/images/rubyforge.png +0 -0
  31. data/site/build/site/en/images/s.png +0 -0
  32. data/site/build/site/en/images/s_wbn.png +0 -0
  33. data/site/build/site/en/images/s_wn.png +0 -0
  34. data/site/build/site/en/images/sigmoid.png +0 -0
  35. data/site/build/site/en/images/t.png +0 -0
  36. data/site/build/site/en/images/t_wbn.png +0 -0
  37. data/site/build/site/en/images/t_wn.png +0 -0
  38. data/site/build/site/en/index.html +258 -0
  39. data/site/build/site/en/index.pdf +306 -0
  40. data/site/build/site/en/linkmap.html +231 -0
  41. data/site/build/site/en/linkmap.pdf +94 -0
  42. data/site/build/site/en/locationmap.xml +72 -0
  43. data/site/build/site/en/machineLearning.html +325 -0
  44. data/site/build/site/en/machineLearning.pdf +337 -0
  45. data/site/build/site/en/neuralNetworks.html +446 -0
  46. data/site/build/site/en/neuralNetworks.pdf +604 -0
  47. data/site/build/site/en/skin/CommonMessages_de.xml +23 -0
  48. data/site/build/site/en/skin/CommonMessages_en_US.xml +23 -0
  49. data/site/build/site/en/skin/CommonMessages_es.xml +23 -0
  50. data/site/build/site/en/skin/CommonMessages_fr.xml +23 -0
  51. data/site/build/site/en/skin/basic.css +166 -0
  52. data/site/build/site/en/skin/breadcrumbs-optimized.js +90 -0
  53. data/site/build/site/en/skin/breadcrumbs.js +237 -0
  54. data/site/build/site/en/skin/fontsize.js +166 -0
  55. data/site/build/site/en/skin/getBlank.js +40 -0
  56. data/site/build/site/en/skin/getMenu.js +45 -0
  57. data/site/build/site/en/skin/images/README.txt +1 -0
  58. data/site/build/site/en/skin/images/add.jpg +0 -0
  59. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  60. data/site/build/site/en/skin/images/chapter.gif +0 -0
  61. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  62. data/site/build/site/en/skin/images/current.gif +0 -0
  63. data/site/build/site/en/skin/images/error.png +0 -0
  64. data/site/build/site/en/skin/images/external-link.gif +0 -0
  65. data/site/build/site/en/skin/images/fix.jpg +0 -0
  66. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  67. data/site/build/site/en/skin/images/hack.jpg +0 -0
  68. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  69. data/site/build/site/en/skin/images/info.png +0 -0
  70. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  71. data/site/build/site/en/skin/images/label.gif +0 -0
  72. data/site/build/site/en/skin/images/page.gif +0 -0
  73. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  74. data/site/build/site/en/skin/images/poddoc.png +0 -0
  75. data/site/build/site/en/skin/images/printer.gif +0 -0
  76. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  77. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  78. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  79. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  80. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  81. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  82. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  83. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  84. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  85. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  86. data/site/build/site/en/skin/images/remove.jpg +0 -0
  87. data/site/build/site/en/skin/images/rss.png +0 -0
  88. data/site/build/site/en/skin/images/spacer.gif +0 -0
  89. data/site/build/site/en/skin/images/success.png +0 -0
  90. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  91. data/site/build/site/en/skin/images/update.jpg +0 -0
  92. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  93. data/site/build/site/en/skin/images/vcss.png +0 -0
  94. data/site/build/site/en/skin/images/warning.png +0 -0
  95. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  96. data/site/build/site/en/skin/menu.js +48 -0
  97. data/site/build/site/en/skin/note.txt +50 -0
  98. data/site/build/site/en/skin/print.css +54 -0
  99. data/site/build/site/en/skin/profile.css +163 -0
  100. data/site/build/site/en/skin/prototype.js +1257 -0
  101. data/site/build/site/en/skin/screen.css +587 -0
  102. data/site/build/site/en/svn.html +223 -0
  103. data/site/build/site/en/svn.pdf +239 -0
  104. data/site/build/site/en/wholesite.pdf +1686 -0
  105. data/site/build/tmp/brokenlinks.xml +2 -0
  106. data/site/build/tmp/build-info.xml +5 -0
  107. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  108. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  109. data/site/build/tmp/input.xmap +32 -0
  110. data/site/build/tmp/internal.xmap +32 -0
  111. data/site/build/tmp/locationmap.xml +29 -0
  112. data/site/build/tmp/output.xmap +38 -0
  113. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -0
  114. data/site/build/tmp/plugins-1.xml +212 -0
  115. data/site/build/tmp/plugins-2.xml +347 -0
  116. data/site/build/tmp/projfilters.properties +41 -0
  117. data/site/build/tmp/resources.xmap +32 -0
  118. data/site/build/webapp/WEB-INF/logs/access.log +0 -0
  119. data/site/build/webapp/WEB-INF/logs/core.log +788 -0
  120. data/site/build/webapp/WEB-INF/logs/debug.log +0 -0
  121. data/site/build/webapp/WEB-INF/logs/error.log +248 -0
  122. data/site/build/webapp/WEB-INF/logs/flow.log +0 -0
  123. data/site/build/webapp/WEB-INF/logs/idgen.log +0 -0
  124. data/site/build/webapp/WEB-INF/logs/linkrewriter.log +0 -0
  125. data/site/build/webapp/WEB-INF/logs/locationmap.log +0 -0
  126. data/site/build/webapp/WEB-INF/logs/sitemap.log +0 -0
  127. data/site/build/webapp/WEB-INF/logs/xmlform.log +0 -0
  128. data/site/forrest.properties +152 -0
  129. data/site/forrest.properties.dispatcher.properties +25 -0
  130. data/site/forrest.properties.xml +29 -0
  131. data/site/src/documentation/README.txt +7 -0
  132. data/site/src/documentation/classes/CatalogManager.properties +62 -0
  133. data/site/src/documentation/content/locationmap.xml +72 -0
  134. data/site/src/documentation/content/xdocs/downloads.html +9 -0
  135. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +280 -0
  136. data/site/src/documentation/content/xdocs/index.xml +73 -0
  137. data/site/src/documentation/content/xdocs/machineLearning.xml +129 -0
  138. data/site/src/documentation/content/xdocs/neuralNetworks.xml +218 -0
  139. data/site/src/documentation/content/xdocs/site.xml +51 -0
  140. data/site/src/documentation/content/xdocs/svn.xml +31 -0
  141. data/site/src/documentation/content/xdocs/tabs.xml +35 -0
  142. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  143. data/site/src/documentation/resources/images/c.png +0 -0
  144. data/site/src/documentation/resources/images/c_wbn.png +0 -0
  145. data/site/src/documentation/resources/images/c_wn.png +0 -0
  146. data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
  147. data/site/src/documentation/resources/images/ero.gif +0 -0
  148. data/site/src/documentation/resources/images/europe2.png +0 -0
  149. data/site/src/documentation/resources/images/europe3.png +0 -0
  150. data/site/src/documentation/resources/images/fitness.png +0 -0
  151. data/site/src/documentation/resources/images/icon-a.png +0 -0
  152. data/site/src/documentation/resources/images/icon-b.png +0 -0
  153. data/site/src/documentation/resources/images/icon.png +0 -0
  154. data/site/src/documentation/resources/images/my_email.png +0 -0
  155. data/site/src/documentation/resources/images/project-logo.png +0 -0
  156. data/site/src/documentation/resources/images/rubyforge.png +0 -0
  157. data/site/src/documentation/resources/images/s.png +0 -0
  158. data/site/src/documentation/resources/images/s_wbn.png +0 -0
  159. data/site/src/documentation/resources/images/s_wn.png +0 -0
  160. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  161. data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
  162. data/site/src/documentation/resources/images/t.png +0 -0
  163. data/site/src/documentation/resources/images/t_wbn.png +0 -0
  164. data/site/src/documentation/resources/images/t_wn.png +0 -0
  165. data/site/src/documentation/resources/schema/catalog.xcat +29 -0
  166. data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
  167. data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
  168. data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
  169. data/site/src/documentation/sitemap.xmap +66 -0
  170. data/site/src/documentation/skinconf.xml +418 -0
  171. data/site/src/documentation/translations/langcode.xml +29 -0
  172. data/site/src/documentation/translations/languages_de.xml +24 -0
  173. data/site/src/documentation/translations/languages_en.xml +24 -0
  174. data/site/src/documentation/translations/languages_es.xml +22 -0
  175. data/site/src/documentation/translations/languages_fr.xml +24 -0
  176. data/site/src/documentation/translations/languages_nl.xml +24 -0
  177. data/site/src/documentation/translations/menu.xml +33 -0
  178. data/site/src/documentation/translations/menu_af.xml +33 -0
  179. data/site/src/documentation/translations/menu_de.xml +33 -0
  180. data/site/src/documentation/translations/menu_es.xml +33 -0
  181. data/site/src/documentation/translations/menu_fr.xml +33 -0
  182. data/site/src/documentation/translations/menu_it.xml +33 -0
  183. data/site/src/documentation/translations/menu_nl.xml +33 -0
  184. data/site/src/documentation/translations/menu_no.xml +33 -0
  185. data/site/src/documentation/translations/menu_ru.xml +33 -0
  186. data/site/src/documentation/translations/menu_sk.xml +33 -0
  187. data/site/src/documentation/translations/tabs.xml +22 -0
  188. data/site/src/documentation/translations/tabs_de.xml +22 -0
  189. data/site/src/documentation/translations/tabs_es.xml +22 -0
  190. data/site/src/documentation/translations/tabs_fr.xml +22 -0
  191. data/site/src/documentation/translations/tabs_nl.xml +22 -0
  192. data/test/decision_tree/id3_test.rb +209 -0
  193. data/test/genetic_algorithm/chromosome_test.rb +55 -0
  194. data/test/genetic_algorithm/genetic_algorithm_test.rb +78 -0
  195. data/test/neural_network/backpropagation_test.rb +44 -0
  196. metadata +274 -0
@@ -0,0 +1,68 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+
11
# 16x16 intensity grid depicting a triangle outline.
# Every string below is one row of the picture; the cells are separated by
# whitespace and converted to integers, so TRIANGLE ends up being an Array
# of 16 Arrays holding 16 Integers each.
TRIANGLE = [
  '0 0 0 0 0 0 0 5 5 0 0 0 0 0 0 0',
  '0 0 0 0 0 0 1 9 9 1 0 0 0 0 0 0',
  '0 0 0 0 0 0 5 5 5 5 0 0 0 0 0 0',
  '0 0 0 0 0 1 9 1 1 9 1 0 0 0 0 0',
  '0 0 0 0 0 5 5 0 0 5 5 0 0 0 0 0',
  '0 0 0 0 1 9 1 0 0 1 9 1 0 0 0 0',
  '0 0 0 0 5 5 0 0 0 0 5 5 0 0 0 0',
  '0 0 0 1 9 1 0 0 0 0 1 9 1 0 0 0',
  '0 0 0 5 5 0 0 0 0 0 0 5 5 0 0 0',
  '0 0 1 9 1 0 0 0 0 0 0 1 9 1 0 0',
  '0 0 5 5 0 0 0 0 0 0 0 0 5 5 0 0',
  '0 1 9 1 0 0 0 0 0 0 0 0 1 9 1 0',
  '0 5 5 0 0 0 0 0 0 0 0 0 0 5 5 0',
  '1 9 1 0 0 0 0 0 0 0 0 0 0 1 9 1',
  '5 5 0 0 0 0 0 0 0 0 0 0 0 0 5 5',
  '10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10'
].collect { |row| row.split.collect { |cell| cell.to_i } }
29
+
30
# 16x16 intensity grid depicting a square outline: every cell on the outer
# border holds 10, every interior cell holds 0.
SQUARE = Array.new(16) do |row|
  Array.new(16) do |col|
    on_border = (row == 0 || row == 15 || col == 0 || col == 15)
    on_border ? 10 : 0
  end
end
49
+
50
# 16x16 intensity grid depicting a cross: the two centre rows (7 and 8) and
# the two centre columns (7 and 8) hold 5, every other cell holds 0.
CROSS = Array.new(16) do |row|
  Array.new(16) do |col|
    on_bar = (row == 7 || row == 8 || col == 7 || col == 8)
    on_bar ? 5 : 0
  end
end
68
+
@@ -0,0 +1,354 @@
1
+
2
+ # Decision tree learning, used in data mining and machine learning,
3
+ # uses a decision tree as a predictive model which maps observations about
4
+ # an item to conclusions about the item's target value.
5
+ #
6
+ # In this module you will find an implementation of the ID3 algorithm (Quinlan)
7
+ #
8
+ # * http://en.wikipedia.org/wiki/Decision_tree
9
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
10
+ #
11
+ # Author:: Sergio Fierens
12
+ # License:: MPL 1.1
13
+ # Project:: ai4r
14
+ # Url:: http://ai4r.rubyforge.org/
15
+ #
16
+ # You can redistribute it and/or modify it under the terms of
17
+ # the Mozilla Public License version 1.1 as published by the
18
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
19
+
20
+ module DecisionTree
21
+
22
+ # = Introduction
23
+ # This is an implementation of the ID3 algorithm (Quinlan).
24
+ # Given a set of preclassified examples, it builds a decision tree
25
+ # by top-down induction, biased by the information gain and
26
+ # entropy measure.
27
+ #
28
+ # = How to use it
29
+ #
30
+ # DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
31
+ #
32
+ # DATA_SET = [ ['New York', '<30', 'M', 'Y'],
33
+ # ['Chicago', '<30', 'M', 'Y'],
34
+ # ['Chicago', '<30', 'F', 'Y'],
35
+ # ['New York', '<30', 'M', 'Y'],
36
+ # ['New York', '<30', 'M', 'Y'],
37
+ # ['Chicago', '[30-50)', 'M', 'Y'],
38
+ # ['New York', '[30-50)', 'F', 'N'],
39
+ # ['Chicago', '[30-50)', 'F', 'Y'],
40
+ # ['New York', '[30-50)', 'F', 'N'],
41
+ # ['Chicago', '[50-80]', 'M', 'N'],
42
+ # ['New York', '[50-80]', 'F', 'N'],
43
+ # ['New York', '[50-80]', 'M', 'N'],
44
+ # ['Chicago', '[50-80]', 'M', 'N'],
45
+ # ['New York', '[50-80]', 'F', 'N'],
46
+ # ['Chicago', '>80', 'F', 'Y']
47
+ # ]
48
+ #
49
+ # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
50
+ #
51
+ # id3.to_s
52
+ # # => if age_range=='<30' then marketing_target='Y'
53
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
54
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
55
+ # elsif age_range=='[50-80]' then marketing_target='N'
56
+ # elsif age_range=='>80' then marketing_target='Y'
57
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
58
+ #
59
+ # id3.eval(['New York', '<30', 'M'])
60
+ # # => 'Y'
61
+ #
62
+ # = A better way to load the data
63
+ #
64
+ # In real life you will use many more training examples, with more
65
+ # attributes. Consider moving your data to an external CSV (comma-separated
66
+ # values) file.
67
+ #
68
+ # data_set = []
69
+ # CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
70
+ # data_set << row
71
+ # end
72
+ # data_labels = data_set.shift
73
+ #
74
+ # id3 = DecisionTree::ID3.new(data_set, data_labels)
75
+ #
76
+ # = A nice tip for data evaluation
77
+ #
78
+ # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
79
+ # age_range = '<30'
80
+ # marketing_target = nil
81
+ # eval id3.to_s
82
+ # puts marketing_target
83
+ # # => 'Y'
84
+ # = More about ID3 and decision trees
85
+ #
86
+ # * http://en.wikipedia.org/wiki/Decision_tree
87
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
88
+ #
89
+ # = About the project
90
+ # Author:: Sergio Fierens
91
+ # License:: MPL 1.1
92
+
93
class ID3

  # Natural logarithm of 2; divides Math.log results to obtain base-2 logs.
  LOG2 = Math.log(2)

  attr_reader :data_labels

  # Create a new decision tree. If your data is classified with N attributes
  # and M examples, then your data examples must have the following format:
  #
  #   [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CATEGORY_VAL1],
  #     [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CATEGORY_VAL2],
  #     ...
  #     [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
  #   ]
  #
  # e.g.
  #   [ ['New York', '<30',     'M', 'Y'],
  #     ['Chicago',  '<30',     'M', 'Y'],
  #     ['Chicago',  '<30',     'F', 'Y'],
  #     ['New York', '<30',     'M', 'Y'],
  #     ['New York', '<30',     'M', 'Y'],
  #     ['Chicago',  '[30-50)', 'M', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago',  '[30-50)', 'F', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago',  '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['New York', '[50-80]', 'M', 'N'],
  #     ['Chicago',  '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['Chicago',  '>80',     'F', 'Y']
  #   ]
  #
  # Data labels must have the following format:
  #   [ 'city', 'age_range', 'gender', 'marketing_target' ]
  #
  # If you do not provide labels for your data, the following labels will
  # be created by default:
  #   [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY' ]
  #
  # Raises a RuntimeError when data_examples is nil or empty.
  def initialize(data_examples, data_labels=nil)
    raise "Examples data set must not be empty." if !data_examples || data_examples.empty?
    if !data_labels
      # One generated label per attribute column (every column but the last),
      # followed by the label of the category column.
      data_labels = []
      data_examples[0][0..-2].each_index do |i|
        data_labels << "ATTRIBUTE_#{i+1}"
      end
      data_labels << "CATEGORY"
    end
    @data_labels = data_labels
    preprocess_data(data_examples)
  end

  # You can evaluate new data, predicting its category.
  # e.g.
  #   id3.eval(['New York', '<30', 'F'])  # => 'Y'
  #
  # The lookup is delegated to the root node of the induced tree.
  # Note that inside instances of this class this method shadows Kernel#eval.
  def eval(data)
    @tree.value(data)
  end

  # This method returns the generated rules in ruby code.
  # e.g.
  #
  #   id3.to_s
  #     # =>  if age_range=='<30' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
  #           elsif age_range=='[50-80]' then marketing_target='N'
  #           elsif age_range=='>80' then marketing_target='Y'
  #           else raise 'There was not enough information during training to do a proper induction for this data element' end
  #
  # It is a nice way to inspect induction results, and also to execute them:
  #     age_range = '<30'
  #     marketing_target = nil
  #     eval id3.to_s
  #     puts marketing_target
  #       # =>  'Y'
  def to_s
    rules = @tree.get_rules
    rules = rules.collect do |rule|
      # Every element but the last is a condition; the last is the assignment.
      "#{rule[0..-2].join(' and ')} then #{rule.last}"
    end
    return "if #{rules.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
  end

  # Sum of the given numeric values (0 for an empty list).
  # Kept public: the instance methods below call it as ID3.sum, and a
  # +private+ keyword has no effect on methods defined with +def self.+.
  def self.sum(values)
    values.inject( 0 ) { |sum,x| sum+x }
  end

  # Base-2 logarithm of z. Returns 0.0 for z == 0 so that the entropy term
  # p*log2(p) evaluates to 0 when p is 0.
  def self.log2(z)
    return 0.0 if z == 0
    Math.log(z)/LOG2
  end

  private

  # Builds the decision tree for the training set and stores its root.
  def preprocess_data(data_examples)
    @tree = build_node(data_examples)
  end

  # Recursively builds the (sub)tree for data_examples.
  #
  # flag_att holds the indexes of the attributes already used to split on
  # the path from the root down to this node. It is never mutated: children
  # receive a fresh array. Sharing a single array between sibling subtrees
  # would make an attribute consumed in one branch unavailable in the
  # following branches and lead to wrong classifications.
  def build_node(data_examples, flag_att = [])
    return ErrorNode.new if data_examples.length == 0
    values_by_column = domain(data_examples)
    categories = values_by_column.last
    # Every example has the same category: this is a leaf.
    return CategoryNode.new(@data_labels.last, categories[0]) if categories.length == 1
    best_index = min_entropy_index(data_examples, values_by_column, flag_att)
    partitions = split_data_examples(data_examples, values_by_column, best_index)
    # The chosen attribute cannot separate the examples any further:
    # settle for the most frequent category.
    if partitions.length == 1
      return CategoryNode.new(@data_labels.last, most_freq(data_examples, values_by_column))
    end
    used_attributes = flag_att + [best_index]
    nodes = partitions.collect do |partial_data_examples|
      build_node(partial_data_examples, used_attributes)
    end
    return EvaluationNode.new(@data_labels, best_index, values_by_column[best_index], nodes)
  end

  # Returns the category that appears most often in examples. On a tie the
  # category listed first in domain.last wins.
  def most_freq(examples, domain)
    categories = domain.last
    freqs = Array.new(categories.length, 0)
    examples.each do |example|
      freqs[categories.index(example.last)] += 1
    end
    categories[freqs.index(freqs.max)]
  end

  # Partitions data_examples by the value found at column att_index.
  # The partition stored at position i holds the examples whose value is
  # domain[att_index][i]; the order of the examples is preserved.
  def split_data_examples(data_examples, domain, att_index)
    partitions = []
    data_examples.each do |example|
      slot = domain[att_index].index(example[att_index])
      (partitions[slot] ||= []) << example
    end
    return partitions
  end

  # Returns the index of the attribute with the lowest entropy among the
  # attributes not listed in flag_att (the first one wins on ties).
  # Returns 0 when every attribute is listed in flag_att.
  def min_entropy_index(data_examples, domain, flag_att=[])
    min_entropy = nil
    min_index = 0
    domain[0..-2].each_index do |index|
      # Excluded attributes can never be selected, so skip their entropy.
      next if flag_att.include?(index)
      grid = freq_grid(index, data_examples, domain)
      attribute_entropy = entropy(grid, data_examples.length)
      if !min_entropy || attribute_entropy < min_entropy
        min_entropy = attribute_entropy
        min_index = index
      end
    end
    return min_index
  end

  # Returns, for every column named in @data_labels, the list of distinct
  # values found in data_examples, in order of first appearance.
  # Cells beyond the number of labels are ignored.
  def domain(data_examples)
    values_by_column = Array.new(@data_labels.length) { [] }
    data_examples.each do |data|
      data.each_index do |i|
        next unless i < values_by_column.length
        values_by_column[i] << data[i] unless values_by_column[i].include?(data[i])
      end
    end
    return values_by_column
  end

  # Builds a frequency grid for the attribute at att_index:
  # grid[v][c] is the number of examples whose attribute value is
  # domain[att_index][v] and whose category is domain.last[c].
  def freq_grid(att_index, data_examples, domain)
    categories = domain.last
    # Initialize empty grid (every row is an independent array)
    grid = Array.new(domain[att_index].length) { Array.new(categories.length, 0) }
    # Fill grid with frequencies
    data_examples.each do |example|
      att_val_index = domain[att_index].index(example[att_index])
      category_index = categories.index(example.last)
      grid[att_val_index][category_index] += 1
    end
    return grid
  end

  # Weighted entropy of a frequency grid as produced by freq_grid: the
  # entropy of every attribute value is weighted by the share of the
  # total_examples that carry that value.
  def entropy(freq_grid, total_examples)
    #Calc entropy of each element
    entropy = 0
    freq_grid.each do |att_freq|
      att_total_freq = ID3.sum(att_freq)
      partial_entropy = 0
      if att_total_freq != 0
        att_freq.each do |freq|
          prop = freq.to_f/att_total_freq
          partial_entropy += (-1*prop*ID3.log2(prop))
        end
      end
      entropy += (att_total_freq.to_f/total_examples) * partial_entropy
    end
    return entropy
  end

end
303
+
304
# Inner node of the decision tree: it inspects one attribute of the data
# element and delegates to the child node associated with the value found.
class EvaluationNode

  attr_reader :index, :values, :nodes

  # data_labels:: labels of all the columns (attributes and category)
  # index::       position of the attribute evaluated by this node
  # values::      attribute values known to this node
  # nodes::       child nodes; nodes[i] handles values[i]
  def initialize(data_labels, index, values, nodes)
    @index = index
    @values = values
    @nodes = nodes
    @data_labels = data_labels
  end

  # Returns the category predicted for data by walking down the child node
  # that matches data[index].
  # Raises a RuntimeError when the value was never seen during training
  # (the original code called an undefined helper here, which surfaced as a
  # confusing NameError).
  def value(data)
    child_position = @values.index(data[@index])
    if child_position.nil?
      raise "There was not enough information during training to do a proper induction for this data element."
    end
    return @nodes[child_position].value(data)
  end

  # Returns the rules of this subtree. Each rule is an array of strings:
  # the conditions accumulated from this node downwards, followed by the
  # category assignment produced by the leaf.
  # NOTE: values are embedded between single quotes without escaping, so a
  # value that itself contains a single quote yields invalid ruby code.
  def get_rules
    rule_set = []
    @nodes.each_index do |child_node_index|
      my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
      @nodes[child_node_index].get_rules.each do |child_rule|
        # Build a new array instead of mutating the one owned by the child.
        rule_set << ([my_rule] + child_rule)
      end
    end
    return rule_set
  end

end
331
+
332
# Leaf of the decision tree: it always answers with a fixed category.
class CategoryNode

  # label:: name of the category column
  # value:: category predicted by this leaf
  def initialize(label, value)
    @label, @value = label, value
  end

  # The prediction does not depend on the data element.
  def value(data)
    @value
  end

  # A single rule made of one element: the category assignment.
  def get_rules
    assignment = "#{@label}='#{@value}'"
    [[assignment]]
  end

end
344
+
345
# Placeholder node used where the training data offered no examples:
# it cannot predict anything and contributes no rules.
class ErrorNode

  # Always raises a RuntimeError, whatever the data element is.
  def value(data)
    raise RuntimeError, "There was not enough information during training to do a proper induction for this data element."
  end

  # No rule can be derived from this node.
  def get_rules
    []
  end

end
353
+
354
+ end
@@ -0,0 +1,268 @@
1
+ #
2
+ # The GeneticAlgorithm module implements the GeneticSearch and Chromosome
3
+ # classes. The GeneticSearch is a generic class, and can be used to solve
4
+ # any kind of problems. The GeneticSearch class performs a stochastic search
5
+ # of the solution of a given problem.
6
+ #
7
+ # The Chromosome is "problem specific". Ai4r's built-in Chromosome class was
8
+ # designed to model the Travelling salesman problem. If you want to solve other
9
+ # type of problem, you will have to modify the Chromosome class, by overwriting
10
+ # its fitness, reproduce, and mutate functions, to model your specific problem.
11
+ #
12
+ # Author:: Sergio Fierens
13
+ # License:: MPL 1.1
14
+ # Project:: ai4r
15
+ # Url:: http://ai4r.rubyforge.org/
16
+ #
17
+ # You can redistribute it and/or modify it under the terms of
18
+ # the Mozilla Public License version 1.1 as published by the
19
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
20
+
21
+ module GeneticAlgorithm
22
+
23
+ # This class is used to automatically:
24
+ #
25
+ # 1. Choose initial population
26
+ # 2. Evaluate the fitness of each individual in the population
27
+ # 3. Repeat
28
+ # 1. Select best-ranking individuals to reproduce
29
+ # 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
30
+ # 3. Evaluate the individual fitnesses of the offspring
31
+ # 4. Replace worst ranked part of population with offspring
32
+ # 4. Until termination
33
+ #
34
+ # If you want to customize the algorithm, you must modify any of the following classes:
35
+ # - Chromosome
36
+ # - Population
37
+ class GeneticSearch
38
+
39
+ attr_accessor :population
40
+
41
+
42
+ def initialize(initial_population_size, generations)
43
+ @population_size = initial_population_size
44
+ @max_generation = generations
45
+ @generation = 0
46
+ end
47
+
48
+ # 1. Choose initial population
49
+ # 2. Evaluate the fitness of each individual in the population
50
+ # 3. Repeat
51
+ # 1. Select best-ranking individuals to reproduce
52
+ # 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
53
+ # 3. Evaluate the individual fitnesses of the offspring
54
+ # 4. Replace worst ranked part of population with offspring
55
+ # 4. Until termination
56
+ # 5. Return the best chromosome
57
+ def run
58
+ generate_initial_population #Generate initial population
59
+ @max_generation.times do
60
+ selected_to_breed = selection #Evaluates current population
61
+ offsprings = reproduction selected_to_breed #Generate the population for this new generation
62
+ replace_worst_ranked offsprings
63
+ end
64
+ return best_chromosome
65
+ end
66
+
67
+
68
+ def generate_initial_population
69
+ @population = []
70
+ @population_size.times do
71
+ population << Chromosome.seed
72
+ end
73
+ end
74
+
75
+ # Select best-ranking individuals to reproduce
76
+ #
77
+ # Selection is the stage of a genetic algorithm in which individual
78
+ # genomes are chosen from a population for later breeding.
79
+ # There are several generic selection algorithms, such as
80
+ # tournament selection and roulette wheel selection. We implemented the
81
+ # latest.
82
+ #
83
+ # Steps:
84
+ #
85
+ # 1. The fitness function is evaluated for each individual, providing fitness values
86
+ # 2. The population is sorted by descending fitness values.
87
+ # 3. The fitness values ar then normalized. (Highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes.
88
+ # 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added togheter).
89
+ # 5. The selected individual is the first one whose accumulated normalized value (its is normalized value plus the normalized values of the chromosomes prior it) greater than R.
90
+ # 6. We repeat steps 4 and 5, 2/3 times the population size.
91
+ def selection
92
+ @population.sort! { |a, b| b.fitness <=> a.fitness}
93
+ best_fitness = @population[0].fitness
94
+ worst_fitness = @population.last.fitness
95
+ acum_fitness = 0
96
+ if best_fitness-worst_fitness > 0
97
+ @population.each do |chromosome|
98
+ chromosome.normalized_fitness = (chromosome.fitness - worst_fitness)/(best_fitness-worst_fitness)
99
+ acum_fitness += chromosome.normalized_fitness
100
+ end
101
+ else
102
+ @population.each { |chromosome| chromosome.normalized_fitness = 1}
103
+ end
104
+ selected_to_breed = []
105
+ ((2*@population_size)/3).times do
106
+ selected_to_breed << select_random_individual(acum_fitness)
107
+ end
108
+ selected_to_breed
109
+ end
110
+
111
+ # We combine each pair of selected chromosome using the method
112
+ # Chromosome.reproduce
113
+ #
114
+ # The reproduction will also call the Chromosome.mutate method with
115
+ # each member of the population. You should implement Chromosome.mutate
116
+ # to only change (mutate) randomly. E.g. You could effectivly change the
117
+ # chromosome only if
118
+ # rand < ((1 - chromosome.normalized_fitness) * 0.4)
119
+ def reproduction(selected_to_breed)
120
+ offsprings = []
121
+ 0.upto(selected_to_breed.length/2-1) do |i|
122
+ offsprings << Chromosome.reproduce(selected_to_breed[2*i], selected_to_breed[2*i+1])
123
+ end
124
+ @population.each do |individual|
125
+ Chromosome.mutate(individual)
126
+ end
127
+ return offsprings
128
+ end
129
+
130
+ # Replace worst ranked part of population with offspring
131
+ def replace_worst_ranked(offsprings)
132
+ size = offsprings.length
133
+ @population = @population [0..((-1*size)-1)] + offsprings
134
+ end
135
+
136
+ # Select the best chromosome in the population
137
+ def best_chromosome
138
+ the_best = @population[0]
139
+ @population.each do |chromosome|
140
+ the_best = chromosome if chromosome.fitness > the_best.fitness
141
+ end
142
+ return the_best
143
+ end
144
+
145
+ private
146
+ def select_random_individual(acum_fitness)
147
+ select_random_target = acum_fitness * rand
148
+ local_acum = 0
149
+ @population.each do |chromosome|
150
+ local_acum += chromosome.normalized_fitness
151
+ return chromosome if local_acum >= select_random_target
152
+ end
153
+ end
154
+
155
+ end
156
+
157
+ # A Chromosome is a representation of an individual solution for a specific
158
+ # problem. You will have to redefine your Chromosome representation for each
159
+ # particular problem, along with its fitness, mutate, reproduce, and seed
160
+ # functions.
161
+ class Chromosome
162
+
163
+ attr_accessor :data
164
+ attr_accessor :normalized_fitness
165
+
166
+ def initialize(data)
167
+ @data = data
168
+ end
169
+
170
+ # The fitness function quantifies the optimality of a solution
171
+ # (that is, a chromosome) in a genetic algorithm so that that particular
172
+ # chromosome may be ranked against all the other chromosomes.
173
+ #
174
+ # Optimal chromosomes, or at least chromosomes which are more optimal,
175
+ # are allowed to breed and mix their datasets by any of several techniques,
176
+ # producing a new generation that will (hopefully) be even better.
177
+ def fitness
178
+ return @fitness if @fitness
179
+ last_token = @data[0]
180
+ cost = 0
181
+ @data[1..-1].each do |token|
182
+ cost += @@costs[last_token][token]
183
+ last_token = token
184
+ end
185
+ @fitness = -1 * cost
186
+ return @fitness
187
+ end
188
+
189
+ # mutation is a function used to maintain genetic diversity from one
190
+ # generation of a population of chromosomes to the next. It is analogous
191
+ # to biological mutation.
192
+ #
193
+ # The purpose of mutation in GAs is to allow the
194
+ # algorithm to avoid local minima by preventing the population of
195
+ # chromosomes from becoming too similar to each other, thus slowing or even
196
+ # stopping evolution.
197
+ #
198
+ # Calling the mutate function will "probably" slightly change a chromosome
199
+ # randomly.
200
+ #
201
+ # This implementation of "mutation" will (probably) reverse the
202
+ # order of 2 consecutive randome nodes
203
+ # (e.g. from [ 0, 1, 2, 4] to [0, 2, 1, 4]) if:
204
+ # ((1 - chromosome.normalized_fitness) * 0.4)
205
+ def self.mutate(chromosome)
206
+ if chromosome.normalized_fitness && rand < ((1 - chromosome.normalized_fitness) * 0.3)
207
+ data = chromosome.data
208
+ index = rand(data.length-1)
209
+ data[index], data[index+1] = data[index+1], data[index]
210
+ chromosome.data = data
211
+ @fitness = nil
212
+ end
213
+ end
214
+
215
+ # Reproduction is used to vary the programming of a chromosome or
216
+ # chromosomes from one generation to the next. There are several ways to
217
+ # combine two chromosomes: One-point crossover, Two-point crossover,
218
+ # "Cut and splice", edge recombination, and more.
219
+ #
220
+ # The method is usually dependant of the problem domain.
221
+ # In this case, we have implemented edge recombination, wich is the
222
+ # most used reproduction algorithm for the Travelling salesman problem.
223
+ def self.reproduce(a, b)
224
+ data_size = @@costs[0].length
225
+ available = []
226
+ 0.upto(data_size-1) { |n| available << n }
227
+ token = a.data[0]
228
+ spawn = [token]
229
+ available.delete(token)
230
+ while available.length > 0 do
231
+ #Select next
232
+ if token != b.data.last && available.include?(b.data[b.data.index(token)+1])
233
+ next_token = b.data[b.data.index(token)+1]
234
+ elsif token != a.data.last && available.include?(a.data[a.data.index(token)+1])
235
+ next_token = a.data[a.data.index(token)+1]
236
+ else
237
+ next_token = available[rand(available.length)]
238
+ end
239
+ #Add to spawn
240
+ token = next_token
241
+ available.delete(token)
242
+ spawn << next_token
243
+ a, b = b, a if rand < 0.4
244
+ end
245
+ return Chromosome.new(spawn)
246
+ end
247
+
248
+ # Initializes an individual solution (chromosome) for the initial
249
+ # population. Usually the chromosome is generated randomly, but you can
250
+ # use some problem domain knowledge, to generate better initial solutions.
251
+ def self.seed
252
+ data_size = @@costs[0].length
253
+ available = []
254
+ 0.upto(data_size-1) { |n| available << n }
255
+ seed = []
256
+ while available.length > 0 do
257
+ index = rand(available.length)
258
+ seed << available.delete_at(index)
259
+ end
260
+ return Chromosome.new(seed)
261
+ end
262
+
263
+ def self.set_cost_matrix(costs)
264
+ @@costs = costs
265
+ end
266
+ end
267
+
268
+ end