ai4r 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. data/README.rdoc +48 -0
  2. data/examples/decision_trees/data_set.csv +121 -0
  3. data/examples/decision_trees/id3_example.rb +31 -0
  4. data/examples/decision_trees/results.txt +29 -0
  5. data/examples/genetic_algorithm/genetic_algorithm_example.rb +39 -0
  6. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  7. data/examples/neural_network/backpropagation_example.rb +65 -0
  8. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  9. data/examples/neural_network/patterns_with_noise.rb +66 -0
  10. data/examples/neural_network/training_patterns.rb +68 -0
  11. data/lib/decision_tree/id3.rb +354 -0
  12. data/lib/genetic_algorithm/genetic_algorithm.rb +268 -0
  13. data/lib/neural_network/backpropagation.rb +259 -0
  14. data/site/build/site/en/broken-links.xml +2 -0
  15. data/site/build/site/en/downloads.html +187 -0
  16. data/site/build/site/en/downloads.pdf +151 -0
  17. data/site/build/site/en/geneticAlgorithms.html +564 -0
  18. data/site/build/site/en/geneticAlgorithms.pdf +911 -0
  19. data/site/build/site/en/images/ai4r-logo.png +0 -0
  20. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  21. data/site/build/site/en/images/c.png +0 -0
  22. data/site/build/site/en/images/c_wbn.png +0 -0
  23. data/site/build/site/en/images/c_wn.png +0 -0
  24. data/site/build/site/en/images/ero.gif +0 -0
  25. data/site/build/site/en/images/europe2.png +0 -0
  26. data/site/build/site/en/images/europe3.png +0 -0
  27. data/site/build/site/en/images/fitness.png +0 -0
  28. data/site/build/site/en/images/instruction_arrow.png +0 -0
  29. data/site/build/site/en/images/my_email.png +0 -0
  30. data/site/build/site/en/images/rubyforge.png +0 -0
  31. data/site/build/site/en/images/s.png +0 -0
  32. data/site/build/site/en/images/s_wbn.png +0 -0
  33. data/site/build/site/en/images/s_wn.png +0 -0
  34. data/site/build/site/en/images/sigmoid.png +0 -0
  35. data/site/build/site/en/images/t.png +0 -0
  36. data/site/build/site/en/images/t_wbn.png +0 -0
  37. data/site/build/site/en/images/t_wn.png +0 -0
  38. data/site/build/site/en/index.html +258 -0
  39. data/site/build/site/en/index.pdf +306 -0
  40. data/site/build/site/en/linkmap.html +231 -0
  41. data/site/build/site/en/linkmap.pdf +94 -0
  42. data/site/build/site/en/locationmap.xml +72 -0
  43. data/site/build/site/en/machineLearning.html +325 -0
  44. data/site/build/site/en/machineLearning.pdf +337 -0
  45. data/site/build/site/en/neuralNetworks.html +446 -0
  46. data/site/build/site/en/neuralNetworks.pdf +604 -0
  47. data/site/build/site/en/skin/CommonMessages_de.xml +23 -0
  48. data/site/build/site/en/skin/CommonMessages_en_US.xml +23 -0
  49. data/site/build/site/en/skin/CommonMessages_es.xml +23 -0
  50. data/site/build/site/en/skin/CommonMessages_fr.xml +23 -0
  51. data/site/build/site/en/skin/basic.css +166 -0
  52. data/site/build/site/en/skin/breadcrumbs-optimized.js +90 -0
  53. data/site/build/site/en/skin/breadcrumbs.js +237 -0
  54. data/site/build/site/en/skin/fontsize.js +166 -0
  55. data/site/build/site/en/skin/getBlank.js +40 -0
  56. data/site/build/site/en/skin/getMenu.js +45 -0
  57. data/site/build/site/en/skin/images/README.txt +1 -0
  58. data/site/build/site/en/skin/images/add.jpg +0 -0
  59. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  60. data/site/build/site/en/skin/images/chapter.gif +0 -0
  61. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  62. data/site/build/site/en/skin/images/current.gif +0 -0
  63. data/site/build/site/en/skin/images/error.png +0 -0
  64. data/site/build/site/en/skin/images/external-link.gif +0 -0
  65. data/site/build/site/en/skin/images/fix.jpg +0 -0
  66. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  67. data/site/build/site/en/skin/images/hack.jpg +0 -0
  68. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  69. data/site/build/site/en/skin/images/info.png +0 -0
  70. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  71. data/site/build/site/en/skin/images/label.gif +0 -0
  72. data/site/build/site/en/skin/images/page.gif +0 -0
  73. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  74. data/site/build/site/en/skin/images/poddoc.png +0 -0
  75. data/site/build/site/en/skin/images/printer.gif +0 -0
  76. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  77. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  78. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  79. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  80. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  81. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  82. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  83. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  84. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  85. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  86. data/site/build/site/en/skin/images/remove.jpg +0 -0
  87. data/site/build/site/en/skin/images/rss.png +0 -0
  88. data/site/build/site/en/skin/images/spacer.gif +0 -0
  89. data/site/build/site/en/skin/images/success.png +0 -0
  90. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  91. data/site/build/site/en/skin/images/update.jpg +0 -0
  92. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  93. data/site/build/site/en/skin/images/vcss.png +0 -0
  94. data/site/build/site/en/skin/images/warning.png +0 -0
  95. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  96. data/site/build/site/en/skin/menu.js +48 -0
  97. data/site/build/site/en/skin/note.txt +50 -0
  98. data/site/build/site/en/skin/print.css +54 -0
  99. data/site/build/site/en/skin/profile.css +163 -0
  100. data/site/build/site/en/skin/prototype.js +1257 -0
  101. data/site/build/site/en/skin/screen.css +587 -0
  102. data/site/build/site/en/svn.html +223 -0
  103. data/site/build/site/en/svn.pdf +239 -0
  104. data/site/build/site/en/wholesite.pdf +1686 -0
  105. data/site/build/tmp/brokenlinks.xml +2 -0
  106. data/site/build/tmp/build-info.xml +5 -0
  107. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  108. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  109. data/site/build/tmp/input.xmap +32 -0
  110. data/site/build/tmp/internal.xmap +32 -0
  111. data/site/build/tmp/locationmap.xml +29 -0
  112. data/site/build/tmp/output.xmap +38 -0
  113. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -0
  114. data/site/build/tmp/plugins-1.xml +212 -0
  115. data/site/build/tmp/plugins-2.xml +347 -0
  116. data/site/build/tmp/projfilters.properties +41 -0
  117. data/site/build/tmp/resources.xmap +32 -0
  118. data/site/build/webapp/WEB-INF/logs/access.log +0 -0
  119. data/site/build/webapp/WEB-INF/logs/core.log +788 -0
  120. data/site/build/webapp/WEB-INF/logs/debug.log +0 -0
  121. data/site/build/webapp/WEB-INF/logs/error.log +248 -0
  122. data/site/build/webapp/WEB-INF/logs/flow.log +0 -0
  123. data/site/build/webapp/WEB-INF/logs/idgen.log +0 -0
  124. data/site/build/webapp/WEB-INF/logs/linkrewriter.log +0 -0
  125. data/site/build/webapp/WEB-INF/logs/locationmap.log +0 -0
  126. data/site/build/webapp/WEB-INF/logs/sitemap.log +0 -0
  127. data/site/build/webapp/WEB-INF/logs/xmlform.log +0 -0
  128. data/site/forrest.properties +152 -0
  129. data/site/forrest.properties.dispatcher.properties +25 -0
  130. data/site/forrest.properties.xml +29 -0
  131. data/site/src/documentation/README.txt +7 -0
  132. data/site/src/documentation/classes/CatalogManager.properties +62 -0
  133. data/site/src/documentation/content/locationmap.xml +72 -0
  134. data/site/src/documentation/content/xdocs/downloads.html +9 -0
  135. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +280 -0
  136. data/site/src/documentation/content/xdocs/index.xml +73 -0
  137. data/site/src/documentation/content/xdocs/machineLearning.xml +129 -0
  138. data/site/src/documentation/content/xdocs/neuralNetworks.xml +218 -0
  139. data/site/src/documentation/content/xdocs/site.xml +51 -0
  140. data/site/src/documentation/content/xdocs/svn.xml +31 -0
  141. data/site/src/documentation/content/xdocs/tabs.xml +35 -0
  142. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  143. data/site/src/documentation/resources/images/c.png +0 -0
  144. data/site/src/documentation/resources/images/c_wbn.png +0 -0
  145. data/site/src/documentation/resources/images/c_wn.png +0 -0
  146. data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
  147. data/site/src/documentation/resources/images/ero.gif +0 -0
  148. data/site/src/documentation/resources/images/europe2.png +0 -0
  149. data/site/src/documentation/resources/images/europe3.png +0 -0
  150. data/site/src/documentation/resources/images/fitness.png +0 -0
  151. data/site/src/documentation/resources/images/icon-a.png +0 -0
  152. data/site/src/documentation/resources/images/icon-b.png +0 -0
  153. data/site/src/documentation/resources/images/icon.png +0 -0
  154. data/site/src/documentation/resources/images/my_email.png +0 -0
  155. data/site/src/documentation/resources/images/project-logo.png +0 -0
  156. data/site/src/documentation/resources/images/rubyforge.png +0 -0
  157. data/site/src/documentation/resources/images/s.png +0 -0
  158. data/site/src/documentation/resources/images/s_wbn.png +0 -0
  159. data/site/src/documentation/resources/images/s_wn.png +0 -0
  160. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  161. data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
  162. data/site/src/documentation/resources/images/t.png +0 -0
  163. data/site/src/documentation/resources/images/t_wbn.png +0 -0
  164. data/site/src/documentation/resources/images/t_wn.png +0 -0
  165. data/site/src/documentation/resources/schema/catalog.xcat +29 -0
  166. data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
  167. data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
  168. data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
  169. data/site/src/documentation/sitemap.xmap +66 -0
  170. data/site/src/documentation/skinconf.xml +418 -0
  171. data/site/src/documentation/translations/langcode.xml +29 -0
  172. data/site/src/documentation/translations/languages_de.xml +24 -0
  173. data/site/src/documentation/translations/languages_en.xml +24 -0
  174. data/site/src/documentation/translations/languages_es.xml +22 -0
  175. data/site/src/documentation/translations/languages_fr.xml +24 -0
  176. data/site/src/documentation/translations/languages_nl.xml +24 -0
  177. data/site/src/documentation/translations/menu.xml +33 -0
  178. data/site/src/documentation/translations/menu_af.xml +33 -0
  179. data/site/src/documentation/translations/menu_de.xml +33 -0
  180. data/site/src/documentation/translations/menu_es.xml +33 -0
  181. data/site/src/documentation/translations/menu_fr.xml +33 -0
  182. data/site/src/documentation/translations/menu_it.xml +33 -0
  183. data/site/src/documentation/translations/menu_nl.xml +33 -0
  184. data/site/src/documentation/translations/menu_no.xml +33 -0
  185. data/site/src/documentation/translations/menu_ru.xml +33 -0
  186. data/site/src/documentation/translations/menu_sk.xml +33 -0
  187. data/site/src/documentation/translations/tabs.xml +22 -0
  188. data/site/src/documentation/translations/tabs_de.xml +22 -0
  189. data/site/src/documentation/translations/tabs_es.xml +22 -0
  190. data/site/src/documentation/translations/tabs_fr.xml +22 -0
  191. data/site/src/documentation/translations/tabs_nl.xml +22 -0
  192. data/test/decision_tree/id3_test.rb +209 -0
  193. data/test/genetic_algorithm/chromosome_test.rb +55 -0
  194. data/test/genetic_algorithm/genetic_algorithm_test.rb +78 -0
  195. data/test/neural_network/backpropagation_test.rb +44 -0
  196. metadata +274 -0
@@ -0,0 +1,68 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+
11
# 16x16 grid of integers (values 0..10) whose non-zero cells trace the outline
# of a triangle: the apex is in the two middle columns of the first row, the
# two edges widen by one column every two rows, and the last row is a solid
# line of 10s. Cells with 1, 5 and 9 appear along the slanted edges.
# NOTE(review): presumably cell values are pixel intensities fed to the neural
# network example -- confirm against the script that loads this file.
TRIANGLE = [
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 1, 9, 9, 1, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 1, 9, 1, 1, 9, 1, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 5, 5, 0, 0, 5, 5, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 1, 9, 1, 0, 0, 1, 9, 1, 0, 0, 0, 0],
[ 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0],
[ 0, 0, 0, 1, 9, 1, 0, 0, 0, 0, 1, 9, 1, 0, 0, 0],
[ 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0],
[ 0, 0, 1, 9, 1, 0, 0, 0, 0, 0, 0, 1, 9, 1, 0, 0],
[ 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0],
[ 0, 1, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 9, 1, 0],
[ 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 0],
[ 1, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 9, 1],
[ 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
]
29
+
30
# 16x16 grid of integers describing a square outline: the first and last rows
# are entirely 10, and every row in between has 10 in its first and last
# column and 0 everywhere else.
# NOTE(review): presumably cell values are pixel intensities fed to the neural
# network example -- confirm against the script that loads this file.
SQUARE = [
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]

]
49
+
50
# 16x16 grid of integers describing a cross: the two middle columns (indices
# 7 and 8) hold 5 in every row, the two middle rows (indices 7 and 8) hold 5
# in every column, and all remaining cells are 0.
# NOTE(review): presumably cell values are pixel intensities fed to the neural
# network example -- confirm against the script that loads this file.
CROSS = [
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
[ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0]
]
68
+
@@ -0,0 +1,354 @@
1
+
2
+ # Decision tree learning, used in data mining and machine learning,
3
+ # uses a decision tree as a predictive model which maps observations about
4
+ # an item to conclusions about the item's target value.
5
+ #
6
+ # In this module you will find an implementation of the ID3 algorithm (Quinlan)
7
+ #
8
+ # * http://en.wikipedia.org/wiki/Decision_tree
9
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
10
+ #
11
+ # Author:: Sergio Fierens
12
+ # License:: MPL 1.1
13
+ # Project:: ai4r
14
+ # Url:: http://ai4r.rubyforge.org/
15
+ #
16
+ # You can redistribute it and/or modify it under the terms of
17
+ # the Mozilla Public License version 1.1 as published by the
18
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
19
+
20
+ module DecisionTree
21
+
22
+ # = Introduction
23
+ # This is an implementation of the ID3 algorithm (Quinlan)
24
+ # Given a set of preclassified examples, it builds a top-down
25
+ # induction of decision tree, biased by the information gain and
26
+ # entropy measure.
27
+ #
28
+ # = How to use it
29
+ #
30
+ # DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
31
+ #
32
+ # DATA_SET = [ ['New York', '<30', 'M', 'Y'],
33
+ # ['Chicago', '<30', 'M', 'Y'],
34
+ # ['Chicago', '<30', 'F', 'Y'],
35
+ # ['New York', '<30', 'M', 'Y'],
36
+ # ['New York', '<30', 'M', 'Y'],
37
+ # ['Chicago', '[30-50)', 'M', 'Y'],
38
+ # ['New York', '[30-50)', 'F', 'N'],
39
+ # ['Chicago', '[30-50)', 'F', 'Y'],
40
+ # ['New York', '[30-50)', 'F', 'N'],
41
+ # ['Chicago', '[50-80]', 'M', 'N'],
42
+ # ['New York', '[50-80]', 'F', 'N'],
43
+ # ['New York', '[50-80]', 'M', 'N'],
44
+ # ['Chicago', '[50-80]', 'M', 'N'],
45
+ # ['New York', '[50-80]', 'F', 'N'],
46
+ # ['Chicago', '>80', 'F', 'Y']
47
+ # ]
48
+ #
49
+ # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
50
+ #
51
+ # id3.to_s
52
+ # # => if age_range=='<30' then marketing_target='Y'
53
+ # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
54
+ # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
55
+ # elsif age_range=='[50-80]' then marketing_target='N'
56
+ # elsif age_range=='>80' then marketing_target='Y'
57
+ # else raise 'There was not enough information during training to do a proper induction for this data element' end
58
+ #
59
+ # id3.eval(['New York', '<30', 'M'])
60
+ # # => 'Y'
61
+ #
62
+ # = A better way to load the data
63
+ #
64
+ # In real life you will use many more training examples, with more
65
+ # attributes. Consider moving your data to an external CSV (comma-separated
66
+ # values) file.
67
+ #
68
+ # data_set = []
69
+ # CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
70
+ # data_set << row
71
+ # end
72
+ # data_labels = data_set.shift
73
+ #
74
+ # id3 = DecisionTree::ID3.new(data_set, data_labels)
75
+ #
76
+ # = A nice tip for data evaluation
77
+ #
78
+ # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
79
+ # age_range = '<30'
80
+ # marketing_target = nil
81
+ # eval id3.to_s
82
+ # puts marketing_target
83
+ # # => 'Y'
84
+ # = More about ID3 and decision trees
85
+ #
86
+ # * http://en.wikipedia.org/wiki/Decision_tree
87
+ # * http://en.wikipedia.org/wiki/ID3_algorithm
88
+ #
89
+ # = About the project
90
+ # Author:: Sergio Fierens
91
+ # License:: MPL 1.1
92
+
93
class ID3
  # Column labels of the training data; the last label names the category.
  attr_reader :data_labels

  # Natural logarithm of 2; ID3.log2 divides by it to change the base.
  LOG2 = Math.log(2)

  # Create a new decision tree. If your data is classified with N attributes
  # and M examples, then your data examples must have the following format:
  #
  #   [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CATEGORY_VAL1],
  #     [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CATEGORY_VAL2],
  #     ...
  #     [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
  #   ]
  #
  # e.g.
  #   [ ['New York', '<30', 'M', 'Y'],
  #     ['Chicago', '<30', 'M', 'Y'],
  #     ['Chicago', '<30', 'F', 'Y'],
  #     ['New York', '<30', 'M', 'Y'],
  #     ['New York', '<30', 'M', 'Y'],
  #     ['Chicago', '[30-50)', 'M', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago', '[30-50)', 'F', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago', '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['New York', '[50-80]', 'M', 'N'],
  #     ['Chicago', '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['Chicago', '>80', 'F', 'Y']
  #   ]
  #
  # Data labels must have the following format:
  #   [ 'city', 'age_range', 'gender', 'marketing_target' ]
  #
  # If you do not provide labels for your data, the following labels will
  # be created by default:
  #   [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY' ]
  #
  # Raises a RuntimeError when data_examples is nil or empty.
  def initialize(data_examples, data_labels=nil)
    raise "Examples data set must not be empty." if !data_examples || data_examples.empty?
    if !data_labels
      # One generated label per attribute column of the first example,
      # followed by the label of the category column.
      data_labels = []
      data_examples[0][0..-2].each_index do |i|
        data_labels << "ATTRIBUTE_#{i+1}"
      end
      data_labels << "CATEGORY"
    end
    @data_labels = data_labels
    preprocess_data(data_examples)
  end

  # You can evaluate new data, predicting its category.
  # e.g.
  #   id3.eval(['New York', '<30', 'F'])  # => 'Y'
  def eval(data)
    @tree.value(data)
  end

  # This method returns the generated rules in ruby code.
  # e.g.
  #
  #   id3.to_s
  #     # =>  if age_range=='<30' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
  #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
  #           elsif age_range=='[50-80]' then marketing_target='N'
  #           elsif age_range=='>80' then marketing_target='Y'
  #           else raise 'There was not enough information during training to do a proper induction for this data element' end
  #
  # It is a nice way to inspect induction results, and also to execute them:
  #     age_range = '<30'
  #     marketing_target = nil
  #     eval id3.to_s
  #     puts marketing_target
  #       # =>  'Y'
  def to_s
    rules = @tree.get_rules
    rules = rules.collect do |rule|
      # Every element but the last is a condition; the last is the assignment.
      "#{rule[0..-2].join(' and ')} then #{rule.last}"
    end
    return "if #{rules.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
  end

  # Sum of a list of numbers (0 for an empty list).
  def self.sum(values)
    values.inject( 0 ) { |sum,x| sum+x }
  end

  # Base-2 logarithm of z. Returns 0.0 for z == 0 so that the entropy term
  # p*log2(p) evaluates to 0 when p is 0.
  def self.log2(z)
    return 0.0 if z == 0
    Math.log(z)/LOG2
  end

  private

  # Builds the decision tree from the training examples and stores its root.
  def preprocess_data(data_examples)
    @tree = build_node(data_examples)
  end

  # Recursively builds the (sub)tree for data_examples.
  #
  # flag_att lists the attribute indexes already used on the path from the
  # root to this node. It is never mutated: every child receives a fresh
  # array, so an attribute consumed in one branch stays available to its
  # sibling branches.
  def build_node(data_examples, flag_att = [])
    return ErrorNode.new if data_examples.length == 0
    value_domain = domain(data_examples)
    # Only one category left: this is a leaf.
    return CategoryNode.new(@data_labels.last, value_domain.last[0]) if value_domain.last.length == 1
    best_index = min_entropy_index(data_examples, value_domain, flag_att)
    partitions = split_data_examples(data_examples, value_domain, best_index)
    # The chosen attribute cannot separate the examples any further, so fall
    # back to the most frequent category.
    return CategoryNode.new(@data_labels.last, most_freq(data_examples, value_domain)) if partitions.length == 1
    used_attributes = flag_att + [best_index]
    nodes = partitions.collect do |partial_data_examples|
      build_node(partial_data_examples, used_attributes)
    end
    return EvaluationNode.new(@data_labels, best_index, value_domain[best_index], nodes)
  end

  # Returns the category that appears most often in examples. On a tie the
  # category that comes first in domain.last wins.
  def most_freq(examples, domain)
    freqs = Array.new(domain.last.length, 0)
    examples.each do |example|
      cat_index = domain.last.index(example.last)
      freqs[cat_index] += 1
    end
    domain.last[freqs.index(freqs.max)]
  end

  # Partitions data_examples by the value found at att_index. The subset for
  # a value is stored at the position that value has in domain[att_index].
  def split_data_examples(data_examples, domain, att_index)
    examples_by_value = Hash.new { |hash, key| hash[key] = [] }
    data_examples.each do |example|
      examples_by_value[example[att_index]] << example
    end
    data_examples_array = []
    examples_by_value.each_pair do |att_value, example_set|
      data_examples_array[domain[att_index].index(att_value)] = example_set
    end
    return data_examples_array
  end

  # Returns the index of the attribute, among those not listed in flag_att,
  # whose split yields the lowest entropy. The first attribute wins a tie.
  # Returns 0 when every attribute is flagged.
  def min_entropy_index(data_examples, domain, flag_att=[])
    min_entropy = nil
    min_index = 0
    domain[0..-2].each_index do |index|
      # Flagged attributes can never be selected, so skip their entropy.
      next if flag_att.include?(index)
      grid = freq_grid(index, data_examples, domain)
      current_entropy = entropy(grid, data_examples.length)
      if !min_entropy || current_entropy < min_entropy
        min_entropy = current_entropy
        min_index = index
      end
    end
    return min_index
  end

  # Returns one array per data label holding the distinct values seen in
  # that column, in order of first appearance. Columns beyond the number of
  # labels are ignored.
  def domain(data_examples)
    domain = Array.new(@data_labels.length) { [] }
    data_examples.each do |data|
      data.each_index do |i|
        domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
      end
    end
    return domain
  end

  # Builds a grid of counts: grid[v][c] is the number of examples whose
  # attribute att_index has the v-th value of its domain and whose category
  # is the c-th value of the category domain.
  def freq_grid(att_index, data_examples, domain)
    # Initialize empty grid
    grid = Array.new(domain[att_index].length) { Array.new(domain.last.length, 0) }
    # Fill grid with frequencies
    data_examples.each do |example|
      att_val_index = domain[att_index].index(example[att_index])
      category_index = domain.last.index(example.last)
      grid[att_val_index][category_index] += 1
    end
    return grid
  end

  # Entropy of a split: for each row of freq_grid, the entropy of its
  # category distribution weighted by the share of total_examples in it.
  def entropy(freq_grid, total_examples)
    total_entropy = 0
    freq_grid.each do |att_freq|
      att_total_freq = ID3.sum(att_freq)
      partial_entropy = 0
      if att_total_freq != 0
        att_freq.each do |freq|
          prop = freq.to_f/att_total_freq
          partial_entropy += (-1*prop*ID3.log2(prop))
        end
      end
      total_entropy += (att_total_freq.to_f/total_examples) * partial_entropy
    end
    return total_entropy
  end
end
303
+
304
# Inner node of the decision tree: tests one attribute of the data element
# and delegates to the child node associated with the attribute's value.
class EvaluationNode
  attr_reader :index, :values, :nodes

  # data_labels:: labels of all data columns (used when rendering rules)
  # index::       position of the attribute tested by this node
  # values::      known values of that attribute
  # nodes::       child nodes; nodes[i] handles values[i]
  def initialize(data_labels, index, values, nodes)
    @index = index
    @values = values
    @nodes = nodes
    @data_labels = data_labels
  end

  # Returns the category predicted for data by following the child node that
  # matches data[index].
  #
  # Raises a RuntimeError when the attribute value was not seen in training.
  def value(data)
    position = @values.index(data[@index])
    if position.nil?
      raise "There was not enough information during training to do a proper induction for this data element."
    end
    return @nodes[position].value(data)
  end

  # Returns the rules of every child, each prefixed with this node's
  # condition (label=='value'). Each rule is an array of strings.
  def get_rules
    rule_set = []
    @nodes.each_index do |child_node_index|
      my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
      child_node_rules = @nodes[child_node_index].get_rules
      child_node_rules.each do |child_rule|
        child_rule.unshift(my_rule)
      end
      rule_set += child_node_rules
    end
    return rule_set
  end
end
331
+
332
# Leaf of the decision tree: always answers with a fixed category value.
class CategoryNode
  # label:: name of the category column
  # value:: category this leaf predicts
  def initialize(label, value)
    @label, @value = label, value
  end

  # The stored category, whatever the data element looks like.
  def value(data)
    @value
  end

  # A single rule holding one element: the assignment label='value'.
  def get_rules
    assignment = "#{@label}='#{@value}'"
    [[assignment]]
  end
end
344
+
345
# Placeholder node used when there are no training examples to learn from.
class ErrorNode
  # Always fails: no category can be induced for the data element.
  def value(data)
    raise RuntimeError, "There was not enough information during training to do a proper induction for this data element."
  end

  # An error node contributes no rules.
  def get_rules
    []
  end
end
353
+
354
+ end
@@ -0,0 +1,268 @@
1
+ #
2
+ # The GeneticAlgorithm module implements the GeneticSearch and Chromosome
3
+ # classes. The GeneticSearch is a generic class, and can be used to solve
4
+ # any kind of problems. The GeneticSearch class performs a stochastic search
5
+ # of the solution of a given problem.
6
+ #
7
+ # The Chromosome is "problem specific". Ai4r built-in Chromosome class was
8
+ # designed to model the Travelling salesman problem. If you want to solve other
9
+ # types of problems, you will have to modify the Chromosome class, by overriding
10
+ # its fitness, reproduce, and mutate functions, to model your specific problem.
11
+ #
12
+ # Author:: Sergio Fierens
13
+ # License:: MPL 1.1
14
+ # Project:: ai4r
15
+ # Url:: http://ai4r.rubyforge.org/
16
+ #
17
+ # You can redistribute it and/or modify it under the terms of
18
+ # the Mozilla Public License version 1.1 as published by the
19
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
20
+
21
+ module GeneticAlgorithm
22
+
23
+ # This class is used to automatically:
24
+ #
25
+ # 1. Choose initial population
26
+ # 2. Evaluate the fitness of each individual in the population
27
+ # 3. Repeat
28
+ # 1. Select best-ranking individuals to reproduce
29
+ # 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
30
+ # 3. Evaluate the individual fitnesses of the offspring
31
+ # 4. Replace worst ranked part of population with offspring
32
+ # 4. Until termination
33
+ #
34
+ # If you want to customize the algorithm, you must modify any of the following classes:
35
+ # - Chromosome
36
+ # - Population
37
class GeneticSearch

  attr_accessor :population

  # initial_population_size:: number of chromosomes seeded by
  #                           generate_initial_population.
  # generations::             number of selection / reproduction /
  #                           replacement rounds performed by run.
  def initialize(initial_population_size, generations)
    @population_size = initial_population_size
    @max_generation = generations
    @generation = 0
  end

  # 1. Choose initial population
  # 2. Evaluate the fitness of each individual in the population
  # 3. Repeat
  # 1. Select best-ranking individuals to reproduce
  # 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
  # 3. Evaluate the individual fitnesses of the offspring
  # 4. Replace worst ranked part of population with offspring
  # 4. Until termination
  # 5. Return the best chromosome
  def run
    generate_initial_population
    @max_generation.times do
      selected_to_breed = selection                  # Evaluates current population
      offsprings = reproduction(selected_to_breed)   # Generate the population for this new generation
      replace_worst_ranked(offsprings)
    end
    return best_chromosome
  end

  # Fills the population with @population_size chromosomes obtained from
  # Chromosome.seed.
  def generate_initial_population
    @population = []
    @population_size.times do
      @population << Chromosome.seed
    end
  end

  # Select best-ranking individuals to reproduce
  #
  # Selection is the stage of a genetic algorithm in which individual
  # genomes are chosen from a population for later breeding.
  # There are several generic selection algorithms, such as
  # tournament selection and roulette wheel selection. We implemented the
  # latter.
  #
  # Steps:
  #
  # 1. The fitness function is evaluated for each individual, providing fitness values
  # 2. The population is sorted by descending fitness values.
  # 3. The fitness values are then normalized. (Highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes. If every chromosome has the same fitness, all of them get 1.
  # 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added together).
  # 5. The selected individual is the first one whose accumulated normalized value (its normalized value plus the normalized values of the chromosomes prior to it) is greater than or equal to R.
  # 6. We repeat steps 4 and 5, 2/3 times the population size.
  #
  # Returns the array of selected chromosomes (empty when the population is
  # empty).
  def selection
    return [] if @population.empty?
    @population.sort! { |a, b| b.fitness <=> a.fitness }
    best_fitness = @population.first.fitness
    worst_fitness = @population.last.fitness
    # Force floating point: with integer fitness values an integer division
    # would collapse every normalized value except the best one to 0.
    spread = (best_fitness - worst_fitness).to_f
    acum_fitness = 0.0
    @population.each do |chromosome|
      if spread > 0
        chromosome.normalized_fitness = (chromosome.fitness - worst_fitness) / spread
      else
        chromosome.normalized_fitness = 1.0
      end
      # Accumulate in both cases; otherwise a population with uniform
      # fitness would leave the total at 0 and the roulette would always
      # return the first chromosome.
      acum_fitness += chromosome.normalized_fitness
    end
    selected_to_breed = []
    ((2*@population_size)/3).times do
      selected_to_breed << select_random_individual(acum_fitness)
    end
    selected_to_breed
  end

  # We combine each pair of selected chromosomes using the method
  # Chromosome.reproduce
  #
  # The reproduction will also call the Chromosome.mutate method with
  # each member of the population. You should implement Chromosome.mutate
  # to only change (mutate) randomly. E.g. You could effectively change the
  # chromosome only if
  # rand < ((1 - chromosome.normalized_fitness) * 0.4)
  #
  # Returns the array of offspring (one per complete pair in
  # selected_to_breed).
  def reproduction(selected_to_breed)
    offsprings = []
    0.upto(selected_to_breed.length/2-1) do |i|
      offsprings << Chromosome.reproduce(selected_to_breed[2*i], selected_to_breed[2*i+1])
    end
    @population.each do |individual|
      Chromosome.mutate(individual)
    end
    return offsprings
  end

  # Replace worst ranked part of population with offspring.
  #
  # The last offsprings.length entries of the population are dropped; this
  # method does not sort, so it relies on the order left by selection.
  def replace_worst_ranked(offsprings)
    size = offsprings.length
    # 0..(-size - 1) keeps everything when size is 0 (range 0..-1).
    @population = @population[0..(-size - 1)] + offsprings
  end

  # Select the best chromosome in the population (the first one with the
  # highest fitness). Returns nil for an empty population.
  def best_chromosome
    @population.inject do |the_best, chromosome|
      chromosome.fitness > the_best.fitness ? chromosome : the_best
    end
  end

  private

  # Roulette wheel pick: walks the population accumulating
  # normalized_fitness and returns the first chromosome whose running total
  # reaches a random target in [0, acum_fitness).
  def select_random_individual(acum_fitness)
    select_random_target = acum_fitness * rand
    local_acum = 0
    @population.each do |chromosome|
      local_acum += chromosome.normalized_fitness
      return chromosome if local_acum >= select_random_target
    end
    # Floating point round-off can leave the running total marginally below
    # the target; fall back to the last chromosome instead of returning the
    # value of the each call (the population array).
    @population.last
  end

end
156
+
157
+ # A Chromosome is a representation of an individual solution for a specific
158
+ # problem. You will have to redefine your Chromosome representation for each
159
+ # particular problem, along with its fitness, mutate, reproduce, and seed
160
+ # functions.
161
class Chromosome

  attr_reader :data
  attr_accessor :normalized_fitness

  # data:: array of node indexes (the order in which nodes are visited).
  def initialize(data)
    @data = data
  end

  # Replaces the chromosome data and drops the cached fitness so that it is
  # recomputed on the next call to fitness. Note that modifying the array
  # returned by +data+ in place does not invalidate the cache.
  def data=(new_data)
    @data = new_data
    @fitness = nil
  end

  # The fitness function quantifies the optimality of a solution
  # (that is, a chromosome) in a genetic algorithm so that that particular
  # chromosome may be ranked against all the other chromosomes.
  #
  # Optimal chromosomes, or at least chromosomes which are more optimal,
  # are allowed to breed and mix their datasets by any of several techniques,
  # producing a new generation that will (hopefully) be even better.
  #
  # Here the fitness is the negated sum of the costs between consecutive
  # nodes of +data+, looked up in the matrix given to set_cost_matrix. The
  # result is cached until +data+ is reassigned.
  def fitness
    return @fitness if @fitness
    cost = 0
    # Index based walk so that empty or single-node data simply costs 0.
    1.upto(@data.length - 1) do |i|
      cost += @@costs[@data[i - 1]][@data[i]]
    end
    @fitness = -1 * cost
    return @fitness
  end

  # mutation is a function used to maintain genetic diversity from one
  # generation of a population of chromosomes to the next. It is analogous
  # to biological mutation.
  #
  # The purpose of mutation in GAs is to allow the
  # algorithm to avoid local minima by preventing the population of
  # chromosomes from becoming too similar to each other, thus slowing or even
  # stopping evolution.
  #
  # Calling the mutate function will "probably" slightly change a chromosome
  # randomly.
  #
  # This implementation of "mutation" will (probably) reverse the
  # order of 2 consecutive random nodes
  # (e.g. from [ 0, 1, 2, 4] to [0, 2, 1, 4]) if:
  # rand < ((1 - chromosome.normalized_fitness) * 0.3)
  #
  # Chromosomes without a normalized_fitness, or with fewer than two nodes,
  # are left untouched.
  def self.mutate(chromosome)
    return unless chromosome.normalized_fitness
    if rand < ((1 - chromosome.normalized_fitness) * 0.3)
      data = chromosome.data
      # rand(0) returns a float, which would corrupt a one-node chromosome.
      return if data.length < 2
      index = rand(data.length-1)
      data[index], data[index+1] = data[index+1], data[index]
      # Assigning through the writer resets the chromosome's cached fitness.
      # (A bare "@fitness = nil" here would only set an instance variable
      # on the Chromosome class object.)
      chromosome.data = data
    end
  end

  # Reproduction is used to vary the programming of a chromosome or
  # chromosomes from one generation to the next. There are several ways to
  # combine two chromosomes: One-point crossover, Two-point crossover,
  # "Cut and splice", edge recombination, and more.
  #
  # The method is usually dependent on the problem domain.
  # In this case, we have implemented edge recombination, which is the
  # most used reproduction algorithm for the Travelling salesman problem.
  #
  # The child starts at a's first node. At every step the next node is the
  # successor of the current node in b if it is still unused, else the
  # successor in a if still unused, else a random unused node. After each
  # step the two parents swap roles with probability 0.4.
  #
  # Returns a new Chromosome.
  def self.reproduce(a, b)
    data_size = @@costs[0].length
    available = (0...data_size).to_a
    token = a.data[0]
    spawn = [token]
    available.delete(token)
    while available.length > 0 do
      # Select next
      next_token = successor_in(b, token, available)
      next_token = successor_in(a, token, available) if next_token.nil?
      next_token = available[rand(available.length)] if next_token.nil?
      # Add to spawn
      token = next_token
      available.delete(token)
      spawn << next_token
      a, b = b, a if rand < 0.4
    end
    return Chromosome.new(spawn)
  end

  # Initializes an individual solution (chromosome) for the initial
  # population. Usually the chromosome is generated randomly, but you can
  # use some problem domain knowledge, to generate better initial solutions.
  #
  # This implementation returns a random permutation of the node indexes
  # 0..(n-1), n being the length of the first row of the cost matrix.
  def self.seed
    data_size = @@costs[0].length
    available = (0...data_size).to_a
    seed = []
    while available.length > 0 do
      index = rand(available.length)
      seed << available.delete_at(index)
    end
    return Chromosome.new(seed)
  end

  # Sets the cost matrix shared by all chromosomes; costs[i][j] is read as
  # the cost of going from node i to node j.
  def self.set_cost_matrix(costs)
    @@costs = costs
  end

  # Returns the node that follows +token+ in parent.data when that node is
  # still in +available+; nil otherwise (token is last, missing, or its
  # successor was already used).
  def self.successor_in(parent, token, available)
    position = parent.data.index(token)
    return nil if position.nil?
    candidate = parent.data[position + 1]
    available.include?(candidate) ? candidate : nil
  end
  private_class_method :successor_in

end
267
+
268
+ end