ai4r 1.1 → 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. data/README.rdoc +21 -20
  2. data/examples/decision_trees/id3_example.rb +3 -2
  3. data/examples/genetic_algorithm/genetic_algorithm_example.rb +6 -6
  4. data/examples/neural_network/backpropagation_example.rb +2 -2
  5. data/lib/ai4r/classifiers/classifier_helper.rb +54 -0
  6. data/lib/ai4r/classifiers/id3.rb +356 -0
  7. data/lib/ai4r/classifiers/one_r.rb +148 -0
  8. data/lib/ai4r/classifiers/prism.rb +231 -0
  9. data/lib/ai4r/classifiers/zero_r.rb +104 -0
  10. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +272 -0
  11. data/lib/ai4r/neural_network/backpropagation.rb +271 -0
  12. data/site/build/tmp/locationmap.xml +14 -14
  13. data/site/build/tmp/output.xmap +23 -23
  14. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
  15. data/site/build/tmp/plugins-1.xml +0 -11
  16. data/site/build/tmp/plugins-2.xml +54 -0
  17. data/site/build/tmp/projfilters.properties +41 -41
  18. data/site/build/webapp/WEB-INF/logs/core.log +681 -788
  19. data/site/build/webapp/WEB-INF/logs/error.log +281 -248
  20. data/site/build/webapp/WEB-INF/logs/sitemap.log +1015 -0
  21. data/site/src/documentation/content/xdocs/forum.html +9 -0
  22. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +82 -68
  23. data/site/src/documentation/content/xdocs/index.xml +47 -18
  24. data/site/src/documentation/content/xdocs/machineLearning.xml +10 -9
  25. data/site/src/documentation/content/xdocs/neuralNetworks.xml +60 -36
  26. data/site/src/documentation/content/xdocs/site.xml +8 -5
  27. data/site/src/documentation/content/xdocs/svn.xml +11 -1
  28. data/site/src/documentation/resources/images/Thumbs.db +0 -0
  29. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  30. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  31. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  32. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  33. data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
  34. data/site/src/documentation/skinconf.xml +18 -18
  35. data/test/classifiers/id3_test.rb +206 -0
  36. data/test/classifiers/one_r_test.rb +62 -0
  37. data/test/classifiers/prism_test.rb +83 -0
  38. data/test/classifiers/zero_r_test.rb +48 -0
  39. data/test/genetic_algorithm/chromosome_test.rb +41 -38
  40. data/test/genetic_algorithm/genetic_algorithm_test.rb +64 -61
  41. data/test/neural_network/backpropagation_test.rb +20 -18
  42. metadata +109 -199
  43. data/lib/decision_tree/id3.rb +0 -354
  44. data/lib/genetic_algorithm/genetic_algorithm.rb +0 -268
  45. data/lib/neural_network/backpropagation.rb +0 -264
  46. data/site/build/site/en/broken-links.xml +0 -2
  47. data/site/build/site/en/downloads.html +0 -187
  48. data/site/build/site/en/downloads.pdf +0 -151
  49. data/site/build/site/en/geneticAlgorithms.html +0 -564
  50. data/site/build/site/en/geneticAlgorithms.pdf +0 -911
  51. data/site/build/site/en/images/ai4r-logo.png +0 -0
  52. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  53. data/site/build/site/en/images/c.png +0 -0
  54. data/site/build/site/en/images/c_wbn.png +0 -0
  55. data/site/build/site/en/images/c_wn.png +0 -0
  56. data/site/build/site/en/images/ero.gif +0 -0
  57. data/site/build/site/en/images/europe2.png +0 -0
  58. data/site/build/site/en/images/europe3.png +0 -0
  59. data/site/build/site/en/images/fitness.png +0 -0
  60. data/site/build/site/en/images/instruction_arrow.png +0 -0
  61. data/site/build/site/en/images/my_email.png +0 -0
  62. data/site/build/site/en/images/rubyforge.png +0 -0
  63. data/site/build/site/en/images/s.png +0 -0
  64. data/site/build/site/en/images/s_wbn.png +0 -0
  65. data/site/build/site/en/images/s_wn.png +0 -0
  66. data/site/build/site/en/images/sigmoid.png +0 -0
  67. data/site/build/site/en/images/t.png +0 -0
  68. data/site/build/site/en/images/t_wbn.png +0 -0
  69. data/site/build/site/en/images/t_wn.png +0 -0
  70. data/site/build/site/en/index.html +0 -258
  71. data/site/build/site/en/index.pdf +0 -306
  72. data/site/build/site/en/linkmap.html +0 -231
  73. data/site/build/site/en/linkmap.pdf +0 -94
  74. data/site/build/site/en/locationmap.xml +0 -72
  75. data/site/build/site/en/machineLearning.html +0 -325
  76. data/site/build/site/en/machineLearning.pdf +0 -337
  77. data/site/build/site/en/neuralNetworks.html +0 -446
  78. data/site/build/site/en/neuralNetworks.pdf +0 -604
  79. data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
  80. data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
  81. data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
  82. data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
  83. data/site/build/site/en/skin/basic.css +0 -166
  84. data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
  85. data/site/build/site/en/skin/breadcrumbs.js +0 -237
  86. data/site/build/site/en/skin/fontsize.js +0 -166
  87. data/site/build/site/en/skin/getBlank.js +0 -40
  88. data/site/build/site/en/skin/getMenu.js +0 -45
  89. data/site/build/site/en/skin/images/README.txt +0 -1
  90. data/site/build/site/en/skin/images/add.jpg +0 -0
  91. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  92. data/site/build/site/en/skin/images/chapter.gif +0 -0
  93. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  94. data/site/build/site/en/skin/images/current.gif +0 -0
  95. data/site/build/site/en/skin/images/error.png +0 -0
  96. data/site/build/site/en/skin/images/external-link.gif +0 -0
  97. data/site/build/site/en/skin/images/fix.jpg +0 -0
  98. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  99. data/site/build/site/en/skin/images/hack.jpg +0 -0
  100. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  101. data/site/build/site/en/skin/images/info.png +0 -0
  102. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  103. data/site/build/site/en/skin/images/label.gif +0 -0
  104. data/site/build/site/en/skin/images/page.gif +0 -0
  105. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  106. data/site/build/site/en/skin/images/poddoc.png +0 -0
  107. data/site/build/site/en/skin/images/printer.gif +0 -0
  108. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  109. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  110. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  111. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  112. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  113. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  114. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  115. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  116. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  117. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  118. data/site/build/site/en/skin/images/remove.jpg +0 -0
  119. data/site/build/site/en/skin/images/rss.png +0 -0
  120. data/site/build/site/en/skin/images/spacer.gif +0 -0
  121. data/site/build/site/en/skin/images/success.png +0 -0
  122. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  123. data/site/build/site/en/skin/images/update.jpg +0 -0
  124. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  125. data/site/build/site/en/skin/images/vcss.png +0 -0
  126. data/site/build/site/en/skin/images/warning.png +0 -0
  127. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  128. data/site/build/site/en/skin/menu.js +0 -48
  129. data/site/build/site/en/skin/note.txt +0 -50
  130. data/site/build/site/en/skin/print.css +0 -54
  131. data/site/build/site/en/skin/profile.css +0 -163
  132. data/site/build/site/en/skin/prototype.js +0 -1257
  133. data/site/build/site/en/skin/screen.css +0 -587
  134. data/site/build/site/en/svn.html +0 -223
  135. data/site/build/site/en/svn.pdf +0 -239
  136. data/site/build/site/en/wholesite.pdf +0 -1686
  137. data/site/build/tmp/brokenlinks.xml +0 -2
  138. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  139. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  140. data/test/decision_tree/id3_test.rb +0 -209
@@ -0,0 +1,148 @@
1
+ # Author:: Sergio Fierens (Implementation only)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require 'set'
11
+ require File.dirname(__FILE__) + '/classifier_helper'
12
+
13
+ module Ai4r
14
+ module Classifiers
15
+
16
+ # = Introduction
17
+ #
18
+ # The idea of the OneR algorithm is to identify the single
19
+ # attribute to use to classify data that makes
20
+ # fewest prediction errors.
21
+ # It generates rules based on a single attribute.
22
class OneR

  attr_accessor :data_labels, :rule
  include ClassifierHelper

  # Build a new OneR classifier. If your data is classified with N attributes
  # and M examples, then your data examples must have the following format:
  #
  #   [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
  #     [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
  #     ...
  #     [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
  #   ]
  #
  # e.g.
  #   [ ['New York', '<30',     'M', 'Y'],
  #     ['Chicago',  '<30',     'M', 'Y'],
  #     ['Chicago',  '<30',     'F', 'Y'],
  #     ['New York', '<30',     'M', 'Y'],
  #     ['New York', '<30',     'M', 'Y'],
  #     ['Chicago',  '[30-50)', 'M', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago',  '[30-50)', 'F', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago',  '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['New York', '[50-80]', 'M', 'N'],
  #     ['Chicago',  '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['Chicago',  '>80',     'F', 'Y']
  #   ]
  #
  # Data labels must have the following format:
  #   [ 'city', 'age_range', 'gender', 'marketing_target' ]
  #
  # If you do not provide labels for your data, the following labels will
  # be created by default:
  #   [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
  #
  # Every attribute column (all columns except the last, which holds the
  # class value) is tried, and the one whose single-attribute rule classifies
  # the most training examples correctly is kept in +rule+. On a tie the
  # attribute with the lowest index wins.
  #
  # Returns self, so calls can be chained.
  def build(data_examples, data_labels = nil)
    check_data_examples(data_examples)
    @data_labels = data_labels || default_data_labels(data_examples)
    if num_attributes(data_examples) == 1
      # Presumably only the class column is present here, so there is no
      # attribute to build a rule on: delegate to ZeroR.
      # NOTE(review): ZeroR is referenced without being required next to this
      # class - confirm it is loaded before build is called.
      @zero_r = ZeroR.new.build(data_examples, data_labels)
      return self
    end
    @zero_r = nil
    domains = build_domains(data_examples)
    @rule = nil
    # The last domain belongs to the class column; every index before it is
    # a candidate attribute (including the first and the last attribute).
    (0...(domains.size - 1)).each do |attr_index|
      candidate = build_rule(data_examples, attr_index, domains)
      @rule = candidate if @rule.nil? || candidate[:correct] > @rule[:correct]
    end
    self
  end

  # You can evaluate new data, predicting its class.
  # e.g.
  #   classifier.eval(['New York', '<30', 'F'])  # => 'Y'
  #
  # Returns nil when the value of the selected attribute was not present
  # in the training data.
  def eval(data)
    return @zero_r.eval(data) if @zero_r
    attr_value = data[@rule[:attr_index]]
    @rule[:rule][attr_value]
  end

  # This method returns the generated rules in ruby code.
  # e.g. (one branch per value of the selected attribute)
  #
  #   classifier.to_s
  #     # =>  if age_range == '<30' then marketing_target = 'Y'
  #           elsif age_range == '[30-50)' then marketing_target = 'N'
  #           elsif age_range == '[50-80]' then marketing_target = 'N'
  #           end
  #
  # It is a nice way to inspect induction results, and also to execute them:
  #   marketing_target = nil
  #   eval classifier.to_s
  #   puts marketing_target
  #     # =>  'Y'
  def to_s
    return @zero_r.to_s if @zero_r
    attr_label = @data_labels[@rule[:attr_index]]
    class_label = @data_labels.last
    sentences = []
    @rule[:rule].each_pair do |attr_value, class_value|
      sentences << "#{attr_label} == '#{attr_value}' then #{class_label} = '#{class_value}'"
    end
    "if " + sentences.join("\nelsif ") + "\nend"
  end

  protected

  # Collects, for every column (attributes and class), the set of distinct
  # values found in the examples. Returns an Array of Sets indexed by column.
  def build_domains(data_examples)
    domains = Array.new(num_attributes(data_examples)) { Set.new }
    data_examples.each do |data|
      data.each_index { |attr_index| domains[attr_index] << data[attr_index] }
    end
    domains
  end

  # Builds the rule for a single attribute: each value of the attribute is
  # mapped to the class value it most frequently appears with.
  #
  # Returns a hash with:
  #   :attr_index => index of the attribute the rule is based on
  #   :rule       => { attr_value => predicted class_value }
  #   :correct    => number of training examples the rule classifies correctly
  def build_rule(data_examples, attr_index, domains)
    # value_freq[attr_value][class_value] => number of occurrences
    value_freq = {}
    domains[attr_index].each do |attr_value|
      value_freq[attr_value] = Hash.new { |hash, key| hash[key] = 0 }
    end
    data_examples.each do |data|
      value_freq[data[attr_index]][data.last] += 1
    end
    rule = {}
    correct_instances = 0
    value_freq.each_pair do |attr_value, class_freq_hash|
      max_freq = 0
      # Strict comparison: on a tie the first class value encountered wins.
      class_freq_hash.each_pair do |class_value, freq|
        if max_freq < freq
          rule[attr_value] = class_value
          max_freq = freq
        end
      end
      correct_instances += max_freq
    end
    {:attr_index => attr_index, :rule => rule, :correct => correct_instances}
  end

end
147
+ end
148
+ end
@@ -0,0 +1,231 @@
1
+ # Author:: Sergio Fierens (Implementation only, Cendrowska is
2
+ # the creator of the algorithm)
3
+ # License:: MPL 1.1
4
+ # Project:: ai4r
5
+ # Url:: http://ai4r.rubyforge.org/
6
+ #
7
+ # You can redistribute it and/or modify it under the terms of
8
+ # the Mozilla Public License version 1.1 as published by the
9
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
10
+ #
11
+ # J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
12
+ # International Journal of Man-Machine Studies. 27(4):349-370.
13
+
14
+ require File.dirname(__FILE__) + '/classifier_helper'
15
+
16
+ module Ai4r
17
+ module Classifiers
18
+
19
+ # = Introduction
20
+ # This is an implementation of the PRISM algorithm (Cendrowska, 1987)
21
+ # Given a set of preclassified examples, it builds a set of rules
22
+ # to predict the class of other instances.
23
+ #
24
+ # J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
25
+ # International Journal of Man-Machine Studies. 27(4):349-370.
26
class Prism

  attr_accessor :data_labels, :rules
  include ClassifierHelper

  # Build a new Prism classifier. If your data is classified with N attributes
  # and M examples, then your data examples must have the following format:
  #
  #   [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
  #     [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
  #     ...
  #     [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
  #   ]
  #
  # e.g.
  #   [ ['New York', '<30',     'M', 'Y'],
  #     ['Chicago',  '<30',     'M', 'Y'],
  #     ['Chicago',  '<30',     'F', 'Y'],
  #     ['New York', '<30',     'M', 'Y'],
  #     ['New York', '<30',     'M', 'Y'],
  #     ['Chicago',  '[30-50)', 'M', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago',  '[30-50)', 'F', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago',  '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['New York', '[50-80]', 'M', 'N'],
  #     ['Chicago',  '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['Chicago',  '>80',     'F', 'Y']
  #   ]
  #
  # Data labels must have the following format:
  #   [ 'city', 'age_range', 'gender', 'marketing_target' ]
  #
  # If you do not provide labels for your data, the following labels will
  # be created by default:
  #   [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
  #
  # For each class value, rules are generated until no remaining example of
  # that class is left uncovered; examples covered by a rule are removed
  # before the next rule is induced.
  #
  # Returns self, so calls can be chained.
  def build(data_examples, data_labels=nil)
    check_data_examples(data_examples)
    @data_labels = data_labels || default_data_labels(data_examples)
    # NOTE(review): build_domains is not defined in this class; presumably
    # it is provided by ClassifierHelper - confirm.
    domains = build_domains(data_examples)
    # Work on a shallow copy so the caller's array is never modified.
    instances = data_examples.collect { |data| data }
    @rules = []
    domains.last.each do |class_value|
      while has_class_value(instances, class_value)
        rule = build_rule(class_value, instances)
        @rules << rule
        instances = instances.reject { |data| matches_conditions(data, rule[:conditions]) }
      end
    end
    self
  end

  # You can evaluate new data, predicting its class.
  # e.g.
  #   classifier.eval(['New York', '<30', 'F'])  # => 'Y'
  #
  # Rules are tried in the order they were induced and the class value of
  # the first matching rule is returned. Returns nil when no rule matches
  # (note that, unlike the code produced by to_s, the last rule is not
  # treated as a catch-all here).
  def eval(instace)
    @rules.each do |rule|
      return rule[:class_value] if matches_conditions(instace, rule[:conditions])
    end
    nil
  end

  # This method returns the generated rules in ruby code. The last rule is
  # rendered as the +else+ branch (its conditions are not printed).
  # e.g.
  #
  #   classifier.to_s
  #     # =>  if age_range == '<30' then marketing_target = 'Y'
  #           elsif age_range == '>80' then marketing_target = 'Y'
  #           elsif city == 'Chicago' and age_range == '[30-50)' then marketing_target = 'Y'
  #           else marketing_target = 'N'
  #           end
  #
  # It is a nice way to inspect induction results, and also to execute them:
  #   age_range = '[30-50)'
  #   city = 'New York'
  #   eval(classifier.to_s)
  #   puts marketing_target
  #     # =>  'N'
  def to_s
    out = "if #{join_terms(@rules.first)} then #{then_clause(@rules.first)}"
    @rules[1...-1].each do |rule|
      out += "\nelsif #{join_terms(rule)} then #{then_clause(rule)}"
    end
    out += "\nelse #{then_clause(@rules.last)}" if @rules.size > 1
    out += "\nend"
    out
  end

  protected

  # True if at least one instance is classified as class_value.
  def has_class_value(instances, class_value)
    instances.any? { |data| data.last == class_value }
  end

  # True if the rule covers no instance of a class other than its own.
  def is_perfect(instances, rule)
    class_value = rule[:class_value]
    instances.each do |data|
      return false if data.last != class_value && matches_conditions(data, rule[:conditions])
    end
    true
  end

  # True if data satisfies every {attr_label => attr_value} pair in
  # conditions (an empty conditions hash matches everything).
  def matches_conditions(data, conditions)
    conditions.each_pair do |attr_label, attr_value|
      return false if data[get_attr_index(attr_label)] != attr_value
    end
    true
  end

  # Column index of the attribute with the given label.
  def get_attr_index(attr_label)
    @data_labels.index(attr_label)
  end

  # Value that data holds for the attribute with the given label.
  def get_attr_value(data, attr_label)
    data[get_attr_index(attr_label)]
  end

  # Induces one rule for class_value: conditions are added greedily (best
  # p/t ratio first) until the rule covers only instances of class_value or
  # there are no unused attributes left.
  #
  # Returns { :class_value => class_value, :conditions => {label => value} }
  def build_rule(class_value, instances)
    rule = {:class_value => class_value, :conditions => {}}
    rule_instances = instances.collect { |data| data }
    attributes = @data_labels[0...-1].collect { |label| label }
    until is_perfect(instances, rule) || attributes.empty?
      freq_table = build_freq_table(rule_instances, attributes, class_value)
      condition = get_condition(freq_table)
      break if condition.nil?
      rule[:conditions].merge!(condition)
      # Each attribute may be used only once per rule. Without this the
      # same term can be selected again and again when the examples are
      # contradictory, and the loop would never terminate.
      condition.each_key { |attr_label| attributes.delete(attr_label) }
      rule_instances = rule_instances.select do |data|
        matches_conditions(data, condition)
      end
    end
    rule
  end

  # Returns a structure with the following format:
  # => {attr1_label => { attr1_value1 => [p, t], attr1_value2 => [p, t], ... },
  #     attr2_label => { attr2_value1 => [p, t], attr2_value2 => [p, t], ... },
  #     ...
  #     }
  # where p is the number of instances classified as class_value
  # with that attribute value, and t is the total number of instances with
  # that attribute value
  def build_freq_table(rule_instances, attributes, class_value)
    freq_table = {}
    rule_instances.each do |data|
      hit = (data.last == class_value) ? 1 : 0
      attributes.each do |attr_label|
        attr_freqs = (freq_table[attr_label] ||= {})
        attr_value = get_attr_value(data, attr_label)
        hits, total = attr_freqs[attr_value] || [0, 0]
        attr_freqs[attr_value] = [hits + hit, total + 1]
      end
    end
    freq_table
  end

  # returns a single conditional term: {attrN_label => attrN_valueM}
  # selecting the attribute with higher pt ratio
  # (occurrences of attribute value classified as class_value /
  # occurrences of attribute value)
  # Returns nil when freq_table is empty.
  def get_condition(freq_table)
    best_pt = [0, 0]
    condition = nil
    freq_table.each do |attr_label, attr_freqs|
      attr_freqs.each do |attr_value, pt|
        if better_pt(pt, best_pt)
          condition = { attr_label => attr_value }
          best_pt = pt
        end
      end
    end
    condition
  end

  # pt = [p, t]
  # p = occurrences of attribute value with instance classified as class_value
  # t = occurrences of attribute value
  # a pt is better if:
  #   1- its ratio is higher
  #   2- its ratio is equal, and has a higher p
  # Ratios are compared by cross-multiplication, avoiding float division.
  def better_pt(pt, best_pt)
    return false if pt[1] == 0
    return true if best_pt[1] == 0
    a = pt[0] * best_pt[1]
    b = best_pt[0] * pt[1]
    a > b || (a == b && pt[0] > best_pt[0])
  end

  # Renders the conditions of a rule as "label == 'value' and ...".
  def join_terms(rule)
    terms = []
    rule[:conditions].each do |attr_label, attr_value|
      terms << "#{attr_label} == '#{attr_value}'"
    end
    "#{terms.join(" and ")}"
  end

  # Renders the assignment of the rule's class value to the class label.
  def then_clause(rule)
    "#{@data_labels.last} = '#{rule[:class_value]}'"
  end

end
230
+ end
231
+ end
@@ -0,0 +1,104 @@
1
+ # Author:: Sergio Fierens (Implementation only)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/classifier_helper'
11
+
12
+ module Ai4r
13
+ module Classifiers
14
+ # = Introduction
15
+ #
16
+ # The idea behind the ZeroR classifier is to identify the
17
+ # most common class value in the training set.
18
+ # It always returns that value when evaluating an instance.
19
+ # It is frequently used as a baseline for evaluating other machine learning
20
+ # algorithms.
21
class ZeroR

  attr_accessor :data_labels, :class_value

  include ClassifierHelper

  # Build a new ZeroR classifier. If your data is classified with N attributes
  # and M examples, then your data examples must have the following format:
  #
  #   [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
  #     [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
  #     ...
  #     [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
  #   ]
  #
  # e.g.
  #   [ ['New York', '<30',     'M', 'Y'],
  #     ['Chicago',  '<30',     'M', 'Y'],
  #     ['Chicago',  '<30',     'F', 'Y'],
  #     ['New York', '<30',     'M', 'Y'],
  #     ['New York', '<30',     'M', 'Y'],
  #     ['Chicago',  '[30-50)', 'M', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago',  '[30-50)', 'F', 'Y'],
  #     ['New York', '[30-50)', 'F', 'N'],
  #     ['Chicago',  '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['New York', '[50-80]', 'M', 'N'],
  #     ['Chicago',  '[50-80]', 'M', 'N'],
  #     ['New York', '[50-80]', 'F', 'N'],
  #     ['Chicago',  '>80',     'F', 'Y']
  #   ]
  #
  # Data labels must have the following format:
  #   [ 'city', 'age_range', 'gender', 'marketing_target' ]
  #
  # If you do not provide labels for your data, the following labels will
  # be created by default:
  #   [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
  #
  # The class value (last element of each example) that occurs most often
  # is stored in +class_value+. On a tie, the value that reaches the highest
  # count first while scanning the examples in order wins.
  #
  # Returns self, so calls can be chained.
  def build(data_examples, data_labels=nil)
    check_data_examples(data_examples)
    @data_labels = data_labels || default_data_labels(data_examples)
    frequencies = Hash.new(0)
    max_freq = 0
    @class_value = nil
    data_examples.each do |example|
      class_value = example.last
      # The running count must be stored back, otherwise every class would
      # appear to have frequency 1 and the first example would always win.
      frequencies[class_value] += 1
      if max_freq < frequencies[class_value]
        max_freq = frequencies[class_value]
        @class_value = class_value
      end
    end
    self
  end

  # You can evaluate new data, predicting its class.
  # e.g.
  #   classifier.eval(['New York', '<30', 'F'])  # => 'Y'
  #
  # The argument is ignored: the class value found by build is always
  # returned.
  def eval(data)
    @class_value
  end

  # This method returns the generated rules in ruby code.
  # e.g.
  #
  #   classifier.to_s
  #     # =>  marketing_target = 'Y'
  #
  # It is a nice way to inspect induction results, and also to execute them:
  #   marketing_target = nil
  #   eval classifier.to_s
  #   puts marketing_target
  #     # =>  'Y'
  def to_s
    "#{@data_labels.last} = '#{@class_value}'"
  end

end
102
+
103
+ end
104
+ end