ai4r 1.1 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (140) hide show
  1. data/README.rdoc +21 -20
  2. data/examples/decision_trees/id3_example.rb +3 -2
  3. data/examples/genetic_algorithm/genetic_algorithm_example.rb +6 -6
  4. data/examples/neural_network/backpropagation_example.rb +2 -2
  5. data/lib/ai4r/classifiers/classifier_helper.rb +54 -0
  6. data/lib/ai4r/classifiers/id3.rb +356 -0
  7. data/lib/ai4r/classifiers/one_r.rb +148 -0
  8. data/lib/ai4r/classifiers/prism.rb +231 -0
  9. data/lib/ai4r/classifiers/zero_r.rb +104 -0
  10. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +272 -0
  11. data/lib/ai4r/neural_network/backpropagation.rb +271 -0
  12. data/site/build/tmp/locationmap.xml +14 -14
  13. data/site/build/tmp/output.xmap +23 -23
  14. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
  15. data/site/build/tmp/plugins-1.xml +0 -11
  16. data/site/build/tmp/plugins-2.xml +54 -0
  17. data/site/build/tmp/projfilters.properties +41 -41
  18. data/site/build/webapp/WEB-INF/logs/core.log +681 -788
  19. data/site/build/webapp/WEB-INF/logs/error.log +281 -248
  20. data/site/build/webapp/WEB-INF/logs/sitemap.log +1015 -0
  21. data/site/src/documentation/content/xdocs/forum.html +9 -0
  22. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +82 -68
  23. data/site/src/documentation/content/xdocs/index.xml +47 -18
  24. data/site/src/documentation/content/xdocs/machineLearning.xml +10 -9
  25. data/site/src/documentation/content/xdocs/neuralNetworks.xml +60 -36
  26. data/site/src/documentation/content/xdocs/site.xml +8 -5
  27. data/site/src/documentation/content/xdocs/svn.xml +11 -1
  28. data/site/src/documentation/resources/images/Thumbs.db +0 -0
  29. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  30. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  31. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  32. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  33. data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
  34. data/site/src/documentation/skinconf.xml +18 -18
  35. data/test/classifiers/id3_test.rb +206 -0
  36. data/test/classifiers/one_r_test.rb +62 -0
  37. data/test/classifiers/prism_test.rb +83 -0
  38. data/test/classifiers/zero_r_test.rb +48 -0
  39. data/test/genetic_algorithm/chromosome_test.rb +41 -38
  40. data/test/genetic_algorithm/genetic_algorithm_test.rb +64 -61
  41. data/test/neural_network/backpropagation_test.rb +20 -18
  42. metadata +109 -199
  43. data/lib/decision_tree/id3.rb +0 -354
  44. data/lib/genetic_algorithm/genetic_algorithm.rb +0 -268
  45. data/lib/neural_network/backpropagation.rb +0 -264
  46. data/site/build/site/en/broken-links.xml +0 -2
  47. data/site/build/site/en/downloads.html +0 -187
  48. data/site/build/site/en/downloads.pdf +0 -151
  49. data/site/build/site/en/geneticAlgorithms.html +0 -564
  50. data/site/build/site/en/geneticAlgorithms.pdf +0 -911
  51. data/site/build/site/en/images/ai4r-logo.png +0 -0
  52. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  53. data/site/build/site/en/images/c.png +0 -0
  54. data/site/build/site/en/images/c_wbn.png +0 -0
  55. data/site/build/site/en/images/c_wn.png +0 -0
  56. data/site/build/site/en/images/ero.gif +0 -0
  57. data/site/build/site/en/images/europe2.png +0 -0
  58. data/site/build/site/en/images/europe3.png +0 -0
  59. data/site/build/site/en/images/fitness.png +0 -0
  60. data/site/build/site/en/images/instruction_arrow.png +0 -0
  61. data/site/build/site/en/images/my_email.png +0 -0
  62. data/site/build/site/en/images/rubyforge.png +0 -0
  63. data/site/build/site/en/images/s.png +0 -0
  64. data/site/build/site/en/images/s_wbn.png +0 -0
  65. data/site/build/site/en/images/s_wn.png +0 -0
  66. data/site/build/site/en/images/sigmoid.png +0 -0
  67. data/site/build/site/en/images/t.png +0 -0
  68. data/site/build/site/en/images/t_wbn.png +0 -0
  69. data/site/build/site/en/images/t_wn.png +0 -0
  70. data/site/build/site/en/index.html +0 -258
  71. data/site/build/site/en/index.pdf +0 -306
  72. data/site/build/site/en/linkmap.html +0 -231
  73. data/site/build/site/en/linkmap.pdf +0 -94
  74. data/site/build/site/en/locationmap.xml +0 -72
  75. data/site/build/site/en/machineLearning.html +0 -325
  76. data/site/build/site/en/machineLearning.pdf +0 -337
  77. data/site/build/site/en/neuralNetworks.html +0 -446
  78. data/site/build/site/en/neuralNetworks.pdf +0 -604
  79. data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
  80. data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
  81. data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
  82. data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
  83. data/site/build/site/en/skin/basic.css +0 -166
  84. data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
  85. data/site/build/site/en/skin/breadcrumbs.js +0 -237
  86. data/site/build/site/en/skin/fontsize.js +0 -166
  87. data/site/build/site/en/skin/getBlank.js +0 -40
  88. data/site/build/site/en/skin/getMenu.js +0 -45
  89. data/site/build/site/en/skin/images/README.txt +0 -1
  90. data/site/build/site/en/skin/images/add.jpg +0 -0
  91. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  92. data/site/build/site/en/skin/images/chapter.gif +0 -0
  93. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  94. data/site/build/site/en/skin/images/current.gif +0 -0
  95. data/site/build/site/en/skin/images/error.png +0 -0
  96. data/site/build/site/en/skin/images/external-link.gif +0 -0
  97. data/site/build/site/en/skin/images/fix.jpg +0 -0
  98. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  99. data/site/build/site/en/skin/images/hack.jpg +0 -0
  100. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  101. data/site/build/site/en/skin/images/info.png +0 -0
  102. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  103. data/site/build/site/en/skin/images/label.gif +0 -0
  104. data/site/build/site/en/skin/images/page.gif +0 -0
  105. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  106. data/site/build/site/en/skin/images/poddoc.png +0 -0
  107. data/site/build/site/en/skin/images/printer.gif +0 -0
  108. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  109. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  110. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  111. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  112. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  113. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  114. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  115. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  116. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  117. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  118. data/site/build/site/en/skin/images/remove.jpg +0 -0
  119. data/site/build/site/en/skin/images/rss.png +0 -0
  120. data/site/build/site/en/skin/images/spacer.gif +0 -0
  121. data/site/build/site/en/skin/images/success.png +0 -0
  122. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  123. data/site/build/site/en/skin/images/update.jpg +0 -0
  124. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  125. data/site/build/site/en/skin/images/vcss.png +0 -0
  126. data/site/build/site/en/skin/images/warning.png +0 -0
  127. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  128. data/site/build/site/en/skin/menu.js +0 -48
  129. data/site/build/site/en/skin/note.txt +0 -50
  130. data/site/build/site/en/skin/print.css +0 -54
  131. data/site/build/site/en/skin/profile.css +0 -163
  132. data/site/build/site/en/skin/prototype.js +0 -1257
  133. data/site/build/site/en/skin/screen.css +0 -587
  134. data/site/build/site/en/svn.html +0 -223
  135. data/site/build/site/en/svn.pdf +0 -239
  136. data/site/build/site/en/wholesite.pdf +0 -1686
  137. data/site/build/tmp/brokenlinks.xml +0 -2
  138. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  139. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  140. data/test/decision_tree/id3_test.rb +0 -209
@@ -0,0 +1,148 @@
1
+ # Author:: Sergio Fierens (Implementation only)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require 'set'
11
+ require File.dirname(__FILE__) + '/classifier_helper'
12
+
13
+ module Ai4r
14
+ module Classifiers
15
+
16
+ # = Introduction
17
+ #
18
+ # The idea of the OneR algorithm is to identify the single
19
+ # attribute to use to classify data that makes
20
+ # fewest prediction errors.
21
+ # It generates rules based on a single attribute.
22
+ class OneR
23
+
24
+ attr_accessor :data_labels, :rule
25
+ include ClassifierHelper
26
+
27
+ # Build a new OneR classifier. If your data is classified with N attributes
28
+ # and M examples, then your data examples must have the following format:
29
+ #
30
+ # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
31
+ # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
32
+ # ...
33
+ # [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
34
+ # ]
35
+ #
36
+ # e.g.
37
+ # [ ['New York', '<30', 'M', 'Y'],
38
+ # ['Chicago', '<30', 'M', 'Y'],
39
+ # ['Chicago', '<30', 'F', 'Y'],
40
+ # ['New York', '<30', 'M', 'Y'],
41
+ # ['New York', '<30', 'M', 'Y'],
42
+ # ['Chicago', '[30-50)', 'M', 'Y'],
43
+ # ['New York', '[30-50)', 'F', 'N'],
44
+ # ['Chicago', '[30-50)', 'F', 'Y'],
45
+ # ['New York', '[30-50)', 'F', 'N'],
46
+ # ['Chicago', '[50-80]', 'M', 'N'],
47
+ # ['New York', '[50-80]', 'F', 'N'],
48
+ # ['New York', '[50-80]', 'M', 'N'],
49
+ # ['Chicago', '[50-80]', 'M', 'N'],
50
+ # ['New York', '[50-80]', 'F', 'N'],
51
+ # ['Chicago', '>80', 'F', 'Y']
52
+ # ]
53
+ #
54
+ # Data labels must have the following format:
55
+ # [ 'city', 'age_range', 'gender', 'marketing_target' ]
56
+ #
57
+ # If you do not provide labels for your data, the following labels will
58
+ # be created by default:
59
+ # [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
60
+ #
61
# Trains the classifier by choosing the single attribute whose one-level
# rule classifies the largest number of training examples correctly.
#
# data_examples:: array of rows; the last element of every row is its class.
# data_labels::   optional column labels; when omitted they are generated
#                 by default_data_labels.
#
# When num_attributes reports exactly one column, training is delegated to
# a ZeroR classifier instead (ZeroR must already be loaded by the caller).
# Returns self.
def build(data_examples, data_labels = nil)
  check_data_examples(data_examples)
  @data_labels = data_labels || default_data_labels(data_examples)
  if num_attributes(data_examples) == 1
    @zero_r = ZeroR.new.build(data_examples, data_labels)
    return self
  end
  @zero_r = nil
  domains = build_domains(data_examples)
  @rule = nil
  # Every column but the last one (the class) is a candidate attribute.
  # Iterating domains[1...-1].each_index stopped one index short, so the
  # last attribute was never evaluated and a data set with exactly one
  # attribute produced no rule at all.
  (0...(domains.size - 1)).each do |attr_index|
    candidate = build_rule(data_examples, attr_index, domains)
    # Strict comparison: on ties the earliest attribute is kept.
    @rule = candidate if @rule.nil? || candidate[:correct] > @rule[:correct]
  end
  self
end
78
+
79
+ # You can evaluate new data, predicting its class.
80
+ # e.g.
81
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
82
# Predicts the class of +data+ (an array of attribute values).
# Delegates to the ZeroR fallback when one was built; otherwise looks up
# the value of the selected attribute in the rule table. Returns nil when
# that value has no entry in the table.
def eval(data)
  if @zero_r
    @zero_r.eval(data)
  else
    @rule[:rule][data[@rule[:attr_index]]]
  end
end
87
+
88
+ # This method returns the generated rules in ruby code.
89
+ # e.g.
90
+ #
91
+ # classifier.to_s
92
+ # # => if age_range == '<30' then marketing_target = 'Y'
93
+ # elsif age_range == '[30-50)' then marketing_target = 'N'
94
+ # elsif age_range == '[50-80]' then marketing_target = 'N'
95
+ # end
96
+ #
97
+ # It is a nice way to inspect induction results, and also to execute them:
98
+ # marketing_target = nil
99
+ # eval classifier.to_s
100
+ # puts marketing_target
101
+ # # => 'Y'
102
# Renders the learned rule as an if/elsif chain of Ruby source, one branch
# per known value of the selected attribute. Delegates to the ZeroR
# fallback when one was built.
def to_s
  return @zero_r.to_s if @zero_r
  attribute_name = @data_labels[@rule[:attr_index]]
  target_name = @data_labels.last
  branches = @rule[:rule].map do |value, predicted|
    "#{attribute_name} == '#{value}' then #{target_name} = '#{predicted}'"
  end
  "if " + branches.join("\nelsif ") + "\nend"
end
112
+
113
+ protected
114
# Collects, for every column position, the set of distinct values found in
# the examples. Returns an array of Sets with num_attributes(data_examples)
# entries, indexed by column.
def build_domains(data_examples)
  column_values = Array.new(num_attributes(data_examples)) { Set.new }
  data_examples.each do |row|
    row.each_with_index { |value, column| column_values[column] << value }
  end
  column_values
end
121
+
122
# Builds the one-level rule for the attribute at +attr_index+.
#
# For each value in that attribute's domain, the class seen most often
# with it becomes the prediction (the first class to reach the maximum
# wins ties). Returns a hash with :attr_index, :rule (value => class) and
# :correct (how many training examples the rule classifies correctly).
def build_rule(data_examples, attr_index, domains)
  counts = {}
  domains[attr_index].each { |value| counts[value] = Hash.new(0) }
  data_examples.each do |row|
    counts[row[attr_index]][row.last] += 1
  end

  predictions = {}
  hits = 0
  counts.each_pair do |value, class_counts|
    winner = class_counts.max_by { |_class_value, count| count }
    next unless winner # domain value that never occurs in the examples
    predictions[value] = winner.first
    hits += winner.last
  end
  {:attr_index => attr_index, :rule => predictions, :correct => hits}
end
145
+
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,231 @@
1
+ # Author:: Sergio Fierens (Implementation only, Cendrowska is
2
+ # the creator of the algorithm)
3
+ # License:: MPL 1.1
4
+ # Project:: ai4r
5
+ # Url:: http://ai4r.rubyforge.org/
6
+ #
7
+ # You can redistribute it and/or modify it under the terms of
8
+ # the Mozilla Public License version 1.1 as published by the
9
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
10
+ #
11
+ # J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
12
+ # International Journal of Man-Machine Studies. 27(4):349-370.
13
+
14
+ require File.dirname(__FILE__) + '/classifier_helper'
15
+
16
+ module Ai4r
17
+ module Classifiers
18
+
19
+ # = Introduction
20
+ # This is an implementation of the PRISM algorithm (Cendrowska, 1987)
21
+ # Given a set of preclassified examples, it builds a set of rules
22
+ # to predict the class of other instances.
23
+ #
24
+ # J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
25
+ # International Journal of Man-Machine Studies. 27(4):349-370.
26
+ class Prism
27
+
28
+ attr_accessor :data_labels, :rules
29
+ include ClassifierHelper
30
+
31
+ # Build a new Prism classifier. If your data is classified with N attributes
32
+ # and M examples, then your data examples must have the following format:
33
+ #
34
+ # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
35
+ # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
36
+ # ...
37
+ # [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
38
+ # ]
39
+ #
40
+ # e.g.
41
+ # [ ['New York', '<30', 'M', 'Y'],
42
+ # ['Chicago', '<30', 'M', 'Y'],
43
+ # ['Chicago', '<30', 'F', 'Y'],
44
+ # ['New York', '<30', 'M', 'Y'],
45
+ # ['New York', '<30', 'M', 'Y'],
46
+ # ['Chicago', '[30-50)', 'M', 'Y'],
47
+ # ['New York', '[30-50)', 'F', 'N'],
48
+ # ['Chicago', '[30-50)', 'F', 'Y'],
49
+ # ['New York', '[30-50)', 'F', 'N'],
50
+ # ['Chicago', '[50-80]', 'M', 'N'],
51
+ # ['New York', '[50-80]', 'F', 'N'],
52
+ # ['New York', '[50-80]', 'M', 'N'],
53
+ # ['Chicago', '[50-80]', 'M', 'N'],
54
+ # ['New York', '[50-80]', 'F', 'N'],
55
+ # ['Chicago', '>80', 'F', 'Y']
56
+ # ]
57
+ #
58
+ # Data labels must have the following format:
59
+ # [ 'city', 'age_range', 'gender', 'marketing_target' ]
60
+ #
61
+ # If you do not provide labels for your data, the following labels will
62
+ # be created by default:
63
+ # [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
64
+ #
65
# Induces the rule list from the training examples.
#
# data_examples:: array of rows; the last element of every row is its class.
# data_labels::   optional column labels; when omitted they are generated
#                 by default_data_labels.
#
# For every class value, rules are generated one at a time until no
# uncovered example of that class is left; the examples matched by each
# new rule are dropped before the next rule is built. Returns self.
def build(data_examples, data_labels=nil)
  check_data_examples(data_examples)
  @data_labels = data_labels || default_data_labels(data_examples)
  class_values = build_domains(data_examples).last
  remaining = data_examples.map { |row| row }
  @rules = []
  class_values.each do |class_value|
    while has_class_value(remaining, class_value)
      new_rule = build_rule(class_value, remaining)
      @rules << new_rule
      remaining = remaining.reject { |row| matches_conditions(row, new_rule[:conditions]) }
    end
  end
  self
end
80
+
81
+ # You can evaluate new data, predicting its class.
82
+ # e.g.
83
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
84
# Predicts the class of an instance (array of attribute values) by
# returning the class of the first rule whose conditions all match.
# Returns nil when no rule matches.
def eval(instace)
  first_match = @rules.find { |rule| matches_conditions(instace, rule[:conditions]) }
  first_match ? first_match[:class_value] : nil
end
90
+
91
+ # This method returns the generated rules in ruby code.
92
+ # e.g.
93
+ #
94
+ # classifier.to_s
95
+ # # => if age_range == '<30' then marketing_target = 'Y'
96
+ # elsif age_range == '>80' then marketing_target = 'Y'
97
+ # elsif city == 'Chicago' and age_range == '[30-50)' then marketing_target = 'Y'
98
+ # else marketing_target = 'N'
99
+ # end
100
+ #
101
+ # It is a nice way to inspect induction results, and also to execute them:
102
+ # age_range = '[30-50)'
103
+ # city = 'New York'
104
+ # eval(classifier.to_s)
105
+ # puts marketing_target
106
+ # # => 'N'
107
# Renders the rule list as Ruby source: the first rule becomes the "if"
# branch, the rules between first and last become "elsif" branches, and
# (when there is more than one rule) the class of the last rule is
# emitted as the unconditional "else" branch.
def to_s
  first_rule = @rules.first
  lines = ["if #{join_terms(first_rule)} then #{then_clause(first_rule)}"]
  @rules[1...-1].each do |rule|
    lines << "elsif #{join_terms(rule)} then #{then_clause(rule)}"
  end
  lines << "else #{then_clause(@rules.last)}" if @rules.size > 1
  lines << "end"
  lines.join("\n")
end
116
+
117
+ protected
118
+
119
# True when at least one instance is labelled with +class_value+
# (the class is the last element of each instance).
def has_class_value(instances, class_value)
  instances.any? { |row| row.last == class_value }
end
123
+
124
# True when the rule covers no instance of a different class, i.e. every
# instance matching the rule's conditions is labelled with the rule's
# class value.
def is_perfect(instances, rule)
  expected_class = rule[:class_value]
  instances.none? do |row|
    row.last != expected_class && matches_conditions(row, rule[:conditions])
  end
end
131
+
132
# True when +data+ satisfies every condition. +conditions+ maps an
# attribute label to the value required in that attribute's column;
# an empty hash matches everything.
def matches_conditions(data, conditions)
  conditions.all? do |attr_label, required_value|
    data[get_attr_index(attr_label)] == required_value
  end
end
138
+
139
# Column position of +attr_label+ within @data_labels
# (nil when the label is not present).
def get_attr_index(attr_label)
  position = @data_labels.index(attr_label)
  position
end
142
+
143
# Value stored in +data+ at the column labelled +attr_label+.
def get_attr_value(data, attr_label)
  column = get_attr_index(attr_label)
  data[column]
end
146
+
147
# Builds one rule predicting +class_value+.
#
# Starting from an empty condition set, the best "attribute == value"
# term (as chosen by get_condition) is added repeatedly until the rule
# covers no instance of another class, or no unused attribute is left.
# Returns {:class_value => ..., :conditions => {label => value, ...}}.
def build_rule(class_value, instances)
  rule = {:class_value => class_value, :conditions => {}}
  rule_instances = instances.collect { |data| data }
  attributes = @data_labels[0...-1].collect { |label| label }
  until is_perfect(instances, rule) || attributes.empty?
    freq_table = build_freq_table(rule_instances, attributes, class_value)
    condition = get_condition(freq_table)
    # Nothing left to count (no covered instances): stop refining.
    break unless condition
    rule[:conditions].merge!(condition)
    # An attribute already fixed by the rule must not be offered again.
    # Otherwise the same term can be selected forever (e.g. contradictory
    # examples, or when it wins the tie-break), the candidate list never
    # empties, and the loop does not terminate.
    attributes -= condition.keys
    rule_instances = rule_instances.select do |data|
      matches_conditions(data, condition)
    end
  end
  rule
end
161
+
162
+ # Returns a structure with the following format:
163
+ # => {attr1_label => { :attr1_value1 => [p, t], attr1_value2 => [p, t], ... },
164
+ # attr2_label => { :attr2_value1 => [p, t], attr2_value2 => [p, t], ... },
165
+ # ...
166
+ # }
167
+ # where p is the number of instances classified as class_value
168
+ # with that attribute value, and t is the total number of instances with
169
+ # that attribute value
170
# Counts, per attribute label and attribute value, the pair [p, t]:
# p = instances having that value and labelled +class_value+,
# t = all instances having that value.
# Returns {label => {value => [p, t]}}; a value that was never seen
# reads back as [0, 0] from the inner hashes.
def build_freq_table(rule_instances, attributes, class_value)
  table = {}
  rule_instances.each do |row|
    increment = (row.last == class_value) ? 1 : 0
    attributes.each do |attr_label|
      value = get_attr_value(row, attr_label)
      # The shared [0, 0] default is never mutated: a new pair is stored.
      table[attr_label] ||= Hash.new([0, 0])
      positives, total = table[attr_label][value]
      table[attr_label][value] = [positives + increment, total + 1]
    end
  end
  table
end
183
+
184
+ # returns a single conditional term: {attrN_label => attrN_valueM}
185
+ # selecting the attribute with higher pt ratio
186
+ # (occurrences of attribute value classified as class_value /
187
+ # occurrences of attribute value)
188
# Scans the frequency table and returns the single term
# {attr_label => attr_value} whose [p, t] pair ranks best according to
# better_pt. The first best candidate wins ties; returns nil when the
# table offers no candidate.
def get_condition(freq_table)
  winner = nil
  winner_pt = [0, 0]
  freq_table.each_pair do |attr_label, attr_freqs|
    attr_freqs.each_pair do |attr_value, pt|
      next unless better_pt(pt, winner_pt)
      winner = { attr_label => attr_value }
      winner_pt = pt
    end
  end
  winner
end
201
+
202
+ # pt = [p, t]
203
+ # p = occurrences of attribute value with instance classified as class_value
204
+ # t = occurrences of attribute value
205
+ # a pt is better if:
206
+ # 1- its ratio is higher
207
+ # 2- its ratio is equal, and has a higher p
208
# Compares two [p, t] pairs. +pt+ is better than +best_pt+ when its
# ratio p/t is higher, or when the ratios are equal and its p is higher.
# A pair with t == 0 is never better; any pair with t > 0 beats a
# +best_pt+ whose t is 0. Ratios are compared by cross-multiplication,
# so no division is performed.
def better_pt(pt, best_pt)
  hits, total = pt[0], pt[1]
  best_hits, best_total = best_pt[0], best_pt[1]
  return false if total == 0
  return true if best_total == 0
  scaled = hits * best_total
  best_scaled = best_hits * total
  scaled > best_scaled || (scaled == best_scaled && hits > best_hits)
end
216
+
217
# Renders the rule's conditions as Ruby comparisons joined with " and ",
# e.g. "city == 'Chicago' and age_range == '<30'". A rule without
# conditions yields an empty string.
def join_terms(rule)
  comparisons = rule[:conditions].map do |attr_label, attr_value|
    "#{attr_label} == '#{attr_value}'"
  end
  comparisons.join(" and ")
end
224
+
225
# Renders the assignment of the rule's class value to the class label
# (the last entry of @data_labels), e.g. "marketing_target = 'Y'".
def then_clause(rule)
  class_label = @data_labels.last
  predicted = rule[:class_value]
  "#{class_label} = '#{predicted}'"
end
228
+
229
+ end
230
+ end
231
+ end
@@ -0,0 +1,104 @@
1
+ # Author:: Sergio Fierens (Implementation only)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/classifier_helper'
11
+
12
+ module Ai4r
13
+ module Classifiers
14
+ # = Introduction
15
+ #
16
+ # The idea behind the ZeroR classifier is to identify the
17
+ # most common class value in the training set.
18
+ # It always returns that value when evaluating an instance.
19
+ # It is frequently used as a baseline for evaluating other machine learning
20
+ # algorithms.
21
+ class ZeroR
22
+
23
+ attr_accessor :data_labels, :class_value
24
+
25
+ include ClassifierHelper
26
+
27
+ # Build a new ZeroR classifier. If your data is classified with N attributes
28
+ # and M examples, then your data examples must have the following format:
29
+ #
30
+ # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
31
+ # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
32
+ # ...
33
+ # [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
34
+ # ]
35
+ #
36
+ # e.g.
37
+ # [ ['New York', '<30', 'M', 'Y'],
38
+ # ['Chicago', '<30', 'M', 'Y'],
39
+ # ['Chicago', '<30', 'F', 'Y'],
40
+ # ['New York', '<30', 'M', 'Y'],
41
+ # ['New York', '<30', 'M', 'Y'],
42
+ # ['Chicago', '[30-50)', 'M', 'Y'],
43
+ # ['New York', '[30-50)', 'F', 'N'],
44
+ # ['Chicago', '[30-50)', 'F', 'Y'],
45
+ # ['New York', '[30-50)', 'F', 'N'],
46
+ # ['Chicago', '[50-80]', 'M', 'N'],
47
+ # ['New York', '[50-80]', 'F', 'N'],
48
+ # ['New York', '[50-80]', 'M', 'N'],
49
+ # ['Chicago', '[50-80]', 'M', 'N'],
50
+ # ['New York', '[50-80]', 'F', 'N'],
51
+ # ['Chicago', '>80', 'F', 'Y']
52
+ # ]
53
+ #
54
+ # Data labels must have the following format:
55
+ # [ 'city', 'age_range', 'gender', 'marketing_target' ]
56
+ #
57
+ # If you do not provide labels for your data, the following labels will
58
+ # be created by default:
59
+ # [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
60
+ #
61
# Trains the classifier by remembering the most frequent class value.
#
# data_examples:: array of rows; the last element of every row is its class.
# data_labels::   optional column labels; when omitted they are generated
#                 by default_data_labels.
#
# When several classes share the highest count, the one that reaches that
# count first while scanning the examples is kept. Returns self.
def build(data_examples, data_labels=nil)
  check_data_examples(data_examples)
  @data_labels = data_labels || default_data_labels(data_examples)
  frequencies = Hash.new(0)
  max_freq = 0
  @class_value = nil
  data_examples.each do |example|
    class_value = example.last
    # Store the updated count. Previously the incremented value was kept
    # only in a local variable, so every class was counted as 1 and the
    # class of the first example always won.
    class_frequency = (frequencies[class_value] += 1)
    if max_freq < class_frequency
      max_freq = class_frequency
      @class_value = class_value
    end
  end
  self
end
78
+
79
+ # You can evaluate new data, predicting its class.
80
+ # e.g.
81
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
82
# Predicts the class of +data+. The argument is ignored: the answer is
# always the class value stored in @class_value by build.
def eval(data)
  return @class_value
end
85
+
86
+ # This method returns the generated rules in ruby code.
87
+ # e.g.
88
+ #
89
+ # classifier.to_s
90
+ # # => marketing_target='Y'
91
+ #
92
+ # It is a nice way to inspect induction results, and also to execute them:
93
+ # marketing_target = nil
94
+ # eval classifier.to_s
95
+ # puts marketing_target
96
+ # # => 'Y'
97
# Renders the classifier as a Ruby assignment of the stored class value
# to the class label (the last entry of @data_labels),
# e.g. "marketing_target = 'Y'".
def to_s
  class_label = @data_labels.last
  "#{class_label} = '#{@class_value}'"
end
100
+
101
+ end
102
+
103
+ end
104
+ end