ai4r 1.1 → 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +21 -20
- data/examples/decision_trees/id3_example.rb +3 -2
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +6 -6
- data/examples/neural_network/backpropagation_example.rb +2 -2
- data/lib/ai4r/classifiers/classifier_helper.rb +54 -0
- data/lib/ai4r/classifiers/id3.rb +356 -0
- data/lib/ai4r/classifiers/one_r.rb +148 -0
- data/lib/ai4r/classifiers/prism.rb +231 -0
- data/lib/ai4r/classifiers/zero_r.rb +104 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +272 -0
- data/lib/ai4r/neural_network/backpropagation.rb +271 -0
- data/site/build/tmp/locationmap.xml +14 -14
- data/site/build/tmp/output.xmap +23 -23
- data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
- data/site/build/tmp/plugins-1.xml +0 -11
- data/site/build/tmp/plugins-2.xml +54 -0
- data/site/build/tmp/projfilters.properties +41 -41
- data/site/build/webapp/WEB-INF/logs/core.log +681 -788
- data/site/build/webapp/WEB-INF/logs/error.log +281 -248
- data/site/build/webapp/WEB-INF/logs/sitemap.log +1015 -0
- data/site/src/documentation/content/xdocs/forum.html +9 -0
- data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +82 -68
- data/site/src/documentation/content/xdocs/index.xml +47 -18
- data/site/src/documentation/content/xdocs/machineLearning.xml +10 -9
- data/site/src/documentation/content/xdocs/neuralNetworks.xml +60 -36
- data/site/src/documentation/content/xdocs/site.xml +8 -5
- data/site/src/documentation/content/xdocs/svn.xml +11 -1
- data/site/src/documentation/resources/images/Thumbs.db +0 -0
- data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
- data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
- data/site/src/documentation/resources/images/jadeferret.png +0 -0
- data/site/src/documentation/resources/images/neural_network_example.png +0 -0
- data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
- data/site/src/documentation/skinconf.xml +18 -18
- data/test/classifiers/id3_test.rb +206 -0
- data/test/classifiers/one_r_test.rb +62 -0
- data/test/classifiers/prism_test.rb +83 -0
- data/test/classifiers/zero_r_test.rb +48 -0
- data/test/genetic_algorithm/chromosome_test.rb +41 -38
- data/test/genetic_algorithm/genetic_algorithm_test.rb +64 -61
- data/test/neural_network/backpropagation_test.rb +20 -18
- metadata +109 -199
- data/lib/decision_tree/id3.rb +0 -354
- data/lib/genetic_algorithm/genetic_algorithm.rb +0 -268
- data/lib/neural_network/backpropagation.rb +0 -264
- data/site/build/site/en/broken-links.xml +0 -2
- data/site/build/site/en/downloads.html +0 -187
- data/site/build/site/en/downloads.pdf +0 -151
- data/site/build/site/en/geneticAlgorithms.html +0 -564
- data/site/build/site/en/geneticAlgorithms.pdf +0 -911
- data/site/build/site/en/images/ai4r-logo.png +0 -0
- data/site/build/site/en/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/images/c.png +0 -0
- data/site/build/site/en/images/c_wbn.png +0 -0
- data/site/build/site/en/images/c_wn.png +0 -0
- data/site/build/site/en/images/ero.gif +0 -0
- data/site/build/site/en/images/europe2.png +0 -0
- data/site/build/site/en/images/europe3.png +0 -0
- data/site/build/site/en/images/fitness.png +0 -0
- data/site/build/site/en/images/instruction_arrow.png +0 -0
- data/site/build/site/en/images/my_email.png +0 -0
- data/site/build/site/en/images/rubyforge.png +0 -0
- data/site/build/site/en/images/s.png +0 -0
- data/site/build/site/en/images/s_wbn.png +0 -0
- data/site/build/site/en/images/s_wn.png +0 -0
- data/site/build/site/en/images/sigmoid.png +0 -0
- data/site/build/site/en/images/t.png +0 -0
- data/site/build/site/en/images/t_wbn.png +0 -0
- data/site/build/site/en/images/t_wn.png +0 -0
- data/site/build/site/en/index.html +0 -258
- data/site/build/site/en/index.pdf +0 -306
- data/site/build/site/en/linkmap.html +0 -231
- data/site/build/site/en/linkmap.pdf +0 -94
- data/site/build/site/en/locationmap.xml +0 -72
- data/site/build/site/en/machineLearning.html +0 -325
- data/site/build/site/en/machineLearning.pdf +0 -337
- data/site/build/site/en/neuralNetworks.html +0 -446
- data/site/build/site/en/neuralNetworks.pdf +0 -604
- data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
- data/site/build/site/en/skin/basic.css +0 -166
- data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
- data/site/build/site/en/skin/breadcrumbs.js +0 -237
- data/site/build/site/en/skin/fontsize.js +0 -166
- data/site/build/site/en/skin/getBlank.js +0 -40
- data/site/build/site/en/skin/getMenu.js +0 -45
- data/site/build/site/en/skin/images/README.txt +0 -1
- data/site/build/site/en/skin/images/add.jpg +0 -0
- data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/skin/images/chapter.gif +0 -0
- data/site/build/site/en/skin/images/chapter_open.gif +0 -0
- data/site/build/site/en/skin/images/current.gif +0 -0
- data/site/build/site/en/skin/images/error.png +0 -0
- data/site/build/site/en/skin/images/external-link.gif +0 -0
- data/site/build/site/en/skin/images/fix.jpg +0 -0
- data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
- data/site/build/site/en/skin/images/hack.jpg +0 -0
- data/site/build/site/en/skin/images/header_white_line.gif +0 -0
- data/site/build/site/en/skin/images/info.png +0 -0
- data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
- data/site/build/site/en/skin/images/label.gif +0 -0
- data/site/build/site/en/skin/images/page.gif +0 -0
- data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
- data/site/build/site/en/skin/images/poddoc.png +0 -0
- data/site/build/site/en/skin/images/printer.gif +0 -0
- data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/remove.jpg +0 -0
- data/site/build/site/en/skin/images/rss.png +0 -0
- data/site/build/site/en/skin/images/spacer.gif +0 -0
- data/site/build/site/en/skin/images/success.png +0 -0
- data/site/build/site/en/skin/images/txtdoc.png +0 -0
- data/site/build/site/en/skin/images/update.jpg +0 -0
- data/site/build/site/en/skin/images/valid-html401.png +0 -0
- data/site/build/site/en/skin/images/vcss.png +0 -0
- data/site/build/site/en/skin/images/warning.png +0 -0
- data/site/build/site/en/skin/images/xmldoc.gif +0 -0
- data/site/build/site/en/skin/menu.js +0 -48
- data/site/build/site/en/skin/note.txt +0 -50
- data/site/build/site/en/skin/print.css +0 -54
- data/site/build/site/en/skin/profile.css +0 -163
- data/site/build/site/en/skin/prototype.js +0 -1257
- data/site/build/site/en/skin/screen.css +0 -587
- data/site/build/site/en/svn.html +0 -223
- data/site/build/site/en/svn.pdf +0 -239
- data/site/build/site/en/wholesite.pdf +0 -1686
- data/site/build/tmp/brokenlinks.xml +0 -2
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/test/decision_tree/id3_test.rb +0 -209
@@ -0,0 +1,148 @@
|
|
1
|
+
# Author:: Sergio Fierens (Implementation only)
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require 'set'
|
11
|
+
require File.dirname(__FILE__) + '/classifier_helper'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
module Classifiers
|
15
|
+
|
16
|
+
# = Introduction
|
17
|
+
#
|
18
|
+
# The idea of the OneR algorithm is to identify the single
|
19
|
+
# attribute to use to classify data that makes
|
20
|
+
# fewest prediction errors.
|
21
|
+
# It generates rules based on a single attribute.
|
22
|
+
class OneR
|
23
|
+
|
24
|
+
attr_accessor :data_labels, :rule
|
25
|
+
include ClassifierHelper
|
26
|
+
|
27
|
+
# Build a new OneR classifier. If your data is classified with N attributes
|
28
|
+
# and M examples, then your data examples must have the following format:
|
29
|
+
#
|
30
|
+
# [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
|
31
|
+
# [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
|
32
|
+
# ...
|
33
|
+
# [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
|
34
|
+
# ]
|
35
|
+
#
|
36
|
+
# e.g.
|
37
|
+
# [ ['New York', '<30', 'M', 'Y'],
|
38
|
+
# ['Chicago', '<30', 'M', 'Y'],
|
39
|
+
# ['Chicago', '<30', 'F', 'Y'],
|
40
|
+
# ['New York', '<30', 'M', 'Y'],
|
41
|
+
# ['New York', '<30', 'M', 'Y'],
|
42
|
+
# ['Chicago', '[30-50)', 'M', 'Y'],
|
43
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
44
|
+
# ['Chicago', '[30-50)', 'F', 'Y'],
|
45
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
46
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
47
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
48
|
+
# ['New York', '[50-80]', 'M', 'N'],
|
49
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
50
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
51
|
+
# ['Chicago', '>80', 'F', 'Y']
|
52
|
+
# ]
|
53
|
+
#
|
54
|
+
# Data labels must have the following format:
|
55
|
+
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
56
|
+
#
|
57
|
+
# If you do not provide labels for your data, the following labels will
|
58
|
+
# be created by default:
|
59
|
+
# [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
|
60
|
+
#
|
61
|
+
def build(data_examples, data_labels = nil)
  check_data_examples(data_examples)
  @data_labels = data_labels ? data_labels : default_data_labels(data_examples)

  # When num_attributes reports a single column there is no attribute to
  # build a rule on, so the work is delegated to a ZeroR classifier.
  if num_attributes(data_examples) == 1
    @zero_r = ZeroR.new.build(data_examples, data_labels)
    return self
  end
  @zero_r = nil

  domains = build_domains(data_examples)
  @rule = nil
  # The last domain belongs to the class column; every column before it is a
  # candidate attribute. The slice must start at 0: starting at 1 shortened
  # the index range by one, so the last attribute was never evaluated.
  domains[0...-1].each_index do |attr_index|
    rule = build_rule(data_examples, attr_index, domains)
    # Keep the rule that classifies the most training rows correctly;
    # on a tie the earlier attribute is kept.
    @rule = rule if !@rule || rule[:correct] > @rule[:correct]
  end
  self
end
|
78
|
+
|
79
|
+
# You can evaluate new data, predicting its class.
|
80
|
+
# e.g.
|
81
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
82
|
+
def eval(data)
  if @zero_r
    # A ZeroR fallback was built: it answers for every instance.
    @zero_r.eval(data)
  else
    # Look up the value of the rule's attribute in the rule table;
    # a value without an entry yields nil.
    rule_attr_value = data[@rule[:attr_index]]
    @rule[:rule][rule_attr_value]
  end
end
|
87
|
+
|
88
|
+
# This method returns the generated rules in ruby code.
|
89
|
+
# e.g.
|
90
|
+
#
|
91
|
+
# classifier.to_s
|
92
|
+
# # => if age_range == '<30' then marketing_target = 'Y'
|
93
|
+
# elsif age_range == '[30-50)' then marketing_target = 'N'
|
94
|
+
# elsif age_range == '[50-80]' then marketing_target = 'N'
|
95
|
+
# end
|
96
|
+
#
|
97
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
98
|
+
# marketing_target = nil
|
99
|
+
# eval classifier.to_s
|
100
|
+
# puts marketing_target
|
101
|
+
# # => 'Y'
|
102
|
+
def to_s
  return @zero_r.to_s if @zero_r

  attribute_name = @data_labels[@rule[:attr_index]]
  target_name = @data_labels.last
  # One "<attr> == '<value>' then <class> = '<value>'" clause per rule entry,
  # in the rule table's own order.
  clauses = @rule[:rule].map do |attr_value, class_value|
    "#{attribute_name} == '#{attr_value}' then #{target_name} = '#{class_value}'"
  end
  "if " + clauses.join("\nelsif ") + "\nend"
end
|
112
|
+
|
113
|
+
protected
|
114
|
+
# Collects, for every column position, the set of distinct values that
# appear in that position across +data_examples+.
# Returns an array of Sets, one per column reported by num_attributes.
def build_domains(data_examples)
  column_count = num_attributes(data_examples)
  domains = Array.new(column_count) { Set.new }
  data_examples.each do |row|
    row.each_with_index { |value, column| domains[column] << value }
  end
  domains
end
|
121
|
+
|
122
|
+
# Builds the candidate rule for the attribute at +attr_index+.
#
# For every value in that attribute's domain the most frequent class among
# the matching rows is chosen (the first class seen wins ties).
#
# Returns a hash with:
#   :attr_index - the attribute column the rule tests
#   :rule       - { attribute value => predicted class value }
#   :correct    - number of training rows the rule classifies correctly
def build_rule(data_examples, attr_index, domains)
  # One zero-defaulting class counter per known attribute value.
  counts = {}
  domains[attr_index].each { |value| counts[value] = Hash.new(0) }
  data_examples.each { |row| counts[row[attr_index]][row.last] += 1 }

  rule = {}
  correct = 0
  counts.each do |value, class_counts|
    best = 0
    class_counts.each do |klass, freq|
      next unless freq > best
      rule[value] = klass
      best = freq
    end
    correct += best
  end
  { :attr_index => attr_index, :rule => rule, :correct => correct }
end
|
145
|
+
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
@@ -0,0 +1,231 @@
|
|
1
|
+
# Author:: Sergio Fierens (Implementation only, Cendrowska is
|
2
|
+
# the creator of the algorithm)
|
3
|
+
# License:: MPL 1.1
|
4
|
+
# Project:: ai4r
|
5
|
+
# Url:: http://ai4r.rubyforge.org/
|
6
|
+
#
|
7
|
+
# You can redistribute it and/or modify it under the terms of
|
8
|
+
# the Mozilla Public License version 1.1 as published by the
|
9
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
10
|
+
#
|
11
|
+
# J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
|
12
|
+
# International Journal of Man-Machine Studies. 27(4):349-370.
|
13
|
+
|
14
|
+
require File.dirname(__FILE__) + '/classifier_helper'
|
15
|
+
|
16
|
+
module Ai4r
|
17
|
+
module Classifiers
|
18
|
+
|
19
|
+
# = Introduction
|
20
|
+
# This is an implementation of the PRISM algorithm (Cendrowska, 1987)
|
21
|
+
# Given a set of preclassified examples, it builds a set of rules
|
22
|
+
# to predict the class of other instances.
|
23
|
+
#
|
24
|
+
# J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
|
25
|
+
# International Journal of Man-Machine Studies. 27(4):349-370.
|
26
|
+
class Prism
|
27
|
+
|
28
|
+
attr_accessor :data_labels, :rules
|
29
|
+
include ClassifierHelper
|
30
|
+
|
31
|
+
# Build a new Prism classifier. If your data is classified with N attributes
|
32
|
+
# and M examples, then your data examples must have the following format:
|
33
|
+
#
|
34
|
+
# [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
|
35
|
+
# [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
|
36
|
+
# ...
|
37
|
+
# [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
|
38
|
+
# ]
|
39
|
+
#
|
40
|
+
# e.g.
|
41
|
+
# [ ['New York', '<30', 'M', 'Y'],
|
42
|
+
# ['Chicago', '<30', 'M', 'Y'],
|
43
|
+
# ['Chicago', '<30', 'F', 'Y'],
|
44
|
+
# ['New York', '<30', 'M', 'Y'],
|
45
|
+
# ['New York', '<30', 'M', 'Y'],
|
46
|
+
# ['Chicago', '[30-50)', 'M', 'Y'],
|
47
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
48
|
+
# ['Chicago', '[30-50)', 'F', 'Y'],
|
49
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
50
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
51
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
52
|
+
# ['New York', '[50-80]', 'M', 'N'],
|
53
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
54
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
55
|
+
# ['Chicago', '>80', 'F', 'Y']
|
56
|
+
# ]
|
57
|
+
#
|
58
|
+
# Data labels must have the following format:
|
59
|
+
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
60
|
+
#
|
61
|
+
# If you do not provide labels for your data, the following labels will
|
62
|
+
# be created by default:
|
63
|
+
# [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
|
64
|
+
#
|
65
|
+
def build(data_examples, data_labels=nil)
  check_data_examples(data_examples)
  @data_labels = data_labels || default_data_labels(data_examples)

  # The last domain holds the distinct class values.
  class_values = build_domains(data_examples).last
  # Work on a shallow copy: covered rows are dropped as rules are found.
  remaining = data_examples.map { |row| row }
  @rules = []
  class_values.each do |class_value|
    # Keep inducing rules for this class until none of its rows is left.
    while has_class_value(remaining, class_value)
      new_rule = build_rule(class_value, remaining)
      @rules.push(new_rule)
      remaining = remaining.reject do |row|
        matches_conditions(row, new_rule[:conditions])
      end
    end
  end
  self
end
|
80
|
+
|
81
|
+
# You can evaluate new data, predicting its class.
|
82
|
+
# e.g.
|
83
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
84
|
+
def eval(instace)
  # Rules are tried in induction order; the first one whose conditions all
  # hold decides the class. nil means no rule matched.
  hit = @rules.find { |rule| matches_conditions(instace, rule[:conditions]) }
  hit ? hit[:class_value] : nil
end
|
90
|
+
|
91
|
+
# This method returns the generated rules in ruby code.
|
92
|
+
# e.g.
|
93
|
+
#
|
94
|
+
# classifier.to_s
|
95
|
+
# # => if age_range == '<30' then marketing_target = 'Y'
|
96
|
+
# elsif age_range == '>80' then marketing_target = 'Y'
|
97
|
+
# elsif city == 'Chicago' and age_range == '[30-50)' then marketing_target = 'Y'
|
98
|
+
# else marketing_target = 'N'
|
99
|
+
# end
|
100
|
+
#
|
101
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
102
|
+
# age_range = '[30-50)'
|
103
|
+
# city = 'New York'
|
104
|
+
# eval(classifier.to_s)
|
105
|
+
# puts marketing_target
|
106
|
+
# # => 'N'
|
107
|
+
def to_s
  head = @rules.first
  lines = ["if #{join_terms(head)} then #{then_clause(head)}"]
  # Middle rules become elsif branches.
  @rules[1...-1].each do |rule|
    lines << "elsif #{join_terms(rule)} then #{then_clause(rule)}"
  end
  # The final rule is emitted as the unconditional else branch.
  lines << "else #{then_clause(@rules.last)}" if @rules.size > 1
  lines << "end"
  lines.join("\n")
end
|
116
|
+
|
117
|
+
protected
|
118
|
+
|
119
|
+
# True when at least one row of +instances+ ends with +class_value+.
def has_class_value(instances, class_value)
  instances.any? { |row| row.last == class_value }
end
|
123
|
+
|
124
|
+
# True when +rule+ covers no row of +instances+ that belongs to a class
# other than the rule's own :class_value.
def is_perfect(instances, rule)
  expected = rule[:class_value]
  wrongly_covered = instances.any? do |row|
    row.last != expected && matches_conditions(row, rule[:conditions])
  end
  !wrongly_covered
end
|
131
|
+
|
132
|
+
# True when +data+ satisfies every { attribute label => required value }
# pair in +conditions+. An empty condition set matches everything.
def matches_conditions(data, conditions)
  conditions.all? do |label, required_value|
    data[get_attr_index(label)] == required_value
  end
end
|
138
|
+
|
139
|
+
# Column position of +attr_label+ within @data_labels (nil when absent).
def get_attr_index(attr_label)
  @data_labels.index(attr_label)
end
|
142
|
+
|
143
|
+
# Value that the row +data+ holds for the attribute named +attr_label+.
def get_attr_value(data, attr_label)
  column = get_attr_index(attr_label)
  data[column]
end
|
146
|
+
|
147
|
+
# Induces one rule predicting +class_value+ from +instances+.
#
# Starting from an empty condition set, the rule is specialised one
# attribute test at a time (the test with the best p/t ratio among the rows
# still covered) until it covers no row of another class, or until every
# attribute has been used.
#
# Returns { :class_value => class_value, :conditions => { label => value } }.
def build_rule(class_value, instances)
  rule = {:class_value => class_value, :conditions => {}}
  rule_instances = instances.collect { |data| data }
  attributes = @data_labels[0...-1].collect { |label| label }
  until is_perfect(instances, rule) || attributes.empty?
    freq_table = build_freq_table(rule_instances, attributes, class_value)
    condition = get_condition(freq_table)
    # No candidate test left (e.g. no covered rows): stop instead of
    # passing nil to merge!.
    break unless condition
    rule[:conditions].merge!(condition)
    # Each attribute may be tested once per rule. Without removing it the
    # "attributes.empty?" exit could never fire, and contradictory rows
    # (same attribute values, different classes) made the same condition be
    # re-selected forever.
    attributes -= condition.keys
    rule_instances = rule_instances.select do |data|
      matches_conditions(data, condition)
    end
  end
  rule
end
|
161
|
+
|
162
|
+
# Returns a structure with the following format:
|
163
|
+
# => {attr1_label => { :attr1_value1 => [p, t], attr1_value2 => [p, t], ... },
|
164
|
+
# attr2_label => { :attr2_value1 => [p, t], attr2_value2 => [p, t], ... },
|
165
|
+
# ...
|
166
|
+
# }
|
167
|
+
# where p is the number of instances classified as class_value
|
168
|
+
# with that attribute value, and t is the total number of instances with
|
169
|
+
# that attribute value
|
170
|
+
def build_freq_table(rule_instances, attributes, class_value)
  table = {}
  rule_instances.each do |row|
    # 1 when the row belongs to the target class, 0 otherwise.
    hit = (row.last == class_value) ? 1 : 0
    attributes.each do |label|
      value = get_attr_value(row, label)
      # Unseen values read as [0, 0]; a fresh pair is stored on every update,
      # so the shared default array is never mutated.
      table[label] ||= Hash.new([0, 0])
      hits, total = table[label][value]
      table[label][value] = [hits + hit, total + 1]
    end
  end
  table
end
|
183
|
+
|
184
|
+
# returns a single conditional term: {attrN_label => attrN_valueM}
|
185
|
+
# selecting the attribute with higher pt ratio
|
186
|
+
# (occurrences of attribute value classified as class_value /
|
187
|
+
# occurrences of attribute value)
|
188
|
+
def get_condition(freq_table)
  winner = nil
  winning_pt = [0, 0]
  freq_table.each_pair do |label, value_freqs|
    value_freqs.each_pair do |value, pt|
      # Only a strictly better pair replaces the current winner, so the
      # first of several equally good candidates is kept.
      next unless better_pt(pt, winning_pt)
      winner = { label => value }
      winning_pt = pt
    end
  end
  winner
end
|
201
|
+
|
202
|
+
# pt = [p, t]
|
203
|
+
# p = occurrences of attribute value with instance classified as class_value
|
204
|
+
# t = occurrences of attribute value
|
205
|
+
# a pt is better if:
|
206
|
+
# 1- its ratio is higher
|
207
|
+
# 2- its ratio is equal, and has a higher p
|
208
|
+
def better_pt(pt, best_pt)
  hits, total = pt
  best_hits, best_total = best_pt
  # A pair with no occurrences never wins; anything beats an empty best.
  return false if total.zero?
  return true if best_total.zero?

  # Compare hits/total against best_hits/best_total by cross-multiplying,
  # which avoids division.
  case (hits * best_total) <=> (best_hits * total)
  when 1 then true
  when 0 then hits > best_hits
  else false
  end
end
|
216
|
+
|
217
|
+
# Renders the conditions of +rule+ as "<label> == '<value>'" terms joined
# with " and ".
def join_terms(rule)
  rule[:conditions].map { |label, value| "#{label} == '#{value}'" }.join(" and ")
end
|
224
|
+
|
225
|
+
# Renders the assignment "<class label> = '<class value>'" for +rule+.
def then_clause(rule)
  target = @data_labels.last
  outcome = rule[:class_value]
  "#{target} = '#{outcome}'"
end
|
228
|
+
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Author:: Sergio Fierens (Implementation only)
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/classifier_helper'
|
11
|
+
|
12
|
+
module Ai4r
|
13
|
+
module Classifiers
|
14
|
+
# = Introduction
|
15
|
+
#
|
16
|
+
# The idea behind the ZeroR classifier is to identify the
|
17
|
+
# most common class value in the training set.
|
18
|
+
# It always returns that value when evaluating an instance.
|
19
|
+
# It is frequently used as a baseline for evaluating other machine learning
|
20
|
+
# algorithms.
|
21
|
+
class ZeroR
|
22
|
+
|
23
|
+
attr_accessor :data_labels, :class_value
|
24
|
+
|
25
|
+
include ClassifierHelper
|
26
|
+
|
27
|
+
# Build a new ZeroR classifier. If your data is classified with N attributes
|
28
|
+
# and M examples, then your data examples must have the following format:
|
29
|
+
#
|
30
|
+
# [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
|
31
|
+
# [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
|
32
|
+
# ...
|
33
|
+
# [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
|
34
|
+
# ]
|
35
|
+
#
|
36
|
+
# e.g.
|
37
|
+
# [ ['New York', '<30', 'M', 'Y'],
|
38
|
+
# ['Chicago', '<30', 'M', 'Y'],
|
39
|
+
# ['Chicago', '<30', 'F', 'Y'],
|
40
|
+
# ['New York', '<30', 'M', 'Y'],
|
41
|
+
# ['New York', '<30', 'M', 'Y'],
|
42
|
+
# ['Chicago', '[30-50)', 'M', 'Y'],
|
43
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
44
|
+
# ['Chicago', '[30-50)', 'F', 'Y'],
|
45
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
46
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
47
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
48
|
+
# ['New York', '[50-80]', 'M', 'N'],
|
49
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
50
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
51
|
+
# ['Chicago', '>80', 'F', 'Y']
|
52
|
+
# ]
|
53
|
+
#
|
54
|
+
# Data labels must have the following format:
|
55
|
+
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
56
|
+
#
|
57
|
+
# If you do not provide labels for your data, the following labels will
|
58
|
+
# be created by default:
|
59
|
+
# [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
|
60
|
+
#
|
61
|
+
def build(data_examples, data_labels=nil)
  check_data_examples(data_examples)
  @data_labels = data_labels ? data_labels : default_data_labels(data_examples)

  frequencies = Hash.new(0)
  max_freq = 0
  # Reset so that rebuilding never leaves a value from a previous data set.
  @class_value = nil
  data_examples.each do |example|
    class_value = example.last
    # The running count must be stored back: without it every class looked
    # like it had frequency 1 and the first row's class always won.
    frequencies[class_value] += 1
    # Strict comparison: on a tie, the class that reached the count first
    # is kept.
    if max_freq < frequencies[class_value]
      max_freq = frequencies[class_value]
      @class_value = class_value
    end
  end
  self
end
|
78
|
+
|
79
|
+
# You can evaluate new data, predicting its class.
|
80
|
+
# e.g.
|
81
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
82
|
+
def eval(data)
  # The instance itself is ignored: the stored class value is the answer
  # for every input.
  return @class_value
end
|
85
|
+
|
86
|
+
# This method returns the generated rules in ruby code.
|
87
|
+
# e.g.
|
88
|
+
#
|
89
|
+
# classifier.to_s
|
90
|
+
# # => marketing_target='Y'
|
91
|
+
#
|
92
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
93
|
+
# marketing_target = nil
|
94
|
+
# eval classifier.to_s
|
95
|
+
# puts marketing_target
|
96
|
+
# # => 'Y'
|
97
|
+
def to_s
  # "<class label> = '<class value>'"
  target = @data_labels.last
  "#{target} = '#{@class_value}'"
end
|
100
|
+
|
101
|
+
end
|
102
|
+
|
103
|
+
end
|
104
|
+
end
|