ai4r 1.1 → 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +21 -20
- data/examples/decision_trees/id3_example.rb +3 -2
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +6 -6
- data/examples/neural_network/backpropagation_example.rb +2 -2
- data/lib/ai4r/classifiers/classifier_helper.rb +54 -0
- data/lib/ai4r/classifiers/id3.rb +356 -0
- data/lib/ai4r/classifiers/one_r.rb +148 -0
- data/lib/ai4r/classifiers/prism.rb +231 -0
- data/lib/ai4r/classifiers/zero_r.rb +104 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +272 -0
- data/lib/ai4r/neural_network/backpropagation.rb +271 -0
- data/site/build/tmp/locationmap.xml +14 -14
- data/site/build/tmp/output.xmap +23 -23
- data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
- data/site/build/tmp/plugins-1.xml +0 -11
- data/site/build/tmp/plugins-2.xml +54 -0
- data/site/build/tmp/projfilters.properties +41 -41
- data/site/build/webapp/WEB-INF/logs/core.log +681 -788
- data/site/build/webapp/WEB-INF/logs/error.log +281 -248
- data/site/build/webapp/WEB-INF/logs/sitemap.log +1015 -0
- data/site/src/documentation/content/xdocs/forum.html +9 -0
- data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +82 -68
- data/site/src/documentation/content/xdocs/index.xml +47 -18
- data/site/src/documentation/content/xdocs/machineLearning.xml +10 -9
- data/site/src/documentation/content/xdocs/neuralNetworks.xml +60 -36
- data/site/src/documentation/content/xdocs/site.xml +8 -5
- data/site/src/documentation/content/xdocs/svn.xml +11 -1
- data/site/src/documentation/resources/images/Thumbs.db +0 -0
- data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
- data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
- data/site/src/documentation/resources/images/jadeferret.png +0 -0
- data/site/src/documentation/resources/images/neural_network_example.png +0 -0
- data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
- data/site/src/documentation/skinconf.xml +18 -18
- data/test/classifiers/id3_test.rb +206 -0
- data/test/classifiers/one_r_test.rb +62 -0
- data/test/classifiers/prism_test.rb +83 -0
- data/test/classifiers/zero_r_test.rb +48 -0
- data/test/genetic_algorithm/chromosome_test.rb +41 -38
- data/test/genetic_algorithm/genetic_algorithm_test.rb +64 -61
- data/test/neural_network/backpropagation_test.rb +20 -18
- metadata +109 -199
- data/lib/decision_tree/id3.rb +0 -354
- data/lib/genetic_algorithm/genetic_algorithm.rb +0 -268
- data/lib/neural_network/backpropagation.rb +0 -264
- data/site/build/site/en/broken-links.xml +0 -2
- data/site/build/site/en/downloads.html +0 -187
- data/site/build/site/en/downloads.pdf +0 -151
- data/site/build/site/en/geneticAlgorithms.html +0 -564
- data/site/build/site/en/geneticAlgorithms.pdf +0 -911
- data/site/build/site/en/images/ai4r-logo.png +0 -0
- data/site/build/site/en/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/images/c.png +0 -0
- data/site/build/site/en/images/c_wbn.png +0 -0
- data/site/build/site/en/images/c_wn.png +0 -0
- data/site/build/site/en/images/ero.gif +0 -0
- data/site/build/site/en/images/europe2.png +0 -0
- data/site/build/site/en/images/europe3.png +0 -0
- data/site/build/site/en/images/fitness.png +0 -0
- data/site/build/site/en/images/instruction_arrow.png +0 -0
- data/site/build/site/en/images/my_email.png +0 -0
- data/site/build/site/en/images/rubyforge.png +0 -0
- data/site/build/site/en/images/s.png +0 -0
- data/site/build/site/en/images/s_wbn.png +0 -0
- data/site/build/site/en/images/s_wn.png +0 -0
- data/site/build/site/en/images/sigmoid.png +0 -0
- data/site/build/site/en/images/t.png +0 -0
- data/site/build/site/en/images/t_wbn.png +0 -0
- data/site/build/site/en/images/t_wn.png +0 -0
- data/site/build/site/en/index.html +0 -258
- data/site/build/site/en/index.pdf +0 -306
- data/site/build/site/en/linkmap.html +0 -231
- data/site/build/site/en/linkmap.pdf +0 -94
- data/site/build/site/en/locationmap.xml +0 -72
- data/site/build/site/en/machineLearning.html +0 -325
- data/site/build/site/en/machineLearning.pdf +0 -337
- data/site/build/site/en/neuralNetworks.html +0 -446
- data/site/build/site/en/neuralNetworks.pdf +0 -604
- data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
- data/site/build/site/en/skin/basic.css +0 -166
- data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
- data/site/build/site/en/skin/breadcrumbs.js +0 -237
- data/site/build/site/en/skin/fontsize.js +0 -166
- data/site/build/site/en/skin/getBlank.js +0 -40
- data/site/build/site/en/skin/getMenu.js +0 -45
- data/site/build/site/en/skin/images/README.txt +0 -1
- data/site/build/site/en/skin/images/add.jpg +0 -0
- data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/skin/images/chapter.gif +0 -0
- data/site/build/site/en/skin/images/chapter_open.gif +0 -0
- data/site/build/site/en/skin/images/current.gif +0 -0
- data/site/build/site/en/skin/images/error.png +0 -0
- data/site/build/site/en/skin/images/external-link.gif +0 -0
- data/site/build/site/en/skin/images/fix.jpg +0 -0
- data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
- data/site/build/site/en/skin/images/hack.jpg +0 -0
- data/site/build/site/en/skin/images/header_white_line.gif +0 -0
- data/site/build/site/en/skin/images/info.png +0 -0
- data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
- data/site/build/site/en/skin/images/label.gif +0 -0
- data/site/build/site/en/skin/images/page.gif +0 -0
- data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
- data/site/build/site/en/skin/images/poddoc.png +0 -0
- data/site/build/site/en/skin/images/printer.gif +0 -0
- data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/remove.jpg +0 -0
- data/site/build/site/en/skin/images/rss.png +0 -0
- data/site/build/site/en/skin/images/spacer.gif +0 -0
- data/site/build/site/en/skin/images/success.png +0 -0
- data/site/build/site/en/skin/images/txtdoc.png +0 -0
- data/site/build/site/en/skin/images/update.jpg +0 -0
- data/site/build/site/en/skin/images/valid-html401.png +0 -0
- data/site/build/site/en/skin/images/vcss.png +0 -0
- data/site/build/site/en/skin/images/warning.png +0 -0
- data/site/build/site/en/skin/images/xmldoc.gif +0 -0
- data/site/build/site/en/skin/menu.js +0 -48
- data/site/build/site/en/skin/note.txt +0 -50
- data/site/build/site/en/skin/print.css +0 -54
- data/site/build/site/en/skin/profile.css +0 -163
- data/site/build/site/en/skin/prototype.js +0 -1257
- data/site/build/site/en/skin/screen.css +0 -587
- data/site/build/site/en/svn.html +0 -223
- data/site/build/site/en/svn.pdf +0 -239
- data/site/build/site/en/wholesite.pdf +0 -1686
- data/site/build/tmp/brokenlinks.xml +0 -2
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/test/decision_tree/id3_test.rb +0 -209
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Author:: Sergio Fierens (Implementation only)
|
|
2
|
+
# License:: MPL 1.1
|
|
3
|
+
# Project:: ai4r
|
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
5
|
+
#
|
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
9
|
+
|
|
10
|
+
require 'set'
|
|
11
|
+
require File.dirname(__FILE__) + '/classifier_helper'
|
|
12
|
+
|
|
13
|
+
module Ai4r
|
|
14
|
+
module Classifiers
|
|
15
|
+
|
|
16
|
+
# = Introduction
|
|
17
|
+
#
|
|
18
|
+
# The idea of the OneR algorithm is to identify the single
|
|
19
|
+
# attribute to use to classify data that makes
|
|
20
|
+
# fewest prediction errors.
|
|
21
|
+
# It generates rules based on a single attribute.
|
|
22
|
+
class OneR
|
|
23
|
+
|
|
24
|
+
attr_accessor :data_labels, :rule
|
|
25
|
+
include ClassifierHelper
|
|
26
|
+
|
|
27
|
+
# Build a new OneR classifier. If your data is classified with N attributes
|
|
28
|
+
# and M examples, then your data examples must have the following format:
|
|
29
|
+
#
|
|
30
|
+
# [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
|
|
31
|
+
# [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
|
|
32
|
+
# ...
|
|
33
|
+
# [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
|
|
34
|
+
# ]
|
|
35
|
+
#
|
|
36
|
+
# e.g.
|
|
37
|
+
# [ ['New York', '<30', 'M', 'Y'],
|
|
38
|
+
# ['Chicago', '<30', 'M', 'Y'],
|
|
39
|
+
# ['Chicago', '<30', 'F', 'Y'],
|
|
40
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
41
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
42
|
+
# ['Chicago', '[30-50)', 'M', 'Y'],
|
|
43
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
44
|
+
# ['Chicago', '[30-50)', 'F', 'Y'],
|
|
45
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
46
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
47
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
48
|
+
# ['New York', '[50-80]', 'M', 'N'],
|
|
49
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
50
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
51
|
+
# ['Chicago', '>80', 'F', 'Y']
|
|
52
|
+
# ]
|
|
53
|
+
#
|
|
54
|
+
# Data labels must have the following format:
|
|
55
|
+
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
|
56
|
+
#
|
|
57
|
+
# If you do not provide labels for your data, the following labels will
|
|
58
|
+
# be created by default:
|
|
59
|
+
# [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
|
|
60
|
+
#
|
|
61
|
+
def build(data_examples, data_labels = nil)
  check_data_examples(data_examples)
  @data_labels = data_labels || default_data_labels(data_examples)
  # NOTE(review): num_attributes presumably counts every column, class
  # included (build_domains indexes the class column with it), so a value
  # of 1 means there is no attribute to build a rule on -- confirm against
  # ClassifierHelper. In that case the work is delegated to ZeroR.
  if num_attributes(data_examples) == 1
    @zero_r = ZeroR.new.build(data_examples, data_labels)
    return self
  end
  @zero_r = nil
  domains = build_domains(data_examples)
  @rule = nil
  # The last domain belongs to the class column; every index before it is
  # a candidate attribute. Iterating the real indices (instead of the
  # indices of a slice) makes sure the last attribute is evaluated too.
  (0...(domains.size - 1)).each do |attr_index|
    rule = build_rule(data_examples, attr_index, domains)
    # Strict comparison: on a tie the earliest attribute is kept.
    @rule = rule if !@rule || rule[:correct] > @rule[:correct]
  end
  self
end
|
|
78
|
+
|
|
79
|
+
# You can evaluate new data, predicting its class.
|
|
80
|
+
# e.g.
|
|
81
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
|
82
|
+
def eval(data)
  if @zero_r
    # No attribute rule was built: defer to the fallback classifier.
    @zero_r.eval(data)
  else
    # Look up the class mapped to the value of the selected attribute;
    # an attribute value absent from the rule table gives nil.
    selected = @rule[:attr_index]
    @rule[:rule][data[selected]]
  end
end
|
|
87
|
+
|
|
88
|
+
# This method returns the generated rules in ruby code.
|
|
89
|
+
# e.g.
|
|
90
|
+
#
|
|
91
|
+
# classifier.to_s
|
|
92
|
+
# # => if age_range == '<30' then marketing_target = 'Y'
|
|
93
|
+
# elsif age_range == '[30-50)' then marketing_target = 'N'
|
|
94
|
+
# elsif age_range == '[50-80]' then marketing_target = 'N'
|
|
95
|
+
# end
|
|
96
|
+
#
|
|
97
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
|
98
|
+
# marketing_target = nil
|
|
99
|
+
# eval classifier.to_s
|
|
100
|
+
# puts marketing_target
|
|
101
|
+
# # => 'Y'
|
|
102
|
+
def to_s
  return @zero_r.to_s if @zero_r
  attribute = @data_labels[@rule[:attr_index]]
  target = @data_labels.last
  # One "<attr> == '<value>' then <class> = '<value>'" clause per rule entry.
  clauses = @rule[:rule].map do |attr_value, class_value|
    "#{attribute} == '#{attr_value}' then #{target} = '#{class_value}'"
  end
  "if " + clauses.join("\nelsif ") + "\nend"
end
|
|
112
|
+
|
|
113
|
+
protected
|
|
114
|
+
# Collects, for each column position, the set of distinct values that
# appear in that position across all examples.
def build_domains(data_examples)
  domains = Array.new(num_attributes(data_examples)) { Set.new }
  data_examples.each do |row|
    row.each_with_index { |value, column| domains[column] << value }
  end
  domains
end
|
|
121
|
+
|
|
122
|
+
# Builds the single-attribute rule for the column at attr_index.
# Returns a hash with:
#   :attr_index => the column the rule tests
#   :rule       => { attribute value => most frequent class for that value }
#   :correct    => number of examples the rule classifies correctly
def build_rule(data_examples, attr_index, domains)
  # counts[attribute value][class value] => number of occurrences
  counts = {}
  domains[attr_index].each { |value| counts[value] = Hash.new(0) }
  data_examples.each do |row|
    counts[row[attr_index]][row.last] += 1
  end

  rule = {}
  hits = 0
  counts.each_pair do |value, by_class|
    top = 0
    by_class.each_pair do |class_value, freq|
      # Strict comparison: on a tie the class seen first is kept.
      next unless freq > top
      rule[value] = class_value
      top = freq
    end
    hits += top
  end
  {:attr_index => attr_index, :rule => rule, :correct => hits}
end
|
|
145
|
+
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
end
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# Author:: Sergio Fierens (Implementation only, Cendrowska is
|
|
2
|
+
# the creator of the algorithm)
|
|
3
|
+
# License:: MPL 1.1
|
|
4
|
+
# Project:: ai4r
|
|
5
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
6
|
+
#
|
|
7
|
+
# You can redistribute it and/or modify it under the terms of
|
|
8
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
9
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
10
|
+
#
|
|
11
|
+
# J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
|
|
12
|
+
# International Journal of Man-Machine Studies. 27(4):349-370.
|
|
13
|
+
|
|
14
|
+
require File.dirname(__FILE__) + '/classifier_helper'
|
|
15
|
+
|
|
16
|
+
module Ai4r
|
|
17
|
+
module Classifiers
|
|
18
|
+
|
|
19
|
+
# = Introduction
|
|
20
|
+
# This is an implementation of the PRISM algorithm (Cendrowska, 1987)
|
|
21
|
+
# Given a set of preclassified examples, it builds a set of rules
|
|
22
|
+
# to predict the class of other instances.
|
|
23
|
+
#
|
|
24
|
+
# J. Cendrowska (1987). PRISM: An algorithm for inducing modular rules.
|
|
25
|
+
# International Journal of Man-Machine Studies. 27(4):349-370.
|
|
26
|
+
class Prism
|
|
27
|
+
|
|
28
|
+
attr_accessor :data_labels, :rules
|
|
29
|
+
include ClassifierHelper
|
|
30
|
+
|
|
31
|
+
# Build a new Prism classifier. If your data is classified with N attributes
|
|
32
|
+
# and M examples, then your data examples must have the following format:
|
|
33
|
+
#
|
|
34
|
+
# [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
|
|
35
|
+
# [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
|
|
36
|
+
# ...
|
|
37
|
+
# [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
|
|
38
|
+
# ]
|
|
39
|
+
#
|
|
40
|
+
# e.g.
|
|
41
|
+
# [ ['New York', '<30', 'M', 'Y'],
|
|
42
|
+
# ['Chicago', '<30', 'M', 'Y'],
|
|
43
|
+
# ['Chicago', '<30', 'F', 'Y'],
|
|
44
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
45
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
46
|
+
# ['Chicago', '[30-50)', 'M', 'Y'],
|
|
47
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
48
|
+
# ['Chicago', '[30-50)', 'F', 'Y'],
|
|
49
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
50
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
51
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
52
|
+
# ['New York', '[50-80]', 'M', 'N'],
|
|
53
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
54
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
55
|
+
# ['Chicago', '>80', 'F', 'Y']
|
|
56
|
+
# ]
|
|
57
|
+
#
|
|
58
|
+
# Data labels must have the following format:
|
|
59
|
+
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
|
60
|
+
#
|
|
61
|
+
# If you do not provide labels for your data, the following labels will
|
|
62
|
+
# be created by default:
|
|
63
|
+
# [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
|
|
64
|
+
#
|
|
65
|
+
def build(data_examples, data_labels=nil)
  check_data_examples(data_examples)
  @data_labels = data_labels || default_data_labels(data_examples)
  class_values = build_domains(data_examples).last
  # Work on a shallow copy so the caller's array is left untouched.
  remaining = data_examples.to_a.dup
  @rules = []
  class_values.each do |class_value|
    # Keep inducing rules for this class until no remaining example has it.
    while has_class_value(remaining, class_value)
      learned = build_rule(class_value, remaining)
      @rules.push(learned)
      # Examples covered by the new rule are dropped from the working set.
      remaining = remaining.reject do |row|
        matches_conditions(row, learned[:conditions])
      end
    end
  end
  self
end
|
|
80
|
+
|
|
81
|
+
# You can evaluate new data, predicting its class.
|
|
82
|
+
# e.g.
|
|
83
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
|
84
|
+
def eval(instace)
  # Rules are tried in the order they were induced; the first match wins.
  winner = @rules.find do |candidate|
    matches_conditions(instace, candidate[:conditions])
  end
  # nil when no rule covers the instance.
  winner ? winner[:class_value] : nil
end
|
|
90
|
+
|
|
91
|
+
# This method returns the generated rules in ruby code.
|
|
92
|
+
# e.g.
|
|
93
|
+
#
|
|
94
|
+
# classifier.to_s
|
|
95
|
+
# # => if age_range == '<30' then marketing_target = 'Y'
|
|
96
|
+
# elsif age_range == '>80' then marketing_target = 'Y'
|
|
97
|
+
# elsif city == 'Chicago' and age_range == '[30-50)' then marketing_target = 'Y'
|
|
98
|
+
# else marketing_target = 'N'
|
|
99
|
+
# end
|
|
100
|
+
#
|
|
101
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
|
102
|
+
# age_range = '[30-50)'
|
|
103
|
+
# city = 'New York'
|
|
104
|
+
# eval(classifier.to_s)
|
|
105
|
+
# puts marketing_target
|
|
106
|
+
# # => 'N'
|
|
107
|
+
def to_s
  head = @rules.first
  lines = ["if #{join_terms(head)} then #{then_clause(head)}"]
  # Rules strictly between the first and the last become elsif branches.
  @rules[1...-1].each do |rule|
    lines << "elsif #{join_terms(rule)} then #{then_clause(rule)}"
  end
  # The last rule is rendered as the unconditional else branch.
  lines << "else #{then_clause(@rules.last)}" if @rules.size > 1
  lines << "end"
  lines.join("\n")
end
|
|
116
|
+
|
|
117
|
+
protected
|
|
118
|
+
|
|
119
|
+
# True when at least one instance ends with the given class value.
def has_class_value(instances, class_value)
  instances.any? { |row| row.last == class_value }
end
|
|
123
|
+
|
|
124
|
+
# A rule is perfect when every instance matching its conditions carries
# the rule's class value.
def is_perfect(instances, rule)
  expected = rule[:class_value]
  counterexample = instances.any? do |row|
    row.last != expected && matches_conditions(row, rule[:conditions])
  end
  !counterexample
end
|
|
131
|
+
|
|
132
|
+
# True when data holds the required value for every attribute label in
# conditions. An empty set of conditions matches everything.
def matches_conditions(data, conditions)
  conditions.all? do |attr_label, attr_value|
    data[get_attr_index(attr_label)] == attr_value
  end
end
|
|
138
|
+
|
|
139
|
+
# Position of attr_label within @data_labels, or nil when it is not a
# known label.
def get_attr_index(attr_label)
  labels = @data_labels
  labels.index(attr_label)
end
|
|
142
|
+
|
|
143
|
+
# Value stored in data at the column that corresponds to attr_label.
def get_attr_value(data, attr_label)
  column = get_attr_index(attr_label)
  data[column]
end
|
|
146
|
+
|
|
147
|
+
# Induces one rule predicting class_value from the given instances.
# Conditions are added greedily, one attribute at a time, until the rule
# is perfect or every attribute has been used. Returns
#   { :class_value => class_value, :conditions => { label => value, ... } }
def build_rule(class_value, instances)
  rule = {:class_value => class_value, :conditions => {}}
  rule_instances = instances.dup
  # Every label except the last one (the class label) is a candidate.
  attributes = @data_labels[0...-1].dup
  until is_perfect(instances, rule) || attributes.empty?
    freq_table = build_freq_table(rule_instances, attributes, class_value)
    condition = get_condition(freq_table)
    # Nothing left to specialise on (no instance is covered any more).
    break if condition.nil?
    rule[:conditions].merge!(condition)
    # An attribute already in the rule cannot refine it further. Dropping
    # it is what lets the attributes.empty? guard end the loop when the
    # data holds contradictory examples (same attributes, other class).
    attributes -= condition.keys
    rule_instances = rule_instances.select do |data|
      matches_conditions(data, condition)
    end
  end
  rule
end
|
|
161
|
+
|
|
162
|
+
# Returns a structure with the following format:
|
|
163
|
+
# => {attr1_label => { :attr1_value1 => [p, t], attr1_value2 => [p, t], ... },
|
|
164
|
+
# attr2_label => { :attr2_value1 => [p, t], attr2_value2 => [p, t], ... },
|
|
165
|
+
# ...
|
|
166
|
+
# }
|
|
167
|
+
# where p is the number of instances classified as class_value
|
|
168
|
+
# with that attribute value, and t is the total number of instances with
|
|
169
|
+
# that attribute value
|
|
170
|
+
def build_freq_table(rule_instances, attributes, class_value)
  freq_table = {}
  rule_instances.each do |row|
    # 1 when this row belongs to the target class, 0 otherwise.
    hit = (row.last == class_value) ? 1 : 0
    attributes.each do |attr_label|
      # The shared [0, 0] default is only read, never mutated: each update
      # stores a brand-new pair.
      freq_table[attr_label] ||= Hash.new([0, 0])
      value = get_attr_value(row, attr_label)
      positives, total = freq_table[attr_label][value]
      freq_table[attr_label][value] = [positives + hit, total + 1]
    end
  end
  freq_table
end
|
|
183
|
+
|
|
184
|
+
# returns a single conditional term: {attrN_label => attrN_valueM}
|
|
185
|
+
# selecting the attribute with higher pt ratio
|
|
186
|
+
# (occurrences of attribute value classified as class_value /
|
|
187
|
+
# occurrences of attribute value)
|
|
188
|
+
def get_condition(freq_table)
  winner = nil
  top = [0, 0]
  freq_table.each do |attr_label, attr_freqs|
    attr_freqs.each do |attr_value, pt|
      # Only a strictly better [p, t] pair replaces the current best, so
      # the first candidate wins ties.
      next unless better_pt(pt, top)
      winner = { attr_label => attr_value }
      top = pt
    end
  end
  winner
end
|
|
201
|
+
|
|
202
|
+
# pt = [p, t]
|
|
203
|
+
# p = occurrences of attribute value with instance classified as class_value
|
|
204
|
+
# t = occurrences of attribute value
|
|
205
|
+
# a pt is better if:
|
|
206
|
+
# 1- its ratio is higher
|
|
207
|
+
# 2- its ratio is equal, and has a higher p
|
|
208
|
+
def better_pt(pt, best_pt)
  hits, total = pt[0], pt[1]
  best_hits, best_total = best_pt[0], best_pt[1]
  # A pair with no occurrences never wins; anything beats an empty best.
  return false if total == 0
  return true if best_total == 0
  # Cross-multiplication compares hits/total with best_hits/best_total
  # without leaving integer arithmetic.
  lhs = hits * best_total
  rhs = best_hits * total
  lhs > rhs || (lhs == rhs && hits > best_hits)
end
|
|
216
|
+
|
|
217
|
+
# Renders the rule's conditions as "label == 'value'" terms joined by
# " and ". A rule without conditions yields an empty string.
def join_terms(rule)
  rendered = rule[:conditions].map do |attr_label, attr_value|
    "#{attr_label} == '#{attr_value}'"
  end
  rendered.join(" and ")
end
|
|
224
|
+
|
|
225
|
+
# Renders the assignment "<class label> = '<class value>'" for a rule.
def then_clause(rule)
  target = @data_labels.last
  predicted = rule[:class_value]
  "#{target} = '#{predicted}'"
end
|
|
228
|
+
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# Author:: Sergio Fierens (Implementation only)
|
|
2
|
+
# License:: MPL 1.1
|
|
3
|
+
# Project:: ai4r
|
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
|
5
|
+
#
|
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
|
9
|
+
|
|
10
|
+
require File.dirname(__FILE__) + '/classifier_helper'
|
|
11
|
+
|
|
12
|
+
module Ai4r
|
|
13
|
+
module Classifiers
|
|
14
|
+
# = Introduction
|
|
15
|
+
#
|
|
16
|
+
# The idea behind the ZeroR classifier is to identify the
|
|
17
|
+
# most common class value in the training set.
|
|
18
|
+
# It always returns that value when evaluating an instance.
|
|
19
|
+
# It is frequently used as a baseline for evaluating other machine learning
|
|
20
|
+
# algorithms.
|
|
21
|
+
class ZeroR
|
|
22
|
+
|
|
23
|
+
attr_accessor :data_labels, :class_value
|
|
24
|
+
|
|
25
|
+
include ClassifierHelper
|
|
26
|
+
|
|
27
|
+
# Build a new ZeroR classifier. If your data is classified with N attributes
|
|
28
|
+
# and M examples, then your data examples must have the following format:
|
|
29
|
+
#
|
|
30
|
+
# [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CLASS_VAL1],
|
|
31
|
+
# [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CLASS_VAL2],
|
|
32
|
+
# ...
|
|
33
|
+
# [ATTM1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CLASS_VALM],
|
|
34
|
+
# ]
|
|
35
|
+
#
|
|
36
|
+
# e.g.
|
|
37
|
+
# [ ['New York', '<30', 'M', 'Y'],
|
|
38
|
+
# ['Chicago', '<30', 'M', 'Y'],
|
|
39
|
+
# ['Chicago', '<30', 'F', 'Y'],
|
|
40
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
41
|
+
# ['New York', '<30', 'M', 'Y'],
|
|
42
|
+
# ['Chicago', '[30-50)', 'M', 'Y'],
|
|
43
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
44
|
+
# ['Chicago', '[30-50)', 'F', 'Y'],
|
|
45
|
+
# ['New York', '[30-50)', 'F', 'N'],
|
|
46
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
47
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
48
|
+
# ['New York', '[50-80]', 'M', 'N'],
|
|
49
|
+
# ['Chicago', '[50-80]', 'M', 'N'],
|
|
50
|
+
# ['New York', '[50-80]', 'F', 'N'],
|
|
51
|
+
# ['Chicago', '>80', 'F', 'Y']
|
|
52
|
+
# ]
|
|
53
|
+
#
|
|
54
|
+
# Data labels must have the following format:
|
|
55
|
+
# [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
|
56
|
+
#
|
|
57
|
+
# If you do not provide labels for your data, the following labels will
|
|
58
|
+
# be created by default:
|
|
59
|
+
# [ 'attribute_1', 'attribute_2', 'attribute_3', 'class_value' ]
|
|
60
|
+
#
|
|
61
|
+
def build(data_examples, data_labels=nil)
  check_data_examples(data_examples)
  @data_labels = data_labels || default_data_labels(data_examples)
  # class value => number of examples seen so far with that class
  frequencies = Hash.new(0)
  max_freq = 0
  @class_value = nil
  data_examples.each do |example|
    class_value = example.last
    # The running count must be stored back, otherwise every class looks
    # like it occurs once and the first example always wins.
    frequencies[class_value] += 1
    # Strict comparison: on a tie the class that reached the count first
    # is kept.
    if frequencies[class_value] > max_freq
      max_freq = frequencies[class_value]
      @class_value = class_value
    end
  end
  self
end
|
|
78
|
+
|
|
79
|
+
# You can evaluate new data, predicting its class.
|
|
80
|
+
# e.g.
|
|
81
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
|
82
|
+
def eval(data)
  # The instance is not inspected: the class chosen at build time is
  # returned for every input.
  return @class_value
end
|
|
85
|
+
|
|
86
|
+
# This method returns the generated rules in ruby code.
|
|
87
|
+
# e.g.
|
|
88
|
+
#
|
|
89
|
+
# classifier.to_s
|
|
90
|
+
# # => marketing_target='Y'
|
|
91
|
+
#
|
|
92
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
|
93
|
+
# marketing_target = nil
|
|
94
|
+
# eval classifier.to_s
|
|
95
|
+
# puts marketing_target
|
|
96
|
+
# # => 'Y'
|
|
97
|
+
def to_s
  # Assignment of the predicted class to the class label, as Ruby code.
  target = @data_labels.last
  "#{target} = '#{@class_value}'"
end
|
|
100
|
+
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
end
|
|
104
|
+
end
|