ai4r 1.13 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +174 -0
- data/examples/classifiers/hyperpipes_data.csv +14 -0
- data/examples/classifiers/hyperpipes_example.rb +22 -0
- data/examples/classifiers/ib1_example.rb +12 -0
- data/examples/classifiers/id3_example.rb +15 -10
- data/examples/classifiers/id3_graphviz_example.rb +17 -0
- data/examples/classifiers/logistic_regression_example.rb +11 -0
- data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
- data/examples/classifiers/naive_bayes_example.rb +12 -13
- data/examples/classifiers/one_r_example.rb +27 -0
- data/examples/classifiers/parameter_tutorial.rb +29 -0
- data/examples/classifiers/prism_nominal_example.rb +15 -0
- data/examples/classifiers/prism_numeric_example.rb +21 -0
- data/examples/classifiers/simple_linear_regression_example.rb +14 -11
- data/examples/classifiers/zero_and_one_r_example.rb +34 -0
- data/examples/classifiers/zero_one_r_data.csv +8 -0
- data/examples/clusterers/clusterer_example.rb +40 -34
- data/examples/clusterers/dbscan_example.rb +17 -0
- data/examples/clusterers/dendrogram_example.rb +17 -0
- data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
- data/examples/clusterers/kmeans_custom_example.rb +26 -0
- data/examples/genetic_algorithm/bitstring_example.rb +41 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
- data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
- data/examples/neural_network/backpropagation_example.rb +48 -48
- data/examples/neural_network/hopfield_example.rb +45 -0
- data/examples/neural_network/patterns_with_base_noise.rb +39 -39
- data/examples/neural_network/patterns_with_noise.rb +41 -39
- data/examples/neural_network/train_epochs_callback.rb +25 -0
- data/examples/neural_network/training_patterns.rb +39 -39
- data/examples/neural_network/transformer_text_classification.rb +78 -0
- data/examples/neural_network/xor_example.rb +23 -22
- data/examples/reinforcement/q_learning_example.rb +10 -0
- data/examples/som/som_data.rb +155 -152
- data/examples/som/som_multi_node_example.rb +12 -13
- data/examples/som/som_single_example.rb +12 -15
- data/examples/transformer/decode_classifier_example.rb +68 -0
- data/examples/transformer/deterministic_example.rb +10 -0
- data/examples/transformer/seq2seq_example.rb +16 -0
- data/lib/ai4r/classifiers/classifier.rb +24 -16
- data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
- data/lib/ai4r/classifiers/ib1.rb +122 -32
- data/lib/ai4r/classifiers/id3.rb +524 -145
- data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
- data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
- data/lib/ai4r/classifiers/one_r.rb +112 -44
- data/lib/ai4r/classifiers/prism.rb +167 -76
- data/lib/ai4r/classifiers/random_forest.rb +72 -0
- data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
- data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
- data/lib/ai4r/classifiers/votes.rb +57 -0
- data/lib/ai4r/classifiers/zero_r.rb +71 -30
- data/lib/ai4r/clusterers/average_linkage.rb +46 -27
- data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
- data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
- data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
- data/lib/ai4r/clusterers/clusterer.rb +29 -14
- data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
- data/lib/ai4r/clusterers/dbscan.rb +134 -0
- data/lib/ai4r/clusterers/diana.rb +75 -49
- data/lib/ai4r/clusterers/k_means.rb +270 -135
- data/lib/ai4r/clusterers/median_linkage.rb +49 -33
- data/lib/ai4r/clusterers/single_linkage.rb +196 -88
- data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
- data/lib/ai4r/data/data_set.rb +223 -103
- data/lib/ai4r/data/parameterizable.rb +31 -25
- data/lib/ai4r/data/proximity.rb +62 -62
- data/lib/ai4r/data/statistics.rb +46 -35
- data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
- data/lib/ai4r/experiment/split.rb +39 -0
- data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
- data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
- data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
- data/lib/ai4r/neural_network/activation_functions.rb +37 -0
- data/lib/ai4r/neural_network/backpropagation.rb +399 -134
- data/lib/ai4r/neural_network/hopfield.rb +175 -58
- data/lib/ai4r/neural_network/transformer.rb +194 -0
- data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
- data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
- data/lib/ai4r/reinforcement/q_learning.rb +51 -0
- data/lib/ai4r/search/a_star.rb +76 -0
- data/lib/ai4r/search/bfs.rb +50 -0
- data/lib/ai4r/search/dfs.rb +50 -0
- data/lib/ai4r/search/mcts.rb +118 -0
- data/lib/ai4r/search.rb +12 -0
- data/lib/ai4r/som/distance_metrics.rb +29 -0
- data/lib/ai4r/som/layer.rb +28 -17
- data/lib/ai4r/som/node.rb +61 -32
- data/lib/ai4r/som/som.rb +158 -41
- data/lib/ai4r/som/two_phase_layer.rb +21 -25
- data/lib/ai4r/version.rb +3 -0
- data/lib/ai4r.rb +57 -28
- metadata +79 -109
- data/README.rdoc +0 -39
- data/test/classifiers/hyperpipes_test.rb +0 -84
- data/test/classifiers/ib1_test.rb +0 -78
- data/test/classifiers/id3_test.rb +0 -220
- data/test/classifiers/multilayer_perceptron_test.rb +0 -79
- data/test/classifiers/naive_bayes_test.rb +0 -43
- data/test/classifiers/one_r_test.rb +0 -62
- data/test/classifiers/prism_test.rb +0 -85
- data/test/classifiers/simple_linear_regression_test.rb +0 -37
- data/test/classifiers/zero_r_test.rb +0 -50
- data/test/clusterers/average_linkage_test.rb +0 -51
- data/test/clusterers/bisecting_k_means_test.rb +0 -66
- data/test/clusterers/centroid_linkage_test.rb +0 -53
- data/test/clusterers/complete_linkage_test.rb +0 -57
- data/test/clusterers/diana_test.rb +0 -69
- data/test/clusterers/k_means_test.rb +0 -167
- data/test/clusterers/median_linkage_test.rb +0 -53
- data/test/clusterers/single_linkage_test.rb +0 -122
- data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
- data/test/clusterers/ward_linkage_test.rb +0 -53
- data/test/clusterers/weighted_average_linkage_test.rb +0 -53
- data/test/data/data_set_test.rb +0 -104
- data/test/data/proximity_test.rb +0 -87
- data/test/data/statistics_test.rb +0 -65
- data/test/experiment/classifier_evaluator_test.rb +0 -76
- data/test/genetic_algorithm/chromosome_test.rb +0 -57
- data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
- data/test/neural_network/backpropagation_test.rb +0 -82
- data/test/neural_network/hopfield_test.rb +0 -72
- data/test/som/som_test.rb +0 -97
data/lib/ai4r/classifiers/ib1.rb
CHANGED
@@ -1,21 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Author:: Sergio Fierens (Implementation only)
|
2
4
|
# License:: MPL 1.1
|
3
5
|
# Project:: ai4r
|
4
|
-
# Url::
|
6
|
+
# Url:: https://github.com/SergioFierens/ai4r
|
5
7
|
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
10
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
11
|
|
10
12
|
require 'set'
|
11
|
-
|
12
|
-
|
13
|
+
require_relative '../data/data_set'
|
14
|
+
require_relative '../classifiers/classifier'
|
13
15
|
|
14
16
|
module Ai4r
|
15
17
|
module Classifiers
|
16
|
-
|
17
18
|
# = Introduction
|
18
|
-
#
|
19
|
+
#
|
19
20
|
# IB1 algorithm implementation.
|
20
21
|
# IB1 is the simplest instance-based learning (IBL) algorithm.
|
21
22
|
#
|
@@ -26,45 +27,126 @@ module Ai4r
|
|
26
27
|
# it normalizes its attributes' ranges, processes instances
|
27
28
|
# incrementally, and has a simple policy for tolerating missing values
|
28
29
|
class IB1 < Classifier
|
29
|
-
|
30
|
-
|
30
|
+
attr_reader :data_set, :min_values, :max_values
|
31
|
+
|
32
|
+
parameters_info k: 'Number of nearest neighbors to consider. Default is 1.',
|
33
|
+
distance_function:
|
34
|
+
'Optional custom distance metric taking two instances.',
|
35
|
+
tie_break:
|
36
|
+
'Strategy used when neighbors vote tie. ' \
|
37
|
+
'Valid values are :first (default) and :random.',
|
38
|
+
random_seed:
|
39
|
+
'Seed for random tie-breaking when :tie_break is :random.'
|
40
|
+
|
41
|
+
# @return [Object]
|
42
|
+
def initialize
|
43
|
+
super()
|
44
|
+
@k = 1
|
45
|
+
@distance_function = nil
|
46
|
+
@tie_break = :first
|
47
|
+
@random_seed = nil
|
48
|
+
@rng = nil
|
49
|
+
end
|
31
50
|
|
32
51
|
# Build a new IB1 classifier. You must provide a DataSet instance
|
33
|
-
# as parameter. The last attribute of each item is considered as
|
52
|
+
# as parameter. The last attribute of each item is considered as
|
34
53
|
# the item class.
|
54
|
+
# @param data_set [Object]
|
55
|
+
# @return [Object]
|
35
56
|
def build(data_set)
|
36
57
|
data_set.check_not_empty
|
37
58
|
@data_set = data_set
|
38
59
|
@min_values = Array.new(data_set.data_labels.length)
|
39
60
|
@max_values = Array.new(data_set.data_labels.length)
|
40
61
|
data_set.data_items.each { |data_item| update_min_max(data_item[0...-1]) }
|
41
|
-
|
62
|
+
self
|
63
|
+
end
|
64
|
+
|
65
|
+
# Append a new instance to the internal dataset. The last element is
|
66
|
+
# considered the class label. Minimum and maximum values for numeric
|
67
|
+
# attributes are updated so that future distance calculations remain
|
68
|
+
# normalized.
|
69
|
+
# @param data_item [Object]
|
70
|
+
# @return [Object]
|
71
|
+
def add_instance(data_item)
|
72
|
+
@data_set << data_item
|
73
|
+
update_min_max(data_item[0...-1])
|
74
|
+
self
|
42
75
|
end
|
43
|
-
|
76
|
+
|
44
77
|
# You can evaluate new data, predicting its class.
|
45
78
|
# e.g.
|
46
|
-
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
79
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
80
|
+
#
|
81
|
+
# Evaluation does not update internal statistics, keeping the
|
82
|
+
# classifier state unchanged. Use +update_with_instance+ to
|
83
|
+
# incorporate new samples (pass +learn: true+ to also store them).
|
47
84
|
def eval(data)
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
85
|
+
neighbors = @data_set.data_items.map do |train_item|
|
86
|
+
[distance(data, train_item), train_item.last]
|
87
|
+
end
|
88
|
+
neighbors.sort_by! { |d, _| d }
|
89
|
+
k_limit = [@k, @data_set.data_items.length].min
|
90
|
+
k_neighbors = neighbors.first(k_limit)
|
91
|
+
|
92
|
+
# Include any other neighbors tied with the last selected distance
|
93
|
+
last_distance = k_neighbors.last[0]
|
94
|
+
neighbors[k_limit..].to_a.each do |dist, klass|
|
95
|
+
break if dist > last_distance
|
96
|
+
|
97
|
+
k_neighbors << [dist, klass]
|
57
98
|
end
|
58
|
-
|
99
|
+
|
100
|
+
counts = Hash.new(0)
|
101
|
+
k_neighbors.each { |(_dist, klass)| counts[klass] += 1 }
|
102
|
+
max_votes = counts.values.max
|
103
|
+
tied = counts.select { |_, v| v == max_votes }.keys
|
104
|
+
|
105
|
+
return tied.first if tied.length == 1
|
106
|
+
|
107
|
+
rng = @rng || (@random_seed.nil? ? Random.new : Random.new(@random_seed))
|
108
|
+
|
109
|
+
case @tie_break
|
110
|
+
when :random
|
111
|
+
tied.sample(random: rng)
|
112
|
+
else
|
113
|
+
k_neighbors.each { |(_dist, klass)| return klass if tied.include?(klass) }
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
# Returns an array with the +k_neighbors+ nearest instances from the training set
|
118
|
+
# for the given +data+ item. The returned elements are the training data
|
119
|
+
# rows themselves, ordered from the closest to the furthest.
|
120
|
+
# @param data [Object]
|
121
|
+
# @param k_neighbors [Integer] number of nearest training rows to return
|
122
|
+
# @return [Object]
|
123
|
+
def neighbors_for(data, k_neighbors)
|
124
|
+
update_min_max(data)
|
125
|
+
@data_set.data_items
|
126
|
+
.map { |train_item| [train_item, distance(data, train_item)] }
|
127
|
+
.sort_by(&:last)
|
128
|
+
.first(k_neighbors)
|
129
|
+
.map(&:first)
|
130
|
+
end
|
131
|
+
|
132
|
+
# Update min/max values with the provided instance attributes. If
|
133
|
+
# +learn+ is true, also append the instance to the training set so the
|
134
|
+
# classifier learns incrementally.
|
135
|
+
def update_with_instance(data_item, learn: false)
|
136
|
+
update_min_max(data_item[0...-1])
|
137
|
+
@data_set << data_item if learn
|
138
|
+
self
|
59
139
|
end
|
60
|
-
|
140
|
+
|
61
141
|
protected
|
62
142
|
|
63
143
|
# We keep in the state the min and max value of each attribute,
|
64
144
|
# to provide normalized distances between two values of a numeric attribute
|
145
|
+
# @param atts [Object]
|
146
|
+
# @return [Object]
|
65
147
|
def update_min_max(atts)
|
66
148
|
atts.each_with_index do |att, i|
|
67
|
-
if att
|
149
|
+
if att.is_a?(Numeric)
|
68
150
|
@min_values[i] = att if @min_values[i].nil? || @min_values[i] > att
|
69
151
|
@max_values[i] = att if @max_values[i].nil? || @max_values[i] < att
|
70
152
|
end
|
@@ -80,10 +162,15 @@ module Ai4r
|
|
80
162
|
# * 1 if both atts are missing
|
81
163
|
# * normalized numeric att value if other att value is missing and > 0.5
|
82
164
|
# * 1.0-normalized numeric att value if other att value is missing and < 0.5
|
83
|
-
|
165
|
+
# @param data_a [Object]
|
166
|
+
# @param data_b [Object]
|
167
|
+
# @return [Object]
|
168
|
+
def distance(data_a, data_b)
|
169
|
+
return @distance_function.call(data_a, data_b) if @distance_function
|
170
|
+
|
84
171
|
d = 0
|
85
|
-
|
86
|
-
att_b =
|
172
|
+
data_a.each_with_index do |att_a, i|
|
173
|
+
att_b = data_b[i]
|
87
174
|
if att_a.nil?
|
88
175
|
if att_b.is_a? Numeric
|
89
176
|
diff = norm(att_b, i)
|
@@ -93,7 +180,7 @@ module Ai4r
|
|
93
180
|
end
|
94
181
|
elsif att_a.is_a? Numeric
|
95
182
|
if att_b.is_a? Numeric
|
96
|
-
diff = norm(att_a, i) - norm(att_b, i)
|
183
|
+
diff = norm(att_a, i) - norm(att_b, i)
|
97
184
|
else
|
98
185
|
diff = norm(att_a, i)
|
99
186
|
diff = 1.0 - diff if diff < 0.5
|
@@ -105,17 +192,20 @@ module Ai4r
|
|
105
192
|
end
|
106
193
|
d += diff * diff
|
107
194
|
end
|
108
|
-
|
195
|
+
d
|
109
196
|
end
|
110
197
|
|
111
198
|
# Returns normalized value att
|
112
199
|
#
|
113
200
|
# index is the index of the attribute in the instance.
|
201
|
+
# @param att [Object]
|
202
|
+
# @param index [Object]
|
203
|
+
# @return [Object]
|
114
204
|
def norm(att, index)
|
115
205
|
return 0 if @min_values[index].nil?
|
116
|
-
|
206
|
+
|
207
|
+
1.0 * (att - @min_values[index]) / (@max_values[index] - @min_values[index])
|
117
208
|
end
|
118
|
-
|
119
209
|
end
|
120
210
|
end
|
121
211
|
end
|