ai4r 1.13 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +174 -0
- data/examples/classifiers/hyperpipes_data.csv +14 -0
- data/examples/classifiers/hyperpipes_example.rb +22 -0
- data/examples/classifiers/ib1_example.rb +12 -0
- data/examples/classifiers/id3_example.rb +15 -10
- data/examples/classifiers/id3_graphviz_example.rb +17 -0
- data/examples/classifiers/logistic_regression_example.rb +11 -0
- data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
- data/examples/classifiers/naive_bayes_example.rb +12 -13
- data/examples/classifiers/one_r_example.rb +27 -0
- data/examples/classifiers/parameter_tutorial.rb +29 -0
- data/examples/classifiers/prism_nominal_example.rb +15 -0
- data/examples/classifiers/prism_numeric_example.rb +21 -0
- data/examples/classifiers/simple_linear_regression_example.rb +14 -11
- data/examples/classifiers/zero_and_one_r_example.rb +34 -0
- data/examples/classifiers/zero_one_r_data.csv +8 -0
- data/examples/clusterers/clusterer_example.rb +40 -34
- data/examples/clusterers/dbscan_example.rb +17 -0
- data/examples/clusterers/dendrogram_example.rb +17 -0
- data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
- data/examples/clusterers/kmeans_custom_example.rb +26 -0
- data/examples/genetic_algorithm/bitstring_example.rb +41 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
- data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
- data/examples/neural_network/backpropagation_example.rb +48 -48
- data/examples/neural_network/hopfield_example.rb +45 -0
- data/examples/neural_network/patterns_with_base_noise.rb +39 -39
- data/examples/neural_network/patterns_with_noise.rb +41 -39
- data/examples/neural_network/train_epochs_callback.rb +25 -0
- data/examples/neural_network/training_patterns.rb +39 -39
- data/examples/neural_network/transformer_text_classification.rb +78 -0
- data/examples/neural_network/xor_example.rb +23 -22
- data/examples/reinforcement/q_learning_example.rb +10 -0
- data/examples/som/som_data.rb +155 -152
- data/examples/som/som_multi_node_example.rb +12 -13
- data/examples/som/som_single_example.rb +12 -15
- data/examples/transformer/decode_classifier_example.rb +68 -0
- data/examples/transformer/deterministic_example.rb +10 -0
- data/examples/transformer/seq2seq_example.rb +16 -0
- data/lib/ai4r/classifiers/classifier.rb +24 -16
- data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
- data/lib/ai4r/classifiers/ib1.rb +122 -32
- data/lib/ai4r/classifiers/id3.rb +524 -145
- data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
- data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
- data/lib/ai4r/classifiers/one_r.rb +112 -44
- data/lib/ai4r/classifiers/prism.rb +167 -76
- data/lib/ai4r/classifiers/random_forest.rb +72 -0
- data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
- data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
- data/lib/ai4r/classifiers/votes.rb +57 -0
- data/lib/ai4r/classifiers/zero_r.rb +71 -30
- data/lib/ai4r/clusterers/average_linkage.rb +46 -27
- data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
- data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
- data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
- data/lib/ai4r/clusterers/clusterer.rb +29 -14
- data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
- data/lib/ai4r/clusterers/dbscan.rb +134 -0
- data/lib/ai4r/clusterers/diana.rb +75 -49
- data/lib/ai4r/clusterers/k_means.rb +270 -135
- data/lib/ai4r/clusterers/median_linkage.rb +49 -33
- data/lib/ai4r/clusterers/single_linkage.rb +196 -88
- data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
- data/lib/ai4r/data/data_set.rb +223 -103
- data/lib/ai4r/data/parameterizable.rb +31 -25
- data/lib/ai4r/data/proximity.rb +62 -62
- data/lib/ai4r/data/statistics.rb +46 -35
- data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
- data/lib/ai4r/experiment/split.rb +39 -0
- data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
- data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
- data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
- data/lib/ai4r/neural_network/activation_functions.rb +37 -0
- data/lib/ai4r/neural_network/backpropagation.rb +399 -134
- data/lib/ai4r/neural_network/hopfield.rb +175 -58
- data/lib/ai4r/neural_network/transformer.rb +194 -0
- data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
- data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
- data/lib/ai4r/reinforcement/q_learning.rb +51 -0
- data/lib/ai4r/search/a_star.rb +76 -0
- data/lib/ai4r/search/bfs.rb +50 -0
- data/lib/ai4r/search/dfs.rb +50 -0
- data/lib/ai4r/search/mcts.rb +118 -0
- data/lib/ai4r/search.rb +12 -0
- data/lib/ai4r/som/distance_metrics.rb +29 -0
- data/lib/ai4r/som/layer.rb +28 -17
- data/lib/ai4r/som/node.rb +61 -32
- data/lib/ai4r/som/som.rb +158 -41
- data/lib/ai4r/som/two_phase_layer.rb +21 -25
- data/lib/ai4r/version.rb +3 -0
- data/lib/ai4r.rb +57 -28
- metadata +79 -109
- data/README.rdoc +0 -39
- data/test/classifiers/hyperpipes_test.rb +0 -84
- data/test/classifiers/ib1_test.rb +0 -78
- data/test/classifiers/id3_test.rb +0 -220
- data/test/classifiers/multilayer_perceptron_test.rb +0 -79
- data/test/classifiers/naive_bayes_test.rb +0 -43
- data/test/classifiers/one_r_test.rb +0 -62
- data/test/classifiers/prism_test.rb +0 -85
- data/test/classifiers/simple_linear_regression_test.rb +0 -37
- data/test/classifiers/zero_r_test.rb +0 -50
- data/test/clusterers/average_linkage_test.rb +0 -51
- data/test/clusterers/bisecting_k_means_test.rb +0 -66
- data/test/clusterers/centroid_linkage_test.rb +0 -53
- data/test/clusterers/complete_linkage_test.rb +0 -57
- data/test/clusterers/diana_test.rb +0 -69
- data/test/clusterers/k_means_test.rb +0 -167
- data/test/clusterers/median_linkage_test.rb +0 -53
- data/test/clusterers/single_linkage_test.rb +0 -122
- data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
- data/test/clusterers/ward_linkage_test.rb +0 -53
- data/test/clusterers/weighted_average_linkage_test.rb +0 -53
- data/test/data/data_set_test.rb +0 -104
- data/test/data/proximity_test.rb +0 -87
- data/test/data/statistics_test.rb +0 -65
- data/test/experiment/classifier_evaluator_test.rb +0 -76
- data/test/genetic_algorithm/chromosome_test.rb +0 -57
- data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
- data/test/neural_network/backpropagation_test.rb +0 -82
- data/test/neural_network/hopfield_test.rb +0 -72
- data/test/som/som_test.rb +0 -97
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../lib/ai4r/neural_network/transformer'
|
4
|
+
require_relative '../../lib/ai4r/classifiers/logistic_regression'
|
5
|
+
require_relative '../../lib/ai4r/data/data_set'
|
6
|
+
|
7
|
+
# Tiny dataset of greetings (label 0) and farewells (label 1)
|
8
|
+
sentences = [
|
9
|
+
%w[hello there],
|
10
|
+
%w[how are you],
|
11
|
+
%w[good morning],
|
12
|
+
%w[nice to meet you],
|
13
|
+
%w[goodbye],
|
14
|
+
%w[see you later],
|
15
|
+
%w[have a nice day],
|
16
|
+
%w[take care]
|
17
|
+
]
|
18
|
+
labels = [0, 0, 0, 0, 1, 1, 1, 1]
|
19
|
+
|
20
|
+
# Build vocabulary
|
21
|
+
vocab = {}
|
22
|
+
next_id = 0
|
23
|
+
sentences.each do |tokens|
|
24
|
+
tokens.each do |t|
|
25
|
+
unless vocab.key?(t)
|
26
|
+
vocab[t] = next_id
|
27
|
+
next_id += 1
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
vocab_size = vocab.length
|
33
|
+
max_len = sentences.map(&:length).max
|
34
|
+
|
35
|
+
transformer = Ai4r::NeuralNetwork::Transformer.new(
|
36
|
+
vocab_size: vocab_size,
|
37
|
+
max_len: max_len,
|
38
|
+
architecture: :decoder
|
39
|
+
)
|
40
|
+
embed_dim = transformer.embed_dim
|
41
|
+
|
42
|
+
# Encode each sentence and average embeddings
|
43
|
+
items = []
|
44
|
+
sentences.each_with_index do |tokens, idx|
|
45
|
+
ids = tokens.map { |t| vocab[t] }
|
46
|
+
vecs = transformer.eval(ids)
|
47
|
+
avg = Array.new(embed_dim, 0.0)
|
48
|
+
vecs.each do |v|
|
49
|
+
v.each_index { |i| avg[i] += v[i] }
|
50
|
+
end
|
51
|
+
avg.map! { |v| v / vecs.length }
|
52
|
+
items << (avg + [labels[idx]])
|
53
|
+
end
|
54
|
+
|
55
|
+
labels_names = (0...embed_dim).map { |i| "x#{i}" } + ['class']
|
56
|
+
set = Ai4r::Data::DataSet.new(data_items: items, data_labels: labels_names)
|
57
|
+
|
58
|
+
classifier = Ai4r::Classifiers::LogisticRegression.new
|
59
|
+
classifier.set_parameters(lr: 0.5, iterations: 500).build(set)
|
60
|
+
|
61
|
+
# Classify a short greeting
|
62
|
+
sample = %w[hello]
|
63
|
+
ids = sample.map { |t| vocab[t] }
|
64
|
+
vecs = transformer.eval(ids)
|
65
|
+
avg = Array.new(embed_dim, 0.0)
|
66
|
+
vecs.each { |v| v.each_index { |i| avg[i] += v[i] } }
|
67
|
+
avg.map! { |v| v / vecs.length }
|
68
|
+
puts "Prediction: #{classifier.eval(avg)} (0=greeting, 1=farewell)"
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require_relative '../../lib/ai4r/neural_network/transformer'
|
2
|
+
|
3
|
+
# Demonstrates deterministic initialization using the :seed parameter.
|
4
|
+
model_a = Ai4r::NeuralNetwork::Transformer.new(vocab_size: 5, max_len: 3, seed: 42)
|
5
|
+
model_b = Ai4r::NeuralNetwork::Transformer.new(vocab_size: 5, max_len: 3, seed: 42)
|
6
|
+
|
7
|
+
output_a = model_a.eval([0, 1, 2])
|
8
|
+
output_b = model_b.eval([0, 1, 2])
|
9
|
+
|
10
|
+
puts "Outputs identical? #{output_a == output_b}"
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require_relative '../../lib/ai4r/neural_network/transformer'
|
2
|
+
|
3
|
+
# Simple demo of the seq2seq architecture.
|
4
|
+
# The model returns random vectors but shows how
|
5
|
+
# to provide encoder and decoder inputs.
|
6
|
+
model = Ai4r::NeuralNetwork::Transformer.new(
|
7
|
+
vocab_size: 10,
|
8
|
+
max_len: 5,
|
9
|
+
architecture: :seq2seq
|
10
|
+
)
|
11
|
+
|
12
|
+
encoder_input = [1, 2, 3]
|
13
|
+
decoder_input = [4, 5]
|
14
|
+
|
15
|
+
output = model.eval(encoder_input, decoder_input)
|
16
|
+
puts "Output length: #{output.length}"
|
@@ -1,62 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Author:: Sergio Fierens
|
2
4
|
# License:: MPL 1.1
|
3
5
|
# Project:: ai4r
|
4
|
-
# Url::
|
6
|
+
# Url:: https://github.com/SergioFierens/ai4r
|
5
7
|
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
10
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
|
12
|
+
require_relative '../data/parameterizable'
|
13
|
+
|
12
14
|
module Ai4r
|
13
15
|
module Classifiers
|
14
|
-
|
15
16
|
# This class defines a common API for classifiers.
|
16
17
|
# All methods in this class must be implemented in subclasses.
|
17
18
|
class Classifier
|
19
|
+
include Ai4r::Data::Parameterizable
|
18
20
|
|
19
|
-
include Ai4r::Data::Parameterizable
|
20
|
-
|
21
21
|
# Build a new classifier, using data examples found in data_set.
|
22
22
|
# The last attribute of each item is considered as the
|
23
23
|
# item class.
|
24
|
+
# @param data_set [Object]
|
25
|
+
# @return [Object]
|
24
26
|
def build(data_set)
|
25
27
|
raise NotImplementedError
|
26
28
|
end
|
27
|
-
|
29
|
+
|
28
30
|
# You can evaluate new data, predicting its class.
|
29
31
|
# e.g.
|
30
32
|
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
33
|
+
# @param data [Object]
|
34
|
+
# @return [Object]
|
31
35
|
def eval(data)
|
32
36
|
raise NotImplementedError
|
33
37
|
end
|
34
|
-
|
38
|
+
|
35
39
|
# This method returns the generated rules in ruby code.
|
36
40
|
# e.g.
|
37
|
-
#
|
41
|
+
#
|
38
42
|
# classifier.get_rules
|
39
43
|
# # => if age_range=='<30' then marketing_target='Y'
|
40
44
|
# elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
|
41
45
|
# elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
|
42
46
|
# elsif age_range=='[50-80]' then marketing_target='N'
|
43
47
|
# elsif age_range=='>80' then marketing_target='Y'
|
44
|
-
# else
|
48
|
+
# else
|
49
|
+
# raise 'There was not enough information during training to do a '
|
50
|
+
# 'proper induction for this data element'
|
51
|
+
# end
|
45
52
|
#
|
46
|
-
# It is a nice way to inspect induction results, and also to execute them:
|
53
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
47
54
|
# age_range = '<30'
|
48
55
|
# city='New York'
|
49
56
|
# marketing_target = nil
|
50
|
-
# eval classifier.get_rules
|
57
|
+
# eval classifier.get_rules
|
51
58
|
# puts marketing_target
|
52
59
|
# # => 'Y'
|
53
60
|
#
|
54
61
|
# Note, however, that not all classifiers are able to produce rules.
|
55
62
|
# This method is not implemented in such classifiers.
|
63
|
+
# @return [Object]
|
56
64
|
def get_rules
|
57
65
|
raise NotImplementedError
|
58
66
|
end
|
59
|
-
|
67
|
+
# rubocop:enable Naming/AccessorMethodName
|
60
68
|
end
|
61
69
|
end
|
62
70
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Author:: OpenAI ChatGPT
|
4
|
+
# License:: MPL 1.1
|
5
|
+
# Project:: ai4r
|
6
|
+
#
|
7
|
+
# Very small gradient boosting implementation for regression using
|
8
|
+
# simple linear regression as base learner.
|
9
|
+
|
10
|
+
require_relative 'simple_linear_regression'
|
11
|
+
require_relative '../data/data_set'
|
12
|
+
require_relative '../classifiers/classifier'
|
13
|
+
|
14
|
+
module Ai4r
|
15
|
+
module Classifiers
|
16
|
+
# Gradient boosting regressor using simple linear regression base learners.
|
17
|
+
class GradientBoosting < Classifier
|
18
|
+
parameters_info n_estimators: 'Number of boosting iterations. Default 10.',
|
19
|
+
learning_rate: 'Shrinkage parameter for each learner. Default 0.1.'
|
20
|
+
|
21
|
+
attr_reader :initial_value, :learners
|
22
|
+
|
23
|
+
def initialize
|
24
|
+
super()
|
25
|
+
@n_estimators = 10
|
26
|
+
@learning_rate = 0.1
|
27
|
+
end
|
28
|
+
|
29
|
+
def build(data_set)
|
30
|
+
data_set.check_not_empty
|
31
|
+
@learners = []
|
32
|
+
targets = data_set.data_items.map(&:last)
|
33
|
+
@initial_value = targets.sum.to_f / targets.length
|
34
|
+
predictions = Array.new(targets.length, @initial_value)
|
35
|
+
@n_estimators.times do
|
36
|
+
residuals = targets.zip(predictions).map { |y, f| y - f }
|
37
|
+
items = data_set.data_items.each_with_index.map do |item, idx|
|
38
|
+
item[0...-1] + [residuals[idx]]
|
39
|
+
end
|
40
|
+
ds = Ai4r::Data::DataSet.new(data_items: items, data_labels: data_set.data_labels)
|
41
|
+
learner = SimpleLinearRegression.new.build(ds)
|
42
|
+
@learners << learner
|
43
|
+
pred = items.map { |it| learner.eval(it[0...-1]) }
|
44
|
+
predictions = predictions.zip(pred).map { |f, p| f + (@learning_rate * p) }
|
45
|
+
end
|
46
|
+
self
|
47
|
+
end
|
48
|
+
# rubocop:enable Metrics/AbcSize
|
49
|
+
|
50
|
+
def eval(data)
|
51
|
+
value = @initial_value
|
52
|
+
@learners.each do |learner|
|
53
|
+
value += @learning_rate * learner.eval(data)
|
54
|
+
end
|
55
|
+
value
|
56
|
+
end
|
57
|
+
|
58
|
+
def get_rules
|
59
|
+
'GradientBoosting does not support rule extraction.'
|
60
|
+
end
|
61
|
+
# rubocop:enable Naming/AccessorMethodName
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -1,118 +1,194 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Author:: Sergio Fierens (Implementation only)
|
2
4
|
# License:: MPL 1.1
|
3
5
|
# Project:: ai4r
|
4
|
-
# Url::
|
6
|
+
# Url:: https://github.com/SergioFierens/ai4r
|
5
7
|
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
10
|
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
11
|
|
10
12
|
require 'set'
|
11
|
-
|
12
|
-
|
13
|
+
require_relative '../data/data_set'
|
14
|
+
require_relative '../classifiers/classifier'
|
15
|
+
require_relative '../classifiers/votes'
|
13
16
|
|
14
17
|
module Ai4r
|
18
|
+
# Collection of classifier algorithms.
|
15
19
|
module Classifiers
|
16
|
-
|
17
20
|
include Ai4r::Data
|
18
|
-
|
21
|
+
|
19
22
|
# = Introduction
|
20
|
-
#
|
21
|
-
# A fast classifier algorithm, created by Lucio de Souza Coelho
|
23
|
+
#
|
24
|
+
# A fast classifier algorithm, created by Lucio de Souza Coelho
|
22
25
|
# and Len Trigg.
|
23
26
|
class Hyperpipes < Classifier
|
24
|
-
|
25
27
|
attr_reader :data_set, :pipes
|
26
28
|
|
29
|
+
parameters_info tie_break:
|
30
|
+
'Strategy used when more than one class has the same maximal vote. ' \
|
31
|
+
'Valid values are :last (default) and :random.',
|
32
|
+
margin: 'Numeric margin added to the bounds of numeric attributes.',
|
33
|
+
random_seed: 'Seed for random tie-breaking when tie_break is :random.'
|
34
|
+
|
35
|
+
# @return [Object]
|
36
|
+
def initialize
|
37
|
+
super()
|
38
|
+
@tie_break = :last
|
39
|
+
@margin = 0
|
40
|
+
@random_seed = nil
|
41
|
+
@rng = nil
|
42
|
+
end
|
43
|
+
|
27
44
|
# Build a new Hyperpipes classifier. You must provide a DataSet instance
|
28
|
-
# as parameter. The last attribute of each item is considered as
|
45
|
+
# as parameter. The last attribute of each item is considered as
|
29
46
|
# the item class.
|
47
|
+
# @param data_set [Object]
|
48
|
+
# @return [Object]
|
30
49
|
def build(data_set)
|
31
50
|
data_set.check_not_empty
|
32
51
|
@data_set = data_set
|
33
52
|
@domains = data_set.build_domains
|
34
|
-
|
53
|
+
|
35
54
|
@pipes = {}
|
36
|
-
@domains.last.each {|cat| @pipes[cat] = build_pipe(@data_set)}
|
37
|
-
@data_set.data_items.each {|item| update_pipe(@pipes[item.last], item) }
|
38
|
-
|
39
|
-
|
55
|
+
@domains.last.each { |cat| @pipes[cat] = build_pipe(@data_set) }
|
56
|
+
@data_set.data_items.each { |item| update_pipe(@pipes[item.last], item) }
|
57
|
+
|
58
|
+
self
|
40
59
|
end
|
41
|
-
|
60
|
+
|
42
61
|
# You can evaluate new data, predicting its class.
|
43
62
|
# e.g.
|
44
|
-
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
63
|
+
# classifier.eval(['New York', '<30', 'F']) # => 'Y'
|
64
|
+
# Tie resolution is controlled by +tie_break+ parameter.
|
65
|
+
# @param data [Object]
|
66
|
+
# @return [Object]
|
45
67
|
def eval(data)
|
46
|
-
votes =
|
68
|
+
votes = Votes.new
|
47
69
|
@pipes.each do |category, pipe|
|
48
70
|
pipe.each_with_index do |bounds, i|
|
49
71
|
if data[i].is_a? Numeric
|
50
|
-
votes
|
51
|
-
|
52
|
-
votes
|
72
|
+
votes.increment_category(category) if data[i].between?(bounds[:min], bounds[:max])
|
73
|
+
elsif bounds[data[i]]
|
74
|
+
votes.increment_category(category)
|
53
75
|
end
|
54
76
|
end
|
55
77
|
end
|
56
|
-
|
78
|
+
rng = @rng || (@random_seed.nil? ? Random.new : Random.new(@random_seed))
|
79
|
+
votes.get_winner(@tie_break, rng: rng)
|
57
80
|
end
|
58
|
-
|
81
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
82
|
+
|
59
83
|
# This method returns the generated rules in ruby code.
|
60
84
|
# e.g.
|
61
|
-
#
|
85
|
+
#
|
62
86
|
# classifier.get_rules
|
63
87
|
# # => if age_range == '<30' then marketing_target = 'Y'
|
64
88
|
# elsif age_range == '[30-50)' then marketing_target = 'N'
|
65
89
|
# elsif age_range == '[50-80]' then marketing_target = 'N'
|
66
90
|
# end
|
67
91
|
#
|
68
|
-
# It is a nice way to inspect induction results, and also to execute them:
|
92
|
+
# It is a nice way to inspect induction results, and also to execute them:
|
69
93
|
# marketing_target = nil
|
70
|
-
# eval classifier.get_rules
|
94
|
+
# eval classifier.get_rules
|
71
95
|
# puts marketing_target
|
72
96
|
# # => 'Y'
|
97
|
+
# @return [Object]
|
98
|
+
# rubocop:disable Metrics/AbcSize
|
73
99
|
def get_rules
|
74
100
|
rules = []
|
75
|
-
rules <<
|
101
|
+
rules << 'votes = Votes.new'
|
76
102
|
data = @data_set.data_items.first
|
77
|
-
labels = @data_set.data_labels.collect
|
103
|
+
labels = @data_set.data_labels.collect(&:to_s)
|
78
104
|
@pipes.each do |category, pipe|
|
79
105
|
pipe.each_with_index do |bounds, i|
|
80
|
-
rule = "votes
|
81
|
-
if data[i].is_a? Numeric
|
82
|
-
|
106
|
+
rule = "votes.increment_category('#{category}') "
|
107
|
+
rule += if data[i].is_a? Numeric
|
108
|
+
"if #{labels[i]} >= #{bounds[:min]} && #{labels[i]} <= #{bounds[:max]}"
|
109
|
+
else
|
110
|
+
"if #{bounds.inspect}[#{labels[i]}]"
|
111
|
+
end
|
112
|
+
rules << rule
|
113
|
+
end
|
114
|
+
end
|
115
|
+
rules << "#{labels.last} = votes.get_winner(:#{@tie_break})"
|
116
|
+
rules.join("\n")
|
117
|
+
end
|
118
|
+
# rubocop:enable Metrics/AbcSize
|
119
|
+
# rubocop:enable Naming/AccessorMethodName
|
120
|
+
|
121
|
+
# Return a summary representation of all pipes.
|
122
|
+
#
|
123
|
+
# The returned hash maps each category to another hash where the keys are
|
124
|
+
# attribute labels and the values are either numeric ranges
|
125
|
+
# `[min, max]` (including the optional margin) or a Set of nominal values.
|
126
|
+
#
|
127
|
+
# classifier.pipes_summary
|
128
|
+
# # => { "Y" => { "city" => #{Set['New York', 'Chicago']},
|
129
|
+
# "age" => [18, 85],
|
130
|
+
# "gender" => #{Set['M', 'F']} },
|
131
|
+
# "N" => { ... } }
|
132
|
+
#
|
133
|
+
# The optional +margin+ parameter expands numeric bounds by the given
|
134
|
+
# fraction. A value of 0.1 would enlarge each range by 10%.
|
135
|
+
# @param margin [Object]
|
136
|
+
# @return [Object]
|
137
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
138
|
+
def pipes_summary(margin: 0)
|
139
|
+
raise 'Model not built yet' unless @data_set && @pipes
|
140
|
+
|
141
|
+
labels = @data_set.data_labels[0...-1]
|
142
|
+
summary = {}
|
143
|
+
@pipes.each do |category, pipe|
|
144
|
+
attr_summary = {}
|
145
|
+
pipe.each_with_index do |bounds, i|
|
146
|
+
if bounds.is_a?(Hash) && bounds.key?(:min) && bounds.key?(:max)
|
147
|
+
min = bounds[:min]
|
148
|
+
max = bounds[:max]
|
149
|
+
range_margin = (max - min) * margin
|
150
|
+
attr_summary[labels[i]] = [min - range_margin, max + range_margin]
|
83
151
|
else
|
84
|
-
|
152
|
+
attr_summary[labels[i]] = bounds.select { |_k, v| v }.keys.to_set
|
85
153
|
end
|
86
|
-
rules << rule
|
87
154
|
end
|
155
|
+
summary[category] = attr_summary
|
88
156
|
end
|
89
|
-
|
90
|
-
return rules.join("\n")
|
157
|
+
summary
|
91
158
|
end
|
92
|
-
|
159
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
160
|
+
|
93
161
|
protected
|
94
162
|
|
163
|
+
# @param data_set [Object]
|
164
|
+
# @return [Object]
|
95
165
|
def build_pipe(data_set)
|
96
166
|
data_set.data_items.first[0...-1].collect do |att|
|
97
167
|
if att.is_a? Numeric
|
98
|
-
{:
|
168
|
+
{ min: Float::INFINITY, max: -Float::INFINITY }
|
99
169
|
else
|
100
170
|
Hash.new(false)
|
101
171
|
end
|
102
172
|
end
|
103
173
|
end
|
104
|
-
|
174
|
+
|
175
|
+
# @param pipe [Object]
|
176
|
+
# @param data_item [Object]
|
177
|
+
# @return [Object]
|
178
|
+
# rubocop:disable Metrics/AbcSize
|
105
179
|
def update_pipe(pipe, data_item)
|
106
180
|
data_item[0...-1].each_with_index do |att, i|
|
107
181
|
if att.is_a? Numeric
|
108
|
-
|
109
|
-
|
182
|
+
min_val = att - @margin
|
183
|
+
max_val = att + @margin
|
184
|
+
pipe[i][:min] = min_val if min_val < pipe[i][:min]
|
185
|
+
pipe[i][:max] = max_val if max_val > pipe[i][:max]
|
110
186
|
else
|
111
187
|
pipe[i][att] = true
|
112
|
-
end
|
188
|
+
end
|
113
189
|
end
|
114
190
|
end
|
115
|
-
|
191
|
+
# rubocop:enable Metrics/AbcSize
|
116
192
|
end
|
117
193
|
end
|
118
194
|
end
|