ai4r 1.13 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.rb +14 -11
  16. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  17. data/examples/classifiers/zero_one_r_data.csv +8 -0
  18. data/examples/clusterers/clusterer_example.rb +40 -34
  19. data/examples/clusterers/dbscan_example.rb +17 -0
  20. data/examples/clusterers/dendrogram_example.rb +17 -0
  21. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  22. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  23. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  24. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  25. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  26. data/examples/neural_network/backpropagation_example.rb +48 -48
  27. data/examples/neural_network/hopfield_example.rb +45 -0
  28. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  29. data/examples/neural_network/patterns_with_noise.rb +41 -39
  30. data/examples/neural_network/train_epochs_callback.rb +25 -0
  31. data/examples/neural_network/training_patterns.rb +39 -39
  32. data/examples/neural_network/transformer_text_classification.rb +78 -0
  33. data/examples/neural_network/xor_example.rb +23 -22
  34. data/examples/reinforcement/q_learning_example.rb +10 -0
  35. data/examples/som/som_data.rb +155 -152
  36. data/examples/som/som_multi_node_example.rb +12 -13
  37. data/examples/som/som_single_example.rb +12 -15
  38. data/examples/transformer/decode_classifier_example.rb +68 -0
  39. data/examples/transformer/deterministic_example.rb +10 -0
  40. data/examples/transformer/seq2seq_example.rb +16 -0
  41. data/lib/ai4r/classifiers/classifier.rb +24 -16
  42. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  43. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  44. data/lib/ai4r/classifiers/ib1.rb +122 -32
  45. data/lib/ai4r/classifiers/id3.rb +524 -145
  46. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  47. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  48. data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
  49. data/lib/ai4r/classifiers/one_r.rb +112 -44
  50. data/lib/ai4r/classifiers/prism.rb +167 -76
  51. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  52. data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
  53. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  54. data/lib/ai4r/classifiers/votes.rb +57 -0
  55. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  56. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  57. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  58. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  59. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  60. data/lib/ai4r/clusterers/clusterer.rb +29 -14
  61. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  62. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  63. data/lib/ai4r/clusterers/diana.rb +75 -49
  64. data/lib/ai4r/clusterers/k_means.rb +270 -135
  65. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  66. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  67. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  68. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
  69. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  70. data/lib/ai4r/data/data_set.rb +223 -103
  71. data/lib/ai4r/data/parameterizable.rb +31 -25
  72. data/lib/ai4r/data/proximity.rb +62 -62
  73. data/lib/ai4r/data/statistics.rb +46 -35
  74. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  75. data/lib/ai4r/experiment/split.rb +39 -0
  76. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  77. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  78. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  79. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  80. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  81. data/lib/ai4r/neural_network/backpropagation.rb +399 -134
  82. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  83. data/lib/ai4r/neural_network/transformer.rb +194 -0
  84. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  85. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  86. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  87. data/lib/ai4r/search/a_star.rb +76 -0
  88. data/lib/ai4r/search/bfs.rb +50 -0
  89. data/lib/ai4r/search/dfs.rb +50 -0
  90. data/lib/ai4r/search/mcts.rb +118 -0
  91. data/lib/ai4r/search.rb +12 -0
  92. data/lib/ai4r/som/distance_metrics.rb +29 -0
  93. data/lib/ai4r/som/layer.rb +28 -17
  94. data/lib/ai4r/som/node.rb +61 -32
  95. data/lib/ai4r/som/som.rb +158 -41
  96. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  97. data/lib/ai4r/version.rb +3 -0
  98. data/lib/ai4r.rb +57 -28
  99. metadata +79 -109
  100. data/README.rdoc +0 -39
  101. data/test/classifiers/hyperpipes_test.rb +0 -84
  102. data/test/classifiers/ib1_test.rb +0 -78
  103. data/test/classifiers/id3_test.rb +0 -220
  104. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  105. data/test/classifiers/naive_bayes_test.rb +0 -43
  106. data/test/classifiers/one_r_test.rb +0 -62
  107. data/test/classifiers/prism_test.rb +0 -85
  108. data/test/classifiers/simple_linear_regression_test.rb +0 -37
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -167
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
  119. data/test/clusterers/ward_linkage_test.rb +0 -53
  120. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  121. data/test/data/data_set_test.rb +0 -104
  122. data/test/data/proximity_test.rb +0 -87
  123. data/test/data/statistics_test.rb +0 -65
  124. data/test/experiment/classifier_evaluator_test.rb +0 -76
  125. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  126. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  127. data/test/neural_network/backpropagation_test.rb +0 -82
  128. data/test/neural_network/hopfield_test.rb +0 -72
  129. data/test/som/som_test.rb +0 -97
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../lib/ai4r/neural_network/transformer'
4
+ require_relative '../../lib/ai4r/classifiers/logistic_regression'
5
+ require_relative '../../lib/ai4r/data/data_set'
6
+
7
+ # Tiny dataset of greetings (label 0) and farewells (label 1)
8
+ sentences = [
9
+ %w[hello there],
10
+ %w[how are you],
11
+ %w[good morning],
12
+ %w[nice to meet you],
13
+ %w[goodbye],
14
+ %w[see you later],
15
+ %w[have a nice day],
16
+ %w[take care]
17
+ ]
18
+ labels = [0, 0, 0, 0, 1, 1, 1, 1]
19
+
20
+ # Build vocabulary
21
+ vocab = {}
22
+ next_id = 0
23
+ sentences.each do |tokens|
24
+ tokens.each do |t|
25
+ unless vocab.key?(t)
26
+ vocab[t] = next_id
27
+ next_id += 1
28
+ end
29
+ end
30
+ end
31
+
32
+ vocab_size = vocab.length
33
+ max_len = sentences.map(&:length).max
34
+
35
+ transformer = Ai4r::NeuralNetwork::Transformer.new(
36
+ vocab_size: vocab_size,
37
+ max_len: max_len,
38
+ architecture: :decoder
39
+ )
40
+ embed_dim = transformer.embed_dim
41
+
42
+ # Encode each sentence and average embeddings
43
+ items = []
44
+ sentences.each_with_index do |tokens, idx|
45
+ ids = tokens.map { |t| vocab[t] }
46
+ vecs = transformer.eval(ids)
47
+ avg = Array.new(embed_dim, 0.0)
48
+ vecs.each do |v|
49
+ v.each_index { |i| avg[i] += v[i] }
50
+ end
51
+ avg.map! { |v| v / vecs.length }
52
+ items << (avg + [labels[idx]])
53
+ end
54
+
55
+ labels_names = (0...embed_dim).map { |i| "x#{i}" } + ['class']
56
+ set = Ai4r::Data::DataSet.new(data_items: items, data_labels: labels_names)
57
+
58
+ classifier = Ai4r::Classifiers::LogisticRegression.new
59
+ classifier.set_parameters(lr: 0.5, iterations: 500).build(set)
60
+
61
+ # Classify a short greeting
62
+ sample = %w[hello]
63
+ ids = sample.map { |t| vocab[t] }
64
+ vecs = transformer.eval(ids)
65
+ avg = Array.new(embed_dim, 0.0)
66
+ vecs.each { |v| v.each_index { |i| avg[i] += v[i] } }
67
+ avg.map! { |v| v / vecs.length }
68
+ puts "Prediction: #{classifier.eval(avg)} (0=greeting, 1=farewell)"
@@ -0,0 +1,10 @@
1
+ require_relative '../../lib/ai4r/neural_network/transformer'
2
+
3
+ # Demonstrates deterministic initialization using the :seed parameter.
4
+ model_a = Ai4r::NeuralNetwork::Transformer.new(vocab_size: 5, max_len: 3, seed: 42)
5
+ model_b = Ai4r::NeuralNetwork::Transformer.new(vocab_size: 5, max_len: 3, seed: 42)
6
+
7
+ output_a = model_a.eval([0, 1, 2])
8
+ output_b = model_b.eval([0, 1, 2])
9
+
10
+ puts "Outputs identical? #{output_a == output_b}"
@@ -0,0 +1,16 @@
1
+ require_relative '../../lib/ai4r/neural_network/transformer'
2
+
3
+ # Simple demo of the seq2seq architecture.
4
+ # The model returns random vectors but shows how
5
+ # to provide encoder and decoder inputs.
6
+ model = Ai4r::NeuralNetwork::Transformer.new(
7
+ vocab_size: 10,
8
+ max_len: 5,
9
+ architecture: :seq2seq
10
+ )
11
+
12
+ encoder_input = [1, 2, 3]
13
+ decoder_input = [4, 5]
14
+
15
+ output = model.eval(encoder_input, decoder_input)
16
+ puts "Output length: #{output.length}"
@@ -1,62 +1,70 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://ai4r.org
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require File.dirname(__FILE__) + '/../data/parameterizable'
11
-
11
+
12
+ require_relative '../data/parameterizable'
13
+
12
14
  module Ai4r
13
15
  module Classifiers
14
-
15
16
  # This class defines a common API for classifiers.
16
17
  # All methods in this class must be implemented in subclasses.
17
18
  class Classifier
19
+ include Ai4r::Data::Parameterizable
18
20
 
19
- include Ai4r::Data::Parameterizable
20
-
21
21
  # Build a new classifier, using data examples found in data_set.
22
22
  # The last attribute of each item is considered as the
23
23
  # item class.
24
+ # @param data_set [Object]
25
+ # @return [Object]
24
26
  def build(data_set)
25
27
  raise NotImplementedError
26
28
  end
27
-
29
+
28
30
  # You can evaluate new data, predicting its class.
29
31
  # e.g.
30
32
  # classifier.eval(['New York', '<30', 'F']) # => 'Y'
33
+ # @param data [Object]
34
+ # @return [Object]
31
35
  def eval(data)
32
36
  raise NotImplementedError
33
37
  end
34
-
38
+
35
39
  # This method returns the generated rules in ruby code.
36
40
  # e.g.
37
- #
41
+ #
38
42
  # classifier.get_rules
39
43
  # # => if age_range=='<30' then marketing_target='Y'
40
44
  # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
41
45
  # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
42
46
  # elsif age_range=='[50-80]' then marketing_target='N'
43
47
  # elsif age_range=='>80' then marketing_target='Y'
44
- # else raise 'There was not enough information during training to do a proper induction for this data element' end
48
+ # else
49
+ # raise 'There was not enough information during training to do a '
50
+ # 'proper induction for this data element'
51
+ # end
45
52
  #
46
- # It is a nice way to inspect induction results, and also to execute them:
53
+ # It is a nice way to inspect induction results, and also to execute them:
47
54
  # age_range = '<30'
48
55
  # city='New York'
49
56
  # marketing_target = nil
50
- # eval classifier.get_rules
57
+ # eval classifier.get_rules
51
58
  # puts marketing_target
52
59
  # # => 'Y'
53
60
  #
54
61
  # Note, however, that not all classifiers are able to produce rules.
55
62
  # This method is not implemented in such classifiers.
63
+ # @return [Object]
56
64
  def get_rules
57
65
  raise NotImplementedError
58
66
  end
59
-
67
+ # rubocop:enable Naming/AccessorMethodName
60
68
  end
61
69
  end
62
70
  end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Author:: OpenAI ChatGPT
4
+ # License:: MPL 1.1
5
+ # Project:: ai4r
6
+ #
7
+ # Very small gradient boosting implementation for regression using
8
+ # simple linear regression as base learner.
9
+
10
+ require_relative 'simple_linear_regression'
11
+ require_relative '../data/data_set'
12
+ require_relative '../classifiers/classifier'
13
+
14
+ module Ai4r
15
+ module Classifiers
16
+ # Gradient boosting regressor using simple linear regression base learners.
17
+ class GradientBoosting < Classifier
18
+ parameters_info n_estimators: 'Number of boosting iterations. Default 10.',
19
+ learning_rate: 'Shrinkage parameter for each learner. Default 0.1.'
20
+
21
+ attr_reader :initial_value, :learners
22
+
23
+ def initialize
24
+ super()
25
+ @n_estimators = 10
26
+ @learning_rate = 0.1
27
+ end
28
+
29
+ def build(data_set)
30
+ data_set.check_not_empty
31
+ @learners = []
32
+ targets = data_set.data_items.map(&:last)
33
+ @initial_value = targets.sum.to_f / targets.length
34
+ predictions = Array.new(targets.length, @initial_value)
35
+ @n_estimators.times do
36
+ residuals = targets.zip(predictions).map { |y, f| y - f }
37
+ items = data_set.data_items.each_with_index.map do |item, idx|
38
+ item[0...-1] + [residuals[idx]]
39
+ end
40
+ ds = Ai4r::Data::DataSet.new(data_items: items, data_labels: data_set.data_labels)
41
+ learner = SimpleLinearRegression.new.build(ds)
42
+ @learners << learner
43
+ pred = items.map { |it| learner.eval(it[0...-1]) }
44
+ predictions = predictions.zip(pred).map { |f, p| f + (@learning_rate * p) }
45
+ end
46
+ self
47
+ end
48
+ # rubocop:enable Metrics/AbcSize
49
+
50
+ def eval(data)
51
+ value = @initial_value
52
+ @learners.each do |learner|
53
+ value += @learning_rate * learner.eval(data)
54
+ end
55
+ value
56
+ end
57
+
58
+ def get_rules
59
+ 'GradientBoosting does not support rule extraction.'
60
+ end
61
+ # rubocop:enable Naming/AccessorMethodName
62
+ end
63
+ end
64
+ end
@@ -1,118 +1,194 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens (Implementation only)
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://www.ai4r.org/
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
12
  require 'set'
11
- require File.dirname(__FILE__) + '/../data/data_set'
12
- require File.dirname(__FILE__) + '/../classifiers/classifier'
13
+ require_relative '../data/data_set'
14
+ require_relative '../classifiers/classifier'
15
+ require_relative '../classifiers/votes'
13
16
 
14
17
  module Ai4r
18
+ # Collection of classifier algorithms.
15
19
  module Classifiers
16
-
17
20
  include Ai4r::Data
18
-
21
+
19
22
  # = Introduction
20
- #
21
- # A fast classifier algorithm, created by Lucio de Souza Coelho
23
+ #
24
+ # A fast classifier algorithm, created by Lucio de Souza Coelho
22
25
  # and Len Trigg.
23
26
  class Hyperpipes < Classifier
24
-
25
27
  attr_reader :data_set, :pipes
26
28
 
29
+ parameters_info tie_break:
30
+ 'Strategy used when more than one class has the same maximal vote. ' \
31
+ 'Valid values are :last (default) and :random.',
32
+ margin: 'Numeric margin added to the bounds of numeric attributes.',
33
+ random_seed: 'Seed for random tie-breaking when tie_break is :random.'
34
+
35
+ # @return [Object]
36
+ def initialize
37
+ super()
38
+ @tie_break = :last
39
+ @margin = 0
40
+ @random_seed = nil
41
+ @rng = nil
42
+ end
43
+
27
44
  # Build a new Hyperpipes classifier. You must provide a DataSet instance
28
- # as parameter. The last attribute of each item is considered as
45
+ # as parameter. The last attribute of each item is considered as
29
46
  # the item class.
47
+ # @param data_set [Object]
48
+ # @return [Object]
30
49
  def build(data_set)
31
50
  data_set.check_not_empty
32
51
  @data_set = data_set
33
52
  @domains = data_set.build_domains
34
-
53
+
35
54
  @pipes = {}
36
- @domains.last.each {|cat| @pipes[cat] = build_pipe(@data_set)}
37
- @data_set.data_items.each {|item| update_pipe(@pipes[item.last], item) }
38
-
39
- return self
55
+ @domains.last.each { |cat| @pipes[cat] = build_pipe(@data_set) }
56
+ @data_set.data_items.each { |item| update_pipe(@pipes[item.last], item) }
57
+
58
+ self
40
59
  end
41
-
60
+
42
61
  # You can evaluate new data, predicting its class.
43
62
  # e.g.
44
- # classifier.eval(['New York', '<30', 'F']) # => 'Y'
63
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
64
+ # Tie resolution is controlled by +tie_break+ parameter.
65
+ # @param data [Object]
66
+ # @return [Object]
45
67
  def eval(data)
46
- votes = Hash.new {0}
68
+ votes = Votes.new
47
69
  @pipes.each do |category, pipe|
48
70
  pipe.each_with_index do |bounds, i|
49
71
  if data[i].is_a? Numeric
50
- votes[category]+=1 if data[i]>=bounds[:min] && data[i]<=bounds[:max]
51
- else
52
- votes[category]+=1 if bounds[data[i]]
72
+ votes.increment_category(category) if data[i].between?(bounds[:min], bounds[:max])
73
+ elsif bounds[data[i]]
74
+ votes.increment_category(category)
53
75
  end
54
76
  end
55
77
  end
56
- return votes.to_a.max {|x, y| x.last <=> y.last}.first
78
+ rng = @rng || (@random_seed.nil? ? Random.new : Random.new(@random_seed))
79
+ votes.get_winner(@tie_break, rng: rng)
57
80
  end
58
-
81
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
82
+
59
83
  # This method returns the generated rules in ruby code.
60
84
  # e.g.
61
- #
85
+ #
62
86
  # classifier.get_rules
63
87
  # # => if age_range == '<30' then marketing_target = 'Y'
64
88
  # elsif age_range == '[30-50)' then marketing_target = 'N'
65
89
  # elsif age_range == '[50-80]' then marketing_target = 'N'
66
90
  # end
67
91
  #
68
- # It is a nice way to inspect induction results, and also to execute them:
92
+ # It is a nice way to inspect induction results, and also to execute them:
69
93
  # marketing_target = nil
70
- # eval classifier.get_rules
94
+ # eval classifier.get_rules
71
95
  # puts marketing_target
72
96
  # # => 'Y'
97
+ # @return [Object]
98
+ # rubocop:disable Metrics/AbcSize
73
99
  def get_rules
74
100
  rules = []
75
- rules << "votes = Hash.new {0}"
101
+ rules << 'votes = Votes.new'
76
102
  data = @data_set.data_items.first
77
- labels = @data_set.data_labels.collect {|l| l.to_s}
103
+ labels = @data_set.data_labels.collect(&:to_s)
78
104
  @pipes.each do |category, pipe|
79
105
  pipe.each_with_index do |bounds, i|
80
- rule = "votes['#{category}'] += 1 "
81
- if data[i].is_a? Numeric
82
- rule += "if #{labels[i]} >= #{bounds[:min]} && #{labels[i]} <= #{bounds[:max]}"
106
+ rule = "votes.increment_category('#{category}') "
107
+ rule += if data[i].is_a? Numeric
108
+ "if #{labels[i]} >= #{bounds[:min]} && #{labels[i]} <= #{bounds[:max]}"
109
+ else
110
+ "if #{bounds.inspect}[#{labels[i]}]"
111
+ end
112
+ rules << rule
113
+ end
114
+ end
115
+ rules << "#{labels.last} = votes.get_winner(:#{@tie_break})"
116
+ rules.join("\n")
117
+ end
118
+ # rubocop:enable Metrics/AbcSize
119
+ # rubocop:enable Naming/AccessorMethodName
120
+
121
+ # Return a summary representation of all pipes.
122
+ #
123
+ # The returned hash maps each category to another hash where the keys are
124
+ # attribute labels and the values are either numeric ranges
125
+ # `[min, max]` (including the optional margin) or a Set of nominal values.
126
+ #
127
+ # classifier.pipes_summary
128
+ # # => { "Y" => { "city" => #{Set['New York', 'Chicago']},
129
+ # "age" => [18, 85],
130
+ # "gender" => #{Set['M', 'F']} },
131
+ # "N" => { ... } }
132
+ #
133
+ # The optional +margin+ parameter expands numeric bounds by the given
134
+ # fraction. A value of 0.1 would enlarge each range by 10%.
135
+ # @param margin [Object]
136
+ # @return [Object]
137
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
138
+ def pipes_summary(margin: 0)
139
+ raise 'Model not built yet' unless @data_set && @pipes
140
+
141
+ labels = @data_set.data_labels[0...-1]
142
+ summary = {}
143
+ @pipes.each do |category, pipe|
144
+ attr_summary = {}
145
+ pipe.each_with_index do |bounds, i|
146
+ if bounds.is_a?(Hash) && bounds.key?(:min) && bounds.key?(:max)
147
+ min = bounds[:min]
148
+ max = bounds[:max]
149
+ range_margin = (max - min) * margin
150
+ attr_summary[labels[i]] = [min - range_margin, max + range_margin]
83
151
  else
84
- rule += "if #{bounds.inspect}[#{labels[i]}]"
152
+ attr_summary[labels[i]] = bounds.select { |_k, v| v }.keys.to_set
85
153
  end
86
- rules << rule
87
154
  end
155
+ summary[category] = attr_summary
88
156
  end
89
- rules << "#{labels.last} = votes.to_a.max {|x, y| x.last <=> y.last}.first"
90
- return rules.join("\n")
157
+ summary
91
158
  end
92
-
159
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
160
+
93
161
  protected
94
162
 
163
+ # @param data_set [Object]
164
+ # @return [Object]
95
165
  def build_pipe(data_set)
96
166
  data_set.data_items.first[0...-1].collect do |att|
97
167
  if att.is_a? Numeric
98
- {:min=>1.0/0, :max=>-1.0/0}
168
+ { min: Float::INFINITY, max: -Float::INFINITY }
99
169
  else
100
170
  Hash.new(false)
101
171
  end
102
172
  end
103
173
  end
104
-
174
+
175
+ # @param pipe [Object]
176
+ # @param data_item [Object]
177
+ # @return [Object]
178
+ # rubocop:disable Metrics/AbcSize
105
179
  def update_pipe(pipe, data_item)
106
180
  data_item[0...-1].each_with_index do |att, i|
107
181
  if att.is_a? Numeric
108
- pipe[i][:min] = att if att < pipe[i][:min]
109
- pipe[i][:max] = att if att > pipe[i][:max]
182
+ min_val = att - @margin
183
+ max_val = att + @margin
184
+ pipe[i][:min] = min_val if min_val < pipe[i][:min]
185
+ pipe[i][:max] = max_val if max_val > pipe[i][:max]
110
186
  else
111
187
  pipe[i][att] = true
112
- end
188
+ end
113
189
  end
114
190
  end
115
-
191
+ # rubocop:enable Metrics/AbcSize
116
192
  end
117
193
  end
118
194
  end