adaboost 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f550adc8429f2927416f49e86461497d9f43a072
4
- data.tar.gz: bf2fa99dad4d438a25588f93d8de7a2c62c7f45f
3
+ metadata.gz: dbef461fb8ab7809de5e99ec234e85c002427bd5
4
+ data.tar.gz: 446d84856754769aeb49e4526de18f4c547d85de
5
5
  SHA512:
6
- metadata.gz: 38b3488dd4d034e9694200841a8b1c5022b60f8e8cb06afd88b99147dc140b73183153b12d7ba5c79e06865969958051dee9b59ef76af91e2901731626681b6b
7
- data.tar.gz: 8f41f660c6a53b66ee8eed9a80b3948a66bae0bb516ab8d4f52d3322a66c0f40f9201f700cb499be885c5727bc1d2b65966efd04f48fe4bfdcb4453c8607fed9
6
+ metadata.gz: de5b31c2367c9459d09d5456684b595eeb8e2fa2b8846ee80911027e66b7a45a779762b464f2bd06365315ba298365727f8cbe1a81f8ebbdb3be31ef98686725
7
+ data.tar.gz: 8b6157db326c5510329fd3b0b3531ffe7e4bb8fee5fbcd3e5b1b73e36ea17081a0fe809c71e8fa6f2f871fdb238b3edb060c3559c6e196e8194fd18ecf604861
@@ -4,40 +4,40 @@ module AdaBoost
4
4
 
5
5
  attr_reader :weak_classifiers, :y_index
6
6
 
7
- def initialize number_of_classifiers, y_index
7
+ def initialize(number_of_classifiers, y_index)
8
8
  @weak_classifiers = []
9
- @weak_learner = WeakLearner.new y_index
9
+ @weak_learner = WeakLearner.new(y_index)
10
10
  @number_of_classifiers = number_of_classifiers
11
11
  @weights = []
12
12
  @y_index = y_index
13
13
  end
14
14
 
15
- def train samples
15
+ def train(samples)
16
16
  if Config::OVER_SAMPLING_TRAINING_SET
17
- resampler = Resampler.new @y_index
18
- resampler.over_sample samples
17
+ resampler = Resampler.new(@y_index)
18
+ resampler.over_sample(samples)
19
19
  end
20
- initialize_weights samples
21
- 0.upto @number_of_classifiers - 1 do |i|
22
- weak_classifier = @weak_learner.generate_weak_classifier samples, @weights
20
+ initialize_weights(samples)
21
+ 0.upto(@number_of_classifiers - 1) do |i|
22
+ weak_classifier = @weak_learner.generate_weak_classifier(samples, @weights)
23
23
  weak_classifier.compute_alpha
24
- update_weights weak_classifier, samples
24
+ update_weights(weak_classifier, samples)
25
25
  @weak_classifiers << weak_classifier
26
26
  yield i, weak_classifier if block_given?
27
27
  end
28
28
  end
29
29
 
30
- def classify sample
30
+ def classify(sample)
31
31
  score = 0.0
32
32
  @weak_classifiers.each do |weak_classifier|
33
- score += weak_classifier.classify_with_alpha sample
33
+ score += weak_classifier.classify_with_alpha(sample)
34
34
  end
35
35
  score
36
36
  end
37
37
 
38
- def self.build_from_model model, y_index = 0
38
+ def self.build_from_model(model, y_index = 0)
39
39
  classifiers = model.weak_classifiers
40
- adaboost = AdaBoost.new classifiers.size, y_index
40
+ adaboost = AdaBoost.new(classifiers.size, y_index)
41
41
  classifiers.each do |classifier|
42
42
  adaboost.weak_classifiers << WeakClassifier.new(classifier.feature_number, classifier.split, classifier.alpha)
43
43
  end
@@ -46,12 +46,12 @@ module AdaBoost
46
46
 
47
47
  private
48
48
 
49
- def initialize_weights samples
49
+ def initialize_weights(samples)
50
50
  samples_size = samples.size.to_f
51
51
  negative_weight = 1 / samples_size
52
52
  positive_weight = negative_weight
53
53
  if Config::INCORPORATE_COST_SENSITIVE_LEARNING
54
- analyzer = FeaturesAnalyzer.new @y_index
54
+ analyzer = FeaturesAnalyzer.new(@y_index)
55
55
  distribution = analyzer.analyze(samples).distribution
56
56
  positive_rate = distribution.positive / samples_size
57
57
  negative_rate = distribution.negative / samples_size
@@ -61,19 +61,15 @@ module AdaBoost
61
61
  end
62
62
  samples.each_with_index do |sample, i|
63
63
  y = sample[@y_index]
64
- if y == -1
65
- @weights[i] = positive_weight
66
- else
67
- @weights[i] = negative_weight
68
- end
64
+ @weights[i] = (y == -1) ? positive_weight : negative_weight
69
65
  end
70
66
  end
71
67
 
72
- def update_weights weak_classifier, samples
68
+ def update_weights(weak_classifier, samples)
73
69
  sum = 0.0
74
70
  samples.each_with_index do |sample, i|
75
71
  y = sample[@y_index]
76
- @weights[i] *= Math.exp -(weak_classifier.alpha) * weak_classifier.classify(sample) * y
72
+ @weights[i] *= Math.exp(-(weak_classifier.alpha) * weak_classifier.classify(sample) * y)
77
73
  sum += @weights[i]
78
74
  end
79
75
  @weights.each_with_index do |_, i|
@@ -22,7 +22,7 @@ module AdaBoost
22
22
  @table[1][0]
23
23
  end
24
24
 
25
- def add_prediction y, h
25
+ def add_prediction(y, h)
26
26
  @table[class_to_index(y)][class_to_index(h)] += 1
27
27
  end
28
28
 
@@ -175,8 +175,8 @@ module AdaBoost
175
175
  ]
176
176
  end
177
177
 
178
- def class_to_index k
179
- k > 0 ? 1 : 0
178
+ def class_to_index(k)
179
+ (k > 0) ? 1 : 0
180
180
  end
181
181
  end
182
182
  end
@@ -2,26 +2,26 @@ module AdaBoost
2
2
 
3
3
  class Evaluator
4
4
 
5
- def initialize classifier
5
+ def initialize(classifier)
6
6
  @classifier = classifier
7
7
  @threshold = Float::MAX
8
8
  end
9
9
 
10
- def evaluate test_set
10
+ def evaluate(test_set)
11
11
  contingency_table = ContingencyTable.new
12
12
  test_set.each do |sample|
13
13
  y = sample[@classifier.y_index]
14
- if Config::USE_THRESHOLD_CLASSIFICATION
15
- h = classify_using_threshold sample
14
+ h = if Config::USE_THRESHOLD_CLASSIFICATION
15
+ classify_using_threshold(sample)
16
16
  else
17
- h = e.classify_normally sample
17
+ classify_normally(sample)
18
18
  end
19
- contingency_table.add_prediction y, h
19
+ contingency_table.add_prediction(y, h)
20
20
  end
21
21
  contingency_table
22
22
  end
23
23
 
24
- def used_feature_numbers unique = false
24
+ def used_feature_numbers(unique = false)
25
25
  used_feature_numbers = []
26
26
  @classifier.weak_classifiers.each do |weak_classifier|
27
27
  used_feature_numbers << weak_classifier.feature_number
@@ -51,11 +51,11 @@ module AdaBoost
51
51
  @threshold
52
52
  end
53
53
 
54
- def classify_normally sample
54
+ def classify_normally(sample)
55
55
  @classifier.classify(sample > 0) ? 1 : -1
56
56
  end
57
57
 
58
- def classify_using_threshold sample
58
+ def classify_using_threshold(sample)
59
59
  score = 0.0
60
60
  @classifier.weak_classifiers.each do |weak_classifier|
61
61
  if sample[weak_classifier.feature_number] > weak_classifier.split
@@ -7,45 +7,45 @@ module AdaBoost
7
7
 
8
8
  class FeaturesAnalyzer
9
9
 
10
- def initialize y_index
10
+ def initialize(y_index)
11
11
  @y_index = y_index
12
12
  end
13
13
 
14
- def analyze samples
14
+ def analyze(samples)
15
15
 
16
16
  statistics = []
17
- distribution = Distribution.new 0, 0
17
+ distribution = Distribution.new(0, 0)
18
18
  number_of_samples = samples.size
19
19
 
20
20
  if number_of_samples < 1
21
- raise ArgumentError.new 'At least one sample is needed to analyze.'
21
+ raise ArgumentError.new('At least one sample is needed to analyze.')
22
22
  end
23
23
  number_of_features = @y_index
24
24
  sample_size = samples[0].size
25
25
  if number_of_features < 1 or sample_size < 2 or sample_size <= @y_index
26
- raise ArgumentError.new 'At least 1 feature is needed to analyze.'
26
+ raise ArgumentError.new('At least 1 feature is needed to analyze.')
27
27
  end
28
- 0.upto number_of_features - 1 do
29
- statistics << FeatureStatistic.new(Float::MAX, -Float::MAX, 0, 0, 0, 0)
28
+ 0.upto(number_of_features - 1) do
29
+ statistics << FeatureStatistic.new(Float::MAX, -Float::MAX, 0, 0, 0, 0)
30
30
  end
31
31
  samples.each do |sample|
32
- y = sample[@y_index]
33
- if y == -1
34
- distribution.negative += 1
35
- else
36
- distribution.positive += 1
32
+ y = sample[@y_index]
33
+ if y == -1
34
+ distribution.negative += 1
35
+ else
36
+ distribution.positive += 1
37
+ end
38
+ 0.upto(number_of_features - 1) do |i|
39
+ statistic = statistics[i]
40
+ feature_value = sample[i]
41
+ if feature_value < statistic.min
42
+ statistic.min = feature_value
37
43
  end
38
- 0.upto number_of_features - 1 do |i|
39
- statistic = statistics[i]
40
- feature_value = sample[i]
41
- if feature_value < statistic.min
42
- statistic.min = feature_value
43
- end
44
- if feature_value > statistic.max
45
- statistic.max = feature_value
46
- end
47
- statistic.sum += feature_value
44
+ if feature_value > statistic.max
45
+ statistic.max = feature_value
48
46
  end
47
+ statistic.sum += feature_value
48
+ end
49
49
  end
50
50
  statistics.each do |statistic|
51
51
  statistic.avg = statistic.sum / number_of_samples.to_f
@@ -67,7 +67,7 @@ module AdaBoost
67
67
  analyze
68
68
  end
69
69
 
70
- def relations x, y, samples, statistics
70
+ def relations(x, y, samples, statistics)
71
71
  sum = 0.0
72
72
  samples.each do |sample|
73
73
  x_value = sample[x].to_f
@@ -76,7 +76,7 @@ module AdaBoost
76
76
  end
77
77
  cov = sum / (samples.size - 1).to_f
78
78
  cor = cov / (statistics[x].std * statistics[y].std).to_f
79
- VariableRelations.new x, y, cov, cor
79
+ VariableRelations.new(x, y, cov, cor)
80
80
  end
81
81
  end
82
82
  end
@@ -2,12 +2,12 @@ module AdaBoost
2
2
 
3
3
  class Resampler
4
4
 
5
- def initialize y_index
5
+ def initialize(y_index)
6
6
  @y_index = y_index
7
7
  end
8
8
 
9
- def over_sample samples
10
- distribution = distribution samples
9
+ def over_sample(samples)
10
+ distribution = distribution(samples)
11
11
  y0 = distribution.negative
12
12
  y1 = distribution.positive
13
13
  majority = y0 < y1 ? 1.0 : -1.0
@@ -25,8 +25,8 @@ module AdaBoost
25
25
 
26
26
  private
27
27
 
28
- def distribution instances
29
- analyzer = FeaturesAnalyzer.new @y_index
28
+ def distribution(instances)
29
+ analyzer = FeaturesAnalyzer.new(@y_index)
30
30
  analyzer.analyze(instances).distribution
31
31
  end
32
32
  end
@@ -5,7 +5,7 @@ module AdaBoost
5
5
  attr_accessor :error
6
6
  attr_reader :feature_number, :split, :alpha
7
7
 
8
- def initialize feature_number, split, alpha = 0.0, error = 0.0
8
+ def initialize(feature_number, split, alpha = 0.0, error = 0.0)
9
9
  @feature_number = feature_number
10
10
  @split = split
11
11
  @error = error
@@ -16,15 +16,15 @@ module AdaBoost
16
16
  @alpha = 0.5 * Math.log((1.0 - @error) / @error)
17
17
  end
18
18
 
19
- def classify sample
19
+ def classify(sample)
20
20
  sample[@feature_number] > @split ? 1 : -1
21
21
  end
22
22
 
23
- def classify_with_alpha sample
23
+ def classify_with_alpha(sample)
24
24
  return classify(sample) * @alpha
25
25
  end
26
26
 
27
- def increase_error amount
27
+ def increase_error(amount)
28
28
  @error += amount
29
29
  end
30
30
  end
@@ -2,31 +2,31 @@ module AdaBoost
2
2
 
3
3
  class WeakLearner
4
4
 
5
- def initialize y_index
5
+ def initialize(y_index)
6
6
  @y_index = y_index
7
- @analyzer = FeaturesAnalyzer.new y_index
7
+ @analyzer = FeaturesAnalyzer.new(y_index)
8
8
  @classifiers_cache = []
9
9
  end
10
10
 
11
- def features_satistics samples
11
+ def features_satistics(samples)
12
12
  @analyzer.analyze(samples).statistics
13
13
  end
14
14
 
15
- def generate_weak_classifier samples, weights
15
+ def generate_weak_classifier(samples, weights)
16
16
  number_of_samples = samples.size
17
17
  if number_of_samples < 1
18
- raise ArgumentError.new 'At least one sample is needed to generate.'
18
+ raise ArgumentError.new('At least one sample is needed to generate.')
19
19
  end
20
20
  number_of_features = @y_index
21
21
  sample_size = samples[0].size
22
22
  if number_of_features < 1 or sample_size < 2 or sample_size <= @y_index
23
- raise ArgumentError.new 'At least 1 feature is needed to generate.'
23
+ raise ArgumentError.new('At least 1 feature is needed to generate.')
24
24
  end
25
25
  classifiers = []
26
26
  if Config::USE_RANDOM_WEAK_CLASSIFIERS
27
- classifiers = generate_random_classifiers samples, number_of_features
27
+ classifiers = generate_random_classifiers(samples, number_of_features)
28
28
  else
29
- classifiers = generate_all_possible_classifiers samples, number_of_features
29
+ classifiers = generate_all_possible_classifiers(samples, number_of_features)
30
30
  end
31
31
  best_index = -1
32
32
  best_error = Float::MAX
@@ -35,7 +35,7 @@ module AdaBoost
35
35
  samples.each_with_index do |sample, j|
36
36
  y = sample[@y_index]
37
37
  if classifier.classify(sample).to_f != y
38
- classifier.increase_error weights[j]
38
+ classifier.increase_error(weights[j])
39
39
  end
40
40
  end
41
41
  if classifier.error < best_error
@@ -45,33 +45,33 @@ module AdaBoost
45
45
  end
46
46
  best = classifiers[best_index]
47
47
  if !Config::USE_RANDOM_WEAK_CLASSIFIERS
48
- classifiers.delete_at best_index
48
+ classifiers.delete_at(best_index)
49
49
  end
50
50
  best
51
51
  end
52
52
 
53
53
  private
54
54
 
55
- def generate_random_classifiers samples, number_of_features
55
+ def generate_random_classifiers(samples, number_of_features)
56
56
  classifiers = []
57
- statistics = features_satistics samples
58
- 0.upto Config::NUMBER_OF_RANDOM_CLASSIFIERS - 1 do
59
- feature_number = rand number_of_features
60
- info = statistics[feature_number]
61
- split = rand * info.rng + info.min
62
- classifiers << WeakClassifier.new(feature_number, split)
57
+ statistics = features_satistics(samples)
58
+ 0.upto(Config::NUMBER_OF_RANDOM_CLASSIFIERS - 1) do
59
+ feature_number = rand(number_of_features)
60
+ info = statistics[feature_number]
61
+ split = rand * info.rng + info.min
62
+ classifiers << WeakClassifier.new(feature_number, split)
63
63
  end
64
64
  classifiers
65
65
  end
66
66
 
67
- def generate_all_possible_classifiers samples, number_of_features
67
+ def generate_all_possible_classifiers(samples, number_of_features)
68
68
  if @classifiers_cache.size == 0
69
69
  matrix = []
70
- 0.upto number_of_features - 1 do
70
+ 0.upto(number_of_features - 1) do
71
71
  matrix << []
72
72
  end
73
73
  samples.each do |sample|
74
- 0.upto number_of_features - 1 do |i|
74
+ 0.upto(number_of_features - 1) do |i|
75
75
  sample_value = sample[i]
76
76
  matrix[i] << sample_value
77
77
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: adaboost
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dalmir da Silva