adaboost 0.0.3 → 0.0.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f550adc8429f2927416f49e86461497d9f43a072
-  data.tar.gz: bf2fa99dad4d438a25588f93d8de7a2c62c7f45f
+  metadata.gz: dbef461fb8ab7809de5e99ec234e85c002427bd5
+  data.tar.gz: 446d84856754769aeb49e4526de18f4c547d85de
 SHA512:
-  metadata.gz: 38b3488dd4d034e9694200841a8b1c5022b60f8e8cb06afd88b99147dc140b73183153b12d7ba5c79e06865969958051dee9b59ef76af91e2901731626681b6b
-  data.tar.gz: 8f41f660c6a53b66ee8eed9a80b3948a66bae0bb516ab8d4f52d3322a66c0f40f9201f700cb499be885c5727bc1d2b65966efd04f48fe4bfdcb4453c8607fed9
+  metadata.gz: de5b31c2367c9459d09d5456684b595eeb8e2fa2b8846ee80911027e66b7a45a779762b464f2bd06365315ba298365727f8cbe1a81f8ebbdb3be31ef98686725
+  data.tar.gz: 8b6157db326c5510329fd3b0b3531ffe7e4bb8fee5fbcd3e5b1b73e36ea17081a0fe809c71e8fa6f2f871fdb238b3edb060c3559c6e196e8194fd18ecf604861
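
Note: these digests cover the metadata.gz and data.tar.gz archives packed inside the .gem file, so a download can be verified before installing. A minimal verification sketch using Ruby's standard Digest library (the local file name is illustrative, and data.tar.gz is assumed to have been extracted from the .gem, which is a plain tar archive):

    require 'digest'

    # SHA512 of the extracted data.tar.gz, compared against checksums.yaml above.
    expected = '8b6157db326c5510329fd3b0b3531ffe7e4bb8fee5fbcd3e5b1b73e36ea17081' \
               'a0fe809c71e8fa6f2f871fdb238b3edb060c3559c6e196e8194fd18ecf604861'
    actual = Digest::SHA512.file('data.tar.gz').hexdigest
    puts actual == expected ? 'checksum OK' : 'checksum MISMATCH'
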
@@ -4,40 +4,40 @@ module AdaBoost
 
     attr_reader :weak_classifiers, :y_index
 
-    def initialize number_of_classifiers, y_index
+    def initialize(number_of_classifiers, y_index)
       @weak_classifiers = []
-      @weak_learner = WeakLearner.new y_index
+      @weak_learner = WeakLearner.new(y_index)
       @number_of_classifiers = number_of_classifiers
       @weights = []
       @y_index = y_index
     end
 
-    def train samples
+    def train(samples)
       if Config::OVER_SAMPLING_TRAINING_SET
-        resampler = Resampler.new @y_index
-        resampler.over_sample samples
+        resampler = Resampler.new(@y_index)
+        resampler.over_sample(samples)
       end
-      initialize_weights samples
-      0.upto @number_of_classifiers - 1 do |i|
-        weak_classifier = @weak_learner.generate_weak_classifier samples, @weights
+      initialize_weights(samples)
+      0.upto(@number_of_classifiers - 1) do |i|
+        weak_classifier = @weak_learner.generate_weak_classifier(samples, @weights)
         weak_classifier.compute_alpha
-        update_weights weak_classifier, samples
+        update_weights(weak_classifier, samples)
         @weak_classifiers << weak_classifier
         yield i, weak_classifier if block_given?
       end
     end
 
-    def classify sample
+    def classify(sample)
       score = 0.0
       @weak_classifiers.each do |weak_classifier|
-        score += weak_classifier.classify_with_alpha sample
+        score += weak_classifier.classify_with_alpha(sample)
       end
       score
     end
 
-    def self.build_from_model model, y_index = 0
+    def self.build_from_model(model, y_index = 0)
       classifiers = model.weak_classifiers
-      adaboost = AdaBoost.new classifiers.size, y_index
+      adaboost = AdaBoost.new(classifiers.size, y_index)
       classifiers.each do |classifier|
         adaboost.weak_classifiers << WeakClassifier.new(classifier.feature_number, classifier.split, classifier.alpha)
       end
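
Note: every change in this hunk parenthesizes method definitions and call sites; behavior is unchanged. A minimal usage sketch of the public API shown above, under default Config settings (the toy data is made up, and the external class name AdaBoost::AdaBoost plus the require path are assumptions; samples are arrays whose -1/+1 label sits at the column named by y_index, with features in the columns before it):

    require 'adaboost'

    # Two features per sample, label at index 2, so y_index = 2.
    samples = [
      [0.2, 1.1, -1],
      [0.9, 0.3,  1],
      [0.4, 0.8, -1],
      [1.2, 0.1,  1]
    ]

    booster = AdaBoost::AdaBoost.new(10, 2) # 10 weak classifiers
    booster.train(samples) do |i, weak|
      # Optional block, called once per boosting round.
      puts "round #{i}: feature #{weak.feature_number}, split #{weak.split}"
    end

    score = booster.classify([0.5, 0.7, 0]) # sign of the score is the prediction
    puts score > 0 ? 'positive' : 'negative'
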
@@ -46,12 +46,12 @@ module AdaBoost
 
     private
 
-    def initialize_weights samples
+    def initialize_weights(samples)
       samples_size = samples.size.to_f
       negative_weight = 1 / samples_size
       positive_weight = negative_weight
       if Config::INCORPORATE_COST_SENSITIVE_LEARNING
-        analyzer = FeaturesAnalyzer.new @y_index
+        analyzer = FeaturesAnalyzer.new(@y_index)
         distribution = analyzer.analyze(samples).distribution
         positive_rate = distribution.positive / samples_size
         negative_rate = distribution.negative / samples_size
@@ -61,19 +61,15 @@ module AdaBoost
       end
       samples.each_with_index do |sample, i|
         y = sample[@y_index]
-        if y == -1
-          @weights[i] = positive_weight
-        else
-          @weights[i] = negative_weight
-        end
+        @weights[i] = (y == -1) ? positive_weight : negative_weight
       end
     end
 
-    def update_weights weak_classifier, samples
+    def update_weights(weak_classifier, samples)
       sum = 0.0
       samples.each_with_index do |sample, i|
         y = sample[@y_index]
-        @weights[i] *= Math.exp -(weak_classifier.alpha) * weak_classifier.classify(sample) * y
+        @weights[i] *= Math.exp(-(weak_classifier.alpha) * weak_classifier.classify(sample) * y)
         sum += @weights[i]
       end
       @weights.each_with_index do |_, i|
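
Note: the newly parenthesized Math.exp call is the standard AdaBoost re-weighting, w_i <- w_i * exp(-alpha * h(x_i) * y_i): a misclassified sample (h(x_i) * y_i == -1) has its weight multiplied by exp(alpha) > 1, a correct one by exp(-alpha) < 1, and the loop that follows divides every weight by sum so they again form a distribution. A self-contained sketch of one round with made-up values:

    alpha = 0.8
    ys = [1, -1, 1, -1]   # true labels
    hs = [1, -1, -1, -1]  # stump predictions; sample at index 2 is wrong
    weights = Array.new(4, 0.25)

    weights = weights.each_with_index.map { |w, i| w * Math.exp(-alpha * hs[i] * ys[i]) }
    sum = weights.sum
    weights.map! { |w| w / sum } # renormalize to a probability distribution
    p weights # the misclassified sample now carries by far the largest weight
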
@@ -22,7 +22,7 @@ module AdaBoost
       @table[1][0]
     end
 
-    def add_prediction y, h
+    def add_prediction(y, h)
       @table[class_to_index(y)][class_to_index(h)] += 1
     end
 
@@ -175,8 +175,8 @@ module AdaBoost
       ]
     end
 
-    def class_to_index k
-      k > 0 ? 1 : 0
+    def class_to_index(k)
+      (k > 0) ? 1 : 0
     end
   end
 end
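
Note: class_to_index collapses the label space {-1, +1} onto indices {0, 1}, so @table acts as a 2x2 confusion matrix indexed by [actual][predicted] (that layout is inferred from the methods shown here, not spelled out in this hunk). The same idea in isolation:

    table = Array.new(2) { Array.new(2, 0) }
    class_to_index = ->(k) { (k > 0) ? 1 : 0 }

    # [actual, predicted] pairs with -1/+1 labels.
    [[1, 1], [-1, -1], [1, -1], [-1, 1]].each do |y, h|
      table[class_to_index.(y)][class_to_index.(h)] += 1
    end

    tn, fp = table[0]
    fn, tp = table[1]
    puts "tp=#{tp} fp=#{fp} fn=#{fn} tn=#{tn}" # => tp=1 fp=1 fn=1 tn=1
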
@@ -2,26 +2,26 @@ module AdaBoost
 
   class Evaluator
 
-    def initialize classifier
+    def initialize(classifier)
       @classifier = classifier
       @threshold = Float::MAX
     end
 
-    def evaluate test_set
+    def evaluate(test_set)
       contingency_table = ContingencyTable.new
       test_set.each do |sample|
         y = sample[@classifier.y_index]
-        if Config::USE_THRESHOLD_CLASSIFICATION
-          h = classify_using_threshold sample
+        h = if Config::USE_THRESHOLD_CLASSIFICATION
+          classify_using_threshold(sample)
         else
-          h = e.classify_normally sample
+          classify_normally(sample)
         end
-        contingency_table.add_prediction y, h
+        contingency_table.add_prediction(y, h)
       end
       contingency_table
     end
 
-    def used_feature_numbers unique = false
+    def used_feature_numbers(unique = false)
       used_feature_numbers = []
       @classifier.weak_classifiers.each do |weak_classifier|
         used_feature_numbers << weak_classifier.feature_number
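
Note: besides the parentheses, this hunk fixes a genuine bug: the old else branch called `e.classify_normally sample` on `e`, a receiver that was never defined. The rewrite also assigns the whole if expression to h, which works because if in Ruby is an expression whose value is the last expression of the branch taken:

    use_threshold = false
    h = if use_threshold
      :threshold_path
    else
      :normal_path
    end
    p h # => :normal_path
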
@@ -51,11 +51,11 @@ module AdaBoost
       @threshold
     end
 
-    def classify_normally sample
+    def classify_normally(sample)
       @classifier.classify(sample > 0) ? 1 : -1
     end
 
-    def classify_using_threshold sample
+    def classify_using_threshold(sample)
       score = 0.0
       @classifier.weak_classifiers.each do |weak_classifier|
         if sample[weak_classifier.feature_number] > weak_classifier.split
@@ -7,45 +7,45 @@ module AdaBoost
 
   class FeaturesAnalyzer
 
-    def initialize y_index
+    def initialize(y_index)
      @y_index = y_index
    end
 
-    def analyze samples
+    def analyze(samples)
 
      statistics = []
-      distribution = Distribution.new 0, 0
+      distribution = Distribution.new(0, 0)
      number_of_samples = samples.size
 
      if number_of_samples < 1
-        raise ArgumentError.new 'At least one sample is needed to analyze.'
+        raise ArgumentError.new('At least one sample is needed to analyze.')
      end
      number_of_features = @y_index
      sample_size = samples[0].size
      if number_of_features < 1 or sample_size < 2 or sample_size <= @y_index
-        raise ArgumentError.new 'At least 1 feature is needed to analyze.'
+        raise ArgumentError.new('At least 1 feature is needed to analyze.')
      end
-      0.upto number_of_features - 1 do
-        statistics << FeatureStatistic.new(Float::MAX, -Float::MAX, 0, 0, 0, 0)
+      0.upto(number_of_features - 1) do
+        statistics << FeatureStatistic.new(Float::MAX, -Float::MAX, 0, 0, 0, 0)
      end
      samples.each do |sample|
-          y = sample[@y_index]
-          if y == -1
-            distribution.negative += 1
-          else
-            distribution.positive += 1
+        y = sample[@y_index]
+        if y == -1
+          distribution.negative += 1
+        else
+          distribution.positive += 1
+        end
+        0.upto(number_of_features - 1) do |i|
+          statistic = statistics[i]
+          feature_value = sample[i]
+          if feature_value < statistic.min
+            statistic.min = feature_value
          end
-          0.upto number_of_features - 1 do |i|
-            statistic = statistics[i]
-            feature_value = sample[i]
-            if feature_value < statistic.min
-              statistic.min = feature_value
-            end
-            if feature_value > statistic.max
-              statistic.max = feature_value
-            end
-            statistic.sum += feature_value
+          if feature_value > statistic.max
+            statistic.max = feature_value
          end
+          statistic.sum += feature_value
+        end
      end
      statistics.each do |statistic|
        statistic.avg = statistic.sum / number_of_samples.to_f
@@ -67,7 +67,7 @@ module AdaBoost
       analyze
     end
 
-    def relations x, y, samples, statistics
+    def relations(x, y, samples, statistics)
       sum = 0.0
       samples.each do |sample|
         x_value = sample[x].to_f
@@ -76,7 +76,7 @@ module AdaBoost
       end
       cov = sum / (samples.size - 1).to_f
       cor = cov / (statistics[x].std * statistics[y].std).to_f
-      VariableRelations.new x, y, cov, cor
+      VariableRelations.new(x, y, cov, cor)
     end
   end
 end
@@ -2,12 +2,12 @@ module AdaBoost
 
   class Resampler
 
-    def initialize y_index
+    def initialize(y_index)
       @y_index = y_index
     end
 
-    def over_sample samples
-      distribution = distribution samples
+    def over_sample(samples)
+      distribution = distribution(samples)
       y0 = distribution.negative
       y1 = distribution.positive
       majority = y0 < y1 ? 1.0 : -1.0
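
Note: in `distribution = distribution(samples)` the parentheses are more than style. The bare form still parses as a method call because it takes an argument, but once the local `distribution` exists, an unparenthesized zero-argument reference resolves to the variable rather than the private method, so the explicit call syntax keeps intent unambiguous. A stand-alone illustration of that rule with a hypothetical method:

    def price
      42
    end

    def demo
      price = price() # parentheses force the method call; a bare `price`
                      # on the right would be the newly declared local, still nil
      price + 1
    end

    puts demo # => 43
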
@@ -25,8 +25,8 @@ module AdaBoost
 
     private
 
-    def distribution instances
-      analyzer = FeaturesAnalyzer.new @y_index
+    def distribution(instances)
+      analyzer = FeaturesAnalyzer.new(@y_index)
       analyzer.analyze(instances).distribution
     end
   end
@@ -5,7 +5,7 @@ module AdaBoost
     attr_accessor :error
     attr_reader :feature_number, :split, :alpha
 
-    def initialize feature_number, split, alpha = 0.0, error = 0.0
+    def initialize(feature_number, split, alpha = 0.0, error = 0.0)
       @feature_number = feature_number
       @split = split
       @error = error
@@ -16,15 +16,15 @@ module AdaBoost
       @alpha = 0.5 * Math.log((1.0 - @error) / @error)
     end
 
-    def classify sample
+    def classify(sample)
       sample[@feature_number] > @split ? 1 : -1
     end
 
-    def classify_with_alpha sample
+    def classify_with_alpha(sample)
       return classify(sample) * @alpha
     end
 
-    def increase_error amount
+    def increase_error(amount)
       @error += amount
     end
   end
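
Note: the context line at the top of this hunk is the classifier weight used throughout the gem, alpha = 0.5 * ln((1 - e) / e) for weighted error e: alpha is zero at chance level (e = 0.5) and grows as the stump improves on chance. A quick numeric check:

    alpha = ->(e) { 0.5 * Math.log((1.0 - e) / e) }

    puts alpha.(0.5)  # 0.0    -- no better than chance, gets no vote
    puts alpha.(0.45) # ~0.100
    puts alpha.(0.2)  # ~0.693 -- stronger stumps earn larger votes
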
@@ -2,31 +2,31 @@ module AdaBoost
 
   class WeakLearner
 
-    def initialize y_index
+    def initialize(y_index)
       @y_index = y_index
-      @analyzer = FeaturesAnalyzer.new y_index
+      @analyzer = FeaturesAnalyzer.new(y_index)
       @classifiers_cache = []
     end
 
-    def features_satistics samples
+    def features_satistics(samples)
       @analyzer.analyze(samples).statistics
     end
 
-    def generate_weak_classifier samples, weights
+    def generate_weak_classifier(samples, weights)
       number_of_samples = samples.size
       if number_of_samples < 1
-        raise ArgumentError.new 'At least one sample is needed to generate.'
+        raise ArgumentError.new('At least one sample is needed to generate.')
       end
       number_of_features = @y_index
       sample_size = samples[0].size
       if number_of_features < 1 or sample_size < 2 or sample_size <= @y_index
-        raise ArgumentError.new 'At least 1 feature is needed to generate.'
+        raise ArgumentError.new('At least 1 feature is needed to generate.')
       end
       classifiers = []
       if Config::USE_RANDOM_WEAK_CLASSIFIERS
-        classifiers = generate_random_classifiers samples, number_of_features
+        classifiers = generate_random_classifiers(samples, number_of_features)
       else
-        classifiers = generate_all_possible_classifiers samples, number_of_features
+        classifiers = generate_all_possible_classifiers(samples, number_of_features)
       end
       best_index = -1
       best_error = Float::MAX
@@ -35,7 +35,7 @@ module AdaBoost
       samples.each_with_index do |sample, j|
         y = sample[@y_index]
         if classifier.classify(sample).to_f != y
-          classifier.increase_error weights[j]
+          classifier.increase_error(weights[j])
         end
       end
       if classifier.error < best_error
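
Note: this loop charges each candidate stump weights[j] for every sample it misclassifies, so the quantity compared below is the weighted training error that drives boosting, and the candidate with the lowest total wins. The computation in isolation (the stump rule is restated from WeakClassifier#classify above):

    # Weighted error of a decision stump that predicts +1 when the
    # feature value exceeds the split, else -1.
    def weighted_error(samples, weights, y_index, feature, split)
      samples.each_with_index.sum do |sample, j|
        h = sample[feature] > split ? 1 : -1
        h != sample[y_index] ? weights[j] : 0.0
      end
    end

    samples = [[0.1, -1], [0.6, 1], [0.9, 1], [0.4, -1]]
    weights = [0.25, 0.25, 0.25, 0.25]
    puts weighted_error(samples, weights, 1, 0, 0.5) # => 0.0, a perfect split
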
@@ -45,33 +45,33 @@ module AdaBoost
       end
       best = classifiers[best_index]
       if !Config::USE_RANDOM_WEAK_CLASSIFIERS
-        classifiers.delete_at best_index
+        classifiers.delete_at(best_index)
       end
       best
     end
 
     private
 
-    def generate_random_classifiers samples, number_of_features
+    def generate_random_classifiers(samples, number_of_features)
       classifiers = []
-      statistics = features_satistics samples
-      0.upto Config::NUMBER_OF_RANDOM_CLASSIFIERS - 1 do
-        feature_number = rand number_of_features
-        info = statistics[feature_number]
-        split = rand * info.rng + info.min
-        classifiers << WeakClassifier.new(feature_number, split)
+      statistics = features_satistics(samples)
+      0.upto(Config::NUMBER_OF_RANDOM_CLASSIFIERS - 1) do
+        feature_number = rand(number_of_features)
+        info = statistics[feature_number]
+        split = rand * info.rng + info.min
+        classifiers << WeakClassifier.new(feature_number, split)
       end
       classifiers
     end
 
-    def generate_all_possible_classifiers samples, number_of_features
+    def generate_all_possible_classifiers(samples, number_of_features)
       if @classifiers_cache.size == 0
         matrix = []
-        0.upto number_of_features - 1 do
+        0.upto(number_of_features - 1) do
           matrix << []
         end
         samples.each do |sample|
-          0.upto number_of_features - 1 do |i|
+          0.upto(number_of_features - 1) do |i|
             sample_value = sample[i]
             matrix[i] << sample_value
           end
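
Note: in the random strategy above, `split = rand * info.rng + info.min` draws a threshold uniformly from the observed [min, max] range of the chosen feature, with rng naming the max - min field of FeatureStatistic. A stand-alone sketch of that sampling:

    # Draw n candidate split points uniformly within a feature's observed range.
    def random_splits(values, n)
      min, max = values.minmax
      Array.new(n) { rand * (max - min) + min }
    end

    p random_splits([0.2, 1.4, 0.9, 0.5], 3) # e.g. [0.83, 1.12, 0.41]
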
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: adaboost
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - Dalmir da Silva