adaboost 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2144b2d6abb1c701c8dbf955673a9a5ffd96e00e
4
- data.tar.gz: 74dbd52e82039ca79e2dcfa3ef8c2b2ac20bd3e7
3
+ metadata.gz: c0f5373579f8f93316192361c02c0b29e176f09b
4
+ data.tar.gz: 6f1c7a127b3b2283fab1b2bf3b4679e32e4587b7
5
5
  SHA512:
6
- metadata.gz: 035ca3856d5343afde2f968f43cc5de165f0f69fb1f8284c87fe2ba4525599221eb711ffeced58346ddbfbf2b89f640a240fbf336c3c2e367d6e6f99826a932b
7
- data.tar.gz: d6f6adc39fa2327ea37e986f759976d83c9c0efda519f429af0d9325927fffe9dc8c540b84556ba890434fd59674b550191dd7d1f8df8ad9e39759795e697be8
6
+ metadata.gz: 28f2c7b14a2e372be409a593c0ec45fda9feedbb08d700ae85b2714dbea3121aba8660b271711a4dac67245293fed1c579cebc662040dfc6d89e6db7d85f9c84
7
+ data.tar.gz: 6ea5ebf0d20243bbb5ba409990d3f7950d842a6a713ee6f14169ab96832e95d7b0296c884c63322f9c5066fc3511387fc775350fc1e63102625af2aa848734ed
@@ -12,6 +12,39 @@ module AdaBoost
12
12
  @y_index = y_index
13
13
  end
14
14
 
15
+ def train samples
16
+ if Config::OVER_SAMPLING_TRAINING_SET
17
+ resampler = Resampler.new @y_index
18
+ resampler.over_sample samples
19
+ end
20
+ initialize_weights samples
21
+ 0.upto @number_of_classifiers - 1 do |i|
22
+ weak_classifier = @weak_learner.generate_weak_classifier samples, @weights
23
+ weak_classifier.compute_alpha
24
+ update_weights weak_classifier, samples
25
+ @weak_classifiers << weak_classifier
26
+ end
27
+ end
28
+
29
+ def classify sample
30
+ score = 0.0
31
+ @weak_classifiers.each do |weak_classifier|
32
+ score += weak_classifier.classify_with_alpha sample
33
+ end
34
+ score
35
+ end
36
+
37
+ def self.build_from_model model, y_index = 0
38
+ classifiers = model.weak_classifiers
39
+ adaboost = AdaBoost.new classifiers.size, y_index
40
+ classifiers.each do |classifier|
41
+ adaboost.weak_classifiers << WeakClassifier.new(classifier.feature_number, classifier.split, classifier.alpha)
42
+ end
43
+ adaboost
44
+ end
45
+
46
+ private
47
+
15
48
  def initialize_weights samples
16
49
  samples_size = samples.size.to_f
17
50
  negative_weight = 1 / samples_size
@@ -46,29 +79,5 @@ module AdaBoost
46
79
  @weights[i] /= sum
47
80
  end
48
81
  end
49
-
50
- def train samples
51
- puts "boom2"
52
- if Config::OVER_SAMPLING_TRAINING_SET
53
- resampler = Resampler.new @y_index
54
- resampler.over_sample samples
55
- end
56
- initialize_weights samples
57
- 0.upto @number_of_classifiers - 1 do |i|
58
- puts "boom"
59
- weak_classifier = @weak_learner.generate_weak_classifier samples, @weights
60
- weak_classifier.compute_alpha
61
- update_weights weak_classifier, samples
62
- @weak_classifiers << weak_classifier
63
- end
64
- end
65
-
66
- def classify sample
67
- score = 0.0
68
- @weak_classifiers.each do |weak_classifier|
69
- score += weak_classifier.classify_with_alpha sample
70
- end
71
- score
72
- end
73
82
  end
74
83
  end
@@ -21,6 +21,26 @@ module AdaBoost
21
21
  contingency_table
22
22
  end
23
23
 
24
+ def used_feature_numbers unique = false
25
+ used_feature_numbers = []
26
+ @classifier.weak_classifiers.each do |weak_classifier|
27
+ used_feature_numbers << weak_classifier.feature_number
28
+ end
29
+ unique ? used_feature_numbers.uniq : used_feature_numbers
30
+ end
31
+
32
+ def feature_occurrences
33
+ used_numbers = used_feature_numbers
34
+ occurrences = {}
35
+ used_numbers.each do |number|
36
+ occurrences[number] = 0 if occurrences[number].nil?
37
+ occurrences[number] += 1
38
+ end
39
+ occurrences
40
+ end
41
+
42
+ private
43
+
24
44
  def threshold
25
45
  if @threshold == Float::MAX
26
46
  @threshold = 0
@@ -31,6 +51,10 @@ module AdaBoost
31
51
  @threshold
32
52
  end
33
53
 
54
+ def classify_normally sample
55
+ @classifier.classify(sample > 0) ? 1 : -1
56
+ end
57
+
34
58
  def classify_using_threshold sample
35
59
  score = 0.0
36
60
  @classifier.weak_classifiers.each do |weak_classifier|
@@ -40,27 +64,5 @@ module AdaBoost
40
64
  end
41
65
  score > threshold ? 1 : -1
42
66
  end
43
-
44
- def classify_normally sample
45
- @classifier.classify(sample > 0) ? 1 : -1
46
- end
47
-
48
- def used_feature_numbers unique = false
49
- used_feature_numbers = []
50
- @classifier.weak_classifiers.each do |weak_classifier|
51
- used_feature_numbers << weak_classifier.feature_number
52
- end
53
- unique ? used_feature_numbers.uniq : used_feature_numbers
54
- end
55
-
56
- def feature_occurrences
57
- used_numbers = used_feature_numbers
58
- occurrences = {}
59
- used_numbers.each do |number|
60
- occurrences[number] = 0 if occurrences[number].nil?
61
- occurrences[number] += 1
62
- end
63
- occurrences
64
- end
65
67
  end
66
68
  end
@@ -23,6 +23,8 @@ module AdaBoost
23
23
  end
24
24
  end
25
25
 
26
+ private
27
+
26
28
  def distribution instances
27
29
  analyzer = FeaturesAnalyzer.new @y_index
28
30
  analyzer.analyze(instances).distribution
@@ -5,11 +5,11 @@ module AdaBoost
5
5
  attr_accessor :error
6
6
  attr_reader :feature_number, :split, :alpha
7
7
 
8
- def initialize feature_number, split
8
+ def initialize feature_number, split, alpha = 0.0, error = 0.0
9
9
  @feature_number = feature_number
10
10
  @split = split
11
- @error = 0.0
12
- @alpha = 0.0
11
+ @error = error
12
+ @alpha = alpha
13
13
  end
14
14
 
15
15
  def compute_alpha
@@ -50,6 +50,8 @@ module AdaBoost
50
50
  best
51
51
  end
52
52
 
53
+ private
54
+
53
55
  def generate_random_classifiers samples, number_of_features
54
56
  classifiers = []
55
57
  statistics = features_satistics samples
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: adaboost
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dalmir da Silva