adaboost 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/adaboost/adaboost.rb +33 -24
- data/lib/adaboost/evaluator.rb +24 -22
- data/lib/adaboost/resampler.rb +2 -0
- data/lib/adaboost/weak_classifier.rb +3 -3
- data/lib/adaboost/weak_learner.rb +2 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0f5373579f8f93316192361c02c0b29e176f09b
|
4
|
+
data.tar.gz: 6f1c7a127b3b2283fab1b2bf3b4679e32e4587b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28f2c7b14a2e372be409a593c0ec45fda9feedbb08d700ae85b2714dbea3121aba8660b271711a4dac67245293fed1c579cebc662040dfc6d89e6db7d85f9c84
|
7
|
+
data.tar.gz: 6ea5ebf0d20243bbb5ba409990d3f7950d842a6a713ee6f14169ab96832e95d7b0296c884c63322f9c5066fc3511387fc775350fc1e63102625af2aa848734ed
|
data/lib/adaboost/adaboost.rb
CHANGED
@@ -12,6 +12,39 @@ module AdaBoost
|
|
12
12
|
@y_index = y_index
|
13
13
|
end
|
14
14
|
|
15
|
+
def train samples
|
16
|
+
if Config::OVER_SAMPLING_TRAINING_SET
|
17
|
+
resampler = Resampler.new @y_index
|
18
|
+
resampler.over_sample samples
|
19
|
+
end
|
20
|
+
initialize_weights samples
|
21
|
+
0.upto @number_of_classifiers - 1 do |i|
|
22
|
+
weak_classifier = @weak_learner.generate_weak_classifier samples, @weights
|
23
|
+
weak_classifier.compute_alpha
|
24
|
+
update_weights weak_classifier, samples
|
25
|
+
@weak_classifiers << weak_classifier
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def classify sample
|
30
|
+
score = 0.0
|
31
|
+
@weak_classifiers.each do |weak_classifier|
|
32
|
+
score += weak_classifier.classify_with_alpha sample
|
33
|
+
end
|
34
|
+
score
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.build_from_model model, y_index = 0
|
38
|
+
classifiers = model.weak_classifiers
|
39
|
+
adaboost = AdaBoost.new classifiers.size, y_index
|
40
|
+
classifiers.each do |classifier|
|
41
|
+
adaboost.weak_classifiers << WeakClassifier.new(classifier.feature_number, classifier.split, classifier.alpha)
|
42
|
+
end
|
43
|
+
adaboost
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
15
48
|
def initialize_weights samples
|
16
49
|
samples_size = samples.size.to_f
|
17
50
|
negative_weight = 1 / samples_size
|
@@ -46,29 +79,5 @@ module AdaBoost
|
|
46
79
|
@weights[i] /= sum
|
47
80
|
end
|
48
81
|
end
|
49
|
-
|
50
|
-
def train samples
|
51
|
-
puts "boom2"
|
52
|
-
if Config::OVER_SAMPLING_TRAINING_SET
|
53
|
-
resampler = Resampler.new @y_index
|
54
|
-
resampler.over_sample samples
|
55
|
-
end
|
56
|
-
initialize_weights samples
|
57
|
-
0.upto @number_of_classifiers - 1 do |i|
|
58
|
-
puts "boom"
|
59
|
-
weak_classifier = @weak_learner.generate_weak_classifier samples, @weights
|
60
|
-
weak_classifier.compute_alpha
|
61
|
-
update_weights weak_classifier, samples
|
62
|
-
@weak_classifiers << weak_classifier
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def classify sample
|
67
|
-
score = 0.0
|
68
|
-
@weak_classifiers.each do |weak_classifier|
|
69
|
-
score += weak_classifier.classify_with_alpha sample
|
70
|
-
end
|
71
|
-
score
|
72
|
-
end
|
73
82
|
end
|
74
83
|
end
|
data/lib/adaboost/evaluator.rb
CHANGED
@@ -21,6 +21,26 @@ module AdaBoost
|
|
21
21
|
contingency_table
|
22
22
|
end
|
23
23
|
|
24
|
+
def used_feature_numbers unique = false
|
25
|
+
used_feature_numbers = []
|
26
|
+
@classifier.weak_classifiers.each do |weak_classifier|
|
27
|
+
used_feature_numbers << weak_classifier.feature_number
|
28
|
+
end
|
29
|
+
unique ? used_feature_numbers.uniq : used_feature_numbers
|
30
|
+
end
|
31
|
+
|
32
|
+
def feature_occurrences
|
33
|
+
used_numbers = used_feature_numbers
|
34
|
+
occurrences = {}
|
35
|
+
used_numbers.each do |number|
|
36
|
+
occurrences[number] = 0 if occurrences[number].nil?
|
37
|
+
occurrences[number] += 1
|
38
|
+
end
|
39
|
+
occurrences
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
24
44
|
def threshold
|
25
45
|
if @threshold == Float::MAX
|
26
46
|
@threshold = 0
|
@@ -31,6 +51,10 @@ module AdaBoost
|
|
31
51
|
@threshold
|
32
52
|
end
|
33
53
|
|
54
|
+
def classify_normally sample
|
55
|
+
@classifier.classify(sample > 0) ? 1 : -1
|
56
|
+
end
|
57
|
+
|
34
58
|
def classify_using_threshold sample
|
35
59
|
score = 0.0
|
36
60
|
@classifier.weak_classifiers.each do |weak_classifier|
|
@@ -40,27 +64,5 @@ module AdaBoost
|
|
40
64
|
end
|
41
65
|
score > threshold ? 1 : -1
|
42
66
|
end
|
43
|
-
|
44
|
-
def classify_normally sample
|
45
|
-
@classifier.classify(sample > 0) ? 1 : -1
|
46
|
-
end
|
47
|
-
|
48
|
-
def used_feature_numbers unique = false
|
49
|
-
used_feature_numbers = []
|
50
|
-
@classifier.weak_classifiers.each do |weak_classifier|
|
51
|
-
used_feature_numbers << weak_classifier.feature_number
|
52
|
-
end
|
53
|
-
unique ? used_feature_numbers.uniq : used_feature_numbers
|
54
|
-
end
|
55
|
-
|
56
|
-
def feature_occurrences
|
57
|
-
used_numbers = used_feature_numbers
|
58
|
-
occurrences = {}
|
59
|
-
used_numbers.each do |number|
|
60
|
-
occurrences[number] = 0 if occurrences[number].nil?
|
61
|
-
occurrences[number] += 1
|
62
|
-
end
|
63
|
-
occurrences
|
64
|
-
end
|
65
67
|
end
|
66
68
|
end
|
data/lib/adaboost/resampler.rb
CHANGED
data/lib/adaboost/weak_classifier.rb
CHANGED
@@ -5,11 +5,11 @@ module AdaBoost
|
|
5
5
|
attr_accessor :error
|
6
6
|
attr_reader :feature_number, :split, :alpha
|
7
7
|
|
8
|
-
def initialize feature_number, split
|
8
|
+
def initialize feature_number, split, alpha = 0.0, error = 0.0
|
9
9
|
@feature_number = feature_number
|
10
10
|
@split = split
|
11
|
-
@error =
|
12
|
-
@alpha =
|
11
|
+
@error = error
|
12
|
+
@alpha = alpha
|
13
13
|
end
|
14
14
|
|
15
15
|
def compute_alpha
|