stamina-induction 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +78 -0
- data/LICENCE.md +22 -0
- data/lib/stamina-induction/stamina-induction.rb +1 -0
- data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
- data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
- data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
- data/lib/stamina-induction/stamina/classifier.rb +55 -0
- data/lib/stamina-induction/stamina/command.rb +6 -0
- data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
- data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
- data/lib/stamina-induction/stamina/command/classify.rb +47 -0
- data/lib/stamina-induction/stamina/command/infer.rb +140 -0
- data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
- data/lib/stamina-induction/stamina/command/score.rb +34 -0
- data/lib/stamina-induction/stamina/dsl.rb +2 -0
- data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
- data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
- data/lib/stamina-induction/stamina/induction.rb +13 -0
- data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
- data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
- data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
- data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
- data/lib/stamina-induction/stamina/input_string.rb +123 -0
- data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
- data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
- data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
- data/lib/stamina-induction/stamina/sample.rb +309 -0
- data/lib/stamina-induction/stamina/scoring.rb +213 -0
- metadata +106 -0
@@ -0,0 +1,213 @@
|
|
1
|
+
module Stamina
  #
  # Provides utility methods for scoring binary classifiers from signatures.
  #
  # The instance methods below expect the receiver to respond to `[]` with the
  # four confusion-matrix keys :true_positive, :true_negative, :false_positive
  # and :false_negative (Scoring.scoring returns such an object).
  #
  # All rates are computed with Float division and are fractions (not values
  # scaled to 100). When a denominator is zero the result is therefore NaN
  # (or Infinity for the likelihood ratios); no ZeroDivisionError is raised.
  #
  module Scoring

    #
    # From the signatures of a learned model and an actual one, returns an
    # object responding to all instance methods defined in the Scoring module.
    #
    # Signatures are compared position by position: a '1' at a given position
    # is read as "positive", anything else as "negative". `actual` gives the
    # real class of each string and `learned` the label that was predicted.
    #
    # learned  - signature produced by the learned model
    # actual   - reference signature, must have the same size as `learned`
    # max_size - optional number of leading positions to take into account;
    #            defaults to (and is capped at) the size of the signatures
    #
    # Returns a Hash holding the four confusion-matrix counts, extended with
    # the Scoring module.
    #
    # Raises ArgumentError if both signatures do not have the same size.
    #
    def self.scoring(learned, actual, max_size=nil)
      unless learned.size == actual.size
        raise ArgumentError, "Signatures must be of same size (#{learned.size} vs. #{actual.size})"
      end
      max_size ||= learned.size
      max_size = learned.size if max_size > learned.size

      tp, fn, fp, tn = 0, 0, 0, 0
      (0...max_size).each do |i|
        # [i..i] always yields a one-character String, whatever the Ruby version
        positive   = (actual[i..i]  == '1')
        labeled_as = (learned[i..i] == '1')
        if positive == labeled_as
          positive ? (tp += 1) : (tn += 1)
        else
          positive ? (fn += 1) : (fp += 1)
        end
      end

      measures = { :true_positive  => tp,
                   :true_negative  => tn,
                   :false_positive => fp,
                   :false_negative => fn }
      measures.extend(Scoring)
    end

    #
    # Returns the number of positive strings correctly labeled as positive.
    #
    def true_positive
      self[:true_positive]
    end

    #
    # Returns the number of negative strings correctly labeled as negative.
    #
    def true_negative
      self[:true_negative]
    end

    #
    # Returns the number of negative strings incorrectly labeled as positive.
    #
    def false_positive
      self[:false_positive]
    end

    #
    # Returns the number of positive strings incorrectly labeled as negative.
    #
    def false_negative
      self[:false_negative]
    end

    #
    # Returns the proportion of positive predictions that are correct,
    # i.e. tp / (tp + fp).
    #
    def precision
      true_positive.to_f / (true_positive + false_positive)
    end
    alias :positive_predictive_value :precision

    #
    # Returns the proportion of negative predictions that are correct,
    # i.e. tn / (tn + fn).
    #
    def negative_predictive_value
      true_negative.to_f / (true_negative + false_negative)
    end

    #
    # Returns the proportion of positive strings that were predicted as being
    # positive, i.e. tp / (tp + fn).
    #
    def recall
      true_positive.to_f / (true_positive + false_negative)
    end
    alias :sensitivity :recall
    alias :true_positive_rate :recall

    #
    # Returns the proportion of negative strings that were predicted as being
    # negative, i.e. tn / (tn + fp).
    #
    def specificity
      true_negative.to_f / (true_negative + false_positive)
    end
    alias :true_negative_rate :specificity

    #
    # Returns the proportion of negative strings that were predicted as being
    # positive, i.e. fp / (fp + tn).
    #
    def false_positive_rate
      false_positive.to_f / (false_positive + true_negative)
    end

    #
    # Returns the proportion of positive strings that were predicted as being
    # negative, i.e. fn / (tp + fn).
    #
    def false_negative_rate
      false_negative.to_f / (true_positive + false_negative)
    end

    #
    # Returns the positive likelihood ratio,
    # i.e. sensitivity / (1 - specificity).
    #
    def positive_likelihood
      sensitivity / (1.0 - specificity)
    end

    #
    # Returns the negative likelihood ratio,
    # i.e. (1 - sensitivity) / specificity.
    #
    def negative_likelihood
      (1.0 - sensitivity) / specificity
    end

    #
    # Returns the proportion of predictions that are correct.
    #
    def accuracy
      num = (true_positive + true_negative).to_f
      den = (true_positive + true_negative + false_positive + false_negative)
      num / den
    end

    #
    # Returns the proportion of predictions that are incorrect.
    #
    def error_rate
      num = (false_positive + false_negative).to_f
      den = (true_positive + true_negative + false_positive + false_negative)
      num / den
    end

    #
    # Returns the harmonic mean between precision and recall.
    #
    def f_measure
      2.0 * (precision * recall) / (precision + recall)
    end

    #
    # Returns the balanced classification rate (arithmetic mean between
    # sensitivity and specificity).
    #
    def balanced_classification_rate
      0.5 * (sensitivity + specificity)
    end
    alias :bcr :balanced_classification_rate

    #
    # Returns the balanced error rate (1 - bcr).
    #
    def balanced_error_rate
      1.0 - balanced_classification_rate
    end
    alias :ber :balanced_error_rate

    #
    # Returns the harmonic mean between sensitivity and specificity.
    #
    def harmonic_balanced_classification_rate
      2.0 * (sensitivity * specificity) / (sensitivity + specificity)
    end
    alias :hbcr :harmonic_balanced_classification_rate
    alias :harmonic_bcr :harmonic_balanced_classification_rate

    #
    # Names of all measures reported by #to_h and #to_s, in output order.
    #
    MEASURES = [
      :false_positive, :false_negative,
      :true_positive, :true_negative,
      :accuracy, :error_rate,
      :precision, :recall, :f_measure,
      :false_positive_rate, :false_negative_rate,
      :true_positive_rate, :true_negative_rate,
      :positive_predictive_value, :negative_predictive_value,
      :sensitivity, :specificity,
      :positive_likelihood, :negative_likelihood,
      :balanced_classification_rate, :balanced_error_rate, :harmonic_bcr
    ].freeze

    #
    # Returns a new Hash mapping every name listed in MEASURES to its value.
    #
    def to_h
      Hash[MEASURES.map { |m| [m, send(m)] }]
    end

    #
    # Returns a multi-line report with one "name: value" line per measure
    # listed in MEASURES. Names are right-aligned on 30 columns and values on
    # 10; Float values are printed with 5 decimals.
    #
    def to_s
      MEASURES.map { |m|
        val  = send(m)
        vals = val.is_a?(Float) ? ("%.5f" % val) : ("%s" % val)
        "%30s: %10s\n" % [m.to_s, vals]
      }.join
    end

  end # module Scoring
end # module Stamina
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: stamina-induction
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Bernard Lambeau
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-24 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: stamina-core
|
16
|
+
requirement: &70159328190600 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - =
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.5.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70159328190600
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: citrus
|
27
|
+
requirement: &70159328190080 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '2.4'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70159328190080
|
36
|
+
description: Stamina-induction plugs induction algorithm to the stamina toolkit.
|
37
|
+
email:
|
38
|
+
- blambeau@gmail.com
|
39
|
+
executables: []
|
40
|
+
extensions: []
|
41
|
+
extra_rdoc_files: []
|
42
|
+
files:
|
43
|
+
- LICENCE.md
|
44
|
+
- CHANGELOG.md
|
45
|
+
- lib/stamina-induction/stamina/abbadingo/random_dfa.rb
|
46
|
+
- lib/stamina-induction/stamina/abbadingo/random_sample.rb
|
47
|
+
- lib/stamina-induction/stamina/abbadingo.rb
|
48
|
+
- lib/stamina-induction/stamina/classifier.rb
|
49
|
+
- lib/stamina-induction/stamina/command/abbadingo_dfa.rb
|
50
|
+
- lib/stamina-induction/stamina/command/abbadingo_samples.rb
|
51
|
+
- lib/stamina-induction/stamina/command/classify.rb
|
52
|
+
- lib/stamina-induction/stamina/command/infer.rb
|
53
|
+
- lib/stamina-induction/stamina/command/metrics.rb
|
54
|
+
- lib/stamina-induction/stamina/command/score.rb
|
55
|
+
- lib/stamina-induction/stamina/command.rb
|
56
|
+
- lib/stamina-induction/stamina/dsl/induction.rb
|
57
|
+
- lib/stamina-induction/stamina/dsl/reg_lang.rb
|
58
|
+
- lib/stamina-induction/stamina/dsl.rb
|
59
|
+
- lib/stamina-induction/stamina/induction/blue_fringe.rb
|
60
|
+
- lib/stamina-induction/stamina/induction/commons.rb
|
61
|
+
- lib/stamina-induction/stamina/induction/rpni.rb
|
62
|
+
- lib/stamina-induction/stamina/induction/union_find.rb
|
63
|
+
- lib/stamina-induction/stamina/induction.rb
|
64
|
+
- lib/stamina-induction/stamina/input_string.rb
|
65
|
+
- lib/stamina-induction/stamina/reg_lang/canonical_info.rb
|
66
|
+
- lib/stamina-induction/stamina/reg_lang/parser/alternative.rb
|
67
|
+
- lib/stamina-induction/stamina/reg_lang/parser/node.rb
|
68
|
+
- lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb
|
69
|
+
- lib/stamina-induction/stamina/reg_lang/parser/parser.citrus
|
70
|
+
- lib/stamina-induction/stamina/reg_lang/parser/plus.rb
|
71
|
+
- lib/stamina-induction/stamina/reg_lang/parser/question.rb
|
72
|
+
- lib/stamina-induction/stamina/reg_lang/parser/regexp.rb
|
73
|
+
- lib/stamina-induction/stamina/reg_lang/parser/sequence.rb
|
74
|
+
- lib/stamina-induction/stamina/reg_lang/parser/star.rb
|
75
|
+
- lib/stamina-induction/stamina/reg_lang/parser/symbol.rb
|
76
|
+
- lib/stamina-induction/stamina/reg_lang/parser.rb
|
77
|
+
- lib/stamina-induction/stamina/reg_lang.rb
|
78
|
+
- lib/stamina-induction/stamina/sample.rb
|
79
|
+
- lib/stamina-induction/stamina/scoring.rb
|
80
|
+
- lib/stamina-induction/stamina-induction.rb
|
81
|
+
homepage: https://github.com/blambeau/stamina
|
82
|
+
licenses: []
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options: []
|
85
|
+
require_paths:
|
86
|
+
- lib/stamina-induction
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.8.10
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: Induction algorithms for the Stamina toolkit
|
105
|
+
test_files: []
|
106
|
+
has_rdoc:
|