stamina 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +22 -5
- data/LICENCE.md +2 -2
- data/bin/stamina +1 -7
- data/lib/stamina.rb +10 -19
- metadata +54 -333
- data/.gemtest +0 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +0 -37
- data/Manifest.txt +0 -16
- data/README.md +0 -78
- data/Rakefile +0 -23
- data/example/adl/automaton.adl +0 -49
- data/example/adl/sample.adl +0 -53
- data/example/basic/characteristic_sample.adl +0 -32
- data/example/basic/target.adl +0 -9
- data/example/competition/31_test.adl +0 -1500
- data/example/competition/31_training.adl +0 -1759
- data/lib/stamina/abbadingo.rb +0 -2
- data/lib/stamina/abbadingo/random_dfa.rb +0 -48
- data/lib/stamina/abbadingo/random_sample.rb +0 -146
- data/lib/stamina/adl.rb +0 -298
- data/lib/stamina/automaton.rb +0 -1263
- data/lib/stamina/automaton/complete.rb +0 -36
- data/lib/stamina/automaton/equivalence.rb +0 -55
- data/lib/stamina/automaton/metrics.rb +0 -78
- data/lib/stamina/automaton/minimize.rb +0 -25
- data/lib/stamina/automaton/minimize/hopcroft.rb +0 -116
- data/lib/stamina/automaton/minimize/pitchies.rb +0 -64
- data/lib/stamina/automaton/strip.rb +0 -16
- data/lib/stamina/automaton/walking.rb +0 -363
- data/lib/stamina/classifier.rb +0 -52
- data/lib/stamina/command.rb +0 -45
- data/lib/stamina/command/abbadingo_dfa.rb +0 -81
- data/lib/stamina/command/abbadingo_samples.rb +0 -40
- data/lib/stamina/command/adl2dot.rb +0 -71
- data/lib/stamina/command/classify.rb +0 -48
- data/lib/stamina/command/help.rb +0 -27
- data/lib/stamina/command/infer.rb +0 -141
- data/lib/stamina/command/metrics.rb +0 -51
- data/lib/stamina/command/robustness.rb +0 -22
- data/lib/stamina/command/score.rb +0 -35
- data/lib/stamina/errors.rb +0 -23
- data/lib/stamina/ext/math.rb +0 -20
- data/lib/stamina/induction/blue_fringe.rb +0 -265
- data/lib/stamina/induction/commons.rb +0 -156
- data/lib/stamina/induction/rpni.rb +0 -186
- data/lib/stamina/induction/union_find.rb +0 -377
- data/lib/stamina/input_string.rb +0 -123
- data/lib/stamina/loader.rb +0 -1
- data/lib/stamina/markable.rb +0 -42
- data/lib/stamina/sample.rb +0 -267
- data/lib/stamina/scoring.rb +0 -213
- data/lib/stamina/utils.rb +0 -1
- data/lib/stamina/utils/decorate.rb +0 -81
- data/lib/stamina/version.rb +0 -14
- data/stamina.gemspec +0 -191
- data/stamina.noespec +0 -32
- data/tasks/debug_mail.rake +0 -78
- data/tasks/debug_mail.txt +0 -13
- data/tasks/gem.rake +0 -68
- data/tasks/spec_test.rake +0 -79
- data/tasks/unit_test.rake +0 -77
- data/tasks/yard.rake +0 -51
- data/test/stamina/abbadingo/random_dfa_test.rb +0 -16
- data/test/stamina/abbadingo/random_sample_test.rb +0 -78
- data/test/stamina/adl_test.rb +0 -516
- data/test/stamina/automaton/classifier_test.rb +0 -259
- data/test/stamina/automaton/complete_test.rb +0 -58
- data/test/stamina/automaton/equivalence_test.rb +0 -120
- data/test/stamina/automaton/metrics_test.rb +0 -36
- data/test/stamina/automaton/minimize/hopcroft_test.rb +0 -15
- data/test/stamina/automaton/minimize/minimize_test.rb +0 -55
- data/test/stamina/automaton/minimize/pitchies_test.rb +0 -15
- data/test/stamina/automaton/minimize/rice_edu_10.adl +0 -16
- data/test/stamina/automaton/minimize/rice_edu_10.min.adl +0 -13
- data/test/stamina/automaton/minimize/rice_edu_13.adl +0 -13
- data/test/stamina/automaton/minimize/rice_edu_13.min.adl +0 -7
- data/test/stamina/automaton/minimize/should_strip_1.adl +0 -8
- data/test/stamina/automaton/minimize/should_strip_1.min.adl +0 -6
- data/test/stamina/automaton/minimize/unknown_1.adl +0 -16
- data/test/stamina/automaton/minimize/unknown_1.min.adl +0 -12
- data/test/stamina/automaton/strip_test.rb +0 -36
- data/test/stamina/automaton/to_dot_test.rb +0 -64
- data/test/stamina/automaton/walking/dfa_delta_test.rb +0 -39
- data/test/stamina/automaton/walking_test.rb +0 -206
- data/test/stamina/automaton_additional_test.rb +0 -190
- data/test/stamina/automaton_test.rb +0 -1104
- data/test/stamina/exit.rb +0 -3
- data/test/stamina/induction/blue_fringe_test.rb +0 -83
- data/test/stamina/induction/induction_test.rb +0 -70
- data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +0 -19
- data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +0 -64
- data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +0 -9
- data/test/stamina/induction/redblue_universal_expected.adl +0 -4
- data/test/stamina/induction/redblue_universal_sample.adl +0 -5
- data/test/stamina/induction/rpni_inria_expected.adl +0 -7
- data/test/stamina/induction/rpni_inria_sample.adl +0 -9
- data/test/stamina/induction/rpni_test.rb +0 -129
- data/test/stamina/induction/rpni_test_pta.dot +0 -22
- data/test/stamina/induction/rpni_universal_expected.adl +0 -4
- data/test/stamina/induction/rpni_universal_sample.adl +0 -4
- data/test/stamina/induction/union_find_test.rb +0 -124
- data/test/stamina/input_string_test.rb +0 -323
- data/test/stamina/markable_test.rb +0 -70
- data/test/stamina/randdfa.adl +0 -66
- data/test/stamina/sample.adl +0 -4
- data/test/stamina/sample_classify_test.rb +0 -149
- data/test/stamina/sample_test.rb +0 -290
- data/test/stamina/scoring_test.rb +0 -63
- data/test/stamina/small_dfa.dot +0 -16
- data/test/stamina/small_dfa.gif +0 -0
- data/test/stamina/small_nfa.dot +0 -18
- data/test/stamina/small_nfa.gif +0 -0
- data/test/stamina/stamina_test.rb +0 -80
- data/test/stamina/utils/decorate_test.rb +0 -65
- data/test/test_all.rb +0 -7
data/lib/stamina/input_string.rb
DELETED
@@ -1,123 +0,0 @@
|
|
1
|
-
module Stamina
  #
  # An input string is a sequence of input symbols (symbols being letters appearing
  # on automaton edges) labeled as positive, negative or unlabeled (provided for test
  # samples and query strings).
  #
  # This class includes the Enumerable module, which allows reasoning about
  # ordered symbols.
  #
  # == Detailed API
  class InputString
    include Enumerable

    #
    # Creates an input string from symbols and positive or negative labeling.
    #
    # Arguments:
    # - symbols: When an Array is provided, it is duplicated by default to be kept
    #   internally. Set _dup_ to false to avoid duplicating it (in both cases, the
    #   internal array is frozen; with <tt>dup=false</tt> this freezes the caller's
    #   own array). When a String is provided, the symbols array is created using
    #   <tt>symbols.split(' ')</tt> and then frozen; _dup_ is ignored in that case.
    # - positive: true (positive string), false (negative one) or nil (unlabeled).
    #
    # Raises:
    # - ArgumentError if symbols is neither an Array nor a String.
    #
    def initialize(symbols, positive, dup=true)
      @symbols = case symbols
        when String
          symbols.split(' ').freeze
        when Array
          (dup ? symbols.dup : symbols).freeze
        else
          # Passing +caller+ makes the backtrace start at the call site.
          raise(ArgumentError,
                "Input string expects an Array or a String: #{symbols} received",
                caller)
      end
      @positive = positive
    end

    #
    # Checks if this input string is empty (aka lambda, i.e. contains no symbol).
    #
    def empty?
      @symbols.empty?
    end
    alias :lambda? :empty?

    #
    # Returns the string size, i.e. the number of its symbols.
    #
    def size
      @symbols.size
    end

    #
    # Returns the exact label of this string, being true (positive string),
    # false (negative string) or nil (unlabeled).
    #
    def label
      @positive
    end

    #
    # Returns true if this input string is positively labeled, false otherwise.
    #
    def positive?
      @positive == true
    end

    #
    # Returns true if this input string is negatively labeled, false otherwise.
    #
    def negative?
      @positive == false
    end

    #
    # Returns true if this input string is unlabeled.
    #
    def unlabeled?
      @positive.nil?
    end

    # Returns a new string with the same symbols but the opposite label. The
    # (frozen) symbols array is shared with the copy, not duplicated. This
    # method returns self if the string is unlabeled.
    def negate
      return self if unlabeled?
      InputString.new(@symbols, !@positive, false)
    end

    #
    # Returns an array with the symbols of this string. The returned array may
    # not be modified (it is frozen).
    #
    def symbols
      @symbols
    end

    #
    # Yields the block with each string symbol, in order. Has no effect without
    # a block. Returns the frozen symbols array in both cases.
    #
    def each
      return @symbols unless block_given?
      @symbols.each { |s| yield s }
    end

    #
    # Checks equality with another InputString. Returns true if strings have the
    # same sequence of symbols and the same labeling, false otherwise. Returns
    # nil if _o_ is not an InputString.
    #
    def ==(o)
      return nil unless InputString === o
      label == o.label && @symbols == o.symbols
    end
    alias :eql? :==

    #
    # Computes a hash code for this string. The code is derived from the symbols
    # and the exact label, i.e. the same two attributes that == compares, so
    # that negative and unlabeled strings over the same symbols do not
    # systematically collide.
    #
    def hash
      @symbols.hash + 37 * label.hash
    end

    #
    # Prints this string in ADL: a label marker ('+ ', '- ' or '?') followed by
    # the symbols separated by single spaces. A fresh, unfrozen String is
    # returned on each call.
    #
    # NOTE(review): the unlabeled marker '?' carries no trailing space, unlike
    # '+ ' and '- '. Kept as is; confirm against the ADL parser.
    #
    def to_adl
      marker = unlabeled? ? '?' : (positive? ? '+ ' : '- ')
      "#{marker}#{@symbols.join(' ')}"
    end
    alias :to_s :to_adl
    alias :inspect :to_adl

  end # class InputString
end # module Stamina
|
data/lib/stamina/loader.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require "quickl"
|
data/lib/stamina/markable.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
module Stamina
  #
  # Allows any object to be markable with user-data.
  #
  # This module is expected to be included by classes that want to implement the
  # Markable design pattern. Marks are kept in the <tt>@data</tt> instance
  # variable of the including object; when that variable has not been set, the
  # object is treated as having no marks. Moreover, if the instances of the
  # including class respond to <tt>state_changed</tt>, this method is
  # automatically invoked when marks change, as
  # <tt>state_changed(:loaded_pair, [key, oldvalue, newvalue])</tt>.
  #
  # == Detailed API
  module Markable

    #
    # Returns user-value associated to _key_, nil if no such key in user-data
    # (or if no user-data has been stored yet).
    #
    def [](key)
      @data.nil? ? nil : @data[key]
    end

    #
    # Associates _value_ to _key_ in user-data. Overrides previous value if
    # present. The storage hash is created on first use when the including
    # class did not set it up.
    #
    def []=(key,value)
      @data ||= {}
      oldvalue = @data[key]
      @data[key] = value
      state_changed(:loaded_pair, [key,oldvalue,value]) if self.respond_to? :state_changed
    end

    # Removes the mark stored under _key_ (a no-op on the storage when no
    # user-data exists). Listeners are notified with nil as the new value.
    def remove_mark(key)
      oldvalue = nil
      unless @data.nil?
        oldvalue = @data[key]
        @data.delete(key)
      end
      state_changed(:loaded_pair, [key,oldvalue,nil]) if self.respond_to? :state_changed
    end

    # Extracts a copy of the attributes, which can subsequently be modified
    # without affecting this object. Returns an empty hash when no user-data
    # exists.
    def data
      @data.nil? ? {} : @data.dup
    end

  end # module Markable
end # module Stamina
|
data/lib/stamina/sample.rb
DELETED
@@ -1,267 +0,0 @@
|
|
1
|
-
module Stamina

  #
  # A sample is an ordered collection of InputString labeled as positive or negative.
  #
  # == Tips and tricks
  # - loading samples from disk is easy thanks to ADL !
  #
  # == Detailed API
  class Sample
    include Enumerable

    # Number of strings in the sample
    attr_reader :size

    # Number of positive strings in the sample
    attr_reader :positive_count

    # Number of non-positive strings in the sample.
    # NOTE(review): Sample#<< increments this counter for every string that is
    # not positive, so unlabeled strings are counted here too -- confirm intent.
    attr_reader :negative_count

    #
    # Creates an empty sample and appends args to it, through Sample#<< (which
    # recurses on each element of the args array).
    #
    def self.[](*args)
      Sample.new << args
    end

    #
    # Creates a sample. When _strings_ is provided, each of its elements is
    # appended through Sample#<<; otherwise the sample is empty.
    #
    def initialize(strings = nil)
      @strings = []
      @size, @positive_count, @negative_count = 0, 0, 0
      strings.each { |s| self << s } unless strings.nil?
    end

    #
    # Returns true if this sample does not contain any string,
    # false otherwise.
    #
    def empty?
      @size == 0
    end

    #
    # Adds a string to the sample. The _str_ argument may be an InputString instance,
    # a String (parsed using ADL), a Sample instance (all strings are added) or an
    # Array (recurses on each element). Returns self, so calls can be chained.
    #
    # Raises an ArgumentError if the _str_ argument is not recognized.
    #
    # Consistency (same string present with the opposite label) is not checked
    # here: that check is disabled (see the commented line below).
    #
    def <<(str)
      case str
        when InputString
          #raise(InconsistencyError, "Inconsistent sample on #{str}", caller) if self.include?(str.negate)
          @size += 1
          if str.positive?
            @positive_count += 1
          else
            @negative_count += 1
          end
          @strings << str
        when String
          self << ADL::parse_string(str)
        when Sample, Array
          str.each { |s| self << s }
        else
          raise(ArgumentError, "#{str} is not a valid argument.", caller)
      end
      self
    end

    #
    # Returns true if a given string is included in the sample, false otherwise.
    # This method allows the same flexibility as << for the _str_ argument; for
    # an Array or a Sample, every element must be included.
    #
    def include?(str)
      case str
        when InputString
          @strings.include?(str)
        when String
          include?(ADL::parse_string(str))
        when Array, Sample
          str.all? { |s| include?(s) }
        else
          raise(ArgumentError, "#{str} is not a valid argument.", caller)
      end
    end

    #
    # Compares with another sample _other_, which is required to be a Sample
    # instance. Returns true if the two samples contain the same strings (including
    # labels), false otherwise. Order is not taken into account.
    #
    def ==(other)
      include?(other) && other.include?(self)
    end
    alias :eql? :==

    #
    # Computes a hash code for this sample. The code is a sum over the strings,
    # hence independent of their order.
    #
    def hash
      inject(37) { |memo, str| memo + 17 * str.hash }
    end

    #
    # Yields the block with each string. This method has no effect if no
    # block is given.
    #
    def each
      return unless block_given?
      @strings.each { |str| yield str }
    end

    #
    # Yields the block with each positive string. This method has no effect if no
    # block is given.
    #
    def each_positive
      return unless block_given?
      each { |str| yield str if str.positive? }
    end

    #
    # Returns an enumerator on positive strings.
    #
    def positive_enumerator
      # Object#enum_for replaces the block-less Enumerator.new(obj, :meth) form,
      # which current Ruby versions no longer accept.
      enum_for(:each_positive)
    end

    #
    # Yields the block with each negative string. This method has no effect if no
    # block is given.
    #
    def each_negative
      return unless block_given?
      each { |str| yield str if str.negative? }
    end

    #
    # Returns an enumerator on negative strings.
    #
    def negative_enumerator
      enum_for(:each_negative)
    end

    #
    # Checks if the sample is correctly classified by a given classifier, by
    # delegating to <tt>classifier.correctly_classify?(self)</tt>.
    #
    def correctly_classified_by?(classifier)
      classifier.correctly_classify?(self)
    end

    #
    # Computes and returns the binary signature of the sample. The signature
    # is a String having one character for each string in the sample. A '1'
    # is used for positive strings, '0' for negative ones and '?' for unlabeled.
    #
    def signature
      # dup: the accumulator is appended to, so it must not be a (possibly
      # frozen) literal.
      signature = ''.dup
      each do |str|
        signature << (str.unlabeled? ? '?' : (str.positive? ? '1' : '0'))
      end
      signature
    end

    #
    # Takes only a given proportion of this sample and returns it as a new Sample.
    # Each positive string, then each negative string, is kept when a fresh
    # <tt>Kernel.rand</tt> draw is below _proportion_; unlabeled strings are
    # never taken.
    #
    def take(proportion = 0.5)
      taken = Stamina::Sample.new
      each_positive { |s| taken << s if Kernel.rand < proportion }
      each_negative { |s| taken << s if Kernel.rand < proportion }
      taken
    end

    #
    # Prints an ADL description of this sample on the buffer: for each string, a
    # newline followed by its own ADL form is appended with <tt><<</tt>. Returns
    # the buffer.
    #
    def to_adl(buffer = ''.dup)
      inject(buffer) { |memo, str| memo << "\n" << str.to_adl }
    end
    alias :to_s :to_adl
    alias :inspect :to_adl

    #
    # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
    # that the states of the PTA are in lexical order, according to the <code><=></code>
    # operator defined on symbols. States reached by positive strings are flagged
    # as accepting; states reached by any other string are flagged as error.
    #
    # Raises an InconsistencyError when a state ends up flagged both as error and
    # accepting.
    #
    def self.to_pta(sample)
      # NOTE(review): add_state is called below without an explicit receiver;
      # presumably Automaton.new evaluates this block in the context of the new
      # automaton -- confirm against Automaton#initialize.
      thepta = Automaton.new do |pta|
        initial_state = add_state(:initial => true, :accepting => false)

        # Fill the PTA with each string
        sample.each do |str|
          # split string using the dfa
          _parsed, reached, remaining = pta.dfa_split(str, initial_state)

          # remaining symbols are not empty -> build the PTA
          unless remaining.empty?
            remaining.each do |symbol|
              newone = pta.add_state(:initial => false, :accepting => false, :error => false)
              pta.connect(reached, newone, symbol)
              reached = newone
            end
          end

          # flag state
          str.positive? ? reached.accepting! : reached.error!

          # check consistency; this is expected to be hit only by samples that
          # contain the same string under opposite labels.
          if reached.error? && reached.accepting?
            raise(InconsistencyError, "Inconsistent sample on #{str}", caller)
          end
        end

        # Reindex states by applying BFS
        to_index, index = [initial_state], 0
        until to_index.empty?
          state = to_index.shift
          state[:__index__] = index
          state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each{|e| to_index << e.target}
          index += 1
        end
      end

      # Now we rebuild a fresh one with states in order.
      # This looks more efficient than reordering states of the PTA
      Automaton.new do |ordered|
        ordered.add_n_states(thepta.state_count)
        thepta.each_state do |pta_state|
          source = ordered.ith_state(pta_state[:__index__])
          source.initial!   if pta_state.initial?
          source.accepting! if pta_state.accepting?
          source.error!     if pta_state.error?
          pta_state.out_edges.each do |e|
            target = ordered.ith_state(e.target[:__index__])
            ordered.connect(source, target, e.symbol)
          end
        end
      end

    end

    # Convenient shortcut for Sample.to_pta(sample_instance)
    def to_pta
      Sample.to_pta(self)
    end

  end # class Sample
end # module Stamina
|
data/lib/stamina/scoring.rb
DELETED
@@ -1,213 +0,0 @@
|
|
1
|
-
module Stamina
  #
  # Provides utility methods for scoring binary classifiers from signatures.
  #
  # The instance methods read the four confusion-matrix counts through
  # <tt>self[:true_positive]</tt>, <tt>self[:true_negative]</tt>,
  # <tt>self[:false_positive]</tt> and <tt>self[:false_negative]</tt>; the
  # module is therefore meant to extend a Hash-like object (see Scoring.scoring).
  #
  # All ratios are computed with Float division: a zero denominator yields NaN
  # (or an infinity), never an exception.
  #
  module Scoring

    # Names of all measures reported by #to_h and #to_s, in output order.
    MEASURES = [
      :false_positive, :false_negative,
      :true_positive,  :true_negative,
      :accuracy, :error_rate,
      :precision, :recall, :f_measure,
      :false_positive_rate, :false_negative_rate,
      :true_positive_rate, :true_negative_rate,
      :positive_predictive_value, :negative_predictive_value,
      :sensitivity, :specificity,
      :positive_likelihood, :negative_likelihood,
      :balanced_classification_rate, :balanced_error_rate, :harmonic_bcr
    ].freeze

    #
    # From the signatures of a learned model and of the actual one, returns an
    # object responding to all instance methods defined in the Scoring module
    # (a Hash of the four counts, extended with Scoring).
    #
    # Arguments:
    # - learned, actual: signatures of the same size; at each position the
    #   character '1' means positive and any other character is treated as
    #   not positive.
    # - max_size: when given, only the first _max_size_ positions are compared
    #   (capped to the signature size).
    #
    # Raises:
    # - ArgumentError if the two signatures do not have the same size.
    #
    def self.scoring(learned, actual, max_size=nil)
      unless learned.size == actual.size
        raise ArgumentError, "Signatures must be of same size (#{learned.size} vs. #{actual.size})"
      end
      max_size ||= learned.size
      max_size = learned.size if max_size > learned.size
      tp, fn, fp, tn = 0, 0, 0, 0
      (0...max_size).each do |i|
        positive   = (actual[i..i] == '1')
        labeled_as = (learned[i..i] == '1')
        if positive == labeled_as
          positive ? (tp += 1) : (tn += 1)
        else
          positive ? (fn += 1) : (fp += 1)
        end
      end
      measures = { :true_positive  => tp,
                   :true_negative  => tn,
                   :false_positive => fp,
                   :false_negative => fn }
      measures.extend(Scoring)
      measures
    end

    #
    # Returns the number of positive strings correctly labeled as positive.
    #
    def true_positive
      self[:true_positive]
    end

    #
    # Returns the number of negative strings correctly labeled as negative.
    #
    def true_negative
      self[:true_negative]
    end

    #
    # Returns the number of negative strings incorrectly labeled as positive.
    #
    def false_positive
      self[:false_positive]
    end

    #
    # Returns the number of positive strings incorrectly labeled as negative.
    #
    def false_negative
      self[:false_negative]
    end

    #
    # Returns the proportion of positive predictions that are correct:
    # tp / (tp + fp).
    #
    def precision
      true_positive.to_f / (true_positive + false_positive)
    end
    alias :positive_predictive_value :precision

    #
    # Returns the proportion of negative predictions that are correct:
    # tn / (tn + fn).
    #
    def negative_predictive_value
      true_negative.to_f / (true_negative + false_negative)
    end

    #
    # Returns the proportion of positive strings that were predicted as being
    # positive: tp / (tp + fn).
    #
    def recall
      true_positive.to_f / (true_positive + false_negative)
    end
    alias :sensitivity :recall
    alias :true_positive_rate :recall

    #
    # Returns the proportion of negative strings that were predicted as being
    # negative: tn / (tn + fp).
    #
    def specificity
      true_negative.to_f / (true_negative + false_positive)
    end
    alias :true_negative_rate :specificity

    #
    # Returns the false positive rate: fp / (fp + tn).
    #
    def false_positive_rate
      false_positive.to_f / (false_positive + true_negative)
    end

    #
    # Returns the false negative rate: fn / (tp + fn).
    #
    def false_negative_rate
      false_negative.to_f / (true_positive + false_negative)
    end

    #
    # Returns the positive likelihood ratio: sensitivity / (1 - specificity).
    #
    def positive_likelihood
      sensitivity / (1.0 - specificity)
    end

    #
    # Returns the negative likelihood ratio: (1 - sensitivity) / specificity.
    #
    def negative_likelihood
      (1.0 - sensitivity) / specificity
    end

    #
    # Returns the proportion of predictions that are correct.
    #
    def accuracy
      num = (true_positive + true_negative).to_f
      den = (true_positive + true_negative + false_positive + false_negative)
      num / den
    end

    #
    # Returns the error rate, i.e. the proportion of predictions that are wrong.
    #
    def error_rate
      num = (false_positive + false_negative).to_f
      den = (true_positive + true_negative + false_positive + false_negative)
      num / den
    end

    #
    # Returns the harmonic mean between precision and recall.
    #
    def f_measure
      2.0 * (precision * recall) / (precision + recall)
    end

    #
    # Returns the balanced classification rate (arithmetic mean between
    # sensitivity and specificity).
    #
    def balanced_classification_rate
      0.5 * (sensitivity + specificity)
    end
    alias :bcr :balanced_classification_rate

    #
    # Returns the balanced error rate (1 - bcr).
    #
    def balanced_error_rate
      1.0 - balanced_classification_rate
    end
    alias :ber :balanced_error_rate

    #
    # Returns the harmonic mean between sensitivity and specificity.
    #
    def harmonic_balanced_classification_rate
      2.0 * (sensitivity * specificity) / (sensitivity + specificity)
    end
    alias :hbcr :harmonic_balanced_classification_rate
    alias :harmonic_bcr :harmonic_balanced_classification_rate

    #
    # Returns a new Hash mapping each name in MEASURES to its value.
    #
    def to_h
      h = {}
      MEASURES.each { |m| h[m] = self.send(m) }
      h
    end

    #
    # Returns a report with one line per measure in MEASURES: the name
    # right-aligned on 30 columns, then the value right-aligned on 10 columns.
    # Floats are printed with 5 decimals.
    #
    def to_s
      lines = MEASURES.map do |m|
        val  = self.send(m)
        vals = (Float === val) ? ("%.5f" % val) : ("%s" % val)
        "%30s: %10s\n" % [m.to_s, vals]
      end
      lines.join
    end

  end # module Scoring
end # module Stamina
|