stamina-induction 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +78 -0
- data/LICENCE.md +22 -0
- data/lib/stamina-induction/stamina-induction.rb +1 -0
- data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
- data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
- data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
- data/lib/stamina-induction/stamina/classifier.rb +55 -0
- data/lib/stamina-induction/stamina/command.rb +6 -0
- data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
- data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
- data/lib/stamina-induction/stamina/command/classify.rb +47 -0
- data/lib/stamina-induction/stamina/command/infer.rb +140 -0
- data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
- data/lib/stamina-induction/stamina/command/score.rb +34 -0
- data/lib/stamina-induction/stamina/dsl.rb +2 -0
- data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
- data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
- data/lib/stamina-induction/stamina/induction.rb +13 -0
- data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
- data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
- data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
- data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
- data/lib/stamina-induction/stamina/input_string.rb +123 -0
- data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
- data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
- data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
- data/lib/stamina-induction/stamina/sample.rb +309 -0
- data/lib/stamina-induction/stamina/scoring.rb +213 -0
- metadata +106 -0
@@ -0,0 +1,10 @@
|
|
1
|
+
require_relative "parser/node"
|
2
|
+
require_relative "parser/parenthesized"
|
3
|
+
require_relative "parser/symbol"
|
4
|
+
require_relative "parser/question"
|
5
|
+
require_relative "parser/plus"
|
6
|
+
require_relative "parser/star"
|
7
|
+
require_relative "parser/sequence"
|
8
|
+
require_relative "parser/alternative"
|
9
|
+
require_relative "parser/regexp"
|
10
|
+
Citrus.require File.expand_path("../parser/parser", __FILE__)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Stamina
  class RegLang
    #
    # Parse node for an alternative `head | tail`.
    #
    module Alternative
      include Node

      #
      # Adds to `fa` a fragment made of two fresh states wrapped around the
      # fragments built by `head` and `tail`, and returns the fresh
      # [entry, exit] pair.
      #
      # The fresh entry is linked to the entry of each branch and the exit of
      # each branch is linked to the fresh exit, all through edges labelled nil.
      #
      def to_fa!(fa)
        entry, leave = fa.add_n_states(2)
        head_in, head_out = self.head.to_fa!(fa)
        tail_in, tail_out = self.tail.to_fa!(fa)

        # Same edges, created in the same order as before: fork first, join next.
        links = [
          [entry, head_in],
          [entry, tail_in],
          [head_out, leave],
          [tail_out, leave]
        ]
        links.each { |source, target| fa.connect(source, target, nil) }

        [entry, leave]
      end

    end # module Alternative
  end # class RegLang
end # module Stamina
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Stamina
  class RegLang
    #
    # Behaviour shared by the parse nodes of a regular expression. Including
    # modules are expected to implement `to_fa!(fa)`, which must return a
    # [entry, exit] pair of states of `fa`.
    #
    module Node

      #
      # Builds a new automaton for this node: the entry state returned by
      # `to_fa!` is marked initial and the exit state is marked accepting.
      # Returns the automaton.
      #
      def to_fa
        automaton = Automaton.new
        entry, leave = to_fa!(automaton)
        entry.initial!
        leave.accepting!
        automaton
      end

      # Returns the result of `to_dfa` invoked on the automaton built by #to_fa.
      def to_dfa
        automaton = to_fa
        automaton.to_dfa
      end

      # Returns the result of `to_cdfa` invoked on the automaton built by #to_fa.
      def to_cdfa
        automaton = to_fa
        automaton.to_cdfa
      end

    end # module Node
  end # class RegLang
end # module Stamina
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Citrus grammar for the regular expressions understood by Stamina::RegLang.
# Rules tagged with <...> extend their match with the named Ruby module.
grammar Stamina::RegLang::Parser

  # Entry point: one alternative, optionally surrounded by whitespace.
  rule regexp
    (space* alt:alternative space*) <Stamina::RegLang::Regexp>
  end

  # `head | tail` (right-recursive), or a plain sequence.
  rule alternative
    (head:sequence space* '|' space* tail:alternative) <Stamina::RegLang::Alternative>
    | sequence
  end

  # Elements of a sequence are separated by at least one whitespace character.
  rule sequence
    (head:monadic space+ tail:sequence) <Stamina::RegLang::Sequence>
    | monadic
  end

  # A term, optionally followed by one postfix operator.
  rule monadic
    star | plus | question | term
  end

  # `term*`
  rule star
    (term '*') <Stamina::RegLang::Star>
  end

  # `term+`
  rule plus
    (term '+') <Stamina::RegLang::Plus>
  end

  # `term?`
  rule question
    (term '?') <Stamina::RegLang::Question>
  end

  # Atomic expressions: a symbol or a parenthesized sub-expression.
  rule term
    symbol | parenthesized
  end

  # One or more letters, digits, `$`, `_` or `-`.
  rule symbol
    [a-zA-Z0-9$_-]+ <Stamina::RegLang::Symbol>
  end

  # A complete regexp between parentheses.
  rule parenthesized
    ('(' space* expr:regexp space* ')') <Stamina::RegLang::Parenthesized>
  end

  # A single space, tab or newline character.
  rule space
    [ \t\n]
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Stamina
  class RegLang
    #
    # Parse node for an optional expression, `term?`.
    #
    module Question
      include Node

      #
      # Adds to `fa` two fresh states wrapped around the fragment built by
      # `term` and returns the fresh [entry, exit] pair.
      #
      # Besides the two nil-labelled edges going through the inner fragment, a
      # direct nil-labelled edge from entry to exit lets the term be skipped.
      #
      def to_fa!(fa)
        entry, leave = fa.add_n_states(2)
        inner_in, inner_out = self.term.to_fa!(fa)

        # Edge creation order is kept: into the term, out of it, then the bypass.
        [[entry, inner_in], [inner_out, leave], [entry, leave]].each do |source, target|
          fa.connect(source, target, nil)
        end

        [entry, leave]
      end

    end # module Question
  end # class RegLang
end # module Stamina
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Stamina
  class RegLang
    #
    # Parse node for a sequence, `head tail`.
    #
    module Sequence
      include Node

      #
      # Builds the fragments of `head` and `tail` inside `fa`, links the exit of
      # the former to the entry of the latter with a nil-labelled edge, and
      # returns [entry of head, exit of tail].
      #
      def to_fa!(fa)
        head_in, head_out = self.head.to_fa!(fa)
        tail_in, tail_out = self.tail.to_fa!(fa)
        fa.connect(head_out, tail_in, nil)
        [head_in, tail_out]
      end

    end # module Sequence
  end # class RegLang
end # module Stamina
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Stamina
  class RegLang
    #
    # Parse node for a repetition, `term*` (zero or more occurrences).
    #
    module Star
      include Node

      #
      # Adds to `fa` a fragment recognizing zero or more repetitions of `term`
      # and returns its [entry, exit] pair of states.
      #
      # The fragment is wrapped in two fresh states, as Question and Alternative
      # do. Adding the loop and bypass edges directly on the endpoints of the
      # inner fragment is unsound when those endpoints already carry nil-labelled
      # edges of their own: with the inner fragment of `a b*`, whose exit loops
      # back inside `b*`, a bypass from the inner entry to the inner exit would
      # make `(a b*)*` accept "b".
      #
      # NOTE(review): nil-labelled edges are presumably treated as epsilon
      # transitions by the automaton - confirm against Automaton#connect.
      #
      def to_fa!(fa)
        from, to = fa.add_n_states(2)
        f1, t1 = self.term.to_fa!(fa)
        fa.connect(from, f1, nil)  # enter the repeated fragment
        fa.connect(t1, to, nil)    # leave after an iteration
        fa.connect(t1, f1, nil)    # start another iteration
        fa.connect(from, to, nil)  # zero iteration
        [from, to]
      end

    end # module Star
  end # class RegLang
end # module Stamina
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module Stamina
  class RegLang
    #
    # Parse node for a single symbol of the alphabet.
    #
    module Symbol
      include Node

      #
      # Adds to `fa` two fresh states, neither initial nor accepting, linked by
      # one edge labelled with the textual form of this node (`to_s`). Returns
      # the [entry, exit] pair.
      #
      def to_fa!(fa)
        flags = {:initial => false, :accepting => false}
        entry, leave = fa.add_n_states(2, flags)
        label = to_s
        fa.connect(entry, leave, label)
        [entry, leave]
      end

    end # module Symbol
  end # class RegLang
end # module Stamina
|
@@ -0,0 +1,309 @@
|
|
1
|
+
module Stamina

  #
  # A sample is an ordered collection of InputString instances, each labeled as
  # positive or negative.
  #
  # == Tips and tricks
  # - loading samples from disk is easy thanks to ADL!
  #
  # == Detailed API
  class Sample
    include Enumerable

    # Number of strings in the sample
    attr_reader :size

    # Number of positive strings in the sample
    attr_reader :positive_count

    # Number of negative strings in the sample (every string that is not
    # positive is counted here, see Sample#<<)
    attr_reader :negative_count

    #
    # Creates an empty sample and appends `args` to it through Sample#<<.
    #
    def self.[](*args)
      Sample.new << args
    end

    #
    # Creates a sample. When `strings` is given, each of its elements is added
    # through Sample#<<; otherwise the sample is empty.
    #
    def initialize(strings = nil)
      @strings = []
      @size, @positive_count, @negative_count = 0, 0, 0
      strings.each{|s| self << s } unless strings.nil?
    end

    #
    # Coerces `arg` to a Sample instance. A Sample is returned as is, a String
    # is parsed as ADL. Raises an ArgumentError for any other argument.
    #
    def self.coerce(arg)
      if arg.is_a?(Sample)
        arg
      elsif arg.is_a?(String)
        parse(arg)
      else
        raise ArgumentError, "Invalid argument #{arg} for `Sample`"
      end
    end

    #
    # Parses an ADL input
    #
    def self.parse(adl)
      ADL::parse_sample(adl)
    end

    #
    # Returns true if this sample does not contain any string,
    # false otherwise.
    #
    def empty?()
      @size == 0
    end

    #
    # Adds a string to the sample. The _str_ argument may be an InputString
    # instance, a String (parsed using ADL), a Sample instance (all strings are
    # added) or an Array (recurses on each element). Returns the sample itself.
    #
    # Raises an ArgumentError if the _str_ argument is not recognized.
    #
    # NOTE: the consistency check (same string already present with the opposite
    # label) is currently disabled, so no InconsistencyError is raised here.
    #
    def <<(str)
      case str
      when InputString
        #raise(InconsistencyError, "Inconsistent sample on #{str}", caller) if self.include?(str.negate)
        @size += 1
        # Any string that is not positive increments the negative counter.
        str.positive? ? (@positive_count += 1) : (@negative_count += 1)
        @strings << str
      when String
        self << ADL::parse_string(str)
      when Sample
        str.each {|s| self << s}
      when Array
        str.each {|s| self << s}
      else
        raise(ArgumentError, "#{str} is not a valid argument.", caller)
      end
      self
    end

    #
    # Returns true if a given string is included in the sample, false otherwise.
    # This method allows the same flexibility as << for the _str_ argument; an
    # Array or a Sample is included when all of its elements are.
    #
    def include?(str)
      case str
      when InputString
        @strings.include?(str)
      when String
        include?(ADL::parse_string(str))
      when Array
        str.each {|s| return false unless include?(s)}
        true
      when Sample
        str.each {|s| return false unless include?(s)}
        true
      else
        raise(ArgumentError, "#{str} is not a valid argument.", caller)
      end
    end

    #
    # Returns a new sample as the union of both `self` and `other`
    #
    def +(other)
      s = Sample.new
      each{|x| s << x}
      other.each{|x| s << x}
      s
    end

    #
    # Compares with _other_. Returns true if _other_ is a Sample and the two
    # samples contain the same strings (including labels), false otherwise.
    # Anything that is not a Sample (nil included) is simply not equal, instead
    # of raising from Sample#include?.
    #
    def ==(other)
      other.is_a?(Sample) && include?(other) && other.include?(self)
    end
    alias :eql? :==

    #
    # Computes a hash code for this sample. The code does not depend on the
    # order of the strings.
    #
    def hash
      self.inject(37){|memo,str| memo + 17*str.hash}
    end

    #
    # Yields the block with each string. This method has no effect if no
    # block is given.
    #
    def each
      return unless block_given?
      @strings.each {|str| yield str}
    end

    #
    # Yields the block with each positive string. This method has no effect if no
    # block is given.
    #
    def each_positive
      return unless block_given?
      each {|str| yield str if str.positive?}
    end

    #
    # Returns an enumerator on positive strings.
    #
    def positive_enumerator
      # Enumerator.new(object, method) is not supported by recent rubies;
      # to_enum builds the same enumerator.
      to_enum(:each_positive)
    end

    #
    # Yields the block with each negative string. This method has no effect if no
    # block is given.
    #
    def each_negative
      return unless block_given?
      each {|str| yield str if str.negative?}
    end

    #
    # Returns an enumerator on negative strings.
    #
    def negative_enumerator
      # See positive_enumerator.
      to_enum(:each_negative)
    end

    #
    # Checks if the sample is correctly classified by a given classifier
    # (expected to include the Stamina::Classifier module).
    # Unlabeled strings are simply ignored.
    #
    def correctly_classified_by?(classifier)
      classifier.correctly_classify?(self)
    end

    #
    # Computes and returns the binary signature of the sample. The signature
    # is a String having one character for each string in the sample. A '1'
    # is used for positive strings, '0' for negative ones and '?' for unlabeled.
    #
    def signature
      signature = ''
      each do |str|
        signature << (str.unlabeled? ? '?' : str.positive? ? '1' : '0')
      end
      signature
    end

    #
    # Takes only a given proportion of this sample and returns it as a new Sample.
    # Each string is kept at random (Kernel.rand < proportion); positive strings
    # are considered first, then negative ones.
    #
    def take(proportion = 0.5)
      taken = Stamina::Sample.new
      each_positive{|s| taken << s if Kernel.rand < proportion}
      each_negative{|s| taken << s if Kernel.rand < proportion}
      taken
    end

    #
    # Prints an ADL description of this sample on the buffer, one string per
    # line (each one preceded by a newline), and returns the buffer.
    #
    def to_adl(buffer="")
      self.inject(buffer) {|memo,str| memo << "\n" << str.to_adl}
    end
    alias :to_s :to_adl
    alias :inspect :to_adl

    #
    # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
    # that the states of the PTA are in lexical order, according to the <code><=></code>
    # operator defined on symbols. States reached by positive strings are flagged
    # as accepting, states reached by any other string are flagged as error.
    #
    # Raises an InconsistencyError if a state ends up both accepting and error.
    #
    def self.to_pta(sample)
      thepta = Automaton.new do |pta|
        initial_state = add_state(:initial => true, :accepting => false)

        # Fill the PTA with each string
        sample.each do |str|
          # split string using the dfa
          parsed, reached, remaining = pta.dfa_split(str, initial_state)

          # remaining symbols are not empty -> build the PTA
          unless remaining.empty?
            remaining.each do |symbol|
              newone = pta.add_state(:initial => false, :accepting => false, :error => false)
              pta.connect(reached, newone, symbol)
              reached = newone
            end
          end

          # flag state
          str.positive? ? reached.accepting! : reached.error!

          # check consistency, should not arrive as Sample does not allow
          # inconsistencies. Should appear only if _sample_ is not a Sample
          # instance but some other enumerable.
          if reached.error? && reached.accepting?
            raise(InconsistencyError, "Inconsistent sample on #{str}", caller)
          end
        end

        # Reindex states by applying BFS
        to_index, index = [initial_state], 0
        until to_index.empty?
          state = to_index.shift
          state[:__index__] = index
          state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each{|e| to_index << e.target}
          index += 1
        end
      end

      # Now we rebuild a fresh one with states in order.
      # This looks more efficient than reordering states of the PTA
      Automaton.new do |ordered|
        ordered.add_n_states(thepta.state_count)
        thepta.each_state do |pta_state|
          source = ordered.ith_state(pta_state[:__index__])
          source.initial! if pta_state.initial?
          source.accepting! if pta_state.accepting?
          source.error! if pta_state.error?
          pta_state.out_edges.each do |e|
            target = ordered.ith_state(e.target[:__index__])
            ordered.connect(source, target, e.symbol)
          end
        end
      end

    end

    # Converts this sample to a PTA
    def to_pta
      Sample.to_pta(self)
    end
    alias :to_fa :to_pta
    alias :to_dfa :to_pta

    # Converts this sample to a canonical dfa
    def to_cdfa
      to_pta.to_cdfa
    end

    # Converts this sample to a dot output
    def to_dot
      to_pta.to_dot
    end

  end # class Sample
end # module Stamina
|