stamina-induction 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. data/CHANGELOG.md +78 -0
  2. data/LICENCE.md +22 -0
  3. data/lib/stamina-induction/stamina-induction.rb +1 -0
  4. data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
  5. data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
  6. data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
  7. data/lib/stamina-induction/stamina/classifier.rb +55 -0
  8. data/lib/stamina-induction/stamina/command.rb +6 -0
  9. data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
  10. data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
  11. data/lib/stamina-induction/stamina/command/classify.rb +47 -0
  12. data/lib/stamina-induction/stamina/command/infer.rb +140 -0
  13. data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
  14. data/lib/stamina-induction/stamina/command/score.rb +34 -0
  15. data/lib/stamina-induction/stamina/dsl.rb +2 -0
  16. data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
  17. data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
  18. data/lib/stamina-induction/stamina/induction.rb +13 -0
  19. data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
  20. data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
  21. data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
  22. data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
  23. data/lib/stamina-induction/stamina/input_string.rb +123 -0
  24. data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
  25. data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
  26. data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
  27. data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
  28. data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
  29. data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
  30. data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
  31. data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
  32. data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
  33. data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
  34. data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
  35. data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
  36. data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
  37. data/lib/stamina-induction/stamina/sample.rb +309 -0
  38. data/lib/stamina-induction/stamina/scoring.rb +213 -0
  39. metadata +106 -0
@@ -0,0 +1,10 @@
1
+ require_relative "parser/node"
2
+ require_relative "parser/parenthesized"
3
+ require_relative "parser/symbol"
4
+ require_relative "parser/question"
5
+ require_relative "parser/plus"
6
+ require_relative "parser/star"
7
+ require_relative "parser/sequence"
8
+ require_relative "parser/alternative"
9
+ require_relative "parser/regexp"
10
+ Citrus.require File.expand_path("../parser/parser", __FILE__)
@@ -0,0 +1,19 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for an alternative (`head | tail`).
    #
    module Alternative
      include Node

      #
      # Adds the states and edges of this alternative to `fa`.
      #
      # Two new states are created as entry and exit of the whole choice. Both
      # branches are built first, then the entry is linked to the start of each
      # branch and the end of each branch is linked to the exit, every link
      # being made through `fa.connect(..., nil)`.
      #
      # Returns the pair [entry, exit].
      #
      def to_fa!(fa)
        entry, leave = fa.add_n_states(2)
        branches = [head, tail].map { |branch| branch.to_fa!(fa) }
        branches.each { |first, _| fa.connect(entry, first, nil) }
        branches.each { |_, last| fa.connect(last, leave, nil) }
        [entry, leave]
      end

    end # module Alternative
  end # class RegLang
end # module Stamina
@@ -0,0 +1,22 @@
1
module Stamina
  class RegLang
    #
    # Conversion helpers shared by the parse-tree nodes. The including module
    # is expected to provide `to_fa!(fa)`, which fills `fa` and returns a
    # [from, to] pair of states.
    #
    module Node

      #
      # Builds a new Automaton for this node: the first state returned by
      # `to_fa!` is marked initial, the second one accepting.
      #
      def to_fa
        automaton = Automaton.new
        entry, leave = to_fa!(automaton)
        entry.initial!
        leave.accepting!
        automaton
      end

      # Converts this node through `to_fa` and then `to_dfa`.
      def to_dfa
        automaton = to_fa
        automaton.to_dfa
      end

      # Converts this node through `to_fa` and then `to_cdfa`.
      def to_cdfa
        automaton = to_fa
        automaton.to_cdfa
      end

    end # module Node
  end # class RegLang
end # module Stamina
@@ -0,0 +1,12 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for a parenthesized expression.
    #
    module Parenthesized
      include Node

      #
      # Parentheses add nothing to the automaton: the work is delegated to the
      # wrapped expression and its [from, to] result is returned unchanged.
      #
      def to_fa!(fa)
        wrapped = expr
        wrapped.to_fa!(fa)
      end

    end # module Parenthesized
  end # class RegLang
end # module Stamina
@@ -0,0 +1,49 @@
1
# Citrus grammar for the regular expressions understood by Stamina::RegLang.
# Each rule that carries a <Module> tag extends its matches with that module.
grammar Stamina::RegLang::Parser

  # Entry point: an alternative, optionally surrounded by whitespace.
  rule regexp
    (space* alt:alternative space*) <Stamina::RegLang::Regexp>
  end

  # `head | tail`, right-recursive; falls back to a plain sequence.
  rule alternative
    (head:sequence space* '|' space* tail:alternative) <Stamina::RegLang::Alternative>
    | sequence
  end

  # Juxtaposition. Elements must be separated by at least one whitespace
  # character, because a symbol may span several characters.
  rule sequence
    (head:monadic space+ tail:sequence) <Stamina::RegLang::Sequence>
    | monadic
  end

  # A term, optionally followed by one postfix operator.
  rule monadic
    star | plus | question | term
  end

  rule star
    (term '*') <Stamina::RegLang::Star>
  end

  rule plus
    (term '+') <Stamina::RegLang::Plus>
  end

  rule question
    (term '?') <Stamina::RegLang::Question>
  end

  rule term
    symbol | parenthesized
  end

  # One or more letters, digits, '$', '_' or '-'.
  rule symbol
    [a-zA-Z0-9$_-]+ <Stamina::RegLang::Symbol>
  end

  rule parenthesized
    ('(' space* expr:regexp space* ')') <Stamina::RegLang::Parenthesized>
  end

  # A single whitespace character. The carriage return is included so that
  # expressions using CRLF line endings are parsed as well.
  rule space
    [ \t\r\n]
  end

end
@@ -0,0 +1,14 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for the `+` postfix operator.
    #
    module Plus
      include Node

      #
      # Builds the operand inside `fa`, then links its end state back to its
      # start state with `fa.connect(..., nil)` so that the operand can be
      # traversed again.
      #
      # Returns the operand's own [start, end] states.
      #
      def to_fa!(fa)
        entry, leave = term.to_fa!(fa)
        fa.connect(leave, entry, nil)
        [entry, leave]
      end

    end # module Plus
  end # class RegLang
end # module Stamina
@@ -0,0 +1,17 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for the `?` postfix operator.
    #
    module Question
      include Node

      #
      # Creates two new states around the operand and links them, always with
      # `fa.connect(..., nil)`:
      #
      #   entry -> operand start, operand end -> exit, entry -> exit
      #
      # The last link is the one that lets the operand be skipped.
      #
      # Returns the pair [entry, exit].
      #
      def to_fa!(fa)
        entry, leave = fa.add_n_states(2)
        inner_in, inner_out = term.to_fa!(fa)
        links = [[entry, inner_in], [inner_out, leave], [entry, leave]]
        links.each { |source, target| fa.connect(source, target, nil) }
        [entry, leave]
      end

    end # module Question
  end # class RegLang
end # module Stamina
@@ -0,0 +1,12 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for the root of a regular expression. Inside
    # Stamina::RegLang this name refers to this module, not to Ruby's ::Regexp.
    #
    module Regexp
      include Node

      #
      # Delegates to the `alt` sub-node and returns its [from, to] result
      # unchanged.
      #
      def to_fa!(fa)
        alternative = alt
        alternative.to_fa!(fa)
      end

    end # module Regexp
  end # class RegLang
end # module Stamina
@@ -0,0 +1,15 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for a sequence (`head tail`).
    #
    module Sequence
      include Node

      #
      # Builds `head` then `tail` inside `fa` and links the end of the former
      # to the start of the latter with `fa.connect(..., nil)`.
      #
      # Returns [start of head, end of tail].
      #
      def to_fa!(fa)
        head_in, head_out = head.to_fa!(fa)
        tail_in, tail_out = tail.to_fa!(fa)
        fa.connect(head_out, tail_in, nil)
        [head_in, tail_out]
      end

    end # module Sequence
  end # class RegLang
end # module Stamina
@@ -0,0 +1,15 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for the `*` postfix operator.
    #
    module Star
      include Node

      #
      # Adds the states and edges of `term*` to `fa` and returns the pair
      # [from, to] delimiting the construction.
      #
      # The operand is wrapped between two new states instead of reusing its
      # own endpoints. Putting the skip edge directly on the operand's
      # endpoints is unsound as soon as these states already carry loop edges
      # from a nested `*` or `+`: `(a* b)*` would accept "a" and `(a b+)*`
      # would accept "b".
      #
      # Edges added, all with `fa.connect(..., nil)`:
      #
      #   from -> operand start   enter the operand
      #   operand end -> to       leave after an iteration
      #   operand end -> start    iterate again
      #   from -> to              zero iteration
      #
      def to_fa!(fa)
        from, to = fa.add_n_states(2)
        inner_from, inner_to = self.term.to_fa!(fa)
        fa.connect(from, inner_from, nil)
        fa.connect(inner_to, to, nil)
        fa.connect(inner_to, inner_from, nil)
        fa.connect(from, to, nil)
        [from, to]
      end

    end # module Star
  end # class RegLang
end # module Stamina
@@ -0,0 +1,14 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for a single symbol. Inside Stamina::RegLang this name
    # refers to this module, not to Ruby's ::Symbol.
    #
    module Symbol
      include Node

      #
      # Creates two states, neither initial nor accepting, and connects the
      # first to the second with an edge labeled by the node's `to_s`.
      #
      # Returns the pair [source, target].
      #
      def to_fa!(fa)
        source, target = fa.add_n_states(2, initial: false, accepting: false)
        fa.connect(source, target, to_s)
        [source, target]
      end

    end # module Symbol
  end # class RegLang
end # module Stamina
@@ -0,0 +1,309 @@
1
module Stamina

  #
  # A sample as an ordered collection of InputString labeled as positive or negative.
  #
  # == Tips and tricks
  # - loading samples from disk is easy thanks to ADL !
  #
  # == Detailed API
  class Sample
    include Enumerable

    # Number of strings in the sample
    attr_reader :size

    # Number of positive strings in the sample
    attr_reader :positive_count

    # Number of negative strings in the sample (every string that is not
    # positive is counted here, see Sample#<<)
    attr_reader :negative_count

    #
    # Creates an empty sample and appends it with args, by calling Sample#<< on
    # each of them.
    #
    def self.[](*args)
      Sample.new << args
    end

    #
    # Creates a sample. When _strings_ is given, each of its elements is
    # appended through Sample#<<; otherwise the sample is empty.
    #
    def initialize(strings = nil)
      @strings = []
      @size, @positive_count, @negative_count = 0, 0, 0
      strings.each { |s| self << s } unless strings.nil?
    end

    #
    # Coerces `arg` to a Sample instance. A Sample is returned as is, a String
    # is parsed as ADL. Raises an ArgumentError for anything else.
    #
    def self.coerce(arg)
      case arg
      when Sample then arg
      when String then parse(arg)
      else
        raise ArgumentError, "Invalid argument #{arg} for `Sample`"
      end
    end

    #
    # Parses an ADL input
    #
    def self.parse(adl)
      ADL::parse_sample(adl)
    end

    #
    # Returns true if this sample does not contain any string,
    # false otherwise.
    #
    def empty?
      @size.zero?
    end

    #
    # Adds a string to the sample. The _str_ argument may be an InputString instance,
    # a String (parsed using ADL), a Sample instance (all strings are added) or an
    # Array (recurses on each element).
    #
    # Raises an ArgumentError if the _str_ argument is not recognized.
    #
    # No consistency check is made here: the same string may be added with
    # both labels. Sample.to_pta raises an InconsistencyError on such samples.
    #
    # Returns self, so that calls can be chained.
    #
    def <<(str)
      case str
      when InputString
        @size += 1
        if str.positive?
          @positive_count += 1
        else
          @negative_count += 1
        end
        @strings << str
      when String
        self << ADL::parse_string(str)
      when Sample, Array
        str.each { |s| self << s }
      else
        raise(ArgumentError, "#{str} is not a valid argument.", caller)
      end
      self
    end

    #
    # Returns true if a given string is included in the sample, false otherwise.
    # This method allows same flexibility as << for the _str_ argument: for an
    # Array or a Sample, every element has to be included.
    #
    def include?(str)
      case str
      when InputString
        @strings.include?(str)
      when String
        include?(ADL::parse_string(str))
      when Array, Sample
        str.all? { |s| include?(s) }
      else
        raise(ArgumentError, "#{str} is not a valid argument.", caller)
      end
    end

    #
    # Returns a new sample as the union of both `self` and `other`
    #
    def +(other)
      union = Sample.new
      each { |x| union << x }
      other.each { |x| union << x }
      union
    end

    #
    # Compares with another object _other_. Returns true if _other_ is a Sample
    # and the two samples contain the same strings (including labels), false
    # otherwise. Order and repetitions are not taken into account.
    #
    def ==(other)
      # Guard first: include? raises an ArgumentError on foreign objects, and
      # an equality test against nil (or anything else) must simply be false.
      other.is_a?(Sample) && include?(other) && other.include?(self)
    end
    alias :eql? :==

    #
    # Computes an hash code for this sample. The code does not depend on the
    # order of the strings.
    #
    # NOTE(review): every occurrence of a string contributes to the code while
    # == ignores repetitions, so two samples that are == may still have
    # different hash codes when one of them contains duplicates.
    #
    def hash
      inject(37) { |memo, str| memo + 17 * str.hash }
    end

    #
    # Yields the block with each string. This method has no effect if no
    # block is given.
    #
    def each
      return unless block_given?
      @strings.each { |str| yield str }
    end

    #
    # Yields the block with each positive string. This method has no effect if no
    # block is given.
    #
    def each_positive
      return unless block_given?
      each { |str| yield str if str.positive? }
    end

    #
    # Returns an enumerator on positive strings.
    #
    def positive_enumerator
      # to_enum replaces Enumerator.new(object, method), a form that recent
      # Ruby versions no longer accept.
      to_enum(:each_positive)
    end

    #
    # Yields the block with each negative string. This method has no effect if no
    # block is given.
    #
    def each_negative
      return unless block_given?
      each { |str| yield str if str.negative? }
    end

    #
    # Returns an enumerator on negative strings.
    #
    def negative_enumerator
      to_enum(:each_negative)
    end

    #
    # Checks if the sample is correctly classified by a given classifier
    # (expected to include the Stamina::Classifier module).
    # Unlabeled strings are simply ignored.
    #
    def correctly_classified_by?(classifier)
      classifier.correctly_classify?(self)
    end

    #
    # Computes and returns the binary signature of the sample. The signature
    # is a String having one character for each string in the sample. A '1'
    # is used for positive strings, '0' for negative ones and '?' for unlabeled.
    #
    def signature
      each_with_object('') do |str, signature|
        signature << if str.unlabeled?
                       '?'
                     elsif str.positive?
                       '1'
                     else
                       '0'
                     end
      end
    end

    #
    # Takes only a given proportion of this sample and returns it as a new Sample.
    # Each string is kept when a `Kernel.rand` draw is below _proportion_, so
    # the size of the result varies from one call to another. Positive strings
    # come first in the result, followed by the negative ones.
    #
    def take(proportion = 0.5)
      taken = Stamina::Sample.new
      each_positive { |s| taken << s if Kernel.rand < proportion }
      each_negative { |s| taken << s if Kernel.rand < proportion }
      taken
    end

    #
    # Prints an ADL description of this sample on the buffer: for each string,
    # a newline followed by its own ADL description. Returns the buffer.
    #
    def to_adl(buffer = "")
      inject(buffer) { |memo, str| memo << "\n" << str.to_adl }
    end
    alias :to_s :to_adl
    alias :inspect :to_adl

    #
    # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
    # that the states of the PTA are in lexical order, according to the <code><=></code>
    # operator defined on symbols. States reached by negative strings are tagged as
    # non accepting and error.
    #
    # Raises an InconsistencyError if a state is reached by both a positive and
    # a non positive string.
    #
    def self.to_pta(sample)
      thepta = Automaton.new do |pta|
        initial_state = pta.add_state(:initial => true, :accepting => false)

        # Fill the PTA with each string
        sample.each do |str|
          # split string using the dfa
          _parsed, reached, remaining = pta.dfa_split(str, initial_state)

          # extend the PTA with one new state per remaining symbol, if any
          remaining.each do |symbol|
            newone = pta.add_state(:initial => false, :accepting => false, :error => false)
            pta.connect(reached, newone, symbol)
            reached = newone
          end

          # flag state
          str.positive? ? reached.accepting! : reached.error!

          # check consistency: the same state cannot be both accepting and
          # error, which happens when the sample contains a string with two
          # different labels.
          if reached.error? && reached.accepting?
            raise(InconsistencyError, "Inconsistent sample on #{str}", caller)
          end
        end

        # Reindex states by applying BFS
        to_index, index = [initial_state], 0
        until to_index.empty?
          state = to_index.shift
          state[:__index__] = index
          state.out_edges.sort { |e, f| e.symbol <=> f.symbol }.each { |e| to_index << e.target }
          index += 1
        end
      end

      # Now we rebuild a fresh one with states in order.
      # This looks more efficient than reordering states of the PTA
      Automaton.new do |ordered|
        ordered.add_n_states(thepta.state_count)
        thepta.each_state do |pta_state|
          source = ordered.ith_state(pta_state[:__index__])
          source.initial!   if pta_state.initial?
          source.accepting! if pta_state.accepting?
          source.error!     if pta_state.error?
          pta_state.out_edges.each do |e|
            target = ordered.ith_state(e.target[:__index__])
            ordered.connect(source, target, e.symbol)
          end
        end
      end
    end

    # Converts this sample to a PTA
    def to_pta
      Sample.to_pta(self)
    end
    alias :to_fa :to_pta
    alias :to_dfa :to_pta

    # Converts this sample to a canonical dfa
    def to_cdfa
      to_pta.to_cdfa
    end

    # Converts this sample to a dot output
    def to_dot
      to_pta.to_dot
    end

  end # class Sample
end # module Stamina