stamina-induction 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/CHANGELOG.md +78 -0
  2. data/LICENCE.md +22 -0
  3. data/lib/stamina-induction/stamina-induction.rb +1 -0
  4. data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
  5. data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
  6. data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
  7. data/lib/stamina-induction/stamina/classifier.rb +55 -0
  8. data/lib/stamina-induction/stamina/command.rb +6 -0
  9. data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
  10. data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
  11. data/lib/stamina-induction/stamina/command/classify.rb +47 -0
  12. data/lib/stamina-induction/stamina/command/infer.rb +140 -0
  13. data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
  14. data/lib/stamina-induction/stamina/command/score.rb +34 -0
  15. data/lib/stamina-induction/stamina/dsl.rb +2 -0
  16. data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
  17. data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
  18. data/lib/stamina-induction/stamina/induction.rb +13 -0
  19. data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
  20. data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
  21. data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
  22. data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
  23. data/lib/stamina-induction/stamina/input_string.rb +123 -0
  24. data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
  25. data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
  26. data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
  27. data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
  28. data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
  29. data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
  30. data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
  31. data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
  32. data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
  33. data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
  34. data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
  35. data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
  36. data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
  37. data/lib/stamina-induction/stamina/sample.rb +309 -0
  38. data/lib/stamina-induction/stamina/scoring.rb +213 -0
  39. metadata +106 -0
@@ -0,0 +1,10 @@
1
+ require_relative "parser/node"
2
+ require_relative "parser/parenthesized"
3
+ require_relative "parser/symbol"
4
+ require_relative "parser/question"
5
+ require_relative "parser/plus"
6
+ require_relative "parser/star"
7
+ require_relative "parser/sequence"
8
+ require_relative "parser/alternative"
9
+ require_relative "parser/regexp"
10
+ Citrus.require File.expand_path("../parser/parser", __FILE__)
@@ -0,0 +1,19 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for an alternation (`head | tail`).
    #
    module Alternative
      include Node

      #
      # Builds both branches inside `fa`, then brackets them with a fresh
      # entry/exit pair of states linked through nil-labelled edges.
      #
      # Returns the [entry, exit] pair of the resulting fragment.
      #
      def to_fa!(fa)
        entry, leave = fa.add_n_states(2)
        head_in, head_out = self.head.to_fa!(fa)
        tail_in, tail_out = self.tail.to_fa!(fa)

        # Fan out from the entry state to each branch...
        [head_in, tail_in].each { |branch| fa.connect(entry, branch, nil) }
        # ...and join both branches on the exit state.
        [head_out, tail_out].each { |branch| fa.connect(branch, leave, nil) }

        [entry, leave]
      end

    end # module Alternative
  end # class RegLang
end # module Stamina
@@ -0,0 +1,22 @@
1
module Stamina
  class RegLang
    #
    # Behaviour shared by every regular-expression parse node. Including
    # modules are expected to implement `to_fa!(fa)`, which adds the node's
    # fragment to `fa` and returns its [entry, exit] pair of states.
    #
    module Node

      #
      # Builds a brand new automaton for this node: the fragment's entry state
      # is flagged initial and its exit state accepting.
      #
      def to_fa
        automaton = Automaton.new
        entry, leave = to_fa!(automaton)
        entry.initial!
        leave.accepting!
        automaton
      end

      # Same as `to_fa`, followed by the automaton's own `to_dfa` conversion.
      def to_dfa
        to_fa.to_dfa
      end

      # Same as `to_fa`, followed by the automaton's own `to_cdfa` conversion.
      def to_cdfa
        to_fa.to_cdfa
      end

    end # module Node
  end # class RegLang
end # module Stamina
@@ -0,0 +1,12 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for a parenthesized sub-expression.
    #
    module Parenthesized
      include Node

      #
      # Parentheses only group: the fragment is entirely built by the wrapped
      # expression, whose [entry, exit] pair is returned unchanged.
      #
      def to_fa!(fa)
        inner = self.expr
        inner.to_fa!(fa)
      end

    end # module Parenthesized
  end # class RegLang
end # module Stamina
@@ -0,0 +1,49 @@
1
# Citrus grammar for the regular-expression syntax accepted by
# Stamina::RegLang. Each rule that builds a parse node is extended with the
# matching Stamina::RegLang::* module, which supplies `to_fa!`.
grammar Stamina::RegLang::Parser

  # Top-level expression: one alternative, with optional surrounding blanks.
  rule regexp
    (space* alt:alternative space*) <Stamina::RegLang::Regexp>
  end

  # `head | tail`, right-recursive; falls back to a plain sequence.
  rule alternative
      (head:sequence space* '|' space* tail:alternative) <Stamina::RegLang::Alternative>
    | sequence
  end

  # Concatenation: elements must be separated by at least one blank.
  rule sequence
      (head:monadic space+ tail:sequence) <Stamina::RegLang::Sequence>
    | monadic
  end

  # A term optionally followed by a postfix operator. The suffixed forms are
  # listed first so that they are tried before the bare term.
  rule monadic
    star | plus | question | term
  end

  # Postfix `*` on a term.
  rule star
    (term '*') <Stamina::RegLang::Star>
  end

  # Postfix `+` on a term.
  rule plus
    (term '+') <Stamina::RegLang::Plus>
  end

  # Postfix `?` on a term.
  rule question
    (term '?') <Stamina::RegLang::Question>
  end

  # Atomic operand: a symbol or a parenthesized expression.
  rule term
    symbol | parenthesized
  end

  # A symbol is one or more letters, digits, `$`, `_` or `-`.
  rule symbol
    [a-zA-Z0-9$_-]+ <Stamina::RegLang::Symbol>
  end

  # A full regexp wrapped in parentheses.
  rule parenthesized
    ('(' space* expr:regexp space* ')') <Stamina::RegLang::Parenthesized>
  end

  # A single blank character: space, tab or newline.
  rule space
    [ \t\n]
  end

end
@@ -0,0 +1,14 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for the postfix `+` operator (one or more repetitions).
    #
    module Plus
      include Node

      #
      # Builds the operand's fragment in `fa`, then adds a nil-labelled edge
      # from its exit back to its entry so that the operand may be repeated.
      #
      # Returns the operand's own [entry, exit] pair.
      #
      def to_fa!(fa)
        first, last = self.term.to_fa!(fa)
        fa.connect(last, first, nil) # loop back for another repetition
        [first, last]
      end

    end # module Plus
  end # class RegLang
end # module Stamina
@@ -0,0 +1,17 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for the postfix `?` operator (zero or one occurrence).
    #
    module Question
      include Node

      #
      # Wraps the operand's fragment between a fresh entry/exit pair. Three
      # nil-labelled edges are added: entry -> operand, operand -> exit, and a
      # direct entry -> exit edge that lets the operand be skipped.
      #
      # Returns the fresh [entry, exit] pair.
      #
      def to_fa!(fa)
        entry, leave = fa.add_n_states(2)
        inner_in, inner_out = self.term.to_fa!(fa)

        fa.connect(entry, inner_in, nil)
        fa.connect(inner_out, leave, nil)
        fa.connect(entry, leave, nil) # bypass: the operand is optional

        [entry, leave]
      end

    end # module Question
  end # class RegLang
end # module Stamina
@@ -0,0 +1,12 @@
1
module Stamina
  class RegLang
    #
    # Root parse-tree node: a whole regular expression.
    #
    module Regexp
      include Node

      #
      # Delegates the construction to the captured alternative and returns
      # its [entry, exit] pair unchanged.
      #
      def to_fa!(fa)
        alternative = self.alt
        alternative.to_fa!(fa)
      end

    end # module Regexp
  end # class RegLang
end # module Stamina
@@ -0,0 +1,15 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for a concatenation (`head tail`).
    #
    module Sequence
      include Node

      #
      # Builds the head fragment, then the tail fragment, and glues them with
      # a nil-labelled edge from the head's exit to the tail's entry.
      #
      # Returns [entry of head, exit of tail].
      #
      def to_fa!(fa)
        head_in, head_out = self.head.to_fa!(fa)
        tail_in, tail_out = self.tail.to_fa!(fa)
        fa.connect(head_out, tail_in, nil)
        [head_in, tail_out]
      end

    end # module Sequence
  end # class RegLang
end # module Stamina
@@ -0,0 +1,15 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for the postfix `*` operator (zero or more repetitions).
    #
    module Star
      include Node

      #
      # Builds the operand's fragment in `fa` and wraps it between a fresh
      # entry/exit pair of states, wired with nil-labelled edges (used by all
      # nodes of this parser as glue, i.e. presumably epsilon transitions):
      #
      #   entry     -> inner_in    enter the operand
      #   inner_out -> leave       leave after an occurrence
      #   inner_out -> inner_in    repeat the operand
      #   entry     -> leave       skip the operand (zero occurrences)
      #
      # The fresh states matter: the skip edge must not start from the
      # operand's own entry state. If the operand already contains a cycle
      # through its entry (e.g. `(a+ b)*` or `(a* b)*`), a skip edge on that
      # state lets a partial match such as "a" jump straight to the exit and
      # be wrongly accepted. The fresh entry has no incoming edge and the
      # fresh exit no outgoing one, so the bypass cannot be reached from
      # inside the operand.
      #
      # Returns the fresh [entry, exit] pair.
      #
      def to_fa!(fa)
        from, to = fa.add_n_states(2)
        inner_in, inner_out = self.term.to_fa!(fa)

        fa.connect(from, inner_in, nil)
        fa.connect(inner_out, to, nil)
        fa.connect(inner_out, inner_in, nil)
        fa.connect(from, to, nil)

        [from, to]
      end

    end # module Star
  end # class RegLang
end # module Stamina
@@ -0,0 +1,14 @@
1
module Stamina
  class RegLang
    #
    # Parse-tree node for a single symbol of the alphabet.
    #
    module Symbol
      include Node

      #
      # Adds two plain states (neither initial nor accepting) to `fa`, linked
      # by one edge labelled with this node's `to_s`.
      #
      # Returns the [source, target] pair.
      #
      def to_fa!(fa)
        flags = {:initial => false, :accepting => false}
        source, target = fa.add_n_states(2, flags)
        fa.connect(source, target, to_s)
        [source, target]
      end

    end # module Symbol
  end # class RegLang
end # module Stamina
@@ -0,0 +1,309 @@
1
module Stamina

  #
  # A sample as an ordered collection of InputString labeled as positive or negative.
  #
  # == Tips and tricks
  # - loading samples from disk is easy thanks to ADL !
  #
  # == Detailed API
  class Sample
    include Enumerable

    # Number of strings in the sample
    attr_reader :size

    # Number of positive strings in the sample
    attr_reader :positive_count

    # Number of negative strings in the sample
    attr_reader :negative_count

    #
    # Creates an empty sample and appends it with args, by calling Sample#<< on
    # each of them.
    #
    def self.[](*args)
      Sample.new << args
    end

    #
    # Creates a sample. When `strings` is given, each of its elements is added
    # through Sample#<<; otherwise the sample starts empty.
    #
    def initialize(strings = nil)
      @strings = []
      @size, @positive_count, @negative_count = 0, 0, 0
      strings.each { |s| self << s } unless strings.nil?
    end

    #
    # Coerces `arg` to a Sample instance. A Sample is returned as is, a String
    # is parsed as ADL. Raises an ArgumentError for anything else.
    #
    def self.coerce(arg)
      if arg.is_a?(Sample)
        arg
      elsif arg.is_a?(String)
        parse(arg)
      else
        raise ArgumentError, "Invalid argument #{arg} for `Sample`"
      end
    end

    #
    # Parses an ADL input
    #
    def self.parse(adl)
      ADL::parse_sample(adl)
    end

    #
    # Returns true if this sample does not contain any string,
    # false otherwise.
    #
    def empty?
      @size == 0
    end

    #
    # Adds a string to the sample. The _str_ argument may be an InputString instance,
    # a String (parsed using ADL), a Sample instance (all strings are added) or an
    # Array (recurses on each element). Returns the sample itself, so that calls
    # can be chained.
    #
    # Raises an ArgumentError if the _str_ argument is not recognized.
    #
    # NOTE: the consistency check (raising an InconsistencyError when the same
    # string already exists with the opposite label) is currently disabled, see
    # the commented line below.
    #
    # NOTE(review): every string that is not positive? increments
    # negative_count, which presumably includes unlabeled strings -- confirm
    # against InputString.
    #
    def <<(str)
      case str
      when InputString
        #raise(InconsistencyError, "Inconsistent sample on #{str}", caller) if self.include?(str.negate)
        @size += 1
        str.positive? ? (@positive_count += 1) : (@negative_count += 1)
        @strings << str
      when String
        self << ADL::parse_string(str)
      when Sample, Array
        str.each { |s| self << s }
      else
        raise(ArgumentError, "#{str} is not a valid argument.", caller)
      end
      self
    end

    #
    # Returns true if a given string is included in the sample, false otherwise.
    # This method allows same flexibility as << for the _str_ argument: for an
    # Array or a Sample, every element must be included.
    #
    def include?(str)
      case str
      when InputString
        @strings.include?(str)
      when String
        include?(ADL::parse_string(str))
      when Array, Sample
        str.all? { |s| include?(s) }
      else
        raise(ArgumentError, "#{str} is not a valid argument.", caller)
      end
    end

    #
    # Returns a new sample as the union of both `self` and `other`
    #
    def +(other)
      s = Sample.new
      each { |x| s << x }
      other.each { |x| s << x }
      s
    end

    #
    # Compares with another sample _other_. Returns true if _other_ is a Sample
    # and the two samples contain the same strings (including labels), false
    # otherwise. Order and duplicates are not taken into account.
    #
    # Comparing with something that is not a Sample returns false instead of
    # raising, so that expressions like `sample == nil` are safe.
    #
    def ==(other)
      return true if equal?(other)
      return false unless other.is_a?(Sample)
      include?(other) && other.include?(self)
    end
    alias :eql? :==

    #
    # Computes a hash code for this sample.
    #
    # The code is a sum, hence independent of the order of the strings, and is
    # computed on distinct strings only: == ignores both order and duplicates,
    # and equal samples must have equal hash codes.
    #
    def hash
      @strings.uniq.inject(37) { |memo, str| memo + 17 * str.hash }
    end

    #
    # Yields the block with each string. This method has no effect if no
    # block is given.
    #
    def each
      return unless block_given?
      @strings.each { |str| yield str }
    end

    #
    # Yields the block with each positive string. This method has no effect if no
    # block is given.
    #
    def each_positive
      return unless block_given?
      each { |str| yield str if str.positive? }
    end

    #
    # Returns an enumerator on positive strings.
    #
    def positive_enumerator
      # enum_for replaces Enumerator.new(obj, :meth), a form that Ruby 3.x no
      # longer accepts.
      enum_for(:each_positive)
    end

    #
    # Yields the block with each negative string. This method has no effect if no
    # block is given.
    #
    def each_negative
      return unless block_given?
      each { |str| yield str if str.negative? }
    end

    #
    # Returns an enumerator on negative strings.
    #
    def negative_enumerator
      enum_for(:each_negative)
    end

    #
    # Checks if the sample is correctly classified by a given classifier
    # (expected to include the Stamina::Classifier module).
    # Unlabeled strings are simply ignored.
    #
    def correctly_classified_by?(classifier)
      classifier.correctly_classify?(self)
    end

    #
    # Computes and returns the binary signature of the sample. The signature
    # is a String having one character for each string in the sample. A '1'
    # is used for positive strings, '0' for negative ones and '?' for unlabeled.
    #
    def signature
      signature = ''
      each do |str|
        signature << (str.unlabeled? ? '?' : str.positive? ? '1' : '0')
      end
      signature
    end

    #
    # Takes only a given proportion of this sample and returns it as a new Sample.
    # Each string is kept with probability `proportion`; positive strings are
    # visited first, then negative ones.
    #
    def take(proportion = 0.5)
      taken = Stamina::Sample.new
      each_positive { |s| taken << s if Kernel.rand < proportion }
      each_negative { |s| taken << s if Kernel.rand < proportion }
      taken
    end

    #
    # Prints an ADL description of this sample on the buffer: for each string,
    # a newline followed by the string's own ADL form. Returns the buffer.
    #
    def to_adl(buffer="")
      self.inject(buffer) { |memo, str| memo << "\n" << str.to_adl }
    end
    alias :to_s :to_adl
    alias :inspect :to_adl

    #
    # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
    # that the states of the PTA are in lexical order, according to the <code><=></code>
    # operator defined on symbols. States reached by negative strings are tagged as
    # non accepting and error.
    #
    # Raises an InconsistencyError if a state ends up both accepting and error.
    #
    def self.to_pta(sample)
      thepta = Automaton.new do |pta|
        # Always go through the yielded automaton, as the rest of the block does.
        initial_state = pta.add_state(:initial => true, :accepting => false)

        # Fill the PTA with each string
        sample.each do |str|
          # split string using the dfa
          parsed, reached, remaining = pta.dfa_split(str, initial_state)

          # remaining symbols are not empty -> build the PTA
          unless remaining.empty?
            remaining.each do |symbol|
              newone = pta.add_state(:initial => false, :accepting => false, :error => false)
              pta.connect(reached, newone, symbol)
              reached = newone
            end
          end

          # flag state
          str.positive? ? reached.accepting! : reached.error!

          # check consistency, should not arrive as Sample does not allow
          # inconsistencies. Should appear only if _sample_ is not a Sample
          # instance but some other enumerable.
          raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
            if reached.error? && reached.accepting?
        end

        # Reindex states by applying BFS
        to_index, index = [initial_state], 0
        until to_index.empty?
          state = to_index.shift
          state[:__index__] = index
          state.out_edges.sort { |e, f| e.symbol <=> f.symbol }.each { |e| to_index << e.target }
          index += 1
        end
      end

      # Now we rebuild a fresh one with states in order.
      # This looks more efficient than reordering states of the PTA
      Automaton.new do |ordered|
        ordered.add_n_states(thepta.state_count)
        thepta.each_state do |pta_state|
          source = ordered.ith_state(pta_state[:__index__])
          source.initial! if pta_state.initial?
          source.accepting! if pta_state.accepting?
          source.error! if pta_state.error?
          pta_state.out_edges.each do |e|
            target = ordered.ith_state(e.target[:__index__])
            ordered.connect(source, target, e.symbol)
          end
        end
      end

    end

    # Converts this sample to a PTA
    def to_pta
      Sample.to_pta(self)
    end
    alias :to_fa :to_pta
    alias :to_dfa :to_pta

    # Converts this sample to a canonical dfa
    def to_cdfa
      to_pta.to_cdfa
    end

    # Converts this sample to a dot output
    def to_dot
      to_pta.to_dot
    end

  end # class Sample
end # module Stamina