stamina 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. data/CHANGELOG.md +24 -0
  2. data/Gemfile.lock +5 -1
  3. data/bin/stamina +10 -0
  4. data/lib/stamina.rb +2 -1
  5. data/lib/stamina/abbadingo.rb +2 -0
  6. data/lib/stamina/abbadingo/random_dfa.rb +48 -0
  7. data/lib/stamina/abbadingo/random_sample.rb +146 -0
  8. data/lib/stamina/adl.rb +6 -6
  9. data/lib/stamina/automaton.rb +29 -4
  10. data/lib/stamina/automaton/complete.rb +36 -0
  11. data/lib/stamina/automaton/equivalence.rb +55 -0
  12. data/lib/stamina/automaton/metrics.rb +8 -1
  13. data/lib/stamina/automaton/minimize.rb +25 -0
  14. data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
  15. data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
  16. data/lib/stamina/automaton/strip.rb +16 -0
  17. data/lib/stamina/automaton/walking.rb +46 -19
  18. data/lib/stamina/command.rb +45 -0
  19. data/lib/stamina/command/abbadingo_dfa.rb +81 -0
  20. data/lib/stamina/command/abbadingo_samples.rb +40 -0
  21. data/lib/stamina/command/adl2dot.rb +71 -0
  22. data/lib/stamina/command/classify.rb +48 -0
  23. data/lib/stamina/command/help.rb +27 -0
  24. data/lib/stamina/command/infer.rb +141 -0
  25. data/lib/stamina/command/metrics.rb +51 -0
  26. data/lib/stamina/command/robustness.rb +22 -0
  27. data/lib/stamina/command/score.rb +35 -0
  28. data/lib/stamina/errors.rb +4 -1
  29. data/lib/stamina/ext/math.rb +20 -0
  30. data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
  31. data/lib/stamina/induction/commons.rb +32 -46
  32. data/lib/stamina/induction/rpni.rb +7 -9
  33. data/lib/stamina/induction/union_find.rb +3 -3
  34. data/lib/stamina/loader.rb +1 -0
  35. data/lib/stamina/sample.rb +79 -2
  36. data/lib/stamina/scoring.rb +37 -0
  37. data/lib/stamina/version.rb +2 -2
  38. data/stamina.gemspec +2 -1
  39. data/stamina.noespec +9 -12
  40. data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
  41. data/test/stamina/abbadingo/random_sample_test.rb +78 -0
  42. data/test/stamina/adl_test.rb +27 -2
  43. data/test/stamina/automaton/complete_test.rb +58 -0
  44. data/test/stamina/automaton/equivalence_test.rb +120 -0
  45. data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
  46. data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
  47. data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
  48. data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
  49. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
  50. data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
  51. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
  52. data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
  53. data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
  54. data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
  55. data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
  56. data/test/stamina/automaton/strip_test.rb +36 -0
  57. data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
  58. data/test/stamina/automaton_test.rb +13 -1
  59. data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
  60. data/test/stamina/sample_test.rb +75 -0
  61. data/test/stamina/stamina_test.rb +13 -2
  62. metadata +98 -23
  63. data/bin/adl2dot +0 -12
  64. data/bin/classify +0 -12
  65. data/bin/redblue +0 -12
  66. data/bin/rpni +0 -12
  67. data/lib/stamina/command/adl2dot_command.rb +0 -73
  68. data/lib/stamina/command/classify_command.rb +0 -57
  69. data/lib/stamina/command/redblue_command.rb +0 -58
  70. data/lib/stamina/command/rpni_command.rb +0 -58
  71. data/lib/stamina/command/stamina_command.rb +0 -79
@@ -1,5 +1,8 @@
1
1
  module Stamina
2
2
 
3
+ # Raised when an algorithm explicitly aborts something
4
+ class Abord < StandardError; end
5
+
3
6
  # Main class of all stamina errors.
4
7
  class StaminaError < StandardError; end
5
8
 
@@ -17,4 +20,4 @@ module Stamina
17
20
 
18
21
  end
19
22
 
20
- end # module Stamina
23
+ end # module Stamina
@@ -0,0 +1,20 @@
1
+ if RUBY_VERSION < "1.9"
2
+
3
+ def Math.log2( x )
4
+ Math.log( x ) / Math.log( 2 )
5
+ end
6
+
7
+ def Math.logn( x, n )
8
+ Math.log( x ) / Math.log( n )
9
+ end
10
+
11
+ end
12
+
13
+ def Math.max(i, j)
14
+ i > j ? i : j
15
+ end
16
+
17
+ def Math.min(i, j)
18
+ i < j ? i : j
19
+ end
20
+
@@ -2,7 +2,7 @@ module Stamina
2
2
  module Induction
3
3
 
4
4
  #
5
- # Implementation of the RedBlue variant of the RPNI algorithm (with the blue-fringe
5
+ # Implementation of the BlueFringe variant of the RPNI algorithm (with the blue-fringe
6
6
  # heuristics).
7
7
  #
8
8
  # See Lang, K., B. Pearlmutter, and R. Price. 1998. Results of the Abbadingo One DFA
@@ -13,34 +13,31 @@ module Stamina
13
13
  # # sample typically comes from an ADL file
14
14
  # sample = Stamina::ADL.parse_sample_file('sample.adl')
15
15
  #
16
- # # let RedBlue build the smallest dfa
17
- # dfa = Stamina::Induction::RedBlue.execute(sample, {:verbose => true})
16
+ # # let BlueFringe build the smallest dfa
17
+ # dfa = Stamina::Induction::BlueFringe.execute(sample, {:verbose => true})
18
18
  #
19
19
  # Remarks:
20
20
  # - Constructor and instance methods of this class are public but not intended
21
21
  # to be used directly. They are left public for testing purposes only.
22
- # - Having read the Stamina::Induction::RedBlue base algorithm may help undertanding
22
+ # - Having read the Stamina::Induction::BlueFringe base algorithm may help understanding
23
23
  # this variant.
24
24
  # - This class intensively uses the Stamina::Induction::UnionFind class and
25
25
  # methods defined in the Stamina::Induction::Commons module which are worth
26
26
  # reading to understand the algorithm implementation.
27
27
  #
28
- class RedBlue
28
+ class BlueFringe
29
29
  include Stamina::Induction::Commons
30
30
 
31
31
  # Union-find data structure used internally
32
32
  attr_reader :ufds
33
33
 
34
- # Additional options of the algorithm
35
- attr_reader :options
36
-
37
- #
38
- # Creates an algorithm instance with specific options
39
- #
34
+ # Creates an algorithm instance with given options.
40
35
  def initialize(options={})
41
- @options = options
36
+ raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
37
+ @options = DEFAULT_OPTIONS.merge(options)
38
+ @score_cache = {}
42
39
  end
43
-
40
+
44
41
  #
45
42
  # Computes the score of a single (group) merge. Returned value is 1 if both are
46
43
  # accepting states or both are error states and 0 otherwise. Note that d1 and d2
@@ -123,13 +120,16 @@ module Stamina
123
120
  # been evaluated and is then seen unchanged by the caller.
124
121
  #
125
122
  def merge_and_determinize_score(i, j)
126
- # score the merging, always rollback the transaction
127
- score = nil
128
- @ufds.transactional do
129
- score = merge_and_determinize(i, j)
130
- false
123
+ score = @score_cache[[i,j]] ||= begin
124
+ # score the merging, always rollback the transaction
125
+ score = nil
126
+ @ufds.transactional do
127
+ score = merge_and_determinize(i, j)
128
+ false
129
+ end
130
+ score || -1
131
131
  end
132
- score
132
+ score == -1 ? nil : score
133
133
  end
134
134
 
135
135
  #
@@ -163,8 +163,8 @@ module Stamina
163
163
  # sample are correctly classified by it.
164
164
  #
165
165
  def main(ufds)
166
- puts "Starting RedBlue (#{ufds.size} states)" if @options[:verbose]
167
- @ufds, @kernel = ufds, [0]
166
+ info("Starting BlueFringe (#{ufds.size} states)")
167
+ @ufds, @kernel, @score_cache = ufds, [0], {}
168
168
 
169
169
  # we do it until the fringe is empty (compute it only once each step)
170
170
  until (the_fringe=fringe).empty?
@@ -196,15 +196,16 @@ module Stamina
196
196
  # If not found, the last candidate must be consolidated. Otherwise, we
197
197
  # do the best merging
198
198
  unless to_consolidate.nil?
199
- puts "Consolidation of #{to_consolidate}" if @options[:verbose]
199
+ info("Consolidation of #{to_consolidate}")
200
200
  @kernel << to_consolidate
201
201
  else
202
- puts "Merging #{best[0]} and #{best[1]} [#{best[2]}]" if @options[:verbose]
202
+ @score_cache.clear
203
+ info("Merging #{best[0]} and #{best[1]} [#{best[2]}]")
203
204
  # this one should never fail because its score was positive before
204
205
  raise "Unexpected case" unless merge_and_determinize(best[0], best[1])
205
206
  end
206
207
 
207
- # redblue does not guarantee that it will not merge a state of lower rank
208
+ # blue_fringe does not guarantee that it will not merge a state of lower rank
208
209
  # with a kernel state. The kernel should then be updated at each step to keep
209
210
  # lowest indices for the whole kernel, and we sort it
210
211
  @kernel = @kernel.collect{|k| @ufds.find(k)}.sort
@@ -226,13 +227,13 @@ module Stamina
226
227
  # given as input.
227
228
  #
228
229
  # Remarks:
229
- # - This instance version of RedBlue.execute is not intended to be used directly and
230
+ # - This instance version of BlueFringe.execute is not intended to be used directly and
230
231
  # is mainly provided for testing purposes. Please use the class variant of this
231
232
  # method if possible.
232
233
  #
233
234
  def execute(sample)
234
235
  # create union-find
235
- puts "Creating PTA and UnionFind structure" if @options[:verbose]
236
+ info("Creating PTA and UnionFind structure")
236
237
  ufds = sample2ufds(sample)
237
238
  # refine it
238
239
  ufds = main(ufds)
@@ -255,10 +256,10 @@ module Stamina
255
256
  # given as input.
256
257
  #
257
258
  def self.execute(sample, options={})
258
- RedBlue.new(options).execute(sample)
259
+ BlueFringe.new(options).execute(sample)
259
260
  end
260
261
 
261
- end # class RedBlue
262
+ end # class BlueFringe
262
263
 
263
264
  end # module Induction
264
265
  end # module Stamina
@@ -2,20 +2,45 @@ module Stamina
2
2
  module Induction
3
3
 
4
4
  #
5
- # Defines common utilities used by rpni and redblue. About acronyms:
5
+ # Defines common utilities used by rpni and blue_fringe. About acronyms:
6
6
  # - _pta_ stands for Prefix Tree Acceptor
7
7
  # - _ufds_ stands for Union-Find Data Structure
8
8
  #
9
- # Methods pta2ufds, sample2pta and sample2ufds are simply conversion methods used
10
- # when the induction algorithm starts (executed on a sample, it first built a pta
11
- # then convert it to a union find). Method ufds2pta is used when the algorithm ends,
12
- # to convert refined union find to a dfa.
9
+ # Methods pta2ufds and sample2ufds are simply conversion methods used when the induction
10
+ # algorithm starts (executed on a sample, it first builds a pta then converts it to a union
11
+ # find). Method ufds2dfa is used when the algorithm ends, to convert refined union find to
12
+ # a dfa.
13
13
  #
14
14
  # The merge_user_data method is probably the most important as it actually computes
15
15
  # the merging of two states and build information about merging for determinization.
16
16
  #
17
17
  module Commons
18
18
 
19
+ DEFAULT_OPTIONS = {
20
+ :verbose => false,
21
+ :verbose_io => $stderr
22
+ }
23
+
24
+ # Additional options of the algorithm
25
+ attr_reader :options
26
+
27
+ # Is the verbose mode on ?
28
+ def verbose?
29
+ @verbose ||= !!options[:verbose]
30
+ end
31
+
32
+ def verbose_io
33
+ @verbose_io ||= options[:verbose_io] || $stderr
34
+ end
35
+
36
+ # Display an information message (when verbose)
37
+ def info(msg)
38
+ if verbose?
39
+ verbose_io << msg << "\n"
40
+ verbose_io.flush
41
+ end
42
+ end
43
+
19
44
  #
20
45
  # Factors and returns a UnionFind data structure from a PTA, keeping natural order
21
46
  # of its states for union-find elements. The resulting UnionFind contains a Hash as
@@ -47,46 +72,7 @@ module Stamina
47
72
  # non accepting and error.
48
73
  #
49
74
  def sample2pta(sample)
50
- Automaton.new do |pta|
51
- initial_state = add_state(:initial => true, :accepting => false)
52
-
53
- # Fill the PTA with each string
54
- sample.each do |str|
55
- # split string using the dfa
56
- parsed, reached, remaining = pta.dfa_split(str, initial_state)
57
-
58
- # remaining symbols are not empty -> build the PTA
59
- unless remaining.empty?
60
- remaining.each do |symbol|
61
- newone = pta.add_state(:initial => false, :accepting => false, :error => false)
62
- pta.connect(reached, newone, symbol)
63
- reached = newone
64
- end
65
- end
66
-
67
- # flag state
68
- str.positive? ? reached.accepting! : reached.error!
69
-
70
- # check consistency, should not arrive as Sample does not allow
71
- # inconsistencies. Should appear only if _sample_ is not a Sample
72
- # instance but some other enumerable.
73
- raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
74
- if (reached.error? and reached.accepting?)
75
- end
76
-
77
- # Reindex states by applying BFS
78
- to_index, index = [initial_state], 0
79
- until to_index.empty?
80
- state = to_index.shift
81
- state[:__index__] = index
82
- state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each {|e| to_index << e.target}
83
- index += 1
84
- end
85
- # Force the automaton to reindex
86
- pta.order_states{|s0,s1| s0[:__index__]<=>s1[:__index__]}
87
- # Remove marks
88
- pta.states.each{|s| s.remove_mark(:__index__)}
89
- end
75
+ sample.to_pta
90
76
  end
91
77
 
92
78
  #
@@ -167,4 +153,4 @@ module Stamina
167
153
  end # module Commons
168
154
 
169
155
  end # module Induction
170
- end # module Stamina
156
+ end # module Stamina
@@ -31,14 +31,12 @@ module Stamina
31
31
  # Union-find data structure used internally
32
32
  attr_reader :ufds
33
33
 
34
- # Additional options of the algorithm
35
- attr_reader :options
36
-
37
34
  # Creates an algorithm instance with given options.
38
35
  def initialize(options={})
39
- @options = options
36
+ raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
37
+ @options = DEFAULT_OPTIONS.merge(options)
40
38
  end
41
-
39
+
42
40
  #
43
41
  # Merges a state of rank j with a state of lower rank i. This merge method
44
42
  # includes merging for determinization.
@@ -118,7 +116,7 @@ module Stamina
118
116
  #
119
117
  def main(ufds)
120
118
  @ufds = ufds
121
- puts "Starting RPNI (#{@ufds.size} states)" if @options[:verbose]
119
+ info("Starting RPNI (#{@ufds.size} states)")
122
120
  # First loop, iterating all PTA states
123
121
  (1...@ufds.size).each do |i|
124
122
  # we ignore those that have been previously merged
@@ -130,7 +128,7 @@ module Stamina
130
128
  # simply break the loop if it works!
131
129
  success = successfull_merge_or_nothing(i,j)
132
130
  if success
133
- puts "#{i} and #{j} successfully merged" if @options[:verbose]
131
+ info("#{i} and #{j} successfully merged")
134
132
  break
135
133
  end
136
134
  end # j loop
@@ -156,7 +154,7 @@ module Stamina
156
154
  #
157
155
  def execute(sample)
158
156
  # create union-find
159
- puts "Creating PTA and UnionFind structure" if @options[:verbose]
157
+ info("Creating PTA and UnionFind structure")
160
158
  ufds = sample2ufds(sample)
161
159
  # refine it
162
160
  ufds = main(ufds)
@@ -185,4 +183,4 @@ module Stamina
185
183
  end # class RPNI
186
184
 
187
185
  end # module Induction
188
- end # module Stamina
186
+ end # module Stamina
@@ -86,7 +86,7 @@ module Stamina
86
86
  # == Transactional support
87
87
  #
88
88
  # The main aim of this UnionFind is to make the implementation of the induction algorithms
89
- # Stamina::Induction::RPNI and Stamina::Induction::RedBlue (sufficiently) efficient,
89
+ # Stamina::Induction::RPNI and Stamina::Induction::BlueFringe (sufficiently) efficient,
90
90
  # simple and readable. These algorithms rely on a trial-and-error strategy and must be
91
91
  # able to revert the changes they have made during their last try. The transaction
92
92
  # support implemented by this data structure helps them achieving this goal. For this
@@ -129,7 +129,7 @@ module Stamina
129
129
  # Duplicates this node, ensuring that future changes will not affect the copy.
130
130
  # Please note that the user data itself is not duplicated and is not expected
131
131
  # to change. This property (not changing user data) is respected by the RPNI
132
- # and RedBlue classes as implemented in this library.
132
+ # and BlueFringe classes as implemented in this library.
133
133
  #
134
134
  def dup
135
135
  Node.new(@parent, @data)
@@ -374,4 +374,4 @@ module Stamina
374
374
  end # class UnionFind
375
375
 
376
376
  end # module Induction
377
- end # module Stamina
377
+ end # module Stamina
@@ -0,0 +1 @@
1
+ require "quickl"
@@ -28,9 +28,10 @@ module Stamina
28
28
  #
29
29
  # Creates a sample, empty by default or filled with the given strings.
30
30
  #
31
- def initialize()
31
+ def initialize(strings = nil)
32
32
  @strings = []
33
33
  @size, @positive_count, @negative_count = 0, 0, 0
34
+ strings.each{|s| self << s } unless strings.nil?
34
35
  end
35
36
 
36
37
  #
@@ -175,6 +176,16 @@ module Stamina
175
176
  end
176
177
  signature
177
178
  end
179
+
180
+ #
181
+ # Takes only a given proportion of this sample and returns it as a new Sample.
182
+ #
183
+ def take(proportion = 0.5)
184
+ taken = Stamina::Sample.new
185
+ each_positive{|s| taken << s if Kernel.rand < proportion}
186
+ each_negative{|s| taken << s if Kernel.rand < proportion}
187
+ taken
188
+ end
178
189
 
179
190
  #
180
191
  # Prints an ADL description of this sample on the buffer.
@@ -184,7 +195,73 @@ module Stamina
184
195
  end
185
196
  alias :to_s :to_adl
186
197
  alias :inspect :to_adl
198
+
199
+ #
200
+ # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
201
+ # that the states of the PTA are in lexical order, according to the <code><=></code>
202
+ # operator defined on symbols. States reached by negative strings are tagged as
203
+ # non accepting and error.
204
+ #
205
+ def self.to_pta(sample)
206
+ thepta = Automaton.new do |pta|
207
+ initial_state = add_state(:initial => true, :accepting => false)
208
+
209
+ # Fill the PTA with each string
210
+ sample.each do |str|
211
+ # split string using the dfa
212
+ parsed, reached, remaining = pta.dfa_split(str, initial_state)
187
213
 
188
- end # class Sample
214
+ # remaining symbols are not empty -> build the PTA
215
+ unless remaining.empty?
216
+ remaining.each do |symbol|
217
+ newone = pta.add_state(:initial => false, :accepting => false, :error => false)
218
+ pta.connect(reached, newone, symbol)
219
+ reached = newone
220
+ end
221
+ end
222
+
223
+ # flag state
224
+ str.positive? ? reached.accepting! : reached.error!
225
+
226
+ # check consistency, should not happen as Sample does not allow
227
+ # inconsistencies. Should appear only if _sample_ is not a Sample
228
+ # instance but some other enumerable.
229
+ raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
230
+ if (reached.error? and reached.accepting?)
231
+ end
189
232
 
233
+ # Reindex states by applying BFS
234
+ to_index, index = [initial_state], 0
235
+ until to_index.empty?
236
+ state = to_index.shift
237
+ state[:__index__] = index
238
+ state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each{|e| to_index << e.target}
239
+ index += 1
240
+ end
241
+ end
242
+
243
+ # Now we rebuild a fresh one with states in order.
244
+ # This looks more efficient than reordering states of the PTA
245
+ Automaton.new do |ordered|
246
+ ordered.add_n_states(thepta.state_count)
247
+ thepta.each_state do |pta_state|
248
+ source = ordered.ith_state(pta_state[:__index__])
249
+ source.initial! if pta_state.initial?
250
+ source.accepting! if pta_state.accepting?
251
+ source.error! if pta_state.error?
252
+ pta_state.out_edges.each do |e|
253
+ target = ordered.ith_state(e.target[:__index__])
254
+ ordered.connect(source, target, e.symbol)
255
+ end
256
+ end
257
+ end
258
+
259
+ end
260
+
261
+ # Convenient shortcut for Sample.to_pta(sample_instance)
262
+ def to_pta
263
+ Sample.to_pta(self)
264
+ end
265
+
266
+ end # class Sample
190
267
  end # module Stamina