stamina 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. data/CHANGELOG.md +24 -0
  2. data/Gemfile.lock +5 -1
  3. data/bin/stamina +10 -0
  4. data/lib/stamina.rb +2 -1
  5. data/lib/stamina/abbadingo.rb +2 -0
  6. data/lib/stamina/abbadingo/random_dfa.rb +48 -0
  7. data/lib/stamina/abbadingo/random_sample.rb +146 -0
  8. data/lib/stamina/adl.rb +6 -6
  9. data/lib/stamina/automaton.rb +29 -4
  10. data/lib/stamina/automaton/complete.rb +36 -0
  11. data/lib/stamina/automaton/equivalence.rb +55 -0
  12. data/lib/stamina/automaton/metrics.rb +8 -1
  13. data/lib/stamina/automaton/minimize.rb +25 -0
  14. data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
  15. data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
  16. data/lib/stamina/automaton/strip.rb +16 -0
  17. data/lib/stamina/automaton/walking.rb +46 -19
  18. data/lib/stamina/command.rb +45 -0
  19. data/lib/stamina/command/abbadingo_dfa.rb +81 -0
  20. data/lib/stamina/command/abbadingo_samples.rb +40 -0
  21. data/lib/stamina/command/adl2dot.rb +71 -0
  22. data/lib/stamina/command/classify.rb +48 -0
  23. data/lib/stamina/command/help.rb +27 -0
  24. data/lib/stamina/command/infer.rb +141 -0
  25. data/lib/stamina/command/metrics.rb +51 -0
  26. data/lib/stamina/command/robustness.rb +22 -0
  27. data/lib/stamina/command/score.rb +35 -0
  28. data/lib/stamina/errors.rb +4 -1
  29. data/lib/stamina/ext/math.rb +20 -0
  30. data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
  31. data/lib/stamina/induction/commons.rb +32 -46
  32. data/lib/stamina/induction/rpni.rb +7 -9
  33. data/lib/stamina/induction/union_find.rb +3 -3
  34. data/lib/stamina/loader.rb +1 -0
  35. data/lib/stamina/sample.rb +79 -2
  36. data/lib/stamina/scoring.rb +37 -0
  37. data/lib/stamina/version.rb +2 -2
  38. data/stamina.gemspec +2 -1
  39. data/stamina.noespec +9 -12
  40. data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
  41. data/test/stamina/abbadingo/random_sample_test.rb +78 -0
  42. data/test/stamina/adl_test.rb +27 -2
  43. data/test/stamina/automaton/complete_test.rb +58 -0
  44. data/test/stamina/automaton/equivalence_test.rb +120 -0
  45. data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
  46. data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
  47. data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
  48. data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
  49. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
  50. data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
  51. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
  52. data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
  53. data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
  54. data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
  55. data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
  56. data/test/stamina/automaton/strip_test.rb +36 -0
  57. data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
  58. data/test/stamina/automaton_test.rb +13 -1
  59. data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
  60. data/test/stamina/sample_test.rb +75 -0
  61. data/test/stamina/stamina_test.rb +13 -2
  62. metadata +98 -23
  63. data/bin/adl2dot +0 -12
  64. data/bin/classify +0 -12
  65. data/bin/redblue +0 -12
  66. data/bin/rpni +0 -12
  67. data/lib/stamina/command/adl2dot_command.rb +0 -73
  68. data/lib/stamina/command/classify_command.rb +0 -57
  69. data/lib/stamina/command/redblue_command.rb +0 -58
  70. data/lib/stamina/command/rpni_command.rb +0 -58
  71. data/lib/stamina/command/stamina_command.rb +0 -79
@@ -1,5 +1,8 @@
1
1
  module Stamina
2
2
 
3
+ # Raised when an algorithm explicitly aborts something
4
+ class Abord < StandardError; end
5
+
3
6
  # Main class of all stamina errors.
4
7
  class StaminaError < StandardError; end
5
8
 
@@ -17,4 +20,4 @@ module Stamina
17
20
 
18
21
  end
19
22
 
20
- end # module Stamina
23
+ end # module Stamina
@@ -0,0 +1,20 @@
1
+ if RUBY_VERSION < "1.9"
2
+
3
+ def Math.log2( x )
4
+ Math.log( x ) / Math.log( 2 )
5
+ end
6
+
7
+ def Math.logn( x, n )
8
+ Math.log( x ) / Math.log( n )
9
+ end
10
+
11
+ end
12
+
13
+ def Math.max(i, j)
14
+ i > j ? i : j
15
+ end
16
+
17
+ def Math.min(i, j)
18
+ i < j ? i : j
19
+ end
20
+
@@ -2,7 +2,7 @@ module Stamina
2
2
  module Induction
3
3
 
4
4
  #
5
- # Implementation of the RedBlue variant of the RPNI algorithm (with the blue-fringe
5
+ # Implementation of the BlueFringe variant of the RPNI algorithm (with the blue-fringe
6
6
  # heuristics).
7
7
  #
8
8
  # See Lang, K., B. Pearlmutter, and R. Price. 1998. Results of the Abbadingo One DFA
@@ -13,34 +13,31 @@ module Stamina
13
13
  # # sample typically comes from an ADL file
14
14
  # sample = Stamina::ADL.parse_sample_file('sample.adl')
15
15
  #
16
- # # let RedBlue build the smallest dfa
17
- # dfa = Stamina::Induction::RedBlue.execute(sample, {:verbose => true})
16
+ # # let BlueFringe build the smallest dfa
17
+ # dfa = Stamina::Induction::BlueFringe.execute(sample, {:verbose => true})
18
18
  #
19
19
  # Remarks:
20
20
  # - Constructor and instance methods of this class are public but not intended
21
21
  # to be used directly. They are left public for testing purposes only.
22
- # - Having read the Stamina::Induction::RedBlue base algorithm may help undertanding
22
+ # - Having read the Stamina::Induction::RPNI base algorithm may help understanding
23
23
  # this variant.
24
24
  # - This class intensively uses the Stamina::Induction::UnionFind class and
25
25
  # methods defined in the Stamina::Induction::Commons module which are worth
26
26
  # reading to understand the algorithm implementation.
27
27
  #
28
- class RedBlue
28
+ class BlueFringe
29
29
  include Stamina::Induction::Commons
30
30
 
31
31
  # Union-find data structure used internally
32
32
  attr_reader :ufds
33
33
 
34
- # Additional options of the algorithm
35
- attr_reader :options
36
-
37
- #
38
- # Creates an algorithm instance with specific options
39
- #
34
+ # Creates an algorithm instance with given options.
40
35
  def initialize(options={})
41
- @options = options
36
+ raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
37
+ @options = DEFAULT_OPTIONS.merge(options)
38
+ @score_cache = {}
42
39
  end
43
-
40
+
44
41
  #
45
42
  # Computes the score of a single (group) merge. Returned value is 1 if both are
46
43
  # accepting states or both are error states and 0 otherwise. Note that d1 and d2
@@ -123,13 +120,16 @@ module Stamina
123
120
  # been evaluated and is then seen unchanged by the caller.
124
121
  #
125
122
  def merge_and_determinize_score(i, j)
126
- # score the merging, always rollback the transaction
127
- score = nil
128
- @ufds.transactional do
129
- score = merge_and_determinize(i, j)
130
- false
123
+ score = @score_cache[[i,j]] ||= begin
124
+ # score the merging, always rollback the transaction
125
+ score = nil
126
+ @ufds.transactional do
127
+ score = merge_and_determinize(i, j)
128
+ false
129
+ end
130
+ score || -1
131
131
  end
132
- score
132
+ score == -1 ? nil : score
133
133
  end
134
134
 
135
135
  #
@@ -163,8 +163,8 @@ module Stamina
163
163
  # sample are correctly classified by it.
164
164
  #
165
165
  def main(ufds)
166
- puts "Starting RedBlue (#{ufds.size} states)" if @options[:verbose]
167
- @ufds, @kernel = ufds, [0]
166
+ info("Starting BlueFringe (#{ufds.size} states)")
167
+ @ufds, @kernel, @score_cache = ufds, [0], {}
168
168
 
169
169
  # we do it until the fringe is empty (compute it only once each step)
170
170
  until (the_fringe=fringe).empty?
@@ -196,15 +196,16 @@ module Stamina
196
196
  # If not found, the last candidate must be consolidated. Otherwise, we
197
197
  # do the best merging
198
198
  unless to_consolidate.nil?
199
- puts "Consolidation of #{to_consolidate}" if @options[:verbose]
199
+ info("Consolidation of #{to_consolidate}")
200
200
  @kernel << to_consolidate
201
201
  else
202
- puts "Merging #{best[0]} and #{best[1]} [#{best[2]}]" if @options[:verbose]
202
+ @score_cache.clear
203
+ info("Merging #{best[0]} and #{best[1]} [#{best[2]}]")
203
204
  # this one should never fail because its score was positive before
204
205
  raise "Unexpected case" unless merge_and_determinize(best[0], best[1])
205
206
  end
206
207
 
207
- # redblue does not guarantee that it will not merge a state of lower rank
208
+ # blue_fringe does not guarantee that it will not merge a state of lower rank
208
209
  # with a kernel state. The kernel should then be updated at each step to keep
209
210
  # lowest indices for the whole kernel, and we sort it
210
211
  @kernel = @kernel.collect{|k| @ufds.find(k)}.sort
@@ -226,13 +227,13 @@ module Stamina
226
227
  # given as input.
227
228
  #
228
229
  # Remarks:
229
- # - This instance version of RedBlue.execute is not intended to be used directly and
230
+ # - This instance version of BlueFringe.execute is not intended to be used directly and
230
231
  # is mainly provided for testing purposes. Please use the class variant of this
231
232
  # method if possible.
232
233
  #
233
234
  def execute(sample)
234
235
  # create union-find
235
- puts "Creating PTA and UnionFind structure" if @options[:verbose]
236
+ info("Creating PTA and UnionFind structure")
236
237
  ufds = sample2ufds(sample)
237
238
  # refine it
238
239
  ufds = main(ufds)
@@ -255,10 +256,10 @@ module Stamina
255
256
  # given as input.
256
257
  #
257
258
  def self.execute(sample, options={})
258
- RedBlue.new(options).execute(sample)
259
+ BlueFringe.new(options).execute(sample)
259
260
  end
260
261
 
261
- end # class RedBlue
262
+ end # class BlueFringe
262
263
 
263
264
  end # module Induction
264
265
  end # module Stamina
@@ -2,20 +2,45 @@ module Stamina
2
2
  module Induction
3
3
 
4
4
  #
5
- # Defines common utilities used by rpni and redblue. About acronyms:
5
+ # Defines common utilities used by rpni and blue_fringe. About acronyms:
6
6
  # - _pta_ stands for Prefix Tree Acceptor
7
7
  # - _ufds_ stands for Union-Find Data Structure
8
8
  #
9
- # Methods pta2ufds, sample2pta and sample2ufds are simply conversion methods used
10
- # when the induction algorithm starts (executed on a sample, it first built a pta
11
- # then convert it to a union find). Method ufds2pta is used when the algorithm ends,
12
- # to convert refined union find to a dfa.
9
+ # Methods pta2ufds and sample2ufds are simply conversion methods used when the induction
10
+ # algorithm starts (executed on a sample, it first builds a pta then converts it to a union
11
+ # find). Method ufds2dfa is used when the algorithm ends, to convert the refined union find to
12
+ # a dfa.
13
13
  #
14
14
  # The merge_user_data method is probably the most important as it actually computes
15
15
  # the merging of two states and build information about merging for determinization.
16
16
  #
17
17
  module Commons
18
18
 
19
+ DEFAULT_OPTIONS = {
20
+ :verbose => false,
21
+ :verbose_io => $stderr
22
+ }
23
+
24
+ # Additional options of the algorithm
25
+ attr_reader :options
26
+
27
+ # Is the verbose mode on ?
28
+ def verbose?
29
+ @verbose ||= !!options[:verbose]
30
+ end
31
+
32
+ def verbose_io
33
+ @verbose_io ||= options[:verbose_io] || $stderr
34
+ end
35
+
36
+ # Display an information message (when verbose)
37
+ def info(msg)
38
+ if verbose?
39
+ verbose_io << msg << "\n"
40
+ verbose_io.flush
41
+ end
42
+ end
43
+
19
44
  #
20
45
  # Factors and returns a UnionFind data structure from a PTA, keeping natural order
21
46
  # of its states for union-find elements. The resulting UnionFind contains a Hash as
@@ -47,46 +72,7 @@ module Stamina
47
72
  # non accepting and error.
48
73
  #
49
74
  def sample2pta(sample)
50
- Automaton.new do |pta|
51
- initial_state = add_state(:initial => true, :accepting => false)
52
-
53
- # Fill the PTA with each string
54
- sample.each do |str|
55
- # split string using the dfa
56
- parsed, reached, remaining = pta.dfa_split(str, initial_state)
57
-
58
- # remaining symbols are not empty -> build the PTA
59
- unless remaining.empty?
60
- remaining.each do |symbol|
61
- newone = pta.add_state(:initial => false, :accepting => false, :error => false)
62
- pta.connect(reached, newone, symbol)
63
- reached = newone
64
- end
65
- end
66
-
67
- # flag state
68
- str.positive? ? reached.accepting! : reached.error!
69
-
70
- # check consistency, should not arrive as Sample does not allow
71
- # inconsistencies. Should appear only if _sample_ is not a Sample
72
- # instance but some other enumerable.
73
- raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
74
- if (reached.error? and reached.accepting?)
75
- end
76
-
77
- # Reindex states by applying BFS
78
- to_index, index = [initial_state], 0
79
- until to_index.empty?
80
- state = to_index.shift
81
- state[:__index__] = index
82
- state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each {|e| to_index << e.target}
83
- index += 1
84
- end
85
- # Force the automaton to reindex
86
- pta.order_states{|s0,s1| s0[:__index__]<=>s1[:__index__]}
87
- # Remove marks
88
- pta.states.each{|s| s.remove_mark(:__index__)}
89
- end
75
+ sample.to_pta
90
76
  end
91
77
 
92
78
  #
@@ -167,4 +153,4 @@ module Stamina
167
153
  end # module Commons
168
154
 
169
155
  end # module Induction
170
- end # module Stamina
156
+ end # module Stamina
@@ -31,14 +31,12 @@ module Stamina
31
31
  # Union-find data structure used internally
32
32
  attr_reader :ufds
33
33
 
34
- # Additional options of the algorithm
35
- attr_reader :options
36
-
37
34
  # Creates an algorithm instance with given options.
38
35
  def initialize(options={})
39
- @options = options
36
+ raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
37
+ @options = DEFAULT_OPTIONS.merge(options)
40
38
  end
41
-
39
+
42
40
  #
43
41
  # Merges a state of rank j with a state of lower rank i. This merge method
44
42
  # includes merging for determinization.
@@ -118,7 +116,7 @@ module Stamina
118
116
  #
119
117
  def main(ufds)
120
118
  @ufds = ufds
121
- puts "Starting RPNI (#{@ufds.size} states)" if @options[:verbose]
119
+ info("Starting RPNI (#{@ufds.size} states)")
122
120
  # First loop, iterating all PTA states
123
121
  (1...@ufds.size).each do |i|
124
122
  # we ignore those that have been previously merged
@@ -130,7 +128,7 @@ module Stamina
130
128
  # simply break the loop if it works!
131
129
  success = successfull_merge_or_nothing(i,j)
132
130
  if success
133
- puts "#{i} and #{j} successfully merged" if @options[:verbose]
131
+ info("#{i} and #{j} successfully merged")
134
132
  break
135
133
  end
136
134
  end # j loop
@@ -156,7 +154,7 @@ module Stamina
156
154
  #
157
155
  def execute(sample)
158
156
  # create union-find
159
- puts "Creating PTA and UnionFind structure" if @options[:verbose]
157
+ info("Creating PTA and UnionFind structure")
160
158
  ufds = sample2ufds(sample)
161
159
  # refine it
162
160
  ufds = main(ufds)
@@ -185,4 +183,4 @@ module Stamina
185
183
  end # class RPNI
186
184
 
187
185
  end # module Induction
188
- end # module Stamina
186
+ end # module Stamina
@@ -86,7 +86,7 @@ module Stamina
86
86
  # == Transactional support
87
87
  #
88
88
  # The main aim of this UnionFind is to make the implementation of the induction algorithms
89
- # Stamina::Induction::RPNI and Stamina::Induction::RedBlue (sufficiently) efficient,
89
+ # Stamina::Induction::RPNI and Stamina::Induction::BlueFringe (sufficiently) efficient,
90
90
  # simple and readable. These algorithms rely on a trial-and-error strategy and must be
91
91
  # able to revert the changes they have made during their last try. The transaction
92
92
  # support implemented by this data structure helps them achieving this goal. For this
@@ -129,7 +129,7 @@ module Stamina
129
129
  # Duplicates this node, ensuring that future changes will not affect the copy.
130
130
  # Please note that the user data itself is not duplicated and is not expected
131
131
  # to change. This property (not changing user data) is respected by the RPNI
132
- # and RedBlue classes as implemented in this library.
132
+ # and BlueFringe classes as implemented in this library.
133
133
  #
134
134
  def dup
135
135
  Node.new(@parent, @data)
@@ -374,4 +374,4 @@ module Stamina
374
374
  end # class UnionFind
375
375
 
376
376
  end # module Induction
377
- end # module Stamina
377
+ end # module Stamina
@@ -0,0 +1 @@
1
+ require "quickl"
@@ -28,9 +28,10 @@ module Stamina
28
28
  #
29
29
  # Creates an empty sample.
30
30
  #
31
- def initialize()
31
+ def initialize(strings = nil)
32
32
  @strings = []
33
33
  @size, @positive_count, @negative_count = 0, 0, 0
34
+ strings.each{|s| self << s } unless strings.nil?
34
35
  end
35
36
 
36
37
  #
@@ -175,6 +176,16 @@ module Stamina
175
176
  end
176
177
  signature
177
178
  end
179
+
180
+ #
181
+ # Takes only a given proportion of this sample and returns it as a new Sample.
182
+ #
183
+ def take(proportion = 0.5)
184
+ taken = Stamina::Sample.new
185
+ each_positive{|s| taken << s if Kernel.rand < proportion}
186
+ each_negative{|s| taken << s if Kernel.rand < proportion}
187
+ taken
188
+ end
178
189
 
179
190
  #
180
191
  # Prints an ADL description of this sample on the buffer.
@@ -184,7 +195,73 @@ module Stamina
184
195
  end
185
196
  alias :to_s :to_adl
186
197
  alias :inspect :to_adl
198
+
199
+ #
200
+ # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
201
+ # that the states of the PTA are in lexical order, according to the <code><=></code>
202
+ # operator defined on symbols. States reached by negative strings are tagged as
203
+ # non accepting and error.
204
+ #
205
+ def self.to_pta(sample)
206
+ thepta = Automaton.new do |pta|
207
+ initial_state = add_state(:initial => true, :accepting => false)
208
+
209
+ # Fill the PTA with each string
210
+ sample.each do |str|
211
+ # split string using the dfa
212
+ parsed, reached, remaining = pta.dfa_split(str, initial_state)
187
213
 
188
- end # class Sample
214
+ # remaining symbols are not empty -> build the PTA
215
+ unless remaining.empty?
216
+ remaining.each do |symbol|
217
+ newone = pta.add_state(:initial => false, :accepting => false, :error => false)
218
+ pta.connect(reached, newone, symbol)
219
+ reached = newone
220
+ end
221
+ end
222
+
223
+ # flag state
224
+ str.positive? ? reached.accepting! : reached.error!
225
+
226
+ # check consistency; this should not happen as Sample does not allow
227
+ # inconsistencies. Should appear only if _sample_ is not a Sample
228
+ # instance but some other enumerable.
229
+ raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
230
+ if (reached.error? and reached.accepting?)
231
+ end
189
232
 
233
+ # Reindex states by applying BFS
234
+ to_index, index = [initial_state], 0
235
+ until to_index.empty?
236
+ state = to_index.shift
237
+ state[:__index__] = index
238
+ state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each{|e| to_index << e.target}
239
+ index += 1
240
+ end
241
+ end
242
+
243
+ # Now we rebuild a fresh one with states in order.
244
+ # This looks more efficient than reordering states of the PTA
245
+ Automaton.new do |ordered|
246
+ ordered.add_n_states(thepta.state_count)
247
+ thepta.each_state do |pta_state|
248
+ source = ordered.ith_state(pta_state[:__index__])
249
+ source.initial! if pta_state.initial?
250
+ source.accepting! if pta_state.accepting?
251
+ source.error! if pta_state.error?
252
+ pta_state.out_edges.each do |e|
253
+ target = ordered.ith_state(e.target[:__index__])
254
+ ordered.connect(source, target, e.symbol)
255
+ end
256
+ end
257
+ end
258
+
259
+ end
260
+
261
+ # Convenient shortcut for Sample.to_pta(sample_instance)
262
+ def to_pta
263
+ Sample.to_pta(self)
264
+ end
265
+
266
+ end # class Sample
190
267
  end # module Stamina