stamina 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. data/CHANGELOG.md +24 -0
  2. data/Gemfile.lock +5 -1
  3. data/bin/stamina +10 -0
  4. data/lib/stamina.rb +2 -1
  5. data/lib/stamina/abbadingo.rb +2 -0
  6. data/lib/stamina/abbadingo/random_dfa.rb +48 -0
  7. data/lib/stamina/abbadingo/random_sample.rb +146 -0
  8. data/lib/stamina/adl.rb +6 -6
  9. data/lib/stamina/automaton.rb +29 -4
  10. data/lib/stamina/automaton/complete.rb +36 -0
  11. data/lib/stamina/automaton/equivalence.rb +55 -0
  12. data/lib/stamina/automaton/metrics.rb +8 -1
  13. data/lib/stamina/automaton/minimize.rb +25 -0
  14. data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
  15. data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
  16. data/lib/stamina/automaton/strip.rb +16 -0
  17. data/lib/stamina/automaton/walking.rb +46 -19
  18. data/lib/stamina/command.rb +45 -0
  19. data/lib/stamina/command/abbadingo_dfa.rb +81 -0
  20. data/lib/stamina/command/abbadingo_samples.rb +40 -0
  21. data/lib/stamina/command/adl2dot.rb +71 -0
  22. data/lib/stamina/command/classify.rb +48 -0
  23. data/lib/stamina/command/help.rb +27 -0
  24. data/lib/stamina/command/infer.rb +141 -0
  25. data/lib/stamina/command/metrics.rb +51 -0
  26. data/lib/stamina/command/robustness.rb +22 -0
  27. data/lib/stamina/command/score.rb +35 -0
  28. data/lib/stamina/errors.rb +4 -1
  29. data/lib/stamina/ext/math.rb +20 -0
  30. data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
  31. data/lib/stamina/induction/commons.rb +32 -46
  32. data/lib/stamina/induction/rpni.rb +7 -9
  33. data/lib/stamina/induction/union_find.rb +3 -3
  34. data/lib/stamina/loader.rb +1 -0
  35. data/lib/stamina/sample.rb +79 -2
  36. data/lib/stamina/scoring.rb +37 -0
  37. data/lib/stamina/version.rb +2 -2
  38. data/stamina.gemspec +2 -1
  39. data/stamina.noespec +9 -12
  40. data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
  41. data/test/stamina/abbadingo/random_sample_test.rb +78 -0
  42. data/test/stamina/adl_test.rb +27 -2
  43. data/test/stamina/automaton/complete_test.rb +58 -0
  44. data/test/stamina/automaton/equivalence_test.rb +120 -0
  45. data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
  46. data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
  47. data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
  48. data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
  49. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
  50. data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
  51. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
  52. data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
  53. data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
  54. data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
  55. data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
  56. data/test/stamina/automaton/strip_test.rb +36 -0
  57. data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
  58. data/test/stamina/automaton_test.rb +13 -1
  59. data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
  60. data/test/stamina/sample_test.rb +75 -0
  61. data/test/stamina/stamina_test.rb +13 -2
  62. metadata +98 -23
  63. data/bin/adl2dot +0 -12
  64. data/bin/classify +0 -12
  65. data/bin/redblue +0 -12
  66. data/bin/rpni +0 -12
  67. data/lib/stamina/command/adl2dot_command.rb +0 -73
  68. data/lib/stamina/command/classify_command.rb +0 -57
  69. data/lib/stamina/command/redblue_command.rb +0 -58
  70. data/lib/stamina/command/rpni_command.rb +0 -58
  71. data/lib/stamina/command/stamina_command.rb +0 -79
data/CHANGELOG.md CHANGED
@@ -1,3 +1,27 @@
1
+ # 0.4.0 / FIX ME
2
+
3
+ * Major Enhancements
4
+
5
+ * Added Automaton#to_adl as a shortcut for Stamina::ADL::print_automaton(...)
6
+ * Added Sample#to_pta taken from Induction::Commons
7
+ * Added Automaton completion (all strings parsable) under Automaton#complete[!?]
8
+ * Added Automaton stripping (removal of unreachable states) under Automaton#strip[!]
9
+ * Added Automaton minimization (Hopcroft + Pitchies) under Automaton#minimize
10
+ * Added Abbadingo generators under Abbadingo::RandomDFA and Abbadingo::RandomSample
11
+ * Added a main 'stamina' command relying on Quickl. classify/adl2dot commands become
12
+ subcommands of stamina itself (see stamina --help for a list of available commands).
13
+ Induction commands (rpni and redblue) are now handled by 'stamina infer' with
14
+ options.
15
+ * Error states are now correctly handled in ADL::parse and ADL::flush
16
+ * RedBlue has been renamed as BlueFringe everywhere (red_?blue -> blue_fringe)
17
+
18
+ * Minor Enhancements
19
+ * Added a few optimizations here and there
20
+
21
+ * Bug fixes
22
+
23
+ * Fixed a bug in Automaton#depth when some states are unreachable
24
+
1
25
  # 0.3.1 / 2011-03-24
2
26
 
3
27
  * Major Enhancements
data/Gemfile.lock CHANGED
@@ -1,13 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- stamina (0.3.1)
4
+ stamina (0.4.0)
5
+ quickl (~> 0.2.0)
5
6
 
6
7
  GEM
7
8
  remote: http://rubygems.org/
8
9
  specs:
9
10
  bluecloth (2.0.11)
10
11
  diff-lcs (1.1.2)
12
+ gnuplot (2.3.6)
13
+ quickl (0.2.0)
11
14
  rake (0.8.7)
12
15
  rspec (2.4.0)
13
16
  rspec-core (~> 2.4.0)
@@ -26,6 +29,7 @@ PLATFORMS
26
29
  DEPENDENCIES
27
30
  bluecloth (~> 2.0.9)
28
31
  bundler (~> 1.0)
32
+ gnuplot (~> 2.3.6)
29
33
  rake (~> 0.8.7)
30
34
  rspec (~> 2.4.0)
31
35
  stamina!
data/bin/stamina ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler'
4
+ Bundler.setup(:default)
5
+
6
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
7
+ require "stamina/command"
8
+
9
+ Stamina::Command.run(ARGV, __FILE__)
10
+
data/lib/stamina.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  module Stamina
2
2
 
3
3
  end
4
+ require 'stamina/ext/math'
4
5
  require 'stamina/version'
5
6
  require 'stamina/loader'
6
7
  require 'set'
@@ -18,4 +19,4 @@ require 'stamina/utils'
18
19
  require 'stamina/induction/union_find'
19
20
  require 'stamina/induction/commons'
20
21
  require "stamina/induction/rpni"
21
- require "stamina/induction/redblue"
22
+ require "stamina/induction/blue_fringe"
@@ -0,0 +1,2 @@
1
+ require 'stamina/abbadingo/random_dfa'
2
+ require 'stamina/abbadingo/random_sample'
@@ -0,0 +1,48 @@
1
+ module Stamina
2
+ module Abbadingo
3
+ #
4
+ # Generates a random DFA using the Abbadingo protocol.
5
+ #
6
+ class RandomDFA
7
+
8
+ # Desired number of states
9
+ attr_reader :state_count
10
+
11
+ # Accepting ratio
12
+ attr_reader :accepting_ratio
13
+
14
+ # Creates an algorithm instance with default options
15
+ def initialize(state_count = 64, accepting_ratio = 0.5)
16
+ @state_count = state_count
17
+ @accepting_ratio = accepting_ratio
18
+ end
19
+
20
+ def execute
21
+ dfa = Automaton.new
22
+
23
+ # Generate 5/4*state_count states
24
+ (state_count.to_f * 5.0 / 4.0).to_i.times do
25
+ dfa.add_state(:initial => false,
26
+ :accepting => (Kernel.rand <= accepting_ratio),
27
+ :error => false)
28
+ end
29
+
30
+ # Generate all edges
31
+ dfa.each_state do |source|
32
+ ["0", "1"].each do |symbol|
33
+ target = dfa.ith_state(Kernel.rand(dfa.state_count))
34
+ dfa.connect(source, target, symbol)
35
+ end
36
+ end
37
+
38
+ # Choose an initial state
39
+ dfa.ith_state(Kernel.rand(dfa.state_count)).initial!
40
+
41
+ # Minimize the automaton and return it
42
+ Stamina::Automaton::Minimize::Pitchies.execute(dfa)
43
+ end
44
+
45
+ end # class RandomDFA
46
+ end # module Abbadingo
47
+ end # module Stamina
48
+
@@ -0,0 +1,146 @@
1
+ module Stamina
2
+ module Abbadingo
3
+ #
4
+ # Generates a random Sample using the Abbadingo protocol.
5
+ #
6
+ class RandomSample
7
+
8
+ #
9
+ # Implements an enumerator for binary strings whose length lies between 0
10
+ # and max_length (passed at construction).
11
+ #
12
+ # The enumerator guarantees that strings are sampled with a uniform distribution
13
+ # among them. As the number of strings of a given length is an exponential
14
+ # function, this means that you've got a 50% chance of having a string of length
15
+ # max_length, 25% of max_length - 1, 12.5% of max_length - 2 and so on.
16
+ #
17
+ # How to use it?
18
+ #
19
+ # # create for strings between 0 and 10 symbols, inclusive
20
+ # enum = Stamina::Abbadingo::RandomSample::StringEnumerator.new(10)
21
+ #
22
+ # # this is how to generate strings while a predicate is true
23
+ # enum.each do |s|
24
+ # # s is an array of binary symbols, each a String ("0" or "1")
25
+ # # true for continuing, false otherwise
26
+ # return (true || false)
27
+ # end
28
+ #
29
+ # # this is how to generate a fixed number of strings
30
+ # (1..1000).collect{ enum.one }
31
+ #
32
+ # How does it work? Well, the distribution of strings is as follows:
33
+ #
34
+ # length [n]b_strings [c]umul log2(n) log2(c) log2(c).floor
35
+ # (2**n) 2**(n+1)-1
36
+ # 0 1 1 0.0000000000 0.000000 0
37
+ # 1 2 3 1.0000000000 1.584963 1
38
+ # 2 4 7 2.0000000000 2.807355 2
39
+ # 3 8 15 3.0000000000 3.906891 3
40
+ # 4 16 31 4.0000000000 4.954196 4
41
+ # 5 32 63 5.0000000000 5.977280 5
42
+ #
43
+ # where _cumul_ is the total number of strings up to _length_ symbols.
44
+ #
45
+ # Therefore, the idea is to give each string an identifier, say _x_,
46
+ # between 1 and 2**(max_length+1)-1 (see max).
47
+ # * The length of the _x_th string is log2(x).floor (see length_for)
48
+ # * The string itself is the binary decomposition of x, up to length_for(x)
49
+ # symbols (see string_for)
50
+ #
51
+ # As those identifiers naturally respect the exponential distribution, sampling
52
+ # the strings is the same as taking string_for(x) for random x up to _max_.
53
+ #
54
+ class StringEnumerator
55
+ include Enumerable
56
+
57
+ # Maximal length of a string
58
+ attr_reader :max_length
59
+
60
+ def initialize(max_length = 16)
61
+ @max_length = max_length
62
+ end
63
+
64
+ #
65
+ # Returns the length of the string whose identifier is _x_ (> 0)
66
+ #
67
+ def length_for(x)
68
+ Math.log2(x).floor
69
+ end
70
+
71
+ #
72
+ # Returns the binary string whose identifier is _x_ (> 0)
73
+ #
74
+ def string_for(x)
75
+ length = length_for(x)
76
+ (0..length-1).collect{|i| ((x >> i) % 2).to_s}
77
+ end
78
+
79
+ #
80
+ # Returns the maximum identifier, which is also the number of strings
81
+ # up to max_length symbols
82
+ #
83
+ def max
84
+ @max ||= 2 ** (max_length+1) - 1
85
+ end
86
+
87
+ #
88
+ # Generates a string at random
89
+ #
90
+ def one
91
+ string_for(1+Kernel.rand(max))
92
+ end
93
+
94
+ #
95
+ # Yields the block with a random string, until the block returns false
96
+ # or nil.
97
+ #
98
+ def each
99
+ begin
100
+ cont = yield(one)
101
+ end while cont
102
+ end
103
+
104
+ end # class StringEnumerator
105
+
106
+ #
107
+ # Generates a [training, test] pair of Sample instances whose strings are randomly
108
+ # sampled with a uniform distribution over all strings up to _max_length_ symbols
109
+ #
110
+ def self.execute(classifier, max_length = classifier.depth + 3)
111
+ enum = StringEnumerator.new(max_length)
112
+
113
+ # We generate 1800 strings for the test set plus n^2/2 strings for
114
+ # the training set. If there are not enough strings available, we generate
115
+ # the maximum we can
116
+ seen = {}
117
+ nb = Math.min(1800 + (classifier.state_count**2), enum.max)
118
+
119
+ # Let's go now
120
+ enum.each do |s|
121
+ seen[s] = true
122
+ seen.size < nb
123
+ end
124
+
125
+ # Make them
126
+ strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
127
+ pos, neg = strings.partition{|s| s.positive?}
128
+
129
+ # Split them, 1800 in test and the rest in training set
130
+ if (pos.size > 900) && (neg.size > 900)
131
+ pos_test, pos_training = pos[0...900], pos[900..-1]
132
+ neg_test, neg_training = neg[0...900], neg[900..-1]
133
+ else
134
+ pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
135
+ neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
136
+ end
137
+ flusher = lambda{|x,y| Kernel.rand < 0.5 ? 1 : -1}
138
+ training = (pos_training + neg_training).sort &flusher
139
+ test = (pos_test + neg_test).sort &flusher
140
+ [Sample.new(training), Sample.new(test)]
141
+ end
142
+
143
+ end # class RandomSample
144
+ end # module Abbadingo
145
+ end # module Stamina
146
+
data/lib/stamina/adl.rb CHANGED
@@ -71,11 +71,11 @@ module Stamina
71
71
  # looking for |number initial accepting|
72
72
  raise(ADL::ParseError,
73
73
  "Parse error line #{line_number}: state definition expected, "\
74
- "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)$/ =~ l
75
- id, initial, accepting = $1, $2, $3
76
- initial, accepting = ("true"==initial), ("true"==accepting)
74
+ "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)(\s+(true|false))?$/ =~ l
75
+ id, initial, accepting, error = $1, $2, $3, $5
76
+ initial, accepting, error = ("true"==initial), ("true"==accepting), ("true"==error)
77
77
 
78
- state = fa.add_state(:initial => initial, :accepting => accepting)
78
+ state = fa.add_state(:initial => initial, :accepting => accepting, :error => error)
79
79
  state[:name]=id.to_s
80
80
  states[id] = state
81
81
 
@@ -142,7 +142,7 @@ module Stamina
142
142
  def self.print_automaton(fa, buffer="")
143
143
  buffer << "#{fa.state_count.to_s} #{fa.edge_count.to_s}" << "\n"
144
144
  fa.states.each do |s|
145
- buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << "\n"
145
+ buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << (s.error? ? " true" : "") << "\n"
146
146
  end
147
147
  fa.edges.each do |e|
148
148
  buffer << "#{e.source.index.to_s} #{e.target.index.to_s} #{e.symbol.to_s}" << "\n"
@@ -295,4 +295,4 @@ module Stamina
295
295
  end
296
296
 
297
297
  end # module ADL
298
- end # module Stamina
298
+ end # module Stamina
@@ -198,6 +198,15 @@ module Stamina
198
198
  (outs.size==@out_edges.size) and not(outs.include?(nil))
199
199
  end
200
200
 
201
+ # Checks if this state is a sink state or not. Sink states are defined as
202
+ # non accepting states having no outgoing transition or only loop
203
+ # transitions.
204
+ def sink?
205
+ return false if accepting?
206
+ out_edges.each{|e| return false unless e.target==self}
207
+ true
208
+ end
209
+
201
210
  #
202
211
  # Returns an array containing all incoming edges of the state. Edges are
203
212
  # sorted if _sorted_ is set to true. If two incoming edges have same symbol
@@ -364,8 +373,8 @@ module Stamina
364
373
  #
365
374
  def dfa_delta(symbol)
366
375
  return nil if symbol.nil?
367
- @out_edges.each {|e| return e.target if e.symbol==symbol}
368
- return nil
376
+ edge = @out_edges.find{|e| e.symbol==symbol}
377
+ edge.nil? ? nil : edge.target
369
378
  end
370
379
 
371
380
  #
@@ -456,10 +465,14 @@ module Stamina
456
465
  end
457
466
 
458
467
  # Returns edge symbol.
459
- def symbol() @data[:symbol] end
468
+ def symbol()
469
+ @data[:symbol]
470
+ end
460
471
 
461
472
  # Sets edge symbol.
462
- def symbol=(symbol) @data[:symbol]=symbol end
473
+ def symbol=(symbol)
474
+ @data[:symbol] = symbol
475
+ end
463
476
 
464
477
  alias :source :from
465
478
  alias :target :to
@@ -1207,6 +1220,14 @@ module Stamina
1207
1220
  end
1208
1221
  end
1209
1222
 
1223
+ ### public section about adl utilities #######################################
1224
+ public
1225
+
1226
+ # Prints this automaton in ADL format
1227
+ def to_adl(buffer = "")
1228
+ Stamina::ADL.print_automaton(self, buffer)
1229
+ end
1230
+
1210
1231
  ### public section about reordering ##########################################
1211
1232
  public
1212
1233
 
@@ -1235,4 +1256,8 @@ module Stamina
1235
1256
 
1236
1257
  end # module Stamina
1237
1258
  require 'stamina/automaton/walking'
1259
+ require 'stamina/automaton/complete'
1260
+ require 'stamina/automaton/strip'
1261
+ require 'stamina/automaton/equivalence'
1262
+ require 'stamina/automaton/minimize'
1238
1263
  require 'stamina/automaton/metrics'
@@ -0,0 +1,36 @@
1
+ module Stamina
2
+ class Automaton
3
+
4
+ #
5
+ # Checks if this automaton is complete
6
+ #
7
+ def complete?
8
+ alph = alphabet
9
+ states.find{|s| !(alphabet - s.out_symbols).empty?}.nil?
10
+ end
11
+
12
+ #
13
+ # Returns a completed copy of this automaton
14
+ #
15
+ def complete
16
+ self.dup.complete!
17
+ end
18
+
19
+ #
20
+ # Completes this automaton.
21
+ #
22
+ def complete!(sink_data = {:initial => false, :accepting => false, :error => false})
23
+ alph = alphabet
24
+ sink = add_state(sink_data)
25
+ each_state do |s|
26
+ out_symbols = s.out_symbols
27
+ (alph-out_symbols).each do |symbol|
28
+ connect(s, sink, symbol)
29
+ end
30
+ end
31
+ drop_state(sink) if sink.adjacent_states == [sink]
32
+ self
33
+ end
34
+
35
+ end # class Automaton
36
+ end # module Stamina
@@ -0,0 +1,55 @@
1
+ module Stamina
2
+ class Automaton
3
+
4
+ #
5
+ # Checks if this automaton is equivalent to another one.
6
+ #
7
+ # Automata must be both minimal and complete to guarantee that this method
8
+ # works.
9
+ #
10
+ def equivalent?(other, equiv = nil, key = :equiv_state)
11
+ equiv ||= Proc.new{|s1,s2| (s1.accepting? == s2.accepting?) &&
12
+ (s1.error? == s2.error?) &&
13
+ (s1.initial? == s2.initial?) }
14
+
15
+ # Both must already have basic attributes in common
16
+ return false unless state_count==other.state_count
17
+ return false unless edge_count==other.edge_count
18
+ return false unless equiv[initial_state, other.initial_state]
19
+
20
+ # We instantiate the decoration algorithm for checking equivalence on this
21
+ # automaton:
22
+ # * decoration is the index of the equivalent state in other automaton
23
+ # * d0 is thus 'other.initial_state.index'
24
+ # * suppremum is identity and fails when the equivalent state is not unique
25
+ # * propagation checks transition function delta
26
+ #
27
+ algo = Stamina::Utils::Decorate.new(key)
28
+ algo.set_suppremum do |d0, d1|
29
+ if (d0.nil? or d1.nil?)
30
+ (d0 || d1)
31
+ elsif d0==d1
32
+ d0
33
+ else
34
+ raise Stamina::Abord
35
+ end
36
+ end
37
+ algo.set_propagate do |d,e|
38
+ reached = other.ith_state(d).dfa_step(e.symbol)
39
+ raise Stamina::Abord if reached.nil?
40
+ raise Stamina::Abord unless equiv[e.target, reached]
41
+ reached.index
42
+ end
43
+
44
+ # Run the algorithm now
45
+ begin
46
+ algo.execute(self, nil, other.initial_state.index)
47
+ return true
48
+ rescue Stamina::Abord
49
+ return false
50
+ end
51
+ end
52
+ alias :<=> :equivalent?
53
+
54
+ end # class Automaton
55
+ end # module Stamina