stamina 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. data/CHANGELOG.md +24 -0
  2. data/Gemfile.lock +5 -1
  3. data/bin/stamina +10 -0
  4. data/lib/stamina.rb +2 -1
  5. data/lib/stamina/abbadingo.rb +2 -0
  6. data/lib/stamina/abbadingo/random_dfa.rb +48 -0
  7. data/lib/stamina/abbadingo/random_sample.rb +146 -0
  8. data/lib/stamina/adl.rb +6 -6
  9. data/lib/stamina/automaton.rb +29 -4
  10. data/lib/stamina/automaton/complete.rb +36 -0
  11. data/lib/stamina/automaton/equivalence.rb +55 -0
  12. data/lib/stamina/automaton/metrics.rb +8 -1
  13. data/lib/stamina/automaton/minimize.rb +25 -0
  14. data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
  15. data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
  16. data/lib/stamina/automaton/strip.rb +16 -0
  17. data/lib/stamina/automaton/walking.rb +46 -19
  18. data/lib/stamina/command.rb +45 -0
  19. data/lib/stamina/command/abbadingo_dfa.rb +81 -0
  20. data/lib/stamina/command/abbadingo_samples.rb +40 -0
  21. data/lib/stamina/command/adl2dot.rb +71 -0
  22. data/lib/stamina/command/classify.rb +48 -0
  23. data/lib/stamina/command/help.rb +27 -0
  24. data/lib/stamina/command/infer.rb +141 -0
  25. data/lib/stamina/command/metrics.rb +51 -0
  26. data/lib/stamina/command/robustness.rb +22 -0
  27. data/lib/stamina/command/score.rb +35 -0
  28. data/lib/stamina/errors.rb +4 -1
  29. data/lib/stamina/ext/math.rb +20 -0
  30. data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
  31. data/lib/stamina/induction/commons.rb +32 -46
  32. data/lib/stamina/induction/rpni.rb +7 -9
  33. data/lib/stamina/induction/union_find.rb +3 -3
  34. data/lib/stamina/loader.rb +1 -0
  35. data/lib/stamina/sample.rb +79 -2
  36. data/lib/stamina/scoring.rb +37 -0
  37. data/lib/stamina/version.rb +2 -2
  38. data/stamina.gemspec +2 -1
  39. data/stamina.noespec +9 -12
  40. data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
  41. data/test/stamina/abbadingo/random_sample_test.rb +78 -0
  42. data/test/stamina/adl_test.rb +27 -2
  43. data/test/stamina/automaton/complete_test.rb +58 -0
  44. data/test/stamina/automaton/equivalence_test.rb +120 -0
  45. data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
  46. data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
  47. data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
  48. data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
  49. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
  50. data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
  51. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
  52. data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
  53. data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
  54. data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
  55. data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
  56. data/test/stamina/automaton/strip_test.rb +36 -0
  57. data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
  58. data/test/stamina/automaton_test.rb +13 -1
  59. data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
  60. data/test/stamina/sample_test.rb +75 -0
  61. data/test/stamina/stamina_test.rb +13 -2
  62. metadata +98 -23
  63. data/bin/adl2dot +0 -12
  64. data/bin/classify +0 -12
  65. data/bin/redblue +0 -12
  66. data/bin/rpni +0 -12
  67. data/lib/stamina/command/adl2dot_command.rb +0 -73
  68. data/lib/stamina/command/classify_command.rb +0 -57
  69. data/lib/stamina/command/redblue_command.rb +0 -58
  70. data/lib/stamina/command/rpni_command.rb +0 -58
  71. data/lib/stamina/command/stamina_command.rb +0 -79
data/CHANGELOG.md CHANGED
@@ -1,3 +1,27 @@
1
+ # 0.4.0 / FIX ME
2
+
3
+ * Major Enhancements
4
+
5
+ * Added Automaton#to_adl as a shortcut for Stamina::ADL::print_automaton(...)
6
+ * Added Sample#to_pta taken from Induction::Commons
7
+ * Added Automaton completion (all strings parsable) under Automaton#complete[!?]
8
+ * Added Automaton stripping (removal of unreachable states) under Automaton#strip[!]
9
+ * Added Automaton minimization (Hopcroft + Pitchies) under Automaton#minimize
10
+ * Added Abbadingo generators under Abbadingo::RandomDFA and Abbadingo::RandomSample
11
+ * Added a main 'stamina' command relying on Quickl. classify/adl2dot commands become
12
+ subcommands of stamina itself (see stamina --help for a list of available commands).
13
+ Induction commands (rpni and redblue) are now handled by 'stamina infer' with
14
+ options.
15
+ * Error states are now correctly handled in ADL::parse and ADL::flush
16
+ * RedBlue has been renamed as BlueFringe everywhere (red_?blue -> blue_fringe)
17
+
18
+ * Minor Enhancements
19
+ * Added a few optimizations here and there
20
+
21
+ * Bug fixes
22
+
23
+ * Fixed a bug in Automaton#depth when some states are unreachable
24
+
1
25
  # 0.3.1 / 2011-03-24
2
26
 
3
27
  * Major Enhancements
data/Gemfile.lock CHANGED
@@ -1,13 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- stamina (0.3.1)
4
+ stamina (0.4.0)
5
+ quickl (~> 0.2.0)
5
6
 
6
7
  GEM
7
8
  remote: http://rubygems.org/
8
9
  specs:
9
10
  bluecloth (2.0.11)
10
11
  diff-lcs (1.1.2)
12
+ gnuplot (2.3.6)
13
+ quickl (0.2.0)
11
14
  rake (0.8.7)
12
15
  rspec (2.4.0)
13
16
  rspec-core (~> 2.4.0)
@@ -26,6 +29,7 @@ PLATFORMS
26
29
  DEPENDENCIES
27
30
  bluecloth (~> 2.0.9)
28
31
  bundler (~> 1.0)
32
+ gnuplot (~> 2.3.6)
29
33
  rake (~> 0.8.7)
30
34
  rspec (~> 2.4.0)
31
35
  stamina!
data/bin/stamina ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler'
4
+ Bundler.setup(:default)
5
+
6
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
7
+ require "stamina/command"
8
+
9
+ Stamina::Command.run(ARGV, __FILE__)
10
+
data/lib/stamina.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  module Stamina
2
2
 
3
3
  end
4
+ require 'stamina/ext/math'
4
5
  require 'stamina/version'
5
6
  require 'stamina/loader'
6
7
  require 'set'
@@ -18,4 +19,4 @@ require 'stamina/utils'
18
19
  require 'stamina/induction/union_find'
19
20
  require 'stamina/induction/commons'
20
21
  require "stamina/induction/rpni"
21
- require "stamina/induction/redblue"
22
+ require "stamina/induction/blue_fringe"
@@ -0,0 +1,2 @@
1
+ require 'stamina/abbadingo/random_dfa'
2
+ require 'stamina/abbadingo/random_sample'
@@ -0,0 +1,48 @@
1
+ module Stamina
2
+ module Abbadingo
3
+ #
4
+ # Generates a random DFA using the Abbadingo protocol.
5
+ #
6
+ class RandomDFA
7
+
8
+ # Number of wished states
9
+ attr_reader :state_count
10
+
11
+ # Accepting ratio
12
+ attr_reader :accepting_ratio
13
+
14
+ # Creates an algorithm instance with default options
15
+ def initialize(state_count = 64, accepting_ratio = 0.5)
16
+ @state_count = state_count
17
+ @accepting_ratio = accepting_ratio
18
+ end
19
+
20
+ def execute
21
+ dfa = Automaton.new
22
+
23
+ # Generate 5/4*state_count states
24
+ (state_count.to_f * 5.0 / 4.0).to_i.times do
25
+ dfa.add_state(:initial => false,
26
+ :accepting => (Kernel.rand <= accepting_ratio),
27
+ :error => false)
28
+ end
29
+
30
+ # Generate all edges
31
+ dfa.each_state do |source|
32
+ ["0", "1"].each do |symbol|
33
+ target = dfa.ith_state(Kernel.rand(dfa.state_count))
34
+ dfa.connect(source, target, symbol)
35
+ end
36
+ end
37
+
38
+ # Choose an initial state
39
+ dfa.ith_state(Kernel.rand(dfa.state_count)).initial!
40
+
41
+ # Minimize the automaton and return it
42
+ Stamina::Automaton::Minimize::Pitchies.execute(dfa)
43
+ end
44
+
45
+ end # class RandomDFA
46
+ end # module Abbadingo
47
+ end # module Stamina
48
+
@@ -0,0 +1,146 @@
1
+ module Stamina
2
+ module Abbadingo
3
+ #
4
+ # Generates a random Sample using the Abbadingo protocol.
5
+ #
6
+ class RandomSample
7
+
8
+ #
9
+ # Implements an enumerator for binary strings whose length lies between 0
10
+ # and max_length (passed at construction).
11
+ #
12
+ # The enumerator guarantees that strings are sampled with a uniform distribution
13
+ # among them. As the number of strings of a given length is an exponential
14
+ # function, this means that you've got 50% chance of having a string of length
15
+ # max_length, 25% of max_length - 1, 12.5% of max_length - 2 and so on.
16
+ #
17
+ # How to use it?
18
+ #
19
+ # # create for strings between 0 and 10 symbols, inclusive
20
+ # enum = Stamina::Abbadingo::RandomSample::StringEnumerator.new(10)
21
+ #
22
+ # # this is how to generate strings while a predicate is true
23
+ # enum.each do |s|
24
+ # # s is an array of binary symbols, given as strings ("0" or "1")
25
+ # # true for continuing, false otherwise
26
+ # return (true || false)
27
+ # end
28
+ #
29
+ # # this is how to generate a fixed number of strings
30
+ # (1..1000).collect{ enum.one }
31
+ #
32
+ # How does it work? Well, the distribution of strings is as follows:
33
+ #
34
+ # length [n]b_strings [c]umul log2(n) log2(c) log2(c).floor
35
+ # (2**n) 2**(n+1)-1
36
+ # 0 1 1 0.0000000000 0.000000 0
37
+ # 1 2 3 1.0000000000 1.584963 1
38
+ # 2 4 7 2.0000000000 2.807355 2
39
+ # 3 8 15 3.0000000000 3.906891 3
40
+ # 4 16 31 4.0000000000 4.954196 4
41
+ # 5 32 63 5.0000000000 5.977280 5
42
+ #
43
+ # where _cumul_ is the total number of strings up to _length_ symbols.
44
+ #
45
+ # Therefore, the idea is to see each string as having an identifier, say _x_,
46
+ # between 1 and 2**(max_length+1)-1 (see max).
47
+ # * The length of the _x_th string is log2(x).floor (see length_for)
48
+ # * The string itself is the binary decomposition of x, up to length_for(x)
49
+ # symbols (see string_for)
50
+ #
51
+ # As those identifiers naturally respect the exponential distribution, sampling
52
+ # the strings is the same as taking string_for(x) for random x up to _max_.
53
+ #
54
+ class StringEnumerator
55
+ include Enumerable
56
+
57
+ # Maximal length of a string
58
+ attr_reader :max_length
59
+
60
+ def initialize(max_length = 16)
61
+ @max_length = max_length
62
+ end
63
+
64
+ #
65
+ # Returns the length of the string whose identifier is _x_ (> 0)
66
+ #
67
+ def length_for(x)
68
+ Math.log2(x).floor
69
+ end
70
+
71
+ #
72
+ # Returns the binary string whose identifier is _x_ (> 0)
73
+ #
74
+ def string_for(x)
75
+ length = length_for(x)
76
+ (0..length-1).collect{|i| ((x >> i) % 2).to_s}
77
+ end
78
+
79
+ #
80
+ # Returns the maximum identifier, which is also the number of strings
81
+ # up to max_length symbols
82
+ #
83
+ def max
84
+ @max ||= 2 ** (max_length+1) - 1
85
+ end
86
+
87
+ #
88
+ # Generates a string at random
89
+ #
90
+ def one
91
+ string_for(1+Kernel.rand(max))
92
+ end
93
+
94
+ #
95
+ # Yields the block with a random string, until the block returns false
96
+ # or nil.
97
+ #
98
+ def each
99
+ begin
100
+ cont = yield(one)
101
+ end while cont
102
+ end
103
+
104
+ end # class StringEnumerator
105
+
106
+ #
107
+ # Generates a Sample instance with _nb_ strings randomly sampled with a
108
+ # uniform distribution over all strings up to _max_length_ symbols
109
+ #
110
+ def self.execute(classifier, max_length = classifier.depth + 3)
111
+ enum = StringEnumerator.new(max_length)
112
+
113
+ # We generate 1800 strings for the test set plus n^2/2 strings for
114
+ # the training set. If there are not enough strings available, we generate
115
+ # the maximum we can
116
+ seen = {}
117
+ nb = Math.min(1800 + (classifier.state_count**2), enum.max)
118
+
119
+ # Let's go now
120
+ enum.each do |s|
121
+ seen[s] = true
122
+ seen.size < nb
123
+ end
124
+
125
+ # Make them
126
+ strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
127
+ pos, neg = strings.partition{|s| s.positive?}
128
+
129
+ # Split them, 1800 in test and the rest in training set
130
+ if (pos.size > 900) && (neg.size > 900)
131
+ pos_test, pos_training = pos[0...900], pos[900..-1]
132
+ neg_test, neg_training = neg[0...900], neg[900..-1]
133
+ else
134
+ pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
135
+ neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
136
+ end
137
+ flusher = lambda{|x,y| Kernel.rand < 0.5 ? 1 : -1}
138
+ training = (pos_training + neg_training).sort &flusher
139
+ test = (pos_test + neg_test).sort &flusher
140
+ [Sample.new(training), Sample.new(test)]
141
+ end
142
+
143
+ end # class RandomSample
144
+ end # module Abbadingo
145
+ end # module Stamina
146
+
data/lib/stamina/adl.rb CHANGED
@@ -71,11 +71,11 @@ module Stamina
71
71
  # looking for |number initial accepting|
72
72
  raise(ADL::ParseError,
73
73
  "Parse error line #{line_number}: state definition expected, "\
74
- "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)$/ =~ l
75
- id, initial, accepting = $1, $2, $3
76
- initial, accepting = ("true"==initial), ("true"==accepting)
74
+ "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)(\s+(true|false))?$/ =~ l
75
+ id, initial, accepting, error = $1, $2, $3, $5
76
+ initial, accepting, error = ("true"==initial), ("true"==accepting), ("true"==error)
77
77
 
78
- state = fa.add_state(:initial => initial, :accepting => accepting)
78
+ state = fa.add_state(:initial => initial, :accepting => accepting, :error => error)
79
79
  state[:name]=id.to_s
80
80
  states[id] = state
81
81
 
@@ -142,7 +142,7 @@ module Stamina
142
142
  def self.print_automaton(fa, buffer="")
143
143
  buffer << "#{fa.state_count.to_s} #{fa.edge_count.to_s}" << "\n"
144
144
  fa.states.each do |s|
145
- buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << "\n"
145
+ buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << (s.error? ? " true" : "") << "\n"
146
146
  end
147
147
  fa.edges.each do |e|
148
148
  buffer << "#{e.source.index.to_s} #{e.target.index.to_s} #{e.symbol.to_s}" << "\n"
@@ -295,4 +295,4 @@ module Stamina
295
295
  end
296
296
 
297
297
  end # module ADL
298
- end # module Stamina
298
+ end # module Stamina
@@ -198,6 +198,15 @@ module Stamina
198
198
  (outs.size==@out_edges.size) and not(outs.include?(nil))
199
199
  end
200
200
 
201
+ # Checks if this state is a sink state or not. Sink states are defined as
202
+ # non accepting states having no outgoing transition or only loop
203
+ # transitions.
204
+ def sink?
205
+ return false if accepting?
206
+ out_edges.each{|e| return false unless e.target==self}
207
+ true
208
+ end
209
+
201
210
  #
202
211
  # Returns an array containing all incoming edges of the state. Edges are
203
212
  # sorted if _sorted_ is set to true. If two incoming edges have same symbol
@@ -364,8 +373,8 @@ module Stamina
364
373
  #
365
374
  def dfa_delta(symbol)
366
375
  return nil if symbol.nil?
367
- @out_edges.each {|e| return e.target if e.symbol==symbol}
368
- return nil
376
+ edge = @out_edges.find{|e| e.symbol==symbol}
377
+ edge.nil? ? nil : edge.target
369
378
  end
370
379
 
371
380
  #
@@ -456,10 +465,14 @@ module Stamina
456
465
  end
457
466
 
458
467
  # Returns edge symbol.
459
- def symbol() @data[:symbol] end
468
+ def symbol()
469
+ @data[:symbol]
470
+ end
460
471
 
461
472
  # Sets edge symbol.
462
- def symbol=(symbol) @data[:symbol]=symbol end
473
+ def symbol=(symbol)
474
+ @data[:symbol] = symbol
475
+ end
463
476
 
464
477
  alias :source :from
465
478
  alias :target :to
@@ -1207,6 +1220,14 @@ module Stamina
1207
1220
  end
1208
1221
  end
1209
1222
 
1223
+ ### public section about adl utilities #######################################
1224
+ public
1225
+
1226
+ # Prints this automaton in ADL format
1227
+ def to_adl(buffer = "")
1228
+ Stamina::ADL.print_automaton(self, buffer)
1229
+ end
1230
+
1210
1231
  ### public section about reordering ##########################################
1211
1232
  public
1212
1233
 
@@ -1235,4 +1256,8 @@ module Stamina
1235
1256
 
1236
1257
  end # module Stamina
1237
1258
  require 'stamina/automaton/walking'
1259
+ require 'stamina/automaton/complete'
1260
+ require 'stamina/automaton/strip'
1261
+ require 'stamina/automaton/equivalence'
1262
+ require 'stamina/automaton/minimize'
1238
1263
  require 'stamina/automaton/metrics'
@@ -0,0 +1,36 @@
1
+ module Stamina
2
+ class Automaton
3
+
4
+ #
5
+ # Checks if this automaton is complete
6
+ #
7
+ def complete?
8
+ alph = alphabet
9
+ states.find{|s| !(alphabet - s.out_symbols).empty?}.nil?
10
+ end
11
+
12
+ #
13
+ # Returns a completed copy of this automaton
14
+ #
15
+ def complete
16
+ self.dup.complete!
17
+ end
18
+
19
+ #
20
+ # Completes this automaton.
21
+ #
22
+ def complete!(sink_data = {:initial => false, :accepting => false, :error => false})
23
+ alph = alphabet
24
+ sink = add_state(sink_data)
25
+ each_state do |s|
26
+ out_symbols = s.out_symbols
27
+ (alph-out_symbols).each do |symbol|
28
+ connect(s, sink, symbol)
29
+ end
30
+ end
31
+ drop_state(sink) if sink.adjacent_states == [sink]
32
+ self
33
+ end
34
+
35
+ end # class Automaton
36
+ end # module Stamina
@@ -0,0 +1,55 @@
1
+ module Stamina
2
+ class Automaton
3
+
4
+ #
5
+ # Checks if this automaton is equivalent to another one.
6
+ #
7
+ # Automata must be both minimal and complete to guarantee that this method
8
+ # works.
9
+ #
10
+ def equivalent?(other, equiv = nil, key = :equiv_state)
11
+ equiv ||= Proc.new{|s1,s2| (s1.accepting? == s2.accepting?) &&
12
+ (s1.error? == s2.error?) &&
13
+ (s1.initial? == s2.initial?) }
14
+
15
+ # Both must already have basic attributes in common
16
+ return false unless state_count==other.state_count
17
+ return false unless edge_count==other.edge_count
18
+ return false unless equiv[initial_state, other.initial_state]
19
+
20
+ # We instantiate the decoration algorithm for checking equivalence on this
21
+ # automaton:
22
+ # * decoration is the index of the equivalent state in other automaton
23
+ # * d0 is thus 'other.initial_state.index'
24
+ # * suppremum is identity and fails when the equivalent state is not unique
25
+ # * propagation checks transition function delta
26
+ #
27
+ algo = Stamina::Utils::Decorate.new(key)
28
+ algo.set_suppremum do |d0, d1|
29
+ if (d0.nil? or d1.nil?)
30
+ (d0 || d1)
31
+ elsif d0==d1
32
+ d0
33
+ else
34
+ raise Stamina::Abord
35
+ end
36
+ end
37
+ algo.set_propagate do |d,e|
38
+ reached = other.ith_state(d).dfa_step(e.symbol)
39
+ raise Stamina::Abord if reached.nil?
40
+ raise Stamina::Abord unless equiv[e.target, reached]
41
+ reached.index
42
+ end
43
+
44
+ # Run the algorithm now
45
+ begin
46
+ algo.execute(self, nil, other.initial_state.index)
47
+ return true
48
+ rescue Stamina::Abord
49
+ return false
50
+ end
51
+ end
52
+ alias :<=> :equivalent?
53
+
54
+ end # class Automaton
55
+ end # module Stamina