stamina 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. data/CHANGELOG.md +22 -5
  2. data/LICENCE.md +2 -2
  3. data/bin/stamina +1 -7
  4. data/lib/stamina.rb +10 -19
  5. metadata +54 -333
  6. data/.gemtest +0 -0
  7. data/Gemfile +0 -2
  8. data/Gemfile.lock +0 -37
  9. data/Manifest.txt +0 -16
  10. data/README.md +0 -78
  11. data/Rakefile +0 -23
  12. data/example/adl/automaton.adl +0 -49
  13. data/example/adl/sample.adl +0 -53
  14. data/example/basic/characteristic_sample.adl +0 -32
  15. data/example/basic/target.adl +0 -9
  16. data/example/competition/31_test.adl +0 -1500
  17. data/example/competition/31_training.adl +0 -1759
  18. data/lib/stamina/abbadingo.rb +0 -2
  19. data/lib/stamina/abbadingo/random_dfa.rb +0 -48
  20. data/lib/stamina/abbadingo/random_sample.rb +0 -146
  21. data/lib/stamina/adl.rb +0 -298
  22. data/lib/stamina/automaton.rb +0 -1263
  23. data/lib/stamina/automaton/complete.rb +0 -36
  24. data/lib/stamina/automaton/equivalence.rb +0 -55
  25. data/lib/stamina/automaton/metrics.rb +0 -78
  26. data/lib/stamina/automaton/minimize.rb +0 -25
  27. data/lib/stamina/automaton/minimize/hopcroft.rb +0 -116
  28. data/lib/stamina/automaton/minimize/pitchies.rb +0 -64
  29. data/lib/stamina/automaton/strip.rb +0 -16
  30. data/lib/stamina/automaton/walking.rb +0 -363
  31. data/lib/stamina/classifier.rb +0 -52
  32. data/lib/stamina/command.rb +0 -45
  33. data/lib/stamina/command/abbadingo_dfa.rb +0 -81
  34. data/lib/stamina/command/abbadingo_samples.rb +0 -40
  35. data/lib/stamina/command/adl2dot.rb +0 -71
  36. data/lib/stamina/command/classify.rb +0 -48
  37. data/lib/stamina/command/help.rb +0 -27
  38. data/lib/stamina/command/infer.rb +0 -141
  39. data/lib/stamina/command/metrics.rb +0 -51
  40. data/lib/stamina/command/robustness.rb +0 -22
  41. data/lib/stamina/command/score.rb +0 -35
  42. data/lib/stamina/errors.rb +0 -23
  43. data/lib/stamina/ext/math.rb +0 -20
  44. data/lib/stamina/induction/blue_fringe.rb +0 -265
  45. data/lib/stamina/induction/commons.rb +0 -156
  46. data/lib/stamina/induction/rpni.rb +0 -186
  47. data/lib/stamina/induction/union_find.rb +0 -377
  48. data/lib/stamina/input_string.rb +0 -123
  49. data/lib/stamina/loader.rb +0 -1
  50. data/lib/stamina/markable.rb +0 -42
  51. data/lib/stamina/sample.rb +0 -267
  52. data/lib/stamina/scoring.rb +0 -213
  53. data/lib/stamina/utils.rb +0 -1
  54. data/lib/stamina/utils/decorate.rb +0 -81
  55. data/lib/stamina/version.rb +0 -14
  56. data/stamina.gemspec +0 -191
  57. data/stamina.noespec +0 -32
  58. data/tasks/debug_mail.rake +0 -78
  59. data/tasks/debug_mail.txt +0 -13
  60. data/tasks/gem.rake +0 -68
  61. data/tasks/spec_test.rake +0 -79
  62. data/tasks/unit_test.rake +0 -77
  63. data/tasks/yard.rake +0 -51
  64. data/test/stamina/abbadingo/random_dfa_test.rb +0 -16
  65. data/test/stamina/abbadingo/random_sample_test.rb +0 -78
  66. data/test/stamina/adl_test.rb +0 -516
  67. data/test/stamina/automaton/classifier_test.rb +0 -259
  68. data/test/stamina/automaton/complete_test.rb +0 -58
  69. data/test/stamina/automaton/equivalence_test.rb +0 -120
  70. data/test/stamina/automaton/metrics_test.rb +0 -36
  71. data/test/stamina/automaton/minimize/hopcroft_test.rb +0 -15
  72. data/test/stamina/automaton/minimize/minimize_test.rb +0 -55
  73. data/test/stamina/automaton/minimize/pitchies_test.rb +0 -15
  74. data/test/stamina/automaton/minimize/rice_edu_10.adl +0 -16
  75. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +0 -13
  76. data/test/stamina/automaton/minimize/rice_edu_13.adl +0 -13
  77. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +0 -7
  78. data/test/stamina/automaton/minimize/should_strip_1.adl +0 -8
  79. data/test/stamina/automaton/minimize/should_strip_1.min.adl +0 -6
  80. data/test/stamina/automaton/minimize/unknown_1.adl +0 -16
  81. data/test/stamina/automaton/minimize/unknown_1.min.adl +0 -12
  82. data/test/stamina/automaton/strip_test.rb +0 -36
  83. data/test/stamina/automaton/to_dot_test.rb +0 -64
  84. data/test/stamina/automaton/walking/dfa_delta_test.rb +0 -39
  85. data/test/stamina/automaton/walking_test.rb +0 -206
  86. data/test/stamina/automaton_additional_test.rb +0 -190
  87. data/test/stamina/automaton_test.rb +0 -1104
  88. data/test/stamina/exit.rb +0 -3
  89. data/test/stamina/induction/blue_fringe_test.rb +0 -83
  90. data/test/stamina/induction/induction_test.rb +0 -70
  91. data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +0 -19
  92. data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +0 -64
  93. data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +0 -9
  94. data/test/stamina/induction/redblue_universal_expected.adl +0 -4
  95. data/test/stamina/induction/redblue_universal_sample.adl +0 -5
  96. data/test/stamina/induction/rpni_inria_expected.adl +0 -7
  97. data/test/stamina/induction/rpni_inria_sample.adl +0 -9
  98. data/test/stamina/induction/rpni_test.rb +0 -129
  99. data/test/stamina/induction/rpni_test_pta.dot +0 -22
  100. data/test/stamina/induction/rpni_universal_expected.adl +0 -4
  101. data/test/stamina/induction/rpni_universal_sample.adl +0 -4
  102. data/test/stamina/induction/union_find_test.rb +0 -124
  103. data/test/stamina/input_string_test.rb +0 -323
  104. data/test/stamina/markable_test.rb +0 -70
  105. data/test/stamina/randdfa.adl +0 -66
  106. data/test/stamina/sample.adl +0 -4
  107. data/test/stamina/sample_classify_test.rb +0 -149
  108. data/test/stamina/sample_test.rb +0 -290
  109. data/test/stamina/scoring_test.rb +0 -63
  110. data/test/stamina/small_dfa.dot +0 -16
  111. data/test/stamina/small_dfa.gif +0 -0
  112. data/test/stamina/small_nfa.dot +0 -18
  113. data/test/stamina/small_nfa.gif +0 -0
  114. data/test/stamina/stamina_test.rb +0 -80
  115. data/test/stamina/utils/decorate_test.rb +0 -65
  116. data/test/test_all.rb +0 -7
@@ -1,2 +0,0 @@
1
- require 'stamina/abbadingo/random_dfa'
2
- require 'stamina/abbadingo/random_sample'
@@ -1,48 +0,0 @@
1
- module Stamina
2
- module Abbadingo
3
- #
4
- # Generates a random DFA using the Abbadingo protocol.
5
- #
6
- class RandomDFA
7
-
8
- # Number of wished states
9
- attr_reader :state_count
10
-
11
- # Accepting ratio
12
- attr_reader :accepting_ratio
13
-
14
- # Creates an algorithm instance with default options
15
- def initialize(state_count = 64, accepting_ratio = 0.5)
16
- @state_count = state_count
17
- @accepting_ratio = accepting_ratio
18
- end
19
-
20
- def execute
21
- dfa = Automaton.new
22
-
23
- # Generate 5/4*state_count states
24
- (state_count.to_f * 5.0 / 4.0).to_i.times do
25
- dfa.add_state(:initial => false,
26
- :accepting => (Kernel.rand <= accepting_ratio),
27
- :error => false)
28
- end
29
-
30
- # Generate all edges
31
- dfa.each_state do |source|
32
- ["0", "1"].each do |symbol|
33
- target = dfa.ith_state(Kernel.rand(dfa.state_count))
34
- dfa.connect(source, target, symbol)
35
- end
36
- end
37
-
38
- # Choose an initial state
39
- dfa.ith_state(Kernel.rand(dfa.state_count)).initial!
40
-
41
- # Minimize the automaton and return it
42
- Stamina::Automaton::Minimize::Pitchies.execute(dfa)
43
- end
44
-
45
- end # class RandomDFA
46
- end # module Abbadingo
47
- end # module Stamina
48
-
@@ -1,146 +0,0 @@
1
- module Stamina
2
- module Abbadingo
3
- #
4
- # Generates a random Sample using the Abbadingo protocol.
5
- #
6
- class RandomSample
7
-
8
- #
9
- # Implements an enumerator for binary strings whose length lies between 0
10
- # and max_length (passed at construction).
11
- #
12
- # The enumerator guarantees that strings are sampled with a uniform distribution
13
- # among them. As the number of strings of a given length is an exponential
14
- # function, this means that you've got 50% chance of having a string of length
15
- # max_length, 25% of max_length - 1, 12.5% of max_length - 2 and so on.
16
- #
17
- # How to use it?
18
- #
19
- # # create for strings between 0 and 10 symbols, inclusive
20
- # enum = Stamina::Abbadingo::RandomSample::StringEnumerator.new(10)
21
- #
22
- # # this is how to generate strings while a predicate is true
23
- # enum.each do |s|
24
- # # s is an array of binary integer symbols (0 or 1)
25
- # # true for continuing, false otherwise
26
- # return (true || false)
27
- # end
28
- #
29
- # # this is how to generate a fixed number of strings
30
- # (1..1000).collect{ enum.one }
31
- #
32
- # How does it work? Well, the distribution of strings is as follows:
33
- #
34
- # length [n]b_strings [c]umul log2(n) log2(c) log2(c).floor
35
- # (2**n) 2**(n+1)-1
36
- # 0 1 1 0.0000000000 0.000000 0
37
- # 1 2 3 1.0000000000 1.584963 1
38
- # 2 4 7 2.0000000000 2.807355 2
39
- # 3 8 15 3.0000000000 3.906891 3
40
- # 4 16 31 4.0000000000 4.954196 4
41
- # 5 32 63 5.0000000000 5.977280 5
42
- #
43
- # where _cumul_ is the total number of strings up to _length_ symbols.
44
- #
45
- # Therefore, the idea is to see each string as having an identifier, say _x_,
46
- # between 1 and 2**(max_length+1)-1 (see max).
47
- # * The length of the _x_th string is log2(x).floor (see length_for)
48
- # * The string itself is the binary decomposition of x, up to length_for(x)
49
- # symbols (see string_for)
50
- #
51
- # As those identifiers naturally respect the exponential distribution, sampling
52
- # the strings is the same as taking string_for(x) for random x upto _max_.
53
- #
54
- class StringEnumerator
55
- include Enumerable
56
-
57
- # Maximal length of a string
58
- attr_reader :max_length
59
-
60
- def initialize(max_length = 16)
61
- @max_length = max_length
62
- end
63
-
64
- #
65
- # Returns the length of the string whose identifier is _x_ (> 0)
66
- #
67
- def length_for(x)
68
- Math.log2(x).floor
69
- end
70
-
71
- #
72
- # Returns the binary string whose identifier is _x_ (> 0)
73
- #
74
- def string_for(x)
75
- length = length_for(x)
76
- (0..length-1).collect{|i| ((x >> i) % 2).to_s}
77
- end
78
-
79
- #
80
- # Returns the maximum identifier, which is also the number of strings
81
- # up to max_length symbols
82
- #
83
- def max
84
- @max ||= 2 ** (max_length+1) - 1
85
- end
86
-
87
- #
88
- # Generates a string at random
89
- #
90
- def one
91
- string_for(1+Kernel.rand(max))
92
- end
93
-
94
- #
95
- # Yields the block with a random string, until the block return false
96
- # or nil.
97
- #
98
- def each
99
- begin
100
- cont = yield(one)
101
- end while cont
102
- end
103
-
104
- end # class StringEnumerator
105
-
106
- #
107
- # Generates a Sample instance with _nb_ strings randomly sampled with a
108
- # uniform distribution over all strings up to _max_length_ symbols.
109
- #
110
- def self.execute(classifier, max_length = classifier.depth + 3)
111
- enum = StringEnumerator.new(max_length)
112
-
113
- # We generate 1800 strings for the test set plus n^2/2 strings for
114
- # the training set. If there are not enough strings available, we generate
115
- # the maximum we can
116
- seen = {}
117
- nb = Math.min(1800 + (classifier.state_count**2), enum.max)
118
-
119
- # Let's go now
120
- enum.each do |s|
121
- seen[s] = true
122
- seen.size < nb
123
- end
124
-
125
- # Make them
126
- strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
127
- pos, neg = strings.partition{|s| s.positive?}
128
-
129
- # Split them, 1800 in test and the rest in training set
130
- if (pos.size > 900) && (neg.size > 900)
131
- pos_test, pos_training = pos[0...900], pos[900..-1]
132
- neg_test, neg_training = neg[0...900], neg[900..-1]
133
- else
134
- pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
135
- neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
136
- end
137
- flusher = lambda{|x,y| Kernel.rand < 0.5 ? 1 : -1}
138
- training = (pos_training + neg_training).sort &flusher
139
- test = (pos_test + neg_test).sort &flusher
140
- [Sample.new(training), Sample.new(test)]
141
- end
142
-
143
- end # class RandomSample
144
- end # module Abbadingo
145
- end # module Stamina
146
-
data/lib/stamina/adl.rb DELETED
@@ -1,298 +0,0 @@
1
- module Stamina
2
- #
3
- # Automaton Description Language module. This module provides parsing and
4
- # printing methods for automata and samples. Documentation of the file format
5
- # used for an automaton is given in parse_automaton; file format for samples is
6
- # documented in parse_sample.
7
- #
8
- # Methods of this module are not intended to be included by a class but invoked
9
- # on the module instead:
10
- #
11
- # begin
12
- # dfa = Stamina::ADL.parse_automaton_file("my_automaton.adl")
13
- # rescue ADL::ParseError => ex
14
- # puts "Oops, the ADL automaton file seems corrupted..."
15
- # end
16
- #
17
- # == Detailed API
18
- module ADL
19
-
20
- #################################################################################
21
- # Automaton Section #
22
- #################################################################################
23
-
24
- #
25
- # Parses a given automaton description and returns an Automaton instance.
26
- #
27
- # Raises:
28
- # - ArgumentError unless _descr_ is an IO object or a String.
29
- # - ADL::ParseError if the ADL automaton format is not respected.
30
- #
31
- # ADL provides a really simple grammar to describe automata. Here is a succinct
32
- # example (full documentation of the ADL automaton grammar can be found in
33
- # the self-documenting example/adl/automaton.adl file).
34
- #
35
- # # Some header comments: tool which has generated this automaton,
36
- # # maybe a date or other tool options ...
37
- # # here: 'this automaton accepts the a(ba)* regular language'
38
- # 2 2
39
- # 0 true false
40
- # 1 false true
41
- # 0 1 a
42
- # 1 0 b
43
- #
44
- def self.parse_automaton(descr)
45
- automaton = nil
46
- ADL::to_io(descr) do |io|
47
- state_count, edge_count = nil, nil
48
- state_read, edge_read = 0, 0
49
- states = {}
50
- mode = :header
51
-
52
- automaton = Automaton.new do |fa|
53
- # parse each description line
54
- line_number = 1
55
- io.each_line do |l|
56
- index = l.index('#')
57
- l = l[0,index] if index
58
- l = l.strip
59
- next if l.empty? or l[0,1]=='#'
60
-
61
- case mode
62
- when :header
63
- # looking for |state_count edge_count|
64
- raise(ADL::ParseError,
65
- "Parse error line #{line_number}: 'state_count edge_count' expected, "\
66
- "'#{l}' found.") unless /^(\d+)\s+(\d+)$/ =~ l
67
- state_count, edge_count = $1.to_i, $2.to_i
68
- mode = :states
69
-
70
- when :states
71
- # looking for |number initial accepting|
72
- raise(ADL::ParseError,
73
- "Parse error line #{line_number}: state definition expected, "\
74
- "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)(\s+(true|false))?$/ =~ l
75
- id, initial, accepting, error = $1, $2, $3, $5
76
- initial, accepting, error = ("true"==initial), ("true"==accepting), ("true"==error)
77
-
78
- state = fa.add_state(:initial => initial, :accepting => accepting, :error => error)
79
- state[:name]=id.to_s
80
- states[id] = state
81
-
82
- state_read += 1
83
- mode = (edge_count==0 ? :end : :edges) if state_read==state_count
84
-
85
- when :edges
86
- # looking for |source target symbol|
87
- raise(ADL::ParseError,
88
- "Parse error line #{line_number}: edge definition expected, "\
89
- "'#{l}' found.") unless /^(\S+)\s+(\S+)\s+(\S+)$/ =~ l
90
- source, target, symbol = $1, $2, $3
91
- raise(ADL::ParseError,
92
- "Parse error line #{line_number}: no such state #{source}") \
93
- unless states[source]
94
- raise(ADL::ParseError,
95
- "Parse error line #{line_number}: no such state #{target}") \
96
- unless states[target]
97
-
98
- fa.connect(states[source], states[target], {:symbol => symbol})
99
-
100
- edge_read += 1
101
- mode = :end if edge_read==edge_count
102
-
103
- when :end
104
- raise(ADL::ParseError,
105
- "Parse error line #{line_number}: trailing data found '#{l}")
106
-
107
- end # case mode
108
-
109
- line_number += 1
110
- end
111
-
112
- raise(ADL::ParseError, "Parse error: #{state_count} states annouced, "\
113
- "#{state_read} found.") if state_count != state_read
114
- raise(ADL::ParseError, "Parse error: #{edge_count} edges annouced, "\
115
- "#{edge_read} found.") if edge_count != edge_read
116
-
117
- end # Automaton.new
118
- end
119
- return automaton
120
- end # def self.parse
121
-
122
- #
123
- # Parses an automaton file _f_.
124
- #
125
- # Shortcut for:
126
- # File.open(f, 'r') do |io|
127
- # Stamina::ADL.parse_automaton(io)
128
- # end
129
- #
130
- def self.parse_automaton_file(f)
131
- automaton = nil
132
- File.open(f) do |file|
133
- automaton = ADL::parse_automaton(file)
134
- end
135
- automaton
136
- end
137
-
138
- #
139
- # Prints an automaton to a buffer (responding to <code>:&lt;&lt;</code>) in ADL
140
- # format. Returns the buffer itself.
141
- #
142
- def self.print_automaton(fa, buffer="")
143
- buffer << "#{fa.state_count.to_s} #{fa.edge_count.to_s}" << "\n"
144
- fa.states.each do |s|
145
- buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << (s.error? ? " true" : "") << "\n"
146
- end
147
- fa.edges.each do |e|
148
- buffer << "#{e.source.index.to_s} #{e.target.index.to_s} #{e.symbol.to_s}" << "\n"
149
- end
150
- buffer
151
- end
152
-
153
- #
154
- # Prints an automaton to a file whose path is provided.
155
- #
156
- # Shortcut for:
157
- # File.open(file, 'w') do |io|
158
- # print_automaton(fa, io)
159
- # end
160
- #
161
- def self.print_automaton_to_file(fa, file)
162
- File.open(file, 'w') do |io|
163
- print_automaton(fa, io)
164
- end
165
- end
166
-
167
- #################################################################################
168
- # String and Sample Section #
169
- #################################################################################
170
-
171
- #
172
- # Parses an input string _str_ and returns an InputString instance. Format of
173
- # input strings is documented in parse_sample. _str_ is required to be a ruby
174
- # String.
175
- #
176
- # Raises:
177
- # - ADL::ParseError if the ADL string format is not respected.
178
- #
179
- def self.parse_string(str)
180
- symbols = str.split(' ')
181
- case symbols[0]
182
- when '+'
183
- symbols.shift
184
- InputString.new symbols, true, false
185
- when '-'
186
- symbols.shift
187
- InputString.new symbols, false, false
188
- when '?'
189
- symbols.shift
190
- InputString.new symbols, nil, false
191
- else
192
- raise ADL::ParseError, "Invalid string format #{str}", caller
193
- end
194
- end
195
-
196
- #
197
- # Parses the sample provided by _descr_. When a block is provided, yields it with
198
- # InputString instances and ignores the sample argument. Otherwise, fills the sample
199
- # (any object responding to <code><<</code>) with string, creating a fresh new
200
- # one (as a Sample instance) if sample is nil.
201
- #
202
- # ADL provides a really simple grammar to describe samples (here is a succinct
203
- # example, the full documentation of the sample grammar can be found in the
204
- # self-documenting example/adl/sample.adl file):
205
- #
206
- # #
207
- # # Some header comments: tool which has generated this sample,
208
- # # maybe a date or other tool options ...
209
- # # here: 'this sample is characteristic for the a(ba)* regular language'
210
- # #
211
- # # Positive, Negative, Unlabeled strings begin with +, -, ?, respectively
212
- # # Empty lines and lines beginning with # are simply ignored.
213
- # #
214
- # -
215
- # + a
216
- # - a b
217
- # + a b a
218
- #
219
- # Raises:
220
- # - ArgumentError unless _descr_ argument is an IO object or a String.
221
- # - ADL::ParseError if the ADL sample format is not respected.
222
- # - InconsistencyError if the sample is not consistent (see Sample)
223
- #
224
- def self.parse_sample(descr, sample=nil)
225
- sample = Sample.new if (sample.nil? and not block_given?)
226
- ADL::to_io(descr) do |io|
227
- io.each_line do |l|
228
- l = l.strip
229
- next if l.empty? or l[0,1]=='#'
230
- if sample.nil? and block_given?
231
- yield parse_string(l)
232
- else
233
- sample << parse_string(l)
234
- end
235
- end
236
- end
237
- sample
238
- end
239
-
240
- #
241
- # Parses a sample file _f_.
242
- #
243
- # Shortcut for:
244
- # File.open(f) do |file|
245
- # sample = ADL::parse_sample(file, sample)
246
- # end
247
- #
248
- def self.parse_sample_file(f, sample=nil)
249
- File.open(f) do |file|
250
- sample = ADL::parse_sample(file, sample)
251
- end
252
- sample
253
- end
254
-
255
- #
256
- # Prints a sample in ADL format on a buffer. Sample argument is expected to be
257
- # an object responding to each, yielding InputString instances. Buffer is expected
258
- # to be an object responding to <code><<</code>.
259
- #
260
- def self.print_sample(sample, buffer="")
261
- sample.each do |str|
262
- buffer << str.to_s << "\n"
263
- end
264
- end
265
-
266
- #
267
- # Prints a sample in a file.
268
- #
269
- # Shortcut for:
270
- # File.open(file, 'w') do |io|
271
- # print_sample(sample, io)
272
- # end
273
- #
274
- def self.print_sample_in_file(sample, file)
275
- File.open(file, 'w') do |f|
276
- print_sample(sample, f)
277
- end
278
- end
279
-
280
- ### private section ##########################################################
281
- private
282
-
283
- #
284
- # Converts a parsable argument to an IO object or raises an ArgumentError.
285
- #
286
- def self.to_io(descr)
287
- case descr
288
- when IO
289
- yield descr
290
- when String
291
- yield StringIO.new(descr)
292
- else
293
- raise ArgumentError, "IO instance expected, #{descr.class} received", caller
294
- end
295
- end
296
-
297
- end # module ADL
298
- end # module Stamina