stamina 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. data/CHANGELOG.md +22 -5
  2. data/LICENCE.md +2 -2
  3. data/bin/stamina +1 -7
  4. data/lib/stamina.rb +10 -19
  5. metadata +54 -333
  6. data/.gemtest +0 -0
  7. data/Gemfile +0 -2
  8. data/Gemfile.lock +0 -37
  9. data/Manifest.txt +0 -16
  10. data/README.md +0 -78
  11. data/Rakefile +0 -23
  12. data/example/adl/automaton.adl +0 -49
  13. data/example/adl/sample.adl +0 -53
  14. data/example/basic/characteristic_sample.adl +0 -32
  15. data/example/basic/target.adl +0 -9
  16. data/example/competition/31_test.adl +0 -1500
  17. data/example/competition/31_training.adl +0 -1759
  18. data/lib/stamina/abbadingo.rb +0 -2
  19. data/lib/stamina/abbadingo/random_dfa.rb +0 -48
  20. data/lib/stamina/abbadingo/random_sample.rb +0 -146
  21. data/lib/stamina/adl.rb +0 -298
  22. data/lib/stamina/automaton.rb +0 -1263
  23. data/lib/stamina/automaton/complete.rb +0 -36
  24. data/lib/stamina/automaton/equivalence.rb +0 -55
  25. data/lib/stamina/automaton/metrics.rb +0 -78
  26. data/lib/stamina/automaton/minimize.rb +0 -25
  27. data/lib/stamina/automaton/minimize/hopcroft.rb +0 -116
  28. data/lib/stamina/automaton/minimize/pitchies.rb +0 -64
  29. data/lib/stamina/automaton/strip.rb +0 -16
  30. data/lib/stamina/automaton/walking.rb +0 -363
  31. data/lib/stamina/classifier.rb +0 -52
  32. data/lib/stamina/command.rb +0 -45
  33. data/lib/stamina/command/abbadingo_dfa.rb +0 -81
  34. data/lib/stamina/command/abbadingo_samples.rb +0 -40
  35. data/lib/stamina/command/adl2dot.rb +0 -71
  36. data/lib/stamina/command/classify.rb +0 -48
  37. data/lib/stamina/command/help.rb +0 -27
  38. data/lib/stamina/command/infer.rb +0 -141
  39. data/lib/stamina/command/metrics.rb +0 -51
  40. data/lib/stamina/command/robustness.rb +0 -22
  41. data/lib/stamina/command/score.rb +0 -35
  42. data/lib/stamina/errors.rb +0 -23
  43. data/lib/stamina/ext/math.rb +0 -20
  44. data/lib/stamina/induction/blue_fringe.rb +0 -265
  45. data/lib/stamina/induction/commons.rb +0 -156
  46. data/lib/stamina/induction/rpni.rb +0 -186
  47. data/lib/stamina/induction/union_find.rb +0 -377
  48. data/lib/stamina/input_string.rb +0 -123
  49. data/lib/stamina/loader.rb +0 -1
  50. data/lib/stamina/markable.rb +0 -42
  51. data/lib/stamina/sample.rb +0 -267
  52. data/lib/stamina/scoring.rb +0 -213
  53. data/lib/stamina/utils.rb +0 -1
  54. data/lib/stamina/utils/decorate.rb +0 -81
  55. data/lib/stamina/version.rb +0 -14
  56. data/stamina.gemspec +0 -191
  57. data/stamina.noespec +0 -32
  58. data/tasks/debug_mail.rake +0 -78
  59. data/tasks/debug_mail.txt +0 -13
  60. data/tasks/gem.rake +0 -68
  61. data/tasks/spec_test.rake +0 -79
  62. data/tasks/unit_test.rake +0 -77
  63. data/tasks/yard.rake +0 -51
  64. data/test/stamina/abbadingo/random_dfa_test.rb +0 -16
  65. data/test/stamina/abbadingo/random_sample_test.rb +0 -78
  66. data/test/stamina/adl_test.rb +0 -516
  67. data/test/stamina/automaton/classifier_test.rb +0 -259
  68. data/test/stamina/automaton/complete_test.rb +0 -58
  69. data/test/stamina/automaton/equivalence_test.rb +0 -120
  70. data/test/stamina/automaton/metrics_test.rb +0 -36
  71. data/test/stamina/automaton/minimize/hopcroft_test.rb +0 -15
  72. data/test/stamina/automaton/minimize/minimize_test.rb +0 -55
  73. data/test/stamina/automaton/minimize/pitchies_test.rb +0 -15
  74. data/test/stamina/automaton/minimize/rice_edu_10.adl +0 -16
  75. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +0 -13
  76. data/test/stamina/automaton/minimize/rice_edu_13.adl +0 -13
  77. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +0 -7
  78. data/test/stamina/automaton/minimize/should_strip_1.adl +0 -8
  79. data/test/stamina/automaton/minimize/should_strip_1.min.adl +0 -6
  80. data/test/stamina/automaton/minimize/unknown_1.adl +0 -16
  81. data/test/stamina/automaton/minimize/unknown_1.min.adl +0 -12
  82. data/test/stamina/automaton/strip_test.rb +0 -36
  83. data/test/stamina/automaton/to_dot_test.rb +0 -64
  84. data/test/stamina/automaton/walking/dfa_delta_test.rb +0 -39
  85. data/test/stamina/automaton/walking_test.rb +0 -206
  86. data/test/stamina/automaton_additional_test.rb +0 -190
  87. data/test/stamina/automaton_test.rb +0 -1104
  88. data/test/stamina/exit.rb +0 -3
  89. data/test/stamina/induction/blue_fringe_test.rb +0 -83
  90. data/test/stamina/induction/induction_test.rb +0 -70
  91. data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +0 -19
  92. data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +0 -64
  93. data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +0 -9
  94. data/test/stamina/induction/redblue_universal_expected.adl +0 -4
  95. data/test/stamina/induction/redblue_universal_sample.adl +0 -5
  96. data/test/stamina/induction/rpni_inria_expected.adl +0 -7
  97. data/test/stamina/induction/rpni_inria_sample.adl +0 -9
  98. data/test/stamina/induction/rpni_test.rb +0 -129
  99. data/test/stamina/induction/rpni_test_pta.dot +0 -22
  100. data/test/stamina/induction/rpni_universal_expected.adl +0 -4
  101. data/test/stamina/induction/rpni_universal_sample.adl +0 -4
  102. data/test/stamina/induction/union_find_test.rb +0 -124
  103. data/test/stamina/input_string_test.rb +0 -323
  104. data/test/stamina/markable_test.rb +0 -70
  105. data/test/stamina/randdfa.adl +0 -66
  106. data/test/stamina/sample.adl +0 -4
  107. data/test/stamina/sample_classify_test.rb +0 -149
  108. data/test/stamina/sample_test.rb +0 -290
  109. data/test/stamina/scoring_test.rb +0 -63
  110. data/test/stamina/small_dfa.dot +0 -16
  111. data/test/stamina/small_dfa.gif +0 -0
  112. data/test/stamina/small_nfa.dot +0 -18
  113. data/test/stamina/small_nfa.gif +0 -0
  114. data/test/stamina/stamina_test.rb +0 -80
  115. data/test/stamina/utils/decorate_test.rb +0 -65
  116. data/test/test_all.rb +0 -7
@@ -1,2 +0,0 @@
1
- require 'stamina/abbadingo/random_dfa'
2
- require 'stamina/abbadingo/random_sample'
@@ -1,48 +0,0 @@
module Stamina
  module Abbadingo
    #
    # Random DFA generator following the Abbadingo protocol: states are created
    # with random accepting flags, every state gets one outgoing edge per binary
    # symbol towards a randomly chosen state, a random state is made initial, and
    # the result is handed to the Pitchies minimizer.
    #
    class RandomDFA

      # Wished number of states, as given at construction
      attr_reader :state_count

      # Threshold compared against Kernel.rand to flag a state as accepting
      attr_reader :accepting_ratio

      #
      # Creates a generator. Defaults are 64 wished states and an accepting
      # ratio of 0.5.
      #
      def initialize(state_count = 64, accepting_ratio = 0.5)
        @state_count, @accepting_ratio = state_count, accepting_ratio
      end

      #
      # Builds the random automaton and returns whatever
      # Stamina::Automaton::Minimize::Pitchies.execute returns for it.
      #
      def execute
        automaton = Automaton.new

        # Over-provision: 5/4 of the wished state count (truncated), none of
        # them initial or error states
        oversized = (state_count.to_f * 5.0 / 4.0).to_i
        oversized.times do
          is_accepting = (Kernel.rand <= accepting_ratio)
          automaton.add_state(:initial => false,
                              :accepting => is_accepting,
                              :error => false)
        end

        # One edge per state and per binary symbol, towards a random target
        automaton.each_state do |from|
          %w[0 1].each do |letter|
            to = automaton.ith_state(Kernel.rand(automaton.state_count))
            automaton.connect(from, to, letter)
          end
        end

        # Pick the initial state at random
        start = automaton.ith_state(Kernel.rand(automaton.state_count))
        start.initial!

        # Minimize and return
        Stamina::Automaton::Minimize::Pitchies.execute(automaton)
      end

    end # class RandomDFA
  end # module Abbadingo
end # module Stamina
module Stamina
  module Abbadingo
    #
    # Generates random training and test samples using the Abbadingo protocol.
    #
    class RandomSample

      #
      # Implements an enumerator for binary strings whose length lies between 0
      # and max_length (passed at construction).
      #
      # Strings are sampled with a uniform distribution among all of them. As the
      # number of strings of a given length is an exponential function, this means
      # that you have about a 50% chance of getting a string of length max_length,
      # 25% of max_length - 1, 12.5% of max_length - 2 and so on.
      #
      # How to use it?
      #
      #   # create for strings between 0 and 10 symbols, inclusive
      #   enum = Stamina::Abbadingo::RandomSample::StringEnumerator.new(10)
      #
      #   # this is how to generate strings while a predicate is true
      #   enum.each do |s|
      #     # s is an array of binary symbols ("0" or "1");
      #     # the value of the block decides: truthy to continue, false/nil to stop
      #     keep_going?(s)
      #   end
      #
      #   # this is how to generate a fixed number of strings
      #   (1..1000).collect{ enum.one }
      #
      # How does it work? Well, the distribution of strings is as follows:
      #
      #   length   [n]b_strings   [c]umul      log2(c).floor
      #            (2**length)    2**(n+1)-1
      #   0        1              1            0
      #   1        2              3            1
      #   2        4              7            2
      #   3        8              15           3
      #   4        16             31           4
      #   5        32             63           5
      #
      # where _cumul_ is the total number of strings up to _length_ symbols.
      #
      # Therefore, the idea is to give each string an identifier, say _x_,
      # between 1 and 2**(max_length+1)-1 (see max).
      # * The length of the _x_th string is floor(log2(x)) (see length_for)
      # * The string itself is made of the length_for(x) low-order bits of x,
      #   least significant bit first (see string_for)
      #
      # As those identifiers naturally respect the exponential distribution, sampling
      # the strings is the same as taking string_for(x) for random x up to _max_.
      #
      class StringEnumerator
        include Enumerable

        # Maximal length of a string
        attr_reader :max_length

        # Creates an enumerator for strings of at most _max_length_ symbols.
        def initialize(max_length = 16)
          @max_length = max_length
        end

        #
        # Returns the length of the string whose identifier is _x_ (> 0), that is
        # floor(log2(x)).
        #
        # Computed on the binary representation of the integer rather than through
        # Math.log2: the latter converts to a Float and rounds up for large
        # identifiers close to a power of two (e.g. 2**60 - 1), which would yield
        # strings longer than max_length.
        #
        # Raises an ArgumentError unless _x_ is strictly positive.
        #
        def length_for(x)
          raise ArgumentError, "Identifier must be > 0, got #{x}" unless x > 0
          x.to_s(2).length - 1
        end

        #
        # Returns the binary string whose identifier is _x_ (> 0), as an array of
        # "0"/"1" symbols, least significant bit of _x_ first.
        #
        def string_for(x)
          (0...length_for(x)).collect{|i| ((x >> i) % 2).to_s}
        end

        #
        # Returns the maximum identifier, which is also the number of strings
        # up to max_length symbols
        #
        def max
          @max ||= 2 ** (max_length+1) - 1
        end

        #
        # Generates a string at random (identifier drawn in 1..max)
        #
        def one
          string_for(1+Kernel.rand(max))
        end

        #
        # Yields the block with a random string, until the block returns false
        # or nil. The block is always called at least once.
        #
        def each
          keep_going = true
          keep_going = yield(one) while keep_going
        end

      end # class StringEnumerator

      #
      # Generates random strings of at most _max_length_ symbols, labels them
      # with the classifier, and returns a [training, test] pair of Sample
      # instances.
      #
      # The classifier must respond to state_count and accepts?, as well as to
      # depth when _max_length_ is not provided.
      #
      def self.execute(classifier, max_length = classifier.depth + 3)
        enum = StringEnumerator.new(max_length)

        # We aim at 1800 strings for the test set plus state_count**2 strings for
        # the training set. If there are not enough strings available, we generate
        # the maximum we can.
        # NOTE(review): the previous comment announced n^2/2 training strings while
        # the code uses state_count**2 -- confirm which one is intended.
        seen = {}
        nb = [1800 + (classifier.state_count**2), enum.max].min

        # Draw until _nb_ distinct strings have been collected
        enum.each do |s|
          seen[s] = true
          seen.size < nb
        end

        # Label them
        strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
        pos, neg = strings.partition{|s| s.positive?}

        # Split them, 1800 in test and the rest in training set when both classes
        # are large enough; otherwise toss a coin for each string
        if (pos.size > 900) && (neg.size > 900)
          pos_test, pos_training = pos[0...900], pos[900..-1]
          neg_test, neg_training = neg[0...900], neg[900..-1]
        else
          pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
          neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
        end

        # Mix positive and negative strings. Array#shuffle is used instead of
        # sorting with a random comparator, which is inconsistent and biased.
        training = (pos_training + neg_training).shuffle
        test     = (pos_test + neg_test).shuffle
        [Sample.new(training), Sample.new(test)]
      end

    end # class RandomSample
  end # module Abbadingo
end # module Stamina
data/lib/stamina/adl.rb DELETED
@@ -1,298 +0,0 @@
require 'stringio'

module Stamina
  #
  # Automaton Description Language module. This module provides parsing and
  # printing methods for automata and samples. Documentation of the file format
  # used for an automaton is given in parse_automaton; file format for samples is
  # documented in parse_sample.
  #
  # Methods of this module are not intended to be included by a class but invoked
  # on the module instead:
  #
  #   begin
  #     dfa = Stamina::ADL.parse_automaton_file("my_automaton.adl")
  #   rescue ADL::ParseError => ex
  #     puts "Oops, the ADL automaton file seems corrupted..."
  #   end
  #
  # == Detailed API
  module ADL

    #################################################################################
    # Automaton Section                                                             #
    #################################################################################

    #
    # Parses a given automaton description and returns an Automaton instance.
    #
    # Raises:
    # - ArgumentError unless _descr_ is a String or an IO-like object (anything
    #   responding to each_line).
    # - ADL::ParseError if the ADL automaton format is not respected. Line numbers
    #   in error messages are physical line numbers (blank and comment lines are
    #   counted).
    #
    # ADL provides a really simple grammar to describe automata. Here is a succinct
    # example (full documentation of the ADL automaton grammar can be found in
    # the self-documenting example/adl/automaton.adl file).
    #
    #   # Some header comments: tool which has generated this automaton,
    #   # maybe a date or other tool options ...
    #   # here: 'this automaton accepts the a(ba)* regular language'
    #   2 2
    #   0 true false
    #   1 false true
    #   0 1 a
    #   1 0 b
    #
    # State lines are 'id initial accepting', with an optional fourth boolean
    # marking an error state. Everything following a '#' on a line is ignored.
    #
    def self.parse_automaton(descr)
      automaton = nil
      ADL::to_io(descr) do |io|
        state_count, edge_count = nil, nil
        state_read, edge_read = 0, 0
        states = {}
        mode = :header

        automaton = Automaton.new do |fa|
          # Counter bumped for every line read, before any skipping, so that
          # error messages point at the real line of the description.
          line_number = 0
          io.each_line do |l|
            line_number += 1

            # drop trailing comment and surrounding blanks
            index = l.index('#')
            l = l[0,index] if index
            l = l.strip
            next if l.empty?

            case mode
            when :header
              # looking for |state_count edge_count|
              raise(ADL::ParseError,
                    "Parse error line #{line_number}: 'state_count edge_count' expected, "\
                    "'#{l}' found.") unless /^(\d+)\s+(\d+)$/ =~ l
              state_count, edge_count = $1.to_i, $2.to_i
              mode = :states

            when :states
              # looking for |number initial accepting [error]|
              raise(ADL::ParseError,
                    "Parse error line #{line_number}: state definition expected, "\
                    "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)(\s+(true|false))?$/ =~ l
              id, initial, accepting, error = $1, $2, $3, $5
              initial, accepting, error = ("true"==initial), ("true"==accepting), ("true"==error)

              state = fa.add_state(:initial => initial, :accepting => accepting, :error => error)
              state[:name] = id.to_s
              states[id] = state

              state_read += 1
              mode = (edge_count==0 ? :end : :edges) if state_read==state_count

            when :edges
              # looking for |source target symbol|
              raise(ADL::ParseError,
                    "Parse error line #{line_number}: edge definition expected, "\
                    "'#{l}' found.") unless /^(\S+)\s+(\S+)\s+(\S+)$/ =~ l
              source, target, symbol = $1, $2, $3
              raise(ADL::ParseError,
                    "Parse error line #{line_number}: no such state #{source}") \
                    unless states[source]
              raise(ADL::ParseError,
                    "Parse error line #{line_number}: no such state #{target}") \
                    unless states[target]

              fa.connect(states[source], states[target], {:symbol => symbol})

              edge_read += 1
              mode = :end if edge_read==edge_count

            when :end
              raise(ADL::ParseError,
                    "Parse error line #{line_number}: trailing data found '#{l}'")

            end # case mode
          end

          # no header at all (empty or comment-only description)
          raise(ADL::ParseError, "Parse error: 'state_count edge_count' expected, "\
                                 "nothing found.") if mode == :header
          raise(ADL::ParseError, "Parse error: #{state_count} states annouced, "\
                                 "#{state_read} found.") if state_count != state_read
          raise(ADL::ParseError, "Parse error: #{edge_count} edges annouced, "\
                                 "#{edge_read} found.") if edge_count != edge_read

        end # Automaton.new
      end
      automaton
    end # def self.parse_automaton

    #
    # Parses an automaton file _f_.
    #
    # Shortcut for:
    #   File.open(f, 'r') do |io|
    #     Stamina::ADL.parse_automaton(io)
    #   end
    #
    def self.parse_automaton_file(f)
      File.open(f) do |file|
        ADL::parse_automaton(file)
      end
    end

    #
    # Prints an automaton to a buffer (responding to <code><<</code>) in ADL
    # format. Returns the buffer itself.
    #
    def self.print_automaton(fa, buffer="")
      buffer << "#{fa.state_count} #{fa.edge_count}\n"
      fa.states.each do |s|
        # the error flag is only printed when set
        error_flag = (s.error? ? " true" : "")
        buffer << "#{s.index} #{s.initial?} #{s.accepting?}#{error_flag}\n"
      end
      fa.edges.each do |e|
        buffer << "#{e.source.index} #{e.target.index} #{e.symbol}\n"
      end
      buffer
    end

    #
    # Prints an automaton to a file whose path is provided.
    #
    # Shortcut for:
    #   File.open(file, 'w') do |io|
    #     print_automaton(fa, io)
    #   end
    #
    def self.print_automaton_to_file(fa, file)
      File.open(file, 'w') do |io|
        print_automaton(fa, io)
      end
    end

    #################################################################################
    # String and Sample Section                                                     #
    #################################################################################

    #
    # Parses an input string _str_ and returns an InputString instance. Format of
    # input strings is documented in parse_sample. _str_ is required to be a ruby
    # String.
    #
    # The first blank-separated token is the label ('+' positive, '-' negative,
    # '?' unlabeled); the remaining tokens are the symbols of the string.
    #
    # Raises:
    # - ADL::ParseError if the ADL string format is not respected.
    #
    def self.parse_string(str)
      symbols = str.split(' ')
      label = symbols.shift
      case label
      when '+'
        InputString.new symbols, true, false
      when '-'
        InputString.new symbols, false, false
      when '?'
        InputString.new symbols, nil, false
      else
        raise ADL::ParseError, "Invalid string format #{str}", caller
      end
    end

    #
    # Parses the sample provided by _descr_. When no sample is given (nil) and a
    # block is provided, yields the block with each InputString instance and
    # returns nil. Otherwise, fills the sample (any object responding to
    # <code><<</code>) with the strings, creating a fresh new one (as a Sample
    # instance) if sample is nil, and returns it. Note that a block is ignored
    # when a sample is explicitly provided.
    #
    # ADL provides a really simple grammar to describe samples (here is a succinct
    # example, the full documentation of the sample grammar can be found in the
    # self-documenting example/adl/sample.adl file):
    #
    #   #
    #   # Some header comments: tool which has generated this sample,
    #   # maybe a date or other tool options ...
    #   # here: 'this sample is characteristic for the a(ba)* regular language'
    #   #
    #   # Positive, Negative, Unlabeled strings begin with +, -, ?, respectively
    #   # Empty lines and lines beginning with # are simply ignored.
    #   #
    #   -
    #   + a
    #   - a b
    #   + a b a
    #
    # Raises:
    # - ArgumentError unless _descr_ is a String or an IO-like object (anything
    #   responding to each_line).
    # - ADL::ParseError if the ADL sample format is not respected.
    # - InconsistencyError if the sample is not consistent (see Sample)
    #
    def self.parse_sample(descr, sample=nil)
      yielding = (sample.nil? && block_given?)
      sample = Sample.new if (sample.nil? && !yielding)
      ADL::to_io(descr) do |io|
        io.each_line do |l|
          l = l.strip
          next if l.empty? or l[0,1]=='#'
          if yielding
            yield parse_string(l)
          else
            sample << parse_string(l)
          end
        end
      end
      sample
    end

    #
    # Parses a sample file _f_.
    #
    # Shortcut for:
    #   File.open(f) do |file|
    #     sample = ADL::parse_sample(file, sample)
    #   end
    #
    def self.parse_sample_file(f, sample=nil)
      File.open(f) do |file|
        ADL::parse_sample(file, sample)
      end
    end

    #
    # Prints a sample in ADL format on a buffer. Sample argument is expected to be
    # an object responding to each, yielding InputString instances. Buffer is expected
    # to be an object responding to <code><<</code>. Returns the buffer itself, so
    # that the output is not lost when the default buffer is used.
    #
    def self.print_sample(sample, buffer="")
      sample.each do |str|
        buffer << str.to_s << "\n"
      end
      buffer
    end

    #
    # Prints a sample in a file.
    #
    # Shortcut for:
    #   File.open(file, 'w') do |io|
    #     print_sample(sample, io)
    #   end
    #
    def self.print_sample_in_file(sample, file)
      File.open(file, 'w') do |f|
        print_sample(sample, f)
      end
    end

    ### internal helpers #########################################################

    #
    # Converts a parsable argument to an IO-like object and yields it, or raises
    # an ArgumentError. Strings are wrapped in a StringIO; any other object
    # responding to each_line (IO, File, StringIO, ...) is yielded as is.
    #
    # Internal helper: it stays publicly callable because the parsing methods
    # invoke it with an explicit receiver (ADL::to_io), and a bare +private+
    # would not apply to a method defined on the module itself anyway.
    #
    def self.to_io(descr)
      if descr.is_a?(String)
        yield StringIO.new(descr)
      elsif descr.respond_to?(:each_line)
        yield descr
      else
        raise ArgumentError, "IO instance expected, #{descr.class} received", caller
      end
    end

  end # module ADL
end # module Stamina