stamina-induction 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ # 0.5.4 / 2012-03-06
2
+
3
+ * InputString and Sample have been moved from stamina-induction to stamina-core as ADL
4
+ and Walking rely on them.
5
+ * Walking methods `parses?`, `accepts?` and `rejects?` are now aliased as `parse?`,
6
+ `accept?` and `reject?`, respectively.
7
+ * Automaton#to_dot now accepts a boolean argument to bypass sorting its states.
8
+
1
9
  # 0.5.3 / 2012-02-25
2
10
 
3
11
  * Resolve accuracy between github tags and rubygems
@@ -1,5 +1,3 @@
1
- require_relative 'sample'
2
- require_relative 'input_string'
3
1
  require_relative 'classifier'
4
2
  require_relative 'scoring'
5
3
  require_relative 'induction/union_find'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stamina-induction
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,22 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-25 00:00:00.000000000Z
12
+ date: 2012-03-06 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: stamina-core
16
- requirement: &72834540 !ruby/object:Gem::Requirement
16
+ requirement: &70263208819720 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - =
20
20
  - !ruby/object:Gem::Version
21
- version: 0.5.3
21
+ version: 0.5.4
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *72834540
24
+ version_requirements: *70263208819720
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: citrus
27
- requirement: &72834300 !ruby/object:Gem::Requirement
27
+ requirement: &70263208835580 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '2.4'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *72834300
35
+ version_requirements: *70263208835580
36
36
  description: Stamina-induction plugs induction algorithm to the stamina toolkit.
37
37
  email:
38
38
  - blambeau@gmail.com
@@ -42,41 +42,39 @@ extra_rdoc_files: []
42
42
  files:
43
43
  - LICENCE.md
44
44
  - CHANGELOG.md
45
- - lib/stamina-induction/stamina/dsl/induction.rb
46
- - lib/stamina-induction/stamina/dsl/reg_lang.rb
47
- - lib/stamina-induction/stamina/command/classify.rb
48
- - lib/stamina-induction/stamina/command/score.rb
45
+ - lib/stamina-induction/stamina/abbadingo/random_dfa.rb
46
+ - lib/stamina-induction/stamina/abbadingo/random_sample.rb
47
+ - lib/stamina-induction/stamina/abbadingo.rb
48
+ - lib/stamina-induction/stamina/classifier.rb
49
49
  - lib/stamina-induction/stamina/command/abbadingo_dfa.rb
50
- - lib/stamina-induction/stamina/command/infer.rb
51
50
  - lib/stamina-induction/stamina/command/abbadingo_samples.rb
51
+ - lib/stamina-induction/stamina/command/classify.rb
52
+ - lib/stamina-induction/stamina/command/infer.rb
52
53
  - lib/stamina-induction/stamina/command/metrics.rb
53
- - lib/stamina-induction/stamina/reg_lang/parser/plus.rb
54
- - lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb
55
- - lib/stamina-induction/stamina/reg_lang/parser/question.rb
54
+ - lib/stamina-induction/stamina/command/score.rb
55
+ - lib/stamina-induction/stamina/command.rb
56
+ - lib/stamina-induction/stamina/dsl/induction.rb
57
+ - lib/stamina-induction/stamina/dsl/reg_lang.rb
58
+ - lib/stamina-induction/stamina/dsl.rb
59
+ - lib/stamina-induction/stamina/induction/blue_fringe.rb
60
+ - lib/stamina-induction/stamina/induction/commons.rb
61
+ - lib/stamina-induction/stamina/induction/rpni.rb
62
+ - lib/stamina-induction/stamina/induction/union_find.rb
63
+ - lib/stamina-induction/stamina/induction.rb
64
+ - lib/stamina-induction/stamina/reg_lang/canonical_info.rb
56
65
  - lib/stamina-induction/stamina/reg_lang/parser/alternative.rb
57
66
  - lib/stamina-induction/stamina/reg_lang/parser/node.rb
58
- - lib/stamina-induction/stamina/reg_lang/parser/symbol.rb
59
- - lib/stamina-induction/stamina/reg_lang/parser/sequence.rb
67
+ - lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb
60
68
  - lib/stamina-induction/stamina/reg_lang/parser/parser.citrus
69
+ - lib/stamina-induction/stamina/reg_lang/parser/plus.rb
70
+ - lib/stamina-induction/stamina/reg_lang/parser/question.rb
61
71
  - lib/stamina-induction/stamina/reg_lang/parser/regexp.rb
72
+ - lib/stamina-induction/stamina/reg_lang/parser/sequence.rb
62
73
  - lib/stamina-induction/stamina/reg_lang/parser/star.rb
63
- - lib/stamina-induction/stamina/reg_lang/canonical_info.rb
74
+ - lib/stamina-induction/stamina/reg_lang/parser/symbol.rb
64
75
  - lib/stamina-induction/stamina/reg_lang/parser.rb
65
- - lib/stamina-induction/stamina/induction.rb
66
- - lib/stamina-induction/stamina/classifier.rb
67
- - lib/stamina-induction/stamina/abbadingo.rb
68
- - lib/stamina-induction/stamina/dsl.rb
69
- - lib/stamina-induction/stamina/sample.rb
70
- - lib/stamina-induction/stamina/input_string.rb
71
- - lib/stamina-induction/stamina/induction/rpni.rb
72
- - lib/stamina-induction/stamina/induction/blue_fringe.rb
73
- - lib/stamina-induction/stamina/induction/union_find.rb
74
- - lib/stamina-induction/stamina/induction/commons.rb
75
76
  - lib/stamina-induction/stamina/reg_lang.rb
76
- - lib/stamina-induction/stamina/abbadingo/random_dfa.rb
77
- - lib/stamina-induction/stamina/abbadingo/random_sample.rb
78
77
  - lib/stamina-induction/stamina/scoring.rb
79
- - lib/stamina-induction/stamina/command.rb
80
78
  - lib/stamina-induction/stamina-induction.rb
81
79
  homepage: https://github.com/blambeau/stamina
82
80
  licenses: []
@@ -98,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
96
  version: '0'
99
97
  requirements: []
100
98
  rubyforge_project:
101
- rubygems_version: 1.8.15
99
+ rubygems_version: 1.8.10
102
100
  signing_key:
103
101
  specification_version: 3
104
102
  summary: Induction algorithms for the Stamina toolkit
@@ -1,123 +0,0 @@
1
- module Stamina
2
- #
3
- # An input string is a sequence of input symbols (symbols being letters appearing
4
- # on automaton edges) labeled as positive, negative or unlabeled (provided for test
5
- # samples and query strings).
6
- #
7
- # This class includes the Enumerable module, which allows reasoning about
8
- # ordered symbols.
9
- #
10
- # == Detailed API
11
- class InputString
12
- include Enumerable
13
-
14
- #
15
- # Creates an input string from symbols and positive or negative labeling.
16
- #
17
- # Arguments:
18
- # - symbols: When an array is provided, it is duplicated by default to be kept
19
- # internally. Set dup to false to avoid duplicating it (in both cases, the
20
- # internal array will be frozen). When a String is provided, symbols array
21
- # is created using <tt>symbols.split(' ')</tt> and then frozen. _dup_ is
22
- # ignored in that case.
23
- # - The positive argument may be true (positive string), false (negative one)
24
- # or nil (unlabeled).
25
- #
26
- # Raises:
27
- # - ArgumentError if symbols is not an Array nor a String.
28
- #
29
- def initialize(symbols, positive, dup=true)
30
- raise(ArgumentError,
31
- "Input string expects an Array or a String: #{symbols} received",
32
- caller) unless Array===symbols or String===symbols
33
- @symbols = case symbols
34
- when String
35
- symbols.split(' ').freeze
36
- when Array
37
- (dup ? symbols.dup : symbols).freeze
38
- end
39
- @positive = positive
40
- end
41
-
42
- #
43
- # Checks if this input string is empty (aka lambda, i.e. contains no symbol).
44
- #
45
- def empty?() @symbols.empty? end
46
- alias :lambda? :empty?
47
-
48
- #
49
- # Returns the string size, i.e. number of its symbols.
50
- #
51
- def size() @symbols.size end
52
-
53
- #
54
- # Returns the exact label of this string, being true (positive string)
55
- # false (negative string) or nil (unlabeled)
56
- #
57
- def label() @positive end
58
-
59
- #
60
- # Returns true if this input string is positively labeled, false otherwise.
61
- #
62
- def positive?() @positive==true end
63
-
64
- #
65
- # Returns true if this input string is negatively labeled, false otherwise.
66
- #
67
- def negative?() @positive==false end
68
-
69
- #
70
- # Returns true if this input string is unlabeled.
71
- #
72
- def unlabeled?() @positive.nil? end
73
-
74
- # Copies and returns the same string, but switches the positive flag. This
75
- # method returns self if it is unlabeled.
76
- def negate
77
- return self if unlabeled?
78
- InputString.new(@symbols, !@positive, false)
79
- end
80
-
81
- #
82
- # Returns an array with symbols of this string. Returned array may not be
83
- # modified (it is frozen).
84
- #
85
- def symbols() @symbols end
86
-
87
- #
88
- # Yields the block with each string symbol, in order. Has no effect without
89
- # block.
90
- #
91
- def each() @symbols.each {|s| yield s if block_given? } end
92
-
93
- #
94
- # Checks equality with another InputString. Returns true if strings have same
95
- # sequence of symbols and same labeling, false otherwise. Returns nil if _o_
96
- # is not an InputString.
97
- #
98
- def ==(o)
99
- return nil unless InputString===o
100
- label == o.label and @symbols == o.symbols
101
- end
102
- alias :eql? :==
103
-
104
- #
105
- # Computes a hash code for this string.
106
- #
107
- def hash
108
- @symbols.hash + 37*positive?.hash
109
- end
110
-
111
- #
112
- # Prints this string in ADL.
113
- #
114
- def to_adl
115
- str = (unlabeled? ? '?' : (positive? ? '+ ' : '- '))
116
- str << @symbols.join(' ')
117
- str
118
- end
119
- alias :to_s :to_adl
120
- alias :inspect :to_adl
121
-
122
- end # class InputString
123
- end # module Stamina
@@ -1,309 +0,0 @@
1
- module Stamina
2
-
3
- #
4
- # A sample is an ordered collection of InputString labeled as positive or negative.
5
- #
6
- # == Tips and tricks
7
- # - loading samples from disk is easy thanks to ADL !
8
- #
9
- # == Detailed API
10
- class Sample
11
- include Enumerable
12
-
13
- # Number of strings in the sample
14
- attr_reader :size
15
-
16
- # Number of positive strings in the sample
17
- attr_reader :positive_count
18
-
19
- # Number of negative strings in the sample
20
- attr_reader :negative_count
21
-
22
- #
23
- # Creates an empty sample and appends it with args, by calling Sample#<< on
24
- # each of them.
25
- #
26
- def self.[](*args) Sample.new << args end
27
-
28
- #
29
- # Creates an empty sample.
30
- #
31
- def initialize(strings = nil)
32
- @strings = []
33
- @size, @positive_count, @negative_count = 0, 0, 0
34
- strings.each{|s| self << s } unless strings.nil?
35
- end
36
-
37
- #
38
- # Coerces `arg` to a Sample instance.
39
- #
40
- def self.coerce(arg)
41
- if arg.is_a?(Sample)
42
- arg
43
- elsif arg.is_a?(String)
44
- parse(arg)
45
- else
46
- raise ArgumentError, "Invalid argument #{arg} for `Sample`"
47
- end
48
- end
49
-
50
- #
51
- # Parses an ADL input
52
- #
53
- def self.parse(adl)
54
- ADL::parse_sample(adl)
55
- end
56
-
57
- #
58
- # Returns true if this sample does not contain any string,
59
- # false otherwise.
60
- #
61
- def empty?()
62
- @size==0
63
- end
64
-
65
- #
66
- # Adds a string to the sample. The _str_ argument may be an InputString instance,
67
- # a String (parsed using ADL), a Sample instance (all strings are added) or an
68
- # Array (recurses on each element).
69
- #
70
- # Raises an InconsistencyError if the same string already exists with the
71
- # opposite label. Raises an ArgumentError if the _str_ argument is not recognized.
72
- #
73
- def <<(str)
74
- case str
75
- when InputString
76
- #raise(InconsistencyError, "Inconsistent sample on #{str}", caller) if self.include?(str.negate)
77
- @size += 1
78
- str.positive? ? (@positive_count += 1) : (@negative_count += 1)
79
- @strings << str
80
- when String
81
- self << ADL::parse_string(str)
82
- when Sample
83
- str.each {|s| self << s}
84
- when Array
85
- str.each {|s| self << s}
86
- else
87
- raise(ArgumentError, "#{str} is not a valid argument.", caller)
88
- end
89
- self
90
- end
91
-
92
- #
93
- # Returns true if a given string is included in the sample, false otherwise.
94
- # This method allows same flexibility as << for the _str_ argument.
95
- #
96
- def include?(str)
97
- case str
98
- when InputString
99
- @strings.include?(str)
100
- when String
101
- include?(ADL::parse_string(str))
102
- when Array
103
- str.each {|s| return false unless include?(s)}
104
- true
105
- when Sample
106
- str.each {|s| return false unless include?(s)}
107
- true
108
- else
109
- raise(ArgumentError, "#{str} is not a valid argument.", caller)
110
- end
111
- end
112
-
113
- #
114
- # Returns a new sample as the union of both `self` and `other`
115
- #
116
- def +(other)
117
- s = Sample.new
118
- each{|x| s << x}
119
- other.each{|x| s << x}
120
- s
121
- end
122
-
123
- #
124
- # Compares with another sample _other_, which is required to be a Sample
125
- # instance. Returns true if the two samples contain the same strings (including
126
- # labels), false otherwise.
127
- #
128
- def ==(other)
129
- include?(other) and other.include?(self)
130
- end
131
- alias :eql? :==
132
-
133
- #
134
- # Computes a hash code for this sample.
135
- #
136
- def hash
137
- self.inject(37){|memo,str| memo + 17*str.hash}
138
- end
139
-
140
- #
141
- # Yields the block with each string. This method has no effect if no
142
- # block is given.
143
- #
144
- def each
145
- return unless block_given?
146
- @strings.each {|str| yield str}
147
- end
148
-
149
- #
150
- # Yields the block with each positive string. This method has no effect if no
151
- # block is given.
152
- #
153
- def each_positive
154
- return unless block_given?
155
- each {|str| yield str if str.positive?}
156
- end
157
-
158
- #
159
- # Returns an enumerator on positive strings.
160
- #
161
- def positive_enumerator
162
- if RUBY_VERSION >= "1.9"
163
- Enumerator.new(self, :each_positive)
164
- else
165
- Enumerable::Enumerator.new(self, :each_positive)
166
- end
167
- end
168
-
169
- #
170
- # Yields the block with each negative string. This method has no effect if no
171
- # block is given.
172
- #
173
- def each_negative
174
- each {|str| yield str if str.negative?}
175
- end
176
-
177
- #
178
- # Returns an enumerator on negative strings.
179
- #
180
- def negative_enumerator
181
- if RUBY_VERSION >= "1.9"
182
- Enumerator.new(self, :each_negative)
183
- else
184
- Enumerable::Enumerator.new(self, :each_negative)
185
- end
186
- end
187
-
188
- #
189
- # Checks if the sample is correctly classified by a given classifier
190
- # (expected to include the Stamina::Classifier module).
191
- # Unlabeled strings are simply ignored.
192
- #
193
- def correctly_classified_by?(classifier)
194
- classifier.correctly_classify?(self)
195
- end
196
-
197
- #
198
- # Computes and returns the binary signature of the sample. The signature
199
- # is a String having one character for each string in the sample. A '1'
200
- # is used for positive strings, '0' for negative ones and '?' for unlabeled.
201
- #
202
- def signature
203
- signature = ''
204
- each do |str|
205
- signature << (str.unlabeled? ? '?' : str.positive? ? '1' : '0')
206
- end
207
- signature
208
- end
209
-
210
- #
211
- # Takes only a given proportion of this sample and returns it as a new Sample.
212
- #
213
- def take(proportion = 0.5)
214
- taken = Stamina::Sample.new
215
- each_positive{|s| taken << s if Kernel.rand < proportion}
216
- each_negative{|s| taken << s if Kernel.rand < proportion}
217
- taken
218
- end
219
-
220
- #
221
- # Prints an ADL description of this sample on the buffer.
222
- #
223
- def to_adl(buffer="")
224
- self.inject(buffer) {|memo,str| memo << "\n" << str.to_adl}
225
- end
226
- alias :to_s :to_adl
227
- alias :inspect :to_adl
228
-
229
- #
230
- # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
231
- # that the states of the PTA are in lexical order, according to the <code><=></code>
232
- # operator defined on symbols. States reached by negative strings are tagged as
233
- # non accepting and error.
234
- #
235
- def self.to_pta(sample)
236
- thepta = Automaton.new do |pta|
237
- initial_state = add_state(:initial => true, :accepting => false)
238
-
239
- # Fill the PTA with each string
240
- sample.each do |str|
241
- # split string using the dfa
242
- parsed, reached, remaining = pta.dfa_split(str, initial_state)
243
-
244
- # remaining symbols are not empty -> build the PTA
245
- unless remaining.empty?
246
- remaining.each do |symbol|
247
- newone = pta.add_state(:initial => false, :accepting => false, :error => false)
248
- pta.connect(reached, newone, symbol)
249
- reached = newone
250
- end
251
- end
252
-
253
- # flag state
254
- str.positive? ? reached.accepting! : reached.error!
255
-
256
- # check consistency; this should not happen, as Sample does not allow
257
- # inconsistencies. Should appear only if _sample_ is not a Sample
258
- # instance but some other enumerable.
259
- raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
260
- if (reached.error? and reached.accepting?)
261
- end
262
-
263
- # Reindex states by applying BFS
264
- to_index, index = [initial_state], 0
265
- until to_index.empty?
266
- state = to_index.shift
267
- state[:__index__] = index
268
- state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each{|e| to_index << e.target}
269
- index += 1
270
- end
271
- end
272
-
273
- # Now we rebuild a fresh one with states in order.
274
- # This looks more efficient than reordering states of the PTA
275
- Automaton.new do |ordered|
276
- ordered.add_n_states(thepta.state_count)
277
- thepta.each_state do |pta_state|
278
- source = ordered.ith_state(pta_state[:__index__])
279
- source.initial! if pta_state.initial?
280
- source.accepting! if pta_state.accepting?
281
- source.error! if pta_state.error?
282
- pta_state.out_edges.each do |e|
283
- target = ordered.ith_state(e.target[:__index__])
284
- ordered.connect(source, target, e.symbol)
285
- end
286
- end
287
- end
288
-
289
- end
290
-
291
- # Converts this sample to a PTA
292
- def to_pta
293
- Sample.to_pta(self)
294
- end
295
- alias :to_fa :to_pta
296
- alias :to_dfa :to_pta
297
-
298
- # Converts this sample to a canonical dfa
299
- def to_cdfa
300
- to_pta.to_cdfa
301
- end
302
-
303
- # Converts this sample to a dot output
304
- def to_dot
305
- to_pta.to_dot
306
- end
307
-
308
- end # class Sample
309
- end # module Stamina