stamina-induction 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/CHANGELOG.md +78 -0
  2. data/LICENCE.md +22 -0
  3. data/lib/stamina-induction/stamina-induction.rb +1 -0
  4. data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
  5. data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
  6. data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
  7. data/lib/stamina-induction/stamina/classifier.rb +55 -0
  8. data/lib/stamina-induction/stamina/command.rb +6 -0
  9. data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
  10. data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
  11. data/lib/stamina-induction/stamina/command/classify.rb +47 -0
  12. data/lib/stamina-induction/stamina/command/infer.rb +140 -0
  13. data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
  14. data/lib/stamina-induction/stamina/command/score.rb +34 -0
  15. data/lib/stamina-induction/stamina/dsl.rb +2 -0
  16. data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
  17. data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
  18. data/lib/stamina-induction/stamina/induction.rb +13 -0
  19. data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
  20. data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
  21. data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
  22. data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
  23. data/lib/stamina-induction/stamina/input_string.rb +123 -0
  24. data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
  25. data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
  26. data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
  27. data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
  28. data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
  29. data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
  30. data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
  31. data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
  32. data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
  33. data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
  34. data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
  35. data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
  36. data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
  37. data/lib/stamina-induction/stamina/sample.rb +309 -0
  38. data/lib/stamina-induction/stamina/scoring.rb +213 -0
  39. metadata +106 -0
@@ -0,0 +1,123 @@
1
module Stamina
  #
  # An input string is a sequence of input symbols (symbols being letters appearing
  # on automaton edges) labeled as positive, negative or unlabeled (provided for test
  # samples and query strings).
  #
  # This class includes the Enumerable module, which allows reasoning about
  # ordered symbols.
  #
  # == Detailed API
  class InputString
    include Enumerable

    #
    # Creates an input string from symbols and positive or negative labeling.
    #
    # Arguments:
    # - symbols: When an Array is provided, it is duplicated by default to be kept
    #   internally. Set dup to false to avoid duplicating it (in both cases, the
    #   internal array will be frozen; with dup=false this freezes the caller's
    #   own array). When a String is provided, the symbols array is created using
    #   <tt>symbols.split(' ')</tt> and then frozen. _dup_ is ignored in that case.
    # - The positive argument may be true (positive string), false (negative one)
    #   or nil (unlabeled).
    #
    # Raises:
    # - ArgumentError if symbols is not an Array nor a String.
    #
    def initialize(symbols, positive, dup=true)
      @symbols = case symbols
        when String
          symbols.split(' ').freeze
        when Array
          (dup ? symbols.dup : symbols).freeze
        else
          # report the error at the caller's location, not inside this constructor
          raise(ArgumentError,
                "Input string expects an Array or a String: #{symbols} received",
                caller)
      end
      @positive = positive
    end

    #
    # Checks if this input string is empty (aka lambda, i.e. contains no symbol).
    #
    def empty?() @symbols.empty? end
    alias :lambda? :empty?

    #
    # Returns the string size, i.e. number of its symbols.
    #
    def size() @symbols.size end

    #
    # Returns the exact label of this string, being true (positive string)
    # false (negative string) or nil (unlabeled)
    #
    def label() @positive end

    #
    # Returns true if this input string is positively labeled, false otherwise.
    #
    def positive?() @positive==true end

    #
    # Returns true if this input string is negatively labeled, false otherwise.
    #
    def negative?() @positive==false end

    #
    # Returns true if this input string is unlabeled.
    #
    def unlabeled?() @positive.nil? end

    #
    # Copies and returns the same string, but switches the positive flag. This
    # method returns self if it is unlabeled.
    #
    def negate
      return self if unlabeled?
      # the symbols array is already frozen, so it can be shared without a copy
      InputString.new(@symbols, !@positive, false)
    end

    #
    # Returns an array with symbols of this string. Returned array may not be
    # modified (it is frozen).
    #
    def symbols() @symbols end

    #
    # Yields the block with each string symbol, in order. Has no effect without
    # block. Returns the (frozen) symbols array in both cases.
    #
    def each
      # nothing to yield to: skip the walk entirely
      return @symbols unless block_given?
      @symbols.each {|s| yield s }
    end

    #
    # Checks equality with another InputString. Returns true if strings have same
    # sequence of symbols and same labeling, false otherwise. Returns nil if _o_
    # is not an InputString.
    #
    def ==(o)
      return nil unless InputString===o
      label == o.label && @symbols == o.symbols
    end
    alias :eql? :==

    #
    # Computes a hash code for this string.
    #
    # The code is derived from the symbols and from both label predicates, so
    # that strings which differ only by being negative vs. unlabeled (which ==
    # tells apart) do not systematically collide.
    #
    def hash
      [@symbols, positive?, unlabeled?].hash
    end

    #
    # Prints this string in ADL: a label marker ('+', '-' or '?') followed by
    # a blank and the symbols separated by blanks.
    #
    def to_adl
      marker = unlabeled? ? '?' : (positive? ? '+' : '-')
      # build a fresh string instead of appending to a string literal
      "#{marker} #{@symbols.join(' ')}"
    end
    alias :to_s :to_adl
    alias :inspect :to_adl

  end # class InputString
end # module Stamina
@@ -0,0 +1,226 @@
1
+ require_relative "reg_lang/parser"
2
module Stamina
  #
  # A regular language, held as a finite automaton capturing it.
  #
  class RegLang

    # Automaton capturing this regular language (readable by RegLang
    # instances only)
    attr_reader :fa
    protected :fa

    #
    # Creates a regular language instance based on an automaton.
    #
    def initialize(fa)
      @fa = fa
    end

    ############################################################################
    # CLASS METHODS

    #
    # Coerces `arg` to a regular language. Tried in order: `to_reglang`,
    # `to_fa`, then parsing when `arg` is a String.
    #
    # @raise ArgumentError if `arg` cannot be coerced to a regular language
    #
    def self.coerce(arg)
      return arg.to_reglang if arg.respond_to?(:to_reglang)
      return new(arg.to_fa) if arg.respond_to?(:to_fa)
      return parse(arg) if arg.is_a?(String)
      raise ArgumentError, "Invalid argument #{arg} for `RegLang`"
    end

    #
    # Builds a sigma star language: a single initial and accepting state
    # looping on itself with every symbol of `alph`.
    #
    def self.sigma_star(alph)
      automaton = Automaton.new do |built|
        built.alphabet = alph.to_a
        built.add_state(:initial => true, :accepting => true)
        alph.each{|letter| built.connect(0, 0, letter) }
      end
      new(automaton)
    end

    #
    # Creates a regular language by parsing an expression.
    #
    def self.parse(str)
      parsed = Parser.parse(str)
      RegLang.new(parsed.to_fa)
    end

    ############################################################################
    # OPERATORS

    #
    # Returns the prefix-closed version of this regular language, obtained by
    # marking every state of a copy of the automaton as accepting.
    #
    def prefix_closed
      closed = fa.dup
      closed.each_state do |state|
        state.accepting!
      end
      RegLang.new(closed)
    end

    #
    # Returns the complement of this regular language
    #
    def complement
      deterministic = to_dfa
      RegLang.new(deterministic.complement)
    end

    #
    # Power operator, only supported with -1 as a shortcut for `complement`.
    #
    # @raise ArgumentError if `x` is not -1
    #
    def **(x)
      return complement if x == -1
      raise ArgumentError, "Invalid argument for ** (#{x})"
    end

    #
    # Returns a regular language defined as the union of `self` with `other`.
    # Both automata are copied into a fresh one.
    #
    def +(other)
      target = Automaton.new
      fa.dup(target)
      other.to_fa.dup(target)
      RegLang.new(target)
    end
    alias_method :|, :+
    alias_method :union, :+

    #
    # Returns a regular language defined as the intersection of `self` with
    # `other`.
    #
    def *(other)
      composed = fa.compose(other.fa)
      RegLang.new(composed)
    end
    alias_method :&, :*
    alias_method :intersection, :*

    #
    # Returns a regular language capturing all strings from `self` but
    # those in common with `other`.
    #
    def -(other)
      excluded = other.complement
      self & excluded
    end
    alias_method :difference, :-

    #
    # Returns the regular language defined when abstracting from `symbols`
    #
    def hide(symbols)
      hidden = fa.hide(symbols)
      RegLang.new(hidden)
    end

    #
    # Returns the regular language defined when projecting on `symbols`
    #
    def project(symbols)
      kept = fa.keep(symbols)
      RegLang.new(kept)
    end

    ############################################################################
    # CANONICAL DFA

    # Delegates to the canonical information of this language.
    def short_prefixes
      canonical_info.short_prefixes
    end

    # Delegates to the canonical information of this language.
    def kernel
      canonical_info.kernel
    end

    # Delegates to the canonical information of this language.
    def characteristic_sample
      canonical_info.characteristic_sample
    end

    # Lazily builds, then reuses, the canonical information of this language.
    def canonical_info
      @canonical_info ||= CanonicalInfo.new(self)
    end
    private :canonical_info

    ############################################################################
    # QUERIES

    #
    # Checks if the language is empty
    #
    def empty?
      self <=> EMPTY
    end

    #
    # Checks if this regular language includes a given string
    #
    def include?(str)
      fa.accepts?(str)
    end

    #
    # Checks if `self` and `other` capture the same regular language, by
    # comparing their canonical automata.
    #
    def eql?(other)
      mine = to_cdfa
      mine <=> other.to_cdfa
    end
    alias_method :<=>, :eql?

    ############################################################################
    # COERCIONS

    #
    # Returns self.
    #
    def to_reglang
      self
    end

    #
    # Returns a finite automaton capturing this regular language.
    #
    # Returned automaton is a copy and may be non-deterministic.
    #
    def to_fa
      fa.dup
    end

    #
    # Returns a deterministic finite automaton capturing this regular
    # language.
    #
    # Returned automaton is not guaranteed to be minimal or canonical.
    #
    def to_dfa
      fa.determinize
    end

    #
    # Returns the canonical deterministic finite automaton capturing this
    # regular language.
    #
    def to_cdfa
      fa.to_cdfa
    end

    #
    # Returns a dot output of the canonical automaton, whose states are
    # ordered by their :depth mark.
    #
    def to_dot
      canonical = to_cdfa
      canonical.depth
      canonical.order_states do |left, right|
        left[:depth] <=> right[:depth]
      end
      canonical.to_dot
    end

    #
    # Returns an ADL output of the canonical automaton
    #
    def to_adl
      to_cdfa.to_adl
    end

    # The empty regular language
    EMPTY = RegLang.new(Automaton::DUM)
  end # class RegLang
end # module Stamina
226
+ require_relative 'reg_lang/canonical_info'
@@ -0,0 +1,181 @@
1
module Stamina
  class RegLang
    #
    # Information derived from the canonical DFA of a regular language: short
    # prefixes, kernel, and a characteristic sample. Intermediate results are
    # cached as marks on the states and edges of the canonical DFA.
    #
    class CanonicalInfo

      # Decoration algorithm for the :short_prefix mark. When two candidate
      # prefixes meet on an element the one with fewer symbols is kept (the
      # first one on ties); a prefix is propagated along an edge by appending
      # the edge symbol to a copy of it.
      SHORT_PREFIXES = begin
        algo = Stamina::Utils::Decorate.new(:short_prefix)
        algo.set_suppremum do |d0,d1|
          if (d0.nil? || d1.nil?)
            (d0 || d1)
          else
            d0.size <= d1.size ? d0 : d1
          end
        end
        algo.set_propagate do |deco, edge|
          deco.dup << edge.symbol
        end
        algo
      end

      # Canonical DFA of the language this information is about
      attr_reader :cdfa

      #
      # Creates the canonical information of `lang`, which must respond to
      # `to_cdfa`.
      #
      def initialize(lang)
        @cdfa = lang.to_cdfa
      end

      # Returns the short prefix of a state or an edge.
      #
      # When the :short_prefix mark is missing on `s_or_e` it is built from
      # the short prefix of its source followed by its symbol, then cached.
      def short_prefix(s_or_e)
        prefixes!
        s_or_e[:short_prefix] ||= begin
          s_or_e.source[:short_prefix] + [s_or_e.symbol]
        end
      end

      # Returns a positive suffix for `state`, or nil if none can be found.
      # A found suffix is cached on the state.
      def positive_suffix(state)
        state[:positive_suffix] ||= find_suffix(state, true)
      end

      # Returns a negative suffix for `state`, or nil if none can be found.
      # A found suffix is cached on the state.
      def negative_suffix(state)
        state[:negative_suffix] ||= find_suffix(state, false)
      end

      #
      # Returns the short prefixes of the language as a sample: one string per
      # state, labeled according to the state being accepting or not.
      #
      def short_prefixes
        prefixes = Sample.new
        cdfa.each_state do |s|
          prefixes << InputString.new(short_prefix(s), s.accepting?)
        end
        prefixes
      end

      #
      # Returns the language kernel as a sample: the empty string plus one
      # string per edge, labeled according to the reached state.
      #
      def kernel
        kernel = Sample.new
        kernel << InputString.new([], cdfa.initial_state.accepting?)
        cdfa.each_edge do |e|
          kernel << InputString.new(short_prefix(e), e.target.accepting?)
        end
        kernel
      end

      #
      # Builds a characteristic sample
      #
      def characteristic_sample
        sample = Sample.new

        # at least one positive string should be found from
        # the initial state
        if pos = positive_suffix(cdfa.initial_state)
          sample << InputString.new(pos, true)
        else
          sample << InputString.new([], false)
          return sample
        end

        # condition 1: positive string for each element of the kernel
        cdfa.each_edge do |edge|
          # an edge whose target cannot reach an accepting state has no
          # positive extension: skip it instead of failing on `Array + nil`
          next unless suffix = positive_suffix(edge.target)
          pos = short_prefix(edge) + suffix
          sample << InputString.new(pos, true, false)
        end

        # condition 2: pair-wise distinguishing suffixes
        cdfa.each_state do |source|
          cdfa.each_edge do |edge|
            next if (target = edge.target) == source
            if suffix = distinguish(source, target)
              sign = cdfa.accepts?(suffix, source)
              sample << InputString.new(short_prefix(source) + suffix, sign)
              sample << InputString.new(short_prefix(edge) + suffix, !sign)
            end
          end
        end

        sample
      end

      private

      # Ensures that short prefixes of states are recognized. The decoration
      # algorithm is executed once per instance.
      def prefixes!
        unless defined?(@prefixes)
          SHORT_PREFIXES.execute(cdfa, nil, [])
          @prefixes = true
        end
      end

      # Yields every pair (x, y) with x taken from `xs` and y from `ys`.
      def cross(xs, ys)
        xs.each{|x| ys.each{|y| yield(x,y)}}
      end

      # Distinguishes two states, returning a suffix which is accepted for one
      # and rejected by the other. Returns nil when the matrix has no entry
      # for the pair.
      #
      # Raises ArgumentError if `x` and `y` are the same state.
      def distinguish(x, y)
        raise ArgumentError, "x and y should be different" if x == y
        build_distinguish_matrix[[x,y].sort]
      end

      # Builds (once) and returns a Hash mapping sorted pairs of states to a
      # suffix telling them apart.
      def build_distinguish_matrix
        @diff_matrix ||= begin
          mat = {}

          # pairs to be explored
          to_explore = []

          # start by marking accepting vs. non-accepting states, that are
          # distinguished by the empty suffix
          acc, nonacc = cdfa.states.partition{|s| s.accepting?}
          cross(acc, nonacc) do |*pair|
            mat[pair.sort!] = []
            to_explore << pair
          end

          # Visit each pair backwards
          while pair = to_explore.pop
            suffix = mat[pair]
            cross(pair[0].in_edges, pair[1].in_edges) do |se, te|
              next if se.symbol != te.symbol
              source = [se.source, te.source].sort!
              # record the pair of sources when it is new, or when a strictly
              # shorter suffix has been found, and (re)explore it
              if mat[source].nil? ||
                 (mat[source].length > (1+suffix.length))
                mat[source] = [se.symbol] + suffix
                to_explore.push(source)
              end
            end
          end

          mat
        end
      end

      # Recursively finds a positive/negative suffix for `state`.
      #
      # `stack` accumulates the symbols followed so far and is the array
      # returned on success; `seen` records the states already expanded. Out
      # edges are explored depth-first, in order; a branch that turns out to be
      # a dead end is undone and the next out edge is tried. Returns nil when
      # no suffix can be found.
      def find_suffix(state, positive, stack = [], seen = {})
        # (pos and accepting) or (neg and non-accepting) => lambda
        return stack if positive == state.accepting?

        # at one step => augment stack with symbol
        found = state.out_edges.find{|e| positive == e.target.accepting?}
        return stack << found.symbol if found

        # recurse on neighbours that have not been expanded yet. The state is
        # marked only once a first candidate is known, so that a self loop may
        # be followed once.
        state.out_edges.each do |edge|
          next if seen.has_key?(edge.target)
          seen[state] = true
          stack << edge.symbol
          suffix = find_suffix(edge.target, positive, stack, seen)
          return suffix if suffix
          # dead end: a failed call leaves the stack as it received it, so
          # only the symbol pushed here has to be removed
          stack.pop
        end

        # unable to find a positive suffix :-(
        return nil if positive

        # in case of negative suffix: pick one in alphabet that has no
        # outgoing edge from this state
        outs = state.out_symbols
        found = state.automaton.alphabet.find{|s| !outs.include?(s)}
        found ? (stack << found) : nil
      end

    end # class CanonicalInfo
  end # class RegLang
end # module Stamina