stamina-induction 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. data/CHANGELOG.md +78 -0
  2. data/LICENCE.md +22 -0
  3. data/lib/stamina-induction/stamina-induction.rb +1 -0
  4. data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
  5. data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
  6. data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
  7. data/lib/stamina-induction/stamina/classifier.rb +55 -0
  8. data/lib/stamina-induction/stamina/command.rb +6 -0
  9. data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
  10. data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
  11. data/lib/stamina-induction/stamina/command/classify.rb +47 -0
  12. data/lib/stamina-induction/stamina/command/infer.rb +140 -0
  13. data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
  14. data/lib/stamina-induction/stamina/command/score.rb +34 -0
  15. data/lib/stamina-induction/stamina/dsl.rb +2 -0
  16. data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
  17. data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
  18. data/lib/stamina-induction/stamina/induction.rb +13 -0
  19. data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
  20. data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
  21. data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
  22. data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
  23. data/lib/stamina-induction/stamina/input_string.rb +123 -0
  24. data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
  25. data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
  26. data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
  27. data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
  28. data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
  29. data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
  30. data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
  31. data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
  32. data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
  33. data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
  34. data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
  35. data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
  36. data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
  37. data/lib/stamina-induction/stamina/sample.rb +309 -0
  38. data/lib/stamina-induction/stamina/scoring.rb +213 -0
  39. metadata +106 -0
@@ -0,0 +1,123 @@
1
module Stamina
  #
  # An input string is a sequence of input symbols (symbols being letters appearing
  # on automaton edges) labeled as positive, negative or unlabeled (provided for test
  # samples and query strings).
  #
  # This class includes the Enumerable module, which allows reasoning about
  # ordered symbols.
  #
  # == Detailed API
  class InputString
    include Enumerable

    #
    # Creates an input string from symbols and positive or negative labeling.
    #
    # Arguments:
    # - symbols: When an Array is provided, it is duplicated by default to be kept
    #   internally. Set dup to false to avoid duplicating it (in both cases, the
    #   internal array will be frozen; with dup=false the caller's own array is
    #   therefore frozen). When a String is provided, the symbols array is created
    #   using <tt>symbols.split(' ')</tt> and then frozen. _dup_ is ignored in
    #   that case.
    # - The positive argument may be true (positive string), false (negative one)
    #   or nil (unlabeled).
    #
    # Raises:
    # - ArgumentError if symbols is neither an Array nor a String.
    #
    def initialize(symbols, positive, dup=true)
      unless Array === symbols || String === symbols
        # `caller` is passed so the backtrace points at the call site
        raise ArgumentError,
              "Input string expects an Array or a String: #{symbols} received",
              caller
      end
      @symbols = case symbols
                 when String then symbols.split(' ').freeze
                 when Array  then (dup ? symbols.dup : symbols).freeze
                 end
      @positive = positive
    end

    #
    # Checks if this input string is empty (aka lambda, i.e. contains no symbol).
    #
    def empty?
      @symbols.empty?
    end
    alias :lambda? :empty?

    #
    # Returns the string size, i.e. the number of its symbols.
    #
    def size
      @symbols.size
    end

    #
    # Returns the exact label of this string, being true (positive string),
    # false (negative string) or nil (unlabeled).
    #
    def label
      @positive
    end

    #
    # Returns true if this input string is positively labeled, false otherwise.
    #
    def positive?
      @positive == true
    end

    #
    # Returns true if this input string is negatively labeled, false otherwise.
    #
    def negative?
      @positive == false
    end

    #
    # Returns true if this input string is unlabeled.
    #
    def unlabeled?
      @positive.nil?
    end

    #
    # Returns a copy of this string with the opposite label. The (frozen) symbols
    # array is shared with the copy. Returns self if the string is unlabeled.
    #
    def negate
      return self if unlabeled?
      InputString.new(@symbols, !@positive, false)
    end

    #
    # Returns an array with the symbols of this string. The returned array may
    # not be modified (it is frozen).
    #
    def symbols
      @symbols
    end

    #
    # Yields the block with each string symbol, in order. Has no effect without
    # a block. Returns the (frozen) symbols array in both cases.
    #
    def each
      return @symbols unless block_given?
      @symbols.each { |s| yield s }
    end

    #
    # Checks equality with another InputString. Returns true if both strings have
    # the same sequence of symbols and the same labeling, false otherwise. Returns
    # nil if _o_ is not an InputString.
    #
    def ==(o)
      return nil unless InputString === o
      label == o.label && @symbols == o.symbols
    end
    alias :eql? :==

    #
    # Computes a hash code for this string. Equal strings (see ==) get equal
    # hash codes; negative and unlabeled strings with the same symbols collide.
    #
    def hash
      @symbols.hash + 37*positive?.hash
    end

    #
    # Prints this string in ADL: a label marker ('+', '-' or '?') followed by a
    # space and the space-separated symbols.
    #
    # The marker is always separated from the first symbol by a space, including
    # for unlabeled strings, so that it is never fused with the first symbol.
    #
    def to_adl
      marker = if unlabeled?
        '?'
      elsif positive?
        '+'
      else
        '-'
      end
      "#{marker} #{@symbols.join(' ')}"
    end
    alias :to_s :to_adl
    alias :inspect :to_adl

  end # class InputString
end # module Stamina
@@ -0,0 +1,226 @@
1
+ require_relative "reg_lang/parser"
2
module Stamina
  #
  # A regular language, represented by a finite automaton that captures it.
  # Instances offer language-level operators (union, intersection, complement,
  # ...) that each build a new RegLang from the underlying automaton.
  #
  class RegLang

    # Automaton capturing this regular language (only reachable from other
    # RegLang instances).
    attr_reader :fa
    protected :fa

    #
    # Builds a regular language on top of the automaton `fa`, which is kept
    # as is (no copy is made here).
    #
    def initialize(fa)
      @fa = fa
    end

    ############################################################################
    # CLASS METHODS

    #
    # Coerces `arg` to a regular language. Tried in order: `arg.to_reglang`,
    # wrapping `arg.to_fa`, then parsing `arg` when it is a String.
    #
    # @raise ArgumentError if `arg` cannot be coerced to a regular language
    #
    def self.coerce(arg)
      return arg.to_reglang if arg.respond_to?(:to_reglang)
      return new(arg.to_fa) if arg.respond_to?(:to_fa)
      return parse(arg) if arg.is_a?(String)
      raise ArgumentError, "Invalid argument #{arg} for `RegLang`"
    end

    #
    # Builds the sigma star language over the alphabet `alph`: a single state,
    # both initial and accepting, looping on every symbol.
    #
    def self.sigma_star(alph)
      looping = Automaton.new do |fa|
        fa.alphabet = alph.to_a
        fa.add_state(:initial => true, :accepting => true)
        alph.each { |symbol| fa.connect(0, 0, symbol) }
      end
      new(looping)
    end

    #
    # Creates a regular language by parsing the expression `str`.
    #
    def self.parse(str)
      parsed = Parser.parse(str)
      RegLang.new(parsed.to_fa)
    end

    ############################################################################
    # OPERATORS

    #
    # Returns the prefix-closed version of this regular language, obtained by
    # marking every state of a copy of the automaton as accepting.
    #
    def prefix_closed
      copy = fa.dup
      copy.each_state do |state|
        state.accepting!
      end
      RegLang.new(copy)
    end

    #
    # Returns the complement of this regular language, computed on a
    # deterministic automaton.
    #
    def complement
      RegLang.new(to_dfa.complement)
    end

    #
    # Complement written as `lang ** -1`.
    #
    # @raise ArgumentError if `x` is not -1
    #
    def **(x)
      unless x == -1
        raise ArgumentError, "Invalid argument for ** (#{x})"
      end
      complement
    end

    #
    # Returns a regular language defined as the union of `self` with `other`.
    # Both automata are copied into a fresh one.
    #
    def +(other)
      target = Automaton.new
      fa.dup(target)
      other.to_fa.dup(target)
      RegLang.new(target)
    end
    alias_method :|, :+
    alias_method :union, :+

    #
    # Returns a regular language defined as the intersection of `self` with
    # `other`, by composing the two underlying automata.
    #
    def *(other)
      composed = fa.compose(other.fa)
      RegLang.new(composed)
    end
    alias_method :&, :*
    alias_method :intersection, :*

    #
    # Returns a regular language capturing all strings from `self` except
    # those in common with `other`.
    #
    def -(other)
      self & other.complement
    end
    alias_method :difference, :-

    #
    # Returns the regular language obtained when abstracting from `symbols`.
    #
    def hide(symbols)
      RegLang.new(fa.hide(symbols))
    end

    #
    # Returns the regular language obtained when projecting on `symbols`.
    #
    def project(symbols)
      RegLang.new(fa.keep(symbols))
    end

    ############################################################################
    # CANONICAL DFA

    # Short prefixes of the language, delegated to the canonical info.
    def short_prefixes
      canonical_info.short_prefixes
    end

    # Kernel of the language, delegated to the canonical info.
    def kernel
      canonical_info.kernel
    end

    # Characteristic sample of the language, delegated to the canonical info.
    def characteristic_sample
      canonical_info.characteristic_sample
    end

    # Lazily builds and caches the CanonicalInfo of this language.
    def canonical_info
      @canonical_info ||= CanonicalInfo.new(self)
    end
    private :canonical_info

    ############################################################################
    # QUERIES

    #
    # Checks if the language is empty, by comparing it with EMPTY.
    #
    def empty?
      self <=> EMPTY
    end

    #
    # Checks if this regular language includes the string `str`.
    #
    def include?(str)
      fa.accepts?(str)
    end

    #
    # Checks if `self` and `other` capture the same regular language, by
    # comparing their canonical automata with `<=>`.
    #
    def eql?(other)
      mine   = to_cdfa
      theirs = other.to_cdfa
      mine <=> theirs
    end
    alias_method :<=>, :eql?

    ############################################################################
    # COERCIONS

    #
    # Returns self.
    #
    def to_reglang
      self
    end

    #
    # Returns a finite automaton capturing this regular language (a copy of
    # the internal one).
    #
    # Returned automaton may be non-deterministic.
    #
    def to_fa
      fa.dup
    end

    #
    # Returns a deterministic finite automaton capturing this regular
    # language.
    #
    # Returned automaton is not guaranteed to be minimal or canonical.
    #
    def to_dfa
      fa.determinize
    end

    #
    # Returns the canonical deterministic finite automaton capturing this
    # regular language.
    #
    def to_cdfa
      fa.to_cdfa
    end

    #
    # Returns a dot output of the canonical automaton, with its states
    # ordered by their :depth decoration.
    #
    def to_dot
      canonical = to_cdfa
      canonical.depth
      canonical.order_states { |s, t| s[:depth] <=> t[:depth] }
      canonical.to_dot
    end

    #
    # Returns the ADL output of the canonical automaton.
    #
    def to_adl
      to_cdfa.to_adl
    end

    # Language built on Automaton::DUM, used as reference by #empty?
    EMPTY = RegLang.new(Automaton::DUM)
  end # class RegLang
end # module Stamina
226
+ require_relative 'reg_lang/canonical_info'
@@ -0,0 +1,181 @@
1
+ module Stamina
2
+ class RegLang
3
+ class CanonicalInfo
4
+
5
# Decoration algorithm storing its result under the :short_prefix key.
# - suppremum: of two candidate prefixes keeps the one that is not nil,
#   otherwise the shorter one (the first one on ties).
# - propagate: extends a copy of the current prefix with the edge symbol.
# NOTE(review): the exact traversal is defined by Stamina::Utils::Decorate,
# which is not visible here.
SHORT_PREFIXES = Stamina::Utils::Decorate.new(:short_prefix).tap do |algo|
  algo.set_suppremum do |d0, d1|
    if d0.nil? || d1.nil?
      d0 || d1
    else
      [d0, d1].min_by(&:size)
    end
  end
  algo.set_propagate { |deco, edge| deco.dup << edge.symbol }
end
19
+
20
+ attr_reader :cdfa
21
+
22
# Captures the canonical DFA of `lang` (obtained through `lang.to_cdfa`);
# every other method of this class works on that automaton.
def initialize(lang)
  canonical = lang.to_cdfa
  @cdfa = canonical
end
25
+
26
# Returns the short prefix of a state or an edge.
#
# Makes sure the prefixes have been computed (see prefixes!), then returns
# the :short_prefix decoration when present. Otherwise (expected for edges)
# the prefix is the short prefix of the source extended with the symbol,
# and is cached on the receiver.
def short_prefix(s_or_e)
  prefixes!
  known = s_or_e[:short_prefix]
  return known if known
  s_or_e[:short_prefix] = s_or_e.source[:short_prefix] + [s_or_e.symbol]
end
33
+
34
# Returns a positive suffix for `state`, cached under the :positive_suffix
# decoration once one has been found.
def positive_suffix(state)
  cached = state[:positive_suffix]
  return cached if cached
  state[:positive_suffix] = find_suffix(state, true)
end
38
+
39
# Returns a negative suffix for `state`, cached under the :negative_suffix
# decoration once one has been found.
def negative_suffix(state)
  cached = state[:negative_suffix]
  return cached if cached
  state[:negative_suffix] = find_suffix(state, false)
end
43
+
44
#
# Returns the short prefixes of the language as a sample: one string per
# state of the canonical DFA, labeled with the acceptance of that state.
#
def short_prefixes
  Sample.new.tap do |sample|
    cdfa.each_state do |state|
      sample << InputString.new(short_prefix(state), state.accepting?)
    end
  end
end
54
+
55
#
# Returns the language kernel as a sample: the empty string (labeled with
# the acceptance of the initial state) plus one string per edge of the
# canonical DFA, labeled with the acceptance of the edge target.
#
def kernel
  Sample.new.tap do |sample|
    sample << InputString.new([], cdfa.initial_state.accepting?)
    cdfa.each_edge do |edge|
      sample << InputString.new(short_prefix(edge), edge.target.accepting?)
    end
  end
end
66
+
67
#
# Builds a characteristic sample from the canonical DFA.
#
# When no positive suffix exists from the initial state, the sample only
# contains the negative empty string. Otherwise it contains:
# - a positive string from the initial state,
# - condition 1: a positive string for each element of the kernel,
# - condition 2: for each state and each edge whose target is another
#   state, a pair of oppositely labeled strings sharing a distinguishing
#   suffix.
#
def characteristic_sample
  sample = Sample.new

  # at least one positive string should be found from the initial state
  initial_suffix = positive_suffix(cdfa.initial_state)
  unless initial_suffix
    sample << InputString.new([], false)
    return sample
  end
  sample << InputString.new(initial_suffix, true)

  # condition 1: positive string for each element of the kernel
  cdfa.each_edge do |edge|
    positive = short_prefix(edge) + positive_suffix(edge.target)
    sample << InputString.new(positive, true, false)
  end

  # condition 2: pair-wise distinguishing suffixes
  cdfa.each_state do |source|
    cdfa.each_edge do |edge|
      target = edge.target
      next if target == source
      suffix = distinguish(source, target)
      next unless suffix
      sign = cdfa.accepts?(suffix, source)
      sample << InputString.new(short_prefix(source) + suffix, sign)
      sample << InputString.new(short_prefix(edge) + suffix, !sign)
    end
  end

  sample
end
102
+
103
+ private
104
+
105
# Ensures that short prefixes of states are computed: runs SHORT_PREFIXES
# on the canonical DFA the first time only (tracked through @prefixes).
def prefixes!
  return if defined?(@prefixes)
  SHORT_PREFIXES.execute(cdfa, nil, [])
  @prefixes = true
end
112
+
113
# Yields every pair (x, y) of the cartesian product of `xs` and `ys`,
# iterating `xs` in the outer loop. Returns `xs`.
def cross(xs, ys)
  xs.each do |x|
    ys.each do |y|
      yield(x, y)
    end
  end
end
116
+
117
# Distinguishes two states, returning a suffix which is accepted by one and
# rejected by the other. The suffix is looked up in the distinguish matrix
# under the sorted pair; nil is returned when the pair is not in the matrix.
#
# Raises ArgumentError when `x` and `y` are equal.
def distinguish(x, y)
  if x == y
    raise ArgumentError, "x and y should be different"
  end
  key = [x, y].sort
  build_distinguish_matrix[key]
end
123
+
124
# Builds (once) and returns the distinguish matrix: a Hash mapping a sorted
# pair of states to a suffix (array of symbols) telling them apart.
#
# Pairs made of an accepting and a non-accepting state are distinguished by
# the empty suffix. The other pairs are discovered backwards: two edges
# with the same symbol entering a distinguished pair make their sources
# distinguished by that symbol followed by the known suffix. A pair is
# (re)recorded when it is new or when the new suffix is strictly shorter.
def build_distinguish_matrix
  return @diff_matrix if @diff_matrix

  matrix  = {}
  pending = []   # pairs to be explored

  # start by marking accepting vs. non-accepting states
  accepting, rejecting = cdfa.states.partition { |s| s.accepting? }
  cross(accepting, rejecting) do |*pair|
    matrix[pair.sort!] = []
    pending << pair
  end

  # visit each pair backwards
  until pending.empty?
    pair = pending.pop
    suffix = matrix[pair]
    left, right = pair[0], pair[1]
    cross(left.in_edges, right.in_edges) do |se, te|
      next unless se.symbol == te.symbol
      sources = [se.source, te.source].sort!
      known = matrix[sources]
      if known.nil? || known.length > (1 + suffix.length)
        matrix[sources] = [se.symbol] + suffix
        pending.push(sources)
      end
    end
  end

  @diff_matrix = matrix
end
156
# Finds a positive/negative suffix for `state`, i.e. a sequence of symbols
# leading from `state` to an accepting (positive == true) or non-accepting
# (positive == false) state.
#
# The search is breadth-first, so that a suffix is found whenever one
# exists (a greedy descent could commit to a dead-end neighbour and give
# up) and the returned suffix is a shortest one. Out edges are explored in
# order, hence the empty suffix and one-symbol suffixes are the same as
# those a first-match lookup on `out_edges` would give.
#
# For negative suffixes only: when every reachable state is accepting, the
# suffix ends with a symbol of the alphabet that has no outgoing edge on a
# reachable state, if any.
#
# Arguments:
# - state: state to start from
# - positive: true to look for a positive suffix, false for a negative one
# - stack: array the suffix is appended to; it is the returned object
# - seen: hash of states that must not be explored; visited states are
#   added to it
#
# Returns `stack` extended with the suffix, or nil if no suffix exists.
def find_suffix(state, positive, stack = [], seen = {})
  seen[state] = true
  queue   = [[state, []]]
  visited = []

  until queue.empty?
    current, path = queue.shift
    # (pos and accepting) or (neg and non-accepting) => path found
    return stack.concat(path) if positive == current.accepting?
    visited << [current, path]
    current.out_edges.each do |edge|
      target = edge.target
      next if seen.key?(target)
      seen[target] = true
      queue << [target, path + [edge.symbol]]
    end
  end

  # unable to find a positive suffix :-(
  return nil if positive

  # in case of negative suffix: pick a symbol of the alphabet that cannot
  # be read from one of the reachable states
  visited.each do |current, path|
    outs = current.out_symbols
    missing = current.automaton.alphabet.find { |s| !outs.include?(s) }
    return stack.concat(path) << missing if missing
  end
  nil
end
178
+
179
+ end # class CanonicalInfo
180
+ end # class RegLang
181
+ end # module Stamina