stamina-induction 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +78 -0
- data/LICENCE.md +22 -0
- data/lib/stamina-induction/stamina-induction.rb +1 -0
- data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
- data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
- data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
- data/lib/stamina-induction/stamina/classifier.rb +55 -0
- data/lib/stamina-induction/stamina/command.rb +6 -0
- data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
- data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
- data/lib/stamina-induction/stamina/command/classify.rb +47 -0
- data/lib/stamina-induction/stamina/command/infer.rb +140 -0
- data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
- data/lib/stamina-induction/stamina/command/score.rb +34 -0
- data/lib/stamina-induction/stamina/dsl.rb +2 -0
- data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
- data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
- data/lib/stamina-induction/stamina/induction.rb +13 -0
- data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
- data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
- data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
- data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
- data/lib/stamina-induction/stamina/input_string.rb +123 -0
- data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
- data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
- data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
- data/lib/stamina-induction/stamina/sample.rb +309 -0
- data/lib/stamina-induction/stamina/scoring.rb +213 -0
- metadata +106 -0
@@ -0,0 +1,123 @@
|
|
1
|
+
module Stamina
  #
  # An input string is a sequence of input symbols (symbols being letters appearing
  # on automaton edges) labeled as positive, negative or unlabeled (provided for test
  # samples and query strings).
  #
  # This class includes the Enumerable module, which allows reasoning about
  # ordered symbols.
  #
  # == Detailed API
  class InputString
    include Enumerable

    #
    # Creates an input string from symbols and positive or negative labeling.
    #
    # Arguments:
    # - symbols: When an array is provided, it is duplicated by default to be kept
    #   internally. Set dup to false to avoid duplicating it (in both cases, the
    #   internal array will be frozen; with dup=false this means the caller's own
    #   array gets frozen). When a String is provided, the symbols array is created
    #   using <tt>symbols.split(' ')</tt> and then frozen. _dup_ is ignored in
    #   that case.
    # - The positive argument may be true (positive string), false (negative one)
    #   or nil (unlabeled).
    #
    # Raises:
    # - ArgumentError if symbols is not an Array nor a String.
    #
    def initialize(symbols, positive, dup=true)
      @symbols = case symbols
        when String
          symbols.split(' ').freeze
        when Array
          (dup ? symbols.dup : symbols).freeze
        else
          # Passing `caller` makes the backtrace start at the code that built the string
          raise ArgumentError,
                "Input string expects an Array or a String: #{symbols} received",
                caller
      end
      @positive = positive
    end

    #
    # Checks if this input string is empty (aka lambda, i.e. contains no symbol).
    #
    def empty?() @symbols.empty? end
    alias :lambda? :empty?

    #
    # Returns the string size, i.e. number of its symbols.
    #
    def size() @symbols.size end

    #
    # Returns the exact label of this string, being true (positive string)
    # false (negative string) or nil (unlabeled)
    #
    def label() @positive end

    #
    # Returns true if this input string is positively labeled, false otherwise.
    #
    def positive?() @positive==true end

    #
    # Returns true if this input string is negatively labeled, false otherwise.
    #
    def negative?() @positive==false end

    #
    # Returns true if this input string is unlabeled.
    #
    def unlabeled?() @positive.nil? end

    #
    # Copies and returns the same string, but switches the positive flag. This
    # method returns self if it is unlabeled.
    #
    # The copy shares the (frozen) symbols array with the receiver.
    #
    def negate
      return self if unlabeled?
      InputString.new(@symbols, !@positive, false)
    end

    #
    # Returns an array with symbols of this string. Returned array may not be
    # modified (it is frozen).
    #
    def symbols() @symbols end

    #
    # Yields the block with each string symbol, in order. Has no effect without
    # block. Returns the (frozen) symbols array in both cases.
    #
    def each(&block)
      @symbols.each(&block) if block
      @symbols
    end

    #
    # Checks equality with another InputString. Returns true if strings have same
    # sequence of symbols and same labeling, false otherwise. Returns nil if _o_
    # is not an InputString.
    #
    def ==(o)
      return nil unless InputString===o
      label == o.label and @symbols == o.symbols
    end
    alias :eql? :==

    #
    # Computes a hash code for this string.
    #
    # The code is derived from the symbols and the exact label, i.e. the same
    # pair of values that #== compares, so that negative and unlabeled strings
    # over the same symbols do not systematically collide.
    #
    def hash
      [@symbols, @positive].hash
    end

    #
    # Prints this string in ADL: a label marker ('?' when unlabeled, '+' when
    # positive, '-' otherwise), one space, then the symbols separated by spaces.
    #
    # A fresh String is built on every call; no string literal is mutated, so
    # the method keeps working when string literals are frozen.
    #
    def to_adl
      marker = if unlabeled?
        '?'
      elsif positive?
        '+'
      else
        '-'
      end
      "#{marker} #{@symbols.join(' ')}"
    end
    alias :to_s :to_adl
    alias :inspect :to_adl

  end # class InputString
end # module Stamina
|
@@ -0,0 +1,226 @@
|
|
1
|
+
require_relative "reg_lang/parser"
|
2
|
+
module Stamina
  #
  # A regular language, represented internally by a finite automaton and
  # manipulated through language-level operators (union, intersection,
  # difference, complement, ...), queries and coercions.
  #
  class RegLang

    # Automaton capturing this regular language (only reachable from other
    # RegLang instances, see #to_fa for a public copy)
    attr_reader :fa
    protected :fa

    #
    # Creates a regular language instance based on an automaton. The automaton
    # is kept as is (no copy is made here).
    #
    def initialize(fa)
      @fa = fa
    end

    ############################################################################
    # CLASS METHODS

    #
    # Coerces `arg` to a regular language.
    #
    # Conversions are attempted in this order: `arg.to_reglang`, then
    # `new(arg.to_fa)`, then `parse(arg)` when `arg` is a String.
    #
    # @raise ArgumentError if `arg` cannot be coerced to a regular language
    #
    def self.coerce(arg)
      return arg.to_reglang if arg.respond_to?(:to_reglang)
      return new(arg.to_fa) if arg.respond_to?(:to_fa)
      return parse(arg) if arg.is_a?(String)
      raise ArgumentError, "Invalid argument #{arg} for `RegLang`"
    end

    #
    # Builds a sigma star language: a single state, both initial and accepting,
    # looping on itself with every symbol of `alph`.
    #
    def self.sigma_star(alph)
      automaton = Automaton.new do |fa|
        fa.alphabet = alph.to_a
        fa.add_state(:initial => true, :accepting => true)
        alph.each{|symbol| fa.connect(0, 0, symbol) }
      end
      new(automaton)
    end

    #
    # Creates a regular language by parsing an expression with Parser and
    # converting the result to an automaton.
    #
    def self.parse(str)
      parsed = Parser.parse(str)
      RegLang.new(parsed.to_fa)
    end

    ############################################################################
    # OPERATORS

    #
    # Returns the prefix-closed version of this regular language, obtained by
    # marking every state of a copy of the automaton as accepting.
    #
    def prefix_closed
      closed = fa.dup
      closed.each_state do |state|
        state.accepting!
      end
      RegLang.new(closed)
    end

    #
    # Returns the complement of this regular language, computed on a
    # deterministic automaton (see #to_dfa).
    #
    def complement
      RegLang.new(to_dfa.complement)
    end

    #
    # Power operator, only supported with -1, which yields the complement.
    #
    # @raise ArgumentError for any other exponent
    #
    def **(x)
      return complement if x == -1
      raise ArgumentError, "Invalid argument for ** (#{x})"
    end

    #
    # Returns a regular language defined as the union of `self` with `other`.
    #
    # Both automata are copied, in turn, into one fresh automaton; `other`
    # only needs to respond to `to_fa`.
    #
    def +(other)
      merged = Automaton.new
      fa.dup(merged)
      other.to_fa.dup(merged)
      RegLang.new(merged)
    end
    alias :| :+
    alias :union :+

    #
    # Returns a regular language defined as the intersection of `self` with
    # `other`, by composing both automata. `other` must expose the protected
    # `fa` reader, i.e. be a RegLang.
    #
    def *(other)
      composed = fa.compose(other.fa)
      RegLang.new(composed)
    end
    alias :& :*
    alias :intersection :*

    #
    # Returns a regular language capturing all strings from `self` but those
    # in common with `other` (intersection with the complement of `other`).
    #
    def -(other)
      excluded = other.complement
      self & excluded
    end
    alias :difference :-

    #
    # Returns the regular language defined when abstracting from `symbols`
    # (delegates to `hide` on the automaton).
    #
    def hide(symbols)
      hidden = fa.hide(symbols)
      RegLang.new(hidden)
    end

    #
    # Returns the regular language defined when projecting on `symbols`
    # (delegates to `keep` on the automaton).
    #
    def project(symbols)
      kept = fa.keep(symbols)
      RegLang.new(kept)
    end

    ############################################################################
    # CANONICAL DFA

    # Short prefixes of the language, as computed by CanonicalInfo
    def short_prefixes
      canonical_info.short_prefixes
    end

    # Kernel of the language, as computed by CanonicalInfo
    def kernel
      canonical_info.kernel
    end

    # Characteristic sample of the language, as computed by CanonicalInfo
    def characteristic_sample
      canonical_info.characteristic_sample
    end

    # Lazily builds, then reuses, the CanonicalInfo helper of this language
    def canonical_info
      @canonical_info ||= CanonicalInfo.new(self)
    end
    private :canonical_info

    ############################################################################
    # QUERIES

    #
    # Checks if the language is empty, by comparing it with EMPTY
    #
    def empty?
      self <=> EMPTY
    end

    #
    # Checks if this regular language includes a given string
    #
    def include?(str)
      fa.accepts?(str)
    end

    #
    # Checks if `self` and `other` capture the same regular language, by
    # comparing their canonical DFAs with `<=>`. The value returned is whatever
    # that comparison returns.
    #
    def eql?(other)
      mine   = to_cdfa
      theirs = other.to_cdfa
      mine <=> theirs
    end
    alias :<=> :eql?

    ############################################################################
    # COERCIONS

    #
    # Returns self.
    #
    def to_reglang
      self
    end

    #
    # Returns a finite automaton capturing this regular language.
    #
    # Returned automaton is a copy of the internal one and may be
    # non-deterministic.
    #
    def to_fa
      fa.dup
    end

    #
    # Returns a deterministic finite automaton capturing this regular
    # language.
    #
    # Returned automaton is not guaranteed to be minimal or canonical.
    #
    def to_dfa
      fa.determinize
    end

    #
    # Returns the canonical deterministic finite automaton capturing this
    # regular language.
    #
    def to_cdfa
      fa.to_cdfa
    end

    #
    # Returns a dot output of the canonical DFA, whose states are first
    # ordered by their :depth mark.
    #
    def to_dot
      canonical = to_cdfa
      # NOTE(review): `depth` is called for its side effect; presumably it is
      # what sets the :depth mark read by the ordering block -- confirm.
      canonical.depth
      canonical.order_states{|left, right| left[:depth] <=> right[:depth] }
      canonical.to_dot
    end

    #
    # Returns an ADL output of the canonical DFA
    #
    def to_adl
      to_cdfa.to_adl
    end

    # The empty language, built on Automaton::DUM
    EMPTY = RegLang.new(Automaton::DUM)
  end # class RegLang
end # module Stamina
|
226
|
+
require_relative 'reg_lang/canonical_info'
|
@@ -0,0 +1,181 @@
|
|
1
|
+
module Stamina
  class RegLang
    #
    # Computes sample-oriented information (short prefixes, kernel and
    # characteristic sample) from the canonical DFA of a regular language.
    #
    # Intermediate results are memoized as marks on the states and edges of
    # the canonical DFA itself (:short_prefix, :positive_suffix and
    # :negative_suffix).
    #
    class CanonicalInfo

      # Decoration algorithm marking states with :short_prefix. When two
      # candidate prefixes meet on a state the shorter one wins (the first one
      # on ties); a prefix is propagated along an edge by appending the edge
      # symbol to a copy of it.
      SHORT_PREFIXES = begin
        algo = Stamina::Utils::Decorate.new(:short_prefix)
        algo.set_suppremum do |d0,d1|
          if (d0.nil? || d1.nil?)
            (d0 || d1)
          else
            d0.size <= d1.size ? d0 : d1
          end
        end
        algo.set_propagate do |deco, edge|
          deco.dup << edge.symbol
        end
        algo
      end

      # Canonical DFA of the language, as returned by `lang.to_cdfa`
      attr_reader :cdfa

      # Creates the helper for `lang`, which must respond to `to_cdfa`.
      def initialize(lang)
        @cdfa = lang.to_cdfa
      end

      # Returns the short prefix of a state or an edge.
      #
      # States are marked by the SHORT_PREFIXES decoration. An edge has no mark
      # at first: its prefix is the one of its source state followed by its own
      # symbol, and is memoized on the edge.
      def short_prefix(s_or_e)
        prefixes!
        s_or_e[:short_prefix] ||= begin
          s_or_e.source[:short_prefix] + [s_or_e.symbol]
        end
      end

      # Returns a positive suffix for `state` (memoized on the state), or nil
      # when none can be found.
      def positive_suffix(state)
        state[:positive_suffix] ||= find_suffix(state, true)
      end

      # Returns a negative suffix for `state` (memoized on the state), or nil
      # when none can be found.
      def negative_suffix(state)
        state[:negative_suffix] ||= find_suffix(state, false)
      end

      #
      # Returns the short prefixes of the language as a sample: one string per
      # state, labeled with the accepting flag of that state.
      #
      def short_prefixes
        prefixes = Sample.new
        cdfa.each_state do |s|
          prefixes << InputString.new(short_prefix(s), s.accepting?)
        end
        prefixes
      end

      #
      # Returns the language kernel as a sample: the empty string plus one
      # string per edge, each labeled with the accepting flag of the state
      # it reaches.
      #
      def kernel
        sample = Sample.new
        sample << InputString.new([], cdfa.initial_state.accepting?)
        cdfa.each_edge do |e|
          sample << InputString.new(short_prefix(e), e.target.accepting?)
        end
        sample
      end

      #
      # Builds a characteristic sample
      #
      def characteristic_sample
        sample = Sample.new

        # at least one positive string should be found from
        # the initial state; otherwise the sample only states that the empty
        # string is negative
        if pos = positive_suffix(cdfa.initial_state)
          sample << InputString.new(pos, true)
        else
          sample << InputString.new([], false)
          return sample
        end

        # condition 1: positive string for each element of the kernel
        cdfa.each_edge do |edge|
          suffix = positive_suffix(edge.target)
          # no accepting state can be reached through this edge: there is no
          # positive string to add for it (Array + nil would raise TypeError)
          next if suffix.nil?
          sample << InputString.new(short_prefix(edge) + suffix, true, false)
        end

        # condition 2: pair-wise distinguishing suffixes
        cdfa.each_state do |source|
          cdfa.each_edge do |edge|
            next if (target = edge.target) == source
            if suffix = distinguish(source, target)
              # `sign` is the label of the suffix when read from `source`; the
              # same suffix read after `edge` gets the opposite label
              sign = cdfa.accepts?(suffix, source)
              sample << InputString.new(short_prefix(source) + suffix, sign)
              sample << InputString.new(short_prefix(edge) + suffix, !sign)
            end
          end
        end

        sample
      end

      private

      # Ensures that short prefixes of states are recognized, by running the
      # SHORT_PREFIXES decoration once on the canonical DFA.
      def prefixes!
        unless defined?(@prefixes)
          SHORT_PREFIXES.execute(cdfa, nil, [])
          @prefixes = true
        end
      end

      # Yields every pair (x, y) with x taken from `xs` and y from `ys`.
      def cross(xs, ys)
        xs.each{|x| ys.each{|y| yield(x,y)}}
      end

      # Distinguishes two states, returning a suffix which is accepted for one
      # and rejected by the other. Returns nil when the matrix holds no suffix
      # for that pair.
      #
      # Raises an ArgumentError when x and y are the same state.
      def distinguish(x, y)
        raise ArgumentError, "x and y should be different" if x == y
        build_distinguish_matrix[[x,y].sort]
      end

      # Builds (once) and returns a Hash mapping sorted pairs of states to a
      # suffix distinguishing them.
      def build_distinguish_matrix
        @diff_matrix ||= begin
          mat = {}

          # pairs to be explored
          to_explore = []

          # start by marking accepting vs. non-accepting states: the empty
          # suffix already distinguishes them
          acc, nonacc = cdfa.states.partition{|s| s.accepting?}
          cross(acc, nonacc) do |*pair|
            mat[pair.sort!] = []
            to_explore << pair
          end

          # Visit each pair backwards: two sources reaching a distinguished
          # pair through the same symbol are distinguished by that symbol
          # followed by the pair's suffix
          while pair = to_explore.pop
            suffix = mat[pair]
            cross(pair[0].in_edges, pair[1].in_edges) do |se, te|
              next if se.symbol != te.symbol
              source = [se.source, te.source].sort!
              # keep the shortest suffix known so far, and (re)explore the
              # pair whenever its suffix has been set or improved
              if mat[source].nil? ||
                 (mat[source].length > (1+suffix.length))
                mat[source] = [se.symbol] + suffix
                to_explore.push(source)
              end
            end
          end

          mat
        end
      end

      # Finds a positive/negative suffix for `state` through a depth-first
      # search with backtracking.
      #
      # - state: the state to start from
      # - positive: true to look for an accepted suffix, false for a rejected one
      # - stack: symbols read so far (never mutated, extended by copy)
      # - seen: states already visited by the search
      #
      # Returns an array of symbols, or nil when no suffix exists. When a
      # neighbour leads to a dead end the search resumes with the next one, so
      # a suffix is found whenever one exists along the edges of the automaton.
      def find_suffix(state, positive, stack = [], seen = {})
        # (pos and accepting) or (neg and non-accepting) => lambda
        return stack if positive == state.accepting?

        # at one step => augment stack with symbol
        direct = state.out_edges.find{|e| positive == e.target.accepting?}
        return stack + [direct.symbol] if direct

        # recurse on neighbours not visited yet (this also skips self-loops),
        # trying the next one when a branch is a dead end
        seen[state] = true
        state.out_edges.each do |edge|
          next if seen.has_key?(edge.target)
          found = find_suffix(edge.target, positive, stack + [edge.symbol], seen)
          return found if found
        end

        # unable to find a positive suffix :-(
        return nil if positive

        # in case of negative suffix: pick one in alphabet which has no
        # outgoing edge here
        outs = state.out_symbols
        missing = state.automaton.alphabet.find{|s| !outs.include?(s)}
        missing ? (stack + [missing]) : nil
      end

    end # class CanonicalInfo
  end # class RegLang
end # module Stamina
|