stamina-induction 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +78 -0
- data/LICENCE.md +22 -0
- data/lib/stamina-induction/stamina-induction.rb +1 -0
- data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
- data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
- data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
- data/lib/stamina-induction/stamina/classifier.rb +55 -0
- data/lib/stamina-induction/stamina/command.rb +6 -0
- data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
- data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
- data/lib/stamina-induction/stamina/command/classify.rb +47 -0
- data/lib/stamina-induction/stamina/command/infer.rb +140 -0
- data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
- data/lib/stamina-induction/stamina/command/score.rb +34 -0
- data/lib/stamina-induction/stamina/dsl.rb +2 -0
- data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
- data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
- data/lib/stamina-induction/stamina/induction.rb +13 -0
- data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
- data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
- data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
- data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
- data/lib/stamina-induction/stamina/input_string.rb +123 -0
- data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
- data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
- data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
- data/lib/stamina-induction/stamina/sample.rb +309 -0
- data/lib/stamina-induction/stamina/scoring.rb +213 -0
- metadata +106 -0
@@ -0,0 +1,123 @@
|
|
1
|
+
module Stamina
|
2
|
+
#
|
3
|
+
# An input string is a sequence of input symbols (symbols being letters appearing
|
4
|
+
# on automaton edges) labeled as positive, negative or unlabeled (provided for test
|
5
|
+
# samples and query strings).
|
6
|
+
#
|
7
|
+
# This class includes the Enumerable module, which allows reasoning about
|
8
|
+
# ordered symbols.
|
9
|
+
#
|
10
|
+
# == Detailed API
|
11
|
+
class InputString
  include Enumerable

  #
  # Creates an input string from symbols and positive or negative labeling.
  #
  # Arguments:
  # - symbols: When an Array is provided, it is duplicated by default to be kept
  #   internally. Set dup to false to avoid duplicating it (in both cases, the
  #   internal array is frozen; with dup=false this freezes the caller's array).
  #   When a String is provided, the symbols array is created using
  #   <tt>symbols.split(' ')</tt> and then frozen. _dup_ is ignored in that case.
  # - The positive argument may be true (positive string), false (negative one)
  #   or nil (unlabeled).
  #
  # Raises:
  # - ArgumentError if symbols is neither an Array nor a String.
  #
  def initialize(symbols, positive, dup=true)
    @symbols = case symbols
               when String
                 symbols.split(' ').freeze
               when Array
                 (dup ? symbols.dup : symbols).freeze
               else
                 raise ArgumentError,
                       "Input string expects an Array or a String: #{symbols} received",
                       caller
               end
    @positive = positive
  end

  #
  # Checks if this input string is empty (aka lambda, i.e. contains no symbol).
  #
  def empty?
    @symbols.empty?
  end
  alias :lambda? :empty?

  #
  # Returns the string size, i.e. the number of its symbols.
  #
  def size
    @symbols.size
  end

  #
  # Returns the exact label of this string, being true (positive string),
  # false (negative string) or nil (unlabeled).
  #
  def label
    @positive
  end

  #
  # Returns true if this input string is positively labeled, false otherwise.
  #
  def positive?
    @positive == true
  end

  #
  # Returns true if this input string is negatively labeled, false otherwise.
  #
  def negative?
    @positive == false
  end

  #
  # Returns true if this input string is unlabeled.
  #
  def unlabeled?
    @positive.nil?
  end

  #
  # Copies and returns the same string, but switches the positive flag. This
  # method returns self if the string is unlabeled.
  #
  def negate
    return self if unlabeled?
    # The symbols array is already frozen, so it can be shared without a copy.
    InputString.new(@symbols, !@positive, false)
  end

  #
  # Returns an array with the symbols of this string. The returned array may
  # not be modified (it is frozen).
  #
  def symbols
    @symbols
  end

  #
  # Yields the block with each string symbol, in order. Has no effect without
  # a block. Returns the (frozen) symbols array in both cases.
  #
  def each
    return @symbols unless block_given?
    @symbols.each { |s| yield s }
  end

  #
  # Checks equality with another InputString. Returns true if strings have the
  # same sequence of symbols and the same labeling, false otherwise. Returns
  # nil if _o_ is not an InputString.
  #
  def ==(o)
    return nil unless InputString === o
    label == o.label && @symbols == o.symbols
  end
  alias :eql? :==

  #
  # Computes a hash code for this string.
  #
  # The code is built from the symbols and the exact label (true, false or
  # nil), i.e. from the same attributes that #== compares, so that negative
  # and unlabeled strings do not systematically collide.
  #
  def hash
    [@symbols, @positive].hash
  end

  #
  # Prints this string in ADL: a label marker ('+', '-' or '?') followed by a
  # space and the space-separated symbols.
  #
  def to_adl
    marker = if unlabeled?
               '?'
             elsif positive?
               '+'
             else
               '-'
             end
    # Built by interpolation so that no string literal is mutated.
    "#{marker} #{@symbols.join(' ')}"
  end
  alias :to_s :to_adl
  alias :inspect :to_adl

end # class InputString
|
123
|
+
end # module Stamina
|
@@ -0,0 +1,226 @@
|
|
1
|
+
require_relative "reg_lang/parser"
|
2
|
+
module Stamina
|
3
|
+
class RegLang

  # Automaton capturing this regular language. The reader is protected:
  # external code should go through #to_fa, which returns a copy.
  attr_reader :fa
  protected :fa

  #
  # Creates a regular language instance based on an automaton.
  #
  # The automaton is stored as is (no copy is made here).
  #
  def initialize(fa)
    @fa = fa
  end

  ############################################################################
  # CLASS METHODS

  #
  # Coerces `arg` to a regular language.
  #
  # Tried in order: `arg.to_reglang`, then `arg.to_fa` (wrapped in a new
  # RegLang), then parsing when `arg` is a String.
  #
  # @raise ArgumentError if `arg` cannot be coerced to a regular language
  #
  def self.coerce(arg)
    if arg.respond_to?(:to_reglang)
      arg.to_reglang
    elsif arg.respond_to?(:to_fa)
      new(arg.to_fa)
    elsif arg.is_a?(String)
      parse(arg)
    else
      raise ArgumentError, "Invalid argument #{arg} for `RegLang`"
    end
  end

  #
  # Builds a sigma star language over the alphabet `alph`: a single state,
  # both initial and accepting, looping on every symbol.
  #
  def self.sigma_star(alph)
    new(Automaton.new do |fa|
      fa.alphabet = alph.to_a
      fa.add_state(:initial => true, :accepting => true)
      alph.each do |symbol|
        fa.connect(0, 0, symbol)
      end
    end)
  end

  #
  # Creates a regular language by parsing an expression.
  #
  def self.parse(str)
    RegLang.new(Parser.parse(str).to_fa)
  end

  ############################################################################
  # OPERATORS

  #
  # Returns the prefix-closed version of this regular language.
  #
  # NOTE(review): every state of the copied automaton is marked as accepting;
  # this presumably assumes that each state can reach an accepting one --
  # confirm for automata containing dead states.
  #
  def prefix_closed
    automaton = fa.dup
    automaton.each_state { |s| s.accepting! }
    RegLang.new(automaton)
  end

  #
  # Returns the complement of this regular language, computed on a
  # deterministic automaton (see #to_dfa).
  #
  def complement
    RegLang.new(to_dfa.complement)
  end

  #
  # Syntactic sugar for the complement: `lang ** -1`.
  #
  # @raise ArgumentError if `x` is anything but -1
  #
  def **(x)
    raise ArgumentError, "Invalid argument for ** (#{x})" unless x == -1
    complement
  end

  #
  # Returns a regular language defined as the union of `self` with `other`.
  #
  # `other` may be anything responding to `to_fa`. Both automata are copied
  # into a fresh one through `dup(unioned)`.
  #
  def +(other)
    unioned = Automaton.new
    fa.dup(unioned)
    other.to_fa.dup(unioned)
    RegLang.new(unioned)
  end
  alias :| :+
  alias :union :+

  #
  # Returns a regular language defined as the intersection of `self` with
  # `other`.
  #
  # Like #+, `other` is accessed through `to_fa` rather than through the
  # protected `fa` reader, so that any operand accepted by the union is
  # accepted here as well.
  #
  def *(other)
    RegLang.new(fa.compose(other.to_fa))
  end
  alias :& :*
  alias :intersection :*

  #
  # Returns a regular language capturing all strings from `self` but those
  # in common with `other` (i.e. `self & other.complement`).
  #
  def -(other)
    self & other.complement
  end
  alias :difference :-

  #
  # Returns the regular language defined when abstracting from `symbols`.
  #
  def hide(symbols)
    RegLang.new(fa.hide(symbols))
  end

  #
  # Returns the regular language defined when projecting on `symbols`.
  #
  def project(symbols)
    RegLang.new(fa.keep(symbols))
  end

  ############################################################################
  # CANONICAL DFA

  #
  # Returns the short prefixes of this language (delegated to CanonicalInfo).
  #
  def short_prefixes
    canonical_info.short_prefixes
  end

  #
  # Returns the kernel of this language (delegated to CanonicalInfo).
  #
  def kernel
    canonical_info.kernel
  end

  #
  # Returns a characteristic sample of this language (delegated to
  # CanonicalInfo).
  #
  def characteristic_sample
    canonical_info.characteristic_sample
  end

  private

  # Lazily builds and memoizes the CanonicalInfo helper for this language.
  def canonical_info
    @canonical_info ||= CanonicalInfo.new(self)
  end

  ############################################################################
  # QUERIES
  public

  #
  # Checks if the language is empty, by comparing it with EMPTY.
  #
  def empty?
    self <=> EMPTY
  end

  #
  # Checks if this regular language includes a given string.
  #
  def include?(str)
    fa.accepts?(str)
  end

  #
  # Checks if `self` and `other` capture the same regular language, by
  # comparing their canonical DFAs with `<=>`. The result is whatever that
  # comparison returns.
  #
  # NOTE(review): #hash is not overridden along with #eql?, so equivalent
  # languages are not guaranteed to collide when used as Hash keys.
  #
  def eql?(other)
    self.to_cdfa <=> other.to_cdfa
  end
  alias :<=> :eql?

  ############################################################################
  # COERCIONS

  #
  # Returns self.
  #
  def to_reglang
    self
  end

  #
  # Returns a finite automaton capturing this regular language.
  #
  # Returned automaton may be non-deterministic. It is a copy of the
  # internal one.
  #
  def to_fa
    fa.dup
  end

  #
  # Returns a deterministic finite automaton capturing this regular
  # language.
  #
  # Returned automaton is not guaranteed to be minimal or canonical.
  #
  def to_dfa
    fa.determinize
  end

  #
  # Returns the canonical deterministic finite automaton capturing this
  # regular language.
  #
  def to_cdfa
    fa.to_cdfa
  end

  #
  # Returns a dot output of the canonical DFA, with states ordered by depth.
  #
  def to_dot
    dfa = to_cdfa
    # presumably decorates each state with its :depth, read just below
    dfa.depth
    dfa.order_states { |s, t| s[:depth] <=> t[:depth] }
    dfa.to_dot
  end

  #
  # Returns an ADL output of the canonical DFA.
  #
  def to_adl
    to_cdfa.to_adl
  end

  # The empty regular language, backed by Automaton::DUM.
  EMPTY = RegLang.new(Automaton::DUM)
end # class RegLang
|
225
|
+
end # module Stamina
|
226
|
+
require_relative 'reg_lang/canonical_info'
|
@@ -0,0 +1,181 @@
|
|
1
|
+
module Stamina
|
2
|
+
class RegLang
|
3
|
+
class CanonicalInfo

  # Decoration algorithm that tags states with a :short_prefix, i.e. an
  # array of symbols. When two candidates reach the same state the one
  # with fewer symbols is kept (the first one on ties); propagation along
  # an edge appends the edge symbol to a copy of the source decoration.
  SHORT_PREFIXES = begin
    algo = Stamina::Utils::Decorate.new(:short_prefix)
    algo.set_suppremum do |d0, d1|
      if d0.nil? || d1.nil?
        (d0 || d1)
      else
        d0.size <= d1.size ? d0 : d1
      end
    end
    algo.set_propagate do |deco, edge|
      deco.dup << edge.symbol
    end
    algo
  end

  # Canonical DFA of the language this info has been built on
  attr_reader :cdfa

  #
  # Creates an info instance for `lang`, which must respond to `to_cdfa`.
  #
  def initialize(lang)
    @cdfa = lang.to_cdfa
  end

  # Returns the short prefix of a state or an edge.
  #
  # States get their prefix from the SHORT_PREFIXES decoration. For an edge
  # (no decoration yet), the prefix is the one of its source state followed
  # by the edge symbol; it is memoized on the edge.
  def short_prefix(s_or_e)
    prefixes!
    s_or_e[:short_prefix] ||= begin
      s_or_e.source[:short_prefix] + [s_or_e.symbol]
    end
  end

  # Returns a positive suffix for `state` (memoized on the state), or nil
  # if none can be found.
  def positive_suffix(state)
    state[:positive_suffix] ||= find_suffix(state, true)
  end

  # Returns a negative suffix for `state` (memoized on the state), or nil
  # if none can be found.
  def negative_suffix(state)
    state[:negative_suffix] ||= find_suffix(state, false)
  end

  #
  # Returns the short prefixes of the language as a sample: one string per
  # state of the canonical DFA, labeled according to the state acceptance.
  #
  def short_prefixes
    prefixes = Sample.new
    cdfa.each_state do |s|
      prefixes << InputString.new(short_prefix(s), s.accepting?)
    end
    prefixes
  end

  #
  # Returns the language kernel as a sample: the empty string plus one
  # string per edge of the canonical DFA, labeled according to the
  # acceptance of the state reached.
  #
  def kernel
    kernel = Sample.new
    kernel << InputString.new([], cdfa.initial_state.accepting?)
    cdfa.each_edge do |e|
      kernel << InputString.new(short_prefix(e), e.target.accepting?)
    end
    kernel
  end

  #
  # Builds a characteristic sample.
  #
  # When no positive string can be found from the initial state, the sample
  # only contains the empty string, labeled as negative.
  #
  def characteristic_sample
    sample = Sample.new

    # at least one positive string should be found from
    # the initial state
    initial_suffix = positive_suffix(cdfa.initial_state)
    if initial_suffix.nil?
      sample << InputString.new([], false)
      return sample
    end
    sample << InputString.new(initial_suffix, true)

    # condition 1: positive string for each element of the kernel
    cdfa.each_edge do |edge|
      suffix = positive_suffix(edge.target)
      # no accepting state can be reached through this edge, hence no
      # positive string extends this kernel element
      next if suffix.nil?
      sample << InputString.new(short_prefix(edge) + suffix, true, false)
    end

    # condition 2: pair-wise distinguishing suffixes
    cdfa.each_state do |source|
      cdfa.each_edge do |edge|
        target = edge.target
        next if target == source
        suffix = distinguish(source, target)
        next unless suffix
        sign = cdfa.accepts?(suffix, source)
        sample << InputString.new(short_prefix(source) + suffix, sign)
        sample << InputString.new(short_prefix(edge) + suffix, !sign)
      end
    end

    sample
  end

  private

  # Ensures that short prefixes of states are recognized, by running the
  # SHORT_PREFIXES decoration once on the canonical DFA.
  def prefixes!
    unless defined?(@prefixes)
      SHORT_PREFIXES.execute(cdfa, nil, [])
      @prefixes = true
    end
  end

  # Yields every pair (x, y) of the cartesian product of `xs` and `ys`.
  def cross(xs, ys)
    xs.each { |x| ys.each { |y| yield(x, y) } }
  end

  # Distinguishes two states, returning a suffix which is accepted for one
  # and rejected by the other (nil if the matrix has no entry for the pair).
  #
  # Raises an ArgumentError if `x` and `y` are the same state.
  def distinguish(x, y)
    raise ArgumentError, "x and y should be different" if x == y
    build_distinguish_matrix[[x, y].sort]
  end

  # Builds (once) a Hash mapping sorted pairs of states to a suffix that
  # distinguishes them.
  def build_distinguish_matrix
    @diff_matrix ||= begin
      mat = {}

      # pairs to be explored
      to_explore = []

      # start by marking accepting vs. non-accepting states: the empty
      # suffix distinguishes them
      acc, nonacc = cdfa.states.partition { |s| s.accepting? }
      cross(acc, nonacc) do |*pair|
        mat[pair.sort!] = []
        to_explore << pair
      end

      # Visit each pair backwards: two states reaching a distinguished pair
      # through the same symbol are distinguished by that symbol followed by
      # the suffix of the pair reached
      while pair = to_explore.pop
        suffix = mat[pair]
        cross(pair[0].in_edges, pair[1].in_edges) do |se, te|
          next if se.symbol != te.symbol
          source = [se.source, te.source].sort!
          known = mat[source]
          # keep the candidate when the pair is new or when it is shorter
          # than the suffix recorded so far; re-explore in both cases
          if known.nil? || (known.length > (1 + suffix.length))
            mat[source] = [se.symbol] + suffix
            to_explore.push(source)
          end
        end
      end

      mat
    end
  end

  # Finds a positive/negative suffix for `state`, i.e. a sequence of symbols
  # leading from `state` to a state whose acceptance equals `positive`.
  #
  # The automaton is explored breadth-first, so that every reachable state
  # is considered and the suffix found has the fewest possible symbols.
  # When looking for a negative suffix and no explicit non-accepting state
  # is reachable, a symbol of the alphabet that has no outgoing edge on
  # some reachable state is used instead.
  #
  # `stack` is the sequence of symbols the result starts with and `seen`
  # contains states that must not be explored; neither is modified.
  #
  # Returns an array of symbols, or nil if no suffix can be found.
  def find_suffix(state, positive, stack = [], seen = {})
    visited = seen.dup
    visited[state] = true
    queue = [[state, stack.dup]]
    explored = []

    until queue.empty?
      current, path = queue.shift
      # (pos and accepting) or (neg and non-accepting) => found
      return path if positive == current.accepting?
      explored << [current, path]
      current.out_edges.each do |edge|
        target = edge.target
        next if visited.has_key?(target)
        visited[target] = true
        queue << [target, path + [edge.symbol]]
      end
    end

    # unable to find a positive suffix :-(
    return nil if positive

    # in case of negative suffix: pick a symbol of the alphabet that is
    # missing on the closest explored state
    explored.each do |candidate, path|
      outs = candidate.out_symbols
      missing = candidate.automaton.alphabet.find { |s| !outs.include?(s) }
      return path + [missing] if missing
    end
    nil
  end

end # class CanonicalInfo
|
180
|
+
end # class RegLang
|
181
|
+
end # module Stamina
|