stamina-induction 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +78 -0
- data/LICENCE.md +22 -0
- data/lib/stamina-induction/stamina-induction.rb +1 -0
- data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
- data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
- data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
- data/lib/stamina-induction/stamina/classifier.rb +55 -0
- data/lib/stamina-induction/stamina/command.rb +6 -0
- data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
- data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
- data/lib/stamina-induction/stamina/command/classify.rb +47 -0
- data/lib/stamina-induction/stamina/command/infer.rb +140 -0
- data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
- data/lib/stamina-induction/stamina/command/score.rb +34 -0
- data/lib/stamina-induction/stamina/dsl.rb +2 -0
- data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
- data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
- data/lib/stamina-induction/stamina/induction.rb +13 -0
- data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
- data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
- data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
- data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
- data/lib/stamina-induction/stamina/input_string.rb +123 -0
- data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
- data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
- data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
- data/lib/stamina-induction/stamina/sample.rb +309 -0
- data/lib/stamina-induction/stamina/scoring.rb +213 -0
- metadata +106 -0
data/CHANGELOG.md
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# 0.5.0 / FIX ME
|
2
|
+
|
3
|
+
* Breaking features.
|
4
|
+
|
5
|
+
* Support for ruby 1.8.7 has been definitely removed.
|
6
|
+
|
7
|
+
* Major enhancements
|
8
|
+
|
9
|
+
* The project has been split in different sub gems (core, induction and gui). This
|
10
|
+
implies a lot of internal changes, but the public API has not been affected. A main
|
11
|
+
'stamina' gem automatically includes all sub gems so previous behavior is guaranteed.
|
12
|
+
|
13
|
+
* Minor enhancements
|
14
|
+
* Fixed a bug with bundler usage in main stamina binary
|
15
|
+
* adl2dot command now supports samples as input in addition to automata. In that case,
|
16
|
+
the dot result models a PTA (prefix tree acceptor)
|
17
|
+
* Added --png to 'stamina adl2dot'
|
18
|
+
|
19
|
+
# 0.4.0 / 2011-05-01
|
20
|
+
|
21
|
+
* Major Enhancements
|
22
|
+
|
23
|
+
* Added Automaton#to_adl as a shortcut for Stamina::ADL::print_automaton(...)
|
24
|
+
* Added Sample#to_pta taken from Induction::Commons
|
25
|
+
* Added Automaton completion (all strings parsable) under Automaton#complete[!?]
|
26
|
+
* Added Automaton stripping (removal of unreachable states) under Automaton#strip[!]
|
27
|
+
* Added Automaton minimization (Hopcroft + Pitchies) under Automaton#minimize
|
28
|
+
* Added Abbadingo generators under Abbadingo::RandomDFA and Abbadingo::RandomSample
|
29
|
+
* Added a main 'stamina' command relying on Quickl. classify/adl2dot commands become
|
30
|
+
subcommands of stamina itself (see stamina --help for a list of available commands).
|
31
|
+
Induction commands (rpni and redblue) are now handled by 'stamina infer' with
|
32
|
+
options.
|
33
|
+
* Error states are now correctly handled in ADL::parse and ADL::flush
|
34
|
+
* RedBlue has been renamed as BlueFringe everywhere (red_?blue -> blue_fringe)
|
35
|
+
|
36
|
+
* Minor Enhancements
|
37
|
+
* Added a few optimizations here and there
|
38
|
+
|
39
|
+
* Bug fixes
|
40
|
+
|
41
|
+
* Fixed a bug in Automaton#depth when some states are unreachable
|
42
|
+
|
43
|
+
# 0.3.1 / 2011-03-24
|
44
|
+
|
45
|
+
* Major Enhancements
|
46
|
+
|
47
|
+
* Implemented the decoration algorithm of Damas10, allowing to decorate states
|
48
|
+
with information propagated from states to states until a fixpoint is reached.
|
49
|
+
* Added Automaton::Metrics module, automatically included, with useful metrics
|
50
|
+
like automaton depth, accepting ratio and so on.
|
51
|
+
* Added Scoring module and Classifier#classification_scoring(sample) method
|
52
|
+
with common measures from information retrieval.
|
53
|
+
|
54
|
+
* On the devel side
|
55
|
+
|
56
|
+
* Moved specific automaton tests under test/stamina/automaton/...
|
57
|
+
|
58
|
+
# 0.3.0 / 2011-03-24
|
59
|
+
|
60
|
+
* On the devel side
|
61
|
+
|
62
|
+
* The project structure is now handled by Noe
|
63
|
+
* Ensures that tests are correctly executed under ruby 1.9.2
|
64
|
+
|
65
|
+
|
66
|
+
# 0.2.2 / 2010-10-22
|
67
|
+
|
68
|
+
* Major Enhancements
|
69
|
+
|
70
|
+
* Sample#<< does not detect inconsistencies anymore, to ensure a linear method instead of a quadratic one.
|
71
|
+
|
72
|
+
* On the devel side
|
73
|
+
|
74
|
+
* Fixes a bug in Rakefile that led to test failures under ruby 1.8.7
|
75
|
+
|
76
|
+
# 0.2.1 / 2010-05-01
|
77
|
+
|
78
|
+
* Main public version for the official competition, extracted from private SVN.
|
data/LICENCE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008-2009 University of Louvain
|
4
|
+
(Universite catholique de Louvain-la-Neuve, Belgium)
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
14
|
+
all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
THE SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative 'stamina/induction'
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Stamina
|
2
|
+
module Abbadingo
|
3
|
+
#
|
4
|
+
# Generates a random DFA using the Abbadingo protocol.
|
5
|
+
#
|
6
|
+
class RandomDFA
|
7
|
+
|
8
|
+
DEFAULT_OPTIONS = {
|
9
|
+
:minimize => :hopcroft
|
10
|
+
}
|
11
|
+
|
12
|
+
def execute(state_count = 64,
|
13
|
+
accepting_ratio = 0.5,
|
14
|
+
options = {})
|
15
|
+
options = DEFAULT_OPTIONS.merge(options)
|
16
|
+
|
17
|
+
# Built dfa
|
18
|
+
dfa = Automaton.new
|
19
|
+
|
20
|
+
# Generate 5/4*state_count states
|
21
|
+
(state_count.to_f * 5.0 / 4.0).to_i.times do
|
22
|
+
dfa.add_state(:initial => false,
|
23
|
+
:accepting => (Kernel.rand <= accepting_ratio),
|
24
|
+
:error => false)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Generate all edges
|
28
|
+
dfa.each_state do |source|
|
29
|
+
["0", "1"].each do |symbol|
|
30
|
+
target = dfa.ith_state(Kernel.rand(dfa.state_count))
|
31
|
+
dfa.connect(source, target, symbol)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# Choose an initial state
|
36
|
+
dfa.ith_state(Kernel.rand(dfa.state_count)).initial!
|
37
|
+
|
38
|
+
# Minimize the automaton and return it
|
39
|
+
case options[:minimize]
|
40
|
+
when :hopcroft
|
41
|
+
Stamina::Automaton::Minimize::Hopcroft.execute(dfa)
|
42
|
+
when :pitchies
|
43
|
+
Stamina::Automaton::Minimize::Pitchies.execute(dfa)
|
44
|
+
else
|
45
|
+
dfa
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.execute(*args)
|
50
|
+
new.execute(*args)
|
51
|
+
end
|
52
|
+
|
53
|
+
end # class RandomDFA
|
54
|
+
end # module Abbadingo
|
55
|
+
end # module Stamina
|
@@ -0,0 +1,146 @@
|
|
1
|
+
module Stamina
|
2
|
+
module Abbadingo
|
3
|
+
#
|
4
|
+
# Generates a random Sample using the Abbadingo protocol.
|
5
|
+
#
|
6
|
+
class RandomSample
|
7
|
+
|
8
|
+
#
|
9
|
+
# Implements an enumerator for binary strings whose length lies between 0
|
10
|
+
# and max_length (passed at construction).
|
11
|
+
#
|
12
|
+
# The enumerator guarantees that strings are sampled with an uniform
|
13
|
+
# distribution among all available. As the number of strings of a given
|
14
|
+
# length is an exponential function, this means that you've got 50% chance
|
15
|
+
# of having a string of length max_length, 25% of max_length - 1, 12.5% of
|
16
|
+
# max_length - 2 and so on.
|
17
|
+
#
|
18
|
+
# How to use it?
|
19
|
+
#
|
20
|
+
# # create for strings between 0 and 10 symbols, inclusive
|
21
|
+
# enum = Stamina::Abbadingo::RandomSample::StringEnumerator.new(10)
|
22
|
+
#
|
23
|
+
# # this is how to generate strings while a predicate is true
|
24
|
+
# enum.each do |s|
|
25
|
+
# # s is an array of binary symbols, given as strings ('0' or '1')
|
26
|
+
# # true for continuing, false otherwise
|
27
|
+
# (true || false)
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# # this is how to generate a fixed number of strings
|
31
|
+
# (1..1000).collect{ enum.one }
|
32
|
+
#
|
33
|
+
# How does it work? Well, the distribution of strings is as follows:
|
34
|
+
#
|
35
|
+
# length [n]b_strings [c]umul log2(n) log2(c) log2(c).floor
|
36
|
+
# (2**n) 2**(n+1)-1
|
37
|
+
# 0 1 1 0.0000000000 0.000000 0
|
38
|
+
# 1 2 3 1.0000000000 1.584963 1
|
39
|
+
# 2 4 7 2.0000000000 2.807355 2
|
40
|
+
# 3 8 15 3.0000000000 3.906891 3
|
41
|
+
# 4 16 31 4.0000000000 4.954196 4
|
42
|
+
# 5 32 63 5.0000000000 5.977280 5
|
43
|
+
#
|
44
|
+
# where _cumul_ is the total number of strings up to _length_ symbols.
|
45
|
+
#
|
46
|
+
# Therefore, the idea is to give each string an identifier, say _x_,
|
47
|
+
# between 1 and 2**(max_length+1)-1 (see max).
|
48
|
+
# * The length of the _x_th string is log2(x).floor (see length_for)
|
49
|
+
# * The string itself is the binary decomposition of x, up to length_for(x)
|
50
|
+
# symbols (see string_for)
|
51
|
+
#
|
52
|
+
# As those identifiers naturally respect the exponential distribution, sampling
|
53
|
+
# the strings is the same as taking string_for(x) for random x up to _max_.
|
54
|
+
#
|
55
|
+
class StringEnumerator
|
56
|
+
include Enumerable
|
57
|
+
|
58
|
+
# Maximal length of a string
|
59
|
+
attr_reader :max_length
|
60
|
+
|
61
|
+
def initialize(max_length = 16)
|
62
|
+
@max_length = max_length
|
63
|
+
end
|
64
|
+
|
65
|
+
#
|
66
|
+
# Returns the length of the string whose identifier is _x_ (> 0)
|
67
|
+
#
|
68
|
+
def length_for(x)
|
69
|
+
Math.log2(x).floor
|
70
|
+
end
|
71
|
+
|
72
|
+
#
|
73
|
+
# Returns the binary string whose identifier is _x_ (> 0)
|
74
|
+
#
|
75
|
+
def string_for(x)
|
76
|
+
length = length_for(x)
|
77
|
+
(0..length-1).collect{|i| ((x >> i) % 2).to_s}
|
78
|
+
end
|
79
|
+
|
80
|
+
#
|
81
|
+
# Returns the maximum identifier, which is also the number of strings
|
82
|
+
# up to max_length symbols
|
83
|
+
#
|
84
|
+
def max
|
85
|
+
@max ||= 2 ** (max_length+1) - 1
|
86
|
+
end
|
87
|
+
|
88
|
+
#
|
89
|
+
# Generates a string at random
|
90
|
+
#
|
91
|
+
def one
|
92
|
+
string_for(1+Kernel.rand(max))
|
93
|
+
end
|
94
|
+
|
95
|
+
#
|
96
|
+
# Yields the block with a random string, until the block return false
|
97
|
+
# or nil.
|
98
|
+
#
|
99
|
+
def each
|
100
|
+
begin
|
101
|
+
cont = yield(one)
|
102
|
+
end while cont
|
103
|
+
end
|
104
|
+
|
105
|
+
end # class StringEnumerator
|
106
|
+
|
107
|
+
#
|
108
|
+
# Generates a Sample instance with _nb_ strings randomly sampled with a
|
109
|
+
# uniform distribution over all strings up to _max_length_ symbols.
|
110
|
+
#
|
111
|
+
def self.execute(classifier, max_length = classifier.depth + 3)
|
112
|
+
enum = StringEnumerator.new(max_length)
|
113
|
+
|
114
|
+
# We generate 1800 strings for the test set plus n^2/2 strings for
|
115
|
+
# the training set. If there are no enough strings available, we generate
|
116
|
+
# the maximum we can
|
117
|
+
seen = {}
|
118
|
+
nb = Math.min(1800 + (classifier.state_count**2), enum.max)
|
119
|
+
|
120
|
+
# Let's go now
|
121
|
+
enum.each do |s|
|
122
|
+
seen[s] = true
|
123
|
+
seen.size < nb
|
124
|
+
end
|
125
|
+
|
126
|
+
# Make them
|
127
|
+
strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
|
128
|
+
pos, neg = strings.partition{|s| s.positive?}
|
129
|
+
|
130
|
+
# Split them, 1800 in test and the rest in training set
|
131
|
+
if (pos.size > 900) && (neg.size > 900)
|
132
|
+
pos_test, pos_training = pos[0...900], pos[900..-1]
|
133
|
+
neg_test, neg_training = neg[0...900], neg[900..-1]
|
134
|
+
else
|
135
|
+
pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
|
136
|
+
neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
|
137
|
+
end
|
138
|
+
flusher = lambda{|x,y| Kernel.rand < 0.5 ? 1 : -1}
|
139
|
+
training = (pos_training + neg_training).sort &flusher
|
140
|
+
test = (pos_test + neg_test).sort &flusher
|
141
|
+
[Sample.new(training), Sample.new(test)]
|
142
|
+
end
|
143
|
+
|
144
|
+
end # class RandomSample
|
145
|
+
end # module Abbadingo
|
146
|
+
end # module Stamina
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Stamina
|
2
|
+
#
|
3
|
+
# Provides a reusable module for binary classifiers. Classes including this
|
4
|
+
# module are required to provide a label_of(string) method, returning '1' for
|
5
|
+
# strings considered positive, and '0' for strings considered negative.
|
6
|
+
#
|
7
|
+
# Note that an Automaton being a classifier it already includes this module.
|
8
|
+
#
|
9
|
+
module Classifier
|
10
|
+
|
11
|
+
#
|
12
|
+
# Computes a signature for a given sample (that is, an ordered set of strings).
|
13
|
+
# The signature is a string containing 1 (considered positive, or accepted)
|
14
|
+
# and 0 (considered negative, or rejected), one for each string.
|
15
|
+
#
|
16
|
+
def signature(sample)
|
17
|
+
signature = ''
|
18
|
+
sample.each do |str|
|
19
|
+
signature << label_of(str)
|
20
|
+
end
|
21
|
+
signature
|
22
|
+
end
|
23
|
+
alias :classification_signature :signature
|
24
|
+
|
25
|
+
#
|
26
|
+
# Classifies a sample then compute the classification scoring that is obtained
|
27
|
+
# by comparing the signature obtained by classification and the one of the sample
|
28
|
+
# itself. Returns an object responding to methods defined in Scoring module.
|
29
|
+
#
|
30
|
+
# This method is actually a convenient shortcut for:
|
31
|
+
#
|
32
|
+
# Stamina::Scoring.scoring(signature(sample), sample.signature)
|
33
|
+
#
|
34
|
+
def scoring(sample)
|
35
|
+
Stamina::Scoring.scoring(signature(sample), sample.signature)
|
36
|
+
end
|
37
|
+
alias :classification_scoring :scoring
|
38
|
+
|
39
|
+
#
|
40
|
+
# Checks if a labeled sample is correctly classified by the classifier.
|
41
|
+
#
|
42
|
+
def correctly_classify?(sample)
|
43
|
+
sample.each do |str|
|
44
|
+
label = label_of(str)
|
45
|
+
expected = (str.positive? ? '1' : '0')
|
46
|
+
return false unless expected==label
|
47
|
+
end
|
48
|
+
true
|
49
|
+
end
|
50
|
+
|
51
|
+
end # module Classifier
|
52
|
+
class Automaton
|
53
|
+
include Stamina::Classifier
|
54
|
+
end
|
55
|
+
end # module Stamina
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module Stamina
|
2
|
+
class Command
|
3
|
+
#
|
4
|
+
# Generates a DFA following Abbadingo's protocol
|
5
|
+
#
|
6
|
+
# SYNOPSIS
|
7
|
+
# #{program_name} #{command_name}
|
8
|
+
#
|
9
|
+
# OPTIONS
|
10
|
+
# #{summarized_options}
|
11
|
+
#
|
12
|
+
class AbbadingoDfa < Quickl::Command(__FILE__, __LINE__)
|
13
|
+
include Robustness
|
14
|
+
|
15
|
+
# Size of the target automaton
|
16
|
+
attr_accessor :size
|
17
|
+
|
18
|
+
# Tolerance on the size
|
19
|
+
attr_accessor :size_tolerance
|
20
|
+
|
21
|
+
# Tolerance on the automaton depth
|
22
|
+
attr_accessor :depth_tolerance
|
23
|
+
|
24
|
+
# Where to flush the dfa
|
25
|
+
attr_accessor :output_file
|
26
|
+
|
27
|
+
# Install options
|
28
|
+
options do |opt|
|
29
|
+
|
30
|
+
@size = 64
|
31
|
+
opt.on("--size=X", Integer, "Sets the size of the automaton to generate") do |x|
|
32
|
+
@size = x
|
33
|
+
end
|
34
|
+
|
35
|
+
@size_tolerance = nil
|
36
|
+
opt.on("--size-tolerance[=X]", Integer, "Sets the tolerance on automaton size (in number of states)") do |x|
|
37
|
+
@size_tolerance = x
|
38
|
+
end
|
39
|
+
|
40
|
+
@depth_tolerance = 0
|
41
|
+
opt.on("--depth-tolerance[=X]", Integer, "Sets the tolerance on expected automaton depth (in length, 0 by default)") do |x|
|
42
|
+
@depth_tolerance = x
|
43
|
+
end
|
44
|
+
|
45
|
+
@output_file = nil
|
46
|
+
opt.on("-o", "--output=OUTPUT",
|
47
|
+
"Flush DFA in output file") do |value|
|
48
|
+
@output_file = assert_writable_file(value)
|
49
|
+
end
|
50
|
+
|
51
|
+
end # options
|
52
|
+
|
53
|
+
def accept?(dfa)
|
54
|
+
(size_tolerance.nil? || (size - dfa.state_count).abs <= size_tolerance) &&
|
55
|
+
(depth_tolerance.nil? || ((2*Math.log2(size)-2) - dfa.depth).abs <= depth_tolerance)
|
56
|
+
end
|
57
|
+
|
58
|
+
# Command execution
|
59
|
+
def execute(args)
|
60
|
+
require 'stamina/abbadingo'
|
61
|
+
|
62
|
+
# generate it
|
63
|
+
randomizer = Stamina::Abbadingo::RandomDFA.new(size)
|
64
|
+
begin
|
65
|
+
dfa = randomizer.execute
|
66
|
+
end until accept?(dfa)
|
67
|
+
|
68
|
+
# flush it
|
69
|
+
if output_file
|
70
|
+
File.open(output_file, 'w') do |file|
|
71
|
+
Stamina::ADL.print_automaton(dfa, file)
|
72
|
+
end
|
73
|
+
else
|
74
|
+
Stamina::ADL.print_automaton(dfa, $stdout)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
end # class AbbadingoDFA
|
79
|
+
end # class Command
|
80
|
+
end # module Stamina
|