stamina-induction 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +78 -0
- data/LICENCE.md +22 -0
- data/lib/stamina-induction/stamina-induction.rb +1 -0
- data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
- data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
- data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
- data/lib/stamina-induction/stamina/classifier.rb +55 -0
- data/lib/stamina-induction/stamina/command.rb +6 -0
- data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
- data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
- data/lib/stamina-induction/stamina/command/classify.rb +47 -0
- data/lib/stamina-induction/stamina/command/infer.rb +140 -0
- data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
- data/lib/stamina-induction/stamina/command/score.rb +34 -0
- data/lib/stamina-induction/stamina/dsl.rb +2 -0
- data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
- data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
- data/lib/stamina-induction/stamina/induction.rb +13 -0
- data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
- data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
- data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
- data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
- data/lib/stamina-induction/stamina/input_string.rb +123 -0
- data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
- data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
- data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
- data/lib/stamina-induction/stamina/sample.rb +309 -0
- data/lib/stamina-induction/stamina/scoring.rb +213 -0
- metadata +106 -0
data/CHANGELOG.md
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# 0.5.0 / FIX ME
|
2
|
+
|
3
|
+
* Breaking features.
|
4
|
+
|
5
|
+
* Support for ruby 1.8.7 has been definitively removed.
|
6
|
+
|
7
|
+
* Major enhancements
|
8
|
+
|
9
|
+
* The project has been split in different sub gems (core, induction and gui). This
|
10
|
+
implies a lot of internal changes, but the public API has not been affected. A main
|
11
|
+
'stamina' gem automatically includes all sub gems so previous behavior is guaranteed.
|
12
|
+
|
13
|
+
* Minor enhancements
|
14
|
+
* Fixed a bug with bundler usage in main stamina binary
|
15
|
+
* adl2dot command now supports samples as input in addition to automata. In that case,
|
16
|
+
the dot result models a PTA (prefix tree acceptor)
|
17
|
+
* Added --png to 'stamina adl2dot'
|
18
|
+
|
19
|
+
# 0.4.0 / 2011-05-01
|
20
|
+
|
21
|
+
* Major Enhancements
|
22
|
+
|
23
|
+
* Added Automaton#to_adl as a shortcut for Stamina::ADL::print_automaton(...)
|
24
|
+
* Added Sample#to_pta taken from Induction::Commons
|
25
|
+
* Added Automaton completion (all strings parsable) under Automaton#complete[!?]
|
26
|
+
* Added Automaton stripping (removal of unreachable states) under Automaton#strip[!]
|
27
|
+
* Added Automaton minimization (Hopcroft + Pitchies) under Automaton#minimize
|
28
|
+
* Added Abbadingo generators under Abbadingo::RandomDFA and Abbadingo::RandomSample
|
29
|
+
* Added a main 'stamina' command relying on Quickl. classify/adl2dot commands become
|
30
|
+
subcommands of stamina itself (see stamina --help for a list of available commands).
|
31
|
+
Induction commands (rpni and redblue) are now handled by 'stamina infer' with
|
32
|
+
options.
|
33
|
+
* Error states are now correctly handled in ADL::parse and ADL::flush
|
34
|
+
* RedBlue has been renamed as BlueFringe everywhere (red_?blue -> blue_fringe)
|
35
|
+
|
36
|
+
* Minor Enhancements
|
37
|
+
* Added a few optimizations here and there
|
38
|
+
|
39
|
+
* Bug fixes
|
40
|
+
|
41
|
+
* Fixed a bug in Automaton#depth when some states are unreachable
|
42
|
+
|
43
|
+
# 0.3.1 / 2011-03-24
|
44
|
+
|
45
|
+
* Major Enhancements
|
46
|
+
|
47
|
+
* Implemented the decoration algorithm of Damas10, allowing to decorate states
|
48
|
+
with information propagated from states to states until a fixpoint is reached.
|
49
|
+
* Added Automaton::Metrics module, automatically included, with useful metrics
|
50
|
+
like automaton depth, accepting ratio and so on.
|
51
|
+
* Added Scoring module and Classifier#classification_scoring(sample) method
|
52
|
+
with common measures from information retrieval.
|
53
|
+
|
54
|
+
* On the devel side
|
55
|
+
|
56
|
+
* Moved specific automaton tests under test/stamina/automaton/...
|
57
|
+
|
58
|
+
# 0.3.0 / 2011-03-24
|
59
|
+
|
60
|
+
* On the devel side
|
61
|
+
|
62
|
+
* The project structure is now handled by Noe
|
63
|
+
* Ensures that tests are correctly executed under ruby 1.9.2
|
64
|
+
|
65
|
+
|
66
|
+
# 0.2.2 / 2010-10-22
|
67
|
+
|
68
|
+
* Major Enhancements
|
69
|
+
|
70
|
+
* Sample#<< does not detect inconsistencies anymore, to ensure a linear method instead of a quadratic one.
|
71
|
+
|
72
|
+
* On the devel side
|
73
|
+
|
74
|
+
* Fixes a bug in Rakefile that led to test failures under ruby 1.8.7
|
75
|
+
|
76
|
+
# 0.2.1 / 2010-05-01
|
77
|
+
|
78
|
+
* Main public version for the official competition, extracted from private SVN.
|
data/LICENCE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008-2009 University of Louvain
|
4
|
+
(Universite catholique de Louvain-la-Neuve, Belgium)
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
14
|
+
all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
THE SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative 'stamina/induction'
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Stamina
  module Abbadingo
    #
    # Generates a random DFA using the Abbadingo protocol.
    #
    # The generator creates 5/4 * state_count states, connects every state to
    # a randomly chosen target for each of the two symbols "0" and "1", marks
    # one randomly chosen state as initial and finally hands the automaton to
    # the configured minimizer.
    #
    class RandomDFA

      # Default generation options. The :minimize entry selects the minimizer
      # (:hopcroft or :pitchies); any other value skips minimization.
      DEFAULT_OPTIONS = {
        :minimize => :hopcroft
      }

      #
      # Builds and returns a random DFA.
      #
      # * state_count: the number of states actually generated is
      #   (5/4 * state_count).to_i, before minimization.
      # * accepting_ratio: probability, between 0.0 and 1.0, for each generated
      #   state to be accepting.
      # * options: merged over DEFAULT_OPTIONS.
      #
      # Returns the result of the selected minimizer, or the raw automaton when
      # no known minimizer is selected.
      #
      def execute(state_count = 64,
                  accepting_ratio = 0.5,
                  options = {})
        options = DEFAULT_OPTIONS.merge(options)

        # Built dfa
        dfa = Automaton.new

        # Generate 5/4*state_count states
        (state_count.to_f * 5.0 / 4.0).to_i.times do
          # Kernel.rand lies in [0, 1), so the comparison must be strict:
          # a ratio of 0.0 then never yields an accepting state, and a ratio
          # of 1.0 always does.
          dfa.add_state(:initial   => false,
                        :accepting => (Kernel.rand < accepting_ratio),
                        :error     => false)
        end

        # Generate all edges
        dfa.each_state do |source|
          ["0", "1"].each do |symbol|
            target = dfa.ith_state(Kernel.rand(dfa.state_count))
            dfa.connect(source, target, symbol)
          end
        end

        # Choose an initial state
        dfa.ith_state(Kernel.rand(dfa.state_count)).initial!

        # Minimize the automaton and return it
        case options[:minimize]
        when :hopcroft
          Stamina::Automaton::Minimize::Hopcroft.execute(dfa)
        when :pitchies
          Stamina::Automaton::Minimize::Pitchies.execute(dfa)
        else
          dfa
        end
      end

      #
      # Convenience shortcut for RandomDFA.new.execute(*args).
      #
      def self.execute(*args)
        new.execute(*args)
      end

    end # class RandomDFA
  end # module Abbadingo
end # module Stamina
|
@@ -0,0 +1,146 @@
|
|
1
|
+
module Stamina
  module Abbadingo
    #
    # Generates a random Sample using the Abbadingo protocol.
    #
    class RandomSample

      #
      # Implements an enumerator for binary strings whose length lies between 0
      # and max_length (passed at construction).
      #
      # The enumerator guarantees that strings are sampled with a uniform
      # distribution among all available. As the number of strings of a given
      # length is an exponential function, this means that you've got 50% chance
      # of having a string of length max_length, 25% of max_length - 1, 12.5% of
      # max_length - 2 and so on.
      #
      # How to use it?
      #
      #   # create for strings between 0 and 10 symbols, inclusive
      #   enum = Stamina::Abbadingo::RandomSample::StringEnumerator.new(10)
      #
      #   # this is how to generate strings while a predicate is true
      #   enum.each do |s|
      #     # s is an array of binary symbols, each being the string '0' or '1'
      #     # true for continuing, false otherwise
      #     (true || false)
      #   end
      #
      #   # this is how to generate a fixed number of strings
      #   (1..1000).collect{ enum.one }
      #
      # How does it work? Well, the distribution of strings is as follows:
      #
      #   length   [n]b_strings   [c]umul      log2(n)        log2(c)    log2(c).floor
      #            (2**n)         2**(n+1)-1
      #   0        1              1            0.0000000000   0.000000   0
      #   1        2              3            1.0000000000   1.584963   1
      #   2        4              7            2.0000000000   2.807355   2
      #   3        8              15           3.0000000000   3.906891   3
      #   4        16             31           4.0000000000   4.954196   4
      #   5        32             63           5.0000000000   5.977280   5
      #
      # where _cumul_ is the total number of strings up to _length_ symbols.
      #
      # Therefore, the idea is to see each string as having an identifier, say
      # _x_, between 1 and 2**(max_length+1)-1 (see max).
      # * The length of the _x_th string is log2(x).floor (see length_for)
      # * The string itself is the binary decomposition of x, up to length_for(x)
      #   symbols (see string_for)
      #
      # As those identifiers naturally respect the exponential distribution, sampling
      # the strings is the same as taking string_for(x) for random x up to _max_.
      #
      class StringEnumerator
        include Enumerable

        # Maximal length of a string
        attr_reader :max_length

        def initialize(max_length = 16)
          @max_length = max_length
        end

        #
        # Returns the length of the string whose identifier is _x_ (> 0), that
        # is, floor(log2(x)).
        #
        def length_for(x)
          # Integer arithmetic is exact, whereas Math.log2(x).floor is off by
          # one for large identifiers just below a power of two (for example
          # 2**60 - 1 rounds to 2**60 when converted to a Float).
          x.bit_length - 1
        end

        #
        # Returns the binary string whose identifier is _x_ (> 0), as an array
        # of '0'/'1' symbols holding the length_for(x) lowest bits of x, least
        # significant bit first.
        #
        def string_for(x)
          length = length_for(x)
          (0..length-1).collect{|i| ((x >> i) % 2).to_s}
        end

        #
        # Returns the maximum identifier, which is also the number of strings
        # up to max_length symbols
        #
        def max
          @max ||= 2 ** (max_length+1) - 1
        end

        #
        # Generates a string at random
        #
        def one
          string_for(1+Kernel.rand(max))
        end

        #
        # Yields the block with a random string, until the block returns false
        # or nil.
        #
        def each
          loop do
            break unless yield(one)
          end
        end

      end # class StringEnumerator

      #
      # Generates a [training, test] pair of Sample instances, made of distinct
      # strings of at most _max_length_ symbols that are randomly sampled with a
      # uniform distribution and labeled through classifier.accepts?.
      #
      # The classifier must respond to depth (used for the default max_length),
      # state_count and accepts?(string).
      #
      def self.execute(classifier, max_length = classifier.depth + 3)
        enum = StringEnumerator.new(max_length)

        # We generate 1800 strings for the test set plus state_count**2 strings
        # for the training set. If there are not enough strings available, we
        # generate the maximum we can.
        # NOTE(review): a previous version of this comment announced n^2/2
        # training strings -- confirm which of the two is intended.
        seen = {}
        nb = [1800 + (classifier.state_count**2), enum.max].min

        # Let's go now: draw strings until _nb_ distinct ones have been seen
        enum.each do |s|
          seen[s] = true
          seen.size < nb
        end

        # Make them
        strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
        pos, neg = strings.partition{|s| s.positive?}

        # Split them, 1800 in test and the rest in training set. When one of
        # the classes is too small for that, each string goes at random to one
        # set or the other.
        if (pos.size > 900) && (neg.size > 900)
          pos_test, pos_training = pos[0...900], pos[900..-1]
          neg_test, neg_training = neg[0...900], neg[900..-1]
        else
          pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
          neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
        end

        # Mix positive and negative strings. Array#shuffle gives an unbiased
        # permutation, which sorting with a random comparator does not.
        training = (pos_training + neg_training).shuffle
        test     = (pos_test + neg_test).shuffle
        [Sample.new(training), Sample.new(test)]
      end

    end # class RandomSample
  end # module Abbadingo
end # module Stamina
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Stamina
  #
  # Provides a reusable module for binary classifiers. Classes including this
  # module are required to provide a label_of(string) method, returning '1' for
  # strings considered positive, and '0' for strings considered negative.
  #
  # Note that an Automaton being a classifier it already includes this module.
  #
  module Classifier

    #
    # Computes a signature for a given sample (that is, an ordered set of strings).
    # The signature is a string containing 1 (considered positive, or accepted)
    # and 0 (considered negative, or rejected), one for each string.
    #
    def signature(sample)
      # Start from a fresh mutable copy: appending to a string literal raises
      # under `frozen_string_literal: true` and warns on recent rubies.
      signature = ''.dup
      sample.each do |str|
        signature << label_of(str)
      end
      signature
    end
    alias :classification_signature :signature

    #
    # Classifies a sample then computes the classification scoring that is obtained
    # by comparing the signature obtained by classification and the one of the sample
    # itself. Returns the result of Stamina::Scoring.scoring.
    #
    # This method is actually a convenient shortcut for:
    #
    #   Stamina::Scoring.scoring(signature(sample), sample.signature)
    #
    def scoring(sample)
      Stamina::Scoring.scoring(signature(sample), sample.signature)
    end
    alias :classification_scoring :scoring

    #
    # Checks if a labeled sample is correctly classified by the classifier, that
    # is, if label_of returns '1' for every positive string and '0' for every
    # other one. Returns true for an empty sample.
    #
    def correctly_classify?(sample)
      sample.each do |str|
        label    = label_of(str)
        expected = (str.positive? ? '1' : '0')
        # Stop at the first misclassified string
        return false unless expected==label
      end
      true
    end

  end # module Classifier
  class Automaton
    include Stamina::Classifier
  end
end # module Stamina
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module Stamina
  class Command
    #
    # Generates a DFA following Abbadingo's protocol
    #
    # SYNOPSIS
    # #{program_name} #{command_name}
    #
    # OPTIONS
    # #{summarized_options}
    #
    class AbbadingoDfa < Quickl::Command(__FILE__, __LINE__)
      include Robustness

      # Size of the target automaton
      attr_accessor :size

      # Tolerance on the size
      attr_accessor :size_tolerance

      # Tolerance on the automaton depth
      attr_accessor :depth_tolerance

      # Where to flush the dfa
      attr_accessor :output_file

      # Install options
      options do |opt|

        @size = 64
        opt.on("--size=X", Integer, "Sets the size of the automaton to generate") do |x|
          @size = x
        end

        @size_tolerance = nil
        opt.on("--size-tolerance[=X]", Integer, "Sets the tolerance on automaton size (in number of states)") do |x|
          @size_tolerance = x
        end

        @depth_tolerance = 0
        opt.on("--depth-tolerance[=X]", Integer, "Sets the tolerance on expected automaton depth (in length, 0 by default)") do |x|
          @depth_tolerance = x
        end

        @output_file = nil
        opt.on("-o", "--output=OUTPUT",
               "Flush DFA in output file") do |value|
          # assert_writable_file is not defined here; presumably provided by
          # Robustness -- verify.
          @output_file = assert_writable_file(value)
        end

      end # options

      #
      # Returns true if _dfa_ meets the requested tolerances: its state count
      # must lie within size_tolerance of size, and its depth within
      # depth_tolerance of the expected depth 2*log2(size)-2. A nil tolerance
      # disables the corresponding check.
      #
      def accept?(dfa)
        (size_tolerance.nil? || (size - dfa.state_count).abs <= size_tolerance) &&
        (depth_tolerance.nil? || ((2*Math.log2(size)-2) - dfa.depth).abs <= depth_tolerance)
      end

      # Command execution
      def execute(args)
        # Loaded lazily, only when the command actually runs
        require 'stamina/abbadingo'

        # generate it: RandomDFA takes no constructor argument, the requested
        # size must be given to execute (which otherwise defaults to 64)
        randomizer = Stamina::Abbadingo::RandomDFA.new
        dfa = nil
        loop do
          dfa = randomizer.execute(size)
          break if accept?(dfa)
        end

        # flush it
        if output_file
          File.open(output_file, 'w') do |file|
            Stamina::ADL.print_automaton(dfa, file)
          end
        else
          Stamina::ADL.print_automaton(dfa, $stdout)
        end
      end

    end # class AbbadingoDFA
  end # class Command
end # module Stamina
|