stamina-induction 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/CHANGELOG.md +78 -0
  2. data/LICENCE.md +22 -0
  3. data/lib/stamina-induction/stamina-induction.rb +1 -0
  4. data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
  5. data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
  6. data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
  7. data/lib/stamina-induction/stamina/classifier.rb +55 -0
  8. data/lib/stamina-induction/stamina/command.rb +6 -0
  9. data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
  10. data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
  11. data/lib/stamina-induction/stamina/command/classify.rb +47 -0
  12. data/lib/stamina-induction/stamina/command/infer.rb +140 -0
  13. data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
  14. data/lib/stamina-induction/stamina/command/score.rb +34 -0
  15. data/lib/stamina-induction/stamina/dsl.rb +2 -0
  16. data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
  17. data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
  18. data/lib/stamina-induction/stamina/induction.rb +13 -0
  19. data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
  20. data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
  21. data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
  22. data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
  23. data/lib/stamina-induction/stamina/input_string.rb +123 -0
  24. data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
  25. data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
  26. data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
  27. data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
  28. data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
  29. data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
  30. data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
  31. data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
  32. data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
  33. data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
  34. data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
  35. data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
  36. data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
  37. data/lib/stamina-induction/stamina/sample.rb +309 -0
  38. data/lib/stamina-induction/stamina/scoring.rb +213 -0
  39. metadata +106 -0
@@ -0,0 +1,39 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Generates samples following Abbadingo's protocol
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} target.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class AbbadingoSamples < Quickl::Command(__FILE__, __LINE__)
13
+
14
+ # Install options
15
+ options do |opt|
16
+
17
+ end # options
18
+
19
+ # Command execution
20
# Generates a training sample and a test sample from a target automaton and
# writes both next to the automaton file.
#
# args - command-line arguments; exactly one is expected (the path of the
#        target automaton file). Any other count raises Quickl::Help.
#
# The samples are written in the directory of the target file, under the
# names "<basename>-training.adl" and "<basename>-test.adl", where <basename>
# is the target file name stripped of a trailing '.adl'.
def execute(args)
  raise Quickl::Help unless args.size == 1

  # Work out where the automaton lives; results are flushed alongside it
  target_file = args.first
  basename = File.basename(target_file, '.adl')
  dirname = File.dirname(target_file)
  target = Stamina::ADL::parse_automaton_file(target_file)

  # Loaded only when the command actually runs
  require 'stamina/abbadingo'
  training, test = Stamina::Abbadingo::RandomSample.execute(target)

  # Flush both samples aside the target automaton file
  training_file = File.join(dirname, "#{basename}-training.adl")
  Stamina::ADL::print_sample_in_file(training, training_file)
  test_file = File.join(dirname, "#{basename}-test.adl")
  Stamina::ADL::print_sample_in_file(test, test_file)
end
36
+
37
+ end # class AbbadingoSamples
38
+ end # class Command
39
+ end # module Stamina
@@ -0,0 +1,47 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Classifies a sample with an automaton
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} sample.adl automaton.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class Classify < Quickl::Command(__FILE__, __LINE__)
13
+ include Robustness
14
+
15
+ # Where to flush the output
16
+ attr_accessor :output_file
17
+
18
+ # Install options
19
+ options do |opt|
20
+
21
+ @output_file = nil
22
+ opt.on("-o", "--output=OUTPUT",
23
+ "Flush classification signature in output file") do |value|
24
+ assert_writable_file(value)
25
+ @output_file = value
26
+ end
27
+
28
+ end # options
29
+
30
+ # Command execution
31
# Classifies a sample with an automaton and emits the resulting signature.
#
# args - command-line arguments; exactly two are expected: the sample file
#        first, the automaton file last. Any other count raises Quickl::Help.
#
# Both paths go through assert_readable_file before being parsed. The
# signature computed by the automaton is written to the file named by
# output_file when one is set, and appended to $stdout otherwise.
def execute(args)
  raise Quickl::Help unless args.size == 2

  sample_path = assert_readable_file(args.first)
  sample = Stamina::ADL::parse_sample_file(sample_path)
  automaton_path = assert_readable_file(args.last)
  automaton = Stamina::ADL::parse_automaton_file(automaton_path)

  destination = output_file
  # No output file requested: the signature goes to standard output
  return $stdout << automaton.signature(sample) unless destination

  # The signature is computed once the file is open, as the block's only step
  File.open(destination, 'w') do |io|
    io << automaton.signature(sample)
  end
end
44
+
45
+ end # class Classify
46
+ end # class Command
47
+ end # module Stamina
@@ -0,0 +1,140 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Grammar inference, induces a DFA from a training sample using a
5
+ # chosen algorithm.
6
+ #
7
+ # SYNOPSIS
8
+ # #{program_name} #{command_name} sample.adl
9
+ #
10
+ # OPTIONS
11
+ # #{summarized_options}
12
+ #
13
+ class Infer < Quickl::Command(__FILE__, __LINE__)
14
+ include Robustness
15
+
16
+ attr_accessor :algorithm
17
+ attr_accessor :take
18
+ attr_accessor :score
19
+ attr_accessor :verbose
20
+ attr_accessor :drop
21
+ attr_accessor :output_file
22
+
23
+ # Install options
24
+ options do |opt|
25
+
26
+ @algorithm = :rpni
27
+ opt.on("--algorithm=X", "Sets the induction algorithm to use (rpni, bluefringe)") do |x|
28
+ @algorithm = x.to_sym
29
+ end
30
+
31
+ @take = 1.0
32
+ opt.on("--take=X", Float, "Take only X% of available strings") do |x|
33
+ @take = x.to_f
34
+ unless @take > 0.0 and @take <= 1.0
35
+ raise Quickl::InvalidOption, "Invalid --take option: #{@take}"
36
+ end
37
+ end
38
+
39
+ @score = nil
40
+ opt.on("--score=test.adl", "Add scoring information to metadata, using test.adl file") do |x|
41
+ @score = assert_readable_file(x)
42
+ end
43
+
44
+ @verbose = true
45
+ opt.on("-v", "--[no-]verbose", "Verbose mode") do |x|
46
+ @verbose = x
47
+ end
48
+
49
+ @drop = false
50
+ opt.on("-d", "--drop", "Drop result") do |x|
51
+ @drop = x
52
+ end
53
+
54
+ @output_file = nil
55
+ opt.on("-o", "--output=OUTPUT",
56
+ "Flush induced DFA in output file") do |value|
57
+ @output_file = assert_writable_file(value)
58
+ end
59
+
60
+ end # options
61
+
62
# Runs the selected induction algorithm on `sample` and times it.
#
# sample - the training sample, handed as-is to the algorithm class.
#
# The algorithm class is chosen from the `algorithm` attribute: :rpni maps
# to Stamina::Induction::RPNI and :bluefringe to
# Stamina::Induction::BlueFringe. The current `verbose` flag is forwarded
# through the options hash.
#
# Returns a two-element array [dfa, tms]: the value returned by the
# algorithm and the Benchmark::Tms measured around its execution.
#
# Raises Quickl::InvalidOption when `algorithm` names no known algorithm.
def launch_induction(sample)
  require 'benchmark'

  algo_clazz = case algorithm
               when :rpni
                 Stamina::Induction::RPNI
               when :bluefringe
                 Stamina::Induction::BlueFringe
               else
                 # Report the offending value through the accessor; the message
                 # used to interpolate an undefined local and died with NameError.
                 raise Quickl::InvalidOption, "Unknown induction algorithm: #{algorithm}"
               end

  # dfa is assigned from inside the measured block so only the induction is timed
  dfa = nil
  tms = Benchmark.measure do
    dfa = algo_clazz.execute(sample, {:verbose => verbose})
  end
  [dfa, tms]
end
80
+
81
# Parses the sample stored in `file`, optionally keeping only a random
# subset of its strings.
#
# file - path of the sample file, handed to Stamina::ADL.parse_sample_file.
#
# When @take is exactly 1.0 the parsed sample is returned untouched.
# Otherwise a new Stamina::Sample is built: positive strings are visited
# first, then negative ones, and each string is kept when a fresh
# Kernel.rand draw is lower than @take.
def load_sample(file)
  parsed = Stamina::ADL.parse_sample_file(file)
  return parsed if @take == 1.0

  subset = Stamina::Sample.new
  # One independent random draw per string
  keep = proc { |string| subset << string if Kernel.rand < @take }
  parsed.each_positive(&keep)
  parsed.each_negative(&keep)
  subset
end
91
+
92
+ # Command execution
93
# Runs the whole inference command: loads the sample, induces a DFA, flushes
# it and prints meta information about the run.
#
# args - command-line arguments; exactly one is expected (the sample file).
#        Any other count raises Quickl::Help.
#
# Steps, in order:
# - a progress message goes to $stderr when `verbose` is set;
# - the sample is loaded through load_sample and the DFA induced through
#   launch_induction;
# - unless `drop` is set, the DFA is printed to `output_file` when one is
#   given, to $stdout otherwise;
# - a meta hash (algorithm, sample name, take, sample counts and timings) is
#   built; when `score` is set, that file is parsed as a test sample and the
#   scoring of the DFA's labelling against it is merged into the hash;
# - the hash is printed with puts, using its inspect form.
def execute(args)
  raise Quickl::Help unless args.size == 1

  # Parse the sample
  $stderr << "Parsing sample...\n" if verbose
  sample = load_sample(assert_readable_file(args.first))

  # Induce the DFA
  dfa, tms = launch_induction(sample)

  # Flush the result, either to the requested file or to standard output
  unless drop
    if (destination = output_file)
      File.open(destination, 'w') { |file| Stamina::ADL.print_automaton(dfa, file) }
    else
      Stamina::ADL.print_automaton(dfa, $stdout)
    end
  end

  # Build meta information
  meta = {
    :algorithm      => algorithm,
    :sample         => File.basename(args.first),
    :take           => take,
    :sample_size    => sample.size,
    :positive_count => sample.positive_count,
    :negative_count => sample.negative_count,
    :real_time      => tms.real,
    :total_time     => tms.total,
    :user_time      => tms.utime + tms.cutime,
    :system_time    => tms.stime + tms.cstime
  }

  # Optional scoring against a test sample
  if (test_file = score)
    test = Stamina::ADL::parse_sample_file(test_file)
    scoring = Scoring.scoring(dfa.signature(test), test.signature)
    meta.merge!(scoring.to_h)
  end

  # Display information
  puts meta.inspect
end
137
+
138
+ end # class Infer
139
+ end # class Command
140
+ end # module Stamina
@@ -0,0 +1,50 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Prints metrics about an automaton or sample
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} [file.adl]
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class Metrics < Quickl::Command(__FILE__, __LINE__)
13
+ include Robustness
14
+
15
+ # Install options
16
+ options do |opt|
17
+
18
+ end # options
19
+
20
+ # Command execution
21
# Prints metrics about an automaton or a sample.
#
# args - command-line arguments; at most one is expected (the file to
#        inspect). More than one raises Quickl::Help. Without any argument
#        the input is read from $stdin.
#
# The input is first parsed as an automaton; alphabet size, state count,
# edge count, average degree, accepting ratio and depth are then printed.
# If that raises ADL::ParseError, the input is parsed as a sample instead and
# its size is printed along with the positive and negative counts, each
# followed by its proportion of the sample size.
def execute(args)
  raise Quickl::Help unless args.size <= 1

  # Load the text to analyse. Standard input is read verbatim: joining the
  # result of readlines with "\n" would duplicate every line terminator and
  # feed the parser spurious blank lines.
  input = if args.size == 1
            File.read assert_readable_file(args.first)
          else
            $stdin.read
          end

  # Flush metrics
  begin
    target = Stamina::ADL::parse_automaton(input)
    puts "Alphabet size: #{target.alphabet_size}"
    puts "State count: #{target.state_count}"
    puts "Edge count: #{target.edge_count}"
    puts "Degree (avg): #{target.avg_degree}"
    puts "Accepting ratio: #{target.accepting_ratio}"
    puts "Depth: #{target.depth}"
  rescue ADL::ParseError
    # Not an automaton: fall back on sample metrics
    sample = Stamina::ADL::parse_sample(input)
    puts "Size: #{sample.size}"
    puts "Positive: #{sample.positive_count} (#{sample.positive_count.to_f / sample.size})"
    puts "Negative: #{sample.negative_count} (#{sample.negative_count.to_f / sample.size})"
  end
end
47
+
48
+ end # class Metrics
49
+ end # class Command
50
+ end # module Stamina
@@ -0,0 +1,34 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Scores the labelling of a sample by an automaton
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} sample.adl automaton.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class Score < Quickl::Command(__FILE__, __LINE__)
13
+ include Robustness
14
+
15
+ # Install options
16
+ options do |opt|
17
+
18
+ end # options
19
+
20
+ # Command execution
21
# Scores the labelling of a sample by an automaton and prints the result.
#
# args - command-line arguments; exactly two are expected: the sample file
#        first, the automaton file last. Any other count raises Quickl::Help.
#
# The signature computed by the automaton on the sample is compared, through
# Scoring.scoring, with the signature carried by the sample itself. The
# string form of the scoring is printed with puts.
def execute(args)
  raise Quickl::Help unless args.size == 2

  sample_path = assert_readable_file(args.first)
  sample = Stamina::ADL::parse_sample_file(sample_path)
  automaton_path = assert_readable_file(args.last)
  automaton = Stamina::ADL::parse_automaton_file(automaton_path)

  # Labelling by the automaton first, reference labelling second
  scoring = Scoring.scoring(automaton.signature(sample), sample.signature)
  puts scoring.to_s
end
31
+
32
+ end # class Score
33
+ end # class Command
34
+ end # module Stamina
@@ -0,0 +1,2 @@
1
+ require_relative 'dsl/induction'
2
+ require_relative 'dsl/reg_lang'
@@ -0,0 +1,29 @@
1
+ module Stamina
2
+ module Dsl
3
module Induction

  #
  # Coerces `arg` to a Sample
  #
  def sample(arg)
    Sample.coerce(arg)
  end

  #
  # Learns a regular language from `arg` using the RPNI algorithm. `arg` is
  # first coerced with `sample`; the result of the induction is then passed
  # to `regular`, which is not defined in this module and must be available
  # on the receiver.
  #
  def rpni(arg)
    training = sample(arg)
    regular(Stamina::Induction::RPNI.execute(training))
  end

  #
  # Learns a regular language from `arg` using the BlueFringe algorithm.
  # `arg` is first coerced with `sample`; the result of the induction is then
  # passed to `regular`, which is not defined in this module and must be
  # available on the receiver.
  #
  def blue_fringe(arg)
    training = sample(arg)
    regular(Stamina::Induction::BlueFringe.execute(training))
  end

end # module Induction
27
+ include Induction
28
+ end # module Dsl
29
+ end # module Stamina
@@ -0,0 +1,69 @@
1
+ module Stamina
2
+ module Dsl
3
module RegLang

  # Shortcut to the constant Stamina::RegLang::EMPTY
  EMPTY_LANG = ::Stamina::RegLang::EMPTY

  #
  # Coerces `arg` to a regular language, through Stamina::RegLang.coerce.
  #
  def regular(arg)
    Stamina::RegLang.coerce(arg)
  end

  #
  # Returns the universal language on a given alphabet.
  #
  def sigma_star(alphabet)
    Stamina::RegLang.sigma_star(alphabet)
  end

  #
  # Coerces `arg` to a regular language and returns its prefix-closed
  # version.
  #
  def prefix_closed(arg)
    language = regular(arg)
    language.prefix_closed
  end

  #
  # Coerces `arg` to a regular language and extracts its short prefixes
  # as a Sample instance.
  #
  def short_prefixes(arg)
    language = regular(arg)
    language.short_prefixes
  end

  #
  # Coerces `arg` to a regular language and extracts its kernel as a Sample
  # instance.
  #
  def kernel(arg)
    language = regular(arg)
    language.kernel
  end

  #
  # Coerces `arg` to a regular language and extracts a characteristic sample
  # for it, as a Sample instance.
  #
  def characteristic_sample(arg)
    language = regular(arg)
    language.characteristic_sample
  end

  #
  # Hides all but the `alph` symbols in the regular language coerced from
  # `arg`.
  #
  def project(arg, alph)
    language = regular(arg)
    language.project(alph)
  end

  #
  # Hides the `alph` symbols in the regular language coerced from `arg`.
  #
  def hide(arg, alph)
    language = regular(arg)
    language.hide(alph)
  end

end # module RegLang
67
+ include RegLang
68
+ end # module Dsl
69
+ end # module Stamina
@@ -0,0 +1,13 @@
1
+ require_relative 'sample'
2
+ require_relative 'input_string'
3
+ require_relative 'classifier'
4
+ require_relative 'scoring'
5
+ require_relative 'induction/union_find'
6
+ require_relative 'induction/commons'
7
+ require_relative 'induction/rpni'
8
+ require_relative 'induction/blue_fringe'
9
+ require_relative 'abbadingo'
10
+ require_relative 'dsl/induction'
11
+ require_relative 'reg_lang'
12
+ require_relative 'dsl/reg_lang'
13
+ require_relative 'command'
@@ -0,0 +1,265 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Implementation of the BlueFringe variant of the RPNI algorithm (with the blue-fringe
6
+ # heuristics).
7
+ #
8
+ # See Lang, K., B. Pearlmutter, and R. Price. 1998. Results of the Abbadingo One DFA
9
+ # Learning Competition and a New Evidence-Driven State Merging Algorithm, In Grammatical
10
+ # Inference, pp. 1–12. Ames, IO: Springer-Verlag.
11
+ #
12
+ # Example:
13
+ # # sample typically comes from an ADL file
14
+ # sample = Stamina::ADL.parse_sample_file('sample.adl')
15
+ #
16
+ # # let BlueFringe build the smallest dfa
17
+ # dfa = Stamina::Induction::BlueFringe.execute(sample, {:verbose => true})
18
+ #
19
+ # Remarks:
20
+ # - Constructor and instance methods of this class are public but not intended
21
+ # to be used directly. They are left public for testing purposes only.
22
+ # - Having read the Stamina::Induction::RPNI base algorithm may help understanding
23
+ # this variant.
24
+ # - This class intensively uses the Stamina::Induction::UnionFind class and
25
+ # methods defined in the Stamina::Induction::Commons module which are worth
26
+ # reading to understand the algorithm implementation.
27
+ #
28
+ class BlueFringe
29
+ include Stamina::Induction::Commons
30
+
31
+ # Union-find data structure used internally
32
+ attr_reader :ufds
33
+
34
+ # Creates an algorithm instance with given options.
35
# Creates an algorithm instance with given options.
#
# options - a Hash merged over DEFAULT_OPTIONS (given entries win).
#
# Raises ArgumentError when `options` is not a Hash. The score cache starts
# empty.
def initialize(options={})
  unless options.is_a?(Hash)
    raise ArgumentError, "Invalid options #{options.inspect}"
  end
  @score_cache = {}
  @options = DEFAULT_OPTIONS.merge(options)
end
40
+
41
+ #
42
+ # Computes the score of a single (group) merge. Returned value is 1 if both are
43
+ # accepting states or both are error states and 0 otherwise. Note that d1 and d2
44
+ # are expected to be merge compatible as this method does not distinguish this
45
+ # case.
46
+ #
47
# Scores a single merge from the data of the two merged groups.
#
# d1, d2 - data of both groups; only the :accepting and :error entries are
#          read.
#
# Returns 1 when both are accepting or both are error, 0 otherwise.
def merge_score(d1, d2)
  return 1 if d1[:accepting] && d2[:accepting]
  return 1 if d1[:error] && d2[:error]
  0
end
51
+
52
+ #
53
+ # Merges a state of rank j with a state of lower rank i. This merge method
54
+ # includes merging for determinization. It returns nil if the merge is
55
+ # incompatible, a merge score otherwise.
56
+ #
57
+ # Preconditions:
58
+ # - States denoted by i and j are expected leader states (non merged ones)
59
+ # - States denoted by i and j are expected to be different
60
+ #
61
+ # Postconditions:
62
+ # - Union find is refined, states i and j having been merged, as well as all
63
+ # state pairs that need to be merged to ensure the deterministic property
64
+ # of the quotient automaton.
65
+ # - If the resulting quotient automaton is consistent with the negative sample,
66
+ # this method returns the number of accepting pairs + the number of error pairs
67
+ # that have been merged. The refined union-find correctly encodes the quotient
68
+ # automaton. Otherwise, the method returns nil and the union-find information
69
+ # must be considered inaccurate.
70
+ #
71
# Merges states i and j in the union-find, then recursively performs every
# further merge collected for determinization.
#
# Returns nil as soon as one merge is incompatible (merge_user_data gave no
# data for the new group), otherwise the sum of the merge scores of all
# merges performed. After a nil result the union-find is left partially
# refined.
def merge_and_determinize(i, j)
  # pending collects the state pairs merge_user_data asks to merge next;
  # total holds the score of this merge, then of the recursive ones too
  pending, total = [], nil
  @ufds.union(i, j) do |left, right|
    merged = merge_user_data(left, right, pending)
    # incompatible states: leave the whole method, not only the block
    return nil unless merged
    total = merge_score(left, right)
    # the union-find keeps this value as data of the new group
    merged
  end

  # Merge for determinization, driven by the pairs gathered above
  pending.each do |pair|
    # work on the current leaders of both states of the pair
    first, second = pair.map { |state| @ufds.find(state) }
    # nothing to do when they already belong to the same group
    next if first == second
    subscore = merge_and_determinize(first, second)
    # a failing sub-merge makes the whole merge fail
    return nil if subscore.nil?
    total += subscore
  end

  total
end
105
+
106
+ #
107
+ # Evaluates the score of merging states i and j. Returns nil if the states
108
+ # cannot be merged, a positive score otherwise.
109
+ #
110
+ # Preconditions:
111
+ # - States denoted by i and j are expected leader states (non merged ones)
112
+ # - States denoted by i and j are expected to be different
113
+ #
114
+ # Postconditions:
115
+ # - Returned value is nil if the quotient automaton would be incompatible with
116
+ # the sample. Otherwise a positive number is returned, encoding the number of
117
+ # interesting pairs that have been merged (interesting = both accepting or both
118
+ # error)
119
+ # - The union find is ALWAYS restored to its previous value after merging has
120
+ # been evaluated and is then seen unchanged by the caller.
121
+ #
122
# Evaluates the score of merging states i and j.
#
# The merge is attempted inside @ufds.transactional, in a block that always
# yields false. Results are memoized in @score_cache under the key [i, j];
# a failed merge is stored as -1.
#
# Returns nil when the merge failed, the merge score otherwise.
def merge_and_determinize_score(i, j)
  key = [i, j]
  unless @score_cache[key]
    outcome = nil
    @ufds.transactional do
      outcome = merge_and_determinize(i, j)
      false
    end
    # -1 stands for "cannot be merged" inside the cache
    @score_cache[key] = outcome || -1
  end
  cached = @score_cache[key]
  cached == -1 ? nil : cached
end
134
+
135
+ #
136
+ # Computes the fringe given the current union find. The fringe is returned as an
137
+ # array of state indices.
138
+ #
139
+ # Postconditions:
140
+ # - Returned array contains indices of leader states only.
141
+ # - Returned array is disjoint with the kernel.
142
+ #
143
# Computes the fringe given the current union find.
#
# For each kernel state, the targets found in the :delta entry of its
# mergeable data are resolved with @ufds.find. Kernel states are then removed
# from the collected states and the remainder is returned sorted. A state
# reached several times appears several times.
def fringe
  reached = @kernel.flat_map do |kernel_state|
    delta = @ufds.mergeable_data(kernel_state)[:delta]
    delta.map { |_symbol, target| @ufds.find(target) }
  end
  (reached - @kernel).sort
end
151
+
152
+ #
153
+ # Main method of the algorithm. Refines the union find passed as first argument
154
+ # by merging well chosen state pairs. Returns the refined union find.
155
+ #
156
+ # Preconditions:
157
+ # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
158
+ # and :error boolean flags as well as a :delta sub hash)
159
+ #
160
+ # Postconditions:
161
+ # - The union find has been refined. It encodes a quotient automaton (of the PTA
162
+ # it comes from) such that all positive and negative strings of the underlying
163
+ # sample are correctly classified by it.
164
+ #
165
# Main method of the algorithm. Refines the union find passed as argument by
# merging state pairs, and returns it.
#
# The kernel starts as [0] and the score cache empty. Each step recomputes
# the fringe and stops when it is empty. Fringe states are scanned in order;
# each one is scored against every kernel state:
# - the first fringe state with no possible merge stops the scan and is
#   added to the kernel (consolidation);
# - if every fringe state has one, the pair with the highest score is merged
#   (the first one found wins ties) and the score cache is cleared.
# After each step kernel states are replaced by their leaders and sorted.
#
# Raises RuntimeError ("Unexpected case") if the selected merge fails.
def main(ufds)
  info("Starting BlueFringe (#{ufds.size} states)")
  @ufds = ufds
  @kernel = [0]
  @score_cache = {}

  # the fringe is computed exactly once per step
  current_fringe = fringe
  until current_fringe.empty?
    # fringe state that cannot be merged with any kernel state (if any)
    unmergeable = nil
    # best merge found so far
    best_source, best_target, best_score = nil, nil, -1

    current_fringe.each do |fringe_state|
      # assume no merge is possible until one kernel state proves otherwise
      unmergeable = fringe_state

      @kernel.each do |kernel_state|
        score = merge_and_determinize_score(fringe_state, kernel_state)
        next if score.nil?
        unmergeable = nil
        # strictly better only: the first pair found wins ties
        best_source, best_target, best_score = fringe_state, kernel_state, score if score > best_score
      end

      # no possible target for this state: stop scanning, consolidate it
      break unless unmergeable.nil?
    end

    if unmergeable.nil?
      @score_cache.clear
      info("Merging #{best_source} and #{best_target} [#{best_score}]")
      # expected to succeed, since this pair was scored just above
      raise "Unexpected case" unless merge_and_determinize(best_source, best_target)
    else
      info("Consolidation of #{unmergeable}")
      @kernel << unmergeable
    end

    # a merge may have changed the leader of kernel states: refer to the
    # leaders and keep the kernel sorted
    @kernel = @kernel.map { |state| @ufds.find(state) }.sort
    current_fringe = fringe
  end

  # return the refined union find
  @ufds
end
217
+
218
+ #
219
+ # Build the smallest DFA compatible with the sample given as input.
220
+ #
221
+ # Preconditions:
222
+ # - The sample is consistent (does not contain the same string both labeled as
223
+ # positive and negative) and contains at least one string.
224
+ #
225
+ # Postconditions:
226
+ # - The returned DFA is the smallest DFA that correctly labels the learning sample
227
+ # given as input.
228
+ #
229
+ # Remarks:
230
+ # - This instance version of BlueFringe.execute is not intended to be used directly and
231
+ # is mainly provided for testing purposes. Please use the class variant of this
232
+ # method if possible.
233
+ #
234
# Builds a DFA from `sample`: the sample is turned into a union-find with
# sample2ufds, refined by main, and the refined structure is converted with
# ufds2dfa, whose result is returned.
def execute(sample)
  info("Creating PTA and UnionFind structure")
  # create the union-find, refine it, then compute the quotient automaton
  refined = main(sample2ufds(sample))
  ufds2dfa(refined)
end
243
+
244
+ #
245
+ # Build the smallest DFA compatible with the sample given as input.
246
+ #
247
+ # Options (the _options_ hash):
248
+ # - :verbose can be set to true to trace algorithm execution on standard output.
249
+ #
250
+ # Preconditions:
251
+ # - The sample is consistent (does not contain the same string both labeled as
252
+ # positive and negative) and contains at least one string.
253
+ #
254
+ # Postconditions:
255
+ # - The returned DFA is the smallest DFA that correctly labels the learning sample
256
+ # given as input.
257
+ #
258
# Class-level entry point: creates a BlueFringe instance with `options`
# (an empty Hash by default) and returns the result of its execute method
# on `sample`.
def self.execute(sample, options={})
  algorithm = BlueFringe.new(options)
  algorithm.execute(sample)
end
261
+
262
+ end # class BlueFringe
263
+
264
+ end # module Induction
265
+ end # module Stamina