stamina-induction 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. data/CHANGELOG.md +78 -0
  2. data/LICENCE.md +22 -0
  3. data/lib/stamina-induction/stamina-induction.rb +1 -0
  4. data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
  5. data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
  6. data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
  7. data/lib/stamina-induction/stamina/classifier.rb +55 -0
  8. data/lib/stamina-induction/stamina/command.rb +6 -0
  9. data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
  10. data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
  11. data/lib/stamina-induction/stamina/command/classify.rb +47 -0
  12. data/lib/stamina-induction/stamina/command/infer.rb +140 -0
  13. data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
  14. data/lib/stamina-induction/stamina/command/score.rb +34 -0
  15. data/lib/stamina-induction/stamina/dsl.rb +2 -0
  16. data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
  17. data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
  18. data/lib/stamina-induction/stamina/induction.rb +13 -0
  19. data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
  20. data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
  21. data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
  22. data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
  23. data/lib/stamina-induction/stamina/input_string.rb +123 -0
  24. data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
  25. data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
  26. data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
  27. data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
  28. data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
  29. data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
  30. data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
  31. data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
  32. data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
  33. data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
  34. data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
  35. data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
  36. data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
  37. data/lib/stamina-induction/stamina/sample.rb +309 -0
  38. data/lib/stamina-induction/stamina/scoring.rb +213 -0
  39. metadata +106 -0
@@ -0,0 +1,39 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Generates samples following Abbadingo's protocol
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} target.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
class AbbadingoSamples < Quickl::Command(__FILE__, __LINE__)
  # Brings assert_readable_file into scope, as in the sibling commands
  include Robustness

  # Install options (this command does not declare any)
  options do |opt|
  end # options

  #
  # Command execution.
  #
  # Expects exactly one argument, the path of the target automaton (.adl).
  # Parses it, asks Stamina::Abbadingo::RandomSample for a training and a
  # test sample, and prints both next to the target file as
  # "<basename>-training.adl" and "<basename>-test.adl".
  #
  # Raises Quickl::Help unless exactly one argument is given.
  #
  def execute(args)
    raise Quickl::Help unless args.size == 1

    # Check the target upfront, like Classify/Infer/Metrics/Score do, so that
    # a missing or unreadable file is reported through the Robustness helper
    # rather than by a low-level error raised from the parser.
    target_file = assert_readable_file(args.first)
    basename = File.basename(target_file, '.adl')
    dirname = File.dirname(target_file)
    target = Stamina::ADL::parse_automaton_file(target_file)

    # Loaded lazily, only when the command is actually run
    require 'stamina/abbadingo'
    training, test = Stamina::Abbadingo::RandomSample.execute(target)

    # Flush results aside the target automaton file
    Stamina::ADL::print_sample_in_file(training, File.join(dirname, "#{basename}-training.adl"))
    Stamina::ADL::print_sample_in_file(test, File.join(dirname, "#{basename}-test.adl"))
  end

end # class AbbadingoSamples
38
+ end # class Command
39
+ end # module Stamina
@@ -0,0 +1,47 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Classifies a sample with an automaton
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} sample.adl automaton.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
class Classify < Quickl::Command(__FILE__, __LINE__)
  include Robustness

  # Path of the file receiving the signature; nil means standard output
  attr_accessor :output_file

  # Install options
  options do |opt|
    @output_file = nil

    opt.on("-o", "--output=OUTPUT",
           "Flush classification signature in output file") do |path|
      assert_writable_file(path)
      @output_file = path
    end
  end # options

  #
  # Command execution.
  #
  # Expects two arguments: the sample file, then the automaton file. Both
  # are checked for readability and parsed; the value of
  # automaton.signature(sample) is then appended either to the output file
  # (when -o was given) or to standard output.
  #
  # Raises Quickl::Help unless exactly two arguments are given.
  #
  def execute(args)
    raise Quickl::Help unless args.size == 2

    sample_path, automaton_path = args
    sample    = Stamina::ADL::parse_sample_file(assert_readable_file(sample_path))
    automaton = Stamina::ADL::parse_automaton_file(assert_readable_file(automaton_path))

    destination = output_file
    if destination
      # The file is opened before the signature is computed
      File.open(destination, 'w') { |io| io << automaton.signature(sample) }
    else
      $stdout << automaton.signature(sample)
    end
  end

end # class Classify
46
+ end # class Command
47
+ end # module Stamina
@@ -0,0 +1,140 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Grammar inference, induces a DFA from a training sample using a
5
+ # chosen algorithm.
6
+ #
7
+ # SYNOPSIS
8
+ # #{program_name} #{command_name} sample.adl
9
+ #
10
+ # OPTIONS
11
+ # #{summarized_options}
12
+ #
13
+ class Infer < Quickl::Command(__FILE__, __LINE__)
14
+ include Robustness
15
+
16
+ attr_accessor :algorithm
17
+ attr_accessor :take
18
+ attr_accessor :score
19
+ attr_accessor :verbose
20
+ attr_accessor :drop
21
+ attr_accessor :output_file
22
+
23
+ # Install options
24
options do |opt|
  # Defaults first; the switches installed below override them when the
  # corresponding option is seen on the command line.
  @algorithm   = :rpni
  @take        = 1.0
  @score       = nil
  @verbose     = true
  @drop        = false
  @output_file = nil

  opt.on("--algorithm=X", "Sets the induction algorithm to use (rpni, bluefringe)") do |name|
    @algorithm = name.to_sym
  end

  # The accepted value is a ratio in ]0.0, 1.0]
  opt.on("--take=X", Float, "Take only X% of available strings") do |ratio|
    @take = ratio.to_f
    unless @take > 0.0 && @take <= 1.0
      raise Quickl::InvalidOption, "Invalid --take option: #{@take}"
    end
  end

  opt.on("--score=test.adl", "Add scoring information to metadata, using test.adl file") do |path|
    @score = assert_readable_file(path)
  end

  opt.on("-v", "--[no-]verbose", "Verbose mode") do |flag|
    @verbose = flag
  end

  opt.on("-d", "--drop", "Drop result") do |flag|
    @drop = flag
  end

  opt.on("-o", "--output=OUTPUT",
         "Flush induced DFA in output file") do |path|
    @output_file = assert_writable_file(path)
  end
end # options
61
+
62
#
# Runs the selected induction algorithm on `sample` and measures it.
#
# Returns a pair [dfa, tms] where `dfa` is whatever the algorithm class
# returns from `execute` and `tms` is the Benchmark measurement of that call.
#
# Raises Quickl::InvalidOption when `algorithm` is neither :rpni nor
# :bluefringe.
#
def launch_induction(sample)
  require 'benchmark'

  algo_clazz = case algorithm
               when :rpni       then Stamina::Induction::RPNI
               when :bluefringe then Stamina::Induction::BlueFringe
               else
                 # Report the offending name; the accessor is `algorithm`
                 raise Quickl::InvalidOption, "Unknown induction algorithm: #{algorithm}"
               end

  # dfa is assigned from inside the measured block, so declare it first
  dfa = nil
  tms = Benchmark.measure do
    dfa = algo_clazz.execute(sample, {:verbose => verbose})
  end
  [dfa, tms]
end
80
+
81
#
# Parses the sample found in `file` and applies the --take ratio.
#
# When @take is exactly 1.0 the parsed sample is returned as is. Otherwise
# a new Stamina::Sample is built in which each positive string, then each
# negative string, is kept when a fresh Kernel.rand draw is below @take.
#
def load_sample(file)
  parsed = Stamina::ADL.parse_sample_file(file)
  return parsed if @take == 1.0

  subset = Stamina::Sample.new
  parsed.each_positive do |string|
    subset << string if Kernel.rand < @take
  end
  parsed.each_negative do |string|
    subset << string if Kernel.rand < @take
  end
  subset
end
91
+
92
+ # Command execution
93
#
# Command execution.
#
# Expects exactly one argument, the training sample file. Loads it, runs
# the induction, prints the induced DFA (unless --drop) and finally prints
# a hash of meta information on standard output.
#
# Raises Quickl::Help unless exactly one argument is given.
#
def execute(args)
  raise Quickl::Help unless args.size == 1
  sample_file = args.first

  # Parses the sample
  $stderr << "Parsing sample...\n" if verbose
  sample = load_sample(assert_readable_file(sample_file))

  # Induce the DFA
  dfa, tms = launch_induction(sample)

  # Flush result, to the output file when given, to stdout otherwise
  unless drop
    if output_file
      File.open(output_file, 'w') { |io| Stamina::ADL.print_automaton(dfa, io) }
    else
      Stamina::ADL.print_automaton(dfa, $stdout)
    end
  end

  # Build meta information (key order is the order shown by inspect)
  meta = {
    :algorithm      => algorithm,
    :sample         => File.basename(sample_file),
    :take           => take,
    :sample_size    => sample.size,
    :positive_count => sample.positive_count,
    :negative_count => sample.negative_count,
    :real_time      => tms.real,
    :total_time     => tms.total,
    :user_time      => tms.utime + tms.cutime,
    :system_time    => tms.stime + tms.cstime
  }

  # Optional scoring against the test sample given through --score
  if score
    test = Stamina::ADL::parse_sample_file(score)
    classified_as = dfa.signature(test)
    reference = test.signature
    meta.merge!(Scoring.scoring(classified_as, reference).to_h)
  end

  # Display information
  puts meta.inspect
end
137
+
138
+ end # class Infer
139
+ end # class Command
140
+ end # module Stamina
@@ -0,0 +1,50 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Prints metrics about an automaton or sample
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} [file.adl]
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class Metrics < Quickl::Command(__FILE__, __LINE__)
13
+ include Robustness
14
+
15
+ # Install options
16
+ options do |opt|
17
+
18
+ end # options
19
+
20
+ # Command execution
21
#
# Command execution.
#
# Accepts at most one argument. The ADL text is read from that file when
# given, from standard input otherwise. It is first parsed as an automaton;
# if that raises Stamina::ADL::ParseError it is parsed as a sample instead.
# The corresponding metrics are printed on standard output.
#
# Raises Quickl::Help when more than one argument is given.
#
def execute(args)
  raise Quickl::Help unless args.size <= 1

  # Loads the raw ADL text. Reading stdin in one go keeps the text
  # untouched: readlines keeps line terminators, so joining its result
  # with "\n" would insert an empty line after every input line.
  input = if args.size == 1
            File.read assert_readable_file(args.first)
          else
            $stdin.read
          end

  # Flush metrics
  begin
    target = Stamina::ADL::parse_automaton(input)
    puts "Alphabet size: #{target.alphabet_size}"
    puts "State count: #{target.state_count}"
    puts "Edge count: #{target.edge_count}"
    puts "Degree (avg): #{target.avg_degree}"
    puts "Accepting ratio: #{target.accepting_ratio}"
    puts "Depth: #{target.depth}"
  rescue Stamina::ADL::ParseError
    # Not an automaton: fall back on sample metrics
    sample = Stamina::ADL::parse_sample(input)
    puts "Size: #{sample.size}"
    puts "Positive: #{sample.positive_count} (#{sample.positive_count.to_f / sample.size})"
    puts "Negative: #{sample.negative_count} (#{sample.negative_count.to_f / sample.size})"
  end
end
47
+
48
+ end # class Metrics
49
+ end # class Command
50
+ end # module Stamina
@@ -0,0 +1,34 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Scores the labelling of a sample by an automaton
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} sample.adl automaton.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
class Score < Quickl::Command(__FILE__, __LINE__)
  include Robustness

  # Install options (this command does not declare any)
  options do |opt|
  end # options

  #
  # Command execution.
  #
  # Expects two arguments: the sample file, then the automaton file. Prints
  # the scoring obtained by comparing automaton.signature(sample) against
  # the sample's own signature.
  #
  # Raises Quickl::Help unless exactly two arguments are given.
  #
  def execute(args)
    raise Quickl::Help unless args.size == 2

    sample_path, automaton_path = args
    sample    = Stamina::ADL::parse_sample_file(assert_readable_file(sample_path))
    automaton = Stamina::ADL::parse_automaton_file(assert_readable_file(automaton_path))

    # The labelling computed by the automaton is compared with the reference
    result = Scoring.scoring(automaton.signature(sample), sample.signature)
    puts result.to_s
  end

end # class Score
33
+ end # class Command
34
+ end # module Stamina
@@ -0,0 +1,2 @@
1
+ require_relative 'dsl/induction'
2
+ require_relative 'dsl/reg_lang'
@@ -0,0 +1,29 @@
1
+ module Stamina
2
+ module Dsl
3
module Induction

  #
  # Coerces `arg` to a Sample (delegates to Sample.coerce).
  #
  def sample(arg)
    Sample.coerce(arg)
  end

  #
  # Learns a regular language from `arg` using the RPNI algorithm. `arg` is
  # first coerced to a sample; the algorithm result is passed to `regular`.
  #
  def rpni(arg)
    learned = Stamina::Induction::RPNI.execute(sample(arg))
    regular(learned)
  end

  #
  # Learns a regular language from `arg` using the BlueFringe algorithm.
  # `arg` is first coerced to a sample; the algorithm result is passed to
  # `regular`.
  #
  def blue_fringe(arg)
    learned = Stamina::Induction::BlueFringe.execute(sample(arg))
    regular(learned)
  end

end # module Induction
27
+ include Induction
28
+ end # module Dsl
29
+ end # module Stamina
@@ -0,0 +1,69 @@
1
+ module Stamina
2
+ module Dsl
3
module RegLang

  # Shortcut on Stamina::RegLang::EMPTY
  EMPTY_LANG = ::Stamina::RegLang::EMPTY

  #
  # Coerces `arg` to a regular language (delegates to
  # Stamina::RegLang.coerce).
  #
  def regular(arg)
    ::Stamina::RegLang.coerce(arg)
  end

  #
  # Returns the universal language on `alphabet`.
  #
  def sigma_star(alphabet)
    ::Stamina::RegLang.sigma_star(alphabet)
  end

  #
  # Coerces `arg` to a regular language and returns its `prefix_closed`.
  #
  def prefix_closed(arg)
    language = regular(arg)
    language.prefix_closed
  end

  #
  # Coerces `arg` to a regular language and returns its `short_prefixes`.
  #
  def short_prefixes(arg)
    language = regular(arg)
    language.short_prefixes
  end

  #
  # Coerces `arg` to a regular language and returns its `kernel`.
  #
  def kernel(arg)
    language = regular(arg)
    language.kernel
  end

  #
  # Coerces `arg` to a regular language and returns its
  # `characteristic_sample`.
  #
  def characteristic_sample(arg)
    language = regular(arg)
    language.characteristic_sample
  end

  #
  # Hides all but the `alph` symbols in the regular language coerced from
  # `arg`.
  #
  def project(arg, alph)
    language = regular(arg)
    language.project(alph)
  end

  #
  # Hides the `alph` symbols in the regular language coerced from `arg`.
  #
  def hide(arg, alph)
    language = regular(arg)
    language.hide(alph)
  end

end # module RegLang
67
+ include RegLang
68
+ end # module Dsl
69
+ end # module Stamina
@@ -0,0 +1,13 @@
1
+ require_relative 'sample'
2
+ require_relative 'input_string'
3
+ require_relative 'classifier'
4
+ require_relative 'scoring'
5
+ require_relative 'induction/union_find'
6
+ require_relative 'induction/commons'
7
+ require_relative 'induction/rpni'
8
+ require_relative 'induction/blue_fringe'
9
+ require_relative 'abbadingo'
10
+ require_relative 'dsl/induction'
11
+ require_relative 'reg_lang'
12
+ require_relative 'dsl/reg_lang'
13
+ require_relative 'command'
@@ -0,0 +1,265 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Implementation of the BlueFringe variant of the RPNI algorithm (with the blue-fringe
6
+ # heuristics).
7
+ #
8
+ # See Lang, K., B. Pearlmutter, and R. Price. 1998. Results of the Abbadingo One DFA
9
+ # Learning Competition and a New Evidence-Driven State Merging Algorithm, In Grammatical
10
+ # Inference, pp. 1–12. Ames, IO: Springer-Verlag.
11
+ #
12
+ # Example:
13
+ # # sample typically comes from an ADL file
14
+ # sample = Stamina::ADL.parse_sample_file('sample.adl')
15
+ #
16
+ # # let BlueFringe build the smallest dfa
17
+ # dfa = Stamina::Induction::BlueFringe.execute(sample, {:verbose => true})
18
+ #
19
+ # Remarks:
20
+ # - Constructor and instance methods of this class are public but not intended
21
+ # to be used directly. They are left public for testing purposes only.
22
+ # - Having read the Stamina::Induction::RPNI base algorithm may help understanding
23
+ # this variant.
24
+ # - This class intensively uses the Stamina::Induction::UnionFind class and
25
+ # methods defined in the Stamina::Induction::Commons module which are worth
26
+ # reading to understand the algorithm implementation.
27
+ #
28
+ class BlueFringe
29
+ include Stamina::Induction::Commons
30
+
31
+ # Union-find data structure used internally
32
+ attr_reader :ufds
33
+
34
+ # Creates an algorithm instance with given options.
35
# Builds an algorithm instance. `options` must be a Hash; it is merged over
# DEFAULT_OPTIONS. Raises ArgumentError for any other kind of argument.
def initialize(options={})
  unless options.is_a?(Hash)
    raise ArgumentError, "Invalid options #{options.inspect}"
  end
  @score_cache = {}
  @options = DEFAULT_OPTIONS.merge(options)
end
40
+
41
+ #
42
+ # Computes the score of a single (group) merge. Returned value is 1 if both are
43
+ # accepting states or both are error states and 0 otherwise. Note that d1 and d2
44
+ # are expected to be merge compatible as this method does not distinguish this
45
+ # case.
46
+ #
47
# Scores the merge of two state groups described by the hashes d1 and d2:
# 1 when both have a truthy :accepting flag or both have a truthy :error
# flag, 0 otherwise. Compatibility of d1 and d2 is not checked here.
def merge_score(d1, d2)
  both_accepting = d1[:accepting] && d2[:accepting]
  both_error     = d1[:error] && d2[:error]
  (both_accepting || both_error) ? 1 : 0
end
51
+
52
+ #
53
+ # Merges a state of rank j with a state of lower rank i. This merge method
54
+ # includes merging for determinization. It returns nil if the merge is
55
+ # incompatible, a merge score otherwise.
56
+ #
57
+ # Preconditions:
58
+ # - States denoted by i and j are expected leader states (non merged ones)
59
+ # - States denoted by i and j are expected to be different
60
+ #
61
+ # Postconditions:
62
+ # - Union find is refined, states i and j having been merged, as well as all
63
+ # state pairs that need to be merged to ensure the deterministic property
64
+ # of the quotient automaton.
65
+ # - If the resulting quotient automaton is consistent with the negative sample,
66
+ # this method returns the number of accepting pairs + the number of error pairs
67
+ # that have been merged. The refined union-find correctly encodes the quotient
68
+ # automaton. Otherwise, the method returns nil and the union-find information
69
+ # must be considered inaccurate.
70
+ #
71
# Merges the groups led by states i and j, then recursively performs the
# merges queued for determinization. Returns the accumulated merge score,
# or nil as soon as one of the merges is found incompatible (in which case
# the union-find may be left partially updated).
def merge_and_determinize(i, j)
  # `determinization` is filled by merge_user_data as a side effect with the
  # state pairs that must be merged next; `score` is set by the union block.
  determinization, score = [], nil
  @ufds.union(i, j) do |d1, d2|
    # merge_user_data returns nil when the two groups cannot be merged;
    # this return leaves the whole method, not only the block.
    return nil unless (new_data = merge_user_data(d1, d2, determinization))
    score = merge_score(d1, d2)
    # value of the block: the data the union-find keeps for the new group
    new_data
  end

  # Merge for determinization, based on the pairs collected above
  determinization.each do |pair|
    # work on the leader states of the pair (the block variable has its own
    # name so that it does not shadow the method parameter i)
    left, right = pair.collect{|state| @ufds.find(state)}
    # nothing to do if both already belong to the same group
    next if left == right
    # otherwise recurse; a nil subscore means an incompatible merge
    subscore = merge_and_determinize(left, right)
    return nil if subscore.nil?
    score += subscore
  end

  score
end
105
+
106
+ #
107
+ # Evaluates the score of merging states i and j. Returns nil if the states
108
+ # cannot be merged, a positive score otherwise.
109
+ #
110
+ # Preconditions:
111
+ # - States denoted by i and j are expected leader states (non merged ones)
112
+ # - States denoted by i and j are expected to be different
113
+ #
114
+ # Postconditions:
115
+ # - Returned value is nil if the quotient automaton would be incompatible with
116
+ # the sample. Otherwise a positive number is returned, encoding the number of
117
+ # interesting pairs that have been merged (interesting = both accepting or both
118
+ # error)
119
+ # - The union find is ALWAYS restored to its previous value after merging has
120
+ # been evaluated and is then seen unchanged by the caller.
121
+ #
122
# Returns the score of merging i and j without keeping the merge, or nil
# when the merge is incompatible. Results are memoized in @score_cache
# under the key [i, j].
def merge_and_determinize_score(i, j)
  key = [i, j]
  unless @score_cache[key]
    outcome = nil
    # The block's value is false: the merge is only evaluated, and the
    # transaction is expected to be rolled back by the union-find.
    @ufds.transactional do
      outcome = merge_and_determinize(i, j)
      false
    end
    # -1 stands for "incompatible", so that a failed evaluation is cached
    # too (a nil entry would be recomputed on every call)
    @score_cache[key] = outcome || -1
  end
  cached = @score_cache[key]
  cached == -1 ? nil : cached
end
134
+
135
+ #
136
+ # Computes the fringe given the current union find. The fringe is returned as an
137
+ # array of state indices.
138
+ #
139
+ # Postconditions:
140
+ # - Returned array contains indices of leader states only.
141
+ # - Returned array is disjoint with the kernel.
142
+ #
143
# Collects, for every kernel state, the leaders of the targets found in its
# :delta hash; returns them sorted, kernel states removed. Duplicates are
# not removed.
def fringe
  leaders = @kernel.inject([]) do |found, state|
    delta = @ufds.mergeable_data(state)[:delta]
    found.concat(delta.values.collect { |target| @ufds.find(target) })
  end
  (leaders - @kernel).sort
end
151
+
152
+ #
153
+ # Main method of the algorithm. Refines the union find passed as first argument
154
+ # by merging well chosen state pairs. Returns the refined union find.
155
+ #
156
+ # Preconditions:
157
+ # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
158
+ # and :error boolean flags as well as a :delta sub hash)
159
+ #
160
+ # Postconditions:
161
+ # - The union find has been refined. It encodes a quotient automaton (of the PTA
162
+ # it comes from) such that all positive and negative strings of the underlying
163
+ # sample are correctly classified by it.
164
+ #
165
# Refines `ufds` until the fringe is empty and returns it. At each step
# either one fringe state is added to the kernel (when it cannot be merged
# with any kernel state) or the best scored (fringe, kernel) pair is merged.
def main(ufds)
  info("Starting BlueFringe (#{ufds.size} states)")
  @ufds, @kernel, @score_cache = ufds, [0], {}

  # the fringe is computed once per step
  until (blue_states = fringe).empty?
    # first fringe state found with no compatible kernel state, if any
    unmergeable = nil
    # best merge seen so far during this step
    best_source, best_target, best_score = nil, nil, -1

    blue_states.each do |candidate|
      mergeable = false

      # evaluate the merge of the candidate with each kernel state
      @kernel.each do |target|
        score = merge_and_determinize_score(candidate, target)
        next if score.nil?
        mergeable = true
        if score > best_score
          best_source, best_target, best_score = candidate, target, score
        end
      end

      # no possible target: stop looking, this state is consolidated
      unless mergeable
        unmergeable = candidate
        break
      end
    end

    if unmergeable.nil?
      # cached scores are dropped before the union-find is really changed
      @score_cache.clear
      info("Merging #{best_source} and #{best_target} [#{best_score}]")
      # should not fail, as this very merge has just been scored
      raise "Unexpected case" unless merge_and_determinize(best_source, best_target)
    else
      info("Consolidation of #{unmergeable}")
      @kernel << unmergeable
    end

    # a merge may change the leader of a kernel state: map the kernel to
    # current leaders and keep it sorted
    @kernel = @kernel.collect{|k| @ufds.find(k)}.sort
  end

  # return the refined union find now
  @ufds
end
217
+
218
+ #
219
+ # Build the smallest DFA compatible with the sample given as input.
220
+ #
221
+ # Preconditions:
222
+ # - The sample is consistent (does not contains the same string both labeled as
223
+ # positive and negative) and contains at least one string.
224
+ #
225
+ # Postconditions:
226
+ # - The returned DFA is the smallest DFA that correctly labels the learning sample
227
+ # given as input.
228
+ #
229
+ # Remarks:
230
+ # - This instance version of BlueFringe.execute is not intended to be used directly and
231
+ # is mainly provided for testing purposes. Please use the class variant of this
232
+ # method if possible.
233
+ #
234
# Instance variant of BlueFringe.execute: converts `sample` with
# sample2ufds, refines the result with `main` and returns the value of
# ufds2dfa on the refined structure.
def execute(sample)
  info("Creating PTA and UnionFind structure")
  refined = main(sample2ufds(sample))
  ufds2dfa(refined)
end
243
+
244
+ #
245
+ # Build the smallest DFA compatible with the sample given as input.
246
+ #
247
+ # Options (the _options_ hash):
248
+ # - :verbose can be set to true to trace algorithm execution on standard output.
249
+ #
250
+ # Preconditions:
251
+ # - The sample is consistent (does not contains the same string both labeled as
252
+ # positive and negative) and contains at least one string.
253
+ #
254
+ # Postconditions:
255
+ # - The returned DFA is the smallest DFA that correctly labels the learning sample
256
+ # given as input.
257
+ #
258
# Class-level entry point: builds a BlueFringe instance with `options` and
# runs it on `sample`.
def self.execute(sample, options={})
  algorithm = BlueFringe.new(options)
  algorithm.execute(sample)
end
261
+
262
+ end # class BlueFringe
263
+
264
+ end # module Induction
265
+ end # module Stamina