stamina 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/.gemtest +0 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +33 -0
  5. data/LICENCE.md +22 -0
  6. data/Manifest.txt +16 -0
  7. data/README.md +78 -0
  8. data/Rakefile +23 -0
  9. data/bin/adl2dot +12 -0
  10. data/bin/classify +12 -0
  11. data/bin/redblue +12 -0
  12. data/bin/rpni +12 -0
  13. data/example/adl/automaton.adl +49 -0
  14. data/example/adl/sample.adl +53 -0
  15. data/example/basic/characteristic_sample.adl +32 -0
  16. data/example/basic/target.adl +9 -0
  17. data/example/competition/31_test.adl +1500 -0
  18. data/example/competition/31_training.adl +1759 -0
  19. data/lib/stamina.rb +19 -0
  20. data/lib/stamina/adl.rb +298 -0
  21. data/lib/stamina/automaton.rb +1237 -0
  22. data/lib/stamina/automaton/walking.rb +336 -0
  23. data/lib/stamina/classifier.rb +37 -0
  24. data/lib/stamina/command/adl2dot_command.rb +73 -0
  25. data/lib/stamina/command/classify_command.rb +57 -0
  26. data/lib/stamina/command/redblue_command.rb +58 -0
  27. data/lib/stamina/command/rpni_command.rb +58 -0
  28. data/lib/stamina/command/stamina_command.rb +79 -0
  29. data/lib/stamina/errors.rb +20 -0
  30. data/lib/stamina/induction/commons.rb +170 -0
  31. data/lib/stamina/induction/redblue.rb +264 -0
  32. data/lib/stamina/induction/rpni.rb +188 -0
  33. data/lib/stamina/induction/union_find.rb +377 -0
  34. data/lib/stamina/input_string.rb +123 -0
  35. data/lib/stamina/loader.rb +0 -0
  36. data/lib/stamina/markable.rb +42 -0
  37. data/lib/stamina/sample.rb +190 -0
  38. data/lib/stamina/version.rb +14 -0
  39. data/stamina.gemspec +190 -0
  40. data/stamina.noespec +35 -0
  41. data/tasks/debug_mail.rake +78 -0
  42. data/tasks/debug_mail.txt +13 -0
  43. data/tasks/gem.rake +68 -0
  44. data/tasks/spec_test.rake +79 -0
  45. data/tasks/unit_test.rake +77 -0
  46. data/tasks/yard.rake +51 -0
  47. data/test/stamina/adl_test.rb +491 -0
  48. data/test/stamina/automaton_additional_test.rb +190 -0
  49. data/test/stamina/automaton_classifier_test.rb +155 -0
  50. data/test/stamina/automaton_test.rb +1092 -0
  51. data/test/stamina/automaton_to_dot_test.rb +64 -0
  52. data/test/stamina/automaton_walking_test.rb +206 -0
  53. data/test/stamina/exit.rb +3 -0
  54. data/test/stamina/induction/induction_test.rb +70 -0
  55. data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +19 -0
  56. data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +64 -0
  57. data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +9 -0
  58. data/test/stamina/induction/redblue_test.rb +83 -0
  59. data/test/stamina/induction/redblue_universal_expected.adl +4 -0
  60. data/test/stamina/induction/redblue_universal_sample.adl +5 -0
  61. data/test/stamina/induction/rpni_inria_expected.adl +7 -0
  62. data/test/stamina/induction/rpni_inria_sample.adl +9 -0
  63. data/test/stamina/induction/rpni_test.rb +129 -0
  64. data/test/stamina/induction/rpni_test_pta.dot +22 -0
  65. data/test/stamina/induction/rpni_universal_expected.adl +4 -0
  66. data/test/stamina/induction/rpni_universal_sample.adl +4 -0
  67. data/test/stamina/induction/union_find_test.rb +124 -0
  68. data/test/stamina/input_string_test.rb +323 -0
  69. data/test/stamina/markable_test.rb +70 -0
  70. data/test/stamina/randdfa.adl +66 -0
  71. data/test/stamina/sample.adl +4 -0
  72. data/test/stamina/sample_classify_test.rb +149 -0
  73. data/test/stamina/sample_test.rb +218 -0
  74. data/test/stamina/small_dfa.dot +16 -0
  75. data/test/stamina/small_dfa.gif +0 -0
  76. data/test/stamina/small_nfa.dot +18 -0
  77. data/test/stamina/small_nfa.gif +0 -0
  78. data/test/stamina/stamina_test.rb +69 -0
  79. data/test/test_all.rb +7 -0
  80. metadata +279 -0
@@ -0,0 +1,58 @@
1
+ require 'stamina/command/stamina_command'
2
+ require 'stamina/induction/redblue'
3
+ module Stamina
4
+ module Command
5
+
6
# Implementation of the redblue command line tool.
#
# Takes a single ADL sample file on the command line, runs
# Stamina::Induction::RedBlue on it and prints the induced DFA in ADL
# format, either on the standard output or in the file given with --output.
class RedBlueCommand < StaminaCommand

  # Creates a redblue command instance. The name, usage line and description
  # are handed over to StaminaCommand, which uses them for the help banner.
  def initialize
    # The description used to expand "RedBlue" with the RPNI acronym
    # (copy/paste from the rpni command); it now describes the actual
    # algorithm.
    super("redblue", "[options] sample.adl",
          "Executes RedBlue (RPNI variant with the blue-fringe heuristics) on a ADL sample and\n"\
          "flushes the induced DFA on the standard output in ADL format as well")
  end

  # Installs the options specific to this command on top of the ones
  # provided by StaminaCommand:
  # - -v / --verbose sets @verbose
  # - -o / --output=OUTPUT checks that OUTPUT can be written and keeps it
  #   in @output_file
  #
  # Returns what StaminaCommand#options returns.
  def options
    super do |opt|
      opt.on("-v", "--verbose", "Verbose mode") do
        @verbose = true
      end
      opt.on("-o", "--output=OUTPUT",
             "Flush induced DFA in output file") do |value|
        assert_writable_file(value)
        @output_file = value
      end
    end
  end

  # Sets the sample file: checks that _file_ is readable, parses it as an
  # ADL sample and keeps the result in @sample.
  #
  # Raises an ArgumentError if the file is not readable or if the ADL
  # parser reports a Stamina::ADL::ParseError.
  def sample_file=(file)
    assert_readable_file(file)
    puts "Parsing sample and building PTA" if @verbose
    @sample = Stamina::ADL.parse_sample_file(file)
  rescue Stamina::ADL::ParseError
    raise ArgumentError, "#{file} is not a valid ADL sample file"
  end

  # Executes the command: parses _argv_ (the only expected non-option
  # argument is the sample file), runs RedBlue on the sample and prints the
  # induced DFA in @output_file when given, on STDOUT otherwise. In verbose
  # mode the induction time is printed at the end.
  def main(argv)
    parse(argv, :sample_file)
    t1 = Time.now
    dfa = Stamina::Induction::RedBlue.execute(@sample, {:verbose => @verbose})
    t2 = Time.now
    if @output_file
      File.open(@output_file, 'w') do |file|
        Stamina::ADL.print_automaton(dfa, file)
      end
    else
      Stamina::ADL.print_automaton(dfa, STDOUT)
    end
    puts "Executed in #{t2-t1} sec" if @verbose
  end

end # class RedBlueCommand
56
+
57
+ end # module Command
58
+ end # module Stamina
@@ -0,0 +1,58 @@
1
+ require 'stamina/command/stamina_command'
2
+ require 'stamina/induction/rpni'
3
+ module Stamina
4
+ module Command
5
+
6
# Implementation of the rpni command line tool.
#
# Takes a single ADL sample file on the command line, runs
# Stamina::Induction::RPNI on it and prints the induced DFA in ADL format,
# either on the standard output or in the file given with --output.
class RPNICommand < StaminaCommand

  # Creates a rpni command instance. The name, usage line and description
  # are handed over to StaminaCommand.
  def initialize
    description =
      "Executes RPNI (Regular Positive and Negative Inference) on a ADL sample and\n"\
      "flushes the induced DFA on the standard output in ADL format as well"
    super("rpni", "[options] sample.adl", description)
  end

  # Adds the command specific options to the ones of StaminaCommand:
  # - -v / --verbose sets @verbose
  # - -o / --output=OUTPUT checks that OUTPUT can be written and keeps it
  #   in @output_file
  def options
    super do |parser|
      parser.on("-v", "--verbose", "Verbose mode") { @verbose = true }
      parser.on("-o", "--output=OUTPUT", "Flush induced DFA in output file") do |path|
        assert_writable_file(path)
        @output_file = path
      end
    end
  end

  # Sets the sample file. The file must be readable and contain a valid ADL
  # sample, which is parsed and kept in @sample.
  #
  # Raises an ArgumentError if the file is not readable or if the ADL
  # parser reports a Stamina::ADL::ParseError.
  def sample_file=(file)
    assert_readable_file(file)
    puts "Parsing sample and building PTA" if @verbose
    @sample = Stamina::ADL.parse_sample_file(file)
  rescue Stamina::ADL::ParseError
    raise ArgumentError, "#{file} is not a valid ADL sample file"
  end

  # Executes the command: parses _argv_ (the sample file is the only
  # expected non-option argument), runs RPNI and prints the induced DFA in
  # @output_file when given, on STDOUT otherwise. In verbose mode the
  # induction time is printed at the end.
  def main(argv)
    parse(argv, :sample_file)

    started_at  = Time.now
    induced     = Stamina::Induction::RPNI.execute(@sample, {:verbose => @verbose})
    finished_at = Time.now

    if @output_file
      File.open(@output_file, 'w') { |io| Stamina::ADL.print_automaton(induced, io) }
    else
      Stamina::ADL.print_automaton(induced, STDOUT)
    end

    puts "Executed in #{finished_at-started_at} sec" if @verbose
  end

end # class RPNICommand
56
+
57
+ end # module Command
58
+ end # module Stamina
@@ -0,0 +1,79 @@
1
+ require 'stamina'
2
+ require 'optparse'
3
+ module Stamina
4
+ module Command
5
+
6
# Helper to create stamina commands.
#
# A command has a name, a usage line and a description (used to build the
# help banner). Subclasses typically override #options to install their own
# switches and call #parse from their main method.
class StaminaCommand

  # Command name
  attr_reader :name

  # Command description
  attr_reader :description

  # Command usage
  attr_reader :usage

  # Creates a command with a name, usage and description
  def initialize(name, usage, description)
    @name = name
    @usage = usage
    @description = description
  end

  # Creates and returns a new OptionParser for this command. The banner is
  # built from the command name, usage and description. When a block is
  # given, it is called with the parser so that additional options can be
  # installed before the trailing -h / --help switch (which prints the help
  # and exits).
  def options(&block)
    OptionParser.new do |opt|
      opt.program_name = name
      opt.version = Stamina::VERSION
      opt.release = nil
      opt.summary_indent = ' ' * 4
      # The leading "# " markers only delimit the heredoc indentation; they
      # are stripped (with the whitespace before them) by the gsub below.
      banner = <<-EOF
        # usage: #{opt.program_name} #{usage}
        # #{description}
      EOF
      opt.banner = banner.gsub(/[ \t]+# /, "")
      block.call(opt) if block
      opt.on_tail("-h", "--help", "Show this message") do
        puts opt
        exit
      end
    end
  end

  # Prints usage on the standard output and exits the process, unless
  # _and_exit_ is false.
  def show_usage(and_exit=true)
    puts options
    Kernel.exit if and_exit
  end

  # Checks that a given file is readable or raises an ArgumentError.
  #
  # File.exist? is used because File.exists? has been removed in Ruby 3.2.
  def assert_readable_file(file)
    raise ArgumentError, "File #{file} does not exists" unless File.exist?(file)
    raise ArgumentError, "File #{file} cannot be read" unless File.readable?(file)
  end

  # Checks that a given file is writable or raises an ArgumentError. A file
  # that does not exist yet is accepted; only an existing file that cannot
  # be written is rejected.
  def assert_writable_file(file)
    raise ArgumentError, "File #{file} cannot be written" \
      if File.exist?(file) && !File.writable?(file)
  end

  # Parses _argv_ with #options, then installs the remaining (non-option)
  # arguments through the writers named by _variables_, in order: the i-th
  # remaining argument is sent to "#{variables[i]}=".
  #
  # Usage is shown (and the process exits) when the number of remaining
  # arguments differs from the number of variables. An ArgumentError raised
  # while parsing or installing arguments has its message printed, followed
  # by the usage.
  def parse(argv, *variables)
    rest = options.parse(argv)
    show_usage(true) unless rest.size==variables.size
    variables.each_with_index do |var,i|
      self.send("#{var}=".to_sym, rest[i])
    end
  rescue ArgumentError => ex
    puts ex.message
    puts
    show_usage(true)
  end

end # class StaminaCommand
77
+
78
+ end # module Command
79
+ end # module Stamina
@@ -0,0 +1,20 @@
1
module Stamina

  # Root of the stamina error hierarchy: every error class defined below
  # inherits from it.
  class StaminaError < StandardError
  end

  # Raised by samples implementations and other induction algorithms
  # when a sample is inconsistent (same string labeled as being both
  # positive and negative).
  class InconsistencyError < StaminaError
  end

  # Errors specific to the ADL module.
  module ADL

    # Raised by the ADL module when an automaton, string or sample
    # format is violated at parsing time.
    class ParseError < StaminaError
    end

  end # module ADL

end # module Stamina
@@ -0,0 +1,170 @@
1
+ module Stamina
2
+ module Induction
3
+
4
#
# Common utilities used by rpni and redblue. About acronyms:
# - _pta_ stands for Prefix Tree Acceptor
# - _ufds_ stands for Union-Find Data Structure
#
# Methods pta2ufds, sample2pta and sample2ufds are conversion methods used
# when an induction algorithm starts (executed on a sample, it first builds
# a pta, then converts it to a union find). Method ufds2dfa is used when the
# algorithm ends, to convert the refined union find to a dfa.
#
# The merge_user_data method computes the data of a merged pair of states
# and collects the additional merges required for determinization.
#
module Commons

  #
  # Factors and returns a UnionFind data structure from a PTA, with one
  # element per PTA state (element i is built from <code>pta.ith_state(i)</code>).
  # Each element carries a Hash as mergeable user data, with the following
  # keys:
  # - :initial, :accepting and :error flags of the state
  # - :master, the index of the state in the PTA
  # - :delta, the delta function as a Hash {symbol => target state index}
  #
  # Other user data attached to PTA states is not copied.
  #
  def pta2ufds(pta)
    Stamina::Induction::UnionFind.new(pta.state_count) do |index|
      state = pta.ith_state(index)
      user_data = {:initial   => state.initial?,
                   :accepting => state.accepting?,
                   :error     => state.error?,
                   :master    => index}
      user_data[:delta] = state.out_edges.inject({}) do |delta, edge|
        delta[edge.symbol] = edge.target.index
        delta
      end
      user_data
    end
  end

  #
  # Converts a Sample to an (augmented) prefix tree acceptor. States are
  # reindexed in breadth-first order from the initial state, outgoing edges
  # being visited by increasing symbol (according to <code><=></code>).
  # States reached by positive strings are flagged as accepting, those
  # reached by negative strings as error.
  #
  # Raises an InconsistencyError if a state ends up flagged both as error
  # and accepting, which may only happen if _sample_ lets inconsistent
  # strings through (some enumerable other than a Sample, for instance).
  #
  def sample2pta(sample)
    Automaton.new do |pta|
      # NOTE(review): add_state has no explicit receiver here, so this block
      # is presumably evaluated in the context of the new automaton. Do not
      # call other Commons methods from inside it.
      initial_state = add_state(:initial => true, :accepting => false)

      # Fill the PTA with each string
      sample.each do |str|
        # follow the string as far as the current PTA allows
        _parsed, reached, remaining = pta.dfa_split(str, initial_state)

        # create one new state per symbol that could not be followed
        remaining.each do |symbol|
          fresh = pta.add_state(:initial => false, :accepting => false, :error => false)
          pta.connect(reached, fresh, symbol)
          reached = fresh
        end unless remaining.empty?

        # flag the state reached by the whole string
        if str.positive?
          reached.accepting!
        else
          reached.error!
        end

        # consistency check
        if reached.error? && reached.accepting?
          raise(InconsistencyError, "Inconsistent sample on #{str}", caller)
        end
      end

      # Mark each state with its breadth-first rank
      rank  = 0
      queue = [initial_state]
      until queue.empty?
        current = queue.shift
        current[:__index__] = rank
        ordered_edges = current.out_edges.sort { |a, b| a.symbol <=> b.symbol }
        ordered_edges.each { |edge| queue << edge.target }
        rank += 1
      end

      # Let the automaton reorder its states according to the marks...
      pta.order_states { |s0, s1| s0[:__index__] <=> s1[:__index__] }

      # ... and remove the temporary marks
      pta.states.each { |state| state.remove_mark(:__index__) }
    end
  end

  #
  # Converts a Sample instance to a 'ready to refine' union find data
  # structure. Shortcut for <code>pta2ufds(sample2pta(sample))</code>.
  #
  def sample2ufds(sample)
    pta = sample2pta(sample)
    pta2ufds(pta)
  end

  #
  # Computes the quotient automaton from a refined UnionFind data structure.
  #
  # One state is created per mergeable data of the union find, named after
  # its :master index. The :master, :count and :delta keys are not copied on
  # the state and its :error flag is always set to false. Edges are then
  # created from each :delta hash, the target being the state named after
  # the leader (<code>ufds.find</code>) of the target index.
  #
  def ufds2dfa(ufds)
    Automaton.new(false) do |fa|
      groups = ufds.mergeable_datas

      # first pass: one state per group
      groups.each do |group|
        attributes = group.reject { |key, _value| [:master, :count, :delta].include?(key) }
        attributes[:name]  = group[:master].to_s
        attributes[:error] = false
        fa.add_state(attributes)
      end

      # second pass: connect states, following leaders
      groups.each do |group|
        source = fa.get_state(group[:master].to_s)
        group[:delta].each_pair do |symbol, target_index|
          leader = fa.get_state(ufds.find(target_index).to_s)
          fa.connect(source, leader, symbol)
        end
      end
    end
  end

  #
  # Merges two user data hashes _d1_ and _d2_ according to the rules below.
  # Also fills the _determinization_ array with pairs of state indices that
  # are reached from d1 and d2 through the same symbol and should be merged
  # for determinization. Indices in those pairs are taken as is from the
  # delta hashes: they are not resolved to leader states here.
  #
  # Returns the resulting data if the merge is successful, nil if it would
  # lead to a state being both accepting and error.
  #
  # The merging procedure for the different hash keys is as follows:
  # - result[:initial] = d1[:initial] or d2[:initial]
  # - result[:accepting] = d1[:accepting] or d2[:accepting]
  # - result[:error] = d1[:error] or d2[:error]
  # - result[:master] = min(d1[:master], d2[:master])
  # - result[:delta] = merging of delta hashes, keeping smaller target index
  #   on key collisions.
  #
  def merge_user_data(d1, d2, determinization)
    # lowest master wins
    master = if d1[:master] < d2[:master]
      d1[:master]
    else
      d2[:master]
    end

    merged = {:initial   => d1[:initial] || d2[:initial],
              :accepting => d1[:accepting] || d2[:accepting],
              :error     => d1[:error] || d2[:error],
              :master    => master}

    # merge failure if accepting and error states are merged
    return nil if merged[:accepting] && merged[:error]

    # delta functions are merged; each collision is recorded as a pair to
    # merge for determinization and resolved to the smaller target
    merged[:delta] = d1[:delta].merge(d2[:delta]) do |_symbol, t1, t2|
      determinization << [t1, t2]
      t1 < t2 ? t1 : t2
    end

    merged
  end

end # module Commons
168
+
169
+ end # module Induction
170
+ end # module Stamina
@@ -0,0 +1,264 @@
1
+ module Stamina
2
+ module Induction
3
+
4
#
# Implementation of the RedBlue variant of the RPNI algorithm (with the
# blue-fringe heuristics).
#
# See Lang, K., B. Pearlmutter, and R. Price. 1998. Results of the Abbadingo
# One DFA Learning Competition and a New Evidence-Driven State Merging
# Algorithm, In Grammatical Inference, pp. 1-12. Ames, Iowa: Springer-Verlag.
#
# Example:
#   # sample typically comes from an ADL file
#   sample = Stamina::ADL.parse_sample_file('sample.adl')
#
#   # let RedBlue induce a dfa
#   dfa = Stamina::Induction::RedBlue.execute(sample, {:verbose => true})
#
# Remarks:
# - Constructor and instance methods of this class are public but not intended
#   to be used directly. They are left public for testing purposes only.
# - Having read the Stamina::Induction::RPNI base algorithm may help
#   understanding this variant.
# - This class intensively uses the Stamina::Induction::UnionFind class and
#   methods defined in the Stamina::Induction::Commons module which are worth
#   reading to understand the algorithm implementation.
#
class RedBlue
  include Stamina::Induction::Commons

  # Union-find data structure used internally
  attr_reader :ufds

  # Additional options of the algorithm
  attr_reader :options

  #
  # Creates an algorithm instance with specific options. Only the :verbose
  # key is read by this class.
  #
  def initialize(options={})
    @options = options
  end

  #
  # Computes the score of a single (group) merge: 1 if both are accepting
  # states or both are error states, 0 otherwise. d1 and d2 are expected to
  # be merge compatible; this method does not check it.
  #
  def merge_score(d1, d2)
    return 1 if d1[:accepting] && d2[:accepting]
    return 1 if d1[:error] && d2[:error]
    0
  end

  #
  # Merges states i and j, then recursively merges the state pairs required
  # for determinization. Returns nil if the merge is incompatible, a merge
  # score otherwise.
  #
  # Preconditions:
  # - States denoted by i and j are expected leader states (non merged ones)
  # - States denoted by i and j are expected to be different
  #
  # Postconditions:
  # - On success, the union find is refined: i and j have been merged, as
  #   well as all state pairs that had to be merged for determinization. The
  #   returned score is the sum of merge_score over all performed merges
  #   (number of accepting pairs + number of error pairs).
  # - On failure (an accepting state would be merged with an error one), nil
  #   is returned and the union-find must be considered inaccurate.
  #
  def merge_and_determinize(i, j)
    # pairs to merge for determinization, filled by merge_user_data
    pending = []
    score   = nil

    @ufds.union(i, j) do |d1, d2|
      merged = merge_user_data(d1, d2, pending)
      # incompatible states: leave the whole method, not only the block
      return nil unless merged
      score = merge_score(d1, d2)
      # the union find keeps this as the data of the new group
      merged
    end

    pending.each do |first, second|
      # pairs hold raw indices: resolve them to leader states
      left  = @ufds.find(first)
      right = @ufds.find(second)
      # nothing to do if already in the same group
      next if left == right

      subscore = merge_and_determinize(left, right)
      # failure while merging for determinization
      return nil if subscore.nil?
      score += subscore
    end

    score
  end

  #
  # Evaluates the score of merging states i and j without keeping the merge.
  # Returns nil if the states cannot be merged, the score computed by
  # merge_and_determinize otherwise.
  #
  # Preconditions:
  # - States denoted by i and j are expected leader states (non merged ones)
  # - States denoted by i and j are expected to be different
  #
  # Postconditions:
  # - The merge is evaluated inside <code>@ufds.transactional</code>, whose
  #   block always returns false. This presumably asks the union find to roll
  #   back, so that the caller sees it unchanged (see UnionFind).
  #
  def merge_and_determinize_score(i, j)
    outcome = nil
    @ufds.transactional do
      outcome = merge_and_determinize(i, j)
      false
    end
    outcome
  end

  #
  # Computes the fringe given the current union find and kernel: the sorted
  # leaders of all states reached in one step from a kernel state, kernel
  # states excluded.
  #
  # Postconditions:
  # - Returned array contains results of <code>@ufds.find</code> only.
  # - Returned array is disjoint with the kernel.
  #
  def fringe
    reached = []
    @kernel.each do |kernel_state|
      transitions = @ufds.mergeable_data(kernel_state)[:delta]
      transitions.each_value { |target| reached << @ufds.find(target) }
    end
    (reached - @kernel).sort
  end

  #
  # Main method of the algorithm. Refines the union find passed as first
  # argument by merging well chosen state pairs. Returns the refined union
  # find.
  #
  # At each step, fringe states are examined in order. A fringe state that
  # cannot be merged with any kernel state is consolidated (added to the
  # kernel) right away. Otherwise, the best scored (fringe, kernel) merge
  # seen during the step is performed.
  #
  # Preconditions:
  # - The union find _ufds_ is correctly initialized (contains :initial,
  #   :accepting, and :error boolean flags as well as a :delta sub hash)
  #
  # Postconditions:
  # - The union find has been refined and is returned; the loop stops when
  #   the fringe is empty.
  #
  def main(ufds)
    puts "Starting RedBlue (#{ufds.size} states)" if @options[:verbose]
    @ufds   = ufds
    @kernel = [0]

    # one step per iteration, the fringe being computed once per step
    until (candidates = fringe).empty?
      # fringe state without any compatible kernel state (if any)
      unmergeable = nil
      # best merge seen so far
      best_source, best_target, best_score = nil, nil, -1

      candidates.each do |candidate|
        unmergeable = candidate

        @kernel.each do |target|
          score = merge_and_determinize_score(candidate, target)
          next if score.nil?
          # at least one possible merge: no consolidation of this candidate
          unmergeable = nil
          next unless score > best_score
          best_source, best_target, best_score = candidate, target, score
        end

        # no possible target: stop here, candidate is consolidated below
        break unless unmergeable.nil?
      end

      if unmergeable.nil?
        puts "Merging #{best_source} and #{best_target} [#{best_score}]" if @options[:verbose]
        # should never fail, the same merge has just been scored
        raise "Unexpected case" unless merge_and_determinize(best_source, best_target)
      else
        puts "Consolidation of #{unmergeable}" if @options[:verbose]
        @kernel << unmergeable
      end

      # a merge may have changed the leader of a kernel state: resolve
      # kernel states to their current leaders and keep them sorted
      @kernel = @kernel.collect { |state| @ufds.find(state) }.sort
    end

    @ufds
  end

  #
  # Induces a DFA from the sample given as input: builds the union find from
  # the sample (sample2ufds), refines it (main) and returns the quotient
  # automaton (ufds2dfa).
  #
  # Preconditions:
  # - The sample is consistent (does not contain the same string both
  #   labeled as positive and negative) and contains at least one string.
  #
  # Remarks:
  # - This instance version of RedBlue.execute is not intended to be used
  #   directly and is mainly provided for testing purposes. Please use the
  #   class variant of this method if possible.
  #
  def execute(sample)
    puts "Creating PTA and UnionFind structure" if @options[:verbose]
    initial_ufds = sample2ufds(sample)
    refined_ufds = main(initial_ufds)
    ufds2dfa(refined_ufds)
  end

  #
  # Induces a DFA from the sample given as input, using a new RedBlue
  # instance created with _options_.
  #
  # Options (the _options_ hash):
  # - :verbose can be set to true to trace algorithm execution on standard
  #   output.
  #
  # Preconditions:
  # - The sample is consistent (does not contain the same string both
  #   labeled as positive and negative) and contains at least one string.
  #
  def self.execute(sample, options={})
    algorithm = RedBlue.new(options)
    algorithm.execute(sample)
  end

end # class RedBlue
262
+
263
+ end # module Induction
264
+ end # module Stamina