stamina 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. data/.gemtest +0 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +33 -0
  5. data/LICENCE.md +22 -0
  6. data/Manifest.txt +16 -0
  7. data/README.md +78 -0
  8. data/Rakefile +23 -0
  9. data/bin/adl2dot +12 -0
  10. data/bin/classify +12 -0
  11. data/bin/redblue +12 -0
  12. data/bin/rpni +12 -0
  13. data/example/adl/automaton.adl +49 -0
  14. data/example/adl/sample.adl +53 -0
  15. data/example/basic/characteristic_sample.adl +32 -0
  16. data/example/basic/target.adl +9 -0
  17. data/example/competition/31_test.adl +1500 -0
  18. data/example/competition/31_training.adl +1759 -0
  19. data/lib/stamina.rb +19 -0
  20. data/lib/stamina/adl.rb +298 -0
  21. data/lib/stamina/automaton.rb +1237 -0
  22. data/lib/stamina/automaton/walking.rb +336 -0
  23. data/lib/stamina/classifier.rb +37 -0
  24. data/lib/stamina/command/adl2dot_command.rb +73 -0
  25. data/lib/stamina/command/classify_command.rb +57 -0
  26. data/lib/stamina/command/redblue_command.rb +58 -0
  27. data/lib/stamina/command/rpni_command.rb +58 -0
  28. data/lib/stamina/command/stamina_command.rb +79 -0
  29. data/lib/stamina/errors.rb +20 -0
  30. data/lib/stamina/induction/commons.rb +170 -0
  31. data/lib/stamina/induction/redblue.rb +264 -0
  32. data/lib/stamina/induction/rpni.rb +188 -0
  33. data/lib/stamina/induction/union_find.rb +377 -0
  34. data/lib/stamina/input_string.rb +123 -0
  35. data/lib/stamina/loader.rb +0 -0
  36. data/lib/stamina/markable.rb +42 -0
  37. data/lib/stamina/sample.rb +190 -0
  38. data/lib/stamina/version.rb +14 -0
  39. data/stamina.gemspec +190 -0
  40. data/stamina.noespec +35 -0
  41. data/tasks/debug_mail.rake +78 -0
  42. data/tasks/debug_mail.txt +13 -0
  43. data/tasks/gem.rake +68 -0
  44. data/tasks/spec_test.rake +79 -0
  45. data/tasks/unit_test.rake +77 -0
  46. data/tasks/yard.rake +51 -0
  47. data/test/stamina/adl_test.rb +491 -0
  48. data/test/stamina/automaton_additional_test.rb +190 -0
  49. data/test/stamina/automaton_classifier_test.rb +155 -0
  50. data/test/stamina/automaton_test.rb +1092 -0
  51. data/test/stamina/automaton_to_dot_test.rb +64 -0
  52. data/test/stamina/automaton_walking_test.rb +206 -0
  53. data/test/stamina/exit.rb +3 -0
  54. data/test/stamina/induction/induction_test.rb +70 -0
  55. data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +19 -0
  56. data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +64 -0
  57. data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +9 -0
  58. data/test/stamina/induction/redblue_test.rb +83 -0
  59. data/test/stamina/induction/redblue_universal_expected.adl +4 -0
  60. data/test/stamina/induction/redblue_universal_sample.adl +5 -0
  61. data/test/stamina/induction/rpni_inria_expected.adl +7 -0
  62. data/test/stamina/induction/rpni_inria_sample.adl +9 -0
  63. data/test/stamina/induction/rpni_test.rb +129 -0
  64. data/test/stamina/induction/rpni_test_pta.dot +22 -0
  65. data/test/stamina/induction/rpni_universal_expected.adl +4 -0
  66. data/test/stamina/induction/rpni_universal_sample.adl +4 -0
  67. data/test/stamina/induction/union_find_test.rb +124 -0
  68. data/test/stamina/input_string_test.rb +323 -0
  69. data/test/stamina/markable_test.rb +70 -0
  70. data/test/stamina/randdfa.adl +66 -0
  71. data/test/stamina/sample.adl +4 -0
  72. data/test/stamina/sample_classify_test.rb +149 -0
  73. data/test/stamina/sample_test.rb +218 -0
  74. data/test/stamina/small_dfa.dot +16 -0
  75. data/test/stamina/small_dfa.gif +0 -0
  76. data/test/stamina/small_nfa.dot +18 -0
  77. data/test/stamina/small_nfa.gif +0 -0
  78. data/test/stamina/stamina_test.rb +69 -0
  79. data/test/test_all.rb +7 -0
  80. metadata +279 -0
@@ -0,0 +1,58 @@
1
+ require 'stamina/command/stamina_command'
2
+ require 'stamina/induction/redblue'
3
+ module Stamina
4
+ module Command
5
+
6
+ # Implementation of the redblue command line tool
7
+ class RedBlueCommand < StaminaCommand
8
+
9
+ # Creates a redblue command instance
10
+ def initialize
11
+ super("redblue", "[options] sample.adl",
12
+ "Executes RedBlue (Regular Positive and Negative Inference) on a ADL sample and\n"\
13
+ "flushes the induced DFA on the standard output in ADL format as well")
14
+ end
15
+
16
+ # Installs additional options
17
+ def options
18
+ super do |opt|
19
+ opt.on("-v", "--verbose", "Verbose mode") do
20
+ @verbose = true
21
+ end
22
+ opt.on("-o", "--output=OUTPUT",
23
+ "Flush induced DFA in output file") do |value|
24
+ assert_writable_file(value)
25
+ @output_file = value
26
+ end
27
+ end
28
+ end
29
+
30
+ # Sets the sample file
31
+ def sample_file=(file)
32
+ assert_readable_file(file)
33
+ puts "Parsing sample and building PTA" if @verbose
34
+ @sample = Stamina::ADL.parse_sample_file(file)
35
+ rescue Stamina::ADL::ParseError
36
+ raise ArgumentError, "#{file} is not a valid ADL sample file"
37
+ end
38
+
39
+ # Executes the command
40
+ def main(argv)
41
+ parse(argv, :sample_file)
42
+ t1 = Time.now
43
+ dfa = Stamina::Induction::RedBlue.execute(@sample, {:verbose => @verbose})
44
+ t2 = Time.now
45
+ if @output_file
46
+ File.open(@output_file, 'w') do |file|
47
+ Stamina::ADL.print_automaton(dfa, file)
48
+ end
49
+ else
50
+ Stamina::ADL.print_automaton(dfa, STDOUT)
51
+ end
52
+ puts "Executed in #{t2-t1} sec" if @verbose
53
+ end
54
+
55
+ end # class RedBlueCommand
56
+
57
+ end # module Command
58
+ end # module Stamina
@@ -0,0 +1,58 @@
1
+ require 'stamina/command/stamina_command'
2
+ require 'stamina/induction/rpni'
3
+ module Stamina
4
+ module Command
5
+
6
+ # Implementation of the rpni command line tool
7
+ class RPNICommand < StaminaCommand
8
+
9
+ # Creates a rpni command instance
10
+ def initialize
11
+ super("rpni", "[options] sample.adl",
12
+ "Executes RPNI (Regular Positive and Negative Inference) on a ADL sample and\n"\
13
+ "flushes the induced DFA on the standard output in ADL format as well")
14
+ end
15
+
16
+ # Installs additional options
17
+ def options
18
+ super do |opt|
19
+ opt.on("-v", "--verbose", "Verbose mode") do
20
+ @verbose = true
21
+ end
22
+ opt.on("-o", "--output=OUTPUT",
23
+ "Flush induced DFA in output file") do |value|
24
+ assert_writable_file(value)
25
+ @output_file = value
26
+ end
27
+ end
28
+ end
29
+
30
+ # Sets the sample file
31
+ def sample_file=(file)
32
+ assert_readable_file(file)
33
+ puts "Parsing sample and building PTA" if @verbose
34
+ @sample = Stamina::ADL.parse_sample_file(file)
35
+ rescue Stamina::ADL::ParseError
36
+ raise ArgumentError, "#{file} is not a valid ADL sample file"
37
+ end
38
+
39
+ # Executes the command
40
+ def main(argv)
41
+ parse(argv, :sample_file)
42
+ t1 = Time.now
43
+ dfa = Stamina::Induction::RPNI.execute(@sample, {:verbose => @verbose})
44
+ t2 = Time.now
45
+ if @output_file
46
+ File.open(@output_file, 'w') do |file|
47
+ Stamina::ADL.print_automaton(dfa, file)
48
+ end
49
+ else
50
+ Stamina::ADL.print_automaton(dfa, STDOUT)
51
+ end
52
+ puts "Executed in #{t2-t1} sec" if @verbose
53
+ end
54
+
55
+ end # class RPNICommand
56
+
57
+ end # module Command
58
+ end # module Stamina
@@ -0,0 +1,79 @@
1
+ require 'stamina'
2
+ require 'optparse'
3
+ module Stamina
4
+ module Command
5
+
6
+ # Helper to create stamina commands
7
+ class StaminaCommand
8
+
9
+ # Command name
10
+ attr_reader :name
11
+
12
+ # Command description
13
+ attr_reader :description
14
+
15
+ # Command usage
16
+ attr_reader :usage
17
+
18
+ # Creates a command with a name, usage and description
19
+ def initialize(name, usage, description)
20
+ @name = name
21
+ @usage = usage
22
+ @description = description
23
+ end
24
+
25
+ # Creates options
26
+ def options(&block)
27
+ OptionParser.new do |opt|
28
+ opt.program_name = name
29
+ opt.version = Stamina::VERSION
30
+ opt.release = nil
31
+ opt.summary_indent = ' ' * 4
32
+ banner = <<-EOF
33
+ # usage: #{opt.program_name} #{usage}
34
+ # #{description}
35
+ EOF
36
+ opt.banner = banner.gsub(/[ \t]+# /, "")
37
+ block.call(opt) if block
38
+ opt.on_tail("-h", "--help", "Show this message") do
39
+ puts opt
40
+ exit
41
+ end
42
+ end
43
+ end
44
+
45
+ # Prints usage (and optionally exits)
46
+ def show_usage(and_exit=true)
47
+ puts options
48
+ Kernel.exit if and_exit
49
+ end
50
+
51
+ # Checks that a given file is readable or raises an ArgumentError
52
+ def assert_readable_file(file)
53
+ raise ArgumentError, "File #{file} does not exists" unless File.exists?(file)
54
+ raise ArgumentError, "File #{file} cannot be read" unless File.readable?(file)
55
+ end
56
+
57
+ # Checks that a given file is writable or raises an ArgumentError
58
+ def assert_writable_file(file)
59
+ raise ArgumentError, "File #{file} cannot be written" \
60
+ unless not(File.exists?(file)) or File.writable?(file)
61
+ end
62
+
63
+ # Parses arguments and installs the remaining (non-option) ones through the writers named in _variables_
64
+ def parse(argv, *variables)
65
+ rest = options.parse(argv)
66
+ show_usage(true) unless rest.size==variables.size
67
+ variables.each_with_index do |var,i|
68
+ self.send("#{var}=".to_sym, rest[i])
69
+ end
70
+ rescue ArgumentError => ex
71
+ puts ex.message
72
+ puts
73
+ show_usage(true)
74
+ end
75
+
76
+ end # class StaminaCommand
77
+
78
+ end # module Command
79
+ end # module Stamina
@@ -0,0 +1,20 @@
1
+ module Stamina
2
+
3
+ # Main class of all stamina errors.
4
+ class StaminaError < StandardError; end
5
+
6
+ # Raised by samples implementations and other induction algorithms
7
+ # when a sample is inconsistent (same string labeled as being both
8
+ # positive and negative)
9
+ class InconsistencyError < StaminaError; end
10
+
11
+ # Specific errors of the ADL module.
12
+ module ADL
13
+
14
+ # Raised by the ADL module when an automaton, string or sample
15
+ # format is violated at parsing time.
16
+ class ParseError < StaminaError; end
17
+
18
+ end
19
+
20
+ end # module Stamina
@@ -0,0 +1,170 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Defines common utilities used by rpni and redblue. About acronyms:
6
+ # - _pta_ stands for Prefix Tree Acceptor
7
+ # - _ufds_ stands for Union-Find Data Structure
8
+ #
9
+ # Methods pta2ufds, sample2pta and sample2ufds are simply conversion methods used
10
+ # when the induction algorithm starts (executed on a sample, it first builds a pta
11
+ # then converts it to a union find). Method ufds2dfa is used when the algorithm ends,
12
+ # to convert the refined union find to a dfa.
13
+ #
14
+ # The merge_user_data method is probably the most important as it actually computes
15
+ # the merging of two states and builds information about merging for determinization.
16
+ #
17
+ module Commons
18
+
19
+ #
20
+ # Factors and returns a UnionFind data structure from a PTA, keeping natural order
21
+ # of its states for union-find elements. The resulting UnionFind contains a Hash as
22
+ # mergeable user data, presenting the following keys:
23
+ # - :initial, :accepting and :error flags of each state
24
+ # - :master indicating the index of the state in the PTA
25
+ # - :delta a delta function through a Hash {symbol => state_index}
26
+ #
27
+ # In this version, other user data attached to PTA states is lost during the
28
+ # conversion.
29
+ #
30
+ def pta2ufds(pta)
31
+ Stamina::Induction::UnionFind.new(pta.state_count) do |i|
32
+ state = pta.ith_state(i)
33
+ data = {:initial => state.initial?,
34
+ :accepting => state.accepting?,
35
+ :error => state.error?,
36
+ :master => i,
37
+ :delta => {}}
38
+ state.out_edges.each {|edge| data[:delta][edge.symbol] = edge.target.index}
39
+ data
40
+ end
41
+ end
42
+
43
+ #
44
+ # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
45
+ # that the states of the PTA are in lexical order, according to the <code><=></code>
46
+ # operator defined on symbols. States reached by negative strings are tagged as
47
+ # non accepting and error.
48
+ #
49
+ def sample2pta(sample)
50
+ Automaton.new do |pta|
51
+ initial_state = add_state(:initial => true, :accepting => false)
52
+
53
+ # Fill the PTA with each string
54
+ sample.each do |str|
55
+ # split string using the dfa
56
+ parsed, reached, remaining = pta.dfa_split(str, initial_state)
57
+
58
+ # remaining symbols are not empty -> build the PTA
59
+ unless remaining.empty?
60
+ remaining.each do |symbol|
61
+ newone = pta.add_state(:initial => false, :accepting => false, :error => false)
62
+ pta.connect(reached, newone, symbol)
63
+ reached = newone
64
+ end
65
+ end
66
+
67
+ # flag state
68
+ str.positive? ? reached.accepting! : reached.error!
69
+
70
+ # check consistency; this should not happen as Sample does not allow
71
+ # inconsistencies. It may occur only if _sample_ is not a Sample
72
+ # instance but some other enumerable.
73
+ raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
74
+ if (reached.error? and reached.accepting?)
75
+ end
76
+
77
+ # Reindex states by applying BFS
78
+ to_index, index = [initial_state], 0
79
+ until to_index.empty?
80
+ state = to_index.shift
81
+ state[:__index__] = index
82
+ state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each {|e| to_index << e.target}
83
+ index += 1
84
+ end
85
+ # Force the automaton to reindex
86
+ pta.order_states{|s0,s1| s0[:__index__]<=>s1[:__index__]}
87
+ # Remove marks
88
+ pta.states.each{|s| s.remove_mark(:__index__)}
89
+ end
90
+ end
91
+
92
+ #
93
+ # Converts a Sample instance to a 'ready to refine' union find data structure.
94
+ # This method is simply a shortcut for <code>pta2ufds(sample2pta(sample))</code>.
95
+ #
96
+ def sample2ufds(sample)
97
+ pta2ufds(sample2pta(sample))
98
+ end
99
+
100
+ #
101
+ # Computes the quotient automaton from a refined UnionFind data structure.
102
+ #
103
+ # In this version, only accepting and initial flags are taken into account
104
+ # when creating quotient automaton states. Other user data is lost during
105
+ # the conversion.
106
+ #
107
+ def ufds2dfa(ufds)
108
+ Automaton.new(false) do |fa|
109
+ mergeable_datas = ufds.mergeable_datas
110
+ mergeable_datas.each do |data|
111
+ state_data = data.reject {|key,value| [:master, :count, :delta].include?(key)}
112
+ state_data[:name] = data[:master].to_s
113
+ state_data[:error] = false
114
+ fa.add_state(state_data)
115
+ end
116
+ mergeable_datas.each do |data|
117
+ source = fa.get_state(data[:master].to_s)
118
+ data[:delta].each_pair do |symbol, target|
119
+ target = fa.get_state(ufds.find(target).to_s)
120
+ fa.connect(source, target, symbol)
121
+ end
122
+ end
123
+ end
124
+ end
125
+
126
+ #
127
+ # Merges two user data hashes _d1_ and _d2_ according to rules defined
128
+ # below. Also fills a _determinization_ array with pairs of state indices
129
+ # that are reached from d1 and d2 through the same symbol and should be
130
+ # merged for determinization. This method does NOT ensure that those pairs
131
+ # correspond to distinct states according to the union find. In other
132
+ # words, state indices in these pairs do not necessarily correspond to master
133
+ # states (see UnionFind for this term).
134
+ #
135
+ # Returns the resulting data if the merge is successful (does not lead to
136
+ # merging an error state with an accepting one), nil otherwise.
137
+ #
138
+ # The merging procedure for the different hash keys is as follows:
139
+ # - result[:initial] = d1[:initial] or d2[:initial]
140
+ # - result[:accepting] = d1[:accepting] or d2[:accepting]
141
+ # - result[:error] = d1[:error] or d2[:error]
142
+ # - result[:master] = min(d1[:master], d2[:master])
143
+ # - result[:delta] = merging of delta hashes, keeping smaller target index
144
+ # on key collisions.
145
+ #
146
+ def merge_user_data(d1, d2, determinization)
147
+ # we compute flags first
148
+ new_data = {:initial => d1[:initial] || d2[:initial],
149
+ :accepting => d1[:accepting] || d2[:accepting],
150
+ :error => d1[:error] || d2[:error],
151
+ :master => d1[:master] < d2[:master] ? d1[:master] : d2[:master]}
152
+
153
+ # merge failure if accepting and error states are merged
154
+ return nil if new_data[:accepting] and new_data[:error]
155
+
156
+ # we recompute the delta function of the resulting state
157
+ # keeping merging for determinization as pairs in _determinization_
158
+ new_data[:delta] = d1[:delta].merge(d2[:delta]) do |symbol, t1, t2|
159
+ determinization << [t1, t2]
160
+ t1 < t2 ? t1 : t2
161
+ end
162
+
163
+ # returns merged data
164
+ new_data
165
+ end
166
+
167
+ end # module Commons
168
+
169
+ end # module Induction
170
+ end # module Stamina
@@ -0,0 +1,264 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Implementation of the RedBlue variant of the RPNI algorithm (with the blue-fringe
6
+ # heuristics).
7
+ #
8
+ # See Lang, K., B. Pearlmutter, and R. Price. 1998. Results of the Abbadingo One DFA
9
+ # Learning Competition and a New Evidence-Driven State Merging Algorithm. In Grammatical
10
+ # Inference, pp. 1–12. Ames, IA: Springer-Verlag.
11
+ #
12
+ # Example:
13
+ # # sample typically comes from an ADL file
14
+ # sample = Stamina::ADL.parse_sample_file('sample.adl')
15
+ #
16
+ # # let RedBlue build the smallest dfa
17
+ # dfa = Stamina::Induction::RedBlue.execute(sample, {:verbose => true})
18
+ #
19
+ # Remarks:
20
+ # - Constructor and instance methods of this class are public but not intended
21
+ # to be used directly. They are left public for testing purposes only.
22
+ # - Having read the Stamina::Induction::RPNI base algorithm may help understanding
23
+ # this variant.
24
+ # - This class intensively uses the Stamina::Induction::UnionFind class and
25
+ # methods defined in the Stamina::Induction::Commons module which are worth
26
+ # reading to understand the algorithm implementation.
27
+ #
28
+ class RedBlue
29
+ include Stamina::Induction::Commons
30
+
31
+ # Union-find data structure used internally
32
+ attr_reader :ufds
33
+
34
+ # Additional options of the algorithm
35
+ attr_reader :options
36
+
37
+ #
38
+ # Creates an algorithm instance with specific options
39
+ #
40
+ def initialize(options={})
41
+ @options = options
42
+ end
43
+
44
+ #
45
+ # Computes the score of a single (group) merge. Returned value is 1 if both are
46
+ # accepting states or both are error states and 0 otherwise. Note that d1 and d2
47
+ # are expected to be merge compatible as this method does not distinguish this
48
+ # case.
49
+ #
50
+ def merge_score(d1, d2)
51
+ # Score of 1 if both accepting or both error
52
+ ((d1[:accepting] and d2[:accepting]) or (d1[:error] and d2[:error])) ? 1 : 0
53
+ end
54
+
55
+ #
56
+ # Merges a state of rank j with a state of lower rank i. This merge method
57
+ # includes merging for determinization. It returns nil if the merge is
58
+ # incompatible, a merge score otherwise.
59
+ #
60
+ # Preconditions:
61
+ # - States denoted by i and j are expected leader states (non merged ones)
62
+ # - States denoted by i and j are expected to be different
63
+ #
64
+ # Postconditions:
65
+ # - Union find is refined, states i and j having been merged, as well as all
66
+ # state pairs that need to be merged to ensure the deterministic property
67
+ # of the quotient automaton.
68
+ # - If the resulting quotient automaton is consistent with the negative sample,
69
+ # this method returns the number of accepting pairs + the number of error pairs
70
+ # that have been merged. The refined union-find correctly encodes the quotient
71
+ # automaton. Otherwise, the method returns nil and the union-find information
72
+ # must be considered inaccurate.
73
+ #
74
+ def merge_and_determinize(i, j)
75
+ # Make the union (keep merging score as well as additional merges to be performed
76
+ # in score and determinization, respectively). Recompute the user data attached to
77
+ # the new state group (new_data)
78
+ determinization, score = [], nil
79
+ @ufds.union(i, j) do |d1, d2|
80
+ # states are incompatible if new_data cannot be created because it would
81
+ # lead to merging an error state with an accepting one. We simply return nil in this
82
+ # case...
83
+ return nil unless (new_data = merge_user_data(d1, d2, determinization))
84
+ # otherwise, we score
85
+ score = merge_score(d1, d2)
86
+ # and we let the union find keep the new_data for the group
87
+ new_data
88
+ end
89
+
90
+ # Merge for determinization starts here, based on the determinization array
91
+ # computed as a side effect of merge_user_data
92
+ determinization.each do |pair|
93
+ # we take the leader states of the pair to merge
94
+ pair = pair.collect{|i| @ufds.find(i)}
95
+ # do nothing if already the same leader state
96
+ next if pair[0]==pair[1]
97
+ # otherwise recurse and keep subscore
98
+ subscore = merge_and_determinize(pair[0], pair[1])
99
+ # failure if merging for determinization led to merge error and accepting
100
+ # states
101
+ return nil if subscore.nil?
102
+ # this is the new score
103
+ score += subscore
104
+ end
105
+
106
+ score
107
+ end
108
+
109
+ #
110
+ # Evaluates the score of merging states i and j. Returns nil if the states
111
+ # cannot be merged, a non-negative score otherwise.
112
+ #
113
+ # Preconditions:
114
+ # - States denoted by i and j are expected leader states (non merged ones)
115
+ # - States denoted by i and j are expected to be different
116
+ #
117
+ # Postconditions:
118
+ # - Returned value is nil if the quotient automaton would be incompatible with
119
+ # the sample. Otherwise a non-negative number is returned, encoding the number of
120
+ # interesting pairs that have been merged (interesting = both accepting or both
121
+ # error)
122
+ # - The union find is ALWAYS restored to its previous value after merging has
123
+ # been evaluated and is then seen unchanged by the caller.
124
+ #
125
+ def merge_and_determinize_score(i, j)
126
+ # score the merging, always rollback the transaction
127
+ score = nil
128
+ @ufds.transactional do
129
+ score = merge_and_determinize(i, j)
130
+ false
131
+ end
132
+ score
133
+ end
134
+
135
+ #
136
+ # Computes the fringe given the current union find. The fringe is returned as an
137
+ # array of state indices.
138
+ #
139
+ # Postconditions:
140
+ # - Returned array contains indices of leader states only.
141
+ # - Returned array is disjoint with the kernel.
142
+ #
143
+ def fringe
144
+ fringe = []
145
+ @kernel.each do |k1|
146
+ delta = @ufds.mergeable_data(k1)[:delta]
147
+ delta.each_pair{|symbol, target| fringe << @ufds.find(target)}
148
+ end
149
+ (fringe - @kernel).sort
150
+ end
151
+
152
+ #
153
+ # Main method of the algorithm. Refines the union find passed as first argument
154
+ # by merging well chosen state pairs. Returns the refined union find.
155
+ #
156
+ # Preconditions:
157
+ # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
158
+ # and :error boolean flags as well as a :delta sub hash)
159
+ #
160
+ # Postconditions:
161
+ # - The union find has been refined. It encodes a quotient automaton (of the PTA
162
+ # it comes from) such that all positive and negative strings of the underlying
163
+ # sample are correctly classified by it.
164
+ #
165
+ def main(ufds)
166
+ puts "Starting RedBlue (#{ufds.size} states)" if @options[:verbose]
167
+ @ufds, @kernel = ufds, [0]
168
+
169
+ # we do it until the fringe is empty (compute it only once each step)
170
+ until (the_fringe=fringe).empty?
171
+ # state to consolidate (if any)
172
+ to_consolidate = nil
173
+ # best candidate [source index, target index, score]
174
+ best = [nil, nil, -1]
175
+
176
+ # for each state on the fringe as merge candidate
177
+ the_fringe.each do |candidate|
178
+ to_consolidate = candidate
179
+
180
+ # evaluate score of merging candidate with each kernel state
181
+ @kernel.each do |target|
182
+ score = merge_and_determinize_score(candidate, target)
183
+ unless score.nil?
184
+ # if a score has been found, the candidate will not be
185
+ # consolidated. We keep it as best if its better than the
186
+ # previous one
187
+ to_consolidate = nil
188
+ best = [candidate, target, score] if score > best[2]
189
+ end
190
+ end
191
+
192
+ # No possible target, break the loop (will consolidate right now)!
193
+ break unless to_consolidate.nil?
194
+ end
195
+
196
+ # If not found, the last candidate must be consolidated. Otherwise, we
197
+ # do the best merging
198
+ unless to_consolidate.nil?
199
+ puts "Consolidation of #{to_consolidate}" if @options[:verbose]
200
+ @kernel << to_consolidate
201
+ else
202
+ puts "Merging #{best[0]} and #{best[1]} [#{best[2]}]" if @options[:verbose]
203
+ # this one should never fail because the same merge was scored successfully (non-nil) before
204
+ raise "Unexpected case" unless merge_and_determinize(best[0], best[1])
205
+ end
206
+
207
+ # redblue does not guarantee that it will not merge a state of lower rank
208
+ # with a kernel state. The kernel should then be updated at each step to keep
209
+ # lowest indices for the whole kernel, and we sort it
210
+ @kernel = @kernel.collect{|k| @ufds.find(k)}.sort
211
+ end
212
+
213
+ # return the refined union find now
214
+ @ufds
215
+ end
216
+
217
+ #
218
+ # Build the smallest DFA compatible with the sample given as input.
219
+ #
220
+ # Preconditions:
221
+ # - The sample is consistent (does not contain the same string labeled as both
222
+ # positive and negative) and contains at least one string.
223
+ #
224
+ # Postconditions:
225
+ # - The returned DFA is the smallest DFA that correctly labels the learning sample
226
+ # given as input.
227
+ #
228
+ # Remarks:
229
+ # - This instance version of RedBlue.execute is not intended to be used directly and
230
+ # is mainly provided for testing purposes. Please use the class variant of this
231
+ # method if possible.
232
+ #
233
+ def execute(sample)
234
+ # create union-find
235
+ puts "Creating PTA and UnionFind structure" if @options[:verbose]
236
+ ufds = sample2ufds(sample)
237
+ # refine it
238
+ ufds = main(ufds)
239
+ # compute and return quotient automaton
240
+ ufds2dfa(ufds)
241
+ end
242
+
243
+ #
244
+ # Build the smallest DFA compatible with the sample given as input.
245
+ #
246
+ # Options (the _options_ hash):
247
+ # - :verbose can be set to true to trace algorithm execution on standard output.
248
+ #
249
+ # Preconditions:
250
+ # - The sample is consistent (does not contain the same string labeled as both
251
+ # positive and negative) and contains at least one string.
252
+ #
253
+ # Postconditions:
254
+ # - The returned DFA is the smallest DFA that correctly labels the learning sample
255
+ # given as input.
256
+ #
257
+ def self.execute(sample, options={})
258
+ RedBlue.new(options).execute(sample)
259
+ end
260
+
261
+ end # class RedBlue
262
+
263
+ end # module Induction
264
+ end # module Stamina