stamina 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. data/CHANGELOG.md +22 -5
  2. data/LICENCE.md +2 -2
  3. data/bin/stamina +1 -7
  4. data/lib/stamina.rb +10 -19
  5. metadata +54 -333
  6. data/.gemtest +0 -0
  7. data/Gemfile +0 -2
  8. data/Gemfile.lock +0 -37
  9. data/Manifest.txt +0 -16
  10. data/README.md +0 -78
  11. data/Rakefile +0 -23
  12. data/example/adl/automaton.adl +0 -49
  13. data/example/adl/sample.adl +0 -53
  14. data/example/basic/characteristic_sample.adl +0 -32
  15. data/example/basic/target.adl +0 -9
  16. data/example/competition/31_test.adl +0 -1500
  17. data/example/competition/31_training.adl +0 -1759
  18. data/lib/stamina/abbadingo.rb +0 -2
  19. data/lib/stamina/abbadingo/random_dfa.rb +0 -48
  20. data/lib/stamina/abbadingo/random_sample.rb +0 -146
  21. data/lib/stamina/adl.rb +0 -298
  22. data/lib/stamina/automaton.rb +0 -1263
  23. data/lib/stamina/automaton/complete.rb +0 -36
  24. data/lib/stamina/automaton/equivalence.rb +0 -55
  25. data/lib/stamina/automaton/metrics.rb +0 -78
  26. data/lib/stamina/automaton/minimize.rb +0 -25
  27. data/lib/stamina/automaton/minimize/hopcroft.rb +0 -116
  28. data/lib/stamina/automaton/minimize/pitchies.rb +0 -64
  29. data/lib/stamina/automaton/strip.rb +0 -16
  30. data/lib/stamina/automaton/walking.rb +0 -363
  31. data/lib/stamina/classifier.rb +0 -52
  32. data/lib/stamina/command.rb +0 -45
  33. data/lib/stamina/command/abbadingo_dfa.rb +0 -81
  34. data/lib/stamina/command/abbadingo_samples.rb +0 -40
  35. data/lib/stamina/command/adl2dot.rb +0 -71
  36. data/lib/stamina/command/classify.rb +0 -48
  37. data/lib/stamina/command/help.rb +0 -27
  38. data/lib/stamina/command/infer.rb +0 -141
  39. data/lib/stamina/command/metrics.rb +0 -51
  40. data/lib/stamina/command/robustness.rb +0 -22
  41. data/lib/stamina/command/score.rb +0 -35
  42. data/lib/stamina/errors.rb +0 -23
  43. data/lib/stamina/ext/math.rb +0 -20
  44. data/lib/stamina/induction/blue_fringe.rb +0 -265
  45. data/lib/stamina/induction/commons.rb +0 -156
  46. data/lib/stamina/induction/rpni.rb +0 -186
  47. data/lib/stamina/induction/union_find.rb +0 -377
  48. data/lib/stamina/input_string.rb +0 -123
  49. data/lib/stamina/loader.rb +0 -1
  50. data/lib/stamina/markable.rb +0 -42
  51. data/lib/stamina/sample.rb +0 -267
  52. data/lib/stamina/scoring.rb +0 -213
  53. data/lib/stamina/utils.rb +0 -1
  54. data/lib/stamina/utils/decorate.rb +0 -81
  55. data/lib/stamina/version.rb +0 -14
  56. data/stamina.gemspec +0 -191
  57. data/stamina.noespec +0 -32
  58. data/tasks/debug_mail.rake +0 -78
  59. data/tasks/debug_mail.txt +0 -13
  60. data/tasks/gem.rake +0 -68
  61. data/tasks/spec_test.rake +0 -79
  62. data/tasks/unit_test.rake +0 -77
  63. data/tasks/yard.rake +0 -51
  64. data/test/stamina/abbadingo/random_dfa_test.rb +0 -16
  65. data/test/stamina/abbadingo/random_sample_test.rb +0 -78
  66. data/test/stamina/adl_test.rb +0 -516
  67. data/test/stamina/automaton/classifier_test.rb +0 -259
  68. data/test/stamina/automaton/complete_test.rb +0 -58
  69. data/test/stamina/automaton/equivalence_test.rb +0 -120
  70. data/test/stamina/automaton/metrics_test.rb +0 -36
  71. data/test/stamina/automaton/minimize/hopcroft_test.rb +0 -15
  72. data/test/stamina/automaton/minimize/minimize_test.rb +0 -55
  73. data/test/stamina/automaton/minimize/pitchies_test.rb +0 -15
  74. data/test/stamina/automaton/minimize/rice_edu_10.adl +0 -16
  75. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +0 -13
  76. data/test/stamina/automaton/minimize/rice_edu_13.adl +0 -13
  77. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +0 -7
  78. data/test/stamina/automaton/minimize/should_strip_1.adl +0 -8
  79. data/test/stamina/automaton/minimize/should_strip_1.min.adl +0 -6
  80. data/test/stamina/automaton/minimize/unknown_1.adl +0 -16
  81. data/test/stamina/automaton/minimize/unknown_1.min.adl +0 -12
  82. data/test/stamina/automaton/strip_test.rb +0 -36
  83. data/test/stamina/automaton/to_dot_test.rb +0 -64
  84. data/test/stamina/automaton/walking/dfa_delta_test.rb +0 -39
  85. data/test/stamina/automaton/walking_test.rb +0 -206
  86. data/test/stamina/automaton_additional_test.rb +0 -190
  87. data/test/stamina/automaton_test.rb +0 -1104
  88. data/test/stamina/exit.rb +0 -3
  89. data/test/stamina/induction/blue_fringe_test.rb +0 -83
  90. data/test/stamina/induction/induction_test.rb +0 -70
  91. data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +0 -19
  92. data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +0 -64
  93. data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +0 -9
  94. data/test/stamina/induction/redblue_universal_expected.adl +0 -4
  95. data/test/stamina/induction/redblue_universal_sample.adl +0 -5
  96. data/test/stamina/induction/rpni_inria_expected.adl +0 -7
  97. data/test/stamina/induction/rpni_inria_sample.adl +0 -9
  98. data/test/stamina/induction/rpni_test.rb +0 -129
  99. data/test/stamina/induction/rpni_test_pta.dot +0 -22
  100. data/test/stamina/induction/rpni_universal_expected.adl +0 -4
  101. data/test/stamina/induction/rpni_universal_sample.adl +0 -4
  102. data/test/stamina/induction/union_find_test.rb +0 -124
  103. data/test/stamina/input_string_test.rb +0 -323
  104. data/test/stamina/markable_test.rb +0 -70
  105. data/test/stamina/randdfa.adl +0 -66
  106. data/test/stamina/sample.adl +0 -4
  107. data/test/stamina/sample_classify_test.rb +0 -149
  108. data/test/stamina/sample_test.rb +0 -290
  109. data/test/stamina/scoring_test.rb +0 -63
  110. data/test/stamina/small_dfa.dot +0 -16
  111. data/test/stamina/small_dfa.gif +0 -0
  112. data/test/stamina/small_nfa.dot +0 -18
  113. data/test/stamina/small_nfa.gif +0 -0
  114. data/test/stamina/stamina_test.rb +0 -80
  115. data/test/stamina/utils/decorate_test.rb +0 -65
  116. data/test/test_all.rb +0 -7
@@ -1,51 +0,0 @@
1
- module Stamina
2
- class Command
3
- #
4
- # Prints metrics about an automaton or sample
5
- #
6
- # SYNOPSIS
7
- # #{program_name} #{command_name} [file.adl]
8
- #
9
- # OPTIONS
10
- # #{summarized_options}
11
- #
12
- class Metrics < Quickl::Command(__FILE__, __LINE__)
13
- include Robustness
14
-
15
- # Install options
16
- options do |opt|
17
-
18
- end # options
19
-
20
- # Command execution
21
# Prints metrics about the automaton or the sample described by an ADL source.
#
# args - at most one element: the path of the ADL file to inspect. When no
#        path is given, the ADL source is read from standard input.
#
# Raises Quickl::Help when more than one argument is given.
def execute(args)
  raise Quickl::Help unless args.size <= 1

  # Loads the ADL source. Standard input is read verbatim: readlines keeps
  # line terminators, so joining its result with "\n" would double them.
  input = if args.size == 1
    File.read assert_readable_file(args.first)
  else
    $stdin.read
  end

  # The source is first parsed as an automaton; on a parse error it is
  # parsed again as a sample.
  begin
    target = Stamina::ADL::parse_automaton(input)
    puts "Alphabet size: #{target.alphabet_size}"
    puts "State count: #{target.state_count}"
    puts "Edge count: #{target.edge_count}"
    puts "Degree (avg): #{target.avg_degree}"
    puts "Accepting ratio: #{target.accepting_ratio}"
    puts "Depth: #{target.depth}"
  rescue Stamina::ADL::ParseError
    sample = Stamina::ADL::parse_sample(input)
    puts "Size: #{sample.size}"
    puts "Positive: #{sample.positive_count} (#{sample.positive_count.to_f / sample.size})"
    puts "Negative: #{sample.negative_count} (#{sample.negative_count.to_f / sample.size})"
  end
end
47
-
48
- end # class Metrics
49
- end # class Command
50
- end # module Stamina
51
-
@@ -1,22 +0,0 @@
1
- module Stamina
2
- class Command
3
- module Robustness
4
-
5
- # Checks that a given file is readable or raises a Quickl::IOAccessError
6
def assert_readable_file(file)
  # File.exist? is used on purpose: the File.exists? alias is no longer
  # available on recent Ruby versions
  raise Quickl::IOAccessError, "File #{file} does not exists" unless File.exist?(file)
  raise Quickl::IOAccessError, "File #{file} cannot be read" unless File.readable?(file)
  # the path is returned so that the check can be chained with a read
  file
end
11
-
12
- # Checks that a given file is writable or raises a Quickl::IOAccessError
13
def assert_writable_file(file)
  # A file that does not exist yet is accepted; only an existing file that
  # is not writable is rejected. File.exist? is used on purpose: the
  # File.exists? alias is no longer available on recent Ruby versions.
  if File.exist?(file) && !File.writable?(file)
    raise Quickl::IOAccessError, "File #{file} cannot be written"
  end
  # the path is returned so that the check can be chained with a write
  file
end
18
-
19
- end # module Robustness
20
- end # class Command
21
- end # module Stamina
22
-
@@ -1,35 +0,0 @@
1
- module Stamina
2
- class Command
3
- #
4
- # Scores the labelling of a sample by an automaton
5
- #
6
- # SYNOPSIS
7
- # #{program_name} #{command_name} sample.adl automaton.adl
8
- #
9
- # OPTIONS
10
- # #{summarized_options}
11
- #
12
- class Score < Quickl::Command(__FILE__, __LINE__)
13
- include Robustness
14
-
15
- # Install options
16
- options do |opt|
17
-
18
- end # options
19
-
20
- # Command execution
21
def execute(args)
  raise Quickl::Help unless args.size == 2

  # first argument is the sample file, second one the automaton file
  sample_path, automaton_path = args.first, args.last
  sample = Stamina::ADL::parse_sample_file(assert_readable_file(sample_path))
  automaton = Stamina::ADL::parse_automaton_file(assert_readable_file(automaton_path))

  # labelling computed by the automaton against the sample's own labelling
  predicted = automaton.signature(sample)
  expected = sample.signature
  puts Scoring.scoring(predicted, expected).to_s
end
31
-
32
- end # class Score
33
- end # class Command
34
- end # module Stamina
35
-
@@ -1,23 +0,0 @@
1
module Stamina

  # Raised when an algorithm explicitly aborts something
  class Abord < StandardError
  end

  # Root class of all stamina errors.
  class StaminaError < StandardError
  end

  # Raised by sample implementations and other induction algorithms
  # when a sample is inconsistent (same string labeled as being both
  # positive and negative)
  class InconsistencyError < StaminaError
  end

  # Errors that are specific to the ADL module.
  module ADL

    # Raised by the ADL module when an automaton, string or sample
    # format is violated at parsing time.
    class ParseError < StaminaError
    end

  end

end # module Stamina
@@ -1,20 +0,0 @@
1
# Feature detection is used instead of comparing RUBY_VERSION strings:
# each helper is defined only when Math does not already respond to it.

# Base-2 logarithm of x (built into Math on recent Ruby versions).
unless Math.respond_to?(:log2)
  def Math.log2( x )
    Math.log( x ) / Math.log( 2 )
  end
end

# Logarithm of x in base n. Defined whenever Math does not provide it, so
# that it is available on every Ruby version.
unless Math.respond_to?(:logn)
  def Math.logn( x, n )
    Math.log( x ) / Math.log( n )
  end
end
12
-
13
# Returns the greater of i and j (j when both compare equal).
def Math.max(i, j)
  return i if i > j
  j
end
16
-
17
# Returns the smaller of i and j (j when both compare equal).
def Math.min(i, j)
  return i if i < j
  j
end
20
-
@@ -1,265 +0,0 @@
1
- module Stamina
2
- module Induction
3
-
4
- #
5
- # Implementation of the BlueFringe variant of the RPNI algorithm (with the blue-fringe
6
- # heuristics).
7
- #
8
- # See Lang, K., B. Pearlmutter, and R. Price. 1998. Results of the Abbadingo One DFA
9
- # Learning Competition and a New Evidence-Driven State Merging Algorithm, In Grammatical
10
- # Inference, pp. 1–12. Ames, IO: Springer-Verlag.
11
- #
12
- # Example:
13
- # # sample typically comes from an ADL file
14
- # sample = Stamina::ADL.parse_sample_file('sample.adl')
15
- #
16
- # # let BlueFringe build the smallest dfa
17
- # dfa = Stamina::Induction::BlueFringe.execute(sample, {:verbose => true})
18
- #
19
- # Remarks:
20
- # - Constructor and instance methods of this class are public but not intended
21
- # to be used directly. They are left public for testing purposes only.
22
- # - Having read the Stamina::Induction::RPNI base algorithm may help understanding
23
- # this variant.
24
- # - This class intensively uses the Stamina::Induction::UnionFind class and
25
- # methods defined in the Stamina::Induction::Commons module which are worth
26
- # reading to understand the algorithm implementation.
27
- #
28
- class BlueFringe
29
- include Stamina::Induction::Commons
30
-
31
- # Union-find data structure used internally
32
- attr_reader :ufds
33
-
34
- # Creates an algorithm instance with given options.
35
def initialize(options={})
  unless options.is_a?(Hash)
    raise ArgumentError, "Invalid options #{options.inspect}"
  end
  # options given by the caller take precedence over the defaults
  @options = DEFAULT_OPTIONS.merge(options)
  # cache used when scoring candidate merges
  @score_cache = {}
end
40
-
41
- #
42
- # Computes the score of a single (group) merge. Returned value is 1 if both are
43
- # accepting states or both are error states and 0 otherwise. Note that d1 and d2
44
- # are expected to be merge compatible as this method does not distinguish this
45
- # case.
46
- #
47
def merge_score(d1, d2)
  # one point when both states are accepting...
  return 1 if d1[:accepting] && d2[:accepting]
  # ... or when both are error states
  return 1 if d1[:error] && d2[:error]
  0
end
51
-
52
- #
53
- # Merges a state of rank j with a state of lower rank i. This merge method
54
- # includes merging for determinization. It returns nil if the merge is
55
- # incompatible, a merge score otherwise.
56
- #
57
- # Preconditions:
58
- # - States denoted by i and j are expected leader states (non merged ones)
59
- # - States denoted by i and j are expected to be different
60
- #
61
- # Postconditions:
62
- # - Union find is refined, states i and j having been merged, as well as all
63
- # state pairs that need to be merged to ensure the deterministic property
64
- # of the quotient automaton.
65
- # - If the resulting quotient automaton is consistent with the negative sample,
66
- # this method returns the number of accepting pairs + the number of error pairs
67
- # that have been merged. The refined union-find correctly encodes the quotient
68
- # automaton. Otherwise, the method returns nil and the union-find information
69
- # must be considered inaccurate.
70
- #
71
def merge_and_determinize(i, j)
  # pending receives, as a side effect of merge_user_data, the state pairs
  # that must be merged afterwards to keep the quotient deterministic
  pending = []
  score = nil
  @ufds.union(i, j) do |left, right|
    merged = merge_user_data(left, right, pending)
    # no merged data means the states are incompatible: leave the whole
    # method (not only the block) with nil
    return nil unless merged
    score = merge_score(left, right)
    # the union find keeps the merged data for the new group
    merged
  end

  # merging for determinization
  pending.each do |pair|
    # pairs hold arbitrary indices; only leader states are merged
    leaders = pair.collect { |index| @ufds.find(index) }
    # nothing to do when both already belong to the same group
    next if leaders[0] == leaders[1]
    subscore = merge_and_determinize(leaders[0], leaders[1])
    # a failed sub-merge fails the whole merge
    return nil if subscore.nil?
    score += subscore
  end

  score
end
105
-
106
- #
107
- # Evaluates the score of merging states i and j. Returns nil if the states
107
- # cannot be merged, a non-negative score otherwise.
109
- #
110
- # Preconditions:
111
- # - States denoted by i and j are expected leader states (non merged ones)
112
- # - States denoted by i and j are expected to be different
113
- #
114
- # Postconditions:
115
- # - Returned value is nil if the quotient automaton would be incompatible with
116
- # the sample. Otherwise a positive number is returned, encoding the number of
117
- # interesting pairs that have been merged (interesting = both accepting or both
118
- # error)
119
- # - The union find is ALWAYS restored to its previous value after merging has
120
- # been evaluated and is then seen unchanged by the caller.
121
- #
122
def merge_and_determinize_score(i, j)
  key = [i, j]
  cached = @score_cache[key]
  unless cached
    outcome = nil
    # the block answers false so that the transaction is always rolled back
    @ufds.transactional do
      outcome = merge_and_determinize(i, j)
      false
    end
    # -1 is stored in the cache for an impossible merge
    cached = @score_cache[key] = (outcome || -1)
  end
  cached == -1 ? nil : cached
end
134
-
135
- #
136
- # Computes the fringe given the current union find. The fringe is returned as an
137
- # array of state indices.
138
- #
139
- # Postconditions:
140
- # - Returned array contains indices of leader states only.
141
- # - Returned array is disjoint with the kernel.
142
- #
143
def fringe
  reached = []
  @kernel.each do |kernel_state|
    delta = @ufds.mergeable_data(kernel_state)[:delta]
    # targets are turned into the leader state of their group
    delta.each_value { |target| reached << @ufds.find(target) }
  end
  # a state reached from several kernel states is reported only once, so
  # that callers do not evaluate the same candidate several times
  (reached - @kernel).uniq.sort
end
151
-
152
- #
153
- # Main method of the algorithm. Refines the union find passed as first argument
154
- # by merging well chosen state pairs. Returns the refined union find.
155
- #
156
- # Preconditions:
157
- # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
158
- # and :error boolean flags as well as a :delta sub hash)
159
- #
160
- # Postconditions:
161
- # - The union find has been refined. It encodes a quotient automaton (of the PTA
162
- # it comes from) such that all positive and negative strings of the underlying
163
- # sample are correctly classified by it.
164
- #
165
def main(ufds)
  info("Starting BlueFringe (#{ufds.size} states)")
  @ufds = ufds
  @kernel = [0]
  @score_cache = {}

  # one step per iteration, the fringe being computed once per step
  until (blue_states = fringe).empty?
    # fringe state that no kernel state can absorb (if any)
    unmergeable = nil
    # best merge found so far
    best_source, best_target, best_score = nil, nil, -1

    blue_states.each do |candidate|
      # unmergeable until a kernel state proves to be compatible
      unmergeable = candidate

      @kernel.each do |target|
        score = merge_and_determinize_score(candidate, target)
        next if score.nil?
        # a compatible kernel state exists: no consolidation for this
        # candidate, which is kept when it beats the best score so far
        unmergeable = nil
        if score > best_score
          best_source, best_target, best_score = candidate, target, score
        end
      end

      # no possible target: stop searching, consolidate right now
      break unless unmergeable.nil?
    end

    if unmergeable.nil?
      @score_cache.clear
      info("Merging #{best_source} and #{best_target} [#{best_score}]")
      # the merge was successfully scored just before
      raise "Unexpected case" unless merge_and_determinize(best_source, best_target)
    else
      info("Consolidation of #{unmergeable}")
      @kernel << unmergeable
    end

    # a merge may change the leader of a kernel group: the kernel is
    # rebuilt from the current leader states and kept sorted
    @kernel = @kernel.map { |state| @ufds.find(state) }.sort
  end

  # the refined union find
  @ufds
end
217
-
218
- #
219
- # Build the smallest DFA compatible with the sample given as input.
220
- #
221
- # Preconditions:
222
- # - The sample is consistent (does not contain the same string both labeled as
223
- # positive and negative) and contains at least one string.
224
- #
225
- # Postconditions:
226
- # - The returned DFA is the smallest DFA that correctly labels the learning sample
227
- # given as input.
228
- #
229
- # Remarks:
230
- # - This instance version of BlueFringe.execute is not intended to be used directly and
231
- # is mainly provided for testing purposes. Please use the class variant of this
232
- # method if possible.
233
- #
234
def execute(sample)
  info("Creating PTA and UnionFind structure")
  # sample -> union find -> refined union find
  refined = main(sample2ufds(sample))
  # the quotient automaton is the result
  ufds2dfa(refined)
end
243
-
244
- #
245
- # Build the smallest DFA compatible with the sample given as input.
246
- #
247
- # Options (the _options_ hash):
248
- # - :verbose can be set to true to trace algorithm execution on standard output.
249
- #
250
- # Preconditions:
251
- # - The sample is consistent (does not contain the same string both labeled as
252
- # positive and negative) and contains at least one string.
253
- #
254
- # Postconditions:
255
- # - The returned DFA is the smallest DFA that correctly labels the learning sample
256
- # given as input.
257
- #
258
def self.execute(sample, options={})
  # a fresh algorithm instance is used for each run
  algorithm = BlueFringe.new(options)
  algorithm.execute(sample)
end
261
-
262
- end # class BlueFringe
263
-
264
- end # module Induction
265
- end # module Stamina
@@ -1,156 +0,0 @@
1
- module Stamina
2
- module Induction
3
-
4
- #
5
- # Defines common utilities used by rpni and blue_fringe. About acronyms:
6
- # - _pta_ stands for Prefix Tree Acceptor
7
- # - _ufds_ stands for Union-Find Data Structure
8
- #
9
- # Methods pta2ufds and sample2ufds are simply conversion methods used when the induction
10
- # algorithm starts (executed on a sample, it first builds a pta then converts it to a union
11
- # find). Method ufds2dfa is used when the algorithm ends, to convert refined union find to
12
- # a dfa.
13
- #
14
- # The merge_user_data method is probably the most important as it actually computes
15
- # the merging of two states and build information about merging for determinization.
16
- #
17
- module Commons
18
-
19
# Default options: verbose mode is off and trace messages are written
# on the standard error stream.
DEFAULT_OPTIONS = {:verbose => false, :verbose_io => $stderr}
23
-
24
- # Additional options of the algorithm
25
- attr_reader :options
26
-
27
- # Is the verbose mode on ?
28
def verbose?
  # only a true answer is remembered; a false one is computed again from
  # the options on the next call
  @verbose = !!options[:verbose] unless @verbose
  @verbose
end
31
-
32
# Returns the IO object on which trace messages are written: the
# :verbose_io option when set, the standard error stream otherwise.
def verbose_io
  @verbose_io = (options[:verbose_io] || $stderr) unless @verbose_io
  @verbose_io
end
35
-
36
- # Display an information message (when verbose)
37
def info(msg)
  # silent unless the verbose mode is on
  return unless verbose?
  io = verbose_io
  io << msg
  io << "\n"
  # flushed at once so that the trace follows the algorithm execution
  io.flush
end
43
-
44
- #
45
- # Factors and returns a UnionFind data structure from a PTA, keeping natural order
46
- # of its states for union-find elements. The resulting UnionFind contains a Hash as
47
- # mergeable user data, presenting the following keys:
48
- # - :initial, :accepting and :error flags of each state
49
- # - :master indicating the index of the state in the PTA
50
- # - :delta a delta function through a Hash {symbol => state_index}
51
- #
52
- # In this version, other user data attached to PTA states is lost during the
53
- # conversion.
54
- #
55
def pta2ufds(pta)
  Stamina::Induction::UnionFind.new(pta.state_count) do |index|
    state = pta.ith_state(index)
    data = {}
    data[:initial] = state.initial?
    data[:accepting] = state.accepting?
    data[:error] = state.error?
    # each element starts as the master of its own group
    data[:master] = index
    # delta maps each outgoing symbol to the index of the reached state
    data[:delta] = state.out_edges.inject({}) do |delta, edge|
      delta[edge.symbol] = edge.target.index
      delta
    end
    data
  end
end
67
-
68
- #
69
- # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
70
- # that the states of the PTA are in lexical order, according to the <code><=></code>
71
- # operator defined on symbols. States reached by negative strings are tagged as
72
- # non accepting and error.
73
- #
74
def sample2pta(sample)
  # the conversion itself is delegated to the sample
  pta = sample.to_pta
  pta
end
77
-
78
- #
79
- # Converts a Sample instance to a 'ready to refine' union find data structure.
80
- # This method is simply a shortcut for <code>pta2ufds(sample2pta(sample))</code>.
81
- #
82
def sample2ufds(sample)
  # sample -> pta -> union find
  pta = sample2pta(sample)
  pta2ufds(pta)
end
85
-
86
- #
87
- # Computes the quotient automaton from a refined UnionFind data structure.
88
- #
89
- # In this version, only accepting and initial flags are taken into account
90
- # when creating quotient automaton states. Other user data is lost during
91
- # the conversion.
92
- #
93
def ufds2dfa(ufds)
  Automaton.new(false) do |fa|
    groups = ufds.mergeable_datas

    # first pass: one state per group, named after its master index
    groups.each do |data|
      attributes = data.reject do |key, _|
        key == :master || key == :count || key == :delta
      end
      attributes[:name] = data[:master].to_s
      # the error flag is always false on the states of the result
      attributes[:error] = false
      fa.add_state(attributes)
    end

    # second pass: edges, each target being replaced by the leader of
    # its group
    groups.each do |data|
      from = fa.get_state(data[:master].to_s)
      data[:delta].each_pair do |symbol, target_index|
        to = fa.get_state(ufds.find(target_index).to_s)
        fa.connect(from, to, symbol)
      end
    end
  end
end
111
-
112
- #
113
- # Merges two user data hashes _d1_ and _d2_ according to rules defined
114
- # below. Also fills a _determinization_ array with pairs of state indices
115
- # that are reached from d1 and d2 through the same symbol and should be
116
- # merged for determinization. This method does NOT ensure that those pairs
117
- # correspond to distinguish states according to the union find. In other
118
- # words state indices in these pairs do not necessarily corespond to master
119
- # states (see UnionFind for this term).
120
- #
121
- # Returns the resulting data if the merge is successful (does not lead to
122
- # merging an error state with an accepting one), nil otherwise.
123
- #
124
- # The merging procedure for the different hash keys is as follows:
125
- # - result[:initial] = d1[:initial] or d2[:initial]
126
- # - result[:accepting] = d1[:accepting] or d2[:accepting]
127
- # - result[:error] = d1[:error] or d2[:error]
128
- # - result[:master] = min(d1[:master], d2[:master])
129
- # - result[:delta] = merging of delta hashes, keeping smaller target index
130
- # on key collisions.
131
- #
132
def merge_user_data(d1, d2, determinization)
  # flags and master of the resulting group
  initial = d1[:initial] || d2[:initial]
  accepting = d1[:accepting] || d2[:accepting]
  error = d1[:error] || d2[:error]
  master = d1[:master] < d2[:master] ? d1[:master] : d2[:master]

  # a group cannot be both accepting and error: the merge fails
  return nil if accepting && error

  merged = {:initial => initial,
            :accepting => accepting,
            :error => error,
            :master => master}

  # delta functions are merged; on a symbol collision the smaller target
  # is kept and the pair is recorded for determinization
  merged[:delta] = d1[:delta].merge(d2[:delta]) do |_symbol, left, right|
    determinization << [left, right]
    left < right ? left : right
  end

  merged
end
152
-
153
- end # module Commons
154
-
155
- end # module Induction
156
- end # module Stamina