stamina 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. data/CHANGELOG.md +22 -5
  2. data/LICENCE.md +2 -2
  3. data/bin/stamina +1 -7
  4. data/lib/stamina.rb +10 -19
  5. metadata +54 -333
  6. data/.gemtest +0 -0
  7. data/Gemfile +0 -2
  8. data/Gemfile.lock +0 -37
  9. data/Manifest.txt +0 -16
  10. data/README.md +0 -78
  11. data/Rakefile +0 -23
  12. data/example/adl/automaton.adl +0 -49
  13. data/example/adl/sample.adl +0 -53
  14. data/example/basic/characteristic_sample.adl +0 -32
  15. data/example/basic/target.adl +0 -9
  16. data/example/competition/31_test.adl +0 -1500
  17. data/example/competition/31_training.adl +0 -1759
  18. data/lib/stamina/abbadingo.rb +0 -2
  19. data/lib/stamina/abbadingo/random_dfa.rb +0 -48
  20. data/lib/stamina/abbadingo/random_sample.rb +0 -146
  21. data/lib/stamina/adl.rb +0 -298
  22. data/lib/stamina/automaton.rb +0 -1263
  23. data/lib/stamina/automaton/complete.rb +0 -36
  24. data/lib/stamina/automaton/equivalence.rb +0 -55
  25. data/lib/stamina/automaton/metrics.rb +0 -78
  26. data/lib/stamina/automaton/minimize.rb +0 -25
  27. data/lib/stamina/automaton/minimize/hopcroft.rb +0 -116
  28. data/lib/stamina/automaton/minimize/pitchies.rb +0 -64
  29. data/lib/stamina/automaton/strip.rb +0 -16
  30. data/lib/stamina/automaton/walking.rb +0 -363
  31. data/lib/stamina/classifier.rb +0 -52
  32. data/lib/stamina/command.rb +0 -45
  33. data/lib/stamina/command/abbadingo_dfa.rb +0 -81
  34. data/lib/stamina/command/abbadingo_samples.rb +0 -40
  35. data/lib/stamina/command/adl2dot.rb +0 -71
  36. data/lib/stamina/command/classify.rb +0 -48
  37. data/lib/stamina/command/help.rb +0 -27
  38. data/lib/stamina/command/infer.rb +0 -141
  39. data/lib/stamina/command/metrics.rb +0 -51
  40. data/lib/stamina/command/robustness.rb +0 -22
  41. data/lib/stamina/command/score.rb +0 -35
  42. data/lib/stamina/errors.rb +0 -23
  43. data/lib/stamina/ext/math.rb +0 -20
  44. data/lib/stamina/induction/blue_fringe.rb +0 -265
  45. data/lib/stamina/induction/commons.rb +0 -156
  46. data/lib/stamina/induction/rpni.rb +0 -186
  47. data/lib/stamina/induction/union_find.rb +0 -377
  48. data/lib/stamina/input_string.rb +0 -123
  49. data/lib/stamina/loader.rb +0 -1
  50. data/lib/stamina/markable.rb +0 -42
  51. data/lib/stamina/sample.rb +0 -267
  52. data/lib/stamina/scoring.rb +0 -213
  53. data/lib/stamina/utils.rb +0 -1
  54. data/lib/stamina/utils/decorate.rb +0 -81
  55. data/lib/stamina/version.rb +0 -14
  56. data/stamina.gemspec +0 -191
  57. data/stamina.noespec +0 -32
  58. data/tasks/debug_mail.rake +0 -78
  59. data/tasks/debug_mail.txt +0 -13
  60. data/tasks/gem.rake +0 -68
  61. data/tasks/spec_test.rake +0 -79
  62. data/tasks/unit_test.rake +0 -77
  63. data/tasks/yard.rake +0 -51
  64. data/test/stamina/abbadingo/random_dfa_test.rb +0 -16
  65. data/test/stamina/abbadingo/random_sample_test.rb +0 -78
  66. data/test/stamina/adl_test.rb +0 -516
  67. data/test/stamina/automaton/classifier_test.rb +0 -259
  68. data/test/stamina/automaton/complete_test.rb +0 -58
  69. data/test/stamina/automaton/equivalence_test.rb +0 -120
  70. data/test/stamina/automaton/metrics_test.rb +0 -36
  71. data/test/stamina/automaton/minimize/hopcroft_test.rb +0 -15
  72. data/test/stamina/automaton/minimize/minimize_test.rb +0 -55
  73. data/test/stamina/automaton/minimize/pitchies_test.rb +0 -15
  74. data/test/stamina/automaton/minimize/rice_edu_10.adl +0 -16
  75. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +0 -13
  76. data/test/stamina/automaton/minimize/rice_edu_13.adl +0 -13
  77. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +0 -7
  78. data/test/stamina/automaton/minimize/should_strip_1.adl +0 -8
  79. data/test/stamina/automaton/minimize/should_strip_1.min.adl +0 -6
  80. data/test/stamina/automaton/minimize/unknown_1.adl +0 -16
  81. data/test/stamina/automaton/minimize/unknown_1.min.adl +0 -12
  82. data/test/stamina/automaton/strip_test.rb +0 -36
  83. data/test/stamina/automaton/to_dot_test.rb +0 -64
  84. data/test/stamina/automaton/walking/dfa_delta_test.rb +0 -39
  85. data/test/stamina/automaton/walking_test.rb +0 -206
  86. data/test/stamina/automaton_additional_test.rb +0 -190
  87. data/test/stamina/automaton_test.rb +0 -1104
  88. data/test/stamina/exit.rb +0 -3
  89. data/test/stamina/induction/blue_fringe_test.rb +0 -83
  90. data/test/stamina/induction/induction_test.rb +0 -70
  91. data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +0 -19
  92. data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +0 -64
  93. data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +0 -9
  94. data/test/stamina/induction/redblue_universal_expected.adl +0 -4
  95. data/test/stamina/induction/redblue_universal_sample.adl +0 -5
  96. data/test/stamina/induction/rpni_inria_expected.adl +0 -7
  97. data/test/stamina/induction/rpni_inria_sample.adl +0 -9
  98. data/test/stamina/induction/rpni_test.rb +0 -129
  99. data/test/stamina/induction/rpni_test_pta.dot +0 -22
  100. data/test/stamina/induction/rpni_universal_expected.adl +0 -4
  101. data/test/stamina/induction/rpni_universal_sample.adl +0 -4
  102. data/test/stamina/induction/union_find_test.rb +0 -124
  103. data/test/stamina/input_string_test.rb +0 -323
  104. data/test/stamina/markable_test.rb +0 -70
  105. data/test/stamina/randdfa.adl +0 -66
  106. data/test/stamina/sample.adl +0 -4
  107. data/test/stamina/sample_classify_test.rb +0 -149
  108. data/test/stamina/sample_test.rb +0 -290
  109. data/test/stamina/scoring_test.rb +0 -63
  110. data/test/stamina/small_dfa.dot +0 -16
  111. data/test/stamina/small_dfa.gif +0 -0
  112. data/test/stamina/small_nfa.dot +0 -18
  113. data/test/stamina/small_nfa.gif +0 -0
  114. data/test/stamina/stamina_test.rb +0 -80
  115. data/test/stamina/utils/decorate_test.rb +0 -65
  116. data/test/test_all.rb +0 -7
@@ -1,51 +0,0 @@
module Stamina
  class Command
    #
    # Prints metrics about an automaton or sample
    #
    # SYNOPSIS
    # #{program_name} #{command_name} [file.adl]
    #
    # OPTIONS
    # #{summarized_options}
    #
    class Metrics < Quickl::Command(__FILE__, __LINE__)
      include Robustness

      # Install options (this command currently declares none)
      options do |opt|

      end # options

      # Command execution.
      #
      # Reads ADL text from the file named by the single optional argument, or
      # from standard input when no argument is given. The text is first parsed
      # as an automaton; if that raises ADL::ParseError it is parsed as a
      # sample instead, and the corresponding metrics are printed on standard
      # output.
      #
      # Raises Quickl::Help when more than one argument is given.
      def execute(args)
        raise Quickl::Help unless args.size <= 1

        # Load the input text verbatim. IO#readlines keeps the line terminators,
        # so joining its result with "\n" would double every newline; reading
        # the stream in one go gives the same text File.read gives for a file.
        input = if args.size == 1
          File.read assert_readable_file(args.first)
        else
          $stdin.read
        end

        # Flush metrics
        begin
          target = Stamina::ADL::parse_automaton(input)
          puts "Alphabet size: #{target.alphabet_size}"
          puts "State count: #{target.state_count}"
          puts "Edge count: #{target.edge_count}"
          puts "Degree (avg): #{target.avg_degree}"
          puts "Accepting ratio: #{target.accepting_ratio}"
          puts "Depth: #{target.depth}"
        rescue ADL::ParseError
          # Not an automaton: fall back on sample metrics
          sample = Stamina::ADL::parse_sample(input)
          puts "Size: #{sample.size}"
          puts "Positive: #{sample.positive_count} (#{sample.positive_count.to_f / sample.size})"
          puts "Negative: #{sample.negative_count} (#{sample.negative_count.to_f / sample.size})"
        end
      end

    end # class Metrics
  end # class Command
end # module Stamina
@@ -1,22 +0,0 @@
module Stamina
  class Command
    #
    # File-access guards mixed into commands. Each guard returns the path it
    # was given, so it can wrap the argument of a read or write call.
    #
    module Robustness

      # Checks that a given file is readable or raises a Quickl::IOAccessError.
      #
      # Returns _file_ unchanged when it exists and is readable.
      def assert_readable_file(file)
        # File.exist? is used because File.exists? has been removed from
        # recent Ruby versions
        raise Quickl::IOAccessError, "File #{file} does not exists" unless File.exist?(file)
        raise Quickl::IOAccessError, "File #{file} cannot be read" unless File.readable?(file)
        file
      end

      # Checks that a given file is writable or raises a Quickl::IOAccessError.
      #
      # A path that does not exist yet is accepted; only an existing,
      # non-writable file is rejected. Returns _file_ unchanged otherwise.
      def assert_writable_file(file)
        if File.exist?(file) && !File.writable?(file)
          raise Quickl::IOAccessError, "File #{file} cannot be written"
        end
        file
      end

    end # module Robustness
  end # class Command
end # module Stamina
@@ -1,35 +0,0 @@
module Stamina
  class Command
    #
    # Scores the labelling of a sample by an automaton
    #
    # SYNOPSIS
    # #{program_name} #{command_name} sample.adl automaton.adl
    #
    # OPTIONS
    # #{summarized_options}
    #
    class Score < Quickl::Command(__FILE__, __LINE__)
      include Robustness

      # Install options (this command currently declares none)
      options do |opt|

      end # options

      # Command execution.
      #
      # Expects exactly two arguments, the sample file then the automaton file
      # (Quickl::Help is raised otherwise). Prints on standard output the
      # scoring of the automaton's signature of the sample against the
      # sample's own signature.
      def execute(args)
        raise Quickl::Help unless args.size == 2
        sample_path, automaton_path = args

        sample    = Stamina::ADL.parse_sample_file(assert_readable_file(sample_path))
        automaton = Stamina::ADL.parse_automaton_file(assert_readable_file(automaton_path))

        puts Scoring.scoring(automaton.signature(sample), sample.signature).to_s
      end

    end # class Score
  end # class Command
end # module Stamina
@@ -1,23 +0,0 @@
module Stamina

  # Raised when an algorithm explicitly aborts something
  class Abord < StandardError
  end

  # Root class of all stamina errors.
  class StaminaError < StandardError
  end

  # Raised by sample implementations and induction algorithms when a sample
  # is inconsistent (same string labeled as being both positive and negative)
  class InconsistencyError < StaminaError
  end

  # Specific errors of the ADL module.
  module ADL

    # Raised by the ADL module when an automaton, string or sample format is
    # violated at parsing time.
    class ParseError < StaminaError
    end

  end

end # module Stamina
@@ -1,20 +0,0 @@
#
# Small helpers installed on the Math module.
#
# The logarithm helpers are only defined when the running interpreter does not
# already provide a method of that name (feature detection rather than a
# RUBY_VERSION comparison), so Math.log2 and Math.logn are available on every
# Ruby version and a native implementation is never overridden.
#

unless Math.respond_to?(:log2)
  # Returns the base-2 logarithm of x
  def Math.log2(x)
    Math.log(x) / Math.log(2)
  end
end

unless Math.respond_to?(:logn)
  # Returns the base-n logarithm of x
  def Math.logn(x, n)
    Math.log(x) / Math.log(n)
  end
end

# Returns the greater of i and j (j when they compare equal)
def Math.max(i, j)
  i > j ? i : j
end

# Returns the lesser of i and j (j when they compare equal)
def Math.min(i, j)
  i < j ? i : j
end
@@ -1,265 +0,0 @@
module Stamina
  module Induction

    #
    # Implementation of the BlueFringe variant of the RPNI algorithm (with the
    # blue-fringe heuristics).
    #
    # See Lang, K., B. Pearlmutter, and R. Price. 1998. Results of the Abbadingo One DFA
    # Learning Competition and a New Evidence-Driven State Merging Algorithm, In Grammatical
    # Inference, pp. 1-12. Ames, IO: Springer-Verlag.
    #
    # Example:
    #   # sample typically comes from an ADL file
    #   sample = Stamina::ADL.parse_sample_file('sample.adl')
    #
    #   # let BlueFringe build the smallest dfa
    #   dfa = Stamina::Induction::BlueFringe.execute(sample, {:verbose => true})
    #
    # Remarks:
    # - Constructor and instance methods of this class are public but not intended
    #   to be used directly. They are left public for testing purposes only.
    # - Having read the RPNI base algorithm (stamina/induction/rpni.rb) may help
    #   understanding this variant.
    # - This class intensively uses the Stamina::Induction::UnionFind class and
    #   methods defined in the Stamina::Induction::Commons module which are worth
    #   reading to understand the algorithm implementation.
    #
    class BlueFringe
      include Stamina::Induction::Commons

      # Union-find data structure used internally
      attr_reader :ufds

      # Creates an algorithm instance with given options (merged over
      # DEFAULT_OPTIONS). Raises an ArgumentError unless _options_ is a Hash.
      def initialize(options={})
        raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
        @options = DEFAULT_OPTIONS.merge(options)
        @score_cache = {}
      end

      #
      # Computes the score of a single (group) merge. Returned value is 1 if both are
      # accepting states or both are error states and 0 otherwise. Note that d1 and d2
      # are expected to be merge compatible as this method does not distinguish this
      # case.
      #
      def merge_score(d1, d2)
        ((d1[:accepting] && d2[:accepting]) || (d1[:error] && d2[:error])) ? 1 : 0
      end

      #
      # Merges a state of rank j with a state of lower rank i. This merge method
      # includes merging for determinization. It returns nil if the merge is
      # incompatible, a merge score otherwise.
      #
      # Preconditions:
      # - States denoted by i and j are expected leader states (non merged ones)
      # - States denoted by i and j are expected to be different
      #
      # Postconditions:
      # - Union find is refined, states i and j having been merged, as well as all
      #   state pairs that need to be merged to ensure the deterministic property
      #   of the quotient automaton.
      # - If the resulting quotient automaton is consistent with the negative sample,
      #   this method returns the number of accepting pairs + the number of error pairs
      #   that have been merged. The refined union-find correctly encodes the quotient
      #   automaton. Otherwise, the method returns nil and the union-find information
      #   must be considered inaccurate.
      #
      def merge_and_determinize(i, j)
        # Make the union. The block records the merge score and, as a side effect
        # of merge_user_data, the additional merges required for determinization.
        determinization, score = [], nil
        @ufds.union(i, j) do |d1, d2|
          # States are incompatible if the merged data cannot be created, that is
          # when an error state would be merged with an accepting one. The whole
          # method returns nil in that case.
          return nil unless (new_data = merge_user_data(d1, d2, determinization))
          score = merge_score(d1, d2)
          # the union find keeps new_data as the data of the group
          new_data
        end

        # Merge for determinization, based on the pairs collected above
        determinization.each do |pair|
          # Resolve both members of the pair to their leader states. The block
          # parameter is deliberately not named i, so the method argument is
          # neither shadowed nor (on Ruby 1.8) overwritten.
          left, right = pair.collect { |state| @ufds.find(state) }
          # nothing to do if both already belong to the same group
          next if left == right
          # otherwise recurse; a nil subscore means an error state met an
          # accepting one somewhere down the line
          subscore = merge_and_determinize(left, right)
          return nil if subscore.nil?
          score += subscore
        end

        score
      end

      #
      # Evaluates the score of merging states i and j. Returns nil if the states
      # cannot be merged, a positive score otherwise.
      #
      # Preconditions:
      # - States denoted by i and j are expected leader states (non merged ones)
      # - States denoted by i and j are expected to be different
      #
      # Postconditions:
      # - Returned value is nil if the quotient automaton would be incompatible with
      #   the sample. Otherwise a positive number is returned, encoding the number of
      #   interesting pairs that have been merged (interesting = both accepting or both
      #   error)
      # - The union find is ALWAYS restored to its previous value after merging has
      #   been evaluated and is then seen unchanged by the caller.
      #
      def merge_and_determinize_score(i, j)
        # Incompatible merges are cached as -1, because a nil entry would be
        # recomputed by ||= on every call.
        cached = @score_cache[[i, j]] ||= begin
          outcome = nil
          # score the merging; the block answers false so that the transaction
          # is always rolled back
          @ufds.transactional do
            outcome = merge_and_determinize(i, j)
            false
          end
          outcome || -1
        end
        cached == -1 ? nil : cached
      end

      #
      # Computes the fringe given the current union find. The fringe is returned as an
      # array of state indices.
      #
      # Postconditions:
      # - Returned array contains indices of leader states only.
      # - Returned array is disjoint with the kernel.
      #
      def fringe
        fringe = []
        @kernel.each do |k1|
          delta = @ufds.mergeable_data(k1)[:delta]
          delta.each_pair { |symbol, target| fringe << @ufds.find(target) }
        end
        (fringe - @kernel).sort
      end

      #
      # Main method of the algorithm. Refines the union find passed as first argument
      # by merging well chosen state pairs. Returns the refined union find.
      #
      # Preconditions:
      # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
      #   and :error boolean flags as well as a :delta sub hash)
      #
      # Postconditions:
      # - The union find has been refined. It encodes a quotient automaton (of the PTA
      #   it comes from) such that all positive and negative strings of the underlying
      #   sample are correctly classified by it.
      #
      def main(ufds)
        info("Starting BlueFringe (#{ufds.size} states)")
        @ufds, @kernel, @score_cache = ufds, [0], {}

        # we do it until the fringe is empty (compute it only once each step)
        until (the_fringe = fringe).empty?
          # state to consolidate (if any)
          to_consolidate = nil
          # best candidate [source index, target index, score]
          best = [nil, nil, -1]

          # for each state on the fringe as merge candidate
          the_fringe.each do |candidate|
            to_consolidate = candidate

            # evaluate score of merging candidate with each kernel state
            @kernel.each do |target|
              score = merge_and_determinize_score(candidate, target)
              unless score.nil?
                # if a score has been found, the candidate will not be
                # consolidated. We keep it as best if it is better than the
                # previous one
                to_consolidate = nil
                best = [candidate, target, score] if score > best[2]
              end
            end

            # No possible target, break the loop (will consolidate right now)!
            break unless to_consolidate.nil?
          end

          if to_consolidate.nil?
            # every candidate had a compatible target: do the best merging
            @score_cache.clear
            info("Merging #{best[0]} and #{best[1]} [#{best[2]}]")
            # this one should never fail because its score was positive before
            raise "Unexpected case" unless merge_and_determinize(best[0], best[1])
          else
            # the last visited candidate is compatible with no kernel state
            info("Consolidation of #{to_consolidate}")
            @kernel << to_consolidate
          end

          # blue_fringe does not guarantee that it will not merge a state of lower rank
          # with a kernel state. The kernel should then be updated at each step to keep
          # lowest indices for the whole kernel, and we sort it
          @kernel = @kernel.collect { |k| @ufds.find(k) }.sort
        end

        # return the refined union find now
        @ufds
      end

      #
      # Build the smallest DFA compatible with the sample given as input.
      #
      # Preconditions:
      # - The sample is consistent (does not contain the same string both labeled as
      #   positive and negative) and contains at least one string.
      #
      # Postconditions:
      # - The returned DFA is the smallest DFA that correctly labels the learning sample
      #   given as input.
      #
      # Remarks:
      # - This instance version of BlueFringe.execute is not intended to be used directly and
      #   is mainly provided for testing purposes. Please use the class variant of this
      #   method if possible.
      #
      def execute(sample)
        # create union-find
        info("Creating PTA and UnionFind structure")
        ufds = sample2ufds(sample)
        # refine it
        ufds = main(ufds)
        # compute and return quotient automaton
        ufds2dfa(ufds)
      end

      #
      # Build the smallest DFA compatible with the sample given as input.
      #
      # Options (the _options_ hash):
      # - :verbose can be set to true to trace algorithm execution (on the IO given
      #   by the :verbose_io option)
      #
      # Preconditions:
      # - The sample is consistent (does not contain the same string both labeled as
      #   positive and negative) and contains at least one string.
      #
      # Postconditions:
      # - The returned DFA is the smallest DFA that correctly labels the learning sample
      #   given as input.
      #
      def self.execute(sample, options={})
        BlueFringe.new(options).execute(sample)
      end

    end # class BlueFringe

  end # module Induction
end # module Stamina
@@ -1,156 +0,0 @@
module Stamina
  module Induction

    #
    # Utilities shared by the rpni and blue_fringe induction algorithms. Acronyms:
    # - _pta_ stands for Prefix Tree Acceptor
    # - _ufds_ stands for Union-Find Data Structure
    #
    # pta2ufds and sample2ufds are conversion methods used when an induction
    # algorithm starts (executed on a sample, it first builds a pta then converts
    # it to a union find). ufds2dfa is used when the algorithm ends, to convert the
    # refined union find to a dfa.
    #
    # merge_user_data is probably the most important method, as it actually
    # computes the merging of two states and collects the merges required for
    # determinization.
    #
    module Commons

      DEFAULT_OPTIONS = {
        :verbose    => false,
        :verbose_io => $stderr
      }

      # Additional options of the algorithm
      attr_reader :options

      # Is the verbose mode on ?
      def verbose?
        @verbose ||= (options[:verbose] ? true : false)
      end

      # IO receiving information messages ($stderr when the option is not set)
      def verbose_io
        @verbose_io ||= (options[:verbose_io] || $stderr)
      end

      # Display an information message (when verbose)
      def info(msg)
        return unless verbose?
        io = verbose_io
        io << msg << "\n"
        io.flush
      end

      #
      # Builds and returns a UnionFind data structure from a PTA, keeping the
      # natural order of its states for union-find elements. Each element gets a
      # Hash as mergeable user data, with the following keys:
      # - :initial, :accepting and :error flags of the state
      # - :master the index of the state in the PTA
      # - :delta a delta function as a Hash {symbol => state_index}
      #
      # Other user data attached to PTA states is lost during the conversion.
      #
      def pta2ufds(pta)
        Stamina::Induction::UnionFind.new(pta.state_count) do |index|
          state = pta.ith_state(index)
          data = {
            :initial   => state.initial?,
            :accepting => state.accepting?,
            :error     => state.error?,
            :master    => index,
            :delta     => {}
          }
          transitions = data[:delta]
          state.out_edges.each do |edge|
            transitions[edge.symbol] = edge.target.index
          end
          data
        end
      end

      #
      # Converts a Sample to an (augmented) prefix tree acceptor, by delegating to
      # <code>sample.to_pta</code>.
      #
      def sample2pta(sample)
        sample.to_pta
      end

      #
      # Converts a Sample instance to a 'ready to refine' union find data structure.
      # Shortcut for <code>pta2ufds(sample2pta(sample))</code>.
      #
      def sample2ufds(sample)
        pta = sample2pta(sample)
        pta2ufds(pta)
      end

      #
      # Computes the quotient automaton from a refined UnionFind data structure.
      #
      # Every mergeable data becomes one state, named after its :master index and
      # never flagged as error; the :master, :count and :delta entries are not
      # copied to the state. Edges are then created from the :delta hashes, each
      # target being resolved through <code>ufds.find</code>.
      #
      def ufds2dfa(ufds)
        Automaton.new(false) do |fa|
          groups = ufds.mergeable_datas

          # first pass: one state per group
          groups.each do |data|
            state_data = data.reject { |key, _| [:master, :count, :delta].include?(key) }
            state_data[:name]  = data[:master].to_s
            state_data[:error] = false
            fa.add_state(state_data)
          end

          # second pass: edges, once every state exists
          groups.each do |data|
            source = fa.get_state(data[:master].to_s)
            data[:delta].each_pair do |symbol, target|
              target_state = fa.get_state(ufds.find(target).to_s)
              fa.connect(source, target_state, symbol)
            end
          end
        end
      end

      #
      # Merges two user data hashes _d1_ and _d2_ according to the rules defined
      # below. Also fills the _determinization_ array with pairs of state indices
      # that are reached from d1 and d2 through the same symbol and should be
      # merged for determinization. This method does NOT ensure that those pairs
      # correspond to distinct states according to the union find. In other
      # words state indices in these pairs do not necessarily correspond to master
      # states (see UnionFind for this term).
      #
      # Returns the resulting data if the merge is successful (does not lead to
      # merging an error state with an accepting one), nil otherwise.
      #
      # The merging procedure for the different hash keys is as follows:
      # - result[:initial]   = d1[:initial] or d2[:initial]
      # - result[:accepting] = d1[:accepting] or d2[:accepting]
      # - result[:error]     = d1[:error] or d2[:error]
      # - result[:master]    = min(d1[:master], d2[:master])
      # - result[:delta]     = merging of delta hashes, keeping smaller target index
      #                        on key collisions.
      #
      def merge_user_data(d1, d2, determinization)
        # flags and master first
        master = if d1[:master] < d2[:master]
          d1[:master]
        else
          d2[:master]
        end
        merged = {
          :initial   => d1[:initial] || d2[:initial],
          :accepting => d1[:accepting] || d2[:accepting],
          :error     => d1[:error] || d2[:error],
          :master    => master
        }

        # merge failure if accepting and error states are merged
        return nil if merged[:accepting] && merged[:error]

        # delta function of the resulting state; every symbol collision is
        # recorded as a pair to be merged for determinization
        merged[:delta] = d1[:delta].merge(d2[:delta]) do |_symbol, t1, t2|
          determinization << [t1, t2]
          t1 < t2 ? t1 : t2
        end

        merged
      end

    end # module Commons

  end # module Induction
end # module Stamina