gullah 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/LICENSE +21 -0
- data/README.md +87 -0
- data/Rakefile +11 -0
- data/TODO.md +2 -0
- data/examples/hat.rb +27 -0
- data/examples/trash.rb +42 -0
- data/examples/xml.rb +45 -0
- data/gullah.gemspec +31 -0
- data/lib/gullah/atom.rb +132 -0
- data/lib/gullah/boundary.rb +11 -0
- data/lib/gullah/dotifier.rb +127 -0
- data/lib/gullah/error.rb +7 -0
- data/lib/gullah/hopper.rb +142 -0
- data/lib/gullah/iterator.rb +67 -0
- data/lib/gullah/leaf.rb +24 -0
- data/lib/gullah/node.rb +553 -0
- data/lib/gullah/parse.rb +233 -0
- data/lib/gullah/picker.rb +56 -0
- data/lib/gullah/rule.rb +90 -0
- data/lib/gullah/segment.rb +92 -0
- data/lib/gullah/trash.rb +15 -0
- data/lib/gullah/version.rb +7 -0
- data/lib/gullah.rb +777 -0
- data/test/basic_test.rb +451 -0
- data/test/big_tree_test.rb +26 -0
- data/test/boundary_test.rb +29 -0
- data/test/date_test.rb +111 -0
- data/test/error_test.rb +245 -0
- data/test/json_test.rb +124 -0
- data/test/parse_demo_test.rb +33 -0
- data/test/precondition_test.rb +68 -0
- data/test/tests_per_subrule_test.rb +49 -0
- data/test/tree_walking_test.rb +88 -0
- metadata +157 -0
data/lib/gullah/hopper.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gullah
|
4
|
+
# a Hopper keeps completed parses, deleting inferior ones as better parses are found
|
5
|
+
# this facilitates efficient memory use and parsing
|
6
|
+
class Hopper # :nodoc:
  # Every filter a Hopper understands, listed in the order in which the
  # filters are applied when two parses are compared.
  FILTER_ORDER = %i[correctness completion size pending].freeze

  # filters       - an array of filter names drawn from FILTER_ORDER
  # number_sought - how many parses the caller wants; compared against the
  #                 bin size in +satisfied?+
  #
  # Raises Error if +filters+ contains a name not in FILTER_ORDER.
  def initialize(filters, number_sought)
    dross = filters - FILTER_ORDER
    raise Error, "unknown filters: #{dross.join ', '}" if dross.any?

    # fix filter order
    @filters = FILTER_ORDER & filters
    @number_sought = number_sought
    @thresholds = {}
    @bin = []
    @first = true
    # NOTE: relies on Set being loaded by whoever loads this file
    @seen = Set.new
  end

  # the number of parses currently held
  def size
    @bin.length
  end
  alias length size

  # Do we hold exactly the number of parses sought, with the :correctness and
  # :pending thresholds (whichever of the two are in use) all at zero?
  # Duplicate parses (same summary) are removed from the bin before the
  # final count is taken.
  def satisfied?
    return false unless @bin.length == @number_sought

    limits = @thresholds.values_at(:correctness, :pending).compact
    return false unless limits.any? && limits.all?(&:zero?)

    # we could have accumulated some dross
    @bin.uniq!(&:summary)
    @bin.length == @number_sought
  end

  # Offer a parse to the hopper. The first parse into an empty bin sets the
  # thresholds; later parses are kept only if +adequate?+. Returns nil when
  # the parse is rejected and the bin otherwise.
  def <<(parse)
    if @bin.empty?
      set_thresholds parse
    else
      return unless adequate? parse
    end

    @bin << parse
  end

  # the parses currently held (the live array, not a copy)
  def dump
    @bin
  end

  # is this parse at least as good as any in the bin?
  #
  # Side effect: if the parse beats the current thresholds, the thresholds are
  # reset from it and the bin is purged of parses that no longer measure up.
  def adequate?(parse)
    return true if @filters.none?

    @thresholds.each do |f, limit|
      value = metric(parse, f)
      if value < limit
        # we have a new champion!
        set_thresholds(parse)
        @bin.select! { |p| adequate? p }
        return true
      end
      return false if value > limit
    end
    true
  end

  # is this parse worth improving further?
  def continuable?(parse)
    return true if @first || @filters.none?

    @thresholds.slice(:correctness, :size).each do |f, limit|
      # completion is more important than size, so ignore size unless we have
      # a single complete parse already
      # TODO if we *do* have a single completed parse, we should throw in more tests
      next if f == :size && @thresholds[:completion]&.>(1)

      value = metric(parse, f)
      return true if value < limit
      return false if value > limit
    end
    true
  end

  # Try reducing parse.roots[i...offset] to a single node made by +rule+.
  #
  # Returns nil if any precondition of the rule rejects the reduction or if a
  # parse with the same summary has been vetted before. Otherwise returns
  # whatever +parse.add+ returns; when that is a new parse, its summary and
  # the summary of its new root are cached on it.
  def vet(parse, i, offset, rule, do_unary_branch_check)
    # at this point, any prospective node will be non-terminal
    kids = parse.roots[i...offset]
    # NOTE(review): the fourth argument is the text of the *first* child only;
    # confirm whether preconditions are meant to receive the full text instead
    preconditions_satisfied = rule.preconditions.all? do |pc|
      pc.call rule.name, kids.first.start, kids.last.end, kids.first.text, kids
    end
    return unless preconditions_satisfied

    candidate = "#{rule.name}[#{kids.map(&:summary).join(',')}]"
    unvetted_summary = (
      parse.roots[0...i].map(&:summary) +
      [candidate] +
      parse.roots[offset..].map(&:summary)
    ).join(';')
    # add? returns nil when the summary was already present
    return unless @seen.add?(unvetted_summary)

    parse.add(i, offset, rule, do_unary_branch_check).tap do |new_parse|
      if new_parse
        new_parse._summary = unvetted_summary
        new_parse.roots[i]._summary = candidate
      end
    end
  end

  private

  # the value a parse has for a given filter; lower is better for every filter
  def metric(parse, filter)
    case filter
    when :completion
      parse.length
    when :correctness
      parse.incorrectness_count
    when :size
      parse.size
    when :pending
      parse.pending_count
    end
  end

  # make this parse the standard against which later parses are measured
  def set_thresholds(parse)
    @filters.each { |f| @thresholds[f] = metric(parse, f) }
    @first = false
  end
end
|
142
|
+
end
|
data/lib/gullah/iterator.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gullah
|
4
|
+
# for iterating over reductions of a given parse
|
5
|
+
class Iterator # :nodoc:
  # the parse whose reductions are being enumerated
  attr_reader :parse

  # parse    - the parse to reduce
  # hopper   - vets each candidate reduction (see +next+)
  # starters - maps a node name to the list of rule atoms tried at that node
  # do_unary_branch_check - passed through untouched to the hopper
  def initialize(parse, hopper, starters, do_unary_branch_check)
    @parse = parse
    @hopper = hopper
    @starters = starters
    @do_unary_branch_check = do_unary_branch_check
    @returned_any = false
    # the cursor is a pair: which root we are at and which of its rules is next
    @root_index = 0
    @rule_index = 0
    @node = parse.roots[0]
  end

  # Return the next reduction the hopper accepts, or nil once every rule has
  # been tried at every root.
  def next
    loop do
      atom = current_rule
      break unless atom

      # move the cursor on before doing anything that might bail out
      @rule_index += 1
      offset = atom.match(parse.roots, @root_index)
      next unless offset

      reduction = @hopper.vet(parse, @root_index, offset, atom.parent, @do_unary_branch_check)
      next unless reduction

      @returned_any = true
      return reduction
    end
  end

  # number of nodes that need reduction
  def length
    @parse.length
  end

  # number of erroneous nodes in the parse
  def errors
    @parse.incorrectness_count
  end

  # true until +next+ has handed back at least one reduction
  def never_returned_any?
    !@returned_any
  end

  private

  # The rule atom under the cursor, skipping forward over roots whose rules
  # are exhausted (or which have none). Returns nil when the roots run out.
  def current_rule
    while @node
      @rules ||= @starters[@node.name]
      atom = @rules&.[](@rule_index)
      return atom if atom

      advance_root
    end
  end

  # the rules for the current root are used up; point the cursor at the next one
  def advance_root
    @rule_index = 0
    @root_index += 1
    @node = parse.roots[@root_index]
    @rules = nil
  end
end
|
67
|
+
end
|
data/lib/gullah/leaf.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gullah
|
4
|
+
# a rule used in string tokenization
|
5
|
+
class Leaf # :nodoc:
  # rx             - the pattern given for this leaf
  # name           - the rule's name
  # ignorable      - flag given at construction (default false)
  # boundary       - flag given at construction (default false)
  # tests          - tests as given, until +_post_init+ keeps only those of arity 1
  # ancestor_tests - the remaining tests from +_post_init+; empty until then
  # preconditions  - preconditions as given, or as replaced by +_post_init+
  attr_reader :rx, :name, :ignorable, :boundary, :tests, :ancestor_tests, :preconditions

  def initialize(name, rx, ignorable: false, boundary: false, tests: [], preconditions: [])
    @name = name
    @rx = rx
    @ignorable = ignorable
    @boundary = boundary
    @tests = tests
    # always an array, so callers may iterate it even before _post_init runs
    @ancestor_tests = []
    @preconditions = preconditions
  end

  ## ADVISORILY PRIVATE

  # Install the final tests and preconditions. Tests with an arity of 1 become
  # +tests+; all the others become +ancestor_tests+.
  def _post_init(tests, preconditions)
    @tests, @ancestor_tests = tests.partition { |m| m.arity == 1 }
    @preconditions = preconditions
  end
end
|
24
|
+
end
|
data/lib/gullah/node.rb
ADDED
@@ -0,0 +1,553 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gullah
|
4
|
+
# a node in an AST
|
5
|
+
class Node
  ##
  # The parent node of this node, if any.
  attr_reader :parent

  attr_reader :rule # :nodoc:

  ##
  # A hash of attributes, including indicators of tests that passed or failed.
  # The +atts+ alias of +attributes+ exists for when a more telegraphic coding style is useful.
  attr_reader :attributes # TODO: collect the keys users shouldn't use and document them

  ##
  # The children of this node, if any, as an array.
  attr_reader :children

  ##
  # A concise stringification of the structure of this node's subtree.
  attr_reader :summary

  ##
  # An alternative method for when a more telegraphic coding style is useful.
  alias atts attributes

  # Build a node for +rule+. For a leaf, +s+ and +e+ are text offsets; for a
  # non-terminal they are indices into +parse.roots+ delimiting the children.
  # The rule's node tests are run immediately; its ancestor tests are queued
  # under attributes[:pending] for some later ancestor to run.
  def initialize(parse, s, e, rule) # :nodoc:
    @rule = rule
    @leaf = rule.is_a?(Leaf) || trash?
    @text = parse.text
    @attributes = {}
    @failed_test = false
    if @leaf
      @start = s
      @end = e
    else
      @children = parse.roots[s...e]
      @children.each { |n| adopt n }
    end
    unless trash?
      rule.tests.each do |t|
        result, *extra = Array(t.call(self))
        case result
        when :ignore
          # no-op test
        when :pass
          (attributes[:satisfied] ||= []) << [t.name, *extra]
        when :fail
          @failed_test = true
          (attributes[:failures] ||= []) << [t.name, *extra]
          # the first failure is enough; later tests are not run
          break
        else
          raise Error, <<~MSG
            test #{t.name} returned an unexpected value:
            #{result.inspect}
            expected values: #{%i[ignore pass fail].inspect}
          MSG
        end
      end
    end
    unless failed?
      # if any test failed, this node will not be the child of another node
      rule.ancestor_tests.each do |t|
        # use position rather than node itself for the sake of clonability
        (attributes[:pending] ||= []) << [t, position]
      end
    end
  end

  ##
  # The name of the rule that created this node.
  def name
    rule.name
  end

  ##
  # Does this node represent a character sequence no leaf rule matched?
  def trash?
    false
  end

  ##
  # Is this node one that cannot be the child of another node?
  def boundary?
    false
  end

  ##
  # Is this a leaf node?
  def leaf?
    @leaf
  end

  ##
  # Does this node have some failed test or does it represent characters no leaf rule matched?
  def failed?
    trash? || error?
  end

  # is this node free of anything that blocks further matching: it is not a
  # boundary, not trash, and has no failed test
  def traversible? # :nodoc:
    !(boundary? || trash? || error?)
  end

  ##
  # Does this node have some failed test?
  def error?
    @failed_test
  end

  ##
  # Does this node's subtree contain unsatisfied syntactic requirements?
  # These are tests that depend on nodes not in the node's own subtree.
  def pending_tests?
    !!attributes[:pending]
  end

  ##
  # Was this node created by an +ignore+ rule?
  def ignorable?
    @leaf && rule.ignorable
  end

  ##
  # Was this node created by something other than an +ignore+ rule?
  def significant?
    !ignorable?
  end

  ##
  # Is this a node that has other nodes as children?
  def nonterminal?
    !@leaf
  end

  ##
  # The portion of the original text covered by this node. This is in effect
  # the text of the leaves of its subtree.
  def text
    @text[start...self.end]
  end

  ##
  # A reference to the full text the node's text is embedded in.
  def full_text
    @text
  end

  ##
  # The text preceding this node's text. Useful for lookaround tests and preconditions.
  def text_before
    @text[0...start]
  end

  ##
  # The text following this node's text. Useful for lookaround tests and preconditions.
  def text_after
    @text[self.end..]
  end

  ##
  # The node's start text offset. For a non-terminal node, this will be
  # the same as the start of the first leaf node of its subtree.
  def start
    @start ||= @children[0].start
  end

  ##
  # The node's end text offset. For a non-terminal node, this will be the
  # same as the end of the last leaf node of its subtree.
  def end
    @end ||= @children[-1].end
  end

  ##
  # Distance of the node from the root node of the parse tree.
  # During parsing, while nodes are being added, this distance may change, unlike
  # the height.
  #
  # The root node has a depth of 0. Its children have a depth of 1. Their
  # children have a depth of 2. And so forth.
  def depth
    parent ? 1 + parent.depth : 0
  end

  ##
  # The distance of a node from the first leaf node in its subtree. If the node
  # is the immediate parent of this leaf, its distance will be one. Leaves have
  # a height of zero.
  def height
    @height ||= @leaf ? 0 : 1 + children[0].height
  end

  ##
  # A pair consisting of the node's start and height. This will be a unique
  # identifier for the node in its parse and is constant at all stages of parsing.
  def position
    @position ||= [start, height]
  end

  ##
  # Does this node contain the given text offset?
  def contains?(offset)
    start <= offset && offset < self.end
  end

  ##
  # Finds the node at the given position within this node's subtree.
  # Returns nil if there is no such node.
  def find(pos)
    offset = pos.first
    return nil unless contains?(offset)

    return self if pos == position

    if (child = children&.find { |c| c.contains? offset })
      child.find(pos)
    end
  end

  ##
  # The number of nodes in this node's subtree. Leaves always have a size of 1.
  def size
    @size ||= @leaf ? 1 : @children.map(&:size).sum + 1
  end

  ##
  # The root of this node's current parse tree.
  #
  # Note, if you use this in a node test
  # the root will always be the same as the node itself because these tests are run
  # when the node is being added to the tree. If you use it in structure tests, it
  # will be some ancestor of the node but not necessarily the final root. The current
  # root is always the first argument to structure tests. Using this argument is more
  # efficient than using the root method. Really, the root method is only useful in
  # completed parses.
  def root
    parent ? parent.root : self
  end

  ##
  # Does this node have any parent? If not, it is a root.
  def root?
    parent.nil?
  end

  ##
  # Returns an Enumerable enumerating the nodes immediately above this node in the
  # tree: its parent, its parent's parent, etc.
  def ancestors
    _ancestors self
  end

  ##
  # Returns an Enumerable over the descendants of this node: its children, its children's
  # children, etc. This enumeration is depth-first.
  def descendants
    _descendants self
  end

  ##
  # Returns an Enumerable over this node and its descendants. The node itself is the first
  # node returned.
  def subtree
    _descendants nil
  end

  ##
  # Returns the children of this node's parent minus this node itself.
  # Returns nil if the node has no parent.
  def siblings
    parent&.children&.reject { |n| n == self }
  end

  ##
  # The index of this node among its parent's children. Returns nil if the
  # node has no parent.
  def sibling_index
    @sibling_index ||= parent.children.index self if parent
  end

  ##
  # Returns the children of this node's parent that precede it.
  # Returns nil if the node has no parent.
  def prior_siblings
    parent && parent.children[0...sibling_index]
  end

  ##
  # Returns the children of this node's parent that follow it.
  # Returns nil if the node has no parent.
  def later_siblings
    # slice the parent's full child list: sibling_index is an index into that
    # list, not into +siblings+, which has this node removed
    parent && parent.children[(sibling_index + 1)..]
  end

  ##
  # Is this node the last of its parent's children?
  # Returns nil if the node has no parent.
  def last_child?
    parent && sibling_index == parent.children.length - 1
  end

  ##
  # Is this node the first of its parent's children?
  # Returns nil if the node has no parent.
  def first_child?
    parent && sibling_index.zero?
  end

  ##
  # The immediately prior sibling to this node.
  def prior_sibling
    if parent
      first_child? ? nil : parent.children[sibling_index - 1]
    end
  end

  ##
  # The immediately following sibling to this node.
  def later_sibling
    parent && parent.children[sibling_index + 1]
  end

  ##
  # The leaves of this node's subtree. If the node is a leaf, this returns a
  # single-member array containing the node itself.
  def leaves
    @leaf ? [self] : descendants.select(&:leaf?)
  end

  ##
  # The collection of nodes in the subtree containing this node that do not +contain+
  # the node and whose start offset precedes its start offset.
  def prior
    root.descendants.reject { |n| n.contains? start }.select { |n| n.start < start }
  end

  ##
  # The collection of nodes in the subtree containing this node whose start offset
  # is at or after its end offset.
  def later
    root.descendants.select { |n| n.start >= self.end }
  end

  # A copy of this node whose attributes and, for non-terminals, whole subtree
  # are copied as well; the copied children point at the copy as their parent.
  def clone # :nodoc:
    super.tap do |c|
      c._attributes = deep_clone(attributes)
      unless c.leaf?
        c._children = deep_clone(children)
        c.children.each do |child|
          child._parent = c
        end
      end
    end
  end

  # Produces a simplified representation of the node to facilitate debugging. The +so+
  # named parameter, if true, leaves out the +:ignorable+ marker on leaves created by
  # +ignore+ rules; the nodes themselves are still listed, as the second example shows.
  # The name "so" stands for "significant only".
  #
  #   > pp root.dbg
  #
  #   {:name=>:S,
  #    :pos=>{:start=>0, :end=>11, :depth=>0},
  #    :children=>
  #     [{:name=>:NP,
  #       :pos=>{:start=>0, :end=>7, :depth=>1},
  #       :children=>
  #        [{:name=>:D, :pos=>{:start=>0, :end=>3, :depth=>2}, :text=>"the"},
  #         {:name=>:_ws,
  #          :pos=>{:start=>3, :end=>4, :depth=>2},
  #          :ignorable=>true,
  #          :text=>" "},
  #         {:name=>:N, :pos=>{:start=>4, :end=>7, :depth=>2}, :text=>"cat"}]},
  #      {:name=>:_ws,
  #       :pos=>{:start=>7, :end=>8, :depth=>1},
  #       :ignorable=>true,
  #       :text=>" "},
  #      {:name=>:VP,
  #       :pos=>{:start=>8, :end=>11, :depth=>1},
  #       :children=>
  #        [{:name=>:V, :pos=>{:start=>8, :end=>11, :depth=>2}, :text=>"sat"}]}]}
  #
  #   > pp root.dbg so: true
  #
  #   {:name=>:S,
  #    :pos=>{:start=>0, :end=>11, :depth=>0},
  #    :children=>
  #     [{:name=>:NP,
  #       :pos=>{:start=>0, :end=>7, :depth=>1},
  #       :children=>
  #        [{:name=>:D, :pos=>{:start=>0, :end=>3, :depth=>2}, :text=>"the"},
  #         {:name=>:_ws, :pos=>{:start=>3, :end=>4, :depth=>2}, :text=>" "},
  #         {:name=>:N, :pos=>{:start=>4, :end=>7, :depth=>2}, :text=>"cat"}]},
  #      {:name=>:_ws, :pos=>{:start=>7, :end=>8, :depth=>1}, :text=>" "},
  #      {:name=>:VP,
  #       :pos=>{:start=>8, :end=>11, :depth=>1},
  #       :children=>
  #        [{:name=>:V, :pos=>{:start=>8, :end=>11, :depth=>2}, :text=>"sat"}]}]}
  def dbg(so: false)
    {
      name: name,
      pos: {
        start: start,
        end: self.end,
        depth: depth
      }
    }.tap do |simpleton|
      simpleton[:failed] = true if @failed_test
      simpleton[:attributes] = deep_clone attributes if attributes.any?
      if leaf?
        simpleton[:trash] = true if trash?
        simpleton[:ignorable] = true unless so || significant?
        simpleton[:text] = text
      else
        simpleton[:children] = children.map { |c| c.dbg so: so }
      end
    end
  end

  ## ADVISORILY PRIVATE

  # :stopdoc:

  def _summary=(str) # :nodoc:
    @summary = str
  end

  # used during parsing
  # make sure we don't have any repeated symbols in a unary branch
  #
  # Returns true when this node's name recurs further down the chain of only
  # children below it; a node with several children ends the chain.
  def _loop_check?(seen = nil) # :nodoc:
    return true if seen == name

    return false if !@leaf && children.length > 1

    if seen.nil?
      # this is the beginning of the check
      # the only name we need look for is this rule's name, since
      # all those below it must have passed the check
      seen = name
    end
    @leaf ? false : children.first._loop_check?(seen)
  end

  def _attributes=(attributes) # :nodoc:
    @attributes = attributes
  end

  def _parent=(other) # :nodoc:
    @parent = other
  end

  def _children=(children) # :nodoc:
    @children = children
  end

  def _descendants(skip) # :nodoc:
    Descendants.new(self, skip)
  end

  def _ancestors(skip) # :nodoc:
    Ancestors.new(self, skip)
  end

  def _failed_test=(bool) # :nodoc:
    @failed_test = bool
  end

  private

  # Copy a value recursively. Strings and Methods are shared rather than
  # copied; arrays, hashes and sets are rebuilt from copies of their members;
  # anything else is copied with its own +clone+.
  def deep_clone(obj)
    case obj
    when String, Method
      obj
    when Array
      obj.map { |o| deep_clone o }
    when Hash
      obj.map { |k, v| [deep_clone(k), deep_clone(v)] }.to_h
    when Set
      obj.map { |v| deep_clone v }.to_set
    else
      obj.clone
    end
  end

  # Enumerates a node and then, parent by parent, the nodes above it, leaving
  # out the node given as +skip+ (if any).
  class Ancestors
    include Enumerable
    def initialize(n, skip)
      @n = n
      @skip = skip
    end

    # Yields each node in turn; returns an Enumerator when called without a block.
    def each(&block)
      return enum_for(:each) unless block

      yield @n unless @n == @skip
      @n.parent&._ancestors(@skip)&.each(&block)
    end

    # The final node of the enumeration, found without walking the whole
    # chain: the tree's root, or nil when the root is the skipped node (the
    # only case in which the enumeration is empty).
    def last
      top = @n.root
      top == @skip ? nil : top
    end
  end

  # Enumerates a node and its subtree depth-first, parents before children,
  # leaving out the node given as +skip+ (if any).
  class Descendants
    include Enumerable
    def initialize(n, skip)
      @n = n
      @skip = skip
    end

    # Yields each node in turn; returns an Enumerator when called without a block.
    def each(&block)
      return enum_for(:each) unless block

      yield @n unless @n == @skip
      unless @n.leaf?
        @n.children.each do |c|
          c._descendants(@skip).each(&block)
        end
      end
    end

    # The final node of the enumeration, found without a full traversal: the
    # node reached by repeatedly taking the last child, starting from the
    # enumerated node itself (not from the root of the tree). Returns nil if
    # that node is the skipped one, as when asking for the descendants of a leaf.
    def last
      node = @n
      node = node.children.last until node.leaf?
      node == @skip ? nil : node
    end
  end

  # establish parent-child relationship and migrate needs from child to self
  #
  # Every ancestor test pending on the child is run against this node. A nil
  # result keeps the test pending on this node; :pass and :fail are recorded on
  # both this node and the node that registered the test.
  def adopt(n)
    n._parent = self
    if (pending = n.attributes.delete :pending)
      pending.each do |pair|
        r, l = pair
        child = find(l) # this will necessarily find some child
        result, *extra = Array(r.call(self, child))
        case result
        when :ignore
          # nothing to do
        when nil
          # the test doesn't apply, this node inherits it
          (attributes[:pending] ||= []) << pair
        when :pass
          # mark the results on the parent and the child
          (attributes[:satisfied_ancestor] ||= []) << [r.name, l, *extra]
          (child.attributes[:satisfied_descendant] ||= []) << [r.name, position, *extra]
        when :fail
          @failed_test = true
          (attributes[:failed_ancestor] ||= []) << [r.name, l, *extra]
          (child.attributes[:failed_descendant] ||= []) << [r.name, position, *extra]
          child._failed_test = true
        else
          raise Error, <<~MSG
            ancestor test #{r.name} returned an unexpected value:
            #{result.inspect}
            expected values: #{[:ignore, :pass, :fail, nil].inspect}
          MSG
        end
      end
    end
  end
end
|
553
|
+
end
|