gullah 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/LICENSE +21 -0
- data/README.md +87 -0
- data/Rakefile +11 -0
- data/TODO.md +2 -0
- data/examples/hat.rb +27 -0
- data/examples/trash.rb +42 -0
- data/examples/xml.rb +45 -0
- data/gullah.gemspec +31 -0
- data/lib/gullah/atom.rb +132 -0
- data/lib/gullah/boundary.rb +11 -0
- data/lib/gullah/dotifier.rb +127 -0
- data/lib/gullah/error.rb +7 -0
- data/lib/gullah/hopper.rb +142 -0
- data/lib/gullah/iterator.rb +67 -0
- data/lib/gullah/leaf.rb +24 -0
- data/lib/gullah/node.rb +553 -0
- data/lib/gullah/parse.rb +233 -0
- data/lib/gullah/picker.rb +56 -0
- data/lib/gullah/rule.rb +90 -0
- data/lib/gullah/segment.rb +92 -0
- data/lib/gullah/trash.rb +15 -0
- data/lib/gullah/version.rb +7 -0
- data/lib/gullah.rb +777 -0
- data/test/basic_test.rb +451 -0
- data/test/big_tree_test.rb +26 -0
- data/test/boundary_test.rb +29 -0
- data/test/date_test.rb +111 -0
- data/test/error_test.rb +245 -0
- data/test/json_test.rb +124 -0
- data/test/parse_demo_test.rb +33 -0
- data/test/precondition_test.rb +68 -0
- data/test/tests_per_subrule_test.rb +49 -0
- data/test/tree_walking_test.rb +88 -0
- metadata +157 -0
data/lib/gullah/hopper.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gullah
|
4
|
+
# a Hopper keeps completed parses, deleting inferior ones as better parses are found
|
5
|
+
# this facilitates efficient memory use and parsing
|
6
|
+
class Hopper # :nodoc:
  # +filters+ lists the criteria used to rank parses. Anything other than
  # :completion, :correctness, :size, or :pending raises an Error.
  # +number_sought+ is the bin size at which the hopper may call itself satisfied.
  def initialize(filters, number_sought)
    unknown = filters - %i[completion correctness size pending]
    raise Error, "unknown filters: #{unknown.join ', '}" if unknown.any?

    # whatever order the caller used, filters are always applied in this order
    @filters = %i[correctness completion size pending] & filters
    @number_sought = number_sought
    @thresholds = {}
    @bin = []
    @first = true
    @seen = Set.new
  end

  # the number of parses currently held
  def size
    @bin.length
  end
  alias length size

  # Have we collected the number of parses sought, all of them flawless?
  # Returns nil unless the bin holds exactly the number sought and the
  # :correctness/:pending thresholds in use are all zero.
  def satisfied?
    return unless @bin.length == @number_sought

    limits = @thresholds.values_at(:correctness, :pending).compact
    return unless limits.any? && limits.all?(&:zero?)

    # duplicates may have slipped in; discard them before the final count
    @bin.uniq!(&:summary)
    @bin.length == @number_sought
  end

  # Offer a parse to the hopper. The first parse offered to an empty bin
  # defines the thresholds; later ones are kept only if adequate.
  # Returns the bin when the parse was kept and nil when it was rejected.
  def <<(parse)
    if @bin.empty?
      set_thresholds parse
    elsif !adequate?(parse)
      return
    end

    @bin << parse
  end

  # the parses kept so far
  def dump
    @bin
  end

  # Is this parse at least as good as any in the bin?
  # A parse that beats the current thresholds becomes the new standard and
  # every parse in the bin that no longer measures up is thrown out.
  def adequate?(parse)
    return true if @filters.none?

    @thresholds.each do |filter, limit|
      value = measure(parse, filter)
      if value < limit
        # a new best: reset the thresholds and purge the bin
        set_thresholds(parse)
        @bin.select! { |kept| adequate? kept }
        return true
      end
      return false if value > limit
    end
    true
  end

  # Is this parse worth improving further?
  def continuable?(parse)
    return true if @first || @filters.none?

    @thresholds.slice(:correctness, :size).each do |filter, limit|
      # completion is more important than size, so ignore size unless we have
      # a single complete parse already
      # TODO if we *do* have a single completed parse, we should throw in more tests
      next if filter == :size && @thresholds[:completion]&.>(1)

      value = measure(parse, filter)
      return true if value < limit
      return false if value > limit
    end
    true
  end

  # Try to reduce parse.roots[i...offset] to a single node made by +rule+.
  # Returns the new parse, or nil when a precondition fails, when an
  # equivalent reduction has already been seen, or when parse.add returns nil.
  def vet(parse, i, offset, rule, do_unary_branch_check)
    # at this point, any prospective node will be non-terminal
    kids = parse.roots[i...offset]
    # NOTE(review): the fourth argument is the first child's text, not the text of
    # the whole prospective node -- confirm this is what preconditions expect
    acceptable = rule.preconditions.all? do |pc|
      pc.call rule.name, kids.first.start, kids.last.end, kids.first.text, kids
    end
    return unless acceptable

    candidate = "#{rule.name}[#{kids.map(&:summary).join(',')}]"
    before = parse.roots[0...i].map(&:summary)
    after = parse.roots[offset..].map(&:summary)
    unvetted_summary = [*before, candidate, *after].join(';')

    # Set#add? returns nil when the summary was already recorded
    return unless @seen.add?(unvetted_summary)

    new_parse = parse.add(i, offset, rule, do_unary_branch_check)
    if new_parse
      new_parse._summary = unvetted_summary
      new_parse.roots[i]._summary = candidate
    end
    new_parse
  end

  private

  # the measurement of a parse under one filter; lower values are preferred
  def measure(parse, filter)
    case filter
    when :completion then parse.length
    when :correctness then parse.incorrectness_count
    when :size then parse.size
    when :pending then parse.pending_count
    end
  end

  # make this parse the standard that other parses are compared against
  def set_thresholds(parse)
    @filters.each { |filter| @thresholds[filter] = measure(parse, filter) }
    @first = false
  end
end
|
142
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gullah
|
4
|
+
# for iterating over reductions of a given parse
|
5
|
+
class Iterator # :nodoc:
  attr_reader :parse

  # +parse+ is the parse whose reductions are enumerated, +hopper+ vets each
  # candidate reduction, and +starters+ maps a node name to the rule atoms
  # that can begin with a node of that name.
  def initialize(parse, hopper, starters, do_unary_branch_check)
    @parse = parse
    @hopper = hopper
    @starters = starters
    @do_unary_branch_check = do_unary_branch_check
    @returned_any = false
    # the iteration state is a pair of cursors: which root, and which rule for that root
    @root_index = 0
    @rule_index = 0
    @node = parse.roots[0]
  end

  # Return the next reduction, or nil once every root/rule pair has been tried.
  def next
    while (atom = current_rule)
      @rule_index += 1
      offset = atom.match(parse.roots, @root_index)
      next unless offset

      reduction = @hopper.vet(parse, @root_index, offset, atom.parent, @do_unary_branch_check)
      next unless reduction

      @returned_any = true
      return reduction
    end
    nil
  end

  # number of nodes that need reduction
  def length
    @parse.length
  end

  # number of erroneous nodes in the parse
  def errors
    @parse.incorrectness_count
  end

  # true until #next has handed back at least one reduction
  def never_returned_any?
    !@returned_any
  end

  private

  # The rule atom under the cursors. When the current root's rules run out,
  # move to the next root; returns nil when the roots run out as well.
  def current_rule
    loop do
      return nil unless @node

      @rules ||= @starters[@node.name]
      atom = @rules&.[](@rule_index)
      return atom if atom

      # the rules for this node are used up; try the next one
      @rule_index = 0
      @root_index += 1
      @node = parse.roots[@root_index]
      @rules = nil
    end
  end
end
|
67
|
+
end
|
data/lib/gullah/leaf.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gullah
|
4
|
+
# a rule used in string tokenization
|
5
|
+
class Leaf # :nodoc:
  attr_reader :rx, :name, :ignorable, :boundary, :tests, :ancestor_tests, :preconditions

  # +name+ is the rule's name and +rx+ the pattern it was declared with.
  # +ignorable+ and +boundary+ are flags; +tests+ and +preconditions+ are
  # stored as given until _post_init replaces them.
  def initialize(name, rx, ignorable: false, boundary: false, tests: [], preconditions: [])
    @name = name
    @rx = rx
    @ignorable = ignorable
    @boundary = boundary
    @tests = tests
    # start with an empty list rather than nil so that code iterating over
    # ancestor_tests works even before _post_init has been called
    @ancestor_tests = []
    @preconditions = preconditions
  end

  ## ADVISORILY PRIVATE

  # Install the final tests and preconditions. Tests with an arity of one become
  # the leaf's own tests; all others are kept as ancestor tests.
  def _post_init(tests, preconditions)
    @tests, @ancestor_tests = tests.partition { |m| m.arity == 1 }
    @preconditions = preconditions
  end
end
|
24
|
+
end
|
data/lib/gullah/node.rb
ADDED
@@ -0,0 +1,553 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gullah
|
4
|
+
# a node in an AST
|
5
|
+
class Node
  ##
  # The parent node of this node, if any.
  attr_reader :parent

  attr_reader :rule # :nodoc:

  ##
  # A hash of attributes, including indicators of tests that passed or failed.
  # The +atts+ alias of +attributes+ exists for when a more telegraphic coding style is useful.
  attr_reader :attributes # TODO: collect the keys users shouldn't use and document them

  ##
  # The children of this node, if any, as an array.
  attr_reader :children

  ##
  # A concise stringification of the structure of this node's subtree.
  attr_reader :summary

  ##
  # An alternative method for when a more telegraphic coding style is useful.
  alias atts attributes

  # For a leaf, +s+ and +e+ are text offsets. For a non-terminal they are
  # indices into parse.roots delimiting the nodes that become its children.
  def initialize(parse, s, e, rule) # :nodoc:
    @rule = rule
    @leaf = rule.is_a?(Leaf) || trash?
    @text = parse.text
    @attributes = {}
    @failed_test = false
    if @leaf
      @start = s
      @end = e
    else
      @children = parse.roots[s...e]
      @children.each { |n| adopt n }
    end
    unless trash?
      rule.tests.each do |t|
        result, *extra = Array(t.call(self))
        case result
        when :ignore
          # no-op test
        when :pass
          (attributes[:satisfied] ||= []) << [t.name, *extra]
        when :fail
          @failed_test = true
          (attributes[:failures] ||= []) << [t.name, *extra]
          # the first failure is enough; remaining tests are not run
          break
        else
          raise Error, <<~MSG
            test #{t.name} returned an unexpected value:
            #{result.inspect}
            expected values: #{%i[ignore pass fail].inspect}
          MSG
        end
      end
    end
    unless failed?
      # if any test failed, this node will not be the child of another node
      rule.ancestor_tests.each do |t|
        # use position rather than node itself for the sake of clonability
        (attributes[:pending] ||= []) << [t, position]
      end
    end
  end

  ##
  # The name of the rule that created this node.
  def name
    rule.name
  end

  ##
  # Does this node represent a character sequence no leaf rule matched?
  def trash?
    false
  end

  ##
  # Is this node one that cannot be the child of another node?
  def boundary?
    false
  end

  ##
  # Is this a leaf node?
  def leaf?
    @leaf
  end

  ##
  # Does this node have some failed test or does it represent characters no leaf rule matched?
  def failed?
    trash? || error?
  end

  # is this node free of anything that would block further matching
  # (i.e., it is not a boundary, not trash, and has no failed test)
  def traversible? # :nodoc:
    !(boundary? || trash? || error?)
  end

  ##
  # Does this node have some failed test?
  def error?
    @failed_test
  end

  ##
  # Does this node's subtree contain unsatisfied syntactic requirements?
  # These are tests that depend on nodes not in the node's own subtree.
  def pending_tests?
    !!attributes[:pending]
  end

  ##
  # Was this node created by an +ignore+ rule?
  def ignorable?
    @leaf && rule.ignorable
  end

  ##
  # Was this node created by something other than an +ignore+ rule?
  def significant?
    !ignorable?
  end

  ##
  # Is this a node that has other nodes as children?
  def nonterminal?
    !@leaf
  end

  ##
  # The portion of the original text covered by this node. This is in effect
  # the text of the leaves of its subtree.
  def text
    @text[start...self.end]
  end

  ##
  # A reference to the full text the node's text is embedded in.
  def full_text
    @text
  end

  ##
  # The text preceding this node's text. Useful for lookaround tests and preconditions.
  def text_before
    @text[0...start]
  end

  ##
  # The text following this node's text. Useful for lookaround tests and preconditions.
  def text_after
    @text[self.end..]
  end

  ##
  # The node's start text offset. For a non-terminal node, this will be
  # the same as the start of the first leaf node of its subtree.
  def start
    @start ||= @children[0].start
  end

  ##
  # The node's end text offset. For a non-terminal node, this will be the
  # same as the end of the last leaf node of its subtree.
  def end
    @end ||= @children[-1].end
  end

  ##
  # Distance of the node from the root node of the parse tree.
  # During parsing, while nodes are being added, this distance may change, unlike
  # the height.
  #
  # The root node has a depth of 0. Its children have a depth of 1. Their
  # children have a depth of 2. And so forth.
  def depth
    parent ? 1 + parent.depth : 0
  end

  ##
  # The distance of a node from the first leaf node in its subtree. If the node
  # is the immediate parent of this leaf, its distance will be one. Leaves have
  # a height of zero.
  def height
    @height ||= @leaf ? 0 : 1 + children[0].height
  end

  ##
  # A pair consisting of the node's start and height. This will be a unique
  # identifier for the node in its parse and is constant at all stages of parsing.
  def position
    @position ||= [start, height]
  end

  ##
  # Does this node contain the given text offset?
  def contains?(offset)
    start <= offset && offset < self.end
  end

  ##
  # Finds the node at the given position within this node's subtree.
  # Returns nil if no node in the subtree has that position.
  def find(pos)
    offset = pos.first
    return nil unless contains?(offset)

    return self if pos == position

    # leaves have no children, hence the safe navigation
    if (child = children&.find { |c| c.contains? offset })
      child.find(pos)
    end
  end

  ##
  # The number of nodes in this node's subtree. Leaves always have a size of 1.
  def size
    @size ||= @leaf ? 1 : @children.sum(&:size) + 1
  end

  ##
  # The root of this node's current parse tree.
  #
  # Note, if you use this in a node test
  # the root will always be the same as the node itself because these tests are run
  # when the node is being added to the tree. If you use it in structure tests, it
  # will be some ancestor of the node but not necessarily the final root. The current
  # root is always the first argument to structure tests. Using this argument is more
  # efficient than using the root method. Really, the root method is only useful in
  # completed parses.
  def root
    parent ? parent.root : self
  end

  ##
  # Does this node have any parent? If not, it is a root.
  def root?
    parent.nil?
  end

  ##
  # Returns an Enumerable enumerating the nodes immediately above this node in the
  # tree: its parent, its parent's parent, etc.
  def ancestors
    _ancestors self
  end

  ##
  # Returns an Enumerable over the descendants of this node: its children, its children's
  # children, etc. This enumeration is depth-first.
  def descendants
    _descendants self
  end

  ##
  # Returns an Enumerable over this node and its descendants. The node itself is the first
  # node returned.
  def subtree
    _descendants nil
  end

  ##
  # Returns the children of this node's parent minus this node itself.
  # Returns nil for a node without a parent.
  def siblings
    parent&.children&.reject { |n| n == self }
  end

  ##
  # The index of this node among its parent's children. Nil for a node without a parent.
  def sibling_index
    @sibling_index ||= parent.children.index self if parent
  end

  ##
  # Returns the children of this node's parent that precede it.
  # Returns nil for a node without a parent.
  def prior_siblings
    parent && parent.children[0...sibling_index]
  end

  ##
  # Returns the children of this node's parent that follow it.
  # Returns nil for a node without a parent.
  def later_siblings
    # index into the parent's full list of children: sibling_index counts this
    # node too, so it cannot be used as an offset into +siblings+
    parent && parent.children[(sibling_index + 1)..]
  end

  ##
  # Is this node the last of its parent's children?
  # Returns nil for a node without a parent.
  def last_child?
    parent && sibling_index == parent.children.length - 1
  end

  ##
  # Is this node the first of its parent's children?
  # Returns nil for a node without a parent.
  def first_child?
    parent && sibling_index.zero?
  end

  ##
  # The immediately prior sibling to this node.
  def prior_sibling
    parent.children[sibling_index - 1] if parent && !first_child?
  end

  ##
  # The immediately following sibling to this node.
  def later_sibling
    parent && parent.children[sibling_index + 1]
  end

  ##
  # The leaves of this node's subtree. If the node is a leaf, this returns a
  # single-member array containing the node itself.
  def leaves
    @leaf ? [self] : descendants.select(&:leaf?)
  end

  ##
  # The collection of nodes in the subtree containing this node that do not +contain+
  # the node and whose start offset precedes its start offset.
  def prior
    root.descendants.reject { |n| n.contains? start }.select { |n| n.start < start }
  end

  ##
  # The collection of nodes in the subtree containing this node whose start offset
  # is at or after its end offset.
  def later
    root.descendants.select { |n| n.start >= self.end }
  end

  # Copies the node together with its attributes and its whole subtree.
  # The copied children point at the copy as their parent.
  def clone # :nodoc:
    super.tap do |c|
      c._attributes = deep_clone(attributes)
      unless c.leaf?
        c._children = deep_clone(children)
        c.children.each do |child|
          child._parent = c
        end
      end
    end
  end

  # Produces a simplified representation of the node to facilitate debugging. The +so+
  # named parameter, if true, leaves the +:ignorable+ marker off ignorable leaves; the
  # leaves themselves are still listed, as the second example below shows.
  # The name "so" stands for "significant only".
  #
  #   > pp root.dbg
  #
  #   {:name=>:S,
  #    :pos=>{:start=>0, :end=>11, :depth=>0},
  #    :children=>
  #     [{:name=>:NP,
  #       :pos=>{:start=>0, :end=>7, :depth=>1},
  #       :children=>
  #        [{:name=>:D, :pos=>{:start=>0, :end=>3, :depth=>2}, :text=>"the"},
  #         {:name=>:_ws,
  #          :pos=>{:start=>3, :end=>4, :depth=>2},
  #          :ignorable=>true,
  #          :text=>" "},
  #         {:name=>:N, :pos=>{:start=>4, :end=>7, :depth=>2}, :text=>"cat"}]},
  #      {:name=>:_ws,
  #       :pos=>{:start=>7, :end=>8, :depth=>1},
  #       :ignorable=>true,
  #       :text=>" "},
  #      {:name=>:VP,
  #       :pos=>{:start=>8, :end=>11, :depth=>1},
  #       :children=>
  #        [{:name=>:V, :pos=>{:start=>8, :end=>11, :depth=>2}, :text=>"sat"}]}]}
  #
  #   > pp root.dbg so: true
  #
  #   {:name=>:S,
  #    :pos=>{:start=>0, :end=>11, :depth=>0},
  #    :children=>
  #     [{:name=>:NP,
  #       :pos=>{:start=>0, :end=>7, :depth=>1},
  #       :children=>
  #        [{:name=>:D, :pos=>{:start=>0, :end=>3, :depth=>2}, :text=>"the"},
  #         {:name=>:_ws, :pos=>{:start=>3, :end=>4, :depth=>2}, :text=>" "},
  #         {:name=>:N, :pos=>{:start=>4, :end=>7, :depth=>2}, :text=>"cat"}]},
  #      {:name=>:_ws, :pos=>{:start=>7, :end=>8, :depth=>1}, :text=>" "},
  #      {:name=>:VP,
  #       :pos=>{:start=>8, :end=>11, :depth=>1},
  #       :children=>
  #        [{:name=>:V, :pos=>{:start=>8, :end=>11, :depth=>2}, :text=>"sat"}]}]}
  def dbg(so: false)
    {
      name: name,
      pos: {
        start: start,
        end: self.end,
        depth: depth
      }
    }.tap do |simpleton|
      simpleton[:failed] = true if @failed_test
      simpleton[:attributes] = deep_clone attributes if attributes.any?
      if leaf?
        simpleton[:trash] = true if trash?
        simpleton[:ignorable] = true unless so || significant?
        simpleton[:text] = text
      else
        simpleton[:children] = children.map { |c| c.dbg so: so }
      end
    end
  end

  ## ADVISORILY PRIVATE

  # :stopdoc:

  def _summary=(str) # :nodoc:
    @summary = str
  end

  # used during parsing
  # make sure we don't have any repeated symbols in a unary branch
  # returns true if this node's name recurs further down the unary branch
  def _loop_check?(seen = nil) # :nodoc:
    return true if seen == name

    # a node with several children ends the unary branch
    return false if !@leaf && children.length > 1

    if seen.nil?
      # this is the beginning of the check
      # the only name we need look for is this rule's name, since
      # all those below it must have passed the check
      seen = name
    end
    @leaf ? false : children.first._loop_check?(seen)
  end

  def _attributes=(attributes) # :nodoc:
    @attributes = attributes
  end

  def _parent=(other) # :nodoc:
    @parent = other
  end

  def _children=(children) # :nodoc:
    @children = children
  end

  # an enumeration of this node's subtree that leaves out the node +skip+
  def _descendants(skip) # :nodoc:
    Descendants.new(self, skip)
  end

  # an enumeration of this node and the nodes above it that leaves out the node +skip+
  def _ancestors(skip) # :nodoc:
    Ancestors.new(self, skip)
  end

  def _failed_test=(bool) # :nodoc:
    @failed_test = bool
  end

  private

  # Copies nested arrays, hashes, and sets recursively. Strings and Method
  # objects are shared rather than copied; anything else is cloned.
  def deep_clone(obj)
    case obj
    when String, Method
      obj
    when Array
      obj.map { |o| deep_clone o }
    when Hash
      obj.map { |k, v| [deep_clone(k), deep_clone(v)] }.to_h
    when Set
      obj.map { |v| deep_clone v }.to_set
    else
      obj.clone
    end
  end

  # Enumerates a node and the chain of parents above it, omitting +skip+.
  class Ancestors
    include Enumerable
    def initialize(n, skip)
      @n = n
      @skip = skip
    end

    def each(&block)
      return enum_for(:each) unless block

      yield @n unless @n == @skip
      @n.parent&._ancestors(@skip)&.each(&block)
    end

    # The last node enumerated: the root of the tree, or nil when the
    # enumeration is empty because the only candidate is the skipped node.
    def last
      return nil if @n.root? && @n == @skip

      @n.root
    end
  end

  # Enumerates a node's subtree depth-first, omitting +skip+.
  class Descendants
    include Enumerable
    def initialize(n, skip)
      @n = n
      @skip = skip
    end

    def each(&block)
      return enum_for(:each) unless block

      yield @n unless @n == @skip
      unless @n.leaf?
        @n.children.each do |c|
          c._descendants(@skip).each(&block)
        end
      end
    end

    # The last node enumerated: the final leaf of this node's own subtree,
    # or nil when the enumeration is empty.
    def last
      node = @n
      # the final node visited is reached by always taking the last child
      node = node.children.last until node.leaf?
      node unless node == @skip
    end
  end

  # establish parent-child relationship and migrate needs from child to self
  def adopt(n)
    n._parent = self
    if (pending = n.attributes.delete :pending)
      pending.each do |pair|
        r, l = pair
        child = find(l) # this will necessarily find some child
        result, *extra = Array(r.call(self, child))
        case result
        when :ignore
          # nothing to do
        when nil
          # the test doesn't apply, this node inherits it
          (attributes[:pending] ||= []) << pair
        when :pass
          # mark the results on the parent and the child
          (attributes[:satisfied_ancestor] ||= []) << [r.name, l, *extra]
          (child.attributes[:satisfied_descendant] ||= []) << [r.name, position, *extra]
        when :fail
          # a failed ancestor test marks both this node and the descendant as failed
          @failed_test = true
          (attributes[:failed_ancestor] ||= []) << [r.name, l, *extra]
          (child.attributes[:failed_descendant] ||= []) << [r.name, position, *extra]
          child._failed_test = true
        else
          raise Error, <<~MSG
            ancestor test #{r.name} returned an unexpected value:
            #{result.inspect}
            expected values: #{[:ignore, :pass, :fail, nil].inspect}
          MSG
        end
      end
    end
  end
end
|
553
|
+
end
|