rpeg 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/README.md +155 -0
- data/Rakefile +9 -0
- data/lib/rpeg/captures.rb +702 -0
- data/lib/rpeg/parsing_machine.rb +457 -0
- data/lib/rpeg/re.rb +233 -0
- data/lib/rpeg/rpeg.rb +1622 -0
- data/lib/rpeg.rb +5 -0
- metadata +81 -0
@@ -0,0 +1,702 @@
|
|
1
|
+
# Code directly related to captures.
|
2
|
+
#
|
3
|
+
# In terms of LPEG it corresponds to lpcap.h and lpcap.c
|
4
|
+
|
5
|
+
# Capture-related data and functionality
|
6
|
+
module Capture
  # Every capture kind known to the VM. Each kind is also exposed as a constant of this module whose value is the kind's
  # symbol, e.g. Capture::CONST == :const and Capture::CLOSE == :close.
  KINDS = %i[const position argument simple group backref subst table fold string num query function close runtime].freeze
  KINDS.each { |kind| const_set kind.upcase, kind }

  # Used inside the VM when recording capture information.
  class Breadcrumb
    # From time to time we need to tweak each of these
    attr_accessor :size, :subject_index, :data, :kind

    # q.v. LPEG's Capture struct (lpcap.h)
    #
    # We use size instead of siz, subject_index instead of s, and data instead of idx.
    #
    # Raises if kind is not one of KINDS.
    def initialize(size, subject_index, data, kind)
      raise "Bad Capture kind #{kind}" unless KINDS.include?(kind)

      @size = size
      @subject_index = subject_index
      @data = data
      @kind = kind
    end

    # An "open" capture is a "full capture" if it has non-zero size. See isfullcap in lpcap.c
    def full?
      @size.positive?
    end

    # True when this breadcrumb is of kind CLOSE.
    def close?
      @kind == CLOSE
    end

    # The index of the end of the match, q.v. LPEG's closeaddr (lpcap.h)
    def end_index
      @subject_index + size - 1
    end

    # Dynamic because of the setters we sometimes use
    def to_s
      "Breadcrumb size:#{size} sub_idx:#{subject_index} data:#{data.inspect} kind:#{kind}"
    end
  end

  # The result of a table capture. The idea is to mimic a little bit of the functionality of a Lua table, which is like a
  # combination Array/Hash.
  #
  # Internally, we have a hash. Indexing can be by (hash) key or (array) index. Getting and setting is supported.
  #
  # The initial, contiguous segment of the array part (non nil values at 0, 1, 2, ..., k) is available from #unpack.
  class TableCapture
    # hash_part supplies the keyed entries (it is cloned, not shared); the values of array_part are stored under the
    # integer keys 0, 1, 2, ... and overwrite any hash_part entries with the same keys.
    def initialize(hash_part, array_part)
      @data = hash_part.clone
      array_part.each_with_index do |val, idx|
        @data[idx] = val
      end
    end

    # Let i be the smallest natural number such that self[i].nil?. We return [self[0], self[1], ..., self[i-1]]
    def unpack
      (0..).lazy.map { |key| @data[key] }.take_while { |v| !v.nil? }.force
    end

    # Yield each key/value pair and return self. Without a block an Enumerator over the pairs is returned.
    #
    # The block is forwarded explicitly: a bare @data.each would ignore it and always hand back an Enumerator.
    def each(&block)
      return @data.each unless block

      @data.each(&block)
      self
    end

    # Note that we say false if all keys are positive integers but 0 has no value (and so #unpack returns [])
    def empty?
      size.zero?
    end

    # Total number of entries, keyed and indexed.
    def size
      @data.size
    end

    def [](key)
      @data[key]
    end

    def []=(key, value)
      @data[key] = value
    end

    def delete(key)
      @data.delete(key)
    end

    # We support comparison with
    # - TableCapture, in which case we just compare the data objects
    # - Hash, in which case we check key-by-key
    # - Array, in which case we check index-by-index
    #
    # Anything else is simply unequal: == must not raise, because methods such as Array#include? call it with arbitrary
    # objects.
    def ==(other)
      case other
      when TableCapture
        @data == other.raw_data
      when Hash
        @data == other
      when Array
        @data == other.each_with_index.to_h { |val, idx| [idx, val] }
      else
        false
      end
    end

    # Very annoyingly, Ruby's #coerce mechanism is only used by the Numeric types. This means we can make convenient checks
    # like table_capture == {} but not {} == table_capture. The only approach I can think of is to monkeypatch Array and Hash.
    #
    # Maybe we shouldn't bother, and just cobble something together for these tests in contexts like unit test classes. It
    # would be nice to define a refinement and use it in a unit test file, but then, for example, assertion functions won't
    # see the refinement as they are defined elsewhere.

    # Technique from https://stackoverflow.com/a/61438012/1299011
    module ArrayHashOverloadExtension
      # Delegate to TableCapture#== when the right-hand side is a TableCapture; otherwise use the built-in comparison.
      def ==(other)
        return (other == self) if other.is_a?(TableCapture)

        super
      end
    end

    [::Hash, ::Array].each do |klass|
      klass.class_eval do
        prepend ArrayHashOverloadExtension
      end
    end

    protected

    # The backing Hash, visible only to other TableCapture instances (used by #==).
    def raw_data
      @data
    end
  end
end
|
132
|
+
|
133
|
+
# q.v. struct StrAux in lpcap.c
|
134
|
+
#
|
135
|
+
# value is a Breadcrumb or subject index pair (start stop)
|
136
|
+
# Fields, as used by CaptureState#str_caps and CaptureState#extract_string_capture:
# - isstring: true when the entry denotes the subject slice subject_start...subject_end
# - breadcrumb_idx: for entries with isstring false, the index of the breadcrumb to evaluate when the entry is used
StrAux = Struct.new :isstring, :breadcrumb_idx, :subject_start, :subject_end
|
137
|
+
# Once CaptureState#str_caps has collected more than this many entries, further nested captures are skipped.
MAX_STR_CAPS = 10
|
138
|
+
|
139
|
+
# VM and post-match capture support
|
140
|
+
#
|
141
|
+
# q.v. LPEG's CaptureState, lpcap.h
|
142
|
+
#
|
143
|
+
# As a Ruby class it contains as much of the capture code (lpcap.c) as makes sense. Because of this, it needs to know the
|
144
|
+
# @subject and the extra_args
|
145
|
+
class CaptureState
|
146
|
+
attr_reader :captures
|
147
|
+
|
148
|
+
# Prepare to turn a list of breadcrumbs into capture values.
#
# - breadcrumbs: the breadcrumb list to walk
# - subject: the subject being matched; it is frozen here
# - subject_index: stored as given, and later passed to run-time capture procs
# - extra_args: values available to Argument captures; frozen here
# - starting_index: breadcrumb index to start from; a falsy value means 0
def initialize(breadcrumbs, subject, subject_index, extra_args, starting_index:)
  @captures = []
  @breadcrumbs = breadcrumbs
  @breadcrumb_idx = starting_index || 0
  @subject_index = subject_index
  @subject, @extra_args = subject.freeze, extra_args.freeze
end
|
156
|
+
|
157
|
+
# Evaluate breadcrumbs, one top-level capture at a time, until none remain. The values accumulate in #captures.
def capture_all
  until done?
    push_capture
  end
end
|
160
|
+
|
161
|
+
# push a captured value
|
162
|
+
# Append cap (which may be nil) to the stack of captured values; returns the stack.
def push(cap)
  @captures.push(cap)
end
|
165
|
+
|
166
|
+
# Pop the top capture off and return it, erroring if there isn't one.
|
167
|
+
#
|
168
|
+
# If the argument is given we pop off the last n captures as a chunk (not one by one) and return them in an array.
|
169
|
+
#
|
170
|
+
# nil might be in the stack, so we need to count rather than simply check pop for truthiness
|
171
|
+
# Remove values from the top of the capture stack.
#
# With no argument the single top value is returned. With num, the last num values are removed together and returned as
# an Array in stack order. Raises when the stack holds too few values or num is negative. Sizes are compared instead of
# testing the popped value because nil is a legitimate capture.
def pop(num = nil)
  unless num
    raise "There is not a capture to pop" unless @captures.size.positive?

    return @captures.pop
  end

  raise "Cannot pop off a negative number of elements" if num.negative?
  raise "There are not #{num} captures to pop" if @captures.size < num

  @captures.pop(num)
end
|
183
|
+
|
184
|
+
# True once the breadcrumb index sits exactly one past the last breadcrumb.
def done?
  @breadcrumbs.size == @breadcrumb_idx
end
|
187
|
+
|
188
|
+
# The breadcrumb at the current index. Raises when all breadcrumbs have been consumed.
def current_breadcrumb
  return @breadcrumbs[@breadcrumb_idx] unless done?

  raise "No available breadcrumb"
end
|
193
|
+
|
194
|
+
# Step to the next breadcrumb; returns the new index.
def advance
  @breadcrumb_idx = @breadcrumb_idx + 1
end
|
197
|
+
|
198
|
+
# The current breadcrumb index.
def index
  @breadcrumb_idx
end
|
201
|
+
|
202
|
+
# Jump to the breadcrumb at index val. No bounds checking is done here.
def index=(val)
  @breadcrumb_idx = val
end
|
205
|
+
|
206
|
+
# The ending subject index of the _previous_ breadcrumb
|
207
|
+
# The end_index of the breadcrumb immediately before the current one. Raises when the current index is not positive.
def prev_end_index
  raise "No previous breadcrumb" unless @breadcrumb_idx.positive?

  previous = @breadcrumbs[@breadcrumb_idx - 1]
  previous.end_index
end
|
212
|
+
|
213
|
+
# Extract the next capture, returning the number of values obtained.
|
214
|
+
# Evaluate the capture that starts at the current breadcrumb, push its value(s) onto the capture stack and leave the
# breadcrumb index just past it. Returns how many values were pushed (possibly 0).
#
# Raises for a kind with no handler here, which includes a stray CLOSE.
private def push_capture
  crumb = current_breadcrumb

  case crumb.kind
  when Capture::CONST, Capture::RUNTIME
    # the value is stored directly in the breadcrumb
    push crumb.data
    advance
    1
  when Capture::POSITION
    push crumb.subject_index
    advance
    1
  when Capture::ARGUMENT
    arg_number = crumb.data
    raise "Reference to absent extra argument ##{arg_number}" if arg_number > @extra_args.size

    # Argument captures number the extra arguments from 1
    push @extra_args[arg_number - 1]
    advance
    1
  when Capture::SIMPLE
    pushed = push_nested_captures(add_extra: true)
    # The whole match was pushed last; it has to come first among the values just generated
    munge_last!(pushed) if pushed > 1
    pushed
  when Capture::GROUP
    # An anonymous group just contributes its nested values
    return push_nested_captures unless crumb.data

    # Named group. Nothing is extracted now; a Backref capture might find us later
    seek_next!
    0
  when Capture::BACKREF
    saved_idx = @breadcrumb_idx

    seek_back_ref!(crumb.data) # move to the named group capture
    pushed = push_nested_captures
    # come back to the BACKREF and step past it
    @breadcrumb_idx = saved_idx
    advance

    pushed
  when Capture::SUBST
    push extract_subst_capture
    1
  when Capture::TABLE
    push_table_capture
  when Capture::FOLD
    push_fold_capture
  when Capture::STRING
    push extract_string_capture
    1
  when Capture::NUM
    push_num_capture
  when Capture::FUNCTION
    push_function_capture
  when Capture::QUERY
    push_query_capture
  else
    raise "Unhandled capture kind #{crumb.kind}"
  end
end
|
278
|
+
|
279
|
+
# See pushnestedcaptures in lpcap.c
|
280
|
+
#
|
281
|
+
# /*
|
282
|
+
# ** Push on the Lua stack all values generated by nested captures inside
|
283
|
+
# ** the current capture. Returns number of values pushed. 'addextra'
|
284
|
+
# ** makes it push the entire match after all captured values. The
|
285
|
+
# ** entire match is pushed also if there are no other nested values,
|
286
|
+
# ** so the function never returns zero.
|
287
|
+
# */
|
288
|
+
#
|
289
|
+
# We append what we find to the capture state and return their number.
|
290
|
+
#
|
291
|
+
# Code is closely based on the LPEG code.
|
292
|
+
# Push the values produced by the captures nested inside the current (open or full) capture and step past it.
#
# A full capture has no nested breadcrumbs, so the matched text itself is pushed. Otherwise each nested capture is
# evaluated in turn up to the matching close; the matched text is pushed after them when add_extra: is true or when the
# nested captures produced nothing. Returns the number of values pushed, which is therefore never zero.
def push_nested_captures(add_extra: false)
  opener = current_breadcrumb
  advance

  if opener.full?
    # size is one more than the length of the match
    start = opener.subject_index
    push @subject[start...(start + opener.size - 1)].join
    return 1
  end

  pushed = 0
  pushed += push_capture until current_breadcrumb.close? # Nested captures

  # Now at the close that matches opener
  closer = current_breadcrumb
  advance
  if add_extra || pushed.zero?
    push @subject[(opener.subject_index)...(closer.subject_index)].join
    pushed += 1
  end
  pushed.must_be.positive?
  pushed
end
|
318
|
+
|
319
|
+
# This is LPEG's tablecap (lpcap.c)
|
320
|
+
#
|
321
|
+
# Instead of always producing a Hash, potentially with integer keys 0, 1, 2, ..., we produce an array when there are no named
|
322
|
+
# groups to worry about.
|
323
|
+
# - TODO: reconsider this. Arrays are nicer than Hashes in this case but client code might not know which class to expect,
|
324
|
+
# especially when getting captures from a complicated pattern.
|
325
|
+
# - At first I always returned a Hash, with numeric keys 0, 1, 2, ... for the anonymous captures and name keys for the
|
326
|
+
# others. But this felt clunky, especially when we want to, say, join the anonymous arguments into a string.
|
327
|
+
# - Maybe we should return a hash with an :anonymous key giving the array of anonymous captures, or something like that.
|
328
|
+
#
|
329
|
+
# Experimental: return a TableCapture instance
|
330
|
+
# Evaluate a table capture and push a single value for it; always returns 1.
#
# Values of named groups nested directly inside are stored under the group name (only the first value of each group);
# every other nested value is appended, in order, to the indexed part.
#
# NOTE(review): a full table breadcrumb (no nested captures) pushes a plain empty Array, while the general path pushes
# a Capture::TableCapture. Confirm the asymmetry is intended before relying on the class of the result.
def push_table_capture
  if current_breadcrumb.full?
    # Empty table
    push []
    advance
    return 1
  end

  advance # move past the open capture
  named_results = {}
  indexed_results = []
  until current_breadcrumb.close?
    breadcrumb = current_breadcrumb
    if breadcrumb.kind == Capture::GROUP && breadcrumb.data
      # named group. We only keep track of the *first* value in the group
      push_one_nested_value
      named_results[breadcrumb.data] = pop
    else
      # not a named group: move the values just pushed, still in order, from the stack to the indexed part
      pushed = push_capture
      indexed_results.concat(pop(pushed))
    end
  end
  advance # skip the close entry

  push Capture::TableCapture.new(named_results, indexed_results)
  1
end
|
364
|
+
|
365
|
+
# This is LPEG's foldcap (lpcap.c)
|
366
|
+
# Evaluate a fold capture and push its single result; returns 1.
#
# The first value of the first nested capture seeds the accumulator (any further values of that capture are dropped).
# For each later nested capture the breadcrumb's callable is invoked with the accumulator followed by that capture's
# values, and its result becomes the new accumulator. Raises when there is nothing to seed the accumulator with.
def push_fold_capture
  opener = current_breadcrumb
  raise "no initial value for fold capture" if opener.full?

  folder = opener.data.must_be
  advance

  raise "no initial value for fold capture" if current_breadcrumb.close?

  seed_count = push_capture
  raise "no initial value for fold capture" if seed_count.zero?

  # keep only the first of the seed values
  pop(seed_count - 1)
  acc = pop
  until current_breadcrumb.close?
    pushed = push_capture
    acc = folder.call(acc, *pop(pushed))
  end
  advance # skip close
  push acc
  1
end
|
387
|
+
|
388
|
+
# Push nested values and then pop off all but one
|
389
|
+
# Push the nested values of the current capture, then drop all but the first of them from the stack. Returns the Array
# of dropped values.
def push_one_nested_value
  pushed = push_nested_captures
  pop(pushed - 1)
end
|
393
|
+
|
394
|
+
# This is LPEG's numcap (lpcap.c)
|
395
|
+
# Evaluate a numbered capture: of the values produced by the nested captures, keep only the idx-th (counting from 1),
# where idx is the breadcrumb's data. Returns the number of values pushed: 1, or 0 when idx is zero.
#
# Raises when the nested captures produce fewer than idx values.
def push_num_capture
  idx = current_breadcrumb.data
  if idx.zero?
    # index 0 selects nothing: skip the whole capture
    seek_next!
    return 0
  end

  n = push_nested_captures
  raise "no capture '#{idx}'" if n < idx

  vals = pop(n) # pop them off
  push vals[idx - 1] # push back the one we want
  1
end
|
410
|
+
|
411
|
+
# This is LPEG's functioncap (lpcap.c)
|
412
|
+
# Evaluate a function capture: call the breadcrumb's Proc with the nested capture values and push what it returns.
# Returns the number of values pushed.
#
# How the Proc's return value is interpreted:
# - an Array: each element is a separate capture value, so [1, 2, 3] is three captures and [[1, 2, 3]] is one capture
#   that happens to be an array; [] is no capture and [nil] is the single value nil
# - nil: no capture
# - anything else, including false and a Hash: a single capture value
#
# Array(result) is deliberately not used, since Array({x: 1}) is [[:x, 1]]. Only nil means "no capture": false is an
# ordinary value, which is why the test below is result.nil? rather than plain truthiness.
def push_function_capture
  callable = current_breadcrumb.data.must_be_a(Proc) # get the proc to call
  n = push_nested_captures # get the nested captures...
  args = pop(n) # ...pop them
  result = callable.call(*args) # ... and pass them to the proc

  if result.is_a?(Array)
    result.each { |cap| push cap }
    result.size
  elsif result.nil?
    0
  else
    push result
    1
  end
end
|
441
|
+
|
442
|
+
# This is LPEG's querycap (lpcap.c)
|
443
|
+
# Evaluate a query capture: use the first nested capture value as a key into the breadcrumb's Hash and push the value
# found there. Returns 1 when a value was pushed, or 0 when the lookup gives nil.
#
# Only nil counts as "no entry"; a false stored in the hash is a legitimate value and is pushed.
def push_query_capture
  hash = current_breadcrumb.data.must_be_a(Hash)
  push_one_nested_value
  query_key = pop # pop it
  result = hash[query_key]
  return 0 if result.nil? # no result

  push(result)
  1
end
|
455
|
+
|
456
|
+
# This is LPEG's substcap (lpcap.c)
|
457
|
+
# Build and return the String for a substitution capture, stepping past it.
#
# For a full capture the result is just the matched text. Otherwise the matched text is copied, except that each nested
# capture yielding a value has its stretch of the subject replaced by (the string form of) its first value; nested
# captures that yield nothing leave the subject text untouched.
def extract_subst_capture
  opener = current_breadcrumb
  cursor = opener.subject_index # start of the subject text not yet copied
  if opener.full?
    text = @subject[cursor, opener.size - 1].join
  else
    text = +""
    advance # skip open
    until current_breadcrumb.close?
      nested_start = current_breadcrumb.subject_index
      # copy the text between the previous position and this nested capture
      text << @subject[cursor, nested_start - cursor].join
      replacement = extract_one_string("replacement")
      if replacement
        text << replacement
        # carry on from where the nested capture ended
        cursor = prev_end_index
      else
        # no value: the nested capture's own text will be copied later
        cursor = nested_start
      end
    end
    # the remainder, up to the close
    text << @subject[cursor, current_breadcrumb.subject_index - cursor].join
  end
  advance
  text
end
|
481
|
+
|
482
|
+
# This is LPEG's stringcap (lpcap.c)
|
483
|
+
#
|
484
|
+
# We return the result
|
485
|
+
# Build and return the String for a string capture, whose format string is the breadcrumb's data.
#
# In the format, "%" followed by a digit d is replaced by the d-th value collected by #str_caps (%0 being the whole
# match); "%" followed by any other character stands for that character, so "%%" is a literal percent sign. Everything
# else is copied as is.
#
# Raises when a digit refers to a value that was not collected, when the referenced capture has no value, and when the
# format ends in a lone "%" (which previously surfaced as an unhelpful TypeError from appending nil).
def extract_string_capture
  fmt = current_breadcrumb.data.must_be_a(String)
  the_str_caps = str_caps
  result = +""
  idx = 0
  while idx < fmt.length
    char = fmt[idx]
    idx += 1
    unless char == "%"
      result << char
      next
    end

    raise "invalid use of '%' at end of string capture format" if idx >= fmt.length

    escaped = fmt[idx]
    idx += 1
    unless ("0".."9").cover?(escaped)
      result << escaped
      next
    end

    capture_index = escaped.to_i
    raise "invalid capture index (#{capture_index})" if capture_index > the_str_caps.size - 1

    str_cap = the_str_caps[capture_index]
    if str_cap.isstring
      result << @subject[(str_cap.subject_start)...(str_cap.subject_end)].join
      next
    end

    # Not plain subject text: evaluate the referenced capture where it lives, then come back
    saved_idx = @breadcrumb_idx
    @breadcrumb_idx = str_cap.breadcrumb_idx
    val = extract_one_string("capture") # lpeg's addonestring, but return instead of appending to b
    raise "no values in capture index #{capture_index}" unless val

    result << val
    @breadcrumb_idx = saved_idx
  end
  result
end
|
524
|
+
|
525
|
+
# This is LPEG's addonestring (lpcap.c)
|
526
|
+
#
|
527
|
+
# /*
|
528
|
+
# ** Evaluates a capture and adds its first value to buffer 'b'; returns
|
529
|
+
# ** whether there was a value
|
530
|
+
# */
|
531
|
+
#
|
532
|
+
# We just return the value, or nil if there isn't one
|
533
|
+
# Evaluate the capture at the current breadcrumb and return its first value as a String, or nil when it produces no
# value. String and substitution captures are built directly; for anything else the first pushed value is taken, and it
# must be a String or a Numeric (mirroring LPEG's lua_isstring test). what only appears in the error message.
def extract_one_string(what)
  kind = current_breadcrumb.kind
  return extract_string_capture if kind == Capture::STRING
  return extract_subst_capture if kind == Capture::SUBST

  pushed = push_capture
  return nil if pushed.zero?

  pop(pushed - 1) # drop everything after the first value
  value = pop
  case value
  when String, Numeric
    value.to_s
  else
    raise "invalid #{what} value (a #{value.class})"
  end
end
|
551
|
+
|
552
|
+
# In LPEG this logic is split between the main VM loop (lpvm.c) and the runtimecap function (lpcap.c). As noted above, the LPEG
|
553
|
+
# code is complicated by the need to manage references to objects living on the Lua stack to avoid C-side memory leaks. We don't
|
554
|
+
# have to worry about such things
|
555
|
+
#
|
556
|
+
# We start at the CLOSE_RUN_TIME breadcrumb and
|
557
|
+
# - change the CLOSE_RUN_TIME to a regular CLOSE
|
558
|
+
# - find the matching OPEN and grab the Proc that we need to call
|
559
|
+
# - push the nested captures and immediately pop them
|
560
|
+
# - pass the necessary arguments to proc: the subject, the current position, and the just-popped captures
|
561
|
+
# - clear out all the breadcrumbs after the OPEN
|
562
|
+
#
|
563
|
+
# We return a [bc, result] pair.
|
564
|
+
# - bc is the new "breadcrumb count" for the VM, as we discard the existing captures for the RunTime grouping.
|
565
|
+
# - result is the result of the Proc call
|
566
|
+
# Run the Proc of a run-time capture, starting from its closing breadcrumb.
#
# Moves back to the matching open breadcrumb (which must be a GROUP carrying the Proc), evaluates the nested captures,
# and calls the Proc with the subject as a String, the stored subject index and the nested capture values.
#
# Returns [bc, result], where bc is one more than the index of the open breadcrumb and result is the Proc's return
# value wrapped with Array(). The breadcrumb list itself is not modified here.
def run_time_capture
  seek_matching_open!
  opener = current_breadcrumb
  opener.kind.must_be(Capture::GROUP)
  open_idx = index

  callable = opener.data.must_be_a(Proc) # the proc to call

  # joining the subject is done once and cached
  @subject_as_str ||= @subject.join
  nested_count = push_nested_captures
  call_args = [@subject_as_str, @subject_index, *pop(nested_count)]

  [open_idx + 1, Array(callable.call(*call_args))]
end
|
581
|
+
|
582
|
+
# This is LPEG's getstrcaps (lpcap.c)
|
583
|
+
# /*
|
584
|
+
# ** Collect values from current capture into array 'cps'. Current
|
585
|
+
# ** capture must be Cstring (first call) or Csimple (recursive calls).
|
586
|
+
# ** (In first call, fills %0 with whole match for Cstring.)
|
587
|
+
# ** Returns number of elements in the array that were filled.
|
588
|
+
# */
|
589
|
+
#
|
590
|
+
# We simply return the array of StrAux elements
|
591
|
+
# Collect, as an Array of StrAux, the values a string capture's format can refer to, stepping past the capture.
#
# Entry 0 describes the whole match as a subject range. When the capture is not full, each nested SIMPLE capture
# contributes its own entries recursively, and any other nested capture contributes one entry that just records its
# breadcrumb index so it can be evaluated later. Once more than MAX_STR_CAPS entries exist, further nested captures are
# skipped.
def str_caps
  opener = current_breadcrumb
  whole = StrAux.new(true, nil, opener.subject_index, nil)
  auxes = [whole]

  unless opener.full?
    advance # move past the Open
    until current_breadcrumb.close?
      if auxes.size > MAX_STR_CAPS
        seek_next! # just skip it
      elsif current_breadcrumb.kind == Capture::SIMPLE
        auxes += str_caps # get the matches recursively
      else
        # Not a string: remember where it is, then skip over it
        pending = StrAux.new(false, index)
        seek_next!
        auxes << pending
      end
    end
  end

  # now at the close (or still at the full capture), which tells us where the whole match ends
  whole.subject_end = current_breadcrumb.end_index
  advance # skip capture close/full capture
  auxes
end
|
620
|
+
|
621
|
+
# Search backwards from the current breadcrumb for the start of the group capture with the given name.
|
622
|
+
#
|
623
|
+
# If we find it the state index is updated appropriately.
|
624
|
+
# If we don't find it we raise an exception.
|
625
|
+
#
|
626
|
+
# This is LPEG's findback() (lpcap.c)
|
627
|
+
# Walk backwards from the current breadcrumb to the nearest earlier group capture named group_name that was already
# complete, and leave the breadcrumb index on it. Raises when there is none.
def seek_back_ref!(group_name)
  group_name.must_be
  while @breadcrumb_idx.positive?
    @breadcrumb_idx -= 1
    candidate = current_breadcrumb
    if candidate.close?
      # A complete nested capture: jump over its contents to its open
      seek_matching_open!
      candidate = current_breadcrumb
    elsif !candidate.full?
      # An open capture that encloses the BACKREF. Skip it and keep going backwards
      next
    end

    # candidate is a capture that ended before the BACKREF: is it the group we are after?
    return if candidate.kind == Capture::GROUP && candidate.data == group_name
  end
  raise "back reference '#{group_name}' not found"
end
|
647
|
+
|
648
|
+
# This is LPEG's findopen (lpcap.c)
|
649
|
+
#
|
650
|
+
# Assume we are starting from a close capture. We go back to the matching open capture.
|
651
|
+
# Starting on a close breadcrumb, move the breadcrumb index back to the open breadcrumb it belongs to. Closes met on
# the way each cancel one open, and full captures are ignored.
#
# Raises when the start of the breadcrumb list is passed without finding the open.
def seek_matching_open!
  n = 0 # number of nested closes waiting for an open
  loop do
    @breadcrumb_idx -= 1
    raise "breadcrumb index underflow in seek_matching_open!" if @breadcrumb_idx.negative?

    if current_breadcrumb.close?
      n += 1
    elsif !current_breadcrumb.full?
      # It's an open of some sort
      return if n.zero?

      n -= 1
    end
  end
end
|
667
|
+
|
668
|
+
# This is LPEG's nextcap (lpcap.c)
|
669
|
+
#
|
670
|
+
# Move to the next capture
|
671
|
+
# Skip the capture at the current breadcrumb without evaluating it, leaving the index on the breadcrumb that follows.
# A full capture is a single breadcrumb; an open capture is skipped together with everything up to and including its
# matching close. Returns the new index.
def seek_next!
  # number of opens, this one included, still waiting for their close
  unclosed = current_breadcrumb.full? ? 0 : 1
  until unclosed.zero?
    @breadcrumb_idx += 1
    crumb = current_breadcrumb
    if crumb.close?
      unclosed -= 1
    elsif !crumb.full?
      unclosed += 1
    end
  end

  @breadcrumb_idx += 1
end
|
688
|
+
|
689
|
+
# partially rotate the captures to make what is currently the final value the n-th from last value. For example, if @captures is
|
690
|
+
# currently [0, 1, 2, 3, 4], then calling munge_last(3) makes it [0, 1, 4, 2, 3]. Now 4 (previously the last value) is third
|
691
|
+
# from last. When n == 1 this is a no-op
|
692
|
+
# Partially rotate the capture stack so that its current last value becomes the num-th from last. For example, if
# @captures is [0, 1, 2, 3, 4], then munge_last!(3) makes it [0, 1, 4, 2, 3]. With num == 1 nothing changes.
#
# The stack is modified in place, so objects already holding a reference to it (via #captures) see the change.
#
# Raises when num is not positive or exceeds the number of captures.
def munge_last!(num)
  return if num == 1
  raise "Bad munge argument" unless num.positive?
  raise "Not enough values in array to munge it" if num > @captures.size

  last = @captures.pop
  # a negative index counts from the end: -num puts last in front of the final (num - 1) remaining values
  @captures.insert(-num, last)
end
|
702
|
+
end
|