meta_parse 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/meta_parse.rb +165 -35
- data/lib/util.rb +39 -61
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5afbd35c216415e4d640838f26fa0e7b30d5d1f
|
4
|
+
data.tar.gz: d7051bc1eb8ae1cbd7fa716ad631ac79c4dadcd8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ffc733a1a70f6105b86c0651dee9b4f3307b0d8c4402810038c307ebb2375d54f06eeaed0c46874ff17a646b048c669427c9e472398a2378005a486f55c81e6
|
7
|
+
data.tar.gz: 9b069986342c3e21b1346be532d973d4097daead288dcf876e06ec18b23151c020fbb63e39786906abd65ad20104e49d3b9fd6da2468422e050743dc3efc832d
|
data/lib/meta_parse.rb
CHANGED
@@ -1,40 +1,51 @@
|
|
1
1
|
require 'strscan'
|
2
2
|
|
3
|
+
##
|
4
|
+
# This module contains the classes which implement parsers and, when included,
|
5
|
+
# provides class methods for defining custom parsers.
|
6
|
+
|
3
7
|
module MetaParse
|
4
8
|
|
9
|
+
##
|
10
|
+
# Extend including class with ClassMethods.
|
11
|
+
|
5
12
|
def self.included(base)
|
6
13
|
base.extend ClassMethods
|
7
14
|
end
|
8
15
|
|
9
|
-
|
10
|
-
|
16
|
+
##
|
17
|
+
# Parse string using an instance method previously defined by MetaParse::ClassMethods::match_method.
|
18
|
+
|
11
19
|
def parse_with_method(method_name, string)
|
12
20
|
self.send(method_name, string.meta(self), self)
|
13
21
|
end
|
14
22
|
|
15
|
-
module ClassMethods
|
16
|
-
|
23
|
+
module ClassMethods
|
24
|
+
##
|
25
|
+
# Defines a method called name, which takes a scanner, using the provided block.
|
17
26
|
# The block passed should return a Matcher or Matcher spec, which is compiled to a Matcher if necessary.
|
18
27
|
# The result of calling the defined method is the same as calling match? on the resulting Matcher.
|
28
|
+
|
19
29
|
def match_method(name, &block)
|
20
30
|
match_spec = yield
|
21
31
|
matcher = MetaParse::Matcher.compile(match_spec)
|
22
32
|
|
23
33
|
define_matcher_method(name, matcher)
|
24
34
|
end
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
35
|
+
|
36
|
+
##
|
37
|
+
# Returns a RepetitionMatcher which matches the pattern specified by its arguments zero or more times.
|
38
|
+
|
32
39
|
def rep(*args, &block)
|
33
40
|
Matcher.compile([:*, *args])
|
34
41
|
end
|
35
|
-
|
36
|
-
|
42
|
+
|
43
|
+
##
|
44
|
+
# Return a SequentialMatcher. If block is supplied, it defines a function which will be passed an array
|
37
45
|
# of all matched values and which should return a non-nil result for the match as a whole.
|
46
|
+
# Sequential matching with terminal block is the mechanism by which arbitrary values can be produced at
|
47
|
+
# any step in parsing.
|
48
|
+
|
38
49
|
def seq(*args, &block)
|
39
50
|
if block_given?
|
40
51
|
wrapped = lambda { |scanner, context|
|
@@ -46,27 +57,49 @@ module MetaParse
|
|
46
57
|
end
|
47
58
|
Matcher.compile([:and, *args])
|
48
59
|
end
|
60
|
+
|
61
|
+
##
|
62
|
+
# Return an AlternativeMatcher.
|
49
63
|
|
50
64
|
def alt(*args)
|
51
65
|
Matcher.compile([:or, *args])
|
52
66
|
end
|
67
|
+
|
68
|
+
##
|
69
|
+
# Compile a Matcher spec into corresponding matcher.
|
53
70
|
|
54
71
|
def comp(spec)
|
55
72
|
Matcher.compile(spec)
|
56
73
|
end
|
57
|
-
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
##
|
78
|
+
# Define a match method using supplied Matcher.
|
79
|
+
# Used by ::match_method, which should be used instead.
|
80
|
+
|
81
|
+
def define_matcher_method(name, matcher)
|
82
|
+
self.send(:define_method, name) do |scanner, context=nil|
|
83
|
+
matcher.match scanner, context
|
84
|
+
end
|
85
|
+
end
|
58
86
|
end
|
59
|
-
### End Interface
|
60
87
|
|
61
88
|
class MetaScanner < StringScanner
|
62
89
|
attr_accessor :parser
|
63
90
|
|
91
|
+
##
|
92
|
+
# Initialize MetaScanner with supplied string as input.
|
93
|
+
# Optionally-supplied parser will be receiver of Symbol message sent by subclass, FunctionMatcher.
|
64
94
|
def initialize(string, parser=nil)
|
65
95
|
super string
|
66
96
|
@parser = parser
|
67
97
|
end
|
68
98
|
|
69
|
-
|
99
|
+
##
|
100
|
+
# Match and return a single character or return nil, updating internal position on match.
|
101
|
+
|
102
|
+
# NOTE: This is a special case and could actually be handled by match_string if necessary.
|
70
103
|
def match_char(char)
|
71
104
|
c = peek(1)
|
72
105
|
if c == char
|
@@ -75,6 +108,9 @@ module MetaParse
|
|
75
108
|
end
|
76
109
|
end
|
77
110
|
|
111
|
+
##
|
112
|
+
# Match and return a string or return nil, updating internal position on match.
|
113
|
+
|
78
114
|
def match_string(str, position2=0)
|
79
115
|
if (string.equal_at(pos, str, position2))
|
80
116
|
self.pos += str.length
|
@@ -82,6 +118,9 @@ module MetaParse
|
|
82
118
|
end
|
83
119
|
end
|
84
120
|
|
121
|
+
##
|
122
|
+
# Scan for a Regexp or string, returning any match, or nil, and updating internal position on match.
|
123
|
+
|
85
124
|
def scan(spec)
|
86
125
|
case spec
|
87
126
|
when Regexp
|
@@ -97,9 +136,16 @@ module MetaParse
|
|
97
136
|
end
|
98
137
|
end
|
99
138
|
|
139
|
+
##
|
140
|
+
# General class to match a simple pattern against a scanner.
|
141
|
+
# Subclasses implement compound matching.
|
142
|
+
|
100
143
|
class Matcher
|
101
144
|
attr_accessor :spec
|
102
145
|
|
146
|
+
##
|
147
|
+
# Compile a Matcher specification into a concrete Matcher or subclass.
|
148
|
+
|
103
149
|
def self.compile(spec)
|
104
150
|
case spec
|
105
151
|
when Matcher
|
@@ -130,8 +176,11 @@ module MetaParse
|
|
130
176
|
end
|
131
177
|
end
|
132
178
|
|
133
|
-
|
134
|
-
|
179
|
+
##
|
180
|
+
# Initialize Matcher with Matcher spec.
|
181
|
+
|
182
|
+
def initialize(spec)
|
183
|
+
@spec = spec
|
135
184
|
end
|
136
185
|
|
137
186
|
def show
|
@@ -142,6 +191,11 @@ module MetaParse
|
|
142
191
|
"<match #{show}>"
|
143
192
|
end
|
144
193
|
|
194
|
+
##
|
195
|
+
# Try to match own pattern/combination against supplied scanner. Context is unused.
|
196
|
+
#
|
197
|
+
# Subclasses should implement match? but not call it on peers.
|
198
|
+
|
145
199
|
def match?(scanner, context=nil)
|
146
200
|
case scanner
|
147
201
|
when MetaScanner
|
@@ -156,52 +210,80 @@ module MetaParse
|
|
156
210
|
end
|
157
211
|
end
|
158
212
|
|
213
|
+
##
|
214
|
+
# Like match? but clone self first if stateful.
|
215
|
+
#
|
216
|
+
# Subclasses should not implement match, but should call it on peers.
|
217
|
+
|
159
218
|
def match(scanner, context=nil)
|
160
219
|
(stateful ? clone : self).match? scanner, context
|
161
220
|
end
|
162
221
|
|
222
|
+
##
|
223
|
+
# Is this Matcher stateful? A class attribute which may be overridden by subclasses.
|
224
|
+
|
163
225
|
def stateful
|
164
226
|
false
|
165
227
|
end
|
166
228
|
|
229
|
+
##
|
230
|
+
# Syntactic sugar to create MetaScanner and match? self against string.
|
231
|
+
|
167
232
|
def m?(string)
|
168
233
|
match? MetaScanner.new(string)
|
169
234
|
end
|
170
235
|
|
236
|
+
##
|
237
|
+
# Syntactic sugar to create MetaScanner and match self against string.
|
238
|
+
|
171
239
|
def m(string)
|
172
240
|
match MetaScanner.new(string)
|
173
241
|
end
|
174
242
|
|
175
243
|
end
|
176
244
|
|
245
|
+
##
|
246
|
+
# Matcher subclass matching sub_match repeatedly.
|
247
|
+
|
177
248
|
class RepetitionMatcher < Matcher
|
178
|
-
|
249
|
+
# Match at least min times, if min is present.
|
250
|
+
attr_accessor :min
|
251
|
+
|
252
|
+
# Match at most max times, if max is present.
|
253
|
+
attr_accessor :max
|
254
|
+
|
255
|
+
# A proc used to combine match results as by inject, if present.
|
256
|
+
attr_accessor :reducer
|
179
257
|
|
180
258
|
def initialize(sub_match, min=0, max=nil, reducer=nil, initial_value=[])
|
181
259
|
@spec, @min, @max, @reducer, @initial_value = sub_match, min, max, reducer, initial_value
|
182
260
|
end
|
183
261
|
|
262
|
+
##
|
263
|
+
# RepetitionMatcher is stateful.
|
264
|
+
|
184
265
|
def stateful
|
185
266
|
true
|
186
267
|
end
|
187
268
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
269
|
+
|
270
|
+
# def match?(scanner, context=nil)
|
271
|
+
# matches = []
|
272
|
+
# while (!max || (matches.count < max)) && (match = spec.match(scanner))
|
273
|
+
# matches << match
|
274
|
+
# end
|
193
275
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
end
|
276
|
+
# unless min && (matches.count < min)
|
277
|
+
# case finalizer
|
278
|
+
# when Proc
|
279
|
+
# finalizer.call(matches, *finalizer_args)
|
280
|
+
# when Symbol
|
281
|
+
# send(finalizer, matches, *finalizer_args)
|
282
|
+
# when nil
|
283
|
+
# matches
|
284
|
+
# end
|
285
|
+
# end
|
286
|
+
# end
|
205
287
|
|
206
288
|
def match?(scanner, context=nil)
|
207
289
|
# Need to copy the initial value since it is potentially destructively modified (if an array, for example).
|
@@ -229,6 +311,9 @@ module MetaParse
|
|
229
311
|
end
|
230
312
|
end
|
231
313
|
|
314
|
+
##
|
315
|
+
# Matcher subclass matching first of alternative submatches.
|
316
|
+
|
232
317
|
class AlternativeMatcher < Matcher
|
233
318
|
def match?(scanner, context=nil)
|
234
319
|
spec.each do |alternative|
|
@@ -243,13 +328,23 @@ module MetaParse
|
|
243
328
|
end
|
244
329
|
end
|
245
330
|
|
331
|
+
##
|
332
|
+
# Matcher subclass matching submatches sequentially.
|
333
|
+
|
246
334
|
class SequentialMatcher < Matcher
|
247
335
|
attr_accessor :matches
|
336
|
+
|
337
|
+
|
338
|
+
##
|
339
|
+
# SequentialMatcher is stateful.
|
248
340
|
|
249
341
|
def stateful
|
250
342
|
true
|
251
343
|
end
|
252
344
|
|
345
|
+
##
|
346
|
+
# Match submatch patterns against scanner sequentially, accumulating results in @matches.
|
347
|
+
|
253
348
|
def match?(scanner, context = nil)
|
254
349
|
@matches = []
|
255
350
|
initial_position = scanner.pos
|
@@ -266,14 +361,23 @@ module MetaParse
|
|
266
361
|
return last_match
|
267
362
|
end
|
268
363
|
|
364
|
+
##
|
365
|
+
# Remove most recent sequential match from result stack (@matches), returning it (pop).
|
366
|
+
|
269
367
|
def pop
|
270
368
|
@matches.pop
|
271
369
|
end
|
272
370
|
|
371
|
+
##
|
372
|
+
# Add (push) value to result stack (@matches).
|
373
|
+
|
273
374
|
def push(value)
|
274
375
|
@matches.push(value)
|
275
376
|
end
|
276
377
|
|
378
|
+
##
|
379
|
+
# Reset result stack (@matches) to be empty.
|
380
|
+
|
277
381
|
def clear
|
278
382
|
@matches = []
|
279
383
|
end
|
@@ -283,11 +387,15 @@ module MetaParse
|
|
283
387
|
end
|
284
388
|
end
|
285
389
|
|
390
|
+
##
|
286
391
|
# Arbitrary predicate, particularly useful for generating final return value from SequentialMatchers.
|
287
392
|
# Context is the containing matcher, and in the case of SequentialMatcher includes access to accumulated matches.
|
288
393
|
class FunctionMatcher < Matcher
|
289
394
|
include MetaParse
|
290
395
|
|
396
|
+
##
|
397
|
+
# Try to match against scanner using an arbitrary predicate specified as a Proc or a Symbol naming a method of the Matcher's parser.
|
398
|
+
|
291
399
|
def match?(scanner, context=nil)
|
292
400
|
case spec
|
293
401
|
when Proc
|
@@ -298,20 +406,32 @@ module MetaParse
|
|
298
406
|
end
|
299
407
|
end
|
300
408
|
|
409
|
+
##
|
410
|
+
# Extract matches from context and return them, resetting context's matches to be empty.
|
411
|
+
|
301
412
|
def all_matches(scanner, context)
|
302
413
|
matches_so_far = context.matches
|
303
414
|
context.matches = []
|
304
415
|
matches_so_far
|
305
416
|
end
|
306
417
|
|
418
|
+
##
|
419
|
+
# Join all matches as extracted by #all_matches.
|
420
|
+
|
307
421
|
def all_matches_joined(scanner, context)
|
308
422
|
all_matches(scanner,context).join
|
309
423
|
end
|
310
424
|
|
425
|
+
##
|
426
|
+
# Joins the elements of array into a string, passing any extra args to Array#join. Helper function for use in assembling parse results.
|
427
|
+
|
311
428
|
def join_strings(array, *args)
|
312
429
|
array.join(*args)
|
313
430
|
end
|
314
431
|
|
432
|
+
##
|
433
|
+
# Apply block to context's matches stack, then clear context, returning result.
|
434
|
+
|
315
435
|
def collapse(&block)
|
316
436
|
lambda { |scanner, context|
|
317
437
|
stack = context.matches
|
@@ -323,14 +443,24 @@ module MetaParse
|
|
323
443
|
end
|
324
444
|
|
325
445
|
class String
|
446
|
+
|
447
|
+
##
|
448
|
+
# Returns true iff string matches the receiver (String) starting at index position of the receiver; returns nil otherwise.
|
449
|
+
# If position2 is supplied, begin comparison of string at that index position.
|
450
|
+
|
326
451
|
def equal_at(position, string, position2=0)
|
327
452
|
for i in position2..(position2 + string.length - 1) do
|
453
|
+
puts "#{position}-#{i}"
|
454
|
+
puts "#{self[position]}-#{string[i]}"
|
328
455
|
return nil unless self[position] == string[i]
|
329
456
|
position += 1
|
330
457
|
end
|
331
458
|
return true
|
332
459
|
end
|
333
460
|
|
461
|
+
##
|
462
|
+
# Return scanner using receiver (String) as source.
|
463
|
+
|
334
464
|
def meta(parser=nil)
|
335
465
|
MetaParse::MetaScanner.new(self, parser)
|
336
466
|
end
|
data/lib/util.rb
CHANGED
@@ -1,33 +1,20 @@
|
|
1
1
|
# require 'util'
|
2
2
|
|
3
3
|
module Util
|
4
|
+
##
|
5
|
+
# Sugar to allow calling mgsub as instance method.
|
6
|
+
|
4
7
|
def mgsub(string, substitution_hash)
|
5
8
|
Util::mgsub(string, substitution_hash)
|
6
9
|
end
|
7
10
|
|
8
|
-
|
9
|
-
#
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
regexp_hash.each do |regexp, substitution|
|
15
|
-
if !result && matched_string.match(regexp)
|
16
|
-
match_data = Regexp.last_match
|
17
|
-
result = case substitution
|
18
|
-
when String
|
19
|
-
substitution
|
20
|
-
else
|
21
|
-
substitution.call(match_data)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
result
|
27
|
-
}
|
28
|
-
end
|
11
|
+
##
|
12
|
+
# Multiple Global Substitution.
|
13
|
+
#
|
14
|
+
# substitution_hash maps each pattern (regexp or string) key to its corresponding substitution.
|
15
|
+
# Substitution can either be a string or a function of regexp MatchData.
|
16
|
+
# The latter allows for arbitrary substitution based on matched content.
|
29
17
|
|
30
|
-
# mgsub == multiple, global substitution.
|
31
18
|
def self.mgsub(string, substitution_hash)
|
32
19
|
all_regexps = []
|
33
20
|
regexp_hash = {}
|
@@ -46,6 +33,11 @@ module Util
|
|
46
33
|
string.gsub(combined_regexp, &substitution_function)
|
47
34
|
end
|
48
35
|
|
36
|
+
private
|
37
|
+
|
38
|
+
##
|
39
|
+
# Return corresponding regexp, escaping string if necessary. Regexp returns itself.
|
40
|
+
|
49
41
|
def self.to_regexp(spec)
|
50
42
|
case spec
|
51
43
|
when Regexp
|
@@ -53,51 +45,39 @@ module Util
|
|
53
45
|
when String
|
54
46
|
Regexp.escape(spec)
|
55
47
|
end
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
48
|
+
end
|
49
|
+
|
50
|
+
# We can't use a closure (for now) because JRuby (as of 1.7.14) doesn't handle return correctly in lambdas.
|
51
|
+
# It returns from the containing function. Here we depend on an early return from the actual function only.
|
52
|
+
def self.make_substitution_function(regexp_hash)
|
53
|
+
->(matched_string) {
|
54
|
+
result = nil # Poor man's early return because of JRuby bug.
|
55
|
+
|
56
|
+
regexp_hash.each do |regexp, substitution|
|
57
|
+
if !result && matched_string.match(regexp)
|
58
|
+
match_data = Regexp.last_match
|
59
|
+
result = case substitution
|
60
|
+
when String
|
61
|
+
substitution
|
62
|
+
else
|
63
|
+
substitution.call(match_data)
|
64
|
+
end
|
65
|
+
end
|
73
66
|
end
|
74
|
-
end
|
75
67
|
|
76
|
-
|
77
|
-
|
78
|
-
end
|
68
|
+
result
|
69
|
+
}
|
79
70
|
end
|
80
71
|
end
|
81
72
|
|
82
|
-
class Object
|
83
|
-
def self
|
84
|
-
self
|
85
|
-
end
|
86
|
-
|
87
|
-
def maybe(default = nil)
|
88
|
-
Util::Maybe.new(self, default)
|
89
|
-
end
|
90
|
-
|
91
|
-
def perhaps(default = self)
|
92
|
-
maybe(default)
|
93
|
-
end
|
94
73
|
|
95
74
|
class String < Object
|
96
|
-
|
97
|
-
|
98
|
-
|
75
|
+
def mgsub(substitution_hash)
|
76
|
+
Util::mgsub(self, substitution_hash)
|
77
|
+
end
|
99
78
|
end
|
100
|
-
|
79
|
+
|
80
|
+
#end
|
101
81
|
=begin
|
102
82
|
|
103
83
|
Perhaps is intended especially for type coercions, like:
|
@@ -108,5 +88,3 @@ def whatever(input)
|
|
108
88
|
end
|
109
89
|
|
110
90
|
=end
|
111
|
-
|
112
|
-
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: meta_parse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chhi'mèd Künzang
|
@@ -32,7 +32,7 @@ extra_rdoc_files: []
|
|
32
32
|
files:
|
33
33
|
- lib/meta_parse.rb
|
34
34
|
- lib/util.rb
|
35
|
-
homepage:
|
35
|
+
homepage: https://github.com/clkunzang/meta_parse
|
36
36
|
licenses:
|
37
37
|
- MIT
|
38
38
|
metadata: {}
|