@grain/stdlib 0.4.6 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +93 -0
- package/array.gr +18 -18
- package/array.md +18 -18
- package/bigint.gr +497 -0
- package/bigint.md +811 -0
- package/buffer.gr +59 -223
- package/buffer.md +24 -17
- package/bytes.gr +100 -202
- package/bytes.md +19 -0
- package/char.gr +63 -133
- package/exception.gr +28 -2
- package/exception.md +43 -0
- package/float32.gr +76 -95
- package/float32.md +69 -30
- package/float64.gr +81 -95
- package/float64.md +69 -30
- package/hash.gr +37 -37
- package/int32.gr +152 -198
- package/int32.md +104 -0
- package/int64.gr +151 -197
- package/int64.md +104 -0
- package/list.gr +467 -70
- package/list.md +1141 -0
- package/map.gr +192 -7
- package/map.md +525 -0
- package/number.gr +111 -54
- package/number.md +100 -3
- package/option.md +1 -1
- package/package.json +3 -3
- package/pervasives.gr +499 -59
- package/pervasives.md +1116 -0
- package/queue.gr +4 -0
- package/queue.md +10 -0
- package/random.gr +196 -0
- package/random.md +179 -0
- package/regex.gr +1833 -842
- package/regex.md +11 -11
- package/result.md +1 -1
- package/runtime/bigint.gr +2045 -0
- package/runtime/bigint.md +326 -0
- package/runtime/dataStructures.gr +99 -278
- package/runtime/dataStructures.md +391 -0
- package/runtime/debug.md +6 -0
- package/runtime/equal.gr +5 -23
- package/runtime/equal.md +6 -0
- package/runtime/exception.md +30 -0
- package/runtime/gc.gr +20 -3
- package/runtime/gc.md +36 -0
- package/runtime/malloc.gr +13 -11
- package/runtime/malloc.md +55 -0
- package/runtime/numberUtils.gr +91 -41
- package/runtime/numberUtils.md +54 -0
- package/runtime/numbers.gr +1049 -391
- package/runtime/numbers.md +300 -0
- package/runtime/string.gr +136 -230
- package/runtime/string.md +24 -0
- package/runtime/stringUtils.gr +58 -38
- package/runtime/stringUtils.md +6 -0
- package/runtime/unsafe/constants.gr +17 -0
- package/runtime/unsafe/constants.md +72 -0
- package/runtime/unsafe/conv.md +71 -0
- package/runtime/unsafe/errors.md +204 -0
- package/runtime/unsafe/memory.md +54 -0
- package/runtime/unsafe/printWasm.md +24 -0
- package/runtime/unsafe/tags.gr +9 -8
- package/runtime/unsafe/tags.md +120 -0
- package/runtime/unsafe/wasmf32.md +168 -0
- package/runtime/unsafe/wasmf64.md +168 -0
- package/runtime/unsafe/wasmi32.md +282 -0
- package/runtime/unsafe/wasmi64.md +300 -0
- package/runtime/utils/printing.gr +62 -0
- package/runtime/utils/printing.md +18 -0
- package/runtime/wasi.gr +1 -1
- package/runtime/wasi.md +839 -0
- package/set.gr +17 -8
- package/set.md +24 -21
- package/stack.gr +3 -3
- package/stack.md +4 -6
- package/string.gr +194 -329
- package/string.md +3 -3
- package/sys/file.gr +245 -429
- package/sys/process.gr +27 -45
- package/sys/random.gr +47 -16
- package/sys/random.md +38 -0
- package/sys/time.gr +11 -27
package/regex.gr
CHANGED
|
@@ -61,10 +61,14 @@ let makeRegExParserConfig = () => {
|
|
|
61
61
|
}
|
|
62
62
|
}
|
|
63
63
|
|
|
64
|
-
let configWithCaseSensitive =
|
|
64
|
+
let configWithCaseSensitive =
|
|
65
|
+
(
|
|
66
|
+
config: RegExParserConfig,
|
|
67
|
+
caseSensitive: Bool,
|
|
68
|
+
) => {
|
|
65
69
|
{
|
|
66
70
|
isPerlRegExp: config.isPerlRegExp,
|
|
67
|
-
caseSensitive
|
|
71
|
+
caseSensitive,
|
|
68
72
|
multiline: config.multiline,
|
|
69
73
|
groupNumber: config.groupNumber,
|
|
70
74
|
references: config.references,
|
|
@@ -75,7 +79,7 @@ let configWithMultiLine = (config: RegExParserConfig, multiline: Bool) => {
|
|
|
75
79
|
{
|
|
76
80
|
isPerlRegExp: config.isPerlRegExp,
|
|
77
81
|
caseSensitive: config.caseSensitive,
|
|
78
|
-
multiline
|
|
82
|
+
multiline,
|
|
79
83
|
groupNumber: config.groupNumber,
|
|
80
84
|
references: config.references,
|
|
81
85
|
}
|
|
@@ -95,18 +99,32 @@ record RegExBuf {
|
|
|
95
99
|
config: RegExParserConfig,
|
|
96
100
|
}
|
|
97
101
|
|
|
98
|
-
let makeRegExBuf =
|
|
99
|
-
{
|
|
102
|
+
let makeRegExBuf = s => {
|
|
103
|
+
{
|
|
104
|
+
input: s,
|
|
105
|
+
inputExploded: String.explode(s),
|
|
106
|
+
cursor: box(0),
|
|
107
|
+
config: makeRegExParserConfig(),
|
|
108
|
+
}
|
|
100
109
|
}
|
|
101
110
|
|
|
102
111
|
let withConfig = (buf: RegExBuf, config: RegExParserConfig) => {
|
|
103
|
-
{
|
|
112
|
+
{
|
|
113
|
+
input: buf.input,
|
|
114
|
+
inputExploded: buf.inputExploded,
|
|
115
|
+
cursor: buf.cursor,
|
|
116
|
+
config,
|
|
117
|
+
}
|
|
104
118
|
}
|
|
105
119
|
|
|
106
120
|
// Parsing internals for recursive descent
|
|
107
121
|
|
|
108
122
|
let parseErr = (buf: RegExBuf, msg: String, posShift) => {
|
|
109
|
-
"Invalid Regular Expression: " ++
|
|
123
|
+
"Invalid Regular Expression: " ++
|
|
124
|
+
msg ++
|
|
125
|
+
" (position " ++
|
|
126
|
+
toString(unbox(buf.cursor) + posShift) ++
|
|
127
|
+
")"
|
|
110
128
|
}
|
|
111
129
|
|
|
112
130
|
let next = (buf: RegExBuf) => {
|
|
@@ -148,7 +166,17 @@ let eat = (buf: RegExBuf, char: Char) => {
|
|
|
148
166
|
buf.cursor := cursor + 1
|
|
149
167
|
Ok(ret)
|
|
150
168
|
} else {
|
|
151
|
-
Err(
|
|
169
|
+
Err(
|
|
170
|
+
parseErr(
|
|
171
|
+
buf,
|
|
172
|
+
"Expected character '" ++
|
|
173
|
+
Char.toString(char) ++
|
|
174
|
+
// Close the quote opened before the expected character so both characters are quoted alike.
"', but found character '" ++
|
|
175
|
+
Char.toString(ret) ++
|
|
176
|
+
"'",
|
|
177
|
+
0
|
|
178
|
+
)
|
|
179
|
+
)
|
|
152
180
|
}
|
|
153
181
|
}
|
|
154
182
|
}
|
|
@@ -156,7 +184,7 @@ let eat = (buf: RegExBuf, char: Char) => {
|
|
|
156
184
|
/**
|
|
157
185
|
* Checks if the given regex buffer is empty
|
|
158
186
|
* @param buf: The buffer to check
|
|
159
|
-
* @returns `false` if the buffer is empty
|
|
187
|
+
* @returns `false` if the buffer is empty or `true` otherwise.
|
|
160
188
|
*/
|
|
161
189
|
let more = (buf: RegExBuf) => {
|
|
162
190
|
unbox(buf.cursor) < Array.length(buf.inputExploded)
|
|
@@ -168,7 +196,16 @@ let moreN = (buf: RegExBuf, n) => {
|
|
|
168
196
|
|
|
169
197
|
// END Parsing internals for recursive descent
|
|
170
198
|
|
|
199
|
+
/*
|
|
200
|
+
|
|
201
|
+
=================================
|
|
202
|
+
REGEX TYPE DEFINITIONS
|
|
203
|
+
=================================
|
|
171
204
|
|
|
205
|
+
*/
|
|
206
|
+
type RERange = List<(Number, Number)>
|
|
207
|
+
type CharRangeElt = Number
|
|
208
|
+
type CharRange = List<(CharRangeElt, CharRangeElt)>
|
|
172
209
|
/*
|
|
173
210
|
|
|
174
211
|
=================================
|
|
@@ -179,39 +216,38 @@ Based on https://github.com/racket/racket/blob/0a9c70e95a69743dd5d219a395e995be4
|
|
|
179
216
|
|
|
180
217
|
*/
|
|
181
218
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
let rangeInvert = (rng, limitC) => {
|
|
219
|
+
let rangeInvert = (rng: CharRange, limitC) => {
|
|
185
220
|
let rec help = (rng, start) => {
|
|
186
|
-
match(rng) {
|
|
221
|
+
match (rng) {
|
|
187
222
|
[] when start > limitC => [],
|
|
188
223
|
[] => [(start, limitC)],
|
|
189
|
-
[(subrangeStart, subrangeEnd), ...tl] =>
|
|
224
|
+
// When the next subrange begins exactly at `start` there is no gap in front of it;
// skip past it instead of emitting an empty (start, start - 1) pair.
[(subrangeStart, subrangeEnd), ...tl] when start == subrangeStart =>
  help(tl, subrangeEnd + 1),
[(subrangeStart, subrangeEnd), ...tl] =>
|
|
225
|
+
[(start, subrangeStart - 1), ...help(tl, subrangeEnd + 1)],
|
|
190
226
|
}
|
|
191
227
|
}
|
|
192
228
|
help(rng, 0)
|
|
193
229
|
}
|
|
194
230
|
|
|
195
|
-
let rec rangeContains = (rng, v) => {
|
|
196
|
-
match(rng) {
|
|
231
|
+
let rec rangeContains = (rng: CharRange, v: CharRangeElt) => {
|
|
232
|
+
match (rng) {
|
|
197
233
|
[] => false,
|
|
198
|
-
[(start, end), ..._] when
|
|
234
|
+
[(start, end), ..._] when start <= v && v <= end => true,
|
|
199
235
|
[_, ...tl] => rangeContains(tl, v),
|
|
200
236
|
}
|
|
201
237
|
}
|
|
202
238
|
|
|
203
|
-
let rec rangeAdd = (rng, v) => {
|
|
204
|
-
match(rng) {
|
|
239
|
+
let rec rangeAdd = (rng: CharRange, v: CharRangeElt) => {
|
|
240
|
+
match (rng) {
|
|
205
241
|
_ when rangeContains(rng, v) => rng,
|
|
206
|
-
_ => rangeUnion(rng, [(v, v)])
|
|
242
|
+
_ => rangeUnion(rng, [(v, v)]),
|
|
207
243
|
}
|
|
208
|
-
},
|
|
209
|
-
|
|
210
|
-
rangeUnion = (rng1, rng2) => {
|
|
211
|
-
match((rng1, rng2)) {
|
|
244
|
+
}, rangeUnion = (rng1, rng2) => {
|
|
245
|
+
match ((rng1, rng2)) {
|
|
212
246
|
([], _) => rng2,
|
|
213
247
|
(_, []) => rng1,
|
|
214
|
-
([(r1start, r1end), ...r1tl], [(r2start, r2end), ...r2tl]) when
|
|
248
|
+
([(r1start, r1end), ...r1tl], [(r2start, r2end), ...r2tl]) when (
|
|
249
|
+
r1start <= r2start
|
|
250
|
+
) => {
|
|
215
251
|
if (r1end + 1 >= r2start) {
|
|
216
252
|
if (r1end <= r2end) {
|
|
217
253
|
rangeUnion([(r1start, r2end), ...r2tl], r1tl)
|
|
@@ -222,48 +258,48 @@ rangeUnion = (rng1, rng2) => {
|
|
|
222
258
|
[(r1start, r1end), ...rangeUnion(r1tl, rng2)]
|
|
223
259
|
}
|
|
224
260
|
},
|
|
225
|
-
(_, _) => rangeUnion(rng2, rng1)
|
|
261
|
+
(_, _) => rangeUnion(rng2, rng1),
|
|
226
262
|
}
|
|
227
263
|
}
|
|
228
264
|
|
|
229
|
-
let rangeAddSpan = (rng, fromC, toC) => {
|
|
265
|
+
let rangeAddSpan = (rng: CharRange, fromC, toC) => {
|
|
230
266
|
rangeUnion(rng, [(fromC, toC)])
|
|
231
267
|
}
|
|
232
268
|
|
|
233
|
-
let rangeSingleton = (rng) => {
|
|
234
|
-
match(rng) {
|
|
269
|
+
let rangeSingleton = (rng: CharRange) => {
|
|
270
|
+
match (rng) {
|
|
235
271
|
[(c1, c2)] when c1 == c2 => Some(c1),
|
|
236
|
-
_ => None
|
|
272
|
+
_ => None,
|
|
237
273
|
}
|
|
238
274
|
}
|
|
239
275
|
|
|
240
|
-
let rec rangeIncludes = (rng, lo, hi) => {
|
|
241
|
-
match(rng) {
|
|
276
|
+
let rec rangeIncludes = (rng: CharRange, lo, hi) => {
|
|
277
|
+
match (rng) {
|
|
242
278
|
[] => false,
|
|
243
279
|
[(c1, c2), ...tl] when lo > c2 => rangeIncludes(tl, lo, hi),
|
|
244
280
|
[(c1, c2), ..._] => lo >= c1 && hi <= c2,
|
|
245
281
|
}
|
|
246
282
|
}
|
|
247
283
|
|
|
248
|
-
let rec rangeWithin = (rng, lo, hi) => {
|
|
249
|
-
match(rng) {
|
|
284
|
+
let rec rangeWithin = (rng: CharRange, lo, hi) => {
|
|
285
|
+
match (rng) {
|
|
250
286
|
[] => true,
|
|
251
287
|
[(c1, _), ..._] when c1 < lo => false,
|
|
252
288
|
[(_, c2), ..._] when c2 > hi => false,
|
|
253
|
-
[_, ...tl] => rangeWithin(tl, lo, hi)
|
|
289
|
+
[_, ...tl] => rangeWithin(tl, lo, hi),
|
|
254
290
|
}
|
|
255
291
|
}
|
|
256
292
|
|
|
257
|
-
let rec rangeOverlaps = (rng, lo, hi) => {
|
|
258
|
-
match(rng) {
|
|
293
|
+
let rec rangeOverlaps = (rng: CharRange, lo, hi) => {
|
|
294
|
+
match (rng) {
|
|
259
295
|
[] => false,
|
|
260
296
|
[(_, c2), ...tl] when lo > c2 => rangeOverlaps(tl, lo, hi),
|
|
261
|
-
[(c1, c2), ..._] =>
|
|
297
|
+
// Closed intervals [lo, hi] and [c1, c2] overlap when each begins no later than the other ends.
// (Requiring both lo and hi to lie inside [c1, c2] tests containment, as rangeIncludes does.)
[(c1, c2), ..._] => lo <= c2 && hi >= c1,
|
|
262
298
|
}
|
|
263
299
|
}
|
|
264
300
|
|
|
265
|
-
let rangeAddCaseAware = (rng, c, config) => {
|
|
266
|
-
match(c) {
|
|
301
|
+
let rangeAddCaseAware = (rng: CharRange, c, config) => {
|
|
302
|
+
match (c) {
|
|
267
303
|
None => Ok(rng),
|
|
268
304
|
Some(c) => {
|
|
269
305
|
let rng = rangeAdd(rng, c)
|
|
@@ -277,21 +313,23 @@ let rangeAddCaseAware = (rng, c, config) => {
|
|
|
277
313
|
let rng = rangeAdd(rng, Char.code(Char.downcase(Char.fromCode(c))))
|
|
278
314
|
Ok(rng)
|
|
279
315
|
*/
|
|
280
|
-
Err(
|
|
316
|
+
Err(
|
|
317
|
+
"NYI: Case-insensitive matching is not supported until grain-lang/grain#661 is resolved."
|
|
318
|
+
)
|
|
281
319
|
}
|
|
282
|
-
}
|
|
320
|
+
},
|
|
283
321
|
}
|
|
284
322
|
}
|
|
285
323
|
|
|
286
|
-
let rangeAddSpanCaseAware = (rng, fromC, toC, config) => {
|
|
324
|
+
let rangeAddSpanCaseAware = (rng: CharRange, fromC, toC, config) => {
|
|
287
325
|
if (config.caseSensitive) {
|
|
288
326
|
Ok(rangeAddSpan(rng, fromC, toC))
|
|
289
327
|
} else {
|
|
290
328
|
let mut ret = Ok(rng)
|
|
291
|
-
for (let mut i = fromC; i <= toC; i
|
|
329
|
+
for (let mut i = fromC; i <= toC; i += 1) {
|
|
292
330
|
match (ret) {
|
|
293
331
|
Ok(x) => ret = rangeAddCaseAware(x, Some(i), config),
|
|
294
|
-
Err(e) => break
|
|
332
|
+
Err(e) => break,
|
|
295
333
|
}
|
|
296
334
|
}
|
|
297
335
|
ret
|
|
@@ -362,7 +400,7 @@ enum UnicodeCategory {
|
|
|
362
400
|
OtherFormat,
|
|
363
401
|
OtherSurrogate,
|
|
364
402
|
OtherNotAssigned,
|
|
365
|
-
OtherPrivateUse
|
|
403
|
+
OtherPrivateUse,
|
|
366
404
|
}
|
|
367
405
|
|
|
368
406
|
enum ParsedRegularExpression {
|
|
@@ -376,23 +414,54 @@ enum ParsedRegularExpression {
|
|
|
376
414
|
REWordBoundary,
|
|
377
415
|
RENotWordBoundary,
|
|
378
416
|
RELiteral(Char),
|
|
379
|
-
RELiteralString(
|
|
417
|
+
RELiteralString(
|
|
418
|
+
String
|
|
419
|
+
), // <- sequences of literals are flattened into a string
|
|
380
420
|
REAlts(ParsedRegularExpression, ParsedRegularExpression),
|
|
381
421
|
RESequence(List<ParsedRegularExpression>, Bool), // seq elts, needs backtrack
|
|
382
422
|
REGroup(ParsedRegularExpression, Number), // regex, group ID
|
|
383
|
-
RERepeat(
|
|
423
|
+
RERepeat(
|
|
424
|
+
ParsedRegularExpression,
|
|
425
|
+
Number,
|
|
426
|
+
Option<Number>,
|
|
427
|
+
Bool
|
|
428
|
+
), // regex, min, max (None for infinity), true=non-greedy
|
|
384
429
|
REMaybe(ParsedRegularExpression, Bool), // regex, true=non-greedy
|
|
385
|
-
REConditional(
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
430
|
+
REConditional(
|
|
431
|
+
ParsedRegularExpression,
|
|
432
|
+
ParsedRegularExpression,
|
|
433
|
+
Option<ParsedRegularExpression>,
|
|
434
|
+
Number,
|
|
435
|
+
Number,
|
|
436
|
+
Bool
|
|
437
|
+
), // test, if-true, if-false, n-start, num-n, needs-backtrack
|
|
438
|
+
RELookahead(
|
|
439
|
+
ParsedRegularExpression,
|
|
440
|
+
Bool,
|
|
441
|
+
Number,
|
|
442
|
+
Number
|
|
443
|
+
), // regex, is-match, n-start, num-n
|
|
444
|
+
RELookbehind(
|
|
445
|
+
ParsedRegularExpression,
|
|
446
|
+
Bool,
|
|
447
|
+
Box<Number>,
|
|
448
|
+
Box<Number>,
|
|
449
|
+
Number,
|
|
450
|
+
Number
|
|
451
|
+
), // regex, is-match, lb-min, lb-max, n-start, num-n (lb-xx values patched in later)
|
|
452
|
+
RECut(
|
|
453
|
+
ParsedRegularExpression,
|
|
454
|
+
Number,
|
|
455
|
+
Number,
|
|
456
|
+
Bool
|
|
457
|
+
), // regex, n-start, num-n, needs-backtrack
|
|
389
458
|
REReference(Number, Bool), // n, case-sensitive
|
|
390
|
-
RERange(
|
|
391
|
-
REUnicodeCategories(List<UnicodeCategory>, Bool) // symlist, true=match/false=does-not-match
|
|
459
|
+
RERange(RERange),
|
|
460
|
+
REUnicodeCategories(List<UnicodeCategory>, Bool), // symlist, true=match/false=does-not-match
|
|
392
461
|
}
|
|
393
462
|
|
|
394
463
|
let needsBacktrack = (rx: ParsedRegularExpression) => {
|
|
395
|
-
match(rx) {
|
|
464
|
+
match (rx) {
|
|
396
465
|
REAlts(_, _) => true,
|
|
397
466
|
RESequence(_, nb) => nb,
|
|
398
467
|
REGroup(_, _) => true,
|
|
@@ -401,12 +470,12 @@ let needsBacktrack = (rx: ParsedRegularExpression) => {
|
|
|
401
470
|
REConditional(_, _, _, _, _, nb) => nb,
|
|
402
471
|
RECut(_, _, _, nb) => nb,
|
|
403
472
|
REUnicodeCategories(_, _) => true,
|
|
404
|
-
_ => false
|
|
473
|
+
_ => false,
|
|
405
474
|
}
|
|
406
475
|
}
|
|
407
476
|
|
|
408
|
-
let makeRERange = (rng, limitC) => {
|
|
409
|
-
match(rng) {
|
|
477
|
+
let makeRERange = (rng: CharRange, limitC) => {
|
|
478
|
+
match (rng) {
|
|
410
479
|
[(c1, c2)] when c1 == c2 => RELiteral(Char.fromCode(c1)),
|
|
411
480
|
_ when rangeIncludes(rng, 0, limitC) => REAny,
|
|
412
481
|
_ => RERange(rng),
|
|
@@ -417,39 +486,39 @@ enum MergeMode {
|
|
|
417
486
|
MMChar,
|
|
418
487
|
}
|
|
419
488
|
|
|
420
|
-
let mergeAdjacent =
|
|
489
|
+
let mergeAdjacent = lst => {
|
|
421
490
|
// see [TODO] below
|
|
422
491
|
let readyForAccum = (l, mode) => {
|
|
423
|
-
match(l) {
|
|
492
|
+
match (l) {
|
|
424
493
|
[] => true,
|
|
425
494
|
[hd, ..._] => {
|
|
426
|
-
match(mode) {
|
|
495
|
+
match (mode) {
|
|
427
496
|
None => false,
|
|
428
497
|
Some(MMChar) => {
|
|
429
|
-
match(hd) {
|
|
498
|
+
match (hd) {
|
|
430
499
|
RELiteral(x) => false,
|
|
431
500
|
RELiteralString(x) => false,
|
|
432
|
-
_ => true
|
|
501
|
+
_ => true,
|
|
433
502
|
}
|
|
434
|
-
}
|
|
503
|
+
},
|
|
435
504
|
}
|
|
436
|
-
}
|
|
505
|
+
},
|
|
437
506
|
}
|
|
438
507
|
}
|
|
439
508
|
let rec loop = (mode, accum, l) => {
|
|
440
|
-
match(l) {
|
|
509
|
+
match (l) {
|
|
441
510
|
// flatten nested sequences
|
|
442
|
-
[
|
|
511
|
+
[RESequence(rxs1, _), ...tl] => loop(mode, accum, List.append(rxs1, tl)),
|
|
443
512
|
// drop empty elements
|
|
444
513
|
[REEmpty, ...tl] => loop(mode, accum, tl),
|
|
445
514
|
[RELiteralString(""), ...tl] => loop(mode, accum, tl),
|
|
446
515
|
// [TODO] Clean up with or-patterns (grain-lang/grain#696)
|
|
447
516
|
_ when readyForAccum(l, mode) => {
|
|
448
|
-
match(accum) {
|
|
517
|
+
match (accum) {
|
|
449
518
|
[] => [],
|
|
450
519
|
[hd] => [RELiteralString(hd), ...loop(None, [], l)],
|
|
451
520
|
[hd, ...tl] => {
|
|
452
|
-
let newHd = match(mode) {
|
|
521
|
+
let newHd = match (mode) {
|
|
453
522
|
// MMByte would go here, if supported
|
|
454
523
|
Some(MMChar) => List.join("", List.reverse(accum)),
|
|
455
524
|
None => fail "internal error (mergeAdjacent)",
|
|
@@ -458,9 +527,12 @@ let mergeAdjacent = (lst) => {
|
|
|
458
527
|
},
|
|
459
528
|
}
|
|
460
529
|
},
|
|
461
|
-
[] =>
|
|
462
|
-
|
|
463
|
-
[
|
|
530
|
+
[] =>
|
|
531
|
+
fail "impossible (mergeAdjacent)", // avoid warning (can delete once TODO is resolved)
|
|
532
|
+
[RELiteralString(x), ...tl] when Option.isSome(mode) =>
|
|
533
|
+
loop(mode, [x, ...accum], tl),
|
|
534
|
+
[RELiteral(c), ...tl] when Option.isSome(mode) =>
|
|
535
|
+
loop(mode, [Char.toString(c), ...accum], tl),
|
|
464
536
|
[RELiteralString(x), ...tl] => loop(Some(MMChar), [x], tl),
|
|
465
537
|
[RELiteral(c), ...tl] => loop(Some(MMChar), [Char.toString(c)], tl),
|
|
466
538
|
[hd, ...tl] => [hd, ...loop(None, [], tl)],
|
|
@@ -469,28 +541,31 @@ let mergeAdjacent = (lst) => {
|
|
|
469
541
|
loop(None, [], lst)
|
|
470
542
|
}
|
|
471
543
|
|
|
472
|
-
let makeRESequence =
|
|
473
|
-
match(lst) {
|
|
544
|
+
let makeRESequence = lst => {
|
|
545
|
+
match (lst) {
|
|
474
546
|
[] => REEmpty,
|
|
475
547
|
[hd] => hd,
|
|
476
548
|
_ => {
|
|
477
|
-
match(mergeAdjacent(lst)) {
|
|
549
|
+
match (mergeAdjacent(lst)) {
|
|
478
550
|
[hd] => hd,
|
|
479
|
-
mList => RESequence(mList, List.some(needsBacktrack, mList))
|
|
551
|
+
mList => RESequence(mList, List.some(needsBacktrack, mList)),
|
|
480
552
|
}
|
|
481
|
-
}
|
|
553
|
+
},
|
|
482
554
|
}
|
|
483
555
|
}
|
|
484
556
|
|
|
485
557
|
let makeREAlts = (rx1, rx2, limitC) => {
|
|
486
|
-
match((rx1, rx2)) {
|
|
487
|
-
(
|
|
488
|
-
(
|
|
489
|
-
(
|
|
490
|
-
(
|
|
491
|
-
|
|
492
|
-
(
|
|
493
|
-
|
|
558
|
+
match ((rx1, rx2)) {
|
|
559
|
+
(RENever, _) => rx2,
|
|
560
|
+
(_, RENever) => rx1,
|
|
561
|
+
(RERange(r1), RERange(r2)) => makeRERange(rangeUnion(r1, r2), limitC),
|
|
562
|
+
(RERange(r1), RELiteral(c2)) =>
|
|
563
|
+
makeRERange(rangeAdd(r1, Char.code(c2)), limitC),
|
|
564
|
+
(RELiteral(c1), RERange(r2)) =>
|
|
565
|
+
makeRERange(rangeAdd(r2, Char.code(c1)), limitC),
|
|
566
|
+
(RELiteral(c1), RELiteral(c2)) =>
|
|
567
|
+
makeRERange(rangeAdd(rangeAdd([], Char.code(c1)), Char.code(c2)), limitC),
|
|
568
|
+
_ => REAlts(rx1, rx2),
|
|
494
569
|
}
|
|
495
570
|
}
|
|
496
571
|
|
|
@@ -499,10 +574,11 @@ let makeRECut = (rx, nStart, numN) => {
|
|
|
499
574
|
}
|
|
500
575
|
|
|
501
576
|
let makeREConditional = (tst, pces1, pces2, nStart, numN) => {
|
|
502
|
-
let nb = needsBacktrack(pces1) ||
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
577
|
+
let nb = needsBacktrack(pces1) ||
|
|
578
|
+
match (pces2) {
|
|
579
|
+
None => false,
|
|
580
|
+
Some(p2) => needsBacktrack(p2),
|
|
581
|
+
}
|
|
506
582
|
REConditional(tst, pces1, pces2, nStart, numN, nb)
|
|
507
583
|
}
|
|
508
584
|
|
|
@@ -526,62 +602,70 @@ let range_d = () => {
|
|
|
526
602
|
}
|
|
527
603
|
|
|
528
604
|
let range_w = () => {
|
|
529
|
-
rangeAdd(
|
|
605
|
+
rangeAdd(
|
|
606
|
+
rangeAddSpan(
|
|
607
|
+
rangeAddSpan(range_d(), Char.code('a'), Char.code('z')),
|
|
608
|
+
Char.code('A'),
|
|
609
|
+
Char.code('Z')
|
|
610
|
+
),
|
|
611
|
+
Char.code('_')
|
|
612
|
+
)
|
|
530
613
|
}
|
|
531
614
|
|
|
532
615
|
let range_s = () => {
|
|
533
616
|
// newline, tab, page, return
|
|
534
|
-
rangeAdd(
|
|
617
|
+
rangeAdd(
|
|
618
|
+
rangeAdd(rangeAdd(rangeAdd(rangeAdd([], Char.code(' ')), 9), 10), 12),
|
|
619
|
+
13
|
|
620
|
+
)
|
|
535
621
|
}
|
|
536
622
|
|
|
537
623
|
let rec parseRangeNot = (buf: RegExBuf) => {
|
|
538
624
|
if (!more(buf)) {
|
|
539
625
|
Err(parseErr(buf, "Missing closing `]`", 0))
|
|
540
626
|
} else {
|
|
541
|
-
match(peek(buf)) {
|
|
627
|
+
match (peek(buf)) {
|
|
542
628
|
Err(e) => Err(e),
|
|
543
629
|
Ok('^') => {
|
|
544
630
|
ignore(eat(buf, '^'))
|
|
545
|
-
match(parseRange(buf)) {
|
|
631
|
+
match (parseRange(buf)) {
|
|
546
632
|
Err(e) => Err(e),
|
|
547
|
-
Ok(rng) => Ok(rangeInvert(rng, rangeLimit))
|
|
633
|
+
Ok(rng) => Ok(rangeInvert(rng, rangeLimit)),
|
|
548
634
|
}
|
|
549
635
|
},
|
|
550
|
-
Ok(_) => parseRange(buf)
|
|
636
|
+
Ok(_) => parseRange(buf),
|
|
551
637
|
}
|
|
552
638
|
}
|
|
553
|
-
},
|
|
554
|
-
|
|
555
|
-
parseRange = (buf: RegExBuf) => {
|
|
639
|
+
}, parseRange = (buf: RegExBuf) => {
|
|
556
640
|
if (!more(buf)) {
|
|
557
641
|
Err(parseErr(buf, "Missing closing `]`", 0))
|
|
558
642
|
} else {
|
|
559
|
-
match(peek(buf)) {
|
|
643
|
+
match (peek(buf)) {
|
|
560
644
|
Err(e) => Err(e),
|
|
561
645
|
Ok(']') => {
|
|
562
646
|
ignore(eat(buf, ']'))
|
|
563
|
-
match(parseRangeRest(buf, [], None, None)) {
|
|
647
|
+
match (parseRangeRest(buf, [], None, None)) {
|
|
564
648
|
Err(e) => Err(e),
|
|
565
|
-
Ok(rng) => Ok(rangeAdd(rng, Char.code(']')))
|
|
649
|
+
Ok(rng) => Ok(rangeAdd(rng, Char.code(']'))),
|
|
566
650
|
}
|
|
567
651
|
},
|
|
568
652
|
Ok('-') => {
|
|
569
653
|
ignore(eat(buf, '-'))
|
|
570
|
-
match(parseRangeRest(buf, [], None, None)) {
|
|
654
|
+
match (parseRangeRest(buf, [], None, None)) {
|
|
571
655
|
Err(e) => Err(e),
|
|
572
|
-
Ok(rng) => Ok(rangeAdd(rng, Char.code('-')))
|
|
656
|
+
Ok(rng) => Ok(rangeAdd(rng, Char.code('-'))),
|
|
573
657
|
}
|
|
574
658
|
},
|
|
575
|
-
Ok(_) => parseRangeRest(buf, [], None, None)
|
|
659
|
+
Ok(_) => parseRangeRest(buf, [], None, None),
|
|
576
660
|
}
|
|
577
661
|
}
|
|
578
|
-
},
|
|
579
|
-
|
|
580
|
-
parseClass = (buf: RegExBuf) => {
|
|
662
|
+
}, parseClass = (buf: RegExBuf) => {
|
|
581
663
|
if (!more(buf)) {
|
|
582
|
-
Err(
|
|
664
|
+
Err(
|
|
665
|
+
"no chars"
|
|
666
|
+
) // caught in handler (we use a Result to cleanly mesh with the Result type below)
|
|
583
667
|
} else {
|
|
584
|
-
match(peek(buf)) {
|
|
668
|
+
match (peek(buf)) {
|
|
585
669
|
Err(e) => Err(e),
|
|
586
670
|
Ok('d') => {
|
|
587
671
|
ignore(eat(buf, 'd'))
|
|
@@ -610,65 +694,133 @@ parseClass = (buf: RegExBuf) => {
|
|
|
610
694
|
Ok(c) => Err("unknown class: " ++ toString(c)),
|
|
611
695
|
}
|
|
612
696
|
}
|
|
613
|
-
},
|
|
614
|
-
|
|
615
|
-
parsePosixCharClass = (buf: RegExBuf) => {
|
|
697
|
+
}, parsePosixCharClass = (buf: RegExBuf) => {
|
|
616
698
|
if (!more(buf)) {
|
|
617
699
|
Err(parseErr(buf, "Missing POSIX character class after `[`", 0))
|
|
618
700
|
} else {
|
|
619
|
-
match(peek(buf)) {
|
|
701
|
+
match (peek(buf)) {
|
|
620
702
|
Err(e) => Err(e),
|
|
621
703
|
Ok(':') => {
|
|
622
704
|
ignore(eat(buf, ':'))
|
|
623
|
-
let rec loop =
|
|
624
|
-
match(peek(buf)) {
|
|
705
|
+
let rec loop = acc => {
|
|
706
|
+
match (peek(buf)) {
|
|
625
707
|
Err(e) => Err(e),
|
|
626
708
|
Ok(':') => {
|
|
627
709
|
ignore(eat(buf, ':'))
|
|
628
|
-
match(eat(buf, ']')) {
|
|
710
|
+
match (eat(buf, ']')) {
|
|
629
711
|
Err(_) => Err(parseErr(buf, "Missing closing `]`", 0)),
|
|
630
|
-
Ok(_) => Ok(List.join("", List.reverse(acc)))
|
|
712
|
+
Ok(_) => Ok(List.join("", List.reverse(acc))),
|
|
631
713
|
}
|
|
632
714
|
},
|
|
633
|
-
Ok(c) when (
|
|
715
|
+
Ok(c) when (
|
|
716
|
+
Char.code('a') <= Char.code(c) && Char.code(c) <= Char.code('z')
|
|
717
|
+
) => {
|
|
634
718
|
ignore(eat(buf, c))
|
|
635
719
|
loop([Char.toString(c), ...acc])
|
|
636
720
|
},
|
|
637
|
-
Ok(_) =>
|
|
721
|
+
Ok(_) =>
|
|
722
|
+
Err(
|
|
723
|
+
parseErr(buf, "Invalid character in POSIX character class", 0)
|
|
724
|
+
),
|
|
638
725
|
}
|
|
639
726
|
}
|
|
640
|
-
match(loop([])) {
|
|
727
|
+
match (loop([])) {
|
|
641
728
|
Err(e) => Err(e),
|
|
642
729
|
Ok(s) => {
|
|
643
|
-
match(s) {
|
|
644
|
-
"alpha" =>
|
|
730
|
+
match (s) {
|
|
731
|
+
"alpha" =>
|
|
732
|
+
Ok(
|
|
733
|
+
rangeAddSpan(
|
|
734
|
+
rangeAddSpan([], Char.code('a'), Char.code('z')),
|
|
735
|
+
Char.code('A'),
|
|
736
|
+
Char.code('Z')
|
|
737
|
+
)
|
|
738
|
+
),
|
|
645
739
|
"upper" => Ok(rangeAddSpan([], Char.code('A'), Char.code('Z'))),
|
|
646
740
|
"lower" => Ok(rangeAddSpan([], Char.code('a'), Char.code('z'))),
|
|
647
741
|
"digit" => Ok(rangeAddSpan([], Char.code('0'), Char.code('9'))),
|
|
648
|
-
"xdigit" =>
|
|
649
|
-
|
|
650
|
-
|
|
742
|
+
"xdigit" =>
|
|
743
|
+
Ok(
|
|
744
|
+
rangeAddSpan(
|
|
745
|
+
rangeAddSpan(
|
|
746
|
+
rangeAddSpan([], Char.code('0'), Char.code('9')),
|
|
747
|
+
Char.code('a'),
|
|
748
|
+
Char.code('f')
|
|
749
|
+
),
|
|
750
|
+
Char.code('A'),
|
|
751
|
+
Char.code('F')
|
|
752
|
+
)
|
|
753
|
+
),
|
|
754
|
+
"alnum" =>
|
|
755
|
+
Ok(
|
|
756
|
+
rangeAddSpan(
|
|
757
|
+
rangeAddSpan(
|
|
758
|
+
rangeAddSpan([], Char.code('0'), Char.code('9')),
|
|
759
|
+
Char.code('a'),
|
|
760
|
+
Char.code('z')
|
|
761
|
+
),
|
|
762
|
+
Char.code('A'),
|
|
763
|
+
Char.code('Z')
|
|
764
|
+
)
|
|
765
|
+
),
|
|
766
|
+
"word" =>
|
|
767
|
+
Ok(
|
|
768
|
+
rangeAdd(
|
|
769
|
+
rangeAddSpan(
|
|
770
|
+
// [:word:] covers ASCII letters, digits and underscore: seed with 0-9, then add a-z and A-Z.
rangeAddSpan(rangeAddSpan([], Char.code('0'), Char.code('9')), Char.code('a'), Char.code('z')),
|
|
771
|
+
Char.code('A'),
|
|
|
772
|
+
Char.code('Z')
|
|
773
|
+
),
|
|
774
|
+
Char.code('_')
|
|
775
|
+
)
|
|
776
|
+
),
|
|
651
777
|
"blank" => Ok(rangeAdd(rangeAdd([], 0x20), 0x9)), // space and tab
|
|
652
778
|
"space" => Ok(range_s()),
|
|
653
|
-
"graph" =>
|
|
654
|
-
|
|
779
|
+
"graph" =>
|
|
780
|
+
Err(
|
|
781
|
+
parseErr(
|
|
782
|
+
buf,
|
|
783
|
+
"the [:graph:] character class is not currently supported. For more information, see https://github.com/grain-lang/grain/issues/661",
|
|
784
|
+
0
|
|
785
|
+
)
|
|
786
|
+
),
|
|
787
|
+
"print" =>
|
|
788
|
+
Err(
|
|
789
|
+
parseErr(
|
|
790
|
+
buf,
|
|
791
|
+
"the [:print:] character class is not currently supported. For more information, see https://github.com/grain-lang/grain/issues/661",
|
|
792
|
+
0
|
|
793
|
+
)
|
|
794
|
+
),
|
|
655
795
|
"cntrl" => Ok(rangeAddSpan([], 0, 31)),
|
|
656
796
|
"ascii" => Ok(rangeAddSpan([], 0, 127)),
|
|
657
|
-
_ =>
|
|
797
|
+
_ =>
|
|
798
|
+
Err(parseErr(buf, "Invalid POSIX character class: " ++ s, 0)),
|
|
658
799
|
}
|
|
659
|
-
}
|
|
800
|
+
},
|
|
660
801
|
}
|
|
661
802
|
},
|
|
662
|
-
Ok(c) =>
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
803
|
+
Ok(c) =>
|
|
804
|
+
Err(
|
|
805
|
+
parseErr(
|
|
806
|
+
buf,
|
|
807
|
+
"Expected `:` after `[`. Found: `" ++ Char.toString(c) ++ "`",
|
|
808
|
+
0
|
|
809
|
+
)
|
|
810
|
+
),
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
}, parseRangeRest =
|
|
814
|
+
(
|
|
815
|
+
buf: RegExBuf,
|
|
816
|
+
rng: CharRange,
|
|
817
|
+
spanFrom: Option<Number>,
|
|
818
|
+
mustSpanFrom: Option<Number>,
|
|
819
|
+
) => {
|
|
668
820
|
if (!more(buf)) {
|
|
669
821
|
Err(parseErr(buf, "Missing closing `]`", 0))
|
|
670
822
|
} else {
|
|
671
|
-
match(peek(buf)) {
|
|
823
|
+
match (peek(buf)) {
|
|
672
824
|
Err(e) => Err(e),
|
|
673
825
|
Ok(']') => {
|
|
674
826
|
ignore(eat(buf, ']'))
|
|
@@ -678,60 +830,108 @@ parseRangeRest = (buf: RegExBuf, rng, spanFrom: Option<Number>, mustSpanFrom: Op
|
|
|
678
830
|
if (!moreN(buf, 1)) {
|
|
679
831
|
Err(parseErr(buf, "Missing closing `]`", 1))
|
|
680
832
|
} else {
|
|
681
|
-
match(peekN(buf, 1)) {
|
|
833
|
+
match (peekN(buf, 1)) {
|
|
682
834
|
Err(e) => Err(e),
|
|
683
835
|
Ok(']') => {
|
|
684
|
-
match(mustSpanFrom) {
|
|
685
|
-
Some(_) =>
|
|
836
|
+
match (mustSpanFrom) {
|
|
837
|
+
Some(_) =>
|
|
838
|
+
Err(
|
|
839
|
+
parseErr(
|
|
840
|
+
buf,
|
|
841
|
+
"misplaced hyphen within square brackets in pattern",
|
|
842
|
+
1
|
|
843
|
+
)
|
|
844
|
+
),
|
|
686
845
|
None => {
|
|
687
846
|
ignore(eat(buf, '-'))
|
|
688
847
|
ignore(eat(buf, ']'))
|
|
689
|
-
match(rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
848
|
+
match (rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
690
849
|
Err(e) => Err(e),
|
|
691
|
-
Ok(rng) => Ok(rangeAdd(rng, Char.code('-')))
|
|
850
|
+
Ok(rng) => Ok(rangeAdd(rng, Char.code('-'))),
|
|
692
851
|
}
|
|
693
|
-
}
|
|
852
|
+
},
|
|
694
853
|
}
|
|
695
854
|
},
|
|
696
|
-
Ok(_) when Option.isNone(spanFrom) =>
|
|
855
|
+
Ok(_) when Option.isNone(spanFrom) =>
|
|
856
|
+
Err(
|
|
857
|
+
parseErr(
|
|
858
|
+
buf,
|
|
859
|
+
"misplaced hyphen within square brackets in pattern",
|
|
860
|
+
1
|
|
861
|
+
)
|
|
862
|
+
),
|
|
697
863
|
Ok(_) => {
|
|
698
864
|
ignore(eat(buf, '-'))
|
|
699
865
|
parseRangeRest(buf, rng, None, spanFrom)
|
|
700
|
-
}
|
|
866
|
+
},
|
|
701
867
|
}
|
|
702
868
|
}
|
|
703
869
|
},
|
|
704
870
|
Ok('\\') => {
|
|
705
871
|
ignore(eat(buf, '\\'))
|
|
706
|
-
if (!
|
|
872
|
+
if (!buf.config.isPerlRegExp) {
|
|
707
873
|
parseRangeRestSpan(buf, Char.code('\\'), rng, spanFrom, mustSpanFrom)
|
|
708
874
|
} else {
|
|
709
875
|
if (!more(buf)) {
|
|
710
|
-
Err(
|
|
876
|
+
Err(
|
|
877
|
+
parseErr(
|
|
878
|
+
buf,
|
|
879
|
+
"escaping backslash at end pattern (within square brackets)",
|
|
880
|
+
0
|
|
881
|
+
)
|
|
882
|
+
)
|
|
711
883
|
} else {
|
|
712
|
-
match(peek(buf)) {
|
|
884
|
+
match (peek(buf)) {
|
|
713
885
|
Err(e) => Err(e),
|
|
714
|
-
Ok(c) when (
|
|
715
|
-
|
|
716
|
-
|
|
886
|
+
Ok(c) when (
|
|
887
|
+
Char.code('a') <= Char.code(c) &&
|
|
888
|
+
Char.code(c) <= Char.code('z') ||
|
|
889
|
+
Char.code('A') <= Char.code(c) && Char.code(c) <= Char.code('Z')
|
|
890
|
+
) => {
|
|
891
|
+
match (mustSpanFrom) {
|
|
892
|
+
Some(_) =>
|
|
893
|
+
Err(
|
|
894
|
+
parseErr(
|
|
895
|
+
buf,
|
|
896
|
+
"misplaced hyphen within square brackets in pattern",
|
|
897
|
+
0
|
|
898
|
+
)
|
|
899
|
+
),
|
|
717
900
|
None => {
|
|
718
901
|
let curPos = unbox(buf.cursor)
|
|
719
|
-
match(parseClass(buf)) {
|
|
720
|
-
Err(e) =>
|
|
902
|
+
match (parseClass(buf)) {
|
|
903
|
+
Err(e) =>
|
|
904
|
+
Err(
|
|
905
|
+
"Invalid Regular Expression: illegal alphebetic escape (position " ++
|
|
906
|
+
toString(curPos) ++
|
|
907
|
+
")"
|
|
908
|
+
),
|
|
721
909
|
Ok(range1) => {
|
|
722
|
-
match(rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
910
|
+
match (rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
723
911
|
Err(e) => Err(e),
|
|
724
|
-
Ok(r) =>
|
|
912
|
+
Ok(r) =>
|
|
913
|
+
parseRangeRest(
|
|
914
|
+
buf,
|
|
915
|
+
rangeUnion(range1, r),
|
|
916
|
+
spanFrom,
|
|
917
|
+
mustSpanFrom
|
|
918
|
+
),
|
|
725
919
|
}
|
|
726
|
-
}
|
|
920
|
+
},
|
|
727
921
|
}
|
|
728
|
-
}
|
|
922
|
+
},
|
|
729
923
|
}
|
|
730
924
|
},
|
|
731
925
|
Ok(c) => {
|
|
732
926
|
ignore(next(buf))
|
|
733
|
-
parseRangeRestSpan(
|
|
734
|
-
|
|
927
|
+
parseRangeRestSpan(
|
|
928
|
+
buf,
|
|
929
|
+
Char.code(c),
|
|
930
|
+
rng,
|
|
931
|
+
spanFrom,
|
|
932
|
+
mustSpanFrom
|
|
933
|
+
)
|
|
934
|
+
},
|
|
735
935
|
}
|
|
736
936
|
}
|
|
737
937
|
}
|
|
@@ -739,7 +939,7 @@ parseRangeRest = (buf: RegExBuf, rng, spanFrom: Option<Number>, mustSpanFrom: Op
|
|
|
739
939
|
Ok('[') => {
|
|
740
940
|
ignore(eat(buf, '['))
|
|
741
941
|
let curPos = unbox(buf.cursor)
|
|
742
|
-
match(parsePosixCharClass(buf)) {
|
|
942
|
+
match (parsePosixCharClass(buf)) {
|
|
743
943
|
// NOTE: Based on the spec, we don't propagate out
|
|
744
944
|
// the errors here. Instead, we treat malformed
|
|
745
945
|
// POSIX classes as being simple sequences of characters.
|
|
@@ -748,39 +948,45 @@ parseRangeRest = (buf: RegExBuf, rng, spanFrom: Option<Number>, mustSpanFrom: Op
|
|
|
748
948
|
parseRangeRestSpan(buf, Char.code('['), rng, spanFrom, mustSpanFrom)
|
|
749
949
|
},
|
|
750
950
|
Ok(rngNew) => {
|
|
751
|
-
match(rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
951
|
+
match (rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
752
952
|
Err(e) => Err(e),
|
|
753
|
-
Ok(rng) =>
|
|
953
|
+
Ok(rng) =>
|
|
954
|
+
parseRangeRest(buf, rangeUnion(rngNew, rng), None, None),
|
|
754
955
|
}
|
|
755
|
-
}
|
|
956
|
+
},
|
|
756
957
|
}
|
|
757
958
|
},
|
|
758
959
|
Ok(c) => {
|
|
759
960
|
ignore(next(buf))
|
|
760
961
|
parseRangeRestSpan(buf, Char.code(c), rng, spanFrom, mustSpanFrom)
|
|
761
|
-
}
|
|
962
|
+
},
|
|
762
963
|
}
|
|
763
964
|
}
|
|
764
|
-
},
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
965
|
+
}, parseRangeRestSpan =
|
|
966
|
+
(
|
|
967
|
+
buf: RegExBuf,
|
|
968
|
+
c,
|
|
969
|
+
rng: CharRange,
|
|
970
|
+
spanFrom: Option<Number>,
|
|
971
|
+
mustSpanFrom: Option<Number>,
|
|
972
|
+
) => {
|
|
973
|
+
match (mustSpanFrom) {
|
|
768
974
|
Some(n) => {
|
|
769
975
|
if (n > c) {
|
|
770
976
|
Err(parseErr(buf, "invalid range within square brackets in pattern", 0))
|
|
771
977
|
} else {
|
|
772
|
-
match(rangeAddSpanCaseAware(rng, n, c, buf.config)) {
|
|
978
|
+
match (rangeAddSpanCaseAware(rng, n, c, buf.config)) {
|
|
773
979
|
Err(e) => Err(e),
|
|
774
|
-
Ok(rng) => parseRangeRest(buf, rng, None, None)
|
|
980
|
+
Ok(rng) => parseRangeRest(buf, rng, None, None),
|
|
775
981
|
}
|
|
776
982
|
}
|
|
777
983
|
},
|
|
778
984
|
None => {
|
|
779
|
-
match(rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
985
|
+
match (rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
780
986
|
Err(e) => Err(e),
|
|
781
|
-
Ok(rng) => parseRangeRest(buf, rng, Some(c), None)
|
|
987
|
+
Ok(rng) => parseRangeRest(buf, rng, Some(c), None),
|
|
782
988
|
}
|
|
783
|
-
}
|
|
989
|
+
},
|
|
784
990
|
}
|
|
785
991
|
}
|
|
786
992
|
|
|
@@ -789,170 +995,229 @@ parseRangeRestSpan = (buf: RegExBuf, c, rng, spanFrom: Option<Number>, mustSpanF
|
|
|
789
995
|
let rec parseAtom = (buf: RegExBuf) => {
|
|
790
996
|
match (peek(buf)) {
|
|
791
997
|
Err(e) => Err(e),
|
|
792
|
-
Ok(c) =>
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
998
|
+
Ok(c) =>
|
|
999
|
+
match (c) {
|
|
1000
|
+
'(' => {
|
|
1001
|
+
if (!moreN(buf, 1)) {
|
|
1002
|
+
Err(parseErr(buf, "Parentheses not closed", 1))
|
|
1003
|
+
} else if (peekN(buf, 1) == Ok('?')) {
|
|
1004
|
+
// fancy group
|
|
1005
|
+
if (!moreN(buf, 2)) {
|
|
1006
|
+
Err(parseErr(buf, "Parentheses not closed", 2))
|
|
1007
|
+
} else {
|
|
1008
|
+
match (peekN(buf, 2)) {
|
|
1009
|
+
Err(e) => Err(e),
|
|
1010
|
+
Ok('>') => {
|
|
1011
|
+
// cut
|
|
1012
|
+
ignore(eat(buf, '('))
|
|
1013
|
+
ignore(eat(buf, '?'))
|
|
1014
|
+
ignore(eat(buf, '>'))
|
|
1015
|
+
let preNumGroups = unbox(buf.config.groupNumber)
|
|
1016
|
+
match (parseRegex(buf)) {
|
|
1017
|
+
Err(e) => Err(e),
|
|
1018
|
+
Ok(rx) => {
|
|
1019
|
+
let postNumGroups = unbox(buf.config.groupNumber)
|
|
1020
|
+
match (eat(buf, ')')) {
|
|
1021
|
+
Err(e) => Err(e),
|
|
1022
|
+
Ok(_) =>
|
|
1023
|
+
Ok(
|
|
1024
|
+
makeRECut(
|
|
1025
|
+
rx,
|
|
1026
|
+
preNumGroups,
|
|
1027
|
+
postNumGroups - preNumGroups
|
|
1028
|
+
)
|
|
1029
|
+
),
|
|
1030
|
+
}
|
|
1031
|
+
},
|
|
817
1032
|
}
|
|
818
|
-
}
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
1033
|
+
},
|
|
1034
|
+
Ok('(') => {
|
|
1035
|
+
// conditional
|
|
1036
|
+
ignore(eat(buf, '('))
|
|
1037
|
+
ignore(eat(buf, '?'))
|
|
1038
|
+
ignore(eat(buf, '('))
|
|
1039
|
+
let tstPreNumGroups = unbox(buf.config.groupNumber)
|
|
1040
|
+
match (parseTest(buf)) {
|
|
1041
|
+
Err(e) => Err(e),
|
|
1042
|
+
Ok(test) => {
|
|
1043
|
+
let tstSpanNumGroups = unbox(buf.config.groupNumber) -
|
|
1044
|
+
tstPreNumGroups
|
|
1045
|
+
match (parsePCEs(buf, false)) {
|
|
1046
|
+
Err(e) => Err(e),
|
|
1047
|
+
Ok(pces) => {
|
|
1048
|
+
if (!more(buf)) {
|
|
1049
|
+
Err(parseErr(buf, "Parentheses not closed", 0))
|
|
1050
|
+
} else {
|
|
1051
|
+
match (peek(buf)) {
|
|
1052
|
+
Err(e) => Err(e),
|
|
1053
|
+
Ok('|') => {
|
|
1054
|
+
ignore(eat(buf, '|'))
|
|
1055
|
+
match (parsePCEs(buf, false)) {
|
|
1056
|
+
Err(e) => Err(e),
|
|
1057
|
+
Ok(pces2) => {
|
|
1058
|
+
match (peek(buf)) {
|
|
1059
|
+
Err(_) =>
|
|
1060
|
+
Err(
|
|
1061
|
+
parseErr(
|
|
1062
|
+
buf,
|
|
1063
|
+
"Parentheses not closed",
|
|
1064
|
+
0
|
|
1065
|
+
)
|
|
1066
|
+
),
|
|
1067
|
+
Ok(_) => {
|
|
1068
|
+
ignore(eat(buf, ')'))
|
|
1069
|
+
Ok(
|
|
1070
|
+
makeREConditional(
|
|
1071
|
+
test,
|
|
1072
|
+
makeRESequence(pces),
|
|
1073
|
+
Some(makeRESequence(pces2)),
|
|
1074
|
+
tstPreNumGroups,
|
|
1075
|
+
tstSpanNumGroups
|
|
1076
|
+
)
|
|
1077
|
+
)
|
|
1078
|
+
},
|
|
848
1079
|
}
|
|
849
|
-
}
|
|
1080
|
+
},
|
|
850
1081
|
}
|
|
851
|
-
}
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
1082
|
+
},
|
|
1083
|
+
Ok(')') => {
|
|
1084
|
+
ignore(eat(buf, ')'))
|
|
1085
|
+
Ok(
|
|
1086
|
+
makeREConditional(
|
|
1087
|
+
test,
|
|
1088
|
+
makeRESequence(pces),
|
|
1089
|
+
None,
|
|
1090
|
+
tstPreNumGroups,
|
|
1091
|
+
tstSpanNumGroups
|
|
1092
|
+
)
|
|
1093
|
+
)
|
|
1094
|
+
},
|
|
1095
|
+
Ok(_) => {
|
|
1096
|
+
Err(
|
|
1097
|
+
parseErr(buf, "Failed to parse condition", 0)
|
|
1098
|
+
)
|
|
1099
|
+
},
|
|
859
1100
|
}
|
|
860
1101
|
}
|
|
861
|
-
}
|
|
1102
|
+
},
|
|
862
1103
|
}
|
|
863
|
-
}
|
|
1104
|
+
},
|
|
864
1105
|
}
|
|
865
|
-
}
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
1106
|
+
},
|
|
1107
|
+
Ok(c) when (
|
|
1108
|
+
c == 'i' || c == 's' || c == 'm' || c == '-' || c == ':'
|
|
1109
|
+
) => {
|
|
1110
|
+
// match with mode
|
|
1111
|
+
ignore(eat(buf, '('))
|
|
1112
|
+
ignore(eat(buf, '?'))
|
|
1113
|
+
match (parseMode(buf)) {
|
|
1114
|
+
Err(e) => Err(e),
|
|
1115
|
+
Ok(config) => {
|
|
1116
|
+
if (!more(buf)) {
|
|
1117
|
+
Err(parseErr(buf, "Parentheses not closed", 0))
|
|
1118
|
+
} else {
|
|
1119
|
+
match (peek(buf)) {
|
|
1120
|
+
Err(e) => Err(e),
|
|
1121
|
+
Ok(':') => {
|
|
1122
|
+
ignore(eat(buf, ':'))
|
|
1123
|
+
match (parseRegex(withConfig(buf, config))) {
|
|
1124
|
+
Err(e) => Err(e),
|
|
1125
|
+
Ok(rx) => {
|
|
1126
|
+
match (eat(buf, ')')) {
|
|
1127
|
+
Err(e) => Err(e),
|
|
1128
|
+
Ok(_) => Ok(rx),
|
|
1129
|
+
}
|
|
1130
|
+
},
|
|
888
1131
|
}
|
|
889
|
-
}
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
1132
|
+
},
|
|
1133
|
+
Ok(_) => {
|
|
1134
|
+
Err(
|
|
1135
|
+
parseErr(
|
|
1136
|
+
buf,
|
|
1137
|
+
"expected `:` or another mode after `(?` and a mode sequence; a mode is `i`, `-i`, `m`, `-m`, `s`, or `-s`",
|
|
1138
|
+
0
|
|
1139
|
+
)
|
|
1140
|
+
)
|
|
1141
|
+
},
|
|
893
1142
|
}
|
|
894
1143
|
}
|
|
895
|
-
}
|
|
1144
|
+
},
|
|
896
1145
|
}
|
|
1146
|
+
},
|
|
1147
|
+
Ok(_) => {
|
|
1148
|
+
ignore(eat(buf, '('))
|
|
1149
|
+
ignore(eat(buf, '?'))
|
|
1150
|
+
parseLook(buf)
|
|
1151
|
+
},
|
|
1152
|
+
}
|
|
1153
|
+
}
|
|
1154
|
+
} else {
|
|
1155
|
+
// simple group
|
|
1156
|
+
ignore(eat(buf, '('))
|
|
1157
|
+
let groupNum = unbox(buf.config.groupNumber)
|
|
1158
|
+
// Note that this inc operation is side-effecting
|
|
1159
|
+
match (parseRegex(
|
|
1160
|
+
withConfig(buf, configIncGroupNumber(buf.config))
|
|
1161
|
+
)) {
|
|
1162
|
+
Err(e) => Err(e),
|
|
1163
|
+
Ok(r) => {
|
|
1164
|
+
match (eat(buf, ')')) {
|
|
1165
|
+
Err(e) => Err(e),
|
|
1166
|
+
Ok(_) => Ok(REGroup(r, groupNum)),
|
|
897
1167
|
}
|
|
898
1168
|
},
|
|
899
|
-
Ok(_) => {
|
|
900
|
-
ignore(eat(buf, '('))
|
|
901
|
-
ignore(eat(buf, '?'))
|
|
902
|
-
parseLook(buf)
|
|
903
|
-
},
|
|
904
1169
|
}
|
|
905
1170
|
}
|
|
906
|
-
}
|
|
907
|
-
|
|
908
|
-
ignore(eat(buf, '
|
|
909
|
-
|
|
910
|
-
// Note that this inc operation is side-effecting
|
|
911
|
-
match(parseRegex(withConfig(buf, configIncGroupNumber(buf.config)))) {
|
|
1171
|
+
},
|
|
1172
|
+
'[' => {
|
|
1173
|
+
ignore(eat(buf, '['))
|
|
1174
|
+
match (parseRangeNot(buf)) {
|
|
912
1175
|
Err(e) => Err(e),
|
|
913
|
-
Ok(
|
|
914
|
-
match(eat(buf, ')')) {
|
|
915
|
-
Err(e) => Err(e),
|
|
916
|
-
Ok(_) => Ok(REGroup(r, groupNum))
|
|
917
|
-
}
|
|
918
|
-
}
|
|
1176
|
+
Ok(rng) => Ok(makeRERange(rng, rangeLimit)),
|
|
919
1177
|
}
|
|
920
|
-
}
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
}
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
1178
|
+
},
|
|
1179
|
+
'.' => {
|
|
1180
|
+
ignore(eat(buf, '.'))
|
|
1181
|
+
if (buf.config.multiline) {
|
|
1182
|
+
// if in multiline mode, '.' matches everything but \n
|
|
1183
|
+
Ok(
|
|
1184
|
+
makeRERange(
|
|
1185
|
+
rangeInvert(rangeAdd([], Char.code('\n')), rangeLimit),
|
|
1186
|
+
rangeLimit
|
|
1187
|
+
)
|
|
1188
|
+
)
|
|
1189
|
+
} else {
|
|
1190
|
+
Ok(REAny)
|
|
1191
|
+
}
|
|
1192
|
+
},
|
|
1193
|
+
'^' => {
|
|
1194
|
+
ignore(eat(buf, '^'))
|
|
1195
|
+
Ok(
|
|
1196
|
+
if (buf.config.multiline) {
|
|
1197
|
+
RELineStart
|
|
1198
|
+
} else {
|
|
1199
|
+
REStart
|
|
1200
|
+
}
|
|
1201
|
+
)
|
|
1202
|
+
},
|
|
1203
|
+
'$' => {
|
|
1204
|
+
ignore(eat(buf, '$'))
|
|
1205
|
+
Ok(
|
|
1206
|
+
if (buf.config.multiline) {
|
|
1207
|
+
RELineEnd
|
|
1208
|
+
} else {
|
|
1209
|
+
REEnd
|
|
1210
|
+
}
|
|
1211
|
+
)
|
|
1212
|
+
},
|
|
1213
|
+
_ => parseLiteral(buf),
|
|
945
1214
|
},
|
|
946
|
-
_ => parseLiteral(buf)
|
|
947
|
-
}
|
|
948
1215
|
}
|
|
949
|
-
},
|
|
950
|
-
|
|
951
|
-
parseLook = (buf: RegExBuf) => {
|
|
1216
|
+
}, parseLook = (buf: RegExBuf) => {
|
|
952
1217
|
let preNumGroups = unbox(buf.config.groupNumber)
|
|
953
1218
|
let spanNumGroups = () => unbox(buf.config.groupNumber) - preNumGroups
|
|
954
1219
|
// (isMatch, isAhead)
|
|
955
|
-
let flags = match(peek(buf)) {
|
|
1220
|
+
let flags = match (peek(buf)) {
|
|
956
1221
|
Err(e) => Err(e),
|
|
957
1222
|
Ok('=') => {
|
|
958
1223
|
ignore(eat(buf, '='))
|
|
@@ -967,7 +1232,7 @@ parseLook = (buf: RegExBuf) => {
|
|
|
967
1232
|
if (!more(buf)) {
|
|
968
1233
|
Err(parseErr(buf, "Unterminated look sequence", 0))
|
|
969
1234
|
} else {
|
|
970
|
-
match(peek(buf)) {
|
|
1235
|
+
match (peek(buf)) {
|
|
971
1236
|
Err(e) => Err(e),
|
|
972
1237
|
Ok('=') => {
|
|
973
1238
|
ignore(eat(buf, '='))
|
|
@@ -977,91 +1242,103 @@ parseLook = (buf: RegExBuf) => {
|
|
|
977
1242
|
ignore(eat(buf, '!'))
|
|
978
1243
|
Ok((false, false))
|
|
979
1244
|
},
|
|
980
|
-
Ok(_) => Err(parseErr(buf, "Invalid look sequence", 0))
|
|
1245
|
+
Ok(_) => Err(parseErr(buf, "Invalid look sequence", 0)),
|
|
981
1246
|
}
|
|
982
1247
|
}
|
|
983
1248
|
},
|
|
984
1249
|
Ok(_) => {
|
|
985
1250
|
Err(parseErr(buf, "Invalid look sequence", 0))
|
|
986
|
-
}
|
|
1251
|
+
},
|
|
987
1252
|
}
|
|
988
|
-
match(flags) {
|
|
1253
|
+
match (flags) {
|
|
989
1254
|
Err(e) => Err(e),
|
|
990
1255
|
Ok((isMatch, isAhead)) => {
|
|
991
|
-
match(parseRegex(buf)) {
|
|
1256
|
+
match (parseRegex(buf)) {
|
|
992
1257
|
Err(e) => Err(e),
|
|
993
1258
|
Ok(rx) => {
|
|
994
|
-
match(eat(buf, ')')) {
|
|
1259
|
+
match (eat(buf, ')')) {
|
|
995
1260
|
Err(e) => Err(e),
|
|
996
1261
|
Ok(_) => {
|
|
997
1262
|
if (isAhead) {
|
|
998
1263
|
Ok(RELookahead(rx, isMatch, preNumGroups, spanNumGroups()))
|
|
999
1264
|
} else {
|
|
1000
|
-
Ok(
|
|
1265
|
+
Ok(
|
|
1266
|
+
RELookbehind(
|
|
1267
|
+
rx,
|
|
1268
|
+
isMatch,
|
|
1269
|
+
box(0),
|
|
1270
|
+
box(0),
|
|
1271
|
+
preNumGroups,
|
|
1272
|
+
spanNumGroups()
|
|
1273
|
+
)
|
|
1274
|
+
)
|
|
1001
1275
|
}
|
|
1002
|
-
}
|
|
1276
|
+
},
|
|
1003
1277
|
}
|
|
1004
|
-
}
|
|
1278
|
+
},
|
|
1005
1279
|
}
|
|
1006
|
-
}
|
|
1280
|
+
},
|
|
1007
1281
|
}
|
|
1008
|
-
},
|
|
1009
|
-
|
|
1010
|
-
parseTest = (buf: RegExBuf) => {
|
|
1282
|
+
}, parseTest = (buf: RegExBuf) => {
|
|
1011
1283
|
if (!more(buf)) {
|
|
1012
1284
|
Err(parseErr(buf, "Expected test", 0))
|
|
1013
1285
|
} else {
|
|
1014
|
-
match(peek(buf)) {
|
|
1286
|
+
match (peek(buf)) {
|
|
1015
1287
|
Err(e) => Err(e),
|
|
1016
1288
|
Ok('?') => {
|
|
1017
1289
|
ignore(eat(buf, '?'))
|
|
1018
1290
|
parseLook(buf)
|
|
1019
1291
|
},
|
|
1020
|
-
Ok(c) when (
|
|
1292
|
+
Ok(c) when (
|
|
1293
|
+
Char.code(c) >= Char.code('0') && Char.code(c) <= Char.code('9')
|
|
1294
|
+
) => {
|
|
1021
1295
|
buf.config.references := true
|
|
1022
1296
|
let curPos = unbox(buf.cursor)
|
|
1023
|
-
match(parseInteger(buf, 0)) {
|
|
1297
|
+
match (parseInteger(buf, 0)) {
|
|
1024
1298
|
Err(e) => Err(e),
|
|
1025
1299
|
Ok(n) => {
|
|
1026
1300
|
if (unbox(buf.cursor) == curPos) {
|
|
1027
|
-
Err(
|
|
1301
|
+
Err(
|
|
1302
|
+
parseErr(buf, "expected `)` after `(?(` followed by digits", 0)
|
|
1303
|
+
)
|
|
1028
1304
|
} else {
|
|
1029
|
-
match(eat(buf, ')')) {
|
|
1305
|
+
match (eat(buf, ')')) {
|
|
1030
1306
|
Err(e) => Err(e),
|
|
1031
|
-
Ok(_) => Ok(REReference(n, false))
|
|
1307
|
+
Ok(_) => Ok(REReference(n, false)),
|
|
1032
1308
|
}
|
|
1033
1309
|
}
|
|
1034
|
-
}
|
|
1310
|
+
},
|
|
1035
1311
|
}
|
|
1036
1312
|
},
|
|
1037
|
-
Ok(_) =>
|
|
1313
|
+
Ok(_) =>
|
|
1314
|
+
Err(
|
|
1315
|
+
parseErr(buf, "expected `(?=`, `(?!`, `(?<`, or digit after `(?(`", 0)
|
|
1316
|
+
),
|
|
1038
1317
|
}
|
|
1039
1318
|
}
|
|
1040
|
-
},
|
|
1041
|
-
|
|
1042
|
-
parseInteger = (buf: RegExBuf, n) => {
|
|
1319
|
+
}, parseInteger = (buf: RegExBuf, n) => {
|
|
1043
1320
|
if (!more(buf)) {
|
|
1044
1321
|
Ok(n)
|
|
1045
1322
|
} else {
|
|
1046
|
-
match(peek(buf)) {
|
|
1323
|
+
match (peek(buf)) {
|
|
1047
1324
|
Err(c) => Err(c),
|
|
1048
|
-
Ok(c) when (
|
|
1325
|
+
Ok(c) when (
|
|
1326
|
+
Char.code(c) >= Char.code('0') && Char.code(c) <= Char.code('9')
|
|
1327
|
+
) => {
|
|
1049
1328
|
ignore(next(buf))
|
|
1050
|
-
parseInteger(buf,
|
|
1329
|
+
parseInteger(buf, 10 * n + (Char.code(c) - Char.code('0')))
|
|
1051
1330
|
},
|
|
1052
|
-
Ok(_) => Ok(n)
|
|
1331
|
+
Ok(_) => Ok(n),
|
|
1053
1332
|
}
|
|
1054
1333
|
}
|
|
1055
|
-
},
|
|
1056
|
-
|
|
1057
|
-
parseMode = (buf: RegExBuf) => {
|
|
1334
|
+
}, parseMode = (buf: RegExBuf) => {
|
|
1058
1335
|
let processState = ((cs, ml)) => {
|
|
1059
|
-
let withCs = match(cs) {
|
|
1336
|
+
let withCs = match (cs) {
|
|
1060
1337
|
None => buf.config,
|
|
1061
1338
|
Some(true) => configWithCaseSensitive(buf.config, true),
|
|
1062
1339
|
Some(_) => configWithCaseSensitive(buf.config, false),
|
|
1063
1340
|
}
|
|
1064
|
-
match(ml) {
|
|
1341
|
+
match (ml) {
|
|
1065
1342
|
None => withCs,
|
|
1066
1343
|
Some(true) => configWithMultiLine(withCs, true),
|
|
1067
1344
|
Some(_) => configWithMultiLine(withCs, false),
|
|
@@ -1071,7 +1348,7 @@ parseMode = (buf: RegExBuf) => {
|
|
|
1071
1348
|
if (!more(buf)) {
|
|
1072
1349
|
Ok(processState((cs, ml)))
|
|
1073
1350
|
} else {
|
|
1074
|
-
match(peek(buf)) {
|
|
1351
|
+
match (peek(buf)) {
|
|
1075
1352
|
Err(e) => Err(e),
|
|
1076
1353
|
Ok('i') => {
|
|
1077
1354
|
ignore(eat(buf, 'i'))
|
|
@@ -1090,7 +1367,7 @@ parseMode = (buf: RegExBuf) => {
|
|
|
1090
1367
|
if (!more(buf)) {
|
|
1091
1368
|
Ok(processState((cs, ml)))
|
|
1092
1369
|
} else {
|
|
1093
|
-
match(peek(buf)) {
|
|
1370
|
+
match (peek(buf)) {
|
|
1094
1371
|
Err(e) => Err(e),
|
|
1095
1372
|
Ok('i') => {
|
|
1096
1373
|
ignore(eat(buf, 'i'))
|
|
@@ -1104,32 +1381,33 @@ parseMode = (buf: RegExBuf) => {
|
|
|
1104
1381
|
ignore(eat(buf, 'm'))
|
|
1105
1382
|
help((cs, Some(false)))
|
|
1106
1383
|
},
|
|
1107
|
-
_ => Ok(processState((cs, ml)))
|
|
1384
|
+
_ => Ok(processState((cs, ml))),
|
|
1108
1385
|
}
|
|
1109
1386
|
}
|
|
1110
1387
|
},
|
|
1111
|
-
_ => Ok(processState((cs, ml)))
|
|
1388
|
+
_ => Ok(processState((cs, ml))),
|
|
1112
1389
|
}
|
|
1113
1390
|
}
|
|
1114
1391
|
}
|
|
1115
1392
|
help((None, None))
|
|
1116
|
-
},
|
|
1117
|
-
|
|
1118
|
-
parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
1393
|
+
}, parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
1119
1394
|
if (!more(buf)) {
|
|
1120
1395
|
Err(parseErr(buf, "Expected unicode category", 0))
|
|
1121
1396
|
} else {
|
|
1122
|
-
match(peek(buf)) {
|
|
1397
|
+
match (peek(buf)) {
|
|
1123
1398
|
Err(e) => Err(e),
|
|
1124
1399
|
Ok('{') => {
|
|
1125
1400
|
ignore(eat(buf, '{'))
|
|
1126
1401
|
let catNegated = if (peek(buf) == Ok('^')) {
|
|
1127
1402
|
ignore(eat(buf, '^'))
|
|
1128
1403
|
true
|
|
1129
|
-
} else
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1404
|
+
} else {
|
|
1405
|
+
false
|
|
1406
|
+
}
|
|
1407
|
+
let rec loop = acc => {
|
|
1408
|
+
match (peek(buf)) {
|
|
1409
|
+
Err(e) =>
|
|
1410
|
+
Err(parseErr(buf, "Missing `}` to close `\\" ++ pC ++ "`", 0)),
|
|
1133
1411
|
Ok('}') => {
|
|
1134
1412
|
ignore(eat(buf, '}'))
|
|
1135
1413
|
Ok(List.join("", List.reverse(acc)))
|
|
@@ -1137,22 +1415,39 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1137
1415
|
Ok(c) => {
|
|
1138
1416
|
ignore(eat(buf, c))
|
|
1139
1417
|
loop([Char.toString(c), ...acc])
|
|
1140
|
-
}
|
|
1418
|
+
},
|
|
1141
1419
|
}
|
|
1142
1420
|
}
|
|
1143
|
-
let lst = match(loop([])) {
|
|
1421
|
+
let lst = match (loop([])) {
|
|
1144
1422
|
Err(e) => Err(e),
|
|
1145
1423
|
Ok(s) => {
|
|
1146
1424
|
// In case anyone is curious where these codes originate from:
|
|
1147
1425
|
// https://www.unicode.org/reports/tr44/#General_Category_Values
|
|
1148
|
-
match(s) {
|
|
1426
|
+
match (s) {
|
|
1149
1427
|
"Ll" => Ok([LetterLowercase]),
|
|
1150
1428
|
"Lu" => Ok([LetterUppercase]),
|
|
1151
1429
|
"Lt" => Ok([LetterTitlecase]),
|
|
1152
1430
|
"Lm" => Ok([LetterModifier]),
|
|
1153
|
-
"L&" =>
|
|
1431
|
+
"L&" =>
|
|
1432
|
+
Ok(
|
|
1433
|
+
[
|
|
1434
|
+
LetterLowercase,
|
|
1435
|
+
LetterUppercase,
|
|
1436
|
+
LetterTitlecase,
|
|
1437
|
+
LetterModifier,
|
|
1438
|
+
]
|
|
1439
|
+
),
|
|
1154
1440
|
"Lo" => Ok([LetterOther]),
|
|
1155
|
-
"L" =>
|
|
1441
|
+
"L" =>
|
|
1442
|
+
Ok(
|
|
1443
|
+
[
|
|
1444
|
+
LetterLowercase,
|
|
1445
|
+
LetterUppercase,
|
|
1446
|
+
LetterTitlecase,
|
|
1447
|
+
LetterModifier,
|
|
1448
|
+
LetterOther,
|
|
1449
|
+
]
|
|
1450
|
+
),
|
|
1156
1451
|
"Nd" => Ok([NumberDecimalDigit]),
|
|
1157
1452
|
"Nl" => Ok([NumberLetter]),
|
|
1158
1453
|
"No" => Ok([NumberOther]),
|
|
@@ -1164,7 +1459,18 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1164
1459
|
"Pc" => Ok([PunctuationConnector]),
|
|
1165
1460
|
"Pd" => Ok([PunctuationDash]),
|
|
1166
1461
|
"Po" => Ok([PunctuationOther]),
|
|
1167
|
-
"P" =>
|
|
1462
|
+
"P" =>
|
|
1463
|
+
Ok(
|
|
1464
|
+
[
|
|
1465
|
+
PunctuationOpen,
|
|
1466
|
+
PunctuationClose,
|
|
1467
|
+
PunctuationInitialQuote,
|
|
1468
|
+
PunctuationFinalQuote,
|
|
1469
|
+
PunctuationConnector,
|
|
1470
|
+
PunctuationDash,
|
|
1471
|
+
PunctuationOther,
|
|
1472
|
+
]
|
|
1473
|
+
),
|
|
1168
1474
|
"Mn" => Ok([MarkNonSpacing]),
|
|
1169
1475
|
"Mc" => Ok([MarkSpacingCombining]),
|
|
1170
1476
|
"Me" => Ok([MarkEnclosing]),
|
|
@@ -1173,7 +1479,8 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1173
1479
|
"Sk" => Ok([SymbolModifier]),
|
|
1174
1480
|
"Sm" => Ok([SymbolMath]),
|
|
1175
1481
|
"So" => Ok([SymbolOther]),
|
|
1176
|
-
"S" =>
|
|
1482
|
+
"S" =>
|
|
1483
|
+
Ok([SymbolCurrency, SymbolModifier, SymbolMath, SymbolOther]),
|
|
1177
1484
|
"Zl" => Ok([SeparatorLine]),
|
|
1178
1485
|
"Zp" => Ok([SeparatorParagraph]),
|
|
1179
1486
|
"Zs" => Ok([SeparatorSpace]),
|
|
@@ -1183,46 +1490,94 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1183
1490
|
"Cs" => Ok([OtherSurrogate]),
|
|
1184
1491
|
"Cn" => Ok([OtherNotAssigned]),
|
|
1185
1492
|
"Co" => Ok([OtherPrivateUse]),
|
|
1186
|
-
"C" =>
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1493
|
+
"C" =>
|
|
1494
|
+
Ok(
|
|
1495
|
+
[
|
|
1496
|
+
OtherControl,
|
|
1497
|
+
OtherFormat,
|
|
1498
|
+
OtherSurrogate,
|
|
1499
|
+
OtherNotAssigned,
|
|
1500
|
+
OtherPrivateUse,
|
|
1501
|
+
]
|
|
1502
|
+
),
|
|
1503
|
+
"." =>
|
|
1504
|
+
Ok(
|
|
1505
|
+
[
|
|
1506
|
+
LetterLowercase,
|
|
1507
|
+
LetterUppercase,
|
|
1508
|
+
LetterTitlecase,
|
|
1509
|
+
LetterModifier,
|
|
1510
|
+
LetterOther,
|
|
1511
|
+
NumberDecimalDigit,
|
|
1512
|
+
NumberLetter,
|
|
1513
|
+
NumberOther,
|
|
1514
|
+
PunctuationOpen,
|
|
1515
|
+
PunctuationClose,
|
|
1516
|
+
PunctuationInitialQuote,
|
|
1517
|
+
PunctuationFinalQuote,
|
|
1518
|
+
PunctuationConnector,
|
|
1519
|
+
PunctuationDash,
|
|
1520
|
+
PunctuationOther,
|
|
1521
|
+
MarkNonSpacing,
|
|
1522
|
+
MarkSpacingCombining,
|
|
1523
|
+
MarkEnclosing,
|
|
1524
|
+
SymbolCurrency,
|
|
1525
|
+
SymbolModifier,
|
|
1526
|
+
SymbolMath,
|
|
1527
|
+
SymbolOther,
|
|
1528
|
+
SeparatorLine,
|
|
1529
|
+
SeparatorParagraph,
|
|
1530
|
+
SeparatorSpace,
|
|
1531
|
+
OtherControl,
|
|
1532
|
+
OtherFormat,
|
|
1533
|
+
OtherSurrogate,
|
|
1534
|
+
OtherNotAssigned,
|
|
1535
|
+
OtherPrivateUse,
|
|
1536
|
+
]
|
|
1537
|
+
),
|
|
1538
|
+
s =>
|
|
1539
|
+
Err(
|
|
1540
|
+
parseErr(
|
|
1541
|
+
buf,
|
|
1542
|
+
"Unrecognized property name in `\\" ++
|
|
1543
|
+
pC ++
|
|
1544
|
+
"`: `" ++
|
|
1545
|
+
s ++
|
|
1546
|
+
"`",
|
|
1547
|
+
0
|
|
1548
|
+
)
|
|
1549
|
+
),
|
|
1197
1550
|
}
|
|
1198
|
-
}
|
|
1551
|
+
},
|
|
1199
1552
|
}
|
|
1200
|
-
match(lst) {
|
|
1553
|
+
match (lst) {
|
|
1201
1554
|
Err(e) => Err(e),
|
|
1202
|
-
Ok(l) => Ok((l, catNegated))
|
|
1555
|
+
Ok(l) => Ok((l, catNegated)),
|
|
1203
1556
|
}
|
|
1204
1557
|
},
|
|
1205
|
-
Ok(_) => Err(parseErr(buf, "Expected `{` after `\\" ++ pC ++ "`", 0))
|
|
1558
|
+
Ok(_) => Err(parseErr(buf, "Expected `{` after `\\" ++ pC ++ "`", 0)),
|
|
1206
1559
|
}
|
|
1207
1560
|
}
|
|
1208
|
-
},
|
|
1209
|
-
|
|
1210
|
-
parseLiteral = (buf: RegExBuf) => {
|
|
1561
|
+
}, parseLiteral = (buf: RegExBuf) => {
|
|
1211
1562
|
if (!more(buf)) {
|
|
1212
1563
|
Err(parseErr(buf, "Expected literal", 0))
|
|
1213
1564
|
} else {
|
|
1214
|
-
match(peek(buf)) {
|
|
1565
|
+
match (peek(buf)) {
|
|
1215
1566
|
Err(e) => Err(e),
|
|
1216
1567
|
Ok('*') => Err(parseErr(buf, "`*` follows nothing in pattern", 0)),
|
|
1217
1568
|
Ok('+') => Err(parseErr(buf, "`+` follows nothing in pattern", 0)),
|
|
1218
1569
|
Ok('?') => Err(parseErr(buf, "`?` follows nothing in pattern", 0)),
|
|
1219
|
-
Ok('{') when buf.config.isPerlRegExp =>
|
|
1570
|
+
Ok('{') when buf.config.isPerlRegExp =>
|
|
1571
|
+
Err(parseErr(buf, "`{` follows nothing in pattern", 0)),
|
|
1220
1572
|
Ok('\\') => {
|
|
1221
1573
|
ignore(eat(buf, '\\'))
|
|
1222
1574
|
parseBackslashLiteral(buf)
|
|
1223
1575
|
},
|
|
1224
1576
|
Ok(')') => Err(parseErr(buf, "Unmatched `)` in pattern", 0)),
|
|
1225
|
-
Ok(c) when
|
|
1577
|
+
Ok(c) when buf.config.isPerlRegExp && (c == ']' || c == '}') =>
|
|
1578
|
+
Err(
|
|
1579
|
+
parseErr(buf, "unmatched `" ++ Char.toString(c) ++ "` in pattern", 0)
|
|
1580
|
+
),
|
|
1226
1581
|
// [TODO] case-insensitive (#691)
|
|
1227
1582
|
Ok(c) when buf.config.caseSensitive => {
|
|
1228
1583
|
ignore(next(buf))
|
|
@@ -1230,46 +1585,52 @@ parseLiteral = (buf: RegExBuf) => {
|
|
|
1230
1585
|
},
|
|
1231
1586
|
Ok(c) => {
|
|
1232
1587
|
ignore(next(buf))
|
|
1233
|
-
match(rangeAddCaseAware([], Some(Char.code(c)), buf.config)) {
|
|
1588
|
+
match (rangeAddCaseAware([], Some(Char.code(c)), buf.config)) {
|
|
1234
1589
|
Ok(rng) => Ok(makeRERange(rng, rangeLimit)),
|
|
1235
|
-
Err(e) => Err(e)
|
|
1590
|
+
Err(e) => Err(e),
|
|
1236
1591
|
}
|
|
1237
|
-
}
|
|
1592
|
+
},
|
|
1238
1593
|
}
|
|
1239
1594
|
}
|
|
1240
|
-
},
|
|
1241
|
-
|
|
1242
|
-
parseBackslashLiteral = (buf: RegExBuf) => {
|
|
1595
|
+
}, parseBackslashLiteral = (buf: RegExBuf) => {
|
|
1243
1596
|
if (!more(buf)) {
|
|
1244
1597
|
// Special case: EOS after backslash matches null
|
|
1245
1598
|
Err(parseErr(buf, "Expected to find escaped value after backslash", 0))
|
|
1246
1599
|
} else {
|
|
1247
|
-
match(peek(buf)) {
|
|
1600
|
+
match (peek(buf)) {
|
|
1248
1601
|
Err(e) => Err(e),
|
|
1249
1602
|
// pregexp:
|
|
1250
|
-
Ok(c) when (
|
|
1603
|
+
Ok(c) when (
|
|
1604
|
+
buf.config.isPerlRegExp &&
|
|
1605
|
+
(Char.code(c) >= Char.code('0') &&
|
|
1606
|
+
Char.code(c) <= Char.code('9'))
|
|
1607
|
+
) => {
|
|
1251
1608
|
buf.config.references := true
|
|
1252
|
-
match(parseInteger(buf, 0)) {
|
|
1609
|
+
match (parseInteger(buf, 0)) {
|
|
1253
1610
|
Err(e) => Err(e),
|
|
1254
1611
|
Ok(n) => {
|
|
1255
1612
|
Ok(REReference(n, buf.config.caseSensitive))
|
|
1256
|
-
}
|
|
1613
|
+
},
|
|
1257
1614
|
}
|
|
1258
1615
|
},
|
|
1259
|
-
Ok(c) when (
|
|
1260
|
-
|
|
1616
|
+
Ok(c) when (
|
|
1617
|
+
buf.config.isPerlRegExp &&
|
|
1618
|
+
(Char.code(c) >= Char.code('a') && Char.code(c) <= Char.code('z') ||
|
|
1619
|
+
Char.code(c) >= Char.code('A') && Char.code(c) <= Char.code('Z'))
|
|
1620
|
+
) => {
|
|
1621
|
+
match (c) {
|
|
1261
1622
|
'p' => {
|
|
1262
1623
|
ignore(eat(buf, 'p'))
|
|
1263
|
-
match(parseUnicodeCategories(buf, "p")) {
|
|
1624
|
+
match (parseUnicodeCategories(buf, "p")) {
|
|
1264
1625
|
Err(e) => Err(e),
|
|
1265
|
-
Ok((cats, negated)) => Ok(REUnicodeCategories(cats, negated))
|
|
1626
|
+
Ok((cats, negated)) => Ok(REUnicodeCategories(cats, negated)),
|
|
1266
1627
|
}
|
|
1267
1628
|
},
|
|
1268
1629
|
'P' => {
|
|
1269
1630
|
ignore(eat(buf, 'P'))
|
|
1270
|
-
match(parseUnicodeCategories(buf, "P")) {
|
|
1631
|
+
match (parseUnicodeCategories(buf, "P")) {
|
|
1271
1632
|
Err(e) => Err(e),
|
|
1272
|
-
Ok((cats, negated)) => Ok(REUnicodeCategories(cats, !negated))
|
|
1633
|
+
Ok((cats, negated)) => Ok(REUnicodeCategories(cats, !negated)),
|
|
1273
1634
|
}
|
|
1274
1635
|
},
|
|
1275
1636
|
'b' => {
|
|
@@ -1281,39 +1642,37 @@ parseBackslashLiteral = (buf: RegExBuf) => {
|
|
|
1281
1642
|
Ok(RENotWordBoundary)
|
|
1282
1643
|
},
|
|
1283
1644
|
_ => {
|
|
1284
|
-
match(parseClass(buf)) {
|
|
1645
|
+
match (parseClass(buf)) {
|
|
1285
1646
|
Err(e) => Err(parseErr(buf, "illegal alphabetic escape", 0)),
|
|
1286
|
-
Ok(rng) => Ok(makeRERange(rng, rangeLimit))
|
|
1647
|
+
Ok(rng) => Ok(makeRERange(rng, rangeLimit)),
|
|
1287
1648
|
}
|
|
1288
|
-
}
|
|
1649
|
+
},
|
|
1289
1650
|
}
|
|
1290
1651
|
},
|
|
1291
1652
|
Ok(c) => {
|
|
1292
1653
|
ignore(next(buf))
|
|
1293
1654
|
Ok(RELiteral(c))
|
|
1294
|
-
}
|
|
1655
|
+
},
|
|
1295
1656
|
}
|
|
1296
1657
|
}
|
|
1297
|
-
},
|
|
1298
|
-
|
|
1299
|
-
parseNonGreedy = (buf: RegExBuf) => {
|
|
1300
|
-
let checkNotNested = (res) => {
|
|
1658
|
+
}, parseNonGreedy = (buf: RegExBuf) => {
|
|
1659
|
+
let checkNotNested = res => {
|
|
1301
1660
|
if (!more(buf)) {
|
|
1302
1661
|
res
|
|
1303
1662
|
} else {
|
|
1304
|
-
match(peek(buf)) {
|
|
1663
|
+
match (peek(buf)) {
|
|
1305
1664
|
Err(e) => Err(e),
|
|
1306
|
-
Ok(c) when
|
|
1665
|
+
Ok(c) when c == '?' || c == '*' || c == '+' => {
|
|
1307
1666
|
Err(parseErr(buf, "nested '" ++ toString(c) ++ "' in pattern", 0))
|
|
1308
1667
|
},
|
|
1309
|
-
Ok(_) => res
|
|
1668
|
+
Ok(_) => res,
|
|
1310
1669
|
}
|
|
1311
1670
|
}
|
|
1312
1671
|
}
|
|
1313
1672
|
if (!more(buf)) {
|
|
1314
1673
|
Ok(false)
|
|
1315
1674
|
} else {
|
|
1316
|
-
match(peek(buf)) {
|
|
1675
|
+
match (peek(buf)) {
|
|
1317
1676
|
Err(e) => Err(e),
|
|
1318
1677
|
Ok('?') => {
|
|
1319
1678
|
ignore(eat(buf, '?'))
|
|
@@ -1322,202 +1681,211 @@ parseNonGreedy = (buf: RegExBuf) => {
|
|
|
1322
1681
|
Ok(_) => checkNotNested(Ok(false)),
|
|
1323
1682
|
}
|
|
1324
1683
|
}
|
|
1325
|
-
},
|
|
1326
|
-
|
|
1327
|
-
parsePCE = (buf: RegExBuf) => {
|
|
1328
|
-
match(parseAtom(buf)) {
|
|
1684
|
+
}, parsePCE = (buf: RegExBuf) => {
|
|
1685
|
+
match (parseAtom(buf)) {
|
|
1329
1686
|
Err(e) => Err(e),
|
|
1330
1687
|
Ok(atom) => {
|
|
1331
1688
|
if (!more(buf)) {
|
|
1332
1689
|
Ok(atom)
|
|
1333
1690
|
} else {
|
|
1334
|
-
match(peek(buf)) {
|
|
1691
|
+
match (peek(buf)) {
|
|
1335
1692
|
Err(e) => Err(e),
|
|
1336
1693
|
Ok('*') => {
|
|
1337
1694
|
ignore(eat(buf, '*'))
|
|
1338
|
-
match(parseNonGreedy(buf)) {
|
|
1695
|
+
match (parseNonGreedy(buf)) {
|
|
1339
1696
|
Err(e) => Err(e),
|
|
1340
|
-
Ok(ng) => Ok(RERepeat(atom, 0, None, ng))
|
|
1697
|
+
Ok(ng) => Ok(RERepeat(atom, 0, None, ng)),
|
|
1341
1698
|
}
|
|
1342
1699
|
},
|
|
1343
1700
|
Ok('+') => {
|
|
1344
1701
|
ignore(eat(buf, '+'))
|
|
1345
|
-
match(parseNonGreedy(buf)) {
|
|
1702
|
+
match (parseNonGreedy(buf)) {
|
|
1346
1703
|
Err(e) => Err(e),
|
|
1347
|
-
Ok(ng) => Ok(RERepeat(atom, 1, None, ng))
|
|
1704
|
+
Ok(ng) => Ok(RERepeat(atom, 1, None, ng)),
|
|
1348
1705
|
}
|
|
1349
1706
|
},
|
|
1350
1707
|
Ok('?') => {
|
|
1351
1708
|
ignore(eat(buf, '?'))
|
|
1352
|
-
match(parseNonGreedy(buf)) {
|
|
1709
|
+
match (parseNonGreedy(buf)) {
|
|
1353
1710
|
Err(e) => Err(e),
|
|
1354
|
-
Ok(ng) => Ok(REMaybe(atom, ng))
|
|
1711
|
+
Ok(ng) => Ok(REMaybe(atom, ng)),
|
|
1355
1712
|
}
|
|
1356
1713
|
},
|
|
1357
1714
|
Ok('{') when buf.config.isPerlRegExp => {
|
|
1358
1715
|
ignore(eat(buf, '{'))
|
|
1359
|
-
match(parseInteger(buf, 0)) {
|
|
1716
|
+
match (parseInteger(buf, 0)) {
|
|
1360
1717
|
Err(e) => Err(e),
|
|
1361
1718
|
Ok(n1) => {
|
|
1362
|
-
match(peek(buf)) {
|
|
1719
|
+
match (peek(buf)) {
|
|
1363
1720
|
Ok(',') => {
|
|
1364
1721
|
ignore(eat(buf, ','))
|
|
1365
1722
|
let curPos = unbox(buf.cursor)
|
|
1366
|
-
match(parseInteger(buf, 0)) {
|
|
1723
|
+
match (parseInteger(buf, 0)) {
|
|
1367
1724
|
Err(e) => Err(e),
|
|
1368
1725
|
Ok(n2) => {
|
|
1369
|
-
match(peek(buf)) {
|
|
1726
|
+
match (peek(buf)) {
|
|
1370
1727
|
Err(e) => Err(e),
|
|
1371
1728
|
Ok('}') => {
|
|
1372
1729
|
// for `{n,}`, we match >= n times, so n2adj should be infinity
|
|
1373
|
-
let n2adj = if (curPos == unbox(buf.cursor)) {
|
|
1730
|
+
let n2adj = if (curPos == unbox(buf.cursor)) {
|
|
1731
|
+
None
|
|
1732
|
+
} else {
|
|
1733
|
+
Some(n2)
|
|
1734
|
+
}
|
|
1374
1735
|
ignore(eat(buf, '}'))
|
|
1375
|
-
match(parseNonGreedy(buf)) {
|
|
1736
|
+
match (parseNonGreedy(buf)) {
|
|
1376
1737
|
Err(e) => Err(e),
|
|
1377
|
-
Ok(ng) => Ok(RERepeat(atom, n1, n2adj, ng))
|
|
1738
|
+
Ok(ng) => Ok(RERepeat(atom, n1, n2adj, ng)),
|
|
1378
1739
|
}
|
|
1379
1740
|
},
|
|
1380
|
-
Ok(_) =>
|
|
1741
|
+
Ok(_) =>
|
|
1742
|
+
Err(
|
|
1743
|
+
parseErr(
|
|
1744
|
+
buf,
|
|
1745
|
+
"expected digit or `}` to end repetition specification started with `{`",
|
|
1746
|
+
0
|
|
1747
|
+
)
|
|
1748
|
+
),
|
|
1381
1749
|
}
|
|
1382
|
-
}
|
|
1750
|
+
},
|
|
1383
1751
|
}
|
|
1384
1752
|
},
|
|
1385
1753
|
Ok('}') => {
|
|
1386
1754
|
ignore(eat(buf, '}'))
|
|
1387
|
-
match(parseNonGreedy(buf)) {
|
|
1755
|
+
match (parseNonGreedy(buf)) {
|
|
1388
1756
|
Err(e) => Err(e),
|
|
1389
1757
|
// match exactly n1 times
|
|
1390
|
-
Ok(ng) => Ok(RERepeat(atom, n1, Some(n1), ng),
|
|
1758
|
+
Ok(ng) => Ok(RERepeat(atom, n1, Some(n1), ng)),
|
|
1391
1759
|
}
|
|
1392
1760
|
},
|
|
1393
|
-
_ =>
|
|
1761
|
+
_ =>
|
|
1762
|
+
Err(
|
|
1763
|
+
parseErr(
|
|
1764
|
+
buf,
|
|
1765
|
+
"expected digit, `,`, or `}' for repetition specification started with `{`",
|
|
1766
|
+
0
|
|
1767
|
+
)
|
|
1768
|
+
),
|
|
1394
1769
|
}
|
|
1395
|
-
}
|
|
1770
|
+
},
|
|
1396
1771
|
}
|
|
1397
1772
|
},
|
|
1398
|
-
Ok(_) => Ok(atom)
|
|
1773
|
+
Ok(_) => Ok(atom),
|
|
1399
1774
|
}
|
|
1400
1775
|
}
|
|
1401
|
-
}
|
|
1776
|
+
},
|
|
1402
1777
|
}
|
|
1403
|
-
},
|
|
1404
|
-
|
|
1405
|
-
parsePCEs = (buf: RegExBuf, toplevel: Bool) => {
|
|
1778
|
+
}, parsePCEs = (buf: RegExBuf, toplevel: Bool) => {
|
|
1406
1779
|
if (!more(buf)) {
|
|
1407
1780
|
Ok([])
|
|
1408
1781
|
} else {
|
|
1409
|
-
match(parsePCE(buf)) {
|
|
1782
|
+
match (parsePCE(buf)) {
|
|
1410
1783
|
Err(e) => Err(e),
|
|
1411
1784
|
Ok(pce) => {
|
|
1412
1785
|
if (!more(buf)) {
|
|
1413
1786
|
Ok([pce])
|
|
1414
1787
|
} else {
|
|
1415
|
-
match(peek(buf)) {
|
|
1788
|
+
match (peek(buf)) {
|
|
1416
1789
|
Err(e) => Err(e),
|
|
1417
1790
|
Ok('|') => Ok([pce]),
|
|
1418
1791
|
Ok(')') when toplevel => Err(parseErr(buf, "Unmatched `)`", 0)),
|
|
1419
1792
|
Ok(')') => Ok([pce]),
|
|
1420
1793
|
Ok(_) => {
|
|
1421
|
-
match(parsePCEs(buf, toplevel)) {
|
|
1794
|
+
match (parsePCEs(buf, toplevel)) {
|
|
1422
1795
|
Err(e) => Err(e),
|
|
1423
|
-
Ok(otherPces) => Ok([pce, ...otherPces])
|
|
1796
|
+
Ok(otherPces) => Ok([pce, ...otherPces]),
|
|
1424
1797
|
}
|
|
1425
|
-
}
|
|
1798
|
+
},
|
|
1426
1799
|
}
|
|
1427
1800
|
}
|
|
1428
|
-
}
|
|
1801
|
+
},
|
|
1429
1802
|
}
|
|
1430
1803
|
}
|
|
1431
|
-
},
|
|
1432
|
-
|
|
1433
|
-
parseRegex = (buf: RegExBuf) => {
|
|
1804
|
+
}, parseRegex = (buf: RegExBuf) => {
|
|
1434
1805
|
if (!more(buf)) {
|
|
1435
1806
|
Ok(REEmpty)
|
|
1436
1807
|
} else {
|
|
1437
|
-
match(peek(buf)) {
|
|
1808
|
+
match (peek(buf)) {
|
|
1438
1809
|
Err(e) => Err(e),
|
|
1439
1810
|
Ok(')') => {
|
|
1440
1811
|
Ok(REEmpty)
|
|
1441
1812
|
},
|
|
1442
1813
|
Ok(_) => {
|
|
1443
|
-
match(parsePCEs(buf, false)) {
|
|
1814
|
+
match (parsePCEs(buf, false)) {
|
|
1444
1815
|
Err(e) => Err(e),
|
|
1445
1816
|
Ok(pces) => {
|
|
1446
1817
|
if (!more(buf)) {
|
|
1447
1818
|
Ok(makeRESequence(pces))
|
|
1448
1819
|
} else {
|
|
1449
|
-
match(peek(buf)) {
|
|
1820
|
+
match (peek(buf)) {
|
|
1450
1821
|
Err(e) => Err(e),
|
|
1451
1822
|
Ok('|') => {
|
|
1452
1823
|
ignore(eat(buf, '|'))
|
|
1453
|
-
match(parseRegex(buf)) {
|
|
1824
|
+
match (parseRegex(buf)) {
|
|
1454
1825
|
Err(e) => Err(e),
|
|
1455
1826
|
Ok(rx2) => {
|
|
1456
1827
|
Ok(makeREAlts(makeRESequence(pces), rx2, rangeLimit))
|
|
1457
|
-
}
|
|
1828
|
+
},
|
|
1458
1829
|
}
|
|
1459
1830
|
},
|
|
1460
|
-
Ok(_) => Ok(makeRESequence(pces))
|
|
1831
|
+
Ok(_) => Ok(makeRESequence(pces)),
|
|
1461
1832
|
}
|
|
1462
1833
|
}
|
|
1463
|
-
}
|
|
1834
|
+
},
|
|
1464
1835
|
}
|
|
1465
|
-
}
|
|
1836
|
+
},
|
|
1466
1837
|
}
|
|
1467
1838
|
}
|
|
1468
|
-
},
|
|
1469
|
-
|
|
1470
|
-
parseRegexNonEmpty = (buf: RegExBuf) => {
|
|
1471
|
-
match(parsePCEs(buf, false)) {
|
|
1839
|
+
}, parseRegexNonEmpty = (buf: RegExBuf) => {
|
|
1840
|
+
match (parsePCEs(buf, false)) {
|
|
1472
1841
|
Err(e) => Err(e),
|
|
1473
1842
|
Ok(pces) => {
|
|
1474
1843
|
if (!more(buf)) {
|
|
1475
1844
|
Ok(makeRESequence(pces))
|
|
1476
1845
|
} else {
|
|
1477
|
-
match(peek(buf)) {
|
|
1846
|
+
match (peek(buf)) {
|
|
1478
1847
|
Err(e) => Err(e),
|
|
1479
1848
|
Ok('|') => {
|
|
1480
1849
|
ignore(eat(buf, '|'))
|
|
1481
|
-
match(parseRegexNonEmpty(buf)) {
|
|
1850
|
+
match (parseRegexNonEmpty(buf)) {
|
|
1482
1851
|
Err(e) => Err(e),
|
|
1483
1852
|
Ok(rx2) => {
|
|
1484
1853
|
Ok(makeREAlts(makeRESequence(pces), rx2, rangeLimit))
|
|
1485
|
-
}
|
|
1854
|
+
},
|
|
1486
1855
|
}
|
|
1487
1856
|
},
|
|
1488
|
-
Ok(_) => Ok(makeRESequence(pces))
|
|
1857
|
+
Ok(_) => Ok(makeRESequence(pces)),
|
|
1489
1858
|
}
|
|
1490
1859
|
}
|
|
1491
|
-
}
|
|
1860
|
+
},
|
|
1492
1861
|
}
|
|
1493
1862
|
}
|
|
1494
1863
|
|
|
1495
1864
|
let parseRegex = (buf: RegExBuf) => {
|
|
1496
|
-
match(parsePCEs(buf, true)) {
|
|
1865
|
+
match (parsePCEs(buf, true)) {
|
|
1497
1866
|
Err(e) => Err(e),
|
|
1498
1867
|
Ok(pces) => {
|
|
1499
1868
|
if (!more(buf)) {
|
|
1500
1869
|
Ok(makeRESequence(pces))
|
|
1501
1870
|
} else {
|
|
1502
|
-
match(peek(buf)) {
|
|
1871
|
+
match (peek(buf)) {
|
|
1503
1872
|
Err(e) => Err(e),
|
|
1504
1873
|
Ok('|') => {
|
|
1505
1874
|
ignore(eat(buf, '|'))
|
|
1506
|
-
match(parseRegex(buf)) {
|
|
1875
|
+
match (parseRegex(buf)) {
|
|
1507
1876
|
Err(e) => Err(e),
|
|
1508
1877
|
Ok(rx2) => {
|
|
1509
1878
|
Ok(makeREAlts(makeRESequence(pces), rx2, rangeLimit))
|
|
1510
|
-
}
|
|
1879
|
+
},
|
|
1511
1880
|
}
|
|
1512
1881
|
},
|
|
1513
|
-
Ok(_) => Ok(makeRESequence(pces))
|
|
1882
|
+
Ok(_) => Ok(makeRESequence(pces)),
|
|
1514
1883
|
}
|
|
1515
1884
|
}
|
|
1516
|
-
}
|
|
1885
|
+
},
|
|
1517
1886
|
}
|
|
1518
1887
|
}
|
|
1519
1888
|
|
|
1520
|
-
|
|
1521
1889
|
/*
|
|
1522
1890
|
|
|
1523
1891
|
REGEX ANALYSIS
|
|
@@ -1534,25 +1902,27 @@ In addition to the parse tree, we take three analyses from Racket:
|
|
|
1534
1902
|
// is-anchored:
|
|
1535
1903
|
|
|
1536
1904
|
let rec isAnchored = (re: ParsedRegularExpression) => {
|
|
1537
|
-
match(re) {
|
|
1905
|
+
match (re) {
|
|
1538
1906
|
REStart => true,
|
|
1539
1907
|
RESequence(lst, _) => {
|
|
1540
|
-
let rec loop =
|
|
1541
|
-
match(lst) {
|
|
1908
|
+
let rec loop = lst => {
|
|
1909
|
+
match (lst) {
|
|
1542
1910
|
[] => false,
|
|
1543
1911
|
[hd, ...tl] => {
|
|
1544
|
-
match(hd) {
|
|
1912
|
+
match (hd) {
|
|
1545
1913
|
RELookahead(_, _, _, _) => loop(tl),
|
|
1546
1914
|
RELookbehind(_, _, _, _, _, _) => loop(tl),
|
|
1547
1915
|
_ => isAnchored(hd),
|
|
1548
1916
|
}
|
|
1549
|
-
}
|
|
1917
|
+
},
|
|
1550
1918
|
}
|
|
1551
1919
|
}
|
|
1552
1920
|
loop(lst)
|
|
1553
1921
|
},
|
|
1554
1922
|
REAlts(a, b) => isAnchored(a) && isAnchored(b),
|
|
1555
|
-
REConditional(_, rx1, rx2, _, _, _) =>
|
|
1923
|
+
REConditional(_, rx1, rx2, _, _, _) =>
|
|
1924
|
+
isAnchored(rx1) &&
|
|
1925
|
+
Option.mapWithDefault(isAnchored, false, rx2),
|
|
1556
1926
|
REGroup(rx, _) => isAnchored(rx),
|
|
1557
1927
|
RECut(rx, _, _, _) => isAnchored(rx),
|
|
1558
1928
|
_ => false,
|
|
@@ -1562,15 +1932,16 @@ let rec isAnchored = (re: ParsedRegularExpression) => {
|
|
|
1562
1932
|
// must-string:
|
|
1563
1933
|
|
|
1564
1934
|
let rec mustString = (re: ParsedRegularExpression) => {
|
|
1565
|
-
match(re) {
|
|
1935
|
+
match (re) {
|
|
1566
1936
|
RELiteral(c) => Some(Char.toString(c)),
|
|
1567
1937
|
RELiteralString(s) => Some(s),
|
|
1568
1938
|
RESequence(pces, _) => {
|
|
1569
1939
|
List.reduce((acc, pce) => {
|
|
1570
|
-
match((mustString(pce), acc)) {
|
|
1940
|
+
match ((mustString(pce), acc)) {
|
|
1571
1941
|
(x, None) => x,
|
|
1572
1942
|
(None, x) => x,
|
|
1573
|
-
(Some(a), Some(b)) when String.length(a) > String.length(b) =>
|
|
1943
|
+
(Some(a), Some(b)) when String.length(a) > String.length(b) =>
|
|
1944
|
+
Some(a),
|
|
1574
1945
|
(Some(a), Some(b)) => Some(b),
|
|
1575
1946
|
}
|
|
1576
1947
|
}, None, pces)
|
|
@@ -1586,14 +1957,14 @@ let rec mustString = (re: ParsedRegularExpression) => {
|
|
|
1586
1957
|
RECut(re, _, _, _) => mustString(re),
|
|
1587
1958
|
RELookahead(re, true, _, _) => mustString(re),
|
|
1588
1959
|
RELookbehind(re, true, _, _, _, _) => mustString(re),
|
|
1589
|
-
_ => None
|
|
1960
|
+
_ => None,
|
|
1590
1961
|
}
|
|
1591
1962
|
}
|
|
1592
1963
|
|
|
1593
1964
|
// start-range
|
|
1594
1965
|
|
|
1595
|
-
let rec zeroSized =
|
|
1596
|
-
match(re) {
|
|
1966
|
+
let rec zeroSized = re => {
|
|
1967
|
+
match (re) {
|
|
1597
1968
|
REEmpty => true,
|
|
1598
1969
|
REStart => true,
|
|
1599
1970
|
RELineStart => true,
|
|
@@ -1607,45 +1978,45 @@ let rec zeroSized = (re) => {
|
|
|
1607
1978
|
}
|
|
1608
1979
|
}
|
|
1609
1980
|
|
|
1610
|
-
let rec startRange =
|
|
1981
|
+
let rec startRange = re => {
|
|
1611
1982
|
match (re) {
|
|
1612
1983
|
RELiteral(c) => Some(rangeAdd([], Char.code(c))),
|
|
1613
1984
|
RELiteralString(s) => Some(rangeAdd([], Char.code(String.charAt(0, s)))),
|
|
1614
1985
|
RESequence(elts, _) => {
|
|
1615
|
-
let rec loop =
|
|
1616
|
-
match(l) {
|
|
1986
|
+
let rec loop = l => {
|
|
1987
|
+
match (l) {
|
|
1617
1988
|
[] => None,
|
|
1618
1989
|
[hd, ...tl] when zeroSized(hd) => loop(tl),
|
|
1619
|
-
[hd, ..._] => startRange(hd)
|
|
1990
|
+
[hd, ..._] => startRange(hd),
|
|
1620
1991
|
}
|
|
1621
1992
|
}
|
|
1622
1993
|
loop(elts)
|
|
1623
1994
|
},
|
|
1624
1995
|
REAlts(re1, re2) => {
|
|
1625
|
-
match(startRange(re1)) {
|
|
1996
|
+
match (startRange(re1)) {
|
|
1626
1997
|
None => None,
|
|
1627
1998
|
Some(rng1) => {
|
|
1628
|
-
match(startRange(re2)) {
|
|
1999
|
+
match (startRange(re2)) {
|
|
1629
2000
|
None => None,
|
|
1630
|
-
Some(rng2) => Some(rangeUnion(rng1, rng2))
|
|
2001
|
+
Some(rng2) => Some(rangeUnion(rng1, rng2)),
|
|
1631
2002
|
}
|
|
1632
|
-
}
|
|
2003
|
+
},
|
|
1633
2004
|
}
|
|
1634
2005
|
},
|
|
1635
2006
|
REConditional(_, re1, re2, _, _, _) => {
|
|
1636
|
-
match(startRange(re1)) {
|
|
2007
|
+
match (startRange(re1)) {
|
|
1637
2008
|
None => None,
|
|
1638
2009
|
Some(rng1) => {
|
|
1639
|
-
match(re2) {
|
|
2010
|
+
match (re2) {
|
|
1640
2011
|
None => None,
|
|
1641
2012
|
Some(re2) => {
|
|
1642
|
-
match(startRange(re2)) {
|
|
2013
|
+
match (startRange(re2)) {
|
|
1643
2014
|
None => None,
|
|
1644
|
-
Some(rng2) => Some(rangeUnion(rng1, rng2))
|
|
2015
|
+
Some(rng2) => Some(rangeUnion(rng1, rng2)),
|
|
1645
2016
|
}
|
|
1646
|
-
}
|
|
2017
|
+
},
|
|
1647
2018
|
}
|
|
1648
|
-
}
|
|
2019
|
+
},
|
|
1649
2020
|
}
|
|
1650
2021
|
},
|
|
1651
2022
|
REGroup(re, _) => startRange(re),
|
|
@@ -1685,18 +2056,22 @@ let rec validate = (re: ParsedRegularExpression, numGroups) => {
|
|
|
1685
2056
|
/**
|
|
1686
2057
|
Computes the range of possible UTF-8 byte lengths for the given character range
|
|
1687
2058
|
*/
|
|
1688
|
-
|
|
2059
|
+
|
|
2060
|
+
let rangeUtf8EncodingLengths = (rng: CharRange) => {
|
|
1689
2061
|
let (min, max, _) = List.reduce(((min1, max1, n), (segStart, segEnd)) => {
|
|
1690
2062
|
if (rangeOverlaps(rng, segStart, segEnd)) {
|
|
1691
2063
|
(min(min1, n), max(max1, n), n + 1)
|
|
1692
2064
|
} else {
|
|
1693
2065
|
(min1, max1, n + 1)
|
|
1694
2066
|
}
|
|
1695
|
-
},
|
|
2067
|
+
},
|
|
2068
|
+
(4, 0, 1),
|
|
2069
|
+
[(0, 127), (128, 0x7ff), (0x800, 0x7fff), (0x10000, 0x10ffff)]
|
|
2070
|
+
)
|
|
1696
2071
|
(min, max)
|
|
1697
2072
|
}
|
|
1698
|
-
let rec loop =
|
|
1699
|
-
match(re) {
|
|
2073
|
+
let rec loop = re => {
|
|
2074
|
+
match (re) {
|
|
1700
2075
|
RENever => (1, 1, 0),
|
|
1701
2076
|
REAny => (1, 1, 0),
|
|
1702
2077
|
RELiteral(_) => (1, 1, 0),
|
|
@@ -1740,9 +2115,9 @@ let rec validate = (re: ParsedRegularExpression, numGroups) => {
|
|
|
1740
2115
|
} else {
|
|
1741
2116
|
mustSizes = mergeDependsSizes(oldMustSizes, mustSizes)
|
|
1742
2117
|
dependsSizes = mergeDependsSizes(oldDependsSizes, dependsSizes)
|
|
1743
|
-
let repeatMax = match(repeatMax) {
|
|
2118
|
+
let repeatMax = match (repeatMax) {
|
|
1744
2119
|
None => Float32.toNumber(Float32.infinity),
|
|
1745
|
-
Some(n) => n
|
|
2120
|
+
Some(n) => n,
|
|
1746
2121
|
}
|
|
1747
2122
|
(min1 * repeatMin, max1 * repeatMax, maxL1)
|
|
1748
2123
|
}
|
|
@@ -1754,7 +2129,11 @@ let rec validate = (re: ParsedRegularExpression, numGroups) => {
|
|
|
1754
2129
|
REConditional(reTest, reTrue, reFalse, _, _, _) => {
|
|
1755
2130
|
let (min1, max1, maxL1) = loop(reTest)
|
|
1756
2131
|
let (min2, max2, maxL2) = loop(reTrue)
|
|
1757
|
-
let (min3, max3, maxL3) = Option.mapWithDefault(
|
|
2132
|
+
let (min3, max3, maxL3) = Option.mapWithDefault(
|
|
2133
|
+
loop,
|
|
2134
|
+
(0, 0, 0),
|
|
2135
|
+
reFalse
|
|
2136
|
+
)
|
|
1758
2137
|
(min(min2, min3), max(max2, max3), max(max(maxL1, maxL2), maxL3))
|
|
1759
2138
|
},
|
|
1760
2139
|
RELookahead(re, _, _, _) => {
|
|
@@ -1780,39 +2159,41 @@ let rec validate = (re: ParsedRegularExpression, numGroups) => {
|
|
|
1780
2159
|
thrownError = Some(BackreferenceTooBig)
|
|
1781
2160
|
(0, 0, 0)
|
|
1782
2161
|
} else {
|
|
1783
|
-
match(Map.get(n, groupSizes)) {
|
|
2162
|
+
match (Map.get(n, groupSizes)) {
|
|
1784
2163
|
Some(minSize) => (minSize, Float32.toNumber(Float32.infinity), 0),
|
|
1785
2164
|
None => {
|
|
1786
2165
|
Map.set(n - 1, true, dependsSizes)
|
|
1787
2166
|
(1, Float32.toNumber(Float32.infinity), 0)
|
|
1788
|
-
}
|
|
2167
|
+
},
|
|
1789
2168
|
}
|
|
1790
2169
|
}
|
|
1791
2170
|
},
|
|
1792
|
-
REUnicodeCategories(_, _) => (1, 4, 0)
|
|
2171
|
+
REUnicodeCategories(_, _) => (1, 4, 0),
|
|
1793
2172
|
}
|
|
1794
2173
|
}
|
|
1795
2174
|
let (minLen, maxLen, maxLookbehind) = loop(re)
|
|
1796
2175
|
Map.forEach((k, _) => {
|
|
1797
|
-
match(Map.get(k, groupSizes)) {
|
|
2176
|
+
match (Map.get(k, groupSizes)) {
|
|
1798
2177
|
None => void,
|
|
1799
2178
|
Some(sz) => {
|
|
1800
2179
|
if (sz <= 0) {
|
|
1801
2180
|
thrownError = Some(MightBeEmpty)
|
|
1802
2181
|
}
|
|
1803
|
-
}
|
|
2182
|
+
},
|
|
1804
2183
|
}
|
|
1805
2184
|
}, mustSizes)
|
|
1806
|
-
match(thrownError) {
|
|
2185
|
+
match (thrownError) {
|
|
1807
2186
|
Some(MightBeEmpty) => Err("`*`, `+`, or `{...}` operand could be empty"),
|
|
1808
|
-
Some(DoesNotMatchBounded) =>
|
|
1809
|
-
|
|
1810
|
-
Some(
|
|
1811
|
-
|
|
2187
|
+
Some(DoesNotMatchBounded) =>
|
|
2188
|
+
Err("lookbehind pattern does not match a bounded length"),
|
|
2189
|
+
Some(BackreferenceTooBig) =>
|
|
2190
|
+
Err("backreference number is larger than the highest-numbered cluster"),
|
|
2191
|
+
Some(InternalError(re)) =>
|
|
2192
|
+
Err("regex validate: Internal error: " ++ toString(re)),
|
|
2193
|
+
None => Ok(maxLookbehind),
|
|
1812
2194
|
}
|
|
1813
2195
|
}
|
|
1814
2196
|
|
|
1815
|
-
|
|
1816
2197
|
/*
|
|
1817
2198
|
|
|
1818
2199
|
=========================
|
|
@@ -1821,17 +2202,13 @@ REGEX MATCHER COMPILATION
|
|
|
1821
2202
|
|
|
1822
2203
|
*/
|
|
1823
2204
|
|
|
1824
|
-
|
|
1825
2205
|
record MatchBuf {
|
|
1826
2206
|
matchInput: String,
|
|
1827
2207
|
matchInputExploded: Array<Char>,
|
|
1828
2208
|
}
|
|
1829
2209
|
|
|
1830
|
-
let makeMatchBuffer =
|
|
1831
|
-
{
|
|
1832
|
-
matchInput: s,
|
|
1833
|
-
matchInputExploded: String.explode(s),
|
|
1834
|
-
}
|
|
2210
|
+
let makeMatchBuffer = s => {
|
|
2211
|
+
{ matchInput: s, matchInputExploded: String.explode(s) }
|
|
1835
2212
|
}
|
|
1836
2213
|
|
|
1837
2214
|
let matchBufMore = (buf: MatchBuf, pos: Number) => {
|
|
@@ -1851,19 +2228,55 @@ enum StackElt {
|
|
|
1851
2228
|
SESavedGroup(Number, Option<(Number, Number)>),
|
|
1852
2229
|
}
|
|
1853
2230
|
|
|
1854
|
-
let done_m =
|
|
1855
|
-
|
|
1856
|
-
|
|
2231
|
+
let done_m =
|
|
2232
|
+
(
|
|
2233
|
+
buf: MatchBuf,
|
|
2234
|
+
pos: Number,
|
|
2235
|
+
start: Number,
|
|
2236
|
+
limit: Number,
|
|
2237
|
+
end: Number,
|
|
2238
|
+
state,
|
|
2239
|
+
stack,
|
|
2240
|
+
) =>
|
|
2241
|
+
Some(pos)
|
|
2242
|
+
let continue_m =
|
|
2243
|
+
(
|
|
2244
|
+
buf: MatchBuf,
|
|
2245
|
+
pos: Number,
|
|
2246
|
+
start: Number,
|
|
2247
|
+
limit: Number,
|
|
2248
|
+
end: Number,
|
|
2249
|
+
state,
|
|
2250
|
+
stack,
|
|
2251
|
+
) => {
|
|
2252
|
+
match (stack) {
|
|
1857
2253
|
[SEPositionProducer(hd), ..._] => hd(pos),
|
|
1858
2254
|
_ => fail "Impossible: continue_m",
|
|
1859
2255
|
}
|
|
1860
2256
|
}
|
|
1861
|
-
let limit_m =
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
2257
|
+
let limit_m =
|
|
2258
|
+
(
|
|
2259
|
+
buf: MatchBuf,
|
|
2260
|
+
pos: Number,
|
|
2261
|
+
start: Number,
|
|
2262
|
+
limit: Number,
|
|
2263
|
+
end: Number,
|
|
2264
|
+
state,
|
|
2265
|
+
stack,
|
|
2266
|
+
) => if (pos == limit) Some(pos) else None
|
|
2267
|
+
|
|
2268
|
+
let iterateMatcher = (m, size, max) =>
|
|
2269
|
+
(
|
|
2270
|
+
buf: MatchBuf,
|
|
2271
|
+
pos: Number,
|
|
2272
|
+
start: Number,
|
|
2273
|
+
limit: Number,
|
|
2274
|
+
end: Number,
|
|
2275
|
+
state,
|
|
2276
|
+
stack,
|
|
2277
|
+
) => {
|
|
2278
|
+
let limit = match (max) {
|
|
2279
|
+
Some(max) => min(limit, pos + max * size),
|
|
1867
2280
|
None => limit,
|
|
1868
2281
|
}
|
|
1869
2282
|
let rec loop = (pos2, n) => {
|
|
@@ -1879,35 +2292,70 @@ let iterateMatcher = (m, size, max) => (buf: MatchBuf, pos: Number, start: Numbe
|
|
|
1879
2292
|
|
|
1880
2293
|
// single-char matching
|
|
1881
2294
|
|
|
1882
|
-
let charMatcher = (toMatch, next_m) =>
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
2295
|
+
let charMatcher = (toMatch, next_m) =>
|
|
2296
|
+
(
|
|
2297
|
+
buf: MatchBuf,
|
|
2298
|
+
pos: Number,
|
|
2299
|
+
start: Number,
|
|
2300
|
+
limit: Number,
|
|
2301
|
+
end: Number,
|
|
2302
|
+
state,
|
|
2303
|
+
stack,
|
|
2304
|
+
) => {
|
|
2305
|
+
if (
|
|
2306
|
+
{
|
|
2307
|
+
pos < limit &&
|
|
2308
|
+
match (matchBufChar(buf, pos)) {
|
|
2309
|
+
Err(_) => false,
|
|
2310
|
+
Ok(c) => toMatch == c,
|
|
2311
|
+
}
|
|
2312
|
+
}
|
|
2313
|
+
) next_m(buf, pos + 1, start, limit, end, state, stack) else None
|
|
2314
|
+
}
|
|
2315
|
+
|
|
2316
|
+
let charTailMatcher = toMatch =>
|
|
2317
|
+
(
|
|
2318
|
+
buf: MatchBuf,
|
|
2319
|
+
pos: Number,
|
|
2320
|
+
start: Number,
|
|
2321
|
+
limit: Number,
|
|
2322
|
+
end: Number,
|
|
2323
|
+
state,
|
|
2324
|
+
stack,
|
|
2325
|
+
) => {
|
|
2326
|
+
if (
|
|
2327
|
+
{
|
|
2328
|
+
pos < limit &&
|
|
2329
|
+
match (matchBufChar(buf, pos)) {
|
|
2330
|
+
Err(_) => false,
|
|
2331
|
+
Ok(c) => toMatch == c,
|
|
2332
|
+
}
|
|
1887
2333
|
}
|
|
1888
|
-
|
|
2334
|
+
) Some(pos + 1) else None
|
|
1889
2335
|
}
|
|
1890
2336
|
|
|
1891
|
-
let
|
|
1892
|
-
|
|
1893
|
-
|
|
2337
|
+
let charMatcherIterated = (toMatch, max) =>
|
|
2338
|
+
iterateMatcher((
|
|
2339
|
+
buf: MatchBuf,
|
|
2340
|
+
pos: Number,
|
|
2341
|
+
start: Number,
|
|
2342
|
+
limit: Number,
|
|
2343
|
+
end: Number,
|
|
2344
|
+
state,
|
|
2345
|
+
stack
|
|
2346
|
+
) => {
|
|
2347
|
+
match (matchBufChar(buf, pos)) {
|
|
1894
2348
|
Err(_) => false,
|
|
1895
|
-
Ok(c) => toMatch == c
|
|
2349
|
+
Ok(c) => toMatch == c,
|
|
1896
2350
|
}
|
|
1897
|
-
}
|
|
1898
|
-
}
|
|
1899
|
-
|
|
1900
|
-
let charMatcherIterated = (toMatch, max) => iterateMatcher((buf: MatchBuf, pos: Number, start: Number, limit: Number, end: Number, state, stack) => {
|
|
1901
|
-
match(matchBufChar(buf, pos)) {
|
|
1902
|
-
Err(_) => false,
|
|
1903
|
-
Ok(c) => toMatch == c
|
|
1904
|
-
}
|
|
1905
|
-
}, 1, max)
|
|
2351
|
+
}, 1, max)
|
|
1906
2352
|
|
|
1907
2353
|
// string matching
|
|
1908
2354
|
|
|
1909
2355
|
let subArraysEqual = (arr1, start1, arr2, start2, length) => {
|
|
1910
|
-
if (
|
|
2356
|
+
if (
|
|
2357
|
+
Array.length(arr1) - start1 < length || Array.length(arr2) - start2 < length
|
|
2358
|
+
) {
|
|
1911
2359
|
false
|
|
1912
2360
|
} else {
|
|
1913
2361
|
let mut result = true
|
|
@@ -1921,45 +2369,129 @@ let subArraysEqual = (arr1, start1, arr2, start2, length) => {
|
|
|
1921
2369
|
}
|
|
1922
2370
|
}
|
|
1923
2371
|
|
|
1924
|
-
let stringMatcher = (toMatch, len, next_m) =>
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
2372
|
+
let stringMatcher = (toMatch, len, next_m) =>
|
|
2373
|
+
(
|
|
2374
|
+
buf: MatchBuf,
|
|
2375
|
+
pos: Number,
|
|
2376
|
+
start: Number,
|
|
2377
|
+
limit: Number,
|
|
2378
|
+
end: Number,
|
|
2379
|
+
state,
|
|
2380
|
+
stack,
|
|
2381
|
+
) => {
|
|
2382
|
+
if (
|
|
2383
|
+
{
|
|
2384
|
+
pos + len <= limit &&
|
|
2385
|
+
subArraysEqual(
|
|
2386
|
+
buf.matchInputExploded,
|
|
2387
|
+
pos,
|
|
2388
|
+
String.explode(toMatch),
|
|
2389
|
+
0,
|
|
2390
|
+
len
|
|
2391
|
+
)
|
|
2392
|
+
}
|
|
2393
|
+
) next_m(buf, pos + len, start, limit, end, state, stack) else None
|
|
2394
|
+
}
|
|
2395
|
+
|
|
2396
|
+
let stringTailMatcher = (toMatch, len) =>
|
|
2397
|
+
(
|
|
2398
|
+
buf: MatchBuf,
|
|
2399
|
+
pos: Number,
|
|
2400
|
+
start: Number,
|
|
2401
|
+
limit: Number,
|
|
2402
|
+
end: Number,
|
|
2403
|
+
state,
|
|
2404
|
+
stack,
|
|
2405
|
+
) => {
|
|
2406
|
+
if (
|
|
2407
|
+
{
|
|
2408
|
+
pos + len <= limit &&
|
|
2409
|
+
subArraysEqual(
|
|
2410
|
+
buf.matchInputExploded,
|
|
2411
|
+
pos,
|
|
2412
|
+
String.explode(toMatch),
|
|
2413
|
+
0,
|
|
2414
|
+
len
|
|
2415
|
+
)
|
|
2416
|
+
}
|
|
2417
|
+
) Some(pos + len) else None
|
|
2418
|
+
}
|
|
2419
|
+
|
|
2420
|
+
let stringMatcherIterated = (toMatch, len, max) =>
|
|
2421
|
+
iterateMatcher((
|
|
2422
|
+
buf: MatchBuf,
|
|
2423
|
+
pos: Number,
|
|
2424
|
+
start: Number,
|
|
2425
|
+
limit: Number,
|
|
2426
|
+
end: Number,
|
|
2427
|
+
state,
|
|
2428
|
+
stack
|
|
2429
|
+
) => {
|
|
2430
|
+
subArraysEqual(buf.matchInputExploded, pos, String.explode(toMatch), 0, len)
|
|
2431
|
+
}, len, max)
|
|
1940
2432
|
|
|
1941
2433
|
// match nothing
|
|
1942
2434
|
|
|
1943
|
-
let neverMatcher =
|
|
2435
|
+
let neverMatcher =
|
|
2436
|
+
(
|
|
2437
|
+
buf: MatchBuf,
|
|
2438
|
+
pos: Number,
|
|
2439
|
+
start: Number,
|
|
2440
|
+
limit: Number,
|
|
2441
|
+
end: Number,
|
|
2442
|
+
state,
|
|
2443
|
+
stack,
|
|
2444
|
+
) => {
|
|
1944
2445
|
None
|
|
1945
2446
|
}
|
|
1946
2447
|
|
|
1947
2448
|
// match any byte
|
|
1948
2449
|
|
|
1949
|
-
let anyMatcher =
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
2450
|
+
let anyMatcher = next_m =>
|
|
2451
|
+
(
|
|
2452
|
+
buf: MatchBuf,
|
|
2453
|
+
pos: Number,
|
|
2454
|
+
start: Number,
|
|
2455
|
+
limit: Number,
|
|
2456
|
+
end: Number,
|
|
2457
|
+
state,
|
|
2458
|
+
stack,
|
|
2459
|
+
) => {
|
|
2460
|
+
if (
|
|
2461
|
+
{
|
|
2462
|
+
pos < limit
|
|
2463
|
+
}
|
|
2464
|
+
) next_m(buf, pos + 1, start, limit, end, state, stack) else None
|
|
2465
|
+
}
|
|
2466
|
+
|
|
2467
|
+
let anyTailMatcher = () =>
|
|
2468
|
+
(
|
|
2469
|
+
buf: MatchBuf,
|
|
2470
|
+
pos: Number,
|
|
2471
|
+
start: Number,
|
|
2472
|
+
limit: Number,
|
|
2473
|
+
end: Number,
|
|
2474
|
+
state,
|
|
2475
|
+
stack,
|
|
2476
|
+
) => {
|
|
2477
|
+
if (
|
|
2478
|
+
{
|
|
2479
|
+
pos < limit
|
|
2480
|
+
}
|
|
2481
|
+
) Some(pos + 1) else None
|
|
2482
|
+
}
|
|
2483
|
+
|
|
2484
|
+
let anyMatcherIterated = max =>
|
|
2485
|
+
(
|
|
2486
|
+
buf: MatchBuf,
|
|
2487
|
+
pos: Number,
|
|
2488
|
+
start: Number,
|
|
2489
|
+
limit: Number,
|
|
2490
|
+
end: Number,
|
|
2491
|
+
state,
|
|
2492
|
+
stack,
|
|
2493
|
+
) => {
|
|
2494
|
+
let n = match (max) {
|
|
1963
2495
|
None => limit - pos,
|
|
1964
2496
|
Some(max) => min(max, limit - pos),
|
|
1965
2497
|
}
|
|
@@ -1968,95 +2500,219 @@ let anyMatcherIterated = (max) => (buf: MatchBuf, pos: Number, start: Number, li
|
|
|
1968
2500
|
|
|
1969
2501
|
// match byte in set (range)
|
|
1970
2502
|
|
|
1971
|
-
let rangeMatcher = (rng
|
|
1972
|
-
|
|
1973
|
-
|
|
1974
|
-
|
|
1975
|
-
|
|
2503
|
+
let rangeMatcher = (rng: CharRange, next_m) =>
|
|
2504
|
+
(
|
|
2505
|
+
buf: MatchBuf,
|
|
2506
|
+
pos: Number,
|
|
2507
|
+
start: Number,
|
|
2508
|
+
limit: Number,
|
|
2509
|
+
end: Number,
|
|
2510
|
+
state,
|
|
2511
|
+
stack,
|
|
2512
|
+
) => {
|
|
2513
|
+
if (
|
|
2514
|
+
{
|
|
2515
|
+
pos < limit &&
|
|
2516
|
+
match (matchBufChar(buf, pos)) {
|
|
2517
|
+
Err(_) => false,
|
|
2518
|
+
Ok(c) => rangeContains(rng, Char.code(c)),
|
|
2519
|
+
}
|
|
2520
|
+
}
|
|
2521
|
+
) next_m(buf, pos + 1, start, limit, end, state, stack) else None
|
|
2522
|
+
}
|
|
2523
|
+
|
|
2524
|
+
let rangeTailMatcher = (rng: CharRange) =>
|
|
2525
|
+
(
|
|
2526
|
+
buf: MatchBuf,
|
|
2527
|
+
pos: Number,
|
|
2528
|
+
start: Number,
|
|
2529
|
+
limit: Number,
|
|
2530
|
+
end: Number,
|
|
2531
|
+
state,
|
|
2532
|
+
stack,
|
|
2533
|
+
) => {
|
|
2534
|
+
if (
|
|
2535
|
+
{
|
|
2536
|
+
pos < limit &&
|
|
2537
|
+
match (matchBufChar(buf, pos)) {
|
|
2538
|
+
Err(_) => false,
|
|
2539
|
+
Ok(c) => rangeContains(rng, Char.code(c)),
|
|
2540
|
+
}
|
|
1976
2541
|
}
|
|
1977
|
-
|
|
2542
|
+
) Some(pos + 1) else None
|
|
1978
2543
|
}
|
|
1979
2544
|
|
|
1980
|
-
let
|
|
1981
|
-
|
|
1982
|
-
|
|
2545
|
+
let rangeMatcherIterated = (rng: CharRange, max) =>
|
|
2546
|
+
iterateMatcher((
|
|
2547
|
+
buf: MatchBuf,
|
|
2548
|
+
pos: Number,
|
|
2549
|
+
start: Number,
|
|
2550
|
+
limit: Number,
|
|
2551
|
+
end: Number,
|
|
2552
|
+
state,
|
|
2553
|
+
stack
|
|
2554
|
+
) => {
|
|
2555
|
+
match (matchBufChar(buf, pos)) {
|
|
1983
2556
|
Err(_) => false,
|
|
1984
|
-
Ok(c) => rangeContains(rng, Char.code(c))
|
|
2557
|
+
Ok(c) => rangeContains(rng, Char.code(c)),
|
|
1985
2558
|
}
|
|
1986
|
-
}
|
|
1987
|
-
}
|
|
1988
|
-
|
|
1989
|
-
let rangeMatcherIterated = (rng, max) => iterateMatcher((buf: MatchBuf, pos: Number, start: Number, limit: Number, end: Number, state, stack) => {
|
|
1990
|
-
match(matchBufChar(buf, pos)) {
|
|
1991
|
-
Err(_) => false,
|
|
1992
|
-
Ok(c) => rangeContains(rng, Char.code(c))
|
|
1993
|
-
}
|
|
1994
|
-
}, 1, max)
|
|
2559
|
+
}, 1, max)
|
|
1995
2560
|
|
|
1996
2561
|
// zero-width matchers
|
|
1997
2562
|
|
|
1998
|
-
let startMatcher =
|
|
2563
|
+
let startMatcher = next_m =>
|
|
2564
|
+
(
|
|
2565
|
+
buf: MatchBuf,
|
|
2566
|
+
pos: Number,
|
|
2567
|
+
start: Number,
|
|
2568
|
+
limit: Number,
|
|
2569
|
+
end: Number,
|
|
2570
|
+
state,
|
|
2571
|
+
stack,
|
|
2572
|
+
) => {
|
|
1999
2573
|
if (pos == start) next_m(buf, pos, start, limit, end, state, stack) else None
|
|
2000
2574
|
}
|
|
2001
2575
|
|
|
2002
|
-
let endMatcher =
|
|
2576
|
+
let endMatcher = next_m =>
|
|
2577
|
+
(
|
|
2578
|
+
buf: MatchBuf,
|
|
2579
|
+
pos: Number,
|
|
2580
|
+
start: Number,
|
|
2581
|
+
limit: Number,
|
|
2582
|
+
end: Number,
|
|
2583
|
+
state,
|
|
2584
|
+
stack,
|
|
2585
|
+
) => {
|
|
2003
2586
|
if (pos == end) next_m(buf, pos, start, limit, end, state, stack) else None
|
|
2004
2587
|
}
|
|
2005
2588
|
|
|
2006
|
-
let lineStartMatcher =
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2589
|
+
let lineStartMatcher = next_m =>
|
|
2590
|
+
(
|
|
2591
|
+
buf: MatchBuf,
|
|
2592
|
+
pos: Number,
|
|
2593
|
+
start: Number,
|
|
2594
|
+
limit: Number,
|
|
2595
|
+
end: Number,
|
|
2596
|
+
state,
|
|
2597
|
+
stack,
|
|
2598
|
+
) => {
|
|
2599
|
+
if (pos == start || matchBufChar(buf, pos - 1) == Ok('\n'))
|
|
2600
|
+
next_m(buf, pos, start, limit, end, state, stack) else None
|
|
2601
|
+
}
|
|
2602
|
+
|
|
2603
|
+
let lineEndMatcher = next_m =>
|
|
2604
|
+
(
|
|
2605
|
+
buf: MatchBuf,
|
|
2606
|
+
pos: Number,
|
|
2607
|
+
start: Number,
|
|
2608
|
+
limit: Number,
|
|
2609
|
+
end: Number,
|
|
2610
|
+
state,
|
|
2611
|
+
stack,
|
|
2612
|
+
) => {
|
|
2613
|
+
if (pos == end || matchBufChar(buf, pos) == Ok('\n'))
|
|
2614
|
+
next_m(buf, pos, start, limit, end, state, stack) else None
|
|
2615
|
+
}
|
|
2616
|
+
|
|
2617
|
+
let isWordChar = c => {
|
|
2618
|
+
match (c) {
|
|
2016
2619
|
Err(_) => false,
|
|
2017
|
-
Ok(c) when (
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
2021
|
-
|
|
2620
|
+
Ok(c) when (
|
|
2621
|
+
Char.code('0') <= Char.code(c) && Char.code(c) <= Char.code('9')
|
|
2622
|
+
) =>
|
|
2623
|
+
true,
|
|
2624
|
+
Ok(c) when (
|
|
2625
|
+
Char.code('a') <= Char.code(c) && Char.code(c) <= Char.code('z')
|
|
2626
|
+
) =>
|
|
2627
|
+
true,
|
|
2628
|
+
Ok(c) when (
|
|
2629
|
+
Char.code('A') <= Char.code(c) && Char.code(c) <= Char.code('Z')
|
|
2630
|
+
) =>
|
|
2631
|
+
true,
|
|
2632
|
+
Ok(c) when Char.code('_') <= Char.code(c) => true,
|
|
2633
|
+
_ => false,
|
|
2022
2634
|
}
|
|
2023
2635
|
}
|
|
2024
2636
|
|
|
2025
2637
|
let isWordBoundary = (buf, pos, start, limit, end) => {
|
|
2026
|
-
!((pos == start || !isWordChar(matchBufChar(buf, pos - 1))) ==
|
|
2027
|
-
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2638
|
+
!((pos == start || !isWordChar(matchBufChar(buf, pos - 1))) ==
|
|
2639
|
+
(pos == end || !isWordChar(matchBufChar(buf, pos))))
|
|
2640
|
+
}
|
|
2641
|
+
|
|
2642
|
+
let wordBoundaryMatcher = next_m =>
|
|
2643
|
+
(
|
|
2644
|
+
buf: MatchBuf,
|
|
2645
|
+
pos: Number,
|
|
2646
|
+
start: Number,
|
|
2647
|
+
limit: Number,
|
|
2648
|
+
end: Number,
|
|
2649
|
+
state,
|
|
2650
|
+
stack,
|
|
2651
|
+
) => {
|
|
2652
|
+
if (isWordBoundary(buf, pos, start, limit, end))
|
|
2653
|
+
next_m(buf, pos, start, limit, end, state, stack) else None
|
|
2654
|
+
}
|
|
2655
|
+
|
|
2656
|
+
let notWordBoundaryMatcher = next_m =>
|
|
2657
|
+
(
|
|
2658
|
+
buf: MatchBuf,
|
|
2659
|
+
pos: Number,
|
|
2660
|
+
start: Number,
|
|
2661
|
+
limit: Number,
|
|
2662
|
+
end: Number,
|
|
2663
|
+
state,
|
|
2664
|
+
stack,
|
|
2665
|
+
) => {
|
|
2666
|
+
if (!isWordBoundary(buf, pos, start, limit, end))
|
|
2667
|
+
next_m(buf, pos, start, limit, end, state, stack) else None
|
|
2035
2668
|
}
|
|
2036
2669
|
|
|
2037
2670
|
// Alternatives
|
|
2038
2671
|
|
|
2039
|
-
let altsMatcher = (m1, m2) =>
|
|
2040
|
-
|
|
2672
|
+
let altsMatcher = (m1, m2) =>
|
|
2673
|
+
(
|
|
2674
|
+
buf: MatchBuf,
|
|
2675
|
+
pos: Number,
|
|
2676
|
+
start: Number,
|
|
2677
|
+
limit: Number,
|
|
2678
|
+
end: Number,
|
|
2679
|
+
state,
|
|
2680
|
+
stack,
|
|
2681
|
+
) => {
|
|
2682
|
+
match (m1(buf, pos, start, limit, end, state, stack)) {
|
|
2041
2683
|
None => m2(buf, pos, start, limit, end, state, stack),
|
|
2042
|
-
Some(v) => Some(v)
|
|
2684
|
+
Some(v) => Some(v),
|
|
2043
2685
|
}
|
|
2044
2686
|
}
|
|
2045
2687
|
|
|
2046
2688
|
// repeats, greedy (default) and non-greedy
|
|
2047
2689
|
|
|
2048
|
-
let repeatMatcher = (r_m, min, max, next_m) =>
|
|
2690
|
+
let repeatMatcher = (r_m, min, max, next_m) =>
|
|
2691
|
+
(
|
|
2692
|
+
buf: MatchBuf,
|
|
2693
|
+
pos: Number,
|
|
2694
|
+
start: Number,
|
|
2695
|
+
limit: Number,
|
|
2696
|
+
end: Number,
|
|
2697
|
+
state,
|
|
2698
|
+
stack,
|
|
2699
|
+
) => {
|
|
2049
2700
|
let rec rloop = (pos, n) => {
|
|
2050
2701
|
if (n < min) {
|
|
2051
2702
|
let newStack = [SEPositionProducer(pos => rloop(pos, n + 1)), ...stack]
|
|
2052
2703
|
r_m(buf, pos, start, limit, end, state, newStack)
|
|
2053
|
-
} else if (
|
|
2704
|
+
} else if (
|
|
2705
|
+
match (max) {
|
|
2706
|
+
None => false,
|
|
2707
|
+
Some(max) => max == n,
|
|
2708
|
+
}
|
|
2709
|
+
) {
|
|
2054
2710
|
next_m(buf, pos, start, limit, end, state, stack)
|
|
2055
2711
|
} else {
|
|
2056
2712
|
let newStack = [SEPositionProducer(pos => rloop(pos, n + 1)), ...stack]
|
|
2057
|
-
match(r_m(buf, pos, start, limit, end, state, newStack)) {
|
|
2713
|
+
match (r_m(buf, pos, start, limit, end, state, newStack)) {
|
|
2058
2714
|
Some(v) => Some(v),
|
|
2059
|
-
None => next_m(buf, pos, start, limit, end, state, stack)
|
|
2715
|
+
None => next_m(buf, pos, start, limit, end, state, stack),
|
|
2060
2716
|
}
|
|
2061
2717
|
}
|
|
2062
2718
|
}
|
|
@@ -2069,7 +2725,7 @@ let arrayCopy = (dest, destStart, src, srcStart, srcEnd) => {
|
|
|
2069
2725
|
let mut count = srcStart
|
|
2070
2726
|
while (count < srcEnd) {
|
|
2071
2727
|
dest[destStart + (count - srcStart)] = src[count]
|
|
2072
|
-
count
|
|
2728
|
+
count += 1
|
|
2073
2729
|
}
|
|
2074
2730
|
}
|
|
2075
2731
|
|
|
@@ -2092,28 +2748,39 @@ let restoreGroups = (state, oldState, nStart, numN) => {
|
|
|
2092
2748
|
}
|
|
2093
2749
|
|
|
2094
2750
|
let addRepeatedGroup = (groupN, state, pos, n, backAmt, callback) => {
|
|
2095
|
-
match(groupN) {
|
|
2751
|
+
match (groupN) {
|
|
2096
2752
|
Some(groupN) when Array.length(state) > 0 => {
|
|
2097
2753
|
let oldSpan = state[groupN]
|
|
2098
2754
|
state[groupN] = if (n == 0) None else Some((pos - backAmt, pos))
|
|
2099
|
-
let groupRevert = () => {
|
|
2755
|
+
let groupRevert = () => {
|
|
2756
|
+
state[groupN] = oldSpan
|
|
2757
|
+
}
|
|
2100
2758
|
callback(groupRevert)
|
|
2101
2759
|
},
|
|
2102
2760
|
_ => {
|
|
2103
2761
|
let groupRevert = () => void
|
|
2104
2762
|
callback(groupRevert)
|
|
2105
|
-
}
|
|
2763
|
+
},
|
|
2106
2764
|
}
|
|
2107
2765
|
}
|
|
2108
2766
|
|
|
2109
|
-
let repeatSimpleMatcher = (r_m, min, max, groupN, next_m) =>
|
|
2767
|
+
let repeatSimpleMatcher = (r_m, min, max, groupN, next_m) =>
|
|
2768
|
+
(
|
|
2769
|
+
buf: MatchBuf,
|
|
2770
|
+
pos: Number,
|
|
2771
|
+
start: Number,
|
|
2772
|
+
limit: Number,
|
|
2773
|
+
end: Number,
|
|
2774
|
+
state,
|
|
2775
|
+
stack,
|
|
2776
|
+
) => {
|
|
2110
2777
|
let rec rloop = (pos, n, backAmt) => {
|
|
2111
|
-
let pos2 = match(max) {
|
|
2778
|
+
let pos2 = match (max) {
|
|
2112
2779
|
Some(max) when n < max => r_m(buf, pos, start, limit, end, state, rStack),
|
|
2113
2780
|
Some(_) => None,
|
|
2114
|
-
_ => r_m(buf, pos, start, limit, end, state, rStack)
|
|
2781
|
+
_ => r_m(buf, pos, start, limit, end, state, rStack),
|
|
2115
2782
|
}
|
|
2116
|
-
match(pos2) {
|
|
2783
|
+
match (pos2) {
|
|
2117
2784
|
Some(pos2) => rloop(pos2, n + 1, pos2 - pos),
|
|
2118
2785
|
None => {
|
|
2119
2786
|
// Perform backtracking
|
|
@@ -2121,71 +2788,129 @@ let repeatSimpleMatcher = (r_m, min, max, groupN, next_m) => (buf: MatchBuf, pos
|
|
|
2121
2788
|
if (n < min) {
|
|
2122
2789
|
None
|
|
2123
2790
|
} else {
|
|
2124
|
-
addRepeatedGroup(
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2791
|
+
addRepeatedGroup(
|
|
2792
|
+
groupN,
|
|
2793
|
+
state,
|
|
2794
|
+
pos,
|
|
2795
|
+
n,
|
|
2796
|
+
backAmt,
|
|
2797
|
+
groupRevert => {
|
|
2798
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2799
|
+
Some(v) => Some(v),
|
|
2800
|
+
None => {
|
|
2801
|
+
groupRevert()
|
|
2802
|
+
bloop(pos - backAmt, n - 1)
|
|
2803
|
+
},
|
|
2130
2804
|
}
|
|
2131
2805
|
}
|
|
2132
|
-
|
|
2806
|
+
)
|
|
2133
2807
|
}
|
|
2134
2808
|
}
|
|
2135
2809
|
bloop(pos, n)
|
|
2136
|
-
}
|
|
2810
|
+
},
|
|
2137
2811
|
}
|
|
2138
2812
|
}
|
|
2139
2813
|
rloop(pos, 0, 0)
|
|
2140
2814
|
}
|
|
2141
2815
|
|
|
2142
|
-
let repeatSimpleManyMatcher = (r_m, min, max, groupN, next_m) =>
|
|
2816
|
+
let repeatSimpleManyMatcher = (r_m, min, max, groupN, next_m) =>
|
|
2817
|
+
(
|
|
2818
|
+
buf: MatchBuf,
|
|
2819
|
+
pos: Number,
|
|
2820
|
+
start: Number,
|
|
2821
|
+
limit: Number,
|
|
2822
|
+
end: Number,
|
|
2823
|
+
state,
|
|
2824
|
+
stack,
|
|
2825
|
+
) => {
|
|
2143
2826
|
let (pos2, n, backAmt) = r_m(buf, pos, start, limit, end, state, stack)
|
|
2144
2827
|
let rec bloop = (pos, n) => {
|
|
2145
2828
|
if (n < min) {
|
|
2146
2829
|
None
|
|
2147
2830
|
} else {
|
|
2148
|
-
addRepeatedGroup(
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2831
|
+
addRepeatedGroup(
|
|
2832
|
+
groupN,
|
|
2833
|
+
state,
|
|
2834
|
+
pos,
|
|
2835
|
+
n,
|
|
2836
|
+
backAmt,
|
|
2837
|
+
groupRevert => {
|
|
2838
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2839
|
+
Some(v) => Some(v),
|
|
2840
|
+
None => {
|
|
2841
|
+
groupRevert()
|
|
2842
|
+
bloop(pos - backAmt, n - 1)
|
|
2843
|
+
},
|
|
2154
2844
|
}
|
|
2155
2845
|
}
|
|
2156
|
-
|
|
2846
|
+
)
|
|
2157
2847
|
}
|
|
2158
2848
|
}
|
|
2159
2849
|
bloop(pos2, n)
|
|
2160
2850
|
}
|
|
2161
2851
|
|
|
2162
|
-
let lazyRepeatMatcher = (r_m, min, max, next_m) =>
|
|
2852
|
+
let lazyRepeatMatcher = (r_m, min, max, next_m) =>
|
|
2853
|
+
(
|
|
2854
|
+
buf: MatchBuf,
|
|
2855
|
+
pos: Number,
|
|
2856
|
+
start: Number,
|
|
2857
|
+
limit: Number,
|
|
2858
|
+
end: Number,
|
|
2859
|
+
state,
|
|
2860
|
+
stack,
|
|
2861
|
+
) => {
|
|
2163
2862
|
let rec rloop = (pos, n, min) => {
|
|
2164
2863
|
if (n < min) {
|
|
2165
|
-
let newStack = [
|
|
2864
|
+
let newStack = [
|
|
2865
|
+
SEPositionProducer(pos => rloop(pos, n + 1, min)),
|
|
2866
|
+
...stack
|
|
2867
|
+
]
|
|
2166
2868
|
r_m(buf, pos, start, limit, end, state, newStack)
|
|
2167
|
-
} else if (
|
|
2869
|
+
} else if (
|
|
2870
|
+
match (max) {
|
|
2871
|
+
None => false,
|
|
2872
|
+
Some(max) => max == n,
|
|
2873
|
+
}
|
|
2874
|
+
) {
|
|
2168
2875
|
next_m(buf, pos, start, limit, end, state, stack)
|
|
2169
|
-
} else
|
|
2170
|
-
|
|
2171
|
-
|
|
2876
|
+
} else {
|
|
2877
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2878
|
+
Some(p) => Some(p),
|
|
2879
|
+
None => rloop(pos, n, min + 1),
|
|
2880
|
+
}
|
|
2172
2881
|
}
|
|
2173
2882
|
}
|
|
2174
2883
|
rloop(pos, 0, min)
|
|
2175
2884
|
}
|
|
2176
2885
|
|
|
2177
|
-
let lazyRepeatSimpleMatcher = (r_m, min, max, next_m) =>
|
|
2886
|
+
let lazyRepeatSimpleMatcher = (r_m, min, max, next_m) =>
|
|
2887
|
+
(
|
|
2888
|
+
buf: MatchBuf,
|
|
2889
|
+
pos: Number,
|
|
2890
|
+
start: Number,
|
|
2891
|
+
limit: Number,
|
|
2892
|
+
end: Number,
|
|
2893
|
+
state,
|
|
2894
|
+
stack,
|
|
2895
|
+
) => {
|
|
2178
2896
|
let rec rloop = (pos, n, min) => {
|
|
2179
2897
|
if (n < min) {
|
|
2180
|
-
match(r_m(buf, pos, start, limit, end, state, stack)) {
|
|
2898
|
+
match (r_m(buf, pos, start, limit, end, state, stack)) {
|
|
2181
2899
|
Some(p) => rloop(p, n + 1, min),
|
|
2182
|
-
None => None
|
|
2900
|
+
None => None,
|
|
2901
|
+
}
|
|
2902
|
+
} else if (
|
|
2903
|
+
match (max) {
|
|
2904
|
+
None => false,
|
|
2905
|
+
Some(max) => max == n,
|
|
2183
2906
|
}
|
|
2184
|
-
|
|
2907
|
+
) {
|
|
2185
2908
|
next_m(buf, pos, start, limit, end, state, stack)
|
|
2186
|
-
} else
|
|
2187
|
-
|
|
2188
|
-
|
|
2909
|
+
} else {
|
|
2910
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2911
|
+
Some(p) => Some(p),
|
|
2912
|
+
None => rloop(pos, n, min + 1),
|
|
2913
|
+
}
|
|
2189
2914
|
}
|
|
2190
2915
|
}
|
|
2191
2916
|
rloop(pos, 0, min)
|
|
@@ -2193,46 +2918,87 @@ let lazyRepeatSimpleMatcher = (r_m, min, max, next_m) => (buf: MatchBuf, pos: Nu
|
|
|
2193
2918
|
|
|
2194
2919
|
// Recording and referencing group matches
|
|
2195
2920
|
|
|
2196
|
-
let groupPushMatcher = (n, next_m) =>
|
|
2197
|
-
|
|
2921
|
+
let groupPushMatcher = (n, next_m) =>
|
|
2922
|
+
(
|
|
2923
|
+
buf: MatchBuf,
|
|
2924
|
+
pos: Number,
|
|
2925
|
+
start: Number,
|
|
2926
|
+
limit: Number,
|
|
2927
|
+
end: Number,
|
|
2928
|
+
state,
|
|
2929
|
+
stack,
|
|
2930
|
+
) => {
|
|
2931
|
+
let newStack = [
|
|
2932
|
+
SESavedGroup(pos, if (Array.length(state) > 0) state[n] else None),
|
|
2933
|
+
...stack
|
|
2934
|
+
]
|
|
2198
2935
|
next_m(buf, pos, start, limit, end, state, newStack)
|
|
2199
2936
|
}
|
|
2200
2937
|
|
|
2201
|
-
let groupSetMatcher = (n, next_m) =>
|
|
2202
|
-
|
|
2938
|
+
let groupSetMatcher = (n, next_m) =>
|
|
2939
|
+
(
|
|
2940
|
+
buf: MatchBuf,
|
|
2941
|
+
pos: Number,
|
|
2942
|
+
start: Number,
|
|
2943
|
+
limit: Number,
|
|
2944
|
+
end: Number,
|
|
2945
|
+
state,
|
|
2946
|
+
stack,
|
|
2947
|
+
) => {
|
|
2948
|
+
match (stack) {
|
|
2203
2949
|
[SESavedGroup(oldPos, oldSpan), ...stackTl] => {
|
|
2204
2950
|
if (Array.length(state) > 0) {
|
|
2205
2951
|
state[n] = Some((oldPos, pos))
|
|
2206
2952
|
}
|
|
2207
|
-
match(next_m(buf, pos, start, limit, end, state, stackTl)) {
|
|
2953
|
+
match (next_m(buf, pos, start, limit, end, state, stackTl)) {
|
|
2208
2954
|
Some(v) => Some(v),
|
|
2209
2955
|
None => {
|
|
2210
2956
|
if (Array.length(state) > 0) {
|
|
2211
2957
|
state[n] = oldSpan
|
|
2212
2958
|
}
|
|
2213
2959
|
None
|
|
2214
|
-
}
|
|
2960
|
+
},
|
|
2215
2961
|
}
|
|
2216
2962
|
},
|
|
2217
|
-
_ => fail "Impossible: groupSetMatcher"
|
|
2963
|
+
_ => fail "Impossible: groupSetMatcher",
|
|
2218
2964
|
}
|
|
2219
2965
|
}
|
|
2220
2966
|
|
|
2221
|
-
let makeReferenceMatcher =
|
|
2222
|
-
|
|
2967
|
+
// NOTE(review): `eq` is not applied anywhere in the body below; the comparison is delegated to `subArraysEqual`, which is not given `eq`. Presumably the case-insensitive variant therefore compares exactly like the case-sensitive one — TODO confirm against `subArraysEqual`.
let makeReferenceMatcher = eq => (n, next_m) =>
|
|
2968
|
+
(
|
|
2969
|
+
buf: MatchBuf,
|
|
2970
|
+
pos: Number,
|
|
2971
|
+
start: Number,
|
|
2972
|
+
limit: Number,
|
|
2973
|
+
end: Number,
|
|
2974
|
+
state,
|
|
2975
|
+
stack,
|
|
2976
|
+
) => {
|
|
2977
|
+
match (state[n]) {
|
|
2223
2978
|
None => None,
|
|
2224
2979
|
Some((refStart, refEnd)) => {
|
|
2225
2980
|
let len = refEnd - refStart
|
|
2226
|
-
if (
|
|
2981
|
+
if (
|
|
2982
|
+
pos + len <= limit &&
|
|
2983
|
+
subArraysEqual(
|
|
2984
|
+
buf.matchInputExploded,
|
|
2985
|
+
refStart,
|
|
2986
|
+
buf.matchInputExploded,
|
|
2987
|
+
pos,
|
|
2988
|
+
len
|
|
2989
|
+
)
|
|
2990
|
+
) {
|
|
2227
2991
|
next_m(buf, pos + len, start, limit, end, state, stack)
|
|
2228
|
-
} else
|
|
2229
|
-
|
|
2992
|
+
} else {
|
|
2993
|
+
None
|
|
2994
|
+
}
|
|
2995
|
+
},
|
|
2230
2996
|
}
|
|
2231
2997
|
}
|
|
2232
2998
|
|
|
2233
|
-
let referenceMatcher = makeReferenceMatcher(((a, b)) =>
|
|
2999
|
+
let referenceMatcher = makeReferenceMatcher(((a, b)) => a == b)
|
|
2234
3000
|
|
|
2235
|
-
let asciiCharToLower =
|
|
3001
|
+
let asciiCharToLower = c => {
|
|
2236
3002
|
// Shift only ASCII uppercase letters: the range is 'A'..'Z' inclusive (the lower bound was mistakenly 'Z', so only 'Z' was ever lowercased).
if (Char.code('A') <= Char.code(c) && Char.code(c) <= Char.code('Z')) {
|
|
2237
3003
|
Char.fromCode(Char.code(c) + (Char.code('a') - Char.code('A')))
|
|
2238
3004
|
} else {
|
|
@@ -2240,29 +3006,57 @@ let asciiCharToLower = (c) => {
|
|
|
2240
3006
|
}
|
|
2241
3007
|
}
|
|
2242
3008
|
|
|
2243
|
-
let referenceMatcherCaseInsensitive = makeReferenceMatcher(((a, b)) =>
|
|
3009
|
+
let referenceMatcherCaseInsensitive = makeReferenceMatcher(((a, b)) =>
|
|
3010
|
+
asciiCharToLower(a) == asciiCharToLower(b))
|
|
2244
3011
|
|
|
2245
3012
|
// Lookahead, Lookbehind, Conditionals, and Cut
|
|
2246
3013
|
|
|
2247
|
-
let lookaheadMatcher = (isMatch, sub_m, nStart, numN, next_m) =>
|
|
3014
|
+
let lookaheadMatcher = (isMatch, sub_m, nStart, numN, next_m) =>
|
|
3015
|
+
(
|
|
3016
|
+
buf: MatchBuf,
|
|
3017
|
+
pos: Number,
|
|
3018
|
+
start: Number,
|
|
3019
|
+
limit: Number,
|
|
3020
|
+
end: Number,
|
|
3021
|
+
state,
|
|
3022
|
+
stack,
|
|
3023
|
+
) => {
|
|
2248
3024
|
let oldState = saveGroups(state, nStart, numN)
|
|
2249
|
-
let ret = match(sub_m(buf, pos, start, limit, end, state, stack)) {
|
|
3025
|
+
let ret = match (sub_m(buf, pos, start, limit, end, state, stack)) {
|
|
2250
3026
|
Some(_) when isMatch => {
|
|
2251
|
-
match(next_m(buf, pos, start, limit, end, state, stack)) {
|
|
3027
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2252
3028
|
Some(p) => Some(p),
|
|
2253
|
-
None => {
|
|
3029
|
+
None => {
|
|
3030
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3031
|
+
None
|
|
3032
|
+
},
|
|
2254
3033
|
}
|
|
2255
3034
|
},
|
|
2256
|
-
Some(_) => {
|
|
2257
|
-
|
|
2258
|
-
|
|
3035
|
+
Some(_) => {
|
|
3036
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3037
|
+
None
|
|
3038
|
+
},
|
|
3039
|
+
None when isMatch => {
|
|
3040
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3041
|
+
None
|
|
3042
|
+
},
|
|
3043
|
+
_ => next_m(buf, pos, start, limit, end, state, stack),
|
|
2259
3044
|
}
|
|
2260
3045
|
ret
|
|
2261
3046
|
}
|
|
2262
3047
|
|
|
2263
|
-
let lookbehindMatcher = (isMatch, lbMin, lbMax, sub_m, nStart, numN, next_m) =>
|
|
3048
|
+
let lookbehindMatcher = (isMatch, lbMin, lbMax, sub_m, nStart, numN, next_m) =>
|
|
3049
|
+
(
|
|
3050
|
+
buf: MatchBuf,
|
|
3051
|
+
pos: Number,
|
|
3052
|
+
start: Number,
|
|
3053
|
+
limit: Number,
|
|
3054
|
+
end: Number,
|
|
3055
|
+
state,
|
|
3056
|
+
stack,
|
|
3057
|
+
) => {
|
|
2264
3058
|
let lbMinPos = max(start, pos - lbMax)
|
|
2265
|
-
let rec loop =
|
|
3059
|
+
let rec loop = lbPos => {
|
|
2266
3060
|
if (lbPos < lbMinPos) {
|
|
2267
3061
|
if (isMatch) {
|
|
2268
3062
|
None
|
|
@@ -2271,25 +3065,40 @@ let lookbehindMatcher = (isMatch, lbMin, lbMax, sub_m, nStart, numN, next_m) =>
|
|
|
2271
3065
|
}
|
|
2272
3066
|
} else {
|
|
2273
3067
|
let oldState = saveGroups(state, nStart, numN)
|
|
2274
|
-
match(sub_m(buf, lbPos, start, pos, end, state, stack)) {
|
|
3068
|
+
match (sub_m(buf, lbPos, start, pos, end, state, stack)) {
|
|
2275
3069
|
Some(_) when isMatch => {
|
|
2276
|
-
match(next_m(buf, pos, start, limit, end, state, stack)) {
|
|
3070
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2277
3071
|
Some(p) => Some(p),
|
|
2278
|
-
None => {
|
|
3072
|
+
None => {
|
|
3073
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3074
|
+
None
|
|
3075
|
+
},
|
|
2279
3076
|
}
|
|
2280
3077
|
},
|
|
2281
3078
|
_ when isMatch => {
|
|
2282
3079
|
loop(lbPos - 1)
|
|
2283
3080
|
},
|
|
2284
|
-
Some(_) => {
|
|
2285
|
-
|
|
3081
|
+
Some(_) => {
|
|
3082
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3083
|
+
None
|
|
3084
|
+
},
|
|
3085
|
+
_ => next_m(buf, pos, start, limit, end, state, stack),
|
|
2286
3086
|
}
|
|
2287
3087
|
}
|
|
2288
3088
|
}
|
|
2289
3089
|
loop(pos - lbMin)
|
|
2290
3090
|
}
|
|
2291
3091
|
|
|
2292
|
-
let conditionalReferenceMatcher = (n, m1, m2) =>
|
|
3092
|
+
let conditionalReferenceMatcher = (n, m1, m2) =>
|
|
3093
|
+
(
|
|
3094
|
+
buf: MatchBuf,
|
|
3095
|
+
pos: Number,
|
|
3096
|
+
start: Number,
|
|
3097
|
+
limit: Number,
|
|
3098
|
+
end: Number,
|
|
3099
|
+
state,
|
|
3100
|
+
stack,
|
|
3101
|
+
) => {
|
|
2293
3102
|
if (Option.isSome(state[n])) {
|
|
2294
3103
|
m1(buf, pos, start, limit, end, state, stack)
|
|
2295
3104
|
} else {
|
|
@@ -2297,34 +3106,67 @@ let conditionalReferenceMatcher = (n, m1, m2) => (buf: MatchBuf, pos: Number, st
|
|
|
2297
3106
|
}
|
|
2298
3107
|
}
|
|
2299
3108
|
|
|
2300
|
-
let conditionalLookMatcher = (tst_m, m1, m2, nStart, numN) =>
|
|
3109
|
+
let conditionalLookMatcher = (tst_m, m1, m2, nStart, numN) =>
|
|
3110
|
+
(
|
|
3111
|
+
buf: MatchBuf,
|
|
3112
|
+
pos: Number,
|
|
3113
|
+
start: Number,
|
|
3114
|
+
limit: Number,
|
|
3115
|
+
end: Number,
|
|
3116
|
+
state,
|
|
3117
|
+
stack,
|
|
3118
|
+
) => {
|
|
2301
3119
|
let oldState = saveGroups(state, nStart, numN)
|
|
2302
|
-
let res = match(tst_m(buf, pos, start, limit, end, state, [])) {
|
|
3120
|
+
let res = match (tst_m(buf, pos, start, limit, end, state, [])) {
|
|
2303
3121
|
Some(_) => m1(buf, pos, start, limit, end, state, stack),
|
|
2304
|
-
None => m2(buf, pos, start, limit, end, state, stack)
|
|
3122
|
+
None => m2(buf, pos, start, limit, end, state, stack),
|
|
2305
3123
|
}
|
|
2306
|
-
match(res) {
|
|
3124
|
+
match (res) {
|
|
2307
3125
|
Some(p) => Some(p),
|
|
2308
|
-
None => {
|
|
3126
|
+
None => {
|
|
3127
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3128
|
+
None
|
|
3129
|
+
},
|
|
2309
3130
|
}
|
|
2310
3131
|
}
|
|
2311
3132
|
|
|
2312
|
-
let cutMatcher = (sub_m, nStart, numN, next_m) =>
|
|
3133
|
+
let cutMatcher = (sub_m, nStart, numN, next_m) =>
|
|
3134
|
+
(
|
|
3135
|
+
buf: MatchBuf,
|
|
3136
|
+
pos: Number,
|
|
3137
|
+
start: Number,
|
|
3138
|
+
limit: Number,
|
|
3139
|
+
end: Number,
|
|
3140
|
+
state,
|
|
3141
|
+
stack,
|
|
3142
|
+
) => {
|
|
2313
3143
|
let oldState = saveGroups(state, nStart, numN)
|
|
2314
|
-
match(sub_m(buf, pos, start, limit, end, state, [])) {
|
|
3144
|
+
match (sub_m(buf, pos, start, limit, end, state, [])) {
|
|
2315
3145
|
None => None,
|
|
2316
3146
|
Some(_) => {
|
|
2317
|
-
match(next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2318
|
-
None => {
|
|
2319
|
-
|
|
3147
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
3148
|
+
None => {
|
|
3149
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3150
|
+
None
|
|
3151
|
+
},
|
|
3152
|
+
Some(p) => Some(p),
|
|
2320
3153
|
}
|
|
2321
|
-
}
|
|
3154
|
+
},
|
|
2322
3155
|
}
|
|
2323
3156
|
}
|
|
2324
3157
|
|
|
2325
3158
|
// Unicode characters in UTF-8 encoding
|
|
2326
3159
|
|
|
2327
|
-
let unicodeCategoriesMatcher = (cats, isMatch, next_m) =>
|
|
3160
|
+
let unicodeCategoriesMatcher = (cats, isMatch, next_m) =>
|
|
3161
|
+
(
|
|
3162
|
+
buf: MatchBuf,
|
|
3163
|
+
pos: Number,
|
|
3164
|
+
start: Number,
|
|
3165
|
+
limit: Number,
|
|
3166
|
+
end: Number,
|
|
3167
|
+
state,
|
|
3168
|
+
stack,
|
|
3169
|
+
) => {
|
|
2328
3170
|
fail "NYI: unicodeCategoriesMatcher is not supported until grain-lang/grain#661 is resolved."
|
|
2329
3171
|
}
|
|
2330
3172
|
|
|
@@ -2332,31 +3174,31 @@ let unicodeCategoriesMatcher = (cats, isMatch, next_m) => (buf: MatchBuf, pos: N
|
|
|
2332
3174
|
// Regex matcher compilation
|
|
2333
3175
|
// -------
|
|
2334
3176
|
|
|
2335
|
-
let countBacktrackPrefix =
|
|
3177
|
+
let countBacktrackPrefix = l => {
|
|
2336
3178
|
let rec loop = (l, total, nonBt) => {
|
|
2337
|
-
match(l) {
|
|
3179
|
+
match (l) {
|
|
2338
3180
|
[] => total - nonBt,
|
|
2339
3181
|
[hd, ...tl] when needsBacktrack(hd) => loop(tl, total + 1, 0),
|
|
2340
|
-
[hd, ...tl] => loop(tl, total + 1, nonBt + 1)
|
|
3182
|
+
[hd, ...tl] => loop(tl, total + 1, nonBt + 1),
|
|
2341
3183
|
}
|
|
2342
3184
|
}
|
|
2343
3185
|
loop(l, 0, 0)
|
|
2344
3186
|
}
|
|
2345
3187
|
|
|
2346
3188
|
let compileMatcherRepeater = (rx, min, max) => {
|
|
2347
|
-
match(rx) {
|
|
3189
|
+
match (rx) {
|
|
2348
3190
|
RELiteral(c) => Some(charMatcherIterated(c, max)),
|
|
2349
3191
|
RELiteralString(s) => Some(stringMatcherIterated(s, String.length(s), max)),
|
|
2350
3192
|
REAny => Some(anyMatcherIterated(max)),
|
|
2351
3193
|
RERange(rng) => Some(rangeMatcherIterated(rng, max)),
|
|
2352
|
-
_ => None
|
|
3194
|
+
_ => None,
|
|
2353
3195
|
}
|
|
2354
3196
|
}
|
|
2355
3197
|
|
|
2356
3198
|
let compileRegexToMatcher = (re: ParsedRegularExpression) => {
|
|
2357
3199
|
let rec compile = (re: ParsedRegularExpression, next_m) => {
|
|
2358
3200
|
let useTail = next_m is done_m
|
|
2359
|
-
match(re) {
|
|
3201
|
+
match (re) {
|
|
2360
3202
|
RELiteral(c) when useTail => charTailMatcher(c),
|
|
2361
3203
|
RELiteral(c) => charMatcher(c, next_m),
|
|
2362
3204
|
RELiteralString(s) when useTail => stringTailMatcher(s, String.length(s)),
|
|
@@ -2376,22 +3218,27 @@ let compileRegexToMatcher = (re: ParsedRegularExpression) => {
|
|
|
2376
3218
|
RESequence(res, _) => {
|
|
2377
3219
|
List.reduceRight(compile, next_m, res)
|
|
2378
3220
|
},
|
|
2379
|
-
REAlts(re1, re2) =>
|
|
2380
|
-
|
|
3221
|
+
REAlts(re1, re2) =>
|
|
3222
|
+
altsMatcher(compile(re1, next_m), compile(re2, next_m)),
|
|
3223
|
+
REMaybe(re, true) =>
|
|
3224
|
+
altsMatcher(next_m, compile(re, next_m)), // non-greedy
|
|
2381
3225
|
REMaybe(re, _) => altsMatcher(compile(re, next_m), next_m),
|
|
2382
3226
|
RERepeat(actualRe, min, max, nonGreedy) => {
|
|
2383
3227
|
// Special case: group around simple pattern in non-lazy repeat
|
|
2384
|
-
let re = match(actualRe) {
|
|
2385
|
-
REGroup(groupRe, n) when !nonGreedy && !needsBacktrack(groupRe) =>
|
|
2386
|
-
|
|
3228
|
+
let re = match (actualRe) {
|
|
3229
|
+
REGroup(groupRe, n) when !nonGreedy && !needsBacktrack(groupRe) =>
|
|
3230
|
+
groupRe,
|
|
3231
|
+
_ => actualRe,
|
|
2387
3232
|
}
|
|
2388
3233
|
let simple = !needsBacktrack(re)
|
|
2389
|
-
let groupN = if (simple)
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
3234
|
+
let groupN = if (simple)
|
|
3235
|
+
match (actualRe) {
|
|
3236
|
+
REGroup(_, n) => Some(n),
|
|
3237
|
+
_ => None,
|
|
3238
|
+
} else None
|
|
3239
|
+
match (compileMatcherRepeater(re, min, max)) {
|
|
3240
|
+
Some(matcher) when !nonGreedy =>
|
|
3241
|
+
repeatSimpleManyMatcher(matcher, min, max, groupN, next_m),
|
|
2395
3242
|
_ => {
|
|
2396
3243
|
let r_m = compile(re, if (simple) done_m else continue_m)
|
|
2397
3244
|
if (nonGreedy) {
|
|
@@ -2407,25 +3254,39 @@ let compileRegexToMatcher = (re: ParsedRegularExpression) => {
|
|
|
2407
3254
|
repeatMatcher(r_m, min, max, next_m)
|
|
2408
3255
|
}
|
|
2409
3256
|
}
|
|
2410
|
-
}
|
|
3257
|
+
},
|
|
2411
3258
|
}
|
|
2412
3259
|
},
|
|
2413
|
-
REGroup(re, n) =>
|
|
3260
|
+
REGroup(re, n) =>
|
|
3261
|
+
groupPushMatcher(n, compile(re, groupSetMatcher(n, next_m))),
|
|
2414
3262
|
REReference(0, _) => neverMatcher,
|
|
2415
3263
|
REReference(n, true) => referenceMatcher(n - 1, next_m), // case-sensitive
|
|
2416
3264
|
REReference(n, _) => referenceMatcherCaseInsensitive(n - 1, next_m),
|
|
2417
|
-
RECut(re, nStart, numN, _) =>
|
|
3265
|
+
RECut(re, nStart, numN, _) =>
|
|
3266
|
+
cutMatcher(compile(re, done_m), nStart, numN, next_m),
|
|
2418
3267
|
REConditional(tst, reTrue, reFalse, nStart, numN, _) => {
|
|
2419
3268
|
let m1 = compile(reTrue, next_m)
|
|
2420
3269
|
let m2 = compile(Option.unwrapWithDefault(REEmpty, reFalse), next_m)
|
|
2421
|
-
match(tst) {
|
|
3270
|
+
match (tst) {
|
|
2422
3271
|
REReference(n, _) => conditionalReferenceMatcher(n - 1, m1, m2),
|
|
2423
|
-
_ =>
|
|
3272
|
+
_ =>
|
|
3273
|
+
conditionalLookMatcher(compile(tst, done_m), m1, m2, nStart, numN),
|
|
2424
3274
|
}
|
|
2425
3275
|
},
|
|
2426
|
-
RELookahead(re, isMatch, nStart, numN) =>
|
|
2427
|
-
|
|
2428
|
-
|
|
3276
|
+
RELookahead(re, isMatch, nStart, numN) =>
|
|
3277
|
+
lookaheadMatcher(isMatch, compile(re, done_m), nStart, numN, next_m),
|
|
3278
|
+
RELookbehind(re, isMatch, lbMin, lbMax, nStart, numN) =>
|
|
3279
|
+
lookbehindMatcher(
|
|
3280
|
+
isMatch,
|
|
3281
|
+
unbox(lbMin),
|
|
3282
|
+
unbox(lbMax),
|
|
3283
|
+
compile(re, done_m),
|
|
3284
|
+
nStart,
|
|
3285
|
+
numN,
|
|
3286
|
+
next_m
|
|
3287
|
+
),
|
|
3288
|
+
REUnicodeCategories(cats, isMatch) =>
|
|
3289
|
+
unicodeCategoriesMatcher(cats, isMatch, next_m),
|
|
2429
3290
|
}
|
|
2430
3291
|
}
|
|
2431
3292
|
compile(re, done_m)
|
|
@@ -2441,10 +3302,18 @@ record RegularExpression {
|
|
|
2441
3302
|
reNumGroups: Number,
|
|
2442
3303
|
reReferences: Bool,
|
|
2443
3304
|
reMaxLookbehind: Number,
|
|
2444
|
-
reCompiled: (
|
|
3305
|
+
reCompiled: (
|
|
3306
|
+
MatchBuf,
|
|
3307
|
+
Number,
|
|
3308
|
+
Number,
|
|
3309
|
+
Number,
|
|
3310
|
+
Number,
|
|
3311
|
+
Array<Option<(Number, Number)>>,
|
|
3312
|
+
List<StackElt>,
|
|
3313
|
+
) -> Option<Number>,
|
|
2445
3314
|
reMustString: Option<String>,
|
|
2446
3315
|
reIsAnchored: Bool,
|
|
2447
|
-
reStartRange: Option<
|
|
3316
|
+
reStartRange: Option<RERange>,
|
|
2448
3317
|
}
|
|
2449
3318
|
|
|
2450
3319
|
/**
|
|
@@ -2601,32 +3470,33 @@ record RegularExpression {
|
|
|
2601
3470
|
*/
|
|
2602
3471
|
export let make = (regexString: String) => {
|
|
2603
3472
|
let buf = makeRegExBuf(regexString)
|
|
2604
|
-
match(parseRegex(buf)) {
|
|
3473
|
+
match (parseRegex(buf)) {
|
|
2605
3474
|
Err(e) => Err(e),
|
|
2606
3475
|
Ok(parsed) => {
|
|
2607
3476
|
let numGroups = unbox(buf.config.groupNumber)
|
|
2608
3477
|
let references = unbox(buf.config.references)
|
|
2609
|
-
match(validate(parsed, numGroups)) {
|
|
3478
|
+
match (validate(parsed, numGroups)) {
|
|
2610
3479
|
Err(e) => Err(e),
|
|
2611
3480
|
Ok(maxLookbehind) => {
|
|
2612
3481
|
let matcher = compileRegexToMatcher(parsed)
|
|
2613
|
-
Ok(
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
|
|
3482
|
+
Ok(
|
|
3483
|
+
{
|
|
3484
|
+
reParsed: parsed,
|
|
3485
|
+
reNumGroups: numGroups,
|
|
3486
|
+
reReferences: references,
|
|
3487
|
+
reMaxLookbehind: maxLookbehind,
|
|
3488
|
+
reCompiled: matcher,
|
|
3489
|
+
reMustString: mustString(parsed),
|
|
3490
|
+
reIsAnchored: isAnchored(parsed),
|
|
3491
|
+
reStartRange: startRange(parsed),
|
|
3492
|
+
}
|
|
3493
|
+
)
|
|
3494
|
+
},
|
|
2624
3495
|
}
|
|
2625
|
-
}
|
|
3496
|
+
},
|
|
2626
3497
|
}
|
|
2627
3498
|
}
|
|
2628
3499
|
|
|
2629
|
-
|
|
2630
3500
|
//
|
|
2631
3501
|
//
|
|
2632
3502
|
// ============
|
|
@@ -2637,16 +3507,18 @@ export let make = (regexString: String) => {
|
|
|
2637
3507
|
|
|
2638
3508
|
// speed up failures using must-string
|
|
2639
3509
|
let checkMustString = (ms, buf: MatchBuf, pos, endPos) => {
|
|
2640
|
-
match(ms) {
|
|
3510
|
+
match (ms) {
|
|
2641
3511
|
None => true,
|
|
2642
3512
|
Some(ms) => {
|
|
2643
|
-
let toCheck = if (
|
|
3513
|
+
let toCheck = if (
|
|
3514
|
+
pos == 0 && endPos == Array.length(buf.matchInputExploded)
|
|
3515
|
+
) {
|
|
2644
3516
|
buf.matchInput
|
|
2645
3517
|
} else {
|
|
2646
3518
|
String.slice(pos, endPos, buf.matchInput)
|
|
2647
3519
|
}
|
|
2648
3520
|
Option.isSome(String.indexOf(ms, toCheck))
|
|
2649
|
-
}
|
|
3521
|
+
},
|
|
2650
3522
|
}
|
|
2651
3523
|
}
|
|
2652
3524
|
|
|
@@ -2655,29 +3527,38 @@ let checkStartRange = (startRange, buf, pos, endPos) => {
|
|
|
2655
3527
|
rangeContains(startRange, Char.code(buf.matchInputExploded[pos]))
|
|
2656
3528
|
}
|
|
2657
3529
|
|
|
2658
|
-
|
|
2659
|
-
|
|
3530
|
+
let searchMatch =
|
|
3531
|
+
(
|
|
3532
|
+
rx: RegularExpression,
|
|
3533
|
+
buf: MatchBuf,
|
|
3534
|
+
pos,
|
|
3535
|
+
startPos,
|
|
3536
|
+
endPos,
|
|
3537
|
+
state,
|
|
3538
|
+
) => {
|
|
2660
3539
|
if (!checkMustString(rx.reMustString, buf, pos, endPos)) {
|
|
2661
3540
|
None
|
|
2662
3541
|
} else {
|
|
2663
3542
|
let matcher = rx.reCompiled
|
|
2664
3543
|
let anchored = rx.reIsAnchored
|
|
2665
3544
|
let startRange = rx.reStartRange
|
|
2666
|
-
let rec loop =
|
|
3545
|
+
let rec loop = pos => {
|
|
2667
3546
|
if (anchored && pos != startPos) {
|
|
2668
3547
|
None
|
|
2669
3548
|
} else {
|
|
2670
|
-
match(startRange) {
|
|
2671
|
-
Some(_) when pos == endPos =>
|
|
2672
|
-
|
|
3549
|
+
match (startRange) {
|
|
3550
|
+
Some(_) when pos == endPos =>
|
|
3551
|
+
None, // Can't possibly match if chars are required and we are at EOS
|
|
3552
|
+
Some(rng) when !checkStartRange(rng, buf, pos, endPos) =>
|
|
3553
|
+
loop(pos + 1),
|
|
2673
3554
|
_ => {
|
|
2674
3555
|
let pos2 = interp(matcher, buf, pos, startPos, endPos, state)
|
|
2675
3556
|
match (pos2) {
|
|
2676
3557
|
Some(p) => Some((pos, p)),
|
|
2677
3558
|
None when pos < endPos => loop(pos + 1),
|
|
2678
|
-
None => None
|
|
3559
|
+
None => None,
|
|
2679
3560
|
}
|
|
2680
|
-
}
|
|
3561
|
+
},
|
|
2681
3562
|
}
|
|
2682
3563
|
}
|
|
2683
3564
|
}
|
|
@@ -2727,7 +3608,7 @@ export record MatchResult {
|
|
|
2727
3608
|
/**
|
|
2728
3609
|
* Returns the contents of the given group
|
|
2729
3610
|
*/
|
|
2730
|
-
|
|
3611
|
+
group: Number -> Option<String>,
|
|
2731
3612
|
/**
|
|
2732
3613
|
* Returns the position of the given group
|
|
2733
3614
|
*/
|
|
@@ -2747,20 +3628,22 @@ export record MatchResult {
|
|
|
2747
3628
|
}
|
|
2748
3629
|
|
|
2749
3630
|
let makeMatchResult = (origString, start, end, state) => {
|
|
2750
|
-
let getMatchGroupPosition =
|
|
3631
|
+
let getMatchGroupPosition = n => {
|
|
2751
3632
|
if (n == 0) {
|
|
2752
3633
|
Some((start, end))
|
|
2753
3634
|
} else if (n < 0 || n > Array.length(state)) { // groups 1..Array.length(state) live at state[n - 1]; anything larger would index past the end
|
|
2754
3635
|
None
|
|
2755
|
-
} else
|
|
2756
|
-
|
|
2757
|
-
|
|
3636
|
+
} else {
|
|
3637
|
+
match (state[n - 1]) {
|
|
3638
|
+
None => None,
|
|
3639
|
+
Some((start, end)) => Some((start, end)),
|
|
3640
|
+
}
|
|
2758
3641
|
}
|
|
2759
3642
|
}
|
|
2760
|
-
let getMatchGroup =
|
|
2761
|
-
match(getMatchGroupPosition(n)) {
|
|
3643
|
+
let getMatchGroup = n => {
|
|
3644
|
+
match (getMatchGroupPosition(n)) {
|
|
2762
3645
|
Some((start, end)) => Some(String.slice(start, end, origString)),
|
|
2763
|
-
None => None
|
|
3646
|
+
None => None,
|
|
2764
3647
|
}
|
|
2765
3648
|
}
|
|
2766
3649
|
let getAllMatchGroupPositions = () => {
|
|
@@ -2772,9 +3655,9 @@ let makeMatchResult = (origString, start, end, state) => {
|
|
|
2772
3655
|
ret
|
|
2773
3656
|
}
|
|
2774
3657
|
let getAllMatchGroups = () => {
|
|
2775
|
-
Array.map(o => match(o) {
|
|
3658
|
+
Array.map(o => match (o) {
|
|
2776
3659
|
None => None,
|
|
2777
|
-
Some((start, end)) => Some(String.slice(start, end, origString))
|
|
3660
|
+
Some((start, end)) => Some(String.slice(start, end, origString)),
|
|
2778
3661
|
}, getAllMatchGroupPositions())
|
|
2779
3662
|
}
|
|
2780
3663
|
{
|
|
@@ -2782,17 +3665,21 @@ let makeMatchResult = (origString, start, end, state) => {
|
|
|
2782
3665
|
groupPosition: getMatchGroupPosition,
|
|
2783
3666
|
numGroups: Array.length(state) + 1,
|
|
2784
3667
|
allGroupPositions: getAllMatchGroupPositions,
|
|
2785
|
-
allGroups: getAllMatchGroups
|
|
3668
|
+
allGroups: getAllMatchGroups,
|
|
2786
3669
|
}
|
|
2787
3670
|
}
|
|
2788
3671
|
|
|
2789
3672
|
// Helpers for user-facing match functionality
|
|
2790
3673
|
|
|
2791
3674
|
let fastDriveRegexIsMatch = (rx, string, startOffset, endOffset) => {
|
|
2792
|
-
let state = if (rx.reReferences) Array.make(rx.reNumGroups, None)
|
|
2793
|
-
|
|
3675
|
+
let state = if (rx.reReferences) Array.make(rx.reNumGroups, None)
|
|
3676
|
+
else Array.make(0, None)
|
|
3677
|
+
let toWrap = if (startOffset == 0 && endOffset == String.length(string))
|
|
3678
|
+
string else String.slice(startOffset, endOffset, string)
|
|
2794
3679
|
let buf = makeMatchBuffer(toWrap)
|
|
2795
|
-
Option.isSome(
|
|
3680
|
+
Option.isSome(
|
|
3681
|
+
searchMatch(rx, buf, 0, 0, Array.length(buf.matchInputExploded), state)
|
|
3682
|
+
)
|
|
2796
3683
|
}
|
|
2797
3684
|
|
|
2798
3685
|
let rec fastDriveRegexMatchAll = (rx, string, startOffset, endOffset) => {
|
|
@@ -2800,34 +3687,73 @@ let rec fastDriveRegexMatchAll = (rx, string, startOffset, endOffset) => {
|
|
|
2800
3687
|
[]
|
|
2801
3688
|
} else {
|
|
2802
3689
|
let state = Array.make(rx.reNumGroups, None)
|
|
2803
|
-
let toWrap = if (startOffset == 0 && endOffset == String.length(string))
|
|
3690
|
+
let toWrap = if (startOffset == 0 && endOffset == String.length(string))
|
|
3691
|
+
string else String.slice(startOffset, endOffset, string)
|
|
2804
3692
|
let buf = makeMatchBuffer(toWrap)
|
|
2805
|
-
match(searchMatch(
|
|
3693
|
+
match (searchMatch(
|
|
3694
|
+
rx,
|
|
3695
|
+
buf,
|
|
3696
|
+
0,
|
|
3697
|
+
0,
|
|
3698
|
+
Array.length(buf.matchInputExploded),
|
|
3699
|
+
state
|
|
3700
|
+
)) {
|
|
2806
3701
|
None => [],
|
|
2807
|
-
Some((startPos, endPos)) =>
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
3702
|
+
Some((startPos, endPos)) =>
|
|
3703
|
+
[
|
|
3704
|
+
makeMatchResult(
|
|
3705
|
+
string,
|
|
3706
|
+
startPos + startOffset,
|
|
3707
|
+
endPos + startOffset,
|
|
3708
|
+
Array.map(elt => {
|
|
3709
|
+
match (elt) {
|
|
3710
|
+
None => None,
|
|
3711
|
+
Some((start, end)) =>
|
|
3712
|
+
Some((start + startOffset, end + startOffset)),
|
|
3713
|
+
}
|
|
3714
|
+
}, state)
|
|
3715
|
+
),
|
|
3716
|
+
...fastDriveRegexMatchAll(
|
|
3717
|
+
rx,
|
|
3718
|
+
string,
|
|
3719
|
+
startPos + startOffset + 1,
|
|
3720
|
+
endOffset
|
|
3721
|
+
)
|
|
3722
|
+
],
|
|
2813
3723
|
}
|
|
2814
3724
|
}
|
|
2815
3725
|
}
|
|
2816
3726
|
|
|
2817
3727
|
let fastDriveRegexMatch = (rx, string, startOffset, endOffset) => {
|
|
2818
3728
|
let state = Array.make(rx.reNumGroups, None)
|
|
2819
|
-
let toWrap = if (startOffset == 0 && endOffset == String.length(string))
|
|
3729
|
+
let toWrap = if (startOffset == 0 && endOffset == String.length(string))
|
|
3730
|
+
string else String.slice(startOffset, endOffset, string)
|
|
2820
3731
|
let buf = makeMatchBuffer(toWrap)
|
|
2821
|
-
match(searchMatch(
|
|
3732
|
+
match (searchMatch(
|
|
3733
|
+
rx,
|
|
3734
|
+
buf,
|
|
3735
|
+
0,
|
|
3736
|
+
0,
|
|
3737
|
+
Array.length(buf.matchInputExploded),
|
|
3738
|
+
state
|
|
3739
|
+
)) {
|
|
2822
3740
|
None => None,
|
|
2823
3741
|
Some((startPos, endPos)) => {
|
|
2824
|
-
Some(
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
3742
|
+
Some(
|
|
3743
|
+
makeMatchResult(
|
|
3744
|
+
string,
|
|
3745
|
+
startPos + startOffset,
|
|
3746
|
+
endPos + startOffset,
|
|
3747
|
+
Array.map(elt => {
|
|
3748
|
+
match (elt) {
|
|
3749
|
+
None => None,
|
|
3750
|
+
Some((start, end)) =>
|
|
3751
|
+
Some((start + startOffset, end + startOffset)),
|
|
3752
|
+
}
|
|
3753
|
+
}, state)
|
|
3754
|
+
)
|
|
3755
|
+
)
|
|
3756
|
+
},
|
|
2831
3757
|
}
|
|
2832
3758
|
}
|
|
2833
3759
|
|
|
@@ -2835,7 +3761,7 @@ let fastDriveRegexMatch = (rx, string, startOffset, endOffset) => {
|
|
|
2835
3761
|
* Determines if the given regular expression has a match in the given string.
|
|
2836
3762
|
* @param rx: The regular expression to search for
|
|
2837
3763
|
* @param string: The string to search within
|
|
2838
|
-
* @returns `true` if the RegExp matches the string
|
|
3764
|
+
* @returns `true` if the RegExp matches the string or `false` otherwise
|
|
2839
3765
|
*
|
|
2840
3766
|
* @example assert Regex.isMatch(Result.unwrap(Regex.make("ca+[at]")), "caaat") == true
|
|
2841
3767
|
*
|
|
@@ -2858,7 +3784,13 @@ export let isMatch = (rx: RegularExpression, string: String) => {
|
|
|
2858
3784
|
*
|
|
2859
3785
|
* @since 0.4.3
|
|
2860
3786
|
*/
|
|
2861
|
-
export let isMatchRange =
|
|
3787
|
+
export let isMatchRange =
|
|
3788
|
+
(
|
|
3789
|
+
rx: RegularExpression,
|
|
3790
|
+
string: String,
|
|
3791
|
+
start: Number,
|
|
3792
|
+
end: Number,
|
|
3793
|
+
) => {
|
|
2862
3794
|
fastDriveRegexIsMatch(rx, string, start, end)
|
|
2863
3795
|
}
|
|
2864
3796
|
|
|
@@ -2889,7 +3821,13 @@ export let find = (rx: RegularExpression, string: String) => {
|
|
|
2889
3821
|
*
|
|
2890
3822
|
* @since 0.4.3
|
|
2891
3823
|
*/
|
|
2892
|
-
export let findRange =
|
|
3824
|
+
export let findRange =
|
|
3825
|
+
(
|
|
3826
|
+
rx: RegularExpression,
|
|
3827
|
+
string: String,
|
|
3828
|
+
start: Number,
|
|
3829
|
+
end: Number,
|
|
3830
|
+
) => {
|
|
2893
3831
|
fastDriveRegexMatch(rx, string, start, end)
|
|
2894
3832
|
}
|
|
2895
3833
|
|
|
@@ -2916,37 +3854,51 @@ export let findAll = (rx: RegularExpression, string: String) => {
|
|
|
2916
3854
|
*
|
|
2917
3855
|
* @since 0.4.3
|
|
2918
3856
|
*/
|
|
2919
|
-
export let findAllRange =
|
|
3857
|
+
export let findAllRange =
|
|
3858
|
+
(
|
|
3859
|
+
rx: RegularExpression,
|
|
3860
|
+
string: String,
|
|
3861
|
+
start: Number,
|
|
3862
|
+
end: Number,
|
|
3863
|
+
) => {
|
|
2920
3864
|
fastDriveRegexMatchAll(rx, string, start, end)
|
|
2921
3865
|
}
|
|
2922
3866
|
|
|
2923
|
-
|
|
2924
|
-
|
|
3867
|
+
let computeReplacement =
|
|
3868
|
+
(
|
|
3869
|
+
matchBuf: MatchBuf,
|
|
3870
|
+
replacementString: String,
|
|
3871
|
+
start,
|
|
3872
|
+
end,
|
|
3873
|
+
state,
|
|
3874
|
+
) => {
|
|
2925
3875
|
let replacementExploded = String.explode(replacementString)
|
|
2926
3876
|
let len = Array.length(replacementExploded)
|
|
2927
3877
|
let mut acc = []
|
|
2928
3878
|
let getBeforeMatch = () => String.slice(0, start, matchBuf.matchInput)
|
|
2929
|
-
let getAfterMatch = () =>
|
|
2930
|
-
|
|
3879
|
+
let getAfterMatch = () =>
|
|
3880
|
+
String.slice(end, String.length(matchBuf.matchInput), matchBuf.matchInput)
|
|
3881
|
+
let getInputSubstr = n => {
|
|
2931
3882
|
if (n == 0) {
|
|
2932
3883
|
String.slice(start, end, matchBuf.matchInput)
|
|
2933
3884
|
} else if (n - 1 < Array.length(state)) {
|
|
2934
|
-
match (state[n-1]) {
|
|
3885
|
+
match (state[n - 1]) {
|
|
2935
3886
|
Some((start, end)) => String.slice(start, end, matchBuf.matchInput),
|
|
2936
|
-
None => ""
|
|
3887
|
+
None => "",
|
|
2937
3888
|
}
|
|
2938
3889
|
} else {
|
|
2939
3890
|
""
|
|
2940
3891
|
}
|
|
2941
3892
|
}
|
|
2942
3893
|
let consRange = (start, end, lst) => {
|
|
2943
|
-
if (start == end) lst
|
|
3894
|
+
if (start == end) lst
|
|
3895
|
+
else [String.slice(start, end, replacementString), ...lst]
|
|
2944
3896
|
}
|
|
2945
3897
|
let rec loop = (pos, since) => {
|
|
2946
3898
|
if (pos == len) {
|
|
2947
3899
|
consRange(since, pos, [])
|
|
2948
3900
|
} else if (replacementExploded[pos] == '$') {
|
|
2949
|
-
let c = if (
|
|
3901
|
+
let c = if (pos + 1 < len) Some(replacementExploded[pos + 1]) else None
|
|
2950
3902
|
if (c == Some('&')) {
|
|
2951
3903
|
consRange(since, pos, [getInputSubstr(0), ...loop(pos + 2, pos + 2)])
|
|
2952
3904
|
} else if (c == Some('`')) {
|
|
@@ -2954,27 +3906,34 @@ let computeReplacement = (matchBuf: MatchBuf, replacementString: String, start,
|
|
|
2954
3906
|
} else if (c == Some('\'')) {
|
|
2955
3907
|
consRange(since, pos, [getAfterMatch(), ...loop(pos + 2, pos + 2)])
|
|
2956
3908
|
} else {
|
|
2957
|
-
consRange(
|
|
2958
|
-
|
|
2959
|
-
|
|
2960
|
-
|
|
2961
|
-
|
|
2962
|
-
|
|
2963
|
-
|
|
2964
|
-
|
|
2965
|
-
|
|
2966
|
-
|
|
2967
|
-
|
|
2968
|
-
|
|
2969
|
-
dLoop(pos + 1, (10 * accum) + (Char.code(c) - Char.code('0')))
|
|
3909
|
+
consRange(
|
|
3910
|
+
since,
|
|
3911
|
+
pos,
|
|
3912
|
+
{
|
|
3913
|
+
if (c == Some('$')) {
|
|
3914
|
+
loop(pos + 2, pos + 1)
|
|
3915
|
+
} else if (c == Some('.')) {
|
|
3916
|
+
loop(pos + 2, pos + 2)
|
|
3917
|
+
} else {
|
|
3918
|
+
let rec dLoop = (pos, accum) => {
|
|
3919
|
+
if (pos == len) {
|
|
3920
|
+
[getInputSubstr(accum)]
|
|
2970
3921
|
} else {
|
|
2971
|
-
|
|
3922
|
+
let c = replacementExploded[pos]
|
|
3923
|
+
if (
|
|
3924
|
+
Char.code('0') <= Char.code(c) &&
|
|
3925
|
+
Char.code(c) <= Char.code('9')
|
|
3926
|
+
) {
|
|
3927
|
+
dLoop(pos + 1, 10 * accum + (Char.code(c) - Char.code('0')))
|
|
3928
|
+
} else {
|
|
3929
|
+
[getInputSubstr(accum), ...loop(pos, pos)]
|
|
3930
|
+
}
|
|
2972
3931
|
}
|
|
2973
3932
|
}
|
|
3933
|
+
dLoop(pos + 1, 0)
|
|
2974
3934
|
}
|
|
2975
|
-
dLoop(pos + 1, 0)
|
|
2976
3935
|
}
|
|
2977
|
-
|
|
3936
|
+
)
|
|
2978
3937
|
}
|
|
2979
3938
|
} else {
|
|
2980
3939
|
loop(pos + 1, since)
|
|
@@ -2984,30 +3943,52 @@ let computeReplacement = (matchBuf: MatchBuf, replacementString: String, start,
|
|
|
2984
3943
|
List.reduceRight(String.concat, "", res)
|
|
2985
3944
|
}
|
|
2986
3945
|
|
|
2987
|
-
|
|
2988
|
-
|
|
3946
|
+
let regexReplaceHelp =
|
|
3947
|
+
(
|
|
3948
|
+
rx: RegularExpression,
|
|
3949
|
+
toSearch: String,
|
|
3950
|
+
replacement: String,
|
|
3951
|
+
all: Bool,
|
|
3952
|
+
) => {
|
|
2989
3953
|
let buf = makeMatchBuffer(toSearch)
|
|
2990
3954
|
let mut out = []
|
|
2991
|
-
let rec loop =
|
|
3955
|
+
let rec loop = searchPos => {
|
|
2992
3956
|
let state = Array.make(rx.reNumGroups, None)
|
|
2993
|
-
let poss = searchMatch(
|
|
3957
|
+
let poss = searchMatch(
|
|
3958
|
+
rx,
|
|
3959
|
+
buf,
|
|
3960
|
+
searchPos,
|
|
3961
|
+
searchPos,
|
|
3962
|
+
Array.length(buf.matchInputExploded),
|
|
3963
|
+
state
|
|
3964
|
+
)
|
|
2994
3965
|
let recur = (start, end) => {
|
|
2995
3966
|
if (end == searchPos) {
|
|
2996
3967
|
if (searchPos == String.length(toSearch)) {
|
|
2997
3968
|
""
|
|
2998
3969
|
} else {
|
|
2999
|
-
String.concat(
|
|
3970
|
+
String.concat(
|
|
3971
|
+
String.slice(searchPos, searchPos + 1, toSearch),
|
|
3972
|
+
loop(searchPos + 1)
|
|
3973
|
+
)
|
|
3000
3974
|
}
|
|
3001
3975
|
} else {
|
|
3002
3976
|
loop(end)
|
|
3003
3977
|
}
|
|
3004
3978
|
}
|
|
3005
|
-
match(poss) {
|
|
3006
|
-
None =>
|
|
3979
|
+
match (poss) {
|
|
3980
|
+
None =>
|
|
3981
|
+
if (searchPos == 0) toSearch
|
|
3982
|
+
else String.slice(searchPos, String.length(toSearch), toSearch),
|
|
3007
3983
|
Some((start, end)) =>
|
|
3008
|
-
String.concat(
|
|
3009
|
-
String.
|
|
3010
|
-
|
|
3984
|
+
String.concat(
|
|
3985
|
+
String.slice(searchPos, start, toSearch),
|
|
3986
|
+
String.concat(
|
|
3987
|
+
computeReplacement(buf, replacement, start, end, state),
|
|
3988
|
+
if (all) recur(start, end)
|
|
3989
|
+
else String.slice(end, String.length(toSearch), toSearch)
|
|
3990
|
+
)
|
|
3991
|
+
),
|
|
3011
3992
|
}
|
|
3012
3993
|
}
|
|
3013
3994
|
loop(0)
|
|
@@ -3033,7 +4014,12 @@ let regexReplaceHelp = (rx: RegularExpression, toSearch: String, replacement: St
|
|
|
3033
4014
|
*
|
|
3034
4015
|
* @since 0.4.3
|
|
3035
4016
|
*/
|
|
3036
|
-
export let replace =
|
|
4017
|
+
export let replace =
|
|
4018
|
+
(
|
|
4019
|
+
rx: RegularExpression,
|
|
4020
|
+
toSearch: String,
|
|
4021
|
+
replacement: String,
|
|
4022
|
+
) => {
|
|
3037
4023
|
regexReplaceHelp(rx, toSearch, replacement, false)
|
|
3038
4024
|
}
|
|
3039
4025
|
|
|
@@ -3050,6 +4036,11 @@ export let replace = (rx: RegularExpression, toSearch: String, replacement: Stri
|
|
|
3050
4036
|
*
|
|
3051
4037
|
* @since 0.4.3
|
|
3052
4038
|
*/
|
|
3053
|
-
export let replaceAll =
|
|
4039
|
+
export let replaceAll =
|
|
4040
|
+
(
|
|
4041
|
+
rx: RegularExpression,
|
|
4042
|
+
toSearch: String,
|
|
4043
|
+
replacement: String,
|
|
4044
|
+
) => {
|
|
3054
4045
|
regexReplaceHelp(rx, toSearch, replacement, true)
|
|
3055
4046
|
}
|