@grain/stdlib 0.5.12 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +200 -0
- package/LICENSE +1 -1
- package/README.md +25 -2
- package/array.gr +1512 -199
- package/array.md +2032 -94
- package/bigint.gr +239 -140
- package/bigint.md +450 -106
- package/buffer.gr +595 -102
- package/buffer.md +903 -145
- package/bytes.gr +401 -110
- package/bytes.md +551 -63
- package/char.gr +228 -49
- package/char.md +373 -7
- package/exception.gr +26 -12
- package/exception.md +29 -5
- package/float32.gr +130 -109
- package/float32.md +185 -57
- package/float64.gr +112 -99
- package/float64.md +185 -57
- package/hash.gr +47 -37
- package/hash.md +21 -3
- package/int16.gr +430 -0
- package/int16.md +618 -0
- package/int32.gr +200 -269
- package/int32.md +254 -289
- package/int64.gr +142 -225
- package/int64.md +254 -289
- package/int8.gr +511 -0
- package/int8.md +786 -0
- package/json.gr +2084 -0
- package/json.md +608 -0
- package/list.gr +120 -68
- package/list.md +125 -80
- package/map.gr +560 -57
- package/map.md +672 -56
- package/marshal.gr +239 -227
- package/marshal.md +36 -4
- package/number.gr +626 -676
- package/number.md +738 -153
- package/option.gr +33 -35
- package/option.md +58 -42
- package/package.json +2 -2
- package/path.gr +148 -187
- package/path.md +47 -96
- package/pervasives.gr +75 -416
- package/pervasives.md +85 -180
- package/priorityqueue.gr +433 -74
- package/priorityqueue.md +422 -54
- package/queue.gr +362 -80
- package/queue.md +433 -38
- package/random.gr +67 -75
- package/random.md +68 -40
- package/range.gr +135 -63
- package/range.md +198 -43
- package/rational.gr +284 -0
- package/rational.md +545 -0
- package/regex.gr +933 -1066
- package/regex.md +59 -60
- package/result.gr +23 -25
- package/result.md +54 -39
- package/runtime/atof/common.gr +78 -82
- package/runtime/atof/common.md +22 -10
- package/runtime/atof/decimal.gr +102 -127
- package/runtime/atof/decimal.md +28 -7
- package/runtime/atof/lemire.gr +56 -71
- package/runtime/atof/lemire.md +9 -1
- package/runtime/atof/parse.gr +83 -110
- package/runtime/atof/parse.md +12 -2
- package/runtime/atof/slow.gr +28 -35
- package/runtime/atof/slow.md +9 -1
- package/runtime/atof/table.gr +19 -18
- package/runtime/atof/table.md +10 -2
- package/runtime/atoi/parse.gr +153 -136
- package/runtime/atoi/parse.md +50 -1
- package/runtime/bigint.gr +410 -517
- package/runtime/bigint.md +71 -57
- package/runtime/compare.gr +176 -85
- package/runtime/compare.md +31 -1
- package/runtime/dataStructures.gr +144 -32
- package/runtime/dataStructures.md +267 -31
- package/runtime/debugPrint.gr +34 -15
- package/runtime/debugPrint.md +37 -5
- package/runtime/equal.gr +53 -52
- package/runtime/equal.md +30 -1
- package/runtime/exception.gr +38 -47
- package/runtime/exception.md +10 -8
- package/runtime/gc.gr +23 -152
- package/runtime/gc.md +13 -17
- package/runtime/malloc.gr +31 -31
- package/runtime/malloc.md +11 -3
- package/runtime/numberUtils.gr +191 -172
- package/runtime/numberUtils.md +17 -9
- package/runtime/numbers.gr +1695 -1021
- package/runtime/numbers.md +1098 -134
- package/runtime/string.gr +540 -242
- package/runtime/string.md +76 -6
- package/runtime/unsafe/constants.gr +30 -13
- package/runtime/unsafe/constants.md +80 -0
- package/runtime/unsafe/conv.gr +55 -28
- package/runtime/unsafe/conv.md +41 -9
- package/runtime/unsafe/memory.gr +10 -30
- package/runtime/unsafe/memory.md +15 -19
- package/runtime/unsafe/tags.gr +37 -21
- package/runtime/unsafe/tags.md +88 -8
- package/runtime/unsafe/wasmf32.gr +30 -36
- package/runtime/unsafe/wasmf32.md +64 -56
- package/runtime/unsafe/wasmf64.gr +30 -36
- package/runtime/unsafe/wasmf64.md +64 -56
- package/runtime/unsafe/wasmi32.gr +49 -66
- package/runtime/unsafe/wasmi32.md +102 -94
- package/runtime/unsafe/wasmi64.gr +52 -79
- package/runtime/unsafe/wasmi64.md +108 -100
- package/runtime/utils/printing.gr +13 -15
- package/runtime/utils/printing.md +11 -3
- package/runtime/wasi.gr +294 -295
- package/runtime/wasi.md +62 -42
- package/set.gr +574 -64
- package/set.md +634 -54
- package/stack.gr +181 -64
- package/stack.md +271 -42
- package/string.gr +453 -533
- package/string.md +241 -151
- package/uint16.gr +369 -0
- package/uint16.md +585 -0
- package/uint32.gr +470 -0
- package/uint32.md +737 -0
- package/uint64.gr +471 -0
- package/uint64.md +737 -0
- package/uint8.gr +369 -0
- package/uint8.md +585 -0
- package/uri.gr +1093 -0
- package/uri.md +477 -0
- package/{sys → wasi}/file.gr +914 -500
- package/{sys → wasi}/file.md +454 -50
- package/wasi/process.gr +292 -0
- package/{sys → wasi}/process.md +164 -6
- package/wasi/random.gr +77 -0
- package/wasi/random.md +80 -0
- package/{sys → wasi}/time.gr +15 -22
- package/{sys → wasi}/time.md +5 -5
- package/immutablearray.gr +0 -929
- package/immutablearray.md +0 -1038
- package/immutablemap.gr +0 -493
- package/immutablemap.md +0 -479
- package/immutablepriorityqueue.gr +0 -360
- package/immutablepriorityqueue.md +0 -291
- package/immutableset.gr +0 -498
- package/immutableset.md +0 -449
- package/runtime/debug.gr +0 -2
- package/runtime/debug.md +0 -6
- package/runtime/unsafe/errors.gr +0 -36
- package/runtime/unsafe/errors.md +0 -204
- package/sys/process.gr +0 -254
- package/sys/random.gr +0 -79
- package/sys/random.md +0 -66
package/regex.gr
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
*
|
|
2
|
+
* Regular Expressions.
|
|
3
|
+
*
|
|
4
|
+
* @example from "regex" include Regex
|
|
4
5
|
*
|
|
5
6
|
* @since 0.4.3
|
|
6
7
|
*/
|
|
8
|
+
module Regex
|
|
7
9
|
|
|
8
10
|
/*
|
|
9
11
|
This library provides support for regular expressions in Grain.
|
|
@@ -11,15 +13,15 @@
|
|
|
11
13
|
which is licensed under Apache 2.0. Racket's regular expression
|
|
12
14
|
engine is itself inspired by the Spencer engine, as found in Tcl.
|
|
13
15
|
*/
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
16
|
+
from "array" include Array
|
|
17
|
+
from "char" include Char
|
|
18
|
+
from "list" include List
|
|
19
|
+
from "map" include Map
|
|
20
|
+
from "option" include Option
|
|
21
|
+
from "result" include Result
|
|
22
|
+
from "string" include String
|
|
23
|
+
from "number" include Number
|
|
24
|
+
use Number.{ min, max }
|
|
23
25
|
|
|
24
26
|
/*
|
|
25
27
|
|
|
@@ -61,11 +63,7 @@ let makeRegExParserConfig = () => {
|
|
|
61
63
|
}
|
|
62
64
|
}
|
|
63
65
|
|
|
64
|
-
let configWithCaseSensitive =
|
|
65
|
-
(
|
|
66
|
-
config: RegExParserConfig,
|
|
67
|
-
caseSensitive: Bool,
|
|
68
|
-
) => {
|
|
66
|
+
let configWithCaseSensitive = (config: RegExParserConfig, caseSensitive: Bool) => {
|
|
69
67
|
{
|
|
70
68
|
isPerlRegExp: config.isPerlRegExp,
|
|
71
69
|
caseSensitive,
|
|
@@ -170,12 +168,12 @@ let eat = (buf: RegExBuf, char: Char) => {
|
|
|
170
168
|
parseErr(
|
|
171
169
|
buf,
|
|
172
170
|
"Expected character '" ++
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
171
|
+
Char.toString(char) ++
|
|
172
|
+
", but found character '" ++
|
|
173
|
+
Char.toString(ret) ++
|
|
174
|
+
"'",
|
|
177
175
|
0
|
|
178
|
-
)
|
|
176
|
+
),
|
|
179
177
|
)
|
|
180
178
|
}
|
|
181
179
|
}
|
|
@@ -241,14 +239,13 @@ let rec rangeAdd = (rng: CharRange, v: CharRangeElt) => {
|
|
|
241
239
|
_ when rangeContains(rng, v) => rng,
|
|
242
240
|
_ => rangeUnion(rng, [(v, v)]),
|
|
243
241
|
}
|
|
244
|
-
}
|
|
245
|
-
rangeUnion = (rng1, rng2) => {
|
|
242
|
+
}
|
|
243
|
+
and rangeUnion = (rng1, rng2) => {
|
|
246
244
|
match ((rng1, rng2)) {
|
|
247
245
|
([], _) => rng2,
|
|
248
246
|
(_, []) => rng1,
|
|
249
|
-
([(r1start, r1end), ...r1tl], [(r2start, r2end), ...r2tl]) when
|
|
250
|
-
|
|
251
|
-
) => {
|
|
247
|
+
([(r1start, r1end), ...r1tl], [(r2start, r2end), ...r2tl]) when r1start <=
|
|
248
|
+
r2start => {
|
|
252
249
|
if (r1end + 1 >= r2start) {
|
|
253
250
|
if (r1end <= r2end) {
|
|
254
251
|
rangeUnion([(r1start, r2end), ...r2tl], r1tl)
|
|
@@ -315,7 +312,7 @@ let rangeAddCaseAware = (rng: CharRange, c, config) => {
|
|
|
315
312
|
Ok(rng)
|
|
316
313
|
*/
|
|
317
314
|
Err(
|
|
318
|
-
"NYI: Case-insensitive matching is not supported until grain-lang/grain#661 is resolved."
|
|
315
|
+
"NYI: Case-insensitive matching is not supported until grain-lang/grain#661 is resolved.",
|
|
319
316
|
)
|
|
320
317
|
}
|
|
321
318
|
},
|
|
@@ -324,17 +321,16 @@ let rangeAddCaseAware = (rng: CharRange, c, config) => {
|
|
|
324
321
|
|
|
325
322
|
let rangeAddSpanCaseAware = (rng: CharRange, fromC, toC, config) => {
|
|
326
323
|
if (config.caseSensitive) {
|
|
327
|
-
Ok(rangeAddSpan(rng, fromC, toC))
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
}
|
|
324
|
+
return Ok(rangeAddSpan(rng, fromC, toC))
|
|
325
|
+
}
|
|
326
|
+
let mut ret = Ok(rng)
|
|
327
|
+
for (let mut i = fromC; i <= toC; i += 1) {
|
|
328
|
+
match (ret) {
|
|
329
|
+
Ok(x) => ret = rangeAddCaseAware(x, Some(i), config),
|
|
330
|
+
Err(e) => break,
|
|
335
331
|
}
|
|
336
|
-
ret
|
|
337
332
|
}
|
|
333
|
+
return ret
|
|
338
334
|
}
|
|
339
335
|
|
|
340
336
|
/*
|
|
@@ -404,7 +400,7 @@ enum UnicodeCategory {
|
|
|
404
400
|
OtherPrivateUse,
|
|
405
401
|
}
|
|
406
402
|
|
|
407
|
-
enum ParsedRegularExpression {
|
|
403
|
+
enum rec ParsedRegularExpression {
|
|
408
404
|
RENever,
|
|
409
405
|
REEmpty,
|
|
410
406
|
REAny,
|
|
@@ -415,18 +411,11 @@ enum ParsedRegularExpression {
|
|
|
415
411
|
REWordBoundary,
|
|
416
412
|
RENotWordBoundary,
|
|
417
413
|
RELiteral(Char),
|
|
418
|
-
RELiteralString(
|
|
419
|
-
String
|
|
420
|
-
), // <- sequences of literals are flattened into a string
|
|
414
|
+
RELiteralString(String), // <- sequences of literals are flattened into a string
|
|
421
415
|
REAlts(ParsedRegularExpression, ParsedRegularExpression),
|
|
422
416
|
RESequence(List<ParsedRegularExpression>, Bool), // seq elts, needs backtrack
|
|
423
417
|
REGroup(ParsedRegularExpression, Number), // regex, group ID
|
|
424
|
-
RERepeat(
|
|
425
|
-
ParsedRegularExpression,
|
|
426
|
-
Number,
|
|
427
|
-
Option<Number>,
|
|
428
|
-
Bool
|
|
429
|
-
), // regex, min, max (None for infinity), true=non-greedy
|
|
418
|
+
RERepeat(ParsedRegularExpression, Number, Option<Number>, Bool), // regex, min, max (None for infinity), true=non-greedy
|
|
430
419
|
REMaybe(ParsedRegularExpression, Bool), // regex, true=non-greedy
|
|
431
420
|
REConditional(
|
|
432
421
|
ParsedRegularExpression,
|
|
@@ -436,12 +425,7 @@ enum ParsedRegularExpression {
|
|
|
436
425
|
Number,
|
|
437
426
|
Bool
|
|
438
427
|
), // test, if-true, if-false, n-start, num-n, needs-backtrack
|
|
439
|
-
RELookahead(
|
|
440
|
-
ParsedRegularExpression,
|
|
441
|
-
Bool,
|
|
442
|
-
Number,
|
|
443
|
-
Number
|
|
444
|
-
), // regex, is-match, n-start, num-n
|
|
428
|
+
RELookahead(ParsedRegularExpression, Bool, Number, Number), // regex, is-match, n-start, num-n
|
|
445
429
|
RELookbehind(
|
|
446
430
|
ParsedRegularExpression,
|
|
447
431
|
Bool,
|
|
@@ -450,30 +434,21 @@ enum ParsedRegularExpression {
|
|
|
450
434
|
Number,
|
|
451
435
|
Number
|
|
452
436
|
), // regex, is-match, lb-min, lb-max, n-start, num-n (lb-xx values patched in later)
|
|
453
|
-
RECut(
|
|
454
|
-
ParsedRegularExpression,
|
|
455
|
-
Number,
|
|
456
|
-
Number,
|
|
457
|
-
Bool
|
|
458
|
-
), // regex, n-start, num-n, needs-backtrack
|
|
437
|
+
RECut(ParsedRegularExpression, Number, Number, Bool), // regex, n-start, num-n, needs-backtrack
|
|
459
438
|
REReference(Number, Bool), // n, case-sensitive
|
|
460
439
|
RERange(RERange),
|
|
461
|
-
REUnicodeCategories(
|
|
462
|
-
List<UnicodeCategory>,
|
|
463
|
-
Bool
|
|
464
|
-
), // symlist, true=match/false=does-not-match
|
|
440
|
+
REUnicodeCategories(List<UnicodeCategory>, Bool), // symlist, true=match/false=does-not-match
|
|
465
441
|
}
|
|
466
442
|
|
|
467
443
|
let needsBacktrack = (rx: ParsedRegularExpression) => {
|
|
468
444
|
match (rx) {
|
|
469
|
-
REAlts(_, _)
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
REMaybe(_, _) => true,
|
|
474
|
-
REConditional(_, _, _, _, _, nb) => nb,
|
|
475
|
-
RECut(_, _, _, nb) => nb,
|
|
445
|
+
REAlts(_, _) |
|
|
446
|
+
REGroup(_, _) |
|
|
447
|
+
RERepeat(_, _, _, _) |
|
|
448
|
+
REMaybe(_, _) |
|
|
476
449
|
REUnicodeCategories(_, _) => true,
|
|
450
|
+
RESequence(_, nb) | REConditional(_, _, _, _, _, nb) | RECut(_, _, _, nb) =>
|
|
451
|
+
nb,
|
|
477
452
|
_ => false,
|
|
478
453
|
}
|
|
479
454
|
}
|
|
@@ -500,8 +475,7 @@ let mergeAdjacent = lst => {
|
|
|
500
475
|
None => false,
|
|
501
476
|
Some(MMChar) => {
|
|
502
477
|
match (hd) {
|
|
503
|
-
RELiteral(
|
|
504
|
-
RELiteralString(x) => false,
|
|
478
|
+
RELiteral(_) | RELiteralString(_) => false,
|
|
505
479
|
_ => true,
|
|
506
480
|
}
|
|
507
481
|
},
|
|
@@ -514,9 +488,7 @@ let mergeAdjacent = lst => {
|
|
|
514
488
|
// flatten nested sequences
|
|
515
489
|
[RESequence(rxs1, _), ...tl] => loop(mode, accum, List.append(rxs1, tl)),
|
|
516
490
|
// drop empty elements
|
|
517
|
-
[REEmpty, ...tl] => loop(mode, accum, tl),
|
|
518
|
-
[RELiteralString(""), ...tl] => loop(mode, accum, tl),
|
|
519
|
-
// TODO(#696): Clean up with or-patterns
|
|
491
|
+
[REEmpty, ...tl] | [RELiteralString(""), ...tl] => loop(mode, accum, tl),
|
|
520
492
|
_ when readyForAccum(l, mode) => {
|
|
521
493
|
match (accum) {
|
|
522
494
|
[] => [],
|
|
@@ -531,8 +503,7 @@ let mergeAdjacent = lst => {
|
|
|
531
503
|
},
|
|
532
504
|
}
|
|
533
505
|
},
|
|
534
|
-
[] =>
|
|
535
|
-
fail "impossible (mergeAdjacent)", // avoid warning (can delete once TODO is resolved)
|
|
506
|
+
[] => fail "impossible (mergeAdjacent)", // avoid warning (can delete once TODO is resolved)
|
|
536
507
|
[RELiteralString(x), ...tl] when Option.isSome(mode) =>
|
|
537
508
|
loop(mode, [x, ...accum], tl),
|
|
538
509
|
[RELiteral(c), ...tl] when Option.isSome(mode) =>
|
|
@@ -578,11 +549,10 @@ let makeRECut = (rx, nStart, numN) => {
|
|
|
578
549
|
}
|
|
579
550
|
|
|
580
551
|
let makeREConditional = (tst, pces1, pces2, nStart, numN) => {
|
|
581
|
-
let nb = needsBacktrack(pces1) ||
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
}
|
|
552
|
+
let nb = needsBacktrack(pces1) || match (pces2) {
|
|
553
|
+
None => false,
|
|
554
|
+
Some(p2) => needsBacktrack(p2),
|
|
555
|
+
}
|
|
586
556
|
REConditional(tst, pces1, pces2, nStart, numN, nb)
|
|
587
557
|
}
|
|
588
558
|
|
|
@@ -640,8 +610,8 @@ let rec parseRangeNot = (buf: RegExBuf) => {
|
|
|
640
610
|
Ok(_) => parseRange(buf),
|
|
641
611
|
}
|
|
642
612
|
}
|
|
643
|
-
}
|
|
644
|
-
parseRange = (buf: RegExBuf) => {
|
|
613
|
+
}
|
|
614
|
+
and parseRange = (buf: RegExBuf) => {
|
|
645
615
|
if (!more(buf)) {
|
|
646
616
|
Err(parseErr(buf, "Missing closing `]`", 0))
|
|
647
617
|
} else {
|
|
@@ -664,12 +634,10 @@ parseRange = (buf: RegExBuf) => {
|
|
|
664
634
|
Ok(_) => parseRangeRest(buf, [], None, None),
|
|
665
635
|
}
|
|
666
636
|
}
|
|
667
|
-
}
|
|
668
|
-
parseClass = (buf: RegExBuf) => {
|
|
637
|
+
}
|
|
638
|
+
and parseClass = (buf: RegExBuf) => {
|
|
669
639
|
if (!more(buf)) {
|
|
670
|
-
Err(
|
|
671
|
-
"no chars"
|
|
672
|
-
) // caught in handler (we use a Result to cleanly mesh with the Result type below)
|
|
640
|
+
Err("no chars") // caught in handler (we use a Result to cleanly mesh with the Result type below)
|
|
673
641
|
} else {
|
|
674
642
|
match (peek(buf)) {
|
|
675
643
|
Err(e) => Err(e),
|
|
@@ -700,8 +668,8 @@ parseClass = (buf: RegExBuf) => {
|
|
|
700
668
|
Ok(c) => Err("unknown class: " ++ toString(c)),
|
|
701
669
|
}
|
|
702
670
|
}
|
|
703
|
-
}
|
|
704
|
-
parsePosixCharClass = (buf: RegExBuf) => {
|
|
671
|
+
}
|
|
672
|
+
and parsePosixCharClass = (buf: RegExBuf) => {
|
|
705
673
|
if (!more(buf)) {
|
|
706
674
|
Err(parseErr(buf, "Missing POSIX character class after `[`", 0))
|
|
707
675
|
} else {
|
|
@@ -719,15 +687,14 @@ parsePosixCharClass = (buf: RegExBuf) => {
|
|
|
719
687
|
Ok(_) => Ok(List.join("", List.reverse(acc))),
|
|
720
688
|
}
|
|
721
689
|
},
|
|
722
|
-
Ok(c) when (
|
|
723
|
-
Char.code(
|
|
724
|
-
) => {
|
|
690
|
+
Ok(c) when Char.code('a') <= Char.code(c) &&
|
|
691
|
+
Char.code(c) <= Char.code('z') => {
|
|
725
692
|
ignore(eat(buf, c))
|
|
726
693
|
loop([Char.toString(c), ...acc])
|
|
727
694
|
},
|
|
728
695
|
Ok(_) =>
|
|
729
696
|
Err(
|
|
730
|
-
parseErr(buf, "Invalid character in POSIX character class", 0)
|
|
697
|
+
parseErr(buf, "Invalid character in POSIX character class", 0),
|
|
731
698
|
),
|
|
732
699
|
}
|
|
733
700
|
}
|
|
@@ -741,7 +708,7 @@ parsePosixCharClass = (buf: RegExBuf) => {
|
|
|
741
708
|
rangeAddSpan([], Char.code('a'), Char.code('z')),
|
|
742
709
|
Char.code('A'),
|
|
743
710
|
Char.code('Z')
|
|
744
|
-
)
|
|
711
|
+
),
|
|
745
712
|
),
|
|
746
713
|
"upper" => Ok(rangeAddSpan([], Char.code('A'), Char.code('Z'))),
|
|
747
714
|
"lower" => Ok(rangeAddSpan([], Char.code('a'), Char.code('z'))),
|
|
@@ -756,7 +723,7 @@ parsePosixCharClass = (buf: RegExBuf) => {
|
|
|
756
723
|
),
|
|
757
724
|
Char.code('A'),
|
|
758
725
|
Char.code('F')
|
|
759
|
-
)
|
|
726
|
+
),
|
|
760
727
|
),
|
|
761
728
|
"alnum" =>
|
|
762
729
|
Ok(
|
|
@@ -768,7 +735,7 @@ parsePosixCharClass = (buf: RegExBuf) => {
|
|
|
768
735
|
),
|
|
769
736
|
Char.code('A'),
|
|
770
737
|
Char.code('Z')
|
|
771
|
-
)
|
|
738
|
+
),
|
|
772
739
|
),
|
|
773
740
|
"word" =>
|
|
774
741
|
Ok(
|
|
@@ -779,7 +746,7 @@ parsePosixCharClass = (buf: RegExBuf) => {
|
|
|
779
746
|
Char.code('F')
|
|
780
747
|
),
|
|
781
748
|
Char.code('_')
|
|
782
|
-
)
|
|
749
|
+
),
|
|
783
750
|
),
|
|
784
751
|
"blank" => Ok(rangeAdd(rangeAdd([], 0x20), 0x9)), // space and tab
|
|
785
752
|
"space" => Ok(range_s()),
|
|
@@ -789,7 +756,7 @@ parsePosixCharClass = (buf: RegExBuf) => {
|
|
|
789
756
|
buf,
|
|
790
757
|
"the [:graph:] character class is not currently supported. For more information, see https://github.com/grain-lang/grain/issues/661",
|
|
791
758
|
0
|
|
792
|
-
)
|
|
759
|
+
),
|
|
793
760
|
),
|
|
794
761
|
"print" =>
|
|
795
762
|
Err(
|
|
@@ -797,7 +764,7 @@ parsePosixCharClass = (buf: RegExBuf) => {
|
|
|
797
764
|
buf,
|
|
798
765
|
"the [:print:] character class is not currently supported. For more information, see https://github.com/grain-lang/grain/issues/661",
|
|
799
766
|
0
|
|
800
|
-
)
|
|
767
|
+
),
|
|
801
768
|
),
|
|
802
769
|
"cntrl" => Ok(rangeAddSpan([], 0, 31)),
|
|
803
770
|
"ascii" => Ok(rangeAddSpan([], 0, 127)),
|
|
@@ -813,18 +780,17 @@ parsePosixCharClass = (buf: RegExBuf) => {
|
|
|
813
780
|
buf,
|
|
814
781
|
"Expected `:` after `[`. Found: `" ++ Char.toString(c) ++ "`",
|
|
815
782
|
0
|
|
816
|
-
)
|
|
783
|
+
),
|
|
817
784
|
),
|
|
818
785
|
}
|
|
819
786
|
}
|
|
820
|
-
}
|
|
821
|
-
parseRangeRest =
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
) => {
|
|
787
|
+
}
|
|
788
|
+
and parseRangeRest = (
|
|
789
|
+
buf: RegExBuf,
|
|
790
|
+
rng: CharRange,
|
|
791
|
+
spanFrom: Option<Number>,
|
|
792
|
+
mustSpanFrom: Option<Number>,
|
|
793
|
+
) => {
|
|
828
794
|
if (!more(buf)) {
|
|
829
795
|
Err(parseErr(buf, "Missing closing `]`", 0))
|
|
830
796
|
} else {
|
|
@@ -848,7 +814,7 @@ parseRangeRest =
|
|
|
848
814
|
buf,
|
|
849
815
|
"misplaced hyphen within square brackets in pattern",
|
|
850
816
|
1
|
|
851
|
-
)
|
|
817
|
+
),
|
|
852
818
|
),
|
|
853
819
|
None => {
|
|
854
820
|
ignore(eat(buf, '-'))
|
|
@@ -866,7 +832,7 @@ parseRangeRest =
|
|
|
866
832
|
buf,
|
|
867
833
|
"misplaced hyphen within square brackets in pattern",
|
|
868
834
|
1
|
|
869
|
-
)
|
|
835
|
+
),
|
|
870
836
|
),
|
|
871
837
|
Ok(_) => {
|
|
872
838
|
ignore(eat(buf, '-'))
|
|
@@ -886,16 +852,14 @@ parseRangeRest =
|
|
|
886
852
|
buf,
|
|
887
853
|
"escaping backslash at end pattern (within square brackets)",
|
|
888
854
|
0
|
|
889
|
-
)
|
|
855
|
+
),
|
|
890
856
|
)
|
|
891
857
|
} else {
|
|
892
858
|
match (peek(buf)) {
|
|
893
859
|
Err(e) => Err(e),
|
|
894
|
-
Ok(c) when (
|
|
895
|
-
Char.code('a') <= Char.code(c) &&
|
|
860
|
+
Ok(c) when Char.code('a') <= Char.code(c) &&
|
|
896
861
|
Char.code(c) <= Char.code('z') ||
|
|
897
|
-
Char.code('A') <= Char.code(c) && Char.code(c) <= Char.code('Z')
|
|
898
|
-
) => {
|
|
862
|
+
Char.code('A') <= Char.code(c) && Char.code(c) <= Char.code('Z') => {
|
|
899
863
|
match (mustSpanFrom) {
|
|
900
864
|
Some(_) =>
|
|
901
865
|
Err(
|
|
@@ -903,7 +867,7 @@ parseRangeRest =
|
|
|
903
867
|
buf,
|
|
904
868
|
"misplaced hyphen within square brackets in pattern",
|
|
905
869
|
0
|
|
906
|
-
)
|
|
870
|
+
),
|
|
907
871
|
),
|
|
908
872
|
None => {
|
|
909
873
|
let curPos = unbox(buf.cursor)
|
|
@@ -911,8 +875,8 @@ parseRangeRest =
|
|
|
911
875
|
Err(e) =>
|
|
912
876
|
Err(
|
|
913
877
|
"Invalid Regular Expression: illegal alphebetic escape (position " ++
|
|
914
|
-
|
|
915
|
-
|
|
878
|
+
toString(curPos) ++
|
|
879
|
+
")",
|
|
916
880
|
),
|
|
917
881
|
Ok(range1) => {
|
|
918
882
|
match (rangeAddCaseAware(rng, spanFrom, buf.config)) {
|
|
@@ -970,15 +934,14 @@ parseRangeRest =
|
|
|
970
934
|
},
|
|
971
935
|
}
|
|
972
936
|
}
|
|
973
|
-
}
|
|
974
|
-
parseRangeRestSpan =
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
) => {
|
|
937
|
+
}
|
|
938
|
+
and parseRangeRestSpan = (
|
|
939
|
+
buf: RegExBuf,
|
|
940
|
+
c,
|
|
941
|
+
rng: CharRange,
|
|
942
|
+
spanFrom: Option<Number>,
|
|
943
|
+
mustSpanFrom: Option<Number>,
|
|
944
|
+
) => {
|
|
982
945
|
match (mustSpanFrom) {
|
|
983
946
|
Some(n) => {
|
|
984
947
|
if (n > c) {
|
|
@@ -1004,226 +967,215 @@ parseRangeRestSpan =
|
|
|
1004
967
|
let rec parseAtom = (buf: RegExBuf) => {
|
|
1005
968
|
match (peek(buf)) {
|
|
1006
969
|
Err(e) => Err(e),
|
|
1007
|
-
Ok(c) =>
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
Ok(
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
postNumGroups - preNumGroups
|
|
1037
|
-
)
|
|
970
|
+
Ok(c) => match (c) {
|
|
971
|
+
'(' => {
|
|
972
|
+
if (!moreN(buf, 1)) {
|
|
973
|
+
Err(parseErr(buf, "Parentheses not closed", 1))
|
|
974
|
+
} else if (peekN(buf, 1) == Ok('?')) {
|
|
975
|
+
// fancy group
|
|
976
|
+
if (!moreN(buf, 2)) {
|
|
977
|
+
Err(parseErr(buf, "Parentheses not closed", 2))
|
|
978
|
+
} else {
|
|
979
|
+
match (peekN(buf, 2)) {
|
|
980
|
+
Err(e) => Err(e),
|
|
981
|
+
Ok('>') => {
|
|
982
|
+
// cut
|
|
983
|
+
ignore(eat(buf, '('))
|
|
984
|
+
ignore(eat(buf, '?'))
|
|
985
|
+
ignore(eat(buf, '>'))
|
|
986
|
+
let preNumGroups = unbox(buf.config.groupNumber)
|
|
987
|
+
match (parseRegex(buf)) {
|
|
988
|
+
Err(e) => Err(e),
|
|
989
|
+
Ok(rx) => {
|
|
990
|
+
let postNumGroups = unbox(buf.config.groupNumber)
|
|
991
|
+
match (eat(buf, ')')) {
|
|
992
|
+
Err(e) => Err(e),
|
|
993
|
+
Ok(_) =>
|
|
994
|
+
Ok(
|
|
995
|
+
makeRECut(
|
|
996
|
+
rx,
|
|
997
|
+
preNumGroups,
|
|
998
|
+
postNumGroups - preNumGroups
|
|
1038
999
|
),
|
|
1039
|
-
|
|
1040
|
-
}
|
|
1041
|
-
}
|
|
1042
|
-
}
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1000
|
+
),
|
|
1001
|
+
}
|
|
1002
|
+
},
|
|
1003
|
+
}
|
|
1004
|
+
},
|
|
1005
|
+
Ok('(') => {
|
|
1006
|
+
// conditional
|
|
1007
|
+
ignore(eat(buf, '('))
|
|
1008
|
+
ignore(eat(buf, '?'))
|
|
1009
|
+
ignore(eat(buf, '('))
|
|
1010
|
+
let tstPreNumGroups = unbox(buf.config.groupNumber)
|
|
1011
|
+
match (parseTest(buf)) {
|
|
1012
|
+
Err(e) => Err(e),
|
|
1013
|
+
Ok(test) => {
|
|
1014
|
+
let tstSpanNumGroups = unbox(buf.config.groupNumber) -
|
|
1015
|
+
tstPreNumGroups
|
|
1016
|
+
match (parsePCEs(buf, false)) {
|
|
1017
|
+
Err(e) => Err(e),
|
|
1018
|
+
Ok(pces) => {
|
|
1019
|
+
if (!more(buf)) {
|
|
1020
|
+
Err(parseErr(buf, "Parentheses not closed", 0))
|
|
1021
|
+
} else {
|
|
1022
|
+
match (peek(buf)) {
|
|
1023
|
+
Err(e) => Err(e),
|
|
1024
|
+
Ok('|') => {
|
|
1025
|
+
ignore(eat(buf, '|'))
|
|
1026
|
+
match (parsePCEs(buf, false)) {
|
|
1027
|
+
Err(e) => Err(e),
|
|
1028
|
+
Ok(pces2) => {
|
|
1029
|
+
match (peek(buf)) {
|
|
1030
|
+
Err(_) =>
|
|
1031
|
+
Err(
|
|
1032
|
+
parseErr(
|
|
1033
|
+
buf,
|
|
1034
|
+
"Parentheses not closed",
|
|
1035
|
+
0
|
|
1075
1036
|
),
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
)
|
|
1087
|
-
|
|
1088
|
-
}
|
|
1089
|
-
}
|
|
1090
|
-
}
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
)
|
|
1103
|
-
},
|
|
1104
|
-
Ok(_) => {
|
|
1105
|
-
Err(
|
|
1106
|
-
parseErr(buf, "Failed to parse condition", 0)
|
|
1107
|
-
)
|
|
1108
|
-
},
|
|
1109
|
-
}
|
|
1110
|
-
}
|
|
1111
|
-
},
|
|
1112
|
-
}
|
|
1113
|
-
},
|
|
1114
|
-
}
|
|
1115
|
-
},
|
|
1116
|
-
Ok(c) when (
|
|
1117
|
-
c == 'i' || c == 's' || c == 'm' || c == '-' || c == ':'
|
|
1118
|
-
) => {
|
|
1119
|
-
// match with mode
|
|
1120
|
-
ignore(eat(buf, '('))
|
|
1121
|
-
ignore(eat(buf, '?'))
|
|
1122
|
-
match (parseMode(buf)) {
|
|
1123
|
-
Err(e) => Err(e),
|
|
1124
|
-
Ok(config) => {
|
|
1125
|
-
if (!more(buf)) {
|
|
1126
|
-
Err(parseErr(buf, "Parentheses not closed", 0))
|
|
1127
|
-
} else {
|
|
1128
|
-
match (peek(buf)) {
|
|
1129
|
-
Err(e) => Err(e),
|
|
1130
|
-
Ok(':') => {
|
|
1131
|
-
ignore(eat(buf, ':'))
|
|
1132
|
-
match (parseRegex(withConfig(buf, config))) {
|
|
1133
|
-
Err(e) => Err(e),
|
|
1134
|
-
Ok(rx) => {
|
|
1135
|
-
match (eat(buf, ')')) {
|
|
1136
|
-
Err(e) => Err(e),
|
|
1137
|
-
Ok(_) => Ok(rx),
|
|
1138
|
-
}
|
|
1139
|
-
},
|
|
1140
|
-
}
|
|
1141
|
-
},
|
|
1142
|
-
Ok(_) => {
|
|
1143
|
-
Err(
|
|
1144
|
-
parseErr(
|
|
1145
|
-
buf,
|
|
1146
|
-
"expected `:` or another mode after `(?` and a mode sequence; a mode is `i`, `-i`, `m`, `-m`, `s`, or `-s`",
|
|
1147
|
-
0
|
|
1037
|
+
),
|
|
1038
|
+
Ok(_) => {
|
|
1039
|
+
ignore(eat(buf, ')'))
|
|
1040
|
+
Ok(
|
|
1041
|
+
makeREConditional(
|
|
1042
|
+
test,
|
|
1043
|
+
makeRESequence(pces),
|
|
1044
|
+
Some(makeRESequence(pces2)),
|
|
1045
|
+
tstPreNumGroups,
|
|
1046
|
+
tstSpanNumGroups
|
|
1047
|
+
),
|
|
1048
|
+
)
|
|
1049
|
+
},
|
|
1050
|
+
}
|
|
1051
|
+
},
|
|
1052
|
+
}
|
|
1053
|
+
},
|
|
1054
|
+
Ok(')') => {
|
|
1055
|
+
ignore(eat(buf, ')'))
|
|
1056
|
+
Ok(
|
|
1057
|
+
makeREConditional(
|
|
1058
|
+
test,
|
|
1059
|
+
makeRESequence(pces),
|
|
1060
|
+
None,
|
|
1061
|
+
tstPreNumGroups,
|
|
1062
|
+
tstSpanNumGroups
|
|
1063
|
+
),
|
|
1148
1064
|
)
|
|
1149
|
-
|
|
1150
|
-
|
|
1065
|
+
},
|
|
1066
|
+
Ok(_) => {
|
|
1067
|
+
Err(parseErr(buf, "Failed to parse condition", 0))
|
|
1068
|
+
},
|
|
1069
|
+
}
|
|
1151
1070
|
}
|
|
1152
|
-
}
|
|
1153
|
-
}
|
|
1154
|
-
}
|
|
1155
|
-
}
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
}
|
|
1163
|
-
} else {
|
|
1164
|
-
// simple group
|
|
1165
|
-
ignore(eat(buf, '('))
|
|
1166
|
-
let groupNum = unbox(buf.config.groupNumber)
|
|
1167
|
-
// Note that this inc operation is side-effecting
|
|
1168
|
-
match (parseRegex(
|
|
1169
|
-
withConfig(buf, configIncGroupNumber(buf.config))
|
|
1170
|
-
)) {
|
|
1171
|
-
Err(e) => Err(e),
|
|
1172
|
-
Ok(r) => {
|
|
1173
|
-
match (eat(buf, ')')) {
|
|
1071
|
+
},
|
|
1072
|
+
}
|
|
1073
|
+
},
|
|
1074
|
+
}
|
|
1075
|
+
},
|
|
1076
|
+
Ok('i' | 's' | 'm' | '-' | ':') => {
|
|
1077
|
+
// match with mode
|
|
1078
|
+
ignore(eat(buf, '('))
|
|
1079
|
+
ignore(eat(buf, '?'))
|
|
1080
|
+
match (parseMode(buf)) {
|
|
1174
1081
|
Err(e) => Err(e),
|
|
1175
|
-
Ok(
|
|
1082
|
+
Ok(config) => {
|
|
1083
|
+
if (!more(buf)) {
|
|
1084
|
+
Err(parseErr(buf, "Parentheses not closed", 0))
|
|
1085
|
+
} else {
|
|
1086
|
+
match (peek(buf)) {
|
|
1087
|
+
Err(e) => Err(e),
|
|
1088
|
+
Ok(':') => {
|
|
1089
|
+
ignore(eat(buf, ':'))
|
|
1090
|
+
match (parseRegex(withConfig(buf, config))) {
|
|
1091
|
+
Err(e) => Err(e),
|
|
1092
|
+
Ok(rx) => {
|
|
1093
|
+
match (eat(buf, ')')) {
|
|
1094
|
+
Err(e) => Err(e),
|
|
1095
|
+
Ok(_) => Ok(rx),
|
|
1096
|
+
}
|
|
1097
|
+
},
|
|
1098
|
+
}
|
|
1099
|
+
},
|
|
1100
|
+
Ok(_) => {
|
|
1101
|
+
Err(
|
|
1102
|
+
parseErr(
|
|
1103
|
+
buf,
|
|
1104
|
+
"expected `:` or another mode after `(?` and a mode sequence; a mode is `i`, `-i`, `m`, `-m`, `s`, or `-s`",
|
|
1105
|
+
0
|
|
1106
|
+
),
|
|
1107
|
+
)
|
|
1108
|
+
},
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
},
|
|
1176
1112
|
}
|
|
1177
1113
|
},
|
|
1114
|
+
Ok(_) => {
|
|
1115
|
+
ignore(eat(buf, '('))
|
|
1116
|
+
ignore(eat(buf, '?'))
|
|
1117
|
+
parseLook(buf)
|
|
1118
|
+
},
|
|
1178
1119
|
}
|
|
1179
1120
|
}
|
|
1180
|
-
}
|
|
1181
|
-
|
|
1182
|
-
ignore(eat(buf, '
|
|
1183
|
-
|
|
1121
|
+
} else {
|
|
1122
|
+
// simple group
|
|
1123
|
+
ignore(eat(buf, '('))
|
|
1124
|
+
let groupNum = unbox(buf.config.groupNumber)
|
|
1125
|
+
// Note that this inc operation is side-effecting
|
|
1126
|
+
match (parseRegex(withConfig(buf, configIncGroupNumber(buf.config)))) {
|
|
1184
1127
|
Err(e) => Err(e),
|
|
1185
|
-
Ok(
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
// if in multiline mode, '.' matches everything but \n
|
|
1192
|
-
Ok(
|
|
1193
|
-
makeRERange(
|
|
1194
|
-
rangeInvert(rangeAdd([], Char.code('\n')), rangeLimit),
|
|
1195
|
-
rangeLimit
|
|
1196
|
-
)
|
|
1197
|
-
)
|
|
1198
|
-
} else {
|
|
1199
|
-
Ok(REAny)
|
|
1128
|
+
Ok(r) => {
|
|
1129
|
+
match (eat(buf, ')')) {
|
|
1130
|
+
Err(e) => Err(e),
|
|
1131
|
+
Ok(_) => Ok(REGroup(r, groupNum)),
|
|
1132
|
+
}
|
|
1133
|
+
},
|
|
1200
1134
|
}
|
|
1201
|
-
}
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1135
|
+
}
|
|
1136
|
+
},
|
|
1137
|
+
'[' => {
|
|
1138
|
+
ignore(eat(buf, '['))
|
|
1139
|
+
match (parseRangeNot(buf)) {
|
|
1140
|
+
Err(e) => Err(e),
|
|
1141
|
+
Ok(rng) => Ok(makeRERange(rng, rangeLimit)),
|
|
1142
|
+
}
|
|
1143
|
+
},
|
|
1144
|
+
'.' => {
|
|
1145
|
+
ignore(eat(buf, '.'))
|
|
1146
|
+
if (buf.config.multiline) {
|
|
1147
|
+
// if in multiline mode, '.' matches everything but \n
|
|
1214
1148
|
Ok(
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
}
|
|
1149
|
+
makeRERange(
|
|
1150
|
+
rangeInvert(rangeAdd([], Char.code('\n')), rangeLimit),
|
|
1151
|
+
rangeLimit
|
|
1152
|
+
),
|
|
1220
1153
|
)
|
|
1221
|
-
}
|
|
1222
|
-
|
|
1154
|
+
} else {
|
|
1155
|
+
Ok(REAny)
|
|
1156
|
+
}
|
|
1157
|
+
},
|
|
1158
|
+
'^' => {
|
|
1159
|
+
ignore(eat(buf, '^'))
|
|
1160
|
+
Ok(if (buf.config.multiline) {
|
|
1161
|
+
RELineStart
|
|
1162
|
+
} else {
|
|
1163
|
+
REStart
|
|
1164
|
+
})
|
|
1223
1165
|
},
|
|
1166
|
+
'$' => {
|
|
1167
|
+
ignore(eat(buf, '$'))
|
|
1168
|
+
Ok(if (buf.config.multiline) {
|
|
1169
|
+
RELineEnd
|
|
1170
|
+
} else {
|
|
1171
|
+
REEnd
|
|
1172
|
+
})
|
|
1173
|
+
},
|
|
1174
|
+
_ => parseLiteral(buf),
|
|
1175
|
+
},
|
|
1224
1176
|
}
|
|
1225
|
-
}
|
|
1226
|
-
parseLook = (buf: RegExBuf) => {
|
|
1177
|
+
}
|
|
1178
|
+
and parseLook = (buf: RegExBuf) => {
|
|
1227
1179
|
let preNumGroups = unbox(buf.config.groupNumber)
|
|
1228
1180
|
let spanNumGroups = () => unbox(buf.config.groupNumber) - preNumGroups
|
|
1229
1181
|
// (isMatch, isAhead)
|
|
@@ -1279,8 +1231,8 @@ parseLook = (buf: RegExBuf) => {
|
|
|
1279
1231
|
box(0),
|
|
1280
1232
|
box(0),
|
|
1281
1233
|
preNumGroups,
|
|
1282
|
-
spanNumGroups()
|
|
1283
|
-
)
|
|
1234
|
+
spanNumGroups(),
|
|
1235
|
+
),
|
|
1284
1236
|
)
|
|
1285
1237
|
}
|
|
1286
1238
|
},
|
|
@@ -1289,8 +1241,8 @@ parseLook = (buf: RegExBuf) => {
|
|
|
1289
1241
|
}
|
|
1290
1242
|
},
|
|
1291
1243
|
}
|
|
1292
|
-
}
|
|
1293
|
-
parseTest = (buf: RegExBuf) => {
|
|
1244
|
+
}
|
|
1245
|
+
and parseTest = (buf: RegExBuf) => {
|
|
1294
1246
|
if (!more(buf)) {
|
|
1295
1247
|
Err(parseErr(buf, "Expected test", 0))
|
|
1296
1248
|
} else {
|
|
@@ -1300,9 +1252,8 @@ parseTest = (buf: RegExBuf) => {
|
|
|
1300
1252
|
ignore(eat(buf, '?'))
|
|
1301
1253
|
parseLook(buf)
|
|
1302
1254
|
},
|
|
1303
|
-
Ok(c) when (
|
|
1304
|
-
Char.code(c)
|
|
1305
|
-
) => {
|
|
1255
|
+
Ok(c) when Char.code(c) >= Char.code('0') &&
|
|
1256
|
+
Char.code(c) <= Char.code('9') => {
|
|
1306
1257
|
buf.config.references := true
|
|
1307
1258
|
let curPos = unbox(buf.cursor)
|
|
1308
1259
|
match (parseInteger(buf, 0)) {
|
|
@@ -1310,7 +1261,7 @@ parseTest = (buf: RegExBuf) => {
|
|
|
1310
1261
|
Ok(n) => {
|
|
1311
1262
|
if (unbox(buf.cursor) == curPos) {
|
|
1312
1263
|
Err(
|
|
1313
|
-
parseErr(buf, "expected `)` after `(?(` followed by digits", 0)
|
|
1264
|
+
parseErr(buf, "expected `)` after `(?(` followed by digits", 0),
|
|
1314
1265
|
)
|
|
1315
1266
|
} else {
|
|
1316
1267
|
match (eat(buf, ')')) {
|
|
@@ -1323,28 +1274,27 @@ parseTest = (buf: RegExBuf) => {
|
|
|
1323
1274
|
},
|
|
1324
1275
|
Ok(_) =>
|
|
1325
1276
|
Err(
|
|
1326
|
-
parseErr(buf, "expected `(?=`, `(?!`, `(?<`, or digit after `(?(`", 0)
|
|
1277
|
+
parseErr(buf, "expected `(?=`, `(?!`, `(?<`, or digit after `(?(`", 0),
|
|
1327
1278
|
),
|
|
1328
1279
|
}
|
|
1329
1280
|
}
|
|
1330
|
-
}
|
|
1331
|
-
parseInteger = (buf: RegExBuf, n) => {
|
|
1281
|
+
}
|
|
1282
|
+
and parseInteger = (buf: RegExBuf, n) => {
|
|
1332
1283
|
if (!more(buf)) {
|
|
1333
1284
|
Ok(n)
|
|
1334
1285
|
} else {
|
|
1335
1286
|
match (peek(buf)) {
|
|
1336
1287
|
Err(c) => Err(c),
|
|
1337
|
-
Ok(c) when (
|
|
1338
|
-
Char.code(c)
|
|
1339
|
-
) => {
|
|
1288
|
+
Ok(c) when Char.code(c) >= Char.code('0') &&
|
|
1289
|
+
Char.code(c) <= Char.code('9') => {
|
|
1340
1290
|
ignore(next(buf))
|
|
1341
1291
|
parseInteger(buf, 10 * n + (Char.code(c) - Char.code('0')))
|
|
1342
1292
|
},
|
|
1343
1293
|
Ok(_) => Ok(n),
|
|
1344
1294
|
}
|
|
1345
1295
|
}
|
|
1346
|
-
}
|
|
1347
|
-
parseMode = (buf: RegExBuf) => {
|
|
1296
|
+
}
|
|
1297
|
+
and parseMode = (buf: RegExBuf) => {
|
|
1348
1298
|
let processState = ((cs, ml)) => {
|
|
1349
1299
|
let withCs = match (cs) {
|
|
1350
1300
|
None => buf.config,
|
|
@@ -1403,8 +1353,8 @@ parseMode = (buf: RegExBuf) => {
|
|
|
1403
1353
|
}
|
|
1404
1354
|
}
|
|
1405
1355
|
help((None, None))
|
|
1406
|
-
}
|
|
1407
|
-
parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
1356
|
+
}
|
|
1357
|
+
and parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
1408
1358
|
if (!more(buf)) {
|
|
1409
1359
|
Err(parseErr(buf, "Expected unicode category", 0))
|
|
1410
1360
|
} else {
|
|
@@ -1449,7 +1399,7 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1449
1399
|
LetterUppercase,
|
|
1450
1400
|
LetterTitlecase,
|
|
1451
1401
|
LetterModifier,
|
|
1452
|
-
]
|
|
1402
|
+
],
|
|
1453
1403
|
),
|
|
1454
1404
|
"Lo" => Ok([LetterOther]),
|
|
1455
1405
|
"L" =>
|
|
@@ -1460,7 +1410,7 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1460
1410
|
LetterTitlecase,
|
|
1461
1411
|
LetterModifier,
|
|
1462
1412
|
LetterOther,
|
|
1463
|
-
]
|
|
1413
|
+
],
|
|
1464
1414
|
),
|
|
1465
1415
|
"Nd" => Ok([NumberDecimalDigit]),
|
|
1466
1416
|
"Nl" => Ok([NumberLetter]),
|
|
@@ -1483,7 +1433,7 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1483
1433
|
PunctuationConnector,
|
|
1484
1434
|
PunctuationDash,
|
|
1485
1435
|
PunctuationOther,
|
|
1486
|
-
]
|
|
1436
|
+
],
|
|
1487
1437
|
),
|
|
1488
1438
|
"Mn" => Ok([MarkNonSpacing]),
|
|
1489
1439
|
"Mc" => Ok([MarkSpacingCombining]),
|
|
@@ -1512,7 +1462,7 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1512
1462
|
OtherSurrogate,
|
|
1513
1463
|
OtherNotAssigned,
|
|
1514
1464
|
OtherPrivateUse,
|
|
1515
|
-
]
|
|
1465
|
+
],
|
|
1516
1466
|
),
|
|
1517
1467
|
"." =>
|
|
1518
1468
|
Ok(
|
|
@@ -1547,19 +1497,19 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1547
1497
|
OtherSurrogate,
|
|
1548
1498
|
OtherNotAssigned,
|
|
1549
1499
|
OtherPrivateUse,
|
|
1550
|
-
]
|
|
1500
|
+
],
|
|
1551
1501
|
),
|
|
1552
1502
|
s =>
|
|
1553
1503
|
Err(
|
|
1554
1504
|
parseErr(
|
|
1555
1505
|
buf,
|
|
1556
1506
|
"Unrecognized property name in `\\" ++
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1507
|
+
pC ++
|
|
1508
|
+
"`: `" ++
|
|
1509
|
+
s ++
|
|
1510
|
+
"`",
|
|
1561
1511
|
0
|
|
1562
|
-
)
|
|
1512
|
+
),
|
|
1563
1513
|
),
|
|
1564
1514
|
}
|
|
1565
1515
|
},
|
|
@@ -1572,8 +1522,8 @@ parseUnicodeCategories = (buf: RegExBuf, pC: String) => {
|
|
|
1572
1522
|
Ok(_) => Err(parseErr(buf, "Expected `{` after `\\" ++ pC ++ "`", 0)),
|
|
1573
1523
|
}
|
|
1574
1524
|
}
|
|
1575
|
-
}
|
|
1576
|
-
parseLiteral = (buf: RegExBuf) => {
|
|
1525
|
+
}
|
|
1526
|
+
and parseLiteral = (buf: RegExBuf) => {
|
|
1577
1527
|
if (!more(buf)) {
|
|
1578
1528
|
Err(parseErr(buf, "Expected literal", 0))
|
|
1579
1529
|
} else {
|
|
@@ -1591,7 +1541,7 @@ parseLiteral = (buf: RegExBuf) => {
|
|
|
1591
1541
|
Ok(')') => Err(parseErr(buf, "Unmatched `)` in pattern", 0)),
|
|
1592
1542
|
Ok(c) when buf.config.isPerlRegExp && (c == ']' || c == '}') =>
|
|
1593
1543
|
Err(
|
|
1594
|
-
parseErr(buf, "unmatched `" ++ Char.toString(c) ++ "` in pattern", 0)
|
|
1544
|
+
parseErr(buf, "unmatched `" ++ Char.toString(c) ++ "` in pattern", 0),
|
|
1595
1545
|
),
|
|
1596
1546
|
// TODO(#691): Enable case-insensitive regular expression matching
|
|
1597
1547
|
Ok(c) when buf.config.caseSensitive => {
|
|
@@ -1607,8 +1557,8 @@ parseLiteral = (buf: RegExBuf) => {
|
|
|
1607
1557
|
},
|
|
1608
1558
|
}
|
|
1609
1559
|
}
|
|
1610
|
-
}
|
|
1611
|
-
parseBackslashLiteral = (buf: RegExBuf) => {
|
|
1560
|
+
}
|
|
1561
|
+
and parseBackslashLiteral = (buf: RegExBuf) => {
|
|
1612
1562
|
if (!more(buf)) {
|
|
1613
1563
|
// Special case: EOS after backslash matches null
|
|
1614
1564
|
Err(parseErr(buf, "Expected to find escaped value after backslash", 0))
|
|
@@ -1616,11 +1566,8 @@ parseBackslashLiteral = (buf: RegExBuf) => {
|
|
|
1616
1566
|
match (peek(buf)) {
|
|
1617
1567
|
Err(e) => Err(e),
|
|
1618
1568
|
// pregexp:
|
|
1619
|
-
Ok(c) when
|
|
1620
|
-
|
|
1621
|
-
(Char.code(c) >= Char.code('0') &&
|
|
1622
|
-
Char.code(c) <= Char.code('9'))
|
|
1623
|
-
) => {
|
|
1569
|
+
Ok(c) when buf.config.isPerlRegExp &&
|
|
1570
|
+
(Char.code(c) >= Char.code('0') && Char.code(c) <= Char.code('9')) => {
|
|
1624
1571
|
buf.config.references := true
|
|
1625
1572
|
match (parseInteger(buf, 0)) {
|
|
1626
1573
|
Err(e) => Err(e),
|
|
@@ -1629,11 +1576,11 @@ parseBackslashLiteral = (buf: RegExBuf) => {
|
|
|
1629
1576
|
},
|
|
1630
1577
|
}
|
|
1631
1578
|
},
|
|
1632
|
-
Ok(c) when
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
Char.code(c) >= Char.code('A') && Char.code(c) <= Char.code('Z')
|
|
1636
|
-
|
|
1579
|
+
Ok(c) when buf.config.isPerlRegExp &&
|
|
1580
|
+
(
|
|
1581
|
+
Char.code(c) >= Char.code('a') && Char.code(c) <= Char.code('z') ||
|
|
1582
|
+
Char.code(c) >= Char.code('A') && Char.code(c) <= Char.code('Z')
|
|
1583
|
+
) => {
|
|
1637
1584
|
match (c) {
|
|
1638
1585
|
'p' => {
|
|
1639
1586
|
ignore(eat(buf, 'p'))
|
|
@@ -1671,15 +1618,15 @@ parseBackslashLiteral = (buf: RegExBuf) => {
|
|
|
1671
1618
|
},
|
|
1672
1619
|
}
|
|
1673
1620
|
}
|
|
1674
|
-
}
|
|
1675
|
-
parseNonGreedy = (buf: RegExBuf) => {
|
|
1621
|
+
}
|
|
1622
|
+
and parseNonGreedy = (buf: RegExBuf) => {
|
|
1676
1623
|
let checkNotNested = res => {
|
|
1677
1624
|
if (!more(buf)) {
|
|
1678
1625
|
res
|
|
1679
1626
|
} else {
|
|
1680
1627
|
match (peek(buf)) {
|
|
1681
1628
|
Err(e) => Err(e),
|
|
1682
|
-
Ok(
|
|
1629
|
+
Ok('?' | '*' | '+' as c) => {
|
|
1683
1630
|
Err(parseErr(buf, "nested '" ++ toString(c) ++ "' in pattern", 0))
|
|
1684
1631
|
},
|
|
1685
1632
|
Ok(_) => res,
|
|
@@ -1698,8 +1645,8 @@ parseNonGreedy = (buf: RegExBuf) => {
|
|
|
1698
1645
|
Ok(_) => checkNotNested(Ok(false)),
|
|
1699
1646
|
}
|
|
1700
1647
|
}
|
|
1701
|
-
}
|
|
1702
|
-
parsePCE = (buf: RegExBuf) => {
|
|
1648
|
+
}
|
|
1649
|
+
and parsePCE = (buf: RegExBuf) => {
|
|
1703
1650
|
match (parseAtom(buf)) {
|
|
1704
1651
|
Err(e) => Err(e),
|
|
1705
1652
|
Ok(atom) => {
|
|
@@ -1762,7 +1709,7 @@ parsePCE = (buf: RegExBuf) => {
|
|
|
1762
1709
|
buf,
|
|
1763
1710
|
"expected digit or `}` to end repetition specification started with `{`",
|
|
1764
1711
|
0
|
|
1765
|
-
)
|
|
1712
|
+
),
|
|
1766
1713
|
),
|
|
1767
1714
|
}
|
|
1768
1715
|
},
|
|
@@ -1782,7 +1729,7 @@ parsePCE = (buf: RegExBuf) => {
|
|
|
1782
1729
|
buf,
|
|
1783
1730
|
"expected digit, `,`, or `}' for repetition specification started with `{`",
|
|
1784
1731
|
0
|
|
1785
|
-
)
|
|
1732
|
+
),
|
|
1786
1733
|
),
|
|
1787
1734
|
}
|
|
1788
1735
|
},
|
|
@@ -1793,8 +1740,8 @@ parsePCE = (buf: RegExBuf) => {
|
|
|
1793
1740
|
}
|
|
1794
1741
|
},
|
|
1795
1742
|
}
|
|
1796
|
-
}
|
|
1797
|
-
parsePCEs = (buf: RegExBuf, toplevel: Bool) => {
|
|
1743
|
+
}
|
|
1744
|
+
and parsePCEs = (buf: RegExBuf, toplevel: Bool) => {
|
|
1798
1745
|
if (!more(buf)) {
|
|
1799
1746
|
Ok([])
|
|
1800
1747
|
} else {
|
|
@@ -1820,8 +1767,8 @@ parsePCEs = (buf: RegExBuf, toplevel: Bool) => {
|
|
|
1820
1767
|
},
|
|
1821
1768
|
}
|
|
1822
1769
|
}
|
|
1823
|
-
}
|
|
1824
|
-
parseRegex = (buf: RegExBuf) => {
|
|
1770
|
+
}
|
|
1771
|
+
and parseRegex = (buf: RegExBuf) => {
|
|
1825
1772
|
if (!more(buf)) {
|
|
1826
1773
|
Ok(REEmpty)
|
|
1827
1774
|
} else {
|
|
@@ -1856,8 +1803,8 @@ parseRegex = (buf: RegExBuf) => {
|
|
|
1856
1803
|
},
|
|
1857
1804
|
}
|
|
1858
1805
|
}
|
|
1859
|
-
}
|
|
1860
|
-
parseRegexNonEmpty = (buf: RegExBuf) => {
|
|
1806
|
+
}
|
|
1807
|
+
and parseRegexNonEmpty = (buf: RegExBuf) => {
|
|
1861
1808
|
match (parsePCEs(buf, false)) {
|
|
1862
1809
|
Err(e) => Err(e),
|
|
1863
1810
|
Ok(pces) => {
|
|
@@ -1931,8 +1878,8 @@ let rec isAnchored = (re: ParsedRegularExpression) => {
|
|
|
1931
1878
|
[] => false,
|
|
1932
1879
|
[hd, ...tl] => {
|
|
1933
1880
|
match (hd) {
|
|
1934
|
-
RELookahead(_, _, _, _)
|
|
1935
|
-
|
|
1881
|
+
RELookahead(_, _, _, _) | RELookbehind(_, _, _, _, _, _) =>
|
|
1882
|
+
loop(tl),
|
|
1936
1883
|
_ => isAnchored(hd),
|
|
1937
1884
|
}
|
|
1938
1885
|
},
|
|
@@ -1942,8 +1889,7 @@ let rec isAnchored = (re: ParsedRegularExpression) => {
|
|
|
1942
1889
|
},
|
|
1943
1890
|
REAlts(a, b) => isAnchored(a) && isAnchored(b),
|
|
1944
1891
|
REConditional(_, rx1, rx2, _, _, _) =>
|
|
1945
|
-
isAnchored(rx1) &&
|
|
1946
|
-
Option.mapWithDefault(isAnchored, false, rx2),
|
|
1892
|
+
isAnchored(rx1) && Option.mapWithDefault(isAnchored, false, rx2),
|
|
1947
1893
|
REGroup(rx, _) => isAnchored(rx),
|
|
1948
1894
|
RECut(rx, _, _, _) => isAnchored(rx),
|
|
1949
1895
|
_ => false,
|
|
@@ -1986,15 +1932,14 @@ let rec mustString = (re: ParsedRegularExpression) => {
|
|
|
1986
1932
|
|
|
1987
1933
|
let rec zeroSized = re => {
|
|
1988
1934
|
match (re) {
|
|
1989
|
-
REEmpty
|
|
1990
|
-
REStart
|
|
1991
|
-
RELineStart
|
|
1992
|
-
REWordBoundary
|
|
1993
|
-
RENotWordBoundary
|
|
1994
|
-
RELookahead(_, _, _, _)
|
|
1935
|
+
REEmpty |
|
|
1936
|
+
REStart |
|
|
1937
|
+
RELineStart |
|
|
1938
|
+
REWordBoundary |
|
|
1939
|
+
RENotWordBoundary |
|
|
1940
|
+
RELookahead(_, _, _, _) |
|
|
1995
1941
|
RELookbehind(_, _, _, _, _, _) => true,
|
|
1996
|
-
REGroup(re, _) => zeroSized(re),
|
|
1997
|
-
RECut(re, _, _, _) => zeroSized(re),
|
|
1942
|
+
REGroup(re, _) | RECut(re, _, _, _) => zeroSized(re),
|
|
1998
1943
|
_ => false,
|
|
1999
1944
|
}
|
|
2000
1945
|
}
|
|
@@ -2077,37 +2022,29 @@ let rec validate = (re: ParsedRegularExpression, numGroups) => {
|
|
|
2077
2022
|
/**
|
|
2078
2023
|
Computes the range of possible UTF-8 byte lengths for the given character range
|
|
2079
2024
|
*/
|
|
2080
|
-
|
|
2081
2025
|
let rangeUtf8EncodingLengths = (rng: CharRange) => {
|
|
2082
|
-
let (min, max, _) = List.reduce(
|
|
2083
|
-
|
|
2084
|
-
(
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2026
|
+
let (min, max, _) = List.reduce(
|
|
2027
|
+
((min1, max1, n), (segStart, segEnd)) => {
|
|
2028
|
+
if (rangeOverlaps(rng, segStart, segEnd)) {
|
|
2029
|
+
(min(min1, n), max(max1, n), n + 1)
|
|
2030
|
+
} else {
|
|
2031
|
+
(min1, max1, n + 1)
|
|
2032
|
+
}
|
|
2033
|
+
},
|
|
2034
|
+
(4, 0, 1),
|
|
2035
|
+
[(0, 127), (128, 0x7ff), (0x800, 0x7fff), (0x10000, 0x10ffff)]
|
|
2091
2036
|
)
|
|
2092
2037
|
(min, max)
|
|
2093
2038
|
}
|
|
2094
2039
|
let rec loop = re => {
|
|
2095
2040
|
match (re) {
|
|
2096
|
-
RENever => (1, 1, 0),
|
|
2097
|
-
REAny => (1, 1, 0),
|
|
2098
|
-
RELiteral(_) => (1, 1, 0),
|
|
2099
|
-
RERange(_) => (1, 1, 0),
|
|
2041
|
+
RENever | REAny | RELiteral(_) | RERange(_) => (1, 1, 0),
|
|
2100
2042
|
RELiteralString(s) => {
|
|
2101
2043
|
let ls = String.length(s)
|
|
2102
2044
|
(ls, ls, 0)
|
|
2103
2045
|
},
|
|
2104
|
-
REEmpty => (0, 0, 0),
|
|
2105
|
-
|
|
2106
|
-
RELineEnd => (0, 0, 0),
|
|
2107
|
-
REStart => (0, 0, 1),
|
|
2108
|
-
RELineStart => (0, 0, 1),
|
|
2109
|
-
REWordBoundary => (0, 0, 1),
|
|
2110
|
-
RENotWordBoundary => (0, 0, 1),
|
|
2046
|
+
REEmpty | REEnd | RELineEnd => (0, 0, 0),
|
|
2047
|
+
REStart | RELineStart | REWordBoundary | RENotWordBoundary => (0, 0, 1),
|
|
2111
2048
|
REAlts(re1, re2) => {
|
|
2112
2049
|
let (min1, max1, maxL1) = loop(re1)
|
|
2113
2050
|
let (min2, max2, maxL2) = loop(re2)
|
|
@@ -2137,7 +2074,7 @@ let rec validate = (re: ParsedRegularExpression, numGroups) => {
|
|
|
2137
2074
|
mustSizes = mergeDependsSizes(oldMustSizes, mustSizes)
|
|
2138
2075
|
dependsSizes = mergeDependsSizes(oldDependsSizes, dependsSizes)
|
|
2139
2076
|
let repeatMax = match (repeatMax) {
|
|
2140
|
-
None =>
|
|
2077
|
+
None => Infinity,
|
|
2141
2078
|
Some(n) => n,
|
|
2142
2079
|
}
|
|
2143
2080
|
(min1 * repeatMin, max1 * repeatMax, maxL1)
|
|
@@ -2163,7 +2100,7 @@ let rec validate = (re: ParsedRegularExpression, numGroups) => {
|
|
|
2163
2100
|
},
|
|
2164
2101
|
RELookbehind(re, _, lbMin, lbMax, _, _) => {
|
|
2165
2102
|
let (min1, max1, maxL1) = loop(re)
|
|
2166
|
-
if (max1 ==
|
|
2103
|
+
if (max1 == Infinity) {
|
|
2167
2104
|
thrownError = Some(DoesNotMatchBounded)
|
|
2168
2105
|
(0, 0, 0)
|
|
2169
2106
|
} else {
|
|
@@ -2172,19 +2109,17 @@ let rec validate = (re: ParsedRegularExpression, numGroups) => {
|
|
|
2172
2109
|
(0, 0, max(max1, maxL1))
|
|
2173
2110
|
}
|
|
2174
2111
|
},
|
|
2175
|
-
RECut(re, _, _, _) =>
|
|
2176
|
-
loop(re)
|
|
2177
|
-
},
|
|
2112
|
+
RECut(re, _, _, _) => loop(re),
|
|
2178
2113
|
REReference(n, _) => {
|
|
2179
2114
|
if (n > numGroups) {
|
|
2180
2115
|
thrownError = Some(BackreferenceTooBig)
|
|
2181
2116
|
(0, 0, 0)
|
|
2182
2117
|
} else {
|
|
2183
2118
|
match (Map.get(n, groupSizes)) {
|
|
2184
|
-
Some(minSize) => (minSize,
|
|
2119
|
+
Some(minSize) => (minSize, Infinity, 0),
|
|
2185
2120
|
None => {
|
|
2186
2121
|
Map.set(n - 1, true, dependsSizes)
|
|
2187
|
-
(1,
|
|
2122
|
+
(1, Infinity, 0)
|
|
2188
2123
|
},
|
|
2189
2124
|
}
|
|
2190
2125
|
}
|
|
@@ -2245,46 +2180,42 @@ let matchBufChar = (buf: MatchBuf, pos: Number) => {
|
|
|
2245
2180
|
}
|
|
2246
2181
|
|
|
2247
2182
|
enum StackElt {
|
|
2248
|
-
SEPositionProducer(Number
|
|
2183
|
+
SEPositionProducer(Number => Option<Number>),
|
|
2249
2184
|
SESavedGroup(Number, Option<(Number, Number)>),
|
|
2250
2185
|
}
|
|
2251
2186
|
|
|
2252
|
-
let done_m =
|
|
2253
|
-
|
|
2254
|
-
|
|
2255
|
-
|
|
2256
|
-
|
|
2257
|
-
|
|
2258
|
-
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
state,
|
|
2271
|
-
stack,
|
|
2272
|
-
) => {
|
|
2187
|
+
let done_m = (
|
|
2188
|
+
buf: MatchBuf,
|
|
2189
|
+
pos: Number,
|
|
2190
|
+
start: Number,
|
|
2191
|
+
limit: Number,
|
|
2192
|
+
end: Number,
|
|
2193
|
+
state,
|
|
2194
|
+
stack,
|
|
2195
|
+
) => Some(pos)
|
|
2196
|
+
let continue_m = (
|
|
2197
|
+
buf: MatchBuf,
|
|
2198
|
+
pos: Number,
|
|
2199
|
+
start: Number,
|
|
2200
|
+
limit: Number,
|
|
2201
|
+
end: Number,
|
|
2202
|
+
state,
|
|
2203
|
+
stack,
|
|
2204
|
+
) => {
|
|
2273
2205
|
match (stack) {
|
|
2274
2206
|
[SEPositionProducer(hd), ..._] => hd(pos),
|
|
2275
2207
|
_ => fail "Impossible: continue_m",
|
|
2276
2208
|
}
|
|
2277
2209
|
}
|
|
2278
|
-
let limit_m =
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
2284
|
-
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
) => if (pos == limit) Some(pos) else None
|
|
2210
|
+
let limit_m = (
|
|
2211
|
+
buf: MatchBuf,
|
|
2212
|
+
pos: Number,
|
|
2213
|
+
start: Number,
|
|
2214
|
+
limit: Number,
|
|
2215
|
+
end: Number,
|
|
2216
|
+
state,
|
|
2217
|
+
stack,
|
|
2218
|
+
) => if (pos == limit) Some(pos) else None
|
|
2288
2219
|
|
|
2289
2220
|
let iterateMatcher = (m, size, max) =>
|
|
2290
2221
|
(
|
|
@@ -2296,20 +2227,20 @@ let iterateMatcher = (m, size, max) =>
|
|
|
2296
2227
|
state,
|
|
2297
2228
|
stack,
|
|
2298
2229
|
) => {
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
|
|
2230
|
+
let limit = match (max) {
|
|
2231
|
+
Some(max) => min(limit, pos + max * size),
|
|
2232
|
+
None => limit,
|
|
2233
|
+
}
|
|
2234
|
+
let rec loop = (pos2, n) => {
|
|
2235
|
+
let pos3 = pos2 + size
|
|
2236
|
+
if (pos3 > limit || !m(buf, pos2, start, limit, end, state, stack)) {
|
|
2237
|
+
(pos2, n, size)
|
|
2238
|
+
} else {
|
|
2239
|
+
loop(pos3, n + 1)
|
|
2240
|
+
}
|
|
2309
2241
|
}
|
|
2242
|
+
loop(pos, 0)
|
|
2310
2243
|
}
|
|
2311
|
-
loop(pos, 0)
|
|
2312
|
-
}
|
|
2313
2244
|
|
|
2314
2245
|
// single-char matching
|
|
2315
2246
|
|
|
@@ -2323,16 +2254,13 @@ let charMatcher = (toMatch, next_m) =>
|
|
|
2323
2254
|
state,
|
|
2324
2255
|
stack,
|
|
2325
2256
|
) => {
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
}
|
|
2334
|
-
) next_m(buf, pos + 1, start, limit, end, state, stack) else None
|
|
2335
|
-
}
|
|
2257
|
+
if ({
|
|
2258
|
+
pos < limit && match (matchBufChar(buf, pos)) {
|
|
2259
|
+
Err(_) => false,
|
|
2260
|
+
Ok(c) => toMatch == c,
|
|
2261
|
+
}
|
|
2262
|
+
}) next_m(buf, pos + 1, start, limit, end, state, stack) else None
|
|
2263
|
+
}
|
|
2336
2264
|
|
|
2337
2265
|
let charTailMatcher = toMatch =>
|
|
2338
2266
|
(
|
|
@@ -2344,50 +2272,49 @@ let charTailMatcher = toMatch =>
|
|
|
2344
2272
|
state,
|
|
2345
2273
|
stack,
|
|
2346
2274
|
) => {
|
|
2347
|
-
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
}
|
|
2355
|
-
) Some(pos + 1) else None
|
|
2356
|
-
}
|
|
2275
|
+
if ({
|
|
2276
|
+
pos < limit && match (matchBufChar(buf, pos)) {
|
|
2277
|
+
Err(_) => false,
|
|
2278
|
+
Ok(c) => toMatch == c,
|
|
2279
|
+
}
|
|
2280
|
+
}) Some(pos + 1) else None
|
|
2281
|
+
}
|
|
2357
2282
|
|
|
2358
2283
|
let charMatcherIterated = (toMatch, max) =>
|
|
2359
|
-
iterateMatcher(
|
|
2360
|
-
|
|
2361
|
-
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
|
|
2372
|
-
|
|
2284
|
+
iterateMatcher(
|
|
2285
|
+
(
|
|
2286
|
+
buf: MatchBuf,
|
|
2287
|
+
pos: Number,
|
|
2288
|
+
start: Number,
|
|
2289
|
+
limit: Number,
|
|
2290
|
+
end: Number,
|
|
2291
|
+
state,
|
|
2292
|
+
stack,
|
|
2293
|
+
) => {
|
|
2294
|
+
match (matchBufChar(buf, pos)) {
|
|
2295
|
+
Err(_) => false,
|
|
2296
|
+
Ok(c) => toMatch == c,
|
|
2297
|
+
}
|
|
2298
|
+
},
|
|
2299
|
+
1,
|
|
2300
|
+
max
|
|
2301
|
+
)
|
|
2373
2302
|
|
|
2374
2303
|
// string matching
|
|
2375
2304
|
|
|
2376
2305
|
let subArraysEqual = (arr1, start1, arr2, start2, length) => {
|
|
2377
2306
|
if (
|
|
2378
|
-
Array.length(arr1) - start1 < length ||
|
|
2307
|
+
Array.length(arr1) - start1 < length ||
|
|
2308
|
+
Array.length(arr2) - start2 < length
|
|
2379
2309
|
) {
|
|
2380
|
-
false
|
|
2381
|
-
}
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
result = false
|
|
2386
|
-
break
|
|
2387
|
-
}
|
|
2310
|
+
return false
|
|
2311
|
+
}
|
|
2312
|
+
for (let mut i = 0; i < length; i += 1) {
|
|
2313
|
+
if (arr1[start1 + i] != arr2[start2 + i]) {
|
|
2314
|
+
return false
|
|
2388
2315
|
}
|
|
2389
|
-
result
|
|
2390
2316
|
}
|
|
2317
|
+
return true
|
|
2391
2318
|
}
|
|
2392
2319
|
|
|
2393
2320
|
let stringMatcher = (toMatch, len, next_m) =>
|
|
@@ -2400,8 +2327,7 @@ let stringMatcher = (toMatch, len, next_m) =>
|
|
|
2400
2327
|
state,
|
|
2401
2328
|
stack,
|
|
2402
2329
|
) => {
|
|
2403
|
-
|
|
2404
|
-
{
|
|
2330
|
+
if ({
|
|
2405
2331
|
pos + len <= limit &&
|
|
2406
2332
|
subArraysEqual(
|
|
2407
2333
|
buf.matchInputExploded,
|
|
@@ -2410,9 +2336,8 @@ let stringMatcher = (toMatch, len, next_m) =>
|
|
|
2410
2336
|
0,
|
|
2411
2337
|
len
|
|
2412
2338
|
)
|
|
2413
|
-
}
|
|
2414
|
-
|
|
2415
|
-
}
|
|
2339
|
+
}) next_m(buf, pos + len, start, limit, end, state, stack) else None
|
|
2340
|
+
}
|
|
2416
2341
|
|
|
2417
2342
|
let stringTailMatcher = (toMatch, len) =>
|
|
2418
2343
|
(
|
|
@@ -2424,8 +2349,7 @@ let stringTailMatcher = (toMatch, len) =>
|
|
|
2424
2349
|
state,
|
|
2425
2350
|
stack,
|
|
2426
2351
|
) => {
|
|
2427
|
-
|
|
2428
|
-
{
|
|
2352
|
+
if ({
|
|
2429
2353
|
pos + len <= limit &&
|
|
2430
2354
|
subArraysEqual(
|
|
2431
2355
|
buf.matchInputExploded,
|
|
@@ -2434,27 +2358,50 @@ let stringTailMatcher = (toMatch, len) =>
|
|
|
2434
2358
|
0,
|
|
2435
2359
|
len
|
|
2436
2360
|
)
|
|
2437
|
-
}
|
|
2438
|
-
|
|
2439
|
-
}
|
|
2361
|
+
}) Some(pos + len) else None
|
|
2362
|
+
}
|
|
2440
2363
|
|
|
2441
2364
|
let stringMatcherIterated = (toMatch, len, max) =>
|
|
2442
|
-
iterateMatcher(
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
2365
|
+
iterateMatcher(
|
|
2366
|
+
(
|
|
2367
|
+
buf: MatchBuf,
|
|
2368
|
+
pos: Number,
|
|
2369
|
+
start: Number,
|
|
2370
|
+
limit: Number,
|
|
2371
|
+
end: Number,
|
|
2372
|
+
state,
|
|
2373
|
+
stack,
|
|
2374
|
+
) => {
|
|
2375
|
+
subArraysEqual(
|
|
2376
|
+
buf.matchInputExploded,
|
|
2377
|
+
pos,
|
|
2378
|
+
String.explode(toMatch),
|
|
2379
|
+
0,
|
|
2380
|
+
len
|
|
2381
|
+
)
|
|
2382
|
+
},
|
|
2383
|
+
len,
|
|
2384
|
+
max
|
|
2385
|
+
)
|
|
2386
|
+
|
|
2387
|
+
// match nothing
|
|
2388
|
+
|
|
2389
|
+
let neverMatcher = (
|
|
2390
|
+
buf: MatchBuf,
|
|
2391
|
+
pos: Number,
|
|
2392
|
+
start: Number,
|
|
2393
|
+
limit: Number,
|
|
2394
|
+
end: Number,
|
|
2395
|
+
state,
|
|
2396
|
+
stack,
|
|
2397
|
+
) => {
|
|
2398
|
+
None
|
|
2399
|
+
}
|
|
2400
|
+
|
|
2401
|
+
// match any byte
|
|
2402
|
+
|
|
2403
|
+
let anyMatcher = next_m =>
|
|
2404
|
+
(
|
|
2458
2405
|
buf: MatchBuf,
|
|
2459
2406
|
pos: Number,
|
|
2460
2407
|
start: Number,
|
|
@@ -2463,27 +2410,10 @@ let neverMatcher =
|
|
|
2463
2410
|
state,
|
|
2464
2411
|
stack,
|
|
2465
2412
|
) => {
|
|
2466
|
-
|
|
2467
|
-
}
|
|
2468
|
-
|
|
2469
|
-
// match any byte
|
|
2470
|
-
|
|
2471
|
-
let anyMatcher = next_m =>
|
|
2472
|
-
(
|
|
2473
|
-
buf: MatchBuf,
|
|
2474
|
-
pos: Number,
|
|
2475
|
-
start: Number,
|
|
2476
|
-
limit: Number,
|
|
2477
|
-
end: Number,
|
|
2478
|
-
state,
|
|
2479
|
-
stack,
|
|
2480
|
-
) => {
|
|
2481
|
-
if (
|
|
2482
|
-
{
|
|
2413
|
+
if ({
|
|
2483
2414
|
pos < limit
|
|
2484
|
-
}
|
|
2485
|
-
|
|
2486
|
-
}
|
|
2415
|
+
}) next_m(buf, pos + 1, start, limit, end, state, stack) else None
|
|
2416
|
+
}
|
|
2487
2417
|
|
|
2488
2418
|
let anyTailMatcher = () =>
|
|
2489
2419
|
(
|
|
@@ -2495,12 +2425,10 @@ let anyTailMatcher = () =>
|
|
|
2495
2425
|
state,
|
|
2496
2426
|
stack,
|
|
2497
2427
|
) => {
|
|
2498
|
-
|
|
2499
|
-
{
|
|
2428
|
+
if ({
|
|
2500
2429
|
pos < limit
|
|
2501
|
-
}
|
|
2502
|
-
|
|
2503
|
-
}
|
|
2430
|
+
}) Some(pos + 1) else None
|
|
2431
|
+
}
|
|
2504
2432
|
|
|
2505
2433
|
let anyMatcherIterated = max =>
|
|
2506
2434
|
(
|
|
@@ -2512,12 +2440,12 @@ let anyMatcherIterated = max =>
|
|
|
2512
2440
|
state,
|
|
2513
2441
|
stack,
|
|
2514
2442
|
) => {
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2443
|
+
let n = match (max) {
|
|
2444
|
+
None => limit - pos,
|
|
2445
|
+
Some(max) => min(max, limit - pos),
|
|
2446
|
+
}
|
|
2447
|
+
(pos + n, n, 1)
|
|
2518
2448
|
}
|
|
2519
|
-
(pos + n, n, 1)
|
|
2520
|
-
}
|
|
2521
2449
|
|
|
2522
2450
|
// match byte in set (range)
|
|
2523
2451
|
|
|
@@ -2531,16 +2459,13 @@ let rangeMatcher = (rng: CharRange, next_m) =>
|
|
|
2531
2459
|
state,
|
|
2532
2460
|
stack,
|
|
2533
2461
|
) => {
|
|
2534
|
-
|
|
2535
|
-
|
|
2536
|
-
|
|
2537
|
-
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2541
|
-
}
|
|
2542
|
-
) next_m(buf, pos + 1, start, limit, end, state, stack) else None
|
|
2543
|
-
}
|
|
2462
|
+
if ({
|
|
2463
|
+
pos < limit && match (matchBufChar(buf, pos)) {
|
|
2464
|
+
Err(_) => false,
|
|
2465
|
+
Ok(c) => rangeContains(rng, Char.code(c)),
|
|
2466
|
+
}
|
|
2467
|
+
}) next_m(buf, pos + 1, start, limit, end, state, stack) else None
|
|
2468
|
+
}
|
|
2544
2469
|
|
|
2545
2470
|
let rangeTailMatcher = (rng: CharRange) =>
|
|
2546
2471
|
(
|
|
@@ -2552,32 +2477,33 @@ let rangeTailMatcher = (rng: CharRange) =>
|
|
|
2552
2477
|
state,
|
|
2553
2478
|
stack,
|
|
2554
2479
|
) => {
|
|
2555
|
-
|
|
2556
|
-
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2562
|
-
}
|
|
2563
|
-
) Some(pos + 1) else None
|
|
2564
|
-
}
|
|
2480
|
+
if ({
|
|
2481
|
+
pos < limit && match (matchBufChar(buf, pos)) {
|
|
2482
|
+
Err(_) => false,
|
|
2483
|
+
Ok(c) => rangeContains(rng, Char.code(c)),
|
|
2484
|
+
}
|
|
2485
|
+
}) Some(pos + 1) else None
|
|
2486
|
+
}
|
|
2565
2487
|
|
|
2566
2488
|
let rangeMatcherIterated = (rng: CharRange, max) =>
|
|
2567
|
-
iterateMatcher(
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
|
|
2575
|
-
|
|
2576
|
-
|
|
2577
|
-
|
|
2578
|
-
|
|
2579
|
-
|
|
2580
|
-
|
|
2489
|
+
iterateMatcher(
|
|
2490
|
+
(
|
|
2491
|
+
buf: MatchBuf,
|
|
2492
|
+
pos: Number,
|
|
2493
|
+
start: Number,
|
|
2494
|
+
limit: Number,
|
|
2495
|
+
end: Number,
|
|
2496
|
+
state,
|
|
2497
|
+
stack,
|
|
2498
|
+
) => {
|
|
2499
|
+
match (matchBufChar(buf, pos)) {
|
|
2500
|
+
Err(_) => false,
|
|
2501
|
+
Ok(c) => rangeContains(rng, Char.code(c)),
|
|
2502
|
+
}
|
|
2503
|
+
},
|
|
2504
|
+
1,
|
|
2505
|
+
max
|
|
2506
|
+
)
|
|
2581
2507
|
|
|
2582
2508
|
// zero-width matchers
|
|
2583
2509
|
|
|
@@ -2591,8 +2517,11 @@ let startMatcher = next_m =>
|
|
|
2591
2517
|
state,
|
|
2592
2518
|
stack,
|
|
2593
2519
|
) => {
|
|
2594
|
-
|
|
2595
|
-
|
|
2520
|
+
if (pos == start)
|
|
2521
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
2522
|
+
else
|
|
2523
|
+
None
|
|
2524
|
+
}
|
|
2596
2525
|
|
|
2597
2526
|
let endMatcher = next_m =>
|
|
2598
2527
|
(
|
|
@@ -2604,8 +2533,8 @@ let endMatcher = next_m =>
|
|
|
2604
2533
|
state,
|
|
2605
2534
|
stack,
|
|
2606
2535
|
) => {
|
|
2607
|
-
|
|
2608
|
-
}
|
|
2536
|
+
if (pos == end) next_m(buf, pos, start, limit, end, state, stack) else None
|
|
2537
|
+
}
|
|
2609
2538
|
|
|
2610
2539
|
let lineStartMatcher = next_m =>
|
|
2611
2540
|
(
|
|
@@ -2617,9 +2546,11 @@ let lineStartMatcher = next_m =>
|
|
|
2617
2546
|
state,
|
|
2618
2547
|
stack,
|
|
2619
2548
|
) => {
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2549
|
+
if (pos == start || matchBufChar(buf, pos - 1) == Ok('\n'))
|
|
2550
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
2551
|
+
else
|
|
2552
|
+
None
|
|
2553
|
+
}
|
|
2623
2554
|
|
|
2624
2555
|
let lineEndMatcher = next_m =>
|
|
2625
2556
|
(
|
|
@@ -2631,24 +2562,20 @@ let lineEndMatcher = next_m =>
|
|
|
2631
2562
|
state,
|
|
2632
2563
|
stack,
|
|
2633
2564
|
) => {
|
|
2634
|
-
|
|
2635
|
-
|
|
2636
|
-
|
|
2565
|
+
if (pos == end || matchBufChar(buf, pos) == Ok('\n'))
|
|
2566
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
2567
|
+
else
|
|
2568
|
+
None
|
|
2569
|
+
}
|
|
2637
2570
|
|
|
2638
2571
|
let isWordChar = c => {
|
|
2639
2572
|
match (c) {
|
|
2640
2573
|
Err(_) => false,
|
|
2641
|
-
Ok(c) when (
|
|
2642
|
-
Char.code('0') <= Char.code(c) && Char.code(c) <= Char.code('9')
|
|
2643
|
-
) =>
|
|
2574
|
+
Ok(c) when Char.code('0') <= Char.code(c) && Char.code(c) <= Char.code('9') =>
|
|
2644
2575
|
true,
|
|
2645
|
-
Ok(c) when (
|
|
2646
|
-
Char.code('a') <= Char.code(c) && Char.code(c) <= Char.code('z')
|
|
2647
|
-
) =>
|
|
2576
|
+
Ok(c) when Char.code('a') <= Char.code(c) && Char.code(c) <= Char.code('z') =>
|
|
2648
2577
|
true,
|
|
2649
|
-
Ok(c) when (
|
|
2650
|
-
Char.code('A') <= Char.code(c) && Char.code(c) <= Char.code('Z')
|
|
2651
|
-
) =>
|
|
2578
|
+
Ok(c) when Char.code('A') <= Char.code(c) && Char.code(c) <= Char.code('Z') =>
|
|
2652
2579
|
true,
|
|
2653
2580
|
Ok(c) when Char.code('_') <= Char.code(c) => true,
|
|
2654
2581
|
_ => false,
|
|
@@ -2656,8 +2583,10 @@ let isWordChar = c => {
|
|
|
2656
2583
|
}
|
|
2657
2584
|
|
|
2658
2585
|
let isWordBoundary = (buf, pos, start, limit, end) => {
|
|
2659
|
-
!(
|
|
2660
|
-
(pos ==
|
|
2586
|
+
!(
|
|
2587
|
+
(pos == start || !isWordChar(matchBufChar(buf, pos - 1))) ==
|
|
2588
|
+
(pos == end || !isWordChar(matchBufChar(buf, pos)))
|
|
2589
|
+
)
|
|
2661
2590
|
}
|
|
2662
2591
|
|
|
2663
2592
|
let wordBoundaryMatcher = next_m =>
|
|
@@ -2670,9 +2599,11 @@ let wordBoundaryMatcher = next_m =>
|
|
|
2670
2599
|
state,
|
|
2671
2600
|
stack,
|
|
2672
2601
|
) => {
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2602
|
+
if (isWordBoundary(buf, pos, start, limit, end))
|
|
2603
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
2604
|
+
else
|
|
2605
|
+
None
|
|
2606
|
+
}
|
|
2676
2607
|
|
|
2677
2608
|
let notWordBoundaryMatcher = next_m =>
|
|
2678
2609
|
(
|
|
@@ -2684,9 +2615,11 @@ let notWordBoundaryMatcher = next_m =>
|
|
|
2684
2615
|
state,
|
|
2685
2616
|
stack,
|
|
2686
2617
|
) => {
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
|
|
2618
|
+
if (!isWordBoundary(buf, pos, start, limit, end))
|
|
2619
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
2620
|
+
else
|
|
2621
|
+
None
|
|
2622
|
+
}
|
|
2690
2623
|
|
|
2691
2624
|
// Alternatives
|
|
2692
2625
|
|
|
@@ -2700,11 +2633,11 @@ let altsMatcher = (m1, m2) =>
|
|
|
2700
2633
|
state,
|
|
2701
2634
|
stack,
|
|
2702
2635
|
) => {
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2636
|
+
match (m1(buf, pos, start, limit, end, state, stack)) {
|
|
2637
|
+
None => m2(buf, pos, start, limit, end, state, stack),
|
|
2638
|
+
Some(v) => Some(v),
|
|
2639
|
+
}
|
|
2706
2640
|
}
|
|
2707
|
-
}
|
|
2708
2641
|
|
|
2709
2642
|
// repeats, greedy (default) and non-greedy
|
|
2710
2643
|
|
|
@@ -2718,27 +2651,25 @@ let repeatMatcher = (r_m, min, max, next_m) =>
|
|
|
2718
2651
|
state,
|
|
2719
2652
|
stack,
|
|
2720
2653
|
) => {
|
|
2721
|
-
|
|
2722
|
-
|
|
2723
|
-
|
|
2724
|
-
|
|
2725
|
-
|
|
2726
|
-
match (max) {
|
|
2654
|
+
let rec rloop = (pos, n) => {
|
|
2655
|
+
if (n < min) {
|
|
2656
|
+
let newStack = [SEPositionProducer(pos => rloop(pos, n + 1)), ...stack]
|
|
2657
|
+
r_m(buf, pos, start, limit, end, state, newStack)
|
|
2658
|
+
} else if (match (max) {
|
|
2727
2659
|
None => false,
|
|
2728
2660
|
Some(max) => max == n,
|
|
2729
|
-
}
|
|
2730
|
-
|
|
2731
|
-
|
|
2732
|
-
|
|
2733
|
-
|
|
2734
|
-
|
|
2735
|
-
|
|
2736
|
-
|
|
2661
|
+
}) {
|
|
2662
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
2663
|
+
} else {
|
|
2664
|
+
let newStack = [SEPositionProducer(pos => rloop(pos, n + 1)), ...stack]
|
|
2665
|
+
match (r_m(buf, pos, start, limit, end, state, newStack)) {
|
|
2666
|
+
Some(v) => Some(v),
|
|
2667
|
+
None => next_m(buf, pos, start, limit, end, state, stack),
|
|
2668
|
+
}
|
|
2737
2669
|
}
|
|
2738
2670
|
}
|
|
2671
|
+
rloop(pos, 0)
|
|
2739
2672
|
}
|
|
2740
|
-
rloop(pos, 0)
|
|
2741
|
-
}
|
|
2742
2673
|
|
|
2743
2674
|
let rStack = [SEPositionProducer(pos => Some(pos))]
|
|
2744
2675
|
|
|
@@ -2795,44 +2726,45 @@ let repeatSimpleMatcher = (r_m, min, max, groupN, next_m) =>
|
|
|
2795
2726
|
state,
|
|
2796
2727
|
stack,
|
|
2797
2728
|
) => {
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2729
|
+
let rec rloop = (pos, n, backAmt) => {
|
|
2730
|
+
let pos2 = match (max) {
|
|
2731
|
+
Some(max) when n < max =>
|
|
2732
|
+
r_m(buf, pos, start, limit, end, state, rStack),
|
|
2733
|
+
Some(_) => None,
|
|
2734
|
+
_ => r_m(buf, pos, start, limit, end, state, rStack),
|
|
2735
|
+
}
|
|
2736
|
+
match (pos2) {
|
|
2737
|
+
Some(pos2) => rloop(pos2, n + 1, pos2 - pos),
|
|
2738
|
+
None => {
|
|
2739
|
+
// Perform backtracking
|
|
2740
|
+
let rec bloop = (pos, n) => {
|
|
2741
|
+
if (n < min) {
|
|
2742
|
+
None
|
|
2743
|
+
} else {
|
|
2744
|
+
addRepeatedGroup(
|
|
2745
|
+
groupN,
|
|
2746
|
+
state,
|
|
2747
|
+
pos,
|
|
2748
|
+
n,
|
|
2749
|
+
backAmt,
|
|
2750
|
+
groupRevert => {
|
|
2751
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2752
|
+
Some(v) => Some(v),
|
|
2753
|
+
None => {
|
|
2754
|
+
groupRevert()
|
|
2755
|
+
bloop(pos - backAmt, n - 1)
|
|
2756
|
+
},
|
|
2757
|
+
}
|
|
2825
2758
|
}
|
|
2826
|
-
|
|
2827
|
-
|
|
2759
|
+
)
|
|
2760
|
+
}
|
|
2828
2761
|
}
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
}
|
|
2762
|
+
bloop(pos, n)
|
|
2763
|
+
},
|
|
2764
|
+
}
|
|
2832
2765
|
}
|
|
2766
|
+
rloop(pos, 0, 0)
|
|
2833
2767
|
}
|
|
2834
|
-
rloop(pos, 0, 0)
|
|
2835
|
-
}
|
|
2836
2768
|
|
|
2837
2769
|
let repeatSimpleManyMatcher = (r_m, min, max, groupN, next_m) =>
|
|
2838
2770
|
(
|
|
@@ -2844,18 +2776,12 @@ let repeatSimpleManyMatcher = (r_m, min, max, groupN, next_m) =>
|
|
|
2844
2776
|
state,
|
|
2845
2777
|
stack,
|
|
2846
2778
|
) => {
|
|
2847
|
-
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
groupN,
|
|
2854
|
-
state,
|
|
2855
|
-
pos,
|
|
2856
|
-
n,
|
|
2857
|
-
backAmt,
|
|
2858
|
-
groupRevert => {
|
|
2779
|
+
let (pos2, n, backAmt) = r_m(buf, pos, start, limit, end, state, stack)
|
|
2780
|
+
let rec bloop = (pos, n) => {
|
|
2781
|
+
if (n < min) {
|
|
2782
|
+
None
|
|
2783
|
+
} else {
|
|
2784
|
+
addRepeatedGroup(groupN, state, pos, n, backAmt, groupRevert => {
|
|
2859
2785
|
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2860
2786
|
Some(v) => Some(v),
|
|
2861
2787
|
None => {
|
|
@@ -2863,12 +2789,11 @@ let repeatSimpleManyMatcher = (r_m, min, max, groupN, next_m) =>
|
|
|
2863
2789
|
bloop(pos - backAmt, n - 1)
|
|
2864
2790
|
},
|
|
2865
2791
|
}
|
|
2866
|
-
}
|
|
2867
|
-
|
|
2792
|
+
})
|
|
2793
|
+
}
|
|
2868
2794
|
}
|
|
2795
|
+
bloop(pos2, n)
|
|
2869
2796
|
}
|
|
2870
|
-
bloop(pos2, n)
|
|
2871
|
-
}
|
|
2872
2797
|
|
|
2873
2798
|
let lazyRepeatMatcher = (r_m, min, max, next_m) =>
|
|
2874
2799
|
(
|
|
@@ -2880,29 +2805,27 @@ let lazyRepeatMatcher = (r_m, min, max, next_m) =>
|
|
|
2880
2805
|
state,
|
|
2881
2806
|
stack,
|
|
2882
2807
|
) => {
|
|
2883
|
-
|
|
2884
|
-
|
|
2885
|
-
|
|
2886
|
-
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
match (max) {
|
|
2808
|
+
let rec rloop = (pos, n, min) => {
|
|
2809
|
+
if (n < min) {
|
|
2810
|
+
let newStack = [
|
|
2811
|
+
SEPositionProducer(pos => rloop(pos, n + 1, min)),
|
|
2812
|
+
...stack
|
|
2813
|
+
]
|
|
2814
|
+
r_m(buf, pos, start, limit, end, state, newStack)
|
|
2815
|
+
} else if (match (max) {
|
|
2892
2816
|
None => false,
|
|
2893
2817
|
Some(max) => max == n,
|
|
2894
|
-
}
|
|
2895
|
-
|
|
2896
|
-
|
|
2897
|
-
|
|
2898
|
-
|
|
2899
|
-
|
|
2900
|
-
|
|
2818
|
+
}) {
|
|
2819
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
2820
|
+
} else {
|
|
2821
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2822
|
+
Some(p) => Some(p),
|
|
2823
|
+
None => rloop(pos, n, min + 1),
|
|
2824
|
+
}
|
|
2901
2825
|
}
|
|
2902
2826
|
}
|
|
2827
|
+
rloop(pos, 0, min)
|
|
2903
2828
|
}
|
|
2904
|
-
rloop(pos, 0, min)
|
|
2905
|
-
}
|
|
2906
2829
|
|
|
2907
2830
|
let lazyRepeatSimpleMatcher = (r_m, min, max, next_m) =>
|
|
2908
2831
|
(
|
|
@@ -2914,28 +2837,26 @@ let lazyRepeatSimpleMatcher = (r_m, min, max, next_m) =>
|
|
|
2914
2837
|
state,
|
|
2915
2838
|
stack,
|
|
2916
2839
|
) => {
|
|
2917
|
-
|
|
2918
|
-
|
|
2919
|
-
|
|
2920
|
-
|
|
2921
|
-
|
|
2922
|
-
|
|
2923
|
-
|
|
2924
|
-
match (max) {
|
|
2840
|
+
let rec rloop = (pos, n, min) => {
|
|
2841
|
+
if (n < min) {
|
|
2842
|
+
match (r_m(buf, pos, start, limit, end, state, stack)) {
|
|
2843
|
+
Some(p) => rloop(p, n + 1, min),
|
|
2844
|
+
None => None,
|
|
2845
|
+
}
|
|
2846
|
+
} else if (match (max) {
|
|
2925
2847
|
None => false,
|
|
2926
2848
|
Some(max) => max == n,
|
|
2927
|
-
}
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
|
-
|
|
2931
|
-
|
|
2932
|
-
|
|
2933
|
-
|
|
2849
|
+
}) {
|
|
2850
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
2851
|
+
} else {
|
|
2852
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2853
|
+
Some(p) => Some(p),
|
|
2854
|
+
None => rloop(pos, n, min + 1),
|
|
2855
|
+
}
|
|
2934
2856
|
}
|
|
2935
2857
|
}
|
|
2858
|
+
rloop(pos, 0, min)
|
|
2936
2859
|
}
|
|
2937
|
-
rloop(pos, 0, min)
|
|
2938
|
-
}
|
|
2939
2860
|
|
|
2940
2861
|
// Recording and referencing group matches
|
|
2941
2862
|
|
|
@@ -2949,12 +2870,12 @@ let groupPushMatcher = (n, next_m) =>
|
|
|
2949
2870
|
state,
|
|
2950
2871
|
stack,
|
|
2951
2872
|
) => {
|
|
2952
|
-
|
|
2953
|
-
|
|
2954
|
-
|
|
2955
|
-
|
|
2956
|
-
|
|
2957
|
-
}
|
|
2873
|
+
let newStack = [
|
|
2874
|
+
SESavedGroup(pos, if (Array.length(state) > 0) state[n] else None),
|
|
2875
|
+
...stack
|
|
2876
|
+
]
|
|
2877
|
+
next_m(buf, pos, start, limit, end, state, newStack)
|
|
2878
|
+
}
|
|
2958
2879
|
|
|
2959
2880
|
let groupSetMatcher = (n, next_m) =>
|
|
2960
2881
|
(
|
|
@@ -2966,56 +2887,57 @@ let groupSetMatcher = (n, next_m) =>
|
|
|
2966
2887
|
state,
|
|
2967
2888
|
stack,
|
|
2968
2889
|
) => {
|
|
2969
|
-
|
|
2970
|
-
|
|
2971
|
-
|
|
2972
|
-
|
|
2973
|
-
|
|
2974
|
-
|
|
2975
|
-
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
|
|
2979
|
-
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2890
|
+
match (stack) {
|
|
2891
|
+
[SESavedGroup(oldPos, oldSpan), ...stackTl] => {
|
|
2892
|
+
if (Array.length(state) > 0) {
|
|
2893
|
+
state[n] = Some((oldPos, pos))
|
|
2894
|
+
}
|
|
2895
|
+
match (next_m(buf, pos, start, limit, end, state, stackTl)) {
|
|
2896
|
+
Some(v) => Some(v),
|
|
2897
|
+
None => {
|
|
2898
|
+
if (Array.length(state) > 0) {
|
|
2899
|
+
state[n] = oldSpan
|
|
2900
|
+
}
|
|
2901
|
+
None
|
|
2902
|
+
},
|
|
2903
|
+
}
|
|
2904
|
+
},
|
|
2905
|
+
_ => fail "Impossible: groupSetMatcher",
|
|
2906
|
+
}
|
|
2985
2907
|
}
|
|
2986
|
-
}
|
|
2987
2908
|
|
|
2988
|
-
let makeReferenceMatcher = eq =>
|
|
2989
|
-
(
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
|
|
3002
|
-
|
|
3003
|
-
|
|
3004
|
-
|
|
3005
|
-
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3013
|
-
|
|
3014
|
-
|
|
2909
|
+
let makeReferenceMatcher = eq =>
|
|
2910
|
+
(n, next_m) =>
|
|
2911
|
+
(
|
|
2912
|
+
buf: MatchBuf,
|
|
2913
|
+
pos: Number,
|
|
2914
|
+
start: Number,
|
|
2915
|
+
limit: Number,
|
|
2916
|
+
end: Number,
|
|
2917
|
+
state,
|
|
2918
|
+
stack,
|
|
2919
|
+
) => {
|
|
2920
|
+
match (state[n]) {
|
|
2921
|
+
None => None,
|
|
2922
|
+
Some((refStart, refEnd)) => {
|
|
2923
|
+
let len = refEnd - refStart
|
|
2924
|
+
if (
|
|
2925
|
+
pos + len <= limit &&
|
|
2926
|
+
subArraysEqual(
|
|
2927
|
+
buf.matchInputExploded,
|
|
2928
|
+
refStart,
|
|
2929
|
+
buf.matchInputExploded,
|
|
2930
|
+
pos,
|
|
2931
|
+
len
|
|
2932
|
+
)
|
|
2933
|
+
) {
|
|
2934
|
+
next_m(buf, pos + len, start, limit, end, state, stack)
|
|
2935
|
+
} else {
|
|
2936
|
+
None
|
|
2937
|
+
}
|
|
2938
|
+
},
|
|
3015
2939
|
}
|
|
3016
|
-
}
|
|
3017
|
-
}
|
|
3018
|
-
}
|
|
2940
|
+
}
|
|
3019
2941
|
|
|
3020
2942
|
let referenceMatcher = makeReferenceMatcher(((a, b)) => a == b)
|
|
3021
2943
|
|
|
@@ -3027,8 +2949,9 @@ let asciiCharToLower = c => {
|
|
|
3027
2949
|
}
|
|
3028
2950
|
}
|
|
3029
2951
|
|
|
3030
|
-
let referenceMatcherCaseInsensitive = makeReferenceMatcher(
|
|
3031
|
-
asciiCharToLower(a) == asciiCharToLower(b)
|
|
2952
|
+
let referenceMatcherCaseInsensitive = makeReferenceMatcher(
|
|
2953
|
+
((a, b)) => asciiCharToLower(a) == asciiCharToLower(b)
|
|
2954
|
+
)
|
|
3032
2955
|
|
|
3033
2956
|
// Lookahead, Lookbehind, Conditionals, and Cut
|
|
3034
2957
|
|
|
@@ -3042,29 +2965,29 @@ let lookaheadMatcher = (isMatch, sub_m, nStart, numN, next_m) =>
|
|
|
3042
2965
|
state,
|
|
3043
2966
|
stack,
|
|
3044
2967
|
) => {
|
|
3045
|
-
|
|
3046
|
-
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
|
|
3050
|
-
|
|
3051
|
-
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
|
|
2968
|
+
let oldState = saveGroups(state, nStart, numN)
|
|
2969
|
+
let ret = match (sub_m(buf, pos, start, limit, end, state, stack)) {
|
|
2970
|
+
Some(_) when isMatch => {
|
|
2971
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
2972
|
+
Some(p) => Some(p),
|
|
2973
|
+
None => {
|
|
2974
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
2975
|
+
None
|
|
2976
|
+
},
|
|
2977
|
+
}
|
|
2978
|
+
},
|
|
2979
|
+
Some(_) => {
|
|
2980
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
2981
|
+
None
|
|
2982
|
+
},
|
|
2983
|
+
None when isMatch => {
|
|
2984
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
2985
|
+
None
|
|
2986
|
+
},
|
|
2987
|
+
_ => next_m(buf, pos, start, limit, end, state, stack),
|
|
2988
|
+
}
|
|
2989
|
+
ret
|
|
3065
2990
|
}
|
|
3066
|
-
ret
|
|
3067
|
-
}
|
|
3068
2991
|
|
|
3069
2992
|
let lookbehindMatcher = (isMatch, lbMin, lbMax, sub_m, nStart, numN, next_m) =>
|
|
3070
2993
|
(
|
|
@@ -3076,39 +2999,39 @@ let lookbehindMatcher = (isMatch, lbMin, lbMax, sub_m, nStart, numN, next_m) =>
|
|
|
3076
2999
|
state,
|
|
3077
3000
|
stack,
|
|
3078
3001
|
) => {
|
|
3079
|
-
|
|
3080
|
-
|
|
3081
|
-
|
|
3082
|
-
|
|
3083
|
-
None
|
|
3084
|
-
} else {
|
|
3085
|
-
next_m(buf, pos, start, limit, end, state, stack)
|
|
3086
|
-
}
|
|
3087
|
-
} else {
|
|
3088
|
-
let oldState = saveGroups(state, nStart, numN)
|
|
3089
|
-
match (sub_m(buf, lbPos, start, pos, end, state, stack)) {
|
|
3090
|
-
Some(_) when isMatch => {
|
|
3091
|
-
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
3092
|
-
Some(p) => Some(p),
|
|
3093
|
-
None => {
|
|
3094
|
-
restoreGroups(state, oldState, nStart, numN)
|
|
3095
|
-
None
|
|
3096
|
-
},
|
|
3097
|
-
}
|
|
3098
|
-
},
|
|
3099
|
-
_ when isMatch => {
|
|
3100
|
-
loop(lbPos - 1)
|
|
3101
|
-
},
|
|
3102
|
-
Some(_) => {
|
|
3103
|
-
restoreGroups(state, oldState, nStart, numN)
|
|
3002
|
+
let lbMinPos = max(start, pos - lbMax)
|
|
3003
|
+
let rec loop = lbPos => {
|
|
3004
|
+
if (lbPos < lbMinPos) {
|
|
3005
|
+
if (isMatch) {
|
|
3104
3006
|
None
|
|
3105
|
-
}
|
|
3106
|
-
|
|
3007
|
+
} else {
|
|
3008
|
+
next_m(buf, pos, start, limit, end, state, stack)
|
|
3009
|
+
}
|
|
3010
|
+
} else {
|
|
3011
|
+
let oldState = saveGroups(state, nStart, numN)
|
|
3012
|
+
match (sub_m(buf, lbPos, start, pos, end, state, stack)) {
|
|
3013
|
+
Some(_) when isMatch => {
|
|
3014
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
3015
|
+
Some(p) => Some(p),
|
|
3016
|
+
None => {
|
|
3017
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3018
|
+
None
|
|
3019
|
+
},
|
|
3020
|
+
}
|
|
3021
|
+
},
|
|
3022
|
+
_ when isMatch => {
|
|
3023
|
+
loop(lbPos - 1)
|
|
3024
|
+
},
|
|
3025
|
+
Some(_) => {
|
|
3026
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3027
|
+
None
|
|
3028
|
+
},
|
|
3029
|
+
_ => next_m(buf, pos, start, limit, end, state, stack),
|
|
3030
|
+
}
|
|
3107
3031
|
}
|
|
3108
3032
|
}
|
|
3033
|
+
loop(pos - lbMin)
|
|
3109
3034
|
}
|
|
3110
|
-
loop(pos - lbMin)
|
|
3111
|
-
}
|
|
3112
3035
|
|
|
3113
3036
|
let conditionalReferenceMatcher = (n, m1, m2) =>
|
|
3114
3037
|
(
|
|
@@ -3120,12 +3043,12 @@ let conditionalReferenceMatcher = (n, m1, m2) =>
|
|
|
3120
3043
|
state,
|
|
3121
3044
|
stack,
|
|
3122
3045
|
) => {
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
|
|
3046
|
+
if (Option.isSome(state[n])) {
|
|
3047
|
+
m1(buf, pos, start, limit, end, state, stack)
|
|
3048
|
+
} else {
|
|
3049
|
+
m2(buf, pos, start, limit, end, state, stack)
|
|
3050
|
+
}
|
|
3127
3051
|
}
|
|
3128
|
-
}
|
|
3129
3052
|
|
|
3130
3053
|
let conditionalLookMatcher = (tst_m, m1, m2, nStart, numN) =>
|
|
3131
3054
|
(
|
|
@@ -3137,19 +3060,19 @@ let conditionalLookMatcher = (tst_m, m1, m2, nStart, numN) =>
|
|
|
3137
3060
|
state,
|
|
3138
3061
|
stack,
|
|
3139
3062
|
) => {
|
|
3140
|
-
|
|
3141
|
-
|
|
3142
|
-
|
|
3143
|
-
|
|
3144
|
-
|
|
3145
|
-
|
|
3146
|
-
|
|
3147
|
-
|
|
3148
|
-
|
|
3149
|
-
|
|
3150
|
-
|
|
3063
|
+
let oldState = saveGroups(state, nStart, numN)
|
|
3064
|
+
let res = match (tst_m(buf, pos, start, limit, end, state, [])) {
|
|
3065
|
+
Some(_) => m1(buf, pos, start, limit, end, state, stack),
|
|
3066
|
+
None => m2(buf, pos, start, limit, end, state, stack),
|
|
3067
|
+
}
|
|
3068
|
+
match (res) {
|
|
3069
|
+
Some(p) => Some(p),
|
|
3070
|
+
None => {
|
|
3071
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3072
|
+
None
|
|
3073
|
+
},
|
|
3074
|
+
}
|
|
3151
3075
|
}
|
|
3152
|
-
}
|
|
3153
3076
|
|
|
3154
3077
|
let cutMatcher = (sub_m, nStart, numN, next_m) =>
|
|
3155
3078
|
(
|
|
@@ -3161,20 +3084,20 @@ let cutMatcher = (sub_m, nStart, numN, next_m) =>
|
|
|
3161
3084
|
state,
|
|
3162
3085
|
stack,
|
|
3163
3086
|
) => {
|
|
3164
|
-
|
|
3165
|
-
|
|
3166
|
-
|
|
3167
|
-
|
|
3168
|
-
|
|
3169
|
-
|
|
3170
|
-
|
|
3171
|
-
|
|
3172
|
-
|
|
3173
|
-
|
|
3174
|
-
|
|
3175
|
-
|
|
3087
|
+
let oldState = saveGroups(state, nStart, numN)
|
|
3088
|
+
match (sub_m(buf, pos, start, limit, end, state, [])) {
|
|
3089
|
+
None => None,
|
|
3090
|
+
Some(_) => {
|
|
3091
|
+
match (next_m(buf, pos, start, limit, end, state, stack)) {
|
|
3092
|
+
None => {
|
|
3093
|
+
restoreGroups(state, oldState, nStart, numN)
|
|
3094
|
+
None
|
|
3095
|
+
},
|
|
3096
|
+
Some(p) => Some(p),
|
|
3097
|
+
}
|
|
3098
|
+
},
|
|
3099
|
+
}
|
|
3176
3100
|
}
|
|
3177
|
-
}
|
|
3178
3101
|
|
|
3179
3102
|
// Unicode characters in UTF-8 encoding
|
|
3180
3103
|
|
|
@@ -3188,8 +3111,8 @@ let unicodeCategoriesMatcher = (cats, isMatch, next_m) =>
|
|
|
3188
3111
|
state,
|
|
3189
3112
|
stack,
|
|
3190
3113
|
) => {
|
|
3191
|
-
|
|
3192
|
-
}
|
|
3114
|
+
fail "NYI: unicodeCategoriesMatcher is not supported until grain-lang/grain#661 is resolved."
|
|
3115
|
+
}
|
|
3193
3116
|
|
|
3194
3117
|
// -------
|
|
3195
3118
|
// Regex matcher compilation
|
|
@@ -3241,8 +3164,7 @@ let compileRegexToMatcher = (re: ParsedRegularExpression) => {
|
|
|
3241
3164
|
},
|
|
3242
3165
|
REAlts(re1, re2) =>
|
|
3243
3166
|
altsMatcher(compile(re1, next_m), compile(re2, next_m)),
|
|
3244
|
-
REMaybe(re, true) =>
|
|
3245
|
-
altsMatcher(next_m, compile(re, next_m)), // non-greedy
|
|
3167
|
+
REMaybe(re, true) => altsMatcher(next_m, compile(re, next_m)), // non-greedy
|
|
3246
3168
|
REMaybe(re, _) => altsMatcher(compile(re, next_m), next_m),
|
|
3247
3169
|
RERepeat(actualRe, min, max, nonGreedy) => {
|
|
3248
3170
|
// Special case: group around simple pattern in non-lazy repeat
|
|
@@ -3252,11 +3174,10 @@ let compileRegexToMatcher = (re: ParsedRegularExpression) => {
|
|
|
3252
3174
|
_ => actualRe,
|
|
3253
3175
|
}
|
|
3254
3176
|
let simple = !needsBacktrack(re)
|
|
3255
|
-
let groupN = if (simple)
|
|
3256
|
-
|
|
3257
|
-
|
|
3258
|
-
|
|
3259
|
-
} else None
|
|
3177
|
+
let groupN = if (simple) match (actualRe) {
|
|
3178
|
+
REGroup(_, n) => Some(n),
|
|
3179
|
+
_ => None,
|
|
3180
|
+
} else None
|
|
3260
3181
|
match (compileMatcherRepeater(re, min, max)) {
|
|
3261
3182
|
Some(matcher) when !nonGreedy =>
|
|
3262
3183
|
repeatSimpleManyMatcher(matcher, min, max, groupN, next_m),
|
|
@@ -3317,8 +3238,7 @@ let interp = (compiledRe, matchBuffer, pos, start, limitOrEnd, state) => {
|
|
|
3317
3238
|
compiledRe(matchBuffer, pos, start, limitOrEnd, limitOrEnd, state, [])
|
|
3318
3239
|
}
|
|
3319
3240
|
|
|
3320
|
-
|
|
3321
|
-
record RegularExpression {
|
|
3241
|
+
abstract record RegularExpression {
|
|
3322
3242
|
reParsed: ParsedRegularExpression,
|
|
3323
3243
|
reNumGroups: Number,
|
|
3324
3244
|
reReferences: Bool,
|
|
@@ -3331,16 +3251,12 @@ record RegularExpression {
|
|
|
3331
3251
|
Number,
|
|
3332
3252
|
Array<Option<(Number, Number)>>,
|
|
3333
3253
|
List<StackElt>,
|
|
3334
|
-
)
|
|
3254
|
+
) => Option<Number>,
|
|
3335
3255
|
reMustString: Option<String>,
|
|
3336
3256
|
reIsAnchored: Bool,
|
|
3337
3257
|
reStartRange: Option<RERange>,
|
|
3338
3258
|
}
|
|
3339
3259
|
|
|
3340
|
-
/**
|
|
3341
|
-
* @section Values: Functions for working with regular expressions.
|
|
3342
|
-
*/
|
|
3343
|
-
|
|
3344
3260
|
// TODO(#661): re-add the following pieces of documentation:
|
|
3345
3261
|
/*
|
|
3346
3262
|
[Under POSIX character classes]
|
|
@@ -3489,7 +3405,7 @@ record RegularExpression {
|
|
|
3489
3405
|
*
|
|
3490
3406
|
* @since 0.4.3
|
|
3491
3407
|
*/
|
|
3492
|
-
|
|
3408
|
+
provide let make = (regexString: String) => {
|
|
3493
3409
|
let buf = makeRegExBuf(regexString)
|
|
3494
3410
|
match (parseRegex(buf)) {
|
|
3495
3411
|
Err(e) => Err(e),
|
|
@@ -3510,7 +3426,7 @@ export let make = (regexString: String) => {
|
|
|
3510
3426
|
reMustString: mustString(parsed),
|
|
3511
3427
|
reIsAnchored: isAnchored(parsed),
|
|
3512
3428
|
reStartRange: startRange(parsed),
|
|
3513
|
-
}
|
|
3429
|
+
},
|
|
3514
3430
|
)
|
|
3515
3431
|
},
|
|
3516
3432
|
}
|
|
@@ -3532,11 +3448,12 @@ let checkMustString = (ms, buf: MatchBuf, pos, endPos) => {
|
|
|
3532
3448
|
None => true,
|
|
3533
3449
|
Some(ms) => {
|
|
3534
3450
|
let toCheck = if (
|
|
3535
|
-
pos == 0 &&
|
|
3451
|
+
pos == 0 &&
|
|
3452
|
+
endPos == Array.length(buf.matchInputExploded)
|
|
3536
3453
|
) {
|
|
3537
3454
|
buf.matchInput
|
|
3538
3455
|
} else {
|
|
3539
|
-
String.slice(pos, endPos, buf.matchInput)
|
|
3456
|
+
String.slice(pos, end=endPos, buf.matchInput)
|
|
3540
3457
|
}
|
|
3541
3458
|
Option.isSome(String.indexOf(ms, toCheck))
|
|
3542
3459
|
},
|
|
@@ -3548,15 +3465,14 @@ let checkStartRange = (startRange, buf, pos, endPos) => {
|
|
|
3548
3465
|
rangeContains(startRange, Char.code(buf.matchInputExploded[pos]))
|
|
3549
3466
|
}
|
|
3550
3467
|
|
|
3551
|
-
let searchMatch =
|
|
3552
|
-
|
|
3553
|
-
|
|
3554
|
-
|
|
3555
|
-
|
|
3556
|
-
|
|
3557
|
-
|
|
3558
|
-
|
|
3559
|
-
) => {
|
|
3468
|
+
let searchMatch = (
|
|
3469
|
+
rx: RegularExpression,
|
|
3470
|
+
buf: MatchBuf,
|
|
3471
|
+
pos,
|
|
3472
|
+
startPos,
|
|
3473
|
+
endPos,
|
|
3474
|
+
state,
|
|
3475
|
+
) => {
|
|
3560
3476
|
if (!checkMustString(rx.reMustString, buf, pos, endPos)) {
|
|
3561
3477
|
None
|
|
3562
3478
|
} else {
|
|
@@ -3568,8 +3484,7 @@ let searchMatch =
|
|
|
3568
3484
|
None
|
|
3569
3485
|
} else {
|
|
3570
3486
|
match (startRange) {
|
|
3571
|
-
Some(_) when pos == endPos =>
|
|
3572
|
-
None, // Can't possibly match if chars are required and we are at EOS
|
|
3487
|
+
Some(_) when pos == endPos => None, // Can't possibly match if chars are required and we are at EOS
|
|
3573
3488
|
Some(rng) when !checkStartRange(rng, buf, pos, endPos) =>
|
|
3574
3489
|
loop(pos + 1),
|
|
3575
3490
|
_ => {
|
|
@@ -3592,48 +3507,18 @@ let searchMatch =
|
|
|
3592
3507
|
* of a regular expression match. The results can be obtained using
|
|
3593
3508
|
* the following accessors:
|
|
3594
3509
|
*
|
|
3595
|
-
* ```grain
|
|
3596
|
-
* group : Number -> Option<String>
|
|
3597
|
-
* ```
|
|
3598
|
-
*
|
|
3599
|
-
* Returns the contents of the given group. Note that group 0 contains
|
|
3600
|
-
* the entire matched substring, and group 1 contains the first parenthesized group.
|
|
3601
|
-
*
|
|
3602
|
-
* ```grain
|
|
3603
|
-
* groupPosition : Number -> Option<(Number, Number)>
|
|
3604
|
-
* ```
|
|
3605
|
-
*
|
|
3606
|
-
* Returns the position of the given group.
|
|
3607
|
-
*
|
|
3608
|
-
* ```grain
|
|
3609
|
-
* numGroups : Number
|
|
3610
|
-
* ```
|
|
3611
|
-
*
|
|
3612
|
-
* The number of defined groups in this match object (including group 0).
|
|
3613
|
-
*
|
|
3614
|
-
* ```grain
|
|
3615
|
-
* allGroups : () -> Array<Option<String>>
|
|
3616
|
-
* ```
|
|
3617
|
-
*
|
|
3618
|
-
* Returns the contents of all groups matched in this match object.
|
|
3619
|
-
*
|
|
3620
|
-
* ```grain
|
|
3621
|
-
* allGroupPositions : () -> Array<Option<(Number, Number)>>
|
|
3622
|
-
* ```
|
|
3623
|
-
*
|
|
3624
|
-
* Returns the positions of all groups matched in this match object.
|
|
3625
|
-
*
|
|
3626
3510
|
* @since 0.4.3
|
|
3627
3511
|
*/
|
|
3628
|
-
|
|
3512
|
+
provide record MatchResult {
|
|
3629
3513
|
/**
|
|
3630
|
-
* Returns the contents of the given group
|
|
3514
|
+
* Returns the contents of the given group. Note that group 0 contains
|
|
3515
|
+
* the entire matched substring, and group 1 contains the first parenthesized group.
|
|
3631
3516
|
*/
|
|
3632
|
-
group: Number
|
|
3517
|
+
group: Number => Option<String>,
|
|
3633
3518
|
/**
|
|
3634
3519
|
* Returns the position of the given group
|
|
3635
3520
|
*/
|
|
3636
|
-
groupPosition: Number
|
|
3521
|
+
groupPosition: Number => Option<(Number, Number)>,
|
|
3637
3522
|
/**
|
|
3638
3523
|
* Returns the number of defined groups in this match object (includes group 0)
|
|
3639
3524
|
*/
|
|
@@ -3641,11 +3526,11 @@ export record MatchResult {
|
|
|
3641
3526
|
/**
|
|
3642
3527
|
* Returns the contents of all groups matched in this match object
|
|
3643
3528
|
*/
|
|
3644
|
-
allGroups: ()
|
|
3529
|
+
allGroups: () => Array<Option<String>>,
|
|
3645
3530
|
/**
|
|
3646
3531
|
* Returns the positions of all groups matched in this match object
|
|
3647
3532
|
*/
|
|
3648
|
-
allGroupPositions: ()
|
|
3533
|
+
allGroupPositions: () => Array<Option<(Number, Number)>>,
|
|
3649
3534
|
}
|
|
3650
3535
|
|
|
3651
3536
|
let makeMatchResult = (origString, start, end, state) => {
|
|
@@ -3663,7 +3548,7 @@ let makeMatchResult = (origString, start, end, state) => {
|
|
|
3663
3548
|
}
|
|
3664
3549
|
let getMatchGroup = n => {
|
|
3665
3550
|
match (getMatchGroupPosition(n)) {
|
|
3666
|
-
Some((start, end)) => Some(String.slice(start, end, origString)),
|
|
3551
|
+
Some((start, end)) => Some(String.slice(start, end=end, origString)),
|
|
3667
3552
|
None => None,
|
|
3668
3553
|
}
|
|
3669
3554
|
}
|
|
@@ -3678,7 +3563,7 @@ let makeMatchResult = (origString, start, end, state) => {
|
|
|
3678
3563
|
let getAllMatchGroups = () => {
|
|
3679
3564
|
Array.map(o => match (o) {
|
|
3680
3565
|
None => None,
|
|
3681
|
-
Some((start, end)) => Some(String.slice(start, end, origString)),
|
|
3566
|
+
Some((start, end)) => Some(String.slice(start, end=end, origString)),
|
|
3682
3567
|
}, getAllMatchGroupPositions())
|
|
3683
3568
|
}
|
|
3684
3569
|
{
|
|
@@ -3693,12 +3578,14 @@ let makeMatchResult = (origString, start, end, state) => {
|
|
|
3693
3578
|
// Helpers for user-facing match functionality
|
|
3694
3579
|
|
|
3695
3580
|
let fastDriveRegexIsMatch = (rx, string, startOffset, endOffset) => {
|
|
3696
|
-
let state =
|
|
3697
|
-
|
|
3698
|
-
|
|
3699
|
-
|
|
3700
|
-
|
|
3701
|
-
|
|
3581
|
+
let state = if (rx.reReferences)
|
|
3582
|
+
Array.make(rx.reNumGroups, None)
|
|
3583
|
+
else
|
|
3584
|
+
Array.make(0, None)
|
|
3585
|
+
let toWrap = if (startOffset == 0 && endOffset == String.length(string))
|
|
3586
|
+
string
|
|
3587
|
+
else
|
|
3588
|
+
String.slice(startOffset, end=endOffset, string)
|
|
3702
3589
|
let buf = makeMatchBuffer(toWrap)
|
|
3703
3590
|
Option.isSome(
|
|
3704
3591
|
searchMatch(rx, buf, 0, 0, Array.length(buf.matchInputExploded), state)
|
|
@@ -3710,18 +3597,14 @@ let rec fastDriveRegexMatchAll = (rx, string, startOffset, endOffset) => {
|
|
|
3710
3597
|
[]
|
|
3711
3598
|
} else {
|
|
3712
3599
|
let state = Array.make(rx.reNumGroups, None)
|
|
3713
|
-
let toWrap =
|
|
3714
|
-
|
|
3715
|
-
|
|
3600
|
+
let toWrap = if (startOffset == 0 && endOffset == String.length(string))
|
|
3601
|
+
string
|
|
3602
|
+
else
|
|
3603
|
+
String.slice(startOffset, end=endOffset, string)
|
|
3716
3604
|
let buf = makeMatchBuffer(toWrap)
|
|
3717
|
-
match (
|
|
3718
|
-
rx,
|
|
3719
|
-
|
|
3720
|
-
0,
|
|
3721
|
-
0,
|
|
3722
|
-
Array.length(buf.matchInputExploded),
|
|
3723
|
-
state
|
|
3724
|
-
)) {
|
|
3605
|
+
match (
|
|
3606
|
+
searchMatch(rx, buf, 0, 0, Array.length(buf.matchInputExploded), state)
|
|
3607
|
+
) {
|
|
3725
3608
|
None => [],
|
|
3726
3609
|
Some((startPos, endPos)) =>
|
|
3727
3610
|
[
|
|
@@ -3750,18 +3633,14 @@ let rec fastDriveRegexMatchAll = (rx, string, startOffset, endOffset) => {
|
|
|
3750
3633
|
|
|
3751
3634
|
let fastDriveRegexMatch = (rx, string, startOffset, endOffset) => {
|
|
3752
3635
|
let state = Array.make(rx.reNumGroups, None)
|
|
3753
|
-
let toWrap =
|
|
3754
|
-
|
|
3755
|
-
|
|
3636
|
+
let toWrap = if (startOffset == 0 && endOffset == String.length(string))
|
|
3637
|
+
string
|
|
3638
|
+
else
|
|
3639
|
+
String.slice(startOffset, end=endOffset, string)
|
|
3756
3640
|
let buf = makeMatchBuffer(toWrap)
|
|
3757
|
-
match (
|
|
3758
|
-
rx,
|
|
3759
|
-
|
|
3760
|
-
0,
|
|
3761
|
-
0,
|
|
3762
|
-
Array.length(buf.matchInputExploded),
|
|
3763
|
-
state
|
|
3764
|
-
)) {
|
|
3641
|
+
match (
|
|
3642
|
+
searchMatch(rx, buf, 0, 0, Array.length(buf.matchInputExploded), state)
|
|
3643
|
+
) {
|
|
3765
3644
|
None => None,
|
|
3766
3645
|
Some((startPos, endPos)) => {
|
|
3767
3646
|
Some(
|
|
@@ -3776,7 +3655,7 @@ let fastDriveRegexMatch = (rx, string, startOffset, endOffset) => {
|
|
|
3776
3655
|
Some((start + startOffset, end + startOffset)),
|
|
3777
3656
|
}
|
|
3778
3657
|
}, state)
|
|
3779
|
-
)
|
|
3658
|
+
),
|
|
3780
3659
|
)
|
|
3781
3660
|
},
|
|
3782
3661
|
}
|
|
@@ -3792,7 +3671,7 @@ let fastDriveRegexMatch = (rx, string, startOffset, endOffset) => {
|
|
|
3792
3671
|
*
|
|
3793
3672
|
* @since 0.4.3
|
|
3794
3673
|
*/
|
|
3795
|
-
|
|
3674
|
+
provide let isMatch = (rx: RegularExpression, string: String) => {
|
|
3796
3675
|
fastDriveRegexIsMatch(rx, string, 0, String.length(string))
|
|
3797
3676
|
}
|
|
3798
3677
|
|
|
@@ -3809,13 +3688,12 @@ export let isMatch = (rx: RegularExpression, string: String) => {
|
|
|
3809
3688
|
*
|
|
3810
3689
|
* @since 0.4.3
|
|
3811
3690
|
*/
|
|
3812
|
-
|
|
3813
|
-
|
|
3814
|
-
|
|
3815
|
-
|
|
3816
|
-
|
|
3817
|
-
|
|
3818
|
-
) => {
|
|
3691
|
+
provide let isMatchRange = (
|
|
3692
|
+
rx: RegularExpression,
|
|
3693
|
+
string: String,
|
|
3694
|
+
start: Number,
|
|
3695
|
+
end: Number,
|
|
3696
|
+
) => {
|
|
3819
3697
|
fastDriveRegexIsMatch(rx, string, start, end)
|
|
3820
3698
|
}
|
|
3821
3699
|
|
|
@@ -3829,7 +3707,7 @@ export let isMatchRange =
|
|
|
3829
3707
|
*
|
|
3830
3708
|
* @since 0.4.3
|
|
3831
3709
|
*/
|
|
3832
|
-
|
|
3710
|
+
provide let find = (rx: RegularExpression, string: String) => {
|
|
3833
3711
|
fastDriveRegexMatch(rx, string, 0, String.length(string))
|
|
3834
3712
|
}
|
|
3835
3713
|
|
|
@@ -3846,13 +3724,12 @@ export let find = (rx: RegularExpression, string: String) => {
|
|
|
3846
3724
|
*
|
|
3847
3725
|
* @since 0.4.3
|
|
3848
3726
|
*/
|
|
3849
|
-
|
|
3850
|
-
|
|
3851
|
-
|
|
3852
|
-
|
|
3853
|
-
|
|
3854
|
-
|
|
3855
|
-
) => {
|
|
3727
|
+
provide let findRange = (
|
|
3728
|
+
rx: RegularExpression,
|
|
3729
|
+
string: String,
|
|
3730
|
+
start: Number,
|
|
3731
|
+
end: Number,
|
|
3732
|
+
) => {
|
|
3856
3733
|
fastDriveRegexMatch(rx, string, start, end)
|
|
3857
3734
|
}
|
|
3858
3735
|
|
|
@@ -3862,7 +3739,7 @@ export let findRange =
|
|
|
3862
3739
|
* @param string: The string to search
|
|
3863
3740
|
* @returns The list of matches
|
|
3864
3741
|
*/
|
|
3865
|
-
|
|
3742
|
+
provide let findAll = (rx: RegularExpression, string: String) => {
|
|
3866
3743
|
fastDriveRegexMatchAll(rx, string, 0, String.length(string))
|
|
3867
3744
|
}
|
|
3868
3745
|
|
|
@@ -3879,36 +3756,33 @@ export let findAll = (rx: RegularExpression, string: String) => {
|
|
|
3879
3756
|
*
|
|
3880
3757
|
* @since 0.4.3
|
|
3881
3758
|
*/
|
|
3882
|
-
|
|
3883
|
-
|
|
3884
|
-
|
|
3885
|
-
|
|
3886
|
-
|
|
3887
|
-
|
|
3888
|
-
) => {
|
|
3759
|
+
provide let findAllRange = (
|
|
3760
|
+
rx: RegularExpression,
|
|
3761
|
+
string: String,
|
|
3762
|
+
start: Number,
|
|
3763
|
+
end: Number,
|
|
3764
|
+
) => {
|
|
3889
3765
|
fastDriveRegexMatchAll(rx, string, start, end)
|
|
3890
3766
|
}
|
|
3891
3767
|
|
|
3892
|
-
let computeReplacement =
|
|
3893
|
-
|
|
3894
|
-
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
|
|
3898
|
-
|
|
3899
|
-
) => {
|
|
3768
|
+
let computeReplacement = (
|
|
3769
|
+
matchBuf: MatchBuf,
|
|
3770
|
+
replacementString: String,
|
|
3771
|
+
start,
|
|
3772
|
+
end,
|
|
3773
|
+
state,
|
|
3774
|
+
) => {
|
|
3900
3775
|
let replacementExploded = String.explode(replacementString)
|
|
3901
3776
|
let len = Array.length(replacementExploded)
|
|
3902
3777
|
let mut acc = []
|
|
3903
|
-
let getBeforeMatch = () => String.slice(0, start, matchBuf.matchInput)
|
|
3904
|
-
let getAfterMatch = () =>
|
|
3905
|
-
String.slice(end, String.length(matchBuf.matchInput), matchBuf.matchInput)
|
|
3778
|
+
let getBeforeMatch = () => String.slice(0, end=start, matchBuf.matchInput)
|
|
3779
|
+
let getAfterMatch = () => String.slice(end, matchBuf.matchInput)
|
|
3906
3780
|
let getInputSubstr = n => {
|
|
3907
3781
|
if (n == 0) {
|
|
3908
|
-
String.slice(start, end, matchBuf.matchInput)
|
|
3782
|
+
String.slice(start, end=end, matchBuf.matchInput)
|
|
3909
3783
|
} else if (n - 1 < Array.length(state)) {
|
|
3910
3784
|
match (state[n - 1]) {
|
|
3911
|
-
Some((start, end)) => String.slice(start, end, matchBuf.matchInput),
|
|
3785
|
+
Some((start, end)) => String.slice(start, end=end, matchBuf.matchInput),
|
|
3912
3786
|
None => "",
|
|
3913
3787
|
}
|
|
3914
3788
|
} else {
|
|
@@ -3916,8 +3790,10 @@ let computeReplacement =
|
|
|
3916
3790
|
}
|
|
3917
3791
|
}
|
|
3918
3792
|
let consRange = (start, end, lst) => {
|
|
3919
|
-
if (start == end)
|
|
3920
|
-
|
|
3793
|
+
if (start == end)
|
|
3794
|
+
lst
|
|
3795
|
+
else
|
|
3796
|
+
[String.slice(start, end=end, replacementString), ...lst]
|
|
3921
3797
|
}
|
|
3922
3798
|
let rec loop = (pos, since) => {
|
|
3923
3799
|
if (pos == len) {
|
|
@@ -3931,34 +3807,30 @@ let computeReplacement =
|
|
|
3931
3807
|
} else if (c == Some('\'')) {
|
|
3932
3808
|
consRange(since, pos, [getAfterMatch(), ...loop(pos + 2, pos + 2)])
|
|
3933
3809
|
} else {
|
|
3934
|
-
consRange(
|
|
3935
|
-
|
|
3936
|
-
|
|
3937
|
-
{
|
|
3938
|
-
|
|
3939
|
-
|
|
3940
|
-
|
|
3941
|
-
|
|
3942
|
-
|
|
3943
|
-
|
|
3944
|
-
|
|
3945
|
-
|
|
3810
|
+
consRange(since, pos, {
|
|
3811
|
+
if (c == Some('$')) {
|
|
3812
|
+
loop(pos + 2, pos + 1)
|
|
3813
|
+
} else if (c == Some('.')) {
|
|
3814
|
+
loop(pos + 2, pos + 2)
|
|
3815
|
+
} else {
|
|
3816
|
+
let rec dLoop = (pos, accum) => {
|
|
3817
|
+
if (pos == len) {
|
|
3818
|
+
[getInputSubstr(accum)]
|
|
3819
|
+
} else {
|
|
3820
|
+
let c = replacementExploded[pos]
|
|
3821
|
+
if (
|
|
3822
|
+
Char.code('0') <= Char.code(c) &&
|
|
3823
|
+
Char.code(c) <= Char.code('9')
|
|
3824
|
+
) {
|
|
3825
|
+
dLoop(pos + 1, 10 * accum + (Char.code(c) - Char.code('0')))
|
|
3946
3826
|
} else {
|
|
3947
|
-
|
|
3948
|
-
if (
|
|
3949
|
-
Char.code('0') <= Char.code(c) &&
|
|
3950
|
-
Char.code(c) <= Char.code('9')
|
|
3951
|
-
) {
|
|
3952
|
-
dLoop(pos + 1, 10 * accum + (Char.code(c) - Char.code('0')))
|
|
3953
|
-
} else {
|
|
3954
|
-
[getInputSubstr(accum), ...loop(pos, pos)]
|
|
3955
|
-
}
|
|
3827
|
+
[getInputSubstr(accum), ...loop(pos, pos)]
|
|
3956
3828
|
}
|
|
3957
3829
|
}
|
|
3958
|
-
dLoop(pos + 1, 0)
|
|
3959
3830
|
}
|
|
3831
|
+
dLoop(pos + 1, 0)
|
|
3960
3832
|
}
|
|
3961
|
-
)
|
|
3833
|
+
})
|
|
3962
3834
|
}
|
|
3963
3835
|
} else {
|
|
3964
3836
|
loop(pos + 1, since)
|
|
@@ -3968,13 +3840,12 @@ let computeReplacement =
|
|
|
3968
3840
|
List.reduceRight(String.concat, "", res)
|
|
3969
3841
|
}
|
|
3970
3842
|
|
|
3971
|
-
let regexReplaceHelp =
|
|
3972
|
-
|
|
3973
|
-
|
|
3974
|
-
|
|
3975
|
-
|
|
3976
|
-
|
|
3977
|
-
) => {
|
|
3843
|
+
let regexReplaceHelp = (
|
|
3844
|
+
rx: RegularExpression,
|
|
3845
|
+
toSearch: String,
|
|
3846
|
+
replacement: String,
|
|
3847
|
+
all: Bool,
|
|
3848
|
+
) => {
|
|
3978
3849
|
let buf = makeMatchBuffer(toSearch)
|
|
3979
3850
|
let rec loop = searchPos => {
|
|
3980
3851
|
let state = Array.make(rx.reNumGroups, None)
|
|
@@ -3993,7 +3864,7 @@ let regexReplaceHelp =
|
|
|
3993
3864
|
""
|
|
3994
3865
|
} else {
|
|
3995
3866
|
String.concat(
|
|
3996
|
-
String.slice(searchPos, searchPos + 1, toSearch),
|
|
3867
|
+
String.slice(searchPos, end=searchPos + 1, toSearch),
|
|
3997
3868
|
loop(searchPos + 1)
|
|
3998
3869
|
)
|
|
3999
3870
|
}
|
|
@@ -4003,15 +3874,13 @@ let regexReplaceHelp =
|
|
|
4003
3874
|
}
|
|
4004
3875
|
match (poss) {
|
|
4005
3876
|
None =>
|
|
4006
|
-
if (searchPos == 0) toSearch
|
|
4007
|
-
else String.slice(searchPos, String.length(toSearch), toSearch),
|
|
3877
|
+
if (searchPos == 0) toSearch else String.slice(searchPos, toSearch),
|
|
4008
3878
|
Some((start, end)) =>
|
|
4009
3879
|
String.concat(
|
|
4010
|
-
String.slice(searchPos, start, toSearch),
|
|
3880
|
+
String.slice(searchPos, end=start, toSearch),
|
|
4011
3881
|
String.concat(
|
|
4012
3882
|
computeReplacement(buf, replacement, start, end, state),
|
|
4013
|
-
if (all) recur(start, end)
|
|
4014
|
-
else String.slice(end, String.length(toSearch), toSearch)
|
|
3883
|
+
if (all) recur(start, end) else String.slice(end, toSearch)
|
|
4015
3884
|
)
|
|
4016
3885
|
),
|
|
4017
3886
|
}
|
|
@@ -4039,12 +3908,11 @@ let regexReplaceHelp =
|
|
|
4039
3908
|
*
|
|
4040
3909
|
* @since 0.4.3
|
|
4041
3910
|
*/
|
|
4042
|
-
|
|
4043
|
-
|
|
4044
|
-
|
|
4045
|
-
|
|
4046
|
-
|
|
4047
|
-
) => {
|
|
3911
|
+
provide let replace = (
|
|
3912
|
+
rx: RegularExpression,
|
|
3913
|
+
toSearch: String,
|
|
3914
|
+
replacement: String,
|
|
3915
|
+
) => {
|
|
4048
3916
|
regexReplaceHelp(rx, toSearch, replacement, false)
|
|
4049
3917
|
}
|
|
4050
3918
|
|
|
@@ -4061,12 +3929,11 @@ export let replace =
|
|
|
4061
3929
|
*
|
|
4062
3930
|
* @since 0.4.3
|
|
4063
3931
|
*/
|
|
4064
|
-
|
|
4065
|
-
|
|
4066
|
-
|
|
4067
|
-
|
|
4068
|
-
|
|
4069
|
-
) => {
|
|
3932
|
+
provide let replaceAll = (
|
|
3933
|
+
rx: RegularExpression,
|
|
3934
|
+
toSearch: String,
|
|
3935
|
+
replacement: String,
|
|
3936
|
+
) => {
|
|
4070
3937
|
regexReplaceHelp(rx, toSearch, replacement, true)
|
|
4071
3938
|
}
|
|
4072
3939
|
|
|
@@ -4090,10 +3957,10 @@ let regexSplitHelp = (rx: RegularExpression, str: String, all: Bool) => {
|
|
|
4090
3957
|
Some((start, end)) => {
|
|
4091
3958
|
if (i == 0) {
|
|
4092
3959
|
// Add the string between this match and the last match
|
|
4093
|
-
out = [String.slice(currentLocation, start, str), ...out]
|
|
3960
|
+
out = [String.slice(currentLocation, end=start, str), ...out]
|
|
4094
3961
|
} else {
|
|
4095
3962
|
// This adds the groups back in
|
|
4096
|
-
out = [String.slice(start, end, str), ...out]
|
|
3963
|
+
out = [String.slice(start, end=end, str), ...out]
|
|
4097
3964
|
}
|
|
4098
3965
|
if (end > currentLocation) currentLocation = end
|
|
4099
3966
|
},
|
|
@@ -4101,7 +3968,7 @@ let regexSplitHelp = (rx: RegularExpression, str: String, all: Bool) => {
|
|
|
4101
3968
|
}
|
|
4102
3969
|
}, locations)
|
|
4103
3970
|
}, regexMatches)
|
|
4104
|
-
out = [String.slice(currentLocation,
|
|
3971
|
+
out = [String.slice(currentLocation, str), ...out]
|
|
4105
3972
|
List.reverse(out)
|
|
4106
3973
|
}
|
|
4107
3974
|
|
|
@@ -4119,7 +3986,7 @@ let regexSplitHelp = (rx: RegularExpression, str: String, all: Bool) => {
|
|
|
4119
3986
|
*
|
|
4120
3987
|
* @since v0.5.5
|
|
4121
3988
|
*/
|
|
4122
|
-
|
|
3989
|
+
provide let split = (rx: RegularExpression, str: String) => {
|
|
4123
3990
|
regexSplitHelp(rx, str, false)
|
|
4124
3991
|
}
|
|
4125
3992
|
|
|
@@ -4137,6 +4004,6 @@ export let split = (rx: RegularExpression, str: String) => {
|
|
|
4137
4004
|
*
|
|
4138
4005
|
* @since v0.5.5
|
|
4139
4006
|
*/
|
|
4140
|
-
|
|
4007
|
+
provide let splitAll = (rx: RegularExpression, str: String) => {
|
|
4141
4008
|
regexSplitHelp(rx, str, true)
|
|
4142
4009
|
}
|