@futpib/parser 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ import { setParserName } from './parser.js';
2
+ export const createRegExpParser = (regexp) => {
3
+ const regexpParser = async (parserContext) => {
4
+ let start = 0;
5
+ let window = 1;
6
+ let lastMatch;
7
+ let reachedEndOfInput = false;
8
+ while (true) {
9
+ const sequence = await parserContext.peekSequence(start, start + window);
10
+ if (sequence === undefined) {
11
+ reachedEndOfInput = true;
12
+ window = Math.floor(window / 2);
13
+ if (window === 0) {
14
+ // Get the full sequence we've accumulated to verify matches
15
+ const fullSequence = await parserContext.peekSequence(0, start);
16
+ // Verify any previous match is still valid with full context
17
+ // For lookahead/lookbehind assertions, additional input might invalidate a match
18
+ if (fullSequence !== undefined) {
19
+ const verifyMatch = regexp.exec(fullSequence);
20
+ if (verifyMatch !== null && verifyMatch.index === 0) {
21
+ parserContext.skip(verifyMatch[0].length);
22
+ return verifyMatch;
23
+ }
24
+ }
25
+ else if (lastMatch !== undefined) {
26
+ // No full sequence available but we have a previous match
27
+ parserContext.skip(lastMatch[0].length);
28
+ return lastMatch;
29
+ }
30
+ // No previous match - try matching against empty string for zero-width patterns (e.g., /a*/, /[ \t]*/)
31
+ const emptyMatch = regexp.exec('');
32
+ if (emptyMatch !== null && emptyMatch.index === 0) {
33
+ return emptyMatch;
34
+ }
35
+ return parserContext.invariant(false, 'Unexpected end of input without regex match');
36
+ }
37
+ continue;
38
+ }
39
+ const fullSequence = await parserContext.peekSequence(0, start + window);
40
+ if (fullSequence === undefined) {
41
+ continue;
42
+ }
43
+ const match = regexp.exec(fullSequence);
44
+ if (match === null || match.index !== 0) {
45
+ if (lastMatch !== undefined) {
46
+ // Verify lastMatch is still valid with current full context
47
+ // For lookahead/lookbehind assertions, a match on shorter input might be
48
+ // invalidated by additional input (e.g., /\|(?!\|)/ matches '|' but not '||')
49
+ const verifyMatch = regexp.exec(fullSequence);
50
+ if (verifyMatch !== null && verifyMatch.index === 0) {
51
+ parserContext.skip(verifyMatch[0].length);
52
+ return verifyMatch;
53
+ }
54
+ // lastMatch was invalidated by additional context
55
+ lastMatch = undefined;
56
+ }
57
+ if (reachedEndOfInput) {
58
+ parserContext.invariant(false, 'Regex did not match at start of input');
59
+ }
60
+ start += window;
61
+ window *= 2;
62
+ continue;
63
+ }
64
+ lastMatch = match;
65
+ start += window;
66
+ window *= 2;
67
+ }
68
+ };
69
+ setParserName(regexpParser, regexp.toString());
70
+ return regexpParser;
71
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,83 @@
1
+ import test from 'ava';
2
+ import * as fc from 'fast-check';
3
+ import { testProp } from '@fast-check/ava';
4
+ import { runParser, runParserWithRemainingInput } from './parser.js';
5
+ import { stringParserInputCompanion } from './parserInputCompanion.js';
6
+ import { createRegExpParser } from './regexpParser.js';
7
// --- Basic matching at the start of the input ---

test('regexpParser matches digits', async (t) => {
    const digits = createRegExpParser(/\d+/);
    const match = await runParser(digits, '123', stringParserInputCompanion);
    t.is(match[0], '123');
});

test('regexpParser matches at start only', async (t) => {
    const digits = createRegExpParser(/\d+/);
    const outcome = await runParserWithRemainingInput(digits, '123abc', stringParserInputCompanion);
    t.is(outcome.output[0], '123');
    t.truthy(outcome.remainingInput);
});

test('regexpParser fails when no match at start', async (t) => {
    const digits = createRegExpParser(/\d+/);
    const pending = runParser(digits, 'abc123', stringParserInputCompanion);
    await t.throwsAsync(pending);
});

test('regexpParser with capture groups', async (t) => {
    const range = createRegExpParser(/(\d+)-(\d+)/);
    const match = await runParser(range, '123-456', stringParserInputCompanion);
    const [whole, first, second] = [match[0], match[1], match[2]];
    t.is(whole, '123-456');
    t.is(first, '123');
    t.is(second, '456');
});

test('regexpParser greedy matching', async (t) => {
    const manyA = createRegExpParser(/a+/);
    const outcome = await runParserWithRemainingInput(manyA, 'aaab', stringParserInputCompanion);
    t.is(outcome.output[0], 'aaa');
});

test('regexpParser with anchored regexp', async (t) => {
    const hello = createRegExpParser(/^hello/);
    const outcome = await runParserWithRemainingInput(hello, 'hello world', stringParserInputCompanion);
    t.is(outcome.output[0], 'hello');
});

// Property: a run of word characters followed by non-word characters is
// consumed exactly up to the end of the word.
testProp.serial('regexpParser matches word characters', [
    fc.tuple(fc.stringMatching(/^\w+$/), fc.stringMatching(/^\W*$/)),
], async (t, [word, nonWord]) => {
    const wordParser = createRegExpParser(/\w+/);
    const input = word + nonWord;
    const outcome = await runParserWithRemainingInput(wordParser, input, stringParserInputCompanion);
    t.is(outcome.output[0], word);
    t.is(outcome.position, word.length);
}, {
    verbose: true,
});

// --- Zero-width / optional patterns at end of input ---

test('regexpParser with star quantifier on empty input', async (t) => {
    const anyA = createRegExpParser(/a*/);
    const match = await runParser(anyA, '', stringParserInputCompanion);
    t.is(match[0], '');
});

test('regexpParser with optional whitespace on empty input', async (t) => {
    const blanks = createRegExpParser(/[ \t]*/);
    const match = await runParser(blanks, '', stringParserInputCompanion);
    t.is(match[0], '');
});

test('regexpParser with star quantifier at end of input (no match)', async (t) => {
    const anyA = createRegExpParser(/a*/);
    const outcome = await runParserWithRemainingInput(anyA, 'bbb', stringParserInputCompanion);
    t.is(outcome.output[0], '');
});

test('regexpParser with optional group on empty input', async (t) => {
    const maybeFoo = createRegExpParser(/(?:foo)?/);
    const match = await runParser(maybeFoo, '', stringParserInputCompanion);
    t.is(match[0], '');
});

// --- Negative lookahead ---

test('regexpParser with negative lookahead should not match when followed by same char', async (t) => {
    // Nothing in '||' may match at the start: the first | is followed by another |.
    const lonePipe = createRegExpParser(/\|(?!\|)/);
    const pending = runParser(lonePipe, '||', stringParserInputCompanion);
    await t.throwsAsync(pending);
});

test('regexpParser with negative lookahead should match single char', async (t) => {
    // A single '|' followed by a different character must match.
    const lonePipe = createRegExpParser(/\|(?!\|)/);
    const outcome = await runParserWithRemainingInput(lonePipe, '| ', stringParserInputCompanion);
    t.is(outcome.output[0], '|');
    // Exactly one character is consumed.
    t.is(outcome.position, 1);
    // The trailing space is left over as remaining input.
    t.truthy(outcome.remainingInput);
});
@@ -0,0 +1,63 @@
1
/**
 * A range described by two numeric bounds.
 *
 * NOTE(review): presumably Unicode code points, going by the name; whether
 * `end` is inclusive is not stated in this declaration — confirm against the
 * implementation.
 */
export type CodePointRange = { start: number; end: number };
5
/**
 * A set of characters, modelled as a recursive structure: either the `empty`
 * set, or a `node` holding one {@link CodePointRange} plus a `left` and a
 * `right` sub-set.
 *
 * NOTE(review): the shape suggests a binary tree of ranges; any ordering
 * between `left`, `range` and `right` is not visible here — confirm.
 */
export type CharacterSet =
    | { type: 'empty' }
    | {
        type: 'node';
        range: CodePointRange;
        left: CharacterSet;
        right: CharacterSet;
    };
13
/**
 * Bounds of a `repeat` expression: a bare number, or an object in which at
 * least one of `min` / `max` is present.
 *
 * NOTE(review): a bare number presumably means "exactly n times" — confirm.
 */
export type RepeatBounds =
    | number
    | { min: number; max?: number }
    | { min?: number; max: number };
20
/**
 * Regular-expression syntax tree, expressed as a union discriminated by `type`.
 *
 * Variants fall into four shapes:
 * - leaves: `epsilon` (no fields) and `literal` (a {@link CharacterSet});
 * - binary nodes with `left` / `right`: `concat`, `union`, `start-anchor`,
 *   `end-anchor`;
 * - unary nodes with `inner`: `star`, `plus`, `optional`, `repeat` (with
 *   {@link RepeatBounds}) and `capture-group` (with an optional `name`);
 * - `lookahead`, carrying `isPositive`, the asserted `inner` expression and a
 *   `right` expression (presumably what follows the assertion — confirm).
 */
export type RegularExpression =
    | { type: 'epsilon' }
    | { type: 'literal'; charset: CharacterSet }
    | { type: 'concat'; left: RegularExpression; right: RegularExpression }
    | { type: 'union'; left: RegularExpression; right: RegularExpression }
    | { type: 'star'; inner: RegularExpression }
    | { type: 'plus'; inner: RegularExpression }
    | { type: 'optional'; inner: RegularExpression }
    | { type: 'repeat'; inner: RegularExpression; bounds: RepeatBounds }
    | { type: 'capture-group'; inner: RegularExpression; name?: string }
    | {
        type: 'lookahead';
        isPositive: boolean;
        inner: RegularExpression;
        right: RegularExpression;
    }
    | { type: 'start-anchor'; left: RegularExpression; right: RegularExpression }
    | { type: 'end-anchor'; left: RegularExpression; right: RegularExpression };
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,3 @@
1
+ import { type Parser } from './parser.js';
2
+ import { type RegularExpression } from './regularExpression.js';
3
/**
 * Ambient declaration of the regular-expression parser, typed as
 * `Parser<RegularExpression, string>`.
 *
 * NOTE(review): presumably parses regular-expression source text (string
 * input) into a `RegularExpression` tree — confirm against the meaning of
 * `Parser`'s type parameters in './parser.js'.
 */
+ export declare const regularExpressionParser: Parser<RegularExpression, string>;