@bablr/language-en-regex-vm-pattern 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/grammar.js +48 -52
- package/lib/grammar.macro.js +67 -62
- package/package.json +7 -7
package/lib/grammar.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
/* @macrome
|
|
2
2
|
* @generatedby @bablr/macrome-generator-bablr
|
|
3
|
-
* @generatedfrom ./grammar.macro.js#
|
|
3
|
+
* @generatedfrom ./grammar.macro.js#44cd5c6d803ea69c494531d65db6b33a5faa09cb
|
|
4
4
|
* This file is autogenerated. Please do not edit it directly.
|
|
5
5
|
* When editing run `npx macrome watch` then change the file this is generated from.
|
|
6
6
|
*/
|
|
7
7
|
import _applyDecs from "@babel/runtime/helpers/applyDecs2305";
|
|
8
8
|
let _initProto, _FlagsDecs, _GroupDecs, _AssertionDecs, _StartOfInputAssertionDecs, _EndOfInputAssertionDecs, _WordBoundaryAssertionDecs, _GapDecs, _CharacterDecs, _CharacterClassDecs, _CharacterClassRangeDecs, _CharacterSetDecs, _AnyCharacterSetDecs, _DigitCharacterSetDecs, _SpaceCharacterSetDecs, _WordCharacterSetDecs, _QuantifierDecs, _AnyDecs, _KeywordDecs, _PunctuatorDecs;
|
|
9
9
|
import { re, spam as m } from '@bablr/boot';
|
|
10
|
-
import { Node, CoveredBy, InjectFrom,
|
|
10
|
+
import { Node, CoveredBy, InjectFrom, UndefinedAttributes, AllowEmpty, Literal } from '@bablr/helpers/decorators';
|
|
11
11
|
import objectEntries from 'iter-tools-es/methods/object-entries';
|
|
12
12
|
import * as Shared from '@bablr/helpers/productions';
|
|
13
|
-
import { eat, eatMatch, match,
|
|
13
|
+
import { eat, eatMatch, match, shiftMatch, guard, defineAttribute, fail, o } from '@bablr/helpers/grammar';
|
|
14
14
|
import { buildString, buildBoolean } from '@bablr/helpers/builders';
|
|
15
15
|
export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
|
|
16
16
|
export const dependencies = {};
|
|
@@ -20,21 +20,6 @@ const escapables = new Map(objectEntries({
|
|
|
20
20
|
t: '\t',
|
|
21
21
|
0: '\0'
|
|
22
22
|
}));
|
|
23
|
-
export const getCooked = (escapeNode, span, ctx) => {
|
|
24
|
-
let cooked;
|
|
25
|
-
const codeNode = escapeNode.get('code');
|
|
26
|
-
const type = ctx.sourceTextFor(codeNode.get('typeToken'));
|
|
27
|
-
const value = ctx.sourceTextFor(codeNode.get('value'));
|
|
28
|
-
if (!type) {
|
|
29
|
-
const match_ = ctx.sourceTextFor(codeNode);
|
|
30
|
-
cooked = escapables.get(match_) || match_;
|
|
31
|
-
} else if (type === 'u' || type === 'x') {
|
|
32
|
-
cooked = parseInt(value, 16);
|
|
33
|
-
} else {
|
|
34
|
-
throw new Error();
|
|
35
|
-
}
|
|
36
|
-
return cooked.toString(10);
|
|
37
|
-
};
|
|
38
23
|
const flagCharacters = {
|
|
39
24
|
global: 'g',
|
|
40
25
|
ignoreCase: 'i',
|
|
@@ -46,7 +31,7 @@ const flagCharacters = {
|
|
|
46
31
|
const unique = flags => flags.length === new Set(flags).size;
|
|
47
32
|
const getSpecialPattern = span => {
|
|
48
33
|
if (span === 'Pattern') {
|
|
49
|
-
return re`/[
|
|
34
|
+
return re`/[*+?{}[\]().^$|\n\\<>]/`;
|
|
50
35
|
} else if (span === 'CharacterClass') {
|
|
51
36
|
return re`/[\]\\]/`;
|
|
52
37
|
} else {
|
|
@@ -55,14 +40,14 @@ const getSpecialPattern = span => {
|
|
|
55
40
|
};
|
|
56
41
|
export const grammar = class RegexGrammar {
|
|
57
42
|
static {
|
|
58
|
-
[_initProto] = _applyDecs(this, [[Node, 2, "Pattern"], [_FlagsDecs, 2, "Flags"], [AllowEmpty, 2, "Alternatives"], [[AllowEmpty, Node], 2, "Alternative"], [AllowEmpty, 2, "Elements"], [_GroupDecs, 2, "Group"], [Node, 2, "CapturingGroup"], [_AssertionDecs, 2, "Assertion"], [_StartOfInputAssertionDecs, 2, "StartOfInputAssertion"], [_EndOfInputAssertionDecs, 2, "EndOfInputAssertion"], [_WordBoundaryAssertionDecs, 2, "WordBoundaryAssertion"], [_GapDecs, 2, "Gap"], [_CharacterDecs, 2, "Character"], [_CharacterClassDecs, 2, "CharacterClass"], [_CharacterClassRangeDecs, 2, "CharacterClassRange"], [_CharacterSetDecs, 2, "CharacterSet"], [_AnyCharacterSetDecs, 2, "AnyCharacterSet"], [_DigitCharacterSetDecs, 2, "DigitCharacterSet"], [_SpaceCharacterSetDecs, 2, "SpaceCharacterSet"], [_WordCharacterSetDecs, 2, "WordCharacterSet"], [_QuantifierDecs, 2, "Quantifier"], [Node, 2, "UnsignedInteger"], [Node, 2, "EscapeSequence"], [Node, 2, "EscapeCode"], [Node, 2, "Digit"], [_AnyDecs, 2, "Any"], [_KeywordDecs, 2, "Keyword"], [_PunctuatorDecs, 2, "Punctuator"]], []).e;
|
|
43
|
+
[_initProto] = _applyDecs(this, [[Node, 2, "Pattern"], [_FlagsDecs, 2, "Flags"], [AllowEmpty, 2, "Alternatives"], [[AllowEmpty, Node], 2, "Alternative"], [AllowEmpty, 2, "Elements"], [_GroupDecs, 2, "Group"], [Node, 2, "CapturingGroup"], [_AssertionDecs, 2, "Assertion"], [_StartOfInputAssertionDecs, 2, "StartOfInputAssertion"], [_EndOfInputAssertionDecs, 2, "EndOfInputAssertion"], [_WordBoundaryAssertionDecs, 2, "WordBoundaryAssertion"], [_GapDecs, 2, "Gap"], [_CharacterDecs, 2, "Character"], [_CharacterClassDecs, 2, "CharacterClass"], [_CharacterClassRangeDecs, 2, "CharacterClassRange"], [_CharacterSetDecs, 2, "CharacterSet"], [_AnyCharacterSetDecs, 2, "AnyCharacterSet"], [_DigitCharacterSetDecs, 2, "DigitCharacterSet"], [_SpaceCharacterSetDecs, 2, "SpaceCharacterSet"], [_WordCharacterSetDecs, 2, "WordCharacterSet"], [_QuantifierDecs, 2, "Quantifier"], [Node, 2, "UnsignedInteger"], [Node, 2, "UnsignedHexInteger"], [Node, 2, "EscapeSequence"], [Node, 2, "EscapeCode"], [Node, 2, "Digit"], [_AnyDecs, 2, "Any"], [_KeywordDecs, 2, "Keyword"], [_PunctuatorDecs, 2, "Punctuator"]], []).e;
|
|
59
44
|
}
|
|
60
45
|
constructor() {
|
|
61
46
|
_initProto(this);
|
|
62
47
|
}
|
|
63
|
-
*[(_FlagsDecs = [
|
|
48
|
+
*[(_FlagsDecs = [UndefinedAttributes(Object.keys(flagCharacters)), AllowEmpty, Node], _GroupDecs = [CoveredBy('Element'), Node], _AssertionDecs = CoveredBy('Element'), _StartOfInputAssertionDecs = [CoveredBy('Assertion'), Node], _EndOfInputAssertionDecs = [CoveredBy('Assertion'), Node], _WordBoundaryAssertionDecs = [UndefinedAttributes(['negate']), CoveredBy('Assertion'), Node], _GapDecs = [CoveredBy('Assertion'), Node], _CharacterDecs = [CoveredBy('Element'), CoveredBy('CharacterClassElement'), Node], _CharacterClassDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), Node], _CharacterClassRangeDecs = [CoveredBy('CharacterClassElement'), Node], _CharacterSetDecs = CoveredBy('Element'), _AnyCharacterSetDecs = [CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _DigitCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _SpaceCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _WordCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _QuantifierDecs = [UndefinedAttributes(['min', 'max']), Node], _AnyDecs = InjectFrom(Shared), _KeywordDecs = [Literal, Node, InjectFrom(Shared)], _PunctuatorDecs = [Literal, Node, InjectFrom(Shared)], "Pattern")]() {
|
|
64
49
|
yield eat(m`openToken: <*Punctuator '/' { balanced: '/', balancedSpan: 'Pattern' } />`);
|
|
65
|
-
yield eat(m`<
|
|
50
|
+
yield eat(m`<_Alternatives />`);
|
|
66
51
|
yield eat(m`closeToken: <*Punctuator '/' { balancer: true } />`);
|
|
67
52
|
yield eat(m`flags$: <Flags />`);
|
|
68
53
|
}
|
|
@@ -77,9 +62,9 @@ export const grammar = class RegexGrammar {
|
|
|
77
62
|
1: chr
|
|
78
63
|
} of Object.entries(flagCharacters)) {
|
|
79
64
|
if (flagsStr.includes(chr)) {
|
|
80
|
-
yield
|
|
65
|
+
yield defineAttribute(name, true);
|
|
81
66
|
} else {
|
|
82
|
-
yield
|
|
67
|
+
yield defineAttribute(name, false);
|
|
83
68
|
}
|
|
84
69
|
}
|
|
85
70
|
for (const flagChr of flagsStr) {
|
|
@@ -92,33 +77,33 @@ export const grammar = class RegexGrammar {
|
|
|
92
77
|
} while (yield eatMatch(m`separatorTokens[]: <*Punctuator '|' />`));
|
|
93
78
|
}
|
|
94
79
|
*Alternative() {
|
|
95
|
-
yield eat(m`elements[]
|
|
80
|
+
yield eat(m`elements[]+$: <_Elements />`);
|
|
96
81
|
}
|
|
97
82
|
*Elements() {
|
|
98
83
|
yield eat(m`.[]: []`);
|
|
99
84
|
while (yield match(re`/[^|]/`)) {
|
|
100
|
-
yield eat(m`.[]+: <
|
|
85
|
+
yield eat(m`.[]+: <__Element />`);
|
|
101
86
|
}
|
|
102
87
|
}
|
|
103
88
|
*Element() {
|
|
104
89
|
yield guard(m`<*Keyword /[*+?]/ />`);
|
|
105
|
-
yield eat(m`<
|
|
90
|
+
yield eat(m`<_Any />`, [m`<CharacterClass '[' />`, m`<Group '(?:' />`, m`<_Assertion /[$^]|\\b/i />`, m`<Gap '\\g' />`, m`<_CharacterSet /\.|\\[dswp]/i />`, m`<*Character />`]);
|
|
106
91
|
if (yield match(re`/[*+?{]/`)) {
|
|
107
|
-
return
|
|
92
|
+
return shiftMatch(m`<Quantifier />`);
|
|
108
93
|
}
|
|
109
94
|
}
|
|
110
95
|
*Group() {
|
|
111
96
|
yield eat(m`openToken: <*Punctuator '(?:' { balanced: ')' } />`);
|
|
112
|
-
yield eat(m`<
|
|
97
|
+
yield eat(m`<_Alternatives />`);
|
|
113
98
|
yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
|
|
114
99
|
}
|
|
115
100
|
*CapturingGroup() {
|
|
116
101
|
yield eat(m`openToken: <*Punctuator '(' { balanced: ')' } />`);
|
|
117
|
-
yield eat(m`<
|
|
102
|
+
yield eat(m`<_Alternatives />`);
|
|
118
103
|
yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
|
|
119
104
|
}
|
|
120
105
|
*Assertion() {
|
|
121
|
-
yield eat(m`<
|
|
106
|
+
yield eat(m`<_Any />`, [m`<*StartOfInputAssertion '^' />`, m`<*EndOfInputAssertion '$' />`, m`<*WordBoundaryAssertion /\\b/i />`]);
|
|
122
107
|
}
|
|
123
108
|
*StartOfInputAssertion() {
|
|
124
109
|
yield eat(m`sigilToken: <*Keyword '^' />`);
|
|
@@ -131,7 +116,7 @@ export const grammar = class RegexGrammar {
|
|
|
131
116
|
}) {
|
|
132
117
|
yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
|
|
133
118
|
const m_ = yield eat(m`value: <*Keyword /b/i />`);
|
|
134
|
-
yield
|
|
119
|
+
yield defineAttribute('negate', buildBoolean(ctx.sourceTextFor(m_) === 'B'));
|
|
135
120
|
}
|
|
136
121
|
*Gap() {
|
|
137
122
|
yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
|
|
@@ -146,17 +131,19 @@ export const grammar = class RegexGrammar {
|
|
|
146
131
|
}
|
|
147
132
|
*CharacterClass() {
|
|
148
133
|
yield eat(m`openToken: <*Punctuator '[' { balancedSpan: 'CharacterClass', balanced: ']' } />`);
|
|
149
|
-
let negate = yield eatMatch(m`negateToken: <*Keyword '^'
|
|
150
|
-
|
|
134
|
+
let negate = yield eatMatch(m`negateToken: <*Keyword '^' />`, null, o({
|
|
135
|
+
bind: true
|
|
136
|
+
}));
|
|
137
|
+
yield defineAttribute('negate', !!negate);
|
|
151
138
|
while (yield match(re`/./s`)) {
|
|
152
|
-
yield eat(m`elements[]+$: <
|
|
139
|
+
yield eat(m`elements[]+$: <__CharacterClassElement />`);
|
|
153
140
|
}
|
|
154
141
|
yield eat(m`closeToken: <*Punctuator ']' { balancer: true } />`);
|
|
155
142
|
}
|
|
156
143
|
*CharacterClassElement() {
|
|
157
|
-
yield eat(m`<
|
|
144
|
+
yield eat(m`<_Any />`, [m`<_CharacterSet /\\[dswp]/i />`, m`<Gap '\\g' />`, m`<*Character />`]);
|
|
158
145
|
if (yield match('-')) {
|
|
159
|
-
return
|
|
146
|
+
return shiftMatch(m`<CharacterClassRange />`);
|
|
160
147
|
}
|
|
161
148
|
}
|
|
162
149
|
*CharacterClassRange() {
|
|
@@ -165,7 +152,7 @@ export const grammar = class RegexGrammar {
|
|
|
165
152
|
yield eat(m`max+$: <*Character />`);
|
|
166
153
|
}
|
|
167
154
|
*CharacterSet() {
|
|
168
|
-
yield eat(m`<
|
|
155
|
+
yield eat(m`<_Any />`, [m`<AnyCharacterSet '.' />`, m`<DigitCharacterSet /\\[dD]/ />`, m`<SpaceCharacterSet /\\[sS]/ />`, m`<WordCharacterSet /\\[wW]/ />`]);
|
|
169
156
|
}
|
|
170
157
|
*AnyCharacterSet() {
|
|
171
158
|
yield eat(m`sigilToken: <*Keyword '.' />`);
|
|
@@ -175,26 +162,26 @@ export const grammar = class RegexGrammar {
|
|
|
175
162
|
}) {
|
|
176
163
|
yield eat(m`escapeToken: <*Punctuator '\\' />`);
|
|
177
164
|
let code = yield eat(m`value: <*Keyword /[dD]/ />`);
|
|
178
|
-
yield
|
|
165
|
+
yield defineAttribute('negate', ctx.sourceTextFor(code) === 'D');
|
|
179
166
|
}
|
|
180
167
|
*SpaceCharacterSet({
|
|
181
168
|
ctx
|
|
182
169
|
}) {
|
|
183
170
|
yield eat(m`escapeToken: <*Punctuator '\\' />`);
|
|
184
171
|
let code = yield eat(m`value: <*Keyword /[sS]/ />`);
|
|
185
|
-
yield
|
|
172
|
+
yield defineAttribute('negate', ctx.sourceTextFor(code) === 'S');
|
|
186
173
|
}
|
|
187
174
|
*WordCharacterSet({
|
|
188
175
|
ctx
|
|
189
176
|
}) {
|
|
190
177
|
yield eat(m`escapeToken: <*Punctuator '\\' />`);
|
|
191
178
|
let code = yield eat(m`value: <*Keyword /[wW]/ />`);
|
|
192
|
-
yield
|
|
179
|
+
yield defineAttribute('negate', ctx.sourceTextFor(code) === 'W');
|
|
193
180
|
}
|
|
194
181
|
*Quantifier({
|
|
195
182
|
ctx
|
|
196
183
|
}) {
|
|
197
|
-
yield eat(m`element+$: <
|
|
184
|
+
yield eat(m`element+$: <__Element />`);
|
|
198
185
|
let attrs, sigil;
|
|
199
186
|
if (sigil = yield eatMatch(m`sigilToken: <*Keyword /[*+?]/ />`)) {
|
|
200
187
|
switch (ctx.sourceTextFor(sigil)) {
|
|
@@ -216,6 +203,8 @@ export const grammar = class RegexGrammar {
|
|
|
216
203
|
max: 1
|
|
217
204
|
};
|
|
218
205
|
break;
|
|
206
|
+
default:
|
|
207
|
+
yield fail();
|
|
219
208
|
}
|
|
220
209
|
} else if (yield eat(m`openToken: <*Punctuator '{' { balanced: '}' } />`)) {
|
|
221
210
|
let max;
|
|
@@ -233,44 +222,51 @@ export const grammar = class RegexGrammar {
|
|
|
233
222
|
};
|
|
234
223
|
yield eat(m`closeToken: <*Punctuator '}' { balancer: true } />`);
|
|
235
224
|
}
|
|
236
|
-
yield
|
|
237
|
-
yield
|
|
225
|
+
yield defineAttribute('min', attrs.min);
|
|
226
|
+
yield defineAttribute('max', attrs.max);
|
|
238
227
|
}
|
|
239
228
|
*UnsignedInteger() {
|
|
240
229
|
yield eat(re`/\d+/`);
|
|
241
230
|
}
|
|
231
|
+
*UnsignedHexInteger() {
|
|
232
|
+
yield eat(re`/[\da-fA-F]+/`);
|
|
233
|
+
}
|
|
242
234
|
*EscapeSequence({
|
|
243
235
|
state,
|
|
244
|
-
ctx
|
|
245
|
-
value: props
|
|
236
|
+
ctx
|
|
246
237
|
}) {
|
|
247
238
|
const parentSpan = state.span;
|
|
248
239
|
yield eat(m`escape: <*Punctuator '\\' { openSpan: 'Escape' } />`);
|
|
249
240
|
let m_;
|
|
241
|
+
let cooked;
|
|
250
242
|
if (m_ = yield match(re`/[\\/nrt0]/`)) {
|
|
251
243
|
const match_ = ctx.sourceTextFor(m_);
|
|
252
244
|
yield eat(m`code: <*Keyword ${buildString(match_)} { closeSpan: 'Escape' } />`);
|
|
245
|
+
cooked = escapables.get(match_) || match_;
|
|
253
246
|
} else if (m_ = yield match(getSpecialPattern(parentSpan))) {
|
|
254
|
-
|
|
255
|
-
yield eat(m`code: <*Keyword ${buildString(
|
|
247
|
+
cooked = ctx.sourceTextFor(m_);
|
|
248
|
+
yield eat(m`code: <*Keyword ${buildString(cooked)} { closeSpan: 'Escape' } />`);
|
|
256
249
|
} else if (yield match(re`/[ux]/`)) {
|
|
257
|
-
yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
|
|
250
|
+
let code = yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
|
|
251
|
+
let value = code.get('value');
|
|
252
|
+
cooked = String.fromCodePoint(parseInt(ctx.sourceTextFor(value), 16));
|
|
258
253
|
} else {
|
|
259
254
|
yield fail();
|
|
260
255
|
}
|
|
256
|
+
yield defineAttribute('cooked', cooked);
|
|
261
257
|
}
|
|
262
258
|
*EscapeCode() {
|
|
263
259
|
if (yield eatMatch(m`type: <*Keyword 'u' />`)) {
|
|
264
260
|
if (yield eatMatch(m`openToken: <*Punctuator '{' />`)) {
|
|
265
|
-
yield eatMatch(m`value$: <*
|
|
261
|
+
yield eatMatch(m`value$: <*UnsignedHexInteger />`);
|
|
266
262
|
yield eat(m`closeToken: <*Punctuator '}' />`);
|
|
267
263
|
} else {
|
|
268
|
-
yield eat(m`value$: <*
|
|
264
|
+
yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
|
|
269
265
|
yield eat(m`closeToken: null`);
|
|
270
266
|
}
|
|
271
267
|
} else if (yield eatMatch(m`type: <*Keyword 'x' />`)) {
|
|
272
268
|
yield eat(m`openToken: null`);
|
|
273
|
-
yield eat(m`value$: <*
|
|
269
|
+
yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
|
|
274
270
|
yield eat(m`closeToken: null`);
|
|
275
271
|
}
|
|
276
272
|
}
|
package/lib/grammar.macro.js
CHANGED
|
@@ -3,8 +3,9 @@ import {
|
|
|
3
3
|
Node,
|
|
4
4
|
CoveredBy,
|
|
5
5
|
InjectFrom,
|
|
6
|
-
|
|
6
|
+
UndefinedAttributes,
|
|
7
7
|
AllowEmpty,
|
|
8
|
+
Literal,
|
|
8
9
|
} from '@bablr/helpers/decorators';
|
|
9
10
|
import objectEntries from 'iter-tools-es/methods/object-entries';
|
|
10
11
|
import * as Shared from '@bablr/helpers/productions';
|
|
@@ -12,10 +13,11 @@ import {
|
|
|
12
13
|
eat,
|
|
13
14
|
eatMatch,
|
|
14
15
|
match,
|
|
15
|
-
|
|
16
|
+
shiftMatch,
|
|
16
17
|
guard,
|
|
17
|
-
|
|
18
|
+
defineAttribute,
|
|
18
19
|
fail,
|
|
20
|
+
o,
|
|
19
21
|
} from '@bablr/helpers/grammar';
|
|
20
22
|
import { buildString, buildBoolean } from '@bablr/helpers/builders';
|
|
21
23
|
|
|
@@ -32,25 +34,6 @@ const escapables = new Map(
|
|
|
32
34
|
}),
|
|
33
35
|
);
|
|
34
36
|
|
|
35
|
-
export const getCooked = (escapeNode, span, ctx) => {
|
|
36
|
-
let cooked;
|
|
37
|
-
const codeNode = escapeNode.get('code');
|
|
38
|
-
const type = ctx.sourceTextFor(codeNode.get('typeToken'));
|
|
39
|
-
const value = ctx.sourceTextFor(codeNode.get('value'));
|
|
40
|
-
|
|
41
|
-
if (!type) {
|
|
42
|
-
const match_ = ctx.sourceTextFor(codeNode);
|
|
43
|
-
|
|
44
|
-
cooked = escapables.get(match_) || match_;
|
|
45
|
-
} else if (type === 'u' || type === 'x') {
|
|
46
|
-
cooked = parseInt(value, 16);
|
|
47
|
-
} else {
|
|
48
|
-
throw new Error();
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
return cooked.toString(10);
|
|
52
|
-
};
|
|
53
|
-
|
|
54
37
|
const flagCharacters = {
|
|
55
38
|
global: 'g',
|
|
56
39
|
ignoreCase: 'i',
|
|
@@ -64,7 +47,7 @@ const unique = (flags) => flags.length === new Set(flags).size;
|
|
|
64
47
|
|
|
65
48
|
const getSpecialPattern = (span) => {
|
|
66
49
|
if (span === 'Pattern') {
|
|
67
|
-
return re`/[
|
|
50
|
+
return re`/[*+?{}[\]().^$|\n\\<>]/`;
|
|
68
51
|
} else if (span === 'CharacterClass') {
|
|
69
52
|
return re`/[\]\\]/`;
|
|
70
53
|
} else {
|
|
@@ -76,12 +59,12 @@ export const grammar = class RegexGrammar {
|
|
|
76
59
|
@Node
|
|
77
60
|
*Pattern() {
|
|
78
61
|
yield eat(m`openToken: <*Punctuator '/' { balanced: '/', balancedSpan: 'Pattern' } />`);
|
|
79
|
-
yield eat(m`<
|
|
62
|
+
yield eat(m`<_Alternatives />`);
|
|
80
63
|
yield eat(m`closeToken: <*Punctuator '/' { balancer: true } />`);
|
|
81
64
|
yield eat(m`flags$: <Flags />`);
|
|
82
65
|
}
|
|
83
66
|
|
|
84
|
-
@
|
|
67
|
+
@UndefinedAttributes(Object.keys(flagCharacters))
|
|
85
68
|
@AllowEmpty
|
|
86
69
|
@Node
|
|
87
70
|
*Flags({ ctx }) {
|
|
@@ -93,9 +76,9 @@ export const grammar = class RegexGrammar {
|
|
|
93
76
|
|
|
94
77
|
for (const { 0: name, 1: chr } of Object.entries(flagCharacters)) {
|
|
95
78
|
if (flagsStr.includes(chr)) {
|
|
96
|
-
yield
|
|
79
|
+
yield defineAttribute(name, true);
|
|
97
80
|
} else {
|
|
98
|
-
yield
|
|
81
|
+
yield defineAttribute(name, false);
|
|
99
82
|
}
|
|
100
83
|
}
|
|
101
84
|
|
|
@@ -114,31 +97,31 @@ export const grammar = class RegexGrammar {
|
|
|
114
97
|
@AllowEmpty
|
|
115
98
|
@Node
|
|
116
99
|
*Alternative() {
|
|
117
|
-
yield eat(m`elements[]
|
|
100
|
+
yield eat(m`elements[]+$: <_Elements />`);
|
|
118
101
|
}
|
|
119
102
|
|
|
120
103
|
@AllowEmpty
|
|
121
104
|
*Elements() {
|
|
122
105
|
yield eat(m`.[]: []`);
|
|
123
106
|
while (yield match(re`/[^|]/`)) {
|
|
124
|
-
yield eat(m`.[]+: <
|
|
107
|
+
yield eat(m`.[]+: <__Element />`);
|
|
125
108
|
}
|
|
126
109
|
}
|
|
127
110
|
|
|
128
111
|
*Element() {
|
|
129
112
|
yield guard(m`<*Keyword /[*+?]/ />`);
|
|
130
113
|
|
|
131
|
-
yield eat(m`<
|
|
114
|
+
yield eat(m`<_Any />`, [
|
|
132
115
|
m`<CharacterClass '[' />`,
|
|
133
116
|
m`<Group '(?:' />`,
|
|
134
|
-
m`<
|
|
117
|
+
m`<_Assertion /[$^]|\\b/i />`,
|
|
135
118
|
m`<Gap '\\g' />`,
|
|
136
|
-
m`<
|
|
119
|
+
m`<_CharacterSet /\.|\\[dswp]/i />`,
|
|
137
120
|
m`<*Character />`,
|
|
138
121
|
]);
|
|
139
122
|
|
|
140
123
|
if (yield match(re`/[*+?{]/`)) {
|
|
141
|
-
return
|
|
124
|
+
return shiftMatch(m`<Quantifier />`);
|
|
142
125
|
}
|
|
143
126
|
}
|
|
144
127
|
|
|
@@ -146,20 +129,20 @@ export const grammar = class RegexGrammar {
|
|
|
146
129
|
@Node
|
|
147
130
|
*Group() {
|
|
148
131
|
yield eat(m`openToken: <*Punctuator '(?:' { balanced: ')' } />`);
|
|
149
|
-
yield eat(m`<
|
|
132
|
+
yield eat(m`<_Alternatives />`);
|
|
150
133
|
yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
|
|
151
134
|
}
|
|
152
135
|
|
|
153
136
|
@Node
|
|
154
137
|
*CapturingGroup() {
|
|
155
138
|
yield eat(m`openToken: <*Punctuator '(' { balanced: ')' } />`);
|
|
156
|
-
yield eat(m`<
|
|
139
|
+
yield eat(m`<_Alternatives />`);
|
|
157
140
|
yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
|
|
158
141
|
}
|
|
159
142
|
|
|
160
143
|
@CoveredBy('Element')
|
|
161
144
|
*Assertion() {
|
|
162
|
-
yield eat(m`<
|
|
145
|
+
yield eat(m`<_Any />`, [
|
|
163
146
|
m`<*StartOfInputAssertion '^' />`,
|
|
164
147
|
m`<*EndOfInputAssertion '$' />`,
|
|
165
148
|
m`<*WordBoundaryAssertion /\\b/i />`,
|
|
@@ -178,13 +161,13 @@ export const grammar = class RegexGrammar {
|
|
|
178
161
|
yield eatMatch(m`sigilToken: <*Keyword '$' />`);
|
|
179
162
|
}
|
|
180
163
|
|
|
181
|
-
@
|
|
164
|
+
@UndefinedAttributes(['negate'])
|
|
182
165
|
@CoveredBy('Assertion')
|
|
183
166
|
@Node
|
|
184
167
|
*WordBoundaryAssertion({ ctx }) {
|
|
185
168
|
yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
|
|
186
169
|
const m_ = yield eat(m`value: <*Keyword /b/i />`);
|
|
187
|
-
yield
|
|
170
|
+
yield defineAttribute('negate', buildBoolean(ctx.sourceTextFor(m_) === 'B'));
|
|
188
171
|
}
|
|
189
172
|
|
|
190
173
|
@CoveredBy('Assertion')
|
|
@@ -205,28 +188,28 @@ export const grammar = class RegexGrammar {
|
|
|
205
188
|
}
|
|
206
189
|
}
|
|
207
190
|
|
|
208
|
-
@
|
|
191
|
+
@UndefinedAttributes(['negate'])
|
|
209
192
|
@CoveredBy('Element')
|
|
210
193
|
@Node
|
|
211
194
|
*CharacterClass() {
|
|
212
195
|
yield eat(m`openToken: <*Punctuator '[' { balancedSpan: 'CharacterClass', balanced: ']' } />`);
|
|
213
196
|
|
|
214
|
-
let negate = yield eatMatch(m`negateToken: <*Keyword '^'
|
|
197
|
+
let negate = yield eatMatch(m`negateToken: <*Keyword '^' />`, null, o({ bind: true }));
|
|
215
198
|
|
|
216
|
-
yield
|
|
199
|
+
yield defineAttribute('negate', !!negate);
|
|
217
200
|
|
|
218
201
|
while (yield match(re`/./s`)) {
|
|
219
|
-
yield eat(m`elements[]+$: <
|
|
202
|
+
yield eat(m`elements[]+$: <__CharacterClassElement />`);
|
|
220
203
|
}
|
|
221
204
|
|
|
222
205
|
yield eat(m`closeToken: <*Punctuator ']' { balancer: true } />`);
|
|
223
206
|
}
|
|
224
207
|
|
|
225
208
|
*CharacterClassElement() {
|
|
226
|
-
yield eat(m`<
|
|
209
|
+
yield eat(m`<_Any />`, [m`<_CharacterSet /\\[dswp]/i />`, m`<Gap '\\g' />`, m`<*Character />`]);
|
|
227
210
|
|
|
228
211
|
if (yield match('-')) {
|
|
229
|
-
return
|
|
212
|
+
return shiftMatch(m`<CharacterClassRange />`);
|
|
230
213
|
}
|
|
231
214
|
}
|
|
232
215
|
|
|
@@ -240,7 +223,7 @@ export const grammar = class RegexGrammar {
|
|
|
240
223
|
|
|
241
224
|
@CoveredBy('Element')
|
|
242
225
|
*CharacterSet() {
|
|
243
|
-
yield eat(m`<
|
|
226
|
+
yield eat(m`<_Any />`, [
|
|
244
227
|
m`<AnyCharacterSet '.' />`,
|
|
245
228
|
m`<DigitCharacterSet /\\[dD]/ />`,
|
|
246
229
|
m`<SpaceCharacterSet /\\[sS]/ />`,
|
|
@@ -248,13 +231,15 @@ export const grammar = class RegexGrammar {
|
|
|
248
231
|
]);
|
|
249
232
|
}
|
|
250
233
|
|
|
234
|
+
@CoveredBy('Element')
|
|
251
235
|
@CoveredBy('CharacterSet')
|
|
252
236
|
@Node
|
|
253
237
|
*AnyCharacterSet() {
|
|
254
238
|
yield eat(m`sigilToken: <*Keyword '.' />`);
|
|
255
239
|
}
|
|
256
240
|
|
|
257
|
-
@
|
|
241
|
+
@UndefinedAttributes(['negate'])
|
|
242
|
+
@CoveredBy('Element')
|
|
258
243
|
@CoveredBy('CharacterSet')
|
|
259
244
|
@Node
|
|
260
245
|
*DigitCharacterSet({ ctx }) {
|
|
@@ -262,10 +247,11 @@ export const grammar = class RegexGrammar {
|
|
|
262
247
|
|
|
263
248
|
let code = yield eat(m`value: <*Keyword /[dD]/ />`);
|
|
264
249
|
|
|
265
|
-
yield
|
|
250
|
+
yield defineAttribute('negate', ctx.sourceTextFor(code) === 'D');
|
|
266
251
|
}
|
|
267
252
|
|
|
268
|
-
@
|
|
253
|
+
@UndefinedAttributes(['negate'])
|
|
254
|
+
@CoveredBy('Element')
|
|
269
255
|
@CoveredBy('CharacterSet')
|
|
270
256
|
@Node
|
|
271
257
|
*SpaceCharacterSet({ ctx }) {
|
|
@@ -273,10 +259,11 @@ export const grammar = class RegexGrammar {
|
|
|
273
259
|
|
|
274
260
|
let code = yield eat(m`value: <*Keyword /[sS]/ />`);
|
|
275
261
|
|
|
276
|
-
yield
|
|
262
|
+
yield defineAttribute('negate', ctx.sourceTextFor(code) === 'S');
|
|
277
263
|
}
|
|
278
264
|
|
|
279
|
-
@
|
|
265
|
+
@UndefinedAttributes(['negate'])
|
|
266
|
+
@CoveredBy('Element')
|
|
280
267
|
@CoveredBy('CharacterSet')
|
|
281
268
|
@Node
|
|
282
269
|
*WordCharacterSet({ ctx }) {
|
|
@@ -284,13 +271,13 @@ export const grammar = class RegexGrammar {
|
|
|
284
271
|
|
|
285
272
|
let code = yield eat(m`value: <*Keyword /[wW]/ />`);
|
|
286
273
|
|
|
287
|
-
yield
|
|
274
|
+
yield defineAttribute('negate', ctx.sourceTextFor(code) === 'W');
|
|
288
275
|
}
|
|
289
276
|
|
|
290
|
-
@
|
|
277
|
+
@UndefinedAttributes(['min', 'max'])
|
|
291
278
|
@Node
|
|
292
279
|
*Quantifier({ ctx }) {
|
|
293
|
-
yield eat(m`element+$: <
|
|
280
|
+
yield eat(m`element+$: <__Element />`);
|
|
294
281
|
|
|
295
282
|
let attrs, sigil;
|
|
296
283
|
|
|
@@ -305,6 +292,8 @@ export const grammar = class RegexGrammar {
|
|
|
305
292
|
case '?':
|
|
306
293
|
attrs = { min: 0, max: 1 };
|
|
307
294
|
break;
|
|
295
|
+
default:
|
|
296
|
+
yield fail();
|
|
308
297
|
}
|
|
309
298
|
} else if (yield eat(m`openToken: <*Punctuator '{' { balanced: '}' } />`)) {
|
|
310
299
|
let max;
|
|
@@ -325,8 +314,8 @@ export const grammar = class RegexGrammar {
|
|
|
325
314
|
yield eat(m`closeToken: <*Punctuator '}' { balancer: true } />`);
|
|
326
315
|
}
|
|
327
316
|
|
|
328
|
-
yield
|
|
329
|
-
yield
|
|
317
|
+
yield defineAttribute('min', attrs.min);
|
|
318
|
+
yield defineAttribute('max', attrs.max);
|
|
330
319
|
}
|
|
331
320
|
|
|
332
321
|
@Node
|
|
@@ -335,39 +324,53 @@ export const grammar = class RegexGrammar {
|
|
|
335
324
|
}
|
|
336
325
|
|
|
337
326
|
@Node
|
|
338
|
-
*
|
|
327
|
+
*UnsignedHexInteger() {
|
|
328
|
+
yield eat(re`/[\da-fA-F]+/`);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
@Node
|
|
332
|
+
*EscapeSequence({ state, ctx }) {
|
|
339
333
|
const parentSpan = state.span;
|
|
340
334
|
|
|
341
335
|
yield eat(m`escape: <*Punctuator '\\' { openSpan: 'Escape' } />`);
|
|
342
336
|
|
|
343
337
|
let m_;
|
|
338
|
+
let cooked;
|
|
344
339
|
|
|
345
340
|
if ((m_ = yield match(re`/[\\/nrt0]/`))) {
|
|
346
341
|
const match_ = ctx.sourceTextFor(m_);
|
|
347
342
|
yield eat(m`code: <*Keyword ${buildString(match_)} { closeSpan: 'Escape' } />`);
|
|
343
|
+
|
|
344
|
+
cooked = escapables.get(match_) || match_;
|
|
348
345
|
} else if ((m_ = yield match(getSpecialPattern(parentSpan)))) {
|
|
349
|
-
|
|
350
|
-
yield eat(m`code: <*Keyword ${buildString(
|
|
346
|
+
cooked = ctx.sourceTextFor(m_);
|
|
347
|
+
yield eat(m`code: <*Keyword ${buildString(cooked)} { closeSpan: 'Escape' } />`);
|
|
351
348
|
} else if (yield match(re`/[ux]/`)) {
|
|
352
|
-
yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
|
|
349
|
+
let code = yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
|
|
350
|
+
|
|
351
|
+
let value = code.get('value');
|
|
352
|
+
|
|
353
|
+
cooked = String.fromCodePoint(parseInt(ctx.sourceTextFor(value), 16));
|
|
353
354
|
} else {
|
|
354
355
|
yield fail();
|
|
355
356
|
}
|
|
357
|
+
|
|
358
|
+
yield defineAttribute('cooked', cooked);
|
|
356
359
|
}
|
|
357
360
|
|
|
358
361
|
@Node
|
|
359
362
|
*EscapeCode() {
|
|
360
363
|
if (yield eatMatch(m`type: <*Keyword 'u' />`)) {
|
|
361
364
|
if (yield eatMatch(m`openToken: <*Punctuator '{' />`)) {
|
|
362
|
-
yield eatMatch(m`value$: <*
|
|
365
|
+
yield eatMatch(m`value$: <*UnsignedHexInteger />`);
|
|
363
366
|
yield eat(m`closeToken: <*Punctuator '}' />`);
|
|
364
367
|
} else {
|
|
365
|
-
yield eat(m`value$: <*
|
|
368
|
+
yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
|
|
366
369
|
yield eat(m`closeToken: null`);
|
|
367
370
|
}
|
|
368
371
|
} else if (yield eatMatch(m`type: <*Keyword 'x' />`)) {
|
|
369
372
|
yield eat(m`openToken: null`);
|
|
370
|
-
yield eat(m`value$: <*
|
|
373
|
+
yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
|
|
371
374
|
yield eat(m`closeToken: null`);
|
|
372
375
|
}
|
|
373
376
|
}
|
|
@@ -384,10 +387,12 @@ export const grammar = class RegexGrammar {
|
|
|
384
387
|
@InjectFrom(Shared)
|
|
385
388
|
*Any() {}
|
|
386
389
|
|
|
390
|
+
@Literal
|
|
387
391
|
@Node
|
|
388
392
|
@InjectFrom(Shared)
|
|
389
393
|
*Keyword() {}
|
|
390
394
|
|
|
395
|
+
@Literal
|
|
391
396
|
@Node
|
|
392
397
|
@InjectFrom(Shared)
|
|
393
398
|
*Punctuator() {}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bablr/language-en-regex-vm-pattern",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.10.0",
|
|
4
4
|
"description": "A BABLR language for nonbacktracking JS-style regexes",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=12.0.0"
|
|
@@ -22,18 +22,18 @@
|
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"@babel/runtime": "^7.23.2",
|
|
25
|
-
"@bablr/
|
|
26
|
-
"@bablr/
|
|
27
|
-
"@bablr/agast-
|
|
25
|
+
"@bablr/boot": "0.9.0",
|
|
26
|
+
"@bablr/helpers": "0.23.0",
|
|
27
|
+
"@bablr/agast-helpers": "0.8.0",
|
|
28
|
+
"@bablr/agast-vm-helpers": "0.8.0",
|
|
28
29
|
"iter-tools-es": "7.5.3"
|
|
29
30
|
},
|
|
30
31
|
"devDependencies": {
|
|
31
|
-
"@bablr/
|
|
32
|
-
"@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#49f5952efed27f94ee9b94340eb1563c440bf64e",
|
|
32
|
+
"@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#c97bfa4b3663f8378e9b3e42bb5a41e685406cf9",
|
|
33
33
|
"@bablr/macrome": "^0.1.3",
|
|
34
34
|
"@bablr/macrome-generator-bablr": "^0.3.2",
|
|
35
35
|
"@qnighy/dedent": "0.1.1",
|
|
36
|
-
"bablr": "^0.
|
|
36
|
+
"bablr": "^0.7.0",
|
|
37
37
|
"enhanced-resolve": "^5.12.0",
|
|
38
38
|
"eslint": "^8.47.0",
|
|
39
39
|
"eslint-import-resolver-enhanced-resolve": "^1.0.5",
|