@bablr/language-en-regex-vm-pattern 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/grammar.js +943 -974
- package/lib/grammar.macro.js +86 -72
- package/package.json +9 -9
package/lib/grammar.macro.js
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
import { i, re } from '@bablr/boot/shorthand.macro';
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
Node,
|
|
4
|
+
CoveredBy,
|
|
5
|
+
InjectFrom,
|
|
6
|
+
UnboundAttributes,
|
|
7
|
+
AllowEmpty,
|
|
8
|
+
} from '@bablr/helpers/decorators';
|
|
3
9
|
import objectEntries from 'iter-tools-es/methods/object-entries';
|
|
4
10
|
import * as Shared from '@bablr/helpers/productions';
|
|
5
11
|
import { buildString, buildBoolean, buildNumber, buildNullTag } from '@bablr/agast-vm-helpers';
|
|
@@ -17,6 +23,25 @@ const escapables = new Map(
|
|
|
17
23
|
}),
|
|
18
24
|
);
|
|
19
25
|
|
|
26
|
+
export const getCooked = (escapeNode, span, ctx) => {
|
|
27
|
+
let cooked;
|
|
28
|
+
const codeNode = escapeNode.get('code');
|
|
29
|
+
const type = ctx.sourceTextFor(codeNode.get('typeToken'));
|
|
30
|
+
const value = ctx.sourceTextFor(codeNode.get('value'));
|
|
31
|
+
|
|
32
|
+
if (!type) {
|
|
33
|
+
const match_ = ctx.sourceTextFor(codeNode);
|
|
34
|
+
|
|
35
|
+
cooked = escapables.get(match_) || match_;
|
|
36
|
+
} else if (type === 'u' || type === 'x') {
|
|
37
|
+
cooked = parseInt(value, 16);
|
|
38
|
+
} else {
|
|
39
|
+
throw new Error();
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
return cooked.toString(10);
|
|
43
|
+
};
|
|
44
|
+
|
|
20
45
|
const flagCharacters = {
|
|
21
46
|
global: 'g',
|
|
22
47
|
ignoreCase: 'i',
|
|
@@ -41,13 +66,13 @@ const getSpecialPattern = (span) => {
|
|
|
41
66
|
export const grammar = class RegexGrammar {
|
|
42
67
|
@Node
|
|
43
68
|
*Pattern() {
|
|
44
|
-
yield i`eat(
|
|
69
|
+
yield i`eat(<*Punctuator '/' balanced='/' balancedSpan='Pattern' /> 'openToken')`;
|
|
45
70
|
yield i`eat(<Alternatives />)`;
|
|
46
|
-
yield i`eat(
|
|
47
|
-
yield i`eat(<Flags /> 'flags')`;
|
|
71
|
+
yield i`eat(<*Punctuator '/' balancer /> 'closeToken')`;
|
|
72
|
+
yield i`eat(<Flags /> 'flags$')`;
|
|
48
73
|
}
|
|
49
74
|
|
|
50
|
-
@
|
|
75
|
+
@UnboundAttributes(Object.keys(flagCharacters))
|
|
51
76
|
@AllowEmpty
|
|
52
77
|
@Node
|
|
53
78
|
*Flags({ ctx }) {
|
|
@@ -73,28 +98,26 @@ export const grammar = class RegexGrammar {
|
|
|
73
98
|
@AllowEmpty
|
|
74
99
|
*Alternatives() {
|
|
75
100
|
do {
|
|
76
|
-
yield i`eat(<Alternative /> 'alternatives[]')`;
|
|
77
|
-
} while (yield i`eatMatch(
|
|
101
|
+
yield i`eat(<Alternative /> 'alternatives[]$')`;
|
|
102
|
+
} while (yield i`eatMatch(<*Punctuator '|' /> 'separators[]')`);
|
|
78
103
|
}
|
|
79
104
|
|
|
80
105
|
@AllowEmpty
|
|
81
106
|
@Node
|
|
82
107
|
*Alternative() {
|
|
83
|
-
yield i`eat(<Elements />)`;
|
|
108
|
+
yield i`eat(<Elements /> 'elements[]$')`;
|
|
84
109
|
}
|
|
85
110
|
|
|
86
111
|
@AllowEmpty
|
|
87
112
|
*Elements() {
|
|
88
|
-
|
|
113
|
+
yield i`eat([])`;
|
|
89
114
|
while (yield i`match(/[^|]/)`) {
|
|
90
|
-
|
|
91
|
-
yield i`eat(<+Element /> 'elements[]')`;
|
|
115
|
+
yield i`eat(<+Element />)`;
|
|
92
116
|
}
|
|
93
|
-
if (!matched) yield i`eat(null 'elements[]')`;
|
|
94
117
|
}
|
|
95
118
|
|
|
96
119
|
*Element() {
|
|
97
|
-
yield i`guard(
|
|
120
|
+
yield i`guard(<*Keyword /[*+?]/ />)`;
|
|
98
121
|
|
|
99
122
|
yield i`eat(<Any /> null [
|
|
100
123
|
<+CharacterClass '[' />
|
|
@@ -113,16 +136,16 @@ export const grammar = class RegexGrammar {
|
|
|
113
136
|
@CoveredBy('Element')
|
|
114
137
|
@Node
|
|
115
138
|
*Group() {
|
|
116
|
-
yield i`eat(
|
|
139
|
+
yield i`eat(<*Punctuator '(?:' balanced=')' /> 'openToken')`;
|
|
117
140
|
yield i`eat(<Alternatives />)`;
|
|
118
|
-
yield i`eat(
|
|
141
|
+
yield i`eat(<*Punctuator ')' balancer /> 'closeToken')`;
|
|
119
142
|
}
|
|
120
143
|
|
|
121
144
|
@Node
|
|
122
145
|
*CapturingGroup() {
|
|
123
|
-
yield i`eat(
|
|
146
|
+
yield i`eat(<*Punctuator '(' balanced=')' /> 'openToken')`;
|
|
124
147
|
yield i`eat(<Alternatives />)`;
|
|
125
|
-
yield i`eat(
|
|
148
|
+
yield i`eat(<*Punctuator ')' balancer /> 'closeToken')`;
|
|
126
149
|
}
|
|
127
150
|
|
|
128
151
|
@CoveredBy('Element')
|
|
@@ -137,29 +160,29 @@ export const grammar = class RegexGrammar {
|
|
|
137
160
|
@CoveredBy('Assertion')
|
|
138
161
|
@Node
|
|
139
162
|
*StartOfInputAssertion() {
|
|
140
|
-
yield i`eat(
|
|
163
|
+
yield i`eat(<*Keyword '^' /> 'sigilToken')`;
|
|
141
164
|
}
|
|
142
165
|
|
|
143
166
|
@CoveredBy('Assertion')
|
|
144
167
|
@Node
|
|
145
168
|
*EndOfInputAssertion() {
|
|
146
|
-
yield i`eatMatch(
|
|
169
|
+
yield i`eatMatch(<*Keyword '$' /> 'sigilToken')`;
|
|
147
170
|
}
|
|
148
171
|
|
|
149
|
-
@
|
|
172
|
+
@UnboundAttributes(['negate'])
|
|
150
173
|
@CoveredBy('Assertion')
|
|
151
174
|
@Node
|
|
152
175
|
*WordBoundaryAssertion({ ctx }) {
|
|
153
|
-
yield i`eatMatch(
|
|
154
|
-
const m = yield i`eat(
|
|
176
|
+
yield i`eatMatch(<*Punctuator '\\' /> 'escapeToken')`;
|
|
177
|
+
const m = yield i`eat(<*Keyword /b/i /> 'value')`;
|
|
155
178
|
yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(m) === 'B')})`;
|
|
156
179
|
}
|
|
157
180
|
|
|
158
181
|
@CoveredBy('Assertion')
|
|
159
182
|
@Node
|
|
160
183
|
*Gap() {
|
|
161
|
-
yield i`eatMatch(
|
|
162
|
-
yield i`eat(
|
|
184
|
+
yield i`eatMatch(<*Punctuator '\\' /> 'escapeToken')`;
|
|
185
|
+
yield i`eat(<*Keyword 'g' /> 'value')`;
|
|
163
186
|
}
|
|
164
187
|
|
|
165
188
|
@CoveredBy('Element')
|
|
@@ -173,21 +196,21 @@ export const grammar = class RegexGrammar {
|
|
|
173
196
|
}
|
|
174
197
|
}
|
|
175
198
|
|
|
176
|
-
@
|
|
199
|
+
@UnboundAttributes(['negate'])
|
|
177
200
|
@CoveredBy('Element')
|
|
178
201
|
@Node
|
|
179
202
|
*CharacterClass() {
|
|
180
|
-
yield i`eat(
|
|
203
|
+
yield i`eat(<*Punctuator '[' balancedSpan='CharacterClass' balanced=']' /> 'openToken')`;
|
|
181
204
|
|
|
182
|
-
let neg = yield i`eatMatch(
|
|
205
|
+
let neg = yield i`eatMatch(<*Keyword '^' /> 'negateToken')`;
|
|
183
206
|
|
|
184
207
|
yield i`bindAttribute('negate' ${buildBoolean(neg)})`;
|
|
185
208
|
|
|
186
209
|
while (yield i`match(/./s)`) {
|
|
187
|
-
yield i`eat(<+CharacterClassElement /> 'elements[]')`;
|
|
210
|
+
yield i`eat(<+CharacterClassElement /> 'elements[]$')`;
|
|
188
211
|
}
|
|
189
212
|
|
|
190
|
-
yield i`eat(
|
|
213
|
+
yield i`eat(<*Punctuator ']' balancer /> 'closeToken')`;
|
|
191
214
|
}
|
|
192
215
|
|
|
193
216
|
*CharacterClassElement() {
|
|
@@ -205,15 +228,15 @@ export const grammar = class RegexGrammar {
|
|
|
205
228
|
@CoveredBy('CharacterClassElement')
|
|
206
229
|
@Node
|
|
207
230
|
*CharacterClassRange() {
|
|
208
|
-
yield i`eat(<*+Character /> 'min')`;
|
|
209
|
-
yield i`eat(
|
|
210
|
-
yield i`eat(<*+Character /> 'max')`;
|
|
231
|
+
yield i`eat(<*+Character /> 'min$')`;
|
|
232
|
+
yield i`eat(<*Punctuator '-' /> 'sigilToken')`;
|
|
233
|
+
yield i`eat(<*+Character /> 'max$')`;
|
|
211
234
|
}
|
|
212
235
|
|
|
213
236
|
@CoveredBy('Element')
|
|
214
237
|
*CharacterSet() {
|
|
215
238
|
yield i`eat(<Any /> null [
|
|
216
|
-
<+AnyCharacterSet '.'
|
|
239
|
+
<+AnyCharacterSet '.' />
|
|
217
240
|
<+DigitCharacterSet /\\[dD]/ />
|
|
218
241
|
<+SpaceCharacterSet /\\[sS]/ />
|
|
219
242
|
<+WordCharacterSet /\\[wW]/ />
|
|
@@ -223,50 +246,50 @@ export const grammar = class RegexGrammar {
|
|
|
223
246
|
@CoveredBy('CharacterSet')
|
|
224
247
|
@Node
|
|
225
248
|
*AnyCharacterSet() {
|
|
226
|
-
yield i`eat(
|
|
249
|
+
yield i`eat(<*Keyword '.' /> 'sigilToken')`;
|
|
227
250
|
}
|
|
228
251
|
|
|
229
|
-
@
|
|
252
|
+
@UnboundAttributes(['negate'])
|
|
230
253
|
@CoveredBy('CharacterSet')
|
|
231
254
|
@Node
|
|
232
255
|
*DigitCharacterSet({ ctx }) {
|
|
233
|
-
yield i`eat(
|
|
256
|
+
yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
|
|
234
257
|
|
|
235
|
-
let code = yield i`eat(
|
|
258
|
+
let code = yield i`eat(<*Keyword /[dD]/ /> 'value')`;
|
|
236
259
|
|
|
237
260
|
yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'D')})`;
|
|
238
261
|
}
|
|
239
262
|
|
|
240
|
-
@
|
|
263
|
+
@UnboundAttributes(['negate'])
|
|
241
264
|
@CoveredBy('CharacterSet')
|
|
242
265
|
@Node
|
|
243
266
|
*SpaceCharacterSet({ ctx }) {
|
|
244
|
-
yield i`eat(
|
|
267
|
+
yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
|
|
245
268
|
|
|
246
|
-
let code = yield i`eat(
|
|
269
|
+
let code = yield i`eat(<*Keyword /[sS]/ /> 'value')`;
|
|
247
270
|
|
|
248
271
|
yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'S')})`;
|
|
249
272
|
}
|
|
250
273
|
|
|
251
|
-
@
|
|
274
|
+
@UnboundAttributes(['negate'])
|
|
252
275
|
@CoveredBy('CharacterSet')
|
|
253
276
|
@Node
|
|
254
277
|
*WordCharacterSet({ ctx }) {
|
|
255
|
-
yield i`eat(
|
|
278
|
+
yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
|
|
256
279
|
|
|
257
|
-
let code = yield i`eat(
|
|
280
|
+
let code = yield i`eat(<*Keyword /[wW]/ /> 'value')`;
|
|
258
281
|
|
|
259
282
|
yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'W')})`;
|
|
260
283
|
}
|
|
261
284
|
|
|
262
|
-
@
|
|
285
|
+
@UnboundAttributes(['min', 'max'])
|
|
263
286
|
@Node
|
|
264
287
|
*Quantifier({ ctx }) {
|
|
265
|
-
yield i`eat(<+Element /> 'element')`;
|
|
288
|
+
yield i`eat(<+Element /> 'element$')`;
|
|
266
289
|
|
|
267
290
|
let attrs, sigil;
|
|
268
291
|
|
|
269
|
-
if ((sigil = yield i`eatMatch(
|
|
292
|
+
if ((sigil = yield i`eatMatch(<*Keyword /[*+?]/ /> 'sigilToken')`)) {
|
|
270
293
|
switch (ctx.sourceTextFor(sigil)) {
|
|
271
294
|
case '*':
|
|
272
295
|
attrs = { min: 0, max: Infinity };
|
|
@@ -278,12 +301,12 @@ export const grammar = class RegexGrammar {
|
|
|
278
301
|
attrs = { min: 0, max: 1 };
|
|
279
302
|
break;
|
|
280
303
|
}
|
|
281
|
-
} else if (yield i`eat(
|
|
304
|
+
} else if (yield i`eat(<*Punctuator '{' balanced='}' /> 'openToken')`) {
|
|
282
305
|
let max;
|
|
283
|
-
let min = yield i`eat(<*UnsignedInteger /> 'min')`;
|
|
306
|
+
let min = yield i`eat(<*UnsignedInteger /> 'min$')`;
|
|
284
307
|
|
|
285
|
-
if (yield i`eatMatch(
|
|
286
|
-
max = yield i`eatMatch(<*UnsignedInteger /> 'max')`;
|
|
308
|
+
if (yield i`eatMatch(<*Punctuator ',' /> 'separator')`) {
|
|
309
|
+
max = yield i`eatMatch(<*UnsignedInteger /> 'max$')`;
|
|
287
310
|
}
|
|
288
311
|
|
|
289
312
|
min = min && ctx.sourceTextFor(min);
|
|
@@ -294,7 +317,7 @@ export const grammar = class RegexGrammar {
|
|
|
294
317
|
|
|
295
318
|
attrs = { min, max };
|
|
296
319
|
|
|
297
|
-
yield i`eat(
|
|
320
|
+
yield i`eat(<*Punctuator '}' balancer /> 'closeToken')`;
|
|
298
321
|
}
|
|
299
322
|
|
|
300
323
|
yield i`bindAttribute('min' ${attrs.min ? buildNumber(attrs.min) : buildNullTag()})`;
|
|
@@ -306,51 +329,42 @@ export const grammar = class RegexGrammar {
|
|
|
306
329
|
yield i`eat(/\d+/)`;
|
|
307
330
|
}
|
|
308
331
|
|
|
309
|
-
@Attributes(['cooked'])
|
|
310
332
|
@Node
|
|
311
333
|
*EscapeSequence({ state, ctx, value: props }) {
|
|
312
334
|
const parentSpan = state.span;
|
|
313
335
|
|
|
314
|
-
yield i`eat(
|
|
336
|
+
yield i`eat(<*Punctuator '\\' openSpan='Escape' /> 'escape')`;
|
|
315
337
|
|
|
316
|
-
let match
|
|
338
|
+
let match;
|
|
317
339
|
|
|
318
340
|
if ((match = yield i`match(/[\\/nrt0]/)`)) {
|
|
319
341
|
const match_ = ctx.sourceTextFor(match);
|
|
320
|
-
yield i`eat(
|
|
321
|
-
cooked = escapables.get(match_) || match_;
|
|
342
|
+
yield i`eat(<*Keyword ${buildString(match_)} closeSpan='Escape' /> 'code')`;
|
|
322
343
|
} else if (
|
|
323
344
|
(match = yield i`match(${getSpecialPattern(parentSpan, ctx.reifyExpression(props))})`)
|
|
324
345
|
) {
|
|
325
346
|
const match_ = ctx.sourceTextFor(match);
|
|
326
|
-
yield i`eat(
|
|
327
|
-
cooked = ctx.sourceTextFor(match);
|
|
347
|
+
yield i`eat(<*Keyword ${buildString(match_)} closeSpan='Escape' /> 'code')`;
|
|
328
348
|
} else if (yield i`match(/[ux]/)`) {
|
|
329
|
-
|
|
330
|
-
cooked = parseInt(
|
|
331
|
-
codeNode.properties.digits.map((digit) => ctx.sourceTextFor(digit)).join(''),
|
|
332
|
-
16,
|
|
333
|
-
);
|
|
349
|
+
yield i`eat(<EscapeCode closeSpan='Escape' /> 'code')`;
|
|
334
350
|
} else {
|
|
335
351
|
yield i`fail()`;
|
|
336
352
|
}
|
|
337
|
-
|
|
338
|
-
yield i`bindAttribute(cooked ${buildString(cooked)})`;
|
|
339
353
|
}
|
|
340
354
|
|
|
341
355
|
@Node
|
|
342
356
|
*EscapeCode() {
|
|
343
|
-
if (yield i`eatMatch(
|
|
344
|
-
if (yield i`eatMatch(
|
|
345
|
-
yield i`eatMatch(
|
|
346
|
-
yield i`eat(
|
|
357
|
+
if (yield i`eatMatch(<*Keyword 'u' /> 'type')`) {
|
|
358
|
+
if (yield i`eatMatch(<*Punctuator '{' /> 'openToken')`) {
|
|
359
|
+
yield i`eatMatch(<*UnsignedInteger /> 'value$')`;
|
|
360
|
+
yield i`eat(<*Punctuator '}' /> 'closeToken')`;
|
|
347
361
|
} else {
|
|
348
|
-
yield i`eat(
|
|
362
|
+
yield i`eat(<*UnsignedInteger /\d{4}/ /> 'value$')`;
|
|
349
363
|
yield i`eat(null 'closeToken')`;
|
|
350
364
|
}
|
|
351
|
-
} else if (yield i`eatMatch(
|
|
365
|
+
} else if (yield i`eatMatch(<*Keyword 'x' /> 'type')`) {
|
|
352
366
|
yield i`eat(null 'openToken')`;
|
|
353
|
-
yield i`eat(
|
|
367
|
+
yield i`eat(<*UnsignedInteger /\d{2}/ /> 'value$')`;
|
|
354
368
|
yield i`eat(null 'closeToken')`;
|
|
355
369
|
}
|
|
356
370
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bablr/language-en-regex-vm-pattern",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "A BABLR language for nonbacktracking JS-style regexes",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=12.0.0"
|
|
@@ -22,24 +22,24 @@
|
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"@babel/runtime": "^7.23.2",
|
|
25
|
-
"@bablr/helpers": "0.
|
|
26
|
-
"@bablr/agast-helpers": "0.
|
|
27
|
-
"@bablr/agast-vm-helpers": "0.
|
|
25
|
+
"@bablr/helpers": "^0.20.0",
|
|
26
|
+
"@bablr/agast-helpers": "^0.5.0",
|
|
27
|
+
"@bablr/agast-vm-helpers": "^0.5.0",
|
|
28
28
|
"iter-tools-es": "^7.5.3"
|
|
29
29
|
},
|
|
30
30
|
"devDependencies": {
|
|
31
|
-
"@bablr/boot": "0.
|
|
31
|
+
"@bablr/boot": "^0.6.0",
|
|
32
32
|
"@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#49f5952efed27f94ee9b94340eb1563c440bf64e",
|
|
33
|
-
"@bablr/macrome": "0.1.3",
|
|
34
|
-
"@bablr/macrome-generator-bablr": "0.3.2",
|
|
33
|
+
"@bablr/macrome": "^0.1.3",
|
|
34
|
+
"@bablr/macrome-generator-bablr": "^0.3.2",
|
|
35
35
|
"@qnighy/dedent": "0.1.1",
|
|
36
|
-
"bablr": "^0.
|
|
36
|
+
"bablr": "^0.6.0",
|
|
37
37
|
"enhanced-resolve": "^5.12.0",
|
|
38
38
|
"eslint": "^8.47.0",
|
|
39
39
|
"eslint-import-resolver-enhanced-resolve": "^1.0.5",
|
|
40
40
|
"eslint-plugin-import": "^2.27.5",
|
|
41
41
|
"expect": "^29.6.2",
|
|
42
|
-
"mocha": "10.4.0",
|
|
42
|
+
"mocha": "^10.4.0",
|
|
43
43
|
"prettier": "^2.0.5"
|
|
44
44
|
},
|
|
45
45
|
"keywords": [
|