@bablr/language-en-regex-vm-pattern 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,380 @@
1
+ import { i, re } from '@bablr/boot/shorthand.macro';
2
+ import { Node, CoveredBy, InjectFrom, Attributes, AllowEmpty } from '@bablr/helpers/decorators';
3
+ import objectEntries from 'iter-tools-es/methods/object-entries';
4
+ import * as Shared from '@bablr/helpers/productions';
5
+ import { buildString, buildBoolean, buildNumber, buildNull } from '@bablr/agast-vm-helpers';
6
+
7
/** URL exported as the canonical identifier of this language. */
export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';

/** Language dependencies of this grammar; none are declared. */
export const dependencies = {};
10
+
11
// Cooked value for each single-character escape name (the character that
// follows the backslash in `\n`, `\r`, `\t` and `\0`).
const escapableValues = {
  n: '\n',
  r: '\r',
  t: '\t',
  0: '\0',
};

// Lookup table from escape name to cooked character, consulted by
// EscapeSequence through `escapables.get(...)`.
const escapables = new Map(objectEntries(escapableValues));
19
+
20
// Maps each flag attribute name to the single character that spells that flag
// after the closing `/` of a pattern. Entry order is the order in which the
// Flags production binds its attributes.
const flagCharacters = Object.fromEntries([
  ['global', 'g'],
  ['ignoreCase', 'i'],
  ['multiline', 'm'],
  ['dotAll', 's'],
  ['unicode', 'u'],
  ['sticky', 'y'],
]);
28
+
29
/**
 * Reports whether a sequence contains no repeated items.
 *
 * @param {string | Array<unknown>} flags - Sequence to check; it must expose
 *   `length` and be iterable.
 * @returns {boolean} `true` when the number of distinct items equals `length`.
 */
const unique = (flags) => {
  const distinct = new Set(flags);
  return distinct.size === flags.length;
};
30
+
31
/**
 * Returns the pattern describing which characters may follow a backslash as an
 * escaped literal inside the given span.
 *
 * The `Pattern` set includes `?`: it is a quantifier sigil in this grammar, so
 * `\?` has to be accepted as an escaped literal just like `\*` and `\+`.
 *
 * @param {string} span - Name of the enclosing span: `'Pattern'` or
 *   `'CharacterClass'`.
 * @returns The value produced by the `re` macro for that span.
 * @throws {Error} When `span` is neither of the two supported names.
 */
const getSpecialPattern = (span) => {
  switch (span) {
    case 'Pattern':
      return re`/[*+?{}[\]().^$|\n\\]/`;
    case 'CharacterClass':
      return re`/[\]\\]/`;
    default:
      throw new Error('unknown span type for special pattern');
  }
};
40
+
41
+ export const grammar = class RegexGrammar {
42
+ @Node
43
+ *Pattern() {
44
+ yield i`eat(<~*Punctuator '/' balanced='/' balancedSpan='Pattern'> 'openToken')`;
45
+ yield i`eat(<Alternatives>)`;
46
+ yield i`eat(<~*Punctuator '/' balancer> 'closeToken')`;
47
+ yield i`eat(<Flags> 'flags')`;
48
+ }
49
+
50
+ @Attributes(Object.keys(flagCharacters))
51
+ @AllowEmpty
52
+ @Node
53
+ *Flags({ ctx }) {
54
+ const flags = yield i`match(/[gimsuy]+/)`;
55
+
56
+ const flagsStr = ctx.sourceTextFor(flags) || '';
57
+
58
+ if (flagsStr && !unique(flagsStr)) throw new Error('flags must be unique');
59
+
60
+ for (const { 0: name, 1: chr } of Object.entries(flagCharacters)) {
61
+ if (flagsStr.includes(chr)) {
62
+ yield i`bindAttribute(${buildString(name)} true)`;
63
+ } else {
64
+ yield i`bindAttribute(${buildString(name)} false)`;
65
+ }
66
+ }
67
+
68
+ for (const flagChr of flagsStr) {
69
+ yield i`eat(<*Keyword ${buildString(flagChr)}> 'tokens[]')`;
70
+ }
71
+ }
72
+
73
+ @AllowEmpty
74
+ *Alternatives() {
75
+ do {
76
+ yield i`eat(<Alternative> 'alternatives[]')`;
77
+ } while (yield i`eatMatch(<~*Punctuator '|'> 'separators[]')`);
78
+ }
79
+
80
+ @AllowEmpty
81
+ @Node
82
+ *Alternative() {
83
+ yield i`eat(<Elements>)`;
84
+ }
85
+
86
+ @AllowEmpty
87
+ *Elements() {
88
+ let matched = false;
89
+ while (yield i`match(/[^|]/)`) {
90
+ matched = true;
91
+ yield i`eat(<+Element> 'elements[]')`;
92
+ }
93
+ if (!matched) yield i`eat(null 'elements[]')`;
94
+ }
95
+
96
+ *Element() {
97
+ yield i`guard(<~*Keyword /[*+?]/>)`;
98
+
99
+ yield i`eat(<Any> null [
100
+ <+CharacterClass '['>
101
+ <+Group '(?:'>
102
+ <+Assertion /[$^]|\\b/i>
103
+ <+Gap '\\g'>
104
+ <+CharacterSet /\.|\\[dswp]/i>
105
+ <*+Character>
106
+ ])`;
107
+
108
+ if (yield i`match(/[*+?{]/)`) {
109
+ return i`holdForMatch(<Quantifier>)`;
110
+ }
111
+ }
112
+
113
+ @CoveredBy('Element')
114
+ @Node
115
+ *Group() {
116
+ yield i`eat(<~*Punctuator '(?:' balanced=')'> 'openToken')`;
117
+ yield i`eat(<Alternatives>)`;
118
+ yield i`eat(<~*Punctuator ')' balancer> 'closeToken')`;
119
+ }
120
+
121
+ @Node
122
+ *CapturingGroup() {
123
+ yield i`eat(<~*Punctuator '(' balanced=')'> 'openToken')`;
124
+ yield i`eat(<Alternatives>)`;
125
+ yield i`eat(<~*Punctuator ')' balancer> 'closeToken')`;
126
+ }
127
+
128
+ @CoveredBy('Element')
129
+ *Assertion() {
130
+ yield i`eat(<Any> null [
131
+ <*StartOfInputAssertion '^'>
132
+ <*EndOfInputAssertion '$'>
133
+ <*@WordBoundaryAssertion /\\b/i>
134
+ ])`;
135
+ }
136
+
137
+ @CoveredBy('Assertion')
138
+ @Node
139
+ *StartOfInputAssertion() {
140
+ yield i`eat(<~*Keyword '^'> 'sigilToken')`;
141
+ }
142
+
143
+ @CoveredBy('Assertion')
144
+ @Node
145
+ *EndOfInputAssertion() {
146
+ yield i`eatMatch(<~*Keyword '$'> 'sigilToken')`;
147
+ }
148
+
149
+ @Attributes(['negate'])
150
+ @CoveredBy('Assertion')
151
+ @Node
152
+ *WordBoundaryAssertion({ ctx }) {
153
+ yield i`eatMatch(<~*Punctuator '\\'> 'escapeToken')`;
154
+ const m = yield i`eat(<~*Keyword /b/i> 'value')`;
155
+ yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(m) === 'B')})`;
156
+ }
157
+
158
+ @CoveredBy('Assertion')
159
+ @Node
160
+ *Gap() {
161
+ yield i`eatMatch(<~*Punctuator '\\'> 'escapeToken')`;
162
+ yield i`eat(<~*Keyword 'g'> 'value')`;
163
+ }
164
+
165
+ @CoveredBy('Element')
166
+ @CoveredBy('CharacterClassElement')
167
+ @Node
168
+ *Character() {
169
+ if (yield i`match('\\')`) {
170
+ yield i`eat(<@EscapeSequence> null)`;
171
+ } else {
172
+ yield i`eat(/[^\r\n\t]/)`;
173
+ }
174
+ }
175
+
176
+ @Attributes(['negate'])
177
+ @CoveredBy('Element')
178
+ @Node
179
+ *CharacterClass() {
180
+ yield i`eat(<~*Punctuator '[' balancedSpan='CharacterClass' balanced=']'> 'openToken')`;
181
+
182
+ let neg = yield i`eatMatch(<~*Keyword '^'> 'negateToken')`;
183
+
184
+ yield i`bindAttribute('negate' ${buildBoolean(neg)})`;
185
+
186
+ while (yield i`match(/./s)`) {
187
+ yield i`eat(<+CharacterClassElement> 'elements[]')`;
188
+ }
189
+
190
+ yield i`eat(<~*Punctuator ']' balancer> 'closeToken')`;
191
+ }
192
+
193
+ *CharacterClassElement() {
194
+ yield i`eat(<Any> null [
195
+ <CharacterSet /\\[dswp]/i>
196
+ <Gap '\\g'>
197
+ <*+Character>
198
+ ])`;
199
+
200
+ if (yield i`match('-')`) {
201
+ return i`holdForMatch(<+CharacterClassRange>)`;
202
+ }
203
+ }
204
+
205
+ @CoveredBy('CharacterClassElement')
206
+ @Node
207
+ *CharacterClassRange() {
208
+ yield i`eat(<*+Character> 'min')`;
209
+ yield i`eat(<~*Punctuator '-'> 'sigilToken')`;
210
+ yield i`eat(<*+Character> 'max')`;
211
+ }
212
+
213
+ @CoveredBy('Element')
214
+ *CharacterSet() {
215
+ yield i`eat(<Any> null [
216
+ <+AnyCharacterSet '.'>
217
+ <+DigitCharacterSet /\\[dD]/>
218
+ <+SpaceCharacterSet /\\[sS]/>
219
+ <+WordCharacterSet /\\[wW]/>
220
+ ])`;
221
+ }
222
+
223
+ @CoveredBy('CharacterSet')
224
+ @Node
225
+ *AnyCharacterSet() {
226
+ yield i`eat(<~*Keyword '.'> 'sigilToken')`;
227
+ }
228
+
229
+ @Attributes(['negate'])
230
+ @CoveredBy('CharacterSet')
231
+ @Node
232
+ *DigitCharacterSet({ ctx }) {
233
+ yield i`eat(<~*Punctuator '\\'> 'escapeToken')`;
234
+
235
+ let code = yield i`eat(<~*Keyword /[dD]/> 'value')`;
236
+
237
+ yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'D')})`;
238
+ }
239
+
240
+ @Attributes(['negate'])
241
+ @CoveredBy('CharacterSet')
242
+ @Node
243
+ *SpaceCharacterSet({ ctx }) {
244
+ yield i`eat(<~*Punctuator '\\'> 'escapeToken')`;
245
+
246
+ let code = yield i`eat(<~*Keyword /[sS]/> 'value')`;
247
+
248
+ yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'S')})`;
249
+ }
250
+
251
+ @Attributes(['negate'])
252
+ @CoveredBy('CharacterSet')
253
+ @Node
254
+ *WordCharacterSet({ ctx }) {
255
+ yield i`eat(<~*Punctuator '\\'> 'escapeToken')`;
256
+
257
+ let code = yield i`eat(<~*Keyword /[wW]/> 'value')`;
258
+
259
+ yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'W')})`;
260
+ }
261
+
262
+ @Attributes(['min', 'max'])
263
+ @Node
264
+ *Quantifier({ ctx }) {
265
+ yield i`eat(<+Element> 'element')`;
266
+
267
+ let attrs, sigil;
268
+
269
+ if ((sigil = yield i`eatMatch(<~*Keyword /[*+?]/> 'sigilToken')`)) {
270
+ switch (ctx.sourceTextFor(sigil)) {
271
+ case '*':
272
+ attrs = { min: 0, max: Infinity };
273
+ break;
274
+ case '+':
275
+ attrs = { min: 1, max: Infinity };
276
+ break;
277
+ case '?':
278
+ attrs = { min: 0, max: 1 };
279
+ break;
280
+ }
281
+ } else if (yield i`eat(<~*Punctuator '{' balanced='}'> 'openToken')`) {
282
+ let max;
283
+ let min = yield i`eat(<*UnsignedInteger> 'min')`;
284
+
285
+ if (yield i`eatMatch(<~*Punctuator ','> 'separator')`) {
286
+ max = yield i`eatMatch(<*UnsignedInteger> 'max')`;
287
+ }
288
+
289
+ min = min && ctx.sourceTextFor(min);
290
+ max = max && ctx.sourceTextFor(max);
291
+
292
+ min = min && parseInt(min, 10);
293
+ max = max && parseInt(max, 10);
294
+
295
+ attrs = { min, max };
296
+
297
+ yield i`eat(<~*Punctuator '}' balancer> 'closeToken')`;
298
+ }
299
+
300
+ yield i`bindAttribute('min' ${attrs.min ? buildNumber(attrs.min) : buildNull()})`;
301
+ yield i`bindAttribute('max' ${attrs.max ? buildNumber(attrs.max) : buildNull()})`;
302
+ }
303
+
304
+ @Node
305
+ *UnsignedInteger() {
306
+ yield i`eat(/\d+/)`;
307
+ }
308
+
309
+ @Attributes(['cooked'])
310
+ @Node
311
+ *EscapeSequence({ state, ctx, value: props }) {
312
+ const parentSpan = state.span;
313
+
314
+ yield i`eat(<~*Punctuator '\\' openSpan='Escape'> 'escape')`;
315
+
316
+ let match, cooked;
317
+
318
+ if ((match = yield i`match(/[\\/nrt0]/)`)) {
319
+ const match_ = ctx.sourceTextFor(match);
320
+ yield i`eat(<~*Keyword ${buildString(match_)} closeSpan='Escape'> 'value')`;
321
+ cooked = escapables.get(match_) || match_;
322
+ } else if (
323
+ (match = yield i`match(${getSpecialPattern(parentSpan, ctx.reifyExpression(props))})`)
324
+ ) {
325
+ const match_ = ctx.sourceTextFor(match);
326
+ yield i`eat(<~*Keyword ${buildString(match_)} closeSpan='Escape'> 'value')`;
327
+ cooked = ctx.sourceTextFor(match);
328
+ } else if (yield i`match(/[ux]/)`) {
329
+ const codeNode = yield i`eat(<EscapeCode closeSpan='Escape'> 'value')`;
330
+ cooked = parseInt(
331
+ ctx
332
+ .getProperty(codeNode, 'digits')
333
+ .map((digit) => ctx.sourceTextFor(digit))
334
+ .join(''),
335
+ 16,
336
+ );
337
+ } else {
338
+ yield i`fail()`;
339
+ }
340
+
341
+ yield i`bindAttribute(cooked ${buildString(cooked)})`;
342
+ }
343
+
344
+ @Node
345
+ *EscapeCode() {
346
+ if (yield i`eatMatch(<~*Keyword 'u'> 'type')`) {
347
+ if (yield i`eatMatch(<~*Punctuator '{'> 'openToken')`) {
348
+ yield i`eatMatch(<Digits> 'value')`;
349
+ yield i`eat(<~*Punctuator '}'> 'closeToken')`;
350
+ } else {
351
+ yield i`eat(<Digits /\d{4}/> 'value')`;
352
+ yield i`eat(null 'closeToken')`;
353
+ }
354
+ } else if (yield i`eatMatch(<~*Keyword 'x'> 'type')`) {
355
+ yield i`eat(null 'openToken')`;
356
+ yield i`eat(<Digits /\d{2}/> 'value')`;
357
+ yield i`eat(null 'closeToken')`;
358
+ }
359
+ }
360
+
361
+ *Digits() {
362
+ while (yield i`eatMatch(<*Digit>)`);
363
+ }
364
+
365
+ @Node
366
+ *Digit() {
367
+ yield i`eat(/\d/)`;
368
+ }
369
+
370
+ @InjectFrom(Shared)
371
+ *Any() {}
372
+
373
+ @Node
374
+ @InjectFrom(Shared)
375
+ *Keyword() {}
376
+
377
+ @Node
378
+ @InjectFrom(Shared)
379
+ *Punctuator() {}
380
+ };
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "@bablr/language-en-regex-vm-pattern",
3
+ "version": "0.4.0",
4
+ "description": "A BABLR language for nonbacktracking JS-style regexes",
5
+ "engines": {
6
+ "node": ">=12.0.0"
7
+ },
8
+ "type": "module",
9
+ "exports": {
10
+ ".": "./lib/grammar.js",
11
+ "./package.json": "./package.json"
12
+ },
13
+ "files": [
14
+ "lib/**/*.js"
15
+ ],
16
+ "sideEffects": false,
17
+ "scripts": {
18
+ "build": "macrome build",
19
+ "watch": "macrome watch",
20
+ "clean": "macrome clean",
21
+ "test": "mocha"
22
+ },
23
+ "dependencies": {
24
+ "@babel/runtime": "^7.23.2",
25
+ "@bablr/helpers": "0.17.0",
26
+ "@bablr/agast-vm-helpers": "0.2.0",
27
+ "iter-tools-es": "^7.5.3"
28
+ },
29
+ "devDependencies": {
30
+ "@bablr/agast-helpers": "0.2.0",
31
+ "@bablr/boot": "0.3.0",
32
+ "@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#49f5952efed27f94ee9b94340eb1563c440bf64e",
33
+ "@bablr/macrome": "0.1.3",
34
+ "@bablr/macrome-generator-bablr": "0.3.1",
35
+ "@qnighy/dedent": "0.1.1",
36
+ "bablr": "0.3.0",
37
+ "enhanced-resolve": "^5.12.0",
38
+ "eslint": "^8.47.0",
39
+ "eslint-import-resolver-enhanced-resolve": "^1.0.5",
40
+ "eslint-plugin-import": "^2.27.5",
41
+ "expect": "^29.6.2",
42
+ "mocha": "10.4.0",
43
+ "prettier": "^2.0.5"
44
+ },
45
+ "keywords": [
46
+ "bablr-language",
47
+ "grammar",
48
+ "english",
49
+ "regex"
50
+ ],
51
+ "repository": "git@github.com:bablr-lang/language-en-regex-vm-pattern.git",
52
+ "homepage": "https://github.com/bablr-lang/language-en-regex-vm-pattern",
53
+ "author": "Conrad Buck <conartist6@gmail.com>",
54
+ "license": "MIT"
55
+ }