@bablr/language-en-regex-vm-pattern 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/lib/grammar.js +1871 -1668
  2. package/lib/grammar.macro.js +118 -104
  3. package/package.json +9 -9
@@ -1,8 +1,14 @@
1
1
  import { i, re } from '@bablr/boot/shorthand.macro';
2
- import { Node, CoveredBy, InjectFrom, Attributes, AllowEmpty } from '@bablr/helpers/decorators';
2
+ import {
3
+ Node,
4
+ CoveredBy,
5
+ InjectFrom,
6
+ UnboundAttributes,
7
+ AllowEmpty,
8
+ } from '@bablr/helpers/decorators';
3
9
  import objectEntries from 'iter-tools-es/methods/object-entries';
4
10
  import * as Shared from '@bablr/helpers/productions';
5
- import { buildString, buildBoolean, buildNumber, buildNull } from '@bablr/agast-vm-helpers';
11
+ import { buildString, buildBoolean, buildNumber, buildNullTag } from '@bablr/agast-vm-helpers';
6
12
 
7
13
  export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
8
14
 
@@ -17,6 +23,25 @@ const escapables = new Map(
17
23
  }),
18
24
  );
19
25
 
26
+ export const getCooked = (escapeNode, span, ctx) => {
27
+ let cooked;
28
+ const codeNode = escapeNode.get('code');
29
+ const type = ctx.sourceTextFor(codeNode.get('typeToken'));
30
+ const value = ctx.sourceTextFor(codeNode.get('value'));
31
+
32
+ if (!type) {
33
+ const match_ = ctx.sourceTextFor(codeNode);
34
+
35
+ cooked = escapables.get(match_) || match_;
36
+ } else if (type === 'u' || type === 'x') {
37
+ cooked = parseInt(value, 16);
38
+ } else {
39
+ throw new Error();
40
+ }
41
+
42
+ return cooked.toString(10);
43
+ };
44
+
20
45
  const flagCharacters = {
21
46
  global: 'g',
22
47
  ignoreCase: 'i',
@@ -30,7 +55,7 @@ const unique = (flags) => flags.length === new Set(flags).size;
30
55
 
31
56
  const getSpecialPattern = (span) => {
32
57
  if (span === 'Pattern') {
33
- return re`/[*+{}[\]().^$|\n\\]/`;
58
+ return re`/[*+{}[\]().^$|\n\\<>]/`;
34
59
  } else if (span === 'CharacterClass') {
35
60
  return re`/[\]\\]/`;
36
61
  } else {
@@ -41,13 +66,13 @@ const getSpecialPattern = (span) => {
41
66
  export const grammar = class RegexGrammar {
42
67
  @Node
43
68
  *Pattern() {
44
- yield i`eat(<~*Punctuator '/' balanced='/' balancedSpan='Pattern'> 'openToken')`;
45
- yield i`eat(<Alternatives>)`;
46
- yield i`eat(<~*Punctuator '/' balancer> 'closeToken')`;
47
- yield i`eat(<Flags> 'flags')`;
69
+ yield i`eat(<*Punctuator '/' balanced='/' balancedSpan='Pattern' /> 'openToken')`;
70
+ yield i`eat(<Alternatives />)`;
71
+ yield i`eat(<*Punctuator '/' balancer /> 'closeToken')`;
72
+ yield i`eat(<Flags /> 'flags$')`;
48
73
  }
49
74
 
50
- @Attributes(Object.keys(flagCharacters))
75
+ @UnboundAttributes(Object.keys(flagCharacters))
51
76
  @AllowEmpty
52
77
  @Node
53
78
  *Flags({ ctx }) {
@@ -66,100 +91,98 @@ export const grammar = class RegexGrammar {
66
91
  }
67
92
 
68
93
  for (const flagChr of flagsStr) {
69
- yield i`eat(<*Keyword ${buildString(flagChr)}> 'tokens[]')`;
94
+ yield i`eat(<*Keyword ${buildString(flagChr)} /> 'tokens[]')`;
70
95
  }
71
96
  }
72
97
 
73
98
  @AllowEmpty
74
99
  *Alternatives() {
75
100
  do {
76
- yield i`eat(<Alternative> 'alternatives[]')`;
77
- } while (yield i`eatMatch(<~*Punctuator '|'> 'separators[]')`);
101
+ yield i`eat(<Alternative /> 'alternatives[]$')`;
102
+ } while (yield i`eatMatch(<*Punctuator '|' /> 'separators[]')`);
78
103
  }
79
104
 
80
105
  @AllowEmpty
81
106
  @Node
82
107
  *Alternative() {
83
- yield i`eat(<Elements>)`;
108
+ yield i`eat(<Elements /> 'elements[]$')`;
84
109
  }
85
110
 
86
111
  @AllowEmpty
87
112
  *Elements() {
88
- let matched = false;
113
+ yield i`eat([])`;
89
114
  while (yield i`match(/[^|]/)`) {
90
- matched = true;
91
- yield i`eat(<+Element> 'elements[]')`;
115
+ yield i`eat(<+Element />)`;
92
116
  }
93
- if (!matched) yield i`eat(null 'elements[]')`;
94
117
  }
95
118
 
96
119
  *Element() {
97
- yield i`guard(<~*Keyword /[*+?]/>)`;
98
-
99
- yield i`eat(<Any> null [
100
- <+CharacterClass '['>
101
- <+Group '(?:'>
102
- <+Assertion /[$^]|\\b/i>
103
- <+Gap '\\g'>
104
- <+CharacterSet /\.|\\[dswp]/i>
105
- <*+Character>
120
+ yield i`guard(<*Keyword /[*+?]/ />)`;
121
+
122
+ yield i`eat(<Any /> null [
123
+ <+CharacterClass '[' />
124
+ <+Group '(?:' />
125
+ <+Assertion /[$^]|\\b/i />
126
+ <+Gap '\\g' />
127
+ <+CharacterSet /\.|\\[dswp]/i />
128
+ <*+Character />
106
129
  ])`;
107
130
 
108
131
  if (yield i`match(/[*+?{]/)`) {
109
- return i`holdForMatch(<Quantifier>)`;
132
+ return i`holdForMatch(<Quantifier />)`;
110
133
  }
111
134
  }
112
135
 
113
136
  @CoveredBy('Element')
114
137
  @Node
115
138
  *Group() {
116
- yield i`eat(<~*Punctuator '(?:' balanced=')'> 'openToken')`;
117
- yield i`eat(<Alternatives>)`;
118
- yield i`eat(<~*Punctuator ')' balancer> 'closeToken')`;
139
+ yield i`eat(<*Punctuator '(?:' balanced=')' /> 'openToken')`;
140
+ yield i`eat(<Alternatives />)`;
141
+ yield i`eat(<*Punctuator ')' balancer /> 'closeToken')`;
119
142
  }
120
143
 
121
144
  @Node
122
145
  *CapturingGroup() {
123
- yield i`eat(<~*Punctuator '(' balanced=')'> 'openToken')`;
124
- yield i`eat(<Alternatives>)`;
125
- yield i`eat(<~*Punctuator ')' balancer> 'closeToken')`;
146
+ yield i`eat(<*Punctuator '(' balanced=')' /> 'openToken')`;
147
+ yield i`eat(<Alternatives />)`;
148
+ yield i`eat(<*Punctuator ')' balancer /> 'closeToken')`;
126
149
  }
127
150
 
128
151
  @CoveredBy('Element')
129
152
  *Assertion() {
130
- yield i`eat(<Any> null [
131
- <*StartOfInputAssertion '^'>
132
- <*EndOfInputAssertion '$'>
133
- <*@WordBoundaryAssertion /\\b/i>
153
+ yield i`eat(<Any /> null [
154
+ <*StartOfInputAssertion '^' />
155
+ <*EndOfInputAssertion '$' />
156
+ <*@WordBoundaryAssertion /\\b/i />
134
157
  ])`;
135
158
  }
136
159
 
137
160
  @CoveredBy('Assertion')
138
161
  @Node
139
162
  *StartOfInputAssertion() {
140
- yield i`eat(<~*Keyword '^'> 'sigilToken')`;
163
+ yield i`eat(<*Keyword '^' /> 'sigilToken')`;
141
164
  }
142
165
 
143
166
  @CoveredBy('Assertion')
144
167
  @Node
145
168
  *EndOfInputAssertion() {
146
- yield i`eatMatch(<~*Keyword '$'> 'sigilToken')`;
169
+ yield i`eatMatch(<*Keyword '$' /> 'sigilToken')`;
147
170
  }
148
171
 
149
- @Attributes(['negate'])
172
+ @UnboundAttributes(['negate'])
150
173
  @CoveredBy('Assertion')
151
174
  @Node
152
175
  *WordBoundaryAssertion({ ctx }) {
153
- yield i`eatMatch(<~*Punctuator '\\'> 'escapeToken')`;
154
- const m = yield i`eat(<~*Keyword /b/i> 'value')`;
176
+ yield i`eatMatch(<*Punctuator '\\' /> 'escapeToken')`;
177
+ const m = yield i`eat(<*Keyword /b/i /> 'value')`;
155
178
  yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(m) === 'B')})`;
156
179
  }
157
180
 
158
181
  @CoveredBy('Assertion')
159
182
  @Node
160
183
  *Gap() {
161
- yield i`eatMatch(<~*Punctuator '\\'> 'escapeToken')`;
162
- yield i`eat(<~*Keyword 'g'> 'value')`;
184
+ yield i`eatMatch(<*Punctuator '\\' /> 'escapeToken')`;
185
+ yield i`eat(<*Keyword 'g' /> 'value')`;
163
186
  }
164
187
 
165
188
  @CoveredBy('Element')
@@ -167,106 +190,106 @@ export const grammar = class RegexGrammar {
167
190
  @Node
168
191
  *Character() {
169
192
  if (yield i`match('\\')`) {
170
- yield i`eat(<@EscapeSequence> null)`;
193
+ yield i`eat(<@EscapeSequence /> null)`;
171
194
  } else {
172
195
  yield i`eat(/[^\r\n\t]/)`;
173
196
  }
174
197
  }
175
198
 
176
- @Attributes(['negate'])
199
+ @UnboundAttributes(['negate'])
177
200
  @CoveredBy('Element')
178
201
  @Node
179
202
  *CharacterClass() {
180
- yield i`eat(<~*Punctuator '[' balancedSpan='CharacterClass' balanced=']'> 'openToken')`;
203
+ yield i`eat(<*Punctuator '[' balancedSpan='CharacterClass' balanced=']' /> 'openToken')`;
181
204
 
182
- let neg = yield i`eatMatch(<~*Keyword '^'> 'negateToken')`;
205
+ let neg = yield i`eatMatch(<*Keyword '^' /> 'negateToken')`;
183
206
 
184
207
  yield i`bindAttribute('negate' ${buildBoolean(neg)})`;
185
208
 
186
209
  while (yield i`match(/./s)`) {
187
- yield i`eat(<+CharacterClassElement> 'elements[]')`;
210
+ yield i`eat(<+CharacterClassElement /> 'elements[]$')`;
188
211
  }
189
212
 
190
- yield i`eat(<~*Punctuator ']' balancer> 'closeToken')`;
213
+ yield i`eat(<*Punctuator ']' balancer /> 'closeToken')`;
191
214
  }
192
215
 
193
216
  *CharacterClassElement() {
194
- yield i`eat(<Any> null [
195
- <CharacterSet /\\[dswp]/i>
196
- <Gap '\\g'>
197
- <*+Character>
217
+ yield i`eat(<Any /> null [
218
+ <CharacterSet /\\[dswp]/i />
219
+ <Gap '\\g' />
220
+ <*+Character />
198
221
  ])`;
199
222
 
200
223
  if (yield i`match('-')`) {
201
- return i`holdForMatch(<+CharacterClassRange>)`;
224
+ return i`holdForMatch(<+CharacterClassRange />)`;
202
225
  }
203
226
  }
204
227
 
205
228
  @CoveredBy('CharacterClassElement')
206
229
  @Node
207
230
  *CharacterClassRange() {
208
- yield i`eat(<*+Character> 'min')`;
209
- yield i`eat(<~*Punctuator '-'> 'sigilToken')`;
210
- yield i`eat(<*+Character> 'max')`;
231
+ yield i`eat(<*+Character /> 'min$')`;
232
+ yield i`eat(<*Punctuator '-' /> 'sigilToken')`;
233
+ yield i`eat(<*+Character /> 'max$')`;
211
234
  }
212
235
 
213
236
  @CoveredBy('Element')
214
237
  *CharacterSet() {
215
- yield i`eat(<Any> null [
216
- <+AnyCharacterSet '.'>
217
- <+DigitCharacterSet /\\[dD]/>
218
- <+SpaceCharacterSet /\\[sS]/>
219
- <+WordCharacterSet /\\[wW]/>
238
+ yield i`eat(<Any /> null [
239
+ <+AnyCharacterSet '.' />
240
+ <+DigitCharacterSet /\\[dD]/ />
241
+ <+SpaceCharacterSet /\\[sS]/ />
242
+ <+WordCharacterSet /\\[wW]/ />
220
243
  ])`;
221
244
  }
222
245
 
223
246
  @CoveredBy('CharacterSet')
224
247
  @Node
225
248
  *AnyCharacterSet() {
226
- yield i`eat(<~*Keyword '.'> 'sigilToken')`;
249
+ yield i`eat(<*Keyword '.' /> 'sigilToken')`;
227
250
  }
228
251
 
229
- @Attributes(['negate'])
252
+ @UnboundAttributes(['negate'])
230
253
  @CoveredBy('CharacterSet')
231
254
  @Node
232
255
  *DigitCharacterSet({ ctx }) {
233
- yield i`eat(<~*Punctuator '\\'> 'escapeToken')`;
256
+ yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
234
257
 
235
- let code = yield i`eat(<~*Keyword /[dD]/> 'value')`;
258
+ let code = yield i`eat(<*Keyword /[dD]/ /> 'value')`;
236
259
 
237
260
  yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'D')})`;
238
261
  }
239
262
 
240
- @Attributes(['negate'])
263
+ @UnboundAttributes(['negate'])
241
264
  @CoveredBy('CharacterSet')
242
265
  @Node
243
266
  *SpaceCharacterSet({ ctx }) {
244
- yield i`eat(<~*Punctuator '\\'> 'escapeToken')`;
267
+ yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
245
268
 
246
- let code = yield i`eat(<~*Keyword /[sS]/> 'value')`;
269
+ let code = yield i`eat(<*Keyword /[sS]/ /> 'value')`;
247
270
 
248
271
  yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'S')})`;
249
272
  }
250
273
 
251
- @Attributes(['negate'])
274
+ @UnboundAttributes(['negate'])
252
275
  @CoveredBy('CharacterSet')
253
276
  @Node
254
277
  *WordCharacterSet({ ctx }) {
255
- yield i`eat(<~*Punctuator '\\'> 'escapeToken')`;
278
+ yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
256
279
 
257
- let code = yield i`eat(<~*Keyword /[wW]/> 'value')`;
280
+ let code = yield i`eat(<*Keyword /[wW]/ /> 'value')`;
258
281
 
259
282
  yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'W')})`;
260
283
  }
261
284
 
262
- @Attributes(['min', 'max'])
285
+ @UnboundAttributes(['min', 'max'])
263
286
  @Node
264
287
  *Quantifier({ ctx }) {
265
- yield i`eat(<+Element> 'element')`;
288
+ yield i`eat(<+Element /> 'element$')`;
266
289
 
267
290
  let attrs, sigil;
268
291
 
269
- if ((sigil = yield i`eatMatch(<~*Keyword /[*+?]/> 'sigilToken')`)) {
292
+ if ((sigil = yield i`eatMatch(<*Keyword /[*+?]/ /> 'sigilToken')`)) {
270
293
  switch (ctx.sourceTextFor(sigil)) {
271
294
  case '*':
272
295
  attrs = { min: 0, max: Infinity };
@@ -278,12 +301,12 @@ export const grammar = class RegexGrammar {
278
301
  attrs = { min: 0, max: 1 };
279
302
  break;
280
303
  }
281
- } else if (yield i`eat(<~*Punctuator '{' balanced='}'> 'openToken')`) {
304
+ } else if (yield i`eat(<*Punctuator '{' balanced='}' /> 'openToken')`) {
282
305
  let max;
283
- let min = yield i`eat(<*UnsignedInteger> 'min')`;
306
+ let min = yield i`eat(<*UnsignedInteger /> 'min$')`;
284
307
 
285
- if (yield i`eatMatch(<~*Punctuator ','> 'separator')`) {
286
- max = yield i`eatMatch(<*UnsignedInteger> 'max')`;
308
+ if (yield i`eatMatch(<*Punctuator ',' /> 'separator')`) {
309
+ max = yield i`eatMatch(<*UnsignedInteger /> 'max$')`;
287
310
  }
288
311
 
289
312
  min = min && ctx.sourceTextFor(min);
@@ -294,11 +317,11 @@ export const grammar = class RegexGrammar {
294
317
 
295
318
  attrs = { min, max };
296
319
 
297
- yield i`eat(<~*Punctuator '}' balancer> 'closeToken')`;
320
+ yield i`eat(<*Punctuator '}' balancer /> 'closeToken')`;
298
321
  }
299
322
 
300
- yield i`bindAttribute('min' ${attrs.min ? buildNumber(attrs.min) : buildNull()})`;
301
- yield i`bindAttribute('max' ${attrs.max ? buildNumber(attrs.max) : buildNull()})`;
323
+ yield i`bindAttribute('min' ${attrs.min ? buildNumber(attrs.min) : buildNullTag()})`;
324
+ yield i`bindAttribute('max' ${attrs.max ? buildNumber(attrs.max) : buildNullTag()})`;
302
325
  }
303
326
 
304
327
  @Node
@@ -306,57 +329,48 @@ export const grammar = class RegexGrammar {
306
329
  yield i`eat(/\d+/)`;
307
330
  }
308
331
 
309
- @Attributes(['cooked'])
310
332
  @Node
311
333
  *EscapeSequence({ state, ctx, value: props }) {
312
334
  const parentSpan = state.span;
313
335
 
314
- yield i`eat(<~*Punctuator '\\' openSpan='Escape'> 'escape')`;
336
+ yield i`eat(<*Punctuator '\\' openSpan='Escape' /> 'escape')`;
315
337
 
316
- let match, cooked;
338
+ let match;
317
339
 
318
340
  if ((match = yield i`match(/[\\/nrt0]/)`)) {
319
341
  const match_ = ctx.sourceTextFor(match);
320
- yield i`eat(<~*Keyword ${buildString(match_)} closeSpan='Escape'> 'value')`;
321
- cooked = escapables.get(match_) || match_;
342
+ yield i`eat(<*Keyword ${buildString(match_)} closeSpan='Escape' /> 'code')`;
322
343
  } else if (
323
344
  (match = yield i`match(${getSpecialPattern(parentSpan, ctx.reifyExpression(props))})`)
324
345
  ) {
325
346
  const match_ = ctx.sourceTextFor(match);
326
- yield i`eat(<~*Keyword ${buildString(match_)} closeSpan='Escape'> 'value')`;
327
- cooked = ctx.sourceTextFor(match);
347
+ yield i`eat(<*Keyword ${buildString(match_)} closeSpan='Escape' /> 'code')`;
328
348
  } else if (yield i`match(/[ux]/)`) {
329
- const codeNode = yield i`eat(<EscapeCode closeSpan='Escape'> 'value')`;
330
- cooked = parseInt(
331
- codeNode.properties.digits.map((digit) => ctx.sourceTextFor(digit)).join(''),
332
- 16,
333
- );
349
+ yield i`eat(<EscapeCode closeSpan='Escape' /> 'code')`;
334
350
  } else {
335
351
  yield i`fail()`;
336
352
  }
337
-
338
- yield i`bindAttribute(cooked ${buildString(cooked)})`;
339
353
  }
340
354
 
341
355
  @Node
342
356
  *EscapeCode() {
343
- if (yield i`eatMatch(<~*Keyword 'u'> 'type')`) {
344
- if (yield i`eatMatch(<~*Punctuator '{'> 'openToken')`) {
345
- yield i`eatMatch(<Digits> 'value')`;
346
- yield i`eat(<~*Punctuator '}'> 'closeToken')`;
357
+ if (yield i`eatMatch(<*Keyword 'u' /> 'type')`) {
358
+ if (yield i`eatMatch(<*Punctuator '{' /> 'openToken')`) {
359
+ yield i`eatMatch(<*UnsignedInteger /> 'value$')`;
360
+ yield i`eat(<*Punctuator '}' /> 'closeToken')`;
347
361
  } else {
348
- yield i`eat(<Digits /\d{4}/> 'value')`;
362
+ yield i`eat(<*UnsignedInteger /\d{4}/ /> 'value$')`;
349
363
  yield i`eat(null 'closeToken')`;
350
364
  }
351
- } else if (yield i`eatMatch(<~*Keyword 'x'> 'type')`) {
365
+ } else if (yield i`eatMatch(<*Keyword 'x' /> 'type')`) {
352
366
  yield i`eat(null 'openToken')`;
353
- yield i`eat(<Digits /\d{2}/> 'value')`;
367
+ yield i`eat(<*UnsignedInteger /\d{2}/ /> 'value$')`;
354
368
  yield i`eat(null 'closeToken')`;
355
369
  }
356
370
  }
357
371
 
358
372
  *Digits() {
359
- while (yield i`eatMatch(<*Digit>)`);
373
+ while (yield i`eatMatch(<*Digit />)`);
360
374
  }
361
375
 
362
376
  @Node
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bablr/language-en-regex-vm-pattern",
3
- "version": "0.5.1",
3
+ "version": "0.7.0",
4
4
  "description": "A BABLR language for nonbacktracking JS-style regexes",
5
5
  "engines": {
6
6
  "node": ">=12.0.0"
@@ -22,24 +22,24 @@
22
22
  },
23
23
  "dependencies": {
24
24
  "@babel/runtime": "^7.23.2",
25
- "@bablr/helpers": "0.18.1",
26
- "@bablr/agast-helpers": "0.3.2",
27
- "@bablr/agast-vm-helpers": "0.3.2",
25
+ "@bablr/helpers": "^0.20.0",
26
+ "@bablr/agast-helpers": "^0.5.0",
27
+ "@bablr/agast-vm-helpers": "^0.5.0",
28
28
  "iter-tools-es": "^7.5.3"
29
29
  },
30
30
  "devDependencies": {
31
- "@bablr/boot": "0.4.0",
31
+ "@bablr/boot": "^0.6.0",
32
32
  "@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#49f5952efed27f94ee9b94340eb1563c440bf64e",
33
- "@bablr/macrome": "0.1.3",
34
- "@bablr/macrome-generator-bablr": "0.3.1",
33
+ "@bablr/macrome": "^0.1.3",
34
+ "@bablr/macrome-generator-bablr": "^0.3.2",
35
35
  "@qnighy/dedent": "0.1.1",
36
- "bablr": "0.4.1",
36
+ "bablr": "^0.6.0",
37
37
  "enhanced-resolve": "^5.12.0",
38
38
  "eslint": "^8.47.0",
39
39
  "eslint-import-resolver-enhanced-resolve": "^1.0.5",
40
40
  "eslint-plugin-import": "^2.27.5",
41
41
  "expect": "^29.6.2",
42
- "mocha": "10.4.0",
42
+ "mocha": "^10.4.0",
43
43
  "prettier": "^2.0.5"
44
44
  },
45
45
  "keywords": [