@bablr/language-en-regex-vm-pattern 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/lib/grammar.js +10225 -2584
  2. package/lib/grammar.macro.js +87 -73
  3. package/package.json +11 -18
@@ -1,8 +1,14 @@
1
1
  import { i, re } from '@bablr/boot/shorthand.macro';
2
- import { Node, CoveredBy, InjectFrom, Attributes, AllowEmpty } from '@bablr/helpers/decorators';
2
+ import {
3
+ Node,
4
+ CoveredBy,
5
+ InjectFrom,
6
+ UnboundAttributes,
7
+ AllowEmpty,
8
+ } from '@bablr/helpers/decorators';
3
9
  import objectEntries from 'iter-tools-es/methods/object-entries';
4
10
  import * as Shared from '@bablr/helpers/productions';
5
- import { buildString, buildBoolean, buildNumber, buildNullTag } from '@bablr/agast-vm-helpers';
11
+ import { buildString, buildBoolean, buildNumber, buildNullTag } from '@bablr/helpers/builders';
6
12
 
7
13
  export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
8
14
 
@@ -17,6 +23,25 @@ const escapables = new Map(
17
23
  }),
18
24
  );
19
25
 
26
+ export const getCooked = (escapeNode, span, ctx) => {
27
+ let cooked;
28
+ const codeNode = escapeNode.get('code');
29
+ const type = ctx.sourceTextFor(codeNode.get('typeToken'));
30
+ const value = ctx.sourceTextFor(codeNode.get('value'));
31
+
32
+ if (!type) {
33
+ const match_ = ctx.sourceTextFor(codeNode);
34
+
35
+ cooked = escapables.get(match_) || match_;
36
+ } else if (type === 'u' || type === 'x') {
37
+ cooked = parseInt(value, 16);
38
+ } else {
39
+ throw new Error();
40
+ }
41
+
42
+ return cooked.toString(10);
43
+ };
44
+
20
45
  const flagCharacters = {
21
46
  global: 'g',
22
47
  ignoreCase: 'i',
@@ -41,13 +66,13 @@ const getSpecialPattern = (span) => {
41
66
  export const grammar = class RegexGrammar {
42
67
  @Node
43
68
  *Pattern() {
44
- yield i`eat(<~*Punctuator '/' balanced='/' balancedSpan='Pattern' /> 'openToken')`;
69
+ yield i`eat(<*Punctuator '/' balanced='/' balancedSpan='Pattern' /> 'openToken')`;
45
70
  yield i`eat(<Alternatives />)`;
46
- yield i`eat(<~*Punctuator '/' balancer /> 'closeToken')`;
47
- yield i`eat(<Flags /> 'flags')`;
71
+ yield i`eat(<*Punctuator '/' balancer /> 'closeToken')`;
72
+ yield i`eat(<Flags /> 'flags$')`;
48
73
  }
49
74
 
50
- @Attributes(Object.keys(flagCharacters))
75
+ @UnboundAttributes(Object.keys(flagCharacters))
51
76
  @AllowEmpty
52
77
  @Node
53
78
  *Flags({ ctx }) {
@@ -73,28 +98,26 @@ export const grammar = class RegexGrammar {
73
98
  @AllowEmpty
74
99
  *Alternatives() {
75
100
  do {
76
- yield i`eat(<Alternative /> 'alternatives[]')`;
77
- } while (yield i`eatMatch(<~*Punctuator '|' /> 'separators[]')`);
101
+ yield i`eat(<Alternative /> 'alternatives[]$')`;
102
+ } while (yield i`eatMatch(<*Punctuator '|' /> 'separators[]')`);
78
103
  }
79
104
 
80
105
  @AllowEmpty
81
106
  @Node
82
107
  *Alternative() {
83
- yield i`eat(<Elements />)`;
108
+ yield i`eat(<Elements /> 'elements[]$')`;
84
109
  }
85
110
 
86
111
  @AllowEmpty
87
112
  *Elements() {
88
- let matched = false;
113
+ yield i`eat([])`;
89
114
  while (yield i`match(/[^|]/)`) {
90
- matched = true;
91
- yield i`eat(<+Element /> 'elements[]')`;
115
+ yield i`eat(<+Element />)`;
92
116
  }
93
- if (!matched) yield i`eat(null 'elements[]')`;
94
117
  }
95
118
 
96
119
  *Element() {
97
- yield i`guard(<~*Keyword /[*+?]/ />)`;
120
+ yield i`guard(<*Keyword /[*+?]/ />)`;
98
121
 
99
122
  yield i`eat(<Any /> null [
100
123
  <+CharacterClass '[' />
@@ -113,16 +136,16 @@ export const grammar = class RegexGrammar {
113
136
  @CoveredBy('Element')
114
137
  @Node
115
138
  *Group() {
116
- yield i`eat(<~*Punctuator '(?:' balanced=')' /> 'openToken')`;
139
+ yield i`eat(<*Punctuator '(?:' balanced=')' /> 'openToken')`;
117
140
  yield i`eat(<Alternatives />)`;
118
- yield i`eat(<~*Punctuator ')' balancer /> 'closeToken')`;
141
+ yield i`eat(<*Punctuator ')' balancer /> 'closeToken')`;
119
142
  }
120
143
 
121
144
  @Node
122
145
  *CapturingGroup() {
123
- yield i`eat(<~*Punctuator '(' balanced=')' /> 'openToken')`;
146
+ yield i`eat(<*Punctuator '(' balanced=')' /> 'openToken')`;
124
147
  yield i`eat(<Alternatives />)`;
125
- yield i`eat(<~*Punctuator ')' balancer /> 'closeToken')`;
148
+ yield i`eat(<*Punctuator ')' balancer /> 'closeToken')`;
126
149
  }
127
150
 
128
151
  @CoveredBy('Element')
@@ -137,29 +160,29 @@ export const grammar = class RegexGrammar {
137
160
  @CoveredBy('Assertion')
138
161
  @Node
139
162
  *StartOfInputAssertion() {
140
- yield i`eat(<~*Keyword '^' /> 'sigilToken')`;
163
+ yield i`eat(<*Keyword '^' /> 'sigilToken')`;
141
164
  }
142
165
 
143
166
  @CoveredBy('Assertion')
144
167
  @Node
145
168
  *EndOfInputAssertion() {
146
- yield i`eatMatch(<~*Keyword '$' /> 'sigilToken')`;
169
+ yield i`eatMatch(<*Keyword '$' /> 'sigilToken')`;
147
170
  }
148
171
 
149
- @Attributes(['negate'])
172
+ @UnboundAttributes(['negate'])
150
173
  @CoveredBy('Assertion')
151
174
  @Node
152
175
  *WordBoundaryAssertion({ ctx }) {
153
- yield i`eatMatch(<~*Punctuator '\\' /> 'escapeToken')`;
154
- const m = yield i`eat(<~*Keyword /b/i /> 'value')`;
176
+ yield i`eatMatch(<*Punctuator '\\' /> 'escapeToken')`;
177
+ const m = yield i`eat(<*Keyword /b/i /> 'value')`;
155
178
  yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(m) === 'B')})`;
156
179
  }
157
180
 
158
181
  @CoveredBy('Assertion')
159
182
  @Node
160
183
  *Gap() {
161
- yield i`eatMatch(<~*Punctuator '\\' /> 'escapeToken')`;
162
- yield i`eat(<~*Keyword 'g' /> 'value')`;
184
+ yield i`eatMatch(<*Punctuator '\\' /> 'escapeToken')`;
185
+ yield i`eat(<*Keyword 'g' /> 'value')`;
163
186
  }
164
187
 
165
188
  @CoveredBy('Element')
@@ -173,21 +196,21 @@ export const grammar = class RegexGrammar {
173
196
  }
174
197
  }
175
198
 
176
- @Attributes(['negate'])
199
+ @UnboundAttributes(['negate'])
177
200
  @CoveredBy('Element')
178
201
  @Node
179
202
  *CharacterClass() {
180
- yield i`eat(<~*Punctuator '[' balancedSpan='CharacterClass' balanced=']' /> 'openToken')`;
203
+ yield i`eat(<*Punctuator '[' balancedSpan='CharacterClass' balanced=']' /> 'openToken')`;
181
204
 
182
- let neg = yield i`eatMatch(<~*Keyword '^' /> 'negateToken')`;
205
+ let neg = yield i`eatMatch(<*Keyword '^' /> 'negateToken')`;
183
206
 
184
207
  yield i`bindAttribute('negate' ${buildBoolean(neg)})`;
185
208
 
186
209
  while (yield i`match(/./s)`) {
187
- yield i`eat(<+CharacterClassElement /> 'elements[]')`;
210
+ yield i`eat(<+CharacterClassElement /> 'elements[]$')`;
188
211
  }
189
212
 
190
- yield i`eat(<~*Punctuator ']' balancer /> 'closeToken')`;
213
+ yield i`eat(<*Punctuator ']' balancer /> 'closeToken')`;
191
214
  }
192
215
 
193
216
  *CharacterClassElement() {
@@ -205,15 +228,15 @@ export const grammar = class RegexGrammar {
205
228
  @CoveredBy('CharacterClassElement')
206
229
  @Node
207
230
  *CharacterClassRange() {
208
- yield i`eat(<*+Character /> 'min')`;
209
- yield i`eat(<~*Punctuator '-' /> 'sigilToken')`;
210
- yield i`eat(<*+Character /> 'max')`;
231
+ yield i`eat(<*+Character /> 'min$')`;
232
+ yield i`eat(<*Punctuator '-' /> 'sigilToken')`;
233
+ yield i`eat(<*+Character /> 'max$')`;
211
234
  }
212
235
 
213
236
  @CoveredBy('Element')
214
237
  *CharacterSet() {
215
238
  yield i`eat(<Any /> null [
216
- <+AnyCharacterSet '.' />
239
+ <+AnyCharacterSet '.' />
217
240
  <+DigitCharacterSet /\\[dD]/ />
218
241
  <+SpaceCharacterSet /\\[sS]/ />
219
242
  <+WordCharacterSet /\\[wW]/ />
@@ -223,50 +246,50 @@ export const grammar = class RegexGrammar {
223
246
  @CoveredBy('CharacterSet')
224
247
  @Node
225
248
  *AnyCharacterSet() {
226
- yield i`eat(<~*Keyword '.' /> 'sigilToken')`;
249
+ yield i`eat(<*Keyword '.' /> 'sigilToken')`;
227
250
  }
228
251
 
229
- @Attributes(['negate'])
252
+ @UnboundAttributes(['negate'])
230
253
  @CoveredBy('CharacterSet')
231
254
  @Node
232
255
  *DigitCharacterSet({ ctx }) {
233
- yield i`eat(<~*Punctuator '\\' /> 'escapeToken')`;
256
+ yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
234
257
 
235
- let code = yield i`eat(<~*Keyword /[dD]/ /> 'value')`;
258
+ let code = yield i`eat(<*Keyword /[dD]/ /> 'value')`;
236
259
 
237
260
  yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'D')})`;
238
261
  }
239
262
 
240
- @Attributes(['negate'])
263
+ @UnboundAttributes(['negate'])
241
264
  @CoveredBy('CharacterSet')
242
265
  @Node
243
266
  *SpaceCharacterSet({ ctx }) {
244
- yield i`eat(<~*Punctuator '\\' /> 'escapeToken')`;
267
+ yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
245
268
 
246
- let code = yield i`eat(<~*Keyword /[sS]/ /> 'value')`;
269
+ let code = yield i`eat(<*Keyword /[sS]/ /> 'value')`;
247
270
 
248
271
  yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'S')})`;
249
272
  }
250
273
 
251
- @Attributes(['negate'])
274
+ @UnboundAttributes(['negate'])
252
275
  @CoveredBy('CharacterSet')
253
276
  @Node
254
277
  *WordCharacterSet({ ctx }) {
255
- yield i`eat(<~*Punctuator '\\' /> 'escapeToken')`;
278
+ yield i`eat(<*Punctuator '\\' /> 'escapeToken')`;
256
279
 
257
- let code = yield i`eat(<~*Keyword /[wW]/ /> 'value')`;
280
+ let code = yield i`eat(<*Keyword /[wW]/ /> 'value')`;
258
281
 
259
282
  yield i`bindAttribute('negate' ${buildBoolean(ctx.sourceTextFor(code) === 'W')})`;
260
283
  }
261
284
 
262
- @Attributes(['min', 'max'])
285
+ @UnboundAttributes(['min', 'max'])
263
286
  @Node
264
287
  *Quantifier({ ctx }) {
265
- yield i`eat(<+Element /> 'element')`;
288
+ yield i`eat(<+Element /> 'element$')`;
266
289
 
267
290
  let attrs, sigil;
268
291
 
269
- if ((sigil = yield i`eatMatch(<~*Keyword /[*+?]/ /> 'sigilToken')`)) {
292
+ if ((sigil = yield i`eatMatch(<*Keyword /[*+?]/ /> 'sigilToken')`)) {
270
293
  switch (ctx.sourceTextFor(sigil)) {
271
294
  case '*':
272
295
  attrs = { min: 0, max: Infinity };
@@ -278,12 +301,12 @@ export const grammar = class RegexGrammar {
278
301
  attrs = { min: 0, max: 1 };
279
302
  break;
280
303
  }
281
- } else if (yield i`eat(<~*Punctuator '{' balanced='}' /> 'openToken')`) {
304
+ } else if (yield i`eat(<*Punctuator '{' balanced='}' /> 'openToken')`) {
282
305
  let max;
283
- let min = yield i`eat(<*UnsignedInteger /> 'min')`;
306
+ let min = yield i`eat(<*UnsignedInteger /> 'min$')`;
284
307
 
285
- if (yield i`eatMatch(<~*Punctuator ',' /> 'separator')`) {
286
- max = yield i`eatMatch(<*UnsignedInteger /> 'max')`;
308
+ if (yield i`eatMatch(<*Punctuator ',' /> 'separator')`) {
309
+ max = yield i`eatMatch(<*UnsignedInteger /> 'max$')`;
287
310
  }
288
311
 
289
312
  min = min && ctx.sourceTextFor(min);
@@ -294,7 +317,7 @@ export const grammar = class RegexGrammar {
294
317
 
295
318
  attrs = { min, max };
296
319
 
297
- yield i`eat(<~*Punctuator '}' balancer /> 'closeToken')`;
320
+ yield i`eat(<*Punctuator '}' balancer /> 'closeToken')`;
298
321
  }
299
322
 
300
323
  yield i`bindAttribute('min' ${attrs.min ? buildNumber(attrs.min) : buildNullTag()})`;
@@ -306,51 +329,42 @@ export const grammar = class RegexGrammar {
306
329
  yield i`eat(/\d+/)`;
307
330
  }
308
331
 
309
- @Attributes(['cooked'])
310
332
  @Node
311
333
  *EscapeSequence({ state, ctx, value: props }) {
312
334
  const parentSpan = state.span;
313
335
 
314
- yield i`eat(<~*Punctuator '\\' openSpan='Escape' /> 'escape')`;
336
+ yield i`eat(<*Punctuator '\\' openSpan='Escape' /> 'escape')`;
315
337
 
316
- let match, cooked;
338
+ let match;
317
339
 
318
340
  if ((match = yield i`match(/[\\/nrt0]/)`)) {
319
341
  const match_ = ctx.sourceTextFor(match);
320
- yield i`eat(<~*Keyword ${buildString(match_)} closeSpan='Escape' /> 'value')`;
321
- cooked = escapables.get(match_) || match_;
342
+ yield i`eat(<*Keyword ${buildString(match_)} closeSpan='Escape' /> 'code')`;
322
343
  } else if (
323
344
  (match = yield i`match(${getSpecialPattern(parentSpan, ctx.reifyExpression(props))})`)
324
345
  ) {
325
346
  const match_ = ctx.sourceTextFor(match);
326
- yield i`eat(<~*Keyword ${buildString(match_)} closeSpan='Escape' /> 'value')`;
327
- cooked = ctx.sourceTextFor(match);
347
+ yield i`eat(<*Keyword ${buildString(match_)} closeSpan='Escape' /> 'code')`;
328
348
  } else if (yield i`match(/[ux]/)`) {
329
- const codeNode = yield i`eat(<EscapeCode closeSpan='Escape' /> 'value')`;
330
- cooked = parseInt(
331
- codeNode.properties.digits.map((digit) => ctx.sourceTextFor(digit)).join(''),
332
- 16,
333
- );
349
+ yield i`eat(<EscapeCode closeSpan='Escape' /> 'code')`;
334
350
  } else {
335
351
  yield i`fail()`;
336
352
  }
337
-
338
- yield i`bindAttribute(cooked ${buildString(cooked)})`;
339
353
  }
340
354
 
341
355
  @Node
342
356
  *EscapeCode() {
343
- if (yield i`eatMatch(<~*Keyword 'u' /> 'type')`) {
344
- if (yield i`eatMatch(<~*Punctuator '{' /> 'openToken')`) {
345
- yield i`eatMatch(<Digits /> 'value')`;
346
- yield i`eat(<~*Punctuator '}' /> 'closeToken')`;
357
+ if (yield i`eatMatch(<*Keyword 'u' /> 'type')`) {
358
+ if (yield i`eatMatch(<*Punctuator '{' /> 'openToken')`) {
359
+ yield i`eatMatch(<*UnsignedInteger /> 'value$')`;
360
+ yield i`eat(<*Punctuator '}' /> 'closeToken')`;
347
361
  } else {
348
- yield i`eat(<Digits /\d{4}/ /> 'value')`;
362
+ yield i`eat(<*UnsignedInteger /\d{4}/ /> 'value$')`;
349
363
  yield i`eat(null 'closeToken')`;
350
364
  }
351
- } else if (yield i`eatMatch(<~*Keyword 'x' /> 'type')`) {
365
+ } else if (yield i`eatMatch(<*Keyword 'x' /> 'type')`) {
352
366
  yield i`eat(null 'openToken')`;
353
- yield i`eat(<Digits /\d{2}/ /> 'value')`;
367
+ yield i`eat(<*UnsignedInteger /\d{2}/ /> 'value$')`;
354
368
  yield i`eat(null 'closeToken')`;
355
369
  }
356
370
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bablr/language-en-regex-vm-pattern",
3
- "version": "0.6.0",
3
+ "version": "0.7.1",
4
4
  "description": "A BABLR language for nonbacktracking JS-style regexes",
5
5
  "engines": {
6
6
  "node": ">=12.0.0"
@@ -10,9 +10,7 @@
10
10
  ".": "./lib/grammar.js",
11
11
  "./package.json": "./package.json"
12
12
  },
13
- "files": [
14
- "lib/**/*.js"
15
- ],
13
+ "files": ["lib/**/*.js"],
16
14
  "sideEffects": false,
17
15
  "scripts": {
18
16
  "build": "macrome build",
@@ -22,32 +20,27 @@
22
20
  },
23
21
  "dependencies": {
24
22
  "@babel/runtime": "^7.23.2",
25
- "@bablr/helpers": "0.19.0",
26
- "@bablr/agast-helpers": "0.4.0",
27
- "@bablr/agast-vm-helpers": "0.4.0",
23
+ "@bablr/helpers": "^0.20.0",
24
+ "@bablr/agast-helpers": "^0.5.0",
25
+ "@bablr/agast-vm-helpers": "^0.5.0",
28
26
  "iter-tools-es": "^7.5.3"
29
27
  },
30
28
  "devDependencies": {
31
- "@bablr/boot": "0.5.0",
29
+ "@bablr/boot": "^0.6.0",
32
30
  "@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#49f5952efed27f94ee9b94340eb1563c440bf64e",
33
- "@bablr/macrome": "0.1.3",
34
- "@bablr/macrome-generator-bablr": "0.3.2",
31
+ "@bablr/macrome": "^0.1.3",
32
+ "@bablr/macrome-generator-bablr": "^0.3.2",
35
33
  "@qnighy/dedent": "0.1.1",
36
- "bablr": "^0.5.0",
34
+ "bablr": "^0.6.0",
37
35
  "enhanced-resolve": "^5.12.0",
38
36
  "eslint": "^8.47.0",
39
37
  "eslint-import-resolver-enhanced-resolve": "^1.0.5",
40
38
  "eslint-plugin-import": "^2.27.5",
41
39
  "expect": "^29.6.2",
42
- "mocha": "10.4.0",
40
+ "mocha": "^10.4.0",
43
41
  "prettier": "^2.0.5"
44
42
  },
45
- "keywords": [
46
- "bablr-language",
47
- "grammar",
48
- "english",
49
- "regex"
50
- ],
43
+ "keywords": ["bablr-language", "grammar", "english", "regex"],
51
44
  "repository": "git@github.com:bablr-lang/language-en-regex-vm-pattern.git",
52
45
  "homepage": "https://github.com/bablr-lang/language-en-regex-vm-pattern",
53
46
  "author": "Conrad Buck <conartist6@gmail.com>",