@bablr/language-en-regex-vm-pattern 0.11.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/grammar.js CHANGED
@@ -1,36 +1,49 @@
1
- /* @macrome
2
- * @generatedby @bablr/macrome-generator-bablr
3
- * @generatedfrom ./grammar.macro.js#c3c390f3e54607dfde1d07c0e099e54294ab7f32
4
- * This file is autogenerated. Please do not edit it directly.
5
- * When editing run `npx macrome watch` then change the file this is generated from.
6
- */
7
- import _applyDecs from "@babel/runtime/helpers/applyDecs2305";
8
- let _initProto, _FlagsDecs, _GroupDecs, _AssertionDecs, _StartOfInputAssertionDecs, _EndOfInputAssertionDecs, _WordBoundaryAssertionDecs, _GapDecs, _CharacterDecs, _CharacterClassDecs, _CharacterClassRangeDecs, _CharacterSetDecs, _AnyCharacterSetDecs, _DigitCharacterSetDecs, _SpaceCharacterSetDecs, _WordCharacterSetDecs, _QuantifierDecs, _AnyDecs, _KeywordDecs, _PunctuatorDecs;
9
1
  import { re, spam as m } from '@bablr/boot';
10
- import { Node, CoveredBy, InjectFrom, UndefinedAttributes, AllowEmpty, Literal } from '@bablr/helpers/decorators';
11
2
  import objectEntries from 'iter-tools-es/methods/object-entries';
12
- import * as Shared from '@bablr/helpers/productions';
13
- import { eat, eatMatch, match, shiftMatch, guard, defineAttribute, fail, o } from '@bablr/helpers/grammar';
3
+ import {
4
+ eat,
5
+ eatMatch,
6
+ match,
7
+ shiftMatch,
8
+ guard,
9
+ defineAttribute,
10
+ fail,
11
+ r,
12
+ startSpan,
13
+ endSpan,
14
+ eatHeld,
15
+ } from '@bablr/helpers/grammar';
14
16
  import { buildString } from '@bablr/helpers/builders';
17
+ import { get } from '@bablr/agast-helpers/path';
18
+ import { printSource } from '@bablr/agast-helpers/tree';
19
+
15
20
  export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
21
+
16
22
  export const dependencies = {};
23
+
17
24
  export const defaultMatcher = m`<Pattern />`;
18
- const escapables = new Map(objectEntries({
19
- n: '\n',
20
- r: '\r',
21
- t: '\t',
22
- 0: '\0'
23
- }));
25
+
26
+ const escapables = new Map(
27
+ objectEntries({
28
+ n: '\n',
29
+ r: '\r',
30
+ t: '\t',
31
+ 0: '\0',
32
+ }),
33
+ );
34
+
24
35
  const flagCharacters = {
25
36
  global: 'g',
26
37
  ignoreCase: 'i',
27
38
  multiline: 'm',
28
39
  dotAll: 's',
29
40
  unicode: 'u',
30
- sticky: 'y'
41
+ sticky: 'y',
31
42
  };
32
- const unique = flags => flags.length === new Set(flags).size;
33
- const getSpecialPattern = span => {
43
+
44
+ const unique = (flags) => flags.length === new Set(flags).size;
45
+
46
+ const getSpecialPattern = (span) => {
34
47
  if (span === 'Pattern') {
35
48
  return re`/[*+?{}[\]().^$|\n\\<>]/`;
36
49
  } else if (span === 'CharacterClass') {
@@ -39,90 +52,127 @@ const getSpecialPattern = span => {
39
52
  throw new Error('unknown span type for special pattern');
40
53
  }
41
54
  };
55
+
42
56
  export const grammar = class RegexGrammar {
43
- static {
44
- [_initProto] = _applyDecs(this, [[Node, 2, "Pattern"], [_FlagsDecs, 2, "Flags"], [AllowEmpty, 2, "Alternatives"], [[AllowEmpty, Node], 2, "Alternative"], [AllowEmpty, 2, "Elements"], [_GroupDecs, 2, "Group"], [Node, 2, "CapturingGroup"], [_AssertionDecs, 2, "Assertion"], [_StartOfInputAssertionDecs, 2, "StartOfInputAssertion"], [_EndOfInputAssertionDecs, 2, "EndOfInputAssertion"], [_WordBoundaryAssertionDecs, 2, "WordBoundaryAssertion"], [_GapDecs, 2, "Gap"], [_CharacterDecs, 2, "Character"], [_CharacterClassDecs, 2, "CharacterClass"], [_CharacterClassRangeDecs, 2, "CharacterClassRange"], [_CharacterSetDecs, 2, "CharacterSet"], [_AnyCharacterSetDecs, 2, "AnyCharacterSet"], [_DigitCharacterSetDecs, 2, "DigitCharacterSet"], [_SpaceCharacterSetDecs, 2, "SpaceCharacterSet"], [_WordCharacterSetDecs, 2, "WordCharacterSet"], [_QuantifierDecs, 2, "Quantifier"], [Node, 2, "UnsignedInteger"], [Node, 2, "UnsignedHexInteger"], [Node, 2, "EscapeSequence"], [Node, 2, "EscapeCode"], [Node, 2, "Digit"], [_AnyDecs, 2, "Any"], [_KeywordDecs, 2, "Keyword"], [_PunctuatorDecs, 2, "Punctuator"]], []).e;
45
- }
46
57
  constructor() {
47
- _initProto(this);
48
- }
49
- *[(_FlagsDecs = [UndefinedAttributes(Object.keys(flagCharacters)), AllowEmpty, Node], _GroupDecs = [CoveredBy('Element'), Node], _AssertionDecs = CoveredBy('Element'), _StartOfInputAssertionDecs = [CoveredBy('Assertion'), Node], _EndOfInputAssertionDecs = [CoveredBy('Assertion'), Node], _WordBoundaryAssertionDecs = [UndefinedAttributes(['negate']), CoveredBy('Assertion'), Node], _GapDecs = [CoveredBy('Assertion'), Node], _CharacterDecs = [CoveredBy('Element'), CoveredBy('CharacterClassElement'), Node], _CharacterClassDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), Node], _CharacterClassRangeDecs = [CoveredBy('CharacterClassElement'), Node], _CharacterSetDecs = CoveredBy('Element'), _AnyCharacterSetDecs = [CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _DigitCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _SpaceCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _WordCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _QuantifierDecs = [UndefinedAttributes(['min', 'max']), Node], _AnyDecs = InjectFrom(Shared), _KeywordDecs = [Literal, Node, InjectFrom(Shared)], _PunctuatorDecs = [Literal, Node, InjectFrom(Shared)], "Pattern")]() {
50
- yield eat(m`openToken: <*Punctuator '/' { balanced: '/', balancedSpan: 'Pattern' } />`);
58
+ this.literals = new Set(['Keyword']);
59
+ this.emptyables = new Set(['Alternatives', 'Alternative', 'Elements', 'Flags']);
60
+ this.attributes = new Map(
61
+ Object.entries({
62
+ Flags: Object.fromEntries(Object.keys(flagCharacters).map((key) => [key, undefined])),
63
+ WordBoundaryAssertion: { negate: undefined },
64
+ CharacterClass: { negate: undefined },
65
+ DigitCharacterSet: { negate: undefined },
66
+ SpaceCharacterSet: { negate: undefined },
67
+ WordCharacterSet: { negate: undefined },
68
+ Quantifier: { min: undefined, max: undefined },
69
+ }),
70
+ );
71
+ }
72
+
73
+ *Pattern() {
74
+ yield eat(m`openToken*: <* '/' />`);
75
+ yield startSpan('Pattern', '/');
51
76
  yield eat(m`<__Alternatives />`);
52
- yield eat(m`closeToken: <*Punctuator '/' { balancer: true } />`);
53
- yield eat(m`flags$: <Flags />`);
77
+ yield endSpan();
78
+ yield eat(m`closeToken*: <* '/' />`);
79
+ yield eat(m`flags: <Flags />`);
54
80
  }
55
- *Flags({
56
- ctx
57
- }) {
81
+
82
+ *Flags() {
58
83
  const flags = yield match(re`/[gimsuy]+/`);
59
- const flagsStr = ctx.sourceTextFor(flags) || '';
84
+
85
+ const flagsStr = printSource(flags) || '';
86
+
60
87
  if (flagsStr && !unique(flagsStr)) throw new Error('flags must be unique');
61
- for (const {
62
- 0: name,
63
- 1: chr
64
- } of Object.entries(flagCharacters)) {
88
+
89
+ for (const { 0: name, 1: chr } of Object.entries(flagCharacters)) {
65
90
  if (flagsStr.includes(chr)) {
66
91
  yield defineAttribute(name, true);
67
92
  } else {
68
93
  yield defineAttribute(name, false);
69
94
  }
70
95
  }
96
+
71
97
  for (const flagChr of flagsStr) {
72
- yield eat(m`tokens[]: <*Keyword ${buildString(flagChr)} />`);
98
+ yield eat(m`tokens[]*: <*Keyword ${buildString(flagChr)} />`);
73
99
  }
74
100
  }
101
+
75
102
  *Alternatives() {
76
103
  do {
77
104
  yield eat(m`alternatives[]$: <Alternative />`);
78
- } while (yield eatMatch(m`#separatorTokens[]: <*Punctuator '|' />`));
105
+ } while (yield eatMatch(m`#separatorTokens: <* '|' />`));
79
106
  }
107
+
80
108
  *Alternative() {
81
109
  yield eat(m`elements[]+$: <__Elements />`);
82
110
  }
83
- *Elements() {
84
- yield eat(m`.[]: []`);
111
+
112
+ *Elements({ matcher }) {
85
113
  while (yield match(re`/[^|]/`)) {
86
- yield eat(m`.[]+: <_Element />`);
114
+ yield eat(m`${get('refMatcher', matcher)} <_Element />`);
87
115
  }
88
116
  }
117
+
89
118
  *Element() {
90
119
  yield guard(m`<*Keyword /[*+?]/ />`);
91
- yield eat(m`<__Any />`, [m`<CharacterClass '[' />`, m`<Group '(?:' />`, m`<__Assertion /[$^]|\\b/i />`, m`<Gap '\\g' />`, m`<__CharacterSet /\.|\\[dswp]/i />`, m`<*Character />`]);
92
- if (yield match(re`/[*+?{]/`)) {
93
- return shiftMatch(m`<Quantifier />`);
120
+
121
+ if (yield eatMatch(m`<CharacterClass '[' />`)) {
122
+ } else if (yield eatMatch(m`<Group '(?:' />`)) {
123
+ } else if (yield eatMatch(m`<_Assertion /[$^]|\\b/i />`)) {
124
+ } else if (yield eatMatch(m`<Gap '\\g' />`)) {
125
+ } else if (yield eatMatch(m`<_CharacterSet /\.|\\[dswp]/i />`)) {
126
+ } else {
127
+ yield eat(m`<*Character />`);
94
128
  }
129
+
130
+ return r(shiftMatch(m`<Quantifier /[*+?{]/ />`));
95
131
  }
132
+
96
133
  *Group() {
97
- yield eat(m`openToken: <*Punctuator '(?:' { balanced: ')' } />`);
134
+ yield eat(m`openToken*: <* '(?:' />`);
135
+ yield startSpan('Pattern', ')');
98
136
  yield eat(m`<__Alternatives />`);
99
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
137
+ yield endSpan();
138
+ yield eat(m`closeToken*: <* ')' />`);
100
139
  }
140
+
101
141
  *CapturingGroup() {
102
- yield eat(m`openToken: <*Punctuator '(' { balanced: ')' } />`);
142
+ yield eat(m`openToken*: <* '(' />`);
143
+ yield startSpan('Pattern', ')');
103
144
  yield eat(m`<__Alternatives />`);
104
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
145
+ yield endSpan();
146
+ yield eat(m`closeToken*: <* ')' />`);
105
147
  }
148
+
106
149
  *Assertion() {
107
- yield eat(m`<__Any />`, [m`<StartOfInputAssertion '^' />`, m`<EndOfInputAssertion '$' />`, m`<WordBoundaryAssertion /\\b/i />`]);
150
+ if (yield eatMatch(m`<StartOfInputAssertion '^' />`)) {
151
+ } else if (yield eatMatch(m`<EndOfInputAssertion '$' />`)) {
152
+ } else {
153
+ yield eat(m`<WordBoundaryAssertion /\\b/i />`);
154
+ }
108
155
  }
156
+
109
157
  *StartOfInputAssertion() {
110
- yield eat(m`sigilToken: <*Keyword '^' />`);
158
+ yield eat(m`sigilToken*: <*Keyword '^' />`);
111
159
  }
160
+
112
161
  *EndOfInputAssertion() {
113
- yield eatMatch(m`sigilToken: <*Keyword '$' />`);
162
+ yield eatMatch(m`sigilToken*: <*Keyword '$' />`);
114
163
  }
115
- *WordBoundaryAssertion({
116
- ctx
117
- }) {
118
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
119
- const m_ = yield eat(m`value: <*Keyword /b/i />`);
120
- yield defineAttribute('negate', ctx.sourceTextFor(m_) === 'B');
164
+
165
+ *WordBoundaryAssertion() {
166
+ yield eatMatch(m`escapeToken*: <* '\\' />`);
167
+ const m_ = yield eat(m`value*: <*Keyword /b/i />`);
168
+ yield defineAttribute('negate', printSource(m_.node) === 'B');
121
169
  }
170
+
122
171
  *Gap() {
123
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
124
- yield eat(m`value: <*Keyword 'g' />`);
172
+ yield eatMatch(m`escapeToken*: <* '\\' />`);
173
+ yield eat(m`value*: <*Keyword 'g' />`);
125
174
  }
175
+
126
176
  *Character() {
127
177
  if (yield match('\\')) {
128
178
  yield eat(m`@: <EscapeSequence />`);
@@ -130,154 +180,181 @@ export const grammar = class RegexGrammar {
130
180
  yield eat(re`/[^\r\n\t]/`);
131
181
  }
132
182
  }
183
+
133
184
  *CharacterClass() {
134
- yield eat(m`openToken: <*Punctuator '[' { balancedSpan: 'CharacterClass', balanced: ']' } />`);
135
- let negate = yield eatMatch(m`negateToken: <*Keyword '^' />`, null, o({
136
- bind: true
137
- }));
185
+ yield eat(m`openToken*: <* '[' />`);
186
+ yield startSpan('CharacterClass', ']');
187
+
188
+ let negate = yield eatMatch(m`negateToken*: <*Keyword '^' />`);
189
+
138
190
  yield defineAttribute('negate', !!negate);
139
- while (yield match(re`/./s`)) {
191
+
192
+ while (yield match(re`/[^\]]/s`)) {
140
193
  yield eat(m`elements[]+$: <_CharacterClassElement />`);
141
194
  }
142
- yield eat(m`closeToken: <*Punctuator ']' { balancer: true } />`);
195
+
196
+ yield endSpan();
197
+ yield eat(m`closeToken*: <* ']' />`);
143
198
  }
199
+
144
200
  *CharacterClassElement() {
145
- yield eat(m`<__Any />`, [m`<__CharacterSet /\\[dswp]/i />`, m`<Gap '\\g' />`, m`<*Character />`]);
201
+ if (yield eatMatch(m`<_CharacterSet /\\[dswp]/i />`)) {
202
+ } else if (yield eatMatch(m`<Gap '\\g' />`)) {
203
+ } else {
204
+ yield eat(m`<*Character />`);
205
+ }
206
+
146
207
  if (yield match('-')) {
147
- return shiftMatch(m`<CharacterClassRange />`);
208
+ return r(shiftMatch(m`<CharacterClassRange />`));
148
209
  }
149
210
  }
211
+
150
212
  *CharacterClassRange() {
151
- yield eat(m`min+$: <*Character />`);
152
- yield eat(m`sigilToken: <*Punctuator '-' />`);
213
+ yield eatHeld(m`min+$: <*Character />`);
214
+ yield eat(m`sigilToken*: <* '-' />`);
153
215
  yield eat(m`max+$: <*Character />`);
154
216
  }
217
+
155
218
  *CharacterSet() {
156
- yield eat(m`<__Any />`, [m`<AnyCharacterSet '.' />`, m`<DigitCharacterSet /\\[dD]/ />`, m`<SpaceCharacterSet /\\[sS]/ />`, m`<WordCharacterSet /\\[wW]/ />`]);
219
+ if (yield eatMatch(m`<AnyCharacterSet '.' />`)) {
220
+ } else if (yield eatMatch(m`<DigitCharacterSet /\\[dD]/ />`)) {
221
+ } else if (yield eatMatch(m`<SpaceCharacterSet /\\[sS]/ />`)) {
222
+ } else {
223
+ yield eat(m`<WordCharacterSet /\\[wW]/ />`);
224
+ }
157
225
  }
226
+
158
227
  *AnyCharacterSet() {
159
- yield eat(m`sigilToken: <*Keyword '.' />`);
160
- }
161
- *DigitCharacterSet({
162
- ctx
163
- }) {
164
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
165
- let code = yield eat(m`value: <*Keyword /[dD]/ />`);
166
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'D');
167
- }
168
- *SpaceCharacterSet({
169
- ctx
170
- }) {
171
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
172
- let code = yield eat(m`value: <*Keyword /[sS]/ />`);
173
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'S');
174
- }
175
- *WordCharacterSet({
176
- ctx
177
- }) {
178
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
179
- let code = yield eat(m`value: <*Keyword /[wW]/ />`);
180
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'W');
181
- }
182
- *Quantifier({
183
- ctx
184
- }) {
185
- yield eat(m`element+$: <_Element />`);
228
+ yield eat(m`sigilToken*: <*Keyword '.' />`);
229
+ }
230
+
231
+ *DigitCharacterSet() {
232
+ yield eat(m`escapeToken*: <* '\\' />`);
233
+
234
+ let code = yield eat(m`value*: <*Keyword /[dD]/ />`);
235
+
236
+ yield defineAttribute('negate', printSource(code.node) === 'D');
237
+ }
238
+
239
+ *SpaceCharacterSet() {
240
+ yield eat(m`escapeToken*: <* '\\' />`);
241
+
242
+ let code = yield eat(m`value*: <*Keyword /[sS]/ />`);
243
+
244
+ yield defineAttribute('negate', printSource(code.node) === 'S');
245
+ }
246
+
247
+ *WordCharacterSet() {
248
+ yield eat(m`escapeToken*: <* '\\' />`);
249
+
250
+ let code = yield eat(m`value*: <*Keyword /[wW]/ />`);
251
+
252
+ yield defineAttribute('negate', printSource(code.node) === 'W');
253
+ }
254
+
255
+ *Quantifier() {
256
+ yield eatHeld(m`element+$: <_Element />`);
257
+
186
258
  let attrs, sigil;
187
- if (sigil = yield eatMatch(m`sigilToken: <*Keyword /[*+?]/ />`)) {
188
- switch (ctx.sourceTextFor(sigil)) {
259
+
260
+ if ((sigil = yield eatMatch(m`sigilToken*: <*Keyword /[*+?]/ />`))) {
261
+ switch (printSource(sigil.node)) {
189
262
  case '*':
190
- attrs = {
191
- min: 0,
192
- max: Infinity
193
- };
263
+ attrs = { min: 0, max: Infinity };
194
264
  break;
195
265
  case '+':
196
- attrs = {
197
- min: 1,
198
- max: Infinity
199
- };
266
+ attrs = { min: 1, max: Infinity };
200
267
  break;
201
268
  case '?':
202
- attrs = {
203
- min: 0,
204
- max: 1
205
- };
269
+ attrs = { min: 0, max: 1 };
206
270
  break;
207
271
  default:
208
272
  yield fail();
209
273
  }
210
- } else if (yield eat(m`openToken: <*Punctuator '{' { balanced: '}' } />`)) {
274
+ } else if (yield eat(m`openToken*: <* '{' />`)) {
211
275
  let max;
212
276
  let min = yield eat(m`min$: <*UnsignedInteger />`);
213
- if (yield eatMatch(m`separator: <*Punctuator ',' />`)) {
277
+
278
+ if (yield eatMatch(m`separator$: <* ',' />`)) {
214
279
  max = yield eatMatch(m`max$: <*UnsignedInteger />`);
215
280
  }
216
- min = min && ctx.sourceTextFor(min);
217
- max = max && ctx.sourceTextFor(max);
281
+
282
+ min = min && printSource(min.node);
283
+ max = max && printSource(max.node);
284
+
218
285
  min = min && parseInt(min, 10);
219
286
  max = max && parseInt(max, 10);
220
- attrs = {
221
- min,
222
- max
223
- };
224
- yield eat(m`closeToken: <*Punctuator '}' { balancer: true } />`);
287
+
288
+ attrs = { min, max };
289
+
290
+ yield eat(m`closeToken*: <* '}' />`);
225
291
  }
292
+
226
293
  yield defineAttribute('min', attrs.min);
227
294
  yield defineAttribute('max', attrs.max);
228
295
  }
296
+
229
297
  *UnsignedInteger() {
230
298
  yield eat(re`/\d+/`);
231
299
  }
300
+
232
301
  *UnsignedHexInteger() {
233
302
  yield eat(re`/[\da-fA-F]+/`);
234
303
  }
235
- *EscapeSequence({
236
- state,
237
- ctx
238
- }) {
239
- const parentSpan = state.span;
240
- yield eat(m`escape: <*Punctuator '\\' { openSpan: 'Escape' } />`);
304
+
305
+ *EscapeSequence({ s }) {
306
+ const parentSpan = s().span;
307
+
308
+ yield startSpan('Escape');
309
+
310
+ yield eat(m`escape*: <* '\\' />`);
311
+
241
312
  let m_;
242
313
  let cooked;
243
- if (m_ = yield match(re`/[\\/nrt0]/`)) {
244
- const match_ = ctx.sourceTextFor(m_);
245
- yield eat(m`code: <*Keyword ${buildString(match_)} { closeSpan: 'Escape' } />`);
314
+
315
+ if ((m_ = yield match(re`/[\\/nrt0]/`))) {
316
+ const match_ = printSource(m_);
317
+ yield eat(m`code*: <*Keyword ${buildString(match_)} />`);
318
+
246
319
  cooked = escapables.get(match_) || match_;
247
- } else if (m_ = yield match(getSpecialPattern(parentSpan))) {
248
- cooked = ctx.sourceTextFor(m_);
249
- yield eat(m`code: <*Keyword ${buildString(cooked)} { closeSpan: 'Escape' } />`);
320
+ } else if ((m_ = yield match(getSpecialPattern(parentSpan)))) {
321
+ cooked = printSource(m_);
322
+ yield eat(m`code*: <*Keyword ${buildString(cooked)} />`);
250
323
  } else if (yield match(re`/[ux]/`)) {
251
- let code = yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
252
- let value = code.get('value');
253
- cooked = String.fromCodePoint(parseInt(ctx.sourceTextFor(value), 16));
324
+ let code = yield eat(m`code*: <EscapeCode />`);
325
+
326
+ let value = get('value', code.node);
327
+
328
+ cooked = String.fromCodePoint(parseInt(printSource(value), 16));
254
329
  } else {
255
330
  yield fail();
256
331
  }
332
+
333
+ yield endSpan();
334
+
257
335
  yield defineAttribute('cooked', cooked);
258
336
  }
337
+
259
338
  *EscapeCode() {
260
- if (yield eatMatch(m`type: <*Keyword 'u' />`)) {
261
- if (yield eatMatch(m`openToken: <*Punctuator '{' />`)) {
262
- yield eatMatch(m`value$: <*UnsignedHexInteger />`);
263
- yield eat(m`closeToken: <*Punctuator '}' />`);
339
+ if (yield eatMatch(m`type*: <*Keyword 'u' />`)) {
340
+ if (yield eatMatch(m`openToken*: <* '{' />`)) {
341
+ yield eatMatch(m`value: <*UnsignedHexInteger />`);
342
+ yield eat(m`closeToken*: <* '}' />`);
264
343
  } else {
265
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
266
- yield eat(m`closeToken: null`);
344
+ yield eat(m`value: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
267
345
  }
268
- } else if (yield eatMatch(m`type: <*Keyword 'x' />`)) {
269
- yield eat(m`openToken: null`);
270
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
271
- yield eat(m`closeToken: null`);
346
+ } else if (yield eatMatch(m`type*: <*Keyword 'x' />`)) {
347
+ yield eat(m`value: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
272
348
  }
273
349
  }
350
+
274
351
  *Digits() {
275
352
  while (yield eatMatch(m`<*Digit />`));
276
353
  }
354
+
277
355
  *Digit() {
278
356
  yield eat(re`/\d/`);
279
357
  }
280
- *Any() {}
281
- *Keyword() {}
282
- *Punctuator() {}
283
- };
358
+ };
359
+
360
+ export default { canonicalURL, dependencies, grammar, defaultMatcher };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bablr/language-en-regex-vm-pattern",
3
- "version": "0.11.0",
3
+ "version": "0.12.1",
4
4
  "description": "A BABLR language for nonbacktracking JS-style regexes",
5
5
  "engines": {
6
6
  "node": ">=12.0.0"
@@ -15,25 +15,19 @@
15
15
  ],
16
16
  "sideEffects": false,
17
17
  "scripts": {
18
- "build": "macrome build",
19
- "watch": "macrome watch",
20
- "clean": "macrome clean",
21
18
  "test": "mocha"
22
19
  },
23
20
  "dependencies": {
24
- "@babel/runtime": "7.28.2",
25
- "@bablr/boot": "0.10.0",
26
- "@bablr/helpers": "0.24.0",
27
- "@bablr/agast-helpers": "0.9.0",
28
- "@bablr/agast-vm-helpers": "0.9.0",
21
+ "@bablr/boot": "0.11.0",
22
+ "@bablr/helpers": "0.25.0",
23
+ "@bablr/agast-helpers": "0.10.0",
24
+ "@bablr/agast-vm-helpers": "0.10.0",
29
25
  "iter-tools-es": "7.5.3"
30
26
  },
31
27
  "devDependencies": {
32
28
  "@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#c97bfa4b3663f8378e9b3e42bb5a41e685406cf9",
33
- "@bablr/macrome": "^0.1.3",
34
- "@bablr/macrome-generator-bablr": "^0.3.2",
35
29
  "@qnighy/dedent": "0.1.1",
36
- "bablr": "^0.10.0",
30
+ "bablr": "^0.11.0",
37
31
  "enhanced-resolve": "^5.12.0",
38
32
  "eslint": "^8.47.0",
39
33
  "eslint-import-resolver-enhanced-resolve": "^1.0.5",
@@ -48,7 +42,10 @@
48
42
  "english",
49
43
  "regex"
50
44
  ],
51
- "repository": "git@github.com:bablr-lang/language-en-regex-vm-pattern.git",
45
+ "repository": {
46
+ "type": "git",
47
+ "url": "git+ssh://git@github.com/bablr-lang/language-en-regex-vm-pattern.git"
48
+ },
52
49
  "homepage": "https://github.com/bablr-lang/language-en-regex-vm-pattern",
53
50
  "author": "Conrad Buck <conartist6@gmail.com>",
54
51
  "license": "MIT"
@@ -1,405 +0,0 @@
1
- import { re, spam as m } from '@bablr/boot';
2
- import {
3
- Node,
4
- CoveredBy,
5
- InjectFrom,
6
- UndefinedAttributes,
7
- AllowEmpty,
8
- Literal,
9
- } from '@bablr/helpers/decorators';
10
- import objectEntries from 'iter-tools-es/methods/object-entries';
11
- import * as Shared from '@bablr/helpers/productions';
12
- import {
13
- eat,
14
- eatMatch,
15
- match,
16
- shiftMatch,
17
- guard,
18
- defineAttribute,
19
- fail,
20
- o,
21
- } from '@bablr/helpers/grammar';
22
- import { buildString } from '@bablr/helpers/builders';
23
-
24
- export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
25
-
26
- export const dependencies = {};
27
-
28
- export const defaultMatcher = m`<Pattern />`;
29
-
30
- const escapables = new Map(
31
- objectEntries({
32
- n: '\n',
33
- r: '\r',
34
- t: '\t',
35
- 0: '\0',
36
- }),
37
- );
38
-
39
- const flagCharacters = {
40
- global: 'g',
41
- ignoreCase: 'i',
42
- multiline: 'm',
43
- dotAll: 's',
44
- unicode: 'u',
45
- sticky: 'y',
46
- };
47
-
48
- const unique = (flags) => flags.length === new Set(flags).size;
49
-
50
- const getSpecialPattern = (span) => {
51
- if (span === 'Pattern') {
52
- return re`/[*+?{}[\]().^$|\n\\<>]/`;
53
- } else if (span === 'CharacterClass') {
54
- return re`/[\]\\]/`;
55
- } else {
56
- throw new Error('unknown span type for special pattern');
57
- }
58
- };
59
-
60
- export const grammar = class RegexGrammar {
61
- @Node
62
- *Pattern() {
63
- yield eat(m`openToken: <*Punctuator '/' { balanced: '/', balancedSpan: 'Pattern' } />`);
64
- yield eat(m`<__Alternatives />`);
65
- yield eat(m`closeToken: <*Punctuator '/' { balancer: true } />`);
66
- yield eat(m`flags$: <Flags />`);
67
- }
68
-
69
- @UndefinedAttributes(Object.keys(flagCharacters))
70
- @AllowEmpty
71
- @Node
72
- *Flags({ ctx }) {
73
- const flags = yield match(re`/[gimsuy]+/`);
74
-
75
- const flagsStr = ctx.sourceTextFor(flags) || '';
76
-
77
- if (flagsStr && !unique(flagsStr)) throw new Error('flags must be unique');
78
-
79
- for (const { 0: name, 1: chr } of Object.entries(flagCharacters)) {
80
- if (flagsStr.includes(chr)) {
81
- yield defineAttribute(name, true);
82
- } else {
83
- yield defineAttribute(name, false);
84
- }
85
- }
86
-
87
- for (const flagChr of flagsStr) {
88
- yield eat(m`tokens[]: <*Keyword ${buildString(flagChr)} />`);
89
- }
90
- }
91
-
92
- @AllowEmpty
93
- *Alternatives() {
94
- do {
95
- yield eat(m`alternatives[]$: <Alternative />`);
96
- } while (yield eatMatch(m`#separatorTokens[]: <*Punctuator '|' />`));
97
- }
98
-
99
- @AllowEmpty
100
- @Node
101
- *Alternative() {
102
- yield eat(m`elements[]+$: <__Elements />`);
103
- }
104
-
105
- @AllowEmpty
106
- *Elements() {
107
- yield eat(m`.[]: []`);
108
- while (yield match(re`/[^|]/`)) {
109
- yield eat(m`.[]+: <_Element />`);
110
- }
111
- }
112
-
113
- *Element() {
114
- yield guard(m`<*Keyword /[*+?]/ />`);
115
-
116
- yield eat(m`<__Any />`, [
117
- m`<CharacterClass '[' />`,
118
- m`<Group '(?:' />`,
119
- m`<__Assertion /[$^]|\\b/i />`,
120
- m`<Gap '\\g' />`,
121
- m`<__CharacterSet /\.|\\[dswp]/i />`,
122
- m`<*Character />`,
123
- ]);
124
-
125
- if (yield match(re`/[*+?{]/`)) {
126
- return shiftMatch(m`<Quantifier />`);
127
- }
128
- }
129
-
130
- @CoveredBy('Element')
131
- @Node
132
- *Group() {
133
- yield eat(m`openToken: <*Punctuator '(?:' { balanced: ')' } />`);
134
- yield eat(m`<__Alternatives />`);
135
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
136
- }
137
-
138
- @Node
139
- *CapturingGroup() {
140
- yield eat(m`openToken: <*Punctuator '(' { balanced: ')' } />`);
141
- yield eat(m`<__Alternatives />`);
142
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
143
- }
144
-
145
- @CoveredBy('Element')
146
- *Assertion() {
147
- yield eat(m`<__Any />`, [
148
- m`<StartOfInputAssertion '^' />`,
149
- m`<EndOfInputAssertion '$' />`,
150
- m`<WordBoundaryAssertion /\\b/i />`,
151
- ]);
152
- }
153
-
154
- @CoveredBy('Assertion')
155
- @Node
156
- *StartOfInputAssertion() {
157
- yield eat(m`sigilToken: <*Keyword '^' />`);
158
- }
159
-
160
- @CoveredBy('Assertion')
161
- @Node
162
- *EndOfInputAssertion() {
163
- yield eatMatch(m`sigilToken: <*Keyword '$' />`);
164
- }
165
-
166
- @UndefinedAttributes(['negate'])
167
- @CoveredBy('Assertion')
168
- @Node
169
- *WordBoundaryAssertion({ ctx }) {
170
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
171
- const m_ = yield eat(m`value: <*Keyword /b/i />`);
172
- yield defineAttribute('negate', ctx.sourceTextFor(m_) === 'B');
173
- }
174
-
175
- @CoveredBy('Assertion')
176
- @Node
177
- *Gap() {
178
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
179
- yield eat(m`value: <*Keyword 'g' />`);
180
- }
181
-
182
- @CoveredBy('Element')
183
- @CoveredBy('CharacterClassElement')
184
- @Node
185
- *Character() {
186
- if (yield match('\\')) {
187
- yield eat(m`@: <EscapeSequence />`);
188
- } else {
189
- yield eat(re`/[^\r\n\t]/`);
190
- }
191
- }
192
-
193
- @UndefinedAttributes(['negate'])
194
- @CoveredBy('Element')
195
- @Node
196
- *CharacterClass() {
197
- yield eat(m`openToken: <*Punctuator '[' { balancedSpan: 'CharacterClass', balanced: ']' } />`);
198
-
199
- let negate = yield eatMatch(m`negateToken: <*Keyword '^' />`, null, o({ bind: true }));
200
-
201
- yield defineAttribute('negate', !!negate);
202
-
203
- while (yield match(re`/./s`)) {
204
- yield eat(m`elements[]+$: <_CharacterClassElement />`);
205
- }
206
-
207
- yield eat(m`closeToken: <*Punctuator ']' { balancer: true } />`);
208
- }
209
-
210
- *CharacterClassElement() {
211
- yield eat(m`<__Any />`, [
212
- m`<__CharacterSet /\\[dswp]/i />`,
213
- m`<Gap '\\g' />`,
214
- m`<*Character />`,
215
- ]);
216
-
217
- if (yield match('-')) {
218
- return shiftMatch(m`<CharacterClassRange />`);
219
- }
220
- }
221
-
222
- @CoveredBy('CharacterClassElement')
223
- @Node
224
- *CharacterClassRange() {
225
- yield eat(m`min+$: <*Character />`);
226
- yield eat(m`sigilToken: <*Punctuator '-' />`);
227
- yield eat(m`max+$: <*Character />`);
228
- }
229
-
230
- @CoveredBy('Element')
231
- *CharacterSet() {
232
- yield eat(m`<__Any />`, [
233
- m`<AnyCharacterSet '.' />`,
234
- m`<DigitCharacterSet /\\[dD]/ />`,
235
- m`<SpaceCharacterSet /\\[sS]/ />`,
236
- m`<WordCharacterSet /\\[wW]/ />`,
237
- ]);
238
- }
239
-
240
- @CoveredBy('Element')
241
- @CoveredBy('CharacterSet')
242
- @Node
243
- *AnyCharacterSet() {
244
- yield eat(m`sigilToken: <*Keyword '.' />`);
245
- }
246
-
247
- @UndefinedAttributes(['negate'])
248
- @CoveredBy('Element')
249
- @CoveredBy('CharacterSet')
250
- @Node
251
- *DigitCharacterSet({ ctx }) {
252
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
253
-
254
- let code = yield eat(m`value: <*Keyword /[dD]/ />`);
255
-
256
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'D');
257
- }
258
-
259
- @UndefinedAttributes(['negate'])
260
- @CoveredBy('Element')
261
- @CoveredBy('CharacterSet')
262
- @Node
263
- *SpaceCharacterSet({ ctx }) {
264
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
265
-
266
- let code = yield eat(m`value: <*Keyword /[sS]/ />`);
267
-
268
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'S');
269
- }
270
-
271
- @UndefinedAttributes(['negate'])
272
- @CoveredBy('Element')
273
- @CoveredBy('CharacterSet')
274
- @Node
275
- *WordCharacterSet({ ctx }) {
276
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
277
-
278
- let code = yield eat(m`value: <*Keyword /[wW]/ />`);
279
-
280
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'W');
281
- }
282
-
283
- @UndefinedAttributes(['min', 'max'])
284
- @Node
285
- *Quantifier({ ctx }) {
286
- yield eat(m`element+$: <_Element />`);
287
-
288
- let attrs, sigil;
289
-
290
- if ((sigil = yield eatMatch(m`sigilToken: <*Keyword /[*+?]/ />`))) {
291
- switch (ctx.sourceTextFor(sigil)) {
292
- case '*':
293
- attrs = { min: 0, max: Infinity };
294
- break;
295
- case '+':
296
- attrs = { min: 1, max: Infinity };
297
- break;
298
- case '?':
299
- attrs = { min: 0, max: 1 };
300
- break;
301
- default:
302
- yield fail();
303
- }
304
- } else if (yield eat(m`openToken: <*Punctuator '{' { balanced: '}' } />`)) {
305
- let max;
306
- let min = yield eat(m`min$: <*UnsignedInteger />`);
307
-
308
- if (yield eatMatch(m`separator: <*Punctuator ',' />`)) {
309
- max = yield eatMatch(m`max$: <*UnsignedInteger />`);
310
- }
311
-
312
- min = min && ctx.sourceTextFor(min);
313
- max = max && ctx.sourceTextFor(max);
314
-
315
- min = min && parseInt(min, 10);
316
- max = max && parseInt(max, 10);
317
-
318
- attrs = { min, max };
319
-
320
- yield eat(m`closeToken: <*Punctuator '}' { balancer: true } />`);
321
- }
322
-
323
- yield defineAttribute('min', attrs.min);
324
- yield defineAttribute('max', attrs.max);
325
- }
326
-
327
- @Node
328
- *UnsignedInteger() {
329
- yield eat(re`/\d+/`);
330
- }
331
-
332
- @Node
333
- *UnsignedHexInteger() {
334
- yield eat(re`/[\da-fA-F]+/`);
335
- }
336
-
337
- @Node
338
- *EscapeSequence({ state, ctx }) {
339
- const parentSpan = state.span;
340
-
341
- yield eat(m`escape: <*Punctuator '\\' { openSpan: 'Escape' } />`);
342
-
343
- let m_;
344
- let cooked;
345
-
346
- if ((m_ = yield match(re`/[\\/nrt0]/`))) {
347
- const match_ = ctx.sourceTextFor(m_);
348
- yield eat(m`code: <*Keyword ${buildString(match_)} { closeSpan: 'Escape' } />`);
349
-
350
- cooked = escapables.get(match_) || match_;
351
- } else if ((m_ = yield match(getSpecialPattern(parentSpan)))) {
352
- cooked = ctx.sourceTextFor(m_);
353
- yield eat(m`code: <*Keyword ${buildString(cooked)} { closeSpan: 'Escape' } />`);
354
- } else if (yield match(re`/[ux]/`)) {
355
- let code = yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
356
-
357
- let value = code.get('value');
358
-
359
- cooked = String.fromCodePoint(parseInt(ctx.sourceTextFor(value), 16));
360
- } else {
361
- yield fail();
362
- }
363
-
364
- yield defineAttribute('cooked', cooked);
365
- }
366
-
367
- @Node
368
- *EscapeCode() {
369
- if (yield eatMatch(m`type: <*Keyword 'u' />`)) {
370
- if (yield eatMatch(m`openToken: <*Punctuator '{' />`)) {
371
- yield eatMatch(m`value$: <*UnsignedHexInteger />`);
372
- yield eat(m`closeToken: <*Punctuator '}' />`);
373
- } else {
374
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
375
- yield eat(m`closeToken: null`);
376
- }
377
- } else if (yield eatMatch(m`type: <*Keyword 'x' />`)) {
378
- yield eat(m`openToken: null`);
379
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
380
- yield eat(m`closeToken: null`);
381
- }
382
- }
383
-
384
- *Digits() {
385
- while (yield eatMatch(m`<*Digit />`));
386
- }
387
-
388
- @Node
389
- *Digit() {
390
- yield eat(re`/\d/`);
391
- }
392
-
393
- @InjectFrom(Shared)
394
- *Any() {}
395
-
396
- @Literal
397
- @Node
398
- @InjectFrom(Shared)
399
- *Keyword() {}
400
-
401
- @Literal
402
- @Node
403
- @InjectFrom(Shared)
404
- *Punctuator() {}
405
- };