@bablr/language-en-regex-vm-pattern 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/grammar.js CHANGED
@@ -1,36 +1,49 @@
1
- /* @macrome
2
- * @generatedby @bablr/macrome-generator-bablr
3
- * @generatedfrom ./grammar.macro.js#c3c390f3e54607dfde1d07c0e099e54294ab7f32
4
- * This file is autogenerated. Please do not edit it directly.
5
- * When editing run `npx macrome watch` then change the file this is generated from.
6
- */
7
- import _applyDecs from "@babel/runtime/helpers/applyDecs2305";
8
- let _initProto, _FlagsDecs, _GroupDecs, _AssertionDecs, _StartOfInputAssertionDecs, _EndOfInputAssertionDecs, _WordBoundaryAssertionDecs, _GapDecs, _CharacterDecs, _CharacterClassDecs, _CharacterClassRangeDecs, _CharacterSetDecs, _AnyCharacterSetDecs, _DigitCharacterSetDecs, _SpaceCharacterSetDecs, _WordCharacterSetDecs, _QuantifierDecs, _AnyDecs, _KeywordDecs, _PunctuatorDecs;
9
1
  import { re, spam as m } from '@bablr/boot';
10
- import { Node, CoveredBy, InjectFrom, UndefinedAttributes, AllowEmpty, Literal } from '@bablr/helpers/decorators';
11
2
  import objectEntries from 'iter-tools-es/methods/object-entries';
12
- import * as Shared from '@bablr/helpers/productions';
13
- import { eat, eatMatch, match, shiftMatch, guard, defineAttribute, fail, o } from '@bablr/helpers/grammar';
3
+ import {
4
+ eat,
5
+ eatMatch,
6
+ match,
7
+ shiftMatch,
8
+ guard,
9
+ defineAttribute,
10
+ fail,
11
+ r,
12
+ startSpan,
13
+ endSpan,
14
+ eatHeld,
15
+ } from '@bablr/helpers/grammar';
14
16
  import { buildString } from '@bablr/helpers/builders';
17
+ import { get } from '@bablr/agast-helpers/path';
18
+ import { printSource } from '@bablr/agast-helpers/tree';
19
+
15
20
  export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
21
+
16
22
  export const dependencies = {};
23
+
17
24
  export const defaultMatcher = m`<Pattern />`;
18
- const escapables = new Map(objectEntries({
19
- n: '\n',
20
- r: '\r',
21
- t: '\t',
22
- 0: '\0'
23
- }));
25
+
26
+ const escapables = new Map(
27
+ objectEntries({
28
+ n: '\n',
29
+ r: '\r',
30
+ t: '\t',
31
+ 0: '\0',
32
+ }),
33
+ );
34
+
24
35
  const flagCharacters = {
25
36
  global: 'g',
26
37
  ignoreCase: 'i',
27
38
  multiline: 'm',
28
39
  dotAll: 's',
29
40
  unicode: 'u',
30
- sticky: 'y'
41
+ sticky: 'y',
31
42
  };
32
- const unique = flags => flags.length === new Set(flags).size;
33
- const getSpecialPattern = span => {
43
+
44
+ const unique = (flags) => flags.length === new Set(flags).size;
45
+
46
+ const getSpecialPattern = (span) => {
34
47
  if (span === 'Pattern') {
35
48
  return re`/[*+?{}[\]().^$|\n\\<>]/`;
36
49
  } else if (span === 'CharacterClass') {
@@ -39,90 +52,123 @@ const getSpecialPattern = span => {
39
52
  throw new Error('unknown span type for special pattern');
40
53
  }
41
54
  };
55
+
42
56
  export const grammar = class RegexGrammar {
43
- static {
44
- [_initProto] = _applyDecs(this, [[Node, 2, "Pattern"], [_FlagsDecs, 2, "Flags"], [AllowEmpty, 2, "Alternatives"], [[AllowEmpty, Node], 2, "Alternative"], [AllowEmpty, 2, "Elements"], [_GroupDecs, 2, "Group"], [Node, 2, "CapturingGroup"], [_AssertionDecs, 2, "Assertion"], [_StartOfInputAssertionDecs, 2, "StartOfInputAssertion"], [_EndOfInputAssertionDecs, 2, "EndOfInputAssertion"], [_WordBoundaryAssertionDecs, 2, "WordBoundaryAssertion"], [_GapDecs, 2, "Gap"], [_CharacterDecs, 2, "Character"], [_CharacterClassDecs, 2, "CharacterClass"], [_CharacterClassRangeDecs, 2, "CharacterClassRange"], [_CharacterSetDecs, 2, "CharacterSet"], [_AnyCharacterSetDecs, 2, "AnyCharacterSet"], [_DigitCharacterSetDecs, 2, "DigitCharacterSet"], [_SpaceCharacterSetDecs, 2, "SpaceCharacterSet"], [_WordCharacterSetDecs, 2, "WordCharacterSet"], [_QuantifierDecs, 2, "Quantifier"], [Node, 2, "UnsignedInteger"], [Node, 2, "UnsignedHexInteger"], [Node, 2, "EscapeSequence"], [Node, 2, "EscapeCode"], [Node, 2, "Digit"], [_AnyDecs, 2, "Any"], [_KeywordDecs, 2, "Keyword"], [_PunctuatorDecs, 2, "Punctuator"]], []).e;
45
- }
46
57
  constructor() {
47
- _initProto(this);
48
- }
49
- *[(_FlagsDecs = [UndefinedAttributes(Object.keys(flagCharacters)), AllowEmpty, Node], _GroupDecs = [CoveredBy('Element'), Node], _AssertionDecs = CoveredBy('Element'), _StartOfInputAssertionDecs = [CoveredBy('Assertion'), Node], _EndOfInputAssertionDecs = [CoveredBy('Assertion'), Node], _WordBoundaryAssertionDecs = [UndefinedAttributes(['negate']), CoveredBy('Assertion'), Node], _GapDecs = [CoveredBy('Assertion'), Node], _CharacterDecs = [CoveredBy('Element'), CoveredBy('CharacterClassElement'), Node], _CharacterClassDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), Node], _CharacterClassRangeDecs = [CoveredBy('CharacterClassElement'), Node], _CharacterSetDecs = CoveredBy('Element'), _AnyCharacterSetDecs = [CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _DigitCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _SpaceCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _WordCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _QuantifierDecs = [UndefinedAttributes(['min', 'max']), Node], _AnyDecs = InjectFrom(Shared), _KeywordDecs = [Literal, Node, InjectFrom(Shared)], _PunctuatorDecs = [Literal, Node, InjectFrom(Shared)], "Pattern")]() {
50
- yield eat(m`openToken: <*Punctuator '/' { balanced: '/', balancedSpan: 'Pattern' } />`);
58
+ this.literals = new Set(['Keyword']);
59
+ this.emptyables = new Set(['Alternatives', 'Alternative', 'Elements', 'Flags']);
60
+ this.attributes = new Map(
61
+ Object.entries({
62
+ Flags: Object.fromEntries(Object.keys(flagCharacters).map((key) => [key, undefined])),
63
+ WordBoundaryAssertion: { negate: undefined },
64
+ CharacterClass: { negate: undefined },
65
+ DigitCharacterSet: { negate: undefined },
66
+ SpaceCharacterSet: { negate: undefined },
67
+ WordCharacterSet: { negate: undefined },
68
+ Quantifier: { min: undefined, max: undefined },
69
+ }),
70
+ );
71
+ }
72
+
73
+ *Pattern() {
74
+ yield eat(m`openToken*: <* '/' />`);
75
+ yield startSpan('Pattern', '/');
51
76
  yield eat(m`<__Alternatives />`);
52
- yield eat(m`closeToken: <*Punctuator '/' { balancer: true } />`);
53
- yield eat(m`flags$: <Flags />`);
77
+ yield endSpan();
78
+ yield eat(m`closeToken*: <* '/' />`);
79
+ yield eat(m`flags: <Flags />`);
54
80
  }
55
- *Flags({
56
- ctx
57
- }) {
81
+
82
+ *Flags() {
58
83
  const flags = yield match(re`/[gimsuy]+/`);
59
- const flagsStr = ctx.sourceTextFor(flags) || '';
84
+
85
+ const flagsStr = printSource(flags) || '';
86
+
60
87
  if (flagsStr && !unique(flagsStr)) throw new Error('flags must be unique');
61
- for (const {
62
- 0: name,
63
- 1: chr
64
- } of Object.entries(flagCharacters)) {
88
+
89
+ for (const { 0: name, 1: chr } of Object.entries(flagCharacters)) {
65
90
  if (flagsStr.includes(chr)) {
66
91
  yield defineAttribute(name, true);
67
92
  } else {
68
93
  yield defineAttribute(name, false);
69
94
  }
70
95
  }
96
+
71
97
  for (const flagChr of flagsStr) {
72
- yield eat(m`tokens[]: <*Keyword ${buildString(flagChr)} />`);
98
+ yield eat(m`tokens[]*: <*Keyword ${buildString(flagChr)} />`);
73
99
  }
74
100
  }
101
+
75
102
  *Alternatives() {
76
103
  do {
77
104
  yield eat(m`alternatives[]$: <Alternative />`);
78
- } while (yield eatMatch(m`#separatorTokens[]: <*Punctuator '|' />`));
105
+ } while (yield eatMatch(m`#separatorTokens: <* '|' />`));
79
106
  }
107
+
80
108
  *Alternative() {
81
109
  yield eat(m`elements[]+$: <__Elements />`);
82
110
  }
83
- *Elements() {
84
- yield eat(m`.[]: []`);
111
+
112
+ *Elements({ matcher }) {
85
113
  while (yield match(re`/[^|]/`)) {
86
- yield eat(m`.[]+: <_Element />`);
114
+ yield eat(m`${get('refMatcher', matcher)} <_Element />`);
87
115
  }
88
116
  }
117
+
89
118
  *Element() {
90
119
  yield guard(m`<*Keyword /[*+?]/ />`);
91
- yield eat(m`<__Any />`, [m`<CharacterClass '[' />`, m`<Group '(?:' />`, m`<__Assertion /[$^]|\\b/i />`, m`<Gap '\\g' />`, m`<__CharacterSet /\.|\\[dswp]/i />`, m`<*Character />`]);
92
- if (yield match(re`/[*+?{]/`)) {
93
- return shiftMatch(m`<Quantifier />`);
120
+
121
+ if (yield eatMatch(m`<CharacterClass '[' />`)) {
122
+ } else if (yield eatMatch(m`<Group '(?:' />`)) {
123
+ } else if (yield eatMatch(m`<_Assertion /[$^]|\\b/i />`)) {
124
+ } else if (yield eatMatch(m`<Gap '\\g' />`)) {
125
+ } else if (yield eatMatch(m`<_CharacterSet /\.|\\[dswp]/i />`)) {
126
+ } else {
127
+ yield eat(m`<*Character />`);
94
128
  }
129
+
130
+ return r(shiftMatch(m`<Quantifier /[*+?{]/ />`));
95
131
  }
132
+
96
133
  *Group() {
97
- yield eat(m`openToken: <*Punctuator '(?:' { balanced: ')' } />`);
134
+ yield eat(m`openToken*: <* '(?:' />`);
98
135
  yield eat(m`<__Alternatives />`);
99
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
136
+ yield eat(m`closeToken*: <* ')' />`);
100
137
  }
138
+
101
139
  *CapturingGroup() {
102
- yield eat(m`openToken: <*Punctuator '(' { balanced: ')' } />`);
140
+ yield eat(m`openToken*: <* '(' />`);
103
141
  yield eat(m`<__Alternatives />`);
104
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
142
+ yield eat(m`closeToken*: <* ')' />`);
105
143
  }
144
+
106
145
  *Assertion() {
107
- yield eat(m`<__Any />`, [m`<StartOfInputAssertion '^' />`, m`<EndOfInputAssertion '$' />`, m`<WordBoundaryAssertion /\\b/i />`]);
146
+ if (yield eatMatch(m`<StartOfInputAssertion '^' />`)) {
147
+ } else if (yield eatMatch(m`<EndOfInputAssertion '$' />`)) {
148
+ } else {
149
+ yield eat(m`<WordBoundaryAssertion /\\b/i />`);
150
+ }
108
151
  }
152
+
109
153
  *StartOfInputAssertion() {
110
- yield eat(m`sigilToken: <*Keyword '^' />`);
154
+ yield eat(m`sigilToken*: <*Keyword '^' />`);
111
155
  }
156
+
112
157
  *EndOfInputAssertion() {
113
- yield eatMatch(m`sigilToken: <*Keyword '$' />`);
158
+ yield eatMatch(m`sigilToken*: <*Keyword '$' />`);
114
159
  }
115
- *WordBoundaryAssertion({
116
- ctx
117
- }) {
118
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
119
- const m_ = yield eat(m`value: <*Keyword /b/i />`);
120
- yield defineAttribute('negate', ctx.sourceTextFor(m_) === 'B');
160
+
161
+ *WordBoundaryAssertion() {
162
+ yield eatMatch(m`escapeToken*: <* '\\' />`);
163
+ const m_ = yield eat(m`value*: <*Keyword /b/i />`);
164
+ yield defineAttribute('negate', printSource(m_.node) === 'B');
121
165
  }
166
+
122
167
  *Gap() {
123
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
124
- yield eat(m`value: <*Keyword 'g' />`);
168
+ yield eatMatch(m`escapeToken*: <* '\\' />`);
169
+ yield eat(m`value*: <*Keyword 'g' />`);
125
170
  }
171
+
126
172
  *Character() {
127
173
  if (yield match('\\')) {
128
174
  yield eat(m`@: <EscapeSequence />`);
@@ -130,154 +176,181 @@ export const grammar = class RegexGrammar {
130
176
  yield eat(re`/[^\r\n\t]/`);
131
177
  }
132
178
  }
179
+
133
180
  *CharacterClass() {
134
- yield eat(m`openToken: <*Punctuator '[' { balancedSpan: 'CharacterClass', balanced: ']' } />`);
135
- let negate = yield eatMatch(m`negateToken: <*Keyword '^' />`, null, o({
136
- bind: true
137
- }));
181
+ yield eat(m`openToken*: <* '[' />`);
182
+ yield startSpan('CharacterClass', ']');
183
+
184
+ let negate = yield eatMatch(m`negateToken*: <*Keyword '^' />`);
185
+
138
186
  yield defineAttribute('negate', !!negate);
139
- while (yield match(re`/./s`)) {
187
+
188
+ while (yield match(re`/[^\]]/s`)) {
140
189
  yield eat(m`elements[]+$: <_CharacterClassElement />`);
141
190
  }
142
- yield eat(m`closeToken: <*Punctuator ']' { balancer: true } />`);
191
+
192
+ yield endSpan();
193
+ yield eat(m`closeToken*: <* ']' />`);
143
194
  }
195
+
144
196
  *CharacterClassElement() {
145
- yield eat(m`<__Any />`, [m`<__CharacterSet /\\[dswp]/i />`, m`<Gap '\\g' />`, m`<*Character />`]);
197
+ if (yield eatMatch(m`<_CharacterSet /\\[dswp]/i />`)) {
198
+ } else if (yield eatMatch(m`<Gap '\\g' />`)) {
199
+ } else {
200
+ yield eat(m`<*Character />`);
201
+ }
202
+
146
203
  if (yield match('-')) {
147
- return shiftMatch(m`<CharacterClassRange />`);
204
+ return r(shiftMatch(m`<CharacterClassRange />`));
148
205
  }
149
206
  }
207
+
150
208
  *CharacterClassRange() {
151
- yield eat(m`min+$: <*Character />`);
152
- yield eat(m`sigilToken: <*Punctuator '-' />`);
209
+ yield eatHeld(m`min+$: <*Character />`);
210
+ yield eat(m`sigilToken*: <* '-' />`);
153
211
  yield eat(m`max+$: <*Character />`);
154
212
  }
213
+
155
214
  *CharacterSet() {
156
- yield eat(m`<__Any />`, [m`<AnyCharacterSet '.' />`, m`<DigitCharacterSet /\\[dD]/ />`, m`<SpaceCharacterSet /\\[sS]/ />`, m`<WordCharacterSet /\\[wW]/ />`]);
215
+ if (yield eatMatch(m`<AnyCharacterSet '.' />`)) {
216
+ } else if (yield eatMatch(m`<DigitCharacterSet /\\[dD]/ />`)) {
217
+ } else if (yield eatMatch(m`<SpaceCharacterSet /\\[sS]/ />`)) {
218
+ } else {
219
+ yield eat(m`<WordCharacterSet /\\[wW]/ />`);
220
+ }
157
221
  }
222
+
158
223
  *AnyCharacterSet() {
159
- yield eat(m`sigilToken: <*Keyword '.' />`);
160
- }
161
- *DigitCharacterSet({
162
- ctx
163
- }) {
164
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
165
- let code = yield eat(m`value: <*Keyword /[dD]/ />`);
166
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'D');
167
- }
168
- *SpaceCharacterSet({
169
- ctx
170
- }) {
171
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
172
- let code = yield eat(m`value: <*Keyword /[sS]/ />`);
173
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'S');
174
- }
175
- *WordCharacterSet({
176
- ctx
177
- }) {
178
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
179
- let code = yield eat(m`value: <*Keyword /[wW]/ />`);
180
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'W');
181
- }
182
- *Quantifier({
183
- ctx
184
- }) {
185
- yield eat(m`element+$: <_Element />`);
224
+ yield eat(m`sigilToken*: <*Keyword '.' />`);
225
+ }
226
+
227
+ *DigitCharacterSet() {
228
+ yield eat(m`escapeToken*: <* '\\' />`);
229
+
230
+ let code = yield eat(m`value*: <*Keyword /[dD]/ />`);
231
+
232
+ yield defineAttribute('negate', printSource(code.node) === 'D');
233
+ }
234
+
235
+ *SpaceCharacterSet() {
236
+ yield eat(m`escapeToken*: <* '\\' />`);
237
+
238
+ let code = yield eat(m`value*: <*Keyword /[sS]/ />`);
239
+
240
+ yield defineAttribute('negate', printSource(code.node) === 'S');
241
+ }
242
+
243
+ *WordCharacterSet() {
244
+ yield eat(m`escapeToken*: <* '\\' />`);
245
+
246
+ let code = yield eat(m`value*: <*Keyword /[wW]/ />`);
247
+
248
+ yield defineAttribute('negate', printSource(code.node) === 'W');
249
+ }
250
+
251
+ *Quantifier() {
252
+ yield eatHeld(m`element+$: <_Element />`);
253
+
186
254
  let attrs, sigil;
187
- if (sigil = yield eatMatch(m`sigilToken: <*Keyword /[*+?]/ />`)) {
188
- switch (ctx.sourceTextFor(sigil)) {
255
+
256
+ if ((sigil = yield eatMatch(m`sigilToken*: <*Keyword /[*+?]/ />`))) {
257
+ switch (printSource(sigil.node)) {
189
258
  case '*':
190
- attrs = {
191
- min: 0,
192
- max: Infinity
193
- };
259
+ attrs = { min: 0, max: Infinity };
194
260
  break;
195
261
  case '+':
196
- attrs = {
197
- min: 1,
198
- max: Infinity
199
- };
262
+ attrs = { min: 1, max: Infinity };
200
263
  break;
201
264
  case '?':
202
- attrs = {
203
- min: 0,
204
- max: 1
205
- };
265
+ attrs = { min: 0, max: 1 };
206
266
  break;
207
267
  default:
208
268
  yield fail();
209
269
  }
210
- } else if (yield eat(m`openToken: <*Punctuator '{' { balanced: '}' } />`)) {
270
+ } else if (yield eat(m`openToken*: <* '{' />`)) {
211
271
  let max;
212
272
  let min = yield eat(m`min$: <*UnsignedInteger />`);
213
- if (yield eatMatch(m`separator: <*Punctuator ',' />`)) {
273
+
274
+ if (yield eatMatch(m`separator$: <* ',' />`)) {
214
275
  max = yield eatMatch(m`max$: <*UnsignedInteger />`);
215
276
  }
216
- min = min && ctx.sourceTextFor(min);
217
- max = max && ctx.sourceTextFor(max);
277
+
278
+ min = min && printSource(min.node);
279
+ max = max && printSource(max.node);
280
+
218
281
  min = min && parseInt(min, 10);
219
282
  max = max && parseInt(max, 10);
220
- attrs = {
221
- min,
222
- max
223
- };
224
- yield eat(m`closeToken: <*Punctuator '}' { balancer: true } />`);
283
+
284
+ attrs = { min, max };
285
+
286
+ yield eat(m`closeToken*: <* '}' />`);
225
287
  }
288
+
226
289
  yield defineAttribute('min', attrs.min);
227
290
  yield defineAttribute('max', attrs.max);
228
291
  }
292
+
229
293
  *UnsignedInteger() {
230
294
  yield eat(re`/\d+/`);
231
295
  }
296
+
232
297
  *UnsignedHexInteger() {
233
298
  yield eat(re`/[\da-fA-F]+/`);
234
299
  }
235
- *EscapeSequence({
236
- state,
237
- ctx
238
- }) {
239
- const parentSpan = state.span;
240
- yield eat(m`escape: <*Punctuator '\\' { openSpan: 'Escape' } />`);
300
+
301
+ *EscapeSequence({ s }) {
302
+ const parentSpan = s().span;
303
+
304
+ yield startSpan('Escape');
305
+
306
+ yield eat(m`escape*: <* '\\' />`);
307
+
241
308
  let m_;
242
309
  let cooked;
243
- if (m_ = yield match(re`/[\\/nrt0]/`)) {
244
- const match_ = ctx.sourceTextFor(m_);
245
- yield eat(m`code: <*Keyword ${buildString(match_)} { closeSpan: 'Escape' } />`);
310
+
311
+ if ((m_ = yield match(re`/[\\/nrt0]/`))) {
312
+ const match_ = printSource(m_);
313
+ yield eat(m`code*: <*Keyword ${buildString(match_)} />`);
314
+
246
315
  cooked = escapables.get(match_) || match_;
247
- } else if (m_ = yield match(getSpecialPattern(parentSpan))) {
248
- cooked = ctx.sourceTextFor(m_);
249
- yield eat(m`code: <*Keyword ${buildString(cooked)} { closeSpan: 'Escape' } />`);
316
+ } else if ((m_ = yield match(getSpecialPattern(parentSpan)))) {
317
+ cooked = printSource(m_);
318
+ yield eat(m`code*: <*Keyword ${buildString(cooked)} />`);
250
319
  } else if (yield match(re`/[ux]/`)) {
251
- let code = yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
252
- let value = code.get('value');
253
- cooked = String.fromCodePoint(parseInt(ctx.sourceTextFor(value), 16));
320
+ let code = yield eat(m`code*: <EscapeCode />`);
321
+
322
+ let value = get('value', code.node);
323
+
324
+ cooked = String.fromCodePoint(parseInt(printSource(value), 16));
254
325
  } else {
255
326
  yield fail();
256
327
  }
328
+
329
+ yield endSpan();
330
+
257
331
  yield defineAttribute('cooked', cooked);
258
332
  }
333
+
259
334
  *EscapeCode() {
260
- if (yield eatMatch(m`type: <*Keyword 'u' />`)) {
261
- if (yield eatMatch(m`openToken: <*Punctuator '{' />`)) {
262
- yield eatMatch(m`value$: <*UnsignedHexInteger />`);
263
- yield eat(m`closeToken: <*Punctuator '}' />`);
335
+ if (yield eatMatch(m`type*: <*Keyword 'u' />`)) {
336
+ if (yield eatMatch(m`openToken*: <* '{' />`)) {
337
+ yield eatMatch(m`value: <*UnsignedHexInteger />`);
338
+ yield eat(m`closeToken*: <* '}' />`);
264
339
  } else {
265
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
266
- yield eat(m`closeToken: null`);
340
+ yield eat(m`value: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
267
341
  }
268
- } else if (yield eatMatch(m`type: <*Keyword 'x' />`)) {
269
- yield eat(m`openToken: null`);
270
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
271
- yield eat(m`closeToken: null`);
342
+ } else if (yield eatMatch(m`type*: <*Keyword 'x' />`)) {
343
+ yield eat(m`value: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
272
344
  }
273
345
  }
346
+
274
347
  *Digits() {
275
348
  while (yield eatMatch(m`<*Digit />`));
276
349
  }
350
+
277
351
  *Digit() {
278
352
  yield eat(re`/\d/`);
279
353
  }
280
- *Any() {}
281
- *Keyword() {}
282
- *Punctuator() {}
283
- };
354
+ };
355
+
356
+ export default { canonicalURL, dependencies, grammar, defaultMatcher };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bablr/language-en-regex-vm-pattern",
3
- "version": "0.11.0",
3
+ "version": "0.12.0",
4
4
  "description": "A BABLR language for nonbacktracking JS-style regexes",
5
5
  "engines": {
6
6
  "node": ">=12.0.0"
@@ -15,25 +15,19 @@
15
15
  ],
16
16
  "sideEffects": false,
17
17
  "scripts": {
18
- "build": "macrome build",
19
- "watch": "macrome watch",
20
- "clean": "macrome clean",
21
18
  "test": "mocha"
22
19
  },
23
20
  "dependencies": {
24
- "@babel/runtime": "7.28.2",
25
- "@bablr/boot": "0.10.0",
26
- "@bablr/helpers": "0.24.0",
27
- "@bablr/agast-helpers": "0.9.0",
28
- "@bablr/agast-vm-helpers": "0.9.0",
21
+ "@bablr/boot": "0.11.0",
22
+ "@bablr/helpers": "0.25.0",
23
+ "@bablr/agast-helpers": "0.10.0",
24
+ "@bablr/agast-vm-helpers": "0.10.0",
29
25
  "iter-tools-es": "7.5.3"
30
26
  },
31
27
  "devDependencies": {
32
28
  "@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#c97bfa4b3663f8378e9b3e42bb5a41e685406cf9",
33
- "@bablr/macrome": "^0.1.3",
34
- "@bablr/macrome-generator-bablr": "^0.3.2",
35
29
  "@qnighy/dedent": "0.1.1",
36
- "bablr": "^0.10.0",
30
+ "bablr": "^0.11.0",
37
31
  "enhanced-resolve": "^5.12.0",
38
32
  "eslint": "^8.47.0",
39
33
  "eslint-import-resolver-enhanced-resolve": "^1.0.5",
@@ -48,7 +42,10 @@
48
42
  "english",
49
43
  "regex"
50
44
  ],
51
- "repository": "git@github.com:bablr-lang/language-en-regex-vm-pattern.git",
45
+ "repository": {
46
+ "type": "git",
47
+ "url": "git+ssh://git@github.com/bablr-lang/language-en-regex-vm-pattern.git"
48
+ },
52
49
  "homepage": "https://github.com/bablr-lang/language-en-regex-vm-pattern",
53
50
  "author": "Conrad Buck <conartist6@gmail.com>",
54
51
  "license": "MIT"
@@ -1,405 +0,0 @@
1
- import { re, spam as m } from '@bablr/boot';
2
- import {
3
- Node,
4
- CoveredBy,
5
- InjectFrom,
6
- UndefinedAttributes,
7
- AllowEmpty,
8
- Literal,
9
- } from '@bablr/helpers/decorators';
10
- import objectEntries from 'iter-tools-es/methods/object-entries';
11
- import * as Shared from '@bablr/helpers/productions';
12
- import {
13
- eat,
14
- eatMatch,
15
- match,
16
- shiftMatch,
17
- guard,
18
- defineAttribute,
19
- fail,
20
- o,
21
- } from '@bablr/helpers/grammar';
22
- import { buildString } from '@bablr/helpers/builders';
23
-
24
- export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
25
-
26
- export const dependencies = {};
27
-
28
- export const defaultMatcher = m`<Pattern />`;
29
-
30
- const escapables = new Map(
31
- objectEntries({
32
- n: '\n',
33
- r: '\r',
34
- t: '\t',
35
- 0: '\0',
36
- }),
37
- );
38
-
39
- const flagCharacters = {
40
- global: 'g',
41
- ignoreCase: 'i',
42
- multiline: 'm',
43
- dotAll: 's',
44
- unicode: 'u',
45
- sticky: 'y',
46
- };
47
-
48
- const unique = (flags) => flags.length === new Set(flags).size;
49
-
50
- const getSpecialPattern = (span) => {
51
- if (span === 'Pattern') {
52
- return re`/[*+?{}[\]().^$|\n\\<>]/`;
53
- } else if (span === 'CharacterClass') {
54
- return re`/[\]\\]/`;
55
- } else {
56
- throw new Error('unknown span type for special pattern');
57
- }
58
- };
59
-
60
- export const grammar = class RegexGrammar {
61
- @Node
62
- *Pattern() {
63
- yield eat(m`openToken: <*Punctuator '/' { balanced: '/', balancedSpan: 'Pattern' } />`);
64
- yield eat(m`<__Alternatives />`);
65
- yield eat(m`closeToken: <*Punctuator '/' { balancer: true } />`);
66
- yield eat(m`flags$: <Flags />`);
67
- }
68
-
69
- @UndefinedAttributes(Object.keys(flagCharacters))
70
- @AllowEmpty
71
- @Node
72
- *Flags({ ctx }) {
73
- const flags = yield match(re`/[gimsuy]+/`);
74
-
75
- const flagsStr = ctx.sourceTextFor(flags) || '';
76
-
77
- if (flagsStr && !unique(flagsStr)) throw new Error('flags must be unique');
78
-
79
- for (const { 0: name, 1: chr } of Object.entries(flagCharacters)) {
80
- if (flagsStr.includes(chr)) {
81
- yield defineAttribute(name, true);
82
- } else {
83
- yield defineAttribute(name, false);
84
- }
85
- }
86
-
87
- for (const flagChr of flagsStr) {
88
- yield eat(m`tokens[]: <*Keyword ${buildString(flagChr)} />`);
89
- }
90
- }
91
-
92
- @AllowEmpty
93
- *Alternatives() {
94
- do {
95
- yield eat(m`alternatives[]$: <Alternative />`);
96
- } while (yield eatMatch(m`#separatorTokens[]: <*Punctuator '|' />`));
97
- }
98
-
99
- @AllowEmpty
100
- @Node
101
- *Alternative() {
102
- yield eat(m`elements[]+$: <__Elements />`);
103
- }
104
-
105
- @AllowEmpty
106
- *Elements() {
107
- yield eat(m`.[]: []`);
108
- while (yield match(re`/[^|]/`)) {
109
- yield eat(m`.[]+: <_Element />`);
110
- }
111
- }
112
-
113
- *Element() {
114
- yield guard(m`<*Keyword /[*+?]/ />`);
115
-
116
- yield eat(m`<__Any />`, [
117
- m`<CharacterClass '[' />`,
118
- m`<Group '(?:' />`,
119
- m`<__Assertion /[$^]|\\b/i />`,
120
- m`<Gap '\\g' />`,
121
- m`<__CharacterSet /\.|\\[dswp]/i />`,
122
- m`<*Character />`,
123
- ]);
124
-
125
- if (yield match(re`/[*+?{]/`)) {
126
- return shiftMatch(m`<Quantifier />`);
127
- }
128
- }
129
-
130
- @CoveredBy('Element')
131
- @Node
132
- *Group() {
133
- yield eat(m`openToken: <*Punctuator '(?:' { balanced: ')' } />`);
134
- yield eat(m`<__Alternatives />`);
135
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
136
- }
137
-
138
- @Node
139
- *CapturingGroup() {
140
- yield eat(m`openToken: <*Punctuator '(' { balanced: ')' } />`);
141
- yield eat(m`<__Alternatives />`);
142
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
143
- }
144
-
145
- @CoveredBy('Element')
146
- *Assertion() {
147
- yield eat(m`<__Any />`, [
148
- m`<StartOfInputAssertion '^' />`,
149
- m`<EndOfInputAssertion '$' />`,
150
- m`<WordBoundaryAssertion /\\b/i />`,
151
- ]);
152
- }
153
-
154
- @CoveredBy('Assertion')
155
- @Node
156
- *StartOfInputAssertion() {
157
- yield eat(m`sigilToken: <*Keyword '^' />`);
158
- }
159
-
160
- @CoveredBy('Assertion')
161
- @Node
162
- *EndOfInputAssertion() {
163
- yield eatMatch(m`sigilToken: <*Keyword '$' />`);
164
- }
165
-
166
- @UndefinedAttributes(['negate'])
167
- @CoveredBy('Assertion')
168
- @Node
169
- *WordBoundaryAssertion({ ctx }) {
170
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
171
- const m_ = yield eat(m`value: <*Keyword /b/i />`);
172
- yield defineAttribute('negate', ctx.sourceTextFor(m_) === 'B');
173
- }
174
-
175
- @CoveredBy('Assertion')
176
- @Node
177
- *Gap() {
178
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
179
- yield eat(m`value: <*Keyword 'g' />`);
180
- }
181
-
182
- @CoveredBy('Element')
183
- @CoveredBy('CharacterClassElement')
184
- @Node
185
- *Character() {
186
- if (yield match('\\')) {
187
- yield eat(m`@: <EscapeSequence />`);
188
- } else {
189
- yield eat(re`/[^\r\n\t]/`);
190
- }
191
- }
192
-
193
- @UndefinedAttributes(['negate'])
194
- @CoveredBy('Element')
195
- @Node
196
- *CharacterClass() {
197
- yield eat(m`openToken: <*Punctuator '[' { balancedSpan: 'CharacterClass', balanced: ']' } />`);
198
-
199
- let negate = yield eatMatch(m`negateToken: <*Keyword '^' />`, null, o({ bind: true }));
200
-
201
- yield defineAttribute('negate', !!negate);
202
-
203
- while (yield match(re`/./s`)) {
204
- yield eat(m`elements[]+$: <_CharacterClassElement />`);
205
- }
206
-
207
- yield eat(m`closeToken: <*Punctuator ']' { balancer: true } />`);
208
- }
209
-
210
- *CharacterClassElement() {
211
- yield eat(m`<__Any />`, [
212
- m`<__CharacterSet /\\[dswp]/i />`,
213
- m`<Gap '\\g' />`,
214
- m`<*Character />`,
215
- ]);
216
-
217
- if (yield match('-')) {
218
- return shiftMatch(m`<CharacterClassRange />`);
219
- }
220
- }
221
-
222
- @CoveredBy('CharacterClassElement')
223
- @Node
224
- *CharacterClassRange() {
225
- yield eat(m`min+$: <*Character />`);
226
- yield eat(m`sigilToken: <*Punctuator '-' />`);
227
- yield eat(m`max+$: <*Character />`);
228
- }
229
-
230
- @CoveredBy('Element')
231
- *CharacterSet() {
232
- yield eat(m`<__Any />`, [
233
- m`<AnyCharacterSet '.' />`,
234
- m`<DigitCharacterSet /\\[dD]/ />`,
235
- m`<SpaceCharacterSet /\\[sS]/ />`,
236
- m`<WordCharacterSet /\\[wW]/ />`,
237
- ]);
238
- }
239
-
240
- @CoveredBy('Element')
241
- @CoveredBy('CharacterSet')
242
- @Node
243
- *AnyCharacterSet() {
244
- yield eat(m`sigilToken: <*Keyword '.' />`);
245
- }
246
-
247
- @UndefinedAttributes(['negate'])
248
- @CoveredBy('Element')
249
- @CoveredBy('CharacterSet')
250
- @Node
251
- *DigitCharacterSet({ ctx }) {
252
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
253
-
254
- let code = yield eat(m`value: <*Keyword /[dD]/ />`);
255
-
256
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'D');
257
- }
258
-
259
- @UndefinedAttributes(['negate'])
260
- @CoveredBy('Element')
261
- @CoveredBy('CharacterSet')
262
- @Node
263
- *SpaceCharacterSet({ ctx }) {
264
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
265
-
266
- let code = yield eat(m`value: <*Keyword /[sS]/ />`);
267
-
268
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'S');
269
- }
270
-
271
- @UndefinedAttributes(['negate'])
272
- @CoveredBy('Element')
273
- @CoveredBy('CharacterSet')
274
- @Node
275
- *WordCharacterSet({ ctx }) {
276
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
277
-
278
- let code = yield eat(m`value: <*Keyword /[wW]/ />`);
279
-
280
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'W');
281
- }
282
-
283
- @UndefinedAttributes(['min', 'max'])
284
- @Node
285
- *Quantifier({ ctx }) {
286
- yield eat(m`element+$: <_Element />`);
287
-
288
- let attrs, sigil;
289
-
290
- if ((sigil = yield eatMatch(m`sigilToken: <*Keyword /[*+?]/ />`))) {
291
- switch (ctx.sourceTextFor(sigil)) {
292
- case '*':
293
- attrs = { min: 0, max: Infinity };
294
- break;
295
- case '+':
296
- attrs = { min: 1, max: Infinity };
297
- break;
298
- case '?':
299
- attrs = { min: 0, max: 1 };
300
- break;
301
- default:
302
- yield fail();
303
- }
304
- } else if (yield eat(m`openToken: <*Punctuator '{' { balanced: '}' } />`)) {
305
- let max;
306
- let min = yield eat(m`min$: <*UnsignedInteger />`);
307
-
308
- if (yield eatMatch(m`separator: <*Punctuator ',' />`)) {
309
- max = yield eatMatch(m`max$: <*UnsignedInteger />`);
310
- }
311
-
312
- min = min && ctx.sourceTextFor(min);
313
- max = max && ctx.sourceTextFor(max);
314
-
315
- min = min && parseInt(min, 10);
316
- max = max && parseInt(max, 10);
317
-
318
- attrs = { min, max };
319
-
320
- yield eat(m`closeToken: <*Punctuator '}' { balancer: true } />`);
321
- }
322
-
323
- yield defineAttribute('min', attrs.min);
324
- yield defineAttribute('max', attrs.max);
325
- }
326
-
327
- @Node
328
- *UnsignedInteger() {
329
- yield eat(re`/\d+/`);
330
- }
331
-
332
- @Node
333
- *UnsignedHexInteger() {
334
- yield eat(re`/[\da-fA-F]+/`);
335
- }
336
-
337
- @Node
338
- *EscapeSequence({ state, ctx }) {
339
- const parentSpan = state.span;
340
-
341
- yield eat(m`escape: <*Punctuator '\\' { openSpan: 'Escape' } />`);
342
-
343
- let m_;
344
- let cooked;
345
-
346
- if ((m_ = yield match(re`/[\\/nrt0]/`))) {
347
- const match_ = ctx.sourceTextFor(m_);
348
- yield eat(m`code: <*Keyword ${buildString(match_)} { closeSpan: 'Escape' } />`);
349
-
350
- cooked = escapables.get(match_) || match_;
351
- } else if ((m_ = yield match(getSpecialPattern(parentSpan)))) {
352
- cooked = ctx.sourceTextFor(m_);
353
- yield eat(m`code: <*Keyword ${buildString(cooked)} { closeSpan: 'Escape' } />`);
354
- } else if (yield match(re`/[ux]/`)) {
355
- let code = yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
356
-
357
- let value = code.get('value');
358
-
359
- cooked = String.fromCodePoint(parseInt(ctx.sourceTextFor(value), 16));
360
- } else {
361
- yield fail();
362
- }
363
-
364
- yield defineAttribute('cooked', cooked);
365
- }
366
-
367
- @Node
368
- *EscapeCode() {
369
- if (yield eatMatch(m`type: <*Keyword 'u' />`)) {
370
- if (yield eatMatch(m`openToken: <*Punctuator '{' />`)) {
371
- yield eatMatch(m`value$: <*UnsignedHexInteger />`);
372
- yield eat(m`closeToken: <*Punctuator '}' />`);
373
- } else {
374
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
375
- yield eat(m`closeToken: null`);
376
- }
377
- } else if (yield eatMatch(m`type: <*Keyword 'x' />`)) {
378
- yield eat(m`openToken: null`);
379
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
380
- yield eat(m`closeToken: null`);
381
- }
382
- }
383
-
384
- *Digits() {
385
- while (yield eatMatch(m`<*Digit />`));
386
- }
387
-
388
- @Node
389
- *Digit() {
390
- yield eat(re`/\d/`);
391
- }
392
-
393
- @InjectFrom(Shared)
394
- *Any() {}
395
-
396
- @Literal
397
- @Node
398
- @InjectFrom(Shared)
399
- *Keyword() {}
400
-
401
- @Literal
402
- @Node
403
- @InjectFrom(Shared)
404
- *Punctuator() {}
405
- };