@bablr/language-en-regex-vm-pattern 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/grammar.js CHANGED
@@ -1,35 +1,49 @@
1
- /* @macrome
2
- * @generatedby @bablr/macrome-generator-bablr
3
- * @generatedfrom ./grammar.macro.js#44cd5c6d803ea69c494531d65db6b33a5faa09cb
4
- * This file is autogenerated. Please do not edit it directly.
5
- * When editing run `npx macrome watch` then change the file this is generated from.
6
- */
7
- import _applyDecs from "@babel/runtime/helpers/applyDecs2305";
8
- let _initProto, _FlagsDecs, _GroupDecs, _AssertionDecs, _StartOfInputAssertionDecs, _EndOfInputAssertionDecs, _WordBoundaryAssertionDecs, _GapDecs, _CharacterDecs, _CharacterClassDecs, _CharacterClassRangeDecs, _CharacterSetDecs, _AnyCharacterSetDecs, _DigitCharacterSetDecs, _SpaceCharacterSetDecs, _WordCharacterSetDecs, _QuantifierDecs, _AnyDecs, _KeywordDecs, _PunctuatorDecs;
9
1
  import { re, spam as m } from '@bablr/boot';
10
- import { Node, CoveredBy, InjectFrom, UndefinedAttributes, AllowEmpty, Literal } from '@bablr/helpers/decorators';
11
2
  import objectEntries from 'iter-tools-es/methods/object-entries';
12
- import * as Shared from '@bablr/helpers/productions';
13
- import { eat, eatMatch, match, shiftMatch, guard, defineAttribute, fail, o } from '@bablr/helpers/grammar';
14
- import { buildString, buildBoolean } from '@bablr/helpers/builders';
3
+ import {
4
+ eat,
5
+ eatMatch,
6
+ match,
7
+ shiftMatch,
8
+ guard,
9
+ defineAttribute,
10
+ fail,
11
+ r,
12
+ startSpan,
13
+ endSpan,
14
+ eatHeld,
15
+ } from '@bablr/helpers/grammar';
16
+ import { buildString } from '@bablr/helpers/builders';
17
+ import { get } from '@bablr/agast-helpers/path';
18
+ import { printSource } from '@bablr/agast-helpers/tree';
19
+
15
20
  export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
21
+
16
22
  export const dependencies = {};
17
- const escapables = new Map(objectEntries({
18
- n: '\n',
19
- r: '\r',
20
- t: '\t',
21
- 0: '\0'
22
- }));
23
+
24
+ export const defaultMatcher = m`<Pattern />`;
25
+
26
+ const escapables = new Map(
27
+ objectEntries({
28
+ n: '\n',
29
+ r: '\r',
30
+ t: '\t',
31
+ 0: '\0',
32
+ }),
33
+ );
34
+
23
35
  const flagCharacters = {
24
36
  global: 'g',
25
37
  ignoreCase: 'i',
26
38
  multiline: 'm',
27
39
  dotAll: 's',
28
40
  unicode: 'u',
29
- sticky: 'y'
41
+ sticky: 'y',
30
42
  };
31
- const unique = flags => flags.length === new Set(flags).size;
32
- const getSpecialPattern = span => {
43
+
44
+ const unique = (flags) => flags.length === new Set(flags).size;
45
+
46
+ const getSpecialPattern = (span) => {
33
47
  if (span === 'Pattern') {
34
48
  return re`/[*+?{}[\]().^$|\n\\<>]/`;
35
49
  } else if (span === 'CharacterClass') {
@@ -38,90 +52,123 @@ const getSpecialPattern = span => {
38
52
  throw new Error('unknown span type for special pattern');
39
53
  }
40
54
  };
55
+
41
56
  export const grammar = class RegexGrammar {
42
- static {
43
- [_initProto] = _applyDecs(this, [[Node, 2, "Pattern"], [_FlagsDecs, 2, "Flags"], [AllowEmpty, 2, "Alternatives"], [[AllowEmpty, Node], 2, "Alternative"], [AllowEmpty, 2, "Elements"], [_GroupDecs, 2, "Group"], [Node, 2, "CapturingGroup"], [_AssertionDecs, 2, "Assertion"], [_StartOfInputAssertionDecs, 2, "StartOfInputAssertion"], [_EndOfInputAssertionDecs, 2, "EndOfInputAssertion"], [_WordBoundaryAssertionDecs, 2, "WordBoundaryAssertion"], [_GapDecs, 2, "Gap"], [_CharacterDecs, 2, "Character"], [_CharacterClassDecs, 2, "CharacterClass"], [_CharacterClassRangeDecs, 2, "CharacterClassRange"], [_CharacterSetDecs, 2, "CharacterSet"], [_AnyCharacterSetDecs, 2, "AnyCharacterSet"], [_DigitCharacterSetDecs, 2, "DigitCharacterSet"], [_SpaceCharacterSetDecs, 2, "SpaceCharacterSet"], [_WordCharacterSetDecs, 2, "WordCharacterSet"], [_QuantifierDecs, 2, "Quantifier"], [Node, 2, "UnsignedInteger"], [Node, 2, "UnsignedHexInteger"], [Node, 2, "EscapeSequence"], [Node, 2, "EscapeCode"], [Node, 2, "Digit"], [_AnyDecs, 2, "Any"], [_KeywordDecs, 2, "Keyword"], [_PunctuatorDecs, 2, "Punctuator"]], []).e;
44
- }
45
57
  constructor() {
46
- _initProto(this);
47
- }
48
- *[(_FlagsDecs = [UndefinedAttributes(Object.keys(flagCharacters)), AllowEmpty, Node], _GroupDecs = [CoveredBy('Element'), Node], _AssertionDecs = CoveredBy('Element'), _StartOfInputAssertionDecs = [CoveredBy('Assertion'), Node], _EndOfInputAssertionDecs = [CoveredBy('Assertion'), Node], _WordBoundaryAssertionDecs = [UndefinedAttributes(['negate']), CoveredBy('Assertion'), Node], _GapDecs = [CoveredBy('Assertion'), Node], _CharacterDecs = [CoveredBy('Element'), CoveredBy('CharacterClassElement'), Node], _CharacterClassDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), Node], _CharacterClassRangeDecs = [CoveredBy('CharacterClassElement'), Node], _CharacterSetDecs = CoveredBy('Element'), _AnyCharacterSetDecs = [CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _DigitCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _SpaceCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _WordCharacterSetDecs = [UndefinedAttributes(['negate']), CoveredBy('Element'), CoveredBy('CharacterSet'), Node], _QuantifierDecs = [UndefinedAttributes(['min', 'max']), Node], _AnyDecs = InjectFrom(Shared), _KeywordDecs = [Literal, Node, InjectFrom(Shared)], _PunctuatorDecs = [Literal, Node, InjectFrom(Shared)], "Pattern")]() {
49
- yield eat(m`openToken: <*Punctuator '/' { balanced: '/', balancedSpan: 'Pattern' } />`);
50
- yield eat(m`<_Alternatives />`);
51
- yield eat(m`closeToken: <*Punctuator '/' { balancer: true } />`);
52
- yield eat(m`flags$: <Flags />`);
53
- }
54
- *Flags({
55
- ctx
56
- }) {
58
+ this.literals = new Set(['Keyword']);
59
+ this.emptyables = new Set(['Alternatives', 'Alternative', 'Elements', 'Flags']);
60
+ this.attributes = new Map(
61
+ Object.entries({
62
+ Flags: Object.fromEntries(Object.keys(flagCharacters).map((key) => [key, undefined])),
63
+ WordBoundaryAssertion: { negate: undefined },
64
+ CharacterClass: { negate: undefined },
65
+ DigitCharacterSet: { negate: undefined },
66
+ SpaceCharacterSet: { negate: undefined },
67
+ WordCharacterSet: { negate: undefined },
68
+ Quantifier: { min: undefined, max: undefined },
69
+ }),
70
+ );
71
+ }
72
+
73
+ *Pattern() {
74
+ yield eat(m`openToken*: <* '/' />`);
75
+ yield startSpan('Pattern', '/');
76
+ yield eat(m`<__Alternatives />`);
77
+ yield endSpan();
78
+ yield eat(m`closeToken*: <* '/' />`);
79
+ yield eat(m`flags: <Flags />`);
80
+ }
81
+
82
+ *Flags() {
57
83
  const flags = yield match(re`/[gimsuy]+/`);
58
- const flagsStr = ctx.sourceTextFor(flags) || '';
84
+
85
+ const flagsStr = printSource(flags) || '';
86
+
59
87
  if (flagsStr && !unique(flagsStr)) throw new Error('flags must be unique');
60
- for (const {
61
- 0: name,
62
- 1: chr
63
- } of Object.entries(flagCharacters)) {
88
+
89
+ for (const { 0: name, 1: chr } of Object.entries(flagCharacters)) {
64
90
  if (flagsStr.includes(chr)) {
65
91
  yield defineAttribute(name, true);
66
92
  } else {
67
93
  yield defineAttribute(name, false);
68
94
  }
69
95
  }
96
+
70
97
  for (const flagChr of flagsStr) {
71
- yield eat(m`tokens[]: <*Keyword ${buildString(flagChr)} />`);
98
+ yield eat(m`tokens[]*: <*Keyword ${buildString(flagChr)} />`);
72
99
  }
73
100
  }
101
+
74
102
  *Alternatives() {
75
103
  do {
76
104
  yield eat(m`alternatives[]$: <Alternative />`);
77
- } while (yield eatMatch(m`separatorTokens[]: <*Punctuator '|' />`));
105
+ } while (yield eatMatch(m`#separatorTokens: <* '|' />`));
78
106
  }
107
+
79
108
  *Alternative() {
80
- yield eat(m`elements[]+$: <_Elements />`);
109
+ yield eat(m`elements[]+$: <__Elements />`);
81
110
  }
82
- *Elements() {
83
- yield eat(m`.[]: []`);
111
+
112
+ *Elements({ matcher }) {
84
113
  while (yield match(re`/[^|]/`)) {
85
- yield eat(m`.[]+: <__Element />`);
114
+ yield eat(m`${get('refMatcher', matcher)} <_Element />`);
86
115
  }
87
116
  }
117
+
88
118
  *Element() {
89
119
  yield guard(m`<*Keyword /[*+?]/ />`);
90
- yield eat(m`<_Any />`, [m`<CharacterClass '[' />`, m`<Group '(?:' />`, m`<_Assertion /[$^]|\\b/i />`, m`<Gap '\\g' />`, m`<_CharacterSet /\.|\\[dswp]/i />`, m`<*Character />`]);
91
- if (yield match(re`/[*+?{]/`)) {
92
- return shiftMatch(m`<Quantifier />`);
120
+
121
+ if (yield eatMatch(m`<CharacterClass '[' />`)) {
122
+ } else if (yield eatMatch(m`<Group '(?:' />`)) {
123
+ } else if (yield eatMatch(m`<_Assertion /[$^]|\\b/i />`)) {
124
+ } else if (yield eatMatch(m`<Gap '\\g' />`)) {
125
+ } else if (yield eatMatch(m`<_CharacterSet /\.|\\[dswp]/i />`)) {
126
+ } else {
127
+ yield eat(m`<*Character />`);
93
128
  }
129
+
130
+ return r(shiftMatch(m`<Quantifier /[*+?{]/ />`));
94
131
  }
132
+
95
133
  *Group() {
96
- yield eat(m`openToken: <*Punctuator '(?:' { balanced: ')' } />`);
97
- yield eat(m`<_Alternatives />`);
98
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
134
+ yield eat(m`openToken*: <* '(?:' />`);
135
+ yield eat(m`<__Alternatives />`);
136
+ yield eat(m`closeToken*: <* ')' />`);
99
137
  }
138
+
100
139
  *CapturingGroup() {
101
- yield eat(m`openToken: <*Punctuator '(' { balanced: ')' } />`);
102
- yield eat(m`<_Alternatives />`);
103
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
140
+ yield eat(m`openToken*: <* '(' />`);
141
+ yield eat(m`<__Alternatives />`);
142
+ yield eat(m`closeToken*: <* ')' />`);
104
143
  }
144
+
105
145
  *Assertion() {
106
- yield eat(m`<_Any />`, [m`<*StartOfInputAssertion '^' />`, m`<*EndOfInputAssertion '$' />`, m`<*WordBoundaryAssertion /\\b/i />`]);
146
+ if (yield eatMatch(m`<StartOfInputAssertion '^' />`)) {
147
+ } else if (yield eatMatch(m`<EndOfInputAssertion '$' />`)) {
148
+ } else {
149
+ yield eat(m`<WordBoundaryAssertion /\\b/i />`);
150
+ }
107
151
  }
152
+
108
153
  *StartOfInputAssertion() {
109
- yield eat(m`sigilToken: <*Keyword '^' />`);
154
+ yield eat(m`sigilToken*: <*Keyword '^' />`);
110
155
  }
156
+
111
157
  *EndOfInputAssertion() {
112
- yield eatMatch(m`sigilToken: <*Keyword '$' />`);
158
+ yield eatMatch(m`sigilToken*: <*Keyword '$' />`);
113
159
  }
114
- *WordBoundaryAssertion({
115
- ctx
116
- }) {
117
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
118
- const m_ = yield eat(m`value: <*Keyword /b/i />`);
119
- yield defineAttribute('negate', buildBoolean(ctx.sourceTextFor(m_) === 'B'));
160
+
161
+ *WordBoundaryAssertion() {
162
+ yield eatMatch(m`escapeToken*: <* '\\' />`);
163
+ const m_ = yield eat(m`value*: <*Keyword /b/i />`);
164
+ yield defineAttribute('negate', printSource(m_.node) === 'B');
120
165
  }
166
+
121
167
  *Gap() {
122
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
123
- yield eat(m`value: <*Keyword 'g' />`);
168
+ yield eatMatch(m`escapeToken*: <* '\\' />`);
169
+ yield eat(m`value*: <*Keyword 'g' />`);
124
170
  }
171
+
125
172
  *Character() {
126
173
  if (yield match('\\')) {
127
174
  yield eat(m`@: <EscapeSequence />`);
@@ -129,154 +176,181 @@ export const grammar = class RegexGrammar {
129
176
  yield eat(re`/[^\r\n\t]/`);
130
177
  }
131
178
  }
179
+
132
180
  *CharacterClass() {
133
- yield eat(m`openToken: <*Punctuator '[' { balancedSpan: 'CharacterClass', balanced: ']' } />`);
134
- let negate = yield eatMatch(m`negateToken: <*Keyword '^' />`, null, o({
135
- bind: true
136
- }));
181
+ yield eat(m`openToken*: <* '[' />`);
182
+ yield startSpan('CharacterClass', ']');
183
+
184
+ let negate = yield eatMatch(m`negateToken*: <*Keyword '^' />`);
185
+
137
186
  yield defineAttribute('negate', !!negate);
138
- while (yield match(re`/./s`)) {
139
- yield eat(m`elements[]+$: <__CharacterClassElement />`);
187
+
188
+ while (yield match(re`/[^\]]/s`)) {
189
+ yield eat(m`elements[]+$: <_CharacterClassElement />`);
140
190
  }
141
- yield eat(m`closeToken: <*Punctuator ']' { balancer: true } />`);
191
+
192
+ yield endSpan();
193
+ yield eat(m`closeToken*: <* ']' />`);
142
194
  }
195
+
143
196
  *CharacterClassElement() {
144
- yield eat(m`<_Any />`, [m`<_CharacterSet /\\[dswp]/i />`, m`<Gap '\\g' />`, m`<*Character />`]);
197
+ if (yield eatMatch(m`<_CharacterSet /\\[dswp]/i />`)) {
198
+ } else if (yield eatMatch(m`<Gap '\\g' />`)) {
199
+ } else {
200
+ yield eat(m`<*Character />`);
201
+ }
202
+
145
203
  if (yield match('-')) {
146
- return shiftMatch(m`<CharacterClassRange />`);
204
+ return r(shiftMatch(m`<CharacterClassRange />`));
147
205
  }
148
206
  }
207
+
149
208
  *CharacterClassRange() {
150
- yield eat(m`min+$: <*Character />`);
151
- yield eat(m`sigilToken: <*Punctuator '-' />`);
209
+ yield eatHeld(m`min+$: <*Character />`);
210
+ yield eat(m`sigilToken*: <* '-' />`);
152
211
  yield eat(m`max+$: <*Character />`);
153
212
  }
213
+
154
214
  *CharacterSet() {
155
- yield eat(m`<_Any />`, [m`<AnyCharacterSet '.' />`, m`<DigitCharacterSet /\\[dD]/ />`, m`<SpaceCharacterSet /\\[sS]/ />`, m`<WordCharacterSet /\\[wW]/ />`]);
215
+ if (yield eatMatch(m`<AnyCharacterSet '.' />`)) {
216
+ } else if (yield eatMatch(m`<DigitCharacterSet /\\[dD]/ />`)) {
217
+ } else if (yield eatMatch(m`<SpaceCharacterSet /\\[sS]/ />`)) {
218
+ } else {
219
+ yield eat(m`<WordCharacterSet /\\[wW]/ />`);
220
+ }
156
221
  }
222
+
157
223
  *AnyCharacterSet() {
158
- yield eat(m`sigilToken: <*Keyword '.' />`);
159
- }
160
- *DigitCharacterSet({
161
- ctx
162
- }) {
163
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
164
- let code = yield eat(m`value: <*Keyword /[dD]/ />`);
165
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'D');
166
- }
167
- *SpaceCharacterSet({
168
- ctx
169
- }) {
170
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
171
- let code = yield eat(m`value: <*Keyword /[sS]/ />`);
172
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'S');
173
- }
174
- *WordCharacterSet({
175
- ctx
176
- }) {
177
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
178
- let code = yield eat(m`value: <*Keyword /[wW]/ />`);
179
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'W');
180
- }
181
- *Quantifier({
182
- ctx
183
- }) {
184
- yield eat(m`element+$: <__Element />`);
224
+ yield eat(m`sigilToken*: <*Keyword '.' />`);
225
+ }
226
+
227
+ *DigitCharacterSet() {
228
+ yield eat(m`escapeToken*: <* '\\' />`);
229
+
230
+ let code = yield eat(m`value*: <*Keyword /[dD]/ />`);
231
+
232
+ yield defineAttribute('negate', printSource(code.node) === 'D');
233
+ }
234
+
235
+ *SpaceCharacterSet() {
236
+ yield eat(m`escapeToken*: <* '\\' />`);
237
+
238
+ let code = yield eat(m`value*: <*Keyword /[sS]/ />`);
239
+
240
+ yield defineAttribute('negate', printSource(code.node) === 'S');
241
+ }
242
+
243
+ *WordCharacterSet() {
244
+ yield eat(m`escapeToken*: <* '\\' />`);
245
+
246
+ let code = yield eat(m`value*: <*Keyword /[wW]/ />`);
247
+
248
+ yield defineAttribute('negate', printSource(code.node) === 'W');
249
+ }
250
+
251
+ *Quantifier() {
252
+ yield eatHeld(m`element+$: <_Element />`);
253
+
185
254
  let attrs, sigil;
186
- if (sigil = yield eatMatch(m`sigilToken: <*Keyword /[*+?]/ />`)) {
187
- switch (ctx.sourceTextFor(sigil)) {
255
+
256
+ if ((sigil = yield eatMatch(m`sigilToken*: <*Keyword /[*+?]/ />`))) {
257
+ switch (printSource(sigil.node)) {
188
258
  case '*':
189
- attrs = {
190
- min: 0,
191
- max: Infinity
192
- };
259
+ attrs = { min: 0, max: Infinity };
193
260
  break;
194
261
  case '+':
195
- attrs = {
196
- min: 1,
197
- max: Infinity
198
- };
262
+ attrs = { min: 1, max: Infinity };
199
263
  break;
200
264
  case '?':
201
- attrs = {
202
- min: 0,
203
- max: 1
204
- };
265
+ attrs = { min: 0, max: 1 };
205
266
  break;
206
267
  default:
207
268
  yield fail();
208
269
  }
209
- } else if (yield eat(m`openToken: <*Punctuator '{' { balanced: '}' } />`)) {
270
+ } else if (yield eat(m`openToken*: <* '{' />`)) {
210
271
  let max;
211
272
  let min = yield eat(m`min$: <*UnsignedInteger />`);
212
- if (yield eatMatch(m`separator: <*Punctuator ',' />`)) {
273
+
274
+ if (yield eatMatch(m`separator$: <* ',' />`)) {
213
275
  max = yield eatMatch(m`max$: <*UnsignedInteger />`);
214
276
  }
215
- min = min && ctx.sourceTextFor(min);
216
- max = max && ctx.sourceTextFor(max);
277
+
278
+ min = min && printSource(min.node);
279
+ max = max && printSource(max.node);
280
+
217
281
  min = min && parseInt(min, 10);
218
282
  max = max && parseInt(max, 10);
219
- attrs = {
220
- min,
221
- max
222
- };
223
- yield eat(m`closeToken: <*Punctuator '}' { balancer: true } />`);
283
+
284
+ attrs = { min, max };
285
+
286
+ yield eat(m`closeToken*: <* '}' />`);
224
287
  }
288
+
225
289
  yield defineAttribute('min', attrs.min);
226
290
  yield defineAttribute('max', attrs.max);
227
291
  }
292
+
228
293
  *UnsignedInteger() {
229
294
  yield eat(re`/\d+/`);
230
295
  }
296
+
231
297
  *UnsignedHexInteger() {
232
298
  yield eat(re`/[\da-fA-F]+/`);
233
299
  }
234
- *EscapeSequence({
235
- state,
236
- ctx
237
- }) {
238
- const parentSpan = state.span;
239
- yield eat(m`escape: <*Punctuator '\\' { openSpan: 'Escape' } />`);
300
+
301
+ *EscapeSequence({ s }) {
302
+ const parentSpan = s().span;
303
+
304
+ yield startSpan('Escape');
305
+
306
+ yield eat(m`escape*: <* '\\' />`);
307
+
240
308
  let m_;
241
309
  let cooked;
242
- if (m_ = yield match(re`/[\\/nrt0]/`)) {
243
- const match_ = ctx.sourceTextFor(m_);
244
- yield eat(m`code: <*Keyword ${buildString(match_)} { closeSpan: 'Escape' } />`);
310
+
311
+ if ((m_ = yield match(re`/[\\/nrt0]/`))) {
312
+ const match_ = printSource(m_);
313
+ yield eat(m`code*: <*Keyword ${buildString(match_)} />`);
314
+
245
315
  cooked = escapables.get(match_) || match_;
246
- } else if (m_ = yield match(getSpecialPattern(parentSpan))) {
247
- cooked = ctx.sourceTextFor(m_);
248
- yield eat(m`code: <*Keyword ${buildString(cooked)} { closeSpan: 'Escape' } />`);
316
+ } else if ((m_ = yield match(getSpecialPattern(parentSpan)))) {
317
+ cooked = printSource(m_);
318
+ yield eat(m`code*: <*Keyword ${buildString(cooked)} />`);
249
319
  } else if (yield match(re`/[ux]/`)) {
250
- let code = yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
251
- let value = code.get('value');
252
- cooked = String.fromCodePoint(parseInt(ctx.sourceTextFor(value), 16));
320
+ let code = yield eat(m`code*: <EscapeCode />`);
321
+
322
+ let value = get('value', code.node);
323
+
324
+ cooked = String.fromCodePoint(parseInt(printSource(value), 16));
253
325
  } else {
254
326
  yield fail();
255
327
  }
328
+
329
+ yield endSpan();
330
+
256
331
  yield defineAttribute('cooked', cooked);
257
332
  }
333
+
258
334
  *EscapeCode() {
259
- if (yield eatMatch(m`type: <*Keyword 'u' />`)) {
260
- if (yield eatMatch(m`openToken: <*Punctuator '{' />`)) {
261
- yield eatMatch(m`value$: <*UnsignedHexInteger />`);
262
- yield eat(m`closeToken: <*Punctuator '}' />`);
335
+ if (yield eatMatch(m`type*: <*Keyword 'u' />`)) {
336
+ if (yield eatMatch(m`openToken*: <* '{' />`)) {
337
+ yield eatMatch(m`value: <*UnsignedHexInteger />`);
338
+ yield eat(m`closeToken*: <* '}' />`);
263
339
  } else {
264
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
265
- yield eat(m`closeToken: null`);
340
+ yield eat(m`value: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
266
341
  }
267
- } else if (yield eatMatch(m`type: <*Keyword 'x' />`)) {
268
- yield eat(m`openToken: null`);
269
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
270
- yield eat(m`closeToken: null`);
342
+ } else if (yield eatMatch(m`type*: <*Keyword 'x' />`)) {
343
+ yield eat(m`value: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
271
344
  }
272
345
  }
346
+
273
347
  *Digits() {
274
348
  while (yield eatMatch(m`<*Digit />`));
275
349
  }
350
+
276
351
  *Digit() {
277
352
  yield eat(re`/\d/`);
278
353
  }
279
- *Any() {}
280
- *Keyword() {}
281
- *Punctuator() {}
282
- };
354
+ };
355
+
356
+ export default { canonicalURL, dependencies, grammar, defaultMatcher };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bablr/language-en-regex-vm-pattern",
3
- "version": "0.10.0",
3
+ "version": "0.12.0",
4
4
  "description": "A BABLR language for nonbacktracking JS-style regexes",
5
5
  "engines": {
6
6
  "node": ">=12.0.0"
@@ -15,25 +15,19 @@
15
15
  ],
16
16
  "sideEffects": false,
17
17
  "scripts": {
18
- "build": "macrome build",
19
- "watch": "macrome watch",
20
- "clean": "macrome clean",
21
18
  "test": "mocha"
22
19
  },
23
20
  "dependencies": {
24
- "@babel/runtime": "^7.23.2",
25
- "@bablr/boot": "0.9.0",
26
- "@bablr/helpers": "0.23.0",
27
- "@bablr/agast-helpers": "0.8.0",
28
- "@bablr/agast-vm-helpers": "0.8.0",
21
+ "@bablr/boot": "0.11.0",
22
+ "@bablr/helpers": "0.25.0",
23
+ "@bablr/agast-helpers": "0.10.0",
24
+ "@bablr/agast-vm-helpers": "0.10.0",
29
25
  "iter-tools-es": "7.5.3"
30
26
  },
31
27
  "devDependencies": {
32
28
  "@bablr/eslint-config-base": "github:bablr-lang/eslint-config-base#c97bfa4b3663f8378e9b3e42bb5a41e685406cf9",
33
- "@bablr/macrome": "^0.1.3",
34
- "@bablr/macrome-generator-bablr": "^0.3.2",
35
29
  "@qnighy/dedent": "0.1.1",
36
- "bablr": "^0.7.0",
30
+ "bablr": "^0.11.0",
37
31
  "enhanced-resolve": "^5.12.0",
38
32
  "eslint": "^8.47.0",
39
33
  "eslint-import-resolver-enhanced-resolve": "^1.0.5",
@@ -48,7 +42,10 @@
48
42
  "english",
49
43
  "regex"
50
44
  ],
51
- "repository": "git@github.com:bablr-lang/language-en-regex-vm-pattern.git",
45
+ "repository": {
46
+ "type": "git",
47
+ "url": "git+ssh://git@github.com/bablr-lang/language-en-regex-vm-pattern.git"
48
+ },
52
49
  "homepage": "https://github.com/bablr-lang/language-en-regex-vm-pattern",
53
50
  "author": "Conrad Buck <conartist6@gmail.com>",
54
51
  "license": "MIT"
@@ -1,399 +0,0 @@
1
- import { re, spam as m } from '@bablr/boot';
2
- import {
3
- Node,
4
- CoveredBy,
5
- InjectFrom,
6
- UndefinedAttributes,
7
- AllowEmpty,
8
- Literal,
9
- } from '@bablr/helpers/decorators';
10
- import objectEntries from 'iter-tools-es/methods/object-entries';
11
- import * as Shared from '@bablr/helpers/productions';
12
- import {
13
- eat,
14
- eatMatch,
15
- match,
16
- shiftMatch,
17
- guard,
18
- defineAttribute,
19
- fail,
20
- o,
21
- } from '@bablr/helpers/grammar';
22
- import { buildString, buildBoolean } from '@bablr/helpers/builders';
23
-
24
- export const canonicalURL = 'https://bablr.org/languages/core/en/bablr-regex-pattern';
25
-
26
- export const dependencies = {};
27
-
28
- const escapables = new Map(
29
- objectEntries({
30
- n: '\n',
31
- r: '\r',
32
- t: '\t',
33
- 0: '\0',
34
- }),
35
- );
36
-
37
- const flagCharacters = {
38
- global: 'g',
39
- ignoreCase: 'i',
40
- multiline: 'm',
41
- dotAll: 's',
42
- unicode: 'u',
43
- sticky: 'y',
44
- };
45
-
46
- const unique = (flags) => flags.length === new Set(flags).size;
47
-
48
- const getSpecialPattern = (span) => {
49
- if (span === 'Pattern') {
50
- return re`/[*+?{}[\]().^$|\n\\<>]/`;
51
- } else if (span === 'CharacterClass') {
52
- return re`/[\]\\]/`;
53
- } else {
54
- throw new Error('unknown span type for special pattern');
55
- }
56
- };
57
-
58
- export const grammar = class RegexGrammar {
59
- @Node
60
- *Pattern() {
61
- yield eat(m`openToken: <*Punctuator '/' { balanced: '/', balancedSpan: 'Pattern' } />`);
62
- yield eat(m`<_Alternatives />`);
63
- yield eat(m`closeToken: <*Punctuator '/' { balancer: true } />`);
64
- yield eat(m`flags$: <Flags />`);
65
- }
66
-
67
- @UndefinedAttributes(Object.keys(flagCharacters))
68
- @AllowEmpty
69
- @Node
70
- *Flags({ ctx }) {
71
- const flags = yield match(re`/[gimsuy]+/`);
72
-
73
- const flagsStr = ctx.sourceTextFor(flags) || '';
74
-
75
- if (flagsStr && !unique(flagsStr)) throw new Error('flags must be unique');
76
-
77
- for (const { 0: name, 1: chr } of Object.entries(flagCharacters)) {
78
- if (flagsStr.includes(chr)) {
79
- yield defineAttribute(name, true);
80
- } else {
81
- yield defineAttribute(name, false);
82
- }
83
- }
84
-
85
- for (const flagChr of flagsStr) {
86
- yield eat(m`tokens[]: <*Keyword ${buildString(flagChr)} />`);
87
- }
88
- }
89
-
90
- @AllowEmpty
91
- *Alternatives() {
92
- do {
93
- yield eat(m`alternatives[]$: <Alternative />`);
94
- } while (yield eatMatch(m`separatorTokens[]: <*Punctuator '|' />`));
95
- }
96
-
97
- @AllowEmpty
98
- @Node
99
- *Alternative() {
100
- yield eat(m`elements[]+$: <_Elements />`);
101
- }
102
-
103
- @AllowEmpty
104
- *Elements() {
105
- yield eat(m`.[]: []`);
106
- while (yield match(re`/[^|]/`)) {
107
- yield eat(m`.[]+: <__Element />`);
108
- }
109
- }
110
-
111
- *Element() {
112
- yield guard(m`<*Keyword /[*+?]/ />`);
113
-
114
- yield eat(m`<_Any />`, [
115
- m`<CharacterClass '[' />`,
116
- m`<Group '(?:' />`,
117
- m`<_Assertion /[$^]|\\b/i />`,
118
- m`<Gap '\\g' />`,
119
- m`<_CharacterSet /\.|\\[dswp]/i />`,
120
- m`<*Character />`,
121
- ]);
122
-
123
- if (yield match(re`/[*+?{]/`)) {
124
- return shiftMatch(m`<Quantifier />`);
125
- }
126
- }
127
-
128
- @CoveredBy('Element')
129
- @Node
130
- *Group() {
131
- yield eat(m`openToken: <*Punctuator '(?:' { balanced: ')' } />`);
132
- yield eat(m`<_Alternatives />`);
133
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
134
- }
135
-
136
- @Node
137
- *CapturingGroup() {
138
- yield eat(m`openToken: <*Punctuator '(' { balanced: ')' } />`);
139
- yield eat(m`<_Alternatives />`);
140
- yield eat(m`closeToken: <*Punctuator ')' { balancer: true } />`);
141
- }
142
-
143
- @CoveredBy('Element')
144
- *Assertion() {
145
- yield eat(m`<_Any />`, [
146
- m`<*StartOfInputAssertion '^' />`,
147
- m`<*EndOfInputAssertion '$' />`,
148
- m`<*WordBoundaryAssertion /\\b/i />`,
149
- ]);
150
- }
151
-
152
- @CoveredBy('Assertion')
153
- @Node
154
- *StartOfInputAssertion() {
155
- yield eat(m`sigilToken: <*Keyword '^' />`);
156
- }
157
-
158
- @CoveredBy('Assertion')
159
- @Node
160
- *EndOfInputAssertion() {
161
- yield eatMatch(m`sigilToken: <*Keyword '$' />`);
162
- }
163
-
164
- @UndefinedAttributes(['negate'])
165
- @CoveredBy('Assertion')
166
- @Node
167
- *WordBoundaryAssertion({ ctx }) {
168
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
169
- const m_ = yield eat(m`value: <*Keyword /b/i />`);
170
- yield defineAttribute('negate', buildBoolean(ctx.sourceTextFor(m_) === 'B'));
171
- }
172
-
173
- @CoveredBy('Assertion')
174
- @Node
175
- *Gap() {
176
- yield eatMatch(m`escapeToken: <*Punctuator '\\' />`);
177
- yield eat(m`value: <*Keyword 'g' />`);
178
- }
179
-
180
- @CoveredBy('Element')
181
- @CoveredBy('CharacterClassElement')
182
- @Node
183
- *Character() {
184
- if (yield match('\\')) {
185
- yield eat(m`@: <EscapeSequence />`);
186
- } else {
187
- yield eat(re`/[^\r\n\t]/`);
188
- }
189
- }
190
-
191
- @UndefinedAttributes(['negate'])
192
- @CoveredBy('Element')
193
- @Node
194
- *CharacterClass() {
195
- yield eat(m`openToken: <*Punctuator '[' { balancedSpan: 'CharacterClass', balanced: ']' } />`);
196
-
197
- let negate = yield eatMatch(m`negateToken: <*Keyword '^' />`, null, o({ bind: true }));
198
-
199
- yield defineAttribute('negate', !!negate);
200
-
201
- while (yield match(re`/./s`)) {
202
- yield eat(m`elements[]+$: <__CharacterClassElement />`);
203
- }
204
-
205
- yield eat(m`closeToken: <*Punctuator ']' { balancer: true } />`);
206
- }
207
-
208
- *CharacterClassElement() {
209
- yield eat(m`<_Any />`, [m`<_CharacterSet /\\[dswp]/i />`, m`<Gap '\\g' />`, m`<*Character />`]);
210
-
211
- if (yield match('-')) {
212
- return shiftMatch(m`<CharacterClassRange />`);
213
- }
214
- }
215
-
216
- @CoveredBy('CharacterClassElement')
217
- @Node
218
- *CharacterClassRange() {
219
- yield eat(m`min+$: <*Character />`);
220
- yield eat(m`sigilToken: <*Punctuator '-' />`);
221
- yield eat(m`max+$: <*Character />`);
222
- }
223
-
224
- @CoveredBy('Element')
225
- *CharacterSet() {
226
- yield eat(m`<_Any />`, [
227
- m`<AnyCharacterSet '.' />`,
228
- m`<DigitCharacterSet /\\[dD]/ />`,
229
- m`<SpaceCharacterSet /\\[sS]/ />`,
230
- m`<WordCharacterSet /\\[wW]/ />`,
231
- ]);
232
- }
233
-
234
- @CoveredBy('Element')
235
- @CoveredBy('CharacterSet')
236
- @Node
237
- *AnyCharacterSet() {
238
- yield eat(m`sigilToken: <*Keyword '.' />`);
239
- }
240
-
241
- @UndefinedAttributes(['negate'])
242
- @CoveredBy('Element')
243
- @CoveredBy('CharacterSet')
244
- @Node
245
- *DigitCharacterSet({ ctx }) {
246
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
247
-
248
- let code = yield eat(m`value: <*Keyword /[dD]/ />`);
249
-
250
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'D');
251
- }
252
-
253
- @UndefinedAttributes(['negate'])
254
- @CoveredBy('Element')
255
- @CoveredBy('CharacterSet')
256
- @Node
257
- *SpaceCharacterSet({ ctx }) {
258
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
259
-
260
- let code = yield eat(m`value: <*Keyword /[sS]/ />`);
261
-
262
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'S');
263
- }
264
-
265
- @UndefinedAttributes(['negate'])
266
- @CoveredBy('Element')
267
- @CoveredBy('CharacterSet')
268
- @Node
269
- *WordCharacterSet({ ctx }) {
270
- yield eat(m`escapeToken: <*Punctuator '\\' />`);
271
-
272
- let code = yield eat(m`value: <*Keyword /[wW]/ />`);
273
-
274
- yield defineAttribute('negate', ctx.sourceTextFor(code) === 'W');
275
- }
276
-
277
- @UndefinedAttributes(['min', 'max'])
278
- @Node
279
- *Quantifier({ ctx }) {
280
- yield eat(m`element+$: <__Element />`);
281
-
282
- let attrs, sigil;
283
-
284
- if ((sigil = yield eatMatch(m`sigilToken: <*Keyword /[*+?]/ />`))) {
285
- switch (ctx.sourceTextFor(sigil)) {
286
- case '*':
287
- attrs = { min: 0, max: Infinity };
288
- break;
289
- case '+':
290
- attrs = { min: 1, max: Infinity };
291
- break;
292
- case '?':
293
- attrs = { min: 0, max: 1 };
294
- break;
295
- default:
296
- yield fail();
297
- }
298
- } else if (yield eat(m`openToken: <*Punctuator '{' { balanced: '}' } />`)) {
299
- let max;
300
- let min = yield eat(m`min$: <*UnsignedInteger />`);
301
-
302
- if (yield eatMatch(m`separator: <*Punctuator ',' />`)) {
303
- max = yield eatMatch(m`max$: <*UnsignedInteger />`);
304
- }
305
-
306
- min = min && ctx.sourceTextFor(min);
307
- max = max && ctx.sourceTextFor(max);
308
-
309
- min = min && parseInt(min, 10);
310
- max = max && parseInt(max, 10);
311
-
312
- attrs = { min, max };
313
-
314
- yield eat(m`closeToken: <*Punctuator '}' { balancer: true } />`);
315
- }
316
-
317
- yield defineAttribute('min', attrs.min);
318
- yield defineAttribute('max', attrs.max);
319
- }
320
-
321
- @Node
322
- *UnsignedInteger() {
323
- yield eat(re`/\d+/`);
324
- }
325
-
326
- @Node
327
- *UnsignedHexInteger() {
328
- yield eat(re`/[\da-fA-F]+/`);
329
- }
330
-
331
- @Node
332
- *EscapeSequence({ state, ctx }) {
333
- const parentSpan = state.span;
334
-
335
- yield eat(m`escape: <*Punctuator '\\' { openSpan: 'Escape' } />`);
336
-
337
- let m_;
338
- let cooked;
339
-
340
- if ((m_ = yield match(re`/[\\/nrt0]/`))) {
341
- const match_ = ctx.sourceTextFor(m_);
342
- yield eat(m`code: <*Keyword ${buildString(match_)} { closeSpan: 'Escape' } />`);
343
-
344
- cooked = escapables.get(match_) || match_;
345
- } else if ((m_ = yield match(getSpecialPattern(parentSpan)))) {
346
- cooked = ctx.sourceTextFor(m_);
347
- yield eat(m`code: <*Keyword ${buildString(cooked)} { closeSpan: 'Escape' } />`);
348
- } else if (yield match(re`/[ux]/`)) {
349
- let code = yield eat(m`code: <EscapeCode { closeSpan: 'Escape' } />`);
350
-
351
- let value = code.get('value');
352
-
353
- cooked = String.fromCodePoint(parseInt(ctx.sourceTextFor(value), 16));
354
- } else {
355
- yield fail();
356
- }
357
-
358
- yield defineAttribute('cooked', cooked);
359
- }
360
-
361
- @Node
362
- *EscapeCode() {
363
- if (yield eatMatch(m`type: <*Keyword 'u' />`)) {
364
- if (yield eatMatch(m`openToken: <*Punctuator '{' />`)) {
365
- yield eatMatch(m`value$: <*UnsignedHexInteger />`);
366
- yield eat(m`closeToken: <*Punctuator '}' />`);
367
- } else {
368
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{4}/ />`);
369
- yield eat(m`closeToken: null`);
370
- }
371
- } else if (yield eatMatch(m`type: <*Keyword 'x' />`)) {
372
- yield eat(m`openToken: null`);
373
- yield eat(m`value$: <*UnsignedHexInteger /[\da-fA-F]{2}/ />`);
374
- yield eat(m`closeToken: null`);
375
- }
376
- }
377
-
378
- *Digits() {
379
- while (yield eatMatch(m`<*Digit />`));
380
- }
381
-
382
- @Node
383
- *Digit() {
384
- yield eat(re`/\d/`);
385
- }
386
-
387
- @InjectFrom(Shared)
388
- *Any() {}
389
-
390
- @Literal
391
- @Node
392
- @InjectFrom(Shared)
393
- *Keyword() {}
394
-
395
- @Literal
396
- @Node
397
- @InjectFrom(Shared)
398
- *Punctuator() {}
399
- };