@bablr/boot 0.1.9 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
@@ -1,10 +1,12 @@
1
1
  const when = require('iter-tools-es/methods/when');
2
- const { escapables } = require('./string.js');
2
+ const { escapables } = require('./cstml.js');
3
3
  const { buildCovers } = require('../utils.js');
4
4
  const { node } = require('../symbols.js');
5
5
 
6
6
  const name = 'Regex';
7
7
 
8
+ const canonicalURL = 'https://bablr.org/languages/core/bablr-regex-pattern';
9
+
8
10
  const dependencies = {};
9
11
 
10
12
  const covers = buildCovers({
@@ -19,14 +21,30 @@ const covers = buildCovers({
19
21
  'Character',
20
22
  'CharacterClass',
21
23
  'CharacterClassRange',
22
- 'CharacterSet',
24
+ 'AnyCharacterSet',
25
+ 'WordCharacterSet',
26
+ 'SpaceCharacterSet',
27
+ 'DigitCharacterSet',
23
28
  'Quantifier',
24
29
  'Punctuator',
25
30
  'Keyword',
26
31
  'Escape',
27
32
  'Number',
33
+ 'Gap',
34
+ ],
35
+ Assertion: ['StartOfInputAssertion', 'EndOfInputAssertion', 'WordBoundaryAssertion'],
36
+ Element: [
37
+ 'CharacterClass',
38
+ 'Group',
39
+ 'CapturingGroup',
40
+ 'Assertion',
41
+ 'CharacterSet',
42
+ 'Gap',
43
+ 'Character',
44
+ 'Quantifier',
28
45
  ],
29
- CharacterClassElement: ['CharacterClassRange', 'Character'],
46
+ CharacterClassElement: ['CharacterClassRange', 'CharacterSet', 'Character', 'Gap'],
47
+ CharacterSet: ['AnyCharacterSet', 'WordCharacterSet', 'SpaceCharacterSet', 'DigitCharacterSet'],
30
48
  });
31
49
 
32
50
  const flags = {
@@ -50,9 +68,9 @@ const getSpecialPattern = (span) => {
50
68
  if (type === 'Bare') {
51
69
  return /[*+{}\[\]()\.^$|\\\n\/]/y;
52
70
  } else if (type === 'CharacterClass') {
53
- return /[\]\\\.]/y;
71
+ return /[\]\\]/y;
54
72
  } else if (type === 'CharacterClass:First') {
55
- return /[\]^\\\.]/y;
73
+ return /[\]^\\]/y;
56
74
  } else if (type === 'Quantifier') {
57
75
  return /[{}]/;
58
76
  } else {
@@ -150,6 +168,8 @@ const grammar = class RegexMiniparserGrammar {
150
168
  p.eatProduction('Assertion');
151
169
  } else if (p.match(/\.|\\[dswp]/iy)) {
152
170
  p.eatProduction('CharacterSet');
171
+ } else if (p.match('\\g')) {
172
+ p.eatProduction('Gap');
153
173
  } else {
154
174
  p.eatProduction('Character');
155
175
  }
@@ -173,20 +193,37 @@ const grammar = class RegexMiniparserGrammar {
173
193
  p.eat(')', PN, { path: 'close', balancer: true });
174
194
  }
175
195
 
176
- // @Node
177
196
  Assertion(p) {
178
- let attrs = {};
179
- if (p.eatMatch('^', PN, { path: 'value' })) {
180
- attrs = { kind: 'start' };
181
- } else if (p.eatMatch('$', KW, { path: 'value' })) {
182
- attrs = { kind: 'end' };
197
+ if (p.match('^')) {
198
+ p.eatProduction('StartOfInputAssertion');
199
+ } else if (p.match('$')) {
200
+ p.eatProduction('EndOfInputAssertion');
201
+ } else if (p.match(/\\b/iy)) {
202
+ p.eatProduction('WordBoundaryAssertion');
203
+ }
204
+ }
205
+
206
+ // @CoveredBy('Assertion')
207
+ // @Node
208
+ StartOfInputAssertion(p) {
209
+ p.eat('^', KW, { path: 'value' });
210
+ }
211
+
212
+ // @CoveredBy('Assertion')
213
+ // @Node
214
+ EndOfInputAssertion(p) {
215
+ p.eat('$', KW, { path: 'value' });
216
+ }
217
+
218
+ // @CoveredBy('Assertion')
219
+ // @Node
220
+ WordBoundaryAssertion(p) {
221
+ let attrs;
222
+ if (p.eatMatch('\\', ESC, { path: 'escape' })) {
223
+ const m = p.eat(/b/iy, KW, { path: 'value' });
224
+ attrs = { negate: m === 'B' };
183
225
  } else {
184
- if (p.eatMatch('\\', ESC, { path: 'escape' })) {
185
- const m = p.eat(/b/iy, KW, { path: 'value' });
186
- attrs = { kind: 'word', negate: m === 'B' };
187
- } else {
188
- throw new Error('invalid boundary');
189
- }
226
+ throw new Error('invalid boundary');
190
227
  }
191
228
  return { attrs };
192
229
  }
@@ -236,13 +273,21 @@ const grammar = class RegexMiniparserGrammar {
236
273
  CharacterClassElement(p, { first }) {
237
274
  if (p.match(/.-[^\]\n]/y)) {
238
275
  p.eatProduction('CharacterClassRange', undefined, { first });
239
- } else if (p.match(/\.|\\[dswp]/iy)) {
276
+ } else if (p.match(/\\[dswp]/iy)) {
240
277
  p.eatProduction('CharacterSet');
278
+ } else if (p.match('\\g')) {
279
+ p.eatProduction('Gap');
241
280
  } else {
242
281
  p.eatProduction('Character', when(first, { span: 'CharacterClass:First' }));
243
282
  }
244
283
  }
245
284
 
285
+ // @Node
286
+ Gap(p) {
287
+ p.eat('\\', PN, { path: 'escape' });
288
+ p.eat('g', KW, { path: 'value' });
289
+ }
290
+
246
291
  // @Node
247
292
  CharacterClassRange(p, { first }) {
248
293
  p.eatProduction('Character', {
@@ -253,32 +298,75 @@ const grammar = class RegexMiniparserGrammar {
253
298
  p.eatProduction('Character', { path: 'max' });
254
299
  }
255
300
 
256
- // @Node
257
301
  CharacterSet(p) {
258
- if (p.eatMatch('.', KW, { path: 'value' })) {
259
- return { attrs: { kind: 'any' } };
302
+ let attrs;
303
+
304
+ if (p.match('.')) {
305
+ p.eatProduction('AnyCharacterSet');
306
+ } else if (p.match(/\\[dD]/y)) {
307
+ p.eatProduction('DigitCharacterSet');
308
+ } else if (p.match(/\\[sS]/y)) {
309
+ p.eatProduction('SpaceCharacterSet');
310
+ } else if (p.match(/\\[wW]/y)) {
311
+ p.eatProduction('WordCharacterSet');
312
+ } else if (p.match(/p/iy)) {
313
+ throw new Error('unicode property character sets are not supported yet');
314
+ } else {
315
+ throw new Error('unknown character set kind');
260
316
  }
261
317
 
318
+ return { attrs };
319
+ }
320
+
321
+ // @CoveredBy('CharacterSet')
322
+ // @Node
323
+ AnyCharacterSet(p) {
324
+ p.eat('.', KW, { path: 'value' });
325
+ }
326
+
327
+ // @CoveredBy('CharacterSet')
328
+ // @Node
329
+ WordCharacterSet(p) {
330
+ p.eat('\\', PN, { path: 'escape' });
331
+
332
+ let attrs;
333
+
334
+ if (p.eatMatch('w', KW, { path: 'value' })) {
335
+ //continue
336
+ } else if (p.eatMatch('W', KW, { path: 'value' })) {
337
+ attrs = { negate: true };
338
+ }
339
+
340
+ return { attrs };
341
+ }
342
+
343
+ // @CoveredBy('CharacterSet')
344
+ // @Node
345
+ SpaceCharacterSet(p) {
346
+ p.eat('\\', PN, { path: 'escape' });
347
+
348
+ let attrs;
349
+
350
+ if (p.eatMatch('s', KW, { path: 'value' })) {
351
+ //continue
352
+ } else if (p.eatMatch('S', KW, { path: 'value' })) {
353
+ attrs = { negate: true };
354
+ }
355
+
356
+ return { attrs };
357
+ }
358
+
359
+ // @CoveredBy('CharacterSet')
360
+ // @Node
361
+ DigitCharacterSet(p) {
262
362
  p.eat('\\', PN, { path: 'escape' });
263
363
 
264
364
  let attrs;
265
365
 
266
366
  if (p.eatMatch('d', KW, { path: 'value' })) {
267
- attrs = { kind: 'digit' };
367
+ //continue
268
368
  } else if (p.eatMatch('D', KW, { path: 'value' })) {
269
- attrs = { kind: 'digit', negate: true };
270
- } else if (p.eatMatch('s', KW, { path: 'value' })) {
271
- attrs = { kind: 'space' };
272
- } else if (p.eatMatch('S', KW, { path: 'value' })) {
273
- attrs = { kind: 'space', negate: true };
274
- } else if (p.eatMatch('w', KW, { path: 'value' })) {
275
- attrs = { kind: 'word' };
276
- } else if (p.eatMatch('W', KW, { path: 'value' })) {
277
- attrs = { kind: 'word', negate: true };
278
- } else if (p.match(/p/iy)) {
279
- throw new Error('unicode property character sets are not supported yet');
280
- } else {
281
- throw new Error('unknown character set kind');
369
+ attrs = { negate: true };
282
370
  }
283
371
 
284
372
  return { attrs };
@@ -319,4 +407,4 @@ const grammar = class RegexMiniparserGrammar {
319
407
  }
320
408
  };
321
409
 
322
- module.exports = { name, dependencies, covers, grammar, cookEscape };
410
+ module.exports = { name, canonicalURL, dependencies, covers, grammar, cookEscape };
@@ -1,39 +1,34 @@
1
1
  const Regex = require('./regex.js');
2
- const StringLanguage = require('./string.js');
3
- const Number = require('./number.js');
2
+ const CSTML = require('./cstml.js');
4
3
  const { buildCovers } = require('../utils.js');
5
4
  const sym = require('../symbols.js');
6
5
 
7
6
  const _ = /\s+/y;
8
7
  const PN = 'Punctuator';
9
- const KW = 'Keyword';
10
8
  const ID = 'Identifier';
11
9
  const LIT = 'Literal';
12
10
 
13
11
  const name = 'Spamex';
14
12
 
15
- const dependencies = { Regex, String: StringLanguage, Number };
13
+ const canonicalURL = 'https://bablr.org/languages/core/spamex';
14
+
15
+ const dependencies = { CSTML, Regex };
16
16
 
17
17
  const covers = buildCovers({
18
- [sym.node]: ['Attribute', 'TagType', 'Matcher', 'Literal'],
18
+ [sym.node]: ['Attribute', 'Identifier', 'Matcher', 'Literal', 'CSTML:Flags'],
19
19
  Attribute: ['MappingAttribute', 'BooleanAttribute'],
20
- AttributeValue: ['String:String', 'Number:Number'],
21
- Matcher: ['NodeMatcher', 'TerminalMatcher', 'TriviaTerminalMatcher', 'StringMatcher'],
22
- StringMatcher: ['String:String', 'Regex:Pattern'],
23
- TagType: ['Identifier', 'GlobalIdentifier'],
20
+ AttributeValue: ['CSTML:String', 'CSTML:Number'],
21
+ Matcher: ['NodeMatcher', 'StringMatcher'],
22
+ StringMatcher: ['CSTML:String', 'Regex:Pattern'],
24
23
  });
25
24
 
26
25
  const grammar = class SpamexMiniparserGrammar {
27
26
  // @Cover
28
27
  Matcher(p) {
29
- if (p.match('<| |>')) {
30
- p.eatProduction('TriviaTerminalMatcher');
31
- } else if (p.match(/<(?:\w|$)/y)) {
28
+ if (p.match(/<(?:[*#@+]*[ \t]*)?(?:\w|$)/y)) {
32
29
  p.eatProduction('NodeMatcher');
33
- } else if (p.match('<|')) {
34
- p.eatProduction('TerminalMatcher');
35
30
  } else if (p.match(/['"]/y)) {
36
- p.eatProduction('String:String');
31
+ p.eatProduction('CSTML:String');
37
32
  } else if (p.match('/')) {
38
33
  p.eatProduction('Regex:Pattern');
39
34
  } else {
@@ -41,48 +36,40 @@ const grammar = class SpamexMiniparserGrammar {
41
36
  }
42
37
  }
43
38
 
44
- // @Node
45
- TriviaTerminalMatcher(p) {
46
- p.eat('<|', PN, { path: 'open', startSpan: 'Tag', balanced: '|>' });
47
- p.eat(' ', KW, { path: 'value' });
48
- p.eat('|>', PN, { path: 'close', endSpan: 'Tag', balancer: true });
49
- }
50
-
51
39
  // @Node
52
40
  NodeMatcher(p) {
53
41
  p.eat('<', PN, { path: 'open', startSpan: 'Tag', balanced: '>' });
54
- p.eatProduction('TagType', { path: 'type' });
55
42
 
56
- let sp = p.eatMatchTrivia(_);
43
+ let tr = p.eatMatch('#', PN, { path: 'triviaFlag' });
44
+ let tok = p.eatMatch('*', PN, { path: 'tokenFlag' });
45
+ let esc = p.eatMatch('@', PN, { path: 'escapeFlag' });
46
+ let exp = p.eatMatch('+', PN, { path: 'expressionFlag' });
57
47
 
58
- if ((sp && p.match(/\w+/y)) || p.atExpression) {
59
- p.eatProduction('Attributes', { path: 'attributes[]' });
60
- sp = p.eatMatchTrivia(_);
61
- }
48
+ if ((tr && esc) || (exp && (tr || esc))) throw new Error();
62
49
 
63
- p.eatMatchTrivia(_);
64
- p.eat('>', PN, { path: 'close', endSpan: 'Tag', balancer: true });
65
- }
50
+ if (p.match(/\w+:/y)) {
51
+ p.eat(/\w+/y, ID, { path: 'language' });
52
+ p.eat(':', PN, { path: 'namespaceOperator' });
53
+ p.eat(/\w+/y, ID, { path: 'type' });
54
+ } else {
55
+ p.eat(/\w+/y, ID, { path: 'type' });
56
+ }
66
57
 
67
- // @Node
68
- TerminalMatcher(p) {
69
- p.eat('<|', PN, { path: 'open', startSpan: 'Tag', balanced: '|>' });
70
- p.eatMatchTrivia(_);
71
- p.eatProduction('TagType', { path: 'type' });
72
58
  let sp = p.eatMatchTrivia(_);
73
59
 
74
- if (sp && (p.match(/['"/]/y) || p.atExpression)) {
75
- p.eatProduction('StringMatcher', { path: 'value' });
60
+ if (tok && sp && (p.match(/['"/]/y) || p.atExpression)) {
61
+ p.eatProduction('StringMatcher', { path: 'intrinsicValue' });
62
+
76
63
  sp = p.eatMatchTrivia(_);
77
64
  }
78
65
 
79
- if (sp && (p.match(/\w+/y) || p.atExpression)) {
66
+ if ((sp && p.match(/\w+/y)) || p.atExpression) {
80
67
  p.eatProduction('Attributes', { path: 'attributes[]' });
81
68
  sp = p.eatMatchTrivia(_);
82
69
  }
83
70
 
84
71
  p.eatMatchTrivia(_);
85
- p.eat('|>', PN, { path: 'close', endSpan: 'Tag', balancer: true });
72
+ p.eat('>', PN, { path: 'close', endSpan: 'Tag', balancer: true });
86
73
  }
87
74
 
88
75
  Attributes(p) {
@@ -121,32 +108,16 @@ const grammar = class SpamexMiniparserGrammar {
121
108
  // @Cover
122
109
  AttributeValue(p) {
123
110
  if (p.match(/['"]/y)) {
124
- p.eatProduction('String:String');
111
+ p.eatProduction('CSTML:String');
125
112
  } else if (p.match(/-|\d/y)) {
126
- p.eatProduction('Number:Number');
127
- }
128
- }
129
-
130
- // @Cover
131
- TagType(p) {
132
- if (p.match(/\w+:/y)) {
133
- p.eatProduction('GlobalIdentifier');
134
- } else {
135
- p.eat(/\w+/y, ID, { path: 'type' });
113
+ p.eatProduction('CSTML:Number');
136
114
  }
137
115
  }
138
116
 
139
- // @Node
140
- GlobalIdentifier(p) {
141
- p.eat(/\w+/y, ID, { path: 'language' });
142
- p.eat(':', PN, { path: 'namespaceOperator' });
143
- p.eat(/\w+/y, ID, { path: 'type' });
144
- }
145
-
146
117
  // @Cover
147
118
  StringMatcher(p) {
148
119
  if (p.match(/['"]/y)) {
149
- p.eatProduction('String:String');
120
+ p.eatProduction('CSTML:String');
150
121
  } else {
151
122
  p.eatProduction('Regex:Pattern');
152
123
  }
@@ -158,4 +129,4 @@ const grammar = class SpamexMiniparserGrammar {
158
129
  }
159
130
  };
160
131
 
161
- module.exports = { name, dependencies, covers, grammar };
132
+ module.exports = { name, canonicalURL, dependencies, covers, grammar };
package/lib/miniparser.js CHANGED
@@ -164,10 +164,10 @@ class TemplateParser {
164
164
  for (const { 0: key, 1: property } of Object.entries(result.properties)) {
165
165
  if (isArray(property)) {
166
166
  for (const value of property) {
167
- set(properties, { pathName: key, pathIsArray: true }, value);
167
+ set(properties, { name: key, isArray: true }, value);
168
168
  }
169
169
  } else {
170
- set(properties, { pathName: key, pathIsArray: false }, property);
170
+ set(properties, { name: key, isArray: false }, property);
171
171
  }
172
172
  }
173
173
  }
@@ -282,16 +282,16 @@ class TemplateParser {
282
282
  throw new Error();
283
283
  }
284
284
 
285
- const { pathIsArray, pathName } = lastChild.value;
285
+ const { isArray, name } = lastChild.value;
286
286
 
287
- this.held = pathIsArray ? arrayLast(properties[pathName]) : properties[pathName];
287
+ this.held = isArray ? arrayLast(properties[name]) : properties[name];
288
288
 
289
289
  children.pop();
290
290
 
291
- if (pathIsArray) {
292
- properties[pathName].pop();
291
+ if (isArray) {
292
+ properties[name].pop();
293
293
  } else {
294
- properties[pathName] = null;
294
+ properties[name] = null;
295
295
  }
296
296
 
297
297
  return this.eval(this.buildId(id), attrs, props);
@@ -301,6 +301,8 @@ class TemplateParser {
301
301
  if (!isString(type)) throw new Error('Cannot eat anonymous token');
302
302
  if (!isObject(attrs) || !attrs.path) throw new Error('a node must have a path');
303
303
 
304
+ const { path, ..._attrs } = attrs;
305
+
304
306
  const result = this.matchSticky(pattern, attrs, this);
305
307
 
306
308
  if (!result) throw new Error('miniparser: parsing failed');
@@ -309,11 +311,11 @@ class TemplateParser {
309
311
 
310
312
  this.updateSpans(attrs);
311
313
 
312
- const path = parsePath(attrs.path);
314
+ const path_ = parsePath(attrs.path);
313
315
 
314
- set(this.node.properties, path, buildNode(this.buildId(type), [lit(result)]));
316
+ set(this.node.properties, path_, buildNode(this.buildId(type), [lit(result)], {}, _attrs));
315
317
 
316
- this.node.children.push(ref(path));
318
+ this.node.children.push(ref(path_));
317
319
 
318
320
  return result;
319
321
  }
package/lib/path.js CHANGED
@@ -13,11 +13,11 @@ const buildNode = (id) => {
13
13
  const stripPathBraces = (str) => (str.endsWith('[]') ? str.slice(0, -2) : str);
14
14
 
15
15
  const parsePath = (str) => {
16
- const pathName = stripPathBraces(str);
16
+ const name = stripPathBraces(str);
17
17
 
18
- if (!/^\w+$/.test(pathName)) throw new Error();
18
+ if (!/^\w+$/.test(name)) throw new Error();
19
19
 
20
- return { pathIsArray: pathName !== str, pathName };
20
+ return { isArray: name !== str, name };
21
21
  };
22
22
 
23
23
  class Path {