clarity-pattern-parser 11.3.6 → 11.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -487
- package/package.json +1 -1
- package/src/intellisense/AutoComplete.test.ts +24 -0
- package/src/intellisense/AutoComplete.ts +1 -1
package/README.md
CHANGED
|
@@ -96,110 +96,13 @@ The playground allows you to:
|
|
|
96
96
|
- [Debugging](#debugging)
|
|
97
97
|
- [Error Handling](#error-handling)
|
|
98
98
|
|
|
99
|
-
## Advanced Topics
|
|
100
|
-
|
|
101
|
-
### Custom Patterns
|
|
102
|
-
|
|
103
|
-
You can create custom patterns by extending the base `Pattern` class:
|
|
104
|
-
|
|
105
|
-
```typescript
|
|
106
|
-
import { Pattern } from "clarity-pattern-parser";
|
|
107
|
-
|
|
108
|
-
class CustomPattern extends Pattern {
|
|
109
|
-
constructor(name: string) {
|
|
110
|
-
super(name);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
exec(text: string) {
|
|
114
|
-
// Custom pattern implementation
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
### Performance Tips
|
|
120
|
-
|
|
121
|
-
1. Use `test()` instead of `exec()` when you only need to check if a pattern matches
|
|
122
|
-
2. Cache frequently used patterns
|
|
123
|
-
3. Use `Reference` for recursive patterns instead of direct recursion
|
|
124
|
-
4. Minimize the use of optional patterns in sequences
|
|
125
|
-
5. Use bounded repetition when possible
|
|
126
|
-
|
|
127
|
-
### Debugging
|
|
128
|
-
|
|
129
|
-
Enable debug mode to get detailed information about pattern execution:
|
|
130
|
-
|
|
131
|
-
```typescript
|
|
132
|
-
const result = pattern.exec("some text", true);
|
|
133
|
-
// Debug information will be available in result.debug
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
### Error Handling
|
|
137
|
-
|
|
138
|
-
Pattern execution returns a `ParseResult` that includes error information:
|
|
139
|
-
|
|
140
|
-
```typescript
|
|
141
|
-
const result = pattern.exec("invalid text");
|
|
142
|
-
if (result.error) {
|
|
143
|
-
console.error(result.error.message);
|
|
144
|
-
console.error(result.error.expected);
|
|
145
|
-
console.error(result.error.position);
|
|
146
|
-
}
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
## Examples
|
|
150
|
-
|
|
151
|
-
### JSON Parser
|
|
152
|
-
```typescript
|
|
153
|
-
const { json } = patterns`
|
|
154
|
-
# Basic JSON grammar
|
|
155
|
-
ws = /\s+/
|
|
156
|
-
string = /"[^"]*"/
|
|
157
|
-
number = /-?\d+(\.\d+)?/
|
|
158
|
-
boolean = "true" | "false"
|
|
159
|
-
null = "null"
|
|
160
|
-
value = string | number | boolean | null | array | object
|
|
161
|
-
array-items = (value, /\s*,\s*/)+
|
|
162
|
-
array = "[" +ws? + array-items? + ws? + "]"
|
|
163
|
-
object-property = string + ws? + ":" + ws? + value
|
|
164
|
-
object-properties = (object-property, /\s*,\s*/ trim)+
|
|
165
|
-
object = "{" + ws? + object-properties? + ws? + "}"
|
|
166
|
-
json = ws? + value + ws?
|
|
167
|
-
`;
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
### HTML Parser
|
|
171
|
-
```typescript
|
|
172
|
-
const { html } = patterns`
|
|
173
|
-
# Basic HTML grammar
|
|
174
|
-
ws = /\s+/
|
|
175
|
-
tag-name = /[a-zA-Z_-]+[a-zA-Z0-9_-]*/
|
|
176
|
-
attribute-name = /[a-zA-Z_-]+[a-zA-Z0-9_-]*/
|
|
177
|
-
attribute-value = /"[^"]*"/
|
|
178
|
-
value-attribute = attribute-name + "=" + attribute-value
|
|
179
|
-
bool-attribute = attribute-name
|
|
180
|
-
attribute = value-attribute | bool-attribute
|
|
181
|
-
attributes = (attribute, ws)*
|
|
182
|
-
opening-tag = "<" + ws? + tag-name + ws? + attributes? + ">"
|
|
183
|
-
closing-tag = "</" + ws? + tag-name + ws? + ">"
|
|
184
|
-
text = /[^<]+/
|
|
185
|
-
child = text | element
|
|
186
|
-
children = (child, /\s*/)+
|
|
187
|
-
element = opening-tag + children? + closing-tag
|
|
188
|
-
html = ws? + element + ws?
|
|
189
|
-
`;
|
|
190
|
-
```
|
|
191
|
-
|
|
192
|
-
## License
|
|
193
|
-
|
|
194
|
-
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
195
|
-
|
|
196
99
|
## Grammar Documentation
|
|
197
100
|
|
|
198
101
|
This document describes the grammar features supported by the Clarity Pattern Parser.
|
|
199
102
|
|
|
200
|
-
|
|
103
|
+
### Basic Patterns
|
|
201
104
|
|
|
202
|
-
|
|
105
|
+
#### Literal Strings
|
|
203
106
|
Define literal string patterns using double quotes:
|
|
204
107
|
```
|
|
205
108
|
name = "John"
|
|
@@ -218,21 +121,21 @@ Escaped characters are supported in literals:
|
|
|
218
121
|
- `\"` - escaped quote
|
|
219
122
|
- `\\` - escaped backslash
|
|
220
123
|
|
|
221
|
-
|
|
124
|
+
#### Regular Expressions
|
|
222
125
|
Define regex patterns using forward slashes:
|
|
223
126
|
```
|
|
224
127
|
name = /\w/
|
|
225
128
|
```
|
|
226
129
|
|
|
227
|
-
|
|
130
|
+
### Pattern Operators
|
|
228
131
|
|
|
229
|
-
|
|
132
|
+
#### Options (|)
|
|
230
133
|
Match one of multiple patterns using the `|` operator. This is used for simple alternatives where order doesn't matter:
|
|
231
134
|
```
|
|
232
135
|
names = john | jane
|
|
233
136
|
```
|
|
234
137
|
|
|
235
|
-
|
|
138
|
+
#### Expression (|)
|
|
236
139
|
Expression patterns also use the `|` operator but are used for defining operator precedence in expressions. The order of alternatives determines precedence, with earlier alternatives having higher precedence. By default, operators are left-associative.
|
|
237
140
|
|
|
238
141
|
Example of an arithmetic expression grammar:
|
|
@@ -248,63 +151,28 @@ mul-div-expression = expression + mul-div-operators + expression
|
|
|
248
151
|
expression = prefix-expression | mul-div-expression | add-sub-expression | postfix-expression
|
|
249
152
|
```
|
|
250
153
|
|
|
251
|
-
|
|
252
|
-
- `prefix-expression` has highest precedence
|
|
253
|
-
- `mul-div-expression` has next highest precedence
|
|
254
|
-
- `add-sub-expression` has next highest precedence
|
|
255
|
-
- `postfix-expression` has lowest precedence
|
|
256
|
-
|
|
257
|
-
To make an operator right-associative, add the `right` keyword:
|
|
258
|
-
```
|
|
259
|
-
expression = prefix-expression | mul-div-expression | add-sub-expression right | postfix-expression
|
|
260
|
-
```
|
|
261
|
-
|
|
262
|
-
### Sequence (+)
|
|
263
|
-
Concatenate patterns in sequence using the `+` operator:
|
|
264
|
-
```
|
|
265
|
-
full-name = first-name + space + last-name
|
|
266
|
-
```
|
|
267
|
-
|
|
268
|
-
### Optional (?)
|
|
269
|
-
Make a pattern optional using the `?` operator:
|
|
270
|
-
```
|
|
271
|
-
full-name = first-name + space + middle-name? + last-name
|
|
272
|
-
```
|
|
273
|
-
|
|
274
|
-
### Not (!)
|
|
275
|
-
Negative lookahead using the `!` operator:
|
|
276
|
-
```
|
|
277
|
-
pattern = !excluded-pattern + actual-pattern
|
|
278
|
-
```
|
|
279
|
-
|
|
280
|
-
### Take Until (?->|)
|
|
281
|
-
Match all characters until a specific pattern is found:
|
|
282
|
-
```
|
|
283
|
-
script-text = ?->| "</script"
|
|
284
|
-
```
|
|
285
|
-
|
|
286
|
-
## Repetition
|
|
154
|
+
### Repetition
|
|
287
155
|
|
|
288
|
-
|
|
156
|
+
#### Basic Repeat
|
|
289
157
|
Repeat a pattern one or more times using `+`:
|
|
290
158
|
```
|
|
291
159
|
digits = (digit)+
|
|
292
160
|
```
|
|
293
161
|
|
|
294
|
-
|
|
162
|
+
#### Zero or More
|
|
295
163
|
Repeat a pattern zero or more times using `*`:
|
|
296
164
|
```
|
|
297
165
|
digits = (digit)*
|
|
298
166
|
```
|
|
299
167
|
|
|
300
|
-
|
|
168
|
+
#### Bounded Repetition
|
|
301
169
|
Specify exact repetition counts using curly braces:
|
|
302
170
|
- `{n}` - Exactly n times: `(pattern){3}`
|
|
303
171
|
- `{n,}` - At least n times: `(pattern){1,}`
|
|
304
172
|
- `{,n}` - At most n times: `(pattern){,3}`
|
|
305
173
|
- `{n,m}` - Between n and m times: `(pattern){1,3}`
|
|
306
174
|
|
|
307
|
-
|
|
175
|
+
#### Repetition with Divider
|
|
308
176
|
Repeat patterns with a divider between occurrences:
|
|
309
177
|
```
|
|
310
178
|
digits = (digit, comma){3}
|
|
@@ -315,15 +183,15 @@ Add `trim` keyword to trim the divider from the end:
|
|
|
315
183
|
digits = (digit, comma trim)+
|
|
316
184
|
```
|
|
317
185
|
|
|
318
|
-
|
|
186
|
+
### Imports and Parameters
|
|
319
187
|
|
|
320
|
-
|
|
188
|
+
#### Basic Import
|
|
321
189
|
Import patterns from other files:
|
|
322
190
|
```
|
|
323
191
|
import { pattern-name } from "path/to/file.cpat"
|
|
324
192
|
```
|
|
325
193
|
|
|
326
|
-
|
|
194
|
+
#### Import with Parameters
|
|
327
195
|
Import with custom parameters:
|
|
328
196
|
```
|
|
329
197
|
import { pattern } from "file.cpat" with params {
|
|
@@ -331,7 +199,7 @@ import { pattern } from "file.cpat" with params {
|
|
|
331
199
|
}
|
|
332
200
|
```
|
|
333
201
|
|
|
334
|
-
|
|
202
|
+
#### Parameter Declaration
|
|
335
203
|
Declare parameters that can be passed to the grammar:
|
|
336
204
|
```
|
|
337
205
|
use params {
|
|
@@ -339,7 +207,7 @@ use params {
|
|
|
339
207
|
}
|
|
340
208
|
```
|
|
341
209
|
|
|
342
|
-
|
|
210
|
+
#### Default Parameters
|
|
343
211
|
Specify default values for parameters:
|
|
344
212
|
```
|
|
345
213
|
use params {
|
|
@@ -347,154 +215,16 @@ use params {
|
|
|
347
215
|
}
|
|
348
216
|
```
|
|
349
217
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
The Clarity Pattern Parser allows you to provide your own resolver for handling imports of `.cpat` files. This is useful when you need to load patterns from different sources like a database, network, or custom file system.
|
|
353
|
-
|
|
354
|
-
### Basic Resolver Example
|
|
355
|
-
|
|
356
|
-
```typescript
|
|
357
|
-
import { Grammar } from "clarity-pattern-parser";
|
|
358
|
-
|
|
359
|
-
// Simple in-memory resolver
|
|
360
|
-
const pathMap: Record<string, string> = {
|
|
361
|
-
"first-name.cpat": `first-name = "John"`,
|
|
362
|
-
"space.cpat": `space = " "`
|
|
363
|
-
};
|
|
364
|
-
|
|
365
|
-
const resolver = (resource: string) => {
|
|
366
|
-
return Promise.resolve({
|
|
367
|
-
expression: pathMap[resource],
|
|
368
|
-
resource
|
|
369
|
-
});
|
|
370
|
-
};
|
|
371
|
-
|
|
372
|
-
const patterns = await Grammar.parse(`
|
|
373
|
-
import { first-name } from "first-name.cpat"
|
|
374
|
-
import { space } from "space.cpat"
|
|
375
|
-
last-name = "Doe"
|
|
376
|
-
full-name = first-name + space + last-name
|
|
377
|
-
`, { resolveImport: resolver });
|
|
378
|
-
|
|
379
|
-
const result = patterns["full-name"].exec("John Doe");
|
|
380
|
-
// result.ast.value will be "John Doe"
|
|
381
|
-
```
|
|
382
|
-
|
|
383
|
-
### Resolver with Parameters
|
|
384
|
-
|
|
385
|
-
```typescript
|
|
386
|
-
const spaceExpression = `
|
|
387
|
-
use params { custom-space }
|
|
388
|
-
space = custom-space
|
|
389
|
-
`;
|
|
390
|
-
|
|
391
|
-
const pathMap: Record<string, string> = {
|
|
392
|
-
"space.cpat": spaceExpression
|
|
393
|
-
};
|
|
394
|
-
|
|
395
|
-
const resolver = (resource: string) => {
|
|
396
|
-
return Promise.resolve({
|
|
397
|
-
expression: pathMap[resource],
|
|
398
|
-
resource
|
|
399
|
-
});
|
|
400
|
-
};
|
|
401
|
-
|
|
402
|
-
const patterns = await Grammar.parse(`
|
|
403
|
-
import { space } from "space.cpat" with params {
|
|
404
|
-
custom-space = " "
|
|
405
|
-
}
|
|
406
|
-
last-name = "Doe"
|
|
407
|
-
full-name = first-name + space + last-name
|
|
408
|
-
`, { resolveImport: resolver });
|
|
409
|
-
|
|
410
|
-
const result = patterns["full-name"].exec("John Doe");
|
|
411
|
-
// result.ast.value will be "John Doe"
|
|
412
|
-
```
|
|
413
|
-
|
|
414
|
-
### Resolver with Aliases
|
|
415
|
-
|
|
416
|
-
```typescript
|
|
417
|
-
const pathMap: Record<string, string> = {
|
|
418
|
-
"resource1.cpat": `value = "Value"`,
|
|
419
|
-
"resource2.cpat": `
|
|
420
|
-
use params { param }
|
|
421
|
-
export-value = param
|
|
422
|
-
`
|
|
423
|
-
};
|
|
424
|
-
|
|
425
|
-
const resolver = (resource: string) => {
|
|
426
|
-
return Promise.resolve({
|
|
427
|
-
expression: pathMap[resource],
|
|
428
|
-
resource
|
|
429
|
-
});
|
|
430
|
-
};
|
|
431
|
-
|
|
432
|
-
const patterns = await Grammar.parse(`
|
|
433
|
-
import { value as alias } from "resource1.cpat"
|
|
434
|
-
import { export-value } from "resource2.cpat" with params {
|
|
435
|
-
param = alias
|
|
436
|
-
}
|
|
437
|
-
name = export-value
|
|
438
|
-
`, { resolveImport: resolver });
|
|
439
|
-
|
|
440
|
-
const result = patterns["name"].exec("Value");
|
|
441
|
-
// result.ast.value will be "Value"
|
|
442
|
-
```
|
|
443
|
-
|
|
444
|
-
### Resolver with Default Values
|
|
445
|
-
|
|
446
|
-
```typescript
|
|
447
|
-
const resolver = (_: string) => {
|
|
448
|
-
return Promise.reject(new Error("No Import"));
|
|
449
|
-
};
|
|
450
|
-
|
|
451
|
-
const patterns = await Grammar.parse(`
|
|
452
|
-
use params {
|
|
453
|
-
value = default-value
|
|
454
|
-
}
|
|
455
|
-
default-value = "DefaultValue"
|
|
456
|
-
alias = value
|
|
457
|
-
`, {
|
|
458
|
-
resolveImport: resolver,
|
|
459
|
-
params: [new Literal("value", "Value")]
|
|
460
|
-
});
|
|
461
|
-
|
|
462
|
-
const result = patterns["alias"].exec("Value");
|
|
463
|
-
// result.ast.value will be "Value"
|
|
464
|
-
```
|
|
465
|
-
|
|
466
|
-
### Key Features of Custom Resolvers
|
|
218
|
+
### Decorators
|
|
467
219
|
|
|
468
|
-
|
|
469
|
-
2. **Parameter Support**: Handle parameter passing between imported patterns
|
|
470
|
-
3. **Alias Support**: Support pattern aliasing during import
|
|
471
|
-
4. **Default Values**: Provide default values for parameters
|
|
472
|
-
5. **Error Handling**: Custom error handling for import failures
|
|
473
|
-
6. **Resource Tracking**: Track the origin of imported patterns
|
|
474
|
-
|
|
475
|
-
### Resolver Interface
|
|
476
|
-
|
|
477
|
-
The resolver function should implement the following interface:
|
|
478
|
-
|
|
479
|
-
```typescript
|
|
480
|
-
type Resolver = (resource: string, originResource: string | null) => Promise<{
|
|
481
|
-
expression: string; // The pattern expression to parse
|
|
482
|
-
resource: string; // The resource identifier
|
|
483
|
-
}>;
|
|
484
|
-
```
|
|
485
|
-
|
|
486
|
-
## Decorators
|
|
487
|
-
|
|
488
|
-
Decorators can be applied to patterns using the `@` syntax:
|
|
489
|
-
|
|
490
|
-
### Token Decorator
|
|
220
|
+
#### Token Decorator
|
|
491
221
|
Specify tokens for a pattern:
|
|
492
222
|
```
|
|
493
223
|
@tokens([" "])
|
|
494
224
|
spaces = /\s+/
|
|
495
225
|
```
|
|
496
226
|
|
|
497
|
-
|
|
227
|
+
#### Custom Decorators
|
|
498
228
|
Support for custom decorators with various argument types:
|
|
499
229
|
```
|
|
500
230
|
@decorator() // No arguments
|
|
@@ -502,31 +232,31 @@ Support for custom decorators with various argument types:
|
|
|
502
232
|
@decorator({"prop": value}) // Object argument
|
|
503
233
|
```
|
|
504
234
|
|
|
505
|
-
|
|
235
|
+
### Comments
|
|
506
236
|
Add comments using the `#` symbol:
|
|
507
237
|
```
|
|
508
238
|
# This is a comment
|
|
509
239
|
pattern = "value"
|
|
510
240
|
```
|
|
511
241
|
|
|
512
|
-
|
|
242
|
+
### Pattern References
|
|
513
243
|
Reference other patterns by name:
|
|
514
244
|
```
|
|
515
245
|
pattern1 = "value"
|
|
516
246
|
pattern2 = pattern1
|
|
517
247
|
```
|
|
518
248
|
|
|
519
|
-
|
|
249
|
+
### Pattern Aliasing
|
|
520
250
|
Import patterns with aliases:
|
|
521
251
|
```
|
|
522
252
|
import { original as alias } from "file.cpat"
|
|
523
253
|
```
|
|
524
254
|
|
|
525
|
-
|
|
255
|
+
### String Template Patterns
|
|
526
256
|
|
|
527
257
|
Patterns can be defined inline using string templates. This allows for quick pattern definition and testing without creating separate files.
|
|
528
258
|
|
|
529
|
-
|
|
259
|
+
#### Basic Example
|
|
530
260
|
```typescript
|
|
531
261
|
const { fullName } = patterns`
|
|
532
262
|
first-name = "John"
|
|
@@ -539,7 +269,7 @@ const result = fullName.exec("John Doe");
|
|
|
539
269
|
// result.ast.value will be "John Doe"
|
|
540
270
|
```
|
|
541
271
|
|
|
542
|
-
|
|
272
|
+
#### Complex Example (HTML-like Markup)
|
|
543
273
|
```typescript
|
|
544
274
|
const { body } = patterns`
|
|
545
275
|
tag-name = /[a-zA-Z_-]+[a-zA-Z0-9_-]*/
|
|
@@ -564,14 +294,6 @@ result?.ast?.findAll(n => n.name.includes("ws")).forEach(n => n.remove());
|
|
|
564
294
|
// result.ast.value will be "<div><div></div><div></div></div>"
|
|
565
295
|
```
|
|
566
296
|
|
|
567
|
-
### Key Features
|
|
568
|
-
1. Patterns are defined using backticks (`)
|
|
569
|
-
2. Each pattern definition is on a new line
|
|
570
|
-
3. The `patterns` function returns an object with all defined patterns
|
|
571
|
-
4. Patterns can be used immediately after definition
|
|
572
|
-
5. The AST can be manipulated after parsing (e.g., removing spaces)
|
|
573
|
-
6. The `exec` method can take an optional second parameter to enable debug mode
|
|
574
|
-
|
|
575
297
|
## Direct Pattern Usage
|
|
576
298
|
|
|
577
299
|
While the grammar provides a convenient way to define patterns, you can also use the Pattern classes directly for more control and flexibility.
|
|
@@ -636,39 +358,6 @@ const result = expression.exec("a ? b : c");
|
|
|
636
358
|
// result.ast.value will be "a ? b : c"
|
|
637
359
|
```
|
|
638
360
|
|
|
639
|
-
#### Not (Negative Lookahead)
|
|
640
|
-
```typescript
|
|
641
|
-
import { Not, Literal, Sequence } from "clarity-pattern-parser";
|
|
642
|
-
|
|
643
|
-
const notJohn = new Not("not-john", new Literal("john", "John"));
|
|
644
|
-
const name = new Literal("name", "Jane");
|
|
645
|
-
const pattern = new Sequence("pattern", [notJohn, name]);
|
|
646
|
-
|
|
647
|
-
const result = pattern.exec("Jane");
|
|
648
|
-
// result.ast.value will be "Jane"
|
|
649
|
-
```
|
|
650
|
-
|
|
651
|
-
#### Repeat
|
|
652
|
-
```typescript
|
|
653
|
-
import { Repeat, Regex, Literal } from "clarity-pattern-parser";
|
|
654
|
-
|
|
655
|
-
const digit = new Regex("digit", "\\d+");
|
|
656
|
-
const comma = new Literal("comma", ",");
|
|
657
|
-
const digits = new Repeat("digits", digit, { divider: comma, min: 1, max: 3 });
|
|
658
|
-
|
|
659
|
-
const result = digits.exec("1,2,3");
|
|
660
|
-
// result.ast.value will be "1,2,3"
|
|
661
|
-
```
|
|
662
|
-
|
|
663
|
-
#### Take Until
|
|
664
|
-
```typescript
|
|
665
|
-
import { TakeUntil, Literal } from "clarity-pattern-parser";
|
|
666
|
-
|
|
667
|
-
const scriptText = new TakeUntil("script-text", new Literal("end-script", "</script"));
|
|
668
|
-
const result = scriptText.exec("function() { return 1; }</script>");
|
|
669
|
-
// result.ast.value will be "function() { return 1; }"
|
|
670
|
-
```
|
|
671
|
-
|
|
672
361
|
### Pattern Context
|
|
673
362
|
```typescript
|
|
674
363
|
import { Context, Literal } from "clarity-pattern-parser";
|
|
@@ -692,95 +381,25 @@ const result = pattern.exec("John");
|
|
|
692
381
|
// result.ast.value will be "John"
|
|
693
382
|
```
|
|
694
383
|
|
|
695
|
-
###
|
|
696
|
-
1. Full control over pattern construction and configuration
|
|
697
|
-
2. Ability to create custom pattern types
|
|
698
|
-
3. Direct access to pattern execution and AST manipulation
|
|
699
|
-
4. Better performance for complex patterns
|
|
700
|
-
5. Easier debugging and testing
|
|
701
|
-
6. More flexible pattern composition
|
|
702
|
-
|
|
703
|
-
## Pattern Interface
|
|
704
|
-
|
|
705
|
-
All patterns implement the `Pattern` interface, which provides a consistent API for pattern matching and manipulation.
|
|
384
|
+
### Pattern Execution
|
|
706
385
|
|
|
707
|
-
|
|
386
|
+
Pattern execution returns a `ParseResult` that includes the AST and any error information:
|
|
708
387
|
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
#### `test(text: string, record?: boolean): boolean`
|
|
721
|
-
Tests if the pattern matches the given text without building an AST.
|
|
722
|
-
- `text`: The text to test
|
|
723
|
-
- `record`: Optional boolean to enable debug recording
|
|
724
|
-
- Returns: `true` if the pattern matches, `false` otherwise
|
|
725
|
-
|
|
726
|
-
#### `clone(name?: string): Pattern`
|
|
727
|
-
Creates a deep copy of the pattern.
|
|
728
|
-
- `name`: Optional new name for the cloned pattern
|
|
729
|
-
- Returns: A new instance of the pattern
|
|
730
|
-
|
|
731
|
-
### Token Methods
|
|
732
|
-
|
|
733
|
-
#### `getTokens(): string[]`
|
|
734
|
-
Returns all possible tokens that this pattern can match.
|
|
735
|
-
- Returns: Array of possible token strings
|
|
736
|
-
|
|
737
|
-
#### `getTokensAfter(childReference: Pattern): string[]`
|
|
738
|
-
Returns tokens that can appear after a specific child pattern.
|
|
739
|
-
- `childReference`: The child pattern to check after
|
|
740
|
-
- Returns: Array of possible token strings
|
|
741
|
-
|
|
742
|
-
#### `getNextTokens(): string[]`
|
|
743
|
-
Returns the next possible tokens based on the current state.
|
|
744
|
-
- Returns: Array of possible token strings
|
|
745
|
-
|
|
746
|
-
### Pattern Methods
|
|
747
|
-
|
|
748
|
-
#### `getPatterns(): Pattern[]`
|
|
749
|
-
Returns all child patterns.
|
|
750
|
-
- Returns: Array of child patterns
|
|
751
|
-
|
|
752
|
-
#### `getPatternsAfter(childReference: Pattern): Pattern[]`
|
|
753
|
-
Returns patterns that can appear after a specific child pattern.
|
|
754
|
-
- `childReference`: The child pattern to check after
|
|
755
|
-
- Returns: Array of possible patterns
|
|
756
|
-
|
|
757
|
-
#### `getNextPatterns(): Pattern[]`
|
|
758
|
-
Returns the next possible patterns based on the current state.
|
|
759
|
-
- Returns: Array of possible patterns
|
|
760
|
-
|
|
761
|
-
### Utility Methods
|
|
762
|
-
|
|
763
|
-
#### `find(predicate: (pattern: Pattern) => boolean): Pattern | null`
|
|
764
|
-
Finds a pattern that matches the given predicate.
|
|
765
|
-
- `predicate`: Function that tests each pattern
|
|
766
|
-
- Returns: The first matching pattern or null
|
|
767
|
-
|
|
768
|
-
#### `isEqual(pattern: Pattern): boolean`
|
|
769
|
-
Tests if this pattern is equal to another pattern.
|
|
770
|
-
- `pattern`: The pattern to compare with
|
|
771
|
-
- Returns: `true` if patterns are equal, `false` otherwise
|
|
772
|
-
|
|
773
|
-
### Properties
|
|
774
|
-
|
|
775
|
-
- `id`: Unique identifier for the pattern
|
|
776
|
-
- `type`: Type of the pattern (e.g., "literal", "regex", "sequence")
|
|
777
|
-
- `name`: Name of the pattern
|
|
778
|
-
- `parent`: Parent pattern or null
|
|
779
|
-
- `children`: Array of child patterns
|
|
780
|
-
- `startedOnIndex`: Index where pattern matching started parsing
|
|
388
|
+
```typescript
|
|
389
|
+
const result = pattern.exec("some text");
|
|
390
|
+
if (result.error) {
|
|
391
|
+
console.error(result.error.message);
|
|
392
|
+
console.error(result.error.expected);
|
|
393
|
+
console.error(result.error.position);
|
|
394
|
+
} else {
|
|
395
|
+
console.log(result.ast?.value);
|
|
396
|
+
}
|
|
397
|
+
```
|
|
781
398
|
|
|
782
399
|
### AST Manipulation
|
|
400
|
+
|
|
783
401
|
The AST (Abstract Syntax Tree) returned by pattern execution can be manipulated:
|
|
402
|
+
|
|
784
403
|
```typescript
|
|
785
404
|
const result = pattern.exec("some text");
|
|
786
405
|
if (result.ast) {
|
|
@@ -795,93 +414,100 @@ if (result.ast) {
|
|
|
795
414
|
}
|
|
796
415
|
```
|
|
797
416
|
|
|
798
|
-
|
|
417
|
+
## Advanced Topics
|
|
799
418
|
|
|
800
|
-
|
|
419
|
+
### Custom Patterns
|
|
801
420
|
|
|
802
|
-
|
|
803
|
-
- `id`: Unique identifier for the node
|
|
804
|
-
- `type`: Type of the node (e.g., "literal", "regex", "sequence")
|
|
805
|
-
- `name`: Name of the node
|
|
806
|
-
- `value`: String value of the node (concatenated from children if present)
|
|
807
|
-
- `firstIndex`: First character index in the input text
|
|
808
|
-
- `lastIndex`: Last character index in the input text
|
|
809
|
-
- `startIndex`: Starting position in the input text
|
|
810
|
-
- `endIndex`: Ending position in the input text
|
|
811
|
-
- `parent`: Parent node or null
|
|
812
|
-
- `children`: Array of child nodes
|
|
813
|
-
- `hasChildren`: Whether the node has any children
|
|
814
|
-
- `isLeaf`: Whether the node is a leaf (no children)
|
|
421
|
+
You can create custom patterns by extending the base `Pattern` class:
|
|
815
422
|
|
|
816
|
-
#### Tree Manipulation
|
|
817
423
|
```typescript
|
|
818
|
-
|
|
819
|
-
const node = Node.createValueNode("type", "name", "value");
|
|
820
|
-
const parent = Node.createNode("type", "name", [node]);
|
|
821
|
-
|
|
822
|
-
// Add/remove children
|
|
823
|
-
parent.appendChild(newNode);
|
|
824
|
-
parent.removeChild(node);
|
|
825
|
-
parent.removeAllChildren();
|
|
424
|
+
import { Pattern } from "clarity-pattern-parser";
|
|
826
425
|
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
426
|
+
class CustomPattern extends Pattern {
|
|
427
|
+
constructor(name: string) {
|
|
428
|
+
super(name);
|
|
429
|
+
}
|
|
831
430
|
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
431
|
+
exec(text: string) {
|
|
432
|
+
// Custom pattern implementation
|
|
433
|
+
}
|
|
434
|
+
}
|
|
835
435
|
```
|
|
836
436
|
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
437
|
+
### Performance Tips
|
|
438
|
+
|
|
439
|
+
1. Use `test()` instead of `exec()` when you only need to check if a pattern matches
|
|
440
|
+
2. Cache frequently used patterns
|
|
441
|
+
3. Use `Reference` for recursive patterns instead of direct recursion
|
|
442
|
+
4. Minimize the use of optional patterns in sequences
|
|
443
|
+
5. Use bounded repetition when possible
|
|
842
444
|
|
|
843
|
-
|
|
844
|
-
node.walkUp(n => console.log(n.name)); // Bottom-up
|
|
845
|
-
node.walkDown(n => console.log(n.name)); // Top-down
|
|
846
|
-
node.walkBreadthFirst(n => console.log(n.name)); // Level by level
|
|
445
|
+
### Debugging
|
|
847
446
|
|
|
848
|
-
|
|
849
|
-
const ancestor = node.findAncestor(n => n.type === "parent");
|
|
850
|
-
```
|
|
447
|
+
Enable debug mode to get detailed information about pattern execution:
|
|
851
448
|
|
|
852
|
-
#### Tree Transformation
|
|
853
449
|
```typescript
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
"literal": n => Node.createValueNode("new-type", n.name, n.value),
|
|
857
|
-
"sequence": n => Node.createNode("new-type", n.name, n.children)
|
|
858
|
-
});
|
|
450
|
+
const result = pattern.exec("some text", true);
|
|
451
|
+
// Debug information will be available in result.debug
|
|
859
452
|
```
|
|
860
453
|
|
|
861
|
-
|
|
862
|
-
```typescript
|
|
863
|
-
// Flatten tree to array
|
|
864
|
-
const nodes = node.flatten();
|
|
454
|
+
### Error Handling
|
|
865
455
|
|
|
866
|
-
|
|
867
|
-
node.compact();
|
|
456
|
+
Pattern execution returns a `ParseResult` that includes error information:
|
|
868
457
|
|
|
869
|
-
|
|
870
|
-
const
|
|
458
|
+
```typescript
|
|
459
|
+
const result = pattern.exec("invalid text");
|
|
460
|
+
if (result.error) {
|
|
461
|
+
console.error(result.error.message);
|
|
462
|
+
console.error(result.error.expected);
|
|
463
|
+
console.error(result.error.position);
|
|
464
|
+
}
|
|
465
|
+
```
|
|
871
466
|
|
|
872
|
-
|
|
873
|
-
node.normalize();
|
|
467
|
+
## Examples
|
|
874
468
|
|
|
875
|
-
|
|
876
|
-
|
|
469
|
+
### JSON Parser
|
|
470
|
+
```typescript
|
|
471
|
+
const { json } = patterns`
|
|
472
|
+
# Basic JSON grammar
|
|
473
|
+
ws = /\s+/
|
|
474
|
+
string = /"[^"]*"/
|
|
475
|
+
number = /-?\d+(\.\d+)?/
|
|
476
|
+
boolean = "true" | "false"
|
|
477
|
+
null = "null"
|
|
478
|
+
value = string | number | boolean | null | array | object
|
|
479
|
+
array-items = (value, /\s*,\s*/)+
|
|
480
|
+
array = "[" +ws? + array-items? + ws? + "]"
|
|
481
|
+
object-property = string + ws? + ":" + ws? + value
|
|
482
|
+
object-properties = (object-property, /\s*,\s*/ trim)+
|
|
483
|
+
object = "{" + ws? + object-properties? + ws? + "}"
|
|
484
|
+
json = ws? + value + ws?
|
|
485
|
+
`;
|
|
877
486
|
```
|
|
878
487
|
|
|
879
|
-
|
|
488
|
+
### HTML Parser
|
|
880
489
|
```typescript
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
490
|
+
const { html } = patterns`
|
|
491
|
+
# Basic HTML grammar
|
|
492
|
+
ws = /\s+/
|
|
493
|
+
tag-name = /[a-zA-Z_-]+[a-zA-Z0-9_-]*/
|
|
494
|
+
attribute-name = /[a-zA-Z_-]+[a-zA-Z0-9_-]*/
|
|
495
|
+
attribute-value = /"[^"]*"/
|
|
496
|
+
value-attribute = attribute-name + "=" + attribute-value
|
|
497
|
+
bool-attribute = attribute-name
|
|
498
|
+
attribute = value-attribute | bool-attribute
|
|
499
|
+
attributes = (attribute, ws)*
|
|
500
|
+
opening-tag = "<" + ws? + tag-name + ws? + attributes? + ">"
|
|
501
|
+
closing-tag = "</" + ws? + tag-name + ws? + ">"
|
|
502
|
+
text = /[^<]+/
|
|
503
|
+
child = text | element
|
|
504
|
+
children = (child, /\s*/)+
|
|
505
|
+
element = opening-tag + children? + closing-tag
|
|
506
|
+
html = ws? + element + ws?
|
|
507
|
+
`;
|
|
886
508
|
```
|
|
887
509
|
|
|
510
|
+
## License
|
|
511
|
+
|
|
512
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
513
|
+
|
package/package.json
CHANGED
[+1 -1; hunk not present in this extract]
package/src/intellisense/AutoComplete.test.ts
CHANGED
|
@@ -123,6 +123,30 @@ describe("AutoComplete", () => {
|
|
|
123
123
|
expect(result.cursor).not.toBeNull();
|
|
124
124
|
});
|
|
125
125
|
|
|
126
|
+
|
|
127
|
+
test("Option should error at furthest match index", () => {
|
|
128
|
+
const john = new Literal("john", "John");
|
|
129
|
+
const space = new Literal("space", " ");
|
|
130
|
+
const doe = new Literal("doe", "Doe");
|
|
131
|
+
const smith = new Literal("smith", "Smith");
|
|
132
|
+
const name = new Sequence("name", [john, space, new Options("last-name", [smith, doe])]);
|
|
133
|
+
|
|
134
|
+
const text = "John Smi"
|
|
135
|
+
const autoComplete = new AutoComplete(name);
|
|
136
|
+
const result = autoComplete.suggestFor(text);
|
|
137
|
+
const expectedOptions = [{
|
|
138
|
+
text: "th",
|
|
139
|
+
startIndex: 8
|
|
140
|
+
}];
|
|
141
|
+
|
|
142
|
+
expect(result.ast).toBeNull();
|
|
143
|
+
expect(result.options).toEqual(expectedOptions);
|
|
144
|
+
expect(result.errorAtIndex).toBe(text.length);
|
|
145
|
+
expect(result.isComplete).toBeFalsy();
|
|
146
|
+
expect(result.cursor).not.toBeNull();
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
|
|
126
150
|
test("Root Regex Pattern suggests customTokens", () => {
|
|
127
151
|
const freeTextPattern = new Regex(
|
|
128
152
|
`free-text`,
|
|
package/src/intellisense/AutoComplete.ts
CHANGED
|
@@ -93,7 +93,7 @@ export class AutoComplete {
|
|
|
93
93
|
const furthestMatch = cursor.allMatchedNodes[cursor.allMatchedNodes.length - 1];
|
|
94
94
|
|
|
95
95
|
if (furthestError && furthestMatch) {
|
|
96
|
-
if (furthestError.lastIndex
|
|
96
|
+
if (furthestMatch.endIndex > furthestError.lastIndex ) {
|
|
97
97
|
return furthestMatch.endIndex;
|
|
98
98
|
} else {
|
|
99
99
|
return furthestError.lastIndex;
|