clarity-pattern-parser 11.3.5 → 11.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +469 -428
- package/grammar.md +254 -0
- package/package.json +23 -2
- package/src/intellisense/AutoComplete.test.ts +59 -4
- package/src/intellisense/AutoComplete.ts +34 -18
package/grammar.md
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# Clarity Pattern Parser Grammar
|
|
2
|
+
|
|
3
|
+
This document describes the grammar features supported by the Clarity Pattern Parser.
|
|
4
|
+
|
|
5
|
+
## Basic Patterns
|
|
6
|
+
|
|
7
|
+
### Literal Strings
|
|
8
|
+
Define literal string patterns using double quotes:
|
|
9
|
+
```
|
|
10
|
+
name = "John"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Escaped characters are supported in literals:
|
|
14
|
+
- `\n` - newline
|
|
15
|
+
- `\r` - carriage return
|
|
16
|
+
- `\t` - tab
|
|
17
|
+
- `\b` - backspace
|
|
18
|
+
- `\f` - form feed
|
|
19
|
+
- `\v` - vertical tab
|
|
20
|
+
- `\0` - null character
|
|
21
|
+
- `\x00` - hex character
|
|
22
|
+
- `\u0000` - unicode character
|
|
23
|
+
- `\"` - escaped quote
|
|
24
|
+
- `\\` - escaped backslash
|
|
25
|
+
|
|
26
|
+
### Regular Expressions
|
|
27
|
+
Define regex patterns using forward slashes:
|
|
28
|
+
```
|
|
29
|
+
name = /\w/
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Pattern Operators
|
|
33
|
+
|
|
34
|
+
### Options (|)
|
|
35
|
+
Match one of multiple patterns using the `|` operator. This is used for simple alternatives where order doesn't matter:
|
|
36
|
+
```
|
|
37
|
+
names = john | jane
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Expression (|)
|
|
41
|
+
Expression patterns also use the `|` operator but are used for defining operator precedence in expressions. The order of alternatives determines precedence, with earlier alternatives having higher precedence. By default, operators are left-associative.
|
|
42
|
+
|
|
43
|
+
Example of an arithmetic expression grammar:
|
|
44
|
+
```
|
|
45
|
+
prefix-operators = "+" | "-"
|
|
46
|
+
prefix-expression = prefix-operators + expression
|
|
47
|
+
postfix-operators = "++" | "--"
|
|
48
|
+
postfix-expression = expression + postfix-operators
|
|
49
|
+
add-sub-operators = "+" | "-"
|
|
50
|
+
add-sub-expression = expression + add-sub-operators + expression
|
|
51
|
+
mul-div-operators = "*" | "/"
|
|
52
|
+
mul-div-expression = expression + mul-div-operators + expression
|
|
53
|
+
expression = prefix-expression | mul-div-expression | add-sub-expression | postfix-expression
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
In this example:
|
|
57
|
+
- `prefix-expression` has highest precedence
|
|
58
|
+
- `mul-div-expression` has next highest precedence
|
|
59
|
+
- `add-sub-expression` has the next highest precedence after `mul-div-expression`
|
|
60
|
+
- `postfix-expression` has lowest precedence
|
|
61
|
+
|
|
62
|
+
To make an operator right-associative, add the `right` keyword:
|
|
63
|
+
```
|
|
64
|
+
expression = prefix-expression | mul-div-expression | add-sub-expression right | postfix-expression
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Sequence (+)
|
|
68
|
+
Concatenate patterns in sequence using the `+` operator:
|
|
69
|
+
```
|
|
70
|
+
full-name = first-name + space + last-name
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Optional (?)
|
|
74
|
+
Make a pattern optional using the `?` operator:
|
|
75
|
+
```
|
|
76
|
+
full-name = first-name + space + middle-name? + last-name
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Not (!)
|
|
80
|
+
Negative lookahead using the `!` operator:
|
|
81
|
+
```
|
|
82
|
+
pattern = !excluded-pattern + actual-pattern
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Take Until (?->|)
|
|
86
|
+
Match all characters until a specific pattern is found:
|
|
87
|
+
```
|
|
88
|
+
script-text = ?->| "</script"
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Repetition
|
|
92
|
+
|
|
93
|
+
### Basic Repeat
|
|
94
|
+
Repeat a pattern one or more times using `+`:
|
|
95
|
+
```
|
|
96
|
+
digits = (digit)+
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Zero or More
|
|
100
|
+
Repeat a pattern zero or more times using `*`:
|
|
101
|
+
```
|
|
102
|
+
digits = (digit)*
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Bounded Repetition
|
|
106
|
+
Specify repetition counts or ranges using curly braces:
|
|
107
|
+
- `{n}` - Exactly n times: `(pattern){3}`
|
|
108
|
+
- `{n,}` - At least n times: `(pattern){1,}`
|
|
109
|
+
- `{,n}` - At most n times: `(pattern){,3}`
|
|
110
|
+
- `{n,m}` - Between n and m times: `(pattern){1,3}`
|
|
111
|
+
|
|
112
|
+
### Repetition with Divider
|
|
113
|
+
Repeat patterns with a divider between occurrences:
|
|
114
|
+
```
|
|
115
|
+
digits = (digit, comma){3}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Add the `trim` keyword to trim the divider from the end:
|
|
119
|
+
```
|
|
120
|
+
digits = (digit, comma trim)+
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Imports and Parameters
|
|
124
|
+
|
|
125
|
+
### Basic Import
|
|
126
|
+
Import patterns from other files:
|
|
127
|
+
```
|
|
128
|
+
import { pattern-name } from "path/to/file.cpat"
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Import with Parameters
|
|
132
|
+
Import with custom parameters:
|
|
133
|
+
```
|
|
134
|
+
import { pattern } from "file.cpat" with params {
|
|
135
|
+
custom-param = "value"
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Parameter Declaration
|
|
140
|
+
Declare parameters that can be passed to the grammar:
|
|
141
|
+
```
|
|
142
|
+
use params {
|
|
143
|
+
param-name
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Default Parameters
|
|
148
|
+
Specify default values for parameters:
|
|
149
|
+
```
|
|
150
|
+
use params {
|
|
151
|
+
param = default-value
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Decorators
|
|
156
|
+
|
|
157
|
+
Decorators can be applied to patterns using the `@` syntax:
|
|
158
|
+
|
|
159
|
+
### Token Decorator
|
|
160
|
+
Specify tokens for a pattern:
|
|
161
|
+
```
|
|
162
|
+
@tokens([" "])
|
|
163
|
+
spaces = /\s+/
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Custom Decorators
|
|
167
|
+
Support for custom decorators with various argument types:
|
|
168
|
+
```
|
|
169
|
+
@decorator() // No arguments
|
|
170
|
+
@decorator(["value"]) // Array argument
|
|
171
|
+
@decorator({"prop": value}) // Object argument
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Comments
|
|
175
|
+
Add comments using the `#` symbol:
|
|
176
|
+
```
|
|
177
|
+
# This is a comment
|
|
178
|
+
pattern = "value"
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Expression Patterns
|
|
182
|
+
Support for recursive expression patterns with optional right association:
|
|
183
|
+
```
|
|
184
|
+
expression = ternary | variables
|
|
185
|
+
ternary = expression + " ? " + expression + " : " + expression
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Add the `right` keyword for right association:
|
|
189
|
+
```
|
|
190
|
+
expression = ternary right | variables
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Pattern References
|
|
194
|
+
Reference other patterns by name:
|
|
195
|
+
```
|
|
196
|
+
pattern1 = "value"
|
|
197
|
+
pattern2 = pattern1
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Pattern Aliasing
|
|
201
|
+
Import patterns with aliases:
|
|
202
|
+
```
|
|
203
|
+
import { original as alias } from "file.cpat"
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## String Template Patterns
|
|
207
|
+
|
|
208
|
+
Patterns can be defined inline using string templates. This allows for quick pattern definition and testing without creating separate files.
|
|
209
|
+
|
|
210
|
+
### Basic Example
|
|
211
|
+
```typescript
|
|
212
|
+
const { fullName } = patterns`
|
|
213
|
+
first-name = "John"
|
|
214
|
+
last-name = "Doe"
|
|
215
|
+
space = /\s+/
|
|
216
|
+
full-name = first-name + space + last-name
|
|
217
|
+
`;
|
|
218
|
+
|
|
219
|
+
const result = fullName.exec("John Doe");
|
|
220
|
+
// result.ast.value will be "John Doe"
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Complex Example (HTML-like Markup)
|
|
224
|
+
```typescript
|
|
225
|
+
const { body } = patterns`
|
|
226
|
+
tag-name = /[a-zA-Z_-]+[a-zA-Z0-9_-]*/
|
|
227
|
+
space = /\s+/
|
|
228
|
+
opening-tag = "<" + tag-name + space? + ">"
|
|
229
|
+
closing-tag = "</" + tag-name + space? + ">"
|
|
230
|
+
child = space? + element + space?
|
|
231
|
+
children = (child)*
|
|
232
|
+
element = opening-tag + children + closing-tag
|
|
233
|
+
body = space? + element + space?
|
|
234
|
+
`;
|
|
235
|
+
|
|
236
|
+
const result = body.exec(`
|
|
237
|
+
<div>
|
|
238
|
+
<div></div>
|
|
239
|
+
<div></div>
|
|
240
|
+
</div>
|
|
241
|
+
`, true);
|
|
242
|
+
|
|
243
|
+
// Clean up spaces from the AST
|
|
244
|
+
result?.ast?.findAll(n => n.name.includes("space")).forEach(n => n.remove());
|
|
245
|
+
// result.ast.value will be "<div><div></div><div></div></div>"
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Key Features
|
|
249
|
+
1. Patterns are defined inside a tagged template literal (a backtick-delimited string)
|
|
250
|
+
2. Each pattern definition is on a new line
|
|
251
|
+
3. The `patterns` function returns an object with all defined patterns
|
|
252
|
+
4. Patterns can be used immediately after definition
|
|
253
|
+
5. The AST can be manipulated after parsing (e.g., removing spaces)
|
|
254
|
+
6. The `exec` method can take an optional second parameter to enable debug mode
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "clarity-pattern-parser",
|
|
3
|
-
"version": "11.3.5",
|
|
3
|
+
"version": "11.3.7",
|
|
4
4
|
"description": "Parsing Library for Typescript and Javascript.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.esm.js",
|
|
@@ -10,7 +10,28 @@
|
|
|
10
10
|
"build": "rollup -c -m",
|
|
11
11
|
"test": "jest --config=jest.coverage.config.js --coverage"
|
|
12
12
|
},
|
|
13
|
-
"keywords": [
|
|
13
|
+
"keywords": [
|
|
14
|
+
"parser",
|
|
15
|
+
"ast",
|
|
16
|
+
"pattern-matching",
|
|
17
|
+
"grammar",
|
|
18
|
+
"typescript",
|
|
19
|
+
"javascript",
|
|
20
|
+
"parsing",
|
|
21
|
+
"tree",
|
|
22
|
+
"pattern",
|
|
23
|
+
"syntax",
|
|
24
|
+
"language",
|
|
25
|
+
"compiler",
|
|
26
|
+
"interpreter",
|
|
27
|
+
"lexer",
|
|
28
|
+
"tokenizer",
|
|
29
|
+
"regex",
|
|
30
|
+
"regular-expressions",
|
|
31
|
+
"text-processing",
|
|
32
|
+
"validation",
|
|
33
|
+
"parser-generator"
|
|
34
|
+
],
|
|
14
35
|
"devDependencies": {
|
|
15
36
|
"@types/jest": "^26.0.23",
|
|
16
37
|
"jest": "^26.6.3",
|
|
@@ -81,7 +81,7 @@ describe("AutoComplete", () => {
|
|
|
81
81
|
expect(result.isComplete).toBeFalsy();
|
|
82
82
|
});
|
|
83
83
|
|
|
84
|
-
test("Full Pattern Match", () => {
|
|
84
|
+
test("Full Pattern Match Simple", () => {
|
|
85
85
|
const john = new Literal("john", "John");
|
|
86
86
|
const space = new Literal("space", " ");
|
|
87
87
|
const doe = new Literal("doe", "Doe");
|
|
@@ -123,6 +123,63 @@ describe("AutoComplete", () => {
|
|
|
123
123
|
expect(result.cursor).not.toBeNull();
|
|
124
124
|
});
|
|
125
125
|
|
|
126
|
+
test("Root Regex Pattern suggests customTokens", () => {
|
|
127
|
+
const freeTextPattern = new Regex(
|
|
128
|
+
`free-text`,
|
|
129
|
+
'[(\\w)\\s]+'
|
|
130
|
+
);
|
|
131
|
+
|
|
132
|
+
const customTokensMap:Record<string, string[]> = {
|
|
133
|
+
'free-text': ['luke',"leia skywalker",'luke skywalker']
|
|
134
|
+
}
|
|
135
|
+
const autoComplete = new AutoComplete(freeTextPattern,{
|
|
136
|
+
customTokens:customTokensMap
|
|
137
|
+
});
|
|
138
|
+
const result = autoComplete.suggestFor("luke");
|
|
139
|
+
|
|
140
|
+
const expected = [
|
|
141
|
+
{ text: " skywalker", startIndex: 4 },
|
|
142
|
+
];
|
|
143
|
+
|
|
144
|
+
expect(result.ast?.value).toBe("luke");
|
|
145
|
+
expect(result.options).toEqual(expected);
|
|
146
|
+
expect(result.errorAtIndex).toBeNull()
|
|
147
|
+
expect(result.isComplete).toBeTruthy();
|
|
148
|
+
expect(result.cursor).not.toBeNull();
|
|
149
|
+
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
test("Sequence Regex Pattern suggests customTokens", () => {
|
|
154
|
+
const jediLiteral = new Literal("jedi", "jedi ");
|
|
155
|
+
const freeTextPattern = new Regex(
|
|
156
|
+
`free-text`,
|
|
157
|
+
'[(\\w)\\s]+'
|
|
158
|
+
);
|
|
159
|
+
const sequence = new Sequence('sequence', [jediLiteral,freeTextPattern])
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
const customTokensMap:Record<string, string[]> = {
|
|
163
|
+
'free-text': ['luke',"leia skywalker",'luke skywalker']
|
|
164
|
+
}
|
|
165
|
+
const autoComplete = new AutoComplete(sequence,{
|
|
166
|
+
customTokens:customTokensMap
|
|
167
|
+
});
|
|
168
|
+
const result = autoComplete.suggestFor("jedi luke sky");
|
|
169
|
+
|
|
170
|
+
const expected = [
|
|
171
|
+
{ text: "walker", startIndex: 13 },
|
|
172
|
+
];
|
|
173
|
+
|
|
174
|
+
expect(result.ast?.value).toBe("jedi luke sky");
|
|
175
|
+
expect(result.options).toEqual(expected);
|
|
176
|
+
expect(result.errorAtIndex).toBeNull()
|
|
177
|
+
expect(result.isComplete).toBeTruthy();
|
|
178
|
+
expect(result.cursor).not.toBeNull();
|
|
179
|
+
});
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
|
|
126
183
|
test("Full Pattern Match With Root Repeat", () => {
|
|
127
184
|
const john = new Literal("john", "John");
|
|
128
185
|
const space = new Literal("space", " ");
|
|
@@ -148,7 +205,7 @@ describe("AutoComplete", () => {
|
|
|
148
205
|
expect(result.cursor).not.toBeNull();
|
|
149
206
|
});
|
|
150
207
|
|
|
151
|
-
test("Partial", () => {
|
|
208
|
+
test("Partial Simple", () => {
|
|
152
209
|
const name = new Literal("name", "Name");
|
|
153
210
|
const autoComplete = new AutoComplete(name);
|
|
154
211
|
// Use deprecated suggest for code coverage.
|
|
@@ -257,8 +314,6 @@ describe("AutoComplete", () => {
|
|
|
257
314
|
|
|
258
315
|
const suggestion = autoComplete.suggestFor(text)
|
|
259
316
|
|
|
260
|
-
console.log('suggestion: ',suggestion)
|
|
261
|
-
|
|
262
317
|
const expectedOptions = [
|
|
263
318
|
{ text: "Jack", startIndex: 0 },
|
|
264
319
|
{ text: "John", startIndex: 0 },
|
|
@@ -175,18 +175,31 @@ export class AutoComplete {
|
|
|
175
175
|
return this._createSuggestions(-1, this._getTokensForPattern(this._pattern));
|
|
176
176
|
}
|
|
177
177
|
|
|
178
|
-
|
|
179
|
-
const
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
178
|
+
if ( match.node != null) {
|
|
179
|
+
const textStartingMatch = this._text.slice(match.node.startIndex,match.node.endIndex)
|
|
180
|
+
const currentPatternsTokens = this._getTokensForPattern(match.pattern);
|
|
181
|
+
/**
|
|
182
|
+
* Compares tokens to current text and extracts remainder tokens
|
|
183
|
+
* - e.g. **currentText:** *abc*, **baseToken:** *abcdef*, **trailingToken:** *def*
|
|
184
|
+
*/
|
|
185
|
+
const trailingTokens = currentPatternsTokens.reduce<string[]>((acc, token) => {
|
|
186
|
+
if (token.startsWith(textStartingMatch)) {
|
|
187
|
+
const sliced = token.slice(textStartingMatch.length);
|
|
188
|
+
if (sliced !== '') {
|
|
189
|
+
acc.push(sliced);
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
return acc;
|
|
193
|
+
}, []);
|
|
194
|
+
|
|
195
|
+
const leafPatterns = match.pattern.getNextPatterns();
|
|
196
|
+
const leafTokens = leafPatterns.reduce((acc: string[], leafPattern) => {
|
|
197
|
+
acc.push(...this._getTokensForPattern(leafPattern));
|
|
198
|
+
return acc;
|
|
199
|
+
}, []);
|
|
200
|
+
|
|
201
|
+
const allTokens = [...trailingTokens,...leafTokens]
|
|
202
|
+
return this._createSuggestions(match.node.lastIndex, allTokens);
|
|
190
203
|
} else {
|
|
191
204
|
return [];
|
|
192
205
|
}
|
|
@@ -238,19 +251,20 @@ export class AutoComplete {
|
|
|
238
251
|
}
|
|
239
252
|
|
|
240
253
|
private _createSuggestions(lastIndex: number, tokens: string[]): SuggestionOption[] {
|
|
241
|
-
let
|
|
254
|
+
let textToIndex = lastIndex === -1 ? "" : this._cursor.getChars(0, lastIndex);
|
|
242
255
|
const suggestionStrings: string[] = [];
|
|
243
256
|
const options: SuggestionOption[] = [];
|
|
244
257
|
|
|
245
258
|
for (const token of tokens) {
|
|
246
|
-
|
|
247
|
-
const
|
|
259
|
+
// concatenated for start index identification inside createSuggestion
|
|
260
|
+
const suggestion = textToIndex + token;
|
|
248
261
|
const alreadyExist = suggestionStrings.includes(suggestion);
|
|
249
262
|
const isSameAsText = suggestion === this._text;
|
|
250
263
|
|
|
251
|
-
if (
|
|
264
|
+
if ( !alreadyExist && !isSameAsText) {
|
|
252
265
|
suggestionStrings.push(suggestion);
|
|
253
|
-
|
|
266
|
+
const suggestionOption = this._createSuggestion(this._cursor.text, suggestion)
|
|
267
|
+
options.push(suggestionOption);
|
|
254
268
|
}
|
|
255
269
|
}
|
|
256
270
|
|
|
@@ -264,12 +278,14 @@ export class AutoComplete {
|
|
|
264
278
|
const furthestMatch = findMatchIndex(suggestion, fullText);
|
|
265
279
|
const text = suggestion.slice(furthestMatch);
|
|
266
280
|
|
|
267
|
-
|
|
281
|
+
const option:SuggestionOption = {
|
|
268
282
|
text: text,
|
|
269
283
|
startIndex: furthestMatch,
|
|
270
284
|
};
|
|
285
|
+
return option
|
|
271
286
|
}
|
|
272
287
|
|
|
288
|
+
|
|
273
289
|
static suggestFor(text: string, pattern: Pattern, options?: AutoCompleteOptions) {
|
|
274
290
|
return new AutoComplete(pattern, options).suggestFor(text);
|
|
275
291
|
}
|