papagaio 0.2.7 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -17
- package/package.json +1 -1
- package/src/papagaio.js +348 -190
- package/tests/tests.json +63 -63
package/README.md
CHANGED
|
@@ -46,17 +46,6 @@ apple banana cherry
|
|
|
46
46
|
```
|
|
47
47
|
Output: `cherry, banana, apple`
|
|
48
48
|
|
|
49
|
-
### 3. Flexible Whitespace (`$$`)
|
|
50
|
-
|
|
51
|
-
```
|
|
52
|
-
pattern {$x$$and$$$y} {$x & $y}
|
|
53
|
-
hello and world
|
|
54
|
-
hello and world
|
|
55
|
-
```
|
|
56
|
-
Output: `hello & world` (both)
|
|
57
|
-
|
|
58
|
-
`$$` = zero or more spaces/tabs/newlines.
|
|
59
|
-
|
|
60
49
|
## Blocks
|
|
61
50
|
|
|
62
51
|
Capture content between delimiters.
|
|
@@ -136,7 +125,7 @@ context {
|
|
|
136
125
|
Output:
|
|
137
126
|
```
|
|
138
127
|
<apple>
|
|
139
|
-
<banana>
|
|
128
|
+
<banana>
|
|
140
129
|
```
|
|
141
130
|
|
|
142
131
|
**Empty contexts are automatically removed.**
|
|
@@ -266,9 +255,13 @@ Output:
|
|
|
266
255
|
|
|
267
256
|
### Matching
|
|
268
257
|
- Variables (`$x`) capture **one word** (no spaces)
|
|
269
|
-
-
|
|
258
|
+
- Double-sigil variables (`$$x`) capture **one or more words** (with spaces)
|
|
270
259
|
- Patterns apply **globally** each iteration
|
|
271
260
|
- Auto-recursion until: max 512 iterations OR no changes
|
|
261
|
+
- `$ ` = one or more of the whitespace character written after the sigil (space, tab, or newline)
|
|
262
|
+
- `$$ ` = zero or more of the whitespace character written after the sigil (space, tab, or newline)
|
|
263
|
+
- `$$$ ` = one or more whitespace characters of any kind
|
|
264
|
+
- `$$$$ ` = zero or more whitespace characters of any kind
|
|
272
265
|
|
|
273
266
|
### Block Matching
|
|
274
267
|
- `$block name {open}{close}` captures between delimiters
|
|
@@ -280,8 +273,11 @@ Output:
|
|
|
280
273
|
- Reuse: `$x` appears multiple times in replace
|
|
281
274
|
- Undefined: becomes empty string
|
|
282
275
|
|
|
283
|
-
###
|
|
284
|
-
- You cannot match words containing the sigil character.
|
|
276
|
+
### Limitations
|
|
277
|
+
- You cannot match words containing the current sigil character.
|
|
278
|
+
- You cannot match a $block{}{} using the current delimiters.
|
|
279
|
+
- By design, whitespace operators need a whitespace character after them to work properly, even the `$$$ ` and `$$$$ ` ones.
|
|
280
|
+
- Multi-word variables (`$$x`) also capture leading/trailing whitespace, so be careful when using them together with whitespace operators.
|
|
285
281
|
|
|
286
282
|
---
|
|
287
283
|
|
|
@@ -289,11 +285,13 @@ Output:
|
|
|
289
285
|
|
|
290
286
|
| Problem | Solution |
|
|
291
287
|
|---------|----------|
|
|
292
|
-
| Pattern doesn't match | Use `$$` between elements for flexible whitespace |
|
|
293
288
|
| Variable not captured | Check space between variables |
|
|
294
289
|
| Block not working | Verify balanced delimiters `{` `}` |
|
|
295
290
|
| Infinite recursion | Use `$clear` or reduce `recursion_limit` |
|
|
296
291
|
| $eval not working | Errors return empty string, use try-catch |
|
|
292
|
+
| Pattern doesn't match | Use whitespace operators between elements for flexible whitespace |
|
|
293
|
+
| Whitespace operators | Remember they need a whitespace after them to work properly |
|
|
294
|
+
| Whitespace operators not matching | Multi-word variables (`$$x`) also capture leading/trailing whitespace, so be careful when using them together with whitespace operators. |
|
|
297
295
|
|
|
298
296
|
## Known Bugs
|
|
299
297
|
|
|
@@ -305,7 +303,7 @@ Output:
|
|
|
305
303
|
|
|
306
304
|
```
|
|
307
305
|
pattern {$x $y} {$y, $x} # basic pattern
|
|
308
|
-
pattern {$x
|
|
306
|
+
pattern {$x$ $y} {$x-$y} # flexible whitespace
|
|
309
307
|
pattern {$block n {o}{c}} {$n} # block
|
|
310
308
|
context { ... } # recursive scope
|
|
311
309
|
$unique # unique ID per pattern
|
|
@@ -313,6 +311,7 @@ $match # full match
|
|
|
313
311
|
$prefix / $suffix # before/after
|
|
314
312
|
$clear # clear before
|
|
315
313
|
$eval{code} # execute JS
|
|
314
|
+
$ / $$ / $$$ / $$$$ # whitespace operators
|
|
316
315
|
```
|
|
317
316
|
|
|
318
317
|
---
|
package/package.json
CHANGED
package/src/papagaio.js
CHANGED
|
@@ -1,95 +1,88 @@
|
|
|
1
1
|
// https://github.com/jardimdanificado/papagaio
|
|
2
|
-
|
|
3
|
-
function
|
|
4
|
-
const
|
|
5
|
-
let
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
2
|
+
|
|
3
|
+
function parsePattern(papagaio, pattern) {
|
|
4
|
+
const tokens = [], S = papagaio.symbols.sigil, S2 = S + S;
|
|
5
|
+
let i = 0;
|
|
6
|
+
const isWhitespaceChar = c => /\s/.test(c);
|
|
7
|
+
const getWhitespaceType = c => c === ' ' ? 'space' : c === '\t' ? 'tab' : c === '\n' ? 'newline' : c === '\r' ? 'carriage-return' : 'other';
|
|
8
|
+
while (i < pattern.length) {
|
|
9
|
+
if (pattern.startsWith(S + S + S, i) && i + 3 < pattern.length && isWhitespaceChar(pattern[i + 3])) {
|
|
10
|
+
tokens.push({ type: 'any-ws-required', wsChar: pattern[i + 3] });
|
|
11
|
+
i += 4;
|
|
12
12
|
continue;
|
|
13
13
|
}
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
src = left + prefix + proc + right;
|
|
19
|
-
}
|
|
20
|
-
return src;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
function extractBlock(papagaio, src, openPos, openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close) {
|
|
24
|
-
let i = openPos;
|
|
25
|
-
if (openDelim.length > 1 || closeDelim.length > 1) {
|
|
26
|
-
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
27
|
-
i += openDelim.length;
|
|
28
|
-
const innerStart = i;
|
|
29
|
-
let d = 0;
|
|
30
|
-
while (i < src.length) {
|
|
31
|
-
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
32
|
-
d++;
|
|
33
|
-
i += openDelim.length;
|
|
34
|
-
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
35
|
-
if (d === 0) return [src.substring(innerStart, i), i + closeDelim.length];
|
|
36
|
-
d--;
|
|
37
|
-
i += closeDelim.length;
|
|
38
|
-
} else i++;
|
|
39
|
-
}
|
|
40
|
-
return [src.substring(innerStart), src.length];
|
|
14
|
+
if (pattern.startsWith(S + S + S + S, i) && i + 4 < pattern.length && isWhitespaceChar(pattern[i + 4])) {
|
|
15
|
+
tokens.push({ type: 'any-ws-optional', wsChar: pattern[i + 4] });
|
|
16
|
+
i += 5;
|
|
17
|
+
continue;
|
|
41
18
|
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
19
|
+
if (pattern.startsWith(S2, i)) {
|
|
20
|
+
let j = i + S2.length, varName = '';
|
|
21
|
+
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
22
|
+
if (varName) {
|
|
23
|
+
if (j < pattern.length && isWhitespaceChar(pattern[j])) {
|
|
24
|
+
tokens.push({ type: 'var-ws', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
|
|
25
|
+
i = j + 1;
|
|
26
|
+
continue;
|
|
27
|
+
} else if (j < pattern.length && pattern[j] === S) {
|
|
28
|
+
tokens.push({ type: 'var-ws', varName, wsTrailing: 'optional', wsChar: null });
|
|
29
|
+
i = j + 1;
|
|
30
|
+
continue;
|
|
31
|
+
} else {
|
|
32
|
+
tokens.push({ type: 'var-ws', varName });
|
|
33
|
+
i = j;
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
55
36
|
}
|
|
56
|
-
return [src.substring(innerStart, i), i + 1];
|
|
57
37
|
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const tokens = [];
|
|
64
|
-
let i = 0;
|
|
65
|
-
const S = papagaio.symbols.sigil, S2 = S + S;
|
|
66
|
-
while (i < pattern.length) {
|
|
38
|
+
if (pattern.startsWith(S2, i) && i + 2 < pattern.length && isWhitespaceChar(pattern[i + 2])) {
|
|
39
|
+
tokens.push({ type: 'ws-optional', wsType: getWhitespaceType(pattern[i + 2]), wsChar: pattern[i + 2] });
|
|
40
|
+
i += 3;
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
67
43
|
if (pattern.startsWith(S2, i)) {
|
|
68
44
|
tokens.push({ type: 'whitespace-optional' });
|
|
69
45
|
i += S2.length;
|
|
70
46
|
continue;
|
|
71
47
|
}
|
|
48
|
+
if (pattern[i] === S && i + 1 < pattern.length && isWhitespaceChar(pattern[i + 1])) {
|
|
49
|
+
tokens.push({ type: 'ws-required', wsType: getWhitespaceType(pattern[i + 1]), wsChar: pattern[i + 1] });
|
|
50
|
+
i += 2;
|
|
51
|
+
continue;
|
|
52
|
+
}
|
|
72
53
|
if (pattern.startsWith(S + 'block', i)) {
|
|
73
|
-
let j = i + S.length +
|
|
54
|
+
let j = i + S.length + 5;
|
|
74
55
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
75
56
|
let varName = '';
|
|
76
57
|
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
77
58
|
if (varName) {
|
|
78
59
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
79
|
-
let openDelim = papagaio.symbols.open;
|
|
60
|
+
let openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close;
|
|
61
|
+
let openDelimIsWs = false, closeDelimIsWs = false;
|
|
80
62
|
if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
|
|
81
63
|
const [c, e] = extractBlock(papagaio, pattern, j);
|
|
82
|
-
|
|
64
|
+
const trimmed = c.trim();
|
|
65
|
+
if (trimmed === '') {
|
|
66
|
+
openDelimIsWs = true;
|
|
67
|
+
let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
|
|
68
|
+
while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
|
|
69
|
+
openDelim = pattern.substring(wsStart, wsEnd);
|
|
70
|
+
} else openDelim = unescapeDelimiter(trimmed) || papagaio.symbols.open;
|
|
83
71
|
j = e;
|
|
84
72
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
85
73
|
}
|
|
86
|
-
let closeDelim = papagaio.symbols.close;
|
|
87
74
|
if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
|
|
88
75
|
const [c, e] = extractBlock(papagaio, pattern, j);
|
|
89
|
-
|
|
76
|
+
const trimmed = c.trim();
|
|
77
|
+
if (trimmed === '') {
|
|
78
|
+
closeDelimIsWs = true;
|
|
79
|
+
let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
|
|
80
|
+
while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
|
|
81
|
+
closeDelim = pattern.substring(wsStart, wsEnd);
|
|
82
|
+
} else closeDelim = unescapeDelimiter(trimmed) || papagaio.symbols.close;
|
|
90
83
|
j = e;
|
|
91
84
|
}
|
|
92
|
-
tokens.push({ type: 'block', varName, openDelim, closeDelim });
|
|
85
|
+
tokens.push({ type: 'block', varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs });
|
|
93
86
|
i = j;
|
|
94
87
|
continue;
|
|
95
88
|
}
|
|
@@ -98,210 +91,375 @@ function parsePattern(papagaio, pattern) {
|
|
|
98
91
|
let j = i + S.length, varName = '';
|
|
99
92
|
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
100
93
|
if (varName) {
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
94
|
+
if (j < pattern.length && isWhitespaceChar(pattern[j])) {
|
|
95
|
+
tokens.push({ type: 'var', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
|
|
96
|
+
i = j + 1;
|
|
97
|
+
continue;
|
|
98
|
+
} else if (j < pattern.length && pattern[j] === S) {
|
|
99
|
+
tokens.push({ type: 'var', varName, wsTrailing: 'optional', wsChar: null });
|
|
100
|
+
i = j + 1;
|
|
101
|
+
continue;
|
|
102
|
+
} else {
|
|
103
|
+
tokens.push({ type: 'var', varName });
|
|
104
|
+
i = j;
|
|
105
|
+
continue;
|
|
106
|
+
}
|
|
104
107
|
}
|
|
105
|
-
tokens.push({ type: 'literal', value: S });
|
|
106
|
-
i += S.length;
|
|
107
|
-
continue;
|
|
108
108
|
}
|
|
109
|
-
if (
|
|
109
|
+
if (isWhitespaceChar(pattern[i])) {
|
|
110
110
|
let ws = '';
|
|
111
|
-
while (i < pattern.length &&
|
|
112
|
-
tokens.push({ type: '
|
|
111
|
+
while (i < pattern.length && isWhitespaceChar(pattern[i])) ws += pattern[i++];
|
|
112
|
+
tokens.push({ type: 'literal-ws', value: ws });
|
|
113
113
|
continue;
|
|
114
114
|
}
|
|
115
115
|
let literal = '';
|
|
116
|
-
while (i < pattern.length && !pattern.startsWith(S, i) &&
|
|
116
|
+
while (i < pattern.length && !pattern.startsWith(S, i) && !isWhitespaceChar(pattern[i])) literal += pattern[i++];
|
|
117
117
|
if (literal) tokens.push({ type: 'literal', value: literal });
|
|
118
118
|
}
|
|
119
119
|
return tokens;
|
|
120
120
|
}
|
|
121
121
|
|
|
122
122
|
function matchPattern(papagaio, src, tokens, startPos = 0) {
|
|
123
|
-
let pos = startPos;
|
|
124
|
-
const
|
|
123
|
+
let pos = startPos, captures = {};
|
|
124
|
+
const matchWhitespaceType = (str, idx, wsType) => {
|
|
125
|
+
if (idx >= str.length) return { matched: '', newPos: idx };
|
|
126
|
+
if (wsType === 'space' && str[idx] === ' ') {
|
|
127
|
+
let j = idx;
|
|
128
|
+
while (j < str.length && str[j] === ' ') j++;
|
|
129
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
130
|
+
}
|
|
131
|
+
if (wsType === 'tab' && str[idx] === '\t') {
|
|
132
|
+
let j = idx;
|
|
133
|
+
while (j < str.length && str[j] === '\t') j++;
|
|
134
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
135
|
+
}
|
|
136
|
+
if (wsType === 'newline' && str[idx] === '\n') {
|
|
137
|
+
let j = idx;
|
|
138
|
+
while (j < str.length && str[j] === '\n') j++;
|
|
139
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
140
|
+
}
|
|
141
|
+
return { matched: '', newPos: idx };
|
|
142
|
+
};
|
|
125
143
|
for (let ti = 0; ti < tokens.length; ti++) {
|
|
126
144
|
const token = tokens[ti];
|
|
127
|
-
if (token.type === '
|
|
128
|
-
|
|
145
|
+
if (token.type === 'literal-ws') {
|
|
146
|
+
if (!src.startsWith(token.value, pos)) return null;
|
|
147
|
+
pos += token.value.length;
|
|
129
148
|
continue;
|
|
130
149
|
}
|
|
131
|
-
if (token.type === '
|
|
150
|
+
if (token.type === 'ws-required') {
|
|
151
|
+
const { matched, newPos } = matchWhitespaceType(src, pos, token.wsType);
|
|
152
|
+
if (!matched) return null;
|
|
153
|
+
pos = newPos;
|
|
154
|
+
continue;
|
|
155
|
+
}
|
|
156
|
+
if (token.type === 'ws-optional') {
|
|
157
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsType);
|
|
158
|
+
pos = newPos;
|
|
159
|
+
continue;
|
|
160
|
+
}
|
|
161
|
+
if (token.type === 'any-ws-required') {
|
|
132
162
|
if (pos >= src.length || !/\s/.test(src[pos])) return null;
|
|
133
163
|
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
134
164
|
continue;
|
|
135
165
|
}
|
|
166
|
+
if (token.type === 'any-ws-optional') {
|
|
167
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
168
|
+
continue;
|
|
169
|
+
}
|
|
170
|
+
if (token.type === 'whitespace-optional') {
|
|
171
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
172
|
+
continue;
|
|
173
|
+
}
|
|
136
174
|
if (token.type === 'literal') {
|
|
137
175
|
if (!src.startsWith(token.value, pos)) return null;
|
|
138
176
|
pos += token.value.length;
|
|
139
177
|
continue;
|
|
140
178
|
}
|
|
141
179
|
if (token.type === 'var') {
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if (nextToken) {
|
|
145
|
-
|
|
146
|
-
while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
147
|
-
} else if (nextToken.type === 'literal') {
|
|
148
|
-
const stopChar = nextToken.value[0];
|
|
149
|
-
while (pos < src.length && src[pos] !== stopChar && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
150
|
-
} else if (nextToken.type === 'block') {
|
|
151
|
-
while (pos < src.length && !src.startsWith(nextToken.openDelim, pos) && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
152
|
-
} else {
|
|
153
|
-
while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
154
|
-
}
|
|
180
|
+
let v = '';
|
|
181
|
+
const nextToken = findNextSignificantToken(tokens, ti);
|
|
182
|
+
if (nextToken && nextToken.type === 'literal') {
|
|
183
|
+
while (pos < src.length && !src.startsWith(nextToken.value, pos) && !/\s/.test(src[pos])) v += src[pos++];
|
|
155
184
|
} else {
|
|
156
|
-
while (pos < src.length && !/\s/.test(src[pos]))
|
|
185
|
+
while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
|
|
157
186
|
}
|
|
158
|
-
if (
|
|
159
|
-
|
|
187
|
+
if (token.wsTrailing && token.wsTrailing !== 'optional') {
|
|
188
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
|
|
189
|
+
pos = newPos;
|
|
190
|
+
} else if (token.wsTrailing === 'optional') {
|
|
191
|
+
const { newPos } = matchWhitespaceType(src, pos, 'space');
|
|
192
|
+
pos = newPos;
|
|
193
|
+
}
|
|
194
|
+
if (!v) return null;
|
|
195
|
+
captures[papagaio.symbols.sigil + token.varName] = v;
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
if (token.type === 'var-ws') {
|
|
199
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
200
|
+
const n = findNextSignificantToken(tokens, ti);
|
|
201
|
+
let v = '';
|
|
202
|
+
if (!n || ['var', 'var-ws', 'block'].includes(n.type)) {
|
|
203
|
+
while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
|
|
204
|
+
} else if (n.type === 'literal') {
|
|
205
|
+
while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n') v += src[pos++];
|
|
206
|
+
v = v.trimEnd();
|
|
207
|
+
}
|
|
208
|
+
if (token.wsTrailing && token.wsTrailing !== 'optional') {
|
|
209
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
|
|
210
|
+
pos = newPos;
|
|
211
|
+
} else if (token.wsTrailing === 'optional') {
|
|
212
|
+
const { newPos } = matchWhitespaceType(src, pos, 'space');
|
|
213
|
+
pos = newPos;
|
|
214
|
+
}
|
|
215
|
+
if (!v) return null;
|
|
216
|
+
captures[papagaio.symbols.sigil + token.varName] = v;
|
|
160
217
|
continue;
|
|
161
218
|
}
|
|
162
219
|
if (token.type === 'block') {
|
|
163
|
-
const { varName, openDelim, closeDelim } = token;
|
|
220
|
+
const { varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs } = token;
|
|
164
221
|
if (!src.startsWith(openDelim, pos)) return null;
|
|
165
|
-
const [
|
|
166
|
-
captures[papagaio.symbols.sigil + varName] =
|
|
167
|
-
pos =
|
|
222
|
+
const [c, e] = extractBlockWithWsDelimiter(papagaio, src, pos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs);
|
|
223
|
+
captures[papagaio.symbols.sigil + varName] = c;
|
|
224
|
+
pos = e;
|
|
168
225
|
continue;
|
|
169
226
|
}
|
|
170
227
|
}
|
|
171
228
|
return { captures, endPos: pos };
|
|
172
229
|
}
|
|
173
230
|
|
|
174
|
-
function
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
231
|
+
function findNextSignificantToken(t, i) {
|
|
232
|
+
for (let k = i + 1; k < t.length; k++) {
|
|
233
|
+
if (!['whitespace-optional', 'ws-optional', 'ws-required', 'any-ws-optional', 'any-ws-required'].includes(t[k].type)) return t[k];
|
|
234
|
+
}
|
|
235
|
+
return null;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
function extractBlockWithWsDelimiter(p, src, openPos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs) {
|
|
239
|
+
let i = openPos;
|
|
240
|
+
if (openDelimIsWs || closeDelimIsWs) {
|
|
241
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
242
|
+
i += openDelim.length;
|
|
243
|
+
const s = i;
|
|
244
|
+
let d = 0;
|
|
245
|
+
while (i < src.length) {
|
|
246
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
247
|
+
d++;
|
|
248
|
+
i += openDelim.length;
|
|
249
|
+
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
250
|
+
if (!d) return [src.substring(s, i), i + closeDelim.length];
|
|
251
|
+
d--;
|
|
252
|
+
i += closeDelim.length;
|
|
253
|
+
} else i++;
|
|
254
|
+
}
|
|
255
|
+
return [src.substring(s), src.length];
|
|
256
|
+
}
|
|
257
|
+
return ['', i];
|
|
258
|
+
}
|
|
259
|
+
if (openDelim.length > 1 || closeDelim.length > 1) {
|
|
260
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
261
|
+
i += openDelim.length;
|
|
262
|
+
const s = i;
|
|
263
|
+
let d = 0;
|
|
264
|
+
while (i < src.length) {
|
|
265
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
266
|
+
d++;
|
|
267
|
+
i += openDelim.length;
|
|
268
|
+
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
269
|
+
if (!d) return [src.substring(s, i), i + closeDelim.length];
|
|
270
|
+
d--;
|
|
271
|
+
i += closeDelim.length;
|
|
272
|
+
} else i++;
|
|
273
|
+
}
|
|
274
|
+
return [src.substring(s), src.length];
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
if (src[i] === openDelim) {
|
|
278
|
+
i++;
|
|
279
|
+
const s = i;
|
|
280
|
+
if (openDelim === closeDelim) {
|
|
281
|
+
while (i < src.length && src[i] !== closeDelim) i++;
|
|
282
|
+
return [src.substring(s, i), i + 1];
|
|
283
|
+
}
|
|
284
|
+
let d = 1;
|
|
285
|
+
while (i < src.length && d > 0) {
|
|
286
|
+
if (src[i] === openDelim) d++;
|
|
287
|
+
else if (src[i] === closeDelim) d--;
|
|
288
|
+
if (d > 0) i++;
|
|
289
|
+
}
|
|
290
|
+
return [src.substring(s, i), i + 1];
|
|
291
|
+
}
|
|
292
|
+
return ['', i];
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
function extractBlock(p, src, openPos, openDelim = p.symbols.open, closeDelim = p.symbols.close) {
|
|
296
|
+
let i = openPos;
|
|
297
|
+
if (openDelim.length > 1 || closeDelim.length > 1) {
|
|
298
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
299
|
+
i += openDelim.length;
|
|
300
|
+
const s = i;
|
|
301
|
+
let d = 0;
|
|
302
|
+
while (i < src.length) {
|
|
303
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
304
|
+
d++;
|
|
305
|
+
i += openDelim.length;
|
|
306
|
+
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
307
|
+
if (!d) return [src.substring(s, i), i + closeDelim.length];
|
|
308
|
+
d--;
|
|
309
|
+
i += closeDelim.length;
|
|
310
|
+
} else i++;
|
|
311
|
+
}
|
|
312
|
+
return [src.substring(s), src.length];
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
if (src[i] === openDelim) {
|
|
316
|
+
i++;
|
|
317
|
+
const s = i;
|
|
318
|
+
if (openDelim === closeDelim) {
|
|
319
|
+
while (i < src.length && src[i] !== closeDelim) i++;
|
|
320
|
+
return [src.substring(s, i), i + 1];
|
|
321
|
+
}
|
|
322
|
+
let d = 1;
|
|
323
|
+
while (i < src.length && d > 0) {
|
|
324
|
+
if (src[i] === openDelim) d++;
|
|
325
|
+
else if (src[i] === closeDelim) d--;
|
|
326
|
+
if (d > 0) i++;
|
|
327
|
+
}
|
|
328
|
+
return [src.substring(s, i), i + 1];
|
|
329
|
+
}
|
|
330
|
+
return ['', i];
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
function processContext(p, src) {
|
|
334
|
+
const r = new RegExp(`\\b${p.symbols.context}\\s*\\${p.symbols.open}`, "g");
|
|
335
|
+
let m, a = [];
|
|
336
|
+
while ((m = r.exec(src)) !== null) a.push({ idx: m.index, pos: m.index + m[0].length - 1 });
|
|
337
|
+
for (let j = a.length - 1; j >= 0; j--) {
|
|
338
|
+
const x = a[j], [c, e] = extractBlock(p, src, x.pos);
|
|
339
|
+
if (!c.trim()) {
|
|
340
|
+
src = src.slice(0, x.idx) + src.slice(e);
|
|
341
|
+
continue;
|
|
342
|
+
}
|
|
343
|
+
const r2 = p.process(c);
|
|
344
|
+
let L = src.substring(0, x.idx), R = src.substring(e);
|
|
345
|
+
let pre = L.endsWith("\n") ? "\n" : "";
|
|
346
|
+
if (pre) L = L.slice(0, -1);
|
|
347
|
+
src = L + pre + r2 + R;
|
|
348
|
+
}
|
|
349
|
+
return src;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
function collectPatterns(p, src) {
|
|
353
|
+
const A = [], r = new RegExp(`\\b${p.symbols.pattern}\\s*\\${p.symbols.open}`, "g");
|
|
354
|
+
let out = src;
|
|
355
|
+
while (1) {
|
|
356
|
+
r.lastIndex = 0;
|
|
357
|
+
const m = r.exec(out);
|
|
181
358
|
if (!m) break;
|
|
182
|
-
const
|
|
183
|
-
const
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
result = result.slice(0, start) + result.slice(posAfterReplace);
|
|
359
|
+
const s = m.index, o = m.index + m[0].length - 1;
|
|
360
|
+
const [mp, em] = extractBlock(p, out, o);
|
|
361
|
+
let k = em;
|
|
362
|
+
while (k < out.length && /\s/.test(out[k])) k++;
|
|
363
|
+
if (k < out.length && out[k] === p.symbols.open) {
|
|
364
|
+
const [rp, er] = extractBlock(p, out, k);
|
|
365
|
+
A.push({ match: mp.trim(), replace: rp.trim() });
|
|
366
|
+
out = out.slice(0, s) + out.slice(er);
|
|
191
367
|
continue;
|
|
192
368
|
}
|
|
193
|
-
|
|
369
|
+
out = out.slice(0, s) + out.slice(em);
|
|
194
370
|
}
|
|
195
|
-
return [
|
|
371
|
+
return [A, out];
|
|
196
372
|
}
|
|
197
373
|
|
|
198
|
-
function applyPatterns(
|
|
199
|
-
let
|
|
200
|
-
for (const pat of
|
|
201
|
-
const
|
|
202
|
-
let
|
|
203
|
-
let pos = 0, matched = false;
|
|
374
|
+
function applyPatterns(p, src, pats) {
|
|
375
|
+
let clear = false, last = "", S = p.symbols.sigil;
|
|
376
|
+
for (const pat of pats) {
|
|
377
|
+
const t = parsePattern(p, pat.match);
|
|
378
|
+
let n = '', pos = 0, ok = false;
|
|
204
379
|
while (pos < src.length) {
|
|
205
|
-
const
|
|
206
|
-
if (
|
|
207
|
-
|
|
208
|
-
const { captures, endPos } =
|
|
209
|
-
let
|
|
380
|
+
const m = matchPattern(p, src, t, pos);
|
|
381
|
+
if (m) {
|
|
382
|
+
ok = true;
|
|
383
|
+
const { captures, endPos } = m;
|
|
384
|
+
let r = pat.replace;
|
|
210
385
|
for (const [k, v] of Object.entries(captures)) {
|
|
211
|
-
const
|
|
212
|
-
|
|
386
|
+
const e = escapeRegex(k);
|
|
387
|
+
r = r.replace(new RegExp(e + '(?![A-Za-z0-9_])', 'g'), v);
|
|
213
388
|
}
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
result = result.replace(/\$eval\{([^}]*)\}/g, (_, code) => {
|
|
389
|
+
const uid = p.unique_id++;
|
|
390
|
+
r = r.replace(new RegExp(`${escapeRegex(S)}unique\\b`, 'g'), () => String(uid));
|
|
391
|
+
r = r.replace(/\$eval\{([^}]*)\}/g, (_, c) => {
|
|
218
392
|
try {
|
|
219
|
-
|
|
220
|
-
return String(Function("papagaio", "ctx", wrapped)(papagaio, {}));
|
|
393
|
+
return String(Function("papagaio", "ctx", `"use strict";return(function(){${c}})();`)(p, {}));
|
|
221
394
|
} catch {
|
|
222
395
|
return "";
|
|
223
396
|
}
|
|
224
397
|
});
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
result = result.replace(new RegExp(`${escapeRegex(S)}clear\\b\\s?`, 'g'), '');
|
|
230
|
-
clearFlag = true;
|
|
398
|
+
r = r.replace(new RegExp(escapeRegex(S + S), 'g'), '');
|
|
399
|
+
if (new RegExp(`${escapeRegex(S)}clear\\b`, 'g').test(r)) {
|
|
400
|
+
r = r.replace(new RegExp(`${escapeRegex(S)}clear\\b\\s?`, 'g'), '');
|
|
401
|
+
clear = true;
|
|
231
402
|
}
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
.replace(new RegExp(`${escapeRegex(S)}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
newSrc += result;
|
|
239
|
-
lastResult = result;
|
|
403
|
+
const ms = pos, me = endPos;
|
|
404
|
+
r = r.replace(new RegExp(`${escapeRegex(S)}prefix\\b`, 'g'), src.slice(0, ms))
|
|
405
|
+
.replace(new RegExp(`${escapeRegex(S)}suffix\\b`, 'g'), src.slice(me))
|
|
406
|
+
.replace(new RegExp(`${escapeRegex(S)}match\\b`, 'g'), src.slice(ms, me));
|
|
407
|
+
n += r;
|
|
408
|
+
last = r;
|
|
240
409
|
pos = endPos;
|
|
241
410
|
} else {
|
|
242
|
-
|
|
411
|
+
n += src[pos];
|
|
243
412
|
pos++;
|
|
244
413
|
}
|
|
245
414
|
}
|
|
246
|
-
if (
|
|
247
|
-
src =
|
|
248
|
-
|
|
415
|
+
if (ok) {
|
|
416
|
+
src = clear ? last : n;
|
|
417
|
+
clear = false;
|
|
249
418
|
}
|
|
250
419
|
}
|
|
251
420
|
return src;
|
|
252
421
|
}
|
|
253
422
|
|
|
254
|
-
function escapeRegex(
|
|
255
|
-
return
|
|
423
|
+
function escapeRegex(s) {
|
|
424
|
+
return s.replace(/[.*+?^${}()|[\]\\""']/g, '\\$&');
|
|
256
425
|
}
|
|
257
426
|
|
|
258
|
-
function unescapeDelimiter(
|
|
259
|
-
let
|
|
260
|
-
for (let i = 0; i <
|
|
261
|
-
if (
|
|
262
|
-
const
|
|
263
|
-
if (
|
|
264
|
-
|
|
427
|
+
function unescapeDelimiter(s) {
|
|
428
|
+
let r = '';
|
|
429
|
+
for (let i = 0; i < s.length; i++) {
|
|
430
|
+
if (s[i] === '\\' && i + 1 < s.length) {
|
|
431
|
+
const n = s[i + 1];
|
|
432
|
+
if (n === '"' || n === "'" || n === '\\') {
|
|
433
|
+
r += n;
|
|
265
434
|
i++;
|
|
266
|
-
} else
|
|
267
|
-
|
|
268
|
-
}
|
|
269
|
-
} else {
|
|
270
|
-
result += str[i];
|
|
271
|
-
}
|
|
435
|
+
} else r += s[i];
|
|
436
|
+
} else r += s[i];
|
|
272
437
|
}
|
|
273
|
-
return
|
|
438
|
+
return r;
|
|
274
439
|
}
|
|
275
440
|
|
|
276
441
|
export class Papagaio {
|
|
277
442
|
constructor() {
|
|
278
443
|
this.recursion_limit = 512;
|
|
279
444
|
this.unique_id = 0;
|
|
280
|
-
this.symbols = {
|
|
281
|
-
pattern: "pattern",
|
|
282
|
-
context: "context",
|
|
283
|
-
open: "{",
|
|
284
|
-
close: "}",
|
|
285
|
-
sigil: "$"
|
|
286
|
-
};
|
|
445
|
+
this.symbols = { pattern: "pattern", context: "context", open: "{", close: "}", sigil: "$" };
|
|
287
446
|
this.content = "";
|
|
288
447
|
}
|
|
289
|
-
|
|
290
448
|
process(input) {
|
|
291
449
|
this.content = input;
|
|
292
|
-
let src = input, last = null,
|
|
293
|
-
const
|
|
294
|
-
const
|
|
295
|
-
const
|
|
296
|
-
return
|
|
450
|
+
let src = input, last = null, it = 0;
|
|
451
|
+
const pend = () => {
|
|
452
|
+
const r1 = new RegExp(`\\b${this.symbols.context}\\s*\\${this.symbols.open}`, "g");
|
|
453
|
+
const r2 = new RegExp(`\\b${this.symbols.pattern}\\s*\\${this.symbols.open}`, "g");
|
|
454
|
+
return r1.test(src) || r2.test(src);
|
|
297
455
|
};
|
|
298
|
-
while (src !== last &&
|
|
299
|
-
|
|
456
|
+
while (src !== last && it < this.recursion_limit) {
|
|
457
|
+
it++;
|
|
300
458
|
last = src;
|
|
301
459
|
src = processContext(this, src);
|
|
302
|
-
const [
|
|
303
|
-
src = applyPatterns(this, s2,
|
|
304
|
-
if (!
|
|
460
|
+
const [p, s2] = collectPatterns(this, src);
|
|
461
|
+
src = applyPatterns(this, s2, p);
|
|
462
|
+
if (!pend()) break;
|
|
305
463
|
}
|
|
306
464
|
return this.content = src, src;
|
|
307
465
|
}
|
package/tests/tests.json
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
{
|
|
10
10
|
"id": 2,
|
|
11
11
|
"name": "Flexible whitespace ($)",
|
|
12
|
-
"code": "pattern {
|
|
12
|
+
"code": "pattern {$$x and $$y} {$x & $y}\nhello and world",
|
|
13
13
|
"expected": "hello & world"
|
|
14
14
|
},
|
|
15
15
|
{
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
{
|
|
76
76
|
"id": 13,
|
|
77
77
|
"name": "Pattern with flexible whitespace in middle",
|
|
78
|
-
"code": "pattern {from
|
|
78
|
+
"code": "pattern {from to} {path: from -> to}\nfrom to\nfrom to\nfrom to",
|
|
79
79
|
"expected": "path: from -> to"
|
|
80
80
|
},
|
|
81
81
|
{
|
|
@@ -129,7 +129,7 @@
|
|
|
129
129
|
{
|
|
130
130
|
"id": 22,
|
|
131
131
|
"name": "Flexible whitespace with semicolon",
|
|
132
|
-
"code": "pattern {
|
|
132
|
+
"code": "pattern {$$a;$$b} {$a AND $b}\nx ; y\nx; y\nx ; y",
|
|
133
133
|
"expected": "x AND y"
|
|
134
134
|
},
|
|
135
135
|
{
|
|
@@ -182,8 +182,8 @@
|
|
|
182
182
|
},
|
|
183
183
|
{
|
|
184
184
|
"id": 31,
|
|
185
|
-
"name": "
|
|
186
|
-
"code": "pattern {$block data {}{}} {DATA:$data}\ndata
|
|
185
|
+
"name": "Delimiter test",
|
|
186
|
+
"code": "pattern {$block data {[}{]}} {DATA:$data}\ndata [ hello world ]",
|
|
187
187
|
"expected": "DATA: hello world"
|
|
188
188
|
},
|
|
189
189
|
{
|
|
@@ -267,7 +267,7 @@
|
|
|
267
267
|
{
|
|
268
268
|
"id": 45,
|
|
269
269
|
"name": "Pattern with flexible space before and after",
|
|
270
|
-
"code": "pattern {$$hello$$} {FOUND}\n hello ",
|
|
270
|
+
"code": "pattern {$$ hello$$ } {FOUND}\n hello ",
|
|
271
271
|
"expected": "FOUND"
|
|
272
272
|
},
|
|
273
273
|
{
|
|
@@ -315,7 +315,7 @@
|
|
|
315
315
|
{
|
|
316
316
|
"id": 53,
|
|
317
317
|
"name": "Trim multiple surrounding spaces",
|
|
318
|
-
"code": "pattern {
|
|
318
|
+
"code": "pattern {$ $word$ } {FOUND: $word}\n word ",
|
|
319
319
|
"expected": "FOUND: word"
|
|
320
320
|
},
|
|
321
321
|
{
|
|
@@ -422,296 +422,296 @@
|
|
|
422
422
|
},
|
|
423
423
|
{
|
|
424
424
|
"id": 71,
|
|
425
|
-
"name": "
|
|
425
|
+
"name": "Simple Variables",
|
|
426
426
|
"code": "pattern {$x} {$x}\nhello",
|
|
427
427
|
"expected": "hello"
|
|
428
428
|
},
|
|
429
429
|
{
|
|
430
430
|
"id": 72,
|
|
431
|
-
"name": "
|
|
431
|
+
"name": "Multiple Variables",
|
|
432
432
|
"code": "pattern {$x $y $z} {$z, $y, $x}\napple banana cherry",
|
|
433
433
|
"expected": "cherry, banana, apple"
|
|
434
434
|
},
|
|
435
435
|
{
|
|
436
436
|
"id": 73,
|
|
437
|
-
"name": "
|
|
437
|
+
"name": "Flexible Whitespace",
|
|
438
438
|
"code": "pattern {$x$$and$$$y} {$x & $y}\nhello and world",
|
|
439
439
|
"expected": "hello & world"
|
|
440
440
|
},
|
|
441
441
|
{
|
|
442
442
|
"id": 74,
|
|
443
|
-
"name": "
|
|
443
|
+
"name": "Flexible Whitespace (compact)",
|
|
444
444
|
"code": "pattern {$x$$and$$$y} {$x & $y}\nhello and world",
|
|
445
445
|
"expected": "hello & world"
|
|
446
446
|
},
|
|
447
447
|
{
|
|
448
448
|
"id": 75,
|
|
449
|
-
"name": "
|
|
450
|
-
"code": "pattern {$block content {(}{)}} {[$content]}\ndata (
|
|
449
|
+
"name": "Block with parentheses",
|
|
450
|
+
"code": "pattern {$name $block content {(}{)}} {[$content]}\ndata (hello world)",
|
|
451
451
|
"expected": "[hello world]"
|
|
452
452
|
},
|
|
453
453
|
{
|
|
454
454
|
"id": 76,
|
|
455
|
-
"name": "
|
|
455
|
+
"name": "Block with custom delimiters <<>>",
|
|
456
456
|
"code": "pattern {$block data {<<}{>>}} {DATA: $data}\n<<json stuff>>",
|
|
457
457
|
"expected": "DATA: json stuff"
|
|
458
458
|
},
|
|
459
459
|
{
|
|
460
460
|
"id": 77,
|
|
461
|
-
"name": "
|
|
461
|
+
"name": "Multiple Blocks in pattern",
|
|
462
462
|
"code": "pattern {$block a {(}{)}, $block b {[}{]}} {$a|$b}\n(first), [second]",
|
|
463
463
|
"expected": "first|second"
|
|
464
464
|
},
|
|
465
465
|
{
|
|
466
466
|
"id": 78,
|
|
467
|
-
"name": "
|
|
467
|
+
"name": "Basic pattern with literals",
|
|
468
468
|
"code": "pattern {match} {replace}\nmatch",
|
|
469
469
|
"expected": "replace"
|
|
470
470
|
},
|
|
471
471
|
{
|
|
472
472
|
"id": 79,
|
|
473
|
-
"name": "
|
|
473
|
+
"name": "Markdown heading example",
|
|
474
474
|
"code": "pattern {# $title} {<h1>$title</h1>}\n# Welcome",
|
|
475
475
|
"expected": "<h1>Welcome</h1>"
|
|
476
476
|
},
|
|
477
477
|
{
|
|
478
478
|
"id": 80,
|
|
479
|
-
"name": "
|
|
479
|
+
"name": "Multiple patterns cascade",
|
|
480
480
|
"code": "pattern {a} {b}\npattern {b} {c}\npattern {c} {d}\na",
|
|
481
481
|
"expected": "d"
|
|
482
482
|
},
|
|
483
483
|
{
|
|
484
484
|
"id": 81,
|
|
485
|
-
"name": "
|
|
485
|
+
"name": "Context basic",
|
|
486
486
|
"code": "context {\n pattern {$x} {<$x>}\n \n apple\n banana\n}",
|
|
487
|
-
"expected": "<apple>\n<banana>"
|
|
487
|
+
"expected": "<apple>\n <banana>"
|
|
488
488
|
},
|
|
489
489
|
{
|
|
490
490
|
"id": 82,
|
|
491
|
-
"name": "
|
|
491
|
+
"name": "$unique incremental",
|
|
492
492
|
"code": "pattern {item} {[$unique]item_$unique}\nitem\nitem\nitem",
|
|
493
493
|
"expected": "[0]item_0\n[1]item_1\n[2]item_2"
|
|
494
494
|
},
|
|
495
495
|
{
|
|
496
496
|
"id": 83,
|
|
497
|
-
"name": "
|
|
497
|
+
"name": "$unique same ID in replacement",
|
|
498
498
|
"code": "pattern {a} {$unique $unique}\na",
|
|
499
499
|
"expected": "0 0"
|
|
500
500
|
},
|
|
501
501
|
{
|
|
502
502
|
"id": 84,
|
|
503
|
-
"name": "
|
|
503
|
+
"name": "$match full match",
|
|
504
504
|
"code": "pattern {[$x]} {FOUND: $match}\n[data]",
|
|
505
505
|
"expected": "FOUND: [data]"
|
|
506
506
|
},
|
|
507
507
|
{
|
|
508
508
|
"id": 85,
|
|
509
|
-
"name": "
|
|
509
|
+
"name": "$prefix e $suffix",
|
|
510
510
|
"code": "pattern {world} {$prefix$suffix}hello world test",
|
|
511
511
|
"expected": "hello hello test test"
|
|
512
512
|
},
|
|
513
513
|
{
|
|
514
514
|
"id": 86,
|
|
515
|
-
"name": "
|
|
515
|
+
"name": "$clear remove before match",
|
|
516
516
|
"code": "pattern {SKIP $x} {$clear KEEP: $x}\nIGNORE_THIS SKIP keep_this",
|
|
517
517
|
"expected": "KEEP: keep_this"
|
|
518
518
|
},
|
|
519
519
|
{
|
|
520
520
|
"id": 87,
|
|
521
|
-
"name": "
|
|
521
|
+
"name": "$eval arithmetic multiplication",
|
|
522
522
|
"code": "pattern {$x} {$eval{return parseInt($x) * 2;}}\n5",
|
|
523
523
|
"expected": "10"
|
|
524
524
|
},
|
|
525
525
|
{
|
|
526
526
|
"id": 88,
|
|
527
|
-
"name": "
|
|
527
|
+
"name": "Markdown h1 and h2",
|
|
528
528
|
"code": "pattern {## $t} {<h2>$t</h2>}\npattern {# $t} {<h1>$t</h1>}\n# Title\n## Subtitle",
|
|
529
529
|
"expected": "<h1>Title</h1>\n<h2>Subtitle</h2>"
|
|
530
530
|
},
|
|
531
531
|
{
|
|
532
532
|
"id": 89,
|
|
533
|
-
"name": "
|
|
533
|
+
"name": "Markdown bold and italic",
|
|
534
534
|
"code": "pattern {**$t**} {<strong>$t</strong>}\npattern {*$t*} {<em>$t</em>}\n**bold** and *italic*",
|
|
535
535
|
"expected": "<strong>bold</strong> and <em>italic</em>"
|
|
536
536
|
},
|
|
537
537
|
{
|
|
538
538
|
"id": 90,
|
|
539
|
-
"name": "
|
|
539
|
+
"name": "Markdown list",
|
|
540
540
|
"code": "pattern {- $i} {<li>$i</li>}\n- item1\n- item2",
|
|
541
541
|
"expected": "<li>item1</li>\n<li>item2</li>"
|
|
542
542
|
},
|
|
543
543
|
{
|
|
544
544
|
"id": 91,
|
|
545
|
-
"name": "
|
|
546
|
-
"code": "pattern {$a
|
|
545
|
+
"name": "CSV to JSON",
|
|
546
|
+
"code": "pattern {$a,$$b,$$c} {{ id: '$a', name: '$b', role: '$c' }}\n1,Alice,Engineer\n2,Bob,Designer",
|
|
547
547
|
"expected": "{ id: '1', name: 'Alice', role: 'Engineer' }\n{ id: '2', name: 'Bob', role: 'Designer' }"
|
|
548
548
|
},
|
|
549
549
|
{
|
|
550
550
|
"id": 92,
|
|
551
|
-
"name": "
|
|
551
|
+
"name": "Config parser",
|
|
552
552
|
"code": "pattern {$key = $value} {const $key = '$value';}\nhost = localhost\nport = 3000",
|
|
553
553
|
"expected": "const host = 'localhost';\nconst port = '3000';"
|
|
554
554
|
},
|
|
555
555
|
{
|
|
556
556
|
"id": 93,
|
|
557
|
-
"name": "
|
|
557
|
+
"name": "HTML generator",
|
|
558
558
|
"code": "pattern {$tag $content} {<$tag>$content</$tag>}\ndiv HelloWorld\nspan Test",
|
|
559
559
|
"expected": "<div>HelloWorld</div>\n<span>Test</span>"
|
|
560
560
|
},
|
|
561
561
|
{
|
|
562
562
|
"id": 94,
|
|
563
|
-
"name": "
|
|
563
|
+
"name": "Variable reuse in replacement",
|
|
564
564
|
"code": "pattern {$x} {$x:$x:$x}\ndata",
|
|
565
565
|
"expected": "data:data:data"
|
|
566
566
|
},
|
|
567
567
|
{
|
|
568
568
|
"id": 95,
|
|
569
|
-
"name": "
|
|
569
|
+
"name": "Literal dollar sign in replacement",
|
|
570
570
|
"code": "pattern {price} {Price: $50}\nprice",
|
|
571
571
|
"expected": "Price: $50"
|
|
572
572
|
},
|
|
573
573
|
{
|
|
574
574
|
"id": 96,
|
|
575
|
-
"name": "
|
|
575
|
+
"name": "Block with angle brackets nesting",
|
|
576
576
|
"code": "pattern {$block content {<}{>}} {CONTENT:$content}\nouter <inner <deep> more>",
|
|
577
577
|
"expected": "CONTENT:inner <deep> more"
|
|
578
578
|
},
|
|
579
579
|
{
|
|
580
580
|
"id": 97,
|
|
581
|
-
"name": "
|
|
581
|
+
"name": "Pattern with special regex characters",
|
|
582
582
|
"code": "pattern {a.*b} {MATCHED}\na.*b",
|
|
583
583
|
"expected": "MATCHED"
|
|
584
584
|
},
|
|
585
585
|
{
|
|
586
586
|
"id": 98,
|
|
587
|
-
"name": "
|
|
587
|
+
"name": "Multiple $eval operations",
|
|
588
588
|
"code": "pattern {add $a $b} {$eval{return parseInt($a) + parseInt($b)}}\nadd 15 25",
|
|
589
589
|
"expected": "40"
|
|
590
590
|
},
|
|
591
591
|
{
|
|
592
592
|
"id": 99,
|
|
593
|
-
"name": "
|
|
593
|
+
"name": "$eval with division and decimals",
|
|
594
594
|
"code": "pattern {div $a $b} {$eval{return $a / $b}}\ndiv 7 2",
|
|
595
595
|
"expected": "3.5"
|
|
596
596
|
},
|
|
597
597
|
{
|
|
598
598
|
"id": 100,
|
|
599
|
-
"name": "
|
|
599
|
+
"name": "Unicode characters support",
|
|
600
600
|
"code": "pattern {oi $name} {Olá $name!}\noi Mundo",
|
|
601
601
|
"expected": "Olá Mundo!"
|
|
602
602
|
},
|
|
603
603
|
{
|
|
604
604
|
"id": 101,
|
|
605
|
-
"name": "
|
|
605
|
+
"name": "Emoji support",
|
|
606
606
|
"code": "pattern {react $emoji} {Você reagiu: $emoji}\nreact 😊",
|
|
607
607
|
"expected": "Você reagiu: 😊"
|
|
608
608
|
},
|
|
609
609
|
{
|
|
610
610
|
"id": 102,
|
|
611
|
-
"name": "
|
|
611
|
+
"name": "Tab as flexible whitespace",
|
|
612
612
|
"code": "pattern {from$$to} {path: from -> to}\nfrom\tto",
|
|
613
613
|
"expected": "path: from -> to"
|
|
614
614
|
},
|
|
615
615
|
{
|
|
616
616
|
"id": 103,
|
|
617
|
-
"name": "
|
|
617
|
+
"name": "No match leaves input intact",
|
|
618
618
|
"code": "pattern {nomatch $x} {REPLACED}\nnothing matches here",
|
|
619
619
|
"expected": "nothing matches here"
|
|
620
620
|
},
|
|
621
621
|
{
|
|
622
622
|
"id": 104,
|
|
623
|
-
"name": "
|
|
623
|
+
"name": "Empty context is removed",
|
|
624
624
|
"code": "Start\ncontext {}End",
|
|
625
625
|
"expected": "Start\nEnd"
|
|
626
626
|
},
|
|
627
627
|
{
|
|
628
628
|
"id": 105,
|
|
629
|
-
"name": "
|
|
629
|
+
"name": "Block with pipe delimiters",
|
|
630
630
|
"code": "pattern {$block content {|}{|}} {PIPE[$content]}\n|value|",
|
|
631
631
|
"expected": "PIPE[value]"
|
|
632
632
|
},
|
|
633
633
|
{
|
|
634
634
|
"id": 106,
|
|
635
|
-
"name": "
|
|
635
|
+
"name": "Block with double quotes delimiter",
|
|
636
636
|
"code": "pattern {$block str {\"}{\"}} {STR[$str]}\n\"hello world\"",
|
|
637
637
|
"expected": "STR[hello world]"
|
|
638
638
|
},
|
|
639
639
|
{
|
|
640
640
|
"id": 107,
|
|
641
|
-
"name": "
|
|
642
|
-
"code": "pattern {
|
|
641
|
+
"name": "Comma-separated pattern matching",
|
|
642
|
+
"code": "pattern {$$a,$$b,$$c} {[$a] [$b] [$c]}\none,two,three",
|
|
643
643
|
"expected": "[one] [two] [three]"
|
|
644
644
|
},
|
|
645
645
|
{
|
|
646
646
|
"id": 108,
|
|
647
|
-
"name": "
|
|
647
|
+
"name": "Apostrophe in pattern literal",
|
|
648
648
|
"code": "pattern {It's $name} {Hello $name}\nIt's John",
|
|
649
649
|
"expected": "Hello John"
|
|
650
650
|
},
|
|
651
651
|
{
|
|
652
652
|
"id": 109,
|
|
653
|
-
"name": "
|
|
653
|
+
"name": "Math operation with $eval",
|
|
654
654
|
"code": "pattern {num $a} {$eval{return parseInt($a) + 1}}\nnum 999",
|
|
655
655
|
"expected": "1000"
|
|
656
656
|
},
|
|
657
657
|
{
|
|
658
658
|
"id": 110,
|
|
659
|
-
"name": "
|
|
659
|
+
"name": "Multi-character block delimiters",
|
|
660
660
|
"code": "pattern {$block code {open}{close}} {CODE[$code]}\nopenFunction()close",
|
|
661
661
|
"expected": "CODE[Function()]"
|
|
662
662
|
},
|
|
663
663
|
{
|
|
664
664
|
"id": 111,
|
|
665
|
-
"name": "
|
|
666
|
-
"code": "pattern {
|
|
665
|
+
"name": "Pattern with dot literal",
|
|
666
|
+
"code": "pattern {$$file.txt} {File: $file.txt}\ndocument.txt",
|
|
667
667
|
"expected": "File: document"
|
|
668
668
|
},
|
|
669
669
|
{
|
|
670
670
|
"id": 112,
|
|
671
|
-
"name": "
|
|
671
|
+
"name": "Repeated global pattern matching",
|
|
672
672
|
"code": "pattern {x} {X}\nxxxxx",
|
|
673
673
|
"expected": "XXXXX"
|
|
674
674
|
},
|
|
675
675
|
{
|
|
676
676
|
"id": 113,
|
|
677
|
-
"name": "
|
|
677
|
+
"name": "Overlapping patterns cascade",
|
|
678
678
|
"code": "pattern {a} {A}\npattern {A} {B}\npattern {B} {C}\na",
|
|
679
679
|
"expected": "C"
|
|
680
680
|
},
|
|
681
681
|
{
|
|
682
682
|
"id": 114,
|
|
683
|
-
"name": "
|
|
683
|
+
"name": "Context with nested patterns",
|
|
684
684
|
"code": "context {\n pattern {$x} {\n<$x>}\napple\nbanana\ncherry\n}",
|
|
685
685
|
"expected": "<apple>\n<banana>\n<cherry>"
|
|
686
686
|
},
|
|
687
687
|
{
|
|
688
688
|
"id": 115,
|
|
689
|
-
"name": "
|
|
689
|
+
"name": "$eval with subtraction (negatives)",
|
|
690
690
|
"code": "pattern {calc $a $b} {$eval{return $a - $b}}\ncalc -5 -10",
|
|
691
691
|
"expected": "5"
|
|
692
692
|
},
|
|
693
693
|
{
|
|
694
694
|
"id": 116,
|
|
695
|
-
"name": "
|
|
696
|
-
"code": "pattern {
|
|
695
|
+
"name": "Multiple spaces trim with flexible whitespace",
|
|
696
|
+
"code": "pattern {$ $word$ } {FOUND: $word}\n word ",
|
|
697
697
|
"expected": "FOUND: word"
|
|
698
698
|
},
|
|
699
699
|
{
|
|
700
700
|
"id": 117,
|
|
701
|
-
"name": "
|
|
701
|
+
"name": "Block captures parentheses with nesting",
|
|
702
702
|
"code": "pattern {$block content {(}{)}} {B[$content]}\n(a(b)c)",
|
|
703
703
|
"expected": "B[a(b)c]"
|
|
704
704
|
},
|
|
705
705
|
{
|
|
706
706
|
"id": 118,
|
|
707
|
-
"name": "
|
|
707
|
+
"name": "Arithmetic in config pattern",
|
|
708
708
|
"code": "pattern {$x + $y = $z} {$z is sum of $x and $y}\n5 + 3 = 8",
|
|
709
709
|
"expected": "8 is sum of 5 and 3"
|
|
710
710
|
},
|
|
711
711
|
{
|
|
712
712
|
"id": 119,
|
|
713
|
-
"name": "
|
|
714
|
-
"code": "pattern {
|
|
713
|
+
"name": "Trailing punctuation in pattern",
|
|
714
|
+
"code": "pattern {$$w,} {$$w!}\nhello,",
|
|
715
715
|
"expected": "hello!"
|
|
716
716
|
}
|
|
717
717
|
]
|