papagaio 0.2.8 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -16
- package/package.json +1 -1
- package/src/papagaio.js +348 -190
- package/tests/tests.json +12 -12
package/README.md
CHANGED
|
@@ -46,17 +46,6 @@ apple banana cherry
|
|
|
46
46
|
```
|
|
47
47
|
Output: `cherry, banana, apple`
|
|
48
48
|
|
|
49
|
-
### 3. Flexible Whitespace (`$$`)
|
|
50
|
-
|
|
51
|
-
```
|
|
52
|
-
pattern {$x$$and$$$y} {$x & $y}
|
|
53
|
-
hello and world
|
|
54
|
-
hello and world
|
|
55
|
-
```
|
|
56
|
-
Output: `hello & world` (both)
|
|
57
|
-
|
|
58
|
-
`$$` = zero or more spaces/tabs/newlines.
|
|
59
|
-
|
|
60
49
|
## Blocks
|
|
61
50
|
|
|
62
51
|
Capture content between delimiters.
|
|
@@ -266,9 +255,13 @@ Output:
|
|
|
266
255
|
|
|
267
256
|
### Matching
|
|
268
257
|
- Variables (`$x`) capture **one word** (no spaces)
|
|
269
|
-
-
|
|
258
|
+
- Variables (`$$x`) capture **one or more words** (with spaces)
|
|
270
259
|
- Patterns apply **globally** each iteration
|
|
271
260
|
- Auto-recursion until: max 512 iterations OR no changes
|
|
261
|
+
- `$ ` = one or more of the whitespace character written after the sigil (space, tab, or newline)
|
|
262
|
+
- `$$ ` = zero or more of the whitespace character written after the sigil (space, tab, or newline)
|
|
263
|
+
- `$$$ ` = one or more whitespace characters of any kind
|
|
264
|
+
- `$$$$ ` = zero or more whitespace characters of any kind
|
|
272
265
|
|
|
273
266
|
### Block Matching
|
|
274
267
|
- `$block name {open}{close}` captures between delimiters
|
|
@@ -280,8 +273,11 @@ Output:
|
|
|
280
273
|
- Reuse: `$x` appears multiple times in replace
|
|
281
274
|
- Undefined: becomes empty string
|
|
282
275
|
|
|
283
|
-
###
|
|
284
|
-
- You cannot match words containing the sigil character.
|
|
276
|
+
### Limitations
|
|
277
|
+
- You cannot match words containing the current sigil character.
|
|
278
|
+
- You cannot match a $block{}{} using the current delimiters.
|
|
279
|
+
- By design, whitespace operators need a whitespace character after them to work properly, even the `$$$ ` and `$$$$ ` ones.
|
|
280
|
+
- Multi-word variables (`$$x`) also capture leading/trailing whitespace, so be careful when using them together with whitespace operators.
|
|
285
281
|
|
|
286
282
|
---
|
|
287
283
|
|
|
@@ -289,11 +285,13 @@ Output:
|
|
|
289
285
|
|
|
290
286
|
| Problem | Solution |
|
|
291
287
|
|---------|----------|
|
|
292
|
-
| Pattern doesn't match | Use `$$` between elements for flexible whitespace |
|
|
293
288
|
| Variable not captured | Check space between variables |
|
|
294
289
|
| Block not working | Verify balanced delimiters `{` `}` |
|
|
295
290
|
| Infinite recursion | Use `$clear` or reduce `recursion_limit` |
|
|
296
291
|
| $eval not working | Errors return empty string, use try-catch |
|
|
292
|
+
| Pattern doesn't match | Use whitespace operators between elements for flexible whitespace |
|
|
293
|
+
| Whitespace operators | Remember they need a whitespace after them to work properly |
|
|
294
|
+
| Whitespace operators not matching | Multi-word variables (`$$x`) also capture leading/trailing whitespace, so be careful when using them together with whitespace operators. |
|
|
297
295
|
|
|
298
296
|
## Known Bugs
|
|
299
297
|
|
|
@@ -305,7 +303,7 @@ Output:
|
|
|
305
303
|
|
|
306
304
|
```
|
|
307
305
|
pattern {$x $y} {$y, $x} # basic pattern
|
|
308
|
-
pattern {$x
|
|
306
|
+
pattern {$x$ $y} {$x-$y} # flexible whitespace
|
|
309
307
|
pattern {$block n {o}{c}} {$n} # block
|
|
310
308
|
context { ... } # recursive scope
|
|
311
309
|
$unique # unique ID per pattern
|
|
@@ -313,6 +311,7 @@ $match # full match
|
|
|
313
311
|
$prefix / $suffix # before/after
|
|
314
312
|
$clear # clear before
|
|
315
313
|
$eval{code} # execute JS
|
|
314
|
+
$ / $$ / $$$ / $$$$ # whitespace operators
|
|
316
315
|
```
|
|
317
316
|
|
|
318
317
|
---
|
package/package.json
CHANGED
package/src/papagaio.js
CHANGED
|
@@ -1,95 +1,88 @@
|
|
|
1
1
|
// https://github.com/jardimdanificado/papagaio
|
|
2
|
-
|
|
3
|
-
function
|
|
4
|
-
const
|
|
5
|
-
let
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
2
|
+
|
|
3
|
+
function parsePattern(papagaio, pattern) {
|
|
4
|
+
const tokens = [], S = papagaio.symbols.sigil, S2 = S + S;
|
|
5
|
+
let i = 0;
|
|
6
|
+
const isWhitespaceChar = c => /\s/.test(c);
|
|
7
|
+
const getWhitespaceType = c => c === ' ' ? 'space' : c === '\t' ? 'tab' : c === '\n' ? 'newline' : c === '\r' ? 'carriage-return' : 'other';
|
|
8
|
+
while (i < pattern.length) {
|
|
9
|
+
if (pattern.startsWith(S + S + S, i) && i + 3 < pattern.length && isWhitespaceChar(pattern[i + 3])) {
|
|
10
|
+
tokens.push({ type: 'any-ws-required', wsChar: pattern[i + 3] });
|
|
11
|
+
i += 4;
|
|
12
12
|
continue;
|
|
13
13
|
}
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
src = left + prefix + proc + right;
|
|
19
|
-
}
|
|
20
|
-
return src;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
function extractBlock(papagaio, src, openPos, openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close) {
|
|
24
|
-
let i = openPos;
|
|
25
|
-
if (openDelim.length > 1 || closeDelim.length > 1) {
|
|
26
|
-
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
27
|
-
i += openDelim.length;
|
|
28
|
-
const innerStart = i;
|
|
29
|
-
let d = 0;
|
|
30
|
-
while (i < src.length) {
|
|
31
|
-
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
32
|
-
d++;
|
|
33
|
-
i += openDelim.length;
|
|
34
|
-
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
35
|
-
if (d === 0) return [src.substring(innerStart, i), i + closeDelim.length];
|
|
36
|
-
d--;
|
|
37
|
-
i += closeDelim.length;
|
|
38
|
-
} else i++;
|
|
39
|
-
}
|
|
40
|
-
return [src.substring(innerStart), src.length];
|
|
14
|
+
if (pattern.startsWith(S + S + S + S, i) && i + 4 < pattern.length && isWhitespaceChar(pattern[i + 4])) {
|
|
15
|
+
tokens.push({ type: 'any-ws-optional', wsChar: pattern[i + 4] });
|
|
16
|
+
i += 5;
|
|
17
|
+
continue;
|
|
41
18
|
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
19
|
+
if (pattern.startsWith(S2, i)) {
|
|
20
|
+
let j = i + S2.length, varName = '';
|
|
21
|
+
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
22
|
+
if (varName) {
|
|
23
|
+
if (j < pattern.length && isWhitespaceChar(pattern[j])) {
|
|
24
|
+
tokens.push({ type: 'var-ws', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
|
|
25
|
+
i = j + 1;
|
|
26
|
+
continue;
|
|
27
|
+
} else if (j < pattern.length && pattern[j] === S) {
|
|
28
|
+
tokens.push({ type: 'var-ws', varName, wsTrailing: 'optional', wsChar: null });
|
|
29
|
+
i = j + 1;
|
|
30
|
+
continue;
|
|
31
|
+
} else {
|
|
32
|
+
tokens.push({ type: 'var-ws', varName });
|
|
33
|
+
i = j;
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
55
36
|
}
|
|
56
|
-
return [src.substring(innerStart, i), i + 1];
|
|
57
37
|
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const tokens = [];
|
|
64
|
-
let i = 0;
|
|
65
|
-
const S = papagaio.symbols.sigil, S2 = S + S;
|
|
66
|
-
while (i < pattern.length) {
|
|
38
|
+
if (pattern.startsWith(S2, i) && i + 2 < pattern.length && isWhitespaceChar(pattern[i + 2])) {
|
|
39
|
+
tokens.push({ type: 'ws-optional', wsType: getWhitespaceType(pattern[i + 2]), wsChar: pattern[i + 2] });
|
|
40
|
+
i += 3;
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
67
43
|
if (pattern.startsWith(S2, i)) {
|
|
68
44
|
tokens.push({ type: 'whitespace-optional' });
|
|
69
45
|
i += S2.length;
|
|
70
46
|
continue;
|
|
71
47
|
}
|
|
48
|
+
if (pattern[i] === S && i + 1 < pattern.length && isWhitespaceChar(pattern[i + 1])) {
|
|
49
|
+
tokens.push({ type: 'ws-required', wsType: getWhitespaceType(pattern[i + 1]), wsChar: pattern[i + 1] });
|
|
50
|
+
i += 2;
|
|
51
|
+
continue;
|
|
52
|
+
}
|
|
72
53
|
if (pattern.startsWith(S + 'block', i)) {
|
|
73
|
-
let j = i + S.length +
|
|
54
|
+
let j = i + S.length + 5;
|
|
74
55
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
75
56
|
let varName = '';
|
|
76
57
|
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
77
58
|
if (varName) {
|
|
78
59
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
79
|
-
let openDelim = papagaio.symbols.open;
|
|
60
|
+
let openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close;
|
|
61
|
+
let openDelimIsWs = false, closeDelimIsWs = false;
|
|
80
62
|
if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
|
|
81
63
|
const [c, e] = extractBlock(papagaio, pattern, j);
|
|
82
|
-
|
|
64
|
+
const trimmed = c.trim();
|
|
65
|
+
if (trimmed === '') {
|
|
66
|
+
openDelimIsWs = true;
|
|
67
|
+
let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
|
|
68
|
+
while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
|
|
69
|
+
openDelim = pattern.substring(wsStart, wsEnd);
|
|
70
|
+
} else openDelim = unescapeDelimiter(trimmed) || papagaio.symbols.open;
|
|
83
71
|
j = e;
|
|
84
72
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
85
73
|
}
|
|
86
|
-
let closeDelim = papagaio.symbols.close;
|
|
87
74
|
if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
|
|
88
75
|
const [c, e] = extractBlock(papagaio, pattern, j);
|
|
89
|
-
|
|
76
|
+
const trimmed = c.trim();
|
|
77
|
+
if (trimmed === '') {
|
|
78
|
+
closeDelimIsWs = true;
|
|
79
|
+
let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
|
|
80
|
+
while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
|
|
81
|
+
closeDelim = pattern.substring(wsStart, wsEnd);
|
|
82
|
+
} else closeDelim = unescapeDelimiter(trimmed) || papagaio.symbols.close;
|
|
90
83
|
j = e;
|
|
91
84
|
}
|
|
92
|
-
tokens.push({ type: 'block', varName, openDelim, closeDelim });
|
|
85
|
+
tokens.push({ type: 'block', varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs });
|
|
93
86
|
i = j;
|
|
94
87
|
continue;
|
|
95
88
|
}
|
|
@@ -98,210 +91,375 @@ function parsePattern(papagaio, pattern) {
|
|
|
98
91
|
let j = i + S.length, varName = '';
|
|
99
92
|
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
100
93
|
if (varName) {
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
94
|
+
if (j < pattern.length && isWhitespaceChar(pattern[j])) {
|
|
95
|
+
tokens.push({ type: 'var', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
|
|
96
|
+
i = j + 1;
|
|
97
|
+
continue;
|
|
98
|
+
} else if (j < pattern.length && pattern[j] === S) {
|
|
99
|
+
tokens.push({ type: 'var', varName, wsTrailing: 'optional', wsChar: null });
|
|
100
|
+
i = j + 1;
|
|
101
|
+
continue;
|
|
102
|
+
} else {
|
|
103
|
+
tokens.push({ type: 'var', varName });
|
|
104
|
+
i = j;
|
|
105
|
+
continue;
|
|
106
|
+
}
|
|
104
107
|
}
|
|
105
|
-
tokens.push({ type: 'literal', value: S });
|
|
106
|
-
i += S.length;
|
|
107
|
-
continue;
|
|
108
108
|
}
|
|
109
|
-
if (
|
|
109
|
+
if (isWhitespaceChar(pattern[i])) {
|
|
110
110
|
let ws = '';
|
|
111
|
-
while (i < pattern.length &&
|
|
112
|
-
tokens.push({ type: '
|
|
111
|
+
while (i < pattern.length && isWhitespaceChar(pattern[i])) ws += pattern[i++];
|
|
112
|
+
tokens.push({ type: 'literal-ws', value: ws });
|
|
113
113
|
continue;
|
|
114
114
|
}
|
|
115
115
|
let literal = '';
|
|
116
|
-
while (i < pattern.length && !pattern.startsWith(S, i) &&
|
|
116
|
+
while (i < pattern.length && !pattern.startsWith(S, i) && !isWhitespaceChar(pattern[i])) literal += pattern[i++];
|
|
117
117
|
if (literal) tokens.push({ type: 'literal', value: literal });
|
|
118
118
|
}
|
|
119
119
|
return tokens;
|
|
120
120
|
}
|
|
121
121
|
|
|
122
122
|
function matchPattern(papagaio, src, tokens, startPos = 0) {
|
|
123
|
-
let pos = startPos;
|
|
124
|
-
const
|
|
123
|
+
let pos = startPos, captures = {};
|
|
124
|
+
const matchWhitespaceType = (str, idx, wsType) => {
|
|
125
|
+
if (idx >= str.length) return { matched: '', newPos: idx };
|
|
126
|
+
if (wsType === 'space' && str[idx] === ' ') {
|
|
127
|
+
let j = idx;
|
|
128
|
+
while (j < str.length && str[j] === ' ') j++;
|
|
129
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
130
|
+
}
|
|
131
|
+
if (wsType === 'tab' && str[idx] === '\t') {
|
|
132
|
+
let j = idx;
|
|
133
|
+
while (j < str.length && str[j] === '\t') j++;
|
|
134
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
135
|
+
}
|
|
136
|
+
if (wsType === 'newline' && str[idx] === '\n') {
|
|
137
|
+
let j = idx;
|
|
138
|
+
while (j < str.length && str[j] === '\n') j++;
|
|
139
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
140
|
+
}
|
|
141
|
+
return { matched: '', newPos: idx };
|
|
142
|
+
};
|
|
125
143
|
for (let ti = 0; ti < tokens.length; ti++) {
|
|
126
144
|
const token = tokens[ti];
|
|
127
|
-
if (token.type === '
|
|
128
|
-
|
|
145
|
+
if (token.type === 'literal-ws') {
|
|
146
|
+
if (!src.startsWith(token.value, pos)) return null;
|
|
147
|
+
pos += token.value.length;
|
|
129
148
|
continue;
|
|
130
149
|
}
|
|
131
|
-
if (token.type === '
|
|
150
|
+
if (token.type === 'ws-required') {
|
|
151
|
+
const { matched, newPos } = matchWhitespaceType(src, pos, token.wsType);
|
|
152
|
+
if (!matched) return null;
|
|
153
|
+
pos = newPos;
|
|
154
|
+
continue;
|
|
155
|
+
}
|
|
156
|
+
if (token.type === 'ws-optional') {
|
|
157
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsType);
|
|
158
|
+
pos = newPos;
|
|
159
|
+
continue;
|
|
160
|
+
}
|
|
161
|
+
if (token.type === 'any-ws-required') {
|
|
132
162
|
if (pos >= src.length || !/\s/.test(src[pos])) return null;
|
|
133
163
|
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
134
164
|
continue;
|
|
135
165
|
}
|
|
166
|
+
if (token.type === 'any-ws-optional') {
|
|
167
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
168
|
+
continue;
|
|
169
|
+
}
|
|
170
|
+
if (token.type === 'whitespace-optional') {
|
|
171
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
172
|
+
continue;
|
|
173
|
+
}
|
|
136
174
|
if (token.type === 'literal') {
|
|
137
175
|
if (!src.startsWith(token.value, pos)) return null;
|
|
138
176
|
pos += token.value.length;
|
|
139
177
|
continue;
|
|
140
178
|
}
|
|
141
179
|
if (token.type === 'var') {
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if (nextToken) {
|
|
145
|
-
|
|
146
|
-
while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
147
|
-
} else if (nextToken.type === 'literal') {
|
|
148
|
-
const stopChar = nextToken.value[0];
|
|
149
|
-
while (pos < src.length && src[pos] !== stopChar && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
150
|
-
} else if (nextToken.type === 'block') {
|
|
151
|
-
while (pos < src.length && !src.startsWith(nextToken.openDelim, pos) && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
152
|
-
} else {
|
|
153
|
-
while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
154
|
-
}
|
|
180
|
+
let v = '';
|
|
181
|
+
const nextToken = findNextSignificantToken(tokens, ti);
|
|
182
|
+
if (nextToken && nextToken.type === 'literal') {
|
|
183
|
+
while (pos < src.length && !src.startsWith(nextToken.value, pos) && !/\s/.test(src[pos])) v += src[pos++];
|
|
155
184
|
} else {
|
|
156
|
-
while (pos < src.length && !/\s/.test(src[pos]))
|
|
185
|
+
while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
|
|
157
186
|
}
|
|
158
|
-
if (
|
|
159
|
-
|
|
187
|
+
if (token.wsTrailing && token.wsTrailing !== 'optional') {
|
|
188
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
|
|
189
|
+
pos = newPos;
|
|
190
|
+
} else if (token.wsTrailing === 'optional') {
|
|
191
|
+
const { newPos } = matchWhitespaceType(src, pos, 'space');
|
|
192
|
+
pos = newPos;
|
|
193
|
+
}
|
|
194
|
+
if (!v) return null;
|
|
195
|
+
captures[papagaio.symbols.sigil + token.varName] = v;
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
if (token.type === 'var-ws') {
|
|
199
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
200
|
+
const n = findNextSignificantToken(tokens, ti);
|
|
201
|
+
let v = '';
|
|
202
|
+
if (!n || ['var', 'var-ws', 'block'].includes(n.type)) {
|
|
203
|
+
while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
|
|
204
|
+
} else if (n.type === 'literal') {
|
|
205
|
+
while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n') v += src[pos++];
|
|
206
|
+
v = v.trimEnd();
|
|
207
|
+
}
|
|
208
|
+
if (token.wsTrailing && token.wsTrailing !== 'optional') {
|
|
209
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
|
|
210
|
+
pos = newPos;
|
|
211
|
+
} else if (token.wsTrailing === 'optional') {
|
|
212
|
+
const { newPos } = matchWhitespaceType(src, pos, 'space');
|
|
213
|
+
pos = newPos;
|
|
214
|
+
}
|
|
215
|
+
if (!v) return null;
|
|
216
|
+
captures[papagaio.symbols.sigil + token.varName] = v;
|
|
160
217
|
continue;
|
|
161
218
|
}
|
|
162
219
|
if (token.type === 'block') {
|
|
163
|
-
const { varName, openDelim, closeDelim } = token;
|
|
220
|
+
const { varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs } = token;
|
|
164
221
|
if (!src.startsWith(openDelim, pos)) return null;
|
|
165
|
-
const [
|
|
166
|
-
captures[papagaio.symbols.sigil + varName] =
|
|
167
|
-
pos =
|
|
222
|
+
const [c, e] = extractBlockWithWsDelimiter(papagaio, src, pos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs);
|
|
223
|
+
captures[papagaio.symbols.sigil + varName] = c;
|
|
224
|
+
pos = e;
|
|
168
225
|
continue;
|
|
169
226
|
}
|
|
170
227
|
}
|
|
171
228
|
return { captures, endPos: pos };
|
|
172
229
|
}
|
|
173
230
|
|
|
174
|
-
function
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
231
|
+
function findNextSignificantToken(t, i) {
|
|
232
|
+
for (let k = i + 1; k < t.length; k++) {
|
|
233
|
+
if (!['whitespace-optional', 'ws-optional', 'ws-required', 'any-ws-optional', 'any-ws-required'].includes(t[k].type)) return t[k];
|
|
234
|
+
}
|
|
235
|
+
return null;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
function extractBlockWithWsDelimiter(p, src, openPos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs) {
|
|
239
|
+
let i = openPos;
|
|
240
|
+
if (openDelimIsWs || closeDelimIsWs) {
|
|
241
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
242
|
+
i += openDelim.length;
|
|
243
|
+
const s = i;
|
|
244
|
+
let d = 0;
|
|
245
|
+
while (i < src.length) {
|
|
246
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
247
|
+
d++;
|
|
248
|
+
i += openDelim.length;
|
|
249
|
+
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
250
|
+
if (!d) return [src.substring(s, i), i + closeDelim.length];
|
|
251
|
+
d--;
|
|
252
|
+
i += closeDelim.length;
|
|
253
|
+
} else i++;
|
|
254
|
+
}
|
|
255
|
+
return [src.substring(s), src.length];
|
|
256
|
+
}
|
|
257
|
+
return ['', i];
|
|
258
|
+
}
|
|
259
|
+
if (openDelim.length > 1 || closeDelim.length > 1) {
|
|
260
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
261
|
+
i += openDelim.length;
|
|
262
|
+
const s = i;
|
|
263
|
+
let d = 0;
|
|
264
|
+
while (i < src.length) {
|
|
265
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
266
|
+
d++;
|
|
267
|
+
i += openDelim.length;
|
|
268
|
+
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
269
|
+
if (!d) return [src.substring(s, i), i + closeDelim.length];
|
|
270
|
+
d--;
|
|
271
|
+
i += closeDelim.length;
|
|
272
|
+
} else i++;
|
|
273
|
+
}
|
|
274
|
+
return [src.substring(s), src.length];
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
if (src[i] === openDelim) {
|
|
278
|
+
i++;
|
|
279
|
+
const s = i;
|
|
280
|
+
if (openDelim === closeDelim) {
|
|
281
|
+
while (i < src.length && src[i] !== closeDelim) i++;
|
|
282
|
+
return [src.substring(s, i), i + 1];
|
|
283
|
+
}
|
|
284
|
+
let d = 1;
|
|
285
|
+
while (i < src.length && d > 0) {
|
|
286
|
+
if (src[i] === openDelim) d++;
|
|
287
|
+
else if (src[i] === closeDelim) d--;
|
|
288
|
+
if (d > 0) i++;
|
|
289
|
+
}
|
|
290
|
+
return [src.substring(s, i), i + 1];
|
|
291
|
+
}
|
|
292
|
+
return ['', i];
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
function extractBlock(p, src, openPos, openDelim = p.symbols.open, closeDelim = p.symbols.close) {
|
|
296
|
+
let i = openPos;
|
|
297
|
+
if (openDelim.length > 1 || closeDelim.length > 1) {
|
|
298
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
299
|
+
i += openDelim.length;
|
|
300
|
+
const s = i;
|
|
301
|
+
let d = 0;
|
|
302
|
+
while (i < src.length) {
|
|
303
|
+
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
304
|
+
d++;
|
|
305
|
+
i += openDelim.length;
|
|
306
|
+
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
307
|
+
if (!d) return [src.substring(s, i), i + closeDelim.length];
|
|
308
|
+
d--;
|
|
309
|
+
i += closeDelim.length;
|
|
310
|
+
} else i++;
|
|
311
|
+
}
|
|
312
|
+
return [src.substring(s), src.length];
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
if (src[i] === openDelim) {
|
|
316
|
+
i++;
|
|
317
|
+
const s = i;
|
|
318
|
+
if (openDelim === closeDelim) {
|
|
319
|
+
while (i < src.length && src[i] !== closeDelim) i++;
|
|
320
|
+
return [src.substring(s, i), i + 1];
|
|
321
|
+
}
|
|
322
|
+
let d = 1;
|
|
323
|
+
while (i < src.length && d > 0) {
|
|
324
|
+
if (src[i] === openDelim) d++;
|
|
325
|
+
else if (src[i] === closeDelim) d--;
|
|
326
|
+
if (d > 0) i++;
|
|
327
|
+
}
|
|
328
|
+
return [src.substring(s, i), i + 1];
|
|
329
|
+
}
|
|
330
|
+
return ['', i];
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
function processContext(p, src) {
|
|
334
|
+
const r = new RegExp(`\\b${p.symbols.context}\\s*\\${p.symbols.open}`, "g");
|
|
335
|
+
let m, a = [];
|
|
336
|
+
while ((m = r.exec(src)) !== null) a.push({ idx: m.index, pos: m.index + m[0].length - 1 });
|
|
337
|
+
for (let j = a.length - 1; j >= 0; j--) {
|
|
338
|
+
const x = a[j], [c, e] = extractBlock(p, src, x.pos);
|
|
339
|
+
if (!c.trim()) {
|
|
340
|
+
src = src.slice(0, x.idx) + src.slice(e);
|
|
341
|
+
continue;
|
|
342
|
+
}
|
|
343
|
+
const r2 = p.process(c);
|
|
344
|
+
let L = src.substring(0, x.idx), R = src.substring(e);
|
|
345
|
+
let pre = L.endsWith("\n") ? "\n" : "";
|
|
346
|
+
if (pre) L = L.slice(0, -1);
|
|
347
|
+
src = L + pre + r2 + R;
|
|
348
|
+
}
|
|
349
|
+
return src;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
function collectPatterns(p, src) {
|
|
353
|
+
const A = [], r = new RegExp(`\\b${p.symbols.pattern}\\s*\\${p.symbols.open}`, "g");
|
|
354
|
+
let out = src;
|
|
355
|
+
while (1) {
|
|
356
|
+
r.lastIndex = 0;
|
|
357
|
+
const m = r.exec(out);
|
|
181
358
|
if (!m) break;
|
|
182
|
-
const
|
|
183
|
-
const
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
result = result.slice(0, start) + result.slice(posAfterReplace);
|
|
359
|
+
const s = m.index, o = m.index + m[0].length - 1;
|
|
360
|
+
const [mp, em] = extractBlock(p, out, o);
|
|
361
|
+
let k = em;
|
|
362
|
+
while (k < out.length && /\s/.test(out[k])) k++;
|
|
363
|
+
if (k < out.length && out[k] === p.symbols.open) {
|
|
364
|
+
const [rp, er] = extractBlock(p, out, k);
|
|
365
|
+
A.push({ match: mp.trim(), replace: rp.trim() });
|
|
366
|
+
out = out.slice(0, s) + out.slice(er);
|
|
191
367
|
continue;
|
|
192
368
|
}
|
|
193
|
-
|
|
369
|
+
out = out.slice(0, s) + out.slice(em);
|
|
194
370
|
}
|
|
195
|
-
return [
|
|
371
|
+
return [A, out];
|
|
196
372
|
}
|
|
197
373
|
|
|
198
|
-
function applyPatterns(
|
|
199
|
-
let
|
|
200
|
-
for (const pat of
|
|
201
|
-
const
|
|
202
|
-
let
|
|
203
|
-
let pos = 0, matched = false;
|
|
374
|
+
function applyPatterns(p, src, pats) {
|
|
375
|
+
let clear = false, last = "", S = p.symbols.sigil;
|
|
376
|
+
for (const pat of pats) {
|
|
377
|
+
const t = parsePattern(p, pat.match);
|
|
378
|
+
let n = '', pos = 0, ok = false;
|
|
204
379
|
while (pos < src.length) {
|
|
205
|
-
const
|
|
206
|
-
if (
|
|
207
|
-
|
|
208
|
-
const { captures, endPos } =
|
|
209
|
-
let
|
|
380
|
+
const m = matchPattern(p, src, t, pos);
|
|
381
|
+
if (m) {
|
|
382
|
+
ok = true;
|
|
383
|
+
const { captures, endPos } = m;
|
|
384
|
+
let r = pat.replace;
|
|
210
385
|
for (const [k, v] of Object.entries(captures)) {
|
|
211
|
-
const
|
|
212
|
-
|
|
386
|
+
const e = escapeRegex(k);
|
|
387
|
+
r = r.replace(new RegExp(e + '(?![A-Za-z0-9_])', 'g'), v);
|
|
213
388
|
}
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
result = result.replace(/\$eval\{([^}]*)\}/g, (_, code) => {
|
|
389
|
+
const uid = p.unique_id++;
|
|
390
|
+
r = r.replace(new RegExp(`${escapeRegex(S)}unique\\b`, 'g'), () => String(uid));
|
|
391
|
+
r = r.replace(/\$eval\{([^}]*)\}/g, (_, c) => {
|
|
218
392
|
try {
|
|
219
|
-
|
|
220
|
-
return String(Function("papagaio", "ctx", wrapped)(papagaio, {}));
|
|
393
|
+
return String(Function("papagaio", "ctx", `"use strict";return(function(){${c}})();`)(p, {}));
|
|
221
394
|
} catch {
|
|
222
395
|
return "";
|
|
223
396
|
}
|
|
224
397
|
});
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
result = result.replace(new RegExp(`${escapeRegex(S)}clear\\b\\s?`, 'g'), '');
|
|
230
|
-
clearFlag = true;
|
|
398
|
+
r = r.replace(new RegExp(escapeRegex(S + S), 'g'), '');
|
|
399
|
+
if (new RegExp(`${escapeRegex(S)}clear\\b`, 'g').test(r)) {
|
|
400
|
+
r = r.replace(new RegExp(`${escapeRegex(S)}clear\\b\\s?`, 'g'), '');
|
|
401
|
+
clear = true;
|
|
231
402
|
}
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
.replace(new RegExp(`${escapeRegex(S)}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
newSrc += result;
|
|
239
|
-
lastResult = result;
|
|
403
|
+
const ms = pos, me = endPos;
|
|
404
|
+
r = r.replace(new RegExp(`${escapeRegex(S)}prefix\\b`, 'g'), src.slice(0, ms))
|
|
405
|
+
.replace(new RegExp(`${escapeRegex(S)}suffix\\b`, 'g'), src.slice(me))
|
|
406
|
+
.replace(new RegExp(`${escapeRegex(S)}match\\b`, 'g'), src.slice(ms, me));
|
|
407
|
+
n += r;
|
|
408
|
+
last = r;
|
|
240
409
|
pos = endPos;
|
|
241
410
|
} else {
|
|
242
|
-
|
|
411
|
+
n += src[pos];
|
|
243
412
|
pos++;
|
|
244
413
|
}
|
|
245
414
|
}
|
|
246
|
-
if (
|
|
247
|
-
src =
|
|
248
|
-
|
|
415
|
+
if (ok) {
|
|
416
|
+
src = clear ? last : n;
|
|
417
|
+
clear = false;
|
|
249
418
|
}
|
|
250
419
|
}
|
|
251
420
|
return src;
|
|
252
421
|
}
|
|
253
422
|
|
|
254
|
-
function escapeRegex(
|
|
255
|
-
return
|
|
423
|
+
function escapeRegex(s) {
|
|
424
|
+
return s.replace(/[.*+?^${}()|[\]\\""']/g, '\\$&');
|
|
256
425
|
}
|
|
257
426
|
|
|
258
|
-
function unescapeDelimiter(
|
|
259
|
-
let
|
|
260
|
-
for (let i = 0; i <
|
|
261
|
-
if (
|
|
262
|
-
const
|
|
263
|
-
if (
|
|
264
|
-
|
|
427
|
+
function unescapeDelimiter(s) {
|
|
428
|
+
let r = '';
|
|
429
|
+
for (let i = 0; i < s.length; i++) {
|
|
430
|
+
if (s[i] === '\\' && i + 1 < s.length) {
|
|
431
|
+
const n = s[i + 1];
|
|
432
|
+
if (n === '"' || n === "'" || n === '\\') {
|
|
433
|
+
r += n;
|
|
265
434
|
i++;
|
|
266
|
-
} else
|
|
267
|
-
|
|
268
|
-
}
|
|
269
|
-
} else {
|
|
270
|
-
result += str[i];
|
|
271
|
-
}
|
|
435
|
+
} else r += s[i];
|
|
436
|
+
} else r += s[i];
|
|
272
437
|
}
|
|
273
|
-
return
|
|
438
|
+
return r;
|
|
274
439
|
}
|
|
275
440
|
|
|
276
441
|
export class Papagaio {
|
|
277
442
|
constructor() {
|
|
278
443
|
this.recursion_limit = 512;
|
|
279
444
|
this.unique_id = 0;
|
|
280
|
-
this.symbols = {
|
|
281
|
-
pattern: "pattern",
|
|
282
|
-
context: "context",
|
|
283
|
-
open: "{",
|
|
284
|
-
close: "}",
|
|
285
|
-
sigil: "$"
|
|
286
|
-
};
|
|
445
|
+
this.symbols = { pattern: "pattern", context: "context", open: "{", close: "}", sigil: "$" };
|
|
287
446
|
this.content = "";
|
|
288
447
|
}
|
|
289
|
-
|
|
290
448
|
process(input) {
|
|
291
449
|
this.content = input;
|
|
292
|
-
let src = input, last = null,
|
|
293
|
-
const
|
|
294
|
-
const
|
|
295
|
-
const
|
|
296
|
-
return
|
|
450
|
+
let src = input, last = null, it = 0;
|
|
451
|
+
const pend = () => {
|
|
452
|
+
const r1 = new RegExp(`\\b${this.symbols.context}\\s*\\${this.symbols.open}`, "g");
|
|
453
|
+
const r2 = new RegExp(`\\b${this.symbols.pattern}\\s*\\${this.symbols.open}`, "g");
|
|
454
|
+
return r1.test(src) || r2.test(src);
|
|
297
455
|
};
|
|
298
|
-
while (src !== last &&
|
|
299
|
-
|
|
456
|
+
while (src !== last && it < this.recursion_limit) {
|
|
457
|
+
it++;
|
|
300
458
|
last = src;
|
|
301
459
|
src = processContext(this, src);
|
|
302
|
-
const [
|
|
303
|
-
src = applyPatterns(this, s2,
|
|
304
|
-
if (!
|
|
460
|
+
const [p, s2] = collectPatterns(this, src);
|
|
461
|
+
src = applyPatterns(this, s2, p);
|
|
462
|
+
if (!pend()) break;
|
|
305
463
|
}
|
|
306
464
|
return this.content = src, src;
|
|
307
465
|
}
|
package/tests/tests.json
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
{
|
|
10
10
|
"id": 2,
|
|
11
11
|
"name": "Flexible whitespace ($)",
|
|
12
|
-
"code": "pattern {
|
|
12
|
+
"code": "pattern {$$x and $$y} {$x & $y}\nhello and world",
|
|
13
13
|
"expected": "hello & world"
|
|
14
14
|
},
|
|
15
15
|
{
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
{
|
|
76
76
|
"id": 13,
|
|
77
77
|
"name": "Pattern with flexible whitespace in middle",
|
|
78
|
-
"code": "pattern {from
|
|
78
|
+
"code": "pattern {from to} {path: from -> to}\nfrom to\nfrom to\nfrom to",
|
|
79
79
|
"expected": "path: from -> to"
|
|
80
80
|
},
|
|
81
81
|
{
|
|
@@ -129,7 +129,7 @@
|
|
|
129
129
|
{
|
|
130
130
|
"id": 22,
|
|
131
131
|
"name": "Flexible whitespace with semicolon",
|
|
132
|
-
"code": "pattern {
|
|
132
|
+
"code": "pattern {$$a;$$b} {$a AND $b}\nx ; y\nx; y\nx ; y",
|
|
133
133
|
"expected": "x AND y"
|
|
134
134
|
},
|
|
135
135
|
{
|
|
@@ -182,8 +182,8 @@
|
|
|
182
182
|
},
|
|
183
183
|
{
|
|
184
184
|
"id": 31,
|
|
185
|
-
"name": "
|
|
186
|
-
"code": "pattern {$block data {}{}} {DATA:$data}\ndata
|
|
185
|
+
"name": "Delimiter test",
|
|
186
|
+
"code": "pattern {$block data {[}{]}} {DATA:$data}\ndata [ hello world ]",
|
|
187
187
|
"expected": "DATA: hello world"
|
|
188
188
|
},
|
|
189
189
|
{
|
|
@@ -267,7 +267,7 @@
|
|
|
267
267
|
{
|
|
268
268
|
"id": 45,
|
|
269
269
|
"name": "Pattern with flexible space before and after",
|
|
270
|
-
"code": "pattern {$$hello$$} {FOUND}\n hello ",
|
|
270
|
+
"code": "pattern {$$ hello$$ } {FOUND}\n hello ",
|
|
271
271
|
"expected": "FOUND"
|
|
272
272
|
},
|
|
273
273
|
{
|
|
@@ -315,7 +315,7 @@
|
|
|
315
315
|
{
|
|
316
316
|
"id": 53,
|
|
317
317
|
"name": "Trim multiple surrounding spaces",
|
|
318
|
-
"code": "pattern {
|
|
318
|
+
"code": "pattern {$ $word$ } {FOUND: $word}\n word ",
|
|
319
319
|
"expected": "FOUND: word"
|
|
320
320
|
},
|
|
321
321
|
{
|
|
@@ -543,7 +543,7 @@
|
|
|
543
543
|
{
|
|
544
544
|
"id": 91,
|
|
545
545
|
"name": "CSV to JSON",
|
|
546
|
-
"code": "pattern {$a
|
|
546
|
+
"code": "pattern {$a,$$b,$$c} {{ id: '$a', name: '$b', role: '$c' }}\n1,Alice,Engineer\n2,Bob,Designer",
|
|
547
547
|
"expected": "{ id: '1', name: 'Alice', role: 'Engineer' }\n{ id: '2', name: 'Bob', role: 'Designer' }"
|
|
548
548
|
},
|
|
549
549
|
{
|
|
@@ -639,7 +639,7 @@
|
|
|
639
639
|
{
|
|
640
640
|
"id": 107,
|
|
641
641
|
"name": "Comma-separated pattern matching",
|
|
642
|
-
"code": "pattern {
|
|
642
|
+
"code": "pattern {$$a,$$b,$$c} {[$a] [$b] [$c]}\none,two,three",
|
|
643
643
|
"expected": "[one] [two] [three]"
|
|
644
644
|
},
|
|
645
645
|
{
|
|
@@ -663,7 +663,7 @@
|
|
|
663
663
|
{
|
|
664
664
|
"id": 111,
|
|
665
665
|
"name": "Pattern with dot literal",
|
|
666
|
-
"code": "pattern {
|
|
666
|
+
"code": "pattern {$$file.txt} {File: $file.txt}\ndocument.txt",
|
|
667
667
|
"expected": "File: document"
|
|
668
668
|
},
|
|
669
669
|
{
|
|
@@ -693,7 +693,7 @@
|
|
|
693
693
|
{
|
|
694
694
|
"id": 116,
|
|
695
695
|
"name": "Multiple spaces trim with flexible whitespace",
|
|
696
|
-
"code": "pattern {
|
|
696
|
+
"code": "pattern {$ $word$ } {FOUND: $word}\n word ",
|
|
697
697
|
"expected": "FOUND: word"
|
|
698
698
|
},
|
|
699
699
|
{
|
|
@@ -711,7 +711,7 @@
|
|
|
711
711
|
{
|
|
712
712
|
"id": 119,
|
|
713
713
|
"name": "Trailing punctuation in pattern",
|
|
714
|
-
"code": "pattern {
|
|
714
|
+
"code": "pattern {$$w,} {$$w!}\nhello,",
|
|
715
715
|
"expected": "hello!"
|
|
716
716
|
}
|
|
717
717
|
]
|