papagaio 0.2.8 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -16
- package/examples/simple.html +15 -0
- package/package.json +1 -1
- package/src/papagaio-bootstrap.mjs +25 -0
- package/src/papagaio.js +367 -191
- package/tests/tests.json +12 -12
package/README.md
CHANGED
|
@@ -46,17 +46,6 @@ apple banana cherry
|
|
|
46
46
|
```
|
|
47
47
|
Output: `cherry, banana, apple`
|
|
48
48
|
|
|
49
|
-
### 3. Flexible Whitespace (`$$`)
|
|
50
|
-
|
|
51
|
-
```
|
|
52
|
-
pattern {$x$$and$$$y} {$x & $y}
|
|
53
|
-
hello and world
|
|
54
|
-
hello and world
|
|
55
|
-
```
|
|
56
|
-
Output: `hello & world` (both)
|
|
57
|
-
|
|
58
|
-
`$$` = zero or more spaces/tabs/newlines.
|
|
59
|
-
|
|
60
49
|
## Blocks
|
|
61
50
|
|
|
62
51
|
Capture content between delimiters.
|
|
@@ -266,9 +255,13 @@ Output:
|
|
|
266
255
|
|
|
267
256
|
### Matching
|
|
268
257
|
- Variables (`$x`) capture **one word** (no spaces)
|
|
269
|
-
-
|
|
258
|
+
- Variables (`$$x`) capture **one or more words** (spaces included)
|
|
270
259
|
- Patterns apply **globally** each iteration
|
|
271
260
|
- Auto-recursion until: max 512 iterations OR no changes
|
|
261
|
+
- `$ ` = one or more of this whitespace (spaces, tabs, newlines)
|
|
262
|
+
- `$$ ` = zero or more of this whitespace (spaces, tabs, newlines)
|
|
263
|
+
- `$$$ ` = one or more whitespace characters of any kind
|
|
264
|
+
- `$$$$ ` = zero or more whitespace characters of any kind
|
|
272
265
|
|
|
273
266
|
### Block Matching
|
|
274
267
|
- `$block name {open}{close}` captures between delimiters
|
|
@@ -280,8 +273,11 @@ Output:
|
|
|
280
273
|
- Reuse: `$x` appears multiple times in replace
|
|
281
274
|
- Undefined: becomes empty string
|
|
282
275
|
|
|
283
|
-
###
|
|
284
|
-
- You cannot match words containing the sigil character.
|
|
276
|
+
### Limitations
|
|
277
|
+
- You cannot match words containing the current sigil character.
|
|
278
|
+
- You cannot match a $block{}{} using the current delimiters.
|
|
279
|
+
- By design, whitespace operators need a whitespace after them to work properly, even the `$$$ ` and `$$$$ ` ones.
|
|
280
|
+
- Multi-word variables (`$$x`) also capture leading/trailing whitespace, so be careful when combining them with whitespace operators.
|
|
285
281
|
|
|
286
282
|
---
|
|
287
283
|
|
|
@@ -289,11 +285,13 @@ Output:
|
|
|
289
285
|
|
|
290
286
|
| Problem | Solution |
|
|
291
287
|
|---------|----------|
|
|
292
|
-
| Pattern doesn't match | Use `$$` between elements for flexible whitespace |
|
|
293
288
|
| Variable not captured | Check space between variables |
|
|
294
289
|
| Block not working | Verify balanced delimiters `{` `}` |
|
|
295
290
|
| Infinite recursion | Use `$clear` or reduce `recursion_limit` |
|
|
296
291
|
| $eval not working | Errors return empty string, use try-catch |
|
|
292
|
+
| Pattern doesn't match | Use whitespace operators between elements for flexible whitespace |
|
|
293
|
+
| Whitespace operators | Remember they need a whitespace after them to work properly |
|
|
294
|
+
| Whitespace operators not matching | Multi-word variables (`$$x`) also capture leading/trailing whitespace, so be careful when combining them with whitespace operators. |
|
|
297
295
|
|
|
298
296
|
## Known Bugs
|
|
299
297
|
|
|
@@ -305,7 +303,7 @@ Output:
|
|
|
305
303
|
|
|
306
304
|
```
|
|
307
305
|
pattern {$x $y} {$y, $x} # basic pattern
|
|
308
|
-
pattern {$x
|
|
306
|
+
pattern {$x$ $y} {$x-$y} # flexible whitespace
|
|
309
307
|
pattern {$block n {o}{c}} {$n} # block
|
|
310
308
|
context { ... } # recursive scope
|
|
311
309
|
$unique # unique ID per pattern
|
|
@@ -313,6 +311,7 @@ $match # full match
|
|
|
313
311
|
$prefix / $suffix # before/after
|
|
314
312
|
$clear # clear before
|
|
315
313
|
$eval{code} # execute JS
|
|
314
|
+
$ / $$ / $$$ / $$$$ # whitespace operators
|
|
316
315
|
```
|
|
317
316
|
|
|
318
317
|
---
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>papagaio test</title>
|
|
7
|
+
</head>
|
|
8
|
+
<script src="src/papagaio-bootstrap.mjs" type="module"></script>
|
|
9
|
+
<script type="papagaio">
|
|
10
|
+
pattern {abc} {$eval{console.log(papagaio)}}
|
|
11
|
+
abc
|
|
12
|
+
</script>
|
|
13
|
+
<body>
|
|
14
|
+
</body>
|
|
15
|
+
</html>
|
package/package.json
CHANGED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
// papagaio-bootstrap.mjs
|
|
2
|
+
import { Papagaio } from "./papagaio.js";
|
|
3
|
+
|
|
4
|
+
// Browser bootstrap: finds every <script type="papagaio">, runs its source
// through a single shared Papagaio instance and swaps the element for a module
// script containing the processed output.
(async () => {
  const p = new Papagaio();

  const nodes = [...document.querySelectorAll('script[type="papagaio"]')];

  for (const el of nodes) {
    try {
      let src = el.textContent;

      if (el.src) {
        // External script: the file content replaces any inline text.
        const response = await fetch(el.src);
        if (!response.ok) {
          // fetch() only rejects on network failure; an HTTP error page must
          // not be fed to the processor as if it were source code.
          throw new Error(`failed to load ${el.src}: HTTP ${response.status}`);
        }
        src = await response.text();
      }

      const out = p.process(src);

      const s = document.createElement("script");
      s.type = "module";
      s.textContent = out;

      // Insert the generated script at the same position the original had.
      el.replaceWith(s);
    } catch (err) {
      // Report and keep going so one broken script does not block the rest.
      console.error("papagaio: could not process script", el, err);
    }
  }
})();
|
package/src/papagaio.js
CHANGED
|
@@ -1,95 +1,88 @@
|
|
|
1
1
|
// https://github.com/jardimdanificado/papagaio
|
|
2
|
-
|
|
3
|
-
function
|
|
4
|
-
const
|
|
5
|
-
let
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
2
|
+
|
|
3
|
+
function parsePattern(papagaio, pattern) {
|
|
4
|
+
const tokens = [], S = papagaio.symbols.sigil, S2 = S + S;
|
|
5
|
+
let i = 0;
|
|
6
|
+
const isWhitespaceChar = c => /\s/.test(c);
|
|
7
|
+
const getWhitespaceType = c => c === ' ' ? 'space' : c === '\t' ? 'tab' : c === '\n' ? 'newline' : c === '\r' ? 'carriage-return' : 'other';
|
|
8
|
+
while (i < pattern.length) {
|
|
9
|
+
if (pattern.startsWith(S + S + S, i) && i + 3 < pattern.length && isWhitespaceChar(pattern[i + 3])) {
|
|
10
|
+
tokens.push({ type: 'any-ws-required', wsChar: pattern[i + 3] });
|
|
11
|
+
i += 4;
|
|
12
12
|
continue;
|
|
13
13
|
}
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
src = left + prefix + proc + right;
|
|
19
|
-
}
|
|
20
|
-
return src;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
function extractBlock(papagaio, src, openPos, openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close) {
|
|
24
|
-
let i = openPos;
|
|
25
|
-
if (openDelim.length > 1 || closeDelim.length > 1) {
|
|
26
|
-
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
27
|
-
i += openDelim.length;
|
|
28
|
-
const innerStart = i;
|
|
29
|
-
let d = 0;
|
|
30
|
-
while (i < src.length) {
|
|
31
|
-
if (src.substring(i, i + openDelim.length) === openDelim) {
|
|
32
|
-
d++;
|
|
33
|
-
i += openDelim.length;
|
|
34
|
-
} else if (src.substring(i, i + closeDelim.length) === closeDelim) {
|
|
35
|
-
if (d === 0) return [src.substring(innerStart, i), i + closeDelim.length];
|
|
36
|
-
d--;
|
|
37
|
-
i += closeDelim.length;
|
|
38
|
-
} else i++;
|
|
39
|
-
}
|
|
40
|
-
return [src.substring(innerStart), src.length];
|
|
14
|
+
if (pattern.startsWith(S + S + S + S, i) && i + 4 < pattern.length && isWhitespaceChar(pattern[i + 4])) {
|
|
15
|
+
tokens.push({ type: 'any-ws-optional', wsChar: pattern[i + 4] });
|
|
16
|
+
i += 5;
|
|
17
|
+
continue;
|
|
41
18
|
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
19
|
+
if (pattern.startsWith(S2, i)) {
|
|
20
|
+
let j = i + S2.length, varName = '';
|
|
21
|
+
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
22
|
+
if (varName) {
|
|
23
|
+
if (j < pattern.length && isWhitespaceChar(pattern[j])) {
|
|
24
|
+
tokens.push({ type: 'var-ws', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
|
|
25
|
+
i = j + 1;
|
|
26
|
+
continue;
|
|
27
|
+
} else if (j < pattern.length && pattern[j] === S) {
|
|
28
|
+
tokens.push({ type: 'var-ws', varName, wsTrailing: 'optional', wsChar: null });
|
|
29
|
+
i = j + 1;
|
|
30
|
+
continue;
|
|
31
|
+
} else {
|
|
32
|
+
tokens.push({ type: 'var-ws', varName });
|
|
33
|
+
i = j;
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
55
36
|
}
|
|
56
|
-
return [src.substring(innerStart, i), i + 1];
|
|
57
37
|
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const tokens = [];
|
|
64
|
-
let i = 0;
|
|
65
|
-
const S = papagaio.symbols.sigil, S2 = S + S;
|
|
66
|
-
while (i < pattern.length) {
|
|
38
|
+
if (pattern.startsWith(S2, i) && i + 2 < pattern.length && isWhitespaceChar(pattern[i + 2])) {
|
|
39
|
+
tokens.push({ type: 'ws-optional', wsType: getWhitespaceType(pattern[i + 2]), wsChar: pattern[i + 2] });
|
|
40
|
+
i += 3;
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
67
43
|
if (pattern.startsWith(S2, i)) {
|
|
68
44
|
tokens.push({ type: 'whitespace-optional' });
|
|
69
45
|
i += S2.length;
|
|
70
46
|
continue;
|
|
71
47
|
}
|
|
48
|
+
if (pattern[i] === S && i + 1 < pattern.length && isWhitespaceChar(pattern[i + 1])) {
|
|
49
|
+
tokens.push({ type: 'ws-required', wsType: getWhitespaceType(pattern[i + 1]), wsChar: pattern[i + 1] });
|
|
50
|
+
i += 2;
|
|
51
|
+
continue;
|
|
52
|
+
}
|
|
72
53
|
if (pattern.startsWith(S + 'block', i)) {
|
|
73
|
-
let j = i + S.length +
|
|
54
|
+
let j = i + S.length + 5;
|
|
74
55
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
75
56
|
let varName = '';
|
|
76
57
|
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
77
58
|
if (varName) {
|
|
78
59
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
79
|
-
let openDelim = papagaio.symbols.open;
|
|
60
|
+
let openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close;
|
|
61
|
+
let openDelimIsWs = false, closeDelimIsWs = false;
|
|
80
62
|
if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
|
|
81
63
|
const [c, e] = extractBlock(papagaio, pattern, j);
|
|
82
|
-
|
|
64
|
+
const trimmed = c.trim();
|
|
65
|
+
if (trimmed === '') {
|
|
66
|
+
openDelimIsWs = true;
|
|
67
|
+
let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
|
|
68
|
+
while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
|
|
69
|
+
openDelim = pattern.substring(wsStart, wsEnd);
|
|
70
|
+
} else openDelim = unescapeDelimiter(trimmed) || papagaio.symbols.open;
|
|
83
71
|
j = e;
|
|
84
72
|
while (j < pattern.length && /\s/.test(pattern[j])) j++;
|
|
85
73
|
}
|
|
86
|
-
let closeDelim = papagaio.symbols.close;
|
|
87
74
|
if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
|
|
88
75
|
const [c, e] = extractBlock(papagaio, pattern, j);
|
|
89
|
-
|
|
76
|
+
const trimmed = c.trim();
|
|
77
|
+
if (trimmed === '') {
|
|
78
|
+
closeDelimIsWs = true;
|
|
79
|
+
let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
|
|
80
|
+
while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
|
|
81
|
+
closeDelim = pattern.substring(wsStart, wsEnd);
|
|
82
|
+
} else closeDelim = unescapeDelimiter(trimmed) || papagaio.symbols.close;
|
|
90
83
|
j = e;
|
|
91
84
|
}
|
|
92
|
-
tokens.push({ type: 'block', varName, openDelim, closeDelim });
|
|
85
|
+
tokens.push({ type: 'block', varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs });
|
|
93
86
|
i = j;
|
|
94
87
|
continue;
|
|
95
88
|
}
|
|
@@ -98,210 +91,393 @@ function parsePattern(papagaio, pattern) {
|
|
|
98
91
|
let j = i + S.length, varName = '';
|
|
99
92
|
while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
|
|
100
93
|
if (varName) {
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
94
|
+
if (j < pattern.length && isWhitespaceChar(pattern[j])) {
|
|
95
|
+
tokens.push({ type: 'var', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
|
|
96
|
+
i = j + 1;
|
|
97
|
+
continue;
|
|
98
|
+
} else if (j < pattern.length && pattern[j] === S) {
|
|
99
|
+
tokens.push({ type: 'var', varName, wsTrailing: 'optional', wsChar: null });
|
|
100
|
+
i = j + 1;
|
|
101
|
+
continue;
|
|
102
|
+
} else {
|
|
103
|
+
tokens.push({ type: 'var', varName });
|
|
104
|
+
i = j;
|
|
105
|
+
continue;
|
|
106
|
+
}
|
|
104
107
|
}
|
|
105
|
-
tokens.push({ type: 'literal', value: S });
|
|
106
|
-
i += S.length;
|
|
107
|
-
continue;
|
|
108
108
|
}
|
|
109
|
-
if (
|
|
109
|
+
if (isWhitespaceChar(pattern[i])) {
|
|
110
110
|
let ws = '';
|
|
111
|
-
while (i < pattern.length &&
|
|
112
|
-
tokens.push({ type: '
|
|
111
|
+
while (i < pattern.length && isWhitespaceChar(pattern[i])) ws += pattern[i++];
|
|
112
|
+
tokens.push({ type: 'literal-ws', value: ws });
|
|
113
113
|
continue;
|
|
114
114
|
}
|
|
115
115
|
let literal = '';
|
|
116
|
-
while (i < pattern.length && !pattern.startsWith(S, i) &&
|
|
116
|
+
while (i < pattern.length && !pattern.startsWith(S, i) && !isWhitespaceChar(pattern[i])) literal += pattern[i++];
|
|
117
117
|
if (literal) tokens.push({ type: 'literal', value: literal });
|
|
118
118
|
}
|
|
119
119
|
return tokens;
|
|
120
120
|
}
|
|
121
121
|
|
|
122
122
|
function matchPattern(papagaio, src, tokens, startPos = 0) {
|
|
123
|
-
let pos = startPos;
|
|
124
|
-
const
|
|
123
|
+
let pos = startPos, captures = {};
|
|
124
|
+
const matchWhitespaceType = (str, idx, wsType) => {
|
|
125
|
+
if (idx >= str.length) return { matched: '', newPos: idx };
|
|
126
|
+
if (wsType === 'space' && str[idx] === ' ') {
|
|
127
|
+
let j = idx;
|
|
128
|
+
while (j < str.length && str[j] === ' ') j++;
|
|
129
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
130
|
+
}
|
|
131
|
+
if (wsType === 'tab' && str[idx] === '\t') {
|
|
132
|
+
let j = idx;
|
|
133
|
+
while (j < str.length && str[j] === '\t') j++;
|
|
134
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
135
|
+
}
|
|
136
|
+
if (wsType === 'newline' && str[idx] === '\n') {
|
|
137
|
+
let j = idx;
|
|
138
|
+
while (j < str.length && str[j] === '\n') j++;
|
|
139
|
+
return { matched: str.slice(idx, j), newPos: j };
|
|
140
|
+
}
|
|
141
|
+
return { matched: '', newPos: idx };
|
|
142
|
+
};
|
|
125
143
|
for (let ti = 0; ti < tokens.length; ti++) {
|
|
126
144
|
const token = tokens[ti];
|
|
127
|
-
if (token.type === '
|
|
128
|
-
|
|
145
|
+
if (token.type === 'literal-ws') {
|
|
146
|
+
if (!src.startsWith(token.value, pos)) return null;
|
|
147
|
+
pos += token.value.length;
|
|
129
148
|
continue;
|
|
130
149
|
}
|
|
131
|
-
if (token.type === '
|
|
150
|
+
if (token.type === 'ws-required') {
|
|
151
|
+
const { matched, newPos } = matchWhitespaceType(src, pos, token.wsType);
|
|
152
|
+
if (!matched) return null;
|
|
153
|
+
pos = newPos;
|
|
154
|
+
continue;
|
|
155
|
+
}
|
|
156
|
+
if (token.type === 'ws-optional') {
|
|
157
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsType);
|
|
158
|
+
pos = newPos;
|
|
159
|
+
continue;
|
|
160
|
+
}
|
|
161
|
+
if (token.type === 'any-ws-required') {
|
|
132
162
|
if (pos >= src.length || !/\s/.test(src[pos])) return null;
|
|
133
163
|
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
134
164
|
continue;
|
|
135
165
|
}
|
|
166
|
+
if (token.type === 'any-ws-optional') {
|
|
167
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
168
|
+
continue;
|
|
169
|
+
}
|
|
170
|
+
if (token.type === 'whitespace-optional') {
|
|
171
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
172
|
+
continue;
|
|
173
|
+
}
|
|
136
174
|
if (token.type === 'literal') {
|
|
137
175
|
if (!src.startsWith(token.value, pos)) return null;
|
|
138
176
|
pos += token.value.length;
|
|
139
177
|
continue;
|
|
140
178
|
}
|
|
141
179
|
if (token.type === 'var') {
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if (nextToken) {
|
|
145
|
-
|
|
146
|
-
while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
147
|
-
} else if (nextToken.type === 'literal') {
|
|
148
|
-
const stopChar = nextToken.value[0];
|
|
149
|
-
while (pos < src.length && src[pos] !== stopChar && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
150
|
-
} else if (nextToken.type === 'block') {
|
|
151
|
-
while (pos < src.length && !src.startsWith(nextToken.openDelim, pos) && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
152
|
-
} else {
|
|
153
|
-
while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
|
|
154
|
-
}
|
|
180
|
+
let v = '';
|
|
181
|
+
const nextToken = findNextSignificantToken(tokens, ti);
|
|
182
|
+
if (nextToken && nextToken.type === 'literal') {
|
|
183
|
+
while (pos < src.length && !src.startsWith(nextToken.value, pos) && !/\s/.test(src[pos])) v += src[pos++];
|
|
155
184
|
} else {
|
|
156
|
-
while (pos < src.length && !/\s/.test(src[pos]))
|
|
185
|
+
while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
|
|
186
|
+
}
|
|
187
|
+
if (token.wsTrailing && token.wsTrailing !== 'optional') {
|
|
188
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
|
|
189
|
+
pos = newPos;
|
|
190
|
+
} else if (token.wsTrailing === 'optional') {
|
|
191
|
+
const { newPos } = matchWhitespaceType(src, pos, 'space');
|
|
192
|
+
pos = newPos;
|
|
193
|
+
}
|
|
194
|
+
if (!v) return null;
|
|
195
|
+
captures[papagaio.symbols.sigil + token.varName] = v;
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
if (token.type === 'var-ws') {
|
|
199
|
+
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
200
|
+
const n = findNextSignificantToken(tokens, ti);
|
|
201
|
+
let v = '';
|
|
202
|
+
if (!n || ['var', 'var-ws', 'block'].includes(n.type)) {
|
|
203
|
+
while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
|
|
204
|
+
} else if (n.type === 'literal') {
|
|
205
|
+
while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n') v += src[pos++];
|
|
206
|
+
v = v.trimEnd();
|
|
157
207
|
}
|
|
158
|
-
if (
|
|
159
|
-
|
|
208
|
+
if (token.wsTrailing && token.wsTrailing !== 'optional') {
|
|
209
|
+
const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
|
|
210
|
+
pos = newPos;
|
|
211
|
+
} else if (token.wsTrailing === 'optional') {
|
|
212
|
+
const { newPos } = matchWhitespaceType(src, pos, 'space');
|
|
213
|
+
pos = newPos;
|
|
214
|
+
}
|
|
215
|
+
if (!v) return null;
|
|
216
|
+
captures[papagaio.symbols.sigil + token.varName] = v;
|
|
160
217
|
continue;
|
|
161
218
|
}
|
|
162
219
|
if (token.type === 'block') {
|
|
163
|
-
const { varName, openDelim, closeDelim } = token;
|
|
220
|
+
const { varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs } = token;
|
|
164
221
|
if (!src.startsWith(openDelim, pos)) return null;
|
|
165
|
-
const [
|
|
166
|
-
captures[papagaio.symbols.sigil + varName] =
|
|
167
|
-
pos =
|
|
222
|
+
const [c, e] = extractBlockWithWsDelimiter(papagaio, src, pos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs);
|
|
223
|
+
captures[papagaio.symbols.sigil + varName] = c;
|
|
224
|
+
pos = e;
|
|
168
225
|
continue;
|
|
169
226
|
}
|
|
170
227
|
}
|
|
171
228
|
return { captures, endPos: pos };
|
|
172
229
|
}
|
|
173
230
|
|
|
174
|
-
function
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
231
|
+
// Token types that only describe whitespace operators. Built once at module
// level instead of being re-allocated on every loop iteration.
const WHITESPACE_OPERATOR_TYPES = new Set([
  'whitespace-optional',
  'ws-optional',
  'ws-required',
  'any-ws-optional',
  'any-ws-required',
]);

/**
 * Returns the first token after index `i` that is not a whitespace operator.
 *
 * Note that 'literal-ws' tokens are NOT skipped: they are returned like any
 * other token.
 *
 * @param {Array<{type: string}>} t - Token list.
 * @param {number} i - Index of the current token; the search starts at i + 1.
 * @returns {object|null} The next non-whitespace-operator token, or null when
 *   none follows.
 */
function findNextSignificantToken(t, i) {
  for (let k = i + 1; k < t.length; k++) {
    if (!WHITESPACE_OPERATOR_TYPES.has(t[k].type)) return t[k];
  }
  return null;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Extracts the text enclosed by `openDelim` ... `closeDelim`, starting at
 * `openPos`, honouring nested delimiter pairs.
 *
 * Delimiters may be single characters, multi-character strings or runs of
 * whitespace; all of them go through the same scan.
 *
 * @param {object} p - Papagaio instance (unused here; kept for signature parity).
 * @param {string} src - Text to scan.
 * @param {number} openPos - Index at which the opening delimiter must start.
 * @param {string} openDelim - Opening delimiter.
 * @param {string} closeDelim - Closing delimiter.
 * @param {boolean} openDelimIsWs - Set when the opening delimiter is whitespace.
 *   No longer changes the scan; kept for backward compatibility.
 * @param {boolean} closeDelimIsWs - Same, for the closing delimiter.
 * @returns {[string, number]} `[content, endPos]`: the enclosed text and the
 *   index just past the closing delimiter. `['', openPos]` when the opening
 *   delimiter is empty or absent at `openPos`; `[rest, src.length]` when the
 *   block is never closed.
 */
function extractBlockWithWsDelimiter(p, src, openPos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs) {
  // An empty opening delimiter matches everywhere without consuming input,
  // which previously made the scan loop forever.
  if (openDelim === '' || !src.startsWith(openDelim, openPos)) return ['', openPos];

  const contentStart = openPos + openDelim.length;

  // Identical delimiters cannot nest: the next occurrence closes the block.
  if (openDelim === closeDelim) {
    const closeAt = src.indexOf(closeDelim, contentStart);
    if (closeAt === -1) return [src.substring(contentStart), src.length];
    return [src.substring(contentStart, closeAt), closeAt + closeDelim.length];
  }

  let depth = 0;
  let i = contentStart;
  while (i < src.length) {
    if (src.startsWith(openDelim, i)) {
      depth++;
      i += openDelim.length;
    } else if (src.startsWith(closeDelim, i)) {
      if (depth === 0) return [src.substring(contentStart, i), i + closeDelim.length];
      depth--;
      i += closeDelim.length;
    } else {
      i++;
    }
  }

  // Unterminated block: everything up to the end of the input is the content.
  return [src.substring(contentStart), src.length];
}
|
|
294
|
+
|
|
295
|
+
/**
 * Extracts the text enclosed by a delimiter pair that starts at `openPos`,
 * honouring nested pairs.
 *
 * @param {object} p - Papagaio instance; only `p.symbols.open` and
 *   `p.symbols.close` are read, as defaults for the delimiters.
 * @param {string} src - Text to scan.
 * @param {number} openPos - Index at which the opening delimiter must start.
 * @param {string} [openDelim] - Opening delimiter (any length).
 * @param {string} [closeDelim] - Closing delimiter (any length).
 * @returns {[string, number]} `[content, endPos]`: the enclosed text and the
 *   index just past the closing delimiter. `['', openPos]` when the opening
 *   delimiter is empty or absent at `openPos`; `[rest, src.length]` when the
 *   block is never closed.
 */
function extractBlock(p, src, openPos, openDelim = p.symbols.open, closeDelim = p.symbols.close) {
  // An empty opening delimiter would match without consuming input and keep
  // the scan from ever advancing.
  if (openDelim === '' || !src.startsWith(openDelim, openPos)) return ['', openPos];

  const contentStart = openPos + openDelim.length;

  // Identical delimiters cannot nest: the next occurrence closes the block.
  if (openDelim === closeDelim) {
    const closeAt = src.indexOf(closeDelim, contentStart);
    if (closeAt === -1) return [src.substring(contentStart), src.length];
    return [src.substring(contentStart, closeAt), closeAt + closeDelim.length];
  }

  let depth = 0;
  let i = contentStart;
  while (i < src.length) {
    if (src.startsWith(openDelim, i)) {
      depth++;
      i += openDelim.length;
    } else if (src.startsWith(closeDelim, i)) {
      if (depth === 0) return [src.substring(contentStart, i), i + closeDelim.length];
      depth--;
      i += closeDelim.length;
    } else {
      i++;
    }
  }

  // Unterminated block: everything up to the end of the input is the content.
  return [src.substring(contentStart), src.length];
}
|
|
332
|
+
|
|
333
|
+
/**
 * Collects every top-level `pattern {match} {replace}` declaration found in
 * `src` and strips the declarations from the text.
 *
 * @param {object} p - Papagaio instance; reads `p.symbols.pattern` and
 *   `p.symbols.open`.
 * @param {string} src - Source text.
 * @returns {[Array<{match: string, replace: string}>, string]} The collected
 *   patterns (both parts trimmed) and the source with the declarations removed.
 */
function collectPatterns(p, src) {
  const collected = [];
  const { pattern, open } = p.symbols;
  // Both symbols are user-configurable, so they are escaped before being
  // embedded in the regular expression.
  const header = new RegExp(`\\b${escapeRegex(pattern)}\\s*${escapeRegex(open)}`);
  let out = src;

  for (;;) {
    const m = header.exec(out);
    if (m === null) break;

    const declStart = m.index;
    // The header match ends with the opening delimiter of the match block.
    const matchOpen = m.index + m[0].length - open.length;
    const [matchPart, matchEnd] = extractBlock(p, out, matchOpen);

    let k = matchEnd;
    while (k < out.length && /\s/.test(out[k])) k++;

    if (k < out.length && out.startsWith(open, k)) {
      const [replacePart, replaceEnd] = extractBlock(p, out, k);
      collected.push({ match: matchPart.trim(), replace: replacePart.trim() });
      out = out.slice(0, declStart) + out.slice(replaceEnd);
      continue;
    }

    // No replace block follows: drop the dangling declaration and keep going.
    out = out.slice(0, declStart) + out.slice(matchEnd);
  }

  return [collected, out];
}
|
|
354
|
+
|
|
355
|
+
function extractNestedPatterns(p, replaceText) {
|
|
356
|
+
const nested = [];
|
|
357
|
+
const r = new RegExp(`\\${p.symbols.sigil}${escapeRegex(p.symbols.pattern)}\\s*\\${p.symbols.open}`, "g");
|
|
358
|
+
let out = replaceText;
|
|
359
|
+
|
|
360
|
+
while (1) {
|
|
361
|
+
r.lastIndex = 0;
|
|
362
|
+
const m = r.exec(out);
|
|
181
363
|
if (!m) break;
|
|
182
|
-
|
|
183
|
-
const
|
|
184
|
-
const [
|
|
185
|
-
let k =
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
364
|
+
|
|
365
|
+
const s = m.index, o = m.index + m[0].length - 1;
|
|
366
|
+
const [mp, em] = extractBlock(p, out, o);
|
|
367
|
+
let k = em;
|
|
368
|
+
|
|
369
|
+
while (k < out.length && /\s/.test(out[k])) k++;
|
|
370
|
+
|
|
371
|
+
if (k < out.length && out[k] === p.symbols.open) {
|
|
372
|
+
const [rp, er] = extractBlock(p, out, k);
|
|
373
|
+
nested.push({ match: mp.trim(), replace: rp.trim() });
|
|
374
|
+
out = out.slice(0, s) + out.slice(er);
|
|
191
375
|
continue;
|
|
192
376
|
}
|
|
193
|
-
|
|
377
|
+
out = out.slice(0, s) + out.slice(em);
|
|
194
378
|
}
|
|
195
|
-
|
|
379
|
+
|
|
380
|
+
return [nested, out];
|
|
196
381
|
}
|
|
197
382
|
|
|
198
|
-
function applyPatterns(
|
|
199
|
-
let
|
|
200
|
-
for (const pat of
|
|
201
|
-
const
|
|
202
|
-
let
|
|
203
|
-
let pos = 0, matched = false;
|
|
383
|
+
/**
 * Applies each pattern in `pats`, in order, to `src`.
 *
 * For every pattern the text is scanned left to right. Wherever the pattern
 * matches, the matched span is replaced by the pattern's replace template
 * after expanding, in this order: nested `$pattern` declarations, captured
 * variables, `$unique`, `$eval{...}`, double sigils, `$clear`, and
 * `$prefix` / `$suffix` / `$match`.
 *
 * @param {object} p - Papagaio instance; reads `p.symbols.sigil` and
 *   increments `p.unique_id` once per match.
 * @param {string} src - Text to transform.
 * @param {Array<{match: string, replace: string}>} pats - Patterns to apply.
 * @returns {string} The transformed text.
 */
function applyPatterns(p, src, pats) {
  const S = p.symbols.sigil;
  const sigil = escapeRegex(S);

  // These depend only on the sigil, so they are built once per call.
  const uniqueRe = new RegExp(`${sigil}unique\\b`, 'g');
  const doubleSigilRe = new RegExp(escapeRegex(S + S), 'g');
  const hasClearRe = new RegExp(`${sigil}clear\\b`);
  const clearRe = new RegExp(`${sigil}clear\\b\\s?`, 'g');
  const prefixRe = new RegExp(`${sigil}prefix\\b`, 'g');
  const suffixRe = new RegExp(`${sigil}suffix\\b`, 'g');
  const matchRe = new RegExp(`${sigil}match\\b`, 'g');

  let clear = false;
  let last = "";

  for (const pat of pats) {
    const tokens = parsePattern(p, pat.match);
    let rebuilt = '';
    let pos = 0;
    let matchedAny = false;

    while (pos < src.length) {
      const m = matchPattern(p, src, tokens, pos);

      // A zero-length match would never advance `pos` and loop forever,
      // so it is treated as "no match here".
      if (m && m.endPos > pos) {
        matchedAny = true;
        const { captures, endPos } = m;

        // Pull nested ($pattern) declarations out of the replace template.
        const [nestedPats, cleanReplace] = extractNestedPatterns(p, pat.replace);
        let r = cleanReplace;

        for (const [key, value] of Object.entries(captures)) {
          // Function replacer: the captured text is inserted literally, so
          // sequences such as "$&" or "$1" inside it are not interpreted.
          r = r.replace(new RegExp(`${escapeRegex(key)}(?![A-Za-z0-9_])`, 'g'), () => value);
        }

        // Apply the nested patterns to the expanded template.
        if (nestedPats.length > 0) {
          r = applyPatterns(p, r, nestedPats);
        }

        const uid = p.unique_id++;
        r = r.replace(uniqueRe, () => String(uid));

        // NOTE(review): $eval runs arbitrary JavaScript taken from the
        // template, and this expression is hard-wired to "$", "{" and "}"
        // regardless of the configured symbols. Errors yield an empty string.
        r = r.replace(/\$eval\{([^}]*)\}/g, (_, c) => {
          try {
            return String(Function("papagaio", "ctx", `"use strict";return(function(){${c}})();`)(p, {}));
          } catch {
            return "";
          }
        });

        r = r.replace(doubleSigilRe, '');

        if (hasClearRe.test(r)) {
          r = r.replace(clearRe, '');
          clear = true;
        }

        const before = src.slice(0, pos);
        const after = src.slice(endPos);
        const whole = src.slice(pos, endPos);
        r = r
          .replace(prefixRe, () => before)
          .replace(suffixRe, () => after)
          .replace(matchRe, () => whole);

        rebuilt += r;
        last = r;
        pos = endPos;
      } else {
        rebuilt += src[pos];
        pos++;
      }
    }

    if (matchedAny) {
      // $clear discards everything except the last replacement.
      src = clear ? last : rebuilt;
      clear = false;
    }
  }

  return src;
}
|
|
253
442
|
|
|
254
|
-
function escapeRegex(
|
|
255
|
-
return
|
|
443
|
+
/**
 * Escapes every regular-expression metacharacter in `s` so the result can be
 * embedded in a `RegExp` source and match `s` literally.
 *
 * Quote characters are left untouched: they are not metacharacters, and an
 * escaped quote is rejected when the pattern is compiled with the `u` flag.
 *
 * @param {string} s - Literal text.
 * @returns {string} `s` with metacharacters backslash-escaped.
 */
function escapeRegex(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
|
|
257
446
|
|
|
258
|
-
function unescapeDelimiter(
|
|
259
|
-
let
|
|
260
|
-
for (let i = 0; i <
|
|
261
|
-
if (
|
|
262
|
-
const
|
|
263
|
-
if (
|
|
264
|
-
|
|
447
|
+
function unescapeDelimiter(s) {
|
|
448
|
+
let r = '';
|
|
449
|
+
for (let i = 0; i < s.length; i++) {
|
|
450
|
+
if (s[i] === '\\' && i + 1 < s.length) {
|
|
451
|
+
const n = s[i + 1];
|
|
452
|
+
if (n === '"' || n === "'" || n === '\\') {
|
|
453
|
+
r += n;
|
|
265
454
|
i++;
|
|
266
|
-
} else
|
|
267
|
-
|
|
268
|
-
}
|
|
269
|
-
} else {
|
|
270
|
-
result += str[i];
|
|
271
|
-
}
|
|
455
|
+
} else r += s[i];
|
|
456
|
+
} else r += s[i];
|
|
272
457
|
}
|
|
273
|
-
return
|
|
458
|
+
return r;
|
|
274
459
|
}
|
|
275
460
|
|
|
276
461
|
export class Papagaio {
|
|
277
|
-
constructor() {
|
|
462
|
+
constructor(sigil = "$", open = "{", close = "}", pattern = "pattern") {
|
|
278
463
|
this.recursion_limit = 512;
|
|
279
464
|
this.unique_id = 0;
|
|
280
|
-
this.symbols = {
|
|
281
|
-
pattern: "pattern",
|
|
282
|
-
context: "context",
|
|
283
|
-
open: "{",
|
|
284
|
-
close: "}",
|
|
285
|
-
sigil: "$"
|
|
286
|
-
};
|
|
465
|
+
this.symbols = { sigil: sigil, open: open, close: close, pattern: pattern};
|
|
287
466
|
this.content = "";
|
|
288
467
|
}
|
|
289
|
-
|
|
290
468
|
process(input) {
|
|
291
469
|
this.content = input;
|
|
292
|
-
let src = input, last = null,
|
|
293
|
-
const
|
|
294
|
-
const
|
|
295
|
-
|
|
296
|
-
return rCtx.test(src) || rPat.test(src);
|
|
470
|
+
let src = input, last = null, it = 0;
|
|
471
|
+
const pend = () => {
|
|
472
|
+
const r2 = new RegExp(`\\b${this.symbols.pattern}\\s*\\${this.symbols.open}`, "g");
|
|
473
|
+
return r2.test(src);
|
|
297
474
|
};
|
|
298
|
-
while (src !== last &&
|
|
299
|
-
|
|
475
|
+
while (src !== last && it < this.recursion_limit) {
|
|
476
|
+
it++;
|
|
300
477
|
last = src;
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
if (!pending()) break;
|
|
478
|
+
const [p, s2] = collectPatterns(this, src);
|
|
479
|
+
src = applyPatterns(this, s2, p);
|
|
480
|
+
if (!pend()) break;
|
|
305
481
|
}
|
|
306
482
|
return this.content = src, src;
|
|
307
483
|
}
|
package/tests/tests.json
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
{
|
|
10
10
|
"id": 2,
|
|
11
11
|
"name": "Flexible whitespace ($)",
|
|
12
|
-
"code": "pattern {
|
|
12
|
+
"code": "pattern {$$x and $$y} {$x & $y}\nhello and world",
|
|
13
13
|
"expected": "hello & world"
|
|
14
14
|
},
|
|
15
15
|
{
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
{
|
|
76
76
|
"id": 13,
|
|
77
77
|
"name": "Pattern with flexible whitespace in middle",
|
|
78
|
-
"code": "pattern {from
|
|
78
|
+
"code": "pattern {from to} {path: from -> to}\nfrom to\nfrom to\nfrom to",
|
|
79
79
|
"expected": "path: from -> to"
|
|
80
80
|
},
|
|
81
81
|
{
|
|
@@ -129,7 +129,7 @@
|
|
|
129
129
|
{
|
|
130
130
|
"id": 22,
|
|
131
131
|
"name": "Flexible whitespace with semicolon",
|
|
132
|
-
"code": "pattern {
|
|
132
|
+
"code": "pattern {$$a;$$b} {$a AND $b}\nx ; y\nx; y\nx ; y",
|
|
133
133
|
"expected": "x AND y"
|
|
134
134
|
},
|
|
135
135
|
{
|
|
@@ -182,8 +182,8 @@
|
|
|
182
182
|
},
|
|
183
183
|
{
|
|
184
184
|
"id": 31,
|
|
185
|
-
"name": "
|
|
186
|
-
"code": "pattern {$block data {}{}} {DATA:$data}\ndata
|
|
185
|
+
"name": "Delimiter test",
|
|
186
|
+
"code": "pattern {$block data {[}{]}} {DATA:$data}\ndata [ hello world ]",
|
|
187
187
|
"expected": "DATA: hello world"
|
|
188
188
|
},
|
|
189
189
|
{
|
|
@@ -267,7 +267,7 @@
|
|
|
267
267
|
{
|
|
268
268
|
"id": 45,
|
|
269
269
|
"name": "Pattern with flexible space before and after",
|
|
270
|
-
"code": "pattern {$$hello$$} {FOUND}\n hello ",
|
|
270
|
+
"code": "pattern {$$ hello$$ } {FOUND}\n hello ",
|
|
271
271
|
"expected": "FOUND"
|
|
272
272
|
},
|
|
273
273
|
{
|
|
@@ -315,7 +315,7 @@
|
|
|
315
315
|
{
|
|
316
316
|
"id": 53,
|
|
317
317
|
"name": "Trim multiple surrounding spaces",
|
|
318
|
-
"code": "pattern {
|
|
318
|
+
"code": "pattern {$ $word$ } {FOUND: $word}\n word ",
|
|
319
319
|
"expected": "FOUND: word"
|
|
320
320
|
},
|
|
321
321
|
{
|
|
@@ -543,7 +543,7 @@
|
|
|
543
543
|
{
|
|
544
544
|
"id": 91,
|
|
545
545
|
"name": "CSV to JSON",
|
|
546
|
-
"code": "pattern {$a
|
|
546
|
+
"code": "pattern {$a,$$b,$$c} {{ id: '$a', name: '$b', role: '$c' }}\n1,Alice,Engineer\n2,Bob,Designer",
|
|
547
547
|
"expected": "{ id: '1', name: 'Alice', role: 'Engineer' }\n{ id: '2', name: 'Bob', role: 'Designer' }"
|
|
548
548
|
},
|
|
549
549
|
{
|
|
@@ -639,7 +639,7 @@
|
|
|
639
639
|
{
|
|
640
640
|
"id": 107,
|
|
641
641
|
"name": "Comma-separated pattern matching",
|
|
642
|
-
"code": "pattern {
|
|
642
|
+
"code": "pattern {$$a,$$b,$$c} {[$a] [$b] [$c]}\none,two,three",
|
|
643
643
|
"expected": "[one] [two] [three]"
|
|
644
644
|
},
|
|
645
645
|
{
|
|
@@ -663,7 +663,7 @@
|
|
|
663
663
|
{
|
|
664
664
|
"id": 111,
|
|
665
665
|
"name": "Pattern with dot literal",
|
|
666
|
-
"code": "pattern {
|
|
666
|
+
"code": "pattern {$$file.txt} {File: $file.txt}\ndocument.txt",
|
|
667
667
|
"expected": "File: document"
|
|
668
668
|
},
|
|
669
669
|
{
|
|
@@ -693,7 +693,7 @@
|
|
|
693
693
|
{
|
|
694
694
|
"id": 116,
|
|
695
695
|
"name": "Multiple spaces trim with flexible whitespace",
|
|
696
|
-
"code": "pattern {
|
|
696
|
+
"code": "pattern {$ $word$ } {FOUND: $word}\n word ",
|
|
697
697
|
"expected": "FOUND: word"
|
|
698
698
|
},
|
|
699
699
|
{
|
|
@@ -711,7 +711,7 @@
|
|
|
711
711
|
{
|
|
712
712
|
"id": 119,
|
|
713
713
|
"name": "Trailing punctuation in pattern",
|
|
714
|
-
"code": "pattern {
|
|
714
|
+
"code": "pattern {$$w,} {$$w!}\nhello,",
|
|
715
715
|
"expected": "hello!"
|
|
716
716
|
}
|
|
717
717
|
]
|