papagaio 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -26
- package/package.json +1 -1
- package/src/papagaio.js +168 -223
- package/tests/tests.json +5 -5
package/README.md
CHANGED
|
@@ -4,19 +4,22 @@ Minimal yet powerful text preprocessor with support for multi-character delimite
|
|
|
4
4
|
## Installation
|
|
5
5
|
```javascript
|
|
6
6
|
import { Papagaio } from './src/papagaio.js';
|
|
7
|
-
const
|
|
8
|
-
const result =
|
|
7
|
+
const papagaio = new Papagaio();
|
|
8
|
+
const result = papagaio.process(input);
|
|
9
9
|
```
|
|
10
10
|
|
|
11
11
|
## Configuration
|
|
12
12
|
```javascript
|
|
13
|
-
|
|
13
|
+
papagaio.symbols = {
|
|
14
14
|
pattern: "pattern", // pattern keyword
|
|
15
15
|
open: "{", // opening delimiter (multi-char supported)
|
|
16
16
|
close: "}", // closing delimiter (multi-char supported)
|
|
17
|
-
sigil: "$"
|
|
17
|
+
sigil: "$", // variable marker
|
|
18
|
+
eval: "eval", // eval keyword
|
|
19
|
+
block: "block", // block keyword
|
|
20
|
+
regex: "regex" // regex keyword
|
|
18
21
|
};
|
|
19
|
-
|
|
22
|
+
papagaio.recursion_limit = 512;
|
|
20
23
|
```
|
|
21
24
|
|
|
22
25
|
---
|
|
@@ -39,34 +42,86 @@ Output: `cherry, banana, apple`
|
|
|
39
42
|
|
|
40
43
|
---
|
|
41
44
|
|
|
42
|
-
##
|
|
45
|
+
## Variables
|
|
43
46
|
|
|
44
|
-
Papagaio provides flexible
|
|
47
|
+
Papagaio provides flexible variable capture with automatic context-aware behavior.
|
|
48
|
+
|
|
49
|
+
### `$x` - Smart Variable
|
|
50
|
+
Automatically adapts based on context:
|
|
51
|
+
- **Before a block**: Captures everything until the block's opening delimiter
|
|
52
|
+
- **Before a literal**: Captures everything until that literal appears
|
|
53
|
+
- **Otherwise**: Captures a single word (non-whitespace token)
|
|
45
54
|
|
|
46
|
-
### `$x` - Single Word Variable
|
|
47
|
-
Captures a single non-whitespace token.
|
|
48
55
|
```
|
|
49
56
|
pattern {$x} {[$x]}
|
|
50
57
|
hello world
|
|
51
58
|
```
|
|
52
59
|
Output: `[hello]`
|
|
53
60
|
|
|
54
|
-
### `$$x` - Whitespace-Sensitive Variable
|
|
55
|
-
Captures text including surrounding whitespace until the next significant token.
|
|
56
61
|
```
|
|
57
|
-
pattern {
|
|
58
|
-
hello
|
|
62
|
+
pattern {$name $block content {(}{)}} {$name: $content}
|
|
63
|
+
greeting (hello world)
|
|
59
64
|
```
|
|
60
|
-
Output: `
|
|
65
|
+
Output: `greeting: hello world`
|
|
61
66
|
|
|
62
|
-
### `$$$x` - Optional Whitespace Variable
|
|
63
|
-
Captures with optional whitespace (no error if empty).
|
|
64
67
|
```
|
|
65
|
-
pattern {
|
|
68
|
+
pattern {$prefix:$suffix} {$suffix-$prefix}
|
|
69
|
+
key:value
|
|
70
|
+
```
|
|
71
|
+
Output: `value-key`
|
|
72
|
+
|
|
73
|
+
### `$x?` - Optional Variable
|
|
74
|
+
Same behavior as `$x`, but won't fail if empty or not found.
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
pattern {$x? world} {<$x>}
|
|
66
78
|
world
|
|
67
79
|
```
|
|
68
80
|
Output: `<>`
|
|
69
81
|
|
|
82
|
+
```
|
|
83
|
+
pattern {$greeting? $name} {Hello $name$greeting}
|
|
84
|
+
Hi John
|
|
85
|
+
```
|
|
86
|
+
Output: `Hello JohnHi`
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Regex Matching
|
|
91
|
+
|
|
92
|
+
Capture content using JavaScript regular expressions.
|
|
93
|
+
|
|
94
|
+
### Syntax
|
|
95
|
+
```
|
|
96
|
+
$regex varName {pattern}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Basic Example
|
|
100
|
+
```
|
|
101
|
+
pattern {$regex num {[0-9]+}} {Number: $num}
|
|
102
|
+
The answer is 42
|
|
103
|
+
```
|
|
104
|
+
Output: `Number: 42`
|
|
105
|
+
|
|
106
|
+
### Complex Patterns
|
|
107
|
+
```
|
|
108
|
+
pattern {$regex email {\w+@\w+\.\w+}} {Email found: $email}
|
|
109
|
+
Contact: user@example.com
|
|
110
|
+
```
|
|
111
|
+
Output: `Email found: user@example.com`
|
|
112
|
+
|
|
113
|
+
### Multiple Regex Variables
|
|
114
|
+
```
|
|
115
|
+
pattern {$regex year {[0-9]{4}}-$regex month {[0-9]{2}}} {Month $month in $year}
|
|
116
|
+
2024-03
|
|
117
|
+
```
|
|
118
|
+
Output: `Month 03 in 2024`
|
|
119
|
+
|
|
120
|
+
### Notes
|
|
121
|
+
- Regex patterns are cached for performance
|
|
122
|
+
- Matches are anchored at the current position (no searching ahead)
|
|
123
|
+
- Invalid regex patterns will cause the match to fail gracefully
|
|
124
|
+
|
|
70
125
|
---
|
|
71
126
|
|
|
72
127
|
## Blocks
|
|
@@ -191,13 +246,14 @@ pattern {$x} {$eval<<parseInt($x)*2>>}
|
|
|
191
246
|
```
|
|
192
247
|
Output: `10`
|
|
193
248
|
|
|
249
|
+
---
|
|
194
250
|
|
|
195
251
|
## Important Rules
|
|
196
252
|
|
|
197
|
-
### Matching
|
|
198
|
-
* `$x` =
|
|
199
|
-
*
|
|
200
|
-
*
|
|
253
|
+
### Variable Matching
|
|
254
|
+
* `$x` = smart capture (context-aware: word, until literal, or until block)
|
|
255
|
+
* `$x?` = optional version of `$x` (won't fail if empty)
|
|
256
|
+
* `$regex name {pattern}` = regex-based capture
|
|
201
257
|
* Patterns apply globally until stable
|
|
202
258
|
* Blocks support arbitrary nesting depth
|
|
203
259
|
|
|
@@ -207,7 +263,6 @@ Output: `10`
|
|
|
207
263
|
* Multi-character delimiters fully supported (e.g., `{>>>}{<<<}`)
|
|
208
264
|
|
|
209
265
|
### Whitespace Handling
|
|
210
|
-
* Whitespace-optional tokens (`$$` alone) skip optional whitespace
|
|
211
266
|
* Variables automatically skip leading whitespace when needed
|
|
212
267
|
* Trailing whitespace is trimmed when variables appear before literals
|
|
213
268
|
|
|
@@ -241,12 +296,13 @@ pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
|
|
|
241
296
|
|
|
242
297
|
| Problem | Solution |
|
|
243
298
|
|---------|----------|
|
|
244
|
-
| Variable not captured | Check
|
|
299
|
+
| Variable not captured | Check context: use `$x?` for optional, or verify literals/blocks exist |
|
|
245
300
|
| Block mismatch | Verify opening and closing delimiters match the declaration |
|
|
246
301
|
| Infinite recursion | Reduce `recursion_limit` or simplify pattern dependencies |
|
|
247
|
-
| Pattern not matching |
|
|
302
|
+
| Pattern not matching | Verify whitespace between tokens, check if variable should be optional |
|
|
248
303
|
| Nested blocks fail | Ensure delimiters are properly balanced |
|
|
249
304
|
| Multi-char delimiters broken | Check delimiters don't conflict; use escaping if needed |
|
|
305
|
+
| Regex not matching | Test regex pattern separately; ensure it matches at the exact position |
|
|
250
306
|
|
|
251
307
|
---
|
|
252
308
|
|
|
@@ -254,8 +310,8 @@ pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
|
|
|
254
310
|
|
|
255
311
|
```
|
|
256
312
|
pattern {$x $y} {$y, $x} # basic pattern with variables
|
|
257
|
-
pattern {
|
|
258
|
-
pattern {
|
|
313
|
+
pattern {$x? $y} {$y, $x} # optional variable
|
|
314
|
+
pattern {$regex n {[0-9]+}} {$n} # regex capture
|
|
259
315
|
pattern {$block n {o}{c}} {$n} # block capture with custom delimiters
|
|
260
316
|
$pattern {a} {b} # subpattern (scoped to parent)
|
|
261
317
|
$eval{code} # JavaScript evaluation
|
|
@@ -267,5 +323,6 @@ $eval{code} # JavaScript evaluation
|
|
|
267
323
|
|
|
268
324
|
* Patterns apply recursively until no changes occur (up to `recursion_limit`)
|
|
269
325
|
* Multi-character delimiter matching is optimized with regex escaping
|
|
326
|
+
* Regex patterns are automatically cached to improve performance
|
|
270
327
|
* Nested blocks and subpatterns have no theoretical depth limit
|
|
271
328
|
* Large recursion limits can impact performance on complex inputs
|
package/package.json
CHANGED
package/src/papagaio.js
CHANGED
|
@@ -1,319 +1,264 @@
|
|
|
1
|
-
function parsePattern(
|
|
2
|
-
const
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
while (i <
|
|
6
|
-
if (
|
|
7
|
-
let j = i +
|
|
8
|
-
while (j <
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
1
|
+
function parsePattern(p, pat) {
|
|
2
|
+
const t = [], S = p.symbols.sigil, O = p.symbols.open;
|
|
3
|
+
let i = 0;
|
|
4
|
+
|
|
5
|
+
while (i < pat.length) {
|
|
6
|
+
if (pat.startsWith(S + p.symbols.regex, i)) {
|
|
7
|
+
let j = i + S.length + p.symbols.regex.length;
|
|
8
|
+
while (j < pat.length && /\s/.test(pat[j])) j++;
|
|
9
|
+
let v = '';
|
|
10
|
+
while (j < pat.length && /[A-Za-z0-9_]/.test(pat[j])) v += pat[j++];
|
|
11
|
+
if (v) {
|
|
12
|
+
while (j < pat.length && /\s/.test(pat[j])) j++;
|
|
13
|
+
if (pat[j] === O) {
|
|
14
|
+
const [rx, e] = extractBlock(p, pat, j);
|
|
15
|
+
t.push({ type: 'regex', varName: v, regex: rx.trim() });
|
|
16
|
+
i = e; continue;
|
|
17
|
+
}
|
|
18
|
+
}
|
|
16
19
|
}
|
|
17
|
-
if (
|
|
18
|
-
let j = i + S.length +
|
|
19
|
-
while (j <
|
|
20
|
-
let
|
|
21
|
-
while (j <
|
|
22
|
-
if (
|
|
23
|
-
while (j <
|
|
24
|
-
let
|
|
25
|
-
if (
|
|
26
|
-
const [c, e] = extractBlock(
|
|
27
|
-
|
|
28
|
-
j = e; while (j <
|
|
20
|
+
if (pat.startsWith(S + p.symbols.block, i)) {
|
|
21
|
+
let j = i + S.length + p.symbols.block.length;
|
|
22
|
+
while (j < pat.length && /\s/.test(pat[j])) j++;
|
|
23
|
+
let v = '';
|
|
24
|
+
while (j < pat.length && /[A-Za-z0-9_]/.test(pat[j])) v += pat[j++];
|
|
25
|
+
if (v) {
|
|
26
|
+
while (j < pat.length && /\s/.test(pat[j])) j++;
|
|
27
|
+
let od = O, cd = p.symbols.close;
|
|
28
|
+
if (pat[j] === O) {
|
|
29
|
+
const [c, e] = extractBlock(p, pat, j);
|
|
30
|
+
od = unescapeDelim(c.trim()) || O;
|
|
31
|
+
j = e; while (j < pat.length && /\s/.test(pat[j])) j++;
|
|
29
32
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
closeDelim = unescapeDelimiter(c.trim()) || papagaio.symbols.close;
|
|
33
|
+
if (pat[j] === O) {
|
|
34
|
+
const [c, e] = extractBlock(p, pat, j);
|
|
35
|
+
cd = unescapeDelim(c.trim()) || cd;
|
|
34
36
|
j = e;
|
|
35
37
|
}
|
|
36
|
-
|
|
38
|
+
t.push({ type: 'block', varName: v, open: od, close: cd });
|
|
39
|
+
i = j; continue;
|
|
37
40
|
}
|
|
38
41
|
}
|
|
39
|
-
if (
|
|
40
|
-
let j = i + S.length,
|
|
41
|
-
while (j <
|
|
42
|
-
if (
|
|
43
|
-
|
|
42
|
+
if (pat[i] === S) {
|
|
43
|
+
let j = i + S.length, v = '';
|
|
44
|
+
while (j < pat.length && /[A-Za-z0-9_]/.test(pat[j])) v += pat[j++];
|
|
45
|
+
if (v) {
|
|
46
|
+
const optional = pat[j] === '?';
|
|
47
|
+
if (optional) j++;
|
|
48
|
+
t.push({ type: 'var', varName: v, optional });
|
|
49
|
+
i = j;
|
|
50
|
+
continue;
|
|
51
|
+
}
|
|
52
|
+
t.push({ type: 'lit', value: S }); i += S.length; continue;
|
|
44
53
|
}
|
|
45
|
-
if (/\s/.test(
|
|
46
|
-
while (i <
|
|
47
|
-
|
|
54
|
+
if (/\s/.test(pat[i])) {
|
|
55
|
+
while (i < pat.length && /\s/.test(pat[i])) i++;
|
|
56
|
+
t.push({ type: 'ws' }); continue;
|
|
48
57
|
}
|
|
49
|
-
let
|
|
50
|
-
while (i <
|
|
51
|
-
if (
|
|
58
|
+
let lit = '';
|
|
59
|
+
while (i < pat.length && pat[i] !== S && !/\s/.test(pat[i])) lit += pat[i++];
|
|
60
|
+
if (lit) t.push({ type: 'lit', value: lit });
|
|
52
61
|
}
|
|
53
|
-
return
|
|
62
|
+
return t;
|
|
54
63
|
}
|
|
55
64
|
|
|
56
|
-
function matchPattern(
|
|
57
|
-
let
|
|
65
|
+
function matchPattern(p, src, tokens, pos = 0) {
|
|
66
|
+
let cap = {};
|
|
58
67
|
for (let ti = 0; ti < tokens.length; ti++) {
|
|
59
|
-
const
|
|
60
|
-
if (
|
|
61
|
-
if (
|
|
62
|
-
if (
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
v += src[pos++];
|
|
68
|
+
const tok = tokens[ti];
|
|
69
|
+
if (tok.type === 'ws') { while (pos < src.length && /\s/.test(src[pos])) pos++; continue; }
|
|
70
|
+
if (tok.type === 'lit') { if (!src.startsWith(tok.value, pos)) return null; pos += tok.value.length; continue; }
|
|
71
|
+
if (tok.type === 'regex') {
|
|
72
|
+
try {
|
|
73
|
+
let regex = p._regexCache.get(tok.regex);
|
|
74
|
+
if (!regex) {
|
|
75
|
+
regex = new RegExp(tok.regex);
|
|
76
|
+
p._regexCache.set(tok.regex, regex);
|
|
69
77
|
}
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
captures[papagaio.symbols.sigil + token.varName] = v;
|
|
78
|
+
const m = src.slice(pos).match(regex);
|
|
79
|
+
if (!m || m.index !== 0) return null;
|
|
80
|
+
cap[p.symbols.sigil + tok.varName] = m[0];
|
|
81
|
+
pos += m[0].length;
|
|
82
|
+
} catch { return null; }
|
|
76
83
|
continue;
|
|
77
84
|
}
|
|
78
|
-
if (
|
|
85
|
+
if (tok.type === 'var') {
|
|
79
86
|
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
80
|
-
const
|
|
87
|
+
const nx = findNext(tokens, ti);
|
|
81
88
|
let v = '';
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
while (pos < src.length && !src.startsWith(n.openDelim, pos) && src[pos] !== '\n') {
|
|
85
|
-
v += src[pos++];
|
|
86
|
-
}
|
|
89
|
+
if (nx?.type === 'block') {
|
|
90
|
+
while (pos < src.length && !src.startsWith(nx.open, pos) && src[pos] !== '\n') v += src[pos++];
|
|
87
91
|
v = v.trimEnd();
|
|
88
|
-
} else if (
|
|
89
|
-
while (pos < src.length &&
|
|
90
|
-
} else if (n.type === 'literal') {
|
|
91
|
-
while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n') v += src[pos++];
|
|
92
|
+
} else if (nx?.type === 'lit') {
|
|
93
|
+
while (pos < src.length && !src.startsWith(nx.value, pos) && src[pos] !== '\n') v += src[pos++];
|
|
92
94
|
v = v.trimEnd();
|
|
95
|
+
} else {
|
|
96
|
+
while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
|
|
93
97
|
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
captures[papagaio.symbols.sigil + token.varName] = v;
|
|
98
|
+
if (!v && !tok.optional) return null;
|
|
99
|
+
cap[p.symbols.sigil + tok.varName] = v;
|
|
97
100
|
continue;
|
|
98
101
|
}
|
|
99
|
-
if (
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
captures[papagaio.symbols.sigil + varName] = c; pos = e; continue;
|
|
102
|
+
if (tok.type === 'block') {
|
|
103
|
+
if (!src.startsWith(tok.open, pos)) return null;
|
|
104
|
+
const [c, e] = extractBlock(p, src, pos, tok.open, tok.close);
|
|
105
|
+
cap[p.symbols.sigil + tok.varName] = c; pos = e; continue;
|
|
104
106
|
}
|
|
105
107
|
}
|
|
106
|
-
return { captures, endPos: pos };
|
|
108
|
+
return { captures: cap, endPos: pos };
|
|
107
109
|
}
|
|
108
110
|
|
|
109
|
-
function
|
|
111
|
+
/**
 * Look ahead in a token list for the next token that is not whitespace.
 *
 * @param {Array<{type: string}>} t - Token list to scan.
 * @param {number} i - Index of the current token; scanning starts at `i + 1`.
 * @returns {object|null} The first token after `i` whose `type` is not `'ws'`,
 *   or `null` when only whitespace tokens (or nothing) remain.
 */
function findNext(t, i) {
  let idx = i + 1;
  while (idx < t.length) {
    const candidate = t[idx];
    // Whitespace tokens are skipped so the caller sees the next meaningful token.
    if (candidate.type !== 'ws') {
      return candidate;
    }
    idx += 1;
  }
  return null;
}
|
|
110
112
|
|
|
111
|
-
function extractBlock(p, src,
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
i += openDelim.length; const s = i; let d = 0;
|
|
113
|
+
function extractBlock(p, src, i, od = p.symbols.open, cd = p.symbols.close) {
|
|
114
|
+
if (od.length > 1 || cd.length > 1) {
|
|
115
|
+
if (src.substring(i, i + od.length) === od) {
|
|
116
|
+
i += od.length; const s = i; let d = 0;
|
|
116
117
|
while (i < src.length) {
|
|
117
|
-
if (src.substring(i, i +
|
|
118
|
-
else if (src.substring(i, i +
|
|
119
|
-
if (!d) return [src.substring(s, i), i +
|
|
120
|
-
d--; i +=
|
|
118
|
+
if (src.substring(i, i + od.length) === od) { d++; i += od.length; }
|
|
119
|
+
else if (src.substring(i, i + cd.length) === cd) {
|
|
120
|
+
if (!d) return [src.substring(s, i), i + cd.length];
|
|
121
|
+
d--; i += cd.length;
|
|
121
122
|
} else i++;
|
|
122
123
|
}
|
|
123
124
|
return [src.substring(s), src.length];
|
|
124
125
|
}
|
|
125
126
|
}
|
|
126
|
-
if (src[i] ===
|
|
127
|
+
if (src[i] === od) {
|
|
127
128
|
i++; const s = i;
|
|
128
|
-
if (
|
|
129
|
+
if (od === cd) { while (i < src.length && src[i] !== cd) i++; return [src.substring(s, i), i + 1]; }
|
|
129
130
|
let d = 1;
|
|
130
|
-
while (i < src.length && d > 0) { if (src[i] ===
|
|
131
|
+
while (i < src.length && d > 0) { if (src[i] === od) d++; else if (src[i] === cd) d--; if (d > 0) i++; }
|
|
131
132
|
return [src.substring(s, i), i + 1];
|
|
132
133
|
}
|
|
133
134
|
return ['', i];
|
|
134
135
|
}
|
|
135
136
|
|
|
136
|
-
function
|
|
137
|
-
const
|
|
138
|
-
const
|
|
137
|
+
function collectPats(p, src) {
|
|
138
|
+
const arr = [];
|
|
139
|
+
const rx = new RegExp(`(?:^|\\b)${esc(p.symbols.pattern)}\\s*${esc(p.symbols.open)}`, "g");
|
|
139
140
|
let out = src;
|
|
140
|
-
|
|
141
141
|
while (1) {
|
|
142
|
-
|
|
142
|
+
rx.lastIndex = 0; const m = rx.exec(out); if (!m) break;
|
|
143
143
|
const s = m.index, o = m.index + m[0].length - p.symbols.open.length;
|
|
144
144
|
const [mp, em] = extractBlock(p, out, o); let k = em;
|
|
145
145
|
while (k < out.length && /\s/.test(out[k])) k++;
|
|
146
146
|
if (k < out.length && out.substring(k, k + p.symbols.open.length) === p.symbols.open) {
|
|
147
147
|
const [rp, er] = extractBlock(p, out, k);
|
|
148
|
-
|
|
148
|
+
arr.push({ m: mp.trim(), r: rp.trim() });
|
|
149
149
|
out = out.slice(0, s) + out.slice(er); continue;
|
|
150
150
|
}
|
|
151
151
|
out = out.slice(0, s) + out.slice(em);
|
|
152
152
|
}
|
|
153
|
-
return [
|
|
153
|
+
return [arr, out];
|
|
154
154
|
}
|
|
155
155
|
|
|
156
|
-
function
|
|
157
|
-
const
|
|
158
|
-
const
|
|
159
|
-
let out =
|
|
160
|
-
|
|
156
|
+
function extractNested(p, txt) {
|
|
157
|
+
const n = [];
|
|
158
|
+
const rx = new RegExp(`${esc(p.symbols.sigil)}${esc(p.symbols.pattern)}\\s*${esc(p.symbols.open)}`, "g");
|
|
159
|
+
let out = txt;
|
|
161
160
|
while (1) {
|
|
162
|
-
|
|
163
|
-
const m = r.exec(out);
|
|
164
|
-
if (!m) break;
|
|
165
|
-
|
|
161
|
+
rx.lastIndex = 0; const m = rx.exec(out); if (!m) break;
|
|
166
162
|
const s = m.index, o = m.index + m[0].length - p.symbols.open.length;
|
|
167
|
-
const [mp, em] = extractBlock(p, out, o);
|
|
168
|
-
let k = em;
|
|
169
|
-
|
|
163
|
+
const [mp, em] = extractBlock(p, out, o); let k = em;
|
|
170
164
|
while (k < out.length && /\s/.test(out[k])) k++;
|
|
171
|
-
|
|
172
165
|
if (k < out.length && out.substring(k, k + p.symbols.open.length) === p.symbols.open) {
|
|
173
166
|
const [rp, er] = extractBlock(p, out, k);
|
|
174
|
-
|
|
175
|
-
out = out.slice(0, s) + out.slice(er);
|
|
176
|
-
continue;
|
|
167
|
+
n.push({ m: mp.trim(), r: rp.trim() });
|
|
168
|
+
out = out.slice(0, s) + out.slice(er); continue;
|
|
177
169
|
}
|
|
178
170
|
out = out.slice(0, s) + out.slice(em);
|
|
179
171
|
}
|
|
180
|
-
|
|
181
|
-
return [nested, out];
|
|
172
|
+
return [n, out];
|
|
182
173
|
}
|
|
183
174
|
|
|
184
|
-
function
|
|
185
|
-
const
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
let j = i + S.length + evalKeyword.length;
|
|
200
|
-
|
|
201
|
-
while (j < text.length && /\s/.test(text[j])) j++;
|
|
202
|
-
|
|
203
|
-
if (j < text.length && text.substring(j, j + O.length) === O) {
|
|
204
|
-
const startPos = i;
|
|
205
|
-
const blockStart = j;
|
|
206
|
-
|
|
207
|
-
const [content, endPos] = extractBlock(p, text, blockStart, O, C);
|
|
208
|
-
|
|
209
|
-
evals.push({
|
|
210
|
-
fullMatch: text.substring(startPos, endPos),
|
|
211
|
-
code: content,
|
|
212
|
-
startPos: startPos - offset,
|
|
213
|
-
endPos: endPos - offset
|
|
214
|
-
});
|
|
215
|
-
|
|
216
|
-
const before = out.substring(0, startPos - offset);
|
|
217
|
-
const after = out.substring(endPos - offset);
|
|
218
|
-
const placeholder = `__EVAL_${evals.length - 1}__`;
|
|
219
|
-
out = before + placeholder + after;
|
|
220
|
-
|
|
221
|
-
offset += (endPos - startPos) - placeholder.length;
|
|
222
|
-
i = endPos;
|
|
223
|
-
continue;
|
|
175
|
+
function extractEvals(p, txt) {
|
|
176
|
+
const ev = [], S = p.symbols.sigil, O = p.symbols.open;
|
|
177
|
+
let i = 0, out = txt, off = 0;
|
|
178
|
+
while (i < txt.length) {
|
|
179
|
+
if (txt.substring(i, i + S.length) === S && txt.substring(i + S.length, i + S.length + p.symbols.eval.length) === p.symbols.eval) {
|
|
180
|
+
let j = i + S.length + p.symbols.eval.length;
|
|
181
|
+
while (j < txt.length && /\s/.test(txt[j])) j++;
|
|
182
|
+
if (j < txt.length && txt.substring(j, j + O.length) === O) {
|
|
183
|
+
const sp = i, bp = j;
|
|
184
|
+
const [c, ep] = extractBlock(p, txt, bp);
|
|
185
|
+
ev.push({ code: c, sp: sp - off, ep: ep - off });
|
|
186
|
+
const ph = `__E${ev.length - 1}__`;
|
|
187
|
+
out = out.substring(0, sp - off) + ph + out.substring(ep - off);
|
|
188
|
+
off += (ep - sp) - ph.length; i = ep; continue;
|
|
224
189
|
}
|
|
225
190
|
}
|
|
226
191
|
i++;
|
|
227
192
|
}
|
|
228
|
-
|
|
229
|
-
return [evals, out];
|
|
193
|
+
return [ev, out];
|
|
230
194
|
}
|
|
231
195
|
|
|
232
|
-
function
|
|
233
|
-
let
|
|
234
|
-
for (let i =
|
|
235
|
-
const
|
|
236
|
-
let
|
|
237
|
-
try {
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
evalResult = "javascript error: " + e.message;
|
|
241
|
-
}
|
|
242
|
-
result = result.replace(placeholder, evalResult);
|
|
196
|
+
function applyEvals(p, txt, ev) {
|
|
197
|
+
let r = txt;
|
|
198
|
+
for (let i = ev.length - 1; i >= 0; i--) {
|
|
199
|
+
const ph = `__E${i}__`;
|
|
200
|
+
let res;
|
|
201
|
+
try { res = String(Function("papagaio", "ctx", `"use strict";return(function(){${ev[i].code}})();`)(p, {})); }
|
|
202
|
+
catch (e) { res = "error: " + e.message; }
|
|
203
|
+
r = r.replace(ph, res);
|
|
243
204
|
}
|
|
244
|
-
return
|
|
205
|
+
return r;
|
|
245
206
|
}
|
|
246
207
|
|
|
247
|
-
function
|
|
248
|
-
let
|
|
208
|
+
function applyPats(p, src, pats) {
|
|
209
|
+
let last = "", S = p.symbols.sigil;
|
|
249
210
|
for (const pat of pats) {
|
|
250
|
-
const
|
|
211
|
+
const tok = parsePattern(p, pat.m);
|
|
212
|
+
let n = '', pos = 0, ok = false;
|
|
251
213
|
while (pos < src.length) {
|
|
252
|
-
const m = matchPattern(p, src,
|
|
214
|
+
const m = matchPattern(p, src, tok, pos);
|
|
253
215
|
if (m) {
|
|
254
|
-
ok = true;
|
|
255
|
-
let r = pat.
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
for (const [k, v] of Object.entries(captures)) {
|
|
261
|
-
const e = escapeRegex(k); r = r.replace(new RegExp(e + '(?![A-Za-z0-9_])', 'g'), v);
|
|
216
|
+
ok = true;
|
|
217
|
+
let r = pat.r;
|
|
218
|
+
const [nested, clean] = extractNested(p, r);
|
|
219
|
+
r = clean;
|
|
220
|
+
for (const [k, v] of Object.entries(m.captures)) {
|
|
221
|
+
r = r.replace(new RegExp(esc(k) + '(?![A-Za-z0-9_])', 'g'), v);
|
|
262
222
|
}
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
const [evals, cleanText] = extractEvalExpressions(p, r);
|
|
269
|
-
if (evals.length > 0) {
|
|
270
|
-
r = applyEvalExpressions(p, cleanText, evals);
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
n += r; last = r; pos = endPos;
|
|
223
|
+
if (nested.length) r = applyPats(p, r, nested);
|
|
224
|
+
p.match = src.slice(pos, m.endPos);
|
|
225
|
+
const [ev, ct] = extractEvals(p, r);
|
|
226
|
+
if (ev.length) r = applyEvals(p, ct, ev);
|
|
227
|
+
n += r; last = r; pos = m.endPos;
|
|
274
228
|
} else { n += src[pos]; pos++; }
|
|
275
229
|
}
|
|
276
|
-
if (ok)
|
|
230
|
+
if (ok) src = n;
|
|
277
231
|
}
|
|
278
232
|
return src;
|
|
279
233
|
}
|
|
280
234
|
|
|
281
|
-
function
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
if (s[i] === '\\' && i + 1 < s.length) {
|
|
286
|
-
|
|
287
|
-
if (n === '"' || n === "'" || n === '\\') { r += n; i++; }
|
|
288
|
-
else r += s[i];
|
|
289
|
-
} else r += s[i];
|
|
235
|
+
/**
 * Escape a string so it can be embedded literally inside a `RegExp` source.
 *
 * Each regex metacharacter (and each quote character) found in `s` is
 * prefixed with a backslash; all other characters pass through unchanged.
 *
 * @param {string} s - Raw text, e.g. a configured keyword or delimiter.
 * @returns {string} `s` with every matched character backslash-escaped.
 */
function esc(s) {
  // Characters that get a leading backslash in the output.
  const special = /[.*+?^${}()|[\]\\""']/g;
  // `$&` in the replacement stands for the matched character itself.
  const escaped = s.replace(special, '\\$&');
  return escaped;
}
|
|
236
|
+
function unescapeDelim(s) {
|
|
237
|
+
let r = '';
|
|
238
|
+
for (let i = 0; i < s.length; i++) {
|
|
239
|
+
if (s[i] === '\\' && i + 1 < s.length && (s[i+1] === '"' || s[i+1] === "'" || s[i+1] === '\\')) { r += s[i+1]; i++; }
|
|
240
|
+
else r += s[i];
|
|
290
241
|
}
|
|
291
242
|
return r;
|
|
292
243
|
}
|
|
293
244
|
|
|
294
245
|
export class Papagaio {
|
|
295
|
-
constructor(sigil = '$', open = '{', close = '}', pattern = 'pattern', evalKeyword = 'eval', blockKeyword = 'block') {
|
|
246
|
+
constructor(sigil = '$', open = '{', close = '}', pattern = 'pattern', evalKeyword = 'eval', blockKeyword = 'block', regexKeyword = 'regex') {
|
|
296
247
|
this.recursion_limit = 512;
|
|
297
|
-
this.symbols = {
|
|
298
|
-
pattern: pattern,
|
|
299
|
-
open: open,
|
|
300
|
-
close: close,
|
|
301
|
-
sigil: sigil,
|
|
302
|
-
eval: evalKeyword,
|
|
303
|
-
block: blockKeyword
|
|
304
|
-
};
|
|
248
|
+
this.symbols = { pattern, open, close, sigil, eval: evalKeyword, block: blockKeyword, regex: regexKeyword };
|
|
305
249
|
this.content = "";
|
|
250
|
+
this.match = "";
|
|
251
|
+
this._regexCache = new Map();
|
|
306
252
|
}
|
|
307
253
|
process(input) {
|
|
308
|
-
this.content = input;
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
return r2.test(src);
|
|
312
|
-
};
|
|
254
|
+
this.content = input;
|
|
255
|
+
let src = input, last = null, it = 0;
|
|
256
|
+
const pending = () => new RegExp(`(?:^|\\b)${esc(this.symbols.pattern)}\\s*${esc(this.symbols.open)}`, "g").test(src);
|
|
313
257
|
while (src !== last && it < this.recursion_limit) {
|
|
314
258
|
it++; last = src;
|
|
315
|
-
const [p, s2] =
|
|
316
|
-
|
|
259
|
+
const [p, s2] = collectPats(this, src);
|
|
260
|
+
src = applyPats(this, s2, p);
|
|
261
|
+
if (!pending()) break;
|
|
317
262
|
}
|
|
318
263
|
return this.content = src, src;
|
|
319
264
|
}
|
package/tests/tests.json
CHANGED
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
},
|
|
9
9
|
{
|
|
10
10
|
"id": 2,
|
|
11
|
-
"name": "Flexible whitespace with
|
|
12
|
-
"code": "pattern {
|
|
11
|
+
"name": "Flexible whitespace with $",
|
|
12
|
+
"code": "pattern {$x and $y} {$x & $y}\nhello and world",
|
|
13
13
|
"expected": "hello & world"
|
|
14
14
|
},
|
|
15
15
|
{
|
|
@@ -80,8 +80,8 @@
|
|
|
80
80
|
},
|
|
81
81
|
{
|
|
82
82
|
"id": 14,
|
|
83
|
-
"name": "Optional whitespace with
|
|
84
|
-
"code": "pattern {hello
|
|
83
|
+
"name": "Optional whitespace with $ optional",
|
|
84
|
+
"code": "pattern {hello$world} {HI}\nhello\n\nworld",
|
|
85
85
|
"expected": "HI"
|
|
86
86
|
},
|
|
87
87
|
{
|
|
@@ -111,7 +111,7 @@
|
|
|
111
111
|
{
|
|
112
112
|
"id": 19,
|
|
113
113
|
"name": "Pattern without whitespace matching",
|
|
114
|
-
"code": "pattern {
|
|
114
|
+
"code": "pattern {$a,$b} {$b,$a} one,two",
|
|
115
115
|
"expected": "two,one"
|
|
116
116
|
},
|
|
117
117
|
{
|