papagaio 0.5.0 → 0.5.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,19 +4,22 @@ Minimal yet powerful text preprocessor with support for multi-character delimite
4
4
  ## Installation
5
5
  ```javascript
6
6
  import { Papagaio } from './src/papagaio.js';
7
- const p = new Papagaio();
8
- const result = p.process(input);
7
+ const papagaio = new Papagaio();
8
+ const result = papagaio.process(input);
9
9
  ```
10
10
 
11
11
  ## Configuration
12
12
  ```javascript
13
- p.symbols = {
13
+ papagaio.symbols = {
14
14
  pattern: "pattern", // pattern keyword
15
15
  open: "{", // opening delimiter (multi-char supported)
16
16
  close: "}", // closing delimiter (multi-char supported)
17
- sigil: "$" // variable marker
17
+ sigil: "$", // variable marker
18
+ eval: "eval", // eval keyword
19
+ block: "block", // block keyword
20
+ regex: "regex" // regex keyword
18
21
  };
19
- p.recursion_limit = 512;
22
+ papagaio.recursion_limit = 512;
20
23
  ```
21
24
 
22
25
  ---
@@ -39,34 +42,86 @@ Output: `cherry, banana, apple`
39
42
 
40
43
  ---
41
44
 
42
- ## Whitespace Operators
45
+ ## Variables
43
46
 
44
- Papagaio provides flexible whitespace handling for variable capture.
47
+ Papagaio provides flexible variable capture with automatic context-aware behavior.
48
+
49
+ ### `$x` - Smart Variable
50
+ Automatically adapts based on context:
51
+ - **Before a block**: Captures everything until the block's opening delimiter
52
+ - **Before a literal**: Captures everything until that literal appears
53
+ - **Otherwise**: Captures a single word (non-whitespace token)
45
54
 
46
- ### `$x` - Single Word Variable
47
- Captures a single non-whitespace token.
48
55
  ```
49
56
  pattern {$x} {[$x]}
50
57
  hello world
51
58
  ```
52
59
  Output: `[hello]`
53
60
 
54
- ### `$$x` - Whitespace-Sensitive Variable
55
- Captures text including surrounding whitespace until the next significant token.
56
61
  ```
57
- pattern {$$x world} {[$x]}
58
- hello world
62
+ pattern {$name $block content {(}{)}} {$name: $content}
63
+ greeting (hello world)
59
64
  ```
60
- Output: `[hello ]`
65
+ Output: `greeting: hello world`
61
66
 
62
- ### `$$$x` - Optional Whitespace Variable
63
- Captures with optional whitespace (no error if empty).
64
67
  ```
65
- pattern {$$$x world} {<$x>}
68
+ pattern {$prefix:$suffix} {$suffix-$prefix}
69
+ key:value
70
+ ```
71
+ Output: `value-key`
72
+
73
+ ### `$x?` - Optional Variable
74
+ Same behavior as `$x`, but won't fail if empty or not found.
75
+
76
+ ```
77
+ pattern {$x? world} {<$x>}
66
78
  world
67
79
  ```
68
80
  Output: `<>`
69
81
 
82
+ ```
83
+ pattern {$greeting? $name} {Hello $name$greeting}
84
+ Hi John
85
+ ```
86
+ Output: `Hello JohnHi`
87
+
88
+ ---
89
+
90
+ ## Regex Matching
91
+
92
+ Capture content using JavaScript regular expressions.
93
+
94
+ ### Syntax
95
+ ```
96
+ $regex varName {pattern}
97
+ ```
98
+
99
+ ### Basic Example
100
+ ```
101
+ pattern {$regex num {[0-9]+}} {Number: $num}
102
+ The answer is 42
103
+ ```
104
+ Output: `Number: 42`
105
+
106
+ ### Complex Patterns
107
+ ```
108
+ pattern {$regex email {\w+@\w+\.\w+}} {Email found: $email}
109
+ Contact: user@example.com
110
+ ```
111
+ Output: `Email found: user@example.com`
112
+
113
+ ### Multiple Regex Variables
114
+ ```
115
+ pattern {$regex year {[0-9]{4}}-$regex month {[0-9]{2}}} {Month $month in $year}
116
+ 2024-03
117
+ ```
118
+ Output: `Month 03 in 2024`
119
+
120
+ ### Notes
121
+ - Regex patterns are cached for performance
122
+ - Matches are anchored at the current position (no searching ahead)
123
+ - Invalid regex patterns will cause the match to fail gracefully
124
+
70
125
  ---
71
126
 
72
127
  ## Blocks
@@ -191,13 +246,14 @@ pattern {$x} {$eval<<parseInt($x)*2>>}
191
246
  ```
192
247
  Output: `10`
193
248
 
249
+ ---
194
250
 
195
251
  ## Important Rules
196
252
 
197
- ### Matching
198
- * `$x` = one word (no whitespace)
199
- * `$$x` = captures text with optional surrounding whitespace
200
- * `$$$x` = captures text with optional surrounding whitespace, can be empty or not found
253
+ ### Variable Matching
254
+ * `$x` = smart capture (context-aware: word, until literal, or until block)
255
+ * `$x?` = optional version of `$x` (won't fail if empty)
256
+ * `$regex name {pattern}` = regex-based capture
201
257
  * Patterns apply globally until stable
202
258
  * Blocks support arbitrary nesting depth
203
259
 
@@ -207,7 +263,6 @@ Output: `10`
207
263
  * Multi-character delimiters fully supported (e.g., `{>>>}{<<<}`)
208
264
 
209
265
  ### Whitespace Handling
210
- * Whitespace-optional tokens (`$$` alone) skip optional whitespace
211
266
  * Variables automatically skip leading whitespace when needed
212
267
  * Trailing whitespace is trimmed when variables appear before literals
213
268
 
@@ -241,12 +296,13 @@ pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
241
296
 
242
297
  | Problem | Solution |
243
298
  |---------|----------|
244
- | Variable not captured | Check spacing and use appropriate whitespace operator (`$x`, `$$x`, `$$$x`) |
299
+ | Variable not captured | Check context: use `$x?` for optional, or verify literals/blocks exist |
245
300
  | Block mismatch | Verify opening and closing delimiters match the declaration |
246
301
  | Infinite recursion | Reduce `recursion_limit` or simplify pattern dependencies |
247
- | Pattern not matching | Add whitespace operators (`$$`) for multi-word content |
302
+ | Pattern not matching | Verify whitespace between tokens, check if variable should be optional |
248
303
  | Nested blocks fail | Ensure delimiters are properly balanced |
249
304
  | Multi-char delimiters broken | Check delimiters don't conflict; use escaping if needed |
305
+ | Regex not matching | Test regex pattern separately; ensure it matches at the exact position |
250
306
 
251
307
  ---
252
308
 
@@ -254,8 +310,8 @@ pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
254
310
 
255
311
  ```
256
312
  pattern {$x $y} {$y, $x} # basic pattern with variables
257
- pattern {$$x $y} {$y, $x} # whitespace-sensitive capture
258
- pattern {$$$x $y} {$y, $x} # optional whitespace capture
313
+ pattern {$x? $y} {$y, $x} # optional variable
314
+ pattern {$regex n {[0-9]+}} {$n} # regex capture
259
315
  pattern {$block n {o}{c}} {$n} # block capture with custom delimiters
260
316
  $pattern {a} {b} # subpattern (scoped to parent)
261
317
  $eval{code} # JavaScript evaluation
@@ -267,5 +323,6 @@ $eval{code} # JavaScript evaluation
267
323
 
268
324
  * Patterns apply recursively until no changes occur (up to `recursion_limit`)
269
325
  * Multi-character delimiter matching is optimized with regex escaping
326
+ * Regex patterns are automatically cached to improve performance
270
327
  * Nested blocks and subpatterns have no theoretical depth limit
271
328
  * Large recursion limits can impact performance on complex inputs
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "papagaio",
3
- "version": "0.5.0",
3
+ "version": "0.5.2",
4
4
  "description": "easy yet powerful preprocessor",
5
5
  "main": "src/papagaio.js",
6
6
  "type": "module",
package/src/papagaio.js CHANGED
@@ -1,319 +1,264 @@
1
- function parsePattern(papagaio, pattern) {
2
- const tokens = []; let i = 0;
3
- const S = papagaio.symbols.sigil, S2 = S + S;
4
- const blockKw = papagaio.symbols.block;
5
- while (i < pattern.length) {
6
- if (pattern.startsWith(S2 + S, i)) {
7
- let j = i + S2.length + S.length, varName = '';
8
- while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
9
- if (varName) { tokens.push({ type: 'var-ws-optional', varName }); i = j; continue; }
10
- }
11
- if (pattern.startsWith(S2, i)) {
12
- let j = i + S2.length, varName = '';
13
- while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
14
- if (varName) { tokens.push({ type: 'var-ws', varName }); i = j; continue; }
15
- tokens.push({ type: 'whitespace-optional' }); i += S2.length; continue;
1
+ function parsePattern(p, pat) {
2
+ const t = [], S = p.symbols.sigil, O = p.symbols.open;
3
+ let i = 0;
4
+
5
+ while (i < pat.length) {
6
+ if (pat.startsWith(S + p.symbols.regex, i)) {
7
+ let j = i + S.length + p.symbols.regex.length;
8
+ while (j < pat.length && /\s/.test(pat[j])) j++;
9
+ let v = '';
10
+ while (j < pat.length && /[A-Za-z0-9_]/.test(pat[j])) v += pat[j++];
11
+ if (v) {
12
+ while (j < pat.length && /\s/.test(pat[j])) j++;
13
+ if (pat[j] === O) {
14
+ const [rx, e] = extractBlock(p, pat, j);
15
+ t.push({ type: 'regex', varName: v, regex: rx.trim() });
16
+ i = e; continue;
17
+ }
18
+ }
16
19
  }
17
- if (pattern.startsWith(S + blockKw, i)) {
18
- let j = i + S.length + blockKw.length;
19
- while (j < pattern.length && /\s/.test(pattern[j])) j++;
20
- let varName = '';
21
- while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
22
- if (varName) {
23
- while (j < pattern.length && /\s/.test(pattern[j])) j++;
24
- let openDelim = papagaio.symbols.open;
25
- if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
26
- const [c, e] = extractBlock(papagaio, pattern, j);
27
- openDelim = unescapeDelimiter(c.trim()) || papagaio.symbols.open;
28
- j = e; while (j < pattern.length && /\s/.test(pattern[j])) j++;
20
+ if (pat.startsWith(S + p.symbols.block, i)) {
21
+ let j = i + S.length + p.symbols.block.length;
22
+ while (j < pat.length && /\s/.test(pat[j])) j++;
23
+ let v = '';
24
+ while (j < pat.length && /[A-Za-z0-9_]/.test(pat[j])) v += pat[j++];
25
+ if (v) {
26
+ while (j < pat.length && /\s/.test(pat[j])) j++;
27
+ let od = O, cd = p.symbols.close;
28
+ if (pat[j] === O) {
29
+ const [c, e] = extractBlock(p, pat, j);
30
+ od = unescapeDelim(c.trim()) || O;
31
+ j = e; while (j < pat.length && /\s/.test(pat[j])) j++;
29
32
  }
30
- let closeDelim = papagaio.symbols.close;
31
- if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
32
- const [c, e] = extractBlock(papagaio, pattern, j);
33
- closeDelim = unescapeDelimiter(c.trim()) || papagaio.symbols.close;
33
+ if (pat[j] === O) {
34
+ const [c, e] = extractBlock(p, pat, j);
35
+ cd = unescapeDelim(c.trim()) || cd;
34
36
  j = e;
35
37
  }
36
- tokens.push({ type: 'block', varName, openDelim, closeDelim }); i = j; continue;
38
+ t.push({ type: 'block', varName: v, open: od, close: cd });
39
+ i = j; continue;
37
40
  }
38
41
  }
39
- if (pattern[i] === S) {
40
- let j = i + S.length, varName = '';
41
- while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
42
- if (varName) { tokens.push({ type: 'var', varName }); i = j; continue; }
43
- tokens.push({ type: 'literal', value: S }); i += S.length; continue;
42
+ if (pat[i] === S) {
43
+ let j = i + S.length, v = '';
44
+ while (j < pat.length && /[A-Za-z0-9_]/.test(pat[j])) v += pat[j++];
45
+ if (v) {
46
+ const optional = pat[j] === '?';
47
+ if (optional) j++;
48
+ t.push({ type: 'var', varName: v, optional });
49
+ i = j;
50
+ continue;
51
+ }
52
+ t.push({ type: 'lit', value: S }); i += S.length; continue;
44
53
  }
45
- if (/\s/.test(pattern[i])) {
46
- while (i < pattern.length && /\s/.test(pattern[i])) i++;
47
- tokens.push({ type: 'whitespace-optional' }); continue;
54
+ if (/\s/.test(pat[i])) {
55
+ while (i < pat.length && /\s/.test(pat[i])) i++;
56
+ t.push({ type: 'ws' }); continue;
48
57
  }
49
- let literal = '';
50
- while (i < pattern.length && !pattern.startsWith(S, i) && !/\s/.test(pattern[i])) literal += pattern[i++];
51
- if (literal) tokens.push({ type: 'literal', value: literal });
58
+ let lit = '';
59
+ while (i < pat.length && pat[i] !== S && !/\s/.test(pat[i])) lit += pat[i++];
60
+ if (lit) t.push({ type: 'lit', value: lit });
52
61
  }
53
- return tokens;
62
+ return t;
54
63
  }
55
64
 
56
- function matchPattern(papagaio, src, tokens, startPos = 0) {
57
- let pos = startPos, captures = {};
65
+ function matchPattern(p, src, tokens, pos = 0) {
66
+ let cap = {};
58
67
  for (let ti = 0; ti < tokens.length; ti++) {
59
- const token = tokens[ti];
60
- if (token.type === 'whitespace-optional') { while (pos < src.length && /\s/.test(src[pos])) pos++; continue; }
61
- if (token.type === 'literal') { if (!src.startsWith(token.value, pos)) return null; pos += token.value.length; continue; }
62
- if (token.type === 'var') {
63
- const nextToken = findNextSignificantToken(tokens, ti);
64
- let v = '';
65
-
66
- if (nextToken && nextToken.type === 'block') {
67
- while (pos < src.length && !src.startsWith(nextToken.openDelim, pos) && !/\s/.test(src[pos])) {
68
- v += src[pos++];
68
+ const tok = tokens[ti];
69
+ if (tok.type === 'ws') { while (pos < src.length && /\s/.test(src[pos])) pos++; continue; }
70
+ if (tok.type === 'lit') { if (!src.startsWith(tok.value, pos)) return null; pos += tok.value.length; continue; }
71
+ if (tok.type === 'regex') {
72
+ try {
73
+ let regex = p._regexCache.get(tok.regex);
74
+ if (!regex) {
75
+ regex = new RegExp(tok.regex);
76
+ p._regexCache.set(tok.regex, regex);
69
77
  }
70
- } else {
71
- while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
72
- }
73
-
74
- if (!v) return null;
75
- captures[papagaio.symbols.sigil + token.varName] = v;
78
+ const m = src.slice(pos).match(regex);
79
+ if (!m || m.index !== 0) return null;
80
+ cap[p.symbols.sigil + tok.varName] = m[0];
81
+ pos += m[0].length;
82
+ } catch { return null; }
76
83
  continue;
77
84
  }
78
- if (token.type === 'var-ws' || token.type === 'var-ws-optional') {
85
+ if (tok.type === 'var') {
79
86
  while (pos < src.length && /\s/.test(src[pos])) pos++;
80
- const n = findNextSignificantToken(tokens, ti);
87
+ const nx = findNext(tokens, ti);
81
88
  let v = '';
82
-
83
- if (n && n.type === 'block') {
84
- while (pos < src.length && !src.startsWith(n.openDelim, pos) && src[pos] !== '\n') {
85
- v += src[pos++];
86
- }
89
+ if (nx?.type === 'block') {
90
+ while (pos < src.length && !src.startsWith(nx.open, pos) && src[pos] !== '\n') v += src[pos++];
87
91
  v = v.trimEnd();
88
- } else if (!n || ['var', 'var-ws', 'var-ws-optional'].includes(n.type)) {
89
- while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
90
- } else if (n.type === 'literal') {
91
- while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n') v += src[pos++];
92
+ } else if (nx?.type === 'lit') {
93
+ while (pos < src.length && !src.startsWith(nx.value, pos) && src[pos] !== '\n') v += src[pos++];
92
94
  v = v.trimEnd();
95
+ } else {
96
+ while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
93
97
  }
94
-
95
- if (token.type === 'var-ws' && !v) return null;
96
- captures[papagaio.symbols.sigil + token.varName] = v;
98
+ if (!v && !tok.optional) return null;
99
+ cap[p.symbols.sigil + tok.varName] = v;
97
100
  continue;
98
101
  }
99
- if (token.type === 'block') {
100
- const { varName, openDelim, closeDelim } = token;
101
- if (!src.startsWith(openDelim, pos)) return null;
102
- const [c, e] = extractBlock(papagaio, src, pos, openDelim, closeDelim);
103
- captures[papagaio.symbols.sigil + varName] = c; pos = e; continue;
102
+ if (tok.type === 'block') {
103
+ if (!src.startsWith(tok.open, pos)) return null;
104
+ const [c, e] = extractBlock(p, src, pos, tok.open, tok.close);
105
+ cap[p.symbols.sigil + tok.varName] = c; pos = e; continue;
104
106
  }
105
107
  }
106
- return { captures, endPos: pos };
108
+ return { captures: cap, endPos: pos };
107
109
  }
108
110
 
109
- function findNextSignificantToken(t, i) { for (let k = i + 1; k < t.length; k++) if (t[k].type !== 'whitespace-optional') return t[k]; return null; }
111
+ function findNext(t, i) { for (let k = i + 1; k < t.length; k++) if (t[k].type !== 'ws') return t[k]; return null; }
110
112
 
111
- function extractBlock(p, src, openPos, openDelim = p.symbols.open, closeDelim = p.symbols.close) {
112
- let i = openPos;
113
- if (openDelim.length > 1 || closeDelim.length > 1) {
114
- if (src.substring(i, i + openDelim.length) === openDelim) {
115
- i += openDelim.length; const s = i; let d = 0;
113
+ function extractBlock(p, src, i, od = p.symbols.open, cd = p.symbols.close) {
114
+ if (od.length > 1 || cd.length > 1) {
115
+ if (src.substring(i, i + od.length) === od) {
116
+ i += od.length; const s = i; let d = 0;
116
117
  while (i < src.length) {
117
- if (src.substring(i, i + openDelim.length) === openDelim) { d++; i += openDelim.length; }
118
- else if (src.substring(i, i + closeDelim.length) === closeDelim) {
119
- if (!d) return [src.substring(s, i), i + closeDelim.length];
120
- d--; i += closeDelim.length;
118
+ if (src.substring(i, i + od.length) === od) { d++; i += od.length; }
119
+ else if (src.substring(i, i + cd.length) === cd) {
120
+ if (!d) return [src.substring(s, i), i + cd.length];
121
+ d--; i += cd.length;
121
122
  } else i++;
122
123
  }
123
124
  return [src.substring(s), src.length];
124
125
  }
125
126
  }
126
- if (src[i] === openDelim) {
127
+ if (src[i] === od) {
127
128
  i++; const s = i;
128
- if (openDelim === closeDelim) { while (i < src.length && src[i] !== closeDelim) i++; return [src.substring(s, i), i + 1]; }
129
+ if (od === cd) { while (i < src.length && src[i] !== cd) i++; return [src.substring(s, i), i + 1]; }
129
130
  let d = 1;
130
- while (i < src.length && d > 0) { if (src[i] === openDelim) d++; else if (src[i] === closeDelim) d--; if (d > 0) i++; }
131
+ while (i < src.length && d > 0) { if (src[i] === od) d++; else if (src[i] === cd) d--; if (d > 0) i++; }
131
132
  return [src.substring(s, i), i + 1];
132
133
  }
133
134
  return ['', i];
134
135
  }
135
136
 
136
- function collectPatterns(p, src) {
137
- const A = [];
138
- const r = new RegExp(`(?:^|\\b)${escapeRegex(p.symbols.pattern)}\\s*${escapeRegex(p.symbols.open)}`, "g");
137
+ function collectPats(p, src) {
138
+ const arr = [];
139
+ const rx = new RegExp(`(?:^|\\b)${esc(p.symbols.pattern)}\\s*${esc(p.symbols.open)}`, "g");
139
140
  let out = src;
140
-
141
141
  while (1) {
142
- r.lastIndex = 0; const m = r.exec(out); if (!m) break;
142
+ rx.lastIndex = 0; const m = rx.exec(out); if (!m) break;
143
143
  const s = m.index, o = m.index + m[0].length - p.symbols.open.length;
144
144
  const [mp, em] = extractBlock(p, out, o); let k = em;
145
145
  while (k < out.length && /\s/.test(out[k])) k++;
146
146
  if (k < out.length && out.substring(k, k + p.symbols.open.length) === p.symbols.open) {
147
147
  const [rp, er] = extractBlock(p, out, k);
148
- A.push({ match: mp.trim(), replace: rp.trim() });
148
+ arr.push({ m: mp.trim(), r: rp.trim() });
149
149
  out = out.slice(0, s) + out.slice(er); continue;
150
150
  }
151
151
  out = out.slice(0, s) + out.slice(em);
152
152
  }
153
- return [A, out];
153
+ return [arr, out];
154
154
  }
155
155
 
156
- function extractNestedPatterns(p, replaceText) {
157
- const nested = [];
158
- const r = new RegExp(`${escapeRegex(p.symbols.sigil)}${escapeRegex(p.symbols.pattern)}\\s*${escapeRegex(p.symbols.open)}`, "g");
159
- let out = replaceText;
160
-
156
+ function extractNested(p, txt) {
157
+ const n = [];
158
+ const rx = new RegExp(`${esc(p.symbols.sigil)}${esc(p.symbols.pattern)}\\s*${esc(p.symbols.open)}`, "g");
159
+ let out = txt;
161
160
  while (1) {
162
- r.lastIndex = 0;
163
- const m = r.exec(out);
164
- if (!m) break;
165
-
161
+ rx.lastIndex = 0; const m = rx.exec(out); if (!m) break;
166
162
  const s = m.index, o = m.index + m[0].length - p.symbols.open.length;
167
- const [mp, em] = extractBlock(p, out, o);
168
- let k = em;
169
-
163
+ const [mp, em] = extractBlock(p, out, o); let k = em;
170
164
  while (k < out.length && /\s/.test(out[k])) k++;
171
-
172
165
  if (k < out.length && out.substring(k, k + p.symbols.open.length) === p.symbols.open) {
173
166
  const [rp, er] = extractBlock(p, out, k);
174
- nested.push({ match: mp.trim(), replace: rp.trim() });
175
- out = out.slice(0, s) + out.slice(er);
176
- continue;
167
+ n.push({ m: mp.trim(), r: rp.trim() });
168
+ out = out.slice(0, s) + out.slice(er); continue;
177
169
  }
178
170
  out = out.slice(0, s) + out.slice(em);
179
171
  }
180
-
181
- return [nested, out];
172
+ return [n, out];
182
173
  }
183
174
 
184
- function extractEvalExpressions(p, text) {
185
- const evals = [];
186
- const S = p.symbols.sigil;
187
- const O = p.symbols.open;
188
- const C = p.symbols.close;
189
- const evalKeyword = p.symbols.eval;
190
-
191
- let i = 0;
192
- let out = text;
193
- let offset = 0;
194
-
195
- while (i < text.length) {
196
- if (text.substring(i, i + S.length) === S &&
197
- text.substring(i + S.length, i + S.length + evalKeyword.length) === evalKeyword) {
198
-
199
- let j = i + S.length + evalKeyword.length;
200
-
201
- while (j < text.length && /\s/.test(text[j])) j++;
202
-
203
- if (j < text.length && text.substring(j, j + O.length) === O) {
204
- const startPos = i;
205
- const blockStart = j;
206
-
207
- const [content, endPos] = extractBlock(p, text, blockStart, O, C);
208
-
209
- evals.push({
210
- fullMatch: text.substring(startPos, endPos),
211
- code: content,
212
- startPos: startPos - offset,
213
- endPos: endPos - offset
214
- });
215
-
216
- const before = out.substring(0, startPos - offset);
217
- const after = out.substring(endPos - offset);
218
- const placeholder = `__EVAL_${evals.length - 1}__`;
219
- out = before + placeholder + after;
220
-
221
- offset += (endPos - startPos) - placeholder.length;
222
- i = endPos;
223
- continue;
175
+ function extractEvals(p, txt) {
176
+ const ev = [], S = p.symbols.sigil, O = p.symbols.open;
177
+ let i = 0, out = txt, off = 0;
178
+ while (i < txt.length) {
179
+ if (txt.substring(i, i + S.length) === S && txt.substring(i + S.length, i + S.length + p.symbols.eval.length) === p.symbols.eval) {
180
+ let j = i + S.length + p.symbols.eval.length;
181
+ while (j < txt.length && /\s/.test(txt[j])) j++;
182
+ if (j < txt.length && txt.substring(j, j + O.length) === O) {
183
+ const sp = i, bp = j;
184
+ const [c, ep] = extractBlock(p, txt, bp);
185
+ ev.push({ code: c, sp: sp - off, ep: ep - off });
186
+ const ph = `__E${ev.length - 1}__`;
187
+ out = out.substring(0, sp - off) + ph + out.substring(ep - off);
188
+ off += (ep - sp) - ph.length; i = ep; continue;
224
189
  }
225
190
  }
226
191
  i++;
227
192
  }
228
-
229
- return [evals, out];
193
+ return [ev, out];
230
194
  }
231
195
 
232
- function applyEvalExpressions(p, text, evals) {
233
- let result = text;
234
- for (let i = evals.length - 1; i >= 0; i--) {
235
- const placeholder = `__EVAL_${i}__`;
236
- let evalResult;
237
- try {
238
- evalResult = String(Function("papagaio", "ctx", `"use strict";return(function(){${evals[i].code}})();`)(p, {}));
239
- } catch (e) {
240
- evalResult = "javascript error: " + e.message;
241
- }
242
- result = result.replace(placeholder, evalResult);
196
+ function applyEvals(p, txt, ev) {
197
+ let r = txt;
198
+ for (let i = ev.length - 1; i >= 0; i--) {
199
+ const ph = `__E${i}__`;
200
+ let res;
201
+ try { res = String(Function("papagaio", "ctx", `"use strict";return(function(){${ev[i].code}})();`)(p, {})); }
202
+ catch (e) { res = "error: " + e.message; }
203
+ r = r.replace(ph, res);
243
204
  }
244
- return result;
205
+ return r;
245
206
  }
246
207
 
247
- function applyPatterns(p, src, pats) {
248
- let clear = false, last = "", S = p.symbols.sigil;
208
+ function applyPats(p, src, pats) {
209
+ let last = "", S = p.symbols.sigil;
249
210
  for (const pat of pats) {
250
- const t = parsePattern(p, pat.match); let n = '', pos = 0, ok = false;
211
+ const tok = parsePattern(p, pat.m);
212
+ let n = '', pos = 0, ok = false;
251
213
  while (pos < src.length) {
252
- const m = matchPattern(p, src, t, pos);
214
+ const m = matchPattern(p, src, tok, pos);
253
215
  if (m) {
254
- ok = true; const { captures, endPos } = m;
255
- let r = pat.replace;
256
-
257
- const [nestedPats, cleanReplace] = extractNestedPatterns(p, r);
258
- r = cleanReplace;
259
-
260
- for (const [k, v] of Object.entries(captures)) {
261
- const e = escapeRegex(k); r = r.replace(new RegExp(e + '(?![A-Za-z0-9_])', 'g'), v);
216
+ ok = true;
217
+ let r = pat.r;
218
+ const [nested, clean] = extractNested(p, r);
219
+ r = clean;
220
+ for (const [k, v] of Object.entries(m.captures)) {
221
+ r = r.replace(new RegExp(esc(k) + '(?![A-Za-z0-9_])', 'g'), v);
262
222
  }
263
-
264
- if (nestedPats.length > 0) r = applyPatterns(p, r, nestedPats);
265
-
266
- p.match = src.slice(pos, endPos);
267
-
268
- const [evals, cleanText] = extractEvalExpressions(p, r);
269
- if (evals.length > 0) {
270
- r = applyEvalExpressions(p, cleanText, evals);
271
- }
272
-
273
- n += r; last = r; pos = endPos;
223
+ if (nested.length) r = applyPats(p, r, nested);
224
+ p.match = src.slice(pos, m.endPos);
225
+ const [ev, ct] = extractEvals(p, r);
226
+ if (ev.length) r = applyEvals(p, ct, ev);
227
+ n += r; last = r; pos = m.endPos;
274
228
  } else { n += src[pos]; pos++; }
275
229
  }
276
- if (ok) { src = clear ? last : n; clear = false; }
230
+ if (ok) src = n;
277
231
  }
278
232
  return src;
279
233
  }
280
234
 
281
- function escapeRegex(s) { return s.replace(/[.*+?^${}()|[\]\\""']/g, '\\$&'); }
282
-
283
- function unescapeDelimiter(s) {
284
- let r = ''; for (let i = 0; i < s.length; i++) {
285
- if (s[i] === '\\' && i + 1 < s.length) {
286
- const n = s[i + 1];
287
- if (n === '"' || n === "'" || n === '\\') { r += n; i++; }
288
- else r += s[i];
289
- } else r += s[i];
235
+ function esc(s) { return s.replace(/[.*+?^${}()|[\]\\""']/g, '\\$&'); }
236
+ function unescapeDelim(s) {
237
+ let r = '';
238
+ for (let i = 0; i < s.length; i++) {
239
+ if (s[i] === '\\' && i + 1 < s.length && (s[i+1] === '"' || s[i+1] === "'" || s[i+1] === '\\')) { r += s[i+1]; i++; }
240
+ else r += s[i];
290
241
  }
291
242
  return r;
292
243
  }
293
244
 
294
245
  export class Papagaio {
295
- constructor(sigil = '$', open = '{', close = '}', pattern = 'pattern', evalKeyword = 'eval', blockKeyword = 'block') {
246
+ constructor(sigil = '$', open = '{', close = '}', pattern = 'pattern', evalKeyword = 'eval', blockKeyword = 'block', regexKeyword = 'regex') {
296
247
  this.recursion_limit = 512;
297
- this.symbols = {
298
- pattern: pattern,
299
- open: open,
300
- close: close,
301
- sigil: sigil,
302
- eval: evalKeyword,
303
- block: blockKeyword
304
- };
248
+ this.symbols = { pattern, open, close, sigil, eval: evalKeyword, block: blockKeyword, regex: regexKeyword };
305
249
  this.content = "";
250
+ this.match = "";
251
+ this._regexCache = new Map();
306
252
  }
307
253
  process(input) {
308
- this.content = input; let src = input, last = null, it = 0;
309
- const pend = () => {
310
- const r2 = new RegExp(`(?:^|\\b)${escapeRegex(this.symbols.pattern)}\\s*${escapeRegex(this.symbols.open)}`, "g");
311
- return r2.test(src);
312
- };
254
+ this.content = input;
255
+ let src = input, last = null, it = 0;
256
+ const pending = () => new RegExp(`(?:^|\\b)${esc(this.symbols.pattern)}\\s*${esc(this.symbols.open)}`, "g").test(src);
313
257
  while (src !== last && it < this.recursion_limit) {
314
258
  it++; last = src;
315
- const [p, s2] = collectPatterns(this, src); src = applyPatterns(this, s2, p);
316
- if (!pend()) break;
259
+ const [p, s2] = collectPats(this, src);
260
+ src = applyPats(this, s2, p);
261
+ if (!pending()) break;
317
262
  }
318
263
  return this.content = src, src;
319
264
  }
package/tests/tests.json CHANGED
@@ -8,8 +8,8 @@
8
8
  },
9
9
  {
10
10
  "id": 2,
11
- "name": "Flexible whitespace with $$",
12
- "code": "pattern {$$x and $$y} {$x & $y}\nhello and world",
11
+ "name": "Flexible whitespace with $",
12
+ "code": "pattern {$x and $y} {$x & $y}\nhello and world",
13
13
  "expected": "hello & world"
14
14
  },
15
15
  {
@@ -80,8 +80,8 @@
80
80
  },
81
81
  {
82
82
  "id": 14,
83
- "name": "Optional whitespace with $$ optional",
84
- "code": "pattern {hello$$world} {HI}\nhello\n\nworld",
83
+ "name": "Optional whitespace with $ optional",
84
+ "code": "pattern {hello$world} {HI}\nhello\n\nworld",
85
85
  "expected": "HI"
86
86
  },
87
87
  {
@@ -111,7 +111,7 @@
111
111
  {
112
112
  "id": 19,
113
113
  "name": "Pattern without whitespace matching",
114
- "code": "pattern {$$a,$b} {$b,$a} one,two",
114
+ "code": "pattern {$a,$b} {$b,$a} one,two",
115
115
  "expected": "two,one"
116
116
  },
117
117
  {