papagaio 0.2.8 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -46,17 +46,6 @@ apple banana cherry
46
46
  ```
47
47
  Output: `cherry, banana, apple`
48
48
 
49
- ### 3. Flexible Whitespace (`$$`)
50
-
51
- ```
52
- pattern {$x$$and$$$y} {$x & $y}
53
- hello and world
54
- hello and world
55
- ```
56
- Output: `hello & world` (both)
57
-
58
- `$$` = zero or more spaces/tabs/newlines.
59
-
60
49
  ## Blocks
61
50
 
62
51
  Capture content between delimiters.
@@ -266,9 +255,13 @@ Output:
266
255
 
267
256
  ### Matching
268
257
  - Variables (`$x`) capture **one word** (no spaces)
269
- - `$$` = flexible whitespace (0+ spaces/tabs/newlines)
258
+ - Double-sigil variables (`$$x`) capture **one or more words** (spaces allowed)
270
259
  - Patterns apply **globally** each iteration
271
260
  - Auto-recursion until: max 512 iterations OR no changes
261
+ - `$ ` = one or more of the whitespace character written after the sigil (space, tab, or newline)
262
+ - `$$ ` = zero or more of the whitespace character written after the sigils (space, tab, or newline)
263
+ - `$$$ ` = one or more whitespace characters of any kind
264
+ - `$$$$ ` = zero or more whitespace characters of any kind
272
265
 
273
266
  ### Block Matching
274
267
  - `$block name {open}{close}` captures between delimiters
@@ -280,8 +273,11 @@ Output:
280
273
  - Reuse: `$x` appears multiple times in replace
281
274
  - Undefined: becomes empty string
282
275
 
283
- ### Sigil
284
- - You cannot match words containing the sigil character.
276
+ ### Limitations
277
+ - You cannot match words containing the current sigil character.
278
+ - You cannot match a $block{}{} using the current delimiters.
279
+ - By design, every whitespace operator must be followed by a whitespace character to be recognized, including `$$$ ` and `$$$$ `.
280
+ - Multi-word variables (`$$x`) also consume leading/trailing whitespace, so be careful when combining them with whitespace operators.
285
281
 
286
282
  ---
287
283
 
@@ -289,11 +285,13 @@ Output:
289
285
 
290
286
  | Problem | Solution |
291
287
  |---------|----------|
292
- | Pattern doesn't match | Use `$$` between elements for flexible whitespace |
293
288
  | Variable not captured | Check space between variables |
294
289
  | Block not working | Verify balanced delimiters `{` `}` |
295
290
  | Infinite recursion | Use `$clear` or reduce `recursion_limit` |
296
291
  | $eval not working | Errors return empty string, use try-catch |
292
+ | Pattern doesn't match | Use whitespace operators between elements for flexible whitespace |
293
+ | Whitespace operators not recognized | Each operator must be followed by a whitespace character to work properly |
294
+ | Whitespace operators not matching | Multi-word variables (`$$x`) also consume leading/trailing whitespace, so be careful when combining them with whitespace operators. |
297
295
 
298
296
  ## Known Bugs
299
297
 
@@ -305,7 +303,7 @@ Output:
305
303
 
306
304
  ```
307
305
  pattern {$x $y} {$y, $x} # basic pattern
308
- pattern {$x$$y} {$x-$y} # flexible whitespace
306
+ pattern {$x$ $y} {$x-$y} # flexible whitespace
309
307
  pattern {$block n {o}{c}} {$n} # block
310
308
  context { ... } # recursive scope
311
309
  $unique # unique ID per pattern
@@ -313,6 +311,7 @@ $match # full match
313
311
  $prefix / $suffix # before/after
314
312
  $clear # clear before
315
313
  $eval{code} # execute JS
314
+ $ / $$ / $$$ / $$$$ # whitespace operators
316
315
  ```
317
316
 
318
317
  ---
@@ -0,0 +1,15 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>papagaio test</title>
7
+ </head>
8
+ <script src="src/papagaio-bootstrap.mjs" type="module"></script>
9
+ <script type="papagaio">
10
+ pattern {abc} {$eval{console.log(papagaio)}}
11
+ abc
12
+ </script>
13
+ <body>
14
+ </body>
15
+ </html>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "papagaio",
3
- "version": "0.2.8",
3
+ "version": "0.4.1",
4
4
  "description": "easy yet powerful preprocessor",
5
5
  "main": "src/papagaio.js",
6
6
  "type": "module",
@@ -0,0 +1,25 @@
1
+ // papagaio-bootstrap.js
2
+ import { Papagaio } from "./papagaio.js";
3
+
4
+ (async () => {
5
+ const p = new Papagaio();
6
+
7
+ const nodes = [...document.querySelectorAll('script[type="papagaio"]')];
8
+
9
+ for (const el of nodes) {
10
+ let src = el.textContent;
11
+
12
+ if (el.src) {
13
+ src = await fetch(el.src).then(r => r.text());
14
+ }
15
+
16
+ const out = p.process(src);
17
+
18
+ const s = document.createElement("script");
19
+ s.type = "module";
20
+ s.textContent = out;
21
+
22
+ // runs at the same position where the original script element was
23
+ el.replaceWith(s);
24
+ }
25
+ })();
package/src/papagaio.js CHANGED
@@ -1,95 +1,88 @@
1
1
  // https://github.com/jardimdanificado/papagaio
2
-
3
- function processContext(papagaio, src) {
4
- const ctxRe = new RegExp(`\\b${papagaio.symbols.context}\\s*\\${papagaio.symbols.open}`, "g");
5
- let m, matches = [];
6
- while ((m = ctxRe.exec(src)) !== null)
7
- matches.push({ idx: m.index, pos: m.index + m[0].length - 1 });
8
- for (let j = matches.length - 1; j >= 0; j--) {
9
- const x = matches[j], [content, posAfter] = extractBlock(papagaio, src, x.pos);
10
- if (!content.trim()) {
11
- src = src.slice(0, x.idx) + src.slice(posAfter);
2
+
3
+ function parsePattern(papagaio, pattern) {
4
+ const tokens = [], S = papagaio.symbols.sigil, S2 = S + S;
5
+ let i = 0;
6
+ const isWhitespaceChar = c => /\s/.test(c);
7
+ const getWhitespaceType = c => c === ' ' ? 'space' : c === '\t' ? 'tab' : c === '\n' ? 'newline' : c === '\r' ? 'carriage-return' : 'other';
8
+ while (i < pattern.length) {
9
+ if (pattern.startsWith(S + S + S, i) && i + 3 < pattern.length && isWhitespaceChar(pattern[i + 3])) {
10
+ tokens.push({ type: 'any-ws-required', wsChar: pattern[i + 3] });
11
+ i += 4;
12
12
  continue;
13
13
  }
14
- const proc = papagaio.process(content);
15
- let left = src.substring(0, x.idx), right = src.substring(posAfter);
16
- let prefix = left.endsWith("\n") ? "\n" : "";
17
- if (prefix) left = left.slice(0, -1);
18
- src = left + prefix + proc + right;
19
- }
20
- return src;
21
- }
22
-
23
- function extractBlock(papagaio, src, openPos, openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close) {
24
- let i = openPos;
25
- if (openDelim.length > 1 || closeDelim.length > 1) {
26
- if (src.substring(i, i + openDelim.length) === openDelim) {
27
- i += openDelim.length;
28
- const innerStart = i;
29
- let d = 0;
30
- while (i < src.length) {
31
- if (src.substring(i, i + openDelim.length) === openDelim) {
32
- d++;
33
- i += openDelim.length;
34
- } else if (src.substring(i, i + closeDelim.length) === closeDelim) {
35
- if (d === 0) return [src.substring(innerStart, i), i + closeDelim.length];
36
- d--;
37
- i += closeDelim.length;
38
- } else i++;
39
- }
40
- return [src.substring(innerStart), src.length];
14
+ if (pattern.startsWith(S + S + S + S, i) && i + 4 < pattern.length && isWhitespaceChar(pattern[i + 4])) {
15
+ tokens.push({ type: 'any-ws-optional', wsChar: pattern[i + 4] });
16
+ i += 5;
17
+ continue;
41
18
  }
42
- }
43
- if (src[i] === openDelim) {
44
- i++;
45
- const innerStart = i;
46
- if (openDelim === closeDelim) {
47
- while (i < src.length && src[i] !== closeDelim) i++;
48
- return [src.substring(innerStart, i), i + 1];
49
- } else {
50
- let depth = 1;
51
- while (i < src.length && depth > 0) {
52
- if (src[i] === openDelim) depth++;
53
- else if (src[i] === closeDelim) depth--;
54
- if (depth > 0) i++;
19
+ if (pattern.startsWith(S2, i)) {
20
+ let j = i + S2.length, varName = '';
21
+ while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
22
+ if (varName) {
23
+ if (j < pattern.length && isWhitespaceChar(pattern[j])) {
24
+ tokens.push({ type: 'var-ws', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
25
+ i = j + 1;
26
+ continue;
27
+ } else if (j < pattern.length && pattern[j] === S) {
28
+ tokens.push({ type: 'var-ws', varName, wsTrailing: 'optional', wsChar: null });
29
+ i = j + 1;
30
+ continue;
31
+ } else {
32
+ tokens.push({ type: 'var-ws', varName });
33
+ i = j;
34
+ continue;
35
+ }
55
36
  }
56
- return [src.substring(innerStart, i), i + 1];
57
37
  }
58
- }
59
- return ['', i];
60
- }
61
-
62
- function parsePattern(papagaio, pattern) {
63
- const tokens = [];
64
- let i = 0;
65
- const S = papagaio.symbols.sigil, S2 = S + S;
66
- while (i < pattern.length) {
38
+ if (pattern.startsWith(S2, i) && i + 2 < pattern.length && isWhitespaceChar(pattern[i + 2])) {
39
+ tokens.push({ type: 'ws-optional', wsType: getWhitespaceType(pattern[i + 2]), wsChar: pattern[i + 2] });
40
+ i += 3;
41
+ continue;
42
+ }
67
43
  if (pattern.startsWith(S2, i)) {
68
44
  tokens.push({ type: 'whitespace-optional' });
69
45
  i += S2.length;
70
46
  continue;
71
47
  }
48
+ if (pattern[i] === S && i + 1 < pattern.length && isWhitespaceChar(pattern[i + 1])) {
49
+ tokens.push({ type: 'ws-required', wsType: getWhitespaceType(pattern[i + 1]), wsChar: pattern[i + 1] });
50
+ i += 2;
51
+ continue;
52
+ }
72
53
  if (pattern.startsWith(S + 'block', i)) {
73
- let j = i + S.length + 'block'.length;
54
+ let j = i + S.length + 5;
74
55
  while (j < pattern.length && /\s/.test(pattern[j])) j++;
75
56
  let varName = '';
76
57
  while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
77
58
  if (varName) {
78
59
  while (j < pattern.length && /\s/.test(pattern[j])) j++;
79
- let openDelim = papagaio.symbols.open;
60
+ let openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close;
61
+ let openDelimIsWs = false, closeDelimIsWs = false;
80
62
  if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
81
63
  const [c, e] = extractBlock(papagaio, pattern, j);
82
- openDelim = unescapeDelimiter(c.trim()) || papagaio.symbols.open;
64
+ const trimmed = c.trim();
65
+ if (trimmed === '') {
66
+ openDelimIsWs = true;
67
+ let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
68
+ while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
69
+ openDelim = pattern.substring(wsStart, wsEnd);
70
+ } else openDelim = unescapeDelimiter(trimmed) || papagaio.symbols.open;
83
71
  j = e;
84
72
  while (j < pattern.length && /\s/.test(pattern[j])) j++;
85
73
  }
86
- let closeDelim = papagaio.symbols.close;
87
74
  if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
88
75
  const [c, e] = extractBlock(papagaio, pattern, j);
89
- closeDelim = unescapeDelimiter(c.trim()) || papagaio.symbols.close;
76
+ const trimmed = c.trim();
77
+ if (trimmed === '') {
78
+ closeDelimIsWs = true;
79
+ let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
80
+ while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
81
+ closeDelim = pattern.substring(wsStart, wsEnd);
82
+ } else closeDelim = unescapeDelimiter(trimmed) || papagaio.symbols.close;
90
83
  j = e;
91
84
  }
92
- tokens.push({ type: 'block', varName, openDelim, closeDelim });
85
+ tokens.push({ type: 'block', varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs });
93
86
  i = j;
94
87
  continue;
95
88
  }
@@ -98,210 +91,393 @@ function parsePattern(papagaio, pattern) {
98
91
  let j = i + S.length, varName = '';
99
92
  while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
100
93
  if (varName) {
101
- tokens.push({ type: 'var', varName });
102
- i = j;
103
- continue;
94
+ if (j < pattern.length && isWhitespaceChar(pattern[j])) {
95
+ tokens.push({ type: 'var', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
96
+ i = j + 1;
97
+ continue;
98
+ } else if (j < pattern.length && pattern[j] === S) {
99
+ tokens.push({ type: 'var', varName, wsTrailing: 'optional', wsChar: null });
100
+ i = j + 1;
101
+ continue;
102
+ } else {
103
+ tokens.push({ type: 'var', varName });
104
+ i = j;
105
+ continue;
106
+ }
104
107
  }
105
- tokens.push({ type: 'literal', value: S });
106
- i += S.length;
107
- continue;
108
108
  }
109
- if (/\s/.test(pattern[i])) {
109
+ if (isWhitespaceChar(pattern[i])) {
110
110
  let ws = '';
111
- while (i < pattern.length && /\s/.test(pattern[i])) ws += pattern[i++];
112
- tokens.push({ type: 'whitespace', value: ws });
111
+ while (i < pattern.length && isWhitespaceChar(pattern[i])) ws += pattern[i++];
112
+ tokens.push({ type: 'literal-ws', value: ws });
113
113
  continue;
114
114
  }
115
115
  let literal = '';
116
- while (i < pattern.length && !pattern.startsWith(S, i) && !/\s/.test(pattern[i])) literal += pattern[i++];
116
+ while (i < pattern.length && !pattern.startsWith(S, i) && !isWhitespaceChar(pattern[i])) literal += pattern[i++];
117
117
  if (literal) tokens.push({ type: 'literal', value: literal });
118
118
  }
119
119
  return tokens;
120
120
  }
121
121
 
122
122
  function matchPattern(papagaio, src, tokens, startPos = 0) {
123
- let pos = startPos;
124
- const captures = {};
123
+ let pos = startPos, captures = {};
124
+ const matchWhitespaceType = (str, idx, wsType) => {
125
+ if (idx >= str.length) return { matched: '', newPos: idx };
126
+ if (wsType === 'space' && str[idx] === ' ') {
127
+ let j = idx;
128
+ while (j < str.length && str[j] === ' ') j++;
129
+ return { matched: str.slice(idx, j), newPos: j };
130
+ }
131
+ if (wsType === 'tab' && str[idx] === '\t') {
132
+ let j = idx;
133
+ while (j < str.length && str[j] === '\t') j++;
134
+ return { matched: str.slice(idx, j), newPos: j };
135
+ }
136
+ if (wsType === 'newline' && str[idx] === '\n') {
137
+ let j = idx;
138
+ while (j < str.length && str[j] === '\n') j++;
139
+ return { matched: str.slice(idx, j), newPos: j };
140
+ }
141
+ return { matched: '', newPos: idx };
142
+ };
125
143
  for (let ti = 0; ti < tokens.length; ti++) {
126
144
  const token = tokens[ti];
127
- if (token.type === 'whitespace-optional') {
128
- while (pos < src.length && /\s/.test(src[pos])) pos++;
145
+ if (token.type === 'literal-ws') {
146
+ if (!src.startsWith(token.value, pos)) return null;
147
+ pos += token.value.length;
129
148
  continue;
130
149
  }
131
- if (token.type === 'whitespace') {
150
+ if (token.type === 'ws-required') {
151
+ const { matched, newPos } = matchWhitespaceType(src, pos, token.wsType);
152
+ if (!matched) return null;
153
+ pos = newPos;
154
+ continue;
155
+ }
156
+ if (token.type === 'ws-optional') {
157
+ const { newPos } = matchWhitespaceType(src, pos, token.wsType);
158
+ pos = newPos;
159
+ continue;
160
+ }
161
+ if (token.type === 'any-ws-required') {
132
162
  if (pos >= src.length || !/\s/.test(src[pos])) return null;
133
163
  while (pos < src.length && /\s/.test(src[pos])) pos++;
134
164
  continue;
135
165
  }
166
+ if (token.type === 'any-ws-optional') {
167
+ while (pos < src.length && /\s/.test(src[pos])) pos++;
168
+ continue;
169
+ }
170
+ if (token.type === 'whitespace-optional') {
171
+ while (pos < src.length && /\s/.test(src[pos])) pos++;
172
+ continue;
173
+ }
136
174
  if (token.type === 'literal') {
137
175
  if (!src.startsWith(token.value, pos)) return null;
138
176
  pos += token.value.length;
139
177
  continue;
140
178
  }
141
179
  if (token.type === 'var') {
142
- const nextToken = ti + 1 < tokens.length ? tokens[ti + 1] : null;
143
- let varValue = '';
144
- if (nextToken) {
145
- if (nextToken.type === 'whitespace' || nextToken.type === 'whitespace-optional') {
146
- while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
147
- } else if (nextToken.type === 'literal') {
148
- const stopChar = nextToken.value[0];
149
- while (pos < src.length && src[pos] !== stopChar && !/\s/.test(src[pos])) varValue += src[pos++];
150
- } else if (nextToken.type === 'block') {
151
- while (pos < src.length && !src.startsWith(nextToken.openDelim, pos) && !/\s/.test(src[pos])) varValue += src[pos++];
152
- } else {
153
- while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
154
- }
180
+ let v = '';
181
+ const nextToken = findNextSignificantToken(tokens, ti);
182
+ if (nextToken && nextToken.type === 'literal') {
183
+ while (pos < src.length && !src.startsWith(nextToken.value, pos) && !/\s/.test(src[pos])) v += src[pos++];
155
184
  } else {
156
- while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
185
+ while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
186
+ }
187
+ if (token.wsTrailing && token.wsTrailing !== 'optional') {
188
+ const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
189
+ pos = newPos;
190
+ } else if (token.wsTrailing === 'optional') {
191
+ const { newPos } = matchWhitespaceType(src, pos, 'space');
192
+ pos = newPos;
193
+ }
194
+ if (!v) return null;
195
+ captures[papagaio.symbols.sigil + token.varName] = v;
196
+ continue;
197
+ }
198
+ if (token.type === 'var-ws') {
199
+ while (pos < src.length && /\s/.test(src[pos])) pos++;
200
+ const n = findNextSignificantToken(tokens, ti);
201
+ let v = '';
202
+ if (!n || ['var', 'var-ws', 'block'].includes(n.type)) {
203
+ while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
204
+ } else if (n.type === 'literal') {
205
+ while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n') v += src[pos++];
206
+ v = v.trimEnd();
157
207
  }
158
- if (!varValue) return null;
159
- captures[papagaio.symbols.sigil + token.varName] = varValue;
208
+ if (token.wsTrailing && token.wsTrailing !== 'optional') {
209
+ const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
210
+ pos = newPos;
211
+ } else if (token.wsTrailing === 'optional') {
212
+ const { newPos } = matchWhitespaceType(src, pos, 'space');
213
+ pos = newPos;
214
+ }
215
+ if (!v) return null;
216
+ captures[papagaio.symbols.sigil + token.varName] = v;
160
217
  continue;
161
218
  }
162
219
  if (token.type === 'block') {
163
- const { varName, openDelim, closeDelim } = token;
220
+ const { varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs } = token;
164
221
  if (!src.startsWith(openDelim, pos)) return null;
165
- const [blockContent, endPos] = extractBlock(papagaio, src, pos, openDelim, closeDelim);
166
- captures[papagaio.symbols.sigil + varName] = blockContent;
167
- pos = endPos;
222
+ const [c, e] = extractBlockWithWsDelimiter(papagaio, src, pos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs);
223
+ captures[papagaio.symbols.sigil + varName] = c;
224
+ pos = e;
168
225
  continue;
169
226
  }
170
227
  }
171
228
  return { captures, endPos: pos };
172
229
  }
173
230
 
174
- function collectPatterns(papagaio, src) {
175
- const patterns = [];
176
- const patRe = new RegExp(`\\b${papagaio.symbols.pattern}\\s*\\${papagaio.symbols.open}`, "g");
177
- let result = src;
178
- while (true) {
179
- patRe.lastIndex = 0;
180
- const m = patRe.exec(result);
231
+ function findNextSignificantToken(t, i) {
232
+ for (let k = i + 1; k < t.length; k++) {
233
+ if (!['whitespace-optional', 'ws-optional', 'ws-required', 'any-ws-optional', 'any-ws-required'].includes(t[k].type)) return t[k];
234
+ }
235
+ return null;
236
+ }
237
+
238
+ function extractBlockWithWsDelimiter(p, src, openPos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs) {
239
+ let i = openPos;
240
+ if (openDelimIsWs || closeDelimIsWs) {
241
+ if (src.substring(i, i + openDelim.length) === openDelim) {
242
+ i += openDelim.length;
243
+ const s = i;
244
+ let d = 0;
245
+ while (i < src.length) {
246
+ if (src.substring(i, i + openDelim.length) === openDelim) {
247
+ d++;
248
+ i += openDelim.length;
249
+ } else if (src.substring(i, i + closeDelim.length) === closeDelim) {
250
+ if (!d) return [src.substring(s, i), i + closeDelim.length];
251
+ d--;
252
+ i += closeDelim.length;
253
+ } else i++;
254
+ }
255
+ return [src.substring(s), src.length];
256
+ }
257
+ return ['', i];
258
+ }
259
+ if (openDelim.length > 1 || closeDelim.length > 1) {
260
+ if (src.substring(i, i + openDelim.length) === openDelim) {
261
+ i += openDelim.length;
262
+ const s = i;
263
+ let d = 0;
264
+ while (i < src.length) {
265
+ if (src.substring(i, i + openDelim.length) === openDelim) {
266
+ d++;
267
+ i += openDelim.length;
268
+ } else if (src.substring(i, i + closeDelim.length) === closeDelim) {
269
+ if (!d) return [src.substring(s, i), i + closeDelim.length];
270
+ d--;
271
+ i += closeDelim.length;
272
+ } else i++;
273
+ }
274
+ return [src.substring(s), src.length];
275
+ }
276
+ }
277
+ if (src[i] === openDelim) {
278
+ i++;
279
+ const s = i;
280
+ if (openDelim === closeDelim) {
281
+ while (i < src.length && src[i] !== closeDelim) i++;
282
+ return [src.substring(s, i), i + 1];
283
+ }
284
+ let d = 1;
285
+ while (i < src.length && d > 0) {
286
+ if (src[i] === openDelim) d++;
287
+ else if (src[i] === closeDelim) d--;
288
+ if (d > 0) i++;
289
+ }
290
+ return [src.substring(s, i), i + 1];
291
+ }
292
+ return ['', i];
293
+ }
294
+
295
+ function extractBlock(p, src, openPos, openDelim = p.symbols.open, closeDelim = p.symbols.close) {
296
+ let i = openPos;
297
+ if (openDelim.length > 1 || closeDelim.length > 1) {
298
+ if (src.substring(i, i + openDelim.length) === openDelim) {
299
+ i += openDelim.length;
300
+ const s = i;
301
+ let d = 0;
302
+ while (i < src.length) {
303
+ if (src.substring(i, i + openDelim.length) === openDelim) {
304
+ d++;
305
+ i += openDelim.length;
306
+ } else if (src.substring(i, i + closeDelim.length) === closeDelim) {
307
+ if (!d) return [src.substring(s, i), i + closeDelim.length];
308
+ d--;
309
+ i += closeDelim.length;
310
+ } else i++;
311
+ }
312
+ return [src.substring(s), src.length];
313
+ }
314
+ }
315
+ if (src[i] === openDelim) {
316
+ i++;
317
+ const s = i;
318
+ if (openDelim === closeDelim) {
319
+ while (i < src.length && src[i] !== closeDelim) i++;
320
+ return [src.substring(s, i), i + 1];
321
+ }
322
+ let d = 1;
323
+ while (i < src.length && d > 0) {
324
+ if (src[i] === openDelim) d++;
325
+ else if (src[i] === closeDelim) d--;
326
+ if (d > 0) i++;
327
+ }
328
+ return [src.substring(s, i), i + 1];
329
+ }
330
+ return ['', i];
331
+ }
332
+
333
+ function collectPatterns(p, src) {
334
+ const A = [], r = new RegExp(`\\b${p.symbols.pattern}\\s*\\${p.symbols.open}`, "g");
335
+ let out = src;
336
+ while (1) {
337
+ r.lastIndex = 0;
338
+ const m = r.exec(out);
339
+ if (!m) break;
340
+ const s = m.index, o = m.index + m[0].length - 1;
341
+ const [mp, em] = extractBlock(p, out, o);
342
+ let k = em;
343
+ while (k < out.length && /\s/.test(out[k])) k++;
344
+ if (k < out.length && out[k] === p.symbols.open) {
345
+ const [rp, er] = extractBlock(p, out, k);
346
+ A.push({ match: mp.trim(), replace: rp.trim() });
347
+ out = out.slice(0, s) + out.slice(er);
348
+ continue;
349
+ }
350
+ out = out.slice(0, s) + out.slice(em);
351
+ }
352
+ return [A, out];
353
+ }
354
+
355
+ function extractNestedPatterns(p, replaceText) {
356
+ const nested = [];
357
+ const r = new RegExp(`\\${p.symbols.sigil}${escapeRegex(p.symbols.pattern)}\\s*\\${p.symbols.open}`, "g");
358
+ let out = replaceText;
359
+
360
+ while (1) {
361
+ r.lastIndex = 0;
362
+ const m = r.exec(out);
181
363
  if (!m) break;
182
- const start = m.index;
183
- const openPos = m.index + m[0].length - 1;
184
- const [matchPat, posAfterMatch] = extractBlock(papagaio, result, openPos);
185
- let k = posAfterMatch;
186
- while (k < result.length && /\s/.test(result[k])) k++;
187
- if (k < result.length && result[k] === papagaio.symbols.open) {
188
- const [replacePat, posAfterReplace] = extractBlock(papagaio, result, k);
189
- patterns.push({ match: matchPat.trim(), replace: replacePat.trim() });
190
- result = result.slice(0, start) + result.slice(posAfterReplace);
364
+
365
+ const s = m.index, o = m.index + m[0].length - 1;
366
+ const [mp, em] = extractBlock(p, out, o);
367
+ let k = em;
368
+
369
+ while (k < out.length && /\s/.test(out[k])) k++;
370
+
371
+ if (k < out.length && out[k] === p.symbols.open) {
372
+ const [rp, er] = extractBlock(p, out, k);
373
+ nested.push({ match: mp.trim(), replace: rp.trim() });
374
+ out = out.slice(0, s) + out.slice(er);
191
375
  continue;
192
376
  }
193
- result = result.slice(0, start) + result.slice(posAfterMatch);
377
+ out = out.slice(0, s) + out.slice(em);
194
378
  }
195
- return [patterns, result];
379
+
380
+ return [nested, out];
196
381
  }
197
382
 
198
- function applyPatterns(papagaio, src, patterns) {
199
- let clearFlag = false, lastResult = "", S = papagaio.symbols.sigil;
200
- for (const pat of patterns) {
201
- const tokens = parsePattern(papagaio, pat.match);
202
- let newSrc = '';
203
- let pos = 0, matched = false;
383
+ function applyPatterns(p, src, pats) {
384
+ let clear = false, last = "", S = p.symbols.sigil;
385
+ for (const pat of pats) {
386
+ const t = parsePattern(p, pat.match);
387
+ let n = '', pos = 0, ok = false;
204
388
  while (pos < src.length) {
205
- const matchResult = matchPattern(papagaio, src, tokens, pos);
206
- if (matchResult) {
207
- matched = true;
208
- const { captures, endPos } = matchResult;
209
- let result = pat.replace;
389
+ const m = matchPattern(p, src, t, pos);
390
+ if (m) {
391
+ ok = true;
392
+ const { captures, endPos } = m;
393
+ let r = pat.replace;
394
+
395
+ // Extract and process nested patterns ($pattern)
396
+ const [nestedPats, cleanReplace] = extractNestedPatterns(p, r);
397
+ r = cleanReplace;
398
+
210
399
  for (const [k, v] of Object.entries(captures)) {
211
- const keyEsc = escapeRegex(k);
212
- result = result.replace(new RegExp(keyEsc + '(?![A-Za-z0-9_])', 'g'), v);
400
+ const e = escapeRegex(k);
401
+ r = r.replace(new RegExp(e + '(?![A-Za-z0-9_])', 'g'), v);
402
+ }
403
+
404
+ // Apply the nested patterns to the result
405
+ if (nestedPats.length > 0) {
406
+ r = applyPatterns(p, r, nestedPats);
213
407
  }
214
408
 
215
- const uniqueId = papagaio.unique_id++;
216
- result = result.replace(new RegExp(`${escapeRegex(S)}unique\\b`, 'g'), () => String(uniqueId));
217
- result = result.replace(/\$eval\{([^}]*)\}/g, (_, code) => {
409
+ const uid = p.unique_id++;
410
+ r = r.replace(new RegExp(`${escapeRegex(S)}unique\\b`, 'g'), () => String(uid));
411
+ r = r.replace(/\$eval\{([^}]*)\}/g, (_, c) => {
218
412
  try {
219
- const wrapped = `"use strict"; return (function() { ${code} })();`;
220
- return String(Function("papagaio", "ctx", wrapped)(papagaio, {}));
413
+ return String(Function("papagaio", "ctx", `"use strict";return(function(){${c}})();`)(p, {}));
221
414
  } catch {
222
415
  return "";
223
416
  }
224
417
  });
225
- const S2 = S + S;
226
- result = result.replace(new RegExp(escapeRegex(S2), 'g'), '');
227
-
228
- if (new RegExp(`${escapeRegex(S)}clear\\b`, 'g').test(result)) {
229
- result = result.replace(new RegExp(`${escapeRegex(S)}clear\\b\\s?`, 'g'), '');
230
- clearFlag = true;
418
+ r = r.replace(new RegExp(escapeRegex(S + S), 'g'), '');
419
+ if (new RegExp(`${escapeRegex(S)}clear\\b`, 'g').test(r)) {
420
+ r = r.replace(new RegExp(`${escapeRegex(S)}clear\\b\\s?`, 'g'), '');
421
+ clear = true;
231
422
  }
232
-
233
- const matchStart = pos, matchEnd = endPos;
234
- result = result
235
- .replace(new RegExp(`${escapeRegex(S)}prefix\\b`, 'g'), src.slice(0, matchStart))
236
- .replace(new RegExp(`${escapeRegex(S)}suffix\\b`, 'g'), src.slice(matchEnd))
237
- .replace(new RegExp(`${escapeRegex(S)}match\\b`, 'g'), src.slice(matchStart, matchEnd));
238
- newSrc += result;
239
- lastResult = result;
423
+ const ms = pos, me = endPos;
424
+ r = r.replace(new RegExp(`${escapeRegex(S)}prefix\\b`, 'g'), src.slice(0, ms))
425
+ .replace(new RegExp(`${escapeRegex(S)}suffix\\b`, 'g'), src.slice(me))
426
+ .replace(new RegExp(`${escapeRegex(S)}match\\b`, 'g'), src.slice(ms, me));
427
+ n += r;
428
+ last = r;
240
429
  pos = endPos;
241
430
  } else {
242
- newSrc += src[pos];
431
+ n += src[pos];
243
432
  pos++;
244
433
  }
245
434
  }
246
- if (matched) {
247
- src = clearFlag ? lastResult : newSrc;
248
- clearFlag = false;
435
+ if (ok) {
436
+ src = clear ? last : n;
437
+ clear = false;
249
438
  }
250
439
  }
251
440
  return src;
252
441
  }
253
442
 
254
- function escapeRegex(str) {
255
- return str.replace(/[.*+?^${}()|[\]\\""']/g, '\\$&');
443
+ function escapeRegex(s) {
444
+ return s.replace(/[.*+?^${}()|[\]\\""']/g, '\\$&');
256
445
  }
257
446
 
258
- function unescapeDelimiter(str) {
259
- let result = '';
260
- for (let i = 0; i < str.length; i++) {
261
- if (str[i] === '\\' && i + 1 < str.length) {
262
- const next = str[i + 1];
263
- if (next === '"' || next === "'" || next === '\\') {
264
- result += next;
447
+ function unescapeDelimiter(s) {
448
+ let r = '';
449
+ for (let i = 0; i < s.length; i++) {
450
+ if (s[i] === '\\' && i + 1 < s.length) {
451
+ const n = s[i + 1];
452
+ if (n === '"' || n === "'" || n === '\\') {
453
+ r += n;
265
454
  i++;
266
- } else {
267
- result += str[i];
268
- }
269
- } else {
270
- result += str[i];
271
- }
455
+ } else r += s[i];
456
+ } else r += s[i];
272
457
  }
273
- return result;
458
+ return r;
274
459
  }
275
460
 
276
461
  export class Papagaio {
277
- constructor() {
462
+ constructor(sigil = "$", open = "{", close = "}", pattern = "pattern") {
278
463
  this.recursion_limit = 512;
279
464
  this.unique_id = 0;
280
- this.symbols = {
281
- pattern: "pattern",
282
- context: "context",
283
- open: "{",
284
- close: "}",
285
- sigil: "$"
286
- };
465
+ this.symbols = { sigil: sigil, open: open, close: close, pattern: pattern};
287
466
  this.content = "";
288
467
  }
289
-
290
468
  process(input) {
291
469
  this.content = input;
292
- let src = input, last = null, iter = 0;
293
- const pending = () => {
294
- const rCtx = new RegExp(`\\b${this.symbols.context}\\s*\\${this.symbols.open}`, "g");
295
- const rPat = new RegExp(`\\b${this.symbols.pattern}\\s*\\${this.symbols.open}`, "g");
296
- return rCtx.test(src) || rPat.test(src);
470
+ let src = input, last = null, it = 0;
471
+ const pend = () => {
472
+ const r2 = new RegExp(`\\b${this.symbols.pattern}\\s*\\${this.symbols.open}`, "g");
473
+ return r2.test(src);
297
474
  };
298
- while (src !== last && iter < this.recursion_limit) {
299
- iter++;
475
+ while (src !== last && it < this.recursion_limit) {
476
+ it++;
300
477
  last = src;
301
- src = processContext(this, src);
302
- const [patterns, s2] = collectPatterns(this, src);
303
- src = applyPatterns(this, s2, patterns);
304
- if (!pending()) break;
478
+ const [p, s2] = collectPatterns(this, src);
479
+ src = applyPatterns(this, s2, p);
480
+ if (!pend()) break;
305
481
  }
306
482
  return this.content = src, src;
307
483
  }
package/tests/tests.json CHANGED
@@ -9,7 +9,7 @@
9
9
  {
10
10
  "id": 2,
11
11
  "name": "Flexible whitespace ($)",
12
- "code": "pattern {$x$$and$$$y} {$x & $y}\nhello and world",
12
+ "code": "pattern {$$x and $$y} {$x & $y}\nhello and world",
13
13
  "expected": "hello & world"
14
14
  },
15
15
  {
@@ -75,7 +75,7 @@
75
75
  {
76
76
  "id": 13,
77
77
  "name": "Pattern with flexible whitespace in middle",
78
- "code": "pattern {from$$to} {path: from -> to}\nfrom to\nfrom to\nfrom to",
78
+ "code": "pattern {from to} {path: from -> to}\nfrom to\nfrom to\nfrom to",
79
79
  "expected": "path: from -> to"
80
80
  },
81
81
  {
@@ -129,7 +129,7 @@
129
129
  {
130
130
  "id": 22,
131
131
  "name": "Flexible whitespace with semicolon",
132
- "code": "pattern {$$$a$$;$$$b$$} {$a AND $b}\nx ; y\nx; y\nx ; y",
132
+ "code": "pattern {$$a;$$b} {$a AND $b}\nx ; y\nx; y\nx ; y",
133
133
  "expected": "x AND y"
134
134
  },
135
135
  {
@@ -182,8 +182,8 @@
182
182
  },
183
183
  {
184
184
  "id": 31,
185
- "name": "Block with empty delimiter (default)",
186
- "code": "pattern {$block data {}{}} {DATA:$data}\ndata { hello world }",
185
+ "name": "Delimiter test",
186
+ "code": "pattern {$block data {[}{]}} {DATA:$data}\ndata [ hello world ]",
187
187
  "expected": "DATA: hello world"
188
188
  },
189
189
  {
@@ -267,7 +267,7 @@
267
267
  {
268
268
  "id": 45,
269
269
  "name": "Pattern with flexible space before and after",
270
- "code": "pattern {$$hello$$} {FOUND}\n hello ",
270
+ "code": "pattern {$$ hello$$ } {FOUND}\n hello ",
271
271
  "expected": "FOUND"
272
272
  },
273
273
  {
@@ -315,7 +315,7 @@
315
315
  {
316
316
  "id": 53,
317
317
  "name": "Trim multiple surrounding spaces",
318
- "code": "pattern {$$$word$$} {FOUND: $word}\n word ",
318
+ "code": "pattern {$ $word$ } {FOUND: $word}\n word ",
319
319
  "expected": "FOUND: word"
320
320
  },
321
321
  {
@@ -543,7 +543,7 @@
543
543
  {
544
544
  "id": 91,
545
545
  "name": "CSV to JSON",
546
- "code": "pattern {$a,$b,$c} {{ id: '$a', name: '$b', role: '$c' }}\n1,Alice,Engineer\n2,Bob,Designer",
546
+ "code": "pattern {$a,$$b,$$c} {{ id: '$a', name: '$b', role: '$c' }}\n1,Alice,Engineer\n2,Bob,Designer",
547
547
  "expected": "{ id: '1', name: 'Alice', role: 'Engineer' }\n{ id: '2', name: 'Bob', role: 'Designer' }"
548
548
  },
549
549
  {
@@ -639,7 +639,7 @@
639
639
  {
640
640
  "id": 107,
641
641
  "name": "Comma-separated pattern matching",
642
- "code": "pattern {$a,$b,$c} {[$a] [$b] [$c]}\none,two,three",
642
+ "code": "pattern {$$a,$$b,$$c} {[$a] [$b] [$c]}\none,two,three",
643
643
  "expected": "[one] [two] [three]"
644
644
  },
645
645
  {
@@ -663,7 +663,7 @@
663
663
  {
664
664
  "id": 111,
665
665
  "name": "Pattern with dot literal",
666
- "code": "pattern {$file.txt} {File: $file}\ndocument.txt",
666
+ "code": "pattern {$$file.txt} {File: $file.txt}\ndocument.txt",
667
667
  "expected": "File: document"
668
668
  },
669
669
  {
@@ -693,7 +693,7 @@
693
693
  {
694
694
  "id": 116,
695
695
  "name": "Multiple spaces trim with flexible whitespace",
696
- "code": "pattern {$$$word$$} {FOUND: $word}\n word ",
696
+ "code": "pattern {$ $word$ } {FOUND: $word}\n word ",
697
697
  "expected": "FOUND: word"
698
698
  },
699
699
  {
@@ -711,7 +711,7 @@
711
711
  {
712
712
  "id": 119,
713
713
  "name": "Trailing punctuation in pattern",
714
- "code": "pattern {$w,} {$w!}\nhello,",
714
+ "code": "pattern {$$w,} {$$w!}\nhello,",
715
715
  "expected": "hello!"
716
716
  }
717
717
  ]