papagaio 0.2.8 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -46,17 +46,6 @@ apple banana cherry
46
46
  ```
47
47
  Output: `cherry, banana, apple`
48
48
 
49
- ### 3. Flexible Whitespace (`$$`)
50
-
51
- ```
52
- pattern {$x$$and$$$y} {$x & $y}
53
- hello and world
54
- hello and world
55
- ```
56
- Output: `hello & world` (both)
57
-
58
- `$$` = zero or more spaces/tabs/newlines.
59
-
60
49
  ## Blocks
61
50
 
62
51
  Capture content between delimiters.
@@ -266,9 +255,13 @@ Output:
266
255
 
267
256
  ### Matching
268
257
  - Variables (`$x`) capture **one word** (no spaces)
269
- - `$$` = flexible whitespace (0+ spaces/tabs/newlines)
258
+ - Multi-word variables (`$$x`) capture one or more words (spaces allowed)
270
259
  - Patterns apply **globally** each iteration
271
260
  - Auto-recursion until: max 512 iterations OR no changes
261
+ - `$ ` = one or more of the whitespace character written after the sigil (space, tab, or newline)
262
+ - `$$ ` = zero or more of the whitespace character written after the sigils (space, tab, or newline)
263
+ - `$$$ ` = one or more whitespace characters of any kind
264
+ - `$$$$ ` = zero or more whitespace characters of any kind
272
265
 
273
266
  ### Block Matching
274
267
  - `$block name {open}{close}` captures between delimiters
@@ -280,8 +273,11 @@ Output:
280
273
  - Reuse: `$x` appears multiple times in replace
281
274
  - Undefined: becomes empty string
282
275
 
283
- ### Sigil
284
- - You cannot match words containing the sigil character.
276
+ ### Limitations
277
+ - You cannot match words containing the current sigil character.
278
+ - You cannot match a $block{}{} using the current delimiters.
279
+ - By design, whitespace operators must be followed by a whitespace character to work properly, including the `$$$ ` and `$$$$ ` ones.
280
+ - Multi-word variables (`$$x`) also capture leading/trailing whitespace, so be careful when combining them with whitespace operators.
285
281
 
286
282
  ---
287
283
 
@@ -289,11 +285,13 @@ Output:
289
285
 
290
286
  | Problem | Solution |
291
287
  |---------|----------|
292
- | Pattern doesn't match | Use `$$` between elements for flexible whitespace |
293
288
  | Variable not captured | Check space between variables |
294
289
  | Block not working | Verify balanced delimiters `{` `}` |
295
290
  | Infinite recursion | Use `$clear` or reduce `recursion_limit` |
296
291
  | $eval not working | Errors return empty string, use try-catch |
292
+ | Pattern doesn't match | Use whitespace operators between elements for flexible whitespace |
293
+ | Whitespace operators | Remember that they must be followed by a whitespace character to work properly |
294
+ | Whitespace operators not matching | Multi-word variables (`$$x`) also capture leading/trailing whitespace, so be careful when combining them with whitespace operators. |
297
295
 
298
296
  ## Known Bugs
299
297
 
@@ -305,7 +303,7 @@ Output:
305
303
 
306
304
  ```
307
305
  pattern {$x $y} {$y, $x} # basic pattern
308
- pattern {$x$$y} {$x-$y} # flexible whitespace
306
+ pattern {$x$ $y} {$x-$y} # flexible whitespace
309
307
  pattern {$block n {o}{c}} {$n} # block
310
308
  context { ... } # recursive scope
311
309
  $unique # unique ID per pattern
@@ -313,6 +311,7 @@ $match # full match
313
311
  $prefix / $suffix # before/after
314
312
  $clear # clear before
315
313
  $eval{code} # execute JS
314
+ $ / $$ / $$$ / $$$$ # whitespace operators
316
315
  ```
317
316
 
318
317
  ---
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "papagaio",
3
- "version": "0.2.8",
3
+ "version": "0.3.1",
4
4
  "description": "easy yet powerful preprocessor",
5
5
  "main": "src/papagaio.js",
6
6
  "type": "module",
package/src/papagaio.js CHANGED
@@ -1,95 +1,88 @@
1
1
  // https://github.com/jardimdanificado/papagaio
2
-
3
- function processContext(papagaio, src) {
4
- const ctxRe = new RegExp(`\\b${papagaio.symbols.context}\\s*\\${papagaio.symbols.open}`, "g");
5
- let m, matches = [];
6
- while ((m = ctxRe.exec(src)) !== null)
7
- matches.push({ idx: m.index, pos: m.index + m[0].length - 1 });
8
- for (let j = matches.length - 1; j >= 0; j--) {
9
- const x = matches[j], [content, posAfter] = extractBlock(papagaio, src, x.pos);
10
- if (!content.trim()) {
11
- src = src.slice(0, x.idx) + src.slice(posAfter);
2
+
3
+ function parsePattern(papagaio, pattern) {
4
+ const tokens = [], S = papagaio.symbols.sigil, S2 = S + S;
5
+ let i = 0;
6
+ const isWhitespaceChar = c => /\s/.test(c);
7
+ const getWhitespaceType = c => c === ' ' ? 'space' : c === '\t' ? 'tab' : c === '\n' ? 'newline' : c === '\r' ? 'carriage-return' : 'other';
8
+ while (i < pattern.length) {
9
+ if (pattern.startsWith(S + S + S, i) && i + 3 < pattern.length && isWhitespaceChar(pattern[i + 3])) {
10
+ tokens.push({ type: 'any-ws-required', wsChar: pattern[i + 3] });
11
+ i += 4;
12
12
  continue;
13
13
  }
14
- const proc = papagaio.process(content);
15
- let left = src.substring(0, x.idx), right = src.substring(posAfter);
16
- let prefix = left.endsWith("\n") ? "\n" : "";
17
- if (prefix) left = left.slice(0, -1);
18
- src = left + prefix + proc + right;
19
- }
20
- return src;
21
- }
22
-
23
- function extractBlock(papagaio, src, openPos, openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close) {
24
- let i = openPos;
25
- if (openDelim.length > 1 || closeDelim.length > 1) {
26
- if (src.substring(i, i + openDelim.length) === openDelim) {
27
- i += openDelim.length;
28
- const innerStart = i;
29
- let d = 0;
30
- while (i < src.length) {
31
- if (src.substring(i, i + openDelim.length) === openDelim) {
32
- d++;
33
- i += openDelim.length;
34
- } else if (src.substring(i, i + closeDelim.length) === closeDelim) {
35
- if (d === 0) return [src.substring(innerStart, i), i + closeDelim.length];
36
- d--;
37
- i += closeDelim.length;
38
- } else i++;
39
- }
40
- return [src.substring(innerStart), src.length];
14
+ if (pattern.startsWith(S + S + S + S, i) && i + 4 < pattern.length && isWhitespaceChar(pattern[i + 4])) {
15
+ tokens.push({ type: 'any-ws-optional', wsChar: pattern[i + 4] });
16
+ i += 5;
17
+ continue;
41
18
  }
42
- }
43
- if (src[i] === openDelim) {
44
- i++;
45
- const innerStart = i;
46
- if (openDelim === closeDelim) {
47
- while (i < src.length && src[i] !== closeDelim) i++;
48
- return [src.substring(innerStart, i), i + 1];
49
- } else {
50
- let depth = 1;
51
- while (i < src.length && depth > 0) {
52
- if (src[i] === openDelim) depth++;
53
- else if (src[i] === closeDelim) depth--;
54
- if (depth > 0) i++;
19
+ if (pattern.startsWith(S2, i)) {
20
+ let j = i + S2.length, varName = '';
21
+ while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
22
+ if (varName) {
23
+ if (j < pattern.length && isWhitespaceChar(pattern[j])) {
24
+ tokens.push({ type: 'var-ws', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
25
+ i = j + 1;
26
+ continue;
27
+ } else if (j < pattern.length && pattern[j] === S) {
28
+ tokens.push({ type: 'var-ws', varName, wsTrailing: 'optional', wsChar: null });
29
+ i = j + 1;
30
+ continue;
31
+ } else {
32
+ tokens.push({ type: 'var-ws', varName });
33
+ i = j;
34
+ continue;
35
+ }
55
36
  }
56
- return [src.substring(innerStart, i), i + 1];
57
37
  }
58
- }
59
- return ['', i];
60
- }
61
-
62
- function parsePattern(papagaio, pattern) {
63
- const tokens = [];
64
- let i = 0;
65
- const S = papagaio.symbols.sigil, S2 = S + S;
66
- while (i < pattern.length) {
38
+ if (pattern.startsWith(S2, i) && i + 2 < pattern.length && isWhitespaceChar(pattern[i + 2])) {
39
+ tokens.push({ type: 'ws-optional', wsType: getWhitespaceType(pattern[i + 2]), wsChar: pattern[i + 2] });
40
+ i += 3;
41
+ continue;
42
+ }
67
43
  if (pattern.startsWith(S2, i)) {
68
44
  tokens.push({ type: 'whitespace-optional' });
69
45
  i += S2.length;
70
46
  continue;
71
47
  }
48
+ if (pattern[i] === S && i + 1 < pattern.length && isWhitespaceChar(pattern[i + 1])) {
49
+ tokens.push({ type: 'ws-required', wsType: getWhitespaceType(pattern[i + 1]), wsChar: pattern[i + 1] });
50
+ i += 2;
51
+ continue;
52
+ }
72
53
  if (pattern.startsWith(S + 'block', i)) {
73
- let j = i + S.length + 'block'.length;
54
+ let j = i + S.length + 5;
74
55
  while (j < pattern.length && /\s/.test(pattern[j])) j++;
75
56
  let varName = '';
76
57
  while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
77
58
  if (varName) {
78
59
  while (j < pattern.length && /\s/.test(pattern[j])) j++;
79
- let openDelim = papagaio.symbols.open;
60
+ let openDelim = papagaio.symbols.open, closeDelim = papagaio.symbols.close;
61
+ let openDelimIsWs = false, closeDelimIsWs = false;
80
62
  if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
81
63
  const [c, e] = extractBlock(papagaio, pattern, j);
82
- openDelim = unescapeDelimiter(c.trim()) || papagaio.symbols.open;
64
+ const trimmed = c.trim();
65
+ if (trimmed === '') {
66
+ openDelimIsWs = true;
67
+ let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
68
+ while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
69
+ openDelim = pattern.substring(wsStart, wsEnd);
70
+ } else openDelim = unescapeDelimiter(trimmed) || papagaio.symbols.open;
83
71
  j = e;
84
72
  while (j < pattern.length && /\s/.test(pattern[j])) j++;
85
73
  }
86
- let closeDelim = papagaio.symbols.close;
87
74
  if (j < pattern.length && pattern[j] === papagaio.symbols.open) {
88
75
  const [c, e] = extractBlock(papagaio, pattern, j);
89
- closeDelim = unescapeDelimiter(c.trim()) || papagaio.symbols.close;
76
+ const trimmed = c.trim();
77
+ if (trimmed === '') {
78
+ closeDelimIsWs = true;
79
+ let wsStart = j + papagaio.symbols.open.length, wsEnd = wsStart;
80
+ while (wsEnd < pattern.length && pattern[wsEnd] !== papagaio.symbols.close) wsEnd++;
81
+ closeDelim = pattern.substring(wsStart, wsEnd);
82
+ } else closeDelim = unescapeDelimiter(trimmed) || papagaio.symbols.close;
90
83
  j = e;
91
84
  }
92
- tokens.push({ type: 'block', varName, openDelim, closeDelim });
85
+ tokens.push({ type: 'block', varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs });
93
86
  i = j;
94
87
  continue;
95
88
  }
@@ -98,210 +91,375 @@ function parsePattern(papagaio, pattern) {
98
91
  let j = i + S.length, varName = '';
99
92
  while (j < pattern.length && /[A-Za-z0-9_]/.test(pattern[j])) varName += pattern[j++];
100
93
  if (varName) {
101
- tokens.push({ type: 'var', varName });
102
- i = j;
103
- continue;
94
+ if (j < pattern.length && isWhitespaceChar(pattern[j])) {
95
+ tokens.push({ type: 'var', varName, wsTrailing: getWhitespaceType(pattern[j]), wsChar: pattern[j] });
96
+ i = j + 1;
97
+ continue;
98
+ } else if (j < pattern.length && pattern[j] === S) {
99
+ tokens.push({ type: 'var', varName, wsTrailing: 'optional', wsChar: null });
100
+ i = j + 1;
101
+ continue;
102
+ } else {
103
+ tokens.push({ type: 'var', varName });
104
+ i = j;
105
+ continue;
106
+ }
104
107
  }
105
- tokens.push({ type: 'literal', value: S });
106
- i += S.length;
107
- continue;
108
108
  }
109
- if (/\s/.test(pattern[i])) {
109
+ if (isWhitespaceChar(pattern[i])) {
110
110
  let ws = '';
111
- while (i < pattern.length && /\s/.test(pattern[i])) ws += pattern[i++];
112
- tokens.push({ type: 'whitespace', value: ws });
111
+ while (i < pattern.length && isWhitespaceChar(pattern[i])) ws += pattern[i++];
112
+ tokens.push({ type: 'literal-ws', value: ws });
113
113
  continue;
114
114
  }
115
115
  let literal = '';
116
- while (i < pattern.length && !pattern.startsWith(S, i) && !/\s/.test(pattern[i])) literal += pattern[i++];
116
+ while (i < pattern.length && !pattern.startsWith(S, i) && !isWhitespaceChar(pattern[i])) literal += pattern[i++];
117
117
  if (literal) tokens.push({ type: 'literal', value: literal });
118
118
  }
119
119
  return tokens;
120
120
  }
121
121
 
122
122
  function matchPattern(papagaio, src, tokens, startPos = 0) {
123
- let pos = startPos;
124
- const captures = {};
123
+ let pos = startPos, captures = {};
124
+ const matchWhitespaceType = (str, idx, wsType) => {
125
+ if (idx >= str.length) return { matched: '', newPos: idx };
126
+ if (wsType === 'space' && str[idx] === ' ') {
127
+ let j = idx;
128
+ while (j < str.length && str[j] === ' ') j++;
129
+ return { matched: str.slice(idx, j), newPos: j };
130
+ }
131
+ if (wsType === 'tab' && str[idx] === '\t') {
132
+ let j = idx;
133
+ while (j < str.length && str[j] === '\t') j++;
134
+ return { matched: str.slice(idx, j), newPos: j };
135
+ }
136
+ if (wsType === 'newline' && str[idx] === '\n') {
137
+ let j = idx;
138
+ while (j < str.length && str[j] === '\n') j++;
139
+ return { matched: str.slice(idx, j), newPos: j };
140
+ }
141
+ return { matched: '', newPos: idx };
142
+ };
125
143
  for (let ti = 0; ti < tokens.length; ti++) {
126
144
  const token = tokens[ti];
127
- if (token.type === 'whitespace-optional') {
128
- while (pos < src.length && /\s/.test(src[pos])) pos++;
145
+ if (token.type === 'literal-ws') {
146
+ if (!src.startsWith(token.value, pos)) return null;
147
+ pos += token.value.length;
129
148
  continue;
130
149
  }
131
- if (token.type === 'whitespace') {
150
+ if (token.type === 'ws-required') {
151
+ const { matched, newPos } = matchWhitespaceType(src, pos, token.wsType);
152
+ if (!matched) return null;
153
+ pos = newPos;
154
+ continue;
155
+ }
156
+ if (token.type === 'ws-optional') {
157
+ const { newPos } = matchWhitespaceType(src, pos, token.wsType);
158
+ pos = newPos;
159
+ continue;
160
+ }
161
+ if (token.type === 'any-ws-required') {
132
162
  if (pos >= src.length || !/\s/.test(src[pos])) return null;
133
163
  while (pos < src.length && /\s/.test(src[pos])) pos++;
134
164
  continue;
135
165
  }
166
+ if (token.type === 'any-ws-optional') {
167
+ while (pos < src.length && /\s/.test(src[pos])) pos++;
168
+ continue;
169
+ }
170
+ if (token.type === 'whitespace-optional') {
171
+ while (pos < src.length && /\s/.test(src[pos])) pos++;
172
+ continue;
173
+ }
136
174
  if (token.type === 'literal') {
137
175
  if (!src.startsWith(token.value, pos)) return null;
138
176
  pos += token.value.length;
139
177
  continue;
140
178
  }
141
179
  if (token.type === 'var') {
142
- const nextToken = ti + 1 < tokens.length ? tokens[ti + 1] : null;
143
- let varValue = '';
144
- if (nextToken) {
145
- if (nextToken.type === 'whitespace' || nextToken.type === 'whitespace-optional') {
146
- while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
147
- } else if (nextToken.type === 'literal') {
148
- const stopChar = nextToken.value[0];
149
- while (pos < src.length && src[pos] !== stopChar && !/\s/.test(src[pos])) varValue += src[pos++];
150
- } else if (nextToken.type === 'block') {
151
- while (pos < src.length && !src.startsWith(nextToken.openDelim, pos) && !/\s/.test(src[pos])) varValue += src[pos++];
152
- } else {
153
- while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
154
- }
180
+ let v = '';
181
+ const nextToken = findNextSignificantToken(tokens, ti);
182
+ if (nextToken && nextToken.type === 'literal') {
183
+ while (pos < src.length && !src.startsWith(nextToken.value, pos) && !/\s/.test(src[pos])) v += src[pos++];
155
184
  } else {
156
- while (pos < src.length && !/\s/.test(src[pos])) varValue += src[pos++];
185
+ while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
157
186
  }
158
- if (!varValue) return null;
159
- captures[papagaio.symbols.sigil + token.varName] = varValue;
187
+ if (token.wsTrailing && token.wsTrailing !== 'optional') {
188
+ const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
189
+ pos = newPos;
190
+ } else if (token.wsTrailing === 'optional') {
191
+ const { newPos } = matchWhitespaceType(src, pos, 'space');
192
+ pos = newPos;
193
+ }
194
+ if (!v) return null;
195
+ captures[papagaio.symbols.sigil + token.varName] = v;
196
+ continue;
197
+ }
198
+ if (token.type === 'var-ws') {
199
+ while (pos < src.length && /\s/.test(src[pos])) pos++;
200
+ const n = findNextSignificantToken(tokens, ti);
201
+ let v = '';
202
+ if (!n || ['var', 'var-ws', 'block'].includes(n.type)) {
203
+ while (pos < src.length && !/\s/.test(src[pos])) v += src[pos++];
204
+ } else if (n.type === 'literal') {
205
+ while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n') v += src[pos++];
206
+ v = v.trimEnd();
207
+ }
208
+ if (token.wsTrailing && token.wsTrailing !== 'optional') {
209
+ const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
210
+ pos = newPos;
211
+ } else if (token.wsTrailing === 'optional') {
212
+ const { newPos } = matchWhitespaceType(src, pos, 'space');
213
+ pos = newPos;
214
+ }
215
+ if (!v) return null;
216
+ captures[papagaio.symbols.sigil + token.varName] = v;
160
217
  continue;
161
218
  }
162
219
  if (token.type === 'block') {
163
- const { varName, openDelim, closeDelim } = token;
220
+ const { varName, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs } = token;
164
221
  if (!src.startsWith(openDelim, pos)) return null;
165
- const [blockContent, endPos] = extractBlock(papagaio, src, pos, openDelim, closeDelim);
166
- captures[papagaio.symbols.sigil + varName] = blockContent;
167
- pos = endPos;
222
+ const [c, e] = extractBlockWithWsDelimiter(papagaio, src, pos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs);
223
+ captures[papagaio.symbols.sigil + varName] = c;
224
+ pos = e;
168
225
  continue;
169
226
  }
170
227
  }
171
228
  return { captures, endPos: pos };
172
229
  }
173
230
 
174
- function collectPatterns(papagaio, src) {
175
- const patterns = [];
176
- const patRe = new RegExp(`\\b${papagaio.symbols.pattern}\\s*\\${papagaio.symbols.open}`, "g");
177
- let result = src;
178
- while (true) {
179
- patRe.lastIndex = 0;
180
- const m = patRe.exec(result);
231
+ function findNextSignificantToken(t, i) {
232
+ for (let k = i + 1; k < t.length; k++) {
233
+ if (!['whitespace-optional', 'ws-optional', 'ws-required', 'any-ws-optional', 'any-ws-required'].includes(t[k].type)) return t[k];
234
+ }
235
+ return null;
236
+ }
237
+
238
+ function extractBlockWithWsDelimiter(p, src, openPos, openDelim, closeDelim, openDelimIsWs, closeDelimIsWs) {
239
+ let i = openPos;
240
+ if (openDelimIsWs || closeDelimIsWs) {
241
+ if (src.substring(i, i + openDelim.length) === openDelim) {
242
+ i += openDelim.length;
243
+ const s = i;
244
+ let d = 0;
245
+ while (i < src.length) {
246
+ if (src.substring(i, i + openDelim.length) === openDelim) {
247
+ d++;
248
+ i += openDelim.length;
249
+ } else if (src.substring(i, i + closeDelim.length) === closeDelim) {
250
+ if (!d) return [src.substring(s, i), i + closeDelim.length];
251
+ d--;
252
+ i += closeDelim.length;
253
+ } else i++;
254
+ }
255
+ return [src.substring(s), src.length];
256
+ }
257
+ return ['', i];
258
+ }
259
+ if (openDelim.length > 1 || closeDelim.length > 1) {
260
+ if (src.substring(i, i + openDelim.length) === openDelim) {
261
+ i += openDelim.length;
262
+ const s = i;
263
+ let d = 0;
264
+ while (i < src.length) {
265
+ if (src.substring(i, i + openDelim.length) === openDelim) {
266
+ d++;
267
+ i += openDelim.length;
268
+ } else if (src.substring(i, i + closeDelim.length) === closeDelim) {
269
+ if (!d) return [src.substring(s, i), i + closeDelim.length];
270
+ d--;
271
+ i += closeDelim.length;
272
+ } else i++;
273
+ }
274
+ return [src.substring(s), src.length];
275
+ }
276
+ }
277
+ if (src[i] === openDelim) {
278
+ i++;
279
+ const s = i;
280
+ if (openDelim === closeDelim) {
281
+ while (i < src.length && src[i] !== closeDelim) i++;
282
+ return [src.substring(s, i), i + 1];
283
+ }
284
+ let d = 1;
285
+ while (i < src.length && d > 0) {
286
+ if (src[i] === openDelim) d++;
287
+ else if (src[i] === closeDelim) d--;
288
+ if (d > 0) i++;
289
+ }
290
+ return [src.substring(s, i), i + 1];
291
+ }
292
+ return ['', i];
293
+ }
294
+
295
+ function extractBlock(p, src, openPos, openDelim = p.symbols.open, closeDelim = p.symbols.close) {
296
+ let i = openPos;
297
+ if (openDelim.length > 1 || closeDelim.length > 1) {
298
+ if (src.substring(i, i + openDelim.length) === openDelim) {
299
+ i += openDelim.length;
300
+ const s = i;
301
+ let d = 0;
302
+ while (i < src.length) {
303
+ if (src.substring(i, i + openDelim.length) === openDelim) {
304
+ d++;
305
+ i += openDelim.length;
306
+ } else if (src.substring(i, i + closeDelim.length) === closeDelim) {
307
+ if (!d) return [src.substring(s, i), i + closeDelim.length];
308
+ d--;
309
+ i += closeDelim.length;
310
+ } else i++;
311
+ }
312
+ return [src.substring(s), src.length];
313
+ }
314
+ }
315
+ if (src[i] === openDelim) {
316
+ i++;
317
+ const s = i;
318
+ if (openDelim === closeDelim) {
319
+ while (i < src.length && src[i] !== closeDelim) i++;
320
+ return [src.substring(s, i), i + 1];
321
+ }
322
+ let d = 1;
323
+ while (i < src.length && d > 0) {
324
+ if (src[i] === openDelim) d++;
325
+ else if (src[i] === closeDelim) d--;
326
+ if (d > 0) i++;
327
+ }
328
+ return [src.substring(s, i), i + 1];
329
+ }
330
+ return ['', i];
331
+ }
332
+
333
+ function processContext(p, src) {
334
+ const r = new RegExp(`\\b${p.symbols.context}\\s*\\${p.symbols.open}`, "g");
335
+ let m, a = [];
336
+ while ((m = r.exec(src)) !== null) a.push({ idx: m.index, pos: m.index + m[0].length - 1 });
337
+ for (let j = a.length - 1; j >= 0; j--) {
338
+ const x = a[j], [c, e] = extractBlock(p, src, x.pos);
339
+ if (!c.trim()) {
340
+ src = src.slice(0, x.idx) + src.slice(e);
341
+ continue;
342
+ }
343
+ const r2 = p.process(c);
344
+ let L = src.substring(0, x.idx), R = src.substring(e);
345
+ let pre = L.endsWith("\n") ? "\n" : "";
346
+ if (pre) L = L.slice(0, -1);
347
+ src = L + pre + r2 + R;
348
+ }
349
+ return src;
350
+ }
351
+
352
+ function collectPatterns(p, src) {
353
+ const A = [], r = new RegExp(`\\b${p.symbols.pattern}\\s*\\${p.symbols.open}`, "g");
354
+ let out = src;
355
+ while (1) {
356
+ r.lastIndex = 0;
357
+ const m = r.exec(out);
181
358
  if (!m) break;
182
- const start = m.index;
183
- const openPos = m.index + m[0].length - 1;
184
- const [matchPat, posAfterMatch] = extractBlock(papagaio, result, openPos);
185
- let k = posAfterMatch;
186
- while (k < result.length && /\s/.test(result[k])) k++;
187
- if (k < result.length && result[k] === papagaio.symbols.open) {
188
- const [replacePat, posAfterReplace] = extractBlock(papagaio, result, k);
189
- patterns.push({ match: matchPat.trim(), replace: replacePat.trim() });
190
- result = result.slice(0, start) + result.slice(posAfterReplace);
359
+ const s = m.index, o = m.index + m[0].length - 1;
360
+ const [mp, em] = extractBlock(p, out, o);
361
+ let k = em;
362
+ while (k < out.length && /\s/.test(out[k])) k++;
363
+ if (k < out.length && out[k] === p.symbols.open) {
364
+ const [rp, er] = extractBlock(p, out, k);
365
+ A.push({ match: mp.trim(), replace: rp.trim() });
366
+ out = out.slice(0, s) + out.slice(er);
191
367
  continue;
192
368
  }
193
- result = result.slice(0, start) + result.slice(posAfterMatch);
369
+ out = out.slice(0, s) + out.slice(em);
194
370
  }
195
- return [patterns, result];
371
+ return [A, out];
196
372
  }
197
373
 
198
- function applyPatterns(papagaio, src, patterns) {
199
- let clearFlag = false, lastResult = "", S = papagaio.symbols.sigil;
200
- for (const pat of patterns) {
201
- const tokens = parsePattern(papagaio, pat.match);
202
- let newSrc = '';
203
- let pos = 0, matched = false;
374
+ function applyPatterns(p, src, pats) {
375
+ let clear = false, last = "", S = p.symbols.sigil;
376
+ for (const pat of pats) {
377
+ const t = parsePattern(p, pat.match);
378
+ let n = '', pos = 0, ok = false;
204
379
  while (pos < src.length) {
205
- const matchResult = matchPattern(papagaio, src, tokens, pos);
206
- if (matchResult) {
207
- matched = true;
208
- const { captures, endPos } = matchResult;
209
- let result = pat.replace;
380
+ const m = matchPattern(p, src, t, pos);
381
+ if (m) {
382
+ ok = true;
383
+ const { captures, endPos } = m;
384
+ let r = pat.replace;
210
385
  for (const [k, v] of Object.entries(captures)) {
211
- const keyEsc = escapeRegex(k);
212
- result = result.replace(new RegExp(keyEsc + '(?![A-Za-z0-9_])', 'g'), v);
386
+ const e = escapeRegex(k);
387
+ r = r.replace(new RegExp(e + '(?![A-Za-z0-9_])', 'g'), v);
213
388
  }
214
-
215
- const uniqueId = papagaio.unique_id++;
216
- result = result.replace(new RegExp(`${escapeRegex(S)}unique\\b`, 'g'), () => String(uniqueId));
217
- result = result.replace(/\$eval\{([^}]*)\}/g, (_, code) => {
389
+ const uid = p.unique_id++;
390
+ r = r.replace(new RegExp(`${escapeRegex(S)}unique\\b`, 'g'), () => String(uid));
391
+ r = r.replace(/\$eval\{([^}]*)\}/g, (_, c) => {
218
392
  try {
219
- const wrapped = `"use strict"; return (function() { ${code} })();`;
220
- return String(Function("papagaio", "ctx", wrapped)(papagaio, {}));
393
+ return String(Function("papagaio", "ctx", `"use strict";return(function(){${c}})();`)(p, {}));
221
394
  } catch {
222
395
  return "";
223
396
  }
224
397
  });
225
- const S2 = S + S;
226
- result = result.replace(new RegExp(escapeRegex(S2), 'g'), '');
227
-
228
- if (new RegExp(`${escapeRegex(S)}clear\\b`, 'g').test(result)) {
229
- result = result.replace(new RegExp(`${escapeRegex(S)}clear\\b\\s?`, 'g'), '');
230
- clearFlag = true;
398
+ r = r.replace(new RegExp(escapeRegex(S + S), 'g'), '');
399
+ if (new RegExp(`${escapeRegex(S)}clear\\b`, 'g').test(r)) {
400
+ r = r.replace(new RegExp(`${escapeRegex(S)}clear\\b\\s?`, 'g'), '');
401
+ clear = true;
231
402
  }
232
-
233
- const matchStart = pos, matchEnd = endPos;
234
- result = result
235
- .replace(new RegExp(`${escapeRegex(S)}prefix\\b`, 'g'), src.slice(0, matchStart))
236
- .replace(new RegExp(`${escapeRegex(S)}suffix\\b`, 'g'), src.slice(matchEnd))
237
- .replace(new RegExp(`${escapeRegex(S)}match\\b`, 'g'), src.slice(matchStart, matchEnd));
238
- newSrc += result;
239
- lastResult = result;
403
+ const ms = pos, me = endPos;
404
+ r = r.replace(new RegExp(`${escapeRegex(S)}prefix\\b`, 'g'), src.slice(0, ms))
405
+ .replace(new RegExp(`${escapeRegex(S)}suffix\\b`, 'g'), src.slice(me))
406
+ .replace(new RegExp(`${escapeRegex(S)}match\\b`, 'g'), src.slice(ms, me));
407
+ n += r;
408
+ last = r;
240
409
  pos = endPos;
241
410
  } else {
242
- newSrc += src[pos];
411
+ n += src[pos];
243
412
  pos++;
244
413
  }
245
414
  }
246
- if (matched) {
247
- src = clearFlag ? lastResult : newSrc;
248
- clearFlag = false;
415
+ if (ok) {
416
+ src = clear ? last : n;
417
+ clear = false;
249
418
  }
250
419
  }
251
420
  return src;
252
421
  }
253
422
 
254
- function escapeRegex(str) {
255
- return str.replace(/[.*+?^${}()|[\]\\""']/g, '\\$&');
423
+ function escapeRegex(s) {
424
+ return s.replace(/[.*+?^${}()|[\]\\""']/g, '\\$&');
256
425
  }
257
426
 
258
- function unescapeDelimiter(str) {
259
- let result = '';
260
- for (let i = 0; i < str.length; i++) {
261
- if (str[i] === '\\' && i + 1 < str.length) {
262
- const next = str[i + 1];
263
- if (next === '"' || next === "'" || next === '\\') {
264
- result += next;
427
+ function unescapeDelimiter(s) {
428
+ let r = '';
429
+ for (let i = 0; i < s.length; i++) {
430
+ if (s[i] === '\\' && i + 1 < s.length) {
431
+ const n = s[i + 1];
432
+ if (n === '"' || n === "'" || n === '\\') {
433
+ r += n;
265
434
  i++;
266
- } else {
267
- result += str[i];
268
- }
269
- } else {
270
- result += str[i];
271
- }
435
+ } else r += s[i];
436
+ } else r += s[i];
272
437
  }
273
- return result;
438
+ return r;
274
439
  }
275
440
 
276
441
  export class Papagaio {
277
442
  constructor() {
278
443
  this.recursion_limit = 512;
279
444
  this.unique_id = 0;
280
- this.symbols = {
281
- pattern: "pattern",
282
- context: "context",
283
- open: "{",
284
- close: "}",
285
- sigil: "$"
286
- };
445
+ this.symbols = { pattern: "pattern", context: "context", open: "{", close: "}", sigil: "$" };
287
446
  this.content = "";
288
447
  }
289
-
290
448
  process(input) {
291
449
  this.content = input;
292
- let src = input, last = null, iter = 0;
293
- const pending = () => {
294
- const rCtx = new RegExp(`\\b${this.symbols.context}\\s*\\${this.symbols.open}`, "g");
295
- const rPat = new RegExp(`\\b${this.symbols.pattern}\\s*\\${this.symbols.open}`, "g");
296
- return rCtx.test(src) || rPat.test(src);
450
+ let src = input, last = null, it = 0;
451
+ const pend = () => {
452
+ const r1 = new RegExp(`\\b${this.symbols.context}\\s*\\${this.symbols.open}`, "g");
453
+ const r2 = new RegExp(`\\b${this.symbols.pattern}\\s*\\${this.symbols.open}`, "g");
454
+ return r1.test(src) || r2.test(src);
297
455
  };
298
- while (src !== last && iter < this.recursion_limit) {
299
- iter++;
456
+ while (src !== last && it < this.recursion_limit) {
457
+ it++;
300
458
  last = src;
301
459
  src = processContext(this, src);
302
- const [patterns, s2] = collectPatterns(this, src);
303
- src = applyPatterns(this, s2, patterns);
304
- if (!pending()) break;
460
+ const [p, s2] = collectPatterns(this, src);
461
+ src = applyPatterns(this, s2, p);
462
+ if (!pend()) break;
305
463
  }
306
464
  return this.content = src, src;
307
465
  }
package/tests/tests.json CHANGED
@@ -9,7 +9,7 @@
9
9
  {
10
10
  "id": 2,
11
11
  "name": "Flexible whitespace ($)",
12
- "code": "pattern {$x$$and$$$y} {$x & $y}\nhello and world",
12
+ "code": "pattern {$$x and $$y} {$x & $y}\nhello and world",
13
13
  "expected": "hello & world"
14
14
  },
15
15
  {
@@ -75,7 +75,7 @@
75
75
  {
76
76
  "id": 13,
77
77
  "name": "Pattern with flexible whitespace in middle",
78
- "code": "pattern {from$$to} {path: from -> to}\nfrom to\nfrom to\nfrom to",
78
+ "code": "pattern {from to} {path: from -> to}\nfrom to\nfrom to\nfrom to",
79
79
  "expected": "path: from -> to"
80
80
  },
81
81
  {
@@ -129,7 +129,7 @@
129
129
  {
130
130
  "id": 22,
131
131
  "name": "Flexible whitespace with semicolon",
132
- "code": "pattern {$$$a$$;$$$b$$} {$a AND $b}\nx ; y\nx; y\nx ; y",
132
+ "code": "pattern {$$a;$$b} {$a AND $b}\nx ; y\nx; y\nx ; y",
133
133
  "expected": "x AND y"
134
134
  },
135
135
  {
@@ -182,8 +182,8 @@
182
182
  },
183
183
  {
184
184
  "id": 31,
185
- "name": "Block with empty delimiter (default)",
186
- "code": "pattern {$block data {}{}} {DATA:$data}\ndata { hello world }",
185
+ "name": "Delimiter test",
186
+ "code": "pattern {$block data {[}{]}} {DATA:$data}\ndata [ hello world ]",
187
187
  "expected": "DATA: hello world"
188
188
  },
189
189
  {
@@ -267,7 +267,7 @@
267
267
  {
268
268
  "id": 45,
269
269
  "name": "Pattern with flexible space before and after",
270
- "code": "pattern {$$hello$$} {FOUND}\n hello ",
270
+ "code": "pattern {$$ hello$$ } {FOUND}\n hello ",
271
271
  "expected": "FOUND"
272
272
  },
273
273
  {
@@ -315,7 +315,7 @@
315
315
  {
316
316
  "id": 53,
317
317
  "name": "Trim multiple surrounding spaces",
318
- "code": "pattern {$$$word$$} {FOUND: $word}\n word ",
318
+ "code": "pattern {$ $word$ } {FOUND: $word}\n word ",
319
319
  "expected": "FOUND: word"
320
320
  },
321
321
  {
@@ -543,7 +543,7 @@
543
543
  {
544
544
  "id": 91,
545
545
  "name": "CSV to JSON",
546
- "code": "pattern {$a,$b,$c} {{ id: '$a', name: '$b', role: '$c' }}\n1,Alice,Engineer\n2,Bob,Designer",
546
+ "code": "pattern {$a,$$b,$$c} {{ id: '$a', name: '$b', role: '$c' }}\n1,Alice,Engineer\n2,Bob,Designer",
547
547
  "expected": "{ id: '1', name: 'Alice', role: 'Engineer' }\n{ id: '2', name: 'Bob', role: 'Designer' }"
548
548
  },
549
549
  {
@@ -639,7 +639,7 @@
639
639
  {
640
640
  "id": 107,
641
641
  "name": "Comma-separated pattern matching",
642
- "code": "pattern {$a,$b,$c} {[$a] [$b] [$c]}\none,two,three",
642
+ "code": "pattern {$$a,$$b,$$c} {[$a] [$b] [$c]}\none,two,three",
643
643
  "expected": "[one] [two] [three]"
644
644
  },
645
645
  {
@@ -663,7 +663,7 @@
663
663
  {
664
664
  "id": 111,
665
665
  "name": "Pattern with dot literal",
666
- "code": "pattern {$file.txt} {File: $file}\ndocument.txt",
666
+ "code": "pattern {$$file.txt} {File: $file.txt}\ndocument.txt",
667
667
  "expected": "File: document"
668
668
  },
669
669
  {
@@ -693,7 +693,7 @@
693
693
  {
694
694
  "id": 116,
695
695
  "name": "Multiple spaces trim with flexible whitespace",
696
- "code": "pattern {$$$word$$} {FOUND: $word}\n word ",
696
+ "code": "pattern {$ $word$ } {FOUND: $word}\n word ",
697
697
  "expected": "FOUND: word"
698
698
  },
699
699
  {
@@ -711,7 +711,7 @@
711
711
  {
712
712
  "id": 119,
713
713
  "name": "Trailing punctuation in pattern",
714
- "code": "pattern {$w,} {$w!}\nhello,",
714
+ "code": "pattern {$$w,} {$$w!}\nhello,",
715
715
  "expected": "hello!"
716
716
  }
717
717
  ]