papagaio 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -171
- package/package.json +1 -1
- package/src/louro.js +259 -0
- package/src/papagaio.js +29 -6
- package/tests/test.js +1 -1
- package/tests/tests.json +5 -5
package/README.md
CHANGED
|
@@ -15,13 +15,13 @@ const result = p.process(input);
|
|
|
15
15
|
```javascript
|
|
16
16
|
p.symbols = {
|
|
17
17
|
pattern: "pattern", // pattern keyword
|
|
18
|
-
context: "context", // context keyword
|
|
19
18
|
open: "{", // opening delimiter
|
|
20
19
|
close: "}", // closing delimiter
|
|
21
20
|
sigil: "$" // variable marker
|
|
22
21
|
};
|
|
23
|
-
|
|
24
|
-
p.
|
|
22
|
+
|
|
23
|
+
p.recursion_limit = 512;
|
|
24
|
+
p.unique_id = 0;
|
|
25
25
|
```
|
|
26
26
|
|
|
27
27
|
---
|
|
@@ -34,9 +34,8 @@ p.unique_id = 0; // unique ID counter
|
|
|
34
34
|
pattern {$x} {$x}
|
|
35
35
|
hello
|
|
36
36
|
```
|
|
37
|
-
Output: `hello`
|
|
38
37
|
|
|
39
|
-
|
|
38
|
+
Output: `hello`
|
|
40
39
|
|
|
41
40
|
### 2. Multiple Variables
|
|
42
41
|
|
|
@@ -44,8 +43,11 @@ Variables capture words (non-whitespace sequences).
|
|
|
44
43
|
pattern {$x $y $z} {$z, $y, $x}
|
|
45
44
|
apple banana cherry
|
|
46
45
|
```
|
|
46
|
+
|
|
47
47
|
Output: `cherry, banana, apple`
|
|
48
48
|
|
|
49
|
+
---
|
|
50
|
+
|
|
49
51
|
## Blocks
|
|
50
52
|
|
|
51
53
|
Capture content between delimiters.
|
|
@@ -62,6 +64,7 @@ $block name {open}{close}
|
|
|
62
64
|
pattern {$name $block content {(}{)}} {[$content]}
|
|
63
65
|
data (hello world)
|
|
64
66
|
```
|
|
67
|
+
|
|
65
68
|
Output: `[hello world]`
|
|
66
69
|
|
|
67
70
|
### Custom Delimiters
|
|
@@ -70,6 +73,7 @@ Output: `[hello world]`
|
|
|
70
73
|
pattern {$block data {<<}{>>}} {DATA: $data}
|
|
71
74
|
<<json stuff>>
|
|
72
75
|
```
|
|
76
|
+
|
|
73
77
|
Output: `DATA: json stuff`
|
|
74
78
|
|
|
75
79
|
### Multiple Blocks
|
|
@@ -78,6 +82,7 @@ Output: `DATA: json stuff`
|
|
|
78
82
|
pattern {$block a {(}{)}, $block b {[}{]}} {$a|$b}
|
|
79
83
|
(first), [second]
|
|
80
84
|
```
|
|
85
|
+
|
|
81
86
|
Output: `first|second`
|
|
82
87
|
|
|
83
88
|
---
|
|
@@ -90,15 +95,16 @@ Output: `first|second`
|
|
|
90
95
|
pattern {match} {replace}
|
|
91
96
|
```
|
|
92
97
|
|
|
93
|
-
###
|
|
98
|
+
### Example
|
|
94
99
|
|
|
95
100
|
```
|
|
96
101
|
pattern {# $title} {<h1>$title</h1>}
|
|
97
102
|
# Welcome
|
|
98
103
|
```
|
|
104
|
+
|
|
99
105
|
Output: `<h1>Welcome</h1>`
|
|
100
106
|
|
|
101
|
-
### Multiple Patterns
|
|
107
|
+
### Multiple Patterns Cascade
|
|
102
108
|
|
|
103
109
|
```
|
|
104
110
|
pattern {a} {b}
|
|
@@ -106,246 +112,160 @@ pattern {b} {c}
|
|
|
106
112
|
pattern {c} {d}
|
|
107
113
|
a
|
|
108
114
|
```
|
|
109
|
-
|
|
115
|
+
|
|
116
|
+
Output: `d`
|
|
110
117
|
|
|
111
118
|
---
|
|
112
119
|
|
|
113
|
-
|
|
120
|
+
# Subpatterns
|
|
121
|
+
|
|
122
|
+
Subpatterns allow patterns to be declared *inside* other patterns, existing only during the execution of that parent pattern.
|
|
123
|
+
|
|
124
|
+
### Syntax
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
$pattern {match} {replace}
|
|
128
|
+
```
|
|
114
129
|
|
|
115
|
-
|
|
130
|
+
A subpattern behaves like a normal pattern but is **scoped only to the replacement body where it appears**.
|
|
131
|
+
|
|
132
|
+
### Example
|
|
116
133
|
|
|
117
134
|
```
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
135
|
+
pattern {eval $block code {(}{)}} {
|
|
136
|
+
$eval{
|
|
137
|
+
$pattern {undefined} {}
|
|
138
|
+
$code;
|
|
139
|
+
return "";
|
|
140
|
+
}
|
|
123
141
|
}
|
|
142
|
+
|
|
143
|
+
eval(console.log(123))
|
|
124
144
|
```
|
|
145
|
+
|
|
125
146
|
Output:
|
|
147
|
+
|
|
126
148
|
```
|
|
127
|
-
|
|
128
|
-
<banana>
|
|
149
|
+
123
|
|
129
150
|
```
|
|
130
151
|
|
|
131
|
-
|
|
152
|
+
### Key Properties
|
|
153
|
+
|
|
154
|
+
* Subpatterns exist only within the running pattern.
|
|
155
|
+
* They do not leak into the global pattern list.
|
|
156
|
+
* They can recursively modify inner content before `$eval` or other processors handle it.
|
|
157
|
+
* Multiple subpatterns can coexist inside the same replacement.
|
|
132
158
|
|
|
133
159
|
---
|
|
134
160
|
|
|
135
161
|
## Special Keywords
|
|
136
162
|
|
|
137
163
|
### $unique
|
|
138
|
-
|
|
164
|
+
|
|
165
|
+
Generates unique incremental IDs.
|
|
139
166
|
|
|
140
167
|
```
|
|
141
168
|
pattern {item} {[$unique]item_$unique}
|
|
142
169
|
item
|
|
143
170
|
item
|
|
144
|
-
item
|
|
145
171
|
```
|
|
146
|
-
|
|
172
|
+
|
|
173
|
+
Outputs:
|
|
147
174
|
|
|
148
175
|
```
|
|
149
|
-
|
|
150
|
-
|
|
176
|
+
[0]item_0
|
|
177
|
+
[1]item_1
|
|
151
178
|
```
|
|
152
|
-
Output: `0 0` (same ID for both occurrences)
|
|
153
179
|
|
|
154
180
|
### $match
|
|
155
|
-
|
|
181
|
+
|
|
182
|
+
Full matched text.
|
|
156
183
|
|
|
157
184
|
```
|
|
158
185
|
pattern {[$x]} {FOUND: $match}
|
|
159
186
|
[data]
|
|
160
187
|
```
|
|
188
|
+
|
|
161
189
|
Output: `FOUND: [data]`
|
|
162
190
|
|
|
163
191
|
### $prefix / $suffix
|
|
164
|
-
|
|
192
|
+
|
|
193
|
+
Text before and after match.
|
|
165
194
|
|
|
166
195
|
```
|
|
167
196
|
pattern {world} {$prefix$suffix}hello world test
|
|
168
197
|
```
|
|
198
|
+
|
|
169
199
|
Output: `hello hello test test`
|
|
170
200
|
|
|
171
201
|
### $clear
|
|
172
|
-
Remove everything before the match.
|
|
173
|
-
|
|
174
|
-
```
|
|
175
|
-
pattern {SKIP $x} {$clear KEEP: $x}
|
|
176
|
-
IGNORE_THIS SKIP keep_this
|
|
177
|
-
```
|
|
178
|
-
Output: `KEEP: keep_this`
|
|
179
202
|
|
|
180
|
-
|
|
181
|
-
Execute JavaScript code.
|
|
203
|
+
Removes everything before match.
|
|
182
204
|
|
|
183
205
|
```
|
|
184
|
-
pattern {$x} {$
|
|
185
|
-
|
|
186
|
-
```
|
|
187
|
-
Output: `10`
|
|
188
|
-
|
|
189
|
-
---
|
|
190
|
-
|
|
191
|
-
## Practical Examples
|
|
192
|
-
|
|
193
|
-
### Markdown → HTML
|
|
194
|
-
|
|
195
|
-
```
|
|
196
|
-
context {
|
|
197
|
-
pattern {## $t} {<h2>$t</h2>}
|
|
198
|
-
pattern {# $t} {<h1>$t</h1>}
|
|
199
|
-
pattern {**$t**} {<strong>$t</strong>}
|
|
200
|
-
pattern {*$t*} {<em>$t</em>}
|
|
201
|
-
pattern {- $i} {<li>$i</li>}
|
|
202
|
-
|
|
203
|
-
# Title
|
|
204
|
-
**bold** and *italic*
|
|
205
|
-
- item1
|
|
206
|
-
- item2
|
|
207
|
-
}
|
|
208
|
-
```
|
|
209
|
-
|
|
210
|
-
### CSV → JSON
|
|
211
|
-
|
|
212
|
-
```
|
|
213
|
-
pattern {$a,$b,$c} {{ id: '$a', name: '$b', role: '$c' }}
|
|
214
|
-
1,Alice,Engineer
|
|
215
|
-
2,Bob,Designer
|
|
216
|
-
```
|
|
217
|
-
|
|
218
|
-
Output:
|
|
219
|
-
```
|
|
220
|
-
{ id: '1', name: 'Alice', role: 'Engineer' }
|
|
221
|
-
{ id: '2', name: 'Bob', role: 'Designer' }
|
|
206
|
+
pattern {SKIP $x} {$clear KEEP: $x}
|
|
207
|
+
IGNORE SKIP keep
|
|
222
208
|
```
|
|
223
209
|
|
|
224
|
-
|
|
210
|
+
Output: `KEEP: keep`
|
|
225
211
|
|
|
226
|
-
|
|
227
|
-
pattern {$key = $value} {const $key = '$value';}
|
|
228
|
-
host = localhost
|
|
229
|
-
port = 3000
|
|
230
|
-
```
|
|
231
|
-
|
|
232
|
-
Output:
|
|
233
|
-
```
|
|
234
|
-
const host = 'localhost';
|
|
235
|
-
const port = '3000';
|
|
236
|
-
```
|
|
212
|
+
### $eval
|
|
237
213
|
|
|
238
|
-
|
|
214
|
+
Executes JS.
|
|
239
215
|
|
|
240
216
|
```
|
|
241
|
-
pattern {$
|
|
242
|
-
|
|
243
|
-
span Test
|
|
217
|
+
pattern {$x} {$eval{return parseInt($x)*2;}}
|
|
218
|
+
5
|
|
244
219
|
```
|
|
245
220
|
|
|
246
|
-
Output:
|
|
247
|
-
```
|
|
248
|
-
<div>HelloWorld</div>
|
|
249
|
-
<span>Test</span>
|
|
250
|
-
```
|
|
221
|
+
Output: `10`
|
|
251
222
|
|
|
252
223
|
---
|
|
253
224
|
|
|
254
225
|
## Important Rules
|
|
255
226
|
|
|
256
227
|
### Matching
|
|
257
|
-
- Variables (`$x`) capture **one word** (no spaces)
|
|
258
|
-
- Variables (`$$x`) captures one or more words (with spaces)
|
|
259
|
-
- Patterns apply **globally** each iteration
|
|
260
|
-
- Auto-recursion until: max 512 iterations OR no changes
|
|
261
|
-
- `$ ` = one or more of this whitespace (spaces, tabs, newlines)
|
|
262
|
-
- `$$ ` = zero or more of this whitespace (spaces, tabs, newlines)
|
|
263
|
-
- `$$$ `= one or more whitespaces
|
|
264
|
-
- `$$$$ `= zero or more whitespaces
|
|
265
228
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
229
|
+
* `$x` = one word
|
|
230
|
+
* `$$x` = multiword (captures whitespace too)
|
|
231
|
+
* `$`, `$$`, `$$$`, `$$$$` = whitespace operators
|
|
232
|
+
* Patterns apply globally until stable
|
|
233
|
+
* Blocks can be nested
|
|
270
234
|
|
|
271
|
-
###
|
|
272
|
-
- Names: `[A-Za-z0-9_]`
|
|
273
|
-
- Reuse: `$x` appears multiple times in replace
|
|
274
|
-
- Undefined: becomes empty string
|
|
235
|
+
### Block Matching
|
|
275
236
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
- You cannot match a $block{}{} using the current delimiters.
|
|
279
|
-
- By design, whitespace operators need a whitespace after them to work properly, even the `$$$ ` and `$$$$ ` ones.
|
|
280
|
-
- Multiple word variables (`$$x`) also captures leading/trailing whitespaces, so be careful when using them together with whitespace operators.
|
|
237
|
+
* `$block name {open}{close}` captures delimited regions
|
|
238
|
+
* Supports nested delimiters
|
|
281
239
|
|
|
282
240
|
---
|
|
283
241
|
|
|
284
242
|
## Troubleshooting
|
|
285
243
|
|
|
286
|
-
| Problem
|
|
287
|
-
|
|
288
|
-
| Variable not captured | Check
|
|
289
|
-
| Block
|
|
290
|
-
| Infinite recursion
|
|
291
|
-
|
|
|
292
|
-
|
|
|
293
|
-
|
|
294
|
-
|
|
244
|
+
| Problem | Solution |
|
|
245
|
+
| --------------------- | -------------------------- |
|
|
246
|
+
| Variable not captured | Check spacing |
|
|
247
|
+
| Block wrong | Verify delimiters |
|
|
248
|
+
| Infinite recursion | Reduce recursion limit |
|
|
249
|
+
| Pattern not matching | Add whitespace operators |
|
|
250
|
+
| Multiword var issues | Beware whitespace consumed |
|
|
251
|
+
|
|
252
|
+
---
|
|
295
253
|
|
|
296
254
|
## Known Bugs
|
|
297
255
|
|
|
298
|
-
|
|
256
|
+
* Multi-character delimiters containing `"` break nested parsing.
|
|
299
257
|
|
|
300
258
|
---
|
|
301
259
|
|
|
302
260
|
## Syntax Reference
|
|
303
261
|
|
|
304
262
|
```
|
|
305
|
-
pattern {$x $y} {$y, $x}
|
|
306
|
-
pattern {$
|
|
307
|
-
pattern {
|
|
308
|
-
|
|
309
|
-
$
|
|
310
|
-
$
|
|
311
|
-
$
|
|
312
|
-
$
|
|
313
|
-
$eval{code} # execute JS
|
|
314
|
-
$ / $$ / $$$ / $$$$ # whitespace operators
|
|
315
|
-
```
|
|
316
|
-
|
|
317
|
-
---
|
|
318
|
-
|
|
319
|
-
## Complete Example
|
|
320
|
-
|
|
321
|
-
```
|
|
322
|
-
context {
|
|
323
|
-
# Markdown headers
|
|
324
|
-
pattern {# $title} {<h1>$title</h1>}
|
|
325
|
-
|
|
326
|
-
# Lists
|
|
327
|
-
pattern {- $item} {<li>$item</li>}
|
|
328
|
-
|
|
329
|
-
# Inline formatting
|
|
330
|
-
pattern {**$text**} {<strong>$text</strong>}
|
|
331
|
-
pattern {*$text*} {<em>$text</em>}
|
|
332
|
-
|
|
333
|
-
# Process content
|
|
334
|
-
# Welcome
|
|
335
|
-
# Getting Started
|
|
336
|
-
This is **important** and *italic*
|
|
337
|
-
- First item
|
|
338
|
-
- Second item
|
|
339
|
-
}
|
|
340
|
-
```
|
|
341
|
-
|
|
342
|
-
Output:
|
|
343
|
-
```html
|
|
344
|
-
<h1>Welcome</h1>
|
|
345
|
-
<h2>Getting Started</h2>
|
|
346
|
-
This is <strong>important</strong> and <em>italic</em>
|
|
347
|
-
<li>First item</li>
|
|
348
|
-
<li>Second item</li>
|
|
263
|
+
pattern {$x $y} {$y, $x}
|
|
264
|
+
pattern {$block n {o}{c}} {$n}
|
|
265
|
+
$pattern {a} {b} # subpattern
|
|
266
|
+
$unique
|
|
267
|
+
$match
|
|
268
|
+
$prefix / $suffix
|
|
269
|
+
$clear
|
|
270
|
+
$eval{code}
|
|
349
271
|
```
|
|
350
|
-
|
|
351
|
-
---
|
package/package.json
CHANGED
package/src/louro.js
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
 * Tokenizes the match side of a pattern definition.
 *
 * Token shapes produced:
 *  - { type: 'literal', value }              text that must appear verbatim
 *  - { type: 'whitespace-optional' }         a whitespace run, or a bare double sigil
 *  - { type: 'var', varName }                <sigil>name
 *  - { type: 'var-ws', varName }             <sigil><sigil>name
 *  - { type: 'var-ws-optional', varName }    <sigil><sigil><sigil>name
 *  - { type: 'block', varName, openDelim, closeDelim }
 *                                            <sigil>block name {open}{close}
 *
 * @param {object} papagaio - Processor instance; only `papagaio.symbols` is read.
 * @param {string} pattern - Raw match text of a pattern.
 * @returns {object[]} Tokens in source order.
 */
function parsePattern(papagaio, pattern) {
  const { sigil, open, close } = papagaio.symbols;
  const doubleSigil = sigil + sigil;
  const tripleSigil = doubleSigil + sigil;
  const blockKeyword = `${sigil}block`;
  const isSpace = (ch) => /\s/.test(ch);
  const tokens = [];

  // Reads an identifier ([A-Za-z0-9_]+) starting at `from`; returns [name, indexAfterName].
  const readName = (from) => {
    let end = from;
    while (end < pattern.length && /[A-Za-z0-9_]/.test(pattern[end])) end++;
    return [pattern.slice(from, end), end];
  };

  // Returns the index of the first non-whitespace character at or after `from`.
  const skipSpaces = (from) => {
    let end = from;
    while (end < pattern.length && isSpace(pattern[end])) end++;
    return end;
  };

  let i = 0;
  while (i < pattern.length) {
    // Longest sigil run first, so a triple sigil is not read as double + single.
    if (pattern.startsWith(tripleSigil, i)) {
      const [varName, end] = readName(i + tripleSigil.length);
      if (varName) {
        tokens.push({ type: 'var-ws-optional', varName });
        i = end;
        continue;
      }
    }

    if (pattern.startsWith(doubleSigil, i)) {
      const [varName, end] = readName(i + doubleSigil.length);
      if (varName) {
        tokens.push({ type: 'var-ws', varName });
        i = end;
        continue;
      }
      // A double sigil with no name acts as an optional-whitespace operator.
      tokens.push({ type: 'whitespace-optional' });
      i += doubleSigil.length;
      continue;
    }

    if (pattern.startsWith(blockKeyword, i)) {
      const [varName, afterName] = readName(skipSpaces(i + blockKeyword.length));
      // Without a name this is not a block; fall through so it is read as a plain variable.
      if (varName) {
        let cursor = skipSpaces(afterName);

        // Each delimiter is optional and is itself written inside the processor's own delimiters.
        let openDelim = open;
        if (cursor < pattern.length && pattern.startsWith(open, cursor)) {
          const [text, end] = extractBlock(papagaio, pattern, cursor);
          openDelim = unescapeDelimiter(text.trim()) || open;
          cursor = skipSpaces(end);
        }

        let closeDelim = close;
        if (cursor < pattern.length && pattern.startsWith(open, cursor)) {
          const [text, end] = extractBlock(papagaio, pattern, cursor);
          closeDelim = unescapeDelimiter(text.trim()) || close;
          cursor = end;
        }

        tokens.push({ type: 'block', varName, openDelim, closeDelim });
        i = cursor;
        continue;
      }
    }

    // startsWith (not pattern[i] === sigil) so that multi-character sigils are recognised;
    // otherwise the literal loop below would consume nothing and this loop would never advance.
    if (pattern.startsWith(sigil, i)) {
      const [varName, end] = readName(i + sigil.length);
      if (varName) {
        tokens.push({ type: 'var', varName });
        i = end;
        continue;
      }
      tokens.push({ type: 'literal', value: sigil });
      i += sigil.length;
      continue;
    }

    if (isSpace(pattern[i])) {
      // Any run of whitespace in the pattern collapses into one optional-whitespace token.
      i = skipSpaces(i);
      tokens.push({ type: 'whitespace-optional' });
      continue;
    }

    let literal = '';
    while (i < pattern.length && !pattern.startsWith(sigil, i) && !isSpace(pattern[i])) {
      literal += pattern[i++];
    }
    if (literal) tokens.push({ type: 'literal', value: literal });
  }

  return tokens;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Tries to match a token list against `src`, anchored at `startPos`.
 *
 * @param {object} papagaio - Processor instance; `symbols.sigil` prefixes capture keys.
 * @param {string} src - Text being matched.
 * @param {object[]} tokens - Tokens as produced by parsePattern.
 * @param {number} [startPos=0] - Index in `src` where the match must begin.
 * @returns {{captures: Object<string, string>, endPos: number} | null}
 *   Captured values keyed by sigil + variable name and the index just past the
 *   match, or null when the tokens do not match at `startPos`.
 */
function matchPattern(papagaio, src, tokens, startPos = 0) {
  const sigil = papagaio.symbols.sigil;
  const isSpace = (ch) => /\s/.test(ch);
  const captures = {};
  let pos = startPos;

  // Advances `pos` until `stop(index)` holds or the input ends; returns the text consumed.
  const readUntil = (stop) => {
    const from = pos;
    while (pos < src.length && !stop(pos)) pos++;
    return src.slice(from, pos);
  };

  for (let ti = 0; ti < tokens.length; ti++) {
    const token = tokens[ti];

    switch (token.type) {
      case 'whitespace-optional':
        readUntil((i) => !isSpace(src[i]));
        break;

      case 'literal':
        if (!src.startsWith(token.value, pos)) return null;
        pos += token.value.length;
        break;

      case 'var': {
        // A single-word variable ends at whitespace, and also at the start of the
        // next block opener or literal so that `[$x]` can match `[data]`.
        const next = findNextSignificantToken(tokens, ti);
        let stopText = null;
        if (next && next.type === 'block') stopText = next.openDelim;
        else if (next && next.type === 'literal') stopText = next.value;

        const value = readUntil(
          (i) => isSpace(src[i]) || (stopText !== null && src.startsWith(stopText, i)),
        );
        if (!value) return null;
        captures[sigil + token.varName] = value;
        break;
      }

      case 'var-ws':
      case 'var-ws-optional': {
        readUntil((i) => !isSpace(src[i]));
        const next = findNextSignificantToken(tokens, ti);
        let value = '';

        if (next && (next.type === 'block' || next.type === 'literal')) {
          // Multi-word capture: runs to the next block opener / literal, but never past a newline.
          const stopText = next.type === 'block' ? next.openDelim : next.value;
          value = readUntil((i) => src[i] === '\n' || src.startsWith(stopText, i)).trimEnd();
        } else if (!next || ['var', 'var-ws', 'var-ws-optional'].includes(next.type)) {
          // Nothing to delimit against: fall back to a single word.
          value = readUntil((i) => isSpace(src[i]));
        }

        // Only the optional flavour may capture an empty string.
        if (token.type === 'var-ws' && !value) return null;
        captures[sigil + token.varName] = value;
        break;
      }

      case 'block': {
        const { varName, openDelim, closeDelim } = token;
        if (!src.startsWith(openDelim, pos)) return null;
        const [content, end] = extractBlock(papagaio, src, pos, openDelim, closeDelim);
        captures[sigil + varName] = content;
        pos = end;
        break;
      }

      default:
        // Unknown token types are ignored, as before.
        break;
    }
  }

  return { captures, endPos: pos };
}
|
|
109
|
+
|
|
110
|
+
/**
 * Returns the first token after index `i` whose type is not
 * 'whitespace-optional', or null when there is none.
 *
 * @param {object[]} t - Token list.
 * @param {number} i - Index of the current token.
 * @returns {object|null} The next significant token, if any.
 */
function findNextSignificantToken(t, i) {
  const upcoming = t.slice(i + 1).find((token) => token.type !== 'whitespace-optional');
  return upcoming === undefined ? null : upcoming;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Extracts the content of a delimited block that starts at `openPos`.
 *
 * Nested open/close pairs are balanced. When the opening and closing
 * delimiters are the same string (of any length) nesting is impossible, so the
 * block ends at the next occurrence of the delimiter.
 *
 * @param {object} p - Processor instance; supplies the default delimiters.
 * @param {string} src - Text to read from.
 * @param {number} openPos - Index at which the opening delimiter is expected.
 * @param {string} [openDelim=p.symbols.open] - Opening delimiter.
 * @param {string} [closeDelim=p.symbols.close] - Closing delimiter.
 * @returns {[string, number]} The block content (without delimiters) and the
 *   index just past the closing delimiter. Returns ['', openPos] when no block
 *   starts at `openPos` (or a delimiter is empty), and [rest, src.length] when
 *   the block is never closed.
 */
function extractBlock(p, src, openPos, openDelim = p.symbols.open, closeDelim = p.symbols.close) {
  // Empty delimiters can never delimit anything and would stall the scan below.
  if (!openDelim || !closeDelim || !src.startsWith(openDelim, openPos)) {
    return ['', openPos];
  }

  const start = openPos + openDelim.length;

  if (openDelim === closeDelim) {
    const closeAt = src.indexOf(closeDelim, start);
    return closeAt === -1
      ? [src.substring(start), src.length]
      : [src.substring(start, closeAt), closeAt + closeDelim.length];
  }

  let depth = 0; // number of nested openers still waiting for their closer
  let i = start;
  while (i < src.length) {
    if (src.startsWith(openDelim, i)) {
      depth++;
      i += openDelim.length;
    } else if (src.startsWith(closeDelim, i)) {
      if (depth === 0) return [src.substring(start, i), i + closeDelim.length];
      depth--;
      i += closeDelim.length;
    } else {
      i++;
    }
  }

  // Unterminated block: hand back everything that is left, with an in-range end index.
  return [src.substring(start), src.length];
}
|
|
136
|
+
|
|
137
|
+
/**
 * Pulls every `<keyword> {match} {replace}` definition out of `src`.
 *
 * A definition whose match block is not followed by a replacement block is
 * removed from the text but not recorded.
 *
 * @param {object} p - Processor instance; reads `p.symbols.pattern` and `p.symbols.open`.
 * @param {string} src - Text possibly containing pattern definitions.
 * @returns {[Array<{match: string, replace: string}>, string]} The collected
 *   definitions (in order of appearance) and the text with them removed.
 */
function collectPatterns(p, src) {
  const { pattern: keyword, open } = p.symbols;
  // Symbols are user-configurable, so escape them fully before building the regex.
  // No `g` flag: every search restarts from the beginning of the shrinking text.
  const header = new RegExp(`(?:^|\\b)${escapeRegex(keyword)}\\s*${escapeRegex(open)}`);
  const collected = [];
  let out = src;

  for (;;) {
    const found = header.exec(out);
    if (!found) break;

    const defStart = found.index;
    // The header match ends with the opening delimiter; step back onto it.
    const matchOpen = defStart + found[0].length - open.length;
    const [matchText, afterMatch] = extractBlock(p, out, matchOpen);

    let k = afterMatch;
    while (k < out.length && /\s/.test(out[k])) k++;

    if (k < out.length && out.startsWith(open, k)) {
      const [replaceText, afterReplace] = extractBlock(p, out, k);
      collected.push({ match: matchText.trim(), replace: replaceText.trim() });
      out = out.slice(0, defStart) + out.slice(afterReplace);
      continue;
    }

    // Match block without a replacement block: drop it and keep scanning.
    out = out.slice(0, defStart) + out.slice(afterMatch);
  }

  return [collected, out];
}
|
|
153
|
+
|
|
154
|
+
/**
 * Pulls every `<sigil><keyword> {match} {replace}` subpattern out of a
 * replacement body.
 *
 * A subpattern whose match block is not followed by a replacement block is
 * removed from the text but not recorded.
 *
 * @param {object} p - Processor instance; reads `sigil`, `pattern` and `open` from `p.symbols`.
 * @param {string} replaceText - Replacement body of the enclosing pattern.
 * @returns {[Array<{match: string, replace: string}>, string]} The subpatterns
 *   found (in order of appearance) and the body with them removed.
 */
function extractNestedPatterns(p, replaceText) {
  const { sigil, pattern: keyword, open } = p.symbols;
  // Symbols are user-configurable, so escape them fully before building the regex.
  // No `g` flag: every search restarts from the beginning of the shrinking text.
  const header = new RegExp(`${escapeRegex(sigil)}${escapeRegex(keyword)}\\s*${escapeRegex(open)}`);
  const nested = [];
  let out = replaceText;

  for (;;) {
    const found = header.exec(out);
    if (!found) break;

    const defStart = found.index;
    // The header match ends with the opening delimiter; step back onto it.
    const matchOpen = defStart + found[0].length - open.length;
    const [matchText, afterMatch] = extractBlock(p, out, matchOpen);

    let k = afterMatch;
    while (k < out.length && /\s/.test(out[k])) k++;

    if (k < out.length && out.startsWith(open, k)) {
      const [replaceBody, afterReplace] = extractBlock(p, out, k);
      nested.push({ match: matchText.trim(), replace: replaceBody.trim() });
      out = out.slice(0, defStart) + out.slice(afterReplace);
      continue;
    }

    // Match block without a replacement block: drop it and keep scanning.
    out = out.slice(0, defStart) + out.slice(afterMatch);
  }

  return [nested, out];
}
|
|
181
|
+
|
|
182
|
+
/**
 * Runs one `<sigil>eval` body and returns its result as a string.
 *
 * SECURITY: the code is compiled with the Function constructor and therefore
 * runs with full privileges. Only process input you trust.
 * Failures are deliberately turned into an empty string (best-effort expansion).
 */
function runEvalSnippet(p, code) {
  try {
    return String(Function("papagaio", "ctx", `"use strict";return(function(){${code}})();`)(p, {}));
  } catch {
    return "";
  }
}

/**
 * Replaces every `<sigil>eval<open>code<close>` in `text` with the result of
 * running `code`. Delimiters inside the code are balanced via extractBlock, so
 * the code may itself contain nested blocks. An unterminated eval is left as is.
 */
function expandEvalCalls(p, text) {
  const { sigil, open, close } = p.symbols;
  const marker = `${sigil}eval${open}`;
  let out = '';
  let cursor = 0;
  let at = text.indexOf(marker, cursor);

  while (at !== -1) {
    const openPos = at + marker.length - open.length;
    const [code, end] = extractBlock(p, text, openPos);
    // extractBlock does not report whether the block was closed, so verify that a
    // closing delimiter really sits right before `end`.
    const closed = end <= text.length
      && end - close.length >= openPos + open.length
      && text.startsWith(close, end - close.length);
    if (!closed) break;

    out += text.slice(cursor, at) + runEvalSnippet(p, code);
    cursor = end;
    at = text.indexOf(marker, cursor);
  }

  return out + text.slice(cursor);
}

/**
 * Applies each pattern, in order, to the whole of `src`.
 *
 * For every match the replacement body is expanded in this order: subpatterns
 * are extracted, captures are substituted, subpatterns are applied,
 * `unique` is replaced by a fresh id, `eval` bodies are executed, double sigils
 * are removed, `clear` is honoured, and finally `prefix` / `suffix` / `match`
 * are substituted. Expansion runs over the already-substituted text, so captured
 * input can itself contain these keywords.
 *
 * @param {object} p - Processor instance; reads `p.symbols` and increments `p.unique_id`.
 * @param {string} src - Text to transform.
 * @param {Array<{match: string, replace: string}>} pats - Patterns to apply.
 * @returns {string} The transformed text.
 */
function applyPatterns(p, src, pats) {
  const sigil = p.symbols.sigil;
  const S = escapeRegex(sigil);

  // Built once per call. String.prototype.replace resets lastIndex for global
  // regexes, and the `clear` probe is non-global, so reuse is safe.
  const uniqueRe = new RegExp(`${S}unique\\b`, 'g');
  const doubleSigilRe = new RegExp(escapeRegex(sigil + sigil), 'g');
  const clearProbe = new RegExp(`${S}clear\\b`);
  const clearStripRe = new RegExp(`${S}clear\\b\\s?`, 'g');
  const prefixRe = new RegExp(`${S}prefix\\b`, 'g');
  const suffixRe = new RegExp(`${S}suffix\\b`, 'g');
  const matchRe = new RegExp(`${S}match\\b`, 'g');

  let clear = false;
  let last = "";

  for (const pat of pats) {
    const tokens = parsePattern(p, pat.match);
    // The subpattern split depends only on the replacement body, not on the match.
    const [nestedPats, cleanReplace] = extractNestedPatterns(p, pat.replace);

    let rebuilt = '';
    let pos = 0;
    let matched = false;

    while (pos < src.length) {
      const m = matchPattern(p, src, tokens, pos);

      // A zero-width match would never advance `pos`; treat it as no match.
      if (!m || m.endPos <= pos) {
        rebuilt += src[pos];
        pos++;
        continue;
      }

      matched = true;
      const { captures, endPos } = m;
      let r = cleanReplace;

      // Function replacers throughout: inserted text must be taken literally, not
      // interpreted as a replacement template (`$&`, `$1`, `$$`, ...).
      for (const [name, value] of Object.entries(captures)) {
        r = r.replace(new RegExp(`${escapeRegex(name)}(?![A-Za-z0-9_])`, 'g'), () => value);
      }

      if (nestedPats.length > 0) {
        r = applyPatterns(p, r, nestedPats);
      }

      const uid = p.unique_id++;
      r = r.replace(uniqueRe, () => String(uid));
      r = expandEvalCalls(p, r);
      r = r.replace(doubleSigilRe, '');

      if (clearProbe.test(r)) {
        r = r.replace(clearStripRe, '');
        clear = true;
      }

      const before = src.slice(0, pos);
      const after = src.slice(endPos);
      const whole = src.slice(pos, endPos);
      r = r
        .replace(prefixRe, () => before)
        .replace(suffixRe, () => after)
        .replace(matchRe, () => whole);

      rebuilt += r;
      last = r;
      pos = endPos;
    }

    if (matched) {
      // When `clear` was requested the output collapses to the last replacement produced.
      src = clear ? last : rebuilt;
      clear = false;
    }
  }

  return src;
}
|
|
225
|
+
|
|
226
|
+
/**
 * Backslash-escapes regular-expression metacharacters (plus single and double
 * quotes) in `s` so the result can be embedded in a RegExp source string.
 *
 * @param {string} s - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegex(s) {
  const special = /[.*+?^${}()|[\]\\""']/g;
  return s.replace(special, (ch) => `\\${ch}`);
}
|
|
227
|
+
|
|
228
|
+
/**
 * Removes the backslash from `\"`, `\'` and `\\` in a delimiter written inside
 * a pattern. Any other backslash (including a trailing one) is kept as is.
 *
 * @param {string} s - Delimiter text as written in the pattern.
 * @returns {string} The delimiter with those three escapes resolved.
 */
function unescapeDelimiter(s) {
  // A single left-to-right pass: each backslash pair is consumed once, so `\\"` becomes `\"`.
  return s.replace(/\\(["'\\])/g, (_, escaped) => escaped);
}
|
|
238
|
+
|
|
239
|
+
/**
 * Text preprocessor driven by pattern definitions embedded in the input.
 *
 * Public state:
 *  - recursion_limit: maximum number of passes `process` will run.
 *  - unique_id: counter handed out by the `unique` keyword.
 *  - symbols: the configurable keyword, delimiters and sigil.
 *  - content: the input while processing, then the final output.
 */
export class Papagaio {
  /**
   * @param {string} [sigil='$'] - Variable / keyword marker.
   * @param {string} [open='{'] - Opening delimiter.
   * @param {string} [close='}'] - Closing delimiter.
   * @param {string} [pattern='pattern'] - Keyword that introduces a pattern definition.
   */
  constructor(sigil = '$', open = '{', close = '}', pattern = 'pattern') {
    this.recursion_limit = 512;
    this.unique_id = 0;
    this.symbols = { pattern, open, close, sigil };
    this.content = "";
  }

  /**
   * Collects the pattern definitions found in the text and applies them,
   * repeating while the output still contains pattern definitions, until the
   * text stops changing or `recursion_limit` passes have run.
   *
   * @param {string} input - Text to process.
   * @returns {string} The processed text (also stored in `this.content`).
   */
  process(input) {
    this.content = input;

    // True while `text` still contains the start of a pattern definition.
    // Rebuilt on every call because `symbols` may be reassigned between passes.
    const hasPendingDefinition = (text) => {
      const { pattern, open } = this.symbols;
      return new RegExp(`(?:^|\\b)${escapeRegex(pattern)}\\s*${escapeRegex(open)}`).test(text);
    };

    let src = input;
    let last = null;
    let passes = 0;

    while (src !== last && passes < this.recursion_limit) {
      passes++;
      last = src;
      const [patterns, body] = collectPatterns(this, src);
      src = applyPatterns(this, body, patterns);
      if (!hasPendingDefinition(src)) break;
    }

    this.content = src;
    return src;
  }
}
|
package/src/papagaio.js
CHANGED
|
@@ -179,11 +179,22 @@ function matchPattern(papagaio, src, tokens, startPos = 0) {
|
|
|
179
179
|
if (token.type === 'var') {
|
|
180
180
|
let v = '';
|
|
181
181
|
const nextToken = findNextSignificantToken(tokens, ti);
|
|
182
|
-
|
|
183
|
-
|
|
182
|
+
|
|
183
|
+
// Se o próximo token é um block, captura até o delimitador de abertura
|
|
184
|
+
if (nextToken && nextToken.type === 'block') {
|
|
185
|
+
while (pos < src.length && !src.startsWith(nextToken.openDelim, pos) && !/\s/.test(src[pos])) {
|
|
186
|
+
v += src[pos++];
|
|
187
|
+
}
|
|
188
|
+
} else if (nextToken && nextToken.type === 'literal') {
|
|
189
|
+
while (pos < src.length && !src.startsWith(nextToken.value, pos) && !/\s/.test(src[pos])) {
|
|
190
|
+
v += src[pos++];
|
|
191
|
+
}
|
|
184
192
|
} else {
|
|
185
|
-
while (pos < src.length && !/\s/.test(src[pos]))
|
|
193
|
+
while (pos < src.length && !/\s/.test(src[pos])) {
|
|
194
|
+
v += src[pos++];
|
|
195
|
+
}
|
|
186
196
|
}
|
|
197
|
+
|
|
187
198
|
if (token.wsTrailing && token.wsTrailing !== 'optional') {
|
|
188
199
|
const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
|
|
189
200
|
pos = newPos;
|
|
@@ -199,12 +210,24 @@ function matchPattern(papagaio, src, tokens, startPos = 0) {
|
|
|
199
210
|
while (pos < src.length && /\s/.test(src[pos])) pos++;
|
|
200
211
|
const n = findNextSignificantToken(tokens, ti);
|
|
201
212
|
let v = '';
|
|
202
|
-
|
|
203
|
-
|
|
213
|
+
|
|
214
|
+
// Se o próximo token é um block, captura até o delimitador de abertura
|
|
215
|
+
if (n && n.type === 'block') {
|
|
216
|
+
while (pos < src.length && !src.startsWith(n.openDelim, pos) && src[pos] !== '\n') {
|
|
217
|
+
v += src[pos++];
|
|
218
|
+
}
|
|
219
|
+
v = v.trimEnd();
|
|
220
|
+
} else if (!n || ['var', 'var-ws'].includes(n.type)) {
|
|
221
|
+
while (pos < src.length && !/\s/.test(src[pos])) {
|
|
222
|
+
v += src[pos++];
|
|
223
|
+
}
|
|
204
224
|
} else if (n.type === 'literal') {
|
|
205
|
-
while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n')
|
|
225
|
+
while (pos < src.length && !src.startsWith(n.value, pos) && src[pos] !== '\n') {
|
|
226
|
+
v += src[pos++];
|
|
227
|
+
}
|
|
206
228
|
v = v.trimEnd();
|
|
207
229
|
}
|
|
230
|
+
|
|
208
231
|
if (token.wsTrailing && token.wsTrailing !== 'optional') {
|
|
209
232
|
const { newPos } = matchWhitespaceType(src, pos, token.wsTrailing);
|
|
210
233
|
pos = newPos;
|
package/tests/test.js
CHANGED
|
@@ -33,7 +33,7 @@ for (const test of tests) {
|
|
|
33
33
|
|
|
34
34
|
try {
|
|
35
35
|
const result = p.process(test.code).trim();
|
|
36
|
-
const success = result
|
|
36
|
+
const success = result === test.expected.trim();
|
|
37
37
|
|
|
38
38
|
if (success) {
|
|
39
39
|
console.log(`${colors.green}[PASS]${colors.reset} [${test.id}] ${test.name}`);
|
package/tests/tests.json
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
{
|
|
16
16
|
"id": 3,
|
|
17
17
|
"name": "Block with delimiters ( )",
|
|
18
|
-
"code": "pattern {$block content {(}{)}} {[$content]}\ndata (hello world)",
|
|
18
|
+
"code": "pattern {$name $block content {(}{)}} {[$content]}\ndata (hello world)",
|
|
19
19
|
"expected": "[hello world]"
|
|
20
20
|
},
|
|
21
21
|
{
|
|
@@ -57,7 +57,7 @@
|
|
|
57
57
|
{
|
|
58
58
|
"id": 10,
|
|
59
59
|
"name": "Block with parentheses ( )",
|
|
60
|
-
"code": "pattern {$block params {(}{)}} {FUNC[$params]}\ncalcular(a, b, c)",
|
|
60
|
+
"code": "pattern {$nome$$ $block params {(}{)}} {FUNC[$params]}\ncalcular(a, b, c)",
|
|
61
61
|
"expected": "FUNC[a, b, c]"
|
|
62
62
|
},
|
|
63
63
|
{
|
|
@@ -69,14 +69,14 @@
|
|
|
69
69
|
{
|
|
70
70
|
"id": 12,
|
|
71
71
|
"name": "Nested block with multiple levels",
|
|
72
|
-
"code": "pattern {$block inner {<}{>}} {INNER{$inner}}\
|
|
72
|
+
"code": "pattern {$block inner {<}{>}} {INNER{$inner}}\n<inner content here>",
|
|
73
73
|
"expected": "INNER{inner content here}"
|
|
74
74
|
},
|
|
75
75
|
{
|
|
76
76
|
"id": 13,
|
|
77
77
|
"name": "Pattern with flexible whitespace in middle",
|
|
78
78
|
"code": "pattern {from to} {path: from -> to}\nfrom to\nfrom to\nfrom to",
|
|
79
|
-
"expected": "path: from -> to"
|
|
79
|
+
"expected": "path: from -> to\npath: from -> to\npath: from -> to"
|
|
80
80
|
},
|
|
81
81
|
{
|
|
82
82
|
"id": 14,
|
|
@@ -135,7 +135,7 @@
|
|
|
135
135
|
{
|
|
136
136
|
"id": 23,
|
|
137
137
|
"name": "Context with nested pattern",
|
|
138
|
-
"code": "context {
|
|
138
|
+
"code": "context {pattern {$x} {<$x>} apple banana cherry}",
|
|
139
139
|
"expected": "<apple>"
|
|
140
140
|
},
|
|
141
141
|
{
|