papagaio 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -211
- package/bin/cli.qjs +57 -0
- package/index.html +278 -380
- package/package.json +1 -1
- package/src/papagaio.js +279 -443
- package/tests/test.js +1 -1
- package/tests/tests.json +100 -604
package/README.md
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
# Papagaio
|
|
2
|
-
|
|
3
|
-
Minimal yet powerful text preprocessor.
|
|
2
|
+
Minimal yet powerful text preprocessor with support for multi-character delimiters.
|
|
4
3
|
|
|
5
4
|
## Installation
|
|
6
|
-
|
|
7
5
|
```javascript
|
|
8
6
|
import { Papagaio } from './src/papagaio.js';
|
|
9
7
|
const p = new Papagaio();
|
|
@@ -11,17 +9,14 @@ const result = p.process(input);
|
|
|
11
9
|
```
|
|
12
10
|
|
|
13
11
|
## Configuration
|
|
14
|
-
|
|
15
12
|
```javascript
|
|
16
13
|
p.symbols = {
|
|
17
14
|
pattern: "pattern", // pattern keyword
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
close: "}", // closing delimiter
|
|
15
|
+
open: "{", // opening delimiter (multi-char supported)
|
|
16
|
+
close: "}", // closing delimiter (multi-char supported)
|
|
21
17
|
sigil: "$" // variable marker
|
|
22
18
|
};
|
|
23
|
-
p.recursion_limit = 512;
|
|
24
|
-
p.unique_id = 0; // unique ID counter
|
|
19
|
+
p.recursion_limit = 512;
|
|
25
20
|
```
|
|
26
21
|
|
|
27
22
|
---
|
|
@@ -29,35 +24,61 @@ p.unique_id = 0; // unique ID counter
|
|
|
29
24
|
## Core Concepts
|
|
30
25
|
|
|
31
26
|
### 1. Simple Variables
|
|
32
|
-
|
|
33
27
|
```
|
|
34
28
|
pattern {$x} {$x}
|
|
35
29
|
hello
|
|
36
30
|
```
|
|
37
31
|
Output: `hello`
|
|
38
32
|
|
|
39
|
-
Variables capture words (non-whitespace sequences).
|
|
40
|
-
|
|
41
33
|
### 2. Multiple Variables
|
|
42
|
-
|
|
43
34
|
```
|
|
44
35
|
pattern {$x $y $z} {$z, $y, $x}
|
|
45
36
|
apple banana cherry
|
|
46
37
|
```
|
|
47
38
|
Output: `cherry, banana, apple`
|
|
48
39
|
|
|
49
|
-
|
|
40
|
+
---
|
|
50
41
|
|
|
51
|
-
|
|
42
|
+
## Whitespace Operators
|
|
52
43
|
|
|
53
|
-
|
|
44
|
+
Papagaio provides flexible whitespace handling for variable capture.
|
|
54
45
|
|
|
46
|
+
### `$x` - Single Word Variable
|
|
47
|
+
Captures a single non-whitespace token.
|
|
55
48
|
```
|
|
56
|
-
|
|
49
|
+
pattern {$x} {[$x]}
|
|
50
|
+
hello world
|
|
57
51
|
```
|
|
52
|
+
Output: `[hello]`
|
|
58
53
|
|
|
59
|
-
###
|
|
54
|
+
### `$$x` - Whitespace-Sensitive Variable
|
|
55
|
+
Captures text including surrounding whitespace until the next significant token.
|
|
56
|
+
```
|
|
57
|
+
pattern {$$x world} {[$x]}
|
|
58
|
+
hello world
|
|
59
|
+
```
|
|
60
|
+
Output: `[hello ]`
|
|
61
|
+
|
|
62
|
+
### `$$$x` - Optional Whitespace Variable
|
|
63
|
+
Captures with optional whitespace (no error if empty).
|
|
64
|
+
```
|
|
65
|
+
pattern {$$$x world} {<$x>}
|
|
66
|
+
world
|
|
67
|
+
```
|
|
68
|
+
Output: `<>`
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Blocks
|
|
73
|
+
|
|
74
|
+
Capture content between delimiters with full nesting support.
|
|
60
75
|
|
|
76
|
+
### Syntax
|
|
77
|
+
```
|
|
78
|
+
$block varName {open}{close}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Basic Example
|
|
61
82
|
```
|
|
62
83
|
pattern {$name $block content {(}{)}} {[$content]}
|
|
63
84
|
data (hello world)
|
|
@@ -65,287 +86,186 @@ data (hello world)
|
|
|
65
86
|
Output: `[hello world]`
|
|
66
87
|
|
|
67
88
|
### Custom Delimiters
|
|
68
|
-
|
|
69
89
|
```
|
|
70
90
|
pattern {$block data {<<}{>>}} {DATA: $data}
|
|
71
91
|
<<json stuff>>
|
|
72
92
|
```
|
|
73
93
|
Output: `DATA: json stuff`
|
|
74
94
|
|
|
75
|
-
###
|
|
95
|
+
### Multi-Character Delimiters
|
|
96
|
+
```
|
|
97
|
+
pattern {$block code {```}{```}} {<pre>$code</pre>}
|
|
98
|
+
```markdown
|
|
99
|
+
# Title
|
|
100
|
+
```
|
|
101
|
+
Output: `<pre># Title</pre>`
|
|
76
102
|
|
|
103
|
+
### Multiple Blocks
|
|
77
104
|
```
|
|
78
105
|
pattern {$block a {(}{)}, $block b {[}{]}} {$a|$b}
|
|
79
106
|
(first), [second]
|
|
80
107
|
```
|
|
81
108
|
Output: `first|second`
|
|
82
109
|
|
|
110
|
+
### Nested Blocks
|
|
111
|
+
```
|
|
112
|
+
pattern {$block outer {(}{)}} {[$outer]}
|
|
113
|
+
(outer (inner))
|
|
114
|
+
```
|
|
115
|
+
Output: `[outer (inner)]`
|
|
116
|
+
|
|
83
117
|
---
|
|
84
118
|
|
|
85
119
|
## Patterns
|
|
86
120
|
|
|
87
|
-
### Basic
|
|
88
|
-
|
|
121
|
+
### Basic Pattern
|
|
89
122
|
```
|
|
90
123
|
pattern {match} {replace}
|
|
91
124
|
```
|
|
92
125
|
|
|
93
|
-
###
|
|
94
|
-
|
|
126
|
+
### Example
|
|
95
127
|
```
|
|
96
128
|
pattern {# $title} {<h1>$title</h1>}
|
|
97
129
|
# Welcome
|
|
98
130
|
```
|
|
99
131
|
Output: `<h1>Welcome</h1>`
|
|
100
132
|
|
|
101
|
-
### Multiple Patterns
|
|
102
|
-
|
|
133
|
+
### Multiple Patterns Cascade
|
|
103
134
|
```
|
|
104
135
|
pattern {a} {b}
|
|
105
136
|
pattern {b} {c}
|
|
106
137
|
pattern {c} {d}
|
|
107
138
|
a
|
|
108
139
|
```
|
|
109
|
-
Output: `d`
|
|
140
|
+
Output: `d`
|
|
110
141
|
|
|
111
142
|
---
|
|
112
143
|
|
|
113
|
-
##
|
|
144
|
+
## Subpatterns
|
|
114
145
|
|
|
115
|
-
|
|
146
|
+
Subpatterns are patterns declared *inside* replacement bodies, existing only during parent pattern execution.
|
|
116
147
|
|
|
148
|
+
### Syntax
|
|
149
|
+
```
|
|
150
|
+
$pattern {match} {replace}
|
|
117
151
|
```
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
152
|
+
|
|
153
|
+
### Example
|
|
154
|
+
```
|
|
155
|
+
pattern {eval $block code {(}{)}} {
|
|
156
|
+
$eval{
|
|
157
|
+
$pattern {undefined} {}
|
|
158
|
+
$code;
|
|
159
|
+
return "";
|
|
160
|
+
}
|
|
123
161
|
}
|
|
162
|
+
eval(console.log(123))
|
|
124
163
|
```
|
|
125
164
|
Output:
|
|
126
165
|
```
|
|
127
|
-
|
|
128
|
-
<banana>
|
|
166
|
+
123
|
|
129
167
|
```
|
|
130
168
|
|
|
131
|
-
|
|
169
|
+
### Key Properties
|
|
170
|
+
* Subpatterns exist only within the running pattern.
|
|
171
|
+
* They do not leak into the global pattern list.
|
|
172
|
+
* They can recursively modify inner content before `$eval` or other processors.
|
|
173
|
+
* Multiple subpatterns can coexist in the same replacement.
|
|
132
174
|
|
|
133
175
|
---
|
|
134
176
|
|
|
135
177
|
## Special Keywords
|
|
136
178
|
|
|
137
|
-
### $
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
```
|
|
141
|
-
pattern {item} {[$unique]item_$unique}
|
|
142
|
-
item
|
|
143
|
-
item
|
|
144
|
-
item
|
|
145
|
-
```
|
|
146
|
-
Output: `[0]item_0`, `[1]item_1`, `[2]item_2`
|
|
147
|
-
|
|
148
|
-
```
|
|
149
|
-
pattern {a} {$unique $unique}
|
|
150
|
-
a
|
|
151
|
-
```
|
|
152
|
-
Output: `0 0` (same ID for both occurrences)
|
|
153
|
-
|
|
154
|
-
### $match
|
|
155
|
-
Return the full match.
|
|
156
|
-
|
|
157
|
-
```
|
|
158
|
-
pattern {[$x]} {FOUND: $match}
|
|
159
|
-
[data]
|
|
160
|
-
```
|
|
161
|
-
Output: `FOUND: [data]`
|
|
162
|
-
|
|
163
|
-
### $prefix / $suffix
|
|
164
|
-
Text before and after the match.
|
|
165
|
-
|
|
166
|
-
```
|
|
167
|
-
pattern {world} {$prefix$suffix}hello world test
|
|
168
|
-
```
|
|
169
|
-
Output: `hello hello test test`
|
|
170
|
-
|
|
171
|
-
### $clear
|
|
172
|
-
Remove everything before the match.
|
|
173
|
-
|
|
179
|
+
### $eval
|
|
180
|
+
Executes JavaScript code.
|
|
174
181
|
```
|
|
175
|
-
pattern {
|
|
176
|
-
|
|
182
|
+
pattern {$x} {$eval{return parseInt($x)*2;}}
|
|
183
|
+
5
|
|
177
184
|
```
|
|
178
|
-
Output: `
|
|
179
|
-
|
|
180
|
-
### $eval
|
|
181
|
-
Execute JavaScript code.
|
|
185
|
+
Output: `10`
|
|
182
186
|
|
|
187
|
+
Supports multi-character delimiters:
|
|
183
188
|
```
|
|
184
|
-
pattern {$x} {$eval
|
|
189
|
+
pattern {$x} {$eval<<return parseInt($x)*2;>>}
|
|
185
190
|
5
|
|
186
191
|
```
|
|
187
192
|
Output: `10`
|
|
188
193
|
|
|
189
|
-
---
|
|
190
194
|
|
|
191
|
-
##
|
|
195
|
+
## Important Rules
|
|
192
196
|
|
|
193
|
-
###
|
|
197
|
+
### Matching
|
|
198
|
+
* `$x` = one word (no whitespace)
|
|
199
|
+
* `$$x` = captures text with optional surrounding whitespace
|
|
200
|
+
* `$$$x` = captures text with optional surrounding whitespace; the capture may be empty, and the pattern still matches
|
|
201
|
+
* Patterns apply globally until stable
|
|
202
|
+
* Blocks support arbitrary nesting depth
|
|
194
203
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
pattern {**$t**} {<strong>$t</strong>}
|
|
200
|
-
pattern {*$t*} {<em>$t</em>}
|
|
201
|
-
pattern {- $i} {<li>$i</li>}
|
|
202
|
-
|
|
203
|
-
# Title
|
|
204
|
-
**bold** and *italic*
|
|
205
|
-
- item1
|
|
206
|
-
- item2
|
|
207
|
-
}
|
|
208
|
-
```
|
|
204
|
+
### Block Matching
|
|
205
|
+
* `$block name {open}{close}` captures delimited regions
|
|
206
|
+
* Supports nested delimiters of any length
|
|
207
|
+
* Multi-character delimiters fully supported (e.g., `{>>>}{<<<}`)
|
|
209
208
|
|
|
210
|
-
###
|
|
209
|
+
### Whitespace Handling
|
|
210
|
+
* Whitespace-optional tokens (`$$` alone) skip optional whitespace
|
|
211
|
+
* Variables automatically skip leading whitespace when needed
|
|
212
|
+
* Trailing whitespace is trimmed when variables appear before literals
|
|
211
213
|
|
|
212
|
-
|
|
213
|
-
pattern {$a,$b,$c} {{ id: '$a', name: '$b', role: '$c' }}
|
|
214
|
-
1,Alice,Engineer
|
|
215
|
-
2,Bob,Designer
|
|
216
|
-
```
|
|
214
|
+
---
|
|
217
215
|
|
|
218
|
-
|
|
219
|
-
```
|
|
220
|
-
{ id: '1', name: 'Alice', role: 'Engineer' }
|
|
221
|
-
{ id: '2', name: 'Bob', role: 'Designer' }
|
|
222
|
-
```
|
|
216
|
+
## Multi-Character Delimiter Support
|
|
223
217
|
|
|
224
|
-
|
|
218
|
+
The updated version fully supports multi-character delimiters throughout all features.
|
|
225
219
|
|
|
220
|
+
### Examples
|
|
221
|
+
```javascript
|
|
222
|
+
const p = new Papagaio('$', '<<<', '>>>');
|
|
226
223
|
```
|
|
227
|
-
pattern {$key = $value} {const $key = '$value';}
|
|
228
|
-
host = localhost
|
|
229
|
-
port = 3000
|
|
230
|
-
```
|
|
231
|
-
|
|
232
|
-
Output:
|
|
233
|
-
```
|
|
234
|
-
const host = 'localhost';
|
|
235
|
-
const port = '3000';
|
|
236
|
-
```
|
|
237
|
-
|
|
238
|
-
### HTML Generator
|
|
239
224
|
|
|
225
|
+
### In Blocks
|
|
240
226
|
```
|
|
241
|
-
pattern {$
|
|
242
|
-
|
|
243
|
-
span Test
|
|
227
|
+
pattern {$block data {<<}{>>}} {$data}
|
|
228
|
+
<<content>>
|
|
244
229
|
```
|
|
245
230
|
|
|
246
|
-
|
|
231
|
+
### In Eval
|
|
247
232
|
```
|
|
248
|
-
|
|
249
|
-
|
|
233
|
+
// const p = new Papagaio('$', '<<<', '>>>');
|
|
234
|
+
pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
|
|
235
|
+
5
|
|
250
236
|
```
|
|
251
237
|
|
|
252
238
|
---
|
|
253
239
|
|
|
254
|
-
## Important Rules
|
|
255
|
-
|
|
256
|
-
### Matching
|
|
257
|
-
- Variables (`$x`) capture **one word** (no spaces)
|
|
258
|
-
- Variables (`$$x`) captures one or more words (with spaces)
|
|
259
|
-
- Patterns apply **globally** each iteration
|
|
260
|
-
- Auto-recursion until: max 512 iterations OR no changes
|
|
261
|
-
- `$ ` = one or more of this whitespace (spaces, tabs, newlines)
|
|
262
|
-
- `$$ ` = zero or more of this whitespace (spaces, tabs, newlines)
|
|
263
|
-
- `$$$ `= one or more whitespaces
|
|
264
|
-
- `$$$$ `= zero or more whitespaces
|
|
265
|
-
|
|
266
|
-
### Block Matching
|
|
267
|
-
- `$block name {open}{close}` captures between delimiters
|
|
268
|
-
- Supports nested delimiters automatically
|
|
269
|
-
- Multiple blocks in one pattern work
|
|
270
|
-
|
|
271
|
-
### Variables
|
|
272
|
-
- Names: `[A-Za-z0-9_]`
|
|
273
|
-
- Reuse: `$x` appears multiple times in replace
|
|
274
|
-
- Undefined: becomes empty string
|
|
275
|
-
|
|
276
|
-
### Limitations
|
|
277
|
-
- You cannot match words containing the current sigil character.
|
|
278
|
-
- You cannot match a $block{}{} using the current delimiters.
|
|
279
|
-
- By design, whitespace operators need a whitespace after them to work properly, even the `$$$ ` and `$$$$ ` ones.
|
|
280
|
-
- Multiple word variables (`$$x`) also captures leading/trailing whitespaces, so be careful when using them together with whitespace operators.
|
|
281
|
-
|
|
282
|
-
---
|
|
283
|
-
|
|
284
240
|
## Troubleshooting
|
|
285
241
|
|
|
286
242
|
| Problem | Solution |
|
|
287
243
|
|---------|----------|
|
|
288
|
-
| Variable not captured | Check
|
|
289
|
-
| Block
|
|
290
|
-
| Infinite recursion |
|
|
291
|
-
|
|
|
292
|
-
|
|
|
293
|
-
|
|
|
294
|
-
| Whitespace operators not matching | Multiple word variables (`$$x`) also captures leading/trailing whitespaces, so be careful when using them together with whitespace operators. |
|
|
295
|
-
|
|
296
|
-
## Known Bugs
|
|
297
|
-
|
|
298
|
-
- Multi-character block delimiters that contains double quotes doesnt match properly.
|
|
244
|
+
| Variable not captured | Check spacing and use appropriate whitespace operator (`$x`, `$$x`, `$$$x`) |
|
|
245
|
+
| Block mismatch | Verify opening and closing delimiters match the declaration |
|
|
246
|
+
| Infinite recursion | Reduce `recursion_limit` or simplify pattern dependencies |
|
|
247
|
+
| Pattern not matching | Add whitespace operators (`$$`) for multi-word content |
|
|
248
|
+
| Nested blocks fail | Ensure delimiters are properly balanced |
|
|
249
|
+
| Multi-char delimiters broken | Check delimiters don't conflict; use escaping if needed |
|
|
299
250
|
|
|
300
251
|
---
|
|
301
252
|
|
|
302
253
|
## Syntax Reference
|
|
303
254
|
|
|
304
255
|
```
|
|
305
|
-
pattern {$x $y} {$y, $x}
|
|
306
|
-
pattern {
|
|
307
|
-
pattern {$
|
|
308
|
-
|
|
309
|
-
$
|
|
310
|
-
$
|
|
311
|
-
$prefix / $suffix # before/after
|
|
312
|
-
$clear # clear before
|
|
313
|
-
$eval{code} # execute JS
|
|
314
|
-
$ / $$ / $$$ / $$$$ # whitespace operators
|
|
256
|
+
pattern {$x $y} {$y, $x} # basic pattern with variables
|
|
257
|
+
pattern {$$x $y} {$y, $x} # whitespace-sensitive capture
|
|
258
|
+
pattern {$$$x $y} {$y, $x} # optional whitespace capture
|
|
259
|
+
pattern {$block n {o}{c}} {$n} # block capture with custom delimiters
|
|
260
|
+
$pattern {a} {b} # subpattern (scoped to parent)
|
|
261
|
+
$eval{code} # JavaScript evaluation
|
|
315
262
|
```
|
|
316
263
|
|
|
317
264
|
---
|
|
318
265
|
|
|
319
|
-
##
|
|
320
|
-
|
|
321
|
-
```
|
|
322
|
-
context {
|
|
323
|
-
# Markdown headers
|
|
324
|
-
pattern {# $title} {<h1>$title</h1>}
|
|
325
|
-
|
|
326
|
-
# Lists
|
|
327
|
-
pattern {- $item} {<li>$item</li>}
|
|
328
|
-
|
|
329
|
-
# Inline formatting
|
|
330
|
-
pattern {**$text**} {<strong>$text</strong>}
|
|
331
|
-
pattern {*$text*} {<em>$text</em>}
|
|
332
|
-
|
|
333
|
-
# Process content
|
|
334
|
-
# Welcome
|
|
335
|
-
# Getting Started
|
|
336
|
-
This is **important** and *italic*
|
|
337
|
-
- First item
|
|
338
|
-
- Second item
|
|
339
|
-
}
|
|
340
|
-
```
|
|
341
|
-
|
|
342
|
-
Output:
|
|
343
|
-
```html
|
|
344
|
-
<h1>Welcome</h1>
|
|
345
|
-
<h2>Getting Started</h2>
|
|
346
|
-
This is <strong>important</strong> and <em>italic</em>
|
|
347
|
-
<li>First item</li>
|
|
348
|
-
<li>Second item</li>
|
|
349
|
-
```
|
|
266
|
+
## Performance Notes
|
|
350
267
|
|
|
351
|
-
|
|
268
|
+
* Patterns apply recursively until no changes occur (up to `recursion_limit`)
|
|
269
|
+
* Multi-character delimiter matching is optimized with regex escaping
|
|
270
|
+
* Nested blocks and subpatterns have no theoretical depth limit
|
|
271
|
+
* Large recursion limits can impact performance on complex inputs
|
package/bin/cli.qjs
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/usr/bin/env qjs
|
|
2
|
+
import * as std from "std";
|
|
3
|
+
import * as os from "os";
|
|
4
|
+
|
|
5
|
+
// Import Papagaio class - adjust the path as needed
|
|
6
|
+
// For QuickJS, you can include the file directly or use import
|
|
7
|
+
import { Papagaio } from "../src/papagaio.js";
|
|
8
|
+
|
|
9
|
+
// Version (can be hardcoded or read from a JSON file if needed)
|
|
10
|
+
const VERSION = "0.5.0"; // CLI version string printed by -v/--version; keep in sync with package.json
|
|
11
|
+
|
|
12
|
+
// Parse command line arguments
|
|
13
|
+
const args = scriptArgs.slice(1); // QuickJS uses scriptArgs instead of process.argv
|
|
14
|
+
|
|
15
|
+
// Help & Version
|
|
16
|
+
if (args.includes("-v") || args.includes("--version")) {
|
|
17
|
+
std.out.puts(VERSION + "\n");
|
|
18
|
+
std.exit(0);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
if (args.includes("-h") || args.includes("--help")) {
|
|
22
|
+
std.out.puts(`Usage: papagaio [options] <file>
|
|
23
|
+
Options:
|
|
24
|
+
-h, --help Show this help message
|
|
25
|
+
-v, --version Show version number
|
|
26
|
+
`);
|
|
27
|
+
std.exit(0);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// File input
|
|
31
|
+
const file = args.find(arg => !arg.startsWith("-"));
|
|
32
|
+
if (!file) {
|
|
33
|
+
std.err.puts("Error: no input file specified.\nUse --help for usage.\n");
|
|
34
|
+
std.exit(1);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Read file
|
|
38
|
+
let src;
|
|
39
|
+
try {
|
|
40
|
+
const f = std.open(file, "r");
|
|
41
|
+
if (!f) {
|
|
42
|
+
std.err.puts(`Error: cannot open file '${file}'\n`);
|
|
43
|
+
std.exit(1);
|
|
44
|
+
}
|
|
45
|
+
src = f.readAsString();
|
|
46
|
+
f.close();
|
|
47
|
+
} catch (e) {
|
|
48
|
+
std.err.puts(`Error reading file: ${e}\n`);
|
|
49
|
+
std.exit(1);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Process with Papagaio
|
|
53
|
+
const p = new Papagaio();
|
|
54
|
+
const out = p.process(src);
|
|
55
|
+
|
|
56
|
+
// Output result
|
|
57
|
+
std.out.puts(out);
|