papagaio 0.4.2 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +176 -119
- package/bin/cli.qjs +57 -0
- package/index.html +278 -380
- package/package.json +1 -1
- package/src/papagaio.js +229 -471
- package/tests/tests.json +102 -606
- package/src/louro.js +0 -259
package/README.md
CHANGED
|
@@ -1,27 +1,25 @@
|
|
|
1
1
|
# Papagaio
|
|
2
|
-
|
|
3
|
-
Minimal yet powerful text preprocessor.
|
|
2
|
+
Minimal yet powerful text preprocessor with support for multi-character delimiters.
|
|
4
3
|
|
|
5
4
|
## Installation
|
|
6
|
-
|
|
7
5
|
```javascript
|
|
8
6
|
import { Papagaio } from './src/papagaio.js';
|
|
9
|
-
const
|
|
10
|
-
const result =
|
|
7
|
+
const papagaio = new Papagaio();
|
|
8
|
+
const result = papagaio.process(input);
|
|
11
9
|
```
|
|
12
10
|
|
|
13
11
|
## Configuration
|
|
14
|
-
|
|
15
12
|
```javascript
|
|
16
|
-
|
|
13
|
+
papagaio.symbols = {
|
|
17
14
|
pattern: "pattern", // pattern keyword
|
|
18
|
-
open: "{", // opening delimiter
|
|
19
|
-
close: "}", // closing delimiter
|
|
20
|
-
sigil: "$"
|
|
15
|
+
open: "{", // opening delimiter (multi-char supported)
|
|
16
|
+
close: "}", // closing delimiter (multi-char supported)
|
|
17
|
+
sigil: "$", // variable marker
|
|
18
|
+
eval: "eval", // eval keyword
|
|
19
|
+
block: "block", // block keyword
|
|
20
|
+
regex: "regex" // regex keyword
|
|
21
21
|
};
|
|
22
|
-
|
|
23
|
-
p.recursion_limit = 512;
|
|
24
|
-
p.unique_id = 0;
|
|
22
|
+
papagaio.recursion_limit = 512;
|
|
25
23
|
```
|
|
26
24
|
|
|
27
25
|
---
|
|
@@ -29,108 +27,185 @@ p.unique_id = 0;
|
|
|
29
27
|
## Core Concepts
|
|
30
28
|
|
|
31
29
|
### 1. Simple Variables
|
|
32
|
-
|
|
33
30
|
```
|
|
34
31
|
pattern {$x} {$x}
|
|
35
32
|
hello
|
|
36
33
|
```
|
|
37
|
-
|
|
38
34
|
Output: `hello`
|
|
39
35
|
|
|
40
36
|
### 2. Multiple Variables
|
|
41
|
-
|
|
42
37
|
```
|
|
43
38
|
pattern {$x $y $z} {$z, $y, $x}
|
|
44
39
|
apple banana cherry
|
|
45
40
|
```
|
|
46
|
-
|
|
47
41
|
Output: `cherry, banana, apple`
|
|
48
42
|
|
|
49
43
|
---
|
|
50
44
|
|
|
51
|
-
##
|
|
45
|
+
## Variables
|
|
46
|
+
|
|
47
|
+
Papagaio provides flexible variable capture with automatic context-aware behavior.
|
|
48
|
+
|
|
49
|
+
### `$x` - Smart Variable
|
|
50
|
+
Automatically adapts based on context:
|
|
51
|
+
- **Before a block**: Captures everything until the block's opening delimiter
|
|
52
|
+
- **Before a literal**: Captures everything until that literal appears
|
|
53
|
+
- **Otherwise**: Captures a single word (non-whitespace token)
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
pattern {$x} {[$x]}
|
|
57
|
+
hello world
|
|
58
|
+
```
|
|
59
|
+
Output: `[hello]`
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
pattern {$name $block content {(}{)}} {$name: $content}
|
|
63
|
+
greeting (hello world)
|
|
64
|
+
```
|
|
65
|
+
Output: `greeting: hello world`
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
pattern {$prefix:$suffix} {$suffix-$prefix}
|
|
69
|
+
key:value
|
|
70
|
+
```
|
|
71
|
+
Output: `value-key`
|
|
72
|
+
|
|
73
|
+
### `$x?` - Optional Variable
|
|
74
|
+
Same behavior as `$x`, but won't fail if empty or not found.
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
pattern {$x? world} {<$x>}
|
|
78
|
+
world
|
|
79
|
+
```
|
|
80
|
+
Output: `<>`
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
pattern {$greeting? $name} {Hello $name$greeting}
|
|
84
|
+
Hi John
|
|
85
|
+
```
|
|
86
|
+
Output: `Hello JohnHi`
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Regex Matching
|
|
52
91
|
|
|
53
|
-
Capture content
|
|
92
|
+
Capture content using JavaScript regular expressions.
|
|
54
93
|
|
|
55
94
|
### Syntax
|
|
95
|
+
```
|
|
96
|
+
$regex varName {pattern}
|
|
97
|
+
```
|
|
56
98
|
|
|
99
|
+
### Basic Example
|
|
57
100
|
```
|
|
58
|
-
$
|
|
101
|
+
pattern {$regex num {[0-9]+}} {Number: $num}
|
|
102
|
+
The answer is 42
|
|
59
103
|
```
|
|
104
|
+
Output: `Number: 42`
|
|
60
105
|
|
|
61
|
-
###
|
|
106
|
+
### Complex Patterns
|
|
107
|
+
```
|
|
108
|
+
pattern {$regex email {\w+@\w+\.\w+}} {Email found: $email}
|
|
109
|
+
Contact: user@example.com
|
|
110
|
+
```
|
|
111
|
+
Output: `Email found: user@example.com`
|
|
112
|
+
|
|
113
|
+
### Multiple Regex Variables
|
|
114
|
+
```
|
|
115
|
+
pattern {$regex year {[0-9]{4}}-$regex month {[0-9]{2}}} {Month $month in $year}
|
|
116
|
+
2024-03
|
|
117
|
+
```
|
|
118
|
+
Output: `Month 03 in 2024`
|
|
62
119
|
|
|
120
|
+
### Notes
|
|
121
|
+
- Regex patterns are cached for performance
|
|
122
|
+
- Matches are anchored at the current position (no searching ahead)
|
|
123
|
+
- Invalid regex patterns will cause the match to fail gracefully
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Blocks
|
|
128
|
+
|
|
129
|
+
Capture content between delimiters with full nesting support.
|
|
130
|
+
|
|
131
|
+
### Syntax
|
|
132
|
+
```
|
|
133
|
+
$block varName {open}{close}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Basic Example
|
|
63
137
|
```
|
|
64
138
|
pattern {$name $block content {(}{)}} {[$content]}
|
|
65
139
|
data (hello world)
|
|
66
140
|
```
|
|
67
|
-
|
|
68
141
|
Output: `[hello world]`
|
|
69
142
|
|
|
70
143
|
### Custom Delimiters
|
|
71
|
-
|
|
72
144
|
```
|
|
73
145
|
pattern {$block data {<<}{>>}} {DATA: $data}
|
|
74
146
|
<<json stuff>>
|
|
75
147
|
```
|
|
76
|
-
|
|
77
148
|
Output: `DATA: json stuff`
|
|
78
149
|
|
|
79
|
-
###
|
|
150
|
+
### Multi-Character Delimiters
|
|
151
|
+
```
|
|
152
|
+
pattern {$block code {```}{```}} {<pre>$code</pre>}
|
|
153
|
+
```markdown
|
|
154
|
+
# Title
|
|
155
|
+
```
|
|
156
|
+
Output: `<pre># Title</pre>`
|
|
80
157
|
|
|
158
|
+
### Multiple Blocks
|
|
81
159
|
```
|
|
82
160
|
pattern {$block a {(}{)}, $block b {[}{]}} {$a|$b}
|
|
83
161
|
(first), [second]
|
|
84
162
|
```
|
|
85
|
-
|
|
86
163
|
Output: `first|second`
|
|
87
164
|
|
|
165
|
+
### Nested Blocks
|
|
166
|
+
```
|
|
167
|
+
pattern {$block outer {(}{)}} {[$outer]}
|
|
168
|
+
(outer (inner))
|
|
169
|
+
```
|
|
170
|
+
Output: `[outer (inner)]`
|
|
171
|
+
|
|
88
172
|
---
|
|
89
173
|
|
|
90
174
|
## Patterns
|
|
91
175
|
|
|
92
|
-
### Basic
|
|
93
|
-
|
|
176
|
+
### Basic Pattern
|
|
94
177
|
```
|
|
95
178
|
pattern {match} {replace}
|
|
96
179
|
```
|
|
97
180
|
|
|
98
181
|
### Example
|
|
99
|
-
|
|
100
182
|
```
|
|
101
183
|
pattern {# $title} {<h1>$title</h1>}
|
|
102
184
|
# Welcome
|
|
103
185
|
```
|
|
104
|
-
|
|
105
186
|
Output: `<h1>Welcome</h1>`
|
|
106
187
|
|
|
107
188
|
### Multiple Patterns Cascade
|
|
108
|
-
|
|
109
189
|
```
|
|
110
190
|
pattern {a} {b}
|
|
111
191
|
pattern {b} {c}
|
|
112
192
|
pattern {c} {d}
|
|
113
193
|
a
|
|
114
194
|
```
|
|
115
|
-
|
|
116
195
|
Output: `d`
|
|
117
196
|
|
|
118
197
|
---
|
|
119
198
|
|
|
120
|
-
|
|
199
|
+
## Subpatterns
|
|
121
200
|
|
|
122
|
-
Subpatterns
|
|
201
|
+
Subpatterns are patterns declared *inside* replacement bodies, existing only during parent pattern execution.
|
|
123
202
|
|
|
124
203
|
### Syntax
|
|
125
|
-
|
|
126
204
|
```
|
|
127
205
|
$pattern {match} {replace}
|
|
128
206
|
```
|
|
129
207
|
|
|
130
|
-
A subpattern behaves like a normal pattern but is **scoped only to the replacement body where it appears**.
|
|
131
|
-
|
|
132
208
|
### Example
|
|
133
|
-
|
|
134
209
|
```
|
|
135
210
|
pattern {eval $block code {(}{)}} {
|
|
136
211
|
$eval{
|
|
@@ -139,133 +214,115 @@ pattern {eval $block code {(}{)}} {
|
|
|
139
214
|
return "";
|
|
140
215
|
}
|
|
141
216
|
}
|
|
142
|
-
|
|
143
217
|
eval(console.log(123))
|
|
144
218
|
```
|
|
145
|
-
|
|
146
219
|
Output:
|
|
147
|
-
|
|
148
220
|
```
|
|
149
221
|
123
|
|
150
222
|
```
|
|
151
223
|
|
|
152
224
|
### Key Properties
|
|
153
|
-
|
|
154
225
|
* Subpatterns exist only within the running pattern.
|
|
155
226
|
* They do not leak into the global pattern list.
|
|
156
|
-
* They can recursively modify inner content before `$eval` or other processors
|
|
157
|
-
* Multiple subpatterns can coexist
|
|
227
|
+
* They can recursively modify inner content before `$eval` or other processors.
|
|
228
|
+
* Multiple subpatterns can coexist in the same replacement.
|
|
158
229
|
|
|
159
230
|
---
|
|
160
231
|
|
|
161
232
|
## Special Keywords
|
|
162
233
|
|
|
163
|
-
### $
|
|
164
|
-
|
|
165
|
-
Generates unique incremental IDs.
|
|
166
|
-
|
|
234
|
+
### $eval
|
|
235
|
+
Executes JavaScript code.
|
|
167
236
|
```
|
|
168
|
-
pattern {
|
|
169
|
-
|
|
170
|
-
item
|
|
237
|
+
pattern {$x} {$eval{return parseInt($x)*2;}}
|
|
238
|
+
5
|
|
171
239
|
```
|
|
240
|
+
Output: `10`
|
|
172
241
|
|
|
173
|
-
|
|
174
|
-
|
|
242
|
+
Supports multi-character delimiters:
|
|
175
243
|
```
|
|
176
|
-
|
|
177
|
-
|
|
244
|
+
pattern {$x} {$eval<<parseInt($x)*2>>}
|
|
245
|
+
5
|
|
178
246
|
```
|
|
247
|
+
Output: `10`
|
|
179
248
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
Full matched text.
|
|
249
|
+
---
|
|
183
250
|
|
|
184
|
-
|
|
185
|
-
pattern {[$x]} {FOUND: $match}
|
|
186
|
-
[data]
|
|
187
|
-
```
|
|
251
|
+
## Important Rules
|
|
188
252
|
|
|
189
|
-
|
|
253
|
+
### Variable Matching
|
|
254
|
+
* `$x` = smart capture (context-aware: word, until literal, or until block)
|
|
255
|
+
* `$x?` = optional version of `$x` (won't fail if empty)
|
|
256
|
+
* `$regex name {pattern}` = regex-based capture
|
|
257
|
+
* Patterns apply globally until stable
|
|
258
|
+
* Blocks support arbitrary nesting depth
|
|
190
259
|
|
|
191
|
-
###
|
|
260
|
+
### Block Matching
|
|
261
|
+
* `$block name {open}{close}` captures delimited regions
|
|
262
|
+
* Supports nested delimiters of any length
|
|
263
|
+
* Multi-character delimiters fully supported (e.g., `{>>>}{<<<}`)
|
|
192
264
|
|
|
193
|
-
|
|
265
|
+
### Whitespace Handling
|
|
266
|
+
* Variables automatically skip leading whitespace when needed
|
|
267
|
+
* Trailing whitespace is trimmed when variables appear before literals
|
|
194
268
|
|
|
195
|
-
|
|
196
|
-
pattern {world} {$prefix$suffix}hello world test
|
|
197
|
-
```
|
|
269
|
+
---
|
|
198
270
|
|
|
199
|
-
|
|
271
|
+
## Multi-Character Delimiter Support
|
|
200
272
|
|
|
201
|
-
|
|
273
|
+
Multi-character delimiters are fully supported across all features.
|
|
202
274
|
|
|
203
|
-
|
|
275
|
+
### Examples
|
|
276
|
+
```javascript
|
|
277
|
+
const p = new Papagaio('$', '<<<', '>>>');
|
|
278
|
+
```
|
|
204
279
|
|
|
280
|
+
### In Blocks
|
|
205
281
|
```
|
|
206
|
-
pattern {
|
|
207
|
-
|
|
282
|
+
pattern {$block data {<<}{>>}} {$data}
|
|
283
|
+
<<content>>
|
|
208
284
|
```
|
|
209
285
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
### $eval
|
|
213
|
-
|
|
214
|
-
Executes JS.
|
|
215
|
-
|
|
286
|
+
### In Eval
|
|
216
287
|
```
|
|
217
|
-
|
|
288
|
+
// const p = new Papagaio('$', '<<<', '>>>');
|
|
289
|
+
pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
|
|
218
290
|
5
|
|
219
291
|
```
|
|
220
292
|
|
|
221
|
-
Output: `10`
|
|
222
|
-
|
|
223
|
-
---
|
|
224
|
-
|
|
225
|
-
## Important Rules
|
|
226
|
-
|
|
227
|
-
### Matching
|
|
228
|
-
|
|
229
|
-
* `$x` = one word
|
|
230
|
-
* `$$x` = multiword (captures whitespace too)
|
|
231
|
-
* `$`, `$$`, `$$$`, `$$$$` = whitespace operators
|
|
232
|
-
* Patterns apply globally until stable
|
|
233
|
-
* Blocks can be nested
|
|
234
|
-
|
|
235
|
-
### Block Matching
|
|
236
|
-
|
|
237
|
-
* `$block name {open}{close}` captures delimited regions
|
|
238
|
-
* Supports nested delimiters
|
|
239
|
-
|
|
240
293
|
---
|
|
241
294
|
|
|
242
295
|
## Troubleshooting
|
|
243
296
|
|
|
244
|
-
| Problem
|
|
245
|
-
|
|
246
|
-
| Variable not captured | Check
|
|
247
|
-
| Block
|
|
248
|
-
| Infinite recursion
|
|
249
|
-
| Pattern not matching
|
|
250
|
-
|
|
|
297
|
+
| Problem | Solution |
|
|
298
|
+
|---------|----------|
|
|
299
|
+
| Variable not captured | Check context: use `$x?` for optional, or verify literals/blocks exist |
|
|
300
|
+
| Block mismatch | Verify opening and closing delimiters match the declaration |
|
|
301
|
+
| Infinite recursion | Reduce `recursion_limit` or simplify pattern dependencies |
|
|
302
|
+
| Pattern not matching | Verify whitespace between tokens, check if variable should be optional |
|
|
303
|
+
| Nested blocks fail | Ensure delimiters are properly balanced |
|
|
304
|
+
| Multi-char delimiters broken | Check delimiters don't conflict; use escaping if needed |
|
|
305
|
+
| Regex not matching | Test regex pattern separately; ensure it matches at the exact position |
|
|
251
306
|
|
|
252
307
|
---
|
|
253
308
|
|
|
254
|
-
##
|
|
309
|
+
## Syntax Reference
|
|
255
310
|
|
|
256
|
-
|
|
311
|
+
```
|
|
312
|
+
pattern {$x $y} {$y, $x} # basic pattern with variables
|
|
313
|
+
pattern {$x? $y} {$y, $x} # optional variable
|
|
314
|
+
pattern {$regex n {[0-9]+}} {$n} # regex capture
|
|
315
|
+
pattern {$block n {o}{c}} {$n} # block capture with custom delimiters
|
|
316
|
+
$pattern {a} {b} # subpattern (scoped to parent)
|
|
317
|
+
$eval{code} # JavaScript evaluation
|
|
318
|
+
```
|
|
257
319
|
|
|
258
320
|
---
|
|
259
321
|
|
|
260
|
-
##
|
|
322
|
+
## Performance Notes
|
|
261
323
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
$match
|
|
268
|
-
$prefix / $suffix
|
|
269
|
-
$clear
|
|
270
|
-
$eval{code}
|
|
271
|
-
```
|
|
324
|
+
* Patterns apply recursively until no changes occur (up to `recursion_limit`)
|
|
325
|
+
* Multi-character delimiter matching is optimized with regex escaping
|
|
326
|
+
* Regex patterns are automatically cached to improve performance
|
|
327
|
+
* Nested blocks and subpatterns have no theoretical depth limit
|
|
328
|
+
* Large recursion limits can impact performance on complex inputs
|
package/bin/cli.qjs
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/usr/bin/env qjs
|
|
2
|
+
import * as std from "std";
|
|
3
|
+
import * as os from "os";
|
|
4
|
+
|
|
5
|
+
// Import Papagaio class - adjust the path as needed
|
|
6
|
+
// For QuickJS, you can include the file directly or use import
|
|
7
|
+
import { Papagaio } from "../src/papagaio.js";
|
|
8
|
+
|
|
9
|
+
// Version (you can hard-code it or read it from a JSON file if needed)
|
|
10
|
+
const VERSION = "1.0.0"; // NOTE(review): hard-coded; differs from the 0.5.2 package release this diff describes — confirm
|
|
11
|
+
|
|
12
|
+
// Parse command line arguments
|
|
13
|
+
const args = scriptArgs.slice(1); // QuickJS uses scriptArgs instead of process.argv
|
|
14
|
+
|
|
15
|
+
// Help & Version
|
|
16
|
+
if (args.includes("-v") || args.includes("--version")) {
|
|
17
|
+
std.out.puts(VERSION + "\n");
|
|
18
|
+
std.exit(0);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
if (args.includes("-h") || args.includes("--help")) {
|
|
22
|
+
std.out.puts(`Usage: papagaio [options] <file>
|
|
23
|
+
Options:
|
|
24
|
+
-h, --help Show this help message
|
|
25
|
+
-v, --version Show version number
|
|
26
|
+
`);
|
|
27
|
+
std.exit(0);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// File input
|
|
31
|
+
const file = args.find(arg => !arg.startsWith("-"));
|
|
32
|
+
if (!file) {
|
|
33
|
+
std.err.puts("Error: no input file specified.\nUse --help for usage.\n");
|
|
34
|
+
std.exit(1);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Read file
|
|
38
|
+
let src;
|
|
39
|
+
try {
|
|
40
|
+
const f = std.open(file, "r");
|
|
41
|
+
if (!f) {
|
|
42
|
+
std.err.puts(`Error: cannot open file '${file}'\n`);
|
|
43
|
+
std.exit(1);
|
|
44
|
+
}
|
|
45
|
+
src = f.readAsString();
|
|
46
|
+
f.close();
|
|
47
|
+
} catch (e) {
|
|
48
|
+
std.err.puts(`Error reading file: ${e}\n`);
|
|
49
|
+
std.exit(1);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Process with Papagaio
|
|
53
|
+
const p = new Papagaio();
|
|
54
|
+
const out = p.process(src);
|
|
55
|
+
|
|
56
|
+
// Output result
|
|
57
|
+
std.out.puts(out);
|