papagaio 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -115
- package/bin/cli.qjs +57 -0
- package/index.html +278 -380
- package/package.json +1 -1
- package/src/papagaio.js +277 -464
- package/tests/tests.json +101 -605
- package/src/louro.js +0 -259
package/README.md
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
# Papagaio
|
|
2
|
-
|
|
3
|
-
Minimal yet powerful text preprocessor.
|
|
2
|
+
Minimal yet powerful text preprocessor with support for multi-character delimiters.
|
|
4
3
|
|
|
5
4
|
## Installation
|
|
6
|
-
|
|
7
5
|
```javascript
|
|
8
6
|
import { Papagaio } from './src/papagaio.js';
|
|
9
7
|
const p = new Papagaio();
|
|
@@ -11,17 +9,14 @@ const result = p.process(input);
|
|
|
11
9
|
```
|
|
12
10
|
|
|
13
11
|
## Configuration
|
|
14
|
-
|
|
15
12
|
```javascript
|
|
16
13
|
p.symbols = {
|
|
17
14
|
pattern: "pattern", // pattern keyword
|
|
18
|
-
open: "{", // opening delimiter
|
|
19
|
-
close: "}", // closing delimiter
|
|
15
|
+
open: "{", // opening delimiter (multi-char supported)
|
|
16
|
+
close: "}", // closing delimiter (multi-char supported)
|
|
20
17
|
sigil: "$" // variable marker
|
|
21
18
|
};
|
|
22
|
-
|
|
23
19
|
p.recursion_limit = 512;
|
|
24
|
-
p.unique_id = 0;
|
|
25
20
|
```
|
|
26
21
|
|
|
27
22
|
---
|
|
@@ -29,108 +24,133 @@ p.unique_id = 0;
|
|
|
29
24
|
## Core Concepts
|
|
30
25
|
|
|
31
26
|
### 1. Simple Variables
|
|
32
|
-
|
|
33
27
|
```
|
|
34
28
|
pattern {$x} {$x}
|
|
35
29
|
hello
|
|
36
30
|
```
|
|
37
|
-
|
|
38
31
|
Output: `hello`
|
|
39
32
|
|
|
40
33
|
### 2. Multiple Variables
|
|
41
|
-
|
|
42
34
|
```
|
|
43
35
|
pattern {$x $y $z} {$z, $y, $x}
|
|
44
36
|
apple banana cherry
|
|
45
37
|
```
|
|
46
|
-
|
|
47
38
|
Output: `cherry, banana, apple`
|
|
48
39
|
|
|
49
40
|
---
|
|
50
41
|
|
|
51
|
-
##
|
|
42
|
+
## Whitespace Operators
|
|
52
43
|
|
|
53
|
-
|
|
44
|
+
Papagaio provides flexible whitespace handling for variable capture.
|
|
54
45
|
|
|
55
|
-
###
|
|
46
|
+
### `$x` - Single Word Variable
|
|
47
|
+
Captures a single non-whitespace token.
|
|
48
|
+
```
|
|
49
|
+
pattern {$x} {[$x]}
|
|
50
|
+
hello world
|
|
51
|
+
```
|
|
52
|
+
Output: `[hello]`
|
|
56
53
|
|
|
54
|
+
### `$$x` - Whitespace-Sensitive Variable
|
|
55
|
+
Captures text including surrounding whitespace until the next significant token.
|
|
57
56
|
```
|
|
58
|
-
|
|
57
|
+
pattern {$$x world} {[$x]}
|
|
58
|
+
hello world
|
|
59
59
|
```
|
|
60
|
+
Output: `[hello ]`
|
|
60
61
|
|
|
61
|
-
###
|
|
62
|
+
### `$$$x` - Optional Whitespace Variable
|
|
63
|
+
Captures with optional whitespace (no error if empty).
|
|
64
|
+
```
|
|
65
|
+
pattern {$$$x world} {<$x>}
|
|
66
|
+
world
|
|
67
|
+
```
|
|
68
|
+
Output: `<>`
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Blocks
|
|
73
|
+
|
|
74
|
+
Capture content between delimiters with full nesting support.
|
|
75
|
+
|
|
76
|
+
### Syntax
|
|
77
|
+
```
|
|
78
|
+
$block varName {open}{close}
|
|
79
|
+
```
|
|
62
80
|
|
|
81
|
+
### Basic Example
|
|
63
82
|
```
|
|
64
83
|
pattern {$name $block content {(}{)}} {[$content]}
|
|
65
84
|
data (hello world)
|
|
66
85
|
```
|
|
67
|
-
|
|
68
86
|
Output: `[hello world]`
|
|
69
87
|
|
|
70
88
|
### Custom Delimiters
|
|
71
|
-
|
|
72
89
|
```
|
|
73
90
|
pattern {$block data {<<}{>>}} {DATA: $data}
|
|
74
91
|
<<json stuff>>
|
|
75
92
|
```
|
|
76
|
-
|
|
77
93
|
Output: `DATA: json stuff`
|
|
78
94
|
|
|
79
|
-
###
|
|
95
|
+
### Multi-Character Delimiters
|
|
96
|
+
```
|
|
97
|
+
pattern {$block code {```}{```}} {<pre>$code</pre>}
|
|
98
|
+
```markdown
|
|
99
|
+
# Title
|
|
100
|
+
```
|
|
101
|
+
Output: `<pre># Title</pre>`
|
|
80
102
|
|
|
103
|
+
### Multiple Blocks
|
|
81
104
|
```
|
|
82
105
|
pattern {$block a {(}{)}, $block b {[}{]}} {$a|$b}
|
|
83
106
|
(first), [second]
|
|
84
107
|
```
|
|
85
|
-
|
|
86
108
|
Output: `first|second`
|
|
87
109
|
|
|
110
|
+
### Nested Blocks
|
|
111
|
+
```
|
|
112
|
+
pattern {$block outer {(}{)}} {[$outer]}
|
|
113
|
+
(outer (inner))
|
|
114
|
+
```
|
|
115
|
+
Output: `[outer (inner)]`
|
|
116
|
+
|
|
88
117
|
---
|
|
89
118
|
|
|
90
119
|
## Patterns
|
|
91
120
|
|
|
92
|
-
### Basic
|
|
93
|
-
|
|
121
|
+
### Basic Pattern
|
|
94
122
|
```
|
|
95
123
|
pattern {match} {replace}
|
|
96
124
|
```
|
|
97
125
|
|
|
98
126
|
### Example
|
|
99
|
-
|
|
100
127
|
```
|
|
101
128
|
pattern {# $title} {<h1>$title</h1>}
|
|
102
129
|
# Welcome
|
|
103
130
|
```
|
|
104
|
-
|
|
105
131
|
Output: `<h1>Welcome</h1>`
|
|
106
132
|
|
|
107
133
|
### Multiple Patterns Cascade
|
|
108
|
-
|
|
109
134
|
```
|
|
110
135
|
pattern {a} {b}
|
|
111
136
|
pattern {b} {c}
|
|
112
137
|
pattern {c} {d}
|
|
113
138
|
a
|
|
114
139
|
```
|
|
115
|
-
|
|
116
140
|
Output: `d`
|
|
117
141
|
|
|
118
142
|
---
|
|
119
143
|
|
|
120
|
-
|
|
144
|
+
## Subpatterns
|
|
121
145
|
|
|
122
|
-
Subpatterns
|
|
146
|
+
Subpatterns are patterns declared *inside* replacement bodies, existing only during parent pattern execution.
|
|
123
147
|
|
|
124
148
|
### Syntax
|
|
125
|
-
|
|
126
149
|
```
|
|
127
150
|
$pattern {match} {replace}
|
|
128
151
|
```
|
|
129
152
|
|
|
130
|
-
A subpattern behaves like a normal pattern but is **scoped only to the replacement body where it appears**.
|
|
131
|
-
|
|
132
153
|
### Example
|
|
133
|
-
|
|
134
154
|
```
|
|
135
155
|
pattern {eval $block code {(}{)}} {
|
|
136
156
|
$eval{
|
|
@@ -139,133 +159,113 @@ pattern {eval $block code {(}{)}} {
|
|
|
139
159
|
return "";
|
|
140
160
|
}
|
|
141
161
|
}
|
|
142
|
-
|
|
143
162
|
eval(console.log(123))
|
|
144
163
|
```
|
|
145
|
-
|
|
146
164
|
Output:
|
|
147
|
-
|
|
148
165
|
```
|
|
149
166
|
123
|
|
150
167
|
```
|
|
151
168
|
|
|
152
169
|
### Key Properties
|
|
153
|
-
|
|
154
170
|
* Subpatterns exist only within the running pattern.
|
|
155
171
|
* They do not leak into the global pattern list.
|
|
156
|
-
* They can recursively modify inner content before `$eval` or other processors
|
|
157
|
-
* Multiple subpatterns can coexist
|
|
172
|
+
* They can recursively modify inner content before `$eval` or other processors.
|
|
173
|
+
* Multiple subpatterns can coexist in the same replacement.
|
|
158
174
|
|
|
159
175
|
---
|
|
160
176
|
|
|
161
177
|
## Special Keywords
|
|
162
178
|
|
|
163
|
-
### $
|
|
164
|
-
|
|
165
|
-
Generates unique incremental IDs.
|
|
166
|
-
|
|
179
|
+
### $eval
|
|
180
|
+
Executes JavaScript code.
|
|
167
181
|
```
|
|
168
|
-
pattern {
|
|
169
|
-
|
|
170
|
-
item
|
|
182
|
+
pattern {$x} {$eval{return parseInt($x)*2;}}
|
|
183
|
+
5
|
|
171
184
|
```
|
|
185
|
+
Output: `10`
|
|
172
186
|
|
|
173
|
-
|
|
174
|
-
|
|
187
|
+
Supports multi-character delimiters:
|
|
175
188
|
```
|
|
176
|
-
|
|
177
|
-
|
|
189
|
+
pattern {$x} {$eval<<return parseInt($x)*2>>}
|
|
190
|
+
5
|
|
178
191
|
```
|
|
192
|
+
Output: `10`
|
|
179
193
|
|
|
180
|
-
### $match
|
|
181
|
-
|
|
182
|
-
Full matched text.
|
|
183
194
|
|
|
184
|
-
|
|
185
|
-
pattern {[$x]} {FOUND: $match}
|
|
186
|
-
[data]
|
|
187
|
-
```
|
|
195
|
+
## Important Rules
|
|
188
196
|
|
|
189
|
-
|
|
197
|
+
### Matching
|
|
198
|
+
* `$x` = one word (no whitespace)
|
|
199
|
+
* `$$x` = captures text with optional surrounding whitespace
|
|
200
|
+
* `$$$x` = captures text with optional surrounding whitespace; the capture may be empty or missing entirely
|
|
201
|
+
* Patterns apply globally until stable
|
|
202
|
+
* Blocks support arbitrary nesting depth
|
|
190
203
|
|
|
191
|
-
###
|
|
204
|
+
### Block Matching
|
|
205
|
+
* `$block name {open}{close}` captures delimited regions
|
|
206
|
+
* Supports nested delimiters of any length
|
|
207
|
+
* Multi-character delimiters fully supported (e.g., `{>>>}{<<<}`)
|
|
192
208
|
|
|
193
|
-
|
|
209
|
+
### Whitespace Handling
|
|
210
|
+
* Whitespace-optional tokens (`$$` alone) skip optional whitespace
|
|
211
|
+
* Variables automatically skip leading whitespace when needed
|
|
212
|
+
* Trailing whitespace is trimmed when variables appear before literals
|
|
194
213
|
|
|
195
|
-
|
|
196
|
-
pattern {world} {$prefix$suffix}hello world test
|
|
197
|
-
```
|
|
214
|
+
---
|
|
198
215
|
|
|
199
|
-
|
|
216
|
+
## Multi-Character Delimiter Support
|
|
200
217
|
|
|
201
|
-
|
|
218
|
+
Multi-character delimiters are fully supported throughout all features.
|
|
202
219
|
|
|
203
|
-
|
|
220
|
+
### Examples
|
|
221
|
+
```javascript
|
|
222
|
+
const p = new Papagaio('$', '<<<', '>>>');
|
|
223
|
+
```
|
|
204
224
|
|
|
225
|
+
### In Blocks
|
|
205
226
|
```
|
|
206
|
-
pattern {
|
|
207
|
-
|
|
227
|
+
pattern {$block data {<<}{>>}} {$data}
|
|
228
|
+
<<content>>
|
|
208
229
|
```
|
|
209
230
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
### $eval
|
|
213
|
-
|
|
214
|
-
Executes JS.
|
|
215
|
-
|
|
231
|
+
### In Eval
|
|
216
232
|
```
|
|
217
|
-
|
|
233
|
+
// const p = new Papagaio('$', '<<<', '>>>');
|
|
234
|
+
pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
|
|
218
235
|
5
|
|
219
236
|
```
|
|
220
237
|
|
|
221
|
-
Output: `10`
|
|
222
|
-
|
|
223
|
-
---
|
|
224
|
-
|
|
225
|
-
## Important Rules
|
|
226
|
-
|
|
227
|
-
### Matching
|
|
228
|
-
|
|
229
|
-
* `$x` = one word
|
|
230
|
-
* `$$x` = multiword (captures whitespace too)
|
|
231
|
-
* `$`, `$$`, `$$$`, `$$$$` = whitespace operators
|
|
232
|
-
* Patterns apply globally until stable
|
|
233
|
-
* Blocks can be nested
|
|
234
|
-
|
|
235
|
-
### Block Matching
|
|
236
|
-
|
|
237
|
-
* `$block name {open}{close}` captures delimited regions
|
|
238
|
-
* Supports nested delimiters
|
|
239
|
-
|
|
240
238
|
---
|
|
241
239
|
|
|
242
240
|
## Troubleshooting
|
|
243
241
|
|
|
244
|
-
| Problem
|
|
245
|
-
|
|
246
|
-
| Variable not captured | Check spacing
|
|
247
|
-
| Block
|
|
248
|
-
| Infinite recursion
|
|
249
|
-
| Pattern not matching
|
|
250
|
-
|
|
|
242
|
+
| Problem | Solution |
|
|
243
|
+
|---------|----------|
|
|
244
|
+
| Variable not captured | Check spacing and use appropriate whitespace operator (`$x`, `$$x`, `$$$x`) |
|
|
245
|
+
| Block mismatch | Verify opening and closing delimiters match the declaration |
|
|
246
|
+
| Infinite recursion | Reduce `recursion_limit` or simplify pattern dependencies |
|
|
247
|
+
| Pattern not matching | Add whitespace operators (`$$`) for multi-word content |
|
|
248
|
+
| Nested blocks fail | Ensure delimiters are properly balanced |
|
|
249
|
+
| Multi-char delimiters broken | Check delimiters don't conflict; use escaping if needed |
|
|
251
250
|
|
|
252
251
|
---
|
|
253
252
|
|
|
254
|
-
##
|
|
253
|
+
## Syntax Reference
|
|
255
254
|
|
|
256
|
-
|
|
255
|
+
```
|
|
256
|
+
pattern {$x $y} {$y, $x} # basic pattern with variables
|
|
257
|
+
pattern {$$x $y} {$y, $x} # whitespace-sensitive capture
|
|
258
|
+
pattern {$$$x $y} {$y, $x} # optional whitespace capture
|
|
259
|
+
pattern {$block n {o}{c}} {$n} # block capture with custom delimiters
|
|
260
|
+
$pattern {a} {b} # subpattern (scoped to parent)
|
|
261
|
+
$eval{code} # JavaScript evaluation
|
|
262
|
+
```
|
|
257
263
|
|
|
258
264
|
---
|
|
259
265
|
|
|
260
|
-
##
|
|
266
|
+
## Performance Notes
|
|
261
267
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
$unique
|
|
267
|
-
$match
|
|
268
|
-
$prefix / $suffix
|
|
269
|
-
$clear
|
|
270
|
-
$eval{code}
|
|
271
|
-
```
|
|
268
|
+
* Patterns apply recursively until no changes occur (up to `recursion_limit`)
|
|
269
|
+
* Multi-character delimiter matching is optimized with regex escaping
|
|
270
|
+
* Nested blocks and subpatterns have no theoretical depth limit
|
|
271
|
+
* Large recursion limits can impact performance on complex inputs
|
package/bin/cli.qjs
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/usr/bin/env qjs
|
|
2
|
+
import * as std from "std";
|
|
3
|
+
import * as os from "os";
|
|
4
|
+
|
|
5
|
+
// Import the Papagaio class - adjust the path as needed
|
|
6
|
+
// For QuickJS, you can include the file directly or use import
|
|
7
|
+
import { Papagaio } from "../src/papagaio.js";
|
|
8
|
+
|
|
9
|
+
// Version (hardcode it here, or read it from a JSON file if needed)
|
|
10
|
+
const VERSION = "1.0.0";
|
|
11
|
+
|
|
12
|
+
// Parse command line arguments
|
|
13
|
+
const args = scriptArgs.slice(1); // QuickJS usa scriptArgs ao invés de process.argv
|
|
14
|
+
|
|
15
|
+
// Help & Version
|
|
16
|
+
if (args.includes("-v") || args.includes("--version")) {
|
|
17
|
+
std.out.puts(VERSION + "\n");
|
|
18
|
+
std.exit(0);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
if (args.includes("-h") || args.includes("--help")) {
|
|
22
|
+
std.out.puts(`Usage: papagaio [options] <file>
|
|
23
|
+
Options:
|
|
24
|
+
-h, --help Show this help message
|
|
25
|
+
-v, --version Show version number
|
|
26
|
+
`);
|
|
27
|
+
std.exit(0);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// File input
|
|
31
|
+
const file = args.find(arg => !arg.startsWith("-"));
|
|
32
|
+
if (!file) {
|
|
33
|
+
std.err.puts("Error: no input file specified.\nUse --help for usage.\n");
|
|
34
|
+
std.exit(1);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Read file
|
|
38
|
+
let src;
|
|
39
|
+
try {
|
|
40
|
+
const f = std.open(file, "r");
|
|
41
|
+
if (!f) {
|
|
42
|
+
std.err.puts(`Error: cannot open file '${file}'\n`);
|
|
43
|
+
std.exit(1);
|
|
44
|
+
}
|
|
45
|
+
src = f.readAsString();
|
|
46
|
+
f.close();
|
|
47
|
+
} catch (e) {
|
|
48
|
+
std.err.puts(`Error reading file: ${e}\n`);
|
|
49
|
+
std.exit(1);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Process with Papagaio
|
|
53
|
+
const p = new Papagaio();
|
|
54
|
+
const out = p.process(src);
|
|
55
|
+
|
|
56
|
+
// Output result
|
|
57
|
+
std.out.puts(out);
|