papagaio 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,9 +1,7 @@
1
1
  # Papagaio
2
-
3
- Minimal yet powerful text preprocessor.
2
+ Minimal yet powerful text preprocessor with support for multi-character delimiters.
4
3
 
5
4
  ## Installation
6
-
7
5
  ```javascript
8
6
  import { Papagaio } from './src/papagaio.js';
9
7
  const p = new Papagaio();
@@ -11,17 +9,14 @@ const result = p.process(input);
11
9
  ```
12
10
 
13
11
  ## Configuration
14
-
15
12
  ```javascript
16
13
  p.symbols = {
17
14
  pattern: "pattern", // pattern keyword
18
- context: "context", // context keyword
19
- open: "{", // opening delimiter
20
- close: "}", // closing delimiter
15
+ open: "{", // opening delimiter (multi-char supported)
16
+ close: "}", // closing delimiter (multi-char supported)
21
17
  sigil: "$" // variable marker
22
18
  };
23
- p.recursion_limit = 512; // iteration limit
24
- p.unique_id = 0; // unique ID counter
19
+ p.recursion_limit = 512;
25
20
  ```
26
21
 
27
22
  ---
@@ -29,35 +24,61 @@ p.unique_id = 0; // unique ID counter
29
24
  ## Core Concepts
30
25
 
31
26
  ### 1. Simple Variables
32
-
33
27
  ```
34
28
  pattern {$x} {$x}
35
29
  hello
36
30
  ```
37
31
  Output: `hello`
38
32
 
39
- Variables capture words (non-whitespace sequences).
40
-
41
33
  ### 2. Multiple Variables
42
-
43
34
  ```
44
35
  pattern {$x $y $z} {$z, $y, $x}
45
36
  apple banana cherry
46
37
  ```
47
38
  Output: `cherry, banana, apple`
48
39
 
49
- ## Blocks
40
+ ---
50
41
 
51
- Capture content between delimiters.
42
+ ## Whitespace Operators
52
43
 
53
- ### Syntax
44
+ Papagaio provides flexible whitespace handling for variable capture.
54
45
 
46
+ ### `$x` - Single Word Variable
47
+ Captures a single non-whitespace token.
55
48
  ```
56
- $block name {open}{close}
49
+ pattern {$x} {[$x]}
50
+ hello world
57
51
  ```
52
+ Output: `[hello]`
58
53
 
59
- ### Example
54
+ ### `$$x` - Whitespace-Sensitive Variable
55
+ Captures text including surrounding whitespace until the next significant token.
56
+ ```
57
+ pattern {$$x world} {[$x]}
58
+ hello world
59
+ ```
60
+ Output: `[hello ]`
61
+
62
+ ### `$$$x` - Optional Whitespace Variable
63
+ Captures text with optional surrounding whitespace; the capture may be empty or absent without causing the match to fail.
64
+ ```
65
+ pattern {$$$x world} {<$x>}
66
+ world
67
+ ```
68
+ Output: `<>`
69
+
70
+ ---
71
+
72
+ ## Blocks
73
+
74
+ Capture content between delimiters with full nesting support.
60
75
 
76
+ ### Syntax
77
+ ```
78
+ $block varName {open}{close}
79
+ ```
80
+
81
+ ### Basic Example
61
82
  ```
62
83
  pattern {$name $block content {(}{)}} {[$content]}
63
84
  data (hello world)
@@ -65,287 +86,186 @@ data (hello world)
65
86
  Output: `[hello world]`
66
87
 
67
88
  ### Custom Delimiters
68
-
69
89
  ```
70
90
  pattern {$block data {<<}{>>}} {DATA: $data}
71
91
  <<json stuff>>
72
92
  ```
73
93
  Output: `DATA: json stuff`
74
94
 
75
- ### Multiple Blocks
95
+ ### Multi-Character Delimiters
96
+ ````
97
+ pattern {$block code {```}{```}} {<pre>$code</pre>}
98
+ ```markdown
99
+ # Title
100
+ ```
+ ````
101
+ Output: `<pre># Title</pre>`
76
102
 
103
+ ### Multiple Blocks
77
104
  ```
78
105
  pattern {$block a {(}{)}, $block b {[}{]}} {$a|$b}
79
106
  (first), [second]
80
107
  ```
81
108
  Output: `first|second`
82
109
 
110
+ ### Nested Blocks
111
+ ```
112
+ pattern {$block outer {(}{)}} {[$outer]}
113
+ (outer (inner))
114
+ ```
115
+ Output: `[outer (inner)]`
116
+
83
117
  ---
84
118
 
85
119
  ## Patterns
86
120
 
87
- ### Basic
88
-
121
+ ### Basic Pattern
89
122
  ```
90
123
  pattern {match} {replace}
91
124
  ```
92
125
 
93
- ### Real Example
94
-
126
+ ### Example
95
127
  ```
96
128
  pattern {# $title} {<h1>$title</h1>}
97
129
  # Welcome
98
130
  ```
99
131
  Output: `<h1>Welcome</h1>`
100
132
 
101
- ### Multiple Patterns
102
-
133
+ ### Multiple Patterns Cascade
103
134
  ```
104
135
  pattern {a} {b}
105
136
  pattern {b} {c}
106
137
  pattern {c} {d}
107
138
  a
108
139
  ```
109
- Output: `d` (automatic cascade)
140
+ Output: `d`
110
141
 
111
142
  ---
112
143
 
113
- ## Contexts
144
+ ## Subpatterns
114
145
 
115
- Recursive processing scope.
146
+ Subpatterns are patterns declared *inside* replacement bodies, existing only during parent pattern execution.
116
147
 
148
+ ### Syntax
149
+ ```
150
+ $pattern {match} {replace}
117
151
  ```
118
- context {
119
- pattern {$x} {<$x>}
120
-
121
- apple
122
- banana
152
+
153
+ ### Example
154
+ ```
155
+ pattern {eval $block code {(}{)}} {
156
+ $eval{
157
+ $pattern {undefined} {}
158
+ $code;
159
+ return "";
160
+ }
123
161
  }
162
+ eval(console.log(123))
124
163
  ```
125
164
  Output:
126
165
  ```
127
- <apple>
128
- <banana>
166
+ 123
129
167
  ```
130
168
 
131
- **Empty contexts are automatically removed.**
169
+ ### Key Properties
170
+ * Subpatterns exist only within the running pattern.
171
+ * They do not leak into the global pattern list.
172
+ * They can recursively modify inner content before `$eval` or other processors.
173
+ * Multiple subpatterns can coexist in the same replacement.
132
174
 
133
175
  ---
134
176
 
135
177
  ## Special Keywords
136
178
 
137
- ### $unique
138
- Generate unique incremental IDs for each pattern call. All occurrences of `$unique` within the same pattern replacement share the same ID.
139
-
140
- ```
141
- pattern {item} {[$unique]item_$unique}
142
- item
143
- item
144
- item
145
- ```
146
- Output: `[0]item_0`, `[1]item_1`, `[2]item_2`
147
-
148
- ```
149
- pattern {a} {$unique $unique}
150
- a
151
- ```
152
- Output: `0 0` (same ID for both occurrences)
153
-
154
- ### $match
155
- Return the full match.
156
-
157
- ```
158
- pattern {[$x]} {FOUND: $match}
159
- [data]
160
- ```
161
- Output: `FOUND: [data]`
162
-
163
- ### $prefix / $suffix
164
- Text before and after the match.
165
-
166
- ```
167
- pattern {world} {$prefix$suffix}hello world test
168
- ```
169
- Output: `hello hello test test`
170
-
171
- ### $clear
172
- Remove everything before the match.
173
-
179
+ ### $eval
180
+ Executes JavaScript code.
174
181
  ```
175
- pattern {SKIP $x} {$clear KEEP: $x}
176
- IGNORE_THIS SKIP keep_this
182
+ pattern {$x} {$eval{return parseInt($x)*2;}}
183
+ 5
177
184
  ```
178
- Output: `KEEP: keep_this`
179
-
180
- ### $eval
181
- Execute JavaScript code.
185
+ Output: `10`
182
186
 
187
+ Supports multi-character delimiters:
183
188
  ```
184
- pattern {$x} {$eval{return parseInt($x) * 2;}}
189
+ pattern {$x} {$eval<<return parseInt($x)*2;>>}
185
190
  5
186
191
  ```
187
192
  Output: `10`
188
193
 
189
- ---
190
194
 
191
- ## Practical Examples
195
+ ## Important Rules
192
196
 
193
- ### Markdown → HTML
197
+ ### Matching
198
+ * `$x` = one word (no whitespace)
199
+ * `$$x` = captures text with optional surrounding whitespace
200
+ * `$$$x` = captures text with optional surrounding whitespace, can be empty or not found
201
+ * Patterns apply globally until stable
202
+ * Blocks support arbitrary nesting depth
194
203
 
195
- ```
196
- context {
197
- pattern {## $t} {<h2>$t</h2>}
198
- pattern {# $t} {<h1>$t</h1>}
199
- pattern {**$t**} {<strong>$t</strong>}
200
- pattern {*$t*} {<em>$t</em>}
201
- pattern {- $i} {<li>$i</li>}
202
-
203
- # Title
204
- **bold** and *italic*
205
- - item1
206
- - item2
207
- }
208
- ```
204
+ ### Block Matching
205
+ * `$block name {open}{close}` captures delimited regions
206
+ * Supports nested delimiters of any length
207
+ * Multi-character delimiters fully supported (e.g., `{>>>}{<<<}`)
209
208
 
210
- ### CSV → JSON
209
+ ### Whitespace Handling
210
+ * Whitespace-optional tokens (`$$` alone) skip optional whitespace
211
+ * Variables automatically skip leading whitespace when needed
212
+ * Trailing whitespace is trimmed when variables appear before literals
211
213
 
212
- ```
213
- pattern {$a,$b,$c} {{ id: '$a', name: '$b', role: '$c' }}
214
- 1,Alice,Engineer
215
- 2,Bob,Designer
216
- ```
214
+ ---
217
215
 
218
- Output:
219
- ```
220
- { id: '1', name: 'Alice', role: 'Engineer' }
221
- { id: '2', name: 'Bob', role: 'Designer' }
222
- ```
216
+ ## Multi-Character Delimiter Support
223
217
 
224
- ### Config Parser
218
+ The updated version fully supports multi-character delimiters throughout all features.
225
219
 
220
+ ### Examples
221
+ ```javascript
222
+ const p = new Papagaio('$', '<<<', '>>>');
226
223
  ```
227
- pattern {$key = $value} {const $key = '$value';}
228
- host = localhost
229
- port = 3000
230
- ```
231
-
232
- Output:
233
- ```
234
- const host = 'localhost';
235
- const port = '3000';
236
- ```
237
-
238
- ### HTML Generator
239
224
 
225
+ ### In Blocks
240
226
  ```
241
- pattern {$tag $content} {<$tag>$content</$tag>}
242
- div HelloWorld
243
- span Test
227
+ pattern {$block data {<<}{>>}} {$data}
228
+ <<content>>
244
229
  ```
245
230
 
246
- Output:
231
+ ### In Eval
247
232
  ```
248
- <div>HelloWorld</div>
249
- <span>Test</span>
233
+ // const p = new Papagaio('$', '<<<', '>>>');
234
+ pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
235
+ 5
250
236
  ```
251
237
 
252
238
  ---
253
239
 
254
- ## Important Rules
255
-
256
- ### Matching
257
- - Variables (`$x`) capture **one word** (no spaces)
258
- - Variables (`$$x`) captures one or more words (with spaces)
259
- - Patterns apply **globally** each iteration
260
- - Auto-recursion until: max 512 iterations OR no changes
261
- - `$ ` = one or more of this whitespace (spaces, tabs, newlines)
262
- - `$$ ` = zero or more of this whitespace (spaces, tabs, newlines)
263
- - `$$$ `= one or more whitespaces
264
- - `$$$$ `= zero or more whitespaces
265
-
266
- ### Block Matching
267
- - `$block name {open}{close}` captures between delimiters
268
- - Supports nested delimiters automatically
269
- - Multiple blocks in one pattern work
270
-
271
- ### Variables
272
- - Names: `[A-Za-z0-9_]`
273
- - Reuse: `$x` appears multiple times in replace
274
- - Undefined: becomes empty string
275
-
276
- ### Limitations
277
- - You cannot match words containing the current sigil character.
278
- - You cannot match a $block{}{} using the current delimiters.
279
- - By design, whitespace operators need a whitespace after them to work properly, even the `$$$ ` and `$$$$ ` ones.
280
- - Multiple word variables (`$$x`) also captures leading/trailing whitespaces, so be careful when using them together with whitespace operators.
281
-
282
- ---
283
-
284
240
  ## Troubleshooting
285
241
 
286
242
  | Problem | Solution |
287
243
  |---------|----------|
288
- | Variable not captured | Check space between variables |
289
- | Block not working | Verify balanced delimiters `{` `}` |
290
- | Infinite recursion | Use `$clear` or reduce `recursion_limit` |
291
- | $eval not working | Errors return empty string, use try-catch |
292
- | Pattern doesn't match | Use whitespace operators between elements for flexible whitespace |
293
- | Whitespace operators | Remember they need a whitespace after them to work properly |
294
- | Whitespace operators not matching | Multiple word variables (`$$x`) also captures leading/trailing whitespaces, so be careful when using them together with whitespace operators. |
295
-
296
- ## Known Bugs
297
-
298
- - Multi-character block delimiters that contains double quotes doesnt match properly.
244
+ | Variable not captured | Check spacing and use appropriate whitespace operator (`$x`, `$$x`, `$$$x`) |
245
+ | Block mismatch | Verify opening and closing delimiters match the declaration |
246
+ | Infinite recursion | Reduce `recursion_limit` or simplify pattern dependencies |
247
+ | Pattern not matching | Add whitespace operators (`$$`) for multi-word content |
248
+ | Nested blocks fail | Ensure delimiters are properly balanced |
249
+ | Multi-char delimiters broken | Check delimiters don't conflict; use escaping if needed |
299
250
 
300
251
  ---
301
252
 
302
253
  ## Syntax Reference
303
254
 
304
255
  ```
305
- pattern {$x $y} {$y, $x} # basic pattern
306
- pattern {$x$ $y} {$x-$y} # flexible whitespace
307
- pattern {$block n {o}{c}} {$n} # block
308
- context { ... } # recursive scope
309
- $unique # unique ID per pattern
310
- $match # full match
311
- $prefix / $suffix # before/after
312
- $clear # clear before
313
- $eval{code} # execute JS
314
- $ / $$ / $$$ / $$$$ # whitespace operators
256
+ pattern {$x $y} {$y, $x} # basic pattern with variables
257
+ pattern {$$x $y} {$y, $x} # whitespace-sensitive capture
258
+ pattern {$$$x $y} {$y, $x} # optional whitespace capture
259
+ pattern {$block n {o}{c}} {$n} # block capture with custom delimiters
260
+ $pattern {a} {b} # subpattern (scoped to parent)
261
+ $eval{code} # JavaScript evaluation
315
262
  ```
316
263
 
317
264
  ---
318
265
 
319
- ## Complete Example
320
-
321
- ```
322
- context {
323
- # Markdown headers
324
- pattern {# $title} {<h1>$title</h1>}
325
-
326
- # Lists
327
- pattern {- $item} {<li>$item</li>}
328
-
329
- # Inline formatting
330
- pattern {**$text**} {<strong>$text</strong>}
331
- pattern {*$text*} {<em>$text</em>}
332
-
333
- # Process content
334
- # Welcome
335
- # Getting Started
336
- This is **important** and *italic*
337
- - First item
338
- - Second item
339
- }
340
- ```
341
-
342
- Output:
343
- ```html
344
- <h1>Welcome</h1>
345
- <h2>Getting Started</h2>
346
- This is <strong>important</strong> and <em>italic</em>
347
- <li>First item</li>
348
- <li>Second item</li>
349
- ```
266
+ ## Performance Notes
350
267
 
351
- ---
268
+ * Patterns apply recursively until no changes occur (up to `recursion_limit`)
269
+ * Multi-character delimiter matching is optimized with regex escaping
270
+ * Nested blocks and subpatterns have no theoretical depth limit
271
+ * Large recursion limits can impact performance on complex inputs
package/bin/cli.qjs ADDED
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env qjs
2
+ import * as std from "std";
3
+ import * as os from "os";
4
+
5
+ // Import the Papagaio class - adjust the path as needed
6
+ // For QuickJS, you can include the file directly or use import
7
+ import { Papagaio } from "../src/papagaio.js";
8
+
9
+ // Version (hardcoded here; could instead be read from a JSON file if needed). NOTE(review): confirm this matches the published package version.
10
+ const VERSION = "1.0.0";
11
+
12
+ // Parse command line arguments
13
+ const args = scriptArgs.slice(1); // QuickJS uses scriptArgs instead of process.argv
14
+
15
+ // Help & Version: either flag prints to stdout and exits with status 0 (version is checked first)
16
+ if (args.includes("-v") || args.includes("--version")) {
17
+ std.out.puts(VERSION + "\n");
18
+ std.exit(0);
19
+ }
20
+
21
+ if (args.includes("-h") || args.includes("--help")) {
22
+ std.out.puts(`Usage: papagaio [options] <file>
23
+ Options:
24
+ -h, --help Show this help message
25
+ -v, --version Show version number
26
+ `);
27
+ std.exit(0);
28
+ }
29
+
30
+ // File input: the first argument that does not start with "-" is taken as the input path
31
+ const file = args.find(arg => !arg.startsWith("-"));
32
+ if (!file) {
33
+ std.err.puts("Error: no input file specified.\nUse --help for usage.\n");
34
+ std.exit(1);
35
+ }
36
+
37
+ // Read the whole input file into src; any failure is reported on stderr and exits with status 1
38
+ let src;
39
+ try {
40
+ const f = std.open(file, "r");
41
+ if (!f) {
42
+ std.err.puts(`Error: cannot open file '${file}'\n`);
43
+ std.exit(1);
44
+ }
45
+ src = f.readAsString();
46
+ f.close();
47
+ } catch (e) {
48
+ std.err.puts(`Error reading file: ${e}\n`);
49
+ std.exit(1);
50
+ }
51
+
52
+ // Process the file contents with a default-configured Papagaio instance
53
+ const p = new Papagaio();
54
+ const out = p.process(src);
55
+
56
+ // Output result to stdout
57
+ std.out.puts(out);