papagaio 0.4.2 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,27 +1,25 @@
1
1
  # Papagaio
2
-
3
- Minimal yet powerful text preprocessor.
2
+ Minimal yet powerful text preprocessor with support for multi-character delimiters.
4
3
 
5
4
  ## Installation
6
-
7
5
  ```javascript
8
6
  import { Papagaio } from './src/papagaio.js';
9
- const p = new Papagaio();
10
- const result = p.process(input);
7
+ const papagaio = new Papagaio();
8
+ const result = papagaio.process(input);
11
9
  ```
12
10
 
13
11
  ## Configuration
14
-
15
12
  ```javascript
16
- p.symbols = {
13
+ papagaio.symbols = {
17
14
  pattern: "pattern", // pattern keyword
18
- open: "{", // opening delimiter
19
- close: "}", // closing delimiter
20
- sigil: "$" // variable marker
15
+ open: "{", // opening delimiter (multi-char supported)
16
+ close: "}", // closing delimiter (multi-char supported)
17
+ sigil: "$", // variable marker
18
+ eval: "eval", // eval keyword
19
+ block: "block", // block keyword
20
+ regex: "regex" // regex keyword
21
21
  };
22
-
23
- p.recursion_limit = 512;
24
- p.unique_id = 0;
22
+ papagaio.recursion_limit = 512;
25
23
  ```
26
24
 
27
25
  ---
@@ -29,108 +27,185 @@ p.unique_id = 0;
29
27
  ## Core Concepts
30
28
 
31
29
  ### 1. Simple Variables
32
-
33
30
  ```
34
31
  pattern {$x} {$x}
35
32
  hello
36
33
  ```
37
-
38
34
  Output: `hello`
39
35
 
40
36
  ### 2. Multiple Variables
41
-
42
37
  ```
43
38
  pattern {$x $y $z} {$z, $y, $x}
44
39
  apple banana cherry
45
40
  ```
46
-
47
41
  Output: `cherry, banana, apple`
48
42
 
49
43
  ---
50
44
 
51
- ## Blocks
45
+ ## Variables
46
+
47
+ Papagaio provides flexible variable capture with automatic context-aware behavior.
48
+
49
+ ### `$x` - Smart Variable
50
+ Automatically adapts based on context:
51
+ - **Before a block**: Captures everything until the block's opening delimiter
52
+ - **Before a literal**: Captures everything until that literal appears
53
+ - **Otherwise**: Captures a single word (non-whitespace token)
54
+
55
+ ```
56
+ pattern {$x} {[$x]}
57
+ hello world
58
+ ```
59
+ Output: `[hello]`
60
+
61
+ ```
62
+ pattern {$name $block content {(}{)}} {$name: $content}
63
+ greeting (hello world)
64
+ ```
65
+ Output: `greeting: hello world`
66
+
67
+ ```
68
+ pattern {$prefix:$suffix} {$suffix-$prefix}
69
+ key:value
70
+ ```
71
+ Output: `value-key`
72
+
73
+ ### `$x?` - Optional Variable
74
+ Same behavior as `$x`, but the pattern still matches when the variable is empty or not found; it then expands to an empty string.
75
+
76
+ ```
77
+ pattern {$x? world} {<$x>}
78
+ world
79
+ ```
80
+ Output: `<>`
81
+
82
+ ```
83
+ pattern {$greeting? $name} {Hello $name$greeting}
84
+ Hi John
85
+ ```
86
+ Output: `Hello JohnHi`
87
+
88
+ ---
89
+
90
+ ## Regex Matching
52
91
 
53
- Capture content between delimiters.
92
+ Capture content using JavaScript regular expressions.
54
93
 
55
94
  ### Syntax
95
+ ```
96
+ $regex varName {pattern}
97
+ ```
56
98
 
99
+ ### Basic Example
57
100
  ```
58
- $block name {open}{close}
101
+ pattern {$regex num {[0-9]+}} {Number: $num}
102
+ The answer is 42
59
103
  ```
104
+ Output: `Number: 42`
60
105
 
61
- ### Example
106
+ ### Complex Patterns
107
+ ```
108
+ pattern {$regex email {\w+@\w+\.\w+}} {Email found: $email}
109
+ Contact: user@example.com
110
+ ```
111
+ Output: `Email found: user@example.com`
112
+
113
+ ### Multiple Regex Variables
114
+ ```
115
+ pattern {$regex year {[0-9]{4}}-$regex month {[0-9]{2}}} {Month $month in $year}
116
+ 2024-03
117
+ ```
118
+ Output: `Month 03 in 2024`
62
119
 
120
+ ### Notes
121
+ - Regex patterns are cached for performance
122
+ - Matches are anchored at the current position (no searching ahead)
123
+ - Invalid regex patterns will cause the match to fail gracefully
124
+
125
+ ---
126
+
127
+ ## Blocks
128
+
129
+ Capture content between delimiters with full nesting support.
130
+
131
+ ### Syntax
132
+ ```
133
+ $block varName {open}{close}
134
+ ```
135
+
136
+ ### Basic Example
63
137
  ```
64
138
  pattern {$name $block content {(}{)}} {[$content]}
65
139
  data (hello world)
66
140
  ```
67
-
68
141
  Output: `[hello world]`
69
142
 
70
143
  ### Custom Delimiters
71
-
72
144
  ```
73
145
  pattern {$block data {<<}{>>}} {DATA: $data}
74
146
  <<json stuff>>
75
147
  ```
76
-
77
148
  Output: `DATA: json stuff`
78
149
 
79
- ### Multiple Blocks
150
+ ### Multi-Character Delimiters
151
+ ```
152
+ pattern {$block code {```}{```}} {<pre>$code</pre>}
153
+ ```markdown
154
+ # Title
155
+ ```
156
+ Output: `<pre># Title</pre>`
80
157
 
158
+ ### Multiple Blocks
81
159
  ```
82
160
  pattern {$block a {(}{)}, $block b {[}{]}} {$a|$b}
83
161
  (first), [second]
84
162
  ```
85
-
86
163
  Output: `first|second`
87
164
 
165
+ ### Nested Blocks
166
+ ```
167
+ pattern {$block outer {(}{)}} {[$outer]}
168
+ (outer (inner))
169
+ ```
170
+ Output: `[outer (inner)]`
171
+
88
172
  ---
89
173
 
90
174
  ## Patterns
91
175
 
92
- ### Basic
93
-
176
+ ### Basic Pattern
94
177
  ```
95
178
  pattern {match} {replace}
96
179
  ```
97
180
 
98
181
  ### Example
99
-
100
182
  ```
101
183
  pattern {# $title} {<h1>$title</h1>}
102
184
  # Welcome
103
185
  ```
104
-
105
186
  Output: `<h1>Welcome</h1>`
106
187
 
107
188
  ### Multiple Patterns Cascade
108
-
109
189
  ```
110
190
  pattern {a} {b}
111
191
  pattern {b} {c}
112
192
  pattern {c} {d}
113
193
  a
114
194
  ```
115
-
116
195
  Output: `d`
117
196
 
118
197
  ---
119
198
 
120
- # Subpatterns
199
+ ## Subpatterns
121
200
 
122
- Subpatterns allow patterns to be declared *inside* other patterns, existing only during the execution of that parent pattern.
201
+ Subpatterns are patterns declared *inside* a replacement body; they exist only while the parent pattern is executing.
123
202
 
124
203
  ### Syntax
125
-
126
204
  ```
127
205
  $pattern {match} {replace}
128
206
  ```
129
207
 
130
- A subpattern behaves like a normal pattern but is **scoped only to the replacement body where it appears**.
131
-
132
208
  ### Example
133
-
134
209
  ```
135
210
  pattern {eval $block code {(}{)}} {
136
211
  $eval{
@@ -139,133 +214,115 @@ pattern {eval $block code {(}{)}} {
139
214
  return "";
140
215
  }
141
216
  }
142
-
143
217
  eval(console.log(123))
144
218
  ```
145
-
146
219
  Output:
147
-
148
220
  ```
149
221
  123
150
222
  ```
151
223
 
152
224
  ### Key Properties
153
-
154
225
  * Subpatterns exist only within the running pattern.
155
226
  * They do not leak into the global pattern list.
156
- * They can recursively modify inner content before `$eval` or other processors handle it.
157
- * Multiple subpatterns can coexist inside the same replacement.
227
+ * They can recursively modify inner content before `$eval` or other processors.
228
+ * Multiple subpatterns can coexist in the same replacement.
158
229
 
159
230
  ---
160
231
 
161
232
  ## Special Keywords
162
233
 
163
- ### $unique
164
-
165
- Generates unique incremental IDs.
166
-
234
+ ### $eval
235
+ Executes JavaScript code.
167
236
  ```
168
- pattern {item} {[$unique]item_$unique}
169
- item
170
- item
237
+ pattern {$x} {$eval{return parseInt($x)*2;}}
238
+ 5
171
239
  ```
240
+ Output: `10`
172
241
 
173
- Outputs:
174
-
242
+ Supports multi-character delimiters:
175
243
  ```
176
- [0]item_0
177
- [1]item_1
244
+ pattern {$x} {$eval<<parseInt($x)*2>>}
245
+ 5
178
246
  ```
247
+ Output: `10`
179
248
 
180
- ### $match
181
-
182
- Full matched text.
249
+ ---
183
250
 
184
- ```
185
- pattern {[$x]} {FOUND: $match}
186
- [data]
187
- ```
251
+ ## Important Rules
188
252
 
189
- Output: `FOUND: [data]`
253
+ ### Variable Matching
254
+ * `$x` = smart capture (context-aware: word, until literal, or until block)
255
+ * `$x?` = optional version of `$x` (won't fail if empty)
256
+ * `$regex name {pattern}` = regex-based capture
257
+ * Patterns apply globally until stable
258
+ * Blocks support arbitrary nesting depth
190
259
 
191
- ### $prefix / $suffix
260
+ ### Block Matching
261
+ * `$block name {open}{close}` captures delimited regions
262
+ * Supports nested delimiters of any length
263
+ * Multi-character delimiters fully supported (e.g., `{>>>}{<<<}`)
192
264
 
193
- Text before and after match.
265
+ ### Whitespace Handling
266
+ * Variables automatically skip leading whitespace when needed
267
+ * Trailing whitespace is trimmed when variables appear before literals
194
268
 
195
- ```
196
- pattern {world} {$prefix$suffix}hello world test
197
- ```
269
+ ---
198
270
 
199
- Output: `hello hello test test`
271
+ ## Multi-Character Delimiter Support
200
272
 
201
- ### $clear
273
+ Multi-character delimiters are fully supported throughout all features, including blocks and `$eval`.
202
274
 
203
- Removes everything before match.
275
+ ### Examples
276
+ ```javascript
277
+ const p = new Papagaio('$', '<<<', '>>>');
278
+ ```
204
279
 
280
+ ### In Blocks
205
281
  ```
206
- pattern {SKIP $x} {$clear KEEP: $x}
207
- IGNORE SKIP keep
282
+ pattern {$block data {<<}{>>}} {$data}
283
+ <<content>>
208
284
  ```
209
285
 
210
- Output: `KEEP: keep`
211
-
212
- ### $eval
213
-
214
- Executes JS.
215
-
286
+ ### In Eval
216
287
  ```
217
- pattern {$x} {$eval{return parseInt($x)*2;}}
288
+ // const p = new Papagaio('$', '<<<', '>>>');
289
+ pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
218
290
  5
219
291
  ```
220
292
 
221
- Output: `10`
222
-
223
- ---
224
-
225
- ## Important Rules
226
-
227
- ### Matching
228
-
229
- * `$x` = one word
230
- * `$$x` = multiword (captures whitespace too)
231
- * `$`, `$$`, `$$$`, `$$$$` = whitespace operators
232
- * Patterns apply globally until stable
233
- * Blocks can be nested
234
-
235
- ### Block Matching
236
-
237
- * `$block name {open}{close}` captures delimited regions
238
- * Supports nested delimiters
239
-
240
293
  ---
241
294
 
242
295
  ## Troubleshooting
243
296
 
244
- | Problem | Solution |
245
- | --------------------- | -------------------------- |
246
- | Variable not captured | Check spacing |
247
- | Block wrong | Verify delimiters |
248
- | Infinite recursion | Reduce recursion limit |
249
- | Pattern not matching | Add whitespace operators |
250
- | Multiword var issues | Beware whitespace consumed |
297
+ | Problem | Solution |
298
+ |---------|----------|
299
+ | Variable not captured | Check context: use `$x?` for optional, or verify literals/blocks exist |
300
+ | Block mismatch | Verify opening and closing delimiters match the declaration |
301
+ | Infinite recursion | Reduce `recursion_limit` or simplify pattern dependencies |
302
+ | Pattern not matching | Verify whitespace between tokens, check if variable should be optional |
303
+ | Nested blocks fail | Ensure delimiters are properly balanced |
304
+ | Multi-char delimiters broken | Check delimiters don't conflict; use escaping if needed |
305
+ | Regex not matching | Test regex pattern separately; ensure it matches at the exact position |
251
306
 
252
307
  ---
253
308
 
254
- ## Known Bugs
309
+ ## Syntax Reference
255
310
 
256
- * Multi-character delimiters containing `"` break nested parsing.
311
+ ```
312
+ pattern {$x $y} {$y, $x} # basic pattern with variables
313
+ pattern {$x? $y} {$y, $x} # optional variable
314
+ pattern {$regex n {[0-9]+}} {$n} # regex capture
315
+ pattern {$block n {o}{c}} {$n} # block capture with custom delimiters
316
+ $pattern {a} {b} # subpattern (scoped to parent)
317
+ $eval{code} # JavaScript evaluation
318
+ ```
257
319
 
258
320
  ---
259
321
 
260
- ## Syntax Reference
322
+ ## Performance Notes
261
323
 
262
- ```
263
- pattern {$x $y} {$y, $x}
264
- pattern {$block n {o}{c}} {$n}
265
- $pattern {a} {b} # subpattern
266
- $unique
267
- $match
268
- $prefix / $suffix
269
- $clear
270
- $eval{code}
271
- ```
324
+ * Patterns apply recursively until no changes occur (up to `recursion_limit`)
325
+ * Multi-character delimiter matching is optimized with regex escaping
326
+ * Regex patterns are automatically cached to improve performance
327
+ * Nested blocks and subpatterns have no theoretical depth limit
328
+ * Large recursion limits can impact performance on complex inputs
package/bin/cli.qjs ADDED
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env qjs
2
+ import * as std from "std";
3
+ import * as os from "os"; // NOTE(review): `os` is not referenced anywhere in this script
4
+
5
+ // CLI script: reads one input file, processes it with Papagaio, and writes the result to stdout.
6
+ // Imports the Papagaio class - adjust the path as needed; for QuickJS you can include the file directly or use import.
7
+ import { Papagaio } from "../src/papagaio.js";
8
+
9
+ // Version (hard-coded here; could be read from a JSON file if needed). NOTE(review): confirm whether this should track the package version.
10
+ const VERSION = "1.0.0";
11
+
12
+ // Parse command line arguments
13
+ const args = scriptArgs.slice(1); // QuickJS uses scriptArgs instead of process.argv; slice(1) drops the first entry (presumably the script path - confirm)
14
+
15
+ // Help & Version: either flag prints its text and exits with status 0; --version is checked first
16
+ if (args.includes("-v") || args.includes("--version")) {
17
+ std.out.puts(VERSION + "\n");
18
+ std.exit(0);
19
+ }
20
+
21
+ if (args.includes("-h") || args.includes("--help")) {
22
+ std.out.puts(`Usage: papagaio [options] <file>
23
+ Options:
24
+ -h, --help Show this help message
25
+ -v, --version Show version number
26
+ `);
27
+ std.exit(0);
28
+ }
29
+
30
+ // File input: the first argument that does not start with "-" is taken as the input path
31
+ const file = args.find(arg => !arg.startsWith("-"));
32
+ if (!file) {
33
+ std.err.puts("Error: no input file specified.\nUse --help for usage.\n");
34
+ std.exit(1);
35
+ }
36
+
37
+ // Read file: a falsy handle from std.open or a thrown exception is reported on stderr and exits with status 1
38
+ let src;
39
+ try {
40
+ const f = std.open(file, "r");
41
+ if (!f) {
42
+ std.err.puts(`Error: cannot open file '${file}'\n`);
43
+ std.exit(1);
44
+ }
45
+ src = f.readAsString();
46
+ f.close();
47
+ } catch (e) {
48
+ std.err.puts(`Error reading file: ${e}\n`);
49
+ std.exit(1);
50
+ }
51
+
52
+ // Process with Papagaio using a default-constructed instance
53
+ const p = new Papagaio();
54
+ const out = p.process(src);
55
+
56
+ // Output result (written as-is; no trailing newline is appended)
57
+ std.out.puts(out);