papagaio 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,9 +1,7 @@
1
1
  # Papagaio
2
-
3
- Minimal yet powerful text preprocessor.
2
+ Minimal yet powerful text preprocessor with support for multi-character delimiters.
4
3
 
5
4
  ## Installation
6
-
7
5
  ```javascript
8
6
  import { Papagaio } from './src/papagaio.js';
9
7
  const p = new Papagaio();
@@ -11,17 +9,14 @@ const result = p.process(input);
11
9
  ```
12
10
 
13
11
  ## Configuration
14
-
15
12
  ```javascript
16
13
  p.symbols = {
17
14
  pattern: "pattern", // pattern keyword
18
- open: "{", // opening delimiter
19
- close: "}", // closing delimiter
15
+ open: "{", // opening delimiter (multi-char supported)
16
+ close: "}", // closing delimiter (multi-char supported)
20
17
  sigil: "$" // variable marker
21
18
  };
22
-
23
19
  p.recursion_limit = 512;
24
- p.unique_id = 0;
25
20
  ```
26
21
 
27
22
  ---
@@ -29,108 +24,133 @@ p.unique_id = 0;
29
24
  ## Core Concepts
30
25
 
31
26
  ### 1. Simple Variables
32
-
33
27
  ```
34
28
  pattern {$x} {$x}
35
29
  hello
36
30
  ```
37
-
38
31
  Output: `hello`
39
32
 
40
33
  ### 2. Multiple Variables
41
-
42
34
  ```
43
35
  pattern {$x $y $z} {$z, $y, $x}
44
36
  apple banana cherry
45
37
  ```
46
-
47
38
  Output: `cherry, banana, apple`
48
39
 
49
40
  ---
50
41
 
51
- ## Blocks
42
+ ## Whitespace Operators
52
43
 
53
- Capture content between delimiters.
44
+ Papagaio provides flexible whitespace handling for variable capture.
54
45
 
55
- ### Syntax
46
+ ### `$x` - Single Word Variable
47
+ Captures a single non-whitespace token.
48
+ ```
49
+ pattern {$x} {[$x]}
50
+ hello world
51
+ ```
52
+ Output: `[hello]`
56
53
 
54
+ ### `$$x` - Whitespace-Sensitive Variable
55
+ Captures text including surrounding whitespace until the next significant token.
57
56
  ```
58
- $block name {open}{close}
57
+ pattern {$$x world} {[$x]}
58
+ hello world
59
59
  ```
60
+ Output: `[hello ]`
60
61
 
61
- ### Example
62
+ ### `$$$x` - Optional Whitespace Variable
63
+ Captures text with optional surrounding whitespace; the capture may be empty (no error if nothing matches).
64
+ ```
65
+ pattern {$$$x world} {<$x>}
66
+ world
67
+ ```
68
+ Output: `<>`
69
+
70
+ ---
71
+
72
+ ## Blocks
73
+
74
+ Capture content between delimiters with full nesting support.
75
+
76
+ ### Syntax
77
+ ```
78
+ $block varName {open}{close}
79
+ ```
62
80
 
81
+ ### Basic Example
63
82
  ```
64
83
  pattern {$name $block content {(}{)}} {[$content]}
65
84
  data (hello world)
66
85
  ```
67
-
68
86
  Output: `[hello world]`
69
87
 
70
88
  ### Custom Delimiters
71
-
72
89
  ```
73
90
  pattern {$block data {<<}{>>}} {DATA: $data}
74
91
  <<json stuff>>
75
92
  ```
76
-
77
93
  Output: `DATA: json stuff`
78
94
 
79
- ### Multiple Blocks
95
+ ### Multi-Character Delimiters
96
+ ```
97
+ pattern {$block code {```}{```}} {<pre>$code</pre>}
98
+ ```markdown
99
+ # Title
100
+ ```
101
+ Output: `<pre># Title</pre>`
80
102
 
103
+ ### Multiple Blocks
81
104
  ```
82
105
  pattern {$block a {(}{)}, $block b {[}{]}} {$a|$b}
83
106
  (first), [second]
84
107
  ```
85
-
86
108
  Output: `first|second`
87
109
 
110
+ ### Nested Blocks
111
+ ```
112
+ pattern {$block outer {(}{)}} {[$outer]}
113
+ (outer (inner))
114
+ ```
115
+ Output: `[outer (inner)]`
116
+
88
117
  ---
89
118
 
90
119
  ## Patterns
91
120
 
92
- ### Basic
93
-
121
+ ### Basic Pattern
94
122
  ```
95
123
  pattern {match} {replace}
96
124
  ```
97
125
 
98
126
  ### Example
99
-
100
127
  ```
101
128
  pattern {# $title} {<h1>$title</h1>}
102
129
  # Welcome
103
130
  ```
104
-
105
131
  Output: `<h1>Welcome</h1>`
106
132
 
107
133
  ### Multiple Patterns Cascade
108
-
109
134
  ```
110
135
  pattern {a} {b}
111
136
  pattern {b} {c}
112
137
  pattern {c} {d}
113
138
  a
114
139
  ```
115
-
116
140
  Output: `d`
117
141
 
118
142
  ---
119
143
 
120
- # Subpatterns
144
+ ## Subpatterns
121
145
 
122
- Subpatterns allow patterns to be declared *inside* other patterns, existing only during the execution of that parent pattern.
146
+ Subpatterns are patterns declared *inside* replacement bodies, existing only during parent pattern execution.
123
147
 
124
148
  ### Syntax
125
-
126
149
  ```
127
150
  $pattern {match} {replace}
128
151
  ```
129
152
 
130
- A subpattern behaves like a normal pattern but is **scoped only to the replacement body where it appears**.
131
-
132
153
  ### Example
133
-
134
154
  ```
135
155
  pattern {eval $block code {(}{)}} {
136
156
  $eval{
@@ -139,133 +159,113 @@ pattern {eval $block code {(}{)}} {
139
159
  return "";
140
160
  }
141
161
  }
142
-
143
162
  eval(console.log(123))
144
163
  ```
145
-
146
164
  Output:
147
-
148
165
  ```
149
166
  123
150
167
  ```
151
168
 
152
169
  ### Key Properties
153
-
154
170
  * Subpatterns exist only within the running pattern.
155
171
  * They do not leak into the global pattern list.
156
- * They can recursively modify inner content before `$eval` or other processors handle it.
157
- * Multiple subpatterns can coexist inside the same replacement.
172
+ * They can recursively modify inner content before `$eval` or other processors.
173
+ * Multiple subpatterns can coexist in the same replacement.
158
174
 
159
175
  ---
160
176
 
161
177
  ## Special Keywords
162
178
 
163
- ### $unique
164
-
165
- Generates unique incremental IDs.
166
-
179
+ ### $eval
180
+ Executes JavaScript code.
167
181
  ```
168
- pattern {item} {[$unique]item_$unique}
169
- item
170
- item
182
+ pattern {$x} {$eval{return parseInt($x)*2;}}
183
+ 5
171
184
  ```
185
+ Output: `10`
172
186
 
173
- Outputs:
174
-
187
+ Supports multi-character delimiters:
175
188
  ```
176
- [0]item_0
177
- [1]item_1
189
+ pattern {$x} {$eval<<parseInt($x)*2>>}
190
+ 5
178
191
  ```
192
+ Output: `10`
179
193
 
180
- ### $match
181
-
182
- Full matched text.
183
194
 
184
- ```
185
- pattern {[$x]} {FOUND: $match}
186
- [data]
187
- ```
195
+ ## Important Rules
188
196
 
189
- Output: `FOUND: [data]`
197
+ ### Matching
198
+ * `$x` = one word (no whitespace)
199
+ * `$$x` = captures text with optional surrounding whitespace
200
+ * `$$$x` = captures text with optional surrounding whitespace; the capture may be empty or absent
201
+ * Patterns apply globally until stable
202
+ * Blocks support arbitrary nesting depth
190
203
 
191
- ### $prefix / $suffix
204
+ ### Block Matching
205
+ * `$block name {open}{close}` captures delimited regions
206
+ * Supports nested delimiters of any length
207
+ * Multi-character delimiters fully supported (e.g., `{>>>}{<<<}`)
192
208
 
193
- Text before and after match.
209
+ ### Whitespace Handling
210
+ * Whitespace-optional tokens (`$$` alone) skip optional whitespace
211
+ * Variables automatically skip leading whitespace when needed
212
+ * Trailing whitespace is trimmed when variables appear before literals
194
213
 
195
- ```
196
- pattern {world} {$prefix$suffix}hello world test
197
- ```
214
+ ---
198
215
 
199
- Output: `hello hello test test`
216
+ ## Multi-Character Delimiter Support
200
217
 
201
- ### $clear
218
+ Version 0.5.0 fully supports multi-character delimiters throughout all features.
202
219
 
203
- Removes everything before match.
220
+ ### Examples
221
+ ```javascript
222
+ const p = new Papagaio('$', '<<<', '>>>');
223
+ ```
204
224
 
225
+ ### In Blocks
205
226
  ```
206
- pattern {SKIP $x} {$clear KEEP: $x}
207
- IGNORE SKIP keep
227
+ pattern {$block data {<<}{>>}} {$data}
228
+ <<content>>
208
229
  ```
209
230
 
210
- Output: `KEEP: keep`
211
-
212
- ### $eval
213
-
214
- Executes JS.
215
-
231
+ ### In Eval
216
232
  ```
217
- pattern {$x} {$eval{return parseInt($x)*2;}}
233
+ // const p = new Papagaio('$', '<<<', '>>>');
234
+ pattern <<<$x>>> <<<$eval<<<return $x + 1>>>>>>
218
235
  5
219
236
  ```
220
237
 
221
- Output: `10`
222
-
223
- ---
224
-
225
- ## Important Rules
226
-
227
- ### Matching
228
-
229
- * `$x` = one word
230
- * `$$x` = multiword (captures whitespace too)
231
- * `$`, `$$`, `$$$`, `$$$$` = whitespace operators
232
- * Patterns apply globally until stable
233
- * Blocks can be nested
234
-
235
- ### Block Matching
236
-
237
- * `$block name {open}{close}` captures delimited regions
238
- * Supports nested delimiters
239
-
240
238
  ---
241
239
 
242
240
  ## Troubleshooting
243
241
 
244
- | Problem | Solution |
245
- | --------------------- | -------------------------- |
246
- | Variable not captured | Check spacing |
247
- | Block wrong | Verify delimiters |
248
- | Infinite recursion | Reduce recursion limit |
249
- | Pattern not matching | Add whitespace operators |
250
- | Multiword var issues | Beware whitespace consumed |
242
+ | Problem | Solution |
243
+ |---------|----------|
244
+ | Variable not captured | Check spacing and use appropriate whitespace operator (`$x`, `$$x`, `$$$x`) |
245
+ | Block mismatch | Verify opening and closing delimiters match the declaration |
246
+ | Infinite recursion | Reduce `recursion_limit` or simplify pattern dependencies |
247
+ | Pattern not matching | Add whitespace operators (`$$`) for multi-word content |
248
+ | Nested blocks fail | Ensure delimiters are properly balanced |
249
+ | Multi-char delimiters broken | Check delimiters don't conflict; use escaping if needed |
251
250
 
252
251
  ---
253
252
 
254
- ## Known Bugs
253
+ ## Syntax Reference
255
254
 
256
- * Multi-character delimiters containing `"` break nested parsing.
255
+ ```
256
+ pattern {$x $y} {$y, $x} # basic pattern with variables
257
+ pattern {$$x $y} {$y, $x} # whitespace-sensitive capture
258
+ pattern {$$$x $y} {$y, $x} # optional whitespace capture
259
+ pattern {$block n {o}{c}} {$n} # block capture with custom delimiters
260
+ $pattern {a} {b} # subpattern (scoped to parent)
261
+ $eval{code} # JavaScript evaluation
262
+ ```
257
263
 
258
264
  ---
259
265
 
260
- ## Syntax Reference
266
+ ## Performance Notes
261
267
 
262
- ```
263
- pattern {$x $y} {$y, $x}
264
- pattern {$block n {o}{c}} {$n}
265
- $pattern {a} {b} # subpattern
266
- $unique
267
- $match
268
- $prefix / $suffix
269
- $clear
270
- $eval{code}
271
- ```
268
+ * Patterns apply recursively until no changes occur (up to `recursion_limit`)
269
+ * Multi-character delimiter matching is optimized with regex escaping
270
+ * Nested blocks and subpatterns have no theoretical depth limit
271
+ * Large recursion limits can impact performance on complex inputs
package/bin/cli.qjs ADDED
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env qjs
2
+ import * as std from "std";
3
+ import * as os from "os";
4
+
5
+ // Import Papagaio class - adjust the path as needed
6
+ // For QuickJS, you can include the file directly or use import
7
+ import { Papagaio } from "../src/papagaio.js";
8
+
9
+ // Version (can be hard-coded or read from a JSON file if needed); NOTE(review): confirm it matches the package version
10
+ const VERSION = "1.0.0";
11
+
12
+ // Parse command line arguments
13
+ const args = scriptArgs.slice(1); // QuickJS uses scriptArgs instead of process.argv
14
+
15
+ // Help & Version
16
+ if (args.includes("-v") || args.includes("--version")) {
17
+ std.out.puts(VERSION + "\n");
18
+ std.exit(0);
19
+ }
20
+
21
+ if (args.includes("-h") || args.includes("--help")) {
22
+ std.out.puts(`Usage: papagaio [options] <file>
23
+ Options:
24
+ -h, --help Show this help message
25
+ -v, --version Show version number
26
+ `);
27
+ std.exit(0);
28
+ }
29
+
30
+ // File input
31
+ const file = args.find(arg => !arg.startsWith("-"));
32
+ if (!file) {
33
+ std.err.puts("Error: no input file specified.\nUse --help for usage.\n");
34
+ std.exit(1);
35
+ }
36
+
37
+ // Read file
38
+ let src;
39
+ try {
40
+ const f = std.open(file, "r");
41
+ if (!f) {
42
+ std.err.puts(`Error: cannot open file '${file}'\n`);
43
+ std.exit(1);
44
+ }
45
+ src = f.readAsString();
46
+ f.close();
47
+ } catch (e) {
48
+ std.err.puts(`Error reading file: ${e}\n`);
49
+ std.exit(1);
50
+ }
51
+
52
+ // Process with Papagaio
53
+ const p = new Papagaio();
54
+ const out = p.process(src);
55
+
56
+ // Output result
57
+ std.out.puts(out);