parsanol 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +242 -13
- data/HISTORY.txt +50 -0
- data/README.adoc +97 -0
- data/ext/parsanol_native/Cargo.toml +1 -1
- data/ext/parsanol_native/src/lib.rs +2 -2
- data/lib/parsanol/native/dynamic.rb +237 -0
- data/lib/parsanol/native/serializer.rb +23 -20
- data/lib/parsanol/native.rb +2 -0
- data/lib/parsanol/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 523e36dd19f7b509167089dcf1d566abb341ae2da29cec57a1ac9297650042bf
|
|
4
|
+
data.tar.gz: 7d0b4bd344cb8dd514a07a82426d3136a0953bcf086b8344a56a735b0f0fdad3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1c1199d60ae79ef564f811686e4f1dd164c515c8457673ec4a9bd8c2c1cf1d7e84828afa8904c62c931ab0645126f97e4a639fae47900f964da0e10d6f13b82a
|
|
7
|
+
data.tar.gz: 95c0a72489855df18dffcdee93ac1d8cced717b563f85e7a18bbc8be60419cf1aabf73e8b44f6f4aa7d02dee9e638b1dabd4fff6871aac09e7cb896adbcb185b
|
data/Cargo.lock
CHANGED
|
@@ -9,7 +9,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
9
9
|
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
|
|
10
10
|
dependencies = [
|
|
11
11
|
"cfg-if",
|
|
12
|
-
"getrandom",
|
|
12
|
+
"getrandom 0.3.4",
|
|
13
13
|
"once_cell",
|
|
14
14
|
"version_check",
|
|
15
15
|
"zerocopy",
|
|
@@ -30,6 +30,12 @@ version = "0.2.21"
|
|
|
30
30
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
31
31
|
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
|
32
32
|
|
|
33
|
+
[[package]]
|
|
34
|
+
name = "anyhow"
|
|
35
|
+
version = "1.0.102"
|
|
36
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
37
|
+
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
|
|
38
|
+
|
|
33
39
|
[[package]]
|
|
34
40
|
name = "bindgen"
|
|
35
41
|
version = "0.69.5"
|
|
@@ -94,17 +100,48 @@ version = "1.15.0"
|
|
|
94
100
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
95
101
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
96
102
|
|
|
103
|
+
[[package]]
|
|
104
|
+
name = "equivalent"
|
|
105
|
+
version = "1.0.2"
|
|
106
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
107
|
+
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
|
108
|
+
|
|
109
|
+
[[package]]
|
|
110
|
+
name = "foldhash"
|
|
111
|
+
version = "0.1.5"
|
|
112
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
113
|
+
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
|
114
|
+
|
|
115
|
+
[[package]]
|
|
116
|
+
name = "foldhash"
|
|
117
|
+
version = "0.2.0"
|
|
118
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
119
|
+
checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
|
|
120
|
+
|
|
97
121
|
[[package]]
|
|
98
122
|
name = "getrandom"
|
|
99
123
|
version = "0.3.4"
|
|
100
124
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
101
125
|
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
|
|
126
|
+
dependencies = [
|
|
127
|
+
"cfg-if",
|
|
128
|
+
"libc",
|
|
129
|
+
"r-efi 5.3.0",
|
|
130
|
+
"wasip2",
|
|
131
|
+
]
|
|
132
|
+
|
|
133
|
+
[[package]]
|
|
134
|
+
name = "getrandom"
|
|
135
|
+
version = "0.4.2"
|
|
136
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
137
|
+
checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
|
|
102
138
|
dependencies = [
|
|
103
139
|
"cfg-if",
|
|
104
140
|
"js-sys",
|
|
105
141
|
"libc",
|
|
106
|
-
"r-efi",
|
|
142
|
+
"r-efi 6.0.0",
|
|
107
143
|
"wasip2",
|
|
144
|
+
"wasip3",
|
|
108
145
|
"wasm-bindgen",
|
|
109
146
|
]
|
|
110
147
|
|
|
@@ -116,12 +153,46 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
|
|
116
153
|
|
|
117
154
|
[[package]]
|
|
118
155
|
name = "hashbrown"
|
|
119
|
-
version = "0.
|
|
156
|
+
version = "0.15.5"
|
|
120
157
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
121
|
-
checksum = "
|
|
158
|
+
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
|
159
|
+
dependencies = [
|
|
160
|
+
"foldhash 0.1.5",
|
|
161
|
+
]
|
|
162
|
+
|
|
163
|
+
[[package]]
|
|
164
|
+
name = "hashbrown"
|
|
165
|
+
version = "0.16.1"
|
|
166
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
167
|
+
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
|
|
122
168
|
dependencies = [
|
|
123
|
-
"ahash",
|
|
124
169
|
"allocator-api2",
|
|
170
|
+
"equivalent",
|
|
171
|
+
"foldhash 0.2.0",
|
|
172
|
+
]
|
|
173
|
+
|
|
174
|
+
[[package]]
|
|
175
|
+
name = "heck"
|
|
176
|
+
version = "0.5.0"
|
|
177
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
178
|
+
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
179
|
+
|
|
180
|
+
[[package]]
|
|
181
|
+
name = "id-arena"
|
|
182
|
+
version = "2.3.0"
|
|
183
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
184
|
+
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
|
|
185
|
+
|
|
186
|
+
[[package]]
|
|
187
|
+
name = "indexmap"
|
|
188
|
+
version = "2.13.0"
|
|
189
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
190
|
+
checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
|
|
191
|
+
dependencies = [
|
|
192
|
+
"equivalent",
|
|
193
|
+
"hashbrown 0.16.1",
|
|
194
|
+
"serde",
|
|
195
|
+
"serde_core",
|
|
125
196
|
]
|
|
126
197
|
|
|
127
198
|
[[package]]
|
|
@@ -161,6 +232,12 @@ version = "1.3.0"
|
|
|
161
232
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
162
233
|
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
163
234
|
|
|
235
|
+
[[package]]
|
|
236
|
+
name = "leb128fmt"
|
|
237
|
+
version = "0.1.0"
|
|
238
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
239
|
+
checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
|
|
240
|
+
|
|
164
241
|
[[package]]
|
|
165
242
|
name = "libc"
|
|
166
243
|
version = "0.2.182"
|
|
@@ -236,16 +313,15 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
|
236
313
|
|
|
237
314
|
[[package]]
|
|
238
315
|
name = "parsanol"
|
|
239
|
-
version = "0.
|
|
316
|
+
version = "0.3.0"
|
|
240
317
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
241
|
-
checksum = "
|
|
318
|
+
checksum = "40e84f8cdc9f85960871f57d5fff33cf9e03deae0fd7f87b1f9655f35451fdf7"
|
|
242
319
|
dependencies = [
|
|
243
320
|
"ahash",
|
|
244
|
-
"getrandom",
|
|
245
|
-
"hashbrown",
|
|
321
|
+
"getrandom 0.4.2",
|
|
322
|
+
"hashbrown 0.16.1",
|
|
246
323
|
"magnus",
|
|
247
324
|
"memchr",
|
|
248
|
-
"once_cell",
|
|
249
325
|
"parsanol-derive",
|
|
250
326
|
"rb-sys",
|
|
251
327
|
"regex",
|
|
@@ -255,9 +331,9 @@ dependencies = [
|
|
|
255
331
|
|
|
256
332
|
[[package]]
|
|
257
333
|
name = "parsanol-derive"
|
|
258
|
-
version = "0.
|
|
334
|
+
version = "0.3.0"
|
|
259
335
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
260
|
-
checksum = "
|
|
336
|
+
checksum = "bbd12999fd4452b4713c3d32ca200de0894da2636a215ed54e83c42a553c1aae"
|
|
261
337
|
dependencies = [
|
|
262
338
|
"proc-macro2",
|
|
263
339
|
"quote",
|
|
@@ -266,7 +342,7 @@ dependencies = [
|
|
|
266
342
|
|
|
267
343
|
[[package]]
|
|
268
344
|
name = "parsanol_native"
|
|
269
|
-
version = "1.0.
|
|
345
|
+
version = "1.0.2"
|
|
270
346
|
dependencies = [
|
|
271
347
|
"log",
|
|
272
348
|
"magnus",
|
|
@@ -274,6 +350,16 @@ dependencies = [
|
|
|
274
350
|
"rb-sys",
|
|
275
351
|
]
|
|
276
352
|
|
|
353
|
+
[[package]]
|
|
354
|
+
name = "prettyplease"
|
|
355
|
+
version = "0.2.37"
|
|
356
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
357
|
+
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
|
|
358
|
+
dependencies = [
|
|
359
|
+
"proc-macro2",
|
|
360
|
+
"syn",
|
|
361
|
+
]
|
|
362
|
+
|
|
277
363
|
[[package]]
|
|
278
364
|
name = "proc-macro2"
|
|
279
365
|
version = "1.0.106"
|
|
@@ -298,6 +384,12 @@ version = "5.3.0"
|
|
|
298
384
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
299
385
|
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
|
|
300
386
|
|
|
387
|
+
[[package]]
|
|
388
|
+
name = "r-efi"
|
|
389
|
+
version = "6.0.0"
|
|
390
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
391
|
+
checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
|
|
392
|
+
|
|
301
393
|
[[package]]
|
|
302
394
|
name = "rb-sys"
|
|
303
395
|
version = "0.9.124"
|
|
@@ -369,6 +461,12 @@ version = "1.0.22"
|
|
|
369
461
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
370
462
|
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
|
371
463
|
|
|
464
|
+
[[package]]
|
|
465
|
+
name = "semver"
|
|
466
|
+
version = "1.0.27"
|
|
467
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
468
|
+
checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
|
|
469
|
+
|
|
372
470
|
[[package]]
|
|
373
471
|
name = "seq-macro"
|
|
374
472
|
version = "0.3.6"
|
|
@@ -447,6 +545,12 @@ version = "1.0.24"
|
|
|
447
545
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
448
546
|
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
449
547
|
|
|
548
|
+
[[package]]
|
|
549
|
+
name = "unicode-xid"
|
|
550
|
+
version = "0.2.6"
|
|
551
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
552
|
+
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
|
553
|
+
|
|
450
554
|
[[package]]
|
|
451
555
|
name = "version_check"
|
|
452
556
|
version = "0.9.5"
|
|
@@ -462,6 +566,15 @@ dependencies = [
|
|
|
462
566
|
"wit-bindgen",
|
|
463
567
|
]
|
|
464
568
|
|
|
569
|
+
[[package]]
|
|
570
|
+
name = "wasip3"
|
|
571
|
+
version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
|
|
572
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
573
|
+
checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
|
|
574
|
+
dependencies = [
|
|
575
|
+
"wit-bindgen",
|
|
576
|
+
]
|
|
577
|
+
|
|
465
578
|
[[package]]
|
|
466
579
|
name = "wasm-bindgen"
|
|
467
580
|
version = "0.2.114"
|
|
@@ -507,6 +620,40 @@ dependencies = [
|
|
|
507
620
|
"unicode-ident",
|
|
508
621
|
]
|
|
509
622
|
|
|
623
|
+
[[package]]
|
|
624
|
+
name = "wasm-encoder"
|
|
625
|
+
version = "0.244.0"
|
|
626
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
627
|
+
checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
|
|
628
|
+
dependencies = [
|
|
629
|
+
"leb128fmt",
|
|
630
|
+
"wasmparser",
|
|
631
|
+
]
|
|
632
|
+
|
|
633
|
+
[[package]]
|
|
634
|
+
name = "wasm-metadata"
|
|
635
|
+
version = "0.244.0"
|
|
636
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
637
|
+
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
|
|
638
|
+
dependencies = [
|
|
639
|
+
"anyhow",
|
|
640
|
+
"indexmap",
|
|
641
|
+
"wasm-encoder",
|
|
642
|
+
"wasmparser",
|
|
643
|
+
]
|
|
644
|
+
|
|
645
|
+
[[package]]
|
|
646
|
+
name = "wasmparser"
|
|
647
|
+
version = "0.244.0"
|
|
648
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
649
|
+
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
|
|
650
|
+
dependencies = [
|
|
651
|
+
"bitflags",
|
|
652
|
+
"hashbrown 0.15.5",
|
|
653
|
+
"indexmap",
|
|
654
|
+
"semver",
|
|
655
|
+
]
|
|
656
|
+
|
|
510
657
|
[[package]]
|
|
511
658
|
name = "windows-link"
|
|
512
659
|
version = "0.2.1"
|
|
@@ -518,6 +665,88 @@ name = "wit-bindgen"
|
|
|
518
665
|
version = "0.51.0"
|
|
519
666
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
520
667
|
checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
|
|
668
|
+
dependencies = [
|
|
669
|
+
"wit-bindgen-rust-macro",
|
|
670
|
+
]
|
|
671
|
+
|
|
672
|
+
[[package]]
|
|
673
|
+
name = "wit-bindgen-core"
|
|
674
|
+
version = "0.51.0"
|
|
675
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
676
|
+
checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
|
|
677
|
+
dependencies = [
|
|
678
|
+
"anyhow",
|
|
679
|
+
"heck",
|
|
680
|
+
"wit-parser",
|
|
681
|
+
]
|
|
682
|
+
|
|
683
|
+
[[package]]
|
|
684
|
+
name = "wit-bindgen-rust"
|
|
685
|
+
version = "0.51.0"
|
|
686
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
687
|
+
checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
|
|
688
|
+
dependencies = [
|
|
689
|
+
"anyhow",
|
|
690
|
+
"heck",
|
|
691
|
+
"indexmap",
|
|
692
|
+
"prettyplease",
|
|
693
|
+
"syn",
|
|
694
|
+
"wasm-metadata",
|
|
695
|
+
"wit-bindgen-core",
|
|
696
|
+
"wit-component",
|
|
697
|
+
]
|
|
698
|
+
|
|
699
|
+
[[package]]
|
|
700
|
+
name = "wit-bindgen-rust-macro"
|
|
701
|
+
version = "0.51.0"
|
|
702
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
703
|
+
checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
|
|
704
|
+
dependencies = [
|
|
705
|
+
"anyhow",
|
|
706
|
+
"prettyplease",
|
|
707
|
+
"proc-macro2",
|
|
708
|
+
"quote",
|
|
709
|
+
"syn",
|
|
710
|
+
"wit-bindgen-core",
|
|
711
|
+
"wit-bindgen-rust",
|
|
712
|
+
]
|
|
713
|
+
|
|
714
|
+
[[package]]
|
|
715
|
+
name = "wit-component"
|
|
716
|
+
version = "0.244.0"
|
|
717
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
718
|
+
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
|
|
719
|
+
dependencies = [
|
|
720
|
+
"anyhow",
|
|
721
|
+
"bitflags",
|
|
722
|
+
"indexmap",
|
|
723
|
+
"log",
|
|
724
|
+
"serde",
|
|
725
|
+
"serde_derive",
|
|
726
|
+
"serde_json",
|
|
727
|
+
"wasm-encoder",
|
|
728
|
+
"wasm-metadata",
|
|
729
|
+
"wasmparser",
|
|
730
|
+
"wit-parser",
|
|
731
|
+
]
|
|
732
|
+
|
|
733
|
+
[[package]]
|
|
734
|
+
name = "wit-parser"
|
|
735
|
+
version = "0.244.0"
|
|
736
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
737
|
+
checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
|
|
738
|
+
dependencies = [
|
|
739
|
+
"anyhow",
|
|
740
|
+
"id-arena",
|
|
741
|
+
"indexmap",
|
|
742
|
+
"log",
|
|
743
|
+
"semver",
|
|
744
|
+
"serde",
|
|
745
|
+
"serde_derive",
|
|
746
|
+
"serde_json",
|
|
747
|
+
"unicode-xid",
|
|
748
|
+
"wasmparser",
|
|
749
|
+
]
|
|
521
750
|
|
|
522
751
|
[[package]]
|
|
523
752
|
name = "zerocopy"
|
data/HISTORY.txt
CHANGED
|
@@ -1,3 +1,53 @@
|
|
|
1
|
+
== Parsanol 1.2.0 (2026-03-07)
|
|
2
|
+
|
|
3
|
+
New features for advanced parsing scenarios:
|
|
4
|
+
|
|
5
|
+
=== Capture Atoms
|
|
6
|
+
|
|
7
|
+
Extract named values from parsed input without building full AST:
|
|
8
|
+
|
|
9
|
+
* `atom.capture(:name)` - Capture matched text by name
|
|
10
|
+
* Zero-copy extraction using offset/length
|
|
11
|
+
* Works across all backends (Packrat, Bytecode, Streaming)
|
|
12
|
+
|
|
13
|
+
=== Scope Atoms
|
|
14
|
+
|
|
15
|
+
Create isolated capture contexts for nested parsing:
|
|
16
|
+
|
|
17
|
+
* `scope { inner }` - Discards inner captures on exit
|
|
18
|
+
* Prevents capture pollution in recursive structures
|
|
19
|
+
* Memory-bounded parsing for repeated patterns
|
|
20
|
+
|
|
21
|
+
=== Dynamic Atoms
|
|
22
|
+
|
|
23
|
+
Runtime-determined parsing via FFI callbacks:
|
|
24
|
+
|
|
25
|
+
* `dynamic { |ctx| parser }` - Context-sensitive parsing
|
|
26
|
+
* Access captures, position, and input in callback
|
|
27
|
+
* Full Packrat backend support
|
|
28
|
+
|
|
29
|
+
=== Native Extension Updates
|
|
30
|
+
|
|
31
|
+
* Updated to parsanol-rs 0.3.0
|
|
32
|
+
* New backend abstraction (Packrat, Bytecode, Auto)
|
|
33
|
+
* Streaming parser with capture extraction
|
|
34
|
+
* Performance improvements
|
|
35
|
+
|
|
36
|
+
== Parsanol 1.1.0 (2025-03-15)
|
|
37
|
+
|
|
38
|
+
Position information is now returned by default:
|
|
39
|
+
|
|
40
|
+
* All parse methods return `Parsanol::Slice` objects
|
|
41
|
+
* `Slice#offset`, `#length`, `#line_and_column` methods
|
|
42
|
+
* Zero-copy extraction via `Slice#extract_from(input)`
|
|
43
|
+
* JSON output format includes position inline
|
|
44
|
+
|
|
45
|
+
Performance improvements:
|
|
46
|
+
|
|
47
|
+
* Native extension ~20x faster than pure Ruby
|
|
48
|
+
* ZeroCopy API for direct FFI object construction
|
|
49
|
+
* Parallel batch parsing with `Parsanol::Parallel`
|
|
50
|
+
|
|
1
51
|
== Parsanol 1.0.0 (2025-03-02)
|
|
2
52
|
|
|
3
53
|
Initial release of Parsanol, a high-performance PEG parser library for Ruby.
|
data/README.adoc
CHANGED
|
@@ -247,8 +247,13 @@ parser.parse('123') # Works exactly the same
|
|
|
247
247
|
| `sequence(:x)` | ✅ | Match array of values
|
|
248
248
|
| `subtree(:x)` | ✅ | Match any subtree
|
|
249
249
|
| `Parslet::Slice` | ✅ | Parsanol::Slice compatible
|
|
250
|
+
| `.capture(:name)` | ✅ | Named capture extraction (NEW in 1.2.0)
|
|
251
|
+
| `scope { }` | ✅ | Isolated capture context (NEW in 1.2.0)
|
|
252
|
+
| `dynamic { \|ctx\| }` | ✅ | Runtime-determined parsing (NEW in 1.2.0)
|
|
250
253
|
|===
|
|
251
254
|
|
|
255
|
+
NOTE: The new capture, scope, and dynamic atoms provide powerful extraction and context-sensitive parsing capabilities. See the <<captures,Captures>> section for details.
|
|
256
|
+
|
|
252
257
|
== Architecture
|
|
253
258
|
|
|
254
259
|
.Parsanol architecture overview
|
|
@@ -467,7 +472,99 @@ NOTE: The backend selection is transparent to Ruby users. The parser object auto
|
|
|
467
472
|
|
|
468
473
|
For more details on backend selection and grammar analysis, see the https://parsanol.github.io/backends[Parsing Backends documentation].
|
|
469
474
|
|
|
475
|
+
[[captures]]
|
|
476
|
+
== Captures, Scopes, and Dynamic Atoms
|
|
477
|
+
|
|
478
|
+
Parsanol 1.2.0 introduces powerful new features for extracting and managing parsed data.
|
|
479
|
+
|
|
480
|
+
[[capture-atoms]]
|
|
481
|
+
=== Capture Atoms
|
|
482
|
+
|
|
483
|
+
Extract named values from parsed input, similar to named groups in regular expressions:
|
|
484
|
+
|
|
485
|
+
[source,ruby]
|
|
486
|
+
----
|
|
487
|
+
require 'parsanol/parslet'
|
|
488
|
+
|
|
489
|
+
include Parsanol::Parslet
|
|
490
|
+
|
|
491
|
+
# Basic capture
|
|
492
|
+
parser = str('hello').capture(:greeting)
|
|
493
|
+
result = parser.parse("hello")
|
|
494
|
+
puts result[:greeting].to_s # => "hello"
|
|
495
|
+
|
|
496
|
+
# Multiple captures - parse key=value pairs
|
|
497
|
+
kv_parser = match('[a-z]+').capture(:key) >>
|
|
498
|
+
str('=') >>
|
|
499
|
+
match('[a-zA-Z0-9]+').capture(:value)
|
|
500
|
+
|
|
501
|
+
result = kv_parser.parse("name=Alice")
|
|
502
|
+
puts result[:key].to_s # => "name"
|
|
503
|
+
puts result[:value].to_s # => "Alice"
|
|
504
|
+
----
|
|
505
|
+
|
|
506
|
+
[[scope-atoms]]
|
|
507
|
+
=== Scope Atoms
|
|
508
|
+
|
|
509
|
+
Create isolated capture contexts. Captures inside a scope are discarded when the scope exits:
|
|
510
|
+
|
|
511
|
+
[source,ruby]
|
|
512
|
+
----
|
|
513
|
+
# Without scope: inner captures leak out
|
|
514
|
+
parser = str('a').capture(:temp) >> str('b') >> str('c').capture(:temp)
|
|
515
|
+
|
|
516
|
+
# With scope: inner captures are discarded
|
|
517
|
+
parser = str('prefix').capture(:outer) >>
|
|
518
|
+
scope { str('inner').capture(:inner) } >>
|
|
519
|
+
str('suffix').capture(:outer_end)
|
|
520
|
+
|
|
521
|
+
result = parser.parse("prefixinnersuffix")
|
|
522
|
+
puts result[:inner] # => nil (discarded)
|
|
523
|
+
puts result[:outer] # => "prefix"
|
|
524
|
+
----
|
|
525
|
+
|
|
526
|
+
Scopes are essential for:
|
|
527
|
+
- Parsing nested structures without capture pollution
|
|
528
|
+
- Recursive parsing with isolated capture state
|
|
529
|
+
- Memory-bounded parsing of repeated structures
|
|
530
|
+
|
|
531
|
+
[[dynamic-atoms]]
|
|
532
|
+
=== Dynamic Atoms
|
|
533
|
+
|
|
534
|
+
Runtime-determined parsing via callbacks. The grammar can change based on context:
|
|
535
|
+
|
|
536
|
+
[source,ruby]
|
|
537
|
+
----
|
|
538
|
+
# Type-driven value parsing
|
|
539
|
+
class TypeParser < Parsanol::Parser
|
|
540
|
+
include Parsanol::Parslet
|
|
541
|
+
|
|
542
|
+
rule(:type) { match('[a-z]+').capture(:type) }
|
|
543
|
+
rule(:value) do
|
|
544
|
+
dynamic do |ctx|
|
|
545
|
+
case ctx[:type].to_s
|
|
546
|
+
when 'int' then match('\d+')
|
|
547
|
+
when 'str' then match('[a-z]+')
|
|
548
|
+
when 'bool' then str('true') | str('false')
|
|
549
|
+
else match('[a-z]+')
|
|
550
|
+
end.capture(:value)
|
|
551
|
+
end
|
|
552
|
+
end
|
|
553
|
+
rule(:declaration) { type >> str(':') >> match('[a-z]+').capture(:name) >> str('=') >> value }
|
|
554
|
+
root :declaration
|
|
555
|
+
end
|
|
556
|
+
|
|
557
|
+
parser = TypeParser.new
|
|
558
|
+
result = parser.parse("int:count=42")
|
|
559
|
+
puts result[:type].to_s # => "int"
|
|
560
|
+
puts result[:value].to_s # => "42"
|
|
561
|
+
----
|
|
470
562
|
|
|
563
|
+
The `DynamicContext` provides:
|
|
564
|
+
- `ctx[:name]` - Access captured values
|
|
565
|
+
- `ctx.remaining` - Remaining input from current position
|
|
566
|
+
- `ctx.pos` - Current byte position
|
|
567
|
+
- `ctx.input` - Full input string
|
|
471
568
|
|
|
472
569
|
[[streaming-builder]]
|
|
473
570
|
== Streaming Builder API
|
|
@@ -28,7 +28,7 @@ rb-sys = { version = "0.9.124", features = ["link-ruby", "global-allocator"] }
|
|
|
28
28
|
magnus = "0.8"
|
|
29
29
|
|
|
30
30
|
# parsanol parser library
|
|
31
|
-
parsanol = { version = "0.
|
|
31
|
+
parsanol = { version = "0.3", features = ["ruby"] }
|
|
32
32
|
|
|
33
33
|
# Logging
|
|
34
34
|
log = "0.4"
|
|
@@ -11,7 +11,7 @@ use magnus::{Error, Ruby};
|
|
|
11
11
|
/// functions from parsanol-rs.
|
|
12
12
|
#[magnus::init]
|
|
13
13
|
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
14
|
-
// Initialize the parsanol-rs
|
|
14
|
+
// Initialize the parsanol-rs ffi::ruby module
|
|
15
15
|
// This sets up Parsanol::Native with all the functions
|
|
16
|
-
parsanol::
|
|
16
|
+
parsanol::ffi::ruby::init(ruby)
|
|
17
17
|
}
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module Parsanol
|
|
6
|
+
module Native
|
|
7
|
+
# Manages Ruby callbacks for dynamic atoms
|
|
8
|
+
#
|
|
9
|
+
# Dynamic atoms allow runtime-determined parsing by invoking Ruby code
|
|
10
|
+
# during parsing. This module provides:
|
|
11
|
+
# - Registration of Ruby procs as callbacks
|
|
12
|
+
# - Thread-safe callback storage
|
|
13
|
+
# - GC-safe references (callbacks are kept alive while registered)
|
|
14
|
+
#
|
|
15
|
+
# @example Basic usage
|
|
16
|
+
# # Register a callback
|
|
17
|
+
# callback_id = Parsanol::Native::Dynamic.register(->(ctx) {
|
|
18
|
+
# ctx[:mode] == 'A' ? str('alpha') : str('beta')
|
|
19
|
+
# })
|
|
20
|
+
#
|
|
21
|
+
# # Use in grammar
|
|
22
|
+
# grammar = str('MODE:').capture(:mode) >> dynamic(callback_id)
|
|
23
|
+
#
|
|
24
|
+
# # Unregister when done
|
|
25
|
+
# Parsanol::Native::Dynamic.unregister(callback_id)
|
|
26
|
+
#
|
|
27
|
+
module Dynamic
|
|
28
|
+
# Callback storage (callback_id => block)
|
|
29
|
+
# This keeps strong references to prevent GC
|
|
30
|
+
@callbacks = {}
|
|
31
|
+
@mutex = Mutex.new
|
|
32
|
+
@next_id = 1_000_000 # Start high to avoid conflicts with Rust-side IDs
|
|
33
|
+
|
|
34
|
+
class << self
|
|
35
|
+
# Register a Ruby block as a dynamic callback
|
|
36
|
+
#
|
|
37
|
+
# @param block [Proc] The block to register (it is called with a DynamicContext)
|
|
38
|
+
# @param description [String, nil] Optional description for debugging
|
|
39
|
+
# @return [Integer] Unique callback ID for use in grammar
|
|
40
|
+
#
|
|
41
|
+
# @example
|
|
42
|
+
# id = Parsanol::Native::Dynamic.register(->(ctx) {
|
|
43
|
+
# case ctx[:type]
|
|
44
|
+
# when 'int' then str('integer')
|
|
45
|
+
# when 'str' then str('string')
|
|
46
|
+
# else nil
|
|
47
|
+
# end
|
|
48
|
+
# })
|
|
49
|
+
#
|
|
50
|
+
def register(block, description: nil)
|
|
51
|
+
# Register with Rust FFI
|
|
52
|
+
ffi_id = Native.register_callback(@next_id, description || "Ruby callback ##{@next_id}")
|
|
53
|
+
|
|
54
|
+
# Also keep a Ruby-side reference for GC safety
|
|
55
|
+
@mutex.synchronize do
|
|
56
|
+
@callbacks[ffi_id] = {
|
|
57
|
+
block: block,
|
|
58
|
+
description: description || "Ruby callback ##{ffi_id}"
|
|
59
|
+
}
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
ffi_id
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Unregister a callback (free memory)
|
|
66
|
+
#
|
|
67
|
+
# @param callback_id [Integer] The callback ID to remove
|
|
68
|
+
# @return [Boolean] True if the callback was found and removed
|
|
69
|
+
#
|
|
70
|
+
def unregister(callback_id)
|
|
71
|
+
# Remove from Rust FFI
|
|
72
|
+
Native.unregister_callback(callback_id)
|
|
73
|
+
|
|
74
|
+
# Remove from Ruby storage
|
|
75
|
+
@mutex.synchronize do
|
|
76
|
+
@callbacks.delete(callback_id)
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Get the description of a registered callback
|
|
81
|
+
#
|
|
82
|
+
# @param callback_id [Integer] The callback ID
|
|
83
|
+
# @return [String, nil] The description or nil if not found
|
|
84
|
+
#
|
|
85
|
+
def description(callback_id)
|
|
86
|
+
# Try Ruby-side first
|
|
87
|
+
ruby_desc = @mutex.synchronize do
|
|
88
|
+
@callbacks[callback_id]&.dig(:description)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
return ruby_desc if ruby_desc
|
|
92
|
+
|
|
93
|
+
# Fall back to FFI
|
|
94
|
+
Native.get_callback_description(callback_id)
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Get the number of registered callbacks
|
|
98
|
+
#
|
|
99
|
+
# @return [Integer] Number of registered callbacks
|
|
100
|
+
#
|
|
101
|
+
def count
|
|
102
|
+
Native.callback_count
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Clear all callbacks (for testing)
|
|
106
|
+
#
|
|
107
|
+
# WARNING: This clears all callbacks globally, including those
|
|
108
|
+
# registered by other code. Use with caution.
|
|
109
|
+
#
|
|
110
|
+
def clear
|
|
111
|
+
Native.clear_callbacks
|
|
112
|
+
@mutex.synchronize { @callbacks.clear }
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Check if a callback is registered
|
|
116
|
+
#
|
|
117
|
+
# @param callback_id [Integer] The callback ID
|
|
118
|
+
# @return [Boolean] True if registered
|
|
119
|
+
#
|
|
120
|
+
def registered?(callback_id)
|
|
121
|
+
@mutex.synchronize { @callbacks.key?(callback_id) } ||
|
|
122
|
+
Native.has_callback(callback_id)
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Invoke a callback from Rust (called via FFI)
|
|
126
|
+
#
|
|
127
|
+
# @param callback_id [Integer] The callback ID
|
|
128
|
+
# @param context [Hash] The context hash from Rust
|
|
129
|
+
# @return [Object, nil] The returned atom (parslet) or nil
|
|
130
|
+
#
|
|
131
|
+
def invoke_from_rust(callback_id, context)
|
|
132
|
+
block = @mutex.synchronize { @callbacks[callback_id]&.dig(:block) }
|
|
133
|
+
return nil unless block
|
|
134
|
+
|
|
135
|
+
# Build DynamicContext from hash
|
|
136
|
+
ctx = DynamicContext.new(
|
|
137
|
+
context[:input],
|
|
138
|
+
context[:pos],
|
|
139
|
+
context[:captures].transform_keys(&:to_sym)
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
# Call the block
|
|
143
|
+
result = block.call(ctx)
|
|
144
|
+
|
|
145
|
+
return nil unless result
|
|
146
|
+
|
|
147
|
+
# Return the result (should be a parslet/atom)
|
|
148
|
+
result
|
|
149
|
+
rescue StandardError => e
|
|
150
|
+
warn "[Parsanol::Native::Dynamic] Invoke error: #{e.message}"
|
|
151
|
+
nil
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
# Context object passed to dynamic callbacks
|
|
157
|
+
#
|
|
158
|
+
# Provides read-only access to the parsing context including
|
|
159
|
+
# input string, current position, and captured values.
|
|
160
|
+
#
|
|
161
|
+
# @example
|
|
162
|
+
# dynamic { |ctx|
|
|
163
|
+
# if ctx[:mode] == 'strict'
|
|
164
|
+
# str('strict_value')
|
|
165
|
+
# else
|
|
166
|
+
# str('relaxed_value')
|
|
167
|
+
# end
|
|
168
|
+
# }
|
|
169
|
+
#
|
|
170
|
+
class DynamicContext
|
|
171
|
+
# @return [String] The full input string being parsed
|
|
172
|
+
attr_reader :input
|
|
173
|
+
|
|
174
|
+
# @return [Integer] Current byte position in the input
|
|
175
|
+
attr_reader :pos
|
|
176
|
+
|
|
177
|
+
# @return [Hash<Symbol, String>] Captured values
|
|
178
|
+
attr_reader :captures
|
|
179
|
+
|
|
180
|
+
def initialize(input, pos, captures)
|
|
181
|
+
@input = input
|
|
182
|
+
@pos = pos
|
|
183
|
+
@captures = captures.transform_keys(&:to_sym)
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
# Get a captured value by name
|
|
187
|
+
#
|
|
188
|
+
# @param name [Symbol, String] The capture name
|
|
189
|
+
# @return [String, nil] The captured value or nil
|
|
190
|
+
#
|
|
191
|
+
def [](name)
|
|
192
|
+
@captures[name.to_sym]
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
# Check if a capture exists
|
|
196
|
+
#
|
|
197
|
+
# @param name [Symbol, String] The capture name
|
|
198
|
+
# @return [Boolean] True if the capture exists
|
|
199
|
+
#
|
|
200
|
+
def key?(name)
|
|
201
|
+
@captures.key?(name.to_sym)
|
|
202
|
+
end
|
|
203
|
+
alias has_key? key?
|
|
204
|
+
|
|
205
|
+
# Get the remaining input from the current position
|
|
206
|
+
#
|
|
207
|
+
# @return [String] The remaining input
|
|
208
|
+
#
|
|
209
|
+
def remaining
|
|
210
|
+
@input[@pos..] || ''
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
# Check if at end of input
|
|
214
|
+
#
|
|
215
|
+
# @return [Boolean] True if at end
|
|
216
|
+
#
|
|
217
|
+
def eos?
|
|
218
|
+
@pos >= @input.length
|
|
219
|
+
end
|
|
220
|
+
alias at_end? eos?
|
|
221
|
+
|
|
222
|
+
# Get a slice of the input
|
|
223
|
+
#
|
|
224
|
+
# @param start [Integer] Start offset added to the current position (may be negative)
|
|
225
|
+
# @param length [Integer, nil] Length of slice (nil = to end)
|
|
226
|
+
# @return [String] The sliced input
|
|
227
|
+
#
|
|
228
|
+
def slice(start, length = nil)
|
|
229
|
+
if length
|
|
230
|
+
@input[@pos + start, length]
|
|
231
|
+
else
|
|
232
|
+
@input[@pos + start..]
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
end
|
|
@@ -199,38 +199,41 @@ module Parsanol
|
|
|
199
199
|
end
|
|
200
200
|
|
|
201
201
|
def serialize_capture(atom)
|
|
202
|
-
# Capture stores matched text for later
|
|
203
|
-
#
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
202
|
+
# Capture stores matched text for later reference by Dynamic atoms.
|
|
203
|
+
# Now properly serialized for native parser support (parsanol-rs 0.3.0+).
|
|
204
|
+
{
|
|
205
|
+
'Capture' => {
|
|
206
|
+
'name' => atom.capture_key.to_s,
|
|
207
|
+
'atom' => serialize_atom(atom.inner_atom)
|
|
208
|
+
}
|
|
209
|
+
}
|
|
207
210
|
end
|
|
208
211
|
|
|
209
212
|
def serialize_scope(atom)
|
|
210
|
-
# Scope creates
|
|
211
|
-
#
|
|
212
|
-
# so we just serialize the inner atom from the block.
|
|
213
|
+
# Scope creates an isolated capture context.
|
|
214
|
+
# Captures made within scope are discarded when scope exits.
|
|
213
215
|
inner = begin
|
|
214
216
|
atom.block.call
|
|
215
217
|
rescue StandardError
|
|
216
218
|
nil
|
|
217
219
|
end
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
220
|
+
return serialize_unknown(atom) unless inner
|
|
221
|
+
|
|
222
|
+
{
|
|
223
|
+
'Scope' => {
|
|
224
|
+
'atom' => serialize_atom(inner)
|
|
225
|
+
}
|
|
226
|
+
}
|
|
223
227
|
end
|
|
224
228
|
|
|
225
|
-
def serialize_dynamic(
|
|
229
|
+
def serialize_dynamic(atom)
|
|
226
230
|
# Dynamic evaluates a Ruby block at parse time.
|
|
227
|
-
#
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
# with a clear error message.
|
|
231
|
+
# Register the block and get a callback ID for FFI.
|
|
232
|
+
callback_id = Parsanol::Native::Dynamic.register(atom.block)
|
|
233
|
+
|
|
231
234
|
{
|
|
232
|
-
'
|
|
233
|
-
'
|
|
235
|
+
'Dynamic' => {
|
|
236
|
+
'callback_id' => callback_id
|
|
234
237
|
}
|
|
235
238
|
}
|
|
236
239
|
end
|
data/lib/parsanol/native.rb
CHANGED
data/lib/parsanol/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: parsanol
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
@@ -213,6 +213,7 @@ files:
|
|
|
213
213
|
- lib/parsanol/lazy_result.rb
|
|
214
214
|
- lib/parsanol/mermaid.rb
|
|
215
215
|
- lib/parsanol/native.rb
|
|
216
|
+
- lib/parsanol/native/dynamic.rb
|
|
216
217
|
- lib/parsanol/native/parser.rb
|
|
217
218
|
- lib/parsanol/native/serializer.rb
|
|
218
219
|
- lib/parsanol/native/transformer.rb
|