parsanol 1.0.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/lib/parsanol/3.2/parsanol_native.so +0 -0
- data/lib/parsanol/3.3/parsanol_native.so +0 -0
- data/lib/parsanol/3.4/parsanol_native.so +0 -0
- data/lib/parsanol/4.0/parsanol_native.so +0 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +280 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: bda4f457cf0c67cd837ad2dd9e85c0a1692fabc6a794cec0305f4d9ac01f522e
|
|
4
|
+
data.tar.gz: ba7a62e1ee69770af33e1d76d011a00a15cea3fbd8d949f17d61ad76ba951c2d
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 5bcebc0381df461b2202609c7f8e567924876d5ef738a362f3f23fea85ad3529d65ebefff7d284b2a1f5ad29f0f9d4b19c4ec4228675efbf71129c0aa30d6adb
|
|
7
|
+
data.tar.gz: 13a3ac83a800399126bfb0a121f58356f680f1a3cf71f7861787a8468a534d4470078be6edf4add0c9c72376fe973a0861a1e1f2cc52464fede7629dc1a21938
|
data/HISTORY.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
== Parsanol 1.0.0 (2025-03-02)
|
|
2
|
+
|
|
3
|
+
Initial release of Parsanol, a high-performance PEG parser library for Ruby.
|
|
4
|
+
|
|
5
|
+
Features:
|
|
6
|
+
|
|
7
|
+
* Parslet-compatible API for easy migration
|
|
8
|
+
* Optional Rust native extension for 17-42x faster parsing
|
|
9
|
+
* Static frozen parsers for thread-safe operation
|
|
10
|
+
* Dynamic parsers for mutable grammars
|
|
11
|
+
* Great error reporting with source location tracking
|
|
12
|
+
* Packrat memoization for O(n) parsing complexity
|
data/LICENSE
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Copyright (c) 2025 Ribose Inc.
|
|
2
|
+
Copyright (c) 2010-2018 Kaspar Schiess
|
|
3
|
+
|
|
4
|
+
Permission is hereby granted, free of charge, to any person
|
|
5
|
+
obtaining a copy of this software and associated documentation
|
|
6
|
+
files (the "Software"), to deal in the Software without
|
|
7
|
+
restriction, including without limitation the rights to use,
|
|
8
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the
|
|
10
|
+
Software is furnished to do so, subject to the following
|
|
11
|
+
conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be
|
|
14
|
+
included in all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
18
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
20
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
21
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
22
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
23
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.adoc
ADDED
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
= Parsanol
|
|
2
|
+
|
|
3
|
+
image:https://img.shields.io/gem/v/parsanol.svg[RubyGems Version]
|
|
4
|
+
image:https://img.shields.io/github/license/parsanol/parsanol-ruby.svg[License]
|
|
5
|
+
image:https://github.com/parsanol/parsanol-ruby/actions/workflows/test.yml/badge.svg["Build", link="https://github.com/parsanol/parsanol-ruby/actions/workflows/test.yml"]
|
|
6
|
+
|
|
7
|
+
A high-performance PEG (Parsing Expression Grammar) parser construction library for Ruby with optional Rust native extensions.
|
|
8
|
+
|
|
9
|
+
== Purpose
|
|
10
|
+
|
|
11
|
+
Parsanol provides a declarative DSL for constructing parsers using PEG semantics. It offers excellent error reporting, memory efficiency through object pooling, and optional Rust native extensions for maximum performance. The library is designed as a drop-in replacement for Parslet while offering significant performance improvements.
|
|
12
|
+
|
|
13
|
+
// Inspiration attribution
|
|
14
|
+
[NOTE]
|
|
15
|
+
====
|
|
16
|
+
Parsanol is inspired by the https://github.com/kschiess/parslet[Parslet] library by Kaspar Schiess.
|
|
17
|
+
While maintaining full API compatibility with Parslet, Parsanol features a complete independent implementation with additional performance optimizations and features.
|
|
18
|
+
====
|
|
19
|
+
|
|
20
|
+
== Features
|
|
21
|
+
|
|
22
|
+
* <<basic-parsing,PEG-based Parser Construction>> - Declarative grammar definition
|
|
23
|
+
* <<error-reporting,Detailed Error Reporting>> - Precise failure location and context
|
|
24
|
+
* <<native-extension,Rust Native Extension>> - Up to 29x faster parsing
|
|
25
|
+
* <<slice-support,Slice Support>> - Source position preservation for linters and IDEs
|
|
26
|
+
* <<transformation,Tree Transformation>> - Pattern-based AST construction
|
|
27
|
+
* <<streaming-builder,Streaming Builder API>> - Single-pass parsing with callbacks
|
|
28
|
+
* <<parallel-parsing,Parallel Parsing>> - Multi-core batch processing
|
|
29
|
+
* <<infix-expressions,Infix Expression Parsing>> - Built-in operator precedence support
|
|
30
|
+
* <<security-features,Security Features>> - Input size and recursion limits
|
|
31
|
+
* <<debug-tools,Debug Tools>> - Tracing and grammar visualization
|
|
32
|
+
|
|
33
|
+
== Installation
|
|
34
|
+
|
|
35
|
+
Add this line to your application's Gemfile:
|
|
36
|
+
|
|
37
|
+
[source,ruby]
|
|
38
|
+
----
|
|
39
|
+
gem 'parsanol'
|
|
40
|
+
----
|
|
41
|
+
|
|
42
|
+
And then execute:
|
|
43
|
+
|
|
44
|
+
[source,shell]
|
|
45
|
+
----
|
|
46
|
+
bundle install
|
|
47
|
+
----
|
|
48
|
+
|
|
49
|
+
Or install it yourself as:
|
|
50
|
+
|
|
51
|
+
[source,shell]
|
|
52
|
+
----
|
|
53
|
+
gem install parsanol
|
|
54
|
+
----
|
|
55
|
+
|
|
56
|
+
== Usage
|
|
57
|
+
|
|
58
|
+
=== Basic Parser
|
|
59
|
+
[[basic-parsing]]
|
|
60
|
+
|
|
61
|
+
Define parsers by creating a class that inherits from `Parsanol::Parser` and declaring rules:
|
|
62
|
+
|
|
63
|
+
[source,ruby]
|
|
64
|
+
----
|
|
65
|
+
require 'parsanol'
|
|
66
|
+
|
|
67
|
+
class MyParser < Parsanol::Parser
|
|
68
|
+
rule(:keyword) { str('if') | str('while') }
|
|
69
|
+
rule(:expression) { keyword >> str('(') >> (expression | match('[a-z]').repeat(1)) >> str(')') }
|
|
70
|
+
root(:expression)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
parser = MyParser.new
|
|
74
|
+
result = parser.parse('if(x)')
|
|
75
|
+
----
|
|
76
|
+
|
|
77
|
+
=== Error Reporting
|
|
78
|
+
[[error-reporting]]
|
|
79
|
+
|
|
80
|
+
Parsanol provides detailed error messages when parsing fails:
|
|
81
|
+
|
|
82
|
+
[source,ruby]
|
|
83
|
+
----
|
|
84
|
+
begin
|
|
85
|
+
parser.parse('invalid input')
|
|
86
|
+
rescue Parsanol::ParseFailed => e
|
|
87
|
+
puts e.message
|
|
88
|
+
# => "Expected 'if' at line 1 char 1."
|
|
89
|
+
end
|
|
90
|
+
----
|
|
91
|
+
|
|
92
|
+
=== Transformation
|
|
93
|
+
[[transformation]]
|
|
94
|
+
|
|
95
|
+
Convert parse trees to AST using pattern-based transformations:
|
|
96
|
+
|
|
97
|
+
[source,ruby]
|
|
98
|
+
----
|
|
99
|
+
class MyTransform < Parsanol::Transform
|
|
100
|
+
rule(keyword: simple(:k)) { KeywordNode.new(k) }
|
|
101
|
+
rule(expression: subtree(:e)) { ExpressionNode.new(e) }
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
ast = MyTransform.new.apply(parse_tree)
|
|
105
|
+
----
|
|
106
|
+
|
|
107
|
+
=== Native Extension
|
|
108
|
+
[[native-extension]]
|
|
109
|
+
|
|
110
|
+
For maximum performance, compile the Rust native extension:
|
|
111
|
+
|
|
112
|
+
[source,shell]
|
|
113
|
+
----
|
|
114
|
+
# Install Rust toolchain first
|
|
115
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
116
|
+
|
|
117
|
+
# Compile the extension
|
|
118
|
+
bundle exec rake compile
|
|
119
|
+
----
|
|
120
|
+
|
|
121
|
+
=== Slice Support
|
|
122
|
+
[[slice-support]]
|
|
123
|
+
|
|
124
|
+
Parsanol preserves source positions for each parsed element:
|
|
125
|
+
|
|
126
|
+
[source,ruby]
|
|
127
|
+
----
|
|
128
|
+
# Result includes position information
|
|
129
|
+
[{"word" => "hello"@0}, " "@5, {"name" => "world"@6}]
|
|
130
|
+
|
|
131
|
+
# The @N notation shows the byte offset in the original input
|
|
132
|
+
# Parsanol::Slice is fully compatible with Parslet::Slice
|
|
133
|
+
----
|
|
134
|
+
|
|
135
|
+
This is essential for linters, IDEs, and tools that need to map parsed elements back to source locations.
|
|
136
|
+
|
|
137
|
+
== Migrating from Parslet
|
|
138
|
+
|
|
139
|
+
Parsanol provides full Parslet API compatibility with two migration modes.
|
|
140
|
+
|
|
141
|
+
=== Drop-in Replacement (Zero Code Changes)
|
|
142
|
+
|
|
143
|
+
Simply replace the parslet gem with parsanol in your Gemfile:
|
|
144
|
+
|
|
145
|
+
[source,ruby]
|
|
146
|
+
----
|
|
147
|
+
# Gemfile
|
|
148
|
+
- gem 'parslet'
|
|
149
|
+
+ gem 'parsanol'
|
|
150
|
+
----
|
|
151
|
+
|
|
152
|
+
Your existing code works without modification:
|
|
153
|
+
|
|
154
|
+
[source,ruby]
|
|
155
|
+
----
|
|
156
|
+
# No changes needed!
|
|
157
|
+
require 'parslet' # Parsanol aliases itself
|
|
158
|
+
|
|
159
|
+
class MyParser < Parslet::Parser
|
|
160
|
+
rule(:number) { match('[0-9]').repeat(1) }
|
|
161
|
+
root(:number)
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
parser = MyParser.new
|
|
165
|
+
parser.parse('123') # Works exactly the same
|
|
166
|
+
----
|
|
167
|
+
|
|
168
|
+
=== API Compatibility Matrix
|
|
169
|
+
|
|
170
|
+
[cols="2,1,3"]
|
|
171
|
+
|===
|
|
172
|
+
| Parslet API | Status | Notes
|
|
173
|
+
|
|
174
|
+
| `str('foo')` | ✅ | Literal string match
|
|
175
|
+
| `match('[0-9]')` | ✅ | Character class
|
|
176
|
+
| `any` | ✅ | Any single character
|
|
177
|
+
| `>>` (sequence) | ✅ | Sequential composition
|
|
178
|
+
| `\|` (choice) | ✅ | Ordered choice
|
|
179
|
+
| `.repeat(n, m)` | ✅ | Repetition with bounds
|
|
180
|
+
| `.maybe` | ✅ | Optional (zero or one)
|
|
181
|
+
| `.as(:name)` | ✅ | Label capture
|
|
182
|
+
| `.absent?` | ✅ | Negative lookahead
|
|
183
|
+
| `.present?` | ✅ | Positive lookahead
|
|
184
|
+
| `infix_expression` | ✅ | Precedence climbing
|
|
185
|
+
| `exp('...')` | ✅ | Treetop-style expression parsing
|
|
186
|
+
| `Parslet::Transform` | ✅ | Tree transformation
|
|
187
|
+
| `simple(:x)` | ✅ | Match simple value
|
|
188
|
+
| `sequence(:x)` | ✅ | Match array of values
|
|
189
|
+
| `subtree(:x)` | ✅ | Match any subtree
|
|
190
|
+
| `Parslet::Slice` | ✅ | Parsanol::Slice compatible
|
|
191
|
+
|===
|
|
192
|
+
|
|
193
|
+
== Architecture
|
|
194
|
+
|
|
195
|
+
.Parsanol architecture overview
|
|
196
|
+
[source]
|
|
197
|
+
----
|
|
198
|
+
┌─────────────────────────────────────┐
|
|
199
|
+
│ User Parser │
|
|
200
|
+
│ (inherits from Parsanol::Parser) │
|
|
201
|
+
└─────────────────┬───────────────────┘
|
|
202
|
+
│
|
|
203
|
+
┌─────────────────▼───────────────────┐
|
|
204
|
+
│ Parsing Backend │
|
|
205
|
+
├─────────────────┬───────────────────┤
|
|
206
|
+
│ Pure Ruby │ Rust Native │
|
|
207
|
+
│ (default) │ (optional) │
|
|
208
|
+
└─────────────────┴───────────────────┘
|
|
209
|
+
│
|
|
210
|
+
┌─────────────────▼───────────────────┐
|
|
211
|
+
│ Parse Tree │
|
|
212
|
+
│ (with Slice position info) │
|
|
213
|
+
└─────────────────┬───────────────────┘
|
|
214
|
+
│
|
|
215
|
+
┌─────────────────▼───────────────────┐
|
|
216
|
+
│ Parsanol::Transform │
|
|
217
|
+
│ (pattern-based transformation) │
|
|
218
|
+
└─────────────────┬───────────────────┘
|
|
219
|
+
│
|
|
220
|
+
┌─────────────────▼───────────────────┐
|
|
221
|
+
│ User AST │
|
|
222
|
+
└─────────────────────────────────────┘
|
|
223
|
+
----
|
|
224
|
+
|
|
225
|
+
=== Performance Modes
|
|
226
|
+
|
|
227
|
+
Parsanol offers multiple parsing modes with different performance characteristics:
|
|
228
|
+
|
|
229
|
+
[cols="4,2,2,3"]
|
|
230
|
+
|===
|
|
231
|
+
| Mode | Speed | Use Case | How It Works
|
|
232
|
+
|
|
233
|
+
| Pure Ruby | 1x (baseline) | Compatibility, debugging | Ruby parsing engine
|
|
234
|
+
| Native Batch | ~20x | Need Ruby objects | Rust parsing, AST via u64
|
|
235
|
+
| Native ZeroCopy | ~25x | Maximum performance | Direct FFI construction
|
|
236
|
+
| Native ZeroCopy + Slice | ~29x | Linters, IDEs | Zero-copy with positions
|
|
237
|
+
|===
|
|
238
|
+
|
|
239
|
+
== Streaming Builder API
|
|
240
|
+
[[streaming-builder]]
|
|
241
|
+
|
|
242
|
+
For maximum performance, use the streaming builder API which eliminates intermediate AST construction:
|
|
243
|
+
|
|
244
|
+
[source,ruby]
|
|
245
|
+
----
|
|
246
|
+
require 'parsanol'
|
|
247
|
+
|
|
248
|
+
class StringCollector
|
|
249
|
+
include Parsanol::BuilderCallbacks
|
|
250
|
+
|
|
251
|
+
def initialize
|
|
252
|
+
@strings = []
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
def on_string(value, offset, length)
|
|
256
|
+
@strings << value
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
def finish
|
|
260
|
+
@strings
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
grammar = Parsanol::Native.serialize_grammar(MyParser.new.root)
|
|
265
|
+
builder = StringCollector.new
|
|
266
|
+
result = Parsanol::Native.parse_with_builder(grammar, input, builder)
|
|
267
|
+
# result: ["hello", "world"]
|
|
268
|
+
----
|
|
269
|
+
|
|
270
|
+
==== Available Callback Methods
|
|
271
|
+
|
|
272
|
+
[cols="1,3,2"]
|
|
273
|
+
|===
|
|
274
|
+
| Method | Description | Default
|
|
275
|
+
|
|
276
|
+
| `on_start(input)` | Parsing started | No-op
|
|
277
|
+
| `on_success` | Parsing succeeded | No-op
|
|
278
|
+
| `on_error(message)` | Parsing failed | No-op
|
|
279
|
+
| `on_string(value, offset, length)` | String/slice matched | No-op
|
|
280
|
+
| `on_int(value)` | Integer matched | No-op
|
|
281
|
+
| `on_float(value)` | Float matched | No-op
|
|
282
|
+
| `on_bool(value)` | Boolean matched | No-op
|
|
283
|
+
| `on_nil` | Nil matched | No-op
|
|
284
|
+
| `on_hash_start(size)` | Entering a hash/object | No-op
|
|
285
|
+
| `on_hash_key(key)` | Hash key encountered | No-op
|
|
286
|
+
| `on_hash_end(size)` | Exiting a hash/object | No-op
|
|
287
|
+
| `on_array_start(size)` | Entering an array | No-op
|
|
288
|
+
| `on_array_end(size)` | Exiting an array | No-op
|
|
289
|
+
| `finish` | Parsing complete | Returns nil
|
|
290
|
+
|===
|
|
291
|
+
|
|
292
|
+
== Parallel Parsing
|
|
293
|
+
[[parallel-parsing]]
|
|
294
|
+
|
|
295
|
+
Parse multiple inputs using all CPU cores:
|
|
296
|
+
|
|
297
|
+
[source,ruby]
|
|
298
|
+
----
|
|
299
|
+
require 'parsanol/parallel'
|
|
300
|
+
|
|
301
|
+
grammar = MyParser.new.serialize_grammar
|
|
302
|
+
inputs = Dir.glob("*.json").map { |f| File.read(f) }
|
|
303
|
+
|
|
304
|
+
# Parse all files in parallel
|
|
305
|
+
results = Parsanol::Parallel.parse_batch(grammar, inputs)
|
|
306
|
+
|
|
307
|
+
# With configuration
|
|
308
|
+
config = Parsanol::Parallel::Config.new
|
|
309
|
+
.with_num_threads(4)
|
|
310
|
+
.with_min_chunk_size(50)
|
|
311
|
+
|
|
312
|
+
results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
|
|
313
|
+
----
|
|
314
|
+
|
|
315
|
+
== Infix Expression Parsing
|
|
316
|
+
[[infix-expressions]]
|
|
317
|
+
|
|
318
|
+
Built-in support for parsing infix expressions with operator precedence:
|
|
319
|
+
|
|
320
|
+
[source,ruby]
|
|
321
|
+
----
|
|
322
|
+
class CalculatorParser < Parsanol::Parser
|
|
323
|
+
rule(:number) { match('[0-9]').repeat(1).as(:int) }
|
|
324
|
+
rule(:primary) { number | str('(') >> expr >> str(')') }
|
|
325
|
+
|
|
326
|
+
rule(:expr) {
|
|
327
|
+
infix_expression(primary,
|
|
328
|
+
[str('*'), 2, :left],
|
|
329
|
+
[str('/'), 2, :left],
|
|
330
|
+
[str('+'), 1, :left],
|
|
331
|
+
[str('-'), 1, :left],
|
|
332
|
+
[str('^'), 3, :right] # Right-associative
|
|
333
|
+
)
|
|
334
|
+
}
|
|
335
|
+
root(:expr)
|
|
336
|
+
end
|
|
337
|
+
----
|
|
338
|
+
|
|
339
|
+
== Treetop Expression Syntax
|
|
340
|
+
[[treetop-expressions]]
|
|
341
|
+
|
|
342
|
+
Parsanol supports treetop-style expression strings for quick grammar definition:
|
|
343
|
+
|
|
344
|
+
[source,ruby]
|
|
345
|
+
----
|
|
346
|
+
# Using exp() for treetop-style expressions
|
|
347
|
+
class QuickParser < Parsanol::Parser
|
|
348
|
+
rule(:word) { exp("'a' 'b' ?") } # 'a' followed by optional 'b'
|
|
349
|
+
root(:word)
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
# Equivalent to:
|
|
353
|
+
rule(:word) { str('a') >> str('b').maybe }
|
|
354
|
+
----
|
|
355
|
+
|
|
356
|
+
=== Treetop Syntax Reference
|
|
357
|
+
|
|
358
|
+
[cols="2,3"]
|
|
359
|
+
|===
|
|
360
|
+
| Syntax | Description
|
|
361
|
+
|
|
362
|
+
| `'hello'` | Literal string match
|
|
363
|
+
| `[a-z]` | Character class
|
|
364
|
+
| `.` | Any single character
|
|
365
|
+
| `'a' 'b'` | Sequence (concatenation)
|
|
366
|
+
| `'a' / 'b'` | Alternative (choice)
|
|
367
|
+
| `'a' ?` | Optional (zero or one)
|
|
368
|
+
| `'a' *` | Zero or more repetitions
|
|
369
|
+
| `'a' +` | One or more repetitions
|
|
370
|
+
| `'a'{2,5}` | Between 2 and 5 repetitions
|
|
371
|
+
| `('a' / 'b')` | Grouping
|
|
372
|
+
|===
|
|
373
|
+
|
|
374
|
+
[NOTE]
|
|
375
|
+
====
|
|
376
|
+
Whitespace is required before operators: `'a' ?` not `'a'?`
|
|
377
|
+
====
|
|
378
|
+
|
|
379
|
+
=== Expression Parsing Performance
|
|
380
|
+
|
|
381
|
+
The expression parser is pure Ruby (not Rust-accelerated) since it runs only at grammar definition time. The resulting atoms can still be used with Rust-accelerated parsing:
|
|
382
|
+
|
|
383
|
+
[source,ruby]
|
|
384
|
+
----
|
|
385
|
+
atom = Parsanol.exp("'a' +")
|
|
386
|
+
|
|
387
|
+
# Ruby parsing
|
|
388
|
+
atom.parse('aaa')
|
|
389
|
+
|
|
390
|
+
# Rust-accelerated parsing (if native extension available)
|
|
391
|
+
grammar = Parsanol::Native.serialize_grammar(atom)
|
|
392
|
+
Parsanol::Native.parse_to_ruby_objects(grammar, 'aaa')
|
|
393
|
+
----
|
|
394
|
+
|
|
395
|
+
== Security Features
|
|
396
|
+
[[security-features]]
|
|
397
|
+
|
|
398
|
+
For parsing untrusted input, use built-in limits:
|
|
399
|
+
|
|
400
|
+
[source,ruby]
|
|
401
|
+
----
|
|
402
|
+
result = Parsanol::Native.parse_with_limits(
|
|
403
|
+
grammar_json,
|
|
404
|
+
untrusted_input,
|
|
405
|
+
max_input_size: 10 * 1024 * 1024, # 10 MB max
|
|
406
|
+
max_recursion_depth: 100 # Limit recursion
|
|
407
|
+
)
|
|
408
|
+
----
|
|
409
|
+
|
|
410
|
+
== Debug Tools
|
|
411
|
+
[[debug-tools]]
|
|
412
|
+
|
|
413
|
+
Enable tracing for debugging grammars:
|
|
414
|
+
|
|
415
|
+
[source,ruby]
|
|
416
|
+
----
|
|
417
|
+
# Parse with trace
|
|
418
|
+
result, trace = Parsanol::Native.parse_with_trace(grammar_json, input)
|
|
419
|
+
puts trace
|
|
420
|
+
|
|
421
|
+
# Generate grammar visualization
|
|
422
|
+
mermaid = Parsanol::Native.grammar_to_mermaid(grammar_json)
|
|
423
|
+
dot = Parsanol::Native.grammar_to_dot(grammar_json)
|
|
424
|
+
----
|
|
425
|
+
|
|
426
|
+
== Development
|
|
427
|
+
|
|
428
|
+
=== Setup
|
|
429
|
+
|
|
430
|
+
[source,shell]
|
|
431
|
+
----
|
|
432
|
+
bundle install
|
|
433
|
+
----
|
|
434
|
+
|
|
435
|
+
=== Testing
|
|
436
|
+
|
|
437
|
+
[source,shell]
|
|
438
|
+
----
|
|
439
|
+
# Run all tests
|
|
440
|
+
bundle exec rake spec
|
|
441
|
+
|
|
442
|
+
# Run unit tests only
|
|
443
|
+
bundle exec rake spec:unit
|
|
444
|
+
|
|
445
|
+
# Run specific test file
|
|
446
|
+
bundle exec rspec spec/parsanol/atoms/str_spec.rb
|
|
447
|
+
----
|
|
448
|
+
|
|
449
|
+
=== Compiling Native Extension
|
|
450
|
+
|
|
451
|
+
[source,shell]
|
|
452
|
+
----
|
|
453
|
+
# Install Rust (if not already installed)
|
|
454
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
455
|
+
|
|
456
|
+
# Compile the native extension
|
|
457
|
+
bundle exec rake compile
|
|
458
|
+
|
|
459
|
+
# Verify native extension is working
|
|
460
|
+
ruby -I lib -e "require 'parsanol'; puts Parsanol::Native.available?"
|
|
461
|
+
# => true
|
|
462
|
+
----
|
|
463
|
+
|
|
464
|
+
=== Running Benchmarks
|
|
465
|
+
|
|
466
|
+
[source,shell]
|
|
467
|
+
----
|
|
468
|
+
# Quick benchmarks
|
|
469
|
+
bundle exec rake benchmark
|
|
470
|
+
|
|
471
|
+
# Comprehensive benchmark suite
|
|
472
|
+
bundle exec rake benchmark:all
|
|
473
|
+
----
|
|
474
|
+
|
|
475
|
+
== License
|
|
476
|
+
|
|
477
|
+
MIT License - see LICENSE file for details.
|
|
478
|
+
|
|
479
|
+
== Acknowledgments
|
|
480
|
+
|
|
481
|
+
Parsanol is inspired by the https://github.com/kschiess/parslet[Parslet] library. We thank Kaspar Schiess and all Parslet contributors for creating an excellent parser library that served as inspiration for this project.
|
|
482
|
+
|
|
483
|
+
== Resources
|
|
484
|
+
|
|
485
|
+
* https://github.com/parsanol/parsanol-ruby[GitHub Repository]
|
|
486
|
+
* https://github.com/parsanol/parsanol-rs[Rust Crate]
|
|
487
|
+
* https://github.com/kschiess/parslet[Original Parslet Library]
|
data/Rakefile
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'bundler/gem_tasks'
|
|
4
|
+
require 'rspec/core/rake_task'
|
|
5
|
+
require 'rdoc/task'
|
|
6
|
+
require 'rubygems/package_task'
|
|
7
|
+
|
|
8
|
+
begin
|
|
9
|
+
require 'opal/rspec/rake_task'
|
|
10
|
+
rescue LoadError, NoMethodError
|
|
11
|
+
# Opal not available or incompatible with current Ruby version
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
GEMSPEC = Gem::Specification.load('parsanol-ruby.gemspec')
|
|
15
|
+
|
|
16
|
+
# Load rake tasks from rakelib/
|
|
17
|
+
Dir.glob('rakelib/*.rake').each { |r| load r }
|
|
18
|
+
|
|
19
|
+
desc 'Run all tests'  # `rake spec` (also the default task, see the end of this Rakefile)
|
|
20
|
+
RSpec::Core::RakeTask.new(:spec)  # no pattern override here, so RSpec's own default spec pattern applies
|
|
21
|
+
|
|
22
|
+
namespace :spec do  # sub-tasks are invoked as spec:unit and (when available) spec:opal
|
|
23
|
+
desc 'Run unit tests only'
|
|
24
|
+
RSpec::Core::RakeTask.new(:unit) do |task|
|
|
25
|
+
task.pattern = 'spec/parsanol/**/*_spec.rb'  # restrict the run to spec files under spec/parsanol/
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
if defined?(Opal::RSpec::RakeTask)  # only true when the optional opal require at the top of this file succeeded
|
|
29
|
+
desc 'Run Opal (JavaScript) tests'
|
|
30
|
+
Opal::RSpec::RakeTask.new(:opal) do |task|
|
|
31
|
+
task.append_path 'lib'  # make lib/ visible to the Opal build
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
RDoc::Task.new do |rdoc|  # configures RDoc API documentation generation for the gem
|
|
37
|
+
rdoc.rdoc_dir = 'rdoc'  # output directory for the generated documentation
|
|
38
|
+
rdoc.title = 'Parsanol'
|
|
39
|
+
rdoc.options << '--line-numbers'
|
|
40
|
+
rdoc.rdoc_files.include('README.adoc')  # NOTE(review): README is AsciiDoc, not RDoc markup - confirm it renders acceptably
|
|
41
|
+
rdoc.rdoc_files.include('lib/**/*.rb')  # every Ruby source file under lib/
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
desc 'Print LOC statistics'  # prints one "<dir> <line count>" row per existing directory
|
|
45
|
+
task :stat do  # NOTE(review): shells out to find/xargs/wc/grep, so it needs a Unix-like environment
|
|
46
|
+
%w[lib spec example].each do |dir|
|
|
47
|
+
next unless Dir.exist?(dir)  # silently skip directories that are not present
|
|
48
|
+
|
|
49
|
+
loc = `find #{dir} -name "*.rb" | xargs wc -l | grep 'total'`.split.first.to_i
|
|
50
|
+
printf("%20s %d\n", dir, loc)  # NOTE(review): loc is 0 when wc prints no 'total' line (e.g. a single .rb file) - confirm acceptable
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# ===== Native Gem Building =====
|
|
55
|
+
namespace :gem do  # tasks here are invoked as gem:native:any and gem:platform:any
|
|
56
|
+
desc 'Build source gem (compile on install)'
|
|
57
|
+
task 'native:any' do
|
|
58
|
+
sh 'rake gem:platform:any gem'  # fresh rake process: first define the package tasks, then run the resulting `gem` task
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
desc 'Define the gem task to build on any platform (compile on install)'
|
|
62
|
+
task 'platform:any' do
|
|
63
|
+
spec = Gem::Specification.load('parsanol-ruby.gemspec').dup  # work on a copy of the loaded gemspec
|
|
64
|
+
task = Gem::PackageTask.new(spec)
|
|
65
|
+
task.define  # registers the packaging tasks at run time rather than at Rakefile load time
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
namespace :benchmark do  # each task runs one script from benchmark/ in a child Ruby process
|
|
70
|
+
desc 'Run comprehensive benchmark suite'
|
|
71
|
+
task :all do
|
|
72
|
+
ruby 'benchmark/benchmark_suite.rb'
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
desc 'Run example-focused benchmarks'
|
|
76
|
+
task :examples do
|
|
77
|
+
ruby 'benchmark/example_benchmarks.rb'
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
desc 'Run benchmarks and export results to JSON/YAML'
|
|
81
|
+
task :export do
|
|
82
|
+
ruby 'benchmark/benchmark_runner.rb'
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
desc 'Run quick benchmark (examples only)'
|
|
86
|
+
task quick: :examples  # no body of its own: benchmark:quick just depends on benchmark:examples
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Load comparative benchmark tasks
|
|
90
|
+
Dir.glob('benchmark/tasks/*.rake').each { |r| load r }
|
|
91
|
+
|
|
92
|
+
desc 'Run quick benchmarks'
|
|
93
|
+
task benchmark: 'benchmark:quick'
|
|
94
|
+
|
|
95
|
+
# ===== Parslet Compatibility Tests =====
|
|
96
|
+
namespace :compat do  # runs the specs under spec/parslet_imported/ against a backend chosen via PARSANOL_BACKEND
|
|
97
|
+
desc 'Run imported Parslet tests with original Parslet (baseline)'
|
|
98
|
+
task :parslet do
|
|
99
|
+
ENV['PARSANOL_BACKEND'] = 'parslet'  # inherited by the rspec child process; presumably read by the spec helper - confirm
|
|
100
|
+
sh 'bundle exec rspec spec/parslet_imported/ --format documentation'
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
desc 'Run imported Parslet tests with Parsanol compatibility layer'
|
|
104
|
+
task :parsanol do
|
|
105
|
+
ENV['PARSANOL_BACKEND'] = 'parsanol'  # same spec run as compat:parslet, only the backend value differs
|
|
106
|
+
sh 'bundle exec rspec spec/parslet_imported/ --format documentation'
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
desc 'Run both and save results for comparison'
|
|
110
|
+
task :compare do  # NOTE(review): sh aborts the task on a non-zero exit, so failing baseline specs stop the second run - confirm intended
|
|
111
|
+
require 'fileutils'  # loaded lazily; only this task needs FileUtils.mkdir_p
|
|
112
|
+
|
|
113
|
+
results_dir = 'tmp/compat_results'
|
|
114
|
+
FileUtils.mkdir_p(results_dir)  # create the output directory (and parents) if missing
|
|
115
|
+
|
|
116
|
+
puts '=== Running with original Parslet ==='
|
|
117
|
+
ENV['PARSANOL_BACKEND'] = 'parslet'  # first pass: baseline; stdout and stderr are redirected into parslet.txt below
|
|
118
|
+
sh "bundle exec rspec spec/parslet_imported/ --format documentation > #{results_dir}/parslet.txt 2>&1"
|
|
119
|
+
|
|
120
|
+
puts "\n=== Running with Parsanol::Parslet ==="
|
|
121
|
+
ENV['PARSANOL_BACKEND'] = 'parsanol'  # second pass: same specs, output redirected into parsanol.txt below
|
|
122
|
+
sh "bundle exec rspec spec/parslet_imported/ --format documentation > #{results_dir}/parsanol.txt 2>&1"
|
|
123
|
+
|
|
124
|
+
puts "\n=== Comparing results ==="
|
|
125
|
+
puts 'Results saved to:'  # no diff is computed here; the task only prints where the two reports are
|
|
126
|
+
puts " - #{results_dir}/parslet.txt"
|
|
127
|
+
puts " - #{results_dir}/parsanol.txt"
|
|
128
|
+
puts "\nTo compare: diff #{results_dir}/parslet.txt #{results_dir}/parsanol.txt"
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
desc 'Run imported Parslet tests (default: with Parsanol)'
|
|
132
|
+
task run: :parsanol  # compat:run has no body; it just depends on compat:parsanol
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
task default: :spec
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|