parsanol 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +546 -0
- data/Cargo.toml +9 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/ext/parsanol_native/Cargo.toml +34 -0
- data/ext/parsanol_native/extconf.rb +15 -0
- data/ext/parsanol_native/src/lib.rs +17 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +293 -0
data/README.adoc
ADDED
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
= Parsanol
|
|
2
|
+
|
|
3
|
+
image:https://img.shields.io/gem/v/parsanol.svg[RubyGems Version]
|
|
4
|
+
image:https://img.shields.io/github/license/parsanol/parsanol-ruby.svg[License]
|
|
5
|
+
image:https://github.com/parsanol/parsanol-ruby/actions/workflows/test.yml/badge.svg["Build", link="https://github.com/parsanol/parsanol-ruby/actions/workflows/test.yml"]
|
|
6
|
+
|
|
7
|
+
A high-performance PEG (Parsing Expression Grammar) parser construction library for Ruby with optional Rust native extensions.
|
|
8
|
+
|
|
9
|
+
== Purpose
|
|
10
|
+
|
|
11
|
+
Parsanol provides a declarative DSL for constructing parsers using PEG semantics. It offers excellent error reporting, memory efficiency through object pooling, and optional Rust native extensions for maximum performance. The library is designed as a drop-in replacement for Parslet while offering significant performance improvements.
|
|
12
|
+
|
|
13
|
+
// Inspiration attribution
|
|
14
|
+
[NOTE]
|
|
15
|
+
====
|
|
16
|
+
Parsanol is inspired by the https://github.com/kschiess/parslet[Parslet] library by Kaspar Schiess.
|
|
17
|
+
While maintaining full API compatibility with Parslet, Parsanol features a complete independent implementation with additional performance optimizations and features.
|
|
18
|
+
====
|
|
19
|
+
|
|
20
|
+
== Features
|
|
21
|
+
|
|
22
|
+
* <<basic-parsing,PEG-based Parser Construction>> - Declarative grammar definition
|
|
23
|
+
* <<error-reporting,Detailed Error Reporting>> - Precise failure location and context
|
|
24
|
+
* <<native-extension,Rust Native Extension>> - Up to 29x faster parsing
|
|
25
|
+
* <<slice-support,Slice Support>> - Source position preservation for linters and IDEs
|
|
26
|
+
* <<transformation,Tree Transformation>> - Pattern-based AST construction
|
|
27
|
+
* <<streaming-builder,Streaming Builder API>> - Single-pass parsing with callbacks
|
|
28
|
+
* <<parallel-parsing,Parallel Parsing>> - Multi-core batch processing
|
|
29
|
+
* <<infix-expressions,Infix Expression Parsing>> - Built-in operator precedence support
|
|
30
|
+
* <<security-features,Security Features>> - Input size and recursion limits
|
|
31
|
+
* <<debug-tools,Debug Tools>> - Tracing and grammar visualization
|
|
32
|
+
|
|
33
|
+
== Installation
|
|
34
|
+
|
|
35
|
+
Add this line to your application's Gemfile:
|
|
36
|
+
|
|
37
|
+
[source,ruby]
|
|
38
|
+
----
|
|
39
|
+
gem 'parsanol'
|
|
40
|
+
----
|
|
41
|
+
|
|
42
|
+
And then execute:
|
|
43
|
+
|
|
44
|
+
[source,shell]
|
|
45
|
+
----
|
|
46
|
+
bundle install
|
|
47
|
+
----
|
|
48
|
+
|
|
49
|
+
Or install it yourself as:
|
|
50
|
+
|
|
51
|
+
[source,shell]
|
|
52
|
+
----
|
|
53
|
+
gem install parsanol
|
|
54
|
+
----
|
|
55
|
+
|
|
56
|
+
== Usage
|
|
57
|
+
|
|
58
|
+
=== Basic Parser
|
|
59
|
+
[[basic-parsing]]
|
|
60
|
+
|
|
61
|
+
Define parsers by creating a class that inherits from `Parsanol::Parser` and declaring rules:
|
|
62
|
+
|
|
63
|
+
[source,ruby]
|
|
64
|
+
----
|
|
65
|
+
require 'parsanol'
|
|
66
|
+
|
|
67
|
+
class MyParser < Parsanol::Parser
|
|
68
|
+
rule(:keyword) { str('if') | str('while') }
|
|
69
|
+
rule(:expression) { keyword >> str('(') >> expression >> str(')') }
|
|
70
|
+
root(:expression)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
parser = MyParser.new
|
|
74
|
+
result = parser.parse('if(x)')
|
|
75
|
+
----
|
|
76
|
+
|
|
77
|
+
=== Error Reporting
|
|
78
|
+
[[error-reporting]]
|
|
79
|
+
|
|
80
|
+
Parsanol provides detailed error messages when parsing fails:
|
|
81
|
+
|
|
82
|
+
[source,ruby]
|
|
83
|
+
----
|
|
84
|
+
begin
|
|
85
|
+
parser.parse('invalid input')
|
|
86
|
+
rescue Parsanol::ParseFailed => e
|
|
87
|
+
puts e.message
|
|
88
|
+
# => "Expected 'if' at line 1 char 1."
|
|
89
|
+
end
|
|
90
|
+
----
|
|
91
|
+
|
|
92
|
+
=== Transformation
|
|
93
|
+
[[transformation]]
|
|
94
|
+
|
|
95
|
+
Convert parse trees to AST using pattern-based transformations:
|
|
96
|
+
|
|
97
|
+
[source,ruby]
|
|
98
|
+
----
|
|
99
|
+
class MyTransform < Parsanol::Transform
|
|
100
|
+
rule(keyword: simple(:k)) { KeywordNode.new(k) }
|
|
101
|
+
rule(expression: subtree(:e)) { ExpressionNode.new(e) }
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
ast = MyTransform.new.apply(parse_tree)
|
|
105
|
+
----
|
|
106
|
+
|
|
107
|
+
=== Native Extension
|
|
108
|
+
[[native-extension]]
|
|
109
|
+
|
|
110
|
+
For maximum performance, compile the Rust native extension:
|
|
111
|
+
|
|
112
|
+
[source,shell]
|
|
113
|
+
----
|
|
114
|
+
# Install Rust toolchain first
|
|
115
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
116
|
+
|
|
117
|
+
# Compile the extension
|
|
118
|
+
bundle exec rake compile
|
|
119
|
+
----
|
|
120
|
+
|
|
121
|
+
=== Slice Support
|
|
122
|
+
[[slice-support]]
|
|
123
|
+
|
|
124
|
+
Parsanol preserves source positions for each parsed element:
|
|
125
|
+
|
|
126
|
+
[source,ruby]
|
|
127
|
+
----
|
|
128
|
+
# Result includes position information
|
|
129
|
+
[{"word" => "hello"@0}, " "@5, {"name" => "world"@6}]
|
|
130
|
+
|
|
131
|
+
# The @N notation shows the byte offset in the original input
|
|
132
|
+
# Parsanol::Slice is fully compatible with Parslet::Slice
|
|
133
|
+
----
|
|
134
|
+
|
|
135
|
+
This is essential for linters, IDEs, and tools that need to map parsed elements back to source locations.
|
|
136
|
+
|
|
137
|
+
== Migrating from Parslet
|
|
138
|
+
|
|
139
|
+
Parsanol provides full Parslet API compatibility with two migration modes.
|
|
140
|
+
|
|
141
|
+
=== Drop-in Replacement (Zero Code Changes)
|
|
142
|
+
|
|
143
|
+
Simply replace the parslet gem with parsanol in your Gemfile:
|
|
144
|
+
|
|
145
|
+
[source,ruby]
|
|
146
|
+
----
|
|
147
|
+
# Gemfile
|
|
148
|
+
- gem 'parslet'
|
|
149
|
+
+ gem 'parsanol'
|
|
150
|
+
----
|
|
151
|
+
|
|
152
|
+
Your existing code works without modification:
|
|
153
|
+
|
|
154
|
+
[source,ruby]
|
|
155
|
+
----
|
|
156
|
+
# No changes needed!
|
|
157
|
+
require 'parslet' # Parsanol aliases itself
|
|
158
|
+
|
|
159
|
+
class MyParser < Parslet::Parser
|
|
160
|
+
rule(:number) { match('[0-9]').repeat(1) }
|
|
161
|
+
root(:number)
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
parser = MyParser.new
|
|
165
|
+
parser.parse('123') # Works exactly the same
|
|
166
|
+
----
|
|
167
|
+
|
|
168
|
+
=== API Compatibility Matrix
|
|
169
|
+
|
|
170
|
+
[cols="2,1,3"]
|
|
171
|
+
|===
|
|
172
|
+
| Parslet API | Status | Notes
|
|
173
|
+
|
|
174
|
+
| `str('foo')` | ✅ | Literal string match
|
|
175
|
+
| `match('[0-9]')` | ✅ | Character class
|
|
176
|
+
| `any` | ✅ | Any single character
|
|
177
|
+
| `>>` (sequence) | ✅ | Sequential composition
|
|
178
|
+
| `\|` (choice) | ✅ | Ordered choice
|
|
179
|
+
| `.repeat(n, m)` | ✅ | Repetition with bounds
|
|
180
|
+
| `.maybe` | ✅ | Optional (zero or one)
|
|
181
|
+
| `.as(:name)` | ✅ | Label capture
|
|
182
|
+
| `.absent?` | ✅ | Negative lookahead
|
|
183
|
+
| `.present?` | ✅ | Positive lookahead
|
|
184
|
+
| `infix_expression` | ✅ | Precedence climbing
|
|
185
|
+
| `exp('...')` | ✅ | Treetop-style expression parsing
|
|
186
|
+
| `Parslet::Transform` | ✅ | Tree transformation
|
|
187
|
+
| `simple(:x)` | ✅ | Match simple value
|
|
188
|
+
| `sequence(:x)` | ✅ | Match array of values
|
|
189
|
+
| `subtree(:x)` | ✅ | Match any subtree
|
|
190
|
+
| `Parslet::Slice` | ✅ | Parsanol::Slice compatible
|
|
191
|
+
|===
|
|
192
|
+
|
|
193
|
+
== Architecture
|
|
194
|
+
|
|
195
|
+
.Parsanol architecture overview
|
|
196
|
+
[source]
|
|
197
|
+
----
|
|
198
|
+
┌─────────────────────────────────────┐
|
|
199
|
+
│ User Parser │
|
|
200
|
+
│ (inherits from Parsanol::Parser) │
|
|
201
|
+
└─────────────────┬───────────────────┘
|
|
202
|
+
│
|
|
203
|
+
┌─────────────────▼───────────────────┐
|
|
204
|
+
│ Parsing Backend │
|
|
205
|
+
├─────────────────┬───────────────────┤
|
|
206
|
+
│ Pure Ruby │ Rust Native │
|
|
207
|
+
│ (default) │ (optional) │
|
|
208
|
+
└─────────────────┴───────────────────┘
|
|
209
|
+
│
|
|
210
|
+
┌─────────────────▼───────────────────┐
|
|
211
|
+
│ Parse Tree │
|
|
212
|
+
│ (with Slice position info) │
|
|
213
|
+
└─────────────────┬───────────────────┘
|
|
214
|
+
│
|
|
215
|
+
┌─────────────────▼───────────────────┐
|
|
216
|
+
│ Parsanol::Transform │
|
|
217
|
+
│ (pattern-based transformation) │
|
|
218
|
+
└─────────────────┬───────────────────┘
|
|
219
|
+
│
|
|
220
|
+
┌─────────────────▼───────────────────┐
|
|
221
|
+
│ User AST │
|
|
222
|
+
└─────────────────────────────────────┘
|
|
223
|
+
----
|
|
224
|
+
|
|
225
|
+
=== Performance Modes
|
|
226
|
+
|
|
227
|
+
Parsanol offers multiple parsing modes with different performance characteristics:
|
|
228
|
+
|
|
229
|
+
[cols="4,2,2,3"]
|
|
230
|
+
|===
|
|
231
|
+
| Mode | Speed | Use Case | How It Works
|
|
232
|
+
|
|
233
|
+
| Pure Ruby | 1x (baseline) | Compatibility, debugging | Ruby parsing engine
|
|
234
|
+
| Native Batch | ~20x | Need Ruby objects | Rust parsing, AST via u64
|
|
235
|
+
| Native ZeroCopy | ~25x | Maximum performance | Direct FFI construction
|
|
236
|
+
| Native ZeroCopy + Slice | ~29x | Linters, IDEs | Zero-copy with positions
|
|
237
|
+
|===
|
|
238
|
+
|
|
239
|
+
== Streaming Builder API
|
|
240
|
+
[[streaming-builder]]
|
|
241
|
+
|
|
242
|
+
For maximum performance, use the streaming builder API which eliminates intermediate AST construction:
|
|
243
|
+
|
|
244
|
+
[source,ruby]
|
|
245
|
+
----
|
|
246
|
+
require 'parsanol'
|
|
247
|
+
|
|
248
|
+
class StringCollector
|
|
249
|
+
include Parsanol::BuilderCallbacks
|
|
250
|
+
|
|
251
|
+
def initialize
|
|
252
|
+
@strings = []
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
def on_string(value, offset, length)
|
|
256
|
+
@strings << value
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
def finish
|
|
260
|
+
@strings
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
grammar = Parsanol::Native.serialize_grammar(MyParser.new.root)
|
|
265
|
+
builder = StringCollector.new
|
|
266
|
+
result = Parsanol::Native.parse_with_builder(grammar, input, builder)
|
|
267
|
+
# result: ["hello", "world"]
|
|
268
|
+
----
|
|
269
|
+
|
|
270
|
+
==== Available Callback Methods
|
|
271
|
+
|
|
272
|
+
[cols="1,3,2"]
|
|
273
|
+
|===
|
|
274
|
+
| Method | Description | Default
|
|
275
|
+
|
|
276
|
+
| `on_start(input)` | Parsing started | No-op
|
|
277
|
+
| `on_success` | Parsing succeeded | No-op
|
|
278
|
+
| `on_error(message)` | Parsing failed | No-op
|
|
279
|
+
| `on_string(value, offset, length)` | String/slice matched | No-op
|
|
280
|
+
| `on_int(value)` | Integer matched | No-op
|
|
281
|
+
| `on_float(value)` | Float matched | No-op
|
|
282
|
+
| `on_bool(value)` | Boolean matched | No-op
|
|
283
|
+
| `on_nil` | Nil matched | No-op
|
|
284
|
+
| `on_hash_start(size)` | Entering a hash/object | No-op
|
|
285
|
+
| `on_hash_key(key)` | Hash key encountered | No-op
|
|
286
|
+
| `on_hash_end(size)` | Exiting a hash/object | No-op
|
|
287
|
+
| `on_array_start(size)` | Entering an array | No-op
|
|
288
|
+
| `on_array_end(size)` | Exiting an array | No-op
|
|
289
|
+
| `finish` | Parsing complete | Returns nil
|
|
290
|
+
|===
|
|
291
|
+
|
|
292
|
+
== Parallel Parsing
|
|
293
|
+
[[parallel-parsing]]
|
|
294
|
+
|
|
295
|
+
Parse multiple inputs using all CPU cores:
|
|
296
|
+
|
|
297
|
+
[source,ruby]
|
|
298
|
+
----
|
|
299
|
+
require 'parsanol/parallel'
|
|
300
|
+
|
|
301
|
+
grammar = MyParser.new.serialize_grammar
|
|
302
|
+
inputs = Dir.glob("*.json").map { |f| File.read(f) }
|
|
303
|
+
|
|
304
|
+
# Parse all files in parallel
|
|
305
|
+
results = Parsanol::Parallel.parse_batch(grammar, inputs)
|
|
306
|
+
|
|
307
|
+
# With configuration
|
|
308
|
+
config = Parsanol::Parallel::Config.new
|
|
309
|
+
.with_num_threads(4)
|
|
310
|
+
.with_min_chunk_size(50)
|
|
311
|
+
|
|
312
|
+
results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
|
|
313
|
+
----
|
|
314
|
+
|
|
315
|
+
== Infix Expression Parsing
|
|
316
|
+
[[infix-expressions]]
|
|
317
|
+
|
|
318
|
+
Built-in support for parsing infix expressions with operator precedence:
|
|
319
|
+
|
|
320
|
+
[source,ruby]
|
|
321
|
+
----
|
|
322
|
+
class CalculatorParser < Parsanol::Parser
|
|
323
|
+
rule(:number) { match('[0-9]').repeat(1).as(:int) }
|
|
324
|
+
rule(:primary) { number | str('(') >> expr >> str(')') }
|
|
325
|
+
|
|
326
|
+
rule(:expr) {
|
|
327
|
+
infix_expression(primary,
|
|
328
|
+
[str('*'), 2, :left],
|
|
329
|
+
[str('/'), 2, :left],
|
|
330
|
+
[str('+'), 1, :left],
|
|
331
|
+
[str('-'), 1, :left],
|
|
332
|
+
[str('^'), 3, :right] # Right-associative
|
|
333
|
+
)
|
|
334
|
+
}
|
|
335
|
+
root(:expr)
|
|
336
|
+
end
|
|
337
|
+
----
|
|
338
|
+
|
|
339
|
+
== Treetop Expression Syntax
|
|
340
|
+
[[treetop-expressions]]
|
|
341
|
+
|
|
342
|
+
Parsanol supports treetop-style expression strings for quick grammar definition:
|
|
343
|
+
|
|
344
|
+
[source,ruby]
|
|
345
|
+
----
|
|
346
|
+
# Using exp() for treetop-style expressions
|
|
347
|
+
class QuickParser < Parsanol::Parser
|
|
348
|
+
rule(:word) { exp("'a' 'b' ?") } # 'a' followed by optional 'b'
|
|
349
|
+
root(:word)
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
# Equivalent to:
|
|
353
|
+
rule(:word) { str('a') >> str('b').maybe }
|
|
354
|
+
----
|
|
355
|
+
|
|
356
|
+
=== Treetop Syntax Reference
|
|
357
|
+
|
|
358
|
+
[cols="2,3"]
|
|
359
|
+
|===
|
|
360
|
+
| Syntax | Description
|
|
361
|
+
|
|
362
|
+
| `'hello'` | Literal string match
|
|
363
|
+
| `[a-z]` | Character class
|
|
364
|
+
| `.` | Any single character
|
|
365
|
+
| `'a' 'b'` | Sequence (concatenation)
|
|
366
|
+
| `'a' / 'b'` | Alternative (choice)
|
|
367
|
+
| `'a' ?` | Optional (zero or one)
|
|
368
|
+
| `'a' *` | Zero or more repetitions
|
|
369
|
+
| `'a' +` | One or more repetitions
|
|
370
|
+
| `'a'{2,5}` | Between 2 and 5 repetitions
|
|
371
|
+
| `('a' / 'b')` | Grouping
|
|
372
|
+
|===
|
|
373
|
+
|
|
374
|
+
[NOTE]
|
|
375
|
+
====
|
|
376
|
+
Whitespace is required before operators: `'a' ?` not `'a'?`
|
|
377
|
+
====
|
|
378
|
+
|
|
379
|
+
=== Expression Parsing Performance
|
|
380
|
+
|
|
381
|
+
The expression parser is pure Ruby (not Rust-accelerated) since it runs only at grammar definition time. The resulting atoms can still be used with Rust-accelerated parsing:
|
|
382
|
+
|
|
383
|
+
[source,ruby]
|
|
384
|
+
----
|
|
385
|
+
atom = Parsanol.exp("'a' +")
|
|
386
|
+
|
|
387
|
+
# Ruby parsing
|
|
388
|
+
atom.parse('aaa')
|
|
389
|
+
|
|
390
|
+
# Rust-accelerated parsing (if native extension available)
|
|
391
|
+
grammar = Parsanol::Native.serialize_grammar(atom)
|
|
392
|
+
Parsanol::Native.parse_to_ruby_objects(grammar, 'aaa')
|
|
393
|
+
----
|
|
394
|
+
|
|
395
|
+
== Security Features
|
|
396
|
+
[[security-features]]
|
|
397
|
+
|
|
398
|
+
For parsing untrusted input, use built-in limits:
|
|
399
|
+
|
|
400
|
+
[source,ruby]
|
|
401
|
+
----
|
|
402
|
+
result = Parsanol::Native.parse_with_limits(
|
|
403
|
+
grammar_json,
|
|
404
|
+
untrusted_input,
|
|
405
|
+
max_input_size: 10 * 1024 * 1024, # 10 MB max
|
|
406
|
+
max_recursion_depth: 100 # Limit recursion
|
|
407
|
+
)
|
|
408
|
+
----
|
|
409
|
+
|
|
410
|
+
== Debug Tools
|
|
411
|
+
[[debug-tools]]
|
|
412
|
+
|
|
413
|
+
Enable tracing for debugging grammars:
|
|
414
|
+
|
|
415
|
+
[source,ruby]
|
|
416
|
+
----
|
|
417
|
+
# Parse with trace
|
|
418
|
+
result, trace = Parsanol::Native.parse_with_trace(grammar_json, input)
|
|
419
|
+
puts trace
|
|
420
|
+
|
|
421
|
+
# Generate grammar visualization
|
|
422
|
+
mermaid = Parsanol::Native.grammar_to_mermaid(grammar_json)
|
|
423
|
+
dot = Parsanol::Native.grammar_to_dot(grammar_json)
|
|
424
|
+
----
|
|
425
|
+
|
|
426
|
+
== Development
|
|
427
|
+
|
|
428
|
+
=== Setup
|
|
429
|
+
|
|
430
|
+
[source,shell]
|
|
431
|
+
----
|
|
432
|
+
bundle install
|
|
433
|
+
----
|
|
434
|
+
|
|
435
|
+
=== Testing
|
|
436
|
+
|
|
437
|
+
[source,shell]
|
|
438
|
+
----
|
|
439
|
+
# Run all tests
|
|
440
|
+
bundle exec rake spec
|
|
441
|
+
|
|
442
|
+
# Run unit tests only
|
|
443
|
+
bundle exec rake spec:unit
|
|
444
|
+
|
|
445
|
+
# Run specific test file
|
|
446
|
+
bundle exec rspec spec/parsanol/atoms/str_spec.rb
|
|
447
|
+
----
|
|
448
|
+
|
|
449
|
+
=== Compiling Native Extension
|
|
450
|
+
|
|
451
|
+
[source,shell]
|
|
452
|
+
----
|
|
453
|
+
# Install Rust (if not already installed)
|
|
454
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
455
|
+
|
|
456
|
+
# Compile the native extension
|
|
457
|
+
bundle exec rake compile
|
|
458
|
+
|
|
459
|
+
# Verify native extension is working
|
|
460
|
+
ruby -I lib -e "require 'parsanol'; puts Parsanol::Native.available?"
|
|
461
|
+
# => true
|
|
462
|
+
----
|
|
463
|
+
|
|
464
|
+
=== Running Benchmarks
|
|
465
|
+
|
|
466
|
+
[source,shell]
|
|
467
|
+
----
|
|
468
|
+
# Quick benchmarks
|
|
469
|
+
bundle exec rake benchmark
|
|
470
|
+
|
|
471
|
+
# Comprehensive benchmark suite
|
|
472
|
+
bundle exec rake benchmark:all
|
|
473
|
+
----
|
|
474
|
+
|
|
475
|
+
== License
|
|
476
|
+
|
|
477
|
+
MIT License - see LICENSE file for details.
|
|
478
|
+
|
|
479
|
+
== Acknowledgments
|
|
480
|
+
|
|
481
|
+
Parsanol is inspired by the https://github.com/kschiess/parslet[Parslet] library. We thank Kaspar Schiess and all Parslet contributors for creating an excellent parser library that served as inspiration for this project.
|
|
482
|
+
|
|
483
|
+
== Resources
|
|
484
|
+
|
|
485
|
+
* https://github.com/parsanol/parsanol-ruby[GitHub Repository]
|
|
486
|
+
* https://github.com/parsanol/parsanol-rs[Rust Crate]
|
|
487
|
+
* https://github.com/kschiess/parslet[Original Parslet Library]
|
data/Rakefile
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'bundler/gem_tasks'
|
|
4
|
+
require 'rspec/core/rake_task'
|
|
5
|
+
require 'rdoc/task'
|
|
6
|
+
require 'rubygems/package_task'
|
|
7
|
+
|
|
8
|
+
begin
|
|
9
|
+
require 'opal/rspec/rake_task'
|
|
10
|
+
rescue LoadError, NoMethodError
|
|
11
|
+
# Opal not available or incompatible with current Ruby version
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
GEMSPEC = Gem::Specification.load('parsanol-ruby.gemspec')
|
|
15
|
+
|
|
16
|
+
# Load rake tasks from rakelib/
|
|
17
|
+
Dir.glob('rakelib/*.rake').each { |r| load r }
|
|
18
|
+
|
|
19
|
+
# Full RSpec run with RSpec's default file pattern.
desc 'Run all tests'
RSpec::Core::RakeTask.new(:spec)

namespace :spec do
  # Restrict the run to the specs under spec/parsanol/.
  desc 'Run unit tests only'
  RSpec::Core::RakeTask.new(:unit) { |unit| unit.pattern = 'spec/parsanol/**/*_spec.rb' }

  # The Opal task class is only defined when the optional
  # `opal/rspec/rake_task` require at the top of this file succeeded.
  if defined?(Opal::RSpec::RakeTask)
    desc 'Run Opal (JavaScript) tests'
    Opal::RSpec::RakeTask.new(:opal) { |opal| opal.append_path 'lib' }
  end
end
|
|
35
|
+
|
|
36
|
+
# API documentation: rendered into ./rdoc from the README and every
# Ruby file under lib/.
RDoc::Task.new do |doc_task|
  doc_task.title = 'Parsanol'
  doc_task.rdoc_dir = 'rdoc'
  doc_task.options.push('--line-numbers')
  doc_task.rdoc_files.include('README.adoc', 'lib/**/*.rb')
end
|
|
43
|
+
|
|
44
|
+
desc 'Print LOC statistics'
task :stat do
  # Directories that do not exist in this checkout are silently skipped.
  %w[lib spec example].each do |dir|
    next unless Dir.exist?(dir)

    # Count lines in Ruby instead of shelling out to
    # `find | xargs wc -l | grep total`: `wc` prints no "total" line when it
    # is handed a single file (so a one-file directory was reported as 0),
    # the pipeline split paths containing whitespace, and it required Unix
    # tools to be on the PATH.
    loc = Dir.glob(File.join(dir, '**', '*.rb'))
             .select { |path| File.file?(path) }
             .sum { |path| File.foreach(path).count }
    printf("%20s %d\n", dir, loc)
  end
end
|
|
53
|
+
|
|
54
|
+
# ===== Native Gem Building =====
|
|
55
|
+
namespace :gem do
  # Re-enters rake in a subprocess: `gem:platform:any` must run first so that
  # the `gem` task it defines exists by the time it is requested.
  desc 'Build source gem (compile on install)'
  task('native:any') { sh 'rake gem:platform:any gem' }

  # Gem::PackageTask.new without a block does not register its tasks, hence
  # the explicit #define call.
  desc 'Define the gem task to build on any platform (compile on install)'
  task 'platform:any' do
    source_spec = Gem::Specification.load('parsanol-ruby.gemspec').dup
    Gem::PackageTask.new(source_spec).define
  end
end
|
|
68
|
+
|
|
69
|
+
namespace :benchmark do
  # task name => [description, script executed with the current Ruby]
  {
    all: ['Run comprehensive benchmark suite', 'benchmark/benchmark_suite.rb'],
    examples: ['Run example-focused benchmarks', 'benchmark/example_benchmarks.rb'],
    export: ['Run benchmarks and export results to JSON/YAML', 'benchmark/benchmark_runner.rb']
  }.each do |task_name, (summary, script)|
    desc summary
    task(task_name) { ruby script }
  end

  desc 'Run quick benchmark (examples only)'
  task quick: :examples
end

# Load comparative benchmark tasks
Dir.glob('benchmark/tasks/*.rake').each do |rake_file|
  load rake_file
end

# Bare `rake benchmark` is an alias for the quick run.
desc 'Run quick benchmarks'
task benchmark: 'benchmark:quick'
|
|
94
|
+
|
|
95
|
+
# ===== Parslet Compatibility Tests =====
|
|
96
|
+
namespace :compat do
  # One command for every task below; the backend is chosen through the
  # PARSANOL_BACKEND environment variable set right before each run.
  rspec_cmd = 'bundle exec rspec spec/parslet_imported/ --format documentation'

  desc 'Run imported Parslet tests with original Parslet (baseline)'
  task :parslet do
    ENV['PARSANOL_BACKEND'] = 'parslet'
    sh rspec_cmd
  end

  desc 'Run imported Parslet tests with Parsanol compatibility layer'
  task :parsanol do
    ENV['PARSANOL_BACKEND'] = 'parsanol'
    sh rspec_cmd
  end

  desc 'Run both and save results for comparison'
  task :compare do
    require 'fileutils'

    results_dir = 'tmp/compat_results'
    FileUtils.mkdir_p(results_dir)

    runs = [
      ['parslet', '=== Running with original Parslet ==='],
      ['parsanol', "\n=== Running with Parsanol::Parslet ==="]
    ]

    failed = []
    runs.each do |backend, banner|
      puts banner
      ENV['PARSANOL_BACKEND'] = backend
      # Block form of `sh`: a non-zero exit is recorded instead of raising,
      # so a failing run can no longer prevent the other run (and the summary
      # below) from happening.
      sh "#{rspec_cmd} > #{results_dir}/#{backend}.txt 2>&1" do |ok, _status|
        failed << backend unless ok
      end
    end

    puts "\n=== Comparing results ==="
    puts 'Results saved to:'
    puts "  - #{results_dir}/parslet.txt"
    puts "  - #{results_dir}/parsanol.txt"
    puts "\nTo compare: diff #{results_dir}/parslet.txt #{results_dir}/parsanol.txt"

    # Still finish with a failing exit status when any run failed, as before.
    abort "compat:compare: rspec failed for backend(s): #{failed.join(', ')}" unless failed.empty?
  end

  desc 'Run imported Parslet tests (default: with Parsanol)'
  task run: :parsanol
end
|
|
134
|
+
|
|
135
|
+
# Bare `rake` runs the full spec task.
task default: [:spec]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Parsanol Native Extension
|
|
2
|
+
#
|
|
3
|
+
# Rust extension for parsanol-ruby that provides:
|
|
4
|
+
# - Fast parsing with packrat memoization
|
|
5
|
+
# - Three transform modes: RubyTransform, Serialized, ZeroCopy
|
|
6
|
+
# - Source location tracking
|
|
7
|
+
# - Streaming parsing
|
|
8
|
+
# - Incremental parsing for editor integration
|
|
9
|
+
# - Custom atom extension points
|
|
10
|
+
# - Plugin architecture
|
|
11
|
+
|
|
12
|
+
[package]
|
|
13
|
+
name = "parsanol_native"
|
|
14
|
+
version = "1.0.0"
|
|
15
|
+
edition = "2021"
|
|
16
|
+
rust-version = "1.75"
|
|
17
|
+
|
|
18
|
+
[lib]
|
|
19
|
+
crate-type = ["cdylib"]
|
|
20
|
+
|
|
21
|
+
[dependencies]
|
|
22
|
+
# rb-sys for Ruby C API - with link-ruby to link at compile time
|
|
23
|
+
# On Windows ARM64, we need to build from source since pre-built binaries aren't available
|
|
24
|
+
rb-sys = { version = "0.9.124", features = ["link-ruby", "global-allocator"] }
|
|
25
|
+
|
|
26
|
+
# magnus for Ruby bindings (0.8 for Ruby 3.1 support)
|
|
27
|
+
magnus = "0.8"
|
|
28
|
+
|
|
29
|
+
# parsanol parser library
|
|
30
|
+
parsanol = { version = "0.1.6", features = ["ruby"] }
|
|
31
|
+
|
|
32
|
+
# Logging
|
|
33
|
+
log = "0.4"
|
|
34
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'mkmf'
|
|
4
|
+
require 'rb_sys/mkmf'
|
|
5
|
+
|
|
6
|
+
create_rust_makefile('parsanol/parsanol_native') do |r|
  # Build with the optimized release profile unless told otherwise, so that
  # compiling the extension at `gem install` time does not produce a debug
  # build. Set RB_SYS_CARGO_PROFILE=dev for faster development builds.
  r.profile = ENV.fetch('RB_SYS_CARGO_PROFILE', :release).to_sym

  # Enable stable API compiled fallback for ruby-head and older Ruby versions
  r.use_stable_api_compiled_fallback = true

  # Force install rust toolchain if needed (can also set RB_SYS_FORCE_INSTALL_RUST_TOOLCHAIN=true)
  r.force_install_rust_toolchain = false
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
//! Parsanol Native Extension
|
|
2
|
+
//!
|
|
3
|
+
//! This is the native Rust extension for parsanol-ruby.
|
|
4
|
+
//! It compiles the parsanol-rs crate with Ruby FFI bindings enabled.
|
|
5
|
+
|
|
6
|
+
use magnus::{Error, Ruby};
|
|
7
|
+
|
|
8
|
+
/// Entry point of the Parsanol native extension.
|
|
9
|
+
///
|
|
10
|
+
/// The `#[magnus::init]` attribute marks this as the function magnus runs when
/// Ruby loads the compiled library. It does no work of its own: the Ruby handle
|
|
11
|
+
/// is forwarded to `parsanol::ruby_ffi::init`, which is expected to set up the
/// `Parsanol::Native` module and its functions (defined in parsanol-rs, not here).
///
/// # Errors
///
/// Returns whatever error `parsanol::ruby_ffi::init` reports, unchanged.
|
|
12
|
+
#[magnus::init]
|
|
13
|
+
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
14
|
+
// All registration lives in the parsanol crate's `ruby_ffi` module;
|
|
15
|
+
// its result is this function's tail expression, so it is returned as-is.
|
|
16
|
+
parsanol::ruby_ffi::init(ruby)
|
|
17
|
+
}
|