parsanol 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +262 -35
- data/Rakefile +9 -3
- data/ext/parsanol_native/Cargo.toml +3 -2
- data/lib/parsanol/native/parser.rb +46 -18
- data/lib/parsanol/native/transformer.rb +74 -31
- data/lib/parsanol/options/zero_copy.rb +1 -1
- data/lib/parsanol/options.rb +8 -7
- data/lib/parsanol/parser.rb +24 -7
- data/lib/parsanol/slice.rb +61 -0
- data/lib/parsanol/version.rb +1 -1
- data/{parsanol-ruby.gemspec → parsanol.gemspec} +1 -1
- metadata +2 -5
- data/lib/parsanol/lexer.rb +0 -144
- data/lib/parsanol/options/ruby_transform.rb +0 -107
- data/lib/parsanol/options/serialized.rb +0 -94
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d6a6abf1fc72b2167ed8fb0b494128d8a7e219396954c2a1e81b2360c6ff2c21
|
|
4
|
+
data.tar.gz: 6465b111339a2ec6dddbc7e63b1116ff7ed0f0e3c02e490bfdd5ba939fb338cc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 591469089373168dfa0293b027097054a29b1a2e0353f5ed08224999965af5c2a49ed3fe2041cdee8a206d1527e91b2899ad7a9062f17eeb75ac13d43f928048
|
|
7
|
+
data.tar.gz: '0871ec48005b0d7df2a79b7d4c269aca9e1953733e45fad55a1c4126d3916389eb2879634246a8c3a2a69f02bd8ecbfeb363ee908dc212a3ce20f369ccc057ff'
|
data/README.adoc
CHANGED
|
@@ -55,9 +55,8 @@ gem install parsanol
|
|
|
55
55
|
|
|
56
56
|
== Usage
|
|
57
57
|
|
|
58
|
+
[[basic-parsing]]
|
|
58
59
|
=== Basic Parser
|
|
59
|
-
<<<basic-parsing>>
|
|
60
|
-
|
|
61
60
|
Define parsers by creating a class that inherits from `Parsanol::Parser` and declaring rules:
|
|
62
61
|
|
|
63
62
|
[source,ruby]
|
|
@@ -74,9 +73,8 @@ parser = MyParser.new
|
|
|
74
73
|
result = parser.parse('if(x)')
|
|
75
74
|
----
|
|
76
75
|
|
|
76
|
+
[[error-reporting]]
|
|
77
77
|
=== Error Reporting
|
|
78
|
-
<<<error-reporting>>
|
|
79
|
-
|
|
80
78
|
Parsanol provides detailed error messages when parsing fails:
|
|
81
79
|
|
|
82
80
|
[source,ruby]
|
|
@@ -89,9 +87,8 @@ rescue Parsanol::ParseFailed => e
|
|
|
89
87
|
end
|
|
90
88
|
----
|
|
91
89
|
|
|
90
|
+
[[transformation]]
|
|
92
91
|
=== Transformation
|
|
93
|
-
<<<transformation>>
|
|
94
|
-
|
|
95
92
|
Convert parse trees to AST using pattern-based transformations:
|
|
96
93
|
|
|
97
94
|
[source,ruby]
|
|
@@ -104,9 +101,8 @@ end
|
|
|
104
101
|
ast = MyTransform.new.apply(parse_tree)
|
|
105
102
|
----
|
|
106
103
|
|
|
104
|
+
[[native-extension]]
|
|
107
105
|
=== Native Extension
|
|
108
|
-
<<<native-extension>>
|
|
109
|
-
|
|
110
106
|
For maximum performance, compile the Rust native extension:
|
|
111
107
|
|
|
112
108
|
[source,shell]
|
|
@@ -118,21 +114,84 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
|
118
114
|
bundle exec rake compile
|
|
119
115
|
----
|
|
120
116
|
|
|
117
|
+
[[slice-support]]
|
|
121
118
|
=== Slice Support
|
|
122
|
-
<<<slice-support>>
|
|
123
119
|
|
|
124
|
-
|
|
120
|
+
All parse results include source position information through `Parsanol::Slice` objects:
|
|
121
|
+
|
|
122
|
+
[source,ruby]
|
|
123
|
+
----
|
|
124
|
+
# Parse returns results with position info
|
|
125
|
+
result = parser.parse("hello world", mode: :native)
|
|
126
|
+
name = result[:name]
|
|
127
|
+
|
|
128
|
+
# Access the value
|
|
129
|
+
name.to_s # => "hello"
|
|
130
|
+
|
|
131
|
+
# Access position information
|
|
132
|
+
name.offset # => 0 (byte offset in original input)
|
|
133
|
+
name.length # => 5
|
|
134
|
+
name.line_and_column # => [1, 1] (1-indexed)
|
|
135
|
+
|
|
136
|
+
# Compare with strings (Slice compares by content)
|
|
137
|
+
name == "hello" # => true
|
|
138
|
+
|
|
139
|
+
# Extract from original source
|
|
140
|
+
name.extract_from(input) # => "hello"
|
|
141
|
+
----
|
|
142
|
+
|
|
143
|
+
==== JSON Output with Position Info
|
|
144
|
+
|
|
145
|
+
When using JSON mode, position information is included inline with each value:
|
|
125
146
|
|
|
126
147
|
[source,ruby]
|
|
127
148
|
----
|
|
128
|
-
|
|
129
|
-
|
|
149
|
+
result = parser.parse("hello", mode: :json)
|
|
150
|
+
# => {
|
|
151
|
+
# "name": {
|
|
152
|
+
# "value": "hello",
|
|
153
|
+
# "offset": 0,
|
|
154
|
+
# "length": 5,
|
|
155
|
+
# "line": 1,
|
|
156
|
+
# "column": 1
|
|
157
|
+
# }
|
|
158
|
+
# }
|
|
159
|
+
----
|
|
160
|
+
|
|
161
|
+
This format ensures position information is available for all downstream consumers including IDEs, linters, and error reporting tools.
|
|
162
|
+
|
|
163
|
+
==== Slice API
|
|
130
164
|
|
|
131
|
-
|
|
132
|
-
|
|
165
|
+
[source,ruby]
|
|
166
|
+
----
|
|
167
|
+
class Parsanol::Slice
|
|
168
|
+
# Core attributes
|
|
169
|
+
def content # String content
|
|
170
|
+
def offset # Byte offset in original input
|
|
171
|
+
def length # Length of the slice
|
|
172
|
+
def line_and_column # [line, column] tuple (requires line cache)
|
|
173
|
+
|
|
174
|
+
# String compatibility
|
|
175
|
+
def to_s # Returns content
|
|
176
|
+
def to_str # Implicit string conversion
|
|
177
|
+
def ==(other) # Compares content with String or Slice
|
|
178
|
+
|
|
179
|
+
# JSON serialization
|
|
180
|
+
def to_json # Returns { "value" => ..., "offset" => ..., ... }
|
|
181
|
+
def as_json # Returns hash with position info
|
|
182
|
+
|
|
183
|
+
# Utility
|
|
184
|
+
def to_span(input) # Returns SourceSpan object
|
|
185
|
+
def extract_from(input) # Extracts content from original input
|
|
186
|
+
end
|
|
133
187
|
----
|
|
134
188
|
|
|
135
|
-
This is essential for
|
|
189
|
+
This is essential for:
|
|
190
|
+
|
|
191
|
+
* **Linters** - Map errors back to source locations
|
|
192
|
+
* **IDEs** - Provide go-to-definition, hover info
|
|
193
|
+
* **Comment attachment** - Attach remarks to AST nodes by position
|
|
194
|
+
* **Source extraction** - Get original text for any parsed element
|
|
136
195
|
|
|
137
196
|
== Migrating from Parslet
|
|
138
197
|
|
|
@@ -222,23 +281,196 @@ parser.parse('123') # Works exactly the same
|
|
|
222
281
|
└─────────────────────────────────────┘
|
|
223
282
|
----
|
|
224
283
|
|
|
225
|
-
===
|
|
284
|
+
=== Parse Modes
|
|
285
|
+
|
|
286
|
+
Parsanol offers three parsing modes through the `parse` method. The `:ruby` and `:native` modes return `Parsanol::Slice` objects, while `:json` returns hashes with inline position fields; all three carry position information:
|
|
226
287
|
|
|
227
|
-
|
|
288
|
+
[source,ruby]
|
|
289
|
+
----
|
|
290
|
+
result = parser.parse(input, mode: :native) # mode is optional, :native is default
|
|
291
|
+
----
|
|
228
292
|
|
|
229
|
-
[cols="
|
|
293
|
+
[cols="1,1,1,2,2"]
|
|
230
294
|
|===
|
|
231
|
-
| Mode |
|
|
295
|
+
| Mode | Backend | Keys | Values | Best For
|
|
232
296
|
|
|
233
|
-
| Pure Ruby |
|
|
234
|
-
|
|
|
235
|
-
|
|
|
236
|
-
| Native ZeroCopy + Slice | ~29x | Linters, IDEs | Zero-copy with positions
|
|
297
|
+
| `:ruby` | Pure Ruby | Symbol | Slice | Debugging, fallback
|
|
298
|
+
| `:native` | Rust FFI | Symbol | Slice | **Production (DEFAULT)**
|
|
299
|
+
| `:json` | Rust FFI | String | Hash + position | APIs, serialization
|
|
237
300
|
|===
|
|
238
301
|
|
|
239
|
-
|
|
240
|
-
|
|
302
|
+
All modes include position info (offset, length, line, column) by default.
|
|
303
|
+
|
|
304
|
+
==== Mode Details
|
|
305
|
+
|
|
306
|
+
**Ruby Mode** (`:ruby`)::
|
|
307
|
+
Pure Ruby parsing engine. Use for debugging grammar issues or when the native extension is unavailable.
|
|
308
|
+
|
|
309
|
+
**Native Mode** (`:native`)::
|
|
310
|
+
Rust parser via FFI with automatic transformation to Ruby-friendly format (Symbol keys). ~20x faster than pure Ruby. This is the **default mode**.
|
|
311
|
+
|
|
312
|
+
**JSON Mode** (`:json`)::
|
|
313
|
+
Rust parser that returns JSON-serializable output with inline position information. Use for APIs and when you need JSON-compatible output.
|
|
314
|
+
|
|
315
|
+
=== ZeroCopy Interface (Low-Level API)
|
|
316
|
+
|
|
317
|
+
For maximum performance (~29x faster than pure Ruby), use the ZeroCopy interface, which bypasses Ruby transformation:
|
|
318
|
+
|
|
319
|
+
[source,ruby]
|
|
320
|
+
----
|
|
321
|
+
# Low-level API: Direct Rust access, String keys
|
|
322
|
+
grammar = Parsanol::Native.serialize_grammar(parser.root)
|
|
323
|
+
result = Parsanol::Native.parse_to_ruby_objects(grammar, input)
|
|
324
|
+
# Returns: { "name" => Slice("hello", offset: 0, length: 5) }
|
|
325
|
+
|
|
326
|
+
# High-level ZeroCopy: Include module for direct Ruby objects
|
|
327
|
+
class FastParser < Parsanol::Parser
|
|
328
|
+
include Parsanol::ZeroCopy
|
|
329
|
+
|
|
330
|
+
rule(:number) { match('[0-9]').repeat(1) }
|
|
331
|
+
root(:number)
|
|
332
|
+
|
|
333
|
+
output_types(number: MyNumberClass) # Map to Ruby classes
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
parser = FastParser.new
|
|
337
|
+
expr = parser.parse("42") # Returns MyNumberClass instance directly
|
|
338
|
+
----
|
|
339
|
+
|
|
340
|
+
[cols="1,2,2"]
|
|
341
|
+
|===
|
|
342
|
+
| Method | Keys | Use Case
|
|
343
|
+
|
|
344
|
+
| `parse_to_ruby_objects` | String | Low-level, Slice objects directly from Rust
|
|
345
|
+
| `Parsanol::ZeroCopy` module | Ruby objects | Maximum performance, direct object construction
|
|
346
|
+
|===
|
|
347
|
+
|
|
348
|
+
NOTE: ZeroCopy requires the native extension and type mapping definitions.
|
|
349
|
+
|
|
350
|
+
==== When to Use Parse Modes vs ZeroCopy
|
|
351
|
+
|
|
352
|
+
[cols="1,2,2"]
|
|
353
|
+
|===
|
|
354
|
+
| Your Need | Use This | Why
|
|
355
|
+
|
|
356
|
+
| Building an API | JSON mode (`:json`) | Direct JSON serialization
|
|
357
|
+
| Building a linter/IDE | Native mode (`:native`) | Position info for errors
|
|
358
|
+
| Need position info | Parse Modes (not ZeroCopy) | ZeroCopy skips position tracking
|
|
359
|
+
| High-throughput parsing | ZeroCopy | Maximum performance
|
|
360
|
+
| Type-safe AST with methods | ZeroCopy | Direct typed object construction
|
|
361
|
+
| Debugging grammar | Ruby mode (`:ruby`) | Pure Ruby, easier to trace
|
|
362
|
+
|===
|
|
363
|
+
|
|
364
|
+
==== ZeroCopy Example: Calculator with Direct Object Construction
|
|
365
|
+
|
|
366
|
+
[source,ruby]
|
|
367
|
+
----
|
|
368
|
+
# 1. Define your AST classes with methods
|
|
369
|
+
module Calculator
|
|
370
|
+
class Expr
|
|
371
|
+
def eval = raise NotImplementedError
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
class Number < Expr
|
|
375
|
+
attr_reader :value
|
|
376
|
+
def initialize(value) = @value = value
|
|
377
|
+
def eval = @value
|
|
378
|
+
end
|
|
379
|
+
|
|
380
|
+
class BinOp < Expr
|
|
381
|
+
attr_reader :left, :op, :right
|
|
382
|
+
def initialize(left:, op:, right:)
|
|
383
|
+
@left, @op, @right = left, op, right
|
|
384
|
+
end
|
|
385
|
+
def eval
|
|
386
|
+
case @op
|
|
387
|
+
when '+' then @left.eval + @right.eval
|
|
388
|
+
when '-' then @left.eval - @right.eval
|
|
389
|
+
when '*' then @left.eval * @right.eval
|
|
390
|
+
when '/' then @left.eval / @right.eval
|
|
391
|
+
end
|
|
392
|
+
end
|
|
393
|
+
end
|
|
394
|
+
end
|
|
395
|
+
|
|
396
|
+
# 2. Define parser with ZeroCopy and output_types
|
|
397
|
+
class CalculatorParser < Parsanol::Parser
|
|
398
|
+
include Parsanol::ZeroCopy
|
|
399
|
+
|
|
400
|
+
rule(:number) { match('[0-9]').repeat(1).as(:int) }
|
|
401
|
+
rule(:expression) { (number.as(:left) >> add_op >> expression.as(:right)).as(:binop) | number }
|
|
402
|
+
root(:expression)
|
|
403
|
+
|
|
404
|
+
# Map rules to Ruby classes - Rust constructs these directly!
|
|
405
|
+
output_types(
|
|
406
|
+
number: Calculator::Number,
|
|
407
|
+
binop: Calculator::BinOp
|
|
408
|
+
)
|
|
409
|
+
end
|
|
410
|
+
|
|
411
|
+
# 3. Parse and evaluate - no transform needed!
|
|
412
|
+
parser = CalculatorParser.new
|
|
413
|
+
expr = parser.parse("2 + 3 * 4") # Returns Calculator::BinOp directly
|
|
414
|
+
puts expr.eval # => 14 (with proper precedence)
|
|
415
|
+
----
|
|
416
|
+
|
|
417
|
+
==== Low-Level ZeroCopy: `parse_to_ruby_objects`
|
|
418
|
+
|
|
419
|
+
When you don't need typed objects, use `parse_to_ruby_objects` for direct Slice access:
|
|
420
|
+
|
|
421
|
+
[source,ruby]
|
|
422
|
+
----
|
|
423
|
+
# Direct FFI call - bypasses transformation, String keys
|
|
424
|
+
grammar = Parsanol::Native.serialize_grammar(MyParser.new.root)
|
|
425
|
+
result = Parsanol::Native.parse_to_ruby_objects(grammar, input)
|
|
241
426
|
|
|
427
|
+
# Result structure (String keys, Slice values):
|
|
428
|
+
# { "name" => Slice("hello", offset: 0, length: 5),
|
|
429
|
+
# "value" => Slice("42", offset: 10, length: 2) }
|
|
430
|
+
|
|
431
|
+
# Access position info directly
|
|
432
|
+
result["name"].offset # => 0
|
|
433
|
+
result["name"].to_s # => "hello"
|
|
434
|
+
----
|
|
435
|
+
|
|
436
|
+
==== ZeroCopy Requirements
|
|
437
|
+
|
|
438
|
+
The ZeroCopy module requires:
|
|
439
|
+
|
|
440
|
+
1. **Native extension** - Run `bundle exec rake compile`
|
|
441
|
+
2. **Type mapping** - Define `output_types` in your parser
|
|
442
|
+
3. **Matching constructors** - Your Ruby classes must accept the parsed attributes
|
|
443
|
+
|
|
444
|
+
For complex types, you may also need Rust-side type definitions with `#[derive(RubyObject)]` for full zero-copy FFI construction.
|
|
445
|
+
|
|
446
|
+
=== Parsing Backends (Rust Core)
|
|
447
|
+
|
|
448
|
+
Behind the scenes, the Rust implementation uses one of two parsing backends:
|
|
449
|
+
|
|
450
|
+
[cols="2,2,3"]
|
|
451
|
+
|===
|
|
452
|
+
| Backend | Use Case | Characteristics
|
|
453
|
+
|
|
454
|
+
| Packrat (default) | Complex grammars | O(n) guaranteed, higher memory
|
|
455
|
+
| Bytecode VM | Simple patterns | Lower memory, faster for linear patterns
|
|
456
|
+
| Auto | Variable workloads | Analyzes grammar, selects best backend
|
|
457
|
+
|===
|
|
458
|
+
|
|
459
|
+
The Ruby bindings automatically use the best backend for your grammar:
|
|
460
|
+
|
|
461
|
+
* Uses `Backend::Auto` by default (same as parsanol-rs)
|
|
462
|
+
* Detects nested repetitions, overlapping choices
|
|
463
|
+
* Recommends Packrat for complex grammars
|
|
464
|
+
* Falls back to Bytecode for simple patterns
|
|
465
|
+
|
|
466
|
+
NOTE: The backend selection is transparent to Ruby users. The parser object automatically uses the optimal backend based on grammar analysis.
|
|
467
|
+
|
|
468
|
+
For more details on backend selection and grammar analysis, see the https://parsanol.github.io/backends[Parsing Backends documentation].
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
[[streaming-builder]]
|
|
473
|
+
== Streaming Builder API
|
|
242
474
|
For maximum performance, use the streaming builder API which eliminates intermediate AST construction:
|
|
243
475
|
|
|
244
476
|
[source,ruby]
|
|
@@ -289,9 +521,8 @@ result = Parsanol::Native.parse_with_builder(grammar, input, builder)
|
|
|
289
521
|
| `finish` | Parsing complete | Returns nil
|
|
290
522
|
|===
|
|
291
523
|
|
|
524
|
+
[[parallel-parsing]]
|
|
292
525
|
== Parallel Parsing
|
|
293
|
-
<<<parallel-parsing>>
|
|
294
|
-
|
|
295
526
|
Parse multiple inputs using all CPU cores:
|
|
296
527
|
|
|
297
528
|
[source,ruby]
|
|
@@ -312,9 +543,8 @@ config = Parsanol::Parallel::Config.new
|
|
|
312
543
|
results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
|
|
313
544
|
----
|
|
314
545
|
|
|
546
|
+
[[infix-expressions]]
|
|
315
547
|
== Infix Expression Parsing
|
|
316
|
-
<<<infix-expressions>>
|
|
317
|
-
|
|
318
548
|
Built-in support for parsing infix expressions with operator precedence:
|
|
319
549
|
|
|
320
550
|
[source,ruby]
|
|
@@ -336,9 +566,8 @@ class CalculatorParser < Parsanol::Parser
|
|
|
336
566
|
end
|
|
337
567
|
----
|
|
338
568
|
|
|
569
|
+
[[treetop-expressions]]
|
|
339
570
|
== Treetop Expression Syntax
|
|
340
|
-
<<<treetop-expressions>>
|
|
341
|
-
|
|
342
571
|
Parsanol supports treetop-style expression strings for quick grammar definition:
|
|
343
572
|
|
|
344
573
|
[source,ruby]
|
|
@@ -392,9 +621,8 @@ grammar = Parsanol::Native.serialize_grammar(atom)
|
|
|
392
621
|
Parsanol::Native.parse_to_ruby_objects(grammar, 'aaa')
|
|
393
622
|
----
|
|
394
623
|
|
|
624
|
+
[[security-features]]
|
|
395
625
|
== Security Features
|
|
396
|
-
<<<security-features>>
|
|
397
|
-
|
|
398
626
|
For parsing untrusted input, use built-in limits:
|
|
399
627
|
|
|
400
628
|
[source,ruby]
|
|
@@ -407,9 +635,8 @@ result = Parsanol::Native.parse_with_limits(
|
|
|
407
635
|
)
|
|
408
636
|
----
|
|
409
637
|
|
|
638
|
+
[[debug-tools]]
|
|
410
639
|
== Debug Tools
|
|
411
|
-
<<<debug-tools>>
|
|
412
|
-
|
|
413
640
|
Enable tracing for debugging grammars:
|
|
414
641
|
|
|
415
642
|
[source,ruby]
|
data/Rakefile
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'bundler/gem_tasks'
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
begin
|
|
6
|
+
require 'rspec/core/rake_task'
|
|
7
|
+
rescue LoadError
|
|
8
|
+
# RSpec not available in this environment
|
|
9
|
+
end
|
|
10
|
+
|
|
5
11
|
require 'rdoc/task'
|
|
6
12
|
require 'rubygems/package_task'
|
|
7
13
|
|
|
@@ -11,7 +17,7 @@ rescue LoadError, NoMethodError
|
|
|
11
17
|
# Opal not available or incompatible with current Ruby version
|
|
12
18
|
end
|
|
13
19
|
|
|
14
|
-
GEMSPEC = Gem::Specification.load('parsanol
|
|
20
|
+
GEMSPEC = Gem::Specification.load('parsanol.gemspec')
|
|
15
21
|
|
|
16
22
|
# Load rake tasks from rakelib/
|
|
17
23
|
Dir.glob('rakelib/*.rake').each { |r| load r }
|
|
@@ -60,7 +66,7 @@ namespace :gem do
|
|
|
60
66
|
|
|
61
67
|
desc 'Define the gem task to build on any platform (compile on install)'
|
|
62
68
|
task 'platform:any' do
|
|
63
|
-
spec = Gem::Specification.load('parsanol
|
|
69
|
+
spec = Gem::Specification.load('parsanol.gemspec').dup
|
|
64
70
|
task = Gem::PackageTask.new(spec)
|
|
65
71
|
task.define
|
|
66
72
|
end
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
#
|
|
3
3
|
# Rust extension for parsanol-ruby that provides:
|
|
4
4
|
# - Fast parsing with packrat memoization
|
|
5
|
-
# - Three
|
|
5
|
+
# - Three parse modes: ruby, native, json
|
|
6
|
+
# - ZeroCopy interface for maximum performance
|
|
6
7
|
# - Source location tracking
|
|
7
8
|
# - Streaming parsing
|
|
8
9
|
# - Incremental parsing for editor integration
|
|
@@ -11,7 +12,7 @@
|
|
|
11
12
|
|
|
12
13
|
[package]
|
|
13
14
|
name = "parsanol_native"
|
|
14
|
-
version = "1.0.
|
|
15
|
+
version = "1.0.2"
|
|
15
16
|
edition = "2021"
|
|
16
17
|
rust-version = "1.75"
|
|
17
18
|
|
|
@@ -13,6 +13,7 @@ module Parsanol
|
|
|
13
13
|
#
|
|
14
14
|
module Parser
|
|
15
15
|
# Two-level grammar cache (module-level for proper initialization)
|
|
16
|
+
# These MUST be mutable for caching to work
|
|
16
17
|
GRAMMAR_HASH_CACHE = Hash.new # object_id => hash_key
|
|
17
18
|
GRAMMAR_CACHE = Hash.new # hash_key => grammar_json
|
|
18
19
|
|
|
@@ -35,42 +36,50 @@ module Parsanol
|
|
|
35
36
|
# Parse using native engine
|
|
36
37
|
# @param grammar_json [String] JSON-serialized grammar
|
|
37
38
|
# @param input [String] Input string to parse
|
|
38
|
-
# @
|
|
39
|
-
|
|
39
|
+
# @param line_cache [Parsanol::Source::LineCache, nil] Optional line cache for position info
|
|
40
|
+
# @return Ruby AST from parsing with Slice objects for strings
|
|
41
|
+
def parse(grammar_json, input, line_cache = nil)
|
|
40
42
|
raise LoadError, 'Native parser not available. Run `rake compile` to build.' unless available?
|
|
41
43
|
|
|
44
|
+
# Build line cache if not provided
|
|
45
|
+
line_cache ||= build_line_cache(input)
|
|
46
|
+
|
|
42
47
|
# Call native parse_batch (returns flat u64 array)
|
|
43
48
|
flat = Parsanol::Native.parse_batch(grammar_json, input)
|
|
44
|
-
# Decode flat array to Ruby AST
|
|
45
|
-
decode_flat(flat, input)
|
|
49
|
+
# Decode flat array to Ruby AST with Slice objects
|
|
50
|
+
decode_flat(flat, input, line_cache)
|
|
46
51
|
end
|
|
47
52
|
|
|
48
53
|
# Parse a grammar with automatic serialization and caching
|
|
49
54
|
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
50
55
|
# @param input [String] Input string to parse
|
|
51
|
-
# @
|
|
52
|
-
|
|
56
|
+
# @param line_cache [Parsanol::Source::LineCache, nil] Optional line cache
|
|
57
|
+
# @return Ruby AST from parsing with Slice objects
|
|
58
|
+
def parse_with_grammar(root_atom, input, line_cache = nil)
|
|
53
59
|
# Extract root atom if a Parser is passed
|
|
54
60
|
root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
|
|
55
61
|
grammar_json = serialize_grammar(root_atom)
|
|
56
|
-
parse(grammar_json, input)
|
|
62
|
+
parse(grammar_json, input, line_cache)
|
|
57
63
|
end
|
|
58
64
|
|
|
59
65
|
# Parse and transform to Parslet-compatible format
|
|
66
|
+
# NOTE: This method now returns Slice objects with position info by default.
|
|
67
|
+
# The name is kept for backward compatibility but it's now the primary parse method.
|
|
60
68
|
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
61
69
|
# @param input [String] Input string to parse
|
|
62
|
-
# @
|
|
63
|
-
|
|
70
|
+
# @param line_cache [Parsanol::Source::LineCache, nil] Optional line cache
|
|
71
|
+
# @return Ruby AST in Parslet-compatible format with Slice objects
|
|
72
|
+
def parse_parslet_compatible(root_atom, input, line_cache = nil)
|
|
64
73
|
# Extract root atom if a Parser is passed
|
|
65
74
|
root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
|
|
66
|
-
raw_ast = parse_with_grammar(root_atom, input)
|
|
75
|
+
raw_ast = parse_with_grammar(root_atom, input, line_cache)
|
|
67
76
|
AstTransformer.transform(raw_ast)
|
|
68
77
|
end
|
|
69
78
|
|
|
70
79
|
# Parse multiple inputs with the same grammar (more efficient)
|
|
71
80
|
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
72
81
|
# @param inputs [Array<String>] Array of input strings to parse
|
|
73
|
-
# @return [Array] Array of
|
|
82
|
+
# @return [Array] Array of Ruby ASTs with Slice objects
|
|
74
83
|
def parse_batch_inputs(root_atom, inputs)
|
|
75
84
|
# Extract root atom if a Parser is passed
|
|
76
85
|
root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
|
|
@@ -81,7 +90,7 @@ module Parsanol
|
|
|
81
90
|
# Parse multiple inputs with transformation
|
|
82
91
|
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
83
92
|
# @param inputs [Array<String>] Array of input strings to parse
|
|
84
|
-
# @return [Array] Array of transformed Ruby ASTs
|
|
93
|
+
# @return [Array] Array of transformed Ruby ASTs with Slice objects
|
|
85
94
|
def parse_batch_with_transform(root_atom, inputs)
|
|
86
95
|
# Extract root atom if a Parser is passed
|
|
87
96
|
root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
|
|
@@ -95,13 +104,22 @@ module Parsanol
|
|
|
95
104
|
# Parse without transformation (faster for raw AST access)
|
|
96
105
|
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
97
106
|
# @param input [String] Input string to parse
|
|
98
|
-
# @return Raw Ruby AST from parsing
|
|
107
|
+
# @return Raw Ruby AST from parsing with Slice objects
|
|
99
108
|
def parse_raw(root_atom, input)
|
|
100
109
|
# Extract root atom if a Parser is passed
|
|
101
110
|
root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
|
|
102
111
|
parse_with_grammar(root_atom, input)
|
|
103
112
|
end
|
|
104
113
|
|
|
114
|
+
# Build a line cache for an input string
|
|
115
|
+
# @param input [String] The input string
|
|
116
|
+
# @return [Parsanol::Source::LineCache] The line cache
|
|
117
|
+
def build_line_cache(input)
|
|
118
|
+
cache = ::Parsanol::Source::LineCache.new
|
|
119
|
+
cache.scan_for_line_endings(0, input)
|
|
120
|
+
cache
|
|
121
|
+
end
|
|
122
|
+
|
|
105
123
|
# Serialize a grammar to JSON, with two-level caching
|
|
106
124
|
# Level 1: object_id => hash_key (avoids grammar traversal)
|
|
107
125
|
# Level 2: hash_key => grammar_json (avoids serialization)
|
|
@@ -471,13 +489,19 @@ module Parsanol
|
|
|
471
489
|
# 0x01 = bool
|
|
472
490
|
# 0x02 = int
|
|
473
491
|
# 0x03 = float
|
|
474
|
-
# 0x04 = string_ref (offset, length)
|
|
492
|
+
# 0x04 = string_ref (offset, length) - creates Slice with position info
|
|
475
493
|
# 0x05 = array_start
|
|
476
494
|
# 0x06 = array_end
|
|
477
495
|
# 0x07 = hash_start
|
|
478
496
|
# 0x08 = hash_end
|
|
479
497
|
# 0x09 = hash_key (tag, len, key_chunks..., value)
|
|
480
|
-
|
|
498
|
+
# 0x0A = inline_string (interned string from arena)
|
|
499
|
+
#
|
|
500
|
+
# @param flat [Array<Integer>] Flat u64 array from native parser
|
|
501
|
+
# @param input [String] Original input string
|
|
502
|
+
# @param line_cache [Parsanol::Source::LineCache, nil] Line cache for position info
|
|
503
|
+
# @return Ruby AST with Slice objects for all string values
|
|
504
|
+
def decode_flat(flat, input, line_cache = nil)
|
|
481
505
|
stack = []
|
|
482
506
|
i = 0
|
|
483
507
|
|
|
@@ -500,10 +524,12 @@ module Parsanol
|
|
|
500
524
|
float = [bits].pack('Q').unpack1('D')
|
|
501
525
|
stack << float
|
|
502
526
|
i += 2
|
|
503
|
-
when 0x04 # string_ref (from input)
|
|
527
|
+
when 0x04 # string_ref (from input) - create Slice with position info
|
|
504
528
|
offset = flat[i + 1]
|
|
505
529
|
length = flat[i + 2]
|
|
506
|
-
|
|
530
|
+
content = input.byteslice(offset, length)
|
|
531
|
+
# Create Slice with position info - this is the key change
|
|
532
|
+
stack << ::Parsanol::Slice.new(offset, content, line_cache)
|
|
507
533
|
i += 3
|
|
508
534
|
when 0x0A # inline_string (interned string from arena)
|
|
509
535
|
# Format: tag, len, u64 chunks of string bytes
|
|
@@ -523,7 +549,9 @@ module Parsanol
|
|
|
523
549
|
end
|
|
524
550
|
i += chunks
|
|
525
551
|
|
|
526
|
-
|
|
552
|
+
# Inline strings have no source position, so wrap them in a Slice with offset 0 and no line cache
|
|
553
|
+
content = bytes.pack('C*').force_encoding('UTF-8')
|
|
554
|
+
stack << ::Parsanol::Slice.new(0, content, nil)
|
|
527
555
|
when 0x05 # array_start
|
|
528
556
|
stack << :array_marker
|
|
529
557
|
i += 1
|