parsanol 1.0.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +12 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +487 -0
  5. data/Rakefile +135 -0
  6. data/lib/parsanol/3.2/parsanol_native.so +0 -0
  7. data/lib/parsanol/3.3/parsanol_native.so +0 -0
  8. data/lib/parsanol/3.4/parsanol_native.so +0 -0
  9. data/lib/parsanol/4.0/parsanol_native.so +0 -0
  10. data/lib/parsanol/ast_visitor.rb +122 -0
  11. data/lib/parsanol/atoms/alternative.rb +122 -0
  12. data/lib/parsanol/atoms/base.rb +202 -0
  13. data/lib/parsanol/atoms/can_flatten.rb +194 -0
  14. data/lib/parsanol/atoms/capture.rb +38 -0
  15. data/lib/parsanol/atoms/context.rb +334 -0
  16. data/lib/parsanol/atoms/context_optimized.rb +38 -0
  17. data/lib/parsanol/atoms/custom.rb +110 -0
  18. data/lib/parsanol/atoms/cut.rb +66 -0
  19. data/lib/parsanol/atoms/dsl.rb +96 -0
  20. data/lib/parsanol/atoms/dynamic.rb +39 -0
  21. data/lib/parsanol/atoms/entity.rb +75 -0
  22. data/lib/parsanol/atoms/ignored.rb +37 -0
  23. data/lib/parsanol/atoms/infix.rb +162 -0
  24. data/lib/parsanol/atoms/lookahead.rb +82 -0
  25. data/lib/parsanol/atoms/named.rb +74 -0
  26. data/lib/parsanol/atoms/re.rb +83 -0
  27. data/lib/parsanol/atoms/repetition.rb +259 -0
  28. data/lib/parsanol/atoms/scope.rb +35 -0
  29. data/lib/parsanol/atoms/sequence.rb +194 -0
  30. data/lib/parsanol/atoms/str.rb +103 -0
  31. data/lib/parsanol/atoms/visitor.rb +91 -0
  32. data/lib/parsanol/atoms.rb +46 -0
  33. data/lib/parsanol/buffer.rb +133 -0
  34. data/lib/parsanol/builder_callbacks.rb +353 -0
  35. data/lib/parsanol/cause.rb +122 -0
  36. data/lib/parsanol/context.rb +39 -0
  37. data/lib/parsanol/convenience.rb +36 -0
  38. data/lib/parsanol/edit_tracker.rb +111 -0
  39. data/lib/parsanol/error_reporter/contextual.rb +99 -0
  40. data/lib/parsanol/error_reporter/deepest.rb +120 -0
  41. data/lib/parsanol/error_reporter/tree.rb +63 -0
  42. data/lib/parsanol/error_reporter.rb +100 -0
  43. data/lib/parsanol/expression/treetop.rb +154 -0
  44. data/lib/parsanol/expression.rb +106 -0
  45. data/lib/parsanol/fast_mode.rb +149 -0
  46. data/lib/parsanol/first_set.rb +79 -0
  47. data/lib/parsanol/grammar_builder.rb +177 -0
  48. data/lib/parsanol/incremental_parser.rb +177 -0
  49. data/lib/parsanol/interval_tree.rb +217 -0
  50. data/lib/parsanol/lazy_result.rb +179 -0
  51. data/lib/parsanol/lexer.rb +144 -0
  52. data/lib/parsanol/mermaid.rb +139 -0
  53. data/lib/parsanol/native/parser.rb +612 -0
  54. data/lib/parsanol/native/serializer.rb +248 -0
  55. data/lib/parsanol/native/transformer.rb +435 -0
  56. data/lib/parsanol/native/types.rb +42 -0
  57. data/lib/parsanol/native.rb +217 -0
  58. data/lib/parsanol/optimizer.rb +85 -0
  59. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  60. data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
  61. data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
  62. data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
  63. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  64. data/lib/parsanol/options/ruby_transform.rb +107 -0
  65. data/lib/parsanol/options/serialized.rb +94 -0
  66. data/lib/parsanol/options/zero_copy.rb +128 -0
  67. data/lib/parsanol/options.rb +20 -0
  68. data/lib/parsanol/parallel.rb +133 -0
  69. data/lib/parsanol/parser.rb +182 -0
  70. data/lib/parsanol/parslet.rb +151 -0
  71. data/lib/parsanol/pattern/binding.rb +91 -0
  72. data/lib/parsanol/pattern.rb +159 -0
  73. data/lib/parsanol/pool.rb +219 -0
  74. data/lib/parsanol/pools/array_pool.rb +75 -0
  75. data/lib/parsanol/pools/buffer_pool.rb +175 -0
  76. data/lib/parsanol/pools/position_pool.rb +92 -0
  77. data/lib/parsanol/pools/slice_pool.rb +64 -0
  78. data/lib/parsanol/position.rb +94 -0
  79. data/lib/parsanol/resettable.rb +29 -0
  80. data/lib/parsanol/result.rb +46 -0
  81. data/lib/parsanol/result_builder.rb +208 -0
  82. data/lib/parsanol/result_stream.rb +261 -0
  83. data/lib/parsanol/rig/rspec.rb +71 -0
  84. data/lib/parsanol/rope.rb +81 -0
  85. data/lib/parsanol/scope.rb +104 -0
  86. data/lib/parsanol/slice.rb +146 -0
  87. data/lib/parsanol/source/line_cache.rb +109 -0
  88. data/lib/parsanol/source.rb +180 -0
  89. data/lib/parsanol/source_location.rb +167 -0
  90. data/lib/parsanol/streaming_parser.rb +124 -0
  91. data/lib/parsanol/string_view.rb +195 -0
  92. data/lib/parsanol/transform.rb +226 -0
  93. data/lib/parsanol/version.rb +5 -0
  94. data/lib/parsanol/wasm/README.md +80 -0
  95. data/lib/parsanol/wasm/package.json +51 -0
  96. data/lib/parsanol/wasm/parsanol.js +252 -0
  97. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  98. data/lib/parsanol/wasm_parser.rb +240 -0
  99. data/lib/parsanol.rb +280 -0
  100. data/parsanol-ruby.gemspec +67 -0
  101. metadata +280 -0
@@ -0,0 +1,612 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+
5
+ module Parsanol
6
+ module Native
7
+ # Core parsing functionality using Rust native extension
8
+ #
9
+ # Provides three parsing modes:
10
+ # - :ruby - Parse and transform to Parslet-compatible format
11
+ # - :json - Parse and return JSON-serialized AST
12
+ # - :slice - Parse and return raw native format (fastest)
13
+ #
14
+ module Parser
15
+ # Two-level grammar cache (module-level for proper initialization)
16
+ GRAMMAR_HASH_CACHE = Hash.new # object_id => hash_key
17
+ GRAMMAR_CACHE = Hash.new # hash_key => grammar_json
18
+
19
+ class << self
20
+ # Cached availability check
21
+ @cached_available = nil
22
+
23
# Report whether the native extension can be used.
#
# The first call tries to require 'parsanol/parsanol_native' and then asks
# Parsanol::Native.is_available; a LoadError from the require is treated as
# "not available". Any non-nil answer is memoized in @cached_available, so
# later calls skip the require.
#
# @return [Object] false when the extension cannot be loaded, otherwise the
#   value reported by Parsanol::Native.is_available
def available?
  if @cached_available.nil?
    @cached_available =
      begin
        require 'parsanol/parsanol_native'
        Parsanol::Native.is_available
      rescue LoadError
        false
      end
  end

  @cached_available
end
34
+
35
# Parse +input+ with the native engine and decode the result.
#
# Parsanol::Native.parse_batch returns a flat integer array, which is handed
# to decode_flat together with the original input.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @return [Object] Ruby AST produced by decode_flat
# @raise [LoadError] when the native extension is not available
def parse(grammar_json, input)
  unless available?
    raise LoadError, 'Native parser not available. Run `rake compile` to build.'
  end

  decode_flat(Parsanol::Native.parse_batch(grammar_json, input), input)
end
47
+
48
# Serialize a grammar (through the grammar cache) and parse +input+ with it.
#
# @param root_atom [Parsanol::Atoms::Base, Parsanol::Parser] Root atom of the
#   grammar; a Parser is replaced by its #root
# @param input [String] Input string to parse
# @return [Object] Ruby AST returned by #parse
def parse_with_grammar(root_atom, input)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  parse(serialize_grammar(atom), input)
end
58
+
59
# Parse +input+ and run the raw AST through AstTransformer.transform.
#
# @param root_atom [Parsanol::Atoms::Base, Parsanol::Parser] Root atom of the
#   grammar; a Parser is replaced by its #root
# @param input [String] Input string to parse
# @return [Object] Result of AstTransformer.transform on the raw AST
def parse_parslet_compatible(root_atom, input)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  AstTransformer.transform(parse_with_grammar(atom, input))
end
69
+
70
# Parse several inputs with one grammar, serializing the grammar only once.
#
# @param root_atom [Parsanol::Atoms::Base, Parsanol::Parser] Root atom of the
#   grammar; a Parser is replaced by its #root
# @param inputs [Array<String>] Input strings to parse
# @return [Array] Raw Ruby ASTs, one per input, in input order
def parse_batch_inputs(root_atom, inputs)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  serialized = serialize_grammar(atom)
  inputs.map { |text| parse(serialized, text) }
end
80
+
81
# Parse several inputs with one grammar, then transform all raw ASTs in a
# single AstTransformer.transform_batch call.
#
# @param root_atom [Parsanol::Atoms::Base, Parsanol::Parser] Root atom of the
#   grammar; a Parser is replaced by its #root
# @param inputs [Array<String>] Input strings to parse
# @return [Object] Result of AstTransformer.transform_batch on the raw ASTs
def parse_batch_with_transform(root_atom, inputs)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  serialized = serialize_grammar(atom)
  # Every input is parsed before any transformation starts.
  parsed = inputs.map { |text| parse(serialized, text) }
  AstTransformer.transform_batch(parsed)
end
94
+
95
# Parse +input+ and return the untransformed AST.
#
# @param root_atom [Parsanol::Atoms::Base, Parsanol::Parser] Root atom of the
#   grammar; a Parser is replaced by its #root
# @param input [String] Input string to parse
# @return [Object] Whatever #parse_with_grammar returns
def parse_raw(root_atom, input)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  parse_with_grammar(atom, input)
end
104
+
105
# Serialize a grammar to JSON through a two-level cache.
#
# Level 1 (GRAMMAR_HASH_CACHE): object_id => structural hash key, so the
# structural hash is computed once per atom object.
# Level 2 (GRAMMAR_CACHE): structural hash key => grammar JSON, so
# GrammarSerializer.serialize runs once per distinct key.
#
# NOTE(review): object_id values can be reused once the original object has
# been garbage-collected, so a level-1 entry may outlive its grammar —
# confirm callers keep grammars alive or call clear_cache.
#
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
# @return [String] JSON string
def serialize_grammar(root_atom)
  structural_key = (GRAMMAR_HASH_CACHE[root_atom.object_id] ||= grammar_structure_hash(root_atom))
  GRAMMAR_CACHE[structural_key] ||= GrammarSerializer.serialize(root_atom)
end
124
+
125
# Empty both grammar caches (call if a grammar changes).
#
# @return [Hash] the now-empty GRAMMAR_CACHE
def clear_cache
  [GRAMMAR_HASH_CACHE, GRAMMAR_CACHE].each(&:clear).last
end
130
+
131
# Summarize the current state of both grammar caches.
#
# @return [Hash] :hash_cache_size and :grammar_cache_size (entry counts) and
#   :grammar_keys (the keys currently stored in GRAMMAR_CACHE)
def cache_stats
  stats = {
    hash_cache_size: GRAMMAR_HASH_CACHE.size,
    grammar_cache_size: GRAMMAR_CACHE.size
  }
  stats[:grammar_keys] = GRAMMAR_CACHE.keys
  stats
end
139
+
140
+ # ===== Serialized Mode (JSON Output) =====
141
+
142
# Parse +input+ and return the decoded result serialized with #to_json.
#
# NOTE(review): #to_json comes from the json library, which this file does
# not require itself — confirm it is loaded elsewhere.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @return [String] JSON string representing the result
# @raise [LoadError] when the native extension is not available
def parse_to_json(grammar_json, input)
  message = 'Serialized mode requires native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  parse(grammar_json, input).to_json
end
159
+
160
# Parse +input+ and return whatever Parsanol::Native.parse_to_ruby_objects
# builds, without any post-processing on the Ruby side.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @param _type_map [Hash, nil] Accepted for interface compatibility; ignored
# @return [Object] Result of the native call (type depends on grammar)
# @raise [LoadError] when the native extension is not available
def parse_to_objects(grammar_json, input, _type_map = nil)
  message = 'ZeroCopy mode requires native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  Parsanol::Native.parse_to_ruby_objects(grammar_json, input)
end
182
+
183
# Recursively replace slice-marker hashes with Parsanol::Slice objects.
#
# A Hash whose '_slice' entry is exactly true becomes
# Parsanol::Slice.new(hash['offset'], hash['str']). Other hashes have their
# values converted, arrays have their elements converted, and every other
# value is returned unchanged.
#
# @param obj [Object] The object to convert (Hash, Array, or leaf value)
# @param input [String] The original input string; only passed along the
#   recursion, the Slice itself is built from the marker hash
# @return [Object] The converted structure
def convert_slices(obj, input)
  if obj.is_a?(Hash)
    return Parsanol::Slice.new(obj['offset'], obj['str']) if obj['_slice'] == true

    obj.transform_values { |value| convert_slices(value, input) }
  elsif obj.is_a?(Array)
    obj.map { |element| convert_slices(element, input) }
  else
    obj
  end
end
208
+
209
+ # ===== Source Location Tracking =====
210
+
211
# Parse with source location tracking by delegating to _parse_with_spans.
#
# NOTE(review): _parse_with_spans is not defined in this file; presumably the
# native extension supplies it — TODO confirm.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @return [Array<(Object, Hash)>] Tuple of [parsed_result, spans_hash]
# @raise [LoadError] when the native extension is not available
def parse_with_spans(grammar_json, input)
  message = 'Source location tracking requires native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  _parse_with_spans(grammar_json, input)
end
226
+
227
# Look up the span of one node by delegating to _get_span.
#
# NOTE(review): _get_span is not defined in this file; presumably the native
# extension supplies it — TODO confirm.
#
# @param result [Object] Parse result from parse_with_spans
# @param node_id [Integer] Node identifier
# @return [Hash] Span information {start: {offset, line, column}, end: {...}}
# @raise [LoadError] when the native extension is not available
def get_span(result, node_id)
  unless available?
    raise LoadError, 'Source location tracking requires native extension.'
  end

  _get_span(result, node_id)
end
237
+
238
+ # ===== Grammar Composition =====
239
+
240
# Import another grammar into a builder by delegating to _grammar_import.
#
# NOTE(review): _grammar_import is not defined in this file; presumably the
# native extension supplies it — TODO confirm.
#
# @param builder_json [String] GrammarBuilder JSON
# @param grammar_json [String] Grammar to import
# @param prefix [String, nil] Optional prefix for imported rules
# @return [String] Updated GrammarBuilder JSON
# @raise [LoadError] when the native extension is not available
def grammar_import(builder_json, grammar_json, prefix = nil)
  unless available?
    raise LoadError, 'Grammar composition requires native extension.'
  end

  _grammar_import(builder_json, grammar_json, prefix)
end
251
+
252
# Access a rule of a builder for modification by delegating to
# _grammar_rule_mut.
#
# NOTE(review): _grammar_rule_mut is not defined in this file; presumably the
# native extension supplies it — TODO confirm.
#
# @param builder_json [String] GrammarBuilder JSON
# @param rule_name [String] Name of the rule to modify
# @return [String] Updated GrammarBuilder JSON
# @raise [LoadError] when the native extension is not available
def grammar_rule_mut(builder_json, rule_name)
  unless available?
    raise LoadError, 'Grammar composition requires native extension.'
  end

  _grammar_rule_mut(builder_json, rule_name)
end
262
+
263
+ # ===== Streaming Parser =====
264
+
265
# Create a streaming parser by delegating to _streaming_parser_new.
#
# NOTE(review): _streaming_parser_new is not defined in this file; presumably
# the native extension supplies it — TODO confirm.
#
# @param grammar_json [String] JSON-serialized grammar
# @return [Object] Streaming parser instance
# @raise [LoadError] when the native extension is not available
def streaming_parser_new(grammar_json)
  unless available?
    raise LoadError, 'Streaming parser requires native extension.'
  end

  _streaming_parser_new(grammar_json)
end
274
+
275
# Feed one chunk to a streaming parser by delegating to
# _streaming_parser_add_chunk.
#
# NOTE(review): _streaming_parser_add_chunk is not defined in this file;
# presumably the native extension supplies it — TODO confirm.
#
# @param parser [Object] Streaming parser instance
# @param chunk [String] Input chunk to add
# @return [Boolean] True if more chunks needed, false if ready
# @raise [LoadError] when the native extension is not available
def streaming_parser_add_chunk(parser, chunk)
  unless available?
    raise LoadError, 'Streaming parser requires native extension.'
  end

  _streaming_parser_add_chunk(parser, chunk)
end
285
+
286
# Parse the data buffered so far by delegating to
# _streaming_parser_parse_chunk.
#
# NOTE(review): _streaming_parser_parse_chunk is not defined in this file;
# presumably the native extension supplies it — TODO confirm.
#
# @param parser [Object] Streaming parser instance
# @return [Object, nil] Parsed result or nil if need more data
# @raise [LoadError] when the native extension is not available
def streaming_parser_parse_chunk(parser)
  unless available?
    raise LoadError, 'Streaming parser requires native extension.'
  end

  _streaming_parser_parse_chunk(parser)
end
295
+
296
+ # ===== Incremental Parser =====
297
+
298
# Create an incremental parser by delegating to _incremental_parser_new.
#
# NOTE(review): _incremental_parser_new is not defined in this file;
# presumably the native extension supplies it — TODO confirm.
#
# @param grammar_json [String] JSON-serialized grammar
# @param initial_input [String] Initial input string
# @return [Object] Incremental parser instance
# @raise [LoadError] when the native extension is not available
def incremental_parser_new(grammar_json, initial_input)
  unless available?
    raise LoadError, 'Incremental parser requires native extension.'
  end

  _incremental_parser_new(grammar_json, initial_input)
end
308
+
309
# Record an edit on an incremental parser by delegating to
# _incremental_parser_apply_edit.
#
# NOTE(review): _incremental_parser_apply_edit is not defined in this file;
# presumably the native extension supplies it — TODO confirm.
#
# @param parser [Object] Incremental parser instance
# @param start [Integer] Start position of edit
# @param deleted [Integer] Number of characters deleted
# @param inserted [String] Text to insert (defaults to the empty string)
# @return [Object] Updated parser state
# @raise [LoadError] when the native extension is not available
def incremental_parser_apply_edit(parser, start, deleted, inserted = '')
  unless available?
    raise LoadError, 'Incremental parser requires native extension.'
  end

  _incremental_parser_apply_edit(parser, start, deleted, inserted)
end
321
+
322
# Reparse an incremental parser by delegating to _incremental_parser_reparse.
#
# The private _incremental_parser_reparse defined in this file raises
# NotImplementedError; presumably the native extension replaces it —
# TODO confirm.
#
# @param parser [Object] Incremental parser instance
# @param new_input [String, nil] Optional new input (if not using apply_edit)
# @return [Object] Parse result
# @raise [LoadError] when the native extension is not available
def incremental_parser_reparse(parser, new_input = nil)
  unless available?
    raise LoadError, 'Incremental parser requires native extension.'
  end

  _incremental_parser_reparse(parser, new_input)
end
332
+
333
+ # ===== Streaming Builder =====
334
+
335
# Parse with a streaming builder by delegating to _parse_with_builder.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @param builder [Object] Object including BuilderCallbacks module
# @return [Object] Result of builder.finish
# @raise [LoadError] when the native extension is not available
def parse_with_builder(grammar_json, input, builder)
  message = 'Streaming builder requires native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  _parse_with_builder(grammar_json, input, builder)
end
352
+
353
+ # ===== Parallel Parsing =====
354
+
355
# Parse multiple inputs by delegating to _parse_batch_parallel.
#
# The private _parse_batch_parallel defined in this file raises
# NotImplementedError; presumably the native extension replaces it —
# TODO confirm.
#
# @param grammar_json [String] JSON-serialized grammar
# @param inputs [Array<String>] Array of input strings to parse
# @param num_threads [Integer, nil] Number of threads (nil = auto-detect)
# @return [Array<Object>] Array of parse results in same order as inputs
# @raise [LoadError] when the native extension is not available
def parse_batch_parallel(grammar_json, inputs, num_threads: nil)
  message = 'Parallel parsing requires native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  _parse_batch_parallel(grammar_json, inputs, num_threads)
end
371
+
372
+ # ===== Security / Limits =====
373
+
374
# Parse with explicit resource limits by delegating to _parse_with_limits.
#
# The private _parse_with_limits defined in this file raises
# NotImplementedError; presumably the native extension replaces it —
# TODO confirm.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @param max_input_size [Integer] Maximum input size in bytes (default: 100MB)
# @param max_recursion_depth [Integer] Maximum recursion depth (default: 1000)
# @return [Object] Parse result
# @raise [LoadError] when the native extension is not available
def parse_with_limits(grammar_json, input, max_input_size: 100 * 1024 * 1024, max_recursion_depth: 1000)
  message = 'Security limits require native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
end
390
+
391
+ # ===== Debug Tools =====
392
+
393
# Parse with tracing by delegating to _parse_with_trace.
#
# The private _parse_with_trace defined in this file raises
# NotImplementedError; presumably the native extension replaces it —
# TODO confirm.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @return [Array<(Object, Array)>] Tuple of [parse_result, trace_events]
# @raise [LoadError] when the native extension is not available
def parse_with_trace(grammar_json, input)
  message = 'Debug tracing requires native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  _parse_with_trace(grammar_json, input)
end
407
+
408
# Render a grammar as Mermaid source by delegating to _grammar_to_mermaid.
#
# The private _grammar_to_mermaid defined in this file raises
# NotImplementedError; presumably the native extension replaces it —
# TODO confirm.
#
# @param grammar_json [String] JSON-serialized grammar
# @return [String] Mermaid diagram source
# @raise [LoadError] when the native extension is not available
def grammar_to_mermaid(grammar_json)
  message = 'Grammar visualization requires native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  _grammar_to_mermaid(grammar_json)
end
421
+
422
# Render a grammar as GraphViz DOT source by delegating to _grammar_to_dot.
#
# The private _grammar_to_dot defined in this file raises
# NotImplementedError; presumably the native extension replaces it —
# TODO confirm.
#
# @param grammar_json [String] JSON-serialized grammar
# @return [String] GraphViz DOT source
# @raise [LoadError] when the native extension is not available
def grammar_to_dot(grammar_json)
  message = 'Grammar visualization requires native extension. ' \
            'Run `rake compile` to build the extension.'
  raise LoadError, message unless available?

  _grammar_to_dot(grammar_json)
end
435
+
436
+ private
437
+
438
# Placeholder that always raises. Note that NotImplementedError descends from
# ScriptError, so a bare `rescue` does not catch it.
def _incremental_parser_reparse(parser, new_input)
  message = 'Native extension method not available'
  raise NotImplementedError, message
end
441
+
442
# Forward to the parse_with_builder function on Parsanol::Native and return
# its result unchanged.
def _parse_with_builder(grammar_json, input, builder)
  native = Parsanol::Native
  native.parse_with_builder(grammar_json, input, builder)
end
447
+
448
# Placeholder that always raises. Note that NotImplementedError descends from
# ScriptError, so a bare `rescue` does not catch it.
def _parse_batch_parallel(grammar_json, inputs, num_threads)
  message = 'Native extension method not available'
  raise NotImplementedError, message
end
451
+
452
# Placeholder that always raises. Note that NotImplementedError descends from
# ScriptError, so a bare `rescue` does not catch it.
def _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
  message = 'Native extension method not available'
  raise NotImplementedError, message
end
455
+
456
# Placeholder that always raises. Note that NotImplementedError descends from
# ScriptError, so a bare `rescue` does not catch it.
def _parse_with_trace(grammar_json, input)
  message = 'Native extension method not available'
  raise NotImplementedError, message
end
459
+
460
# Placeholder that always raises. Note that NotImplementedError descends from
# ScriptError, so a bare `rescue` does not catch it.
def _grammar_to_mermaid(grammar_json)
  message = 'Native extension method not available'
  raise NotImplementedError, message
end
463
+
464
# Placeholder that always raises. Note that NotImplementedError descends from
# ScriptError, so a bare `rescue` does not catch it.
def _grammar_to_dot(grammar_json)
  message = 'Native extension method not available'
  raise NotImplementedError, message
end
467
+
468
# Decode the flat integer array returned by the native parser into a Ruby AST.
#
# Each record starts with a tag word:
#   0x00 = nil
#   0x01 = bool          (tag, value; non-zero means true)
#   0x02 = int           (tag, value)
#   0x03 = float         (tag, IEEE 754 bits)
#   0x04 = string_ref    (tag, byte offset, byte length into +input+)
#   0x05 = array_start
#   0x06 = array_end
#   0x07 = hash_start
#   0x08 = hash_end
#   0x09 = hash_key      (tag, byte length, packed key words; the value follows)
#   0x0A = inline_string (tag, byte length, packed string words)
#
# Values are pushed on a stack; start tags push a marker symbol and end tags
# collapse everything above the nearest matching marker into an Array or Hash.
#
# NOTE(review): the int word is pushed unchanged. If the native side encodes
# negative integers as two's-complement u64 they would surface as large
# positive values — TODO confirm the encoding.
#
# @param flat [Array<Integer>] Flat encoded parse result
# @param input [String] Original input, used to resolve string_ref records
# @return [Object, nil] The first decoded value (nil when +flat+ is empty)
# @raise [RuntimeError] on an unknown tag, an end tag without a matching
#   start tag, a hash with a key but no value, or a truncated packed string
def decode_flat(flat, input)
  stack = []
  i = 0

  while i < flat.length
    tag = flat[i]

    case tag
    when 0x00 # nil
      stack << nil
      i += 1
    when 0x01 # bool
      stack << (flat[i + 1] != 0)
      i += 2
    when 0x02 # int
      stack << flat[i + 1]
      i += 2
    when 0x03 # float
      # Reinterpret the 64-bit pattern as a double (native byte order both ways).
      stack << [flat[i + 1]].pack('Q').unpack1('D')
      i += 2
    when 0x04 # string_ref (from input)
      stack << input.byteslice(flat[i + 1], flat[i + 2])
      i += 3
    when 0x09, 0x0A # hash_key / inline_string share the packed-string layout
      text, i = read_packed_string(flat, i + 1)
      stack << text
    when 0x05 # array_start
      stack << :array_marker
      i += 1
    when 0x06 # array_end
      stack << pop_container(stack, :array_marker, i)
      i += 1
    when 0x07 # hash_start
      stack << :hash_marker
      i += 1
    when 0x08 # hash_end
      entries = pop_container(stack, :hash_marker, i)
      raise "Hash key without value before index #{i}" if entries.length.odd?

      stack << entries.each_slice(2).to_h
      i += 1
    else
      raise "Unknown tag: #{tag} at index #{i}"
    end
  end

  stack.first
end

# Read a length-prefixed string whose bytes are packed little-endian into
# 64-bit words, starting at flat[pos] (the byte length).
# Returns [string, index of the first word after the string].
def read_packed_string(flat, pos)
  byte_len = flat[pos]
  word_count = (byte_len + 7) / 8
  words = flat[pos + 1, word_count]
  if words.nil? || words.length < word_count
    raise "Truncated string at index #{pos}"
  end

  text = words.pack('Q<*').byteslice(0, byte_len).force_encoding('UTF-8')
  [text, pos + 1 + word_count]
end

# Remove and return everything above the nearest +marker+ on the stack, also
# removing the marker itself. Raises instead of looping forever when the
# marker is missing or another container is still open above it.
def pop_container(stack, marker, index)
  start = stack.rindex(marker)
  raise "Unmatched end tag for #{marker} at index #{index}" if start.nil?

  items = stack.pop(stack.length - start)
  items.shift # drop the marker itself
  # Decoded values are never Symbols, so a Symbol here is an unclosed marker.
  raise "Unclosed container before index #{index}" if items.any?(Symbol)

  items
end
576
+
577
# Compute a digest of a grammar's structure.
#
# The structure comes from atom_structure, is rendered with #to_s and hashed
# with MD5, so atoms that yield the same structure yield the same key even
# when they are different object instances.
#
# @param atom [Object] Root atom of the grammar
# @return [String] Hex MD5 digest of the structure's string form
def grammar_structure_hash(atom)
  Digest::MD5.hexdigest(atom_structure(atom).to_s)
end
584
+
585
# Recursively build a plain-Ruby description of a grammar atom for hashing.
#
# The result feeds the grammar cache key, so atoms that serialize differently
# must not produce the same description. Two cases need extra care:
#
# * Entities are lazy references. When the entity exposes #parslet, its body
#   is expanded the first time its name is met; later references with that
#   name become [:entity_ref, name], which also keeps recursive grammars
#   finite. Without #parslet only the name is used.
# * Atom classes not listed here contribute their class name, their #inspect
#   text and, when they expose #parslet, the wrapped atom's structure. With a
#   default Object#inspect the text is per-object, which only costs cache
#   sharing, never correctness.
#
# NOTE(review): Entity#parslet is assumed from the Parslet API and is not
# visible in this file; the respond_to? guard keeps the old name-only
# behavior if it is absent — TODO confirm.
#
# @param atom [Object] Atom to describe
# @param seen [Hash] Entity names already expanded during this traversal
# @return [Array] Nested array describing the atom
def atom_structure(atom, seen = {})
  case atom
  when ::Parsanol::Atoms::Str
    [:str, atom.str]
  when ::Parsanol::Atoms::Re
    [:re, atom.match]
  when ::Parsanol::Atoms::Sequence
    [:seq, atom.parslets.map { |child| atom_structure(child, seen) }]
  when ::Parsanol::Atoms::Alternative
    [:alt, atom.alternatives.map { |child| atom_structure(child, seen) }]
  when ::Parsanol::Atoms::Repetition
    [:rep, atom.min, atom.max, atom_structure(atom.parslet, seen)]
  when ::Parsanol::Atoms::Named
    [:named, atom.name.to_s, atom_structure(atom.parslet, seen)]
  when ::Parsanol::Atoms::Lookahead
    [:lookahead, atom.positive, atom_structure(atom.bound_parslet, seen)]
  when ::Parsanol::Atoms::Entity
    name = atom.name.to_s
    if !atom.respond_to?(:parslet)
      [:entity, name]
    elsif seen.key?(name)
      [:entity_ref, name]
    else
      # Mark before descending so self-references terminate.
      seen[name] = true
      [:entity, name, atom_structure(atom.parslet, seen)]
    end
  else
    wrapped = atom.respond_to?(:parslet) ? atom_structure(atom.parslet, seen) : nil
    [:unknown, atom.class.name, atom.inspect, wrapped]
  end
end
609
+ end
610
+ end
611
+ end
612
+ end