parsanol 1.2.2-aarch64-linux → 1.3.2-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,23 +5,13 @@ require 'digest'
5
5
  module Parsanol
6
6
  module Native
7
7
  # Core parsing functionality using Rust native extension
8
- #
9
- # Provides three parsing modes:
10
- # - :ruby - Parse and transform to Parslet-compatible format
11
- # - :json - Parse and return JSON-serialized AST
12
- # - :slice - Parse and return raw native format (fastest)
13
- #
14
8
  module Parser
15
- # Two-level grammar cache (module-level for proper initialization)
16
- # These MUST be mutable for caching to work
17
- GRAMMAR_HASH_CACHE = Hash.new # object_id => hash_key
18
- GRAMMAR_CACHE = Hash.new # hash_key => grammar_json
9
+ GRAMMAR_HASH_CACHE = Hash.new
10
+ GRAMMAR_CACHE = Hash.new
19
11
 
20
12
  class << self
21
- # Cached availability check
22
13
  @cached_available = nil
23
14
 
24
- # Check if native extension is available
25
15
  def available?
26
16
  return @cached_available unless @cached_available.nil?
27
17
 
@@ -33,603 +23,67 @@ module Parsanol
33
23
  end
34
24
  end
35
25
 
36
- # Parse using native engine
37
- # @param grammar_json [String] JSON-serialized grammar
38
- # @param input [String] Input string to parse
39
- # @param line_cache [Parsanol::Source::LineCache, nil] Optional line cache for position info
40
- # @return Ruby AST from parsing with Slice objects for strings
41
- def parse(grammar_json, input, line_cache = nil)
42
- raise LoadError, 'Native parser not available. Run `rake compile` to build.' unless available?
43
-
44
- # Build line cache if not provided
45
- line_cache ||= build_line_cache(input)
46
-
47
- # Call native parse_batch (returns flat u64 array)
48
- flat = Parsanol::Native.parse_batch(grammar_json, input)
49
- # Decode flat array to Ruby AST with Slice objects
50
- decode_flat(flat, input, line_cache)
51
- end
52
-
53
- # Parse a grammar with automatic serialization and caching
54
- # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
55
- # @param input [String] Input string to parse
56
- # @param line_cache [Parsanol::Source::LineCache, nil] Optional line cache
57
- # @return Ruby AST from parsing with Slice objects
58
- def parse_with_grammar(root_atom, input, line_cache = nil)
59
- # Extract root atom if a Parser is passed
60
- root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
61
- grammar_json = serialize_grammar(root_atom)
62
- parse(grammar_json, input, line_cache)
63
- end
64
-
65
- # Parse and transform to Parslet-compatible format
66
- # NOTE: This method now returns Slice objects with position info by default.
67
- # The name is kept for backward compatibility but it's now the primary parse method.
68
- # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
69
- # @param input [String] Input string to parse
70
- # @param line_cache [Parsanol::Source::LineCache, nil] Optional line cache
71
- # @return Ruby AST in Parslet-compatible format with Slice objects
72
- def parse_parslet_compatible(root_atom, input, line_cache = nil)
73
- # Extract root atom if a Parser is passed
74
- root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
75
- raw_ast = parse_with_grammar(root_atom, input, line_cache)
76
- AstTransformer.transform(raw_ast)
77
- end
78
-
79
- # Parse multiple inputs with the same grammar (more efficient)
80
- # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
81
- # @param inputs [Array<String>] Array of input strings to parse
82
- # @return [Array] Array of Ruby ASTs with Slice objects
83
- def parse_batch_inputs(root_atom, inputs)
84
- # Extract root atom if a Parser is passed
85
- root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
86
- grammar_json = serialize_grammar(root_atom)
87
- inputs.map { |input| parse(grammar_json, input) }
88
- end
89
-
90
- # Parse multiple inputs with transformation
91
- # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
92
- # @param inputs [Array<String>] Array of input strings to parse
93
- # @return [Array] Array of transformed Ruby ASTs with Slice objects
94
- def parse_batch_with_transform(root_atom, inputs)
95
- # Extract root atom if a Parser is passed
96
- root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
97
- grammar_json = serialize_grammar(root_atom)
98
- # First parse all inputs, then batch transform
99
- # This provides better cache locality
100
- raw_asts = inputs.map { |input| parse(grammar_json, input) }
101
- AstTransformer.transform_batch(raw_asts)
102
- end
103
-
104
- # Parse without transformation (faster for raw AST access)
105
- # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
26
+ # Parse input with a Ruby grammar, returning clean AST.
27
+ #
28
+ # @param grammar [Parsanol::Atoms::Base, String] Ruby grammar or JSON string
106
29
  # @param input [String] Input string to parse
107
- # @return Raw Ruby AST from parsing with Slice objects
108
- def parse_raw(root_atom, input)
109
- # Extract root atom if a Parser is passed
110
- root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
111
- parse_with_grammar(root_atom, input)
112
- end
113
-
114
- # Build a line cache for an input string
115
- # @param input [String] The input string
116
- # @return [Parsanol::Source::LineCache] The line cache
117
- def build_line_cache(input)
118
- cache = ::Parsanol::Source::LineCache.new
119
- cache.scan_for_line_endings(0, input)
120
- cache
30
+ def parse(grammar, input)
31
+ # Delegate to Parsanol::Native.parse for consistency
32
+ Parsanol::Native.parse(grammar, input)
121
33
  end
122
34
 
123
- # Serialize a grammar to JSON, with two-level caching
124
- # Level 1: object_id => hash_key (avoids grammar traversal)
125
- # Level 2: hash_key => grammar_json (avoids serialization)
126
- # @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
127
- # @return [String] JSON string
35
+ # Serialize a Ruby grammar to JSON (cached).
128
36
  def serialize_grammar(root_atom)
129
- # Level 1: Check if we've already computed the hash for this object
37
+ root_atom = root_atom.root if root_atom.is_a?(::Parsanol::Parser)
130
38
  obj_id = root_atom.object_id
131
- cache_key = GRAMMAR_HASH_CACHE[obj_id]
132
-
133
- if cache_key
134
- # Fast path: already computed hash, check grammar cache
135
- else
136
- # Slow path: compute structural hash
137
- cache_key = grammar_structure_hash(root_atom)
138
- GRAMMAR_HASH_CACHE[obj_id] = cache_key
139
- end
39
+ cache_key = GRAMMAR_HASH_CACHE[obj_id] ||= grammar_structure_hash(root_atom)
140
40
  GRAMMAR_CACHE[cache_key] ||= GrammarSerializer.serialize(root_atom)
141
41
  end
142
42
 
143
- # Clear grammar caches (call if grammar changes)
144
43
  def clear_cache
145
44
  GRAMMAR_HASH_CACHE.clear
146
45
  GRAMMAR_CACHE.clear
147
46
  end
148
47
 
149
- # Get cache statistics
150
48
  def cache_stats
151
49
  {
152
50
  hash_cache_size: GRAMMAR_HASH_CACHE.size,
153
- grammar_cache_size: GRAMMAR_CACHE.size,
154
- grammar_keys: GRAMMAR_CACHE.keys
51
+ grammar_cache_size: GRAMMAR_CACHE.size
155
52
  }
156
53
  end
157
54
 
158
- # ===== Serialized Mode (JSON Output) =====
159
-
160
- # Parse input and return JSON string
161
- # Uses native parsing and serializes the result to JSON
162
- #
163
- # @param grammar_json [String] JSON-serialized grammar
164
- # @param input [String] Input string to parse
165
- # @return [String] JSON string representing the result
166
- def parse_to_json(grammar_json, input)
167
- unless available?
168
- raise LoadError,
169
- "Serialized mode requires native extension. " \
170
- "Run `rake compile` to build the extension."
171
- end
172
-
173
- # Parse using native engine and convert result to JSON
174
- result = parse(grammar_json, input)
175
- result.to_json
176
- end
177
-
178
- # Parse and return direct Ruby objects via FFI
179
- # Uses ZeroCopy mode - Rust constructs Ruby objects directly via magnus FFI
180
- # This bypasses the u64 serialization step for maximum performance.
181
- #
182
- # Slice information is preserved: InputRef nodes from Rust are returned
183
- # directly as Parsanol::Slice objects (no intermediate hash conversion needed).
184
- #
185
- # @param grammar_json [String] JSON-serialized grammar
186
- # @param input [String] Input string to parse
187
- # @param type_map [Hash] Mapping of rule names to Ruby classes (not used in this mode)
188
- # @return [Object] Direct Ruby object (type depends on grammar)
189
- def parse_to_objects(grammar_json, input, _type_map = nil)
190
- unless available?
191
- raise LoadError,
192
- "ZeroCopy mode requires native extension. " \
193
- "Run `rake compile` to build the extension."
194
- end
195
-
196
- # Call Rust function that returns Slice objects directly
197
- # No need to convert - they are already Parsanol::Slice objects
198
- Parsanol::Native.parse_to_ruby_objects(grammar_json, input)
199
- end
200
-
201
- # Recursively convert slice hashes to Parsanol::Slice objects
202
- # Rust returns { "_slice" => true, "str" => "...", "offset" => N, "length" => N }
203
- # for InputRef nodes, which we convert to Slice objects preserving position info.
204
- #
205
- # @param obj [Object] The object to convert (may be Hash, Array, or leaf value)
206
- # @param input [String] The original input string (for Slice source reference)
207
- # @return [Object] The converted object with Slice objects in place of slice hashes
208
- def convert_slices(obj, input)
209
- case obj
210
- when Hash
211
- # Check if this is a slice marker from Rust
212
- if obj['_slice'] == true
213
- Parsanol::Slice.new(obj['offset'], obj['str'])
214
- else
215
- # Recursively convert hash values
216
- obj.transform_values { |v| convert_slices(v, input) }
217
- end
218
- when Array
219
- # Recursively convert array elements
220
- obj.map { |item| convert_slices(item, input) }
221
- else
222
- # Leaf values (strings, integers, etc.) are returned as-is
223
- obj
224
- end
225
- end
226
-
227
- # ===== Source Location Tracking =====
228
-
229
- # Parse with source location tracking
230
- # Returns both the AST and a hash of spans
231
- #
232
- # @param grammar_json [String] JSON-serialized grammar
233
- # @param input [String] Input string to parse
234
- # @return [Array<(Object, Hash)>] Tuple of [parsed_result, spans_hash]
235
- def parse_with_spans(grammar_json, input)
236
- unless available?
237
- raise LoadError,
238
- "Source location tracking requires native extension. " \
239
- "Run `rake compile` to build the extension."
240
- end
241
-
242
- _parse_with_spans(grammar_json, input)
243
- end
244
-
245
- # Get span for a specific node
246
- #
247
- # @param result [Object] Parse result from parse_with_spans
248
- # @param node_id [Integer] Node identifier
249
- # @return [Hash] Span information {start: {offset, line, column}, end: {...}}
250
- def get_span(result, node_id)
251
- raise LoadError, 'Source location tracking requires native extension.' unless available?
252
-
253
- _get_span(result, node_id)
254
- end
255
-
256
- # ===== Grammar Composition =====
257
-
258
- # Import another grammar with optional prefix
259
- #
260
- # @param builder_json [String] GrammarBuilder JSON
261
- # @param grammar_json [String] Grammar to import
262
- # @param prefix [String, nil] Optional prefix for imported rules
263
- # @return [String] Updated GrammarBuilder JSON
264
- def grammar_import(builder_json, grammar_json, prefix = nil)
265
- raise LoadError, 'Grammar composition requires native extension.' unless available?
266
-
267
- _grammar_import(builder_json, grammar_json, prefix)
268
- end
269
-
270
- # Get mutable reference to a rule
271
- #
272
- # @param builder_json [String] GrammarBuilder JSON
273
- # @param rule_name [String] Name of the rule to modify
274
- # @return [String] Updated GrammarBuilder JSON
275
- def grammar_rule_mut(builder_json, rule_name)
276
- raise LoadError, 'Grammar composition requires native extension.' unless available?
277
-
278
- _grammar_rule_mut(builder_json, rule_name)
279
- end
280
-
281
- # ===== Streaming Parser =====
282
-
283
- # Create a new streaming parser
284
- #
285
- # @param grammar_json [String] JSON-serialized grammar
286
- # @return [Object] Streaming parser instance
287
- def streaming_parser_new(grammar_json)
288
- raise LoadError, 'Streaming parser requires native extension.' unless available?
289
-
290
- _streaming_parser_new(grammar_json)
291
- end
292
-
293
- # Add a chunk to the streaming parser
294
- #
295
- # @param parser [Object] Streaming parser instance
296
- # @param chunk [String] Input chunk to add
297
- # @return [Boolean] True if more chunks needed, false if ready
298
- def streaming_parser_add_chunk(parser, chunk)
299
- raise LoadError, 'Streaming parser requires native extension.' unless available?
300
-
301
- _streaming_parser_add_chunk(parser, chunk)
302
- end
303
-
304
- # Parse what we have so far
305
- #
306
- # @param parser [Object] Streaming parser instance
307
- # @return [Object, nil] Parsed result or nil if need more data
308
- def streaming_parser_parse_chunk(parser)
309
- raise LoadError, 'Streaming parser requires native extension.' unless available?
310
-
311
- _streaming_parser_parse_chunk(parser)
312
- end
313
-
314
- # ===== Incremental Parser =====
315
-
316
- # Create a new incremental parser
317
- #
318
- # @param grammar_json [String] JSON-serialized grammar
319
- # @param initial_input [String] Initial input string
320
- # @return [Object] Incremental parser instance
321
- def incremental_parser_new(grammar_json, initial_input)
322
- raise LoadError, 'Incremental parser requires native extension.' unless available?
323
-
324
- _incremental_parser_new(grammar_json, initial_input)
325
- end
326
-
327
- # Apply an edit to the incremental parser
328
- #
329
- # @param parser [Object] Incremental parser instance
330
- # @param start [Integer] Start position of edit
331
- # @param deleted [Integer] Number of characters deleted
332
- # @param inserted [String] Text to insert
333
- # @return [Object] Updated parser state
334
- def incremental_parser_apply_edit(parser, start, deleted, inserted = '')
335
- raise LoadError, 'Incremental parser requires native extension.' unless available?
336
-
337
- _incremental_parser_apply_edit(parser, start, deleted, inserted)
338
- end
339
-
340
- # Reparse with changes
341
- #
342
- # @param parser [Object] Incremental parser instance
343
- # @param new_input [String, nil] Optional new input (if not using apply_edit)
344
- # @return [Object] Parse result
345
- def incremental_parser_reparse(parser, new_input = nil)
346
- raise LoadError, 'Incremental parser requires native extension.' unless available?
347
-
348
- _incremental_parser_reparse(parser, new_input)
349
- end
350
-
351
- # ===== Streaming Builder =====
352
-
353
- # Parse with a streaming builder for maximum performance.
354
- # The builder receives callbacks as parsing progresses, eliminating
355
- # intermediate AST construction.
356
- #
357
- # @param grammar_json [String] JSON-serialized grammar
358
- # @param input [String] Input string to parse
359
- # @param builder [Object] Object including BuilderCallbacks module
360
- # @return [Object] Result of builder.finish
361
- def parse_with_builder(grammar_json, input, builder)
362
- unless available?
363
- raise LoadError,
364
- "Streaming builder requires native extension. " \
365
- "Run `rake compile` to build the extension."
366
- end
367
-
368
- _parse_with_builder(grammar_json, input, builder)
369
- end
370
-
371
- # ===== Parallel Parsing =====
372
-
373
- # Parse multiple inputs in parallel using rayon.
374
- # Provides linear speedup on multi-core systems.
375
- #
376
- # @param grammar_json [String] JSON-serialized grammar
377
- # @param inputs [Array<String>] Array of input strings to parse
378
- # @param num_threads [Integer, nil] Number of threads (nil = auto-detect)
379
- # @return [Array<Object>] Array of parse results in same order as inputs
380
- def parse_batch_parallel(grammar_json, inputs, num_threads: nil)
381
- unless available?
382
- raise LoadError,
383
- "Parallel parsing requires native extension. " \
384
- "Run `rake compile` to build the extension."
385
- end
386
-
387
- _parse_batch_parallel(grammar_json, inputs, num_threads)
388
- end
389
-
390
- # ===== Security / Limits =====
391
-
392
- # Parse with custom limits for untrusted input.
393
- #
394
- # @param grammar_json [String] JSON-serialized grammar
395
- # @param input [String] Input string to parse
396
- # @param max_input_size [Integer] Maximum input size in bytes (default: 100MB)
397
- # @param max_recursion_depth [Integer] Maximum recursion depth (default: 1000)
398
- # @return [Object] Parse result
399
- def parse_with_limits(grammar_json, input, max_input_size: 100 * 1024 * 1024, max_recursion_depth: 1000)
400
- unless available?
401
- raise LoadError,
402
- "Security limits require native extension. " \
403
- "Run `rake compile` to build the extension."
404
- end
405
-
406
- _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
407
- end
408
-
409
- # ===== Debug Tools =====
410
-
411
- # Parse with tracing enabled for debugging.
412
- #
413
- # @param grammar_json [String] JSON-serialized grammar
414
- # @param input [String] Input string to parse
415
- # @return [Array<(Object, Array)>] Tuple of [parse_result, trace_events]
416
- def parse_with_trace(grammar_json, input)
417
- unless available?
418
- raise LoadError,
419
- "Debug tracing requires native extension. " \
420
- "Run `rake compile` to build the extension."
421
- end
422
-
423
- _parse_with_trace(grammar_json, input)
424
- end
425
-
426
- # Generate Mermaid diagram for a grammar.
427
- #
428
- # @param grammar_json [String] JSON-serialized grammar
429
- # @return [String] Mermaid diagram source
430
- def grammar_to_mermaid(grammar_json)
431
- unless available?
432
- raise LoadError,
433
- "Grammar visualization requires native extension. " \
434
- "Run `rake compile` to build the extension."
435
- end
436
-
437
- _grammar_to_mermaid(grammar_json)
438
- end
439
-
440
- # Generate GraphViz DOT diagram for a grammar.
441
- #
442
- # @param grammar_json [String] JSON-serialized grammar
443
- # @return [String] GraphViz DOT source
444
- def grammar_to_dot(grammar_json)
445
- unless available?
446
- raise LoadError,
447
- "Grammar visualization requires native extension. " \
448
- "Run `rake compile` to build the extension."
449
- end
450
-
451
- _grammar_to_dot(grammar_json)
452
- end
453
-
454
55
  private
455
56
 
456
- def _incremental_parser_reparse(parser, new_input)
457
- raise NotImplementedError, 'Native extension method not available'
458
- end
459
-
460
- def _parse_with_builder(grammar_json, input, builder)
461
- # Call native Rust function directly - parse_with_builder is exposed
462
- # from the native extension as a Ruby function
463
- Parsanol::Native.parse_with_builder(grammar_json, input, builder)
464
- end
465
-
466
- def _parse_batch_parallel(grammar_json, inputs, num_threads)
467
- raise NotImplementedError, 'Native extension method not available'
468
- end
469
-
470
- def _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
471
- raise NotImplementedError, 'Native extension method not available'
472
- end
473
-
474
- def _parse_with_trace(grammar_json, input)
475
- raise NotImplementedError, 'Native extension method not available'
476
- end
477
-
478
- def _grammar_to_mermaid(grammar_json)
479
- raise NotImplementedError, 'Native extension method not available'
480
- end
481
-
482
- def _grammar_to_dot(grammar_json)
483
- raise NotImplementedError, 'Native extension method not available'
57
+ def grammar_structure_hash(atom)
58
+ Digest::MD5.hexdigest(atom_structure(atom).to_s)
484
59
  end
485
60
 
486
- # Decode flat u64 array to Ruby AST
487
- # Tags:
488
- # 0x00 = nil
489
- # 0x01 = bool
490
- # 0x02 = int
491
- # 0x03 = float
492
- # 0x04 = string_ref (offset, length) - creates Slice with position info
493
- # 0x05 = array_start
494
- # 0x06 = array_end
495
- # 0x07 = hash_start
496
- # 0x08 = hash_end
497
- # 0x09 = hash_key (tag, len, key_chunks..., value)
498
- # 0x0A = inline_string (interned string from arena)
499
- #
500
- # @param flat [Array<Integer>] Flat u64 array from native parser
501
- # @param input [String] Original input string
502
- # @param line_cache [Parsanol::Source::LineCache, nil] Line cache for position info
503
- # @return Ruby AST with Slice objects for all string values
504
- def decode_flat(flat, input, line_cache = nil)
505
- stack = []
506
- i = 0
507
-
508
- while i < flat.length
509
- tag = flat[i]
510
-
511
- case tag
512
- when 0x00 # nil
513
- stack << nil
514
- i += 1
515
- when 0x01 # bool
516
- stack << (flat[i + 1] != 0)
517
- i += 2
518
- when 0x02 # int
519
- stack << flat[i + 1]
520
- i += 2
521
- when 0x03 # float
522
- # Decode IEEE 754 float from bits
523
- bits = flat[i + 1]
524
- float = [bits].pack('Q').unpack1('D')
525
- stack << float
526
- i += 2
527
- when 0x04 # string_ref (from input) - create Slice with position info
528
- offset = flat[i + 1]
529
- length = flat[i + 2]
530
- content = input.byteslice(offset, length)
531
- # Create Slice with position info - this is the key change
532
- stack << ::Parsanol::Slice.new(offset, content, line_cache)
533
- i += 3
534
- when 0x0A # inline_string (interned string from arena)
535
- # Format: tag, len, u64 chunks of string bytes
536
- len = flat[i + 1]
537
- i += 2
538
-
539
- # Read string bytes from u64 chunks
540
- chunks = (len + 7) / 8
541
- bytes = []
542
- chunks.times do |j|
543
- chunk = flat[i + j]
544
- 8.times do |k|
545
- break if bytes.length >= len
546
-
547
- bytes << ((chunk >> (k * 8)) & 0xff)
548
- end
549
- end
550
- i += chunks
551
-
552
- # Inline strings don't have source position, use Slice with offset 0
553
- content = bytes.pack('C*').force_encoding('UTF-8')
554
- stack << ::Parsanol::Slice.new(0, content, nil)
555
- when 0x05 # array_start
556
- stack << :array_marker
557
- i += 1
558
- when 0x06 # array_end
559
- items = []
560
- items.unshift(stack.pop) until stack.last == :array_marker
561
- stack.pop # Remove marker
562
- stack << items
563
- i += 1
564
- when 0x07 # hash_start
565
- stack << :hash_marker
566
- i += 1
567
- when 0x08 # hash_end
568
- pairs = []
569
- while stack.last != :hash_marker
570
- value = stack.pop
571
- key = stack.pop
572
- pairs.unshift([key, value])
573
- end
574
- stack.pop # Remove marker
575
- stack << pairs.to_h
576
- i += 1
577
- when 0x09 # hash_key
578
- # Format: tag, len, key_chunks..., then value
579
- len = flat[i + 1]
580
- i += 2 # Skip tag and len
581
-
582
- # Read key bytes from u64 chunks
583
- chunks = (len + 7) / 8
584
- key_bytes = []
585
- chunks.times do |j|
586
- chunk = flat[i + j]
587
- 8.times do |k|
588
- break if key_bytes.length >= len
589
-
590
- key_bytes << ((chunk >> (k * 8)) & 0xff)
591
- end
592
- end
593
- i += chunks
594
-
595
- key = key_bytes.pack('C*').force_encoding('UTF-8')
596
- stack << key
597
- else
598
- raise "Unknown tag: #{tag} at index #{i}"
599
- end
61
+ def atom_structure(atom, visited = {})
62
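+ # Cycle detection - return a placeholder if we've seen this atom before (entries in `visited` are never removed, so an atom reached again via another branch also gets the placeholder)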
63
+ obj_id = atom.object_id
64
+ if visited[obj_id]
65
+ return [:cycle, atom.class.name]
600
66
  end
67
+ visited[obj_id] = true
601
68
 
602
- stack.first
603
- end
604
-
605
- # Compute structural hash of a grammar atom
606
- # This returns the same hash for grammars with the same structure
607
- # regardless of whether they are different object instances
608
- def grammar_structure_hash(atom)
609
- structure = atom_structure(atom)
610
- Digest::MD5.hexdigest(structure.to_s)
611
- end
612
-
613
- # Recursively build structure representation for hashing
614
- def atom_structure(atom)
615
69
  case atom
70
+ when ::Parsanol::Atoms::Entity
71
+ # Recursively resolve entity to get actual structure for hash
72
+ atom_structure(atom.parslet, visited)
616
73
  when ::Parsanol::Atoms::Str
617
74
  [:str, atom.str]
618
75
  when ::Parsanol::Atoms::Re
619
76
  [:re, atom.match]
620
77
  when ::Parsanol::Atoms::Sequence
621
- [:seq, atom.parslets.map { |p| atom_structure(p) }]
78
+ [:seq, atom.parslets.map { |p| atom_structure(p, visited) }]
622
79
  when ::Parsanol::Atoms::Alternative
623
- [:alt, atom.alternatives.map { |p| atom_structure(p) }]
80
+ [:alt, atom.alternatives.map { |p| atom_structure(p, visited) }]
624
81
  when ::Parsanol::Atoms::Repetition
625
- [:rep, atom.min, atom.max, atom_structure(atom.parslet)]
82
+ [:rep, atom.min, atom.max, atom_structure(atom.parslet, visited)]
626
83
  when ::Parsanol::Atoms::Named
627
- [:named, atom.name.to_s, atom_structure(atom.parslet)]
84
+ [:named, atom.name.to_s, atom_structure(atom.parslet, visited)]
628
85
  when ::Parsanol::Atoms::Lookahead
629
- [:lookahead, atom.positive, atom_structure(atom.bound_parslet)]
630
- when ::Parsanol::Atoms::Entity
631
- # Entity is a lazy reference - use its name for hashing
632
- [:entity, atom.name.to_s]
86
+ [:lookahead, atom.positive, atom_structure(atom.bound_parslet, visited)]
633
87
  else
634
88
  [:unknown, atom.class.name]
635
89
  end