parsanol 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +546 -0
  3. data/Cargo.toml +9 -0
  4. data/HISTORY.txt +12 -0
  5. data/LICENSE +23 -0
  6. data/README.adoc +487 -0
  7. data/Rakefile +135 -0
  8. data/ext/parsanol_native/Cargo.toml +34 -0
  9. data/ext/parsanol_native/extconf.rb +15 -0
  10. data/ext/parsanol_native/src/lib.rs +17 -0
  11. data/lib/parsanol/ast_visitor.rb +122 -0
  12. data/lib/parsanol/atoms/alternative.rb +122 -0
  13. data/lib/parsanol/atoms/base.rb +202 -0
  14. data/lib/parsanol/atoms/can_flatten.rb +194 -0
  15. data/lib/parsanol/atoms/capture.rb +38 -0
  16. data/lib/parsanol/atoms/context.rb +334 -0
  17. data/lib/parsanol/atoms/context_optimized.rb +38 -0
  18. data/lib/parsanol/atoms/custom.rb +110 -0
  19. data/lib/parsanol/atoms/cut.rb +66 -0
  20. data/lib/parsanol/atoms/dsl.rb +96 -0
  21. data/lib/parsanol/atoms/dynamic.rb +39 -0
  22. data/lib/parsanol/atoms/entity.rb +75 -0
  23. data/lib/parsanol/atoms/ignored.rb +37 -0
  24. data/lib/parsanol/atoms/infix.rb +162 -0
  25. data/lib/parsanol/atoms/lookahead.rb +82 -0
  26. data/lib/parsanol/atoms/named.rb +74 -0
  27. data/lib/parsanol/atoms/re.rb +83 -0
  28. data/lib/parsanol/atoms/repetition.rb +259 -0
  29. data/lib/parsanol/atoms/scope.rb +35 -0
  30. data/lib/parsanol/atoms/sequence.rb +194 -0
  31. data/lib/parsanol/atoms/str.rb +103 -0
  32. data/lib/parsanol/atoms/visitor.rb +91 -0
  33. data/lib/parsanol/atoms.rb +46 -0
  34. data/lib/parsanol/buffer.rb +133 -0
  35. data/lib/parsanol/builder_callbacks.rb +353 -0
  36. data/lib/parsanol/cause.rb +122 -0
  37. data/lib/parsanol/context.rb +39 -0
  38. data/lib/parsanol/convenience.rb +36 -0
  39. data/lib/parsanol/edit_tracker.rb +111 -0
  40. data/lib/parsanol/error_reporter/contextual.rb +99 -0
  41. data/lib/parsanol/error_reporter/deepest.rb +120 -0
  42. data/lib/parsanol/error_reporter/tree.rb +63 -0
  43. data/lib/parsanol/error_reporter.rb +100 -0
  44. data/lib/parsanol/expression/treetop.rb +154 -0
  45. data/lib/parsanol/expression.rb +106 -0
  46. data/lib/parsanol/fast_mode.rb +149 -0
  47. data/lib/parsanol/first_set.rb +79 -0
  48. data/lib/parsanol/grammar_builder.rb +177 -0
  49. data/lib/parsanol/incremental_parser.rb +177 -0
  50. data/lib/parsanol/interval_tree.rb +217 -0
  51. data/lib/parsanol/lazy_result.rb +179 -0
  52. data/lib/parsanol/lexer.rb +144 -0
  53. data/lib/parsanol/mermaid.rb +139 -0
  54. data/lib/parsanol/native/parser.rb +612 -0
  55. data/lib/parsanol/native/serializer.rb +248 -0
  56. data/lib/parsanol/native/transformer.rb +435 -0
  57. data/lib/parsanol/native/types.rb +42 -0
  58. data/lib/parsanol/native.rb +217 -0
  59. data/lib/parsanol/optimizer.rb +85 -0
  60. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  61. data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
  62. data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
  63. data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
  64. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  65. data/lib/parsanol/options/ruby_transform.rb +107 -0
  66. data/lib/parsanol/options/serialized.rb +94 -0
  67. data/lib/parsanol/options/zero_copy.rb +128 -0
  68. data/lib/parsanol/options.rb +20 -0
  69. data/lib/parsanol/parallel.rb +133 -0
  70. data/lib/parsanol/parser.rb +182 -0
  71. data/lib/parsanol/parslet.rb +151 -0
  72. data/lib/parsanol/pattern/binding.rb +91 -0
  73. data/lib/parsanol/pattern.rb +159 -0
  74. data/lib/parsanol/pool.rb +219 -0
  75. data/lib/parsanol/pools/array_pool.rb +75 -0
  76. data/lib/parsanol/pools/buffer_pool.rb +175 -0
  77. data/lib/parsanol/pools/position_pool.rb +92 -0
  78. data/lib/parsanol/pools/slice_pool.rb +64 -0
  79. data/lib/parsanol/position.rb +94 -0
  80. data/lib/parsanol/resettable.rb +29 -0
  81. data/lib/parsanol/result.rb +46 -0
  82. data/lib/parsanol/result_builder.rb +208 -0
  83. data/lib/parsanol/result_stream.rb +261 -0
  84. data/lib/parsanol/rig/rspec.rb +71 -0
  85. data/lib/parsanol/rope.rb +81 -0
  86. data/lib/parsanol/scope.rb +104 -0
  87. data/lib/parsanol/slice.rb +146 -0
  88. data/lib/parsanol/source/line_cache.rb +109 -0
  89. data/lib/parsanol/source.rb +180 -0
  90. data/lib/parsanol/source_location.rb +167 -0
  91. data/lib/parsanol/streaming_parser.rb +124 -0
  92. data/lib/parsanol/string_view.rb +195 -0
  93. data/lib/parsanol/transform.rb +226 -0
  94. data/lib/parsanol/version.rb +5 -0
  95. data/lib/parsanol/wasm/README.md +80 -0
  96. data/lib/parsanol/wasm/package.json +51 -0
  97. data/lib/parsanol/wasm/parsanol.js +252 -0
  98. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  99. data/lib/parsanol/wasm_parser.rb +240 -0
  100. data/lib/parsanol.rb +280 -0
  101. data/parsanol-ruby.gemspec +67 -0
  102. metadata +293 -0
@@ -0,0 +1,248 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parsanol
4
+ # Grammar serializer for native parser
5
+ # Serializes Parslet atoms to JSON format expected by Rust parser
6
+ #
7
+ class GrammarSerializer
8
+ # Serialize a Parslet grammar (root atom) to JSON string
9
+ #
10
+ # @param root [Parsanol::Atoms::Base] The root atom of the grammar
11
+ # @return [String] JSON representation of the grammar
12
# Convenience entry point: serializes +root+ with a throwaway serializer.
#
# A new instance is built for every call because the atom list and the
# deduplication cache are state that belongs to one grammar only.
#
# @param root [Parsanol::Atoms::Base] root atom of the grammar
# @return [String] JSON representation of the grammar
def self.serialize(root)
  serializer = new
  serializer.serialize(root)
end
17
+
18
# Sets up the empty per-grammar state used while serializing.
def initialize
  # object_id => atom_id, so an atom reached twice is emitted only once
  @atom_cache = {}
  # Serialized atoms; an atom's position in this list is its atom_id
  @atoms = []
end
22
+
23
+ # Main serialization method
24
# Serializes the grammar rooted at +root+.
#
# The root is serialized first because doing so fills @atoms; only then
# is the atom list rendered. The envelope is assembled as a string rather
# than through an intermediate Hash plus to_json.
#
# @param root [Parsanol::Atoms::Base] root atom of the grammar
# @return [String] JSON object with the keys "atoms" and "root"
def serialize(root)
  root_id = serialize_atom(root)
  format('{"atoms":%s,"root":%s}', @atoms.to_json, root_id)
end
31
+
32
+ private
33
+
34
+ # Serialize a single atom and return its atom_id
35
# Serializes one atom (children first) and returns its atom_id.
#
# An atom that was already serialized is answered from the cache. Entity
# atoms are handed to serialize_entity, which reserves its id before
# resolving so that self-referencing rules terminate.
#
# serialize_capture and serialize_scope do not build a node: they return
# the atom_id of the atom they wrap. Such an Integer result is treated as
# an alias - the wrapper is cached under the inner id and nothing is
# appended - so that @atoms only ever contains node hashes.
#
# @param atom [Parsanol::Atoms::Base] atom to serialize
# @return [Integer] index of the atom's node in @atoms
def serialize_atom(atom)
  cache_key = atom.object_id
  return @atom_cache[cache_key] if @atom_cache.key?(cache_key)

  return serialize_entity(atom) if atom.is_a?(Parsanol::Atoms::Entity)

  serialized = case atom
               when Parsanol::Atoms::Str then serialize_str(atom)
               when Parsanol::Atoms::Re then serialize_re(atom)
               when Parsanol::Atoms::Sequence then serialize_sequence(atom)
               when Parsanol::Atoms::Alternative then serialize_alternative(atom)
               when Parsanol::Atoms::Repetition then serialize_repetition(atom)
               when Parsanol::Atoms::Named then serialize_named(atom)
               when Parsanol::Atoms::Lookahead then serialize_lookahead(atom)
               when Parsanol::Atoms::Capture then serialize_capture(atom)
               when Parsanol::Atoms::Scope then serialize_scope(atom)
               when Parsanol::Atoms::Dynamic then serialize_dynamic(atom)
               else serialize_unknown(atom)
               end

  # Transparent wrapper: the helper returned the inner atom's id, not a node.
  if serialized.is_a?(Integer)
    @atom_cache[cache_key] = serialized
    return serialized
  end

  # The id is taken only now, after the children have claimed theirs.
  atom_id = @atoms.size
  @atom_cache[cache_key] = atom_id
  @atoms << serialized
  atom_id
end
78
+
79
# Builds the node for a literal-string atom.
#
# @param atom [Parsanol::Atoms::Str] atom exposing the literal via #str
# @return [Hash] {'Str' => {'pattern' => literal}}
def serialize_str(atom)
  literal = atom.str
  { 'Str' => { 'pattern' => literal } }
end
86
+
87
# Builds the node for a regular-expression atom.
#
# Regexp#to_s renders a pattern as "(?-mix:pattern)". When the whole
# pattern string has that wrapped shape, only the inner pattern is sent to
# the native parser; any other string is passed through unchanged.
#
# The match is anchored with \A / \z and uses /m so that it applies to the
# entire string: with line anchors, "x\n(?-mix:y)" would be truncated to
# "y", and a wrapped pattern containing a newline would not be unwrapped.
#
# NOTE(review): the flag section accepts any combination of -, m, i and x,
# so a wrapper that enables a flag (e.g. "(?i-mx:abc)") is unwrapped as
# well and the flag is dropped - confirm this is intended.
#
# @param atom [Parsanol::Atoms::Re] atom exposing the pattern via #match
# @return [Hash] {'Re' => {'pattern' => pattern}}
def serialize_re(atom)
  pattern = atom.match
  wrapped = /\A\(\?[-mix]*:(.+)\)\z/m.match(pattern)
  pattern = wrapped[1] if wrapped
  { 'Re' => { 'pattern' => pattern } }
end
98
+
99
# Builds the node for a sequence; the members are serialized in order and
# referenced by atom_id.
#
# @param atom [Parsanol::Atoms::Sequence] atom exposing members via #parslets
# @return [Hash] {'Sequence' => {'atoms' => [atom_id, ...]}}
def serialize_sequence(atom)
  member_ids = atom.parslets.each_with_object([]) do |member, ids|
    ids << serialize_atom(member)
  end
  { 'Sequence' => { 'atoms' => member_ids } }
end
107
+
108
# Builds the node for an ordered choice; the branches are serialized in
# order and referenced by atom_id.
#
# @param atom [Parsanol::Atoms::Alternative] atom exposing branches via #alternatives
# @return [Hash] {'Alternative' => {'atoms' => [atom_id, ...]}}
def serialize_alternative(atom)
  branch_ids = atom.alternatives.each_with_object([]) do |branch, ids|
    ids << serialize_atom(branch)
  end
  { 'Alternative' => { 'atoms' => branch_ids } }
end
116
+
117
# Builds the node for a repetition of a single atom.
#
# @param atom [Parsanol::Atoms::Repetition] atom exposing #parslet, #min and #max
# @return [Hash] {'Repetition' => {'atom' => atom_id, 'min' => min, 'max' => max}}
def serialize_repetition(atom)
  body = { 'atom' => serialize_atom(atom.parslet) }
  body['min'] = atom.min
  body['max'] = atom.max
  { 'Repetition' => body }
end
126
+
127
# Builds the node for a named capture around a single atom.
#
# @param atom [Parsanol::Atoms::Named] atom exposing #name and #parslet
# @return [Hash] {'Named' => {'name' => String, 'atom' => atom_id}}
def serialize_named(atom)
  label = atom.name.to_s
  { 'Named' => { 'name' => label, 'atom' => serialize_atom(atom.parslet) } }
end
135
+
136
# Serializes an Entity (a lazy reference to a rule body) and returns its
# atom_id.
#
# The id is reserved and cached BEFORE the entity is resolved, with a nil
# placeholder occupying the slot, so that a rule that refers to itself
# finds its id in the cache instead of recursing forever. The placeholder
# is overwritten once the resolved parslet has been serialized.
#
# The resolved parslet is serialized into the reserved slot rather than
# through serialize_atom. Capture, Scope and Dynamic parslets are handled
# here as well, so that they do not fall through to serialize_unknown.
# Helpers that return an atom_id instead of a node (nested entities,
# captures, scopes) are stored as an 'Entity' reference node pointing at
# that id.
#
# If resolving raises a StandardError or yields nil, the slot receives a
# Str marker naming the entity.
#
# @param atom [Parsanol::Atoms::Entity] entity exposing #parslet and #name
# @return [Integer] the atom_id reserved for this entity
def serialize_entity(atom)
  atom_id = @atoms.size
  @atom_cache[atom.object_id] = atom_id
  @atoms << nil

  parslet = begin
    atom.parslet
  rescue StandardError
    nil
  end

  unless parslet
    @atoms[atom_id] = {
      'Str' => {
        'pattern' => "\x00__UNIMPLEMENTED_ENTITY_#{atom.name}__"
      }
    }
    return atom_id
  end

  serialized = case parslet
               when Parsanol::Atoms::Str then serialize_str(parslet)
               when Parsanol::Atoms::Re then serialize_re(parslet)
               when Parsanol::Atoms::Sequence then serialize_sequence(parslet)
               when Parsanol::Atoms::Alternative then serialize_alternative(parslet)
               when Parsanol::Atoms::Repetition then serialize_repetition(parslet)
               when Parsanol::Atoms::Named then serialize_named(parslet)
               when Parsanol::Atoms::Entity then serialize_atom(parslet)
               when Parsanol::Atoms::Lookahead then serialize_lookahead(parslet)
               when Parsanol::Atoms::Capture then serialize_capture(parslet)
               when Parsanol::Atoms::Scope then serialize_scope(parslet)
               when Parsanol::Atoms::Dynamic then serialize_dynamic(parslet)
               else serialize_unknown(parslet)
               end

  # An Integer is the id of another atom, not a node: store a reference.
  serialized = { 'Entity' => { 'atom' => serialized } } if serialized.is_a?(Integer)

  @atoms[atom_id] = serialized
  atom_id
end
191
+
192
# Builds the node for a lookahead around a single atom.
#
# @param atom [Parsanol::Atoms::Lookahead] atom exposing #bound_parslet and #positive
# @return [Hash] {'Lookahead' => {'atom' => atom_id, 'positive' => flag}}
def serialize_lookahead(atom)
  target_id = serialize_atom(atom.bound_parslet)
  { 'Lookahead' => { 'atom' => target_id, 'positive' => atom.positive } }
end
200
+
201
# Serializes the atom wrapped by a Capture and returns ITS atom_id.
#
# No node is produced for the capture itself: per the design notes of this
# serializer, the native parser has no cross-atom captures, so grammars
# that combine capture with dynamic need the Ruby fallback.
#
# @param atom [Parsanol::Atoms::Capture] atom exposing the wrapped atom via #parslet
# @return [Integer] atom_id of the wrapped atom
def serialize_capture(atom)
  wrapped = atom.parslet
  serialize_atom(wrapped)
end
208
+
209
# Serializes the atom produced by a Scope's block.
#
# The scope itself is not represented. The block is called to obtain the
# inner atom; a StandardError raised by the block is swallowed and treated
# like a nil result.
#
# @param atom [Parsanol::Atoms::Scope] atom exposing its block via #block
# @return [Integer, Hash] atom_id of the inner atom, or the
#   serialize_unknown node when the block yields nothing usable
def serialize_scope(atom)
  body = begin
    atom.block.call
  rescue StandardError
    nil
  end

  body ? serialize_atom(body) : serialize_unknown(atom)
end
224
+
225
# Builds a stand-in node for a Dynamic atom.
#
# A Dynamic atom evaluates a Ruby block at parse time, which cannot be
# expressed in the JSON grammar. A Str node with a NUL-prefixed marker is
# emitted instead; it is intended to fail at parse time so that the
# unsupported portion is visible.
#
# @param _atom [Parsanol::Atoms::Dynamic] ignored
# @return [Hash] Str node carrying the marker pattern
def serialize_dynamic(_atom)
  marker = "\x00__DYNAMIC_NOT_SUPPORTED__"
  { 'Str' => { 'pattern' => marker } }
end
237
+
238
# Builds a stand-in node for an atom type this serializer does not know.
#
# The node is meant to make parsing fail at runtime. It therefore carries
# a NUL-prefixed marker, like the other placeholders in this class, rather
# than an empty pattern: an empty literal would presumably match at every
# position with zero width, so the unsupported part of the grammar would
# be skipped silently instead of failing.
# NOTE(review): assumes the native Str matcher treats '' as an
# always-successful match - confirm against the Rust implementation.
#
# @param _atom [Object] the unsupported atom (ignored)
# @return [Hash] Str node carrying the marker pattern
def serialize_unknown(_atom)
  {
    'Str' => {
      'pattern' => "\x00__UNSUPPORTED_ATOM__"
    }
  }
end
247
+ end
248
+ end
@@ -0,0 +1,435 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parsanol
4
+ module Native
5
+ # Transforms native AST format to Parslet-compatible format
6
+ #
7
+ # Native format from Rust parser:
8
+ # - Strings: "text"
9
+ # - Sequences: [":sequence", item1, item2, ...]
10
+ # - Repetitions: [":repetition", item1, item2, ...]
11
+ # - Named captures: {"name" => value}
12
+ #
13
+ # Parslet format:
14
+ # - Strings: "text" (with Parsanol::Slice for position info)
15
+ # - Sequences: merged hash {:key1 => val1, :key2 => val2, ...}
16
+ # - Repetitions: array of items (or "" if empty string-like)
17
+ # - Named wrapping Repetition: {:name => [{:name => item1}, {:name => item2}, ...]}
18
+ #
19
+ class AstTransformer
20
+ # Frozen string constants for tag comparisons (avoid allocations)
21
+ SEQUENCE_TAG = ':sequence'
22
+ REPETITION_TAG = ':repetition'
23
+ EMPTY_STRING = ''
24
+ EMPTY_ARRAY = [].freeze
25
+ EMPTY_HASH = {}.freeze
26
+
27
+ # Symbol cache to avoid repeated string-to-symbol conversions
28
+ # This is a class variable to share across all transformations
29
+ @@symbol_cache = {}
30
+
31
# Entry point: converts one native AST node into its Parslet-style form.
#
# Arrays and hashes are dispatched to their dedicated transformers; any
# other value (strings, nil, ...) is returned unchanged.
#
# @param ast [Array, Hash, Object] native AST node
# @return [Object] transformed node
def self.transform(ast)
  return transform_array(ast) if ast.is_a?(Array)
  return transform_hash(ast) if ast.is_a?(Hash)

  ast
end
41
+
42
+ # Batch transformation for multiple ASTs
43
+ # Provides better cache locality than transforming individually
44
# Transforms every AST in +asts+ with transform.
#
# @param asts [Enumerable] native AST nodes
# @return [Array] transformed nodes, in the same order
def self.transform_batch(asts)
  asts.map(&method(:transform))
end
47
+
48
+ # Convert string key to symbol with caching
49
# Returns +key+ as a Symbol.
#
# Symbols are returned as they are; any other key is converted with to_sym
# and the result is memoized in the class-wide @@symbol_cache.
#
# @param key [Symbol, String] hash key coming from the native AST
# @return [Symbol]
def self.cached_symbol(key)
  case key
  when Symbol then key
  else @@symbol_cache[key] ||= key.to_sym
  end
end
54
+
55
# Transforms a native array node.
#
# * Empty array: EMPTY_ARRAY.
# * First element SEQUENCE_TAG / REPETITION_TAG: the remaining elements
#   are transformed and passed to flatten_sequence / flatten_repetition;
#   a tag with nothing after it yields EMPTY_ARRAY.
# * First element is some other String starting with ':': every element
#   (the first one included) is transformed and returned as an array.
# * Anything else is treated as an untagged sequence: all elements are
#   transformed and passed to flatten_sequence.
#
# NOTE(review): because tags are ordinary strings, an untagged sequence
# whose first element is matched text beginning with ':' takes one of the
# tagged branches - confirm the native side cannot produce that.
#
# @param arr [Array] native array node
# @return [Object] whatever the selected flattening step returns
def self.transform_array(arr)
  return EMPTY_ARRAY if arr.empty? # Match Parsanol Ruby mode behavior

  tag = arr.first
  unless tag.is_a?(String) && tag.start_with?(':')
    return flatten_sequence(arr.map { |node| transform(node) })
  end

  is_sequence = tag == SEQUENCE_TAG
  return arr.map { |node| transform(node) } unless is_sequence || tag == REPETITION_TAG

  return EMPTY_ARRAY if arr.length == 1

  # Build the transformed children directly, without slicing arr first.
  children = Array.new(arr.length - 1) { |index| transform(arr[index + 1]) }
  is_sequence ? flatten_sequence(children) : flatten_repetition(children)
end
96
+
97
# Transforms a native hash node.
#
# Hashes with exactly one entry take the dedicated fast path; every other
# size goes through the general multi-key path.
#
# @param hash [Hash] native hash node
# @return [Hash] hash with symbolized keys and transformed values
def self.transform_hash(hash)
  hash.length == 1 ? transform_single_key_hash(hash) : transform_multi_key_hash(hash)
end
105
+
106
+ # Optimized handling for single-key hashes (the common case)
107
# Transforms a one-entry hash ({"name" => value}).
#
# The value counts as a repetition when any of these holds:
# * the raw value is an array whose first element is REPETITION_TAG;
# * the raw value is a non-empty array of one-key hashes that all use the
#   parent's (raw) key;
# * the raw value is an empty array;
# * the transformed value is a non-empty array of one-key hashes that all
#   use the parent's symbolized key.
#
# Between the raw and the transformed checks, a transformed array made up
# only of one-character strings is joined into a single string.
#
# Repetitions go to transform_repetition_value, other arrays to
# transform_array_value, and everything else is stored under the
# symbolized key as it is.
#
# @param hash [Hash] hash with exactly one entry
# @return [Hash] {symbolized_key => converted value}
def self.transform_single_key_hash(hash)
  key, value = hash.first
  sym_key = cached_symbol(key)

  # True when +list+ is a non-empty array of one-key hashes keyed by +name+.
  wraps_name = lambda do |list, name|
    list.is_a?(Array) && !list.empty? &&
      list.all? { |entry| entry.is_a?(Hash) && entry.keys.length == 1 && entry.key?(name) }
  end

  # These checks look at the RAW value, before it is transformed.
  raw_is_array = value.is_a?(Array)
  tagged = raw_is_array && !value.empty? &&
           value.first.is_a?(String) && value.first == REPETITION_TAG
  raw_repetition = wraps_name.call(value, key)
  empty_repetition = raw_is_array && value.empty?

  transformed = transform(value)

  # Arrays of single characters are string-like: join them.
  if transformed.is_a?(Array) && !transformed.empty? &&
     transformed.all? { |entry| entry.is_a?(String) && entry.length == 1 }
    transformed = transformed.join
  end

  repetition = tagged || raw_repetition ||
               wraps_name.call(transformed, sym_key) || empty_repetition

  if repetition
    transform_repetition_value(sym_key, transformed)
  elsif transformed.is_a?(Array)
    transform_array_value(sym_key, transformed)
  else
    { sym_key => transformed }
  end
end
158
+
159
+ # Handle repetition values (named wrapping repetition)
160
# Shapes the value of a named repetition.
#
# * Empty array or empty string: EMPTY_ARRAY.
# * Array whose items are all hashes containing +sym_key+: kept as it is,
#   so the items are not wrapped a second time.
# * Any other array: each item is wrapped as {sym_key => item}.
# * Any other value: kept as it is.
#
# @param sym_key [Symbol] name of the capture
# @param transformed [Object] already transformed repetition value
# @return [Hash] {sym_key => shaped value}
def self.transform_repetition_value(sym_key, transformed)
  unless transformed.is_a?(Array)
    # An empty repetition is reported as [], never as "".
    payload = transformed == EMPTY_STRING ? EMPTY_ARRAY : transformed
    return { sym_key => payload }
  end

  return { sym_key => EMPTY_ARRAY } if transformed.empty?

  already_named = transformed.all? { |entry| entry.is_a?(Hash) && entry.key?(sym_key) }
  return { sym_key => transformed } if already_named

  { sym_key => transformed.map { |entry| { sym_key => entry } } }
end
178
+
179
+ # Handle array values (non-repetition case)
180
# Shapes the value of a named array that was NOT detected as a repetition.
#
# An empty array becomes EMPTY_STRING (sequence semantics; empty
# repetitions are recognised earlier by the caller and never arrive here).
# Every non-empty array is stored unchanged - whether its items already
# carry the parent key, are hashes with other keys (for example the result
# of (str(',') >> item).repeat.as(:rest)), or are anything else. The items
# already have their final structure, so none of them is wrapped.
#
# The per-item scans that used to pick between three identical outcomes
# are gone; the result is the same for every input.
#
# @param sym_key [Symbol] name of the capture
# @param transformed [Array] already transformed array value
# @return [Hash] {sym_key => "" } for an empty array, else {sym_key => transformed}
def self.transform_array_value(sym_key, transformed)
  { sym_key => transformed.empty? ? EMPTY_STRING : transformed }
end
200
+
201
+ # Slow path: multi-key hash (rare)
202
# Transforms a hash that does not have exactly one entry.
#
# Each key/value pair is converted with transform_single_key_hash and the
# one-entry results are merged, so a pair is shaped the same way whether
# it arrives alone or next to other keys. Previously this path had its own
# rules that disagreed with the single-key path: arrays of hashes were
# wrapped a second time under the parent key, an empty tagged repetition
# could come back as "" instead of [], and arrays of single characters
# were not joined.
#
# When two keys symbolize to the same name, the later pair wins.
#
# @param hash [Hash] native hash node (any number of entries, including none)
# @return [Hash] new hash with symbolized keys and converted values
def self.transform_multi_key_hash(hash)
  hash.each_with_object({}) do |(key, value), result|
    result.merge!(transform_single_key_hash({ key => value }))
  end
end
242
+
243
+ # Flatten sequence items according to Parslet semantics:
244
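+ # 1. If ALL items are hashes: same single key with non-hash values => return as array (repetition result); same single key with hash values => merge the inner hashes under that key; different keys => merge into one hash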
245
+ # 2. If there are named captures (hashes) among strings, return ONLY the merged hash (discard strings)
246
+ # 3. If only strings, join them (or return single string)
247
+ # 4. A single item is NOT unwrapped: the one-element array is returned and unwrapping is left to the caller
248
+ #
249
+ # This matches Parslet's behavior where:
250
+ # str('SCHEMA') >> str(' ') >> match('[a-z]').repeat(1).as(:name) >> str(';')
251
+ # returns: {:name => "test"} (not ["SCHEMA ", {:name=>"test"}, ";"])
252
+ #
253
+ # But for repetitions with named captures:
254
+ # match('[a-z]').as(:x).repeat(2)
255
+ # returns: [{:x => "a"}, {:x => "b"}] (array of hashes, NOT merged!)
256
+ #
257
+ # Optimized: Single-pass with direct result building
258
# Collapses the transformed items of a sequence into one result.
#
# nil items (e.g. from a lookahead or an optional that did not match) are
# ignored by every rule below. Rules, first match wins:
#
# 1. No items: EMPTY_ARRAY.
# 2. Exactly one item: the one-element array is returned as it is;
#    unwrapping is left to the caller, which keeps results such as
#    [{:x => 1}] intact.
# 3. At least one item is a non-empty array (a repetition with content):
#    the result is an array of the top-level hashes plus the contents of
#    all arrays; strings and other values are dropped. A one-element
#    result is unwrapped.
#      factor.as(:left) >> (op >> factor).as(:rhs).repeat on "a+b"
#      => [{left: ...}, {rhs: ...}]
# 4. All remaining items are hashes (two or more):
#    a. all have the same single key and all values are hashes (wrapper
#       pattern): the inner hashes are merged under that key;
#       [{syntax: {a: 1}}, {syntax: {b: 2}}] => {syntax: {a: 1, b: 2}}
#    b. all have the same single key but some value is not a hash
#       (repetition pattern): the hashes are returned as an array;
#       [{letter: "a"}, {letter: "b"}] stays as it is
#    c. otherwise: the hashes are merged into one hash.
# 5. Some hashes mixed with other things: only the merged hash is
#    returned, unnamed strings are discarded;
#      ["SCHEMA ", {name: "test"}, ";"] => {name: "test"}
# 6. Only strings among the items: the single string, or all strings
#    joined.
# 7. Nothing but nils: EMPTY_ARRAY. Anything else: the items unchanged.
#
# Rule 4 works on the hashes only. It used to inspect items.first and all
# items, so a leading nil disabled wrapper/repetition detection and the
# hashes were merged shallowly, losing data.
#
# @param items [Array] transformed sequence items
# @return [Object] Hash, String or Array according to the rules above
def self.flatten_sequence(items)
  return EMPTY_ARRAY if items.empty? # Match Parsanol Ruby mode

  return items if items.length == 1

  merged_hash = {}
  string_parts = []
  hash_count = 0
  total_items = 0 # every item except nil
  has_non_empty_array = false

  items.each do |item|
    case item
    when nil
      next
    when Hash
      merged_hash.merge!(item)
      hash_count += 1
    when String
      string_parts << item
    when Array
      has_non_empty_array = true unless item.empty?
    end
    total_items += 1
  end

  # Rule 3: a repetition with content keeps the sequence as an array.
  if has_non_empty_array
    result = []
    items.each do |item|
      case item
      when Hash then result << item
      when Array then result.concat(item)
      end
    end
    return result.length == 1 ? result.first : result
  end

  # Rule 4: nothing but hashes (nils aside).
  if hash_count == total_items && hash_count > 1
    hashes = hash_count == items.length ? items : items.compact
    leading = hashes.first

    if leading.keys.length == 1
      wrapper_key = leading.keys.first
      same_wrapper = hashes.all? do |entry|
        entry.keys.length == 1 && entry.keys.first == wrapper_key
      end

      if same_wrapper
        # Repetition pattern: keep the hashes as an array.
        return hashes unless hashes.all? { |entry| entry[wrapper_key].is_a?(Hash) }

        # Wrapper pattern: merge the inner hashes under the shared key.
        merged_inner = hashes.each_with_object({}) do |entry, inner|
          inner.merge!(entry[wrapper_key])
        end
        return { wrapper_key => merged_inner }
      end
    end

    # Different keys: Parslet sequence semantics, one merged hash.
    return merged_hash
  end

  # Rule 5: named captures win over unnamed strings.
  return merged_hash unless merged_hash.empty?

  # Rule 6: strings only.
  unless string_parts.empty?
    return string_parts.length == 1 ? string_parts.first : string_parts.join
  end

  # Rule 7.
  return EMPTY_ARRAY if total_items.zero?

  items
end
398
+
399
+ # Parslet/Parsanol repetition semantics:
400
+ # 1. Return [] for empty repetitions
401
+ # 2. If all items are strings, join them
402
+ # 3. Otherwise return array
403
# Collapses the transformed items of a repetition.
#
# Items that are arrays are spliced in one level deep. If nothing is left
# the result is EMPTY_ARRAY; if every remaining element is a String they
# are joined into one string; otherwise the flattened array is returned.
#
# @param items [Array] transformed repetition items
# @return [Array, String]
def self.flatten_repetition(items)
  return EMPTY_ARRAY if items.empty?

  flat_items = items.flat_map { |item| item.is_a?(Array) ? item : [item] }
  return EMPTY_ARRAY if flat_items.empty?

  # String-like repetition: join the pieces.
  flat_items.all?(String) ? flat_items.join : flat_items
end
431
+ end
432
+
433
+ private_constant :AstTransformer
434
+ end
435
+ end