parsanol 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +546 -0
- data/Cargo.toml +9 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/ext/parsanol_native/Cargo.toml +34 -0
- data/ext/parsanol_native/extconf.rb +15 -0
- data/ext/parsanol_native/src/lib.rs +17 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +293 -0
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
# Grammar serializer for the native parser.
#
# Walks a tree of Parsanol atoms and produces the JSON document
# {"atoms": [...], "root": <id>}. Every entry of "atoms" is a single-key
# hash naming the atom type; atoms refer to each other by their index in
# that array.
class GrammarSerializer
  # Serialize a grammar (root atom) to a JSON string.
  #
  # A fresh serializer is used for every call because the atom table and
  # the deduplication cache are specific to one grammar.
  #
  # @param root [Parsanol::Atoms::Base] the root atom of the grammar
  # @return [String] JSON representation of the grammar
  def self.serialize(root)
    new.serialize(root)
  end

  def initialize
    @atoms = []
    # atom => atom_id. Keyed by identity (not #hash/#eql?) and holding a
    # reference to the atom itself, so the key can never be confused with a
    # different object.
    @atom_cache = {}.compare_by_identity
  end

  # Serialize the grammar rooted at +root+.
  #
  # @param root [Parsanol::Atoms::Base]
  # @return [String] JSON document with the keys "atoms" and "root"
  def serialize(root)
    root_id = serialize_atom(root)

    # The envelope is written by hand; only the atom table goes through
    # to_json.
    %({"atoms":#{@atoms.to_json},"root":#{root_id}})
  end

  private

  # Serialize a single atom (children first) and return its atom_id.
  def serialize_atom(atom)
    return @atom_cache[atom] if @atom_cache.key?(atom)

    # Entities are lazy references and reserve their slot before resolving.
    return serialize_entity(atom) if atom.is_a?(Parsanol::Atoms::Entity)

    # Capture and Scope have no native counterpart: they are transparent,
    # so they share the id of the atom they wrap instead of adding an entry.
    inner = transparent_inner(atom)
    if inner
      inner_id = serialize_atom(inner)
      @atom_cache[atom] = inner_id
      return inner_id
    end

    serialized = build_atom(atom)

    # The id is taken only after the children were appended.
    atom_id = @atoms.size
    @atom_cache[atom] = atom_id
    @atoms << serialized
    atom_id
  end

  # Return the atom wrapped by a transparent wrapper, or nil when +atom+ is
  # not a wrapper (or its inner atom cannot be obtained).
  def transparent_inner(atom)
    case atom
    when Parsanol::Atoms::Capture
      # Captured text is not available to the native parser; grammars that
      # combine capture and dynamic need the Ruby fallback.
      atom.parslet
    when Parsanol::Atoms::Scope
      # Best effort: a block that raises is treated like one returning nil.
      begin
        atom.block.call
      rescue StandardError
        nil
      end
    end
  end

  # Build the hash describing +atom+. Shared by serialize_atom and
  # serialize_entity so both support the same set of atom types.
  def build_atom(atom)
    case atom
    when Parsanol::Atoms::Str         then serialize_str(atom)
    when Parsanol::Atoms::Re          then serialize_re(atom)
    when Parsanol::Atoms::Sequence    then serialize_sequence(atom)
    when Parsanol::Atoms::Alternative then serialize_alternative(atom)
    when Parsanol::Atoms::Repetition  then serialize_repetition(atom)
    when Parsanol::Atoms::Named       then serialize_named(atom)
    when Parsanol::Atoms::Lookahead   then serialize_lookahead(atom)
    when Parsanol::Atoms::Dynamic     then serialize_dynamic(atom)
    else serialize_unknown(atom)
    end
  end

  def serialize_str(atom)
    { 'Str' => { 'pattern' => atom.str } }
  end

  # Regexp#to_s produces "(?-mix:pattern)"; only the bare pattern is
  # emitted. \A and \z plus /m make the unwrapping work for patterns that
  # contain newlines as well.
  #
  # NOTE(review): enabled inline flags (e.g. "(?i-mx:...)") are dropped
  # together with the wrapper - confirm whether the native side should
  # receive them.
  def serialize_re(atom)
    pattern = atom.match
    pattern = ::Regexp.last_match(1) if pattern =~ /\A\(\?[-mix]*:(.+)\)\z/m
    { 'Re' => { 'pattern' => pattern } }
  end

  def serialize_sequence(atom)
    { 'Sequence' => { 'atoms' => atom.parslets.map { |p| serialize_atom(p) } } }
  end

  def serialize_alternative(atom)
    { 'Alternative' => { 'atoms' => atom.alternatives.map { |p| serialize_atom(p) } } }
  end

  def serialize_repetition(atom)
    {
      'Repetition' => {
        'atom' => serialize_atom(atom.parslet),
        'min' => atom.min,
        'max' => atom.max
      }
    }
  end

  def serialize_named(atom)
    {
      'Named' => {
        'name' => atom.name.to_s,
        'atom' => serialize_atom(atom.parslet)
      }
    }
  end

  # Resolve a lazy rule reference and return its atom_id.
  #
  # The slot is reserved and cached BEFORE the rule body is resolved, so a
  # rule that refers to itself finds its own id instead of recursing
  # forever.
  def serialize_entity(atom)
    atom_id = @atoms.size
    @atom_cache[atom] = atom_id
    @atoms << nil # placeholder, replaced below

    parslet = begin
      atom.parslet
    rescue StandardError
      nil
    end

    @atoms[atom_id] =
      if parslet
        entity_body(parslet)
      else
        # No rule body available: emit a marker that fails at parse time.
        { 'Str' => { 'pattern' => "\x00__UNIMPLEMENTED_ENTITY_#{atom.name}__" } }
      end

    atom_id
  end

  # Hash stored in an entity's slot for its resolved +parslet+.
  #
  # A body that is itself only a reference (another entity, or a transparent
  # Capture/Scope) is stored as an 'Entity' reference to the id it resolves
  # to; anything else is serialized in place.
  def entity_body(parslet)
    case parslet
    when Parsanol::Atoms::Entity, Parsanol::Atoms::Capture, Parsanol::Atoms::Scope
      { 'Entity' => { 'atom' => serialize_atom(parslet) } }
    else
      build_atom(parslet)
    end
  end

  def serialize_lookahead(atom)
    {
      'Lookahead' => {
        'atom' => serialize_atom(atom.bound_parslet),
        'positive' => atom.positive
      }
    }
  end

  # Dynamic evaluates a Ruby block at parse time and cannot be expressed in
  # JSON; the marker is meant to fail at parse time.
  def serialize_dynamic(_atom)
    { 'Str' => { 'pattern' => "\x00__DYNAMIC_NOT_SUPPORTED__" } }
  end

  # Placeholder for atom types without a native representation.
  #
  # An empty pattern would match everywhere, so - like the other
  # placeholders - a NUL-prefixed marker is used; it also names the
  # offending atom class.
  def serialize_unknown(atom)
    { 'Str' => { 'pattern' => "\x00__UNSUPPORTED_ATOM_#{atom.class.name}__" } }
  end
end
|
|
248
|
+
end
|
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module Native
|
|
5
|
+
# Transforms the native AST format into the Parslet-compatible format.
#
# Native format (as handled by this class):
# - Strings: "text"
# - Sequences: [":sequence", item1, item2, ...] (untagged arrays are
#   treated as sequences, too)
# - Repetitions: [":repetition", item1, item2, ...]
# - Named captures: {"name" => value}
#
# Resulting format:
# - Strings: "text"
# - Sequences: named captures merged into one hash {:key1 => val1, ...};
#   unnamed strings are joined, or dropped when named captures exist
# - Repetitions: array of items, one string when every item is a string,
#   [] when empty
# - Named wrapping Repetition: {:name => [{:name => item1}, {:name => item2}, ...]}
#
class AstTransformer
  # Tag strings used by the native parser as first element of tagged arrays
  SEQUENCE_TAG = ':sequence'
  REPETITION_TAG = ':repetition'
  EMPTY_STRING = ''
  EMPTY_ARRAY = [].freeze
  EMPTY_HASH = {}.freeze

  # Transform one native AST node; anything that is neither an Array nor a
  # Hash is returned unchanged.
  def self.transform(ast)
    case ast
    when Array then transform_array(ast)
    when Hash then transform_hash(ast)
    else ast
    end
  end

  # Transform several ASTs; equivalent to mapping .transform over +asts+.
  def self.transform_batch(asts)
    asts.map { |ast| transform(ast) }
  end

  # Convert a hash key to a Symbol (Symbols pass through).
  #
  # The method name is kept for compatibility. No cache is kept any more:
  # String#to_sym already returns the one interned Symbol for a name, so a
  # shared mutable class-level Hash adds state without changing the result.
  def self.cached_symbol(key)
    key.is_a?(Symbol) ? key : key.to_sym
  end

  # Transform an array node.
  #
  # - [":sequence", ...]   -> flatten_sequence of the transformed rest
  # - [":repetition", ...] -> flatten_repetition of the transformed rest
  # - other ":"-prefixed first element -> every element transformed, as is
  # - anything else -> treated as an untagged sequence
  def self.transform_array(arr)
    return EMPTY_ARRAY if arr.empty?

    tag = arr.first
    unless tag.is_a?(String) && tag.start_with?(':')
      return flatten_sequence(arr.map { |item| transform(item) })
    end

    case tag
    when SEQUENCE_TAG then flatten_sequence(transform_tail(arr))
    when REPETITION_TAG then flatten_repetition(transform_tail(arr))
    else arr.map { |item| transform(item) }
    end
  end

  # Transform a hash node; single-key hashes get the dedicated path.
  def self.transform_hash(hash)
    return transform_single_key_hash(hash) if hash.length == 1

    transform_multi_key_hash(hash)
  end

  # Transform {"name" => value}.
  #
  # The value counts as a repetition when the RAW value is an empty array,
  # a ":repetition"-tagged array or an array of single-key hashes carrying
  # the parent key, or when the TRANSFORMED value is a non-empty array of
  # single-key hashes carrying the (symbolized) parent key.
  def self.transform_single_key_hash(hash)
    key, value = hash.first
    sym_key = cached_symbol(key)

    is_repetition =
      value.is_a?(Array) &&
      (value.empty? ||
       value.first == REPETITION_TAG ||
       value.all? { |item| item.is_a?(Hash) && item.size == 1 && item.key?(key) })

    transformed = transform(value)

    if transformed.is_a?(Array) && !transformed.empty?
      if transformed.all? { |item| item.is_a?(String) && item.length == 1 }
        # An array of single-character strings is joined into one string.
        transformed = transformed.join
      elsif transformed.all? { |item| item.is_a?(Hash) && item.size == 1 && item.key?(sym_key) }
        is_repetition = true
      end
    end

    return transform_repetition_value(sym_key, transformed) if is_repetition
    return transform_array_value(sym_key, transformed) if transformed.is_a?(Array)

    { sym_key => transformed }
  end

  # Build {sym_key => ...} for a value recognised as a repetition.
  def self.transform_repetition_value(sym_key, transformed)
    unless transformed.is_a?(Array)
      # An empty repetition is [], never "".
      return { sym_key => transformed == EMPTY_STRING ? EMPTY_ARRAY : transformed }
    end

    return { sym_key => EMPTY_ARRAY } if transformed.empty?

    # Items that already carry the key are kept (no double wrapping).
    return { sym_key => transformed } if transformed.all? { |item| item.is_a?(Hash) && item.key?(sym_key) }

    { sym_key => transformed.map { |item| { sym_key => item } } }
  end

  # Build {sym_key => ...} for an array value that is NOT a repetition.
  #
  # An empty array gets sequence semantics (""); every other array is
  # stored unchanged, its items are never wrapped.
  def self.transform_array_value(sym_key, transformed)
    return { sym_key => EMPTY_STRING } if transformed.empty?

    { sym_key => transformed }
  end

  # Transform a hash that does not have exactly one key.
  #
  # NOTE(review): unlike the single-key path, an untagged array of hashes
  # is wrapped item by item here - confirm that this difference is wanted.
  def self.transform_multi_key_hash(hash)
    result = {}

    hash.each do |key, value|
      sym_key = cached_symbol(key)
      tagged_repetition = value.is_a?(Array) && value.first == REPETITION_TAG
      transformed = transform(value)

      result[sym_key] =
        if !transformed.is_a?(Array)
          transformed
        elsif tagged_repetition
          if transformed.all? { |item| item.is_a?(Hash) && item.key?(sym_key) }
            transformed
          else
            transformed.map { |item| { sym_key => item } }
          end
        elsif transformed.empty?
          EMPTY_ARRAY
        elsif transformed.all?(Hash)
          transformed.map { |item| { sym_key => item } }
        else
          transformed
        end
    end

    result
  end

  # Flatten the transformed items of a sequence.
  #
  # nil items (nothing matched) never take part in any decision. Rules,
  # first match wins:
  # 1. No items -> []. Exactly one item -> the one-element array itself
  #    (NOT unwrapped, which keeps results like [{:x => 1}] intact).
  # 2. Some item is a non-empty array -> array of the top-level hashes plus
  #    the elements of the nested arrays (strings are dropped); a single
  #    resulting element is returned bare.
  #      [{left: a}, [{rhs: b}]] => [{left: a}, {rhs: b}]
  # 3. Two or more items, all of them hashes:
  #    a. all have the same single key and hash values (wrapper pattern)
  #       -> inner hashes merged under that key
  #    b. all have the same single key, other values (repetition pattern)
  #       -> array of the hashes
  #         [{letter: "a"}, {letter: "b"}] stays as it is
  #    c. otherwise -> merged into one hash
  #         [{a: 1}, {b: 2}] => {a: 1, b: 2}
  # 4. Hashes mixed with other items -> only the merged hash
  #      ["SCHEMA ", {name: "test"}, ";"] => {name: "test"}
  # 5. Strings -> the single string, or all strings joined.
  # 6. Only nil -> []; anything else -> items unchanged.
  def self.flatten_sequence(items)
    return EMPTY_ARRAY if items.empty?
    return items if items.length == 1

    present = items.compact

    if present.any? { |item| item.is_a?(Array) && !item.empty? }
      result = []
      present.each do |item|
        case item
        when Hash then result << item
        when Array then result.concat(item)
        end
      end
      return result.length == 1 ? result.first : result
    end

    hashes = present.grep(Hash)
    merged_hash = hashes.each_with_object({}) { |item, acc| acc.merge!(item) }

    if hashes.length > 1 && hashes.length == present.length
      return collapse_hash_sequence(hashes, merged_hash)
    end

    return merged_hash unless merged_hash.empty?

    string_parts = present.grep(String)
    unless string_parts.empty?
      return string_parts.length == 1 ? string_parts.first : string_parts.join
    end

    return EMPTY_ARRAY if present.empty?

    items
  end

  # Flatten the transformed items of a repetition.
  #
  # Nested arrays are expanded by one level; the result is [] when nothing
  # remains, one joined string when every element is a String, and the
  # array of elements otherwise.
  def self.flatten_repetition(items)
    return EMPTY_ARRAY if items.empty?

    flat_items = items.flat_map { |item| item.is_a?(Array) ? item : [item] }
    return EMPTY_ARRAY if flat_items.empty?

    flat_items.all?(String) ? flat_items.join : flat_items
  end

  # Transform every element of a tagged array except the tag itself.
  def self.transform_tail(arr)
    Array.new(arr.length - 1) { |index| transform(arr[index + 1]) }
  end

  # Resolve rule 3 of flatten_sequence for +hashes+ (two or more hashes);
  # +merged_hash+ is the plain merge of all of them.
  def self.collapse_hash_sequence(hashes, merged_hash)
    first_hash = hashes.first
    return merged_hash unless first_hash.size == 1

    wrapper_key = first_hash.keys.first
    same_wrapper = hashes.all? { |item| item.size == 1 && item.keys.first == wrapper_key }
    return merged_hash unless same_wrapper

    # Repetition pattern: keep the items apart.
    return hashes unless hashes.all? { |item| item[wrapper_key].is_a?(Hash) }

    # Wrapper pattern: merge the inner hashes under the shared key.
    { wrapper_key => hashes.each_with_object({}) { |item, acc| acc.merge!(item[wrapper_key]) } }
  end

  private_class_method :transform_tail, :collapse_hash_sequence
end
|
|
432
|
+
|
|
433
|
+
private_constant :AstTransformer
|
|
434
|
+
end
|
|
435
|
+
end
|