parsanol 1.3.4 → 1.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +0 -2
- data/Rakefile +48 -48
- data/ext/parsanol_native/Cargo.toml +1 -2
- data/ext/parsanol_native/extconf.rb +4 -4
- data/lib/parsanol/ast_visitor.rb +1 -1
- data/lib/parsanol/atoms/alternative.rb +3 -2
- data/lib/parsanol/atoms/base.rb +12 -6
- data/lib/parsanol/atoms/can_flatten.rb +8 -8
- data/lib/parsanol/atoms/context.rb +23 -16
- data/lib/parsanol/atoms/custom.rb +2 -2
- data/lib/parsanol/atoms/dynamic.rb +1 -1
- data/lib/parsanol/atoms/infix.rb +10 -5
- data/lib/parsanol/atoms/lookahead.rb +7 -4
- data/lib/parsanol/atoms/re.rb +1 -1
- data/lib/parsanol/atoms/repetition.rb +29 -11
- data/lib/parsanol/atoms/sequence.rb +3 -2
- data/lib/parsanol/atoms/str.rb +9 -3
- data/lib/parsanol/atoms.rb +20 -20
- data/lib/parsanol/builder_callbacks.rb +2 -2
- data/lib/parsanol/cause.rb +2 -2
- data/lib/parsanol/context.rb +2 -2
- data/lib/parsanol/error_reporter.rb +5 -5
- data/lib/parsanol/expression/treetop.rb +17 -17
- data/lib/parsanol/expression.rb +1 -1
- data/lib/parsanol/fast_mode.rb +50 -12
- data/lib/parsanol/first_set.rb +1 -1
- data/lib/parsanol/grammar_builder.rb +10 -8
- data/lib/parsanol/incremental_parser.rb +13 -8
- data/lib/parsanol/interval_tree.rb +12 -3
- data/lib/parsanol/lazy_result.rb +2 -2
- data/lib/parsanol/mermaid.rb +12 -9
- data/lib/parsanol/native/batch_decoder.rb +13 -9
- data/lib/parsanol/native/dynamic.rb +7 -6
- data/lib/parsanol/native/parser.rb +12 -4
- data/lib/parsanol/native/serializer.rb +42 -42
- data/lib/parsanol/native/transformer.rb +55 -28
- data/lib/parsanol/native/types.rb +3 -3
- data/lib/parsanol/native.rb +60 -21
- data/lib/parsanol/optimizer.rb +6 -6
- data/lib/parsanol/optimizers/choice_optimizer.rb +1 -1
- data/lib/parsanol/optimizers/cut_inserter.rb +5 -2
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +9 -3
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +5 -5
- data/lib/parsanol/optimizers/sequence_optimizer.rb +1 -1
- data/lib/parsanol/options/zero_copy.rb +1 -1
- data/lib/parsanol/options.rb +1 -1
- data/lib/parsanol/parallel.rb +8 -13
- data/lib/parsanol/parser.rb +51 -13
- data/lib/parsanol/parslet.rb +7 -7
- data/lib/parsanol/pattern/binding.rb +1 -1
- data/lib/parsanol/pattern.rb +4 -1
- data/lib/parsanol/pool.rb +3 -3
- data/lib/parsanol/pools/buffer_pool.rb +2 -2
- data/lib/parsanol/pools/position_pool.rb +2 -2
- data/lib/parsanol/position.rb +1 -1
- data/lib/parsanol/result_builder.rb +4 -4
- data/lib/parsanol/result_stream.rb +10 -5
- data/lib/parsanol/slice.rb +11 -8
- data/lib/parsanol/source.rb +14 -9
- data/lib/parsanol/source_location.rb +1 -1
- data/lib/parsanol/streaming_parser.rb +3 -3
- data/lib/parsanol/string_view.rb +4 -1
- data/lib/parsanol/transform.rb +2 -2
- data/lib/parsanol/version.rb +1 -1
- data/lib/parsanol/wasm_parser.rb +1 -1
- data/lib/parsanol.rb +37 -39
- data/parsanol.gemspec +30 -30
- metadata +1 -1
|
@@ -18,9 +18,9 @@ module Parsanol
|
|
|
18
18
|
#
|
|
19
19
|
class AstTransformer
|
|
20
20
|
# Frozen string constants for tag comparisons (avoid allocations)
|
|
21
|
-
SEQUENCE_TAG =
|
|
22
|
-
REPETITION_TAG =
|
|
23
|
-
EMPTY_STRING =
|
|
21
|
+
SEQUENCE_TAG = ":sequence"
|
|
22
|
+
REPETITION_TAG = ":repetition"
|
|
23
|
+
EMPTY_STRING = ""
|
|
24
24
|
EMPTY_ARRAY = [].freeze
|
|
25
25
|
EMPTY_HASH = {}.freeze
|
|
26
26
|
|
|
@@ -62,7 +62,7 @@ module Parsanol
|
|
|
62
62
|
# Check if this is a tagged array from native parser
|
|
63
63
|
# Native parser produces Symbol tags: [:sequence, item1, item2, ...]
|
|
64
64
|
first = arr.first
|
|
65
|
-
if
|
|
65
|
+
if [SEQUENCE_SYM, SEQUENCE_TAG].include?(first)
|
|
66
66
|
# Optimized: transform items starting from index 1
|
|
67
67
|
# Avoid creating arr[1..] slice
|
|
68
68
|
len = arr.length
|
|
@@ -75,7 +75,7 @@ module Parsanol
|
|
|
75
75
|
i += 1
|
|
76
76
|
end
|
|
77
77
|
flatten_sequence(items)
|
|
78
|
-
elsif
|
|
78
|
+
elsif [REPETITION_SYM, REPETITION_TAG].include?(first)
|
|
79
79
|
# Optimized: transform items starting from index 1
|
|
80
80
|
len = arr.length
|
|
81
81
|
return EMPTY_ARRAY if len == 1
|
|
@@ -87,7 +87,7 @@ module Parsanol
|
|
|
87
87
|
i += 1
|
|
88
88
|
end
|
|
89
89
|
flatten_repetition(items)
|
|
90
|
-
elsif first.is_a?(Symbol) || (first.is_a?(String) && first.start_with?(
|
|
90
|
+
elsif first.is_a?(Symbol) || (first.is_a?(String) && first.start_with?(":"))
|
|
91
91
|
# Other tagged arrays - pass through
|
|
92
92
|
arr.map { |item| transform(item) }
|
|
93
93
|
else
|
|
@@ -119,13 +119,15 @@ module Parsanol
|
|
|
119
119
|
|
|
120
120
|
# Check if value is a tagged repetition from native parser
|
|
121
121
|
is_tagged_repetition = value.is_a?(Array) && !value.empty? &&
|
|
122
|
-
|
|
122
|
+
value.first.is_a?(String) && value.first == REPETITION_TAG
|
|
123
123
|
|
|
124
124
|
# Check RAW value for repetition pattern BEFORE transformation
|
|
125
125
|
# Array with items that all have the parent key
|
|
126
126
|
# e.g., [{x: 1}, {x: 2}] where parent key is :x
|
|
127
127
|
is_raw_array_repetition = value.is_a?(Array) && !value.empty? &&
|
|
128
|
-
|
|
128
|
+
value.all? do |item|
|
|
129
|
+
item.is_a?(Hash) && item.keys.length == 1 && item.key?(key)
|
|
130
|
+
end
|
|
129
131
|
|
|
130
132
|
# Empty array from native parser is a repetition result (not a sequence)
|
|
131
133
|
# Sequences produce arrays of arrays like [[], []], not empty arrays
|
|
@@ -134,12 +136,15 @@ module Parsanol
|
|
|
134
136
|
# Special handling for arrays that look like character repetitions
|
|
135
137
|
# (arrays of single-character Slices/strings should be joined)
|
|
136
138
|
if transformed.is_a?(Array) && !transformed.empty? &&
|
|
137
|
-
|
|
139
|
+
transformed.all? do |item|
|
|
140
|
+
slice_or_string?(item) && item_length(item) == 1
|
|
141
|
+
end
|
|
138
142
|
# Join preserving position from first Slice
|
|
139
143
|
first_slice = transformed.find { |i| i.is_a?(::Parsanol::Slice) }
|
|
140
144
|
content = transformed.map { |i| slice_content(i) }.join
|
|
141
145
|
transformed = if first_slice
|
|
142
|
-
::Parsanol::Slice.new(first_slice.offset, content,
|
|
146
|
+
::Parsanol::Slice.new(first_slice.offset, content,
|
|
147
|
+
first_slice.input)
|
|
143
148
|
else
|
|
144
149
|
content
|
|
145
150
|
end
|
|
@@ -148,9 +153,9 @@ module Parsanol
|
|
|
148
153
|
# Check for UNTAGGED repetition pattern (native output):
|
|
149
154
|
# If array items all have the same key as parent, it's a repetition
|
|
150
155
|
is_transformed_repetition = transformed.is_a?(Array) && !transformed.empty? &&
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
156
|
+
transformed.all? do |item|
|
|
157
|
+
item.is_a?(Hash) && item.keys.length == 1 && item.key?(sym_key)
|
|
158
|
+
end
|
|
154
159
|
|
|
155
160
|
is_repetition = is_tagged_repetition || is_raw_array_repetition || is_transformed_repetition || is_empty_repetition
|
|
156
161
|
|
|
@@ -184,7 +189,9 @@ module Parsanol
|
|
|
184
189
|
if transformed.empty?
|
|
185
190
|
{ sym_key => EMPTY_ARRAY }
|
|
186
191
|
# Check if items already have the same key (avoid double-wrapping)
|
|
187
|
-
elsif transformed.all?
|
|
192
|
+
elsif transformed.all? do |item|
|
|
193
|
+
item.is_a?(Hash) && item.key?(sym_key)
|
|
194
|
+
end
|
|
188
195
|
{ sym_key => transformed }
|
|
189
196
|
else
|
|
190
197
|
# Wrap each item with the name
|
|
@@ -207,7 +214,9 @@ module Parsanol
|
|
|
207
214
|
# We can't tell from the value alone, so we return empty Slice (sequence semantics)
|
|
208
215
|
# The repetition detection in transform_single_key_hash will handle the other case
|
|
209
216
|
{ sym_key => ::Parsanol::Slice.new(0, EMPTY_STRING, nil) }
|
|
210
|
-
elsif transformed.all?
|
|
217
|
+
elsif transformed.all? do |v|
|
|
218
|
+
v.is_a?(Hash) && v.keys.length == 1 && v.key?(sym_key)
|
|
219
|
+
end
|
|
211
220
|
# Items already have the parent key (repetition pattern) - keep as-is
|
|
212
221
|
{ sym_key => transformed }
|
|
213
222
|
elsif transformed.all?(Hash)
|
|
@@ -229,13 +238,15 @@ module Parsanol
|
|
|
229
238
|
sym_key = cached_symbol(key)
|
|
230
239
|
|
|
231
240
|
is_repetition = value.is_a?(Array) && !value.empty? &&
|
|
232
|
-
|
|
241
|
+
value.first.is_a?(String) && value.first == REPETITION_TAG
|
|
233
242
|
|
|
234
243
|
transformed = transform(value)
|
|
235
244
|
|
|
236
245
|
result[sym_key] = if is_repetition
|
|
237
246
|
if transformed.is_a?(Array)
|
|
238
|
-
if transformed.all?
|
|
247
|
+
if transformed.all? do |item|
|
|
248
|
+
item.is_a?(Hash) && item.key?(sym_key)
|
|
249
|
+
end
|
|
239
250
|
transformed
|
|
240
251
|
else
|
|
241
252
|
transformed.map { |item| { sym_key => item } }
|
|
@@ -308,7 +319,7 @@ module Parsanol
|
|
|
308
319
|
else
|
|
309
320
|
# Check if array contains only hashes (repetition wrapper pattern)
|
|
310
321
|
# In this case, merge the inner hashes into merged_hash
|
|
311
|
-
non_hash_items = item.
|
|
322
|
+
non_hash_items = item.grep_v(Hash)
|
|
312
323
|
all_items_are_hashes = non_hash_items.empty?
|
|
313
324
|
|
|
314
325
|
if all_items_are_hashes
|
|
@@ -399,12 +410,16 @@ module Parsanol
|
|
|
399
410
|
# REPETITION pattern (same keys like entity_decl): keep as array
|
|
400
411
|
# WRAPPER pattern (different keys like spaces vs schemaDecl): merge
|
|
401
412
|
first_inner_keys = items.first[wrapper_key].keys.to_set
|
|
402
|
-
|
|
413
|
+
items.all? do |item|
|
|
414
|
+
item[wrapper_key].keys.to_set == first_inner_keys
|
|
415
|
+
end
|
|
403
416
|
|
|
404
417
|
# Check if items have single keys or multiple keys
|
|
405
418
|
# - Single key items with repeated outer key = true repetition (keep array)
|
|
406
419
|
# - Multiple key items with repeated outer key = duplicate labels in sequence (merge)
|
|
407
|
-
max_keys_per_item = items.map
|
|
420
|
+
max_keys_per_item = items.map do |item|
|
|
421
|
+
item.is_a?(Hash) ? item.keys.length : 0
|
|
422
|
+
end.max || 0
|
|
408
423
|
|
|
409
424
|
# Check if inner values are hashes with different keys
|
|
410
425
|
# This distinguishes:
|
|
@@ -412,9 +427,13 @@ module Parsanol
|
|
|
412
427
|
# - Duplicate labels: [{group: {char: 'a'}}, {group: {digit: '5'}}] - inner is hash with different keys
|
|
413
428
|
inner_keys_all_same = true
|
|
414
429
|
first_inner_keys = nil
|
|
415
|
-
if items.all?
|
|
430
|
+
if items.all? do |item|
|
|
431
|
+
item.is_a?(Hash) && item[wrapper_key].is_a?(Hash)
|
|
432
|
+
end
|
|
416
433
|
first_inner_keys = items.first[wrapper_key].keys.to_set
|
|
417
|
-
inner_keys_all_same = items.all?
|
|
434
|
+
inner_keys_all_same = items.all? do |item|
|
|
435
|
+
item[wrapper_key].keys.to_set == first_inner_keys
|
|
436
|
+
end
|
|
418
437
|
end
|
|
419
438
|
|
|
420
439
|
# DUPLICATE LABELS IN SEQUENCE: multiple keys per item with repeated outer key
|
|
@@ -426,7 +445,9 @@ module Parsanol
|
|
|
426
445
|
|
|
427
446
|
# Check if inner hashes have the same keys or different keys
|
|
428
447
|
first_inner_keys ||= items.first[wrapper_key].keys.to_set
|
|
429
|
-
all_same_keys = items.all?
|
|
448
|
+
all_same_keys = items.all? do |item|
|
|
449
|
+
item[wrapper_key].keys.to_set == first_inner_keys
|
|
450
|
+
end
|
|
430
451
|
|
|
431
452
|
if has_duplicate_labels
|
|
432
453
|
# DUPLICATE LABELS PATTERN: items have multiple keys with repeated outer key
|
|
@@ -436,7 +457,7 @@ module Parsanol
|
|
|
436
457
|
merged = {}
|
|
437
458
|
items.each do |item|
|
|
438
459
|
item.each do |k, v|
|
|
439
|
-
merged[k] = v
|
|
460
|
+
merged[k] = v # Last value wins
|
|
440
461
|
end
|
|
441
462
|
end
|
|
442
463
|
# Return only the wrapper key with its last value
|
|
@@ -475,11 +496,14 @@ module Parsanol
|
|
|
475
496
|
if slice_or_string_parts.any?
|
|
476
497
|
# Join Slices/strings, preserving position from first Slice
|
|
477
498
|
first_slice = slice_or_string_parts.find { |i| i.is_a?(::Parsanol::Slice) }
|
|
478
|
-
content = slice_or_string_parts.map
|
|
499
|
+
content = slice_or_string_parts.map do |i|
|
|
500
|
+
i.is_a?(::Parsanol::Slice) ? i.content : i
|
|
501
|
+
end.join
|
|
479
502
|
|
|
480
503
|
if first_slice
|
|
481
504
|
# Create new Slice with combined content, preserving position from first
|
|
482
|
-
return ::Parsanol::Slice.new(first_slice.offset, content,
|
|
505
|
+
return ::Parsanol::Slice.new(first_slice.offset, content,
|
|
506
|
+
first_slice.input)
|
|
483
507
|
else
|
|
484
508
|
# All plain strings (shouldn't happen with new decode_flat, but handle it)
|
|
485
509
|
return slice_or_string_parts.length == 1 ? slice_or_string_parts.first : content
|
|
@@ -520,11 +544,14 @@ module Parsanol
|
|
|
520
544
|
# If all Slices or strings, join them preserving position from first Slice
|
|
521
545
|
if all_slices_or_strings
|
|
522
546
|
first_slice = flat_items.find { |i| i.is_a?(::Parsanol::Slice) }
|
|
523
|
-
content = flat_items.map
|
|
547
|
+
content = flat_items.map do |i|
|
|
548
|
+
i.is_a?(::Parsanol::Slice) ? i.content : i
|
|
549
|
+
end.join
|
|
524
550
|
|
|
525
551
|
if first_slice
|
|
526
552
|
# Create new Slice with combined content, preserving position from first
|
|
527
|
-
::Parsanol::Slice.new(first_slice.offset, content,
|
|
553
|
+
::Parsanol::Slice.new(first_slice.offset, content,
|
|
554
|
+
first_slice.input)
|
|
528
555
|
else
|
|
529
556
|
# All plain strings (shouldn't happen with new decode_flat, but handle it)
|
|
530
557
|
content
|
|
@@ -19,9 +19,9 @@ module Parsanol
|
|
|
19
19
|
TAG_INLINE_STRING = 0x0A
|
|
20
20
|
|
|
21
21
|
# Frozen string constants for transformer (avoid allocations)
|
|
22
|
-
SEQUENCE_TAG =
|
|
23
|
-
REPETITION_TAG =
|
|
24
|
-
EMPTY_STRING =
|
|
22
|
+
SEQUENCE_TAG = ":sequence"
|
|
23
|
+
REPETITION_TAG = ":repetition"
|
|
24
|
+
EMPTY_STRING = ""
|
|
25
25
|
EMPTY_ARRAY = [].freeze
|
|
26
26
|
EMPTY_HASH = {}.freeze
|
|
27
27
|
end
|
data/lib/parsanol/native.rb
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require
|
|
4
|
-
require
|
|
3
|
+
require "json"
|
|
4
|
+
require "digest"
|
|
5
5
|
|
|
6
|
-
require
|
|
7
|
-
require
|
|
8
|
-
require
|
|
9
|
-
require
|
|
6
|
+
require "parsanol/native/types"
|
|
7
|
+
require "parsanol/native/parser"
|
|
8
|
+
require "parsanol/native/serializer"
|
|
9
|
+
require "parsanol/native/batch_decoder"
|
|
10
10
|
|
|
11
11
|
module Parsanol
|
|
12
12
|
module Native
|
|
13
|
-
VERSION =
|
|
13
|
+
VERSION = "0.1.0"
|
|
14
14
|
|
|
15
15
|
class << self
|
|
16
16
|
# Check if native extension is available
|
|
@@ -53,7 +53,8 @@ module Parsanol
|
|
|
53
53
|
# The batch format doesn't preserve :repetition/:sequence tags, so we use
|
|
54
54
|
# the direct FFI path. Apply the Ruby transformer to handle tags correctly.
|
|
55
55
|
raw_ast = _parse_raw(grammar_json, input)
|
|
56
|
-
BatchDecoder.decode_and_flatten(raw_ast, input, Parsanol::Slice,
|
|
56
|
+
BatchDecoder.decode_and_flatten(raw_ast, input, Parsanol::Slice,
|
|
57
|
+
grammar_atom)
|
|
57
58
|
end
|
|
58
59
|
|
|
59
60
|
# Parse and return RAW AST without transformation.
|
|
@@ -76,11 +77,11 @@ module Parsanol
|
|
|
76
77
|
raise LoadError, "Native parser not available" unless available?
|
|
77
78
|
|
|
78
79
|
# Handle both grammar atoms and pre-serialized JSON strings
|
|
79
|
-
if grammar.is_a?(String)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
80
|
+
grammar_json = if grammar.is_a?(String)
|
|
81
|
+
grammar
|
|
82
|
+
else
|
|
83
|
+
Parser.serialize_grammar(grammar)
|
|
84
|
+
end
|
|
84
85
|
|
|
85
86
|
# Use batch_raw format for raw AST (no transformation)
|
|
86
87
|
slice_class = Parsanol::Slice
|
|
@@ -112,11 +113,6 @@ module Parsanol
|
|
|
112
113
|
BatchDecoder.decode(batch_data, input, slice_class)
|
|
113
114
|
end
|
|
114
115
|
|
|
115
|
-
# Get the Slice class
|
|
116
|
-
private def get_slice_class
|
|
117
|
-
Parsanol::Slice
|
|
118
|
-
end
|
|
119
|
-
|
|
120
116
|
# Serialize a Ruby grammar to JSON (cached).
|
|
121
117
|
#
|
|
122
118
|
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
@@ -128,11 +124,54 @@ module Parsanol
|
|
|
128
124
|
# Clear grammar caches (call if grammar changes)
|
|
129
125
|
def clear_cache
|
|
130
126
|
Parser.clear_cache
|
|
127
|
+
clear_grammar_cache if available?
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Clear the Rust grammar cache to free memory.
|
|
131
|
+
#
|
|
132
|
+
# This is useful for batch processing scenarios where you want to
|
|
133
|
+
# limit memory usage by clearing unused grammars.
|
|
134
|
+
#
|
|
135
|
+
# @return [nil]
|
|
136
|
+
def clear_grammar_cache
|
|
137
|
+
raise LoadError, "Native parser not available" unless available?
|
|
138
|
+
|
|
139
|
+
_clear_grammar_cache
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Get the current number of cached grammars in Rust.
|
|
143
|
+
#
|
|
144
|
+
# @return [Integer] Number of cached grammars
|
|
145
|
+
def grammar_cache_size
|
|
146
|
+
raise LoadError, "Native parser not available" unless available?
|
|
147
|
+
|
|
148
|
+
_grammar_cache_size
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Get the grammar cache capacity.
|
|
152
|
+
#
|
|
153
|
+
# @return [Integer] Maximum cache capacity
|
|
154
|
+
def grammar_cache_capacity
|
|
155
|
+
raise LoadError, "Native parser not available" unless available?
|
|
156
|
+
|
|
157
|
+
_grammar_cache_capacity
|
|
131
158
|
end
|
|
132
159
|
|
|
133
160
|
# Get cache statistics
|
|
134
161
|
def cache_stats
|
|
135
|
-
Parser.cache_stats
|
|
162
|
+
stats = Parser.cache_stats
|
|
163
|
+
if available?
|
|
164
|
+
stats[:rust_grammar_cache_size] = grammar_cache_size
|
|
165
|
+
stats[:rust_grammar_cache_capacity] = grammar_cache_capacity
|
|
166
|
+
end
|
|
167
|
+
stats
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
private
|
|
171
|
+
|
|
172
|
+
# Get the Slice class
|
|
173
|
+
def get_slice_class
|
|
174
|
+
Parsanol::Slice
|
|
136
175
|
end
|
|
137
176
|
end
|
|
138
177
|
end
|
|
@@ -140,11 +179,11 @@ end
|
|
|
140
179
|
|
|
141
180
|
# Attempt to load native extension
|
|
142
181
|
begin
|
|
143
|
-
ruby_version = RUBY_VERSION.split(
|
|
182
|
+
ruby_version = RUBY_VERSION.split(".").take(2).join(".")
|
|
144
183
|
require "parsanol/#{ruby_version}/parsanol_native"
|
|
145
184
|
rescue LoadError
|
|
146
185
|
begin
|
|
147
|
-
require
|
|
186
|
+
require "parsanol/parsanol_native"
|
|
148
187
|
rescue LoadError
|
|
149
188
|
# Native extension not built yet
|
|
150
189
|
end
|
data/lib/parsanol/optimizer.rb
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative
|
|
4
|
-
require_relative
|
|
5
|
-
require_relative
|
|
6
|
-
require_relative
|
|
7
|
-
require_relative
|
|
8
|
-
require_relative
|
|
3
|
+
require_relative "ast_visitor"
|
|
4
|
+
require_relative "optimizers/quantifier_optimizer"
|
|
5
|
+
require_relative "optimizers/sequence_optimizer"
|
|
6
|
+
require_relative "optimizers/choice_optimizer"
|
|
7
|
+
require_relative "optimizers/lookahead_optimizer"
|
|
8
|
+
require_relative "optimizers/cut_inserter"
|
|
9
9
|
|
|
10
10
|
# Grammar-level optimizations for Parslet parsers
|
|
11
11
|
# These optimizations transform the parser AST to reduce runtime overhead
|
|
@@ -84,7 +84,7 @@ module Parsanol
|
|
|
84
84
|
Parsanol::Atoms::Repetition.new(
|
|
85
85
|
optimized_parslet,
|
|
86
86
|
rep.min,
|
|
87
|
-
rep.max
|
|
87
|
+
rep.max,
|
|
88
88
|
)
|
|
89
89
|
end
|
|
90
90
|
|
|
@@ -101,7 +101,10 @@ module Parsanol
|
|
|
101
101
|
# For sequences, find the longest safe prefix
|
|
102
102
|
if parslet.is_a?(Parsanol::Atoms::Sequence)
|
|
103
103
|
prefix_parslets = find_deterministic_prefix(parslet)
|
|
104
|
-
|
|
104
|
+
if prefix_parslets && !prefix_parslets.empty?
|
|
105
|
+
return build_cut_sequence(parslet,
|
|
106
|
+
prefix_parslets)
|
|
107
|
+
end
|
|
105
108
|
end
|
|
106
109
|
|
|
107
110
|
# For other atoms, cut the whole thing if safe
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative
|
|
3
|
+
require_relative "../ast_visitor"
|
|
4
4
|
|
|
5
5
|
module Parsanol
|
|
6
6
|
module Optimizers
|
|
@@ -26,13 +26,19 @@ module Parsanol
|
|
|
26
26
|
inner_positive = inner.positive
|
|
27
27
|
|
|
28
28
|
# !(!x) => &x (double negation)
|
|
29
|
-
|
|
29
|
+
if !outer_positive && !inner_positive
|
|
30
|
+
return Parsanol::Atoms::Lookahead.new(inner.bound_parslet,
|
|
31
|
+
true)
|
|
32
|
+
end
|
|
30
33
|
|
|
31
34
|
# &(&x) => &x (idempotent)
|
|
32
35
|
return inner if outer_positive && inner_positive
|
|
33
36
|
|
|
34
37
|
# !(&x) => !x (negative of positive)
|
|
35
|
-
|
|
38
|
+
if !outer_positive && inner_positive
|
|
39
|
+
return Parsanol::Atoms::Lookahead.new(inner.bound_parslet,
|
|
40
|
+
false)
|
|
41
|
+
end
|
|
36
42
|
|
|
37
43
|
# &(!x) => !x (positive of negative)
|
|
38
44
|
return inner if outer_positive && !inner_positive
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative
|
|
3
|
+
require_relative "../ast_visitor"
|
|
4
4
|
|
|
5
5
|
module Parsanol
|
|
6
6
|
module Optimizers
|
|
@@ -26,19 +26,19 @@ module Parsanol
|
|
|
26
26
|
if inner.is_a?(Parsanol::Atoms::Repetition)
|
|
27
27
|
# repeat(0,1).repeat(0,1) => repeat(0,1) (idempotent)
|
|
28
28
|
if parslet.min.zero? && parslet.max == 1 &&
|
|
29
|
-
|
|
29
|
+
inner.min.zero? && inner.max == 1
|
|
30
30
|
return inner
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
# repeat(n,n).repeat(m,m) => repeat(n*m,n*m) for exact counts
|
|
34
34
|
if parslet.min == parslet.max && inner.min == inner.max &&
|
|
35
|
-
|
|
35
|
+
parslet.max && inner.max
|
|
36
36
|
new_count = parslet.min * inner.min
|
|
37
37
|
return Parsanol::Atoms::Repetition.new(
|
|
38
38
|
inner.parslet,
|
|
39
39
|
new_count,
|
|
40
40
|
new_count,
|
|
41
|
-
parslet.result_tag
|
|
41
|
+
parslet.result_tag,
|
|
42
42
|
)
|
|
43
43
|
end
|
|
44
44
|
end
|
|
@@ -51,7 +51,7 @@ module Parsanol
|
|
|
51
51
|
inner,
|
|
52
52
|
parslet.min,
|
|
53
53
|
parslet.max,
|
|
54
|
-
parslet.result_tag
|
|
54
|
+
parslet.result_tag,
|
|
55
55
|
)
|
|
56
56
|
end
|
|
57
57
|
end
|
|
@@ -119,7 +119,7 @@ module Parsanol
|
|
|
119
119
|
# @param type_map [Hash] Override type mapping for this parse
|
|
120
120
|
# @return [Object] Direct Ruby object
|
|
121
121
|
def parse_with_types(input, type_map)
|
|
122
|
-
raise LoadError,
|
|
122
|
+
raise LoadError, "ZeroCopy mode requires native extension." unless Parsanol::Native.available?
|
|
123
123
|
|
|
124
124
|
grammar_json = Parsanol::Native.serialize_grammar(root)
|
|
125
125
|
Parsanol::Native.parse_to_objects(grammar_json, input, type_map)
|
data/lib/parsanol/options.rb
CHANGED
data/lib/parsanol/parallel.rb
CHANGED
|
@@ -83,17 +83,11 @@ module Parsanol
|
|
|
83
83
|
"Run `rake compile` to build the extension."
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
num_threads: config.num_threads
|
|
92
|
-
)
|
|
93
|
-
else
|
|
94
|
-
# Fallback to sequential if parallel not available
|
|
95
|
-
inputs.map { |input| Parsanol::Native.parse(grammar_json, input) }
|
|
96
|
-
end
|
|
86
|
+
Parsanol::Native.parse_batch_parallel(
|
|
87
|
+
grammar_json,
|
|
88
|
+
inputs,
|
|
89
|
+
num_threads: config.num_threads,
|
|
90
|
+
)
|
|
97
91
|
end
|
|
98
92
|
|
|
99
93
|
# Parse multiple inputs in parallel with transformation.
|
|
@@ -104,7 +98,8 @@ module Parsanol
|
|
|
104
98
|
# @param config [Config] Parallel configuration (optional)
|
|
105
99
|
# @return [Array<Object>] Array of transformed results
|
|
106
100
|
#
|
|
107
|
-
def parse_batch_with_transform(grammar_json, inputs, transform,
|
|
101
|
+
def parse_batch_with_transform(grammar_json, inputs, transform,
|
|
102
|
+
config: Config.new)
|
|
108
103
|
results = parse_batch(grammar_json, inputs, config: config)
|
|
109
104
|
results.map { |result| transform.apply(result) }
|
|
110
105
|
end
|
|
@@ -113,7 +108,7 @@ module Parsanol
|
|
|
113
108
|
#
|
|
114
109
|
# @return [Integer] Number of available cores
|
|
115
110
|
def available_cores
|
|
116
|
-
require
|
|
111
|
+
require "etc"
|
|
117
112
|
Etc.nprocessors
|
|
118
113
|
rescue StandardError
|
|
119
114
|
1
|
data/lib/parsanol/parser.rb
CHANGED
|
@@ -109,13 +109,24 @@ module Parsanol
|
|
|
109
109
|
# result[:name].to_s # => "hello"
|
|
110
110
|
#
|
|
111
111
|
def parse(input, mode_or_opts = {}, **kwargs)
|
|
112
|
-
if mode_or_opts.is_a?(Hash)
|
|
112
|
+
if mode_or_opts.is_a?(Hash) && !kwargs.key?(:mode)
|
|
113
113
|
# Legacy API: parse(input, options={})
|
|
114
114
|
merged = mode_or_opts.merge(kwargs)
|
|
115
115
|
super(input, merged)
|
|
116
116
|
else
|
|
117
117
|
# New API: parse(input, mode:, **options)
|
|
118
|
-
|
|
118
|
+
mode = kwargs.delete(:mode) || :ruby
|
|
119
|
+
case mode
|
|
120
|
+
when :ruby
|
|
121
|
+
super(input, kwargs)
|
|
122
|
+
when :native
|
|
123
|
+
parse_native(input, kwargs)
|
|
124
|
+
when :json
|
|
125
|
+
parse_json(input, kwargs)
|
|
126
|
+
else
|
|
127
|
+
raise ArgumentError,
|
|
128
|
+
"Unknown mode: #{mode}. Valid modes: :ruby, :native, :json"
|
|
129
|
+
end
|
|
119
130
|
end
|
|
120
131
|
end
|
|
121
132
|
|
|
@@ -130,6 +141,38 @@ module Parsanol
|
|
|
130
141
|
inputs.map { |str| parse(str, mode: mode, **opts) }
|
|
131
142
|
end
|
|
132
143
|
|
|
144
|
+
# Clear the Rust grammar cache to free memory.
|
|
145
|
+
#
|
|
146
|
+
# @return [nil]
|
|
147
|
+
# @raise [LoadError] if native parser is not available
|
|
148
|
+
def clear_grammar_cache
|
|
149
|
+
Parsanol::Native.clear_grammar_cache
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
# Get the current number of cached grammars in Rust.
|
|
153
|
+
#
|
|
154
|
+
# @return [Integer] number of cached grammars
|
|
155
|
+
# @raise [LoadError] if native parser is not available
|
|
156
|
+
def grammar_cache_size
|
|
157
|
+
Parsanol::Native.grammar_cache_size
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Get the grammar cache capacity.
|
|
161
|
+
#
|
|
162
|
+
# @return [Integer] maximum cache capacity
|
|
163
|
+
# @raise [LoadError] if native parser is not available
|
|
164
|
+
def grammar_cache_capacity
|
|
165
|
+
Parsanol::Native.grammar_cache_capacity
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Get cache statistics for both Ruby and Rust caches.
|
|
169
|
+
#
|
|
170
|
+
# @return [Hash] cache statistics including Ruby GRAMMAR_CACHE and Rust grammar cache
|
|
171
|
+
# @raise [LoadError] if native parser is not available for Rust stats
|
|
172
|
+
def cache_stats
|
|
173
|
+
Parsanol::Native.cache_stats
|
|
174
|
+
end
|
|
175
|
+
|
|
133
176
|
private
|
|
134
177
|
|
|
135
178
|
# Dispatches to the appropriate parsing backend based on mode.
|
|
@@ -142,23 +185,19 @@ module Parsanol
|
|
|
142
185
|
def dispatch_parse(mode, input, opts)
|
|
143
186
|
case mode
|
|
144
187
|
when :ruby
|
|
145
|
-
|
|
188
|
+
# Call base class parse directly (send needed since parse is defined in parent)
|
|
189
|
+
Parsanol::Atoms::Base.instance_method(:parse).bind_call(self, input,
|
|
190
|
+
opts)
|
|
146
191
|
when :native
|
|
147
192
|
parse_native(input, opts)
|
|
148
193
|
when :json
|
|
149
194
|
parse_json(input, opts)
|
|
150
195
|
else
|
|
151
|
-
raise ArgumentError,
|
|
196
|
+
raise ArgumentError,
|
|
197
|
+
"Unknown mode: #{mode}. Valid modes: :ruby, :native, :json"
|
|
152
198
|
end
|
|
153
199
|
end
|
|
154
200
|
|
|
155
|
-
# Pure Ruby parsing (delegates to Base implementation).
|
|
156
|
-
#
|
|
157
|
-
# @param input [String] input to parse
|
|
158
|
-
# @param opts [Hash] parsing options
|
|
159
|
-
# @return [Object] parse result
|
|
160
|
-
#
|
|
161
|
-
|
|
162
201
|
# Native extension parsing with Ruby fallback.
|
|
163
202
|
# Returns results with Slice objects containing position info.
|
|
164
203
|
#
|
|
@@ -168,10 +207,9 @@ module Parsanol
|
|
|
168
207
|
#
|
|
169
208
|
def parse_native(input, opts)
|
|
170
209
|
if Parsanol::Native.available?
|
|
171
|
-
# Parse with lazy line/column support
|
|
172
210
|
Parsanol::Native.parse(root, input)
|
|
173
211
|
else
|
|
174
|
-
|
|
212
|
+
super
|
|
175
213
|
end
|
|
176
214
|
end
|
|
177
215
|
|