parsanol 1.2.2-aarch64-linux → 1.3.2-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.txt +33 -3
- data/README.adoc +103 -9
- data/lib/parsanol/3.2/parsanol_native.so +0 -0
- data/lib/parsanol/3.3/parsanol_native.so +0 -0
- data/lib/parsanol/3.4/parsanol_native.so +0 -0
- data/lib/parsanol/4.0/parsanol_native.so +0 -0
- data/lib/parsanol/native/batch_decoder.rb +252 -0
- data/lib/parsanol/native/parser.rb +28 -574
- data/lib/parsanol/native/transformer.rb +125 -58
- data/lib/parsanol/native.rb +107 -183
- data/lib/parsanol/parser.rb +2 -6
- data/lib/parsanol/slice.rb +51 -105
- data/lib/parsanol/version.rb +1 -1
- metadata +3 -2
|
@@ -52,40 +52,44 @@ module Parsanol
|
|
|
52
52
|
@@symbol_cache[key] ||= key.to_sym
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
+
# Symbol tags from native parser
|
|
56
|
+
SEQUENCE_SYM = :sequence
|
|
57
|
+
REPETITION_SYM = :repetition
|
|
58
|
+
|
|
55
59
|
def self.transform_array(arr)
|
|
56
60
|
return EMPTY_ARRAY if arr.empty? # Match Parsanol Ruby mode behavior
|
|
57
61
|
|
|
58
62
|
# Check if this is a tagged array from native parser
|
|
63
|
+
# Native parser produces Symbol tags: [:sequence, item1, item2, ...]
|
|
59
64
|
first = arr.first
|
|
60
|
-
if first
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
i += 1
|
|
84
|
-
end
|
|
85
|
-
flatten_repetition(items)
|
|
86
|
-
else
|
|
87
|
-
arr.map { |item| transform(item) }
|
|
65
|
+
if first == SEQUENCE_SYM || first == SEQUENCE_TAG
|
|
66
|
+
# Optimized: transform items starting from index 1
|
|
67
|
+
# Avoid creating arr[1..] slice
|
|
68
|
+
len = arr.length
|
|
69
|
+
return EMPTY_ARRAY if len == 1
|
|
70
|
+
|
|
71
|
+
items = Array.new(len - 1)
|
|
72
|
+
i = 0
|
|
73
|
+
while i < len - 1
|
|
74
|
+
items[i] = transform(arr[i + 1])
|
|
75
|
+
i += 1
|
|
76
|
+
end
|
|
77
|
+
flatten_sequence(items)
|
|
78
|
+
elsif first == REPETITION_SYM || first == REPETITION_TAG
|
|
79
|
+
# Optimized: transform items starting from index 1
|
|
80
|
+
len = arr.length
|
|
81
|
+
return EMPTY_ARRAY if len == 1
|
|
82
|
+
|
|
83
|
+
items = Array.new(len - 1)
|
|
84
|
+
i = 0
|
|
85
|
+
while i < len - 1
|
|
86
|
+
items[i] = transform(arr[i + 1])
|
|
87
|
+
i += 1
|
|
88
88
|
end
|
|
89
|
+
flatten_repetition(items)
|
|
90
|
+
elsif first.is_a?(Symbol) || (first.is_a?(String) && first.start_with?(':'))
|
|
91
|
+
# Other tagged arrays - pass through
|
|
92
|
+
arr.map { |item| transform(item) }
|
|
89
93
|
else
|
|
90
94
|
# Untagged arrays from native parser are SEQUENCES
|
|
91
95
|
# Apply flatten_sequence to get Parslet-compatible output
|
|
@@ -110,6 +114,9 @@ module Parsanol
|
|
|
110
114
|
value = hash[key]
|
|
111
115
|
sym_key = cached_symbol(key)
|
|
112
116
|
|
|
117
|
+
# Transform the value
|
|
118
|
+
transformed = transform(value)
|
|
119
|
+
|
|
113
120
|
# Check if value is a tagged repetition from native parser
|
|
114
121
|
is_tagged_repetition = value.is_a?(Array) && !value.empty? &&
|
|
115
122
|
value.first.is_a?(String) && value.first == REPETITION_TAG
|
|
@@ -124,9 +131,6 @@ module Parsanol
|
|
|
124
131
|
# Sequences produce arrays of arrays like [[], []], not empty arrays
|
|
125
132
|
is_empty_repetition = value.is_a?(Array) && value.empty?
|
|
126
133
|
|
|
127
|
-
# Transform the value
|
|
128
|
-
transformed = transform(value)
|
|
129
|
-
|
|
130
134
|
# Special handling for arrays that look like character repetitions
|
|
131
135
|
# (arrays of single-character Slices/strings should be joined)
|
|
132
136
|
if transformed.is_a?(Array) && !transformed.empty? &&
|
|
@@ -135,7 +139,7 @@ module Parsanol
|
|
|
135
139
|
first_slice = transformed.find { |i| i.is_a?(::Parsanol::Slice) }
|
|
136
140
|
content = transformed.map { |i| slice_content(i) }.join
|
|
137
141
|
transformed = if first_slice
|
|
138
|
-
::Parsanol::Slice.new(first_slice.offset, content, first_slice.
|
|
142
|
+
::Parsanol::Slice.new(first_slice.offset, content, first_slice.input)
|
|
139
143
|
else
|
|
140
144
|
content
|
|
141
145
|
end
|
|
@@ -276,10 +280,7 @@ module Parsanol
|
|
|
276
280
|
# Optimized: Single-pass with direct result building
|
|
277
281
|
def self.flatten_sequence(items)
|
|
278
282
|
return EMPTY_ARRAY if items.empty? # Match Parsanol Ruby mode
|
|
279
|
-
|
|
280
|
-
# DON'T unwrap single items - let the caller handle this
|
|
281
|
-
# This preserves repetition results like [{:x => 1}]
|
|
282
|
-
return items if items.length == 1
|
|
283
|
+
return items.first if items.length == 1
|
|
283
284
|
|
|
284
285
|
# Single pass: categorize items
|
|
285
286
|
merged_hash = {}
|
|
@@ -303,20 +304,33 @@ module Parsanol
|
|
|
303
304
|
# the WHOLE sequence should be kept as array, not merged.
|
|
304
305
|
if item.empty?
|
|
305
306
|
# Empty repetition - skip (sequence semantics: merge rest)
|
|
307
|
+
# Don't increment total_items for empty arrays
|
|
306
308
|
else
|
|
307
|
-
#
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
309
|
+
# Check if array contains only hashes (repetition wrapper pattern)
|
|
310
|
+
# In this case, merge the inner hashes into merged_hash
|
|
311
|
+
non_hash_items = item.reject { |sub| sub.is_a?(Hash) }
|
|
312
|
+
all_items_are_hashes = non_hash_items.empty?
|
|
313
|
+
|
|
314
|
+
if all_items_are_hashes
|
|
315
|
+
# Merge all inner hashes into merged_hash
|
|
316
|
+
item.each do |sub_item|
|
|
317
|
+
merged_hash.merge!(sub_item) if sub_item.is_a?(Hash)
|
|
316
318
|
end
|
|
319
|
+
else
|
|
320
|
+
# Non-empty repetition with non-hash items - mark that we should keep as array
|
|
321
|
+
has_non_empty_array = true
|
|
322
|
+
# Still collect items for potential array result
|
|
323
|
+
item.each do |sub_item|
|
|
324
|
+
case sub_item
|
|
325
|
+
when Hash
|
|
326
|
+
hash_count += 1
|
|
327
|
+
when ::Parsanol::Slice, String
|
|
328
|
+
slice_or_string_parts << sub_item
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
total_items += 1
|
|
317
332
|
end
|
|
318
333
|
end
|
|
319
|
-
total_items += 1
|
|
320
334
|
when nil
|
|
321
335
|
# Skip nil values (from lookahead or optional that didn't match)
|
|
322
336
|
else
|
|
@@ -359,9 +373,9 @@ module Parsanol
|
|
|
359
373
|
# Result: [{:letter => "a"}, {:letter => "b"}, {:letter => "c"}]
|
|
360
374
|
#
|
|
361
375
|
# 3. MIXED KEYS: Hashes have DIFFERENT keys
|
|
362
|
-
# =>
|
|
363
|
-
# Example: [{:
|
|
364
|
-
# Result:
|
|
376
|
+
# => Merge into single hash
|
|
377
|
+
# Example: [{:explicitAttr => {...}}, {:whereClause => {...}}]
|
|
378
|
+
# Result: {:explicitAttr => {...}, :whereClause => {...}}
|
|
365
379
|
if hash_count == total_items && hash_count > 1
|
|
366
380
|
# Check if all hashes have the same single key
|
|
367
381
|
first_item = items.first
|
|
@@ -381,13 +395,66 @@ module Parsanol
|
|
|
381
395
|
|
|
382
396
|
return items unless all_values_are_hashes
|
|
383
397
|
|
|
384
|
-
#
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
398
|
+
# Check if inner hashes have the same keys or different keys
|
|
399
|
+
# REPETITION pattern (same keys like entity_decl): keep as array
|
|
400
|
+
# WRAPPER pattern (different keys like spaces vs schemaDecl): merge
|
|
401
|
+
first_inner_keys = items.first[wrapper_key].keys.to_set
|
|
402
|
+
all_same_keys = items.all? { |item| item[wrapper_key].keys.to_set == first_inner_keys }
|
|
403
|
+
|
|
404
|
+
# Check if items have single keys or multiple keys
|
|
405
|
+
# - Single key items with repeated outer key = true repetition (keep array)
|
|
406
|
+
# - Multiple key items with repeated outer key = duplicate labels in sequence (merge)
|
|
407
|
+
max_keys_per_item = items.map { |item| item.is_a?(Hash) ? item.keys.length : 0 }.max || 0
|
|
408
|
+
|
|
409
|
+
# Check if inner values are hashes with different keys
|
|
410
|
+
# This distinguishes:
|
|
411
|
+
# - True repetition: [{letter: 'a'}, {letter: 'b'}] - inner is string
|
|
412
|
+
# - Duplicate labels: [{group: {char: 'a'}}, {group: {digit: '5'}}] - inner is hash with different keys
|
|
413
|
+
inner_keys_all_same = true
|
|
414
|
+
first_inner_keys = nil
|
|
415
|
+
if items.all? { |item| item.is_a?(Hash) && item[wrapper_key].is_a?(Hash) }
|
|
416
|
+
first_inner_keys = items.first[wrapper_key].keys.to_set
|
|
417
|
+
inner_keys_all_same = items.all? { |item| item[wrapper_key].keys.to_set == first_inner_keys }
|
|
389
418
|
end
|
|
390
|
-
|
|
419
|
+
|
|
420
|
+
# DUPLICATE LABELS IN SEQUENCE: multiple keys per item with repeated outer key
|
|
421
|
+
# OR inner hashes with different keys
|
|
422
|
+
# Example: [{group: {char: 'a'}}, {group: {digit: '5'}}]
|
|
423
|
+
# Ruby semantics: merge with last value wins for the outer key
|
|
424
|
+
# This is different from true repetition where each item has exactly one key
|
|
425
|
+
has_duplicate_labels = max_keys_per_item > 1 || (first_inner_keys && !inner_keys_all_same)
|
|
426
|
+
|
|
427
|
+
# Check if inner hashes have the same keys or different keys
|
|
428
|
+
first_inner_keys ||= items.first[wrapper_key].keys.to_set
|
|
429
|
+
all_same_keys = items.all? { |item| item[wrapper_key].keys.to_set == first_inner_keys }
|
|
430
|
+
|
|
431
|
+
if has_duplicate_labels
|
|
432
|
+
# DUPLICATE LABELS PATTERN: items have multiple keys with repeated outer key
|
|
433
|
+
# OR inner hashes have different keys
|
|
434
|
+
# This is a SEQUENCE with duplicate .as() labels
|
|
435
|
+
# Ruby semantics: merge and keep last value for the outer key
|
|
436
|
+
merged = {}
|
|
437
|
+
items.each do |item|
|
|
438
|
+
item.each do |k, v|
|
|
439
|
+
merged[k] = v # Last value wins
|
|
440
|
+
end
|
|
441
|
+
end
|
|
442
|
+
# Return only the wrapper key with its last value
|
|
443
|
+
return { wrapper_key => merged[wrapper_key] }
|
|
444
|
+
elsif all_same_keys
|
|
445
|
+
# TRUE REPETITION: each item has exactly one key
|
|
446
|
+
# Keep as array of hashes
|
|
447
|
+
# Example: [{letter: 'a'}, {letter: 'b'}] or [{schemaDecl: ...}, {schemaDecl: ...}]
|
|
448
|
+
return items
|
|
449
|
+
else
|
|
450
|
+
# DIFFERENT INNER KEYS with single keys: Same outer key with different inner keys
|
|
451
|
+
# This is a WRAPPER pattern - keep all items as array
|
|
452
|
+
# Example: [{:syntax => {:entityDecl => ...}}, {:syntax => {:typeDecl => ...}}]
|
|
453
|
+
# Should NOT merge or drop items - keep all declarations
|
|
454
|
+
return items
|
|
455
|
+
end
|
|
456
|
+
|
|
457
|
+
return items unless all_values_are_hashes
|
|
391
458
|
|
|
392
459
|
# Repetition pattern: keep as array
|
|
393
460
|
|
|
@@ -412,10 +479,10 @@ module Parsanol
|
|
|
412
479
|
|
|
413
480
|
if first_slice
|
|
414
481
|
# Create new Slice with combined content, preserving position from first
|
|
415
|
-
::Parsanol::Slice.new(first_slice.offset, content, first_slice.
|
|
482
|
+
return ::Parsanol::Slice.new(first_slice.offset, content, first_slice.input)
|
|
416
483
|
else
|
|
417
484
|
# All plain strings (shouldn't happen with new decode_flat, but handle it)
|
|
418
|
-
slice_or_string_parts.length == 1 ? slice_or_string_parts.first : content
|
|
485
|
+
return slice_or_string_parts.length == 1 ? slice_or_string_parts.first : content
|
|
419
486
|
end
|
|
420
487
|
end
|
|
421
488
|
|
|
@@ -457,7 +524,7 @@ module Parsanol
|
|
|
457
524
|
|
|
458
525
|
if first_slice
|
|
459
526
|
# Create new Slice with combined content, preserving position from first
|
|
460
|
-
::Parsanol::Slice.new(first_slice.offset, content, first_slice.
|
|
527
|
+
::Parsanol::Slice.new(first_slice.offset, content, first_slice.input)
|
|
461
528
|
else
|
|
462
529
|
# All plain strings (shouldn't happen with new decode_flat, but handle it)
|
|
463
530
|
content
|
data/lib/parsanol/native.rb
CHANGED
|
@@ -3,223 +3,147 @@
|
|
|
3
3
|
require 'json'
|
|
4
4
|
require 'digest'
|
|
5
5
|
|
|
6
|
-
# Entry point for native parsing functionality
|
|
7
|
-
# Requires the individual components
|
|
8
6
|
require 'parsanol/native/types'
|
|
9
7
|
require 'parsanol/native/parser'
|
|
10
|
-
require 'parsanol/native/transformer'
|
|
11
8
|
require 'parsanol/native/serializer'
|
|
12
|
-
require 'parsanol/native/
|
|
13
|
-
require 'parsanol/native/dynamic'
|
|
9
|
+
require 'parsanol/native/batch_decoder'
|
|
14
10
|
|
|
15
11
|
module Parsanol
|
|
16
12
|
module Native
|
|
17
13
|
VERSION = '0.1.0'
|
|
18
14
|
|
|
19
15
|
class << self
|
|
20
|
-
#
|
|
16
|
+
# Check if native extension is available
|
|
21
17
|
def available?
|
|
22
18
|
Parser.available?
|
|
23
19
|
end
|
|
24
20
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
21
|
+
# Parse input with a Ruby grammar, returning clean AST with lazy line/column.
|
|
22
|
+
#
|
|
23
|
+
# Uses batch FFI format for maximum performance (3-5x faster than object-by-object).
|
|
24
|
+
# The Rust-side transformation (to_parslet_compatible) produces Parslet-compatible
|
|
25
|
+
# output that can be consumed directly by Builder.build without additional
|
|
26
|
+
# Ruby-side transformation.
|
|
27
|
+
#
|
|
28
|
+
# @param grammar [Parsanol::Atoms::Base] Ruby grammar definition
|
|
29
|
+
# @param input [String] Input string to parse
|
|
30
|
+
# @return [Hash, Array, Parsanol::Slice] Transformed AST
|
|
31
|
+
#
|
|
32
|
+
# @example Simple parsing
|
|
33
|
+
# result = Parsanol::Native.parse(str('hello').as(:greeting), 'hello')
|
|
34
|
+
# # => {greeting: "hello"@0}
|
|
35
|
+
#
|
|
36
|
+
# @example With lazy line/column
|
|
37
|
+
# result = Parsanol::Native.parse(str('hello').as(:greeting), "hello\nworld")
|
|
38
|
+
# result[:greeting].line_and_column # => [1, 1]
|
|
39
|
+
#
|
|
40
|
+
def parse(grammar, input)
|
|
41
|
+
raise LoadError, "Native parser not available" unless available?
|
|
42
|
+
|
|
43
|
+
# Handle both grammar atoms and pre-serialized JSON strings
|
|
44
|
+
if grammar.is_a?(String)
|
|
45
|
+
grammar_json = grammar
|
|
46
|
+
grammar_atom = nil
|
|
47
|
+
else
|
|
48
|
+
grammar_json = Parser.serialize_grammar(grammar)
|
|
49
|
+
grammar_atom = grammar
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Use _parse_raw which returns properly tagged Ruby arrays via transform_ast.
|
|
53
|
+
# The batch format doesn't preserve :repetition/:sequence tags, so we use
|
|
54
|
+
# the direct FFI path. Apply the Ruby transformer to handle tags correctly.
|
|
55
|
+
raw_ast = _parse_raw(grammar_json, input)
|
|
56
|
+
BatchDecoder.decode_and_flatten(raw_ast, input, Parsanol::Slice, grammar_atom)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Parse and return RAW AST without transformation.
|
|
60
|
+
#
|
|
61
|
+
# This returns the raw Parslet intermediate format before any transformation.
|
|
62
|
+
# Use this only if you need the raw AST for custom processing.
|
|
63
|
+
#
|
|
64
|
+
# For most use cases (including Expressir), use parse() instead which
|
|
65
|
+
# returns properly transformed AST.
|
|
66
|
+
#
|
|
67
|
+
# @param grammar [Parsanol::Atoms::Base] Ruby grammar definition
|
|
68
|
+
# @param input [String] Input string to parse
|
|
69
|
+
# @return [Hash, Array] Raw untransformed AST
|
|
70
|
+
#
|
|
71
|
+
# @example Raw parsing
|
|
72
|
+
# result = Parsanol::Native.parse_raw(str('hello').as(:greeting), 'hello')
|
|
73
|
+
# # => {:syntax => [{:spaces => ...}, {:greeting => "hello"@0}, {:spaces => ...}]}
|
|
74
|
+
#
|
|
75
|
+
def parse_raw(grammar, input)
|
|
76
|
+
raise LoadError, "Native parser not available" unless available?
|
|
77
|
+
|
|
78
|
+
# Handle both grammar atoms and pre-serialized JSON strings
|
|
79
|
+
if grammar.is_a?(String)
|
|
80
|
+
grammar_json = grammar
|
|
81
|
+
else
|
|
82
|
+
grammar_json = Parser.serialize_grammar(grammar)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Use batch_raw format for raw AST (no transformation)
|
|
86
|
+
slice_class = Parsanol::Slice
|
|
87
|
+
batch_data = _parse_batch_raw(grammar_json, input)
|
|
88
|
+
|
|
89
|
+
# Decode without transformation - raw AST format
|
|
90
|
+
BatchDecoder.decode(batch_data, input, slice_class)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Fast batch parsing - uses u64 array format to minimize FFI overhead.
|
|
94
|
+
#
|
|
95
|
+
# This is 3-5x faster than regular parse() for large grammars.
|
|
96
|
+
# The batch format passes a flat u64 array across FFI, then decodes
|
|
97
|
+
# in pure Ruby, avoiding expensive per-node FFI calls.
|
|
98
|
+
#
|
|
99
|
+
# Returns RAW AST without transformation. For Expressir use case,
|
|
100
|
+
# use parse() instead which returns properly transformed AST.
|
|
101
|
+
#
|
|
102
|
+
# @param grammar_json [String] Pre-serialized grammar JSON
|
|
103
|
+
# @param input [String] Input string to parse
|
|
104
|
+
# @param slice_class [Class] The Slice class to use for string refs
|
|
105
|
+
# @return [Hash, Array, Slice] Raw AST (not transformed)
|
|
106
|
+
def parse_batch(grammar_json, input, slice_class)
|
|
107
|
+
raise LoadError, "Native parser not available" unless available?
|
|
108
|
+
|
|
109
|
+
# Call native extension's _parse_batch_raw method (named with _raw suffix
|
|
110
|
+
# to avoid conflict with this Ruby wrapper method)
|
|
111
|
+
batch_data = _parse_batch_raw(grammar_json, input)
|
|
112
|
+
BatchDecoder.decode(batch_data, input, slice_class)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Get the Slice class
|
|
116
|
+
private def get_slice_class
|
|
117
|
+
Parsanol::Slice
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
# Serialize a Ruby grammar to JSON (cached).
|
|
121
|
+
#
|
|
122
|
+
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
123
|
+
# @return [String] JSON string
|
|
49
124
|
def serialize_grammar(root_atom)
|
|
50
125
|
Parser.serialize_grammar(root_atom)
|
|
51
126
|
end
|
|
52
127
|
|
|
128
|
+
# Clear grammar caches (call if grammar changes)
|
|
53
129
|
def clear_cache
|
|
54
130
|
Parser.clear_cache
|
|
55
131
|
end
|
|
56
132
|
|
|
133
|
+
# Get cache statistics
|
|
57
134
|
def cache_stats
|
|
58
135
|
Parser.cache_stats
|
|
59
136
|
end
|
|
60
|
-
|
|
61
|
-
# Serialized Mode (JSON Output)
|
|
62
|
-
def parse_to_json(grammar_json, input)
|
|
63
|
-
Parser.parse_to_json(grammar_json, input)
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# ZeroCopy Mode (Direct Ruby Objects)
|
|
67
|
-
def parse_to_objects(grammar_json, input, type_map = nil)
|
|
68
|
-
Parser.parse_to_objects(grammar_json, input, type_map)
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def convert_slices(obj, input)
|
|
72
|
-
Parser.convert_slices(obj, input)
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
# Source Location Tracking
|
|
76
|
-
def parse_with_spans(grammar_json, input)
|
|
77
|
-
Parser.parse_with_spans(grammar_json, input)
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
def get_span(result, node_id)
|
|
81
|
-
Parser.get_span(result, node_id)
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
# Grammar Composition
|
|
85
|
-
def grammar_import(builder_json, grammar_json, prefix = nil)
|
|
86
|
-
Parser.grammar_import(builder_json, grammar_json, prefix)
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
def grammar_rule_mut(builder_json, rule_name)
|
|
90
|
-
Parser.grammar_rule_mut(builder_json, rule_name)
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
# Streaming Parser
|
|
94
|
-
def streaming_parser_new(grammar_json)
|
|
95
|
-
Parser.streaming_parser_new(grammar_json)
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def streaming_parser_add_chunk(parser, chunk)
|
|
99
|
-
Parser.streaming_parser_add_chunk(parser, chunk)
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def streaming_parser_parse_chunk(parser)
|
|
103
|
-
Parser.streaming_parser_parse_chunk(parser)
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
# Incremental Parser
|
|
107
|
-
def incremental_parser_new(grammar_json, initial_input)
|
|
108
|
-
Parser.incremental_parser_new(grammar_json, initial_input)
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def incremental_parser_apply_edit(parser, start, deleted, inserted = '')
|
|
112
|
-
Parser.incremental_parser_apply_edit(parser, start, deleted, inserted)
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
def incremental_parser_reparse(parser, new_input = nil)
|
|
116
|
-
Parser.incremental_parser_reparse(parser, new_input)
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
# Streaming Builder - uses native parse_with_builder directly (exposed from Rust)
|
|
120
|
-
# The native function is exposed directly on Parsanol::Native module
|
|
121
|
-
|
|
122
|
-
# Alias for parse_with_builder (same functionality)
|
|
123
|
-
def parse_with_callback(grammar_json, input, callback)
|
|
124
|
-
parse_with_builder(grammar_json, input, callback)
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
# Parallel Parsing - uses native _parse_batch_parallel
|
|
128
|
-
def parse_batch_parallel(grammar_json, inputs, num_threads: nil)
|
|
129
|
-
_parse_batch_parallel(grammar_json, inputs, num_threads || 0)
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
# Security / Limits - uses native _parse_with_limits
|
|
133
|
-
def parse_with_limits(grammar_json, input, max_input_size: 100 * 1024 * 1024, max_recursion_depth: 1000)
|
|
134
|
-
_parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
# Debug Tools
|
|
138
|
-
def parse_with_trace(grammar_json, input)
|
|
139
|
-
Parser.parse_with_trace(grammar_json, input)
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
def grammar_to_mermaid(grammar_json)
|
|
143
|
-
Parser.grammar_to_mermaid(grammar_json)
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
def grammar_to_dot(grammar_json)
|
|
147
|
-
Parser.grammar_to_dot(grammar_json)
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
# Legacy internal methods (for backward compatibility)
|
|
151
|
-
def _parse_with_spans(grammar_json, input)
|
|
152
|
-
Parser.send(:_parse_with_spans, grammar_json, input)
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
def _get_span(result, node_id)
|
|
156
|
-
Parser.send(:_get_span, result, node_id)
|
|
157
|
-
end
|
|
158
|
-
|
|
159
|
-
def _grammar_import(builder_json, grammar_json, prefix)
|
|
160
|
-
Parser.send(:_grammar_import, builder_json, grammar_json, prefix)
|
|
161
|
-
end
|
|
162
|
-
|
|
163
|
-
def _grammar_rule_mut(builder_json, rule_name)
|
|
164
|
-
Parser.send(:_grammar_rule_mut, builder_json, rule_name)
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
def _streaming_parser_new(grammar_json)
|
|
168
|
-
Parser.send(:_streaming_parser_new, grammar_json)
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
def _streaming_parser_add_chunk(parser, chunk)
|
|
172
|
-
Parser.send(:_streaming_parser_add_chunk, parser, chunk)
|
|
173
|
-
end
|
|
174
|
-
|
|
175
|
-
def _streaming_parser_parse_chunk(parser)
|
|
176
|
-
Parser.send(:_streaming_parser_parse_chunk, parser)
|
|
177
|
-
end
|
|
178
|
-
|
|
179
|
-
def _incremental_parser_new(grammar_json, initial_input)
|
|
180
|
-
Parser.send(:_incremental_parser_new, grammar_json, initial_input)
|
|
181
|
-
end
|
|
182
|
-
|
|
183
|
-
def _incremental_parser_apply_edit(parser, start, deleted, inserted)
|
|
184
|
-
Parser.send(:_incremental_parser_apply_edit, parser, start, deleted, inserted)
|
|
185
|
-
end
|
|
186
|
-
|
|
187
|
-
def _incremental_parser_reparse(parser, new_input)
|
|
188
|
-
Parser.send(:_incremental_parser_reparse, parser, new_input)
|
|
189
|
-
end
|
|
190
|
-
|
|
191
|
-
def _parse_batch_parallel(grammar_json, inputs, num_threads)
|
|
192
|
-
Parser.send(:_parse_batch_parallel, grammar_json, inputs, num_threads)
|
|
193
|
-
end
|
|
194
|
-
|
|
195
|
-
def _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
|
|
196
|
-
Parser.send(:_parse_with_limits, grammar_json, input, max_input_size, max_recursion_depth)
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
def _parse_with_trace(grammar_json, input)
|
|
200
|
-
Parser.send(:_parse_with_trace, grammar_json, input)
|
|
201
|
-
end
|
|
202
|
-
|
|
203
|
-
def _grammar_to_mermaid(grammar_json)
|
|
204
|
-
Parser.send(:_grammar_to_mermaid, grammar_json)
|
|
205
|
-
end
|
|
206
|
-
|
|
207
|
-
def _grammar_to_dot(grammar_json)
|
|
208
|
-
Parser.send(:_grammar_to_dot, grammar_json)
|
|
209
|
-
end
|
|
210
137
|
end
|
|
211
138
|
end
|
|
212
139
|
end
|
|
213
140
|
|
|
214
141
|
# Attempt to load native extension
|
|
215
|
-
# rb_sys puts native extensions in version-specific directories (e.g., parsanol/3.2/parsanol_native)
|
|
216
142
|
begin
|
|
217
|
-
# Try version-specific path first (for precompiled gems)
|
|
218
143
|
ruby_version = RUBY_VERSION.split('.').take(2).join('.')
|
|
219
144
|
require "parsanol/#{ruby_version}/parsanol_native"
|
|
220
145
|
rescue LoadError
|
|
221
146
|
begin
|
|
222
|
-
# Fall back to generic path (for locally compiled extensions)
|
|
223
147
|
require 'parsanol/parsanol_native'
|
|
224
148
|
rescue LoadError
|
|
225
149
|
# Native extension not built yet
|
data/lib/parsanol/parser.rb
CHANGED
|
@@ -168,12 +168,8 @@ module Parsanol
|
|
|
168
168
|
#
|
|
169
169
|
def parse_native(input, opts)
|
|
170
170
|
if Parsanol::Native.available?
|
|
171
|
-
#
|
|
172
|
-
|
|
173
|
-
line_cache.scan_for_line_endings(0, input)
|
|
174
|
-
|
|
175
|
-
# Parse with position info (now the default)
|
|
176
|
-
Parsanol::Native.parse_parslet_compatible(root, input, line_cache)
|
|
171
|
+
# Parse with lazy line/column support
|
|
172
|
+
Parsanol::Native.parse(root, input)
|
|
177
173
|
else
|
|
178
174
|
parse_ruby(input, opts)
|
|
179
175
|
end
|