parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
5
|
+
module Parsanol
|
|
6
|
+
module Native
|
|
7
|
+
# Core parsing functionality using Rust native extension
|
|
8
|
+
#
|
|
9
|
+
# Provides three parsing modes:
|
|
10
|
+
# - :ruby - Parse and transform to Parslet-compatible format
|
|
11
|
+
# - :json - Parse and return JSON-serialized AST
|
|
12
|
+
# - :slice - Parse and return raw native format (fastest)
|
|
13
|
+
#
|
|
14
|
+
module Parser
|
|
15
|
+
# Two-level grammar cache (module-level for proper initialization)
|
|
16
|
+
GRAMMAR_HASH_CACHE = {} # object_id => hash_key
|
|
17
|
+
GRAMMAR_CACHE = {} # hash_key => grammar_json
|
|
18
|
+
|
|
19
|
+
class << self
|
|
20
|
+
# Check whether the native extension can be loaded.
#
# The answer is memoized in the receiver's @cached_available instance
# variable after the first call. No separate initialisation is needed (an
# unset instance variable reads as nil), and an assignment placed directly in
# the `class << self` body would set the variable on the singleton class,
# which this method never reads.
#
# @return [Boolean, Object] false when the extension cannot be required,
#   otherwise whatever Parsanol::Native.is_available returns
def available?
  return @cached_available unless @cached_available.nil?

  @cached_available =
    begin
      require 'parsanol/parsanol_native'
      Parsanol::Native.is_available
    rescue LoadError
      # The compiled extension is missing: report "not available".
      false
    end
end
|
|
33
|
+
|
|
34
|
+
# Parse using native engine
|
|
35
|
+
# @param grammar_json [String] JSON-serialized grammar
|
|
36
|
+
# @param input [String] Input string to parse
|
|
37
|
+
# @return Ruby AST from parsing
|
|
38
|
+
def parse(grammar_json, input)
  # Guard: nothing below can run without the compiled extension.
  raise LoadError, 'Native parser not available. Run `rake compile` to build.' unless available?

  # The native result is handed to decode_flat together with the input.
  encoded = Parsanol::Native.parse_batch(grammar_json, input)
  decode_flat(encoded, input)
end
|
|
48
|
+
|
|
49
|
+
# Parse a grammar with automatic serialization and caching
|
|
50
|
+
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
51
|
+
# @param input [String] Input string to parse
|
|
52
|
+
# @return Ruby AST from parsing
|
|
53
|
+
def parse_with_grammar(root_atom, input)
  # A Parsanol::Parser is unwrapped to its root; anything else is used as given.
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  parse(serialize_grammar(atom), input)
end
|
|
59
|
+
|
|
60
|
+
# Parse and transform to Parslet-compatible format
|
|
61
|
+
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
62
|
+
# @param input [String] Input string to parse
|
|
63
|
+
# @return Ruby AST in Parslet-compatible format
|
|
64
|
+
def parse_parslet_compatible(root_atom, input)
  # A Parsanol::Parser stands in for its root atom.
  atom = root_atom
  atom = atom.root if atom.is_a?(::Parsanol::Parser)
  # Parse first, then pass the raw AST through AstTransformer.
  AstTransformer.transform(parse_with_grammar(atom, input))
end
|
|
70
|
+
|
|
71
|
+
# Parse multiple inputs with the same grammar (more efficient)
|
|
72
|
+
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
73
|
+
# @param inputs [Array<String>] Array of input strings to parse
|
|
74
|
+
# @return [Array] Array of raw Ruby ASTs from parsing
|
|
75
|
+
def parse_batch_inputs(root_atom, inputs)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  # Serialize once, then reuse the same JSON for every input.
  json = serialize_grammar(atom)
  inputs.map { |text| parse(json, text) }
end
|
|
81
|
+
|
|
82
|
+
# Parse multiple inputs with transformation
|
|
83
|
+
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
84
|
+
# @param inputs [Array<String>] Array of input strings to parse
|
|
85
|
+
# @return [Array] Array of transformed Ruby ASTs
|
|
86
|
+
def parse_batch_with_transform(root_atom, inputs)
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  json = serialize_grammar(atom)
  # Parse every input first; the transformer then receives the whole batch.
  parsed = inputs.map { |text| parse(json, text) }
  AstTransformer.transform_batch(parsed)
end
|
|
95
|
+
|
|
96
|
+
# Parse without transformation (faster for raw AST access)
|
|
97
|
+
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
98
|
+
# @param input [String] Input string to parse
|
|
99
|
+
# @return Raw Ruby AST from parsing (native format)
|
|
100
|
+
def parse_raw(root_atom, input)
  # Unwrap a Parsanol::Parser to its root, then return the untransformed parse.
  atom = root_atom.is_a?(::Parsanol::Parser) ? root_atom.root : root_atom
  parse_with_grammar(atom, input)
end
|
|
105
|
+
|
|
106
|
+
# Serialize a grammar to JSON, with two-level caching
|
|
107
|
+
# Level 1: object_id => hash_key (avoids grammar traversal)
|
|
108
|
+
# Level 2: hash_key => grammar_json (avoids serialization)
|
|
109
|
+
# @param root_atom [Parsanol::Atoms::Base] Root atom of the grammar
|
|
110
|
+
# @return [String] JSON string
|
|
111
|
+
def serialize_grammar(root_atom)
  identity = root_atom.object_id

  # Level 1: reuse the structural hash remembered for this object, computing
  # and recording it only on a miss.
  structure_key = GRAMMAR_HASH_CACHE[identity]
  unless structure_key
    structure_key = grammar_structure_hash(root_atom)
    GRAMMAR_HASH_CACHE[identity] = structure_key
  end

  # Level 2: serialize only when no JSON is stored under that hash yet.
  GRAMMAR_CACHE[structure_key] ||= GrammarSerializer.serialize(root_atom)
end
|
|
126
|
+
|
|
127
|
+
# Clear grammar caches (call if grammar changes)
|
|
128
|
+
def clear_cache
  # Empty both cache levels in place; the last one cleared (GRAMMAR_CACHE) is
  # the return value.
  [GRAMMAR_HASH_CACHE, GRAMMAR_CACHE].each(&:clear).last
end
|
|
132
|
+
|
|
133
|
+
# Get cache statistics
|
|
134
|
+
def cache_stats
  # Snapshot of both cache levels: entry counts plus the grammar cache's keys.
  stats = {}
  stats[:hash_cache_size] = GRAMMAR_HASH_CACHE.size
  stats[:grammar_cache_size] = GRAMMAR_CACHE.size
  stats[:grammar_keys] = GRAMMAR_CACHE.keys
  stats
end
|
|
141
|
+
|
|
142
|
+
# ===== Serialized Mode (JSON Output) =====
|
|
143
|
+
|
|
144
|
+
# Parse input and return JSON string
|
|
145
|
+
# Uses native parsing and serializes the result to JSON
|
|
146
|
+
#
|
|
147
|
+
# @param grammar_json [String] JSON-serialized grammar
|
|
148
|
+
# @param input [String] Input string to parse
|
|
149
|
+
# @return [String] JSON string representing the result
|
|
150
|
+
def parse_to_json(grammar_json, input)
  # Happy path first: parse natively, then serialize the Ruby result.
  return parse(grammar_json, input).to_json if available?

  raise LoadError, 'Serialized mode requires native extension. Run `rake compile` to build the extension.'
end
|
|
161
|
+
|
|
162
|
+
# Parse and return direct Ruby objects via FFI
|
|
163
|
+
# Uses ZeroCopy mode - Rust constructs Ruby objects directly via magnus FFI
|
|
164
|
+
# This bypasses the u64 serialization step for maximum performance.
|
|
165
|
+
#
|
|
166
|
+
# Slice information is preserved: InputRef nodes from Rust are returned
|
|
167
|
+
# directly as Parsanol::Slice objects (no intermediate hash conversion needed).
|
|
168
|
+
#
|
|
169
|
+
# @param grammar_json [String] JSON-serialized grammar
|
|
170
|
+
# @param input [String] Input string to parse
|
|
171
|
+
# @param type_map [Hash] Mapping of rule names to Ruby classes (not used in this mode)
|
|
172
|
+
# @return [Object] Direct Ruby object (type depends on grammar)
|
|
173
|
+
def parse_to_objects(grammar_json, input, type_map = nil)
  # type_map is not used by this method; the native result is returned as-is.
  return Parsanol::Native.parse_to_ruby_objects(grammar_json, input) if available?

  raise LoadError, 'ZeroCopy mode requires native extension. Run `rake compile` to build the extension.'
end
|
|
184
|
+
|
|
185
|
+
# Recursively convert slice hashes to Parsanol::Slice objects
|
|
186
|
+
# Rust returns { "_slice" => true, "str" => "...", "offset" => N, "length" => N }
|
|
187
|
+
# for InputRef nodes, which we convert to Slice objects preserving position info.
|
|
188
|
+
#
|
|
189
|
+
# @param obj [Object] The object to convert (may be Hash, Array, or leaf value)
|
|
190
|
+
# @param input [String] The original input string (for Slice source reference)
|
|
191
|
+
# @return [Object] The converted object with Slice objects in place of slice hashes
|
|
192
|
+
def convert_slices(obj, input)
  case obj
  when Array
    # Walk every element.
    obj.map { |element| convert_slices(element, input) }
  when Hash
    # A hash whose "_slice" entry is exactly true is a slice marker and is
    # replaced by a Slice built from its "offset" and "str" entries.
    return Parsanol::Slice.new(obj["offset"], obj["str"]) if obj["_slice"] == true

    # Any other hash keeps its keys; only the values are walked.
    obj.transform_values { |value| convert_slices(value, input) }
  else
    # Everything else is a leaf and passes through untouched.
    obj
  end
end
|
|
210
|
+
|
|
211
|
+
# ===== Source Location Tracking =====
|
|
212
|
+
|
|
213
|
+
# Parse with source location tracking
|
|
214
|
+
# Returns both the AST and a hash of spans
|
|
215
|
+
#
|
|
216
|
+
# @param grammar_json [String] JSON-serialized grammar
|
|
217
|
+
# @param input [String] Input string to parse
|
|
218
|
+
# @return [Array<(Object, Hash)>] Tuple of [parsed_result, spans_hash]
|
|
219
|
+
def parse_with_spans(grammar_json, input)
  # With the extension present, hand straight off to _parse_with_spans.
  return _parse_with_spans(grammar_json, input) if available?

  raise LoadError, 'Source location tracking requires native extension. Run `rake compile` to build the extension.'
end
|
|
228
|
+
|
|
229
|
+
# Get span for a specific node
|
|
230
|
+
#
|
|
231
|
+
# @param result [Object] Parse result from parse_with_spans
|
|
232
|
+
# @param node_id [Integer] Node identifier
|
|
233
|
+
# @return [Hash] Span information {start: {offset, line, column}, end: {...}}
|
|
234
|
+
def get_span(result, node_id)
  # Refuse early when the extension is missing, otherwise delegate to _get_span.
  raise LoadError, 'Source location tracking requires native extension.' unless available?

  _get_span(result, node_id)
end
|
|
241
|
+
|
|
242
|
+
# ===== Grammar Composition =====
|
|
243
|
+
|
|
244
|
+
# Import another grammar with optional prefix
|
|
245
|
+
#
|
|
246
|
+
# @param builder_json [String] GrammarBuilder JSON
|
|
247
|
+
# @param grammar_json [String] Grammar to import
|
|
248
|
+
# @param prefix [String, nil] Optional prefix for imported rules
|
|
249
|
+
# @return [String] Updated GrammarBuilder JSON
|
|
250
|
+
def grammar_import(builder_json, grammar_json, prefix = nil)
  # Refuse early when the extension is missing, otherwise delegate to
  # _grammar_import with the arguments unchanged (prefix may be nil).
  raise LoadError, 'Grammar composition requires native extension.' unless available?

  _grammar_import(builder_json, grammar_json, prefix)
end
|
|
257
|
+
|
|
258
|
+
# Get mutable reference to a rule
|
|
259
|
+
#
|
|
260
|
+
# @param builder_json [String] GrammarBuilder JSON
|
|
261
|
+
# @param rule_name [String] Name of the rule to modify
|
|
262
|
+
# @return [String] Updated GrammarBuilder JSON
|
|
263
|
+
def grammar_rule_mut(builder_json, rule_name)
  # Refuse early when the extension is missing, otherwise delegate to
  # _grammar_rule_mut.
  raise LoadError, 'Grammar composition requires native extension.' unless available?

  _grammar_rule_mut(builder_json, rule_name)
end
|
|
270
|
+
|
|
271
|
+
# ===== Streaming Parser =====
|
|
272
|
+
|
|
273
|
+
# Create a new streaming parser
|
|
274
|
+
#
|
|
275
|
+
# @param grammar_json [String] JSON-serialized grammar
|
|
276
|
+
# @return [Object] Streaming parser instance
|
|
277
|
+
def streaming_parser_new(grammar_json)
  # Refuse early when the extension is missing, otherwise delegate to
  # _streaming_parser_new.
  raise LoadError, 'Streaming parser requires native extension.' unless available?

  _streaming_parser_new(grammar_json)
end
|
|
284
|
+
|
|
285
|
+
# Add a chunk to the streaming parser
|
|
286
|
+
#
|
|
287
|
+
# @param parser [Object] Streaming parser instance
|
|
288
|
+
# @param chunk [String] Input chunk to add
|
|
289
|
+
# @return [Boolean] True if more chunks needed, false if ready
|
|
290
|
+
def streaming_parser_add_chunk(parser, chunk)
  # Refuse early when the extension is missing, otherwise delegate to
  # _streaming_parser_add_chunk.
  raise LoadError, 'Streaming parser requires native extension.' unless available?

  _streaming_parser_add_chunk(parser, chunk)
end
|
|
297
|
+
|
|
298
|
+
# Parse what we have so far
|
|
299
|
+
#
|
|
300
|
+
# @param parser [Object] Streaming parser instance
|
|
301
|
+
# @return [Object, nil] Parsed result or nil if need more data
|
|
302
|
+
def streaming_parser_parse_chunk(parser)
  # Refuse early when the extension is missing, otherwise delegate to
  # _streaming_parser_parse_chunk.
  raise LoadError, 'Streaming parser requires native extension.' unless available?

  _streaming_parser_parse_chunk(parser)
end
|
|
309
|
+
|
|
310
|
+
# ===== Incremental Parser =====
|
|
311
|
+
|
|
312
|
+
# Create a new incremental parser
|
|
313
|
+
#
|
|
314
|
+
# @param grammar_json [String] JSON-serialized grammar
|
|
315
|
+
# @param initial_input [String] Initial input string
|
|
316
|
+
# @return [Object] Incremental parser instance
|
|
317
|
+
def incremental_parser_new(grammar_json, initial_input)
  # Refuse early when the extension is missing, otherwise delegate to
  # _incremental_parser_new.
  raise LoadError, 'Incremental parser requires native extension.' unless available?

  _incremental_parser_new(grammar_json, initial_input)
end
|
|
324
|
+
|
|
325
|
+
# Apply an edit to the incremental parser
|
|
326
|
+
#
|
|
327
|
+
# @param parser [Object] Incremental parser instance
|
|
328
|
+
# @param start [Integer] Start position of edit
|
|
329
|
+
# @param deleted [Integer] Number of characters deleted
|
|
330
|
+
# @param inserted [String] Text to insert
|
|
331
|
+
# @return [Object] Updated parser state
|
|
332
|
+
def incremental_parser_apply_edit(parser, start, deleted, inserted = '')
  # Refuse early when the extension is missing, otherwise delegate to
  # _incremental_parser_apply_edit (inserted defaults to an empty string).
  raise LoadError, 'Incremental parser requires native extension.' unless available?

  _incremental_parser_apply_edit(parser, start, deleted, inserted)
end
|
|
339
|
+
|
|
340
|
+
# Reparse with changes
|
|
341
|
+
#
|
|
342
|
+
# @param parser [Object] Incremental parser instance
|
|
343
|
+
# @param new_input [String, nil] Optional new input (if not using apply_edit)
|
|
344
|
+
# @return [Object] Parse result
|
|
345
|
+
def incremental_parser_reparse(parser, new_input = nil)
  # Refuse early when the extension is missing, otherwise delegate to
  # _incremental_parser_reparse (new_input may be nil).
  raise LoadError, 'Incremental parser requires native extension.' unless available?

  _incremental_parser_reparse(parser, new_input)
end
|
|
352
|
+
|
|
353
|
+
# ===== Streaming Builder =====
|
|
354
|
+
|
|
355
|
+
# Parse with a streaming builder for maximum performance.
|
|
356
|
+
# The builder receives callbacks as parsing progresses, eliminating
|
|
357
|
+
# intermediate AST construction.
|
|
358
|
+
#
|
|
359
|
+
# @param grammar_json [String] JSON-serialized grammar
|
|
360
|
+
# @param input [String] Input string to parse
|
|
361
|
+
# @param builder [Object] Object including BuilderCallbacks module
|
|
362
|
+
# @return [Object] Result of builder.finish
|
|
363
|
+
def parse_with_builder(grammar_json, input, builder)
  # With the extension present, hand straight off to _parse_with_builder.
  return _parse_with_builder(grammar_json, input, builder) if available?

  raise LoadError, 'Streaming builder requires native extension. Run `rake compile` to build the extension.'
end
|
|
372
|
+
|
|
373
|
+
# ===== Parallel Parsing =====
|
|
374
|
+
|
|
375
|
+
# Parse a batch of inputs through the native parallel entry point.
#
# The original notes describe the native side as rayon-based; this wrapper
# only checks availability and hands the arguments over, turning the
# `num_threads` keyword into a positional argument.
#
# @param grammar_json [String] JSON-serialized grammar
# @param inputs [Array<String>] Input strings to parse
# @param num_threads [Integer, nil] Thread count (nil = let the native side decide)
# @return [Array<Object>] Parse results, documented as ordered like `inputs`
# @raise [LoadError] when `available?` is falsy
def parse_batch_parallel(grammar_json, inputs, num_threads: nil)
  return _parse_batch_parallel(grammar_json, inputs, num_threads) if available?

  raise LoadError,
        "Parallel parsing requires native extension. " \
        "Run `rake compile` to build the extension."
end
|
|
391
|
+
|
|
392
|
+
# ===== Security / Limits =====
|
|
393
|
+
|
|
394
|
+
# Parse with explicit resource limits, intended for untrusted input.
#
# The limits are not enforced here; they are passed positionally to the
# native hook after the availability check.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @param max_input_size [Integer] Maximum input size in bytes
#   (default: 100 * 1024 * 1024)
# @param max_recursion_depth [Integer] Maximum recursion depth (default: 1000)
# @return [Object] Parse result from the native hook
# @raise [LoadError] when `available?` is falsy
def parse_with_limits(grammar_json, input, max_input_size: 100 * 1024 * 1024, max_recursion_depth: 1000)
  return _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth) if available?

  raise LoadError,
        "Security limits require native extension. " \
        "Run `rake compile` to build the extension."
end
|
|
410
|
+
|
|
411
|
+
# ===== Debug Tools =====
|
|
412
|
+
|
|
413
|
+
# Parse with tracing switched on, for debugging grammars.
#
# @param grammar_json [String] JSON-serialized grammar
# @param input [String] Input string to parse
# @return [Array<(Object, Array)>] Documented as [parse_result, trace_events];
#   the value is whatever the native hook returns
# @raise [LoadError] when `available?` is falsy
def parse_with_trace(grammar_json, input)
  return _parse_with_trace(grammar_json, input) if available?

  raise LoadError,
        "Debug tracing requires native extension. " \
        "Run `rake compile` to build the extension."
end
|
|
427
|
+
|
|
428
|
+
# Render a grammar as Mermaid diagram source via the native extension.
#
# @param grammar_json [String] JSON-serialized grammar
# @return [String] Mermaid diagram source (as produced by the native hook)
# @raise [LoadError] when `available?` is falsy
def grammar_to_mermaid(grammar_json)
  return _grammar_to_mermaid(grammar_json) if available?

  raise LoadError,
        "Grammar visualization requires native extension. " \
        "Run `rake compile` to build the extension."
end
|
|
441
|
+
|
|
442
|
+
# Render a grammar as GraphViz DOT source via the native extension.
#
# @param grammar_json [String] JSON-serialized grammar
# @return [String] GraphViz DOT source (as produced by the native hook)
# @raise [LoadError] when `available?` is falsy
def grammar_to_dot(grammar_json)
  return _grammar_to_dot(grammar_json) if available?

  raise LoadError,
        "Grammar visualization requires native extension. " \
        "Run `rake compile` to build the extension."
end
|
|
455
|
+
|
|
456
|
+
private
|
|
457
|
+
|
|
458
|
+
# Pure-Ruby placeholder for the native reparse hook; always raises.
# NOTE(review): presumably replaced by the native extension when it is
# loaded - confirm against the extension's init code.
# NotImplementedError is a ScriptError, so a bare `rescue` will not catch it.
def _incremental_parser_reparse(parser, new_input)
  reason = "Native extension method not available"
  raise NotImplementedError, reason
end
|
|
461
|
+
|
|
462
|
+
# Forward a streaming-builder parse to Parsanol::Native.parse_with_builder.
#
# The original comment states that parse_with_builder is exposed by the
# native (Rust) extension as a Ruby function.
# NOTE(review): the public parse_with_builder wrapper in this file calls this
# method; if Parsanol::Native.parse_with_builder resolves to that Ruby wrapper
# instead of a native-defined method, the two would call each other
# endlessly - confirm definition/load order.
def _parse_with_builder(grammar_json, input, builder)
  native = Parsanol::Native
  native.parse_with_builder(grammar_json, input, builder)
end
|
|
467
|
+
|
|
468
|
+
# Pure-Ruby placeholder for the native parallel-batch hook; always raises.
# NOTE(review): presumably replaced by the native extension when it is
# loaded - confirm against the extension's init code.
# NotImplementedError is a ScriptError, so a bare `rescue` will not catch it.
def _parse_batch_parallel(grammar_json, inputs, num_threads)
  reason = "Native extension method not available"
  raise NotImplementedError, reason
end
|
|
471
|
+
|
|
472
|
+
# Pure-Ruby placeholder for the native limited-parse hook; always raises.
# NOTE(review): presumably replaced by the native extension when it is
# loaded - confirm against the extension's init code.
# NotImplementedError is a ScriptError, so a bare `rescue` will not catch it.
def _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
  reason = "Native extension method not available"
  raise NotImplementedError, reason
end
|
|
475
|
+
|
|
476
|
+
# Pure-Ruby placeholder for the native tracing hook; always raises.
# NOTE(review): presumably replaced by the native extension when it is
# loaded - confirm against the extension's init code.
# NotImplementedError is a ScriptError, so a bare `rescue` will not catch it.
def _parse_with_trace(grammar_json, input)
  reason = "Native extension method not available"
  raise NotImplementedError, reason
end
|
|
479
|
+
|
|
480
|
+
# Pure-Ruby placeholder for the native Mermaid export hook; always raises.
# NOTE(review): presumably replaced by the native extension when it is
# loaded - confirm against the extension's init code.
# NotImplementedError is a ScriptError, so a bare `rescue` will not catch it.
def _grammar_to_mermaid(grammar_json)
  reason = "Native extension method not available"
  raise NotImplementedError, reason
end
|
|
483
|
+
|
|
484
|
+
# Pure-Ruby placeholder for the native DOT export hook; always raises.
# NOTE(review): presumably replaced by the native extension when it is
# loaded - confirm against the extension's init code.
# NotImplementedError is a ScriptError, so a bare `rescue` will not catch it.
def _grammar_to_dot(grammar_json)
  reason = "Native extension method not available"
  raise NotImplementedError, reason
end
|
|
487
|
+
|
|
488
|
+
# Decode a flat array of u64 words into a Ruby value (the AST).
#
# The words form a tagged, stack-driven encoding: scalar tags push a value,
# array/hash start tags push a marker symbol, and the matching end tag
# collapses everything above the nearest marker into an Array or Hash.
#
# Tags:
#   0x00 = nil
#   0x01 = bool           (tag, value; any non-zero word is true)
#   0x02 = int            (tag, value)
#   0x03 = float          (tag, IEEE 754 double bit pattern)
#   0x04 = string_ref     (tag, byte offset, byte length into +input+)
#   0x05 = array_start
#   0x06 = array_end
#   0x07 = hash_start
#   0x08 = hash_end
#   0x09 = hash_key       (tag, byte length, key chunks...; the value follows)
#   0x0A = inline_string  (tag, byte length, string chunks...)
#
# NOTE(review): ints are returned exactly as stored in the word; if the
# producer encodes negative numbers as two's complement they come back as
# large unsigned values - confirm against the native encoder.
#
# @param flat [Array<Integer>] encoded words
# @param input [String] original input; string_ref slices it by byte offset
# @return [Object, nil] the bottom value left on the stack (nil when +flat+ is empty)
# @raise [RuntimeError] on an unknown tag, an end tag without a matching
#   start, a hash key with no value, or a truncated string payload
def decode_flat(flat, input)
  stack = []
  i = 0

  # Manual index loop: each tag consumes a different number of words.
  while i < flat.length
    tag = flat[i]

    case tag
    when 0x00 # nil
      stack << nil
      i += 1
    when 0x01 # bool
      stack << (flat[i + 1] != 0)
      i += 2
    when 0x02 # int
      stack << flat[i + 1]
      i += 2
    when 0x03 # float: reinterpret the 64-bit pattern as a double
      stack << [flat[i + 1]].pack('Q').unpack1('D')
      i += 2
    when 0x04 # string_ref (from input)
      stack << input.byteslice(flat[i + 1], flat[i + 2])
      i += 3
    when 0x09, 0x0A # hash_key / inline_string share the same chunked layout
      text, i = decode_flat_string(flat, i)
      stack << text
    when 0x05 # array_start
      stack << :array_marker
      i += 1
    when 0x06 # array_end
      items = decode_flat_close(stack, :array_marker, i)
      stack << items
      i += 1
    when 0x07 # hash_start
      stack << :hash_marker
      i += 1
    when 0x08 # hash_end: entries alternate key, value, key, value, ...
      entries = decode_flat_close(stack, :hash_marker, i)
      raise "Hash key without value before index #{i}" if entries.length.odd?

      stack << entries.each_slice(2).to_h
      i += 1
    else
      raise "Unknown tag: #{tag} at index #{i}"
    end
  end

  stack.first
end

# Read a length-prefixed string packed into little-endian u64 chunks.
# +index+ points at the tag word; returns [string, index of next unread word].
def decode_flat_string(flat, index)
  len = flat[index + 1]
  raise "Truncated string at index #{index}" if len.nil?

  word_count = (len + 7) / 8
  words = flat[index + 2, word_count]
  raise "Truncated string at index #{index}" if words.nil? || words.length < word_count

  # The last chunk may carry padding bytes, so trim to the declared length.
  text = words.pack('Q<*').byteslice(0, len).force_encoding('UTF-8')
  [text, index + 2 + word_count]
end

# Remove the nearest +marker+ and everything above it from +stack+,
# returning the removed values (without the marker) in original order.
# Raises instead of looping forever when no marker is present.
def decode_flat_close(stack, marker, index)
  start = stack.rindex(marker)
  raise "Unmatched container end at index #{index} (no #{marker} on stack)" if start.nil?

  stack.pop(stack.length - start).drop(1)
end
|
|
594
|
+
|
|
595
|
+
# Fingerprint a grammar atom by its structure rather than its identity.
#
# The atom is reduced to nested arrays by atom_structure, rendered with
# #to_s, and digested with MD5, so two separately built grammars that
# reduce to the same arrays produce the same hex string.
#
# @param atom [Object] grammar atom to fingerprint
# @return [String] MD5 hex digest of the structure's string form
def grammar_structure_hash(atom)
  Digest::MD5.hexdigest(atom_structure(atom).to_s)
end
|
|
602
|
+
|
|
603
|
+
# Reduce a grammar atom to plain nested arrays suitable for hashing.
#
# Each known atom class maps to a tagged array; composite atoms recurse
# into their children. Entity atoms are lazy references and contribute only
# their name, and unrecognised atoms contribute only their class name.
# NOTE(review): that means two grammars differing only inside an Entity's
# definition or inside an unrecognised atom reduce to the same structure -
# confirm this is acceptable for the callers of grammar_structure_hash.
#
# @param atom [Object] grammar atom
# @return [Array] tagged structure, e.g. [:str, "abc"]
def atom_structure(atom)
  case atom
  when ::Parsanol::Atoms::Str then [:str, atom.str]
  when ::Parsanol::Atoms::Re then [:re, atom.match]
  when ::Parsanol::Atoms::Sequence
    [:seq, atom.parslets.map { |child| atom_structure(child) }]
  when ::Parsanol::Atoms::Alternative
    [:alt, atom.alternatives.map { |child| atom_structure(child) }]
  when ::Parsanol::Atoms::Repetition
    inner = atom_structure(atom.parslet)
    [:rep, atom.min, atom.max, inner]
  when ::Parsanol::Atoms::Named
    inner = atom_structure(atom.parslet)
    [:named, atom.name.to_s, inner]
  when ::Parsanol::Atoms::Lookahead
    inner = atom_structure(atom.bound_parslet)
    [:lookahead, atom.positive, inner]
  when ::Parsanol::Atoms::Entity then [:entity, atom.name.to_s]
  else [:unknown, atom.class.name]
  end
end
|
|
627
|
+
end
|
|
628
|
+
end
|
|
629
|
+
end
|
|
630
|
+
end
|