parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
  module Native
    # Constants shared by the native AST (de)serialization code.
    #
    # The TAG_* values identify node kinds in the serialized AST. Per the
    # original notes they have to stay identical to the tags emitted by the
    # Rust parser, so treat them as a fixed wire contract.
    module Types
      # Node kind tags (one per serialized AST node type)
      TAG_NIL           = 0x00
      TAG_BOOL          = 0x01
      TAG_INT           = 0x02
      TAG_FLOAT         = 0x03
      TAG_STRING_REF    = 0x04
      TAG_ARRAY_START   = 0x05
      TAG_ARRAY_END     = 0x06
      TAG_HASH_START    = 0x07
      TAG_HASH_END      = 0x08
      TAG_HASH_KEY      = 0x09
      TAG_INLINE_STRING = 0x0A

      # Pre-frozen values the transformer can reuse instead of allocating
      # a fresh object each time.
      SEQUENCE_TAG   = ':sequence'.freeze
      REPETITION_TAG = ':repetition'.freeze
      EMPTY_STRING   = ''.freeze
      EMPTY_ARRAY    = [].freeze
      EMPTY_HASH     = {}.freeze
    end

    # Memo table mapping already-seen keys to their Symbol form.
    # Kept as a class variable so a single table is shared by every
    # transformation.
    @@symbol_cache = {}

    # Returns the Symbol for +key+, remembering the conversion so repeated
    # lookups of the same key skip the +to_sym+ call.
    #
    # @param key [String, Symbol] The key to convert
    # @return [Symbol] +key+ itself when it already is a Symbol, otherwise
    #   the cached result of +key.to_sym+
    def self.cached_symbol(key)
      return key if key.is_a?(Symbol)

      known = @@symbol_cache[key]
      return known if known

      # First time this key is seen: convert once and remember it.
      @@symbol_cache[key] = key.to_sym
    end
  end
end
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'digest'
|
|
5
|
+
|
|
6
|
+
# Entry point for native parsing functionality
|
|
7
|
+
# Requires the individual components
|
|
8
|
+
require 'parsanol/native/types'
|
|
9
|
+
require 'parsanol/native/parser'
|
|
10
|
+
require 'parsanol/native/transformer'
|
|
11
|
+
require 'parsanol/native/serializer'
|
|
12
|
+
|
|
13
|
+
module Parsanol
  # Module-level facade for native parsing.
  #
  # Almost every method below is a one-line forwarder to the Parser module;
  # arguments are passed through untouched and the Parser's return value is
  # returned as-is. The underscore-prefixed variants at the bottom reach the
  # same-named Parser methods through +send+.
  module Native
    VERSION = '0.1.0'

    class << self
      # --- Core parsing -----------------------------------------------------

      # @return whatever Parser.available? reports
      def available?
        Parser.available?
      end

      # Forwards to Parser.parse.
      def parse(grammar_json, input)
        Parser.parse(grammar_json, input)
      end

      # Forwards to Parser.parse_with_grammar.
      def parse_with_grammar(root_atom, input)
        Parser.parse_with_grammar(root_atom, input)
      end

      # Forwards to Parser.parse_parslet_compatible.
      def parse_parslet_compatible(root_atom, input)
        Parser.parse_parslet_compatible(root_atom, input)
      end

      # Forwards to Parser.parse_batch_inputs.
      def parse_batch_inputs(root_atom, inputs)
        Parser.parse_batch_inputs(root_atom, inputs)
      end

      # Forwards to Parser.parse_batch_with_transform.
      def parse_batch_with_transform(root_atom, inputs)
        Parser.parse_batch_with_transform(root_atom, inputs)
      end

      # Forwards to Parser.parse_raw.
      def parse_raw(root_atom, input)
        Parser.parse_raw(root_atom, input)
      end

      # Forwards to Parser.serialize_grammar.
      def serialize_grammar(root_atom)
        Parser.serialize_grammar(root_atom)
      end

      # --- Cache management -------------------------------------------------

      # Forwards to Parser.clear_cache.
      def clear_cache
        Parser.clear_cache
      end

      # Forwards to Parser.cache_stats.
      def cache_stats
        Parser.cache_stats
      end

      # --- Serialized mode (JSON output) ------------------------------------

      # Forwards to Parser.parse_to_json.
      def parse_to_json(grammar_json, input)
        Parser.parse_to_json(grammar_json, input)
      end

      # --- ZeroCopy mode (direct Ruby objects) ------------------------------

      # Forwards to Parser.parse_to_objects; +type_map+ is optional and is
      # passed along as nil when omitted.
      def parse_to_objects(grammar_json, input, type_map = nil)
        Parser.parse_to_objects(grammar_json, input, type_map)
      end

      # Forwards to Parser.convert_slices.
      def convert_slices(obj, input)
        Parser.convert_slices(obj, input)
      end

      # --- Source location tracking -----------------------------------------

      # Forwards to Parser.parse_with_spans.
      def parse_with_spans(grammar_json, input)
        Parser.parse_with_spans(grammar_json, input)
      end

      # Forwards to Parser.get_span.
      def get_span(result, node_id)
        Parser.get_span(result, node_id)
      end

      # --- Grammar composition ----------------------------------------------

      # Forwards to Parser.grammar_import; +prefix+ is optional (nil when
      # omitted).
      def grammar_import(builder_json, grammar_json, prefix = nil)
        Parser.grammar_import(builder_json, grammar_json, prefix)
      end

      # Forwards to Parser.grammar_rule_mut.
      def grammar_rule_mut(builder_json, rule_name)
        Parser.grammar_rule_mut(builder_json, rule_name)
      end

      # --- Streaming parser -------------------------------------------------

      # Forwards to Parser.streaming_parser_new.
      def streaming_parser_new(grammar_json)
        Parser.streaming_parser_new(grammar_json)
      end

      # Forwards to Parser.streaming_parser_add_chunk.
      def streaming_parser_add_chunk(parser, chunk)
        Parser.streaming_parser_add_chunk(parser, chunk)
      end

      # Forwards to Parser.streaming_parser_parse_chunk.
      def streaming_parser_parse_chunk(parser)
        Parser.streaming_parser_parse_chunk(parser)
      end

      # --- Incremental parser -----------------------------------------------

      # Forwards to Parser.incremental_parser_new.
      def incremental_parser_new(grammar_json, initial_input)
        Parser.incremental_parser_new(grammar_json, initial_input)
      end

      # Forwards to Parser.incremental_parser_apply_edit; +inserted+ defaults
      # to the empty string.
      def incremental_parser_apply_edit(parser, start, deleted, inserted = '')
        Parser.incremental_parser_apply_edit(parser, start, deleted, inserted)
      end

      # Forwards to Parser.incremental_parser_reparse; +new_input+ is optional
      # (nil when omitted).
      def incremental_parser_reparse(parser, new_input = nil)
        Parser.incremental_parser_reparse(parser, new_input)
      end

      # --- Streaming builder ------------------------------------------------

      # Alias for parse_with_builder (same arguments, same result).
      #
      # NOTE(review): parse_with_builder is not defined in this file; per the
      # original notes it is exposed on this module by the native (Rust)
      # extension - confirm it is present before relying on this method.
      def parse_with_callback(grammar_json, input, callback)
        parse_with_builder(grammar_json, input, callback)
      end

      # --- Parallel parsing -------------------------------------------------

      # Calls _parse_batch_parallel, substituting 0 when +num_threads+ is not
      # given (nil).
      def parse_batch_parallel(grammar_json, inputs, num_threads: nil)
        thread_count = num_threads || 0
        _parse_batch_parallel(grammar_json, inputs, thread_count)
      end

      # --- Security / limits ------------------------------------------------

      # Calls _parse_with_limits with the given caps. Defaults: an input size
      # cap of 100 * 1024 * 1024 and a recursion depth cap of 1000.
      def parse_with_limits(grammar_json, input, max_input_size: 100 * 1024 * 1024, max_recursion_depth: 1000)
        _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
      end

      # --- Debug tools ------------------------------------------------------

      # Forwards to Parser.parse_with_trace.
      def parse_with_trace(grammar_json, input)
        Parser.parse_with_trace(grammar_json, input)
      end

      # Forwards to Parser.grammar_to_mermaid.
      def grammar_to_mermaid(grammar_json)
        Parser.grammar_to_mermaid(grammar_json)
      end

      # Forwards to Parser.grammar_to_dot.
      def grammar_to_dot(grammar_json)
        Parser.grammar_to_dot(grammar_json)
      end

      # --- Legacy internal entry points (backward compatibility) ------------
      #
      # Each of these invokes the same-named underscore method on Parser via
      # +send+, so the target is reached regardless of its visibility.

      def _parse_with_spans(grammar_json, input)
        Parser.send(:_parse_with_spans, grammar_json, input)
      end

      def _get_span(result, node_id)
        Parser.send(:_get_span, result, node_id)
      end

      def _grammar_import(builder_json, grammar_json, prefix)
        Parser.send(:_grammar_import, builder_json, grammar_json, prefix)
      end

      def _grammar_rule_mut(builder_json, rule_name)
        Parser.send(:_grammar_rule_mut, builder_json, rule_name)
      end

      def _streaming_parser_new(grammar_json)
        Parser.send(:_streaming_parser_new, grammar_json)
      end

      def _streaming_parser_add_chunk(parser, chunk)
        Parser.send(:_streaming_parser_add_chunk, parser, chunk)
      end

      def _streaming_parser_parse_chunk(parser)
        Parser.send(:_streaming_parser_parse_chunk, parser)
      end

      def _incremental_parser_new(grammar_json, initial_input)
        Parser.send(:_incremental_parser_new, grammar_json, initial_input)
      end

      def _incremental_parser_apply_edit(parser, start, deleted, inserted)
        Parser.send(:_incremental_parser_apply_edit, parser, start, deleted, inserted)
      end

      def _incremental_parser_reparse(parser, new_input)
        Parser.send(:_incremental_parser_reparse, parser, new_input)
      end

      def _parse_batch_parallel(grammar_json, inputs, num_threads)
        Parser.send(:_parse_batch_parallel, grammar_json, inputs, num_threads)
      end

      def _parse_with_limits(grammar_json, input, max_input_size, max_recursion_depth)
        Parser.send(:_parse_with_limits, grammar_json, input, max_input_size, max_recursion_depth)
      end

      def _parse_with_trace(grammar_json, input)
        Parser.send(:_parse_with_trace, grammar_json, input)
      end

      def _grammar_to_mermaid(grammar_json)
        Parser.send(:_grammar_to_mermaid, grammar_json)
      end

      def _grammar_to_dot(grammar_json)
        Parser.send(:_grammar_to_dot, grammar_json)
      end
    end
  end
end
|
|
211
|
+
|
|
212
|
+
# Attempt to load native extension
|
|
213
|
+
begin
|
|
214
|
+
require 'parsanol/parsanol_native'
|
|
215
|
+
rescue LoadError
|
|
216
|
+
# Native extension not built yet
|
|
217
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'ast_visitor'
|
|
4
|
+
require_relative 'optimizers/quantifier_optimizer'
|
|
5
|
+
require_relative 'optimizers/sequence_optimizer'
|
|
6
|
+
require_relative 'optimizers/choice_optimizer'
|
|
7
|
+
require_relative 'optimizers/lookahead_optimizer'
|
|
8
|
+
require_relative 'optimizers/cut_inserter'
|
|
9
|
+
|
|
10
|
+
# Grammar-level optimizations for Parslet parsers.
#
# Each pass rewrites the parser AST with the aim of lowering runtime overhead
# while keeping the grammar's meaning intact.
#
# Design:
# - traversal and rewriting are separated using the Visitor pattern
# - every pass lives in its own class under Parsanol::Optimizers
# - this module is only a facade that instantiates a pass and runs it
module Parsanol
  module Optimizer
    class << self
      # Collapse redundant quantifiers.
      #
      # Examples:
      #   str('a').repeat(1, 1)              => str('a')
      #   str('a').repeat(0, 1).repeat(0, 1) => str('a').repeat(0, 1)
      #
      # @param parslet [Parsanol::Atoms::Base] parslet to simplify
      # @return [Parsanol::Atoms::Base] simplified parslet
      def simplify_quantifiers(parslet)
        Optimizers::QuantifierOptimizer.new.visit(parslet)
      end

      # Flatten nested sequences and merge neighbouring string atoms.
      #
      # Examples:
      #   str('a') >> str('b')               => str('ab')
      #   (str('a') >> str('b')) >> str('c') => str('abc')
      #
      # @param parslet [Parsanol::Atoms::Base] parslet to simplify
      # @return [Parsanol::Atoms::Base] simplified parslet
      def simplify_sequences(parslet)
        Optimizers::SequenceOptimizer.new.visit(parslet)
      end

      # Tidy up choice/alternative patterns.
      #
      # Examples:
      #   (A | B) | C => A | B | C
      #   A | B | A   => A | B
      #
      # @param parslet [Parsanol::Atoms::Base] parslet to simplify
      # @return [Parsanol::Atoms::Base] simplified parslet
      def simplify_choices(parslet)
        Optimizers::ChoiceOptimizer.new.visit(parslet)
      end

      # Tidy up lookahead patterns.
      #
      # Example:
      #   !(!x) => &x   (double negation elimination)
      #
      # @param parslet [Parsanol::Atoms::Base] parslet to simplify
      # @return [Parsanol::Atoms::Base] simplified parslet
      def simplify_lookaheads(parslet)
        Optimizers::LookaheadOptimizer.new.visit(parslet)
      end

      # Insert cut operators automatically where that is safe (AC-FIRST
      # algorithm): a cut goes after a deterministic prefix when the
      # alternatives have disjoint FIRST sets. According to the original
      # notes this enables O(1) space complexity by allowing aggressive
      # cache eviction.
      #
      # Example:
      #   str('if') >> x | str('while') >> y
      #   => str('if').cut >> x | str('while').cut >> y
      #
      # @param parslet [Parsanol::Atoms::Base] parslet to optimize
      # @return [Parsanol::Atoms::Base] optimized parslet with cuts inserted
      def insert_cuts(parslet)
        Optimizers::CutInserter.new.optimize(parslet)
      end

      # Run every pass, feeding each pass the output of the previous one.
      # Order: quantifiers, sequences, choices, lookaheads, then cut
      # insertion.
      #
      # @param parslet [Parsanol::Atoms::Base] parslet to optimize
      # @return [Parsanol::Atoms::Base] fully optimized parslet
      def optimize_all(parslet)
        passes = %i[
          simplify_quantifiers
          simplify_sequences
          simplify_choices
          simplify_lookaheads
          insert_cuts
        ]
        passes.reduce(parslet) { |current, pass| send(pass, current) }
      end
    end
  end
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ast_visitor'
|
|
4
|
+
|
|
5
|
+
module Parsanol
  module Optimizers
    # Visitor that rewrites alternative (choice) nodes of the parser AST.
    #
    # Rewrites performed:
    #   (A | B) | C    => A | B | C   nested alternatives are flattened
    #   A | B | A      => A | B       repeated alternatives are dropped
    #   Alternative(A) => A           a lone alternative is unwrapped
    class ChoiceOptimizer < ASTVisitor
      # Optimize one alternative node.
      #
      # Children are visited first, so any nested alternative has already
      # been processed before it is merged into its parent.
      #
      # @param parslet [Parsanol::Atoms::Alternative] alternative to optimize
      # @return [Parsanol::Atoms::Base] the single remaining child, a new
      #   Alternative, or +parslet+ itself when nothing changed
      def visit_alternative(parslet)
        visited = parslet.alternatives.map { |child| visit(child) }
        choices = deduplicate_alternatives(flatten_alternatives(visited))

        # Only one option left: the wrapper is no longer needed.
        return choices.first if choices.size == 1
        # Nothing was rewritten: hand back the original node untouched.
        return parslet if choices == parslet.alternatives

        Parsanol::Atoms::Alternative.new(*choices)
      end

      private

      # Pull the members of directly nested alternatives up one level.
      #
      # @param alternatives [Array<Parsanol::Atoms::Base>] array of alternatives
      # @return [Array<Parsanol::Atoms::Base>] flattened array
      def flatten_alternatives(alternatives)
        alternatives.each_with_object([]) do |alt, flat|
          if alt.is_a?(Parsanol::Atoms::Alternative)
            flat.concat(alt.alternatives)
          else
            flat << alt
          end
        end
      end

      # Drop alternatives that repeat an earlier one, keeping the first
      # occurrence and the original order. Two alternatives count as equal
      # when their +to_s+ output is equal.
      #
      # NOTE(review): +to_s+ is only a proxy for structural equality; atoms
      # that print identically but behave differently would be merged -
      # confirm that cannot happen for the atom types in use.
      #
      # @param alternatives [Array<Parsanol::Atoms::Base>] array of alternatives
      # @return [Array<Parsanol::Atoms::Base>] deduplicated array
      def deduplicate_alternatives(alternatives)
        return alternatives if alternatives.size < 2

        alternatives.uniq(&:to_s)
      end
    end
  end
end
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# Automatic Cut Insertion (AC-FIRST Algorithm)
|
|
2
|
+
#
|
|
3
|
+
# This optimizer implements the AC-FIRST algorithm from Mizushima et al. (2010)
|
|
4
|
+
# to automatically insert cut operators when alternatives have disjoint FIRST sets.
|
|
5
|
+
#
|
|
6
|
+
# When all alternatives in a choice have non-overlapping FIRST sets, we can safely
|
|
7
|
+
# insert a cut after the deterministic prefix, since backtracking will never be
|
|
8
|
+
# needed.
|
|
9
|
+
#
|
|
10
|
+
# Example:
|
|
11
|
+
# str('if') >> condition >> then_clause |
|
|
12
|
+
# str('while') >> condition >> body |
|
|
13
|
+
# str('print') >> expression
|
|
14
|
+
#
|
|
15
|
+
# Becomes:
|
|
16
|
+
# str('if').cut >> condition >> then_clause |
|
|
17
|
+
# str('while').cut >> condition >> body |
|
|
18
|
+
# str('print').cut >> expression
|
|
19
|
+
#
|
|
20
|
+
# Reference: Mizushima et al. (2010) "Packrat Parsers Can Handle Practical
|
|
21
|
+
# Grammars in Mostly Constant Space"
|
|
22
|
+
#
|
|
23
|
+
module Parsanol
  module Optimizers
    # Automatic cut insertion for choices whose branches start differently.
    #
    # Walks a grammar tree and, for every Alternative whose branches have
    # pairwise disjoint FIRST sets (as reported by
    # Parsanol::FirstSet.all_disjoint?), wraps the leading part of each
    # branch that cannot match empty input with +#cut+.
    class CutInserter
      # Optimize a parslet by inserting cuts where safe.
      #
      # Alternative, Sequence, Repetition and Named atoms are rebuilt from
      # their optimized children; every other atom (Str, Re, Lookahead, ...)
      # is returned unchanged.
      #
      # @param parslet [Parsanol::Atoms::Base] grammar node to optimize
      # @return [Parsanol::Atoms::Base] optimized node
      def optimize(parslet)
        case parslet
        when Parsanol::Atoms::Alternative then optimize_alternative(parslet)
        when Parsanol::Atoms::Sequence    then optimize_sequence(parslet)
        when Parsanol::Atoms::Repetition  then optimize_repetition(parslet)
        when Parsanol::Atoms::Named       then optimize_named(parslet)
        else parslet
        end
      end

      private

      # Rebuild an Alternative. When all branch FIRST sets are disjoint each
      # branch gets a cut (where safe); otherwise the branches are only
      # optimized recursively.
      #
      # @param alt [Parsanol::Atoms::Alternative]
      # @return [Parsanol::Atoms::Alternative]
      def optimize_alternative(alt)
        branches = alt.alternatives
        disjoint = Parsanol::FirstSet.all_disjoint?(branches.map(&:first_set))

        rewritten = branches.map do |branch|
          disjoint ? insert_cut_if_safe(branch) : optimize(branch)
        end
        Parsanol::Atoms::Alternative.new(*rewritten)
      end

      # Rebuild a Sequence from its recursively optimized elements.
      #
      # @param seq [Parsanol::Atoms::Sequence]
      # @return [Parsanol::Atoms::Sequence]
      def optimize_sequence(seq)
        Parsanol::Atoms::Sequence.new(*seq.parslets.map { |p| optimize(p) })
      end

      # Rebuild a Repetition around its optimized child, keeping min/max and,
      # when it can be recovered, the original tag.
      #
      # @param rep [Parsanol::Atoms::Repetition]
      # @return [Parsanol::Atoms::Repetition]
      def optimize_repetition(rep)
        inner = optimize(rep.parslet)
        tag = repetition_tag(rep)

        # Without a recoverable tag, fall back to the constructor default.
        return Parsanol::Atoms::Repetition.new(inner, rep.min, rep.max) if tag.nil?

        # NOTE(review): assumes the constructor's optional fourth argument
        # is the tag - confirm against Parsanol::Atoms::Repetition.
        Parsanol::Atoms::Repetition.new(inner, rep.min, rep.max, tag)
      end

      # Recover the tag of a Repetition so a rebuilt atom keeps it instead of
      # silently reverting to the default.
      #
      # NOTE(review): the tag has no public reader; this presumes it is kept
      # in @tag - confirm against Parsanol::Atoms::Repetition.
      #
      # @param rep [Parsanol::Atoms::Repetition]
      # @return [Object, nil] the tag, or nil when it cannot be determined
      def repetition_tag(rep)
        return rep.tag if rep.respond_to?(:tag)

        rep.instance_variable_get(:@tag) if rep.instance_variable_defined?(:@tag)
      end

      # Rebuild a Named atom around its optimized child, keeping the name.
      #
      # @param named [Parsanol::Atoms::Named]
      # @return [Parsanol::Atoms::Base]
      def optimize_named(named)
        optimize(named.parslet).as(named.name)
      end

      # Insert a cut into one branch of a disjoint choice.
      #
      # Sequences are cut after their longest leading run of elements without
      # EPSILON in their FIRST set. Any other branch (or a sequence with no
      # such run) is cut as a whole when safe_to_cut? allows it; otherwise it
      # is only optimized recursively.
      #
      # @param parslet [Parsanol::Atoms::Base]
      # @return [Parsanol::Atoms::Base]
      def insert_cut_if_safe(parslet)
        if parslet.is_a?(Parsanol::Atoms::Sequence)
          prefix_parslets = find_deterministic_prefix(parslet)
          return build_cut_sequence(parslet, prefix_parslets) if prefix_parslets
        end

        optimized = optimize(parslet)
        # Cut the optimized node, not the raw one, so choices nested inside
        # this branch receive their own cuts as well.
        safe_to_cut?(parslet) ? optimized.cut : optimized
      end

      # Find the longest leading run of sequence elements whose FIRST set
      # does not contain EPSILON.
      #
      # @param sequence [Parsanol::Atoms::Sequence] The sequence to analyze
      # @return [Array<Parsanol::Atoms::Base>, nil] prefix parslets, or nil
      #   when the very first element has EPSILON in its FIRST set
      def find_deterministic_prefix(sequence)
        prefix = sequence.parslets.take_while do |p|
          !p.first_set.include?(Parsanol::FirstSet::EPSILON)
        end
        prefix.empty? ? nil : prefix
      end

      # Decide whether a parslet may be wrapped in a cut.
      #
      # @param parslet [Parsanol::Atoms::Base]
      # @return [Boolean] false when the FIRST set contains EPSILON or
      #   consists only of nil entries (treated as unknown), true otherwise
      def safe_to_cut?(parslet)
        first = parslet.first_set
        return false if first.include?(Parsanol::FirstSet::EPSILON)
        return false if first.all?(&:nil?)

        true
      end

      # Build the replacement for a sequence: the optimized prefix wrapped in
      # a cut, followed by the optimized remainder.
      #
      # @param sequence [Parsanol::Atoms::Sequence] Original sequence
      # @param prefix_parslets [Array] Parslets forming the deterministic prefix
      # @return [Parsanol::Atoms::Base] the cut prefix alone when it spans the
      #   whole sequence, otherwise a new Sequence starting with the cut prefix
      def build_cut_sequence(sequence, prefix_parslets)
        optimized_prefix = prefix_parslets.map { |p| optimize(p) }

        # A one-element prefix is used directly rather than being wrapped in
        # a single-element Sequence.
        prefix = if optimized_prefix.length == 1
                   optimized_prefix.first
                 else
                   Parsanol::Atoms::Sequence.new(*optimized_prefix)
                 end

        remaining = sequence.parslets[prefix_parslets.length..-1]
        optimized_remaining = remaining.map { |p| optimize(p) }
        return prefix.cut if optimized_remaining.empty?

        Parsanol::Atoms::Sequence.new(prefix.cut, *optimized_remaining)
      end
    end
  end
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ast_visitor'
|
|
4
|
+
|
|
5
|
+
module Parsanol
  module Optimizers
    # Collapses directly nested lookaheads into a single lookahead.
    #
    # Rewrites applied:
    #   !(!x) => &x   (double negation)
    #   &(&x) => &x   (positive lookahead is idempotent)
    #   !(&x) => !x   (negative of positive)
    #   &(!x) => !x   (positive of negative)
    class LookaheadOptimizer < ASTVisitor
      # Optimize a lookahead node.
      #
      # +visit+ is not defined in this class; presumably it is inherited
      # from ASTVisitor.
      #
      # @param parslet [Parsanol::Atoms::Lookahead] lookahead to optimize
      # @return [Parsanol::Atoms::Base] optimized parslet
      def visit_lookahead(parslet)
        # The bound child is processed first so nesting is judged on the
        # already-optimized child.
        child = visit(parslet.bound_parslet)

        unless child.is_a?(Parsanol::Atoms::Lookahead)
          # Nothing to collapse: reuse the node when the child came back as
          # the very same object, otherwise re-wrap the new child.
          return parslet if child.equal?(parslet.bound_parslet)

          return Parsanol::Atoms::Lookahead.new(child, parslet.positive)
        end

        # A positive outer lookahead adds nothing to an inner lookahead:
        # &(&x) => &x and &(!x) => !x.
        return child if parslet.positive

        # A negative outer lookahead flips the inner polarity:
        # !(!x) => &x and !(&x) => !x.
        Parsanol::Atoms::Lookahead.new(child.bound_parslet, !child.positive)
      end
    end
  end
end
|