parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# Interval tree implementation for GPeg-style incremental parsing
|
|
2
|
+
# Based on the GPeg paper: "Fast Incremental PEG Parsing" (Yedidia, SLE 2021)
|
|
3
|
+
#
|
|
4
|
+
# This data structure stores memoization results keyed by position intervals [start, end)
|
|
5
|
+
# rather than single positions, enabling efficient invalidation of changed regions.
|
|
6
|
+
#
|
|
7
|
+
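# Performance characteristics (intended bounds, assuming a reasonably balanced tree; nodes are never rebalanced, so any operation can degrade to O(n)):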
|
|
8
|
+
# - Insert: O(log n)
|
|
9
|
+
# - Query: O(log n + k) where k is number of overlapping intervals
|
|
10
|
+
# - Delete overlapping: O(log n + k)
|
|
11
|
+
#
|
|
12
|
+
module Parsanol
  # Binary search tree of half-open intervals [low, high), ordered by interval
  # start and augmented with the largest endpoint found in each subtree.
  #
  # The tree does not rebalance itself: insertion order determines its shape.
  class IntervalTree
    # A node in the interval tree.
    # Each node stores an interval [low, high) and the data associated with it.
    class Node
      attr_accessor :interval, :data, :max, :left, :right

      # @param low [Integer] Start position (inclusive)
      # @param high [Integer] End position (exclusive)
      # @param data [Object] Payload stored with the interval
      def initialize(low, high, data)
        @interval = [low, high] # [start, end) half-open interval
        @data = data
        @max = high # Largest endpoint in the subtree rooted here
        @left = nil
        @right = nil
      end

      # @return [Integer] Start of the interval (inclusive)
      def low
        @interval[0]
      end

      # @return [Integer] End of the interval (exclusive)
      def high
        @interval[1]
      end
    end

    # @return [Integer] Number of intervals currently stored
    attr_reader :size

    def initialize
      @root = nil
      @size = 0
    end

    # Insert an interval with associated data.
    # Intervals sharing the same start are kept; nothing is replaced.
    #
    # @param low [Integer] Start position (inclusive)
    # @param high [Integer] End position (exclusive)
    # @param data [Object] Data to associate with this interval
    # @return [Integer] The size of the tree after insertion
    def insert(low, high, data)
      @root = insert_recursive(@root, low, high, data)
      @size += 1
    end

    # Query for all intervals that overlap with [low, high).
    #
    # @param low [Integer] Start position (inclusive)
    # @param high [Integer] End position (exclusive)
    # @return [Array<Object>] Data of overlapping intervals, in order of
    #   interval start
    def query_overlapping(low, high)
      # Empty intervals cannot overlap with anything
      return [] if low >= high

      results = []
      query_recursive(@root, low, high, results)
      results
    end

    # Query for an exact interval match.
    #
    # @param low [Integer] Start position (inclusive)
    # @param high [Integer] End position (exclusive)
    # @return [Object, nil] Data of the first exact match met on the search
    #   path, nil when there is none
    def query_exact(low, high)
      find_exact(@root, low, high)
    end

    # Delete all intervals that overlap with [low, high).
    #
    # Unlike #query_overlapping, an empty range (low >= high) is not
    # short-circuited here: the overlap test is applied to it as written.
    #
    # @param low [Integer] Start position (inclusive)
    # @param high [Integer] End position (exclusive)
    # @return [Array<Object>] Data of the deleted intervals
    def delete_overlapping(low, high)
      deleted = []
      @root = delete_overlapping_recursive(@root, low, high, deleted)
      @size -= deleted.size
      deleted
    end

    # Remove every interval.
    def clear
      @root = nil
      @size = 0
    end

    # @return [Boolean] true when the tree holds no intervals
    def empty?
      @root.nil?
    end

    private

    # Recompute a node's max endpoint from its own interval and its children.
    def refresh_max(node)
      node.max = node.high
      node.max = [node.max, node.left.max].max if node.left
      node.max = [node.max, node.right.max].max if node.right
      node
    end

    # Insert recursively, keeping the BST ordered by interval start.
    # Intervals whose start equals the node's start go to the right.
    def insert_recursive(node, low, high, data)
      return Node.new(low, high, data) if node.nil?

      if low < node.low
        node.left = insert_recursive(node.left, low, high, data)
      else
        node.right = insert_recursive(node.right, low, high, data)
      end

      refresh_max(node)
    end

    # Collect the data of every interval overlapping [low, high) into results.
    def query_recursive(node, low, high, results)
      return if node.nil?

      # Every interval in this subtree ends at or before `low`: nothing overlaps
      return if node.max <= low

      query_recursive(node.left, low, high, results) if node.left

      # Two intervals [a,b) and [c,d) overlap if: a < d AND c < b
      results << node.data if node.low < high && low < node.high

      # Starts in the right subtree are >= node.low, so they can only overlap
      # when node.low itself is still below `high`
      query_recursive(node.right, low, high, results) if node.right && node.low < high
    end

    # Walk the BST by interval start looking for an identical interval.
    def find_exact(node, low, high)
      while node
        return node.data if node.low == low && node.high == high

        node = low < node.low ? node.left : node.right
      end
      nil
    end

    # Remove every interval overlapping [low, high) from the subtree rooted at
    # node, appending the removed data to `deleted`.
    #
    # @return [Node, nil] The new root of the subtree
    def delete_overlapping_recursive(node, low, high, deleted)
      return nil if node.nil?

      # Every interval in this subtree ends at or before `low`, so none of
      # them can satisfy `low < interval.high`: leave the subtree untouched.
      return node if node.max <= low

      node.left = delete_overlapping_recursive(node.left, low, high, deleted) if node.left

      # Starts in the right subtree are >= node.low; once node.low >= high no
      # interval there can satisfy `interval.low < high`.
      if node.right && node.low < high
        node.right = delete_overlapping_recursive(node.right, low, high, deleted)
      end

      if node.low < high && low < node.high
        deleted << node.data

        # With at most one child the (already cleaned) child takes this place
        return node.right if node.left.nil?
        return node.left if node.right.nil?

        # Two children: take over the in-order successor's interval and data,
        # then drop the successor from the right subtree. The right subtree
        # was cleaned above, so the successor itself does not overlap.
        successor = find_min(node.right)
        node.interval = successor.interval
        node.data = successor.data
        node.right = delete_min(node.right)
      end

      refresh_max(node)
    end

    # Find the minimum (leftmost) node of a subtree.
    def find_min(node)
      node = node.left while node.left
      node
    end

    # Delete the minimum node from a subtree and return the new subtree root.
    def delete_min(node)
      return node.right if node.left.nil?

      node.left = delete_min(node.left)
      refresh_max(node)
    end
  end
end
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
  # Lazy wrapper around Buffer that defers array materialization.
  #
  # LazyResult wraps a Buffer and only creates an Array when the result
  # is actually accessed. This reduces allocations for results that are
  # never used (cache hits, backtracking, etc.).
  #
  # == Usage
  #
  #   lazy = LazyResult.new(buffer, context)
  #   # No array allocated yet
  #
  #   lazy.to_a  # Now array is materialized and cached
  #   lazy.to_a  # Returns cached array
  #
  # == Transparency
  #
  # LazyResult acts like an Array for most operations:
  # - Enumerable methods work (each, map, select, etc.)
  # - Array access works ([], size, empty?, etc.)
  # - Can be used in transforms without changes
  #
  # Methods not defined here are forwarded to the materialized array, so
  # calling them (or asking respond_to? about them) materializes the buffer.
  #
  class LazyResult
    # @return [Buffer] The underlying buffer
    attr_reader :buffer

    # @return [Context] The context handed to the constructor; it is stored
    #   but not used by this class itself
    attr_reader :context

    # @return [Array, nil] Cached materialized array (nil until first access)
    attr_reader :materialized

    # Initialize a new LazyResult.
    #
    # @param buffer [Buffer] Buffer containing elements; must answer
    #   to_a, size and empty?
    # @param context [Context] Context for buffer management
    #
    def initialize(buffer, context)
      @buffer = buffer
      @context = context
      @materialized = nil
    end

    # Materialize to array (with caching).
    #
    # First call creates array from buffer, subsequent calls return cached.
    #
    # @return [Array] Materialized array
    #
    def to_a
      @materialized ||= @buffer.to_a
    end

    # Get element at index (materializes if needed).
    #
    # @param index [Integer] Zero-based index
    # @return [Object] Element at index
    #
    def [](index)
      to_a[index]
    end

    # Get number of elements. Asks the buffer directly, so it does not
    # materialize.
    #
    # @return [Integer] Number of elements
    #
    def size
      @buffer.size
    end

    alias length size

    # Check if empty. Asks the buffer directly, so it does not materialize.
    #
    # @return [Boolean] true if no elements
    #
    def empty?
      @buffer.empty?
    end

    # Iterate over elements (materializes if needed).
    #
    # @yield [element] Each element
    # @return [Enumerator, self] Enumerator if no block, self otherwise
    #
    def each(&block)
      return to_enum(:each) unless block_given?

      to_a.each(&block)
      self
    end

    # Report Array as one of this object's classes so type checks written
    # for arrays accept a LazyResult.
    #
    # @param other [Class] Class to check against
    # @return [Boolean] true for Array, otherwise the regular answer
    #
    def is_a?(other)
      other == Array || super
    end

    alias kind_of? is_a?

    # Delegate unknown methods to materialized array.
    #
    # @param method [Symbol] Method name
    # @param args [Array] Arguments
    # @param block [Proc] Block if given
    # @return [Object] Result of method call
    # @raise [NoMethodError] when the array does not respond either
    #
    def method_missing(method, *args, &block)
      return super unless to_a.respond_to?(method)

      to_a.public_send(method, *args, &block)
    end

    # Makes respond_to? truthful about the methods #method_missing forwards.
    # Object#respond_to? consults this hook for any method not defined on
    # LazyResult itself, so no separate respond_to? override is needed.
    #
    # @param method [Symbol] Method name
    # @param include_private [Boolean] Include private methods
    # @return [Boolean] true if method is supported
    #
    def respond_to_missing?(method, include_private = false)
      to_a.respond_to?(method, include_private) || super
    end

    # Compare with another object.
    # LazyResult compares equal to arrays with the same content.
    #
    # @param other [Object] Object to compare with
    # @return [Boolean] true if equal
    #
    def ==(other)
      # LazyResult must be tested first: #is_a? also answers true for Array
      # on a LazyResult, which would otherwise shadow this branch.
      if other.is_a?(LazyResult)
        to_a == other.to_a
      elsif other.is_a?(Array)
        to_a == other
      else
        super
      end
    end

    alias eql? ==

    # Hash code based on materialized array.
    #
    # @return [Integer] Hash code
    #
    def hash
      to_a.hash
    end

    # Inspect for debugging. Does not materialize the buffer.
    #
    # @return [String] Inspection string
    #
    def inspect
      if @materialized
        "#<LazyResult:#{object_id} materialized=#{@materialized.inspect}>"
      else
        "#<LazyResult:#{object_id} buffer.size=#{@buffer.size}>"
      end
    end
  end
end
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "parsanol/native"
|
|
4
|
+
|
|
5
|
+
module Parsanol
  # Generic lexer for fast tokenization
  #
  # Create a lexer by subclassing and defining tokens:
  #
  #   class JsonLexer < Parsanol::Lexer
  #     token :string, /"[^"]*"/
  #     token :number, /-?[0-9]+(\.[0-9]+)?/
  #     token :true, /true/
  #     token :false, /false/
  #     token :null, /null/
  #     token :lbrace, /\{/
  #     token :rbrace, /\}/
  #     token :lbracket, /\[/
  #     token :rbracket, /\]/
  #     token :colon, /:/
  #     token :comma, /,/
  #
  #     ignore /\s+/
  #   end
  #
  #   lexer = JsonLexer.new
  #   tokens = lexer.tokenize('{"name": "test"}')
  #
  class Lexer
    # Token definition: name and pattern source (both Strings), priority,
    # ignore flag and optional value transform.
    Definition = Struct.new(:name, :pattern, :priority, :ignore, :transform)

    class << self
      # Define a token pattern
      #
      # Only the regexp source is kept; flags on the Regexp object
      # (e.g. /foo/i) are not part of the source and are therefore dropped.
      #
      # @param name [Symbol] Token type name
      # @param pattern [Regexp] Pattern to match
      # @param priority [Integer] Priority for conflict resolution (higher = preferred)
      # @param block [Proc] Optional block to transform the matched value
      def token(name, pattern, priority: 0, &block)
        register_definition(name.to_s, pattern.source, priority, false, block)
      end

      # Define patterns to ignore (e.g., whitespace, comments)
      #
      # @param pattern [Regexp] Pattern to ignore
      def ignore(pattern)
        register_definition("__ignore__", pattern.source, 0, true, nil)
      end

      # Define keywords (identifiers with higher priority)
      #
      # Each keyword becomes a token named after the upcased keyword whose
      # pattern matches the keyword text case-insensitively.
      #
      # @param keywords [Array<Symbol>] Keyword names
      # @param priority [Integer] Priority (default: 100)
      def keyword(*keywords, priority: 100)
        keywords.each do |kw|
          # Regexp#source does not carry flags, so case-insensitivity has to
          # be spelled inside the pattern. The scoped group keeps the flag
          # from leaking into neighbouring patterns.
          # NOTE(review): assumes the native regex engine accepts inline
          # flag groups such as (?i:...) - confirm against the extension.
          source = "(?i:#{Regexp.escape(kw.to_s)})"
          register_definition(kw.to_s.upcase, source, priority, false, nil)
        end
      end

      # Get token definitions for this lexer class
      #
      # @return [Array<Definition>] Token definitions
      def token_definitions
        @token_definitions ||= []
      end

      # Inherit token definitions from parent class: the subclass starts with
      # its own copy of the list, so later additions do not affect the parent.
      def inherited(subclass)
        super
        subclass.instance_variable_set(:@token_definitions, token_definitions.dup)
      end

      private

      # Append one definition. Built positionally because Definition is a
      # plain Struct (no keyword_init), which only accepts keyword-style
      # construction on Ruby 3.2 and later.
      def register_definition(name, source, priority, ignore, transform)
        token_definitions << Definition.new(name, source, priority, ignore, transform)
      end
    end

    # Initialize the lexer. The native lexer is created lazily on the first
    # call to #tokenize; the transform table is built here.
    def initialize
      @lexer_id = nil
      @transforms = build_transforms
    end

    # Tokenize input string
    #
    # @param input [String] Input to tokenize
    # @return [Array<Hash>] Tokens as returned by the native lexer, with the
    #   "value" of a token replaced by its transform result when the token
    #   type has a transform block
    def tokenize(input)
      ensure_lexer_created

      Native.tokenize_with_lexer(@lexer_id, input).map do |token|
        transform = @transforms[token["type"]]
        next token unless transform

        # Work on a copy so the hash handed back by Native stays untouched
        token.dup.tap { |copy| copy["value"] = transform.call(copy["value"]) }
      end
    end

    private

    # Create the native lexer once and remember its id.
    def ensure_lexer_created
      return if @lexer_id

      definitions = self.class.token_definitions.map do |d|
        {
          "name" => d.name,
          "pattern" => d.pattern,
          "priority" => d.priority,
          "ignore" => d.ignore
        }
      end

      @lexer_id = Native.create_lexer(definitions)
    end

    # Map token name => transform block for every non-ignored definition
    # that has one. A later definition with the same name wins.
    def build_transforms
      self.class.token_definitions.each_with_object({}) do |d, transforms|
        transforms[d.name] = d.transform if d.transform && !d.ignore
      end
    end
  end
end
|