parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
  # Base class for result builders.
  #
  # A builder accumulates the pieces of a parse result one at a time and
  # hands back the finished value from {#build}. The array-based builders
  # write straight into a buffer obtained from the parse context
  # (+context.acquire_buffer+), which is intended to avoid intermediate
  # array allocations, and give that buffer back in {#release}.
  #
  # == Usage
  #
  #   builder = ResultBuilder.for(:repetition, context, estimated_size: 10)
  #   builder.add_element(value1)
  #   builder.add_element(value2)
  #   result = builder.build  # Returns LazyResult
  #
  # == Builders
  #
  # - RepetitionBuilder: For repetition results
  # - SequenceBuilder: For sequence results
  # - HashBuilder: For named capture results
  #
  class ResultBuilder
    # Factory method to create the appropriate builder.
    #
    # @param type [Symbol] Builder type (:repetition, :sequence, :hash)
    # @param context [Context] Parse context
    # @param options [Hash] Keyword options forwarded unchanged to the
    #   selected builder's constructor (+tag:+ / +estimated_size:+ for
    #   :repetition, +size:+ for :sequence, none for :hash)
    # @return [ResultBuilder] Appropriate builder instance
    # @raise [ArgumentError] if +type+ is not one of the known symbols
    #
    def self.for(type, context, **options)
      case type
      when :repetition
        RepetitionBuilder.new(context, **options)
      when :sequence
        SequenceBuilder.new(context, **options)
      when :hash
        HashBuilder.new(context, **options)
      else
        raise ArgumentError, "Unknown builder type: #{type}"
      end
    end

    # Initialize builder.
    #
    # @param context [Context] Parse context for buffer access
    #
    def initialize(context)
      @context = context
    end

    # Add element to result (subclasses implement).
    #
    # @param value [Object] Value to add
    # @return [self] For method chaining
    # @raise [NotImplementedError] always, on the base class
    #
    def add_element(value)
      # Name the concrete class so a missing override is easy to locate.
      raise NotImplementedError, "#{self.class} must implement #add_element"
    end

    # Build final result (subclasses implement).
    #
    # @return [Object] Constructed result
    # @raise [NotImplementedError] always, on the base class
    #
    def build
      raise NotImplementedError, "#{self.class} must implement #build"
    end

    # Release resources (subclasses override when they hold any).
    #
    # @return [void]
    #
    def release
      # Default: no-op
    end
  end

  # Builder for repetition results.
  #
  # Constructs [tag, element, ...] arrays (tag defaults to :repetition)
  # inside a context-provided buffer.
  #
  class RepetitionBuilder < ResultBuilder
    # Initialize repetition builder.
    #
    # @param context [Context] Parse context
    # @param tag [Symbol] Tag stored as the first buffer entry
    #   (default: :repetition)
    # @param estimated_size [Integer] Estimated element count
    #
    def initialize(context, tag: :repetition, estimated_size: 10)
      super(context)
      @tag = tag
      # One extra slot is requested for the leading tag.
      @buffer = context.acquire_buffer(size: estimated_size + 1)
      @buffer.push(@tag)
    end

    # Add element to repetition. Every value is kept, including nil.
    #
    # @param value [Object] Element to add
    # @return [self]
    #
    def add_element(value)
      @buffer.push(value)
      self
    end

    # Build LazyResult wrapping the buffer and the parse context.
    #
    # @return [LazyResult] Lazy repetition result
    #
    def build
      Parsanol::LazyResult.new(@buffer, @context)
    end

    # Release buffer on failure. Safe to call more than once: the buffer
    # reference is cleared after the first call.
    #
    # @return [void]
    #
    def release
      @context.release_buffer(@buffer) if @buffer
      @buffer = nil
    end
  end

  # Builder for sequence results.
  #
  # Constructs [:sequence, element, ...] arrays inside a context-provided
  # buffer.
  #
  class SequenceBuilder < ResultBuilder
    # Initialize sequence builder.
    #
    # @param context [Context] Parse context
    # @param size [Integer] Expected sequence length
    #
    def initialize(context, size: 5)
      super(context)
      # One extra slot is requested for the leading :sequence tag.
      @buffer = context.acquire_buffer(size: size + 1)
      @buffer.push(:sequence)
    end

    # Add element to sequence. nil is skipped; every other value,
    # including +false+, is kept.
    #
    # @param value [Object] Element to add
    # @return [self]
    #
    def add_element(value)
      # Test for nil explicitly: a plain truthiness check would also
      # discard a legitimate +false+ element.
      @buffer.push(value) unless value.nil?
      self
    end

    # Build LazyResult wrapping the buffer and the parse context.
    #
    # @return [LazyResult] Lazy sequence result
    #
    def build
      Parsanol::LazyResult.new(@buffer, @context)
    end

    # Release buffer on failure. Safe to call more than once: the buffer
    # reference is cleared after the first call.
    #
    # @return [void]
    #
    def release
      @context.release_buffer(@buffer) if @buffer
      @buffer = nil
    end
  end

  # Builder for hash results (named captures).
  #
  # Constructs a plain Hash directly; no context buffer is used.
  #
  class HashBuilder < ResultBuilder
    # Initialize hash builder.
    #
    # @param context [Context] Parse context
    #
    def initialize(context)
      super(context)
      @hash = {}
    end

    # Add key-value pair. A repeated key overwrites the earlier value.
    #
    # @param key [Symbol] Hash key
    # @param value [Object] Hash value
    # @return [self]
    #
    def add_pair(key, value)
      @hash[key] = value
      self
    end

    # Build hash result.
    #
    # @return [Hash] The hash being built (the same object, not a copy)
    #
    def build
      @hash
    end

    # Release resources by dropping the reference to the hash.
    #
    # @return [void]
    #
    def release
      @hash = nil
    end
  end
end
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
  # Enumerable view over the nodes of a parse tree.
  #
  # Wraps a parse result (nested Hashes, Arrays and scalars) and yields
  # every node in depth-first, pre-order sequence, so callers can process
  # nodes one at a time instead of first flattening the tree into a list.
  #
  # == Motivation
  #
  # Walking a parse result by hand usually means writing the recursion
  # yourself or collecting all nodes up front:
  #
  #   results = parser.parse(large_input)
  #   results.each { |node| process(node) }
  #
  # ResultStream provides the recursion once and exposes it through
  # Enumerable:
  #
  #   stream = ResultStream.new(parser.parse(input))
  #   stream.each { |node| process(node) }  # Processes incrementally
  #
  # == Usage
  #
  # Basic iteration:
  #
  #   stream = ResultStream.new(parse_tree)
  #   stream.each { |node| puts node }
  #
  # Filtering (leverages Enumerable):
  #
  #   stream.select { |node| node.is_a?(Hash) }.each { |hash| process(hash) }
  #
  # Mapping:
  #
  #   transformed = stream.map { |node| transform(node) }
  #
  # == Characteristics
  #
  # - Traversal is recursive, so the extra state held while iterating is
  #   the current root-to-node path (the call stack). The tree passed to
  #   {#initialize} is an ordinary, already-built object.
  # - Nodes are yielded on demand; +each+ without a block returns an
  #   Enumerator.
  # - Filtering helpers built on +select+ ({#hashes}, {#arrays}, ...)
  #   return Arrays; use +stream.lazy+ to keep a pipeline lazy.
  #
  # == Integration with Parser
  #
  #   parser = MyParser.new
  #   result = parser.parse(input)
  #   stream = ResultStream.new(result)
  #
  class ResultStream
    include Enumerable

    # Creates a new result stream.
    #
    # @param tree [Object] Parse tree (Hash, Array, or scalar)
    def initialize(tree)
      @tree = tree
    end

    # Iterates over all nodes in the parse tree, depth-first.
    #
    # Traversal order:
    # 1. Current node (pre-order)
    # 2. Child nodes (Array items, or Hash values; Hash keys are not
    #    visited)
    #
    # @yield [node] Each node in the tree
    # @yieldparam node [Object] Current node (Hash, Array, or scalar)
    # @return [self] when a block is given
    # @return [Enumerator] if no block given
    #
    # @example Basic iteration
    #   stream.each { |node| puts node.class }
    #
    # @example External enumeration
    #   enum = stream.each  # Returns Enumerator
    #   enum.next           # Get next node
    #
    def each(&block)
      return enum_for(:each) unless block_given?

      traverse(@tree, &block)
      self
    end

    # Filters nodes by type.
    #
    # @param klass [Class] Class to filter by
    # @return [Array] Nodes for which +is_a?(klass)+ holds, in traversal
    #   order
    #
    # @example Get all hash nodes
    #   stream.nodes_of_type(Hash)
    #
    def nodes_of_type(klass)
      select { |node| node.is_a?(klass) }
    end

    # Returns all hash nodes in the tree.
    #
    # @return [Array<Hash>] Hash nodes
    #
    # @example
    #   stream.hashes.each { |h| puts h.keys }
    #
    def hashes
      nodes_of_type(Hash)
    end

    # Returns all array nodes in the tree.
    #
    # @return [Array<Array>] Array nodes
    #
    # @example
    #   stream.arrays.each { |a| puts a.size }
    #
    def arrays
      nodes_of_type(Array)
    end

    # Returns all scalar nodes (non-Hash, non-Array).
    #
    # @return [Array] Scalar nodes
    #
    # @example
    #   stream.scalars.each { |s| puts s }
    #
    def scalars
      select { |node| !node.is_a?(Hash) && !node.is_a?(Array) }
    end

    # Returns nodes at a specific depth that satisfy a predicate.
    #
    # @param target_depth [Integer] Tree depth (0 = root)
    # @yield [node] Predicate to test each node at that depth; when
    #   omitted, every node at that depth matches
    # @return [Enumerator] Matching nodes, in traversal order
    #
    # @example Get all nodes at depth 2
    #   stream.at_depth(2) { true }
    #
    def at_depth(target_depth, &predicate)
      predicate ||= proc { true }
      depth_traverse(@tree, 0, target_depth, &predicate)
    end

    # Counts nodes in the tree.
    #
    # Follows the Enumerable#count contract: with no argument and no
    # block every node is counted; with an argument, nodes equal to it;
    # with a block, nodes for which the block returns a truthy value.
    #
    # @param args [Array] Optional single item to compare nodes against
    # @yield [node] Optional predicate
    # @return [Integer] Node count
    #
    # @example
    #   stream.count                             # => 42
    #   stream.count { |node| node.is_a?(Hash) } # => 7
    #
    def count(*args, &block)
      # Hand filtered counts to Enumerable so its semantics stay intact.
      return super unless args.empty? && block.nil?

      total = 0
      each { total += 1 }
      total
    end

    # Returns maximum depth of the tree.
    #
    # @return [Integer] Depth of the deepest node (a scalar root is 0)
    #
    # @example
    #   stream.max_depth  # => 5
    #
    def max_depth
      find_max_depth(@tree, 0)
    end

    private

    # Yields each direct child of +node+: the items of an Array or the
    # values of a Hash. Any other node is a leaf and yields nothing.
    #
    # @param node [Object] Node whose children are wanted
    # @yield [child] Each direct child
    #
    def each_child(node, &block)
      case node
      when Array
        node.each(&block)
      when Hash
        node.each_value(&block)
      end
    end

    # Depth-first tree traversal with pre-order visiting.
    #
    # @param node [Object] Current node
    # @yield [node] Each visited node
    #
    def traverse(node, &block)
      # Parent first, then its subtree.
      yield node
      each_child(node) { |child| traverse(child, &block) }
    end

    # Depth-aware traversal for filtering by level.
    #
    # @param node [Object] Current node
    # @param current_depth [Integer] Depth of +node+
    # @param target_depth [Integer] Depth to match
    # @yield [node] Predicate applied to nodes at the target depth
    # @return [Enumerator] Nodes at the target depth accepted by the block
    #
    def depth_traverse(node, current_depth, target_depth, &block)
      return enum_for(:depth_traverse, node, current_depth, target_depth) unless block_given?

      matches = []
      collect_at_depth(node, current_depth, target_depth, matches, &block)
      matches.to_enum
    end

    # Appends to +matches+ every node at +target_depth+ below (or at)
    # +node+ for which the block returns a truthy value. Subtrees below
    # the target depth are never entered.
    #
    # @param node [Object] Current node
    # @param current_depth [Integer] Depth of +node+
    # @param target_depth [Integer] Depth to match
    # @param matches [Array] Accumulator, mutated in place
    # @yield [node] Predicate
    # @return [void]
    #
    def collect_at_depth(node, current_depth, target_depth, matches, &predicate)
      if current_depth == target_depth
        matches << node if yield(node)
      elsif current_depth < target_depth
        each_child(node) do |child|
          collect_at_depth(child, current_depth + 1, target_depth, matches, &predicate)
        end
      end
    end

    # Find maximum depth of tree recursively.
    #
    # @param node [Object] Current node
    # @param current_depth [Integer] Depth of +node+
    # @return [Integer] Deepest depth reached in this subtree
    #
    def find_max_depth(node, current_depth)
      deepest = current_depth
      each_child(node) do |child|
        child_depth = find_max_depth(child, current_depth + 1)
        deepest = child_depth if child_depth > deepest
      end
      deepest
    end
  end
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# RSpec matcher: `expect(parser).to parse(input)`.
#
# Optional chain: `.as(expected)` compares the parse result with
# `expected`; `.as { |result| ... }` lets a block decide instead.
# Passing `trace: true` as the second argument appends the failure
# cause's ASCII tree to the failure message when parsing fails.
RSpec::Matchers.define(:parse) do |input, opts|
  # State shared by the hooks below through closure capture.
  as = nil
  block = nil
  result = nil
  trace = nil

  match do |parser|
    begin
      result = parser.parse(input)
      if block
        block.call(result)
      else
        # With no expectation set (as is nil), any successful parse matches.
        as == result || as.nil?
      end
    rescue Parsanol::ParseFailed => ex
      trace = ex.parse_failure_cause.ascii_tree if opts && opts[:trace]
      false
    end
  end

  # Pick whichever hook name this matcher DSL responds to.
  positive_hook = respond_to?(:failure_message) ? :failure_message : :failure_message_for_should
  public_send(positive_hook) do |is|
    if block
      "expected output of parsing #{input.inspect}" \
        " with #{is.inspect} to meet block conditions, but it didn't"
    else
      outcome =
        if as
          "output of parsing #{input.inspect}" \
            " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}"
        else
          "#{is.inspect} to be able to parse #{input.inspect}"
        end
      trace_suffix = trace ? "\n" + trace : ''
      "expected " + outcome + trace_suffix
    end
  end

  negative_hook =
    respond_to?(:failure_message_when_negated) ? :failure_message_when_negated : :failure_message_for_should_not
  public_send(negative_hook) do |is|
    if block
      "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
    elsif as
      "expected output of parsing #{input.inspect}" \
        " with #{is.inspect} not to equal #{as.inspect}"
    else
      "expected #{is.inspect} to not parse #{input.inspect}, but it did"
    end
  end

  # NOTE: This has a nodoc tag since the rdoc parser puts this into
  # Object, a thing I would never allow.
  chain :as do |expected_output = nil, &my_block|
    as = expected_output
    block = my_block
  end
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
module Parsanol
  # Rope data structure for efficient string accumulation.
  #
  # Uses deferred concatenation to avoid O(n²) repeated string building.
  # Segments are collected in an Array and only joined when the rope is
  # converted to a string.
  #
  # @example Basic usage
  #   rope = Rope.new
  #   rope.append('hello')
  #   rope.append(' ')
  #   rope.append('world')
  #   rope.to_s  # => "hello world"
  #
  # @example With Slices
  #   rope = Rope.new
  #   rope.append(Slice.new(0, 'hello'))
  #   rope.append(Slice.new(5, ' world'))
  #   rope.to_s  # => "hello world"
  #
  class Rope
    # Creates a new empty Rope.
    def initialize
      @segments = []
      @frozen = false
    end

    # Appends a string or Slice to the rope.
    #
    # The segment is stored as-is (no copy, no conversion) and will be
    # joined later when {#to_s} is called.
    #
    # @param segment [String, Slice] The segment to append
    # @return [Rope] self for method chaining
    # @raise [FrozenError] if rope has been frozen by calling {#to_s}
    def append(segment)
      raise FrozenError, "can't modify frozen Rope" if @frozen

      @segments << segment
      self
    end

    # Converts the rope to a final string.
    #
    # Marks the rope as frozen, so later {#append} calls raise. The segments
    # are joined on every call; the result is not cached.
    #
    # @return [String] The concatenated result of all segments
    def to_s
      @frozen = true
      @segments.join
    end

    # Checks if the rope is empty (contains no segments).
    #
    # @return [Boolean] true if no segments have been appended
    def empty?
      @segments.empty?
    end

    # Estimates the total size of all segments.
    #
    # This is an estimate because segments may be Slice objects or other
    # types that respond to #size or #to_s.
    #
    # @return [Integer] The sum of all segment sizes
    def size
      @segments.sum { |s| s.respond_to?(:size) ? s.size : s.to_s.size }
    end

    # Creates a rope from an existing string.
    #
    # Empty input yields an empty rope. Inputs that do not respond to
    # #empty? (for example nil) are judged by their #to_s form instead, so
    # nil also yields an empty rope rather than raising NoMethodError.
    #
    # @param str [String, #to_s, nil] The value to initialize the rope with
    # @return [Rope] A new rope containing the string
    def self.from_string(str)
      blank = str.respond_to?(:empty?) ? str.empty? : str.to_s.empty?
      new.tap { |r| r.append(str) unless blank }
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
module Parsanol
  # A stack of nested bindings. Reads walk from the innermost binding
  # outwards; writes always go to the innermost binding.
  class Scope
    # Raised when the accessed slot has never been assigned a value.
    #
    class NotFound < StandardError
    end

    # One level of the scope stack: a Hash of slots plus a link to the
    # enclosing binding (nil for the outermost one).
    class Binding
      attr_reader :parent

      def initialize(parent=nil)
        @parent = parent
        @hash = {}
      end

      # Returns the value stored under +k+ in this binding or, failing that,
      # in the nearest enclosing binding that has the key.
      #
      # Lookup is decided by key presence, not by truthiness, so a slot that
      # was explicitly assigned nil or false is returned as such (and shadows
      # outer bindings) instead of being reported as missing.
      #
      # Raises NotFound when no binding in the chain has the key.
      def [](k)
        return @hash[k] if @hash.key?(k)
        raise NotFound unless parent

        parent[k]
      end

      # Stores +v+ under +k+ in this binding only.
      def []=(k,v)
        @hash.store(k,v)
      end
    end

    # Reads slot +k+ via the innermost binding; raises NotFound if unset.
    def [](k)
      @current[k]
    end

    # Assigns +v+ to slot +k+ in the innermost binding.
    def []=(k,v)
      @current[k] = v
    end

    # Starts with a single outermost binding.
    def initialize
      @current = Binding.new
    end

    # Opens a new innermost binding whose parent is the current one.
    def push
      @current = Binding.new(@current)
    end

    # Discards the innermost binding and returns to its parent.
    # NOTE: popping the outermost binding leaves the current binding nil, so
    # calls must be balanced with #push.
    def pop
      @current = @current.parent
    end
  end
end
|