parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# A slice is a small part from the parse input. A slice mainly behaves like
|
|
3
|
+
# any other string, except that it remembers where it came from (offset in
|
|
4
|
+
# original input).
|
|
5
|
+
#
|
|
6
|
+
# == Extracting line and column
|
|
7
|
+
#
|
|
8
|
+
# Using the #line_and_column method, you can extract the line and column in
|
|
9
|
+
# the original input where this slice starts.
|
|
10
|
+
#
|
|
11
|
+
# Example:
|
|
12
|
+
# slice.line_and_column # => [1, 13]
|
|
13
|
+
# slice.offset # => 12
|
|
14
|
+
#
|
|
15
|
+
# == Likeness to strings
|
|
16
|
+
#
|
|
17
|
+
# Parsanol::Slice behaves in many ways like a Ruby String. This likeness
|
|
18
|
+
# however is not complete - many of the myriad of operations String supports
|
|
19
|
+
# are not yet in Slice. You can always extract the internal string instance by
|
|
20
|
+
# calling #to_s.
|
|
21
|
+
#
|
|
22
|
+
# These omissions are somewhat intentional. Rather than maintaining a full
|
|
23
|
+
# delegation, we opt for a partial emulation that gets the job done.
|
|
24
|
+
#
|
|
25
|
+
module Parsanol
  # A piece of the parse input: a string together with the byte offset it was
  # taken from and, optionally, a line cache that can translate that offset
  # into a line/column pair.
  #
  # Defined in nested form so that this file can be loaded even when the
  # Parsanol namespace has not been created yet.
  class Slice
    attr_reader :str, :line_cache

    # Construct a slice using an integer byte position, a string, and an
    # optional line cache. The line cache should be able to answer to the
    # #line_and_column message.
    #
    # @param bytepos [Integer] Byte position in the original input
    # @param string [String] The slice content
    # @param line_cache [Object] Optional line cache for line/column info
    #
    def initialize(bytepos = 0, string = '', line_cache = nil)
      @bytepos = bytepos
      @str = string
      @line_cache = line_cache
    end

    # Reinitialize this slice in place so that the same object can be reused
    # (object pooling).
    #
    # @param bytepos [Integer] New byte position in the original input
    # @param string [String] New slice content
    # @param line_cache [Object] Optional line cache for line/column info
    # @return [self] Returns self for method chaining
    #
    def reset!(bytepos = 0, string = '', line_cache = nil)
      @bytepos = bytepos
      @str = string
      @line_cache = line_cache
      self
    end

    # Create a Slice from a rope-like object. The rope is converted with
    # #to_s and the resulting string becomes the slice content.
    #
    # @param rope [#to_s] The rope to convert
    # @param bytepos [Integer] Byte position in the input
    # @param line_cache [Object] Optional line cache for line/column info
    # @return [Parsanol::Slice] A new slice with the rope's content
    #
    def self.from_rope(rope, bytepos, line_cache = nil)
      new(bytepos, rope.to_s, line_cache)
    end

    # Returns the byte position of this slice in the original input.
    #
    # @return [Integer]
    #
    def offset
      @bytepos
    end

    # Alias for #offset, kept for backward compatibility and clarity.
    alias bytepos offset

    # Alias for #offset. Note that this is a byte position: it only equals
    # the character position when everything before it is single-byte.
    alias charpos offset

    # Loose, content-only comparison against strings and other slices.
    # The offset is deliberately ignored here.
    #
    # @param other [String, Parsanol::Slice, Object]
    # @return [Boolean]
    #
    def ==(other)
      # Fast path: direct string comparison, the most common case.
      return str == other if other.is_a?(String)
      # Slice to slice: compare the wrapped strings.
      return str == other.str if other.is_a?(Slice)

      str == other
    end

    # Strict equality, as used for hash keys. True only for another Slice
    # with equal content at the same offset.
    #
    # The offset is part of the comparison because #hash includes it: two
    # objects that are eql? must always produce the same hash value.
    #
    # @param other [Object]
    # @return [Boolean]
    #
    def eql?(other)
      other.is_a?(Slice) && offset == other.offset && str.eql?(other.str)
    end

    # Hash code for using slices as hash keys. Combines content and offset,
    # so slices differ from plain strings and from slices at other offsets.
    #
    # @return [Integer]
    #
    def hash
      [str, offset].hash
    end

    # Match a regular expression against the slice content.
    #
    # @param regexp [Regexp]
    # @return [MatchData, nil]
    #
    def match(regexp)
      str.match(regexp)
    end

    # Returns the size of the slice content in characters.
    #
    # @return [Integer]
    #
    def size
      str.size
    end

    alias length size

    # Concatenate two slices; it is assumed that the second slice begins
    # where the first one ends. The result keeps the offset and line cache of
    # the receiver.
    #
    # @param other [#to_s]
    # @return [Parsanol::Slice]
    #
    def +(other)
      self.class.new(@bytepos, str + other.to_s, line_cache)
    end

    # Returns a <line, column> tuple referring to the original input. The
    # line cache is queried with this slice's byte position.
    #
    # @raise [ArgumentError] if the slice was built without a line cache
    #
    def line_and_column
      unless line_cache
        raise ArgumentError, 'No line cache was given, cannot infer line and column.'
      end

      line_cache.line_and_column(@bytepos)
    end

    # Conversion operators ---------------------------------------------------

    # Returns the content as a String, converting with #to_s when the
    # wrapped object is not already one.
    def to_str
      str.is_a?(String) ? str : str.to_s
    end
    alias to_s to_str

    # A slice is already a slice.
    def to_slice
      self
    end

    def to_sym
      str.to_sym
    end

    def to_i
      str.to_i
    end

    def to_f
      str.to_f
    end

    # Inspection & Debugging -------------------------------------------------

    # Prints the slice as <code>"string"@offset</code>.
    def inspect
      "#{str.inspect}@#{offset}"
    end
  end
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
module Parsanol
  class Source
    # A cache for line start positions.
    #
    class LineCache
      def initialize
        # Stores line endings as a simple position number. The first line
        # always starts at 0; numbers beyond the biggest entry are on any
        # line > size, but probably make a scan to that position necessary.
        @line_ends = []
        @line_ends.extend RangeSearch
        @last_line_end = nil
      end

      # Returns a <line, column> tuple for the given input position. Input
      # position must be given as byte offset into original string; objects
      # responding to #bytepos are converted first.
      #
      # @param pos [Integer, #bytepos]
      # @return [Array(Integer, Integer)] 1-based line and column
      #
      def line_and_column(pos)
        pos = pos.bytepos if pos.respond_to?(:bytepos)
        eol_idx = @line_ends.lbound(pos)

        if eol_idx
          # eol_idx points to the offset that ends the current line; the
          # previous entry (or 0 for the first line) is where it starts.
          line_start = eol_idx.positive? ? @line_ends[eol_idx - 1] : 0
          [eol_idx + 1, pos - line_start + 1]
        else
          # We are beyond the last line end that we know about. Pretend for
          # now that we're just on the last line.
          line_start = @line_ends.last || 0
          [@line_ends.size + 1, pos - line_start + 1]
        end
      end

      # Records the positions of all line endings found in buf, where buf is
      # the part of the input that starts at byte offset start_pos.
      #
      # @param start_pos [Integer] byte offset of buf within the input
      # @param buf [String, nil] text to scan; nil is ignored
      # @return [nil]
      #
      def scan_for_line_endings(start_pos, buf)
        return unless buf

        scanner = StringScanner.new(buf)
        return unless scanner.exist?(/\n/)

        # If we have already read part or all of buf, we already know about
        # line ends in that portion; skip it instead of recording them twice.
        if @last_line_end && start_pos < @last_line_end
          already_scanned = @last_line_end - start_pos
          # The whole buffer lies inside the known region. Setting the
          # scanner position past its end would raise RangeError.
          return if already_scanned >= scanner.string.bytesize

          scanner.pos = already_scanned
        end

        # Store the position just behind every newline in @line_ends.
        while scanner.skip_until(/\n/)
          @last_line_end = start_pos + scanner.pos
          @line_ends << @last_line_end
        end
      end
    end

    # Mixin for arrays that implicitly give a number of ranges, where one
    # range begins where the other one ends. The array must be sorted in
    # ascending order.
    #
    # Example:
    #
    #   [10, 20, 30]
    #   # would describe [0, 10], (10, 20], (20, 30]
    #
    module RangeSearch
      # Midpoint between two indices, rounded down. No longer used by
      # #lbound but kept because it is part of the module's public surface.
      #
      # NOTE: Jonathan Hinkle reported that when mathn is required, just
      # dividing and relying on the integer truncation is not enough.
      def find_mid(left, right)
        left + ((right - left) / 2).floor
      end

      # Scans the array for the first number that is > than bound. Returns
      # the index of that number, or nil when there is none (empty array or
      # every entry <= bound).
      #
      # @param bound [Integer]
      # @return [Integer, nil]
      #
      def lbound(bound)
        # Find-minimum binary search: index of the first entry for which the
        # block is true, nil when it is never true.
        bsearch_index { |line_end| line_end > bound }
      end
    end
  end
end
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'stringio'
|
|
4
|
+
require 'strscan'
|
|
5
|
+
|
|
6
|
+
require 'parsanol/position'
|
|
7
|
+
require 'parsanol/source/line_cache'
|
|
8
|
+
require 'parsanol/pools/slice_pool'
|
|
9
|
+
require 'parsanol/pools/position_pool'
|
|
10
|
+
|
|
11
|
+
module Parsanol
  # Wraps the input string for the parser: keeps a scanner positioned at the
  # current parse location, a line cache for line/column lookups, and the
  # pools used to hand out slices and positions.
  #
  class Source
    attr_reader :slice_pool, :position_pool

    # @param str [#to_str] the input to parse
    # @raise [ArgumentError] if str does not respond to #to_str
    #
    def initialize(str)
      unless str.respond_to?(:to_str)
        raise ArgumentError, 'Must construct Source with a string like object.'
      end

      # Convert once and use the same String for the scanner, the line cache
      # and position objects.
      @source_str = str.to_str
      @str = StringScanner.new(@source_str)

      # maps 1 => /./m, 2 => /../m, etc...
      @re_cache = Hash.new { |cache, n| cache[n] = /(.|$){#{n}}/m }

      @line_cache = LineCache.new
      @line_cache.scan_for_line_endings(0, @source_str)

      # Phase 1.2: SlicePool for reducing GC pressure during parsing
      # Size of 5000 handles most typical parsing scenarios
      @slice_pool = Parsanol::Pools::SlicePool.new(size: 5000)

      # Phase 1.4: PositionPool for error reporting positions
      # Size of 1000 handles typical error reporting scenarios
      @position_pool = Parsanol::Pools::PositionPool.new(size: 1000)
    end

    # Checks if the given pattern matches at the current input position.
    # The scanner position is not moved.
    #
    # @param pattern [Regexp] pattern to check for
    # @return [Integer, nil] truthy (the match length) if the pattern matches
    #   at #pos, nil otherwise
    #
    def matches?(pattern)
      @str.match?(pattern)
    end
    alias match matches?

    # Consumes n characters from the input, returning them as a slice of the
    # input that remembers the byte position where it started.
    #
    # @param n [Integer] number of characters to consume
    # @return [Parsanol::Slice] presumably; the value comes from the slice pool
    #
    def consume(n)
      start = bytepos
      consumed = @str.scan(@re_cache[n])
      @slice_pool.acquire_with(start, consumed, @line_cache)
    end

    # Helper method to acquire a pooled slice.
    # This is the preferred way for atoms to create slices.
    #
    # @param bytepos [Integer] Byte position in the input
    # @param str [String] The slice content
    # @return [Parsanol::Slice] A pooled slice ready for use
    #
    def slice(bytepos, str)
      @slice_pool.acquire_with(bytepos, str, @line_cache)
    end

    # Release a slice back to the pool.
    #
    # @param slice [Parsanol::Slice] The slice to release
    #
    def release_slice(slice)
      @slice_pool.release(slice)
    end

    # Returns how much input remains. This is StringScanner#rest_size, i.e.
    # a byte count; it equals the character count only for single-byte text.
    #
    # @return [Integer]
    #
    def chars_left
      @str.rest_size
    end

    # Returns how many chars there are between current position and the
    # string given. If the string given doesn't occur in the source, then
    # the remaining size (#chars_left) is returned.
    #
    # @param str [String] text to look for (matched literally)
    # @return [Integer] count of chars until str or #chars_left
    #
    def chars_until(str)
      up_to_match = @str.check_until(Regexp.new(Regexp.escape(str)))
      return chars_left unless up_to_match

      up_to_match.size - str.size
    end

    # Phase 31: Scan forward to find the next occurrence of a character.
    # Returns the byte position of the next occurrence, or nil if not found.
    # Does not move the scanner position.
    #
    # @param char [String] single character to search for
    # @return [Integer, nil] byte position or nil if not found
    #
    def index_of_char(char)
      remainder = @str.rest
      char_index = remainder.index(char)
      return nil unless char_index

      # String#index counts characters while the scanner position counts
      # bytes; convert the skipped prefix to bytes before adding them up.
      @str.pos + remainder[0, char_index].bytesize
    end

    # Position of the parse as a byte offset into the original string.
    # Returns an integer byte position instead of a Position object.
    #
    # @return [Integer] Current byte position in the input
    # @note Please be aware of encodings at this point.
    #
    def pos
      @str.pos
    end

    # Alias for pos - returns the current byte position.
    # Provided for clarity and backward compatibility.
    #
    # @return [Integer] Current byte position in the input
    #
    alias bytepos pos

    # Moves the parse position to byte offset n.
    #
    # @param n [Integer] new byte position
    # @note Please be aware of encodings at this point.
    #
    def bytepos=(n)
      @str.pos = n
    rescue RangeError
      # Best effort: an out-of-range position is ignored and the scanner
      # stays where it was.
    end

    # Returns a <line, column> tuple for the given position. If no position
    # is given, line/column information is returned for the current position
    # given by #pos.
    #
    # @param position [Integer, #bytepos, nil]
    #
    def line_and_column(position = nil)
      @line_cache.line_and_column(position || bytepos)
    end

    # Creates a pooled Position object for the given byte position.
    # This is used for error reporting where Position objects are needed.
    #
    # Phase 1.4: Uses PositionPool to reduce GC pressure when materializing
    # position objects for error messages.
    #
    # @param bytepos [Integer] Byte position in source (defaults to current position)
    # @return [Parsanol::Position] A pooled Position object
    #
    def position(bytepos = nil)
      effective_pos = bytepos || self.bytepos
      # Kept from the original: the tuple is computed but not passed on.
      line, column = line_and_column(effective_pos)

      # Character position: number of characters in the byte prefix that
      # ends at effective_pos.
      charpos = @source_str.byteslice(0, effective_pos).size

      @position_pool.acquire_with(
        string: @source_str,
        bytepos: effective_pos,
        charpos: charpos
      )
    end
  end
end
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parsanol::SourceLocation - Source Location Tracking
|
|
4
|
+
#
|
|
5
|
+
# Track source positions (line, column, offset) through the parsing and
|
|
6
|
+
# transformation pipeline. This is useful for error reporting, IDE integration,
|
|
7
|
+
# and source mapping.
|
|
8
|
+
#
|
|
9
|
+
# Usage:
|
|
10
|
+
# # Parse with source tracking
|
|
11
|
+
# result = parser.parse_with_spans("hello world")
|
|
12
|
+
# tree = result.tree
|
|
13
|
+
# spans = result.spans
|
|
14
|
+
#
|
|
15
|
+
# # Access span for a node
|
|
16
|
+
# span = spans[node_id]
|
|
17
|
+
# puts "Matched at line #{span.start.line}, column #{span.start.column}"
|
|
18
|
+
#
|
|
19
|
+
# Requires native extension for full functionality.
|
|
20
|
+
|
|
21
|
+
module Parsanol
|
|
22
|
+
# Represents a position in source code
|
|
23
|
+
class SourcePosition
  # Immutable-by-convention value object: three readers, no writers.
  attr_reader :offset, :line, :column

  # @param offset [Integer] offset into the source
  # @param line [Integer] line number
  # @param column [Integer] column number
  def initialize(offset:, line:, column:)
    @offset, @line, @column = offset, line, column
  end

  # Human-readable rendering, e.g. "line 2, column 3 (offset 5)".
  def to_s
    "line #{line}, column #{column} (offset #{offset})"
  end

  # @return [Hash] the three fields keyed by :offset, :line and :column
  def to_h
    { offset: offset, line: line, column: column }
  end

  # Two positions are equal when all three fields match; anything that is
  # not a SourcePosition never compares equal.
  def ==(other)
    other.is_a?(SourcePosition) &&
      offset == other.offset &&
      line == other.line &&
      column == other.column
  end

  # Hash-key equality follows #==.
  def eql?(other)
    self == other
  end

  # Derived from the same fields as #==, so equal positions share a bucket.
  def hash
    [offset, line, column].hash
  end
end
|
|
53
|
+
|
|
54
|
+
# Represents a span in source code (from start to end position)
|
|
55
|
+
class SourceSpan
  # A span is a pair of positions. Offsets are treated as byte offsets
  # throughout (#length, #extract and .from_offsets).
  attr_reader :start, :end

  # @param start_pos [SourcePosition, Hash] a position, or the keyword hash
  #   accepted by SourcePosition.new
  # @param end_pos [SourcePosition, Hash] same, for the end of the span
  def initialize(start_pos:, end_pos:)
    @start = start_pos.is_a?(SourcePosition) ? start_pos : SourcePosition.new(**start_pos)
    @end = end_pos.is_a?(SourcePosition) ? end_pos : SourcePosition.new(**end_pos)
  end

  # Create a span from byte offsets (computes line/column from input).
  #
  # @param input [String] the source text
  # @param start_offset [Integer] byte offset where the span begins
  # @param end_offset [Integer] byte offset where the span ends
  # @return [SourceSpan]
  def self.from_offsets(input, start_offset, end_offset)
    new(
      start_pos: compute_position(input, start_offset),
      end_pos: compute_position(input, end_offset)
    )
  end

  # Compute line and column (both 1-based) for a byte offset.
  #
  # The walk is per character so that the column counts characters, but the
  # running counter advances by each character's encoded width so that it
  # stays comparable with the byte offset even after multibyte characters.
  # Offsets past the end of the input yield the position after the last
  # character; offsets <= 0 yield line 1, column 1.
  #
  # Deliberately left public: a bare `private` does not apply to methods
  # defined with `def self.`, so this has always been callable from outside.
  #
  # @param input [String] the source text
  # @param offset [Integer] byte offset into input
  # @return [SourcePosition]
  def self.compute_position(input, offset)
    line = 1
    column = 1
    consumed = 0 # bytes walked so far

    input.each_char do |char|
      break if consumed >= offset

      if char == "\n"
        line += 1
        column = 1
      else
        column += 1
      end

      consumed += char.bytesize
    end

    SourcePosition.new(offset: offset, line: line, column: column)
  end

  # Merge two spans (returns a new span covering both).
  #
  # @param other [SourceSpan, nil]
  # @return [SourceSpan] self when other is nil
  def merge(other)
    return self if other.nil?

    SourceSpan.new(
      start_pos: [@start, other.start].min_by(&:offset),
      end_pos: [@end, other.end].max_by(&:offset)
    )
  end

  # Check if this span overlaps with another. Spans that merely touch
  # (one ends where the other starts) do not overlap.
  def overlaps?(other)
    return false if other.nil?

    @start.offset < other.end.offset && @end.offset > other.start.offset
  end

  # Check if this span is adjacent to another (one ends exactly where the
  # other starts).
  def adjacent?(other)
    return false if other.nil?

    @end.offset == other.start.offset || other.end.offset == @start.offset
  end

  # Check if a position is within this span. Both ends are inclusive here.
  #
  # @param position [SourcePosition, Integer] a position or a raw offset
  def contains?(position)
    offset = position.is_a?(SourcePosition) ? position.offset : position
    offset >= @start.offset && offset <= @end.offset
  end

  # Get the length of the span in bytes.
  def length
    @end.offset - @start.offset
  end

  # Extract the source text covered by this span from the input.
  #
  # @param input [String] the text the offsets refer to
  # @return [String, nil] whatever String#byteslice returns for the range
  def extract(input)
    input.byteslice(@start.offset, length)
  end

  def to_s
    "#{@start} - #{@end}"
  end

  def to_h
    { start: @start.to_h, end: @end.to_h }
  end

  # Spans are equal when both endpoints are equal.
  def ==(other)
    return false unless other.is_a?(SourceSpan)

    @start == other.start && @end == other.end
  end

  # Hash-key equality follows #==, mirroring SourcePosition.
  def eql?(other)
    self == other
  end

  # Built from the endpoints so that equal spans hash alike.
  def hash
    [@start, @end].hash
  end
end
|
|
144
|
+
|
|
145
|
+
# Result wrapper for parse_with_spans
|
|
146
|
+
class ParseResultWithSpans
  attr_reader :tree, :spans

  # @param tree the parse tree, stored as given
  # @param spans a collection indexed by node id; it must respond to
  #   #[] and #values (see #span_for and #spans_at)
  def initialize(tree:, spans:)
    @tree, @spans = tree, spans
  end

  # Look up the span recorded for one node.
  #
  # @param node_id key into the spans collection
  # @return whatever the collection holds for that key
  def span_for(node_id)
    spans[node_id]
  end

  # Collect every recorded span whose #contains? accepts the given offset.
  #
  # @param offset the position to test each span against
  # @return [Array] matching spans, in the order #values yields them
  def spans_at(offset)
    spans.values.select { |candidate| candidate.contains?(offset) }
  end
end
|
|
164
|
+
end
|