parsanol 1.0.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/lib/parsanol/3.2/parsanol_native.so +0 -0
- data/lib/parsanol/3.3/parsanol_native.so +0 -0
- data/lib/parsanol/3.4/parsanol_native.so +0 -0
- data/lib/parsanol/4.0/parsanol_native.so +0 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +280 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
  # Accumulator that postpones string concatenation until the result is needed.
  #
  # Each appended segment is kept in an internal array; the segments are only
  # joined together when {#to_s} is called, so building a long string does not
  # create an intermediate copy per append.
  #
  # @example Basic usage
  #   rope = Rope.new
  #   rope.append('hello')
  #   rope.append(' ')
  #   rope.append('world')
  #   rope.to_s # => "hello world"
  #
  # @example With Slices
  #   rope = Rope.new
  #   rope.append(Slice.new(0, 'hello'))
  #   rope.append(Slice.new(5, ' world'))
  #   rope.to_s # => "hello world"
  #
  class Rope
    # Builds a rope that starts out holding +str+.
    #
    # An empty +str+ is not stored, so the resulting rope reports {#empty?}.
    #
    # @param str [String] initial content
    # @return [Rope] a new rope
    def self.from_string(str)
      rope = new
      rope.append(str) unless str.empty?
      rope
    end

    # Sets up a rope with no segments that still accepts appends.
    def initialize
      @segments = []
      @frozen = false
    end

    # Adds one segment to the end of the rope.
    #
    # The segment is stored untouched; conversion to a string is left to
    # {#to_s}.
    #
    # @param segment [String, Slice] the piece to add
    # @return [Rope] self, so calls can be chained
    # @raise [FrozenError] once {#to_s} has been called on this rope
    def append(segment)
      raise FrozenError, "can't modify frozen Rope" if @frozen

      @segments.push(segment)
      self
    end

    # Joins every stored segment into a single string.
    #
    # Calling this marks the rope as frozen: any later {#append} raises.
    #
    # @return [String] all segments concatenated in insertion order
    def to_s
      @frozen = true
      @segments.join
    end

    # Tells whether anything has been appended yet.
    #
    # @return [Boolean] true when the rope holds no segments
    def empty?
      @segments.size.zero?
    end

    # Adds up the sizes of the stored segments.
    #
    # A segment that responds to #size contributes that value; any other
    # segment contributes the size of its #to_s representation.
    #
    # @return [Integer] the summed segment sizes
    def size
      total = 0
      @segments.each do |segment|
        total += segment.respond_to?(:size) ? segment.size : segment.to_s.size
      end
      total
    end
  end
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Scoped variable bindings for parser context management. Provides a
|
|
4
|
+
# stack-like interface for creating nested scopes that inherit from
|
|
5
|
+
# parent scopes.
|
|
6
|
+
#
|
|
7
|
+
# @example Basic usage
|
|
8
|
+
# scope = Parsanol::Scope.new
|
|
9
|
+
# scope[:x] = 1
|
|
10
|
+
# scope.push # Create nested scope
|
|
11
|
+
# scope[:x] # => 1 (inherited from parent)
|
|
12
|
+
# scope[:y] = 2
|
|
13
|
+
# scope.pop # Return to parent scope
|
|
14
|
+
# scope[:y] # raises NotFound
|
|
15
|
+
#
|
|
16
|
+
# Inspired by lexical scoping patterns in programming languages.
|
|
17
|
+
#
|
|
18
|
+
module Parsanol
  # Stack of variable bindings. Lookups walk from the innermost frame out to
  # the root frame; writes always go to the innermost (active) frame.
  class Scope
    # Error raised when attempting to access an undefined binding.
    class UndefinedVariable < StandardError
    end
    # Legacy alias for backward compatibility
    NotFound = UndefinedVariable

    # Internal class representing a single scope level. Each frame can
    # look up values in its parent frame if not found locally.
    class Frame
      # @return [Frame, nil] parent frame in the scope chain
      attr_reader :parent_frame

      # Creates a new frame optionally linked to a parent.
      #
      # @param parent [Frame, nil] the parent frame to inherit from
      def initialize(parent = nil)
        @parent_frame = parent
        @bindings = {}
      end

      # Retrieves a value by key, searching parent frames if necessary.
      #
      # Presence is tested with Hash#key?, so a binding whose value is nil or
      # false is still found and returned.
      #
      # @param key [Symbol] the variable name to look up
      # @return [Object] the bound value
      # @raise [UndefinedVariable] if key not found in any frame
      def fetch(key)
        if @bindings.key?(key)
          @bindings[key]
        elsif @parent_frame
          @parent_frame.fetch(key)
        else
          raise UndefinedVariable, "No binding for #{key.inspect}"
        end
      end

      # Stores a value in this frame only; parent frames are not modified,
      # so a binding of the same name in a parent is shadowed, not replaced.
      #
      # @param key [Symbol] the variable name
      # @param value [Object] the value to bind
      # @return [Object] the stored value
      def store(key, value)
        @bindings[key] = value
      end

      alias [] fetch
      alias []= store
    end

    # Creates a new scope with an empty root frame.
    def initialize
      @active_frame = Frame.new
    end

    # Retrieves a value from the current scope chain.
    #
    # @param key [Symbol] the variable name
    # @return [Object] the bound value
    # @raise [UndefinedVariable] if not found
    def [](key)
      @active_frame.fetch(key)
    end

    # Stores a value in the current (innermost) frame.
    #
    # @param key [Symbol] the variable name
    # @param value [Object] the value to bind
    def []=(key, value)
      @active_frame.store(key, value)
    end

    # Creates a new nested scope frame. Call #pop to restore.
    #
    # @return [void]
    def push
      @active_frame = Frame.new(@active_frame)
    end

    # Returns to the parent scope frame, discarding the bindings of the
    # frame being left.
    #
    # Popping while the root frame is active is a no-op: the root frame has
    # no parent, and replacing the active frame with nil would make every
    # later #[], #[]= and #push fail with NoMethodError.
    #
    # @return [void]
    def pop
      parent = @active_frame.parent_frame
      @active_frame = parent if parent
    end
  end
end
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Source position tracker for parsed content.
|
|
4
|
+
# Preserves both the string value and its byte offset in the original input,
|
|
5
|
+
# enabling precise error reporting and source mapping.
|
|
6
|
+
#
|
|
7
|
+
# Inspired by string slicing concepts in text editors and IDEs.
|
|
8
|
+
module Parsanol
  # A piece of content together with the offset at which it was found in the
  # original input and an optional cache used to translate that offset into
  # line/column numbers.
  class Slice
    # NOTE(review): Resettable is defined elsewhere in the project; presumably
    # it marks objects that expose #reset! for pooling — confirm there.
    include Parsanol::Resettable

    attr_reader :content, :position_cache

    # Creates a slice with position tracking.
    #
    # @param byte_offset [Integer] position in original input
    # @param string_content [String] the slice content
    # @param cache [Object] optional cache for line/column lookup; must
    #   respond to #line_and_column for {#line_and_column} to work
    def initialize(byte_offset = 0, string_content = '', cache = nil)
      @byte_position = byte_offset
      @content = string_content
      @position_cache = cache
    end

    # Overwrites offset, content and cache in place so the instance can be
    # reused instead of allocating a new one.
    #
    # @param new_offset [Integer] new byte position
    # @param new_content [String] new content
    # @param new_cache [Object] new line cache
    # @return [self] for method chaining
    def reset!(new_offset = 0, new_content = '', new_cache = nil)
      @byte_position = new_offset
      @content = new_content
      @position_cache = new_cache
      self
    end

    # Creates a Slice from a Rope concatenation.
    #
    # Calls +rope.to_s+ to obtain the content.
    #
    # @param rope [Parsanol::Rope] rope to convert
    # @param offset [Integer] byte position
    # @param cache [Object] optional cache
    # @return [Parsanol::Slice] new slice
    def self.from_rope(rope, offset, cache = nil)
      new(offset, rope.to_s, cache)
    end

    # @return [Integer] byte offset in original input
    def offset
      @byte_position
    end

    alias bytepos offset
    # NOTE: charpos returns the same stored offset as bytepos; no
    # byte-to-character conversion is performed here.
    alias charpos offset
    alias str content # backward compatibility
    alias line_cache position_cache # backward compatibility

    # Compares by content only; the offset is ignored. Another Slice is
    # compared through its content, anything else is compared with the
    # content directly.
    #
    # @param other [Object] object to compare
    # @return [Boolean] true if equal
    def ==(other)
      content == (other.is_a?(Parsanol::Slice) ? other.content : other)
    end

    # Type-strict equality check used for Hash keys and Array#uniq.
    #
    # Offset is compared as well as content so that this method agrees with
    # {#hash}: objects that are eql? must produce the same hash, and {#hash}
    # mixes the offset in.
    #
    # @param other [Object] object to compare
    # @return [Boolean] true if other is a Slice with equal content and offset
    def eql?(other)
      other.is_a?(Parsanol::Slice) &&
        offset == other.offset &&
        content.eql?(other.content)
    end

    # Hash for use as hash keys.
    #
    # @return [Integer] hash combining content and position
    def hash
      [content, offset].hash
    end

    # Matches regular expression against content.
    #
    # @param pattern [Regexp] pattern to match
    # @return [MatchData, nil] match result
    def match(pattern)
      content.match(pattern)
    end

    # @return [Integer] length of slice content
    def size
      content.size
    end

    alias length size

    # Concatenates slices assuming second continues from first's end.
    # The result keeps this slice's offset and cache.
    #
    # @param other [Slice, String] slice to append
    # @return [Parsanol::Slice] combined slice
    def +(other)
      self.class.new(@byte_position, content + other.to_s, position_cache)
    end

    # Returns [line, column] tuple for this position, as computed by the
    # position cache for the slice's offset.
    #
    # @return [Array<Integer, Integer>] line and column (1-indexed)
    # @raise [ArgumentError] if no line cache available
    def line_and_column
      raise ArgumentError, 'Line/column info requires a line cache. Pass one during parsing.' unless position_cache

      position_cache.line_and_column(@byte_position)
    end

    # String conversions ---------------------------------------------------------

    # Defining #to_str lets Ruby treat a Slice as a String wherever implicit
    # conversion is applied.
    #
    # @return [String] the content, converted with #to_s when it is not
    #   already a String
    def to_str
      content.is_a?(String) ? content : content.to_s
    end
    alias to_s to_str

    # @return [self]
    def to_slice
      self
    end

    # @return [Symbol] content converted with #to_sym
    def to_sym
      content.to_sym
    end

    # @return [Integer] content converted with #to_i
    def to_i
      content.to_i
    end

    # @return [Float] content converted with #to_f
    def to_f
      content.to_f
    end

    # Inspection ---------------------------------------------------------

    # @return [String] inspected content followed by "@" and the offset
    def inspect
      "#{content.inspect}@#{offset}"
    end
  end
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Line position caching for efficient line/column lookups.
|
|
4
|
+
# Stores line ending positions to enable O(log n) line number queries.
|
|
5
|
+
#
|
|
6
|
+
# Inspired by Parslet (MIT License).
|
|
7
|
+
|
|
8
|
+
# StringScanner is used by LineCache#scan_for_line_endings; requiring it here
# keeps this file loadable on its own.
require 'strscan'

module Parsanol
  class Source
    # Caches line ending positions for quick line/column resolution.
    # Uses binary search for efficient position lookup.
    class LineCache
      def initialize
        # Ascending byte offsets, one per "\n" seen: each entry is the offset
        # of the byte right after the newline (i.e. where the next line starts).
        @breaks = []
        @breaks.extend(IntervalLookup)
        # Highest offset recorded so far; nil until a newline has been found.
        @max_scanned = nil
      end

      # Converts a byte offset to [line_number, column_number].
      # Line and column numbers are 1-indexed; the column is counted in bytes
      # from the start of the line.
      #
      # @param position [Integer, #bytepos] the byte offset to convert
      # @return [Array<Integer, Integer>] [line, column] tuple
      def line_and_column(position)
        position = position.bytepos if position.respond_to?(:bytepos)

        line_idx = @breaks.lower_bound_index(position)

        if line_idx
          # Found a line ending after this position
          line_start = line_idx.positive? ? @breaks[line_idx - 1] : 0
          [line_idx + 1, position - line_start + 1]
        else
          # Position is beyond all known line endings
          line_start = @breaks.last || 0
          [@breaks.size + 1, position - line_start + 1]
        end
      end

      # Scans a string buffer for line endings and caches their positions.
      # Avoids re-scanning already processed regions.
      #
      # @param start_offset [Integer] the byte offset where buffer starts
      # @param buffer [String] the string content to scan
      # @return [nil]
      def scan_for_line_endings(start_offset, buffer)
        return unless buffer

        scanner = StringScanner.new(buffer)
        return unless scanner.exist?(/\n/)

        if @max_scanned && start_offset < @max_scanned
          already_scanned = @max_scanned - start_offset
          # The whole buffer lies inside the region scanned earlier. Moving the
          # scanner past its end would raise RangeError, and there is nothing
          # new to record anyway.
          return if already_scanned >= scanner.string.bytesize

          # Skip already-scanned content
          scanner.pos = already_scanned
        end

        # Record all newline positions
        while scanner.skip_until(/\n/)
          @max_scanned = start_offset + scanner.pos
          @breaks << @max_scanned
        end
      end
    end

    # Mixin providing binary search over an ascending Array of interval
    # endpoints. The value at index n is the exclusive upper end of interval
    # n, whose lower end is the previous value (or 0 for the first).
    #
    # @example
    #   [10, 20, 30] represents intervals [0,10), [10,20), [20,30)
    module IntervalLookup
      # Calculates midpoint index for binary search.
      # Uses floor to ensure integer result.
      def midpoint_index(lo, hi)
        lo + ((hi - lo) / 2).floor
      end

      # Finds the index of the first value greater than the bound.
      # Returns nil if no such value exists (including for an empty array).
      # The receiver must be sorted in ascending order.
      #
      # @param bound [Numeric] the value to search against
      # @return [Integer, nil] index of first value > bound, or nil
      def lower_bound_index(bound)
        bsearch_index { |value| value > bound }
      end

      # Legacy method name for backward compatibility
      alias find_mid midpoint_index
      alias lbound lower_bound_index
    end

    # Legacy constant name for backward compatibility
    RangeSearch = IntervalLookup
  end
end
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'stringio'
|
|
4
|
+
require 'strscan'
|
|
5
|
+
|
|
6
|
+
require 'parsanol/position'
|
|
7
|
+
require 'parsanol/source/line_cache'
|
|
8
|
+
require 'parsanol/pools/slice_pool'
|
|
9
|
+
require 'parsanol/pools/position_pool'
|
|
10
|
+
|
|
11
|
+
module Parsanol
  # Encapsulates input source for parsing operations. Wraps a StringScanner
  # over the input and provides position tracking, consumption, line/column
  # calculation, and pooled Slice/Position objects.
  #
  # @example Creating a source
  #   src = Parsanol::Source.new("input string")
  #   src.matches?(/a/)  # => truthy if 'a' is at current position
  #   src.consume(1)     # => Slice containing one character
  #
  class Source
    # @return [Parsanol::Pools::SlicePool] pool for Slice objects
    attr_reader :slice_pool

    # @return [Parsanol::Pools::PositionPool] pool for Position objects
    attr_reader :position_pool

    # Creates a new source wrapper around a string.
    #
    # @param input [#to_str] string-like object to parse
    # @raise [ArgumentError] if input doesn't respond to to_str
    #
    def initialize(input)
      raise ArgumentError, 'Source requires a string-like object (responds to to_str)' unless input.respond_to?(:to_str)

      # Convert once and use the same String for scanning, line lookup and
      # position reporting.
      @raw_string = input.to_str

      # Core scanner for input traversal
      @scanner = StringScanner.new(@raw_string)

      # Regex cache: maps count n to /(.|$){n}/m pattern
      @regex_cache = Hash.new { |h, count| h[count] = Regexp.new("(.|$){#{count}}", Regexp::MULTILINE) }

      # Line ending cache for position-to-line/column mapping
      @line_data = LineCache.new
      @line_data.scan_for_line_endings(0, @raw_string)

      # Object pools for memory efficiency
      # SlicePool: reduces Slice allocations during matching
      @slice_pool = Parsanol::Pools::SlicePool.new(size: 5000)

      # PositionPool: reduces Position allocations for error reporting
      @position_pool = Parsanol::Pools::PositionPool.new(size: 1000)
    end

    # Checks if a pattern matches at the current input position without consuming.
    #
    # Delegates to StringScanner#match?, which returns the match length or
    # nil, so the result should be used for its truthiness only.
    #
    # @param pattern [Regexp] pattern to test
    # @return [Integer, nil] truthy if pattern matches at current position
    #
    def matches?(pattern)
      @scanner.match?(pattern)
    end
    alias match matches?

    # Consumes n characters from input and returns them as a pooled Slice.
    #
    # The cached pattern is /(.|$){n}/m: `.` also matches newlines, and the
    # `$` alternative lets the scan succeed at end of input, so fewer than
    # +count+ characters come back when the input runs out.
    #
    # @param count [Integer] number of characters to consume
    # @return [Parsanol::Slice] slice containing consumed characters
    #
    def consume(count)
      current_pos = @scanner.pos
      content = @scanner.scan(@regex_cache[count])
      @slice_pool.acquire_with(current_pos, content, @line_data)
    end

    # Creates a pooled slice at a specific position.
    # Preferred method for atoms to construct slices.
    #
    # @param offset [Integer] byte position in source
    # @param content [String] slice content
    # @return [Parsanol::Slice] pooled slice instance
    #
    def slice(offset, content)
      @slice_pool.acquire_with(offset, content, @line_data)
    end

    # Returns a slice to the pool for reuse.
    #
    # @param sl [Parsanol::Slice] slice to release
    #
    def release_slice(sl)
      @slice_pool.release(sl)
    end

    # Returns the amount of input left to consume.
    #
    # NOTE: this is StringScanner#rest_size, which counts bytes. For input
    # containing multibyte characters the value is larger than the number of
    # characters remaining.
    #
    # @return [Integer] remaining input size
    #
    def chars_left
      @scanner.rest_size
    end

    # Counts characters from current position until a target string.
    # Returns chars_left if target is not found. Does not move the scanner.
    #
    # @param target [String] string to search for
    # @return [Integer] count of chars until target or remaining chars
    #
    def chars_until(target)
      found = @scanner.check_until(Regexp.new(Regexp.escape(target)))
      return chars_left unless found

      # check_until returns everything up to and including the target.
      found.size - target.size
    end

    # Finds the byte position of the next occurrence of a character.
    # Does not move the scanner position.
    #
    # @param ch [String] character to search for
    # @return [Integer, nil] byte position or nil if not found
    #
    def index_of_char(ch)
      remaining = @scanner.rest
      char_idx = remaining.index(ch)
      return nil unless char_idx

      # String#index counts characters while the scanner position counts
      # bytes; convert the skipped prefix to bytes before adding them.
      @scanner.pos + remaining[0, char_idx].bytesize
    end

    # Returns current byte position in input.
    #
    # @return [Integer] current byte offset
    # @note Encoding-aware: position is in bytes, not characters
    #
    def pos
      @scanner.pos
    end
    alias bytepos pos

    # Sets the current byte position.
    #
    # @param new_pos [Integer] target byte position
    #
    def bytepos=(new_pos)
      @scanner.pos = new_pos
    rescue RangeError
      # Silently ignore out-of-range positions
    end

    # Converts a byte position to line and column numbers.
    #
    # @param offset [Integer, nil] byte position (defaults to current)
    # @return [Array<Integer, Integer>] [line, column] tuple (1-indexed)
    #
    def line_and_column(offset = nil)
      effective = offset || @scanner.pos
      @line_data.line_and_column(effective)
    end

    # Creates a pooled Position object for error reporting.
    #
    # @param offset [Integer, nil] byte position (defaults to current)
    # @return [Parsanol::Position] pooled position instance
    #
    def position(offset = nil)
      effective = offset || @scanner.pos

      # Character position: number of characters in the first `effective`
      # bytes of the input.
      char_pos = @raw_string.byteslice(0, effective).size

      @position_pool.acquire_with(
        string: @raw_string,
        bytepos: effective,
        charpos: char_pos
      )
    end
  end
end
|