parsanol 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +546 -0
- data/Cargo.toml +9 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/ext/parsanol_native/Cargo.toml +34 -0
- data/ext/parsanol_native/extconf.rb +15 -0
- data/ext/parsanol_native/src/lib.rs +17 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +293 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parses treetop-style expression strings and converts them to Parsanol atoms.
|
|
4
|
+
#
|
|
5
|
+
# This allows specifying parser rules as strings using treetop syntax instead
|
|
6
|
+
# of building atoms explicitly with the DSL.
|
|
7
|
+
#
|
|
8
|
+
# == Performance Note
|
|
9
|
+
#
|
|
10
|
+
# The expression parser is implemented in pure Ruby and is NOT accelerated by
|
|
11
|
+
# the Rust native extension. This is intentional and acceptable because:
|
|
12
|
+
#
|
|
13
|
+
# 1. Expression parsing happens at grammar definition time (once)
|
|
14
|
+
# 2. Expression strings are typically short (< 100 characters)
|
|
15
|
+
# 3. The resulting atoms can still be used with Rust-accelerated parsing
|
|
16
|
+
#
|
|
17
|
+
# If you need maximum performance for dynamically generated parsers, consider
|
|
18
|
+
# building atoms directly with the DSL (str, match, any, etc.) instead.
|
|
19
|
+
#
|
|
20
|
+
# == Syntax
|
|
21
|
+
#
|
|
22
|
+
# The treetop syntax supports:
|
|
23
|
+
#
|
|
24
|
+
# - Strings: 'hello' (single quotes)
|
|
25
|
+
# - Character classes: [a-z], [0-9]
|
|
26
|
+
# - Any character: .
|
|
27
|
+
# - Sequence: 'a' 'b' (concatenation)
|
|
28
|
+
# - Alternative: 'a' / 'b'
|
|
29
|
+
# - Optional: 'a' ? (space before ? required)
|
|
30
|
+
# - Zero or more: 'a' * (space before * required)
|
|
31
|
+
# - One or more: 'a' + (space before + required)
|
|
32
|
+
# - Repetition: 'a'{1,3}
|
|
33
|
+
# - Grouping: ('a' / 'b')+
|
|
34
|
+
#
|
|
35
|
+
# == Example
|
|
36
|
+
#
|
|
37
|
+
# # Using exp()
|
|
38
|
+
# rule(:word) { exp("'a' 'b' ?") }
|
|
39
|
+
#
|
|
40
|
+
# # Equivalent DSL:
|
|
41
|
+
# rule(:word) { str('a') >> str('b').maybe }
|
|
42
|
+
#
|
|
43
|
+
# == Result Usage
|
|
44
|
+
#
|
|
45
|
+
# The atoms produced by exp() can be used with Rust-accelerated parsing:
|
|
46
|
+
#
|
|
47
|
+
# atom = Parsanol.exp("'a' +")
|
|
48
|
+
#
|
|
49
|
+
# # Ruby parsing
|
|
50
|
+
# atom.parse('aaa')
|
|
51
|
+
#
|
|
52
|
+
# # Rust-accelerated parsing (if native extension available)
|
|
53
|
+
# Parsanol::Native.parse_with_grammar(atom, 'aaa')
|
|
54
|
+
#
|
|
55
|
+
module Parsanol
|
|
56
|
+
class Expression
  include Parsanol

  autoload :Treetop, 'parsanol/expression/treetop'

  # Parses a treetop-style expression string and stores the resulting atom.
  #
  # The string is parsed and transformed eagerly, so any error in either
  # step is raised from the constructor.
  #
  # @param str [String] a treetop expression
  # @param opts [Hash] options; +:type+ is recorded and defaults to +:treetop+
  # @param _context [Object] accepted but not used by this method
  #
  # @example
  #   expr = Parsanol::Expression.new("'a' 'b' ?")
  #   atom = expr.to_parslet
  #   atom.parse('a')  # => "a"@0
  #
  def initialize(str, opts = {}, _context = self)
    @type = opts[:type] || :treetop
    @exp = str
    tree = parse(str)
    @parslet = transform(tree)
  end

  # Converts a parse tree into a parser atom via Treetop::Transform.
  #
  # When the transform raises a StandardError, the offending tree is written
  # to the warning stream and the same exception is re-raised.
  #
  # @param tree [Hash] parse tree from Treetop::Parser
  # @return [Parsanol::Atoms::Base] parser atom
  def transform(tree)
    Treetop::Transform.new.apply(tree)
  rescue StandardError
    warn "Could not transform: #{tree.inspect}"
    raise
  end

  # Runs a fresh Treetop::Parser over the expression string.
  #
  # @param str [String] treetop expression
  # @return [Hash] parse tree
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # The atom built by the constructor.
  #
  # @return [Parsanol::Atoms::Base] parser atom
  def to_parslet
    @parslet
  end
end
|
|
106
|
+
end
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Fast mode patch for Parslet - matches vanilla parslet 2.0 behavior.
|
|
4
|
+
#
|
|
5
|
+
# For grammars with many small allocations (like EXPRESS), this is faster
|
|
6
|
+
# because the overhead of pool management exceeds the benefit.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# require 'parslet'
|
|
10
|
+
# require 'parsanol/fast_mode'
|
|
11
|
+
# # Now all parsing uses fast mode methods
|
|
12
|
+
#
|
|
13
|
+
|
|
14
|
+
module Parsanol
|
|
15
|
+
FAST_MODE = true
|
|
16
|
+
|
|
17
|
+
module Atoms
|
|
18
|
+
# Fast mode Context - matches vanilla parslet 2.0 simplicity
|
|
19
|
+
class Context
  # Memoizing wrapper around +obj.try+ (no eviction, no pooling).
  #
  # @cache is keyed by the byte position at which the attempt starts; each
  # entry is a Hash from the atom's object_id to a pair
  # +[result, bytes_advanced]+.
  #
  # @param obj [#try, #cached?] atom to apply
  # @param source [#bytepos, #bytepos=] input being parsed
  # @param consume_all [Boolean] forwarded unchanged to +obj.try+
  # @return [Object] whatever +obj.try+ returned (fresh or replayed)
  def try_with_cache(obj, source, consume_all)
    origin = source.bytepos
    key = obj.object_id
    memo = @cache[origin]&.[](key)

    if memo
      # Cache hit: replay the stored outcome and move the source forward
      # by the number of bytes the original attempt advanced.
      outcome, consumed = memo
      source.bytepos = origin + consumed
      return outcome
    end

    # Cache miss: run the atom and remember the outcome if it opts in.
    outcome = obj.try(source, self, consume_all)
    if obj.cached?
      slot = (@cache[origin] ||= {})
      slot[key] = [outcome, source.bytepos - origin]
    end
    outcome
  end
end
|
|
39
|
+
|
|
40
|
+
# Fast mode Sequence - direct array creation, no lazy evaluation
|
|
41
|
+
class Sequence
  # Applies every child atom in order and collects the values into an
  # array tagged with +:sequence+.
  #
  # Sequences of one, two or three atoms are handled by hand-unrolled
  # branches; longer (and empty) sequences use the generic loop. In every
  # branch only the final atom receives +consume_all+; earlier atoms are
  # applied with +false+. The first failing atom ends the attempt and its
  # value is passed to +context.err+ as the single child cause.
  #
  # @param source [Object] input being parsed
  # @param context [#err] parse context used to build error results
  # @param consume_all [Boolean] whether the last atom must consume all input
  # @return [Object] result of +succ+ on success, of +context.err+ on failure
  def try(source, context, consume_all)
    atoms = @parslets
    count = atoms.size

    if count == 1
      ok, only = atoms[0].apply(source, context, consume_all)
      return succ([:sequence, only]) if ok

      return context.err(self, source, @error_msg, [only])
    end

    if count == 2
      ok, first = atoms[0].apply(source, context, false)
      return context.err(self, source, @error_msg, [first]) unless ok

      ok, second = atoms[1].apply(source, context, consume_all)
      return succ([:sequence, first, second]) if ok

      return context.err(self, source, @error_msg, [second])
    end

    if count == 3
      ok, first = atoms[0].apply(source, context, false)
      return context.err(self, source, @error_msg, [first]) unless ok

      ok, second = atoms[1].apply(source, context, false)
      return context.err(self, source, @error_msg, [second]) unless ok

      ok, third = atoms[2].apply(source, context, consume_all)
      return succ([:sequence, first, second, third]) if ok

      return context.err(self, source, @error_msg, [third])
    end

    # Generic path for any other length.
    collected = [:sequence]
    final = count - 1
    atoms.each_with_index do |atom, position|
      ok, value = atom.apply(source, context, consume_all && position == final)
      return context.err(self, source, @error_msg, [value]) unless ok

      collected << value
    end
    succ(collected)
  end
end
|
|
79
|
+
|
|
80
|
+
# Fast mode Repetition - direct array creation, no lazy evaluation
|
|
81
|
+
class Repetition
  # Shared frozen result for an unmatched +.maybe+ whose tag is +:repetition+.
  EMPTY_REPETITION_ARRAY = [:repetition].freeze

  # Applies the wrapped atom repeatedly and collects the values into an
  # array whose first element is the tag.
  #
  # Three paths exist:
  # * min 0 / max 1 (+.maybe+): one attempt, always succeeds;
  # * min == max with max of 1, 2 or 3: hand-unrolled, the last application
  #   receives +consume_all+ and failures are reported at the source's
  #   current byte position;
  # * everything else: generic loop, which rewinds the source to the
  #   starting position and reports there when fewer than +min+ matches
  #   were found.
  #
  # @param source [Object] input being parsed
  # @param context [#err, #err_at] parse context used to build error results
  # @param consume_all [Boolean] whether all remaining input must be consumed
  # @return [Object] result of +succ+ on success, of the context's error
  #   builder on failure
  def try(source, context, consume_all)
    atom = @parslet
    lower = @min
    upper = @max
    label = @tag

    # Fast path for .maybe
    if lower.zero? && upper == 1
      matched, payload = atom.apply(source, context, false)
      return succ([label, payload]) if matched
      return succ(EMPTY_REPETITION_ARRAY) if label == :repetition

      return succ([label])
    end

    # Fast path for an exact count of 1, 2 or 3. Any other exact count
    # (including 0) matches no branch below and falls through.
    if lower == upper && upper && upper <= 3
      case upper
      when 1
        matched, only = atom.apply(source, context, consume_all)
        return succ([label, only]) if matched

        return context.err_at(self, source, @error_msg, source.bytepos, [only])
      when 2
        matched, first = atom.apply(source, context, false)
        return context.err_at(self, source, @error_msg, source.bytepos, [first]) unless matched

        matched, second = atom.apply(source, context, consume_all)
        return succ([label, first, second]) if matched

        return context.err_at(self, source, @error_msg, source.bytepos, [second])
      when 3
        matched, first = atom.apply(source, context, false)
        return context.err_at(self, source, @error_msg, source.bytepos, [first]) unless matched

        matched, second = atom.apply(source, context, false)
        return context.err_at(self, source, @error_msg, source.bytepos, [second]) unless matched

        matched, third = atom.apply(source, context, consume_all)
        return succ([label, first, second, third]) if matched

        return context.err_at(self, source, @error_msg, source.bytepos, [third])
      end
    end

    # General case
    origin = source.bytepos
    hits = 0
    collected = [label]
    last_value = nil

    loop do
      matched, last_value = atom.apply(source, context, false)
      break unless matched

      hits += 1
      collected << last_value
      break if upper && hits >= upper
    end

    if hits < lower
      # Too few matches: undo any partial consumption before reporting.
      source.bytepos = origin
      return context.err_at(self, source, @error_msg, origin, [last_value])
    end

    if consume_all && source.chars_left.positive?
      return context.err(self, source, @unconsumed_msg, [last_value])
    end

    succ(collected)
  end
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# FIRST Set Analysis for PEG Grammars
|
|
4
|
+
#
|
|
5
|
+
# FIRST sets help identify which terminals can appear at the beginning of
|
|
6
|
+
# a parse. This is essential for:
|
|
7
|
+
# 1. Automatic cut operator insertion (AC-FIRST algorithm)
|
|
8
|
+
# 2. Grammar analysis and optimization
|
|
9
|
+
# 3. Detecting ambiguous choices
|
|
10
|
+
#
|
|
11
|
+
# Reference: Mizushima et al. (2010) "Packrat Parsers Can Handle Practical
|
|
12
|
+
# Grammars in Mostly Constant Space"
|
|
13
|
+
#
|
|
14
|
+
module Parsanol
|
|
15
|
+
module FirstSet
  # Sentinel value representing the empty string (ε)
  EPSILON = :epsilon

  class << self
    # Tells whether two FIRST sets share no concrete terminal.
    #
    # EPSILON and nil entries are dropped from both sides before comparing.
    # If either side has nothing left after that, the pair counts as
    # disjoint.
    #
    # @param set1 [Set] First FIRST set
    # @param set2 [Set] Second FIRST set
    # @return [Boolean] true if the sets are disjoint
    def disjoint?(set1, set2)
      left, right = [set1, set2].map do |candidates|
        candidates.reject { |entry| entry == EPSILON || entry.nil? }
      end

      return true if left.empty? || right.empty?

      # Intersect as arrays (Opal compatibility).
      common = left.to_a & right.to_a
      common.empty?
    end

    # Tells whether every pair of FIRST sets in the collection is disjoint.
    # Collections with fewer than two sets are trivially disjoint.
    #
    # @param sets [Array<Set>] Collection of FIRST sets
    # @return [Boolean] true if all pairs are disjoint
    def all_disjoint?(sets)
      return true if sets.length < 2

      sets.combination(2).each do |first, second|
        return false unless disjoint?(first, second)
      end
      true
    end
  end

  # FIRST set of this atom, computed on first use and then memoized.
  #
  # @return [Set] FIRST set as produced by #compute_first_set
  def first_set
    return @first_set if @first_set

    @first_set = compute_first_set
  end

  # Drops the memoized FIRST set so the next #first_set call recomputes it.
  def clear_first_set_cache
    @first_set = nil
  end

  protected

  # Hook for including classes to supply their real FIRST set.
  # The default is the conservative answer: a set holding only nil,
  # where nil stands for an unknown terminal.
  def compute_first_set
    Set[nil]
  end
end
|
|
79
|
+
end
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parsanol::GrammarBuilder - Grammar Composition
|
|
4
|
+
#
|
|
5
|
+
# Build complex grammars by importing and composing smaller grammars.
|
|
6
|
+
# This enables reusable grammar modules.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# # Define reusable grammars
|
|
10
|
+
# expression_grammar = GrammarBuilder.new
|
|
11
|
+
# .rule("expr", str("a") | str("b"))
|
|
12
|
+
# .build
|
|
13
|
+
#
|
|
14
|
+
# type_grammar = GrammarBuilder.new
|
|
15
|
+
# .rule("type", str("int") | str("str"))
|
|
16
|
+
# .build
|
|
17
|
+
#
|
|
18
|
+
# # Compose into a new grammar
|
|
19
|
+
# combined = GrammarBuilder.new
|
|
20
|
+
# .import(expression_grammar, prefix: "expr")
|
|
21
|
+
# .import(type_grammar, prefix: "type")
|
|
22
|
+
# .rule("typed", seq([ref("expr:root"), str(":"), ref("type:root")]))
|
|
23
|
+
# .build
|
|
24
|
+
#
|
|
25
|
+
# Requires native extension for full functionality.
|
|
26
|
+
|
|
27
|
+
module Parsanol
|
|
28
|
+
class GrammarBuilder
  # Create a new grammar builder with no rules, no imports and no root.
  def initialize
    @rules = {}
    @imports = []
    @root = nil
  end

  # Define (or overwrite) a rule.
  #
  # @param name [String, Symbol] Rule name; stored as a String
  # @param parslet [Parsanol::Atoms::Base] Parslet atom
  # @return [self] For chaining
  def rule(name, parslet)
    @rules[name.to_s] = parslet
    self
  end

  # Look up a previously defined rule.
  #
  # @param name [String, Symbol] Rule name
  # @return [Parsanol::Atoms::Base, nil] The rule atom, or nil if undefined
  def [](name)
    @rules[name.to_s]
  end

  # Set the root rule.
  #
  # @param name [String, Symbol] Root rule name; stored as a String
  # @return [self] For chaining
  def root(name)
    @root = name.to_s
    self
  end

  # Import another grammar with an optional prefix.
  #
  # @param grammar [GrammarBuilder, Hash] Grammar to import
  # @param prefix [String, nil] Optional prefix for imported rules
  # @return [self] For chaining
  # @raise [ArgumentError] if grammar is neither a GrammarBuilder nor a Hash
  def import(grammar, prefix: nil)
    @imports << { grammar: normalize_grammar(grammar), prefix: prefix }
    self
  end

  # Import another grammar with an explicit rule mapping.
  #
  # @param grammar [GrammarBuilder, Hash] Grammar to import
  # @param prefix [String, nil] Optional prefix
  # @param rules [Hash] Rule mapping {from_rule: to_rule}
  # @return [self] For chaining
  # @raise [ArgumentError] if grammar is neither a GrammarBuilder nor a Hash
  def import_with_rules(grammar, prefix: nil, rules: {})
    @imports << { grammar: normalize_grammar(grammar), prefix: prefix, rules: rules }
    self
  end

  # Build the grammar.
  #
  # The returned Hash references the builder's own rule table and import
  # list (they are not copied).
  #
  # @return [Hash] Grammar representation with :rules, :root and :imports
  def build
    {
      rules: @rules,
      root: @root,
      imports: @imports
    }
  end

  # Convert to JSON for the native parser.
  #
  # Arguments are forwarded to Hash#to_json so that generator state passed
  # in by the JSON library (for example from JSON.pretty_generate) is
  # honoured instead of being dropped.
  #
  # NOTE(review): this file does not require 'json' itself; it is assumed
  # to be loaded elsewhere in the gem - confirm.
  #
  # @return [String] JSON representation
  def to_json(*args)
    build.to_json(*args)
  end

  # Get as a Hash.
  #
  # @return [Hash] Grammar representation (same as #build)
  def to_h
    build
  end

  # Reference another rule in this grammar.
  #
  # @param name [String, Symbol] Rule name
  # @return [Parsanol::Atoms::Entity] Entity referencing the rule
  def ref(name)
    Parsanol::Atoms::Entity.new(name)
  end

  # Reference the root of another grammar.
  #
  # @param grammar_name [String] Name of the grammar (for prefixed imports);
  #   when omitted the plain "root" rule is referenced
  # @return [Parsanol::Atoms::Entity] Entity referencing the root
  def ref_root(grammar_name = nil)
    if grammar_name
      ref("#{grammar_name}:root")
    else
      ref('root')
    end
  end

  class << self
    # Create a grammar from a block.
    #
    # The block is evaluated with the new builder as +self+ (and also
    # receives it as its argument). Without a block an empty grammar is
    # returned.
    #
    # @yield [GrammarBuilder] Builder to configure
    # @return [Hash] Built grammar
    def build(&block)
      builder = new
      builder.instance_eval(&block) if block
      builder.build
    end

    # Import a grammar from a JSON string.
    #
    # @param json [String] JSON representation
    # @return [Hash] Grammar representation as returned by JSON.parse
    def from_json(json)
      JSON.parse(json)
    end
  end

  private

  # Reduce an importable grammar to its Hash form.
  def normalize_grammar(grammar)
    case grammar
    when GrammarBuilder then grammar.to_h
    when Hash then grammar
    else
      raise ArgumentError, "Expected GrammarBuilder or Hash, got #{grammar.class}"
    end
  end
end
|
|
167
|
+
|
|
168
|
+
# Module methods for DSL
|
|
169
|
+
module GrammarBuilderDSL
  # Shorthand for GrammarBuilder.build: forwards the given block and
  # returns whatever GrammarBuilder.build returns (the built grammar,
  # not the builder itself).
  #
  # @yield configuration block, passed through to GrammarBuilder.build
  # @return [Hash] Built grammar
  def grammar(&definition)
    GrammarBuilder.build(&definition)
  end
end
|
|
177
|
+
end
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parsanol::IncrementalParser - Incremental Parser for Editor Integration
|
|
4
|
+
#
|
|
5
|
+
# Parse with support for incremental edits. This is useful for editor integration
|
|
6
|
+
# where the input changes frequently (e.g., as the user types).
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# parser = Parsanol::IncrementalParser.new(grammar, initial_text)
|
|
10
|
+
#
|
|
11
|
+
# # When text changes
|
|
12
|
+
# parser.apply_edit(start: 5, deleted: 3, inserted: "new")
|
|
13
|
+
# result = parser.reparse
|
|
14
|
+
#
|
|
15
|
+
# Requires native extension for full functionality.
|
|
16
|
+
|
|
17
|
+
module Parsanol
|
|
18
|
+
# Represents an edit to apply to the input
|
|
19
|
+
# Represents an edit to apply to the input: starting at +start+,
# +deleted+ characters are removed and +inserted+ is put in their place.
class Edit
  attr_reader :start, :deleted, :inserted

  # @param start [Integer] Start position of the edit
  # @param deleted [Integer] Number of characters deleted
  # @param inserted [String] Text inserted at +start+
  def initialize(start:, deleted:, inserted: '')
    @start = start
    @deleted = deleted
    @inserted = inserted
  end

  # Get the old range that was replaced.
  #
  # @return [Range] half-open range start...(start + deleted)
  def old_range
    @start...(@start + @deleted)
  end

  # Check if this edit affects a specific position.
  #
  # @param position [Integer] position to test
  # @return [Boolean] true when position lies in
  #   [start, start + deleted + inserted.length)
  def affects_position?(position)
    position >= @start && position < @start + @deleted + @inserted.length
  end

  # Get the new position after this edit.
  #
  # @return [Integer] start + inserted.length
  def new_position
    @start + @inserted.length
  end

  # Apply this edit to a string, returning a new string.
  #
  # A deletion that extends past the end of the input removes everything
  # up to the end instead of raising.
  #
  # @param input [String] text to edit (not modified)
  # @return [String] edited text
  def apply(input)
    head = input[0...@start]
    # String#[] yields nil once the range starts beyond the end of the
    # string; treat that as "nothing left after the deletion".
    tail = input[(@start + @deleted)..] || ''
    head + @inserted + tail
  end

  def to_s
    "Edit(#{@start}, +#{@inserted.length}, -#{@deleted})"
  end

  # Two edits are equal when start, deleted count and inserted text match.
  def ==(other)
    return false unless other.is_a?(Edit)

    @start == other.start && @deleted == other.deleted && @inserted == other.inserted
  end
end
|
|
58
|
+
|
|
59
|
+
# Keeps an input string plus the edits applied to it and reparses on
# demand, delegating to the native incremental parser when
# Parsanol::Native reports it is available and to a plain Ruby parse
# otherwise.
class IncrementalParser
  # Get the current input.
  #
  # @return [String] Current input
  attr_reader :input

  # Create a new incremental parser.
  #
  # @param grammar [Parsanol::Parser, Parsanol::Atoms::Base] Grammar to use.
  #   An object responding to +root+ is asked for its root atom; anything
  #   else is used as the atom directly.
  # @param initial_input [String] Initial input string
  def initialize(grammar, initial_input = '')
    @grammar = grammar
    @input = initial_input
    @native_parser = build_native_parser(initial_input)
    @edits = []
    @cached_result = nil
  end

  # Apply an edit to the parser.
  #
  # Records the edit, updates the Ruby-side copy of the input, drops the
  # cached result and, when a native parser exists, forwards the edit to it.
  #
  # @param start [Integer] Start position of edit
  # @param deleted [Integer] Number of characters deleted
  # @param inserted [String] Text to insert
  def apply_edit(start:, deleted:, inserted: '')
    edit = Edit.new(start: start, deleted: deleted, inserted: inserted)
    @edits << edit

    # Update cached input
    @input = edit.apply(@input)

    # Invalidate cached result
    @cached_result = nil

    return unless @native_parser

    Parsanol::Native.incremental_parser_apply_edit(@native_parser, start, deleted, inserted)
  end

  # Convenience method to apply multiple edits, in order.
  #
  # @param edits [Array<Hash>] Array of {start:, deleted:, inserted:} hashes
  def apply_edits(edits)
    edits.each { |edit_hash| apply_edit(**edit_hash) }
  end

  # Reparse with current input (or optional new input).
  #
  # Passing new input replaces the current input and discards the recorded
  # edits. A truthy result is cached and returned again until the next
  # edit, invalidation or reset.
  #
  # @param new_input [String, nil] Optional new input (replaces current)
  # @return [Object] Parse result
  def reparse(new_input = nil)
    if new_input
      @input = new_input
      @edits.clear
      @cached_result = nil
    end

    return @cached_result if @cached_result

    @cached_result =
      if @native_parser
        Parsanol::Native.incremental_parser_reparse(@native_parser, @input)
      else
        # Pure Ruby fallback - reparse from scratch
        root_atom.parse(@input)
      end
  end

  # Invalidate a range (for external changes).
  #
  # The range itself is currently ignored: the whole cached result is
  # dropped and nothing is forwarded to the native parser.
  #
  # @param _start [Integer] Start position
  # @param _end_pos [Integer] End position
  # @return [nil]
  def invalidate_range(_start, _end_pos)
    @cached_result = nil
  end

  # Get all applied edits.
  #
  # @return [Array<Edit>] Copy of the recorded edits
  def edits
    @edits.dup
  end

  # Check whether a reparse is pending.
  #
  # @return [Boolean] True if at least one edit is recorded and no result
  #   is cached
  def dirty?
    @cached_result.nil? && !@edits.empty?
  end

  # Reset to initial state.
  #
  # Clears the input (or replaces it with +new_input+), the recorded edits
  # and the cached result. When a native parser is in use it is always
  # recreated from the resulting input, so it never keeps the previous
  # document after a reset.
  #
  # @param new_input [String, nil] Optional new initial input
  def reset(new_input = nil)
    @input = new_input || ''
    @edits.clear
    @cached_result = nil

    return unless @native_parser

    @native_parser = build_native_parser(@input)
  end

  private

  # Root atom of the configured grammar (the grammar itself when it has no
  # +root+ method).
  def root_atom
    @grammar.respond_to?(:root) ? @grammar.root : @grammar
  end

  # Native parser handle for +text+, or nil when the native extension is
  # not available.
  def build_native_parser(text)
    return nil unless Parsanol::Native.available?

    grammar_json = Parsanol::Native.serialize_grammar(root_atom)
    Parsanol::Native.incremental_parser_new(grammar_json, text)
  end
end
|
|
177
|
+
end
|