parsanol 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +546 -0
- data/Cargo.toml +9 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/ext/parsanol_native/Cargo.toml +34 -0
- data/ext/parsanol_native/extconf.rb +15 -0
- data/ext/parsanol_native/src/lib.rs +17 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +293 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Base class for AST visitors following the Visitor pattern
|
|
4
|
+
# This separates tree traversal logic from transformation logic
|
|
5
|
+
# making the code more maintainable and extensible.
|
|
6
|
+
module Parsanol
|
|
7
|
+
# Base visitor class that traverses the Parslet AST
|
|
8
|
+
# Subclasses override visit_* methods to perform transformations
|
|
9
|
+
class ASTVisitor
  # Dispatches +parslet+ to the matching visit_* hook based on its atom class.
  # Anything that is not one of the atom classes listed below is returned
  # untouched.
  #
  # @param parslet [Parsanol::Atoms::Base] node to visit
  # @return [Parsanol::Atoms::Base] the node, or its rebuilt replacement
  def visit(parslet)
    case parslet
    when Parsanol::Atoms::Sequence    then visit_sequence(parslet)
    when Parsanol::Atoms::Alternative then visit_alternative(parslet)
    when Parsanol::Atoms::Repetition  then visit_repetition(parslet)
    when Parsanol::Atoms::Lookahead   then visit_lookahead(parslet)
    when Parsanol::Atoms::Named       then visit_named(parslet)
    when Parsanol::Atoms::Str         then visit_str(parslet)
    when Parsanol::Atoms::Re          then visit_re(parslet)
    else parslet
    end
  end

  # Visits every member of a sequence. A new Sequence is built only when the
  # visited members no longer compare equal (Array#==) to the current ones.
  #
  # @param parslet [Parsanol::Atoms::Sequence] sequence to visit
  # @return [Parsanol::Atoms::Base] the same sequence or a rebuilt one
  def visit_sequence(parslet)
    visited = parslet.parslets.map { |child| visit(child) }
    return parslet if visited == parslet.parslets

    Parsanol::Atoms::Sequence.new(*visited)
  end

  # Visits every branch of a choice. A new Alternative is built only when the
  # visited branches no longer compare equal (Array#==) to the current ones.
  #
  # @param parslet [Parsanol::Atoms::Alternative] choice to visit
  # @return [Parsanol::Atoms::Base] the same choice or a rebuilt one
  def visit_alternative(parslet)
    visited = parslet.alternatives.map { |branch| visit(branch) }
    return parslet if visited == parslet.alternatives

    Parsanol::Atoms::Alternative.new(*visited)
  end

  # Visits the repeated atom. When the visit yields a different object, the
  # repetition is rebuilt with the original min, max and @tag.
  #
  # @param parslet [Parsanol::Atoms::Repetition] repetition to visit
  # @return [Parsanol::Atoms::Base] the same repetition or a rebuilt one
  def visit_repetition(parslet)
    inner = visit(parslet.parslet)
    return parslet if inner.equal?(parslet.parslet)

    # @tag is read straight from the instance variable, as no reader is used here.
    Parsanol::Atoms::Repetition.new(inner, parslet.min, parslet.max, parslet.instance_variable_get(:@tag))
  end

  # Visits the atom guarded by a lookahead. When the visit yields a different
  # object, the lookahead is rebuilt with the original polarity.
  #
  # @param parslet [Parsanol::Atoms::Lookahead] lookahead to visit
  # @return [Parsanol::Atoms::Base] the same lookahead or a rebuilt one
  def visit_lookahead(parslet)
    inner = visit(parslet.bound_parslet)
    return parslet if inner.equal?(parslet.bound_parslet)

    Parsanol::Atoms::Lookahead.new(inner, parslet.positive)
  end

  # Visits the atom wrapped by a name. When the visit yields a different
  # object, the wrapper is rebuilt under the original name.
  #
  # @param parslet [Parsanol::Atoms::Named] named atom to visit
  # @return [Parsanol::Atoms::Base] the same named atom or a rebuilt one
  def visit_named(parslet)
    inner = visit(parslet.parslet)
    return parslet if inner.equal?(parslet.parslet)

    Parsanol::Atoms::Named.new(inner, parslet.name)
  end

  # Hook for string literals; the default keeps the node unchanged.
  #
  # @param parslet [Parsanol::Atoms::Str] string atom
  # @return [Parsanol::Atoms::Base] the node itself
  def visit_str(parslet)
    parslet
  end

  # Hook for regex atoms; the default keeps the node unchanged.
  #
  # @param parslet [Parsanol::Atoms::Re] regex atom
  # @return [Parsanol::Atoms::Base] the node itself
  def visit_re(parslet)
    parslet
  end
end
|
|
122
|
+
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Ordered choice - tries alternatives left-to-right, returning first success.
|
|
4
|
+
# Fails only if all alternatives fail.
|
|
5
|
+
#
|
|
6
|
+
# @example Simple choice
|
|
7
|
+
# str('a') | str('b') # matches 'a' or 'b'
|
|
8
|
+
#
|
|
9
|
+
# This is PEG ordered choice: once an alternative succeeds, later alternatives are never tried.
|
|
10
|
+
#
|
|
11
|
+
module Parsanol
|
|
12
|
+
module Atoms
|
|
13
|
+
class Alternative < Parsanol::Atoms::Base
  # @return [Array<Parsanol::Atoms::Base>] the branches, in the order they are tried
  attr_reader :alternatives

  # Builds a choice over the given branches and precomputes the message
  # reported when every branch fails.
  #
  # @param options [Array<Parsanol::Atoms::Base>] branches, in priority order
  def initialize(*options)
    super()
    @alternatives = options
    @choice_error = "Expected one of #{options.inspect}"
  end

  # Appends +other+ to this choice without nesting: when +other+ is itself an
  # Alternative its branches are spliced in, otherwise it is added as one branch.
  #
  # @param other [Parsanol::Atoms::Base] atom or choice to append
  # @return [Parsanol::Atoms::Alternative] a new, flat choice
  def |(other)
    appended = other.is_a?(Parsanol::Atoms::Alternative) ? other.alternatives : [other]
    self.class.new(*(@alternatives + appended))
  end

  # Applies the branches in order and returns the first success. Choices of
  # exactly two or three branches use unrolled helpers; every other size goes
  # through the generic loop.
  #
  # @param source [Parsanol::Source] input
  # @param context [Parsanol::Atoms::Context] parsing context
  # @param consume_all [Boolean] forwarded to each branch
  # @return [Array(Boolean, Object)] success tuple, or the result of context.err
  def try(source, context, consume_all)
    branches = @alternatives

    if branches.size == 2
      try_two(branches[0], branches[1], source, context, consume_all)
    elsif branches.size == 3
      try_three(branches[0], branches[1], branches[2], source, context, consume_all)
    else
      try_many(branches, source, context, consume_all)
    end
  end

  precedence CHOICE

  # Renders the branches joined by ' / '.
  #
  # @param prec [Integer] precedence handed to each branch's #to_s
  # @return [String]
  def to_s_inner(prec)
    rendered = @alternatives.map { |branch| branch.to_s(prec) }
    rendered.join(' / ')
  end

  # Union of the FIRST sets of all branches; an empty Set when there are none.
  #
  # @return [Set]
  def compute_first_set
    return Set.new if @alternatives.empty?

    @alternatives.map(&:first_set).reduce { |merged, set| merged.union(set) }
  end

  private

  # Unrolled path for exactly two branches.
  def try_two(a1, a2, source, context, consume_all)
    matched, first = a1.apply(source, context, consume_all)
    return [matched, first] if matched

    matched, second = a2.apply(source, context, consume_all)
    return [matched, second] if matched

    # Both failed: report with each branch's failure value as children.
    context.err(self, source, @choice_error, [first, second])
  end

  # Unrolled path for exactly three branches.
  def try_three(a1, a2, a3, source, context, consume_all)
    matched, first = a1.apply(source, context, consume_all)
    return [matched, first] if matched

    matched, second = a2.apply(source, context, consume_all)
    return [matched, second] if matched

    matched, third = a3.apply(source, context, consume_all)
    return [matched, third] if matched

    context.err(self, source, @choice_error, [first, second, third])
  end

  # Generic path for any number of branches. The failure list is created on
  # the first failure, so it stays nil when +options+ is empty.
  def try_many(options, source, context, consume_all)
    failures = nil

    options.each do |branch|
      matched, payload = branch.apply(source, context, consume_all)
      return [matched, payload] if matched

      (failures ||= []) << payload
    end

    context.err(self, source, @choice_error, failures)
  end
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Base class for all parser atoms. Handles parsing orchestration,
|
|
4
|
+
# memoization, error handling, and result processing.
|
|
5
|
+
#
|
|
6
|
+
# Concrete atoms must implement #try(source, context, consume_all).
|
|
7
|
+
#
|
|
8
|
+
# @abstract Implement #try to create custom parser atoms
|
|
9
|
+
module Parsanol
|
|
10
|
+
module Atoms
|
|
11
|
+
class Base
  include Parsanol::Atoms::Precedence
  include Parsanol::Atoms::DSL
  include Parsanol::Atoms::CanFlatten
  include Parsanol::FirstSet

  # Optional label; when set, #to_s prints it instead of the atom's own rendering.
  attr_accessor :label

  # Prefix of the error raised when input remains after a successful match.
  UNCONSUMED_INPUT_MSG = "Don't know what to do with "

  # Parses +source+ with this atom. A first pass runs without an error
  # reporter; only when it fails is the input parsed again with a reporter so
  # that a detailed failure can be raised.
  #
  # @param source [String, Parsanol::Source] input to parse
  # @param options [Hash] parsing options
  # @option options [Object] :reporter error reporter used for the second pass
  # @option options [Boolean] :prefix when truthy, trailing input is allowed
  # @return [Object] the flattened parse result
  # @raise whatever the failure cause's #raise raises when parsing fails
  def parse(source, options = {})
    input = normalize_input(source)
    consume_all = !options[:prefix]

    matched, value = run_with_context(input, nil, consume_all)
    if matched
      finalize_result(value)
    else
      report_detailed_error(input, consume_all, options[:reporter], value)
    end
  end

  # Builds a fresh Context around +reporter+ and applies this atom within it.
  #
  # @param input [Parsanol::Source] the source
  # @param reporter [Object, nil] error reporter
  # @param consume_all [Boolean] require complete consumption
  # @return [Array(Boolean, Object)] outcome tuple
  def run_with_context(input, reporter, consume_all)
    context = Parsanol::Atoms::Context.new(reporter, parser_class: detect_parser_class)
    apply(input, context, consume_all)
  end

  # Runs this atom through the context's cache. On failure the byte position
  # is rewound to where it started; on success the context is notified and,
  # if +consume_all+ is set, leftover input turns the success into an error.
  #
  # @param input [Parsanol::Source] source to parse
  # @param context [Parsanol::Atoms::Context] parsing state
  # @param consume_all [Boolean] consume entire input
  # @return [Array(Boolean, Object)] outcome pair
  def apply(input, context, consume_all = false)
    start = input.bytepos
    result = context.try_with_cache(self, input, consume_all)

    if result.first
      context.succ(input)

      if consume_all && input.chars_left.positive?
        unconsumed_error(input, context, start)
      else
        result
      end
    else
      handle_failure(input, start, result)
    end
  end

  # Matching primitive that concrete atoms must provide.
  #
  # @param input [Parsanol::Source] source
  # @param context [Parsanol::Atoms::Context] context
  # @param consume_all [Boolean] consume all flag
  # @return [Array(Boolean, Object)] parse result
  # @raise [NotImplementedError] always, in this base implementation
  def try(input, context, consume_all)
    raise NotImplementedError, 'Atom must implement #try(source, context, consume_all)'
  end

  # Whether results of this atom should be cached. Defaults to true.
  #
  # @return [Boolean]
  def cached?
    true
  end

  # Whether this atom yields results that need no flattening. Defaults to false.
  #
  # @return [Boolean]
  def flat?
    false
  end

  # Class-level macro that defines the instance method #precedence to return +level+.
  #
  # @param level [Integer] precedence value
  def self.precedence(level)
    define_method(:precedence) { level }
  end
  precedence ATOM

  # Renders the atom, wrapped in parentheses when the caller's precedence is
  # lower than this atom's own.
  #
  # @param outer [Integer] caller's precedence
  # @return [String]
  def to_s(outer = TOP)
    text = label || to_s_inner(precedence)
    return text unless outer < precedence

    "(#{text})"
  end

  # @return [String] same as #to_s at top-level precedence
  def inspect
    to_s(TOP)
  end

  protected

  # Frozen tags and result tuples that #ok hands back instead of allocating.
  NIL_OK = [true, nil].freeze
  EMPTY_ARR = [].freeze
  REP_TAG = [:repetition].freeze
  REP_OK = [true, REP_TAG].freeze
  SEQ_TAG = [:sequence].freeze
  SEQ_OK = [true, SEQ_TAG].freeze
  EMPTY_MAP = {}.freeze
  MAP_OK = [true, EMPTY_MAP].freeze
  CAP_TAG = [:capture].freeze
  CAP_OK = [true, CAP_TAG].freeze

  # Wraps +data+ in a success tuple, returning a shared frozen tuple when
  # +data+ is nil or is one of the frozen constants above (identity check).
  #
  # @param data [Object] the value
  # @return [Array(true, Object)]
  def ok(data)
    if data.nil?
      NIL_OK
    elsif data.equal?(EMPTY_ARR)
      [true, EMPTY_ARR]
    elsif data.equal?(EMPTY_MAP)
      MAP_OK
    elsif data.equal?(REP_TAG)
      REP_OK
    elsif data.equal?(SEQ_TAG)
      SEQ_OK
    elsif data.equal?(CAP_TAG)
      CAP_OK
    else
      [true, data]
    end
  end

  # Older name for #ok.
  alias succ ok

  private

  # Returns +source+ as-is when it responds to #line_and_column, otherwise
  # wraps it in a Parsanol::Source.
  def normalize_input(source)
    return source if source.respond_to?(:line_and_column)

    Parsanol::Source.new(source)
  end

  # Returns this object's class when it is a Parsanol::Parser, nil otherwise.
  def detect_parser_class
    self.class if is_a?(Parsanol::Parser)
  end

  # Rewinds the input to +saved_pos+ and passes the failed outcome through.
  def handle_failure(input, saved_pos, outcome)
    input.bytepos = saved_pos
    outcome
  end

  # Builds the error for leftover input: reads up to 10 units as a preview,
  # rewinds to +saved_pos+, and reports at the position where the excess began.
  def unconsumed_error(input, context, saved_pos)
    excess_pos = input.bytepos
    preview = input.consume(10)
    input.bytepos = saved_pos
    message = UNCONSUMED_INPUT_MSG + preview.to_s.inspect
    context.err_at(self, input, message, excess_pos)
  end

  # Parses again from byte 0 with a reporter (a Tree reporter when none was
  # supplied) and raises the resulting cause.
  def report_detailed_error(input, consume_all, reporter, _initial_error)
    input.bytepos = 0
    active_reporter = reporter || Parsanol::ErrorReporter::Tree.new
    matched, cause = run_with_context(input, active_reporter, consume_all)

    # The first pass failed, so this pass is expected to fail as well.
    raise 'Invariant violation: parse succeeded during error reporting' if matched

    cause.raise
  end

  # Turns the raw tagged result into the user-facing value.
  def finalize_result(value)
    flatten(value)
  end
end
|
|
201
|
+
end
|
|
202
|
+
end
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module Atoms
|
|
5
|
+
# A series of helper functions that have the common topic of flattening
|
|
6
|
+
# result values into the intermediary tree that consists of Ruby Hashes and
|
|
7
|
+
# Arrays.
|
|
8
|
+
#
|
|
9
|
+
# This module has one main function, #flatten, that takes an annotated
|
|
10
|
+
# structure as input and returns the reduced form that users expect from
|
|
11
|
+
# Atom#parse.
|
|
12
|
+
#
|
|
13
|
+
# NOTE: Since all of these functions are just that, functions without
|
|
14
|
+
# side effects, they are in a module and not in a class. It's hard to draw
|
|
15
|
+
# the line sometimes, but this is beyond.
|
|
16
|
+
#
|
|
17
|
+
module CanFlatten
  # Reduces a tagged intermediate result to the value handed to the user.
  #
  # Non-array values pass through unchanged. Arrays are read as
  # [tag, *children] where tag is :sequence, :maybe or :repetition; children
  # are flattened recursively and then combined according to the tag.
  #
  # @param value [Object] raw result from an atom
  # @param named [Boolean] true when the result will be stored under a name;
  #   this keeps nil for an absent :maybe and [] for an empty :repetition
  # @return [Object] the flattened value
  # @raise [RuntimeError] when the tag is not one of the three known tags
  def flatten(value, named = false)
    return value unless value.is_a?(Array)

    tag = value[0]
    tail_size = value.size - 1

    # Fast path for the common single-child case: skips the temporary array.
    if tail_size == 1
      only = flatten(value[1])
      case tag
      when :sequence
        # Must agree with flatten_sequence, which compacts nils away and
        # yields '' for an empty list.
        return only.nil? ? '' : only
      when :maybe
        return named ? only : (only || '')
      when :repetition
        return flatten_repetition([only], named)
      end
    end

    result = Array.new(tail_size) { |i| flatten(value[i + 1]) }

    case tag
    when :sequence
      return flatten_sequence(result)
    when :maybe
      return named ? result.first : result.first || ''
    when :repetition
      return flatten_repetition(result, named)
    end

    raise "BUG: Unknown tag #{tag.inspect}."
  end

  # Left fold that seeds the accumulator with the first element.
  #
  # @param list [Array] elements to fold
  # @yield [acc, element] combines the accumulator with the next element
  # @return [Object] '' for an empty list, the sole element for a
  #   one-element list, otherwise the folded value
  def foldl(list, &block)
    return '' if list.empty?

    # reduce without an initial value starts from list[0] and does not call
    # the block for a one-element list.
    list.reduce(&block)
  end

  # Flattens the results of a sequence: drops nils, then merges neighbours
  # pairwise with #merge_fold.
  #
  # @api private
  # @param list [Array] flattened child results
  # @return [Object] merged result, '' when nothing is left after compacting
  def flatten_sequence(list)
    foldl(list.compact) { |l, r| merge_fold(l, r) }
  end

  # Merges two adjacent sequence results.
  #
  # Same class: hashes are merged (with a duplicate-key warning), anything
  # else is joined with +. Mixed String/Slice: the Slice wins. String-like
  # next to something else: the other value wins. Hash next to a non-hash:
  # the hash is wrapped into the other operand via Array concatenation.
  #
  # @api private
  # @param l [Object] accumulated result
  # @param r [Object] next result
  # @return [Object] merged result
  # @raise [RuntimeError] for combinations not covered above
  def merge_fold(l, r)
    l_class = l.class
    r_class = r.class

    # equal pairs: merge. ----------------------------------------------------
    if l_class == r_class
      return l + r unless l_class == Hash

      warn_about_duplicate_keys(l, r)
      return l.merge(r)
    end

    # unequal pairs: hoist to same level. ------------------------------------
    l_is_slice = l.instance_of?(Parsanol::Slice)
    r_is_slice = r.instance_of?(Parsanol::Slice)
    l_is_str = l_class == String || l_is_slice
    r_is_str = r_class == String || r_is_slice

    if l_is_str && r_is_str
      # Classes differ, so exactly one side is a Slice: the slice wins.
      return r if r_is_slice
      return l if l_is_slice

      raise 'NOTREACHED: What other stringlike classes are there?'
    end

    # If only one side is string-like, the other side is more important.
    return l if r_is_str
    return r if l_is_str

    # Otherwise make room for the hash in the other operand.
    return l + [r] if r_class == Hash
    return [l] + r if l_class == Hash

    raise "Unhandled case when foldr'ing sequence."
  end

  # Flattens the results of a repetition.
  #
  # If any element is a Hash, only the hashes are kept. Otherwise, if any
  # element is an Array, only the arrays are kept and spliced one level.
  # Otherwise the elements are concatenated with +; an empty list becomes
  # [] when +named+ and '' when not.
  #
  # @api private
  # @param list [Array] flattened child results
  # @param named [Boolean] whether the result will be stored under a name
  # @return [Object] flattened repetition result
  def flatten_repetition(list, named)
    # Keyed subtrees present: discard everything in between. To keep the
    # strings, name them.
    return list.select { |e| e.instance_of?(Hash) } if list.any? { |e| e.instance_of?(Hash) }

    # Nested arrays present: bring their contents up to this level.
    return list.select { |e| e.instance_of?(Array) }.flatten(1) if list.any? { |e| e.instance_of?(Array) }

    # Consistent handling of empty lists, when we act on a named result
    return [] if named && list.empty?

    # Only plain values are left: concatenate them.
    foldl(list.compact) { |s, e| s + e }
  end

  # Emits a warning when two hashes about to be merged share keys, because
  # the values of the second hash will overwrite those of the first.
  #
  # @param h1 [Hash] left-hand hash
  # @param h2 [Hash] right-hand hash
  # @return [nil]
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    return if d.empty?

    warn "Duplicate subtrees while merging result of \n #{inspect}\nonly the values " \
         "of the latter will be kept. (keys: #{d.inspect})"
  end
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Captures the result of parsing and stores it for later use.
|
|
4
|
+
# Use the capture method to capture a sub-expression result, then
|
|
5
|
+
# access it via context.captures[:name] in dynamic blocks.
|
|
6
|
+
#
|
|
7
|
+
# @example
|
|
8
|
+
# str('a').capture(:first) >> dynamic { |ctx| str(ctx.captures[:first]) }
|
|
9
|
+
#
|
|
10
|
+
module Parsanol
|
|
11
|
+
module Atoms
|
|
12
|
+
class Capture < Parsanol::Atoms::Base
  # @return [Parsanol::Atoms::Base] the wrapped atom
  # @return [Symbol] key under which the captured value is stored
  attr_reader :inner_atom, :capture_key

  # @param atom [Parsanol::Atoms::Base] atom whose result is captured
  # @param name [#to_sym] capture name; stored as a Symbol
  def initialize(atom, name)
    super()
    @inner_atom = atom
    @capture_key = name.to_sym
  end

  # Applies the wrapped atom and, on success, stores its flattened result in
  # context.captures under the capture key. The wrapped atom's raw outcome is
  # returned unchanged.
  #
  # +consume_all+ defaults to false to match Base#apply, so a capture can be
  # applied with two arguments like any other atom.
  #
  # @param source [Parsanol::Source] input
  # @param context [Parsanol::Atoms::Context] parsing context holding captures
  # @param consume_all [Boolean] forwarded to the wrapped atom
  # @return [Array(Boolean, Object)] outcome of the wrapped atom
  def apply(source, context, consume_all = false)
    success, result = @inner_atom.apply(source, context, consume_all)

    # Store the user-facing (flattened) value, not the raw tagged tree.
    context.captures[@capture_key] = flatten(result) if success

    [success, result]
  end

  # Renders as "(:key = inner)".
  #
  # @param prec [Integer] precedence handed to the wrapped atom's #to_s
  # @return [String]
  def to_s_inner(prec)
    "(#{@capture_key.inspect} = #{@inner_atom.to_s(prec)})"
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|