parsanol 1.0.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/lib/parsanol/3.2/parsanol_native.so +0 -0
- data/lib/parsanol/3.3/parsanol_native.so +0 -0
- data/lib/parsanol/3.4/parsanol_native.so +0 -0
- data/lib/parsanol/4.0/parsanol_native.so +0 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +280 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Debug helper for parser development.
|
|
4
|
+
# Adds a convenient method to Parsanol::Atoms::Base for debugging parse failures.
|
|
5
|
+
#
|
|
6
|
+
# @example
|
|
7
|
+
# class MyParser < Parsanol::Parser
|
|
8
|
+
# rule(:foo) { str('foo') }
|
|
9
|
+
# root(:foo)
|
|
10
|
+
# end
|
|
11
|
+
#
|
|
12
|
+
# # Instead of writing rescue blocks:
|
|
13
|
+
# MyParser.new.parse_with_debug('invalid')
|
|
14
|
+
# # Prints the error tree automatically and returns nil
|
|
15
|
+
#
|
|
16
|
+
# Inspired by Parslet (MIT License).
|
|
17
|
+
module Parsanol
|
|
18
|
+
module Atoms
|
|
19
|
+
class Base
  # Development helper: runs #parse and, instead of letting a parse failure
  # propagate, prints the failure's ASCII error tree to standard output.
  #
  # @param input [String] the input to parse
  # @param options [Hash] options forwarded unchanged to #parse
  # @return [Object, nil] whatever #parse returns on success; nil when
  #   Parsanol::ParseFailed was raised
  def parse_with_debug(input, options = {})
    parse(input, options)
  rescue Parsanol::ParseFailed => failure
    # Render the cause tree first, then print it; the caller only sees nil.
    tree = failure.parse_failure_cause.ascii_tree
    puts tree
    nil
  end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Edit tracking for GPeg-style incremental parsing
|
|
4
|
+
# Based on the GPeg paper: "Fast Incremental PEG Parsing" (Yedidia, SLE 2021)
|
|
5
|
+
#
|
|
6
|
+
# Tracks edits to the input as [position, delta] pairs and enables lazy shifting
|
|
7
|
+
# of cached intervals without rebuilding the entire cache (O(1) edit cost).
|
|
8
|
+
#
|
|
9
|
+
module Parsanol
|
|
10
|
+
# Records edits made to an input and maps cached intervals onto the edited
# text, reporting intervals whose content was touched as invalidated.
class EditTracker
  # A single edit: an insertion (positive delta) or a deletion (negative
  # delta) anchored at +position+.
  class Edit
    attr_reader :position, :delta

    # @param position [Integer] offset at which the edit occurred
    # @param delta [Integer] characters added (> 0) or removed (< 0)
    def initialize(position, delta)
      @position = position
      @delta = delta
    end

    # @return [String] human-readable description of the edit
    def to_s
      if @delta.positive?
        "Insert(#{@delta} chars at #{@position})"
      else
        "Delete(#{-@delta} chars at #{@position})"
      end
    end
  end

  # Get all edits (for debugging)
  # @return [Array<Edit>] recorded edits in chronological order
  attr_reader :edits

  def initialize
    @edits = [] # List of edits in chronological order
  end

  # Record an insertion at position
  # @param position [Integer] Where the insertion occurred
  # @param length [Integer] Number of characters inserted
  def insert(position, length)
    @edits << Edit.new(position, length)
  end

  # Record a deletion at position
  # @param position [Integer] Where the deletion occurred
  # @param length [Integer] Number of characters deleted
  def delete(position, length)
    @edits << Edit.new(position, -length)
  end

  # Shift an interval based on accumulated edits.
  #
  # Edits are replayed in the order they were recorded, and each edit is
  # compared against the interval as already shifted by the earlier edits.
  # The interval is invalidated (nil) when an edit is anchored inside it, or
  # when a deletion anchored before it removes characters reaching into it.
  # Edits entirely before the interval shift both bounds by the edit's delta;
  # edits at or after the interval end leave it unchanged.
  #
  # @param low [Integer] Interval start position
  # @param high [Integer] Interval end position (exclusive)
  # @return [Array<Integer>, nil] Shifted [low, high) or nil if invalidated
  def shift_interval(low, high)
    shifted_low = low
    shifted_high = high

    @edits.each do |edit|
      # Skip zero-length edits (no-ops)
      next if edit.delta.zero?

      return nil if edit_overlaps?(edit, shifted_low, shifted_high)

      if edit.position < shifted_low
        # Edit lies completely before the interval - shift both boundaries
        shifted_low += edit.delta
        shifted_high += edit.delta
      end

      # Sanity check: ensure interval remains valid
      return nil if shifted_low.negative? || shifted_high < shifted_low
    end

    [shifted_low, shifted_high]
  end

  # Check if interval needs invalidation (overlaps with any edit)
  # @param low [Integer] Interval start position
  # @param high [Integer] Interval end position (exclusive)
  # @return [Boolean] true if interval should be invalidated
  def invalidates?(low, high)
    shift_interval(low, high).nil?
  end

  # Clear all recorded edits
  def clear
    @edits.clear
  end

  # Number of edits tracked
  def size
    @edits.size
  end

  # Check if any edits have been recorded
  def empty?
    @edits.empty?
  end

  private

  # Decides whether +edit+ touches the interval [low, high).
  #
  # @param edit [Edit] non-zero edit to test
  # @param low [Integer] current (already shifted) interval start
  # @param high [Integer] current (already shifted) interval end, exclusive
  # @return [Boolean] true when the interval must be invalidated
  def edit_overlaps?(edit, low, high)
    # An edit anchored inside the interval always invalidates it.
    return true if edit.position >= low && edit.position < high

    # A deletion anchored before the interval removes the characters in
    # [position, position - delta). If that range extends past the interval
    # start, part of the interval's content is gone, so shifting alone would
    # hand back a stale interval.
    edit.delta.negative? && edit.position < low && edit.position - edit.delta > low
  end
end
|
|
111
|
+
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module ErrorReporter
|
|
5
|
+
# Enhanced error reporter that uses contextual heuristics to provide
|
|
6
|
+
# more relevant error messages. Builds on the Deepest reporter by adding
|
|
7
|
+
# label tracking and intelligent error reset behavior.
|
|
8
|
+
#
|
|
9
|
+
# The key insight is that in a sequence of alternatives, the deepest error
|
|
10
|
+
# from a branch that was partially successful is more meaningful than
|
|
11
|
+
# errors from branches that failed immediately.
|
|
12
|
+
#
|
|
13
|
+
# @example Parser with labeled rules
|
|
14
|
+
# class MyParser < Parsanol::Parser
|
|
15
|
+
# rule(:expression, label: 'math expression') { ... }
|
|
16
|
+
# rule(:term, label: 'number or variable') { ... }
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# # Error messages will include "while parsing math expression"
|
|
20
|
+
# # context when expression rule fails deep in parsing
|
|
21
|
+
#
|
|
22
|
+
# Inspired by contextual error reporting strategies in modern parsers.
|
|
23
|
+
#
|
|
24
|
+
class Contextual < Deepest
  # Sets the last-success position to 0 and clears the tracked cause/label.
  def initialize
    @prev_success_pos = 0
    clear_state
  end

  # Success notification. When the source's byte position is at or past the
  # last recorded success position, that position is remembered and the
  # tracked error state is reset; an earlier position leaves state untouched.
  #
  # @param src [Parsanol::Source] input source
  # @return [nil]
  def succ(src)
    position = src.pos.bytepos

    if position >= @prev_success_pos
      @prev_success_pos = position
      reset
    end

    nil
  end

  # Drops the tracked deepest cause and the active label (position -1, no text).
  #
  # @return [void]
  def reset
    @deepest_cause = nil
    @active_label_pos = -1
    @active_label_text = nil
  end

  alias clear_state reset

  # Records an error through the parent reporter, then, if the failing atom
  # exposes a truthy #label, updates the active label and stamps the active
  # label text onto the cause returned by the parent.
  #
  # @param atom [Parsanol::Atoms::Base] atom that failed
  # @param src [Parsanol::Source] input source
  # @param msg [String, Array] error message
  # @param nested [Array, nil] child causes
  # @return [Parsanol::Cause] the cause returned by the parent reporter
  def err(atom, src, msg, nested = nil)
    cause = super

    lbl = atom.label if atom.respond_to?(:label)
    return cause unless lbl

    maybe_update_label(lbl, src.pos.bytepos)
    cause.set_label(@active_label_text)
    cause
  end

  # Replaces the active label when +byte_pos+ is at or beyond the position
  # the current label was recorded at; otherwise keeps the current label.
  #
  # @param lbl [String] label text
  # @param byte_pos [Integer] position in input
  # @return [void]
  def maybe_update_label(lbl, byte_pos)
    if byte_pos >= @active_label_pos
      @active_label_pos = byte_pos
      @active_label_text = lbl
    end
  end
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module ErrorReporter
|
|
5
|
+
# Error reporter that tracks the deepest (furthest into input) parse failure.
|
|
6
|
+
# Unlike Tree reporter which returns the most recent error, this reporter
|
|
7
|
+
# keeps track of errors at the greatest input position, as these are typically
|
|
8
|
+
# more useful for diagnosing what went wrong.
|
|
9
|
+
#
|
|
10
|
+
# The rationale is that errors occurring later in the input are more likely
|
|
11
|
+
# to represent what the user intended - early failures often represent
|
|
12
|
+
# alternative branches that simply didn't match.
|
|
13
|
+
#
|
|
14
|
+
# @example
|
|
15
|
+
# reporter = Parsanol::ErrorReporter::Deepest.new
|
|
16
|
+
# parser.parse(input, reporter: reporter)
|
|
17
|
+
# # The error cause will be the one at the furthest input position
|
|
18
|
+
#
|
|
19
|
+
# Inspired by "furthest failure" error reporting strategies in parser tools.
|
|
20
|
+
#
|
|
21
|
+
class Deepest < Base
  # @return [Parsanol::Cause, nil] the deepest cause encountered so far
  attr_reader :deepest_cause

  # Starts with no tracked cause.
  def initialize
    @deepest_cause = nil
  end

  # Builds a cause at the source's current position and runs it through
  # #deepest.
  #
  # @param atom [Parsanol::Atoms::Base] atom that failed (unused here)
  # @param source [Parsanol::Source] input source
  # @param message [String, Array] error message
  # @param children [Array, nil] child error causes
  # @return [Parsanol::Cause] result of #deepest for the new cause
  def err(_atom, source, message, children = nil)
    deepest(Cause.format(source, source.pos, message, children))
  end

  # Same as #err, but the cause is built at the explicitly given position.
  #
  # @param atom [Parsanol::Atoms::Base] atom that failed (unused here)
  # @param source [Parsanol::Source] input source
  # @param message [String, Array] error message
  # @param pos [Integer] position of the error
  # @param children [Array, nil] child error causes
  # @return [Parsanol::Cause] result of #deepest for the new cause
  def err_at(_atom, source, message, pos, children = nil)
    deepest(Cause.format(source, pos, message, children))
  end

  # Success notification; this reporter ignores it.
  #
  # @param source [Parsanol::Source] input source
  # @return [nil]
  def succ(_source)
    nil
  end

  # Compares the most deeply nested leaf of +cause+ with the tracked cause.
  # When nothing is tracked yet, or the leaf's position is >= the tracked
  # cause's position, the leaf becomes the tracked cause and +cause+ itself
  # is returned. Otherwise the previously tracked cause is returned.
  #
  # @param cause [Parsanol::Cause] error cause to evaluate
  # @return [Parsanol::Cause] +cause+ or the previously tracked cause
  def deepest(cause)
    leaf = find_deepest_leaf(cause).last
    tracked = @deepest_cause

    # Keep the existing record only when it is strictly further along.
    return tracked if tracked && !(leaf.pos >= tracked.pos)

    @deepest_cause = leaf
    cause
  end

  private

  # Walks the cause tree and returns the node with the greatest nesting
  # depth together with that depth. On equal depth the node found first
  # wins, because only a strictly greater rank replaces the current best.
  #
  # @param node [Parsanol::Cause] current node in error tree
  # @param rank [Integer] depth of +node+ from the root
  # @return [Array<Integer, Parsanol::Cause>] [depth, deepest_leaf]
  def find_deepest_leaf(node, rank = 0)
    winner = [rank, node]

    (node.children || []).each do |child|
      candidate = find_deepest_leaf(child, rank + 1)
      winner = candidate if candidate.first > winner.first
    end

    winner
  end
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
|
|
4
|
+
module ErrorReporter
|
|
5
|
+
# Default error reporter that builds a hierarchical tree of failure causes.
|
|
6
|
+
# Each parse failure creates a Cause node that can contain child causes
|
|
7
|
+
# from nested parse attempts.
|
|
8
|
+
#
|
|
9
|
+
# The resulting error tree mirrors the grammar structure, making it easy
|
|
10
|
+
# to understand which parts of the grammar failed and why.
|
|
11
|
+
#
|
|
12
|
+
# @example
|
|
13
|
+
# reporter = Parsanol::ErrorReporter::Tree.new
|
|
14
|
+
# parser.parse(input, reporter: reporter)
|
|
15
|
+
# # On failure, causes are available for inspection
|
|
16
|
+
#
|
|
17
|
+
# Inspired by error tree reporting patterns in parser generators.
|
|
18
|
+
#
|
|
19
|
+
class Tree < Base
  # Reports a failure at the source's current position by building a Cause
  # that carries the nested causes handed in by the caller.
  #
  # @param parser_atom [Parsanol::Atoms::Base] atom that failed (unused here)
  # @param src [Parsanol::Source] input source being parsed
  # @param msg [String, Array<String>] error description
  # @param nested_errors [Array<Cause>, nil] failures from inner parse attempts
  # @return [Parsanol::Cause] error cause node for this failure
  def err(_parser_atom, src, msg, nested_errors = nil)
    Cause.format(src, src.pos, msg, nested_errors)
  end

  # Reports a failure at an explicitly supplied position instead of the
  # source's current one.
  #
  # @param parser_atom [Parsanol::Atoms::Base] atom that failed (unused here)
  # @param src [Parsanol::Source] input source being parsed
  # @param msg [String, Array<String>] error description
  # @param error_pos [Integer] position where the error occurred
  # @param nested_errors [Array<Cause>, nil] failures from inner parse attempts
  # @return [Parsanol::Cause] error cause node for this failure
  def err_at(_parser_atom, src, msg, error_pos, nested_errors = nil)
    cause = Cause.format(src, error_pos, msg, nested_errors)
    cause
  end

  # Success notification; the tree reporter keeps no success state.
  #
  # @param src [Parsanol::Source] input source being parsed
  # @return [nil]
  def succ(_src)
    nil
  end
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# A namespace for all error reporters.
|
|
4
|
+
#
|
|
5
|
+
# Error reporters collect and format parse errors. The parsing engine
|
|
6
|
+
# calls reporter methods as it attempts to match atoms, building up
|
|
7
|
+
# an error structure that can be presented to the user.
|
|
8
|
+
#
|
|
9
|
+
# @example Using a specific error reporter
|
|
10
|
+
# parser = MyParser.new
|
|
11
|
+
# parser.parse(input, reporter: Parsanol::ErrorReporter::Deepest.new)
|
|
12
|
+
#
|
|
13
|
+
# @example Creating a custom error reporter
|
|
14
|
+
# class MyReporter < Parsanol::ErrorReporter::Base
|
|
15
|
+
# def initialize
|
|
16
|
+
# @errors = []
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# def err(atom, source, message, children = nil)
|
|
20
|
+
# @errors << { position: source.pos, message: message }
|
|
21
|
+
# @errors.last
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
# def err_at(atom, source, message, pos, children = nil)
|
|
25
|
+
# @errors << { position: pos, message: message }
|
|
26
|
+
# @errors.last
|
|
27
|
+
# end
|
|
28
|
+
# end
|
|
29
|
+
#
|
|
30
|
+
module Parsanol
|
|
31
|
+
module ErrorReporter
|
|
32
|
+
# Base class for error reporters.
|
|
33
|
+
#
|
|
34
|
+
# Error reporters collect and format parse errors. The parsing engine
|
|
35
|
+
# calls reporter methods as it attempts to match atoms, building up
|
|
36
|
+
# an error structure that can be presented to the user.
|
|
37
|
+
#
|
|
38
|
+
# Subclasses must implement {#err} and {#err_at} methods.
|
|
39
|
+
#
|
|
40
|
+
class Base
  # Report an error at the current parse position.
  #
  # @abstract This implementation always raises; subclasses override it.
  # @param atom [Parsanol::Atoms::Base] The atom that failed to match
  # @param source [Parsanol::Source] The input source
  # @param message [String, Array<String>] Error message(s)
  # @param children [Array<Cause>, nil] Child errors from deeper levels
  # @return [Object] An error cause object (implementation-specific)
  # @raise [NotImplementedError] always, in this base class
  def err(atom, source, message, children = nil)
    raise NotImplementedError, 'Error reporters must implement #err(atom, source, message, children)'
  end

  # Report an error at a specific position.
  #
  # @abstract This implementation always raises; subclasses override it.
  # @param atom [Parsanol::Atoms::Base] The atom that failed to match
  # @param source [Parsanol::Source] The input source
  # @param message [String, Array<String>] Error message(s)
  # @param pos [Integer] The byte position of the error
  # @param children [Array<Cause>, nil] Child errors from deeper levels
  # @return [Object] An error cause object (implementation-specific)
  # @raise [NotImplementedError] always, in this base class
  def err_at(atom, source, message, pos, children = nil)
    raise NotImplementedError, 'Error reporters must implement #err_at(atom, source, message, pos, children)'
  end

  # Hook invoked when an expression parses successfully. The base class
  # does nothing with it.
  #
  # @param source [Parsanol::Source] The input source at success position
  # @return [void]
  def succ(source)
    nil
  end

  # Hook invoked after a parse completes; override for cleanup or final
  # reporting. The base class does nothing.
  #
  # @return [void]
  def finalize
    nil
  end
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
require 'parsanol/error_reporter/tree'
|
|
99
|
+
require 'parsanol/error_reporter/deepest'
|
|
100
|
+
require 'parsanol/error_reporter/contextual'
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Treetop-style expression parser for Parsanol.
|
|
4
|
+
#
|
|
5
|
+
# This module provides a parser and transform for converting treetop-style
|
|
6
|
+
# expression strings into Parsanol atoms. The implementation is pure Ruby
|
|
7
|
+
# and is not accelerated by the Rust native extension.
|
|
8
|
+
#
|
|
9
|
+
# == Why Pure Ruby?
|
|
10
|
+
#
|
|
11
|
+
# Expression parsing happens at grammar definition time (once per parser class),
|
|
12
|
+
# not during input parsing. The overhead is negligible for typical use cases.
|
|
13
|
+
# The resulting atoms can still be used with Rust-accelerated parsing.
|
|
14
|
+
#
|
|
15
|
+
# == Syntax Reference
|
|
16
|
+
#
|
|
17
|
+
# Expression ::= Alternative ('/' Alternative)*
|
|
18
|
+
# Alternative ::= Sequence+
|
|
19
|
+
# Sequence ::= Occurrence+
|
|
20
|
+
# Occurrence ::= Atom ('?' | '*' | '+' | '{min,max}')?
|
|
21
|
+
# Atom ::= '(' Expression ')' | '.' | String | CharClass
|
|
22
|
+
# String ::= "'" (escape | [^'])* "'"
|
|
23
|
+
# CharClass ::= '[' (escape | [^\]])+ ']'
|
|
24
|
+
#
|
|
25
|
+
# @note Whitespace is required before operators: 'a' ? not 'a'?
|
|
26
|
+
#
|
|
27
|
+
module Parsanol
|
|
28
|
+
class Expression
|
|
29
|
+
module Treetop
|
|
30
|
+
# Parser for treetop-style expression strings.
|
|
31
|
+
#
|
|
32
|
+
# Parses expressions like "'a' 'b' ?" and produces a parse tree
|
|
33
|
+
# that can be transformed into Parsanol atoms.
|
|
34
|
+
#
|
|
35
|
+
# @example
|
|
36
|
+
# parser = Parser.new
|
|
37
|
+
# tree = parser.parse("'a' / 'b'")
|
|
38
|
+
# # => {:alt=>[{:seq=>[{:string=>"a"}]}, {:seq=>[{:string=>"b"}]}]}
|
|
39
|
+
#
|
|
40
|
+
class Parser < Parsanol::Parser
  root(:expression)

  rule(:expression) { alternatives }

  # Alternative: 'a' / 'b'
  # One sequence followed by any number of "/ sequence" tails, all captured
  # under :alt.
  rule(:alternatives) do
    further_choices = (spaced('/') >> simple).repeat
    (simple >> further_choices).as(:alt)
  end

  # Sequence by concatenation: 'a' 'b'
  rule(:simple) do
    occurrence.repeat(1).as(:seq)
  end

  # Occurrence modifiers: ?, *, +, {min,max}
  # The choice is ordered, so the bare atom must stay last.
  rule(:occurrence) do
    starred  = atom.as(:repetition) >> spaced('*').as(:sign)
    plussed  = atom.as(:repetition) >> spaced('+').as(:sign)
    bounded  = atom.as(:repetition) >> repetition_spec
    optional = atom.as(:maybe) >> spaced('?')

    starred | plussed | bounded | optional | atom
  end

  # Parenthesised expression, any-character dot, string literal or
  # character class, tried in that order.
  rule(:atom) do
    grouped = spaced('(') >> expression.as(:unwrap) >> spaced(')')

    grouped | dot | string | char_class
  end

  # Character class: [a-z], [0-9], etc.
  # The :match capture spans the opening and closing brackets; trailing
  # whitespace is consumed outside the capture.
  rule(:char_class) do
    escaped = str('\\') >> any
    plain   = str(']').absent? >> any

    (str('[') >> (escaped | plain).repeat(1) >> str(']')).as(:match) >> space?
  end

  # Any character: .
  rule(:dot) do
    spaced('.').as(:any)
  end

  # String literal: 'hello'
  # Only the text between the quotes is captured under :string.
  rule(:string) do
    escaped = str('\\') >> any
    plain   = str("'").absent? >> any

    str("'") >> (escaped | plain).repeat.as(:string) >> str("'") >> space?
  end

  # Repetition specification: {1,3}, {2,}, {,5}
  rule(:repetition_spec) do
    lower = integer.maybe.as(:min)
    upper = integer.maybe.as(:max)

    spaced('{') >> lower >> spaced(',') >> upper >> spaced('}')
  end

  rule(:integer) { match['0-9'].repeat(1) }

  # Whitespace handling
  rule(:space) { match('\s').repeat(1) }
  rule(:space?) { space.maybe }

  # Helper: match string followed by optional whitespace
  #
  # @param str [String] literal text to match
  # @return [Object] atom matching +str+ and then optional whitespace
  def spaced(str)
    # `str(...)` with parentheses is the atom-building method, not the
    # parameter of the same name.
    literal = str(str)
    literal >> space?
  end
end
|
|
106
|
+
|
|
107
|
+
# Transform for converting parse trees to Parsanol atoms.
|
|
108
|
+
#
|
|
109
|
+
# @example
|
|
110
|
+
# tree = {:seq=>[{:string=>"a"}, {:string=>"b"}]}
|
|
111
|
+
# transform = Transform.new
|
|
112
|
+
# atom = transform.apply(tree)
|
|
113
|
+
# # => Sequence.new([Str.new('a'), Str.new('b')])
|
|
114
|
+
#
|
|
115
|
+
class Transform < Parsanol::Transform
  # Repetition with sign: * (zero+) or + (one+)
  rule(repetition: simple(:rep), sign: simple(:sign)) do
    lower_bound = sign == '+' ? 1 : 0
    Parsanol::Atoms::Repetition.new(rep, lower_bound, nil)
  end

  # Repetition with bounds: {min,max}
  # A missing min becomes 0; a missing max stays nil.
  rule(repetition: simple(:rep), min: simple(:min), max: simple(:max)) do
    lower_bound = Integer(min || 0)
    upper_bound = max ? Integer(max) : nil

    Parsanol::Atoms::Repetition.new(rep, lower_bound, upper_bound)
  end

  # Alternative: a / b
  rule(alt: subtree(:alt)) do
    Parsanol::Atoms::Alternative.new(*alt)
  end

  # Sequence: a b
  rule(seq: sequence(:s)) do
    Parsanol::Atoms::Sequence.new(*s)
  end

  # Unwrap parentheses
  rule(unwrap: simple(:u)) do
    u
  end

  # Optional: a ?
  rule(maybe: simple(:m)) { |bindings| bindings[:m].maybe }

  # String literal
  rule(string: simple(:s)) do
    Parsanol::Atoms::Str.new(s)
  end

  # Character class
  # NOTE(review): the captured text is wrapped in brackets here; if the
  # :match capture already contains its own brackets this nests them -
  # confirm that is intended.
  rule(match: simple(:m)) do
    Parsanol::Atoms::Re.new("[#{m}]")
  end

  # Any character: .
  rule(any: simple(:_a)) do
    Parsanol::Atoms::Re.new('.')
  end
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|