parsanol 1.0.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +12 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +487 -0
  5. data/Rakefile +135 -0
  6. data/lib/parsanol/3.2/parsanol_native.so +0 -0
  7. data/lib/parsanol/3.3/parsanol_native.so +0 -0
  8. data/lib/parsanol/3.4/parsanol_native.so +0 -0
  9. data/lib/parsanol/4.0/parsanol_native.so +0 -0
  10. data/lib/parsanol/ast_visitor.rb +122 -0
  11. data/lib/parsanol/atoms/alternative.rb +122 -0
  12. data/lib/parsanol/atoms/base.rb +202 -0
  13. data/lib/parsanol/atoms/can_flatten.rb +194 -0
  14. data/lib/parsanol/atoms/capture.rb +38 -0
  15. data/lib/parsanol/atoms/context.rb +334 -0
  16. data/lib/parsanol/atoms/context_optimized.rb +38 -0
  17. data/lib/parsanol/atoms/custom.rb +110 -0
  18. data/lib/parsanol/atoms/cut.rb +66 -0
  19. data/lib/parsanol/atoms/dsl.rb +96 -0
  20. data/lib/parsanol/atoms/dynamic.rb +39 -0
  21. data/lib/parsanol/atoms/entity.rb +75 -0
  22. data/lib/parsanol/atoms/ignored.rb +37 -0
  23. data/lib/parsanol/atoms/infix.rb +162 -0
  24. data/lib/parsanol/atoms/lookahead.rb +82 -0
  25. data/lib/parsanol/atoms/named.rb +74 -0
  26. data/lib/parsanol/atoms/re.rb +83 -0
  27. data/lib/parsanol/atoms/repetition.rb +259 -0
  28. data/lib/parsanol/atoms/scope.rb +35 -0
  29. data/lib/parsanol/atoms/sequence.rb +194 -0
  30. data/lib/parsanol/atoms/str.rb +103 -0
  31. data/lib/parsanol/atoms/visitor.rb +91 -0
  32. data/lib/parsanol/atoms.rb +46 -0
  33. data/lib/parsanol/buffer.rb +133 -0
  34. data/lib/parsanol/builder_callbacks.rb +353 -0
  35. data/lib/parsanol/cause.rb +122 -0
  36. data/lib/parsanol/context.rb +39 -0
  37. data/lib/parsanol/convenience.rb +36 -0
  38. data/lib/parsanol/edit_tracker.rb +111 -0
  39. data/lib/parsanol/error_reporter/contextual.rb +99 -0
  40. data/lib/parsanol/error_reporter/deepest.rb +120 -0
  41. data/lib/parsanol/error_reporter/tree.rb +63 -0
  42. data/lib/parsanol/error_reporter.rb +100 -0
  43. data/lib/parsanol/expression/treetop.rb +154 -0
  44. data/lib/parsanol/expression.rb +106 -0
  45. data/lib/parsanol/fast_mode.rb +149 -0
  46. data/lib/parsanol/first_set.rb +79 -0
  47. data/lib/parsanol/grammar_builder.rb +177 -0
  48. data/lib/parsanol/incremental_parser.rb +177 -0
  49. data/lib/parsanol/interval_tree.rb +217 -0
  50. data/lib/parsanol/lazy_result.rb +179 -0
  51. data/lib/parsanol/lexer.rb +144 -0
  52. data/lib/parsanol/mermaid.rb +139 -0
  53. data/lib/parsanol/native/parser.rb +612 -0
  54. data/lib/parsanol/native/serializer.rb +248 -0
  55. data/lib/parsanol/native/transformer.rb +435 -0
  56. data/lib/parsanol/native/types.rb +42 -0
  57. data/lib/parsanol/native.rb +217 -0
  58. data/lib/parsanol/optimizer.rb +85 -0
  59. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  60. data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
  61. data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
  62. data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
  63. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  64. data/lib/parsanol/options/ruby_transform.rb +107 -0
  65. data/lib/parsanol/options/serialized.rb +94 -0
  66. data/lib/parsanol/options/zero_copy.rb +128 -0
  67. data/lib/parsanol/options.rb +20 -0
  68. data/lib/parsanol/parallel.rb +133 -0
  69. data/lib/parsanol/parser.rb +182 -0
  70. data/lib/parsanol/parslet.rb +151 -0
  71. data/lib/parsanol/pattern/binding.rb +91 -0
  72. data/lib/parsanol/pattern.rb +159 -0
  73. data/lib/parsanol/pool.rb +219 -0
  74. data/lib/parsanol/pools/array_pool.rb +75 -0
  75. data/lib/parsanol/pools/buffer_pool.rb +175 -0
  76. data/lib/parsanol/pools/position_pool.rb +92 -0
  77. data/lib/parsanol/pools/slice_pool.rb +64 -0
  78. data/lib/parsanol/position.rb +94 -0
  79. data/lib/parsanol/resettable.rb +29 -0
  80. data/lib/parsanol/result.rb +46 -0
  81. data/lib/parsanol/result_builder.rb +208 -0
  82. data/lib/parsanol/result_stream.rb +261 -0
  83. data/lib/parsanol/rig/rspec.rb +71 -0
  84. data/lib/parsanol/rope.rb +81 -0
  85. data/lib/parsanol/scope.rb +104 -0
  86. data/lib/parsanol/slice.rb +146 -0
  87. data/lib/parsanol/source/line_cache.rb +109 -0
  88. data/lib/parsanol/source.rb +180 -0
  89. data/lib/parsanol/source_location.rb +167 -0
  90. data/lib/parsanol/streaming_parser.rb +124 -0
  91. data/lib/parsanol/string_view.rb +195 -0
  92. data/lib/parsanol/transform.rb +226 -0
  93. data/lib/parsanol/version.rb +5 -0
  94. data/lib/parsanol/wasm/README.md +80 -0
  95. data/lib/parsanol/wasm/package.json +51 -0
  96. data/lib/parsanol/wasm/parsanol.js +252 -0
  97. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  98. data/lib/parsanol/wasm_parser.rb +240 -0
  99. data/lib/parsanol.rb +280 -0
  100. data/parsanol-ruby.gemspec +67 -0
  101. metadata +280 -0
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Debug helper for parser development.
4
+ # Adds a convenient method to Parsanol::Atoms::Base for debugging parse failures.
5
+ #
6
+ # @example
7
+ # class MyParser < Parsanol::Parser
8
+ # rule(:foo) { str('foo') }
9
+ # root(:foo)
10
+ # end
11
+ #
12
+ # # Instead of writing rescue blocks:
13
+ # MyParser.new.parse_with_debug('invalid')
14
+ # # Prints the error tree automatically and returns nil
15
+ #
16
+ # Inspired by Parslet (MIT License).
17
module Parsanol
  module Atoms
    class Base
      # Development aid: runs #parse and, rather than letting a parse failure
      # propagate, prints the failure's ASCII error tree to standard output.
      #
      # @param input [String] the input to parse
      # @param options [Hash] options forwarded unchanged to #parse
      # @return [Object, nil] whatever #parse returns, or nil when
      #   Parsanol::ParseFailed was raised
      def parse_with_debug(input, options = {})
        begin
          return parse(input, options)
        rescue Parsanol::ParseFailed => failure
          # Show the error tree so the failing rule is visible while debugging.
          puts failure.parse_failure_cause.ascii_tree
        end

        nil
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Edit tracking for GPeg-style incremental parsing
4
+ # Based on the GPeg paper: "Fast Incremental PEG Parsing" (Yedidia, SLE 2021)
5
+ #
6
+ # Tracks edits to the input as [position, delta] pairs and enables lazy shifting
7
+ # of cached intervals without rebuilding the entire cache (O(1) edit cost).
8
+ #
9
module Parsanol
  # Records text edits as [position, delta] pairs in chronological order and
  # maps cached intervals through them on demand.
  class EditTracker
    # An edit operation: insertion (+delta) or deletion (-delta) at a position.
    class Edit
      attr_reader :position, :delta

      # @param position [Integer] where the edit occurred
      # @param delta [Integer] characters added (positive) or removed (negative)
      def initialize(position, delta)
        @position = position
        @delta = delta
      end

      # @return [String] human-readable description of the edit
      def to_s
        if @delta.positive?
          "Insert(#{@delta} chars at #{@position})"
        else
          "Delete(#{-@delta} chars at #{@position})"
        end
      end
    end

    # Get all edits (for debugging)
    attr_reader :edits

    def initialize
      @edits = [] # List of edits in chronological order
    end

    # Record an insertion at position
    # @param position [Integer] Where the insertion occurred
    # @param length [Integer] Number of characters inserted
    def insert(position, length)
      @edits << Edit.new(position, length)
    end

    # Record a deletion at position
    # @param position [Integer] Where the deletion occurred
    # @param length [Integer] Number of characters deleted
    def delete(position, length)
      @edits << Edit.new(position, -length)
    end

    # Shift an interval based on accumulated edits.
    # Returns the shifted interval [low', high') or nil if interval is invalidated.
    #
    # An interval is invalidated if any edit overlaps with it, as the cached
    # parse result is no longer valid. An insertion overlaps when its position
    # lies in [low, high). A deletion overlaps when the removed range
    # [position, position + length) intersects [low, high) - including a
    # deletion that starts before the interval but reaches into it.
    #
    # @param low [Integer] Interval start position
    # @param high [Integer] Interval end position (exclusive)
    # @return [Array<Integer>, nil] Shifted [low, high) or nil if invalidated
    def shift_interval(low, high)
      shifted_low = low
      shifted_high = high

      @edits.each do |edit|
        # Skip zero-length edits (no-ops)
        next if edit.delta.zero?

        # Exclusive end of the text the edit touches. For a deletion this is
        # position + deleted length; an insertion touches only its position.
        edit_end = edit.delta.negative? ? edit.position - edit.delta : edit.position

        # Edits at or after the interval end leave it untouched.
        if edit.position < shifted_high
          # Edit starts inside the interval, or a deletion starting before
          # it extends past the interval start - invalidate.
          return nil if edit.position >= shifted_low || edit_end > shifted_low

          # Edit lies entirely before the interval - shift both boundaries.
          shifted_low += edit.delta
          shifted_high += edit.delta
        end

        # Sanity check: ensure interval remains valid
        return nil if shifted_low.negative? || shifted_high < shifted_low
      end

      [shifted_low, shifted_high]
    end

    # Check if interval needs invalidation (overlaps with any edit)
    # @param low [Integer] Interval start position
    # @param high [Integer] Interval end position (exclusive)
    # @return [Boolean] true if interval should be invalidated
    def invalidates?(low, high)
      shift_interval(low, high).nil?
    end

    # Clear all recorded edits
    def clear
      @edits.clear
    end

    # Number of edits tracked
    def size
      @edits.size
    end

    # Check if any edits have been recorded
    def empty?
      @edits.empty?
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  module ErrorReporter
    # Deepest-style reporter that additionally remembers a context label.
    #
    # On top of the tracking inherited from Deepest, it keeps the label of a
    # failing atom (for atoms that respond to #label) and drops all tracked
    # state whenever #succ is notified at or beyond the last recorded
    # success position.
    #
    # @example Parser with labeled rules
    #   class MyParser < Parsanol::Parser
    #     rule(:expression, label: 'math expression') { ... }
    #     rule(:term, label: 'number or variable') { ... }
    #   end
    #
    # Inspired by contextual error reporting strategies in modern parsers.
    class Contextual < Deepest
      # Starts with a success position of 0 and empty tracking state.
      def initialize
        @prev_success_pos = 0
        clear_state
      end

      # Success notification. When the source's byte position has not moved
      # behind the last recorded success position, that position is recorded
      # and the tracked error/label state is cleared; otherwise nothing changes.
      #
      # @param src [Parsanol::Source] input source
      # @return [nil]
      def succ(src)
        byte_offset = src.pos.bytepos

        if byte_offset >= @prev_success_pos
          @prev_success_pos = byte_offset
          reset
        end

        nil
      end

      # Forgets the tracked deepest cause and the active label.
      #
      # @return [void]
      def reset
        @active_label_pos = -1
        @deepest_cause = nil
        @active_label_text = nil
      end

      alias clear_state reset

      # Records an error through the superclass and, when the failing atom
      # exposes a truthy #label, stamps the active label onto the cause.
      #
      # @param atom [Parsanol::Atoms::Base] atom that failed
      # @param src [Parsanol::Source] input source
      # @param msg [String, Array] error message
      # @param nested [Array, nil] child causes
      # @return [Parsanol::Cause] the cause returned by the superclass
      def err(atom, src, msg, nested = nil)
        cause = super

        lbl = atom.label if atom.respond_to?(:label)
        return cause unless lbl

        maybe_update_label(lbl, src.pos.bytepos)
        cause.set_label(@active_label_text)
        cause
      end

      # Replaces the active label when byte_pos is at or beyond the position
      # at which the current label was recorded.
      #
      # @param lbl [String] label text
      # @param byte_pos [Integer] position in input
      # @return [void]
      def maybe_update_label(lbl, byte_pos)
        if byte_pos >= @active_label_pos
          @active_label_pos = byte_pos
          @active_label_text = lbl
        end
      end
    end
  end
end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  module ErrorReporter
    # Reporter that remembers the failure whose position is furthest into the
    # input. Each reported cause is reduced to its most deeply nested leaf,
    # and that leaf replaces the remembered one when its position is at
    # least as far into the input.
    #
    # @example
    #   reporter = Parsanol::ErrorReporter::Deepest.new
    #   parser.parse(input, reporter: reporter)
    #
    # Inspired by "furthest failure" error reporting strategies in parser tools.
    class Deepest < Base
      # @return [Parsanol::Cause, nil] the deepest cause encountered so far
      attr_reader :deepest_cause

      # Starts with no remembered cause.
      def initialize
        @deepest_cause = nil
      end

      # Builds a cause at the source's current position and runs it through
      # #deepest.
      #
      # @param _atom [Parsanol::Atoms::Base] atom that failed (unused)
      # @param source [Parsanol::Source] input source
      # @param message [String, Array] error message
      # @param children [Array, nil] child error causes
      # @return [Parsanol::Cause] result of #deepest for the new cause
      def err(_atom, source, message, children = nil)
        deepest(Cause.format(source, source.pos, message, children))
      end

      # Builds a cause at an explicit position and runs it through #deepest.
      #
      # @param _atom [Parsanol::Atoms::Base] atom that failed (unused)
      # @param source [Parsanol::Source] input source
      # @param message [String, Array] error message
      # @param pos [Integer] position of the error
      # @param children [Array, nil] child error causes
      # @return [Parsanol::Cause] result of #deepest for the new cause
      def err_at(_atom, source, message, pos, children = nil)
        deepest(Cause.format(source, pos, message, children))
      end

      # Success notification; this reporter ignores it.
      #
      # @param _source [Parsanol::Source] input source
      # @return [nil]
      def succ(_source)
        nil
      end

      # Compares the given cause's most deeply nested leaf with the
      # remembered one. When nothing is remembered yet, or the leaf's
      # position is at or beyond the remembered position, the leaf is stored
      # and the given cause is returned. Otherwise the remembered cause is
      # returned.
      #
      # @param cause [Parsanol::Cause] error cause to evaluate
      # @return [Parsanol::Cause] the given cause or the remembered one
      def deepest(cause)
        leaf = find_deepest_leaf(cause).last

        superseded = @deepest_cause && !(leaf.pos >= @deepest_cause.pos)
        return @deepest_cause if superseded

        @deepest_cause = leaf
        cause
      end

      private

      # Walks the cause tree and returns the node with the greatest nesting
      # depth together with that depth. On equal depth the node found first
      # wins, because only a strictly greater rank replaces the current best.
      #
      # @param node [Parsanol::Cause] current node in error tree
      # @param rank [Integer] depth of node from the root
      # @return [Array<Integer, Parsanol::Cause>] [depth, deepest_leaf]
      def find_deepest_leaf(node, rank = 0)
        (node.children || []).reduce([rank, node]) do |best, kid|
          contender = find_deepest_leaf(kid, rank + 1)
          contender.first > best.first ? contender : best
        end
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  module ErrorReporter
    # Reporter that turns every reported failure into a Cause node via
    # Cause.format, passing along the nested failures it was given so the
    # causes form a tree.
    #
    # @example
    #   reporter = Parsanol::ErrorReporter::Tree.new
    #   parser.parse(input, reporter: reporter)
    #
    # Inspired by error tree reporting patterns in parser generators.
    class Tree < Base
      # Reports a failure located at the source's current position.
      #
      # @param _parser_atom [Parsanol::Atoms::Base] atom that failed (unused)
      # @param src [Parsanol::Source] input source being parsed
      # @param msg [String, Array<String>] error description
      # @param nested_errors [Array<Cause>, nil] failures from inner parse attempts
      # @return [Parsanol::Cause] error cause node for this failure
      def err(_parser_atom, src, msg, nested_errors = nil)
        Cause.format(src, src.pos, msg, nested_errors)
      end

      # Reports a failure located at an explicitly given position, for
      # errors that occurred somewhere other than the current position.
      #
      # @param _parser_atom [Parsanol::Atoms::Base] atom that failed (unused)
      # @param src [Parsanol::Source] input source being parsed
      # @param msg [String, Array<String>] error description
      # @param error_pos [Integer] position where the error occurred
      # @param nested_errors [Array<Cause>, nil] failures from inner parse attempts
      # @return [Parsanol::Cause] error cause node for this failure
      def err_at(_parser_atom, src, msg, error_pos, nested_errors = nil)
        Cause.format(src, error_pos, msg, nested_errors)
      end

      # Success notification; the tree reporter keeps no success state.
      #
      # @param _src [Parsanol::Source] input source being parsed
      # @return [nil]
      def succ(_src)
        nil
      end
    end
  end
end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ # A namespace for all error reporters.
4
+ #
5
+ # Error reporters collect and format parse errors. The parsing engine
6
+ # calls reporter methods as it attempts to match atoms, building up
7
+ # an error structure that can be presented to the user.
8
+ #
9
+ # @example Using a specific error reporter
10
+ # parser = MyParser.new
11
+ # parser.parse(input, reporter: Parsanol::ErrorReporter::Deepest.new)
12
+ #
13
+ # @example Creating a custom error reporter
14
+ # class MyReporter < Parsanol::ErrorReporter::Base
15
+ # def initialize
16
+ # @errors = []
17
+ # end
18
+ #
19
+ # def err(atom, source, message, children = nil)
20
+ # @errors << { position: source.pos, message: message }
21
+ # @errors.last
22
+ # end
23
+ #
24
+ # def err_at(atom, source, message, pos, children = nil)
25
+ # @errors << { position: pos, message: message }
26
+ # @errors.last
27
+ # end
28
+ # end
29
+ #
30
module Parsanol
  module ErrorReporter
    # Abstract parent of all error reporters.
    #
    # Defines the reporter interface: #err and #err_at raise
    # NotImplementedError until a subclass overrides them, while #succ and
    # #finalize are optional hooks that do nothing by default.
    class Base
      # Report an error at the current parse position.
      #
      # @abstract Subclasses must implement this method
      # @param atom [Parsanol::Atoms::Base] The atom that failed to match
      # @param source [Parsanol::Source] The input source
      # @param message [String, Array<String>] Error message(s)
      # @param children [Array<Cause>, nil] Child errors from deeper levels
      # @return [Object] An error cause object (implementation-specific)
      # @raise [NotImplementedError] always, in this base implementation
      def err(atom, source, message, children = nil)
        raise NotImplementedError, 'Error reporters must implement #err(atom, source, message, children)'
      end

      # Report an error at a specific position.
      #
      # @abstract Subclasses must implement this method
      # @param atom [Parsanol::Atoms::Base] The atom that failed to match
      # @param source [Parsanol::Source] The input source
      # @param message [String, Array<String>] Error message(s)
      # @param pos [Integer] The byte position of the error
      # @param children [Array<Cause>, nil] Child errors from deeper levels
      # @return [Object] An error cause object (implementation-specific)
      # @raise [NotImplementedError] always, in this base implementation
      def err_at(atom, source, message, pos, children = nil)
        raise NotImplementedError, 'Error reporters must implement #err_at(atom, source, message, pos, children)'
      end

      # Hook invoked when an expression parses successfully. Reporters may
      # override it to track successes; here it is a no-op.
      #
      # @param source [Parsanol::Source] The input source at success position
      # @return [void]
      def succ(source)
        nil
      end

      # Hook invoked once parsing has completed. Override to clean up or to
      # produce a final report; here it is a no-op.
      #
      # @return [void]
      def finalize
        nil
      end
    end
  end
end
97
+
98
+ require 'parsanol/error_reporter/tree'
99
+ require 'parsanol/error_reporter/deepest'
100
+ require 'parsanol/error_reporter/contextual'
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Treetop-style expression parser for Parsanol.
4
+ #
5
+ # This module provides a parser and transform for converting treetop-style
6
+ # expression strings into Parsanol atoms. The implementation is pure Ruby
7
+ # and is not accelerated by the Rust native extension.
8
+ #
9
+ # == Why Pure Ruby?
10
+ #
11
+ # Expression parsing happens at grammar definition time (once per parser class),
12
+ # not during input parsing. The overhead is negligible for typical use cases.
13
+ # The resulting atoms can still be used with Rust-accelerated parsing.
14
+ #
15
+ # == Syntax Reference
16
+ #
17
+ # Expression ::= Alternative ('/' Alternative)*
18
+ # Alternative ::= Sequence
19
+ # Sequence ::= Occurrence+
20
+ # Occurrence ::= Atom ('?' | '*' | '+' | '{min,max}')?
21
+ # Atom ::= '(' Expression ')' | '.' | String | CharClass
22
+ # String ::= "'" (escape | [^'])* "'"
23
+ # CharClass ::= '[' (escape | [^\]])+ ']'
24
+ #
25
+ # @note Whitespace after a token is optional: both 'a' ? and 'a'? are accepted.
26
+ #
27
module Parsanol
  class Expression
    module Treetop
      # Parser for treetop-style expression strings.
      #
      # Parses expressions like "'a' 'b' ?" and produces a parse tree
      # that can be transformed into Parsanol atoms by Transform.
      # Every token rule consumes optional trailing whitespace.
      #
      # @example
      #   parser = Parser.new
      #   tree = parser.parse("'a' / 'b'")
      #   # => {:alt=>[{:seq=>[{:string=>"a"}]}, {:seq=>[{:string=>"b"}]}]}
      #
      class Parser < Parsanol::Parser
        root(:expression)

        rule(:expression) { alternatives }

        # Alternative: 'a' / 'b'
        rule(:alternatives) do
          (simple >> (spaced('/') >> simple).repeat).as(:alt)
        end

        # Sequence by concatenation: 'a' 'b'
        rule(:simple) { occurrence.repeat(1).as(:seq) }

        # Occurrence modifiers: ?, *, +, {min,max}
        #
        # Only the operator character itself is captured as :sign; the
        # optional whitespace after it is matched outside the capture so the
        # Transform's comparison against '+' is not affected by trailing
        # whitespace (as in "'a' + 'b'").
        rule(:occurrence) do
          (atom.as(:repetition) >> str('*').as(:sign) >> space?) |
            (atom.as(:repetition) >> str('+').as(:sign) >> space?) |
            (atom.as(:repetition) >> repetition_spec) |
            (atom.as(:maybe) >> spaced('?')) |
            atom
        end

        # Parenthesised expression, any-character dot, string literal or
        # character class, tried in that order.
        rule(:atom) do
          (spaced('(') >> expression.as(:unwrap) >> spaced(')')) |
            dot |
            string |
            char_class
        end

        # Character class: [a-z], [0-9], etc.
        # The :match capture spans the surrounding brackets as well.
        rule(:char_class) do
          (str('[') >>
            ((str('\\') >> any) | (str(']').absent? >> any)).repeat(1) >>
            str(']')).as(:match) >> space?
        end

        # Any character: .
        rule(:dot) { spaced('.').as(:any) }

        # String literal: 'hello'
        # Only the text between the quotes is captured as :string.
        rule(:string) do
          str("'") >>
            ((str('\\') >> any) | (str("'").absent? >> any)).repeat.as(:string) >>
            str("'") >> space?
        end

        # Repetition specification: {1,3}, {2,}, {,5}
        # The comma is mandatory; either bound may be omitted.
        rule(:repetition_spec) do
          spaced('{') >>
            integer.maybe.as(:min) >> spaced(',') >>
            integer.maybe.as(:max) >> spaced('}')
        end

        rule(:integer) do
          match['0-9'].repeat(1)
        end

        # Whitespace handling
        rule(:space) { match('\s').repeat(1) }
        rule(:space?) { space.maybe }

        # Helper: match string followed by optional whitespace
        #
        # @param str [String] literal text to match
        # @return [Object] atom matching the literal and trailing whitespace
        def spaced(str)
          str(str) >> space?
        end
      end

      # Transform for converting parse trees to Parsanol atoms.
      #
      # @example
      #   tree = {:seq=>[{:string=>"a"}, {:string=>"b"}]}
      #   transform = Transform.new
      #   atom = transform.apply(tree)
      #   # => Sequence.new([Str.new('a'), Str.new('b')])
      #
      class Transform < Parsanol::Transform
        # Repetition with sign: * (zero+) or + (one+)
        rule(repetition: simple(:rep), sign: simple(:sign)) do
          min = sign == '+' ? 1 : 0
          Parsanol::Atoms::Repetition.new(rep, min, nil)
        end

        # Repetition with bounds: {min,max}
        # A missing min defaults to 0; a missing max stays nil.
        rule(repetition: simple(:rep), min: simple(:min), max: simple(:max)) do
          Parsanol::Atoms::Repetition.new(
            rep,
            Integer(min || 0),
            (max && Integer(max)) || nil
          )
        end

        # Alternative: a / b
        rule(alt: subtree(:alt)) { Parsanol::Atoms::Alternative.new(*alt) }

        # Sequence: a b
        rule(seq: sequence(:s)) { Parsanol::Atoms::Sequence.new(*s) }

        # Unwrap parentheses
        rule(unwrap: simple(:u)) { u }

        # Optional: a ?
        rule(maybe: simple(:m)) { |d| d[:m].maybe }

        # String literal
        rule(string: simple(:s)) { Parsanol::Atoms::Str.new(s) }

        # Character class
        # NOTE(review): the :match capture produced by Parser#char_class
        # already contains the surrounding brackets, so this wraps them in a
        # second pair - confirm that Re is meant to receive "[[a-z]]".
        rule(match: simple(:m)) { Parsanol::Atoms::Re.new("[#{m}]") }

        # Any character: .
        rule(any: simple(:_a)) { Parsanol::Atoms::Re.new('.') }
      end
    end
  end
end