parsanol 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +546 -0
  3. data/Cargo.toml +9 -0
  4. data/HISTORY.txt +12 -0
  5. data/LICENSE +23 -0
  6. data/README.adoc +487 -0
  7. data/Rakefile +135 -0
  8. data/ext/parsanol_native/Cargo.toml +34 -0
  9. data/ext/parsanol_native/extconf.rb +15 -0
  10. data/ext/parsanol_native/src/lib.rs +17 -0
  11. data/lib/parsanol/ast_visitor.rb +122 -0
  12. data/lib/parsanol/atoms/alternative.rb +122 -0
  13. data/lib/parsanol/atoms/base.rb +202 -0
  14. data/lib/parsanol/atoms/can_flatten.rb +194 -0
  15. data/lib/parsanol/atoms/capture.rb +38 -0
  16. data/lib/parsanol/atoms/context.rb +334 -0
  17. data/lib/parsanol/atoms/context_optimized.rb +38 -0
  18. data/lib/parsanol/atoms/custom.rb +110 -0
  19. data/lib/parsanol/atoms/cut.rb +66 -0
  20. data/lib/parsanol/atoms/dsl.rb +96 -0
  21. data/lib/parsanol/atoms/dynamic.rb +39 -0
  22. data/lib/parsanol/atoms/entity.rb +75 -0
  23. data/lib/parsanol/atoms/ignored.rb +37 -0
  24. data/lib/parsanol/atoms/infix.rb +162 -0
  25. data/lib/parsanol/atoms/lookahead.rb +82 -0
  26. data/lib/parsanol/atoms/named.rb +74 -0
  27. data/lib/parsanol/atoms/re.rb +83 -0
  28. data/lib/parsanol/atoms/repetition.rb +259 -0
  29. data/lib/parsanol/atoms/scope.rb +35 -0
  30. data/lib/parsanol/atoms/sequence.rb +194 -0
  31. data/lib/parsanol/atoms/str.rb +103 -0
  32. data/lib/parsanol/atoms/visitor.rb +91 -0
  33. data/lib/parsanol/atoms.rb +46 -0
  34. data/lib/parsanol/buffer.rb +133 -0
  35. data/lib/parsanol/builder_callbacks.rb +353 -0
  36. data/lib/parsanol/cause.rb +122 -0
  37. data/lib/parsanol/context.rb +39 -0
  38. data/lib/parsanol/convenience.rb +36 -0
  39. data/lib/parsanol/edit_tracker.rb +111 -0
  40. data/lib/parsanol/error_reporter/contextual.rb +99 -0
  41. data/lib/parsanol/error_reporter/deepest.rb +120 -0
  42. data/lib/parsanol/error_reporter/tree.rb +63 -0
  43. data/lib/parsanol/error_reporter.rb +100 -0
  44. data/lib/parsanol/expression/treetop.rb +154 -0
  45. data/lib/parsanol/expression.rb +106 -0
  46. data/lib/parsanol/fast_mode.rb +149 -0
  47. data/lib/parsanol/first_set.rb +79 -0
  48. data/lib/parsanol/grammar_builder.rb +177 -0
  49. data/lib/parsanol/incremental_parser.rb +177 -0
  50. data/lib/parsanol/interval_tree.rb +217 -0
  51. data/lib/parsanol/lazy_result.rb +179 -0
  52. data/lib/parsanol/lexer.rb +144 -0
  53. data/lib/parsanol/mermaid.rb +139 -0
  54. data/lib/parsanol/native/parser.rb +612 -0
  55. data/lib/parsanol/native/serializer.rb +248 -0
  56. data/lib/parsanol/native/transformer.rb +435 -0
  57. data/lib/parsanol/native/types.rb +42 -0
  58. data/lib/parsanol/native.rb +217 -0
  59. data/lib/parsanol/optimizer.rb +85 -0
  60. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  61. data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
  62. data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
  63. data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
  64. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  65. data/lib/parsanol/options/ruby_transform.rb +107 -0
  66. data/lib/parsanol/options/serialized.rb +94 -0
  67. data/lib/parsanol/options/zero_copy.rb +128 -0
  68. data/lib/parsanol/options.rb +20 -0
  69. data/lib/parsanol/parallel.rb +133 -0
  70. data/lib/parsanol/parser.rb +182 -0
  71. data/lib/parsanol/parslet.rb +151 -0
  72. data/lib/parsanol/pattern/binding.rb +91 -0
  73. data/lib/parsanol/pattern.rb +159 -0
  74. data/lib/parsanol/pool.rb +219 -0
  75. data/lib/parsanol/pools/array_pool.rb +75 -0
  76. data/lib/parsanol/pools/buffer_pool.rb +175 -0
  77. data/lib/parsanol/pools/position_pool.rb +92 -0
  78. data/lib/parsanol/pools/slice_pool.rb +64 -0
  79. data/lib/parsanol/position.rb +94 -0
  80. data/lib/parsanol/resettable.rb +29 -0
  81. data/lib/parsanol/result.rb +46 -0
  82. data/lib/parsanol/result_builder.rb +208 -0
  83. data/lib/parsanol/result_stream.rb +261 -0
  84. data/lib/parsanol/rig/rspec.rb +71 -0
  85. data/lib/parsanol/rope.rb +81 -0
  86. data/lib/parsanol/scope.rb +104 -0
  87. data/lib/parsanol/slice.rb +146 -0
  88. data/lib/parsanol/source/line_cache.rb +109 -0
  89. data/lib/parsanol/source.rb +180 -0
  90. data/lib/parsanol/source_location.rb +167 -0
  91. data/lib/parsanol/streaming_parser.rb +124 -0
  92. data/lib/parsanol/string_view.rb +195 -0
  93. data/lib/parsanol/transform.rb +226 -0
  94. data/lib/parsanol/version.rb +5 -0
  95. data/lib/parsanol/wasm/README.md +80 -0
  96. data/lib/parsanol/wasm/package.json +51 -0
  97. data/lib/parsanol/wasm/parsanol.js +252 -0
  98. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  99. data/lib/parsanol/wasm_parser.rb +240 -0
  100. data/lib/parsanol.rb +280 -0
  101. data/parsanol-ruby.gemspec +67 -0
  102. metadata +293 -0
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Base class for AST visitors following the Visitor pattern
4
+ # This separates tree traversal logic from transformation logic
5
+ # making the code more maintainable and extensible.
6
module Parsanol
  # Base visitor that walks a tree of parser atoms.
  #
  # #visit dispatches on the atom's class to a visit_* hook. The default hooks
  # visit the node's children and rebuild the node only when at least one
  # child was replaced by a different object; otherwise the original node is
  # returned untouched. Subclasses override individual visit_* hooks to
  # perform transformations.
  class ASTVisitor
    # Dispatches a node to the visit_* hook matching its class.
    #
    # @param parslet [Parsanol::Atoms::Base] parslet to visit
    # @return [Parsanol::Atoms::Base] transformed parslet; atoms of any other
    #   class are returned as-is
    def visit(parslet)
      case parslet
      when Parsanol::Atoms::Sequence    then visit_sequence(parslet)
      when Parsanol::Atoms::Alternative then visit_alternative(parslet)
      when Parsanol::Atoms::Repetition  then visit_repetition(parslet)
      when Parsanol::Atoms::Lookahead   then visit_lookahead(parslet)
      when Parsanol::Atoms::Named       then visit_named(parslet)
      when Parsanol::Atoms::Str         then visit_str(parslet)
      when Parsanol::Atoms::Re          then visit_re(parslet)
      else
        # Leaf nodes or unknown types - return as-is
        parslet
      end
    end

    # Visits every element of a sequence and rebuilds it if any element was
    # replaced.
    #
    # @param parslet [Parsanol::Atoms::Sequence] sequence to visit
    # @return [Parsanol::Atoms::Base] the same sequence, or a new Sequence
    #   built from the visited elements
    def visit_sequence(parslet)
      originals = parslet.parslets
      visited = originals.map { |child| visit(child) }
      return parslet if same_nodes?(visited, originals)

      Parsanol::Atoms::Sequence.new(*visited)
    end

    # Visits every branch of an alternative and rebuilds it if any branch was
    # replaced.
    #
    # @param parslet [Parsanol::Atoms::Alternative] alternative to visit
    # @return [Parsanol::Atoms::Base] the same alternative, or a new
    #   Alternative built from the visited branches
    def visit_alternative(parslet)
      originals = parslet.alternatives
      visited = originals.map { |child| visit(child) }
      return parslet if same_nodes?(visited, originals)

      Parsanol::Atoms::Alternative.new(*visited)
    end

    # Visits the repeated atom and rebuilds the repetition if it was replaced.
    #
    # @param parslet [Parsanol::Atoms::Repetition] repetition to visit
    # @return [Parsanol::Atoms::Base] transformed repetition
    def visit_repetition(parslet)
      visited = visit(parslet.parslet)
      return parslet if visited.equal?(parslet.parslet)

      # The tag is read straight from the instance variable, exactly as the
      # original code did, and passed on as the fourth constructor argument.
      Parsanol::Atoms::Repetition.new(
        visited,
        parslet.min,
        parslet.max,
        parslet.instance_variable_get(:@tag)
      )
    end

    # Visits the bound atom and rebuilds the lookahead if it was replaced.
    #
    # @param parslet [Parsanol::Atoms::Lookahead] lookahead to visit
    # @return [Parsanol::Atoms::Base] transformed lookahead
    def visit_lookahead(parslet)
      visited = visit(parslet.bound_parslet)
      return parslet if visited.equal?(parslet.bound_parslet)

      Parsanol::Atoms::Lookahead.new(visited, parslet.positive)
    end

    # Visits the wrapped atom and rebuilds the named node if it was replaced.
    #
    # @param parslet [Parsanol::Atoms::Named] named to visit
    # @return [Parsanol::Atoms::Base] transformed named
    def visit_named(parslet)
      visited = visit(parslet.parslet)
      return parslet if visited.equal?(parslet.parslet)

      Parsanol::Atoms::Named.new(visited, parslet.name)
    end

    # Visits a string literal node; the default returns it unchanged.
    #
    # @param parslet [Parsanol::Atoms::Str] string to visit
    # @return [Parsanol::Atoms::Base] transformed string
    def visit_str(parslet)
      parslet
    end

    # Visits a regex node; the default returns it unchanged.
    #
    # @param parslet [Parsanol::Atoms::Re] regex to visit
    # @return [Parsanol::Atoms::Base] transformed regex
    def visit_re(parslet)
      parslet
    end

    private

    # True when every visited child is the very same object as the original
    # child at that index. Identity (not ==) is used on purpose so that a
    # replacement which merely compares equal to the original is still kept,
    # matching what the single-child hooks do with #equal?.
    #
    # @param visited [Array] children returned by #visit
    # @param originals [Array] children of the node being visited
    # @return [Boolean]
    def same_nodes?(visited, originals)
      visited.size == originals.size &&
        visited.each_with_index.all? { |node, index| node.equal?(originals[index]) }
    end
  end
end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Ordered choice - tries alternatives left-to-right, returning first success.
4
+ # Fails only if all alternatives fail.
5
+ #
6
+ # @example Simple choice
7
+ # str('a') | str('b') # matches 'a' or 'b'
8
+ #
9
+ # This is PEG ordered choice - no backtracking to later alternatives.
10
+ #
11
module Parsanol
  module Atoms
    # Ordered choice: applies its alternatives left to right and returns the
    # first successful outcome. An error built from every alternative's
    # failure value is reported only when none of them succeeds.
    class Alternative < Parsanol::Atoms::Base
      # @return [Array<Parsanol::Atoms::Base>] the alternatives, in trial order
      attr_reader :alternatives

      # Builds a choice over the given atoms.
      #
      # @param options [Array<Parsanol::Atoms::Base>] alternatives
      def initialize(*options)
        super()
        @alternatives = options
        # The failure message is built once here rather than on every failure.
        @choice_error = "Expected one of #{options.inspect}"
      end

      # Appends another alternative. When +other+ is itself an Alternative its
      # branches are spliced in, so the result stays a flat list.
      #
      # @param other [Parsanol::Atoms::Base] atom (or choice) to append
      # @return [Parsanol::Atoms::Alternative] a new, flattened choice
      def |(other)
        appended = other.is_a?(Parsanol::Atoms::Alternative) ? other.alternatives : [other]
        self.class.new(*(@alternatives + appended))
      end

      # Applies the alternatives in order, using dedicated helpers for the
      # two- and three-branch cases.
      #
      # @param source [Parsanol::Source] input
      # @param context [Parsanol::Atoms::Context] context
      # @param consume_all [Boolean] require full consumption
      # @return [Array(Boolean, Object)] result
      def try(source, context, consume_all)
        candidates = @alternatives
        branch_count = candidates.size

        if branch_count == 2
          try_two(candidates[0], candidates[1], source, context, consume_all)
        elsif branch_count == 3
          try_three(candidates[0], candidates[1], candidates[2], source, context, consume_all)
        else
          try_many(candidates, source, context, consume_all)
        end
      end

      precedence CHOICE

      # Renders the branches joined by ' / '.
      #
      # @param prec [Integer] precedence handed to each branch's #to_s
      # @return [String]
      def to_s_inner(prec)
        rendered = @alternatives.map { |branch| branch.to_s(prec) }
        rendered.join(' / ')
      end

      # FIRST set of a choice: the union of every branch's FIRST set.
      #
      # @return [Set] an empty Set when there are no branches
      def compute_first_set
        return Set.new if @alternatives.empty?

        branch_sets = @alternatives.map(&:first_set)
        branch_sets.reduce { |merged, branch_set| merged.union(branch_set) }
      end

      private

      # Two-branch variant of #try_many without an accumulator array.
      def try_two(a1, a2, source, context, consume_all)
        first_ok, first_value = a1.apply(source, context, consume_all)
        return [first_ok, first_value] if first_ok

        second_ok, second_value = a2.apply(source, context, consume_all)
        return [second_ok, second_value] if second_ok

        context.err(self, source, @choice_error, [first_value, second_value])
      end

      # Three-branch variant of #try_many without an accumulator array.
      def try_three(a1, a2, a3, source, context, consume_all)
        first_ok, first_value = a1.apply(source, context, consume_all)
        return [first_ok, first_value] if first_ok

        second_ok, second_value = a2.apply(source, context, consume_all)
        return [second_ok, second_value] if second_ok

        third_ok, third_value = a3.apply(source, context, consume_all)
        return [third_ok, third_value] if third_ok

        context.err(self, source, @choice_error, [first_value, second_value, third_value])
      end

      # General case: applies each branch in turn and collects the failure
      # values. The collection stays nil when there are no branches at all.
      def try_many(options, source, context, consume_all)
        failures = nil

        options.each do |branch|
          matched, payload = branch.apply(source, context, consume_all)
          return [matched, payload] if matched

          (failures ||= []) << payload
        end

        context.err(self, source, @choice_error, failures)
      end
    end
  end
end
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Base class for all parser atoms. Handles parsing orchestration,
4
+ # memoization, error handling, and result processing.
5
+ #
6
+ # Concrete atoms must implement #try(source, context, consume_all).
7
+ #
8
+ # @abstract Implement #try to create custom parser atoms
9
module Parsanol
  module Atoms
    # Common superclass of all parser atoms. It provides the public #parse
    # entry point, the #apply wrapper that every atom is run through, and
    # helpers for building success tuples. Concrete atoms implement
    # #try(source, context, consume_all).
    class Base
      include Parsanol::Atoms::Precedence
      include Parsanol::Atoms::DSL
      include Parsanol::Atoms::CanFlatten
      include Parsanol::FirstSet

      # Optional label; when set it replaces the atom's rendering in #to_s.
      attr_accessor :label

      # Prefix of the error raised when input is left over.
      UNCONSUMED_INPUT_MSG = "Don't know what to do with "

      # Parses +source+ and returns the flattened result. On failure the input
      # is parsed a second time with an error reporter attached and the
      # resulting cause is raised.
      #
      # @param source [String, Parsanol::Source] input to parse
      # @param options [Hash] parsing options
      # @option options [Parsanol::ErrorReporter] :reporter error collector
      # @option options [Boolean] :prefix allow partial parse (default: false)
      # @return [Object] the parsed result
      # @raise [Parsanol::ParseFailed] on parse failure
      def parse(source, options = {})
        stream = normalize_input(source)
        whole_input = !options[:prefix]

        # First attempt runs without a reporter.
        matched, payload = run_with_context(stream, nil, whole_input)
        if matched
          finalize_result(payload)
        else
          report_detailed_error(stream, whole_input, options[:reporter], payload)
        end
      end

      # Builds a fresh Context and applies this atom within it.
      #
      # @param input [Parsanol::Source] the source
      # @param reporter [Object, nil] error reporter
      # @param consume_all [Boolean] require complete consumption
      # @return [Array(Boolean, Object)] outcome tuple
      def run_with_context(input, reporter, consume_all)
        fresh_context = Parsanol::Atoms::Context.new(reporter, parser_class: detect_parser_class)
        apply(input, fresh_context, consume_all)
      end

      # Runs the atom through context.try_with_cache. On failure the input is
      # rewound to where it started; on success the context is notified and,
      # if +consume_all+ is set, leftover input is turned into an error.
      #
      # @param input [Parsanol::Source] source to parse
      # @param context [Parsanol::Atoms::Context] parsing state
      # @param consume_all [Boolean] consume entire input
      # @return [Array(Boolean, Object)] outcome pair
      def apply(input, context, consume_all = false)
        start_pos = input.bytepos
        result = context.try_with_cache(self, input, consume_all)

        if result.first
          context.succ(input)

          if consume_all && input.chars_left.positive?
            unconsumed_error(input, context, start_pos)
          else
            result
          end
        else
          handle_failure(input, start_pos, result)
        end
      end

      # Matching hook that every concrete atom must override.
      #
      # @param input [Parsanol::Source] source
      # @param context [Parsanol::Atoms::Context] context
      # @param consume_all [Boolean] consume all flag
      # @return [Array(Boolean, Object)] parse result
      # @raise [NotImplementedError] if not overridden
      def try(input, context, consume_all)
        raise NotImplementedError,
              'Atom must implement #try(source, context, consume_all)'
      end

      # Whether this atom should be cached; true unless a subclass overrides.
      #
      # @return [Boolean]
      def cached?
        true
      end

      # Whether this atom produces flat results; false unless a subclass
      # overrides.
      #
      # @return [Boolean]
      def flat?
        false
      end

      # Class-level macro that defines the instance method #precedence
      # returning +level+.
      #
      # @param level [Integer] precedence value
      def self.precedence(level)
        define_method(:precedence) { level }
      end
      precedence ATOM

      # Renders the atom, wrapping it in parentheses when the caller's
      # precedence is lower than this atom's.
      #
      # @param outer [Integer] caller's precedence
      # @return [String]
      def to_s(outer = TOP)
        rendered = label || to_s_inner(precedence)
        return rendered unless outer < precedence

        "(#{rendered})"
      end

      # @return [String] same as #to_s at top-level precedence
      def inspect
        to_s(TOP)
      end

      protected

      # Pre-allocated, frozen tags and result tuples handed out by #ok.
      NIL_OK = [true, nil].freeze
      EMPTY_ARR = [].freeze
      REP_TAG = [:repetition].freeze
      REP_OK = [true, REP_TAG].freeze
      SEQ_TAG = [:sequence].freeze
      SEQ_OK = [true, SEQ_TAG].freeze
      EMPTY_MAP = {}.freeze
      MAP_OK = [true, EMPTY_MAP].freeze
      CAP_TAG = [:capture].freeze
      CAP_OK = [true, CAP_TAG].freeze

      # Wraps +data+ in a success tuple. A shared frozen tuple is returned
      # when +data+ is nil or is (by identity) one of the pre-allocated
      # EMPTY_MAP / REP_TAG / SEQ_TAG / CAP_TAG objects.
      #
      # @param data [Object] the value
      # @return [Array(true, Object)]
      def ok(data)
        if data.nil?
          NIL_OK
        elsif data.equal?(EMPTY_ARR)
          [true, EMPTY_ARR]
        elsif data.equal?(EMPTY_MAP)
          MAP_OK
        elsif data.equal?(REP_TAG)
          REP_OK
        elsif data.equal?(SEQ_TAG)
          SEQ_OK
        elsif data.equal?(CAP_TAG)
          CAP_OK
        else
          [true, data]
        end
      end

      # Alias for ok (legacy compatibility)
      alias succ ok

      private

      # Wraps +source+ in a Parsanol::Source unless it already responds to
      # #line_and_column.
      def normalize_input(source)
        return source if source.respond_to?(:line_and_column)

        Parsanol::Source.new(source)
      end

      # Returns this object's class when it is a Parsanol::Parser, else nil.
      def detect_parser_class
        return self.class if is_a?(Parsanol::Parser)

        nil
      end

      # Rewinds the input to +saved_pos+ and passes the failed outcome on.
      def handle_failure(input, saved_pos, outcome)
        input.bytepos = saved_pos
        outcome
      end

      # Builds the "unconsumed input" error: reads up to 10 characters of the
      # leftover input for the message, rewinds to +saved_pos+ and reports the
      # error at the position where the leftover input began.
      def unconsumed_error(input, context, saved_pos)
        leftover_pos = input.bytepos
        snippet = input.consume(10)
        input.bytepos = saved_pos
        message = UNCONSUMED_INPUT_MSG + snippet.to_s.inspect
        context.err_at(self, input, message, leftover_pos)
      end

      # Rewinds the input to byte 0, parses again with a reporter (a Tree
      # reporter when none was given) and raises the collected cause.
      def report_detailed_error(input, consume_all, reporter, _initial_error)
        input.bytepos = 0
        collector = reporter || Parsanol::ErrorReporter::Tree.new
        matched, failure = run_with_context(input, collector, consume_all)

        # The second run is expected to fail just like the first one did.
        raise 'Invariant violation: parse succeeded during error reporting' if matched

        failure.raise
      end

      # Turns the raw parse value into the user-facing result by flattening.
      def finalize_result(value)
        flatten(value)
      end
    end
  end
end
@@ -0,0 +1,194 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  module Atoms
    # A series of helper functions that have the common topic of flattening
    # result values into the intermediary tree that consists of Ruby Hashes and
    # Arrays.
    #
    # This module has one main function, #flatten, that takes an annotated
    # structure as input and returns the reduced form that users expect from
    # Atom#parse.
    #
    # NOTE: Since all of these functions are just that, functions without
    # side effects, they are in a module and not in a class. It's hard to draw
    # the line sometimes, but this is beyond.
    #
    module CanFlatten
      # Takes a mixed value coming out of a parslet and converts it to a return
      # value for the user by dropping things and merging hashes.
      #
      # Anything that is not an Array is returned unchanged. Arrays are
      # expected to be tagged s-expressions whose first element is :sequence,
      # :maybe or :repetition.
      #
      # @param value [Object] raw parse value
      # @param named [Boolean] true if this result will be embedded in a Hash
      #   result from naming something using <code>.as(...)</code>; it changes
      #   the folding semantics of repetition and maybe
      # @return [Object] the flattened value
      # @raise [RuntimeError] when an Array carries an unknown tag, including
      #   an empty Array (which has no tag at all)
      def flatten(value, named = false)
        # Passes through everything that isn't an array of things
        return value unless value.is_a?(Array)

        # Extracts the s-expression tag
        tag = value[0]
        tail_size = value.size - 1

        # An empty array has no tag; report it like any other unknown tag
        # instead of failing later with "negative array size".
        raise "BUG: Unknown tag #{tag.inspect}." if tail_size < 0

        # Fast path for the common single-child case.
        if tail_size == 1
          flattened = flatten(value[1])
          case tag
          when :sequence
            # Same result as flatten_sequence([flattened]): a lone nil child
            # is compacted away and the empty fold yields ''.
            return flattened.nil? ? '' : flattened
          when :maybe
            return named ? flattened : (flattened || '')
          when :repetition
            return flatten_repetition([flattened], named)
          end

          raise "BUG: Unknown tag #{tag.inspect}."
        end

        # Flatten each element
        result = Array.new(tail_size)
        i = 0
        while i < tail_size
          result[i] = flatten(value[i + 1])
          i += 1
        end

        case tag
        when :sequence
          flatten_sequence(result)
        when :maybe
          named ? result.first : (result.first || '')
        when :repetition
          flatten_repetition(result, named)
        else
          raise "BUG: Unknown tag #{tag.inspect}."
        end
      end

      # Lisp style fold left where the first element builds the basis for
      # an inject.
      #
      # @param list [Array] elements to fold
      # @yield [acc, element] combines the accumulator with the next element
      # @return [Object] '' for an empty list, the sole element for a list of
      #   one, otherwise the folded value
      def foldl(list, &block)
        len = list.size
        return '' if len.zero?
        return list[0] if len == 1 # Fast path for single element

        result = list[0]
        i = 1
        while i < len
          result = block.call(result, list[i])
          i += 1
        end
        result
      end

      # Flatten results from a sequence of parslets: nil elements are dropped
      # and the rest are merged pairwise with #merge_fold.
      #
      # @api private
      #
      def flatten_sequence(list)
        foldl(list.compact) do |r, e| # and then merge flat elements
          merge_fold(r, e)
        end
      end

      # Merges two adjacent sequence results into one.
      #
      # @api private
      # @param l [Object] accumulated left value
      # @param r [Object] next value
      # @return [Object] merged value
      # @raise [RuntimeError] when the pair cannot be merged
      def merge_fold(l, r)
        l_class = l.class
        r_class = r.class

        # equal pairs: merge. ----------------------------------------------------
        if l_class == r_class
          return l + r unless l_class == Hash

          warn_about_duplicate_keys(l, r)
          return l.merge(r)
        end

        # unequal pairs: hoist to same level. ------------------------------------
        l_is_slice = l.instance_of?(Parsanol::Slice)
        r_is_slice = r.instance_of?(Parsanol::Slice)
        l_is_str = l_class == String || l_is_slice
        r_is_str = r_class == String || r_is_slice

        # Maybe classes are not equal, but both are stringlike?
        if l_is_str && r_is_str
          # if we're merging a String with a Slice, the slice wins.
          return r if r_is_slice
          return l if l_is_slice

          raise 'NOTREACHED: What other stringlike classes are there?'
        end

        # special case: If one of them is a string/slice, the other is more important
        return l if r_is_str
        return r if l_is_str

        # otherwise just create an array for one of them to live in
        return l + [r] if r_class == Hash
        return [l] + r if l_class == Hash

        raise "Unhandled case when foldr'ing sequence."
      end

      # Flatten results from a repetition of a single parslet. named indicates
      # whether the user has named the result or not. If the user has named
      # the results, we want to leave an empty list alone - otherwise it is
      # turned into an empty string.
      #
      # @api private
      #
      def flatten_repetition(list, named)
        # Single pass to find out whether any Hash or Array is present.
        has_hash = false
        has_array = false

        i = 0
        len = list.size
        while i < len
          e = list[i]
          has_hash = true if e.instance_of?(Hash)
          has_array = true if e.instance_of?(Array)
          break if has_hash && has_array # Early exit if both found

          i += 1
        end

        if has_hash
          # If keyed subtrees are in the array, we'll want to discard all
          # strings inbetween. To keep them, name them.
          return list.select { |e| e.instance_of?(Hash) }
        end

        if has_array
          # If any arrays are nested in this array, flatten all arrays to this
          # level.
          return list
                 .select { |e| e.instance_of?(Array) }
                 .flatten(1)
        end

        # Consistent handling of empty lists, when we act on a named result
        return [] if named && list.empty?

        # If there are only strings, concatenate them and return that.
        foldl(list.compact) { |s, e| s + e }
      end

      # That annoying warning 'Duplicate subtrees while merging result' comes
      # from here. You should add more '.as(...)' names to your intermediary tree.
      #
      def warn_about_duplicate_keys(h1, h2)
        d = h1.keys & h2.keys
        return if d.empty?

        warn "Duplicate subtrees while merging result of \n  #{inspect}\nonly the values " \
             "of the latter will be kept. (keys: #{d.inspect})"
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Captures the result of parsing and stores it for later use.
4
+ # Use the capture method to capture a sub-expression result, then
5
+ # access it via context.captures[:name] in dynamic blocks.
6
+ #
7
+ # @example
8
+ # str('a').capture(:first) >> dynamic { |ctx| str(ctx.captures[:first]) }
9
+ #
10
module Parsanol
  module Atoms
    # Wraps another atom and, whenever that atom succeeds, stores its
    # flattened result in the context's captures under a symbol key. The
    # wrapped atom's own outcome is passed through unchanged.
    class Capture < Parsanol::Atoms::Base
      # @return [Parsanol::Atoms::Base] the wrapped atom
      # @return [Symbol] key under which the result is stored
      attr_reader :inner_atom, :capture_key

      # @param atom [Parsanol::Atoms::Base] atom whose result is captured
      # @param name [#to_sym] capture name; converted to a Symbol
      def initialize(atom, name)
        super()
        @inner_atom = atom
        @capture_key = name.to_sym
      end

      # Applies the wrapped atom and records its flattened result on success.
      #
      # +consume_all+ defaults to false so that the signature accepts the same
      # calls as Base#apply.
      #
      # @param source [Parsanol::Source] input
      # @param context [Parsanol::Atoms::Context] parsing state holding captures
      # @param consume_all [Boolean] forwarded to the wrapped atom
      # @return [Array(Boolean, Object)] the wrapped atom's outcome
      def apply(source, context, consume_all = false)
        success, result = @inner_atom.apply(source, context, consume_all)

        # Only the stored copy is flattened; the returned value stays raw.
        context.captures[@capture_key] = flatten(result) if success

        [success, result]
      end

      # @param prec [Integer] precedence handed to the wrapped atom's #to_s
      # @return [String] "(:key = <inner>)"
      def to_s_inner(prec)
        "(#{@capture_key.inspect} = #{@inner_atom.to_s(prec)})"
      end
    end
  end
end