parsanol 1.0.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +12 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +487 -0
  5. data/Rakefile +135 -0
  6. data/lib/parsanol/3.2/parsanol_native.so +0 -0
  7. data/lib/parsanol/3.3/parsanol_native.so +0 -0
  8. data/lib/parsanol/3.4/parsanol_native.so +0 -0
  9. data/lib/parsanol/4.0/parsanol_native.so +0 -0
  10. data/lib/parsanol/ast_visitor.rb +122 -0
  11. data/lib/parsanol/atoms/alternative.rb +122 -0
  12. data/lib/parsanol/atoms/base.rb +202 -0
  13. data/lib/parsanol/atoms/can_flatten.rb +194 -0
  14. data/lib/parsanol/atoms/capture.rb +38 -0
  15. data/lib/parsanol/atoms/context.rb +334 -0
  16. data/lib/parsanol/atoms/context_optimized.rb +38 -0
  17. data/lib/parsanol/atoms/custom.rb +110 -0
  18. data/lib/parsanol/atoms/cut.rb +66 -0
  19. data/lib/parsanol/atoms/dsl.rb +96 -0
  20. data/lib/parsanol/atoms/dynamic.rb +39 -0
  21. data/lib/parsanol/atoms/entity.rb +75 -0
  22. data/lib/parsanol/atoms/ignored.rb +37 -0
  23. data/lib/parsanol/atoms/infix.rb +162 -0
  24. data/lib/parsanol/atoms/lookahead.rb +82 -0
  25. data/lib/parsanol/atoms/named.rb +74 -0
  26. data/lib/parsanol/atoms/re.rb +83 -0
  27. data/lib/parsanol/atoms/repetition.rb +259 -0
  28. data/lib/parsanol/atoms/scope.rb +35 -0
  29. data/lib/parsanol/atoms/sequence.rb +194 -0
  30. data/lib/parsanol/atoms/str.rb +103 -0
  31. data/lib/parsanol/atoms/visitor.rb +91 -0
  32. data/lib/parsanol/atoms.rb +46 -0
  33. data/lib/parsanol/buffer.rb +133 -0
  34. data/lib/parsanol/builder_callbacks.rb +353 -0
  35. data/lib/parsanol/cause.rb +122 -0
  36. data/lib/parsanol/context.rb +39 -0
  37. data/lib/parsanol/convenience.rb +36 -0
  38. data/lib/parsanol/edit_tracker.rb +111 -0
  39. data/lib/parsanol/error_reporter/contextual.rb +99 -0
  40. data/lib/parsanol/error_reporter/deepest.rb +120 -0
  41. data/lib/parsanol/error_reporter/tree.rb +63 -0
  42. data/lib/parsanol/error_reporter.rb +100 -0
  43. data/lib/parsanol/expression/treetop.rb +154 -0
  44. data/lib/parsanol/expression.rb +106 -0
  45. data/lib/parsanol/fast_mode.rb +149 -0
  46. data/lib/parsanol/first_set.rb +79 -0
  47. data/lib/parsanol/grammar_builder.rb +177 -0
  48. data/lib/parsanol/incremental_parser.rb +177 -0
  49. data/lib/parsanol/interval_tree.rb +217 -0
  50. data/lib/parsanol/lazy_result.rb +179 -0
  51. data/lib/parsanol/lexer.rb +144 -0
  52. data/lib/parsanol/mermaid.rb +139 -0
  53. data/lib/parsanol/native/parser.rb +612 -0
  54. data/lib/parsanol/native/serializer.rb +248 -0
  55. data/lib/parsanol/native/transformer.rb +435 -0
  56. data/lib/parsanol/native/types.rb +42 -0
  57. data/lib/parsanol/native.rb +217 -0
  58. data/lib/parsanol/optimizer.rb +85 -0
  59. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  60. data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
  61. data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
  62. data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
  63. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  64. data/lib/parsanol/options/ruby_transform.rb +107 -0
  65. data/lib/parsanol/options/serialized.rb +94 -0
  66. data/lib/parsanol/options/zero_copy.rb +128 -0
  67. data/lib/parsanol/options.rb +20 -0
  68. data/lib/parsanol/parallel.rb +133 -0
  69. data/lib/parsanol/parser.rb +182 -0
  70. data/lib/parsanol/parslet.rb +151 -0
  71. data/lib/parsanol/pattern/binding.rb +91 -0
  72. data/lib/parsanol/pattern.rb +159 -0
  73. data/lib/parsanol/pool.rb +219 -0
  74. data/lib/parsanol/pools/array_pool.rb +75 -0
  75. data/lib/parsanol/pools/buffer_pool.rb +175 -0
  76. data/lib/parsanol/pools/position_pool.rb +92 -0
  77. data/lib/parsanol/pools/slice_pool.rb +64 -0
  78. data/lib/parsanol/position.rb +94 -0
  79. data/lib/parsanol/resettable.rb +29 -0
  80. data/lib/parsanol/result.rb +46 -0
  81. data/lib/parsanol/result_builder.rb +208 -0
  82. data/lib/parsanol/result_stream.rb +261 -0
  83. data/lib/parsanol/rig/rspec.rb +71 -0
  84. data/lib/parsanol/rope.rb +81 -0
  85. data/lib/parsanol/scope.rb +104 -0
  86. data/lib/parsanol/slice.rb +146 -0
  87. data/lib/parsanol/source/line_cache.rb +109 -0
  88. data/lib/parsanol/source.rb +180 -0
  89. data/lib/parsanol/source_location.rb +167 -0
  90. data/lib/parsanol/streaming_parser.rb +124 -0
  91. data/lib/parsanol/string_view.rb +195 -0
  92. data/lib/parsanol/transform.rb +226 -0
  93. data/lib/parsanol/version.rb +5 -0
  94. data/lib/parsanol/wasm/README.md +80 -0
  95. data/lib/parsanol/wasm/package.json +51 -0
  96. data/lib/parsanol/wasm/parsanol.js +252 -0
  97. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  98. data/lib/parsanol/wasm_parser.rb +240 -0
  99. data/lib/parsanol.rb +280 -0
  100. data/parsanol-ruby.gemspec +67 -0
  101. metadata +280 -0
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Named rule wrapper that provides lazy evaluation and caching for grammar
4
+ # rules. Rules are defined as Entity atoms and named, and they can be
5
+ # referenced by other rules with automatic cycle detection.
6
+ #
7
+ # @example
8
+ # class MyParser < Parsanol::Parser
9
+ # rule(:expression) { str('a') >> str('b') }
10
+ # root(:expression)
11
+ # end
12
+ #
13
+ # MyParser.new.parse('ab') # => ["a", "b"]
14
+ #
15
module Parsanol
  module Atoms
    # Named grammar rule whose body block is evaluated on first use and then
    # memoized, so the rule's atom is built at most once per entity.
    class Entity < Parsanol::Atoms::Base
      # @return [Object] the name this rule was registered under
      attr_reader :rule_name

      # Alias for backward compatibility
      alias name rule_name

      # @param name [Object] rule name
      # @param label_or_opts [Hash, Object] options hash (new API), or a bare
      #   label value (old API) which is wrapped as `{ label: value }`
      # @yield block that builds and returns the rule's atom
      def initialize(name, label_or_opts = {}, &body)
        super()
        @rule_name = name
        @options = label_or_opts.is_a?(Hash) ? label_or_opts : { label: label_or_opts }
        @body = body
        @cached_atom = nil
        # Mirror the label onto the entity itself for display purposes.
        self.label = @options[:label] if @options[:label]
      end

      # The block that defines this rule. The block is stored in @body, so a
      # plain `attr_reader :block_definition` would always yield nil; expose
      # the stored block explicitly instead.
      #
      # @return [Proc, nil] the defining block, or nil when none was given
      def block_definition
        @body
      end

      # Evaluates the rule body on first call and returns the memoized atom
      # on every later call.
      #
      # @return [Object] the atom produced by the rule body
      # @raise [NotImplementedError] when the rule has no body block or the
      #   block returns nil
      def parslet
        return @cached_atom unless @cached_atom.nil?

        # A rule declared without a block cannot be evaluated; report it the
        # same way as a body that yields nil rather than failing on nil.call.
        raise_not_implemented if @body.nil?

        @cached_atom = @body.call
        raise_not_implemented if @cached_atom.nil?

        @cached_atom.label = @options[:label] if @options[:label]
        @cached_atom
      end

      # Resolves the rule's atom and delegates the parse attempt to it.
      #
      # @return [Object] whatever the resolved atom's #apply returns
      def try(source, context, consume_all)
        parslet.apply(source, context, consume_all)
      end

      # Always false for entities; the resolved atom is already memoized here.
      # NOTE(review): presumably Base consults this to decide whether to
      # memoize parse results -- confirm against Base.
      #
      # @return [Boolean]
      def cached?
        false
      end

      # @return [String] the upper-cased rule name
      def to_s_inner(_prec)
        rule_name.to_s.upcase
      end

      private

      # Raises NotImplementedError with a backtrace that omits frames
      # originating from this file, so the trace starts at the caller.
      def raise_not_implemented
        trace_lines = caller.grep_v(/#{Regexp.escape(__FILE__)}/)
        error_message = "rule '#{@rule_name}' has not been implemented, but already used?"
        exception = NotImplementedError.new(error_message)
        exception.set_backtrace(trace_lines)
        raise exception
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Ignores the result of a match. Useful for cases where you want to match
4
+ # a prefix or suffix without returning it.
5
+
6
+ #
7
+ # @example
8
+ # str('foo') # will return 'foo'
9
+ # str('foo').ignore # will return nil
10
+ #
11
+ # Inspired by Parslet (MIT License).
12
+
13
module Parsanol
  module Atoms
    # Wrapper atom that runs another atom and discards its value on success.
    class Ignored < Parsanol::Atoms::Base
      # @return [Object] the atom whose result is discarded
      attr_reader :wrapped_atom

      # @param atom [Object] atom to wrap
      def initialize(atom)
        super()
        @wrapped_atom = atom
      end

      # Applies the wrapped atom. A failure is passed through with its
      # payload; a success is reported with nil in place of the matched value.
      #
      # @return [Array] `[true, nil]` on success, `[false, payload]` otherwise
      def apply(source, context, consume_all)
        matched, payload = @wrapped_atom.apply(source, context, consume_all)

        matched ? [true, nil] : [false, payload]
      end

      # @param prec [Object] precedence forwarded to the wrapped atom's #to_s
      # @return [String]
      def to_s_inner(prec)
        inner = @wrapped_atom.to_s(prec)
        "ignored(#{inner})"
      end
    end
  end
end
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Infix expression parser using precedence climbing algorithm.
4
+ # Parses mathematical-style expressions with configurable operators.
5
+ #
6
+ # @example Basic usage
7
+ # element = match('[0-9]').repeat(1)
8
+ # operations = [
9
+ # [str('+'), 1, :left],
10
+ # [str('*'), 2, :left]
11
+ # ]
12
+ # infix = Parsanol::Atoms::Infix.new(element, operations)
13
+ #
14
+ # Inspired by Parslet (MIT License).
15
+ # Algorithm reference: http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/
16
+
17
module Parsanol
  module Atoms
    # Infix expression atom: parses operands separated by operators and
    # groups them by operator precedence and associativity.
    class Infix < Parsanol::Atoms::Base
      attr_reader :base_element, :operator_table, :result_combiner

      # @param base_element [Object] atom used to parse each operand
      # @param operations [Array<Array>] entries of the form
      #   [operator_atom, precedence, associativity]
      # @yield [left, operator, right] optional block combining one binary
      #   operation into a result; defaults to a { left:, op:, right: } hash
      def initialize(base_element, operations, &combiner)
        super()
        @base_element = base_element
        @operator_table = operations
        @result_combiner = combiner || default_combiner
      end

      # Parses an infix expression starting at the current source position.
      #
      # A failure deeper in the climb is signalled with `throw :parse_error`;
      # the thrown error value then becomes this method's return value.
      #
      # @return [Object] success value from #succ, or the thrown error value
      def try(source, context, consume_all)
        catch(:parse_error) do
          flat_parts = climb_precedence(source, context, consume_all)
          succ(build_result_tree(flat_parts))
        end
      end

      # Debug representation listing the operand atom and operator atoms.
      #
      # @return [String]
      def to_s_inner(_precedence)
        rendered_ops = @operator_table.map { |op_atom, _prec, _assoc| op_atom.inspect }
        "infix_expression(#{@base_element.inspect}, [#{rendered_ops.join(', ')}])"
      end

      private

      # Combiner used when no block is supplied to #initialize.
      #
      # @return [Proc] three-argument lambda returning a nested hash
      def default_combiner
        ->(left_side, operator, right_side) { { left: left_side, op: operator, right: right_side } }
      end

      # Folds the flat [operand, op, operand, op, ...] list produced by
      # #climb_precedence into a structure by calling the combiner left to
      # right. Right operands that are arrays are folded recursively first.
      # The input array is consumed (emptied) in the process.
      #
      # @param expression [Object] array of parts, or a single leaf value
      # @return [Object] combined result, or the leaf value unchanged
      def build_result_tree(expression)
        return expression unless expression.is_a?(Array)

        folded = expression.shift

        until expression.empty?
          op_token, rhs = expression.shift(2)
          rhs = build_result_tree(rhs) if rhs.is_a?(Array)
          folded = @result_combiner.call(folded, op_token, rhs)
        end

        folded
      end

      # One level of precedence climbing: reads an operand, then keeps
      # consuming "operator operand" pairs while the operator's precedence is
      # at least min_precedence. A weaker operator is un-read by restoring
      # the saved byte position, leaving it for an outer level.
      #
      # @param min_precedence [Integer] lowest precedence accepted here
      # @return [Object] a single operand, or an array of parts
      def climb_precedence(source, context, consume_all, min_precedence = 1)
        matched, operand = @base_element.apply(source, context, false)
        unless matched
          throw :parse_error,
                context.err(self, source, "Expected #{@base_element.inspect}", [operand])
        end

        parts = [flatten(operand, true)]

        loop do
          rewind_to = source.bytepos
          op_value, op_prec, op_assoc = try_match_operator(source, context, false)

          # No operator matched: this level is complete.
          break unless op_value

          if op_prec < min_precedence
            # Operator belongs to an outer level; give it back.
            source.bytepos = rewind_to
            break
          end

          # Left-associative operators demand strictly higher precedence on
          # their right-hand side; others allow the same precedence again.
          rhs_min = op_assoc == :left ? op_prec + 1 : op_prec

          parts << op_value
          parts << climb_precedence(source, context, consume_all, rhs_min)
        end

        simplify_result(parts)
      end

      # Tries each operator atom in table order and reports the first match.
      #
      # @return [Array, nil] [matched_value, precedence, associativity],
      #   or nil when no operator atom matches
      def try_match_operator(source, context, consume_all)
        @operator_table.each do |op_atom, op_prec, op_assoc|
          matched, raw_value = op_atom.apply(source, context, consume_all)
          next unless matched

          return [flatten(raw_value, true), op_prec, op_assoc]
        end

        nil
      end

      # Unwraps a one-element parts list to avoid needless nesting.
      #
      # @param result [Array] expression parts
      # @return [Object] the sole element, or the array itself
      def simplify_result(result)
        return result.first if result.length == 1

        result
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Lookahead assertion - checks for pattern presence/absence without consuming.
4
+ # Position is always restored after the check.
5
+ #
6
+ # @example Positive lookahead (must be present)
7
+ # str('foo').present? # succeeds if 'foo' ahead
8
+ #
9
+ # @example Negative lookahead (must not be present)
10
+ # str('foo').absent? # succeeds if 'foo' not ahead
11
+ #
12
module Parsanol
  module Atoms
    # Zero-width assertion: runs a parser, restores the source position, and
    # succeeds or fails depending on whether the parser matched.
    class Lookahead < Parsanol::Atoms::Base
      # @return [Boolean] true for a positive assertion, false for negative
      attr_reader :positive

      # @return [Object] the parser being tested
      attr_reader :bound_parslet

      # @param parser [Object] parser to test at the current position
      # @param is_positive [Boolean] whether a match means success
      def initialize(parser, is_positive = true)
        super()
        @positive = is_positive
        @bound_parslet = parser

        # Error message parts are built once and reused by #try.
        @should_start = ['Input should start with ', parser].freeze
        @should_not_start = ['Input should not start with ', parser].freeze
      end

      # Runs the bound parser, then rewinds the source to where it started
      # regardless of the outcome.
      #
      # @param source [Object] input source
      # @param context [Object] parsing context used to build errors
      # @param consume_all [Boolean] forwarded to the bound parser
      # @return [Object] `ok(nil)` when the assertion holds, otherwise the
      #   value of `context.err_at`
      def try(source, context, consume_all)
        origin = source.bytepos
        matched, = @bound_parslet.apply(source, context, consume_all)
        source.bytepos = origin

        if @positive
          matched ? ok(nil) : context.err_at(self, source, @should_start, source.bytepos)
        else
          matched ? context.err_at(self, source, @should_not_start, source.bytepos) : ok(nil)
        end
      end

      precedence LOOKAHEAD

      # Renders as '&' (positive) or '!' (negative) followed by the parser.
      #
      # @param prec [Object] precedence forwarded to the bound parser's #to_s
      # @return [String]
      def to_s_inner(prec)
        prefix = @positive ? '&' : '!'
        prefix + @bound_parslet.to_s(prec).to_s
      end

      # @return [Set] a set containing only Parsanol::FirstSet::EPSILON
      def compute_first_set
        Set.new([Parsanol::FirstSet::EPSILON])
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Named capture - assigns a label to matched content.
4
+ # Results appear as { label: value } in the parse tree.
5
+ #
6
+ # @example Labeling matches
7
+ # str('foo').as(:name) # returns { name: 'foo' }
8
+ #
9
module Parsanol
  module Atoms
    # Labels the result of another parser: a successful match is returned
    # as a one-entry hash keyed by the label.
    class Named < Parsanol::Atoms::Base
      # @return [Object] the wrapped parser
      attr_reader :parslet

      # @return [Object] the label used as the hash key
      attr_reader :name

      # @param parser [Object] parser to wrap
      # @param label [Object] key under which the match is stored
      def initialize(parser, label)
        super()
        @parslet = parser
        @name = label
      end

      # Runs the wrapped parser. Failures are returned as `[false, payload]`;
      # successes are wrapped via #wrap_result and passed to #ok.
      #
      # @return [Object]
      def apply(source, context, consume_all)
        matched, payload = @parslet.apply(source, context, consume_all)

        if matched
          ok(wrap_result(payload))
        else
          [false, payload]
        end
      end

      # Always false for this wrapper.
      #
      # @return [Boolean]
      def cached?
        false
      end

      # Renders as "label:inner".
      #
      # @param prec [Object] precedence forwarded to the wrapped parser
      # @return [String]
      def to_s_inner(prec)
        inner = @parslet.to_s(prec)
        "#{@name}:#{inner}"
      end

      # Delegates to the wrapped parser's FIRST set.
      #
      # @return [Object] value of the wrapped parser's #first_set
      def compute_first_set
        @parslet.first_set
      end

      private

      # Builds the labeled hash, flattening the matched value first.
      #
      # @param matched [Object] raw value from the wrapped parser
      # @return [Hash] `{ name => flattened_value }`
      def wrap_result(matched)
        flattened = flatten(matched, true)
        { @name => flattened }
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Regular expression matcher for single characters.
4
+ # Matches one character against a character class pattern.
5
+ #
6
+ # @example Character classes
7
+ # match('[a-z]') # matches a-z
8
+ # match('\d') # matches digits
9
+ # any # matches any character
10
+ #
11
module Parsanol
  module Atoms
    # Matches a single unit of input against a regular expression pattern.
    class Re < Parsanol::Atoms::Base
      # @return [String] pattern source as given (converted with #to_s)
      attr_reader :match

      # @return [Regexp] pattern compiled with Regexp::MULTILINE
      attr_reader :re

      # @param pattern [#to_s] pattern source
      def initialize(pattern)
        super()
        @match = pattern.to_s
        @re = Regexp.new(@match, Regexp::MULTILINE)

        # Display form: the inspected pattern minus its surrounding quotes.
        quoted = @match.inspect
        @display = quoted[1..-2] || @match

        # Error messages are built once and reused by #try.
        @eof_error = 'Unexpected end of input'
        @no_match_error = "Failed to match #{@display}"
      end

      # Consumes one unit of input when the source matches the pattern;
      # otherwise reports end-of-input or a mismatch through the context.
      #
      # @param source [Object] input source
      # @param context [Object] parsing context used to build errors
      # @param _consume_all [Boolean] unused
      # @return [Object] `ok(...)` on a match, else the context's error value
      def try(source, context, _consume_all)
        if source.matches?(@re)
          ok(source.consume(1))
        elsif source.chars_left < 1
          context.err(self, source, @eof_error)
        else
          context.err(self, source, @no_match_error)
        end
      end

      # @param _prec [Object] unused
      # @return [String] the display form of the pattern
      def to_s_inner(_prec)
        @display
      end

      # Always false for this atom.
      #
      # @return [Boolean]
      def cached?
        false
      end

      # Always true for this atom.
      #
      # @return [Boolean]
      def flat?
        true
      end

      # @return [Set] a set containing only this atom
      def compute_first_set
        Set.new([self])
      end
    end
  end
end