parsanol 1.0.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +12 -0
- data/LICENSE +23 -0
- data/README.adoc +487 -0
- data/Rakefile +135 -0
- data/lib/parsanol/3.2/parsanol_native.so +0 -0
- data/lib/parsanol/3.3/parsanol_native.so +0 -0
- data/lib/parsanol/3.4/parsanol_native.so +0 -0
- data/lib/parsanol/4.0/parsanol_native.so +0 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +122 -0
- data/lib/parsanol/atoms/base.rb +202 -0
- data/lib/parsanol/atoms/can_flatten.rb +194 -0
- data/lib/parsanol/atoms/capture.rb +38 -0
- data/lib/parsanol/atoms/context.rb +334 -0
- data/lib/parsanol/atoms/context_optimized.rb +38 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +66 -0
- data/lib/parsanol/atoms/dsl.rb +96 -0
- data/lib/parsanol/atoms/dynamic.rb +39 -0
- data/lib/parsanol/atoms/entity.rb +75 -0
- data/lib/parsanol/atoms/ignored.rb +37 -0
- data/lib/parsanol/atoms/infix.rb +162 -0
- data/lib/parsanol/atoms/lookahead.rb +82 -0
- data/lib/parsanol/atoms/named.rb +74 -0
- data/lib/parsanol/atoms/re.rb +83 -0
- data/lib/parsanol/atoms/repetition.rb +259 -0
- data/lib/parsanol/atoms/scope.rb +35 -0
- data/lib/parsanol/atoms/sequence.rb +194 -0
- data/lib/parsanol/atoms/str.rb +103 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +46 -0
- data/lib/parsanol/buffer.rb +133 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +122 -0
- data/lib/parsanol/context.rb +39 -0
- data/lib/parsanol/convenience.rb +36 -0
- data/lib/parsanol/edit_tracker.rb +111 -0
- data/lib/parsanol/error_reporter/contextual.rb +99 -0
- data/lib/parsanol/error_reporter/deepest.rb +120 -0
- data/lib/parsanol/error_reporter/tree.rb +63 -0
- data/lib/parsanol/error_reporter.rb +100 -0
- data/lib/parsanol/expression/treetop.rb +154 -0
- data/lib/parsanol/expression.rb +106 -0
- data/lib/parsanol/fast_mode.rb +149 -0
- data/lib/parsanol/first_set.rb +79 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/incremental_parser.rb +177 -0
- data/lib/parsanol/interval_tree.rb +217 -0
- data/lib/parsanol/lazy_result.rb +179 -0
- data/lib/parsanol/lexer.rb +144 -0
- data/lib/parsanol/mermaid.rb +139 -0
- data/lib/parsanol/native/parser.rb +612 -0
- data/lib/parsanol/native/serializer.rb +248 -0
- data/lib/parsanol/native/transformer.rb +435 -0
- data/lib/parsanol/native/types.rb +42 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +85 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +107 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +128 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parser.rb +182 -0
- data/lib/parsanol/parslet.rb +151 -0
- data/lib/parsanol/pattern/binding.rb +91 -0
- data/lib/parsanol/pattern.rb +159 -0
- data/lib/parsanol/pool.rb +219 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +175 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +94 -0
- data/lib/parsanol/resettable.rb +29 -0
- data/lib/parsanol/result.rb +46 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +261 -0
- data/lib/parsanol/rig/rspec.rb +71 -0
- data/lib/parsanol/rope.rb +81 -0
- data/lib/parsanol/scope.rb +104 -0
- data/lib/parsanol/slice.rb +146 -0
- data/lib/parsanol/source/line_cache.rb +109 -0
- data/lib/parsanol/source.rb +180 -0
- data/lib/parsanol/source_location.rb +167 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +195 -0
- data/lib/parsanol/transform.rb +226 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +240 -0
- data/lib/parsanol.rb +280 -0
- data/parsanol-ruby.gemspec +67 -0
- metadata +280 -0
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
  # WASM-based parser for Opal environments.
  #
  # This class provides a bridge between Opal (Ruby compiled to JavaScript)
  # and the WASM parser. It talks to the global JavaScript object
  # +ParsletWasm+ through Opal x-strings, so its parsing methods only work
  # when compiled by Opal; under plain Ruby only construction and the
  # pure-Ruby helpers are usable.
  #
  # @example In Opal environment
  #   # First, ensure WASM is loaded (in your HTML/JS)
  #   # <script src="parslet_wasm.js"></script>
  #   # <script>
  #   #   ParsletWasm.initParslet().then(() => console.log('ready'));
  #   # </script>
  #
  #   # Then in Ruby/Opal:
  #   grammar_json = parser.to_json
  #   wasm_parser = Parsanol::WasmParser.new(grammar_json)
  #   result = wasm_parser.parse(input)
  #
  class WasmParser
    # Tags for flat array format
    TAG_NIL = 0x00
    TAG_BOOL = 0x01
    TAG_INT = 0x02
    TAG_FLOAT = 0x03
    TAG_STRING = 0x04
    TAG_ARRAY_START = 0x05
    TAG_ARRAY_END = 0x06
    TAG_HASH_START = 0x07
    TAG_HASH_END = 0x08
    TAG_HASH_KEY = 0x09

    # @return [String] The grammar JSON
    attr_reader :grammar_json

    # Create a new WASM parser.
    #
    # The underlying JavaScript parser object is not built here; it is
    # created lazily on the first parse call (see #ensure_initialized).
    #
    # @param grammar_json [String, Hash] Grammar JSON string or hash
    #   (a Hash is serialized with #to_json)
    #
    def initialize(grammar_json)
      @grammar_json = grammar_json.is_a?(Hash) ? grammar_json.to_json : grammar_json
      @parser = nil
    end

    # Parse input string and return AST
    #
    # @param input [String] Input string to parse
    # @return [Hash, Array, String, nil] Parsed AST
    # @raise [RuntimeError] If parsing fails
    #
    def parse(input)
      ensure_initialized
      result = `#{@parser}.parse(#{input})`
      convert_js_to_ruby(result)
    end

    # Parse input and return the decoded result of the flat (tagged) array
    # produced by the JavaScript parser's +parseFlat+.
    #
    # @param input [String] Input string to parse
    # @return [Object, nil] Value decoded from the flat array (see #decode_flat)
    # @raise [RuntimeError] If parsing fails or the flat array is malformed
    #
    def parse_flat(input)
      ensure_initialized
      flat = `#{@parser}.parseFlat(#{input})`
      decode_flat(flat, input)
    end

    # Parse input and return JSON string
    #
    # @param input [String] Input string to parse
    # @return [String] JSON string of parsed AST
    # @raise [RuntimeError] If parsing fails
    #
    def parse_json(input)
      ensure_initialized
      `#{@parser}.parseJson(#{input})`
    end

    # Check if WASM is available and initialized
    #
    # @return [Boolean] false when +ParsletWasm+ is undefined or has no
    #   +isInitialized+ function; otherwise whatever +isInitialized()+ returns
    #
    def self.available?
      `
        if (typeof ParsletWasm === 'undefined') {
          return false;
        }
        return ParsletWasm.isInitialized ? ParsletWasm.isInitialized() : false;
      `
    end

    # Initialize WASM module (async)
    #
    # @return [Promise] Result of +ParsletWasm.initParslet()+, or a rejected
    #   Promise when +ParsletWasm+ (or its +initParslet+) is not present
    #
    def self.init
      `
        if (typeof ParsletWasm !== 'undefined' && ParsletWasm.initParslet) {
          return ParsletWasm.initParslet();
        }
        return Promise.reject(new Error('ParsletWasm not loaded'));
      `
    end

    private

    # Lazily construct the JavaScript parser object for @grammar_json.
    # Does nothing once @parser is set; throws a JavaScript Error when
    # ParsletWasm is missing or reports that it is not initialized.
    def ensure_initialized
      return if @parser

      `
        if (typeof ParsletWasm === 'undefined') {
          throw new Error('ParsletWasm not loaded. Include parslet.js and parsanol_native_bg.wasm');
        }
        if (!ParsletWasm.isInitialized || !ParsletWasm.isInitialized()) {
          throw new Error('WASM not initialized. Call Parsanol::WasmParser.init first');
        }
        #{@parser} = new ParsletWasm.ParsletParser(#{@grammar_json});
      `
    end

    # Convert JavaScript result to Ruby.
    #
    # null/undefined become nil; booleans, numbers and strings pass through;
    # arrays and plain objects are converted element by element.
    #
    # The argument is bound into the JavaScript via interpolation. The
    # previous version declared the parameter as +_js_obj+ while the
    # JavaScript body read +js_obj+, a name that was never bound.
    #
    # NOTE(review): the object branch builds a plain JavaScript object, not a
    # Ruby Hash -- confirm this is what callers expect.
    #
    # @param js_obj [Object] raw value returned by the JavaScript parser
    # @return [Object, nil] converted value
    def convert_js_to_ruby(js_obj)
      %x{
        var value = #{js_obj};
        if (value === null || value === undefined) {
          return nil;
        }
        if (typeof value === 'boolean') {
          return value;
        }
        if (typeof value === 'number') {
          return value;
        }
        if (typeof value === 'string') {
          return value;
        }
        if (Array.isArray(value)) {
          return value.map(function(item) {
            return #{convert_js_to_ruby(`item`)};
          });
        }
        if (typeof value === 'object') {
          var hash = {};
          Object.keys(value).forEach(function(key) {
            hash[key] = #{convert_js_to_ruby(`value[key]`)};
          });
          return hash;
        }
        return nil;
      }
    end

    # Decode flat array format to Ruby objects.
    #
    # Walks +flat+ slot by slot, dispatching on the tag in each position and
    # building values on a stack. Container start tags push a marker symbol;
    # the matching end tag collapses everything above the marker into an
    # Array or Hash.
    #
    # @param flat [Object] JavaScript array-like of tagged slots
    # @param input [String] original input; TAG_STRING slots are
    #   (offset, length) byte ranges into it
    # @return [Object, nil] the bottom-most value left on the stack
    # @raise [RuntimeError] on an unknown tag, an end tag without a matching
    #   start tag, or a hash with an odd number of key/value slots
    def decode_flat(flat, input)
      stack = []
      i = 0
      length = `#{flat}.length`

      while i < length
        tag = `#{flat}[#{i}]`

        case tag
        when TAG_NIL
          stack << nil
          i += 1
        when TAG_BOOL
          stack << (`#{flat}[#{i + 1}]` != 0)
          i += 2
        when TAG_INT
          stack << `#{flat}[#{i + 1}]`
          i += 2
        when TAG_FLOAT
          # The slot holds the float's raw 64-bit pattern; reinterpret it.
          bits = `#{flat}[#{i + 1}]`
          float = `new Float64Array(new BigUint64Array([#{bits}]).buffer)[0]`
          stack << float
          i += 2
        when TAG_STRING
          offset = `#{flat}[#{i + 1}]`
          len = `#{flat}[#{i + 2}]`
          stack << input.byteslice(offset, len)
          i += 3
        when TAG_ARRAY_START
          stack << :array_marker
          i += 1
        when TAG_ARRAY_END
          items = pop_until_marker(stack, :array_marker, i)
          stack << items
          i += 1
        when TAG_HASH_START
          stack << :hash_marker
          i += 1
        when TAG_HASH_END
          entries = pop_until_marker(stack, :hash_marker, i)
          raise "Odd number of hash entries at index #{i}" if entries.length.odd?

          # Entries alternate key, value in their original order.
          stack << entries.each_slice(2).to_h
          i += 1
        when TAG_HASH_KEY
          len = `#{flat}[#{i + 1}]`
          i += 3 # Skip tag, len, and placeholder
          # Read key bytes: each slot carries up to 8 bytes, lowest byte first.
          key_bytes = []
          # Integer#div floors explicitly.
          # NOTE(review): plain `/` may not floor on JS-backed numbers under
          # Opal -- confirm; under MRI this is identical to `/`.
          chunks = (len + 7).div(8)
          chunks.times do |j|
            chunk = `#{flat}[#{i + j}]`
            8.times do |k|
              break if key_bytes.length >= len

              key_bytes << ((chunk >> (k * 8)) & 0xff)
            end
          end
          i += chunks
          key = key_bytes.pack('C*').force_encoding('UTF-8')
          stack << key
        else
          raise "Unknown tag: #{tag} at index #{i}"
        end
      end

      stack.first
    end

    # Remove and return everything above the topmost +marker+ on +stack+
    # (in original order), also removing the marker itself.
    #
    # Raises instead of looping forever when the marker is absent, which is
    # what popping until the marker would do on an unbalanced stream.
    #
    # @param stack [Array] decode stack, mutated in place
    # @param marker [Symbol] :array_marker or :hash_marker
    # @param index [Integer] position of the end tag, for the error message
    # @return [Array] the collected values
    # @raise [RuntimeError] if +marker+ is not on the stack
    def pop_until_marker(stack, marker, index)
      start = stack.rindex(marker)
      raise "Unbalanced end tag (missing #{marker}) at index #{index}" unless start

      values = stack.pop(stack.length - start - 1)
      stack.pop # Remove marker
      values
    end
  end

  # Factory method to create appropriate parser
  #
  # @param grammar_json [String, Hash] Grammar JSON
  # @return [WasmParser] a new parser for the given grammar
  #
  def self.create_wasm_parser(grammar_json)
    WasmParser.new(grammar_json)
  end
end
|
data/lib/parsanol.rb
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
|
|
5
|
+
# Parsanol - A high-performance PEG parser construction library for Ruby.
|
|
6
|
+
#
|
|
7
|
+
# Typical usage:
|
|
8
|
+
#
|
|
9
|
+
# require 'parsanol'
|
|
10
|
+
#
|
|
11
|
+
# class MyParser < Parsanol::Parser
|
|
12
|
+
# rule(:a) { str('a').repeat }
|
|
13
|
+
# root(:a)
|
|
14
|
+
# end
|
|
15
|
+
#
|
|
16
|
+
# result = MyParser.new.parse('aaaa') # => 'aaaa'@0
|
|
17
|
+
#
|
|
18
|
+
# Parsanol provides a declarative DSL for constructing parsers using PEG
|
|
19
|
+
# (Parsing Expression Grammar) semantics. The library is designed as a
|
|
20
|
+
# high-performance, feature-rich alternative to Parslet.
|
|
21
|
+
#
|
|
22
|
+
# == Two-Stage Parsing
|
|
23
|
+
#
|
|
24
|
+
# Parsing is typically done in two stages:
|
|
25
|
+
#
|
|
26
|
+
# 1. Parse the input string to produce an intermediate tree
|
|
27
|
+
# 2. Transform the tree into an application-specific AST
|
|
28
|
+
#
|
|
29
|
+
# This separation allows grammar changes without affecting downstream code.
|
|
30
|
+
#
|
|
31
|
+
# == Error Handling
|
|
32
|
+
#
|
|
33
|
+
# Failed parses raise {Parsanol::ParseFailed} with detailed error information:
|
|
34
|
+
#
|
|
35
|
+
# begin
|
|
36
|
+
# parser.parse(invalid_input)
|
|
37
|
+
# rescue Parsanol::ParseFailed => e
|
|
38
|
+
# puts e.parse_failure_cause.ascii_tree
|
|
39
|
+
# end
|
|
40
|
+
#
|
|
41
|
+
# Inspired by Parslet (MIT License).
|
|
42
|
+
|
|
43
|
+
module Parsanol
  # Inclusion hook: gives the including class the ClassMethods macros.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Raised when a parse does not succeed. The structured reason, when one
  # was supplied, is available through #parse_failure_cause.
  class ParseFailed < StandardError
    # Structured description of why the parse failed.
    # @return [Parsanol::Cause, nil]
    attr_reader :parse_failure_cause

    # @param message [String] human-readable error message
    # @param cause [Object, nil] detailed failure cause
    def initialize(message, cause = nil)
      @parse_failure_cause = cause
      super(message)
    end
  end

  # Macros available on every class that includes Parsanol.
  module ClassMethods
    # Turn automatic rule optimization on (or off) for this parser class.
    # When on, each rule's atom is run through the quantifier, sequence,
    # choice and lookahead simplifiers of Parsanol::Optimizer.
    #
    # Optimization is off unless this method has been called with a truthy
    # value (see #optimize_rules?).
    #
    # @param enable [Boolean] whether to optimize
    def optimize_rules!(enable = true)
      @optimize_rules = enable
    end

    # Turn automatic rule optimization off.
    def disable_optimization!
      @optimize_rules = false
    end

    # Whether rule optimization is enabled. An unset flag is recorded as
    # false on first query.
    # @return [Boolean]
    def optimize_rules?
      return @optimize_rules unless @optimize_rules.nil?

      @optimize_rules = false
    end

    # Define a named grammar rule. Generates an instance method +name+ that
    # returns an Entity atom wrapping the rule body; the entity is built
    # once per parser instance and cached.
    #
    # @param name [Symbol] the rule name
    # @param opts [Hash] options (:label for custom labeling)
    # @yield block that returns the rule's parser atom
    def rule(name, opts = {}, &definition)
      undef_method(name) if method_defined?(name)

      define_method(name) do
        cache = (@rule_cache ||= {})

        cache.fetch(name) do
          # Evaluated in the parser instance; the optimization flag is
          # consulted only when this proc is actually run.
          builder = proc do
            atom = instance_eval(&definition)
            next atom unless self.class.optimize_rules?

            %i[
              simplify_quantifiers
              simplify_sequences
              simplify_choices
              simplify_lookaheads
            ].reduce(atom) { |current, pass| Parsanol::Optimizer.public_send(pass, current) }
          end

          cache[name] = Atoms::Entity.new(name, opts[:label], &builder)
        end
      end
    end
  end

  # Backs the bracket form of #match: +match['a-z']+.
  # @api private
  class CharacterClassBuilder
    # @param chars [String] contents of the character class (without brackets)
    # @return [Parsanol::Atoms::Re] regex atom for "[chars]"
    def [](chars)
      Atoms::Re.new("[#{chars}]")
    end
  end

  # Character class matcher, usable as +match('[a-z]')+ or +match['a-z']+.
  #
  # @overload match(pattern)
  #   @param pattern [String] regex character class
  #   @return [Parsanol::Atoms::Re] regex atom
  # @overload match
  #   @return [CharacterClassBuilder] builder for bracket notation
  def match(pattern = nil)
    pattern ? Atoms::Re.new(pattern) : CharacterClassBuilder.new
  end

  # Literal string matcher.
  #
  # @param literal [String] the string to match
  # @return [Parsanol::Atoms::Str] string atom
  def str(literal)
    Atoms::Str.new(literal)
  end

  # Matcher for any single character.
  #
  # @return [Parsanol::Atoms::Re] regex atom built from '.'
  def any
    Atoms::Re.new('.')
  end

  # Wraps the given block in a Scope atom (a fresh variable scope for
  # captures made while the block is parsed).
  #
  # @yield block containing scoped parsing
  # @return [Parsanol::Atoms::Scope] scope atom
  def scope(&block)
    Atoms::Scope.new(block)
  end

  # Wraps the given block in a Dynamic atom, to be evaluated at parse time.
  #
  # @yield block returning a parser atom or parse result
  # @return [Parsanol::Atoms::Dynamic] dynamic atom
  def dynamic(&block)
    Atoms::Dynamic.new(block)
  end

  # Infix expression parser with operator precedence. Each operator is an
  # [atom, precedence, associativity] tuple.
  #
  # @param operand [Parsanol::Atoms::Base] parser for operands
  # @param operators [Array<Array>] operator definitions
  # @yield optional block to customize result tree structure
  # @return [Parsanol::Atoms::Infix] infix parser
  def infix_expression(operand, *operators, &combiner)
    Atoms::Infix.new(operand, operators, &combiner)
  end

  # Transform pattern binding for sequences (array values).
  #
  # @param name [Symbol] binding variable name
  # @return [Parsanol::Pattern::SequenceBind] sequence pattern
  def sequence(name)
    Pattern::SequenceBind.new(name)
  end

  # Transform pattern binding for simple (leaf) values.
  #
  # @param name [Symbol] binding variable name
  # @return [Parsanol::Pattern::SimpleBind] simple pattern
  def simple(name)
    Pattern::SimpleBind.new(name)
  end

  # Transform pattern binding for an arbitrary subtree.
  #
  # @param name [Symbol] binding variable name
  # @return [Parsanol::Pattern::SubtreeBind] subtree pattern
  def subtree(name)
    Pattern::SubtreeBind.new(name)
  end

  # Builds an atom from a treetop-style expression string by delegating to
  # Parsanol::Expression.
  #
  # @note Whitespace is required before operators: 'a' ? not 'a'?
  #
  # @example Basic usage
  #   exp("'a' 'b' ?") # => str('a') >> str('b').maybe
  #
  # @param str [String] a treetop expression string
  # @return [Parsanol::Atoms::Base] the corresponding parser atom
  # @see Parsanol::Expression for full syntax documentation
  def exp(str)
    Expression.new(str).to_parslet
  end

  # Expose every DSL helper both as Parsanol.<name> and as a private
  # instance method of including classes.
  module_function :match, :str, :any, :scope, :dynamic, :infix_expression,
                  :sequence, :simple, :subtree, :exp

  autoload :Expression, 'parsanol/expression'
end
|
|
242
|
+
|
|
243
|
+
require 'parsanol/version'
|
|
244
|
+
require 'parsanol/resettable'
|
|
245
|
+
require 'parsanol/result'
|
|
246
|
+
require 'parsanol/slice'
|
|
247
|
+
require 'parsanol/string_view'
|
|
248
|
+
require 'parsanol/rope'
|
|
249
|
+
require 'parsanol/pool'
|
|
250
|
+
require 'parsanol/pools/slice_pool'
|
|
251
|
+
require 'parsanol/pools/array_pool'
|
|
252
|
+
require 'parsanol/pools/position_pool'
|
|
253
|
+
require 'parsanol/buffer'
|
|
254
|
+
require 'parsanol/pools/buffer_pool'
|
|
255
|
+
require 'parsanol/lazy_result'
|
|
256
|
+
require 'parsanol/result_builder'
|
|
257
|
+
require 'parsanol/first_set'
|
|
258
|
+
require 'parsanol/cause'
|
|
259
|
+
require 'parsanol/source'
|
|
260
|
+
require 'parsanol/atoms'
|
|
261
|
+
require 'parsanol/pattern'
|
|
262
|
+
require 'parsanol/pattern/binding'
|
|
263
|
+
require 'parsanol/transform'
|
|
264
|
+
require 'parsanol/parser'
|
|
265
|
+
require 'parsanol/error_reporter'
|
|
266
|
+
require 'parsanol/scope'
|
|
267
|
+
require 'parsanol/optimizer'
|
|
268
|
+
require 'parsanol/options'
|
|
269
|
+
require 'parsanol/native'
|
|
270
|
+
|
|
271
|
+
# New features (require native extension for full functionality)
|
|
272
|
+
require 'parsanol/source_location'
|
|
273
|
+
require 'parsanol/grammar_builder'
|
|
274
|
+
require 'parsanol/streaming_parser'
|
|
275
|
+
require 'parsanol/incremental_parser'
|
|
276
|
+
require 'parsanol/builder_callbacks'
|
|
277
|
+
require 'parsanol/parallel'
|
|
278
|
+
|
|
279
|
+
# Add GrammarBuilder DSL to Parsanol module
|
|
280
|
+
Parsanol.extend(Parsanol::GrammarBuilderDSL)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/parsanol/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
  # Identity
  spec.name = 'parsanol'
  spec.version = Parsanol::VERSION
  spec.platform = Gem::Platform::RUBY

  spec.authors = ['Ribose Inc.']
  spec.email = ['open.source@ribose.com']

  # Descriptive metadata
  spec.summary = 'Parser construction library with great error reporting in Ruby.'
  spec.description = 'A small Ruby library for constructing parsers in the PEG (Parsing Expression Grammar) fashion. ' \
                     'Parsanol provides Parslet-compatible API with additional features including ' \
                     'static frozen parsers and dynamic parsers, with optional Rust native extension for improved performance.'
  spec.homepage = 'https://github.com/parsanol/parsanol-ruby'
  spec.license = 'MIT'

  spec.metadata = {
    'bug_tracker_uri' => 'https://github.com/parsanol/parsanol-ruby/issues',
    'changelog_uri' => 'https://github.com/parsanol/parsanol-ruby/blob/main/HISTORY.txt',
    'documentation_uri' => 'https://parsanol.github.io/parsanol-ruby/',
    'homepage_uri' => 'https://github.com/parsanol/parsanol-ruby',
    'source_code_uri' => 'https://github.com/parsanol/parsanol-ruby',
    'rubygems_mfa_required' => 'true'
  }

  # Rust extension
  spec.extensions = ['ext/parsanol_native/extconf.rb']

  # Everything under lib/ and ext/ plus the top-level project files...
  spec.files = Dir.glob('{lib,ext}/**/*') + %w[
    HISTORY.txt
    LICENSE
    Rakefile
    README.adoc
    parsanol-ruby.gemspec
    Cargo.toml
    Cargo.lock
  ]
  # ...minus directories and already-compiled binaries.
  spec.files.reject! do |path|
    File.directory?(path) || path.match?(/\.(dll|so|dylib|lib|bundle)\Z/)
  end
  spec.require_paths = ['lib']

  spec.required_ruby_version = '>= 3.2.0'

  # Required for Rust extension
  spec.add_dependency 'rb_sys', '~> 0.9.39'

  # Development-only dependencies, registered in the order listed.
  {
    'rake' => '~> 13.0',
    'rake-compiler' => '~> 1.2.0',
    'rdoc' => '~> 6.0',
    'rspec' => '~> 3.0',
    # For code style checking
    'rubocop' => '~> 1.0',
    # For Parslet compatibility verification
    'parslet' => '~> 2.0.0',
    # For benchmarking
    'benchmark-ips' => '~> 2.0',
    # For type checking
    'rbs' => '~> 3.0',
    'steep' => '~> 1.0'
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|