pegparse 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rubocop.yml +13 -0
- data/Gemfile +14 -0
- data/LICENSE.txt +21 -0
- data/README.md +133 -0
- data/Rakefile +16 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/pegparse/biop_rule_chain.rb +113 -0
- data/lib/pegparse/borrowed_areas.rb +35 -0
- data/lib/pegparse/line_counter.rb +61 -0
- data/lib/pegparse/parser_base.rb +139 -0
- data/lib/pegparse/parser_context.rb +19 -0
- data/lib/pegparse/parser_core.rb +243 -0
- data/lib/pegparse/parser_errors.rb +97 -0
- data/lib/pegparse/version.rb +5 -0
- data/lib/pegparse.rb +9 -0
- data/pegparse.gemspec +37 -0
- data/samples/bsh_parser.rb +337 -0
- data/samples/calc_parser.rb +55 -0
- data/samples/json_parser.rb +92 -0
- data/samples/xml_parser.rb +182 -0
- metadata +67 -0
@@ -0,0 +1,243 @@
|
|
1
|
+
require_relative "parser_context"
|
2
|
+
|
3
|
+
# Parser base class (core mechanism for backtracking)
|
4
|
+
class Pegparse::ParserCore
|
5
|
+
# start rule symbol used by parse()
|
6
|
+
# @return [Symbol]
|
7
|
+
attr_accessor :start_rule_symbol
|
8
|
+
|
9
|
+
# Create a parser. The inner state is prepared right away when a scanner or
# context is given; with nil nothing is prepared here.
# @param scanner_or_context [StringScanner, Pegparse::ParserContext, nil]
def initialize(scanner_or_context)
  return unless scanner_or_context

  init_context(scanner_or_context)
end
|
13
|
+
|
14
|
+
# Set up the inner parsing state (@context).
# A Pegparse::ParserContext is adopted as-is; any other object is handed to
# Pegparse::ParserContext.new.
# @param scanner_or_context [StringScanner, Pegparse::ParserContext]
def init_context(scanner_or_context)
  @context =
    case scanner_or_context
    when Pegparse::ParserContext then scanner_or_context
    else Pegparse::ParserContext.new(scanner_or_context)
    end
end
|
22
|
+
|
23
|
+
# Start parsing from the start rule.
# The rule method is invoked inside catch(:backtrack), so a failed match
# yields nil instead of propagating the throw. Stored errors are cleared only
# when the scanner reached the end of input afterwards.
# @param scanner_or_context [StringScanner, Pegparse::ParserContext, nil]
#   input to parse; may be omitted when one was already given to the parser
# @param rule [Symbol, nil] rule to start from (defaults to #start_rule_symbol)
# @return [Object, nil] match result of the start rule, nil if it backtracked
# @raise [ArgumentError] if no input or no start rule is available
def parse(scanner_or_context = nil, rule: nil)
  unless scanner_or_context || @context
    raise ArgumentError, "scanner or context is required (pass it to new or parse)"
  end
  unless rule || @start_rule_symbol
    raise ArgumentError, "start rule is required (pass rule: or set start_rule_symbol)"
  end

  init_context(scanner_or_context) if scanner_or_context
  current_start_rule_symbol = rule || @start_rule_symbol

  ret = nil
  catch(:backtrack) do
    ret = __send__(current_start_rule_symbol)
  end
  # Errors recorded on abandoned alternatives are meaningless once all input was consumed.
  @context.errors.clear_errors if eos?
  ret
end
|
41
|
+
|
42
|
+
# Whether the underlying scanner has reached the end of its input.
# @return [Boolean]
def eos?
  scanner = @context.scanner
  scanner.eos?
end
|
45
|
+
|
46
|
+
# Record a match failure at the current scanner position, together with the
# current rule stack.
# @param reason [Object] what failed to match
def save_error(reason)
  failed_at = @context.scanner.pos
  @context.errors.save_error(failed_at, @context.rule_stack, reason)
end
|
49
|
+
|
50
|
+
# Move the scanner back to pos and notify the borrowed-area bookkeeping of
# the rewind.
# @param pos [Integer] scanner position to return to
def backtrack_position_to(pos)
  ctx = @context
  ctx.scanner.pos = pos
  ctx.borrowed_areas.backtracked(pos)
end
|
54
|
+
|
55
|
+
# Parse error information in a displayable form.
# Each stored error position is translated through the line counter.
# @return [Array] meaningful errors; an element looks like
#   [[[line, char], parent reason], [[line, char], child reason]]
def best_errors
  locator = @context.line_counter
  @context.errors.best_errors.map do |error_stack|
    error_stack.map { |entry| [locator.position(entry.pos), entry.reason] }
  end
end
|
64
|
+
|
65
|
+
# Check whether a match would succeed, without consuming input.
# With a block, the block is run and the scanner position is restored
# afterwards whether or not it backtracked. With a pattern, the matched text
# is reported to the line counter, and the match is rejected (nil) when
# borrowed_areas reports a conflict for the last byte of the match.
# @param str_or_regexp [String, Regexp, nil] pattern (if nil, the block is used)
# @return [String, Object, nil] matched String (or the block's result); nil if not matched
# @raise [ArgumentError] unless exactly one of pattern / block is given,
#   or the pattern is neither String nor Regexp
def peek(str_or_regexp = nil, &block)
  raise ArgumentError if str_or_regexp && block
  raise ArgumentError if !str_or_regexp && !block

  scanner = @context.scanner

  if block
    saved_pos = scanner.pos
    result = nil
    catch(:backtrack) { result = block.call }
    backtrack_position_to(saved_pos)
    return result
  end

  case str_or_regexp
  when String
    return nil unless scanner.match?(str_or_regexp)

    matched = str_or_regexp
  when Regexp
    size = scanner.match?(str_or_regexp)
    return nil unless size

    matched = scanner.peek(size)
  else
    raise ArgumentError
  end

  @context.line_counter.memo(scanner.pos, matched)
  last_byte = scanner.pos + matched.bytesize - 1
  return nil if @context.borrowed_areas.conflicted_area(last_byte)

  matched
end
|
106
|
+
|
107
|
+
# Match the pattern at the current position and consume it.
# On failure the pattern is recorded as an error reason and the parser backtracks.
# @param str_or_regexp [String, Regexp] pattern to match
# @return [String] matched text
# @raise [ArgumentError] if no pattern is given
def read(str_or_regexp)
  raise ArgumentError unless str_or_regexp

  matched = peek(str_or_regexp)
  unless matched
    save_error(str_or_regexp)
    return backtrack
  end

  # The scanner position is a byte offset, so advance by bytes, not characters.
  @context.scanner.pos += matched.bytesize
  matched
end
|
120
|
+
|
121
|
+
# Match a pattern or a block; a failed match yields nil instead of backtracking.
# With a block, an [position, :optional] entry is kept on the rule stack
# while the block runs, and the scanner position is restored if the block
# backtracked. With a pattern, the matched text is consumed.
# @param str_or_regexp [String, Regexp, nil] pattern (if nil, the block is used)
# @return [String, Object, nil] match result, nil if not matched
# @raise [ArgumentError] unless exactly one of pattern / block is given
def optional(str_or_regexp = nil, &block)
  raise ArgumentError if str_or_regexp && block
  raise ArgumentError if !str_or_regexp && !block

  unless block
    matched = peek(str_or_regexp)
    return matched unless matched

    @context.scanner.pos += matched.bytesize
    return matched
  end

  start_pos = @context.scanner.pos
  catch(:backtrack) do
    begin
      @context.rule_stack.push [start_pos, :optional]
      # Success leaves the method right here; only a throw falls through below.
      return block.call
    ensure
      @context.rule_stack.pop
    end
  end
  backtrack_position_to(start_pos)
  nil
end
|
146
|
+
|
147
|
+
# Abandon the current match attempt by unwinding to the nearest
# enclosing catch(:backtrack).
def backtrack
  throw(:backtrack)
end
|
150
|
+
|
151
|
+
# Replace method_sym with a wrapper that records [start position, method_sym]
# on the rule stack for the duration of the call. The original implementation
# stays reachable as :original_<name>.
# Wrapping is done once per method; later calls are no-ops.
# @param method_sym [Symbol] method to wrap
# @return [Symbol, nil] the wrapped method name, nil if it was already wrapped
def self.wrap_with_trace_method(method_sym)
  original_method_sym = :"original_#{method_sym}"
  # An alias of a private method is private too, so check both visibilities;
  # otherwise a second wrap would alias the wrapper to itself and recurse forever.
  return if method_defined?(original_method_sym) || private_method_defined?(original_method_sym)

  alias_method original_method_sym, method_sym
  # Forward keyword arguments and the block explicitly: with |*args| alone,
  # Ruby 3 hands keywords over as a positional Hash and drops the block.
  define_method(method_sym) do |*args, **kwargs, &block|
    @context.rule_stack.push [@context.scanner.pos, method_sym]
    __send__(original_method_sym, *args, **kwargs, &block)
  ensure
    @context.rule_stack.pop
  end
end
|
164
|
+
|
165
|
+
# Declare a method as a nonterminal symbol rule by wrapping it with the
# rule-stack tracing wrapper.
# @param method_sym [Symbol] method to wrap
# @return [Symbol] method_sym itself
def self.rule(method_sym)
  method_sym.tap { |sym| wrap_with_trace_method(sym) }
end
|
172
|
+
|
173
|
+
###
|
174
|
+
|
175
|
+
# Try the candidates in order (PEG's choice operator) and return the result
# of the first one that yields a truthy value. A candidate that backtracks,
# or whose result is nil/false, counts as not matched. Backtracks when no
# candidate matched.
# @param alter_procs [Array<Proc>] match candidates
# @return [Object] result of the matched candidate
def choice(*alter_procs)
  alter_procs.each do |candidate|
    matched = optional { candidate.call }
    next unless matched

    return matched
  end
  backtrack
end
|
185
|
+
|
186
|
+
# Run the block repeatedly, collecting its results, until it backtracks or
# returns nil/false. Returns [] when even the first iteration fails.
# @return [Array<Object>] match results, one per successful iteration
def zero_or_more(&block)
  results = []
  while (item = optional { block.call })
    results << item
  end
  results
end
|
197
|
+
|
198
|
+
# Run the block once unguarded (so a failure there backtracks out of this
# method), then keep repeating it until it backtracks or returns nil/false.
# @return [Array<Object>] match results, one per successful iteration
def one_or_more(&block)
  results = [block.call]
  while (item = optional { block.call })
    results << item
  end
  results
end
|
209
|
+
|
210
|
+
# Temporarily move the scanner to the following line (used for
# here-documents), run the block there, then return to where we were.
# The span consumed by the block is registered as a borrowed area.
# When an area has already been borrowed, reading continues from its end
# instead of from the next line.
# @return [Object] the block's result
def borrow_next_line(&block)
  scanner = @context.scanner
  areas = @context.borrowed_areas
  marker = scanner.pos

  borrowed_from =
    if areas.borrowed_area_end_pos
      areas.borrowed_area_end_pos
    else
      # Skip the rest of the current line; backtracks if there is no newline.
      read(/.*\n/)
      scanner.pos
    end

  scanner.pos = borrowed_from
  result = block.call
  borrowed_to = scanner.pos
  scanner.pos = marker

  area = Pegparse::BorrowedArea.new(
    marker_pos: marker,
    start_pos: borrowed_from,
    end_pos: borrowed_to,
  )
  areas.add_area(area)
  result
end
|
231
|
+
|
232
|
+
# Match a whole borrowed area. Succeeds only when the scanner stands exactly
# at the start of the area reported for the current position; the area's text
# is then consumed and returned. Backtracks otherwise.
# @return [String] text of the borrowed area
def borrowed_area
  scanner = @context.scanner
  area = @context.borrowed_areas.conflicted_area(scanner.pos)
  return backtrack unless area && area.start_pos == scanner.pos

  text = scanner.peek(area.end_pos - area.start_pos)
  scanner.pos = area.end_pos
  text
end
|
243
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
|
2
|
+
module Pegparse
  # One step of a failed parsing path: a scanner position and the reason
  # recorded for it (a rule stack entry, or the reason given to save_error).
  ParserError = Struct.new(
    :pos,
    :reason,
    keyword_init: true,
  )

  # Ranking record for one saved error path: its index in the saved list and
  # the start positions of the :optional entries on that path.
  ParseErrorLocation = Struct.new(
    :index_in_errors,
    :start_positions_of_optional,
    keyword_init: true,
  )
end

# Collects parse failures, keeping only those at the farthest position seen,
# and picks the most meaningful ones for reporting.
class Pegparse::ParserErrors
  def initialize
    clear_errors
  end

  # Save a parsing error. Errors before the farthest known position are
  # ignored; an error beyond it discards everything saved so far.
  # @param pos [Integer] position where the match failed
  # @param rule_stack [Array] array of [matching start pos, matching symbol]
  # @param reason [Object] what failed to match
  def save_error(pos, rule_stack, reason)
    return if pos < @farthest_pos

    @farthest_errors.clear if pos > @farthest_pos
    @farthest_pos = pos

    path = rule_stack.map do |rule_pos, rule_reason|
      Pegparse::ParserError.new(pos: rule_pos, reason: rule_reason)
    end
    path << Pegparse::ParserError.new(pos: pos, reason: reason)
    @farthest_errors << path
  end

  # Compare two errors by their parsing descent path.
  # The earlier an optional matching started, the lower the priority; when
  # the common prefix is equal, the path with fewer optionals ranks first.
  # @param a [Pegparse::ParseErrorLocation]
  # @param b [Pegparse::ParseErrorLocation]
  # @return [Integer] negative if a is better, positive if b is better, 0 if equal
  def compare_optional_memo(a, b)
    a_opts = a.start_positions_of_optional
    b_opts = b.start_positions_of_optional

    [a_opts.size, b_opts.size].min.times do |i|
      # Reversed operands: a later optional start sorts first.
      order = b_opts[i] <=> a_opts[i]
      return order unless order.zero?
    end
    a_opts.size <=> b_opts.size
  end

  # Get the meaningful errors: the saved paths that rank best under
  # compare_optional_memo, each reduced to its last two non-optional entries.
  # @return [Array<Array<Pegparse::ParserError>>]
  def best_errors
    locations = @farthest_errors.each_with_index.map do |stack, index|
      Pegparse::ParseErrorLocation.new(
        index_in_errors: index,
        start_positions_of_optional: stack.select { |entry| entry.reason == :optional }.map(&:pos),
      )
    end

    # Keep every location tied with the best one seen so far, in saved order.
    best = locations.each_with_object([]) do |location, kept|
      order = kept.empty? ? 0 : compare_optional_memo(location, kept.first)
      kept.clear if order.negative?
      kept << location unless order.positive?
    end

    best.map do |location|
      @farthest_errors[location.index_in_errors].reject { |entry| entry.reason == :optional }.last(2)
    end
  end

  # Remove all stored errors.
  def clear_errors
    @farthest_pos = 0
    @farthest_errors = []
  end
end
|
data/lib/pegparse.rb
ADDED
data/pegparse.gemspec
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/pegparse/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
  # Identity
  s.name = "pegparse"
  s.version = Pegparse::VERSION
  s.authors = ["Riki Ishikawa"]
  s.email = ["riki.ishikawa@gmail.com"]

  # Description, licensing and runtime requirement
  s.summary = "library to create recursive descent parser."
  s.description = "provide base class for PEG like recursive descent parser."
  s.homepage = "https://github.com/jljse/pegparse"
  s.license = "MIT"
  s.required_ruby_version = ">= 3.0.0"

  # Links shown on the gem's page. No push host restriction or changelog URL is set.
  s.metadata["homepage_uri"] = s.homepage
  s.metadata["source_code_uri"] = "https://github.com/jljse/pegparse"

  # Package every file tracked by git, except tests/specs/features.
  # `git ls-files -z` separates names with NUL so unusual file names survive.
  s.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(%r{\A(?:test|spec|features)/}) }
  end

  # Executables are whatever lives under exe/.
  s.bindir = "exe"
  s.executables = s.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  s.require_paths = ["lib"]

  # Runtime dependencies would be registered here, e.g.
  #   s.add_dependency "example-gem", "~> 1.0"
  # See https://bundler.io/guides/creating_gem.html for more on building gems.
end
|