pegparse 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,243 @@
1
+ require_relative "parser_context"
2
+
3
module Pegparse
  # Parser base class (core mechanism for backtracking).
  #
  # A failed match is signalled with `throw :backtrack`; the combinators
  # (#optional, #choice, #zero_or_more, ...) catch it and rewind the scanner.
  # All state lives in @context (scanner, rule stack, saved errors,
  # borrowed areas and line counter).
  class ParserCore
    # start rule symbol used by parse()
    # @return [Symbol]
    attr_accessor :start_rule_symbol

    # @param scanner_or_context [StringScanner, Pegparse::ParserContext, nil]
    #   when nil, the context has to be supplied later through #parse
    def initialize(scanner_or_context)
      init_context(scanner_or_context) if scanner_or_context
    end

    # initialize inner state
    # @param scanner_or_context [StringScanner, Pegparse::ParserContext]
    #   a context is used as is; anything else is wrapped in a new context
    def init_context(scanner_or_context)
      @context =
        if scanner_or_context.is_a?(Pegparse::ParserContext)
          scanner_or_context
        else
          Pegparse::ParserContext.new(scanner_or_context)
        end
    end

    # Start parse
    # @param scanner_or_context [StringScanner, Pegparse::ParserContext, nil]
    # @param rule [Symbol, nil] start rule (defaults to #start_rule_symbol)
    # @return [Object] match result (nil if the start rule backtracked)
    # @raise [ArgumentError] if neither a context nor a start rule is available
    def parse(scanner_or_context = nil, rule: nil)
      raise ArgumentError, "scanner or context is required" unless scanner_or_context || @context
      raise ArgumentError, "start rule is required" unless rule || @start_rule_symbol

      init_context(scanner_or_context) if scanner_or_context
      current_start_rule_symbol = rule || @start_rule_symbol

      ret = nil
      catch(:backtrack) do
        ret = __send__(current_start_rule_symbol)
      end
      # errors saved while trying alternatives are dropped once all input was consumed
      @context.errors.clear_errors if eos?
      ret
    end

    # @return [Boolean] whether the scanner reached the end of input
    def eos?
      @context.scanner.eos?
    end

    # Record a match failure at the current position together with the rule stack.
    # @param reason [Object] what was expected (e.g. the pattern that failed)
    def save_error(reason)
      @context.errors.save_error(@context.scanner.pos, @context.rule_stack, reason)
    end

    # Rewind the scanner and notify the borrowed-area bookkeeping.
    # @param pos [Integer] scanner position to go back to
    def backtrack_position_to(pos)
      @context.scanner.pos = pos
      @context.borrowed_areas.backtracked(pos)
    end

    # parse error info
    # @return [Array] array of meaningful errors. an element should be [[[line, char], parent reason], [[line, char], child reason]]
    def best_errors
      @context.errors.best_errors.map do |error|
        error.map do |rule|
          [@context.line_counter.position(rule.pos), rule.reason]
        end
      end
    end

    # Check whether matching will succeed or not. Never consumes input.
    # @param str_or_regexp [String, Regexp, nil] matching (if nil, block will be used)
    # @return [String, Object, nil] match result String (if you call with block, return is block's result)
    # @raise [ArgumentError] unless exactly one of pattern / block is given,
    #   or the pattern is neither String nor Regexp
    def peek(str_or_regexp = nil, &block)
      raise ArgumentError, "give either a pattern or a block, not both" if str_or_regexp && block
      raise ArgumentError, "pattern or block is required" unless str_or_regexp || block

      if block
        bk_pos = @context.scanner.pos
        ret = nil
        catch(:backtrack) do
          ret = block.call
        end
        # position is restored whether the block matched or not
        backtrack_position_to(bk_pos)
        return ret
      end

      unless str_or_regexp.is_a?(String) || str_or_regexp.is_a?(Regexp)
        raise ArgumentError, "pattern must be a String or Regexp"
      end

      scanner = @context.scanner
      size = scanner.match?(str_or_regexp)
      return nil unless size

      # a String pattern is returned as is; for a Regexp the matched text is returned
      matched = str_or_regexp.is_a?(String) ? str_or_regexp : scanner.peek(size)
      @context.line_counter.memo(scanner.pos, matched)
      # the last matched byte must not be inside a borrowed area
      return nil if @context.borrowed_areas.conflicted_area(scanner.pos + size - 1)

      matched
    end

    # Match with pattern. Backtrack if match failed.
    # @param str_or_regexp [String, Regexp] matching
    # @return [String] match result
    # @raise [ArgumentError] if no pattern is given
    def read(str_or_regexp)
      raise ArgumentError, "pattern is required" unless str_or_regexp

      ret = peek(str_or_regexp)
      if ret
        @context.scanner.pos += ret.bytesize
        return ret
      end
      save_error(str_or_regexp)
      backtrack
    end

    # Match with pattern or block. Returns nil if match failed.
    # @param str_or_regexp [String, Regexp, nil] matching (if nil, block will be used)
    # @return [String, Object, nil] match result
    # @raise [ArgumentError] unless exactly one of pattern / block is given
    def optional(str_or_regexp = nil, &block)
      raise ArgumentError, "give either a pattern or a block, not both" if str_or_regexp && block
      raise ArgumentError, "pattern or block is required" unless str_or_regexp || block

      if block
        bk_pos = @context.scanner.pos
        # the :optional frame lets the error ranking see where this attempt started
        @context.rule_stack.push [bk_pos, :optional]
        begin
          catch(:backtrack) { return block.call }
        ensure
          @context.rule_stack.pop
        end
        # only reached when the block backtracked
        backtrack_position_to(bk_pos)
        return nil
      end

      ret = peek(str_or_regexp)
      @context.scanner.pos += ret.bytesize if ret
      ret
    end

    # Abort the current match attempt; caught by the nearest combinator or #parse.
    def backtrack
      throw :backtrack
    end

    # Replace the method with a wrapper that records [start pos, method name]
    # on the rule stack while the original (kept as `original_<name>`) runs.
    # Wrapping is skipped when `original_<name>` already exists.
    # @param method_sym [Symbol] method to wrap
    def self.wrap_with_trace_method(method_sym)
      original_method_sym = :"original_#{method_sym}"
      # method_defined? does not see private methods; without the second check a
      # private rule would be wrapped twice and the wrapper would call itself forever
      return if method_defined?(original_method_sym) || private_method_defined?(original_method_sym)

      alias_method original_method_sym, method_sym
      # forward keyword arguments and the block too, so any rule signature survives wrapping
      define_method(method_sym) do |*args, **kwargs, &block|
        @context.rule_stack.push [@context.scanner.pos, method_sym]
        begin
          __send__(original_method_sym, *args, **kwargs, &block)
        ensure
          @context.rule_stack.pop
        end
      end
    end

    # Wrap method as nonterminal symbol rule.
    # @param method_sym [Symbol] wrapping method symbol
    # @return [Symbol]
    def self.rule(method_sym)
      wrap_with_trace_method(method_sym)
      method_sym
    end

    ###

    # Try to match some candidates in order. (PEG's choice operator) Backtrack if all match failed.
    # A candidate whose result is nil or false counts as not matched.
    # @param alter_procs [Array<Proc>] match candidates
    # @return [Object] result of the matched candidate
    def choice(*alter_procs)
      alter_procs.each do |alter_proc|
        ret = optional { alter_proc.call }
        return ret if ret
      end
      backtrack
    end

    # Try to match in loop. Returns [] even no loop succeeded.
    # The loop stops at the first iteration that fails or yields nil/false.
    # @return [Array<Object>] array of match results for each loop
    def zero_or_more(&block)
      ret = []
      while (val = optional { block.call })
        ret << val
      end
      ret
    end

    # Try to match in loop. Backtrack if no loop succeeded.
    # @return [Array<Object>] array of match results for each loop
    def one_or_more(&block)
      # the first iteration is not wrapped in optional, so its failure propagates
      ret = [block.call]
      while (val = optional { block.call })
        ret << val
      end
      ret
    end

    # Temporarily change scanner position to next line (use for here-document).
    # The area consumed by block is registered as a borrowed area, and the
    # scanner returns to where it was before the call.
    # @return [Object] result of the block
    def borrow_next_line(&block)
      mark_pos = @context.scanner.pos
      borrowed_start_pos = @context.borrowed_areas.borrowed_area_end_pos
      unless borrowed_start_pos
        # nothing borrowed yet: the borrowed area starts right after the current line
        read(/.*\n/)
        borrowed_start_pos = @context.scanner.pos
      end
      @context.scanner.pos = borrowed_start_pos
      ret = block.call
      borrowed_end_pos = @context.scanner.pos
      @context.scanner.pos = mark_pos
      @context.borrowed_areas.add_area(
        Pegparse::BorrowedArea.new(
          marker_pos: mark_pos,
          start_pos: borrowed_start_pos,
          end_pos: borrowed_end_pos,
        )
      )
      ret
    end

    # match to borrowed area
    # Succeeds only when the scanner stands exactly at the start of a borrowed
    # area; the whole area is consumed. Backtracks otherwise.
    # @return [String] content of the borrowed area
    def borrowed_area
      area = @context.borrowed_areas.conflicted_area(@context.scanner.pos)
      if area && area.start_pos == @context.scanner.pos
        ret = @context.scanner.peek(area.end_pos - area.start_pos)
        @context.scanner.pos = area.end_pos
        return ret
      end
      backtrack
    end
  end
end
@@ -0,0 +1,97 @@
1
+
2
module Pegparse
  # One entry of a saved failure trace.
  # pos:    scanner position the entry refers to
  # reason: rule symbol (or :optional) for stack frames, or the failure
  #         reason for the last entry of a trace
  ParserError = Struct.new(:pos, :reason, keyword_init: true)

  # Helper record used while ranking saved failure traces.
  # index_in_errors:             index of the trace in the saved error list
  # start_positions_of_optional: start positions of the :optional frames of that trace
  ParseErrorLocation = Struct.new(
    :index_in_errors,
    :start_positions_of_optional,
    keyword_init: true,
  )
end
15
+
16
module Pegparse
  # Keeps the failure traces recorded at the farthest scanner position
  # reached so far and picks the most meaningful ones for reporting.
  class ParserErrors
    def initialize
      clear_errors
    end

    # just save parsing error
    # Errors before the farthest known position are ignored; an error beyond it
    # discards everything saved so far.
    # @param pos [Integer]
    # @param rule_stack [Array] array of [matching start pos, matching symbol]
    # @param reason [Object]
    def save_error(pos, rule_stack, reason)
      return if pos < @farthest_pos

      @farthest_errors.clear if pos > @farthest_pos
      @farthest_pos = pos
      # snapshot the stack: the caller keeps mutating rule_stack afterwards
      trace = rule_stack.map do |rule_pos, rule_sym|
        Pegparse::ParserError.new(pos: rule_pos, reason: rule_sym)
      end
      trace << Pegparse::ParserError.new(pos: pos, reason: reason)
      @farthest_errors << trace
    end

    # compare two errors which is better by parsing descent path
    # (the earlier an optional matching starts, the lower its priority)
    # @param a [Pegparse::ParseErrorLocation]
    # @param b [Pegparse::ParseErrorLocation]
    # @return [Integer] negative if a is better, positive if b is better, 0 if equal
    def compare_optional_memo(a, b)
      a_opts = a.start_positions_of_optional
      b_opts = b.start_positions_of_optional

      [a_opts.size, b_opts.size].min.times do |i|
        # reversed on purpose: the later start position ranks first
        cmp = b_opts[i] <=> a_opts[i]
        return cmp unless cmp.zero?
      end
      # common prefix is equal: fewer optional frames ranks first
      a_opts.size <=> b_opts.size
    end

    # get meaningful errors
    # @return [Array<Array<Pegparse::ParserError>>] for each best trace, its last
    #   two entries that are not :optional frames
    def best_errors
      locations = @farthest_errors.each_with_index.map do |stack, index|
        Pegparse::ParseErrorLocation.new(
          index_in_errors: index,
          start_positions_of_optional: stack.select { |rule| rule.reason == :optional }.map(&:pos),
        )
      end

      best = locations.min { |a, b| compare_optional_memo(a, b) }
      return [] unless best

      # keep every trace that ties with the best one, in the order they were saved
      locations
        .select { |location| compare_optional_memo(location, best).zero? }
        .map { |location| @farthest_errors[location.index_in_errors] }
        .map { |stack| stack.reject { |rule| rule.reason == :optional }.last(2) }
    end

    # remove all stored errors
    def clear_errors
      @farthest_pos = 0
      @farthest_errors = []
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Pegparse
  # Gem version string.
  VERSION = "0.1.0"
end
data/lib/pegparse.rb ADDED
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pegparse/version"
4
+ require_relative "pegparse/parser_base"
5
+ require_relative "pegparse/biop_rule_chain"
6
+
7
module Pegparse
  # Error class declared under the gem's namespace.
  class Error < StandardError; end
end
data/pegparse.gemspec ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/pegparse/version"
4
+
5
# Gem specification for pegparse.
Gem::Specification.new do |spec|
  spec.name = "pegparse"
  spec.version = Pegparse::VERSION
  spec.authors = ["Riki Ishikawa"]
  spec.email = ["riki.ishikawa@gmail.com"]

  spec.summary = "library to create recursive descent parser."
  spec.description = "provide base class for PEG like recursive descent parser."
  spec.homepage = "https://github.com/jljse/pegparse"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0.0"

  # spec.metadata["allowed_push_host"] = "TODO: Set to 'https://mygemserver.com'"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/jljse/pegparse"
  # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    # match? is enough here: only the boolean outcome is used, no MatchData needed
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Uncomment to register a new dependency of your gem
  # spec.add_dependency "example-gem", "~> 1.0"

  # For more information and examples about making a new gem, check out our
  # guide at: https://bundler.io/guides/creating_gem.html
end