pegparse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,243 @@
1
+ require_relative "parser_context"
2
+
3
module Pegparse
  # Parser base class (core mechanism for backtracking).
  #
  # A failed match throws +:backtrack+ (see #backtrack). #parse, #peek and
  # #optional catch it; #peek and #optional also rewind the scanner to the
  # position they started from.
  class ParserCore
    # start rule symbol used by parse()
    # @return [Symbol]
    attr_accessor :start_rule_symbol

    # @param scanner_or_context [StringScanner, Pegparse::ParserContext, nil]
    #   input to parse; may be omitted here and handed to #parse instead
    def initialize(scanner_or_context = nil)
      init_context(scanner_or_context) if scanner_or_context
    end

    # initialize inner state
    # @param scanner_or_context [StringScanner, Pegparse::ParserContext]
    #   a context is used as is, anything else is wrapped in a new context
    def init_context(scanner_or_context)
      @context =
        if scanner_or_context.is_a?(Pegparse::ParserContext)
          scanner_or_context
        else
          Pegparse::ParserContext.new(scanner_or_context)
        end
    end

    # Start parse
    # @param scanner_or_context [StringScanner, Pegparse::ParserContext, nil]
    #   replaces the current context when given
    # @param rule [Symbol, nil] rule to start from (defaults to #start_rule_symbol)
    # @return [Object, nil] match result, nil when the start rule backtracked
    # @raise [ArgumentError] when there is no input or no start rule
    def parse(scanner_or_context = nil, rule: nil)
      raise ArgumentError, "scanner or context required" unless scanner_or_context || @context
      raise ArgumentError, "start rule required" unless rule || @start_rule_symbol

      init_context(scanner_or_context) if scanner_or_context
      start_rule = rule || @start_rule_symbol

      result = nil
      catch(:backtrack) { result = __send__(start_rule) }
      # Recorded errors are dropped whenever the scanner ended up at end of input.
      @context.errors.clear_errors if eos?
      result
    end

    # @return [Boolean] whether the scanner is at end of input
    def eos?
      @context.scanner.eos?
    end

    # Record a failure at the current scanner position together with the
    # current rule stack.
    # @param reason [Object] what was expected (#read passes its pattern)
    def save_error(reason)
      @context.errors.save_error(@context.scanner.pos, @context.rule_stack, reason)
    end

    # Rewind the scanner and tell the borrowed-area registry about it.
    # @param pos [Integer] scanner position to return to
    def backtrack_position_to(pos)
      @context.scanner.pos = pos
      @context.borrowed_areas.backtracked(pos)
    end

    # parse error info
    # @return [Array] array of meaningful errors. an element should be
    #   [[[line, char], parent reason], [[line, char], child reason]]
    def best_errors
      @context.errors.best_errors.map do |error|
        error.map { |frame| [@context.line_counter.position(frame.pos), frame.reason] }
      end
    end

    # Check whether matching would succeed, without consuming input.
    # @param str_or_regexp [String, Regexp, nil] matching (if nil, block will be used)
    # @return [String, Object, nil] matched String (block form: the block's
    #   result), nil when nothing matched or the block backtracked
    # @raise [ArgumentError] unless exactly one of pattern / block is given,
    #   or the pattern is neither String nor Regexp
    def peek(str_or_regexp = nil, &block)
      raise ArgumentError, "give a pattern or a block, not both" if str_or_regexp && block
      raise ArgumentError, "pattern or block required" unless str_or_regexp || block

      scanner = @context.scanner

      if block
        start_pos = scanner.pos
        result = nil
        catch(:backtrack) { result = block.call }
        # Look-ahead only: always return to the starting position.
        backtrack_position_to(start_pos)
        return result
      end

      case str_or_regexp
      when String
        return nil unless scanner.match?(str_or_regexp)

        matched = str_or_regexp
        size = str_or_regexp.bytesize
      when Regexp
        size = scanner.match?(str_or_regexp)
        return nil unless size

        matched = scanner.peek(size)
      else
        raise ArgumentError, "pattern must be a String or a Regexp"
      end

      @context.line_counter.memo(scanner.pos, matched)
      # A match whose last byte falls into a borrowed area counts as a failure.
      return nil if @context.borrowed_areas.conflicted_area(scanner.pos + size - 1)

      matched
    end

    # Match with pattern. Backtrack if match failed.
    # @param str_or_regexp [String, Regexp] matching
    # @return [String] match result
    # @raise [ArgumentError] when no pattern is given
    def read(str_or_regexp)
      raise ArgumentError, "pattern required" unless str_or_regexp

      matched = peek(str_or_regexp)
      if matched
        @context.scanner.pos += matched.bytesize
        return matched
      end
      save_error(str_or_regexp)
      backtrack
    end

    # Match with pattern or block. Returns nil if match failed.
    # @param str_or_regexp [String, Regexp, nil] matching (if nil, block will be used)
    # @return [String, Object, nil] match result
    # @raise [ArgumentError] unless exactly one of pattern / block is given
    def optional(str_or_regexp = nil, &block)
      raise ArgumentError, "give a pattern or a block, not both" if str_or_regexp && block
      raise ArgumentError, "pattern or block required" unless str_or_regexp || block

      unless block
        matched = peek(str_or_regexp)
        @context.scanner.pos += matched.bytesize if matched
        return matched
      end

      start_pos = @context.scanner.pos
      # The :optional frame lets the error store see which failures happened
      # inside an optional part.
      @context.rule_stack.push [start_pos, :optional]
      begin
        catch(:backtrack) do
          return block.call
        end
      ensure
        @context.rule_stack.pop
      end
      # Only reached when the block backtracked.
      backtrack_position_to(start_pos)
      nil
    end

    # Abort the current match attempt; control resumes after the nearest
    # enclosing catch(:backtrack).
    def backtrack
      throw :backtrack
    end

    # Replace +method_sym+ with a wrapper that pushes
    # [scanner position, method_sym] onto the rule stack for the duration of
    # the call. The original implementation stays reachable as
    # +original_<name>+; a method that already has such an alias is left alone.
    # @param method_sym [Symbol] method to wrap
    def self.wrap_with_trace_method(method_sym)
      original_method_sym = "original_#{method_sym}".to_sym
      # The alias inherits the visibility of the wrapped method, so private
      # aliases have to be checked too or the method would be wrapped twice.
      return if method_defined?(original_method_sym) || private_method_defined?(original_method_sym)

      alias_method original_method_sym, method_sym
      # Forward keywords and the block as well: with |*args| alone, keywords
      # turn into a positional Hash (ArgumentError on Ruby 3) and blocks are lost.
      define_method(method_sym) do |*args, **kwargs, &block|
        @context.rule_stack.push [@context.scanner.pos, method_sym]
        begin
          __send__(original_method_sym, *args, **kwargs, &block)
        ensure
          @context.rule_stack.pop
        end
      end
    end

    # Wrap method as nonterminal symbol rule.
    # @param method_sym [Symbol] wrapping method symbol
    # @return [Symbol]
    def self.rule(method_sym)
      wrap_with_trace_method(method_sym)
      method_sym
    end

    ###

    # Try to match some candidates in order. (PEG's choice operator) Backtrack if all match failed.
    # A candidate that returns nil or false is treated like a failed one.
    # @param alter_procs [Array<Proc>] match candidates
    # @return [Object] result of the matched candidate
    def choice(*alter_procs)
      alter_procs.each do |alter_proc|
        result = optional { alter_proc.call }
        return result if result
      end
      backtrack
    end

    # Try to match in loop. Returns [] even no loop succeeded.
    # The loop ends as soon as the block backtracks or returns nil/false.
    # @return [Array<Object>] array of match results for each loop
    def zero_or_more(&block)
      results = []
      while (val = optional { block.call })
        results << val
      end
      results
    end

    # Try to match in loop. Backtrack if no loop succeeded.
    # The first iteration runs unguarded, so its failure propagates to the caller.
    # @return [Array<Object>] array of match results for each loop
    def one_or_more(&block)
      results = [block.call]
      while (val = optional { block.call })
        results << val
      end
      results
    end

    # Temporarily change scanner position to next line (use for here-document).
    # The block runs after the previously borrowed area if there is one,
    # otherwise after the rest of the current line. Afterwards the scanner
    # returns to where it was and the span consumed by the block is registered
    # as a borrowed area.
    # @return [Object] the block's result
    def borrow_next_line(&block)
      scanner = @context.scanner
      mark_pos = scanner.pos

      borrowed_start_pos = @context.borrowed_areas.borrowed_area_end_pos
      unless borrowed_start_pos
        read(/.*\n/)
        borrowed_start_pos = scanner.pos
      end

      scanner.pos = borrowed_start_pos
      result = block.call
      borrowed_end_pos = scanner.pos
      scanner.pos = mark_pos

      @context.borrowed_areas.add_area(
        Pegparse::BorrowedArea.new(
          marker_pos: mark_pos,
          start_pos: borrowed_start_pos,
          end_pos: borrowed_end_pos,
        )
      )
      result
    end

    # match to borrowed area
    # Succeeds only when the scanner sits exactly at the start of a borrowed
    # area; the whole area is then consumed. Backtracks otherwise.
    # @return [String] text of the borrowed area
    def borrowed_area
      scanner = @context.scanner
      area = @context.borrowed_areas.conflicted_area(scanner.pos)
      backtrack unless area && area.start_pos == scanner.pos

      text = scanner.peek(area.end_pos - area.start_pos)
      scanner.pos = area.end_pos
      text
    end
  end
end
@@ -0,0 +1,97 @@
1
+
2
module Pegparse
  # One frame of a recorded failure: a position plus either the rule symbol
  # that started there or, for the last frame, the reason of the failure.
  ParserError = Struct.new(
    :pos,
    :reason,
    keyword_init: true,
  )

  # Helper used while ranking errors: where an error sits in the stored list
  # and the start positions of the :optional frames on its rule stack.
  ParseErrorLocation = Struct.new(
    :index_in_errors,
    :start_positions_of_optional,
    keyword_init: true,
  )

  # Keeps the errors recorded at the farthest position reached so far.
  class ParserErrors
    def initialize
      clear_errors
    end

    # just save parsing error
    # Errors before the farthest known position are ignored; an error beyond
    # it replaces everything stored so far.
    # @param pos [Integer]
    # @param rule_stack [Array] array of [matching start pos, matching symbol]
    # @param reason [Object]
    def save_error(pos, rule_stack, reason)
      return if pos < @farthest_pos

      @farthest_errors.clear if pos > @farthest_pos
      @farthest_pos = pos

      frames = rule_stack.map do |start_pos, rule_name|
        Pegparse::ParserError.new(pos: start_pos, reason: rule_name)
      end
      frames << Pegparse::ParserError.new(pos: pos, reason: reason)
      @farthest_errors << frames
    end

    # compare two errors which is better by parsing descent path
    # (optional matching starts more earlier, priority becomes lower)
    # @param a [Pegparse::ParseErrorLocation]
    # @param b [Pegparse::ParseErrorLocation]
    # @return [Integer] negative when a is better, positive when b is better, 0 on a tie
    def compare_optional_memo(a, b)
      a_opts = a.start_positions_of_optional
      b_opts = b.start_positions_of_optional

      [a_opts.size, b_opts.size].min.times do |i|
        # Reversed operands: the later optional start wins.
        order = b_opts[i] <=> a_opts[i]
        return order unless order.zero?
      end
      a_opts.size <=> b_opts.size
    end

    # get meaningful errors
    # @return [Array<Array<Pegparse::ParserError>>] for each best error the
    #   last two frames of its stack, :optional frames left out
    def best_errors
      locations = @farthest_errors.each_with_index.map do |stack, index|
        Pegparse::ParseErrorLocation.new(
          index_in_errors: index,
          start_positions_of_optional: stack.select { |frame| frame.reason == :optional }.map(&:pos)
        )
      end

      # Collect every location that ties for the best rank.
      best = []
      locations.each do |location|
        order = best.empty? ? 0 : compare_optional_memo(location, best.first)
        if order.negative?
          best = [location]
        elsif order.zero?
          best << location
        end
      end

      best.map do |location|
        @farthest_errors[location.index_in_errors].reject { |frame| frame.reason == :optional }.last(2)
      end
    end

    # remove all stored errors
    def clear_errors
      @farthest_pos = 0
      @farthest_errors = []
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Pegparse
  # Version string of the gem; the gemspec reads it for spec.version.
  VERSION = "0.1.0"
end
data/lib/pegparse.rb ADDED
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pegparse/version"
4
+ require_relative "pegparse/parser_base"
5
+ require_relative "pegparse/biop_rule_chain"
6
+
7
module Pegparse
  # Gem-level error class derived from StandardError.
  # NOTE(review): nothing in the code visible here raises it.
  class Error < StandardError
  end
end
data/pegparse.gemspec ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/pegparse/version"
4
+
5
Gem::Specification.new do |spec|
  # --- identity ---
  spec.name = "pegparse"
  spec.version = Pegparse::VERSION
  spec.authors = ["Riki Ishikawa"]
  spec.email = ["riki.ishikawa@gmail.com"]

  # --- description ---
  spec.summary = "library to create recursive descent parser."
  spec.description = "provide base class for PEG like recursive descent parser."
  spec.homepage = "https://github.com/jljse/pegparse"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0.0"

  # --- metadata ---
  # spec.metadata["allowed_push_host"] = "TODO: Set to 'https://mygemserver.com'"
  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/jljse/pegparse"
  # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."

  # --- packaged files ---
  # Everything tracked by git (listed from the gemspec's own directory),
  # minus files under test/, spec/ and features/.
  excluded_dirs = %r{\A(?:test|spec|features)/}
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(excluded_dirs) }
  end
  spec.bindir = "exe"
  # Executables are the base names of the packaged files under exe/.
  spec.executables = spec.files.grep(%r{\Aexe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Uncomment to register a new dependency of your gem
  # spec.add_dependency "example-gem", "~> 1.0"

  # For more information and examples about making a new gem, checkout our
  # guide at: https://bundler.io/guides/creating_gem.html
end