pegex 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,287 @@
1
+ require 'pegex/input'
2
+
3
# Sentinel compared by identity (equal?) in match_next and match_ref to
# recognise a "no value" result.
+ $pegex_nil = []
4
# Placeholder returned by match_ref when the matched rule has no action.
+ $dummy = [1]
5
+
6
+ class Pegex::Parser
7
+ attr_accessor :grammar
8
+ attr_accessor :receiver
9
+ attr_accessor :parent
10
+ attr_accessor :rule
11
+ attr_accessor :debug
12
+
13
# Create a parser in its default state: positions at zero, grammar not yet
# optimized, debugging off, errors raised. When a block is given the new
# parser is yielded to it so the caller can configure it.
def initialize
  @position, @farthest = 0, 0
  @optimized = @debug = false
  @throw_on_error = true
  yield self if block_given?
end
22
+
23
# Parse +input+ using the configured grammar and receiver.
#
# input - a String (wrapped in a Pegex::Input) or an object that responds to
#         open?, open, read and close.
# start - optional name of the rule to start from. Defaults to the tree's
#         '+toprule' entry, then to 'TOP' when the tree defines that rule.
#
# Returns the first element of the top-level match, passed through the
# receiver's `final` hook when it has one. Returns nil when the parse fails
# and throw_error does not raise.
# Raises RuntimeError when grammar, receiver or start rule is missing.
def parse input, start=nil
  # Reset both cursors so a parser instance can be reused for another parse.
  @position = 0
  @farthest = 0

  if input.kind_of? String
    text = input
    input = Pegex::Input.new { |i| i.string = text }
  end
  @input = input
  @input.open unless @input.open?
  @buffer = @input.read
  @length = @buffer.length

  fail "No 'grammar'. Can't parse" unless @grammar
  @tree = @grammar.tree ||= @grammar.make_tree

  start_rule_ref = start || @tree['+toprule'] || (@tree['TOP'] ? 'TOP' : nil)
  fail "No starting rule for Pegex::Parser::parse" unless start_rule_ref

  optimize_grammar start_rule_ref

  fail "No 'receiver'. Can't parse" unless @receiver

  # XXX does ruby have problems with circular references
  @receiver.parser = self

  # Give the receiver its start-of-parse hook, with @rule/@parent describing
  # the start rule (the local start_rule_ref, not a global).
  if @receiver.respond_to? 'initial'
    @rule, @parent = start_rule_ref, {}
    @receiver.initial
  end

  begin
    match = match_ref start_rule_ref, {}
  ensure
    # Close the input even when matching raises (e.g. through an error rule).
    @input.close
  end

  # A parse only succeeds when the start rule matched the whole buffer.
  if !match || @position < @length
    throw_error "Parse document failed for some reason"
    return
  end

  if @receiver.respond_to? 'final'
    @rule, @parent = start_rule_ref, {}
    match = [ @receiver.final(match.first) ]
  end

  match.first
end
70
+
71
# Prepare every rule of @tree for matching (once per parser), then optimize a
# synthetic reference to +start+ so the start rule is processed as a 'ref'.
#
# start - name of the start rule.
#
# Returns nothing meaningful; sets @optimized so later calls are no-ops.
def optimize_grammar start
  return if @optimized
  @tree.each_value do |node|
    # Only rule definitions are hashes. Entries that are strings, arrays
    # (repeated meta keys collected by got_meta_section) or nil cannot be
    # optimized and would crash optimize_node.
    next unless node.kind_of? Hash
    optimize_node node
  end
  optimize_node '.ref' => start
  @optimized = true
end
80
+
81
# Annotate a grammar node in place so it can be matched directly.
#
# The node's kind is the first of ref/rgx/all/any/err/code for which it has a
# ".<kind>" entry; 'rule', 'kind' and 'method' are filled in from it, and
# '+min', '+max' and '+asr' are given defaults. Children ('all'/'any' lists
# and a '.sep' node) are optimized recursively, 'ref' nodes attach the
# receiver's action to the referenced rule, and 'rgx' nodes are compiled to
# a Regexp anchored with \A.
#
# Raises RuntimeError when the node has none of the known kinds.
def optimize_node node
  # The block assigns node['rule'] for every candidate it tries, so the entry
  # ends up holding the payload of the first kind that is present.
  kind = %w[ref rgx all any err code].find { |k| node['rule'] = node[".#{k}"] }
  fail unless kind
  node['kind'] = kind
  node['method'] = method("match_#{kind}")

  # Defaults depend on which bound was originally given, so read both first.
  given_min, given_max = node['+min'], node['+max']
  node['+min'] ||= given_max == nil ? 1 : 0
  node['+max'] ||= given_min == nil ? 1 : 0
  node['+asr'] ||= nil
  node['+min'] = node['+min'].to_i
  node['+max'] = node['+max'].to_i

  case kind
  when 'any', 'all'
    node['rule'].each { |elem| optimize_node elem }
  when 'ref'
    ref = node['rule']
    rule = @tree[ref]
    if @receiver.respond_to? "got_#{ref}"
      rule['action'] = receiver.method "got_#{ref}"
    elsif receiver.respond_to? 'gotrule'
      rule['action'] = receiver.method 'gotrule'
    end
    node['method'] = method('match_ref_trace') if @debug
  when 'rgx'
    node['rule'] = Regexp.new "\\A#{node['.rgx']}"
  end

  sep = node['.sep']
  optimize_node sep if sep
end
117
+
118
# Match one optimized grammar node at the current position, honouring its
# quantifier ('+min'/'+max'), assertion ('+asr') and '-skip' flag. Nodes with
# a '.sep' entry are delegated to match_next_with_sep.
#
# Returns the collected match array ([] for '-skip' nodes), or false when the
# node does not match.
def match_next next_
  return match_next_with_sep(next_) if next_['.sep']

  rule      = next_['rule']
  matcher   = next_['method']
  min       = next_['+min']
  max       = next_['+max']
  assertion = next_['+asr']

  mark     = @position  # last position known to be good
  captures = []
  count    = 0

  while (got = matcher.call(rule, next_))
    mark = @position unless assertion
    count += 1
    captures.concat(got) unless got.equal?($pegex_nil)
    break if max == 1
  end

  unless max == 1
    captures = [captures]
    @position = mark
    @farthest = mark if mark > @farthest
  end

  # A negative assertion (+asr == -1) inverts the quantifier check.
  in_range = count >= min && (max == 0 || count <= max)
  result = in_range ^ (assertion == -1)

  # Failures and assertions never consume input.
  if !result || assertion
    @position = mark
    @farthest = mark if mark > @farthest
  end

  return false unless result
  next_['-skip'] ? [] : captures
end
143
+
144
# Match a node whose repetitions are separated by its '.sep' node
# (element, separator, element, ...).
#
# Returns the collected match array ([] for '-skip' nodes), or false when the
# number of elements matched is outside the node's '+min'/'+max' bounds
# (a '+max' of 0 means "no upper bound").
def match_next_with_sep next_
  rule, method, min, max, sep =
    next_.values_at 'rule', 'method', '+min', '+max', '.sep'

  position, match, count, scount = @position, [], 0, 0

  while return_ = method.call(rule, next_)
    position = @position
    count += 1
    match.concat return_
    return_ = match_next(sep) or break
    match.concat return_
    scount += 1
  end
  match = [match] if max != 1

  # && / || are required here: with `and`/`or` the assignment binds first and
  # the upper bound is never checked.
  result = count >= min && (max == 0 || count <= max)

  # As many separators as elements means a trailing separator was consumed
  # (or nothing matched at all); unless the separator is marked '+eok', go
  # back to the position after the last element.
  if count == scount and not sep['+eok']
    @farthest = position if (@position = position) > @farthest
  end

  return false unless result
  next_['-skip'] ? [] : match
end
169
+
170
# Match the rule named +ref+ and run its receiver action, if it has one.
#
# Returns false when the rule does not match, $dummy when the rule has no
# 'action', and otherwise the action's result wrapped in an array (the
# $pegex_nil sentinel is passed through unwrapped). @rule and @parent are set
# before the action is called.
def match_ref ref, parent
  rule = @tree[ref]
  match = match_next(rule)
  return false unless match

  action = rule['action']
  return $dummy unless action

  @rule, @parent = ref, parent
  result = action.call(match.first)
  result.equal?($pegex_nil) ? result : [result]
end
178
+
179
# Match +regexp+ against the buffer starting at the current position.
#
# On success the position (and @farthest, when exceeded) advances by the
# length of the whole match and the captures are returned: [] for no groups,
# [capture] for one group, [[c1, c2, ...]] for several. Returns false when
# the regexp does not match.
def match_rgx regexp, parent=nil
  m = @buffer[@position .. -1].match(regexp)
  return false unless m

  captures = m.captures
  captures = [captures] if m.length > 2

  reached = @position + m[0].length
  @position = reached
  @farthest = reached if reached > @farthest
  captures
end
189
+
190
# Match every node in +list+ in order.
#
# Returns false (after rewinding to the starting position) as soon as one
# node fails. Otherwise returns the concatenated matches of the nodes that
# are neither assertions ('+asr') nor skipped ('-skip'), wrapped in an extra
# array when more than one node contributed.
def match_all list, parent=nil
  start = @position
  collected = []
  kept = 0

  list.each do |elem|
    got = match_next(elem)
    unless got
      @position = start
      @farthest = start if start > @farthest
      return false
    end
    next if elem['+asr'] || elem['-skip']
    collected.concat(got)
    kept += 1
  end

  kept > 1 ? [collected] : collected
end
206
+
207
# Try each alternative in +list+ in order.
#
# Returns the match of the first node that matches, or false when none does.
def match_any list, parent=nil
  list.each do |elem|
    hit = match_next(elem)
    return hit if hit
  end
  false
end
215
+
216
# Matcher for '.err' nodes: reports +error+ through throw_error and returns
# whatever throw_error returns.
def match_err error, parent=nil
  throw_error(error)
end
219
+
220
# Debugging variant of match_ref: traces "try_<ref>" before the attempt and
# "got_<ref>" or "not_<ref>" after it. Rules carrying '+asr' are matched
# without any tracing.
#
# Returns whatever match_ref returns.
def match_ref_trace ref, parent
  quiet = @tree[ref]['+asr']
  trace "try_#{ref}" unless quiet
  result = match_ref ref, parent
  outcome = result ? 'got' : 'not'
  trace "#{outcome}_#{ref}" unless quiet
  result
end
232
+
233
# Write one trace line for +action+ to $stderr.
#
# "try_" actions are printed at the current indent, followed by a preview of
# the remaining input, and then increase the indent. Every other action
# decreases the indent first and prints no preview.
def trace action
  entering = (action =~ /^try_/) ? true : false
  @indent ||= 0
  @indent -= 1 unless entering
  $stderr.print ' ' * @indent
  @indent += 1 if entering

  # Preview of the unparsed input, truncated and with newlines made visible.
  preview = @buffer[@position..-1]
  preview = preview[0..30] + '...' if preview.length > 30
  preview = preview.gsub(/\n/, "\\n")

  $stderr.printf "%-30s", action
  if entering
    $stderr.print " >#{preview}<\n"
  else
    $stderr.print "\n"
  end
end
245
+
246
# Error raised by throw_error when a parse fails.
class PegexParseError < RuntimeError

end

# Record a parse error and, unless @throw_on_error is false, raise it.
#
# An earlier `throw_error` that simply called `raise msg` used to be defined
# just above; it was immediately overridden by this definition and has been
# removed as dead code.
#
# msg - short description of the failure; format_error expands it into the
#       full report stored in @error.
#
# Returns nil when @throw_on_error is false.
# Raises PegexParseError (with the formatted report) otherwise.
def throw_error msg
  @error = format_error msg
  return nil unless @throw_on_error
  raise PegexParseError, @error
end
259
+
260
# Build the multi-line report for a parse error.
#
# The report describes the farthest position the parser reached (@farthest):
# its 1-based line and column, up to 50 characters of the current line before
# it, up to 50 characters after it (newlines shown as \n), and a caret line
# pointing at the position. The current position (@position) is included as
# the "pre-lookahead" value.
#
# msg - short description of the failure.
#
# Returns the report as a String ending in a newline.
def format_error msg
  buffer = @buffer
  position = @farthest
  real_pos = @position

  line = buffer[0, position].count("\n") + 1

  # Look for the previous newline strictly before `position`; including
  # `position` itself would report column 0 whenever the error sits on a
  # newline character. (A negative start index would search from the end of
  # the string, hence the explicit guard for position 0.)
  last_newline = position > 0 ? buffer.rindex("\n", position - 1) : nil
  column = position - (last_newline || -1)

  pretext_start = position < 50 ? 0 : position - 50
  pretext = buffer[pretext_start, position - pretext_start]
  pretext = pretext.gsub(/.*\n/m, '')  # keep only the current line
  context = buffer[position, 50].gsub(/\n/, "\\n")

  # The caret offset is derived from the label so the two cannot drift apart.
  label = '  context:  '
  [
    'Error parsing Pegex document:',
    "  msg:      #{msg}",
    "  line:     #{line}",
    "  column:   #{column}",
    "#{label}#{pretext}#{context}",
    "#{' ' * (label.length + pretext.length)}^",
    "  position: #{position} (#{real_pos} pre-lookahead)",
    '',
  ].join("\n")
end
286
+
287
+ end
@@ -0,0 +1,148 @@
1
+ require 'pegex/tree'
2
+ require 'pegex/grammar/atoms'
3
+
4
+ class Pegex::Pegex::AST < Pegex::Tree
5
# Set up the state used while building a grammar tree: the atom table from
# Pegex::Grammar::Atoms, an empty table of extra rules (filled in by
# got_rule_reference), and the table mapping rule prefixes to the node entry
# they set -- either a [key, value] pair or a bare key.
def initialize
  @atoms = Pegex::Grammar::Atoms.new.atoms
  @extra_rules = {}

  prefixes = {}
  prefixes['!'] = ['+asr', -1]
  prefixes['='] = ['+asr', 1]
  prefixes['.'] = '-skip'
  prefixes['-'] = '-pass'
  prefixes['+'] = '-wrap'
  @prefixes = prefixes
end
16
+
17
# Assemble the final grammar hash.
#
# got - [meta_section, rule_section]: a hash of meta entries and a list of
#       single-entry { name => definition } hashes.
#
# Returns a hash holding '+toprule', the extra rules, the meta entries and
# every rule definition; later sources override earlier ones.
def got_grammar got
  meta_section, rule_section = got

  grammar = { '+toprule' => @toprule }
  grammar.update(@extra_rules)
  grammar.update(meta_section)

  rule_section.each do |rule|
    name, definition = rule.first
    grammar[name] = definition
  end
  grammar
end
27
+
28
# Turn a list of [key, value] meta pairs into a hash keyed by "+<key>".
# A key that occurs more than once collects its values in an array.
def got_meta_section got
  got.each_with_object({}) do |(name, val), meta|
    key = "+#{name}"
    old = meta[key]
    if old.nil?
      meta[key] = val
    elsif old.kind_of? Array
      old << val
    else
      meta[key] = [ old, val ]
    end
  end
end
46
+
47
# Record a rule definition and keep track of the grammar's top rule: a rule
# named 'TOP' always becomes the top rule, otherwise the first rule seen is.
#
# got - [name, value]
#
# Returns the single-entry hash { name => value }.
def got_rule_definition got
  name, value = got
  @toprule = name if name == 'TOP' || !@toprule
  { name => value }
end
53
+
54
# Apply an optional prefix and quantifier suffix to a bracketed group.
#
# got - [prefix, group, suffix]; prefix and suffix may be empty.
#
# Returns the (modified) group node.
def got_bracketed_group got
  prefix, group, suffix = got
  unless prefix.to_s.empty?
    # A prefix maps either to a [key, value] pair ('!' and '=') or to a bare
    # key whose value is 1. Destructuring handles both; using the array
    # itself as the hash key would store a useless [key, value] => 1 entry.
    key, val = @prefixes[prefix]
    group[key] = val || 1
  end
  unless suffix.to_s.empty?
    set_quantity group, suffix
  end
  group
end
64
+
65
# Build the node for a sequence: a lone member is returned as is, several
# members become { '.all' => members }. Raises RuntimeError for an empty
# group.
def got_all_group got
  list = get_group got
  fail if list.empty?
  list.length == 1 ? list.first : { '.all' => list }
end
71
+
72
# Build the node for a set of alternatives: a lone member is returned as is,
# several members become { '.any' => members }. Raises RuntimeError for an
# empty group.
def got_any_group got
  list = get_group got
  fail if list.empty?
  list.length == 1 ? list.first : { '.any' => list }
end
78
+
79
# Flatten arbitrarily nested arrays of group members into one flat list.
def get_group group
  group.flatten
end
82
+
83
# Attach an optional separator rule to a rule.
#
# got - [rule, sep_op, sep_rule]. When sep_rule is present it is stored under
#       the rule's '.sep' entry; the '%%' operator additionally marks the
#       separator with '+eok'.
#
# Returns the rule.
def got_rule_part got
  rule, sep_op, sep_rule = got
  return rule unless sep_rule

  sep_rule['+eok'] = true if sep_op == '%%'
  rule['.sep'] = sep_rule
  rule
end
91
+
92
# Build the node for a reference to another rule.
#
# got - [prefix, ref1, ref2, suffix]. The rule name arrives in either ref1 or
#       ref2; prefix and suffix may be empty.
#
# When the name is a known atom, its regex is registered in @extra_rules so
# the grammar will contain a definition for it.
#
# Returns a { '.ref' => name } node with quantifier and prefix applied.
def got_rule_reference got
  prefix, ref1, ref2, suffix = got
  # An empty string is truthy in Ruby, so `ref1 || ref2` would pick an empty
  # ref1 over a populated ref2; choose the first non-empty name instead.
  ref = [ref1, ref2].find { |name| name && !name.empty? }
  node = { '.ref' => ref }
  if (regex = @atoms[ref])
    @extra_rules[ref] = {'.rgx' => regex}
  end
  unless suffix.to_s.empty?
    set_quantity node, suffix
  end
  unless prefix.to_s.empty?
    # A prefix maps either to a [key, value] pair or to a bare key (value 1).
    key, val = @prefixes[prefix]
    node[key] = val || 1
  end
  node
end
112
+
113
# Normalise the text of a Pegex regular expression into real regex source.
#
# Comments (from '#' to the end of the line) and all whitespace are removed,
# then the Pegex group shorthands "(:", "(=" and "(!" -- optionally with an
# i/s/m flag before the colon -- are expanded to "(?:", "(?=" and "(?!".
#
# The previous implementation interpolated $1 into the replacement string
# before gsub ran (so it never held the current match) and used a pattern
# that rewrote every '=' and '!'. It also modified the caller's string in
# place; this version works on copies.
#
# got - the regex text as written in the grammar.
#
# Returns { '.rgx' => normalised_source }.
def got_regular_expression got
  source = got.gsub(/\s*#.*\n/, '')
  source = source.gsub(/\s+/, '')
  # '\1' is a gsub back-reference, evaluated per match.
  source = source.gsub(/\(([ism]?:|=|!)/, '(?\1')
  {'.rgx' => source}
end
119
+
120
# Map a whitespace token to a regex node that references "<wsN>", where N is
# the length of the token.
def got_whitespace_token got
  width = got.length
  { '.rgx' => "<ws#{width}>" }
end
123
+
124
# Wrap an error message in an '.err' node.
def got_error_message got
  { '.err' => got }
end
127
+
128
# Store the bounds described by +quantifier+ on +object+.
#
#   *    '+min' = 0              N+   '+min' = N
#   +    '+min' = 1              N-M  '+min' = N, '+max' = M
#   ?    '+max' = 1              N    '+min' = '+max' = N
#
# Numeric bounds are stored as the strings captured from the quantifier.
# Raises RuntimeError for any other quantifier.
def set_quantity object, quantifier
  case quantifier
  when '*' then object['+min'] = 0
  when '+' then object['+min'] = 1
  when '?' then object['+max'] = 1
  when /^(\d+)\+$/
    object['+min'] = Regexp.last_match(1)
  when /^(\d+)\-(\d+)+$/
    low, high = Regexp.last_match.captures
    object['+min'] = low
    object['+max'] = high
  when /^(\d+)$/
    object['+min'] = Regexp.last_match(1)
    object['+max'] = Regexp.last_match(1)
  else
    fail "Invalid quantifier: '#{quantifier}'"
  end
end
148
+ end