kpeg 0.7

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-http endpoint is open to tampering in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in kpeg.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2011, Evan Phoenix
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ * Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ * Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+ * Neither the name of the <organization> nor the
12
+ names of its contributors may be used to endorse or promote products
13
+ derived from this software without specific prior written permission.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
19
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+
@@ -0,0 +1,8 @@
1
+ KPeg
2
+ ====
3
+
4
+ KPeg is a simple PEG library for Ruby. It provides both a Ruby API and a native grammar format for defining grammars.
5
+
6
+ KPeg strives to provide a simple, powerful API without being too exotic.
7
+
8
+ KPeg supports direct left recursion of rules via the [OMeta memoization](http://www.vpri.org/pdf/tr2008003_experimenting.pdf) trick.
@@ -0,0 +1,24 @@
1
require 'rake/testtask'

# Make the in-tree library loadable by the tasks below.
$:.unshift "lib"

task :default => :test

desc "Run tests"
Rake::TestTask.new do |t|
  t.test_files = FileList['test/test*.rb']
  t.verbose = true
end

# Without a description a task is hidden from `rake -T`.
desc "Render KPeg::FORMAT to standard output"
task :grammar do
  # Required lazily so that the other tasks do not have to load kpeg.
  require 'kpeg'
  require 'kpeg/format'
  require 'kpeg/grammar_renderer'

  gr = KPeg::GrammarRenderer.new(KPeg::FORMAT)
  gr.render(STDOUT)
end

desc "Generate lib/kpeg/format_parser.rb from lib/kpeg/format.kpeg"
task :parser do
  sh "ruby -Ilib bin/kpeg -o lib/kpeg/format_parser.rb -s -f lib/kpeg/format.kpeg"
end
@@ -0,0 +1,126 @@
1
#!/usr/bin/env ruby
#
# Command line front end for KPeg.
#
# Reads a grammar file and, depending on the options, either syntax checks
# it (-t), writes it back out reformatted (--reformat), or writes the Ruby
# source of a parser for it.

require 'kpeg'
require 'kpeg/code_generator'
require 'kpeg/format_parser'
require 'kpeg/grammar_renderer'

require 'optparse'

options = {}
opts = OptionParser.new do |o|
  o.banner = "Usage: kpeg [options]"

  o.on("-t", "--test", "Syntax check the file only") do |v|
    options[:test] = v
  end

  o.on("--reformat", "Reformat your grammar and write it back out") do
    options[:reformat] = true
  end

  o.on("-o", "--output FILE", "Where the output should go") do |v|
    options[:output] = v
  end

  o.on("-n", "--name NAME", "Class name to use for the parser") do |v|
    options[:name] = v
  end

  o.on("-f", "--force", "Overwrite the output if it exists") do |v|
    options[:force] = v
  end

  o.on("-s", "--stand-alone", "Write the parser to run standalone") do |v|
    options[:standalone] = v
  end

  o.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end

  o.on("-d", "--debug", "Debug parsing the file") do |v|
    options[:debug] = v
  end
end
opts.parse!

file = ARGV.shift

# Without this guard a missing argument reaches File.exist?(nil), which
# raises TypeError instead of telling the user what went wrong.
unless file
  puts "Please specify a grammar file"
  puts opts
  exit 1
end

# File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
unless File.exist?(file)
  puts "File '#{file}' does not exist"
  exit 1
end

parser = KPeg::FormatParser.new File.read(file), true

unless parser.parse
  puts "Syntax error in grammar #{file}"
  parser.show_error
  exit 1
end

grammar = parser.grammar

# --reformat: write the grammar back out through the renderer and stop.
if options[:reformat]
  output = options[:output]

  unless output
    puts "Please specify -o for where to write the new grammar"
    exit 1
  end

  if File.exist?(output) && !options[:force]
    puts "Output '#{output}' already exists, not overwriting (use -f)"
    exit 1
  end

  rend = KPeg::GrammarRenderer.new(grammar)

  File.open output, "w" do |f|
    rend.render(f)
  end

  puts "Wrote reformatted output to #{output}"

  exit 0
end

# --test: the grammar parsed, so report success; nothing is written and
# neither a class name nor an output path is needed.
if options[:test]
  puts "Syntax ok"

  if options[:debug]
    gr = KPeg::GrammarRenderer.new(grammar)
    gr.render(STDOUT)
  end

  exit 0
end

# The class name comes from -n, falling back to the grammar's "name" variable.
name = options[:name] || grammar.variables["name"]

unless name
  puts "Please specify -n"
  exit 1
end

new_path = options[:output] || "#{file}.rb"

if File.exist?(new_path) && !options[:force]
  puts "Path #{new_path} already exists, not overwriting\n"
  exit 1
end

cg = KPeg::CodeGenerator.new name, grammar
cg.standalone = options[:standalone]

File.open new_path, "w" do |f|
  f << cg.output
end

puts "Wrote #{name} to #{new_path}"
@@ -0,0 +1 @@
1
" Mark every *.kpeg buffer, whether newly created or read from disk, as kpeg.
autocmd BufNewFile,BufRead *.kpeg set filetype=kpeg
@@ -0,0 +1,55 @@
1
+ " Vim syntax file
2
+ " Language: kpeg
3
+ " Version: $Revision$
4
+
5
if version < 600
  syntax clear
elseif exists("b:current_syntax")
  finish
endif

syn case match

" Misc syntax.
syn match kpegOperator /[|*?+!\[\]]/
syn match kpegAssign "="
syn match kpegCapture /[<>]/
syn match kpegParen /[()]/

" An identifier is either a lone "-" or a letter followed by letters, digits
" and hyphens. In Vim's default (magic) mode alternation is \| and grouping
" is \( \); a bare | or ( only matches that literal character.
syn match kpegIdentifier /-\|\([a-zA-Z][-a-zA-Z0-9]*\)/
syn match kpegComment /#.*$/
syn region kpegString start="\"" end="\"" skip="\\\\\|\\\""
syn region kpegRegexp start=/\// skip=/\\\// end=/\//

" Code between curly braces is highlighted with the Ruby syntax.
syntax include @Ruby syntax/ruby.vim

syn region kpegCode matchgroup=kpegCurly start=/{/ end=/}/ contains=@Ruby

syn match kpegLabel /:[a-zA-Z][-a-zA-Z0-9]*/

if version >= 508 || !exists("did_c_syn_inits")
  if version < 508
    let did_c_syn_inits = 1
    command -nargs=+ HiLink hi link <args>
  else
    command -nargs=+ HiLink hi def link <args>
  endif

  " kpegNumber and kpegFloat are linked here although no syntax item in this
  " file defines them; the links are kept unchanged.
  HiLink kpegRegexp Special
  HiLink kpegNumber Number
  HiLink kpegComment Comment
  HiLink kpegString String
  HiLink kpegLabel Type
  HiLink kpegOperator Operator
  HiLink kpegAssign Define
  HiLink kpegCapture Keyword
  HiLink kpegFloat Float
  HiLink kpegIdentifier Identifier

  HiLink kpegParen Delimiter
  HiLink kpegCurly Delimiter

  delcommand HiLink
endif

let b:current_syntax = "kpeg"
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
# Put the in-tree lib directory on the load path so the version constant
# can be required before the gem is installed.
lib_dir = File.expand_path("../lib", __FILE__)
$:.push lib_dir
require "kpeg/version"

Gem::Specification.new do |spec|
  spec.name        = "kpeg"
  spec.version     = KPeg::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Evan Phoenix"]
  spec.email       = ["evan@fallingsnow.net"]
  spec.homepage    = "https://github.com/evanphx/kpeg"
  spec.summary     = "Peg-based Code Generator"
  spec.description = "A tool for generating parsers using PEG"

  # Packaged files: library sources plus the executable, everything under
  # doc/, and a fixed list of top-level project files.
  source_files  = Dir["lib/**/*.rb"] + ["bin/kpeg"]
  doc_files     = Dir["doc/**/*"]
  project_files = ["LICENSE", "README.md", "Rakefile", "kpeg.gemspec", "Gemfile"]

  spec.files         = source_files + doc_files + project_files
  spec.test_files    = Dir["test/**/*.rb"]
  spec.bindir        = "bin"
  spec.executables   = ["kpeg"]
  spec.require_paths = ["lib"]
  spec.add_development_dependency "rake"
end
@@ -0,0 +1,50 @@
1
# Top-level convenience entry points for building grammars and turning
# grammar sources into parser classes.
module KPeg
  # Creates a new Grammar, yields it to the block for configuration and
  # returns it.
  def self.grammar
    g = Grammar.new
    yield g
    g
  end

  # Parses +str+ with a Parser built for the grammar +gram+ and returns
  # whatever Parser#parse returns.
  def self.match(str, gram)
    scan = Parser.new(str, gram)
    scan.parse
  end

  # Reads the grammar definition in +file+ and returns the parsed grammar.
  #
  # A grammar that fails to parse is reported through the parser's
  # raise_error, the same way KPeg.compile reports it, instead of handing a
  # partially built grammar back to the caller.
  #
  # +log+ is accepted for backward compatibility and is not used.
  def self.load_grammar(file, log=false)
    parser = KPeg::FormatParser.new File.read(file)
    parser.raise_error unless parser.parse

    parser.grammar
  end

  # Loads the grammar in +file+, generates a parser class called +name+ and
  # evaluates the generated source in Object. Announces the load on standard
  # error and returns true.
  def self.load(file, name)
    grammar = load_grammar(file)
    cg = KPeg::CodeGenerator.new name, grammar

    code = cg.output

    warn "[Loading parser '#{name}' => #{code.size} bytes]"

    Object.module_eval code
    true
  end

  # Parses the grammar source +str+, generates a parser class called +name+
  # and evaluates the generated source in +scope+ (Object by default).
  # Calls the parser's raise_error when +str+ does not parse; returns true
  # otherwise.
  def self.compile(str, name, scope=Object)
    parser = KPeg::FormatParser.new str
    parser.raise_error unless parser.parse

    cg = KPeg::CodeGenerator.new name, parser.grammar

    code = cg.output

    scope.module_eval code
    true
  end
end
46
+
47
+ require 'kpeg/grammar'
48
+ require 'kpeg/format_parser'
49
+ require 'kpeg/code_generator'
50
+
@@ -0,0 +1,355 @@
1
+ require 'kpeg/grammar_renderer'
2
+ require 'stringio'
3
+
4
module KPeg
  # Generates the Ruby source of a parser class from a grammar.
  #
  # The generated class either subclasses KPeg::CompiledParser or, when
  # +standalone+ is set, embeds the marked regions of compiled_parser.rb and
  # position.rb so that it does not require kpeg itself.
  class CodeGenerator
    # name  - name of the class to generate
    # gram  - grammar to generate the parser from
    # debug - when true, the generated rule methods print trace output
    def initialize(name, gram, debug=false)
      @name = name
      @grammar = gram
      @debug = debug
      @saves = 0
      @output = nil
      @standalone = false
    end

    attr_accessor :standalone

    # Returns the name of the generated method for the rule +name+:
    # hyphens are spelled out and an underscore is prepended.
    def method_name(name)
      name = name.gsub("-","_hyphen_")
      "_#{name}"
    end

    # Returns a fresh local variable name for a saved position: "_save" for
    # the first request after a reset, then "_save1", "_save2", ...
    def save
      if @saves == 0
        str = "_save"
      else
        str = "_save#{@saves}"
      end

      @saves += 1
      str
    end

    # Restarts the numbering used by #save.
    def reset_saves
      @saves = 0
    end

    # Appends to +code+ the Ruby source that matches the grammar node +op+.
    # The emitted code leaves its outcome in the local variable _tmp.
    #
    # Raises RuntimeError for a multi-byte character range and for a node of
    # an unknown class.
    def output_op(code, op)
      case op
      when Dot
        code << " _tmp = get_byte\n"
      when LiteralString
        code << " _tmp = match_string(#{op.string.dump})\n"
      when LiteralRegexp
        code << " _tmp = scan(/\\A#{op.regexp}/)\n"
      when CharRange
        ss = save()
        if op.start.bytesize == 1 and op.fin.bytesize == 1
          code << " #{ss} = self.pos\n"
          code << " _tmp = get_byte\n"
          code << " if _tmp\n"

          # Strings without getbyte return the byte value from [0].
          if op.start.respond_to? :getbyte
            left = op.start.getbyte 0
            right = op.fin.getbyte 0
          else
            left = op.start[0]
            right = op.fin[0]
          end

          code << " unless _tmp >= #{left} and _tmp <= #{right}\n"
          code << " self.pos = #{ss}\n"
          code << " _tmp = nil\n"
          code << " end\n"
          code << " end\n"
        else
          raise "Unsupported char range - #{op.inspect}"
        end
      when Choice
        ss = save()
        code << "\n #{ss} = self.pos\n"
        # The loop is only a scope to break out of once an alternative matches.
        code << " while true # choice\n"
        op.ops.each_with_index do |n,idx|
          output_op code, n

          code << " break if _tmp\n"
          code << " self.pos = #{ss}\n"
          if idx == op.ops.size - 1
            code << " break\n"
          end
        end
        code << " end # end choice\n\n"
      when Multiple
        output_multiple code, op
      when Sequence
        ss = save()
        code << "\n #{ss} = self.pos\n"
        # The loop is only a scope to break out of when an element fails.
        code << " while true # sequence\n"
        op.ops.each_with_index do |n, idx|
          output_op code, n

          if idx == op.ops.size - 1
            code << " unless _tmp\n"
            code << " self.pos = #{ss}\n"
            code << " end\n"
            code << " break\n"
          else
            code << " unless _tmp\n"
            code << " self.pos = #{ss}\n"
            code << " break\n"
            code << " end\n"
          end
        end
        code << " end # end sequence\n\n"
      when AndPredicate
        ss = save()
        code << " #{ss} = self.pos\n"
        if op.op.kind_of? Action
          code << " _tmp = begin; #{op.op.action}; end\n"
        else
          output_op code, op.op
        end
        # A predicate never consumes input.
        code << " self.pos = #{ss}\n"
      when NotPredicate
        ss = save()
        code << " #{ss} = self.pos\n"
        if op.op.kind_of? Action
          code << " _tmp = begin; #{op.op.action}; end\n"
        else
          output_op code, op.op
        end
        code << " _tmp = _tmp ? nil : true\n"
        code << " self.pos = #{ss}\n"
      when RuleReference
        code << " _tmp = apply(:#{method_name op.rule_name})\n"
      when InvokeRule
        if op.arguments
          code << " _tmp = #{method_name op.rule_name}#{op.arguments}\n"
        else
          code << " _tmp = #{method_name op.rule_name}()\n"
        end
      when ForeignInvokeRule
        if op.arguments
          # [1..-2] drops the first and last character of the argument text.
          code << " _tmp = @_grammar_#{op.grammar_name}.external_invoke(self, :#{method_name op.rule_name}, #{op.arguments[1..-2]})\n"
        else
          code << " _tmp = @_grammar_#{op.grammar_name}.external_invoke(self, :#{method_name op.rule_name})\n"
        end
      when Tag
        if op.tag_name and !op.tag_name.empty?
          output_op code, op.op
          code << " #{op.tag_name} = @result\n"
        else
          output_op code, op.op
        end
      when Action
        code << " @result = begin; "
        code << op.action << "; end\n"
        if @debug
          code << " puts \" => \" #{op.action.dump} \" => \#{@result.inspect} \\n\"\n"
        end
        code << " _tmp = true\n"
      when Collect
        code << " _text_start = self.pos\n"
        output_op code, op.op
        code << " if _tmp\n"
        code << " text = get_text(_text_start)\n"
        code << " end\n"
      else
        raise "Unknown op - #{op.class}"
      end

    end

    # Reads the file at +path+ and returns the text starting at the
    # "# STANDALONE START" marker and running up to and including the first
    # character of the "# STANDALONE END" marker. Returns nil when either
    # marker is missing.
    def standalone_region(path)
      cp = File.read(path)
      start = cp.index("# STANDALONE START")
      fin = cp.index("# STANDALONE END")

      return nil unless start and fin
      cp[start..fin]
    end

    # Returns the Ruby source of the parser class. The source is generated
    # on the first call and the same string is returned afterwards.
    #
    # In standalone mode the process exits with status 1 when the boundary
    # comments cannot be found in compiled_parser.rb or position.rb.
    def output
      return @output if @output
      if @standalone
        code = "class #{@name}\n"

        unless cp = standalone_region(
                    File.expand_path("../compiled_parser.rb", __FILE__))

          puts "Standalone failure. Check compiled_parser.rb for proper boundary comments"
          exit 1
        end

        unless position_src = standalone_region(
                    File.expand_path("../position.rb", __FILE__))
          puts "Standalone failure. Check position.rb for proper boundary comments"
          # Stop here as well; carrying on would pass nil to gsub! below.
          exit 1
        end

        # Block form: the spliced source is inserted verbatim, whereas a
        # replacement string would have its backslash sequences interpreted.
        cp.gsub!(/include Position/) { position_src }
        code << cp << "\n"
      else
        code = "require 'kpeg/compiled_parser'\n\n"
        code << "class #{@name} < KPeg::CompiledParser\n"
      end

      @grammar.setup_actions.each do |act|
        code << "\n#{act.action}\n\n"
      end

      fg = @grammar.foreign_grammars

      if fg.empty?
        if @standalone
          code << " def setup_foreign_grammar; end\n"
        end
      else
        code << " def setup_foreign_grammar\n"
        fg.each do |fg_name, gram|
          code << " @_grammar_#{fg_name} = #{gram}.new(nil)\n"
        end
        code << " end\n"
      end

      render = GrammarRenderer.new(@grammar)

      renderings = {}

      @grammar.rule_order.each do |name|
        # Saved-position names only have to be unique within one method.
        reset_saves

        rule = @grammar.rules[name]
        io = StringIO.new
        render.render_op io, rule.op

        rend = io.string
        rend.gsub! "\n", " "

        renderings[name] = rend

        code << "\n"
        code << " # #{name} = #{rend}\n"

        if rule.arguments
          code << " def #{method_name name}(#{rule.arguments.join(',')})\n"
        else
          code << " def #{method_name name}\n"
        end

        if @debug
          code << " puts \"START #{name} @ \#{show_pos}\\n\"\n"
        end

        output_op code, rule.op
        if @debug
          code << " if _tmp\n"
          code << " puts \" OK #{name} @ \#{show_pos}\\n\"\n"
          code << " else\n"
          code << " puts \" FAIL #{name} @ \#{show_pos}\\n\"\n"
          code << " end\n"
        end

        code << " set_failed_rule :#{method_name name} unless _tmp\n"
        code << " return _tmp\n"
        code << " end\n"
      end

      code << "\n Rules = {}\n"
      @grammar.rule_order.each do |name|
        rend = GrammarRenderer.escape renderings[name], true
        code << " Rules[:#{method_name name}] = rule_info(\"#{name}\", \"#{rend}\")\n"
      end

      code << "end\n"
      @output = code
    end

    # Evaluates the generated source inside an anonymous module and returns
    # a new instance of the generated class, constructed with +str+.
    def make(str)
      m = Module.new
      m.module_eval output

      cls = m.const_get(@name)
      cls.new(str)
    end

    # Builds a parser for +str+ with #make and returns the result of its
    # parse method.
    def parse(str)
      make(str).parse
    end

    private

    # Appends to +code+ the source for the repetition node +op+.
    # Four forms are told apart by op.min and op.max: optional (0..1),
    # zero-or-more, one-or-more, and a general counted repetition.
    def output_multiple(code, op)
      # Requested for every form, including the one that does not use it.
      ss = save()
      if op.min == 0 and op.max == 1
        code << " #{ss} = self.pos\n"
        output_op code, op.op
        if op.save_values
          code << " @result = nil unless _tmp\n"
        end
        code << " unless _tmp\n"
        code << " _tmp = true\n"
        code << " self.pos = #{ss}\n"
        code << " end\n"
      elsif op.min == 0 and !op.max
        if op.save_values
          code << " _ary = []\n"
        end

        code << " while true\n"
        output_op code, op.op
        if op.save_values
          code << " _ary << @result if _tmp\n"
        end
        code << " break unless _tmp\n"
        code << " end\n"
        code << " _tmp = true\n"

        if op.save_values
          code << " @result = _ary\n"
        end

      elsif op.min == 1 and !op.max
        code << " #{ss} = self.pos\n"
        if op.save_values
          code << " _ary = []\n"
        end
        output_op code, op.op
        code << " if _tmp\n"
        if op.save_values
          code << " _ary << @result\n"
        end
        code << " while true\n"
        code << " "
        output_op code, op.op
        if op.save_values
          code << " _ary << @result if _tmp\n"
        end
        code << " break unless _tmp\n"
        code << " end\n"
        code << " _tmp = true\n"
        if op.save_values
          code << " @result = _ary\n"
        end
        code << " else\n"
        code << " self.pos = #{ss}\n"
        code << " end\n"
      else
        code << " #{ss} = self.pos\n"
        code << " _count = 0\n"
        code << " while true\n"
        code << " "
        output_op code, op.op
        code << " if _tmp\n"
        code << " _count += 1\n"
        # Without an upper bound there is nothing to compare against;
        # emitting the line anyway would produce "_count == " with no
        # right-hand side, which is not valid Ruby.
        code << " break if _count == #{op.max}\n" if op.max
        code << " else\n"
        code << " break\n"
        code << " end\n"
        code << " end\n"
        code << " if _count >= #{op.min}\n"
        code << " _tmp = true\n"
        code << " else\n"
        code << " self.pos = #{ss}\n"
        code << " _tmp = nil\n"
        code << " end\n"
      end
    end
  end
end