kpeg 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in kpeg.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2011, Evan Phoenix
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ * Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ * Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+ * Neither the name of the <organization> nor the
12
+ names of its contributors may be used to endorse or promote products
13
+ derived from this software without specific prior written permission.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
19
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+
@@ -0,0 +1,8 @@
1
+ KPeg
2
+ ====
3
+
4
+ KPeg is a simple PEG library for Ruby. It provides an API as well as a native grammar format for building grammars.
5
+
6
+ KPeg strives to provide a simple, powerful API without being too exotic.
7
+
8
+ KPeg supports direct left recursion of rules via the [OMeta memoization](http://www.vpri.org/pdf/tr2008003_experimenting.pdf) trick.
@@ -0,0 +1,24 @@
1
# Development tasks for kpeg: run the tests, print KPeg::FORMAT as a grammar,
# and regenerate the format parser from its .kpeg source.
require 'rake/testtask'

# Let the tasks below load the in-tree library from ./lib.
$LOAD_PATH.unshift "lib"

task :default => :test

desc "Run tests"
Rake::TestTask.new { |test_task|
  test_task.test_files = FileList['test/test*.rb']
  test_task.verbose = true
}

# Render KPeg::FORMAT to standard output through KPeg::GrammarRenderer.
task :grammar do
  %w[kpeg kpeg/format kpeg/grammar_renderer].each { |lib| require lib }

  KPeg::GrammarRenderer.new(KPeg::FORMAT).render(STDOUT)
end

# Rebuild lib/kpeg/format_parser.rb from lib/kpeg/format.kpeg using bin/kpeg.
task :parser do
  sh "ruby -Ilib bin/kpeg -o lib/kpeg/format_parser.rb -s -f lib/kpeg/format.kpeg"
end
@@ -0,0 +1,126 @@
1
#!/usr/bin/env ruby
#
# Command line front end for kpeg.
#
# Reads a grammar file, parses it with KPeg::FormatParser and then either:
#   * reports "Syntax ok" (--test, optionally dumping the grammar with --debug),
#   * writes the grammar back out reformatted (--reformat -o FILE), or
#   * writes generated parser code to the output path (default "<file>.rb").
#
# Exits with status 1 on any usage, syntax or overwrite error.

require 'kpeg'
require 'kpeg/code_generator'
require 'kpeg/format_parser'
require 'kpeg/grammar_renderer'

require 'optparse'

options = {}
opt_parser = OptionParser.new do |o|
  o.banner = "Usage: kpeg [options]"

  o.on("-t", "--test", "Syntax check the file only") do |v|
    options[:test] = v
  end

  o.on("--reformat", "Reformat your grammar and write it back out") do
    options[:reformat] = true
  end

  o.on("-o", "--output FILE", "Where the output should go") do |v|
    options[:output] = v
  end

  o.on("-n", "--name NAME", "Class name to use for the parser") do |v|
    options[:name] = v
  end

  o.on("-f", "--force", "Overwrite the output if it exists") do |v|
    options[:force] = v
  end

  o.on("-s", "--stand-alone", "Write the parser to run standalone") do |v|
    options[:standalone] = v
  end

  o.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end

  o.on("-d", "--debug", "Debug parsing the file") do |v|
    options[:debug] = v
  end
end
opt_parser.parse!

file = ARGV.shift

# No grammar file given: show the usage text instead of failing with a
# TypeError when nil reaches File.exist?.
unless file
  puts opt_parser
  exit 1
end

# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
unless File.exist?(file)
  puts "File '#{file}' does not exist"
  exit 1
end

parser = KPeg::FormatParser.new File.read(file), true

unless parser.parse
  puts "Syntax error in grammar #{file}"
  parser.show_error
  exit 1
end

grammar = parser.grammar

# --reformat: pretty-print the parsed grammar to the -o path and stop.
if options[:reformat]
  unless options[:output]
    puts "Please specify -o for where to write the new grammar"
    exit 1
  end

  output = options[:output]
  if File.exist?(output) && !options[:force]
    puts "Output '#{output}' already exists, not overwriting (use -f)"
    exit 1
  end

  rend = KPeg::GrammarRenderer.new(grammar)

  File.open output, "w" do |f|
    rend.render(f)
  end

  puts "Wrote reformatted output to #{output}"

  exit 0
end

# The parser class name comes from -n, falling back to the grammar's "name"
# variable. It is only mandatory when code is actually generated.
name = options[:name]
if !options[:test] && !name
  name = grammar.variables["name"]
  unless name
    puts "Please specify -n"
    exit 1
  end
end

new_path = options[:output] || "#{file}.rb"

if !options[:test] && File.exist?(new_path) && !options[:force]
  puts "Path #{new_path} already exists, not overwriting\n"
  exit 1
end

# --test: the grammar parsed, so report success (and dump it with --debug).
if options[:test]
  puts "Syntax ok"

  if options[:debug]
    gr = KPeg::GrammarRenderer.new(grammar)
    gr.render(STDOUT)
  end
  exit 0
end

cg = KPeg::CodeGenerator.new name, grammar
cg.standalone = options[:standalone]

File.open new_path, "w" do |f|
  f << cg.output
end

puts "Wrote #{name} to #{new_path}"
@@ -0,0 +1 @@
1
" Give every *.kpeg buffer, newly created or read from disk, the kpeg filetype.
autocmd BufNewFile,BufRead *.kpeg set filetype=kpeg
@@ -0,0 +1,55 @@
1
" Vim syntax file
" Language: kpeg
" Version: $Revision$

" Skip loading when a syntax is already active for this buffer.
if version < 600
  syntax clear
elseif exists("b:current_syntax")
  finish
endif

syn case match

" Misc syntax.
syn match kpegOperator /[|*?+!\[\]]/
syn match kpegAssign "="
syn match kpegCapture /[<>]/
syn match kpegParen /[()]/

" An identifier is either a lone "-" or a letter followed by letters, digits
" and hyphens. \v (very magic) makes | and ( ) act as alternation and
" grouping; in Vim's default magic mode they would match those literal
" characters and the pattern would never match a real identifier.
syn match kpegIdentifier /\v-|([a-zA-Z][-a-zA-Z0-9]*)/
syn match kpegComment /#.*$/
syn region kpegString start="\"" end="\"" skip="\\\\\|\\\""
syn region kpegRegexp start=/\// skip=/\\\// end=/\//

" Pull in Ruby highlighting so code between { and } is shown as Ruby.
syntax include @Ruby syntax/ruby.vim

syn region kpegCode matchgroup=kpegCurly start=/{/ end=/}/ contains=@Ruby

syn match kpegLabel /:[a-zA-Z][-a-zA-Z0-9]*/

" Link the kpeg groups to standard highlight groups. HiLink is a temporary
" command: "hi link" on Vim older than 5.8, "hi def link" otherwise.
if version >= 508 || !exists("did_c_syn_inits")
  if version < 508
    let did_c_syn_inits = 1
    command -nargs=+ HiLink hi link <args>
  else
    command -nargs=+ HiLink hi def link <args>
  endif

  " NOTE(review): kpegNumber and kpegFloat are linked here but no rule in
  " this file defines them; kept as-is.
  HiLink kpegRegexp Special
  HiLink kpegNumber Number
  HiLink kpegComment Comment
  HiLink kpegString String
  HiLink kpegLabel Type
  HiLink kpegOperator Operator
  HiLink kpegAssign Define
  HiLink kpegCapture Keyword
  HiLink kpegFloat Float
  HiLink kpegIdentifier Identifier

  HiLink kpegParen Delimiter
  HiLink kpegCurly Delimiter

  delcommand HiLink
endif

let b:current_syntax = "kpeg"
@@ -0,0 +1,24 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for kpeg. The version is read from lib/kpeg/version.rb,
# so ./lib is put on the load path before requiring it.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push lib_dir
require "kpeg/version"

Gem::Specification.new do |spec|
  spec.name        = "kpeg"
  spec.version     = KPeg::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Evan Phoenix"]
  spec.email       = ["evan@fallingsnow.net"]
  spec.homepage    = "https://github.com/evanphx/kpeg"
  spec.summary     = "Peg-based Code Generator"
  spec.description = "A tool for generating parsers using PEG"

  # Packaged files: library sources plus the executable, anything under doc/,
  # and the top-level project files.
  ruby_sources  = Dir["lib/**/*.rb"] + ["bin/kpeg"]
  documentation = Dir["doc/**/*"]
  project_files = ["LICENSE", "README.md", "Rakefile", "kpeg.gemspec", "Gemfile"]

  spec.files         = ruby_sources + documentation + project_files
  spec.test_files    = Dir["test/**/*.rb"]
  spec.bindir        = "bin"
  spec.executables   = ["kpeg"]
  spec.require_paths = ["lib"]
  spec.add_development_dependency "rake"
end
@@ -0,0 +1,50 @@
1
# Convenience entry points for building, loading and compiling grammars.
module KPeg
  # Builds a grammar in Ruby code.
  #
  # Yields a fresh Grammar to the block and returns that same grammar.
  def self.grammar
    g = Grammar.new
    yield g
    g
  end

  # Runs a Parser over +str+ using +gram+ and returns the result of
  # Parser#parse.
  def self.match(str, gram)
    scan = Parser.new(str, gram)
    scan.parse
  end

  # Reads the grammar definition in +file+ and returns the parsed grammar.
  #
  # If the file does not parse, the failure is reported through
  # FormatParser#raise_error (the same handling +compile+ uses) instead of
  # silently returning whatever grammar the failed parse left behind.
  #
  # +log+ is accepted for backward compatibility and is not used.
  def self.load_grammar(file, log=false)
    parser = KPeg::FormatParser.new File.read(file)
    parser.raise_error unless parser.parse

    parser.grammar
  end

  # Loads the grammar in +file+, generates parser code for a class called
  # +name+ and evaluates that code in Object.
  #
  # Emits a warning with the parser name and generated code size.
  # Returns true.
  def self.load(file, name)
    grammar = load_grammar(file)
    cg = KPeg::CodeGenerator.new name, grammar

    code = cg.output

    warn "[Loading parser '#{name}' => #{code.size} bytes]"

    Object.module_eval code
    true
  end

  # Parses the grammar source +str+, generates parser code for a class called
  # +name+ and evaluates it inside +scope+ (Object by default).
  #
  # A grammar that fails to parse is reported through
  # FormatParser#raise_error. Returns true.
  def self.compile(str, name, scope=Object)
    parser = KPeg::FormatParser.new str
    parser.raise_error unless parser.parse

    cg = KPeg::CodeGenerator.new name, parser.grammar

    code = cg.output

    scope.module_eval code
    true
  end
end
46
+
47
+ require 'kpeg/grammar'
48
+ require 'kpeg/format_parser'
49
+ require 'kpeg/code_generator'
50
+
@@ -0,0 +1,355 @@
1
+ require 'kpeg/grammar_renderer'
2
+ require 'stringio'
3
+
4
+ module KPeg
5
+ class CodeGenerator
6
# Creates a generator that emits a parser class called +name+ for +gram+.
#
# name  - name of the class to generate.
# gram  - grammar whose rules are turned into methods.
# debug - when true, the generated code includes trace output.
#
# The save counter starts at 0, no output is cached yet and standalone
# generation is off.
def initialize(name, gram, debug=false)
  @name, @grammar, @debug = name, gram, debug
  @saves = 0
  @output = nil
  @standalone = false
end
14
+
15
+ attr_accessor :standalone
16
+
17
# Turns a rule name into the name of the generated method: every "-" becomes
# "_hyphen_" and the result is prefixed with an underscore.
def method_name(name)
  "_" + name.gsub("-", "_hyphen_")
end
21
+
22
# Returns the next unused save-variable name for the generated code and
# advances the counter: "_save" first, then "_save1", "_save2", ...
def save
  label = @saves.zero? ? "_save" : "_save#{@saves}"
  @saves += 1
  label
end

# Restarts save-variable numbering so the next +save+ returns "_save" again.
def reset_saves
  @saves = 0
end
36
+
37
# Appends to +code+ the Ruby source that matches the grammar operator +op+.
#
# code - a String (or anything responding to <<) receiving generated source.
# op   - a grammar operator node; nested operators are emitted recursively.
#
# The emitted code leaves its outcome in the generated local _tmp (truthy on
# a match) and uses names handed out by +save+ to remember and restore
# self.pos when a match fails.
#
# Raises RuntimeError for a CharRange whose endpoints are not single bytes
# and for operator classes this generator does not know.
def output_op(code, op)
  case op
  when Dot
    code << " _tmp = get_byte\n"
  when LiteralString
    code << " _tmp = match_string(#{op.string.dump})\n"
  when LiteralRegexp
    code << " _tmp = scan(/\\A#{op.regexp}/)\n"
  when CharRange
    ss = save()
    if op.start.bytesize == 1 and op.fin.bytesize == 1
      code << " #{ss} = self.pos\n"
      code << " _tmp = get_byte\n"
      code << " if _tmp\n"

      # Use getbyte where String provides it; otherwise fall back to
      # String#[] with an integer index.
      if op.start.respond_to? :getbyte
        left = op.start.getbyte 0
        right = op.fin.getbyte 0
      else
        left = op.start[0]
        right = op.fin[0]
      end

      code << " unless _tmp >= #{left} and _tmp <= #{right}\n"
      code << " self.pos = #{ss}\n"
      code << " _tmp = nil\n"
      code << " end\n"
      code << " end\n"
    else
      raise "Unsupported char range - #{op.inspect}"
    end
  when Choice
    # Try each alternative in order, rewinding between attempts; the
    # "while true" is only a block to break out of.
    ss = save()
    code << "\n #{ss} = self.pos\n"
    code << " while true # choice\n"
    op.ops.each_with_index do |n,idx|
      output_op code, n

      code << " break if _tmp\n"
      code << " self.pos = #{ss}\n"
      if idx == op.ops.size - 1
        code << " break\n"
      end
    end
    code << " end # end choice\n\n"
  when Multiple
    ss = save()
    if op.min == 0 and op.max == 1
      # Optional: a failed match still succeeds after rewinding.
      code << " #{ss} = self.pos\n"
      output_op code, op.op
      if op.save_values
        code << " @result = nil unless _tmp\n"
      end
      code << " unless _tmp\n"
      code << " _tmp = true\n"
      code << " self.pos = #{ss}\n"
      code << " end\n"
    elsif op.min == 0 and !op.max
      # Zero or more: loop until the inner operator fails, always succeed.
      if op.save_values
        code << " _ary = []\n"
      end

      code << " while true\n"
      output_op code, op.op
      if op.save_values
        code << " _ary << @result if _tmp\n"
      end
      code << " break unless _tmp\n"
      code << " end\n"
      code << " _tmp = true\n"

      if op.save_values
        code << " @result = _ary\n"
      end

    elsif op.min == 1 and !op.max
      # One or more: the first match is mandatory, the rest are optional.
      code << " #{ss} = self.pos\n"
      if op.save_values
        code << " _ary = []\n"
      end
      output_op code, op.op
      code << " if _tmp\n"
      if op.save_values
        code << " _ary << @result\n"
      end
      code << " while true\n"
      code << " "
      output_op code, op.op
      if op.save_values
        code << " _ary << @result if _tmp\n"
      end
      code << " break unless _tmp\n"
      code << " end\n"
      code << " _tmp = true\n"
      if op.save_values
        code << " @result = _ary\n"
      end
      code << " else\n"
      code << " self.pos = #{ss}\n"
      code << " end\n"
    else
      # Counted repetition: count matches, then compare against the minimum.
      code << " #{ss} = self.pos\n"
      code << " _count = 0\n"
      code << " while true\n"
      code << " "
      output_op code, op.op
      code << " if _tmp\n"
      code << " _count += 1\n"
      # Only emit the upper-bound check when there is an upper bound; with an
      # open-ended max the line would read "break if _count == " and the
      # generated parser would not be valid Ruby.
      code << " break if _count == #{op.max}\n" if op.max
      code << " else\n"
      code << " break\n"
      code << " end\n"
      code << " end\n"
      code << " if _count >= #{op.min}\n"
      code << " _tmp = true\n"
      code << " else\n"
      code << " self.pos = #{ss}\n"
      code << " _tmp = nil\n"
      code << " end\n"
    end

  when Sequence
    # Every element must match; the first failure rewinds and leaves the
    # "while true" block.
    ss = save()
    code << "\n #{ss} = self.pos\n"
    code << " while true # sequence\n"
    op.ops.each_with_index do |n, idx|
      output_op code, n

      if idx == op.ops.size - 1
        code << " unless _tmp\n"
        code << " self.pos = #{ss}\n"
        code << " end\n"
        code << " break\n"
      else
        code << " unless _tmp\n"
        code << " self.pos = #{ss}\n"
        code << " break\n"
        code << " end\n"
      end
    end
    code << " end # end sequence\n\n"
  when AndPredicate
    # Lookahead: evaluate the operand, then always restore the position.
    ss = save()
    code << " #{ss} = self.pos\n"
    if op.op.kind_of? Action
      code << " _tmp = begin; #{op.op.action}; end\n"
    else
      output_op code, op.op
    end
    code << " self.pos = #{ss}\n"
  when NotPredicate
    # Negative lookahead: as above, with the outcome inverted.
    ss = save()
    code << " #{ss} = self.pos\n"
    if op.op.kind_of? Action
      code << " _tmp = begin; #{op.op.action}; end\n"
    else
      output_op code, op.op
    end
    code << " _tmp = _tmp ? nil : true\n"
    code << " self.pos = #{ss}\n"
  when RuleReference
    code << " _tmp = apply(:#{method_name op.rule_name})\n"
  when InvokeRule
    if op.arguments
      code << " _tmp = #{method_name op.rule_name}#{op.arguments}\n"
    else
      code << " _tmp = #{method_name op.rule_name}()\n"
    end
  when ForeignInvokeRule
    if op.arguments
      # arguments[1..-2] drops the first and last character of the argument
      # text (presumably the surrounding parentheses - TODO confirm).
      code << " _tmp = @_grammar_#{op.grammar_name}.external_invoke(self, :#{method_name op.rule_name}, #{op.arguments[1..-2]})\n"
    else
      code << " _tmp = @_grammar_#{op.grammar_name}.external_invoke(self, :#{method_name op.rule_name})\n"
    end
  when Tag
    output_op code, op.op
    # A named tag copies @result into a local of that name.
    if op.tag_name and !op.tag_name.empty?
      code << " #{op.tag_name} = @result\n"
    end
  when Action
    code << " @result = begin; "
    code << op.action << "; end\n"
    if @debug
      code << " puts \" => \" #{op.action.dump} \" => \#{@result.inspect} \\n\"\n"
    end
    code << " _tmp = true\n"
  when Collect
    # Remember where the operand started so the matched text can be read
    # back into the generated local "text".
    code << " _text_start = self.pos\n"
    output_op code, op.op
    code << " if _tmp\n"
    code << " text = get_text(_text_start)\n"
    code << " end\n"
  else
    raise "Unknown op - #{op.class}"
  end

end
236
+
237
# Extracts the part of the file at +path+ that is marked for inclusion in
# standalone parsers.
#
# Returns the text from the first "# STANDALONE START" up to and including
# the first character of the first "# STANDALONE END", or nil when either
# marker is missing.
def standalone_region(path)
  contents = File.read(path)
  first = contents.index("# STANDALONE START")
  last = contents.index("# STANDALONE END")

  (first && last) ? contents[first..last] : nil
end
245
+
246
# Returns the complete Ruby source of the generated parser class.
#
# The source is built once and cached in @output. In standalone mode the
# marked regions of compiled_parser.rb and position.rb (siblings of this
# file) are inlined so the result needs no kpeg runtime; otherwise the class
# requires and subclasses KPeg::CompiledParser.
#
# In standalone mode, if either support file lacks its boundary comments, a
# message is printed and the process exits with status 1.
def output
  return @output if @output
  if @standalone
    code = "class #{@name}\n"

    unless cp = standalone_region(
                File.expand_path("../compiled_parser.rb", __FILE__))

      puts "Standalone failure. Check compiled_parser.rb for proper boundary comments"
      exit 1
    end

    unless position_src = standalone_region(
                File.expand_path("../position.rb", __FILE__))
      puts "Standalone failure. Check position.rb for proper boundary comments"
      # Stop here: continuing would pass nil to gsub! and fail with TypeError.
      exit 1
    end

    # Block form inserts the source verbatim; with a replacement string,
    # backslash sequences inside it would be treated as back-references.
    cp.gsub!(/include Position/) { position_src }
    code << cp << "\n"
  else
    code = "require 'kpeg/compiled_parser'\n\n"
    code << "class #{@name} < KPeg::CompiledParser\n"
  end

  @grammar.setup_actions.each do |act|
    code << "\n#{act.action}\n\n"
  end

  fg = @grammar.foreign_grammars

  if fg.empty?
    # Standalone parsers always get a setup_foreign_grammar method, even
    # when there is nothing to set up.
    if @standalone
      code << " def setup_foreign_grammar; end\n"
    end
  else
    code << " def setup_foreign_grammar\n"
    fg.each do |name, gram|
      code << " @_grammar_#{name} = #{gram}.new(nil)\n"
    end
    code << " end\n"
  end

  render = GrammarRenderer.new(@grammar)

  # Rule name => one-line rendering, reused for the Rules table below.
  renderings = {}

  @grammar.rule_order.each do |name|
    # Save-variable names restart for every generated method.
    reset_saves

    rule = @grammar.rules[name]
    io = StringIO.new
    render.render_op io, rule.op

    rend = io.string
    rend.gsub! "\n", " "

    renderings[name] = rend

    code << "\n"
    code << " # #{name} = #{rend}\n"

    if rule.arguments
      code << " def #{method_name name}(#{rule.arguments.join(',')})\n"
    else
      code << " def #{method_name name}\n"
    end

    if @debug
      code << " puts \"START #{name} @ \#{show_pos}\\n\"\n"
    end

    output_op code, rule.op
    if @debug
      code << " if _tmp\n"
      code << " puts \" OK #{name} @ \#{show_pos}\\n\"\n"
      code << " else\n"
      code << " puts \" FAIL #{name} @ \#{show_pos}\\n\"\n"
      code << " end\n"
    end

    code << " set_failed_rule :#{method_name name} unless _tmp\n"
    code << " return _tmp\n"
    code << " end\n"
  end

  code << "\n Rules = {}\n"
  @grammar.rule_order.each do |name|
    rend = GrammarRenderer.escape renderings[name], true
    code << " Rules[:#{method_name name}] = rule_info(\"#{name}\", \"#{rend}\")\n"
  end

  code << "end\n"
  @output = code
end
342
+
343
# Evaluates the generated parser source inside a fresh anonymous module,
# looks up the class named @name in that module and returns a new instance
# of it built from +str+.
def make(str)
  sandbox = Module.new
  sandbox.module_eval(output)

  sandbox.const_get(@name).new(str)
end
350
+
351
# Builds a parser for +str+ with +make+ and returns the result of calling
# its +parse+ method.
def parse(str)
  parser = make(str)
  parser.parse
end
354
+ end
355
+ end