kpeg 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -1
- data/Manifest.txt +2 -4
- data/README.rdoc +32 -0
- data/Rakefile +10 -3
- data/lib/hoe/kpeg.rb +6 -5
- data/lib/kpeg.rb +1 -1
- data/lib/kpeg/code_generator.rb +72 -39
- data/lib/kpeg/compiled_parser.rb +15 -11
- data/lib/kpeg/format_parser.kpeg +21 -9
- data/lib/kpeg/format_parser.rb +42 -39
- data/lib/kpeg/grammar.rb +1 -1
- data/lib/kpeg/grammar_renderer.rb +14 -0
- data/lib/kpeg/position.rb +1 -0
- data/lib/kpeg/string_escape.rb +355 -2
- data/test/test_kpeg_code_generator.rb +166 -0
- data/test/test_kpeg_format.rb +2 -2
- data/test/{test_file_parser_roundtrip.rb → test_kpeg_format_parser_round_trip.rb} +1 -1
- data/test/{test_gen_calc.rb → test_kpeg_grammar.rb} +48 -5
- data/test/test_kpeg_grammar_renderer.rb +46 -5
- metadata +17 -20
- data/Gemfile +0 -12
- data/test/test_left_recursion.rb +0 -50
data/History.txt
CHANGED
@@ -1,4 +1,18 @@
|
|
1
|
-
===
|
1
|
+
=== 0.10 / 2012-04-16
|
2
|
+
|
3
|
+
* Minor enhancements
|
4
|
+
* In standalone parsers generation of a default initialize method may be
|
5
|
+
disabled with the custom_initialize variable:
|
6
|
+
|
7
|
+
%% custom_initialize = true
|
8
|
+
* Added a pre-class directive for adding class comments
|
9
|
+
* Generated code is now surrounded by startdoc/stopdoc.
|
10
|
+
|
11
|
+
* Bug fixes
|
12
|
+
* Hoe plugin now overwrites generated files
|
13
|
+
* Directives and variables now round-trip through KPeg::GrammarRenderer
|
14
|
+
|
15
|
+
=== 0.9 / 2012-04-06
|
2
16
|
|
3
17
|
* Minor enhancements
|
4
18
|
* Added arbitrary directives to the kpeg grammar
|
data/Manifest.txt
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
.autotest
|
2
|
-
Gemfile
|
3
2
|
History.txt
|
4
3
|
LICENSE
|
5
4
|
Manifest.txt
|
@@ -35,13 +34,12 @@ lib/kpeg/position.rb
|
|
35
34
|
lib/kpeg/string_escape.kpeg
|
36
35
|
lib/kpeg/string_escape.rb
|
37
36
|
test/inputs/comments.kpeg
|
38
|
-
test/test_file_parser_roundtrip.rb
|
39
|
-
test/test_gen_calc.rb
|
40
37
|
test/test_kpeg.rb
|
41
38
|
test/test_kpeg_code_generator.rb
|
42
39
|
test/test_kpeg_compiled_parser.rb
|
43
40
|
test/test_kpeg_format.rb
|
41
|
+
test/test_kpeg_format_parser_round_trip.rb
|
42
|
+
test/test_kpeg_grammar.rb
|
44
43
|
test/test_kpeg_grammar_renderer.rb
|
45
|
-
test/test_left_recursion.rb
|
46
44
|
vim/syntax_kpeg/ftdetect/kpeg.vim
|
47
45
|
vim/syntax_kpeg/syntax/kpeg.vim
|
data/README.rdoc
CHANGED
@@ -157,6 +157,38 @@ Kpeg allows comments to be added to the grammar file by using the # symbol
|
|
157
157
|
|
158
158
|
# This is a comment in my grammar
|
159
159
|
|
160
|
+
=== Variables
|
161
|
+
|
162
|
+
A variable looks like this:
|
163
|
+
|
164
|
+
%% name = value
|
165
|
+
|
166
|
+
Kpeg allows the following variables that control the output parser:
|
167
|
+
|
168
|
+
name::
|
169
|
+
The class name of the generated parser.
|
170
|
+
custom_initialize::
|
171
|
+
When this variable is set and a standalone parser is built, a default initialize method will not be
|
172
|
+
included.
|
173
|
+
|
174
|
+
=== Directives
|
175
|
+
|
176
|
+
A directive looks like this:
|
177
|
+
|
178
|
+
%% header {
|
179
|
+
...
|
180
|
+
}
|
181
|
+
|
182
|
+
Kpeg allows the following directives:
|
183
|
+
|
184
|
+
header::
|
185
|
+
Placed before any generated code
|
186
|
+
pre-class::
|
187
|
+
Placed before the class definition to provide a class comment
|
188
|
+
footer::
|
189
|
+
Placed after the end of the class (for requiring files dependent upon the
|
190
|
+
parser's namespace)
|
191
|
+
|
160
192
|
== Generating and running your parser
|
161
193
|
|
162
194
|
Before you can generate your parser you will need to define a root rule. This
|
data/Rakefile
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'hoe'
|
5
5
|
|
6
|
-
Hoe.plugin :bundler
|
7
6
|
Hoe.plugin :gemspec
|
8
7
|
Hoe.plugin :git
|
9
8
|
Hoe.plugin :minitest
|
@@ -29,10 +28,18 @@ rule ".rb" => ".kpeg" do |t|
|
|
29
28
|
ruby "-Ilib bin/kpeg -s -o #{t.name} -f #{t.source}"
|
30
29
|
end
|
31
30
|
|
32
|
-
|
33
|
-
task :parser => %w[
|
31
|
+
PARSER_FILES = %w[
|
34
32
|
lib/kpeg/string_escape.rb
|
35
33
|
lib/kpeg/format_parser.rb
|
36
34
|
]
|
37
35
|
|
36
|
+
PARSER_FILES.map do |parser_file|
|
37
|
+
file parser_file => 'lib/kpeg/compiled_parser.rb'
|
38
|
+
file parser_file => 'lib/kpeg/code_generator.rb'
|
39
|
+
file parser_file => 'lib/kpeg/position.rb'
|
40
|
+
end
|
41
|
+
|
42
|
+
desc "build the parser"
|
43
|
+
task :parser => PARSER_FILES
|
44
|
+
|
38
45
|
# vim: syntax=ruby
|
data/lib/hoe/kpeg.rb
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
# license:
|
11
11
|
#
|
12
12
|
# Copyright (c) Ryan Davis, seattle.rb
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# Permission is hereby granted, free of charge, to any person obtaining
|
15
15
|
# a copy of this software and associated documentation files (the
|
16
16
|
# "Software"), to deal in the Software without restriction, including
|
@@ -18,10 +18,10 @@
|
|
18
18
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
19
19
|
# permit persons to whom the Software is furnished to do so, subject to
|
20
20
|
# the following conditions:
|
21
|
-
#
|
21
|
+
#
|
22
22
|
# The above copyright notice and this permission notice shall be
|
23
23
|
# included in all copies or substantial portions of the Software.
|
24
|
-
#
|
24
|
+
#
|
25
25
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
26
26
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
27
27
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -55,9 +55,10 @@ module Hoe::Kpeg
|
|
55
55
|
def initialize_kpeg
|
56
56
|
self.kpeg_tasks = [:multi, :test, :check_manifest]
|
57
57
|
|
58
|
-
# -
|
58
|
+
# -f = overwrite existing file
|
59
59
|
# -s = parser does not require runtime
|
60
|
-
|
60
|
+
# -v = verbose
|
61
|
+
self.kpeg_flags ||= "-s -v -f"
|
61
62
|
|
62
63
|
dependency 'kpeg', '~> 0.9', :development
|
63
64
|
end
|
data/lib/kpeg.rb
CHANGED
data/lib/kpeg/code_generator.rb
CHANGED
@@ -90,11 +90,11 @@ module KPeg
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
end
|
93
|
-
|
93
|
+
|
94
94
|
def indentify(code, indent)
|
95
95
|
"#{" " * indent}#{code}"
|
96
96
|
end
|
97
|
-
|
97
|
+
|
98
98
|
# Default indent is 4 spaces (indent=2)
|
99
99
|
def output_op(code, op, indent=2)
|
100
100
|
case op
|
@@ -309,15 +309,23 @@ module KPeg
|
|
309
309
|
else
|
310
310
|
raise "Unknown op - #{op.class}"
|
311
311
|
end
|
312
|
-
|
313
312
|
end
|
314
313
|
|
315
|
-
def standalone_region(path)
|
316
|
-
|
317
|
-
|
318
|
-
|
314
|
+
def standalone_region(path, marker = "STANDALONE")
|
315
|
+
expanded_path = File.expand_path("../#{path}", __FILE__)
|
316
|
+
cp = File.read(expanded_path)
|
317
|
+
|
318
|
+
start_marker = "# #{marker} START"
|
319
|
+
end_marker = /^\s*# #{Regexp.escape marker} END/
|
320
|
+
|
321
|
+
start = cp.index(start_marker) + start_marker.length + 1 # \n
|
322
|
+
fin = cp.index(end_marker)
|
323
|
+
|
324
|
+
unless start and fin
|
325
|
+
abort("#{marker} boundaries in #{path} missing " \
|
326
|
+
"for standalone generation")
|
327
|
+
end
|
319
328
|
|
320
|
-
return nil unless start and fin
|
321
329
|
cp[start..fin]
|
322
330
|
end
|
323
331
|
|
@@ -326,37 +334,29 @@ module KPeg
|
|
326
334
|
|
327
335
|
code = []
|
328
336
|
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
end
|
333
|
-
|
334
|
-
if @standalone
|
335
|
-
code << "class #{@name}\n"
|
337
|
+
output_header(code)
|
338
|
+
output_grammar(code)
|
339
|
+
output_footer(code)
|
336
340
|
|
337
|
-
|
338
|
-
|
341
|
+
@output = code.join
|
342
|
+
end
|
339
343
|
|
340
|
-
|
341
|
-
|
342
|
-
end
|
344
|
+
##
|
345
|
+
# Output of class end and footer
|
343
346
|
|
344
|
-
|
345
|
-
|
346
|
-
puts "Standalone failure. Check position.rb for proper boundary comments"
|
347
|
-
end
|
347
|
+
def output_footer(code)
|
348
|
+
code << "end\n"
|
348
349
|
|
349
|
-
|
350
|
-
code <<
|
351
|
-
else
|
352
|
-
code << "require 'kpeg/compiled_parser'\n\n"
|
353
|
-
code << "class #{@name} < KPeg::CompiledParser\n"
|
350
|
+
if footer = @grammar.directives['footer']
|
351
|
+
code << footer.action
|
354
352
|
end
|
353
|
+
end
|
355
354
|
|
356
|
-
|
357
|
-
|
358
|
-
end
|
355
|
+
##
|
356
|
+
# Output of grammar and rules
|
359
357
|
|
358
|
+
def output_grammar(code)
|
359
|
+
code << " # :stopdoc:\n"
|
360
360
|
handle_ast(code)
|
361
361
|
|
362
362
|
fg = @grammar.foreign_grammars
|
@@ -418,24 +418,57 @@ module KPeg
|
|
418
418
|
|
419
419
|
code << "\n Rules = {}\n"
|
420
420
|
@grammar.rule_order.each do |name|
|
421
|
-
rule = @grammar.rules[name]
|
422
|
-
|
423
421
|
rend = GrammarRenderer.escape renderings[name], true
|
424
422
|
code << " Rules[:#{method_name name}] = rule_info(\"#{name}\", \"#{rend}\")\n"
|
425
423
|
end
|
426
424
|
|
427
|
-
code << "
|
425
|
+
code << " # :startdoc:\n"
|
426
|
+
end
|
428
427
|
|
429
|
-
|
430
|
-
|
428
|
+
##
|
429
|
+
# Output up to the user-defined setup actions
|
430
|
+
|
431
|
+
def output_header(code)
|
432
|
+
if header = @grammar.directives['header']
|
433
|
+
code << header.action.strip
|
434
|
+
code << "\n"
|
431
435
|
end
|
432
436
|
|
433
|
-
|
437
|
+
pre_class = @grammar.directives['pre-class']
|
438
|
+
|
439
|
+
if @standalone
|
440
|
+
if pre_class
|
441
|
+
code << pre_class.action.strip
|
442
|
+
code << "\n"
|
443
|
+
end
|
444
|
+
code << "class #{@name}\n"
|
445
|
+
|
446
|
+
cp = standalone_region("compiled_parser.rb")
|
447
|
+
cpi = standalone_region("compiled_parser.rb", "INITIALIZE")
|
448
|
+
pp = standalone_region("position.rb")
|
449
|
+
|
450
|
+
cp.gsub!(/include Position/, pp)
|
451
|
+
code << " # :stopdoc:\n"
|
452
|
+
code << cpi << "\n" unless @grammar.variables['custom_initialize']
|
453
|
+
code << cp << "\n"
|
454
|
+
code << " # :startdoc:\n"
|
455
|
+
else
|
456
|
+
code << "require 'kpeg/compiled_parser'\n\n"
|
457
|
+
if pre_class
|
458
|
+
code << pre_class.action.strip
|
459
|
+
code << "\n"
|
460
|
+
end
|
461
|
+
code << "class #{@name} < KPeg::CompiledParser\n"
|
462
|
+
end
|
463
|
+
|
464
|
+
@grammar.setup_actions.each do |act|
|
465
|
+
code << "\n#{act.action}\n\n"
|
466
|
+
end
|
434
467
|
end
|
435
468
|
|
436
469
|
def make(str)
|
437
470
|
m = Module.new
|
438
|
-
m.module_eval output
|
471
|
+
m.module_eval output, "(kpeg parser #{@name})"
|
439
472
|
|
440
473
|
cls = m.const_get(@name)
|
441
474
|
cls.new(str)
|
data/lib/kpeg/compiled_parser.rb
CHANGED
@@ -10,8 +10,22 @@ module KPeg
|
|
10
10
|
|
11
11
|
# Leave these markers in! They allow us to generate standalone
|
12
12
|
# code automatically!
|
13
|
-
|
13
|
+
|
14
|
+
# INITIALIZE START
|
15
|
+
|
16
|
+
# This is distinct from setup_parser so that a standalone parser
|
17
|
+
# can redefine #initialize and still have access to the proper
|
18
|
+
# parser setup code.
|
19
|
+
def initialize(str, debug=false)
|
20
|
+
setup_parser(str, debug)
|
21
|
+
end
|
22
|
+
|
23
|
+
# INITIALIZE END
|
24
|
+
|
14
25
|
# STANDALONE START
|
26
|
+
|
27
|
+
# Prepares for parsing +str+. If you define a custom initialize you must
|
28
|
+
# call this method before #parse
|
15
29
|
def setup_parser(str, debug=false)
|
16
30
|
@string = str
|
17
31
|
@pos = 0
|
@@ -23,14 +37,6 @@ module KPeg
|
|
23
37
|
setup_foreign_grammar
|
24
38
|
end
|
25
39
|
|
26
|
-
# This is distinct from setup_parser so that a standalone parser
|
27
|
-
# can redefine #initialize and still have access to the proper
|
28
|
-
# parser setup code.
|
29
|
-
#
|
30
|
-
def initialize(str, debug=false)
|
31
|
-
setup_parser(str, debug)
|
32
|
-
end
|
33
|
-
|
34
40
|
attr_reader :string
|
35
41
|
attr_reader :failing_rule_offset
|
36
42
|
attr_accessor :result, :pos
|
@@ -228,7 +234,6 @@ module KPeg
|
|
228
234
|
def apply_with_args(rule, *args)
|
229
235
|
memo_key = [rule, args]
|
230
236
|
if m = @memoizations[memo_key][@pos]
|
231
|
-
prev = @pos
|
232
237
|
@pos = m.pos
|
233
238
|
if !m.set
|
234
239
|
m.left_rec = true
|
@@ -263,7 +268,6 @@ module KPeg
|
|
263
268
|
|
264
269
|
def apply(rule)
|
265
270
|
if m = @memoizations[rule][@pos]
|
266
|
-
prev = @pos
|
267
271
|
@pos = m.pos
|
268
272
|
if !m.set
|
269
273
|
m.left_rec = true
|
data/lib/kpeg/format_parser.kpeg
CHANGED
@@ -1,14 +1,25 @@
|
|
1
1
|
%% name = KPeg::FormatParser
|
2
|
+
%% custom_initialize = true
|
3
|
+
|
4
|
+
%% pre-class {
|
5
|
+
require 'kpeg/grammar'
|
6
|
+
}
|
2
7
|
|
3
8
|
%% {
|
4
|
-
|
9
|
+
|
10
|
+
##
|
11
|
+
# Creates a new kpeg format parser for +str+.
|
5
12
|
|
6
13
|
def initialize(str, debug=false)
|
7
14
|
setup_parser(str, debug)
|
8
15
|
@g = KPeg::Grammar.new
|
9
16
|
end
|
10
17
|
|
18
|
+
##
|
19
|
+
# The parsed grammar
|
20
|
+
|
11
21
|
attr_reader :g
|
22
|
+
|
12
23
|
alias_method :grammar, :g
|
13
24
|
}
|
14
25
|
|
@@ -22,8 +33,8 @@
|
|
22
33
|
kleene = "*"
|
23
34
|
|
24
35
|
# Allow - by itself, but not at the beginning
|
25
|
-
var = < "-" | /[a-
|
26
|
-
method = < /[a-
|
36
|
+
var = < "-" | /[a-z][\w-]*/i > { text }
|
37
|
+
method = < /[a-z_]\w*/i > { text }
|
27
38
|
|
28
39
|
dbl_escapes = "n" { "\n" }
|
29
40
|
| "s" { " " }
|
@@ -39,7 +50,8 @@
|
|
39
50
|
| num_escapes
|
40
51
|
| < . > { text }
|
41
52
|
num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
|
42
|
-
| "x" < /[
|
53
|
+
| "x" < /[a-f\d]{2}/i > { [text.to_i(16)].pack("U") }
|
54
|
+
# TODO use /\h{2}/ after 1.8 support is dropped
|
43
55
|
dbl_seq = < /[^\\"]+/ > { text }
|
44
56
|
dbl_not_quote = ("\\" dbl_escapes:s | dbl_seq:s)*:ary { Array(ary) }
|
45
57
|
dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
|
@@ -55,10 +67,10 @@ sgl_escape_quote = "\\'" { "'" }
|
|
55
67
|
regexp = "/" not_slash:body "/" regexp_opts:opts
|
56
68
|
{ @g.reg body, opts }
|
57
69
|
|
58
|
-
char = < /[a-
|
70
|
+
char = < /[a-z\d]/i > { text }
|
59
71
|
char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }
|
60
72
|
|
61
|
-
range_num = < /[1-9]
|
73
|
+
range_num = < /[1-9]\d*/ > { text }
|
62
74
|
range_elem = < range_num|kleene > { text }
|
63
75
|
mult_range = "[" - range_elem:l - "," - range_elem:r - "]"
|
64
76
|
{ [l == "*" ? nil : l.to_i, r == "*" ? nil : r.to_i] }
|
@@ -105,7 +117,7 @@ sgl_escape_quote = "\\'" { "'" }
|
|
105
117
|
| - var:n - { [n] }
|
106
118
|
statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
|
107
119
|
| - var:v - "=" - expression:o { @g.set(v, o) }
|
108
|
-
| - "%" var:name - "=" - < /[
|
120
|
+
| - "%" var:name - "=" - < /[:\w]+/ >
|
109
121
|
{ @g.add_foreign_grammar(name, text) }
|
110
122
|
| - "%%" - curly:act { @g.add_setup act }
|
111
123
|
| - "%%" - var:name - curly:act { @g.add_directive name, act }
|
@@ -117,8 +129,8 @@ sgl_escape_quote = "\\'" { "'" }
|
|
117
129
|
|
118
130
|
# These are a separate set of rules used to parse an ast declaration
|
119
131
|
|
120
|
-
ast_constant = < /[A-Z]
|
121
|
-
ast_word = < /[
|
132
|
+
ast_constant = < /[A-Z]\w*/ > { text }
|
133
|
+
ast_word = < /[a-z_]\w*/i > { text }
|
122
134
|
|
123
135
|
ast_sp = (" " | "\t")*
|
124
136
|
|