kpeg 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -1
- data/Manifest.txt +2 -4
- data/README.rdoc +32 -0
- data/Rakefile +10 -3
- data/lib/hoe/kpeg.rb +6 -5
- data/lib/kpeg.rb +1 -1
- data/lib/kpeg/code_generator.rb +72 -39
- data/lib/kpeg/compiled_parser.rb +15 -11
- data/lib/kpeg/format_parser.kpeg +21 -9
- data/lib/kpeg/format_parser.rb +42 -39
- data/lib/kpeg/grammar.rb +1 -1
- data/lib/kpeg/grammar_renderer.rb +14 -0
- data/lib/kpeg/position.rb +1 -0
- data/lib/kpeg/string_escape.rb +355 -2
- data/test/test_kpeg_code_generator.rb +166 -0
- data/test/test_kpeg_format.rb +2 -2
- data/test/{test_file_parser_roundtrip.rb → test_kpeg_format_parser_round_trip.rb} +1 -1
- data/test/{test_gen_calc.rb → test_kpeg_grammar.rb} +48 -5
- data/test/test_kpeg_grammar_renderer.rb +46 -5
- metadata +17 -20
- data/Gemfile +0 -12
- data/test/test_left_recursion.rb +0 -50
data/History.txt
CHANGED
@@ -1,4 +1,18 @@
|
|
1
|
-
===
|
1
|
+
=== 0.10 / 2012-04-16
|
2
|
+
|
3
|
+
* Minor enhancements
|
4
|
+
* In standalone parsers, generation of a default initialize method may be
|
5
|
+
disabled with the custom_initialize variable:
|
6
|
+
|
7
|
+
%% custom_initialize = true
|
8
|
+
* Added a pre-class directive for adding class comments
|
9
|
+
* Generated code is now surrounded by startdoc/stopdoc.
|
10
|
+
|
11
|
+
* Bug fixes
|
12
|
+
* Hoe plugin now overwrites generated files
|
13
|
+
* Directives and variables now round-trip through KPeg::GrammarRenderer
|
14
|
+
|
15
|
+
=== 0.9 / 2012-04-06
|
2
16
|
|
3
17
|
* Minor enhancements
|
4
18
|
* Added arbitrary directives to the kpeg grammar
|
data/Manifest.txt
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
.autotest
|
2
|
-
Gemfile
|
3
2
|
History.txt
|
4
3
|
LICENSE
|
5
4
|
Manifest.txt
|
@@ -35,13 +34,12 @@ lib/kpeg/position.rb
|
|
35
34
|
lib/kpeg/string_escape.kpeg
|
36
35
|
lib/kpeg/string_escape.rb
|
37
36
|
test/inputs/comments.kpeg
|
38
|
-
test/test_file_parser_roundtrip.rb
|
39
|
-
test/test_gen_calc.rb
|
40
37
|
test/test_kpeg.rb
|
41
38
|
test/test_kpeg_code_generator.rb
|
42
39
|
test/test_kpeg_compiled_parser.rb
|
43
40
|
test/test_kpeg_format.rb
|
41
|
+
test/test_kpeg_format_parser_round_trip.rb
|
42
|
+
test/test_kpeg_grammar.rb
|
44
43
|
test/test_kpeg_grammar_renderer.rb
|
45
|
-
test/test_left_recursion.rb
|
46
44
|
vim/syntax_kpeg/ftdetect/kpeg.vim
|
47
45
|
vim/syntax_kpeg/syntax/kpeg.vim
|
data/README.rdoc
CHANGED
@@ -157,6 +157,38 @@ Kpeg allows comments to be added to the grammar file by using the # symbol
|
|
157
157
|
|
158
158
|
# This is a comment in my grammar
|
159
159
|
|
160
|
+
=== Variables
|
161
|
+
|
162
|
+
A variable looks like this:
|
163
|
+
|
164
|
+
%% name = value
|
165
|
+
|
166
|
+
Kpeg allows the following variables that control the output parser:
|
167
|
+
|
168
|
+
name::
|
169
|
+
The class name of the generated parser.
|
170
|
+
custom_initialize::
|
171
|
+
When built as a standalone parser a default initialize method will not be
|
172
|
+
included.
|
173
|
+
|
174
|
+
=== Directives
|
175
|
+
|
176
|
+
A directive looks like this:
|
177
|
+
|
178
|
+
%% header {
|
179
|
+
...
|
180
|
+
}
|
181
|
+
|
182
|
+
Kpeg allows the following directives:
|
183
|
+
|
184
|
+
header::
|
185
|
+
Placed before any generated code
|
186
|
+
pre-class::
|
187
|
+
Placed before the class definition to provide a class comment
|
188
|
+
footer::
|
189
|
+
Placed after the end of the class (for requiring files dependent upon the
|
190
|
+
parser's namespace)
|
191
|
+
|
160
192
|
== Generating and running your parser
|
161
193
|
|
162
194
|
Before you can generate your parser you will need to define a root rule. This
|
data/Rakefile
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'hoe'
|
5
5
|
|
6
|
-
Hoe.plugin :bundler
|
7
6
|
Hoe.plugin :gemspec
|
8
7
|
Hoe.plugin :git
|
9
8
|
Hoe.plugin :minitest
|
@@ -29,10 +28,18 @@ rule ".rb" => ".kpeg" do |t|
|
|
29
28
|
ruby "-Ilib bin/kpeg -s -o #{t.name} -f #{t.source}"
|
30
29
|
end
|
31
30
|
|
32
|
-
|
33
|
-
task :parser => %w[
|
31
|
+
PARSER_FILES = %w[
|
34
32
|
lib/kpeg/string_escape.rb
|
35
33
|
lib/kpeg/format_parser.rb
|
36
34
|
]
|
37
35
|
|
36
|
+
PARSER_FILES.map do |parser_file|
|
37
|
+
file parser_file => 'lib/kpeg/compiled_parser.rb'
|
38
|
+
file parser_file => 'lib/kpeg/code_generator.rb'
|
39
|
+
file parser_file => 'lib/kpeg/position.rb'
|
40
|
+
end
|
41
|
+
|
42
|
+
desc "build the parser"
|
43
|
+
task :parser => PARSER_FILES
|
44
|
+
|
38
45
|
# vim: syntax=ruby
|
data/lib/hoe/kpeg.rb
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
# license:
|
11
11
|
#
|
12
12
|
# Copyright (c) Ryan Davis, seattle.rb
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# Permission is hereby granted, free of charge, to any person obtaining
|
15
15
|
# a copy of this software and associated documentation files (the
|
16
16
|
# "Software"), to deal in the Software without restriction, including
|
@@ -18,10 +18,10 @@
|
|
18
18
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
19
19
|
# permit persons to whom the Software is furnished to do so, subject to
|
20
20
|
# the following conditions:
|
21
|
-
#
|
21
|
+
#
|
22
22
|
# The above copyright notice and this permission notice shall be
|
23
23
|
# included in all copies or substantial portions of the Software.
|
24
|
-
#
|
24
|
+
#
|
25
25
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
26
26
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
27
27
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -55,9 +55,10 @@ module Hoe::Kpeg
|
|
55
55
|
def initialize_kpeg
|
56
56
|
self.kpeg_tasks = [:multi, :test, :check_manifest]
|
57
57
|
|
58
|
-
# -
|
58
|
+
# -f = overwrite existing file
|
59
59
|
# -s = parser does not require runtime
|
60
|
-
|
60
|
+
# -v = verbose
|
61
|
+
self.kpeg_flags ||= "-s -v -f"
|
61
62
|
|
62
63
|
dependency 'kpeg', '~> 0.9', :development
|
63
64
|
end
|
data/lib/kpeg.rb
CHANGED
data/lib/kpeg/code_generator.rb
CHANGED
@@ -90,11 +90,11 @@ module KPeg
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
end
|
93
|
-
|
93
|
+
|
94
94
|
def indentify(code, indent)
|
95
95
|
"#{" " * indent}#{code}"
|
96
96
|
end
|
97
|
-
|
97
|
+
|
98
98
|
# Default indent is 4 spaces (indent=2)
|
99
99
|
def output_op(code, op, indent=2)
|
100
100
|
case op
|
@@ -309,15 +309,23 @@ module KPeg
|
|
309
309
|
else
|
310
310
|
raise "Unknown op - #{op.class}"
|
311
311
|
end
|
312
|
-
|
313
312
|
end
|
314
313
|
|
315
|
-
def standalone_region(path)
|
316
|
-
|
317
|
-
|
318
|
-
|
314
|
+
def standalone_region(path, marker = "STANDALONE")
|
315
|
+
expanded_path = File.expand_path("../#{path}", __FILE__)
|
316
|
+
cp = File.read(expanded_path)
|
317
|
+
|
318
|
+
start_marker = "# #{marker} START"
|
319
|
+
end_marker = /^\s*# #{Regexp.escape marker} END/
|
320
|
+
|
321
|
+
start = cp.index(start_marker) + start_marker.length + 1 # \n
|
322
|
+
fin = cp.index(end_marker)
|
323
|
+
|
324
|
+
unless start and fin
|
325
|
+
abort("#{marker} boundaries in #{path} missing " \
|
326
|
+
"for standalone generation")
|
327
|
+
end
|
319
328
|
|
320
|
-
return nil unless start and fin
|
321
329
|
cp[start..fin]
|
322
330
|
end
|
323
331
|
|
@@ -326,37 +334,29 @@ module KPeg
|
|
326
334
|
|
327
335
|
code = []
|
328
336
|
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
end
|
333
|
-
|
334
|
-
if @standalone
|
335
|
-
code << "class #{@name}\n"
|
337
|
+
output_header(code)
|
338
|
+
output_grammar(code)
|
339
|
+
output_footer(code)
|
336
340
|
|
337
|
-
|
338
|
-
|
341
|
+
@output = code.join
|
342
|
+
end
|
339
343
|
|
340
|
-
|
341
|
-
|
342
|
-
end
|
344
|
+
##
|
345
|
+
# Output of class end and footer
|
343
346
|
|
344
|
-
|
345
|
-
|
346
|
-
puts "Standalone failure. Check position.rb for proper boundary comments"
|
347
|
-
end
|
347
|
+
def output_footer(code)
|
348
|
+
code << "end\n"
|
348
349
|
|
349
|
-
|
350
|
-
code <<
|
351
|
-
else
|
352
|
-
code << "require 'kpeg/compiled_parser'\n\n"
|
353
|
-
code << "class #{@name} < KPeg::CompiledParser\n"
|
350
|
+
if footer = @grammar.directives['footer']
|
351
|
+
code << footer.action
|
354
352
|
end
|
353
|
+
end
|
355
354
|
|
356
|
-
|
357
|
-
|
358
|
-
end
|
355
|
+
##
|
356
|
+
# Output of grammar and rules
|
359
357
|
|
358
|
+
def output_grammar(code)
|
359
|
+
code << " # :stopdoc:\n"
|
360
360
|
handle_ast(code)
|
361
361
|
|
362
362
|
fg = @grammar.foreign_grammars
|
@@ -418,24 +418,57 @@ module KPeg
|
|
418
418
|
|
419
419
|
code << "\n Rules = {}\n"
|
420
420
|
@grammar.rule_order.each do |name|
|
421
|
-
rule = @grammar.rules[name]
|
422
|
-
|
423
421
|
rend = GrammarRenderer.escape renderings[name], true
|
424
422
|
code << " Rules[:#{method_name name}] = rule_info(\"#{name}\", \"#{rend}\")\n"
|
425
423
|
end
|
426
424
|
|
427
|
-
code << "
|
425
|
+
code << " # :startdoc:\n"
|
426
|
+
end
|
428
427
|
|
429
|
-
|
430
|
-
|
428
|
+
##
|
429
|
+
# Output up to the user-defined setup actions
|
430
|
+
|
431
|
+
def output_header(code)
|
432
|
+
if header = @grammar.directives['header']
|
433
|
+
code << header.action.strip
|
434
|
+
code << "\n"
|
431
435
|
end
|
432
436
|
|
433
|
-
|
437
|
+
pre_class = @grammar.directives['pre-class']
|
438
|
+
|
439
|
+
if @standalone
|
440
|
+
if pre_class
|
441
|
+
code << pre_class.action.strip
|
442
|
+
code << "\n"
|
443
|
+
end
|
444
|
+
code << "class #{@name}\n"
|
445
|
+
|
446
|
+
cp = standalone_region("compiled_parser.rb")
|
447
|
+
cpi = standalone_region("compiled_parser.rb", "INITIALIZE")
|
448
|
+
pp = standalone_region("position.rb")
|
449
|
+
|
450
|
+
cp.gsub!(/include Position/, pp)
|
451
|
+
code << " # :stopdoc:\n"
|
452
|
+
code << cpi << "\n" unless @grammar.variables['custom_initialize']
|
453
|
+
code << cp << "\n"
|
454
|
+
code << " # :startdoc:\n"
|
455
|
+
else
|
456
|
+
code << "require 'kpeg/compiled_parser'\n\n"
|
457
|
+
if pre_class
|
458
|
+
code << pre_class.action.strip
|
459
|
+
code << "\n"
|
460
|
+
end
|
461
|
+
code << "class #{@name} < KPeg::CompiledParser\n"
|
462
|
+
end
|
463
|
+
|
464
|
+
@grammar.setup_actions.each do |act|
|
465
|
+
code << "\n#{act.action}\n\n"
|
466
|
+
end
|
434
467
|
end
|
435
468
|
|
436
469
|
def make(str)
|
437
470
|
m = Module.new
|
438
|
-
m.module_eval output
|
471
|
+
m.module_eval output, "(kpeg parser #{@name})"
|
439
472
|
|
440
473
|
cls = m.const_get(@name)
|
441
474
|
cls.new(str)
|
data/lib/kpeg/compiled_parser.rb
CHANGED
@@ -10,8 +10,22 @@ module KPeg
|
|
10
10
|
|
11
11
|
# Leave these markers in! They allow us to generate standalone
|
12
12
|
# code automatically!
|
13
|
-
|
13
|
+
|
14
|
+
# INITIALIZE START
|
15
|
+
|
16
|
+
# This is distinct from setup_parser so that a standalone parser
|
17
|
+
# can redefine #initialize and still have access to the proper
|
18
|
+
# parser setup code.
|
19
|
+
def initialize(str, debug=false)
|
20
|
+
setup_parser(str, debug)
|
21
|
+
end
|
22
|
+
|
23
|
+
# INITIALIZE END
|
24
|
+
|
14
25
|
# STANDALONE START
|
26
|
+
|
27
|
+
# Prepares for parsing +str+. If you define a custom initialize you must
|
28
|
+
# call this method before #parse
|
15
29
|
def setup_parser(str, debug=false)
|
16
30
|
@string = str
|
17
31
|
@pos = 0
|
@@ -23,14 +37,6 @@ module KPeg
|
|
23
37
|
setup_foreign_grammar
|
24
38
|
end
|
25
39
|
|
26
|
-
# This is distinct from setup_parser so that a standalone parser
|
27
|
-
# can redefine #initialize and still have access to the proper
|
28
|
-
# parser setup code.
|
29
|
-
#
|
30
|
-
def initialize(str, debug=false)
|
31
|
-
setup_parser(str, debug)
|
32
|
-
end
|
33
|
-
|
34
40
|
attr_reader :string
|
35
41
|
attr_reader :failing_rule_offset
|
36
42
|
attr_accessor :result, :pos
|
@@ -228,7 +234,6 @@ module KPeg
|
|
228
234
|
def apply_with_args(rule, *args)
|
229
235
|
memo_key = [rule, args]
|
230
236
|
if m = @memoizations[memo_key][@pos]
|
231
|
-
prev = @pos
|
232
237
|
@pos = m.pos
|
233
238
|
if !m.set
|
234
239
|
m.left_rec = true
|
@@ -263,7 +268,6 @@ module KPeg
|
|
263
268
|
|
264
269
|
def apply(rule)
|
265
270
|
if m = @memoizations[rule][@pos]
|
266
|
-
prev = @pos
|
267
271
|
@pos = m.pos
|
268
272
|
if !m.set
|
269
273
|
m.left_rec = true
|
data/lib/kpeg/format_parser.kpeg
CHANGED
@@ -1,14 +1,25 @@
|
|
1
1
|
%% name = KPeg::FormatParser
|
2
|
+
%% custom_initialize = true
|
3
|
+
|
4
|
+
%% pre-class {
|
5
|
+
require 'kpeg/grammar'
|
6
|
+
}
|
2
7
|
|
3
8
|
%% {
|
4
|
-
|
9
|
+
|
10
|
+
##
|
11
|
+
# Creates a new kpeg format parser for +str+.
|
5
12
|
|
6
13
|
def initialize(str, debug=false)
|
7
14
|
setup_parser(str, debug)
|
8
15
|
@g = KPeg::Grammar.new
|
9
16
|
end
|
10
17
|
|
18
|
+
##
|
19
|
+
# The parsed grammar
|
20
|
+
|
11
21
|
attr_reader :g
|
22
|
+
|
12
23
|
alias_method :grammar, :g
|
13
24
|
}
|
14
25
|
|
@@ -22,8 +33,8 @@
|
|
22
33
|
kleene = "*"
|
23
34
|
|
24
35
|
# Allow - by itself, but not at the beginning
|
25
|
-
var = < "-" | /[a-
|
26
|
-
method = < /[a-
|
36
|
+
var = < "-" | /[a-z][\w-]*/i > { text }
|
37
|
+
method = < /[a-z_]\w*/i > { text }
|
27
38
|
|
28
39
|
dbl_escapes = "n" { "\n" }
|
29
40
|
| "s" { " " }
|
@@ -39,7 +50,8 @@
|
|
39
50
|
| num_escapes
|
40
51
|
| < . > { text }
|
41
52
|
num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
|
42
|
-
| "x" < /[
|
53
|
+
| "x" < /[a-f\d]{2}/i > { [text.to_i(16)].pack("U") }
|
54
|
+
# TODO use /\h{2}/ after 1.8 support is dropped
|
43
55
|
dbl_seq = < /[^\\"]+/ > { text }
|
44
56
|
dbl_not_quote = ("\\" dbl_escapes:s | dbl_seq:s)*:ary { Array(ary) }
|
45
57
|
dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
|
@@ -55,10 +67,10 @@ sgl_escape_quote = "\\'" { "'" }
|
|
55
67
|
regexp = "/" not_slash:body "/" regexp_opts:opts
|
56
68
|
{ @g.reg body, opts }
|
57
69
|
|
58
|
-
char = < /[a-
|
70
|
+
char = < /[a-z\d]/i > { text }
|
59
71
|
char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }
|
60
72
|
|
61
|
-
range_num = < /[1-9]
|
73
|
+
range_num = < /[1-9]\d*/ > { text }
|
62
74
|
range_elem = < range_num|kleene > { text }
|
63
75
|
mult_range = "[" - range_elem:l - "," - range_elem:r - "]"
|
64
76
|
{ [l == "*" ? nil : l.to_i, r == "*" ? nil : r.to_i] }
|
@@ -105,7 +117,7 @@ sgl_escape_quote = "\\'" { "'" }
|
|
105
117
|
| - var:n - { [n] }
|
106
118
|
statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
|
107
119
|
| - var:v - "=" - expression:o { @g.set(v, o) }
|
108
|
-
| - "%" var:name - "=" - < /[
|
120
|
+
| - "%" var:name - "=" - < /[:\w]+/ >
|
109
121
|
{ @g.add_foreign_grammar(name, text) }
|
110
122
|
| - "%%" - curly:act { @g.add_setup act }
|
111
123
|
| - "%%" - var:name - curly:act { @g.add_directive name, act }
|
@@ -117,8 +129,8 @@ sgl_escape_quote = "\\'" { "'" }
|
|
117
129
|
|
118
130
|
# These are a separate set of rules used to parse an ast declaration
|
119
131
|
|
120
|
-
ast_constant = < /[A-Z]
|
121
|
-
ast_word = < /[
|
132
|
+
ast_constant = < /[A-Z]\w*/ > { text }
|
133
|
+
ast_word = < /[a-z_]\w*/i > { text }
|
122
134
|
|
123
135
|
ast_sp = (" " | "\t")*
|
124
136
|
|