kpeg 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,18 @@
1
- === 1.0 / 2012-04-06
1
+ === 0.10 / 2012-04-16
2
+
3
+ * Minor enhancements
4
+ * In standalone parsers generation of a default initialize method may be
5
+ disabled with the custom_initialize variable:
6
+
7
+ %% custom_initialize = true
8
+ * Added a pre-class directive for adding class comments
9
+ * Generated code is now surrounded by startdoc/stopdoc.
10
+
11
+ * Bug fixes
12
+ * Hoe plugin now overwrites generated files
13
+ * Directives and variables now round-trip through KPeg::GrammarRenderer
14
+
15
+ === 0.9 / 2012-04-06
2
16
 
3
17
  * Minor enhancements
4
18
  * Added arbitrary directives to the kpeg grammar
@@ -1,5 +1,4 @@
1
1
  .autotest
2
- Gemfile
3
2
  History.txt
4
3
  LICENSE
5
4
  Manifest.txt
@@ -35,13 +34,12 @@ lib/kpeg/position.rb
35
34
  lib/kpeg/string_escape.kpeg
36
35
  lib/kpeg/string_escape.rb
37
36
  test/inputs/comments.kpeg
38
- test/test_file_parser_roundtrip.rb
39
- test/test_gen_calc.rb
40
37
  test/test_kpeg.rb
41
38
  test/test_kpeg_code_generator.rb
42
39
  test/test_kpeg_compiled_parser.rb
43
40
  test/test_kpeg_format.rb
41
+ test/test_kpeg_format_parser_round_trip.rb
42
+ test/test_kpeg_grammar.rb
44
43
  test/test_kpeg_grammar_renderer.rb
45
- test/test_left_recursion.rb
46
44
  vim/syntax_kpeg/ftdetect/kpeg.vim
47
45
  vim/syntax_kpeg/syntax/kpeg.vim
@@ -157,6 +157,38 @@ Kpeg allows comments to be added to the grammar file by using the # symbol
157
157
 
158
158
  # This is a comment in my grammar
159
159
 
160
+ === Variables
161
+
162
+ A variable looks like this:
163
+
164
+ %% name = value
165
+
166
+ Kpeg allows the following variables that control the output parser:
167
+
168
+ name::
169
+ The class name of the generated parser.
170
+ custom_initialize::
171
+ When set (for example to true), the default initialize method is omitted
172
+ from a standalone parser.
173
+
174
+ === Directives
175
+
176
+ A directive looks like this:
177
+
178
+ %% header {
179
+ ...
180
+ }
181
+
182
+ Kpeg allows the following directives:
183
+
184
+ header::
185
+ Placed before any generated code
186
+ pre-class::
187
+ Placed before the class definition to provide a class comment
188
+ footer::
189
+ Placed after the end of the class (for requiring files dependent upon the
190
+ parser's namespace)
191
+
160
192
  == Generating and running your parser
161
193
 
162
194
  Before you can generate your parser you will need to define a root rule. This
data/Rakefile CHANGED
@@ -3,7 +3,6 @@
3
3
  require 'rubygems'
4
4
  require 'hoe'
5
5
 
6
- Hoe.plugin :bundler
7
6
  Hoe.plugin :gemspec
8
7
  Hoe.plugin :git
9
8
  Hoe.plugin :minitest
@@ -29,10 +28,18 @@ rule ".rb" => ".kpeg" do |t|
29
28
  ruby "-Ilib bin/kpeg -s -o #{t.name} -f #{t.source}"
30
29
  end
31
30
 
32
- desc "build the parser"
33
- task :parser => %w[
31
+ PARSER_FILES = %w[
34
32
  lib/kpeg/string_escape.rb
35
33
  lib/kpeg/format_parser.rb
36
34
  ]
37
35
 
36
+ PARSER_FILES.map do |parser_file|
37
+ file parser_file => 'lib/kpeg/compiled_parser.rb'
38
+ file parser_file => 'lib/kpeg/code_generator.rb'
39
+ file parser_file => 'lib/kpeg/position.rb'
40
+ end
41
+
42
+ desc "build the parser"
43
+ task :parser => PARSER_FILES
44
+
38
45
  # vim: syntax=ruby
@@ -10,7 +10,7 @@
10
10
  # license:
11
11
  #
12
12
  # Copyright (c) Ryan Davis, seattle.rb
13
- #
13
+ #
14
14
  # Permission is hereby granted, free of charge, to any person obtaining
15
15
  # a copy of this software and associated documentation files (the
16
16
  # "Software"), to deal in the Software without restriction, including
@@ -18,10 +18,10 @@
18
18
  # distribute, sublicense, and/or sell copies of the Software, and to
19
19
  # permit persons to whom the Software is furnished to do so, subject to
20
20
  # the following conditions:
21
- #
21
+ #
22
22
  # The above copyright notice and this permission notice shall be
23
23
  # included in all copies or substantial portions of the Software.
24
- #
24
+ #
25
25
  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26
26
  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27
27
  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -55,9 +55,10 @@ module Hoe::Kpeg
55
55
  def initialize_kpeg
56
56
  self.kpeg_tasks = [:multi, :test, :check_manifest]
57
57
 
58
- # -v = verbose
58
+ # -f = overwrite existing file
59
59
  # -s = parser does not require runtime
60
- self.kpeg_flags ||= "-s -v"
60
+ # -v = verbose
61
+ self.kpeg_flags ||= "-s -v -f"
61
62
 
62
63
  dependency 'kpeg', '~> 0.9', :development
63
64
  end
@@ -1,6 +1,6 @@
1
1
  module KPeg
2
2
 
3
- VERSION = "0.9.0"
3
+ VERSION = "0.10.0"
4
4
 
5
5
  def self.grammar
6
6
  g = Grammar.new
@@ -90,11 +90,11 @@ module KPeg
90
90
  end
91
91
  end
92
92
  end
93
-
93
+
94
94
  def indentify(code, indent)
95
95
  "#{" " * indent}#{code}"
96
96
  end
97
-
97
+
98
98
  # Default indent is 4 spaces (indent=2)
99
99
  def output_op(code, op, indent=2)
100
100
  case op
@@ -309,15 +309,23 @@ module KPeg
309
309
  else
310
310
  raise "Unknown op - #{op.class}"
311
311
  end
312
-
313
312
  end
314
313
 
315
- def standalone_region(path)
316
- cp = File.read(path)
317
- start = cp.index("# STANDALONE START")
318
- fin = cp.index("# STANDALONE END")
314
+ def standalone_region(path, marker = "STANDALONE")
315
+ expanded_path = File.expand_path("../#{path}", __FILE__)
316
+ cp = File.read(expanded_path)
317
+
318
+ start_marker = "# #{marker} START"
319
+ end_marker = /^\s*# #{Regexp.escape marker} END/
320
+
321
+ start = cp.index(start_marker) + start_marker.length + 1 # \n
322
+ fin = cp.index(end_marker)
323
+
324
+ unless start and fin
325
+ abort("#{marker} boundaries in #{path} missing " \
326
+ "for standalone generation")
327
+ end
319
328
 
320
- return nil unless start and fin
321
329
  cp[start..fin]
322
330
  end
323
331
 
@@ -326,37 +334,29 @@ module KPeg
326
334
 
327
335
  code = []
328
336
 
329
- if header = @grammar.directives['header']
330
- code << header.action.strip
331
- code << "\n"
332
- end
333
-
334
- if @standalone
335
- code << "class #{@name}\n"
337
+ output_header(code)
338
+ output_grammar(code)
339
+ output_footer(code)
336
340
 
337
- unless cp = standalone_region(
338
- File.expand_path("../compiled_parser.rb", __FILE__))
341
+ @output = code.join
342
+ end
339
343
 
340
- puts "Standalone failure. Check compiler_parser.rb for proper boundary comments"
341
- exit 1
342
- end
344
+ ##
345
+ # Output of class end and footer
343
346
 
344
- unless pp = standalone_region(
345
- File.expand_path("../position.rb", __FILE__))
346
- puts "Standalone failure. Check position.rb for proper boundary comments"
347
- end
347
+ def output_footer(code)
348
+ code << "end\n"
348
349
 
349
- cp.gsub!(/include Position/, pp)
350
- code << cp << "\n"
351
- else
352
- code << "require 'kpeg/compiled_parser'\n\n"
353
- code << "class #{@name} < KPeg::CompiledParser\n"
350
+ if footer = @grammar.directives['footer']
351
+ code << footer.action
354
352
  end
353
+ end
355
354
 
356
- @grammar.setup_actions.each do |act|
357
- code << "\n#{act.action}\n\n"
358
- end
355
+ ##
356
+ # Output of grammar and rules
359
357
 
358
+ def output_grammar(code)
359
+ code << " # :stopdoc:\n"
360
360
  handle_ast(code)
361
361
 
362
362
  fg = @grammar.foreign_grammars
@@ -418,24 +418,57 @@ module KPeg
418
418
 
419
419
  code << "\n Rules = {}\n"
420
420
  @grammar.rule_order.each do |name|
421
- rule = @grammar.rules[name]
422
-
423
421
  rend = GrammarRenderer.escape renderings[name], true
424
422
  code << " Rules[:#{method_name name}] = rule_info(\"#{name}\", \"#{rend}\")\n"
425
423
  end
426
424
 
427
- code << "end\n"
425
+ code << " # :startdoc:\n"
426
+ end
428
427
 
429
- if footer = @grammar.directives['footer']
430
- code << footer.action
428
+ ##
429
+ # Output up to the user-defined setup actions
430
+
431
+ def output_header(code)
432
+ if header = @grammar.directives['header']
433
+ code << header.action.strip
434
+ code << "\n"
431
435
  end
432
436
 
433
- @output = code.join
437
+ pre_class = @grammar.directives['pre-class']
438
+
439
+ if @standalone
440
+ if pre_class
441
+ code << pre_class.action.strip
442
+ code << "\n"
443
+ end
444
+ code << "class #{@name}\n"
445
+
446
+ cp = standalone_region("compiled_parser.rb")
447
+ cpi = standalone_region("compiled_parser.rb", "INITIALIZE")
448
+ pp = standalone_region("position.rb")
449
+
450
+ cp.gsub!(/include Position/, pp)
451
+ code << " # :stopdoc:\n"
452
+ code << cpi << "\n" unless @grammar.variables['custom_initialize']
453
+ code << cp << "\n"
454
+ code << " # :startdoc:\n"
455
+ else
456
+ code << "require 'kpeg/compiled_parser'\n\n"
457
+ if pre_class
458
+ code << pre_class.action.strip
459
+ code << "\n"
460
+ end
461
+ code << "class #{@name} < KPeg::CompiledParser\n"
462
+ end
463
+
464
+ @grammar.setup_actions.each do |act|
465
+ code << "\n#{act.action}\n\n"
466
+ end
434
467
  end
435
468
 
436
469
  def make(str)
437
470
  m = Module.new
438
- m.module_eval output
471
+ m.module_eval output, "(kpeg parser #{@name})"
439
472
 
440
473
  cls = m.const_get(@name)
441
474
  cls.new(str)
@@ -10,8 +10,22 @@ module KPeg
10
10
 
11
11
  # Leave these markers in! They allow us to generate standalone
12
12
  # code automatically!
13
- #
13
+
14
+ # INITIALIZE START
15
+
16
+ # This is distinct from setup_parser so that a standalone parser
17
+ # can redefine #initialize and still have access to the proper
18
+ # parser setup code.
19
+ def initialize(str, debug=false)
20
+ setup_parser(str, debug)
21
+ end
22
+
23
+ # INITIALIZE END
24
+
14
25
  # STANDALONE START
26
+
27
+ # Prepares for parsing +str+. If you define a custom initialize you must
28
+ # call this method before #parse
15
29
  def setup_parser(str, debug=false)
16
30
  @string = str
17
31
  @pos = 0
@@ -23,14 +37,6 @@ module KPeg
23
37
  setup_foreign_grammar
24
38
  end
25
39
 
26
- # This is distinct from setup_parser so that a standalone parser
27
- # can redefine #initialize and still have access to the proper
28
- # parser setup code.
29
- #
30
- def initialize(str, debug=false)
31
- setup_parser(str, debug)
32
- end
33
-
34
40
  attr_reader :string
35
41
  attr_reader :failing_rule_offset
36
42
  attr_accessor :result, :pos
@@ -228,7 +234,6 @@ module KPeg
228
234
  def apply_with_args(rule, *args)
229
235
  memo_key = [rule, args]
230
236
  if m = @memoizations[memo_key][@pos]
231
- prev = @pos
232
237
  @pos = m.pos
233
238
  if !m.set
234
239
  m.left_rec = true
@@ -263,7 +268,6 @@ module KPeg
263
268
 
264
269
  def apply(rule)
265
270
  if m = @memoizations[rule][@pos]
266
- prev = @pos
267
271
  @pos = m.pos
268
272
  if !m.set
269
273
  m.left_rec = true
@@ -1,14 +1,25 @@
1
1
  %% name = KPeg::FormatParser
2
+ %% custom_initialize = true
3
+
4
+ %% pre-class {
5
+ require 'kpeg/grammar'
6
+ }
2
7
 
3
8
  %% {
4
- require 'kpeg/grammar'
9
+
10
+ ##
11
+ # Creates a new kpeg format parser for +str+.
5
12
 
6
13
  def initialize(str, debug=false)
7
14
  setup_parser(str, debug)
8
15
  @g = KPeg::Grammar.new
9
16
  end
10
17
 
18
+ ##
19
+ # The parsed grammar
20
+
11
21
  attr_reader :g
22
+
12
23
  alias_method :grammar, :g
13
24
  }
14
25
 
@@ -22,8 +33,8 @@
22
33
  kleene = "*"
23
34
 
24
35
  # Allow - by itself, but not at the beginning
25
- var = < "-" | /[a-zA-Z][\-_a-zA-Z0-9]*/ > { text }
26
- method = < /[a-zA-Z_][a-zA-Z0-9_]*/ > { text }
36
+ var = < "-" | /[a-z][\w-]*/i > { text }
37
+ method = < /[a-z_]\w*/i > { text }
27
38
 
28
39
  dbl_escapes = "n" { "\n" }
29
40
  | "s" { " " }
@@ -39,7 +50,8 @@
39
50
  | num_escapes
40
51
  | < . > { text }
41
52
  num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
42
- | "x" < /[0-9a-fA-F]{2}/ > { [text.to_i(16)].pack("U") }
53
+ | "x" < /[a-f\d]{2}/i > { [text.to_i(16)].pack("U") }
54
+ # TODO use /\h{2}/ after 1.8 support is dropped
43
55
  dbl_seq = < /[^\\"]+/ > { text }
44
56
  dbl_not_quote = ("\\" dbl_escapes:s | dbl_seq:s)*:ary { Array(ary) }
45
57
  dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
@@ -55,10 +67,10 @@ sgl_escape_quote = "\\'" { "'" }
55
67
  regexp = "/" not_slash:body "/" regexp_opts:opts
56
68
  { @g.reg body, opts }
57
69
 
58
- char = < /[a-zA-Z0-9]/ > { text }
70
+ char = < /[a-z\d]/i > { text }
59
71
  char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }
60
72
 
61
- range_num = < /[1-9][0-9]*/ > { text }
73
+ range_num = < /[1-9]\d*/ > { text }
62
74
  range_elem = < range_num|kleene > { text }
63
75
  mult_range = "[" - range_elem:l - "," - range_elem:r - "]"
64
76
  { [l == "*" ? nil : l.to_i, r == "*" ? nil : r.to_i] }
@@ -105,7 +117,7 @@ sgl_escape_quote = "\\'" { "'" }
105
117
  | - var:n - { [n] }
106
118
  statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
107
119
  | - var:v - "=" - expression:o { @g.set(v, o) }
108
- | - "%" var:name - "=" - < /[::A-Za-z0-9_]+/ >
120
+ | - "%" var:name - "=" - < /[:\w]+/ >
109
121
  { @g.add_foreign_grammar(name, text) }
110
122
  | - "%%" - curly:act { @g.add_setup act }
111
123
  | - "%%" - var:name - curly:act { @g.add_directive name, act }
@@ -117,8 +129,8 @@ sgl_escape_quote = "\\'" { "'" }
117
129
 
118
130
  # These are a separate set of rules used to parse an ast declaration
119
131
 
120
- ast_constant = < /[A-Z][A-Za-z0-9_]*/ > { text }
121
- ast_word = < /[A-Za-z_][A-Za-z0-9_]*/ > { text }
132
+ ast_constant = < /[A-Z]\w*/ > { text }
133
+ ast_word = < /[a-z_]\w*/i > { text }
122
134
 
123
135
  ast_sp = (" " | "\t")*
124
136