kpeg 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,18 @@
1
- === 1.0 / 2012-04-06
1
+ === 0.10 / 2012-04-16
2
+
3
+ * Minor enhancements
4
+ * In standalone parsers generation of a default initialize method may be
5
+ disabled with the custom_initialize variable:
6
+
7
+ %% custom_initialize = true
8
+ * Added a pre-class directive for adding class comments
9
+ * Generated code is now surrounded by startdoc/stopdoc.
10
+
11
+ * Bug fixes
12
+ * Hoe plugin now overwrites generated files
13
+ * Directives and variables now round-trip through KPeg::GrammarRenderer
14
+
15
+ === 0.9 / 2012-04-06
2
16
 
3
17
  * Minor enhancements
4
18
  * Added arbitrary directives to the kpeg grammar
@@ -1,5 +1,4 @@
1
1
  .autotest
2
- Gemfile
3
2
  History.txt
4
3
  LICENSE
5
4
  Manifest.txt
@@ -35,13 +34,12 @@ lib/kpeg/position.rb
35
34
  lib/kpeg/string_escape.kpeg
36
35
  lib/kpeg/string_escape.rb
37
36
  test/inputs/comments.kpeg
38
- test/test_file_parser_roundtrip.rb
39
- test/test_gen_calc.rb
40
37
  test/test_kpeg.rb
41
38
  test/test_kpeg_code_generator.rb
42
39
  test/test_kpeg_compiled_parser.rb
43
40
  test/test_kpeg_format.rb
41
+ test/test_kpeg_format_parser_round_trip.rb
42
+ test/test_kpeg_grammar.rb
44
43
  test/test_kpeg_grammar_renderer.rb
45
- test/test_left_recursion.rb
46
44
  vim/syntax_kpeg/ftdetect/kpeg.vim
47
45
  vim/syntax_kpeg/syntax/kpeg.vim
@@ -157,6 +157,38 @@ Kpeg allows comments to be added to the grammar file by using the # symbol
157
157
 
158
158
  # This is a comment in my grammar
159
159
 
160
+ === Variables
161
+
162
+ A variable looks like this:
163
+
164
+ %% name = value
165
+
166
+ Kpeg allows the following variables that control the output parser:
167
+
168
+ name::
169
+ The class name of the generated parser.
170
+ custom_initialize::
171
+ When this variable is set, a parser built as standalone will not include a
172
+ default initialize method.
173
+
174
+ === Directives
175
+
176
+ A directive looks like this:
177
+
178
+ %% header {
179
+ ...
180
+ }
181
+
182
+ Kpeg allows the following directives:
183
+
184
+ header::
185
+ Placed before any generated code
186
+ pre-class::
187
+ Placed before the class definition to provide a class comment
188
+ footer::
189
+ Placed after the end of the class (for requiring files dependent upon the
190
+ parser's namespace)
191
+
160
192
  == Generating and running your parser
161
193
 
162
194
  Before you can generate your parser you will need to define a root rule. This
data/Rakefile CHANGED
@@ -3,7 +3,6 @@
3
3
  require 'rubygems'
4
4
  require 'hoe'
5
5
 
6
- Hoe.plugin :bundler
7
6
  Hoe.plugin :gemspec
8
7
  Hoe.plugin :git
9
8
  Hoe.plugin :minitest
@@ -29,10 +28,18 @@ rule ".rb" => ".kpeg" do |t|
29
28
  ruby "-Ilib bin/kpeg -s -o #{t.name} -f #{t.source}"
30
29
  end
31
30
 
32
- desc "build the parser"
33
- task :parser => %w[
31
+ PARSER_FILES = %w[
34
32
  lib/kpeg/string_escape.rb
35
33
  lib/kpeg/format_parser.rb
36
34
  ]
37
35
 
36
+ PARSER_FILES.map do |parser_file|
37
+ file parser_file => 'lib/kpeg/compiled_parser.rb'
38
+ file parser_file => 'lib/kpeg/code_generator.rb'
39
+ file parser_file => 'lib/kpeg/position.rb'
40
+ end
41
+
42
+ desc "build the parser"
43
+ task :parser => PARSER_FILES
44
+
38
45
  # vim: syntax=ruby
@@ -10,7 +10,7 @@
10
10
  # license:
11
11
  #
12
12
  # Copyright (c) Ryan Davis, seattle.rb
13
- #
13
+ #
14
14
  # Permission is hereby granted, free of charge, to any person obtaining
15
15
  # a copy of this software and associated documentation files (the
16
16
  # "Software"), to deal in the Software without restriction, including
@@ -18,10 +18,10 @@
18
18
  # distribute, sublicense, and/or sell copies of the Software, and to
19
19
  # permit persons to whom the Software is furnished to do so, subject to
20
20
  # the following conditions:
21
- #
21
+ #
22
22
  # The above copyright notice and this permission notice shall be
23
23
  # included in all copies or substantial portions of the Software.
24
- #
24
+ #
25
25
  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26
26
  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27
27
  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -55,9 +55,10 @@ module Hoe::Kpeg
55
55
  def initialize_kpeg
56
56
  self.kpeg_tasks = [:multi, :test, :check_manifest]
57
57
 
58
- # -v = verbose
58
+ # -f = overwrite existing file
59
59
  # -s = parser does not require runtime
60
- self.kpeg_flags ||= "-s -v"
60
+ # -v = verbose
61
+ self.kpeg_flags ||= "-s -v -f"
61
62
 
62
63
  dependency 'kpeg', '~> 0.9', :development
63
64
  end
@@ -1,6 +1,6 @@
1
1
  module KPeg
2
2
 
3
- VERSION = "0.9.0"
3
+ VERSION = "0.10.0"
4
4
 
5
5
  def self.grammar
6
6
  g = Grammar.new
@@ -90,11 +90,11 @@ module KPeg
90
90
  end
91
91
  end
92
92
  end
93
-
93
+
94
94
  def indentify(code, indent)
95
95
  "#{" " * indent}#{code}"
96
96
  end
97
-
97
+
98
98
  # Default indent is 4 spaces (indent=2)
99
99
  def output_op(code, op, indent=2)
100
100
  case op
@@ -309,15 +309,23 @@ module KPeg
309
309
  else
310
310
  raise "Unknown op - #{op.class}"
311
311
  end
312
-
313
312
  end
314
313
 
315
- def standalone_region(path)
316
- cp = File.read(path)
317
- start = cp.index("# STANDALONE START")
318
- fin = cp.index("# STANDALONE END")
314
+ def standalone_region(path, marker = "STANDALONE")
315
+ expanded_path = File.expand_path("../#{path}", __FILE__)
316
+ cp = File.read(expanded_path)
317
+
318
+ start_marker = "# #{marker} START"
319
+ end_marker = /^\s*# #{Regexp.escape marker} END/
320
+
321
+ start = cp.index(start_marker) + start_marker.length + 1 # \n
322
+ fin = cp.index(end_marker)
323
+
324
+ unless start and fin
325
+ abort("#{marker} boundaries in #{path} missing " \
326
+ "for standalone generation")
327
+ end
319
328
 
320
- return nil unless start and fin
321
329
  cp[start..fin]
322
330
  end
323
331
 
@@ -326,37 +334,29 @@ module KPeg
326
334
 
327
335
  code = []
328
336
 
329
- if header = @grammar.directives['header']
330
- code << header.action.strip
331
- code << "\n"
332
- end
333
-
334
- if @standalone
335
- code << "class #{@name}\n"
337
+ output_header(code)
338
+ output_grammar(code)
339
+ output_footer(code)
336
340
 
337
- unless cp = standalone_region(
338
- File.expand_path("../compiled_parser.rb", __FILE__))
341
+ @output = code.join
342
+ end
339
343
 
340
- puts "Standalone failure. Check compiler_parser.rb for proper boundary comments"
341
- exit 1
342
- end
344
+ ##
345
+ # Output of class end and footer
343
346
 
344
- unless pp = standalone_region(
345
- File.expand_path("../position.rb", __FILE__))
346
- puts "Standalone failure. Check position.rb for proper boundary comments"
347
- end
347
+ def output_footer(code)
348
+ code << "end\n"
348
349
 
349
- cp.gsub!(/include Position/, pp)
350
- code << cp << "\n"
351
- else
352
- code << "require 'kpeg/compiled_parser'\n\n"
353
- code << "class #{@name} < KPeg::CompiledParser\n"
350
+ if footer = @grammar.directives['footer']
351
+ code << footer.action
354
352
  end
353
+ end
355
354
 
356
- @grammar.setup_actions.each do |act|
357
- code << "\n#{act.action}\n\n"
358
- end
355
+ ##
356
+ # Output of grammar and rules
359
357
 
358
+ def output_grammar(code)
359
+ code << " # :stopdoc:\n"
360
360
  handle_ast(code)
361
361
 
362
362
  fg = @grammar.foreign_grammars
@@ -418,24 +418,57 @@ module KPeg
418
418
 
419
419
  code << "\n Rules = {}\n"
420
420
  @grammar.rule_order.each do |name|
421
- rule = @grammar.rules[name]
422
-
423
421
  rend = GrammarRenderer.escape renderings[name], true
424
422
  code << " Rules[:#{method_name name}] = rule_info(\"#{name}\", \"#{rend}\")\n"
425
423
  end
426
424
 
427
- code << "end\n"
425
+ code << " # :startdoc:\n"
426
+ end
428
427
 
429
- if footer = @grammar.directives['footer']
430
- code << footer.action
428
+ ##
429
+ # Output up to the user-defined setup actions
430
+
431
+ def output_header(code)
432
+ if header = @grammar.directives['header']
433
+ code << header.action.strip
434
+ code << "\n"
431
435
  end
432
436
 
433
- @output = code.join
437
+ pre_class = @grammar.directives['pre-class']
438
+
439
+ if @standalone
440
+ if pre_class
441
+ code << pre_class.action.strip
442
+ code << "\n"
443
+ end
444
+ code << "class #{@name}\n"
445
+
446
+ cp = standalone_region("compiled_parser.rb")
447
+ cpi = standalone_region("compiled_parser.rb", "INITIALIZE")
448
+ pp = standalone_region("position.rb")
449
+
450
+ cp.gsub!(/include Position/, pp)
451
+ code << " # :stopdoc:\n"
452
+ code << cpi << "\n" unless @grammar.variables['custom_initialize']
453
+ code << cp << "\n"
454
+ code << " # :startdoc:\n"
455
+ else
456
+ code << "require 'kpeg/compiled_parser'\n\n"
457
+ if pre_class
458
+ code << pre_class.action.strip
459
+ code << "\n"
460
+ end
461
+ code << "class #{@name} < KPeg::CompiledParser\n"
462
+ end
463
+
464
+ @grammar.setup_actions.each do |act|
465
+ code << "\n#{act.action}\n\n"
466
+ end
434
467
  end
435
468
 
436
469
  def make(str)
437
470
  m = Module.new
438
- m.module_eval output
471
+ m.module_eval output, "(kpeg parser #{@name})"
439
472
 
440
473
  cls = m.const_get(@name)
441
474
  cls.new(str)
@@ -10,8 +10,22 @@ module KPeg
10
10
 
11
11
  # Leave these markers in! They allow us to generate standalone
12
12
  # code automatically!
13
- #
13
+
14
+ # INITIALIZE START
15
+
16
+ # This is distinct from setup_parser so that a standalone parser
17
+ # can redefine #initialize and still have access to the proper
18
+ # parser setup code.
19
+ def initialize(str, debug=false)
20
+ setup_parser(str, debug)
21
+ end
22
+
23
+ # INITIALIZE END
24
+
14
25
  # STANDALONE START
26
+
27
+ # Prepares for parsing +str+. If you define a custom initialize you must
28
+ # call this method before #parse
15
29
  def setup_parser(str, debug=false)
16
30
  @string = str
17
31
  @pos = 0
@@ -23,14 +37,6 @@ module KPeg
23
37
  setup_foreign_grammar
24
38
  end
25
39
 
26
- # This is distinct from setup_parser so that a standalone parser
27
- # can redefine #initialize and still have access to the proper
28
- # parser setup code.
29
- #
30
- def initialize(str, debug=false)
31
- setup_parser(str, debug)
32
- end
33
-
34
40
  attr_reader :string
35
41
  attr_reader :failing_rule_offset
36
42
  attr_accessor :result, :pos
@@ -228,7 +234,6 @@ module KPeg
228
234
  def apply_with_args(rule, *args)
229
235
  memo_key = [rule, args]
230
236
  if m = @memoizations[memo_key][@pos]
231
- prev = @pos
232
237
  @pos = m.pos
233
238
  if !m.set
234
239
  m.left_rec = true
@@ -263,7 +268,6 @@ module KPeg
263
268
 
264
269
  def apply(rule)
265
270
  if m = @memoizations[rule][@pos]
266
- prev = @pos
267
271
  @pos = m.pos
268
272
  if !m.set
269
273
  m.left_rec = true
@@ -1,14 +1,25 @@
1
1
  %% name = KPeg::FormatParser
2
+ %% custom_initialize = true
3
+
4
+ %% pre-class {
5
+ require 'kpeg/grammar'
6
+ }
2
7
 
3
8
  %% {
4
- require 'kpeg/grammar'
9
+
10
+ ##
11
+ # Creates a new kpeg format parser for +str+.
5
12
 
6
13
  def initialize(str, debug=false)
7
14
  setup_parser(str, debug)
8
15
  @g = KPeg::Grammar.new
9
16
  end
10
17
 
18
+ ##
19
+ # The parsed grammar
20
+
11
21
  attr_reader :g
22
+
12
23
  alias_method :grammar, :g
13
24
  }
14
25
 
@@ -22,8 +33,8 @@
22
33
  kleene = "*"
23
34
 
24
35
  # Allow - by itself, but not at the beginning
25
- var = < "-" | /[a-zA-Z][\-_a-zA-Z0-9]*/ > { text }
26
- method = < /[a-zA-Z_][a-zA-Z0-9_]*/ > { text }
36
+ var = < "-" | /[a-z][\w-]*/i > { text }
37
+ method = < /[a-z_]\w*/i > { text }
27
38
 
28
39
  dbl_escapes = "n" { "\n" }
29
40
  | "s" { " " }
@@ -39,7 +50,8 @@
39
50
  | num_escapes
40
51
  | < . > { text }
41
52
  num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
42
- | "x" < /[0-9a-fA-F]{2}/ > { [text.to_i(16)].pack("U") }
53
+ | "x" < /[a-f\d]{2}/i > { [text.to_i(16)].pack("U") }
54
+ # TODO use /\h{2}/ after 1.8 support is dropped
43
55
  dbl_seq = < /[^\\"]+/ > { text }
44
56
  dbl_not_quote = ("\\" dbl_escapes:s | dbl_seq:s)*:ary { Array(ary) }
45
57
  dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
@@ -55,10 +67,10 @@ sgl_escape_quote = "\\'" { "'" }
55
67
  regexp = "/" not_slash:body "/" regexp_opts:opts
56
68
  { @g.reg body, opts }
57
69
 
58
- char = < /[a-zA-Z0-9]/ > { text }
70
+ char = < /[a-z\d]/i > { text }
59
71
  char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }
60
72
 
61
- range_num = < /[1-9][0-9]*/ > { text }
73
+ range_num = < /[1-9]\d*/ > { text }
62
74
  range_elem = < range_num|kleene > { text }
63
75
  mult_range = "[" - range_elem:l - "," - range_elem:r - "]"
64
76
  { [l == "*" ? nil : l.to_i, r == "*" ? nil : r.to_i] }
@@ -105,7 +117,7 @@ sgl_escape_quote = "\\'" { "'" }
105
117
  | - var:n - { [n] }
106
118
  statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
107
119
  | - var:v - "=" - expression:o { @g.set(v, o) }
108
- | - "%" var:name - "=" - < /[::A-Za-z0-9_]+/ >
120
+ | - "%" var:name - "=" - < /[:\w]+/ >
109
121
  { @g.add_foreign_grammar(name, text) }
110
122
  | - "%%" - curly:act { @g.add_setup act }
111
123
  | - "%%" - var:name - curly:act { @g.add_directive name, act }
@@ -117,8 +129,8 @@ sgl_escape_quote = "\\'" { "'" }
117
129
 
118
130
  # These are a separate set of rules used to parse an ast declaration
119
131
 
120
- ast_constant = < /[A-Z][A-Za-z0-9_]*/ > { text }
121
- ast_word = < /[A-Za-z_][A-Za-z0-9_]*/ > { text }
132
+ ast_constant = < /[A-Z]\w*/ > { text }
133
+ ast_word = < /[a-z_]\w*/i > { text }
122
134
 
123
135
  ast_sp = (" " | "\t")*
124
136