parslet 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,7 +1,7 @@
1
1
  # A sample Gemfile
2
2
  source "http://rubygems.org"
3
3
 
4
- gem 'blankslate', '>= 2.1.2.3'
4
+ gem 'blankslate', '~> 2'
5
5
 
6
6
  group :development do
7
7
  gem 'rspec'
@@ -9,7 +9,6 @@ group :development do
9
9
 
10
10
  gem 'sdoc'
11
11
 
12
- gem 'autotest'
13
- gem 'autotest-fsevent'
14
- gem 'autotest-growl'
12
+ gem 'guard'
13
+ gem 'growl'
15
14
  end
data/HISTORY.txt CHANGED
@@ -1,8 +1,22 @@
1
- = 1.1.0 / ???
1
+ = 1.1.0 / 2Feb2011
2
2
 
3
- + Uses throw/catch internally for an order of magnitude increase in execution
4
- speed.
3
+ + Uses return (fail/success), cached line counts, memoizing of parse results
4
+ and other tricks internally for at least an order of magnitude increase
5
+ in execution speed.
5
6
 
7
+ + str('foo').maybe will now return an empty string again. Use .as(...) to
8
+ name things and get back [] from #repeat and nil from #maybe.
9
+
10
+ + If you require 'parslet/atoms/visitor', you'll get an accept method on
11
+ all known Parslet::Atoms.
12
+
13
+ + If you require 'parslet/export', you can call #to_citrus and #to_treetop
14
+ to produce string versions of your grammar in those dialects.
15
+
16
+ + Requiring 'parslet/convenience' will give you a parse_with_debug on
17
+ your Parslet::Parser class. This prints some diagnostics on parse failure.
18
+ (Thanks to Florian Hanke)
19
+
6
20
  = 1.0.1 / 17Jan2011
7
21
 
8
22
  A happy new year!
data/README CHANGED
@@ -43,10 +43,11 @@ SYNOPSIS
43
43
 
44
44
  COMPATIBILITY
45
45
 
46
- This library should work with both ruby 1.8 and ruby 1.9.
46
+ This library should work with most rubies. I've tested it with MRI 1.8, 1.9,
47
+ rbx-head, jruby. Please report as a bug if you encounter issues.
47
48
 
48
49
  STATUS
49
50
 
50
- one dot oh.
51
+ At version 1.1 - Good basic functionality and lots of plans for extension.
51
52
 
52
53
  (c) 2010 Kaspar Schiess
data/Rakefile CHANGED
@@ -1,65 +1,15 @@
1
1
 
2
2
  require "rubygems"
3
- require "rake/gempackagetask"
4
3
  require "rake/rdoctask"
5
4
  require 'rspec/core/rake_task'
5
+ require "rake/gempackagetask"
6
+
6
7
 
7
8
  desc "Run all examples"
8
9
  RSpec::Core::RakeTask.new
9
10
 
10
11
  task :default => :spec
11
12
 
12
- # This builds the actual gem. For details of what all these options
13
- # mean, and other ones you can add, check the documentation here:
14
- #
15
- # http://rubygems.org/read/chapter/20
16
- #
17
- spec = Gem::Specification.new do |s|
18
-
19
- # Change these as appropriate
20
- s.name = "parslet"
21
- s.version = "1.0.1"
22
- s.summary = "Parser construction library with great error reporting in Ruby."
23
- s.author = "Kaspar Schiess"
24
- s.email = "kaspar.schiess@absurd.li"
25
- s.homepage = "http://kschiess.github.com/parslet"
26
-
27
- s.has_rdoc = true
28
- s.extra_rdoc_files = %w(README)
29
- s.rdoc_options = %w(--main README)
30
-
31
- # Add any extra files to include in the gem
32
- s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{lib,example}/**/*")
33
- s.require_paths = ["lib"]
34
-
35
- # If you want to depend on other gems, add them here, along with any
36
- # relevant versions
37
- s.add_dependency("blankslate", "~> 2.1.2.3")
38
-
39
- # If your tests use any gems, include them here
40
- s.add_development_dependency("rspec")
41
- s.add_development_dependency("flexmock")
42
- end
43
-
44
- # This task actually builds the gem. We also regenerate a static
45
- # .gemspec file, which is useful if something (i.e. GitHub) will
46
- # be automatically building a gem for this project. If you're not
47
- # using GitHub, edit as appropriate.
48
- #
49
- # To publish your gem online, install the 'gemcutter' gem; Read more
50
- # about that here: http://gemcutter.org/pages/gem_docs
51
- Rake::GemPackageTask.new(spec) do |pkg|
52
- pkg.gem_spec = spec
53
- end
54
-
55
- desc "Build the gemspec file #{spec.name}.gemspec"
56
- task :gemspec do
57
- file = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
58
- File.open(file, "w") {|f| f << spec.to_ruby }
59
- end
60
-
61
- task :package => :gemspec
62
-
63
13
  require 'sdoc'
64
14
 
65
15
  # Generate documentation
@@ -73,7 +23,14 @@ Rake::RDocTask.new do |rdoc|
73
23
  rdoc.rdoc_dir = "rdoc"
74
24
  end
75
25
 
76
- desc 'Clear out RDoc and generated packages'
77
- task :clean => [:clobber_rdoc, :clobber_package] do
78
- rm "#{spec.name}.gemspec"
26
+ desc 'Clear out RDoc'
27
+ task :clean => [:clobber_rdoc, :clobber_package]
28
+
29
+ # This task actually builds the gem.
30
+ spec = eval(File.read('parslet.gemspec'))
31
+ desc "Generate the gem package."
32
+ Rake::GemPackageTask.new(spec) do |pkg|
33
+ pkg.gem_spec = spec
79
34
  end
35
+
36
+ task :gem => :spec
@@ -2,6 +2,7 @@
2
2
 
3
3
  # Example contributed by Hal Brodigan (postmodern). Thanks!
4
4
 
5
+ $:.unshift '../lib'
5
6
  require 'parslet'
6
7
 
7
8
  class EmailParser < Parslet::Parser
@@ -19,11 +20,11 @@ class EmailParser < Parslet::Parser
19
20
  }
20
21
 
21
22
  rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
22
- rule(:separator) { space? >> dot.as(:dot) >> space? | space }
23
+ rule(:separator) { dot.as(:dot) >> space? | space }
23
24
  rule(:words) { word >> (separator >> word).repeat }
24
25
 
25
26
  rule(:email) {
26
- (words >> space? >> at.as(:at) >> space? >> words).as(:email)
27
+ (words.as(:username) >> space? >> at >> space? >> words).as(:email)
27
28
  }
28
29
 
29
30
  root(:email)
@@ -31,8 +32,11 @@ end
31
32
 
32
33
  class EmailSanitizer < Parslet::Transform
33
34
  rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
34
- rule(:at => simple(:at)) { '@' }
35
35
  rule(:word => simple(:word)) { word }
36
+
37
+ rule(:username => sequence(:username)) { username.join + "@" }
38
+ rule(:username => simple(:username)) { username + "@" }
39
+
36
40
  rule(:email => sequence(:email)) { email.join }
37
41
  end
38
42
 
@@ -45,8 +49,8 @@ unless ARGV[0]
45
49
  end
46
50
 
47
51
  begin
48
- puts sanitizer.apply(parser.parse(ARGV[0]))
52
+ p sanitizer.apply(parser.parse(ARGV[0]))
49
53
  rescue Parslet::ParseFailed => error
50
54
  puts error
51
55
  puts parser.root.error_tree
52
- end
56
+ end
data/example/erb.rb ADDED
@@ -0,0 +1,44 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+ require 'parslet'
3
+
4
+ class ErbParser < Parslet::Parser
5
+ rule(:ruby) { (str('%>').absnt? >> any).repeat.as(:ruby) }
6
+
7
+ rule(:expression) { (str('=') >> ruby).as(:expression) }
8
+ rule(:comment) { (str('#') >> ruby).as(:comment) }
9
+ rule(:code) { ruby.as(:code) }
10
+ rule(:erb) { expression | comment | code }
11
+
12
+ rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
13
+ rule(:text) { (str('<%').absnt? >> any).repeat(1) }
14
+
15
+ rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
16
+ root(:text_with_ruby)
17
+ end
18
+
19
+ parser = ErbParser.new
20
+ p parser.parse "The value of x is <%= x %>."
21
+ p parser.parse "<% 1 + 2 %>"
22
+ p parser.parse "<%# commented %>"
23
+
24
+
25
+ evaluator = Parslet::Transform.new do
26
+
27
+ erb_binding = binding
28
+
29
+ rule(:code => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding); '' }
30
+ rule(:expression => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding) }
31
+ rule(:comment => { :ruby => simple(:ruby) }) { '' }
32
+
33
+ rule(:text => simple(:text)) { text }
34
+ rule(:text => sequence(:texts)) { texts.join }
35
+
36
+ end
37
+
38
+ puts evaluator.apply(parser.parse(<<-ERB
39
+ The <% a = 2 %>not printed result of "a = 2".
40
+ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a below.
41
+ The <%= 'nicely' %> printed result.
42
+ The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
43
+ ERB
44
+ ))
data/example/minilisp.rb CHANGED
@@ -5,12 +5,13 @@ $:.unshift '../lib'
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
8
+ require 'parslet/convenience'
8
9
 
9
10
  module MiniLisp
10
11
  class Parser < Parslet::Parser
11
12
  root :expression
12
13
  rule(:expression) {
13
- space? >> str('(') >> space? >> body >> str(')')
14
+ space? >> str('(') >> space? >> body >> str(')') >> space?
14
15
  }
15
16
 
16
17
  rule(:body) {
@@ -77,24 +78,17 @@ end
77
78
  parser = MiniLisp::Parser.new
78
79
  transform = MiniLisp::Transform.new
79
80
 
80
- # Parse stage
81
- begin
82
- result = parser.parse %Q{
83
- (define test (lambda ()
84
- (begin
85
- (display "something")
86
- (display 1)
87
- (display 3.08))))
88
- (test)
89
- }
90
- rescue Parslet::ParseFailed => failure
91
- puts failure
92
- puts parser.root.error_tree if parser.root.cause
93
- exit
94
- end
81
+ result = parser.parse_with_debug %Q{
82
+ (define test (lambda ()
83
+ (begin
84
+ (display "something")
85
+ (display 1)
86
+ (display 3.08))))
87
+ (test)
88
+ }
95
89
 
96
90
  # Transform the result
97
- pp transform.do(result)
91
+ pp transform.do(result) if result
98
92
 
99
93
  # Thereby reducing it to the earlier problem:
100
94
  # http://github.com/kschiess/toylisp
data/example/parens.rb CHANGED
@@ -2,7 +2,9 @@
2
2
  # uses '.as(:name)' to construct a tree that can reliably be matched
3
3
  # afterwards.
4
4
 
5
- $:.unshift '../lib'
5
+ $:.unshift File.join(
6
+ File.dirname(__FILE__),
7
+ '/../lib')
6
8
 
7
9
  require 'pp'
8
10
  require 'parslet'
@@ -16,7 +16,10 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
16
16
  # str('a') | str('b')
17
17
  #
18
18
  def initialize(*alternatives)
19
+ super()
20
+
19
21
  @alternatives = alternatives
22
+ @error_msg = "Expected one of #{alternatives.inspect}."
20
23
  end
21
24
 
22
25
  #---
@@ -28,14 +31,13 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
28
31
  self
29
32
  end
30
33
 
31
- def try(io) # :nodoc:
34
+ def try(source, context) # :nodoc:
32
35
  alternatives.each { |a|
33
- catch(:error) {
34
- return a.apply(io)
35
- }
36
+ value = a.apply(source, context)
37
+ return value unless value.error?
36
38
  }
37
39
  # If we reach this point, all alternatives have failed.
38
- error(io, "Expected one of #{alternatives.inspect}.")
40
+ error(source, @error_msg)
39
41
  end
40
42
 
41
43
  precedence ALTERNATE
@@ -4,31 +4,43 @@
4
4
  class Parslet::Atoms::Base
5
5
  include Parslet::Atoms::Precedence
6
6
 
7
+ # Internally, all parsing functions return either an instance of Fail
8
+ # or an instance of Success.
9
+ #
10
+ class Fail < Struct.new(:message)
11
+ def error?; true end
12
+ end
13
+
14
+ # Internally, all parsing functions return either an instance of Fail
15
+ # or an instance of Success.
16
+ #
17
+ class Success < Struct.new(:result)
18
+ def error?; false end
19
+ end
20
+
7
21
  # Given a string or an IO object, this will attempt a parse of its contents
8
22
  # and return a result. If the parse fails, a Parslet::ParseFailed exception
9
23
  # will be thrown.
10
24
  #
11
25
  def parse(io)
12
- if io.respond_to? :to_str
13
- io = StringIO.new(io)
14
- end
26
+ source = Parslet::Source.new(io)
27
+ context = Parslet::Atoms::Context.new
15
28
 
16
29
  result = nil
17
- error_message_or_success = catch(:error) {
18
- result = apply(io)
19
- :success
20
- }
30
+ value = apply(source, context)
21
31
 
22
32
  # If we didn't succeed the parse, raise an exception for the user.
23
33
  # Stack trace will be off, but the error tree should explain the reason
24
34
  # it failed.
25
- if error_message_or_success != :success
26
- raise Parslet::ParseFailed, error_message_or_success
35
+ if value.error?
36
+ raise Parslet::ParseFailed, value.message
27
37
  end
28
38
 
39
+ # assert: value is a success answer
40
+
29
41
  # If we haven't consumed the input, then the pattern doesn't match. Try
30
42
  # to provide a good error message (even asking down below)
31
- unless io.eof?
43
+ unless source.eof?
32
44
  # Do we know why we stopped matching input? If yes, that's a good
33
45
  # error to fail with. Otherwise just report that we cannot consume the
34
46
  # input.
@@ -37,44 +49,42 @@ class Parslet::Atoms::Base
37
49
  raise Parslet::ParseFailed,
38
50
  "Unconsumed input, maybe because of this: #{cause}"
39
51
  else
52
+ old_pos = source.pos
40
53
  parse_failed(
41
- format_cause(io, "Don't know what to do with #{io.string[io.pos,100]}"))
54
+ format_cause(source,
55
+ "Don't know what to do with #{source.read(100)}", old_pos))
42
56
  end
43
57
  end
44
58
 
45
- return flatten(result)
59
+ return flatten(value.result)
46
60
  end
47
61
 
48
62
  #---
49
63
  # Calls the #try method of this parslet. In case of a parse error, apply
50
- # leaves the io in the state it was before the attempt.
64
+ # leaves the source in the state it was before the attempt.
51
65
  #+++
52
- def apply(io) # :nodoc:
53
- # p [:start, self, io.string[io.pos, 10]]
66
+ def apply(source, context) # :nodoc:
67
+ old_pos = source.pos
54
68
 
55
- old_pos = io.pos
56
-
57
- # p [:try, self, io.string[io.pos, 20]]
58
- message = catch(:error) {
59
- r = try(io)
60
- # p [:return_from, self, r, flatten(r)]
61
-
62
- # This has just succeeded, so last_cause must be empty
63
- @last_cause = nil
64
- return r
69
+ result = context.cache(self, source) {
70
+ try(source, context)
65
71
  }
66
72
 
67
- # We only reach this point if the parse has failed. message is not nil.
68
- # p [:failing, self, io.string[io.pos, 20]]
73
+ # This has just succeeded, so last_cause must be empty
74
+ unless result.error?
75
+ @last_cause = nil
76
+ return result
77
+ end
69
78
 
70
- io.pos = old_pos
71
- throw :error, message
79
+ # We only reach this point if the parse has failed. Rewind the input.
80
+ source.pos = old_pos
81
+ return result # is instance of Fail
72
82
  end
73
-
83
+
74
84
  # Override this in your Atoms::Base subclasses to implement parsing
75
85
  # behaviour.
76
86
  #
77
- def try(io)
87
+ def try(source, context)
78
88
  raise NotImplementedError, "Atoms::Base doesn't have behaviour, please implement #try(io)."
79
89
  end
80
90
 
@@ -159,7 +169,7 @@ class Parslet::Atoms::Base
159
169
  # Takes a mixed value coming out of a parslet and converts it to a return
160
170
  # value for the user by dropping things and merging hashes.
161
171
  #
162
- def flatten(value) # :nodoc:
172
+ def flatten(value, named=false) # :nodoc:
163
173
  # Passes through everything that isn't an array of things
164
174
  return value unless value.instance_of? Array
165
175
 
@@ -174,9 +184,9 @@ class Parslet::Atoms::Base
174
184
  when :sequence
175
185
  return flatten_sequence(result)
176
186
  when :maybe
177
- return result.first
187
+ return named ? result.first : result.first || ''
178
188
  when :repetition
179
- return flatten_repetition(result)
189
+ return flatten_repetition(result, named)
180
190
  end
181
191
 
182
192
  fail "BUG: Unknown tag #{tag.inspect}."
@@ -211,7 +221,7 @@ class Parslet::Atoms::Base
211
221
  fail "Unhandled case when foldr'ing sequence."
212
222
  end
213
223
 
214
- def flatten_repetition(list) # :nodoc:
224
+ def flatten_repetition(list, named) # :nodoc:
215
225
  if list.any? { |e| e.instance_of?(Hash) }
216
226
  # If keyed subtrees are in the array, we'll want to discard all
217
227
  # strings inbetween. To keep them, name them.
@@ -226,8 +236,11 @@ class Parslet::Atoms::Base
226
236
  flatten(1)
227
237
  end
228
238
 
239
+ # Consistent handling of empty lists, when we act on a named result
240
+ return [] if named && list.empty?
241
+
229
242
  # If there are only strings, concatenate them and return that.
230
- list.inject('') { |s,e| s<<(e||'') }
243
+ list.inject('') { |s,e| s<<e }
231
244
  end
232
245
 
233
246
  def self.precedence(prec) # :nodoc:
@@ -250,7 +263,10 @@ class Parslet::Atoms::Base
250
263
  # but needed for clever error reports.
251
264
  #
252
265
  def cause # :nodoc:
253
- @last_cause
266
+ @last_cause && @last_cause.to_s || nil
267
+ end
268
+ def cause? # :nodoc:
269
+ !!@last_cause
254
270
  end
255
271
 
256
272
  # Error tree returns what went wrong here plus what went wrong inside
@@ -260,34 +276,49 @@ class Parslet::Atoms::Base
260
276
  def error_tree
261
277
  Parslet::ErrorTree.new(self)
262
278
  end
263
- def cause? # :nodoc:
264
- not @last_cause.nil?
265
- end
266
279
  private
267
- # TODO comments!!!
268
- # Report/raise a parse error with the given message, printing the current
269
- # position as well. Appends 'at line X char Y.' to the message you give.
270
- # If +pos+ is given, it is used as the real position the error happened,
271
- # correcting the io's current position.
280
+
281
+ # Produces an instance of Success and returns it.
282
+ #
283
+ def success(result)
284
+ Success.new(result)
285
+ end
286
+
287
+ # Produces an instance of Fail and returns it.
272
288
  #
273
- def error(io, str, pos=nil)
274
- @last_cause = format_cause(io, str, pos)
275
- throw :error, @last_cause
289
+ def error(source, str, pos=nil)
290
+ @last_cause = format_cause(source, str, pos)
291
+ Fail.new(@last_cause)
276
292
  end
277
- def parse_failed(str)
278
- @last_cause = str
293
+
294
+ # Signals to the outside that the parse has failed. Use this in conjunction
295
+ # with #format_cause for nice error messages.
296
+ #
297
+ def parse_failed(cause)
298
+ @last_cause = cause
279
299
  raise Parslet::ParseFailed,
280
- @last_cause
300
+ @last_cause.to_s
281
301
  end
282
- def format_cause(io, str, pos=nil)
283
- pre = io.string[0..(pos||io.pos)]
284
- lines = Array(pre.lines)
285
-
286
- return str if lines.empty?
287
-
288
- pos = lines.last.length
289
- return "#{str} at line #{lines.count} char #{pos}."
302
+
303
+ class Cause < Struct.new(:message, :source, :pos)
304
+ def to_s
305
+ line, column = source.line_and_column(pos)
306
+ message + " at line #{line} char #{column}."
307
+ end
308
+ end
309
+
310
+ # Appends 'at line ... char ...' to the string given. Use +pos+ to override
311
+ # the position of the +source+. This method returns an object that can
312
+ # be turned into a string using #to_s.
313
+ #
314
+ def format_cause(source, str, pos=nil)
315
+ real_pos = (pos||source.pos)
316
+ Cause.new(str, source, real_pos)
290
317
  end
318
+
319
+ # That annoying warning 'Duplicate subtrees while merging result' comes
320
+ # from here. You should add more '.as(...)' names to your intermediary tree.
321
+ #
291
322
  def warn_about_duplicate_keys(h1, h2)
292
323
  d = h1.keys & h2.keys
293
324
  unless d.empty?
@@ -0,0 +1,48 @@
1
+ module Parslet::Atoms
2
+ # Helper class that implements a transient cache that maps position and
3
+ # parslet object to results. This is used for memoization in the packrat
4
+ # style.
5
+ #
6
+ class Context
7
+ def initialize
8
+ @cache = Hash.new { |h, k| h[k] = {} }
9
+ end
10
+
11
+ # Caches a parse answer for obj at source.pos. Applying the same parslet
12
+ # at one position of input always yields the same result, unless the input
13
+ # has changed.
14
+ #
15
+ # We need the entire source here so we can ask for how many characters
16
+ # were consumed by a successful parse. Imitation of such a parse must
17
+ # advance the input pos by the same amount of bytes.
18
+ #
19
+ def cache(obj, source, &block)
20
+ beg = source.pos
21
+
22
+ # Not in cache yet? Return early.
23
+ unless entry = lookup(obj, beg)
24
+ result = yield
25
+
26
+ set obj, beg, [result, source.pos-beg]
27
+ return result
28
+ end
29
+
30
+ # the condition in unless has returned true, so entry is not nil.
31
+ result, advance = entry
32
+
33
+ # The data we're skipping here has been read before. (since it is in
34
+ # the cache) PLUS the actual contents are not interesting anymore since
35
+ # we know obj matches at beg. So skip reading.
36
+ source.pos = beg + advance
37
+ return result
38
+ end
39
+
40
+ private
41
+ def lookup(obj, pos)
42
+ @cache[pos][obj]
43
+ end
44
+ def set(obj, pos, val)
45
+ @cache[pos][obj] = val
46
+ end
47
+ end
48
+ end
@@ -18,8 +18,8 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
18
18
  @block = block
19
19
  end
20
20
 
21
- def try(io) # :nodoc:
22
- parslet.apply(io)
21
+ def try(source, context) # :nodoc:
22
+ parslet.apply(source, context)
23
23
  end
24
24
 
25
25
  def parslet