parslet 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,7 +1,7 @@
1
1
  # A sample Gemfile
2
2
  source "http://rubygems.org"
3
3
 
4
- gem 'blankslate', '>= 2.1.2.3'
4
+ gem 'blankslate', '~> 2'
5
5
 
6
6
  group :development do
7
7
  gem 'rspec'
@@ -9,7 +9,6 @@ group :development do
9
9
 
10
10
  gem 'sdoc'
11
11
 
12
- gem 'autotest'
13
- gem 'autotest-fsevent'
14
- gem 'autotest-growl'
12
+ gem 'guard'
13
+ gem 'growl'
15
14
  end
data/HISTORY.txt CHANGED
@@ -1,8 +1,22 @@
1
- = 1.1.0 / ???
1
+ = 1.1.0 / 2Feb2011
2
2
 
3
- + Uses throw/catch internally for an order of magnitude increase in execution
4
- speed.
3
+ + Uses return (fail/success), cached line counts, memoizing of parse results
4
+ and other tricks internally for at least an order of magnitude increase
5
+ in execution speed.
5
6
 
7
+ + str('foo').maybe will now return an empty string again. Use .as(...) to
8
+ name things and get back [] from #repeat and nil from #maybe.
9
+
10
+ + If you require 'parslet/atoms/visitor', you'll get an accept method on
11
+ all known Parslet::Atoms.
12
+
13
+ + If you require 'parslet/export', you can call #to_citrus and #to_treetop
14
+ to produce string versions of your grammar in those dialects.
15
+
16
+ + Requiring 'parslet/convenience' will give you a parse_with_debug on
17
+ your Parslet::Parser class. This prints some diagnostics on parse failure.
18
+ (Thanks to Florian Hanke)
19
+
6
20
  = 1.0.1 / 17Jan2011
7
21
 
8
22
  A happy new year!
data/README CHANGED
@@ -43,10 +43,11 @@ SYNOPSIS
43
43
 
44
44
  COMPATIBILITY
45
45
 
46
- This library should work with both ruby 1.8 and ruby 1.9.
46
+ This library should work with most rubies. I've tested it with MRI 1.8, 1.9,
47
+ rbx-head, jruby. Please report it as a bug if you encounter issues.
47
48
 
48
49
  STATUS
49
50
 
50
- one dot oh.
51
+ At version 1.1 - Good basic functionality and lots of plans for extension.
51
52
 
52
53
  (c) 2010 Kaspar Schiess
data/Rakefile CHANGED
@@ -1,65 +1,15 @@
1
1
 
2
2
  require "rubygems"
3
- require "rake/gempackagetask"
4
3
  require "rake/rdoctask"
5
4
  require 'rspec/core/rake_task'
5
+ require "rake/gempackagetask"
6
+
6
7
 
7
8
  desc "Run all examples"
8
9
  RSpec::Core::RakeTask.new
9
10
 
10
11
  task :default => :spec
11
12
 
12
- # This builds the actual gem. For details of what all these options
13
- # mean, and other ones you can add, check the documentation here:
14
- #
15
- # http://rubygems.org/read/chapter/20
16
- #
17
- spec = Gem::Specification.new do |s|
18
-
19
- # Change these as appropriate
20
- s.name = "parslet"
21
- s.version = "1.0.1"
22
- s.summary = "Parser construction library with great error reporting in Ruby."
23
- s.author = "Kaspar Schiess"
24
- s.email = "kaspar.schiess@absurd.li"
25
- s.homepage = "http://kschiess.github.com/parslet"
26
-
27
- s.has_rdoc = true
28
- s.extra_rdoc_files = %w(README)
29
- s.rdoc_options = %w(--main README)
30
-
31
- # Add any extra files to include in the gem
32
- s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{lib,example}/**/*")
33
- s.require_paths = ["lib"]
34
-
35
- # If you want to depend on other gems, add them here, along with any
36
- # relevant versions
37
- s.add_dependency("blankslate", "~> 2.1.2.3")
38
-
39
- # If your tests use any gems, include them here
40
- s.add_development_dependency("rspec")
41
- s.add_development_dependency("flexmock")
42
- end
43
-
44
- # This task actually builds the gem. We also regenerate a static
45
- # .gemspec file, which is useful if something (i.e. GitHub) will
46
- # be automatically building a gem for this project. If you're not
47
- # using GitHub, edit as appropriate.
48
- #
49
- # To publish your gem online, install the 'gemcutter' gem; Read more
50
- # about that here: http://gemcutter.org/pages/gem_docs
51
- Rake::GemPackageTask.new(spec) do |pkg|
52
- pkg.gem_spec = spec
53
- end
54
-
55
- desc "Build the gemspec file #{spec.name}.gemspec"
56
- task :gemspec do
57
- file = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
58
- File.open(file, "w") {|f| f << spec.to_ruby }
59
- end
60
-
61
- task :package => :gemspec
62
-
63
13
  require 'sdoc'
64
14
 
65
15
  # Generate documentation
@@ -73,7 +23,14 @@ Rake::RDocTask.new do |rdoc|
73
23
  rdoc.rdoc_dir = "rdoc"
74
24
  end
75
25
 
76
- desc 'Clear out RDoc and generated packages'
77
- task :clean => [:clobber_rdoc, :clobber_package] do
78
- rm "#{spec.name}.gemspec"
26
+ desc 'Clear out RDoc'
27
+ task :clean => [:clobber_rdoc, :clobber_package]
28
+
29
+ # This task actually builds the gem.
30
+ spec = eval(File.read('parslet.gemspec'))
31
+ desc "Generate the gem package."
32
+ Rake::GemPackageTask.new(spec) do |pkg|
33
+ pkg.gem_spec = spec
79
34
  end
35
+
36
+ task :gem => :spec
@@ -2,6 +2,7 @@
2
2
 
3
3
  # Example contributed by Hal Brodigan (postmodern). Thanks!
4
4
 
5
+ $:.unshift '../lib'
5
6
  require 'parslet'
6
7
 
7
8
  class EmailParser < Parslet::Parser
@@ -19,11 +20,11 @@ class EmailParser < Parslet::Parser
19
20
  }
20
21
 
21
22
  rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
22
- rule(:separator) { space? >> dot.as(:dot) >> space? | space }
23
+ rule(:separator) { dot.as(:dot) >> space? | space }
23
24
  rule(:words) { word >> (separator >> word).repeat }
24
25
 
25
26
  rule(:email) {
26
- (words >> space? >> at.as(:at) >> space? >> words).as(:email)
27
+ (words.as(:username) >> space? >> at >> space? >> words).as(:email)
27
28
  }
28
29
 
29
30
  root(:email)
@@ -31,8 +32,11 @@ end
31
32
 
32
33
  class EmailSanitizer < Parslet::Transform
33
34
  rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
34
- rule(:at => simple(:at)) { '@' }
35
35
  rule(:word => simple(:word)) { word }
36
+
37
+ rule(:username => sequence(:username)) { username.join + "@" }
38
+ rule(:username => simple(:username)) { username + "@" }
39
+
36
40
  rule(:email => sequence(:email)) { email.join }
37
41
  end
38
42
 
@@ -45,8 +49,8 @@ unless ARGV[0]
45
49
  end
46
50
 
47
51
  begin
48
- puts sanitizer.apply(parser.parse(ARGV[0]))
52
+ p sanitizer.apply(parser.parse(ARGV[0]))
49
53
  rescue Parslet::ParseFailed => error
50
54
  puts error
51
55
  puts parser.root.error_tree
52
- end
56
+ end
data/example/erb.rb ADDED
@@ -0,0 +1,44 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+ require 'parslet'
3
+
4
+ class ErbParser < Parslet::Parser
5
+ rule(:ruby) { (str('%>').absnt? >> any).repeat.as(:ruby) }
6
+
7
+ rule(:expression) { (str('=') >> ruby).as(:expression) }
8
+ rule(:comment) { (str('#') >> ruby).as(:comment) }
9
+ rule(:code) { ruby.as(:code) }
10
+ rule(:erb) { expression | comment | code }
11
+
12
+ rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
13
+ rule(:text) { (str('<%').absnt? >> any).repeat(1) }
14
+
15
+ rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
16
+ root(:text_with_ruby)
17
+ end
18
+
19
+ parser = ErbParser.new
20
+ p parser.parse "The value of x is <%= x %>."
21
+ p parser.parse "<% 1 + 2 %>"
22
+ p parser.parse "<%# commented %>"
23
+
24
+
25
+ evaluator = Parslet::Transform.new do
26
+
27
+ erb_binding = binding
28
+
29
+ rule(:code => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding); '' }
30
+ rule(:expression => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding) }
31
+ rule(:comment => { :ruby => simple(:ruby) }) { '' }
32
+
33
+ rule(:text => simple(:text)) { text }
34
+ rule(:text => sequence(:texts)) { texts.join }
35
+
36
+ end
37
+
38
+ puts evaluator.apply(parser.parse(<<-ERB
39
+ The <% a = 2 %>not printed result of "a = 2".
40
+ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a below.
41
+ The <%= 'nicely' %> printed result.
42
+ The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
43
+ ERB
44
+ ))
data/example/minilisp.rb CHANGED
@@ -5,12 +5,13 @@ $:.unshift '../lib'
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
8
+ require 'parslet/convenience'
8
9
 
9
10
  module MiniLisp
10
11
  class Parser < Parslet::Parser
11
12
  root :expression
12
13
  rule(:expression) {
13
- space? >> str('(') >> space? >> body >> str(')')
14
+ space? >> str('(') >> space? >> body >> str(')') >> space?
14
15
  }
15
16
 
16
17
  rule(:body) {
@@ -77,24 +78,17 @@ end
77
78
  parser = MiniLisp::Parser.new
78
79
  transform = MiniLisp::Transform.new
79
80
 
80
- # Parse stage
81
- begin
82
- result = parser.parse %Q{
83
- (define test (lambda ()
84
- (begin
85
- (display "something")
86
- (display 1)
87
- (display 3.08))))
88
- (test)
89
- }
90
- rescue Parslet::ParseFailed => failure
91
- puts failure
92
- puts parser.root.error_tree if parser.root.cause
93
- exit
94
- end
81
+ result = parser.parse_with_debug %Q{
82
+ (define test (lambda ()
83
+ (begin
84
+ (display "something")
85
+ (display 1)
86
+ (display 3.08))))
87
+ (test)
88
+ }
95
89
 
96
90
  # Transform the result
97
- pp transform.do(result)
91
+ pp transform.do(result) if result
98
92
 
99
93
  # Thereby reducing it to the earlier problem:
100
94
  # http://github.com/kschiess/toylisp
data/example/parens.rb CHANGED
@@ -2,7 +2,9 @@
2
2
  # uses '.as(:name)' to construct a tree that can reliably be matched
3
3
  # afterwards.
4
4
 
5
- $:.unshift '../lib'
5
+ $:.unshift File.join(
6
+ File.dirname(__FILE__),
7
+ '/../lib')
6
8
 
7
9
  require 'pp'
8
10
  require 'parslet'
@@ -16,7 +16,10 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
16
16
  # str('a') | str('b')
17
17
  #
18
18
  def initialize(*alternatives)
19
+ super()
20
+
19
21
  @alternatives = alternatives
22
+ @error_msg = "Expected one of #{alternatives.inspect}."
20
23
  end
21
24
 
22
25
  #---
@@ -28,14 +31,13 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
28
31
  self
29
32
  end
30
33
 
31
- def try(io) # :nodoc:
34
+ def try(source, context) # :nodoc:
32
35
  alternatives.each { |a|
33
- catch(:error) {
34
- return a.apply(io)
35
- }
36
+ value = a.apply(source, context)
37
+ return value unless value.error?
36
38
  }
37
39
  # If we reach this point, all alternatives have failed.
38
- error(io, "Expected one of #{alternatives.inspect}.")
40
+ error(source, @error_msg)
39
41
  end
40
42
 
41
43
  precedence ALTERNATE
@@ -4,31 +4,43 @@
4
4
  class Parslet::Atoms::Base
5
5
  include Parslet::Atoms::Precedence
6
6
 
7
+ # Internally, all parsing functions return either an instance of Fail
8
+ # or an instance of Success.
9
+ #
10
+ class Fail < Struct.new(:message)
11
+ def error?; true end
12
+ end
13
+
14
+ # Internally, all parsing functions return either an instance of Fail
15
+ # or an instance of Success.
16
+ #
17
+ class Success < Struct.new(:result)
18
+ def error?; false end
19
+ end
20
+
7
21
  # Given a string or an IO object, this will attempt a parse of its contents
8
22
  # and return a result. If the parse fails, a Parslet::ParseFailed exception
9
23
  # will be thrown.
10
24
  #
11
25
  def parse(io)
12
- if io.respond_to? :to_str
13
- io = StringIO.new(io)
14
- end
26
+ source = Parslet::Source.new(io)
27
+ context = Parslet::Atoms::Context.new
15
28
 
16
29
  result = nil
17
- error_message_or_success = catch(:error) {
18
- result = apply(io)
19
- :success
20
- }
30
+ value = apply(source, context)
21
31
 
22
32
  # If we didn't succeed the parse, raise an exception for the user.
23
33
  # Stack trace will be off, but the error tree should explain the reason
24
34
  # it failed.
25
- if error_message_or_success != :success
26
- raise Parslet::ParseFailed, error_message_or_success
35
+ if value.error?
36
+ raise Parslet::ParseFailed, value.message
27
37
  end
28
38
 
39
+ # assert: value is a success answer
40
+
29
41
  # If we haven't consumed the input, then the pattern doesn't match. Try
30
42
  # to provide a good error message (even asking down below)
31
- unless io.eof?
43
+ unless source.eof?
32
44
  # Do we know why we stopped matching input? If yes, that's a good
33
45
  # error to fail with. Otherwise just report that we cannot consume the
34
46
  # input.
@@ -37,44 +49,42 @@ class Parslet::Atoms::Base
37
49
  raise Parslet::ParseFailed,
38
50
  "Unconsumed input, maybe because of this: #{cause}"
39
51
  else
52
+ old_pos = source.pos
40
53
  parse_failed(
41
- format_cause(io, "Don't know what to do with #{io.string[io.pos,100]}"))
54
+ format_cause(source,
55
+ "Don't know what to do with #{source.read(100)}", old_pos))
42
56
  end
43
57
  end
44
58
 
45
- return flatten(result)
59
+ return flatten(value.result)
46
60
  end
47
61
 
48
62
  #---
49
63
  # Calls the #try method of this parslet. In case of a parse error, apply
50
- # leaves the io in the state it was before the attempt.
64
+ # leaves the source in the state it was before the attempt.
51
65
  #+++
52
- def apply(io) # :nodoc:
53
- # p [:start, self, io.string[io.pos, 10]]
66
+ def apply(source, context) # :nodoc:
67
+ old_pos = source.pos
54
68
 
55
- old_pos = io.pos
56
-
57
- # p [:try, self, io.string[io.pos, 20]]
58
- message = catch(:error) {
59
- r = try(io)
60
- # p [:return_from, self, r, flatten(r)]
61
-
62
- # This has just succeeded, so last_cause must be empty
63
- @last_cause = nil
64
- return r
69
+ result = context.cache(self, source) {
70
+ try(source, context)
65
71
  }
66
72
 
67
- # We only reach this point if the parse has failed. message is not nil.
68
- # p [:failing, self, io.string[io.pos, 20]]
73
+ # This has just succeeded, so last_cause must be empty
74
+ unless result.error?
75
+ @last_cause = nil
76
+ return result
77
+ end
69
78
 
70
- io.pos = old_pos
71
- throw :error, message
79
+ # We only reach this point if the parse has failed. Rewind the input.
80
+ source.pos = old_pos
81
+ return result # is instance of Fail
72
82
  end
73
-
83
+
74
84
  # Override this in your Atoms::Base subclasses to implement parsing
75
85
  # behaviour.
76
86
  #
77
- def try(io)
87
+ def try(source, context)
78
88
  raise NotImplementedError, "Atoms::Base doesn't have behaviour, please implement #try(io)."
79
89
  end
80
90
 
@@ -159,7 +169,7 @@ class Parslet::Atoms::Base
159
169
  # Takes a mixed value coming out of a parslet and converts it to a return
160
170
  # value for the user by dropping things and merging hashes.
161
171
  #
162
- def flatten(value) # :nodoc:
172
+ def flatten(value, named=false) # :nodoc:
163
173
  # Passes through everything that isn't an array of things
164
174
  return value unless value.instance_of? Array
165
175
 
@@ -174,9 +184,9 @@ class Parslet::Atoms::Base
174
184
  when :sequence
175
185
  return flatten_sequence(result)
176
186
  when :maybe
177
- return result.first
187
+ return named ? result.first : result.first || ''
178
188
  when :repetition
179
- return flatten_repetition(result)
189
+ return flatten_repetition(result, named)
180
190
  end
181
191
 
182
192
  fail "BUG: Unknown tag #{tag.inspect}."
@@ -211,7 +221,7 @@ class Parslet::Atoms::Base
211
221
  fail "Unhandled case when foldr'ing sequence."
212
222
  end
213
223
 
214
- def flatten_repetition(list) # :nodoc:
224
+ def flatten_repetition(list, named) # :nodoc:
215
225
  if list.any? { |e| e.instance_of?(Hash) }
216
226
  # If keyed subtrees are in the array, we'll want to discard all
217
227
  # strings inbetween. To keep them, name them.
@@ -226,8 +236,11 @@ class Parslet::Atoms::Base
226
236
  flatten(1)
227
237
  end
228
238
 
239
+ # Consistent handling of empty lists, when we act on a named result
240
+ return [] if named && list.empty?
241
+
229
242
  # If there are only strings, concatenate them and return that.
230
- list.inject('') { |s,e| s<<(e||'') }
243
+ list.inject('') { |s,e| s<<e }
231
244
  end
232
245
 
233
246
  def self.precedence(prec) # :nodoc:
@@ -250,7 +263,10 @@ class Parslet::Atoms::Base
250
263
  # but needed for clever error reports.
251
264
  #
252
265
  def cause # :nodoc:
253
- @last_cause
266
+ @last_cause && @last_cause.to_s || nil
267
+ end
268
+ def cause? # :nodoc:
269
+ !!@last_cause
254
270
  end
255
271
 
256
272
  # Error tree returns what went wrong here plus what went wrong inside
@@ -260,34 +276,49 @@ class Parslet::Atoms::Base
260
276
  def error_tree
261
277
  Parslet::ErrorTree.new(self)
262
278
  end
263
- def cause? # :nodoc:
264
- not @last_cause.nil?
265
- end
266
279
  private
267
- # TODO comments!!!
268
- # Report/raise a parse error with the given message, printing the current
269
- # position as well. Appends 'at line X char Y.' to the message you give.
270
- # If +pos+ is given, it is used as the real position the error happened,
271
- # correcting the io's current position.
280
+
281
+ # Produces an instance of Success and returns it.
282
+ #
283
+ def success(result)
284
+ Success.new(result)
285
+ end
286
+
287
+ # Produces an instance of Fail and returns it.
272
288
  #
273
- def error(io, str, pos=nil)
274
- @last_cause = format_cause(io, str, pos)
275
- throw :error, @last_cause
289
+ def error(source, str, pos=nil)
290
+ @last_cause = format_cause(source, str, pos)
291
+ Fail.new(@last_cause)
276
292
  end
277
- def parse_failed(str)
278
- @last_cause = str
293
+
294
+ # Signals to the outside that the parse has failed. Use this in conjunction
295
+ # with #format_cause for nice error messages.
296
+ #
297
+ def parse_failed(cause)
298
+ @last_cause = cause
279
299
  raise Parslet::ParseFailed,
280
- @last_cause
300
+ @last_cause.to_s
281
301
  end
282
- def format_cause(io, str, pos=nil)
283
- pre = io.string[0..(pos||io.pos)]
284
- lines = Array(pre.lines)
285
-
286
- return str if lines.empty?
287
-
288
- pos = lines.last.length
289
- return "#{str} at line #{lines.count} char #{pos}."
302
+
303
+ class Cause < Struct.new(:message, :source, :pos)
304
+ def to_s
305
+ line, column = source.line_and_column(pos)
306
+ message + " at line #{line} char #{column}."
307
+ end
308
+ end
309
+
310
+ # Appends 'at line ... char ...' to the string given. Use +pos+ to override
311
+ # the position of the +source+. This method returns an object that can
312
+ # be turned into a string using #to_s.
313
+ #
314
+ def format_cause(source, str, pos=nil)
315
+ real_pos = (pos||source.pos)
316
+ Cause.new(str, source, real_pos)
290
317
  end
318
+
319
+ # That annoying warning 'Duplicate subtrees while merging result' comes
320
+ # from here. You should add more '.as(...)' names to your intermediary tree.
321
+ #
291
322
  def warn_about_duplicate_keys(h1, h2)
292
323
  d = h1.keys & h2.keys
293
324
  unless d.empty?
@@ -0,0 +1,48 @@
1
+ module Parslet::Atoms
2
+ # Helper class that implements a transient cache that maps position and
3
+ # parslet object to results. This is used for memoization in the packrat
4
+ # style.
5
+ #
6
+ class Context
7
+ def initialize
8
+ @cache = Hash.new { |h, k| h[k] = {} }
9
+ end
10
+
11
+ # Caches a parse answer for obj at source.pos. Applying the same parslet
12
+ # at one position of input always yields the same result, unless the input
13
+ # has changed.
14
+ #
15
+ # We need the entire source here so we can ask for how many characters
16
+ # were consumed by a successful parse. Imitation of such a parse must
17
+ # advance the input pos by the same amount of bytes.
18
+ #
19
+ def cache(obj, source, &block)
20
+ beg = source.pos
21
+
22
+ # Not in cache yet? Return early.
23
+ unless entry = lookup(obj, beg)
24
+ result = yield
25
+
26
+ set obj, beg, [result, source.pos-beg]
27
+ return result
28
+ end
29
+
30
+ # the condition in unless has returned true, so entry is not nil.
31
+ result, advance = entry
32
+
33
+ # The data we're skipping here has been read before. (since it is in
34
+ # the cache) PLUS the actual contents are not interesting anymore since
35
+ # we know obj matches at beg. So skip reading.
36
+ source.pos = beg + advance
37
+ return result
38
+ end
39
+
40
+ private
41
+ def lookup(obj, pos)
42
+ @cache[pos][obj]
43
+ end
44
+ def set(obj, pos, val)
45
+ @cache[pos][obj] = val
46
+ end
47
+ end
48
+ end
@@ -18,8 +18,8 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
18
18
  @block = block
19
19
  end
20
20
 
21
- def try(io) # :nodoc:
22
- parslet.apply(io)
21
+ def try(source, context) # :nodoc:
22
+ parslet.apply(source, context)
23
23
  end
24
24
 
25
25
  def parslet