RubyGems - ghazel-parslet - Versions diffs - 1.4.0.1 - Mend

ghazel-parslet 1.4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

data/HISTORY.txt +195 -0
data/LICENSE +23 -0
data/README +70 -0
data/Rakefile +49 -0
data/example/boolean_algebra.rb +70 -0
data/example/calc.rb +153 -0
data/example/comments.rb +35 -0
data/example/deepest_errors.rb +131 -0
data/example/documentation.rb +18 -0
data/example/email_parser.rb +52 -0
data/example/empty.rb +13 -0
data/example/erb.rb +47 -0
data/example/ignore.rb +33 -0
data/example/ip_address.rb +125 -0
data/example/json.rb +128 -0
data/example/local.rb +34 -0
data/example/mathn.rb +44 -0
data/example/minilisp.rb +94 -0
data/example/modularity.rb +47 -0
data/example/nested_errors.rb +132 -0
data/example/output/boolean_algebra.out +4 -0
data/example/output/calc.out +1 -0
data/example/output/comments.out +8 -0
data/example/output/deepest_errors.out +54 -0
data/example/output/documentation.err +4 -0
data/example/output/documentation.out +1 -0
data/example/output/email_parser.out +2 -0
data/example/output/empty.err +1 -0
data/example/output/erb.out +7 -0
data/example/output/ignore.out +1 -0
data/example/output/ignore_whitespace.out +1 -0
data/example/output/ip_address.out +9 -0
data/example/output/json.out +5 -0
data/example/output/local.out +3 -0
data/example/output/mathn.out +4 -0
data/example/output/minilisp.out +5 -0
data/example/output/modularity.out +0 -0
data/example/output/nested_errors.out +54 -0
data/example/output/parens.out +8 -0
data/example/output/readme.out +1 -0
data/example/output/seasons.out +28 -0
data/example/output/sentence.out +1 -0
data/example/output/simple_xml.out +2 -0
data/example/output/string_parser.out +3 -0
data/example/parens.rb +42 -0
data/example/readme.rb +30 -0
data/example/seasons.rb +46 -0
data/example/sentence.rb +36 -0
data/example/simple.lit +3 -0
data/example/simple_xml.rb +54 -0
data/example/string_parser.rb +77 -0
data/example/test.lit +4 -0
data/lib/parslet.rb +254 -0
data/lib/parslet/atoms.rb +32 -0
data/lib/parslet/atoms/alternative.rb +50 -0
data/lib/parslet/atoms/base.rb +124 -0
data/lib/parslet/atoms/can_flatten.rb +137 -0
data/lib/parslet/atoms/context.rb +94 -0
data/lib/parslet/atoms/dsl.rb +98 -0
data/lib/parslet/atoms/entity.rb +41 -0
data/lib/parslet/atoms/lookahead.rb +49 -0
data/lib/parslet/atoms/named.rb +32 -0
data/lib/parslet/atoms/re.rb +38 -0
data/lib/parslet/atoms/repetition.rb +63 -0
data/lib/parslet/atoms/rule.rb +12 -0
data/lib/parslet/atoms/rule/position.rb +143 -0
data/lib/parslet/atoms/sequence.rb +38 -0
data/lib/parslet/atoms/str.rb +37 -0
data/lib/parslet/atoms/visitor.rb +89 -0
data/lib/parslet/cause.rb +94 -0
data/lib/parslet/convenience.rb +35 -0
data/lib/parslet/error_reporter.rb +7 -0
data/lib/parslet/error_reporter/deepest.rb +95 -0
data/lib/parslet/error_reporter/tree.rb +57 -0
data/lib/parslet/export.rb +162 -0
data/lib/parslet/expression.rb +51 -0
data/lib/parslet/expression/treetop.rb +92 -0
data/lib/parslet/parser.rb +67 -0
data/lib/parslet/pattern.rb +114 -0
data/lib/parslet/pattern/binding.rb +49 -0
data/lib/parslet/rig/rspec.rb +51 -0
data/lib/parslet/slice.rb +101 -0
data/lib/parslet/source.rb +62 -0
data/lib/parslet/source/line_cache.rb +95 -0
data/lib/parslet/transform.rb +236 -0
data/lib/parslet/transform/context.rb +32 -0
metadata +264 -0

data/example/string_parser.rb ADDED

@@ -0,0 +1,77 @@
+# A more complex parser that illustrates how a compiler might be constructed.
+# The parser recognizes string and integer literals and constructs an almost
+# useful AST from the file contents.
+require 'pp'
+$:.unshift File.dirname(__FILE__) + "/../lib"
+require 'parslet'
+include Parslet
+class LiteralsParser < Parslet::Parser
+  rule :space do
+    (match '[ ]').repeat(1)
+  end
+  rule :literals do
+    (literal >> eol).repeat
+  end
+  rule :literal do
+    (integer | string).as(:literal) >> space.maybe
+  end
+  rule :string do
+    str('"') >>
+    (
+      (str('\\') >> any) |
+      (str('"').absent? >> any)
+    ).repeat.as(:string) >>
+    str('"')
+  end
+  rule :integer do
+    match('[0-9]').repeat(1).as(:integer)
+  end
+  rule :eol do
+    line_end.repeat(1)
+  end
+  rule :line_end do
+    crlf >> space.maybe
+  end
+  rule :crlf do
+    match('[\r\n]').repeat(1)
+  end
+  root :literals
+end
+input_name = File.join(File.dirname(__FILE__), 'simple.lit')  # sample input located next to this script
+file = File.read(input_name)                                   # entire file contents as one string
+parsetree = LiteralsParser.new.parse(file)                     # intermediary tree produced by LiteralsParser
+class Lit < Struct.new(:text)
+  def to_s
+    text.inspect
+  end
+end
+# AST node for a string literal; inherits Lit#to_s unchanged.
+class StringLit < Lit; end
+class IntLit < Lit
+  def to_s
+    text
+  end
+end
+transform = Parslet::Transform.new do
+  rule(:literal => {:integer => simple(:x)}) { IntLit.new(x) }
+  rule(:literal => {:string => simple(:s)}) { StringLit.new(s) }
+end
+ast = transform.apply(parsetree)
+pp ast

data/example/test.lit ADDED

@@ -0,0 +1,4 @@
+"THis is a string"
+"This is another string"
+"This string is escaped \"embedded quoted stuff \" "
+12 // an integer literal and a comment

data/lib/parslet.rb ADDED

@@ -0,0 +1,254 @@
+# A simple parser generator library. Typical usage would look like this:
+#
+#   require 'parslet'
+#
+#   class MyParser < Parslet::Parser
+#     rule(:a) { str('a').repeat }
+#     root(:a)
+#   end
+#
+#   pp MyParser.new.parse('aaaa')   # => 'aaaa'@0
+#   pp MyParser.new.parse('bbbb')   # => Parslet::ParseFailed:
+#                                   #    Don't know what to do with bbbb at line 1 char 1.
+#
+# The simple DSL allows you to define grammars in PEG-style. This kind of
+# grammar construction does away with the ambiguities that usually come with
+# parsers; instead, it allows you to construct grammars that are easier to
+# debug, since less magic is involved.
+#
+# Parslet is typically used in stages:
+#
+#
+# * Parsing the input string; this yields an intermediary tree, see
+#   Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
+#   Parslet::ClassMethods#root.
+# * Transformation of the tree into something useful to you, see
+#   Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
+#
+# The first stage is traditionally intermingled with the second stage; output
+# from the second stage is usually called the 'Abstract Syntax Tree' or AST.
+#
+# The stages are completely decoupled; You can change your grammar around and
+# use the second stage to isolate the rest of your code from the changes
+# you've effected.
+#
+# == Further reading
+#
+# All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
+# look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
+# {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
+# {Parslet::Atoms::Alternative}.
+#
+# == When things go wrong
+#
+# A parse that fails will raise {Parslet::ParseFailed}. This exception contains
+# all the details of what went wrong, including a detailed error trace that
+# can be printed out as an ascii tree. ({Parslet::Cause})
+#
+module Parslet
+  # Extends classes that include Parslet with the module
+  # {Parslet::ClassMethods}.
+  #
+  def self.included(base)
+    base.extend(ClassMethods)
+  end
+  # Raised when the parse failed to match. It contains the message that should
+  # be presented to the user. More details can be extracted from the
+  # exceptions #cause member: It contains an instance of {Parslet::Cause} that
+  # stores all the details of your failed parse in a tree structure.
+  #
+  #   begin
+  #     parslet.parse(str)
+  #   rescue Parslet::ParseFailed => failure
+  #     puts failure.cause.ascii_tree
+  #   end
+  #
+  # Alternatively, you can just require 'parslet/convenience' and call the
+  # method #parse_with_debug instead of #parse. This method will never raise
+  # and print error trees to stdout.
+  #
+  #   require 'parslet/convenience'
+  #   parslet.parse_with_debug(str)
+  #
+  class ParseFailed < StandardError
+    def initialize(message, cause=nil)
+      super(message)
+      @cause = cause
+    end
+    # Why the parse failed.
+    #
+    # @return [Parslet::Cause]
+    attr_reader :cause
+  end
+  # Raised when a parse succeeded but left input unconsumed. Looking at the
+  # error tree is of limited use in this case: the parser may have worked
+  # just fine and simply not accounted for the characters at the tail of the
+  # input.
+  #
+  #   str('foo').parse('foobar')
+  #   # raises Parslet::UnconsumedInput:
+  #   #   Don't know what to do with "bar" at line 1 char 4.
+  #
+  # Note that you can have parslet ignore this error:
+  #
+  #   str('foo').parse('foobar', prefix: true)  # => "foo"@0
+  #
+  class UnconsumedInput < ParseFailed; end
+  module ClassMethods
+    # Define an entity for the parser. This generates a method of the same
+    # name that can be used as part of other patterns. Those methods can be
+    # freely mixed in your parser class with real ruby methods.
+    #
+    #   class MyParser
+    #     include Parslet
+    #
+    #     rule(:bar) { str('bar') }
+    #     rule(:twobar) do
+    #       bar >> bar
+    #     end
+    #
+    #     root :twobar
+    #   end
+    #
+    def rule(name, &definition)
+      define_method(name) do
+        @rules ||= {}     # <name, rule> memoization
+        return @rules[name] if @rules.has_key?(name)
+        # Capture the self of the parser class along with the definition.
+        definition_closure = proc {
+          self.instance_eval(&definition)
+        }
+        @rules[name] = Atoms::Rule.new(name, &definition_closure)
+      end
+    end
+  end
+  # Allows for delayed construction of #match. See also Parslet.match.
+  #
+  # @api private
+  class DelayedMatchConstructor
+    def [](str)
+      Atoms::Re.new("[" + str + "]")
+    end
+  end
+  # Returns an atom matching a character class. All regular expressions can be
+  # used, as long as they match only a single character at a time.
+  #
+  #   match('[ab]')     # will match either 'a' or 'b'
+  #   match('[\n\s]')   # will match newlines and spaces
+  #
+  # There is also another (convenience) form of this method:
+  #
+  #   match['a-z']      # synonymous to match('[a-z]')
+  #   match['\n']       # synonymous to match('[\n]')
+  #
+  # @overload match(str)
+  #   @param str [String] character class to match (regexp syntax)
+  #   @return [Parslet::Atoms::Re] a parslet atom
+  #
+  def match(str=nil)
+    return DelayedMatchConstructor.new unless str
+    return Atoms::Re.new(str)
+  end
+  module_function :match
+  # Returns an atom matching the +str+ given:
+  #
+  #   str('class')      # will match 'class'
+  #
+  # @param str [String] string to match verbatim
+  # @return [Parslet::Atoms::Str] a parslet atom
+  #
+  def str(str)
+    Atoms::Str.new(str)
+  end
+  module_function :str
+  # Returns an atom matching any character. It acts like the '.' (dot)
+  # character in regular expressions.
+  #
+  #   any.parse('a')    # => 'a'
+  #
+  # @return [Parslet::Atoms::Re] a parslet atom
+  #
+  def any
+    Atoms::Re.new('.')
+  end
+  module_function :any
+  # A special kind of atom that allows embedding whole treetop expressions
+  # into parslet construction.
+  #
+  #   # the same as str('a') >> str('b').maybe
+  #   exp(%Q("a" "b"?))
+  #
+  # @param str [String] a treetop expression
+  # @return [Parslet::Atoms::Base] the corresponding parslet parser
+  #
+  def exp(str)
+    Parslet::Expression.new(str).to_parslet
+  end
+  module_function :exp
+  # Returns a placeholder for a tree transformation that will only match a
+  # sequence of elements. The +symbol+ you specify will be the key for the
+  # matched sequence in the returned dictionary.
+  #
+  #   # This would match a body element that contains several declarations.
+  #   { :body => sequence(:declarations) }
+  #
+  # The above example would match <code>:body => ['a', 'b']</code>, but not
+  # <code>:body => 'a'</code>.
+  #
+  # see {Parslet::Transform}
+  #
+  def sequence(symbol)
+    Pattern::SequenceBind.new(symbol)
+  end
+  module_function :sequence
+  # Returns a placeholder for a tree transformation that will only match
+  # simple elements. This matches everything that <code>#sequence</code>
+  # doesn't match.
+  #
+  #   # Matches a single header.
+  #   { :header => simple(:header) }
+  #
+  # see {Parslet::Transform}
+  #
+  def simple(symbol)
+    Pattern::SimpleBind.new(symbol)
+  end
+  module_function :simple
+  # Returns a placeholder for tree transformation patterns that will match
+  # any kind of subtree.
+  #
+  #   { :expression => subtree(:exp) }
+  #
+  def subtree(symbol)
+    Pattern::SubtreeBind.new(symbol)
+  end
+  module_function :subtree
+  autoload :Expression, 'parslet/expression'
+end
+require 'parslet/slice'
+require 'parslet/cause'
+require 'parslet/source'
+require 'parslet/atoms'
+require 'parslet/pattern'
+require 'parslet/pattern/binding'
+require 'parslet/transform'
+require 'parslet/parser'
+require 'parslet/error_reporter'

data/lib/parslet/atoms.rb ADDED

@@ -0,0 +1,32 @@
+# This is where parslet's name comes from: Small parser atoms.
+#
+module Parslet::Atoms
+  # The precedence module controls parenthesis during the #inspect printing
+  # of parslets. It is not relevant to other aspects of the parsing.
+  #
+  module Precedence
+    prec = 0
+    BASE       = (prec+=1)    # everything else
+    LOOKAHEAD  = (prec+=1)    # &SOMETHING
+    REPETITION = (prec+=1)    # 'a'+, 'a'?
+    SEQUENCE   = (prec+=1)    # 'a' 'b'
+    ALTERNATE  = (prec+=1)    # 'a' | 'b'
+    OUTER      = (prec+=1)    # printing is done here.
+  end
+  require 'parslet/atoms/can_flatten'
+  require 'parslet/atoms/context'
+  require 'parslet/atoms/dsl'
+  require 'parslet/atoms/base'
+  require 'parslet/atoms/named'
+  require 'parslet/atoms/lookahead'
+  require 'parslet/atoms/alternative'
+  require 'parslet/atoms/sequence'
+  require 'parslet/atoms/repetition'
+  require 'parslet/atoms/re'
+  require 'parslet/atoms/str'
+  require 'parslet/atoms/entity'
+  require 'parslet/atoms/rule'
+end

data/lib/parslet/atoms/alternative.rb ADDED

@@ -0,0 +1,50 @@
+# Alternative during matching. Contains a list of parslets that is tried each
+# one in turn. Only fails if all alternatives fail.
+#
+# Example:
+#
+#   str('a') | str('b')   # matches either 'a' or 'b'
+#
+class Parslet::Atoms::Alternative < Parslet::Atoms::Base
+  attr_reader :alternatives
+  # Constructs an Alternative instance using all given parslets in the order
+  # given. This is what happens if you call '|' on existing parslets, like
+  # this:
+  #
+  #   str('a') | str('b')
+  #
+  def initialize(*alternatives)
+    super()
+    @alternatives = alternatives
+    @error_msg = "Expected one of #{alternatives.inspect}"
+  end
+  #---
+  # Don't construct a hanging tree of Alternative parslets, instead store them
+  # all here. This reduces the number of objects created.
+  #+++
+  def |(parslet)
+    self.class.new(*@alternatives + [parslet])
+  end
+  def try(source, context)
+    errors = alternatives.map { |a|
+      success, value = result = a.apply(source, context)
+      return result if success
+      # Aggregate all errors
+      value
+    }
+    # If we reach this point, all alternatives have failed.
+    context.err(self, source, @error_msg, errors)
+  end
+  precedence ALTERNATE
+  def to_s_inner(prec)
+    alternatives.map { |a| a.to_s(prec) }.join(' / ')
+  end
+end

data/lib/parslet/atoms/base.rb ADDED

@@ -0,0 +1,124 @@
+# Base class for all parslets, handles orchestration of calls and implements
+# a lot of the operator and chaining methods.
+#
+# Also see Parslet::Atoms::DSL for chaining parslet atoms together.
+#
+class Parslet::Atoms::Base
+  include Parslet::Atoms::Precedence
+  include Parslet::Atoms::DSL
+  include Parslet::Atoms::CanFlatten
+  # Given a string or an IO object, this will attempt a parse of its contents
+  # and return a result. If the parse fails, a Parslet::ParseFailed exception
+  # will be thrown.
+  #
+  # @param io [String, Source] input for the parse process
+  # @option options [Parslet::ErrorReporter] :reporter error reporter to use,
+  #   defaults to Parslet::ErrorReporter::Tree
+  # @option options [Boolean] :prefix Should a prefix match be accepted?
+  #   (default: false)
+  # @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
+  #   tree
+  #
+  def parse(io, options={})
+    source = io.respond_to?(:line_and_column) ?
+      io :
+      Parslet::Source.new(io)
+    # Try to cheat. Assuming that we'll be able to parse the input, don't
+    # run error reporting code.
+    success, value = setup_and_apply(source, nil)
+    # If we didn't succeed the parse, raise an exception for the user.
+    # Stack trace will be off, but the error tree should explain the reason
+    # it failed.
+    unless success
+      # Cheating has not paid off. Now pay the cost: Rerun the parse,
+      # gathering error information in the process.
+      reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
+      success, value = setup_and_apply(source, reporter)
+      fail "Assertion failed: success was true when parsing with reporter" \
+        if success
+      # Value is a Parslet::Cause, which can be turned into an exception:
+      value.raise
+      fail "NEVER REACHED"
+    end
+    # assert: success is true
+    # If we haven't consumed the input, then the pattern doesn't match. Try
+    # to provide a good error message (even asking down below)
+    if !options[:prefix] && source.chars_left > 0
+      old_pos = source.pos
+      Parslet::Cause.format(
+        source, old_pos,
+        "Don't know what to do with #{source.consume(10).to_s.inspect}").
+        raise(Parslet::UnconsumedInput)
+    end
+    return flatten(value)
+  end
+  # Creates a context for parsing and applies the current atom to the input.
+  # Returns the parse result.
+  #
+  # @return [<Boolean, Object>] Result of the parse. If the first member is
+  #   true, the parse has succeeded.
+  def setup_and_apply(source, error_reporter)
+    context = Parslet::Atoms::Context.new(error_reporter)
+    apply(source, context)
+  end
+  #---
+  # Calls the #try method of this parslet. In case of a parse error, apply
+  # leaves the source in the state it was before the attempt.
+  #+++
+  def apply(source, context)
+    old_pos = source.pos
+    #success, value = result = context.try_with_cache(self, source)
+    success, value = result = try(source, context)
+    return result if success
+    # We only reach this point if the parse has failed. Rewind the input.
+    source.pos = old_pos
+    return result
+  end
+  # Override this in your Atoms::Base subclasses to implement parsing
+  # behaviour.
+  #
+  def try(source, context)
+    raise NotImplementedError, \
+      "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
+  end
+  # Debug printing - in Treetop syntax.
+  #
+  def self.precedence(prec)
+    define_method(:precedence) { prec }
+  end
+  precedence BASE
+  def to_s(outer_prec=OUTER)
+    if outer_prec < precedence
+      "("+to_s_inner(precedence)+")"
+    else
+      to_s_inner(precedence)
+    end
+  end
+  def inspect  # same rendering as #to_s, requested at OUTER precedence
+    to_s(OUTER)
+  end
+private
+  # Builds the success tuple +[true, result]+ (a true first member marks success).
+  #
+  def succ(result)
+    [true, result]
+  end
+end