RubyGems - rly - Versions diffs - 0.1.0 - Mend

rly 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

data/.gitignore ADDED Viewed

@@ -0,0 +1,18 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp
+measurement/report.txt

data/.rspec ADDED Viewed

@@ -0,0 +1,3 @@
+--color
+--format progress
+-rspec_helper

data/.travis.yml ADDED Viewed

@@ -0,0 +1,4 @@
+language: ruby
+rvm:
+  - 1.9.3
+  - rbx-19mode

data/Gemfile ADDED Viewed

@@ -0,0 +1,6 @@
+source 'https://rubygems.org'
+gem 'rake', :group => :test
+gem 'pry-nav', :group => :development
+gemspec

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2012 Vladimir Pouzanov
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,18 @@
+[![Build Status](https://secure.travis-ci.org/farcaller/rly.png?branch=master)](http://travis-ci.org/farcaller/rly)
+# Rly
+Rly is a lexer and parser generator for Ruby, based on the ideas and solutions of
+Python's [Ply](http://www.dabeaz.com/ply/).
+## Installation
+Install via rubygems
+    gem install rly
+## Usage
+You need to create lexer and parser classes for each grammar you want to process.
+It is commonly done by subclassing {Rly::Lex} and {Rly::Parse} classes (check the
+appropriate docs).

data/Rakefile ADDED Viewed

@@ -0,0 +1,17 @@
+require "bundler/gem_tasks"
+Bundler::GemHelper.install_tasks
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new('spec')
+# require 'yardstick/rake/verify'
+# Yardstick::Rake::Verify.new do |verify|
+#   verify.threshold = 100
+# end
+# require 'yardstick/rake/measurement'
+# Yardstick::Rake::Measurement.new(:yardstick_measure) do |measurement|
+#   measurement.output = 'measurement/report.txt'
+# end
+task :default => :spec

data/lib/rly.rb ADDED Viewed

@@ -0,0 +1,6 @@
+require "rly/version"
+require "rly/lex"
+module Rly
+  # Top-level namespace of the rly gem. The constants themselves are defined
+  # in the files required above (rly/version and rly/lex).
+end

data/lib/rly/lex.rb ADDED Viewed

@@ -0,0 +1,302 @@
+require "rly/lex_token"
+module Rly
+  # Exception, which is returned on unhandled lexing errors.
+  class LexError < Exception; end
+  # Base class for your lexer.
+  #
+  # Generally, you define a new lexer by subclassing Rly::Lex. Your code should
+  # use methods {.token}, {.ignore}, {.literals}, {.on_error} to make the lexer
+  # configuration (check the methods documentation for details).
+  #
+  # Once you got your lexer configured, you can create its instances passing a
+  # String to be tokenized. You can then use either {#each} method or common
+  # *Enumerable* methods to get the processed tokens.
+  class Lex
+    include Enumerable
+    # Tracks the current line number for generated tokens
+    #
+    # *lineno*'s value should be increased manually. Check the example for a demo
+    # rule.
+    #
+    # @api semipublic
+    # @return [Fixnum] current line number
+    #
+    # @example
+    #   token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
+    attr_accessor :lineno
+    # Tracks the current position in the input string
+    #
+    # Genreally, it should only be used to skip a few characters in the error hander.
+    #
+    # @api semipublic
+    # @return [Fixnum] index of a starting character for current token
+    #
+    # @example
+    #   on_error do |t|
+    #     t.lexer.pos += 1
+    #     nil # skip the bad character
+    #   end
+    attr_accessor :pos
+    # Creates a new lexer instance for given input
+    #
+    # @api public
+    # @param input [String] a string to be tokenized
+    # @example
+    #   class MyLexer < Rly::Lex
+    #     ignore " "
+    #     token :LOWERS, /[a-z]+/
+    #     token :UPPERS, /[A-Z]+/
+    #   end
+    #
+    #   lex = MyLexer.new("hello WORLD")
+    #   lex.each do |tok|
+    #     puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+    #                                        #=> "UPPERS -> WORLD"
+    #   end
+    def initialize(input="")
+      @input = input
+      @pos = 0
+      @lineno = 0
+    end
+    # Processes the next token in input
+    #
+    # This is the main interface to lexer. If block is given, {#each} behaves like
+    # an usual enumerator, yielding the next token. If there is no block, {#each}
+    # returns an Enumerator object.
+    #
+    # {#each} Raises {LexError} if the input cannot be processed. This happens if
+    # there were no matches by 'token' rules and no matches by 'literals' rule.
+    # If the {.on_error} handler is not set, the exception will be raised immediately,
+    # however, if the handler is set, the eception will be raised only if the {#pos}
+    # after returning from error handler is still unchanged.
+    #
+    # @api public
+    # @yieldparam tok [LexToken] next processed token
+    # @raise [LexError] if the input cannot be processed
+    # @return [Enumerator] if block is not given
+    # @return [nil] if block is given
+    #
+    # @example
+    #   lex = MyLexer.new("hello WORLD")
+    #
+    #   lex.each #=> #<Enumerator: ...>
+    #
+    #   lex.each do |tok|
+    #     puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+    #                                        #=> "UPPERS -> WORLD"
+    #   end
+    def each
+      return self.to_enum unless block_given?
+      while @pos < @input.length
+        if self.class.ignores_list[@input[@pos]]
+          @pos += 1
+          next
+        end
+        matched = false
+        self.class.tokens.each do |type, rule, block|
+          m = rule.match(@input, @pos)
+          next unless m
+          tok = LexToken.new(type, m[0], self)
+          matched = true
+          tok = block.call(tok) if block
+          yield tok if tok.type
+          @pos = m.end(0)
+        end
+        unless matched
+          if self.class.literals_list[@input[@pos]]
+            tok = LexToken.new(@input[@pos], @input[@pos], self)
+            matched = true
+            yield tok
+            @pos += 1
+          end
+        end
+        unless matched
+          if self.class.error_hander
+            pos = @pos
+            tok = LexToken.new(:error, @input[@pos], self)
+            tok = self.class.error_hander.call(tok)
+            if pos == @pos
+              raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
+            else
+              yield tok if tok && tok.type
+            end
+          else
+            raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
+          end
+        end
+      end
+    end
+    class << self
+      # Returns the list of registered tokens
+      #
+      # @api private
+      # @visibility protected
+      # @return [Array] array of [type, regex, block] triples
+      def tokens
+        @tokens ||= []
+      end
+      # Returns the list of registered literals
+      #
+      # @api private
+      # @visibility protected
+      # @return [String] registered literals
+      def literals_list
+        @literals ||= ""
+      end
+      # Returns the list of registered ignorables
+      #
+      # @api private
+      # @visibility protected
+      # @return [String] registered ignorables
+      def ignores_list
+        @ignores ||= ""
+      end
+      # Returns the registered error handler, if any
+      #
+      # @api private
+      # @visibility protected
+      # @return [Proc] registered error handler
+      def error_hander
+        @error_block
+      end
+      private
+      # @!group DSL Class Methods
+      # Adds a token definition to a class
+      #
+      # This method adds a token definition to be lated used to tokenize input.
+      # It can be used to register normal tokens, and also functional tokens (the
+      # latter ones are processed as usual but are not being returned).
+      #
+      # @!visibility public
+      # @api public
+      # @param type [Symbol] token type. It should be an all-caps symbol by convention
+      # @param regex [Regexp] a regular expression to match the token
+      #
+      # @yieldparam tok [LexToken] a new token instance for processed input
+      # @yieldreturn [LexToken] the same or modified token instance. Return nil
+      #              to ignore the input
+      # @see .literals
+      # @see .ignores
+      # @example
+      #   class MyLexer < Rly::Lex
+      #     token :LOWERS, /[a-z]+/   # this would match LOWERS on 1+ lowercase letters
+      #
+      #     token :INT, /\d+/ do |t|  # this would match on integers
+      #       t.value = t.value.to_i  # additionally the value is converted to Fixnum
+      #       t                       # the updated token is returned
+      #     end
+      #
+      #     token /\n/ do |t|        # this would match on newlines
+      #       t.lexer.lineno += 1    # the block will be executed on match, but
+      #     end                      # no token will be returned (as name is not specified)
+      #
+      #   end
+      def token(*args, &block)
+        if args.length == 2
+          self.tokens << [args[0], args[1], block]
+        elsif args.length == 1
+          self.tokens << [nil, args[0], block]
+        else
+          raise ArgumentError
+        end
+      end
+      # Specifies a list of one-char literals
+      #
+      # Literals may be used in the case when you have several one-character tokens
+      # and you don't want to define them one by one using {.token} method.
+      #
+      # @!visibility public
+      # @api public
+      # @param lit [String] the list of literals
+      # @see .token
+      # @example
+      #   class MyLexer < Rly::Lex
+      #     literals "+-/*"
+      #   end
+      #
+      #   lex = MyLexer.new("+-")
+      #   lex.each do |tok|
+      #     puts "#{tok.type} -> #{tok.value}" #=> "+ -> +"
+      #                                        #=> "- -> -"
+      #   end
+      def literals(lit)
+        @literals = lit
+      end
+      # Specifies a list of one-char symbols to be ignored in input
+      #
+      # This method allows to skip over formatting symbols (like tabs and spaces) quickly.
+      #
+      # @!visibility public
+      # @api public
+      # @param ign [String] the list of ignored symbols
+      # @see .token
+      # @example
+      #   class MyLexer < Rly::Lex
+      #     literals "+-"
+      #     token :INT, /\d+/
+      #     ignore " \t"
+      #   end
+      #
+      #   lex = MyLexer.new("2 + 2")
+      #   lex.each do |tok|
+      #     puts "#{tok.type} -> #{tok.value}" #=> "INT -> 2"
+      #                                        #=> "+ -> +"
+      #                                        #=> "INT -> 2"
+      #   end
+      def ignore(ign)
+        @ignores = ign
+      end
+      # Specifies a block that should be called on error
+      #
+      # In case of lexing error the lexer first tries to fix it by providing a
+      # chance for developer to look on the failing character. If this block is
+      # not provided, the lexing error always results in {LexError}.
+      #
+      # You must increment the lexer's {#pos} as part of the action. You may also
+      # return a new {LexToken} or nil to skip the input
+      #
+      # @!visibility public
+      # @api public
+      # @see .token
+      # @example
+      #   class MyLexer < Rly::Lex
+      #     token :INT, /\d+/
+      #     on_error do |tok|
+      #       tok.lexer.pos += 1 # just skip the offending character
+      #     end
+      #   end
+      #
+      #   lex = MyLexer.new("123qwe")
+      #   lex.each do |tok|
+      #     puts "#{tok.type} -> #{tok.value}" #=> "INT -> 123"
+      #   end
+      def on_error(&block)
+        @error_block = block
+      end
+    end
+  end
+end

data/lib/rly/lex_token.rb ADDED Viewed

@@ -0,0 +1,13 @@
+module Rly
+  # A token object: a plain holder for a type, a value and a lexer reference.
+  class LexToken
+    # The token value. It is writable, so it can be replaced after the token
+    # has been created.
+    attr_accessor :value
+    # The token type and the lexer passed to the constructor (both read-only).
+    attr_reader :type, :lexer
+    # Creates a token, storing all three arguments as given.
+    #
+    # @param type the token type
+    # @param value the token value
+    # @param lexer the lexer the token is associated with
+    def initialize(type, value, lexer)
+      @type = type
+      @value = value
+      @lexer = lexer
+    end
+  end
+end

data/lib/rly/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Rly
+  # Version string of the rly gem.
+  VERSION = "0.1.0"
+end

data/rly.gemspec ADDED Viewed

@@ -0,0 +1,21 @@
+# -*- encoding: utf-8 -*-
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'rly/version'
+# Gem specification for rly.
+Gem::Specification.new do |gem|
+  gem.name          = "rly"
+  # The version comes from lib/rly/version.rb, required above.
+  gem.version       = Rly::VERSION
+  gem.authors       = ["Vladimir Pouzanov"]
+  gem.email         = ["farcaller@gmail.com"]
+  gem.description   = "A simple ruby implementation of lex and yacc, based on Python's ply"
+  gem.summary       = "A simple ruby implementation of lex and yacc, based on Python's ply"
+  gem.homepage      = ""
+  # The file list is taken from the output of `git ls-files`, split on the
+  # input record separator ($/).
+  gem.files         = `git ls-files`.split($/)
+  # Executables are the base names of the listed files under bin/.
+  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  # Test files are the listed files under test/, spec/ or features/.
+  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+  gem.require_paths = ["lib"]
+  gem.add_development_dependency 'rspec'
+end

data/spec/lex/lexer_spec.rb ADDED Viewed

@@ -0,0 +1,144 @@
+require "rly"
+describe Rly::Lex do
+  context "Simple Lexer" do
+    testLexer = Class.new(Rly::Lex) do
+      token :FIRST, /[a-z]+/
+      token :SECOND, /[A-Z]+/
+    end
+    it "should have a list of defined tokens" do
+      testLexer.tokens.map { |t, r, b| t }.should == [:FIRST, :SECOND]
+    end
+    it "should output tokens one by one" do
+      test = 'qweASDzxc'
+      l = testLexer.new(test).to_enum
+      tok = l.next
+      tok.type.should == :FIRST
+      tok.value.should == 'qwe'
+      tok = l.next
+      tok.type.should == :SECOND
+      tok.value.should == 'ASD'
+      tok = l.next
+      tok.type.should == :FIRST
+      tok.value.should == 'zxc'
+      expect { l.next } .to raise_error(StopIteration)
+    end
+  end
+  context "Literals Lexer" do
+    testLexer = Class.new(Rly::Lex) do
+      literals "+-*/"
+    end
+    it "should output literal tokens" do
+      test = '++--'
+      l = testLexer.new(test).to_enum
+      l.next.value.should == '+'
+      l.next.value.should == '+'
+      l.next.value.should == '-'
+      l.next.value.should == '-'
+    end
+  end
+  context "Ignores Lexer" do
+    testLexer = Class.new(Rly::Lex) do
+      ignore " \t"
+    end
+    it "should honour ignores list" do
+      test = "     \t\t  \t    \t"
+      l = testLexer.new(test).to_enum
+      expect { l.next } .to raise_error(StopIteration)
+    end
+  end
+  context "Block-based Token Lexer" do
+    testLexer = Class.new(Rly::Lex) do
+      token :TEST, /\d+/ do |t|
+        t.value = t.value.to_i
+        t
+      end
+    end
+    it "calls a block to further process a token" do
+      test = "42"
+      l = testLexer.new(test).to_enum
+      l.next.value == 42
+    end
+  end
+  context "Non-outputtable tokens Lexer" do
+    testLexer = Class.new(Rly::Lex) do
+      token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
+    end
+    it "process but don't output tokens without a name" do
+      test = "\n\n\n"
+      l = testLexer.new(test)
+      expect { l.to_enum.next } .to raise_error(StopIteration)
+      l.lineno.should == 3
+    end
+  end
+  context "Error handling" do
+    it "raises an error, if there are no suitable tokens" do
+      testLexer = Class.new(Rly::Lex) do
+        token :NUM, /\d+/
+      end
+      l = testLexer.new("test")
+      expect { l.to_enum.next } .to raise_error(Rly::LexError)
+    end
+    it "raises an error, if there is no possible tokens defined" do
+      testLexer = Class.new(Rly::Lex) do ; end
+      l = testLexer.new("test")
+      expect { l.to_enum.next } .to raise_error(Rly::LexError)
+    end
+    it "calls an error function if it is available, which returns a fixed token" do
+      testLexer = Class.new(Rly::Lex) do
+        token :NUM, /\d+/
+        on_error do |t|
+          t.value = "BAD #{t.value}"
+          t.lexer.pos += 1
+          t
+        end
+      end
+      l = testLexer.new("test")
+      tok = l.to_enum.next
+      tok.value.should == "BAD t"
+      tok.type.should == :error
+      tok = l.to_enum.next
+      tok.value.should == "BAD e"
+      tok.type.should == :error
+    end
+    it "calls an error function if it is available, which can skip a token" do
+      testLexer = Class.new(Rly::Lex) do
+        token :NUM, /\d+/
+        on_error do |t|
+          t.lexer.pos += 1
+          nil
+        end
+      end
+      l = testLexer.new("test1")
+      l.to_enum.next.value.should == '1'
+    end
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,14 @@
+require "rubygems"
+require "bundler/setup"
+RSpec.configure do |config|
+  config.treat_symbols_as_metadata_keys_with_true_values = true
+  config.run_all_when_everything_filtered = true
+  config.filter_run :focus
+  # Run specs in random order to surface order dependencies. If you find an
+  # order dependency and want to debug it, you can fix the order by providing
+  # the seed, which is printed after each run.
+  #     --seed 1234
+  config.order = 'random'
+end

metadata ADDED Viewed

@@ -0,0 +1,78 @@
+--- !ruby/object:Gem::Specification
+name: rly
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+  prerelease:
+platform: ruby
+authors:
+- Vladimir Pouzanov
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2012-11-09 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: A simple ruby implementation of lex and yacc, based on Python's ply
+email:
+- farcaller@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- .rspec
+- .travis.yml
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- lib/rly.rb
+- lib/rly/lex.rb
+- lib/rly/lex_token.rb
+- lib/rly/version.rb
+- rly.gemspec
+- spec/lex/lexer_spec.rb
+- spec/spec_helper.rb
+homepage: ''
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.21
+signing_key:
+specification_version: 3
+summary: A simple ruby implementation of lex and yacc, based on Python's ply
+test_files:
+- spec/lex/lexer_spec.rb
+- spec/spec_helper.rb
+has_rdoc: