rlex 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +74 -0
- data/Rakefile +2 -0
- data/lib/rlex.rb +8 -0
- data/lib/rlex/lexer.rb +172 -0
- data/lib/rlex/token.rb +16 -0
- data/lib/rlex/version.rb +4 -0
- data/rlex.gemspec +20 -0
- data/spec/rlex/lexer_spec.rb +69 -0
- data/spec/spec_helper.rb +11 -0
- metadata +69 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Rasmus Borgsmidt
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,74 @@
# Rlex

Provides a simple lexer based on the <tt>StringScanner</tt> class.

The lexer was written for use with Racc, a Ruby variant of
Yacc. But there is no code dependency on that project so the
lexer may also be used on its own or with other packages.

* Ignored input takes precedence over rules and keywords, so if
  a prefix is matched by an ignore pattern, it's ignored even if
  it's also a keyword or matched by a rule
* The lexer is greedy, so if a prefix is matched by multiple
  rules or keywords, the lexer chooses the option consuming the
  most input

## Documentation

Find out more about this project on [GitHub][gh_rlex],
[RubyGems][rg_rlex] and [RubyDoc][rd_rlex].

[gh_rlex]: https://github.com/borgsmidt/rlex
[rg_rlex]: http://rubygems.org/gems/rlex
[rd_rlex]: http://rubydoc.info/gems/rlex

## Installation

Install the gem using:

    $ gem install rlex

Alternatively, add this line to your application's Gemfile:

    gem 'rlex'

Then execute:

    $ bundle

## Usage

```ruby
# Define behavior
lexer = Lexer.new
lexer.ignore /\s+/            # ignore whitespace
lexer.rule :word, /\w+/       # consider any text a 'word'
lexer.keyword :if             # treat 'if' as a special keyword
lexer.keyword :lparen, "("    # any fixed input such as parentheses
lexer.keyword :rparen, ")"    # may be defined as keywords

# Initialize with input
lexer.start "if ( foo ) bar"  # initialize the lexer with a string

# Iterate through tokens
lexer.next_token              # => Token (type = :if,     value = 'if' )
lexer.next_token              # => Token (type = :lparen, value = '('  )
lexer.next_token              # => Token (type = :word,   value = 'foo')
lexer.next_token              # => Token (type = :rparen, value = ')'  )
lexer.next_token              # => Token (type = :word,   value = 'bar')
lexer.next_token              # => EOF_TOKEN
```

## Contributing

1. Fork it on [GitHub][gh_rlex]
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Added some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create new Pull Request

## License

Written by Rasmus Borgsmidt <<rasmus@borgsmidt.dk>>

Released under the MIT license: www.opensource.org/licenses/MIT
data/Rakefile
ADDED
data/lib/rlex.rb
ADDED
data/lib/rlex/lexer.rb
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
require 'rlex/token'
|
3
|
+
|
4
|
+
module Rlex
|
5
|
+
# Implements a simple lexer using a <tt>StringScanner</tt>.
|
6
|
+
#
|
7
|
+
# The lexer was written for use with Racc, a Ruby variant of
|
8
|
+
# Yacc. But there is no code dependency on that project so the
|
9
|
+
# lexer may also be used on its own or with other packages.
|
10
|
+
#
|
11
|
+
# * Ignored input takes precedence over rules and keywords, so if
|
12
|
+
# a prefix is matched by an ignore pattern, it's ignored even if
|
13
|
+
# it's also a keyword or matched by a rule
|
14
|
+
# * The lexer is greedy, so if a prefix is matched by multiple
|
15
|
+
# rules or keywords, the lexer chooses the option consuming the
|
16
|
+
# most input
|
17
|
+
#
|
18
|
+
# @author Rasmus Borgsmidt <rasmus@borgsmidt.dk>
|
19
|
+
#
|
20
|
+
# @example Basic usage
|
21
|
+
# # Define behavior
|
22
|
+
# lexer = Lexer.new
|
23
|
+
# lexer.ignore /\s+/ # ignore whitespace
|
24
|
+
# lexer.rule :word, /\w+/ # consider any text a 'word'
|
25
|
+
# lexer.keyword :if # treat 'if' as a special keyword
|
26
|
+
# lexer.keyword :lparen, "(" # any fixed input such as parentheses
|
27
|
+
# lexer.keyword :rparen, ")" # may be defined as keywords
|
28
|
+
#
|
29
|
+
# # Initialize with input
|
30
|
+
# lexer.start "if ( foo ) bar" # initialize the lexer with a string
|
31
|
+
#
|
32
|
+
# # Iterate through tokens
|
33
|
+
# lexer.next_token # => Token (type = :if, value = 'if')
|
34
|
+
# lexer.next_token # => Token (type = :lparen, value = '(')
|
35
|
+
# lexer.next_token # => Token (type = :word, value = 'foo')
|
36
|
+
# lexer.next_token # => Token (type = :rparen, value = ')')
|
37
|
+
# lexer.next_token # => Token (type = :word, value = 'bar')
|
38
|
+
# lexer.next_token # => EOF_TOKEN
|
39
|
+
#
|
40
|
+
class Lexer
|
41
|
+
# Initializes an empty Lexer.
|
42
|
+
#
|
43
|
+
def initialize
|
44
|
+
@ignored = []
|
45
|
+
@rules = []
|
46
|
+
@keywords = {}
|
47
|
+
end
|
48
|
+
|
49
|
+
# Instructs the lexer to ignore input matched by the specified
|
50
|
+
# pattern. If appropriate, call this multiple times to ignore
|
51
|
+
# several patterns.
|
52
|
+
#
|
53
|
+
# @note Ignored input takes precedence over rules and keywords,
|
54
|
+
# so if a prefix is matched by an ignore pattern, it's ignored
|
55
|
+
# even if it's also a keyword or matched by a rule
|
56
|
+
#
|
57
|
+
# @param [Regexp] pattern Pattern of input to ignore
|
58
|
+
#
|
59
|
+
# @return [Regexp] The specified pattern
|
60
|
+
#
|
61
|
+
def ignore(pattern)
|
62
|
+
@ignored << pattern
|
63
|
+
return pattern
|
64
|
+
end
|
65
|
+
|
66
|
+
# Defines a rule to match the specified pattern.
|
67
|
+
#
|
68
|
+
# @note Use keywords for efficiency instead of rules whenever
|
69
|
+
# the matched input is static
|
70
|
+
#
|
71
|
+
# @param [Symbol, #to_sym] name Unique name of rule
|
72
|
+
# @param [Regexp] pattern Pattern of input to match
|
73
|
+
#
|
74
|
+
# @raise [ArgumentError] If the specified name is already
|
75
|
+
# used by other rules or keywords
|
76
|
+
#
|
77
|
+
# @return [Symbol] The name of the rule
|
78
|
+
#
|
79
|
+
def rule(name, pattern)
|
80
|
+
# @todo Validate the rule name
|
81
|
+
@rules << (Rule.new name.to_sym, pattern)
|
82
|
+
return name.to_sym
|
83
|
+
end
|
84
|
+
|
85
|
+
# Defines a static sequence of input as a keyword.
|
86
|
+
#
|
87
|
+
# @note Use keywords for efficiency instead of rules whenever
|
88
|
+
# the matched input is static
|
89
|
+
#
|
90
|
+
# @param [optional, Symbol, #to_sym] name Unique name of the
|
91
|
+
# keyword. If this argument is not given, the keyword is used
|
92
|
+
# to name itself
|
93
|
+
# @param [String, #to_s] kword Sequence of input to match as a
|
94
|
+
# keyword
|
95
|
+
#
|
96
|
+
# @raise [ArgumentError] If the specified name is already
|
97
|
+
# used by other rules or keywords
|
98
|
+
#
|
99
|
+
# @return [Symbol] The name of the keyword
|
100
|
+
#
|
101
|
+
def keyword(name = nil, kword)
|
102
|
+
# @todo Validate the keyword name
|
103
|
+
name = kword if name == nil
|
104
|
+
pattern = Regexp.new(Regexp.escape kword.to_s)
|
105
|
+
rule name, pattern
|
106
|
+
@keywords[kword.to_s] = Token.new name.to_sym, kword.to_s
|
107
|
+
return name.to_sym
|
108
|
+
end
|
109
|
+
|
110
|
+
# Initializes the lexer with new input.
|
111
|
+
#
|
112
|
+
# @note This resets the lexer with a new StringScanner so any
|
113
|
+
# state information related to previous input is lost
|
114
|
+
#
|
115
|
+
# @param [String] input Input to scan for tokens
|
116
|
+
#
|
117
|
+
# @return [String] The specified input
|
118
|
+
#
|
119
|
+
def start(input)
|
120
|
+
@scanner = StringScanner.new input
|
121
|
+
return input
|
122
|
+
end
|
123
|
+
|
124
|
+
# Returns the next token matched from the remaining input. If no
|
125
|
+
# input is left, or the lexer has not been initialized,
|
126
|
+
# <tt>EOF_TOKEN</tt> is returned.
|
127
|
+
#
|
128
|
+
# @raise [RuntimeError] If there is any unmatched input
|
129
|
+
#
|
130
|
+
# @return [Token] Next token or <tt>EOF_TOKEN</tt>
|
131
|
+
#
|
132
|
+
def next_token
|
133
|
+
return EOF_TOKEN if @scanner.nil? or @scanner.empty?
|
134
|
+
return next_token if ignore_prefix?
|
135
|
+
rule = greediest_rule
|
136
|
+
if rule
|
137
|
+
prefix = @scanner.scan(rule.pattern)
|
138
|
+
keyword = @keywords[prefix]
|
139
|
+
return keyword ? keyword : Token.new(rule.name, prefix)
|
140
|
+
end
|
141
|
+
raise "unexpected input <#{@scanner.peek(5)}>"
|
142
|
+
end
|
143
|
+
|
144
|
+
private
|
145
|
+
|
146
|
+
# @private
|
147
|
+
Rule = Struct.new :name, :pattern
|
148
|
+
|
149
|
+
# @private
|
150
|
+
def ignore_prefix?
|
151
|
+
@ignored.each do |pattern|
|
152
|
+
prefix = @scanner.scan(pattern)
|
153
|
+
return true if prefix
|
154
|
+
end
|
155
|
+
return false
|
156
|
+
end
|
157
|
+
|
158
|
+
# @private
|
159
|
+
def greediest_rule
|
160
|
+
r = nil
|
161
|
+
len = 0
|
162
|
+
@rules.each do |rule|
|
163
|
+
prefix = @scanner.check(rule.pattern)
|
164
|
+
if prefix and prefix.length > len
|
165
|
+
r = rule
|
166
|
+
len = prefix.length
|
167
|
+
end
|
168
|
+
end
|
169
|
+
return r
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
data/lib/rlex/token.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module Rlex
  # Lightweight value object representing a piece of input matched
  # by the lexer.
  #
  # @author Rasmus Borgsmidt <rasmus@borgsmidt.dk>
  #
  # @attr_reader [Symbol] type Type of the token, such as the name of
  #   the rule used to match it
  # @attr_reader [String] value Text matched from the input
  #
  Token = Struct.new(:type, :value)

  # Sentinel token returned once the lexer has exhausted the
  # specified input.
  #
  EOF_TOKEN = Token.new(:eof, "")
end
|
data/lib/rlex/version.rb
ADDED
data/rlex.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/rlex/version', __FILE__)

# Gem packaging manifest for rlex. The shipped file list comes from
# `git ls-files`, so only files tracked by git are packaged.
Gem::Specification.new do |gem|
  gem.authors       = ["Rasmus Borgsmidt"]
  gem.email         = ["rasmus@borgsmidt.dk"]
  gem.description   = %q{Implements a simple lexer using a StringScanner}
  gem.summary       = %q{The lexer was written for use with Racc, a
                         Ruby variant of Yacc. But there is no code
                         dependency on that project so the lexer may
                         also be used on its own or with other packages.}
  gem.homepage      = "https://github.com/borgsmidt/rlex"

  gem.files         = `git ls-files`.split($\)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "rlex"
  gem.require_paths = ["lib"]
  gem.version       = Rlex::VERSION
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'spec_helper'
require 'rlex/token'
require 'rlex/lexer'
include Rlex

describe Lexer do
  # Fresh lexer per example, memoized by RSpec.
  let(:lexer) { Lexer.new }

  describe "next_token" do
    it "should return EOF_TOKEN when the lexer has been assigned no input" do
      lexer.next_token.should eq EOF_TOKEN
    end

    it "should return EOF_TOKEN when there is no more input" do
      lexer.start ""
      lexer.next_token.should eq EOF_TOKEN
    end

    it "should raise an error when remaining input is not matched by any rules" do
      lexer.start "unmatched input"
      lambda { lexer.next_token }.should raise_error RuntimeError
    end

    it "should return EOF_TOKEN when instructed to ignore everything" do
      lexer.ignore /.*/
      lexer.start "input to be ignored"
      lexer.next_token.should eq EOF_TOKEN
    end

    it "should return recognized keywords and finish with EOF_TOKEN" do
      lexer.ignore /\s+/
      lexer.keyword :special
      lexer.start " \t\nspecialspecial special "
      special = Token.new :special, "special"
      3.times { lexer.next_token.should eq special }
      lexer.next_token.should eq EOF_TOKEN
    end

    it "should return tokens matched by regular rules and finish with EOF_TOKEN" do
      lexer.ignore /\s+/
      lexer.rule :word, /\w+/
      lexer.start "sentence with four tokens"
      %w[sentence with four tokens].each do |word|
        lexer.next_token.should eq Token.new(:word, word)
      end
      lexer.next_token.should eq EOF_TOKEN
    end

    it "should return all types of tokens and finish with EOF_TOKEN" do
      lexer.ignore /\s+/
      lexer.keyword :if
      lexer.keyword :lparen, "("
      lexer.keyword :rparen, ")"
      lexer.rule :word, /\w+/
      lexer.start "ifu ( if ) ifu"
      expected = [
        Token.new(:word, "ifu"),
        Token.new(:lparen, "("),
        Token.new(:if, "if"),
        Token.new(:rparen, ")"),
        Token.new(:word, "ifu")
      ]
      expected.each { |token| lexer.next_token.should eq token }
      lexer.next_token.should eq EOF_TOKEN
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# Require this file using `require "spec_helper.rb"` to ensure that it is only
# loaded once.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |c|
  c.treat_symbols_as_metadata_keys_with_true_values = true
  c.run_all_when_everything_filtered = true
  c.filter_run :focus
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rlex
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.5.0
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Rasmus Borgsmidt
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2012-04-24 00:00:00 Z
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Implements a simple lexer using a StringScanner
|
17
|
+
email:
|
18
|
+
- rasmus@borgsmidt.dk
|
19
|
+
executables: []
|
20
|
+
|
21
|
+
extensions: []
|
22
|
+
|
23
|
+
extra_rdoc_files: []
|
24
|
+
|
25
|
+
files:
|
26
|
+
- .gitignore
|
27
|
+
- .rspec
|
28
|
+
- Gemfile
|
29
|
+
- LICENSE
|
30
|
+
- README.md
|
31
|
+
- Rakefile
|
32
|
+
- lib/rlex.rb
|
33
|
+
- lib/rlex/lexer.rb
|
34
|
+
- lib/rlex/token.rb
|
35
|
+
- lib/rlex/version.rb
|
36
|
+
- rlex.gemspec
|
37
|
+
- spec/rlex/lexer_spec.rb
|
38
|
+
- spec/spec_helper.rb
|
39
|
+
homepage: https://github.com/borgsmidt/rlex
|
40
|
+
licenses: []
|
41
|
+
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
none: false
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: "0"
|
59
|
+
requirements: []
|
60
|
+
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 1.8.11
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: The lexer was written for use with Racc, a Ruby variant of Yacc. But there is no code dependency on that project so the lexer may also be used on its own or with other packages.
|
66
|
+
test_files:
|
67
|
+
- spec/rlex/lexer_spec.rb
|
68
|
+
- spec/spec_helper.rb
|
69
|
+
has_rdoc:
|