RubyGems - rley - Versions diffs - 0.0.02 - Mend

rley 0.0.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

checksums.yaml +15 -0
data/.rspec +1 -0
data/.rubocop.yml +74 -0
data/.ruby-gemset +1 -0
data/.ruby-version +1 -0
data/.simplecov +7 -0
data/.travis.yml +21 -0
data/.yardopts +6 -0
data/CHANGELOG.md +10 -0
data/Gemfile +8 -0
data/LICENSE.txt +19 -0
data/README.md +19 -0
data/Rakefile +32 -0
data/lib/rley/constants.rb +26 -0
data/lib/rley/parser/chart.rb +39 -0
data/lib/rley/parser/dotted_item.rb +80 -0
data/lib/rley/parser/earley_parser.rb +177 -0
data/lib/rley/parser/parse_state.rb +54 -0
data/lib/rley/parser/parsing.rb +101 -0
data/lib/rley/parser/state_set.rb +47 -0
data/lib/rley/parser/token.rb +21 -0
data/lib/rley/syntax/grammar.rb +59 -0
data/lib/rley/syntax/grm_symbol.rb +18 -0
data/lib/rley/syntax/literal.rb +20 -0
data/lib/rley/syntax/non_terminal.rb +18 -0
data/lib/rley/syntax/production.rb +42 -0
data/lib/rley/syntax/symbol_seq.rb +36 -0
data/lib/rley/syntax/terminal.rb +18 -0
data/lib/rley/syntax/verbatim_symbol.rb +21 -0
data/spec/rley/parser/chart_spec.rb +47 -0
data/spec/rley/parser/dotted_item_spec.rb +108 -0
data/spec/rley/parser/earley_parser_spec.rb +271 -0
data/spec/rley/parser/parse_state_spec.rb +99 -0
data/spec/rley/parser/parsing_spec.rb +118 -0
data/spec/rley/parser/state_set_spec.rb +68 -0
data/spec/rley/parser/token_spec.rb +40 -0
data/spec/rley/syntax/grammar_spec.rb +149 -0
data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
data/spec/rley/syntax/literal_spec.rb +32 -0
data/spec/rley/syntax/non_terminal_spec.rb +29 -0
data/spec/rley/syntax/production_spec.rb +50 -0
data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
data/spec/rley/syntax/terminal_spec.rb +29 -0
data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
data/spec/spec_helper.rb +21 -0
metadata +166 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,15 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: !binary |-
+    ZTgxOWU0YmIzMDdlZmQ3NGVlZDBkYzcxZTEzNDQ4NDgwMWM3ZmZiOA==
+  data.tar.gz: !binary |-
+    ZmZkNWZlZDgwZWQ2ZTYzYjA5ZjkyNTZlZjMwMGZmMjIwNjVjODFjNQ==
+!binary "U0hBNTEy":
+  metadata.gz: !binary |-
+    ZGY2YzBlMTM0MTNiZWE3ZjQyYmNjOWMzYWQ4ZTY4N2RjMDQ4YzExMTg5MTM5
+    NjFlMDRlZjYyYWM5NDJmYzlhNzY3YWE3N2FiZDVhNGM2NzVhOGMwZjZmZjE0
+    YjhkZjFiNGVlOTQwMmZjZjkzNWQ3ZGY3NGM1Y2M4YWU3ZjE3MDI=
+  data.tar.gz: !binary |-
+    MDViNDQ3MjBjOTg1MWI2NmJmNmRhZTg2MzQ0MmRlMDZmY2JmMDhiNTZlY2Zi
+    NTMwZDdlNGI1MWIwMzkxN2FiNjMyZjk4ZWViZjk0YzJlMTY0MmMyZmVlN2U3
+    MGJiYjFlNDE5NzM0MzhlMWQzNGIyMDBmOTJkZDQwNDYyNDVjNjQ=

data/.rspec ADDED Viewed

	@@ -0,0 +1 @@
1	+ --backtrace

data/.rubocop.yml ADDED Viewed

@@ -0,0 +1,74 @@
+AllCops:
+  Exclude:
+    - 'examples/**/*'
+    - 'features/**/*'
+    - 'gems/**/*'
+# This is disabled because some demos use UTF-8
+AsciiComments:
+  Enabled: false
+CaseIndentation:
+  IndentWhenRelativeTo: end
+  IndentOneStep: true
+# Rubocop enforces the use of is_a? instead of kind_of?
+# Which is contrary to modelling practice.
+ClassCheck:
+  Enabled: false
+ClassLength:
+  Max: 250
+  CountComments: false
+ConstantName:
+  Enabled: false
+CyclomaticComplexity:
+  Enabled: false
+DefWithParentheses:
+  Enabled: false
+Documentation:
+  Enabled: false
+EmptyLines:
+  Enabled: false
+EmptyLinesAroundBody:
+  Enabled: false
+Encoding:
+  Enabled: false
+FileName:
+  Enabled: false
+IndentationWidth :
+  Enabled: false
+# Avoid methods longer than 50 lines of code
+MethodLength:
+  Max: 50
+  CountComments: false
+NonNilCheck:
+  Enabled: false
+NumericLiterals:
+  Enabled: false
+RaiseArgs:
+  Enabled: false
+RedundantReturn:
+  Enabled: false
+SpaceInsideBrackets:
+  Enabled: false
+TrailingWhitespace:
+  Enabled: false

data/.ruby-gemset ADDED Viewed

	@@ -0,0 +1 @@
1	+ rley

data/.ruby-version ADDED Viewed

	@@ -0,0 +1 @@
1	+ 1.9.3

data/.simplecov ADDED Viewed

@@ -0,0 +1,7 @@
+# .simplecov
+# Configuration
+SimpleCov.start do
+  # Remove all files that match /spec/ in their path
+  add_filter "/spec/"
+end

data/.travis.yml ADDED Viewed

@@ -0,0 +1,21 @@
+language: ruby
+rvm:
+  - 2.1.0
+  - 2.0.0
+  - 1.9.3
+  - 1.9.2
+  - jruby-19mode
+  - jruby-head
+# Workaround issue of jruby-head configuration on Travis CI
+matrix:
+  allow_failures:
+    - rvm: jruby-head
+gemfile:
+  - Gemfile
+# whitelist
+branches:
+  only:
+    - master

data/.yardopts ADDED Viewed

@@ -0,0 +1,6 @@
+--exclude examples --exclude features --exclude spec
+--no-private
+--markup markdown
+-
+Changelog.md
+License.txt

data/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,10 @@
+### 0.0.02 / 2014-11-12
+* [CHANGE] File `README.md`: Added Travis CI badge.
+### 0.0.01 / 2014-11-12
+* [CHANGE] Rley is "gemmified"!
+### 0.0.00 / 2014-11-07
+* [FEATURE] Initial public working version

data/Gemfile ADDED Viewed

@@ -0,0 +1,8 @@
+source 'https://rubygems.org'
+# Development dependencies are listed explicitly below; this prevents
+# Bundler from loading the dependencies from our .gemspec file
+group :development do
+  gem 'rake',  '>= 0.8.0'
+  gem 'rspec', '>= 3.0.0'
+  gem 'simplecov', '>= 0.5.0'
+end

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,19 @@
+Copyright (c) 2014 Dimitri Geshef
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,19 @@
+Rley
+===========
+[Homepage](https://github.com/famished-tiger/Rley)
+[![Build Status](https://travis-ci.org/famished-tiger/Rley.svg?branch=master)](https://travis-ci.org/famished-tiger/Rley)
+### What is Rley? ###
+__Rley__ is a Ruby implementation of an Earley parser.
+The objective is to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
+This project is in "early" stage.
+Consult Wikipedia to learn more about Earley's parsing algorithm.
+Copyright
+---------
+Copyright (c) 2014, Dimitri Geshef.
+__Rley__ is released under the MIT License see [LICENSE.txt](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt) for details.

data/Rakefile ADDED Viewed

@@ -0,0 +1,32 @@
+require 'rubygems'
+require_relative './lib/rley/constants'
+namespace :gem do
+desc 'Push the gem to rubygems.org'
+task :push do
+  system("gem push rley-#{Rley::Version}.gem")
+end
+end # namespace
+# Testing-specific tasks
+# RSpec as testing tool
+require 'rspec/core/rake_task'
+desc 'Run RSpec'
+RSpec::Core::RakeTask.new do |spec|
+  spec.pattern = 'spec/**/*_spec.rb'
+end
+# Combine RSpec tests
+desc 'Run tests, with RSpec'
+task test: [:spec]
+# Default rake task
+task default: :test
+# End of file

data/lib/rley/constants.rb ADDED Viewed

@@ -0,0 +1,26 @@
+# File: constants.rb
+# Purpose: definition of Rley constants.
+module Rley # Module used as a namespace
+  # The version number of the gem.
+  Version = '0.0.02'
+  # Brief description of the gem.
+  Description = "Ruby implementation of the Earley's parsing algorithm"
+  # Constant Rley::RootDir contains the absolute path of Rley's
+  # start directory. Note: it also ends with a slash character.
+  unless defined?(RootDir)
+    # The initialisation of constant RootDir is guarded in order
+    # to avoid multiple initialisation (not allowed for constants)
+    # The start folder of Rley.
+    RootDir = begin
+      require 'pathname' # Load Pathname class from standard library
+      startdir = Pathname(__FILE__).dirname.parent.parent.expand_path
+      startdir.to_s + '/' # Append trailing slash character to it
+    end
+  end
+end # module
+# End of file

data/lib/rley/parser/chart.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require_relative 'state_set'
+require_relative 'parse_state'
+module Rley # This module is used as a namespace
+  module Parser # This module is used as a namespace
+    # Also called a parse table
+    # A one-dimensional array with n + 1 entries (n = number of input tokens).
+    class Chart
+      attr_reader(:state_sets)
+      def initialize(startDottedItem, tokenCount)
+        @state_sets = Array.new(tokenCount + 1) {|_| StateSet.new }
+        push_state(startDottedItem, 0, 0)
+      end
+      # The dotted item/rule used to seed the parse chart.
+      # It corresponds to the start production and a dot placed
+      # at the beginning of the rhs
+      def start_dotted_rule()
+        return self[0].states.first.dotted_rule
+      end
+      # Access the state set at given position
+      def [](index)
+        return state_sets[index]
+      end
+      # Push a parse state for the chart entry with given index
+      def push_state(aDottedItem, anOrigin, anIndex)
+        new_state = ParseState.new(aDottedItem, anOrigin)
+        self[anIndex].push_state(new_state)
+      end
+    end # class
+  end # module
+end # module
+# End of file

data/lib/rley/parser/dotted_item.rb ADDED Viewed

@@ -0,0 +1,80 @@
+# A dotted item is a parse state for a given production/grammar rule
+# It partitions the rhs of the rule in two parts.
+# The left part consists of the symbols in the rules that are matched
+# by the input tokens.
+# The right part consists of symbols that are predicted to match the
+# input tokens.
+# The terminology stems from the traditional way to visualize the partition
+# by using a fat dot character as a separator between the left and right parts
+# An item with the dot at the beginning (i.e. before any rhs symbol)
+#   is called a predicted item.
+# An item with the dot at the end (i.e. after all rhs symbols)
+#   is called a reduce item.
+# An item with a dot in front of a terminal is called a shift item.
+class DottedItem
+  # Production rule
+  attr_reader(:production)
+  # Index of the next symbol (from the rhs) after the 'dot'.
+  # If the dot is at the end of the rhs (i.e.) there is no next
+  # symbol, then the position takes the value -1.
+  # It the rhs is empty, then the postion is -2
+  attr_reader(:position)
+  # @param aProduction
+  def initialize(aProduction, aPosition)
+    @production = aProduction
+    @position = valid_position(aPosition)
+  end
+  # Return true if the dot position is at the start of the rhs.
+  def at_start?()
+    return position == 0 || position == -2
+  end
+  # An item with the dot at the beginning is called
+  # predicted item
+  alias :predicted_item? :at_start?
+  # A dotted item is called a reduce item if the dot is at the end.
+  def reduce_item?()
+    return position < 0 # Either -1 or -2
+  end
+  # The non-terminal symbol that is on the left-side of the production
+  def lhs()
+    return production.lhs
+  end
+  # Return the symbol after the dot.
+  # nil is returned if the dot is at the end
+  def next_symbol()
+    result = (position < 0) ? nil : production.rhs[position]
+  end
+  # An item with the dot in front of a terminal is called a shift item
+  def shift_item?()
+  end
+  private
+  # Return the given after its validation.
+  def valid_position(aPosition)
+    rhs_size = production.rhs.size
+    if aPosition < 0 || aPosition > rhs_size
+      fail StandardError, 'Out of bound index'
+    end
+    if rhs_size == 0
+      index = -2 # Minus 2 at start/end of empty production
+    elsif aPosition == rhs_size
+      index = -1  # Minus 1 at end of non-empty production
+    else
+      index = aPosition
+    end
+    return index
+  end
+end # class
+# End of file

data/lib/rley/parser/earley_parser.rb ADDED Viewed

@@ -0,0 +1,177 @@
+require_relative '../syntax/grammar'
+require_relative 'dotted_item'
+require_relative 'parsing'
+module Rley # This module is used as a namespace
+  module Parser # This module is used as a namespace
+    # Implementation of a parser that uses the Earley parsing algorithm.
+    class EarleyParser
+      # The grammar of the language.
+      attr_reader(:grammar)
+      # The dotted items/rules for the productions of the grammar
+      attr_reader(:dotted_items)
+      # A Hash that defines the mapping: non-terminal => [start dotted items]
+      attr_reader(:start_mapping)
+      # A Hash that defines the mapping: dotted item => next dotted item
+      # In other words, the 'next_mapping' allows to find the dotted item
+      # after "advancing" the dot
+      attr_reader(:next_mapping)
+      def initialize(aGrammar)
+        @grammar = aGrammar
+        @dotted_items = build_dotted_items(grammar)
+        @start_mapping = build_start_mapping(dotted_items)
+        @next_mapping = build_next_mapping(dotted_items)
+      end
+      def parse(aTokenSequence)
+        result = Parsing.new(start_dotted_item, aTokenSequence)
+        (0..aTokenSequence.size).each do |i|
+          result.chart[i].each do |state|
+            if state.complete?
+              # parse reached end of production
+              completion(result, state, i)
+            else
+              next_symbol = state.next_symbol
+              if next_symbol.kind_of?(Syntax::NonTerminal)
+                prediction(result, next_symbol, i)
+              else
+                # Expecting a terminal symbol
+                scanning(result, next_symbol, i)
+              end
+            end
+          end
+        end
+        return result
+      end
+      private
+      def build_dotted_items(aGrammar)
+        items = []
+        aGrammar.rules.each do |prod|
+          rhs_size = prod.rhs.size
+          if rhs_size == 0
+            items << DottemItem.new(prod, 0)
+          else
+            items += (0..rhs_size).map { |i| DottedItem.new(prod, i) }
+          end
+        end
+        return items
+      end
+      # Create a Hash with pairs of the kind:
+      # non-terminal => [start dotted items]
+      def build_start_mapping(theDottedItems)
+        mapping = {}
+        theDottedItems.each do |item|
+          next unless item.at_start?
+          lhs_symbol = item.lhs
+          map_entry = mapping.fetch(lhs_symbol, [])
+          map_entry << item
+          mapping[lhs_symbol] = map_entry
+        end
+        return mapping
+      end
+      # Create a Hash with pairs of the kind:
+      # dotted item => next dotted item
+      # next dotted item uses same production and the dot
+      # position is advanced by one symbol
+      def build_next_mapping(theDottedItems)
+        mapping = {}
+        theDottedItems.each_cons(2) do |(item1, item2)|
+          next if item1.production != item2.production
+          mapping[item1] = item2
+        end
+        return mapping
+      end
+      # The dotted item for the start production and
+      # with the dot at the beginning of the rhs
+      def start_dotted_item()
+        # TODO: remove assumption that first dotted_item is
+        # for start production
+        return dotted_items[0]
+      end
+      # This method is called when a parse state for chart entry at position
+      # 'pos' expects as next symbol a non-terminal.
+      # Given a predicted non-terminal 'nt' and a current token position
+      # 'pos':
+      # For each production with 'nt' as lhs, retrieve their corresponding
+      # initial dotted rules nt -> . xxxx
+      # For retrieved dotted rule, add a parse state to the chart entry at 'pos':
+      #   <initial dotted rule, pos, pos>
+      # In short, one adds states to chart[pos], one per production that
+      # specifies how to reduce some input into the predicted nt (non-terminal)
+      # A prediction corresponds to a potential expansion of a nonterminal
+      # in a left-most derivation.
+      # @param aParsing [Parsing] the object that encapsulates the results
+      #   result of the parsing process
+      # @param aNonTerminal [NonTerminal] a non-terminal symbol that
+      #   immediately follows a dot
+      #   (= is expected/predicted by the production rule)
+      # @param aPosition [Fixnum] position in the input token sequence.
+      def prediction(aParsing, aNonTerminal, aPosition)
+        # Retrieve all start dotted items for productions
+        # with aNonTerminal as its lhs
+        items = start_mapping[aNonTerminal]
+        items.each do |an_item|
+          aParsing.push_state(an_item, aPosition, aPosition)
+        end
+      end
+      # This method is called when a parse state for chart entry at position
+      # 'pos' expects a terminal as next symbol.
+      # If the input token matches the terminal symbol then:
+      # Retrieve all parse states for chart entry at 'aPosition'
+      # that have the given terminal as next symbol.
+      # For each s of the above states, push to chart entry aPosition + 1
+      # a new state like: <next dotted rule, s.origin, aPosition + 1>
+      # In other words, we place the dotted rules in the next state set
+      # such that the dot appears after terminal.
+      # @param aParsing [Parsing] the object that encapsulates the results
+      #   result of the parsing process
+      # @param Terminal [Terminal] a terminal symbol that
+      #   immediately follows a dot
+      # @param aPosition [Fixnum] position in the input token sequence.
+      def scanning(aParsing, aTerminal, aPosition)
+        aParsing.scanning(aTerminal, aPosition) { |item|
+          next_mapping[item]
+        }
+      end
+      # This method is called when a parse state at chart entry reaches the end
+      # of a production.
+      # For every state in chart[aPosition] that is complete (i.e. of the form:
+      #   { dotted_rule: X -> γ •, origin: j}),
+      # Find states s in chart[j] of the form {dotted_rule: Y -> α • X β, origin: i}
+      #   In other words, rules that predicted the non-terminal X.
+      # For each s, add to chart[aPosition] a state of the form
+      #   { dotted_rule: Y → α X • β, origin: i})
+      def completion(aParsing, aState, aPosition)
+        aParsing.completion(aState, aPosition) { |item|
+          next_mapping[item]
+        }
+      end
+    end # class
+  end # module
+end # module
+# End of file