RubyGems - rley - Versions diffs - 0.0.02 - Mend

rley 0.0.02

Files changed (46) hide show

checksums.yaml +15 -0
data/.rspec +1 -0
data/.rubocop.yml +74 -0
data/.ruby-gemset +1 -0
data/.ruby-version +1 -0
data/.simplecov +7 -0
data/.travis.yml +21 -0
data/.yardopts +6 -0
data/CHANGELOG.md +10 -0
data/Gemfile +8 -0
data/LICENSE.txt +19 -0
data/README.md +19 -0
data/Rakefile +32 -0
data/lib/rley/constants.rb +26 -0
data/lib/rley/parser/chart.rb +39 -0
data/lib/rley/parser/dotted_item.rb +80 -0
data/lib/rley/parser/earley_parser.rb +177 -0
data/lib/rley/parser/parse_state.rb +54 -0
data/lib/rley/parser/parsing.rb +101 -0
data/lib/rley/parser/state_set.rb +47 -0
data/lib/rley/parser/token.rb +21 -0
data/lib/rley/syntax/grammar.rb +59 -0
data/lib/rley/syntax/grm_symbol.rb +18 -0
data/lib/rley/syntax/literal.rb +20 -0
data/lib/rley/syntax/non_terminal.rb +18 -0
data/lib/rley/syntax/production.rb +42 -0
data/lib/rley/syntax/symbol_seq.rb +36 -0
data/lib/rley/syntax/terminal.rb +18 -0
data/lib/rley/syntax/verbatim_symbol.rb +21 -0
data/spec/rley/parser/chart_spec.rb +47 -0
data/spec/rley/parser/dotted_item_spec.rb +108 -0
data/spec/rley/parser/earley_parser_spec.rb +271 -0
data/spec/rley/parser/parse_state_spec.rb +99 -0
data/spec/rley/parser/parsing_spec.rb +118 -0
data/spec/rley/parser/state_set_spec.rb +68 -0
data/spec/rley/parser/token_spec.rb +40 -0
data/spec/rley/syntax/grammar_spec.rb +149 -0
data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
data/spec/rley/syntax/literal_spec.rb +32 -0
data/spec/rley/syntax/non_terminal_spec.rb +29 -0
data/spec/rley/syntax/production_spec.rb +50 -0
data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
data/spec/rley/syntax/terminal_spec.rb +29 -0
data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
data/spec/spec_helper.rb +21 -0
metadata +166 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,15 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: !binary |-
+    ZTgxOWU0YmIzMDdlZmQ3NGVlZDBkYzcxZTEzNDQ4NDgwMWM3ZmZiOA==
+  data.tar.gz: !binary |-
+    ZmZkNWZlZDgwZWQ2ZTYzYjA5ZjkyNTZlZjMwMGZmMjIwNjVjODFjNQ==
+!binary "U0hBNTEy":
+  metadata.gz: !binary |-
+    ZGY2YzBlMTM0MTNiZWE3ZjQyYmNjOWMzYWQ4ZTY4N2RjMDQ4YzExMTg5MTM5
+    NjFlMDRlZjYyYWM5NDJmYzlhNzY3YWE3N2FiZDVhNGM2NzVhOGMwZjZmZjE0
+    YjhkZjFiNGVlOTQwMmZjZjkzNWQ3ZGY3NGM1Y2M4YWU3ZjE3MDI=
+  data.tar.gz: !binary |-
+    MDViNDQ3MjBjOTg1MWI2NmJmNmRhZTg2MzQ0MmRlMDZmY2JmMDhiNTZlY2Zi
+    NTMwZDdlNGI1MWIwMzkxN2FiNjMyZjk4ZWViZjk0YzJlMTY0MmMyZmVlN2U3
+    MGJiYjFlNDE5NzM0MzhlMWQzNGIyMDBmOTJkZDQwNDYyNDVjNjQ=

data/.rspec ADDED Viewed

	@@ -0,0 +1 @@
1	+ --backtrace

data/.rubocop.yml ADDED Viewed

@@ -0,0 +1,74 @@
+AllCops:
+  Exclude:
+    - 'examples/**/*'
+    - 'features/**/*'
+    - 'gems/**/*'
+# This is disabled because some demos use UTF-8
+AsciiComments:
+  Enabled: false
+CaseIndentation:
+  IndentWhenRelativeTo: end
+  IndentOneStep: true
+# Rubocop enforces the use of is_a? instead of kind_of?
+# Which is contrary to modelling practice.
+ClassCheck:
+  Enabled: false
+ClassLength:
+  Max: 250
+  CountComments: false
+ConstantName:
+  Enabled: false
+CyclomaticComplexity:
+  Enabled: false
+DefWithParentheses:
+  Enabled: false
+Documentation:
+  Enabled: false
+EmptyLines:
+  Enabled: false
+EmptyLinesAroundBody:
+  Enabled: false
+Encoding:
+  Enabled: false
+FileName:
+  Enabled: false
+IndentationWidth :
+  Enabled: false
+# Avoid methods longer than 50 lines of code
+MethodLength:
+  Max: 50
+  CountComments: false
+NonNilCheck:
+  Enabled: false
+NumericLiterals:
+  Enabled: false
+RaiseArgs:
+  Enabled: false
+RedundantReturn:
+  Enabled: false
+SpaceInsideBrackets:
+  Enabled: false
+TrailingWhitespace:
+  Enabled: false

data/.ruby-gemset ADDED Viewed

	@@ -0,0 +1 @@
1	+ rley

data/.ruby-version ADDED Viewed

	@@ -0,0 +1 @@
1	+ 1.9.3

data/.simplecov ADDED Viewed

@@ -0,0 +1,7 @@
+# .simplecov
+# Configuration
+SimpleCov.start do
+  # Remove all files that match /spec/ in their path
+  add_filter "/spec/"
+end

data/.travis.yml ADDED Viewed

@@ -0,0 +1,21 @@
+language: ruby
+rvm:
+  - 2.1.0
+  - 2.0.0
+  - 1.9.3
+  - 1.9.2
+  - jruby-19mode
+  - jruby-head
+# Workaround issue of jruby-head configuration on Travis CI
+matrix:
+  allow_failures:
+    - rvm: jruby-head
+gemfile:
+  - Gemfile
+# whitelist
+branches:
+  only:
+    - master

data/.yardopts ADDED Viewed

@@ -0,0 +1,6 @@
+--exclude examples --exclude features --exclude spec
+--no-private
+--markup markdown
+-
+CHANGELOG.md
+LICENSE.txt

data/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,10 @@
+### 0.0.02 / 2014-11-12
+* [CHANGE] File `README.md`: Added Travis CI badge.
+### 0.0.01 / 2014-11-12
+* [CHANGE] Rley is "gemmified"!
+### 0.0.00 / 2014-11-07
+* [FEATURE] Initial public working version

data/Gemfile ADDED Viewed

@@ -0,0 +1,8 @@
+source 'https://rubygems.org'
+# Prevent Bundler from loading the dependencies from our .gemspec file
+group :development do
+  gem 'rake',  '>= 0.8.0'
+  gem 'rspec', '>= 3.0.0'
+  gem 'simplecov', '>= 0.5.0'
+end

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,19 @@
+Copyright (c) 2014 Dimitri Geshef
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,19 @@
+Rley
+===========
+[Homepage](https://github.com/famished-tiger/Rley)
+[![Build Status](https://travis-ci.org/famished-tiger/Rley.svg?branch=master)](https://travis-ci.org/famished-tiger/Rley)
+### What is Rley? ###
+__Rley__ is a Ruby implementation of an Earley parser.
+The objective is to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
+This project is at an "early" stage.
+Consult Wikipedia to learn more about Earley's parsing algorithm.
+Copyright
+---------
+Copyright (c) 2014, Dimitri Geshef.
+__Rley__ is released under the MIT License see [LICENSE.txt](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt) for details.

data/Rakefile ADDED Viewed

@@ -0,0 +1,32 @@
+require 'rubygems'
+require_relative './lib/rley/constants'
+namespace(:gem) do
+  # Publishing: shells out to the `gem` command line tool
+  desc('Push the gem to rubygems.org')
+  task(:push) do
+    system("gem push rley-#{Rley::Version}.gem")
+  end
+end # namespace
+# Testing-specific tasks
+# RSpec is the testing tool
+require 'rspec/core/rake_task'
+desc('Run RSpec')
+RSpec::Core::RakeTask.new { |rspec_task| rspec_task.pattern = 'spec/**/*_spec.rb' }
+# The 'test' task only depends on the 'spec' task
+desc('Run tests, with RSpec')
+task(:test => [:spec])
+# Default rake task
+task(:default => :test)
+# End of file

data/lib/rley/constants.rb ADDED Viewed

@@ -0,0 +1,26 @@
+# File: constants.rb
+# Purpose: definition of Rley constants.
+module Rley # Module used as a namespace
+  # Version string of the gem.
+  Version = '0.0.02'
+  # One-line summary of the gem.
+  Description = "Ruby implementation of the Earley's parsing algorithm"
+  # Constant Rley::RootDir holds the absolute path of Rley's
+  # start directory, terminated by a slash character.
+  # A constant may not be initialised twice, hence the guard
+  # against a repeated evaluation of this file.
+  unless defined?(RootDir)
+    require 'pathname' # Pathname class comes from the standard library
+    # Start folder = two levels above the directory holding this file
+    RootDir = "#{Pathname(__FILE__).dirname.parent.parent.expand_path}/"
+  end
+end # module
+# End of file

data/lib/rley/parser/chart.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require_relative 'state_set'
+require_relative 'parse_state'
+module Rley # This module is used as a namespace
+  module Parser # This module is used as a namespace
+    # A chart (also known as a parse table).
+    # It is a one-dimensional array of state sets with n + 1 entries,
+    # where n is the number of input tokens.
+    class Chart
+      # The array of state sets, one per chart entry
+      attr_reader(:state_sets)
+      # @param startDottedItem the dotted item used to seed chart entry 0
+      # @param tokenCount [Fixnum] the number of input tokens
+      def initialize(startDottedItem, tokenCount)
+        entry_count = tokenCount + 1
+        @state_sets = Array.new(entry_count) { StateSet.new }
+        # Seed state: origin 0, placed in chart entry 0
+        push_state(startDottedItem, 0, 0)
+      end
+      # The dotted item/rule that seeded the parse chart, that is,
+      # the dotted rule of the first state of chart entry 0.
+      def start_dotted_rule()
+        seed_state = self[0].states.first
+        seed_state.dotted_rule
+      end
+      # Retrieve the state set located at the given position
+      def [](index)
+        state_sets[index]
+      end
+      # Create a parse state from the given dotted item and origin,
+      # then push it onto the chart entry with the given index
+      def push_state(aDottedItem, anOrigin, anIndex)
+        target_set = self[anIndex]
+        target_set.push_state(ParseState.new(aDottedItem, anOrigin))
+      end
+    end # class
+  end # module
+end # module
+# End of file

data/lib/rley/parser/dotted_item.rb ADDED Viewed

@@ -0,0 +1,80 @@
+# A dotted item is a parse state for a given production/grammar rule
+# It partitions the rhs of the rule in two parts.
+# The left part consists of the symbols in the rules that are matched
+# by the input tokens.
+# The right part consists of symbols that are predicted to match the
+# input tokens.
+# The terminology stems from the traditional way to visualize the partition
+# by using a fat dot character as a separator between the left and right parts
+# An item with the dot at the beginning (i.e. before any rhs symbol)
+#   is called a predicted item.
+# An item with the dot at the end (i.e. after all rhs symbols)
+#   is called a reduce item.
+# An item with a dot in front of a terminal is called a shift item.
+class DottedItem
+  # Production rule
+  attr_reader(:production)
+  # Index of the next symbol (from the rhs) after the 'dot'.
+  # If the dot is at the end of the rhs (i.e. there is no next
+  # symbol), then the position takes the value -1.
+  # If the rhs is empty, then the position is -2
+  attr_reader(:position)
+  # @param aProduction the production rule of this item
+  #   (only its #lhs and #rhs are used by this class)
+  # @param aPosition [Fixnum] the raw dot position, a value between
+  #   0 and the size of the rhs (both included)
+  # @raise [StandardError] when aPosition lies outside that range
+  def initialize(aProduction, aPosition)
+    @production = aProduction
+    # Must come after the assignment of @production:
+    # the validation reads production.rhs
+    @position = valid_position(aPosition)
+  end
+  # Return true if the dot position is at the start of the rhs.
+  # An empty rhs (position -2) counts as being at the start.
+  def at_start?()
+    return position == 0 || position == -2
+  end
+  # An item with the dot at the beginning is called
+  # predicted item
+  alias :predicted_item? :at_start?
+  # A dotted item is called a reduce item if the dot is at the end.
+  def reduce_item?()
+    return position < 0 # Either -1 or -2
+  end
+  # The non-terminal symbol that is on the left-side of the production
+  def lhs()
+    return production.lhs
+  end
+  # Return the symbol after the dot.
+  # nil is returned if the dot is at the end
+  def next_symbol()
+    return (position < 0) ? nil : production.rhs[position]
+  end
+  # An item with the dot in front of a terminal is called a shift item
+  # FIXME: not implemented yet. The empty body makes this predicate
+  # return nil (i.e. a falsy value) for every item.
+  def shift_item?()
+  end
+  private
+  # Return the given position after its validation.
+  # The returned value is the internal encoding of the position:
+  # -2 for an empty rhs, -1 for a dot at the end of a non-empty rhs,
+  # otherwise the given position itself.
+  # @raise [StandardError] when aPosition is negative or beyond the rhs size
+  def valid_position(aPosition)
+    rhs_size = production.rhs.size
+    if aPosition < 0 || aPosition > rhs_size
+      fail StandardError, 'Out of bound index'
+    end
+    return -2 if rhs_size == 0 # Minus 2 at start/end of empty production
+    return -1 if aPosition == rhs_size # Minus 1 at end of non-empty production
+    return aPosition
+  end
+end # class
+# End of file

data/lib/rley/parser/earley_parser.rb ADDED Viewed

@@ -0,0 +1,177 @@
+require_relative '../syntax/grammar'
+require_relative 'dotted_item'
+require_relative 'parsing'
+module Rley # This module is used as a namespace
+  module Parser # This module is used as a namespace
+    # Implementation of a parser that uses the Earley parsing algorithm.
+    class EarleyParser
+      # The grammar of the language.
+      attr_reader(:grammar)
+      # The dotted items/rules for the productions of the grammar
+      attr_reader(:dotted_items)
+      # A Hash that defines the mapping: non-terminal => [start dotted items]
+      attr_reader(:start_mapping)
+      # A Hash that defines the mapping: dotted item => next dotted item
+      # In other words, the 'next_mapping' allows to find the dotted item
+      # after "advancing" the dot
+      attr_reader(:next_mapping)
+      # Build the dotted items and the lookup tables for the given grammar.
+      # @param aGrammar the grammar of the language to parse
+      #   (its #rules are enumerated to build the dotted items)
+      def initialize(aGrammar)
+        @grammar = aGrammar
+        @dotted_items = build_dotted_items(grammar)
+        @start_mapping = build_start_mapping(dotted_items)
+        @next_mapping = build_next_mapping(dotted_items)
+      end
+      # Parse a sequence of input tokens.
+      # Each chart entry from 0 up to the token count is visited in turn;
+      # every state found in it triggers either a completion, a prediction
+      # or a scanning step.
+      # @param aTokenSequence the input tokens (must respond to #size)
+      # @return [Parsing] the object that encapsulates the parse results
+      def parse(aTokenSequence)
+        result = Parsing.new(start_dotted_item, aTokenSequence)
+        (0..aTokenSequence.size).each do |i|
+          result.chart[i].each do |state|
+            if state.complete?
+              # parse reached end of production
+              completion(result, state, i)
+            else
+              next_symbol = state.next_symbol
+              if next_symbol.kind_of?(Syntax::NonTerminal)
+                prediction(result, next_symbol, i)
+              else
+                # Expecting a terminal symbol
+                scanning(result, next_symbol, i)
+              end
+            end
+          end
+        end
+        return result
+      end
+      private
+      # Create the dotted items of every production of the grammar:
+      # one item per dot position, from 0 up to the size of the rhs.
+      # A production with an empty rhs yields one single item (position 0).
+      # The items of a same production are contiguous and ordered
+      # by increasing dot position (build_next_mapping relies on that).
+      def build_dotted_items(aGrammar)
+        items = []
+        aGrammar.rules.each do |prod|
+          # With an empty rhs, the range 0..0 results in exactly one item
+          items.concat((0..prod.rhs.size).map { |i| DottedItem.new(prod, i) })
+        end
+        return items
+      end
+      # Create a Hash with pairs of the kind:
+      # non-terminal => [start dotted items]
+      def build_start_mapping(theDottedItems)
+        mapping = {}
+        theDottedItems.each do |item|
+          next unless item.at_start?
+          (mapping[item.lhs] ||= []) << item
+        end
+        return mapping
+      end
+      # Create a Hash with pairs of the kind:
+      # dotted item => next dotted item
+      # next dotted item uses same production and the dot
+      # position is advanced by one symbol
+      def build_next_mapping(theDottedItems)
+        mapping = {}
+        theDottedItems.each_cons(2) do |(item1, item2)|
+          # Consecutive items of distinct productions aren't related
+          next if item1.production != item2.production
+          mapping[item1] = item2
+        end
+        return mapping
+      end
+      # The dotted item for the start production and
+      # with the dot at the beginning of the rhs
+      def start_dotted_item()
+        # TODO: remove assumption that first dotted_item is
+        # for start production
+        return dotted_items[0]
+      end
+      # This method is called when a parse state for chart entry at position
+      # 'pos' expects as next symbol a non-terminal.
+      # Given a predicted non-terminal 'nt' and a current token position
+      # 'pos':
+      # For each production with 'nt' as lhs, retrieve their corresponding
+      # initial dotted rules nt -> . xxxx
+      # For each retrieved dotted rule, add a parse state to the chart entry
+      # at 'pos':
+      #   <initial dotted rule, pos, pos>
+      # In short, one adds states to chart[pos], one per production that
+      # specifies how to reduce some input into the predicted nt (non-terminal)
+      # A prediction corresponds to a potential expansion of a nonterminal
+      # in a left-most derivation.
+      # @param aParsing [Parsing] the object that encapsulates the results
+      #   of the parsing process
+      # @param aNonTerminal [NonTerminal] a non-terminal symbol that
+      #   immediately follows a dot
+      #   (= is expected/predicted by the production rule)
+      # @param aPosition [Fixnum] position in the input token sequence.
+      def prediction(aParsing, aNonTerminal, aPosition)
+        # Retrieve all start dotted items for productions
+        # with aNonTerminal as its lhs
+        items = start_mapping[aNonTerminal]
+        items.each do |an_item|
+          aParsing.push_state(an_item, aPosition, aPosition)
+        end
+      end
+      # This method is called when a parse state for chart entry at position
+      # 'pos' expects a terminal as next symbol.
+      # If the input token matches the terminal symbol then:
+      # Retrieve all parse states for chart entry at 'aPosition'
+      # that have the given terminal as next symbol.
+      # For each s of the above states, push to chart entry aPosition + 1
+      # a new state like: <next dotted rule, s.origin, aPosition + 1>
+      # In other words, we place the dotted rules in the next state set
+      # such that the dot appears after terminal.
+      # @param aParsing [Parsing] the object that encapsulates the results
+      #   of the parsing process
+      # @param aTerminal [Terminal] a terminal symbol that
+      #   immediately follows a dot
+      # @param aPosition [Fixnum] position in the input token sequence.
+      def scanning(aParsing, aTerminal, aPosition)
+        # The block tells the parsing how to advance the dot of an item
+        aParsing.scanning(aTerminal, aPosition) { |item|
+          next_mapping[item]
+        }
+      end
+      # This method is called when a parse state at chart entry reaches the end
+      # of a production.
+      # For every state in chart[aPosition] that is complete (i.e. of the form:
+      #   { dotted_rule: X -> γ •, origin: j}),
+      # Find states s in chart[j] of the form {dotted_rule: Y -> α • X β, origin: i}
+      #   In other words, rules that predicted the non-terminal X.
+      # For each s, add to chart[aPosition] a state of the form
+      #   { dotted_rule: Y → α X • β, origin: i})
+      # @param aParsing [Parsing] the object that encapsulates the results
+      #   of the parsing process
+      # @param aState the complete parse state
+      # @param aPosition [Fixnum] position in the input token sequence.
+      def completion(aParsing, aState, aPosition)
+        # The block tells the parsing how to advance the dot of an item
+        aParsing.completion(aState, aPosition) { |item|
+          next_mapping[item]
+        }
+      end
+    end # class
+  end # module
+end # module
+# End of file