rley 0.5.07 → 0.5.08

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
  4. data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
  5. data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
  6. data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
  7. data/examples/general/SRL/lib/ast_builder.rb +176 -0
  8. data/examples/general/SRL/lib/ast_building.rb +20 -0
  9. data/examples/general/SRL/lib/grammar.rb +32 -0
  10. data/examples/general/SRL/lib/parser.rb +26 -0
  11. data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
  12. data/examples/general/SRL/lib/regex_repr.rb +1 -0
  13. data/examples/general/SRL/lib/srl_demo.rb +67 -0
  14. data/examples/general/SRL/lib/tokenizer.rb +101 -0
  15. data/examples/general/SRL/spec/integration_spec.rb +103 -0
  16. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
  17. data/examples/general/SRL/spec/spec_helper.rb +25 -0
  18. data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
  19. data/examples/general/SRL/srl_demo.rb +57 -0
  20. data/examples/general/calc_iter1/calc_demo.rb +1 -1
  21. data/examples/general/calc_iter2/ast_building.rb +20 -0
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
  23. data/examples/general/calc_iter2/calc_demo.rb +1 -1
  24. data/lib/rley/base/base_parser.rb +1 -1
  25. data/lib/rley/base/grm_items_builder.rb +1 -1
  26. data/lib/rley/constants.rb +1 -1
  27. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  28. data/lib/rley/parser/gfg_chart.rb +8 -3
  29. data/lib/rley/parser/gfg_earley_parser.rb +5 -2
  30. data/lib/rley/parser/gfg_parsing.rb +5 -1
  31. data/lib/rley/parser/parse_tree_builder.rb +16 -5
  32. data/lib/rley/ptree/terminal_node.rb +3 -2
  33. data/spec/rley/parser/ast_builder_spec.rb +2 -2
  34. data/spec/rley/parser/cst_builder_spec.rb +2 -3
  35. metadata +20 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 4ce368c99ffa556898d9a89788786e47b5e8b115
- data.tar.gz: 97a691b869089989f601556ed13dd24d729b7ad0
+ metadata.gz: 3c616b691fb51ba2eb00a25fee75ff4a80093990
+ data.tar.gz: 1039cfe8f29c8d1ec7c88fa83c18f9173763b8f2
  SHA512:
- metadata.gz: b50595273bf5f75e25b6e609de197be31a4f8583f1c1f93ef8494a8e5367facdbafd13326bd2097c9da657096c2f15e9dc8d439d16212f4e578134ce538984b2
- data.tar.gz: 8be6b6bdef48de1cbd19b2785530a5bc17e43804a43ff01f287a89a129a83897f8ea05ecb951018c99f888bbfefbbc33e89ee9cfb0adb2e86eba2e3971703c1d
+ metadata.gz: df7412344421bd421fb459fe5cf8053618dea1212c4da27e83cf41225dbaf664d9b143499978e6bcef2ae293a7bf9378d3ecb4867f989553f798e9723ba8344b
+ data.tar.gz: 436474ceafd2689137fab890b19ca24715ebe72dd1311b3ad64313bc130cf8bbce12fe35049008d20a89634309cbac882da70bc891522d45a58e8ce310b466a7
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ ### 0.5.08 / 2017-11-xx
+ * [FIX] Method `BaseParser::initialize`: missing parameter name in doc caused a YARD warning.
+ * [FIX] Method `GrmItemsBuilder::build_dotted_items`: missing parameter name in doc caused a YARD warning.
+ * [FIX] Method `NonTerminalVertex::initialize`: missing parameter name in doc caused a YARD warning.
+
+
  ### 0.5.07 / 2017-11-11
  * [NEW] File `benchmark_mini_en.rb` added in `examples/NLP` folder for parsing performance measurements.
  * [CHANGE] Demo calculator in `examples/general/calc_iter2`: added support for log10 and cbrt functions. README.md slightly reworked.
data/examples/NLP/nano_eng/nano_en_demo.rb ADDED
@@ -0,0 +1,118 @@
+ require 'rley' # Load Rley library
+
+ ########################################
+ # Step 1. Define a grammar for a nano English-like language
+ # based on an example from the Jurafsky & Martin book (chapter 12 of the book).
+ # Daniel Jurafsky, James H. Martin: "Speech and Language Processing";
+ # 2009, Pearson Education, Inc., ISBN 978-0135041963
+ # It defines the syntax of a sentence in a mini English-like language
+ # with a very simplified syntax and vocabulary
+
+ # Instantiate a builder object that will build the grammar for us
+ builder = Rley::Syntax::GrammarBuilder.new do
+   # In the next 2 lines we define the terminal symbols
+   # (= word categories in the lexicon)
+   add_terminals('Noun', 'Proper-Noun', 'Pronoun', 'Verb')
+   add_terminals('Aux', 'Determiner', 'Preposition')
+
+   # Here we define the productions (= grammar rules)
+   rule 'Start' => 'S'
+   rule 'S' => %w[NP VP]
+   rule 'S' => %w[Aux NP VP]
+   rule 'S' => 'VP'
+   rule 'NP' => 'Pronoun'
+   rule 'NP' => 'Proper-Noun'
+   rule 'NP' => %w[Determiner Nominal]
+   rule 'Nominal' => 'Noun'
+   rule 'Nominal' => %w[Nominal Noun]
+   rule 'VP' => 'Verb'
+   rule 'VP' => %w[Verb NP]
+   rule 'VP' => %w[Verb NP PP]
+   rule 'VP' => %w[Verb PP]
+   rule 'VP' => %w[VP PP]
+   rule 'PP' => %w[Preposition NP]
+ end
+
+ # And now, let's build the grammar...
+ grammar = builder.grammar
+
+ ########################################
+ # Step 2. Creating a lexicon
+ # To simplify things, the lexicon is implemented as a Hash with pairs of the form:
+ # word => terminal symbol name
+ Lexicon = {
+   'man' => 'Noun',
+   'dog' => 'Noun',
+   'cat' => 'Noun',
+   'telescope' => 'Noun',
+   'park' => 'Noun',
+   'saw' => 'Verb',
+   'ate' => 'Verb',
+   'walked' => 'Verb',
+   'John' => 'Proper-Noun',
+   'Mary' => 'Proper-Noun',
+   'Bob' => 'Proper-Noun',
+   'a' => 'Determiner',
+   'an' => 'Determiner',
+   'the' => 'Determiner',
+   'my' => 'Determiner',
+   'in' => 'Preposition',
+   'on' => 'Preposition',
+   'by' => 'Preposition',
+   'with' => 'Preposition'
+ }.freeze
+
+ ########################################
+ # Step 3. Creating a tokenizer
+ # A tokenizer reads the input string and converts it into a sequence of tokens
+ # Highly simplified tokenizer implementation.
+ def tokenizer(aTextToParse, aGrammar)
+   tokens = aTextToParse.scan(/\S+/).map do |word|
+     term_name = Lexicon[word]
+     raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
+     terminal = aGrammar.name2symbol[term_name]
+     Rley::Lexical::Token.new(word, terminal)
+   end
+
+   return tokens
+ end
+
+ ########################################
+ # Step 4. Create a parser for that grammar
+ # Easy with Rley...
+ parser = Rley::Parser::GFGEarleyParser.new(grammar)
+
+ ########################################
+ # Step 5. Parsing the input
+ input_to_parse = 'John saw Mary with a telescope'
+ # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
+ # Convert input text into a sequence of token objects...
+ tokens = tokenizer(input_to_parse, grammar)
+ result = parser.parse(tokens)
+
+ puts "Parsing successful? #{result.success?}"
+ unless result.success?
+   puts result.failure_reason.message
+   exit(1)
+ end
+
+ ########################################
+ # Step 6. Generating a parse tree from parse result
+ ptree = result.parse_tree
+
+ # Let's create a parse tree visitor
+ visitor = Rley::ParseTreeVisitor.new(ptree)
+
+ # Let's create a formatter (i.e. visit event listener)
+ # renderer = Rley::Formatter::Debug.new($stdout)
+
+ # Let's create a formatter that will render the parse tree with characters
+ renderer = Rley::Formatter::Asciitree.new($stdout)
+
+ # Let's create a formatter that will render the parse tree in labelled
+ # bracket notation
+ # renderer = Rley::Formatter::BracketNotation.new($stdout)
+
+ # Subscribe the formatter to the visitor's events and launch the visit
+ renderer.render(visitor)
+ # End of file
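
A side note on Step 6 of the demo above: the Debug, Asciitree and BracketNotation formatters are interchangeable because each one subscribes to the visitor's events and writes to the IO object it received in its constructor. Assuming only that (the demo's commented-out lines suggest it, but do not show it running), the labelled-bracket output could be captured into a string instead of being printed, e.g.:

  require 'stringio'

  buffer = StringIO.new
  # Assumption: BracketNotation accepts any IO-like object, just as Asciitree
  # is given $stdout in the demo above.
  bracket_renderer = Rley::Formatter::BracketNotation.new(buffer)

  # Walk the same parse tree built in Step 6 with a fresh visitor.
  another_visitor = Rley::ParseTreeVisitor.new(ptree)
  bracket_renderer.render(another_visitor)
  puts buffer.string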
data/examples/NLP/nano_eng/nano_grammar.rb ADDED
@@ -0,0 +1,59 @@
+ # Grammar for a simple subset of the English language
+ # It is called nano-English because it has a more elaborate
+ # grammar than pico-English but still remains tiny compared to "real" English
+ require 'rley' # Load the gem
+
+
+ ########################################
+ # Define a grammar for a nano English-like language
+ # based on chapter 12 from the Jurafsky & Martin book.
+ # Daniel Jurafsky, James H. Martin: "Speech and Language Processing";
+ # 2009, Pearson Education, Inc., ISBN 978-0135041963
+ # It defines the syntax of a sentence in a mini English-like language
+ builder = Rley::Syntax::GrammarBuilder.new do
+   add_terminals('Pronoun', 'Proper-Noun')
+   add_terminals('Determiner', 'Noun')
+   add_terminals('Cardinal_number', 'Ordinal_number', 'Quant')
+   add_terminals('Verb', 'GerundV', 'Aux')
+   add_terminals('Predeterminer', 'Preposition')
+
+   rule 'language' => 'sentence'
+   rule 'sentence' => 'declarative'
+   rule 'sentence' => 'imperative'
+   rule 'sentence' => 'yes_no_question'
+   rule 'sentence' => 'wh_subject_question'
+   rule 'sentence' => 'wh_non_subject_question'
+   rule 'declarative' => %w[NP VP]
+   rule 'imperative' => 'VP'
+   rule 'yes_no_question' => %w[Aux NP VP]
+   rule 'wh_subject_question' => %w[Wh_NP NP VP]
+   rule 'wh_non_subject_question' => %w[Wh_NP Aux NP VP]
+   rule 'NP' => %w[Predeterminer NP]
+   rule 'NP' => 'Pronoun'
+   rule 'NP' => 'Proper-Noun'
+   rule 'NP' => %w[Det Card Ord Quant Nominal]
+   rule 'VP' => 'Verb'
+   rule 'VP' => %w[Verb NP]
+   rule 'VP' => %w[Verb NP PP]
+   rule 'VP' => %w[Verb PP]
+   rule 'Det' => 'Determiner'
+   rule 'Det' => []
+   rule 'Card' => 'Cardinal_number'
+   rule 'Card' => []
+   rule 'Ord' => 'Ordinal_number'
+   rule 'Ord' => []
+   rule 'Nominal' => 'Noun'
+   rule 'Nominal' => %w[Nominal Noun]
+   rule 'Nominal' => %w[Nominal GerundVP]
+   rule 'Nominal' => %w[Nominal RelClause]
+   rule 'PP' => %w[Preposition NP]
+   rule 'GerundVP' => 'GerundV'
+   rule 'GerundVP' => %w[GerundV NP]
+   rule 'GerundVP' => %w[GerundV NP PP]
+   rule 'GerundVP' => %w[GerundV PP]
+   rule 'RelClause' => %w[Relative_pronoun VP]
+
+ end
+
+ # And now build the grammar...
+ NanoGrammar = builder.grammar
data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} CHANGED
@@ -1,12 +1,12 @@
  require 'rley' # Load Rley library

  ########################################
- # Step 1. Define a grammar for a micro English-like language
+ # Step 1. Define a grammar for a pico English-like language
  # based on example from NLTK book (chapter 8 of the book).
  # Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
  # with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
  # It defines the syntax of a sentence in a mini English-like language
- # with a very simplified syntax.
+ # with a very simplified syntax and vocabulary

  # Instantiate a builder object that will build the grammar for us
  builder = Rley::Syntax::GrammarBuilder.new do
data/examples/general/SRL/lib/ast_builder.rb ADDED
@@ -0,0 +1,176 @@
+ require_relative 'ast_building'
+ require_relative 'regex_repr'
+
+ # The purpose of an ASTBuilder is to build, piece by piece, an AST
+ # (Abstract Syntax Tree) from a sequence of input tokens and
+ # visit events produced by walking over a GFGParsing object.
+ # Uses the Builder GoF pattern.
+ # The Builder pattern creates a complex object
+ # (say, a parse tree) from simpler objects (terminal and non-terminal
+ # nodes) using a step-by-step approach.
+ class ASTBuilder < Rley::Parser::ParseTreeBuilder
+   include ASTBuilding
+
+   Terminal2NodeClass = {}.freeze
+
+   protected
+
+   # Overriding method.
+   # Factory method for creating a node object for the given
+   # input token.
+   # @param aTerminal [Terminal] Terminal symbol associated with the token
+   # @param aTokenPosition [Integer] Position of token in the input stream
+   # @param aToken [Token] The input token
+   def new_leaf_node(aProduction, aTerminal, aTokenPosition, aToken)
+     node = Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
+
+     return node
+   end
+
+   # Overriding method.
+   # Factory method for creating a parent node object.
+   # @param aProduction [Production] Production rule
+   # @param aRange [Range] Range of tokens matched by the rule
+   # @param theTokens [Array] The input tokens
+   # @param theChildren [Array] Children nodes (one per rhs symbol)
+   def new_parent_node(aProduction, aRange, theTokens, theChildren)
+     node = case aProduction.name
+       when 'srl_0' # rule 'srl' => 'quantifier'
+         return_first_child(aRange, theTokens, theChildren)
+
+       when 'quantifier_0' # rule 'quantifier' => 'ONCE'
+         multiplicity(1, 1)
+
+       when 'quantifier_1' # rule 'quantifier' => 'TWICE'
+         multiplicity(2, 2)
+
+       when 'quantifier_2' # rule 'quantifier' => %w[EXACTLY count TIMES]
+         reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
+
+       when 'quantifier_3' # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
+         reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
+
+       when 'quantifier_4' # rule 'quantifier' => 'OPTIONAL'
+         multiplicity(0, 1)
+
+       when 'quantifier_5' # rule 'quantifier' => %w[ONCE OR MORE]
+         multiplicity(1, :more)
+
+       when 'quantifier_6' # rule 'quantifier' => %w[NEVER OR MORE]
+         multiplicity(0, :more)
+
+       when 'quantifier_7' # rule 'quantifier' => %w[AT LEAST count TIMES]
+         reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
+
+       when 'count_0', 'count_1'
+         return_first_child(aRange, theTokens, theChildren)
+
+       when 'times_suffix_0', 'times_suffix_1'
+         nil
+       else
+         raise StandardError, "Don't know production #{aProduction.name}"
+     end
+
+     return node
+   end
+
+   def multiplicity(lowerBound, upperBound)
+     return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
+   end
+
+   # rule 'quantifier' => %w[EXACTLY count TIMES]
+   def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
+     count = theChildren[1].token.lexeme.to_i
+     multiplicity(count, count)
+   end
+
+   # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
+   def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
+     lower = theChildren[1].token.lexeme.to_i
+     upper = theChildren[3].token.lexeme.to_i
+     multiplicity(lower, upper)
+   end
+
+   # rule 'quantifier' => %w[AT LEAST count TIMES]
+   def reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
+     count = theChildren[2].token.lexeme.to_i
+     multiplicity(count, :more)
+   end
+
+
+ =begin
+   def reduce_binary_operator(theChildren)
+     operator_node = theChildren[1]
+     operator_node.children << theChildren[0]
+     operator_node.children << theChildren[2]
+     return operator_node
+   end
+
+   # rule 'simple_expression' => %w[simple_expression add_operator term]
+   def reduce_simple_expression_1(_production, _range, _tokens, theChildren)
+     reduce_binary_operator(theChildren)
+   end
+
+   # rule 'term' => %w[term mul_operator factor]
+   def reduce_term_1(_production, _range, _tokens, theChildren)
+     reduce_binary_operator(theChildren)
+   end
+
+   # rule 'factor' => %w[simple_factor POWER simple_factor]
+   def reduce_factor_1(aProduction, aRange, theTokens, theChildren)
+     result = PowerNode.new(theChildren[1].symbol, aRange)
+     result.children << theChildren[0]
+     result.children << theChildren[2]
+
+     return result
+   end
+
+   # rule 'simple_factor' => %w[sign scalar]
+   def reduce_simple_factor_0(aProduction, aRange, theTokens, theChildren)
+     first_child = theChildren[0]
+     result = if first_child.kind_of?(CalcNegateNode)
+       -theChildren[1]
+     else
+       theChildren[1]
+     end
+
+     return result
+   end
+
+   # rule 'simple_factor' => %w[unary_function in_parenthesis]
+   def reduce_simple_factor_1(aProduction, aRange, theTokens, theChildren)
+     func = CalcUnaryFunction.new(theChildren[0].symbol, aRange.low)
+     func.func_name = theChildren[0].value
+     func.children << theChildren[1]
+     return func
+   end
+
+   # rule 'simple_factor' => %w[MINUS in_parenthesis]
+   def reduce_simple_factor_2(aProduction, aRange, theTokens, theChildren)
+     negation = CalcNegateNode.new(theChildren[0].symbol, aRange.low)
+     negation.children << theChildren[1]
+     return negation
+   end
+
+   # rule 'add_operator' => 'PLUS'
+   def reduce_add_operator_0(_production, aRange, _tokens, theChildren)
+     return CalcAddNode.new(theChildren[0].symbol, aRange)
+   end
+
+   # rule 'add_operator' => 'MINUS'
+   def reduce_add_operator_1(_production, aRange, _tokens, theChildren)
+     return CalcSubtractNode.new(theChildren[0].symbol, aRange)
+   end
+
+   # rule 'mul_operator' => 'STAR'
+   def reduce_mul_operator_0(_production, aRange, _tokens, theChildren)
+     return CalcMultiplyNode.new(theChildren[0].symbol, aRange)
+   end
+
+   # rule 'mul_operator' => 'DIVIDE'
+   def reduce_mul_operator_1(_production, aRange, _tokens, theChildren)
+     return CalcDivideNode.new(theChildren[0].symbol, aRange)
+   end
+ =end
+ end # class
+ # End of file
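
All of the quantifier reductions above funnel into multiplicity(lower, upper), i.e. an SRL::Regex::Multiplicity value with a lower bound, an upper bound (an Integer or :more) and a :greedy policy. The multiplicity.rb source is not reproduced in this excerpt, so purely as orientation, the regex quantifier that such a (lower, upper) pair is meant to denote can be sketched with a small stand-alone helper (hypothetical, not part of the gem):

  # Illustrative only: maps a (lower, upper) multiplicity to regex quantifier text.
  def quantifier_text(lower, upper)
    return '?' if lower.zero? && upper == 1       # OPTIONAL
    return '*' if lower.zero? && upper == :more   # NEVER OR MORE
    return '+' if lower == 1 && upper == :more    # ONCE OR MORE
    return "{#{lower}}" if lower == upper         # ONCE / TWICE / EXACTLY n TIMES
    return "{#{lower},}" if upper == :more        # AT LEAST n TIMES
    "{#{lower},#{upper}}"                         # BETWEEN n AND m times
  end

  quantifier_text(2, 2)     # => "{2}"
  quantifier_text(0, :more) # => "*"
  quantifier_text(3, 7)     # => "{3,7}"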
data/examples/general/SRL/lib/ast_building.rb ADDED
@@ -0,0 +1,20 @@
+ # Mix-in module that provides convenience methods for
+ # constructing an AST (Abstract Syntax Tree).
+ module ASTBuilding
+   def return_first_child(_range, _tokens, theChildren)
+     return theChildren[0]
+   end
+
+   def return_second_child(_range, _tokens, theChildren)
+     return theChildren[1]
+   end
+
+   def return_last_child(_range, _tokens, theChildren)
+     return theChildren[-1]
+   end
+
+   def return_epsilon(_range, _tokens, _children)
+     return nil
+   end
+ end # module
+ # End of file
data/examples/general/SRL/lib/grammar.rb ADDED
@@ -0,0 +1,32 @@
+ # Grammar for SRL (Simple Regex Language)
+ require 'rley' # Load the gem
+ module SRL
+   ########################################
+   # Work in progress.
+   # This is a very partial grammar of SRL.
+   # It will be expanded in coming versions of Rley.
+   builder = Rley::Syntax::GrammarBuilder.new do
+     add_terminals('DIGIT', 'INTEGER')
+     add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
+     add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
+     add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
+
+     # For the moment we focus on the quantifier syntax only...
+     rule 'srl' => 'quantifier'
+     rule 'quantifier' => 'ONCE'
+     rule 'quantifier' => 'TWICE'
+     rule 'quantifier' => %w[EXACTLY count TIMES]
+     rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
+     rule 'quantifier' => 'OPTIONAL'
+     rule 'quantifier' => %w[ONCE OR MORE]
+     rule 'quantifier' => %w[NEVER OR MORE]
+     rule 'quantifier' => %w[AT LEAST count TIMES]
+     rule 'count' => 'DIGIT'
+     rule 'count' => 'INTEGER'
+     rule 'times_suffix' => 'TIMES'
+     rule 'times_suffix' => []
+   end
+
+   # And now build the grammar and make it accessible via a global constant
+   Grammar = builder.grammar
+ end # module
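
Taken together with the ASTBuilder shown earlier, this grammar is already enough to recognize an isolated SRL quantifier phrase. The following is a minimal sketch of that wiring; it hand-builds the tokens for "exactly 2 times" instead of using the tokenizer.rb listed in this diff (whose source is not reproduced here), and the token/terminal pairing simply mirrors the tokenizer of nano_en_demo.rb above. Mapping '2' to the DIGIT terminal is an assumption:

  require 'rley'
  require_relative 'grammar' # defines SRL::Grammar (the file shown above)

  parser = Rley::Parser::GFGEarleyParser.new(SRL::Grammar)

  # Hand-rolled tokens for the SRL phrase "exactly 2 times".
  pairs = [['exactly', 'EXACTLY'], ['2', 'DIGIT'], ['times', 'TIMES']]
  tokens = pairs.map do |lexeme, term_name|
    terminal = SRL::Grammar.name2symbol[term_name]
    Rley::Lexical::Token.new(lexeme, terminal)
  end

  result = parser.parse(tokens)
  puts "Parsing successful? #{result.success?}"
  # srl_demo.rb (not shown in this excerpt) goes further and turns the result
  # into a Multiplicity node by means of the ASTBuilder.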