RubyGems - rley - Versions diffs - 0.3.08 → 0.3.09 - Mend

rley 0.3.08 → 0.3.09

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/README.md +190 -35
data/examples/NLP/mini_en_demo.rb +92 -0
data/lib/rley/constants.rb +1 -1
metadata +3 -20
data/examples/grammars/grammar_L0.rb +0 -32
data/examples/grammars/grammar_abc.rb +0 -26
data/examples/parsers/demo-JSON/JSON_grammar.rb +0 -31
data/examples/parsers/demo-JSON/JSON_lexer.rb +0 -114
data/examples/parsers/demo-JSON/JSON_parser.rb +0 -89
data/examples/parsers/demo-JSON/demo_json.rb +0 -42
data/examples/parsers/parsing_L0.rb +0 -124
data/examples/parsers/parsing_L1.rb +0 -137
data/examples/parsers/parsing_abc.rb +0 -71
data/examples/parsers/parsing_ambig.rb +0 -92
data/examples/parsers/parsing_another.rb +0 -70
data/examples/parsers/parsing_b_expr.rb +0 -85
data/examples/parsers/parsing_err_expr.rb +0 -74
data/examples/parsers/parsing_groucho.rb +0 -97
data/examples/parsers/parsing_right_recursive.rb +0 -70
data/examples/parsers/parsing_tricky.rb +0 -91
data/examples/parsers/tracing_parser.rb +0 -54
data/examples/recognizers/recognizer_abc.rb +0 -71

data/examples/parsers/parsing_abc.rb DELETED Viewed

@@ -1,71 +0,0 @@
-# Purpose: to demonstrate how to build and render a parse tree
-require 'rley'  # Load the gem
-# Steps to render a parse tree (of a valid parsed input):
-# 1. Define a grammar
-# 2. Create a tokenizer for the language
-# 3. Create a parser for that grammar
-# 4. Tokenize the input
-# 5. Let the parser process the input
-# 6. Generate a parse tree from the parse result
-# 7. Render the parse tree (in JSON)
-########################################
-# Step 1. Define a grammar for a very simple language
-# It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
-# (based on example in N. Wirth's book "Compiler Construction", p. 6)
-# Let's create the grammar step-by-step with the grammar builder:
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('a', 'b', 'c')
-builder.add_production('S' => 'A')
-builder.add_production('A' => %w(a A c))
-builder.add_production('A' => 'b')
-# And now build the grammar...
-grammar_abc = builder.grammar
-########################################
-# Step 2. Create a tokenizer for the language
-# The tokenizer transforms the input into an array of tokens
-def tokenizer(aText, aGrammar)
-  tokens = aText.chars.map do |ch|
-    terminal = aGrammar.name2symbol[ch]
-    raise StandardError, "Unknown input character '#{ch}'" if terminal.nil?
-    Rley::Parser::Token.new(ch, terminal)
-  end
-  return tokens
-end
-########################################
-# Step 3. Create a parser for that grammar
-parser = Rley::Parser::EarleyParser.new(grammar_abc)
-########################################
-# Step 4. Tokenize the input
-valid_input = 'aabcc'
-tokens = tokenizer(valid_input, grammar_abc)
-########################################
-# Step 5. Let the parser process the input
-result = parser.parse(tokens)
-########################################
-# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
-########################################
-# Step 7. Render the parse tree (in JSON)
-# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
-#Here we create a renderer object...
-renderer = Rley::Formatter::Json.new(STDOUT)
-# Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{valid_input}' input:"
-renderer.render(visitor)
-# End of file

data/examples/parsers/parsing_ambig.rb DELETED Viewed

@@ -1,92 +0,0 @@
-# Purpose: to demonstrate how to build and render a parse tree
-require 'pp' # TODO remove this dependency
-require 'rley'  # Load the gem
-# Steps to render a parse tree (of a valid parsed input):
-# 1. Define a grammar
-# 2. Create a tokenizer for the language
-# 3. Create a parser for that grammar
-# 4. Tokenize the input
-# 5. Let the parser process the input
-# 6. Generate a parse tree from the parse result
-# 7. Render the parse tree (in JSON)
-########################################
-# Step 1. Define a grammar for a very simple language
-# Grammar 3: An ambiguous arithmetic expression language
-# (based on example in article on Earley's algorithm in Wikipedia)
-# Let's create the grammar step-by-step with the grammar builder:
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('integer', '+', '*')
-builder.add_production('P' => 'S')
-builder.add_production('S' => %w(S + S))
-builder.add_production('S' => %w(S * S))
-builder.add_production('S' => 'L')
-builder.add_production('L' => 'integer')
-# And now build the grammar...
-grammar_amb = builder.grammar
-########################################
-# Step 2. Create a tokenizer for the language
-# The tokenizer transforms the input into an array of tokens
-def tokenizer(aText, aGrammar)
-  tokens = aText.scan(/\S+/).map do |lexeme|
-    case lexeme
-      when '+', '*'
-        terminal = aGrammar.name2symbol[lexeme]
-      when /^[-+]?\d+$/
-        terminal = aGrammar.name2symbol['integer']
-      else
-        msg = "Unknown input text '#{lexeme}'"
-        raise StandardError, msg
-    end
-    Rley::Parser::Token.new(lexeme, terminal)
-  end
-  return tokens
-end
-########################################
-# Step 3. Create a parser for that grammar
-parser = Rley::Parser::EarleyParser.new(grammar_amb)
-########################################
-# Step 4. Tokenize the input
-valid_input = '2 + 3 * 4'
-tokens = tokenizer(valid_input, grammar_amb)
-########################################
-# Step 5. Let the parser process the input
-result = parser.parse(tokens)
-puts "Parsing success? #{result.success?}"
-puts "Ambiguous parse? #{result.ambiguous?}"
-# pp result
-result.chart.state_sets.each_with_index do |aStateSet, index|
-  puts "State[#{index}]"
-  puts "========"
-  aStateSet.states.each { |aState| puts aState.to_s }
-end
-=begin
-########################################
-# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
-pp ptree
-########################################
-# Step 7. Render the parse tree (in JSON)
-# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
-#Here we create a renderer object...
-renderer = Rley::Formatter::Json.new(STDOUT)
-# Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{valid_input}' input:"
-renderer.render(visitor)
-=end
-# End of file

data/examples/parsers/parsing_another.rb DELETED Viewed

@@ -1,70 +0,0 @@
-# Purpose: to demonstrate how to build and render a parse tree
-require 'pp' # TODO remove this dependency
-require 'rley'  # Load the gem
-# Steps to render a parse tree (of a valid parsed input):
-# 1. Define a grammar
-# 2. Create a tokenizer for the language
-# 3. Create a parser for that grammar
-# 4. Tokenize the input
-# 5. Let the parser process the input
-# 6. Generate a parse tree from the parse result
-# 7. Render the parse tree (in JSON)
-########################################
-# Step 1. Define a problematic grammar
-# Grammar Z: A grammar with hidden left recursion and a cycle
-# (based on example 2 in the article by Elizabeth Scott, "SPPF-Style Parsing From Earley Recognisers",
-# Electronic Notes in Theoretical Computer Science 203 (2008) 53–67)
-# Let's create the grammar step-by-step with the grammar builder:
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('b')
-builder.add_production('S' => %w(S S))
-builder.add_production('S' => 'b')
-# And now build the grammar...
-grammar_tricky = builder.grammar
-########################################
-# Step 2. Create a tokenizer for the language
-# The tokenizer transforms the input into an array of tokens
-def tokenizer(aText, aGrammar)
-  tokens = aText.chars.map do |lexeme|
-    case lexeme
-      when 'b'
-        terminal = aGrammar.name2symbol[lexeme]
-      else
-        msg = "Unknown input text '#{lexeme}'"
-        raise StandardError, msg
-    end
-    Rley::Parser::Token.new(lexeme, terminal)
-  end
-  return tokens
-end
-########################################
-# Step 3. Create a parser for that grammar
-parser = Rley::Parser::EarleyParser.new(grammar_tricky)
-########################################
-# Step 4. Tokenize the input
-valid_input = 'bbb'
-tokens = tokenizer(valid_input, grammar_tricky)
-########################################
-# Step 5. Let the parser process the input
-result = parser.parse(tokens)
-puts "Parsing success? #{result.success?}"
-puts "Parsing ambiguous? #{result.ambiguous?}"
-#pp result
-result.chart.state_sets.each_with_index do |aStateSet, index|
-  puts "State[#{index}]"
-  puts "========"
-  aStateSet.states.each { |aState| puts aState.to_s }
-end
-# End of file

data/examples/parsers/parsing_b_expr.rb DELETED Viewed

@@ -1,85 +0,0 @@
-# Purpose: to demonstrate how to parse basic arithmetic expressions
-# and render a parse tree
-require 'pp' # TODO remove this dependency
-require 'rley'  # Load the gem
-# Steps to render a parse tree (of a valid parsed input):
-# 1. Define a grammar
-# 2. Create a tokenizer for the language
-# 3. Create a parser for that grammar
-# 4. Tokenize the input
-# 5. Let the parser process the input
-# 6. Generate a parse tree from the parse result
-# 7. Render the parse tree (in JSON)
-########################################
-# Step 1. Define a grammar for a very simple arithmetic expression language
-# (based on example in article on Earley's algorithm in Wikipedia)
-# Let's create the grammar piece by piece
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('+', '*', 'integer')
-builder.add_production('P' => 'S')
-builder.add_production('S' => %w(S + M))
-builder.add_production('S' => 'M')
-builder.add_production('M' => %w(M * T))
-builder.add_production('M' => 'T')
-builder.add_production('T' => 'integer')
-# And now build the grammar...
-grammar_s_expr = builder.grammar
-########################################
-# Step 2. Create a tokenizer for the language
-# The tokenizer transforms the input into an array of tokens
-def tokenizer(aText, aGrammar)
-  tokens = aText.scan(/\S+/).map do |lexeme|
-    case lexeme
-      when '+', '*'
-        terminal = aGrammar.name2symbol[lexeme]
-      when /^[-+]?\d+$/
-        terminal = aGrammar.name2symbol['integer']
-      else
-        msg = "Unknown input text '#{lexeme}'"
-        raise StandardError, msg
-    end
-    Rley::Parser::Token.new(lexeme, terminal)
-  end
-  return tokens
-end
-########################################
-# Step 3. Create a parser for that grammar
-parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
-########################################
-# Step 4. Tokenize the input
-valid_input = '2 + 3 * 4'
-tokens = tokenizer(valid_input, grammar_s_expr)
-########################################
-# Step 5. Let the parser process the input
-result = parser.parse(tokens)
-puts "Parse successful? #{result.success?}"
-########################################
-# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
-pp ptree
-########################################
-# Step 7. Render the parse tree (in JSON)
-# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
-#Here we create a renderer object...
-renderer = Rley::Formatter::Json.new(STDOUT)
-# Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{valid_input}' input:"
-renderer.render(visitor)
-# End of file

data/examples/parsers/parsing_err_expr.rb DELETED Viewed

@@ -1,74 +0,0 @@
-# Purpose: to demonstrate how to catch parsing errors
-require 'pp' # TODO remove this dependency
-require 'rley'  # Load the gem
-# Steps to report a syntax error (for an invalid input):
-# 1. Define a grammar
-# 2. Create a tokenizer for the language
-# 3. Create a parser for that grammar
-# 4. Tokenize the input
-# 5. Let the parser process the invalid input
-########################################
-# Step 1. Define a grammar for a very simple arithmetic expression language
-# (based on example in article on Earley's algorithm in Wikipedia)
-# Let's create the grammar piece by piece
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('+', '*', 'integer')
-builder.add_production('P' => 'S')
-builder.add_production('S' => %w(S + M))
-builder.add_production('S' => 'M')
-builder.add_production('M' => %w(M * T))
-builder.add_production('M' => 'T')
-builder.add_production('T' => 'integer')
-# And now build the grammar...
-grammar_s_expr = builder.grammar
-########################################
-# Step 2. Create a tokenizer for the language
-# The tokenizer transforms the input into an array of tokens
-def tokenizer(aText, aGrammar)
-  tokens = aText.scan(/\S+/).map do |lexeme|
-    case lexeme
-      when '+', '*'
-        terminal = aGrammar.name2symbol[lexeme]
-      when /^[-+]?\d+$/
-        terminal = aGrammar.name2symbol['integer']
-      else
-        msg = "Unknown input text '#{lexeme}'"
-        raise StandardError, msg
-    end
-    Rley::Parser::Token.new(lexeme, terminal)
-  end
-  return tokens
-end
-########################################
-# Step 3. Create a parser for that grammar
-parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
-########################################
-# Step 4. Tokenize the invalid input
-invalid_input = '2 + 3 * * 4' # Notice the repeated stars (*)
-puts "Invalid expression to parse: #{invalid_input}"
-puts ''
-tokens = tokenizer(invalid_input, grammar_s_expr)
-########################################
-# Step 5. Let's catch the exception caused by a syntax error...
-# ... and display the error message
-begin
-  parser.parse(tokens)
-  rescue StandardError => exc
-    puts exc.message
-end
-# End of file

data/examples/parsers/parsing_groucho.rb DELETED Viewed

@@ -1,97 +0,0 @@
-# Purpose: to demonstrate how to parse an emblematic ambiguous sentence
-# Based on example found at: http://www.nltk.org/book_1ed/ch08.html
-require 'rley'  # Load the gem
-# Steps to render a parse tree (of a valid parsed input):
-# 1. Define a grammar
-# 2. Create a tokenizer for the language
-# 3. Create a parser for that grammar
-# 4. Tokenize the input
-# 5. Let the parser process the input
-# 6. Generate a parse tree from the parse result and render it (in JSON)
-########################################
-# Step 1. Define a grammar for a micro English-like language
-# based on Jurafsky & Martin L0 language (chapter 12 of the book).
-# It defines the syntax of a sentence in a language with a
-# very limited syntax and lexicon in the context of airline reservation.
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('N', 'V', 'Pro')  # N(oun), V(erb), Pro(noun)
-builder.add_terminals('Det', 'P')       # Det(erminer), P(reposition)
-builder.add_production('S' => %w[NP VP])
-builder.add_production('NP' => %w[Det N])
-builder.add_production('NP' => %w[Det N PP])
-builder.add_production('NP' => 'Pro')
-builder.add_production('VP' => %w[V NP])
-builder.add_production('VP' => %w[VP PP])
-builder.add_production('PP' => %w[P NP])
-# And now build the grammar...
-groucho_grammar = builder.grammar
-########################################
-# Step 2. Create a tokenizer for the language
-# The tokenizer transforms the input into an array of tokens
-# This is a very simplistic implementation for demo purposes.
-# The lexicon is just a Hash with pairs of the form:
-# word => terminal symbol name
-Groucho_lexicon = {
-  'elephant' => 'N',
-  'pajamas' => 'N',
-  'shot' => 'V',
-  'I' => 'Pro',
-  'an' => 'Det',
-  'my' => 'Det',
-  'in' => 'P',
-}
-# Highly simplified tokenizer implementation.
-def tokenizer(aText, aGrammar)
-  tokens = aText.scan(/\S+/).map do |word|
-    term_name = Groucho_lexicon[word]
-    if term_name.nil?
-      raise StandardError, "Word '#{word}' not found in lexicon"
-    end
-    terminal = aGrammar.name2symbol[term_name]
-    Rley::Parser::Token.new(word, terminal)
-  end
-  return tokens
-end
-########################################
-# Step 3. Create a parser for that grammar
-parser = Rley::Parser::EarleyParser.new(groucho_grammar)
-########################################
-# Step 4. Tokenize the input
-valid_input = 'I shot an elephant in my pajamas'
-tokens = tokenizer(valid_input, groucho_grammar)
-########################################
-# Step 5. Let the parser process the input
-result = parser.parse(tokens)
-puts "Parsing success? #{result.success?}"
-#=begin
-########################################
-# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
-########################################
-# Step 7. Render the parse tree (in JSON)
-# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
-#Here we create a renderer object...
-renderer = Rley::Formatter::Json.new(STDOUT)
-# Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{valid_input}' input:"
-renderer.render(visitor)
-#=end
-# End of file