rley 0.5.07 → 0.5.08

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
  4. data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
  5. data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
  6. data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
  7. data/examples/general/SRL/lib/ast_builder.rb +176 -0
  8. data/examples/general/SRL/lib/ast_building.rb +20 -0
  9. data/examples/general/SRL/lib/grammar.rb +32 -0
  10. data/examples/general/SRL/lib/parser.rb +26 -0
  11. data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
  12. data/examples/general/SRL/lib/regex_repr.rb +1 -0
  13. data/examples/general/SRL/lib/srl_demo.rb +67 -0
  14. data/examples/general/SRL/lib/tokenizer.rb +101 -0
  15. data/examples/general/SRL/spec/integration_spec.rb +103 -0
  16. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
  17. data/examples/general/SRL/spec/spec_helper.rb +25 -0
  18. data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
  19. data/examples/general/SRL/srl_demo.rb +57 -0
  20. data/examples/general/calc_iter1/calc_demo.rb +1 -1
  21. data/examples/general/calc_iter2/ast_building.rb +20 -0
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
  23. data/examples/general/calc_iter2/calc_demo.rb +1 -1
  24. data/lib/rley/base/base_parser.rb +1 -1
  25. data/lib/rley/base/grm_items_builder.rb +1 -1
  26. data/lib/rley/constants.rb +1 -1
  27. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  28. data/lib/rley/parser/gfg_chart.rb +8 -3
  29. data/lib/rley/parser/gfg_earley_parser.rb +5 -2
  30. data/lib/rley/parser/gfg_parsing.rb +5 -1
  31. data/lib/rley/parser/parse_tree_builder.rb +16 -5
  32. data/lib/rley/ptree/terminal_node.rb +3 -2
  33. data/spec/rley/parser/ast_builder_spec.rb +2 -2
  34. data/spec/rley/parser/cst_builder_spec.rb +2 -3
  35. metadata +20 -4
@@ -0,0 +1,25 @@
1
+ # File: spec_helper.rb
2
+ # Purpose: utility file that is loaded by all our RSpec files
3
+
4
+ require 'simplecov'
5
+
6
+ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new(
7
+ [
8
+ SimpleCov::Formatter::HTMLFormatter,
9
+ ]
10
+ )
11
+
12
+ require 'pp' # Use pretty-print for debugging purposes
13
+ require 'rspec' # Use the RSpec framework
14
+
15
+ RSpec.configure do |config|
16
+ config.expect_with :rspec do |c|
17
+ # Disable the `should` syntax...
18
+ c.syntax = :expect
19
+ end
20
+
21
+ # Display stack trace in case of failure
22
+ config.full_backtrace = true
23
+ end
24
+
25
+ # End of file
@@ -0,0 +1,125 @@
1
+ require_relative 'spec_helper' # Use the RSpec framework
2
+ require_relative '../lib/grammar'
3
+ require_relative '../lib/tokenizer' # Load the class under test
4
+
5
+
6
+ module SRL
7
+ describe Tokenizer do
8
+ def match_expectations(aTokenizer, theExpectations)
9
+ aTokenizer.tokens.each_with_index do |token, i|
10
+ terminal, lexeme = theExpectations[i]
11
+ expect(token.terminal.name).to eq(terminal)
12
+ expect(token.lexeme).to eq(lexeme)
13
+ end
14
+ end
15
+
16
+
17
+ subject { Tokenizer.new('', SRL::Grammar) }
18
+
19
+ context 'Initialization:' do
20
+
21
+ it 'should be initialized with a text to tokenize and a grammar' do
22
+ expect { Tokenizer.new('anything', SRL::Grammar) }.not_to raise_error
23
+ end
24
+
25
+ it 'should have its scanner initialized' do
26
+ expect(subject.scanner).to be_kind_of(StringScanner)
27
+ end
28
+ end # context
29
+
30
+ context 'Single token recognition:' do
31
+ # it 'should tokenize delimiters and separators' do
32
+ # subject.scanner.string = ','
33
+ # token = subject.tokens.first
34
+ # expect(token).to be_kind_of(Rley::Lexical::Token)
35
+ # expect(token.terminal.name).to eq('COMMA')
36
+ # expect(token.lexeme).to eq(',')
37
+ # end
38
+
39
+ it 'should tokenize keywords' do
40
+ sample = 'between Exactly oncE optional TWICE'
41
+ subject.scanner.string = sample
42
+ subject.tokens.each do |tok|
43
+ expect(tok).to be_kind_of(Rley::Lexical::Token)
44
+ expect(tok.terminal.name).to eq(tok.lexeme.upcase)
45
+ end
46
+ end
47
+
48
+ it 'should tokenize integer values' do
49
+ subject.scanner.string = ' 123 '
50
+ token = subject.tokens.first
51
+ expect(token).to be_kind_of(Rley::Lexical::Token)
52
+ expect(token.terminal.name).to eq('INTEGER')
53
+ expect(token.lexeme).to eq('123')
54
+ end
55
+
56
+ it 'should tokenize single digits' do
57
+ subject.scanner.string = ' 1 '
58
+ token = subject.tokens.first
59
+ expect(token).to be_kind_of(Rley::Lexical::Token)
60
+ expect(token.terminal.name).to eq('DIGIT')
61
+ expect(token.lexeme).to eq('1')
62
+ end
63
+ end # context
64
+
65
+ context 'Quantifier tokenization:' do
66
+ it "should recognize 'exactly ... times'" do
67
+ input = 'exactly 4 Times'
68
+ subject.scanner.string = input
69
+ expectations = [
70
+ ['EXACTLY', 'exactly'],
71
+ ['DIGIT', '4'],
72
+ ['TIMES', 'Times']
73
+ ]
74
+ match_expectations(subject, expectations)
75
+ end
76
+
77
+ it "should recognize 'between ... and ... times'" do
78
+ input = 'Between 2 AND 4 times'
79
+ subject.scanner.string = input
80
+ expectations = [
81
+ ['BETWEEN', 'Between'],
82
+ ['DIGIT', '2'],
83
+ ['AND', 'AND'],
84
+ ['DIGIT', '4'],
85
+ ['TIMES', 'times']
86
+ ]
87
+ match_expectations(subject, expectations)
88
+ end
89
+
90
+ it "should recognize 'once or more'" do
91
+ input = 'Once or MORE'
92
+ subject.scanner.string = input
93
+ expectations = [
94
+ ['ONCE', 'Once'],
95
+ ['OR', 'or'],
96
+ ['MORE', 'MORE']
97
+ ]
98
+ match_expectations(subject, expectations)
99
+ end
100
+
101
+ it "should recognize 'never or more'" do
102
+ input = 'never or more'
103
+ subject.scanner.string = input
104
+ expectations = [
105
+ ['NEVER', 'never'],
106
+ ['OR', 'or'],
107
+ ['MORE', 'more']
108
+ ]
109
+ match_expectations(subject, expectations)
110
+ end
111
+
112
+ it "should recognize 'at least ... times'" do
113
+ input = 'at least 10 times'
114
+ subject.scanner.string = input
115
+ expectations = [
116
+ ['AT', 'at'],
117
+ ['LEAST', 'least'],
118
+ ['INTEGER', '10'],
119
+ ['TIMES', 'times']
120
+ ]
121
+ match_expectations(subject, expectations)
122
+ end
123
+ end # context
124
+ end # describe
125
+ end # module
@@ -0,0 +1,57 @@
1
+ require_relative './lib/parser'
2
+
3
+ def print_title(aTitle)
4
+ puts aTitle
5
+ puts '=' * aTitle.size
6
+ end
7
+
8
+ def print_tree(aTitle, aParseTree)
9
+ # Let's create a parse tree visitor
10
+ visitor = Rley::ParseTreeVisitor.new(aParseTree)
11
+
12
+ # Now output formatted parse tree
13
+ print_title(aTitle)
14
+ renderer = Rley::Formatter::Asciitree.new($stdout)
15
+ renderer.render(visitor)
16
+ puts ''
17
+ end
18
+
19
+ # Create an SRL parser object
20
+ parser = SRL::Parser.new
21
+
22
+ # Parse the input expression in command-line
23
+ if ARGV.empty?
24
+ my_name = File.basename(__FILE__)
25
+ msg = <<-END_MSG
26
+ WORK IN PROGRESS
27
+ Simple Regex Language parser:
28
+ - Parses a very limited subset of the language and displays the parse tree
29
+
30
+ Command-line syntax:
31
+ ruby #{my_name} "quantifier expression"
32
+ where:
33
+ the SRL quantifier expression is enclosed between double quotes (")
34
+
35
+ Examples:
36
+ ruby #{my_name} "exactly 4 times"
37
+ ruby #{my_name} "between 2 and 3 times"
38
+ END_MSG
39
+ puts msg
40
+ exit(1)
41
+ end
42
+ puts ARGV[0]
43
+ result = parser.parse_SRL(ARGV[0])
44
+
45
+ unless result.success?
46
+ # Stop if the parse failed...
47
+ puts "Parsing of '#{ARGV[0]}' failed"
48
+ puts "Reason: #{result.failure_reason.message}"
49
+ exit(1)
50
+ end
51
+
52
+
53
+ # Generate a concrete syntax parse tree from the parse result
54
+ cst_ptree = result.parse_tree
55
+ print_tree('Concrete Syntax Tree (CST)', cst_ptree)
56
+
57
+ # End of file
@@ -5,7 +5,7 @@ require_relative 'calc_ast_builder'
5
5
  if ARGV.empty?
6
6
  my_name = File.basename(__FILE__)
7
7
  msg = <<-END_MSG
8
- Command-line symtax:
8
+ Command-line syntax:
9
9
  ruby #{my_name} "arithmetic expression"
10
10
  where:
11
11
  the arithmetic expression is enclosed between double quotes (")
@@ -0,0 +1,20 @@
1
+ # Mix-in module that provides convenience methods for
2
+ # constructing an AST (Abstract Syntax Tree).
3
+ module ASTBuilding
4
+ def return_first_child(_range, _tokens, theChildren)
5
+ return theChildren[0]
6
+ end
7
+
8
+ def return_second_child(_range, _tokens, theChildren)
9
+ return theChildren[1]
10
+ end
11
+
12
+ def return_last_child(_range, _tokens, theChildren)
13
+ return theChildren[-1]
14
+ end
15
+
16
+ def return_epsilon(_range, _tokens, _children)
17
+ return nil
18
+ end
19
+ end # module
20
+ # End of file
@@ -1,3 +1,4 @@
1
+ require_relative 'ast_building'
1
2
  require_relative 'calc_ast_nodes'
2
3
 
3
4
  # The purpose of a CalcASTBuilder is to build piece by piece an AST
@@ -8,6 +9,8 @@ require_relative 'calc_ast_nodes'
8
9
  # (say, a parse tree) from simpler objects (terminal and non-terminal
9
10
  # nodes) and using a step by step approach.
10
11
  class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
12
+ include ASTBuilding
13
+
11
14
  Terminal2NodeClass = {
12
15
  # Lexical ambiguity: minus sign represents two very different concepts:
13
16
  # The unary negation operator on one hand, the binary subtraction operator
@@ -23,29 +26,6 @@ class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
23
26
 
24
27
  protected
25
28
 
26
- def return_first_child(_range, _tokens, theChildren)
27
- return theChildren[0]
28
- end
29
-
30
- def return_second_child(_range, _tokens, theChildren)
31
- return theChildren[1]
32
- end
33
-
34
- def return_last_child(_range, _tokens, theChildren)
35
- return theChildren[-1]
36
- end
37
-
38
- def return_epsilon(_range, _tokens, _children)
39
- return nil
40
- end
41
-
42
- # Overriding method.
43
- # Create a parse tree object with given
44
- # node as root node.
45
- def create_tree(aRootNode)
46
- return Rley::PTree::ParseTree.new(aRootNode)
47
- end
48
-
49
29
  # Overriding method.
50
30
  # Factory method for creating a node object for the given
51
31
  # input token.
@@ -28,7 +28,7 @@ Demo calculator that prints:
28
28
  - The Concrete and Abstract Syntax Trees of the math expression.
29
29
  - The result of the math expression.
30
30
 
31
- Command-line symtax:
31
+ Command-line syntax:
32
32
  ruby #{my_name} "arithmetic expression"
33
33
  where:
34
34
  the arithmetic expression is enclosed between double quotes (")
@@ -17,7 +17,7 @@ module Rley # This module is used as a namespace
17
17
  attr_reader(:dotted_items)
18
18
 
19
19
  # Constructor.
20
- # @param [Syntax::Grammar] The grammar of the language.
20
+ # @param aGrammar [Syntax::Grammar] The grammar of the language.
21
21
  def initialize(aGrammar)
22
22
  @grammar = aGrammar
23
23
  @dotted_items = build_dotted_items(grammar) # Method from mixin
@@ -5,7 +5,7 @@ module Rley # This module is used as a namespace
5
5
  # Mix-in module. Builds the dotted items for a given grammar
6
6
  module GrmItemsBuilder
7
7
  # Build an array of dotted items from the productions of passed grammar.
8
- # @param [Syntax::Grammar]
8
+ # @param aGrammar [Syntax::Grammar]
9
9
  # @return [Array<DottedItem>]
10
10
  def build_dotted_items(aGrammar)
11
11
  items = []
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.07'.freeze
6
+ Version = '0.5.08'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -14,7 +14,7 @@ module Rley # This module is used as a namespace
14
14
  attr_reader :non_terminal
15
15
 
16
16
  # Constructor to specialize in subclasses.
17
- # @param [Syntax::NonTerminal]
17
+ # @param aNonTerminal [Syntax::NonTerminal]
18
18
  def initialize(aNonTerminal)
19
19
  super()
20
20
  @non_terminal = aNonTerminal
@@ -9,26 +9,29 @@ module Rley # This module is used as a namespace
9
9
  # Assuming that n == number of input tokens,
10
10
  # the chart is an array with n + 1 entry sets.
11
11
  class GFGChart
12
- # An array of entry sets (one per input token + 1)
12
+ # @return [Array<ParseEntrySet>] entry sets (one per input token + 1)
13
13
  attr_reader(:sets)
14
14
 
15
15
  # @param tokenCount [Integer] The number of lexemes in the input to parse.
16
+ # @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
16
17
  def initialize(tokenCount, aGFGraph)
17
18
  @sets = Array.new(tokenCount + 1) { |_| ParseEntrySet.new }
18
19
  push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
19
20
  end
20
21
 
21
- # Return the start (non-terminal) symbol of the grammar.
22
+ # @return [Syntax::NonTerminal] the start symbol of the grammar.
22
23
  def start_symbol()
23
24
  return sets.first.entries[0].vertex.non_terminal
24
25
  end
25
26
 
26
- # Access the entry set at given position
27
+ # @param index [Integer]
28
+ # @return [ParseEntrySet] Access the entry set at given position
27
29
  def [](index)
28
30
  return sets[index]
29
31
  end
30
32
 
31
33
  # Return the index value of the last non-empty entry set.
34
+ # @return [Integer]
32
35
  def last_index()
33
36
  first_empty = sets.find_index(&:empty?)
34
37
  index = if first_empty.nil?
@@ -49,11 +52,13 @@ module Rley # This module is used as a namespace
49
52
  end
50
53
 
51
54
  # Retrieve the first parse entry added to this chart
55
+ # @return [ParseEntry]
52
56
  def initial_entry()
53
57
  return sets[0].first
54
58
  end
55
59
 
56
60
  # Retrieve the entry that corresponds to a complete and successful parse
61
+ # @return [ParseEntry]
57
62
  def accepting_entry()
58
63
  # Success can be detected as follows:
59
64
  # The last chart entry set has at least one complete parse entry
@@ -6,9 +6,12 @@ module Rley # This module is used as a namespace
6
6
  module Parser # This module is used as a namespace
7
7
  # Implementation of a parser that uses the Earley parsing algorithm.
8
8
  class GFGEarleyParser < Base::BaseParser
9
- # The Grammar Flow graph for the given grammar
9
+ # The Grammar Flow graph generated from the provided grammar.
10
+ # @return [GFG::GrmFlowGraph] The GFG that drives the parsing
10
11
  attr_reader :gf_graph
11
12
 
13
+ # Constructor.
14
+ # @param aGrammar [Syntax::Grammar] The grammar of the language to parse.
12
15
  def initialize(aGrammar)
13
16
  super(aGrammar)
14
17
  @gf_graph = GFG::GrmFlowGraph.new(dotted_items)
@@ -17,7 +20,7 @@ module Rley # This module is used as a namespace
17
20
  # Parse a sequence of input tokens.
18
21
  # @param aTokenSequence [Array] Array of Tokens objects returned by a
19
22
  # tokenizer/scanner/lexer.
20
- # @return [Parsing] an object that embeds the parse results.
23
+ # @return [GFGParsing] an object that embeds the parse results.
21
24
  def parse(aTokenSequence)
22
25
  result = GFGParsing.new(gf_graph, aTokenSequence)
23
26
  last_token_index = aTokenSequence.size
@@ -9,12 +9,15 @@ module Rley # This module is used as a namespace
9
9
  module Parser # This module is used as a namespace
10
10
  class GFGParsing
11
11
  # The link to the grammar flow graph
12
+ # @return [GFG::GrmFlowGraph] The GFG that drives the parsing
12
13
  attr_reader(:gf_graph)
13
14
 
14
15
  # The link to the chart object
16
+ # @return [GFGChart]
15
17
  attr_reader(:chart)
16
18
 
17
19
  # The sequence of input token to parse
20
+ # @return [Array<Lexical::Token>]
18
21
  attr_reader(:tokens)
19
22
 
20
23
  # A Hash with pairs of the form:
@@ -22,13 +25,14 @@ module Rley # This module is used as a namespace
22
25
  # It associates to every parse entry its antecedent(s), that is,
23
26
  # the parse entry/ies that causes the key parse entry to be created
24
27
  # with one of the gfg rules
28
+ # @return [Hash{ParseEntry => Array<ParseEntry>}]
25
29
  attr_reader(:antecedence)
26
30
 
27
31
  # The reason of a parse failure
28
32
  attr_reader(:failure_reason)
29
33
 
30
34
  # Constructor
31
- # @param theGFG [GrmFlowGraph] the Grammar Flow Graph
35
+ # @param theGFG [GFG::GrmFlowGraph] the Grammar Flow Graph
32
36
  # @param theTokens [Array<Token>] the array of input tokens
33
37
  def initialize(theGFG, theTokens)
34
38
  @gf_graph = theGFG
@@ -12,6 +12,9 @@ module Rley # This module is used as a namespace
12
12
  module Parser # This module is used as a namespace
13
13
  # Structure used internally by ParseTreeBuilder class.
14
14
  CSTRawNode = Struct.new(:range, :symbol, :children) do
15
+ # Constructor.
16
+ # @param aRange [Lexical::TokenRange] The token position range.
17
+ # @param aSymbol [Syntax::Symbol] A symbol from grammar.
15
18
  def initialize(aRange, aSymbol)
16
19
  super
17
20
  self.range = aRange
@@ -41,12 +44,13 @@ module Rley # This module is used as a namespace
41
44
  def initialize(theTokens)
42
45
  @tokens = theTokens
43
46
  @stack = []
47
+ @dummy_node = Object.new.freeze
44
48
  end
45
49
 
46
50
  # Receive events resulting from a visit of GFGParsing object.
47
51
  # These events are produced by a specialized Enumerator created
48
52
  # with a ParseWalkerFactory instance.
49
- # @param anEvent [Symbol] Kind of visit event. Should be: :visit
53
+ # @param anEvent [Syntax::Symbol] Kind of visit event. Should be: :visit
50
54
  # @param anEntry [ParseEntry] The entry being visited
51
55
  # @param anIndex [Integer] The token index associated with anEntry
52
56
  def receive_event(anEvent, anEntry, anIndex)
@@ -71,6 +75,13 @@ module Rley # This module is used as a namespace
71
75
  return @stack
72
76
  end
73
77
 
78
+ # Overriding method.
79
+ # Create a parse tree object with given
80
+ # node as root node.
81
+ def create_tree(aRootNode)
82
+ return Rley::PTree::ParseTree.new(aRootNode)
83
+ end
84
+
74
85
  private
75
86
 
76
87
  # Return the top of stack element.
@@ -173,7 +184,7 @@ module Rley # This module is used as a namespace
173
184
  # Initialize children array of TOS with nil placeholders.
174
185
  # The number of elements equals the number of symbols at rhs.
175
186
  def init_TOS_children(aCount)
176
- tos.children = Array.new(aCount)
187
+ tos.children = Array.new(aCount) { |_index| @dummy_node }
177
188
  end
178
189
 
179
190
  # Does the position on the left side of the dot correspond
@@ -213,15 +224,15 @@ module Rley # This module is used as a namespace
213
224
  # array at that position.
214
225
  # If the position is nil, then the node will be placed at the position of
215
226
  # the rightmost nil element in children array.
216
- def place_TOS_child(aNode, aRHSPos)
227
+ def place_TOS_child(aNode, aRHSPos)
217
228
  if aRHSPos.nil?
218
229
  # Retrieve index of rightmost nil child...
219
- pos = tos.children.rindex(&:nil?)
230
+ pos = tos.children.rindex { |child| child == @dummy_node }
220
231
  raise StandardError, 'Internal error' if pos.nil?
221
232
  else
222
233
  pos = aRHSPos
223
234
  end
224
-
235
+
225
236
  tos.children[pos] = aNode
226
237
  end
227
238