rley 0.5.07 → 0.5.08

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
  4. data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
  5. data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
  6. data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
  7. data/examples/general/SRL/lib/ast_builder.rb +176 -0
  8. data/examples/general/SRL/lib/ast_building.rb +20 -0
  9. data/examples/general/SRL/lib/grammar.rb +32 -0
  10. data/examples/general/SRL/lib/parser.rb +26 -0
  11. data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
  12. data/examples/general/SRL/lib/regex_repr.rb +1 -0
  13. data/examples/general/SRL/lib/srl_demo.rb +67 -0
  14. data/examples/general/SRL/lib/tokenizer.rb +101 -0
  15. data/examples/general/SRL/spec/integration_spec.rb +103 -0
  16. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
  17. data/examples/general/SRL/spec/spec_helper.rb +25 -0
  18. data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
  19. data/examples/general/SRL/srl_demo.rb +57 -0
  20. data/examples/general/calc_iter1/calc_demo.rb +1 -1
  21. data/examples/general/calc_iter2/ast_building.rb +20 -0
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
  23. data/examples/general/calc_iter2/calc_demo.rb +1 -1
  24. data/lib/rley/base/base_parser.rb +1 -1
  25. data/lib/rley/base/grm_items_builder.rb +1 -1
  26. data/lib/rley/constants.rb +1 -1
  27. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  28. data/lib/rley/parser/gfg_chart.rb +8 -3
  29. data/lib/rley/parser/gfg_earley_parser.rb +5 -2
  30. data/lib/rley/parser/gfg_parsing.rb +5 -1
  31. data/lib/rley/parser/parse_tree_builder.rb +16 -5
  32. data/lib/rley/ptree/terminal_node.rb +3 -2
  33. data/spec/rley/parser/ast_builder_spec.rb +2 -2
  34. data/spec/rley/parser/cst_builder_spec.rb +2 -3
  35. metadata +20 -4
@@ -0,0 +1,25 @@
1
+ # File: spec_helper.rb
2
+ # Purpose: utility file that is loaded by all our RSpec files
3
+
4
+ require 'simplecov'
5
+
6
+ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new(
7
+ [
8
+ SimpleCov::Formatter::HTMLFormatter,
9
+ ]
10
+ )
11
+
12
+ require 'pp' # Use pretty-print for debugging purposes
13
+ require 'rspec' # Use the RSpec framework
14
+
15
+ RSpec.configure do |config|
16
+ config.expect_with :rspec do |c|
17
+ # Disable the `should` syntax...
18
+ c.syntax = :expect
19
+ end
20
+
21
+ # Display stack trace in case of failure
22
+ config.full_backtrace = true
23
+ end
24
+
25
+ # End of file
@@ -0,0 +1,125 @@
1
+ require_relative 'spec_helper' # Use the RSpec framework
2
+ require_relative '../lib/grammar'
3
+ require_relative '../lib/tokenizer' # Load the class under test
4
+
5
+
6
+ module SRL
7
+ describe Tokenizer do
8
+ def match_expectations(aTokenizer, theExpectations)
9
+ aTokenizer.tokens.each_with_index do |token, i|
10
+ terminal, lexeme = theExpectations[i]
11
+ expect(token.terminal.name).to eq(terminal)
12
+ expect(token.lexeme).to eq(lexeme)
13
+ end
14
+ end
15
+
16
+
17
+ subject { Tokenizer.new('', SRL::Grammar) }
18
+
19
+ context 'Initialization:' do
20
+
21
+ it 'should be initialized with a text to tokenize and a grammar' do
22
+ expect { Tokenizer.new('anything', SRL::Grammar) }.not_to raise_error
23
+ end
24
+
25
+ it 'should have its scanner initialized' do
26
+ expect(subject.scanner).to be_kind_of(StringScanner)
27
+ end
28
+ end # context
29
+
30
+ context 'Single token recognition:' do
31
+ # it 'should tokenize delimiters and separators' do
32
+ # subject.scanner.string = ','
33
+ # token = subject.tokens.first
34
+ # expect(token).to be_kind_of(Rley::Lexical::Token)
35
+ # expect(token.terminal.name).to eq('COMMA')
36
+ # expect(token.lexeme).to eq(',')
37
+ # end
38
+
39
+ it 'should tokenize keywords' do
40
+ sample = 'between Exactly oncE optional TWICE'
41
+ subject.scanner.string = sample
42
+ subject.tokens.each do |tok|
43
+ expect(tok).to be_kind_of(Rley::Lexical::Token)
44
+ expect(tok.terminal.name).to eq(tok.lexeme.upcase)
45
+ end
46
+ end
47
+
48
+ it 'should tokenize integer values' do
49
+ subject.scanner.string = ' 123 '
50
+ token = subject.tokens.first
51
+ expect(token).to be_kind_of(Rley::Lexical::Token)
52
+ expect(token.terminal.name).to eq('INTEGER')
53
+ expect(token.lexeme).to eq('123')
54
+ end
55
+
56
+ it 'should tokenize single digits' do
57
+ subject.scanner.string = ' 1 '
58
+ token = subject.tokens.first
59
+ expect(token).to be_kind_of(Rley::Lexical::Token)
60
+ expect(token.terminal.name).to eq('DIGIT')
61
+ expect(token.lexeme).to eq('1')
62
+ end
63
+ end # context
64
+
65
+ context 'Quantifier tokenization:' do
66
+ it "should recognize 'exactly ... times'" do
67
+ input = 'exactly 4 Times'
68
+ subject.scanner.string = input
69
+ expectations = [
70
+ ['EXACTLY', 'exactly'],
71
+ ['DIGIT', '4'],
72
+ ['TIMES', 'Times']
73
+ ]
74
+ match_expectations(subject, expectations)
75
+ end
76
+
77
+ it "should recognize 'between ... and ... times'" do
78
+ input = 'Between 2 AND 4 times'
79
+ subject.scanner.string = input
80
+ expectations = [
81
+ ['BETWEEN', 'Between'],
82
+ ['DIGIT', '2'],
83
+ ['AND', 'AND'],
84
+ ['DIGIT', '4'],
85
+ ['TIMES', 'times']
86
+ ]
87
+ match_expectations(subject, expectations)
88
+ end
89
+
90
+ it "should recognize 'once or more'" do
91
+ input = 'Once or MORE'
92
+ subject.scanner.string = input
93
+ expectations = [
94
+ ['ONCE', 'Once'],
95
+ ['OR', 'or'],
96
+ ['MORE', 'MORE']
97
+ ]
98
+ match_expectations(subject, expectations)
99
+ end
100
+
101
+ it "should recognize 'never or more'" do
102
+ input = 'never or more'
103
+ subject.scanner.string = input
104
+ expectations = [
105
+ ['NEVER', 'never'],
106
+ ['OR', 'or'],
107
+ ['MORE', 'more']
108
+ ]
109
+ match_expectations(subject, expectations)
110
+ end
111
+
112
+ it "should recognize 'at least ... times'" do
113
+ input = 'at least 10 times'
114
+ subject.scanner.string = input
115
+ expectations = [
116
+ ['AT', 'at'],
117
+ ['LEAST', 'least'],
118
+ ['INTEGER', '10'],
119
+ ['TIMES', 'times']
120
+ ]
121
+ match_expectations(subject, expectations)
122
+ end
123
+ end # context
124
+ end # describe
125
+ end # module
@@ -0,0 +1,57 @@
1
+ require_relative './lib/parser'
2
+
3
+ def print_title(aTitle)
4
+ puts aTitle
5
+ puts '=' * aTitle.size
6
+ end
7
+
8
+ def print_tree(aTitle, aParseTree)
9
+ # Let's create a parse tree visitor
10
+ visitor = Rley::ParseTreeVisitor.new(aParseTree)
11
+
12
+ # Now output formatted parse tree
13
+ print_title(aTitle)
14
+ renderer = Rley::Formatter::Asciitree.new($stdout)
15
+ renderer.render(visitor)
16
+ puts ''
17
+ end
18
+
19
+ # Create an SRL parser object
20
+ parser = SRL::Parser.new
21
+
22
+ # Parse the input expression in command-line
23
+ if ARGV.empty?
24
+ my_name = File.basename(__FILE__)
25
+ msg = <<-END_MSG
26
+ WORK IN PROGRESS
27
+ Simple Regex Language parser:
28
+ - Parses a very limited subset of the language and displays the parse tree
29
+
30
+ Command-line syntax:
31
+ ruby #{my_name} "quantifier expression"
32
+ where:
33
+ the SRL quantifier expression is enclosed between double quotes (")
34
+
35
+ Examples:
36
+ ruby #{my_name} "exactly 4 times"
37
+ ruby #{my_name} "between 2 and 3 times"
38
+ END_MSG
39
+ puts msg
40
+ exit(1)
41
+ end
42
+ puts ARGV[0]
43
+ result = parser.parse_SRL(ARGV[0])
44
+
45
+ unless result.success?
46
+ # Stop if the parse failed...
47
+ puts "Parsing of '#{ARGV[0]}' failed"
48
+ puts "Reason: #{result.failure_reason.message}"
49
+ exit(1)
50
+ end
51
+
52
+
53
+ # Generate a concrete syntax parse tree from the parse result
54
+ cst_ptree = result.parse_tree
55
+ print_tree('Concrete Syntax Tree (CST)', cst_ptree)
56
+
57
+ # End of file
@@ -5,7 +5,7 @@ require_relative 'calc_ast_builder'
5
5
  if ARGV.empty?
6
6
  my_name = File.basename(__FILE__)
7
7
  msg = <<-END_MSG
8
- Command-line symtax:
8
+ Command-line syntax:
9
9
  ruby #{my_name} "arithmetic expression"
10
10
  where:
11
11
  the arithmetic expression is enclosed between double quotes (")
@@ -0,0 +1,20 @@
1
+ # Mix-in module that provides convenience methods for
2
+ # constructing an AST (Abstract Syntax Tree).
3
+ module ASTBuilding
4
+ def return_first_child(_range, _tokens, theChildren)
5
+ return theChildren[0]
6
+ end
7
+
8
+ def return_second_child(_range, _tokens, theChildren)
9
+ return theChildren[1]
10
+ end
11
+
12
+ def return_last_child(_range, _tokens, theChildren)
13
+ return theChildren[-1]
14
+ end
15
+
16
+ def return_epsilon(_range, _tokens, _children)
17
+ return nil
18
+ end
19
+ end # module
20
+ # End of file
@@ -1,3 +1,4 @@
1
+ require_relative 'ast_building'
1
2
  require_relative 'calc_ast_nodes'
2
3
 
3
4
  # The purpose of a CalcASTBuilder is to build piece by piece an AST
@@ -8,6 +9,8 @@ require_relative 'calc_ast_nodes'
8
9
  # (say, a parse tree) from simpler objects (terminal and non-terminal
9
10
  # nodes) and using a step by step approach.
10
11
  class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
12
+ include ASTBuilding
13
+
11
14
  Terminal2NodeClass = {
12
15
  # Lexical ambiguity: minus sign represents two very different concepts:
13
16
  # The unary negation operator on one hand, the binary subtraction operator
@@ -23,29 +26,6 @@ class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
23
26
 
24
27
  protected
25
28
 
26
- def return_first_child(_range, _tokens, theChildren)
27
- return theChildren[0]
28
- end
29
-
30
- def return_second_child(_range, _tokens, theChildren)
31
- return theChildren[1]
32
- end
33
-
34
- def return_last_child(_range, _tokens, theChildren)
35
- return theChildren[-1]
36
- end
37
-
38
- def return_epsilon(_range, _tokens, _children)
39
- return nil
40
- end
41
-
42
- # Overriding method.
43
- # Create a parse tree object with given
44
- # node as root node.
45
- def create_tree(aRootNode)
46
- return Rley::PTree::ParseTree.new(aRootNode)
47
- end
48
-
49
29
  # Overriding method.
50
30
  # Factory method for creating a node object for the given
51
31
  # input token.
@@ -28,7 +28,7 @@ Demo calculator that prints:
28
28
  - The Concrete and Abstract Syntax Trees of the math expression.
29
29
  - The result of the math expression.
30
30
 
31
- Command-line symtax:
31
+ Command-line syntax:
32
32
  ruby #{my_name} "arithmetic expression"
33
33
  where:
34
34
  the arithmetic expression is enclosed between double quotes (")
@@ -17,7 +17,7 @@ module Rley # This module is used as a namespace
17
17
  attr_reader(:dotted_items)
18
18
 
19
19
  # Constructor.
20
- # @param [Syntax::Grammar] The grammar of the language.
20
+ # @param aGrammar [Syntax::Grammar] The grammar of the language.
21
21
  def initialize(aGrammar)
22
22
  @grammar = aGrammar
23
23
  @dotted_items = build_dotted_items(grammar) # Method from mixin
@@ -5,7 +5,7 @@ module Rley # This module is used as a namespace
5
5
  # Mix-in module. Builds the dotted items for a given grammar
6
6
  module GrmItemsBuilder
7
7
  # Build an array of dotted items from the productions of passed grammar.
8
- # @param [Syntax::Grammar]
8
+ # @param aGrammar [Syntax::Grammar]
9
9
  # @return [Array<DottedItem>]
10
10
  def build_dotted_items(aGrammar)
11
11
  items = []
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.07'.freeze
6
+ Version = '0.5.08'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -14,7 +14,7 @@ module Rley # This module is used as a namespace
14
14
  attr_reader :non_terminal
15
15
 
16
16
  # Constructor to specialize in subclasses.
17
- # @param [Syntax::NonTerminal]
17
+ # @param aNonTerminal [Syntax::NonTerminal]
18
18
  def initialize(aNonTerminal)
19
19
  super()
20
20
  @non_terminal = aNonTerminal
@@ -9,26 +9,29 @@ module Rley # This module is used as a namespace
9
9
  # Assuming that n == number of input tokens,
10
10
  # the chart is an array with n + 1 entry sets.
11
11
  class GFGChart
12
- # An array of entry sets (one per input token + 1)
12
+ # @return [Array<ParseEntrySet>] entry sets (one per input token + 1)
13
13
  attr_reader(:sets)
14
14
 
15
15
  # @param tokenCount [Integer] The number of lexemes in the input to parse.
16
+ # @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
16
17
  def initialize(tokenCount, aGFGraph)
17
18
  @sets = Array.new(tokenCount + 1) { |_| ParseEntrySet.new }
18
19
  push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
19
20
  end
20
21
 
21
- # Return the start (non-terminal) symbol of the grammar.
22
+ # @return [Syntax::NonTerminal] the start symbol of the grammar.
22
23
  def start_symbol()
23
24
  return sets.first.entries[0].vertex.non_terminal
24
25
  end
25
26
 
26
- # Access the entry set at given position
27
+ # @param index [Integer]
28
+ # @return [ParseEntrySet] Access the entry set at given position
27
29
  def [](index)
28
30
  return sets[index]
29
31
  end
30
32
 
31
33
  # Return the index value of the last non-empty entry set.
34
+ # @return [Integer]
32
35
  def last_index()
33
36
  first_empty = sets.find_index(&:empty?)
34
37
  index = if first_empty.nil?
@@ -49,11 +52,13 @@ module Rley # This module is used as a namespace
49
52
  end
50
53
 
51
54
  # Retrieve the first parse entry added to this chart
55
+ # @return [ParseEntry]
52
56
  def initial_entry()
53
57
  return sets[0].first
54
58
  end
55
59
 
56
60
  # Retrieve the entry that corresponds to a complete and successful parse
61
+ # @return [ParseEntry]
57
62
  def accepting_entry()
58
63
  # Success can be detected as follows:
59
64
  # The last chart entry set has at least one complete parse entry
@@ -6,9 +6,12 @@ module Rley # This module is used as a namespace
6
6
  module Parser # This module is used as a namespace
7
7
  # Implementation of a parser that uses the Earley parsing algorithm.
8
8
  class GFGEarleyParser < Base::BaseParser
9
- # The Grammar Flow graph for the given grammar
9
+ # The Grammar Flow graph generated from the provided grammar.
10
+ # @return [GFG::GrmFlowGraph] The GFG that drives the parsing
10
11
  attr_reader :gf_graph
11
12
 
13
+ # Constructor.
14
+ # @param aGrammar [Syntax::Grammar] The grammar of the language to parse.
12
15
  def initialize(aGrammar)
13
16
  super(aGrammar)
14
17
  @gf_graph = GFG::GrmFlowGraph.new(dotted_items)
@@ -17,7 +20,7 @@ module Rley # This module is used as a namespace
17
20
  # Parse a sequence of input tokens.
18
21
  # @param aTokenSequence [Array] Array of Tokens objects returned by a
19
22
  # tokenizer/scanner/lexer.
20
- # @return [Parsing] an object that embeds the parse results.
23
+ # @return [GFGParsing] an object that embeds the parse results.
21
24
  def parse(aTokenSequence)
22
25
  result = GFGParsing.new(gf_graph, aTokenSequence)
23
26
  last_token_index = aTokenSequence.size
@@ -9,12 +9,15 @@ module Rley # This module is used as a namespace
9
9
  module Parser # This module is used as a namespace
10
10
  class GFGParsing
11
11
  # The link to the grammar flow graph
12
+ # @return [GFG::GrmFlowGraph] The GFG that drives the parsing
12
13
  attr_reader(:gf_graph)
13
14
 
14
15
  # The link to the chart object
16
+ # @return [GFGChart]
15
17
  attr_reader(:chart)
16
18
 
17
19
  # The sequence of input token to parse
20
+ # @return [Array<Lexical::Token>]
18
21
  attr_reader(:tokens)
19
22
 
20
23
  # A Hash with pairs of the form:
@@ -22,13 +25,14 @@ module Rley # This module is used as a namespace
22
25
  # It associates to every parse entry its antecedent(s), that is,
23
26
  # the parse entry/ies that causes the key parse entry to be created
24
27
  # with one of the gfg rules
28
+ # @return [Hash{ParseEntry => Array<ParseEntry>}]
25
29
  attr_reader(:antecedence)
26
30
 
27
31
  # The reason of a parse failure
28
32
  attr_reader(:failure_reason)
29
33
 
30
34
  # Constructor
31
- # @param theGFG [GrmFlowGraph] the Grammar Flow Graph
35
+ # @param theGFG [GFG::GrmFlowGraph] the Grammar Flow Graph
32
36
  # @param theTokens [Array<Token>] the array of input tokens
33
37
  def initialize(theGFG, theTokens)
34
38
  @gf_graph = theGFG
@@ -12,6 +12,9 @@ module Rley # This module is used as a namespace
12
12
  module Parser # This module is used as a namespace
13
13
  # Structure used internally by ParseTreeBuilder class.
14
14
  CSTRawNode = Struct.new(:range, :symbol, :children) do
15
+ # Constructor.
16
+ # @param aRange [Lexical::TokenRange] The token position range.
17
+ # @param aSymbol [Syntax::Symbol] A symbol from grammar.
15
18
  def initialize(aRange, aSymbol)
16
19
  super
17
20
  self.range = aRange
@@ -41,12 +44,13 @@ module Rley # This module is used as a namespace
41
44
  def initialize(theTokens)
42
45
  @tokens = theTokens
43
46
  @stack = []
47
+ @dummy_node = Object.new.freeze
44
48
  end
45
49
 
46
50
  # Receive events resulting from a visit of GFGParsing object.
47
51
  # These events are produced by a specialized Enumerator created
48
52
  # with a ParseWalkerFactory instance.
49
- # @param anEvent [Symbol] Kind of visit event. Should be: :visit
53
+ # @param anEvent [Syntax::Symbol] Kind of visit event. Should be: :visit
50
54
  # @param anEntry [ParseEntry] The entry being visited
51
55
  # @param anIndex [Integer] The token index associated with anEntry
52
56
  def receive_event(anEvent, anEntry, anIndex)
@@ -71,6 +75,13 @@ module Rley # This module is used as a namespace
71
75
  return @stack
72
76
  end
73
77
 
78
+ # Overriding method.
79
+ # Create a parse tree object with given
80
+ # node as root node.
81
+ def create_tree(aRootNode)
82
+ return Rley::PTree::ParseTree.new(aRootNode)
83
+ end
84
+
74
85
  private
75
86
 
76
87
  # Return the top of stack element.
@@ -173,7 +184,7 @@ module Rley # This module is used as a namespace
173
184
  # Initialize children array of TOS with nil placeholders.
174
185
  # The number of elements equals the number of symbols at rhs.
175
186
  def init_TOS_children(aCount)
176
- tos.children = Array.new(aCount)
187
+ tos.children = Array.new(aCount) { |_index| @dummy_node }
177
188
  end
178
189
 
179
190
  # Does the position on the left side of the dot correspond
@@ -213,15 +224,15 @@ module Rley # This module is used as a namespace
213
224
  # array at that position.
214
225
  # If the position is nil, then the node will be placed at the position of
215
226
  # the rightmost nil element in children array.
216
- def place_TOS_child(aNode, aRHSPos)
227
+ def place_TOS_child(aNode, aRHSPos)
217
228
  if aRHSPos.nil?
218
229
  # Retrieve index of rightmost nil child...
219
- pos = tos.children.rindex(&:nil?)
230
+ pos = tos.children.rindex { |child| child == @dummy_node }
220
231
  raise StandardError, 'Internal error' if pos.nil?
221
232
  else
222
233
  pos = aRHSPos
223
234
  end
224
-
235
+
225
236
  tos.children[pos] = aNode
226
237
  end
227
238