rley 0.5.04 → 0.5.05

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ # Grammar for simple arithmetical expressions
2
+ require 'rley' # Load the gem
3
+
4
+ ########################################
5
+ # Define a grammar for basic arithmetical expressions
6
+ builder = Rley::Syntax::GrammarBuilder.new do
7
+ add_terminals('NUMBER') # Numeric literal
8
+ add_terminals('LPAREN', 'RPAREN') # For '(', ')' delimiters
9
+ add_terminals('PLUS', 'MINUS') # For '+', '-' operators or sign
10
+ add_terminals('STAR', 'DIVIDE', 'POWER') # For '*', '/', '**' operators
11
+ rule 'expression' => %w[simple_expression]
12
+ rule 'simple_expression' => 'term'
13
+ rule 'simple_expression' => %w[simple_expression add_operator term] # Left-recursive: chain of additive terms
14
+ rule 'term' => 'factor'
15
+ rule 'term' => %w[term mul_operator factor] # Left-recursive: chain of multiplicative factors
16
+ rule 'factor' => 'simple_factor'
17
+ rule 'factor' => %w[simple_factor POWER simple_factor] # Both operands of '**' are simple factors
18
+ rule 'simple_factor' => %w[sign NUMBER] # Optionally signed number
19
+ rule 'simple_factor' => %w[LPAREN expression RPAREN] # Parenthesized expression
20
+ rule 'simple_factor' => %w[MINUS LPAREN expression RPAREN] # Parenthesized expression preceded by '-'
21
+ rule 'sign' => 'PLUS'
22
+ rule 'sign' => 'MINUS'
23
+ rule 'sign' => [] # Empty right-hand side: the sign may be omitted
24
+ rule 'add_operator' => 'PLUS'
25
+ rule 'add_operator' => 'MINUS'
26
+ rule 'mul_operator' => 'STAR'
27
+ rule 'mul_operator' => 'DIVIDE'
28
+ end
29
+
30
+ # Ask the builder for the grammar object made of the rules declared above
31
+ CalcGrammar = builder.grammar
@@ -0,0 +1,78 @@
1
+ # File: calc_lexer.rb
2
+ # Lexer for a basic arithmetical expression parser
3
+ require 'strscan'
4
+ require 'rley' # Load the gem
5
+
6
+
7
# Lexer (tokenizer) for basic arithmetical expressions.
# It splits the source text into tokens for numbers, parentheses and the
# operators '+', '-', '*', '/' and '**'. Whitespace between tokens is skipped.
class CalcLexer
  # @return [StringScanner] the scanner that walks over the source text
  attr_reader(:scanner)

  # @return [Integer] line number, set to 1 by the constructor
  attr_reader(:lineno)

  # @return [Integer] set to 0 by the constructor (not updated by this class)
  attr_reader(:line_start)

  # @return [Hash] mapping: terminal name => terminal symbol of the grammar
  attr_reader(:name2symbol)

  # Mapping: operator/delimiter text => name of the grammar terminal
  @@lexeme2name = {
    '(' => 'LPAREN',
    ')' => 'RPAREN',
    '+' => 'PLUS',
    '-' => 'MINUS',
    '*' => 'STAR',
    '/' => 'DIVIDE',
    '**' => 'POWER'
  }.freeze

  # Error raised when the input contains text that isn't a valid token
  class ScanError < StandardError; end

  # Constructor.
  # @param source [String] the text to tokenize
  # @param aGrammar [#name2symbol] the grammar; it must provide a mapping
  #   from terminal names to terminal symbols
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
    @line_start = 0
  end

  # Tokenize the whole source text.
  # @return [Array<Rley::Tokens::Token>] the tokens in order of occurrence
  # @raise [ScanError] when an unknown token is met
  def tokens()
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    return tok_sequence
  end

  private

  # Scan the next token from the current scanner position.
  # @return [Rley::Tokens::Token, NilClass] the token, or nil when only
  #   whitespace was left in the input
  # @raise [ScanError] when the text at the scanner position is not a token
  def _next_token()
    skip_whitespaces
    # StringScanner#peek returns '' (never nil) at end of input, so the end
    # of input must be tested explicitly. Otherwise trailing whitespace
    # would result in a bogus token.
    return nil if scanner.eos?

    # '**' is tried first so that it isn't split into two STAR tokens
    if (lexeme = scanner.scan(/\*\*|[()+\-*\/]/))
      token = build_token(@@lexeme2name[lexeme], lexeme)
    elsif (lexeme = scanner.scan(/[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/))
      # The exponent may have several digits (e.g. 2e10)
      token = build_token('NUMBER', lexeme)
    else # Unknown token
      # Report up to 20 characters starting at the offending one
      erroneous = scanner.scan(/.{1,20}/) || scanner.getch
      raise ScanError, "Unknown token #{erroneous}"
    end

    return token
  end

  # Create a token object.
  # @param aSymbolName [String] name of the grammar terminal
  # @param aLexeme [String] the text of the token
  # @return [Rley::Tokens::Token]
  def build_token(aSymbolName, aLexeme)
    token_type = name2symbol[aSymbolName]
    return Rley::Tokens::Token.new(aLexeme, token_type)
  end

  # Advance the scanner past any run of blanks, tabs, form feeds and
  # line terminators.
  def skip_whitespaces()
    scanner.scan(/[ \t\f\n\r]+/)
  end
end # class
@@ -0,0 +1,24 @@
1
+ # Purpose: to demonstrate how to build a parser for basic arithmetical
2
+ # expressions
3
+ require_relative 'calc_lexer'
4
+ require_relative 'calc_grammar'
5
+
6
+ # A parser for arithmetic expressions
7
+ class CalcParser < Rley::Parser::GFGEarleyParser
8
+ attr_reader(:source_file) # NOTE(review): never assigned in this class; looks unused - confirm
9
+
10
+ # Constructor. Takes no argument: the parser always uses CalcGrammar
11
+ def initialize()
12
+ # Build the Earley parser with the calculator grammar
13
+ super(CalcGrammar)
14
+ end
15
+
16
+ def parse_expression(aText) # Tokenize aText with a CalcLexer, then parse the token sequence
17
+ lexer = CalcLexer.new(aText, grammar)
18
+ result = parse(lexer.tokens)
19
+
20
+ return result # The value returned by the inherited parse method
21
+ end
22
+ end # class
23
+
24
+ # End of file
@@ -0,0 +1,113 @@
1
+ require 'rspec' # Use the RSpec framework
2
+ require_relative '../calc_parser' # Load the class under test
3
+ require_relative '../calc_ast_builder'
4
+
5
RSpec.configure do |config|
  # Display stack trace in case of failure
  config.full_backtrace = true
end

describe 'Calculator' do
  # Parse the given text with a fresh calculator parser.
  # @param anExpression [String] the arithmetical expression to parse
  # @return the parse result (only when the parse succeeded)
  # @raise [RuntimeError] when the parse failed
  def parse_expression(anExpression)
    # Create a calculator parser object
    parser = CalcParser.new
    result = parser.parse_expression(anExpression)
    return result if result.success?

    # Raise instead of calling exit(1): an exception fails the current
    # example only, while exit would terminate the whole spec run.
    raise "Parsing of '#{anExpression}' failed. " \
          "Reason: #{result.failure_reason.message}"
  end

  # Debugging aid: print the concrete syntax tree of a parse result
  # on the standard output.
  # @param aParseResult the result of a successful parse
  def print_cst(aParseResult)
    # Generate a parse tree from the parse result
    ptree = aParseResult.parse_tree

    # Let's create a parse tree visitor
    visitor = Rley::ParseTreeVisitor.new(ptree)

    # Now output formatted parse tree
    renderer = Rley::Formatter::Asciitree.new($stdout)
    renderer.render(visitor)
  end

  # Build an abstract syntax tree with CalcASTBuilder.
  # @param aParseResult the result of a successful parse
  # @return the root node of the abstract syntax tree
  def build_ast(aParseResult)
    tree_builder = CalcASTBuilder
    # Generate an abstract syntax tree from the parse result
    ast = aParseResult.parse_tree(tree_builder)
    return ast.root
  end

  # Parse and interpret the expression, then wrap the computed value
  # in an RSpec expectation target.
  # @param anExpression [String] the arithmetical expression to evaluate
  def expect_expr(anExpression)
    parsing = parse_expression(anExpression)
    ast = build_ast(parsing)
    return expect(ast.interpret)
  end

  context 'Parsing valid expressions' do
    it 'should evaluate simple number literals' do
      expect_expr('2').to eq(2)
    end

    it 'should evaluate positive number literals' do
      expect_expr('+2').to eq(2)
      expect_expr('+ 2').to eq(2)
    end

    it 'should evaluate negative number literals' do
      expect_expr('-2').to eq(-2)
      expect_expr('- 2').to eq(-2)
    end

    it 'should evaluate addition' do
      expect_expr('2 + 2').to eq(4)
    end

    it 'should evaluate subtraction' do
      expect_expr('2.1 - 2').to be_within(0.000000000000001).of(0.1)
    end

    it 'handles negative numbers' do
      expect_expr('3--2').to eq(5)
    end

    it 'should evaluate division' do
      expect_expr('10.5 / 5').to eq(2.1)
    end

    it 'should evaluate multiplication' do
      expect_expr('2 * 3.1').to eq(6.2)
    end

    it 'should evaluate exponentiation' do
      expect_expr('5 ** (3 - 1)').to eq(25)
      expect_expr('25 ** 0.5').to eq(5)
    end

    it 'should change sign of expression in parentheses' do
      expect_expr('- (2 * 5)').to eq(-10)
    end

    it 'should evaluate parentheses' do
      expect_expr('2 * (2.1 + 1)').to eq(6.2)
    end

    it 'should evaluate regardless of whitespace' do
      expect_expr("2*(1+\t1)").to eq(4)
    end

    it 'should evaluate order of operations' do
      expect_expr('2 * 2.1 + 1 / 2').to eq 4.7
    end

    it 'should evaluate multiple levels of parentheses' do
      expect_expr('2*(1/(1+3))').to eq(0.5)
    end
  end # context
end # describe
113
+ # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.04'.freeze
6
+ Version = '0.5.05'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -10,7 +10,30 @@ require_relative 'shortcut_edge'
10
10
 
11
11
  module Rley # This module is used as a namespace
12
12
  module GFG # This module is used as a namespace
13
- # TODO: add definition
13
+ # A Grammar Flow Graph (GFG) represents the parsing states of production
14
+ # rules from a context-free grammar. This representation is based on a
15
+ # directed graph structure. The parsing process can then be re-formulated
16
+ # as a path problem in the graph. The theory behind GFGs can be found in
17
+ # papers. The first article on GFG can be found here:
18
+ # https://apps.cs.utexas.edu/tech_reports/reports/tr/TR-2102.pdf
19
+ # There are three types of vertex in a GFG:
20
+ # start vertex, end vertex and item vertex.
21
+ # For each non-terminal symbol N of the grammar, there is:
22
+ # a start vertex with label '.N'
23
+ # an end vertex with label 'N.'
24
+ # For each production rule of the grammar:
25
+ # N => s1 s2 s3 (...) sk
26
+ # I.e. a rule with k grammar symbols in its right-hand side.
27
+ # For such a rule there will be k + 1 item vertices. By convention,
28
+ # the first item vertex is labelled as 'N => . s1 s2 s3 (...) sk'
29
+ # the second item vertex is labelled as 'N => s1 . s2 s3 (...) sk'
30
+ # the third item vertex is labelled as 'N => s1 s2 . s3 (...) sk'
31
+ # and so on. In other words, the labels are obtained by moving a dot
32
+ # in successive positions in the rhs. The dot represents the
33
+ # parse progress for the production rule. Symbols on the left of the
34
+ # dot represent the symbols that were successfully matched in the input.
35
+ # A GFG has three types of directed edges linking the vertices.
36
+ # call edge, return edge and scan edge.
14
37
  class GrmFlowGraph
15
38
  # The set of all vertices in the graph
16
39
  attr_reader :vertices
@@ -24,6 +47,9 @@ module Rley # This module is used as a namespace
24
47
  # A Hash with pairs of the form: non-terminal symbol => end node
25
48
  attr_reader :end_vertex_for
26
49
 
50
+ # Constructor.
51
+ # @param theDottedItems [Array<DottedItem>] an array of the dotted items
52
+ # of the grammar.
27
53
  def initialize(theDottedItems)
28
54
  @vertices = []
29
55
  @start_vertex_for = {}
@@ -32,7 +58,9 @@ module Rley # This module is used as a namespace
32
58
  build_graph(theDottedItems)
33
59
  end
34
60
 
35
- # Return the vertex with given vertex label.
61
+ # Retrieve the vertex with given vertex label.
62
+ # @param aVertexLabel [String] the label of a vertex from the graph
63
+ # @return [Vertex] the vertex with the given label, otherwise nil.
36
64
  def find_vertex(aVertexLabel)
37
65
  vertices.find { |a_vertex| a_vertex.label == aVertexLabel }
38
66
  end
@@ -42,7 +70,7 @@ module Rley # This module is used as a namespace
42
70
  # If one wants to remove useless rules, then do first:
43
71
  # elimination of non-generating symbols
44
72
  # then elimination of unreachable symbols
45
- def diagnose
73
+ def diagnose()
46
74
  mark_unreachable_symbols
47
75
  end
48
76
 
@@ -65,15 +93,6 @@ module Rley # This module is used as a namespace
65
93
  return next_one
66
94
  end
67
95
  end
68
-
69
- def print_vertex(aText, aVertex)
70
- print aText + ' '
71
- if aVertex.kind_of?(NonTerminalVertex)
72
- puts "#{aVertex.class} #{aVertex.non_terminal.name}"
73
- else
74
- p(aVertex.label)
75
- end
76
- end
77
96
 
78
97
  # Walk over all the vertices of the graph that are reachable from a given
79
98
  # start vertex. This is a depth-first graph traversal.
@@ -137,6 +156,16 @@ module Rley # This module is used as a namespace
137
156
  @start_vertex = aVertex if vertices.empty?
138
157
  vertices << aVertex
139
158
  end
159
+
160
+ # For debugging purposes
161
+ def print_vertex(aText, aVertex) # Write aText, then a short description of aVertex, to standard output
162
+ print aText + ' '
163
+ if aVertex.kind_of?(NonTerminalVertex)
164
+ puts "#{aVertex.class} #{aVertex.non_terminal.name}" # Vertex class followed by the name of its non-terminal
165
+ else
166
+ p(aVertex.label) # Any other vertex: output its label via Kernel#p (inspect form)
167
+ end
168
+ end
140
169
 
141
170
  def build_graph(theDottedItems)
142
171
  build_all_starts_ends(theDottedItems)
@@ -6,30 +6,37 @@ module Rley # This module is used as a namespace
6
6
  # - To know its label
7
7
  class Vertex
8
8
  # The edges linking the successor vertices to this one.
9
+ # @!attribute [r] edges
10
+ # @return [Array<Edge>] The edge(s) linking this vertex to successor(s)
9
11
  attr_reader :edges
10
12
 
13
+ # Constructor to override.
11
14
  def initialize()
12
15
  @edges = []
13
16
  end
14
17
 
15
- # Add an graph edge to this vertex
18
+ # Add a graph edge to this vertex.
19
+ # @param anEdge [Edge] the edge to be added.
16
20
  def add_edge(anEdge)
17
21
  arrow = check_add_edge(anEdge)
18
22
  edges << arrow
19
23
  end
20
24
 
21
- # Returns true iff the vertex corresponds to an dotted item that has
25
+ # Determine if the vertex corresponds to a dotted item that has
22
26
  # its dot at the end of a production (i.e. is a reduced item).
27
+ # @return [Boolean] true iff vertex corresponds to reduced item.
23
28
  def complete?()
24
29
  return false # Default implementation
25
30
  end
26
31
 
27
- # Return the symbol before the dot else nil.
32
+ # Retrieve the grammar symbol before the dot.
33
+ # @return [GrmSymbol, NilClass] The symbol or otherwise nil.
28
34
  def prev_symbol()
29
35
  return nil # Default implementation
30
36
  end
31
37
 
32
- # Return the symbol after the dot else nil.
38
+ # Retrieve the grammar symbol after the dot.
39
+ # @return [GrmSymbol, NilClass] The symbol or otherwise nil.
33
40
  def next_symbol()
34
41
  return nil # Default implementation
35
42
  end
@@ -27,7 +27,9 @@ module Rley # This module is used as a namespace
27
27
  # The reason of a parse failure
28
28
  attr_reader(:failure_reason)
29
29
 
30
-
30
+ # Constructor
31
+ # @param theGFG [GrmFlowGraph] the Grammar Flow Graph
32
+ # @param theTokens [Array<Token>] the array of input tokens
31
33
  def initialize(theGFG, theTokens)
32
34
  @gf_graph = theGFG
33
35
  @tokens = theTokens.dup
@@ -46,8 +48,34 @@ module Rley # This module is used as a namespace
46
48
  next_symbol = anEntry.next_symbol
47
49
  start_vertex = gf_graph.start_vertex_for[next_symbol]
48
50
  pos = aPosition
51
+ size_before = chart[pos].size
49
52
  apply_rule(anEntry, start_vertex, pos, pos, :call_rule)
53
+
54
+ if next_symbol.nullable? && anEntry.dotted_entry?
55
+ size_after = chart[pos].size
56
+ # ...apply the Nullable rule
57
+ nullable_rule(anEntry, aPosition) if size_after == size_before
58
+ end
50
59
  end
60
+
61
+ # Let the current sigma set be the ith parse entry set.
62
+ # This method is invoked when a dotted entry is added
63
+ # to the parse entry set of the form [A => alpha . B beta, k]
64
+ # and B is nullable
65
+ # Then the entry [A => alpha B . beta, k] is added to the current
66
+ # sigma set.
67
+ def nullable_rule(anEntry, aPosition) # anEntry: entry with a nullable symbol after its dot; aPosition: index of the current entry set
68
+ next_symbol = anEntry.next_symbol # The symbol right after the dot
69
+ end_vertex = gf_graph.end_vertex_for[next_symbol] # End vertex registered in the graph for that symbol
70
+ pos = aPosition
71
+ end_entry = push_entry(end_vertex, anEntry.origin, pos, :nullable_rule) # Entry for the end vertex, keeping the origin of anEntry
72
+ curr_vertex = anEntry.vertex
73
+ next_vertex = curr_vertex.shortcut.successor # Vertex at the other end of the shortcut edge of the current vertex
74
+
75
+ # In the next call: anEntry.origin is passed as origin and
76
+ # pos is passed as position
77
+ apply_rule(end_entry, next_vertex, anEntry.origin, pos, :nullable_rule)
78
+ end
51
79
 
52
80
  # Let the current sigma set be the ith parse entry set.
53
81
  # This method is invoked when an entry is added to a parse entry set
@@ -40,8 +40,9 @@ module Rley # This module is used as a namespace
40
40
  # Append the given entry (if it isn't yet in the set)
41
41
  # to the list of parse entries
42
42
  # @param anEntry [ParseEntry] the parse entry to push.
43
- # @return [ParseEntry] the passed parse entry it doesn't added
43
+ # @return [ParseEntry] the passed parse entry if it gets pushed
44
44
  def push_entry(anEntry)
45
+ # TODO: control overhead next line
45
46
  match = entries.find { |entry| entry == anEntry }
46
47
  if match
47
48
  result = match