rley 0.5.04 → 0.5.05

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,31 @@
1
+ # Grammar for simple arithmetical expressions
2
+ require 'rley' # Load the gem
3
+
4
+ ########################################
5
+ # Define a grammar for basic arithmetical expressions
6
+ builder = Rley::Syntax::GrammarBuilder.new do
7
+ add_terminals('NUMBER')
8
+ add_terminals('LPAREN', 'RPAREN') # For '(', ')' delimiters
9
+ add_terminals('PLUS', 'MINUS') # For '+', '-' operators or sign
10
+ add_terminals('STAR', 'DIVIDE', 'POWER') # For '*', '/', '**' operators
11
+ rule 'expression' => %w[simple_expression]
12
+ rule 'simple_expression' => 'term'
13
+ rule 'simple_expression' => %w[simple_expression add_operator term]
14
+ rule 'term' => 'factor'
15
+ rule 'term' => %w[term mul_operator factor]
16
+ rule 'factor' => 'simple_factor'
17
+ rule 'factor' => %w[simple_factor POWER simple_factor]
18
+ rule 'simple_factor' => %w[sign NUMBER]
19
+ rule 'simple_factor' => %w[LPAREN expression RPAREN]
20
+ rule 'simple_factor' => %w[MINUS LPAREN expression RPAREN]
21
+ rule 'sign' => 'PLUS'
22
+ rule 'sign' => 'MINUS'
23
+ rule 'sign' => []
24
+ rule 'add_operator' => 'PLUS'
25
+ rule 'add_operator' => 'MINUS'
26
+ rule 'mul_operator' => 'STAR'
27
+ rule 'mul_operator' => 'DIVIDE'
28
+ end
29
+
30
+ # And now build the grammar...
31
+ CalcGrammar = builder.grammar
@@ -0,0 +1,78 @@
1
+ # File: calc_lexer.rb
2
+ # Lexer for a basic arithmetical expression parser
3
+ require 'strscan'
4
+ require 'rley' # Load the gem
5
+
6
+
7
+ class CalcLexer
8
+ attr_reader(:scanner)
9
+ attr_reader(:lineno)
10
+ attr_reader(:line_start)
11
+ attr_reader(:name2symbol)
12
+
13
+ @@lexeme2name = {
14
+ '(' => 'LPAREN',
15
+ ')' => 'RPAREN',
16
+ '+' => 'PLUS',
17
+ '-' => 'MINUS',
18
+ '*' => 'STAR',
19
+ '/' => 'DIVIDE',
20
+ '**' => 'POWER'
21
+ }.freeze
22
+
23
+ class ScanError < StandardError; end
24
+
25
+ def initialize(source, aGrammar)
26
+ @scanner = StringScanner.new(source)
27
+ @name2symbol = aGrammar.name2symbol
28
+ @lineno = 1
29
+ end
30
+
31
+ def tokens()
32
+ tok_sequence = []
33
+ until @scanner.eos?
34
+ token = _next_token
35
+ tok_sequence << token unless token.nil?
36
+ end
37
+
38
+ return tok_sequence
39
+ end
40
+
41
+ private
42
+
43
+ def _next_token()
44
+ skip_whitespaces
45
+ curr_ch = scanner.peek(1)
46
+ return nil if curr_ch.nil?
47
+
48
+ token = nil
49
+
50
+ if '()+-/'.include? curr_ch
51
+ # Single character token
52
+ token = build_token(@@lexeme2name[curr_ch], scanner.getch)
53
+
54
+ elsif (lexeme = scanner.scan(/\*\*/))
55
+ token = build_token(@@lexeme2name[lexeme], lexeme)
56
+ elsif (lexeme = scanner.scan(/\*/))
57
+ token = build_token(@@lexeme2name[lexeme], lexeme)
58
+ elsif (lexeme = scanner.scan(/[0-9]+(\.[0-9]+)?([eE][-+]?[0-9])?/))
59
+ token = build_token('NUMBER', lexeme)
60
+ else # Unknown token
61
+ erroneous = curr_ch.nil? ? '' : curr_ch
62
+ sequel = scanner.scan(/.{1,20}/)
63
+ erroneous += sequel unless sequel.nil?
64
+ raise ScanError.new("Unknown token #{erroneous}")
65
+ end
66
+
67
+ return token
68
+ end
69
+
70
+ def build_token(aSymbolName, aLexeme)
71
+ token_type = name2symbol[aSymbolName]
72
+ return Rley::Tokens::Token.new(aLexeme, token_type)
73
+ end
74
+
75
+ def skip_whitespaces()
76
+ scanner.scan(/[ \t\f\n\r]+/)
77
+ end
78
+ end # class
@@ -0,0 +1,24 @@
1
+ # Purpose: to demonstrate how to build a parser for basic arithmetical
2
+ # expressions
3
+ require_relative 'calc_lexer'
4
+ require_relative 'calc_grammar'
5
+
6
+ # A parser for arithmetic expressions
7
+ class CalcParser < Rley::Parser::GFGEarleyParser
8
+ attr_reader(:source_file)
9
+
10
+ # Constructor
11
+ def initialize()
12
+ # Build the Earley parser with the calculator grammar
13
+ super(CalcGrammar)
14
+ end
15
+
16
+ def parse_expression(aText)
17
+ lexer = CalcLexer.new(aText, grammar)
18
+ result = parse(lexer.tokens)
19
+
20
+ return result
21
+ end
22
+ end # class
23
+
24
+ # End of file
@@ -0,0 +1,113 @@
1
+ require 'rspec' # Use the RSpec framework
2
+ require_relative '../calc_parser' # Load the class under test
3
+ require_relative '../calc_ast_builder'
4
+
5
+ RSpec.configure do |config|
6
+ # Display stack trace in case of failure
7
+ config.full_backtrace = true
8
+ end
9
+
10
+
11
+ describe 'Calculator' do
12
+ def parse_expression(anExpression)
13
+ # Create a calculator parser object
14
+ parser = CalcParser.new
15
+ result = parser.parse_expression(anExpression)
16
+
17
+ unless result.success?
18
+ # Stop if the parse failed...
19
+ puts "Parsing of '#{anExpression}' failed"
20
+ puts "Reason: #{result.failure_reason.message}"
21
+ exit(1)
22
+ end
23
+
24
+ return result
25
+ end
26
+
27
+ def print_cst(aParseResult)
28
+ # Generate a parse tree from the parse result
29
+ ptree = aParseResult.parse_tree
30
+
31
+ # Let's create a parse tree visitor
32
+ visitor = Rley::ParseTreeVisitor.new(ptree)
33
+
34
+ # Now output formatted parse tree
35
+ renderer = Rley::Formatter::Asciitree.new($stdout)
36
+ renderer.render(visitor)
37
+ end
38
+
39
+ def build_ast(aParseResult)
40
+ tree_builder = CalcASTBuilder
41
+ # Generate an abstract syntax tree from the parse result
42
+ ast = aParseResult.parse_tree(tree_builder)
43
+ return ast.root
44
+ end
45
+
46
+ def expect_expr(anExpression)
47
+ parsing = parse_expression(anExpression)
48
+ ast = build_ast(parsing)
49
+ return expect(ast.interpret)
50
+ end
51
+
52
+ context 'Parsing valid expressions' do
53
+ it 'should evaluate simple number literals' do
54
+ expect_expr('2').to eq(2)
55
+ end
56
+
57
+ it 'should evaluate positive number literals' do
58
+ expect_expr('+2').to eq(2)
59
+ expect_expr('+ 2').to eq(2)
60
+ end
61
+
62
+ it 'should evaluate negative number literals' do
63
+ expect_expr('-2').to eq(-2)
64
+ expect_expr('- 2').to eq(-2)
65
+ end
66
+
67
+ it 'should evaluate addition' do
68
+ expect_expr('2 + 2').to eq(4)
69
+ end
70
+
71
+ it 'should evaluate subtraction' do
72
+ expect_expr('2.1 - 2').to be_within(0.000000000000001).of(0.1)
73
+ end
74
+
75
+ it 'handles negative numbers' do
76
+ expect_expr('3--2').to eq(5)
77
+ end
78
+
79
+ it 'should evaluate division' do
80
+ expect_expr('10.5 / 5').to eq(2.1)
81
+ end
82
+
83
+ it 'should evaluate multiplication' do
84
+ expect_expr('2 * 3.1').to eq(6.2)
85
+ end
86
+
87
+ it 'should evaluate exponentiation' do
88
+ expect_expr('5 ** (3 - 1)').to eq(25)
89
+ expect_expr('25 ** 0.5').to eq(5)
90
+ end
91
+
92
+ it 'should change sign of expression in parentheses' do
93
+ expect_expr('- (2 * 5)').to eq(-10)
94
+ end
95
+
96
+ it 'should evaluate parentheses' do
97
+ expect_expr('2 * (2.1 + 1)').to eq(6.2)
98
+ end
99
+
100
+ it 'should evaluate regardless of whitespace' do
101
+ expect_expr("2*(1+\t1)").to eq(4)
102
+ end
103
+
104
+ it 'should evaluate order of operations' do
105
+ expect_expr('2 * 2.1 + 1 / 2').to eq 4.7
106
+ end
107
+
108
+ it 'should evaluate multiple levels of parentheses' do
109
+ expect_expr('2*(1/(1+3))').to eq(0.5)
110
+ end
111
+ end # context
112
+ end # describe
113
+ # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.04'.freeze
6
+ Version = '0.5.05'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -10,7 +10,30 @@ require_relative 'shortcut_edge'
10
10
 
11
11
  module Rley # This module is used as a namespace
12
12
  module GFG # This module is used as a namespace
13
- # TODO: add definition
13
+ # A Grammar Flow Graph (GFG) represents the parsing states of production
14
+ # rules from a context-free grammar. This representation is based on a
15
+ # directed graph structure. The parsing process can then be re-formulated
16
+ # as a path problem in the graph. The theory behind GFGs can be found in
17
+ # papers. The first article on GFG can be found here:
18
+ # https://apps.cs.utexas.edu/tech_reports/reports/tr/TR-2102.pdf
19
+ # There are three types of vertex in a GFG:
20
+ # start vertex, end vertex and item vertex.
21
+ # For each non-terminal symbol N of the grammar, there is:
22
+ # a start vertex with label '.N'
23
+ # an end vertex with label 'N.'
24
+ # For each production rule of the grammar:
25
+ # N => s1 s2 s3 (...) sk
26
+ # I.e. a rule with k grammar symbols in its right-hand side.
27
+ # For such a rule there will be k + 1 item vertices. By convention,
28
+ # the first item vertex is labelled as 'N => . s1 s2 s3 (...) sk'
29
+ # the second item vertex is labelled as 'N => s1 . s2 s3 (...) sk'
30
+ # the third item vertex is labelled as 'N => s1 s2 . s3 (...) sk'
31
+ # and so on. In other words, the labels are obtained by moving a dot
32
+ # in successive positions in the rhs. The dot represents the
33
+ # parse progress for the production rule. Symbols on the left of the
34
+ # dot represent the symbols that were successfully matched in the input.
35
+ # A GFG has three types of directed edges linking the vertices.
36
+ # call edge, return edge and scan edge.
14
37
  class GrmFlowGraph
15
38
  # The set of all vertices in the graph
16
39
  attr_reader :vertices
@@ -24,6 +47,9 @@ module Rley # This module is used as a namespace
24
47
  # A Hash with pairs of the form: non-terminal symbol => end node
25
48
  attr_reader :end_vertex_for
26
49
 
50
+ # Constructor.
51
+ # @param theDottedItems [Array<DottedItem>] an array of the dotted items
52
+ # of the grammar.
27
53
  def initialize(theDottedItems)
28
54
  @vertices = []
29
55
  @start_vertex_for = {}
@@ -32,7 +58,9 @@ module Rley # This module is used as a namespace
32
58
  build_graph(theDottedItems)
33
59
  end
34
60
 
35
- # Return the vertex with given vertex label.
61
+ # Retrieve the vertex with given vertex label.
62
+ # @param aVertexLabel [String] the label of a vertex from the graph
63
+ # @return [Vertex] the vertex with the given label, otherwise nil.
36
64
  def find_vertex(aVertexLabel)
37
65
  vertices.find { |a_vertex| a_vertex.label == aVertexLabel }
38
66
  end
@@ -42,7 +70,7 @@ module Rley # This module is used as a namespace
42
70
  # If one wants to remove useless rules, then do first:
43
71
  # elimination of non-generating symbols
44
72
  # then elimination of unreachable symbols
45
- def diagnose
73
+ def diagnose()
46
74
  mark_unreachable_symbols
47
75
  end
48
76
 
@@ -65,15 +93,6 @@ module Rley # This module is used as a namespace
65
93
  return next_one
66
94
  end
67
95
  end
68
-
69
- def print_vertex(aText, aVertex)
70
- print aText + ' '
71
- if aVertex.kind_of?(NonTerminalVertex)
72
- puts "#{aVertex.class} #{aVertex.non_terminal.name}"
73
- else
74
- p(aVertex.label)
75
- end
76
- end
77
96
 
78
97
  # Walk over all the vertices of the graph that are reachable from a given
79
98
  # start vertex. This is a depth-first graph traversal.
@@ -137,6 +156,16 @@ module Rley # This module is used as a namespace
137
156
  @start_vertex = aVertex if vertices.empty?
138
157
  vertices << aVertex
139
158
  end
159
+
160
+ # For debugging purposes
161
+ def print_vertex(aText, aVertex)
162
+ print aText + ' '
163
+ if aVertex.kind_of?(NonTerminalVertex)
164
+ puts "#{aVertex.class} #{aVertex.non_terminal.name}"
165
+ else
166
+ p(aVertex.label)
167
+ end
168
+ end
140
169
 
141
170
  def build_graph(theDottedItems)
142
171
  build_all_starts_ends(theDottedItems)
@@ -6,30 +6,37 @@ module Rley # This module is used as a namespace
6
6
  # - To know its label
7
7
  class Vertex
8
8
  # The edges linking the successor vertices to this one.
9
+ # @!attribute [r] edges
10
+ # @return [Array<Edge>] The edge(s) linking this vertex to successor(s)
9
11
  attr_reader :edges
10
12
 
13
+ # Constructor to override.
11
14
  def initialize()
12
15
  @edges = []
13
16
  end
14
17
 
15
- # Add an graph edge to this vertex
18
+ # Add a graph edge to this vertex.
19
+ # @param anEdge [Edge] the edge to be added.
16
20
  def add_edge(anEdge)
17
21
  arrow = check_add_edge(anEdge)
18
22
  edges << arrow
19
23
  end
20
24
 
21
- # Returns true iff the vertex corresponds to an dotted item that has
25
+ # Determine if the vertex corresponds to a dotted item that has
22
26
  # its dot at the end of a production (i.e. is a reduced item).
27
+ # @return [Boolean] true iff vertex corresponds to reduced item.
23
28
  def complete?()
24
29
  return false # Default implementation
25
30
  end
26
31
 
27
- # Return the symbol before the dot else nil.
32
+ # Retrieve the grammar symbol before the dot.
33
+ # @return [GrmSymbol, NilClass] The symbol or otherwise nil.
28
34
  def prev_symbol()
29
35
  return nil # Default implementation
30
36
  end
31
37
 
32
- # Return the symbol after the dot else nil.
38
+ # Retrieve the grammar symbol after the dot.
39
+ # @return [GrmSymbol, NilClass] The symbol or otherwise nil.
33
40
  def next_symbol()
34
41
  return nil # Default implementation
35
42
  end
@@ -27,7 +27,9 @@ module Rley # This module is used as a namespace
27
27
  # The reason of a parse failure
28
28
  attr_reader(:failure_reason)
29
29
 
30
-
30
+ # Constructor
31
+ # @param theGFG [GrmFlowGraph] the Grammar Flow Graph
32
+ # @param theTokens [Array<Token>] the array of input tokens
31
33
  def initialize(theGFG, theTokens)
32
34
  @gf_graph = theGFG
33
35
  @tokens = theTokens.dup
@@ -46,8 +48,34 @@ module Rley # This module is used as a namespace
46
48
  next_symbol = anEntry.next_symbol
47
49
  start_vertex = gf_graph.start_vertex_for[next_symbol]
48
50
  pos = aPosition
51
+ size_before = chart[pos].size
49
52
  apply_rule(anEntry, start_vertex, pos, pos, :call_rule)
53
+
54
+ if next_symbol.nullable? && anEntry.dotted_entry?
55
+ size_after = chart[pos].size
56
+ # ...apply the Nullable rule
57
+ nullable_rule(anEntry, aPosition) if size_after == size_before
58
+ end
50
59
  end
60
+
61
+ # Let the current sigma set be the ith parse entry set.
62
+ # This method is invoked when a dotted entry is added
63
+ # to the parse entry set of the form [A => alpha . B beta, k]
64
+ # and B is nullable.
65
+ # Then the entry [A => alpha B . beta, k] is added to the current
66
+ # sigma set.
67
+ def nullable_rule(anEntry, aPosition)
68
+ next_symbol = anEntry.next_symbol
69
+ end_vertex = gf_graph.end_vertex_for[next_symbol]
70
+ pos = aPosition
71
+ end_entry = push_entry(end_vertex, anEntry.origin, pos, :nullable_rule)
72
+ curr_vertex = anEntry.vertex
73
+ next_vertex = curr_vertex.shortcut.successor
74
+
75
+ # first pos == origin
76
+ # second pos == position
77
+ apply_rule(end_entry, next_vertex, anEntry.origin, pos, :nullable_rule)
78
+ end
51
79
 
52
80
  # Let the current sigma set be the ith parse entry set.
53
81
  # This method is invoked when an entry is added to a parse entry set
@@ -40,8 +40,9 @@ module Rley # This module is used as a namespace
40
40
  # Append the given entry (if it isn't yet in the set)
41
41
  # to the list of parse entries
42
42
  # @param anEntry [ParseEntry] the parse entry to push.
43
- # @return [ParseEntry] the passed parse entry it doesn't added
43
+ # @return [ParseEntry] the passed parse entry if it was pushed
44
44
  def push_entry(anEntry)
45
+ # TODO: control overhead next line
45
46
  match = entries.find { |entry| entry == anEntry }
46
47
  if match
47
48
  result = match