rley 0.5.04 → 0.5.05
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/examples/general/calc_iter1/calc_demo.rb +1 -1
- data/examples/general/calc_iter2/calc_ast_builder.rb +200 -0
- data/examples/general/calc_iter2/calc_ast_nodes.rb +156 -0
- data/examples/general/calc_iter2/calc_demo.rb +66 -0
- data/examples/general/calc_iter2/calc_grammar.rb +31 -0
- data/examples/general/calc_iter2/calc_lexer.rb +78 -0
- data/examples/general/calc_iter2/calc_parser.rb +24 -0
- data/examples/general/calc_iter2/spec/calculator_spec.rb +113 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/grm_flow_graph.rb +41 -12
- data/lib/rley/gfg/vertex.rb +11 -4
- data/lib/rley/parser/gfg_parsing.rb +29 -1
- data/lib/rley/parser/parse_entry_set.rb +2 -1
- data/lib/rley/parser/parse_forest_factory.rb +7 -0
- data/lib/rley/parser/parse_rep_creator.rb +8 -2
- data/lib/rley/parser/parse_tree_builder.rb +5 -3
- data/lib/rley/parser/parse_tree_factory.rb +1 -1
- data/lib/rley/parser/parse_walker_factory.rb +15 -10
- data/spec/rley/parser/ambiguous_parse_spec.rb +1 -1
- data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
- data/spec/rley/parser/gfg_parsing_spec.rb +1 -1
- data/spec/rley/parser/groucho_spec.rb +1 -1
- data/spec/rley/parser/parse_forest_builder_spec.rb +1 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +148 -11
- metadata +9 -2
@@ -0,0 +1,31 @@
|
|
1
|
+
# Grammar for simple arithmetical expressions
|
2
|
+
require 'rley' # Load the gem
|
3
|
+
|
4
|
+
########################################
|
5
|
+
# Define a grammar for basic arithmetical expressions
|
6
|
+
# Declare the calculator grammar: terminals first, then the production
# rules. The order of the rules below is kept exactly as declared.
calc_builder = Rley::Syntax::GrammarBuilder.new do
  # --- Terminal symbols (names used by the lexer for its tokens) ---
  add_terminals('NUMBER')
  add_terminals('LPAREN', 'RPAREN') # For '(', ')' delimiters
  add_terminals('PLUS', 'MINUS') # For '+', '-' operators or sign
  add_terminals('STAR', 'DIVIDE', 'POWER') # For '*', '/', '**' operators

  # --- Additive level ---
  rule 'expression' => %w[simple_expression]
  rule 'simple_expression' => 'term'
  rule 'simple_expression' => %w[simple_expression add_operator term]

  # --- Multiplicative level ---
  rule 'term' => 'factor'
  rule 'term' => %w[term mul_operator factor]

  # --- Exponentiation level ---
  rule 'factor' => 'simple_factor'
  rule 'factor' => %w[simple_factor POWER simple_factor]

  # --- Operands: optionally signed number or parenthesized expression ---
  rule 'simple_factor' => %w[sign NUMBER]
  rule 'simple_factor' => %w[LPAREN expression RPAREN]
  rule 'simple_factor' => %w[MINUS LPAREN expression RPAREN]

  # --- Sign: '+', '-' or nothing (empty right-hand side) ---
  rule 'sign' => 'PLUS'
  rule 'sign' => 'MINUS'
  rule 'sign' => []

  # --- Binary operators ---
  rule 'add_operator' => 'PLUS'
  rule 'add_operator' => 'MINUS'
  rule 'mul_operator' => 'STAR'
  rule 'mul_operator' => 'DIVIDE'
end

# Ask the builder for the resulting grammar object.
CalcGrammar = calc_builder.grammar
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# File: calc_lexer.rb
|
2
|
+
# Lexer for a basic arithmetical expression parser
|
3
|
+
require 'strscan'
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
|
7
|
+
# Lexer (tokenizer) for the arithmetic expression calculator.
# It cuts the source text into a sequence of Rley tokens. The terminal
# symbol of each token is looked up by name in the table obtained from
# the grammar passed to the constructor.
class CalcLexer
  # @return [StringScanner] the scanner positioned over the source text
  attr_reader(:scanner)

  # @return [Integer] line number; set to 1 by the constructor
  attr_reader(:lineno)

  # Declared reader; this class never assigns the attribute.
  attr_reader(:line_start)

  # @return [Object] lookup table (symbol name => grammar symbol) as
  #   returned by the grammar's `name2symbol` method
  attr_reader(:name2symbol)

  # Mapping: operator/delimiter lexeme => terminal symbol name
  @@lexeme2name = {
    '(' => 'LPAREN',
    ')' => 'RPAREN',
    '+' => 'PLUS',
    '-' => 'MINUS',
    '*' => 'STAR',
    '/' => 'DIVIDE',
    '**' => 'POWER'
  }.freeze

  # Raised when the input contains text that isn't a valid token.
  class ScanError < StandardError; end

  # Constructor.
  # @param source [String] the text to tokenize
  # @param aGrammar [#name2symbol] the grammar providing the terminals
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
  end

  # Tokenize the whole source text.
  # @return [Array] the tokens in order of occurrence
  # @raise [ScanError] when an unknown token is met
  def tokens()
    tok_sequence = []
    until @scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    return tok_sequence
  end

  private

  # Scan the next token from the current scanner position.
  # @return [Object, nil] the token built by #build_token, or nil when
  #   only whitespace was left in the input
  # @raise [ScanError] when the text at the current position is not a token
  def _next_token()
    skip_whitespaces
    curr_ch = scanner.peek(1)
    # StringScanner#peek returns an empty string (not nil) at end of input.
    # Without the empty? check, trailing whitespace would fall into the
    # single-character branch below ('...'.include?('') is true) and
    # produce a token with a nil lexeme.
    return nil if curr_ch.nil? || curr_ch.empty?

    token = nil

    if '()+-/'.include? curr_ch
      # Single character token
      token = build_token(@@lexeme2name[curr_ch], scanner.getch)
    elsif (lexeme = scanner.scan(/\*\*?/))
      # '**' (POWER) is tried before '*' (STAR) thanks to the greedy '?'
      token = build_token(@@lexeme2name[lexeme], lexeme)
    elsif (lexeme = scanner.scan(/[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/))
      # Exponent part accepts one or more digits (e.g. 1e10)
      token = build_token('NUMBER', lexeme)
    else # Unknown token
      # peek didn't consume curr_ch, so the scanned text already starts
      # with the offending character.
      erroneous = scanner.scan(/.{1,20}/) || curr_ch
      raise ScanError, "Unknown token #{erroneous}"
    end

    return token
  end

  # Create a token for the given lexeme.
  # @param aSymbolName [String] name of the terminal symbol
  # @param aLexeme [String] the text of the token
  # @return [Rley::Tokens::Token]
  def build_token(aSymbolName, aLexeme)
    token_type = name2symbol[aSymbolName]
    return Rley::Tokens::Token.new(aLexeme, token_type)
  end

  # Advance the scanner past spaces, tabs, form feeds and line breaks.
  def skip_whitespaces()
    scanner.scan(/[ \t\f\n\r]+/)
  end
end # class
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree for an arithmetic expression
|
2
|
+
# language
|
3
|
+
require_relative 'calc_lexer'
|
4
|
+
require_relative 'calc_grammar'
|
5
|
+
|
6
|
+
# A parser for arithmetic expressions
|
7
|
+
# Earley parser specialized for the arithmetic expression grammar
# (CalcGrammar).
class CalcParser < Rley::Parser::GFGEarleyParser
  # Declared reader; this class never assigns the attribute.
  attr_reader(:source_file)

  # Constructor.
  # Build the Earley parser with the calculator grammar.
  def initialize()
    super(CalcGrammar)
  end

  # Tokenize the given text with a CalcLexer, then parse the tokens.
  # @param aText [String] the expression to parse
  # @return [Object] whatever #parse returns for the token sequence
  def parse_expression(aText)
    token_sequence = CalcLexer.new(aText, grammar).tokens
    parse(token_sequence)
  end
end # class
|
23
|
+
|
24
|
+
# End of file
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'rspec' # Use the RSpec framework
|
2
|
+
require_relative '../calc_parser' # Load the class under test
|
3
|
+
require_relative '../calc_ast_builder'
|
4
|
+
|
5
|
+
RSpec.configure do |config|
  # Display stack trace in case of failure
  config.full_backtrace = true
end


describe 'Calculator' do
  # Parse the given expression with a fresh calculator parser.
  # @param anExpression [String] the text to parse
  # @return [Object] the successful parse result
  # @raise [RuntimeError] when the parse fails
  def parse_expression(anExpression)
    # Create a calculator parser object
    parser = CalcParser.new
    result = parser.parse_expression(anExpression)
    return result if result.success?

    # Raise rather than exit(1): exiting would terminate the whole spec
    # run, whereas an exception only fails the current example.
    raise "Parsing of '#{anExpression}' failed. " \
          "Reason: #{result.failure_reason.message}"
  end

  # Debugging aid: print the concrete syntax tree of a parse result.
  # @param aParseResult [Object] a successful parse result
  def print_cst(aParseResult)
    # Generate a parse tree from the parse result
    ptree = aParseResult.parse_tree

    # Let's create a parse tree visitor
    visitor = Rley::ParseTreeVisitor.new(ptree)

    # Now output formatted parse tree
    renderer = Rley::Formatter::Asciitree.new($stdout)
    renderer.render(visitor)
  end

  # Build the abstract syntax tree for a parse result.
  # @param aParseResult [Object] a successful parse result
  # @return [Object] the root node of the AST
  def build_ast(aParseResult)
    tree_builder = CalcASTBuilder
    # Generate an abstract syntax tree from the parse result
    ast = aParseResult.parse_tree(tree_builder)
    return ast.root
  end

  # Parse and evaluate the expression, then wrap the computed value in
  # an RSpec expectation target.
  # @param anExpression [String] the text to evaluate
  def expect_expr(anExpression)
    parsing = parse_expression(anExpression)
    ast = build_ast(parsing)
    return expect(ast.interpret)
  end

  context 'Parsing valid expressions' do
    it 'should evaluate simple number literals' do
      expect_expr('2').to eq(2)
    end

    it 'should evaluate positive number literals' do
      expect_expr('+2').to eq(2)
      expect_expr('+ 2').to eq(2)
    end

    it 'should evaluate negative number literals' do
      expect_expr('-2').to eq(-2)
      expect_expr('- 2').to eq(-2)
    end

    it 'should evaluate addition' do
      expect_expr('2 + 2').to eq(4)
    end

    it 'should evaluate subtraction' do
      expect_expr('2.1 - 2').to be_within(0.000000000000001).of(0.1)
    end

    it 'handles negative numbers' do
      expect_expr('3--2').to eq(5)
    end

    it 'should evaluate division' do
      expect_expr('10.5 / 5').to eq(2.1)
    end

    it 'should evaluate multiplication' do
      expect_expr('2 * 3.1').to eq(6.2)
    end

    it 'should evaluate exponentiation' do
      expect_expr('5 ** (3 - 1)').to eq(25)
      expect_expr('25 ** 0.5').to eq(5)
    end

    it 'should change sign of expression in parentheses' do
      expect_expr('- (2 * 5)').to eq(-10)
    end

    it 'should evaluate parentheses' do
      expect_expr('2 * (2.1 + 1)').to eq(6.2)
    end

    it 'should evaluate regardless of whitespace' do
      expect_expr("2*(1+\t1)").to eq(4)
    end

    it 'should evaluate order of operations' do
      expect_expr('2 * 2.1 + 1 / 2').to eq 4.7
    end

    it 'should evaluate multiple levels of parentheses' do
      expect_expr('2*(1/(1+3))').to eq(0.5)
    end
  end # context
end # describe
|
113
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
@@ -10,7 +10,30 @@ require_relative 'shortcut_edge'
|
|
10
10
|
|
11
11
|
module Rley # This module is used as a namespace
|
12
12
|
module GFG # This module is used as a namespace
|
13
|
-
#
|
13
|
+
# A Grammar Flow Graph (GFG) represents the parsing states of productions
|
14
|
+
# rules from a context-free grammar. This representation is based on a
|
15
|
+
# directed graph structure. The parsing process can then be re-formulated
|
16
|
+
# as a path problem in the graph. The theory behind GFGs can be found in
|
17
|
+
# papers. The first article on GFG can be found here:
|
18
|
+
# https://apps.cs.utexas.edu/tech_reports/reports/tr/TR-2102.pdf
|
19
|
+
# There are three types of vertex in a GFG:
|
20
|
+
# start vertex, end vertex and item vertex.
|
21
|
+
# For each non-terminal symbol N of the grammar, there is:
|
22
|
+
# a start vertex with label '.N'
|
23
|
+
# an end vertex with label 'N.'
|
24
|
+
# For each production rule of the grammar:
|
25
|
+
# N => s1 s2 s3 (...) sk
|
26
|
+
# I.e. a rule with k grammar symbols in its right-hand side.
|
27
|
+
# For such a rule there will be k + 1 item vertices. By convention,
|
28
|
+
# the first item vertex is labelled as 'N => . s1 s2 s3 (...) sk'
|
29
|
+
# the second item vertex is labelled as 'N => s1 . s2 s3 (...) sk'
|
30
|
+
# the third item vertex is labelled as 'N => s1 s2 . s3 (...) sk'
|
31
|
+
# and so on. In other words, the labels are obtained by moving a dot
|
32
|
+
# in successive positions in the rhs. The dot represents the
|
33
|
+
# parse progress for the production rule. Symbols on the left of the
|
34
|
+
# dot represent the symbols that were successfully matched in the input.
|
35
|
+
# A GFG has three types of directed edges linking the vertices.
|
36
|
+
# call edge, return edge and scan edge.
|
14
37
|
class GrmFlowGraph
|
15
38
|
# The set of all vertices in the graph
|
16
39
|
attr_reader :vertices
|
@@ -24,6 +47,9 @@ module Rley # This module is used as a namespace
|
|
24
47
|
# A Hash with pairs of the form: non-terminal symbol => end node
|
25
48
|
attr_reader :end_vertex_for
|
26
49
|
|
50
|
+
# Constructor.
|
51
|
+
# @param theDottedItems [Array<DottedItem>] an array of the dotted items
|
52
|
+
# of the grammar.
|
27
53
|
def initialize(theDottedItems)
|
28
54
|
@vertices = []
|
29
55
|
@start_vertex_for = {}
|
@@ -32,7 +58,9 @@ module Rley # This module is used as a namespace
|
|
32
58
|
build_graph(theDottedItems)
|
33
59
|
end
|
34
60
|
|
35
|
-
#
|
61
|
+
# Retrieve the vertex with given vertex label.
|
62
|
+
# @param aVertexLabel [String] the label of a vertex from the graph
|
63
|
+
# @return [Vertex] the vertex with the given label, otherwise nil.
|
36
64
|
def find_vertex(aVertexLabel)
|
37
65
|
vertices.find { |a_vertex| a_vertex.label == aVertexLabel }
|
38
66
|
end
|
@@ -42,7 +70,7 @@ module Rley # This module is used as a namespace
|
|
42
70
|
# If one wants to remove useless rules, then do first:
|
43
71
|
# elimination of non-generating symbols
|
44
72
|
# then elimination of unreachable symbols
|
45
|
-
def diagnose
|
73
|
+
def diagnose()
|
46
74
|
mark_unreachable_symbols
|
47
75
|
end
|
48
76
|
|
@@ -65,15 +93,6 @@ module Rley # This module is used as a namespace
|
|
65
93
|
return next_one
|
66
94
|
end
|
67
95
|
end
|
68
|
-
|
69
|
-
def print_vertex(aText, aVertex)
|
70
|
-
print aText + ' '
|
71
|
-
if aVertex.kind_of?(NonTerminalVertex)
|
72
|
-
puts "#{aVertex.class} #{aVertex.non_terminal.name}"
|
73
|
-
else
|
74
|
-
p(aVertex.label)
|
75
|
-
end
|
76
|
-
end
|
77
96
|
|
78
97
|
# Walk over all the vertices of the graph that are reachable from a given
|
79
98
|
# start vertex. This is a depth-first graph traversal.
|
@@ -137,6 +156,16 @@ module Rley # This module is used as a namespace
|
|
137
156
|
@start_vertex = aVertex if vertices.empty?
|
138
157
|
vertices << aVertex
|
139
158
|
end
|
159
|
+
|
160
|
+
# For debugging purposes
|
161
|
+
def print_vertex(aText, aVertex)
|
162
|
+
print aText + ' '
|
163
|
+
if aVertex.kind_of?(NonTerminalVertex)
|
164
|
+
puts "#{aVertex.class} #{aVertex.non_terminal.name}"
|
165
|
+
else
|
166
|
+
p(aVertex.label)
|
167
|
+
end
|
168
|
+
end
|
140
169
|
|
141
170
|
def build_graph(theDottedItems)
|
142
171
|
build_all_starts_ends(theDottedItems)
|
data/lib/rley/gfg/vertex.rb
CHANGED
@@ -6,30 +6,37 @@ module Rley # This module is used as a namespace
|
|
6
6
|
# - To know its label
|
7
7
|
class Vertex
|
8
8
|
# The edges linking the successor vertices to this one.
|
9
|
+
# @!attribute [r] edges
|
10
|
+
# @return [Array<Edge>] The edge(s) linking this vertex to successor(s)
|
9
11
|
attr_reader :edges
|
10
12
|
|
13
|
+
# Constructor to override.
|
11
14
|
def initialize()
|
12
15
|
@edges = []
|
13
16
|
end
|
14
17
|
|
15
|
-
# Add an graph edge to this vertex
|
18
|
+
# Add a graph edge to this vertex.
|
19
|
+
# @param anEdge [Edge] the edge to be added.
|
16
20
|
def add_edge(anEdge)
|
17
21
|
arrow = check_add_edge(anEdge)
|
18
22
|
edges << arrow
|
19
23
|
end
|
20
24
|
|
21
|
-
#
|
25
|
+
# Determine if the vertex corresponds to a dotted item that has
|
22
26
|
# its dot at the end of a production (i.e. is a reduced item).
|
27
|
+
# @return [Boolean] true iff vertex corresponds to reduced item.
|
23
28
|
def complete?()
|
24
29
|
return false # Default implementation
|
25
30
|
end
|
26
31
|
|
27
|
-
#
|
32
|
+
# Retrieve the grammar symbol before the dot.
|
33
|
+
# @return [GrmSymbol, NilClass] The symbol or otherwise nil.
|
28
34
|
def prev_symbol()
|
29
35
|
return nil # Default implementation
|
30
36
|
end
|
31
37
|
|
32
|
-
#
|
38
|
+
# Retrieve the grammar symbol after the dot.
|
39
|
+
# @return [GrmSymbol, NilClass] The symbol or otherwise nil.
|
33
40
|
def next_symbol()
|
34
41
|
return nil # Default implementation
|
35
42
|
end
|
@@ -27,7 +27,9 @@ module Rley # This module is used as a namespace
|
|
27
27
|
# The reason of a parse failure
|
28
28
|
attr_reader(:failure_reason)
|
29
29
|
|
30
|
-
|
30
|
+
# Constructor
|
31
|
+
# @param theGFG [GrmFlowGraph] the Grammar Flow Graph
|
32
|
+
# @param theTokens [Array<Token>] the array of input tokens
|
31
33
|
def initialize(theGFG, theTokens)
|
32
34
|
@gf_graph = theGFG
|
33
35
|
@tokens = theTokens.dup
|
@@ -46,8 +48,34 @@ module Rley # This module is used as a namespace
|
|
46
48
|
next_symbol = anEntry.next_symbol
|
47
49
|
start_vertex = gf_graph.start_vertex_for[next_symbol]
|
48
50
|
pos = aPosition
|
51
|
+
size_before = chart[pos].size
|
49
52
|
apply_rule(anEntry, start_vertex, pos, pos, :call_rule)
|
53
|
+
|
54
|
+
if next_symbol.nullable? && anEntry.dotted_entry?
|
55
|
+
size_after = chart[pos].size
|
56
|
+
# ...apply the Nullable rule
|
57
|
+
nullable_rule(anEntry, aPosition) if size_after == size_before
|
58
|
+
end
|
50
59
|
end
|
60
|
+
|
61
|
+
# Let the current sigma set be the ith parse entry set.
|
62
|
+
# This method is invoked when a dotted entry is added
|
63
|
+
# to the parse entry set of the form [A => alpha . B beta, k]
|
64
|
+
# and B is nullable
|
65
|
+
# Then the entry [A => alpha B . beta, k] is added to the current
|
66
|
+
# sigma set.
|
67
|
+
def nullable_rule(anEntry, aPosition)
|
68
|
+
next_symbol = anEntry.next_symbol
|
69
|
+
end_vertex = gf_graph.end_vertex_for[next_symbol]
|
70
|
+
pos = aPosition
|
71
|
+
end_entry = push_entry(end_vertex, anEntry.origin, pos, :nullable_rule)
|
72
|
+
curr_vertex = anEntry.vertex
|
73
|
+
next_vertex = curr_vertex.shortcut.successor
|
74
|
+
|
75
|
+
# first pos == origin
|
76
|
+
# second pos == position
|
77
|
+
apply_rule(end_entry, next_vertex, anEntry.origin, pos, :nullable_rule)
|
78
|
+
end
|
51
79
|
|
52
80
|
# Let the current sigma set be the ith parse entry set.
|
53
81
|
# This method is invoked when an entry is added to a parse entry set
|
@@ -40,8 +40,9 @@ module Rley # This module is used as a namespace
|
|
40
40
|
# Append the given entry (if it isn't yet in the set)
|
41
41
|
# to the list of parse entries
|
42
42
|
# @param anEntry [ParseEntry] the parse entry to push.
|
43
|
-
# @return [ParseEntry] the passed parse entry it
|
43
|
+
# @return [ParseEntry] the passed parse entry if it pushes it
|
44
44
|
def push_entry(anEntry)
|
45
|
+
# TODO: control overhead next line
|
45
46
|
match = entries.find { |entry| entry == anEntry }
|
46
47
|
if match
|
47
48
|
result = match
|