rley 0.5.04 → 0.5.05
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/examples/general/calc_iter1/calc_demo.rb +1 -1
- data/examples/general/calc_iter2/calc_ast_builder.rb +200 -0
- data/examples/general/calc_iter2/calc_ast_nodes.rb +156 -0
- data/examples/general/calc_iter2/calc_demo.rb +66 -0
- data/examples/general/calc_iter2/calc_grammar.rb +31 -0
- data/examples/general/calc_iter2/calc_lexer.rb +78 -0
- data/examples/general/calc_iter2/calc_parser.rb +24 -0
- data/examples/general/calc_iter2/spec/calculator_spec.rb +113 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/grm_flow_graph.rb +41 -12
- data/lib/rley/gfg/vertex.rb +11 -4
- data/lib/rley/parser/gfg_parsing.rb +29 -1
- data/lib/rley/parser/parse_entry_set.rb +2 -1
- data/lib/rley/parser/parse_forest_factory.rb +7 -0
- data/lib/rley/parser/parse_rep_creator.rb +8 -2
- data/lib/rley/parser/parse_tree_builder.rb +5 -3
- data/lib/rley/parser/parse_tree_factory.rb +1 -1
- data/lib/rley/parser/parse_walker_factory.rb +15 -10
- data/spec/rley/parser/ambiguous_parse_spec.rb +1 -1
- data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
- data/spec/rley/parser/gfg_parsing_spec.rb +1 -1
- data/spec/rley/parser/groucho_spec.rb +1 -1
- data/spec/rley/parser/parse_forest_builder_spec.rb +1 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +148 -11
- metadata +9 -2
@@ -0,0 +1,31 @@
|
|
1
|
+
# Grammar for simple arithmetical expressions
|
2
|
+
require 'rley' # Load the gem
|
3
|
+
|
4
|
+
########################################
|
5
|
+
# Define a grammar for basic arithmetical expressions
|
6
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
7
|
+
add_terminals('NUMBER')
|
8
|
+
add_terminals('LPAREN', 'RPAREN') # For '(', ')' delimiters
|
9
|
+
add_terminals('PLUS', 'MINUS') # For '+', '-' operators or sign
|
10
|
+
add_terminals('STAR', 'DIVIDE', 'POWER') # For '*', '/', '**' operators
|
11
|
+
rule 'expression' => %w[simple_expression]
|
12
|
+
rule 'simple_expression' => 'term'
|
13
|
+
rule 'simple_expression' => %w[simple_expression add_operator term]
|
14
|
+
rule 'term' => 'factor'
|
15
|
+
rule 'term' => %w[term mul_operator factor]
|
16
|
+
rule 'factor' => 'simple_factor'
|
17
|
+
rule 'factor' => %w[simple_factor POWER simple_factor]
|
18
|
+
rule 'simple_factor' => %w[sign NUMBER]
|
19
|
+
rule 'simple_factor' => %w[LPAREN expression RPAREN]
|
20
|
+
rule 'simple_factor' => %w[MINUS LPAREN expression RPAREN]
|
21
|
+
rule 'sign' => 'PLUS'
|
22
|
+
rule 'sign' => 'MINUS'
|
23
|
+
rule 'sign' => []
|
24
|
+
rule 'add_operator' => 'PLUS'
|
25
|
+
rule 'add_operator' => 'MINUS'
|
26
|
+
rule 'mul_operator' => 'STAR'
|
27
|
+
rule 'mul_operator' => 'DIVIDE'
|
28
|
+
end
|
29
|
+
|
30
|
+
# And now build the grammar...
|
31
|
+
CalcGrammar = builder.grammar
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# File: calc_lexer.rb
|
2
|
+
# Lexer for a basic arithmetical expression parser
|
3
|
+
require 'strscan'
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
|
7
|
+
# Lexer (tokenizer) for basic arithmetical expressions.
# It splits a source text into a sequence of Rley tokens whose terminal
# symbols are looked up, by name, in the grammar passed to the constructor.
class CalcLexer
  # @return [StringScanner] the scanner over the source text
  attr_reader(:scanner)

  # @return [Integer] set to 1 by the constructor (not updated by this lexer)
  attr_reader(:lineno)

  # Never assigned by this class (reads as nil).
  attr_reader(:line_start)

  # @return [Hash] mapping: terminal name => grammar symbol
  attr_reader(:name2symbol)

  # Mapping: operator/delimiter lexeme => name of the terminal symbol
  @@lexeme2name = {
    '(' => 'LPAREN',
    ')' => 'RPAREN',
    '+' => 'PLUS',
    '-' => 'MINUS',
    '*' => 'STAR',
    '/' => 'DIVIDE',
    '**' => 'POWER'
  }.freeze

  # Raised when the input contains text that matches no known token.
  class ScanError < StandardError; end

  # Constructor.
  # @param source [String] the text to tokenize
  # @param aGrammar [#name2symbol] object that provides the mapping
  #   terminal name => grammar symbol
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
  end

  # Tokenize the remaining input.
  # @return [Array] the sequence of tokens (empty for blank input)
  # @raise [ScanError] when an unknown token is met
  def tokens()
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Scan the next token from the input.
  # @return [Object, nil] the token, or nil when only whitespace was left
  # @raise [ScanError] when the upcoming text matches no token
  def _next_token()
    skip_whitespaces
    curr_ch = scanner.peek(1)
    # StringScanner#peek returns an empty string (not nil) at end of input.
    # Without this guard the empty string would be accepted by
    # String#include? below and a bogus token would be produced.
    return nil if curr_ch.nil? || curr_ch.empty?

    if '()+-/'.include? curr_ch
      # Single character token
      build_token(@@lexeme2name[curr_ch], scanner.getch)
    elsif (lexeme = scanner.scan(/\*\*/)) # '**' must be tried before '*'
      build_token(@@lexeme2name[lexeme], lexeme)
    elsif (lexeme = scanner.scan(/\*/))
      build_token(@@lexeme2name[lexeme], lexeme)
    elsif (lexeme = scanner.scan(/[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/))
      # The exponent part accepts one or more digits (e.g. 1e10)
      build_token('NUMBER', lexeme)
    else # Unknown token
      # curr_ch was only peeked, not consumed: the scan below already
      # starts with it, so it must not be prepended a second time.
      erroneous = scanner.scan(/.{1,20}/) || curr_ch
      raise ScanError, "Unknown token #{erroneous}"
    end
  end

  # Create a token for the given terminal name and lexeme.
  # @param aSymbolName [String] name of the terminal symbol
  # @param aLexeme [String] the text of the token
  # @return [Rley::Tokens::Token]
  def build_token(aSymbolName, aLexeme)
    token_type = name2symbol[aSymbolName]
    return Rley::Tokens::Token.new(aLexeme, token_type)
  end

  # Consume any run of blanks, tabs, form feeds and line terminators.
  def skip_whitespaces()
    scanner.scan(/[ \t\f\n\r]+/)
  end
end # class
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Purpose: to demonstrate how to build a parser for simple arithmetic
|
2
|
+
# expressions
|
3
|
+
require_relative 'calc_lexer'
|
4
|
+
require_relative 'calc_grammar'
|
5
|
+
|
6
|
+
# A parser for arithmetic expressions
|
7
|
+
class CalcParser < Rley::Parser::GFGEarleyParser
  # Reader for a source file attribute; nothing in this class assigns it.
  attr_reader :source_file

  # Constructor.
  # Build the Earley parser with the calculator grammar.
  def initialize
    super(CalcGrammar)
  end

  # Tokenize the given text with a CalcLexer and parse the tokens.
  # @param aText [String] the arithmetic expression to parse
  # @return the value returned by the inherited `parse` method
  def parse_expression(aText)
    parse(CalcLexer.new(aText, grammar).tokens)
  end
end # class
|
23
|
+
|
24
|
+
# End of file
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'rspec' # Use the RSpec framework
|
2
|
+
require_relative '../calc_parser' # Load the class under test
|
3
|
+
require_relative '../calc_ast_builder'
|
4
|
+
|
5
|
+
RSpec.configure do |config|
|
6
|
+
# Display stack trace in case of failure
|
7
|
+
config.full_backtrace = true
|
8
|
+
end
|
9
|
+
|
10
|
+
|
11
|
+
describe 'Calculator' do
|
12
|
+
# Parse the given expression with a fresh CalcParser.
# @param anExpression [String] the arithmetic expression to parse
# @return the successful parse result
# @raise [RuntimeError] when the parse failed; the message carries the
#   expression and the failure reason
def parse_expression(anExpression)
  # Create a calculator parser object
  parser = CalcParser.new
  result = parser.parse_expression(anExpression)

  unless result.success?
    # Raise instead of calling exit(1): an exception fails only the current
    # example, whereas exiting would terminate the whole spec run.
    raise "Parsing of '#{anExpression}' failed\n" \
          "Reason: #{result.failure_reason.message}"
  end

  result
end
|
26
|
+
|
27
|
+
# Render the parse tree of the given parse result as an ASCII tree
# on the standard output.
# @param aParseResult the parse result to obtain the parse tree from
def print_cst(aParseResult)
  # Wrap the generated parse tree in a visitor...
  tree_visitor = Rley::ParseTreeVisitor.new(aParseResult.parse_tree)

  # ...and let the formatter walk it
  Rley::Formatter::Asciitree.new($stdout).render(tree_visitor)
end
|
38
|
+
|
39
|
+
# Generate an abstract syntax tree from the parse result, using
# CalcASTBuilder as tree builder.
# @param aParseResult the parse result to convert
# @return the root of the generated tree
def build_ast(aParseResult)
  aParseResult.parse_tree(CalcASTBuilder).root
end
|
45
|
+
|
46
|
+
# Parse the expression, build its AST, interpret it and wrap the
# outcome in an RSpec expectation target.
# @param anExpression [String] the arithmetic expression to evaluate
# @return the value of `expect(...)` applied to the interpreted result
def expect_expr(anExpression)
  syntax_tree = build_ast(parse_expression(anExpression))
  expect(syntax_tree.interpret)
end
|
51
|
+
|
52
|
+
context 'Parsing valid expressions' do
|
53
|
+
it 'should evaluate simple number literals' do
|
54
|
+
expect_expr('2').to eq(2)
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'should evaluate positive number literals' do
|
58
|
+
expect_expr('+2').to eq(2)
|
59
|
+
expect_expr('+ 2').to eq(2)
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'should evaluate negative number literals' do
|
63
|
+
expect_expr('-2').to eq(-2)
|
64
|
+
expect_expr('- 2').to eq(-2)
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'should evaluate addition' do
|
68
|
+
expect_expr('2 + 2').to eq(4)
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'should evaluate subtraction' do
|
72
|
+
expect_expr('2.1 - 2').to be_within(0.000000000000001).of(0.1)
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'handles negative numbers' do
|
76
|
+
expect_expr('3--2').to eq(5)
|
77
|
+
end
|
78
|
+
|
79
|
+
it 'should evaluate division' do
|
80
|
+
expect_expr('10.5 / 5').to eq(2.1)
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'should evaluate multiplication' do
|
84
|
+
expect_expr('2 * 3.1').to eq(6.2)
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'should evaluate exponentiation' do
|
88
|
+
expect_expr('5 ** (3 - 1)').to eq(25)
|
89
|
+
expect_expr('25 ** 0.5').to eq(5)
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'should change sign of expression in parentheses' do
|
93
|
+
expect_expr('- (2 * 5)').to eq(-10)
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'should evaluate parentheses' do
|
97
|
+
expect_expr('2 * (2.1 + 1)').to eq(6.2)
|
98
|
+
end
|
99
|
+
|
100
|
+
it 'should evaluate regardless of whitespace' do
|
101
|
+
expect_expr("2*(1+\t1)").to eq(4)
|
102
|
+
end
|
103
|
+
|
104
|
+
it 'should evaluate order of operations' do
|
105
|
+
expect_expr('2 * 2.1 + 1 / 2').to eq 4.7
|
106
|
+
end
|
107
|
+
|
108
|
+
it 'should evaluate multiple levels of parentheses' do
|
109
|
+
expect_expr('2*(1/(1+3))').to eq(0.5)
|
110
|
+
end
|
111
|
+
end # context
|
112
|
+
end # describe
|
113
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
@@ -10,7 +10,30 @@ require_relative 'shortcut_edge'
|
|
10
10
|
|
11
11
|
module Rley # This module is used as a namespace
|
12
12
|
module GFG # This module is used as a namespace
|
13
|
-
#
|
13
|
+
# A Grammar Flow Graph (GFG) represents the parsing states of production
|
14
|
+
# rules from a context-free grammar. This representation is based on a
|
15
|
+
# directed graph structure. The parsing process can then be re-formulated
|
16
|
+
# as a path problem in the graph. The theory behind GFGs can be found in
|
17
|
+
# papers. The first article on GFG can be found here:
|
18
|
+
# https://apps.cs.utexas.edu/tech_reports/reports/tr/TR-2102.pdf
|
19
|
+
# There are three types of vertex in a GFG:
|
20
|
+
# start vertex, end vertex and item vertex.
|
21
|
+
# For each non-terminal symbol N of the grammar, there is:
|
22
|
+
# a start vertex with label '.N'
|
23
|
+
# an end vertex with label 'N.'
|
24
|
+
# For each production rule of the grammar:
|
25
|
+
# N => s1 s2 s3 (...) sk
|
26
|
+
# I.e. a rule with k grammar symbols in its right-hand side.
|
27
|
+
# For such a rule there will be k + 1 item vertices. By convention,
|
28
|
+
# the first item vertex is labelled as 'N => . s1 s2 s3 (...) sk'
|
29
|
+
# the second item vertex is labelled as 'N => s1 . s2 s3 (...) sk'
|
30
|
+
# the third item vertex is labelled as 'N => s1 s2 . s3 (...) sk'
|
31
|
+
# and so on. In other words, the labels are obtained by moving a dot
|
32
|
+
# in successive positions in the rhs. The dot represents the
|
33
|
+
# parse progress for the production rule. Symbols on the left of the
|
34
|
+
# dot represent the symbols that were successfully matched in the input.
|
35
|
+
# A GFG has three types of directed edges linking the vertices.
|
36
|
+
# call edge, return edge and scan edge.
|
14
37
|
class GrmFlowGraph
|
15
38
|
# The set of all vertices in the graph
|
16
39
|
attr_reader :vertices
|
@@ -24,6 +47,9 @@ module Rley # This module is used as a namespace
|
|
24
47
|
# A Hash with pairs of the form: non-terminal symbol => end node
|
25
48
|
attr_reader :end_vertex_for
|
26
49
|
|
50
|
+
# Constructor.
|
51
|
+
# @param theDottedItems [Array<DottedItem>] an array of the dotted items
|
52
|
+
# of the grammar.
|
27
53
|
def initialize(theDottedItems)
|
28
54
|
@vertices = []
|
29
55
|
@start_vertex_for = {}
|
@@ -32,7 +58,9 @@ module Rley # This module is used as a namespace
|
|
32
58
|
build_graph(theDottedItems)
|
33
59
|
end
|
34
60
|
|
35
|
-
#
|
61
|
+
# Retrieve the vertex with given vertex label.
|
62
|
+
# @param aVertexLabel [String] the label of a vertex from the graph
|
63
|
+
# @return [Vertex] the vertex with the given label, otherwise nil.
|
36
64
|
def find_vertex(aVertexLabel)
|
37
65
|
vertices.find { |a_vertex| a_vertex.label == aVertexLabel }
|
38
66
|
end
|
@@ -42,7 +70,7 @@ module Rley # This module is used as a namespace
|
|
42
70
|
# If one wants to remove useless rules, then do first:
|
43
71
|
# elimination of non-generating symbols
|
44
72
|
# then elimination of unreachable symbols
|
45
|
-
def diagnose
|
73
|
+
def diagnose()
|
46
74
|
mark_unreachable_symbols
|
47
75
|
end
|
48
76
|
|
@@ -65,15 +93,6 @@ module Rley # This module is used as a namespace
|
|
65
93
|
return next_one
|
66
94
|
end
|
67
95
|
end
|
68
|
-
|
69
|
-
def print_vertex(aText, aVertex)
|
70
|
-
print aText + ' '
|
71
|
-
if aVertex.kind_of?(NonTerminalVertex)
|
72
|
-
puts "#{aVertex.class} #{aVertex.non_terminal.name}"
|
73
|
-
else
|
74
|
-
p(aVertex.label)
|
75
|
-
end
|
76
|
-
end
|
77
96
|
|
78
97
|
# Walk over all the vertices of the graph that are reachable from a given
|
79
98
|
# start vertex. This is a depth-first graph traversal.
|
@@ -137,6 +156,16 @@ module Rley # This module is used as a namespace
|
|
137
156
|
@start_vertex = aVertex if vertices.empty?
|
138
157
|
vertices << aVertex
|
139
158
|
end
|
159
|
+
|
160
|
+
# For debugging purposes
|
161
|
+
# Print the given text followed by a description of the vertex:
# class and non-terminal name for a NonTerminalVertex, otherwise the
# inspected vertex label.
# @param aText [String] text printed before the vertex description
# @param aVertex the vertex to describe
def print_vertex(aText, aVertex)
  print aText + ' '
  case aVertex
  when NonTerminalVertex
    puts "#{aVertex.class} #{aVertex.non_terminal.name}"
  else
    p(aVertex.label)
  end
end
|
140
169
|
|
141
170
|
def build_graph(theDottedItems)
|
142
171
|
build_all_starts_ends(theDottedItems)
|
data/lib/rley/gfg/vertex.rb
CHANGED
@@ -6,30 +6,37 @@ module Rley # This module is used as a namespace
|
|
6
6
|
# - To know its label
|
7
7
|
class Vertex
|
8
8
|
# The edges linking the successor vertices to this one.
|
9
|
+
# @!attribute [r] edges
|
10
|
+
# @return [Array<Edge>] The edge(s) linking this vertex to successor(s)
|
9
11
|
attr_reader :edges
|
10
12
|
|
13
|
+
# Constructor to override.
|
11
14
|
def initialize()
|
12
15
|
@edges = []
|
13
16
|
end
|
14
17
|
|
15
|
-
# Add an graph edge to this vertex
|
18
|
+
# Add a graph edge to this vertex.
|
19
|
+
# @param anEdge [Edge] the edge to be added.
|
16
20
|
def add_edge(anEdge)
|
17
21
|
arrow = check_add_edge(anEdge)
|
18
22
|
edges << arrow
|
19
23
|
end
|
20
24
|
|
21
|
-
#
|
25
|
+
# Determine if the vertex corresponds to a dotted item that has
|
22
26
|
# its dot at the end of a production (i.e. is a reduced item).
|
27
|
+
# @return [Boolean] true iff vertex corresponds to reduced item.
|
23
28
|
def complete?()
|
24
29
|
return false # Default implementation
|
25
30
|
end
|
26
31
|
|
27
|
-
#
|
32
|
+
# Retrieve the grammar symbol before the dot.
|
33
|
+
# @return [GrmSymbol, NilClass] The symbol or otherwise nil.
|
28
34
|
def prev_symbol()
|
29
35
|
return nil # Default implementation
|
30
36
|
end
|
31
37
|
|
32
|
-
#
|
38
|
+
# Retrieve the grammar symbol after the dot.
|
39
|
+
# @return [GrmSymbol, NilClass] The symbol or otherwise nil.
|
33
40
|
def next_symbol()
|
34
41
|
return nil # Default implementation
|
35
42
|
end
|
@@ -27,7 +27,9 @@ module Rley # This module is used as a namespace
|
|
27
27
|
# The reason of a parse failure
|
28
28
|
attr_reader(:failure_reason)
|
29
29
|
|
30
|
-
|
30
|
+
# Constructor
|
31
|
+
# @param theGFG [GrmFlowGraph] the Grammar Flow Graph
|
32
|
+
# @param theTokens [Array<Token>] the array of input tokens
|
31
33
|
def initialize(theGFG, theTokens)
|
32
34
|
@gf_graph = theGFG
|
33
35
|
@tokens = theTokens.dup
|
@@ -46,8 +48,34 @@ module Rley # This module is used as a namespace
|
|
46
48
|
next_symbol = anEntry.next_symbol
|
47
49
|
start_vertex = gf_graph.start_vertex_for[next_symbol]
|
48
50
|
pos = aPosition
|
51
|
+
size_before = chart[pos].size
|
49
52
|
apply_rule(anEntry, start_vertex, pos, pos, :call_rule)
|
53
|
+
|
54
|
+
if next_symbol.nullable? && anEntry.dotted_entry?
|
55
|
+
size_after = chart[pos].size
|
56
|
+
# ...apply the Nullable rule
|
57
|
+
nullable_rule(anEntry, aPosition) if size_after == size_before
|
58
|
+
end
|
50
59
|
end
|
60
|
+
|
61
|
+
# Let the current sigma set be the ith parse entry set.
|
62
|
+
# This method is invoked when a dotted entry is added
|
63
|
+
# to the parse entry set of the form [A => alpha . B beta, k]
|
64
|
+
# and B is nullable
|
65
|
+
# Then the entry [A => alpha B . beta, k] is added to the current
|
66
|
+
# sigma set.
|
67
|
+
def nullable_rule(anEntry, aPosition)
  # Push an entry for the end vertex of the symbol that follows the dot
  exit_vertex = gf_graph.end_vertex_for[anEntry.next_symbol]
  exit_entry = push_entry(exit_vertex, anEntry.origin, aPosition, :nullable_rule)

  # Vertex reached from the entry's vertex through its shortcut
  after_dot = anEntry.vertex.shortcut.successor

  # Third argument is the origin, fourth one is the position
  apply_rule(exit_entry, after_dot, anEntry.origin, aPosition, :nullable_rule)
end
|
51
79
|
|
52
80
|
# Let the current sigma set be the ith parse entry set.
|
53
81
|
# This method is invoked when an entry is added to a parse entry set
|
@@ -40,8 +40,9 @@ module Rley # This module is used as a namespace
|
|
40
40
|
# Append the given entry (if it isn't yet in the set)
|
41
41
|
# to the list of parse entries
|
42
42
|
# @param anEntry [ParseEntry] the parse entry to push.
|
43
|
-
# @return [ParseEntry] the passed parse entry it
|
43
|
+
# @return [ParseEntry] the passed parse entry if it pushes it
|
44
44
|
def push_entry(anEntry)
|
45
|
+
# TODO: control overhead next line
|
45
46
|
match = entries.find { |entry| entry == anEntry }
|
46
47
|
if match
|
47
48
|
result = match
|