rley 0.5.01 → 0.5.02

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/examples/data_formats/JSON/cli_options.rb +25 -9
  4. data/examples/data_formats/JSON/json_ast_builder.rb +152 -0
  5. data/examples/data_formats/JSON/json_ast_nodes.rb +141 -0
  6. data/examples/data_formats/JSON/json_demo.rb +24 -8
  7. data/examples/general/calc_iter1/calc_ast_builder.rb +142 -0
  8. data/examples/general/calc_iter1/calc_ast_nodes.rb +151 -0
  9. data/examples/general/calc_iter1/calc_demo.rb +38 -0
  10. data/examples/general/calc_iter1/calc_grammar.rb +25 -0
  11. data/examples/general/calc_iter1/calc_lexer.rb +81 -0
  12. data/examples/general/{calc → calc_iter1}/calc_parser.rb +0 -0
  13. data/examples/general/calc_iter1/spec/calculator_spec.rb +73 -0
  14. data/examples/general/calc_iter2/calc_ast_builder.rb +186 -0
  15. data/examples/general/calc_iter2/calc_ast_nodes.rb +151 -0
  16. data/examples/general/{calc → calc_iter2}/calc_demo.rb +3 -2
  17. data/examples/general/{calc → calc_iter2}/calc_grammar.rb +0 -0
  18. data/examples/general/calc_iter2/calc_lexer.rb +81 -0
  19. data/examples/general/calc_iter2/calc_parser.rb +24 -0
  20. data/lib/rley.rb +1 -0
  21. data/lib/rley/constants.rb +1 -1
  22. data/lib/rley/parser/cst_builder.rb +5 -225
  23. data/lib/rley/parser/gfg_parsing.rb +2 -2
  24. data/lib/rley/parser/parse_forest_factory.rb +1 -1
  25. data/lib/rley/parser/parse_rep_creator.rb +2 -2
  26. data/lib/rley/parser/parse_tree_builder.rb +161 -104
  27. data/lib/rley/parser/parse_tree_factory.rb +6 -2
  28. data/spec/rley/parser/ast_builder_spec.rb +395 -0
  29. data/spec/rley/support/grammar_arr_int_helper.rb +21 -11
  30. metadata +20 -9
  31. data/examples/general/calc/calc_lexer.rb +0 -90
  32. data/spec/rley/parser/parse_tree_builder_spec.rb +0 -249
@@ -0,0 +1,151 @@
1
+ # Classes that implement nodes of Abstract Syntax Trees (AST) representing
2
+ # calculator parse results.
3
+
4
+
5
# Leaf node of a calculator AST.
# Holds the input token, the position it was given at construction and
# a value derived from the token's lexeme.
CalcTerminalNode = Struct.new(:token, :value, :position) do
  # @param aToken [#lexeme, #terminal] the token wrapped by this node
  # @param aPosition [Object] position associated with the token
  def initialize(aToken, aPosition)
    self.token = aToken
    self.position = aPosition
    init_value(aToken.lexeme)
  end

  # Derive the node value from the token text.
  # Subclasses can override this method; the default implementation
  # stores a copy of the literal.
  # @param aLiteral [String] the lexeme of the token
  def init_value(aLiteral)
    self.value = aLiteral.dup
  end

  # @return [Object] whatever the wrapped token reports as its terminal
  def symbol
    token.terminal
  end

  # @return [Object] the value computed by #init_value
  def interpret
    value
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param aVisitor [ParseTreeVisitor] the visitor
  def accept(aVisitor)
    aVisitor.visit_terminal(self)
  end
end
31
+
32
# Terminal node for a numeric literal.
class CalcNumberNode < CalcTerminalNode
  # Convert the literal text into an Integer or a Float.
  # The patterns are anchored with \A and \z (whole string) rather than
  # ^ and $ (per line), so a literal that merely contains a numeric
  # line is not accepted as a number.
  # The value is left untouched when the literal matches neither pattern.
  # @param aLiteral [String] the lexeme of the token
  def init_value(aLiteral)
    case aLiteral
    when /\A[+-]?\d+\z/
      self.value = aLiteral.to_i
    when /\A[+-]?\d+(\.\d+)?([eE][+-]?\d+)?\z/
      self.value = aLiteral.to_f
    end
  end
end
43
+
44
# Base class for AST nodes that own an ordered list of child nodes.
class CalcCompositeNode
  attr_accessor :children, :symbol

  # @param aSymbol [Object] the symbol this node stands for
  def initialize(aSymbol)
    @children = []
    @symbol = aSymbol
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param aVisitor [ParseTreeVisitor] the visitor
  def accept(aVisitor)
    aVisitor.visit_nonterminal(self)
  end

  # `subnodes` is another name for the `children` reader.
  alias_method :subnodes, :children
end # class
62
+
63
# Composite node for an operator applied to a single operand.
# The constructor is inherited unchanged from CalcCompositeNode.
class CalcUnaryOpNode < CalcCompositeNode
  # `members` is another name for the `children` reader.
  alias members children

  # Convert this tree node into a simpler Ruby representation (a Hash).
  # Every member is asked for `name` and `value`, and both are converted
  # with `to_ruby`.
  # NOTE(review): this presumes members are name/value pairs, which looks
  # inherited from a JSON-like structure - confirm it is still needed here.
  # @return [Hash]
  def to_ruby
    members.each_with_object({}) do |pair, rep|
      rep[pair.name.to_ruby] = pair.value.to_ruby
    end
  end
end # class
81
+
82
# Unary node for arithmetic negation.
class CalcNegateNode < CalcUnaryOpNode
  # Evaluate the operand (the last child) and return its opposite.
  # A child that does not respond to `interpret` is used as-is.
  # @return [Numeric]
  def interpret
    operand = children.last
    operand = operand.interpret if operand.respond_to?(:interpret)
    -operand
  end
end # class
84
+
85
# Composite node for an operator applied to two operands.
# The constructor is inherited unchanged from CalcCompositeNode.
class CalcBinaryOpNode < CalcCompositeNode
  protected

  # Evaluate the children in order.
  # A child that responds to `interpret` contributes the result of that
  # call; any other child contributes itself.
  # @return [Array] one entry per child
  def get_operands
    children.map do |child|
      child.respond_to?(:interpret) ? child.interpret : child
    end
  end
end # class
103
+
104
# Binary node for addition.
class CalcAddNode < CalcBinaryOpNode
  # TODO
  # @return [Object] first operand + second operand
  def interpret
    left, right = get_operands
    left + right
  end
end # class
114
+
115
+
116
# Binary node for subtraction.
class CalcSubtractNode < CalcBinaryOpNode
  # TODO
  # @return [Object] first operand - second operand
  def interpret
    left, right = get_operands
    left - right
  end
end # class
126
+
127
# Binary node for multiplication.
class CalcMultiplyNode < CalcBinaryOpNode
  # TODO
  # @return [Object] first operand * second operand
  def interpret
    left, right = get_operands
    left * right
  end
end # class
136
+
137
# Binary node for division.
class CalcDivideNode < CalcBinaryOpNode
  # TODO
  # The first operand is converted with `to_f` before dividing.
  # @return [Object] first operand (as Float) / second operand
  def interpret
    left, right = get_operands
    left.to_f / right
  end
end # class
148
+
149
+
150
+
151
+
@@ -0,0 +1,38 @@
1
+ require_relative 'calc_parser'
2
+ require_relative 'calc_ast_builder'
3
+
4
+ # Retrieve input expression to parse from command-line
5
# Without a command-line argument, print the usage text and stop.
if ARGV.empty?
  my_name = File.basename(__FILE__)
  msg = <<-END_MSG
Command-line syntax:
  ruby #{my_name} "arithmetic expression"
  where:
    the arithmetic expression is enclosed between double quotes (")

  Example:
    ruby #{my_name} "2 * 3 + (4 - 1)"
  END_MSG
  puts msg
  exit(1)
end

# Create a calculator parser object and parse the first argument
parser = CalcParser.new
result = parser.parse_expression(ARGV[0])

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{ARGV[0]}' failed"
  puts "Reason: #{result.failure_reason.message}"
  exit(1)
end

# The builder class is handed over to the parse result
tree_builder = CalcASTBuilder

# Generate a parse tree from the parse result
ptree = result.parse_tree(tree_builder)

root = ptree.root
puts root.interpret # Output the expression result
38
+ # End of file
@@ -0,0 +1,25 @@
1
+ # Grammar for simple arithmetical expressions
2
+ require 'rley' # Load the gem
3
+
4
+ ########################################
5
+ # Define a grammar for basic arithmetical expressions
6
# Declare the terminals first, then the production rules.
builder = Rley::Syntax::GrammarBuilder.new do
  add_terminals('NUMBER')
  add_terminals('LPAREN', 'RPAREN') # For '(', ')' delimiters
  add_terminals('PLUS', 'MINUS') # For '+', '-' operators
  add_terminals('STAR', 'DIVIDE') # For '*', '/' operators

  rule('expression' => 'simple_expression')
  rule('simple_expression' => 'term')
  rule('simple_expression' => ['simple_expression', 'add_operator', 'term'])
  rule('term' => 'factor')
  rule('term' => ['term', 'mul_operator', 'factor'])
  rule('factor' => 'NUMBER')
  rule('factor' => ['LPAREN', 'expression', 'RPAREN'])
  rule('add_operator' => 'PLUS')
  rule('add_operator' => 'MINUS')
  rule('mul_operator' => 'STAR')
  rule('mul_operator' => 'DIVIDE')
end

# And now build the grammar...
CalcGrammar = builder.grammar
@@ -0,0 +1,81 @@
1
+ # File: calc_lexer.rb
2
+ # Lexer for a basic arithmetical expression parser
3
+ require 'strscan'
4
+ require 'rley' # Load the gem
5
+
6
+
7
# Lexer for basic arithmetical expressions.
# Splits a source text into Rley tokens; the terminal symbol of each
# token is looked up by name in the grammar's `name2symbol` mapping.
class CalcLexer
  # @return [StringScanner] scanner over the source text
  attr_reader(:scanner)
  # @return [Integer] set to 1 by the constructor and not updated afterwards
  attr_reader(:lineno)
  # Not assigned anywhere in this class.
  attr_reader(:line_start)
  # @return [Hash] mapping taken from the grammar: terminal name => symbol
  attr_reader(:name2symbol)

  # Operator/delimiter text => terminal name
  @@lexeme2name = {
    '(' => 'LPAREN',
    ')' => 'RPAREN',
    '+' => 'PLUS',
    '-' => 'MINUS',
    '*' => 'STAR',
    '/' => 'DIVIDE',
    '**' => 'POWER'
  }.freeze

  # Raised when the input contains text that matches no token pattern.
  class ScanError < StandardError; end

  # @param source [String] the text to tokenize
  # @param aGrammar [#name2symbol] grammar providing the terminal symbols
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
  end

  # Tokenize the remaining input.
  # @return [Array] the tokens, in input order
  # @raise [ScanError] when an unknown token is met
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Scan one token.
  # @return [Object, nil] the token, or nil when only whitespace was left
  # @raise [ScanError] when the next characters match no token pattern
  def _next_token
    skip_whitespaces
    # StringScanner#peek returns an empty string (not nil) at end of input,
    # and every string includes the empty string, so test eos explicitly.
    return nil if scanner.eos?

    curr_ch = scanner.peek(1)

    if '()+/'.include?(curr_ch)
      # Single character token
      build_token(@@lexeme2name[curr_ch], scanner.getch)
    elsif (lexeme = scanner.scan(/\*\*?/))
      # '**' is tried before '*' because the optional second star is greedy
      build_token(@@lexeme2name[lexeme], lexeme)
    elsif (lexeme = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/))
      # The exponent accepts one or more digits
      build_token('NUMBER', lexeme)
    elsif (lexeme = scanner.scan(/-/))
      build_token(@@lexeme2name[lexeme], lexeme)
    else # Unknown token
      # The peeked character is still unconsumed, so the scanned excerpt
      # already starts with it.
      erroneous = scanner.scan(/.{1,20}/) || curr_ch
      raise ScanError, "Unknown token #{erroneous}"
    end
  end

  # @param aSymbolName [String] name of the terminal
  # @param aLexeme [String] text of the token
  # @return [Rley::Tokens::Token]
  def build_token(aSymbolName, aLexeme)
    token_type = name2symbol[aSymbolName]
    Rley::Tokens::Token.new(aLexeme, token_type)
  end

  # Consume a run of blanks, tabs, form feeds and line terminators.
  def skip_whitespaces
    scanner.scan(/[ \t\f\n\r]+/)
  end
end # class
@@ -0,0 +1,73 @@
1
+ require 'rspec' # Use the RSpec framework
2
+ require_relative '../calc_parser' # Load the class under test
3
+ require_relative '../calc_ast_builder'
4
+
5
+
6
describe 'Calculator' do
  # Parse the given expression text.
  # A failed parse raises, so that RSpec reports the example as failed
  # instead of the whole test run being terminated.
  # @param anExpression [String] the arithmetic expression
  # @return the successful parse result
  def parse_expression(anExpression)
    # Create a calculator parser object
    parser = CalcParser.new
    result = parser.parse_expression(anExpression)

    unless result.success?
      reason = result.failure_reason.message
      raise StandardError, "Parsing of '#{anExpression}' failed. Reason: #{reason}"
    end

    result
  end

  # Generate an abstract syntax tree from the parse result.
  # @return the root node of the tree
  def build_ast(aParseResult)
    tree_builder = CalcASTBuilder
    ast = aParseResult.parse_tree(tree_builder)
    ast.root
  end

  # Parse and evaluate the expression, then wrap the outcome in an
  # RSpec expectation target.
  def expect_expr(anExpression)
    parsing = parse_expression(anExpression)
    ast = build_ast(parsing)
    expect(ast.interpret)
  end

  it 'should evaluate simple number literals' do
    expect_expr('2').to eq(2)
  end

  it 'should evaluate addition' do
    expect_expr('2 + 2').to eq(4)
  end

  it 'should evaluate subtraction' do
    expect_expr('2.1 - 2').to be_within(0.000000000000001).of(0.1)
  end

  it 'should evaluate division' do
    expect_expr('10.5 / 5').to eq(2.1)
  end

  it 'should evaluate multiplication' do
    expect_expr('2 * 3.1').to eq(6.2)
  end

  it 'should evaluate parentheses' do
    expect_expr('2 * (2.1 + 1)').to eq(6.2)
  end

  it 'should evaluate regardless of whitespace' do
    expect_expr("2*(1+\t1)").to eq(4)
  end

  it 'should evaluate order of operations' do
    expect_expr('2 * 2.1 + 1 / 2').to eq 4.7
  end

  it 'should evaluate multiple levels of parentheses' do
    expect_expr('2*(1/(1+3))').to eq(0.5)
  end
end # describe
72
+ # End of file
73
+
@@ -0,0 +1,186 @@
1
+ require_relative 'calc_ast_nodes'
2
+
3
+ # The purpose of a CalcASTBuilder is to build piece by piece an AST
4
+ # (Abstract Syntax Tree) from a sequence of input tokens and
5
+ # visit events produced by walking over a GFGParsing object.
6
+ # Uses the Builder GoF pattern.
7
+ # The Builder pattern creates a complex object
8
+ # (say, a parse tree) from simpler objects (terminal and non-terminal
9
+ # nodes) and using a step by step approach.
10
# Builds an AST for calculator expressions by overriding the node
# factory methods of Rley::Parser::ParseTreeBuilder.
class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
  # Terminal name => node class. When the value is a Hash, the node class
  # depends on the production in which the terminal occurs.
  Terminal2NodeClass = {
    # Plus sign character is ambiguous. It can represent an operator
    # or a positive value
    '+' => {
      'add_operator[0]' => CalcAddNode,
      'sign[0]' => Rley::PTree::TerminalNode
    },
    # Minus sign character is ambiguous. It can represent an operator
    # or a negative value
    '-' => {
      'add_operator[1]' => CalcSubtractNode,
      'sign[1]' => CalcNegateNode
    },
    '*' => CalcMultiplyNode,
    '/' => CalcDivideNode,
    'number' => CalcNumberNode
  }.freeze

  protected

  # @return the first of the given children
  def return_first_child(_range, _tokens, theChildren)
    theChildren[0]
  end

  # @return the second of the given children
  def return_second_child(_range, _tokens, theChildren)
    theChildren[1]
  end

  # @return the last of the given children
  def return_last_child(_range, _tokens, theChildren)
    theChildren[-1]
  end

  # Overriding method.
  # Create a parse tree object with given
  # node as root node.
  def create_tree(aRootNode)
    Rley::PTree::ParseTree.new(aRootNode)
  end

  # Overriding method.
  # Factory method for creating a node object for the given
  # input token.
  # @param aProduction [Production] Production in which the terminal occurs
  # @param aTerminal [Terminal] Terminal symbol associated with the token
  # @param aTokenPosition [Integer] Position of token in the input stream
  # @param aToken [Token] The input token
  # @raise [StandardError] when an ambiguous terminal occurs in a
  #   production for which no node class is registered
  def new_leaf_node(aProduction, aTerminal, aTokenPosition, aToken)
    klass = Terminal2NodeClass.fetch(aTerminal.name, CalcTerminalNode)
    if klass.is_a?(Hash) # Lexical ambiguity
      candidates = klass
      klass = candidates[aProduction.name]
      if klass.nil?
        msg = "No node class for terminal #{aTerminal.name} " \
              "in production #{aProduction.name}"
        raise StandardError, msg
      end
    end

    klass.new(aToken, aTokenPosition)
  end

  # Method to override.
  # Factory method for creating a parent node object.
  # @param aProduction [Production] Production rule
  # @param aRange [Range] Range of tokens matched by the rule
  # @param theTokens [Array] The input tokens
  # @param theChildren [Array] Children nodes (one per rhs symbol)
  # @raise [StandardError] when the production name is not handled
  def new_parent_node(aProduction, aRange, theTokens, theChildren)
    case aProduction.name
    when 'expression[0]' # rule 'expression' => %w[sign simple_expression]
      reduce_expression_0(aProduction, aRange, theTokens, theChildren)
    when 'sign[0]', 'sign[1]' # rule 'sign' => 'PLUS' / rule 'sign' => 'MINUS'
      return_first_child(aRange, theTokens, theChildren)
    when 'sign[2]' # rule 'sign' => []
      reduce_sign_2(aProduction, aRange, theTokens, theChildren)
    else
      raise StandardError, "Don't know production #{aProduction.name}"
    end
  end

  # rule 'expression' => %w[sign simple_expression]
  # When the sign child is a negate node, the expression becomes its
  # operand and the negate node itself is returned. Otherwise the
  # expression node is returned as-is.
  def reduce_expression_0(_aProduction, _aRange, _theTokens, theChildren)
    sign = theChildren.first
    operand = theChildren.last
    return operand unless sign.is_a?(CalcNegateNode)

    # Array#<< returns the array, so the node must be returned explicitly
    sign.members << operand
    sign
  end

  # rule 'sign' => []
  def reduce_sign_2(_aProduction, _aRange, _theTokens, _theChildren)
    nil # TODO: check whether this makes sense
  end
end # class