rley 0.5.01 → 0.5.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/examples/data_formats/JSON/cli_options.rb +25 -9
  4. data/examples/data_formats/JSON/json_ast_builder.rb +152 -0
  5. data/examples/data_formats/JSON/json_ast_nodes.rb +141 -0
  6. data/examples/data_formats/JSON/json_demo.rb +24 -8
  7. data/examples/general/calc_iter1/calc_ast_builder.rb +142 -0
  8. data/examples/general/calc_iter1/calc_ast_nodes.rb +151 -0
  9. data/examples/general/calc_iter1/calc_demo.rb +38 -0
  10. data/examples/general/calc_iter1/calc_grammar.rb +25 -0
  11. data/examples/general/calc_iter1/calc_lexer.rb +81 -0
  12. data/examples/general/{calc → calc_iter1}/calc_parser.rb +0 -0
  13. data/examples/general/calc_iter1/spec/calculator_spec.rb +73 -0
  14. data/examples/general/calc_iter2/calc_ast_builder.rb +186 -0
  15. data/examples/general/calc_iter2/calc_ast_nodes.rb +151 -0
  16. data/examples/general/{calc → calc_iter2}/calc_demo.rb +3 -2
  17. data/examples/general/{calc → calc_iter2}/calc_grammar.rb +0 -0
  18. data/examples/general/calc_iter2/calc_lexer.rb +81 -0
  19. data/examples/general/calc_iter2/calc_parser.rb +24 -0
  20. data/lib/rley.rb +1 -0
  21. data/lib/rley/constants.rb +1 -1
  22. data/lib/rley/parser/cst_builder.rb +5 -225
  23. data/lib/rley/parser/gfg_parsing.rb +2 -2
  24. data/lib/rley/parser/parse_forest_factory.rb +1 -1
  25. data/lib/rley/parser/parse_rep_creator.rb +2 -2
  26. data/lib/rley/parser/parse_tree_builder.rb +161 -104
  27. data/lib/rley/parser/parse_tree_factory.rb +6 -2
  28. data/spec/rley/parser/ast_builder_spec.rb +395 -0
  29. data/spec/rley/support/grammar_arr_int_helper.rb +21 -11
  30. metadata +20 -9
  31. data/examples/general/calc/calc_lexer.rb +0 -90
  32. data/spec/rley/parser/parse_tree_builder_spec.rb +0 -249
@@ -0,0 +1,151 @@
1
+ # Classes that implement nodes of Abstract Syntax Trees (AST) representing
2
+ # calculator parse results.
3
+
4
+
5
# Leaf node of a calculator AST. It wraps a single input token.
# Members:
#   token    - the wrapped token
#   value    - set by #init_value from the token's lexeme
#   position - the position argument handed to the constructor
CalcTerminalNode = Struct.new(:token, :value, :position) do
  # @param aToken [#lexeme, #terminal] the token to wrap
  # @param aPosition [Object] stored as-is in the position member
  def initialize(aToken, aPosition)
    self.token = aToken
    self.position = aPosition
    init_value(aToken.lexeme)
  end

  # Hook that derives the node value from the token text.
  # This default keeps a copy of the text; subclasses may override it.
  # @param aLiteral [String] the lexeme of the wrapped token
  def init_value(aLiteral)
    self.value = aLiteral.dup
  end

  # @return the terminal reported by the wrapped token
  def symbol
    token.terminal
  end

  # @return the value of this node
  def interpret
    value
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param aVisitor [#visit_terminal] the visitor
  def accept(aVisitor)
    aVisitor.visit_terminal(self)
  end
end
31
+
32
# Terminal node holding a numeric literal.
class CalcNumberNode < CalcTerminalNode
  # Convert the literal text into a number.
  # An optionally signed run of digits becomes an Integer; a literal with
  # a fractional part and/or an exponent becomes a Float. Text matching
  # neither pattern leaves the value unassigned.
  # @param aLiteral [String] the lexeme of the wrapped token
  def init_value(aLiteral)
    integer_form = /^[+-]?\d+$/
    float_form = /^[+-]?\d+(\.\d+)?([eE][+-]?\d+)?$/

    case aLiteral
    when integer_form then self.value = aLiteral.to_i
    when float_form then self.value = aLiteral.to_f
    end
  end
end
43
+
44
# Inner (non-leaf) node of a calculator AST.
class CalcCompositeNode
  # children: Array of child nodes (starts empty)
  # symbol:   the symbol passed to the constructor
  attr_accessor :children, :symbol

  # 'subnodes' is another name for the children reader.
  alias_method :subnodes, :children

  # @param aSymbol [Object] symbol to associate with this node
  def initialize(aSymbol)
    @children = []
    @symbol = aSymbol
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param aVisitor [#visit_nonterminal] the visitor
  def accept(aVisitor)
    aVisitor.visit_nonterminal(self)
  end
end # class
62
+
63
# Composite node for an operator that takes a single operand.
class CalcUnaryOpNode < CalcCompositeNode
  # 'members' is another name for the children reader.
  alias_method :members, :children

  # @param aSymbol [Object] symbol to associate with this node
  def initialize(aSymbol)
    super
  end

  # Convert this tree node in a simpler Ruby representation (a Hash).
  # NOTE(review): every child must respond to #name and #value, and their
  # results to #to_ruby. No node class in this file offers #name, so this
  # looks carried over from another example -- confirm it is still needed.
  # @return [Hash]
  def to_ruby
    members.each_with_object({}) do |pair, rep|
      rep[pair.name.to_ruby] = pair.value.to_ruby
    end
  end
end # class
81
+
82
# Node for the unary minus. It adds nothing to CalcUnaryOpNode.
class CalcNegateNode < CalcUnaryOpNode; end
84
+
85
# Composite node for an operator that takes two operands.
class CalcBinaryOpNode < CalcCompositeNode
  # @param aSymbol [Object] symbol to associate with this node
  def initialize(aSymbol)
    super
  end

  protected

  # Evaluate the children, in order.
  # A child that responds to #interpret contributes its interpretation;
  # any other child is passed through untouched.
  # @return [Array] one entry per child
  def get_operands
    children.map do |child|
      if child.respond_to?(:interpret)
        child.interpret
      else
        child
      end
    end
  end
end # class
103
+
104
# Node for the binary '+' operator.
class CalcAddNode < CalcBinaryOpNode
  # @return the first operand plus the second one
  def interpret
    left, right = get_operands
    left + right
  end
end # class
114
+
115
+
116
# Node for the binary '-' operator.
class CalcSubtractNode < CalcBinaryOpNode
  # @return the first operand minus the second one
  def interpret
    left, right = get_operands
    left - right
  end
end # class
126
+
127
# Node for the binary '*' operator.
class CalcMultiplyNode < CalcBinaryOpNode
  # @return the first operand times the second one
  def interpret
    left, right = get_operands
    left * right
  end
end # class
136
+
137
# Node for the binary '/' operator.
class CalcDivideNode < CalcBinaryOpNode
  # The numerator is converted with #to_f before dividing, so two Integer
  # operands do not go through Integer division.
  # @return the first operand (as a Float) divided by the second one
  def interpret
    left, right = get_operands
    left.to_f / right
  end
end # class
148
+
149
+
150
+
151
+
@@ -0,0 +1,38 @@
1
+ require_relative 'calc_parser'
2
+ require_relative 'calc_ast_builder'
3
+
4
+ # Retrieve input expression to parse from command-line
5
# Without an expression on the command line: print the usage and stop.
if ARGV.empty?
  my_name = File.basename(__FILE__)
  msg = <<-END_MSG
Command-line syntax:
  ruby #{my_name} "arithmetic expression"
  where:
    the arithmetic expression is enclosed between double quotes (")

  Example:
  ruby #{my_name} "2 * 3 + (4 - 1)"
END_MSG
  puts msg
  exit(1)
end

# Create a calculator parser object and parse the first argument
parser = CalcParser.new
result = parser.parse_expression(ARGV[0])

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{ARGV[0]}' failed"
  puts "Reason: #{result.failure_reason.message}"
  exit(1)
end

# The builder class handed to the parse result for constructing the tree
tree_builder = CalcASTBuilder

# Generate a parse tree from the parse result
ptree = result.parse_tree(tree_builder)

root = ptree.root
puts root.interpret # Output the expression result
38
+ # End of file
@@ -0,0 +1,25 @@
1
+ # Grammar for simple arithmetical expressions
2
+ require 'rley' # Load the gem
3
+
4
+ ########################################
5
+ # Define a grammar for basic arithmetical expressions
6
# NOTE(review): keep the rules in this order; production names such as
# 'add_operator[0]' presumably depend on the declaration order -- confirm.
builder = Rley::Syntax::GrammarBuilder.new do
  # Terminal symbols
  add_terminals('NUMBER')
  add_terminals('LPAREN', 'RPAREN') # For '(', ')' delimiters
  add_terminals('PLUS', 'MINUS') # For '+', '-' operators
  add_terminals('STAR', 'DIVIDE') # For '*', '/' operators

  # Production rules
  rule('expression' => 'simple_expression')
  rule('simple_expression' => 'term')
  rule('simple_expression' => ['simple_expression', 'add_operator', 'term'])
  rule('term' => 'factor')
  rule('term' => ['term', 'mul_operator', 'factor'])
  rule('factor' => 'NUMBER')
  rule('factor' => ['LPAREN', 'expression', 'RPAREN'])
  rule('add_operator' => 'PLUS')
  rule('add_operator' => 'MINUS')
  rule('mul_operator' => 'STAR')
  rule('mul_operator' => 'DIVIDE')
end

# Build the grammar and publish it under a constant
CalcGrammar = builder.grammar
@@ -0,0 +1,81 @@
1
+ # File: calc_lexer.rb
2
+ # Lexer for a basic arithmetical expression parser
3
+ require 'strscan'
4
+ require 'rley' # Load the gem
5
+
6
+
7
# Lexer for basic arithmetical expressions.
# It turns the source text into a sequence of Rley::Tokens::Token objects
# whose types are looked up in the grammar's name-to-symbol mapping.
class CalcLexer
  attr_reader(:scanner)
  # Set to 1 by the constructor; nothing in this class updates it.
  attr_reader(:lineno)
  # Never assigned by this class.
  attr_reader(:line_start)
  attr_reader(:name2symbol)

  # Mapping from operator/delimiter text to terminal name.
  # NOTE(review): 'POWER' must exist in the grammar handed to the
  # constructor, otherwise '**' tokens get a nil type -- confirm.
  LEXEME2NAME = {
    '(' => 'LPAREN',
    ')' => 'RPAREN',
    '+' => 'PLUS',
    '-' => 'MINUS',
    '*' => 'STAR',
    '/' => 'DIVIDE',
    '**' => 'POWER'
  }.freeze

  # Token names after which a '-' can only be the subtraction operator.
  OPERAND_END = %w[NUMBER RPAREN].freeze

  # Number without / with an optional leading minus sign.
  # The exponent takes one or more digits.
  UNSIGNED_NUMBER = /[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
  SIGNED_NUMBER = /-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/

  class ScanError < StandardError; end

  # @param source [String] the text to tokenize
  # @param aGrammar [#name2symbol] supplies the terminal-name lookup
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
    @last_name = nil # name of the most recently built token
  end

  # Scan the remaining input.
  # @return [Array<Rley::Tokens::Token>] the tokens, in input order
  # @raise [ScanError] when the input holds an unrecognized character
  def tokens
    tok_sequence = []
    until @scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # @return [Rley::Tokens::Token, nil] the next token, or nil when only
  #   whitespace was left in the input
  def _next_token
    skip_whitespaces
    # StringScanner#peek returns '' (never nil) at end of input, so the end
    # must be detected explicitly; otherwise trailing whitespace would
    # yield a token with nil lexeme and nil type.
    return nil if scanner.eos?

    curr_ch = scanner.peek(1)
    if '()+/'.include?(curr_ch)
      # Single character token
      build_token(LEXEME2NAME[curr_ch], scanner.getch)
    elsif (lexeme = scanner.scan(/\*\*?/))
      # '**' is tried before '*' because the repetition is greedy
      build_token(LEXEME2NAME[lexeme], lexeme)
    elsif (lexeme = scanner.scan(number_pattern))
      build_token('NUMBER', lexeme)
    elsif (lexeme = scanner.scan(/-/))
      build_token(LEXEME2NAME[lexeme], lexeme)
    else # Unknown token
      # The scan starts at the offending character, so its result already
      # contains that character.
      erroneous = scanner.scan(/.{1,20}/) || curr_ch
      raise ScanError, "Unknown token #{erroneous}"
    end
  end

  # A leading '-' belongs to the number only where an operand may start.
  # Right after a number or a closing parenthesis it is the subtraction
  # operator, so that '2-1' gives NUMBER MINUS NUMBER.
  def number_pattern
    OPERAND_END.include?(@last_name) ? UNSIGNED_NUMBER : SIGNED_NUMBER
  end

  # Create a token and remember its name for the sign disambiguation.
  # @param aSymbolName [String] terminal name, key into name2symbol
  # @param aLexeme [String] the matched text
  def build_token(aSymbolName, aLexeme)
    @last_name = aSymbolName
    token_type = name2symbol[aSymbolName]
    Rley::Tokens::Token.new(aLexeme, token_type)
  end

  def skip_whitespaces
    scanner.scan(/[ \t\f\n\r]+/)
  end
end # class
@@ -0,0 +1,73 @@
1
+ require 'rspec' # Use the RSpec framework
2
+ require_relative '../calc_parser' # Load the class under test
3
+ require_relative '../calc_ast_builder'
4
+
5
+
6
# End-to-end checks: parse an expression, build its AST, interpret it.
describe 'Calculator' do
  # Parse the given text with a fresh calculator parser.
  # A parse failure raises, so only the current example fails; calling
  # exit here would terminate the whole test run.
  # @param anExpression [String] arithmetic expression to parse
  # @return the successful parse result
  def parse_expression(anExpression)
    # Create a calculator parser object
    parser = CalcParser.new
    result = parser.parse_expression(anExpression)
    return result if result.success?

    raise "Parsing of '#{anExpression}' failed. " \
          "Reason: #{result.failure_reason.message}"
  end

  # @param aParseResult the result returned by #parse_expression
  # @return the root node of the abstract syntax tree
  def build_ast(aParseResult)
    tree_builder = CalcASTBuilder
    # Generate an abstract syntax tree from the parse result
    ast = aParseResult.parse_tree(tree_builder)
    ast.root
  end

  # Shorthand: expectation target for the value of the given expression.
  def expect_expr(anExpression)
    parsing = parse_expression(anExpression)
    ast = build_ast(parsing)
    expect(ast.interpret)
  end

  it 'should evaluate simple number literals' do
    expect_expr('2').to eq(2)
  end

  it 'should evaluate addition' do
    expect_expr('2 + 2').to eq(4)
  end

  it 'should evaluate subtraction' do
    expect_expr('2.1 - 2').to be_within(0.000000000000001).of(0.1)
  end

  it 'should evaluate division' do
    expect_expr('10.5 / 5').to eq(2.1)
  end

  it 'should evaluate multiplication' do
    expect_expr('2 * 3.1').to eq(6.2)
  end

  it 'should evaluate parentheses' do
    expect_expr('2 * (2.1 + 1)').to eq(6.2)
  end

  it 'should evaluate regardless of whitespace' do
    expect_expr("2*(1+\t1)").to eq(4)
  end

  it 'should evaluate order of operations' do
    expect_expr('2 * 2.1 + 1 / 2').to eq 4.7
  end

  it 'should evaluate multiple levels of parentheses' do
    expect_expr('2*(1/(1+3))').to eq(0.5)
  end
end # describe
72
+ # End of file
73
+
@@ -0,0 +1,186 @@
1
+ require_relative 'calc_ast_nodes'
2
+
3
+ # The purpose of a CalcASTBuilder is to build piece by piece an AST
4
+ # (Abstract Syntax Tree) from a sequence of input tokens and
5
+ # visit events produced by walking over a GFGParsing object.
6
+ # Uses the Builder GoF pattern.
7
+ # The Builder pattern creates a complex object
8
+ # (say, a parse tree) from simpler objects (terminal and non-terminal
9
+ # nodes) and using a step by step approach.
10
# Builder of calculator ASTs. It overrides the node factory methods of
# Rley::Parser::ParseTreeBuilder.
class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
  # Node class to instantiate for a terminal, keyed by terminal name.
  # A Hash value marks a lexically ambiguous terminal: the class is then
  # selected by the name of the production being reduced.
  Terminal2NodeClass = {
    # Plus sign character is ambiguous. It can represent an operator
    # or a positive value
    '+' => {
      'add_operator[0]' => CalcAddNode,
      'sign[0]' => Rley::PTree::TerminalNode
    },
    # Minus sign character is ambiguous. It can represent an operator
    # or a negative value
    '-' => {
      'add_operator[1]' => CalcSubtractNode,
      'sign[1]' => CalcNegateNode
    },
    '*' => CalcMultiplyNode,
    '/' => CalcDivideNode,
    'number' => CalcNumberNode
  }.freeze

  protected

  def return_first_child(_range, _tokens, theChildren)
    theChildren[0]
  end

  def return_second_child(_range, _tokens, theChildren)
    theChildren[1]
  end

  def return_last_child(_range, _tokens, theChildren)
    theChildren[-1]
  end

  # Overriding method.
  # Create a parse tree object with given
  # node as root node.
  def create_tree(aRootNode)
    Rley::PTree::ParseTree.new(aRootNode)
  end

  # Overriding method.
  # Factory method for creating a node object for the given
  # input token.
  # @param aProduction [Production] Production rule being reduced; its
  #   name selects the node class of an ambiguous terminal
  # @param aTerminal [Terminal] Terminal symbol associated with the token
  # @param aTokenPosition [Integer] Position of token in the input stream
  # @param aToken [Token] The input token
  # @raise [StandardError] when an ambiguous terminal has no node class
  #   registered for the production
  def new_leaf_node(aProduction, aTerminal, aTokenPosition, aToken)
    klass = Terminal2NodeClass.fetch(aTerminal.name, CalcTerminalNode)
    if klass.is_a?(Hash) # Lexical ambiguity
      klass = klass.fetch(aProduction.name) do
        raise StandardError,
              "No node class for terminal #{aTerminal.name} " \
              "in production #{aProduction.name}"
      end
    end

    klass.new(aToken, aTokenPosition)
  end

  # Method to override.
  # Factory method for creating a parent node object.
  # @param aProduction [Production] Production rule
  # @param aRange [Range] Range of tokens matched by the rule
  # @param theTokens [Array] The input tokens
  # @param theChildren [Array] Children nodes (one per rhs symbol)
  # @raise [StandardError] when the production name is not handled
  def new_parent_node(aProduction, aRange, theTokens, theChildren)
    case aProduction.name
    when 'expression[0]' # rule 'expression' => %w[sign simple_expression]
      reduce_expression_0(aProduction, aRange, theTokens, theChildren)
    when 'sign[0]', 'sign[1]' # rule 'sign' => 'PLUS', rule 'sign' => 'MINUS'
      return_first_child(aRange, theTokens, theChildren)
    when 'sign[2]' # rule 'sign' => []
      reduce_sign_2(aProduction, aRange, theTokens, theChildren)
    else
      raise StandardError, "Don't know production #{aProduction.name}"
    end
  end

  # rule 'expression' => %w[sign simple_expression]
  # With a negation sign, the expression becomes the operand of the negate
  # node and that node is returned; otherwise the expression itself is.
  def reduce_expression_0(_production, _range, _tokens, theChildren)
    sign = theChildren[0]
    operand = theChildren.last
    return operand unless sign.is_a?(CalcNegateNode)

    # Array#<< returns the array, so the node is returned separately.
    sign.members << operand
    sign
  end

  # rule 'sign' => []
  def reduce_sign_2(_production, _range, _tokens, _children)
    nil # TODO: check whether this makes sense
  end
end # class