rley 0.5.01 → 0.5.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/examples/data_formats/JSON/cli_options.rb +25 -9
  4. data/examples/data_formats/JSON/json_ast_builder.rb +152 -0
  5. data/examples/data_formats/JSON/json_ast_nodes.rb +141 -0
  6. data/examples/data_formats/JSON/json_demo.rb +24 -8
  7. data/examples/general/calc_iter1/calc_ast_builder.rb +142 -0
  8. data/examples/general/calc_iter1/calc_ast_nodes.rb +151 -0
  9. data/examples/general/calc_iter1/calc_demo.rb +38 -0
  10. data/examples/general/calc_iter1/calc_grammar.rb +25 -0
  11. data/examples/general/calc_iter1/calc_lexer.rb +81 -0
  12. data/examples/general/{calc → calc_iter1}/calc_parser.rb +0 -0
  13. data/examples/general/calc_iter1/spec/calculator_spec.rb +73 -0
  14. data/examples/general/calc_iter2/calc_ast_builder.rb +186 -0
  15. data/examples/general/calc_iter2/calc_ast_nodes.rb +151 -0
  16. data/examples/general/{calc → calc_iter2}/calc_demo.rb +3 -2
  17. data/examples/general/{calc → calc_iter2}/calc_grammar.rb +0 -0
  18. data/examples/general/calc_iter2/calc_lexer.rb +81 -0
  19. data/examples/general/calc_iter2/calc_parser.rb +24 -0
  20. data/lib/rley.rb +1 -0
  21. data/lib/rley/constants.rb +1 -1
  22. data/lib/rley/parser/cst_builder.rb +5 -225
  23. data/lib/rley/parser/gfg_parsing.rb +2 -2
  24. data/lib/rley/parser/parse_forest_factory.rb +1 -1
  25. data/lib/rley/parser/parse_rep_creator.rb +2 -2
  26. data/lib/rley/parser/parse_tree_builder.rb +161 -104
  27. data/lib/rley/parser/parse_tree_factory.rb +6 -2
  28. data/spec/rley/parser/ast_builder_spec.rb +395 -0
  29. data/spec/rley/support/grammar_arr_int_helper.rb +21 -11
  30. metadata +20 -9
  31. data/examples/general/calc/calc_lexer.rb +0 -90
  32. data/spec/rley/parser/parse_tree_builder_spec.rb +0 -249
@@ -0,0 +1,151 @@
1
+ # Classes that implement nodes of Abstract Syntax Trees (AST) representing
2
+ # calculator parse results.
3
+
4
+
5
# Leaf node of a calculator AST. It wraps one input token together with
# the value derived from the token's text and the token's position.
CalcTerminalNode = Struct.new(:token, :value, :position) do
  # @param aToken [#lexeme, #terminal] the token this node stands for
  # @param aPosition [Object] position of the token, stored as given
  def initialize(aToken, aPosition)
    # Fill the struct members first; value stays nil until init_value runs.
    super(aToken, nil, aPosition)
    init_value(aToken.lexeme)
  end

  # Hook that derives the node value from the token text.
  # This method can be overridden; the default stores a copy of the text.
  # @param aLiteral [String] text of the token
  def init_value(aLiteral)
    self.value = aLiteral.dup
  end

  # @return the terminal symbol of the wrapped token
  def symbol
    token.terminal
  end

  # @return the value held by this node
  def to_ruby
    value
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param aVisitor [#visit_terminal] the visitor
  def accept(aVisitor)
    aVisitor.visit_terminal(self)
  end
end
31
+
32
# Terminal node for a numeric literal: the token text is converted into
# a Ruby number instead of being kept as a String.
class CalcNumberNode < CalcTerminalNode
  # Convert the literal text into an Integer or a Float.
  # The patterns are anchored with \A and \z so that the WHOLE text must
  # be a number (^ and $ only anchor at line boundaries).
  # A text matching neither pattern leaves the value unassigned.
  # @param aLiteral [String] text of the number token
  def init_value(aLiteral)
    case aLiteral
    when /\A[+-]?\d+\z/
      # Optional sign followed by digits only: integer literal
      self.value = aLiteral.to_i
    when /\A[+-]?\d+(\.\d+)?([eE][+-]?\d+)?\z/
      # Optional fractional part and/or exponent: floating-point literal
      self.value = aLiteral.to_f
    end
  end
end
43
+
44
# Inner node of a calculator AST: a grammar symbol plus an ordered
# list of child nodes.
class CalcCompositeNode
  attr_accessor :children, :symbol

  # `subnodes` is another name for `children`.
  alias_method :subnodes, :children

  # @param aSymbol [Object] the symbol this node is associated with
  def initialize(aSymbol)
    @children = []
    @symbol = aSymbol
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param aVisitor [#visit_nonterminal] the visitor
  def accept(aVisitor)
    aVisitor.visit_nonterminal(self)
  end
end # class
62
+
63
# Composite node for an operator that takes a single operand.
class CalcUnaryOpNode < CalcCompositeNode
  # `members` is another name for `children`.
  alias_method :members, :children

  # Convert this tree node into a simpler Ruby representation: a Hash
  # with one entry per child, keyed by the child's name.
  # NOTE(review): every child must respond to `name` and `value`; the
  # terminal nodes visible in this file expose no `name` -- confirm
  # which child type is expected here.
  # @return [Hash]
  def to_ruby
    members.each_with_object({}) do |pair, rep|
      rep[pair.name.to_ruby] = pair.value.to_ruby
    end
  end
end # class
81
+
82
# Unary operator node for arithmetic negation.
# (The inheritance operator `<` was missing, which is a syntax error.)
class CalcNegateNode < CalcUnaryOpNode
end # class
84
+
85
# Common ancestor of the composite nodes for two-operand operators.
# Construction is inherited unchanged: new(aSymbol).
class CalcBinaryOpNode < CalcCompositeNode
end # class
91
+
92
class CalcAddNode < CalcBinaryOpNode
  # Convert this tree node into a simpler Ruby representation.
  # @return [Array] the Ruby representation of each child, in order
  def to_ruby
    children.map { |child| child.to_ruby }
  end
end # class
105
+
106
+
107
class CalcSubtractNode < CalcBinaryOpNode
  # Convert this tree node into a simpler Ruby representation.
  # @return [Array] the Ruby representation of each child, in order
  def to_ruby
    children.map { |child| child.to_ruby }
  end
end # class
120
+
121
class CalcMultiplyNode < CalcBinaryOpNode
  # Convert this tree node into a simpler Ruby representation.
  # @return [Array] the Ruby representation of each child, in order
  def to_ruby
    children.map { |child| child.to_ruby }
  end
end # class
134
+
135
class CalcDivideNode < CalcBinaryOpNode
  # Convert this tree node into a simpler Ruby representation.
  # @return [Array] the Ruby representation of each child, in order
  def to_ruby
    children.map { |child| child.to_ruby }
  end
end # class
148
+
149
+
150
+
151
+
@@ -5,14 +5,15 @@ parser = CalcParser.new
5
5
 
6
6
  # Parse the input expression in command-line
7
7
  if ARGV.empty?
8
+ my_name = File.basename(__FILE__)
8
9
  msg = <<-END_MSG
9
10
  Command-line symtax:
10
- ruby #{__FILE__} "arithmetic expression"
11
+ ruby #{my_name} "arithmetic expression"
11
12
  where:
12
13
  the arithmetic expression is enclosed between double quotes (")
13
14
 
14
15
  Example:
15
- ruby #{__FILE__} "2 * 3 + (4 - 1)"
16
+ ruby #{my_name} "2 * 3 + (4 - 1)"
16
17
  END_MSG
17
18
  puts msg
18
19
  exit(1)
@@ -0,0 +1,81 @@
1
+ # File: calc_lexer.rb
2
+ # Lexer for a basic arithmetical expression parser
3
+ require 'strscan'
4
+ require 'rley' # Load the gem
5
+
6
+
7
# Lexer for the calculator example: splits the text of an arithmetic
# expression into a sequence of Rley tokens.
class CalcLexer
  attr_reader(:scanner)
  attr_reader(:lineno)
  attr_reader(:line_start)
  attr_reader(:name2symbol)

  # Maps the text of each operator / punctuation lexeme to the name of
  # the corresponding terminal symbol.
  @@lexeme2name = {
    '(' => 'LPAREN',
    ')' => 'RPAREN',
    '+' => 'PLUS',
    '-' => 'MINUS',
    '*' => 'STAR',
    '/' => 'DIVIDE',
    '**' => 'POWER'
  }.freeze

  # Raised when the input contains text that matches no token pattern.
  class ScanError < StandardError; end

  # @param source [String] the text to tokenize
  # @param aGrammar [#name2symbol] grammar whose name2symbol mapping is
  #   used to look up terminal symbols by name
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
  end

  # Scan the remaining input and return all the tokens found.
  # @return [Array<Rley::Tokens::Token>]
  # @raise [ScanError] when an unrecognized character sequence is met
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Skip leading whitespace and build the token found at the scanning
  # position.
  # @return [Rley::Tokens::Token, nil] nil when only whitespace was left
  # @raise [ScanError] when no token pattern matches
  def _next_token
    skip_whitespaces
    # StringScanner#peek returns '' (never nil) at end of input and
    # String#include?('') is always true: without this explicit check,
    # trailing whitespace would yield a bogus token built from nil.
    return nil if scanner.eos?

    curr_ch = scanner.peek(1)

    if '()+/'.include?(curr_ch)
      # Single character token
      build_token(@@lexeme2name[curr_ch], scanner.getch)
    elsif (lexeme = scanner.scan(/\*\*?/))
      # '**' (power) is tried before '*' (multiply) by the greedy match
      build_token(@@lexeme2name[lexeme], lexeme)
    elsif (lexeme = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/))
      # The exponent accepts one or more digits (e.g. 1.5e10).
      # A '-' immediately followed by a digit is part of the number.
      build_token('NUMBER', lexeme)
    elsif (lexeme = scanner.scan(/-/))
      build_token(@@lexeme2name[lexeme], lexeme)
    else # Unknown token
      # peek did not consume curr_ch, so the scan below already starts
      # with it; fall back to curr_ch when nothing can be scanned.
      erroneous = scanner.scan(/.{1,20}/) || curr_ch
      raise ScanError, "Unknown token #{erroneous}"
    end
  end

  # @param aSymbolName [String] name of the terminal symbol
  # @param aLexeme [String] text of the token
  # @return [Rley::Tokens::Token]
  def build_token(aSymbolName, aLexeme)
    token_type = name2symbol[aSymbolName]
    Rley::Tokens::Token.new(aLexeme, token_type)
  end

  # Consume any run of blanks, tabs, form feeds and line terminators.
  def skip_whitespaces
    scanner.scan(/[ \t\f\n\r]+/)
  end
end # class
@@ -0,0 +1,24 @@
1
+ # Purpose: to demonstrate how to build a parser for arithmetic
2
+ # expressions
3
+ require_relative 'calc_lexer'
4
+ require_relative 'calc_grammar'
5
+
6
+ # A parser for arithmetic expressions
7
class CalcParser < Rley::Parser::GFGEarleyParser
  attr_reader :source_file

  # Constructor
  def initialize
    # Build the Earley parser with the calculator grammar
    super(CalcGrammar)
  end

  # Tokenize the given text with a CalcLexer and parse the tokens.
  # @param aText [String] the text of the expression to parse
  # @return the value returned by the inherited `parse` method
  def parse_expression(aText)
    token_stream = CalcLexer.new(aText, grammar).tokens
    parse(token_stream)
  end
end # class
23
+
24
+ # End of file
@@ -6,6 +6,7 @@ require_relative './rley/constants'
6
6
  require_relative './rley/syntax/grammar_builder'
7
7
  require_relative './rley/tokens/token'
8
8
  require_relative './rley/parser/gfg_earley_parser'
9
+ require_relative './rley/parser/parse_tree_builder'
9
10
  require_relative './rley/parse_tree_visitor'
10
11
  require_relative './rley/formatter/debug'
11
12
  require_relative './rley/formatter/json'
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.01'.freeze
6
+ Version = '0.5.02'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -1,26 +1,10 @@
1
- require_relative '../tokens/token_range'
2
- require_relative '../syntax/terminal'
3
- require_relative '../syntax/non_terminal'
4
- require_relative '../gfg/end_vertex'
5
- require_relative '../gfg/item_vertex'
6
- require_relative '../gfg/start_vertex'
1
+ require_relative 'parse_tree_builder'
7
2
  require_relative '../ptree/non_terminal_node'
8
3
  require_relative '../ptree/terminal_node'
9
4
  require_relative '../ptree/parse_tree'
10
5
 
11
6
  module Rley # This module is used as a namespace
12
7
  module Parser # This module is used as a namespace
13
- # Structure used internally by CSTBuilder class.
14
- CSTRawNode = Struct.new(:range, :symbol, :children) do
15
- def initialize(aRange, aSymbol)
16
- super
17
- self.range = aRange
18
- self.symbol = aSymbol
19
- self.children = nil
20
- end
21
- end # Struct
22
-
23
-
24
8
  # The purpose of a CSTBuilder is to build piece by piece a CST
25
9
  # (Concrete Syntax Tree) from a sequence of input tokens and
26
10
  # visit events produced by walking over a GFGParsing object.
@@ -28,185 +12,16 @@ module Rley # This module is used as a namespace
28
12
  # The Builder pattern creates a complex object
29
13
  # (say, a parse tree) from simpler objects (terminal and non-terminal
30
14
  # nodes) and using a step by step approach.
31
- class CSTBuilder
32
- # @return [Array<Token>] The sequence of input tokens
33
- attr_reader(:tokens)
34
-
35
- # Link to CST object (being) built.
36
- attr_reader(:result)
37
-
38
-
39
- # Create a new builder instance.
40
- # @param theTokens [Array<Token>] The sequence of input tokens.
41
- def initialize(theTokens)
42
- @tokens = theTokens
43
- @stack = []
44
- end
45
-
46
- # Receive events resulting from a visit of GFGParsing object.
47
- # These events are produced by a specialized Enumerator created
48
- # with a ParseWalkerFactory instance.
49
- # @param anEvent [Symbol] Kind of visit event. Should be: :visit
50
- # @param anEntry [ParseEntry] The entry being visited
51
- # @param anIndex [anIndex] The token index associated with anEntry
52
- def receive_event(anEvent, anEntry, anIndex)
53
- # puts "Event: #{anEvent} #{anEntry} #{anIndex}"
54
- if anEntry.dotted_entry? # A N => alpha . beta pattern?
55
- process_item_entry(anEvent, anEntry, anIndex)
56
- elsif anEntry.start_entry? # A .N pattern?
57
- process_start_entry(anEvent, anEntry, anIndex)
58
- elsif anEntry.end_entry? # A N. pattern?
59
- process_end_entry(anEvent, anEntry, anIndex)
60
- else
61
- raise NotImplementedError
62
- end
63
-
64
- @last_visitee = anEntry
65
- end
15
+ class CSTBuilder < ParseTreeBuilder
66
16
 
67
17
  protected
68
-
69
- # Return the stack
70
- def stack()
71
- return @stack
72
- end
73
-
74
- private
75
-
76
- # Return the top of stack element.
77
- def tos()
78
- return @stack.last
79
- end
80
-
81
- # Handler for visit events for ParseEntry matching N. pattern
82
- # @param anEvent [Symbol] Kind of visit event. Should be: :visit
83
- # @param anEntry [ParseEntry] The entry being visited
84
- # @param anIndex [anIndex] The token index at end of anEntry
85
- def process_end_entry(anEvent, anEntry, anIndex)
86
- case anEvent
87
- when :visit
88
- range = { low: anEntry.origin, high: anIndex }
89
- non_terminal = entry2nonterm(anEntry)
90
- # Create raw node and push onto stack
91
- push_raw_node(range, non_terminal)
92
- else
93
- raise NotImplementedError
94
- end
95
- end
96
-
97
- # Handler for visit events for ParseEntry matching .N pattern
98
- # @param anEvent [Symbol] Kind of visit event. Should be: :visit
99
- # @param anEntry [ParseEntry] The entry being visited
100
- # @param anIndex [anIndex] The token index at end of anEntry
101
- def process_start_entry(anEvent, anEntry, anIndex)
102
- raise NotImplementedError unless [:visit, :revisit].include?(anEvent)
103
- end
104
-
105
- # Handler for visit events for ParseEntry matching N => alpha* . beta*
106
- # @param anEvent [Symbol] Kind of visit event. Should be: :visit
107
- # @param anEntry [ParseEntry] The entry being visited
108
- # @param anIndex [anIndex] The token index at end of anEntry
109
- def process_item_entry(anEvent, anEntry, anIndex)
110
- # TODO: what if rhs is empty?
111
- case anEvent
112
- when :visit, :revisit
113
- dot_pos = anEntry.vertex.dotted_item.position
114
- if dot_pos.zero? || dot_pos < 0
115
- # Check for pattern: N => alpha* .
116
- process_exit_entry(anEntry, anIndex) if anEntry.exit_entry?
117
-
118
- # Check for pattern: N => . alpha*
119
- process_entry_entry(anEntry, anIndex) if anEntry.entry_entry?
120
- else
121
- # (pattern: N => alpha+ . beta+)
122
- process_middle_entry(anEntry, anIndex)
123
- end
124
- else
125
- $stderr.puts "waiko '#{anEvent}'"
126
- raise NotImplementedError
127
- end
128
- end
129
-
130
- # @param anEntry [ParseEntry] Entry matching (pattern: N => alpha* .)
131
- # @param anIndex [anIndex] The token index at end of anEntry
132
- def process_exit_entry(anEntry, anIndex)
133
- production = anEntry.vertex.dotted_item.production
134
- count_rhs = production.rhs.members.size
135
- init_TOS_children(count_rhs) # Create placeholders for children
136
- build_terminal(anEntry, anIndex) if terminal_before_dot?(anEntry)
137
- end
138
-
139
- # @param anEntry [ParseEntry] Entry matching pattern: N => alpha+ . beta+
140
- # @param anIndex [anIndex] The token index at end of anEntry
141
- def process_middle_entry(anEntry, anIndex)
142
- build_terminal(anEntry, anIndex) if terminal_before_dot?(anEntry)
143
- end
144
-
145
-
146
-
147
- # @param anEntry [ParseEntry] Entry matching (pattern: N => . alpha)
148
- # @param anIndex [anIndex] The token index at end of anEntry
149
- def process_entry_entry(anEntry, anIndex)
150
- dotted_item = anEntry.vertex.dotted_item
151
- rule = dotted_item.production
152
- previous_tos = stack.pop
153
- non_terminal = entry2nonterm(anEntry)
154
- # For debugging purposes
155
- raise StandardError if previous_tos.symbol != non_terminal
156
-
157
- new_node = new_parent_node(rule, previous_tos.range,
158
- tokens, previous_tos.children)
159
- if stack.empty?
160
- @result = create_tree(new_node)
161
- else
162
- place_TOS_child(new_node, nil)
163
- end
164
- end
165
-
166
- # Create a raw node with given range
167
- # and push it on top of stack.
168
- def push_raw_node(aRange, aSymbol)
169
- raw_node = CSTRawNode.new(Tokens::TokenRange.new(aRange), aSymbol)
170
- stack.push(raw_node)
171
- end
172
-
173
- # Initialize children array of TOS with nil placeholders.
174
- # The number of elements equals the number of symbols at rhs.
175
- def init_TOS_children(aCount)
176
- tos.children = Array.new(aCount)
177
- end
178
-
179
- # Does the position on the left side of the dot correspond
180
- # a terminal symbol?
181
- # @param anEntry [ParseEntry] The entry being visited
182
- def terminal_before_dot?(anEntry)
183
- prev_symbol = anEntry.prev_symbol
184
- return prev_symbol && prev_symbol.terminal?
185
- end
186
-
187
- # A terminal symbol was detected at left of dot.
188
- # Build a raw node for that terminal and make it
189
- # a child of TOS.
190
- # @param anEntry [ParseEntry] The entry being visited
191
- # @param anIndex [anIndex] The token index at end of anEntry
192
- def build_terminal(anEntry, anIndex)
193
- # First, build node for terminal...
194
- term_symbol = anEntry.prev_symbol
195
- token_position = anIndex - 1
196
- token = tokens[token_position]
197
- term_node = new_leaf_node(term_symbol, token_position, token)
198
-
199
- # Second make it a child of TOS...
200
- pos = anEntry.vertex.dotted_item.prev_position # position in rhs of rule
201
- place_TOS_child(term_node, pos)
202
- end
203
-
18
+
204
19
  # Method to override
205
20
  # Create a parse tree object with given
206
21
  # node as root node.
207
22
  def create_tree(aRootNode)
208
23
  return Rley::PTree::ParseTree.new(aRootNode)
209
- end
24
+ end
210
25
 
211
26
  # Method to override
212
27
  # Factory method for creating a node object for the given
@@ -214,7 +29,7 @@ module Rley # This module is used as a namespace
214
29
  # @param _terminal [Terminal] Terminal symbol associated with the token
215
30
  # @param aTokenPosition [Integer] Position of token in the input stream
216
31
  # @param aToken [Token] The input token
217
- def new_leaf_node(_terminal, aTokenPosition, aToken)
32
+ def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
218
33
  PTree::TerminalNode.new(aToken, aTokenPosition)
219
34
  end
220
35
 
@@ -229,41 +44,6 @@ module Rley # This module is used as a namespace
229
44
  theChildren.reverse_each { |child| node.add_subnode(child) }
230
45
  return node
231
46
  end
232
-
233
- # Place the given node object as one of the children of the TOS
234
- # (TOS = Top Of Stack).
235
- # Each child has a position that is dictated by the position of the
236
- # related grammar symbol in the right-handed side (RHS) of the grammar
237
- # rule.
238
- # @param aNode [TerminalNode, NonTerminalNode] Node object to be placed
239
- # @param aRHSPos [Integer, NilClass] Position in RHS of rule.
240
- # If the position is provided, then the node will placed in the children
241
- # array at that position.
242
- # If the position is nil, then the node will be placed at the position of
243
- # the rightmost nil element in children array.
244
- def place_TOS_child(aNode, aRHSPos)
245
- if aRHSPos.nil?
246
- # Retrieve index of most rightmost nil child...
247
- pos = tos.children.rindex { |child| child.nil? }
248
- raise StandardError, 'Internal error' if pos.nil?
249
- else
250
- pos = aRHSPos
251
- end
252
-
253
- tos.children[pos] = aNode
254
- end
255
-
256
- # Retrieve non-terminal symbol of given parse entry
257
- def entry2nonterm(anEntry)
258
- case anEntry.vertex
259
- when GFG::StartVertex, GFG::EndVertex
260
- non_terminal = anEntry.vertex.non_terminal
261
- when GFG::ItemVertex
262
- non_terminal = anEntry.vertex.lhs
263
- end
264
-
265
- return non_terminal
266
- end
267
47
  end # class
268
48
  end # module
269
49
  end # module