rltk 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +21 -22
- data/lib/rltk/ast.rb +185 -118
- data/lib/rltk/cfg.rb +157 -103
- data/lib/rltk/cg/basic_block.rb +19 -19
- data/lib/rltk/cg/bindings.rb +16 -16
- data/lib/rltk/cg/builder.rb +129 -129
- data/lib/rltk/cg/context.rb +7 -7
- data/lib/rltk/cg/contractor.rb +7 -7
- data/lib/rltk/cg/execution_engine.rb +30 -30
- data/lib/rltk/cg/function.rb +37 -37
- data/lib/rltk/cg/generated_bindings.rb +3932 -3932
- data/lib/rltk/cg/generic_value.rb +17 -17
- data/lib/rltk/cg/instruction.rb +116 -116
- data/lib/rltk/cg/llvm.rb +22 -22
- data/lib/rltk/cg/memory_buffer.rb +7 -7
- data/lib/rltk/cg/module.rb +73 -73
- data/lib/rltk/cg/pass_manager.rb +35 -35
- data/lib/rltk/cg/target.rb +41 -41
- data/lib/rltk/cg/triple.rb +7 -7
- data/lib/rltk/cg/type.rb +75 -75
- data/lib/rltk/cg/value.rb +161 -161
- data/lib/rltk/lexer.rb +57 -57
- data/lib/rltk/lexers/calculator.rb +7 -7
- data/lib/rltk/lexers/ebnf.rb +5 -5
- data/lib/rltk/parser.rb +338 -295
- data/lib/rltk/parsers/infix_calc.rb +7 -7
- data/lib/rltk/parsers/postfix_calc.rb +3 -3
- data/lib/rltk/parsers/prefix_calc.rb +3 -3
- data/lib/rltk/token.rb +13 -13
- data/lib/rltk/version.rb +6 -6
- data/test/cg/tc_basic_block.rb +17 -17
- data/test/cg/tc_control_flow.rb +41 -41
- data/test/cg/tc_function.rb +4 -4
- data/test/cg/tc_generic_value.rb +3 -3
- data/test/cg/tc_instruction.rb +53 -53
- data/test/cg/tc_math.rb +12 -12
- data/test/cg/tc_module.rb +14 -14
- data/test/cg/tc_transforms.rb +11 -11
- data/test/cg/tc_type.rb +12 -12
- data/test/cg/tc_value.rb +35 -35
- data/test/cg/ts_cg.rb +5 -5
- data/test/tc_ast.rb +137 -60
- data/test/tc_cfg.rb +34 -34
- data/test/tc_lexer.rb +42 -42
- data/test/tc_parser.rb +250 -173
- data/test/tc_token.rb +2 -2
- data/test/ts_rltk.rb +8 -8
- metadata +84 -85
- data/lib/rltk/cg/old_generated_bindings.rb +0 -6152
data/lib/rltk/lexer.rb
CHANGED
@@ -32,7 +32,7 @@ module RLTK
|
|
32
32
|
|
33
33
|
# @return [String]
|
34
34
|
attr_reader :remainder
|
35
|
-
|
35
|
+
|
36
36
|
# @param [Integer] stream_offset Offset from beginning of string.
|
37
37
|
# @param [Integer] line_number Number of newlines encountered so far.
|
38
38
|
# @param [Integer] line_offset Offset from beginning of line.
|
@@ -43,28 +43,28 @@ module RLTK
|
|
43
43
|
@line_offset = line_offset
|
44
44
|
@remainder = remainder
|
45
45
|
end
|
46
|
-
|
46
|
+
|
47
47
|
# @return [String] String representation of the error.
|
48
48
|
def to_s
|
49
49
|
"#{super()}: #{@remainder}"
|
50
50
|
end
|
51
51
|
end
|
52
|
-
|
52
|
+
|
53
53
|
# The Lexer class may be sub-classed to produce new lexers. These lexers
|
54
54
|
# have a lot of features, and are described in the main documentation.
|
55
55
|
class Lexer
|
56
|
-
|
56
|
+
|
57
57
|
# @return [Environment] Environment used by an instantiated lexer.
|
58
58
|
attr_reader :env
|
59
|
-
|
59
|
+
|
60
60
|
#################
|
61
61
|
# Class Methods #
|
62
62
|
#################
|
63
|
-
|
63
|
+
|
64
64
|
class << self
|
65
65
|
# @return [Symbol] State in which the lexer starts.
|
66
66
|
attr_reader :start_state
|
67
|
-
|
67
|
+
|
68
68
|
# Called when the Lexer class is sub-classed, it installs
|
69
69
|
# necessary instance class variables.
|
70
70
|
#
|
@@ -72,7 +72,7 @@ module RLTK
|
|
72
72
|
def inherited(klass)
|
73
73
|
klass.install_icvars
|
74
74
|
end
|
75
|
-
|
75
|
+
|
76
76
|
# Installs instance class variables into a class.
|
77
77
|
#
|
78
78
|
# @return [void]
|
@@ -81,7 +81,7 @@ module RLTK
|
|
81
81
|
@rules = Hash.new {|h,k| h[k] = Array.new}
|
82
82
|
@start_state = :default
|
83
83
|
end
|
84
|
-
|
84
|
+
|
85
85
|
# Lex *string*, using *env* as the environment. This method will
|
86
86
|
# return the array of tokens generated by the lexer with a token
|
87
87
|
# of type EOS (End of Stream) appended to the end.
|
@@ -94,21 +94,21 @@ module RLTK
|
|
94
94
|
def lex(string, file_name = nil, env = self::Environment.new(@start_state))
|
95
95
|
# Offset from start of stream.
|
96
96
|
stream_offset = 0
|
97
|
-
|
97
|
+
|
98
98
|
# Offset from the start of the line.
|
99
99
|
line_offset = 0
|
100
100
|
line_number = 1
|
101
|
-
|
101
|
+
|
102
102
|
# Empty token list.
|
103
103
|
tokens = Array.new
|
104
|
-
|
104
|
+
|
105
105
|
# The scanner.
|
106
106
|
scanner = StringScanner.new(string)
|
107
|
-
|
107
|
+
|
108
108
|
# Start scanning the input string.
|
109
109
|
until scanner.eos?
|
110
110
|
match = nil
|
111
|
-
|
111
|
+
|
112
112
|
# If the match_type is set to :longest all of the
|
113
113
|
# rules for the current state need to be scanned
|
114
114
|
# and the longest match returned. If the
|
@@ -119,30 +119,30 @@ module RLTK
|
|
119
119
|
if txt = scanner.check(rule.pattern)
|
120
120
|
if not match or match.first.length < txt.length
|
121
121
|
match = [txt, rule]
|
122
|
-
|
122
|
+
|
123
123
|
break if @match_type == :first
|
124
124
|
end
|
125
125
|
end
|
126
126
|
end
|
127
127
|
end
|
128
|
-
|
128
|
+
|
129
129
|
if match
|
130
130
|
rule = match.last
|
131
|
-
|
131
|
+
|
132
132
|
txt = scanner.scan(rule.pattern)
|
133
133
|
type, value = env.rule_exec(rule.pattern.match(txt), txt, &rule.action)
|
134
|
-
|
134
|
+
|
135
135
|
if type
|
136
136
|
pos = StreamPosition.new(stream_offset, line_number, line_offset, txt.length, file_name)
|
137
|
-
tokens << Token.new(type, value, pos)
|
137
|
+
tokens << Token.new(type, value, pos)
|
138
138
|
end
|
139
|
-
|
139
|
+
|
140
140
|
# Advance our stat counters.
|
141
141
|
stream_offset += txt.length
|
142
|
-
|
142
|
+
|
143
143
|
if (newlines = txt.count("\n")) > 0
|
144
144
|
line_number += newlines
|
145
|
-
line_offset
|
145
|
+
line_offset = txt.rpartition("\n").last.length
|
146
146
|
else
|
147
147
|
line_offset += txt.length()
|
148
148
|
end
|
@@ -151,10 +151,10 @@ module RLTK
|
|
151
151
|
raise(error, 'Unable to match string with any of the given rules')
|
152
152
|
end
|
153
153
|
end
|
154
|
-
|
154
|
+
|
155
155
|
return tokens << Token.new(:EOS)
|
156
156
|
end
|
157
|
-
|
157
|
+
|
158
158
|
# A wrapper function that calls {Lexer.lex} on the contents of a
|
159
159
|
# file.
|
160
160
|
#
|
@@ -165,7 +165,7 @@ module RLTK
|
|
165
165
|
def lex_file(file_name, env = self::Environment.new(@start_state))
|
166
166
|
File.open(file_name, 'r') { |f| self.lex(f.read, file_name, env) }
|
167
167
|
end
|
168
|
-
|
168
|
+
|
169
169
|
# Used to tell a lexer to use the first match found instead
|
170
170
|
# of the longest match found.
|
171
171
|
#
|
@@ -173,7 +173,7 @@ module RLTK
|
|
173
173
|
def match_first
|
174
174
|
@match_type = :first
|
175
175
|
end
|
176
|
-
|
176
|
+
|
177
177
|
# This method is used to define a new lexing rule. The
|
178
178
|
# first argument is the regular expression used to match
|
179
179
|
# substrings of the input. The second argument is the state
|
@@ -194,15 +194,15 @@ module RLTK
|
|
194
194
|
# If no action is given we will set it to an empty
|
195
195
|
# action.
|
196
196
|
action ||= Proc.new() {}
|
197
|
-
|
197
|
+
|
198
198
|
pattern = Regexp.new(pattern) if pattern.is_a?(String)
|
199
|
-
|
199
|
+
|
200
200
|
r = Rule.new(pattern, action, state, flags)
|
201
|
-
|
201
|
+
|
202
202
|
if state == :ALL then @rules.each_key { |k| @rules[k] << r } else @rules[state] << r end
|
203
203
|
end
|
204
204
|
alias :r :rule
|
205
|
-
|
205
|
+
|
206
206
|
# Changes the starting state of the lexer.
|
207
207
|
#
|
208
208
|
# @param [Symbol] state Starting state for this lexer.
|
@@ -212,17 +212,17 @@ module RLTK
|
|
212
212
|
@start_state = state
|
213
213
|
end
|
214
214
|
end
|
215
|
-
|
215
|
+
|
216
216
|
####################
|
217
217
|
# Instance Methods #
|
218
218
|
####################
|
219
|
-
|
219
|
+
|
220
220
|
# Instantiates a new lexer and creates an environment to be
|
221
221
|
# used for subsequent calls.
|
222
222
|
def initialize
|
223
223
|
@env = self.class::Environment.new(self.class.start_state)
|
224
224
|
end
|
225
|
-
|
225
|
+
|
226
226
|
# Lexes a string using the encapsulated environment.
|
227
227
|
#
|
228
228
|
# @param [String] string String to be lexed.
|
@@ -232,7 +232,7 @@ module RLTK
|
|
232
232
|
def lex(string, file_name = nil)
|
233
233
|
self.class.lex(string, file_name, @env)
|
234
234
|
end
|
235
|
-
|
235
|
+
|
236
236
|
# Lexes a file using the encapsulated environment.
|
237
237
|
#
|
238
238
|
# @param [String] file_name File to be lexed.
|
@@ -241,19 +241,19 @@ module RLTK
|
|
241
241
|
def lex_file(file_name)
|
242
242
|
self.class.lex_file(file_name, @env)
|
243
243
|
end
|
244
|
-
|
244
|
+
|
245
245
|
# All actions passed to LexerCore.rule are evaluated inside an
|
246
246
|
# instance of the Environment class or its subclass (which must have
|
247
247
|
# the same name). This class provides functions for manipulating
|
248
248
|
# lexer state and flags.
|
249
249
|
class Environment
|
250
|
-
|
250
|
+
|
251
251
|
# @return [Array<Symbol>] Flags currently set in this environment.
|
252
252
|
attr_reader :flags
|
253
|
-
|
253
|
+
|
254
254
|
# @return [Match] Match object generated by a rule's regular expression.
|
255
255
|
attr_accessor :match
|
256
|
-
|
256
|
+
|
257
257
|
# Instantiates a new Environment object.
|
258
258
|
#
|
259
259
|
# @param [Symbol] start_state Lexer's start state.
|
@@ -263,7 +263,7 @@ module RLTK
|
|
263
263
|
@match = match
|
264
264
|
@flags = Array.new
|
265
265
|
end
|
266
|
-
|
266
|
+
|
267
267
|
# This function will instance_exec a block for a rule after
|
268
268
|
# setting the match value.
|
269
269
|
#
|
@@ -272,28 +272,28 @@ module RLTK
|
|
272
272
|
# @param [Proc] block Block for matched rule.
|
273
273
|
def rule_exec(match, txt, &block)
|
274
274
|
self.match = match
|
275
|
-
|
275
|
+
|
276
276
|
self.instance_exec(txt, &block)
|
277
277
|
end
|
278
|
-
|
278
|
+
|
279
279
|
# Pops a state from the state stack.
|
280
280
|
#
|
281
281
|
# @return [void]
|
282
282
|
def pop_state
|
283
283
|
@state.pop
|
284
|
-
|
284
|
+
|
285
285
|
nil
|
286
286
|
end
|
287
|
-
|
287
|
+
|
288
288
|
# Pushes a new state onto the state stack.
|
289
289
|
#
|
290
290
|
# @return [void]
|
291
291
|
def push_state(state)
|
292
292
|
@state << state
|
293
|
-
|
293
|
+
|
294
294
|
nil
|
295
295
|
end
|
296
|
-
|
296
|
+
|
297
297
|
# Sets the value on the top of the state stack.
|
298
298
|
#
|
299
299
|
# @param [Symbol] state New state for the lexing environment.
|
@@ -301,15 +301,15 @@ module RLTK
|
|
301
301
|
# @return [void]
|
302
302
|
def set_state(state)
|
303
303
|
@state[-1] = state
|
304
|
-
|
304
|
+
|
305
305
|
nil
|
306
306
|
end
|
307
|
-
|
307
|
+
|
308
308
|
# @return [Symbol] Current state of the lexing environment.
|
309
309
|
def state
|
310
310
|
@state.last
|
311
311
|
end
|
312
|
-
|
312
|
+
|
313
313
|
# Sets a flag in the current environment.
|
314
314
|
#
|
315
315
|
# @param [Symbol] flag Flag to set as enabled.
|
@@ -319,10 +319,10 @@ module RLTK
|
|
319
319
|
if not @flags.include?(flag)
|
320
320
|
@flags << flag
|
321
321
|
end
|
322
|
-
|
322
|
+
|
323
323
|
nil
|
324
324
|
end
|
325
|
-
|
325
|
+
|
326
326
|
# Unsets a flag in the current environment.
|
327
327
|
#
|
328
328
|
# @param [Symbol] flag Flag to unset.
|
@@ -330,31 +330,31 @@ module RLTK
|
|
330
330
|
# @return [void]
|
331
331
|
def unset_flag(flag)
|
332
332
|
@flags.delete(flag)
|
333
|
-
|
333
|
+
|
334
334
|
nil
|
335
335
|
end
|
336
|
-
|
336
|
+
|
337
337
|
# Unsets all flags in the current environment.
|
338
338
|
#
|
339
339
|
# @return [void]
|
340
340
|
def clear_flags
|
341
341
|
@flags = Array.new
|
342
|
-
|
342
|
+
|
343
343
|
nil
|
344
344
|
end
|
345
345
|
end
|
346
|
-
|
346
|
+
|
347
347
|
# The Rule class is used simply for data encapsulation.
|
348
348
|
class Rule
|
349
349
|
# @return [Proc] Token producing action to be taken when this rule is matched.
|
350
350
|
attr_reader :action
|
351
|
-
|
351
|
+
|
352
352
|
# @return [Regexp] Regular expression for matching this rule.
|
353
353
|
attr_reader :pattern
|
354
|
-
|
354
|
+
|
355
355
|
# @return [Array<Symbol>] Flags currently set in this lexing environment.
|
356
356
|
attr_reader :flags
|
357
|
-
|
357
|
+
|
358
358
|
# Instantiates a new Rule object.
|
359
359
|
#
|
360
360
|
# @param [Regexp] pattern Regular expression used to match to this rule.
|
data/lib/rltk/lexers/calculator.rb
CHANGED
@@ -15,29 +15,29 @@ require 'rltk/lexer'
|
|
15
15
|
#######################
|
16
16
|
|
17
17
|
module RLTK
|
18
|
-
|
18
|
+
|
19
19
|
# The RLTK::Lexers module contains the lexers that are included as part of
|
20
20
|
# the RLTK project.
|
21
21
|
module Lexers
|
22
|
-
|
22
|
+
|
23
23
|
# The Calculator lexer is a simple lexer for use with several of the
|
24
24
|
# provided parsers.
|
25
25
|
class Calculator < Lexer
|
26
|
-
|
26
|
+
|
27
27
|
#################
|
28
28
|
# Default State #
|
29
29
|
#################
|
30
|
-
|
30
|
+
|
31
31
|
rule(/\+/) { :PLS }
|
32
32
|
rule(/-/) { :SUB }
|
33
33
|
rule(/\*/) { :MUL }
|
34
34
|
rule(/\//) { :DIV }
|
35
|
-
|
35
|
+
|
36
36
|
rule(/\(/) { :LPAREN }
|
37
37
|
rule(/\)/) { :RPAREN }
|
38
|
-
|
38
|
+
|
39
39
|
rule(/[0-9]+/) { |t| [:NUM, t.to_i] }
|
40
|
-
|
40
|
+
|
41
41
|
rule(/\s/)
|
42
42
|
end
|
43
43
|
end
|
data/lib/rltk/lexers/ebnf.rb
CHANGED
@@ -16,22 +16,22 @@ require 'rltk/lexer'
|
|
16
16
|
|
17
17
|
module RLTK
|
18
18
|
module Lexers
|
19
|
-
|
19
|
+
|
20
20
|
# The EBNF lexer is used by the RLTK::CFG class.
|
21
21
|
class EBNF < Lexer
|
22
|
-
|
22
|
+
|
23
23
|
#################
|
24
24
|
# Default State #
|
25
25
|
#################
|
26
|
-
|
26
|
+
|
27
27
|
rule(/\*/) { :STAR }
|
28
28
|
rule(/\+/) { :PLUS }
|
29
29
|
rule(/\?/) { :QUESTION }
|
30
30
|
rule(/\./) { :DOT }
|
31
|
-
|
31
|
+
|
32
32
|
rule(/[a-z0-9_']+/) { |t| [:NONTERM, t.to_sym] }
|
33
33
|
rule(/[A-Z0-9_']+/) { |t| [:TERM, t.to_sym] }
|
34
|
-
|
34
|
+
|
35
35
|
rule(/\s/)
|
36
36
|
end
|
37
37
|
end
|
data/lib/rltk/parser.rb
CHANGED
@@ -24,21 +24,21 @@ module RLTK
|
|
24
24
|
'Unexpected token. Token not present in grammar definition.'
|
25
25
|
end
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
# A NotInLanguage error is raised whenever there is no valid parse tree
|
29
29
|
# for a given token stream. In other words, the input string is not in the
|
30
30
|
# defined language.
|
31
31
|
class NotInLanguage < StandardError
|
32
|
-
|
32
|
+
|
33
33
|
# @return [Array<Token>] List of tokens that have been successfully parsed
|
34
34
|
attr_reader :seen
|
35
|
-
|
35
|
+
|
36
36
|
# @return [Token] Token that caused the parser to stop
|
37
37
|
attr_reader :current
|
38
|
-
|
38
|
+
|
39
39
|
# @return [Array<Token>] List of tokens that have yet to be seen
|
40
40
|
attr_reader :remaining
|
41
|
-
|
41
|
+
|
42
42
|
# @param [Array<Token>] seen Tokens that have been successfully parsed
|
43
43
|
# @param [Token] current Token that caused the parser to stop
|
44
44
|
# @param [Array<Token>] remaining Tokens that have yet to be seen
|
@@ -47,25 +47,25 @@ module RLTK
|
|
47
47
|
@current = current
|
48
48
|
@remaining = remaining
|
49
49
|
end
|
50
|
-
|
50
|
+
|
51
51
|
# @return [String] String representation of the error.
|
52
52
|
def to_s
|
53
53
|
"String not in language. Token info:\n\tSeen: #{@seen}\n\tCurrent: #{@current}\n\tRemaining: #{@remaining}"
|
54
54
|
end
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
# An error of this type is raised when the parser encountered an error that
|
58
58
|
# was handled by an error production.
|
59
59
|
class HandledError < StandardError
|
60
|
-
|
60
|
+
|
61
61
|
# The errors as reported by the parser.
|
62
|
-
#
|
62
|
+
#
|
63
63
|
# @return [Array<Object>]
|
64
64
|
attr_reader :errors
|
65
|
-
|
65
|
+
|
66
66
|
# The result that would have been returned by the call to *parse*.
|
67
67
|
attr_reader :result
|
68
|
-
|
68
|
+
|
69
69
|
# Instantiate a new HandledError object with *errors*.
|
70
70
|
#
|
71
71
|
# @param [Array<Object>] errors Errors added to the parsing environment by calls to {Parser::Environment#error}.
|
@@ -75,49 +75,68 @@ module RLTK
|
|
75
75
|
@result = result
|
76
76
|
end
|
77
77
|
end
|
78
|
-
|
78
|
+
|
79
79
|
# Used for exceptions that occur during parser construction.
|
80
80
|
class ParserConstructionException < Exception; end
|
81
|
-
|
81
|
+
|
82
82
|
# Used for runtime exceptions that are the parser's fault. These should
|
83
83
|
# never be observed in the wild.
|
84
84
|
class InternalParserException < Exception; end
|
85
|
-
|
85
|
+
|
86
|
+
# Used to indicate that a parser is empty or hasn't been finalized.
|
87
|
+
class UselessParserException < Exception
|
88
|
+
# Sets the error message for this exception.
|
89
|
+
def initialize
|
90
|
+
super('Parser has not been finalized.')
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
86
94
|
# The Parser class may be sub-classed to produce new parsers. These
|
87
95
|
# parsers have a lot of features, and are described in the main
|
88
96
|
# documentation.
|
89
97
|
class Parser
|
90
98
|
# @return [Environment] Environment used by the instantiated parser.
|
91
99
|
attr_reader :env
|
92
|
-
|
100
|
+
|
93
101
|
#################
|
94
102
|
# Class Methods #
|
95
103
|
#################
|
96
|
-
|
104
|
+
|
97
105
|
class << self
|
106
|
+
# The overridden new prevents un-finalized parsers from being
|
107
|
+
# instantiated.
|
108
|
+
def new(*args)
|
109
|
+
if @symbols.nil?
|
110
|
+
raise UselessParserException
|
111
|
+
else
|
112
|
+
super(*args)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
98
116
|
# Installs instance class variables into a class.
|
99
117
|
#
|
100
118
|
# @return [void]
|
101
119
|
def install_icvars
|
102
120
|
@curr_lhs = nil
|
103
121
|
@curr_prec = nil
|
104
|
-
|
122
|
+
|
105
123
|
@conflicts = Hash.new {|h, k| h[k] = Array.new}
|
106
124
|
@grammar = CFG.new
|
107
|
-
|
125
|
+
|
108
126
|
@lh_sides = Hash.new
|
109
127
|
@procs = Array.new
|
110
128
|
@states = Array.new
|
111
|
-
|
129
|
+
|
112
130
|
# Variables for dealing with precedence.
|
113
131
|
@prec_counts = {:left => 0, :right => 0, :non => 0}
|
114
132
|
@production_precs = Array.new
|
115
133
|
@token_precs = Hash.new
|
116
|
-
|
134
|
+
@token_hooks = Hash.new {|h, k| h[k] = []}
|
135
|
+
|
117
136
|
# Set the default argument handling policy. Valid values
|
118
137
|
# are :array and :splat.
|
119
138
|
@default_arg_type = :splat
|
120
|
-
|
139
|
+
|
121
140
|
@grammar.callback do |type, which, p, sels = []|
|
122
141
|
@procs[p.id] = [
|
123
142
|
case type
|
@@ -126,35 +145,35 @@ module RLTK
|
|
126
145
|
when :empty then ProdProc.new { || nil }
|
127
146
|
else ProdProc.new { |o| o }
|
128
147
|
end
|
129
|
-
|
148
|
+
|
130
149
|
when :elp
|
131
150
|
case which
|
132
151
|
when :empty then ProdProc.new { || [] }
|
133
152
|
else ProdProc.new { |prime| prime }
|
134
153
|
end
|
135
|
-
|
154
|
+
|
136
155
|
when :nelp
|
137
156
|
case which
|
138
157
|
when :single
|
139
158
|
ProdProc.new { |el| [el] }
|
140
|
-
|
159
|
+
|
141
160
|
when :multiple
|
142
161
|
ProdProc.new(:splat, sels) do |*syms|
|
143
162
|
el = syms[1..-1]
|
144
163
|
syms.first << (el.length == 1 ? el.first : el)
|
145
164
|
end
|
146
|
-
|
165
|
+
|
147
166
|
else
|
148
167
|
ProdProc.new { |*el| el.length == 1 ? el.first : el }
|
149
168
|
end
|
150
169
|
end,
|
151
170
|
p.rhs.length
|
152
171
|
]
|
153
|
-
|
172
|
+
|
154
173
|
@production_precs[p.id] = p.last_terminal
|
155
174
|
end
|
156
175
|
end
|
157
|
-
|
176
|
+
|
158
177
|
# Called when the Parser class is sub-classed, it installs
|
159
178
|
# necessary instance class variables.
|
160
179
|
#
|
@@ -162,7 +181,7 @@ module RLTK
|
|
162
181
|
def inherited(klass)
|
163
182
|
klass.install_icvars
|
164
183
|
end
|
165
|
-
|
184
|
+
|
166
185
|
# If *state* (or its equivalent) is not in the state list it is
|
167
186
|
# added and its ID is returned. If there is already a state
|
168
187
|
# with the same items as *state* in the state list its ID is
|
@@ -176,13 +195,13 @@ module RLTK
|
|
176
195
|
id
|
177
196
|
else
|
178
197
|
state.id = @states.length
|
179
|
-
|
198
|
+
|
180
199
|
@states << state
|
181
|
-
|
200
|
+
|
182
201
|
@states.length - 1
|
183
202
|
end
|
184
203
|
end
|
185
|
-
|
204
|
+
|
186
205
|
# Build a hash with the default options for Parser.finalize
|
187
206
|
# and then update it with the values from *opts*.
|
188
207
|
#
|
@@ -191,7 +210,7 @@ module RLTK
|
|
191
210
|
# @return [Hash{Symbol => Object}]
|
192
211
|
def build_finalize_opts(opts)
|
193
212
|
opts[:explain] = self.get_io(opts[:explain])
|
194
|
-
|
213
|
+
|
195
214
|
{
|
196
215
|
explain: false,
|
197
216
|
lookahead: true,
|
@@ -200,7 +219,7 @@ module RLTK
|
|
200
219
|
}.update(opts)
|
201
220
|
end
|
202
221
|
private :build_finalize_opts
|
203
|
-
|
222
|
+
|
204
223
|
# Build a hash with the default options for Parser.parse and
|
205
224
|
# then update it with the values from *opts*.
|
206
225
|
#
|
@@ -210,7 +229,7 @@ module RLTK
|
|
210
229
|
def build_parse_opts(opts)
|
211
230
|
opts[:parse_tree] = self.get_io(opts[:parse_tree])
|
212
231
|
opts[:verbose] = self.get_io(opts[:verbose])
|
213
|
-
|
232
|
+
|
214
233
|
{
|
215
234
|
accept: :first,
|
216
235
|
env: self::Environment.new,
|
@@ -219,7 +238,7 @@ module RLTK
|
|
219
238
|
}.update(opts)
|
220
239
|
end
|
221
240
|
private :build_parse_opts
|
222
|
-
|
241
|
+
|
223
242
|
# This method is used to (surprise) check the sanity of the
|
224
243
|
# constructed parser. It checks to make sure all non-terminals
|
225
244
|
# used in the grammar definition appear on the left-hand side of
|
@@ -236,7 +255,7 @@ module RLTK
|
|
236
255
|
raise ParserConstructionException, "Non-terminal #{sym} does not appear on the left-hand side of any production."
|
237
256
|
end
|
238
257
|
end
|
239
|
-
|
258
|
+
|
240
259
|
# Check the actions in each state.
|
241
260
|
each_state do |state|
|
242
261
|
state.actions.each do |sym, actions|
|
@@ -247,14 +266,14 @@ module RLTK
|
|
247
266
|
if sym != :EOS
|
248
267
|
raise ParserConstructionException, "Accept action found for terminal #{sym} in state #{state.id}."
|
249
268
|
end
|
250
|
-
|
269
|
+
|
251
270
|
elsif not (action.is_a?(GoTo) or action.is_a?(Reduce) or action.is_a?(Shift))
|
252
271
|
raise ParserConstructionException, "Object of type #{action.class} found in actions for terminal " +
|
253
272
|
"#{sym} in state #{state.id}."
|
254
|
-
|
273
|
+
|
255
274
|
end
|
256
275
|
end
|
257
|
-
|
276
|
+
|
258
277
|
if (conflict = state.conflict_on?(sym))
|
259
278
|
self.inform_conflict(state.id, conflict, sym)
|
260
279
|
end
|
@@ -262,16 +281,16 @@ module RLTK
|
|
262
281
|
# Here we check actions for non-terminals.
|
263
282
|
if actions.length > 1
|
264
283
|
raise ParserConstructionException, "State #{state.id} has multiple GoTo actions for non-terminal #{sym}."
|
265
|
-
|
284
|
+
|
266
285
|
elsif actions.length == 1 and not actions.first.is_a?(GoTo)
|
267
286
|
raise ParserConstructionException, "State #{state.id} has non-GoTo action for non-terminal #{sym}."
|
268
|
-
|
287
|
+
|
269
288
|
end
|
270
289
|
end
|
271
290
|
end
|
272
291
|
end
|
273
292
|
end
|
274
|
-
|
293
|
+
|
275
294
|
# This method checks to see if the parser would be in parse state
|
276
295
|
# *dest* after starting in state *start* and reading *symbols*.
|
277
296
|
#
|
@@ -283,26 +302,26 @@ module RLTK
|
|
283
302
|
def check_reachability(start, dest, symbols)
|
284
303
|
path_exists = true
|
285
304
|
cur_state = start
|
286
|
-
|
305
|
+
|
287
306
|
symbols.each do |sym|
|
288
|
-
|
307
|
+
|
289
308
|
actions = @states[cur_state.id].on?(sym)
|
290
309
|
actions = actions.select { |a| a.is_a?(Shift) } if CFG::is_terminal?(sym)
|
291
|
-
|
310
|
+
|
292
311
|
if actions.empty?
|
293
312
|
path_exists = false
|
294
313
|
break
|
295
314
|
end
|
296
|
-
|
315
|
+
|
297
316
|
# There can only be one Shift action for terminals and
|
298
317
|
# one GoTo action for non-terminals, so we know the
|
299
318
|
# first action is the only one in the list.
|
300
319
|
cur_state = @states[actions.first.id]
|
301
320
|
end
|
302
|
-
|
321
|
+
|
303
322
|
path_exists and cur_state.id == dest.id
|
304
323
|
end
|
305
|
-
|
324
|
+
|
306
325
|
# Declares a new clause inside of a production. The right-hand
|
307
326
|
# side is specified by *expression* and the precedence of this
|
308
327
|
# production can be changed by setting the *precedence* argument
|
@@ -318,9 +337,9 @@ module RLTK
|
|
318
337
|
# Use the curr_prec only if it isn't overridden for this
|
319
338
|
# clause.
|
320
339
|
precedence ||= @curr_prec
|
321
|
-
|
340
|
+
|
322
341
|
production, selections = @grammar.clause(expression)
|
323
|
-
|
342
|
+
|
324
343
|
# Check to make sure the action's arity matches the number
|
325
344
|
# of symbols on the right-hand side.
|
326
345
|
expected_arity = (selections.empty? ? production.rhs.length : selections.length)
|
@@ -329,16 +348,16 @@ module RLTK
|
|
329
348
|
"Incorrect number of action parameters. Expected #{expected_arity} but got #{action.arity}." +
|
330
349
|
' Action arity must match the number of terminals and non-terminals in the clause.'
|
331
350
|
end
|
332
|
-
|
351
|
+
|
333
352
|
# Add the action to our proc list.
|
334
353
|
@procs[production.id] = [ProdProc.new(arg_type, selections, &action), production.rhs.length]
|
335
|
-
|
354
|
+
|
336
355
|
# If no precedence is specified use the precedence of the
|
337
356
|
# last terminal in the production.
|
338
357
|
@production_precs[production.id] = precedence || production.last_terminal
|
339
358
|
end
|
340
359
|
alias :c :clause
|
341
|
-
|
360
|
+
|
342
361
|
# Removes resources that were needed to generate the parser but
|
343
362
|
# aren't needed when actually parsing input.
|
344
363
|
#
|
@@ -346,23 +365,23 @@ module RLTK
|
|
346
365
|
def clean
|
347
366
|
# We've told the developer about conflicts by now.
|
348
367
|
@conflicts = nil
|
349
|
-
|
368
|
+
|
350
369
|
# Drop the grammar and the grammar'.
|
351
370
|
@grammar = nil
|
352
371
|
@grammar_prime = nil
|
353
|
-
|
372
|
+
|
354
373
|
# Drop precedence and bookkeeping information.
|
355
374
|
@cur_lhs = nil
|
356
375
|
@cur_prec = nil
|
357
|
-
|
376
|
+
|
358
377
|
@prec_counts = nil
|
359
378
|
@production_precs = nil
|
360
379
|
@token_precs = nil
|
361
|
-
|
380
|
+
|
362
381
|
# Drop the items from each of the states.
|
363
382
|
each_state { |state| state.clean }
|
364
383
|
end
|
365
|
-
|
384
|
+
|
366
385
|
# Set the default argument type for the actions associated with
|
367
386
|
# clauses. All actions defined after this call will be passed
|
368
387
|
# arguments in the way specified here, unless overridden in the
|
@@ -375,15 +394,15 @@ module RLTK
|
|
375
394
|
@default_arg_type = type if type == :array or type == :splat
|
376
395
|
end
|
377
396
|
alias :dat :default_arg_type
|
378
|
-
|
397
|
+
|
379
398
|
# Adds productions and actions for parsing empty lists.
|
380
399
|
#
|
381
400
|
# @see CFG#empty_list_production
|
382
401
|
def build_list_production(symbol, list_elements, separator = '')
|
383
402
|
@grammar.build_list_production(symbol, list_elements, separator)
|
384
403
|
end
|
385
|
-
alias :list :build_list_production
|
386
|
-
|
404
|
+
alias :list :build_list_production
|
405
|
+
|
387
406
|
# This function will print a description of the parser to the
|
388
407
|
# provided IO object.
|
389
408
|
#
|
@@ -396,127 +415,127 @@ module RLTK
|
|
396
415
|
io.puts('# Productions #')
|
397
416
|
io.puts('###############')
|
398
417
|
io.puts
|
399
|
-
|
418
|
+
|
400
419
|
max_id_length = @grammar.productions(:id).length.to_s.length
|
401
|
-
|
420
|
+
|
402
421
|
# Print the productions.
|
403
422
|
@grammar.productions.each do |sym, productions|
|
404
|
-
|
423
|
+
|
405
424
|
max_rhs_length = productions.inject(0) { |m, p| if (len = p.to_s.length) > m then len else m end }
|
406
|
-
|
425
|
+
|
407
426
|
productions.each do |production|
|
408
427
|
p_string = production.to_s
|
409
|
-
|
428
|
+
|
410
429
|
io.print("\tProduction #{sprintf("%#{max_id_length}d", production.id)}: #{p_string}")
|
411
|
-
|
430
|
+
|
412
431
|
if (prec = @production_precs[production.id])
|
413
432
|
io.print(' ' * (max_rhs_length - p_string.length))
|
414
433
|
io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
|
415
434
|
end
|
416
|
-
|
435
|
+
|
417
436
|
io.puts
|
418
437
|
end
|
419
|
-
|
438
|
+
|
420
439
|
io.puts
|
421
440
|
end
|
422
|
-
|
441
|
+
|
423
442
|
io.puts('##########')
|
424
443
|
io.puts('# Tokens #')
|
425
444
|
io.puts('##########')
|
426
445
|
io.puts
|
427
|
-
|
446
|
+
|
428
447
|
max_token_len = @grammar.terms.inject(0) { |m, t| if t.length > m then t.length else m end }
|
429
|
-
|
448
|
+
|
430
449
|
@grammar.terms.sort {|a,b| a.to_s <=> b.to_s }.each do |term|
|
431
450
|
io.print("\t#{term}")
|
432
|
-
|
451
|
+
|
433
452
|
if (prec = @token_precs[term])
|
434
453
|
io.print(' ' * (max_token_len - term.length))
|
435
454
|
io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
|
436
455
|
end
|
437
|
-
|
456
|
+
|
438
457
|
io.puts
|
439
458
|
end
|
440
|
-
|
459
|
+
|
441
460
|
io.puts
|
442
|
-
|
461
|
+
|
443
462
|
io.puts('#####################')
|
444
463
|
io.puts('# Table Information #')
|
445
464
|
io.puts('#####################')
|
446
465
|
io.puts
|
447
|
-
|
466
|
+
|
448
467
|
io.puts("\tStart symbol: #{@grammar.start_symbol}'")
|
449
468
|
io.puts
|
450
|
-
|
469
|
+
|
451
470
|
io.puts("\tTotal number of states: #{@states.length}")
|
452
471
|
io.puts
|
453
|
-
|
472
|
+
|
454
473
|
io.puts("\tTotal conflicts: #{@conflicts.values.flatten(1).length}")
|
455
474
|
io.puts
|
456
|
-
|
475
|
+
|
457
476
|
@conflicts.each do |state_id, conflicts|
|
458
477
|
io.puts("\tState #{state_id} has #{conflicts.length} conflict(s)")
|
459
478
|
end
|
460
|
-
|
479
|
+
|
461
480
|
io.puts if not @conflicts.empty?
|
462
|
-
|
481
|
+
|
463
482
|
# Print the parse table.
|
464
483
|
io.puts('###############')
|
465
484
|
io.puts('# Parse Table #')
|
466
485
|
io.puts('###############')
|
467
486
|
io.puts
|
468
|
-
|
487
|
+
|
469
488
|
each_state do |state|
|
470
489
|
io.puts("State #{state.id}:")
|
471
490
|
io.puts
|
472
|
-
|
491
|
+
|
473
492
|
io.puts("\t# ITEMS #")
|
474
493
|
max = state.items.inject(0) do |max, item|
|
475
494
|
if item.lhs.to_s.length > max then item.lhs.to_s.length else max end
|
476
495
|
end
|
477
|
-
|
496
|
+
|
478
497
|
state.each do |item|
|
479
498
|
io.puts("\t#{item.to_s(max)}")
|
480
499
|
end
|
481
|
-
|
500
|
+
|
482
501
|
io.puts
|
483
502
|
io.puts("\t# ACTIONS #")
|
484
|
-
|
503
|
+
|
485
504
|
state.actions.keys.sort {|a,b| a.to_s <=> b.to_s}.each do |sym|
|
486
505
|
state.actions[sym].each do |action|
|
487
506
|
io.puts("\tOn #{sym} #{action}")
|
488
507
|
end
|
489
508
|
end
|
490
|
-
|
509
|
+
|
491
510
|
io.puts
|
492
511
|
io.puts("\t# CONFLICTS #")
|
493
|
-
|
512
|
+
|
494
513
|
if @conflicts[state.id].length == 0
|
495
514
|
io.puts("\tNone\n\n")
|
496
515
|
else
|
497
516
|
@conflicts[state.id].each do |conflict|
|
498
517
|
type, sym = conflict
|
499
|
-
|
518
|
+
|
500
519
|
io.print("\t#{if type == :SR then "Shift/Reduce" else "Reduce/Reduce" end} conflict")
|
501
|
-
|
520
|
+
|
502
521
|
io.puts(" on #{sym}")
|
503
522
|
end
|
504
|
-
|
523
|
+
|
505
524
|
io.puts
|
506
525
|
end
|
507
526
|
end
|
508
|
-
|
527
|
+
|
509
528
|
# Close any IO objects that aren't $stdout.
|
510
529
|
io.close if io.is_a?(IO) and io != $stdout
|
511
530
|
else
|
512
531
|
raise ParserConstructionException, 'Parser.explain called outside of finalize.'
|
513
532
|
end
|
514
533
|
end
|
515
|
-
|
534
|
+
|
516
535
|
# This method will finalize the parser causing the construction
|
517
536
|
# of states and their actions, and the resolution of conflicts
|
518
537
|
# using lookahead and precedence information.
|
519
|
-
#
|
538
|
+
#
|
520
539
|
# No calls to {Parser.production} may appear after the call to
|
521
540
|
# Parser.finalize.
|
522
541
|
#
|
@@ -529,58 +548,63 @@ module RLTK
|
|
529
548
|
#
|
530
549
|
# @return [void]
|
531
550
|
def finalize(opts = {})
|
532
|
-
|
551
|
+
|
552
|
+
if @grammar.productions.empty?
|
553
|
+
raise ParserConstructionException,
|
554
|
+
"Parser has no productions. Cowardly refusing to construct an empty parser."
|
555
|
+
end
|
556
|
+
|
533
557
|
# Get the full options hash.
|
534
558
|
opts = build_finalize_opts(opts)
|
535
|
-
|
559
|
+
|
536
560
|
# Get the name of the file in which the parser is defined.
|
537
561
|
#
|
538
562
|
# FIXME: See why this is failing for the simple ListParser example.
|
539
563
|
def_file = caller()[2].split(':')[0] if opts[:use]
|
540
|
-
|
564
|
+
|
541
565
|
# Check to make sure we can load the necessary information
|
542
566
|
# from the specified object.
|
543
567
|
if opts[:use] and (
|
544
568
|
(opts[:use].is_a?(String) and File.exists?(opts[:use]) and File.mtime(opts[:use]) > File.mtime(def_file)) or
|
545
569
|
(opts[:use].is_a?(File) and opts[:use].mtime > File.mtime(def_file))
|
546
570
|
)
|
547
|
-
|
571
|
+
|
548
572
|
file = self.get_io(opts[:use], 'r')
|
549
|
-
|
573
|
+
|
550
574
|
# Un-marshal our saved data structures.
|
551
575
|
file.flock(File::LOCK_SH)
|
552
576
|
@lh_sides, @states, @symbols = Marshal.load(file)
|
553
577
|
file.flock(File::LOCK_UN)
|
554
|
-
|
578
|
+
|
555
579
|
# Close the file if we opened it.
|
556
580
|
file.close if opts[:use].is_a?(String)
|
557
|
-
|
581
|
+
|
558
582
|
# Remove any un-needed data and return.
|
559
583
|
return self.clean
|
560
584
|
end
|
561
|
-
|
585
|
+
|
562
586
|
# Grab all of the symbols that comprise the grammar
|
563
587
|
# (besides the start symbol).
|
564
588
|
@symbols = @grammar.symbols << :ERROR
|
565
|
-
|
589
|
+
|
566
590
|
# Add our starting state to the state list.
|
567
591
|
@start_symbol = (@grammar.start_symbol.to_s + '\'').to_sym
|
568
592
|
start_production, _ = @grammar.production(@start_symbol, @grammar.start_symbol).first
|
569
593
|
start_state = State.new(@symbols, [start_production.to_item])
|
570
|
-
|
594
|
+
|
571
595
|
start_state.close(@grammar.productions)
|
572
|
-
|
596
|
+
|
573
597
|
self.add_state(start_state)
|
574
|
-
|
598
|
+
|
575
599
|
# Translate the precedence of productions from tokens to
|
576
600
|
# (associativity, precedence) pairs.
|
577
601
|
@production_precs.map! { |prec| @token_precs[prec] }
|
578
|
-
|
602
|
+
|
579
603
|
# Build the rest of the transition table.
|
580
604
|
each_state do |state|
|
581
605
|
#Transition states.
|
582
606
|
tstates = Hash.new { |h,k| h[k] = State.new(@symbols) }
|
583
|
-
|
607
|
+
|
584
608
|
#Bin each item in this set into reachable transition
|
585
609
|
#states.
|
586
610
|
state.each do |item|
|
@@ -588,7 +612,7 @@ module RLTK
|
|
588
612
|
tstates[next_symbol] << item.copy
|
589
613
|
end
|
590
614
|
end
|
591
|
-
|
615
|
+
|
592
616
|
# For each transition state:
|
593
617
|
# 1) Get transition symbol
|
594
618
|
# 2) Advance dot
|
@@ -596,15 +620,15 @@ module RLTK
|
|
596
620
|
# 4) Get state id and add transition
|
597
621
|
tstates.each do |symbol, tstate|
|
598
622
|
tstate.each { |item| item.advance }
|
599
|
-
|
623
|
+
|
600
624
|
tstate.close(@grammar.productions)
|
601
|
-
|
625
|
+
|
602
626
|
id = self.add_state(tstate)
|
603
|
-
|
627
|
+
|
604
628
|
# Add Goto and Shift actions.
|
605
629
|
state.on(symbol, CFG::is_nonterminal?(symbol) ? GoTo.new(id) : Shift.new(id))
|
606
630
|
end
|
607
|
-
|
631
|
+
|
608
632
|
# Find the Accept and Reduce actions for this state.
|
609
633
|
state.each do |item|
|
610
634
|
if item.at_end?
|
@@ -616,35 +640,35 @@ module RLTK
|
|
616
640
|
end
|
617
641
|
end
|
618
642
|
end
|
619
|
-
|
643
|
+
|
620
644
|
# Build the production.id -> production.lhs map.
|
621
645
|
@grammar.productions(:id).each { |id, production| @lh_sides[id] = production.lhs }
|
622
|
-
|
646
|
+
|
623
647
|
# Prune the parsing table for unnecessary reduce actions.
|
624
648
|
self.prune(opts[:lookahead], opts[:precedence])
|
625
|
-
|
649
|
+
|
626
650
|
# Check the parser for inconsistencies.
|
627
651
|
self.check_sanity
|
628
|
-
|
652
|
+
|
629
653
|
# Print the table if requested.
|
630
654
|
self.explain(opts[:explain]) if opts[:explain]
|
631
|
-
|
655
|
+
|
632
656
|
# Remove any data that is no longer needed.
|
633
657
|
self.clean
|
634
|
-
|
658
|
+
|
635
659
|
# Store the parser's final data structures if requested.
|
636
660
|
if opts[:use]
|
637
661
|
io = self.get_io(opts[:use])
|
638
|
-
|
662
|
+
|
639
663
|
io.flock(File::LOCK_EX) if io.is_a?(File)
|
640
664
|
Marshal.dump([@lh_sides, @states, @symbols], io)
|
641
665
|
io.flock(File::LOCK_UN) if io.is_a?(File)
|
642
|
-
|
666
|
+
|
643
667
|
# Close the IO object if we opened it.
|
644
668
|
io.close if opts[:use].is_a?(String)
|
645
669
|
end
|
646
670
|
end
|
647
|
-
|
671
|
+
|
648
672
|
# Converts an object into an IO object as appropriate.
|
649
673
|
#
|
650
674
|
# @param [Object] o Object to be converted into an IO object.
|
@@ -662,7 +686,7 @@ module RLTK
|
|
662
686
|
false
|
663
687
|
end
|
664
688
|
end
|
665
|
-
|
689
|
+
|
666
690
|
# Iterate over the parser's states.
|
667
691
|
#
|
668
692
|
# @yieldparam [State] state One of the parser automaton's state objects
|
@@ -675,16 +699,16 @@ module RLTK
|
|
675
699
|
current_state += 1
|
676
700
|
end
|
677
701
|
end
|
678
|
-
|
702
|
+
|
679
703
|
# @return [CFG] The grammar that can be parsed by this Parser.
|
680
704
|
def grammar
|
681
705
|
@grammar.clone
|
682
706
|
end
|
683
|
-
|
707
|
+
|
684
708
|
# This method generates and memoizes the G' grammar used to
|
685
709
|
# calculate the LALR(1) lookahead sets. Information about this
|
686
710
|
# grammar and its use can be found in the following paper:
|
687
|
-
#
|
711
|
+
#
|
688
712
|
# Simple Computation of LALR(1) Lookahead Sets
|
689
713
|
# Manuel E. Bermudez and George Logothetis
|
690
714
|
# Information Processing Letters 31 - 1989
|
@@ -693,33 +717,33 @@ module RLTK
|
|
693
717
|
def grammar_prime
|
694
718
|
if not @grammar_prime
|
695
719
|
@grammar_prime = CFG.new
|
696
|
-
|
720
|
+
|
697
721
|
each_state do |state|
|
698
722
|
state.each do |item|
|
699
723
|
lhs = "#{state.id}_#{item.next_symbol}".to_sym
|
700
|
-
|
724
|
+
|
701
725
|
next unless CFG::is_nonterminal?(item.next_symbol) and not @grammar_prime.productions.keys.include?(lhs)
|
702
|
-
|
726
|
+
|
703
727
|
@grammar.productions[item.next_symbol].each do |production|
|
704
728
|
rhs = ''
|
705
|
-
|
729
|
+
|
706
730
|
cstate = state
|
707
|
-
|
731
|
+
|
708
732
|
production.rhs.each do |symbol|
|
709
733
|
rhs += "#{cstate.id}_#{symbol} "
|
710
|
-
|
734
|
+
|
711
735
|
cstate = @states[cstate.on?(symbol).first.id]
|
712
736
|
end
|
713
|
-
|
737
|
+
|
714
738
|
@grammar_prime.production(lhs, rhs)
|
715
739
|
end
|
716
740
|
end
|
717
741
|
end
|
718
742
|
end
|
719
|
-
|
743
|
+
|
720
744
|
@grammar_prime
|
721
745
|
end
|
722
|
-
|
746
|
+
|
723
747
|
# Inform the parser core that a conflict has been detected.
|
724
748
|
#
|
725
749
|
# @param [Integer] state_id ID of the state where the conflict was encountered.
|
@@ -730,7 +754,7 @@ module RLTK
|
|
730
754
|
def inform_conflict(state_id, type, sym)
|
731
755
|
@conflicts[state_id] << [type, sym]
|
732
756
|
end
|
733
|
-
|
757
|
+
|
734
758
|
# This method is used to specify that the symbols in *symbols*
|
735
759
|
# are left-associative. Subsequent calls to this method will
|
736
760
|
# give their arguments higher precedence.
|
@@ -740,12 +764,12 @@ module RLTK
|
|
740
764
|
# @return [void]
|
741
765
|
def left(*symbols)
|
742
766
|
prec_level = @prec_counts[:left] += 1
|
743
|
-
|
767
|
+
|
744
768
|
symbols.map { |s| s.to_sym }.each do |sym|
|
745
769
|
@token_precs[sym] = [:left, prec_level]
|
746
770
|
end
|
747
771
|
end
|
748
|
-
|
772
|
+
|
749
773
|
# This method is used to specify that the symbols in *symbols*
|
750
774
|
# are non-associative.
|
751
775
|
#
|
@@ -754,25 +778,25 @@ module RLTK
|
|
754
778
|
# @return [void]
|
755
779
|
def nonassoc(*symbols)
|
756
780
|
prec_level = @prec_counts[:non] += 1
|
757
|
-
|
781
|
+
|
758
782
|
symbols.map { |s| s.to_sym }.each do |sym|
|
759
783
|
@token_precs[sym] = [:non, prec_level]
|
760
784
|
end
|
761
785
|
end
|
762
|
-
|
786
|
+
|
763
787
|
# Adds productions and actions for parsing nonempty lists.
|
764
788
|
#
|
765
789
|
# @see CFG#nonempty_list_production
|
766
790
|
def build_nonempty_list_production(symbol, list_elements, separator = '')
|
767
791
|
@grammar.build_nonempty_list_production(symbol, list_elements, separator)
|
768
792
|
end
|
769
|
-
alias :nonempty_list :build_nonempty_list_production
|
770
|
-
|
793
|
+
alias :nonempty_list :build_nonempty_list_production
|
794
|
+
|
771
795
|
# This function is where actual parsing takes place. The
|
772
796
|
# _tokens_ argument must be an array of Token objects, the last
|
773
797
|
# of which has type EOS. By default this method will return the
|
774
798
|
# value computed by the first successful parse tree found.
|
775
|
-
#
|
799
|
+
#
|
776
800
|
# Additional information about the parsing options can be found in
|
777
801
|
# the main documentation.
|
778
802
|
#
|
@@ -787,27 +811,27 @@ module RLTK
|
|
787
811
|
# @return [Object, Array<Object>] Result or results of parsing the given tokens.
|
788
812
|
def parse(tokens, opts = {})
|
789
813
|
# Get the full options hash.
|
790
|
-
opts
|
791
|
-
v
|
792
|
-
|
814
|
+
opts = build_parse_opts(opts)
|
815
|
+
v = opts[:verbose]
|
816
|
+
|
793
817
|
if opts[:verbose]
|
794
818
|
v.puts("Input tokens:")
|
795
819
|
v.puts(tokens.map { |t| t.type }.inspect)
|
796
820
|
v.puts
|
797
821
|
end
|
798
|
-
|
822
|
+
|
799
823
|
# Stack IDs to keep track of them during parsing.
|
800
824
|
stack_id = 0
|
801
|
-
|
825
|
+
|
802
826
|
# Error mode indicators.
|
803
827
|
error_mode = false
|
804
828
|
reduction_guard = false
|
805
|
-
|
829
|
+
|
806
830
|
# Our various list of stacks.
|
807
831
|
accepted = []
|
808
832
|
moving_on = []
|
809
833
|
processing = [ParseStack.new(stack_id += 1)]
|
810
|
-
|
834
|
+
|
811
835
|
# Iterate over the tokens. We don't proceed to the
|
812
836
|
# next token until every stack is done with the
|
813
837
|
# current one.
|
@@ -815,33 +839,36 @@ module RLTK
|
|
815
839
|
# Check to make sure this token was seen in the
|
816
840
|
# grammar definition.
|
817
841
|
raise BadToken if not @symbols.include?(token.type)
|
818
|
-
|
842
|
+
|
819
843
|
v.puts("Current token: #{token.type}#{if token.value then "(#{token.value})" end}") if v
|
820
|
-
|
844
|
+
|
821
845
|
# Iterate over the stacks until each one is done.
|
822
846
|
while (stack = processing.shift)
|
847
|
+
# Execute any token hooks in this stack's environment.
|
848
|
+
@token_hooks[token.type].each { |hook| opts[:env].instance_exec &hook}
|
849
|
+
|
823
850
|
# Get the available actions for this stack.
|
824
851
|
actions = @states[stack.state].on?(token.type)
|
825
|
-
|
852
|
+
|
826
853
|
if actions.empty?
|
827
854
|
# If we are already in error mode and there
|
828
855
|
# are no actions we skip this token.
|
829
856
|
if error_mode
|
830
857
|
v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}") if v
|
831
|
-
|
858
|
+
|
832
859
|
# Add the current token to the array
|
833
860
|
# that corresponds to the output value
|
834
861
|
# for the ERROR token.
|
835
862
|
stack.output_stack.last << token
|
836
|
-
|
863
|
+
|
837
864
|
moving_on << stack
|
838
865
|
next
|
839
866
|
end
|
840
|
-
|
867
|
+
|
841
868
|
# We would be dropping the last stack so we
|
842
869
|
# are going to go into error mode.
|
843
870
|
if accepted.empty? and moving_on.empty? and processing.empty?
|
844
|
-
|
871
|
+
|
845
872
|
if v
|
846
873
|
v.puts
|
847
874
|
v.puts('Current stack:')
|
@@ -850,7 +877,7 @@ module RLTK
|
|
850
877
|
v.puts("\tOutput Stack:\t#{stack.output_stack.inspect}")
|
851
878
|
v.puts
|
852
879
|
end
|
853
|
-
|
880
|
+
|
854
881
|
# Try and find a valid error state.
|
855
882
|
while stack.state
|
856
883
|
if (actions = @states[stack.state].on?(:ERROR)).empty?
|
@@ -860,17 +887,17 @@ module RLTK
|
|
860
887
|
else
|
861
888
|
# Enter the found error state.
|
862
889
|
stack.push(actions.first.id, [token], :ERROR, token.position)
|
863
|
-
|
890
|
+
|
864
891
|
break
|
865
892
|
end
|
866
893
|
end
|
867
|
-
|
894
|
+
|
868
895
|
if stack.state
|
869
896
|
# We found a valid error state.
|
870
897
|
error_mode = reduction_guard = true
|
871
898
|
opts[:env].he = true
|
872
899
|
moving_on << stack
|
873
|
-
|
900
|
+
|
874
901
|
if v
|
875
902
|
v.puts('Invalid input encountered. Entering error handling mode.')
|
876
903
|
v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}")
|
@@ -879,20 +906,20 @@ module RLTK
|
|
879
906
|
# No valid error states could be
|
880
907
|
# found. Time to print a message
|
881
908
|
# and leave.
|
882
|
-
|
909
|
+
|
883
910
|
v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
|
884
911
|
end
|
885
912
|
else
|
886
913
|
v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
|
887
914
|
end
|
888
|
-
|
915
|
+
|
889
916
|
next
|
890
917
|
end
|
891
|
-
|
918
|
+
|
892
919
|
# Make (stack, action) pairs, duplicating the
|
893
920
|
# stack as necessary.
|
894
921
|
pairs = [[stack, actions.pop]] + actions.map {|action| [stack.branch(stack_id += 1), action] }
|
895
|
-
|
922
|
+
|
896
923
|
pairs.each do |stack, action|
|
897
924
|
if v
|
898
925
|
v.puts
|
@@ -903,127 +930,127 @@ module RLTK
|
|
903
930
|
v.puts
|
904
931
|
v.puts("Action taken: #{action.to_s}")
|
905
932
|
end
|
906
|
-
|
933
|
+
|
907
934
|
if action.is_a?(Accept)
|
908
935
|
if opts[:accept] == :all
|
909
936
|
accepted << stack
|
910
937
|
else
|
911
938
|
v.puts('Accepting input.') if v
|
912
939
|
opts[:parse_tree].puts(stack.tree) if opts[:parse_tree]
|
913
|
-
|
940
|
+
|
914
941
|
if opts[:env].he
|
915
942
|
raise HandledError.new(opts[:env].errors, stack.result)
|
916
943
|
else
|
917
944
|
return stack.result
|
918
945
|
end
|
919
946
|
end
|
920
|
-
|
947
|
+
|
921
948
|
elsif action.is_a?(Reduce)
|
922
949
|
# Get the production associated with this reduction.
|
923
950
|
production_proc, pop_size = @procs[action.id]
|
924
|
-
|
951
|
+
|
925
952
|
if not production_proc
|
926
953
|
raise InternalParserException, "No production #{action.id} found."
|
927
954
|
end
|
928
|
-
|
955
|
+
|
929
956
|
args, positions = stack.pop(pop_size)
|
930
957
|
opts[:env].set_positions(positions)
|
931
|
-
|
958
|
+
|
932
959
|
if not production_proc.selections.empty?
|
933
960
|
args = args.values_at(*production_proc.selections)
|
934
961
|
end
|
935
|
-
|
962
|
+
|
936
963
|
result =
|
937
964
|
if production_proc.arg_type == :array
|
938
965
|
opts[:env].instance_exec(args, &production_proc)
|
939
966
|
else
|
940
967
|
opts[:env].instance_exec(*args, &production_proc)
|
941
968
|
end
|
942
|
-
|
969
|
+
|
943
970
|
if (goto = @states[stack.state].on?(@lh_sides[action.id]).first)
|
944
|
-
|
971
|
+
|
945
972
|
v.puts("Going to state #{goto.id}.\n") if v
|
946
|
-
|
973
|
+
|
947
974
|
pos0 = nil
|
948
|
-
|
975
|
+
|
949
976
|
if args.empty?
|
950
977
|
# Empty productions need to be
|
951
978
|
# handled specially.
|
952
979
|
pos0 = stack.position
|
953
|
-
|
980
|
+
|
954
981
|
pos0.stream_offset += pos0.length + 1
|
955
982
|
pos0.line_offset += pos0.length + 1
|
956
|
-
|
983
|
+
|
957
984
|
pos0.length = 0
|
958
985
|
else
|
959
986
|
pos0 = opts[:env].pos( 0)
|
960
987
|
pos1 = opts[:env].pos(-1)
|
961
|
-
|
988
|
+
|
962
989
|
pos0.length = (pos1.stream_offset + pos1.length) - pos0.stream_offset
|
963
990
|
end
|
964
|
-
|
991
|
+
|
965
992
|
stack.push(goto.id, result, @lh_sides[action.id], pos0)
|
966
993
|
else
|
967
994
|
raise InternalParserException, "No GoTo action found in state #{stack.state} " +
|
968
995
|
"after reducing by production #{action.id}"
|
969
996
|
end
|
970
|
-
|
997
|
+
|
971
998
|
# This stack is NOT ready for the next
|
972
999
|
# token.
|
973
1000
|
processing << stack
|
974
|
-
|
1001
|
+
|
975
1002
|
# Exit error mode if necessary.
|
976
1003
|
error_mode = false if error_mode and not reduction_guard
|
977
|
-
|
1004
|
+
|
978
1005
|
elsif action.is_a?(Shift)
|
979
1006
|
stack.push(action.id, token.value, token.type, token.position)
|
980
|
-
|
1007
|
+
|
981
1008
|
# This stack is ready for the next
|
982
1009
|
# token.
|
983
1010
|
moving_on << stack
|
984
|
-
|
1011
|
+
|
985
1012
|
# Exit error mode.
|
986
1013
|
error_mode = false
|
987
1014
|
end
|
988
1015
|
end
|
989
1016
|
end
|
990
|
-
|
1017
|
+
|
991
1018
|
v.puts("\n\n") if v
|
992
|
-
|
1019
|
+
|
993
1020
|
processing = moving_on
|
994
1021
|
moving_on = []
|
995
|
-
|
1022
|
+
|
996
1023
|
# If we don't have any active stacks at this point the
|
997
1024
|
# string isn't in the language.
|
998
1025
|
if opts[:accept] == :first and processing.length == 0
|
999
1026
|
v.close if v and v != $stdout
|
1000
1027
|
raise NotInLanguage.new(tokens[0...index], tokens[index], tokens[index.next..-1])
|
1001
1028
|
end
|
1002
|
-
|
1029
|
+
|
1003
1030
|
reduction_guard = false
|
1004
1031
|
end
|
1005
|
-
|
1032
|
+
|
1006
1033
|
# If we have reached this point we are accepting all parse
|
1007
1034
|
# trees.
|
1008
1035
|
if v
|
1009
1036
|
v.puts("Accepting input with #{accepted.length} derivation(s).")
|
1010
|
-
|
1037
|
+
|
1011
1038
|
v.close if v != $stdout
|
1012
1039
|
end
|
1013
|
-
|
1040
|
+
|
1014
1041
|
accepted.each do |stack|
|
1015
1042
|
opts[:parse_tree].puts(stack.tree)
|
1016
1043
|
end if opts[:parse_tree]
|
1017
|
-
|
1044
|
+
|
1018
1045
|
results = accepted.map { |stack| stack.result }
|
1019
|
-
|
1046
|
+
|
1020
1047
|
if opts[:env].he
|
1021
1048
|
raise HandledError.new(opts[:env].errors, results)
|
1022
1049
|
else
|
1023
1050
|
return results
|
1024
1051
|
end
|
1025
1052
|
end
|
1026
|
-
|
1053
|
+
|
1027
1054
|
# Adds a new production to the parser with a left-hand value of
|
1028
1055
|
# *symbol*. If *expression* is specified it is taken as the
|
1029
1056
|
# right-hand side of the production and *action* is associated
|
@@ -1040,34 +1067,34 @@ module RLTK
|
|
1040
1067
|
#
|
1041
1068
|
# @return [void]
|
1042
1069
|
def production(symbol, expression = nil, precedence = nil, arg_type = @default_arg_type, &action)
|
1043
|
-
|
1070
|
+
|
1044
1071
|
# Check the symbol.
|
1045
1072
|
if not (symbol.is_a?(Symbol) or symbol.is_a?(String)) or not CFG::is_nonterminal?(symbol)
|
1046
1073
|
raise ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
|
1047
1074
|
end
|
1048
|
-
|
1075
|
+
|
1049
1076
|
@grammar.curr_lhs = symbol.to_sym
|
1050
1077
|
@curr_prec = precedence
|
1051
|
-
|
1078
|
+
|
1052
1079
|
orig_dat = nil
|
1053
1080
|
if arg_type != @default_arg_type
|
1054
1081
|
orig_dat = @default_arg_type
|
1055
1082
|
@default_arg_type = arg_type
|
1056
1083
|
end
|
1057
|
-
|
1084
|
+
|
1058
1085
|
if expression
|
1059
1086
|
self.clause(expression, precedence, &action)
|
1060
1087
|
else
|
1061
1088
|
self.instance_exec(&action)
|
1062
1089
|
end
|
1063
|
-
|
1090
|
+
|
1064
1091
|
@default_arg_type = orig_dat if not orig_dat.nil?
|
1065
|
-
|
1092
|
+
|
1066
1093
|
@grammar.curr_lhs = nil
|
1067
1094
|
@curr_prec = nil
|
1068
1095
|
end
|
1069
1096
|
alias :p :production
|
1070
|
-
|
1097
|
+
|
1071
1098
|
# This method uses lookahead sets and precedence information to
|
1072
1099
|
# resolve conflicts and remove unnecessary reduce actions.
|
1073
1100
|
#
|
@@ -1077,41 +1104,41 @@ module RLTK
|
|
1077
1104
|
# @return [void]
|
1078
1105
|
def prune(do_lookahead, do_precedence)
|
1079
1106
|
terms = @grammar.terms
|
1080
|
-
|
1107
|
+
|
1081
1108
|
# If both options are false there is no pruning to do.
|
1082
1109
|
return if not (do_lookahead or do_precedence)
|
1083
|
-
|
1110
|
+
|
1084
1111
|
each_state do |state0|
|
1085
|
-
|
1112
|
+
|
1086
1113
|
#####################
|
1087
1114
|
# Lookahead Pruning #
|
1088
1115
|
#####################
|
1089
|
-
|
1116
|
+
|
1090
1117
|
if do_lookahead
|
1091
1118
|
# Find all of the reductions in this state.
|
1092
1119
|
reductions = state0.actions.values.flatten.uniq.select { |a| a.is_a?(Reduce) }
|
1093
|
-
|
1120
|
+
|
1094
1121
|
reductions.each do |reduction|
|
1095
1122
|
production = @grammar.productions(:id)[reduction.id]
|
1096
|
-
|
1123
|
+
|
1097
1124
|
lookahead = Array.new
|
1098
|
-
|
1125
|
+
|
1099
1126
|
# Build the lookahead set.
|
1100
1127
|
each_state do |state1|
|
1101
1128
|
if self.check_reachability(state1, state0, production.rhs)
|
1102
1129
|
lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
|
1103
1130
|
end
|
1104
1131
|
end
|
1105
|
-
|
1132
|
+
|
1106
1133
|
# Translate the G' follow symbols into G
|
1107
1134
|
# lookahead symbols.
|
1108
1135
|
lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq
|
1109
|
-
|
1136
|
+
|
1110
1137
|
# Here we remove the unnecessary reductions.
|
1111
1138
|
# If there are error productions we need to
|
1112
1139
|
# scale back the amount of pruning done.
|
1113
1140
|
pruning_candidates = terms - lookahead
|
1114
|
-
|
1141
|
+
|
1115
1142
|
if terms.include?(:ERROR)
|
1116
1143
|
pruning_candidates.each do |sym|
|
1117
1144
|
state0.actions[sym].delete(reduction) if state0.conflict_on?(sym)
|
@@ -1121,23 +1148,23 @@ module RLTK
|
|
1121
1148
|
end
|
1122
1149
|
end
|
1123
1150
|
end
|
1124
|
-
|
1151
|
+
|
1125
1152
|
########################################
|
1126
1153
|
# Precedence and Associativity Pruning #
|
1127
1154
|
########################################
|
1128
|
-
|
1155
|
+
|
1129
1156
|
if do_precedence
|
1130
1157
|
state0.actions.each do |symbol, actions|
|
1131
|
-
|
1158
|
+
|
1132
1159
|
# We are only interested in pruning actions
|
1133
1160
|
# for terminal symbols.
|
1134
1161
|
next unless CFG::is_terminal?(symbol)
|
1135
|
-
|
1136
|
-
# Skip to the next one if there is no
|
1162
|
+
|
1163
|
+
# Skip to the next one if there is no
|
1137
1164
|
# possibility of a Shift/Reduce or
|
1138
1165
|
# Reduce/Reduce conflict.
|
1139
1166
|
next unless actions and actions.length > 1
|
1140
|
-
|
1167
|
+
|
1141
1168
|
resolve_ok = actions.inject(true) do |m, a|
|
1142
1169
|
if a.is_a?(Reduce)
|
1143
1170
|
m and @production_precs[a.id]
|
@@ -1145,18 +1172,18 @@ module RLTK
|
|
1145
1172
|
m
|
1146
1173
|
end
|
1147
1174
|
end and actions.inject(false) { |m, a| m or a.is_a?(Shift) }
|
1148
|
-
|
1175
|
+
|
1149
1176
|
if @token_precs[symbol] and resolve_ok
|
1150
1177
|
max_prec = 0
|
1151
1178
|
selected_action = nil
|
1152
|
-
|
1179
|
+
|
1153
1180
|
# Grab the associativity and precedence
|
1154
1181
|
# for the input token.
|
1155
1182
|
tassoc, tprec = @token_precs[symbol]
|
1156
|
-
|
1183
|
+
|
1157
1184
|
actions.each do |a|
|
1158
1185
|
assoc, prec = a.is_a?(Shift) ? [tassoc, tprec] : @production_precs[a.id]
|
1159
|
-
|
1186
|
+
|
1160
1187
|
# If two actions have the same precedence we
|
1161
1188
|
# will only replace the previous production if:
|
1162
1189
|
# * The token is left associative and the current action is a Reduce
|
@@ -1164,20 +1191,20 @@ module RLTK
|
|
1164
1191
|
if prec > max_prec or (prec == max_prec and tassoc == (a.is_a?(Shift) ? :right : :left))
|
1165
1192
|
max_prec = prec
|
1166
1193
|
selected_action = a
|
1167
|
-
|
1194
|
+
|
1168
1195
|
elsif prec == max_prec and assoc == :nonassoc
|
1169
1196
|
raise ParserConstructionException, 'Non-associative token found during conflict resolution.'
|
1170
|
-
|
1197
|
+
|
1171
1198
|
end
|
1172
1199
|
end
|
1173
|
-
|
1200
|
+
|
1174
1201
|
state0.actions[symbol] = [selected_action]
|
1175
1202
|
end
|
1176
1203
|
end
|
1177
1204
|
end
|
1178
1205
|
end
|
1179
1206
|
end
|
1180
|
-
|
1207
|
+
|
1181
1208
|
# This method is used to specify that the symbols in _symbols_
|
1182
1209
|
# are right associative. Subsequent calls to this method will
|
1183
1210
|
# give their arguments higher precedence.
|
@@ -1187,12 +1214,12 @@ module RLTK
|
|
1187
1214
|
# @return [void]
|
1188
1215
|
def right(*symbols)
|
1189
1216
|
prec_level = @prec_counts[:right] += 1
|
1190
|
-
|
1217
|
+
|
1191
1218
|
symbols.map { |s| s.to_sym }.each do |sym|
|
1192
1219
|
@token_precs[sym] = [:right, prec_level]
|
1193
1220
|
end
|
1194
1221
|
end
|
1195
|
-
|
1222
|
+
|
1196
1223
|
# Changes the starting symbol of the parser.
|
1197
1224
|
#
|
1198
1225
|
# @param [Symbol] symbol The starting symbol of the grammar.
|
@@ -1201,27 +1228,43 @@ module RLTK
|
|
1201
1228
|
def start(symbol)
|
1202
1229
|
@grammar.start symbol
|
1203
1230
|
end
|
1231
|
+
|
1232
|
+
# Add a hook that is executed whenever *sym* is seen.
|
1233
|
+
#
|
1234
|
+
# The *sym* must be a terminal symbol.
|
1235
|
+
#
|
1236
|
+
# @param [Symbol] sym Symbol to hook into
|
1237
|
+
# @param [Proc] proc Code to execute when the block is seen
|
1238
|
+
#
|
1239
|
+
# @return [void]
|
1240
|
+
def token_hook(sym, &proc)
|
1241
|
+
if CFG::is_terminal?(sym)
|
1242
|
+
@token_hooks[sym] << proc
|
1243
|
+
else
|
1244
|
+
raise 'Method token_hook expects `sym` to be non-terminal.'
|
1245
|
+
end
|
1246
|
+
end
|
1204
1247
|
end
|
1205
|
-
|
1248
|
+
|
1206
1249
|
####################
|
1207
1250
|
# Instance Methods #
|
1208
1251
|
####################
|
1209
|
-
|
1252
|
+
|
1210
1253
|
# Instantiates a new parser and creates an environment to be
|
1211
1254
|
# used for subsequent calls.
|
1212
1255
|
def initialize
|
1213
1256
|
@env = self.class::Environment.new
|
1214
1257
|
end
|
1215
|
-
|
1258
|
+
|
1216
1259
|
# Parses the given token stream using the encapsulated environment.
|
1217
1260
|
#
|
1218
1261
|
# @see .parse
|
1219
1262
|
def parse(tokens, opts = {})
|
1220
1263
|
self.class.parse(tokens, {:env => @env}.update(opts))
|
1221
1264
|
end
|
1222
|
-
|
1265
|
+
|
1223
1266
|
################################
|
1224
|
-
|
1267
|
+
|
1225
1268
|
# All actions passed to Parser.production and Parser.clause are
|
1226
1269
|
# evaluated inside an instance of the Environment class or its
|
1227
1270
|
# subclass (which must have the same name).
|
@@ -1230,24 +1273,24 @@ module RLTK
|
|
1230
1273
|
#
|
1231
1274
|
# @return [Boolean]
|
1232
1275
|
attr_accessor :he
|
1233
|
-
|
1276
|
+
|
1234
1277
|
# A list of all objects added using the *error* method.
|
1235
1278
|
#
|
1236
1279
|
# @return [Array<Object>]
|
1237
1280
|
attr_reader :errors
|
1238
|
-
|
1281
|
+
|
1239
1282
|
# Instantiate a new Environment object.
|
1240
1283
|
def initialize
|
1241
1284
|
self.reset
|
1242
1285
|
end
|
1243
|
-
|
1286
|
+
|
1244
1287
|
# Adds an object to the list of errors.
|
1245
1288
|
#
|
1246
1289
|
# @return [void]
|
1247
1290
|
def error(o)
|
1248
1291
|
@errors << o
|
1249
1292
|
end
|
1250
|
-
|
1293
|
+
|
1251
1294
|
# Returns a StreamPosition object for the symbol at location n,
|
1252
1295
|
# indexed from zero.
|
1253
1296
|
#
|
@@ -1257,7 +1300,7 @@ module RLTK
|
|
1257
1300
|
def pos(n)
|
1258
1301
|
@positions[n]
|
1259
1302
|
end
|
1260
|
-
|
1303
|
+
|
1261
1304
|
# Reset any variables that need to be re-initialized between
|
1262
1305
|
# parse calls.
|
1263
1306
|
#
|
@@ -1266,7 +1309,7 @@ module RLTK
|
|
1266
1309
|
@errors = Array.new
|
1267
1310
|
@he = false
|
1268
1311
|
end
|
1269
|
-
|
1312
|
+
|
1270
1313
|
# Setter for the *positions* array.
|
1271
1314
|
#
|
1272
1315
|
# @param [Array<StreamPosition>] positions
|
@@ -1276,19 +1319,19 @@ module RLTK
|
|
1276
1319
|
@positions = positions
|
1277
1320
|
end
|
1278
1321
|
end
|
1279
|
-
|
1322
|
+
|
1280
1323
|
# The ParseStack class is used by a Parser to keep track of state
|
1281
1324
|
# during parsing.
|
1282
1325
|
class ParseStack
|
1283
1326
|
# @return [Integer] ID of this parse stack.
|
1284
1327
|
attr_reader :id
|
1285
|
-
|
1328
|
+
|
1286
1329
|
# @return [Array<Object>] Array of objects produced by {Reduce} actions.
|
1287
1330
|
attr_reader :output_stack
|
1288
|
-
|
1331
|
+
|
1289
1332
|
# @return [Array<Integer>] Array of states used when performing {Reduce} actions.
|
1290
1333
|
attr_reader :state_stack
|
1291
|
-
|
1334
|
+
|
1292
1335
|
# Instantiate a new ParseStack object.
|
1293
1336
|
#
|
1294
1337
|
# @param [Integer] id ID for this parse stack. Used by GLR algorithm.
|
@@ -1300,16 +1343,16 @@ module RLTK
|
|
1300
1343
|
# @param [Array<StreamPosition>] positions Position data for symbols that have been shifted.
|
1301
1344
|
def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
|
1302
1345
|
@id = id
|
1303
|
-
|
1346
|
+
|
1304
1347
|
@node_stack = nstack
|
1305
1348
|
@output_stack = ostack
|
1306
1349
|
@state_stack = sstack
|
1307
|
-
|
1350
|
+
|
1308
1351
|
@connections = connections
|
1309
1352
|
@labels = labels
|
1310
1353
|
@positions = positions
|
1311
1354
|
end
|
1312
|
-
|
1355
|
+
|
1313
1356
|
# Branch this stack, effectively creating a new copy of its
|
1314
1357
|
# internal state.
|
1315
1358
|
#
|
@@ -1327,16 +1370,16 @@ module RLTK
|
|
1327
1370
|
# Check to see if we can obtain a deep copy.
|
1328
1371
|
if 0.respond_to?(:copy)
|
1329
1372
|
o.copy
|
1330
|
-
|
1373
|
+
|
1331
1374
|
else
|
1332
1375
|
begin o.clone rescue o end
|
1333
1376
|
end
|
1334
1377
|
end
|
1335
|
-
|
1378
|
+
|
1336
1379
|
ParseStack.new(new_id, new_output_stack, @state_stack.clone,
|
1337
1380
|
@node_stack.clone, @connections.clone, @labels.clone, @positions.clone)
|
1338
1381
|
end
|
1339
|
-
|
1382
|
+
|
1340
1383
|
# @return [StreamPosition] Position data for the last symbol on the stack.
|
1341
1384
|
def position
|
1342
1385
|
if @positions.empty?
|
@@ -1345,7 +1388,7 @@ module RLTK
|
|
1345
1388
|
@positions.last.clone
|
1346
1389
|
end
|
1347
1390
|
end
|
1348
|
-
|
1391
|
+
|
1349
1392
|
# Push new state and other information onto the stack.
|
1350
1393
|
#
|
1351
1394
|
# @param [Integer] state ID of the shifted state.
|
@@ -1360,14 +1403,14 @@ module RLTK
|
|
1360
1403
|
@node_stack << @labels.length
|
1361
1404
|
@labels << if CFG::is_terminal?(node0) and o then node0.to_s + "(#{o})" else node0 end
|
1362
1405
|
@positions << position
|
1363
|
-
|
1406
|
+
|
1364
1407
|
if CFG::is_nonterminal?(node0)
|
1365
1408
|
@cbuffer.each do |node1|
|
1366
1409
|
@connections << [@labels.length - 1, node1]
|
1367
1410
|
end
|
1368
1411
|
end
|
1369
1412
|
end
|
1370
|
-
|
1413
|
+
|
1371
1414
|
# Pop some number of objects off of the inside stacks.
|
1372
1415
|
#
|
1373
1416
|
# @param [Integer] n Number of objects to pop off the stack.
|
@@ -1375,15 +1418,15 @@ module RLTK
|
|
1375
1418
|
# @return [Array(Object, StreamPosition)] Values popped from the output and positions stacks.
|
1376
1419
|
def pop(n = 1)
|
1377
1420
|
@state_stack.pop(n)
|
1378
|
-
|
1421
|
+
|
1379
1422
|
# Pop the node stack so that the proper edges can be added
|
1380
1423
|
# when the production's left-hand side non-terminal is
|
1381
1424
|
# pushed onto the stack.
|
1382
1425
|
@cbuffer = @node_stack.pop(n)
|
1383
|
-
|
1426
|
+
|
1384
1427
|
[@output_stack.pop(n), @positions.pop(n)]
|
1385
1428
|
end
|
1386
|
-
|
1429
|
+
|
1387
1430
|
# Fetch the result stored in this ParseStack. If there is more
|
1388
1431
|
# than one object left on the output stack there is an error.
|
1389
1432
|
#
|
@@ -1395,48 +1438,48 @@ module RLTK
|
|
1395
1438
|
raise InternalParserException, "The parsing stack should have 1 element on the output stack, not #{@output_stack.length}."
|
1396
1439
|
end
|
1397
1440
|
end
|
1398
|
-
|
1441
|
+
|
1399
1442
|
# @return [Integer] Current state of this ParseStack.
|
1400
1443
|
def state
|
1401
1444
|
@state_stack.last
|
1402
1445
|
end
|
1403
|
-
|
1446
|
+
|
1404
1447
|
# @return [String] Representation of the parse tree in the DOT langauge.
|
1405
1448
|
def tree
|
1406
1449
|
tree = "digraph tree#{@id} {\n"
|
1407
|
-
|
1450
|
+
|
1408
1451
|
@labels.each_with_index do |label, i|
|
1409
1452
|
tree += "\tnode#{i} [label=\"#{label}\""
|
1410
|
-
|
1453
|
+
|
1411
1454
|
if CFG::is_terminal?(label)
|
1412
1455
|
tree += " shape=box"
|
1413
1456
|
end
|
1414
|
-
|
1457
|
+
|
1415
1458
|
tree += "];\n"
|
1416
1459
|
end
|
1417
|
-
|
1460
|
+
|
1418
1461
|
tree += "\n"
|
1419
|
-
|
1462
|
+
|
1420
1463
|
@connections.each do |from, to|
|
1421
1464
|
tree += "\tnode#{from} -> node#{to};\n"
|
1422
1465
|
end
|
1423
|
-
|
1466
|
+
|
1424
1467
|
tree += "}"
|
1425
1468
|
end
|
1426
1469
|
end
|
1427
|
-
|
1470
|
+
|
1428
1471
|
# The State class is used to represent sets of items and actions to be
|
1429
1472
|
# used during parsing.
|
1430
1473
|
class State
|
1431
1474
|
# @return [Integer] State's ID.
|
1432
1475
|
attr_accessor :id
|
1433
|
-
|
1476
|
+
|
1434
1477
|
# @return [Array<CFG::Item>] Item objects that comprise this state
|
1435
1478
|
attr_reader :items
|
1436
|
-
|
1479
|
+
|
1437
1480
|
# @return [Hash{Symbol => Array<Action>}] Maps lookahead symbols to actions
|
1438
1481
|
attr_reader :actions
|
1439
|
-
|
1482
|
+
|
1440
1483
|
# Instantiate a new State object.
|
1441
1484
|
#
|
1442
1485
|
# @param [Array<Symbol>] tokens Tokens that represent this state
|
@@ -1446,7 +1489,7 @@ module RLTK
|
|
1446
1489
|
@items = items
|
1447
1490
|
@actions = tokens.inject(Hash.new) { |h, t| h[t] = Array.new; h }
|
1448
1491
|
end
|
1449
|
-
|
1492
|
+
|
1450
1493
|
# Compare one State to another. Two States are equal if they
|
1451
1494
|
# have the same items or, if the items have been cleaned, if
|
1452
1495
|
# the States have the same ID.
|
@@ -1457,7 +1500,7 @@ module RLTK
|
|
1457
1500
|
def ==(other)
|
1458
1501
|
if self.items and other.items then self.items == other.items else self.id == other.id end
|
1459
1502
|
end
|
1460
|
-
|
1503
|
+
|
1461
1504
|
# Add a Reduce action to the state.
|
1462
1505
|
#
|
1463
1506
|
# @param [Production] production Production used to perform the reduction
|
@@ -1465,24 +1508,24 @@ module RLTK
|
|
1465
1508
|
# @return [void]
|
1466
1509
|
def add_reduction(production)
|
1467
1510
|
action = Reduce.new(production)
|
1468
|
-
|
1511
|
+
|
1469
1512
|
# Reduce actions are not allowed for the ERROR terminal.
|
1470
1513
|
@actions.each { |k, v| if CFG::is_terminal?(k) and k != :ERROR then v << action end }
|
1471
1514
|
end
|
1472
|
-
|
1515
|
+
|
1473
1516
|
# @param [CFG::Item] item Item to add to this state.
|
1474
1517
|
def append(item)
|
1475
1518
|
if item.is_a?(CFG::Item) and not @items.include?(item) then @items << item end
|
1476
1519
|
end
|
1477
1520
|
alias :<< :append
|
1478
|
-
|
1521
|
+
|
1479
1522
|
# Clean this State by removing the list of {CFG::Item} objects.
|
1480
1523
|
#
|
1481
1524
|
# @return [void]
|
1482
1525
|
def clean
|
1483
1526
|
@items = nil
|
1484
1527
|
end
|
1485
|
-
|
1528
|
+
|
1486
1529
|
# Close this state using *productions*.
|
1487
1530
|
#
|
1488
1531
|
# @param [Array<CFG::Production>] productions Productions used to close this state.
|
@@ -1495,7 +1538,7 @@ module RLTK
|
|
1495
1538
|
end
|
1496
1539
|
end
|
1497
1540
|
end
|
1498
|
-
|
1541
|
+
|
1499
1542
|
# Checks to see if there is a conflict in this state, given an
|
1500
1543
|
# input of *sym*. Returns :SR if a shift/reduce conflict is
|
1501
1544
|
# detected and :RR if a reduce/reduce conflict is detected. If
|
@@ -1505,20 +1548,20 @@ module RLTK
|
|
1505
1548
|
#
|
1506
1549
|
# @return [:SR, :RR, nil]
|
1507
1550
|
def conflict_on?(sym)
|
1508
|
-
|
1551
|
+
|
1509
1552
|
reductions = 0
|
1510
1553
|
shifts = 0
|
1511
|
-
|
1554
|
+
|
1512
1555
|
@actions[sym].each do |action|
|
1513
1556
|
if action.is_a?(Reduce)
|
1514
1557
|
reductions += 1
|
1515
|
-
|
1558
|
+
|
1516
1559
|
elsif action.is_a?(Shift)
|
1517
1560
|
shifts += 1
|
1518
|
-
|
1561
|
+
|
1519
1562
|
end
|
1520
1563
|
end
|
1521
|
-
|
1564
|
+
|
1522
1565
|
if shifts == 1 and reductions > 0
|
1523
1566
|
:SR
|
1524
1567
|
elsif reductions > 1
|
@@ -1527,7 +1570,7 @@ module RLTK
|
|
1527
1570
|
nil
|
1528
1571
|
end
|
1529
1572
|
end
|
1530
|
-
|
1573
|
+
|
1531
1574
|
# Iterate over the state's items.
|
1532
1575
|
#
|
1533
1576
|
# @return [void]
|
@@ -1538,7 +1581,7 @@ module RLTK
|
|
1538
1581
|
current_item += 1
|
1539
1582
|
end
|
1540
1583
|
end
|
1541
|
-
|
1584
|
+
|
1542
1585
|
# Specify an Action to perform when the input token is *symbol*.
|
1543
1586
|
#
|
1544
1587
|
# @param [Symbol] symbol Symbol to add action for.
|
@@ -1552,7 +1595,7 @@ module RLTK
|
|
1552
1595
|
raise ParserConstructionException, "Attempting to set action for token (#{symbol}) not seen in grammar definition."
|
1553
1596
|
end
|
1554
1597
|
end
|
1555
|
-
|
1598
|
+
|
1556
1599
|
# Returns the actions that should be taken when the input token
|
1557
1600
|
# is *symbol*.
|
1558
1601
|
#
|
@@ -1563,35 +1606,35 @@ module RLTK
|
|
1563
1606
|
@actions[symbol].clone
|
1564
1607
|
end
|
1565
1608
|
end
|
1566
|
-
|
1609
|
+
|
1567
1610
|
# A subclass of Proc that indicates how it should be passed arguments
|
1568
1611
|
# by the parser.
|
1569
1612
|
class ProdProc < Proc
|
1570
1613
|
# @return [:array, :splat] Method that should be used to pass arguments to this proc.
|
1571
1614
|
attr_reader :arg_type
|
1572
|
-
|
1615
|
+
|
1573
1616
|
# @return [Array<Integer>] Mask for selection of tokens to pass to action. Empty mask means pass all.
|
1574
1617
|
attr_reader :selections
|
1575
|
-
|
1618
|
+
|
1576
1619
|
def initialize(arg_type = :splat, selections = [])
|
1577
1620
|
super()
|
1578
1621
|
@arg_type = arg_type
|
1579
1622
|
@selections = selections
|
1580
1623
|
end
|
1581
1624
|
end
|
1582
|
-
|
1625
|
+
|
1583
1626
|
# The Action class is used to indicate what action the parser should
|
1584
1627
|
# take given a current state and input token.
|
1585
1628
|
class Action
|
1586
1629
|
# @return [Integer] ID of this action.
|
1587
1630
|
attr_reader :id
|
1588
|
-
|
1631
|
+
|
1589
1632
|
# @param [Integer] id ID of this action.
|
1590
1633
|
def initialize(id = nil)
|
1591
1634
|
@id = id
|
1592
1635
|
end
|
1593
1636
|
end
|
1594
|
-
|
1637
|
+
|
1595
1638
|
# The Accept class indicates to the parser that it should accept the
|
1596
1639
|
# current parse tree.
|
1597
1640
|
class Accept < Action
|
@@ -1600,7 +1643,7 @@ module RLTK
|
|
1600
1643
|
"Accept"
|
1601
1644
|
end
|
1602
1645
|
end
|
1603
|
-
|
1646
|
+
|
1604
1647
|
# The GoTo class indicates to the parser that it should goto the state
|
1605
1648
|
# specified by GoTo.id.
|
1606
1649
|
class GoTo < Action
|
@@ -1609,24 +1652,24 @@ module RLTK
|
|
1609
1652
|
"GoTo #{self.id}"
|
1610
1653
|
end
|
1611
1654
|
end
|
1612
|
-
|
1655
|
+
|
1613
1656
|
# The Reduce class indicates to the parser that it should reduce the
|
1614
1657
|
# input stack by the rule specified by Reduce.id.
|
1615
1658
|
class Reduce < Action
|
1616
|
-
|
1659
|
+
|
1617
1660
|
# @param [Production] production Production to reduce by
|
1618
1661
|
def initialize(production)
|
1619
1662
|
super(production.id)
|
1620
|
-
|
1663
|
+
|
1621
1664
|
@production = production
|
1622
1665
|
end
|
1623
|
-
|
1666
|
+
|
1624
1667
|
# @return [String] String representation of this action.
|
1625
1668
|
def to_s
|
1626
1669
|
"Reduce by Production #{self.id} : #{@production}"
|
1627
1670
|
end
|
1628
1671
|
end
|
1629
|
-
|
1672
|
+
|
1630
1673
|
# The Shift class indicates to the parser that it should shift the
|
1631
1674
|
# current input token.
|
1632
1675
|
class Shift < Action
|