rltk 3.0.0 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +21 -22
- data/lib/rltk/ast.rb +185 -118
- data/lib/rltk/cfg.rb +157 -103
- data/lib/rltk/cg/basic_block.rb +19 -19
- data/lib/rltk/cg/bindings.rb +16 -16
- data/lib/rltk/cg/builder.rb +129 -129
- data/lib/rltk/cg/context.rb +7 -7
- data/lib/rltk/cg/contractor.rb +7 -7
- data/lib/rltk/cg/execution_engine.rb +30 -30
- data/lib/rltk/cg/function.rb +37 -37
- data/lib/rltk/cg/generated_bindings.rb +3932 -3932
- data/lib/rltk/cg/generic_value.rb +17 -17
- data/lib/rltk/cg/instruction.rb +116 -116
- data/lib/rltk/cg/llvm.rb +22 -22
- data/lib/rltk/cg/memory_buffer.rb +7 -7
- data/lib/rltk/cg/module.rb +73 -73
- data/lib/rltk/cg/pass_manager.rb +35 -35
- data/lib/rltk/cg/target.rb +41 -41
- data/lib/rltk/cg/triple.rb +7 -7
- data/lib/rltk/cg/type.rb +75 -75
- data/lib/rltk/cg/value.rb +161 -161
- data/lib/rltk/lexer.rb +57 -57
- data/lib/rltk/lexers/calculator.rb +7 -7
- data/lib/rltk/lexers/ebnf.rb +5 -5
- data/lib/rltk/parser.rb +338 -295
- data/lib/rltk/parsers/infix_calc.rb +7 -7
- data/lib/rltk/parsers/postfix_calc.rb +3 -3
- data/lib/rltk/parsers/prefix_calc.rb +3 -3
- data/lib/rltk/token.rb +13 -13
- data/lib/rltk/version.rb +6 -6
- data/test/cg/tc_basic_block.rb +17 -17
- data/test/cg/tc_control_flow.rb +41 -41
- data/test/cg/tc_function.rb +4 -4
- data/test/cg/tc_generic_value.rb +3 -3
- data/test/cg/tc_instruction.rb +53 -53
- data/test/cg/tc_math.rb +12 -12
- data/test/cg/tc_module.rb +14 -14
- data/test/cg/tc_transforms.rb +11 -11
- data/test/cg/tc_type.rb +12 -12
- data/test/cg/tc_value.rb +35 -35
- data/test/cg/ts_cg.rb +5 -5
- data/test/tc_ast.rb +137 -60
- data/test/tc_cfg.rb +34 -34
- data/test/tc_lexer.rb +42 -42
- data/test/tc_parser.rb +250 -173
- data/test/tc_token.rb +2 -2
- data/test/ts_rltk.rb +8 -8
- metadata +84 -85
- data/lib/rltk/cg/old_generated_bindings.rb +0 -6152
data/lib/rltk/lexer.rb
CHANGED
@@ -32,7 +32,7 @@ module RLTK
|
|
32
32
|
|
33
33
|
# @return [String]
|
34
34
|
attr_reader :remainder
|
35
|
-
|
35
|
+
|
36
36
|
# @param [Integer] stream_offset Offset from beginning of string.
|
37
37
|
# @param [Integer] line_number Number of newlines encountered so far.
|
38
38
|
# @param [Integer] line_offset Offset from beginning of line.
|
@@ -43,28 +43,28 @@ module RLTK
|
|
43
43
|
@line_offset = line_offset
|
44
44
|
@remainder = remainder
|
45
45
|
end
|
46
|
-
|
46
|
+
|
47
47
|
# @return [String] String representation of the error.
|
48
48
|
def to_s
|
49
49
|
"#{super()}: #{@remainder}"
|
50
50
|
end
|
51
51
|
end
|
52
|
-
|
52
|
+
|
53
53
|
# The Lexer class may be sub-classed to produce new lexers. These lexers
|
54
54
|
# have a lot of features, and are described in the main documentation.
|
55
55
|
class Lexer
|
56
|
-
|
56
|
+
|
57
57
|
# @return [Environment] Environment used by an instantiated lexer.
|
58
58
|
attr_reader :env
|
59
|
-
|
59
|
+
|
60
60
|
#################
|
61
61
|
# Class Methods #
|
62
62
|
#################
|
63
|
-
|
63
|
+
|
64
64
|
class << self
|
65
65
|
# @return [Symbol] State in which the lexer starts.
|
66
66
|
attr_reader :start_state
|
67
|
-
|
67
|
+
|
68
68
|
# Called when the Lexer class is sub-classed, it installs
|
69
69
|
# necessary instance class variables.
|
70
70
|
#
|
@@ -72,7 +72,7 @@ module RLTK
|
|
72
72
|
def inherited(klass)
|
73
73
|
klass.install_icvars
|
74
74
|
end
|
75
|
-
|
75
|
+
|
76
76
|
# Installs instance class variables into a class.
|
77
77
|
#
|
78
78
|
# @return [void]
|
@@ -81,7 +81,7 @@ module RLTK
|
|
81
81
|
@rules = Hash.new {|h,k| h[k] = Array.new}
|
82
82
|
@start_state = :default
|
83
83
|
end
|
84
|
-
|
84
|
+
|
85
85
|
# Lex *string*, using *env* as the environment. This method will
|
86
86
|
# return the array of tokens generated by the lexer with a token
|
87
87
|
# of type EOS (End of Stream) appended to the end.
|
@@ -94,21 +94,21 @@ module RLTK
|
|
94
94
|
def lex(string, file_name = nil, env = self::Environment.new(@start_state))
|
95
95
|
# Offset from start of stream.
|
96
96
|
stream_offset = 0
|
97
|
-
|
97
|
+
|
98
98
|
# Offset from the start of the line.
|
99
99
|
line_offset = 0
|
100
100
|
line_number = 1
|
101
|
-
|
101
|
+
|
102
102
|
# Empty token list.
|
103
103
|
tokens = Array.new
|
104
|
-
|
104
|
+
|
105
105
|
# The scanner.
|
106
106
|
scanner = StringScanner.new(string)
|
107
|
-
|
107
|
+
|
108
108
|
# Start scanning the input string.
|
109
109
|
until scanner.eos?
|
110
110
|
match = nil
|
111
|
-
|
111
|
+
|
112
112
|
# If the match_type is set to :longest all of the
|
113
113
|
# rules for the current state need to be scanned
|
114
114
|
# and the longest match returned. If the
|
@@ -119,30 +119,30 @@ module RLTK
|
|
119
119
|
if txt = scanner.check(rule.pattern)
|
120
120
|
if not match or match.first.length < txt.length
|
121
121
|
match = [txt, rule]
|
122
|
-
|
122
|
+
|
123
123
|
break if @match_type == :first
|
124
124
|
end
|
125
125
|
end
|
126
126
|
end
|
127
127
|
end
|
128
|
-
|
128
|
+
|
129
129
|
if match
|
130
130
|
rule = match.last
|
131
|
-
|
131
|
+
|
132
132
|
txt = scanner.scan(rule.pattern)
|
133
133
|
type, value = env.rule_exec(rule.pattern.match(txt), txt, &rule.action)
|
134
|
-
|
134
|
+
|
135
135
|
if type
|
136
136
|
pos = StreamPosition.new(stream_offset, line_number, line_offset, txt.length, file_name)
|
137
|
-
tokens << Token.new(type, value, pos)
|
137
|
+
tokens << Token.new(type, value, pos)
|
138
138
|
end
|
139
|
-
|
139
|
+
|
140
140
|
# Advance our stat counters.
|
141
141
|
stream_offset += txt.length
|
142
|
-
|
142
|
+
|
143
143
|
if (newlines = txt.count("\n")) > 0
|
144
144
|
line_number += newlines
|
145
|
-
line_offset
|
145
|
+
line_offset = txt.rpartition("\n").last.length
|
146
146
|
else
|
147
147
|
line_offset += txt.length()
|
148
148
|
end
|
@@ -151,10 +151,10 @@ module RLTK
|
|
151
151
|
raise(error, 'Unable to match string with any of the given rules')
|
152
152
|
end
|
153
153
|
end
|
154
|
-
|
154
|
+
|
155
155
|
return tokens << Token.new(:EOS)
|
156
156
|
end
|
157
|
-
|
157
|
+
|
158
158
|
# A wrapper function that calls {Lexer.lex} on the contents of a
|
159
159
|
# file.
|
160
160
|
#
|
@@ -165,7 +165,7 @@ module RLTK
|
|
165
165
|
def lex_file(file_name, env = self::Environment.new(@start_state))
|
166
166
|
File.open(file_name, 'r') { |f| self.lex(f.read, file_name, env) }
|
167
167
|
end
|
168
|
-
|
168
|
+
|
169
169
|
# Used to tell a lexer to use the first match found instead
|
170
170
|
# of the longest match found.
|
171
171
|
#
|
@@ -173,7 +173,7 @@ module RLTK
|
|
173
173
|
def match_first
|
174
174
|
@match_type = :first
|
175
175
|
end
|
176
|
-
|
176
|
+
|
177
177
|
# This method is used to define a new lexing rule. The
|
178
178
|
# first argument is the regular expression used to match
|
179
179
|
# substrings of the input. The second argument is the state
|
@@ -194,15 +194,15 @@ module RLTK
|
|
194
194
|
# If no action is given we will set it to an empty
|
195
195
|
# action.
|
196
196
|
action ||= Proc.new() {}
|
197
|
-
|
197
|
+
|
198
198
|
pattern = Regexp.new(pattern) if pattern.is_a?(String)
|
199
|
-
|
199
|
+
|
200
200
|
r = Rule.new(pattern, action, state, flags)
|
201
|
-
|
201
|
+
|
202
202
|
if state == :ALL then @rules.each_key { |k| @rules[k] << r } else @rules[state] << r end
|
203
203
|
end
|
204
204
|
alias :r :rule
|
205
|
-
|
205
|
+
|
206
206
|
# Changes the starting state of the lexer.
|
207
207
|
#
|
208
208
|
# @param [Symbol] state Starting state for this lexer.
|
@@ -212,17 +212,17 @@ module RLTK
|
|
212
212
|
@start_state = state
|
213
213
|
end
|
214
214
|
end
|
215
|
-
|
215
|
+
|
216
216
|
####################
|
217
217
|
# Instance Methods #
|
218
218
|
####################
|
219
|
-
|
219
|
+
|
220
220
|
# Instantiates a new lexer and creates an environment to be
|
221
221
|
# used for subsequent calls.
|
222
222
|
def initialize
|
223
223
|
@env = self.class::Environment.new(self.class.start_state)
|
224
224
|
end
|
225
|
-
|
225
|
+
|
226
226
|
# Lexes a string using the encapsulated environment.
|
227
227
|
#
|
228
228
|
# @param [String] string String to be lexed.
|
@@ -232,7 +232,7 @@ module RLTK
|
|
232
232
|
def lex(string, file_name = nil)
|
233
233
|
self.class.lex(string, file_name, @env)
|
234
234
|
end
|
235
|
-
|
235
|
+
|
236
236
|
# Lexes a file using the encapsulated environment.
|
237
237
|
#
|
238
238
|
# @param [String] file_name File to be lexed.
|
@@ -241,19 +241,19 @@ module RLTK
|
|
241
241
|
def lex_file(file_name)
|
242
242
|
self.class.lex_file(file_name, @env)
|
243
243
|
end
|
244
|
-
|
244
|
+
|
245
245
|
# All actions passed to LexerCore.rule are evaluated inside an
|
246
246
|
# instance of the Environment class or its subclass (which must have
|
247
247
|
# the same name). This class provides functions for manipulating
|
248
248
|
# lexer state and flags.
|
249
249
|
class Environment
|
250
|
-
|
250
|
+
|
251
251
|
# @return [Array<Symbol>] Flags currently set in this environment.
|
252
252
|
attr_reader :flags
|
253
|
-
|
253
|
+
|
254
254
|
# @return [Match] Match object generated by a rule's regular expression.
|
255
255
|
attr_accessor :match
|
256
|
-
|
256
|
+
|
257
257
|
# Instantiates a new Environment object.
|
258
258
|
#
|
259
259
|
# @param [Symbol] start_state Lexer's start state.
|
@@ -263,7 +263,7 @@ module RLTK
|
|
263
263
|
@match = match
|
264
264
|
@flags = Array.new
|
265
265
|
end
|
266
|
-
|
266
|
+
|
267
267
|
# This function will instance_exec a block for a rule after
|
268
268
|
# setting the match value.
|
269
269
|
#
|
@@ -272,28 +272,28 @@ module RLTK
|
|
272
272
|
# @param [Proc] block Block for matched rule.
|
273
273
|
def rule_exec(match, txt, &block)
|
274
274
|
self.match = match
|
275
|
-
|
275
|
+
|
276
276
|
self.instance_exec(txt, &block)
|
277
277
|
end
|
278
|
-
|
278
|
+
|
279
279
|
# Pops a state from the state stack.
|
280
280
|
#
|
281
281
|
# @return [void]
|
282
282
|
def pop_state
|
283
283
|
@state.pop
|
284
|
-
|
284
|
+
|
285
285
|
nil
|
286
286
|
end
|
287
|
-
|
287
|
+
|
288
288
|
# Pushes a new state onto the state stack.
|
289
289
|
#
|
290
290
|
# @return [void]
|
291
291
|
def push_state(state)
|
292
292
|
@state << state
|
293
|
-
|
293
|
+
|
294
294
|
nil
|
295
295
|
end
|
296
|
-
|
296
|
+
|
297
297
|
# Sets the value on the top of the state stack.
|
298
298
|
#
|
299
299
|
# @param [Symbol] state New state for the lexing environment.
|
@@ -301,15 +301,15 @@ module RLTK
|
|
301
301
|
# @return [void]
|
302
302
|
def set_state(state)
|
303
303
|
@state[-1] = state
|
304
|
-
|
304
|
+
|
305
305
|
nil
|
306
306
|
end
|
307
|
-
|
307
|
+
|
308
308
|
# @return [Symbol] Current state of the lexing environment.
|
309
309
|
def state
|
310
310
|
@state.last
|
311
311
|
end
|
312
|
-
|
312
|
+
|
313
313
|
# Sets a flag in the current environment.
|
314
314
|
#
|
315
315
|
# @param [Symbol] flag Flag to set as enabled.
|
@@ -319,10 +319,10 @@ module RLTK
|
|
319
319
|
if not @flags.include?(flag)
|
320
320
|
@flags << flag
|
321
321
|
end
|
322
|
-
|
322
|
+
|
323
323
|
nil
|
324
324
|
end
|
325
|
-
|
325
|
+
|
326
326
|
# Unsets a flag in the current environment.
|
327
327
|
#
|
328
328
|
# @param [Symbol] flag Flag to unset.
|
@@ -330,31 +330,31 @@ module RLTK
|
|
330
330
|
# @return [void]
|
331
331
|
def unset_flag(flag)
|
332
332
|
@flags.delete(flag)
|
333
|
-
|
333
|
+
|
334
334
|
nil
|
335
335
|
end
|
336
|
-
|
336
|
+
|
337
337
|
# Unsets all flags in the current environment.
|
338
338
|
#
|
339
339
|
# @return [void]
|
340
340
|
def clear_flags
|
341
341
|
@flags = Array.new
|
342
|
-
|
342
|
+
|
343
343
|
nil
|
344
344
|
end
|
345
345
|
end
|
346
|
-
|
346
|
+
|
347
347
|
# The Rule class is used simply for data encapsulation.
|
348
348
|
class Rule
|
349
349
|
# @return [Proc] Token producing action to be taken when this rule is matched.
|
350
350
|
attr_reader :action
|
351
|
-
|
351
|
+
|
352
352
|
# @return [Regexp] Regular expression for matching this rule.
|
353
353
|
attr_reader :pattern
|
354
|
-
|
354
|
+
|
355
355
|
# @return [Array<Symbol>] Flags currently set in this lexing environment.
|
356
356
|
attr_reader :flags
|
357
|
-
|
357
|
+
|
358
358
|
# Instantiates a new Rule object.
|
359
359
|
#
|
360
360
|
# @param [Regexp] pattern Regular expression used to match to this rule.
|
data/lib/rltk/lexers/calculator.rb
CHANGED
@@ -15,29 +15,29 @@ require 'rltk/lexer'
|
|
15
15
|
#######################
|
16
16
|
|
17
17
|
module RLTK
|
18
|
-
|
18
|
+
|
19
19
|
# The RLTK::Lexers module contains the lexers that are included as part of
|
20
20
|
# the RLKT project.
|
21
21
|
module Lexers
|
22
|
-
|
22
|
+
|
23
23
|
# The Calculator lexer is a simple lexer for use with several of the
|
24
24
|
# provided parsers.
|
25
25
|
class Calculator < Lexer
|
26
|
-
|
26
|
+
|
27
27
|
#################
|
28
28
|
# Default State #
|
29
29
|
#################
|
30
|
-
|
30
|
+
|
31
31
|
rule(/\+/) { :PLS }
|
32
32
|
rule(/-/) { :SUB }
|
33
33
|
rule(/\*/) { :MUL }
|
34
34
|
rule(/\//) { :DIV }
|
35
|
-
|
35
|
+
|
36
36
|
rule(/\(/) { :LPAREN }
|
37
37
|
rule(/\)/) { :RPAREN }
|
38
|
-
|
38
|
+
|
39
39
|
rule(/[0-9]+/) { |t| [:NUM, t.to_i] }
|
40
|
-
|
40
|
+
|
41
41
|
rule(/\s/)
|
42
42
|
end
|
43
43
|
end
|
data/lib/rltk/lexers/ebnf.rb
CHANGED
@@ -16,22 +16,22 @@ require 'rltk/lexer'
|
|
16
16
|
|
17
17
|
module RLTK
|
18
18
|
module Lexers
|
19
|
-
|
19
|
+
|
20
20
|
# The EBNF lexer is used by the RLTK::CFG class.
|
21
21
|
class EBNF < Lexer
|
22
|
-
|
22
|
+
|
23
23
|
#################
|
24
24
|
# Default State #
|
25
25
|
#################
|
26
|
-
|
26
|
+
|
27
27
|
rule(/\*/) { :STAR }
|
28
28
|
rule(/\+/) { :PLUS }
|
29
29
|
rule(/\?/) { :QUESTION }
|
30
30
|
rule(/\./) { :DOT }
|
31
|
-
|
31
|
+
|
32
32
|
rule(/[a-z0-9_']+/) { |t| [:NONTERM, t.to_sym] }
|
33
33
|
rule(/[A-Z0-9_']+/) { |t| [:TERM, t.to_sym] }
|
34
|
-
|
34
|
+
|
35
35
|
rule(/\s/)
|
36
36
|
end
|
37
37
|
end
|
data/lib/rltk/parser.rb
CHANGED
@@ -24,21 +24,21 @@ module RLTK
|
|
24
24
|
'Unexpected token. Token not present in grammar definition.'
|
25
25
|
end
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
# A NotInLanguage error is raised whenever there is no valid parse tree
|
29
29
|
# for a given token stream. In other words, the input string is not in the
|
30
30
|
# defined language.
|
31
31
|
class NotInLanguage < StandardError
|
32
|
-
|
32
|
+
|
33
33
|
# @return [Array<Token>] List of tokens that have been successfully parsed
|
34
34
|
attr_reader :seen
|
35
|
-
|
35
|
+
|
36
36
|
# @return [Token] Token that caused the parser to stop
|
37
37
|
attr_reader :current
|
38
|
-
|
38
|
+
|
39
39
|
# @return [Array<Token>] List of tokens that have yet to be seen
|
40
40
|
attr_reader :remaining
|
41
|
-
|
41
|
+
|
42
42
|
# @param [Array<Token>] seen Tokens that have been successfully parsed
|
43
43
|
# @param [Token] current Token that caused the parser to stop
|
44
44
|
# @param [Array<Token>] remaining Tokens that have yet to be seen
|
@@ -47,25 +47,25 @@ module RLTK
|
|
47
47
|
@current = current
|
48
48
|
@remaining = remaining
|
49
49
|
end
|
50
|
-
|
50
|
+
|
51
51
|
# @return [String] String representation of the error.
|
52
52
|
def to_s
|
53
53
|
"String not in language. Token info:\n\tSeen: #{@seen}\n\tCurrent: #{@current}\n\tRemaining: #{@remaining}"
|
54
54
|
end
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
# An error of this type is raised when the parser encountered an error that
|
58
58
|
# was handled by an error production.
|
59
59
|
class HandledError < StandardError
|
60
|
-
|
60
|
+
|
61
61
|
# The errors as reported by the parser.
|
62
|
-
#
|
62
|
+
#
|
63
63
|
# @return [Array<Object>]
|
64
64
|
attr_reader :errors
|
65
|
-
|
65
|
+
|
66
66
|
# The result that would have been returned by the call to *parse*.
|
67
67
|
attr_reader :result
|
68
|
-
|
68
|
+
|
69
69
|
# Instantiate a new HandledError object with *errors*.
|
70
70
|
#
|
71
71
|
# @param [Array<Object>] errors Errors added to the parsing environment by calls to {Parser::Environment#error}.
|
@@ -75,49 +75,68 @@ module RLTK
|
|
75
75
|
@result = result
|
76
76
|
end
|
77
77
|
end
|
78
|
-
|
78
|
+
|
79
79
|
# Used for exceptions that occur during parser construction.
|
80
80
|
class ParserConstructionException < Exception; end
|
81
|
-
|
81
|
+
|
82
82
|
# Used for runtime exceptions that are the parser's fault. These should
|
83
83
|
# never be observed in the wild.
|
84
84
|
class InternalParserException < Exception; end
|
85
|
-
|
85
|
+
|
86
|
+
# Used to indicate that a parser is empty or hasn't been finalized.
|
87
|
+
class UselessParserException < Exception
|
88
|
+
# Sets the error message for this exception.
|
89
|
+
def initialize
|
90
|
+
super('Parser has not been finalized.')
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
86
94
|
# The Parser class may be sub-classed to produce new parsers. These
|
87
95
|
# parsers have a lot of features, and are described in the main
|
88
96
|
# documentation.
|
89
97
|
class Parser
|
90
98
|
# @return [Environment] Environment used by the instantiated parser.
|
91
99
|
attr_reader :env
|
92
|
-
|
100
|
+
|
93
101
|
#################
|
94
102
|
# Class Methods #
|
95
103
|
#################
|
96
|
-
|
104
|
+
|
97
105
|
class << self
|
106
|
+
# The overridden new prevents un-finalized parsers from being
|
107
|
+
# instantiated.
|
108
|
+
def new(*args)
|
109
|
+
if @symbols.nil?
|
110
|
+
raise UselessParserException
|
111
|
+
else
|
112
|
+
super(*args)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
98
116
|
# Installs instance class variables into a class.
|
99
117
|
#
|
100
118
|
# @return [void]
|
101
119
|
def install_icvars
|
102
120
|
@curr_lhs = nil
|
103
121
|
@curr_prec = nil
|
104
|
-
|
122
|
+
|
105
123
|
@conflicts = Hash.new {|h, k| h[k] = Array.new}
|
106
124
|
@grammar = CFG.new
|
107
|
-
|
125
|
+
|
108
126
|
@lh_sides = Hash.new
|
109
127
|
@procs = Array.new
|
110
128
|
@states = Array.new
|
111
|
-
|
129
|
+
|
112
130
|
# Variables for dealing with precedence.
|
113
131
|
@prec_counts = {:left => 0, :right => 0, :non => 0}
|
114
132
|
@production_precs = Array.new
|
115
133
|
@token_precs = Hash.new
|
116
|
-
|
134
|
+
@token_hooks = Hash.new {|h, k| h[k] = []}
|
135
|
+
|
117
136
|
# Set the default argument handling policy. Valid values
|
118
137
|
# are :array and :splat.
|
119
138
|
@default_arg_type = :splat
|
120
|
-
|
139
|
+
|
121
140
|
@grammar.callback do |type, which, p, sels = []|
|
122
141
|
@procs[p.id] = [
|
123
142
|
case type
|
@@ -126,35 +145,35 @@ module RLTK
|
|
126
145
|
when :empty then ProdProc.new { || nil }
|
127
146
|
else ProdProc.new { |o| o }
|
128
147
|
end
|
129
|
-
|
148
|
+
|
130
149
|
when :elp
|
131
150
|
case which
|
132
151
|
when :empty then ProdProc.new { || [] }
|
133
152
|
else ProdProc.new { |prime| prime }
|
134
153
|
end
|
135
|
-
|
154
|
+
|
136
155
|
when :nelp
|
137
156
|
case which
|
138
157
|
when :single
|
139
158
|
ProdProc.new { |el| [el] }
|
140
|
-
|
159
|
+
|
141
160
|
when :multiple
|
142
161
|
ProdProc.new(:splat, sels) do |*syms|
|
143
162
|
el = syms[1..-1]
|
144
163
|
syms.first << (el.length == 1 ? el.first : el)
|
145
164
|
end
|
146
|
-
|
165
|
+
|
147
166
|
else
|
148
167
|
ProdProc.new { |*el| el.length == 1 ? el.first : el }
|
149
168
|
end
|
150
169
|
end,
|
151
170
|
p.rhs.length
|
152
171
|
]
|
153
|
-
|
172
|
+
|
154
173
|
@production_precs[p.id] = p.last_terminal
|
155
174
|
end
|
156
175
|
end
|
157
|
-
|
176
|
+
|
158
177
|
# Called when the Parser class is sub-classed, it installs
|
159
178
|
# necessary instance class variables.
|
160
179
|
#
|
@@ -162,7 +181,7 @@ module RLTK
|
|
162
181
|
def inherited(klass)
|
163
182
|
klass.install_icvars
|
164
183
|
end
|
165
|
-
|
184
|
+
|
166
185
|
# If *state* (or its equivalent) is not in the state list it is
|
167
186
|
# added and its ID is returned. If there is already a state
|
168
187
|
# with the same items as *state* in the state list its ID is
|
@@ -176,13 +195,13 @@ module RLTK
|
|
176
195
|
id
|
177
196
|
else
|
178
197
|
state.id = @states.length
|
179
|
-
|
198
|
+
|
180
199
|
@states << state
|
181
|
-
|
200
|
+
|
182
201
|
@states.length - 1
|
183
202
|
end
|
184
203
|
end
|
185
|
-
|
204
|
+
|
186
205
|
# Build a hash with the default options for Parser.finalize
|
187
206
|
# and then update it with the values from *opts*.
|
188
207
|
#
|
@@ -191,7 +210,7 @@ module RLTK
|
|
191
210
|
# @return [Hash{Symbol => Object}]
|
192
211
|
def build_finalize_opts(opts)
|
193
212
|
opts[:explain] = self.get_io(opts[:explain])
|
194
|
-
|
213
|
+
|
195
214
|
{
|
196
215
|
explain: false,
|
197
216
|
lookahead: true,
|
@@ -200,7 +219,7 @@ module RLTK
|
|
200
219
|
}.update(opts)
|
201
220
|
end
|
202
221
|
private :build_finalize_opts
|
203
|
-
|
222
|
+
|
204
223
|
# Build a hash with the default options for Parser.parse and
|
205
224
|
# then update it with the values from *opts*.
|
206
225
|
#
|
@@ -210,7 +229,7 @@ module RLTK
|
|
210
229
|
def build_parse_opts(opts)
|
211
230
|
opts[:parse_tree] = self.get_io(opts[:parse_tree])
|
212
231
|
opts[:verbose] = self.get_io(opts[:verbose])
|
213
|
-
|
232
|
+
|
214
233
|
{
|
215
234
|
accept: :first,
|
216
235
|
env: self::Environment.new,
|
@@ -219,7 +238,7 @@ module RLTK
|
|
219
238
|
}.update(opts)
|
220
239
|
end
|
221
240
|
private :build_parse_opts
|
222
|
-
|
241
|
+
|
223
242
|
# This method is used to (surprise) check the sanity of the
|
224
243
|
# constructed parser. It checks to make sure all non-terminals
|
225
244
|
# used in the grammar definition appear on the left-hand side of
|
@@ -236,7 +255,7 @@ module RLTK
|
|
236
255
|
raise ParserConstructionException, "Non-terminal #{sym} does not appear on the left-hand side of any production."
|
237
256
|
end
|
238
257
|
end
|
239
|
-
|
258
|
+
|
240
259
|
# Check the actions in each state.
|
241
260
|
each_state do |state|
|
242
261
|
state.actions.each do |sym, actions|
|
@@ -247,14 +266,14 @@ module RLTK
|
|
247
266
|
if sym != :EOS
|
248
267
|
raise ParserConstructionException, "Accept action found for terminal #{sym} in state #{state.id}."
|
249
268
|
end
|
250
|
-
|
269
|
+
|
251
270
|
elsif not (action.is_a?(GoTo) or action.is_a?(Reduce) or action.is_a?(Shift))
|
252
271
|
raise ParserConstructionException, "Object of type #{action.class} found in actions for terminal " +
|
253
272
|
"#{sym} in state #{state.id}."
|
254
|
-
|
273
|
+
|
255
274
|
end
|
256
275
|
end
|
257
|
-
|
276
|
+
|
258
277
|
if (conflict = state.conflict_on?(sym))
|
259
278
|
self.inform_conflict(state.id, conflict, sym)
|
260
279
|
end
|
@@ -262,16 +281,16 @@ module RLTK
|
|
262
281
|
# Here we check actions for non-terminals.
|
263
282
|
if actions.length > 1
|
264
283
|
raise ParserConstructionException, "State #{state.id} has multiple GoTo actions for non-terminal #{sym}."
|
265
|
-
|
284
|
+
|
266
285
|
elsif actions.length == 1 and not actions.first.is_a?(GoTo)
|
267
286
|
raise ParserConstructionException, "State #{state.id} has non-GoTo action for non-terminal #{sym}."
|
268
|
-
|
287
|
+
|
269
288
|
end
|
270
289
|
end
|
271
290
|
end
|
272
291
|
end
|
273
292
|
end
|
274
|
-
|
293
|
+
|
275
294
|
# This method checks to see if the parser would be in parse state
|
276
295
|
# *dest* after starting in state *start* and reading *symbols*.
|
277
296
|
#
|
@@ -283,26 +302,26 @@ module RLTK
|
|
283
302
|
def check_reachability(start, dest, symbols)
|
284
303
|
path_exists = true
|
285
304
|
cur_state = start
|
286
|
-
|
305
|
+
|
287
306
|
symbols.each do |sym|
|
288
|
-
|
307
|
+
|
289
308
|
actions = @states[cur_state.id].on?(sym)
|
290
309
|
actions = actions.select { |a| a.is_a?(Shift) } if CFG::is_terminal?(sym)
|
291
|
-
|
310
|
+
|
292
311
|
if actions.empty?
|
293
312
|
path_exists = false
|
294
313
|
break
|
295
314
|
end
|
296
|
-
|
315
|
+
|
297
316
|
# There can only be one Shift action for terminals and
|
298
317
|
# one GoTo action for non-terminals, so we know the
|
299
318
|
# first action is the only one in the list.
|
300
319
|
cur_state = @states[actions.first.id]
|
301
320
|
end
|
302
|
-
|
321
|
+
|
303
322
|
path_exists and cur_state.id == dest.id
|
304
323
|
end
|
305
|
-
|
324
|
+
|
306
325
|
# Declares a new clause inside of a production. The right-hand
|
307
326
|
# side is specified by *expression* and the precedence of this
|
308
327
|
# production can be changed by setting the *precedence* argument
|
@@ -318,9 +337,9 @@ module RLTK
|
|
318
337
|
# Use the curr_prec only if it isn't overridden for this
|
319
338
|
# clause.
|
320
339
|
precedence ||= @curr_prec
|
321
|
-
|
340
|
+
|
322
341
|
production, selections = @grammar.clause(expression)
|
323
|
-
|
342
|
+
|
324
343
|
# Check to make sure the action's arity matches the number
|
325
344
|
# of symbols on the right-hand side.
|
326
345
|
expected_arity = (selections.empty? ? production.rhs.length : selections.length)
|
@@ -329,16 +348,16 @@ module RLTK
|
|
329
348
|
"Incorrect number of action parameters. Expected #{expected_arity} but got #{action.arity}." +
|
330
349
|
' Action arity must match the number of terminals and non-terminals in the clause.'
|
331
350
|
end
|
332
|
-
|
351
|
+
|
333
352
|
# Add the action to our proc list.
|
334
353
|
@procs[production.id] = [ProdProc.new(arg_type, selections, &action), production.rhs.length]
|
335
|
-
|
354
|
+
|
336
355
|
# If no precedence is specified use the precedence of the
|
337
356
|
# last terminal in the production.
|
338
357
|
@production_precs[production.id] = precedence || production.last_terminal
|
339
358
|
end
|
340
359
|
alias :c :clause
|
341
|
-
|
360
|
+
|
342
361
|
# Removes resources that were needed to generate the parser but
|
343
362
|
# aren't needed when actually parsing input.
|
344
363
|
#
|
@@ -346,23 +365,23 @@ module RLTK
|
|
346
365
|
def clean
|
347
366
|
# We've told the developer about conflicts by now.
|
348
367
|
@conflicts = nil
|
349
|
-
|
368
|
+
|
350
369
|
# Drop the grammar and the grammar'.
|
351
370
|
@grammar = nil
|
352
371
|
@grammar_prime = nil
|
353
|
-
|
372
|
+
|
354
373
|
# Drop precedence and bookkeeping information.
|
355
374
|
@cur_lhs = nil
|
356
375
|
@cur_prec = nil
|
357
|
-
|
376
|
+
|
358
377
|
@prec_counts = nil
|
359
378
|
@production_precs = nil
|
360
379
|
@token_precs = nil
|
361
|
-
|
380
|
+
|
362
381
|
# Drop the items from each of the states.
|
363
382
|
each_state { |state| state.clean }
|
364
383
|
end
|
365
|
-
|
384
|
+
|
366
385
|
# Set the default argument type for the actions associated with
|
367
386
|
# clauses. All actions defined after this call will be passed
|
368
387
|
# arguments in the way specified here, unless overridden in the
|
@@ -375,15 +394,15 @@ module RLTK
|
|
375
394
|
@default_arg_type = type if type == :array or type == :splat
|
376
395
|
end
|
377
396
|
alias :dat :default_arg_type
|
378
|
-
|
397
|
+
|
379
398
|
# Adds productions and actions for parsing empty lists.
|
380
399
|
#
|
381
400
|
# @see CFG#empty_list_production
|
382
401
|
def build_list_production(symbol, list_elements, separator = '')
|
383
402
|
@grammar.build_list_production(symbol, list_elements, separator)
|
384
403
|
end
|
385
|
-
alias :list :build_list_production
|
386
|
-
|
404
|
+
alias :list :build_list_production
|
405
|
+
|
387
406
|
# This function will print a description of the parser to the
|
388
407
|
# provided IO object.
|
389
408
|
#
|
@@ -396,127 +415,127 @@ module RLTK
|
|
396
415
|
io.puts('# Productions #')
|
397
416
|
io.puts('###############')
|
398
417
|
io.puts
|
399
|
-
|
418
|
+
|
400
419
|
max_id_length = @grammar.productions(:id).length.to_s.length
|
401
|
-
|
420
|
+
|
402
421
|
# Print the productions.
|
403
422
|
@grammar.productions.each do |sym, productions|
|
404
|
-
|
423
|
+
|
405
424
|
max_rhs_length = productions.inject(0) { |m, p| if (len = p.to_s.length) > m then len else m end }
|
406
|
-
|
425
|
+
|
407
426
|
productions.each do |production|
|
408
427
|
p_string = production.to_s
|
409
|
-
|
428
|
+
|
410
429
|
io.print("\tProduction #{sprintf("%#{max_id_length}d", production.id)}: #{p_string}")
|
411
|
-
|
430
|
+
|
412
431
|
if (prec = @production_precs[production.id])
|
413
432
|
io.print(' ' * (max_rhs_length - p_string.length))
|
414
433
|
io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
|
415
434
|
end
|
416
|
-
|
435
|
+
|
417
436
|
io.puts
|
418
437
|
end
|
419
|
-
|
438
|
+
|
420
439
|
io.puts
|
421
440
|
end
|
422
|
-
|
441
|
+
|
423
442
|
io.puts('##########')
|
424
443
|
io.puts('# Tokens #')
|
425
444
|
io.puts('##########')
|
426
445
|
io.puts
|
427
|
-
|
446
|
+
|
428
447
|
max_token_len = @grammar.terms.inject(0) { |m, t| if t.length > m then t.length else m end }
|
429
|
-
|
448
|
+
|
430
449
|
@grammar.terms.sort {|a,b| a.to_s <=> b.to_s }.each do |term|
|
431
450
|
io.print("\t#{term}")
|
432
|
-
|
451
|
+
|
433
452
|
if (prec = @token_precs[term])
|
434
453
|
io.print(' ' * (max_token_len - term.length))
|
435
454
|
io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
|
436
455
|
end
|
437
|
-
|
456
|
+
|
438
457
|
io.puts
|
439
458
|
end
|
440
|
-
|
459
|
+
|
441
460
|
io.puts
|
442
|
-
|
461
|
+
|
443
462
|
io.puts('#####################')
|
444
463
|
io.puts('# Table Information #')
|
445
464
|
io.puts('#####################')
|
446
465
|
io.puts
|
447
|
-
|
466
|
+
|
448
467
|
io.puts("\tStart symbol: #{@grammar.start_symbol}'")
|
449
468
|
io.puts
|
450
|
-
|
469
|
+
|
451
470
|
io.puts("\tTotal number of states: #{@states.length}")
|
452
471
|
io.puts
|
453
|
-
|
472
|
+
|
454
473
|
io.puts("\tTotal conflicts: #{@conflicts.values.flatten(1).length}")
|
455
474
|
io.puts
|
456
|
-
|
475
|
+
|
457
476
|
@conflicts.each do |state_id, conflicts|
|
458
477
|
io.puts("\tState #{state_id} has #{conflicts.length} conflict(s)")
|
459
478
|
end
|
460
|
-
|
479
|
+
|
461
480
|
io.puts if not @conflicts.empty?
|
462
|
-
|
481
|
+
|
463
482
|
# Print the parse table.
|
464
483
|
io.puts('###############')
|
465
484
|
io.puts('# Parse Table #')
|
466
485
|
io.puts('###############')
|
467
486
|
io.puts
|
468
|
-
|
487
|
+
|
469
488
|
each_state do |state|
|
470
489
|
io.puts("State #{state.id}:")
|
471
490
|
io.puts
|
472
|
-
|
491
|
+
|
473
492
|
io.puts("\t# ITEMS #")
|
474
493
|
max = state.items.inject(0) do |max, item|
|
475
494
|
if item.lhs.to_s.length > max then item.lhs.to_s.length else max end
|
476
495
|
end
|
477
|
-
|
496
|
+
|
478
497
|
state.each do |item|
|
479
498
|
io.puts("\t#{item.to_s(max)}")
|
480
499
|
end
|
481
|
-
|
500
|
+
|
482
501
|
io.puts
|
483
502
|
io.puts("\t# ACTIONS #")
|
484
|
-
|
503
|
+
|
485
504
|
state.actions.keys.sort {|a,b| a.to_s <=> b.to_s}.each do |sym|
|
486
505
|
state.actions[sym].each do |action|
|
487
506
|
io.puts("\tOn #{sym} #{action}")
|
488
507
|
end
|
489
508
|
end
|
490
|
-
|
509
|
+
|
491
510
|
io.puts
|
492
511
|
io.puts("\t# CONFLICTS #")
|
493
|
-
|
512
|
+
|
494
513
|
if @conflicts[state.id].length == 0
|
495
514
|
io.puts("\tNone\n\n")
|
496
515
|
else
|
497
516
|
@conflicts[state.id].each do |conflict|
|
498
517
|
type, sym = conflict
|
499
|
-
|
518
|
+
|
500
519
|
io.print("\t#{if type == :SR then "Shift/Reduce" else "Reduce/Reduce" end} conflict")
|
501
|
-
|
520
|
+
|
502
521
|
io.puts(" on #{sym}")
|
503
522
|
end
|
504
|
-
|
523
|
+
|
505
524
|
io.puts
|
506
525
|
end
|
507
526
|
end
|
508
|
-
|
527
|
+
|
509
528
|
# Close any IO objects that aren't $stdout.
|
510
529
|
io.close if io.is_a?(IO) and io != $stdout
|
511
530
|
else
|
512
531
|
raise ParserConstructionException, 'Parser.explain called outside of finalize.'
|
513
532
|
end
|
514
533
|
end
|
515
|
-
|
534
|
+
|
516
535
|
# This method will finalize the parser causing the construction
|
517
536
|
# of states and their actions, and the resolution of conflicts
|
518
537
|
# using lookahead and precedence information.
|
519
|
-
#
|
538
|
+
#
|
520
539
|
# No calls to {Parser.production} may appear after the call to
|
521
540
|
# Parser.finalize.
|
522
541
|
#
|
@@ -529,58 +548,63 @@ module RLTK
|
|
529
548
|
#
|
530
549
|
# @return [void]
|
531
550
|
def finalize(opts = {})
|
532
|
-
|
551
|
+
|
552
|
+
if @grammar.productions.empty?
|
553
|
+
raise ParserConstructionException,
|
554
|
+
"Parser has no productions. Cowardly refusing to construct an empty parser."
|
555
|
+
end
|
556
|
+
|
533
557
|
# Get the full options hash.
|
534
558
|
opts = build_finalize_opts(opts)
|
535
|
-
|
559
|
+
|
536
560
|
# Get the name of the file in which the parser is defined.
|
537
561
|
#
|
538
562
|
# FIXME: See why this is failing for the simple ListParser example.
|
539
563
|
def_file = caller()[2].split(':')[0] if opts[:use]
|
540
|
-
|
564
|
+
|
541
565
|
# Check to make sure we can load the necessary information
|
542
566
|
# from the specified object.
|
543
567
|
if opts[:use] and (
|
544
568
|
(opts[:use].is_a?(String) and File.exists?(opts[:use]) and File.mtime(opts[:use]) > File.mtime(def_file)) or
|
545
569
|
(opts[:use].is_a?(File) and opts[:use].mtime > File.mtime(def_file))
|
546
570
|
)
|
547
|
-
|
571
|
+
|
548
572
|
file = self.get_io(opts[:use], 'r')
|
549
|
-
|
573
|
+
|
550
574
|
# Un-marshal our saved data structures.
|
551
575
|
file.flock(File::LOCK_SH)
|
552
576
|
@lh_sides, @states, @symbols = Marshal.load(file)
|
553
577
|
file.flock(File::LOCK_UN)
|
554
|
-
|
578
|
+
|
555
579
|
# Close the file if we opened it.
|
556
580
|
file.close if opts[:use].is_a?(String)
|
557
|
-
|
581
|
+
|
558
582
|
# Remove any un-needed data and return.
|
559
583
|
return self.clean
|
560
584
|
end
|
561
|
-
|
585
|
+
|
562
586
|
# Grab all of the symbols that comprise the grammar
|
563
587
|
# (besides the start symbol).
|
564
588
|
@symbols = @grammar.symbols << :ERROR
|
565
|
-
|
589
|
+
|
566
590
|
# Add our starting state to the state list.
|
567
591
|
@start_symbol = (@grammar.start_symbol.to_s + '\'').to_sym
|
568
592
|
start_production, _ = @grammar.production(@start_symbol, @grammar.start_symbol).first
|
569
593
|
start_state = State.new(@symbols, [start_production.to_item])
|
570
|
-
|
594
|
+
|
571
595
|
start_state.close(@grammar.productions)
|
572
|
-
|
596
|
+
|
573
597
|
self.add_state(start_state)
|
574
|
-
|
598
|
+
|
575
599
|
# Translate the precedence of productions from tokens to
|
576
600
|
# (associativity, precedence) pairs.
|
577
601
|
@production_precs.map! { |prec| @token_precs[prec] }
|
578
|
-
|
602
|
+
|
579
603
|
# Build the rest of the transition table.
|
580
604
|
each_state do |state|
|
581
605
|
#Transition states.
|
582
606
|
tstates = Hash.new { |h,k| h[k] = State.new(@symbols) }
|
583
|
-
|
607
|
+
|
584
608
|
#Bin each item in this set into reachable transition
|
585
609
|
#states.
|
586
610
|
state.each do |item|
|
@@ -588,7 +612,7 @@ module RLTK
|
|
588
612
|
tstates[next_symbol] << item.copy
|
589
613
|
end
|
590
614
|
end
|
591
|
-
|
615
|
+
|
592
616
|
# For each transition state:
|
593
617
|
# 1) Get transition symbol
|
594
618
|
# 2) Advance dot
|
@@ -596,15 +620,15 @@ module RLTK
|
|
596
620
|
# 4) Get state id and add transition
|
597
621
|
tstates.each do |symbol, tstate|
|
598
622
|
tstate.each { |item| item.advance }
|
599
|
-
|
623
|
+
|
600
624
|
tstate.close(@grammar.productions)
|
601
|
-
|
625
|
+
|
602
626
|
id = self.add_state(tstate)
|
603
|
-
|
627
|
+
|
604
628
|
# Add Goto and Shift actions.
|
605
629
|
state.on(symbol, CFG::is_nonterminal?(symbol) ? GoTo.new(id) : Shift.new(id))
|
606
630
|
end
|
607
|
-
|
631
|
+
|
608
632
|
# Find the Accept and Reduce actions for this state.
|
609
633
|
state.each do |item|
|
610
634
|
if item.at_end?
|
@@ -616,35 +640,35 @@ module RLTK
|
|
616
640
|
end
|
617
641
|
end
|
618
642
|
end
|
619
|
-
|
643
|
+
|
620
644
|
# Build the production.id -> production.lhs map.
|
621
645
|
@grammar.productions(:id).each { |id, production| @lh_sides[id] = production.lhs }
|
622
|
-
|
646
|
+
|
623
647
|
# Prune the parsing table for unnecessary reduce actions.
|
624
648
|
self.prune(opts[:lookahead], opts[:precedence])
|
625
|
-
|
649
|
+
|
626
650
|
# Check the parser for inconsistencies.
|
627
651
|
self.check_sanity
|
628
|
-
|
652
|
+
|
629
653
|
# Print the table if requested.
|
630
654
|
self.explain(opts[:explain]) if opts[:explain]
|
631
|
-
|
655
|
+
|
632
656
|
# Remove any data that is no longer needed.
|
633
657
|
self.clean
|
634
|
-
|
658
|
+
|
635
659
|
# Store the parser's final data structures if requested.
|
636
660
|
if opts[:use]
|
637
661
|
io = self.get_io(opts[:use])
|
638
|
-
|
662
|
+
|
639
663
|
io.flock(File::LOCK_EX) if io.is_a?(File)
|
640
664
|
Marshal.dump([@lh_sides, @states, @symbols], io)
|
641
665
|
io.flock(File::LOCK_UN) if io.is_a?(File)
|
642
|
-
|
666
|
+
|
643
667
|
# Close the IO object if we opened it.
|
644
668
|
io.close if opts[:use].is_a?(String)
|
645
669
|
end
|
646
670
|
end
|
647
|
-
|
671
|
+
|
648
672
|
# Converts an object into an IO object as appropriate.
|
649
673
|
#
|
650
674
|
# @param [Object] o Object to be converted into an IO object.
|
@@ -662,7 +686,7 @@ module RLTK
|
|
662
686
|
false
|
663
687
|
end
|
664
688
|
end
|
665
|
-
|
689
|
+
|
666
690
|
# Iterate over the parser's states.
|
667
691
|
#
|
668
692
|
# @yieldparam [State] state One of the parser automaton's state objects
|
@@ -675,16 +699,16 @@ module RLTK
|
|
675
699
|
current_state += 1
|
676
700
|
end
|
677
701
|
end
|
678
|
-
|
702
|
+
|
679
703
|
# @return [CFG] The grammar that can be parsed by this Parser.
|
680
704
|
# Accessor for the parser's grammar. Callers receive the result of
# calling #clone on the stored grammar, not the stored object itself.
def grammar
	stored = @grammar
	stored.clone
end
|
683
|
-
|
707
|
+
|
684
708
|
# This method generates and memoizes the G' grammar used to
|
685
709
|
# calculate the LALR(1) lookahead sets. Information about this
|
686
710
|
# grammar and its use can be found in the following paper:
|
687
|
-
#
|
711
|
+
#
|
688
712
|
# Simple Computation of LALR(1) Lookahead Sets
|
689
713
|
# Manuel E. Bermudez and George Logothetis
|
690
714
|
# Information Processing Letters 31 - 1989
|
@@ -693,33 +717,33 @@ module RLTK
|
|
693
717
|
# Lazily builds the G' grammar and caches it in @grammar_prime.
#
# For every state and every item whose next symbol is a non-terminal, a
# left-hand side named "<state id>_<symbol>" is created (once per name).
# Each production of that non-terminal contributes a right-hand side in
# which every symbol is prefixed with the id of the state the automaton
# is in when that symbol is consumed.
def grammar_prime
	return @grammar_prime if @grammar_prime

	@grammar_prime = CFG.new

	each_state do |state|
		state.each do |item|
			lhs = "#{state.id}_#{item.next_symbol}".to_sym

			# Skip terminals and left-hand sides that were already expanded.
			next if !CFG::is_nonterminal?(item.next_symbol) || @grammar_prime.productions.keys.include?(lhs)

			@grammar.productions[item.next_symbol].each do |production|
				rhs    = ''.dup
				cursor = state

				production.rhs.each do |symbol|
					rhs << "#{cursor.id}_#{symbol} "

					# Follow the transition on this symbol to find the
					# state in which the next symbol will be seen.
					cursor = @states[cursor.on?(symbol).first.id]
				end

				@grammar_prime.production(lhs, rhs)
			end
		end
	end

	@grammar_prime
end
|
722
|
-
|
746
|
+
|
723
747
|
# Inform the parser core that a conflict has been detected.
|
724
748
|
#
|
725
749
|
# @param [Integer] state_id ID of the state where the conflict was encountered.
|
@@ -730,7 +754,7 @@ module RLTK
|
|
730
754
|
# Records a conflict by appending a [type, sym] pair to the list kept in
# @conflicts under *state_id*.
def inform_conflict(state_id, type, sym)
	bucket = @conflicts[state_id]
	bucket << [type, sym]
end
|
733
|
-
|
757
|
+
|
734
758
|
# This method is used to specify that the symbols in *symbols*
|
735
759
|
# are left-associative. Subsequent calls to this method will
|
736
760
|
# give their arguments higher precedence.
|
@@ -740,12 +764,12 @@ module RLTK
|
|
740
764
|
# @return [void]
|
741
765
|
# Marks every symbol in *symbols* as left-associative. Each call bumps
# the :left counter in @prec_counts and tags all of its symbols with the
# new level in @token_precs.
def left(*symbols)
	@prec_counts[:left] += 1
	level = @prec_counts[:left]

	names = symbols.map(&:to_sym)
	names.each { |name| @token_precs[name] = [:left, level] }
end
|
748
|
-
|
772
|
+
|
749
773
|
# This method is used to specify that the symbols in *symbols*
|
750
774
|
# are non-associative.
|
751
775
|
#
|
@@ -754,25 +778,25 @@ module RLTK
|
|
754
778
|
# @return [void]
|
755
779
|
# Marks every symbol in *symbols* as non-associative. Each call bumps
# the :non counter in @prec_counts and tags all of its symbols with
# [:non, level] in @token_precs.
#
# NOTE(review): the stored tag is :non, while the conflict resolution
# code in this file compares an associativity against :nonassoc -
# confirm which spelling is intended.
def nonassoc(*symbols)
	@prec_counts[:non] += 1
	level = @prec_counts[:non]

	names = symbols.map(&:to_sym)
	names.each { |name| @token_precs[name] = [:non, level] }
end
|
762
|
-
|
786
|
+
|
763
787
|
# Adds productions and actions for parsing nonempty lists.
|
764
788
|
#
|
765
789
|
# @see CFG#nonempty_list_production
|
766
790
|
# Thin wrapper: forwards all three arguments unchanged to the grammar's
# own build_nonempty_list_production and returns whatever it returns.
def build_nonempty_list_production(symbol, list_elements, separator = '')
	forwarded = [symbol, list_elements, separator]
	@grammar.build_nonempty_list_production(*forwarded)
end
|
769
|
-
alias :nonempty_list :build_nonempty_list_production
|
770
|
-
|
793
|
+
alias :nonempty_list :build_nonempty_list_production
|
794
|
+
|
771
795
|
# This function is where actual parsing takes place. The
|
772
796
|
# _tokens_ argument must be an array of Token objects, the last
|
773
797
|
# of which has type EOS. By default this method will return the
|
774
798
|
# value computed by the first successful parse tree found.
|
775
|
-
#
|
799
|
+
#
|
776
800
|
# Additional information about the parsing options can be found in
|
777
801
|
# the main documentation.
|
778
802
|
#
|
@@ -787,27 +811,27 @@ module RLTK
|
|
787
811
|
# @return [Object, Array<Object>] Result or results of parsing the given tokens.
|
788
812
|
def parse(tokens, opts = {})
|
789
813
|
# Get the full options hash.
|
790
|
-
opts
|
791
|
-
v
|
792
|
-
|
814
|
+
opts = build_parse_opts(opts)
|
815
|
+
v = opts[:verbose]
|
816
|
+
|
793
817
|
if opts[:verbose]
|
794
818
|
v.puts("Input tokens:")
|
795
819
|
v.puts(tokens.map { |t| t.type }.inspect)
|
796
820
|
v.puts
|
797
821
|
end
|
798
|
-
|
822
|
+
|
799
823
|
# Stack IDs to keep track of them during parsing.
|
800
824
|
stack_id = 0
|
801
|
-
|
825
|
+
|
802
826
|
# Error mode indicators.
|
803
827
|
error_mode = false
|
804
828
|
reduction_guard = false
|
805
|
-
|
829
|
+
|
806
830
|
# Our various list of stacks.
|
807
831
|
accepted = []
|
808
832
|
moving_on = []
|
809
833
|
processing = [ParseStack.new(stack_id += 1)]
|
810
|
-
|
834
|
+
|
811
835
|
# Iterate over the tokens. We don't proceed to the
|
812
836
|
# next token until every stack is done with the
|
813
837
|
# current one.
|
@@ -815,33 +839,36 @@ module RLTK
|
|
815
839
|
# Check to make sure this token was seen in the
|
816
840
|
# grammar definition.
|
817
841
|
raise BadToken if not @symbols.include?(token.type)
|
818
|
-
|
842
|
+
|
819
843
|
v.puts("Current token: #{token.type}#{if token.value then "(#{token.value})" end}") if v
|
820
|
-
|
844
|
+
|
821
845
|
# Iterate over the stacks until each one is done.
|
822
846
|
while (stack = processing.shift)
|
847
|
+
# Execute any token hooks in this stack's environment.
|
848
|
+
@token_hooks[token.type].each { |hook| opts[:env].instance_exec &hook}
|
849
|
+
|
823
850
|
# Get the available actions for this stack.
|
824
851
|
actions = @states[stack.state].on?(token.type)
|
825
|
-
|
852
|
+
|
826
853
|
if actions.empty?
|
827
854
|
# If we are already in error mode and there
|
828
855
|
# are no actions we skip this token.
|
829
856
|
if error_mode
|
830
857
|
v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}") if v
|
831
|
-
|
858
|
+
|
832
859
|
# Add the current token to the array
|
833
860
|
# that corresponds to the output value
|
834
861
|
# for the ERROR token.
|
835
862
|
stack.output_stack.last << token
|
836
|
-
|
863
|
+
|
837
864
|
moving_on << stack
|
838
865
|
next
|
839
866
|
end
|
840
|
-
|
867
|
+
|
841
868
|
# We would be dropping the last stack so we
|
842
869
|
# are going to go into error mode.
|
843
870
|
if accepted.empty? and moving_on.empty? and processing.empty?
|
844
|
-
|
871
|
+
|
845
872
|
if v
|
846
873
|
v.puts
|
847
874
|
v.puts('Current stack:')
|
@@ -850,7 +877,7 @@ module RLTK
|
|
850
877
|
v.puts("\tOutput Stack:\t#{stack.output_stack.inspect}")
|
851
878
|
v.puts
|
852
879
|
end
|
853
|
-
|
880
|
+
|
854
881
|
# Try and find a valid error state.
|
855
882
|
while stack.state
|
856
883
|
if (actions = @states[stack.state].on?(:ERROR)).empty?
|
@@ -860,17 +887,17 @@ module RLTK
|
|
860
887
|
else
|
861
888
|
# Enter the found error state.
|
862
889
|
stack.push(actions.first.id, [token], :ERROR, token.position)
|
863
|
-
|
890
|
+
|
864
891
|
break
|
865
892
|
end
|
866
893
|
end
|
867
|
-
|
894
|
+
|
868
895
|
if stack.state
|
869
896
|
# We found a valid error state.
|
870
897
|
error_mode = reduction_guard = true
|
871
898
|
opts[:env].he = true
|
872
899
|
moving_on << stack
|
873
|
-
|
900
|
+
|
874
901
|
if v
|
875
902
|
v.puts('Invalid input encountered. Entering error handling mode.')
|
876
903
|
v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}")
|
@@ -879,20 +906,20 @@ module RLTK
|
|
879
906
|
# No valid error states could be
|
880
907
|
# found. Time to print a message
|
881
908
|
# and leave.
|
882
|
-
|
909
|
+
|
883
910
|
v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
|
884
911
|
end
|
885
912
|
else
|
886
913
|
v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
|
887
914
|
end
|
888
|
-
|
915
|
+
|
889
916
|
next
|
890
917
|
end
|
891
|
-
|
918
|
+
|
892
919
|
# Make (stack, action) pairs, duplicating the
|
893
920
|
# stack as necessary.
|
894
921
|
pairs = [[stack, actions.pop]] + actions.map {|action| [stack.branch(stack_id += 1), action] }
|
895
|
-
|
922
|
+
|
896
923
|
pairs.each do |stack, action|
|
897
924
|
if v
|
898
925
|
v.puts
|
@@ -903,127 +930,127 @@ module RLTK
|
|
903
930
|
v.puts
|
904
931
|
v.puts("Action taken: #{action.to_s}")
|
905
932
|
end
|
906
|
-
|
933
|
+
|
907
934
|
if action.is_a?(Accept)
|
908
935
|
if opts[:accept] == :all
|
909
936
|
accepted << stack
|
910
937
|
else
|
911
938
|
v.puts('Accepting input.') if v
|
912
939
|
opts[:parse_tree].puts(stack.tree) if opts[:parse_tree]
|
913
|
-
|
940
|
+
|
914
941
|
if opts[:env].he
|
915
942
|
raise HandledError.new(opts[:env].errors, stack.result)
|
916
943
|
else
|
917
944
|
return stack.result
|
918
945
|
end
|
919
946
|
end
|
920
|
-
|
947
|
+
|
921
948
|
elsif action.is_a?(Reduce)
|
922
949
|
# Get the production associated with this reduction.
|
923
950
|
production_proc, pop_size = @procs[action.id]
|
924
|
-
|
951
|
+
|
925
952
|
if not production_proc
|
926
953
|
raise InternalParserException, "No production #{action.id} found."
|
927
954
|
end
|
928
|
-
|
955
|
+
|
929
956
|
args, positions = stack.pop(pop_size)
|
930
957
|
opts[:env].set_positions(positions)
|
931
|
-
|
958
|
+
|
932
959
|
if not production_proc.selections.empty?
|
933
960
|
args = args.values_at(*production_proc.selections)
|
934
961
|
end
|
935
|
-
|
962
|
+
|
936
963
|
result =
|
937
964
|
if production_proc.arg_type == :array
|
938
965
|
opts[:env].instance_exec(args, &production_proc)
|
939
966
|
else
|
940
967
|
opts[:env].instance_exec(*args, &production_proc)
|
941
968
|
end
|
942
|
-
|
969
|
+
|
943
970
|
if (goto = @states[stack.state].on?(@lh_sides[action.id]).first)
|
944
|
-
|
971
|
+
|
945
972
|
v.puts("Going to state #{goto.id}.\n") if v
|
946
|
-
|
973
|
+
|
947
974
|
pos0 = nil
|
948
|
-
|
975
|
+
|
949
976
|
if args.empty?
|
950
977
|
# Empty productions need to be
|
951
978
|
# handled specially.
|
952
979
|
pos0 = stack.position
|
953
|
-
|
980
|
+
|
954
981
|
pos0.stream_offset += pos0.length + 1
|
955
982
|
pos0.line_offset += pos0.length + 1
|
956
|
-
|
983
|
+
|
957
984
|
pos0.length = 0
|
958
985
|
else
|
959
986
|
pos0 = opts[:env].pos( 0)
|
960
987
|
pos1 = opts[:env].pos(-1)
|
961
|
-
|
988
|
+
|
962
989
|
pos0.length = (pos1.stream_offset + pos1.length) - pos0.stream_offset
|
963
990
|
end
|
964
|
-
|
991
|
+
|
965
992
|
stack.push(goto.id, result, @lh_sides[action.id], pos0)
|
966
993
|
else
|
967
994
|
raise InternalParserException, "No GoTo action found in state #{stack.state} " +
|
968
995
|
"after reducing by production #{action.id}"
|
969
996
|
end
|
970
|
-
|
997
|
+
|
971
998
|
# This stack is NOT ready for the next
|
972
999
|
# token.
|
973
1000
|
processing << stack
|
974
|
-
|
1001
|
+
|
975
1002
|
# Exit error mode if necessary.
|
976
1003
|
error_mode = false if error_mode and not reduction_guard
|
977
|
-
|
1004
|
+
|
978
1005
|
elsif action.is_a?(Shift)
|
979
1006
|
stack.push(action.id, token.value, token.type, token.position)
|
980
|
-
|
1007
|
+
|
981
1008
|
# This stack is ready for the next
|
982
1009
|
# token.
|
983
1010
|
moving_on << stack
|
984
|
-
|
1011
|
+
|
985
1012
|
# Exit error mode.
|
986
1013
|
error_mode = false
|
987
1014
|
end
|
988
1015
|
end
|
989
1016
|
end
|
990
|
-
|
1017
|
+
|
991
1018
|
v.puts("\n\n") if v
|
992
|
-
|
1019
|
+
|
993
1020
|
processing = moving_on
|
994
1021
|
moving_on = []
|
995
|
-
|
1022
|
+
|
996
1023
|
# If we don't have any active stacks at this point the
|
997
1024
|
# string isn't in the language.
|
998
1025
|
if opts[:accept] == :first and processing.length == 0
|
999
1026
|
v.close if v and v != $stdout
|
1000
1027
|
raise NotInLanguage.new(tokens[0...index], tokens[index], tokens[index.next..-1])
|
1001
1028
|
end
|
1002
|
-
|
1029
|
+
|
1003
1030
|
reduction_guard = false
|
1004
1031
|
end
|
1005
|
-
|
1032
|
+
|
1006
1033
|
# If we have reached this point we are accepting all parse
|
1007
1034
|
# trees.
|
1008
1035
|
if v
|
1009
1036
|
v.puts("Accepting input with #{accepted.length} derivation(s).")
|
1010
|
-
|
1037
|
+
|
1011
1038
|
v.close if v != $stdout
|
1012
1039
|
end
|
1013
|
-
|
1040
|
+
|
1014
1041
|
accepted.each do |stack|
|
1015
1042
|
opts[:parse_tree].puts(stack.tree)
|
1016
1043
|
end if opts[:parse_tree]
|
1017
|
-
|
1044
|
+
|
1018
1045
|
results = accepted.map { |stack| stack.result }
|
1019
|
-
|
1046
|
+
|
1020
1047
|
if opts[:env].he
|
1021
1048
|
raise HandledError.new(opts[:env].errors, results)
|
1022
1049
|
else
|
1023
1050
|
return results
|
1024
1051
|
end
|
1025
1052
|
end
|
1026
|
-
|
1053
|
+
|
1027
1054
|
# Adds a new production to the parser with a left-hand value of
|
1028
1055
|
# *symbol*. If *expression* is specified it is taken as the
|
1029
1056
|
# right-hand side of the production and *action* is associated
|
@@ -1040,34 +1067,34 @@ module RLTK
|
|
1040
1067
|
#
|
1041
1068
|
# @return [void]
|
1042
1069
|
# Declares a production whose left-hand side is *symbol*.
#
# When *expression* is given it is handed to `clause` together with
# *precedence* and the block. Otherwise the block is evaluated with
# `instance_exec` so that it can issue its own `clause` calls.
#
# While the clause or block runs, the grammar's current left-hand side,
# @curr_prec and (if *arg_type* differs from the current default)
# @default_arg_type are set for this production. They are restored in an
# `ensure` section, so a clause or block that raises no longer leaves
# the parser in a half-configured state.
#
# @param [Symbol, String]     symbol      Left-hand side; must satisfy CFG::is_nonterminal?.
# @param [String, nil]        expression  Right-hand side of the production, if any.
# @param [Symbol, nil]        precedence  Precedence passed on to `clause`.
# @param [Object]             arg_type    Argument passing style used for this production.
# @param [Proc]               action      Action for the clause, or a block issuing clauses.
#
# @raise [ParserConstructionException] If *symbol* is not a String/Symbol non-terminal.
#
# @return [void]
def production(symbol, expression = nil, precedence = nil, arg_type = @default_arg_type, &action)
	# Check the symbol.
	unless (symbol.is_a?(Symbol) || symbol.is_a?(String)) && CFG::is_nonterminal?(symbol)
		raise ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
	end

	# Remember the default explicitly (with a flag rather than a nil
	# check) so that a nil default is restored correctly as well.
	previous_arg_type = @default_arg_type
	overridden        = arg_type != previous_arg_type

	begin
		@grammar.curr_lhs = symbol.to_sym
		@curr_prec        = precedence
		@default_arg_type = arg_type if overridden

		if expression
			clause(expression, precedence, &action)
		else
			instance_exec(&action)
		end
	ensure
		# Always undo the per-production state, even if the clause or
		# block raised.
		@default_arg_type = previous_arg_type if overridden
		@grammar.curr_lhs = nil
		@curr_prec        = nil
	end

	nil
end
|
1069
1096
|
alias :p :production
|
1070
|
-
|
1097
|
+
|
1071
1098
|
# This method uses lookahead sets and precedence information to
|
1072
1099
|
# resolve conflicts and remove unnecessary reduce actions.
|
1073
1100
|
#
|
@@ -1077,41 +1104,41 @@ module RLTK
|
|
1077
1104
|
# @return [void]
|
1078
1105
|
def prune(do_lookahead, do_precedence)
|
1079
1106
|
terms = @grammar.terms
|
1080
|
-
|
1107
|
+
|
1081
1108
|
# If both options are false there is no pruning to do.
|
1082
1109
|
return if not (do_lookahead or do_precedence)
|
1083
|
-
|
1110
|
+
|
1084
1111
|
each_state do |state0|
|
1085
|
-
|
1112
|
+
|
1086
1113
|
#####################
|
1087
1114
|
# Lookahead Pruning #
|
1088
1115
|
#####################
|
1089
|
-
|
1116
|
+
|
1090
1117
|
if do_lookahead
|
1091
1118
|
# Find all of the reductions in this state.
|
1092
1119
|
reductions = state0.actions.values.flatten.uniq.select { |a| a.is_a?(Reduce) }
|
1093
|
-
|
1120
|
+
|
1094
1121
|
reductions.each do |reduction|
|
1095
1122
|
production = @grammar.productions(:id)[reduction.id]
|
1096
|
-
|
1123
|
+
|
1097
1124
|
lookahead = Array.new
|
1098
|
-
|
1125
|
+
|
1099
1126
|
# Build the lookahead set.
|
1100
1127
|
each_state do |state1|
|
1101
1128
|
if self.check_reachability(state1, state0, production.rhs)
|
1102
1129
|
lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
|
1103
1130
|
end
|
1104
1131
|
end
|
1105
|
-
|
1132
|
+
|
1106
1133
|
# Translate the G' follow symbols into G
|
1107
1134
|
# lookahead symbols.
|
1108
1135
|
lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq
|
1109
|
-
|
1136
|
+
|
1110
1137
|
# Here we remove the unnecessary reductions.
|
1111
1138
|
# If there are error productions we need to
|
1112
1139
|
# scale back the amount of pruning done.
|
1113
1140
|
pruning_candidates = terms - lookahead
|
1114
|
-
|
1141
|
+
|
1115
1142
|
if terms.include?(:ERROR)
|
1116
1143
|
pruning_candidates.each do |sym|
|
1117
1144
|
state0.actions[sym].delete(reduction) if state0.conflict_on?(sym)
|
@@ -1121,23 +1148,23 @@ module RLTK
|
|
1121
1148
|
end
|
1122
1149
|
end
|
1123
1150
|
end
|
1124
|
-
|
1151
|
+
|
1125
1152
|
########################################
|
1126
1153
|
# Precedence and Associativity Pruning #
|
1127
1154
|
########################################
|
1128
|
-
|
1155
|
+
|
1129
1156
|
if do_precedence
|
1130
1157
|
state0.actions.each do |symbol, actions|
|
1131
|
-
|
1158
|
+
|
1132
1159
|
# We are only interested in pruning actions
|
1133
1160
|
# for terminal symbols.
|
1134
1161
|
next unless CFG::is_terminal?(symbol)
|
1135
|
-
|
1136
|
-
# Skip to the next one if there is no
|
1162
|
+
|
1163
|
+
# Skip to the next one if there is no
|
1137
1164
|
# possibility of a Shift/Reduce or
|
1138
1165
|
# Reduce/Reduce conflict.
|
1139
1166
|
next unless actions and actions.length > 1
|
1140
|
-
|
1167
|
+
|
1141
1168
|
resolve_ok = actions.inject(true) do |m, a|
|
1142
1169
|
if a.is_a?(Reduce)
|
1143
1170
|
m and @production_precs[a.id]
|
@@ -1145,18 +1172,18 @@ module RLTK
|
|
1145
1172
|
m
|
1146
1173
|
end
|
1147
1174
|
end and actions.inject(false) { |m, a| m or a.is_a?(Shift) }
|
1148
|
-
|
1175
|
+
|
1149
1176
|
if @token_precs[symbol] and resolve_ok
|
1150
1177
|
max_prec = 0
|
1151
1178
|
selected_action = nil
|
1152
|
-
|
1179
|
+
|
1153
1180
|
# Grab the associativity and precedence
|
1154
1181
|
# for the input token.
|
1155
1182
|
tassoc, tprec = @token_precs[symbol]
|
1156
|
-
|
1183
|
+
|
1157
1184
|
actions.each do |a|
|
1158
1185
|
assoc, prec = a.is_a?(Shift) ? [tassoc, tprec] : @production_precs[a.id]
|
1159
|
-
|
1186
|
+
|
1160
1187
|
# If two actions have the same precedence we
|
1161
1188
|
# will only replace the previous production if:
|
1162
1189
|
# * The token is left associative and the current action is a Reduce
|
@@ -1164,20 +1191,20 @@ module RLTK
|
|
1164
1191
|
if prec > max_prec or (prec == max_prec and tassoc == (a.is_a?(Shift) ? :right : :left))
|
1165
1192
|
max_prec = prec
|
1166
1193
|
selected_action = a
|
1167
|
-
|
1194
|
+
|
1168
1195
|
elsif prec == max_prec and assoc == :nonassoc
|
1169
1196
|
raise ParserConstructionException, 'Non-associative token found during conflict resolution.'
|
1170
|
-
|
1197
|
+
|
1171
1198
|
end
|
1172
1199
|
end
|
1173
|
-
|
1200
|
+
|
1174
1201
|
state0.actions[symbol] = [selected_action]
|
1175
1202
|
end
|
1176
1203
|
end
|
1177
1204
|
end
|
1178
1205
|
end
|
1179
1206
|
end
|
1180
|
-
|
1207
|
+
|
1181
1208
|
# This method is used to specify that the symbols in _symbols_
|
1182
1209
|
# are right associative. Subsequent calls to this method will
|
1183
1210
|
# give their arguments higher precedence.
|
@@ -1187,12 +1214,12 @@ module RLTK
|
|
1187
1214
|
# @return [void]
|
1188
1215
|
# Marks every symbol in *symbols* as right-associative. Each call bumps
# the :right counter in @prec_counts and tags all of its symbols with
# the new level in @token_precs.
def right(*symbols)
	@prec_counts[:right] += 1
	level = @prec_counts[:right]

	names = symbols.map(&:to_sym)
	names.each { |name| @token_precs[name] = [:right, level] }
end
|
1195
|
-
|
1222
|
+
|
1196
1223
|
# Changes the starting symbol of the parser.
|
1197
1224
|
#
|
1198
1225
|
# @param [Symbol] symbol The starting symbol of the grammar.
|
@@ -1201,27 +1228,43 @@ module RLTK
|
|
1201
1228
|
# Delegates to the grammar's `start` method, passing *symbol* through
# and returning its result.
def start(symbol)
	@grammar.start(symbol)
end
|
1231
|
+
|
1232
|
+
# Add a hook that is executed whenever *sym* is seen.
|
1233
|
+
#
|
1234
|
+
# The *sym* must be a terminal symbol.
|
1235
|
+
#
|
1236
|
+
# @param [Symbol] sym Symbol to hook into
|
1237
|
+
# @param [Proc] proc Code to execute when *sym* is seen
|
1238
|
+
#
|
1239
|
+
# @return [void]
|
1240
|
+
# Registers *proc* to run whenever the terminal *sym* is seen by the
# parser. Hooks are appended to the list stored under *sym* in
# @token_hooks.
#
# The error message used to claim that a non-terminal was expected,
# which contradicted the check actually performed; it now states the
# real requirement.
#
# @raise [RuntimeError] If *sym* is not a terminal symbol.
def token_hook(sym, &proc)
	unless CFG::is_terminal?(sym)
		raise 'Method token_hook expects `sym` to be a terminal symbol.'
	end

	@token_hooks[sym] << proc
end
|
1204
1247
|
end
|
1205
|
-
|
1248
|
+
|
1206
1249
|
####################
|
1207
1250
|
# Instance Methods #
|
1208
1251
|
####################
|
1209
|
-
|
1252
|
+
|
1210
1253
|
# Instantiates a new parser and creates an environment to be
|
1211
1254
|
# used for subsequent calls.
|
1212
1255
|
# Creates the environment object for this parser instance by
# instantiating the Environment constant looked up on the parser's own
# class, and stores it in @env.
def initialize
	environment_class = self.class::Environment
	@env = environment_class.new
end
|
1215
|
-
|
1258
|
+
|
1216
1259
|
# Parses the given token stream using the encapsulated environment.
|
1217
1260
|
#
|
1218
1261
|
# @see .parse
|
1219
1262
|
# Instance-level entry point: forwards to the class-level `parse`,
# supplying this instance's environment as :env. Keys present in *opts*
# take priority over that default.
def parse(tokens, opts = {})
	merged = { :env => @env }
	merged.update(opts)

	self.class.parse(tokens, merged)
end
|
1222
|
-
|
1265
|
+
|
1223
1266
|
################################
|
1224
|
-
|
1267
|
+
|
1225
1268
|
# All actions passed to Parser.production and Parser.clause are
|
1226
1269
|
# evaluated inside an instance of the Environment class or its
|
1227
1270
|
# subclass (which must have the same name).
|
@@ -1230,24 +1273,24 @@ module RLTK
|
|
1230
1273
|
#
|
1231
1274
|
# @return [Boolean]
|
1232
1275
|
attr_accessor :he
|
1233
|
-
|
1276
|
+
|
1234
1277
|
# A list of all objects added using the *error* method.
|
1235
1278
|
#
|
1236
1279
|
# @return [Array<Object>]
|
1237
1280
|
attr_reader :errors
|
1238
|
-
|
1281
|
+
|
1239
1282
|
# Instantiate a new Environment object.
|
1240
1283
|
# A new Environment starts out in whatever state `reset` establishes.
def initialize
	reset
end
|
1243
|
-
|
1286
|
+
|
1244
1287
|
# Adds an object to the list of errors.
|
1245
1288
|
#
|
1246
1289
|
# @return [void]
|
1247
1290
|
# Appends *o* to the @errors list.
def error(o)
	@errors.push(o)
end
|
1250
|
-
|
1293
|
+
|
1251
1294
|
# Returns a StreamPosition object for the symbol at location n,
|
1252
1295
|
# indexed from zero.
|
1253
1296
|
#
|
@@ -1257,7 +1300,7 @@ module RLTK
|
|
1257
1300
|
# Looks up entry *n* of the @positions array (negative indices count
# from the end, as with any Array lookup).
def pos(n)
	recorded = @positions
	recorded[n]
end
|
1260
|
-
|
1303
|
+
|
1261
1304
|
# Reset any variables that need to be re-initialized between
|
1262
1305
|
# parse calls.
|
1263
1306
|
#
|
@@ -1266,7 +1309,7 @@ module RLTK
|
|
1266
1309
|
@errors = Array.new
|
1267
1310
|
@he = false
|
1268
1311
|
end
|
1269
|
-
|
1312
|
+
|
1270
1313
|
# Setter for the *positions* array.
|
1271
1314
|
#
|
1272
1315
|
# @param [Array<StreamPosition>] positions
|
@@ -1276,19 +1319,19 @@ module RLTK
|
|
1276
1319
|
@positions = positions
|
1277
1320
|
end
|
1278
1321
|
end
|
1279
|
-
|
1322
|
+
|
1280
1323
|
# The ParseStack class is used by a Parser to keep track of state
|
1281
1324
|
# during parsing.
|
1282
1325
|
class ParseStack
|
1283
1326
|
# @return [Integer] ID of this parse stack.
|
1284
1327
|
attr_reader :id
|
1285
|
-
|
1328
|
+
|
1286
1329
|
# @return [Array<Object>] Array of objects produced by {Reduce} actions.
|
1287
1330
|
attr_reader :output_stack
|
1288
|
-
|
1331
|
+
|
1289
1332
|
# @return [Array<Integer>] Array of states used when performing {Reduce} actions.
|
1290
1333
|
attr_reader :state_stack
|
1291
|
-
|
1334
|
+
|
1292
1335
|
# Instantiate a new ParseStack object.
|
1293
1336
|
#
|
1294
1337
|
# @param [Integer] id ID for this parse stack. Used by GLR algorithm.
|
@@ -1300,16 +1343,16 @@ module RLTK
|
|
1300
1343
|
# @param [Array<StreamPosition>] positions Position data for symbols that have been shifted.
|
1301
1344
|
# Stores the stack's id and the six arrays that make up its state. The
# arrays are kept as given (no copies are made here).
def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
	@id = id

	@node_stack, @output_stack, @state_stack = nstack, ostack, sstack

	@connections, @labels, @positions = connections, labels, positions
end
|
1312
|
-
|
1355
|
+
|
1313
1356
|
# Branch this stack, effectively creating a new copy of its
|
1314
1357
|
# internal state.
|
1315
1358
|
#
|
@@ -1327,16 +1370,16 @@ module RLTK
|
|
1327
1370
|
# Check to see if we can obtain a deep copy.
|
1328
1371
|
if 0.respond_to?(:copy)
|
1329
1372
|
o.copy
|
1330
|
-
|
1373
|
+
|
1331
1374
|
else
|
1332
1375
|
begin o.clone rescue o end
|
1333
1376
|
end
|
1334
1377
|
end
|
1335
|
-
|
1378
|
+
|
1336
1379
|
ParseStack.new(new_id, new_output_stack, @state_stack.clone,
|
1337
1380
|
@node_stack.clone, @connections.clone, @labels.clone, @positions.clone)
|
1338
1381
|
end
|
1339
|
-
|
1382
|
+
|
1340
1383
|
# @return [StreamPosition] Position data for the last symbol on the stack.
|
1341
1384
|
def position
|
1342
1385
|
if @positions.empty?
|
@@ -1345,7 +1388,7 @@ module RLTK
|
|
1345
1388
|
@positions.last.clone
|
1346
1389
|
end
|
1347
1390
|
end
|
1348
|
-
|
1391
|
+
|
1349
1392
|
# Push new state and other information onto the stack.
|
1350
1393
|
#
|
1351
1394
|
# @param [Integer] state ID of the shifted state.
|
@@ -1360,14 +1403,14 @@ module RLTK
|
|
1360
1403
|
@node_stack << @labels.length
|
1361
1404
|
@labels << if CFG::is_terminal?(node0) and o then node0.to_s + "(#{o})" else node0 end
|
1362
1405
|
@positions << position
|
1363
|
-
|
1406
|
+
|
1364
1407
|
if CFG::is_nonterminal?(node0)
|
1365
1408
|
@cbuffer.each do |node1|
|
1366
1409
|
@connections << [@labels.length - 1, node1]
|
1367
1410
|
end
|
1368
1411
|
end
|
1369
1412
|
end
|
1370
|
-
|
1413
|
+
|
1371
1414
|
# Pop some number of objects off of the inside stacks.
|
1372
1415
|
#
|
1373
1416
|
# @param [Integer] n Number of objects to pop off the stack.
|
@@ -1375,15 +1418,15 @@ module RLTK
|
|
1375
1418
|
# @return [Array(Object, StreamPosition)] Values popped from the output and positions stacks.
|
1376
1419
|
# Removes the top *n* entries from the state, node, output and position
# stacks. The popped node ids are kept in @cbuffer so that the next
# non-terminal pushed can be connected to them; the popped outputs and
# positions are returned as a two-element array.
def pop(n = 1)
	# The popped states themselves are not needed.
	@state_stack.pop(n)

	@cbuffer = @node_stack.pop(n)

	outputs   = @output_stack.pop(n)
	locations = @positions.pop(n)

	[outputs, locations]
end
|
1386
|
-
|
1429
|
+
|
1387
1430
|
# Fetch the result stored in this ParseStack. If there is more
|
1388
1431
|
# than one object left on the output stack there is an error.
|
1389
1432
|
#
|
@@ -1395,48 +1438,48 @@ module RLTK
|
|
1395
1438
|
raise InternalParserException, "The parsing stack should have 1 element on the output stack, not #{@output_stack.length}."
|
1396
1439
|
end
|
1397
1440
|
end
|
1398
|
-
|
1441
|
+
|
1399
1442
|
# @return [Integer] Current state of this ParseStack.
|
1400
1443
|
# The current state is the entry on top of the state stack (nil when
# the stack is empty).
def state
	@state_stack[-1]
end
|
1403
|
-
|
1446
|
+
|
1404
1447
|
# @return [String] Representation of the parse tree in the DOT language.
|
1405
1448
|
# Renders the recorded parse tree as a DOT digraph named "tree<id>".
#
# One node line is emitted per entry in @labels (terminals, as decided
# by CFG::is_terminal?, are drawn as boxes), followed by a blank line
# and one edge line per [from, to] pair in @connections.
#
# Double quotes inside a label are escaped so that labels containing
# quoted token values do not terminate the DOT string early and yield a
# malformed graph.
#
# @return [String] Representation of the parse tree in the DOT language.
def tree
	nodes = @labels.each_with_index.map do |label, i|
		# Block form avoids gsub's special handling of backslashes in
		# replacement strings.
		text  = label.to_s.gsub('"') { '\\"' }
		shape = CFG::is_terminal?(label) ? ' shape=box' : ''

		"\tnode#{i} [label=\"#{text}\"#{shape}];\n"
	end

	edges = @connections.map { |from, to| "\tnode#{from} -> node#{to};\n" }

	"digraph tree#{@id} {\n#{nodes.join}\n#{edges.join}}"
end
|
1426
1469
|
end
|
1427
|
-
|
1470
|
+
|
1428
1471
|
# The State class is used to represent sets of items and actions to be
|
1429
1472
|
# used during parsing.
|
1430
1473
|
class State
|
1431
1474
|
# @return [Integer] State's ID.
|
1432
1475
|
attr_accessor :id
|
1433
|
-
|
1476
|
+
|
1434
1477
|
# @return [Array<CFG::Item>] Item objects that comprise this state
|
1435
1478
|
attr_reader :items
|
1436
|
-
|
1479
|
+
|
1437
1480
|
# @return [Hash{Symbol => Array<Action>}] Maps lookahead symbols to actions
|
1438
1481
|
attr_reader :actions
|
1439
|
-
|
1482
|
+
|
1440
1483
|
# Instantiate a new State object.
|
1441
1484
|
#
|
1442
1485
|
# @param [Array<Symbol>] tokens Tokens that represent this state
|
@@ -1446,7 +1489,7 @@ module RLTK
|
|
1446
1489
|
@items = items
|
1447
1490
|
@actions = tokens.inject(Hash.new) { |h, t| h[t] = Array.new; h }
|
1448
1491
|
end
|
1449
|
-
|
1492
|
+
|
1450
1493
|
# Compare one State to another. Two States are equal if they
|
1451
1494
|
# have the same items or, if the items have been cleaned, if
|
1452
1495
|
# the States have the same ID.
|
@@ -1457,7 +1500,7 @@ module RLTK
|
|
1457
1500
|
def ==(other)
|
1458
1501
|
if self.items and other.items then self.items == other.items else self.id == other.id end
|
1459
1502
|
end
|
1460
|
-
|
1503
|
+
|
1461
1504
|
# Add a Reduce action to the state.
|
1462
1505
|
#
|
1463
1506
|
# @param [Production] production Production used to perform the reduction
|
@@ -1465,24 +1508,24 @@ module RLTK
|
|
1465
1508
|
# @return [void]
|
1466
1509
|
def add_reduction(production)
|
1467
1510
|
action = Reduce.new(production)
|
1468
|
-
|
1511
|
+
|
1469
1512
|
# Reduce actions are not allowed for the ERROR terminal.
|
1470
1513
|
@actions.each { |k, v| if CFG::is_terminal?(k) and k != :ERROR then v << action end }
|
1471
1514
|
end
|
1472
|
-
|
1515
|
+
|
1473
1516
|
# @param [CFG::Item] item Item to add to this state.
|
1474
1517
|
def append(item)
|
1475
1518
|
if item.is_a?(CFG::Item) and not @items.include?(item) then @items << item end
|
1476
1519
|
end
|
1477
1520
|
alias :<< :append
|
1478
|
-
|
1521
|
+
|
1479
1522
|
# Clean this State by removing the list of {CFG::Item} objects.
|
1480
1523
|
#
|
1481
1524
|
# @return [void]
|
1482
1525
|
def clean
|
1483
1526
|
@items = nil
|
1484
1527
|
end
|
1485
|
-
|
1528
|
+
|
1486
1529
|
# Close this state using *productions*.
|
1487
1530
|
#
|
1488
1531
|
# @param [Array<CFG::Production>] productions Productions used to close this state.
|
@@ -1495,7 +1538,7 @@ module RLTK
|
|
1495
1538
|
end
|
1496
1539
|
end
|
1497
1540
|
end
|
1498
|
-
|
1541
|
+
|
1499
1542
|
# Checks to see if there is a conflict in this state, given an
|
1500
1543
|
# input of *sym*. Returns :SR if a shift/reduce conflict is
|
1501
1544
|
# detected and :RR if a reduce/reduce conflict is detected. If
|
@@ -1505,20 +1548,20 @@ module RLTK
|
|
1505
1548
|
#
|
1506
1549
|
# @return [:SR, :RR, nil]
|
1507
1550
|
def conflict_on?(sym)
|
1508
|
-
|
1551
|
+
|
1509
1552
|
reductions = 0
|
1510
1553
|
shifts = 0
|
1511
|
-
|
1554
|
+
|
1512
1555
|
@actions[sym].each do |action|
|
1513
1556
|
if action.is_a?(Reduce)
|
1514
1557
|
reductions += 1
|
1515
|
-
|
1558
|
+
|
1516
1559
|
elsif action.is_a?(Shift)
|
1517
1560
|
shifts += 1
|
1518
|
-
|
1561
|
+
|
1519
1562
|
end
|
1520
1563
|
end
|
1521
|
-
|
1564
|
+
|
1522
1565
|
if shifts == 1 and reductions > 0
|
1523
1566
|
:SR
|
1524
1567
|
elsif reductions > 1
|
@@ -1527,7 +1570,7 @@ module RLTK
|
|
1527
1570
|
nil
|
1528
1571
|
end
|
1529
1572
|
end
|
1530
|
-
|
1573
|
+
|
1531
1574
|
# Iterate over the state's items.
|
1532
1575
|
#
|
1533
1576
|
# @return [void]
|
@@ -1538,7 +1581,7 @@ module RLTK
|
|
1538
1581
|
current_item += 1
|
1539
1582
|
end
|
1540
1583
|
end
|
1541
|
-
|
1584
|
+
|
1542
1585
|
# Specify an Action to perform when the input token is *symbol*.
|
1543
1586
|
#
|
1544
1587
|
# @param [Symbol] symbol Symbol to add action for.
|
@@ -1552,7 +1595,7 @@ module RLTK
|
|
1552
1595
|
raise ParserConstructionException, "Attempting to set action for token (#{symbol}) not seen in grammar definition."
|
1553
1596
|
end
|
1554
1597
|
end
|
1555
|
-
|
1598
|
+
|
1556
1599
|
# Returns the actions that should be taken when the input token
|
1557
1600
|
# is *symbol*.
|
1558
1601
|
#
|
@@ -1563,35 +1606,35 @@ module RLTK
|
|
1563
1606
|
@actions[symbol].clone
|
1564
1607
|
end
|
1565
1608
|
end
|
1566
|
-
|
1609
|
+
|
1567
1610
|
# A subclass of Proc that indicates how it should be passed arguments
|
1568
1611
|
# by the parser.
|
1569
1612
|
class ProdProc < Proc
|
1570
1613
|
# @return [:array, :splat] Method that should be used to pass arguments to this proc.
|
1571
1614
|
attr_reader :arg_type
|
1572
|
-
|
1615
|
+
|
1573
1616
|
# @return [Array<Integer>] Mask for selection of tokens to pass to action. Empty mask means pass all.
|
1574
1617
|
attr_reader :selections
|
1575
|
-
|
1618
|
+
|
1576
1619
|
def initialize(arg_type = :splat, selections = [])
|
1577
1620
|
super()
|
1578
1621
|
@arg_type = arg_type
|
1579
1622
|
@selections = selections
|
1580
1623
|
end
|
1581
1624
|
end
|
1582
|
-
|
1625
|
+
|
1583
1626
|
# The Action class is used to indicate what action the parser should
|
1584
1627
|
# take given a current state and input token.
|
1585
1628
|
class Action
|
1586
1629
|
# @return [Integer] ID of this action.
|
1587
1630
|
attr_reader :id
|
1588
|
-
|
1631
|
+
|
1589
1632
|
# @param [Integer] id ID of this action.
|
1590
1633
|
def initialize(id = nil)
|
1591
1634
|
@id = id
|
1592
1635
|
end
|
1593
1636
|
end
|
1594
|
-
|
1637
|
+
|
1595
1638
|
# The Accept class indicates to the parser that it should accept the
|
1596
1639
|
# current parse tree.
|
1597
1640
|
class Accept < Action
|
@@ -1600,7 +1643,7 @@ module RLTK
|
|
1600
1643
|
"Accept"
|
1601
1644
|
end
|
1602
1645
|
end
|
1603
|
-
|
1646
|
+
|
1604
1647
|
# The GoTo class indicates to the parser that it should goto the state
|
1605
1648
|
# specified by GoTo.id.
|
1606
1649
|
class GoTo < Action
|
@@ -1609,24 +1652,24 @@ module RLTK
|
|
1609
1652
|
"GoTo #{self.id}"
|
1610
1653
|
end
|
1611
1654
|
end
|
1612
|
-
|
1655
|
+
|
1613
1656
|
# The Reduce class indicates to the parser that it should reduce the
|
1614
1657
|
# input stack by the rule specified by Reduce.id.
|
1615
1658
|
class Reduce < Action
|
1616
|
-
|
1659
|
+
|
1617
1660
|
# @param [Production] production Production to reduce by
|
1618
1661
|
def initialize(production)
|
1619
1662
|
super(production.id)
|
1620
|
-
|
1663
|
+
|
1621
1664
|
@production = production
|
1622
1665
|
end
|
1623
|
-
|
1666
|
+
|
1624
1667
|
# @return [String] String representation of this action.
|
1625
1668
|
def to_s
|
1626
1669
|
"Reduce by Production #{self.id} : #{@production}"
|
1627
1670
|
end
|
1628
1671
|
end
|
1629
|
-
|
1672
|
+
|
1630
1673
|
# The Shift class indicates to the parser that it should shift the
|
1631
1674
|
# current input token.
|
1632
1675
|
class Shift < Action
|