rltk 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +21 -22
- data/lib/rltk/ast.rb +185 -118
- data/lib/rltk/cfg.rb +157 -103
- data/lib/rltk/cg/basic_block.rb +19 -19
- data/lib/rltk/cg/bindings.rb +16 -16
- data/lib/rltk/cg/builder.rb +129 -129
- data/lib/rltk/cg/context.rb +7 -7
- data/lib/rltk/cg/contractor.rb +7 -7
- data/lib/rltk/cg/execution_engine.rb +30 -30
- data/lib/rltk/cg/function.rb +37 -37
- data/lib/rltk/cg/generated_bindings.rb +3932 -3932
- data/lib/rltk/cg/generic_value.rb +17 -17
- data/lib/rltk/cg/instruction.rb +116 -116
- data/lib/rltk/cg/llvm.rb +22 -22
- data/lib/rltk/cg/memory_buffer.rb +7 -7
- data/lib/rltk/cg/module.rb +73 -73
- data/lib/rltk/cg/pass_manager.rb +35 -35
- data/lib/rltk/cg/target.rb +41 -41
- data/lib/rltk/cg/triple.rb +7 -7
- data/lib/rltk/cg/type.rb +75 -75
- data/lib/rltk/cg/value.rb +161 -161
- data/lib/rltk/lexer.rb +57 -57
- data/lib/rltk/lexers/calculator.rb +7 -7
- data/lib/rltk/lexers/ebnf.rb +5 -5
- data/lib/rltk/parser.rb +338 -295
- data/lib/rltk/parsers/infix_calc.rb +7 -7
- data/lib/rltk/parsers/postfix_calc.rb +3 -3
- data/lib/rltk/parsers/prefix_calc.rb +3 -3
- data/lib/rltk/token.rb +13 -13
- data/lib/rltk/version.rb +6 -6
- data/test/cg/tc_basic_block.rb +17 -17
- data/test/cg/tc_control_flow.rb +41 -41
- data/test/cg/tc_function.rb +4 -4
- data/test/cg/tc_generic_value.rb +3 -3
- data/test/cg/tc_instruction.rb +53 -53
- data/test/cg/tc_math.rb +12 -12
- data/test/cg/tc_module.rb +14 -14
- data/test/cg/tc_transforms.rb +11 -11
- data/test/cg/tc_type.rb +12 -12
- data/test/cg/tc_value.rb +35 -35
- data/test/cg/ts_cg.rb +5 -5
- data/test/tc_ast.rb +137 -60
- data/test/tc_cfg.rb +34 -34
- data/test/tc_lexer.rb +42 -42
- data/test/tc_parser.rb +250 -173
- data/test/tc_token.rb +2 -2
- data/test/ts_rltk.rb +8 -8
- metadata +84 -85
- data/lib/rltk/cg/old_generated_bindings.rb +0 -6152
data/lib/rltk/cfg.rb
CHANGED
@@ -24,25 +24,25 @@ module RLTK
|
|
24
24
|
# An exception class that represents a problem with a context-free
|
25
25
|
# grammar's definition.
|
26
26
|
class GrammarError < StandardError; end
|
27
|
-
|
27
|
+
|
28
28
|
# The CFG class is used to represent context-free grammars. It is used by
|
29
29
|
# the RLTK::Parser class to represent the parser's grammar, but can also be
|
30
30
|
# used to manipulate arbitrary CFGs.
|
31
31
|
class CFG
|
32
|
-
|
32
|
+
|
33
33
|
# @return [Symbol] The grammar's starting symbol.
|
34
34
|
attr_reader :start_symbol
|
35
|
-
|
35
|
+
|
36
36
|
# This is used by the {CFG#production} method to wrap {CFG#clause}
|
37
37
|
# calls.
|
38
38
|
#
|
39
39
|
# @return [Symbol] The current left-hand side symbol.
|
40
40
|
attr_accessor :curr_lhs
|
41
|
-
|
41
|
+
|
42
42
|
#################
|
43
43
|
# Class Methods #
|
44
44
|
#################
|
45
|
-
|
45
|
+
|
46
46
|
# Tests to see if a symbol is a terminal symbol, as used by the CFG
|
47
47
|
# class.
|
48
48
|
#
|
@@ -52,7 +52,7 @@ module RLTK
|
|
52
52
|
def self.is_terminal?(sym)
|
53
53
|
sym and (s = sym.to_s) == s.upcase
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
# Tests to see if a symbol is a non-terminal symbol, as used by the
|
57
57
|
# CFG class.
|
58
58
|
#
|
@@ -62,11 +62,11 @@ module RLTK
|
|
62
62
|
def self.is_nonterminal?(sym)
|
63
63
|
sym and (s = sym.to_s) == s.downcase
|
64
64
|
end
|
65
|
-
|
65
|
+
|
66
66
|
####################
|
67
67
|
# Instance Methods #
|
68
68
|
####################
|
69
|
-
|
69
|
+
|
70
70
|
# Instantiates a new CFG object that uses *callback* to inform the
|
71
71
|
# programmer of the generation of new productions due to EBNF
|
72
72
|
# operators.
|
@@ -79,18 +79,18 @@ module RLTK
|
|
79
79
|
@production_counter = -1
|
80
80
|
@start_symbol = nil
|
81
81
|
@wrapper_symbol = nil
|
82
|
-
|
82
|
+
|
83
83
|
@productions_id = Hash.new
|
84
84
|
@productions_sym = Hash.new { |h, k| h[k] = [] }
|
85
85
|
@production_buffer = Array.new
|
86
|
-
|
86
|
+
|
87
87
|
@terms = Set.new([:EOS])
|
88
88
|
@nonterms = Set.new
|
89
|
-
|
89
|
+
|
90
90
|
@firsts = Hash.new
|
91
91
|
@follows = Hash.new { |h,k| h[k] = Array.new }
|
92
92
|
end
|
93
|
-
|
93
|
+
|
94
94
|
# Adds *production* to the appropriate internal data structures.
|
95
95
|
#
|
96
96
|
# @param [Production] production The production to add to the grammar.
|
@@ -98,10 +98,28 @@ module RLTK
|
|
98
98
|
# @return [void]
|
99
99
|
def add_production(production)
|
100
100
|
@productions_sym[production.lhs] << (@productions_id[production.id] = production)
|
101
|
-
|
101
|
+
|
102
102
|
production
|
103
103
|
end
|
104
|
-
|
104
|
+
|
105
|
+
# If the production already exists it will be returned. If it does not
|
106
|
+
# exist then it will be created and then returned.
|
107
|
+
#
|
108
|
+
# @param [Symbol] name The name of the production to add
|
109
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
110
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
111
|
+
#
|
112
|
+
# @return [Symbol] The name of the (possibly pre-existing) list production.
|
113
|
+
def get_list_production(name, list_elements, separator = '')
|
114
|
+
if @nonterms.include?(name)
|
115
|
+
name
|
116
|
+
|
117
|
+
else
|
118
|
+
build_list_production(name, list_elements, separator)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
alias :get_list :get_list_production
|
122
|
+
|
105
123
|
# Builds a production representing a (possibly empty) list of tokens.
|
106
124
|
# These tokens may optionally be separated by a provided token. This
|
107
125
|
# function is used to eliminate the EBNF * operator.
|
@@ -115,24 +133,42 @@ module RLTK
|
|
115
133
|
# Add the items for the following productions:
|
116
134
|
#
|
117
135
|
# name: | name_prime
|
118
|
-
|
136
|
+
|
119
137
|
name_prime = "#{name}_prime".to_sym
|
120
|
-
|
138
|
+
|
121
139
|
# 1st Production
|
122
140
|
production, _ = self.production(name, '')
|
123
141
|
@callback.call(:elp, :empty, production)
|
124
|
-
|
142
|
+
|
125
143
|
# 2nd Production
|
126
144
|
production, _ = self.production(name, name_prime)
|
127
145
|
@callback.call(:elp, :nonempty, production)
|
128
|
-
|
146
|
+
|
129
147
|
# Add remaining productions via nonempty_list helper.
|
130
148
|
self.nonempty_list(name_prime, list_elements, separator)
|
131
|
-
|
149
|
+
|
132
150
|
name
|
133
151
|
end
|
134
152
|
alias :list :build_list_production
|
135
|
-
|
153
|
+
|
154
|
+
# If the production already exists it will be returned. If it does not
|
155
|
+
# exist then it will be created and then returned.
|
156
|
+
#
|
157
|
+
# @param [Symbol] name The name of the production to add
|
158
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
159
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
160
|
+
#
|
161
|
+
# @return [Symbol] The name of the (possibly pre-existing) non-empty list production.
|
162
|
+
def get_nonempty_list_production(name, list_elements, separator = '')
|
163
|
+
if @nonterms.include?(name)
|
164
|
+
name
|
165
|
+
|
166
|
+
else
|
167
|
+
build_nonempty_list_production(name, list_elements, separator)
|
168
|
+
end
|
169
|
+
end
|
170
|
+
alias :get_nonempty_list :get_nonempty_list_production
|
171
|
+
|
136
172
|
# Builds a production representing a non-empty list of tokens. These
|
137
173
|
# tokens may optionally be separated by a provided token. This
|
138
174
|
# function is used to eliminate the EBNF + operator.
|
@@ -154,17 +190,17 @@ module RLTK
|
|
154
190
|
# name: name_list_elements | name separator name_list_elements
|
155
191
|
#
|
156
192
|
# name_list_elements: #{list_elements.join('|')}
|
157
|
-
|
193
|
+
|
158
194
|
build_elements_productions = false
|
159
|
-
|
195
|
+
|
160
196
|
list_element_string =
|
161
197
|
if list_elements.is_a?(Array)
|
162
198
|
if list_elements.empty?
|
163
199
|
raise ArgumentError, 'Parameter list_elements must not be empty.'
|
164
|
-
|
200
|
+
|
165
201
|
elsif list_elements.length == 1
|
166
202
|
list_elements.first
|
167
|
-
|
203
|
+
|
168
204
|
else
|
169
205
|
build_elements_productions = true
|
170
206
|
"#{name}_list_elements"
|
@@ -172,17 +208,17 @@ module RLTK
|
|
172
208
|
else
|
173
209
|
list_elements
|
174
210
|
end
|
175
|
-
|
211
|
+
|
176
212
|
list_element_selected_string = list_element_string.to_s.split.map { |s| ".#{s}" }.join(' ')
|
177
|
-
|
213
|
+
|
178
214
|
# Single Element Production
|
179
215
|
production, _ = self.production(name, list_element_string)
|
180
216
|
@callback.call(:nelp, :single, production)
|
181
|
-
|
217
|
+
|
182
218
|
# Multiple Element Production
|
183
219
|
production, selections = self.production(name, ".#{name} #{separator} #{list_element_selected_string}")
|
184
220
|
@callback.call(:nelp, :multiple, production, selections)
|
185
|
-
|
221
|
+
|
186
222
|
if build_elements_productions
|
187
223
|
# List Element Productions
|
188
224
|
list_elements.each do |element|
|
@@ -190,11 +226,28 @@ module RLTK
|
|
190
226
|
@callback.call(:nelp, :elements, production)
|
191
227
|
end
|
192
228
|
end
|
193
|
-
|
229
|
+
|
194
230
|
name
|
195
231
|
end
|
196
232
|
alias :nonempty_list :build_nonempty_list_production
|
197
|
-
|
233
|
+
|
234
|
+
# If the production already exists it will be returned. If it does not
|
235
|
+
# exist then it will be created and then returned.
|
236
|
+
#
|
237
|
+
# @param [Symbol] name The name of the production to add
|
238
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
239
|
+
#
|
240
|
+
# @return [Symbol] The name of the (possibly pre-existing) optional production.
|
241
|
+
def get_optional_production(name, list_elements)
|
242
|
+
if @nonterms.include?(name)
|
243
|
+
name
|
244
|
+
|
245
|
+
else
|
246
|
+
build_optional_production(name, list_elements)
|
247
|
+
end
|
248
|
+
end
|
249
|
+
alias :get_optional :get_optional_production
|
250
|
+
|
198
251
|
# Build a production for an optional symbol. This is used to
|
199
252
|
# eliminate the EBNF ? operator.
|
200
253
|
#
|
@@ -207,22 +260,23 @@ module RLTK
|
|
207
260
|
# Add the items for the following productions:
|
208
261
|
#
|
209
262
|
# name: | opt_symbol
|
210
|
-
|
263
|
+
|
211
264
|
# Empty production.
|
212
265
|
production = self.add_production(Production.new(self.next_id, name, []))
|
213
266
|
@callback.call(:optional, :empty, production)
|
214
|
-
|
267
|
+
|
215
268
|
# Nonempty production
|
216
269
|
production = self.add_production(Production.new(self.next_id, name, [opt_symbol]))
|
217
270
|
@callback.call(:optional, :nonempty, production)
|
218
|
-
|
271
|
+
|
219
272
|
# Add the new symbol to the list of nonterminals.
|
220
273
|
@nonterms << name
|
221
274
|
end
|
222
|
-
|
275
|
+
|
223
276
|
name
|
224
277
|
end
|
225
|
-
|
278
|
+
alias :optional :build_optional_production
|
279
|
+
|
226
280
|
# Sets the EBNF callback to *callback*.
|
227
281
|
#
|
228
282
|
# @param [Proc] callback A Proc object to be called when EBNF operators are expanded and list productions are added.
|
@@ -230,10 +284,10 @@ module RLTK
|
|
230
284
|
# @return [void]
|
231
285
|
def callback(&callback)
|
232
286
|
@callback = callback if callback
|
233
|
-
|
287
|
+
|
234
288
|
nil
|
235
289
|
end
|
236
|
-
|
290
|
+
|
237
291
|
# This function MUST be called inside a CFG.production block. It will
|
238
292
|
# make a new production with the left-hand side specified by the
|
239
293
|
# CFG.production call's argument. This is the function that is
|
@@ -244,61 +298,61 @@ module RLTK
|
|
244
298
|
# @return [Array(Production, Array<Integer>)]
|
245
299
|
def clause(expression)
|
246
300
|
raise GrammarError, 'CFG#clause called outside of CFG#production block.' if not @curr_lhs
|
247
|
-
|
301
|
+
|
248
302
|
lhs = @curr_lhs.to_sym
|
249
303
|
rhs = Array.new
|
250
304
|
tokens = @lexer.lex(expression.to_s)
|
251
305
|
selections = Array.new
|
252
|
-
|
306
|
+
|
253
307
|
# Set this as the start symbol if there isn't one already
|
254
308
|
# defined.
|
255
309
|
@start_symbol ||= lhs
|
256
|
-
|
310
|
+
|
257
311
|
# Remove EBNF tokens and replace them with new productions.
|
258
312
|
symbol_count = 0
|
259
313
|
tokens.each_index do |i|
|
260
314
|
ttype0 = tokens[i].type
|
261
315
|
tvalue0 = tokens[i].value
|
262
|
-
|
316
|
+
|
263
317
|
if ttype0 == :TERM or ttype0 == :NONTERM
|
264
|
-
|
318
|
+
|
265
319
|
# Add this symbol to the correct collection.
|
266
320
|
(ttype0 == :TERM ? @terms : @nonterms) << tvalue0
|
267
|
-
|
321
|
+
|
268
322
|
if i + 1 < tokens.length
|
269
323
|
ttype1 = tokens[i + 1].type
|
270
324
|
tvalue1 = tokens[i + 1].value
|
271
|
-
|
325
|
+
|
272
326
|
rhs <<
|
273
327
|
case ttype1
|
274
|
-
when :QUESTION then self.
|
275
|
-
when :STAR then self.
|
276
|
-
when :PLUS then self.
|
328
|
+
when :QUESTION then self.get_optional_production("#{tvalue0.downcase}_optional".to_sym, tvalue0)
|
329
|
+
when :STAR then self.get_list_production("#{tvalue0.downcase}_list".to_sym, tvalue0)
|
330
|
+
when :PLUS then self.get_nonempty_list_production("#{tvalue0.downcase}_nonempty_list".to_sym, tvalue0)
|
277
331
|
else tvalue0
|
278
332
|
end
|
279
333
|
else
|
280
334
|
rhs << tvalue0
|
281
335
|
end
|
282
|
-
|
336
|
+
|
283
337
|
symbol_count += 1
|
284
|
-
|
338
|
+
|
285
339
|
elsif ttype0 == :DOT
|
286
340
|
selections << symbol_count
|
287
341
|
end
|
288
342
|
end
|
289
|
-
|
343
|
+
|
290
344
|
# Make the production.
|
291
345
|
@production_buffer << [(production = Production.new(self.next_id, lhs, rhs)), selections]
|
292
|
-
|
346
|
+
|
293
347
|
# Make sure the production symbol is collected.
|
294
348
|
@nonterms << lhs
|
295
|
-
|
349
|
+
|
296
350
|
# Add the new production to our collections.
|
297
351
|
self.add_production(production)
|
298
|
-
|
352
|
+
|
299
353
|
return [production, selections]
|
300
354
|
end
|
301
|
-
|
355
|
+
|
302
356
|
# This function calculates the *first* set of a series of tokens. It
|
303
357
|
# uses the {CFG#first_set} helper function to find the first set of
|
304
358
|
# individual symbols.
|
@@ -309,21 +363,21 @@ module RLTK
|
|
309
363
|
def first_set(sentence)
|
310
364
|
if sentence.is_a?(Symbol)
|
311
365
|
first_set_prime(sentence)
|
312
|
-
|
366
|
+
|
313
367
|
elsif sentence.inject(true) { |m, sym| m and self.symbols.include?(sym) }
|
314
368
|
set0 = []
|
315
369
|
all_have_empty = true
|
316
|
-
|
370
|
+
|
317
371
|
sentence.each do |sym|
|
318
372
|
set0 |= (set1 = self.first_set(sym)) - [:'ɛ']
|
319
|
-
|
373
|
+
|
320
374
|
break if not (all_have_empty = set1.include?(:'ɛ'))
|
321
375
|
end
|
322
|
-
|
376
|
+
|
323
377
|
if all_have_empty then set0 + [:'ɛ'] else set0 end
|
324
378
|
end
|
325
379
|
end
|
326
|
-
|
380
|
+
|
327
381
|
# This function is responsible for calculating the *first* set of
|
328
382
|
# individual symbols.
|
329
383
|
#
|
@@ -341,7 +395,7 @@ module RLTK
|
|
341
395
|
[sym0]
|
342
396
|
else
|
343
397
|
set0 = []
|
344
|
-
|
398
|
+
|
345
399
|
@productions_sym[sym0].each do |production|
|
346
400
|
if production.rhs.empty?
|
347
401
|
# If this is an empty production we should
|
@@ -349,27 +403,27 @@ module RLTK
|
|
349
403
|
set0 << :'ɛ'
|
350
404
|
else
|
351
405
|
all_have_empty = true
|
352
|
-
|
406
|
+
|
353
407
|
production.rhs.each do |sym1|
|
354
|
-
|
408
|
+
|
355
409
|
set1 = []
|
356
|
-
|
410
|
+
|
357
411
|
# Grab the First set for the current
|
358
412
|
# symbol in this production.
|
359
413
|
if not seen_lh_sides.include?(sym1)
|
360
414
|
set0 |= (set1 = first_set_prime(sym1, seen_lh_sides << sym1)) - [:'ɛ']
|
361
415
|
end
|
362
|
-
|
416
|
+
|
363
417
|
break if not (all_have_empty = set1.include?(:'ɛ'))
|
364
418
|
end
|
365
|
-
|
419
|
+
|
366
420
|
# Add the empty production if this production
|
367
421
|
# is all non-terminals that can be reduced to
|
368
422
|
# the empty string.
|
369
423
|
set0 << :'ɛ' if all_have_empty
|
370
424
|
end
|
371
425
|
end
|
372
|
-
|
426
|
+
|
373
427
|
set0.uniq
|
374
428
|
end
|
375
429
|
else
|
@@ -377,7 +431,7 @@ module RLTK
|
|
377
431
|
end
|
378
432
|
end
|
379
433
|
private :first_set_prime
|
380
|
-
|
434
|
+
|
381
435
|
# Returns the *follow* set for a given symbol. The second argument is
|
382
436
|
# used to avoid infinite recursion when mutually recursive rules are
|
383
437
|
# encountered.
|
@@ -387,22 +441,22 @@ module RLTK
|
|
387
441
|
#
|
388
442
|
# @return [Array<Symbol>]
|
389
443
|
def follow_set(sym0, seen_lh_sides = [])
|
390
|
-
|
444
|
+
|
391
445
|
# Use the memoized set if possible.
|
392
446
|
return @follows[sym0] if @follows.has_key?(sym0)
|
393
|
-
|
447
|
+
|
394
448
|
if @nonterms.member? sym0
|
395
449
|
set0 = []
|
396
|
-
|
450
|
+
|
397
451
|
# Add EOS to the start symbol's follow set.
|
398
452
|
set0 << :EOS if sym0 == @start_symbol
|
399
|
-
|
453
|
+
|
400
454
|
@productions_id.values.each do |production|
|
401
455
|
production.rhs.each_with_index do |sym1, i|
|
402
456
|
if i + 1 < production.rhs.length
|
403
457
|
if sym0 == sym1
|
404
458
|
set0 |= (set1 = self.first_set(production.rhs[(i + 1)..-1])) - [:'ɛ']
|
405
|
-
|
459
|
+
|
406
460
|
set0 |= self.follow_set(production.lhs) if set1.include?(:'ɛ')
|
407
461
|
end
|
408
462
|
elsif sym0 != production.lhs and sym0 == sym1 and not seen_lh_sides.include?(production.lhs)
|
@@ -410,7 +464,7 @@ module RLTK
|
|
410
464
|
end
|
411
465
|
end
|
412
466
|
end
|
413
|
-
|
467
|
+
|
414
468
|
if seen_lh_sides.empty? or not set0.empty?
|
415
469
|
# Memoize the result for later.
|
416
470
|
@follows[sym0] |= set0
|
@@ -421,17 +475,17 @@ module RLTK
|
|
421
475
|
[]
|
422
476
|
end
|
423
477
|
end
|
424
|
-
|
478
|
+
|
425
479
|
# @return [Integer] ID for the next production to be defined.
|
426
480
|
def next_id
|
427
481
|
@production_counter += 1
|
428
482
|
end
|
429
|
-
|
483
|
+
|
430
484
|
# @return [Set<Symbol>] All non-terminal symbols used in the grammar's definition.
|
431
485
|
def nonterms
|
432
486
|
@nonterms.clone
|
433
487
|
end
|
434
|
-
|
488
|
+
|
435
489
|
# Builds a new production with the left-hand side value of *symbol*.
|
436
490
|
# If *expression* is specified it is taken as the right-hand side of the
|
437
491
|
# production. If *expression* is nil then *block* is evaluated, and
|
@@ -445,23 +499,23 @@ module RLTK
|
|
445
499
|
# an array of productions otherwise
|
446
500
|
def production(symbol, expression = nil, &block)
|
447
501
|
@production_buffer = Array.new
|
448
|
-
|
502
|
+
|
449
503
|
prev_lhs = @curr_lhs
|
450
504
|
@curr_lhs = symbol
|
451
|
-
|
505
|
+
|
452
506
|
ret_val =
|
453
507
|
if expression
|
454
508
|
self.clause(expression)
|
455
509
|
else
|
456
510
|
self.instance_exec(&block)
|
457
|
-
|
511
|
+
|
458
512
|
@production_buffer.clone
|
459
513
|
end
|
460
|
-
|
514
|
+
|
461
515
|
@curr_lhs = prev_lhs
|
462
516
|
return ret_val
|
463
517
|
end
|
464
|
-
|
518
|
+
|
465
519
|
# If *by* is :sym, returns a hash of the grammar's productions, using
|
466
520
|
# the productions' left-hand side symbol as the key. If *by* is :id
|
467
521
|
# an array of productions is returned in the order of their
|
@@ -479,7 +533,7 @@ module RLTK
|
|
479
533
|
nil
|
480
534
|
end
|
481
535
|
end
|
482
|
-
|
536
|
+
|
483
537
|
# Sets the start symbol for this grammar.
|
484
538
|
#
|
485
539
|
# @param [Symbol] symbol The new start symbol.
|
@@ -489,32 +543,32 @@ module RLTK
|
|
489
543
|
if not CFG::is_nonterminal?(symbol)
|
490
544
|
raise GrammarError, 'Start symbol must be a non-terminal.'
|
491
545
|
end
|
492
|
-
|
546
|
+
|
493
547
|
@start_symbol = symbol
|
494
548
|
end
|
495
|
-
|
549
|
+
|
496
550
|
# @return [Array<Symbol>] All symbols used in the grammar's definition.
|
497
551
|
def symbols
|
498
552
|
self.terms + self.nonterms
|
499
553
|
end
|
500
|
-
|
554
|
+
|
501
555
|
# @return [Set<Symbol>] All terminal symbols used in the grammar's definition.
|
502
556
|
def terms
|
503
557
|
@terms.clone
|
504
558
|
end
|
505
|
-
|
559
|
+
|
506
560
|
# Oddly enough, the Production class represents a production in a
|
507
561
|
# context-free grammar.
|
508
562
|
class Production
|
509
563
|
# @return [Integer] ID of this production.
|
510
564
|
attr_reader :id
|
511
|
-
|
565
|
+
|
512
566
|
# @return [Symbol] Left-hand side of this production.
|
513
567
|
attr_reader :lhs
|
514
|
-
|
568
|
+
|
515
569
|
# @return [Array<Symbol>] Right-hand side of this production.
|
516
570
|
attr_reader :rhs
|
517
|
-
|
571
|
+
|
518
572
|
# Instantiates a new Production object with the specified ID,
|
519
573
|
# and left- and right-hand sides.
|
520
574
|
#
|
@@ -526,7 +580,7 @@ module RLTK
|
|
526
580
|
@lhs = lhs
|
527
581
|
@rhs = rhs
|
528
582
|
end
|
529
|
-
|
583
|
+
|
530
584
|
# Compares one production to another. Returns true only if the
|
531
585
|
# left- and right- hand sides match.
|
532
586
|
#
|
@@ -536,22 +590,22 @@ module RLTK
|
|
536
590
|
def ==(other)
|
537
591
|
self.lhs == other.lhs and self.rhs == other.rhs
|
538
592
|
end
|
539
|
-
|
593
|
+
|
540
594
|
# @return [Production] A new copy of this production.
|
541
595
|
def copy
|
542
596
|
Production.new(@id, @lhs, @rhs.clone)
|
543
597
|
end
|
544
|
-
|
598
|
+
|
545
599
|
# @return [Symbol] The last terminal in the right-hand side of the production.
|
546
600
|
def last_terminal
|
547
601
|
@rhs.inject(nil) { |m, sym| if CFG::is_terminal?(sym) then sym else m end }
|
548
602
|
end
|
549
|
-
|
603
|
+
|
550
604
|
# @return [Item] An Item based on this production.
|
551
605
|
def to_item
|
552
606
|
Item.new(0, @id, @lhs, @rhs)
|
553
607
|
end
|
554
|
-
|
608
|
+
|
555
609
|
# Returns a string representation of this production.
|
556
610
|
#
|
557
611
|
# @param [Integer] padding The amount of padding spaces to add to the beginning of the string.
|
@@ -561,12 +615,12 @@ module RLTK
|
|
561
615
|
"#{format("%-#{padding}s", @lhs)} -> #{@rhs.empty? ? 'ɛ' : @rhs.map { |s| s.to_s }.join(' ')}"
|
562
616
|
end
|
563
617
|
end
|
564
|
-
|
618
|
+
|
565
619
|
# The Item class represents a CFG production with dot in it.
|
566
620
|
class Item < Production
|
567
621
|
# @return [Integer] Index of the next symbol in this item.
|
568
622
|
attr_reader :dot
|
569
|
-
|
623
|
+
|
570
624
|
# Instantiates a new Item object with a dot located before the
|
571
625
|
# symbol at index *dot* of the right-hand side. The remaining
|
572
626
|
# arguments (*args*) should be as specified by
|
@@ -576,11 +630,11 @@ module RLTK
|
|
576
630
|
# @param [Array<Object>] args (see {Production#initialize})
|
577
631
|
def initialize(dot, *args)
|
578
632
|
super(*args)
|
579
|
-
|
633
|
+
|
580
634
|
# The Dot indicates the NEXT symbol to be read.
|
581
635
|
@dot = dot
|
582
636
|
end
|
583
|
-
|
637
|
+
|
584
638
|
# Compares two items.
|
585
639
|
#
|
586
640
|
# @param [Item] other Another item to compare to.
|
@@ -589,7 +643,7 @@ module RLTK
|
|
589
643
|
def ==(other)
|
590
644
|
self.dot == other.dot and self.lhs == other.lhs and self.rhs == other.rhs
|
591
645
|
end
|
592
|
-
|
646
|
+
|
593
647
|
# Moves the item's dot forward by one if the end of the right-hand
|
594
648
|
# side hasn't already been reached.
|
595
649
|
#
|
@@ -599,26 +653,26 @@ module RLTK
|
|
599
653
|
@dot += 1
|
600
654
|
end
|
601
655
|
end
|
602
|
-
|
656
|
+
|
603
657
|
# Tests to see if the dot is at the end of the right-hand side.
|
604
658
|
#
|
605
659
|
# @return [Boolean]
|
606
660
|
def at_end?
|
607
661
|
@dot == @rhs.length
|
608
662
|
end
|
609
|
-
|
663
|
+
|
610
664
|
# @return [Item] A new copy of this item.
|
611
665
|
def copy
|
612
666
|
Item.new(@dot, @id, @lhs, @rhs.clone)
|
613
667
|
end
|
614
|
-
|
668
|
+
|
615
669
|
# Returns the symbol located after the dot.
|
616
670
|
#
|
617
671
|
# @return [Symbol] Symbol located after the dot (at the index indicated by the {#dot} attribute).
|
618
672
|
def next_symbol
|
619
673
|
@rhs[@dot]
|
620
674
|
end
|
621
|
-
|
675
|
+
|
622
676
|
# Returns a string representation of this item.
|
623
677
|
#
|
624
678
|
# @param [Integer] padding The amount of padding spaces to add to the beginning of the string.
|