rltk3 3.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/LICENSE +27 -0
- data/README.md +852 -0
- data/Rakefile +197 -0
- data/lib/rltk/ast.rb +573 -0
- data/lib/rltk/cfg.rb +683 -0
- data/lib/rltk/cg/basic_block.rb +157 -0
- data/lib/rltk/cg/bindings.rb +151 -0
- data/lib/rltk/cg/builder.rb +1127 -0
- data/lib/rltk/cg/context.rb +48 -0
- data/lib/rltk/cg/contractor.rb +51 -0
- data/lib/rltk/cg/execution_engine.rb +194 -0
- data/lib/rltk/cg/function.rb +237 -0
- data/lib/rltk/cg/generated_bindings.rb +8118 -0
- data/lib/rltk/cg/generic_value.rb +95 -0
- data/lib/rltk/cg/instruction.rb +519 -0
- data/lib/rltk/cg/llvm.rb +150 -0
- data/lib/rltk/cg/memory_buffer.rb +75 -0
- data/lib/rltk/cg/module.rb +451 -0
- data/lib/rltk/cg/pass_manager.rb +252 -0
- data/lib/rltk/cg/support.rb +29 -0
- data/lib/rltk/cg/target.rb +230 -0
- data/lib/rltk/cg/triple.rb +58 -0
- data/lib/rltk/cg/type.rb +554 -0
- data/lib/rltk/cg/value.rb +1272 -0
- data/lib/rltk/cg.rb +32 -0
- data/lib/rltk/lexer.rb +372 -0
- data/lib/rltk/lexers/calculator.rb +44 -0
- data/lib/rltk/lexers/ebnf.rb +38 -0
- data/lib/rltk/parser.rb +1702 -0
- data/lib/rltk/parsers/infix_calc.rb +43 -0
- data/lib/rltk/parsers/postfix_calc.rb +34 -0
- data/lib/rltk/parsers/prefix_calc.rb +34 -0
- data/lib/rltk/token.rb +90 -0
- data/lib/rltk/version.rb +11 -0
- data/lib/rltk.rb +16 -0
- data/test/cg/tc_basic_block.rb +83 -0
- data/test/cg/tc_control_flow.rb +191 -0
- data/test/cg/tc_function.rb +54 -0
- data/test/cg/tc_generic_value.rb +33 -0
- data/test/cg/tc_instruction.rb +256 -0
- data/test/cg/tc_llvm.rb +25 -0
- data/test/cg/tc_math.rb +88 -0
- data/test/cg/tc_module.rb +89 -0
- data/test/cg/tc_transforms.rb +68 -0
- data/test/cg/tc_type.rb +69 -0
- data/test/cg/tc_value.rb +151 -0
- data/test/cg/ts_cg.rb +23 -0
- data/test/tc_ast.rb +332 -0
- data/test/tc_cfg.rb +164 -0
- data/test/tc_lexer.rb +216 -0
- data/test/tc_parser.rb +711 -0
- data/test/tc_token.rb +34 -0
- data/test/ts_rltk.rb +47 -0
- metadata +317 -0
data/lib/rltk/cfg.rb
ADDED
@@ -0,0 +1,683 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Author: Chris Wailes <chris.wailes@gmail.com>
|
4
|
+
# Project: Ruby Language Toolkit
|
5
|
+
# Date: 2011/03/24
|
6
|
+
# Description: This file contains a class representing a context-free
|
7
|
+
# grammar.
|
8
|
+
|
9
|
+
############
|
10
|
+
# Requires #
|
11
|
+
############
|
12
|
+
|
13
|
+
# Standard Library
|
14
|
+
require 'set'
|
15
|
+
|
16
|
+
# Ruby Language Toolkit
|
17
|
+
require 'rltk/lexers/ebnf'
|
18
|
+
|
19
|
+
#######################
|
20
|
+
# Classes and Modules #
|
21
|
+
#######################
|
22
|
+
|
23
|
+
module RLTK
|
24
|
+
# An exception class that represents a problem with a context-free
|
25
|
+
# grammar's definition.
|
26
|
+
class GrammarError < StandardError
end
|
27
|
+
|
28
|
+
# The CFG class is used to represent context-free grammars. It is used by
|
29
|
+
# the RLTK::Parser class to represent the parser's grammar, but can also be
|
30
|
+
# used to manipulate arbitrary CFGs.
|
31
|
+
class CFG
|
32
|
+
|
33
|
+
# @return [Symbol] The grammar's starting symbol.
|
34
|
+
attr_reader :start_symbol
|
35
|
+
|
36
|
+
# This is used by the {CFG#production} method to wrap {CFG#clause}
|
37
|
+
# calls.
|
38
|
+
#
|
39
|
+
# @return [Symbol] The current left-hand side symbol.
|
40
|
+
attr_accessor :curr_lhs
|
41
|
+
|
42
|
+
#################
|
43
|
+
# Class Methods #
|
44
|
+
#################
|
45
|
+
|
46
|
+
# Tests to see if a symbol is a terminal symbol, as used by the CFG
|
47
|
+
# class.
|
48
|
+
#
|
49
|
+
# @param [Symbol] sym The symbol to test.
|
50
|
+
#
|
51
|
+
# @return [Boolean]
|
52
|
+
def self.is_terminal?(sym)
  # A missing symbol (nil or false) is handed straight back as the answer.
  return sym unless sym

  # Terminals are the symbols whose name is unchanged by upcasing.
  name = sym.to_s
  name == name.upcase
end
|
55
|
+
|
56
|
+
# Tests to see if a symbol is a non-terminal symbol, as used by the
|
57
|
+
# CFG class.
|
58
|
+
#
|
59
|
+
# @param [Symbol] sym The symbol to test.
|
60
|
+
#
|
61
|
+
# @return [Boolean]
|
62
|
+
def self.is_nonterminal?(sym)
  # A missing symbol (nil or false) is handed straight back as the answer.
  return sym unless sym

  # Non-terminals are the symbols whose name is unchanged by downcasing.
  name = sym.to_s
  name == name.downcase
end
|
65
|
+
|
66
|
+
####################
|
67
|
+
# Instance Methods #
|
68
|
+
####################
|
69
|
+
|
70
|
+
# Instantiates a new CFG object that uses *callback* to inform the
|
71
|
+
# programmer of the generation of new productions due to EBNF
|
72
|
+
# operators.
|
73
|
+
#
|
74
|
+
# @param [Proc] callback A Proc object to be called when EBNF operators are expanded.
|
75
|
+
def initialize(&callback)
  # Definition state: nothing is being defined yet and no production IDs
  # have been handed out (the first call to next_id yields 0).
  @curr_lhs           = nil
  @start_symbol       = nil
  @wrapper_symbol     = nil
  @production_counter = -1

  # Collaborators: the EBNF callback (a no-op unless a block was given) and
  # the lexer used to tokenize right-hand side expressions.
  @callback = callback || Proc.new {}
  @lexer    = Lexers::EBNF.new

  # Productions indexed by ID and grouped by left-hand side, plus the buffer
  # of productions created by the most recent CFG#production call.
  @productions_id    = {}
  @productions_sym   = Hash.new { |h, k| h[k] = [] }
  @production_buffer = []

  # Known symbols; :EOS is always registered as a terminal.
  @terms    = Set.new([:EOS])
  @nonterms = Set.new

  # Memoized first and follow sets.
  @firsts  = {}
  @follows = Hash.new { |h,k| h[k] = Array.new }
end
|
93
|
+
|
94
|
+
# Adds *production* to the appropriate internal data structures.
|
95
|
+
#
|
96
|
+
# @param [Production] production The production to add to the grammar.
|
97
|
+
#
|
98
|
+
# @return [void]
|
99
|
+
def add_production(production)
  # Look up (or create) the list of productions sharing this left-hand side.
  siblings = @productions_sym[production.lhs]

  # Register the production under its ID, then file it with its siblings.
  @productions_id[production.id] = production
  siblings << production

  production
end
|
104
|
+
|
105
|
+
# If the production already exists it will be returned. If it does not
|
106
|
+
# exist then it will be created and then returned.
|
107
|
+
#
|
108
|
+
# @param [Symbol] name The name of the production to add
|
109
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
110
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
111
|
+
#
|
112
|
+
# @return [Symbol] The value of the name argument
|
113
|
+
def get_list_production(name, list_elements, separator = '')
  # A name that is already a known non-terminal is reused as-is.
  return name if @nonterms.include?(name)

  build_list_production(name, list_elements, separator)
end
alias :get_list :get_list_production
|
122
|
+
|
123
|
+
# Builds a production representing a (possibly empty) list of tokens.
|
124
|
+
# These tokens may optionally be separated by a provided token. This
|
125
|
+
# function is used to eliminate the EBNF * operator.
|
126
|
+
#
|
127
|
+
# @param [Symbol] name The name of the production to add
|
128
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
129
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
130
|
+
#
|
131
|
+
# @return [Symbol] The value of the name argument
|
132
|
+
def build_list_production(name, list_elements, separator = '')
  # Generates the productions
  #
  #   name: | name_prime
  #
  # and delegates the non-empty part (name_prime) to the nonempty_list helper.
  name_prime = :"#{name}_prime"

  # Empty alternative.
  empty_production, = self.production(name, '')
  @callback.call(:elp, :empty, empty_production)

  # Non-empty alternative.
  nonempty_production, = self.production(name, name_prime)
  @callback.call(:elp, :nonempty, nonempty_production)

  # Productions describing the non-empty list itself.
  self.nonempty_list(name_prime, list_elements, separator)

  name
end
alias :list :build_list_production
|
153
|
+
|
154
|
+
# If the production already exists it will be returned. If it does not
|
155
|
+
# exist then it will be created and then returned.
|
156
|
+
#
|
157
|
+
# @param [Symbol] name The name of the production to add
|
158
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
159
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
160
|
+
#
|
161
|
+
# @return [Symbol] The value of the name argument
|
162
|
+
def get_nonempty_list_production(name, list_elements, separator = '')
  # A name that is already a known non-terminal is reused as-is.
  return name if @nonterms.include?(name)

  build_nonempty_list_production(name, list_elements, separator)
end
alias :get_nonempty_list :get_nonempty_list_production
|
171
|
+
|
172
|
+
# Builds a production representing a non-empty list of tokens. These
|
173
|
+
# tokens may optionally be separated by a provided token. This
|
174
|
+
# function is used to eliminate the EBNF + operator.
|
175
|
+
#
|
176
|
+
# @param [Symbol] name The name of the production to add
|
177
|
+
# @param [String, Symbol, Array<String, Symbol>] list_elements Expression(s) that may appear in the list
|
178
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
179
|
+
#
|
180
|
+
# @return [Symbol] The value of the name argument
|
181
|
+
def build_nonempty_list_production(name, list_elements, separator = '')
  # Generates, when there is a single list element:
  #
  #   name: list_element | name separator list_element
  #
  # and, when list_elements is an array with several entries:
  #
  #   name: name_list_elements | name separator name_list_elements
  #   name_list_elements: one production per entry of list_elements

  given_array = list_elements.is_a?(Array)

  if given_array && list_elements.empty?
    raise ArgumentError, 'Parameter list_elements must not be empty.'
  end

  # Only an array with more than one entry needs the helper non-terminal.
  needs_helper = given_array && list_elements.length > 1

  element =
    if needs_helper
      "#{name}_list_elements"
    elsif given_array
      list_elements.first
    else
      list_elements
    end

  # The same element expression with every symbol marked as selected.
  selected_element = element.to_s.split.map { |part| ".#{part}" }.join(' ')

  # Single Element Production
  single, = self.production(name, element)
  @callback.call(:nelp, :single, single)

  # Multiple Element Production
  multiple, selections = self.production(name, ".#{name} #{separator} #{selected_element}")
  @callback.call(:nelp, :multiple, multiple, selections)

  # List Element Productions
  if needs_helper
    list_elements.each do |alternative|
      helper, = self.production(element, alternative)
      @callback.call(:nelp, :elements, helper)
    end
  end

  name
end
alias :nonempty_list :build_nonempty_list_production
|
233
|
+
|
234
|
+
# If the production already exists it will be returned. If it does not
|
235
|
+
# exist then it will be created and then returned.
|
236
|
+
#
|
237
|
+
# @param [Symbol] name The name of the production to add
|
238
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
239
|
+
#
|
240
|
+
# @return [Symbol] The value of the name argument
|
241
|
+
def get_optional_production(name, list_elements)
  # A name that is already a known non-terminal is reused as-is.
  return name if @nonterms.include?(name)

  build_optional_production(name, list_elements)
end
alias :get_optional :get_optional_production
|
250
|
+
|
251
|
+
# Build a production for an optional symbol. This is used to
|
252
|
+
# eliminate the EBNF ? operator.
|
253
|
+
#
|
254
|
+
# @param [Symbol] name The name for the new production
|
255
|
+
# @param [Symbol] opt_symbol Symbol to expand
|
256
|
+
#
|
257
|
+
# @return [Symbol] The value of the name argument
|
258
|
+
def build_optional_production(name, opt_symbol)
  # Nothing to build when productions for this name already exist.
  return name if @productions_sym.has_key?(name)

  # Generates the productions
  #
  #   name: | opt_symbol
  #
  # reporting each one to the EBNF callback right after it is added.
  [[:empty, []], [:nonempty, [opt_symbol]]].each do |kind, rhs|
    production = self.add_production(Production.new(self.next_id, name, rhs))
    @callback.call(:optional, kind, production)
  end

  # Add the new symbol to the list of nonterminals.
  @nonterms << name

  name
end
alias :optional :build_optional_production
|
279
|
+
|
280
|
+
# Sets the EBNF callback to *callback*.
|
281
|
+
#
|
282
|
+
# @param [Proc] callback A Proc object to be called when EBNF operators are expanded and list productions are added.
|
283
|
+
#
|
284
|
+
# @return [void]
|
285
|
+
def callback(&callback)
  # Keep the current callback when no block is supplied.
  @callback = callback unless callback.nil?

  nil
end
|
290
|
+
|
291
|
+
# This function MUST be called inside a CFG.production block. It will
|
292
|
+
# make a new production with the left-hand side specified by the
|
293
|
+
# CFG.production call's argument. This is the function that is
|
294
|
+
# responsible for removing EBNF symbols from the grammar.
|
295
|
+
#
|
296
|
+
# @param [String, Symbol] expression The right-hand side of a CFG production.
|
297
|
+
#
|
298
|
+
# @return [Array(Production, Array<Integer>)]
|
299
|
+
def clause(expression)
  unless @curr_lhs
    raise GrammarError, 'CFG#clause called outside of CFG#production block.'
  end

  lhs        = @curr_lhs.to_sym
  tokens     = @lexer.lex(expression.to_s)
  rhs        = []
  selections = []

  # The first left-hand side ever defined doubles as the default start
  # symbol.
  @start_symbol ||= lhs

  # Walk the tokens, replacing EBNF-suffixed symbols with the names of
  # generated productions and recording which symbols a dot selects.
  symbol_count = 0
  tokens.each_with_index do |token, index|
    kind = token.type

    case kind
    when :TERM, :NONTERM
      value = token.value

      # Add this symbol to the correct collection.
      (kind == :TERM ? @terms : @nonterms) << value

      # Type of the token that follows, if any; it may be an EBNF operator
      # that applies to this symbol.
      suffix = (index + 1 < tokens.length) ? tokens[index + 1].type : nil

      rhs <<
        case suffix
        when :QUESTION then self.get_optional_production(:"#{value.downcase}_optional", value)
        when :STAR     then self.get_list_production(:"#{value.downcase}_list", value)
        when :PLUS     then self.get_nonempty_list_production(:"#{value.downcase}_nonempty_list", value)
        else value
        end

      symbol_count += 1

    when :DOT
      # A dot selects the symbol that comes next.
      selections << symbol_count
    end
  end

  # Make the production and remember it for the enclosing production call.
  production = Production.new(self.next_id, lhs, rhs)
  @production_buffer << [production, selections]

  # Make sure the production symbol is collected.
  @nonterms << lhs

  # Add the new production to our collections.
  self.add_production(production)

  [production, selections]
end
|
352
|
+
|
353
|
+
# This function calculates the *first* set of a series of tokens. It
|
354
|
+
# uses the {CFG#first_set} helper function to find the first set of
|
355
|
+
# individual symbols.
|
356
|
+
#
|
357
|
+
# @param [Symbol, Array<Symbol>] sentence Sentence to find the *first set* for.
|
358
|
+
#
|
359
|
+
# @return [Array<Symbol>] The *first set* for the given sentence.
|
360
|
+
def first_set(sentence)
  # A lone symbol is answered directly by the single-symbol helper.
  return first_set_prime(sentence) if sentence.is_a?(Symbol)

  # Build the symbol collection once; each call to #symbols clones and
  # merges the terminal and non-terminal sets.
  known = self.symbols

  # A sentence mentioning a symbol that is not part of the grammar has no
  # first set; nil (not an empty array) is returned in that case.
  return nil unless sentence.all? { |sym| known.include?(sym) }

  firsts   = []
  nullable = true

  sentence.each do |sym|
    sym_firsts = self.first_set(sym)
    firsts |= sym_firsts - [:'ɛ']

    # Later symbols only contribute while everything before them can
    # derive the empty string.
    break unless (nullable = sym_firsts.include?(:'ɛ'))
  end

  # The sentence itself can be empty only if every symbol in it can.
  nullable ? firsts + [:'ɛ'] : firsts
end
|
377
|
+
|
378
|
+
# This function is responsible for calculating the *first* set of
|
379
|
+
# individual symbols.
|
380
|
+
#
|
381
|
+
# @param [Symbol] sym0 The symbol to find the *first set* of.
|
382
|
+
# @param [Array<Symbol>] seen_lh_sides Previously seen LHS symbols.
|
383
|
+
#
|
384
|
+
# @return [Array<Symbol>]
|
385
|
+
def first_set_prime(sym0, seen_lh_sides = [])
  # NOTE: seen_lh_sides is accepted only for backward compatibility. Cycles
  # are handled by iterating to a fixed point below, so no "seen" list is
  # needed. (A shared seen list wrongly treated a repeated symbol as a cycle
  # and allowed truncated sets to be memoized.)
  known = self.symbols

  # Symbols that are not part of the grammar have an empty first set.
  return [] unless known.include?(sym0)

  # Use the memoized set if possible.
  return @firsts[sym0] if @firsts[sym0]

  # If the symbol is a terminal, it is the only symbol in its first set.
  return (@firsts[sym0] = [sym0]) if CFG::is_terminal?(sym0)

  # Collect every not-yet-memoized non-terminal that sym0's first set can
  # depend on. `sets` doubles as the visited marker; `order` lists the
  # non-terminals dependencies-first so acyclic grammars settle in one pass.
  sets  = {}
  order = []
  visit = lambda do |nonterm|
    sets[nonterm] = []

    @productions_sym[nonterm].each do |production|
      production.rhs.each do |sym|
        next if sets.key?(sym) || @firsts[sym]
        next unless known.include?(sym) && !CFG::is_terminal?(sym)

        visit.call(sym)
      end
    end

    order << nonterm
  end
  visit.call(sym0)

  # Current best knowledge of a right-hand side symbol's first set.
  first_of = lambda do |sym|
    if !known.include?(sym)
      []
    elsif @firsts[sym]
      @firsts[sym]
    elsif CFG::is_terminal?(sym)
      [sym]
    else
      sets.fetch(sym, [])
    end
  end

  # Grow the working sets until a full pass adds nothing new.
  loop do
    changed = false

    order.each do |nonterm|
      set0 = sets[nonterm]

      @productions_sym[nonterm].each do |production|
        # An empty right-hand side never enters the loop below, so the
        # production counts as nullable and contributes the empty string.
        nullable = true

        production.rhs.each do |sym|
          set1  = first_of.call(sym)
          fresh = set1 - [:'ɛ'] - set0

          unless fresh.empty?
            set0.concat(fresh)
            changed = true
          end

          # Later symbols only matter while every earlier one is nullable.
          break unless (nullable = set1.include?(:'ɛ'))
        end

        if nullable && !set0.include?(:'ɛ')
          set0 << :'ɛ'
          changed = true
        end
      end
    end

    break unless changed
  end

  # Every set computed here is complete, so all of them can be memoized.
  sets.each { |nonterm, set| @firsts[nonterm] = set }

  @firsts[sym0]
end
private :first_set_prime
|
431
|
+
|
432
|
+
# Returns the *follow* set for a given symbol. The second argument is
|
433
|
+
# used to avoid infinite recursion when mutually recursive rules are
|
434
|
+
# encountered.
|
435
|
+
#
|
436
|
+
# @param [Symbol] sym0 The symbol to find the *follow set* for.
|
437
|
+
# @param [Array<Symbol>] seen_lh_sides Previously seen LHS symbols.
|
438
|
+
#
|
439
|
+
# @return [Array<Symbol>]
|
440
|
+
def follow_set(sym0, seen_lh_sides = [])
  # Use the memoized set if possible.
  return @follows[sym0] if @follows.has_key?(sym0)

  # Only non-terminals have a follow set here.
  return [] unless @nonterms.member?(sym0)

  # EOS follows the start symbol.
  follows = (sym0 == @start_symbol) ? [:EOS] : []

  @productions_id.each_value do |production|
    rhs = production.rhs

    rhs.each_with_index do |sym1, index|
      next unless sym0 == sym1

      if index + 1 < rhs.length
        # Something comes after sym0: whatever can start the remainder of
        # the right-hand side follows it.
        tail_firsts = self.first_set(rhs[(index + 1)..-1])
        follows |= tail_firsts - [:'ɛ']

        # If the remainder can vanish, the follow set of the left-hand
        # side applies too.
        follows |= self.follow_set(production.lhs) if tail_firsts.include?(:'ɛ')

      elsif sym0 != production.lhs && !seen_lh_sides.include?(production.lhs)
        # sym0 ends the production: inherit the left-hand side's follow
        # set, recording the left-hand side in the shared seen list first.
        follows |= self.follow_set(production.lhs, seen_lh_sides << production.lhs)
      end
    end
  end

  if seen_lh_sides.empty? || !follows.empty?
    # Memoize the result for later.
    @follows[sym0] = @follows[sym0] | follows
  else
    follows
  end
end
|
475
|
+
|
476
|
+
# @return [Integer] ID for the next production to be defined.
|
477
|
+
def next_id
  # Bump the counter and hand back the freshly assigned value.
  @production_counter = @production_counter.succ
end
|
480
|
+
|
481
|
+
# @return [Set<Symbol>] All non-terminal symbols used in the grammar's definition.
|
482
|
+
def nonterms
  # Hand out a copy so callers cannot alter the grammar's own collection.
  snapshot = @nonterms.clone
  snapshot
end
|
485
|
+
|
486
|
+
# Builds a new production with the left-hand side value of *symbol*.
|
487
|
+
# If *expression* is specified it is taken as the right-hand side of
|
488
|
+
# production. If *expression* is nil then *block* is evaluated, and
|
489
|
+
# expected to make one or more calls to {CFG#clause}.
|
490
|
+
#
|
491
|
+
# @param [Symbol] symbol The left-hand side of a production
|
492
|
+
# @param [String, Symbol] expression The right-hand side of a production
|
493
|
+
# @param [Proc] block Optional block for defining production clauses
|
494
|
+
#
|
495
|
+
# @return [Array] A (production, selections) pair if called with an expression;
|
496
|
+
# an array of (production, selections) pairs otherwise
|
497
|
+
def production(symbol, expression = nil, &block)
  # Start with an empty buffer; CFG#clause appends every production it
  # creates to it.
  @production_buffer = Array.new

  # Remember the enclosing left-hand side so nested calls (made while EBNF
  # operators are expanded) can put it back afterwards.
  prev_lhs  = @curr_lhs
  @curr_lhs = symbol

  begin
    if expression
      # Returns a single [production, selections] pair.
      self.clause(expression)
    else
      # The block is expected to call CFG#clause one or more times; the
      # result is a copy of the buffered [production, selections] pairs.
      self.instance_exec(&block)

      @production_buffer.clone
    end
  ensure
    # Restore the previous left-hand side even when a clause raises, so a
    # failed definition does not leave the grammar looking as if it were
    # still inside a production block.
    @curr_lhs = prev_lhs
  end
end
|
515
|
+
|
516
|
+
# If *by* is :sym, returns a hash of the grammar's productions, using
|
517
|
+
# the productions' left-hand side symbol as the key. If *by* is :id
|
518
|
+
# a hash of productions keyed by ID is returned, in the order of their
|
519
|
+
# definition.
|
520
|
+
#
|
521
|
+
# @param [:sym, :id] by The way in which productions should be returned.
|
522
|
+
#
|
523
|
+
# @return [Hash{Symbol => Array<Production>}, Hash{Integer => Production}, nil]
|
524
|
+
def productions(by = :sym)
  # Productions grouped by left-hand side symbol.
  return @productions_sym if by == :sym

  # Productions keyed by their ID.
  return @productions_id if by == :id

  # Any other selector is not understood.
  nil
end
|
533
|
+
|
534
|
+
# Sets the start symbol for this grammar.
|
535
|
+
#
|
536
|
+
# @param [Symbol] symbol The new start symbol.
|
537
|
+
#
|
538
|
+
# @return [Symbol]
|
539
|
+
def start(symbol)
  # Reject anything that does not look like a non-terminal.
  raise GrammarError, 'Start symbol must be a non-terminal.' unless CFG::is_nonterminal?(symbol)

  @start_symbol = symbol
end
|
546
|
+
|
547
|
+
# @return [Set<Symbol>] All symbols used in the grammar's definition.
|
548
|
+
def symbols
  # Merge the copies of the terminal and non-terminal collections.
  self.terms.union(self.nonterms)
end
|
551
|
+
|
552
|
+
# @return [Set<Symbol>] All terminal symbols used in the grammar's definition.
|
553
|
+
def terms
  # Hand out a copy so callers cannot alter the grammar's own collection.
  snapshot = @terms.clone
  snapshot
end
|
556
|
+
|
557
|
+
# Oddly enough, the Production class represents a production in a
|
558
|
+
# context-free grammar.
|
559
|
+
class Production
  # @return [Integer] ID of this production.
  attr_reader :id

  # @return [Symbol] Left-hand side of this production.
  attr_reader :lhs

  # @return [Array<Symbol>] Right-hand side of this production.
  attr_reader :rhs

  # Instantiates a new Production object with the specified ID,
  # and left- and right-hand sides.
  #
  # @param [Integer]        id   ID number of this production.
  # @param [Symbol]         lhs  Left-hand side of the production.
  # @param [Array<Symbol>]  rhs  Right-hand side of the production.
  def initialize(id, lhs, rhs)
    @id  = id
    @lhs = lhs
    @rhs = rhs
  end

  # Compares one production to another. Returns true only if the
  # left- and right-hand sides match; the ID is not considered. Objects
  # that do not expose both sides (nil, for instance) are never equal to a
  # production.
  #
  # @param [Production] other Another production to compare to.
  #
  # @return [Boolean]
  def ==(other)
    return false unless other.respond_to?(:lhs) && other.respond_to?(:rhs)

    self.lhs == other.lhs && self.rhs == other.rhs
  end

  # @return [Production] A new copy of this production, with its own copy
  #   of the right-hand side array.
  def copy
    Production.new(@id, @lhs, @rhs.clone)
  end

  # @return [Symbol, nil] The last terminal in the right-hand side of the
  #   production, or nil if there is none.
  def last_terminal
    @rhs.reverse_each.find { |sym| CFG::is_terminal?(sym) }
  end

  # @return [Item] An Item based on this production, with the dot placed
  #   before the first symbol. The right-hand side array is shared with
  #   this production.
  def to_item
    Item.new(0, @id, @lhs, @rhs)
  end

  # Returns a string representation of this production.
  #
  # @param [Integer] padding Width that the left-hand side is padded to
  #   (left-justified).
  #
  # @return [String]
  def to_s(padding = 0)
    "#{format("%-#{padding}s", @lhs)} -> #{@rhs.empty? ? 'ɛ' : @rhs.map { |s| s.to_s }.join(' ')}"
  end
end
|
615
|
+
|
616
|
+
# The Item class represents a CFG production with dot in it.
|
617
|
+
class Item < Production
  # @return [Integer] Index of the next symbol in this item.
  attr_reader :dot

  # Instantiates a new Item object with a dot located before the
  # symbol at index *dot* of the right-hand side. The remaining
  # arguments (*args*) should be as specified by
  # {Production#initialize}.
  #
  # @param [Integer]        dot   Location of the dot in this Item.
  # @param [Array<Object>]  args  (see {Production#initialize})
  def initialize(dot, *args)
    super(*args)

    # The Dot indicates the NEXT symbol to be read.
    @dot = dot
  end

  # Compares two items. They are equal only if the dot position and both
  # sides match. Objects that do not expose a dot and both sides (a plain
  # Production or nil, for instance) are never equal to an item.
  #
  # @param [Item] other Another item to compare to.
  #
  # @return [Boolean]
  def ==(other)
    return false unless [:dot, :lhs, :rhs].all? { |reader| other.respond_to?(reader) }

    self.dot == other.dot && self.lhs == other.lhs && self.rhs == other.rhs
  end

  # Moves the item's dot forward by one if the end of the right-hand
  # side hasn't already been reached.
  #
  # @return [Integer, nil] The new dot position, or nil if the dot was
  #   already at the end.
  def advance
    @dot += 1 if @dot < @rhs.length
  end

  # Tests to see if the dot is at the end of the right-hand side.
  #
  # @return [Boolean]
  def at_end?
    @dot == @rhs.length
  end

  # @return [Item] A new copy of this item, with its own copy of the
  #   right-hand side array.
  def copy
    Item.new(@dot, @id, @lhs, @rhs.clone)
  end

  # Returns the symbol located after the dot.
  #
  # @return [Symbol, nil] Symbol located after the dot (at the index
  #   indicated by the {#dot} attribute); nil when the dot is at the end.
  def next_symbol
    @rhs[@dot]
  end

  # Returns a string representation of this item.
  #
  # @param [Integer] padding Width that the left-hand side is padded to
  #   (left-justified).
  #
  # @return [String]
  def to_s(padding = 0)
    "#{format("%-#{padding}s", @lhs)} -> #{@rhs.map { |s| s.to_s }.insert(@dot, '·').join(' ') }"
  end
end
|
682
|
+
end
|
683
|
+
end
|