rltk3 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/LICENSE +27 -0
- data/README.md +852 -0
- data/Rakefile +197 -0
- data/lib/rltk/ast.rb +573 -0
- data/lib/rltk/cfg.rb +683 -0
- data/lib/rltk/cg/basic_block.rb +157 -0
- data/lib/rltk/cg/bindings.rb +151 -0
- data/lib/rltk/cg/builder.rb +1127 -0
- data/lib/rltk/cg/context.rb +48 -0
- data/lib/rltk/cg/contractor.rb +51 -0
- data/lib/rltk/cg/execution_engine.rb +194 -0
- data/lib/rltk/cg/function.rb +237 -0
- data/lib/rltk/cg/generated_bindings.rb +8118 -0
- data/lib/rltk/cg/generic_value.rb +95 -0
- data/lib/rltk/cg/instruction.rb +519 -0
- data/lib/rltk/cg/llvm.rb +150 -0
- data/lib/rltk/cg/memory_buffer.rb +75 -0
- data/lib/rltk/cg/module.rb +451 -0
- data/lib/rltk/cg/pass_manager.rb +252 -0
- data/lib/rltk/cg/support.rb +29 -0
- data/lib/rltk/cg/target.rb +230 -0
- data/lib/rltk/cg/triple.rb +58 -0
- data/lib/rltk/cg/type.rb +554 -0
- data/lib/rltk/cg/value.rb +1272 -0
- data/lib/rltk/cg.rb +32 -0
- data/lib/rltk/lexer.rb +372 -0
- data/lib/rltk/lexers/calculator.rb +44 -0
- data/lib/rltk/lexers/ebnf.rb +38 -0
- data/lib/rltk/parser.rb +1702 -0
- data/lib/rltk/parsers/infix_calc.rb +43 -0
- data/lib/rltk/parsers/postfix_calc.rb +34 -0
- data/lib/rltk/parsers/prefix_calc.rb +34 -0
- data/lib/rltk/token.rb +90 -0
- data/lib/rltk/version.rb +11 -0
- data/lib/rltk.rb +16 -0
- data/test/cg/tc_basic_block.rb +83 -0
- data/test/cg/tc_control_flow.rb +191 -0
- data/test/cg/tc_function.rb +54 -0
- data/test/cg/tc_generic_value.rb +33 -0
- data/test/cg/tc_instruction.rb +256 -0
- data/test/cg/tc_llvm.rb +25 -0
- data/test/cg/tc_math.rb +88 -0
- data/test/cg/tc_module.rb +89 -0
- data/test/cg/tc_transforms.rb +68 -0
- data/test/cg/tc_type.rb +69 -0
- data/test/cg/tc_value.rb +151 -0
- data/test/cg/ts_cg.rb +23 -0
- data/test/tc_ast.rb +332 -0
- data/test/tc_cfg.rb +164 -0
- data/test/tc_lexer.rb +216 -0
- data/test/tc_parser.rb +711 -0
- data/test/tc_token.rb +34 -0
- data/test/ts_rltk.rb +47 -0
- metadata +317 -0
data/lib/rltk/cfg.rb
ADDED
@@ -0,0 +1,683 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Author: Chris Wailes <chris.wailes@gmail.com>
|
4
|
+
# Project: Ruby Language Toolkit
|
5
|
+
# Date: 2011/03/24
|
6
|
+
# Description: This file contains a class representing a context-free
|
7
|
+
# grammar.
|
8
|
+
|
9
|
+
############
|
10
|
+
# Requires #
|
11
|
+
############
|
12
|
+
|
13
|
+
# Standard Library
|
14
|
+
require 'set'
|
15
|
+
|
16
|
+
# Ruby Language Toolkit
|
17
|
+
require 'rltk/lexers/ebnf'
|
18
|
+
|
19
|
+
#######################
|
20
|
+
# Classes and Modules #
|
21
|
+
#######################
|
22
|
+
|
23
|
+
module RLTK
|
24
|
+
# An exception class that represents a problem with a context-free
|
25
|
+
# grammar's definition.
|
26
|
+
# Raised when a context-free grammar is defined incorrectly.
class GrammarError < StandardError
end
|
27
|
+
|
28
|
+
# The CFG class is used to represent context-free grammars. It is used by
|
29
|
+
# the RLTK::Parser class to represent the parser's grammar, but can also be
|
30
|
+
# used to manipulate arbitrary CFGs.
|
31
|
+
class CFG
|
32
|
+
|
33
|
+
# @return [Symbol] The grammar's starting symbol.
|
34
|
+
attr_reader :start_symbol
|
35
|
+
|
36
|
+
# This is used by the {CFG#production} method to wrap {CFG#clause}
|
37
|
+
# calls.
|
38
|
+
#
|
39
|
+
# @return [Symbol] The current left-hand side symbol.
|
40
|
+
attr_accessor :curr_lhs
|
41
|
+
|
42
|
+
#################
|
43
|
+
# Class Methods #
|
44
|
+
#################
|
45
|
+
|
46
|
+
# Tests to see if a symbol is a terminal symbol, as used by the CFG
|
47
|
+
# class.
|
48
|
+
#
|
49
|
+
# @param [Symbol] sym The symbol to test.
|
50
|
+
#
|
51
|
+
# @return [Boolean]
|
52
|
+
def self.is_terminal?(sym)
  # nil/false can never name a symbol; answer with a real Boolean instead of
  # leaking nil to the caller.
  return false unless sym

  # A terminal is written entirely in upper case. A name with no cased
  # characters equals both its upcase and downcase form, so it satisfies
  # this predicate and the non-terminal one alike.
  name = sym.to_s
  name == name.upcase
end
|
55
|
+
|
56
|
+
# Tests to see if a symbol is a non-terminal symbol, as used by the
|
57
|
+
# CFG class.
|
58
|
+
#
|
59
|
+
# @param [Symbol] sym The symbol to test.
|
60
|
+
#
|
61
|
+
# @return [Boolean]
|
62
|
+
def self.is_nonterminal?(sym)
  # nil/false can never name a symbol; answer with a real Boolean instead of
  # leaking nil to the caller.
  return false unless sym

  # A non-terminal is written entirely in lower case. A name with no cased
  # characters equals both its upcase and downcase form, so it satisfies
  # this predicate and the terminal one alike.
  name = sym.to_s
  name == name.downcase
end
|
65
|
+
|
66
|
+
####################
|
67
|
+
# Instance Methods #
|
68
|
+
####################
|
69
|
+
|
70
|
+
# Instantiates a new CFG object that uses *callback* to inform the
|
71
|
+
# programmer of the generation of new productions due to EBNF
|
72
|
+
# operators.
|
73
|
+
#
|
74
|
+
# @param [Proc] callback A Proc object to be called when EBNF operators are expanded.
|
75
|
+
def initialize(&callback)
  # Fall back to a no-op Proc so the callback can always be invoked.
  @callback = callback || Proc.new {}
  @lexer    = Lexers::EBNF.new

  # Definition state: the LHS currently being defined, the ID counter
  # (incremented before use, so the first ID is 0) and the start symbol.
  @curr_lhs           = nil
  @production_counter = -1
  @start_symbol       = nil
  @wrapper_symbol     = nil

  # Productions indexed by ID and by left-hand side, plus the buffer that
  # collects the clauses created by the current CFG#production call.
  @productions_id    = {}
  @productions_sym   = Hash.new { |table, lhs| table[lhs] = [] }
  @production_buffer = []

  # Known symbols; :EOS is always a terminal.
  @terms    = Set.new([:EOS])
  @nonterms = Set.new

  # Memoized first and follow sets.
  @firsts  = {}
  @follows = Hash.new { |table, sym| table[sym] = [] }
end
|
93
|
+
|
94
|
+
# Adds *production* to the appropriate internal data structures.
|
95
|
+
#
|
96
|
+
# @param [Production] production The production to add to the grammar.
|
97
|
+
#
|
98
|
+
# @return [void]
|
99
|
+
def add_production(production)
  # Register the production under its ID, then file it under its
  # left-hand side symbol.
  @productions_id[production.id] = production
  @productions_sym[production.lhs] << production

  production
end
|
104
|
+
|
105
|
+
# If the production already exists it will be returned. If it does not
|
106
|
+
# exist then it will be created and then returned.
|
107
|
+
#
|
108
|
+
# @param [Symbol] name The name of the production to add
|
109
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
110
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
111
|
+
#
|
112
|
+
# @return [Symbol] The value of the name argument
|
113
|
+
def get_list_production(name, list_elements, separator = '')
  # A name already recorded as a non-terminal is reused as-is.
  return name if @nonterms.include?(name)

  build_list_production(name, list_elements, separator)
end
|
121
|
+
alias :get_list :get_list_production
|
122
|
+
|
123
|
+
# Builds a production representing a (possibly empty) list of tokens.
|
124
|
+
# These tokens may optionally be separated by a provided token. This
|
125
|
+
# function is used to eliminate the EBNF * operator.
|
126
|
+
#
|
127
|
+
# @param [Symbol] name The name of the production to add
|
128
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
129
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
130
|
+
#
|
131
|
+
# @return [Symbol] The value of the name argument
|
132
|
+
def build_list_production(name, list_elements, separator = '')
  # Generates:
  #
  #   name: | name_prime
  #
  # and hands name_prime to the non-empty list builder.
  prime = :"#{name}_prime"

  # The empty alternative is defined first, then the one that defers to
  # name_prime; the callback is told about each as it is created.
  { empty: '', nonempty: prime }.each do |kind, rhs|
    created, = self.production(name, rhs)
    @callback.call(:elp, kind, created)
  end

  self.nonempty_list(prime, list_elements, separator)

  name
end
|
152
|
+
alias :list :build_list_production
|
153
|
+
|
154
|
+
# If the production already exists it will be returned. If it does not
|
155
|
+
# exist then it will be created and then returned.
|
156
|
+
#
|
157
|
+
# @param [Symbol] name The name of the production to add
|
158
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
159
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
160
|
+
#
|
161
|
+
# @return [Symbol] The value of the name argument
|
162
|
+
def get_nonempty_list_production(name, list_elements, separator = '')
  # A name already recorded as a non-terminal is reused as-is.
  return name if @nonterms.include?(name)

  build_nonempty_list_production(name, list_elements, separator)
end
|
170
|
+
alias :get_nonempty_list :get_nonempty_list_production
|
171
|
+
|
172
|
+
# Builds a production representing a non-empty list of tokens. These
|
173
|
+
# tokens may optionally be separated by a provided token. This
|
174
|
+
# function is used to eliminate the EBNF + operator.
|
175
|
+
#
|
176
|
+
# @param [Symbol] name The name of the production to add
|
177
|
+
# @param [String, Symbol, Array<String, Symbol>] list_elements Expression(s) that may appear in the list
|
178
|
+
# @param [Symbol, String] separator The list separator symbol or symbols
|
179
|
+
#
|
180
|
+
# @return [Symbol] The value of the name argument
|
181
|
+
def build_nonempty_list_production(name, list_elements, separator = '')
  # Generates, for a single list element:
  #
  #   name: list_element | name separator list_element
  #
  # and for several alternatives:
  #
  #   name: name_list_elements | name separator name_list_elements
  #   name_list_elements: #{list_elements.join('|')}
  alternatives = list_elements.is_a?(Array) ? list_elements : nil

  if alternatives && alternatives.empty?
    raise ArgumentError, 'Parameter list_elements must not be empty.'
  end

  # Only an Array with two or more entries needs the extra
  # name_list_elements productions.
  needs_element_rules = !alternatives.nil? && alternatives.length > 1

  element_expr =
    if needs_element_rules
      "#{name}_list_elements"
    elsif alternatives
      alternatives.first
    else
      list_elements
    end

  # Prefix every symbol of the element expression with the selection dot.
  selected_expr = element_expr.to_s.split.map { |part| ".#{part}" }.join(' ')

  # Single Element Production
  single, = self.production(name, element_expr)
  @callback.call(:nelp, :single, single)

  # Multiple Element Production
  multiple, selections = self.production(name, ".#{name} #{separator} #{selected_expr}")
  @callback.call(:nelp, :multiple, multiple, selections)

  # List Element Productions
  if needs_element_rules
    alternatives.each do |element|
      rule, = self.production(element_expr, element)
      @callback.call(:nelp, :elements, rule)
    end
  end

  name
end
|
232
|
+
alias :nonempty_list :build_nonempty_list_production
|
233
|
+
|
234
|
+
# If the production already exists it will be returned. If it does not
|
235
|
+
# exist then it will be created and then returned.
|
236
|
+
#
|
237
|
+
# @param [Symbol] name The name of the production to add
|
238
|
+
# @param [String, Symbol, Array<String>] list_elements Expression(s) that may appear in the list
|
239
|
+
#
|
240
|
+
# @return [Symbol] The value of the name argument
|
241
|
+
def get_optional_production(name, list_elements)
  # A name already recorded as a non-terminal is reused as-is.
  return name if @nonterms.include?(name)

  build_optional_production(name, list_elements)
end
|
249
|
+
alias :get_optional :get_optional_production
|
250
|
+
|
251
|
+
# Build a production for an optional symbol. This is used to
|
252
|
+
# eliminate the EBNF ? operator.
|
253
|
+
#
|
254
|
+
# @param [Symbol] name The name for the new production
|
255
|
+
# @param [Symbol] opt_symbol Symbol to expand
|
256
|
+
#
|
257
|
+
# @return [Symbol] The value of the name argument
|
258
|
+
def build_optional_production(name, opt_symbol)
  # Nothing to do when productions are already filed under this name.
  return name if @productions_sym.has_key?(name)

  # Generates:
  #
  #   name: | opt_symbol
  #
  # The empty alternative first, then the one holding the optional symbol.
  [[:empty, []], [:nonempty, [opt_symbol]]].each do |kind, rhs|
    created = self.add_production(Production.new(self.next_id, name, rhs))
    @callback.call(:optional, kind, created)
  end

  # Add the new symbol to the list of nonterminals.
  @nonterms << name

  name
end
|
278
|
+
alias :optional :build_optional_production
|
279
|
+
|
280
|
+
# Sets the EBNF callback to *callback*.
|
281
|
+
#
|
282
|
+
# @param [Proc] callback A Proc object to be called when EBNF operators are expanded and list productions are added.
|
283
|
+
#
|
284
|
+
# @return [void]
|
285
|
+
def callback(&callback)
  # Calling without a block leaves the current callback in place.
  @callback = callback unless callback.nil?

  nil
end
|
290
|
+
|
291
|
+
# This function MUST be called inside a CFG.production block. It will
|
292
|
+
# make a new production with the left-hand side specified by the
|
293
|
+
# CFG.production call's argument. This is the function that is
|
294
|
+
# responsible for removing EBNF symbols from the grammar.
|
295
|
+
#
|
296
|
+
# @param [String, Symbol] expression The right-hand side of a CFG production.
|
297
|
+
#
|
298
|
+
# @return [Array(Production, Array<Integer>)]
|
299
|
+
def clause(expression)
  raise GrammarError, 'CFG#clause called outside of CFG#production block.' unless @curr_lhs

  lhs        = @curr_lhs.to_sym
  tokens     = @lexer.lex(expression.to_s)
  rhs        = []
  selections = []

  # The first left-hand side ever defined becomes the start symbol unless
  # one has already been chosen.
  @start_symbol ||= lhs

  # Walk the tokens, replacing EBNF operators with generated productions.
  # symbols_seen counts grammar symbols only, so a DOT records the index of
  # the symbol that follows it.
  symbols_seen = 0
  tokens.each_with_index do |token, index|
    case token.type
    when :TERM, :NONTERM
      value = token.value

      # Add this symbol to the correct collection.
      (token.type == :TERM ? @terms : @nonterms) << value

      # An operator directly after the symbol swaps the symbol for the
      # name of a generated helper production.
      follower = tokens[index + 1]
      rhs <<
        case follower && follower.type
        when :QUESTION then self.get_optional_production("#{value.downcase}_optional".to_sym, value)
        when :STAR     then self.get_list_production("#{value.downcase}_list".to_sym, value)
        when :PLUS     then self.get_nonempty_list_production("#{value.downcase}_nonempty_list".to_sym, value)
        else value
        end

      symbols_seen += 1

    when :DOT
      selections << symbols_seen
    end
  end

  # Make the production and remember it for the enclosing CFG#production.
  production = Production.new(self.next_id, lhs, rhs)
  @production_buffer << [production, selections]

  # Make sure the production symbol is collected.
  @nonterms << lhs

  # Add the new production to our collections.
  self.add_production(production)

  [production, selections]
end
|
352
|
+
|
353
|
+
# This function calculates the *first* set of a series of tokens. It
|
354
|
+
# uses the {CFG#first_set} helper function to find the first set of
|
355
|
+
# individual symbols.
|
356
|
+
#
|
357
|
+
# @param [Symbol, Array<Symbol>] sentence Sentence to find the *first set* for.
|
358
|
+
#
|
359
|
+
# @return [Array<Symbol>] The *first set* for the given sentence.
|
360
|
+
def first_set(sentence)
  # A lone symbol is answered directly by the per-symbol helper.
  return first_set_prime(sentence) if sentence.is_a?(Symbol)

  # Build the symbol table once; CFG#symbols copies and merges two sets on
  # every call, so asking for it per element is wasted work.
  known = self.symbols

  # A sentence mentioning a symbol the grammar does not know has no first
  # set: nil is returned, not an empty Array.
  return nil unless sentence.all? { |sym| known.include?(sym) }

  set0           = []
  all_have_empty = true

  sentence.each do |sym|
    set1 = self.first_set(sym)
    set0 |= set1 - [:'ɛ']

    # Later symbols only contribute while everything before them can
    # derive the empty string.
    break unless (all_have_empty = set1.include?(:'ɛ'))
  end

  # The whole sentence is nullable only if every symbol in it is.
  all_have_empty ? set0 + [:'ɛ'] : set0
end
|
377
|
+
|
378
|
+
# This function is responsible for calculating the *first* set of
|
379
|
+
# individual symbols.
|
380
|
+
#
|
381
|
+
# @param [Symbol] sym0 The symbol to find the *first set* of.
|
382
|
+
# @param [Array<Symbol>] seen_lh_sides Previously seen LHS symbols.
|
383
|
+
#
|
384
|
+
# @return [Array<Symbol>]
|
385
|
+
def first_set_prime(sym0, seen_lh_sides = [])
  # Symbols the grammar does not know have an empty first set.
  return [] unless self.symbols.include?(sym0)

  # Memoize the result for later.
  @firsts[sym0] ||=
    if CFG::is_terminal?(sym0)
      # If the symbol is a terminal, it is the only symbol in its first set.
      [sym0]
    else
      collected = []

      @productions_sym[sym0].each do |production|
        if production.rhs.empty?
          # An empty production contributes the empty string.
          collected << :'ɛ'
          next
        end

        nullable = true

        production.rhs.each do |sym1|
          firsts_of_sym1 = []

          # Symbols already on the shared seen list are skipped, which
          # keeps recursive rules from looping forever. The list is
          # mutated in place and handed down to the recursive call.
          unless seen_lh_sides.include?(sym1)
            firsts_of_sym1 = first_set_prime(sym1, seen_lh_sides << sym1)
            collected |= firsts_of_sym1 - [:'ɛ']
          end

          # Stop at the first symbol that cannot derive the empty string.
          nullable = firsts_of_sym1.include?(:'ɛ')
          break unless nullable
        end

        # Every symbol of this production can vanish, so the production
        # itself can derive the empty string.
        collected << :'ɛ' if nullable
      end

      collected.uniq
    end
end
|
430
|
+
private :first_set_prime
|
431
|
+
|
432
|
+
# Returns the *follow* set for a given symbol. The second argument is
|
433
|
+
# used to avoid infinite recursion when mutually recursive rules are
|
434
|
+
# encountered.
|
435
|
+
#
|
436
|
+
# @param [Symbol] sym0 The symbol to find the *follow set* for.
|
437
|
+
# @param [Array<Symbol>] seen_lh_sides Previously seen LHS symbols.
|
438
|
+
#
|
439
|
+
# @return [Array<Symbol>]
|
440
|
+
def follow_set(sym0, seen_lh_sides = [])
  # Use the memoized set if possible.
  return @follows[sym0] if @follows.has_key?(sym0)

  # Only non-terminals have a follow set.
  return [] unless @nonterms.member?(sym0)

  set0 = []

  # Add EOS to the start symbol's follow set.
  set0 << :EOS if sym0 == @start_symbol

  @productions_id.each_value do |production|
    production.rhs.each_with_index do |sym1, i|
      if i + 1 < production.rhs.length
        next unless sym0 == sym1

        # Whatever can start the remainder of the production can follow
        # sym0.
        set1 = self.first_set(production.rhs[(i + 1)..-1])
        set0 |= set1 - [:'ɛ']

        # If the remainder can vanish, whatever follows the production's
        # LHS also follows sym0. When the LHS *is* sym0 that adds nothing
        # new, and recursing would never terminate because the result is
        # only memoized after the scan, so that case is skipped.
        # NOTE(review): mutually recursive rules reaching this branch are
        # still not guarded by seen_lh_sides; confirm whether that needs
        # the same treatment as the branch below.
        if set1.include?(:'ɛ') && production.lhs != sym0
          set0 |= self.follow_set(production.lhs)
        end
      elsif sym0 != production.lhs && sym0 == sym1 && !seen_lh_sides.include?(production.lhs)
        # sym0 ends the production: it inherits the follow set of the LHS.
        # The seen list is mutated in place and shared with the recursive
        # call to stop mutual recursion.
        set0 |= self.follow_set(production.lhs, seen_lh_sides << production.lhs)
      end
    end
  end

  if seen_lh_sides.empty? || !set0.empty?
    # Memoize the result for later.
    @follows[sym0] |= set0
  else
    set0
  end
end
|
475
|
+
|
476
|
+
# @return [Integer] ID for the next production to be defined.
|
477
|
+
def next_id
  # Advance the counter and hand back the new value.
  @production_counter = @production_counter.succ
end
|
480
|
+
|
481
|
+
# @return [Set<Symbol>] All non-terminal symbols used in the grammar's definition.
|
482
|
+
def nonterms
  # Hand out a copy so callers cannot alter the grammar's own set.
  snapshot = @nonterms.clone
  snapshot
end
|
485
|
+
|
486
|
+
# Builds a new production with the left-hand side value of *symbol*.
|
487
|
+
# If *expression* is specified it is taken as the right-hand side of
|
488
|
+
# production. If *expression* is nil then *block* is evaluated, and
|
489
|
+
# expected to make one or more calls to {CFG#clause}.
|
490
|
+
#
|
491
|
+
# @param [Symbol] symbol The left-hand side of a production
|
492
|
+
# @param [String, Symbol] expression The right-hand side of a production
|
493
|
+
# @param [Proc] block Optional block for defining production clauses
|
494
|
+
#
|
495
|
+
# @return [Array] A [production, selections] pair if called with an expression;
|
496
|
+
# an array of [production, selections] pairs otherwise
|
497
|
+
def production(symbol, expression = nil, &block)
  # Remember the enclosing left-hand side so nested calls (made while EBNF
  # operators are expanded) can put it back.
  prev_lhs  = @curr_lhs
  @curr_lhs = symbol

  # Every call starts with a fresh buffer; CFG#clause appends to it.
  @production_buffer = []

  if expression
    # Single clause: the [production, selections] pair from CFG#clause.
    self.clause(expression)
  else
    # Block form: the block calls CFG#clause; return what was buffered.
    self.instance_exec(&block)

    @production_buffer.clone
  end
ensure
  # Restore the previous left-hand side even when the clause or the block
  # raises. Otherwise the grammar is left looking as if it were still
  # inside this production and later stray CFG#clause calls go undetected.
  @curr_lhs = prev_lhs
end
|
515
|
+
|
516
|
+
# If *by* is :sym, returns a hash of the grammar's productions, using
|
517
|
+
# the productions' left-hand side symbol as the key. If *by* is :id
|
518
|
+
# a hash of productions keyed by ID is returned, in the order of their
|
519
|
+
# definition.
|
520
|
+
#
|
521
|
+
# @param [:sym, :id] by The way in which productions should be returned.
|
522
|
+
#
|
523
|
+
# @return [Hash{Symbol => Array<Production>}, Hash{Integer => Production}, nil]
|
524
|
+
def productions(by = :sym)
  # Any selector other than :sym or :id yields nil.
  case by
  when :sym then @productions_sym
  when :id  then @productions_id
  end
end
|
533
|
+
|
534
|
+
# Sets the start symbol for this grammar.
|
535
|
+
#
|
536
|
+
# @param [Symbol] symbol The new start symbol.
|
537
|
+
#
|
538
|
+
# @return [Symbol]
|
539
|
+
def start(symbol)
  # Only a non-terminal may serve as the start symbol.
  raise GrammarError, 'Start symbol must be a non-terminal.' unless CFG::is_nonterminal?(symbol)

  @start_symbol = symbol
end
|
546
|
+
|
547
|
+
# @return [Set<Symbol>] All symbols used in the grammar's definition.
|
548
|
+
def symbols
  # Terminals and non-terminals merged into one new set.
  self.terms.union(self.nonterms)
end
|
551
|
+
|
552
|
+
# @return [Set<Symbol>] All terminal symbols used in the grammar's definition.
|
553
|
+
def terms
  # Hand out a copy so callers cannot alter the grammar's own set.
  snapshot = @terms.clone
  snapshot
end
|
556
|
+
|
557
|
+
# Oddly enough, the Production class represents a production in a
|
558
|
+
# context-free grammar.
|
559
|
+
class Production
  # @return [Integer] ID of this production.
  attr_reader :id

  # @return [Symbol] Left-hand side of this production.
  attr_reader :lhs

  # @return [Array<Symbol>] Right-hand side of this production.
  attr_reader :rhs

  # Instantiates a new Production object with the specified ID,
  # and left- and right-hand sides.
  #
  # @param [Integer]        id   ID number of this production.
  # @param [Symbol]         lhs  Left-hand side of the production.
  # @param [Array<Symbol>]  rhs  Right-hand side of the production.
  def initialize(id, lhs, rhs)
    @id  = id
    @lhs = lhs
    @rhs = rhs
  end

  # Compares one production to another. Returns true only if the
  # left- and right-hand sides match; the ID is not compared. Objects
  # that do not expose lhs and rhs (nil, for instance) are simply not
  # equal instead of raising NoMethodError.
  #
  # @param [Production] other Another production to compare to.
  #
  # @return [Boolean]
  def ==(other)
    other.respond_to?(:lhs) && other.respond_to?(:rhs) &&
      self.lhs == other.lhs && self.rhs == other.rhs
  end

  # @return [Production] A new copy of this production with its own
  #   right-hand side array.
  def copy
    Production.new(@id, @lhs, @rhs.clone)
  end

  # @return [Symbol, nil] The last terminal in the right-hand side of the
  #   production, or nil if it contains none.
  def last_terminal
    @rhs.reverse_each.find { |sym| CFG::is_terminal?(sym) }
  end

  # @return [Item] An Item based on this production, with the dot at
  #   index 0. The right-hand side array is shared, not copied.
  def to_item
    Item.new(0, @id, @lhs, @rhs)
  end

  # Returns a string representation of this production.
  #
  # @param [Integer] padding Minimum width the left-hand side is padded to (left-justified).
  #
  # @return [String]
  def to_s(padding = 0)
    "#{format("%-#{padding}s", @lhs)} -> #{@rhs.empty? ? 'ɛ' : @rhs.map(&:to_s).join(' ')}"
  end
end
|
615
|
+
|
616
|
+
# The Item class represents a CFG production with dot in it.
|
617
|
+
class Item < Production
  # @return [Integer] Index of the next symbol in this item.
  attr_reader :dot

  # Instantiates a new Item object with a dot located before the
  # symbol at index *dot* of the right-hand side. The remaining
  # arguments (*args*) should be as specified by
  # {Production#initialize}.
  #
  # @param [Integer]        dot   Location of the dot in this Item.
  # @param [Array<Object>]  args  (see {Production#initialize})
  def initialize(dot, *args)
    super(*args)

    # The Dot indicates the NEXT symbol to be read.
    @dot = dot
  end

  # Compares two items: dot position and both sides must match; the ID is
  # not compared. Objects that do not expose dot, lhs and rhs (nil or a
  # plain Production, for instance) are simply not equal instead of
  # raising NoMethodError.
  #
  # @param [Item] other Another item to compare to.
  #
  # @return [Boolean]
  def ==(other)
    other.respond_to?(:dot) && other.respond_to?(:lhs) && other.respond_to?(:rhs) &&
      self.dot == other.dot && self.lhs == other.lhs && self.rhs == other.rhs
  end

  # Moves the item's dot forward by one if the end of the right-hand
  # side hasn't already been reached.
  #
  # @return [Integer, nil] The new dot position, or nil if the dot was
  #   already at the end.
  def advance
    @dot += 1 if @dot < @rhs.length
  end

  # Tests to see if the dot is at the end of the right-hand side.
  #
  # @return [Boolean]
  def at_end?
    @dot == @rhs.length
  end

  # @return [Item] A new copy of this item with its own right-hand side
  #   array.
  def copy
    Item.new(@dot, @id, @lhs, @rhs.clone)
  end

  # Returns the symbol located after the dot.
  #
  # @return [Symbol, nil] Symbol at the index given by the {#dot}
  #   attribute; nil when that index is past the end of the right-hand side.
  def next_symbol
    @rhs[@dot]
  end

  # Returns a string representation of this item.
  #
  # @param [Integer] padding Minimum width the left-hand side is padded to (left-justified).
  #
  # @return [String]
  def to_s(padding = 0)
    "#{format("%-#{padding}s", @lhs)} -> #{@rhs.map(&:to_s).insert(@dot, '·').join(' ')}"
  end
end
|
682
|
+
end
|
683
|
+
end
|