antlr3 1.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
data/lib/antlr3/token.rb
ADDED
@@ -0,0 +1,592 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
=begin LICENSE
|
5
|
+
|
6
|
+
[The "BSD licence"]
|
7
|
+
Copyright (c) 2009 Kyle Yetter
|
8
|
+
All rights reserved.
|
9
|
+
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
11
|
+
modification, are permitted provided that the following conditions
|
12
|
+
are met:
|
13
|
+
|
14
|
+
1. Redistributions of source code must retain the above copyright
|
15
|
+
notice, this list of conditions and the following disclaimer.
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright
|
17
|
+
notice, this list of conditions and the following disclaimer in the
|
18
|
+
documentation and/or other materials provided with the distribution.
|
19
|
+
3. The name of the author may not be used to endorse or promote products
|
20
|
+
derived from this software without specific prior written permission.
|
21
|
+
|
22
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
23
|
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
24
|
+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
25
|
+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
26
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
27
|
+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
28
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
29
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
30
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
31
|
+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
32
|
+
|
33
|
+
=end
|
34
|
+
|
35
|
+
module ANTLR3
|
36
|
+
|
37
|
+
=begin rdoc ANTLR3::Token
|
38
|
+
|
39
|
+
At a minimum, tokens are data structures that bind together a chunk of text and
|
40
|
+
a corresponding type symbol, which categorizes/characterizes the content of the
|
41
|
+
text. Tokens also usually carry information about their location in the input,
|
42
|
+
such as absolute character index, line number, and position within the line (or
|
43
|
+
column).
|
44
|
+
|
45
|
+
Furthermore, ANTLR tokens are assigned a "channel" number, an extra degree of
|
46
|
+
categorization that groups things on a larger scale. Parsers will usually ignore
|
47
|
+
tokens that have channel value 99 (the HIDDEN_CHANNEL), so you can keep things
|
48
|
+
like comment and white space huddled together with neighboring tokens,
|
49
|
+
effectively ignoring them without discarding them.
|
50
|
+
|
51
|
+
ANTLR tokens also keep a reference to the source stream from which they
|
52
|
+
originated. Token streams will also provide an index value for the token, which
|
53
|
+
indicates the position of the token relative to other tokens in the stream,
|
54
|
+
starting at zero. For example, the 22nd token pulled from a lexer by
|
55
|
+
CommonTokenStream will have index value 21.
|
56
|
+
|
57
|
+
== Token as an Interface
|
58
|
+
|
59
|
+
This library provides a token implementation (see CommonToken). Additionally,
|
60
|
+
you may write your own token class as long as you provide methods that give
|
61
|
+
access to the attributes expected by a token. Even though most of the ANTLR
|
62
|
+
library tries to use duck-typing techniques instead of pure object-oriented type
|
63
|
+
checking, it's a good idea to include this ANTLR3::Token into your customized
|
64
|
+
token class.
|
65
|
+
|
66
|
+
=end
|
67
|
+
|
68
|
+
module Token
  include ANTLR3::Constants
  include Comparable

  # the token's associated chunk of text
  attr_accessor :text

  # the integer value associated with the token's type
  attr_accessor :type

  # the text's starting line number within the source (indexed starting at 1)
  attr_accessor :line

  # the text's starting position in the line within the source (indexed starting at 0)
  attr_accessor :column

  # the integer value of the channel to which the token is assigned
  attr_accessor :channel

  # the index of the token with respect to the other tokens produced during lexing
  attr_accessor :index

  # a reference to the input stream from which the token was extracted
  attr_accessor :input

  # the absolute character index in the input at which the text starts
  attr_accessor :start

  # the absolute character index in the input at which the text ends
  attr_accessor :stop

  alias :input_stream :input
  alias :input_stream= :input=
  alias :token_index :index
  alias :token_index= :index=

  # Pattern-match style comparison: an Integer matches the token's type,
  # a Symbol matches the token's type name, a Regexp is tested against
  # the token's text, and a String must equal the text exactly.
  def =~ other
    case other
    when Integer then type == other
    when Symbol  then name.to_sym == other
    when Regexp  then other =~ text
    when String  then text == other
    else super
    end
  end

  # tokens are ordered by their position in the token stream
  def <=> other
    index <=> other.index
  end

  # duplicated/cloned tokens copy every attribute of the original
  # except the stream index, which is reset to -1 (unpositioned)
  def initialize_copy(source)
    self.index   = -1
    self.type    = source.type
    self.channel = source.channel
    self.text    = source.text.clone if source.text
    self.start   = source.start
    self.stop    = source.stop
    self.line    = source.line
    self.column  = source.column
    self.input   = source.input
  end

  # true when the token knows its source stream and its start/stop bounds
  def concrete?
    !!(input && start && stop)
  end

  # true when the token lacks source-stream placement information
  def imaginary?
    !(input && start && stop)
  end

  # the name of the token's type (looked up via #token_name)
  def name
    token_name(type)
  end

  # whether the token has been assigned to the hidden channel
  def hidden?
    channel == HIDDEN_CHANNEL
  end

  # the token's text as it literally appears in the source stream;
  # imaginary tokens simply fall back to the +text+ attribute
  def source_text
    concrete? ? input.substring(start, stop) : text
  end

  # reassign the token to the hidden channel
  def hide!
    self.channel = HIDDEN_CHANNEL
  end

  # the start..stop character range, or nil when the bounds are unusable
  def range
    start..stop rescue nil
  end

  def to_i
    index.to_i
  end

  def to_s
    text.to_s
  end

  # a compact, single-line description of the token for debugging
  def inspect
    rep = ''
    rep << "#{index} " unless index == -1
    rep << name
    rep << (text ? "[#{text.inspect}] " : ' ')
    rep << "@ line #{line} col #{column} " unless line == 0
    rep << "(#{start}..#{stop})" if start
    rep = rep.strip
    rep << " (#{channel})" unless channel == DEFAULT_CHANNEL
    rep
  end

  def pretty_print(printer)
    printer.text(inspect)
  end

  private

  # maps a type value to a name using the global built-in token-name table;
  # token schemes override this to supply grammar-specific names
  def token_name(type)
    BUILT_IN_TOKEN_NAMES[type]
  end
end
|
189
|
+
|
190
|
+
# The standard token implementation: a plain Struct binding together the
# nine standard token attributes (slightly faster than a regular object
# with accessor methods).
CommonToken = Struct.new(:type, :channel, :text, :input, :start,
                         :stop, :index, :line, :column)
|
192
|
+
|
193
|
+
=begin rdoc ANTLR3::CommonToken
|
194
|
+
|
195
|
+
The base class for the standard implementation of Token. It is implemented as a
|
196
|
+
simple Struct as tokens are basically simple data structures binding together a
|
197
|
+
bunch of different information and Structs are slightly faster than a standard
|
198
|
+
Object with accessor methods implementation.
|
199
|
+
|
200
|
+
By default, ANTLR generated ruby code will provide a customized subclass of
|
201
|
+
CommonToken to track token-type names efficiently for debugging, inspection, and
|
202
|
+
general utility. Thus code generated for a standard combo lexer-parser grammar
|
203
|
+
named XYZ will have a base module named XYZ and a customized CommonToken
|
204
|
+
subclass named XYZ::Token.
|
205
|
+
|
206
|
+
Here is the token structure attribute list in order:
|
207
|
+
|
208
|
+
* <tt>type</tt>
|
209
|
+
* <tt>channel</tt>
|
210
|
+
* <tt>text</tt>
|
211
|
+
* <tt>input</tt>
|
212
|
+
* <tt>start</tt>
|
213
|
+
* <tt>stop</tt>
|
214
|
+
* <tt>index</tt>
|
215
|
+
* <tt>line</tt>
|
216
|
+
* <tt>column</tt>
|
217
|
+
|
218
|
+
=end
|
219
|
+
|
220
|
+
class CommonToken
  include Token

  # default attribute values applied by CommonToken.create when the
  # caller's field hash omits them
  DEFAULT_VALUES = {
    :channel => DEFAULT_CHANNEL,
    :index => -1,
    :line => 0,
    :column => -1
  }.freeze

  # maps a type value to a name using the built-in token-name table
  def self.token_name(type)
    BUILT_IN_TOKEN_NAMES[type]
  end

  # builds a token from a hash of attribute names to values, filling in
  # any missing entries from DEFAULT_VALUES
  def self.create(fields = {})
    fields = DEFAULT_VALUES.merge(fields)
    args = members.map { |name| fields[name.to_sym] }
    new(*args)
  end

  # allows you to make a copy of a token with a different class;
  # the new token is unpositioned (index -1) like any other copy
  def self.from_token(token)
    new(token.type, token.channel, token.text ? token.text.clone : nil,
        token.input, token.start, token.stop, -1, token.line, token.column)
  end

  def initialize(type = nil, channel = DEFAULT_CHANNEL, text = nil,
                 input = nil, start = nil, stop = nil, index = -1,
                 line = 0, column = -1)
    super
    block_given? and yield(self)
    # Derive missing text from the source stream when possible.
    # BUG FIX: also require +input+ to be present -- previously a token
    # with start/stop bounds but no stream raised NoMethodError here.
    self.text.nil? && self.input && self.start && self.stop and
      self.text = self.input.substring(self.start, self.stop)
  end

  alias :input_stream :input
  alias :input_stream= :input=
  alias :token_index :index
  alias :token_index= :index=
end
|
259
|
+
|
260
|
+
# shared, frozen sentinel tokens used throughout the runtime library
Constants::EOF_TOKEN = CommonToken.new(EOF).freeze
Constants::INVALID_TOKEN = CommonToken.new(INVALID_TOKEN_TYPE).freeze
Constants::SKIP_TOKEN = CommonToken.new(INVALID_TOKEN_TYPE).freeze
|
263
|
+
|
264
|
+
=begin rdoc ANTLR3::TokenSource
|
265
|
+
|
266
|
+
TokenSource is a simple mixin module that demands an
|
267
|
+
implementation of the method #next_token. In return, it
|
268
|
+
defines methods #next and #each, which provide basic
|
269
|
+
iterator methods for token generators. Furthermore, it
|
270
|
+
includes Enumerable to provide the standard Ruby iteration
|
271
|
+
methods to token generators, like lexers.
|
272
|
+
|
273
|
+
=end
|
274
|
+
|
275
|
+
module TokenSource
  include Constants
  include Enumerable
  extend ClassMacros

  # implementors must provide #next_token
  abstract :next_token

  # Fetches the next token from the source, translating the end-of-file
  # condition (a nil token or one whose type is EOF) into StopIteration
  # so the source works with standard Ruby iteration.
  def next
    token = next_token()
    if token.nil? or token.type == EOF
      raise StopIteration
    end
    token
  end

  # wraps this source in a CommonTokenStream, forwarding any block given
  def to_stream(options = {})
    if block_given?
      CommonTokenStream.new(self, options) { |token| yield(token) }
    else
      CommonTokenStream.new(self, options)
    end
  end

  # yields each token produced by the source until EOF; returns an
  # Enumerator when no block is given
  def each
    return enum_for(:each) unless block_given?
    loop { yield(self.next) }
  rescue StopIteration
    return self
  end
end
|
303
|
+
|
304
|
+
|
305
|
+
=begin rdoc ANTLR3::TokenFactory
|
306
|
+
|
307
|
+
There are a variety of different entities throughout the ANTLR runtime library
|
308
|
+
that need to create token objects This module serves as a mixin that provides
|
309
|
+
methods for constructing tokens.
|
310
|
+
|
311
|
+
Including this module provides a +token_class+ attribute. Instance of the
|
312
|
+
including class can create tokens using the token class (which defaults to
|
313
|
+
ANTLR3::CommonToken). Token classes are presumed to have an #initialize method
|
314
|
+
that can be called without any parameters and the token objects are expected to
|
315
|
+
have the standard token attributes (see ANTLR3::Token).
|
316
|
+
|
317
|
+
=end
|
318
|
+
|
319
|
+
module TokenFactory
  attr_writer :token_class

  # Lazily resolves the class used to manufacture tokens: an explicitly
  # assigned class wins, then the including class's own +token_class+,
  # then a +Token+ constant on self, finally ANTLR3::CommonToken.
  def token_class
    @token_class ||= begin
      self.class.token_class rescue
      self::Token rescue
      ANTLR3::CommonToken
    end
  end

  # constructs a new token from the resolved token class, forwarding
  # any block through to the token's constructor
  def create_token(*args)
    if block_given?
      token_class.new(*args) { |*token_args| yield(*token_args) }
    else
      token_class.new(*args)
    end
  end
end
|
339
|
+
|
340
|
+
|
341
|
+
=begin rdoc ANTLR3::TokenScheme
|
342
|
+
|
343
|
+
TokenSchemes exist to handle the problem of defining token types as integer
|
344
|
+
values while maintaining meaningful text names for the types. They are
|
345
|
+
dynamically defined modules that map integer values to constants with token-type
|
346
|
+
names.
|
347
|
+
|
348
|
+
---
|
349
|
+
|
350
|
+
Fundamentally, tokens exist to take a chunk of text and identify it as belonging
|
351
|
+
to some category, like "VARIABLE" or "INTEGER". In code, the category is
|
352
|
+
represented by an integer -- some arbitrary value that ANTLR will decide to use
|
353
|
+
as it is creating the recognizer. The purpose of using an integer (instead of
|
354
|
+
say, a ruby symbol) is that ANTLR's decision logic often needs to test whether a
|
355
|
+
token's type falls within a range, which is not possible with symbols.
|
356
|
+
|
357
|
+
The downside of token types being represented as integers is that a developer
|
358
|
+
needs to be able to reference the unknown type value by name in action code.
|
359
|
+
Furthermore, code that references the type by name and tokens that can be
|
360
|
+
inspected with names in place of type values are more meaningful to a developer.
|
361
|
+
|
362
|
+
Since ANTLR requires token type names to follow capital-letter naming
|
363
|
+
conventions, defining types as named constants of the recognizer class resolves
|
364
|
+
the problem of referencing type values by name. Thus, a token type like
|
365
|
+
``VARIABLE'' can be represented by a number like 5 and referenced within code by
|
366
|
+
+VARIABLE+. However, when a recognizer creates tokens, the name of the token's
|
367
|
+
type cannot be seen without using the data defined in the recognizer.
|
368
|
+
|
369
|
+
Of course, tokens could be defined with a name attribute that could be specified
|
370
|
+
when tokens are created. However, doing so would make tokens take up more space
|
371
|
+
than necessary, as well as making it difficult to change the type of a token
|
372
|
+
while maintaining a correct name value.
|
373
|
+
|
374
|
+
TokenSchemes exist as a technique to manage token type referencing and name
|
375
|
+
extraction. They:
|
376
|
+
|
377
|
+
1. keep token type references clear and understandable in recognizer code
|
378
|
+
2. permit access to a token's type-name independently of recognizer objects
|
379
|
+
3. allow multiple classes to share the same token information
|
380
|
+
|
381
|
+
== Building Token Schemes
|
382
|
+
|
383
|
+
TokenScheme is a subclass of Module. Thus, it has the method
|
384
|
+
<tt>TokenScheme.new(tk_class = nil) { ... module-level code ...}</tt>, which
|
385
|
+
will evaluate the block in the context of the scheme (module), similarly to
|
386
|
+
Module#module_eval. Before evaluating the block, <tt>.new</tt> will setup the
|
387
|
+
module with the following actions:
|
388
|
+
|
389
|
+
1. define a customized token class (more on that below)
|
390
|
+
2. add a new constant, TOKEN_NAMES, which is a hash that maps types to names
|
391
|
+
3. dynamically populate the new scheme module with a couple instance methods
|
392
|
+
4. include ANTLR3::Constants in the new scheme module
|
393
|
+
|
394
|
+
As TokenScheme the class functions as a metaclass, figuring out some of the
|
395
|
+
scoping behavior can be mildly confusing if you're trying to get a handle of the
|
396
|
+
entity for your own purposes. Remember that all of the instance methods of
|
397
|
+
TokenScheme function as module-level methods of TokenScheme instances, ala
|
398
|
+
+attr_accessor+ and friends.
|
399
|
+
|
400
|
+
<tt>TokenScheme#define_token(name_symbol, int_value)</tt> adds a constant
|
401
|
+
definition <tt>name_symbol</tt> with the value <tt>int_value</tt>. It is
|
402
|
+
essentially like <tt>Module#const_set</tt>, except it forbids constant
|
403
|
+
overwriting (which would mess up recognizer code fairly badly) and adds an
|
404
|
+
inverse type-to-name map to its own <tt>TOKEN_NAMES</tt> table.
|
405
|
+
<tt>TokenScheme#define_tokens</tt> is a convenience method for defining many
|
406
|
+
types with a hash pairing names to values.
|
407
|
+
|
408
|
+
<tt>TokenScheme#register_name(value, name_string)</tt> specifies a custom
|
409
|
+
type-to-name definition. This is particularly useful for the anonymous tokens
|
410
|
+
that ANTLR generates for literal strings in the grammar specification. For
|
411
|
+
example, if you refer to the literal <tt>'='</tt> in some parser rule in your
|
412
|
+
grammar, ANTLR will add a lexer rule for the literal and give the token a name
|
413
|
+
like <tt>T__<i>x</i></tt>, where <tt><i>x</i></tt> is the type's integer value.
|
414
|
+
Since this is pretty meaningless to a developer, generated code should add a
|
415
|
+
special name definition for type value <tt><i>x</i></tt> with the string
|
416
|
+
<tt>"'='"</tt>.
|
417
|
+
|
418
|
+
=== Sample TokenScheme Construction
|
419
|
+
|
420
|
+
TokenData = ANTLR3::TokenScheme.new do
|
421
|
+
define_tokens(
|
422
|
+
:INT => 4,
|
423
|
+
:ID => 6,
|
424
|
+
:T__5 => 5,
|
425
|
+
:WS => 7
|
426
|
+
)
|
427
|
+
|
428
|
+
# note the self:: scoping below is due to the fact that
|
429
|
+
# ruby lexically-scopes constant names instead of
|
430
|
+
# looking up in the current scope
|
431
|
+
register_name(self::T__5, "'='")
|
432
|
+
end
|
433
|
+
|
434
|
+
TokenData::ID # => 6
|
435
|
+
TokenData::T__5 # => 5
|
436
|
+
TokenData.token_name(4) # => 'INT'
|
437
|
+
TokenData.token_name(5) # => "'='"
|
438
|
+
|
439
|
+
class ARecognizerOrSuch < ANTLR3::Parser
|
440
|
+
include TokenData
|
441
|
+
ID # => 6
|
442
|
+
end
|
443
|
+
|
444
|
+
== Custom Token Classes and Relationship with Tokens
|
445
|
+
|
446
|
+
When a TokenScheme is created, it will define a subclass of ANTLR3::CommonToken
|
447
|
+
and assigned it to the constant name +Token+. This token class will both include
|
448
|
+
and extend the scheme module. Since token schemes define the private instance
|
449
|
+
method <tt>token_name(type)</tt>, instances of the token class are now able to
|
450
|
+
provide their type names. The Token method <tt>name</tt> uses the
|
451
|
+
<tt>token_name</tt> method to provide the type name as if it were a simple
|
452
|
+
attribute without storing the name itself.
|
453
|
+
|
454
|
+
When a TokenScheme is included in a recognizer class, the class will now have
|
455
|
+
the token types as named constants, a type-to-name map constant +TOKEN_NAMES+,
|
456
|
+
and a grammar-specific subclass of ANTLR3::CommonToken assigned to the constant
|
457
|
+
Token. Thus, when recognizers need to manufacture tokens, instead of using the
|
458
|
+
generic CommonToken class, they can create tokens using the customized Token
|
459
|
+
class provided by the token scheme.
|
460
|
+
|
461
|
+
If you need to use a token class other than CommonToken, you can pass the class
|
462
|
+
as a parameter to TokenScheme.new, which will be used in place of the
|
463
|
+
dynamically-created CommonToken subclass.
|
464
|
+
|
465
|
+
=end
|
466
|
+
|
467
|
+
class TokenScheme < ::Module
  include TokenFactory

  # Builds a new token scheme module. When +tk_class+ is omitted, a
  # fresh subclass of ANTLR3::CommonToken is created to serve as the
  # scheme's token class. The optional block is module_eval'd against
  # the new scheme -- this is how generated recognizer code defines
  # its token types.
  def self.new(tk_class = nil, &body)
    super() do
      tk_class ||= Class.new(::ANTLR3::CommonToken)
      self.token_class = tk_class

      # each scheme gets its own copy of the built-in type-to-name map
      const_set(:TOKEN_NAMES, ::ANTLR3::Constants::BUILT_IN_TOKEN_NAMES.clone)

      scheme = self
      define_method(:token_scheme) { scheme }
      define_method(:token_names) { scheme::TOKEN_NAMES }
      define_method(:token_name) do |type|
        begin
          token_names[type] or super
        rescue NoMethodError
          ::ANTLR3::CommonToken.token_name(type)
        end
      end
      module_function :token_name, :token_names

      include ANTLR3::Constants

      body and module_eval(&body)
    end
  end

  # when a scheme is included into a class, also extend the class so the
  # scheme's module-level utility methods are available on it directly
  def included(mod)
    super
    mod.extend(self)
  end
  private :included

  # defines several token types at once from a name => value map
  def define_tokens(token_map = {})
    for token_name, token_value in token_map
      define_token(token_name, token_value)
    end
    return self
  end

  # Defines a single token type as a constant of the scheme and records
  # its reverse type-to-name mapping. Redefinition with the same value is
  # a no-op; a conflicting value raises NameError (silent overwrites
  # would corrupt generated recognizer code).
  def define_token(name, value)
    if const_defined?(name)
      current_value = const_get(name)
      unless current_value == value
        error = NameError.new("new token type definition ``#{name} = #{value}'' conflicts " <<
                  "with existing type definition ``#{name} = #{current_value}''", name)
        raise error
      end
    else
      const_set(name, value)
    end
    register_name(value, name) unless built_in_type?(value)
    return self
  end

  # registers display names either from a value => name hash or from a
  # list of names assigned sequentially starting at MIN_TOKEN_TYPE
  def register_names(*names)
    if names.length == 1 and Hash === names.first
      names.first.each do |value, name|
        register_name(value, name)
      end
    else
      names.each_with_index do |name, i|
        type_value = Constants::MIN_TOKEN_TYPE + i
        register_name(type_value, name)
      end
    end
  end

  # Registers a display name for a type value. Anonymous placeholder
  # names ("T__<type>") may be upgraded to real literal names, and a
  # downgrade back to a placeholder is ignored; any other conflict
  # raises NameError.
  def register_name(type_value, name)
    name = name.to_s.freeze
    if token_names.has_key?(type_value)
      current_name = token_names[type_value]
      current_name == name and return name

      if current_name == "T__#{type_value}"
        # only an anonymous name is registered -- upgrade the name to the full literal name
        token_names[type_value] = name
      elsif name == "T__#{type_value}"
        # ignore name downgrade from literal to anonymous constant
        return current_name
      else
        error = NameError.new(
          "attempted assignment of token type #{type_value}" <<
          " to name #{name} conflicts with existing name #{current_name}", name
        )
        raise error
      end
    else
      # +name+ is already a frozen string copy (see above)
      token_names[type_value] = name
    end
  end

  # whether the type value belongs to ANTLR's built-in token types
  def built_in_type?(type_value)
    Constants::BUILT_IN_TOKEN_NAMES.fetch(type_value, false) and true
  end

  # Whether a token type is defined, looked up by Integer value or by
  # constant name.
  # BUG FIX: the case expression previously read the undefined local
  # ``value'' instead of the parameter, so every call raised NameError.
  def token_defined?(name_or_value)
    case name_or_value
    when Integer then token_names.has_key?(name_or_value)
    else const_defined?(name_or_value.to_s)
    end
  end

  # two-way lookup: an Integer yields the registered name (or nil);
  # anything else is treated as a constant name and yields the value
  def [](name_or_value)
    case name_or_value
    when Integer then token_names.fetch(name_or_value, nil)
    # Hash#key replaces Hash#index, which was deprecated in 1.9 and
    # removed in Ruby 3 (identical value-to-key semantics since 1.8.7)
    else const_get(name_or_value.to_s) rescue token_names.key(name_or_value)
    end
  end

  # the scheme's customized token class
  def token_class
    self::Token
  end

  # installs +klass+ as the scheme's token class, mixing the scheme into
  # it so token instances can resolve their own type names
  def token_class=(klass)
    Class === klass or raise(TypeError, "token_class must be a Class")
    Util.silence_warnings do
      klass < self or klass.send(:include, self)
      const_set(:Token, klass)
    end
  end

end
|
591
|
+
|
592
|
+
end
|