antlr3 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
data/lib/antlr3/token.rb
ADDED
@@ -0,0 +1,592 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
=begin LICENSE
|
5
|
+
|
6
|
+
[The "BSD licence"]
|
7
|
+
Copyright (c) 2009 Kyle Yetter
|
8
|
+
All rights reserved.
|
9
|
+
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
11
|
+
modification, are permitted provided that the following conditions
|
12
|
+
are met:
|
13
|
+
|
14
|
+
1. Redistributions of source code must retain the above copyright
|
15
|
+
notice, this list of conditions and the following disclaimer.
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright
|
17
|
+
notice, this list of conditions and the following disclaimer in the
|
18
|
+
documentation and/or other materials provided with the distribution.
|
19
|
+
3. The name of the author may not be used to endorse or promote products
|
20
|
+
derived from this software without specific prior written permission.
|
21
|
+
|
22
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
23
|
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
24
|
+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
25
|
+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
26
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
27
|
+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
28
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
29
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
30
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
31
|
+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
32
|
+
|
33
|
+
=end
|
34
|
+
|
35
|
+
module ANTLR3
|
36
|
+
|
37
|
+
=begin rdoc ANTLR3::Token

At a minimum, a token binds together a chunk of text and a type symbol that
categorizes that text. ANTLR tokens additionally carry location data (absolute
character index, line number, and column), a channel number used for
coarse-grained grouping (parsers generally skip tokens on the hidden channel,
so comments and whitespace can ride along without being discarded), a
reference to the input stream that produced the token, and the token's index
within its stream (zero-based).

== Token as an Interface

This library provides a concrete implementation (see CommonToken), but any
object exposing the attributes below can serve as a token. Although most of
the runtime relies on duck typing rather than strict class checks, mixing this
module into a custom token class is a good idea.

=end

module Token
  include ANTLR3::Constants
  include Comparable

  # the token's associated chunk of text
  attr_accessor :text

  # the integer value associated with the token's type
  attr_accessor :type

  # the text's starting line number within the source (indexed starting at 1)
  attr_accessor :line

  # the text's starting position in the line within the source (indexed starting at 0)
  attr_accessor :column

  # the integer value of the channel to which the token is assigned
  attr_accessor :channel

  # the index of the token with respect to the other tokens produced during lexing
  attr_accessor :index

  # a reference to the input stream from which the token was extracted
  attr_accessor :input

  # the absolute character index in the input at which the text starts
  attr_accessor :start

  # the absolute character index in the input at which the text ends
  attr_accessor :stop

  alias :input_stream :input
  alias :input_stream= :input=
  alias :token_index :index
  alias :token_index= :index=

  # Flexible matching operator: an Integer matches against the token type, a
  # Symbol against the type name, a Regexp or String against the token text;
  # anything else falls through to the default implementation.
  def =~ obj
    case obj
    when Integer then type == obj
    when Symbol  then name.to_sym == obj
    when Regexp  then obj =~ text
    when String  then text == obj
    else super
    end
  end

  # tokens are ordered by their stream index (used by Comparable)
  def <=> tk2
    index <=> tk2.index
  end

  # dup/clone hook: copies every attribute except the stream index, which is
  # reset to -1 because the copy is not registered in any token stream
  def initialize_copy(orig)
    self.index   = -1
    self.type    = orig.type
    self.channel = orig.channel
    self.text    = orig.text.clone if orig.text
    self.start   = orig.start
    self.stop    = orig.stop
    self.line    = orig.line
    self.column  = orig.column
    self.input   = orig.input
  end

  # true when the token is anchored to real input: it has an input stream and
  # both start and stop character indices
  def concrete?
    !!(input && start && stop)
  end

  # the inverse of #concrete? -- true for synthesized (imaginary) tokens
  def imaginary?
    !(input && start && stop)
  end

  # the name of the token's type, resolved from its integer type value
  def name
    token_name(type)
  end

  # true when the token sits on the hidden channel
  def hidden?
    channel == HIDDEN_CHANNEL
  end

  # the token text as recorded in the source stream when concrete, otherwise
  # whatever is stored in #text
  def source_text
    concrete? ? input.substring(start, stop) : text
  end

  # move the token onto the hidden channel
  def hide!
    self.channel = HIDDEN_CHANNEL
  end

  # the start..stop character range, or nil when either bound is unavailable
  def range
    begin
      start..stop
    rescue
      nil
    end
  end

  def to_i
    index.to_i
  end

  def to_s
    text.to_s
  end

  # a compact, human-readable summary: index, type name, text, line/column,
  # stream range, and (when non-default) the channel
  def inspect
    text_inspect    = text ? '[%p] ' % text : ' '
    text_position   = line != 0 ? '@ line %s col %s ' % [line, column] : ''
    stream_position = start ? '(%s..%s)' % [start, stop] : ''

    front = index != -1 ? index.to_s << ' ' : ''
    rep = front << name << text_inspect <<
          text_position << stream_position
    rep.strip!
    channel == DEFAULT_CHANNEL or rep << " (#{channel})"
    return(rep)
  end

  # pp integration -- delegates to #inspect
  def pretty_print(printer)
    printer.text(inspect)
  end

  private

  # default type-name lookup; token schemes and generated subclasses override
  # this to report grammar-specific names
  def token_name(type)
    BUILT_IN_TOKEN_NAMES[type]
  end
end
|
189
|
+
|
190
|
+
CommonToken = Struct.new(:type, :channel, :text, :input, :start,
                         :stop, :index, :line, :column)

=begin rdoc ANTLR3::CommonToken

The standard Token implementation. It is built on Struct because tokens are
essentially plain data aggregates, and Structs are slightly faster than an
Object with hand-written accessors.

Generated recognizer code typically derives a grammar-specific subclass (for a
grammar named XYZ, the class XYZ::Token) so that token-type names can be
reported efficiently during debugging and inspection.

Structure attribute order:

* <tt>type</tt>
* <tt>channel</tt>
* <tt>text</tt>
* <tt>input</tt>
* <tt>start</tt>
* <tt>stop</tt>
* <tt>index</tt>
* <tt>line</tt>
* <tt>column</tt>

=end

class CommonToken
  include Token

  # attribute defaults filled in by ::create
  DEFAULT_VALUES = {
    :channel => DEFAULT_CHANNEL,
    :index   => -1,
    :line    => 0,
    :column  => -1
  }.freeze

  # class-level type-name lookup against the built-in name table
  def self.token_name(type)
    BUILT_IN_TOKEN_NAMES[type]
  end

  # build a token from an attribute hash, applying DEFAULT_VALUES for any
  # missing entries
  def self.create(fields = {})
    merged = DEFAULT_VALUES.merge(fields)
    new(*members.map { |member| merged[member.to_sym] })
  end

  # copy another token's attributes into a fresh instance of this class --
  # useful for converting a token to a different token class; the stream
  # index is reset to -1
  def self.from_token(token)
    duplicate_text = token.text ? token.text.clone : nil
    new(token.type, token.channel, duplicate_text,
        token.input, token.start, token.stop, -1, token.line, token.column)
  end

  def initialize(type = nil, channel = DEFAULT_CHANNEL, text = nil,
                 input = nil, start = nil, stop = nil, index = -1,
                 line = 0, column = -1)
    super
    yield(self) if block_given?
    # when no text was supplied but the stream bounds are known, lazily pull
    # the text out of the input stream
    if self.text.nil? && self.start && self.stop
      self.text = self.input.substring(self.start, self.stop)
    end
  end

  alias :input_stream :input
  alias :input_stream= :input=
  alias :token_index :index
  alias :token_index= :index=
end
|
259
|
+
|
260
|
+
# shared, frozen sentinel tokens used throughout the runtime
Constants::EOF_TOKEN = CommonToken.new(EOF).freeze
Constants::INVALID_TOKEN = CommonToken.new(INVALID_TOKEN_TYPE).freeze
Constants::SKIP_TOKEN = CommonToken.new(INVALID_TOKEN_TYPE).freeze
|
263
|
+
|
264
|
+
=begin rdoc ANTLR3::TokenSource

TokenSource is a simple mixin that demands an implementation of #next_token.
In exchange it defines #next and #each, and pulls in Enumerable so that token
generators (such as lexers) get the full standard Ruby iteration interface.

=end

module TokenSource
  include Constants
  include Enumerable
  extend ClassMacros

  abstract :next_token

  # return the next token, raising StopIteration when the source is exhausted
  # (a nil token or one whose type is EOF)
  def next
    token = next_token
    raise StopIteration if token.nil? || token.type == EOF
    token
  end

  # wrap this source in a CommonTokenStream, forwarding any given block
  def to_stream(options = {})
    if block_given?
      CommonTokenStream.new(self, options) { |token| yield(token) }
    else
      CommonTokenStream.new(self, options)
    end
  end

  # yield each token until end of input; without a block, returns an
  # Enumerator instead
  def each
    return enum_for(:each) unless block_given?
    loop { yield(self.next) }
  rescue StopIteration
    self
  end
end
|
303
|
+
|
304
|
+
|
305
|
+
=begin rdoc ANTLR3::TokenFactory

A mixin for the various runtime entities that need to manufacture token
objects. Including it provides a +token_class+ attribute (defaulting to
ANTLR3::CommonToken) and a #create_token helper. Token classes are expected to
be constructible without arguments and to expose the standard token attributes
(see ANTLR3::Token).

=end

module TokenFactory
  attr_writer :token_class

  # The class used to build tokens, memoized on first access. Resolution
  # order: an explicitly assigned value, the including class's own
  # token_class, a Token constant on self, and finally ANTLR3::CommonToken.
  def token_class
    @token_class ||=
      begin
        self.class.token_class
      rescue StandardError
        begin
          self::Token
        rescue StandardError
          ANTLR3::CommonToken
        end
      end
  end

  # construct a new token via token_class, forwarding any supplied block to
  # the token's initializer
  def create_token(*args)
    if block_given?
      token_class.new(*args) { |*token_args| yield(*token_args) }
    else
      token_class.new(*args)
    end
  end
end
|
339
|
+
|
340
|
+
|
341
|
+
=begin rdoc ANTLR3::TokenScheme

TokenSchemes solve the problem of defining token types as integer values while
keeping meaningful names for those types. They are dynamically defined modules
(TokenScheme subclasses ::Module) that map integer values to named constants
and maintain a reverse type-to-name table.

A scheme created with <tt>TokenScheme.new(tk_class = nil) { ... }</tt> will:

1. define a customized token class (a fresh ANTLR3::CommonToken subclass
   unless +tk_class+ is given), assigned to the constant +Token+
2. add a +TOKEN_NAMES+ hash constant mapping types to names
3. define instance methods (+token_scheme+, +token_names+, +token_name+)
4. include ANTLR3::Constants

<tt>#define_token(name, value)</tt> behaves like Module#const_set but forbids
conflicting redefinition and records a reverse name mapping;
<tt>#define_tokens</tt> accepts a hash of many definitions at once.
<tt>#register_name(value, name)</tt> installs a custom type-to-name entry --
useful for replacing ANTLR's anonymous <tt>T__x</tt> literal-token names with
the literal text itself.

=== Sample TokenScheme Construction

  TokenData = ANTLR3::TokenScheme.new do
    define_tokens(:INT => 4, :ID => 6, :T__5 => 5, :WS => 7)
    # self:: scoping needed because ruby resolves constants lexically
    register_name(self::T__5, "'='")
  end

  TokenData::ID           # => 6
  TokenData.token_name(5) # => "'='"

  class ARecognizerOrSuch < ANTLR3::Parser
    include TokenData
    ID # => 6
  end

When a scheme is included in a recognizer class, the class gains the type
constants, the +TOKEN_NAMES+ table, and the scheme's Token class, which
recognizers use in place of the generic CommonToken when manufacturing tokens.

=end

class TokenScheme < ::Module
  include TokenFactory

  # Create a new token scheme module. A custom token class may be supplied as
  # +tk_class+ (otherwise a fresh ANTLR3::CommonToken subclass is created);
  # the optional block is evaluated in the context of the new scheme.
  def self.new(tk_class = nil, &body)
    super() do
      tk_class ||= Class.new(::ANTLR3::CommonToken)
      self.token_class = tk_class

      const_set(:TOKEN_NAMES, ::ANTLR3::Constants::BUILT_IN_TOKEN_NAMES.clone)

      scheme = self
      define_method(:token_scheme) { scheme }
      define_method(:token_names) { scheme::TOKEN_NAMES }
      define_method(:token_name) do |type|
        begin
          token_names[type] or super
        rescue NoMethodError
          ::ANTLR3::CommonToken.token_name(type)
        end
      end
      module_function :token_name, :token_names

      include ANTLR3::Constants

      body and module_eval(&body)
    end
  end

  # when a scheme is included into a class, also extend that class so the
  # scheme's lookup methods are available at the class level
  def included(mod)
    super
    mod.extend(self)
  end
  private :included

  # define several token types at once from a name => value hash
  def define_tokens(token_map = {})
    token_map.each do |token_name, token_value|
      define_token(token_name, token_value)
    end
    return self
  end

  # Define a single token type as a named constant. Redefining an existing
  # name with a different value raises NameError; non-built-in types also get
  # a reverse name registration.
  def define_token(name, value)
    if const_defined?(name)
      current_value = const_get(name)
      unless current_value == value
        error = NameError.new("new token type definition ``#{name} = #{value}'' conflicts " <<
                              "with existing type definition ``#{name} = #{current_value}''", name)
        raise error
      end
    else
      const_set(name, value)
    end
    register_name(value, name) unless built_in_type?(value)
    return self
  end

  # register many names at once: either a single value => name hash, or a
  # list of names assigned sequentially from MIN_TOKEN_TYPE
  def register_names(*names)
    if names.length == 1 and Hash === names.first
      names.first.each do |value, name|
        register_name(value, name)
      end
    else
      names.each_with_index do |name, i|
        type_value = Constants::MIN_TOKEN_TYPE + i
        register_name(type_value, name)
      end
    end
  end

  # Record a type-to-name mapping. Anonymous T__<value> placeholder names may
  # be upgraded to a literal name (and literal names are never downgraded);
  # any other conflicting re-registration raises NameError.
  def register_name(type_value, name)
    name = name.to_s.freeze
    if token_names.has_key?(type_value)
      current_name = token_names[type_value]
      current_name == name and return name

      if current_name == "T__#{type_value}"
        # only an anonymous name is registered -- upgrade it to the full literal name
        token_names[type_value] = name
      elsif name == "T__#{type_value}"
        # ignore name downgrade from literal to anonymous constant
        return current_name
      else
        error = NameError.new(
          "attempted assignment of token type #{type_value}" <<
          " to name #{name} conflicts with existing name #{current_name}", name
        )
        raise error
      end
    else
      token_names[type_value] = name
    end
  end

  # true when the type value belongs to ANTLR's built-in token set
  def built_in_type?(type_value)
    Constants::BUILT_IN_TOKEN_NAMES.fetch(type_value, false) and true
  end

  # true when the given type value (Integer) or type name is defined in this
  # scheme
  def token_defined?(name_or_value)
    # FIX: originally switched on an undefined local `value`, which raised
    # NameError on every call; the parameter is `name_or_value`
    case name_or_value
    when Integer then token_names.has_key?(name_or_value)
    else const_defined?(name_or_value.to_s)
    end
  end

  # two-way lookup: an Integer returns the registered name (or nil); a name
  # returns the type value
  def [](name_or_value)
    case name_or_value
    when Integer then token_names.fetch(name_or_value, nil)
    else const_get(name_or_value.to_s) rescue token_names.index(name_or_value)
    end
  end

  # the scheme's customized token class (the Token constant)
  def token_class
    self::Token
  end

  # install +klass+ as the scheme's token class, mixing the scheme into it so
  # its instances can resolve type names
  def token_class=(klass)
    Class === klass or raise(TypeError, "token_class must be a Class")
    Util.silence_warnings do
      klass < self or klass.send(:include, self)
      const_set(:Token, klass)
    end
  end

end
|
591
|
+
|
592
|
+
end
|