antlr3 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/ANTLR-LICENSE.txt +26 -0
  2. data/History.txt +66 -0
  3. data/README.txt +139 -0
  4. data/bin/antlr4ruby +33 -0
  5. data/java/RubyTarget.java +524 -0
  6. data/java/antlr-full-3.2.1.jar +0 -0
  7. data/lib/antlr3.rb +176 -0
  8. data/lib/antlr3/constants.rb +88 -0
  9. data/lib/antlr3/debug.rb +701 -0
  10. data/lib/antlr3/debug/event-hub.rb +210 -0
  11. data/lib/antlr3/debug/record-event-listener.rb +25 -0
  12. data/lib/antlr3/debug/rule-tracer.rb +55 -0
  13. data/lib/antlr3/debug/socket.rb +360 -0
  14. data/lib/antlr3/debug/trace-event-listener.rb +92 -0
  15. data/lib/antlr3/dfa.rb +247 -0
  16. data/lib/antlr3/dot.rb +174 -0
  17. data/lib/antlr3/error.rb +657 -0
  18. data/lib/antlr3/main.rb +561 -0
  19. data/lib/antlr3/modes/ast-builder.rb +41 -0
  20. data/lib/antlr3/modes/filter.rb +56 -0
  21. data/lib/antlr3/profile.rb +322 -0
  22. data/lib/antlr3/recognizers.rb +1280 -0
  23. data/lib/antlr3/streams.rb +985 -0
  24. data/lib/antlr3/streams/interactive.rb +91 -0
  25. data/lib/antlr3/streams/rewrite.rb +412 -0
  26. data/lib/antlr3/test/call-stack.rb +57 -0
  27. data/lib/antlr3/test/config.rb +23 -0
  28. data/lib/antlr3/test/core-extensions.rb +269 -0
  29. data/lib/antlr3/test/diff.rb +165 -0
  30. data/lib/antlr3/test/functional.rb +207 -0
  31. data/lib/antlr3/test/grammar.rb +371 -0
  32. data/lib/antlr3/token.rb +592 -0
  33. data/lib/antlr3/tree.rb +1415 -0
  34. data/lib/antlr3/tree/debug.rb +163 -0
  35. data/lib/antlr3/tree/visitor.rb +84 -0
  36. data/lib/antlr3/tree/wizard.rb +481 -0
  37. data/lib/antlr3/util.rb +149 -0
  38. data/lib/antlr3/version.rb +27 -0
  39. data/samples/ANTLRv3Grammar.g +621 -0
  40. data/samples/Cpp.g +749 -0
  41. data/templates/AST.stg +335 -0
  42. data/templates/ASTDbg.stg +40 -0
  43. data/templates/ASTParser.stg +153 -0
  44. data/templates/ASTTreeParser.stg +272 -0
  45. data/templates/Dbg.stg +192 -0
  46. data/templates/Ruby.stg +1514 -0
  47. data/test/functional/ast-output/auto-ast.rb +797 -0
  48. data/test/functional/ast-output/construction.rb +555 -0
  49. data/test/functional/ast-output/hetero-nodes.rb +753 -0
  50. data/test/functional/ast-output/rewrites.rb +1327 -0
  51. data/test/functional/ast-output/tree-rewrite.rb +1662 -0
  52. data/test/functional/debugging/debug-mode.rb +689 -0
  53. data/test/functional/debugging/profile-mode.rb +165 -0
  54. data/test/functional/debugging/rule-tracing.rb +74 -0
  55. data/test/functional/delegation/import.rb +379 -0
  56. data/test/functional/lexer/basic.rb +559 -0
  57. data/test/functional/lexer/filter-mode.rb +245 -0
  58. data/test/functional/lexer/nuances.rb +47 -0
  59. data/test/functional/lexer/properties.rb +104 -0
  60. data/test/functional/lexer/syn-pred.rb +32 -0
  61. data/test/functional/lexer/xml.rb +206 -0
  62. data/test/functional/main/main-scripts.rb +245 -0
  63. data/test/functional/parser/actions.rb +224 -0
  64. data/test/functional/parser/backtracking.rb +244 -0
  65. data/test/functional/parser/basic.rb +282 -0
  66. data/test/functional/parser/calc.rb +98 -0
  67. data/test/functional/parser/ll-star.rb +143 -0
  68. data/test/functional/parser/nuances.rb +165 -0
  69. data/test/functional/parser/predicates.rb +103 -0
  70. data/test/functional/parser/properties.rb +242 -0
  71. data/test/functional/parser/rule-methods.rb +132 -0
  72. data/test/functional/parser/scopes.rb +274 -0
  73. data/test/functional/token-rewrite/basic.rb +318 -0
  74. data/test/functional/token-rewrite/via-parser.rb +100 -0
  75. data/test/functional/tree-parser/basic.rb +750 -0
  76. data/test/unit/sample-input/file-stream-1 +2 -0
  77. data/test/unit/sample-input/teststreams.input2 +2 -0
  78. data/test/unit/test-dfa.rb +52 -0
  79. data/test/unit/test-exceptions.rb +44 -0
  80. data/test/unit/test-recognizers.rb +55 -0
  81. data/test/unit/test-scheme.rb +62 -0
  82. data/test/unit/test-streams.rb +459 -0
  83. data/test/unit/test-tree-wizard.rb +535 -0
  84. data/test/unit/test-trees.rb +854 -0
  85. metadata +205 -0
@@ -0,0 +1,592 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ =begin LICENSE
5
+
6
+ [The "BSD licence"]
7
+ Copyright (c) 2009 Kyle Yetter
8
+ All rights reserved.
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions
12
+ are met:
13
+
14
+ 1. Redistributions of source code must retain the above copyright
15
+ notice, this list of conditions and the following disclaimer.
16
+ 2. Redistributions in binary form must reproduce the above copyright
17
+ notice, this list of conditions and the following disclaimer in the
18
+ documentation and/or other materials provided with the distribution.
19
+ 3. The name of the author may not be used to endorse or promote products
20
+ derived from this software without specific prior written permission.
21
+
22
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+
33
+ =end
34
+
35
+ module ANTLR3
36
+
37
+ =begin rdoc ANTLR3::Token
38
+
39
+ At a minimum, tokens are data structures that bind together a chunk of text and
40
+ a corresponding type symbol, which categorizes/characterizes the content of the
41
+ text. Tokens also usually carry information about their location in the input,
42
+ such as absolute character index, line number, and position within the line (or
43
+ column).
44
+
45
+ Furthermore, ANTLR tokens are assigned a "channel" number, an extra degree of
46
+ categorization that groups things on a larger scale. Parsers will usually ignore
47
+ tokens that have channel value 99 (the HIDDEN_CHANNEL), so you can keep things
48
+ like comment and white space huddled together with neighboring tokens,
49
+ effectively ignoring them without discarding them.
50
+
51
+ ANTLR tokens also keep a reference to the source stream from which they
52
+ originated. Token streams will also provide an index value for the token, which
53
+ indicates the position of the token relative to other tokens in the stream,
54
+ starting at zero. For example, the 22nd token pulled from a lexer by
55
+ CommonTokenStream will have index value 21.
56
+
57
+ == Token as an Interface
58
+
59
+ This library provides a token implementation (see CommonToken). Additionally,
60
+ you may write your own token class as long as you provide methods that give
61
+ access to the attributes expected by a token. Even though most of the ANTLR
62
+ library tries to use duck-typing techniques instead of pure object-oriented type
63
+ checking, it's a good idea to include this ANTLR3::Token into your customized
64
+ token class.
65
+
66
+ =end
67
+
68
+ module Token
69
+ include ANTLR3::Constants
70
+ include Comparable
71
+
72
+ # the token's associated chunk of text
73
+ attr_accessor :text
74
+
75
+ # the integer value associated with the token's type
76
+ attr_accessor :type
77
+
78
+ # the text's starting line number within the source (indexed starting at 1)
79
+ attr_accessor :line
80
+
81
+ # the text's starting position in the line within the source (indexed starting at 0)
82
+ attr_accessor :column
83
+
84
+ # the integer value of the channel to which the token is assigned
85
+ attr_accessor :channel
86
+
87
+ # the index of the token with respect to other the other tokens produced during lexing
88
+ attr_accessor :index
89
+
90
+ # a reference to the input stream from which the token was extracted
91
+ attr_accessor :input
92
+
93
+ # the absolute character index in the input at which the text starts
94
+ attr_accessor :start
95
+
96
+ # the absolute character index in the input at which the text ends
97
+ attr_accessor :stop
98
+
99
+ alias :input_stream :input
100
+ alias :input_stream= :input=
101
+ alias :token_index :index
102
+ alias :token_index= :index=
103
+
104
+ def =~ obj
105
+ case obj
106
+ when Integer then type == obj
107
+ when Symbol then name.to_sym == obj
108
+ when Regexp then obj =~ text
109
+ when String then text == obj
110
+ else super
111
+ end
112
+ end
113
+
114
+ def <=> tk2
115
+ index <=> tk2.index
116
+ end
117
+
118
+ def initialize_copy(orig)
119
+ self.index = -1
120
+ self.type = orig.type
121
+ self.channel = orig.channel
122
+ self.text = orig.text.clone if orig.text
123
+ self.start = orig.start
124
+ self.stop = orig.stop
125
+ self.line = orig.line
126
+ self.column = orig.column
127
+ self.input = orig.input
128
+ end
129
+
130
+ def concrete?
131
+ input && start && stop ? true : false
132
+ end
133
+
134
+ def imaginary?
135
+ input && start && stop ? false : true
136
+ end
137
+
138
+ def name
139
+ token_name(type)
140
+ end
141
+
142
+ def hidden?
143
+ channel == HIDDEN_CHANNEL
144
+ end
145
+
146
+ def source_text
147
+ concrete? ? input.substring(start, stop) : text
148
+ end
149
+
150
+ def hide!
151
+ self.channel = HIDDEN_CHANNEL
152
+ end
153
+
154
+ def range
155
+ start..stop rescue nil
156
+ end
157
+
158
+ def to_i
159
+ index.to_i
160
+ end
161
+
162
+ def to_s
163
+ text.to_s
164
+ end
165
+
166
+ def inspect
167
+ text_inspect = text ? '[%p] ' % text : ' '
168
+ text_position = line != 0 ? '@ line %s col %s ' % [line, column] : ''
169
+ stream_position = start ? '(%s..%s)' % [start, stop] : ''
170
+
171
+ front = index != -1 ? index.to_s << ' ' : ''
172
+ rep = front << name << text_inspect <<
173
+ text_position << stream_position
174
+ rep.strip!
175
+ channel == DEFAULT_CHANNEL or rep << " (#{channel.to_s})"
176
+ return(rep)
177
+ end
178
+
179
+ def pretty_print(printer)
180
+ printer.text( inspect )
181
+ end
182
+
183
+ private
184
+
185
+ def token_name(type)
186
+ BUILT_IN_TOKEN_NAMES[type]
187
+ end
188
+ end
189
+
190
+ CommonToken = Struct.new(:type, :channel, :text, :input, :start,
191
+ :stop, :index, :line, :column)
192
+
193
+ =begin rdoc ANTLR3::CommonToken
194
+
195
+ The base class for the standard implementation of Token. It is implemented as a
196
+ simple Struct as tokens are basically simple data structures binding together a
197
+ bunch of different information and Structs are slightly faster than a standard
198
+ Object with accessor methods implementation.
199
+
200
+ By default, ANTLR generated ruby code will provide a customized subclass of
201
+ CommonToken to track token-type names efficiently for debugging, inspection, and
202
+ general utility. Thus code generated for a standard combo lexer-parser grammar
203
+ named XYZ will have a base module named XYZ and a customized CommonToken
204
+ subclass named XYZ::Token.
205
+
206
+ Here is the token structure attribute list in order:
207
+
208
+ * <tt>type</tt>
209
+ * <tt>channel</tt>
210
+ * <tt>text</tt>
211
+ * <tt>input</tt>
212
+ * <tt>start</tt>
213
+ * <tt>stop</tt>
214
+ * <tt>index</tt>
215
+ * <tt>line</tt>
216
+ * <tt>column</tt>
217
+
218
+ =end
219
+
220
+ class CommonToken
221
+ include Token
222
+ DEFAULT_VALUES = {
223
+ :channel => DEFAULT_CHANNEL,
224
+ :index => -1,
225
+ :line => 0,
226
+ :column => -1
227
+ }.freeze
228
+
229
+ def self.token_name(type)
230
+ BUILT_IN_TOKEN_NAMES[type]
231
+ end
232
+
233
+ def self.create(fields = {})
234
+ fields = DEFAULT_VALUES.merge(fields)
235
+ args = members.map { |name| fields[name.to_sym] }
236
+ new(*args)
237
+ end
238
+
239
+ # allows you to make a copy of a token with a different class
240
+ def self.from_token(token)
241
+ new(token.type, token.channel, token.text ? token.text.clone : nil,
242
+ token.input, token.start, token.stop, -1, token.line, token.column)
243
+ end
244
+
245
+ def initialize(type = nil, channel = DEFAULT_CHANNEL, text = nil,
246
+ input = nil, start = nil, stop = nil, index = -1,
247
+ line = 0, column = -1)
248
+ super
249
+ block_given? and yield(self)
250
+ self.text.nil? && self.start && self.stop and
251
+ self.text = self.input.substring(self.start, self.stop)
252
+ end
253
+
254
+ alias :input_stream :input
255
+ alias :input_stream= :input=
256
+ alias :token_index :index
257
+ alias :token_index= :index=
258
+ end
259
+
260
+ Constants::EOF_TOKEN = CommonToken.new(EOF).freeze
261
+ Constants::INVALID_TOKEN = CommonToken.new(INVALID_TOKEN_TYPE).freeze
262
+ Constants::SKIP_TOKEN = CommonToken.new(INVALID_TOKEN_TYPE).freeze
263
+
264
+ =begin rdoc ANTLR3::TokenSource
265
+
266
+ TokenSource is a simple mixin module that demands an
267
+ implementation of the method #next_token. In return, it
268
+ defines methods #next and #each, which provide basic
269
+ iterator methods for token generators. Furthermore, it
270
+ includes Enumerable to provide the standard Ruby iteration
271
+ methods to token generators, like lexers.
272
+
273
+ =end
274
+
275
+ module TokenSource
276
+ include Constants
277
+ include Enumerable
278
+ extend ClassMacros
279
+
280
+ abstract :next_token
281
+
282
+ def next
283
+ token = next_token()
284
+ raise StopIteration if token.nil? or token.type == EOF
285
+ return token
286
+ end
287
+
288
+ def to_stream(options = {})
289
+ if block_given?
290
+ CommonTokenStream.new(self, options) { |t| yield(t) }
291
+ else
292
+ CommonTokenStream.new(self, options)
293
+ end
294
+ end
295
+
296
+ def each
297
+ block_given? or return enum_for(:each)
298
+ loop { yield(self.next) }
299
+ rescue StopIteration
300
+ return self
301
+ end
302
+ end
303
+
304
+
305
+ =begin rdoc ANTLR3::TokenFactory
306
+
307
+ There are a variety of different entities throughout the ANTLR runtime library
308
+ that need to create token objects. This module serves as a mixin that provides
309
+ methods for constructing tokens.
310
+
311
+ Including this module provides a +token_class+ attribute. Instance of the
312
+ including class can create tokens using the token class (which defaults to
313
+ ANTLR3::CommonToken). Token classes are presumed to have an #initialize method
314
+ that can be called without any parameters and the token objects are expected to
315
+ have the standard token attributes (see ANTLR3::Token).
316
+
317
+ =end
318
+
319
+ module TokenFactory
320
+ attr_writer :token_class
321
+ def token_class
322
+ @token_class ||= begin
323
+ self.class.token_class rescue
324
+ self::Token rescue
325
+ ANTLR3::CommonToken
326
+ end
327
+ end
328
+
329
+ def create_token(*args)
330
+ if block_given?
331
+ token_class.new(*args) do |*targs|
332
+ yield(*targs)
333
+ end
334
+ else
335
+ token_class.new(*args)
336
+ end
337
+ end
338
+ end
339
+
340
+
341
+ =begin rdoc ANTLR3::TokenScheme
342
+
343
+ TokenSchemes exist to handle the problem of defining token types as integer
344
+ values while maintaining meaningful text names for the types. They are
345
+ dynamically defined modules that map integer values to constants with token-type
346
+ names.
347
+
348
+ ---
349
+
350
+ Fundamentally, tokens exist to take a chunk of text and identify it as belonging
351
+ to some category, like "VARIABLE" or "INTEGER". In code, the category is
352
+ represented by an integer -- some arbitrary value that ANTLR will decide to use
353
+ as it is creating the recognizer. The purpose of using an integer (instead of
354
+ say, a ruby symbol) is that ANTLR's decision logic often needs to test whether a
355
+ token's type falls within a range, which is not possible with symbols.
356
+
357
+ The downside of token types being represented as integers is that a developer
358
+ needs to be able to reference the unknown type value by name in action code.
359
+ Furthermore, code that references the type by name and tokens that can be
360
+ inspected with names in place of type values are more meaningful to a developer.
361
+
362
+ Since ANTLR requires token type names to follow capital-letter naming
363
+ conventions, defining types as named constants of the recognizer class resolves
364
+ the problem of referencing type values by name. Thus, a token type like
365
+ ``VARIABLE'' can be represented by a number like 5 and referenced within code by
366
+ +VARIABLE+. However, when a recognizer creates tokens, the name of the token's
367
+ type cannot be seen without using the data defined in the recognizer.
368
+
369
+ Of course, tokens could be defined with a name attribute that could be specified
370
+ when tokens are created. However, doing so would make tokens take up more space
371
+ than necessary, as well as making it difficult to change the type of a token
372
+ while maintaining a correct name value.
373
+
374
+ TokenSchemes exist as a technique to manage token type referencing and name
375
+ extraction. They:
376
+
377
+ 1. keep token type references clear and understandable in recognizer code
378
+ 2. permit access to a token's type-name independently of recognizer objects
379
+ 3. allow multiple classes to share the same token information
380
+
381
+ == Building Token Schemes
382
+
383
+ TokenScheme is a subclass of Module. Thus, it has the method
384
+ <tt>TokenScheme.new(tk_class = nil) { ... module-level code ...}</tt>, which
385
+ will evaluate the block in the context of the scheme (module), similarly to
386
+ Module#module_eval. Before evaluating the block, <tt>.new</tt> will setup the
387
+ module with the following actions:
388
+
389
+ 1. define a customized token class (more on that below)
390
+ 2. add a new constant, TOKEN_NAMES, which is a hash that maps types to names
391
+ 3. dynamically populate the new scheme module with a couple instance methods
392
+ 4. include ANTLR3::Constants in the new scheme module
393
+
394
+ Because the TokenScheme class functions as a metaclass, figuring out some of the
395
+ scoping behavior can be mildly confusing if you're trying to get a handle of the
396
+ entity for your own purposes. Remember that all of the instance methods of
397
+ TokenScheme function as module-level methods of TokenScheme instances, ala
398
+ +attr_accessor+ and friends.
399
+
400
+ <tt>TokenScheme#define_token(name_symbol, int_value)</tt> adds a constant
401
+ definition <tt>name_symbol</tt> with the value <tt>int_value</tt>. It is
402
+ essentially like <tt>Module#const_set</tt>, except it forbids constant
403
+ overwriting (which would mess up recognizer code fairly badly) and adds an
404
+ inverse type-to-name map to its own <tt>TOKEN_NAMES</tt> table.
405
+ <tt>TokenScheme#define_tokens</tt> is a convenience method for defining many
406
+ types with a hash pairing names to values.
407
+
408
+ <tt>TokenScheme#register_name(value, name_string)</tt> specifies a custom
409
+ type-to-name definition. This is particularly useful for the anonymous tokens
410
+ that ANTLR generates for literal strings in the grammar specification. For
411
+ example, if you refer to the literal <tt>'='</tt> in some parser rule in your
412
+ grammar, ANTLR will add a lexer rule for the literal and give the token a name
413
+ like <tt>T__<i>x</i></tt>, where <tt><i>x</i></tt> is the type's integer value.
414
+ Since this is pretty meaningless to a developer, generated code should add a
415
+ special name definition for type value <tt><i>x</i></tt> with the string
416
+ <tt>"'='"</tt>.
417
+
418
+ === Sample TokenScheme Construction
419
+
420
+ TokenData = ANTLR3::TokenScheme.new do
421
+ define_tokens(
422
+ :INT => 4,
423
+ :ID => 6,
424
+ :T__5 => 5,
425
+ :WS => 7
426
+ )
427
+
428
+ # note the self:: scoping below is due to the fact that
429
+ # ruby lexically-scopes constant names instead of
430
+ # looking up in the current scope
431
+ register_name(self::T__5, "'='")
432
+ end
433
+
434
+ TokenData::ID # => 6
435
+ TokenData::T__5 # => 5
436
+ TokenData.token_name(4) # => 'INT'
437
+ TokenData.token_name(5) # => "'='"
438
+
439
+ class ARecognizerOrSuch < ANTLR3::Parser
440
+ include TokenData
441
+ ID # => 6
442
+ end
443
+
444
+ == Custom Token Classes and Relationship with Tokens
445
+
446
+ When a TokenScheme is created, it will define a subclass of ANTLR3::CommonToken
447
+ and assigned it to the constant name +Token+. This token class will both include
448
+ and extend the scheme module. Since token schemes define the private instance
449
+ method <tt>token_name(type)</tt>, instances of the token class are now able to
450
+ provide their type names. The Token method <tt>name</tt> uses the
451
+ <tt>token_name</tt> method to provide the type name as if it were a simple
452
+ attribute without storing the name itself.
453
+
454
+ When a TokenScheme is included in a recognizer class, the class will now have
455
+ the token types as named constants, a type-to-name map constant +TOKEN_NAMES+,
456
+ and a grammar-specific subclass of ANTLR3::CommonToken assigned to the constant
457
+ Token. Thus, when recognizers need to manufacture tokens, instead of using the
458
+ generic CommonToken class, they can create tokens using the customized Token
459
+ class provided by the token scheme.
460
+
461
+ If you need to use a token class other than CommonToken, you can pass the class
462
+ as a parameter to TokenScheme.new, which will be used in place of the
463
+ dynamically-created CommonToken subclass.
464
+
465
+ =end
466
+
467
+ class TokenScheme < ::Module
468
+ include TokenFactory
469
+
470
+ def self.new(tk_class = nil, &body)
471
+ super() do
472
+ tk_class ||= Class.new(::ANTLR3::CommonToken)
473
+ self.token_class = tk_class
474
+
475
+ const_set(:TOKEN_NAMES, ::ANTLR3::Constants::BUILT_IN_TOKEN_NAMES.clone)
476
+
477
+ scheme = self
478
+ define_method(:token_scheme) { scheme }
479
+ define_method(:token_names) { scheme::TOKEN_NAMES }
480
+ define_method(:token_name) do |type|
481
+ begin
482
+ token_names[type] or super
483
+ rescue NoMethodError
484
+ ::ANTLR3::CommonToken.token_name(type)
485
+ end
486
+ end
487
+ module_function :token_name, :token_names
488
+
489
+ include ANTLR3::Constants
490
+
491
+ body and module_eval(&body)
492
+ end
493
+ end
494
+
495
+ def included(mod)
496
+ super
497
+ mod.extend(self)
498
+ end
499
+ private :included
500
+
501
+ def define_tokens(token_map = {})
502
+ for token_name, token_value in token_map
503
+ define_token(token_name, token_value)
504
+ end
505
+ return self
506
+ end
507
+
508
+ def define_token(name, value)
509
+ if const_defined?(name)
510
+ current_value = const_get(name)
511
+ unless current_value == value
512
+ error = NameError.new("new token type definition ``#{name} = #{value}'' conflicts " <<
513
+ "with existing type definition ``#{name} = #{current_value}''", name)
514
+ raise error
515
+ end
516
+ else
517
+ const_set(name, value)
518
+ end
519
+ register_name(value, name) unless built_in_type?(value)
520
+ return self
521
+ end
522
+
523
+ def register_names(*names)
524
+ if names.length == 1 and Hash === names.first
525
+ names.first.each do |value, name|
526
+ register_name(value, name)
527
+ end
528
+ else
529
+ names.each_with_index do |name, i|
530
+ type_value = Constants::MIN_TOKEN_TYPE + i
531
+ register_name(type_value, name)
532
+ end
533
+ end
534
+ end
535
+
536
+ def register_name(type_value, name)
537
+ name = name.to_s.freeze
538
+ if token_names.has_key?(type_value)
539
+ current_name = token_names[type_value]
540
+ current_name == name and return name
541
+
542
+ if current_name == "T__#{type_value}"
543
+ # only an anonymous name is registered -- upgrade the name to the full literal name
544
+ token_names[type_value] = name
545
+ elsif name == "T__#{type_value}"
546
+ # ignore name downgrade from literal to anonymous constant
547
+ return current_name
548
+ else
549
+ error = NameError.new(
550
+ "attempted assignment of token type #{type_value}" <<
551
+ " to name #{name} conflicts with existing name #{current_name}", name
552
+ )
553
+ raise error
554
+ end
555
+ else
556
+ token_names[type_value] = name.to_s.freeze
557
+ end
558
+ end
559
+
560
+ def built_in_type?(type_value)
561
+ Constants::BUILT_IN_TOKEN_NAMES.fetch(type_value, false) and true
562
+ end
563
+
564
+ def token_defined?(name_or_value)
565
+ case value
566
+ when Integer then token_names.has_key?(name_or_value)
567
+ else const_defined?(name_or_value.to_s)
568
+ end
569
+ end
570
+
571
+ def [](name_or_value)
572
+ case name_or_value
573
+ when Integer then token_names.fetch(name_or_value, nil)
574
+ else const_get(name_or_value.to_s) rescue token_names.index(name_or_value)
575
+ end
576
+ end
577
+
578
+ def token_class
579
+ self::Token
580
+ end
581
+
582
+ def token_class=(klass)
583
+ Class === klass or raise(TypeError, "token_class must be a Class")
584
+ Util.silence_warnings do
585
+ klass < self or klass.send(:include, self)
586
+ const_set(:Token, klass)
587
+ end
588
+ end
589
+
590
+ end
591
+
592
+ end