antlr3 1.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/ANTLR-LICENSE.txt +26 -0
  2. data/History.txt +66 -0
  3. data/README.txt +139 -0
  4. data/bin/antlr4ruby +33 -0
  5. data/java/RubyTarget.java +524 -0
  6. data/java/antlr-full-3.2.1.jar +0 -0
  7. data/lib/antlr3.rb +176 -0
  8. data/lib/antlr3/constants.rb +88 -0
  9. data/lib/antlr3/debug.rb +701 -0
  10. data/lib/antlr3/debug/event-hub.rb +210 -0
  11. data/lib/antlr3/debug/record-event-listener.rb +25 -0
  12. data/lib/antlr3/debug/rule-tracer.rb +55 -0
  13. data/lib/antlr3/debug/socket.rb +360 -0
  14. data/lib/antlr3/debug/trace-event-listener.rb +92 -0
  15. data/lib/antlr3/dfa.rb +247 -0
  16. data/lib/antlr3/dot.rb +174 -0
  17. data/lib/antlr3/error.rb +657 -0
  18. data/lib/antlr3/main.rb +561 -0
  19. data/lib/antlr3/modes/ast-builder.rb +41 -0
  20. data/lib/antlr3/modes/filter.rb +56 -0
  21. data/lib/antlr3/profile.rb +322 -0
  22. data/lib/antlr3/recognizers.rb +1280 -0
  23. data/lib/antlr3/streams.rb +985 -0
  24. data/lib/antlr3/streams/interactive.rb +91 -0
  25. data/lib/antlr3/streams/rewrite.rb +412 -0
  26. data/lib/antlr3/test/call-stack.rb +57 -0
  27. data/lib/antlr3/test/config.rb +23 -0
  28. data/lib/antlr3/test/core-extensions.rb +269 -0
  29. data/lib/antlr3/test/diff.rb +165 -0
  30. data/lib/antlr3/test/functional.rb +207 -0
  31. data/lib/antlr3/test/grammar.rb +371 -0
  32. data/lib/antlr3/token.rb +592 -0
  33. data/lib/antlr3/tree.rb +1415 -0
  34. data/lib/antlr3/tree/debug.rb +163 -0
  35. data/lib/antlr3/tree/visitor.rb +84 -0
  36. data/lib/antlr3/tree/wizard.rb +481 -0
  37. data/lib/antlr3/util.rb +149 -0
  38. data/lib/antlr3/version.rb +27 -0
  39. data/samples/ANTLRv3Grammar.g +621 -0
  40. data/samples/Cpp.g +749 -0
  41. data/templates/AST.stg +335 -0
  42. data/templates/ASTDbg.stg +40 -0
  43. data/templates/ASTParser.stg +153 -0
  44. data/templates/ASTTreeParser.stg +272 -0
  45. data/templates/Dbg.stg +192 -0
  46. data/templates/Ruby.stg +1514 -0
  47. data/test/functional/ast-output/auto-ast.rb +797 -0
  48. data/test/functional/ast-output/construction.rb +555 -0
  49. data/test/functional/ast-output/hetero-nodes.rb +753 -0
  50. data/test/functional/ast-output/rewrites.rb +1327 -0
  51. data/test/functional/ast-output/tree-rewrite.rb +1662 -0
  52. data/test/functional/debugging/debug-mode.rb +689 -0
  53. data/test/functional/debugging/profile-mode.rb +165 -0
  54. data/test/functional/debugging/rule-tracing.rb +74 -0
  55. data/test/functional/delegation/import.rb +379 -0
  56. data/test/functional/lexer/basic.rb +559 -0
  57. data/test/functional/lexer/filter-mode.rb +245 -0
  58. data/test/functional/lexer/nuances.rb +47 -0
  59. data/test/functional/lexer/properties.rb +104 -0
  60. data/test/functional/lexer/syn-pred.rb +32 -0
  61. data/test/functional/lexer/xml.rb +206 -0
  62. data/test/functional/main/main-scripts.rb +245 -0
  63. data/test/functional/parser/actions.rb +224 -0
  64. data/test/functional/parser/backtracking.rb +244 -0
  65. data/test/functional/parser/basic.rb +282 -0
  66. data/test/functional/parser/calc.rb +98 -0
  67. data/test/functional/parser/ll-star.rb +143 -0
  68. data/test/functional/parser/nuances.rb +165 -0
  69. data/test/functional/parser/predicates.rb +103 -0
  70. data/test/functional/parser/properties.rb +242 -0
  71. data/test/functional/parser/rule-methods.rb +132 -0
  72. data/test/functional/parser/scopes.rb +274 -0
  73. data/test/functional/token-rewrite/basic.rb +318 -0
  74. data/test/functional/token-rewrite/via-parser.rb +100 -0
  75. data/test/functional/tree-parser/basic.rb +750 -0
  76. data/test/unit/sample-input/file-stream-1 +2 -0
  77. data/test/unit/sample-input/teststreams.input2 +2 -0
  78. data/test/unit/test-dfa.rb +52 -0
  79. data/test/unit/test-exceptions.rb +44 -0
  80. data/test/unit/test-recognizers.rb +55 -0
  81. data/test/unit/test-scheme.rb +62 -0
  82. data/test/unit/test-streams.rb +459 -0
  83. data/test/unit/test-tree-wizard.rb +535 -0
  84. data/test/unit/test-trees.rb +854 -0
  85. metadata +205 -0
@@ -0,0 +1,592 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ =begin LICENSE
5
+
6
+ [The "BSD licence"]
7
+ Copyright (c) 2009 Kyle Yetter
8
+ All rights reserved.
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions
12
+ are met:
13
+
14
+ 1. Redistributions of source code must retain the above copyright
15
+ notice, this list of conditions and the following disclaimer.
16
+ 2. Redistributions in binary form must reproduce the above copyright
17
+ notice, this list of conditions and the following disclaimer in the
18
+ documentation and/or other materials provided with the distribution.
19
+ 3. The name of the author may not be used to endorse or promote products
20
+ derived from this software without specific prior written permission.
21
+
22
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+
33
+ =end
34
+
35
+ module ANTLR3
36
+
37
+ =begin rdoc ANTLR3::Token
38
+
39
+ At a minimum, tokens are data structures that bind together a chunk of text and
40
+ a corresponding type symbol, which categorizes/characterizes the content of the
41
+ text. Tokens also usually carry information about their location in the input,
42
+ such as absolute character index, line number, and position within the line (or
43
+ column).
44
+
45
+ Furthermore, ANTLR tokens are assigned a "channel" number, an extra degree of
46
+ categorization that groups things on a larger scale. Parsers will usually ignore
47
+ tokens that have channel value 99 (the HIDDEN_CHANNEL), so you can keep things
48
+ like comment and white space huddled together with neighboring tokens,
49
+ effectively ignoring them without discarding them.
50
+
51
+ ANTLR tokens also keep a reference to the source stream from which they
52
+ originated. Token streams will also provide an index value for the token, which
53
+ indicates the position of the token relative to other tokens in the stream,
54
+ starting at zero. For example, the 22nd token pulled from a lexer by
55
+ CommonTokenStream will have index value 21.
56
+
57
+ == Token as an Interface
58
+
59
+ This library provides a token implementation (see CommonToken). Additionally,
60
+ you may write your own token class as long as you provide methods that give
61
+ access to the attributes expected by a token. Even though most of the ANTLR
62
+ library tries to use duck-typing techniques instead of pure object-oriented type
63
+ checking, it's a good idea to include this ANTLR3::Token into your customized
64
+ token class.
65
+
66
+ =end
67
+
68
module Token
  include ANTLR3::Constants
  include Comparable

  # the chunk of source text covered by this token
  attr_accessor :text

  # the integer code identifying the token's type
  attr_accessor :type

  # line number (starting at 1) at which the token's text begins
  attr_accessor :line

  # character position within the line (starting at 0) at which the text begins
  attr_accessor :column

  # the integer channel number the token is assigned to
  attr_accessor :channel

  # the position of this token relative to the other tokens produced during lexing
  attr_accessor :index

  # a reference to the input stream the token was extracted from
  attr_accessor :input

  # absolute character index in the input at which the token's text starts
  attr_accessor :start

  # absolute character index in the input at which the token's text ends
  attr_accessor :stop

  alias :input_stream :input
  alias :input_stream= :input=
  alias :token_index :index
  alias :token_index= :index=

  # flexible pattern-style matching: Integers match against the token's
  # type, Symbols against its type name, and Regexps/Strings against its text
  def =~ object
    case object
    when Integer then type == object
    when Symbol then name.to_sym == object
    when Regexp then object =~ text
    when String then text == object
    else super
    end
  end

  # tokens are ordered by their stream index
  def <=> other
    index <=> other.index
  end

  # copies every attribute from +source+, except that the copy's
  # stream index is reset to -1 (unpositioned)
  def initialize_copy(source)
    self.index   = -1
    self.type    = source.type
    self.channel = source.channel
    self.text    = source.text.clone if source.text
    self.start   = source.start
    self.stop    = source.stop
    self.line    = source.line
    self.column  = source.column
    self.input   = source.input
  end

  # true when the token knows its input stream and character range
  def concrete?
    !!(input && start && stop)
  end

  # true when the token lacks input-stream/range information
  def imaginary?
    not concrete?
  end

  # the name of the token's type (resolved via the private #token_name)
  def name
    token_name(type)
  end

  # true when the token sits on the hidden channel
  def hidden?
    channel == HIDDEN_CHANNEL
  end

  # the text exactly as it appears in the input stream, falling back to
  # the token's own text when no stream information is available
  def source_text
    concrete? ? input.substring(start, stop) : text
  end

  # move the token onto the hidden channel
  def hide!
    self.channel = HIDDEN_CHANNEL
  end

  # the start..stop character range, or nil when either bound is missing
  def range
    start..stop rescue nil
  end

  def to_i
    index.to_i
  end

  def to_s
    text.to_s
  end

  # a compact, human-readable summary of the token's attributes
  def inspect
    text_part     = text       ? '[%p] ' % text : ' '
    line_part     = line != 0  ? '@ line %s col %s ' % [line, column] : ''
    position_part = start      ? '(%s..%s)' % [start, stop] : ''

    prefix = index != -1 ? index.to_s << ' ' : ''
    description = prefix << name << text_part <<
                  line_part << position_part
    description.strip!
    channel == DEFAULT_CHANNEL or description << " (#{channel.to_s})"
    return description
  end

  def pretty_print(printer)
    printer.text( inspect )
  end

  private

  # map a type value to its name using the built-in token name table
  def token_name(type)
    BUILT_IN_TOKEN_NAMES[type]
  end
end
189
+
190
CommonToken = Struct.new(:type, :channel, :text, :input, :start,
                         :stop, :index, :line, :column)

=begin rdoc ANTLR3::CommonToken

The standard implementation of Token. It is built on a plain Struct, since
tokens are essentially simple data records binding together a handful of
attributes, and Structs are slightly faster than ordinary objects with
accessor methods.

By default, ANTLR-generated ruby code provides a customized subclass of
CommonToken to track token-type names efficiently for debugging, inspection,
and general utility. Thus code generated for a standard combo lexer-parser
grammar named XYZ will have a base module named XYZ and a customized
CommonToken subclass named XYZ::Token.

The token structure attribute list, in order:

* <tt>type</tt>
* <tt>channel</tt>
* <tt>text</tt>
* <tt>input</tt>
* <tt>start</tt>
* <tt>stop</tt>
* <tt>index</tt>
* <tt>line</tt>
* <tt>column</tt>

=end

class CommonToken
  include Token
  DEFAULT_VALUES = {
    :channel => DEFAULT_CHANNEL,
    :index   => -1,
    :line    => 0,
    :column  => -1
  }.freeze

  def self.token_name(type)
    BUILT_IN_TOKEN_NAMES[type]
  end

  # build a token from a hash of attribute-name => value pairs,
  # filling any unspecified fields from DEFAULT_VALUES
  def self.create(fields = {})
    merged = DEFAULT_VALUES.merge(fields)
    new(*members.map { |member| merged[member.to_sym] })
  end

  # copy a token of any token class into a fresh instance of this class;
  # the new token's stream index is reset to -1
  def self.from_token(token)
    copied_text = token.text ? token.text.clone : nil
    new(token.type, token.channel, copied_text,
        token.input, token.start, token.stop, -1, token.line, token.column)
  end

  def initialize(type = nil, channel = DEFAULT_CHANNEL, text = nil,
                 input = nil, start = nil, stop = nil, index = -1,
                 line = 0, column = -1)
    super
    block_given? and yield(self)
    # when no text was supplied but the character range is known,
    # derive the text from the input stream
    self.text.nil? && self.start && self.stop and
      self.text = self.input.substring(self.start, self.stop)
  end

  alias :input_stream :input
  alias :input_stream= :input=
  alias :token_index :index
  alias :token_index= :index=
end
259
+
260
+ Constants::EOF_TOKEN = CommonToken.new(EOF).freeze
261
+ Constants::INVALID_TOKEN = CommonToken.new(INVALID_TOKEN_TYPE).freeze
262
+ Constants::SKIP_TOKEN = CommonToken.new(INVALID_TOKEN_TYPE).freeze
263
+
264
+ =begin rdoc ANTLR3::TokenSource
265
+
266
+ TokenSource is a simple mixin module that demands an
267
+ implementation of the method #next_token. In return, it
268
+ defines methods #next and #each, which provide basic
269
+ iterator methods for token generators. Furthermore, it
270
+ includes Enumerable to provide the standard Ruby iteration
271
+ methods to token generators, like lexers.
272
+
273
+ =end
274
+
275
module TokenSource
  include Constants
  include Enumerable
  extend ClassMacros

  abstract :next_token

  # fetch the next token from the source, raising StopIteration once
  # the source is exhausted (nil or EOF token)
  def next
    token = next_token()
    if token.nil? || token.type == EOF
      raise StopIteration
    end
    token
  end

  # wrap this source in a CommonTokenStream, forwarding any given block
  def to_stream(options = {})
    unless block_given?
      CommonTokenStream.new(self, options)
    else
      CommonTokenStream.new(self, options) { |token| yield(token) }
    end
  end

  # yield each token in turn; without a block, returns an Enumerator
  def each
    return enum_for(:each) unless block_given?
    loop { yield(self.next) }
  rescue StopIteration
    self
  end
end
303
+
304
+
305
+ =begin rdoc ANTLR3::TokenFactory
306
+
307
+ There are a variety of different entities throughout the ANTLR runtime library
308
+ that need to create token objects. This module serves as a mixin that provides
309
+ methods for constructing tokens.
310
+
311
+ Including this module provides a +token_class+ attribute. Instance of the
312
+ including class can create tokens using the token class (which defaults to
313
+ ANTLR3::CommonToken). Token classes are presumed to have an #initialize method
314
+ that can be called without any parameters and the token objects are expected to
315
+ have the standard token attributes (see ANTLR3::Token).
316
+
317
+ =end
318
+
319
module TokenFactory
  attr_writer :token_class

  # the class used to manufacture tokens; resolved lazily by falling
  # through the including object's class, a local Token constant, and
  # finally ANTLR3::CommonToken
  def token_class
    @token_class ||= begin
      self.class.token_class
    rescue
      begin
        self::Token
      rescue
        ANTLR3::CommonToken
      end
    end
  end

  # construct a token with the current token class, forwarding any block
  def create_token(*args)
    unless block_given?
      token_class.new(*args)
    else
      token_class.new(*args) do |*block_args|
        yield(*block_args)
      end
    end
  end
end
339
+
340
+
341
+ =begin rdoc ANTLR3::TokenScheme
342
+
343
+ TokenSchemes exist to handle the problem of defining token types as integer
344
+ values while maintaining meaningful text names for the types. They are
345
+ dynamically defined modules that map integer values to constants with token-type
346
+ names.
347
+
348
+ ---
349
+
350
+ Fundamentally, tokens exist to take a chunk of text and identify it as belonging
351
+ to some category, like "VARIABLE" or "INTEGER". In code, the category is
352
+ represented by an integer -- some arbitrary value that ANTLR will decide to use
353
+ as it is creating the recognizer. The purpose of using an integer (instead of
354
+ say, a ruby symbol) is that ANTLR's decision logic often needs to test whether a
355
+ token's type falls within a range, which is not possible with symbols.
356
+
357
+ The downside of token types being represented as integers is that a developer
358
+ needs to be able to reference the unknown type value by name in action code.
359
+ Furthermore, code that references the type by name and tokens that can be
360
+ inspected with names in place of type values are more meaningful to a developer.
361
+
362
+ Since ANTLR requires token type names to follow capital-letter naming
363
+ conventions, defining types as named constants of the recognizer class resolves
364
+ the problem of referencing type values by name. Thus, a token type like
365
+ ``VARIABLE'' can be represented by a number like 5 and referenced within code by
366
+ +VARIABLE+. However, when a recognizer creates tokens, the name of the token's
367
+ type cannot be seen without using the data defined in the recognizer.
368
+
369
+ Of course, tokens could be defined with a name attribute that could be specified
370
+ when tokens are created. However, doing so would make tokens take up more space
371
+ than necessary, as well as making it difficult to change the type of a token
372
+ while maintaining a correct name value.
373
+
374
+ TokenSchemes exist as a technique to manage token type referencing and name
375
+ extraction. They:
376
+
377
+ 1. keep token type references clear and understandable in recognizer code
378
+ 2. permit access to a token's type-name independently of recognizer objects
379
+ 3. allow multiple classes to share the same token information
380
+
381
+ == Building Token Schemes
382
+
383
+ TokenScheme is a subclass of Module. Thus, it has the method
384
+ <tt>TokenScheme.new(tk_class = nil) { ... module-level code ...}</tt>, which
385
+ will evaluate the block in the context of the scheme (module), similarly to
386
+ Module#module_eval. Before evaluating the block, <tt>.new</tt> will setup the
387
+ module with the following actions:
388
+
389
+ 1. define a customized token class (more on that below)
390
+ 2. add a new constant, TOKEN_NAMES, which is a hash that maps types to names
391
+ 3. dynamically populate the new scheme module with a couple instance methods
392
+ 4. include ANTLR3::Constants in the new scheme module
393
+
394
+ Since the TokenScheme class functions as a metaclass, figuring out some of the
395
+ scoping behavior can be mildly confusing if you're trying to get a handle on the
396
+ entity for your own purposes. Remember that all of the instance methods of
397
+ TokenScheme function as module-level methods of TokenScheme instances, ala
398
+ +attr_accessor+ and friends.
399
+
400
+ <tt>TokenScheme#define_token(name_symbol, int_value)</tt> adds a constant
401
+ definition <tt>name_symbol</tt> with the value <tt>int_value</tt>. It is
402
+ essentially like <tt>Module#const_set</tt>, except it forbids constant
403
+ overwriting (which would mess up recognizer code fairly badly) and adds an
404
+ inverse type-to-name map to its own <tt>TOKEN_NAMES</tt> table.
405
+ <tt>TokenScheme#define_tokens</tt> is a convenience method for defining many
406
+ types with a hash pairing names to values.
407
+
408
+ <tt>TokenScheme#register_name(value, name_string)</tt> specifies a custom
409
+ type-to-name definition. This is particularly useful for the anonymous tokens
410
+ that ANTLR generates for literal strings in the grammar specification. For
411
+ example, if you refer to the literal <tt>'='</tt> in some parser rule in your
412
+ grammar, ANTLR will add a lexer rule for the literal and give the token a name
413
+ like <tt>T__<i>x</i></tt>, where <tt><i>x</i></tt> is the type's integer value.
414
+ Since this is pretty meaningless to a developer, generated code should add a
415
+ special name definition for type value <tt><i>x</i></tt> with the string
416
+ <tt>"'='"</tt>.
417
+
418
+ === Sample TokenScheme Construction
419
+
420
+ TokenData = ANTLR3::TokenScheme.new do
421
+ define_tokens(
422
+ :INT => 4,
423
+ :ID => 6,
424
+ :T__5 => 5,
425
+ :WS => 7
426
+ )
427
+
428
+ # note the self:: scoping below is due to the fact that
429
+ # ruby lexically-scopes constant names instead of
430
+ # looking up in the current scope
431
+ register_name(self::T__5, "'='")
432
+ end
433
+
434
+ TokenData::ID # => 6
435
+ TokenData::T__5 # => 5
436
+ TokenData.token_name(4) # => 'INT'
437
+ TokenData.token_name(5) # => "'='"
438
+
439
+ class ARecognizerOrSuch < ANTLR3::Parser
440
+ include TokenData
441
+ ID # => 6
442
+ end
443
+
444
+ == Custom Token Classes and Relationship with Tokens
445
+
446
+ When a TokenScheme is created, it will define a subclass of ANTLR3::CommonToken
447
+ and assign it to the constant name +Token+. This token class will both include
448
+ and extend the scheme module. Since token schemes define the private instance
449
+ method <tt>token_name(type)</tt>, instances of the token class are now able to
450
+ provide their type names. The Token method <tt>name</tt> uses the
451
+ <tt>token_name</tt> method to provide the type name as if it were a simple
452
+ attribute without storing the name itself.
453
+
454
+ When a TokenScheme is included in a recognizer class, the class will now have
455
+ the token types as named constants, a type-to-name map constant +TOKEN_NAMES+,
456
+ and a grammar-specific subclass of ANTLR3::CommonToken assigned to the constant
457
+ Token. Thus, when recognizers need to manufacture tokens, instead of using the
458
+ generic CommonToken class, they can create tokens using the customized Token
459
+ class provided by the token scheme.
460
+
461
+ If you need to use a token class other than CommonToken, you can pass the class
462
+ as a parameter to TokenScheme.new, which will be used in place of the
463
+ dynamically-created CommonToken subclass.
464
+
465
+ =end
466
+
467
class TokenScheme < ::Module
  include TokenFactory

  # Build a new token scheme module. If +tk_class+ is provided, it is used
  # as the scheme's token class; otherwise a fresh subclass of
  # ANTLR3::CommonToken is created. The optional block is evaluated in the
  # context of the new module (like Module#module_eval).
  def self.new(tk_class = nil, &body)
    super() do
      tk_class ||= Class.new(::ANTLR3::CommonToken)
      self.token_class = tk_class

      # each scheme starts with its own copy of the built-in type-to-name map
      const_set(:TOKEN_NAMES, ::ANTLR3::Constants::BUILT_IN_TOKEN_NAMES.clone)

      scheme = self
      define_method(:token_scheme) { scheme }
      define_method(:token_names) { scheme::TOKEN_NAMES }
      define_method(:token_name) do |type|
        begin
          token_names[type] or super
        rescue NoMethodError
          ::ANTLR3::CommonToken.token_name(type)
        end
      end
      module_function :token_name, :token_names

      include ANTLR3::Constants

      body and module_eval(&body)
    end
  end

  # when a scheme is included in a class, also extend the class so the token
  # constants and name maps are accessible at the class level
  def included(mod)
    super
    mod.extend(self)
  end
  private :included

  # define multiple token types from a hash mapping names to integer values
  def define_tokens(token_map = {})
    for token_name, token_value in token_map
      define_token(token_name, token_value)
    end
    return self
  end

  # define a single token type as a module constant; raises NameError if the
  # name already exists with a conflicting value
  def define_token(name, value)
    if const_defined?(name)
      current_value = const_get(name)
      unless current_value == value
        error = NameError.new("new token type definition ``#{name} = #{value}'' conflicts " <<
                              "with existing type definition ``#{name} = #{current_value}''", name)
        raise error
      end
    else
      const_set(name, value)
    end
    register_name(value, name) unless built_in_type?(value)
    return self
  end

  # register type-to-name mappings, either from a hash of value => name pairs
  # or from a list of names assigned sequentially from MIN_TOKEN_TYPE
  def register_names(*names)
    if names.length == 1 and Hash === names.first
      names.first.each do |value, name|
        register_name(value, name)
      end
    else
      names.each_with_index do |name, i|
        type_value = Constants::MIN_TOKEN_TYPE + i
        register_name(type_value, name)
      end
    end
  end

  # record a display name for the given type value; anonymous T__n names are
  # upgraded to literal names, while conflicting real names raise NameError
  def register_name(type_value, name)
    name = name.to_s.freeze
    if token_names.has_key?(type_value)
      current_name = token_names[type_value]
      current_name == name and return name

      if current_name == "T__#{type_value}"
        # only an anonymous name is registered -- upgrade the name to the full literal name
        token_names[type_value] = name
      elsif name == "T__#{type_value}"
        # ignore name downgrade from literal to anonymous constant
        return current_name
      else
        error = NameError.new(
          "attempted assignment of token type #{type_value}" <<
          " to name #{name} conflicts with existing name #{current_name}", name
        )
        raise error
      end
    else
      token_names[type_value] = name.to_s.freeze
    end
  end

  # true if the type value belongs to the built-in token types
  def built_in_type?(type_value)
    Constants::BUILT_IN_TOKEN_NAMES.fetch(type_value, false) and true
  end

  # check whether a token type is defined, either by Integer type value or
  # by constant name.
  # (bug fix: this previously switched on an undefined local variable
  # +value+ instead of the +name_or_value+ parameter, so every call raised
  # NameError)
  def token_defined?(name_or_value)
    case name_or_value
    when Integer then token_names.has_key?(name_or_value)
    else const_defined?(name_or_value.to_s)
    end
  end

  # two-way lookup: fetch a name from an Integer type value, or a type value
  # from a name
  def [](name_or_value)
    case name_or_value
    when Integer then token_names.fetch(name_or_value, nil)
    else const_get(name_or_value.to_s) rescue token_names.index(name_or_value)
    end
  end

  # the scheme's customized token class (stored in the Token constant)
  def token_class
    self::Token
  end

  # assign the scheme's token class; the class is made to include the scheme
  # so its instances can resolve their own type names
  def token_class=(klass)
    Class === klass or raise(TypeError, "token_class must be a Class")
    Util.silence_warnings do
      klass < self or klass.send(:include, self)
      const_set(:Token, klass)
    end
  end

end
591
+
592
+ end