antlr3 1.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/ANTLR-LICENSE.txt +26 -0
  2. data/History.txt +66 -0
  3. data/README.txt +139 -0
  4. data/bin/antlr4ruby +33 -0
  5. data/java/RubyTarget.java +524 -0
  6. data/java/antlr-full-3.2.1.jar +0 -0
  7. data/lib/antlr3.rb +176 -0
  8. data/lib/antlr3/constants.rb +88 -0
  9. data/lib/antlr3/debug.rb +701 -0
  10. data/lib/antlr3/debug/event-hub.rb +210 -0
  11. data/lib/antlr3/debug/record-event-listener.rb +25 -0
  12. data/lib/antlr3/debug/rule-tracer.rb +55 -0
  13. data/lib/antlr3/debug/socket.rb +360 -0
  14. data/lib/antlr3/debug/trace-event-listener.rb +92 -0
  15. data/lib/antlr3/dfa.rb +247 -0
  16. data/lib/antlr3/dot.rb +174 -0
  17. data/lib/antlr3/error.rb +657 -0
  18. data/lib/antlr3/main.rb +561 -0
  19. data/lib/antlr3/modes/ast-builder.rb +41 -0
  20. data/lib/antlr3/modes/filter.rb +56 -0
  21. data/lib/antlr3/profile.rb +322 -0
  22. data/lib/antlr3/recognizers.rb +1280 -0
  23. data/lib/antlr3/streams.rb +985 -0
  24. data/lib/antlr3/streams/interactive.rb +91 -0
  25. data/lib/antlr3/streams/rewrite.rb +412 -0
  26. data/lib/antlr3/test/call-stack.rb +57 -0
  27. data/lib/antlr3/test/config.rb +23 -0
  28. data/lib/antlr3/test/core-extensions.rb +269 -0
  29. data/lib/antlr3/test/diff.rb +165 -0
  30. data/lib/antlr3/test/functional.rb +207 -0
  31. data/lib/antlr3/test/grammar.rb +371 -0
  32. data/lib/antlr3/token.rb +592 -0
  33. data/lib/antlr3/tree.rb +1415 -0
  34. data/lib/antlr3/tree/debug.rb +163 -0
  35. data/lib/antlr3/tree/visitor.rb +84 -0
  36. data/lib/antlr3/tree/wizard.rb +481 -0
  37. data/lib/antlr3/util.rb +149 -0
  38. data/lib/antlr3/version.rb +27 -0
  39. data/samples/ANTLRv3Grammar.g +621 -0
  40. data/samples/Cpp.g +749 -0
  41. data/templates/AST.stg +335 -0
  42. data/templates/ASTDbg.stg +40 -0
  43. data/templates/ASTParser.stg +153 -0
  44. data/templates/ASTTreeParser.stg +272 -0
  45. data/templates/Dbg.stg +192 -0
  46. data/templates/Ruby.stg +1514 -0
  47. data/test/functional/ast-output/auto-ast.rb +797 -0
  48. data/test/functional/ast-output/construction.rb +555 -0
  49. data/test/functional/ast-output/hetero-nodes.rb +753 -0
  50. data/test/functional/ast-output/rewrites.rb +1327 -0
  51. data/test/functional/ast-output/tree-rewrite.rb +1662 -0
  52. data/test/functional/debugging/debug-mode.rb +689 -0
  53. data/test/functional/debugging/profile-mode.rb +165 -0
  54. data/test/functional/debugging/rule-tracing.rb +74 -0
  55. data/test/functional/delegation/import.rb +379 -0
  56. data/test/functional/lexer/basic.rb +559 -0
  57. data/test/functional/lexer/filter-mode.rb +245 -0
  58. data/test/functional/lexer/nuances.rb +47 -0
  59. data/test/functional/lexer/properties.rb +104 -0
  60. data/test/functional/lexer/syn-pred.rb +32 -0
  61. data/test/functional/lexer/xml.rb +206 -0
  62. data/test/functional/main/main-scripts.rb +245 -0
  63. data/test/functional/parser/actions.rb +224 -0
  64. data/test/functional/parser/backtracking.rb +244 -0
  65. data/test/functional/parser/basic.rb +282 -0
  66. data/test/functional/parser/calc.rb +98 -0
  67. data/test/functional/parser/ll-star.rb +143 -0
  68. data/test/functional/parser/nuances.rb +165 -0
  69. data/test/functional/parser/predicates.rb +103 -0
  70. data/test/functional/parser/properties.rb +242 -0
  71. data/test/functional/parser/rule-methods.rb +132 -0
  72. data/test/functional/parser/scopes.rb +274 -0
  73. data/test/functional/token-rewrite/basic.rb +318 -0
  74. data/test/functional/token-rewrite/via-parser.rb +100 -0
  75. data/test/functional/tree-parser/basic.rb +750 -0
  76. data/test/unit/sample-input/file-stream-1 +2 -0
  77. data/test/unit/sample-input/teststreams.input2 +2 -0
  78. data/test/unit/test-dfa.rb +52 -0
  79. data/test/unit/test-exceptions.rb +44 -0
  80. data/test/unit/test-recognizers.rb +55 -0
  81. data/test/unit/test-scheme.rb +62 -0
  82. data/test/unit/test-streams.rb +459 -0
  83. data/test/unit/test-tree-wizard.rb +535 -0
  84. data/test/unit/test-trees.rb +854 -0
  85. metadata +205 -0
@@ -0,0 +1,985 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ =begin LICENSE
5
+
6
+ [The "BSD licence"]
7
+ Copyright (c) 2009 Kyle Yetter
8
+ All rights reserved.
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions
12
+ are met:
13
+
14
+ 1. Redistributions of source code must retain the above copyright
15
+ notice, this list of conditions and the following disclaimer.
16
+ 2. Redistributions in binary form must reproduce the above copyright
17
+ notice, this list of conditions and the following disclaimer in the
18
+ documentation and/or other materials provided with the distribution.
19
+ 3. The name of the author may not be used to endorse or promote products
20
+ derived from this software without specific prior written permission.
21
+
22
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+
33
+ =end
34
+
35
+ module ANTLR3
36
+
37
+
38
+ =begin rdoc ANTLR3::Stream
39
+
40
+ = ANTLR3 Streams
41
+
42
+ This documentation first covers the general concept of streams as used by ANTLR
43
+ recognizers, and then discusses the specific <tt>ANTLR3::Stream</tt> module.
44
+
45
+ == ANTLR Stream Classes
46
+
47
+ ANTLR recognizers need a way to walk through input data in a serialized IO-style
48
+ fashion. They also need some book-keeping about the input to provide useful
49
+ information to developers, such as current line number and column. Furthermore,
50
+ to implement backtracking and various error recovery techniques, recognizers
51
+ need a way to record various locations in the input at a number of points in the
52
+ recognition process so the input state may be restored back to a prior state.
53
+
54
+ ANTLR bundles all of this functionality into a number of Stream classes, each
55
+ designed to be used by recognizers for a specific recognition task. Most of the
56
+ Stream hierarchy is implemented in antlr3/stream.rb, which is loaded by default
57
+ when 'antlr3' is required.
58
+
59
+ ---
60
+
61
+ Here's a brief overview of the various stream classes and their respective
62
+ purpose:
63
+
64
+ StringStream::
65
+ Similar to StringIO from the standard Ruby library, StringStream wraps raw
66
+ String data in a Stream interface for use by ANTLR lexers.
67
+ FileStream::
68
+ A subclass of StringStream, FileStream simply wraps data read from an IO or
69
+ File object for use by lexers.
70
+ CommonTokenStream::
71
+ The job of a TokenStream is to read lexer output and then provide ANTLR
72
+ parsers with the means to sequential walk through series of tokens.
73
+ CommonTokenStream is the default TokenStream implementation.
74
+ TokenRewriteStream::
75
+ A subclass of CommonTokenStream, TokenRewriteStreams provide rewriting-parsers
76
+ the ability to produce new output text from an input token-sequence by
77
+ managing rewrite "programs" on top of the stream.
78
+ CommonTreeNodeStream::
79
+ In a similar fashion to CommonTokenStream, CommonTreeNodeStream feeds tokens
80
+ to recognizers in a sequential fashion. However, the stream object serializes
81
+ an Abstract Syntax Tree into a flat, one-dimensional sequence, but preserves
82
+ the two-dimensional shape of the tree using special UP and DOWN tokens. The
83
+ sequence is primarily used by ANTLR Tree Parsers. *note* -- this is not
84
+ defined in antlr3/stream.rb, but antlr3/tree.rb
85
+
86
+ ---
87
+
88
+ The next few sections cover the most significant methods of all stream classes.
89
+
90
+ === consume / look / peek
91
+
92
+ <tt>stream.consume</tt> is used to advance a stream one unit. StringStreams are
93
+ advanced by one character and TokenStreams are advanced by one token.
94
+
95
+ <tt>stream.peek(k = 1)</tt> is used to quickly retrieve the object of interest
96
+ to a recognizer at look-ahead position specified by <tt>k</tt>. For
97
+ <b>StringStreams</b>, this is the <i>integer value of the character</i>
98
+ <tt>k</tt> characters ahead of the stream cursor. For <b>TokenStreams</b>, this
99
+ is the <i>integer token type of the token</i> <tt>k</tt> tokens ahead of the
100
+ stream cursor.
101
+
102
+ <tt>stream.look(k = 1)</tt> is used to retrieve the full object of interest at
103
+ look-ahead position specified by <tt>k</tt>. While <tt>peek</tt> provides the
104
+ <i>bare-minimum lightweight information</i> that the recognizer needs,
105
+ <tt>look</tt> provides the <i>full object of concern</i> in the stream. For
106
+ <b>StringStreams</b>, this is a <i>string object containing the single
107
+ character</i> <tt>k</tt> characters ahead of the stream cursor. For
108
+ <b>TokenStreams</b>, this is the <i>full token structure</i> <tt>k</tt> tokens
109
+ ahead of the stream cursor.
110
+
111
+ <b>Note:</b> in most ANTLR runtime APIs for other languages, <tt>peek</tt> is
112
+ implemented by some method with a name like <tt>LA(k)</tt> and <tt>look</tt> is
113
+ implemented by some method with a name like <tt>LT(k)</tt>. When writing this
114
+ Ruby runtime API, I found this naming practice both confusing, ambiguous, and
115
+ un-Ruby-like. Thus, I chose <tt>peek</tt> and <tt>look</tt> to represent a
116
+ quick-look (peek) and a full-fledged look-ahead operation (look). If this causes
117
+ confusion or any sort of compatibility strife for developers using this
118
+ implementation, all apologies.
119
+
120
+ === mark / rewind / release
121
+
122
+ <tt>marker = stream.mark</tt> causes the stream to record important information
123
+ about the current stream state, place the data in an internal memory table, and
124
+ return a memento, <tt>marker</tt>. The marker object is typically an integer key
125
+ to the stream's internal memory table.
126
+
127
+ Used in tandem with, <tt>stream.rewind(mark = last_marker)</tt>, the marker can
128
+ be used to restore the stream to an earlier state. This is used by recognizers
129
+ to perform tasks such as backtracking and error recovery.
130
+
131
+ <tt>stream.release(marker = last_marker)</tt> can be used to release an existing
132
+ state marker from the memory table.
133
+
134
+ === seek
135
+
136
+ <tt>stream.seek(position)</tt> moves the stream cursor to an absolute position
137
+ within the stream, basically like typical ruby <tt>IO#seek</tt> style methods.
138
+ However, unlike <tt>IO#seek</tt>, ANTLR streams currently always use absolute
139
+ position seeking.
140
+
141
+ == The Stream Module
142
+
143
+ <tt>ANTLR3::Stream</tt> is an abstract-ish base mixin for all IO-like stream
144
+ classes used by ANTLR recognizers.
145
+
146
+ The module doesn't do much on its own besides define arguably annoying
147
+ ``abstract'' pseudo-methods that demand implementation when it is mixed in to a
148
+ class that wants to be a Stream. Right now this exists as an artifact of porting
149
+ the ANTLR Java/Python runtime library to Ruby. In Java, of course, this is
150
+ represented as an interface. In Ruby, however, objects are duck-typed and
151
+ interfaces aren't that useful as programmatic entities -- in fact, it's mildly
152
+ wasteful to have a module like this hanging out. Thus, I may axe it.
153
+
154
+ When mixed in, it does give the class a #size and #source_name attribute
155
+ methods.
156
+
157
+ Except in a small handful of places, most of the ANTLR runtime library uses
158
+ duck-typing and not type checking on objects. This means that the methods which
159
+ manipulate stream objects don't usually bother checking that the object is a
160
+ Stream and assume that the object implements the proper stream interface. Thus,
161
+ it is not strictly necessary that custom stream objects include ANTLR3::Stream,
162
+ though it isn't a bad idea.
163
+
164
+ =end
165
+
166
+ module Stream
167
+ include ANTLR3::Constants
168
+ extend ClassMacros
169
+
170
+ ##
171
+ # :method: consume
172
+ # used to advance a stream one unit (such as character or token)
173
+ abstract :consume
174
+
175
+ ##
176
+ # :method: peek(k=1)
177
+ # used to quickly retrieve the object of interest to a recognizer at lookahead
178
+ # position specified by <tt>k</tt> (such as integer value of a character or an
179
+ # integer token type)
180
+ abstract :peek
181
+
182
+ ##
183
+ # :method: look(k=1)
184
+ # used to retrieve the full object of interest at lookahead position specified
185
+ # by <tt>k</tt> (such as a character string or a token structure)
186
+ abstract :look
187
+
188
+ ##
189
+ # :method: mark
190
+ # TODO: document
191
+ abstract :mark
192
+
193
+ ##
194
+ # :method: index
195
+ # TODO: document
196
+ abstract :index
197
+
198
+ ##
199
+ # :method: rewind(marker=last_marker)
200
+ # TODO: document
201
+ abstract :rewind
202
+
203
+ ##
204
+ # :method: release(marker = last_marker)
205
+ # TODO: document
206
+ abstract :release
207
+
208
+ ##
209
+ # :method: seek(position)
210
+ # TODO: document
211
+ abstract :seek
212
+
213
+ # TODO: document
214
+ attr_reader :size
215
+ # TODO: document
216
+ attr_accessor :source_name
217
+ end
218
+
219
+ =begin rdoc ANTLR3::CharacterStream
220
+
221
+ CharacterStream further extends the abstract-ish base mixin Stream to add
222
+ methods specific to navigating character-based input data. Thus, it serves as an
223
+ imitation of the Java interface for text-based streams, which are primarily
224
+ used by lexers.
225
+
226
+ It adds the ``abstract'' method, <tt>substring(start, stop)</tt>, which must be
227
+ implemented to return a slice of the input string from position <tt>start</tt>
228
+ to position <tt>stop</tt>. It also adds attribute accessor methods <tt>line</tt>
229
+ and <tt>column</tt>, which are expected to indicate the current line number and
230
+ position within the current line, respectively.
231
+
232
+ == A Word About <tt>line</tt> and <tt>column</tt> attributes
233
+
234
+ Presumably, the concept of <tt>line</tt> and <tt>column</tt> attributes of text
235
+ are familiar to most developers. Line numbers of text are indexed from number 1
236
+ up (not 0). Column numbers are indexed from 0 up. Thus, examining sample text:
237
+
238
+ Hey this is the first line.
239
+ Oh, and this is the second line.
240
+
241
+ Line 1 is the string "Hey this is the first line\\n". If a character stream is at
242
+ line 2, character 0, the stream cursor is sitting between the characters "\\n"
243
+ and "O".
244
+
245
+ *Note:* most ANTLR runtime APIs for other languages refer to <tt>column</tt>
246
+ with the more-precise, but lengthy name <tt>charPositionInLine</tt>. I preferred
247
+ to keep it simple and familiar in this Ruby runtime API.
248
+
249
+ =end
250
+
251
+ module CharacterStream
252
+ include Stream
253
+ extend ClassMacros
254
+ EOF = -1
255
+
256
+ ##
257
+ # :method: substring(start,stop)
258
+ abstract :substring
259
+
260
+ attr_accessor :line
261
+ attr_accessor :column
262
+ end
263
+
264
+
265
+ =begin rdoc ANTLR3::TokenStream
266
+
267
+ TokenStream further extends the abstract-ish base mixin Stream to add methods
268
+ specific to navigating token sequences. Thus, it serves as an imitation of the
269
+ Java interface for token-based streams, which are used by many different
270
+ components in ANTLR, including parsers and tree parsers.
271
+
272
+ == Token Streams
273
+
274
+ Token streams wrap a sequence of token objects produced by some token source,
275
+ usually a lexer. They provide the operations required by higher-level
276
+ recognizers, such as parsers and tree parsers for navigating through the
277
+ sequence of tokens. Unlike simple character-based streams, such as StringStream,
278
+ token-based streams have an additional level of complexity because they must
279
+ manage the task of "tuning" to a specific token channel.
280
+
281
+ One of the main advantages of ANTLR-based recognition is the token
282
+ <i>channel</i> feature, which allows you to hold on to all tokens of interest
283
+ while only presenting a specific set of interesting tokens to a parser. For
284
+ example, if you need to hide whitespace and comments from a parser, but hang on
285
+ to them for some other purpose, you have the lexer assign the comments and
286
+ whitespace to channel value HIDDEN as it creates the tokens.
287
+
288
+ When you create a token stream, you can tune it to some specific channel value.
289
+ Then, all <tt>peek</tt>, <tt>look</tt>, and <tt>consume</tt> operations only
290
+ yield tokens that have the same value for <tt>channel</tt>. The stream skips
291
+ over any non-matching tokens in between.
292
+
293
+ == The TokenStream Interface
294
+
295
+ In addition to the abstract methods and attribute methods provided by the base
296
+ Stream module, TokenStream adds a number of additional method implementation
297
+ requirements and attributes.
298
+
299
+ =end
300
+
301
+ module TokenStream
302
+ include Stream
303
+ extend ClassMacros
304
+
305
+ ##
306
+ # expected to return the token source object (such as a lexer) from which
307
+ # all tokens in the stream were retrieved
308
+ attr_reader :token_source
309
+
310
+ ##
311
+ # expected to return the value of the last marker produced by a call to
312
+ # <tt>stream.mark</tt>
313
+ attr_reader :last_marker
314
+
315
+ ##
316
+ # expected to return the integer index of the stream cursor
317
+ attr_reader :position
318
+
319
+ ##
320
+ # the integer channel value to which the stream is ``tuned''
321
+ attr_accessor :channel
322
+
323
+ ##
324
+ # :method: to_s(start=0,stop=tokens.length-1)
325
+ # should take the tokens between start and stop in the sequence, extract their text
326
+ # and return the concatenation of all the text chunks
327
+ abstract :to_s
328
+
329
+ ##
330
+ # :method: at
331
+ # TODO: document
332
+ abstract :at
333
+ end
334
+
335
+ =begin rdoc ANTLR3::StringStream
336
+
337
+ A StringStream's purpose is to wrap the basic, naked text input of a recognition
338
+ system. Like all other stream types, it provides serial navigation of the input;
339
+ a recognizer can arbitrarily step forward and backward through the stream's
340
+ symbols as it requires. StringStream and its subclasses are the main way to
341
+ feed text input into an ANTLR Lexer for token processing.
342
+
343
+ The stream's symbols of interest, of course, are character values. Thus, the
344
+ #peek method returns the integer character value at look-ahead position
345
+ <tt>k</tt> and the #look method returns the character value as a +String+. They
346
+ also track various pieces of information such as the line and column numbers at
347
+ the current position.
348
+
349
+ === Note About Text Encoding
350
+
351
+ This version of the runtime library primarily targets ruby version 1.8, which
352
+ does not have strong built-in support for multi-byte character encodings. Thus,
353
+ characters are assumed to be represented by a single byte -- an integer between
354
+ 0 and 255. Ruby 1.9 does provide built-in encoding support for multi-byte
355
+ characters, but currently this library does not provide any streams to handle
356
+ non-ASCII encoding. However, encoding-savvy recognition code is a future
357
+ development goal for this project.
358
+
359
+ =end
360
+
361
+ class StringStream
362
+ include CharacterStream
363
+
364
+ # current integer character index of the stream
365
+ attr_reader :position
366
+
367
+ # the current line number of the input, indexed upward from 1
368
+ attr_reader :line
369
+
370
+ # the current character position within the current line, indexed upward from 0
371
+ attr_reader :column
372
+
373
+ # the name associated with the stream -- usually a file name
374
+ # defaults to <tt>"(string)"</tt>
375
+ attr_accessor :name
376
+
377
+ # the entire string that is wrapped by the stream
378
+ attr_reader :data
379
+
380
# creates a new StringStream object where +data+ is the string data to stream.
# accepts the following options in a symbol-to-value hash:
#
# [:file or :name] the (file) name to associate with the stream; default: <tt>'(string)'</tt>
# [:line] the initial line number; default: +1+
# [:column] the initial column number; default: +0+
#
def initialize(data, options = {})
  @data = data.to_s
  # if to_s handed back the very same object the caller passed in, take a
  # private copy so that the freeze below cannot affect the caller's string
  @data.equal?(data) and @data = @data.clone
  @data.freeze
  @position = 0
  @line = options.fetch :line, 1
  @column = options.fetch :column, 0
  @markers = []
  mark # marker 0 always records the pristine start-of-stream state
  # NOTE(review): the '(string)' default documented above is commented out
  # below, so @name remains nil when neither :file nor :name is given -- confirm
  @name ||= options[:file] || options[:name] # || '(string)'
end
398
+
399
# number of characters wrapped by the stream
def size
  @data.size
end
402
+
403
+ alias length size
404
+
405
#
# rewinds the stream back to the start and clears out any existing marker entries
#
def reset
  @position, @line, @column = 0, 1, 0
  @markers.clear
  self
end
415
+
416
#
# advance the stream by one character; returns the character consumed
# (or EOF when the stream is already exhausted, leaving state untouched)
#
def consume
  char = @data[@position] || EOF
  return char unless @position < @data.length
  if char == ?\n
    # a newline bumps the line counter and restarts the column count
    @line += 1
    @column = 0
  else
    @column += 1
  end
  @position += 1
  char
end
431
+
432
#
# return the character at look-ahead distance +k+. <tt>k = 1</tt> represents
# the current character. +k+ greater than 1 represents upcoming characters. A negative
# value of +k+ returns previous characters consumed, where <tt>k = -1</tt> is the last
# character consumed. <tt>k = 0</tt> has undefined behavior and returns +nil+
#
def peek(k = 1)
  return nil if k.zero?
  k += 1 if k < 0                # skip the nonexistent "position 0"
  target = @position + k - 1
  return nil if target < 0
  @data[target] || EOF
end
445
+
446
#
# identical to #peek, except it returns the character value as a String
#
def look(k = 1)
  return nil if k.zero?
  k += 1 if k < 0                # skip the nonexistent "position 0"
  target = @position + k - 1
  return nil if target < 0
  char = @data[target]
  char && char.chr
end
458
+
459
#
# return a substring around the stream cursor at a distance +k+
# if <tt>k >= 0</tt>, return the next k characters
# if <tt>k < 0</tt>, return the previous <tt>|k|</tt> characters
#
def through(k)
  return @data[@position, k] if k >= 0
  # Integer#at_least is a project core-extension (a lower-bound clamp);
  # start must not go negative or the index would wrap around
  from = (@position + k).at_least(0)
  @data[from...@position]
end
470
+
471
+ # operator style look-ahead
472
+ alias >> look
473
+
474
# operator style look-behind: <tt>stream << k</tt> returns the character
# +k+ positions behind the cursor, i.e. the same as <tt>look(-k)</tt>
def <<(k)
  # bug fix: the original body was `self << -k`, which re-invokes this very
  # operator with an alternating sign and recurses forever; delegate to
  # #look with a negated distance instead, mirroring the `>> look` alias
  look(-k)
end
478
+
479
+ alias index position
480
+ alias character_index position
481
+
482
+ alias source_name name
483
+
484
#
# Returns true if the stream appears to be at the beginning of a new line.
# This is an extra utility method for use inside lexer actions if needed.
#
def beginning_of_line?
  @position == 0 || @data[@position - 1] == ?\n
end
491
+
492
#
# Returns true if the stream appears to be at the end of a line, i.e. the
# character at the cursor is a newline.
# This is an extra utility method for use inside lexer actions if needed.
#
def end_of_line?
  # bug fix: the original guard was `if @position >= @data.length`, which
  # ran the newline test only when the cursor was already past the data
  # (where @data[@position] is always nil) -- so the method could never
  # return true; the test belongs to positions still inside the stream
  @data[@position] == ?\n if @position < @data.length
end
499
+
500
#
# Returns true if the stream has been exhausted.
# This is an extra utility method for use inside lexer actions if needed.
#
def end_of_string?
  @data.length <= @position
end
507
+
508
#
# Returns true if the stream appears to be at the beginning of a stream (position = 0).
# This is an extra utility method for use inside lexer actions if needed.
#
def beginning_of_string?
  @position.zero?
end
515
+
516
+ alias eof? end_of_string?
517
+ alias bof? beginning_of_string?
518
+
519
#
# record the current stream location parameters in the stream's marker table and
# return an integer-valued bookmark that may be used to restore the stream's
# position with the #rewind method. This method is used to implement backtracking.
#
def mark
  @markers.push([@position, @line, @column].freeze)
  @markers.length - 1
end
529
+
530
#
# restore the stream to an earlier location recorded by #mark. If no marker value is
# provided, the last marker generated by #mark will be used.
#
def rewind(marker = @markers.length - 1, release = true)
  # unknown or negative markers are silently ignored (stream is unchanged)
  (marker >= 0 and location = @markers[marker]) or return(self)
  @position, @line, @column = location
  # note: `release(marker)` with parentheses calls the #release method; the
  # bare `release` in the modifier condition is the boolean parameter above,
  # which shadows the method name
  release(marker) if release
  return self
end
540
+
541
#
# the total number of markers currently in existence
#
def mark_depth
  @markers.size
end
547
+
548
#
# the last marker value created by a call to #mark
#
def last_marker
  @markers.size - 1
end
554
+
555
#
# let go of the bookmark data for the marker and all marker
# values created after the marker.
#
def release(marker = @markers.length - 1)
  # marker 0 (the pristine start-of-stream state) can never be released;
  # out-of-range markers are ignored (returns nil)
  marker.between?(1, @markers.length - 1) or return
  # bug fix: the original assigned nil to the slice
  # (`@markers[marker, n] = nil`), which deleted the entries on ruby 1.8
  # but leaves a literal nil element on ruby >= 1.9, corrupting the marker
  # count used by #last_marker; slice! removes the entries on all versions
  @markers.slice!(marker..-1)
  return self
end
564
+
565
#
# jump to the absolute position value given by +index+.
# note: if +index+ is before the current position, the +line+ and +column+
# attributes of the stream will probably be incorrect
#
def seek(index)
  # Integer#bound is a project core-extension -- presumably it clamps the
  # value into the given range (TODO confirm against antlr3/util)
  index = index.bound( 0, @data.length ) # ensures index is within the stream's range
  if index > @position
    # moving forward: scan the skipped text to keep line/column in sync
    skipped = through( index - @position )
    # String#count always returns an Integer (never nil/false), so the
    # assignment is always truthy and the condition reduces to `lc.zero?`,
    # i.e. "no newlines were skipped"
    if lc = skipped.count("\n") and lc.zero?
      @column += skipped.length
    else
      @line += lc
      # column = number of characters after the last skipped newline
      @column = skipped.length - skipped.rindex("\n") - 1
    end
  end
  @position = index
  return nil
end
584
+
585
#
# customized object inspection that shows:
# * the stream class
# * the stream's location in <tt>index / line:column</tt> format
# * +before_chars+ characters before the cursor (6 characters by default)
# * +after_chars+ characters after the cursor (10 characters by default)
#
def inspect(before_chars = 6, after_chars = 10)
  # context preceding the cursor; prefix '... ' when earlier text exists
  before = through( -before_chars ).inspect
  @position - before_chars > 0 and before.insert(0, '... ')

  # context following the cursor; suffix ' ...' when more text remains
  after = through( after_chars ).inspect
  @position + after_chars + 1 < @data.length and after << ' ...'

  # "#@ivar" inside a double-quoted string is shorthand for "#{@ivar}"
  location = "#@position / line #@line:#@column"
  "#<#{self.class}: #{before} | #{after} @ #{location}>"
end
602
+
603
#
# return the string slice between position +start+ and +stop+ (both inclusive)
#
def substring(start, stop)
  span = stop - start + 1
  @data[start, span]
end
609
+
610
#
# identical to String#[]
#
def [](start, *args)
  @data.slice(start, *args)
end
616
+ end
617
+
618
+
619
+ =begin rdoc ANTLR3::FileStream
620
+
621
+ FileStream is a character stream that uses data stored in some external file. It
622
+ is nearly identical to StringStream and functions the same way, using data located in a file
623
+ while automatically setting up the +source_name+ and +line+ parameters. It does
624
+ not actually use any buffered IO operations throughout the stream navigation
625
+ process. Instead, it reads the file data once when the stream is initialized.
626
+
627
+ =end
628
+
629
+ class FileStream < StringStream
630
+
631
#
# creates a new FileStream object using the given +file+ object.
# If +file+ is a path string, the file will be read and the contents
# will be used and the +name+ attribute will be set to the path.
# If +file+ is an IO-like object (that responds to :read),
# the content of the object will be used and the stream will
# attempt to set its +name+ object first trying the method #name
# on the object, then trying the method #path on the object.
#
# see StringStream.new for a list of additional options
# the constructor accepts
#
def initialize(file, options = {})
  case file
  when $stdin then
    data = $stdin.read
    @name = '(stdin)'
  when ::File then
    # work against a clone reopened in read mode so the caller's file
    # handle (its position and mode) is left untouched
    file = file.clone
    file.reopen(file.path, 'r')
    @name = file.path
    data = file.read
    file.close
  else
    if file.respond_to?(:read)
      # duck-typed IO-like object: consume its content and best-effort
      # derive a stream name from #name or #path
      data = file.read
      if file.respond_to?(:name) then @name = file.name
      elsif file.respond_to?(:path) then @name = file.path
      end
    else
      # anything else is treated as a path string; Kernel#test(?f, path)
      # is true only for an existing regular file
      @name = file.to_s
      if test(?f, @name) then data = File.read(@name)
      else raise ArgumentError, "could not find an existing file at %p" % @name
      end
    end
  end
  super(data, options)
end
669
+
670
+ end
671
+
672
+ =begin rdoc ANTLR3::CommonTokenStream
673
+
674
+ CommonTokenStream serves as the primary token stream implementation for feeding
675
+ sequential token input into parsers.
676
+
677
+ Using some TokenSource (such as a lexer), the stream collects a token sequence,
678
+ setting the token's <tt>index</tt> attribute to indicate the token's position
679
+ within the stream. The streams may be tuned to some channel value; off-channel
680
+ tokens will be filtered out by the #peek, #look, and #consume methods.
681
+
682
+ === Sample Usage
683
+
684
+
685
+ source_input = ANTLR3::StringStream.new("35 * 4 - 1")
686
+ lexer = Calculator::Lexer.new(source_input)
687
+ tokens = ANTLR3::CommonTokenStream.new(lexer)
688
+
689
+ # assume this grammar defines whitespace as tokens on channel HIDDEN
690
+ # and numbers and operations as tokens on channel DEFAULT
691
+ tokens.look # => 0 INT['35'] @ line 1 col 0 (0..1)
692
+ tokens.look(2) # => 2 MULT["*"] @ line 1 col 2 (3..3)
693
+ tokens.tokens(0, 2)
694
+ # => [0 INT["35"] @line 1 col 0 (0..1),
695
+ # 1 WS[" "] @line 1 col 2 (1..1),
696
+ # 2 MULT["*"] @ line 1 col 3 (3..3)]
697
+ # notice the #tokens method does not filter off-channel tokens
698
+
699
+ lexer.reset
700
+ hidden_tokens =
701
+ ANTLR3::CommonTokenStream.new(lexer, :channel => ANTLR3::HIDDEN)
702
+ hidden_tokens.look # => 1 WS[' '] @ line 1 col 2 (1..1)
703
+
704
+ =end
705
+
706
+ class CommonTokenStream
707
+ include TokenStream
708
+ include Enumerable
709
+
710
#
# constructs a new token stream using the +token_source+ provided. +token_source+ is
# usually a lexer, but can be any object that implements +next_token+ and includes
# ANTLR3::TokenSource.
#
# If a block is provided, each token harvested will be yielded and if the block
# returns a +nil+ or +false+ value, the token will not be added to the stream --
# it will be discarded.
#
# === Options
# [:channel] The channel value the stream should be tuned to initially
# [:source_name] The source name (file name) attribute of the stream
#
# === Example
#
#   # create a new token stream that is tuned to channel :comment, and
#   # discard all WHITE_SPACE tokens
#   ANTLR3::CommonTokenStream.new(lexer, :channel => :comment) do |token|
#     token.name != 'WHITE_SPACE'
#   end
#
def initialize(token_source, options = {})
  @token_source = token_source
  @last_marker = nil
  @channel = options.fetch(:channel, DEFAULT_CHANNEL)

  # the whole token sequence is buffered eagerly up front; the optional
  # block acts as a filter over the harvested tokens (yields token, self)
  @tokens =
    block_given? ? @token_source.select { |token| yield(token, self) } :
    @token_source.to_a
  # record each token's absolute position within the buffer
  @tokens.each_with_index { |t, i| t.index = i }
  # start the cursor at the first token tuned to @channel, or just past
  # the end of the buffer when no on-channel token exists
  @position =
    if first_token = @tokens.find { |t| t.channel == @channel }
      @tokens.index(first_token)
    else @tokens.length
    end
  # best-effort fallback: ask the token source for its source name
  @source_name = options.fetch(:source_name) { @token_source.source_name rescue nil }
end
747
+
748
+ #
749
+ # resets the token stream and rebuilds it with a potentially new token source.
750
+ # If no +token_source+ value is provided, the stream will attempt to reset the
751
+ # current +token_source+ by calling +reset+ on the object. The stream will
752
+ # then clear the token buffer and attempt to harvest new tokens. Identical in
753
+ # behavior to CommonTokenStream.new, if a block is provided, tokens will be
754
+ # yielded and discarded if the block returns a +false+ or +nil+ value.
755
+ #
756
+ def rebuild(token_source = nil)
757
+ if token_source.nil?
758
+ @token_source.reset rescue nil
759
+ else @token_source = token_source
760
+ end
761
+ @tokens = block_given? ? @token_source.select { |token| yield(token) } :
762
+ @token_source.to_a
763
+ @tokens.each_with_index { |t, i| t.index = i }
764
+ @last_marker = nil
765
+ @position =
766
+ if first_token = @tokens.find { |t| t.channel == @channel }
767
+ @tokens.index(first_token)
768
+ else @tokens.length
769
+ end
770
+ return self
771
+ end
772
+
773
+ #
774
+ # tune the stream to a new channel value
775
+ #
776
+ def tune_to(channel)
777
+ @channel = channel
778
+ end
779
+
780
+ def token_class
781
+ @token_source.token_class
782
+ rescue NoMethodError
783
+ @position == -1 and fill_buffer
784
+ @tokens.empty? ? CommonToken : @tokens.first.class
785
+ end
786
+
787
+ alias index position
788
+
789
+ def size
790
+ @tokens.length
791
+ end
792
+
793
+ alias length size
794
+
795
+ ###### State-Control ################################################
796
+
797
+ #
798
+ # rewind the stream to its initial state
799
+ #
800
+ def reset
801
+ @position = 0
802
+ @position += 1 while token = @tokens[@position] and
803
+ token.channel != @channel
804
+ @last_marker = nil
805
+ return self
806
+ end
807
+
808
+ #
809
+ # bookmark the current position of the input stream
810
+ #
811
+ def mark
812
+ @last_marker = @position
813
+ end
814
+
815
+ def release(marker = nil)
816
+ # do nothing
817
+ end
818
+
819
+
820
+ def rewind(marker = @last_marker, release = true)
821
+ seek(marker)
822
+ end
823
+
824
+
825
+ ###### Stream Navigation ###########################################
826
+
827
+ #
828
+ # advance the stream one step to the next on-channel token
829
+ #
830
+ def consume
831
+ token = @tokens[@position] || EOF_TOKEN
832
+ if @position < @tokens.length
833
+ @position = future?(2) || @tokens.length
834
+ end
835
+ return(token)
836
+ end
837
+
838
+ #
839
+ # jump to the stream position specified by +index+
840
+ # note: seek does not check whether or not the
841
+ # token at the specified position is on-channel,
842
+ #
843
+ def seek(index)
844
+ @position = index.to_i.bound(0, @tokens.length)
845
+ return self
846
+ end
847
+
848
+ #
849
+ # return the type of the on-channel token at look-ahead distance +k+. <tt>k = 1</tt> represents
850
+ # the current token. +k+ greater than 1 represents upcoming on-channel tokens. A negative
851
+ # value of +k+ returns previous on-channel tokens consumed, where <tt>k = -1</tt> is the last
852
+ # on-channel token consumed. <tt>k = 0</tt> has undefined behavior and returns +nil+
853
+ #
854
+ def peek(k = 1)
855
+ tk = look(k) and return(tk.type)
856
+ end
857
+
858
+ #
859
+ # operates simillarly to #peek, but returns the full token object at look-ahead position +k+
860
+ #
861
+ def look(k = 1)
862
+ index = future?(k) or return nil
863
+ @tokens.fetch(index, EOF_TOKEN)
864
+ end
865
+
866
+ alias >> look
867
+ def << k
868
+ self >> -k
869
+ end
870
+
871
+ #
872
+ # returns the index of the on-channel token at look-ahead position +k+ or nil if no other
873
+ # on-channel tokens exist
874
+ #
875
+ def future?(k = 1)
876
+ @position == -1 and fill_buffer
877
+
878
+ case
879
+ when k == 0 then nil
880
+ when k < 0 then past?(-k)
881
+ when k == 1 then @position
882
+ else
883
+ # since the stream only yields on-channel
884
+ # tokens, the stream can't just go to the
885
+ # next position, but rather must skip
886
+ # over off-channel tokens
887
+ (k - 1).times.inject(@position) do |cursor, |
888
+ begin
889
+ tk = @tokens.at(cursor += 1) or return(cursor)
890
+ # ^- if tk is nil (i.e. i is outside array limits)
891
+ end until tk.channel == @channel
892
+ cursor
893
+ end
894
+ end
895
+ end
896
+
897
+ #
898
+ # returns the index of the on-channel token at look-behind position +k+ or nil if no other
899
+ # on-channel tokens exist before the current token
900
+ #
901
+ def past?(k = 1)
902
+ @position == -1 and fill_buffer
903
+
904
+ case
905
+ when k == 0 then nil
906
+ when @position - k < 0 then nil
907
+ else
908
+
909
+ k.times.inject(@position) do |cursor, |
910
+ begin
911
+ cursor <= 0 and return(nil)
912
+ tk = @tokens.at(cursor -= 1) or return(nil)
913
+ end until tk.channel == @channel
914
+ cursor
915
+ end
916
+
917
+ end
918
+ end
919
+
920
+ #
921
+ # yields each token in the stream (including off-channel tokens)
922
+ # If no block is provided, the method returns an Enumerator object.
923
+ # #each accepts the same arguments as #tokens
924
+ #
925
+ def each(*args)
926
+ block_given? or return enum_for(:each, *args)
927
+ tokens(*args).each { |token| yield(token) }
928
+ end
929
+
930
+ #
931
+ # returns a copy of the token buffer. If +start+ and +stop+ are provided, tokens
932
+ # returns a slice of the token buffer from <tt>start..stop</tt>. The parameters
933
+ # are converted to integers with their <tt>to_i</tt> methods, and thus tokens
934
+ # can be provided to specify start and stop. If a block is provided, tokens are
935
+ # yielded and filtered out of the return array if the block returns a +false+
936
+ # or +nil+ value.
937
+ #
938
+ def tokens(start = nil, stop = nil)
939
+ stop.nil? || stop >= @tokens.length and stop = @tokens.length - 1
940
+ start.nil? || stop < 0 and start = 0
941
+ tokens = @tokens[start..stop]
942
+
943
+ if block_given?
944
+ tokens.delete_if { |t| not yield(t) }
945
+ end
946
+
947
+ return( tokens )
948
+ end
949
+
950
+
951
+ def at(i)
952
+ @tokens.at i
953
+ end
954
+
955
+ #
956
+ # identical to Array#[], as applied to the stream's token buffer
957
+ #
958
+ def [](i, *args)
959
+ @tokens[i, *args]
960
+ end
961
+
962
+ ###### Standard Conversion Methods ###############################
963
+ def inspect
964
+ string = "#<%p: @token_source=%p @ %p/%p" %
965
+ [self.class, @token_source.class, @position, @tokens.length]
966
+ tk = look(-1) and string << " #{tk.inspect} <--"
967
+ tk = look( 1) and string << " --> #{tk.inspect}"
968
+ string << '>'
969
+ end
970
+
971
+ #
972
+ # fetches the text content of all tokens between +start+ and +stop+ and
973
+ # joins the chunks into a single string
974
+ #
975
+ def extract_text(start = 0, stop = @tokens.length - 1)
976
+ start = start.to_i.at_least(0)
977
+ stop = stop.to_i.at_most(@tokens.length)
978
+ @tokens[start..stop].map! { |t| t.text }.join('')
979
+ end
980
+
981
+ alias to_s extract_text
982
+
983
+ end
984
+
985
+ end