antlr3 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/ANTLR-LICENSE.txt +26 -0
  2. data/History.txt +66 -0
  3. data/README.txt +139 -0
  4. data/bin/antlr4ruby +33 -0
  5. data/java/RubyTarget.java +524 -0
  6. data/java/antlr-full-3.2.1.jar +0 -0
  7. data/lib/antlr3.rb +176 -0
  8. data/lib/antlr3/constants.rb +88 -0
  9. data/lib/antlr3/debug.rb +701 -0
  10. data/lib/antlr3/debug/event-hub.rb +210 -0
  11. data/lib/antlr3/debug/record-event-listener.rb +25 -0
  12. data/lib/antlr3/debug/rule-tracer.rb +55 -0
  13. data/lib/antlr3/debug/socket.rb +360 -0
  14. data/lib/antlr3/debug/trace-event-listener.rb +92 -0
  15. data/lib/antlr3/dfa.rb +247 -0
  16. data/lib/antlr3/dot.rb +174 -0
  17. data/lib/antlr3/error.rb +657 -0
  18. data/lib/antlr3/main.rb +561 -0
  19. data/lib/antlr3/modes/ast-builder.rb +41 -0
  20. data/lib/antlr3/modes/filter.rb +56 -0
  21. data/lib/antlr3/profile.rb +322 -0
  22. data/lib/antlr3/recognizers.rb +1280 -0
  23. data/lib/antlr3/streams.rb +985 -0
  24. data/lib/antlr3/streams/interactive.rb +91 -0
  25. data/lib/antlr3/streams/rewrite.rb +412 -0
  26. data/lib/antlr3/test/call-stack.rb +57 -0
  27. data/lib/antlr3/test/config.rb +23 -0
  28. data/lib/antlr3/test/core-extensions.rb +269 -0
  29. data/lib/antlr3/test/diff.rb +165 -0
  30. data/lib/antlr3/test/functional.rb +207 -0
  31. data/lib/antlr3/test/grammar.rb +371 -0
  32. data/lib/antlr3/token.rb +592 -0
  33. data/lib/antlr3/tree.rb +1415 -0
  34. data/lib/antlr3/tree/debug.rb +163 -0
  35. data/lib/antlr3/tree/visitor.rb +84 -0
  36. data/lib/antlr3/tree/wizard.rb +481 -0
  37. data/lib/antlr3/util.rb +149 -0
  38. data/lib/antlr3/version.rb +27 -0
  39. data/samples/ANTLRv3Grammar.g +621 -0
  40. data/samples/Cpp.g +749 -0
  41. data/templates/AST.stg +335 -0
  42. data/templates/ASTDbg.stg +40 -0
  43. data/templates/ASTParser.stg +153 -0
  44. data/templates/ASTTreeParser.stg +272 -0
  45. data/templates/Dbg.stg +192 -0
  46. data/templates/Ruby.stg +1514 -0
  47. data/test/functional/ast-output/auto-ast.rb +797 -0
  48. data/test/functional/ast-output/construction.rb +555 -0
  49. data/test/functional/ast-output/hetero-nodes.rb +753 -0
  50. data/test/functional/ast-output/rewrites.rb +1327 -0
  51. data/test/functional/ast-output/tree-rewrite.rb +1662 -0
  52. data/test/functional/debugging/debug-mode.rb +689 -0
  53. data/test/functional/debugging/profile-mode.rb +165 -0
  54. data/test/functional/debugging/rule-tracing.rb +74 -0
  55. data/test/functional/delegation/import.rb +379 -0
  56. data/test/functional/lexer/basic.rb +559 -0
  57. data/test/functional/lexer/filter-mode.rb +245 -0
  58. data/test/functional/lexer/nuances.rb +47 -0
  59. data/test/functional/lexer/properties.rb +104 -0
  60. data/test/functional/lexer/syn-pred.rb +32 -0
  61. data/test/functional/lexer/xml.rb +206 -0
  62. data/test/functional/main/main-scripts.rb +245 -0
  63. data/test/functional/parser/actions.rb +224 -0
  64. data/test/functional/parser/backtracking.rb +244 -0
  65. data/test/functional/parser/basic.rb +282 -0
  66. data/test/functional/parser/calc.rb +98 -0
  67. data/test/functional/parser/ll-star.rb +143 -0
  68. data/test/functional/parser/nuances.rb +165 -0
  69. data/test/functional/parser/predicates.rb +103 -0
  70. data/test/functional/parser/properties.rb +242 -0
  71. data/test/functional/parser/rule-methods.rb +132 -0
  72. data/test/functional/parser/scopes.rb +274 -0
  73. data/test/functional/token-rewrite/basic.rb +318 -0
  74. data/test/functional/token-rewrite/via-parser.rb +100 -0
  75. data/test/functional/tree-parser/basic.rb +750 -0
  76. data/test/unit/sample-input/file-stream-1 +2 -0
  77. data/test/unit/sample-input/teststreams.input2 +2 -0
  78. data/test/unit/test-dfa.rb +52 -0
  79. data/test/unit/test-exceptions.rb +44 -0
  80. data/test/unit/test-recognizers.rb +55 -0
  81. data/test/unit/test-scheme.rb +62 -0
  82. data/test/unit/test-streams.rb +459 -0
  83. data/test/unit/test-tree-wizard.rb +535 -0
  84. data/test/unit/test-trees.rb +854 -0
  85. metadata +205 -0
@@ -0,0 +1,985 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ =begin LICENSE
5
+
6
+ [The "BSD licence"]
7
+ Copyright (c) 2009 Kyle Yetter
8
+ All rights reserved.
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions
12
+ are met:
13
+
14
+ 1. Redistributions of source code must retain the above copyright
15
+ notice, this list of conditions and the following disclaimer.
16
+ 2. Redistributions in binary form must reproduce the above copyright
17
+ notice, this list of conditions and the following disclaimer in the
18
+ documentation and/or other materials provided with the distribution.
19
+ 3. The name of the author may not be used to endorse or promote products
20
+ derived from this software without specific prior written permission.
21
+
22
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+
33
+ =end
34
+
35
+ module ANTLR3
36
+
37
+
38
+ =begin rdoc ANTLR3::Stream
39
+
40
+ = ANTLR3 Streams
41
+
42
+ This documentation first covers the general concept of streams as used by ANTLR
43
+ recognizers, and then discusses the specific <tt>ANTLR3::Stream</tt> module.
44
+
45
+ == ANTLR Stream Classes
46
+
47
+ ANTLR recognizers need a way to walk through input data in a serialized IO-style
48
+ fashion. They also need some book-keeping about the input to provide useful
49
+ information to developers, such as current line number and column. Furthermore,
50
+ to implement backtracking and various error recovery techniques, recognizers
51
+ need a way to record various locations in the input at a number of points in the
52
+ recognition process so the input state may be restored back to a prior state.
53
+
54
+ ANTLR bundles all of this functionality into a number of Stream classes, each
55
+ designed to be used by recognizers for a specific recognition task. Most of the
56
+ Stream hierarchy is implemented in antlr3/stream.rb, which is loaded by default
57
+ when 'antlr3' is required.
58
+
59
+ ---
60
+
61
+ Here's a brief overview of the various stream classes and their respective
62
+ purpose:
63
+
64
+ StringStream::
65
+ Similar to StringIO from the standard Ruby library, StringStream wraps raw
66
+ String data in a Stream interface for use by ANTLR lexers.
67
+ FileStream::
68
+ A subclass of StringStream, FileStream simply wraps data read from an IO or
69
+ File object for use by lexers.
70
+ CommonTokenStream::
71
+ The job of a TokenStream is to read lexer output and then provide ANTLR
72
+ parsers with the means to sequentially walk through a series of tokens.
73
+ CommonTokenStream is the default TokenStream implementation.
74
+ TokenRewriteStream::
75
+ A subclass of CommonTokenStream, TokenRewriteStreams provide rewriting-parsers
76
+ the ability to produce new output text from an input token-sequence by
77
+ managing rewrite "programs" on top of the stream.
78
+ CommonTreeNodeStream::
79
+ In a similar fashion to CommonTokenStream, CommonTreeNodeStream feeds tokens
80
+ to recognizers in a sequential fashion. However, the stream object serializes
81
+ an Abstract Syntax Tree into a flat, one-dimensional sequence, but preserves
82
+ the two-dimensional shape of the tree using special UP and DOWN tokens. The
83
+ sequence is primarily used by ANTLR Tree Parsers. *note* -- this is not
84
+ defined in antlr3/stream.rb, but antlr3/tree.rb
85
+
86
+ ---
87
+
88
+ The next few sections cover the most significant methods of all stream classes.
89
+
90
+ === consume / look / peek
91
+
92
+ <tt>stream.consume</tt> is used to advance a stream one unit. StringStreams are
93
+ advanced by one character and TokenStreams are advanced by one token.
94
+
95
+ <tt>stream.peek(k = 1)</tt> is used to quickly retrieve the object of interest
96
+ to a recognizer at look-ahead position specified by <tt>k</tt>. For
97
+ <b>StringStreams</b>, this is the <i>integer value of the character</i>
98
+ <tt>k</tt> characters ahead of the stream cursor. For <b>TokenStreams</b>, this
99
+ is the <i>integer token type of the token</i> <tt>k</tt> tokens ahead of the
100
+ stream cursor.
101
+
102
+ <tt>stream.look(k = 1)</tt> is used to retrieve the full object of interest at
103
+ look-ahead position specified by <tt>k</tt>. While <tt>peek</tt> provides the
104
+ <i>bare-minimum lightweight information</i> that the recognizer needs,
105
+ <tt>look</tt> provides the <i>full object of concern</i> in the stream. For
106
+ <b>StringStreams</b>, this is a <i>string object containing the single
107
+ character</i> <tt>k</tt> characters ahead of the stream cursor. For
108
+ <b>TokenStreams</b>, this is the <i>full token structure</i> <tt>k</tt> tokens
109
+ ahead of the stream cursor.
110
+
111
+ <b>Note:</b> in most ANTLR runtime APIs for other languages, <tt>peek</tt> is
112
+ implemented by some method with a name like <tt>LA(k)</tt> and <tt>look</tt> is
113
+ implemented by some method with a name like <tt>LT(k)</tt>. When writing this
114
+ Ruby runtime API, I found this naming practice both confusing, ambiguous, and
115
+ un-Ruby-like. Thus, I chose <tt>peek</tt> and <tt>look</tt> to represent a
116
+ quick-look (peek) and a full-fledged look-ahead operation (look). If this causes
117
+ confusion or any sort of compatibility strife for developers using this
118
+ implementation, all apologies.
119
+
120
+ === mark / rewind / release
121
+
122
+ <tt>marker = stream.mark</tt> causes the stream to record important information
123
+ about the current stream state, place the data in an internal memory table, and
124
+ return a memento, <tt>marker</tt>. The marker object is typically an integer key
125
+ to the stream's internal memory table.
126
+
127
+ Used in tandem with, <tt>stream.rewind(mark = last_marker)</tt>, the marker can
128
+ be used to restore the stream to an earlier state. This is used by recognizers
129
+ to perform tasks such as backtracking and error recovery.
130
+
131
+ <tt>stream.release(marker = last_marker)</tt> can be used to release an existing
132
+ state marker from the memory table.
133
+
134
+ === seek
135
+
136
+ <tt>stream.seek(position)</tt> moves the stream cursor to an absolute position
137
+ within the stream, basically like typical ruby <tt>IO#seek</tt> style methods.
138
+ However, unlike <tt>IO#seek</tt>, ANTLR streams currently always use absolute
139
+ position seeking.
140
+
141
+ == The Stream Module
142
+
143
+ <tt>ANTLR3::Stream</tt> is an abstract-ish base mixin for all IO-like stream
144
+ classes used by ANTLR recognizers.
145
+
146
+ The module doesn't do much on its own besides define arguably annoying
147
+ ``abstract'' pseudo-methods that demand implementation when it is mixed in to a
148
+ class that wants to be a Stream. Right now this exists as an artifact of porting
149
+ the ANTLR Java/Python runtime library to Ruby. In Java, of course, this is
150
+ represented as an interface. In Ruby, however, objects are duck-typed and
151
+ interfaces aren't that useful as programmatic entities -- in fact, it's mildly
152
+ wasteful to have a module like this hanging out. Thus, I may axe it.
153
+
154
+ When mixed in, it does give the class a #size and #source_name attribute
155
+ methods.
156
+
157
+ Except in a small handful of places, most of the ANTLR runtime library uses
158
+ duck-typing and not type checking on objects. This means that the methods which
159
+ manipulate stream objects don't usually bother checking that the object is a
160
+ Stream and assume that the object implements the proper stream interface. Thus,
161
+ it is not strictly necessary that custom stream objects include ANTLR3::Stream,
162
+ though it isn't a bad idea.
163
+
164
+ =end
165
+
166
module Stream
  include ANTLR3::Constants
  extend ClassMacros

  ##
  # :method: consume
  # used to advance a stream one unit (such as character or token)
  abstract :consume

  ##
  # :method: peek(k=1)
  # used to quickly retrieve the object of interest to a recognizer at lookahead
  # position specified by <tt>k</tt> (such as integer value of a character or an
  # integer token type)
  abstract :peek

  ##
  # :method: look(k=1)
  # used to retrieve the full object of interest at lookahead position specified
  # by <tt>k</tt> (such as a character string or a token structure)
  abstract :look

  ##
  # :method: mark
  # expected to record the current stream state and return a bookmark value
  # that can later be passed to #rewind or #release (see StringStream#mark for
  # the reference implementation)
  abstract :mark

  ##
  # :method: index
  # expected to return the integer position of the stream cursor
  abstract :index

  ##
  # :method: rewind(marker=last_marker)
  # expected to restore the stream state recorded for +marker+, a value
  # previously returned by #mark
  abstract :rewind

  ##
  # :method: release(marker = last_marker)
  # expected to discard the state entry recorded for +marker+
  abstract :release

  ##
  # :method: seek(position)
  # expected to move the stream cursor to the absolute +position+ given
  abstract :seek

  # the total number of symbols contained in the stream
  attr_reader :size
  # a name describing the origin of the stream's data -- usually a file name
  attr_accessor :source_name
end
218
+
219
+ =begin rdoc ANTLR3::CharacterStream
220
+
221
+ CharacterStream further extends the abstract-ish base mixin Stream to add
222
+ methods specific to navigating character-based input data. Thus, it serves as an
223
+ imitation of the Java interface for text-based streams, which are primarily
224
+ used by lexers.
225
+
226
+ It adds the ``abstract'' method, <tt>substring(start, stop)</tt>, which must be
227
+ implemented to return a slice of the input string from position <tt>start</tt>
228
+ to position <tt>stop</tt>. It also adds attribute accessor methods <tt>line</tt>
229
+ and <tt>column</tt>, which are expected to indicate the current line number and
230
+ position within the current line, respectively.
231
+
232
+ == A Word About <tt>line</tt> and <tt>column</tt> attributes
233
+
234
+ Presumably, the concept of <tt>line</tt> and <tt>column</tt> attributes of text
235
+ are familiar to most developers. Line numbers of text are indexed from number 1
236
+ up (not 0). Column numbers are indexed from 0 up. Thus, examining sample text:
237
+
238
+ Hey this is the first line.
239
+ Oh, and this is the second line.
240
+
241
+ Line 1 is the string "Hey this is the first line\\n". If a character stream is at
242
+ line 2, character 0, the stream cursor is sitting between the characters "\\n"
243
+ and "O".
244
+
245
+ *Note:* most ANTLR runtime APIs for other languages refer to <tt>column</tt>
246
+ with the more-precise, but lengthy name <tt>charPositionInLine</tt>. I preferred
247
+ to keep it simple and familiar in this Ruby runtime API.
248
+
249
+ =end
250
+
251
module CharacterStream
  include Stream
  extend ClassMacros

  # integer value used to signal that the character input is exhausted
  EOF = -1

  ##
  # :method: substring(start,stop)
  # expected to return the slice of the input text between absolute character
  # positions +start+ and +stop+, inclusive
  abstract :substring

  # the current line number, indexed upward from 1
  attr_accessor :line
  # the character position within the current line, indexed upward from 0
  attr_accessor :column
end
263
+
264
+
265
+ =begin rdoc ANTLR3::TokenStream
266
+
267
+ TokenStream further extends the abstract-ish base mixin Stream to add methods
268
+ specific to navigating token sequences. Thus, it serves as an imitation of the
269
+ Java interface for token-based streams, which are used by many different
270
+ components in ANTLR, including parsers and tree parsers.
271
+
272
+ == Token Streams
273
+
274
+ Token streams wrap a sequence of token objects produced by some token source,
275
+ usually a lexer. They provide the operations required by higher-level
276
+ recognizers, such as parsers and tree parsers for navigating through the
277
+ sequence of tokens. Unlike simple character-based streams, such as StringStream,
278
+ token-based streams have an additional level of complexity because they must
279
+ manage the task of "tuning" to a specific token channel.
280
+
281
+ One of the main advantages of ANTLR-based recognition is the token
282
+ <i>channel</i> feature, which allows you to hold on to all tokens of interest
283
+ while only presenting a specific set of interesting tokens to a parser. For
284
+ example, if you need to hide whitespace and comments from a parser, but hang on
285
+ to them for some other purpose, you have the lexer assign the comments and
286
+ whitespace to channel value HIDDEN as it creates the tokens.
287
+
288
+ When you create a token stream, you can tune it to some specific channel value.
289
+ Then, all <tt>peek</tt>, <tt>look</tt>, and <tt>consume</tt> operations only
290
+ yield tokens that have the same value for <tt>channel</tt>. The stream skips
291
+ over any non-matching tokens in between.
292
+
293
+ == The TokenStream Interface
294
+
295
+ In addition to the abstract methods and attribute methods provided by the base
296
+ Stream module, TokenStream adds a number of additional method implementation
297
+ requirements and attributes.
298
+
299
+ =end
300
+
301
module TokenStream
  include Stream
  extend ClassMacros

  ##
  # expected to return the token source object (such as a lexer) from which
  # all tokens in the stream were retrieved
  attr_reader :token_source

  ##
  # expected to return the value of the last marker produced by a call to
  # <tt>stream.mark</tt>
  attr_reader :last_marker

  ##
  # expected to return the integer index of the stream cursor
  attr_reader :position

  ##
  # the integer channel value to which the stream is ``tuned''
  attr_accessor :channel

  ##
  # :method: to_s(start=0,stop=tokens.length-1)
  # should take the tokens between start and stop in the sequence, extract their text
  # and return the concatenation of all the text chunks
  abstract :to_s

  ##
  # :method: at
  # presumably returns the token at a given stream index without channel
  # filtering -- TODO: confirm against the concrete implementations
  abstract :at
end
334
+
335
+ =begin rdoc ANTLR3::StringStream
336
+
337
+ A StringStream's purpose is to wrap the basic, naked text input of a recognition
338
+ system. Like all other stream types, it provides serial navigation of the input;
339
+ a recognizer can arbitrarily step forward and backward through the stream's
340
+ symbols as it requires. StringStream and its subclasses are the main way to
341
+ feed text input into an ANTLR Lexer for token processing.
342
+
343
+ The stream's symbols of interest, of course, are character values. Thus, the
344
+ #peek method returns the integer character value at look-ahead position
345
+ <tt>k</tt> and the #look method returns the character value as a +String+. They
346
+ also track various pieces of information such as the line and column numbers at
347
+ the current position.
348
+
349
+ === Note About Text Encoding
350
+
351
+ This version of the runtime library primarily targets ruby version 1.8, which
352
+ does not have strong built-in support for multi-byte character encodings. Thus,
353
+ characters are assumed to be represented by a single byte -- an integer between
354
+ 0 and 255. Ruby 1.9 does provide built-in encoding support for multi-byte
355
+ characters, but currently this library does not provide any streams to handle
356
+ non-ASCII encoding. However, encoding-savvy recognition code is a future
357
+ development goal for this project.
358
+
359
+ =end
360
+
361
class StringStream
  include CharacterStream

  # current integer character index of the stream
  attr_reader :position

  # the current line number of the input, indexed upward from 1
  attr_reader :line

  # the current character position within the current line, indexed upward from 0
  attr_reader :column

  # the name associated with the stream -- usually a file name
  # NOTE(review): the documented default of <tt>"(string)"</tt> is commented out
  # in #initialize, so the actual default is +nil+ unless a subclass (such as
  # FileStream) assigns a name -- confirm which behavior is intended
  attr_accessor :name

  # the entire (frozen) string that is wrapped by the stream
  attr_reader :data
379
+
380
# creates a new StringStream object where +data+ is the string data to stream.
# accepts the following options in a symbol-to-value hash:
#
# [:file or :name] the (file) name to associate with the stream; default: <tt>'(string)'</tt>
# [:line] the initial line number; default: +1+
# [:column] the initial column number; default: +0+
#
def initialize(data, options = {})
  text = data.to_s
  # take a private copy if to_s handed back the caller's own object, so that
  # freezing the stream data cannot affect (or be affected by) the caller
  text = text.clone if text.equal?(data)
  @data = text.freeze
  @position = 0
  @line = options.fetch :line, 1
  @column = options.fetch :column, 0
  @markers = []
  mark # marker 0 records the pristine initial state
  @name ||= options[:file] || options[:name] # || '(string)'
end
398
+
399
# the number of characters contained in the stream data
def size
  @data.size
end

alias length size
404
+
405
#
# rewinds the stream back to the start and clears out any existing marker entries
#
def reset
  @position = 0
  @line = 1
  @column = 0
  @markers.clear
  self
end
415
+
416
#
# advance the stream by one character; returns the character consumed
# (or EOF when the input is already exhausted)
#
def consume
  current = @data[@position] || EOF
  unless @position >= @data.length
    # a newline bumps the line counter and resets the column;
    # any other character just advances the column
    if current == ?\n
      @line += 1
      @column = 0
    else
      @column += 1
    end
    @position += 1
  end
  current
end
431
+
432
#
# return the character at look-ahead distance +k+ as an integer. <tt>k = 1</tt>
# is the current character; larger values reach upcoming characters. A negative
# +k+ addresses previously consumed characters, where <tt>k = -1</tt> is the
# last character consumed. <tt>k = 0</tt> has undefined behavior and returns +nil+
#
def peek(k = 1)
  return nil if k.zero?
  # negative distances are shifted by one so that -1 addresses the character
  # immediately behind the cursor
  offset = k < 0 ? k + 1 : k
  target = @position + offset - 1
  return nil if target < 0
  @data[target] || EOF
end
445
+
446
#
# identical to #peek, except it returns the character value as a String
#
def look(k = 1)
  return nil if k.zero?
  offset = k < 0 ? k + 1 : k
  target = @position + offset - 1
  return nil if target < 0
  char = @data[target]
  char && char.chr
end
458
+
459
#
# return a substring around the stream cursor at a distance +k+
# if <tt>k >= 0</tt>, return the next k characters
# if <tt>k < 0</tt>, return the previous <tt>|k|</tt> characters
#
def through(k)
  return @data[@position, k] if k >= 0
  # clamp the start to 0 so a negative index cannot wrap around the string
  from = (@position + k).at_least(0)
  @data[from...@position]
end
470
+
471
# operator style look-ahead
alias >> look

# operator style look-behind: <tt>stream << k</tt> returns the character +k+
# positions behind the cursor, equivalent to <tt>look(-k)</tt>
def <<(k)
  # bug fix: the original body was `self << -k`, which called itself and
  # recursed infinitely instead of delegating to #look
  look(-k)
end
478
+
479
alias index position           # generic Stream interface name for the cursor
alias character_index position # descriptive alias: the cursor counts characters

alias source_name name         # Stream interface accessor for the stream's name
483
+
484
#
# Returns true if the stream appears to be at the beginning of a new line.
# This is an extra utility method for use inside lexer actions if needed.
#
def beginning_of_line?
  @position == 0 || @data[@position - 1] == ?\n
end
491
+
492
#
# Returns true if the character at the current stream position is a newline
# (i.e. the cursor sits at the end of a line); returns nil once the input is
# exhausted. This is an extra utility method for use inside lexer actions.
#
def end_of_line?
  # bug fix: the original guard read `if @position >= @data.length`, inverting
  # the bounds check so the method could never return true
  @data[@position] == ?\n if @position < @data.length
end
499
+
500
#
# Returns true if the stream has been exhausted.
# This is an extra utility method for use inside lexer actions if needed.
#
def end_of_string?
  @data.length <= @position
end
507
+
508
#
# Returns true if the stream appears to be at the beginning of a stream (position = 0).
# This is an extra utility method for use inside lexer actions if needed.
#
def beginning_of_string?
  @position.zero?
end
515
+
516
alias eof? end_of_string?       # conventional IO-style name
alias bof? beginning_of_string? # conventional IO-style name
518
+
519
#
# record the current stream location parameters in the stream's marker table and
# return an integer-valued bookmark that may be used to restore the stream's
# position with the #rewind method. This method is used to implement backtracking.
#
def mark
  # the snapshot is frozen so a stored state can never be mutated in place
  @markers << [@position, @line, @column].freeze
  @markers.length - 1
end
529
+
530
#
# restore the stream to an earlier location recorded by #mark. If no marker value is
# provided, the last marker generated by #mark will be used. When +release+ is true
# (the default), the marker entry is dropped from the marker table after restoring.
#
# NOTE: the +release+ parameter shadows the #release method; the parenthesized
# call <tt>release(marker)</tt> below still resolves to the method.
#
def rewind(marker = @markers.length - 1, release = true)
  # silently return self if the marker is negative or has no recorded state
  (marker >= 0 and location = @markers[marker]) or return(self)
  @position, @line, @column = location
  release(marker) if release
  return self
end
540
+
541
#
# the total number of markers currently in existence
#
def mark_depth
  @markers.size
end
547
+
548
#
# the last marker value created by a call to #mark
#
def last_marker
  @markers.size - 1
end
554
+
555
#
# let go of the bookmark data for +marker+ and all marker values created
# after it. Marker 0 (the initial state recorded at construction) is never
# released.
#
def release(marker = @markers.length - 1)
  marker.between?(1, @markers.length - 1) or return
  # bug fix: the original used `@markers[marker, n] = nil`, which deleted the
  # slice on Ruby 1.8 but *inserts a literal nil element* on Ruby 1.9+,
  # leaving a bogus trailing marker; Array#slice! deletes the tail on all
  # versions
  @markers.slice!(marker, @markers.length - marker)
  return self
end
564
+
565
#
# jump to the absolute position value given by +index+.
# note: if +index+ is before the current position, the +line+ and +column+
# attributes of the stream will probably be incorrect
#
def seek(index)
  index = index.bound( 0, @data.length ) # ensures index is within the stream's range
  if index > @position
    # moving forward: scan the skipped text so line/column stay in sync
    skipped = through( index - @position )
    # String#count always returns an Integer, so the assignment itself is
    # always truthy -- the test that matters is lc.zero?
    if lc = skipped.count("\n") and lc.zero?
      @column += skipped.length
    else
      @line += lc
      # new column = distance past the last newline in the skipped text
      @column = skipped.length - skipped.rindex("\n") - 1
    end
  end
  @position = index
  return nil
end
584
+
585
#
# customized object inspection that shows:
# * the stream class
# * the stream's location in <tt>index / line:column</tt> format
# * +before_chars+ characters before the cursor (6 characters by default)
# * +after_chars+ characters after the cursor (10 characters by default)
#
def inspect(before_chars = 6, after_chars = 10)
  behind = through(-before_chars).inspect
  # mark truncation on either side with an ellipsis
  behind.insert(0, '... ') if @position - before_chars > 0

  ahead = through(after_chars).inspect
  ahead << ' ...' if @position + after_chars + 1 < @data.length

  where = "#@position / line #@line:#@column"
  "#<#{self.class}: #{behind} | #{ahead} @ #{where}>"
end
602
+
603
#
# return the string slice between position +start+ and +stop+, inclusive
#
def substring(start, stop)
  span = stop - start + 1
  @data[start, span]
end
609
+
610
#
# identical to String#[], applied to the wrapped data string
#
def [](start, *args)
  @data.slice(start, *args)
end
616
+ end
617
+
618
+
619
+ =begin rdoc ANTLR3::FileStream
620
+
621
+ FileStream is a character stream that uses data stored in some external file. It
622
+ is nearly identical to StringStream and functions as use data located in a file
623
+ while automatically setting up the +source_name+ and +line+ parameters. It does
624
+ not actually use any buffered IO operations throughout the stream navigation
625
+ process. Instead, it reads the file data once when the stream is initialized.
626
+
627
+ =end
628
+
629
class FileStream < StringStream

  #
  # creates a new FileStream object using the given +file+ object.
  # If +file+ is a path string, the file will be read and the contents
  # will be used and the +name+ attribute will be set to the path.
  # If +file+ is an IO-like object (that responds to :read),
  # the content of the object will be used and the stream will
  # attempt to set its +name+ object first trying the method #name
  # on the object, then trying the method #path on the object.
  #
  # see StringStream.new for a list of additional options
  # the constructor accepts
  #
  def initialize(file, options = {})
    case file
    when $stdin then
      data = $stdin.read
      @name = '(stdin)'
    when ::File then
      # reopen a cloned handle so the read starts from the beginning of the
      # file and does not disturb the caller's file position
      file = file.clone
      file.reopen(file.path, 'r')
      @name = file.path
      data = file.read
      file.close
    else
      if file.respond_to?(:read)
        # generic IO-like object: take its content and best-guess a name
        data = file.read
        if file.respond_to?(:name) then @name = file.name
        elsif file.respond_to?(:path) then @name = file.path
        end
      else
        # otherwise treat the argument as a file-system path
        @name = file.to_s
        if test(?f, @name) then data = File.read(@name)
        else raise ArgumentError, "could not find an existing file at %p" % @name
        end
      end
    end
    super(data, options)
  end

end
671
+
672
+ =begin rdoc ANTLR3::CommonTokenStream
673
+
674
+ CommonTokenStream serves as the primary token stream implementation for feeding
675
+ sequential token input into parsers.
676
+
677
+ Using some TokenSource (such as a lexer), the stream collects a token sequence,
678
+ setting the token's <tt>index</tt> attribute to indicate the token's position
679
+ within the stream. The streams may be tuned to some channel value; off-channel
680
+ tokens will be filtered out by the #peek, #look, and #consume methods.
681
+
682
+ === Sample Usage
683
+
684
+
685
+ source_input = ANTLR3::StringStream.new("35 * 4 - 1")
686
+ lexer = Calculator::Lexer.new(source_input)
687
+ tokens = ANTLR3::CommonTokenStream.new(lexer)
688
+
689
+ # assume this grammar defines whitespace as tokens on channel HIDDEN
690
+ # and numbers and operations as tokens on channel DEFAULT
691
+ tokens.look # => 0 INT['35'] @ line 1 col 0 (0..1)
692
+ tokens.look(2) # => 2 MULT["*"] @ line 1 col 2 (3..3)
693
+ tokens.tokens(0, 2)
694
+ # => [0 INT["35"] @line 1 col 0 (0..1),
695
+ # 1 WS[" "] @line 1 col 2 (1..1),
696
+ # 2 MULT["*"] @ line 1 col 3 (3..3)]
697
+ # notice the #tokens method does not filter off-channel tokens
698
+
699
+ lexer.reset
700
+ hidden_tokens =
701
+ ANTLR3::CommonTokenStream.new(lexer, :channel => ANTLR3::HIDDEN)
702
+ hidden_tokens.look # => 1 WS[' '] @ line 1 col 2 (1..1)
703
+
704
+ =end
705
+
706
#
# the default token-buffering stream used to feed lexer output into parsers
# (see the rdoc above for an overview and example usage)
#
class CommonTokenStream
  include TokenStream
  include Enumerable
709
+
710
#
# constructs a new token stream using the +token_source+ provided. +token_source+ is
# usually a lexer, but can be any object that implements +next_token+ and includes
# ANTLR3::TokenSource.
#
# If a block is provided, each token harvested will be yielded and if the block
# returns a +nil+ or +false+ value, the token will not be added to the stream --
# it will be discarded.
#
# === Options
# [:channel] The channel value the stream should be tuned to initially
# [:source_name] The source name (file name) attribute of the stream
#
# === Example
#
#   # create a new token stream that is tuned to channel :comment, and
#   # discard all WHITE_SPACE tokens
#   ANTLR3::CommonTokenStream.new(lexer, :channel => :comment) do |token|
#     token.name != 'WHITE_SPACE'
#   end
#
def initialize(token_source, options = {})
  @token_source = token_source
  @last_marker = nil
  @channel = options.fetch(:channel, DEFAULT_CHANNEL)

  # harvest the full token sequence up front; the optional filter block may
  # discard tokens by returning false/nil
  @tokens =
    block_given? ? @token_source.select { |token| yield(token, self) } :
                   @token_source.to_a
  @tokens.each_with_index { |t, i| t.index = i }

  # position the cursor on the first on-channel token, or at the end of the
  # buffer if none matches. find_index replaces the original find + index
  # pair, which rescanned the array a second time and could return the wrong
  # position when distinct tokens compare equal under #==
  @position = @tokens.find_index { |t| t.channel == @channel } || @tokens.length

  @source_name = options.fetch(:source_name) { @token_source.source_name rescue nil }
end
747
+
748
+ #
749
+ # resets the token stream and rebuilds it with a potentially new token source.
750
+ # If no +token_source+ value is provided, the stream will attempt to reset the
751
+ # current +token_source+ by calling +reset+ on the object. The stream will
752
+ # then clear the token buffer and attempt to harvest new tokens. Identical in
753
+ # behavior to CommonTokenStream.new, if a block is provided, tokens will be
754
+ # yielded and discarded if the block returns a +false+ or +nil+ value.
755
+ #
756
+ def rebuild(token_source = nil)
757
+ if token_source.nil?
758
+ @token_source.reset rescue nil
759
+ else @token_source = token_source
760
+ end
761
+ @tokens = block_given? ? @token_source.select { |token| yield(token) } :
762
+ @token_source.to_a
763
+ @tokens.each_with_index { |t, i| t.index = i }
764
+ @last_marker = nil
765
+ @position =
766
+ if first_token = @tokens.find { |t| t.channel == @channel }
767
+ @tokens.index(first_token)
768
+ else @tokens.length
769
+ end
770
+ return self
771
+ end
772
+
773
+ #
774
+ # tune the stream to a new channel value
775
+ #
776
+ def tune_to(channel)
777
+ @channel = channel
778
+ end
779
+
780
+ def token_class
781
+ @token_source.token_class
782
+ rescue NoMethodError
783
+ @position == -1 and fill_buffer
784
+ @tokens.empty? ? CommonToken : @tokens.first.class
785
+ end
786
+
787
+ alias index position
788
+
789
+ def size
790
+ @tokens.length
791
+ end
792
+
793
+ alias length size
794
+
795
+ ###### State-Control ################################################
796
+
797
+ #
798
+ # rewind the stream to its initial state
799
+ #
800
+ def reset
801
+ @position = 0
802
+ @position += 1 while token = @tokens[@position] and
803
+ token.channel != @channel
804
+ @last_marker = nil
805
+ return self
806
+ end
807
+
808
+ #
809
+ # bookmark the current position of the input stream
810
+ #
811
+ def mark
812
+ @last_marker = @position
813
+ end
814
+
815
+ def release(marker = nil)
816
+ # do nothing
817
+ end
818
+
819
+
820
+ def rewind(marker = @last_marker, release = true)
821
+ seek(marker)
822
+ end
823
+
824
+
825
+ ###### Stream Navigation ###########################################
826
+
827
+ #
828
+ # advance the stream one step to the next on-channel token
829
+ #
830
+ def consume
831
+ token = @tokens[@position] || EOF_TOKEN
832
+ if @position < @tokens.length
833
+ @position = future?(2) || @tokens.length
834
+ end
835
+ return(token)
836
+ end
837
+
838
+ #
839
+ # jump to the stream position specified by +index+
840
+ # note: seek does not check whether or not the
841
+ # token at the specified position is on-channel,
842
+ #
843
+ def seek(index)
844
+ @position = index.to_i.bound(0, @tokens.length)
845
+ return self
846
+ end
847
+
848
+ #
849
+ # return the type of the on-channel token at look-ahead distance +k+. <tt>k = 1</tt> represents
850
+ # the current token. +k+ greater than 1 represents upcoming on-channel tokens. A negative
851
+ # value of +k+ returns previous on-channel tokens consumed, where <tt>k = -1</tt> is the last
852
+ # on-channel token consumed. <tt>k = 0</tt> has undefined behavior and returns +nil+
853
+ #
854
+ def peek(k = 1)
855
+ tk = look(k) and return(tk.type)
856
+ end
857
+
858
+ #
859
+ # operates simillarly to #peek, but returns the full token object at look-ahead position +k+
860
+ #
861
+ def look(k = 1)
862
+ index = future?(k) or return nil
863
+ @tokens.fetch(index, EOF_TOKEN)
864
+ end
865
+
866
+ alias >> look
867
+ def << k
868
+ self >> -k
869
+ end
870
+
871
+ #
872
+ # returns the index of the on-channel token at look-ahead position +k+ or nil if no other
873
+ # on-channel tokens exist
874
+ #
875
+ def future?(k = 1)
876
+ @position == -1 and fill_buffer
877
+
878
+ case
879
+ when k == 0 then nil
880
+ when k < 0 then past?(-k)
881
+ when k == 1 then @position
882
+ else
883
+ # since the stream only yields on-channel
884
+ # tokens, the stream can't just go to the
885
+ # next position, but rather must skip
886
+ # over off-channel tokens
887
+ (k - 1).times.inject(@position) do |cursor, |
888
+ begin
889
+ tk = @tokens.at(cursor += 1) or return(cursor)
890
+ # ^- if tk is nil (i.e. i is outside array limits)
891
+ end until tk.channel == @channel
892
+ cursor
893
+ end
894
+ end
895
+ end
896
+
897
+ #
898
+ # returns the index of the on-channel token at look-behind position +k+ or nil if no other
899
+ # on-channel tokens exist before the current token
900
+ #
901
+ def past?(k = 1)
902
+ @position == -1 and fill_buffer
903
+
904
+ case
905
+ when k == 0 then nil
906
+ when @position - k < 0 then nil
907
+ else
908
+
909
+ k.times.inject(@position) do |cursor, |
910
+ begin
911
+ cursor <= 0 and return(nil)
912
+ tk = @tokens.at(cursor -= 1) or return(nil)
913
+ end until tk.channel == @channel
914
+ cursor
915
+ end
916
+
917
+ end
918
+ end
919
+
920
+ #
921
+ # yields each token in the stream (including off-channel tokens)
922
+ # If no block is provided, the method returns an Enumerator object.
923
+ # #each accepts the same arguments as #tokens
924
+ #
925
+ def each(*args)
926
+ block_given? or return enum_for(:each, *args)
927
+ tokens(*args).each { |token| yield(token) }
928
+ end
929
+
930
+ #
931
+ # returns a copy of the token buffer. If +start+ and +stop+ are provided, tokens
932
+ # returns a slice of the token buffer from <tt>start..stop</tt>. The parameters
933
+ # are converted to integers with their <tt>to_i</tt> methods, and thus tokens
934
+ # can be provided to specify start and stop. If a block is provided, tokens are
935
+ # yielded and filtered out of the return array if the block returns a +false+
936
+ # or +nil+ value.
937
+ #
938
+ def tokens(start = nil, stop = nil)
939
+ stop.nil? || stop >= @tokens.length and stop = @tokens.length - 1
940
+ start.nil? || stop < 0 and start = 0
941
+ tokens = @tokens[start..stop]
942
+
943
+ if block_given?
944
+ tokens.delete_if { |t| not yield(t) }
945
+ end
946
+
947
+ return( tokens )
948
+ end
949
+
950
+
951
+ def at(i)
952
+ @tokens.at i
953
+ end
954
+
955
+ #
956
+ # identical to Array#[], as applied to the stream's token buffer
957
+ #
958
+ def [](i, *args)
959
+ @tokens[i, *args]
960
+ end
961
+
962
+ ###### Standard Conversion Methods ###############################
963
+ def inspect
964
+ string = "#<%p: @token_source=%p @ %p/%p" %
965
+ [self.class, @token_source.class, @position, @tokens.length]
966
+ tk = look(-1) and string << " #{tk.inspect} <--"
967
+ tk = look( 1) and string << " --> #{tk.inspect}"
968
+ string << '>'
969
+ end
970
+
971
+ #
972
+ # fetches the text content of all tokens between +start+ and +stop+ and
973
+ # joins the chunks into a single string
974
+ #
975
+ def extract_text(start = 0, stop = @tokens.length - 1)
976
+ start = start.to_i.at_least(0)
977
+ stop = stop.to_i.at_most(@tokens.length)
978
+ @tokens[start..stop].map! { |t| t.text }.join('')
979
+ end
980
+
981
+ alias to_s extract_text
982
+
983
+ end
984
+
985
+ end