antlr3 1.2.3
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
data/lib/antlr3/streams.rb
@@ -0,0 +1,985 @@
#!/usr/bin/ruby
# encoding: utf-8

=begin LICENSE

[The "BSD licence"]
Copyright (c) 2009 Kyle Yetter
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=end

module ANTLR3


=begin rdoc ANTLR3::Stream

= ANTLR3 Streams

This documentation first covers the general concept of streams as used by ANTLR
recognizers, and then discusses the specific <tt>ANTLR3::Stream</tt> module.

== ANTLR Stream Classes

ANTLR recognizers need a way to walk through input data in a serialized IO-style
fashion. They also need some book-keeping about the input to provide useful
information to developers, such as current line number and column. Furthermore,
to implement backtracking and various error recovery techniques, recognizers
need a way to record various locations in the input at a number of points in the
recognition process so the input state may be restored back to a prior state.

ANTLR bundles all of this functionality into a number of Stream classes, each
designed to be used by recognizers for a specific recognition task. Most of the
Stream hierarchy is implemented in antlr3/streams.rb, which is loaded by default
when 'antlr3' is required.

---

Here's a brief overview of the various stream classes and their respective
purpose:

StringStream::
  Similar to StringIO from the standard Ruby library, StringStream wraps raw
  String data in a Stream interface for use by ANTLR lexers.
FileStream::
  A subclass of StringStream, FileStream simply wraps data read from an IO or
  File object for use by lexers.
CommonTokenStream::
  The job of a TokenStream is to read lexer output and then provide ANTLR
  parsers with the means to walk sequentially through a series of tokens.
  CommonTokenStream is the default TokenStream implementation.
TokenRewriteStream::
  A subclass of CommonTokenStream, TokenRewriteStreams provide rewriting-parsers
  the ability to produce new output text from an input token-sequence by
  managing rewrite "programs" on top of the stream.
CommonTreeNodeStream::
  In a similar fashion to CommonTokenStream, CommonTreeNodeStream feeds tree
  nodes to recognizers in a sequential fashion. The stream object serializes
  an Abstract Syntax Tree into a flat, one-dimensional sequence, but preserves
  the two-dimensional shape of the tree using special UP and DOWN tokens. The
  sequence is primarily used by ANTLR Tree Parsers. *note* -- this is not
  defined in antlr3/streams.rb, but in antlr3/tree.rb
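
To give a feel for how these classes fit together, a typical recognition run
wires them up in a pipeline roughly like this (<tt>Calculator</tt> stands in for
any generated recognizer module, as in the sample code further below):

  input  = ANTLR3::StringStream.new( "35 * 4 - 1" )  # raw character stream
  lexer  = Calculator::Lexer.new( input )            # produces tokens
  tokens = ANTLR3::CommonTokenStream.new( lexer )    # buffers and channel-filters tokens
  parser = Calculator::Parser.new( tokens )          # walks the token sequence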

---

The next few sections cover the most significant methods of all stream classes.

=== consume / look / peek

<tt>stream.consume</tt> is used to advance a stream one unit. StringStreams are
advanced by one character and TokenStreams are advanced by one token.

<tt>stream.peek(k = 1)</tt> is used to quickly retrieve the object of interest
to a recognizer at look-ahead position specified by <tt>k</tt>. For
<b>StringStreams</b>, this is the <i>integer value of the character</i>
<tt>k</tt> characters ahead of the stream cursor. For <b>TokenStreams</b>, this
is the <i>integer token type of the token</i> <tt>k</tt> tokens ahead of the
stream cursor.

<tt>stream.look(k = 1)</tt> is used to retrieve the full object of interest at
look-ahead position specified by <tt>k</tt>. While <tt>peek</tt> provides the
<i>bare-minimum lightweight information</i> that the recognizer needs,
<tt>look</tt> provides the <i>full object of concern</i> in the stream. For
<b>StringStreams</b>, this is a <i>string object containing the single
character</i> <tt>k</tt> characters ahead of the stream cursor. For
<b>TokenStreams</b>, this is the <i>full token structure</i> <tt>k</tt> tokens
ahead of the stream cursor.

<b>Note:</b> in most ANTLR runtime APIs for other languages, <tt>peek</tt> is
implemented by some method with a name like <tt>LA(k)</tt> and <tt>look</tt> is
implemented by some method with a name like <tt>LT(k)</tt>. When writing this
Ruby runtime API, I found this naming practice confusing, ambiguous, and
un-Ruby-like. Thus, I chose <tt>peek</tt> and <tt>look</tt> to represent a
quick-look (peek) and a full-fledged look-ahead operation (look). If this causes
confusion or any sort of compatibility strife for developers using this
implementation, all apologies.
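
As a rough sketch of the difference (character values assume the single-byte
strings of Ruby 1.8, as discussed later in this file):

  stream = ANTLR3::StringStream.new( "cat" )
  stream.peek      # => 99   (the integer value of the character "c")
  stream.look      # => "c"  (the same character, as a String)
  stream.peek( 2 ) # => 97   (the integer value of "a")
  stream.consume   # advances the cursor past "c"
  stream.look      # => "a"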

=== mark / rewind / release

<tt>marker = stream.mark</tt> causes the stream to record important information
about the current stream state, place the data in an internal memory table, and
return a memento, <tt>marker</tt>. The marker object is typically an integer key
to the stream's internal memory table.

Used in tandem with <tt>stream.rewind(mark = last_marker)</tt>, the marker can
be used to restore the stream to an earlier state. This is used by recognizers
to perform tasks such as backtracking and error recovery.

<tt>stream.release(marker = last_marker)</tt> can be used to release an existing
state marker from the memory table.
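
For example, a recognizer (or any code using a stream directly) can bookmark a
location, speculate ahead, and then restore the earlier state:

  marker = stream.mark        # remember where we are
  3.times { stream.consume }  # speculatively advance
  stream.rewind( marker )     # back to the bookmarked location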

=== seek

<tt>stream.seek(position)</tt> moves the stream cursor to an absolute position
within the stream, basically like typical ruby <tt>IO#seek</tt> style methods.
However, unlike <tt>IO#seek</tt>, ANTLR streams currently always use absolute
position seeking.
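
For example, assuming the input holds at least ten symbols:

  stream.seek( 10 )  # jump the cursor to absolute position 10
  stream.position    # => 10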

== The Stream Module

<tt>ANTLR3::Stream</tt> is an abstract-ish base mixin for all IO-like stream
classes used by ANTLR recognizers.

The module doesn't do much on its own besides define arguably annoying
``abstract'' pseudo-methods that demand implementation when it is mixed in to a
class that wants to be a Stream. Right now this exists as an artifact of porting
the ANTLR Java/Python runtime library to Ruby. In Java, of course, this is
represented as an interface. In Ruby, however, objects are duck-typed and
interfaces aren't that useful as programmatic entities -- in fact, it's mildly
wasteful to have a module like this hanging out. Thus, I may axe it.

When mixed in, it does give the class #size and #source_name attribute
methods.

Except in a small handful of places, most of the ANTLR runtime library uses
duck-typing and not type checking on objects. This means that the methods which
manipulate stream objects don't usually bother checking that the object is a
Stream and assume that the object implements the proper stream interface. Thus,
it is not strictly necessary that custom stream objects include ANTLR3::Stream,
though it isn't a bad idea.
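
For instance, a hand-rolled class that simply responds to the right messages can
be handed to recognizer code just as well as a built-in stream. A rough
(hypothetical) sketch of the idea -- a full replacement would also need #size,
#source_name and the mark / rewind / seek family listed above:

  class StaticStream                 # note: does not include ANTLR3::Stream
    attr_reader :data, :position
    attr_accessor :line, :column

    def initialize( data )
      @data, @position = data.to_s.freeze, 0
      @line, @column = 1, 0
    end

    def consume
      @position += 1 if @position < @data.length
    end

    def peek( k = 1 )
      @data[ @position + k - 1 ] || -1
    end

    def look( k = 1 )
      ( c = @data[ @position + k - 1 ] ) and c.chr
    end
  end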

=end

  module Stream
    include ANTLR3::Constants
    extend ClassMacros

    ##
    # :method: consume
    # used to advance a stream one unit (such as character or token)
    abstract :consume

    ##
    # :method: peek(k=1)
    # used to quickly retrieve the object of interest to a recognizer at lookahead
    # position specified by <tt>k</tt> (such as integer value of a character or an
    # integer token type)
    abstract :peek

    ##
    # :method: look(k=1)
    # used to retrieve the full object of interest at lookahead position specified
    # by <tt>k</tt> (such as a character string or a token structure)
    abstract :look

    ##
    # :method: mark
    # TODO: document
    abstract :mark

    ##
    # :method: index
    # TODO: document
    abstract :index

    ##
    # :method: rewind(marker=last_marker)
    # TODO: document
    abstract :rewind

    ##
    # :method: release(marker = last_marker)
    # TODO: document
    abstract :release

    ##
    # :method: seek(position)
    # TODO: document
    abstract :seek

    # TODO: document
    attr_reader :size
    # TODO: document
    attr_accessor :source_name
  end

=begin rdoc ANTLR3::CharacterStream

CharacterStream further extends the abstract-ish base mixin Stream to add
methods specific to navigating character-based input data. Thus, it serves as an
imitation of the Java interface for text-based streams, which are primarily
used by lexers.

It adds the ``abstract'' method <tt>substring(start, stop)</tt>, which must be
implemented to return a slice of the input string from position <tt>start</tt>
to position <tt>stop</tt>. It also adds attribute accessor methods <tt>line</tt>
and <tt>column</tt>, which are expected to indicate the current line number and
position within the current line, respectively.

== A Word About <tt>line</tt> and <tt>column</tt> attributes

Presumably, the concept of <tt>line</tt> and <tt>column</tt> attributes of text
is familiar to most developers. Line numbers of text are indexed from 1
up (not 0). Column numbers are indexed from 0 up. Thus, examining sample text:

  Hey this is the first line.
  Oh, and this is the second line.

Line 1 is the string "Hey this is the first line.\n". If a character stream is at
line 2, character 0, the stream cursor is sitting between the characters "\n"
and "O".
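
To see the attributes in action (the text is the two-line sample above, joined
with "\n" line endings):

  text   = "Hey this is the first line.\n" \
           "Oh, and this is the second line.\n"
  stream = ANTLR3::StringStream.new( text )
  stream.line    # => 1
  stream.column  # => 0
  stream.consume until stream.line == 2
  stream.column  # => 0  (the cursor now sits between "\n" and "O")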

*Note:* most ANTLR runtime APIs for other languages refer to <tt>column</tt>
with the more precise, but lengthy, name <tt>charPositionInLine</tt>. I preferred
to keep it simple and familiar in this Ruby runtime API.

=end

  module CharacterStream
    include Stream
    extend ClassMacros
    EOF = -1

    ##
    # :method: substring(start,stop)
    abstract :substring

    attr_accessor :line
    attr_accessor :column
  end


=begin rdoc ANTLR3::TokenStream

TokenStream further extends the abstract-ish base mixin Stream to add methods
specific to navigating token sequences. Thus, it serves as an imitation of the
Java interface for token-based streams, which are used by many different
components in ANTLR, including parsers and tree parsers.

== Token Streams

Token streams wrap a sequence of token objects produced by some token source,
usually a lexer. They provide the operations required by higher-level
recognizers, such as parsers and tree parsers, for navigating through the
sequence of tokens. Unlike simple character-based streams, such as StringStream,
token-based streams have an additional level of complexity because they must
manage the task of "tuning" to a specific token channel.

One of the main advantages of ANTLR-based recognition is the token
<i>channel</i> feature, which allows you to hold on to all tokens of interest
while only presenting a specific set of interesting tokens to a parser. For
example, if you need to hide whitespace and comments from a parser, but hang on
to them for some other purpose, you have the lexer assign the comments and
whitespace to channel value HIDDEN as it creates the tokens.

When you create a token stream, you can tune it to some specific channel value.
Then, all <tt>peek</tt>, <tt>look</tt>, and <tt>consume</tt> operations only
yield tokens that have the same value for <tt>channel</tt>. The stream skips
over any non-matching tokens in between.
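
For example, given some lexer, a CommonTokenStream (defined later in this file)
can be tuned at construction time or re-tuned afterward:

  tokens = ANTLR3::CommonTokenStream.new( lexer )  # tuned to the default channel
  tokens.tune_to( ANTLR3::HIDDEN )                 # now only HIDDEN tokens are yielded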

== The TokenStream Interface

In addition to the abstract methods and attribute methods provided by the base
Stream module, TokenStream adds a number of method implementation
requirements and attributes.

=end

  module TokenStream
    include Stream
    extend ClassMacros

    ##
    # expected to return the token source object (such as a lexer) from which
    # all tokens in the stream were retrieved
    attr_reader :token_source

    ##
    # expected to return the value of the last marker produced by a call to
    # <tt>stream.mark</tt>
    attr_reader :last_marker

    ##
    # expected to return the integer index of the stream cursor
    attr_reader :position

    ##
    # the integer channel value to which the stream is ``tuned''
    attr_accessor :channel

    ##
    # :method: to_s(start=0,stop=tokens.length-1)
    # should take the tokens between start and stop in the sequence, extract their text
    # and return the concatenation of all the text chunks
    abstract :to_s

    ##
    # :method: at
    # TODO: document
    abstract :at
  end

=begin rdoc ANTLR3::StringStream

A StringStream's purpose is to wrap the basic, naked text input of a recognition
system. Like all other stream types, it provides serial navigation of the input;
a recognizer can arbitrarily step forward and backward through the stream's
symbols as it requires. StringStream and its subclasses are the main way to
feed text input into an ANTLR Lexer for token processing.

The stream's symbols of interest, of course, are character values. Thus, the
#peek method returns the integer character value at look-ahead position
<tt>k</tt> and the #look method returns the character value as a +String+. They
also track various pieces of information such as the line and column numbers at
the current position.
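
A small illustration (the file name is arbitrary; the full list of options is
documented on ::new below):

  stream = ANTLR3::StringStream.new( "x = 3\n", :file => 'sample.tnl', :line => 10 )
  stream.source_name  # => "sample.tnl"
  stream.line         # => 10
  stream.peek         # => 120   (the integer value of "x")
  stream.look         # => "x"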

=== Note About Text Encoding

This version of the runtime library primarily targets ruby version 1.8, which
does not have strong built-in support for multi-byte character encodings. Thus,
characters are assumed to be represented by a single byte -- an integer between
0 and 255. Ruby 1.9 does provide built-in encoding support for multi-byte
characters, but currently this library does not provide any streams to handle
non-ASCII encoding. However, encoding-savvy recognition code is a future
development goal for this project.

=end

  class StringStream
    include CharacterStream

    # current integer character index of the stream
    attr_reader :position

    # the current line number of the input, indexed upward from 1
    attr_reader :line

    # the current character position within the current line, indexed upward from 0
    attr_reader :column

    # the name associated with the stream -- usually a file name
    # defaults to <tt>"(string)"</tt>
    attr_accessor :name

    # the entire string that is wrapped by the stream
    attr_reader :data

    # creates a new StringStream object where +data+ is the string data to stream.
    # accepts the following options in a symbol-to-value hash:
    #
    # [:file or :name] the (file) name to associate with the stream; default: <tt>'(string)'</tt>
    # [:line] the initial line number; default: +1+
    # [:column] the initial column number; default: +0+
    #
    def initialize(data, options = {})
      @data = data.to_s
      @data.equal?(data) and @data = @data.clone
      @data.freeze
      @position = 0
      @line = options.fetch :line, 1
      @column = options.fetch :column, 0
      @markers = []
      mark
      @name ||= options[:file] || options[:name] # || '(string)'
    end

    def size
      @data.length
    end

    alias length size

    #
    # rewinds the stream back to the start and clears out any existing marker entries
    #
    def reset
      @position = 0
      @line = 1
      @column = 0
      @markers.clear
      return self
    end

    #
    # advance the stream by one character; returns the character consumed
    #
    def consume
      c = @data[@position] || EOF
      if @position < @data.length
        @column += 1
        if c == ?\n
          @line += 1
          @column = 0
        end
        @position += 1
      end
      return(c)
    end

    #
    # return the character at look-ahead distance +k+ as an integer. <tt>k = 1</tt> represents
    # the current character. +k+ greater than 1 represents upcoming characters. A negative
    # value of +k+ returns previous characters consumed, where <tt>k = -1</tt> is the last
    # character consumed. <tt>k = 0</tt> has undefined behavior and returns +nil+
    #
    def peek(k = 1)
      k == 0 and return nil
      k += 1 if k < 0
      index = @position + k - 1
      index < 0 and return nil
      @data[index] or EOF
    end

    #
    # identical to #peek, except it returns the character value as a String
    #
    def look(k = 1)
      k == 0 and return nil
      k += 1 if k < 0

      index = @position + k - 1
      index < 0 and return nil

      c = @data[index] and c.chr
    end

    #
    # return a substring around the stream cursor at a distance +k+
    # if <tt>k >= 0</tt>, return the next k characters
    # if <tt>k < 0</tt>, return the previous <tt>|k|</tt> characters
    #
    def through(k)
      if k >= 0 then @data[ @position, k ] else
        start = (@position + k).at_least( 0 ) # start cannot be negative or index will wrap around
        @data[ start ... @position ]
      end
    end

    # operator style look-ahead
    alias >> look

    # operator style look-behind
    def <<(k)
      self >> -k
    end

    alias index position
    alias character_index position

    alias source_name name

    #
    # Returns true if the stream appears to be at the beginning of a new line.
    # This is an extra utility method for use inside lexer actions if needed.
    #
    def beginning_of_line?
      @position.zero? or @data[@position - 1] == ?\n
    end

    #
    # Returns true if the stream appears to be at the end of a line
    # (i.e. the next character is a newline).
    # This is an extra utility method for use inside lexer actions if needed.
    #
    def end_of_line?
      @data[@position] == ?\n if @position < @data.length
    end

    #
    # Returns true if the stream has been exhausted.
    # This is an extra utility method for use inside lexer actions if needed.
    #
    def end_of_string?
      @position >= @data.length
    end

    #
    # Returns true if the stream appears to be at the beginning of a stream (position = 0).
    # This is an extra utility method for use inside lexer actions if needed.
    #
    def beginning_of_string?
      @position == 0
    end

    alias eof? end_of_string?
    alias bof? beginning_of_string?

    #
    # record the current stream location parameters in the stream's marker table and
    # return an integer-valued bookmark that may be used to restore the stream's
    # position with the #rewind method. This method is used to implement backtracking.
    #
    def mark
      state = [@position, @line, @column].freeze
      @markers << state
      return @markers.length - 1
    end

    #
    # restore the stream to an earlier location recorded by #mark. If no marker value is
    # provided, the last marker generated by #mark will be used.
    #
    def rewind(marker = @markers.length - 1, release = true)
      (marker >= 0 and location = @markers[marker]) or return(self)
      @position, @line, @column = location
      release(marker) if release
      return self
    end

    #
    # the total number of markers currently in existence
    #
    def mark_depth
      @markers.length
    end

    #
    # the last marker value created by a call to #mark
    #
    def last_marker
      @markers.length - 1
    end

    #
    # let go of the bookmark data for the marker and all marker
    # values created after the marker.
    #
    def release(marker = @markers.length - 1)
      marker.between?(1, @markers.length - 1) or return
      @markers[marker, @markers.length - marker ] = nil
      return self
    end

    #
    # jump to the absolute position value given by +index+.
    # note: if +index+ is before the current position, the +line+ and +column+
    # attributes of the stream will probably be incorrect
    #
    def seek(index)
      index = index.bound( 0, @data.length ) # ensures index is within the stream's range
      if index > @position
        skipped = through( index - @position )
        if lc = skipped.count("\n") and lc.zero?
          @column += skipped.length
        else
          @line += lc
          @column = skipped.length - skipped.rindex("\n") - 1
        end
      end
      @position = index
      return nil
    end

    #
    # customized object inspection that shows:
    # * the stream class
    # * the stream's location in <tt>index / line:column</tt> format
    # * +before_chars+ characters before the cursor (6 characters by default)
    # * +after_chars+ characters after the cursor (10 characters by default)
    #
    def inspect(before_chars = 6, after_chars = 10)
      before = through( -before_chars ).inspect
      @position - before_chars > 0 and before.insert(0, '... ')

      after = through( after_chars ).inspect
      @position + after_chars + 1 < @data.length and after << ' ...'

      location = "#@position / line #@line:#@column"
      "#<#{self.class}: #{before} | #{after} @ #{location}>"
    end

    #
    # return the string slice between position +start+ and +stop+
    #
    def substring(start, stop)
      @data[start, stop - start + 1]
    end

    #
    # identical to String#[]
    #
    def [](start, *args)
      @data[start, *args]
    end
  end


=begin rdoc ANTLR3::FileStream

FileStream is a character stream that uses data stored in some external file. It
is nearly identical to StringStream, but it uses data located in a file
while automatically setting up the +source_name+ and +line+ parameters. It does
not actually use any buffered IO operations throughout the stream navigation
process. Instead, it reads the file data once when the stream is initialized.
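
For example (assuming a readable file exists at the given path):

  stream = ANTLR3::FileStream.new( 'hello.rb' )
  stream.source_name  # => "hello.rb"

An already-opened File or IO-like object may be passed instead of a path string.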

=end

  class FileStream < StringStream

    #
    # creates a new FileStream object using the given +file+ object.
    # If +file+ is a path string, the file will be read and the contents
    # will be used, and the +name+ attribute will be set to the path.
    # If +file+ is an IO-like object (that responds to :read),
    # the content of the object will be used and the stream will
    # attempt to set its +name+ attribute by first trying the method #name
    # on the object, then trying the method #path on the object.
    #
    # see StringStream.new for a list of additional options
    # the constructor accepts
    #
    def initialize(file, options = {})
      case file
      when $stdin then
        data = $stdin.read
        @name = '(stdin)'
      when ::File then
        file = file.clone
        file.reopen(file.path, 'r')
        @name = file.path
        data = file.read
        file.close
      else
        if file.respond_to?(:read)
          data = file.read
          if file.respond_to?(:name) then @name = file.name
          elsif file.respond_to?(:path) then @name = file.path
          end
        else
          @name = file.to_s
          if test(?f, @name) then data = File.read(@name)
          else raise ArgumentError, "could not find an existing file at %p" % @name
          end
        end
      end
      super(data, options)
    end

  end

=begin rdoc ANTLR3::CommonTokenStream

CommonTokenStream serves as the primary token stream implementation for feeding
sequential token input into parsers.

Using some TokenSource (such as a lexer), the stream collects a token sequence,
setting the token's <tt>index</tt> attribute to indicate the token's position
within the stream. The stream may be tuned to some channel value; off-channel
tokens will be filtered out by the #peek, #look, and #consume methods.

=== Sample Usage


  source_input = ANTLR3::StringStream.new("35 * 4 - 1")
  lexer = Calculator::Lexer.new(source_input)
  tokens = ANTLR3::CommonTokenStream.new(lexer)

  # assume this grammar defines whitespace as tokens on channel HIDDEN
  # and numbers and operations as tokens on channel DEFAULT
  tokens.look        # => 0 INT['35'] @ line 1 col 0 (0..1)
  tokens.look(2)     # => 2 MULT["*"] @ line 1 col 3 (3..3)
  tokens.tokens(0, 2)
  # => [0 INT["35"] @ line 1 col 0 (0..1),
  #     1 WS[" "] @ line 1 col 2 (1..1),
  #     2 MULT["*"] @ line 1 col 3 (3..3)]
  # notice the #tokens method does not filter off-channel tokens

  lexer.reset
  hidden_tokens =
    ANTLR3::CommonTokenStream.new(lexer, :channel => ANTLR3::HIDDEN)
  hidden_tokens.look # => 1 WS[' '] @ line 1 col 2 (1..1)

=end

  class CommonTokenStream
    include TokenStream
    include Enumerable

    #
    # constructs a new token stream using the +token_source+ provided. +token_source+ is
    # usually a lexer, but can be any object that implements +next_token+ and includes
    # ANTLR3::TokenSource.
    #
    # If a block is provided, each token harvested will be yielded and if the block
    # returns a +nil+ or +false+ value, the token will not be added to the stream --
    # it will be discarded.
    #
    # === Options
    # [:channel] The channel value the stream should be tuned to initially
    # [:source_name] The source name (file name) attribute of the stream
    #
    # === Example
    #
    #   # create a new token stream that is tuned to channel :comment, and
    #   # discard all WHITE_SPACE tokens
    #   ANTLR3::CommonTokenStream.new(lexer, :channel => :comment) do |token|
    #     token.name != 'WHITE_SPACE'
    #   end
    #
    def initialize(token_source, options = {})
      @token_source = token_source
      @last_marker = nil
      @channel = options.fetch(:channel, DEFAULT_CHANNEL)

      @tokens =
        block_given? ? @token_source.select { |token| yield(token, self) } :
                       @token_source.to_a
      @tokens.each_with_index { |t, i| t.index = i }
      @position =
        if first_token = @tokens.find { |t| t.channel == @channel }
          @tokens.index(first_token)
        else @tokens.length
        end
      @source_name = options.fetch(:source_name) { @token_source.source_name rescue nil }
    end

    #
    # resets the token stream and rebuilds it with a potentially new token source.
    # If no +token_source+ value is provided, the stream will attempt to reset the
    # current +token_source+ by calling +reset+ on the object. The stream will
    # then clear the token buffer and attempt to harvest new tokens. As with
    # CommonTokenStream.new, if a block is provided, tokens will be
    # yielded and discarded if the block returns a +false+ or +nil+ value.
    #
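    # === Example
    #
    #   # hypothetical: rescan from a fresh lexer over edited source text
    #   tokens.rebuild( Calculator::Lexer.new( new_source ) )
    #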
    def rebuild(token_source = nil)
      if token_source.nil?
        @token_source.reset rescue nil
      else @token_source = token_source
      end
      @tokens = block_given? ? @token_source.select { |token| yield(token) } :
                               @token_source.to_a
      @tokens.each_with_index { |t, i| t.index = i }
      @last_marker = nil
      @position =
        if first_token = @tokens.find { |t| t.channel == @channel }
          @tokens.index(first_token)
        else @tokens.length
        end
      return self
    end

    #
    # tune the stream to a new channel value
    #
    def tune_to(channel)
      @channel = channel
    end

    def token_class
      @token_source.token_class
    rescue NoMethodError
      @position == -1 and fill_buffer
      @tokens.empty? ? CommonToken : @tokens.first.class
    end

    alias index position

    def size
      @tokens.length
    end

    alias length size

    ###### State-Control ################################################

    #
    # rewind the stream to its initial state
    #
    def reset
      @position = 0
      @position += 1 while token = @tokens[@position] and
                           token.channel != @channel
      @last_marker = nil
      return self
    end

    #
    # bookmark the current position of the input stream
    #
    def mark
      @last_marker = @position
    end

    def release(marker = nil)
      # do nothing
    end


    def rewind(marker = @last_marker, release = true)
      seek(marker)
    end


    ###### Stream Navigation ###########################################

    #
    # advance the stream one step to the next on-channel token
    #
    def consume
      token = @tokens[@position] || EOF_TOKEN
      if @position < @tokens.length
        @position = future?(2) || @tokens.length
      end
      return(token)
    end

    #
    # jump to the stream position specified by +index+
    # note: seek does not check whether or not the
    # token at the specified position is on-channel.
    #
    def seek(index)
      @position = index.to_i.bound(0, @tokens.length)
      return self
    end

    #
    # return the type of the on-channel token at look-ahead distance +k+. <tt>k = 1</tt> represents
    # the current token. +k+ greater than 1 represents upcoming on-channel tokens. A negative
    # value of +k+ returns previous on-channel tokens consumed, where <tt>k = -1</tt> is the last
    # on-channel token consumed. <tt>k = 0</tt> has undefined behavior and returns +nil+
    #
    def peek(k = 1)
      tk = look(k) and return(tk.type)
    end

    #
    # operates similarly to #peek, but returns the full token object at look-ahead position +k+
    #
    def look(k = 1)
      index = future?(k) or return nil
      @tokens.fetch(index, EOF_TOKEN)
    end

    alias >> look
    def << k
      self >> -k
    end

    #
    # returns the index of the on-channel token at look-ahead position +k+ or nil if no other
    # on-channel tokens exist
    #
    def future?(k = 1)
      @position == -1 and fill_buffer

      case
      when k == 0 then nil
      when k < 0 then past?(-k)
      when k == 1 then @position
      else
        # since the stream only yields on-channel
        # tokens, the stream can't just go to the
        # next position, but rather must skip
        # over off-channel tokens
        (k - 1).times.inject(@position) do |cursor, |
          begin
            tk = @tokens.at(cursor += 1) or return(cursor)
            # ^- if tk is nil (i.e. cursor is outside array limits)
          end until tk.channel == @channel
          cursor
        end
      end
    end

    #
    # returns the index of the on-channel token at look-behind position +k+ or nil if no other
    # on-channel tokens exist before the current token
    #
    def past?(k = 1)
      @position == -1 and fill_buffer

      case
      when k == 0 then nil
      when @position - k < 0 then nil
      else

        k.times.inject(@position) do |cursor, |
          begin
            cursor <= 0 and return(nil)
            tk = @tokens.at(cursor -= 1) or return(nil)
          end until tk.channel == @channel
          cursor
        end

      end
    end

    #
    # yields each token in the stream (including off-channel tokens)
    # If no block is provided, the method returns an Enumerator object.
    # #each accepts the same arguments as #tokens
    #
    def each(*args)
      block_given? or return enum_for(:each, *args)
      tokens(*args).each { |token| yield(token) }
    end

    #
    # returns a copy of the token buffer. If +start+ and +stop+ are provided, tokens
    # returns a slice of the token buffer from <tt>start..stop</tt>. The parameters
    # are converted to integers with their <tt>to_i</tt> methods, and thus tokens
    # can be provided to specify start and stop. If a block is provided, tokens are
    # yielded and filtered out of the return array if the block returns a +false+
    # or +nil+ value.
    #
    def tokens(start = nil, stop = nil)
      stop.nil?  || stop >= @tokens.length and stop = @tokens.length - 1
      start.nil? || start < 0 and start = 0
      tokens = @tokens[start..stop]

      if block_given?
        tokens.delete_if { |t| not yield(t) }
      end

      return( tokens )
    end


    def at(i)
      @tokens.at i
    end

    #
    # identical to Array#[], as applied to the stream's token buffer
    #
    def [](i, *args)
      @tokens[i, *args]
    end

    ###### Standard Conversion Methods ###############################
    def inspect
      string = "#<%p: @token_source=%p @ %p/%p" %
               [self.class, @token_source.class, @position, @tokens.length]
      tk = look(-1) and string << " #{tk.inspect} <--"
      tk = look( 1) and string << " --> #{tk.inspect}"
      string << '>'
    end

    #
    # fetches the text content of all tokens between +start+ and +stop+ and
    # joins the chunks into a single string
    #
    def extract_text(start = 0, stop = @tokens.length - 1)
      start = start.to_i.at_least(0)
      stop = stop.to_i.at_most(@tokens.length)
      @tokens[start..stop].map! { |t| t.text }.join('')
    end

    alias to_s extract_text

  end

end