antlr3 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
@@ -0,0 +1,985 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
=begin LICENSE
|
5
|
+
|
6
|
+
[The "BSD licence"]
|
7
|
+
Copyright (c) 2009 Kyle Yetter
|
8
|
+
All rights reserved.
|
9
|
+
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
11
|
+
modification, are permitted provided that the following conditions
|
12
|
+
are met:
|
13
|
+
|
14
|
+
1. Redistributions of source code must retain the above copyright
|
15
|
+
notice, this list of conditions and the following disclaimer.
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright
|
17
|
+
notice, this list of conditions and the following disclaimer in the
|
18
|
+
documentation and/or other materials provided with the distribution.
|
19
|
+
3. The name of the author may not be used to endorse or promote products
|
20
|
+
derived from this software without specific prior written permission.
|
21
|
+
|
22
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
23
|
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
24
|
+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
25
|
+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
26
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
27
|
+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
28
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
29
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
30
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
31
|
+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
32
|
+
|
33
|
+
=end
|
34
|
+
|
35
|
+
module ANTLR3
|
36
|
+
|
37
|
+
|
38
|
+
=begin rdoc ANTLR3::Stream
|
39
|
+
|
40
|
+
= ANTLR3 Streams
|
41
|
+
|
42
|
+
This documentation first covers the general concept of streams as used by ANTLR
|
43
|
+
recognizers, and then discusses the specific <tt>ANTLR3::Stream</tt> module.
|
44
|
+
|
45
|
+
== ANTLR Stream Classes
|
46
|
+
|
47
|
+
ANTLR recognizers need a way to walk through input data in a serialized IO-style
|
48
|
+
fashion. They also need some book-keeping about the input to provide useful
|
49
|
+
information to developers, such as current line number and column. Furthermore,
|
50
|
+
to implement backtracking and various error recovery techniques, recognizers
|
51
|
+
need a way to record various locations in the input at a number of points in the
|
52
|
+
recognition process so the input state may be restored back to a prior state.
|
53
|
+
|
54
|
+
ANTLR bundles all of this functionality into a number of Stream classes, each
|
55
|
+
designed to be used by recognizers for a specific recognition task. Most of the
|
56
|
+
Stream hierarchy is implemented in antlr3/stream.rb, which is loaded by default
|
57
|
+
when 'antlr3' is required.
|
58
|
+
|
59
|
+
---
|
60
|
+
|
61
|
+
Here's a brief overview of the various stream classes and their respective
|
62
|
+
purpose:
|
63
|
+
|
64
|
+
StringStream::
|
65
|
+
Similar to StringIO from the standard Ruby library, StringStream wraps raw
|
66
|
+
String data in a Stream interface for use by ANTLR lexers.
|
67
|
+
FileStream::
|
68
|
+
A subclass of StringStream, FileStream simply wraps data read from an IO or
|
69
|
+
File object for use by lexers.
|
70
|
+
CommonTokenStream::
|
71
|
+
The job of a TokenStream is to read lexer output and then provide ANTLR
|
72
|
+
parsers with the means to sequential walk through series of tokens.
|
73
|
+
CommonTokenStream is the default TokenStream implementation.
|
74
|
+
TokenRewriteStream::
|
75
|
+
A subclass of CommonTokenStream, TokenRewriteStreams provide rewriting-parsers
|
76
|
+
the ability to produce new output text from an input token-sequence by
|
77
|
+
managing rewrite "programs" on top of the stream.
|
78
|
+
CommonTreeNodeStream::
|
79
|
+
In a similar fashion to CommonTokenStream, CommonTreeNodeStream feeds tokens
|
80
|
+
to recognizers in a sequential fashion. However, the stream object serializes
|
81
|
+
an Abstract Syntax Tree into a flat, one-dimensional sequence, but preserves
|
82
|
+
the two-dimensional shape of the tree using special UP and DOWN tokens. The
|
83
|
+
sequence is primarily used by ANTLR Tree Parsers. *note* -- this is not
|
84
|
+
defined in antlr3/stream.rb, but antlr3/tree.rb
|
85
|
+
|
86
|
+
---
|
87
|
+
|
88
|
+
The next few sections cover the most significant methods of all stream classes.
|
89
|
+
|
90
|
+
=== consume / look / peek
|
91
|
+
|
92
|
+
<tt>stream.consume</tt> is used to advance a stream one unit. StringStreams are
|
93
|
+
advanced by one character and TokenStreams are advanced by one token.
|
94
|
+
|
95
|
+
<tt>stream.peek(k = 1)</tt> is used to quickly retrieve the object of interest
|
96
|
+
to a recognizer at look-ahead position specified by <tt>k</tt>. For
|
97
|
+
<b>StringStreams</b>, this is the <i>integer value of the character</i>
|
98
|
+
<tt>k</tt> characters ahead of the stream cursor. For <b>TokenStreams</b>, this
|
99
|
+
is the <i>integer token type of the token</i> <tt>k</tt> tokens ahead of the
|
100
|
+
stream cursor.
|
101
|
+
|
102
|
+
<tt>stream.look(k = 1)</tt> is used to retrieve the full object of interest at
|
103
|
+
look-ahead position specified by <tt>k</tt>. While <tt>peek</tt> provides the
|
104
|
+
<i>bare-minimum lightweight information</i> that the recognizer needs,
|
105
|
+
<tt>look</tt> provides the <i>full object of concern</i> in the stream. For
|
106
|
+
<b>StringStreams</b>, this is a <i>string object containing the single
|
107
|
+
character</i> <tt>k</tt> characters ahead of the stream cursor. For
|
108
|
+
<b>TokenStreams</b>, this is the <i>full token structure</i> <tt>k</tt> tokens
|
109
|
+
ahead of the stream cursor.
|
110
|
+
|
111
|
+
<b>Note:</b> in most ANTLR runtime APIs for other languages, <tt>peek</tt> is
|
112
|
+
implemented by some method with a name like <tt>LA(k)</tt> and <tt>look</tt> is
|
113
|
+
implemented by some method with a name like <tt>LT(k)</tt>. When writing this
|
114
|
+
Ruby runtime API, I found this naming practice both confusing, ambiguous, and
|
115
|
+
un-Ruby-like. Thus, I chose <tt>peek</tt> and <tt>look</tt> to represent a
|
116
|
+
quick-look (peek) and a full-fledged look-ahead operation (look). If this causes
|
117
|
+
confusion or any sort of compatibility strife for developers using this
|
118
|
+
implementation, all apologies.
|
119
|
+
|
120
|
+
=== mark / rewind / release
|
121
|
+
|
122
|
+
<tt>marker = stream.mark</tt> causes the stream to record important information
|
123
|
+
about the current stream state, place the data in an internal memory table, and
|
124
|
+
return a memento, <tt>marker</tt>. The marker object is typically an integer key
|
125
|
+
to the stream's internal memory table.
|
126
|
+
|
127
|
+
Used in tandem with, <tt>stream.rewind(mark = last_marker)</tt>, the marker can
|
128
|
+
be used to restore the stream to an earlier state. This is used by recognizers
|
129
|
+
to perform tasks such as backtracking and error recovery.
|
130
|
+
|
131
|
+
<tt>stream.release(marker = last_marker)</tt> can be used to release an existing
|
132
|
+
state marker from the memory table.
|
133
|
+
|
134
|
+
=== seek
|
135
|
+
|
136
|
+
<tt>stream.seek(position)</tt> moves the stream cursor to an absolute position
|
137
|
+
within the stream, basically like typical ruby <tt>IO#seek</tt> style methods.
|
138
|
+
However, unlike <tt>IO#seek</tt>, ANTLR streams currently always use absolute
|
139
|
+
position seeking.
|
140
|
+
|
141
|
+
== The Stream Module
|
142
|
+
|
143
|
+
<tt>ANTLR3::Stream</tt> is an abstract-ish base mixin for all IO-like stream
|
144
|
+
classes used by ANTLR recognizers.
|
145
|
+
|
146
|
+
The module doesn't do much on its own besides define arguably annoying
|
147
|
+
``abstract'' pseudo-methods that demand implementation when it is mixed in to a
|
148
|
+
class that wants to be a Stream. Right now this exists as an artifact of porting
|
149
|
+
the ANTLR Java/Python runtime library to Ruby. In Java, of course, this is
|
150
|
+
represented as an interface. In Ruby, however, objects are duck-typed and
|
151
|
+
interfaces aren't that useful as programmatic entities -- in fact, it's mildly
|
152
|
+
wasteful to have a module like this hanging out. Thus, I may axe it.
|
153
|
+
|
154
|
+
When mixed in, it does give the class a #size and #source_name attribute
|
155
|
+
methods.
|
156
|
+
|
157
|
+
Except in a small handful of places, most of the ANTLR runtime library uses
|
158
|
+
duck-typing and not type checking on objects. This means that the methods which
|
159
|
+
manipulate stream objects don't usually bother checking that the object is a
|
160
|
+
Stream and assume that the object implements the proper stream interface. Thus,
|
161
|
+
it is not strictly necessary that custom stream objects include ANTLR3::Stream,
|
162
|
+
though it isn't a bad idea.
|
163
|
+
|
164
|
+
=end
|
165
|
+
|
166
|
+
module Stream
|
167
|
+
include ANTLR3::Constants
|
168
|
+
extend ClassMacros
|
169
|
+
|
170
|
+
##
|
171
|
+
# :method: consume
|
172
|
+
# used to advance a stream one unit (such as character or token)
|
173
|
+
abstract :consume
|
174
|
+
|
175
|
+
##
|
176
|
+
# :method: peek(k=1)
|
177
|
+
# used to quickly retreive the object of interest to a recognizer at lookahead
|
178
|
+
# position specified by <tt>k</tt> (such as integer value of a character or an
|
179
|
+
# integer token type)
|
180
|
+
abstract :peek
|
181
|
+
|
182
|
+
##
|
183
|
+
# :method: look(k=1)
|
184
|
+
# used to retreive the full object of interest at lookahead position specified
|
185
|
+
# by <tt>k</tt> (such as a character string or a token structure)
|
186
|
+
abstract :look
|
187
|
+
|
188
|
+
##
|
189
|
+
# :method: mark
|
190
|
+
# TODO: document
|
191
|
+
abstract :mark
|
192
|
+
|
193
|
+
##
|
194
|
+
# :method: index
|
195
|
+
# TODO: document
|
196
|
+
abstract :index
|
197
|
+
|
198
|
+
##
|
199
|
+
# :method: rewind(marker=last_marker)
|
200
|
+
# TODO: document
|
201
|
+
abstract :rewind
|
202
|
+
|
203
|
+
##
|
204
|
+
# :method: release(marker = last_marker)
|
205
|
+
# TODO: document
|
206
|
+
abstract :release
|
207
|
+
|
208
|
+
##
|
209
|
+
# :method: seek(position)
|
210
|
+
# TODO: document
|
211
|
+
abstract :seek
|
212
|
+
|
213
|
+
# TODO: document
|
214
|
+
attr_reader :size
|
215
|
+
# TODO: document
|
216
|
+
attr_accessor :source_name
|
217
|
+
end
|
218
|
+
|
219
|
+
=begin rdoc ANTLR3::CharacterStream
|
220
|
+
|
221
|
+
CharacterStream further extends the abstract-ish base mixin Stream to add
|
222
|
+
methods specific to navigating character-based input data. Thus, it serves as an
|
223
|
+
immitation of the Java interface for text-based streams, which are primarily
|
224
|
+
used by lexers.
|
225
|
+
|
226
|
+
It adds the ``abstract'' method, <tt>substring(start, stop)</tt>, which must be
|
227
|
+
implemented to return a slice of the input string from position <tt>start</tt>
|
228
|
+
to position <tt>stop</tt>. It also adds attribute accessor methods <tt>line</tt>
|
229
|
+
and <tt>column</tt>, which are expected to indicate the current line number and
|
230
|
+
position within the current line, respectively.
|
231
|
+
|
232
|
+
== A Word About <tt>line</tt> and <tt>column</tt> attributes
|
233
|
+
|
234
|
+
Presumably, the concept of <tt>line</tt> and <tt>column</tt> attirbutes of text
|
235
|
+
are familliar to most developers. Line numbers of text are indexed from number 1
|
236
|
+
up (not 0). Column numbers are indexed from 0 up. Thus, examining sample text:
|
237
|
+
|
238
|
+
Hey this is the first line.
|
239
|
+
Oh, and this is the second line.
|
240
|
+
|
241
|
+
Line 1 is the string "Hey this is the first line\\n". If a character stream is at
|
242
|
+
line 2, character 0, the stream cursor is sitting between the characters "\\n"
|
243
|
+
and "O".
|
244
|
+
|
245
|
+
*Note:* most ANTLR runtime APIs for other languages refer to <tt>column</tt>
|
246
|
+
with the more-precise, but lengthy name <tt>charPositionInLine</tt>. I prefered
|
247
|
+
to keep it simple and familliar in this Ruby runtime API.
|
248
|
+
|
249
|
+
=end
|
250
|
+
|
251
|
+
module CharacterStream
|
252
|
+
include Stream
|
253
|
+
extend ClassMacros
|
254
|
+
EOF = -1
|
255
|
+
|
256
|
+
##
|
257
|
+
# :method: substring(start,stop)
|
258
|
+
abstract :substring
|
259
|
+
|
260
|
+
attr_accessor :line
|
261
|
+
attr_accessor :column
|
262
|
+
end
|
263
|
+
|
264
|
+
|
265
|
+
=begin rdoc ANTLR3::TokenStream
|
266
|
+
|
267
|
+
TokenStream further extends the abstract-ish base mixin Stream to add methods
|
268
|
+
specific to navigating token sequences. Thus, it serves as an imitation of the
|
269
|
+
Java interface for token-based streams, which are used by many different
|
270
|
+
components in ANTLR, including parsers and tree parsers.
|
271
|
+
|
272
|
+
== Token Streams
|
273
|
+
|
274
|
+
Token streams wrap a sequence of token objects produced by some token source,
|
275
|
+
usually a lexer. They provide the operations required by higher-level
|
276
|
+
recognizers, such as parsers and tree parsers for navigating through the
|
277
|
+
sequence of tokens. Unlike simple character-based streams, such as StringStream,
|
278
|
+
token-based streams have an additional level of complexity because they must
|
279
|
+
manage the task of "tuning" to a specific token channel.
|
280
|
+
|
281
|
+
One of the main advantages of ANTLR-based recognition is the token
|
282
|
+
<i>channel</i> feature, which allows you to hold on to all tokens of interest
|
283
|
+
while only presenting a specific set of interesting tokens to a parser. For
|
284
|
+
example, if you need to hide whitespace and comments from a parser, but hang on
|
285
|
+
to them for some other purpose, you have the lexer assign the comments and
|
286
|
+
whitespace to channel value HIDDEN as it creates the tokens.
|
287
|
+
|
288
|
+
When you create a token stream, you can tune it to some specific channel value.
|
289
|
+
Then, all <tt>peek</tt>, <tt>look</tt>, and <tt>consume</tt> operations only
|
290
|
+
yield tokens that have the same value for <tt>channel</tt>. The stream skips
|
291
|
+
over any non-matching tokens in between.
|
292
|
+
|
293
|
+
== The TokenStream Interface
|
294
|
+
|
295
|
+
In addition to the abstract methods and attribute methods provided by the base
|
296
|
+
Stream module, TokenStream adds a number of additional method implementation
|
297
|
+
requirements and attributes.
|
298
|
+
|
299
|
+
=end
|
300
|
+
|
301
|
+
module TokenStream
|
302
|
+
include Stream
|
303
|
+
extend ClassMacros
|
304
|
+
|
305
|
+
##
|
306
|
+
# expected to return the token source object (such as a lexer) from which
|
307
|
+
# all tokens in the stream were retreived
|
308
|
+
attr_reader :token_source
|
309
|
+
|
310
|
+
##
|
311
|
+
# expected to return the value of the last marker produced by a call to
|
312
|
+
# <tt>stream.mark</tt>
|
313
|
+
attr_reader :last_marker
|
314
|
+
|
315
|
+
##
|
316
|
+
# expected to return the integer index of the stream cursor
|
317
|
+
attr_reader :position
|
318
|
+
|
319
|
+
##
|
320
|
+
# the integer channel value to which the stream is ``tuned''
|
321
|
+
attr_accessor :channel
|
322
|
+
|
323
|
+
##
|
324
|
+
# :method: to_s(start=0,stop=tokens.length-1)
|
325
|
+
# should take the tokens between start and stop in the sequence, extract their text
|
326
|
+
# and return the concatenation of all the text chunks
|
327
|
+
abstract :to_s
|
328
|
+
|
329
|
+
##
|
330
|
+
# :method: at
|
331
|
+
# TODO: document
|
332
|
+
abstract :at
|
333
|
+
end
|
334
|
+
|
335
|
+
=begin rdoc ANTLR3::StringStream
|
336
|
+
|
337
|
+
A StringStream's purpose is to wrap the basic, naked text input of a recognition
|
338
|
+
system. Like all other stream types, it provides serial navigation of the input;
|
339
|
+
a recognizer can arbitrarily step forward and backward through the stream's
|
340
|
+
symbols as it requires. StringStream and its subclasses are they main way to
|
341
|
+
feed text input into an ANTLR Lexer for token processing.
|
342
|
+
|
343
|
+
The stream's symbols of interest, of course, are character values. Thus, the
|
344
|
+
#peek method returns the integer character value at look-ahead position
|
345
|
+
<tt>k</tt> and the #look method returns the character value as a +String+. They
|
346
|
+
also track various pieces of information such as the line and column numbers at
|
347
|
+
the current position.
|
348
|
+
|
349
|
+
=== Note About Text Encoding
|
350
|
+
|
351
|
+
This version of the runtime library primarily targets ruby version 1.8, which
|
352
|
+
does not have strong built-in support for multi-byte character encodings. Thus,
|
353
|
+
characters are assumed to be represented by a single byte -- an integer between
|
354
|
+
0 and 255. Ruby 1.9 does provide built-in encoding support for multi-byte
|
355
|
+
characters, but currently this library does not provide any streams to handle
|
356
|
+
non-ASCII encoding. However, encoding-savvy recognition code is a future
|
357
|
+
development goal for this project.
|
358
|
+
|
359
|
+
=end
|
360
|
+
|
361
|
+
class StringStream
|
362
|
+
include CharacterStream
|
363
|
+
|
364
|
+
# current integer character index of the stream
|
365
|
+
attr_reader :position
|
366
|
+
|
367
|
+
# the current line number of the input, indexed upward from 1
|
368
|
+
attr_reader :line
|
369
|
+
|
370
|
+
# the current character position within the current line, indexed upward from 0
|
371
|
+
attr_reader :column
|
372
|
+
|
373
|
+
# the name associated with the stream -- usually a file name
|
374
|
+
# defaults to <tt>"(string)"</tt>
|
375
|
+
attr_accessor :name
|
376
|
+
|
377
|
+
# the entire string that is wrapped by the stream
|
378
|
+
attr_reader :data
|
379
|
+
|
380
|
+
# creates a new StringStream object where +data+ is the string data to stream.
|
381
|
+
# accepts the following options in a symbol-to-value hash:
|
382
|
+
#
|
383
|
+
# [:file or :name] the (file) name to associate with the stream; default: <tt>'(string)'</tt>
|
384
|
+
# [:line] the initial line number; default: +1+
|
385
|
+
# [:column] the initial column number; default: +0+
|
386
|
+
#
|
387
|
+
def initialize(data, options = {})
|
388
|
+
@data = data.to_s
|
389
|
+
@data.equal?(data) and @data = @data.clone
|
390
|
+
@data.freeze
|
391
|
+
@position = 0
|
392
|
+
@line = options.fetch :line, 1
|
393
|
+
@column = options.fetch :column, 0
|
394
|
+
@markers = []
|
395
|
+
mark
|
396
|
+
@name ||= options[:file] || options[:name] # || '(string)'
|
397
|
+
end
|
398
|
+
|
399
|
+
def size
|
400
|
+
@data.length
|
401
|
+
end
|
402
|
+
|
403
|
+
alias length size
|
404
|
+
|
405
|
+
#
|
406
|
+
# rewinds the stream back to the start and clears out any existing marker entries
|
407
|
+
#
|
408
|
+
def reset
|
409
|
+
@position = 0
|
410
|
+
@line = 1
|
411
|
+
@column = 0
|
412
|
+
@markers.clear
|
413
|
+
return self
|
414
|
+
end
|
415
|
+
|
416
|
+
#
|
417
|
+
# advance the stream by one character; returns the character consumed
|
418
|
+
#
|
419
|
+
def consume
|
420
|
+
c = @data[@position] || EOF
|
421
|
+
if @position < @data.length
|
422
|
+
@column += 1
|
423
|
+
if c == ?\n
|
424
|
+
@line += 1
|
425
|
+
@column = 0
|
426
|
+
end
|
427
|
+
@position += 1
|
428
|
+
end
|
429
|
+
return(c)
|
430
|
+
end
|
431
|
+
|
432
|
+
#
|
433
|
+
# return the character at look-ahead distance +k+ as an integer. <tt>k = 1</tt> represents
|
434
|
+
# the current character. +k+ greater than 1 represents upcoming characters. A negative
|
435
|
+
# value of +k+ returns previous characters consumed, where <tt>k = -1</tt> is the last
|
436
|
+
# character consumed. <tt>k = 0</tt> has undefined behavior and returns +nil+
|
437
|
+
#
|
438
|
+
def peek(k = 1)
|
439
|
+
k == 0 and return nil
|
440
|
+
k += 1 if k < 0
|
441
|
+
index = @position + k - 1
|
442
|
+
index < 0 and return nil
|
443
|
+
@data[index] or EOF
|
444
|
+
end
|
445
|
+
|
446
|
+
#
|
447
|
+
# identical to #peek, except it returns the character value as a String
|
448
|
+
#
|
449
|
+
def look(k = 1)
|
450
|
+
k == 0 and return nil
|
451
|
+
k += 1 if k < 0
|
452
|
+
|
453
|
+
index = @position + k - 1
|
454
|
+
index < 0 and return nil
|
455
|
+
|
456
|
+
c = @data[index] and c.chr
|
457
|
+
end
|
458
|
+
|
459
|
+
#
|
460
|
+
# return a substring around the stream cursor at a distance +k+
|
461
|
+
# if <tt>k >= 0</tt>, return the next k characters
|
462
|
+
# if <tt>k < 0</tt>, return the previous <tt>|k|</tt> characters
|
463
|
+
#
|
464
|
+
def through(k)
|
465
|
+
if k >= 0 then @data[ @position, k ] else
|
466
|
+
start = (@position + k).at_least( 0 ) # start cannot be negative or index will wrap around
|
467
|
+
@data[ start ... @position ]
|
468
|
+
end
|
469
|
+
end
|
470
|
+
|
471
|
+
# operator style look-ahead
|
472
|
+
alias >> look
|
473
|
+
|
474
|
+
# operator style look-behind
|
475
|
+
def <<(k)
|
476
|
+
self << -k
|
477
|
+
end
|
478
|
+
|
479
|
+
alias index position
|
480
|
+
alias character_index position
|
481
|
+
|
482
|
+
alias source_name name
|
483
|
+
|
484
|
+
#
|
485
|
+
# Returns true if the stream appears to be at the beginning of a new line.
|
486
|
+
# This is an extra utility method for use inside lexer actions if needed.
|
487
|
+
#
|
488
|
+
def beginning_of_line?
|
489
|
+
@position.zero? or @data[@position - 1] == ?\n
|
490
|
+
end
|
491
|
+
|
492
|
+
#
|
493
|
+
# Returns true if the stream appears to be at the end of a new line.
|
494
|
+
# This is an extra utility method for use inside lexer actions if needed.
|
495
|
+
#
|
496
|
+
def end_of_line?
|
497
|
+
@data[@position] == ?\n if @position >= @data.length
|
498
|
+
end
|
499
|
+
|
500
|
+
#
|
501
|
+
# Returns true if the stream has been exhausted.
|
502
|
+
# This is an extra utility method for use inside lexer actions if needed.
|
503
|
+
#
|
504
|
+
def end_of_string?
|
505
|
+
@position >= @data.length
|
506
|
+
end
|
507
|
+
|
508
|
+
#
|
509
|
+
# Returns true if the stream appears to be at the beginning of a stream (position = 0).
|
510
|
+
# This is an extra utility method for use inside lexer actions if needed.
|
511
|
+
#
|
512
|
+
def beginning_of_string?
|
513
|
+
@position == 0
|
514
|
+
end
|
515
|
+
|
516
|
+
alias eof? end_of_string?
|
517
|
+
alias bof? beginning_of_string?
|
518
|
+
|
519
|
+
#
|
520
|
+
# record the current stream location parameters in the stream's marker table and
|
521
|
+
# return an integer-valued bookmark that may be used to restore the stream's
|
522
|
+
# position with the #rewind method. This method is used to implement backtracking.
|
523
|
+
#
|
524
|
+
def mark
|
525
|
+
state = [@position, @line, @column].freeze
|
526
|
+
@markers << state
|
527
|
+
return @markers.length - 1
|
528
|
+
end
|
529
|
+
|
530
|
+
#
|
531
|
+
# restore the stream to an earlier location recorded by #mark. If no marker value is
|
532
|
+
# provided, the last marker generated by #mark will be used.
|
533
|
+
#
|
534
|
+
def rewind(marker = @markers.length - 1, release = true)
|
535
|
+
(marker >= 0 and location = @markers[marker]) or return(self)
|
536
|
+
@position, @line, @column = location
|
537
|
+
release(marker) if release
|
538
|
+
return self
|
539
|
+
end
|
540
|
+
|
541
|
+
#
|
542
|
+
# the total number of markers currently in existence
|
543
|
+
#
|
544
|
+
def mark_depth
|
545
|
+
@markers.length
|
546
|
+
end
|
547
|
+
|
548
|
+
#
|
549
|
+
# the last marker value created by a call to #mark
|
550
|
+
#
|
551
|
+
def last_marker
|
552
|
+
@markers.length - 1
|
553
|
+
end
|
554
|
+
|
555
|
+
#
|
556
|
+
# let go of the bookmark data for the marker and all marker
|
557
|
+
# values created after the marker.
|
558
|
+
#
|
559
|
+
def release(marker = @markers.length - 1)
|
560
|
+
marker.between?(1, @markers.length - 1) or return
|
561
|
+
@markers[marker, @markers.length - marker ] = nil
|
562
|
+
return self
|
563
|
+
end
|
564
|
+
|
565
|
+
#
|
566
|
+
# jump to the absolute position value given by +index+.
|
567
|
+
# note: if +index+ is before the current position, the +line+ and +column+
|
568
|
+
# attributes of the stream will probably be incorrect
|
569
|
+
#
|
570
|
+
def seek(index)
|
571
|
+
index = index.bound( 0, @data.length ) # ensures index is within the stream's range
|
572
|
+
if index > @position
|
573
|
+
skipped = through( index - @position )
|
574
|
+
if lc = skipped.count("\n") and lc.zero?
|
575
|
+
@column += skipped.length
|
576
|
+
else
|
577
|
+
@line += lc
|
578
|
+
@column = skipped.length - skipped.rindex("\n") - 1
|
579
|
+
end
|
580
|
+
end
|
581
|
+
@position = index
|
582
|
+
return nil
|
583
|
+
end
|
584
|
+
|
585
|
+
#
|
586
|
+
# customized object inspection that shows:
|
587
|
+
# * the stream class
|
588
|
+
# * the stream's location in <tt>index / line:column</tt> format
|
589
|
+
# * +before_chars+ characters before the cursor (6 characters by default)
|
590
|
+
# * +after_chars+ characters after the cursor (10 characters by default)
|
591
|
+
#
|
592
|
+
def inspect(before_chars = 6, after_chars = 10)
|
593
|
+
before = through( -before_chars ).inspect
|
594
|
+
@position - before_chars > 0 and before.insert(0, '... ')
|
595
|
+
|
596
|
+
after = through( after_chars ).inspect
|
597
|
+
@position + after_chars + 1 < @data.length and after << ' ...'
|
598
|
+
|
599
|
+
location = "#@position / line #@line:#@column"
|
600
|
+
"#<#{self.class}: #{before} | #{after} @ #{location}>"
|
601
|
+
end
|
602
|
+
|
603
|
+
#
|
604
|
+
# return the string slice between position +start+ and +stop+
|
605
|
+
#
|
606
|
+
def substring(start, stop)
|
607
|
+
@data[start, stop - start + 1]
|
608
|
+
end
|
609
|
+
|
610
|
+
#
|
611
|
+
# identical to String#[]
|
612
|
+
#
|
613
|
+
def [](start, *args)
|
614
|
+
@data[start, *args]
|
615
|
+
end
|
616
|
+
end
|
617
|
+
|
618
|
+
|
619
|
+
=begin rdoc ANTLR3::FileStream
|
620
|
+
|
621
|
+
FileStream is a character stream that uses data stored in some external file. It
|
622
|
+
is nearly identical to StringStream and functions as use data located in a file
|
623
|
+
while automatically setting up the +source_name+ and +line+ parameters. It does
|
624
|
+
not actually use any buffered IO operations throughout the stream navigation
|
625
|
+
process. Instead, it reads the file data once when the stream is initialized.
|
626
|
+
|
627
|
+
=end
|
628
|
+
|
629
|
+
class FileStream < StringStream

  #
  # creates a new FileStream object using the given +file+ object.
  # If +file+ is a path string, the file will be read and its contents
  # used, with the +name+ attribute set to the path.
  # If +file+ is an IO-like object (one that responds to :read),
  # the content of the object will be used and the stream will
  # attempt to set its +name+, first trying the method #name
  # on the object, then trying the method #path.
  #
  # see StringStream.new for the list of additional options
  # the constructor accepts
  #
  def initialize(file, options = {})
    if file == $stdin
      # standard input has no path to report
      data = $stdin.read
      @name = '(stdin)'
    elsif ::File === file
      # work on a fresh read-mode handle so the caller's handle is untouched
      file = file.clone
      file.reopen(file.path, 'r')
      @name = file.path
      data = file.read
      file.close
    elsif file.respond_to?(:read)
      data = file.read
      if file.respond_to?(:name) then @name = file.name
      elsif file.respond_to?(:path) then @name = file.path
      end
    else
      # treat +file+ as a path string
      @name = file.to_s
      if test(?f, @name) then data = File.read(@name)
      else raise ArgumentError, "could not find an existing file at %p" % @name
      end
    end
    super(data, options)
  end

end
|
671
|
+
|
672
|
+
=begin rdoc ANTLR3::CommonTokenStream
|
673
|
+
|
674
|
+
CommonTokenStream serves as the primary token stream implementation for feeding
|
675
|
+
sequential token input into parsers.
|
676
|
+
|
677
|
+
Using some TokenSource (such as a lexer), the stream collects a token sequence,
|
678
|
+
setting the token's <tt>index</tt> attribute to indicate the token's position
|
679
|
+
within the stream. The streams may be tuned to some channel value; off-channel
|
680
|
+
tokens will be filtered out by the #peek, #look, and #consume methods.
|
681
|
+
|
682
|
+
=== Sample Usage
|
683
|
+
|
684
|
+
|
685
|
+
source_input = ANTLR3::StringStream.new("35 * 4 - 1")
|
686
|
+
lexer = Calculator::Lexer.new(source_input)
|
687
|
+
tokens = ANTLR3::CommonTokenStream.new(lexer)
|
688
|
+
|
689
|
+
# assume this grammar defines whitespace as tokens on channel HIDDEN
|
690
|
+
# and numbers and operations as tokens on channel DEFAULT
|
691
|
+
tokens.look # => 0 INT['35'] @ line 1 col 0 (0..1)
|
692
|
+
tokens.look(2) # => 2 MULT["*"] @ line 1 col 2 (3..3)
|
693
|
+
tokens.tokens(0, 2)
|
694
|
+
# => [0 INT["35"] @line 1 col 0 (0..1),
|
695
|
+
# 1 WS[" "] @line 1 col 2 (1..1),
|
696
|
+
# 2 MULT["*"] @ line 1 col 3 (3..3)]
|
697
|
+
# notice the #tokens method does not filter off-channel tokens
|
698
|
+
|
699
|
+
lexer.reset
|
700
|
+
hidden_tokens =
|
701
|
+
ANTLR3::CommonTokenStream.new(lexer, :channel => ANTLR3::HIDDEN)
|
702
|
+
hidden_tokens.look # => 1 WS[' '] @ line 1 col 2 (1..1)
|
703
|
+
|
704
|
+
=end
|
705
|
+
|
706
|
+
class CommonTokenStream
|
707
|
+
include TokenStream
|
708
|
+
include Enumerable
|
709
|
+
|
710
|
+
#
|
711
|
+
# constructs a new token stream using the +token_source+ provided. +token_source+ is
|
712
|
+
# usually a lexer, but can be any object that implements +next_token+ and includes
|
713
|
+
# ANTLR3::TokenSource.
|
714
|
+
#
|
715
|
+
# If a block is provided, each token harvested will be yielded and if the block
|
716
|
+
# returns a +nil+ or +false+ value, the token will not be added to the stream --
|
717
|
+
# it will be discarded.
|
718
|
+
#
|
719
|
+
# === Options
|
720
|
+
# [:channel] The channel value the stream should be tuned to initially
|
721
|
+
# [:source_name] The source name (file name) attribute of the stream
|
722
|
+
#
|
723
|
+
# === Example
|
724
|
+
#
|
725
|
+
# # create a new token stream that is tuned to channel :comment, and
|
726
|
+
# # discard all WHITE_SPACE tokens
|
727
|
+
# ANTLR3::CommonTokenStream.new(lexer, :channel => :comment) do |token|
|
728
|
+
# token.name != 'WHITE_SPACE'
|
729
|
+
# end
|
730
|
+
#
|
731
|
+
# builds the token buffer from +token_source+ (usually a lexer); an optional
# block filters tokens out of the stream when it yields nil/false
def initialize(token_source, options = {})
  @token_source = token_source
  @last_marker = nil   # no #mark bookmark yet
  @channel = options.fetch(:channel, DEFAULT_CHANNEL)

  # drain the token source into the buffer; with a filter block, tokens for
  # which the block yields a nil/false value are discarded
  @tokens =
    block_given? ? @token_source.select { |token| yield(token, self) } :
    @token_source.to_a
  # record each token's absolute stream position on the token itself
  @tokens.each_with_index { |t, i| t.index = i }
  # start the cursor on the first token tuned to @channel,
  # or at the end of the buffer when there is none
  @position =
    if first_token = @tokens.find { |t| t.channel == @channel }
      @tokens.index(first_token)
    else @tokens.length
    end
  # fall back to the source's name; +rescue nil+ tolerates sources
  # that do not implement #source_name
  @source_name = options.fetch(:source_name) { @token_source.source_name rescue nil }
end
|
747
|
+
|
748
|
+
#
|
749
|
+
# resets the token stream and rebuilds it with a potentially new token source.
|
750
|
+
# If no +token_source+ value is provided, the stream will attempt to reset the
|
751
|
+
# current +token_source+ by calling +reset+ on the object. The stream will
|
752
|
+
# then clear the token buffer and attempt to harvest new tokens. Identical in
|
753
|
+
# behavior to CommonTokenStream.new, if a block is provided, tokens will be
|
754
|
+
# yielded and discarded if the block returns a +false+ or +nil+ value.
|
755
|
+
#
|
756
|
+
# resets the stream and re-harvests the token buffer, optionally from a
# new +token_source+; a filter block works as in #initialize
def rebuild(token_source = nil)
  # swap in the new source, or best-effort reset the existing one in place
  if token_source.nil?
    @token_source.reset rescue nil
  else @token_source = token_source
  end
  # re-harvest the buffer (discarding tokens the filter block rejects)
  @tokens = block_given? ? @token_source.select { |token| yield(token) } :
            @token_source.to_a
  @tokens.each_with_index { |t, i| t.index = i }
  @last_marker = nil
  # cursor starts on the first on-channel token, or at the buffer's end
  @position =
    if first_token = @tokens.find { |t| t.channel == @channel }
      @tokens.index(first_token)
    else @tokens.length
    end
  return self
end
|
772
|
+
|
773
|
+
#
|
774
|
+
# tune the stream to a new channel value
|
775
|
+
#
|
776
|
+
#
# re-tune the stream so that #peek, #look and #consume filter
# for tokens on +channel+
#
def tune_to(channel)
  @channel = channel
end
|
779
|
+
|
780
|
+
# infers the token class used by this stream: ask the token source first;
# if it does not implement +token_class+, sample the first buffered token
# (defaulting to CommonToken when the buffer is empty)
def token_class
  begin
    @token_source.token_class
  rescue NoMethodError
    fill_buffer if @position == -1
    if @tokens.empty?
      CommonToken
    else
      @tokens.first.class
    end
  end
end
|
786
|
+
|
787
|
+
alias index position
|
788
|
+
|
789
|
+
# number of tokens currently buffered, on-channel and off-channel alike
def size
  @tokens.size
end
|
792
|
+
|
793
|
+
alias length size
|
794
|
+
|
795
|
+
###### State-Control ################################################
|
796
|
+
|
797
|
+
#
|
798
|
+
# rewind the stream to its initial state
|
799
|
+
#
|
800
|
+
#
# rewind the stream to its initial state: the cursor moves to the first
# on-channel token (or the end of the buffer when there is none) and the
# #mark bookmark is cleared
#
def reset
  cursor = 0
  until (token = @tokens[cursor]).nil? or token.channel == @channel
    cursor += 1
  end
  @position = cursor
  @last_marker = nil
  self
end
|
807
|
+
|
808
|
+
#
|
809
|
+
# bookmark the current position of the input stream
|
810
|
+
#
|
811
|
+
#
# bookmark the current stream position so it can later be restored
# via #rewind; returns the marker (the current position index)
#
def mark
  @last_marker = @position
end
|
814
|
+
|
815
|
+
# buffered token streams have no marker resources to free, so this is a
# no-op; +marker+ is accepted only for stream-interface compatibility
def release(marker = nil)
  # intentionally empty
end
|
818
|
+
|
819
|
+
|
820
|
+
# restore the stream position to +marker+ (defaulting to the position saved
# by the last #mark call); the +release+ flag is accepted for interface
# compatibility but is not used by this implementation
def rewind(marker = @last_marker, release = true)
  seek(marker)
end
|
823
|
+
|
824
|
+
|
825
|
+
###### Stream Navigation ###########################################
|
826
|
+
|
827
|
+
#
|
828
|
+
# advance the stream one step to the next on-channel token
|
829
|
+
#
|
830
|
+
#
# advance the stream one step to the next on-channel token,
# returning the token that was current before the advance
#
def consume
  # current token, or EOF_TOKEN once the cursor is past the buffer's end
  token = @tokens[@position] || EOF_TOKEN
  if @position < @tokens.length
    # jump to the index of the next on-channel token,
    # or park at the buffer's end when there is none
    @position = future?(2) || @tokens.length
  end
  return(token)
end
|
837
|
+
|
838
|
+
#
|
839
|
+
# jump to the stream position specified by +index+
|
840
|
+
# note: seek does not check whether or not the
|
841
|
+
# token at the specified position is on-channel,
|
842
|
+
#
|
843
|
+
# jump to the stream position specified by +index+
# note: seek does not check whether or not the
# token at the specified position is on-channel
def seek(index)
  # Integer#bound is an ANTLR3 core extension — presumably clamps the value
  # into 0..@tokens.length (see antlr3/util); TODO confirm
  @position = index.to_i.bound(0, @tokens.length)
  return self
end
|
847
|
+
|
848
|
+
#
|
849
|
+
# return the type of the on-channel token at look-ahead distance +k+. <tt>k = 1</tt> represents
|
850
|
+
# the current token. +k+ greater than 1 represents upcoming on-channel tokens. A negative
|
851
|
+
# value of +k+ returns previous on-channel tokens consumed, where <tt>k = -1</tt> is the last
|
852
|
+
# on-channel token consumed. <tt>k = 0</tt> has undefined behavior and returns +nil+
|
853
|
+
#
|
854
|
+
#
# returns the type of the on-channel token at look-ahead distance +k+
# (see #look for the semantics of +k+); nil when no such token exists
#
def peek(k = 1)
  token = look(k)
  token && token.type
end
|
857
|
+
|
858
|
+
#
|
859
|
+
# operates similarly to #peek, but returns the full token object at look-ahead position +k+
|
860
|
+
#
|
861
|
+
#
# like #peek, but returns the full token object at look-ahead position +k+:
# EOF_TOKEN when +k+ runs past the end of the buffer, nil when the
# look-ahead position is undefined
#
def look(k = 1)
  if index = future?(k)
    @tokens.fetch(index, EOF_TOKEN)
  end
end
|
865
|
+
|
866
|
+
alias >> look
|
867
|
+
# look backwards +k+ on-channel tokens; shorthand for <tt>self >> -k</tt>
# (and thus for <tt>look(-k)</tt>, since #>> is an alias of #look)
def << k
  self >> -k
end
|
870
|
+
|
871
|
+
#
|
872
|
+
# returns the index of the on-channel token at look-ahead position +k+ or nil if no other
|
873
|
+
# on-channel tokens exist
|
874
|
+
#
|
875
|
+
#
# returns the buffer index of the on-channel token at look-ahead position
# +k+, or nil when +k+ is 0 / no such token exists
#
def future?(k = 1)
  @position == -1 and fill_buffer  # lazily populate the buffer if needed

  case
  when k == 0 then nil             # k = 0 has undefined behavior
  when k < 0 then past?(-k)       # negative look-ahead delegates to look-behind
  when k == 1 then @position       # current position is already on-channel
  else
    # since the stream only yields on-channel
    # tokens, the stream can't just go to the
    # next position, but rather must skip
    # over off-channel tokens
    (k - 1).times.inject(@position) do |cursor, |
      begin
        tk = @tokens.at(cursor += 1) or return(cursor)
        # ^- if tk is nil (i.e. i is outside array limits)
      end until tk.channel == @channel
      cursor
    end
  end
end
|
896
|
+
|
897
|
+
#
|
898
|
+
# returns the index of the on-channel token at look-behind position +k+ or nil if no other
|
899
|
+
# on-channel tokens exist before the current token
|
900
|
+
#
|
901
|
+
#
# returns the buffer index of the on-channel token at look-behind position
# +k+, or nil when +k+ is 0 / no on-channel token exists that far back
#
def past?(k = 1)
  @position == -1 and fill_buffer  # lazily populate the buffer if needed

  case
  when k == 0 then nil                 # k = 0 has undefined behavior
  when @position - k < 0 then nil      # quick reject: not enough history
  else

    # walk backwards one on-channel token at a time, k times,
    # skipping over any off-channel tokens in between;
    # bail out with nil when the walk runs off the buffer's start
    k.times.inject(@position) do |cursor, |
      begin
        cursor <= 0 and return(nil)
        tk = @tokens.at(cursor -= 1) or return(nil)
      end until tk.channel == @channel
      cursor
    end

  end
end
|
919
|
+
|
920
|
+
#
|
921
|
+
# yields each token in the stream (including off-channel tokens)
|
922
|
+
# If no block is provided, the method returns an Enumerator object.
|
923
|
+
# #each accepts the same arguments as #tokens
|
924
|
+
#
|
925
|
+
#
# yields each token in the stream (off-channel tokens included).
# Without a block, an Enumerator is returned instead. Accepts the
# same +start+ / +stop+ arguments as #tokens.
#
def each(*args)
  return enum_for(:each, *args) unless block_given?
  tokens(*args).each { |token| yield(token) }
end
|
929
|
+
|
930
|
+
#
|
931
|
+
# returns a copy of the token buffer. If +start+ and +stop+ are provided, tokens
|
932
|
+
# returns a slice of the token buffer from <tt>start..stop</tt>. The parameters
|
933
|
+
# are converted to integers with their <tt>to_i</tt> methods, and thus tokens
|
934
|
+
# can be provided to specify start and stop. If a block is provided, tokens are
|
935
|
+
# yielded and filtered out of the return array if the block returns a +false+
|
936
|
+
# or +nil+ value.
|
937
|
+
#
|
938
|
+
#
# returns a copy of the token buffer. If +start+ and +stop+ are provided,
# returns the slice of the token buffer covering <tt>start..stop</tt>
# (indexes are clamped into the buffer's range). If a block is given,
# each token is yielded and dropped from the result when the block
# returns a +false+ or +nil+ value. Off-channel tokens are NOT filtered.
#
def tokens(start = nil, stop = nil)
  stop.nil? || stop >= @tokens.length and stop = @tokens.length - 1
  # bug fix: the original guard tested +stop < 0+ here, so a negative
  # +start+ was never normalized up to 0
  start.nil? || start < 0 and start = 0
  tokens = @tokens[start..stop]

  if block_given?
    tokens.delete_if { |t| not yield(t) }
  end

  return( tokens )
end
|
949
|
+
|
950
|
+
|
951
|
+
# fetch the token at absolute buffer index +i+, exactly like Array#at —
# no channel filtering, nil when +i+ is out of range
def at(i)
  @tokens.at(i)
end
|
954
|
+
|
955
|
+
#
|
956
|
+
# identical to Array#[], as applied to the stream's token buffer
|
957
|
+
#
|
958
|
+
#
# identical to Array#[], as applied to the stream's token buffer
#
def [](i, *args)
  # Array#slice is an alias of Array#[]
  @tokens.slice(i, *args)
end
|
961
|
+
|
962
|
+
###### Standard Conversion Methods ###############################
|
963
|
+
# human-readable summary: source class, cursor position / buffer size,
# and the neighboring on-channel tokens when available
def inspect
  string = "#<%p: @token_source=%p @ %p/%p" %
    [self.class, @token_source.class, @position, @tokens.length]
  tk = look(-1) and string << " #{tk.inspect} <--"   # previous on-channel token
  tk = look( 1) and string << " --> #{tk.inspect}"   # current/next on-channel token
  string << '>'
end
|
970
|
+
|
971
|
+
#
|
972
|
+
# fetches the text content of all tokens between +start+ and +stop+ and
|
973
|
+
# joins the chunks into a single string
|
974
|
+
#
|
975
|
+
# fetches the text content of all tokens between +start+ and +stop+ and
# joins the chunks into a single string
def extract_text(start = 0, stop = @tokens.length - 1)
  # Integer#at_least / #at_most are ANTLR3 core extensions — presumably
  # lower/upper clamps (see antlr3/util); TODO confirm
  start = start.to_i.at_least(0)
  stop = stop.to_i.at_most(@tokens.length)
  # map! mutates only the temporary slice, not @tokens itself
  @tokens[start..stop].map! { |t| t.text }.join('')
end
|
980
|
+
|
981
|
+
alias to_s extract_text
|
982
|
+
|
983
|
+
end
|
984
|
+
|
985
|
+
end
|