filter_io 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/LICENSE +20 -0
- data/README.markdown +103 -0
- data/Rakefile +32 -0
- data/VERSION +1 -0
- data/lib/filter_io.rb +304 -0
- data/test/filter_io_test.rb +678 -0
- data/test/test_helper.rb +6 -0
- metadata +89 -0
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Jason Weathered
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
# `filter_io`
|
2
|
+
## Filter IO streams with a block. Ruby's FilterInputStream.
|
3
|
+
|
4
|
+
`filter_io` is analogous to Java's `FilterInputStream` in that it allows you to intercept and process data in an IO stream. This is particularly useful when used to clean up bad input data when using a CSV or XML parser.
|
5
|
+
|
6
|
+
`filter_io` provides a one-pass approach to filtering data which can be much faster and more memory efficient than doing two passes (cleaning the source file into a buffer and then calling the original parser).
|
7
|
+
|
8
|
+
`filter_io` has been tested against Ruby 1.8.7 and Ruby 1.9.2.
|
9
|
+
|
10
|
+
### Installation
|
11
|
+
|
12
|
+
You can install from Gemcutter by running:
|
13
|
+
|
14
|
+
sudo gem install filter_io
|
15
|
+
|
16
|
+
### Example Usage
|
17
|
+
|
18
|
+
#### A Simple Example: ROT-13
|
19
|
+
|
20
|
+
io = FilterIO.new io do |data|
|
21
|
+
data.tr "A-Za-z", "N-ZA-Mn-za-m"
|
22
|
+
end
|
23
|
+
|
24
|
+
#### A Useful Example: Line Ending Normalisation
|
25
|
+
|
26
|
+
A common usage of `filter_io` is to normalise line endings before parsing CSV data:
|
27
|
+
|
28
|
+
# open source stream
|
29
|
+
File.open(filename) do |io|
|
30
|
+
|
31
|
+
# apply filter to stream
|
32
|
+
io = FilterIO.new(io) do |data, state|
|
33
|
+
# grab another chunk if the last character is a delimiter
|
34
|
+
raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
|
35
|
+
# normalise line endings to LF
|
36
|
+
data.gsub /\r\n|\r|\n/, "\n"
|
37
|
+
end
|
38
|
+
|
39
|
+
# process resulting stream normally
|
40
|
+
FasterCSV.parse(io) do |row|
|
41
|
+
pp row
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
### Reference
|
47
|
+
|
48
|
+
Call `FilterIO.new` with the original IO stream, any options and the filtering block. The returned object behaves like a normal read-only non-seekable IO stream.
|
49
|
+
|
50
|
+
#### Block `state` parameter
|
51
|
+
|
52
|
+
An optional second parameter to the block is the `state` parameter, which contains stream metadata that may be useful when processing the chunk. The methods currently available are:
|
53
|
+
|
54
|
+
* `bof?`: Returns true if this is the *first* chunk of the stream.
|
55
|
+
* `eof?`: Returns true if this is the *last* chunk of the stream.
|
56
|
+
|
57
|
+
#### `NeedMoreData`
|
58
|
+
|
59
|
+
If the filtering block needs more data to be able to return anything, you can raise a `FilterIO::NeedMoreData` exception and `filter_io` will read another block and pass the additional data to you. This can be repeated as necessary until enough data is retrieved.
|
60
|
+
|
61
|
+
For example usage of `NeedMoreData`, see the line ending normalisation example above.
|
62
|
+
|
63
|
+
#### Re-buffering Unprocessed Data
|
64
|
+
|
65
|
+
If your block is unable to process the whole chunk of data immediately, it can return both the processed chunk and the remainder to be processed later. This is done by returning a 2 element array: `[processed, unprocessed]`. If the processed output is empty and unprocessed data is returned, `filter_io` will grab some more data and call the block again.
|
66
|
+
|
67
|
+
Here's an example which processes whole lines and prepends the line length to the beginning of each line.
|
68
|
+
|
69
|
+
io = FilterIO.new io do |data, state|
|
70
|
+
output = ''
|
71
|
+
# grab complete lines until we hit EOF
|
72
|
+
while data =~ /(.*)\n/ || (state.eof? && data =~ /(.+)/)
|
73
|
+
output << "#{$1.size} #{$1}\n"
|
74
|
+
data = $'
|
75
|
+
end
|
76
|
+
# `output` contains the processed lines, `data` contains any left over partial line
|
77
|
+
[output, data]
|
78
|
+
end
|
79
|
+
|
80
|
+
#### Block Size
|
81
|
+
|
82
|
+
When either `readline`, `gets` or `read(nil)` is called, `filter_io` will process the input stream in 1,024 byte chunks. You can adjust this by passing a `:block_size` option to `new`.
|
83
|
+
|
84
|
+
#### Character Encodings
|
85
|
+
|
86
|
+
Ruby 1.9 has character encoding support which can convert between UTF-8, ISO-8859-1, ASCII-8BIT, etc. This is triggered in `IO` by using `:external_encoding` and `:internal_encoding` when opening the stream.
|
87
|
+
`filter_io` will use the underlying stream's encoding settings when reading and filtering data. The processing block will be passed data in the internal encoding.
|
88
|
+
As per the core `IO` object, if `read` is called with a length (in bytes), the data will be returned in the external encoding.
|
89
|
+
In summary, everything should Just Work™
|
90
|
+
|
91
|
+
### Note on Patches/Pull Requests
|
92
|
+
|
93
|
+
* Fork the project.
|
94
|
+
* Make your feature addition or bug fix.
|
95
|
+
* Add tests for it. This is important so I don't break it in a
|
96
|
+
future version unintentionally.
|
97
|
+
* Commit, do not mess with rakefile, version, or history.
|
98
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
99
|
+
* Send me a pull request. Bonus points for topic branches.
|
100
|
+
|
101
|
+
### Copyright
|
102
|
+
|
103
|
+
Copyright (c) 2010 Jason Weathered. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'rubygems'
require 'rake'
require 'rake/testtask'

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the filter_io plugin.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

# Gem packaging tasks are optional: they are only defined when Jeweler can be
# loaded. Everything that depends on Jeweler lives inside this block so the
# test task keeps working on machines without it.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "filter_io"
    gem.summary = "Filter IO streams with a block. Ruby's FilterInputStream."
    gem.email = "jason@jasoncodes.com"
    gem.homepage = "http://github.com/jasoncodes/filter_io"
    gem.authors = ["Jason Weathered"]
    gem.has_rdoc = false
    gem.add_dependency 'activesupport'
  end
  Jeweler::GemcutterTasks.new

  # :check_dependencies is defined by Jeweler::Tasks, so only make the tests
  # depend on it when Jeweler is actually available. Declaring it outside
  # this block makes `rake test` fail with an unknown-task error whenever the
  # LoadError branch below is taken.
  task :test => :check_dependencies
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/filter_io.rb
ADDED
@@ -0,0 +1,304 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
|
3
|
+
# Wraps a readable IO-like object and passes everything read from it through
# an optional filtering block before handing it to the caller.
#
# The wrapper exposes a read-only subset of the IO interface (+read+, +gets+,
# +readline+, +each_line+, +readchar+, +getc+, +ungetc+, +rewind+, ...).
# The only seek supported is back to the start of the stream.
#
# Two internal buffers are kept:
# * <tt>@buffer</tt>     - filtered data ready to be returned to the caller
#   (in the source's internal encoding when one is set, otherwise external).
# * <tt>@buffer_raw</tt> - data the block handed back as "unprocessed"; it is
#   stored in the source's external encoding and is fed to the block again on
#   the next fill.
class FilterIO

  # Number of bytes requested from the source per fill when no +:block_size+
  # option (or explicit read length) applies.
  DEFAULT_BLOCK_SIZE = 1024

  # Raised by a filtering block (and internally) to request that another
  # chunk of source data be appended before the block is called again.
  # NOTE: deliberately left as an Exception subclass (not StandardError) so
  # that a bare +rescue+ inside a user's block cannot swallow it.
  class NeedMoreData < Exception
  end

  # Stream metadata passed as the optional second argument of the block.
  class BlockState
    attr_reader :bof, :eof

    # bof:: true when the chunk being filtered is the first one of the stream
    # eof:: true when the chunk being filtered is the last one of the stream
    def initialize(bof, eof)
      @bof = bof
      @eof = eof
    end

    alias_method :bof?, :bof
    alias_method :eof?, :eof
  end

  # io::      the source stream; must respond to +read+, +eof?+, +pos+ and
  #           (for rewinding) +rewind+.
  # options:: optional Hash; the only recognised key is +:block_size+.
  # block::   optional filter, called as <tt>block.call(data)</tt> or
  #           <tt>block.call(data, state)</tt> depending on its arity.
  #
  # Raises ArgumentError when +options+ contains an unknown key.
  def initialize(io, options = nil, &block)
    @io = io
    @options = options || {}

    # Validated inline rather than via ActiveSupport's Hash#assert_valid_keys:
    # a plain `require 'active_support'` does not load that core extension on
    # every ActiveSupport version.
    unknown_keys = @options.keys - [:block_size]
    unless unknown_keys.empty?
      raise ArgumentError, "Unknown key(s): #{unknown_keys.map { |key| key.inspect }.join(', ')}"
    end

    @block = block
    @pos = 0
    @buffer = empty_string
    @buffer_raw = empty_string_raw
  end

  # Number of filtered bytes handed out so far.
  def pos
    @pos
  end

  # True while nothing has been consumed from the filtered stream.
  def bof?
    @pos == 0
  end

  # True when there is no filtered data left and the source is exhausted.
  def eof?
    @buffer.empty? && source_eof?
  end

  # True when the source is at EOF and no unprocessed data is waiting to be
  # passed to the block again.
  def source_eof?
    @buffer_raw.empty? && @io.eof?
  end

  # Reads one character. On encoding-aware Rubies a String is returned; on
  # Rubies without encoding support the byte's ordinal is returned.
  #
  # Raises EOFError at end of stream.
  def readchar
    raise EOFError, 'end of file reached' if eof?
    if @io.respond_to? :external_encoding
      data = empty_string_raw
      loop do
        byte = read(1)
        # the block may have filtered away everything that was left
        break if byte.nil?
        data << byte.force_encoding(@io.internal_encoding || @io.external_encoding)
        # Keep pulling bytes until they form a complete character. Stop on
        # our own EOF (not the source's): the remaining bytes of the last
        # character may still be sitting in @buffer once the source is done.
        break if data.valid_encoding? || eof?
      end
      raise EOFError, 'end of file reached' if data.empty?
      data.encode! @io.internal_encoding if @io.internal_encoding
      data
    else
      byte = read(1) or raise EOFError, 'end of file reached'
      byte.ord
    end
  end

  # Like #readchar but returns nil instead of raising at end of stream.
  def getc
    readchar
  rescue EOFError
    nil
  end

  # Reads +length+ bytes of filtered data, or everything when +length+ is nil.
  #
  # Returns a String; with a +length+, nil is returned at end of stream,
  # without one an empty String is returned at end of stream.
  # Raises ArgumentError for a negative +length+.
  def read(length = nil)

    raise ArgumentError if length && length < 0
    return '' if length == 0

    # fill the buffer up to the fill level (or whole input if length is nil)
    while !source_eof? && (length.nil? || length > bytesize(@buffer))
      buffer_data @options[:block_size] || length
    end

    # we now have all the data in the buffer that we need (or can get if EOF)
    if bytesize(@buffer) > 0
      # limit length to the buffer size if we were asked for it all or have run out (EOF)
      available = bytesize(@buffer)
      read_length = (length.nil? || length > available) ? available : length
      data = pop_bytes read_length
      @pos += bytesize(data)
      if length.nil? && data.respond_to?(:force_encoding)
        # @buffer already holds data in the internal encoding (falling back to
        # the external one), so the popped bytes only need relabelling.
        # Transcoding them again would corrupt non-ASCII characters.
        data.force_encoding(@io.internal_encoding || @io.external_encoding)
      end
      data
    elsif source_eof?
      # end of file, nothing in the buffer to return
      length.nil? ? empty_string : nil
    else
      raise IOError, 'Read error'
    end

  end

  # Returns to the start of the stream (see #seek).
  def rewind
    seek 0, IO::SEEK_SET
  end

  # Only two targets are supported: the current position (no-op) and the
  # start of the stream, which rewinds the source and discards both buffers.
  #
  # Returns 0. Raises Errno::EINVAL for IO::SEEK_END, an unknown +whence+ or
  # any other target position.
  def seek(offset, whence = IO::SEEK_SET)

    new_pos = case whence
    when IO::SEEK_SET
      offset
    when IO::SEEK_CUR
      pos + offset
    when IO::SEEK_END
      raise Errno::EINVAL, 'SEEK_END not supported'
    else
      raise Errno::EINVAL
    end

    case new_pos
    when pos
      # noop
    when 0
      @io.rewind
      @pos = 0
      @buffer = empty_string
      @buffer_raw = empty_string_raw
    else
      raise Errno::EINVAL, 'Random seek not supported'
    end

    0
  end

  # Pushes +char+ (a String, or an Integer character code) back onto the
  # front of the filtered buffer and moves the position back accordingly
  # (never below zero).
  def ungetc(char)
    # Only convert character codes. String#chr would silently truncate a
    # multi-character string to its first character.
    char = char.chr if char.is_a?(Integer)
    @pos -= bytesize(char)
    @pos = 0 if @pos < 0
    @buffer = char + @buffer
  end

  # Reads up to and including the next +sep_string+.
  #
  # sep_string:: nil reads the rest of the stream; '' selects paragraph mode
  #              (leading newlines are skipped and "\n\n" is the separator).
  #
  # Returns the line, or nil at end of stream.
  def gets(sep_string = $/)

    return nil if eof?
    return read if sep_string.nil?

    paragraph_mode = sep_string == ''
    sep_string = "\n\n" if paragraph_mode
    sep_string = sep_string.to_s unless sep_string.is_a? String

    if paragraph_mode
      # consume any leading newlines
      char = getc
      char = getc while char && char.ord == 10
      if char
        ungetc char # push the first non-newline back onto the buffer
      else
        return nil # nothing left except newlines, bail out
      end
    end

    # fill the buffer until it contains the separator sequence
    until source_eof? or @buffer.index(sep_string)
      buffer_data
    end

    # calculate how much of the buffer (in characters) to return
    idx = @buffer.index(sep_string)
    length = if idx
      # we found the separator, include it in our output
      idx + sep_string.size
    else
      # no separator found (must be EOF). return everything we've got
      @buffer.size
    end

    # increment the position and return the buffer fragment
    data = @buffer.slice!(0, length)
    @pos += bytesize(data)

    # the block may have dropped everything that was left in the source;
    # report that as end of stream rather than as an empty line
    return nil if data.empty?

    data
  end

  # Like #gets but raises EOFError at end of stream.
  def readline(sep_string = $/)
    gets(sep_string) or raise EOFError, 'end of file reached'
  end

  # Yields each line (as returned by #gets) and returns self. Without a block
  # an enumerator over the lines is returned.
  def each_line(sep_string = $/)
    # enum_for works on every Ruby that has Enumerator; calling
    # Enumerator.new(obj, method, *args) directly is rejected by modern Rubies.
    return enum_for(:each_line, sep_string) unless block_given?
    while line = gets(sep_string)
      yield line
    end
    self
  end
  alias :each :each_line
  alias :lines :each_line

  # Returns all remaining lines as an Array.
  def readlines(sep_string = $/)
    lines = []
    each_line(sep_string) { |line| lines << line }
    lines
  end

  protected

  # New empty String tagged with the encoding used for filtered data.
  def empty_string
    str = String.new
    if @io.respond_to?(:internal_encoding)
      str.force_encoding @io.internal_encoding || @io.external_encoding
    end
    str
  end

  # New empty String tagged with the source's external encoding.
  def empty_string_raw
    str = String.new
    if @io.respond_to?(:external_encoding)
      str.force_encoding @io.external_encoding
    end
    str
  end

  # Byte length of +str+, also on Rubies without String#bytesize.
  def bytesize(str)
    str.respond_to?(:bytesize) ? str.bytesize : str.size
  end

  # Relabels +str+ in place as binary so raw chunks can be concatenated
  # without encoding compatibility errors. Returns +str+.
  def as_binary(str)
    str.force_encoding 'ASCII-8BIT' if str.respond_to?(:force_encoding)
    str
  end

  # Returns a copy of +str+ converted back to the source's external encoding
  # (a plain copy when the source has no internal encoding), suitable for
  # being run through #process_data again.
  def to_external(str)
    if str.respond_to?(:encode) && @io.respond_to?(:internal_encoding) && @io.internal_encoding
      str.encode @io.external_encoding
    else
      str.dup
    end
  end

  # Removes and returns the first +count+ bytes of @buffer. The buffer is
  # temporarily treated as binary so the slice is byte- rather than
  # character-based; its encoding is restored afterwards.
  def pop_bytes(count)
    data = begin
      if @io.respond_to?(:internal_encoding)
        @buffer.force_encoding 'ASCII-8BIT'
      end
      @buffer.slice!(0, count)
    ensure
      if @io.respond_to?(:internal_encoding)
        @buffer.force_encoding @io.internal_encoding || @io.external_encoding
      end
    end
    data
  end

  # Fetches one chunk (pending unprocessed data if there is any, otherwise up
  # to +block_size+ bytes from the source), filters it and appends the result
  # to @buffer. More source data is appended and the filter retried for as
  # long as NeedMoreData is raised or the block returns only unprocessed data.
  #
  # Raises EOFError when more data is required but the source is exhausted.
  def buffer_data(block_size = nil)

    block_size ||= DEFAULT_BLOCK_SIZE

    if @buffer_raw.empty?
      raw = @io.read(block_size)
      return if raw.nil?
    else
      # bytesize >= character count, so this takes the whole pending string
      raw = @buffer_raw.slice!(0, bytesize(@buffer_raw))
    end

    initial_data_size = bytesize(raw)
    begin

      # process_data relabels/transcodes its argument in place; hand it a copy
      # so +raw+ keeps the untouched source bytes in case we need to retry
      result = process_data raw.dup, initial_data_size

      # if no processed data was returned and there is unprocessed data...
      if result.is_a?(Array) && result.size == 2 && result[0].size == 0 && result[1].size > 0
        # restore the unprocessed data (as source bytes) into the temporary buffer
        raw = to_external(result[1])
        # and add some more data to the buffer
        raise NeedMoreData
      end

    rescue NeedMoreData
      # Check the source itself: our own eof? is false whenever @buffer still
      # holds earlier output, which would let a nil read through below.
      raise EOFError, 'end of file reached' if @io.eof?
      more = @io.read(block_size)
      raise EOFError, 'end of file reached' if more.nil?
      raw = as_binary(raw) << as_binary(more)
      retry
    end

    result = [result] unless result.is_a? Array
    raise 'Block must have 1 or 2 values' unless result.size <= 2
    @buffer << result[0]
    # the unprocessed remainder is kept in the external encoding because it
    # goes through process_data (and its transcoding step) again
    @buffer_raw = to_external(result[1]) if result[1]

  end

  # Tags +data+ with the source's external encoding, transcodes it to the
  # internal encoding when one is set and passes it through the block.
  # +data+ is modified in place.
  #
  # initial_data_size:: byte size of the chunk before any retries; used to
  #                     stop waiting for a valid encoding once 4 or more extra
  #                     bytes have been appended.
  #
  # Returns the block's return value, or the converted data without a block.
  # Raises NeedMoreData when the chunk ends in the middle of a character.
  def process_data(data, initial_data_size)

    # remember the size in source bytes before any transcoding (see bof below)
    raw_size = bytesize(data)

    if data.respond_to? :encoding
      org_encoding = data.encoding
      data.force_encoding @io.external_encoding
      additional_data_size = bytesize(data) - initial_data_size
      unless data.valid_encoding? or source_eof? or additional_data_size >= 4
        data.force_encoding org_encoding
        raise NeedMoreData
      end
      data.encode! @io.internal_encoding if @io.internal_encoding
    end

    if data && @block
      # This is the first chunk when the bytes consumed from the source are
      # exactly the bytes in this chunk. Compare byte counts: the character
      # count differs from the source position for multibyte data.
      state = BlockState.new @io.pos == raw_size, source_eof?
      args = [data, state]
      # only pass as many arguments as the block declares (at least the data)
      args = args.first(@block.arity > 0 ? @block.arity : 1)
      data = @block.call(*args)
    end

    data
  end

end
|
@@ -0,0 +1,678 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'stringio'
|
5
|
+
require 'tempfile'
|
6
|
+
|
7
|
+
class FilterIOTest < ActiveSupport::TestCase
|
8
|
+
|
9
|
+
def assert_equal_reference_io(input)
|
10
|
+
|
11
|
+
expected_io = StringIO.new(input)
|
12
|
+
actual_io = FilterIO.new(StringIO.new(input))
|
13
|
+
|
14
|
+
results = [expected_io, actual_io].map do |io|
|
15
|
+
results = []
|
16
|
+
errors = []
|
17
|
+
positions = []
|
18
|
+
|
19
|
+
# call the block repeatedly until we get to EOF
|
20
|
+
# and once more at the end to check what happens at EOF
|
21
|
+
one_more_time = [true]
|
22
|
+
while !io.eof? || one_more_time.pop
|
23
|
+
begin
|
24
|
+
results << yield(io)
|
25
|
+
errors << nil
|
26
|
+
rescue Exception => e
|
27
|
+
results << nil
|
28
|
+
errors << [e.class, e.message]
|
29
|
+
end
|
30
|
+
positions << io.pos
|
31
|
+
raise 'Too many iterations' if results.size > 100
|
32
|
+
end
|
33
|
+
|
34
|
+
[results, errors, positions]
|
35
|
+
end
|
36
|
+
|
37
|
+
# compare the filtered output against the reference
|
38
|
+
results[0].zip(results[1]).each do |expected, actual|
|
39
|
+
assert_equal expected, actual
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
test "empty source" do
|
45
|
+
io = FilterIO.new(StringIO.new(''))
|
46
|
+
assert_true io.bof?
|
47
|
+
io = FilterIO.new(StringIO.new(''))
|
48
|
+
assert_true io.eof?
|
49
|
+
io = FilterIO.new(StringIO.new(''))
|
50
|
+
assert_raise EOFError do
|
51
|
+
io.readchar
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
test "simple eof" do
|
56
|
+
io = FilterIO.new(StringIO.new('x'))
|
57
|
+
assert_false io.eof?
|
58
|
+
assert_equal 'x', io.readchar.chr
|
59
|
+
assert_true io.eof?
|
60
|
+
assert_equal '', io.read
|
61
|
+
assert_equal nil, io.read(8)
|
62
|
+
end
|
63
|
+
|
64
|
+
test "simple bof" do
|
65
|
+
io = FilterIO.new(StringIO.new('x'))
|
66
|
+
assert_true io.bof?
|
67
|
+
assert_equal 'x', io.readchar.chr
|
68
|
+
assert_false io.bof?
|
69
|
+
end
|
70
|
+
|
71
|
+
test "unicode readchar" do
|
72
|
+
assert_equal_reference_io('Résume') { |io| io.readchar }
|
73
|
+
end
|
74
|
+
|
75
|
+
test "unicode read" do
|
76
|
+
(1..3).each do |read_size|
|
77
|
+
assert_equal_reference_io('Résume') { |io| io.read read_size }
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
test "unicode read all" do
|
82
|
+
assert_equal_reference_io('Résume') { |io| io.read }
|
83
|
+
end
|
84
|
+
|
85
|
+
test "unicode gets" do
|
86
|
+
assert_equal_reference_io("über\nrésumé") { |io| io.gets }
|
87
|
+
end
|
88
|
+
|
89
|
+
test "unicode in block" do
|
90
|
+
input = 'Résumé Test'
|
91
|
+
expected = 'résumé test'
|
92
|
+
[2, nil].each do |block_size|
|
93
|
+
io = FilterIO.new(StringIO.new(input), :block_size => block_size) { |data| data.downcase }
|
94
|
+
actual = io.read
|
95
|
+
assert_equal expected, actual
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
test "should not buffer forever on bad encoding" do
|
100
|
+
input = "123\xc3\xc34567890"
|
101
|
+
block_count = 0
|
102
|
+
io = FilterIO.new(StringIO.new(input), :block_size => 2) do |data|
|
103
|
+
block_count += 1
|
104
|
+
assert_operator data.size, :<=, 6
|
105
|
+
data
|
106
|
+
end
|
107
|
+
actual = io.read
|
108
|
+
if input.respond_to? :force_encoding
|
109
|
+
input.force_encoding 'ASCII-8BIT'
|
110
|
+
actual.force_encoding 'ASCII-8BIT'
|
111
|
+
end
|
112
|
+
assert_equal input, actual
|
113
|
+
assert_operator block_count, :>=, 3
|
114
|
+
end
|
115
|
+
|
116
|
+
if IO.method_defined? :external_encoding
|
117
|
+
|
118
|
+
def with_iso8859_1_test_file(internal_encoding)
|
119
|
+
Tempfile.open 'filter_io' do |tempfile|
|
120
|
+
File.open(tempfile.path, 'wb') do |io|
|
121
|
+
io.write "\xFCber\nR\xE9sum\xE9"
|
122
|
+
end
|
123
|
+
File.open(tempfile.path, :external_encoding => 'ISO-8859-1', :internal_encoding => internal_encoding) do |io|
|
124
|
+
yield io
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
test "ISO-8859-1 sanity check to UTF-8" do
|
130
|
+
with_iso8859_1_test_file 'UTF-8' do |io_raw|
|
131
|
+
assert_equal 'ü', io_raw.readchar
|
132
|
+
assert_equal "ber\n", io_raw.gets
|
133
|
+
str = io_raw.gets
|
134
|
+
assert_equal 'résumé', str.downcase
|
135
|
+
assert_equal 'UTF-8', str.encoding.name
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
test "ISO-8859-1 sanity check raw" do
|
140
|
+
with_iso8859_1_test_file nil do |io_raw|
|
141
|
+
assert_equal 'ü'.encode('ISO-8859-1'), io_raw.readchar
|
142
|
+
assert_equal "ber\n", io_raw.gets
|
143
|
+
str = io_raw.gets
|
144
|
+
assert_equal 'résumé'.encode('ISO-8859-1'), str.downcase
|
145
|
+
assert_equal 'ISO-8859-1', str.encoding.name
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
test "iso-8859-1 readchar to UTF-8" do
|
150
|
+
with_iso8859_1_test_file 'UTF-8' do |io_raw|
|
151
|
+
io = FilterIO.new(io_raw)
|
152
|
+
"über\n".chars.each do |expected|
|
153
|
+
actual = io.readchar
|
154
|
+
assert_equal expected, actual
|
155
|
+
assert_equal 'UTF-8', actual.encoding.name
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
test "iso-8859-1 readchar raw" do
|
161
|
+
with_iso8859_1_test_file nil do |io_raw|
|
162
|
+
io = FilterIO.new(io_raw)
|
163
|
+
"über\n".encode('ISO-8859-1').chars.each do |expected|
|
164
|
+
actual = io.readchar
|
165
|
+
assert_equal expected, actual
|
166
|
+
assert_equal 'ISO-8859-1', actual.encoding.name
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
test "iso-8859-1 read to UTF-8" do
|
172
|
+
with_iso8859_1_test_file 'UTF-8' do |io_raw|
|
173
|
+
io = FilterIO.new(io_raw)
|
174
|
+
assert_equal 'ü'.force_encoding('ASCII-8BIT'), io.read(2)
|
175
|
+
assert_equal 'ASCII-8BIT', io.read(2).encoding.name
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
test "iso-8859-1 read raw" do
|
180
|
+
with_iso8859_1_test_file nil do |io_raw|
|
181
|
+
io = FilterIO.new(io_raw)
|
182
|
+
assert_equal 'ü'.encode('ISO-8859-1').force_encoding('ASCII-8BIT'), io.read(1)
|
183
|
+
assert_equal 'ASCII-8BIT', io.read(2).encoding.name
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
test "iso-8859-1 lines to UTF-8" do
|
188
|
+
with_iso8859_1_test_file 'UTF-8' do |io_raw|
|
189
|
+
io = FilterIO.new(io_raw)
|
190
|
+
expected = ["über\n", 'Résumé']
|
191
|
+
actual = io.lines.to_a
|
192
|
+
assert_equal expected, actual
|
193
|
+
assert_equal 'UTF-8', actual[0].encoding.name
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
test "iso-8859-1 lines raw" do
|
198
|
+
with_iso8859_1_test_file nil do |io_raw|
|
199
|
+
io = FilterIO.new(io_raw)
|
200
|
+
expected = ["über\n", 'Résumé'].map { |str| str.encode('ISO-8859-1') }
|
201
|
+
actual = io.lines.to_a
|
202
|
+
assert_equal expected, actual
|
203
|
+
assert_equal 'ISO-8859-1', actual[0].encoding.name
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
test "iso-8859-1 block to UTF-8" do
|
208
|
+
[1, 2, nil].each do |block_size|
|
209
|
+
expected = "über\nrésumé"
|
210
|
+
with_iso8859_1_test_file 'UTF-8' do |io_raw|
|
211
|
+
io = FilterIO.new(io_raw, :block_size => block_size) do |data, state|
|
212
|
+
assert_equal 'ü', data[0] if state.bof?
|
213
|
+
assert_equal 'UTF-8', data.encoding.name
|
214
|
+
data.downcase
|
215
|
+
end
|
216
|
+
assert_equal 'ü', io.readchar
|
217
|
+
assert_equal 'UTF-8', io.gets.encoding.name
|
218
|
+
assert_equal 'rés'.force_encoding('ASCII-8BIT'), io.read(4)
|
219
|
+
str = io.gets
|
220
|
+
assert_equal 'umé', str
|
221
|
+
assert_equal 'UTF-8', str.encoding.name
|
222
|
+
end
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
test "iso-8859-1 block raw" do
|
227
|
+
[1, 2, nil].each do |block_size|
|
228
|
+
expected = "über\nrésumé".encode('ISO-8859-1')
|
229
|
+
with_iso8859_1_test_file 'ISO-8859-1' do |io_raw|
|
230
|
+
io = FilterIO.new(io_raw, :block_size => block_size) do |data, state|
|
231
|
+
assert_equal 'ü'.encode('ISO-8859-1'), data[0] if state.bof?
|
232
|
+
assert_equal 'ISO-8859-1', data.encoding.name
|
233
|
+
data.downcase
|
234
|
+
end
|
235
|
+
assert_equal 'ü'.encode('ISO-8859-1'), io.readchar
|
236
|
+
assert_equal 'ISO-8859-1', io.gets.encoding.name
|
237
|
+
assert_equal 'rés'.encode('ISO-8859-1').force_encoding('ASCII-8BIT'), io.read(3)
|
238
|
+
str = io.gets
|
239
|
+
assert_equal 'umé'.encode('ISO-8859-1'), str
|
240
|
+
assert_equal 'ISO-8859-1', str.encoding.name
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
end
|
246
|
+
|
247
|
+
test "read" do
|
248
|
+
input = 'Lorem ipsum dolor sit amet, consectetur adipisicing elit'
|
249
|
+
io_reference = StringIO.new(input)
|
250
|
+
io = FilterIO.new(StringIO.new(input))
|
251
|
+
[10,5,4,8,7,nil,nil].each do |read_len|
|
252
|
+
assert_equal io_reference.read(read_len), io.read(read_len)
|
253
|
+
assert_equal io_reference.pos, io.pos
|
254
|
+
if read_len
|
255
|
+
assert_equal io_reference.readchar, io.readchar
|
256
|
+
else
|
257
|
+
assert_raise(EOFError) { io_reference.readchar }
|
258
|
+
assert_raise(EOFError) { io.readchar }
|
259
|
+
end
|
260
|
+
assert_equal io_reference.pos, io.pos
|
261
|
+
assert_equal io_reference.eof?, io.eof?
|
262
|
+
end
|
263
|
+
assert_equal io_reference.read, io.read
|
264
|
+
assert_equal io_reference.read(4), io.read(4)
|
265
|
+
assert_true io_reference.eof?
|
266
|
+
assert_true io.eof?
|
267
|
+
end
|
268
|
+
|
269
|
+
test "read zero before eof" do
|
270
|
+
io = FilterIO.new(StringIO.new('foo'))
|
271
|
+
assert_equal '', io.read(0)
|
272
|
+
assert_equal 0, io.pos
|
273
|
+
assert_false io.eof?
|
274
|
+
end
|
275
|
+
|
276
|
+
test "read zero at eof" do
|
277
|
+
io = FilterIO.new(StringIO.new(''))
|
278
|
+
assert_equal '', io.read(0)
|
279
|
+
assert_equal 0, io.pos
|
280
|
+
assert_true io.eof?
|
281
|
+
end
|
282
|
+
|
283
|
+
test "read negative" do
|
284
|
+
io = FilterIO.new(StringIO.new('foo'))
|
285
|
+
assert_equal 'fo', io.read(2)
|
286
|
+
assert_raise ArgumentError do
|
287
|
+
io.read(-1)
|
288
|
+
end
|
289
|
+
assert_equal 2, io.pos
|
290
|
+
end
|
291
|
+
|
292
|
+
test "simple block" do
|
293
|
+
input = 'foo bar'
|
294
|
+
expected = 'FOO BAR'
|
295
|
+
io = FilterIO.new(StringIO.new(input)) do |data|
|
296
|
+
data.upcase
|
297
|
+
end
|
298
|
+
assert_equal expected, io.read
|
299
|
+
end
|
300
|
+
|
301
|
+
test "block bof and eof" do
|
302
|
+
input = "Test String"
|
303
|
+
expected = ">>>*Test** Str**ing*<<<"
|
304
|
+
io = FilterIO.new(StringIO.new(input), :block_size => 4) do |data, state|
|
305
|
+
data = "*#{data}*"
|
306
|
+
data = ">>>#{data}" if state.bof?
|
307
|
+
data = "#{data}<<<" if state.eof?
|
308
|
+
data
|
309
|
+
end
|
310
|
+
assert_equal expected, io.read
|
311
|
+
end
|
312
|
+
|
313
|
+
test "Symbol#to_proc" do
|
314
|
+
input = 'foo bar'
|
315
|
+
expected = 'FOO BAR'
|
316
|
+
io = FilterIO.new StringIO.new(input), &:upcase
|
317
|
+
assert_equal expected, io.read
|
318
|
+
end
|
319
|
+
|
320
|
+
test "block size" do
|
321
|
+
[1,4,7,9,13,30].each do |block_size|
|
322
|
+
input = ('A'..'Z').to_a.join
|
323
|
+
expected = input.chars.enum_for(:each_slice, block_size).to_a.map(&:join).map { |x| "[#{x}]" }.join
|
324
|
+
io = FilterIO.new(StringIO.new(input), :block_size => block_size) do |data|
|
325
|
+
"[#{data}]"
|
326
|
+
end
|
327
|
+
assert_equal expected, io.read
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
test "block size different to read size" do
|
332
|
+
(1..5).each do |block_size|
|
333
|
+
input_str = ('A'..'Z').to_a.join
|
334
|
+
expected_str = input_str.chars.enum_for(:each_slice, block_size).map { |x| "[#{x.join}]" }.join
|
335
|
+
(1..5).each do |read_size|
|
336
|
+
|
337
|
+
expected = StringIO.new(expected_str)
|
338
|
+
actual = FilterIO.new(StringIO.new(input_str), :block_size => block_size) do |data|
|
339
|
+
"[#{data}]"
|
340
|
+
end
|
341
|
+
|
342
|
+
until expected.eof?
|
343
|
+
assert_equal expected.read(read_size), actual.read(read_size)
|
344
|
+
assert_equal expected.pos, actual.pos
|
345
|
+
end
|
346
|
+
assert_equal expected.eof?, actual.eof?
|
347
|
+
|
348
|
+
end
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
test "rewind pass through" do
|
353
|
+
io = FilterIO.new(StringIO.new('foo bar baz'))
|
354
|
+
assert_equal 'foo b', io.read(5)
|
355
|
+
assert_equal 'ar b', io.read(4)
|
356
|
+
io.rewind
|
357
|
+
assert_equal 'foo', io.read(3)
|
358
|
+
assert_equal ' ', io.readchar.chr
|
359
|
+
io.rewind
|
360
|
+
assert_equal 'f', io.readchar.chr
|
361
|
+
assert_equal 'oo', io.read(2)
|
362
|
+
end
|
363
|
+
|
364
|
+
test "rewind resets buffer" do
|
365
|
+
str = 'foobar'
|
366
|
+
io = FilterIO.new(StringIO.new(str))
|
367
|
+
assert_equal 'foo', io.read(3)
|
368
|
+
str.replace 'FooBar'
|
369
|
+
assert_equal 'Bar', io.read(3)
|
370
|
+
io.rewind
|
371
|
+
assert_equal 'Foo', io.read(3)
|
372
|
+
end
|
373
|
+
|
374
|
+
test "rewind with block" do
|
375
|
+
input = 'abcdefghij'
|
376
|
+
expected = input[1..-1]
|
377
|
+
io = FilterIO.new(StringIO.new(input), :block_size => 4) do |data, state|
|
378
|
+
data = data[1..-1] if state.bof?
|
379
|
+
data
|
380
|
+
end
|
381
|
+
assert_equal 'bc', io.read(2)
|
382
|
+
assert_equal 'defg', io.read(4)
|
383
|
+
io.rewind
|
384
|
+
assert_equal 'bc', io.read(2)
|
385
|
+
assert_equal 'defg', io.read(4)
|
386
|
+
end
|
387
|
+
|
388
|
+
test "ungetc" do
|
389
|
+
input = 'foobar'
|
390
|
+
io = FilterIO.new(StringIO.new(input))
|
391
|
+
assert_equal 'foo', io.read(3)
|
392
|
+
io.ungetc 'x'
|
393
|
+
io.ungetc 'y'[0].ord
|
394
|
+
assert_equal 'yxb', io.read(3)
|
395
|
+
(1..5).each do |i|
|
396
|
+
io.ungetc i.to_s
|
397
|
+
end
|
398
|
+
assert_equal '54321ar', io.read
|
399
|
+
assert_equal 'foobar', input
|
400
|
+
end
|
401
|
+
|
402
|
+
test "need more data" do
|
403
|
+
input = '1ab123456cde78f9ghij0'
|
404
|
+
expected = input.gsub /\d+/, '[\0]'
|
405
|
+
(1..5).each do |block_size|
|
406
|
+
expected_size = 0
|
407
|
+
io = FilterIO.new(StringIO.new(input), :block_size => block_size) do |data, state|
|
408
|
+
expected_size += block_size
|
409
|
+
raise FilterIO::NeedMoreData if data =~ /\d\z/ && !state.eof?
|
410
|
+
assert_equal expected_size, data.size unless state.eof?
|
411
|
+
expected_size = 0
|
412
|
+
data.gsub /\d+/, '[\0]'
|
413
|
+
end
|
414
|
+
assert_equal expected, io.read
|
415
|
+
end
|
416
|
+
end
|
417
|
+
|
418
|
+
test "line ending normalisation" do
|
419
|
+
input = "This\r\nis\r\ra\n\ntest\n\r\n\nstring\r\r\n.\n"
|
420
|
+
expected = "This\nis\n\na\n\ntest\n\n\nstring\n\n.\n"
|
421
|
+
(1..5).each do |block_size|
|
422
|
+
io = FilterIO.new(StringIO.new(input), :block_size => block_size) do |data, state|
|
423
|
+
raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
|
424
|
+
data.gsub /\r\n|\r|\n/, "\n"
|
425
|
+
end
|
426
|
+
assert_equal expected, io.read
|
427
|
+
end
|
428
|
+
end
|
429
|
+
|
430
|
+
# A filter that removes every digit. Besides the filtered content, the test
# checks that pos starts at 0 and ends at the length of the filtered output.
test "dropping characters" do
  input = "ab1cde23f1g4hijklmno567pqr8stu9vw0xyz"
  # parenthesised: a bare `gsub /re/, ...` is an ambiguous first argument
  expected = input.gsub(/\d+/, '')
  (1..5).each do |block_size|
    io = FilterIO.new(StringIO.new(input), :block_size => block_size) do |data|
      data.gsub(/\d+/, '')
    end
    assert_equal 0, io.pos
    assert_equal expected, io.read
    assert_equal expected.size, io.pos
  end
end
|
442
|
+
|
443
|
+
# Exercises getc through the assert_equal_reference_io helper.
# NOTE(review): the helper is defined outside this excerpt; presumably it
# compares FilterIO against a reference IO for the same input - confirm.
test "getc" do
  assert_equal_reference_io('foo') do |stream|
    stream.getc
  end
end
|
446
|
+
|
447
|
+
# Exercises gets with no arguments over empty, single-line and multi-line
# inputs, with and without a trailing newline.
# NOTE(review): assert_equal_reference_io is defined outside this excerpt;
# presumably it compares FilterIO against a reference IO - confirm.
test "gets default" do
  samples = [
    "",
    "x",
    "foo bar",
    "foo\nbar",
    "foo\nbar\nbaz\n"
  ]
  samples.each do |text|
    assert_equal_reference_io(text) { |stream| stream.gets }
  end
end
|
458
|
+
|
459
|
+
# Exercises gets(nil) over empty, single-line and multi-line inputs.
# NOTE(review): assert_equal_reference_io is defined outside this excerpt;
# presumably it compares FilterIO against a reference IO - confirm.
test "gets all" do
  samples = [
    "",
    "x",
    "foo bar",
    "foo\nbar",
    "foo\nbar\nbaz\n"
  ]
  samples.each do |text|
    assert_equal_reference_io(text) { |stream| stream.gets(nil) }
  end
end
|
470
|
+
|
471
|
+
# Exercises gets with an explicit single-character separator ("\r" and "x")
# against every sample input.
# NOTE(review): assert_equal_reference_io is defined outside this excerpt;
# presumably it compares FilterIO against a reference IO - confirm.
test "gets separator" do
  separators = ["\r", "x"]
  samples = [
    "",
    "x",
    "foo\nbar\rbaz\n",
    "abc\rdef\rghi\r",
    "abcxyz"
  ]
  samples.each do |text|
    separators.each do |separator|
      assert_equal_reference_io(text) { |stream| stream.gets(separator) }
    end
  end
end
|
484
|
+
|
485
|
+
# Exercises gets with a one-character and a two-character separator on an
# input that contains both "o" and "oo".
# NOTE(review): assert_equal_reference_io is defined outside this excerpt;
# presumably it compares FilterIO against a reference IO - confirm.
test "gets 2 char separator" do
  %w[o oo].each do |separator|
    assert_equal_reference_io("foobarhelloworld") { |stream| stream.gets(separator) }
  end
end
|
490
|
+
|
491
|
+
# gets('') reads one paragraph at a time. Per the table below, leading blank
# lines are skipped and a run of more than two newlines after a paragraph is
# returned as exactly two.
test "gets paragraph" do
  # input text => paragraphs expected from repeated gets('') calls
  cases = {
    "" => [],
    "x" => ['x'],
    "foo bar" => ["foo bar"],
    "foo bar\n" => ["foo bar\n"],
    "foo bar\n\n" => ["foo bar\n\n"],
    "foo bar\n\n\n" => ["foo bar\n\n"],
    "foo bar\nbaz" => ["foo bar\nbaz"],
    "foo bar\n\nbaz" => ["foo bar\n\n", "baz"],
    "foo bar\n\n\nbaz" => ["foo bar\n\n", "baz"],
    "foo bar\n\nbaz\n" => ["foo bar\n\n", "baz\n"],
    "foo bar\n\nbaz\n\n" => ["foo bar\n\n", "baz\n\n"],
    "foo bar\n\nbaz\n\n\n" => ["foo bar\n\n", "baz\n\n"],
    "\n\n\nfoo bar\n\nbaz\n\n\nabc\ndef" => ["foo bar\n\n", "baz\n\n", "abc\ndef"],
  }
  cases.each do |text, paragraphs|
    io = FilterIO.new(StringIO.new(text))
    collected = []
    # keep reading until gets signals the end of the stream
    while (paragraph = io.gets(''))
      collected << paragraph
    end
    assert_equal paragraphs, collected
  end
end
|
515
|
+
|
516
|
+
# Exercises readline with the default separator and with "o", on inputs with
# and without a trailing newline.
# NOTE(review): assert_equal_reference_io is defined outside this excerpt;
# presumably it compares FilterIO against a reference IO - confirm.
test "readline" do
  ["foo\nbar\n", "foo\nbar\nbaz"].each do |text|
    assert_equal_reference_io(text) { |stream| stream.readline }
    assert_equal_reference_io(text) { |stream| stream.readline("o") }
  end
end
|
525
|
+
|
526
|
+
# Exercises readlines with the default separator and with "o", on inputs with
# and without a trailing newline.
# NOTE(review): assert_equal_reference_io is defined outside this excerpt;
# presumably it compares FilterIO against a reference IO - confirm.
test "readlines" do
  ["foo\nbar\n", "foo\nbar\nbaz"].each do |text|
    assert_equal_reference_io(text) { |stream| stream.readlines }
    assert_equal_reference_io(text) { |stream| stream.readlines("o") }
  end
end
|
535
|
+
|
536
|
+
# lines with a block yields each line and returns the io itself. Calling gets
# inside the block consumes the following line, so the iteration sees only
# every other line.
test "lines with block" do
  io = FilterIO.new(StringIO.new("foo\nbar\nbaz"))
  wanted = [ ["foo\n", "bar\n"], ["baz", nil] ]
  seen = []
  result = io.lines do |line|
    # pair the yielded line with whatever a nested gets returns
    seen << [line, io.gets]
  end
  assert_equal io, result
  assert_equal wanted, seen
end
|
546
|
+
|
547
|
+
# lines without a block returns an enumerator. Iterating it while calling
# gets on the same io interleaves the two readers over the one stream.
test "lines enumerator" do
  io = FilterIO.new(StringIO.new("foo\nbar\nbaz"))
  line_enum = io.lines
  wanted = [ ["foo\n", "bar\n"], ["baz", nil] ]
  seen = line_enum.map do |line|
    [line, io.gets]
  end
  assert_equal wanted, seen
end
|
554
|
+
|
555
|
+
# Absolute seeks: rewinding to 0 and seeking to the current position succeed.
# Seeking back to an earlier non-zero position raises Errno::EINVAL and leaves
# the stream where it was.
test "seek set" do
  io = FilterIO.new(StringIO.new("abcdef"))

  # seeking to offset 0 rewinds the stream
  assert_equal 'a', io.readchar.chr
  assert_equal 1, io.pos
  io.seek(0, IO::SEEK_SET)
  assert_equal 'a', io.readchar.chr
  assert_equal 1, io.pos

  # seeking to the position we are already at is accepted
  io.seek(1, IO::SEEK_SET)
  assert_equal 'b', io.readchar.chr
  assert_equal 2, io.pos

  # now at 2: going back to 1 is rejected and reading carries on from 2
  assert_raise(Errno::EINVAL) { io.seek(1, IO::SEEK_SET) }
  assert_equal 'c', io.readchar.chr
  assert_equal 3, io.pos
end
|
579
|
+
|
580
|
+
# Relative seeks: a zero offset is accepted; a non-zero offset in either
# direction raises Errno::EINVAL and leaves pos unchanged; an offset of
# exactly -pos rewinds to the start.
test "seek current" do
  io = FilterIO.new(StringIO.new("abcdef"))

  # same pos
  assert_equal 'ab', io.read(2)
  assert_equal 2, io.pos
  io.seek(0, IO::SEEK_CUR)
  assert_equal 2, io.pos

  # backwards fail
  assert_equal 'c', io.read(1)
  assert_equal 3, io.pos
  assert_raise Errno::EINVAL do
    # parenthesised: `io.seek -1, ...` is ambiguous (unary minus vs. binary -)
    io.seek(-1, IO::SEEK_CUR)
  end
  assert_equal 3, io.pos

  # forwards fail
  assert_equal 3, io.pos
  assert_raise Errno::EINVAL do
    io.seek(2, IO::SEEK_CUR)
  end
  assert_equal 3, io.pos

  # beginning
  io.seek(-io.pos, IO::SEEK_CUR)
  assert_equal 0, io.pos
end
|
610
|
+
|
611
|
+
# Seeking relative to the end of the stream raises Errno::EINVAL for a zero,
# a positive and a negative offset.
test "seek end" do
  io = FilterIO.new(StringIO.new("abcdef"))
  # offsets are passed in parentheses: a bare `io.seek -6, ...` is ambiguous
  # (unary minus vs. binary -)
  [0, 6, -6].each do |offset|
    assert_raise Errno::EINVAL do
      io.seek(offset, IO::SEEK_END)
    end
  end
end
|
623
|
+
|
624
|
+
# A filter that always raises NeedMoreData can never produce output; once the
# source is exhausted, readline is expected to raise EOFError.
test "need more data at eof" do
  source = "foo"
  # the block keeps its single parameter so its arity is unchanged
  io = FilterIO.new(StringIO.new(source), :block_size => 2) do |_data|
    raise FilterIO::NeedMoreData
  end
  assert_raise(EOFError) { io.readline }
end
|
633
|
+
|
634
|
+
# Collapses runs of a repeated character into a single character, similar to
# uniq(1) and STL's unique_copy. The filter returns a two-element array: the
# output for this call, and the unconsumed remainder to be re-buffered.
test "unget via block" do
  input = "122234435"
  expected = "123435"
  (1..5).each do |block_size|
    io = FilterIO.new(StringIO.new(input), :block_size => block_size) do |data, state|
      # grab the leading run of one repeated character
      run = /\A(.)\1*(?!\1)/.match(data)
      raise 'No data' unless run
      rest = run.post_match
      # if there was nothing after it and we aren't at EOF...
      # ...grab more data to make sure we're at the end of the run
      raise FilterIO::NeedMoreData if rest.empty? && !state.eof?
      # Emit the repeated character once and re-buffer the rest. Capture
      # group 1 is always a one-character String, whereas indexing the match
      # with [0] yields an Integer character code on Ruby 1.8.
      [run[1], rest]
    end
    assert_equal expected, io.read
  end
end
|
652
|
+
|
653
|
+
# Prefixes every line with its length. The filter only emits complete lines
# and hands any trailing partial line back as the second array element, so it
# is seen again on a later call once more data has been read. The test
# requires at least 10 filter calls, fails if the filter is invoked more than
# 100 times, and fails if the block is ever handed more than 6 characters.
test "get more data via unget" do
  input = "foo\ntest\n\n12345\n678"
  # the block parameter is the matched line, so no regexp globals are needed
  expected = input.gsub(/^.*$/) { |line| "#{line.size} #{line}" }
  expected += "\n" unless expected =~ /\n\z/

  block_count = 0
  io = FilterIO.new(StringIO.new(input), :block_size => 2) do |data, state|
    block_count += 1
    raise 'Too many retries' if block_count > 100
    raise "Expected less data: #{data.inspect}" if data.size > 6
    output = ''
    # consume newline-terminated lines; at EOF also take a final unterminated one
    while data =~ /(.*)\n/ || (state.eof? && data =~ /(.+)/)
      output << "#{$1.size} #{$1}\n"
      data = $'
    end
    # whatever is left in data is an incomplete line: re-buffer it
    [output, data]
  end
  actual = io.read

  assert_equal expected, actual
  assert_operator block_count, :>=, 10
end
|
677
|
+
|
678
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: filter_io
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Jason Weathered
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-06-14 00:00:00 +10:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: activesupport
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description:
|
36
|
+
email: jason@jasoncodes.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- LICENSE
|
43
|
+
- README.markdown
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- LICENSE
|
47
|
+
- README.markdown
|
48
|
+
- Rakefile
|
49
|
+
- VERSION
|
50
|
+
- lib/filter_io.rb
|
51
|
+
- test/filter_io_test.rb
|
52
|
+
- test/test_helper.rb
|
53
|
+
has_rdoc: true
|
54
|
+
homepage: http://github.com/jasoncodes/filter_io
|
55
|
+
licenses: []
|
56
|
+
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options:
|
59
|
+
- --charset=UTF-8
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
hash: 3
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
80
|
+
requirements: []
|
81
|
+
|
82
|
+
rubyforge_project:
|
83
|
+
rubygems_version: 1.3.7
|
84
|
+
signing_key:
|
85
|
+
specification_version: 3
|
86
|
+
summary: Filter IO streams with a block. Ruby's FilterInputStream.
|
87
|
+
test_files:
|
88
|
+
- test/filter_io_test.rb
|
89
|
+
- test/test_helper.rb
|