filter_io 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.markdown +6 -4
- data/Rakefile +5 -0
- data/filter_io.gemspec +1 -0
- data/lib/filter_io/version.rb +1 -1
- data/lib/filter_io.rb +46 -10
- data/spec/filter_io_spec.rb +64 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 153015f29c925bdc3e3f232bef84b24ce66e5cd5
|
4
|
+
data.tar.gz: 7f67801d371b9e05b6b543ff3cc9268e8e96868d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16952dba9a57805e96fb8f23f78bab8ea9b2b2f4638a5eda2016a8119793515066320b9d254b2008e2154f4f62bb920d572b7d8862460354d8a9443176765256
|
7
|
+
data.tar.gz: 8d3f368facfd069549d6d6881ec6643616a0fd6a47bac65589eef74a3f53d2d6766e51f98436c3800d0edfaa3acd36f780f53d9b6716c58a5c65642a837e5b50
|
data/README.markdown
CHANGED
@@ -30,8 +30,10 @@ end
|
|
30
30
|
A common usage of `filter_io` is to normalise line endings before parsing CSV data:
|
31
31
|
|
32
32
|
``` ruby
|
33
|
+
require 'csv'
|
34
|
+
|
33
35
|
# open source stream
|
34
|
-
File.open(filename) do |io|
|
36
|
+
File.open(filename, external_encoding: 'UTF-8') do |io|
|
35
37
|
# apply filter to stream
|
36
38
|
io = FilterIO.new(io) do |data, state|
|
37
39
|
# grab another chunk if the last character is a delimiter
|
@@ -39,10 +41,10 @@ File.open(filename) do |io|
|
|
39
41
|
# normalise line endings to LF
|
40
42
|
data.gsub /\r\n|\r|\n/, "\n"
|
41
43
|
end
|
42
|
-
|
44
|
+
|
43
45
|
# process resulting stream normally
|
44
|
-
|
45
|
-
|
46
|
+
CSV.parse(io, row_sep: "\n") do |row|
|
47
|
+
p row
|
46
48
|
end
|
47
49
|
end
|
48
50
|
```
|
data/Rakefile
CHANGED
data/filter_io.gemspec
CHANGED
data/lib/filter_io/version.rb
CHANGED
data/lib/filter_io.rb
CHANGED
@@ -125,6 +125,7 @@ class FilterIO
|
|
125
125
|
data
|
126
126
|
when source_eof?
|
127
127
|
# end of file, nothing in the buffer to return
|
128
|
+
buffer.replace empty_string if buffer
|
128
129
|
length.nil? ? empty_string : nil
|
129
130
|
else
|
130
131
|
raise IOError, 'Read error'
|
@@ -169,7 +170,22 @@ class FilterIO
|
|
169
170
|
@buffer = char + @buffer
|
170
171
|
end
|
171
172
|
|
172
|
-
def gets(
|
173
|
+
def gets(*args)
|
174
|
+
sep_string, limit = case args.size
|
175
|
+
when 0
|
176
|
+
[$/, nil]
|
177
|
+
when 1
|
178
|
+
if args.first.is_a?(Integer)
|
179
|
+
[$/, args.first]
|
180
|
+
else
|
181
|
+
[args.first, nil]
|
182
|
+
end
|
183
|
+
when 2
|
184
|
+
args
|
185
|
+
else
|
186
|
+
raise ArgumentError
|
187
|
+
end
|
188
|
+
|
173
189
|
return nil if eof?
|
174
190
|
return read if sep_string.nil?
|
175
191
|
|
@@ -189,21 +205,29 @@ class FilterIO
|
|
189
205
|
end
|
190
206
|
|
191
207
|
# fill the buffer until it contains the separator sequence
|
192
|
-
until source_eof?
|
208
|
+
until source_eof? || find_bytes(sep_string) || (limit && @buffer.bytesize >= limit)
|
193
209
|
buffer_data @options[:block_size]
|
194
210
|
end
|
195
211
|
|
196
212
|
# calculate how much of the buffer to return
|
197
|
-
length = if idx =
|
213
|
+
length = if idx = find_bytes(sep_string)
|
198
214
|
# we found the separator, include it in our output
|
199
|
-
length = idx + sep_string.
|
215
|
+
length = idx + sep_string.bytesize
|
200
216
|
else
|
201
217
|
# no separator found (must be EOF). return everything we've got
|
202
|
-
length = @buffer.
|
218
|
+
length = @buffer.bytesize
|
219
|
+
end
|
220
|
+
if limit && length > limit
|
221
|
+
length = limit
|
203
222
|
end
|
204
223
|
|
205
|
-
#
|
206
|
-
data = @buffer.
|
224
|
+
# extract the requested number of bytes from the buffer
|
225
|
+
data = pop_bytes(length).force_encoding(@buffer.encoding)
|
226
|
+
# continue retrieving more bytes until we have complete characters
|
227
|
+
while limit && !data.valid_encoding? && (@buffer.bytesize > 0 || !source_eof?)
|
228
|
+
data += pop_bytes(1).force_encoding(@buffer.encoding)
|
229
|
+
end
|
230
|
+
# increment the position
|
207
231
|
@pos += data.bytesize
|
208
232
|
|
209
233
|
data
|
@@ -249,18 +273,30 @@ class FilterIO
|
|
249
273
|
str
|
250
274
|
end
|
251
275
|
|
252
|
-
def
|
276
|
+
def with_byte_buffer
|
253
277
|
begin
|
254
278
|
org_encoding = @buffer.encoding
|
255
279
|
@buffer.force_encoding 'ASCII-8BIT'
|
280
|
+
yield
|
281
|
+
ensure
|
282
|
+
@buffer.force_encoding org_encoding
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
def pop_bytes(count, output_buffer = nil)
|
287
|
+
with_byte_buffer do
|
256
288
|
data = @buffer.slice!(0, count)
|
257
289
|
if output_buffer
|
258
290
|
output_buffer.replace data
|
259
291
|
else
|
260
292
|
data
|
261
293
|
end
|
262
|
-
|
263
|
-
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
def find_bytes(str)
|
298
|
+
with_byte_buffer do
|
299
|
+
@buffer.index(str)
|
264
300
|
end
|
265
301
|
end
|
266
302
|
|
data/spec/filter_io_spec.rb
CHANGED
@@ -4,6 +4,7 @@ require 'spec_helper'
|
|
4
4
|
require 'stringio'
|
5
5
|
require 'tempfile'
|
6
6
|
require 'zlib'
|
7
|
+
require 'csv'
|
7
8
|
|
8
9
|
describe FilterIO do
|
9
10
|
def matches_reference_io_behaviour(input)
|
@@ -36,6 +37,9 @@ describe FilterIO do
|
|
36
37
|
# compare the filtered output against the reference
|
37
38
|
results[0].zip(results[1]).each do |expected, actual|
|
38
39
|
expect(actual).to eq expected
|
40
|
+
if actual.respond_to? :encoding
|
41
|
+
expect(actual.encoding).to eq expected.encoding
|
42
|
+
end
|
39
43
|
end
|
40
44
|
end
|
41
45
|
|
@@ -314,7 +318,13 @@ describe FilterIO do
|
|
314
318
|
buffer = 'abcdef'
|
315
319
|
result = io.read(3, buffer)
|
316
320
|
expect(result.object_id).to eq buffer.object_id
|
317
|
-
expect(
|
321
|
+
expect(buffer).to eq 'foo'
|
322
|
+
result = io.read(4, buffer)
|
323
|
+
expect(result.object_id).to eq buffer.object_id
|
324
|
+
expect(buffer).to eq ' bar'
|
325
|
+
result = io.read(3, buffer)
|
326
|
+
expect(result).to eq nil
|
327
|
+
expect(buffer).to eq ''
|
318
328
|
end
|
319
329
|
|
320
330
|
it 'allows filtering of input with a block' do
|
@@ -540,6 +550,44 @@ describe FilterIO do
|
|
540
550
|
end
|
541
551
|
end
|
542
552
|
|
553
|
+
it 'supports `get` with a limit' do
|
554
|
+
[
|
555
|
+
"",
|
556
|
+
"x",
|
557
|
+
"foo\nbar\rbaz\n",
|
558
|
+
"abc\rdef\rghi\r",
|
559
|
+
"über",
|
560
|
+
].each do |input|
|
561
|
+
[1, 2, 3, 4, 10].each do |limit|
|
562
|
+
matches_reference_io_behaviour(input) { |io| io.gets(limit) }
|
563
|
+
end
|
564
|
+
end
|
565
|
+
# TODO: test zero limit
|
566
|
+
end
|
567
|
+
|
568
|
+
it 'supports `gets` with a separator and a limit' do
|
569
|
+
[
|
570
|
+
"",
|
571
|
+
"x",
|
572
|
+
"foo\nbar\rbaz\n",
|
573
|
+
"abc\rdef\rghi\r",
|
574
|
+
"über",
|
575
|
+
].each do |input|
|
576
|
+
["\r", "x"].each do |sep_string|
|
577
|
+
[1, 2, 3, 4, 10].each do |limit|
|
578
|
+
matches_reference_io_behaviour(input) { |io| io.gets(sep_string, limit) }
|
579
|
+
end
|
580
|
+
end
|
581
|
+
end
|
582
|
+
# TODO: test zero limit
|
583
|
+
end
|
584
|
+
|
585
|
+
it 'errors when `get` is passed more than two args' do
|
586
|
+
expect {
|
587
|
+
FilterIO.new(StringIO.new).gets(1,2,3)
|
588
|
+
}.to raise_error ArgumentError
|
589
|
+
end
|
590
|
+
|
543
591
|
it 'supports `gets` with a two character seperator' do
|
544
592
|
["o", "oo"].each do |sep_string|
|
545
593
|
matches_reference_io_behaviour("foobarhelloworld") { |io| io.gets(sep_string) }
|
@@ -830,4 +878,19 @@ describe FilterIO do
|
|
830
878
|
output.rewind
|
831
879
|
expect(output.read).to eq 'TEST'
|
832
880
|
end
|
881
|
+
|
882
|
+
it 'supports CSV' do
|
883
|
+
input = StringIO.new "foo,bar\nbaz"
|
884
|
+
|
885
|
+
filtered_input = FilterIO.new input do |data|
|
886
|
+
data.upcase
|
887
|
+
end
|
888
|
+
|
889
|
+
rows = []
|
890
|
+
CSV.parse(filtered_input) do |row|
|
891
|
+
rows << row
|
892
|
+
end
|
893
|
+
|
894
|
+
expect(rows).to eq [%w[FOO BAR], %w[BAZ]]
|
895
|
+
end
|
833
896
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filter_io
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Weathered
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '2.13'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
description:
|
84
98
|
email:
|
85
99
|
- jason@jasoncodes.com
|