filter_io 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.markdown +6 -4
- data/Rakefile +5 -0
- data/filter_io.gemspec +1 -0
- data/lib/filter_io/version.rb +1 -1
- data/lib/filter_io.rb +46 -10
- data/spec/filter_io_spec.rb +64 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 153015f29c925bdc3e3f232bef84b24ce66e5cd5
|
4
|
+
data.tar.gz: 7f67801d371b9e05b6b543ff3cc9268e8e96868d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16952dba9a57805e96fb8f23f78bab8ea9b2b2f4638a5eda2016a8119793515066320b9d254b2008e2154f4f62bb920d572b7d8862460354d8a9443176765256
|
7
|
+
data.tar.gz: 8d3f368facfd069549d6d6881ec6643616a0fd6a47bac65589eef74a3f53d2d6766e51f98436c3800d0edfaa3acd36f780f53d9b6716c58a5c65642a837e5b50
|
data/README.markdown
CHANGED
@@ -30,8 +30,10 @@ end
|
|
30
30
|
A common usage of `filter_io` is to normalise line endings before parsing CSV data:
|
31
31
|
|
32
32
|
``` ruby
|
33
|
+
require 'csv'
|
34
|
+
|
33
35
|
# open source stream
|
34
|
-
File.open(filename) do |io|
|
36
|
+
File.open(filename, external_encoding: 'UTF-8') do |io|
|
35
37
|
# apply filter to stream
|
36
38
|
io = FilterIO.new(io) do |data, state|
|
37
39
|
# grab another chunk if the last character is a delimiter
|
@@ -39,10 +41,10 @@ File.open(filename) do |io|
|
|
39
41
|
# normalise line endings to LF
|
40
42
|
data.gsub /\r\n|\r|\n/, "\n"
|
41
43
|
end
|
42
|
-
|
44
|
+
|
43
45
|
# process resulting stream normally
|
44
|
-
|
45
|
-
|
46
|
+
CSV.parse(io, row_sep: "\n") do |row|
|
47
|
+
p row
|
46
48
|
end
|
47
49
|
end
|
48
50
|
```
|
data/Rakefile
CHANGED
data/filter_io.gemspec
CHANGED
data/lib/filter_io/version.rb
CHANGED
data/lib/filter_io.rb
CHANGED
@@ -125,6 +125,7 @@ class FilterIO
|
|
125
125
|
data
|
126
126
|
when source_eof?
|
127
127
|
# end of file, nothing in the buffer to return
|
128
|
+
buffer.replace empty_string if buffer
|
128
129
|
length.nil? ? empty_string : nil
|
129
130
|
else
|
130
131
|
raise IOError, 'Read error'
|
@@ -169,7 +170,22 @@ class FilterIO
|
|
169
170
|
@buffer = char + @buffer
|
170
171
|
end
|
171
172
|
|
172
|
-
def gets(
|
173
|
+
def gets(*args)
|
174
|
+
sep_string, limit = case args.size
|
175
|
+
when 0
|
176
|
+
[$/, nil]
|
177
|
+
when 1
|
178
|
+
if args.first.is_a?(Integer)
|
179
|
+
[$/, args.first]
|
180
|
+
else
|
181
|
+
[args.first, nil]
|
182
|
+
end
|
183
|
+
when 2
|
184
|
+
args
|
185
|
+
else
|
186
|
+
raise ArgumentError
|
187
|
+
end
|
188
|
+
|
173
189
|
return nil if eof?
|
174
190
|
return read if sep_string.nil?
|
175
191
|
|
@@ -189,21 +205,29 @@ class FilterIO
|
|
189
205
|
end
|
190
206
|
|
191
207
|
# fill the buffer until it contains the separator sequence
|
192
|
-
until source_eof?
|
208
|
+
until source_eof? || find_bytes(sep_string) || (limit && @buffer.bytesize >= limit)
|
193
209
|
buffer_data @options[:block_size]
|
194
210
|
end
|
195
211
|
|
196
212
|
# calculate how much of the buffer to return
|
197
|
-
length = if idx =
|
213
|
+
length = if idx = find_bytes(sep_string)
|
198
214
|
# we found the separator, include it in our output
|
199
|
-
length = idx + sep_string.
|
215
|
+
length = idx + sep_string.bytesize
|
200
216
|
else
|
201
217
|
# no separator found (must be EOF). return everything we've got
|
202
|
-
length = @buffer.
|
218
|
+
length = @buffer.bytesize
|
219
|
+
end
|
220
|
+
if limit && length > limit
|
221
|
+
length = limit
|
203
222
|
end
|
204
223
|
|
205
|
-
#
|
206
|
-
data = @buffer.
|
224
|
+
# extract the requested number of bytes from the buffer
|
225
|
+
data = pop_bytes(length).force_encoding(@buffer.encoding)
|
226
|
+
# continue retrieving more bytes until we have complete characters
|
227
|
+
while limit && !data.valid_encoding? && (@buffer.bytesize > 0 || !source_eof?)
|
228
|
+
data += pop_bytes(1).force_encoding(@buffer.encoding)
|
229
|
+
end
|
230
|
+
# increment the position
|
207
231
|
@pos += data.bytesize
|
208
232
|
|
209
233
|
data
|
@@ -249,18 +273,30 @@ class FilterIO
|
|
249
273
|
str
|
250
274
|
end
|
251
275
|
|
252
|
-
def
|
276
|
+
def with_byte_buffer
|
253
277
|
begin
|
254
278
|
org_encoding = @buffer.encoding
|
255
279
|
@buffer.force_encoding 'ASCII-8BIT'
|
280
|
+
yield
|
281
|
+
ensure
|
282
|
+
@buffer.force_encoding org_encoding
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
def pop_bytes(count, output_buffer = nil)
|
287
|
+
with_byte_buffer do
|
256
288
|
data = @buffer.slice!(0, count)
|
257
289
|
if output_buffer
|
258
290
|
output_buffer.replace data
|
259
291
|
else
|
260
292
|
data
|
261
293
|
end
|
262
|
-
|
263
|
-
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
def find_bytes(str)
|
298
|
+
with_byte_buffer do
|
299
|
+
@buffer.index(str)
|
264
300
|
end
|
265
301
|
end
|
266
302
|
|
data/spec/filter_io_spec.rb
CHANGED
@@ -4,6 +4,7 @@ require 'spec_helper'
|
|
4
4
|
require 'stringio'
|
5
5
|
require 'tempfile'
|
6
6
|
require 'zlib'
|
7
|
+
require 'csv'
|
7
8
|
|
8
9
|
describe FilterIO do
|
9
10
|
def matches_reference_io_behaviour(input)
|
@@ -36,6 +37,9 @@ describe FilterIO do
|
|
36
37
|
# compare the filtered output against the reference
|
37
38
|
results[0].zip(results[1]).each do |expected, actual|
|
38
39
|
expect(actual).to eq expected
|
40
|
+
if actual.respond_to? :encoding
|
41
|
+
expect(actual.encoding).to eq expected.encoding
|
42
|
+
end
|
39
43
|
end
|
40
44
|
end
|
41
45
|
|
@@ -314,7 +318,13 @@ describe FilterIO do
|
|
314
318
|
buffer = 'abcdef'
|
315
319
|
result = io.read(3, buffer)
|
316
320
|
expect(result.object_id).to eq buffer.object_id
|
317
|
-
expect(
|
321
|
+
expect(buffer).to eq 'foo'
|
322
|
+
result = io.read(4, buffer)
|
323
|
+
expect(result.object_id).to eq buffer.object_id
|
324
|
+
expect(buffer).to eq ' bar'
|
325
|
+
result = io.read(3, buffer)
|
326
|
+
expect(result).to eq nil
|
327
|
+
expect(buffer).to eq ''
|
318
328
|
end
|
319
329
|
|
320
330
|
it 'allows filtering of input with a block' do
|
@@ -540,6 +550,44 @@ describe FilterIO do
|
|
540
550
|
end
|
541
551
|
end
|
542
552
|
|
553
|
+
it 'supports `get` with a limit' do
|
554
|
+
[
|
555
|
+
"",
|
556
|
+
"x",
|
557
|
+
"foo\nbar\rbaz\n",
|
558
|
+
"abc\rdef\rghi\r",
|
559
|
+
"über",
|
560
|
+
].each do |input|
|
561
|
+
[1, 2, 3, 4, 10].each do |limit|
|
562
|
+
matches_reference_io_behaviour(input) { |io| io.gets(limit) }
|
563
|
+
end
|
564
|
+
end
|
565
|
+
# TODO: test zero limit
|
566
|
+
end
|
567
|
+
|
568
|
+
it 'supports `gets` with a separator and a limit' do
|
569
|
+
[
|
570
|
+
"",
|
571
|
+
"x",
|
572
|
+
"foo\nbar\rbaz\n",
|
573
|
+
"abc\rdef\rghi\r",
|
574
|
+
"über",
|
575
|
+
].each do |input|
|
576
|
+
["\r", "x"].each do |sep_string|
|
577
|
+
[1, 2, 3, 4, 10].each do |limit|
|
578
|
+
matches_reference_io_behaviour(input) { |io| io.gets(sep_string, limit) }
|
579
|
+
end
|
580
|
+
end
|
581
|
+
end
|
582
|
+
# TODO: test zero limit
|
583
|
+
end
|
584
|
+
|
585
|
+
it 'errors when `get` is passed more than two args' do
|
586
|
+
expect {
|
587
|
+
FilterIO.new(StringIO.new).gets(1,2,3)
|
588
|
+
}.to raise_error ArgumentError
|
589
|
+
end
|
590
|
+
|
543
591
|
it 'supports `gets` with a two character seperator' do
|
544
592
|
["o", "oo"].each do |sep_string|
|
545
593
|
matches_reference_io_behaviour("foobarhelloworld") { |io| io.gets(sep_string) }
|
@@ -830,4 +878,19 @@ describe FilterIO do
|
|
830
878
|
output.rewind
|
831
879
|
expect(output.read).to eq 'TEST'
|
832
880
|
end
|
881
|
+
|
882
|
+
it 'supports CSV' do
|
883
|
+
input = StringIO.new "foo,bar\nbaz"
|
884
|
+
|
885
|
+
filtered_input = FilterIO.new input do |data|
|
886
|
+
data.upcase
|
887
|
+
end
|
888
|
+
|
889
|
+
rows = []
|
890
|
+
CSV.parse(filtered_input) do |row|
|
891
|
+
rows << row
|
892
|
+
end
|
893
|
+
|
894
|
+
expect(rows).to eq [%w[FOO BAR], %w[BAZ]]
|
895
|
+
end
|
833
896
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filter_io
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Weathered
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '2.13'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
description:
|
84
98
|
email:
|
85
99
|
- jason@jasoncodes.com
|