filter_io 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9208b4c2d0df82cc91ed1cad2283a3d36218987c
4
- data.tar.gz: a577f773a20e4e2be7bd219b01629d020a7522dc
3
+ metadata.gz: 153015f29c925bdc3e3f232bef84b24ce66e5cd5
4
+ data.tar.gz: 7f67801d371b9e05b6b543ff3cc9268e8e96868d
5
5
  SHA512:
6
- metadata.gz: 8edcd67f5b6220499dd21c6453d695eee991e8770f3ea48787fc1c72433275d8efe49c68fd27c48d0208edd76ad5bdf33e4fa612cc53e8a9f64e411a2348729d
7
- data.tar.gz: ea4b663fbd4aca1f2d85259904c008ca30c23ee409c76876f039aa9120115c07ef0d8b5fffe58107b39f97de150bd9310539307bf3ea232b782fa57ac6bcb53f
6
+ metadata.gz: 16952dba9a57805e96fb8f23f78bab8ea9b2b2f4638a5eda2016a8119793515066320b9d254b2008e2154f4f62bb920d572b7d8862460354d8a9443176765256
7
+ data.tar.gz: 8d3f368facfd069549d6d6881ec6643616a0fd6a47bac65589eef74a3f53d2d6766e51f98436c3800d0edfaa3acd36f780f53d9b6716c58a5c65642a837e5b50
data/README.markdown CHANGED
@@ -30,8 +30,10 @@ end
30
30
  A common usage of `filter_io` is to normalise line endings before parsing CSV data:
31
31
 
32
32
  ``` ruby
33
+ require 'csv'
34
+
33
35
  # open source stream
34
- File.open(filename) do |io|
36
+ File.open(filename, external_encoding: 'UTF-8') do |io|
35
37
  # apply filter to stream
36
38
  io = FilterIO.new(io) do |data, state|
37
39
  # grab another chunk if the last character is a delimiter
@@ -39,10 +41,10 @@ File.open(filename) do |io|
39
41
  # normalise line endings to LF
40
42
  data.gsub /\r\n|\r|\n/, "\n"
41
43
  end
42
-
44
+
43
45
  # process resulting stream normally
44
- FasterCSV.parse(io) do |row|
45
- pp row
46
+ CSV.parse(io, row_sep: "\n") do |row|
47
+ p row
46
48
  end
47
49
  end
48
50
  ```
data/Rakefile CHANGED
@@ -4,3 +4,8 @@ require 'rspec/core/rake_task'
4
4
 
5
5
  RSpec::Core::RakeTask.new(:spec)
6
6
  task :default => :spec
7
+
8
+ desc 'Open a Pry console with environment'
9
+ task :console do
10
+ exec "pry -Ilib -rfilter_io"
11
+ end
data/filter_io.gemspec CHANGED
@@ -23,4 +23,5 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency 'rake'
24
24
  spec.add_development_dependency 'simplecov'
25
25
  spec.add_development_dependency 'rspec', '~> 2.13'
26
+ spec.add_development_dependency 'pry'
26
27
  end
@@ -1,3 +1,3 @@
1
1
  class FilterIO
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
data/lib/filter_io.rb CHANGED
@@ -125,6 +125,7 @@ class FilterIO
125
125
  data
126
126
  when source_eof?
127
127
  # end of file, nothing in the buffer to return
128
+ buffer.replace empty_string if buffer
128
129
  length.nil? ? empty_string : nil
129
130
  else
130
131
  raise IOError, 'Read error'
@@ -169,7 +170,22 @@ class FilterIO
169
170
  @buffer = char + @buffer
170
171
  end
171
172
 
172
- def gets(sep_string = $/)
173
+ def gets(*args)
174
+ sep_string, limit = case args.size
175
+ when 0
176
+ [$/, nil]
177
+ when 1
178
+ if args.first.is_a?(Integer)
179
+ [$/, args.first]
180
+ else
181
+ [args.first, nil]
182
+ end
183
+ when 2
184
+ args
185
+ else
186
+ raise ArgumentError
187
+ end
188
+
173
189
  return nil if eof?
174
190
  return read if sep_string.nil?
175
191
 
@@ -189,21 +205,29 @@ class FilterIO
189
205
  end
190
206
 
191
207
  # fill the buffer until it contains the separator sequence
192
- until source_eof? or @buffer.index(sep_string)
208
+ until source_eof? || find_bytes(sep_string) || (limit && @buffer.bytesize >= limit)
193
209
  buffer_data @options[:block_size]
194
210
  end
195
211
 
196
212
  # calculate how much of the buffer to return
197
- length = if idx = @buffer.index(sep_string)
213
+ length = if idx = find_bytes(sep_string)
198
214
  # we found the separator, include it in our output
199
- length = idx + sep_string.size
215
+ length = idx + sep_string.bytesize
200
216
  else
201
217
  # no separator found (EOF or limit reached). return everything we've got
202
- length = @buffer.size
218
+ length = @buffer.bytesize
219
+ end
220
+ if limit && length > limit
221
+ length = limit
203
222
  end
204
223
 
205
- # increment the position and return the buffer fragment
206
- data = @buffer.slice!(0, length)
224
+ # extract the requested number of bytes from the buffer
225
+ data = pop_bytes(length).force_encoding(@buffer.encoding)
226
+ # continue retrieving more bytes until we have complete characters
227
+ while limit && !data.valid_encoding? && (@buffer.bytesize > 0 || !source_eof?)
228
+ data += pop_bytes(1).force_encoding(@buffer.encoding)
229
+ end
230
+ # increment the position
207
231
  @pos += data.bytesize
208
232
 
209
233
  data
@@ -249,18 +273,30 @@ class FilterIO
249
273
  str
250
274
  end
251
275
 
252
- def pop_bytes(count, output_buffer = nil)
276
+ def with_byte_buffer
253
277
  begin
254
278
  org_encoding = @buffer.encoding
255
279
  @buffer.force_encoding 'ASCII-8BIT'
280
+ yield
281
+ ensure
282
+ @buffer.force_encoding org_encoding
283
+ end
284
+ end
285
+
286
+ def pop_bytes(count, output_buffer = nil)
287
+ with_byte_buffer do
256
288
  data = @buffer.slice!(0, count)
257
289
  if output_buffer
258
290
  output_buffer.replace data
259
291
  else
260
292
  data
261
293
  end
262
- ensure
263
- @buffer.force_encoding org_encoding
294
+ end
295
+ end
296
+
297
+ def find_bytes(str)
298
+ with_byte_buffer do
299
+ @buffer.index(str)
264
300
  end
265
301
  end
266
302
 
@@ -4,6 +4,7 @@ require 'spec_helper'
4
4
  require 'stringio'
5
5
  require 'tempfile'
6
6
  require 'zlib'
7
+ require 'csv'
7
8
 
8
9
  describe FilterIO do
9
10
  def matches_reference_io_behaviour(input)
@@ -36,6 +37,9 @@ describe FilterIO do
36
37
  # compare the filtered output against the reference
37
38
  results[0].zip(results[1]).each do |expected, actual|
38
39
  expect(actual).to eq expected
40
+ if actual.respond_to? :encoding
41
+ expect(actual.encoding).to eq expected.encoding
42
+ end
39
43
  end
40
44
  end
41
45
 
@@ -314,7 +318,13 @@ describe FilterIO do
314
318
  buffer = 'abcdef'
315
319
  result = io.read(3, buffer)
316
320
  expect(result.object_id).to eq buffer.object_id
317
- expect(result).to eq 'foo'
321
+ expect(buffer).to eq 'foo'
322
+ result = io.read(4, buffer)
323
+ expect(result.object_id).to eq buffer.object_id
324
+ expect(buffer).to eq ' bar'
325
+ result = io.read(3, buffer)
326
+ expect(result).to eq nil
327
+ expect(buffer).to eq ''
318
328
  end
319
329
 
320
330
  it 'allows filtering of input with a block' do
@@ -540,6 +550,44 @@ describe FilterIO do
540
550
  end
541
551
  end
542
552
 
553
+ it 'supports `get` with a limit' do
554
+ [
555
+ "",
556
+ "x",
557
+ "foo\nbar\rbaz\n",
558
+ "abc\rdef\rghi\r",
559
+ "über",
560
+ ].each do |input|
561
+ [1, 2, 3, 4, 10].each do |limit|
562
+ matches_reference_io_behaviour(input) { |io| io.gets(limit) }
563
+ end
564
+ end
565
+ # TODO: test zero limit
566
+ end
567
+
568
+ it 'supports `gets` with a separator and a limit' do
569
+ [
570
+ "",
571
+ "x",
572
+ "foo\nbar\rbaz\n",
573
+ "abc\rdef\rghi\r",
574
+ "über",
575
+ ].each do |input|
576
+ ["\r", "x"].each do |sep_string|
577
+ [1, 2, 3, 4, 10].each do |limit|
578
+ matches_reference_io_behaviour(input) { |io| io.gets(sep_string, limit) }
579
+ end
580
+ end
581
+ end
582
+ # TODO: test zero limit
583
+ end
584
+
585
+ it 'errors when `get` is passed more than two args' do
586
+ expect {
587
+ FilterIO.new(StringIO.new).gets(1,2,3)
588
+ }.to raise_error ArgumentError
589
+ end
590
+
543
591
  it 'supports `gets` with a two character seperator' do
544
592
  ["o", "oo"].each do |sep_string|
545
593
  matches_reference_io_behaviour("foobarhelloworld") { |io| io.gets(sep_string) }
@@ -830,4 +878,19 @@ describe FilterIO do
830
878
  output.rewind
831
879
  expect(output.read).to eq 'TEST'
832
880
  end
881
+
882
+ it 'supports CSV' do
883
+ input = StringIO.new "foo,bar\nbaz"
884
+
885
+ filtered_input = FilterIO.new input do |data|
886
+ data.upcase
887
+ end
888
+
889
+ rows = []
890
+ CSV.parse(filtered_input) do |row|
891
+ rows << row
892
+ end
893
+
894
+ expect(rows).to eq [%w[FOO BAR], %w[BAZ]]
895
+ end
833
896
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filter_io
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason Weathered
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-18 00:00:00.000000000 Z
11
+ date: 2014-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '2.13'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  description:
84
98
  email:
85
99
  - jason@jasoncodes.com