iostreams 0.16.2 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e9a14b746c83e98c98950f8fe1086e689383e997a2f62688272f419d0a144c36
4
- data.tar.gz: 61c6da8d61da48d205f8537bd0a11b0b0cf4a1160ab4e00247f3cea507540072
3
+ metadata.gz: afb047a3682e7cbb9d953c303ebf2f525ff9975248ac550b6eded486ff73a72b
4
+ data.tar.gz: b14b13f5c23663b1b173d4ee767a93ea74b381f4c89ae5b44e4610bfbe4fabe4
5
5
  SHA512:
6
- metadata.gz: 9736cb2bacdb162120a9bd6febe300760c34ea70148a62352244d33ab9142dead20cdc600242f8304ba460fcc9f4364dd2fdc8588ab4fda2dd34802e302037d8
7
- data.tar.gz: 64a012432f793855f216be83b6522fe8301eff9f02fdc967782fb5488b85b6df630c44f6c4aefb4541b76b9a8f518856af2315b64ff733516c3b73b2451341f4
6
+ metadata.gz: 00a5fa08bf88bbcc9cc3c1418f90d2dd55c3da0b129cd472ef571e27f0dec653214f2f1780069801e8fd87c676c70e097fd79e86085fcbd63e3def501f779ec5
7
+ data.tar.gz: e193d7dacbd65625c9159fb61d13d3d0b005bd40e373047e2ece791c91c5c1798c881a398c0246fda8c424bd875dead9be3c5d3cc00765b0b2977d3decf3a08d
@@ -70,6 +70,15 @@ module IOStreams
70
70
  end
71
71
 
72
72
  # Iterate over a file / stream returning one line at a time.
73
+ # Embedded newlines (within double quotes) are kept as part of the line, instead of ending it, if
74
+ # 1. The file name contains .csv
75
+ # 2. Or the embedded_within argument is set
76
+ #
77
+ # Example: Supply custom options
78
+ # IOStreams.each_line(file_name, embedded_within: '"') do |line|
79
+ # puts line
80
+ # end
81
+ #
73
82
  def self.each_line(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
74
83
  line_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |line_stream|
75
84
  line_stream.each(&block)
@@ -77,6 +86,15 @@ module IOStreams
77
86
  end
78
87
 
79
88
  # Iterate over a file / stream returning one line at a time.
89
+ # Embedded newlines (within double quotes) are kept as part of the line, instead of ending it, if
90
+ # 1. The file name contains .csv
91
+ # 2. Or the embedded_within argument is set
92
+ #
93
+ # Example: Supply custom options
94
+ # IOStreams.each_row(file_name, embedded_within: '"') do |line|
95
+ # puts line
96
+ # end
97
+ #
80
98
  def self.each_row(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
81
99
  row_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |row_stream|
82
100
  row_stream.each(&block)
@@ -90,6 +108,15 @@ module IOStreams
90
108
  # Each record / line is returned one at a time so that very large files
91
109
  # can be read without having to load the entire file into memory.
92
110
  #
111
+ # Embedded newlines (within double quotes) are kept as part of the line, instead of ending it, if
112
+ # 1. The file name contains .csv
113
+ # 2. Or the embedded_within argument is set
114
+ #
115
+ # Example: Supply custom options
116
+ # IOStreams.each_record(file_name, embedded_within: '"') do |line|
117
+ # puts line
118
+ # end
119
+ #
93
120
  # Example:
94
121
  # file_name = 'customer_data.csv.pgp'
95
122
  # IOStreams.each_record(file_name) do |hash|
@@ -291,20 +318,21 @@ module IOStreams
291
318
  # its extension(s)
292
319
  #
293
320
  # Example Zip file:
294
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
321
+ # IOStreams.streams_for_file_name('myfile.zip')
295
322
  # => [ :zip ]
296
323
  #
297
324
  # Example Encrypted Gzip file:
298
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
325
+ # IOStreams.streams_for_file_name('myfile.csv.gz.enc')
299
326
  # => [ :gz, :enc ]
300
327
  #
301
328
  # Example plain text / binary file:
302
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
303
- # => [ :file ]
329
+ # IOStreams.streams_for_file_name('myfile.csv')
330
+ # => []
304
331
  def self.streams_for_file_name(file_name)
305
332
  raise ArgumentError.new('File name cannot be nil') if file_name.nil?
306
333
  raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
307
- parts = file_name.split('.')
334
+
335
+ parts = ::File.basename(file_name).split('.')
308
336
  extensions = []
309
337
  while extension = parts.pop
310
338
  sym = extension.downcase.to_sym
@@ -314,12 +342,31 @@ module IOStreams
314
342
  extensions
315
343
  end
316
344
 
345
+ # Returns the URI scheme (for example :s3) as a lowercase symbol when the file name starts with one, otherwise nil.
346
+ def self.scheme_for_file_name(file_name)
347
+ raise ArgumentError.new('File name cannot be nil') if file_name.nil?
348
+ raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
349
+
350
+ if matches = file_name.match(/\A(\w+):\/\//)
351
+ matches[1].downcase.to_sym
352
+ end
353
+ end
354
+
317
355
  # Iterate over a file / stream returning each record/line one at a time.
318
- def self.line_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
356
+ # When embedded_within is not supplied, it defaults to a double quote if the file name (or the file_name argument) contains .csv.
357
+ def self.line_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, embedded_within: nil, **args, &block)
358
+
319
359
  return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) || file_name_or_io.is_a?(Array)
320
360
 
361
+ # TODO: needs to be improved
362
+ if embedded_within.nil? && file_name_or_io.is_a?(String)
363
+ embedded_within = '"' if file_name_or_io.include?('.csv')
364
+ elsif embedded_within.nil? && file_name
365
+ embedded_within = '"' if file_name.include?('.csv')
366
+ end
367
+
321
368
  reader(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
322
- IOStreams::Line::Reader.open(io, **args, &block)
369
+ IOStreams::Line::Reader.open(io, embedded_within: embedded_within, **args, &block)
323
370
  end
324
371
  end
325
372
 
@@ -331,6 +378,7 @@ module IOStreams
331
378
  encoding: nil,
332
379
  encode_cleaner: nil,
333
380
  encode_replace: nil,
381
+ embedded_within: nil,
334
382
  **args,
335
383
  &block)
336
384
 
@@ -338,12 +386,13 @@ module IOStreams
338
386
 
339
387
  line_reader(
340
388
  file_name_or_io,
341
- streams: streams,
342
- delimiter: delimiter,
343
- file_name: file_name,
344
- encoding: encoding,
345
- encode_cleaner: encode_cleaner,
346
- encode_replace: encode_replace
389
+ streams: streams,
390
+ delimiter: delimiter,
391
+ file_name: file_name,
392
+ encoding: encoding,
393
+ encode_cleaner: encode_cleaner,
394
+ encode_replace: encode_replace,
395
+ embedded_within: embedded_within
347
396
  ) do |io|
348
397
  file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
349
398
  IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
@@ -358,21 +407,23 @@ module IOStreams
358
407
  encoding: nil,
359
408
  encode_cleaner: nil,
360
409
  encode_replace: nil,
410
+ embedded_within: nil,
361
411
  **args,
362
412
  &block)
363
413
 
364
414
  return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader)
365
415
 
366
- line_reader(
367
- file_name_or_io,
368
- streams: streams,
369
- delimiter: delimiter,
370
- file_name: file_name,
371
- encoding: encoding,
372
- encode_cleaner: encode_cleaner,
373
- encode_replace: encode_replace
416
+ line_reader(file_name_or_io,
417
+ streams: streams,
418
+ delimiter: delimiter,
419
+ file_name: file_name,
420
+ encoding: encoding,
421
+ encode_cleaner: encode_cleaner,
422
+ encode_replace: encode_replace,
423
+ embedded_within: embedded_within
374
424
  ) do |io|
375
425
 
426
+
376
427
  file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
377
428
  IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
378
429
  end
@@ -401,6 +452,16 @@ module IOStreams
401
452
  @extensions.delete(extension.to_sym)
402
453
  end
403
454
 
455
+ # Register a URI scheme and the reader and writer streaming classes
456
+ #
457
+ # Example:
458
+ # # IOStreams::S3::Reader and IOStreams::S3::Writer must implement .open
459
+ # register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
460
+ def self.register_scheme(scheme, reader_class, writer_class)
461
+ raise(ArgumentError, "Invalid scheme #{scheme.inspect}") unless scheme.nil? || scheme.to_s =~ /\A\w+\Z/
462
+ @schemes[scheme.nil? ? nil : scheme.to_sym] = Extension.new(reader_class, writer_class)
463
+ end
464
+
404
465
  # Helper method: Returns [true|false] if a value is blank?
405
466
  def self.blank?(value)
406
467
  if value.nil?
@@ -416,6 +477,7 @@ module IOStreams
416
477
 
417
478
  # A registry to hold formats for processing files during upload or download
418
479
  @extensions = {}
480
+ @schemes = {}
419
481
 
420
482
  # Struct to hold the Stream and options if any
421
483
  StreamStruct = Struct.new(:klass, :options)
@@ -438,8 +500,10 @@ module IOStreams
438
500
  if streams.nil?
439
501
  streams = file_name_or_io.is_a?(String) ? streams_for_file_name(file_name_or_io) : [nil]
440
502
  end
503
+ scheme = scheme_for_file_name(file_name_or_io) if file_name_or_io.is_a?(String)
441
504
 
442
505
  stream_structs = streams_for(type, streams)
506
+ stream_structs << stream_struct_for_scheme(type, scheme) if stream_structs.empty? || scheme
443
507
 
444
508
  # Add encoding stream if any of its options are present
445
509
  if encoding || encode_cleaner || encode_replace
@@ -466,7 +530,6 @@ module IOStreams
466
530
  if params.is_a?(Symbol)
467
531
  [stream_struct_for_stream(type, params)]
468
532
  elsif params.is_a?(Array)
469
- return [stream_struct_for_stream(type, nil)] if params.empty?
470
533
  a = []
471
534
  params.each do |stream|
472
535
  if stream.is_a?(Hash)
@@ -491,8 +554,14 @@ module IOStreams
491
554
  StreamStruct.new(klass, options)
492
555
  end
493
556
 
557
+ def self.stream_struct_for_scheme(type, scheme, options = {})
558
+ ext = @schemes[scheme.nil? ? nil : scheme.to_sym] || raise(ArgumentError, "Unknown Scheme type: #{scheme.inspect}")
559
+ klass = ext.send("#{type}_class")
560
+ StreamStruct.new(klass, options)
561
+ end
562
+
494
563
  # Default reader/writer when no other streams need to be applied.
495
- register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
564
+ # register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
496
565
 
497
566
  # Register File extensions
498
567
  register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
@@ -510,10 +579,17 @@ module IOStreams
510
579
  register_extension(:enc, SymmetricEncryption::Reader, SymmetricEncryption::Writer)
511
580
  end
512
581
 
513
- # register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
514
- # register_scheme(:file, IOStreams::File::Reader, IOStreams::File::Writer)
582
+ # Support URI schemes
583
+ #
584
+ # Examples:
585
+ # path/file_name
586
+ # http://hostname/path/file_name
587
+ # https://hostname/path/file_name
588
+ # sftp://hostname/path/file_name
589
+ # s3://bucket/key
590
+ register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
515
591
  # register_scheme(:http, IOStreams::HTTP::Reader, IOStreams::HTTP::Writer)
516
592
  # register_scheme(:https, IOStreams::HTTPS::Reader, IOStreams::HTTPS::Writer)
517
593
  # register_scheme(:sftp, IOStreams::SFTP::Reader, IOStreams::SFTP::Writer)
518
- # register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
594
+ register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
519
595
  end
@@ -1,7 +1,7 @@
1
1
  module IOStreams
2
2
  module Line
3
3
  class Reader
4
- attr_reader :delimiter, :buffer_size, :line_count
4
+ attr_reader :delimiter, :buffer_size, :line_number
5
5
 
6
6
  # Prevent denial of service when a delimiter is not found before this number * `buffer_size` characters are read.
7
7
  MAX_BLOCKS_MULTIPLIER = 100
@@ -20,7 +20,7 @@ module IOStreams
20
20
  # Create a delimited stream reader from the supplied input stream.
21
21
  #
22
22
  # Lines returned will be in the encoding of the input stream.
23
- # To change the encoding of retruned lines, use IOStreams::Encode::Reader.
23
+ # To change the encoding of returned lines, use IOStreams::Encode::Reader.
24
24
  #
25
25
  # Parameters
26
26
  # input_stream
@@ -45,14 +45,15 @@ module IOStreams
45
45
  # - Skip "empty" / "blank" lines. RegExp?
46
46
  # - Extract header line(s) / first non-comment, non-blank line
47
47
  # - Embedded newline support, RegExp? or Proc?
48
- def initialize(input_stream, delimiter: nil, buffer_size: 65_536)
49
- @input_stream = input_stream
50
- @buffer_size = buffer_size
48
+ def initialize(input_stream, delimiter: nil, buffer_size: 65_536, embedded_within: nil)
49
+ @embedded_within = embedded_within
50
+ @input_stream = input_stream
51
+ @buffer_size = buffer_size
51
52
 
52
53
  # More efficient read buffering only supported when the input stream `#read` method supports it.
53
54
  @use_read_cache_buffer = !@input_stream.method(:read).arity.between?(0, 1)
54
55
 
55
- @line_count = 0
56
+ @line_number = 0
56
57
  @eof = false
57
58
  @read_cache_buffer = nil
58
59
  @buffer = nil
@@ -73,14 +74,40 @@ module IOStreams
73
74
  # Note:
74
75
  # * The line delimiter is _not_ returned.
75
76
  def each
77
+ line_count = 0
76
78
  until eof?
77
79
  line = readline
78
- yield(line) unless line.nil?
80
+ unless line.nil?
81
+ yield(line)
82
+ line_count += 1
83
+ end
79
84
  end
80
85
  line_count
81
86
  end
82
87
 
88
+ # Reads each line per the @delimiter. It will account for embedded newlines provided they are within double quotes.
89
+ # The embedded_within argument is set in IOStreams.line_reader
83
90
  def readline
91
+ line = _readline
92
+ if line && @embedded_within
93
+ initial_line_number = @line_number
94
+ while line.count(@embedded_within).odd?
95
+ raise "Unclosed quoted field on line #{initial_line_number}" if eof? || line.length > @buffer_size * 10
96
+ line << @delimiter
97
+ line << _readline
98
+ end
99
+ end
100
+ line
101
+ end
102
+
103
+ # Returns whether the end of file has been reached for this stream
104
+ def eof?
105
+ @eof && (@buffer.nil? || @buffer.empty?)
106
+ end
107
+
108
+ private
109
+
110
+ def _readline
84
111
  return if eof?
85
112
 
86
113
  # Keep reading until it finds the delimiter
@@ -89,29 +116,22 @@ module IOStreams
89
116
 
90
117
  # Delimiter found?
91
118
  if index
92
- data = @buffer.slice(0, index)
93
- @buffer = @buffer.slice(index + @delimiter_size, @buffer.size)
94
- @line_count += 1
119
+ data = @buffer.slice(0, index)
120
+ @buffer = @buffer.slice(index + @delimiter_size, @buffer.size)
121
+ @line_number += 1
95
122
  elsif @eof && @buffer.empty?
96
123
  data = nil
97
124
  @buffer = nil
98
125
  else
99
126
  # Last line without delimiter
100
- data = @buffer
101
- @buffer = nil
102
- @line_count += 1
127
+ data = @buffer
128
+ @buffer = nil
129
+ @line_number += 1
103
130
  end
104
131
 
105
132
  data
106
133
  end
107
134
 
108
- # Returns whether the end of file has been reached for this stream
109
- def eof?
110
- @eof && (@buffer.nil? || @buffer.empty?)
111
- end
112
-
113
- private
114
-
115
135
  # Returns [Integer] the number of characters read into the internal buffer
116
136
  # Returns 0 on EOF
117
137
  def read_block
@@ -32,14 +32,14 @@ module IOStreams
32
32
  # :csv, :hash, :array, :json, :psv, :fixed
33
33
  #
34
34
  # For all other parameters, see Tabular::Header.new
35
- def initialize(delimited, cleanse_header: true, **args)
35
+ def initialize(line_reader, cleanse_header: true, **args)
36
36
  @tabular = IOStreams::Tabular.new(**args)
37
- @delimited = delimited
37
+ @line_reader = line_reader
38
38
  @cleanse_header = cleanse_header
39
39
  end
40
40
 
41
41
  def each
42
- @delimited.each do |line|
42
+ @line_reader.each do |line|
43
43
  if @tabular.header?
44
44
  @tabular.parse_header(line)
45
45
  @tabular.cleanse_header! if @cleanse_header
@@ -7,19 +7,17 @@ end
7
7
  require 'uri'
8
8
  module IOStreams
9
9
  module S3
10
+ autoload :Reader, 'io_streams/s3/reader'
11
+ autoload :Writer, 'io_streams/s3/writer'
12
+
10
13
  # Sample URI: s3://mybucket/user/abc.zip
11
14
  def self.parse_uri(uri)
12
- # 's3://mybucket/user/abc.zip'
13
15
  uri = URI.parse(uri)
14
- # Filename and bucket only
15
- if uri.scheme.nil?
16
- segments = uri.path.split('/')
17
- raise "S3 URI must at the very least contain '<bucket_name>/<key>'" if (segments.size == 1) || (segments[0] == '')
18
- {
19
- bucket: segments.shift,
20
- key: segments.join('/')
21
- }
22
- end
16
+ raise "Invalid URI. Required Format: 's3://<bucket_name>/<key>'" unless uri.scheme == 's3'
17
+ {
18
+ bucket: uri.host,
19
+ key: uri.path.sub(/\A\//, '')
20
+ }
23
21
  end
24
22
  end
25
23
  end
@@ -2,63 +2,25 @@ module IOStreams
2
2
  module S3
3
3
  class Reader
4
4
  # Read from a AWS S3 file
5
- def self.open(uri = nil, bucket: nil, region: nil, key: nil, &block)
6
- options = uri.nil? ? args : parse_uri(uri).merge(args)
5
+ def self.open(uri, region: nil, **args, &block)
6
+ raise(ArgumentError, 'file_name must be a URI string') unless uri.is_a?(String)
7
+
7
8
  s3 = region.nil? ? Aws::S3::Resource.new : Aws::S3::Resource.new(region: region)
9
+ options = IOStreams::S3.parse_uri(uri)
8
10
  object = s3.bucket(options[:bucket]).object(options[:key])
9
11
 
10
- IO.pipe do |read_io, write_io|
11
- object.get(response_target: write_io)
12
- write_io.close
13
- block.call(read_io)
14
- end
15
- end
16
-
17
- def self.open2(uri = nil, **args, &block)
18
- if !uri.nil? && IOStreams.reader_stream?(uri)
19
- raise(ArgumentError, 'S3 can only accept a URI, not an IO stream when reading.')
20
- end
21
-
22
- unless defined?(Aws::S3::Resource)
23
- begin
24
- require 'aws-sdk-s3'
25
- rescue LoadError => exc
26
- raise(LoadError, "Install gem 'aws-sdk-s3' to read and write AWS S3 files: #{exc.message}")
27
- end
28
- end
29
-
30
- options = uri.nil? ? args : parse_uri(uri).merge(args)
31
-
32
12
  begin
33
- io = new(**options)
34
- block.call(io)
35
- ensure
36
- io.close if io && (io.respond_to?(:closed?) && !io.closed?)
37
- end
38
- end
39
-
40
- def initialize(region: nil, bucket:, key:)
41
- s3 = region.nil? ? Aws::S3::Resource.new : Aws::S3::Resource.new(region: region)
42
- @object = s3.bucket(bucket).object(key)
43
- @buffer = []
44
- end
13
+ # Since S3 download only supports a push stream, write it to a tempfile first.
14
+ temp_file = Tempfile.new('rocket_job')
45
15
 
46
- def read(length = nil, outbuf = nil)
47
- # Sufficient data already in the buffer
48
- return @buffer.slice!(0, length) if length && (length <= @buffer.length)
16
+ args[:response_target] = temp_file.to_path
17
+ object.get(args)
49
18
 
50
- # Fetch in chunks
51
- @object.get do |chunk|
52
- @buffer << chunk
53
- return @buffer.slice!(0, length) if length && (length <= @buffer.length)
19
+ block.call(temp_file)
20
+ ensure
21
+ temp_file.delete if temp_file
54
22
  end
55
- @buffer if @buffer.size > 0
56
23
  end
57
-
58
- private
59
-
60
- attr_reader :object
61
-
62
24
  end
63
25
  end
64
26
  end
@@ -2,11 +2,13 @@ module IOStreams
2
2
  module S3
3
3
  class Writer
4
4
  # Write to AWS S3
5
- def self.open(uri = nil, bucket: nil, region: nil, key: nil, &block)
6
- options = uri.nil? ? args : parse_uri(uri).merge(args)
5
+ def self.open(uri, region: nil, **args, &block)
6
+ raise(ArgumentError, 'file_name must be a URI string') unless uri.is_a?(String)
7
+
8
+ options = IOStreams::S3.parse_uri(uri)
7
9
  s3 = region.nil? ? Aws::S3::Resource.new : Aws::S3::Resource.new(region: region)
8
10
  object = s3.bucket(options[:bucket]).object(options[:key])
9
- object.upload_stream(file_name_or_io, &block)
11
+ object.upload_stream(args, &block)
10
12
  end
11
13
  end
12
14
  end
@@ -81,15 +81,15 @@ module IOStreams
81
81
  # .gz.enc [ :gz, :enc ]
82
82
  #
83
83
  # Example Zip file:
84
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
84
+ # IOStreams.streams_for_file_name('myfile.zip')
85
85
  # => [ :zip ]
86
86
  #
87
87
  # Example Encrypted Gzip file:
88
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
88
+ # IOStreams.streams_for_file_name('myfile.csv.gz.enc')
89
89
  # => [ :gz, :enc ]
90
90
  #
91
91
  # Example plain text / binary file:
92
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
92
+ # IOStreams.streams_for_file_name('myfile.csv')
93
93
  # => [ :file ]
94
94
  def streams_for_file_name(file_name)
95
95
  raise ArgumentError.new("Cannot auto-detect streams when already a stream: #{file_name.inspect}") if reader_stream?(file_name)
@@ -1,3 +1,3 @@
1
1
  module IOStreams
2
- VERSION = '0.16.2'
2
+ VERSION = '0.17.0'
3
3
  end
@@ -16,10 +16,7 @@ module IOStreams
16
16
  autoload :Writer, 'io_streams/gzip/writer'
17
17
  end
18
18
  autoload :Pgp, 'io_streams/pgp'
19
- module S3
20
- autoload :Reader, 'io_streams/s3/reader'
21
- autoload :Writer, 'io_streams/s3/writer'
22
- end
19
+ autoload :S3, 'io_streams/s3'
23
20
  module SFTP
24
21
  autoload :Reader, 'io_streams/sftp/reader'
25
22
  autoload :Writer, 'io_streams/sftp/writer'
@@ -0,0 +1,7 @@
1
+ name, description, zip
2
+ "
3
+ Jack","Firstname is Jack","234567"
4
+ "John","Firstname
5
+ is John","234568"
6
+ "Zack","Firstname is Zack","234568
7
+ "
@@ -0,0 +1,4 @@
1
+ name, description, zip
2
+ "Jack","Firstn"ame is Jack","234567"
3
+ "John","Firstname is John","234568"
4
+ "Zack","Firstname is Zack","234568"
@@ -95,6 +95,16 @@ class IOStreamsTest < Minitest::Test
95
95
  end
96
96
  end
97
97
 
98
+ describe '.scheme_for_file_name' do
99
+ it 'default' do
100
+ assert_nil IOStreams.scheme_for_file_name('a.xyz')
101
+ end
102
+
103
+ it 's3' do
104
+ assert_equal :s3, IOStreams.scheme_for_file_name('s3://a.xyz')
105
+ end
106
+ end
107
+
98
108
  describe '.each_line' do
99
109
  it 'returns a line at a time' do
100
110
  lines = []
@@ -6,6 +6,14 @@ class LineReaderTest < Minitest::Test
6
6
  File.join(File.dirname(__FILE__), 'files', 'text.txt')
7
7
  end
8
8
 
9
+ let :csv_file do
10
+ File.join(File.dirname(__FILE__), 'files', 'embedded_lines_test.csv')
11
+ end
12
+
13
+ let :unclosed_quote_file do
14
+ File.join(File.dirname(__FILE__), 'files', 'unclosed_quote_test.csv')
15
+ end
16
+
9
17
  let :data do
10
18
  data = []
11
19
  File.open(file_name, 'rt') do |file|
@@ -16,6 +24,47 @@ class LineReaderTest < Minitest::Test
16
24
  data
17
25
  end
18
26
 
27
+ # Test file has embedded newlines in rows 2, 3 and 4
28
+ #
29
+ # name, description, zip
30
+ # "\nJack","Firstname is Jack","234567"
31
+ # "John","Firstname\n is John","234568"
32
+ # "Zack","Firstname is Zack","234568\n"
33
+ #
34
+ describe 'embedded_within_quotes' do
35
+ describe 'csv file' do
36
+
37
+ it 'fails to keep embedded lines if flag is not set' do
38
+ lines = []
39
+ IOStreams::Line::Reader.open(csv_file) do |io|
40
+ io.each do |line|
41
+ lines << line
42
+ end
43
+ end
44
+ assert_equal 7, lines.count
45
+ end
46
+
47
+ it 'keeps embedded lines if flag is set' do
48
+ lines = []
49
+ IOStreams::Line::Reader.open(csv_file, embedded_within: '"') do |io|
50
+ io.each do |line|
51
+ lines << line
52
+ end
53
+ end
54
+ assert_equal 4, lines.count
55
+ end
56
+
57
+ it 'raises error for unclosed quote' do
58
+ assert_raises(RuntimeError) do
59
+ IOStreams::Line::Reader.open(unclosed_quote_file, embedded_within: '"') do |io|
60
+ io.each do |line|
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
67
+
19
68
  describe '#each' do
20
69
  it 'each_line file' do
21
70
  lines = []
@@ -41,7 +90,7 @@ class LineReaderTest < Minitest::Test
41
90
  it "autodetect delimiter: #{delimiter.inspect}" do
42
91
  lines = []
43
92
  stream = StringIO.new(data.join(delimiter))
44
- count = IOStreams::Line::Reader.open(stream, buffer_size: 15) do |io|
93
+ count = IOStreams::Line::Reader.open(stream, buffer_size: 15) do |io|
45
94
  io.each { |line| lines << line }
46
95
  end
47
96
  assert_equal data, lines
@@ -51,7 +100,7 @@ class LineReaderTest < Minitest::Test
51
100
  it "single read autodetect delimiter: #{delimiter.inspect}" do
52
101
  lines = []
53
102
  stream = StringIO.new(data.join(delimiter))
54
- count = IOStreams::Line::Reader.open(stream) do |io|
103
+ count = IOStreams::Line::Reader.open(stream) do |io|
55
104
  io.each { |line| lines << line }
56
105
  end
57
106
  assert_equal data, lines
@@ -63,7 +112,7 @@ class LineReaderTest < Minitest::Test
63
112
  it "reads delimited #{delimiter.inspect}" do
64
113
  lines = []
65
114
  stream = StringIO.new(data.join(delimiter))
66
- count = IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
115
+ count = IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
67
116
  io.each { |line| lines << line }
68
117
  end
69
118
  assert_equal data, lines
@@ -75,7 +124,7 @@ class LineReaderTest < Minitest::Test
75
124
  delimiter = "\x01"
76
125
  lines = []
77
126
  stream = StringIO.new(data.join(delimiter).encode('ASCII-8BIT'))
78
- count = IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
127
+ count = IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
79
128
  io.each { |line| lines << line }
80
129
  end
81
130
  assert_equal data, lines
@@ -0,0 +1,41 @@
1
+ require_relative 'test_helper'
2
+
3
+ class S3ReaderTest < Minitest::Test
4
+ describe IOStreams::File::Reader do
5
+ let :file_name do
6
+ File.join(File.dirname(__FILE__), 'files', 'text.txt')
7
+ end
8
+
9
+ let :raw do
10
+ File.read(file_name)
11
+ end
12
+
13
+ let :uri do
14
+ "s3://#{ENV['S3_BUCKET_NAME']}/s3_test/test.txt"
15
+ end
16
+
17
+ let :upload_s3_file do
18
+ IOStreams::S3::Writer.open(uri) { |io| io << raw }
19
+ end
20
+
21
+ describe '.open' do
22
+ it 'reads' do
23
+ unless ENV['S3_BUCKET_NAME']
24
+ skip "Supply 'S3_BUCKET_NAME' environment variable with S3 bucket name to test with"
25
+ end
26
+
27
+ upload_s3_file
28
+ result = IOStreams::S3::Reader.open(uri) { |io| io.read }
29
+ assert_equal raw, result
30
+ end
31
+
32
+ it 'does not support streams' do
33
+ io_string = StringIO.new('data')
34
+ assert_raises ArgumentError do
35
+ IOStreams::S3::Reader.open(io_string) { |io| io.read }
36
+ end
37
+ end
38
+ end
39
+
40
+ end
41
+ end
@@ -0,0 +1,41 @@
1
+ require_relative 'test_helper'
2
+
3
+ class FileWriterTest < Minitest::Test
4
+ describe IOStreams::File::Writer do
5
+ let :file_name do
6
+ File.join(File.dirname(__FILE__), 'files', 'text.txt')
7
+ end
8
+
9
+ let :raw do
10
+ File.read(file_name)
11
+ end
12
+
13
+ let :uri do
14
+ "s3://#{ENV['S3_BUCKET_NAME']}/s3_test/test.txt"
15
+ end
16
+
17
+ let :upload_s3_file do
18
+ IOStreams::S3::Writer.open(uri) { |io| io << raw }
19
+ end
20
+
21
+ describe '.open' do
22
+ it 'writes' do
23
+ unless ENV['S3_BUCKET_NAME']
24
+ skip "Supply 'S3_BUCKET_NAME' environment variable with S3 bucket name to test with"
25
+ end
26
+
27
+ IOStreams::S3::Writer.open(uri) { |io| io.write(raw) }
28
+ result = IOStreams::S3::Reader.open(uri) { |io| io.read }
29
+ assert_equal raw, result
30
+ end
31
+
32
+ it 'does not support streams' do
33
+ io_string = StringIO.new
34
+ assert_raises ArgumentError do
35
+ IOStreams::S3::Writer.open(io_string) { |io| io.write(raw) }
36
+ end
37
+ end
38
+ end
39
+
40
+ end
41
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iostreams
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.2
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-02-11 00:00:00.000000000 Z
11
+ date: 2019-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby
@@ -80,6 +80,7 @@ files:
80
80
  - test/encode_writer_test.rb
81
81
  - test/file_reader_test.rb
82
82
  - test/file_writer_test.rb
83
+ - test/files/embedded_lines_test.csv
83
84
  - test/files/spreadsheet.xlsx
84
85
  - test/files/test.csv
85
86
  - test/files/test.json
@@ -88,6 +89,7 @@ files:
88
89
  - test/files/text.txt.gz
89
90
  - test/files/text.txt.gz.zip
90
91
  - test/files/text.zip
92
+ - test/files/unclosed_quote_test.csv
91
93
  - test/gzip_reader_test.rb
92
94
  - test/gzip_writer_test.rb
93
95
  - test/io_streams_test.rb
@@ -100,6 +102,8 @@ files:
100
102
  - test/record_writer_test.rb
101
103
  - test/row_reader_test.rb
102
104
  - test/row_writer_test.rb
105
+ - test/s3_reader_test.rb
106
+ - test/s3_writer_test.rb
103
107
  - test/tabular_test.rb
104
108
  - test/test_helper.rb
105
109
  - test/xlsx_reader_test.rb
@@ -139,6 +143,7 @@ test_files:
139
143
  - test/gzip_writer_test.rb
140
144
  - test/file_reader_test.rb
141
145
  - test/record_reader_test.rb
146
+ - test/s3_writer_test.rb
142
147
  - test/pgp_writer_test.rb
143
148
  - test/line_writer_test.rb
144
149
  - test/row_reader_test.rb
@@ -146,8 +151,10 @@ test_files:
146
151
  - test/zip_writer_test.rb
147
152
  - test/files/text.zip
148
153
  - test/files/spreadsheet.xlsx
154
+ - test/files/embedded_lines_test.csv
149
155
  - test/files/test.csv
150
156
  - test/files/test.json
157
+ - test/files/unclosed_quote_test.csv
151
158
  - test/files/text.txt.bz2
152
159
  - test/files/text.txt.gz.zip
153
160
  - test/files/text.txt.gz
@@ -160,3 +167,4 @@ test_files:
160
167
  - test/pgp_test.rb
161
168
  - test/io_streams_test.rb
162
169
  - test/record_writer_test.rb
170
+ - test/s3_reader_test.rb