iostreams 0.16.2 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e9a14b746c83e98c98950f8fe1086e689383e997a2f62688272f419d0a144c36
4
- data.tar.gz: 61c6da8d61da48d205f8537bd0a11b0b0cf4a1160ab4e00247f3cea507540072
3
+ metadata.gz: afb047a3682e7cbb9d953c303ebf2f525ff9975248ac550b6eded486ff73a72b
4
+ data.tar.gz: b14b13f5c23663b1b173d4ee767a93ea74b381f4c89ae5b44e4610bfbe4fabe4
5
5
  SHA512:
6
- metadata.gz: 9736cb2bacdb162120a9bd6febe300760c34ea70148a62352244d33ab9142dead20cdc600242f8304ba460fcc9f4364dd2fdc8588ab4fda2dd34802e302037d8
7
- data.tar.gz: 64a012432f793855f216be83b6522fe8301eff9f02fdc967782fb5488b85b6df630c44f6c4aefb4541b76b9a8f518856af2315b64ff733516c3b73b2451341f4
6
+ metadata.gz: 00a5fa08bf88bbcc9cc3c1418f90d2dd55c3da0b129cd472ef571e27f0dec653214f2f1780069801e8fd87c676c70e097fd79e86085fcbd63e3def501f779ec5
7
+ data.tar.gz: e193d7dacbd65625c9159fb61d13d3d0b005bd40e373047e2ece791c91c5c1798c881a398c0246fda8c424bd875dead9be3c5d3cc00765b0b2977d3decf3a08d
@@ -70,6 +70,15 @@ module IOStreams
70
70
  end
71
71
 
72
72
  # Iterate over a file / stream returning one line at a time.
73
+ # Embedded newlines (within double quotes) will be kept as part of the line if
74
+ # 1. The file name contains .csv
75
+ # 2. Or the embedded_within argument is set
76
+ #
77
+ # Example: Supply custom options
78
+ # IOStreams.each_line(file_name, embedded_within: '"') do |line|
79
+ # puts line
80
+ # end
81
+ #
73
82
  def self.each_line(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
74
83
  line_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |line_stream|
75
84
  line_stream.each(&block)
@@ -77,6 +86,15 @@ module IOStreams
77
86
  end
78
87
 
79
88
  # Iterate over a file / stream returning one row at a time.
89
+ # Embedded newlines (within double quotes) will be kept as part of the line if
90
+ # 1. The file name contains .csv
91
+ # 2. Or the embedded_within argument is set
92
+ #
93
+ # Example: Supply custom options
94
+ # IOStreams.each_row(file_name, embedded_within: '"') do |line|
95
+ # puts line
96
+ # end
97
+ #
80
98
  def self.each_row(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
81
99
  row_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |row_stream|
82
100
  row_stream.each(&block)
@@ -90,6 +108,15 @@ module IOStreams
90
108
  # Each record / line is returned one at a time so that very large files
91
109
  # can be read without having to load the entire file into memory.
92
110
  #
111
+ # Embedded newlines (within double quotes) will be kept as part of the line if
112
+ # 1. The file name contains .csv
113
+ # 2. Or the embedded_within argument is set
114
+ #
115
+ # Example: Supply custom options
116
+ # IOStreams.each_record(file_name, embedded_within: '"') do |line|
117
+ # puts line
118
+ # end
119
+ #
93
120
  # Example:
94
121
  # file_name = 'customer_data.csv.pgp'
95
122
  # IOStreams.each_record(file_name) do |hash|
@@ -291,20 +318,21 @@ module IOStreams
291
318
  # its extension(s)
292
319
  #
293
320
  # Example Zip file:
294
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
321
+ # IOStreams.streams_for_file_name('myfile.zip')
295
322
  # => [ :zip ]
296
323
  #
297
324
  # Example Encrypted Gzip file:
298
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
325
+ # IOStreams.streams_for_file_name('myfile.csv.gz.enc')
299
326
  # => [ :gz, :enc ]
300
327
  #
301
328
  # Example plain text / binary file:
302
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
303
- # => [ :file ]
329
+ # IOStreams.streams_for_file_name('myfile.csv')
330
+ # => []
304
331
  def self.streams_for_file_name(file_name)
305
332
  raise ArgumentError.new('File name cannot be nil') if file_name.nil?
306
333
  raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
307
- parts = file_name.split('.')
334
+
335
+ parts = ::File.basename(file_name).split('.')
308
336
  extensions = []
309
337
  while extension = parts.pop
310
338
  sym = extension.downcase.to_sym
@@ -314,12 +342,31 @@ module IOStreams
314
342
  extensions
315
343
  end
316
344
 
345
+ # Extract the URI scheme as a symbol (e.g. :s3 from 's3://bucket/key'), or nil if none was supplied
346
+ def self.scheme_for_file_name(file_name)
347
+ raise ArgumentError.new('File name cannot be nil') if file_name.nil?
348
+ raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
349
+
350
+ if matches = file_name.match(/\A(\w+):\/\//)
351
+ matches[1].downcase.to_sym
352
+ end
353
+ end
354
+
317
355
  # Iterate over a file / stream returning each record/line one at a time.
318
- def self.line_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
356
+ # It will default embedded_within to a double quote if the file name, or the file_name argument, contains .csv.
357
+ def self.line_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, embedded_within: nil, **args, &block)
358
+
319
359
  return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) || file_name_or_io.is_a?(Array)
320
360
 
361
+ # TODO: needs to be improved
362
+ if embedded_within.nil? && file_name_or_io.is_a?(String)
363
+ embedded_within = '"' if file_name_or_io.include?('.csv')
364
+ elsif embedded_within.nil? && file_name
365
+ embedded_within = '"' if file_name.include?('.csv')
366
+ end
367
+
321
368
  reader(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
322
- IOStreams::Line::Reader.open(io, **args, &block)
369
+ IOStreams::Line::Reader.open(io, embedded_within: embedded_within, **args, &block)
323
370
  end
324
371
  end
325
372
 
@@ -331,6 +378,7 @@ module IOStreams
331
378
  encoding: nil,
332
379
  encode_cleaner: nil,
333
380
  encode_replace: nil,
381
+ embedded_within: nil,
334
382
  **args,
335
383
  &block)
336
384
 
@@ -338,12 +386,13 @@ module IOStreams
338
386
 
339
387
  line_reader(
340
388
  file_name_or_io,
341
- streams: streams,
342
- delimiter: delimiter,
343
- file_name: file_name,
344
- encoding: encoding,
345
- encode_cleaner: encode_cleaner,
346
- encode_replace: encode_replace
389
+ streams: streams,
390
+ delimiter: delimiter,
391
+ file_name: file_name,
392
+ encoding: encoding,
393
+ encode_cleaner: encode_cleaner,
394
+ encode_replace: encode_replace,
395
+ embedded_within: embedded_within
347
396
  ) do |io|
348
397
  file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
349
398
  IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
@@ -358,21 +407,23 @@ module IOStreams
358
407
  encoding: nil,
359
408
  encode_cleaner: nil,
360
409
  encode_replace: nil,
410
+ embedded_within: nil,
361
411
  **args,
362
412
  &block)
363
413
 
364
414
  return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader)
365
415
 
366
- line_reader(
367
- file_name_or_io,
368
- streams: streams,
369
- delimiter: delimiter,
370
- file_name: file_name,
371
- encoding: encoding,
372
- encode_cleaner: encode_cleaner,
373
- encode_replace: encode_replace
416
+ line_reader(file_name_or_io,
417
+ streams: streams,
418
+ delimiter: delimiter,
419
+ file_name: file_name,
420
+ encoding: encoding,
421
+ encode_cleaner: encode_cleaner,
422
+ encode_replace: encode_replace,
423
+ embedded_within: embedded_within
374
424
  ) do |io|
375
425
 
426
+
376
427
  file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
377
428
  IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
378
429
  end
@@ -401,6 +452,16 @@ module IOStreams
401
452
  @extensions.delete(extension.to_sym)
402
453
  end
403
454
 
455
+ # Register a URI scheme and the reader and writer streaming classes
456
+ #
457
+ # Example:
458
+ # # MyFtp::Reader and MyFtp::Writer must implement .open
459
+ # register_scheme(:ftp, MyFtp::Reader, MyFtp::Writer)
460
+ def self.register_scheme(scheme, reader_class, writer_class)
461
+ raise(ArgumentError, "Invalid scheme #{scheme.inspect}") unless scheme.nil? || scheme.to_s =~ /\A\w+\Z/
462
+ @schemes[scheme.nil? ? nil : scheme.to_sym] = Extension.new(reader_class, writer_class)
463
+ end
464
+
404
465
  # Helper method: Returns [true|false] if a value is blank?
405
466
  def self.blank?(value)
406
467
  if value.nil?
@@ -416,6 +477,7 @@ module IOStreams
416
477
 
417
478
  # A registry to hold formats for processing files during upload or download
418
479
  @extensions = {}
480
+ @schemes = {}
419
481
 
420
482
  # Struct to hold the Stream and options if any
421
483
  StreamStruct = Struct.new(:klass, :options)
@@ -438,8 +500,10 @@ module IOStreams
438
500
  if streams.nil?
439
501
  streams = file_name_or_io.is_a?(String) ? streams_for_file_name(file_name_or_io) : [nil]
440
502
  end
503
+ scheme = scheme_for_file_name(file_name_or_io) if file_name_or_io.is_a?(String)
441
504
 
442
505
  stream_structs = streams_for(type, streams)
506
+ stream_structs << stream_struct_for_scheme(type, scheme) if stream_structs.empty? || scheme
443
507
 
444
508
  # Add encoding stream if any of its options are present
445
509
  if encoding || encode_cleaner || encode_replace
@@ -466,7 +530,6 @@ module IOStreams
466
530
  if params.is_a?(Symbol)
467
531
  [stream_struct_for_stream(type, params)]
468
532
  elsif params.is_a?(Array)
469
- return [stream_struct_for_stream(type, nil)] if params.empty?
470
533
  a = []
471
534
  params.each do |stream|
472
535
  if stream.is_a?(Hash)
@@ -491,8 +554,14 @@ module IOStreams
491
554
  StreamStruct.new(klass, options)
492
555
  end
493
556
 
557
+ def self.stream_struct_for_scheme(type, scheme, options = {})
558
+ ext = @schemes[scheme.nil? ? nil : scheme.to_sym] || raise(ArgumentError, "Unknown Scheme type: #{scheme.inspect}")
559
+ klass = ext.send("#{type}_class")
560
+ StreamStruct.new(klass, options)
561
+ end
562
+
494
563
  # Default reader/writer when no other streams need to be applied.
495
- register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
564
+ # register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
496
565
 
497
566
  # Register File extensions
498
567
  register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
@@ -510,10 +579,17 @@ module IOStreams
510
579
  register_extension(:enc, SymmetricEncryption::Reader, SymmetricEncryption::Writer)
511
580
  end
512
581
 
513
- # register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
514
- # register_scheme(:file, IOStreams::File::Reader, IOStreams::File::Writer)
582
+ # Support URI schemes
583
+ #
584
+ # Examples:
585
+ # path/file_name
586
+ # http://hostname/path/file_name
587
+ # https://hostname/path/file_name
588
+ # sftp://hostname/path/file_name
589
+ # s3://bucket/key
590
+ register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
515
591
  # register_scheme(:http, IOStreams::HTTP::Reader, IOStreams::HTTP::Writer)
516
592
  # register_scheme(:https, IOStreams::HTTPS::Reader, IOStreams::HTTPS::Writer)
517
593
  # register_scheme(:sftp, IOStreams::SFTP::Reader, IOStreams::SFTP::Writer)
518
- # register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
594
+ register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
519
595
  end
@@ -1,7 +1,7 @@
1
1
  module IOStreams
2
2
  module Line
3
3
  class Reader
4
- attr_reader :delimiter, :buffer_size, :line_count
4
+ attr_reader :delimiter, :buffer_size, :line_number
5
5
 
6
6
  # Prevent denial of service when a delimiter is not found before this number * `buffer_size` characters are read.
7
7
  MAX_BLOCKS_MULTIPLIER = 100
@@ -20,7 +20,7 @@ module IOStreams
20
20
  # Create a delimited stream reader from the supplied input stream.
21
21
  #
22
22
  # Lines returned will be in the encoding of the input stream.
23
- # To change the encoding of retruned lines, use IOStreams::Encode::Reader.
23
+ # To change the encoding of returned lines, use IOStreams::Encode::Reader.
24
24
  #
25
25
  # Parameters
26
26
  # input_stream
@@ -45,14 +45,15 @@ module IOStreams
45
45
  # - Skip "empty" / "blank" lines. RegExp?
46
46
  # - Extract header line(s) / first non-comment, non-blank line
47
47
  # - Embedded newline support, RegExp? or Proc?
48
- def initialize(input_stream, delimiter: nil, buffer_size: 65_536)
49
- @input_stream = input_stream
50
- @buffer_size = buffer_size
48
+ def initialize(input_stream, delimiter: nil, buffer_size: 65_536, embedded_within: nil)
49
+ @embedded_within = embedded_within
50
+ @input_stream = input_stream
51
+ @buffer_size = buffer_size
51
52
 
52
53
  # More efficient read buffering only supported when the input stream `#read` method supports it.
53
54
  @use_read_cache_buffer = !@input_stream.method(:read).arity.between?(0, 1)
54
55
 
55
- @line_count = 0
56
+ @line_number = 0
56
57
  @eof = false
57
58
  @read_cache_buffer = nil
58
59
  @buffer = nil
@@ -73,14 +74,40 @@ module IOStreams
73
74
  # Note:
74
75
  # * The line delimiter is _not_ returned.
75
76
  def each
77
+ line_count = 0
76
78
  until eof?
77
79
  line = readline
78
- yield(line) unless line.nil?
80
+ unless line.nil?
81
+ yield(line)
82
+ line_count += 1
83
+ end
79
84
  end
80
85
  line_count
81
86
  end
82
87
 
88
+ # Reads each line per the @delimiter. It will account for embedded lines provided they are within the @embedded_within character (e.g. double quotes).
89
+ # The embedded_within argument is supplied by IOStreams.line_reader
83
90
  def readline
91
+ line = _readline
92
+ if line && @embedded_within
93
+ initial_line_number = @line_number
94
+ while line.count(@embedded_within).odd?
95
+ raise "Unclosed quoted field on line #{initial_line_number}" if eof? || line.length > @buffer_size * 10
96
+ line << @delimiter
97
+ line << _readline
98
+ end
99
+ end
100
+ line
101
+ end
102
+
103
+ # Returns whether the end of file has been reached for this stream
104
+ def eof?
105
+ @eof && (@buffer.nil? || @buffer.empty?)
106
+ end
107
+
108
+ private
109
+
110
+ def _readline
84
111
  return if eof?
85
112
 
86
113
  # Keep reading until it finds the delimiter
@@ -89,29 +116,22 @@ module IOStreams
89
116
 
90
117
  # Delimiter found?
91
118
  if index
92
- data = @buffer.slice(0, index)
93
- @buffer = @buffer.slice(index + @delimiter_size, @buffer.size)
94
- @line_count += 1
119
+ data = @buffer.slice(0, index)
120
+ @buffer = @buffer.slice(index + @delimiter_size, @buffer.size)
121
+ @line_number += 1
95
122
  elsif @eof && @buffer.empty?
96
123
  data = nil
97
124
  @buffer = nil
98
125
  else
99
126
  # Last line without delimiter
100
- data = @buffer
101
- @buffer = nil
102
- @line_count += 1
127
+ data = @buffer
128
+ @buffer = nil
129
+ @line_number += 1
103
130
  end
104
131
 
105
132
  data
106
133
  end
107
134
 
108
- # Returns whether the end of file has been reached for this stream
109
- def eof?
110
- @eof && (@buffer.nil? || @buffer.empty?)
111
- end
112
-
113
- private
114
-
115
135
  # Returns [Integer] the number of characters read into the internal buffer
116
136
  # Returns 0 on EOF
117
137
  def read_block
@@ -32,14 +32,14 @@ module IOStreams
32
32
  # :csv, :hash, :array, :json, :psv, :fixed
33
33
  #
34
34
  # For all other parameters, see Tabular::Header.new
35
- def initialize(delimited, cleanse_header: true, **args)
35
+ def initialize(line_reader, cleanse_header: true, **args)
36
36
  @tabular = IOStreams::Tabular.new(**args)
37
- @delimited = delimited
37
+ @line_reader = line_reader
38
38
  @cleanse_header = cleanse_header
39
39
  end
40
40
 
41
41
  def each
42
- @delimited.each do |line|
42
+ @line_reader.each do |line|
43
43
  if @tabular.header?
44
44
  @tabular.parse_header(line)
45
45
  @tabular.cleanse_header! if @cleanse_header
@@ -7,19 +7,17 @@ end
7
7
  require 'uri'
8
8
  module IOStreams
9
9
  module S3
10
+ autoload :Reader, 'io_streams/s3/reader'
11
+ autoload :Writer, 'io_streams/s3/writer'
12
+
10
13
  # Sample URI: s3://mybucket/user/abc.zip
11
14
  def self.parse_uri(uri)
12
- # 's3://mybucket/user/abc.zip'
13
15
  uri = URI.parse(uri)
14
- # Filename and bucket only
15
- if uri.scheme.nil?
16
- segments = uri.path.split('/')
17
- raise "S3 URI must at the very least contain '<bucket_name>/<key>'" if (segments.size == 1) || (segments[0] == '')
18
- {
19
- bucket: segments.shift,
20
- key: segments.join('/')
21
- }
22
- end
16
+ raise "Invalid URI. Required Format: 's3://<bucket_name>/<key>'" unless uri.scheme == 's3'
17
+ {
18
+ bucket: uri.host,
19
+ key: uri.path.sub(/\A\//, '')
20
+ }
23
21
  end
24
22
  end
25
23
  end
@@ -2,63 +2,25 @@ module IOStreams
2
2
  module S3
3
3
  class Reader
4
4
  # Read from an AWS S3 file
5
- def self.open(uri = nil, bucket: nil, region: nil, key: nil, &block)
6
- options = uri.nil? ? args : parse_uri(uri).merge(args)
5
+ def self.open(uri, region: nil, **args, &block)
6
+ raise(ArgumentError, 'file_name must be a URI string') unless uri.is_a?(String)
7
+
7
8
  s3 = region.nil? ? Aws::S3::Resource.new : Aws::S3::Resource.new(region: region)
9
+ options = IOStreams::S3.parse_uri(uri)
8
10
  object = s3.bucket(options[:bucket]).object(options[:key])
9
11
 
10
- IO.pipe do |read_io, write_io|
11
- object.get(response_target: write_io)
12
- write_io.close
13
- block.call(read_io)
14
- end
15
- end
16
-
17
- def self.open2(uri = nil, **args, &block)
18
- if !uri.nil? && IOStreams.reader_stream?(uri)
19
- raise(ArgumentError, 'S3 can only accept a URI, not an IO stream when reading.')
20
- end
21
-
22
- unless defined?(Aws::S3::Resource)
23
- begin
24
- require 'aws-sdk-s3'
25
- rescue LoadError => exc
26
- raise(LoadError, "Install gem 'aws-sdk-s3' to read and write AWS S3 files: #{exc.message}")
27
- end
28
- end
29
-
30
- options = uri.nil? ? args : parse_uri(uri).merge(args)
31
-
32
12
  begin
33
- io = new(**options)
34
- block.call(io)
35
- ensure
36
- io.close if io && (io.respond_to?(:closed?) && !io.closed?)
37
- end
38
- end
39
-
40
- def initialize(region: nil, bucket:, key:)
41
- s3 = region.nil? ? Aws::S3::Resource.new : Aws::S3::Resource.new(region: region)
42
- @object = s3.bucket(bucket).object(key)
43
- @buffer = []
44
- end
13
+ # Since S3 download only supports a push stream, write it to a tempfile first.
14
+ temp_file = Tempfile.new('rocket_job')
45
15
 
46
- def read(length = nil, outbuf = nil)
47
- # Sufficient data already in the buffer
48
- return @buffer.slice!(0, length) if length && (length <= @buffer.length)
16
+ args[:response_target] = temp_file.to_path
17
+ object.get(args)
49
18
 
50
- # Fetch in chunks
51
- @object.get do |chunk|
52
- @buffer << chunk
53
- return @buffer.slice!(0, length) if length && (length <= @buffer.length)
19
+ block.call(temp_file)
20
+ ensure
21
+ temp_file.delete if temp_file
54
22
  end
55
- @buffer if @buffer.size > 0
56
23
  end
57
-
58
- private
59
-
60
- attr_reader :object
61
-
62
24
  end
63
25
  end
64
26
  end
@@ -2,11 +2,13 @@ module IOStreams
2
2
  module S3
3
3
  class Writer
4
4
  # Write to AWS S3
5
- def self.open(uri = nil, bucket: nil, region: nil, key: nil, &block)
6
- options = uri.nil? ? args : parse_uri(uri).merge(args)
5
+ def self.open(uri, region: nil, **args, &block)
6
+ raise(ArgumentError, 'file_name must be a URI string') unless uri.is_a?(String)
7
+
8
+ options = IOStreams::S3.parse_uri(uri)
7
9
  s3 = region.nil? ? Aws::S3::Resource.new : Aws::S3::Resource.new(region: region)
8
10
  object = s3.bucket(options[:bucket]).object(options[:key])
9
- object.upload_stream(file_name_or_io, &block)
11
+ object.upload_stream(args, &block)
10
12
  end
11
13
  end
12
14
  end
@@ -81,15 +81,15 @@ module IOStreams
81
81
  # .gz.enc [ :gz, :enc ]
82
82
  #
83
83
  # Example Zip file:
84
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
84
+ # IOStreams.streams_for_file_name('myfile.zip')
85
85
  # => [ :zip ]
86
86
  #
87
87
  # Example Encrypted Gzip file:
88
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
88
+ # IOStreams.streams_for_file_name('myfile.csv.gz.enc')
89
89
  # => [ :gz, :enc ]
90
90
  #
91
91
  # Example plain text / binary file:
92
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
92
+ # IOStreams.streams_for_file_name('myfile.csv')
93
93
  # => [ :file ]
94
94
  def streams_for_file_name(file_name)
95
95
  raise ArgumentError.new("Cannot auto-detect streams when already a stream: #{file_name.inspect}") if reader_stream?(file_name)
@@ -1,3 +1,3 @@
1
1
  module IOStreams
2
- VERSION = '0.16.2'
2
+ VERSION = '0.17.0'
3
3
  end
@@ -16,10 +16,7 @@ module IOStreams
16
16
  autoload :Writer, 'io_streams/gzip/writer'
17
17
  end
18
18
  autoload :Pgp, 'io_streams/pgp'
19
- module S3
20
- autoload :Reader, 'io_streams/s3/reader'
21
- autoload :Writer, 'io_streams/s3/writer'
22
- end
19
+ autoload :S3, 'io_streams/s3'
23
20
  module SFTP
24
21
  autoload :Reader, 'io_streams/sftp/reader'
25
22
  autoload :Writer, 'io_streams/sftp/writer'
@@ -0,0 +1,7 @@
1
+ name, description, zip
2
+ "
3
+ Jack","Firstname is Jack","234567"
4
+ "John","Firstname
5
+ is John","234568"
6
+ "Zack","Firstname is Zack","234568
7
+ "
@@ -0,0 +1,4 @@
1
+ name, description, zip
2
+ "Jack","Firstn"ame is Jack","234567"
3
+ "John","Firstname is John","234568"
4
+ "Zack","Firstname is Zack","234568"
@@ -95,6 +95,16 @@ class IOStreamsTest < Minitest::Test
95
95
  end
96
96
  end
97
97
 
98
+ describe '.scheme_for_file_name' do
99
+ it 'default' do
100
+ assert_nil IOStreams.scheme_for_file_name('a.xyz')
101
+ end
102
+
103
+ it 's3' do
104
+ assert_equal :s3, IOStreams.scheme_for_file_name('s3://a.xyz')
105
+ end
106
+ end
107
+
98
108
  describe '.each_line' do
99
109
  it 'returns a line at a time' do
100
110
  lines = []
@@ -6,6 +6,14 @@ class LineReaderTest < Minitest::Test
6
6
  File.join(File.dirname(__FILE__), 'files', 'text.txt')
7
7
  end
8
8
 
9
+ let :csv_file do
10
+ File.join(File.dirname(__FILE__), 'files', 'embedded_lines_test.csv')
11
+ end
12
+
13
+ let :unclosed_quote_file do
14
+ File.join(File.dirname(__FILE__), 'files', 'unclosed_quote_test.csv')
15
+ end
16
+
9
17
  let :data do
10
18
  data = []
11
19
  File.open(file_name, 'rt') do |file|
@@ -16,6 +24,47 @@ class LineReaderTest < Minitest::Test
16
24
  data
17
25
  end
18
26
 
27
+ # Test file has embedded new lines in row 2, 3 and 4
28
+ #
29
+ # name, description, zip
30
+ # "\nJack","Firstname is Jack","234567"
31
+ # "John","Firstname\n is John","234568"
32
+ # "Zack","Firstname is Zack","234568\n"
33
+ #
34
+ describe 'embedded_within_quotes' do
35
+ describe 'csv file' do
36
+
37
+ it 'fails to keep embedded lines if flag is not set' do
38
+ lines = []
39
+ IOStreams::Line::Reader.open(csv_file) do |io|
40
+ io.each do |line|
41
+ lines << line
42
+ end
43
+ end
44
+ assert_equal 7, lines.count
45
+ end
46
+
47
+ it 'keeps embedded lines if flag is set' do
48
+ lines = []
49
+ IOStreams::Line::Reader.open(csv_file, embedded_within: '"') do |io|
50
+ io.each do |line|
51
+ lines << line
52
+ end
53
+ end
54
+ assert_equal 4, lines.count
55
+ end
56
+
57
+ it 'raises error for unclosed quote' do
58
+ assert_raises(RuntimeError) do
59
+ IOStreams::Line::Reader.open(unclosed_quote_file, embedded_within: '"') do |io|
60
+ io.each do |line|
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
67
+
19
68
  describe '#each' do
20
69
  it 'each_line file' do
21
70
  lines = []
@@ -41,7 +90,7 @@ class LineReaderTest < Minitest::Test
41
90
  it "autodetect delimiter: #{delimiter.inspect}" do
42
91
  lines = []
43
92
  stream = StringIO.new(data.join(delimiter))
44
- count = IOStreams::Line::Reader.open(stream, buffer_size: 15) do |io|
93
+ count = IOStreams::Line::Reader.open(stream, buffer_size: 15) do |io|
45
94
  io.each { |line| lines << line }
46
95
  end
47
96
  assert_equal data, lines
@@ -51,7 +100,7 @@ class LineReaderTest < Minitest::Test
51
100
  it "single read autodetect delimiter: #{delimiter.inspect}" do
52
101
  lines = []
53
102
  stream = StringIO.new(data.join(delimiter))
54
- count = IOStreams::Line::Reader.open(stream) do |io|
103
+ count = IOStreams::Line::Reader.open(stream) do |io|
55
104
  io.each { |line| lines << line }
56
105
  end
57
106
  assert_equal data, lines
@@ -63,7 +112,7 @@ class LineReaderTest < Minitest::Test
63
112
  it "reads delimited #{delimiter.inspect}" do
64
113
  lines = []
65
114
  stream = StringIO.new(data.join(delimiter))
66
- count = IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
115
+ count = IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
67
116
  io.each { |line| lines << line }
68
117
  end
69
118
  assert_equal data, lines
@@ -75,7 +124,7 @@ class LineReaderTest < Minitest::Test
75
124
  delimiter = "\x01"
76
125
  lines = []
77
126
  stream = StringIO.new(data.join(delimiter).encode('ASCII-8BIT'))
78
- count = IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
127
+ count = IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
79
128
  io.each { |line| lines << line }
80
129
  end
81
130
  assert_equal data, lines
@@ -0,0 +1,41 @@
1
+ require_relative 'test_helper'
2
+
3
+ class S3ReaderTest < Minitest::Test
4
+ describe IOStreams::File::Reader do
5
+ let :file_name do
6
+ File.join(File.dirname(__FILE__), 'files', 'text.txt')
7
+ end
8
+
9
+ let :raw do
10
+ File.read(file_name)
11
+ end
12
+
13
+ let :uri do
14
+ "s3://#{ENV['S3_BUCKET_NAME']}/s3_test/test.txt"
15
+ end
16
+
17
+ let :upload_s3_file do
18
+ IOStreams::S3::Writer.open(uri) { |io| io << raw }
19
+ end
20
+
21
+ describe '.open' do
22
+ it 'reads' do
23
+ unless ENV['S3_BUCKET_NAME']
24
+ skip "Supply 'S3_BUCKET_NAME' environment variable with S3 bucket name to test with"
25
+ end
26
+
27
+ upload_s3_file
28
+ result = IOStreams::S3::Reader.open(uri) { |io| io.read }
29
+ assert_equal raw, result
30
+ end
31
+
32
+ it 'does not support streams' do
33
+ io_string = StringIO.new('data')
34
+ assert_raises ArgumentError do
35
+ IOStreams::S3::Reader.open(io_string) { |io| io.read }
36
+ end
37
+ end
38
+ end
39
+
40
+ end
41
+ end
@@ -0,0 +1,41 @@
1
+ require_relative 'test_helper'
2
+
3
+ class FileWriterTest < Minitest::Test
4
+ describe IOStreams::File::Writer do
5
+ let :file_name do
6
+ File.join(File.dirname(__FILE__), 'files', 'text.txt')
7
+ end
8
+
9
+ let :raw do
10
+ File.read(file_name)
11
+ end
12
+
13
+ let :uri do
14
+ "s3://#{ENV['S3_BUCKET_NAME']}/s3_test/test.txt"
15
+ end
16
+
17
+ let :upload_s3_file do
18
+ IOStreams::S3::Writer.open(uri) { |io| io << raw }
19
+ end
20
+
21
+ describe '.open' do
22
+ it 'writes' do
23
+ unless ENV['S3_BUCKET_NAME']
24
+ skip "Supply 'S3_BUCKET_NAME' environment variable with S3 bucket name to test with"
25
+ end
26
+
27
+ IOStreams::S3::Writer.open(uri) { |io| io.write(raw) }
28
+ result = IOStreams::S3::Reader.open(uri) { |io| io.read }
29
+ assert_equal raw, result
30
+ end
31
+
32
+ it 'does not support streams' do
33
+ io_string = StringIO.new
34
+ assert_raises ArgumentError do
35
+ IOStreams::S3::Writer.open(io_string) { |io| io.write(raw) }
36
+ end
37
+ end
38
+ end
39
+
40
+ end
41
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iostreams
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.2
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-02-11 00:00:00.000000000 Z
11
+ date: 2019-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby
@@ -80,6 +80,7 @@ files:
80
80
  - test/encode_writer_test.rb
81
81
  - test/file_reader_test.rb
82
82
  - test/file_writer_test.rb
83
+ - test/files/embedded_lines_test.csv
83
84
  - test/files/spreadsheet.xlsx
84
85
  - test/files/test.csv
85
86
  - test/files/test.json
@@ -88,6 +89,7 @@ files:
88
89
  - test/files/text.txt.gz
89
90
  - test/files/text.txt.gz.zip
90
91
  - test/files/text.zip
92
+ - test/files/unclosed_quote_test.csv
91
93
  - test/gzip_reader_test.rb
92
94
  - test/gzip_writer_test.rb
93
95
  - test/io_streams_test.rb
@@ -100,6 +102,8 @@ files:
100
102
  - test/record_writer_test.rb
101
103
  - test/row_reader_test.rb
102
104
  - test/row_writer_test.rb
105
+ - test/s3_reader_test.rb
106
+ - test/s3_writer_test.rb
103
107
  - test/tabular_test.rb
104
108
  - test/test_helper.rb
105
109
  - test/xlsx_reader_test.rb
@@ -139,6 +143,7 @@ test_files:
139
143
  - test/gzip_writer_test.rb
140
144
  - test/file_reader_test.rb
141
145
  - test/record_reader_test.rb
146
+ - test/s3_writer_test.rb
142
147
  - test/pgp_writer_test.rb
143
148
  - test/line_writer_test.rb
144
149
  - test/row_reader_test.rb
@@ -146,8 +151,10 @@ test_files:
146
151
  - test/zip_writer_test.rb
147
152
  - test/files/text.zip
148
153
  - test/files/spreadsheet.xlsx
154
+ - test/files/embedded_lines_test.csv
149
155
  - test/files/test.csv
150
156
  - test/files/test.json
157
+ - test/files/unclosed_quote_test.csv
151
158
  - test/files/text.txt.bz2
152
159
  - test/files/text.txt.gz.zip
153
160
  - test/files/text.txt.gz
@@ -160,3 +167,4 @@ test_files:
160
167
  - test/pgp_test.rb
161
168
  - test/io_streams_test.rb
162
169
  - test/record_writer_test.rb
170
+ - test/s3_reader_test.rb