iostreams 0.20.3 → 1.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/lib/io_streams/bzip2/reader.rb +9 -21
  3. data/lib/io_streams/bzip2/writer.rb +9 -21
  4. data/lib/io_streams/deprecated.rb +217 -0
  5. data/lib/io_streams/encode/reader.rb +12 -16
  6. data/lib/io_streams/encode/writer.rb +9 -13
  7. data/lib/io_streams/errors.rb +6 -6
  8. data/lib/io_streams/gzip/reader.rb +7 -14
  9. data/lib/io_streams/gzip/writer.rb +7 -15
  10. data/lib/io_streams/io_streams.rb +182 -524
  11. data/lib/io_streams/line/reader.rb +9 -9
  12. data/lib/io_streams/line/writer.rb +10 -11
  13. data/lib/io_streams/path.rb +190 -0
  14. data/lib/io_streams/paths/file.rb +176 -0
  15. data/lib/io_streams/paths/http.rb +92 -0
  16. data/lib/io_streams/paths/matcher.rb +61 -0
  17. data/lib/io_streams/paths/s3.rb +269 -0
  18. data/lib/io_streams/paths/sftp.rb +99 -0
  19. data/lib/io_streams/pgp.rb +47 -19
  20. data/lib/io_streams/pgp/reader.rb +20 -28
  21. data/lib/io_streams/pgp/writer.rb +24 -46
  22. data/lib/io_streams/reader.rb +28 -0
  23. data/lib/io_streams/record/reader.rb +20 -16
  24. data/lib/io_streams/record/writer.rb +28 -28
  25. data/lib/io_streams/row/reader.rb +22 -26
  26. data/lib/io_streams/row/writer.rb +29 -28
  27. data/lib/io_streams/stream.rb +400 -0
  28. data/lib/io_streams/streams.rb +125 -0
  29. data/lib/io_streams/symmetric_encryption/reader.rb +5 -13
  30. data/lib/io_streams/symmetric_encryption/writer.rb +16 -15
  31. data/lib/io_streams/tabular/header.rb +9 -3
  32. data/lib/io_streams/tabular/parser/array.rb +8 -3
  33. data/lib/io_streams/tabular/parser/csv.rb +6 -2
  34. data/lib/io_streams/tabular/parser/hash.rb +4 -1
  35. data/lib/io_streams/tabular/parser/json.rb +3 -1
  36. data/lib/io_streams/tabular/parser/psv.rb +3 -1
  37. data/lib/io_streams/tabular/utility/csv_row.rb +9 -8
  38. data/lib/io_streams/utils.rb +22 -0
  39. data/lib/io_streams/version.rb +1 -1
  40. data/lib/io_streams/writer.rb +28 -0
  41. data/lib/io_streams/xlsx/reader.rb +7 -19
  42. data/lib/io_streams/zip/reader.rb +7 -26
  43. data/lib/io_streams/zip/writer.rb +21 -38
  44. data/lib/iostreams.rb +15 -15
  45. data/test/bzip2_reader_test.rb +3 -3
  46. data/test/bzip2_writer_test.rb +3 -3
  47. data/test/deprecated_test.rb +123 -0
  48. data/test/encode_reader_test.rb +3 -3
  49. data/test/encode_writer_test.rb +6 -6
  50. data/test/gzip_reader_test.rb +2 -2
  51. data/test/gzip_writer_test.rb +3 -3
  52. data/test/io_streams_test.rb +43 -136
  53. data/test/line_reader_test.rb +20 -20
  54. data/test/line_writer_test.rb +3 -3
  55. data/test/path_test.rb +30 -28
  56. data/test/paths/file_test.rb +206 -0
  57. data/test/paths/http_test.rb +34 -0
  58. data/test/paths/matcher_test.rb +111 -0
  59. data/test/paths/s3_test.rb +207 -0
  60. data/test/pgp_reader_test.rb +8 -8
  61. data/test/pgp_writer_test.rb +13 -13
  62. data/test/record_reader_test.rb +5 -5
  63. data/test/record_writer_test.rb +4 -4
  64. data/test/row_reader_test.rb +5 -5
  65. data/test/row_writer_test.rb +6 -6
  66. data/test/stream_test.rb +116 -0
  67. data/test/streams_test.rb +255 -0
  68. data/test/utils_test.rb +20 -0
  69. data/test/xlsx_reader_test.rb +3 -3
  70. data/test/zip_reader_test.rb +12 -12
  71. data/test/zip_writer_test.rb +5 -5
  72. metadata +33 -45
  73. data/lib/io_streams/base_path.rb +0 -72
  74. data/lib/io_streams/file/path.rb +0 -58
  75. data/lib/io_streams/file/reader.rb +0 -12
  76. data/lib/io_streams/file/writer.rb +0 -22
  77. data/lib/io_streams/http/reader.rb +0 -71
  78. data/lib/io_streams/s3.rb +0 -26
  79. data/lib/io_streams/s3/path.rb +0 -40
  80. data/lib/io_streams/s3/reader.rb +0 -28
  81. data/lib/io_streams/s3/writer.rb +0 -85
  82. data/lib/io_streams/sftp/reader.rb +0 -67
  83. data/lib/io_streams/sftp/writer.rb +0 -68
  84. data/test/base_path_test.rb +0 -35
  85. data/test/file_path_test.rb +0 -97
  86. data/test/file_reader_test.rb +0 -33
  87. data/test/file_writer_test.rb +0 -50
  88. data/test/http_reader_test.rb +0 -38
  89. data/test/s3_reader_test.rb +0 -41
  90. data/test/s3_writer_test.rb +0 -41
@@ -1,22 +0,0 @@
1
- module IOStreams
2
- module File
3
- class Writer
4
- # Write to a named file
5
- #
6
- # Note:
7
- # If an exception is raised whilst the file is being written to, the file is removed to
8
- # prevent incomplete / partial files from being created.
9
- def self.open(file_name, **args, &block)
10
- raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
11
-
12
- IOStreams::File::Path.mkpath(file_name)
13
- begin
14
- ::File.open(file_name, 'wb', &block)
15
- rescue StandardError => e
16
- ::File.unlink(file_name) if ::File.exist?(file_name)
17
- raise(e)
18
- end
19
- end
20
- end
21
- end
22
- end
@@ -1,71 +0,0 @@
1
- require 'net/http'
2
- require 'uri'
3
- module IOStreams
4
- module HTTP
5
- # Read a file using an http get.
6
- #
7
- # For example:
8
- # IOStreams.reader('https://www5.fdic.gov/idasp/Offices2.zip') {|file| puts file.read}
9
- #
10
- # Direct example without unzipping the above file:
11
- # IOStreams::HTTP::Reader.open('https://www5.fdic.gov/idasp/Offices2.zip') {|file| puts file.read}
12
- #
13
- # Parameters:
14
- # uri: [String|URI]
15
- # URI of the file to download.
16
- # Example:
17
- # https://www5.fdic.gov/idasp/Offices2.zip
18
- #
19
- # :username
20
- # When supplied, basic authentication is used with the username and password.
21
- # Default: nil
22
- #
23
- # :password
24
- # Password to use with basic authentication when the username is supplied.
25
- #
26
- # Notes:
27
- # * Since Net::HTTP download only supports a push stream, the data is streamed into a tempfile first.
28
- class Reader
29
- def self.open(uri, username: nil, password: nil, **args, &block)
30
- raise(ArgumentError, 'file_name must be a URI string') unless uri.is_a?(String) || uri.is_a?(URI)
31
- handle_redirects(uri, username: username, password: password, **args, &block)
32
- end
33
-
34
- def self.handle_redirects(uri, username: nil, password: nil, http_redirect_count: 10, **args, &block)
35
- uri = URI.parse(uri) unless uri.is_a?(URI)
36
- result = nil
37
- raise(IOStreams::Errors::CommunicationsFailure, "Too many redirects") if http_redirect_count < 1
38
-
39
- Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
40
- request = Net::HTTP::Get.new(uri)
41
- request.basic_auth(username, password) if username
42
-
43
- http.request(request) do |response|
44
- if response.is_a?(Net::HTTPNotFound)
45
- raise(IOStreams::Errors::CommunicationsFailure, "Invalid URL: #{uri}")
46
- end
47
- if response.is_a?(Net::HTTPUnauthorized)
48
- raise(IOStreams::Errors::CommunicationsFailure, "Authorization Required: Invalid :username or :password.")
49
- end
50
- if response.is_a?(Net::HTTPRedirection)
51
- new_uri = response['location']
52
- return handle_redirects(new_uri, username: username, password: password, http_redirect_count: http_redirect_count - 1, **args, &block)
53
- end
54
-
55
- raise(IOStreams::Errors::CommunicationsFailure, "Invalid response code: #{response.code}") unless response.is_a?(Net::HTTPSuccess)
56
-
57
- # Since Net::HTTP download only supports a push stream, write it to a tempfile first.
58
- IOStreams::File::Path.temp_file_name('iostreams_http') do |file_name|
59
- IOStreams::File::Writer.open(file_name) do |io|
60
- response.read_body { |chunk| io.write(chunk) }
61
- end
62
- # Return a read stream
63
- result = IOStreams::File::Reader.open(file_name, &block)
64
- end
65
- end
66
- end
67
- result
68
- end
69
- end
70
- end
71
- end
@@ -1,26 +0,0 @@
1
- require 'uri'
2
- module IOStreams
3
- module S3
4
- autoload :Path, 'io_streams/s3/path'
5
- autoload :Reader, 'io_streams/s3/reader'
6
- autoload :Writer, 'io_streams/s3/writer'
7
-
8
- # Sample URI: s3://mybucket/user/abc.zip
9
- def self.parse_uri(uri)
10
- uri = URI.parse(uri)
11
- raise "Invalid URI. Required Format: 's3://<bucket_name>/<key>'" unless uri.scheme == 's3'
12
- {
13
- bucket: uri.host,
14
- key: uri.path.sub(/\A\//, '')
15
- }
16
- end
17
-
18
- def self.load_dependencies
19
- return if defined?(::Aws::S3::Resource)
20
-
21
- require 'aws-sdk-s3'
22
- rescue LoadError => exc
23
- raise(LoadError, "Install gem 'aws-sdk-s3' to read and write AWS S3 files: #{exc.message}")
24
- end
25
- end
26
- end
@@ -1,40 +0,0 @@
1
- # frozen_string_literal: true
2
- module IOStreams
3
- module S3
4
- class Path < IOStreams::BasePath
5
- def initialize(path)
6
- IOStreams::S3.load_dependencies
7
- @s3 = Aws::S3::Resource.new
8
- @options = IOStreams::S3.parse_uri(path)
9
- @object = s3.bucket(options[:bucket]).object(options[:key])
10
- super(path)
11
- end
12
-
13
- # S3 logically creates paths when a key is set.
14
- def mkpath
15
- self
16
- end
17
-
18
- def mkdir
19
- self
20
- end
21
-
22
- def exist?
23
- object.exists?
24
- end
25
-
26
- def size
27
- object.size
28
- end
29
-
30
- def delete
31
- object.delete
32
- self
33
- end
34
-
35
- private
36
-
37
- attr_reader :s3, :options, :object
38
- end
39
- end
40
- end
@@ -1,28 +0,0 @@
1
- module IOStreams
2
- module S3
3
- class Reader
4
- # Read from an AWS S3 file
5
- def self.open(uri, region: nil, **args, &block)
6
- raise(ArgumentError, 'file_name must be a URI string') unless uri.is_a?(String)
7
-
8
- IOStreams::S3.load_dependencies
9
-
10
- # https://aws.amazon.com/blogs/developer/using-resources/
11
- s3 = region.nil? ? Aws::S3::Resource.new : Aws::S3::Resource.new(region: region)
12
- options = IOStreams::S3.parse_uri(uri)
13
- object = s3.bucket(options[:bucket]).object(options[:key])
14
-
15
- begin
16
- # Since S3 download only supports a push stream, write it to a tempfile first.
17
- IOStreams::File::Path.temp_file_name('iostreams_s3') do |file_name|
18
- args[:response_target] = file_name
19
- object.get(args)
20
-
21
- # Return a read stream
22
- IOStreams::File::Reader.open(file_name, &block)
23
- end
24
- end
25
- end
26
- end
27
- end
28
- end
@@ -1,85 +0,0 @@
1
- module IOStreams
2
- module S3
3
- class Writer
4
- # Write to AWS S3
5
- #
6
- # Arguments:
7
- #
8
- # uri: [String]
9
- # Prefix must be: `s3://`
10
- # followed by bucket name,
11
- # followed by path and file_name (key).
12
- # Examples:
13
- # s3://my-bucket-name/file_name.txt
14
- # s3://my-bucket-name/some_path/file_name.csv
15
- #
16
- # region: [String]
17
- # AWS Region.
18
- # Default: ENV['AWS_REGION'], or supplied by ruby driver
19
- #
20
- # thread_count: [Integer]
21
- # The number of parallel multipart uploads
22
- # Default: 10
23
- #
24
- # tempfile: [Boolean]
25
- # Normally read data is stored in memory when building the parts in order to complete
26
- # the underlying multipart upload. By passing `:tempfile => true` data read will be
27
- # temporarily stored on disk reducing the memory footprint vastly.
28
- # Default: false
29
- #
30
- # part_size: [Integer]
31
- # Defines how big each part, except the last, should be.
32
- # Default: 5 * 1024 * 1024
33
- #
34
- # Other possible options extracted from AWS source code:
35
- # # See: https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#create_multipart_upload-instance_method
36
- #
37
- # acl: "private", # accepts private, public-read, public-read-write, authenticated-read, aws-exec-read, bucket-owner-read, bucket-owner-full-control
38
- # cache_control: "CacheControl",
39
- # content_disposition: "ContentDisposition",
40
- # content_encoding: "ContentEncoding",
41
- # content_language: "ContentLanguage",
42
- # content_type: "ContentType",
43
- # expires: Time.now,
44
- # grant_full_control: "GrantFullControl",
45
- # grant_read: "GrantRead",
46
- # grant_read_acp: "GrantReadACP",
47
- # grant_write_acp: "GrantWriteACP",
48
- # metadata: {
49
- # "MetadataKey" => "MetadataValue",
50
- # },
51
- # server_side_encryption: "AES256", # accepts AES256, aws:kms
52
- # storage_class: "STANDARD", # accepts STANDARD, REDUCED_REDUNDANCY, STANDARD_IA, ONEZONE_IA, INTELLIGENT_TIERING, GLACIER, DEEP_ARCHIVE
53
- # website_redirect_location: "WebsiteRedirectLocation",
54
- # sse_customer_algorithm: "SSECustomerAlgorithm",
55
- # sse_customer_key: "SSECustomerKey",
56
- # sse_customer_key_md5: "SSECustomerKeyMD5",
57
- # ssekms_key_id: "SSEKMSKeyId",
58
- # ssekms_encryption_context: "SSEKMSEncryptionContext",
59
- # request_payer: "requester", # accepts requester
60
- # tagging: "TaggingHeader",
61
- # object_lock_mode: "GOVERNANCE", # accepts GOVERNANCE, COMPLIANCE
62
- # object_lock_retain_until_date: Time.now,
63
- # object_lock_legal_hold_status: "ON", # accepts ON, OFF
64
- #
65
- # Raises [MultipartUploadError] If an object is being uploaded in
66
- # parts, and the upload can not be completed, then the upload is
67
- # aborted and this error is raised. The raised error has a `#errors`
68
- # method that returns the failures that caused the upload to be
69
- # aborted.
70
- def self.open(uri, region: nil, **args)
71
- raise(ArgumentError, 'file_name must be a URI string') unless uri.is_a?(String)
72
-
73
- IOStreams::S3.load_dependencies
74
-
75
- options = IOStreams::S3.parse_uri(uri)
76
- s3 = region.nil? ? Aws::S3::Resource.new : Aws::S3::Resource.new(region: region)
77
- object = s3.bucket(options[:bucket]).object(options[:key])
78
- object.upload_stream(args) do |s3|
79
- s3.binmode
80
- yield(s3)
81
- end
82
- end
83
- end
84
- end
85
- end
@@ -1,67 +0,0 @@
1
- module IOStreams
2
- # Example:
3
- # IOStreams::SFTP::Reader.open(
4
- # 'file.txt',
5
- # user: 'jbloggs',
6
- # password: 'secret',
7
- # host: 'example.org'
8
- # ) do |input|
9
- # puts input.read
10
- # end
11
- module SFTP
12
- class Reader
13
- include SemanticLogger::Loggable if defined?(SemanticLogger)
14
-
15
- # Stream from a remote file over sftp.
16
- #
17
- # file_name: [String]
18
- # Name of file to read from.
19
- #
20
- # user: [String]
21
- # Name of user to login with.
22
- #
23
- # password: [String]
24
- # Password for the user.
25
- #
26
- # host: [String]
27
- # Name of the host to connect to.
28
- #
29
- # port: [Integer]
30
- # Port to connect to at the above host.
31
- #
32
- # binary [true|false]
33
- # Whether to read in binary mode
34
- # Default: true
35
- #
36
- # options: [Hash]
37
- # Any options supported by Net::SSH.start
38
- #
39
- # Note:
40
- # - Net::SFTP::StatusException means the file could not be read
41
- def self.open(file_name, user:, password:, host:, port: 22, binary: true, options: {}, &block)
42
- raise(NotImplementedError, 'Can only SFTP directly to a file name, not another stream.') if IOStreams.writer_stream?(file_name)
43
-
44
- begin
45
- require 'net/sftp' unless defined?(Net::SFTP)
46
- rescue LoadError => e
47
- raise(LoadError, "Please install the 'net-sftp' gem for SFTP streaming support. #{e.message}")
48
- end
49
-
50
- options = options.dup
51
- options[:logger] ||= self.logger if defined?(SemanticLogger)
52
- options[:port] ||= 22
53
- options[:max_pkt_size] ||= 65536
54
- options[:password] = password
55
- options[:port] = port
56
- mode = binary ? 'rb' : 'r'
57
-
58
- result = nil
59
- Net::SFTP.start(host, user, options) do |sftp|
60
- result = sftp.file.open(file_name, mode, &block)
61
- end
62
- result
63
- end
64
-
65
- end
66
- end
67
- end
@@ -1,68 +0,0 @@
1
- module IOStreams
2
- # Example:
3
- # IOStreams::SFTP::Writer.open('file.txt',
4
- # user: 'jbloggs',
5
- # password: 'secret',
6
- # host: 'example.org',
7
- # options: {compression: false}
8
- # ) do |output|
9
- # output.write('Hello World')
10
- # end
11
- module SFTP
12
- class Writer
13
- include SemanticLogger::Loggable if defined?(SemanticLogger)
14
-
15
- # Stream to a remote file over sftp.
16
- #
17
- # file_name: [String]
18
- # Name of file to write to.
19
- #
20
- # user: [String]
21
- # Name of user to login with.
22
- #
23
- # password: [String]
24
- # Password for the user.
25
- #
26
- # host: [String]
27
- # Name of the host to connect to.
28
- #
29
- # port: [Integer]
30
- # Port to connect to at the above host.
31
- #
32
- # mkdir [true|false]
33
- # Whether to create the output directory on the target system before writing the file.
34
- # The path is created recursively if any portions of it are missing.
35
- # Default: false
36
- #
37
- # binary [true|false]
38
- # Whether to write in binary mode
39
- # Default: true
40
- #
41
- # options: [Hash]
42
- # Any options supported by Net::SSH.start
43
- def self.open(file_name, user:, password:, host:, port: 22, mkdir: false, binary: true, options: {}, &block)
44
- raise(NotImplementedError, 'Can only SFTP directly to a file name, not another stream.') if IOStreams.writer_stream?(file_name)
45
-
46
- begin
47
- require 'net/sftp' unless defined?(Net::SFTP)
48
- rescue LoadError => e
49
- raise(LoadError, "Please install the 'net-sftp' gem for SFTP streaming support. #{e.message}")
50
- end
51
-
52
- options = options.dup
53
- options[:logger] ||= logger if defined?(SemanticLogger)
54
- options[:port] ||= 22
55
- options[:max_pkt_size] ||= 65536
56
- options[:password] = password
57
- options[:port] = port
58
- mode = binary ? 'wb' : 'w'
59
-
60
- Net::SFTP.start(host, user, options) do |sftp|
61
- sftp.session.exec!("mkdir -p '#{::File.dirname(file_name)}'") if mkdir
62
- sftp.file.open(file_name, mode, &block)
63
- end
64
- end
65
-
66
- end
67
- end
68
- end
@@ -1,35 +0,0 @@
1
- require_relative 'test_helper'
2
-
3
- module IOStreams
4
- class BasePathTest < Minitest::Test
5
- describe IOStreams::BasePath do
6
- describe '.join' do
7
- let(:path) { IOStreams::BasePath.new('some_path') }
8
-
9
- it 'returns self when no elements' do
10
- assert_equal path.object_id, path.join.object_id
11
- end
12
-
13
- it 'adds element to path' do
14
- assert_equal ::File.join('some_path', 'test'), path.join('test').to_s
15
- end
16
-
17
- it 'adds paths to root' do
18
- assert_equal ::File.join('some_path', 'test', 'second', 'third'), path.join('test', 'second', 'third').to_s
19
- end
20
-
21
- it 'returns path and filename' do
22
- assert_equal ::File.join('some_path', 'file.xls'), path.join('file.xls').to_s
23
- end
24
-
25
- it 'adds elements to path' do
26
- assert_equal ::File.join('some_path', 'test', 'second', 'third', 'file.xls'), path.join('test', 'second', 'third', 'file.xls').to_s
27
- end
28
-
29
- it 'return path as sent in when full path' do
30
- assert_equal ::File.join('some_path', 'test', 'second', 'third', 'file.xls'), path.join('some_path', 'test', 'second', 'third', 'file.xls').to_s
31
- end
32
- end
33
- end
34
- end
35
- end