iostreams 1.10.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +20 -2
  3. data/Rakefile +7 -0
  4. data/lib/io_streams/builder.rb +10 -10
  5. data/lib/io_streams/bzip2/writer.rb +1 -1
  6. data/lib/io_streams/encode/reader.rb +2 -2
  7. data/lib/io_streams/encode/writer.rb +5 -5
  8. data/lib/io_streams/gzip/reader.rb +1 -1
  9. data/lib/io_streams/gzip/writer.rb +1 -1
  10. data/lib/io_streams/io_streams.rb +47 -21
  11. data/lib/io_streams/line/reader.rb +2 -2
  12. data/lib/io_streams/line/writer.rb +1 -1
  13. data/lib/io_streams/path.rb +2 -2
  14. data/lib/io_streams/paths/file.rb +25 -11
  15. data/lib/io_streams/paths/http.rb +80 -7
  16. data/lib/io_streams/paths/matcher.rb +3 -3
  17. data/lib/io_streams/paths/s3.rb +22 -3
  18. data/lib/io_streams/paths/sftp.rb +9 -10
  19. data/lib/io_streams/pgp/reader.rb +25 -7
  20. data/lib/io_streams/pgp/writer.rb +95 -29
  21. data/lib/io_streams/pgp.rb +289 -87
  22. data/lib/io_streams/reader.rb +4 -4
  23. data/lib/io_streams/record/reader.rb +3 -4
  24. data/lib/io_streams/record/writer.rb +3 -4
  25. data/lib/io_streams/row/reader.rb +1 -1
  26. data/lib/io_streams/row/writer.rb +1 -1
  27. data/lib/io_streams/stream.rb +36 -30
  28. data/lib/io_streams/symmetric_encryption/reader.rb +2 -2
  29. data/lib/io_streams/symmetric_encryption/writer.rb +4 -4
  30. data/lib/io_streams/tabular/header.rb +18 -6
  31. data/lib/io_streams/tabular/parser/array.rb +0 -10
  32. data/lib/io_streams/tabular/parser/csv.rb +6 -38
  33. data/lib/io_streams/tabular/parser/fixed.rb +5 -5
  34. data/lib/io_streams/tabular/parser/psv.rb +0 -12
  35. data/lib/io_streams/tabular.rb +5 -10
  36. data/lib/io_streams/utils.rb +6 -8
  37. data/lib/io_streams/version.rb +1 -1
  38. data/lib/io_streams/writer.rb +6 -6
  39. data/lib/io_streams/xlsx/reader.rb +1 -1
  40. data/lib/io_streams/zip/writer.rb +22 -10
  41. data/lib/iostreams.rb +0 -1
  42. metadata +28 -113
  43. data/lib/io_streams/deprecated.rb +0 -216
  44. data/lib/io_streams/tabular/utility/csv_row.rb +0 -105
  45. data/test/builder_test.rb +0 -311
  46. data/test/bzip2_reader_test.rb +0 -27
  47. data/test/bzip2_writer_test.rb +0 -56
  48. data/test/deprecated_test.rb +0 -121
  49. data/test/encode_reader_test.rb +0 -51
  50. data/test/encode_writer_test.rb +0 -90
  51. data/test/files/embedded_lines_test.csv +0 -7
  52. data/test/files/multiple_files.zip +0 -0
  53. data/test/files/spreadsheet.xlsx +0 -0
  54. data/test/files/test.csv +0 -4
  55. data/test/files/test.json +0 -3
  56. data/test/files/test.psv +0 -4
  57. data/test/files/text file.txt +0 -3
  58. data/test/files/text.txt +0 -3
  59. data/test/files/text.txt.bz2 +0 -0
  60. data/test/files/text.txt.gz +0 -0
  61. data/test/files/text.txt.gz.zip +0 -0
  62. data/test/files/text.zip +0 -0
  63. data/test/files/text.zip.gz +0 -0
  64. data/test/files/unclosed_quote_large_test.csv +0 -1658
  65. data/test/files/unclosed_quote_test.csv +0 -4
  66. data/test/files/unclosed_quote_test2.csv +0 -3
  67. data/test/files/utf16_test.csv +0 -0
  68. data/test/gzip_reader_test.rb +0 -27
  69. data/test/gzip_writer_test.rb +0 -52
  70. data/test/io_streams_test.rb +0 -132
  71. data/test/line_reader_test.rb +0 -325
  72. data/test/line_writer_test.rb +0 -59
  73. data/test/minimal_file_reader.rb +0 -25
  74. data/test/path_test.rb +0 -55
  75. data/test/paths/file_test.rb +0 -202
  76. data/test/paths/http_test.rb +0 -34
  77. data/test/paths/matcher_test.rb +0 -120
  78. data/test/paths/s3_test.rb +0 -220
  79. data/test/paths/sftp_test.rb +0 -106
  80. data/test/pgp_reader_test.rb +0 -46
  81. data/test/pgp_test.rb +0 -254
  82. data/test/pgp_writer_test.rb +0 -130
  83. data/test/record_reader_test.rb +0 -60
  84. data/test/record_writer_test.rb +0 -82
  85. data/test/row_reader_test.rb +0 -35
  86. data/test/row_writer_test.rb +0 -56
  87. data/test/stream_test.rb +0 -574
  88. data/test/tabular_test.rb +0 -338
  89. data/test/test_helper.rb +0 -40
  90. data/test/utils_test.rb +0 -20
  91. data/test/xlsx_reader_test.rb +0 -37
  92. data/test/zip_reader_test.rb +0 -53
  93. data/test/zip_writer_test.rb +0 -48
@@ -3,7 +3,7 @@ module IOStreams
3
3
  # Implement fnmatch logic for any path iterator
4
4
  class Matcher
5
5
  # Characters indicating that pattern matching is required
6
- MATCH_START_CHARS = /[*?\[{]/.freeze
6
+ MATCH_START_CHARS = /[*?\[{]/
7
7
 
8
8
  attr_reader :path, :pattern, :flags
9
9
 
@@ -50,9 +50,9 @@ module IOStreams
50
50
  @path = path || IOStreams.path
51
51
  @pattern = pattern
52
52
  else
53
- new_path = elements[0..index - 1].join("/")
53
+ new_path = elements[0..(index - 1)].join("/")
54
54
  @path = path.nil? ? IOStreams.path(new_path) : path.join(new_path)
55
- @pattern = elements[index..-1].join("/")
55
+ @pattern = elements[index..].join("/")
56
56
  end
57
57
  end
58
58
  end
@@ -8,6 +8,9 @@ module IOStreams
8
8
  # Largest file size supported by the S3 copy object api.
9
9
  S3_COPY_OBJECT_SIZE_LIMIT = 5 * 1024 * 1024 * 1024
10
10
 
11
+ # When an upload file exceeds this size, use a multipart file upload.
12
+ MULTIPART_UPLOAD_SIZE = 5 * 1024 * 1024
13
+
11
14
  # Arguments:
12
15
  #
13
16
  # url: [String]
@@ -24,6 +27,21 @@ module IOStreams
24
27
  # secret_access_key: [String]
25
28
  # AWS Secret Access Key to use to access this bucket.
26
29
  #
30
+ # region: [String]
31
+ # The AWS region to connect to.
32
+ # Defaults to region set in environment variable, or credential files.
33
+ #
34
+ # client: [Aws::S3::Client | Hash]
35
+ # Supply the AWS S3 Client instance to use for this path.
36
+ # Or, when a Hash, build a new client using the hash parameters.
37
+ #
38
+ # Example:
39
+ # client = Aws::S3::Client.new(endpoint: "https://s3.test.com")
40
+ # IOStreams::Paths::S3.new("s3://bucket/path/file_name.txt", client: client)
41
+ #
42
+ # Example:
43
+ # IOStreams::Paths::S3.new("s3://bucket/path/file_name.txt", client: { endpoint: "https://s3.test.com" })
44
+ #
27
45
  # Writer specific options:
28
46
  #
29
47
  # @option params [String] :acl
@@ -133,7 +151,7 @@ module IOStreams
133
151
  #
134
152
  # @option params [String] :object_lock_legal_hold_status
135
153
  # The Legal Hold status that you want to apply to the specified object.
136
- def initialize(url, client: nil, access_key_id: nil, secret_access_key: nil, **args)
154
+ def initialize(url, client: nil, access_key_id: nil, secret_access_key: nil, region: nil, **args)
137
155
  Utils.load_soft_dependency("aws-sdk-s3", "AWS S3") unless defined?(::Aws::S3::Client)
138
156
 
139
157
  uri = Utils::URI.new(url)
@@ -148,6 +166,7 @@ module IOStreams
148
166
  @client_options = client.is_a?(Hash) ? client.dup : {}
149
167
  @client_options[:access_key_id] = access_key_id if access_key_id
150
168
  @client_options[:secret_access_key] = secret_access_key if secret_access_key
169
+ @client_options[:region] = region if region
151
170
  end
152
171
 
153
172
  @options = args
@@ -192,7 +211,7 @@ module IOStreams
192
211
 
193
212
  # Make S3 perform direct copies within S3 itself.
194
213
  def copy_to(target_path, convert: true, **args)
195
- return super(target_path, convert: convert, **args) if convert || (size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
214
+ return super if convert || (size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
196
215
 
197
216
  target = IOStreams.new(target_path)
198
217
  return super(target, convert: convert, **args) unless target.is_a?(self.class)
@@ -269,7 +288,7 @@ module IOStreams
269
288
 
270
289
  # Shortcut method if caller has a filename already with no other streams applied:
271
290
  def write_file(file_name)
272
- if ::File.size(file_name) > 5 * 1024 * 1024
291
+ if ::File.size(file_name) > MULTIPART_UPLOAD_SIZE
273
292
  # Use multipart file upload
274
293
  s3 = Aws::S3::Resource.new(client: client)
275
294
  obj = s3.bucket(bucket_name).object(path)
@@ -6,7 +6,7 @@ module IOStreams
6
6
  #
7
7
  # Example:
8
8
  # IOStreams.
9
- # path("sftp://example.org/path/file.txt", username: "jbloggs", password: "secret", compression: false).
9
+ # path("sftp://example.org/path/file.txt", username: "jbloggs", password: "secret").
10
10
  # reader do |input|
11
11
  # puts input.read
12
12
  # end
@@ -18,13 +18,11 @@ module IOStreams
18
18
  #
19
19
  # Example:
20
20
  # IOStreams.
21
- # path("sftp://example.org/path/file.txt", username: "jbloggs", password: "secret", compression: false).
21
+ # path("sftp://example.org/path/file.txt", username: "jbloggs", password: "secret").
22
22
  # writer do |output|
23
23
  # output.write('Hello World')
24
24
  # end
25
25
  class SFTP < IOStreams::Path
26
- include SemanticLogger::Loggable if defined?(SemanticLogger)
27
-
28
26
  class << self
29
27
  attr_accessor :sshpass_bin, :sftp_bin, :sshpass_wait_seconds, :before_password_wait_seconds
30
28
  end
@@ -195,7 +193,7 @@ module IOStreams
195
193
  # Give time for password to be processed and stdin to be passed to sftp process.
196
194
  sleep self.class.sshpass_wait_seconds
197
195
 
198
- writer.puts "get #{remote_file_name} #{local_file_name}"
196
+ writer.puts "get #{remote_file_name.inspect} #{local_file_name.inspect}"
199
197
  writer.puts "bye"
200
198
  writer.close
201
199
  out = reader.read.chomp
@@ -311,7 +309,7 @@ module IOStreams
311
309
 
312
310
  def build_ssh_options
313
311
  options = ssh_options.dup
314
- options[:logger] ||= logger if defined?(SemanticLogger)
312
+ options[:logger] ||= IOStreams.logger if IOStreams.logger
315
313
  options[:port] ||= port
316
314
  options[:max_pkt_size] ||= 65_536
317
315
  options[:password] ||= @password
@@ -319,15 +317,16 @@ module IOStreams
319
317
  end
320
318
 
321
319
  def map_log_level
322
- return "INFO" unless defined?(SemanticLogger)
323
-
324
- case logger.level
320
+ level = IOStreams.logger&.level
321
+ case level
325
322
  when :trace
326
323
  "DEBUG3"
327
324
  when :warn
328
325
  "ERROR"
326
+ when Symbol
327
+ level.to_s
329
328
  else
330
- logger.level.to_s
329
+ "INFO"
331
330
  end
332
331
  end
333
332
  end
@@ -20,18 +20,36 @@ module IOStreams
20
20
  # Name of file to read from
21
21
  #
22
22
  # passphrase: [String]
23
- # Pass phrase for private key to decrypt the file with
24
- def self.file(file_name, passphrase: nil)
23
+ # Pass phrase for private key to decrypt the file with.
24
+ # Not required when the file is signed but not encrypted.
25
+ #
26
+ # ignore_mdc_error: [true|false]
27
+ # Decrypt files that lack MDC (Modification Detection Code) integrity protection.
28
+ # Some legacy/enterprise systems (e.g. Workday) still produce such files, which
29
+ # modern GnuPG refuses to decrypt with `gpg: decryption forced to fail!`.
30
+ # Only enable this for files from a trusted source: without MDC the decrypted
31
+ # contents are not protected against tampering.
32
+ # Default: false
33
+ def self.file(file_name, passphrase: nil, ignore_mdc_error: false)
25
34
  # Cannot use `passphrase: self.default_passphrase` since it is considered private
26
35
  passphrase ||= default_passphrase
27
- raise(ArgumentError, "Missing both passphrase and IOStreams::Pgp::Reader.default_passphrase") unless passphrase
28
36
 
29
- loopback = IOStreams::Pgp.pgp_version.to_f >= 2.1 ? "--pinentry-mode loopback" : ""
30
- command = "#{IOStreams::Pgp.executable} #{loopback} --batch --no-tty --yes --decrypt --passphrase-fd 0 #{file_name}"
31
- IOStreams::Pgp.logger&.debug { "IOStreams::Pgp::Reader.open: #{command}" }
37
+ args = []
38
+ # Use --pinentry-mode loopback for all GnuPG versions >= 2.1
39
+ args += ["--pinentry-mode", "loopback"] if IOStreams::Pgp.pgp_version.to_f >= 2.1
40
+ # Use --no-symkey-cache for GnuPG versions >= 2.4 to avoid caching session keys
41
+ args << "--no-symkey-cache" if IOStreams::Pgp.pgp_version.to_f >= 2.4
42
+ args << "--ignore-mdc-error" if ignore_mdc_error
43
+ args += ["--batch", "--no-tty", "--yes", "--decrypt"]
44
+ # Only feed a passphrase when one is supplied; sign-only files need none.
45
+ args += ["--passphrase-fd", "0"] if passphrase
46
+ args << file_name.to_s
47
+
48
+ command = IOStreams::Pgp.gpg_command(*args)
49
+ IOStreams.logger&.debug { "IOStreams::Pgp::Reader.open: #{command.shelljoin}" }
32
50
 
33
51
  # Read decrypted contents from stdout
34
- Open3.popen3(command) do |stdin, stdout, stderr, waith_thr|
52
+ Open3.popen3(*command) do |stdin, stdout, stderr, waith_thr|
35
53
  stdin.puts(passphrase) if passphrase
36
54
  stdin.close
37
55
  result =
@@ -26,18 +26,42 @@ module IOStreams
26
26
  @audit_recipient = nil
27
27
  end
28
28
 
29
- # Write to a PGP / GPG file, encrypting the contents as it is written.
29
+ # Write to a PGP / GPG file, encrypting and/or signing the contents as it is written.
30
30
  #
31
31
  # file_name: [String]
32
32
  # Name of file to write to.
33
33
  #
34
+ # encrypt: [true|false]
35
+ # Whether to encrypt the file for the supplied recipient(s).
36
+ # When set to false the file is signed but not encrypted, in which case a
37
+ # :signer must be supplied and :recipient / :import_and_trust_key are ignored.
38
+ # Default: true
39
+ #
34
40
  # recipient: [String|Array<String>]
35
41
  # One or more emails of users for which to encrypt the file.
42
+ # Ignored when encrypt is false.
36
43
  #
37
44
  # import_and_trust_key: [String|Array<String>]
38
45
  # One or more pgp keys to import and then use to encrypt the file.
39
46
  # Note: ASCII keys can contain multiple keys; only the last one in the file is used.
40
47
  #
48
+ # import_and_trust_level: [Integer]
49
+ # The owner-trust level to assign to keys supplied via :import_and_trust_key.
50
+ # 1 : Undefined (no opinion)
51
+ # 2 : Never (do not trust)
52
+ # 3 : Marginal
53
+ # 4 : Full
54
+ # 5 : Ultimate
55
+ # Default: 5 : Ultimate
56
+ #
57
+ # SECURITY WARNING:
58
+ # Only import and trust keys received from a verified, trusted source.
59
+ # The default trust level is `5` (Ultimate), which tells GPG to treat the imported key
60
+ # as if it were one of your own keys. An ultimately trusted key is implicitly valid and
61
+ # can in turn confer validity on other keys it has signed. Importing an attacker supplied
62
+ # key at this level allows that attacker to impersonate other recipients.
63
+ # When the key cannot be fully verified, supply a lower `import_and_trust_level`.
64
+ #
41
65
  # signer: [String]
42
66
  # Name of user with which to sign the encrypted file.
43
67
  # Default: default_signer or do not sign.
@@ -46,7 +70,7 @@ module IOStreams
46
70
  # Passphrase to use to open the private key when signing the file.
47
71
  # Default: default_signer_passphrase
48
72
  #
49
- # compression: [:none|:zip|:zlib|:bzip2]
73
+ # compress: [:none|:zip|:zlib|:bzip2]
50
74
  # Note: Standard PGP only supports :zip.
51
75
  # :zlib is better than zip.
52
76
  # :bzip2 is best, but uses a lot of memory and is much slower.
@@ -55,58 +79,100 @@ module IOStreams
55
79
  # compress_level: [Integer]
56
80
  # Compression level
57
81
  # Default: 6
82
+ #
83
+ # Note: There is intentionally no option here to disable MDC (Modification Detection
84
+ # Code) integrity protection on the files we produce. The reader exposes
85
+ # `ignore_mdc_error:` so we can *consume* legacy files that lack MDC (see Reader),
86
+ # but we never want to *generate* them: MDC is what protects the encrypted contents
87
+ # against tampering, and modern GnuPG mandates it for current ciphers anyway
88
+ # (`--disable-mdc` is a no-op unless an obsolete cipher is forced). Omitting MDC on
89
+ # output would only weaken files we create, with no upside for this library.
58
90
  def self.file(file_name,
91
+ encrypt: true,
59
92
  recipient: nil,
60
93
  import_and_trust_key: nil,
94
+ import_and_trust_level: 5,
61
95
  signer: default_signer,
62
96
  signer_passphrase: default_signer_passphrase,
63
- compression: :zip,
64
- compress_level: 6,
65
- original_file_name: nil)
66
-
67
- raise(ArgumentError, "Requires either :recipient or :import_and_trust_key") unless recipient || import_and_trust_key
97
+ compress: :zip,
98
+ compress_level: 6)
99
+ if encrypt
100
+ raise(ArgumentError, "Requires either :recipient or :import_and_trust_key") unless recipient || import_and_trust_key
101
+ elsif !signer
102
+ raise(ArgumentError, "Requires a :signer when encrypt is false")
103
+ end
68
104
 
69
- compress_level = 0 if compression == :none
105
+ compress_level = 0 if compress == :none
70
106
 
71
- recipients = Array(recipient)
72
- recipients << audit_recipient if audit_recipient
107
+ recipients =
108
+ if encrypt
109
+ collect_recipients(recipient, import_and_trust_key, import_and_trust_level)
110
+ else
111
+ []
112
+ end
73
113
 
74
- Array(import_and_trust_key).each do |key|
75
- recipients << IOStreams::Pgp.import_and_trust(key: key)
76
- end
114
+ # Write to stdin, with the encrypted and/or signed contents being written to the file
115
+ args = build_args(
116
+ file_name: file_name,
117
+ encrypt: encrypt,
118
+ signer: signer,
119
+ signer_passphrase: signer_passphrase,
120
+ compress: compress,
121
+ compress_level: compress_level,
122
+ recipients: recipients
123
+ )
124
+ command = IOStreams::Pgp.gpg_command(*args)
77
125
 
78
- # Write to stdin, with encrypted contents being written to the file
79
- command = "#{IOStreams::Pgp.executable} --batch --no-tty --yes --encrypt"
80
- command << " --sign --local-user \"#{signer}\"" if signer
81
- if signer_passphrase
82
- command << " --pinentry-mode loopback" if IOStreams::Pgp.pgp_version.to_f >= 2.1
83
- command << " --passphrase \"#{signer_passphrase}\""
84
- end
85
- command << " -z #{compress_level}" if compress_level != 6
86
- command << " --compress-algo #{compression}" unless compression == :none
87
- recipients.each { |address| command << " --recipient \"#{address}\"" }
88
- command << " -o \"#{file_name}\""
89
-
90
- IOStreams::Pgp.logger&.debug { "IOStreams::Pgp::Writer.open: #{command}" }
126
+ # Do not log the command, it may contain the signer passphrase.
127
+ action = encrypt ? "encrypt" : "sign"
128
+ IOStreams.logger&.debug { "IOStreams::Pgp::Writer.open: #{action} -o #{file_name}" }
91
129
 
92
130
  result = nil
93
- Open3.popen2e(command) do |stdin, out, waith_thr|
131
+ Open3.popen2e(*command) do |stdin, out, waith_thr|
94
132
  begin
95
133
  stdin.binmode
96
134
  result = yield(stdin)
97
135
  stdin.close
98
136
  rescue Errno::EPIPE
99
137
  # Ignore broken pipe because gpg terminates early due to an error
100
- ::File.delete(file_name) if ::File.exist?(file_name)
138
+ ::FileUtils.rm_f(file_name)
101
139
  raise(Pgp::Failure, "GPG Failed writing to encrypted file: #{file_name}: #{out.read.chomp}")
102
140
  end
103
141
  unless waith_thr.value.success?
104
- ::File.delete(file_name) if ::File.exist?(file_name)
142
+ ::FileUtils.rm_f(file_name)
105
143
  raise(Pgp::Failure, "GPG Failed to create encrypted file: #{file_name}: #{out.read.chomp}")
106
144
  end
107
145
  end
108
146
  result
109
147
  end
148
+
149
+ def self.build_args(file_name:, encrypt:, signer:, signer_passphrase:, compress:, compress_level:, recipients:)
150
+ args = ["--batch", "--no-tty", "--yes"]
151
+ args << "--encrypt" if encrypt
152
+ args += ["--sign", "--local-user", signer.to_s] if signer
153
+ if signer_passphrase
154
+ args += ["--pinentry-mode", "loopback"] if IOStreams::Pgp.pgp_version.to_f >= 2.1
155
+ args << "--no-symkey-cache" if IOStreams::Pgp.pgp_version.to_f >= 2.4
156
+ args += ["--passphrase", signer_passphrase.to_s]
157
+ end
158
+ args += ["-z", compress_level.to_s] if compress_level != 6
159
+ args += ["--compress-algo", compress.to_s] unless compress == :none
160
+ recipients.each { |address| args += ["--recipient", address.to_s] }
161
+ args += ["-o", file_name.to_s]
162
+ args
163
+ end
164
+ private_class_method :build_args
165
+
166
+ def self.collect_recipients(recipient, import_and_trust_key, import_and_trust_level)
167
+ recipients = Array(recipient)
168
+ recipients << audit_recipient if audit_recipient
169
+
170
+ Array(import_and_trust_key).each do |key|
171
+ recipients << IOStreams::Pgp.import_and_trust(key: key, trust_level: import_and_trust_level)
172
+ end
173
+ recipients
174
+ end
175
+ private_class_method :collect_recipients
110
176
  end
111
177
  end
112
178
  end