iostreams 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +202 -0
  3. data/README.md +155 -47
  4. data/lib/io_streams/file/reader.rb +7 -8
  5. data/lib/io_streams/file/writer.rb +7 -8
  6. data/lib/io_streams/io_streams.rb +313 -129
  7. data/lib/io_streams/{delimited → line}/reader.rb +20 -30
  8. data/lib/io_streams/line/writer.rb +81 -0
  9. data/lib/io_streams/pgp.rb +4 -14
  10. data/lib/io_streams/record/reader.rb +55 -0
  11. data/lib/io_streams/record/writer.rb +63 -0
  12. data/lib/io_streams/row/reader.rb +60 -0
  13. data/lib/io_streams/row/writer.rb +62 -0
  14. data/lib/io_streams/s3.rb +25 -0
  15. data/lib/io_streams/s3/reader.rb +64 -0
  16. data/lib/io_streams/s3/writer.rb +13 -0
  17. data/lib/io_streams/streams.rb +1 -1
  18. data/lib/io_streams/tabular.rb +163 -0
  19. data/lib/io_streams/tabular/errors.rb +14 -0
  20. data/lib/io_streams/tabular/header.rb +146 -0
  21. data/lib/io_streams/tabular/parser/array.rb +26 -0
  22. data/lib/io_streams/tabular/parser/base.rb +12 -0
  23. data/lib/io_streams/tabular/parser/csv.rb +35 -0
  24. data/lib/io_streams/tabular/parser/fixed.rb +88 -0
  25. data/lib/io_streams/tabular/parser/hash.rb +21 -0
  26. data/lib/io_streams/tabular/parser/json.rb +25 -0
  27. data/lib/io_streams/tabular/parser/psv.rb +34 -0
  28. data/lib/io_streams/tabular/utility/csv_row.rb +115 -0
  29. data/lib/io_streams/version.rb +2 -2
  30. data/lib/io_streams/xlsx/reader.rb +1 -1
  31. data/lib/io_streams/zip/reader.rb +1 -1
  32. data/lib/io_streams/zip/writer.rb +1 -1
  33. data/lib/iostreams.rb +21 -10
  34. data/test/bzip2_reader_test.rb +21 -22
  35. data/test/bzip2_writer_test.rb +38 -32
  36. data/test/file_reader_test.rb +19 -18
  37. data/test/file_writer_test.rb +23 -22
  38. data/test/files/test.json +3 -0
  39. data/test/gzip_reader_test.rb +21 -22
  40. data/test/gzip_writer_test.rb +35 -29
  41. data/test/io_streams_test.rb +137 -61
  42. data/test/line_reader_test.rb +105 -0
  43. data/test/line_writer_test.rb +50 -0
  44. data/test/pgp_reader_test.rb +29 -29
  45. data/test/pgp_test.rb +149 -195
  46. data/test/pgp_writer_test.rb +63 -62
  47. data/test/record_reader_test.rb +61 -0
  48. data/test/record_writer_test.rb +73 -0
  49. data/test/row_reader_test.rb +34 -0
  50. data/test/row_writer_test.rb +51 -0
  51. data/test/tabular_test.rb +184 -0
  52. data/test/xlsx_reader_test.rb +13 -17
  53. data/test/zip_reader_test.rb +21 -22
  54. data/test/zip_writer_test.rb +40 -36
  55. metadata +41 -17
  56. data/lib/io_streams/csv/reader.rb +0 -21
  57. data/lib/io_streams/csv/writer.rb +0 -20
  58. data/lib/io_streams/delimited/writer.rb +0 -67
  59. data/test/csv_reader_test.rb +0 -34
  60. data/test/csv_writer_test.rb +0 -35
  61. data/test/delimited_reader_test.rb +0 -115
  62. data/test/delimited_writer_test.rb +0 -44
@@ -1,16 +1,14 @@
1
1
  module IOStreams
2
- module Delimited
2
+ module Line
3
3
  class Reader
4
- attr_accessor :delimiter, :buffer_size, :encoding, :strip_non_printable
4
+ attr_reader :delimiter, :buffer_size, :encoding, :strip_non_printable
5
5
 
6
- # Read from a file or stream
7
- def self.open(file_name_or_io, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false)
8
- if IOStreams.reader_stream?(file_name_or_io)
9
- yield new(file_name_or_io, delimiter: delimiter, buffer_size: buffer_size, encoding: encoding, strip_non_printable: strip_non_printable)
6
+ # Read a line at a time from a file or stream
7
+ def self.open(file_name_or_io, **args)
8
+ if file_name_or_io.is_a?(String)
9
+ IOStreams::File::Reader.open(file_name_or_io) { |io| yield new(io, **args) }
10
10
  else
11
- ::File.open(file_name_or_io, 'rb') do |io|
12
- yield new(io, delimiter: delimiter, buffer_size: buffer_size, encoding: encoding, strip_non_printable: strip_non_printable)
13
- end
11
+ yield new(file_name_or_io, **args)
14
12
  end
15
13
  end
16
14
 
@@ -47,6 +45,12 @@ module IOStreams
47
45
  # Force encoding to this encoding for all data being read
48
46
  # Default: UTF8_ENCODING
49
47
  # Set to nil to disable encoding
48
+ #
49
+ # TODO:
50
+ # - Skip Comment lines. RegExp?
51
+ # - Skip "empty" / "blank" lines. RegExp?
52
+ # - Extract header line(s) / first non-comment, non-blank line
53
+ # - Embedded newline support, RegExp? or Proc?
50
54
  def initialize(input_stream, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false)
51
55
  @input_stream = input_stream
52
56
  @delimiter = delimiter
@@ -54,11 +58,12 @@ module IOStreams
54
58
  @encoding = encoding
55
59
  @strip_non_printable = strip_non_printable
56
60
 
57
- @delimiter.force_encoding(UTF8_ENCODING) if @delimiter && @encoding
61
+ @delimiter.encode(UTF8_ENCODING) if @delimiter && @encoding
58
62
  @buffer = ''
59
63
  end
60
64
 
61
- # Returns each line at a time to to the supplied block
65
+ # Iterate over every line in the file/stream passing each line to supplied block in turn.
66
+ # Returns [Integer] the number of lines read from the file/stream.
62
67
  def each(&block)
63
68
  partial = nil
64
69
  loop do
@@ -83,32 +88,17 @@ module IOStreams
83
88
  end
84
89
  end
85
90
 
86
- alias_method :each_line, :each
87
-
88
- # Reads length bytes from the I/O stream.
89
- # Not recommended, but available if someone calls #read on this delimited reader
90
- def read(length = nil, outbuf = nil)
91
- if length
92
- while (@buffer.size < length) && (read_chunk > 0)
93
- end
94
- data = @buffer.slice!(0, length)
95
- outbuf << data if outbuf
96
- data
97
- else
98
- while read_chunk > 0
99
- end
100
- @buffer
101
- end
102
- end
103
-
104
- ##########################################################################
105
91
  private
106
92
 
93
+ attr_reader :buffer
94
+ attr_writer :delimiter
95
+
107
96
  NOT_PRINTABLE = Regexp.compile(/[^[:print:]|\r|\n]/)
108
97
 
109
98
  # Returns [Integer] the number of bytes read into the internal buffer
110
99
  # Returns 0 on EOF
111
100
  def read_chunk
101
+ # TODO: read into existing buffer
112
102
  chunk = @input_stream.read(@buffer_size)
113
103
  # EOF reached?
114
104
  return 0 unless chunk
@@ -0,0 +1,81 @@
module IOStreams
  module Line
    # Writes one line at a time to an output stream, appending the delimiter
    # after every line.
    class Writer
      attr_reader :delimiter, :encoding, :strip_non_printable

      # Write a line at a time to a file or stream.
      #
      # When `file_name_or_io` is a String it is opened with
      # IOStreams::File::Writer, otherwise it is used directly as the output stream.
      # All remaining keyword arguments are passed to #initialize.
      # The new writer is yielded to the supplied block.
      def self.open(file_name_or_io, **args)
        if file_name_or_io.is_a?(String)
          IOStreams::File::Writer.open(file_name_or_io) { |io| yield new(io, **args) }
        else
          yield new(file_name_or_io, **args)
        end
      end

      NOT_PRINTABLE = Regexp.compile(/[^[:print:]]/)

      # A delimited stream writer that will write to the supplied output stream
      #
      # The output stream should be binary with no text conversions performed
      # since `strip_non_printable` will be applied to the binary stream before
      # converting to UTF-8
      #
      # Parameters
      #   output_stream
      #     The output stream that implements #write
      #
      #   delimiter: [String]
      #     Add the specified delimiter after every record when writing it
      #     to the output stream
      #     Default: OS Specific. Linux: "\n"
      #     Set to nil to write lines without any delimiter.
      #
      #   encoding:
      #     Encode data before writing to the output stream.
      #     Default: UTF8_ENCODING
      #     Set to nil to disable encoding
      #
      #   strip_non_printable: [true|false]
      #     Strip all non-printable characters before writing to the file / stream.
      #     Default: false
      #
      # TODO: Support replacement character for invalid characters
      def initialize(output_stream, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false)
        @output_stream       = output_stream
        @encoding            = encoding
        # Only re-encode the delimiter when an encoding was requested. With
        # `encoding: nil` the delimiter is kept exactly as supplied instead of
        # being silently dropped.
        @delimiter           = delimiter && encoding ? delimiter.encode(encoding) : delimiter
        @strip_non_printable = strip_non_printable
      end

      # Write a line to the output stream
      # Returns self so that calls can be chained.
      #
      # Example:
      #   IOStreams.line_writer('a.txt') do |stream|
      #     stream << 'first line' << 'second line'
      #   end
      def <<(record)
        write(record)
        self
      end

      # Write a line to the output stream followed by the delimiter.
      # Returns [Integer] the number of bytes written.
      #
      # The supplied record is converted with #to_s and is never modified.
      #
      # Example:
      #   IOStreams.line_writer('a.txt') do |stream|
      #     count = stream.write('first line')
      #     puts "Wrote #{count} bytes to the output file, including the delimiter"
      #   end
      def write(record)
        chunk = record.to_s
        # String#to_s returns the receiver itself, so a destructive gsub! would
        # alter the caller's string (and fail on frozen strings).
        chunk = chunk.gsub(NOT_PRINTABLE, '') if strip_non_printable
        chunk = chunk.encode(encoding) if encoding

        count = output_stream.write(chunk)
        count += output_stream.write(delimiter) if delimiter
        count
      end

      private

      attr_reader :output_stream
    end
  end
end
@@ -232,31 +232,21 @@ module IOStreams
232
232
  end
233
233
  end
234
234
 
235
- # Returns [String] containing all the keys for the supplied email address.
235
+ # Returns [String] containing all the public keys for the supplied email address.
236
236
  #
237
237
  # email: [String] Email address for requested key.
238
238
  #
239
239
  # ascii: [true|false]
240
240
  # Whether to export as ASCII text instead of binary format
241
241
  # Default: true
242
- #
243
- # private: [true|false]
244
- # Whether to export the private key
245
- # Default: false
246
- #
247
- # passphrase: [String]
248
- # In order to export a private key the passphrase for the key must be supplied.
249
- # Otherwise a `Inappropriate ioctl for device` error will be returned.
250
- def self.export(email:, passphrase: nil, ascii: true, private: false)
242
+ def self.export(email:, ascii: true)
251
243
  version_check
252
- raise(ArgumentError, "Missing keyword: passphrase when private: true") if private && passphrase.nil?
253
244
 
254
245
  armor = ascii ? '--armor' : nil
255
- cmd = private ? '--export-secret-keys' : '--export'
256
246
  loopback = pgp_version.to_f >= 2.1 ? '--pinentry-mode loopback' : ''
257
- command = "#{executable} #{loopback} --no-tty --passphrase-fd 0 --batch #{armor} #{cmd} #{email}"
247
+ command = "#{executable} #{loopback} --no-tty --passphrase-fd 0 --batch #{armor} --export #{email}"
258
248
 
259
- out, err, status = Open3.capture3(command, binmode: true, stdin_data: "#{passphrase}\n")
249
+ out, err, status = Open3.capture3(command, binmode: true)
260
250
  logger.debug { "IOStreams::Pgp.export: #{command}\n#{err}" } if logger
261
251
  if status.success? && out.length > 0
262
252
  out
@@ -0,0 +1,55 @@
module IOStreams
  module Record
    # Converts each line of an input stream into hash for every row
    class Reader
      include Enumerable

      # Read a record as a Hash at a time from a file or stream.
      #
      # When `file_name_or_io` is a String it is opened with IOStreams.line_reader
      # using `delimiter`, `buffer_size`, `encoding` and `strip_non_printable`,
      # and the file name is passed on to #initialize as `file_name`.
      #
      # For any other source those four options are not used: the source is
      # handed to #initialize as-is and must already respond to #each.
      #
      # All remaining keyword arguments are passed to #initialize.
      # The new reader is yielded to the supplied block.
      def self.open(file_name_or_io, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
        if file_name_or_io.is_a?(String)
          IOStreams.line_reader(file_name_or_io,
                                delimiter:           delimiter,
                                buffer_size:         buffer_size,
                                encoding:            encoding,
                                strip_non_printable: strip_non_printable) do |io|
            yield new(io, file_name: file_name_or_io, **args)
          end
        else
          yield new(file_name_or_io, **args)
        end
      end

      # Create a Tabular reader to return the stream as Hash records
      # Parse a delimited data source.
      #
      # Parameters
      #   delimited: [#each]
      #     Anything that returns one line / record at a time when #each is called on it.
      #
      #   cleanse_header: [true|false]
      #     Whether to call `cleanse_header!` on the tabular instance after the
      #     header line has been parsed.
      #     Default: true
      #
      #   format: [Symbol]
      #     :csv, :hash, :array, :json, :psv, :fixed
      #
      # For all other parameters, see Tabular::Header.new
      def initialize(delimited, cleanse_header: true, **args)
        @tabular        = IOStreams::Tabular.new(**args)
        @delimited      = delimited
        @cleanse_header = cleanse_header
      end

      # Passes every record to the supplied block in turn.
      #
      # While the tabular instance still requires a header, the current line is
      # parsed as the header and is not passed to the block.
      #
      # Returns an Enumerator when no block is supplied.
      def each
        return to_enum(:each) unless block_given?

        delimited.each do |line|
          if tabular.requires_header?
            tabular.parse_header(line)
            tabular.cleanse_header! if cleanse_header
          else
            yield tabular.record_parse(line)
          end
        end
      end

      private

      attr_reader :tabular, :delimited, :cleanse_header
    end
  end
end
@@ -0,0 +1,63 @@
module IOStreams
  module Record
    # Renders each Hash supplied into a line and writes it to the underlying
    # line / record stream.
    #
    # Example, implied header from first record:
    #   IOStreams.record_writer do |stream|
    #     stream << {name: 'Jack', address: 'Somewhere', zipcode: 12345}
    #     stream << {name: 'Joe', address: 'Lost', zipcode: 32443, age: 23}
    #   end
    #
    # When a header is required and no columns were supplied, the keys of the
    # first hash are used as the columns and rendered as the header line.
    class Writer
      # Write a record as a Hash at a time to a file or stream.
      #
      # When `file_name_or_io` is a String it is opened with IOStreams.line_writer
      # using `delimiter`, `encoding` and `strip_non_printable`, and the file
      # name is passed on to #initialize as `file_name`.
      #
      # For any other target those three options are not used: the target is
      # handed to #initialize as-is and must already respond to #<<.
      #
      # All remaining keyword arguments are passed to #initialize.
      # The new writer is yielded to the supplied block.
      def self.open(file_name_or_io, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
        if file_name_or_io.is_a?(String)
          IOStreams.line_writer(file_name_or_io,
                                delimiter:           delimiter,
                                encoding:            encoding,
                                strip_non_printable: strip_non_printable) do |io|
            yield new(io, file_name: file_name_or_io, **args)
          end
        else
          yield new(file_name_or_io, **args)
        end
      end

      # Create a Tabular writer that takes individual hashes and renders each
      # one to the delimited output.
      #
      # Parameters
      #   delimited: [#<<]
      #     Anything that accepts a line / record at a time when #<< is called on it.
      #
      #   columns: [Array]
      #     Column names for the header.
      #     Default: nil, the keys of the first hash written are used.
      #
      #   format: [Symbol]
      #     :csv, :hash, :array, :json, :psv, :fixed
      #
      # For all other parameters, see Tabular::Header.new
      #
      # columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
      def initialize(delimited, columns: nil, **args)
        @tabular   = IOStreams::Tabular.new(columns: columns, **args)
        @delimited = delimited

        # Render header line when `columns` is supplied.
        delimited << @tabular.render(columns) if columns && @tabular.requires_header?
      end

      # Render the supplied hash and write it to the delimited output.
      #
      # When a header is still required, the keys of this hash become the
      # columns and the header line is written first.
      #
      # Returns self so that calls can be chained: `writer << first << second`.
      # Raises ArgumentError when anything other than a Hash is supplied.
      def <<(hash)
        raise(ArgumentError, 'Must supply a Hash') unless hash.is_a?(Hash)

        if tabular.requires_header?
          columns                = hash.keys
          tabular.header.columns = columns
          delimited << tabular.render(columns)
        end
        delimited << tabular.render(hash)
        # Return the writer, not the underlying stream, otherwise a chained `<<`
        # would send the next hash straight to the stream without rendering it.
        self
      end

      private

      attr_reader :tabular, :delimited
    end
  end
end
@@ -0,0 +1,60 @@
module IOStreams
  module Row
    # Converts each line of an input stream into an array for every line
    class Reader
      include Enumerable

      # Read a line as an Array at a time from a file or stream.
      #
      # When `file_name_or_io` is a String it is opened with IOStreams.line_reader
      # using `delimiter`, `buffer_size`, `encoding`, `strip_non_printable` and
      # `file_name`. The reader itself is given `file_name`, falling back to
      # the path that was opened when `file_name` is not supplied.
      #
      # For any other source the line reader options are not used: the source
      # is handed to #initialize as-is, together with `file_name`, and must
      # already respond to #each.
      #
      # All remaining keyword arguments are passed to #initialize.
      # The new reader is yielded to the supplied block.
      def self.open(file_name_or_io,
                    delimiter: nil,
                    buffer_size: 65_536,
                    encoding: UTF8_ENCODING,
                    strip_non_printable: false,
                    file_name: nil,
                    **args)
        if file_name_or_io.is_a?(String)
          IOStreams.line_reader(file_name_or_io,
                                delimiter:           delimiter,
                                buffer_size:         buffer_size,
                                encoding:            encoding,
                                file_name:           file_name,
                                strip_non_printable: strip_non_printable) do |io|
            # Default to the path being read so that the reader is told which
            # file it is working on even when `file_name` was not supplied.
            yield new(io, file_name: file_name || file_name_or_io, **args)
          end
        else
          # `file_name` is captured by this method's signature, so it has to be
          # forwarded explicitly or it would be silently discarded.
          yield new(file_name_or_io, file_name: file_name, **args)
        end
      end

      # Create a Tabular reader to return the stream rows as arrays.
      #
      # Parameters
      #   delimited: [#each]
      #     Anything that returns one line / record at a time when #each is called on it.
      #
      #   cleanse_header: [true|false]
      #     Whether to call `cleanse_header!` on the tabular instance after the
      #     header line has been parsed.
      #     Default: true
      #
      #   format: [Symbol]
      #     :csv, :hash, :array, :json, :psv, :fixed
      #
      # For all other parameters, see Tabular::Header.new
      def initialize(delimited, cleanse_header: true, **args)
        @tabular        = IOStreams::Tabular.new(**args)
        @delimited      = delimited
        @cleanse_header = cleanse_header
      end

      # Passes every row to the supplied block in turn.
      #
      # While the tabular instance still requires a header, the current line is
      # parsed as the header and the value returned by `parse_header` is passed
      # to the block. Every other line is passed through `row_parse`.
      #
      # Returns an Enumerator when no block is supplied.
      def each
        return to_enum(:each) unless block_given?

        delimited.each do |line|
          if tabular.requires_header?
            columns = tabular.parse_header(line)
            tabular.cleanse_header! if cleanse_header
            yield columns
          else
            yield tabular.row_parse(line)
          end
        end
      end

      private

      attr_reader :tabular, :delimited, :cleanse_header
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'csv'
module IOStreams
  module Row
    # Renders each Array supplied into a line and writes it to the underlying
    # line / record stream.
    #
    # Example:
    #   IOStreams.row_writer do |stream|
    #     stream << ['name', 'address', 'zipcode']
    #     stream << ['Jack', 'Somewhere', 12345]
    #     stream << ['Joe', 'Lost', 32443]
    #   end
    #
    # When a header is required and no columns were supplied, the first array
    # written is used as the columns.
    class Writer
      # Write a row as an Array at a time to a file or stream.
      #
      # When `file_name_or_io` is a String it is opened with IOStreams.line_writer
      # using `delimiter`, `encoding` and `strip_non_printable`.
      #
      # For any other target those three options are not used: the target is
      # handed to #initialize as-is and must already respond to #<<.
      #
      # All remaining keyword arguments are passed to #initialize.
      # The new writer is yielded to the supplied block.
      def self.open(file_name_or_io, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
        if file_name_or_io.is_a?(String)
          IOStreams.line_writer(file_name_or_io,
                                delimiter:           delimiter,
                                encoding:            encoding,
                                strip_non_printable: strip_non_printable) do |io|
            yield new(io, **args)
          end
        else
          yield new(file_name_or_io, **args)
        end
      end

      # Create a Tabular writer that takes individual rows as arrays.
      #
      # Parameters
      #   delimited: [#<<]
      #     Anything that accepts a line / record at a time when #<< is called on it.
      #
      #   columns: [Array]
      #     Column names for the header.
      #     Default: nil, the first array written is used.
      #
      #   format: [Symbol]
      #     :csv, :hash, :array, :json, :psv, :fixed
      #
      # For all other parameters, see Tabular::Header.new
      #
      # columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
      def initialize(delimited, columns: nil, **args)
        @tabular   = IOStreams::Tabular.new(columns: columns, **args)
        @delimited = delimited

        # Render header line when `columns` is supplied.
        delimited << @tabular.render(columns) if columns && @tabular.requires_header?
      end

      # Render the supplied array and write it to the delimited output.
      #
      # Returns self so that calls can be chained: `writer << header << row`.
      # Raises ArgumentError when anything other than an Array is supplied.
      def <<(array)
        raise(ArgumentError, 'Must supply an Array') unless array.is_a?(Array)

        # If header (columns) was not supplied as an argument, assume first line is the header.
        tabular.header.columns = array if tabular.requires_header?
        delimited << tabular.render(array)
        # Return the writer, not the underlying stream, otherwise a chained `<<`
        # would send the next array straight to the stream without rendering it.
        self
      end

      private

      attr_reader :tabular, :delimited
    end
  end
end
+ end