iostreams 0.14.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +202 -0
  3. data/README.md +155 -47
  4. data/lib/io_streams/file/reader.rb +7 -8
  5. data/lib/io_streams/file/writer.rb +7 -8
  6. data/lib/io_streams/io_streams.rb +313 -129
  7. data/lib/io_streams/{delimited → line}/reader.rb +20 -30
  8. data/lib/io_streams/line/writer.rb +81 -0
  9. data/lib/io_streams/pgp.rb +4 -14
  10. data/lib/io_streams/record/reader.rb +55 -0
  11. data/lib/io_streams/record/writer.rb +63 -0
  12. data/lib/io_streams/row/reader.rb +60 -0
  13. data/lib/io_streams/row/writer.rb +62 -0
  14. data/lib/io_streams/s3.rb +25 -0
  15. data/lib/io_streams/s3/reader.rb +64 -0
  16. data/lib/io_streams/s3/writer.rb +13 -0
  17. data/lib/io_streams/streams.rb +1 -1
  18. data/lib/io_streams/tabular.rb +163 -0
  19. data/lib/io_streams/tabular/errors.rb +14 -0
  20. data/lib/io_streams/tabular/header.rb +146 -0
  21. data/lib/io_streams/tabular/parser/array.rb +26 -0
  22. data/lib/io_streams/tabular/parser/base.rb +12 -0
  23. data/lib/io_streams/tabular/parser/csv.rb +35 -0
  24. data/lib/io_streams/tabular/parser/fixed.rb +88 -0
  25. data/lib/io_streams/tabular/parser/hash.rb +21 -0
  26. data/lib/io_streams/tabular/parser/json.rb +25 -0
  27. data/lib/io_streams/tabular/parser/psv.rb +34 -0
  28. data/lib/io_streams/tabular/utility/csv_row.rb +115 -0
  29. data/lib/io_streams/version.rb +2 -2
  30. data/lib/io_streams/xlsx/reader.rb +1 -1
  31. data/lib/io_streams/zip/reader.rb +1 -1
  32. data/lib/io_streams/zip/writer.rb +1 -1
  33. data/lib/iostreams.rb +21 -10
  34. data/test/bzip2_reader_test.rb +21 -22
  35. data/test/bzip2_writer_test.rb +38 -32
  36. data/test/file_reader_test.rb +19 -18
  37. data/test/file_writer_test.rb +23 -22
  38. data/test/files/test.json +3 -0
  39. data/test/gzip_reader_test.rb +21 -22
  40. data/test/gzip_writer_test.rb +35 -29
  41. data/test/io_streams_test.rb +137 -61
  42. data/test/line_reader_test.rb +105 -0
  43. data/test/line_writer_test.rb +50 -0
  44. data/test/pgp_reader_test.rb +29 -29
  45. data/test/pgp_test.rb +149 -195
  46. data/test/pgp_writer_test.rb +63 -62
  47. data/test/record_reader_test.rb +61 -0
  48. data/test/record_writer_test.rb +73 -0
  49. data/test/row_reader_test.rb +34 -0
  50. data/test/row_writer_test.rb +51 -0
  51. data/test/tabular_test.rb +184 -0
  52. data/test/xlsx_reader_test.rb +13 -17
  53. data/test/zip_reader_test.rb +21 -22
  54. data/test/zip_writer_test.rb +40 -36
  55. metadata +41 -17
  56. data/lib/io_streams/csv/reader.rb +0 -21
  57. data/lib/io_streams/csv/writer.rb +0 -20
  58. data/lib/io_streams/delimited/writer.rb +0 -67
  59. data/test/csv_reader_test.rb +0 -34
  60. data/test/csv_writer_test.rb +0 -35
  61. data/test/delimited_reader_test.rb +0 -115
  62. data/test/delimited_writer_test.rb +0 -44
@@ -1,16 +1,14 @@
1
1
  module IOStreams
2
- module Delimited
2
+ module Line
3
3
  class Reader
4
- attr_accessor :delimiter, :buffer_size, :encoding, :strip_non_printable
4
+ attr_reader :delimiter, :buffer_size, :encoding, :strip_non_printable
5
5
 
6
- # Read from a file or stream
7
- def self.open(file_name_or_io, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false)
8
- if IOStreams.reader_stream?(file_name_or_io)
9
- yield new(file_name_or_io, delimiter: delimiter, buffer_size: buffer_size, encoding: encoding, strip_non_printable: strip_non_printable)
6
+ # Read a line at a time from a file or stream
7
+ def self.open(file_name_or_io, **args)
8
+ if file_name_or_io.is_a?(String)
9
+ IOStreams::File::Reader.open(file_name_or_io) { |io| yield new(io, **args) }
10
10
  else
11
- ::File.open(file_name_or_io, 'rb') do |io|
12
- yield new(io, delimiter: delimiter, buffer_size: buffer_size, encoding: encoding, strip_non_printable: strip_non_printable)
13
- end
11
+ yield new(file_name_or_io, **args)
14
12
  end
15
13
  end
16
14
 
@@ -47,6 +45,12 @@ module IOStreams
47
45
  # Force encoding to this encoding for all data being read
48
46
  # Default: UTF8_ENCODING
49
47
  # Set to nil to disable encoding
48
+ #
49
+ # TODO:
50
+ # - Skip Comment lines. RegExp?
51
+ # - Skip "empty" / "blank" lines. RegExp?
52
+ # - Extract header line(s) / first non-comment, non-blank line
53
+ # - Embedded newline support, RegExp? or Proc?
50
54
  def initialize(input_stream, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false)
51
55
  @input_stream = input_stream
52
56
  @delimiter = delimiter
@@ -54,11 +58,12 @@ module IOStreams
54
58
  @encoding = encoding
55
59
  @strip_non_printable = strip_non_printable
56
60
 
57
- @delimiter.force_encoding(UTF8_ENCODING) if @delimiter && @encoding
61
+ @delimiter.encode(UTF8_ENCODING) if @delimiter && @encoding
58
62
  @buffer = ''
59
63
  end
60
64
 
61
- # Returns each line at a time to to the supplied block
65
+ # Iterate over every line in the file/stream passing each line to supplied block in turn.
66
+ # Returns [Integer] the number of lines read from the file/stream.
62
67
  def each(&block)
63
68
  partial = nil
64
69
  loop do
@@ -83,32 +88,17 @@ module IOStreams
83
88
  end
84
89
  end
85
90
 
86
- alias_method :each_line, :each
87
-
88
- # Reads length bytes from the I/O stream.
89
- # Not recommended, but available if someone calls #read on this delimited reader
90
- def read(length = nil, outbuf = nil)
91
- if length
92
- while (@buffer.size < length) && (read_chunk > 0)
93
- end
94
- data = @buffer.slice!(0, length)
95
- outbuf << data if outbuf
96
- data
97
- else
98
- while read_chunk > 0
99
- end
100
- @buffer
101
- end
102
- end
103
-
104
- ##########################################################################
105
91
  private
106
92
 
93
+ attr_reader :buffer
94
+ attr_writer :delimiter
95
+
107
96
  NOT_PRINTABLE = Regexp.compile(/[^[:print:]|\r|\n]/)
108
97
 
109
98
  # Returns [Integer] the number of bytes read into the internal buffer
110
99
  # Returns 0 on EOF
111
100
  def read_chunk
101
+ # TODO: read into existing buffer
112
102
  chunk = @input_stream.read(@buffer_size)
113
103
  # EOF reached?
114
104
  return 0 unless chunk
@@ -0,0 +1,81 @@
1
+ module IOStreams
2
+ module Line
3
+ class Writer
4
+ attr_reader :delimiter, :encoding, :strip_non_printable
5
+
6
+ # Write a line at a time to a file or stream
7
+ def self.open(file_name_or_io, **args)
8
+ if file_name_or_io.is_a?(String)
9
+ IOStreams::File::Writer.open(file_name_or_io) { |io| yield new(io, **args) }
10
+ else
11
+ yield new(file_name_or_io, **args)
12
+ end
13
+ end
14
+
15
+ NOT_PRINTABLE = Regexp.compile(/[^[:print:]]/)
16
+
17
+ # A delimited stream writer that will write to the supplied output stream
18
+ #
19
+ # The output stream should be binary with no text conversions performed
20
+ # since `strip_non_printable` will be applied to the binary stream before
21
+ # converting to UTF-8
22
+ #
23
+ # Parameters
24
+ # output_stream
25
+ # The output stream that implements #write
26
+ #
27
+ # delimiter: [String]
28
+ # Add the specified delimiter after every record when writing it
29
+ # to the output stream
30
+ # Default: $/ (Ruby's input record separator, "\n" unless it has been reassigned)
31
+ #
32
+ # encoding:
33
+ # Encode data before writing to the output stream.
34
+ # Default: UTF8_ENCODING
35
+ # Set to nil to disable encoding
36
+ #
37
+ # strip_non_printable: [true|false]
38
+ # Strip all non-printable characters before writing to the file / stream.
39
+ # Default: false
40
+ #
41
+ # TODO: Support replacement character for invalid characters
42
+ def initialize(output_stream, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false)
43
+ @output_stream = output_stream
44
+ @delimiter = delimiter.encode(encoding) if delimiter && encoding
45
+ @encoding = encoding
46
+ @strip_non_printable = strip_non_printable
47
+ end
48
+
49
+ # Write a line to the output stream
50
+ #
51
+ # Example:
52
+ # IOStreams.line_writer('a.txt') do |stream|
53
+ # stream << 'first line' << 'second line'
54
+ # end
55
+ def <<(record)
56
+ write(record)
57
+ self
58
+ end
59
+
60
+ # Write a line to the output stream followed by the delimiter.
61
+ # Returns [Integer] the number of bytes written.
62
+ #
63
+ # Example:
64
+ # IOStreams.line_writer('a.txt') do |stream|
65
+ # count = stream.write('first line')
66
+ # puts "Wrote #{count} bytes to the output file, including the delimiter"
67
+ # end
68
+ def write(record)
69
+ chunk = record.to_s
70
+ chunk.gsub!(NOT_PRINTABLE, '') if strip_non_printable
71
+ count = output_stream.write((encoding ? chunk.encode(encoding) : chunk))
72
+ count += output_stream.write(delimiter) if delimiter
73
+ count
74
+ end
75
+
76
+ private
77
+
78
+ attr_reader :output_stream
79
+ end
80
+ end
81
+ end
@@ -232,31 +232,21 @@ module IOStreams
232
232
  end
233
233
  end
234
234
 
235
- # Returns [String] containing all the keys for the supplied email address.
235
+ # Returns [String] containing all the public keys for the supplied email address.
236
236
  #
237
237
  # email: [String] Email address for requested key.
238
238
  #
239
239
  # ascii: [true|false]
240
240
  # Whether to export as ASCII text instead of binary format
241
241
  # Default: true
242
- #
243
- # private: [true|false]
244
- # Whether to export the private key
245
- # Default: false
246
- #
247
- # passphrase: [String]
248
- # In order to export a private key the passphrase for the key must be supplied.
249
- # Otherwise a `Inappropriate ioctl for device` error will be returned.
250
- def self.export(email:, passphrase: nil, ascii: true, private: false)
242
+ def self.export(email:, ascii: true)
251
243
  version_check
252
- raise(ArgumentError, "Missing keyword: passphrase when private: true") if private && passphrase.nil?
253
244
 
254
245
  armor = ascii ? '--armor' : nil
255
- cmd = private ? '--export-secret-keys' : '--export'
256
246
  loopback = pgp_version.to_f >= 2.1 ? '--pinentry-mode loopback' : ''
257
- command = "#{executable} #{loopback} --no-tty --passphrase-fd 0 --batch #{armor} #{cmd} #{email}"
247
+ command = "#{executable} #{loopback} --no-tty --passphrase-fd 0 --batch #{armor} --export #{email}"
258
248
 
259
- out, err, status = Open3.capture3(command, binmode: true, stdin_data: "#{passphrase}\n")
249
+ out, err, status = Open3.capture3(command, binmode: true)
260
250
  logger.debug { "IOStreams::Pgp.export: #{command}\n#{err}" } if logger
261
251
  if status.success? && out.length > 0
262
252
  out
@@ -0,0 +1,55 @@
1
+ module IOStreams
2
+ module Record
3
+ # Converts each line of an input stream into a Hash, one per row
4
+ class Reader
5
+ include Enumerable
6
+
7
+ # Read a record as a Hash at a time from a file or stream.
8
+ def self.open(file_name_or_io, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
9
+ if file_name_or_io.is_a?(String)
10
+ IOStreams.line_reader(file_name_or_io,
11
+ delimiter: delimiter,
12
+ buffer_size: buffer_size,
13
+ encoding: encoding,
14
+ strip_non_printable: strip_non_printable) do |io|
15
+ yield new(io, file_name: file_name_or_io, **args)
16
+ end
17
+ else
18
+ yield new(file_name_or_io, **args)
19
+ end
20
+ end
21
+
22
+ # Create a Tabular reader to return the stream as Hash records
23
+ # Parse a delimited data source.
24
+ #
25
+ # Parameters
26
+ # delimited: [#each]
27
+ # Anything that returns one line / record at a time when #each is called on it.
28
+ #
29
+ # format: [Symbol]
30
+ # :csv, :hash, :array, :json, :psv, :fixed
31
+ #
32
+ # For all other parameters, see Tabular::Header.new
33
+ def initialize(delimited, cleanse_header: true, **args)
34
+ @tabular = IOStreams::Tabular.new(**args)
35
+ @delimited = delimited
36
+ @cleanse_header = cleanse_header
37
+ end
38
+
39
+ def each
40
+ delimited.each do |line|
41
+ if tabular.requires_header?
42
+ tabular.parse_header(line)
43
+ tabular.cleanse_header! if cleanse_header
44
+ else
45
+ yield tabular.record_parse(line)
46
+ end
47
+ end
48
+ end
49
+
50
+ private
51
+
52
+ attr_reader :tabular, :delimited, :cleanse_header
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,63 @@
1
+ module IOStreams
2
+ module Record
3
+ # Example, implied header from first record:
4
+ # IOStreams.record_writer do |stream|
5
+ # stream << {name: 'Jack', address: 'Somewhere', zipcode: 12345}
6
+ # stream << {name: 'Joe', address: 'Lost', zipcode: 32443, age: 23}
7
+ # end
8
+ #
9
+ # Output:
10
+ # name, add
11
+ #
12
+ class Writer
13
+ # Write a record as a Hash at a time to a file or stream.
14
+ def self.open(file_name_or_io, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
15
+ if file_name_or_io.is_a?(String)
16
+ IOStreams.line_writer(file_name_or_io,
17
+ delimiter: delimiter,
18
+ encoding: encoding,
19
+ strip_non_printable: strip_non_printable) do |io|
20
+ yield new(io, file_name: file_name_or_io, **args)
21
+ end
22
+ else
23
+ yield new(file_name_or_io, **args)
24
+ end
25
+ end
26
+
27
+ # Create a Tabular writer that takes individual records as hashes
28
+ # and renders each one as a line on the delimited output.
29
+ #
30
+ # Parameters
31
+ # delimited: [#<<]
32
+ # Anything that accepts a line / record at a time when #<< is called on it.
33
+ #
34
+ # format: [Symbol]
35
+ # :csv, :hash, :array, :json, :psv, :fixed
36
+ #
37
+ # For all other parameters, see Tabular::Header.new
38
+ #
39
+ # columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
40
+ def initialize(delimited, columns: nil, **args)
41
+ @tabular = IOStreams::Tabular.new(columns: columns, **args)
42
+ @delimited = delimited
43
+
44
+ # Render header line when `columns` is supplied.
45
+ delimited << @tabular.render(columns) if columns && @tabular.requires_header?
46
+ end
47
+
48
+ def <<(hash)
49
+ raise(ArgumentError, 'Must supply a Hash') unless hash.is_a?(Hash)
50
+ if tabular.requires_header?
51
+ columns = hash.keys
52
+ tabular.header.columns = columns
53
+ delimited << tabular.render(columns)
54
+ end
55
+ delimited << tabular.render(hash)
56
+ end
57
+
58
+ private
59
+
60
+ attr_reader :tabular, :delimited, :cleanse_header
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,60 @@
1
+ module IOStreams
2
+ module Row
3
+ # Converts each line of an input stream into an array for every line
4
+ class Reader
5
+ # Read a line as an Array at a time from a file or stream.
6
+ def self.open(file_name_or_io,
7
+ delimiter: nil,
8
+ buffer_size: 65_536,
9
+ encoding: UTF8_ENCODING,
10
+ strip_non_printable: false,
11
+ file_name: nil,
12
+ **args)
13
+ if file_name_or_io.is_a?(String)
14
+ IOStreams.line_reader(file_name_or_io,
15
+ delimiter: delimiter,
16
+ buffer_size: buffer_size,
17
+ encoding: encoding,
18
+ file_name: file_name,
19
+ strip_non_printable: strip_non_printable) do |io|
20
+ yield new(io, file_name: file_name, **args)
21
+ end
22
+ else
23
+ yield new(file_name_or_io, **args)
24
+ end
25
+ end
26
+
27
+ # Create a Tabular reader to return the stream rows as arrays.
28
+ #
29
+ # Parameters
30
+ # delimited: [#each]
31
+ # Anything that returns one line / record at a time when #each is called on it.
32
+ #
33
+ # format: [Symbol]
34
+ # :csv, :hash, :array, :json, :psv, :fixed
35
+ #
36
+ # For all other parameters, see Tabular::Header.new
37
+ def initialize(delimited, cleanse_header: true, **args)
38
+ @tabular = IOStreams::Tabular.new(**args)
39
+ @delimited = delimited
40
+ @cleanse_header = cleanse_header
41
+ end
42
+
43
+ def each
44
+ delimited.each do |line|
45
+ if tabular.requires_header?
46
+ columns = tabular.parse_header(line)
47
+ tabular.cleanse_header! if cleanse_header
48
+ yield columns
49
+ else
50
+ yield tabular.row_parse(line)
51
+ end
52
+ end
53
+ end
54
+
55
+ private
56
+
57
+ attr_reader :tabular, :delimited, :cleanse_header
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,62 @@
1
+ require 'csv'
2
+ module IOStreams
3
+ module Row
4
+ # Example:
5
+ # IOStreams.row_writer do |stream|
6
+ # stream << ['name', 'address', 'zipcode']
7
+ # stream << ['Jack', 'Somewhere', 12345]
8
+ # stream << ['Joe', 'Lost', 32443]
9
+ # end
10
+ #
11
+ # Output:
12
+ # ...
13
+ #
14
+ class Writer
15
+ # Write a row as an Array at a time to a file or stream.
16
+ def self.open(file_name_or_io, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
17
+ if file_name_or_io.is_a?(String)
18
+ IOStreams.line_writer(file_name_or_io,
19
+ delimiter: delimiter,
20
+ encoding: encoding,
21
+ strip_non_printable: strip_non_printable) do |io|
22
+ yield new(io, **args)
23
+ end
24
+ else
25
+ yield new(file_name_or_io, **args)
26
+ end
27
+ end
28
+
29
+ # Create a Tabular writer that takes individual rows as arrays.
30
+ #
31
+ # Parameters
32
+ # delimited: [#<<]
33
+ # Anything that accepts a line / record at a time when #<< is called on it.
34
+ #
35
+ # format: [Symbol]
36
+ # :csv, :hash, :array, :json, :psv, :fixed
37
+ #
38
+ # For all other parameters, see Tabular::Header.new
39
+ #
40
+ # columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
41
+ def initialize(delimited, columns: nil, **args)
42
+ @tabular = IOStreams::Tabular.new(columns: columns, **args)
43
+ @delimited = delimited
44
+
45
+ # Render header line when `columns` is supplied.
46
+ delimited << @tabular.render(columns) if columns && @tabular.requires_header?
47
+ end
48
+
49
+ # Supply an array to render; anything else raises ArgumentError.
50
+ def <<(array)
51
+ raise(ArgumentError, 'Must supply an Array') unless array.is_a?(Array)
52
+ # If header (columns) was not supplied as an argument, assume first line is the header.
53
+ tabular.header.columns = array if tabular.requires_header?
54
+ delimited << tabular.render(array)
55
+ end
56
+
57
+ private
58
+
59
+ attr_reader :tabular, :delimited
60
+ end
61
+ end
62
+ end