iostreams 1.10.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -2
- data/Rakefile +7 -0
- data/lib/io_streams/builder.rb +10 -10
- data/lib/io_streams/bzip2/writer.rb +1 -1
- data/lib/io_streams/encode/reader.rb +2 -2
- data/lib/io_streams/encode/writer.rb +5 -5
- data/lib/io_streams/gzip/reader.rb +1 -1
- data/lib/io_streams/gzip/writer.rb +1 -1
- data/lib/io_streams/io_streams.rb +47 -21
- data/lib/io_streams/line/reader.rb +2 -2
- data/lib/io_streams/line/writer.rb +1 -1
- data/lib/io_streams/path.rb +2 -2
- data/lib/io_streams/paths/file.rb +25 -11
- data/lib/io_streams/paths/http.rb +80 -7
- data/lib/io_streams/paths/matcher.rb +3 -3
- data/lib/io_streams/paths/s3.rb +22 -3
- data/lib/io_streams/paths/sftp.rb +9 -10
- data/lib/io_streams/pgp/reader.rb +25 -7
- data/lib/io_streams/pgp/writer.rb +95 -29
- data/lib/io_streams/pgp.rb +289 -87
- data/lib/io_streams/reader.rb +4 -4
- data/lib/io_streams/record/reader.rb +3 -4
- data/lib/io_streams/record/writer.rb +3 -4
- data/lib/io_streams/row/reader.rb +1 -1
- data/lib/io_streams/row/writer.rb +1 -1
- data/lib/io_streams/stream.rb +36 -30
- data/lib/io_streams/symmetric_encryption/reader.rb +2 -2
- data/lib/io_streams/symmetric_encryption/writer.rb +4 -4
- data/lib/io_streams/tabular/header.rb +18 -6
- data/lib/io_streams/tabular/parser/array.rb +0 -10
- data/lib/io_streams/tabular/parser/csv.rb +6 -38
- data/lib/io_streams/tabular/parser/fixed.rb +5 -5
- data/lib/io_streams/tabular/parser/psv.rb +0 -12
- data/lib/io_streams/tabular.rb +5 -10
- data/lib/io_streams/utils.rb +6 -8
- data/lib/io_streams/version.rb +1 -1
- data/lib/io_streams/writer.rb +6 -6
- data/lib/io_streams/xlsx/reader.rb +1 -1
- data/lib/io_streams/zip/writer.rb +22 -10
- data/lib/iostreams.rb +0 -1
- metadata +28 -113
- data/lib/io_streams/deprecated.rb +0 -216
- data/lib/io_streams/tabular/utility/csv_row.rb +0 -105
- data/test/builder_test.rb +0 -311
- data/test/bzip2_reader_test.rb +0 -27
- data/test/bzip2_writer_test.rb +0 -56
- data/test/deprecated_test.rb +0 -121
- data/test/encode_reader_test.rb +0 -51
- data/test/encode_writer_test.rb +0 -90
- data/test/files/embedded_lines_test.csv +0 -7
- data/test/files/multiple_files.zip +0 -0
- data/test/files/spreadsheet.xlsx +0 -0
- data/test/files/test.csv +0 -4
- data/test/files/test.json +0 -3
- data/test/files/test.psv +0 -4
- data/test/files/text file.txt +0 -3
- data/test/files/text.txt +0 -3
- data/test/files/text.txt.bz2 +0 -0
- data/test/files/text.txt.gz +0 -0
- data/test/files/text.txt.gz.zip +0 -0
- data/test/files/text.zip +0 -0
- data/test/files/text.zip.gz +0 -0
- data/test/files/unclosed_quote_large_test.csv +0 -1658
- data/test/files/unclosed_quote_test.csv +0 -4
- data/test/files/unclosed_quote_test2.csv +0 -3
- data/test/files/utf16_test.csv +0 -0
- data/test/gzip_reader_test.rb +0 -27
- data/test/gzip_writer_test.rb +0 -52
- data/test/io_streams_test.rb +0 -132
- data/test/line_reader_test.rb +0 -325
- data/test/line_writer_test.rb +0 -59
- data/test/minimal_file_reader.rb +0 -25
- data/test/path_test.rb +0 -55
- data/test/paths/file_test.rb +0 -202
- data/test/paths/http_test.rb +0 -34
- data/test/paths/matcher_test.rb +0 -120
- data/test/paths/s3_test.rb +0 -220
- data/test/paths/sftp_test.rb +0 -106
- data/test/pgp_reader_test.rb +0 -46
- data/test/pgp_test.rb +0 -254
- data/test/pgp_writer_test.rb +0 -130
- data/test/record_reader_test.rb +0 -60
- data/test/record_writer_test.rb +0 -82
- data/test/row_reader_test.rb +0 -35
- data/test/row_writer_test.rb +0 -56
- data/test/stream_test.rb +0 -574
- data/test/tabular_test.rb +0 -338
- data/test/test_helper.rb +0 -40
- data/test/utils_test.rb +0 -20
- data/test/xlsx_reader_test.rb +0 -37
- data/test/zip_reader_test.rb +0 -53
- data/test/zip_writer_test.rb +0 -48
metadata
CHANGED
|
@@ -1,17 +1,28 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iostreams
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.10.3
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Reid Morrison
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
12
|
-
dependencies:
|
|
13
|
-
|
|
14
|
-
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: csv
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0'
|
|
15
26
|
executables: []
|
|
16
27
|
extensions: []
|
|
17
28
|
extra_rdoc_files: []
|
|
@@ -22,7 +33,6 @@ files:
|
|
|
22
33
|
- lib/io_streams/builder.rb
|
|
23
34
|
- lib/io_streams/bzip2/reader.rb
|
|
24
35
|
- lib/io_streams/bzip2/writer.rb
|
|
25
|
-
- lib/io_streams/deprecated.rb
|
|
26
36
|
- lib/io_streams/encode/reader.rb
|
|
27
37
|
- lib/io_streams/encode/writer.rb
|
|
28
38
|
- lib/io_streams/errors.rb
|
|
@@ -57,7 +67,6 @@ files:
|
|
|
57
67
|
- lib/io_streams/tabular/parser/hash.rb
|
|
58
68
|
- lib/io_streams/tabular/parser/json.rb
|
|
59
69
|
- lib/io_streams/tabular/parser/psv.rb
|
|
60
|
-
- lib/io_streams/tabular/utility/csv_row.rb
|
|
61
70
|
- lib/io_streams/utils.rb
|
|
62
71
|
- lib/io_streams/version.rb
|
|
63
72
|
- lib/io_streams/writer.rb
|
|
@@ -65,60 +74,15 @@ files:
|
|
|
65
74
|
- lib/io_streams/zip/reader.rb
|
|
66
75
|
- lib/io_streams/zip/writer.rb
|
|
67
76
|
- lib/iostreams.rb
|
|
68
|
-
- test/builder_test.rb
|
|
69
|
-
- test/bzip2_reader_test.rb
|
|
70
|
-
- test/bzip2_writer_test.rb
|
|
71
|
-
- test/deprecated_test.rb
|
|
72
|
-
- test/encode_reader_test.rb
|
|
73
|
-
- test/encode_writer_test.rb
|
|
74
|
-
- test/files/embedded_lines_test.csv
|
|
75
|
-
- test/files/multiple_files.zip
|
|
76
|
-
- test/files/spreadsheet.xlsx
|
|
77
|
-
- test/files/test.csv
|
|
78
|
-
- test/files/test.json
|
|
79
|
-
- test/files/test.psv
|
|
80
|
-
- test/files/text file.txt
|
|
81
|
-
- test/files/text.txt
|
|
82
|
-
- test/files/text.txt.bz2
|
|
83
|
-
- test/files/text.txt.gz
|
|
84
|
-
- test/files/text.txt.gz.zip
|
|
85
|
-
- test/files/text.zip
|
|
86
|
-
- test/files/text.zip.gz
|
|
87
|
-
- test/files/unclosed_quote_large_test.csv
|
|
88
|
-
- test/files/unclosed_quote_test.csv
|
|
89
|
-
- test/files/unclosed_quote_test2.csv
|
|
90
|
-
- test/files/utf16_test.csv
|
|
91
|
-
- test/gzip_reader_test.rb
|
|
92
|
-
- test/gzip_writer_test.rb
|
|
93
|
-
- test/io_streams_test.rb
|
|
94
|
-
- test/line_reader_test.rb
|
|
95
|
-
- test/line_writer_test.rb
|
|
96
|
-
- test/minimal_file_reader.rb
|
|
97
|
-
- test/path_test.rb
|
|
98
|
-
- test/paths/file_test.rb
|
|
99
|
-
- test/paths/http_test.rb
|
|
100
|
-
- test/paths/matcher_test.rb
|
|
101
|
-
- test/paths/s3_test.rb
|
|
102
|
-
- test/paths/sftp_test.rb
|
|
103
|
-
- test/pgp_reader_test.rb
|
|
104
|
-
- test/pgp_test.rb
|
|
105
|
-
- test/pgp_writer_test.rb
|
|
106
|
-
- test/record_reader_test.rb
|
|
107
|
-
- test/record_writer_test.rb
|
|
108
|
-
- test/row_reader_test.rb
|
|
109
|
-
- test/row_writer_test.rb
|
|
110
|
-
- test/stream_test.rb
|
|
111
|
-
- test/tabular_test.rb
|
|
112
|
-
- test/test_helper.rb
|
|
113
|
-
- test/utils_test.rb
|
|
114
|
-
- test/xlsx_reader_test.rb
|
|
115
|
-
- test/zip_reader_test.rb
|
|
116
|
-
- test/zip_writer_test.rb
|
|
117
77
|
homepage: https://iostreams.rocketjob.io
|
|
118
78
|
licenses:
|
|
119
79
|
- Apache-2.0
|
|
120
|
-
metadata:
|
|
121
|
-
|
|
80
|
+
metadata:
|
|
81
|
+
bug_tracker_uri: https://github.com/reidmorrison/iostreams/issues
|
|
82
|
+
changelog_uri: https://github.com/reidmorrison/iostreams/blob/v2.0.0/CHANGELOG.md
|
|
83
|
+
documentation_uri: https://iostreams.rocketjob.io
|
|
84
|
+
source_code_uri: https://github.com/reidmorrison/iostreams/tree/v2.0.0
|
|
85
|
+
rubygems_mfa_required: 'true'
|
|
122
86
|
rdoc_options: []
|
|
123
87
|
require_paths:
|
|
124
88
|
- lib
|
|
@@ -126,64 +90,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
126
90
|
requirements:
|
|
127
91
|
- - ">="
|
|
128
92
|
- !ruby/object:Gem::Version
|
|
129
|
-
version: '2
|
|
93
|
+
version: '3.2'
|
|
130
94
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
95
|
requirements:
|
|
132
96
|
- - ">="
|
|
133
97
|
- !ruby/object:Gem::Version
|
|
134
98
|
version: '0'
|
|
135
99
|
requirements: []
|
|
136
|
-
rubygems_version: 3.
|
|
137
|
-
signing_key:
|
|
100
|
+
rubygems_version: 3.6.9
|
|
138
101
|
specification_version: 4
|
|
139
|
-
summary:
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
- test/bzip2_reader_test.rb
|
|
143
|
-
- test/bzip2_writer_test.rb
|
|
144
|
-
- test/deprecated_test.rb
|
|
145
|
-
- test/encode_reader_test.rb
|
|
146
|
-
- test/encode_writer_test.rb
|
|
147
|
-
- test/files/embedded_lines_test.csv
|
|
148
|
-
- test/files/multiple_files.zip
|
|
149
|
-
- test/files/spreadsheet.xlsx
|
|
150
|
-
- test/files/test.csv
|
|
151
|
-
- test/files/test.json
|
|
152
|
-
- test/files/test.psv
|
|
153
|
-
- test/files/text file.txt
|
|
154
|
-
- test/files/text.txt
|
|
155
|
-
- test/files/text.txt.bz2
|
|
156
|
-
- test/files/text.txt.gz
|
|
157
|
-
- test/files/text.txt.gz.zip
|
|
158
|
-
- test/files/text.zip
|
|
159
|
-
- test/files/text.zip.gz
|
|
160
|
-
- test/files/unclosed_quote_large_test.csv
|
|
161
|
-
- test/files/unclosed_quote_test.csv
|
|
162
|
-
- test/files/unclosed_quote_test2.csv
|
|
163
|
-
- test/files/utf16_test.csv
|
|
164
|
-
- test/gzip_reader_test.rb
|
|
165
|
-
- test/gzip_writer_test.rb
|
|
166
|
-
- test/io_streams_test.rb
|
|
167
|
-
- test/line_reader_test.rb
|
|
168
|
-
- test/line_writer_test.rb
|
|
169
|
-
- test/minimal_file_reader.rb
|
|
170
|
-
- test/path_test.rb
|
|
171
|
-
- test/paths/file_test.rb
|
|
172
|
-
- test/paths/http_test.rb
|
|
173
|
-
- test/paths/matcher_test.rb
|
|
174
|
-
- test/paths/s3_test.rb
|
|
175
|
-
- test/paths/sftp_test.rb
|
|
176
|
-
- test/pgp_reader_test.rb
|
|
177
|
-
- test/pgp_test.rb
|
|
178
|
-
- test/pgp_writer_test.rb
|
|
179
|
-
- test/record_reader_test.rb
|
|
180
|
-
- test/record_writer_test.rb
|
|
181
|
-
- test/row_reader_test.rb
|
|
182
|
-
- test/row_writer_test.rb
|
|
183
|
-
- test/stream_test.rb
|
|
184
|
-
- test/tabular_test.rb
|
|
185
|
-
- test/test_helper.rb
|
|
186
|
-
- test/utils_test.rb
|
|
187
|
-
- test/xlsx_reader_test.rb
|
|
188
|
-
- test/zip_reader_test.rb
|
|
189
|
-
- test/zip_writer_test.rb
|
|
102
|
+
summary: 'Streaming I/O for Ruby: compression, encryption, format, and storage transparent
|
|
103
|
+
to your code.'
|
|
104
|
+
test_files: []
|
|
@@ -1,216 +0,0 @@
|
|
|
1
|
-
module IOStreams
|
|
2
|
-
UTF8_ENCODING = Encoding.find("UTF-8").freeze
|
|
3
|
-
BINARY_ENCODING = Encoding.find("BINARY").freeze
|
|
4
|
-
|
|
5
|
-
# Deprecated IOStreams from v0.x. Do not use, will be removed soon.
|
|
6
|
-
module Deprecated
|
|
7
|
-
def self.included(base)
|
|
8
|
-
base.extend ClassMethods
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
module ClassMethods
|
|
12
|
-
# DEPRECATED. Use `#path` or `#io`
|
|
13
|
-
# Examples:
|
|
14
|
-
# IOStreams.path("data.zip").reader { |f| f.read(100) }
|
|
15
|
-
#
|
|
16
|
-
# IOStreams.path(file_name).option(:encode, encoding: "BINARY").reader { |f| f.read(100) }
|
|
17
|
-
#
|
|
18
|
-
# io_stream = StringIO.new("Hello World")
|
|
19
|
-
# IOStreams.stream(io_stream).reader { |f| f.read(100) }
|
|
20
|
-
def reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
|
|
21
|
-
path = build_path(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
22
|
-
path.reader(&block)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
# DEPRECATED
|
|
26
|
-
def each_line(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
27
|
-
path = build_path(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
28
|
-
path.each(:line, **args, &block)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# DEPRECATED
|
|
32
|
-
def each_row(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
33
|
-
path = build_path(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
34
|
-
path.each(:array, **args, &block)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
# DEPRECATED
|
|
38
|
-
def each_record(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
39
|
-
path = build_path(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
40
|
-
path.each(:hash, **args, &block)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
# DEPRECATED. Use `#path` or `#io`
|
|
44
|
-
# Examples:
|
|
45
|
-
# IOStreams.path("data.zip").writer { |f| f.write("Hello World") }
|
|
46
|
-
#
|
|
47
|
-
# IOStreams.path(file_name).option(:encode, encoding: "BINARY").writer { |f| f.write("Hello World") }
|
|
48
|
-
#
|
|
49
|
-
# io_stream = StringIO.new("Hello World")
|
|
50
|
-
# IOStreams.stream(io_stream).writer { |f| f.write("Hello World") }
|
|
51
|
-
def writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
|
|
52
|
-
path = build_path(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
53
|
-
path.writer(&block)
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
# DEPRECATED
|
|
57
|
-
def line_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
58
|
-
path = build_path(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
59
|
-
path.writer(:line, **args, &block)
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# DEPRECATED
|
|
63
|
-
def row_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
64
|
-
path = build_path(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
65
|
-
path.writer(:array, **args, &block)
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
# DEPRECATED
|
|
69
|
-
def record_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
70
|
-
path = build_path(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
71
|
-
path.writer(:hash, **args, &block)
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
# Copies the source file/stream to the target file/stream.
|
|
75
|
-
# Returns [Integer] the number of bytes copied
|
|
76
|
-
#
|
|
77
|
-
# Example: Copy between 2 files
|
|
78
|
-
# IOStreams.copy('a.csv', 'b.csv')
|
|
79
|
-
#
|
|
80
|
-
# Example: Read content from a Xlsx file and write it out in CSV form.
|
|
81
|
-
# IOStreams.copy('a.xlsx', 'b.csv')
|
|
82
|
-
#
|
|
83
|
-
# Example:
|
|
84
|
-
# # Read content from a JSON file and write it out in CSV form.
|
|
85
|
-
# #
|
|
86
|
-
# # The output header for the CSV file is extracted from the first row in the JSON file.
|
|
87
|
-
# # If the first JSON row does not contain all the column names then they will be ignored
|
|
88
|
-
# # for the rest of the file.
|
|
89
|
-
# IOStreams.copy('a.json', 'b.csv')
|
|
90
|
-
#
|
|
91
|
-
# Example:
|
|
92
|
-
# # Read a PSV file and write out a CSV file from it.
|
|
93
|
-
# IOStreams.copy('a.psv', 'b.csv')
|
|
94
|
-
#
|
|
95
|
-
# Example:
|
|
96
|
-
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
|
97
|
-
# # Since the target file_name already includes `.enc` in the filename, it is automatically
|
|
98
|
-
# # encrypted.
|
|
99
|
-
# IOStreams.copy('a.csv', 'b.csv.enc')
|
|
100
|
-
#
|
|
101
|
-
# Example:
|
|
102
|
-
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
|
103
|
-
# # Since the target file_name does not include `.enc` in the filename, to encrypt it
|
|
104
|
-
# # the encryption stream is added.
|
|
105
|
-
# IOStreams.copy('a.csv', 'b', target_options: [:enc])
|
|
106
|
-
#
|
|
107
|
-
# Example:
|
|
108
|
-
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
|
109
|
-
# # Since the target file_name does not include `.enc` in the filename, to encrypt it
|
|
110
|
-
# # the encryption stream is added, along with the optional compression option.
|
|
111
|
-
# IOStreams.copy('a.csv', 'b', target_options: [enc: { compress: true }])
|
|
112
|
-
#
|
|
113
|
-
# Example:
|
|
114
|
-
# # Create a pgp encrypted file.
|
|
115
|
-
# # For PGP Encryption the recipients email address is required.
|
|
116
|
-
# IOStreams.copy('a.xlsx', 'b.csv.pgp', target_options: [:csv, pgp: { recipient_email: 'user@nospam.org' }])
|
|
117
|
-
#
|
|
118
|
-
# Example: Copy between 2 existing streams
|
|
119
|
-
# IOStreams.reader('a.csv') do |source_stream|
|
|
120
|
-
# IOStreams.writer('b.csv.enc') do |target_stream|
|
|
121
|
-
# IOStreams.copy(source_stream, target_stream)
|
|
122
|
-
# end
|
|
123
|
-
# end
|
|
124
|
-
#
|
|
125
|
-
# Example:
|
|
126
|
-
# # Copy between 2 csv files, reducing the number of columns present and encrypting the
|
|
127
|
-
# # target file with Symmetric Encryption
|
|
128
|
-
# output_headers = %w[name address]
|
|
129
|
-
# IOStreams.copy(
|
|
130
|
-
# 'a.csv',
|
|
131
|
-
# 'b.csv.enc',
|
|
132
|
-
# target_options: [csv:{headers: output_headers}, enc: {compress: true}]
|
|
133
|
-
# )
|
|
134
|
-
#
|
|
135
|
-
# Example:
|
|
136
|
-
# # Copy a locally encrypted file to AWS S3.
|
|
137
|
-
# # Decrypts the file, then compresses it with gzip as it is being streamed into S3.
|
|
138
|
-
# # Useful for when the entire bucket is encrypted on S3.
|
|
139
|
-
# IOStreams.copy('a.csv.enc', 's3://my_bucket/b.csv.gz')
|
|
140
|
-
def copy(source_file_name_or_io, target_file_name_or_io, buffer_size: nil, source_options: {}, target_options: {})
|
|
141
|
-
# TODO: prevent stream conversions when reader and writer streams are the same!
|
|
142
|
-
reader(source_file_name_or_io, **source_options) do |source_stream|
|
|
143
|
-
writer(target_file_name_or_io, **target_options) do |target_stream|
|
|
144
|
-
IO.copy_stream(source_stream, target_stream)
|
|
145
|
-
end
|
|
146
|
-
end
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
# DEPRECATED
|
|
150
|
-
def reader_stream?(file_name_or_io)
|
|
151
|
-
file_name_or_io.respond_to?(:read)
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
# DEPRECATED
|
|
155
|
-
def writer_stream?(file_name_or_io)
|
|
156
|
-
file_name_or_io.respond_to?(:write)
|
|
157
|
-
end
|
|
158
|
-
|
|
159
|
-
# DEPRECATED. Use Path#compressed?
|
|
160
|
-
def compressed?(file_name)
|
|
161
|
-
Path.new(file_name).compressed?
|
|
162
|
-
end
|
|
163
|
-
|
|
164
|
-
# DEPRECATED. Use Path#encrypted?
|
|
165
|
-
def encrypted?(file_name)
|
|
166
|
-
Path.new(file_name).encrypted?
|
|
167
|
-
end
|
|
168
|
-
|
|
169
|
-
# DEPRECATED
|
|
170
|
-
def line_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
171
|
-
path = build_path(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
172
|
-
path.reader(:line, **args, &block)
|
|
173
|
-
end
|
|
174
|
-
|
|
175
|
-
# DEPRECATED
|
|
176
|
-
def row_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
177
|
-
path = build_path(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
178
|
-
path.reader(:line, **args, &block)
|
|
179
|
-
end
|
|
180
|
-
|
|
181
|
-
# DEPRECATED
|
|
182
|
-
def record_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
|
183
|
-
path = build_path(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
|
|
184
|
-
path.reader(:hash, **args, &block)
|
|
185
|
-
end
|
|
186
|
-
|
|
187
|
-
private
|
|
188
|
-
|
|
189
|
-
def build_path(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil)
|
|
190
|
-
path = new(file_name_or_io)
|
|
191
|
-
path.file_name(file_name) if file_name
|
|
192
|
-
|
|
193
|
-
apply_old_style_streams(path, streams) if streams
|
|
194
|
-
|
|
195
|
-
if encoding || encode_cleaner || encode_replace
|
|
196
|
-
path.option_or_stream(:encode, encoding: encoding, cleaner: encode_cleaner, replace: encode_replace)
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
path
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
# Applies old form streams to the path
|
|
203
|
-
def apply_old_style_streams(path, streams)
|
|
204
|
-
if streams.is_a?(Symbol)
|
|
205
|
-
path.stream(streams)
|
|
206
|
-
elsif streams.is_a?(Array)
|
|
207
|
-
streams.each { |stream| apply_old_style_streams(path, stream) }
|
|
208
|
-
elsif streams.is_a?(Hash)
|
|
209
|
-
streams.each_pair { |stream, options| path.stream(stream, **options) }
|
|
210
|
-
else
|
|
211
|
-
raise ArgumentError, "Invalid old style stream supplied: #{params.inspect}"
|
|
212
|
-
end
|
|
213
|
-
end
|
|
214
|
-
end
|
|
215
|
-
end
|
|
216
|
-
end
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
require "csv"
|
|
2
|
-
module IOStreams
|
|
3
|
-
class Tabular
|
|
4
|
-
module Utility
|
|
5
|
-
# For parsing a single line of CSV at a time
|
|
6
|
-
# 2 to 3 times better performance than CSV.parse_line and considerably less
|
|
7
|
-
# garbage collection required.
|
|
8
|
-
#
|
|
9
|
-
# Note: Only used prior to Ruby 2.6
|
|
10
|
-
class CSVRow < ::CSV
|
|
11
|
-
UTF8_ENCODING = Encoding.find("UTF-8").freeze
|
|
12
|
-
|
|
13
|
-
def initialize(encoding = UTF8_ENCODING)
|
|
14
|
-
@io = StringIO.new("".force_encoding(encoding))
|
|
15
|
-
super(@io, row_sep: "")
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
# Parse a single line of CSV data
|
|
19
|
-
# Parameters
|
|
20
|
-
# line [String]
|
|
21
|
-
# A single line of CSV data without any line terminators
|
|
22
|
-
def parse(line)
|
|
23
|
-
return if IOStreams::Utils.blank?(line)
|
|
24
|
-
return if @skip_lines&.match(line)
|
|
25
|
-
|
|
26
|
-
in_extended_col = false
|
|
27
|
-
csv = []
|
|
28
|
-
parts = line.split(@col_sep, -1)
|
|
29
|
-
csv << nil if parts.empty?
|
|
30
|
-
|
|
31
|
-
# This loop is the hot path of csv parsing. Some things may be non-dry
|
|
32
|
-
# for a reason. Make sure to benchmark when refactoring.
|
|
33
|
-
parts.each do |part|
|
|
34
|
-
if in_extended_col
|
|
35
|
-
# If we are continuing a previous column
|
|
36
|
-
if part[-1] == @quote_char && part.count(@quote_char).odd?
|
|
37
|
-
# extended column ends
|
|
38
|
-
csv.last << part[0..-2]
|
|
39
|
-
raise MalformedCSVError, "Missing or stray quote in line #{lineno + 1}" if csv.last =~ @parsers[:stray_quote]
|
|
40
|
-
|
|
41
|
-
csv.last.gsub!(@quote_char * 2, @quote_char)
|
|
42
|
-
in_extended_col = false
|
|
43
|
-
else
|
|
44
|
-
csv.last << part
|
|
45
|
-
csv.last << @col_sep
|
|
46
|
-
end
|
|
47
|
-
elsif part[0] == @quote_char
|
|
48
|
-
# If we are starting a new quoted column
|
|
49
|
-
if part[-1] != @quote_char || part.count(@quote_char).odd?
|
|
50
|
-
# start an extended column
|
|
51
|
-
csv << part[1..-1]
|
|
52
|
-
csv.last << @col_sep
|
|
53
|
-
in_extended_col = true
|
|
54
|
-
else
|
|
55
|
-
# regular quoted column
|
|
56
|
-
csv << part[1..-2]
|
|
57
|
-
raise MalformedCSVError, "Missing or stray quote in line #{lineno + 1}" if csv.last =~ @parsers[:stray_quote]
|
|
58
|
-
|
|
59
|
-
csv.last.gsub!(@quote_char * 2, @quote_char)
|
|
60
|
-
end
|
|
61
|
-
elsif part =~ @parsers[:quote_or_nl]
|
|
62
|
-
# Unquoted field with bad characters.
|
|
63
|
-
if part =~ @parsers[:nl_or_lf]
|
|
64
|
-
raise MalformedCSVError, "Unquoted fields do not allow \\r or \\n (line #{lineno + 1})."
|
|
65
|
-
else
|
|
66
|
-
raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
|
|
67
|
-
end
|
|
68
|
-
else
|
|
69
|
-
# Regular ole unquoted field.
|
|
70
|
-
csv << (part.empty? ? nil : part)
|
|
71
|
-
end
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
# Replace tacked on @col_sep with @row_sep if we are still in an extended
|
|
75
|
-
# column.
|
|
76
|
-
csv[-1][-1] = @row_sep if in_extended_col
|
|
77
|
-
|
|
78
|
-
raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}." if in_extended_col
|
|
79
|
-
|
|
80
|
-
@lineno += 1
|
|
81
|
-
|
|
82
|
-
# save fields unconverted fields, if needed...
|
|
83
|
-
unconverted = csv.dup if @unconverted_fields
|
|
84
|
-
|
|
85
|
-
# convert fields, if needed...
|
|
86
|
-
csv = convert_fields(csv) unless @use_headers || @converters.empty?
|
|
87
|
-
# parse out header rows and handle CSV::Row conversions...
|
|
88
|
-
csv = parse_headers(csv) if @use_headers
|
|
89
|
-
|
|
90
|
-
# inject unconverted fields and accessor, if requested...
|
|
91
|
-
add_unconverted_fields(csv, unconverted) if @unconverted_fields && (!csv.respond_to? :unconverted_fields)
|
|
92
|
-
|
|
93
|
-
csv
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
# Return the supplied array as a single line CSV string.
|
|
97
|
-
def render(row)
|
|
98
|
-
row.map(&@quote).join(@col_sep) + @row_sep # quote and separate
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
alias to_csv render
|
|
102
|
-
end
|
|
103
|
-
end
|
|
104
|
-
end
|
|
105
|
-
end
|