redshift-connector-data_file 7.0.0 → 7.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/redshift_connector/abstract_data_file.rb +15 -8
- data/lib/redshift_connector/abstract_data_file_bundle.rb +11 -1
- data/lib/redshift_connector/reader/redshift_csv.rb +2 -1
- data/lib/redshift_connector/s3_data_file.rb +2 -2
- data/lib/redshift_connector/s3_data_file_bundle.rb +3 -4
- data/lib/redshift_connector/url_data_file.rb +7 -6
- data/lib/redshift_connector/url_data_file_bundle.rb +8 -11
- data/redshift-connector-data_file.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5f242fc490379db815a588378910c758a3aa2d84
+  data.tar.gz: 2a315d9407eb66562b32d83993ba577a5b5063ac
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 141660793595766a703b5bd798ee4a61c8f9dab6db02ea3084d7b9f5eb92305bf2ad7c6d3164a55eb9dfa5797593675ef161d4789509d1b35928bda840825bc4
+  data.tar.gz: dcfebdb474cd2f78be55f47d4afb2e2488bd3e43f72f88fc3806bcff80ab24619eb0afc4b054845d3f57d4961897b753b32b62aca0c9b99a1d883c3b4387cc08
data/lib/redshift_connector/abstract_data_file.rb
CHANGED
@@ -2,17 +2,24 @@ require 'zlib'
 
 module RedshiftConnector
   class AbstractDataFile
+    def initialize(reader_class:)
+      @reader_class = reader_class
+    end
+
     def each_row(&block)
-      f =
-      …
+      f = open
+      begin
+        if gzipped_object?
+          f = Zlib::GzipReader.new(f)
+        end
+        @reader_class.new(f).each(&block)
+      ensure
+        f.close
+      end
     end
 
+    # abstract open
+
     def data_object?
       @reader_class.data_object?(key)
     end
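Note: this change makes AbstractDataFile a template class. It now owns @reader_class and the whole open / gunzip / parse / close sequence, while subclasses supply `open` (plus the `key` and `gzipped_object?` hooks that `each_row` and `data_object?` call). A minimal sketch of a hypothetical subclass, assuming only the contract visible in this hunk:

    # Hypothetical local-file subclass -- illustration only, not part of
    # the gem. It fills in the hooks each_row relies on above.
    require 'redshift_connector/abstract_data_file'

    module RedshiftConnector
      class LocalDataFile < AbstractDataFile
        def initialize(path, reader_class:)
          super reader_class: reader_class   # parent now stores @reader_class
          @path = path
        end

        def key
          @path
        end

        def open                             # called by each_row
          File.open(@path, 'rb')
        end

        def gzipped_object?                  # assumed hook; keyed on name here
          @path.end_with?('.gz')
        end
      end
    end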
data/lib/redshift_connector/abstract_data_file_bundle.rb
CHANGED
@@ -1,5 +1,14 @@
 module RedshiftConnector
   class AbstractDataFileBundle
+    def initialize(filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
+      @filter = filter || lambda {|*row| row }
+      @batch_size = batch_size || 1000
+      @logger = logger
+    end
+
+    attr_reader :batch_size
+    attr_reader :logger
+
     def each_row(&block)
       each_object do |obj|
         obj.each_row(&block)
@@ -19,10 +28,11 @@ module RedshiftConnector
       data_files.select {|obj| obj.data_object? }
     end
 
+    # abstract data_files
+
     REPORT_SIZE = 10_0000
 
     def each_batch(report: true)
-      @logger.info "reader: #{@reader_class}"
       n = 0
       reported = 0
       do_each_batch(@batch_size) do |rows|
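The constructor that the S3 and URL bundles previously duplicated now lives here, including the identity default for `filter`. A stand-alone illustration of how that splat-args filter behaves (row values are hypothetical):

    # The default filter passes each row through unchanged; a custom
    # filter receives the row splatted and can reshape it per row.
    default_filter = lambda {|*row| row }
    custom_filter  = lambda {|id, name| [id.to_i, name.upcase] }

    row = ['1', 'alice']
    p default_filter.call(*row)   # => ["1", "alice"]
    p custom_filter.call(*row)    # => [1, "ALICE"]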
data/lib/redshift_connector/reader/redshift_csv.rb
CHANGED
@@ -1,5 +1,6 @@
 require 'redshift_connector/reader/abstract'
 require 'redshift_connector/reader/exception'
+require 'strscan'
 
 module RedshiftConnector
   # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
@@ -29,7 +30,7 @@ module RedshiftConnector
       s = StringScanner.new(line)
       s.skip(/\s+/)
       until s.eos?
-        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise MalformedCSVException, "CSV parse error at line #{lineno}"
+        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise Reader::MalformedCSVException, "CSV parse error at line #{lineno}"
         row.push unescape_column(col)
         s.skip(/\s*/) # skip line terminator on line ends
         s.skip(/,\s*/)
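Both fixes harden the scanner-based parser: `strscan` is now required explicitly rather than relying on another file to have loaded it, and the exception is qualified with the `Reader::` namespace it is defined under (presumably fixing a NameError on malformed input). A runnable stand-alone sketch of the same scanning loop, with a simplified inline stand-in for `unescape_column`:

    # Sketch of the parsing loop above. Redshift UNLOAD with ADDQUOTES
    # ESCAPE double-quotes every column and backslash-escapes quotes
    # and backslashes inside it.
    require 'strscan'

    def parse_line(line, lineno = 1)
      row = []
      s = StringScanner.new(line)
      s.skip(/\s+/)
      until s.eos?
        col = s.scan(/"(?:\\.|[^"\\]+)*"/)
        raise "CSV parse error at line #{lineno}" unless col
        row.push col[1..-2].gsub(/\\(.)/) { $1 }   # strip quotes, unescape
        s.skip(/\s*/)    # skip line terminator on line ends
        s.skip(/,\s*/)
      end
      row
    end

    p parse_line(%q{"1","say \"hi\"","back\\\\slash"})
    # => ["1", "say \"hi\"", "back\\slash"]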
data/lib/redshift_connector/s3_data_file.rb
CHANGED
@@ -3,15 +3,15 @@ require 'redshift_connector/data_file'
 module RedshiftConnector
   class S3DataFile < AbstractDataFile
     def initialize(object, reader_class:)
+      super reader_class: reader_class
      @object = object
-      @reader_class = reader_class
     end
 
     def key
       @object.key
     end
 
-    def
+    def open
       @object.get.body
     end
 
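With `open` (the 7.0.0 method name is truncated in this diff) the subclass now lines up with the parent's template: `each_row` calls `open`, which streams the S3 object body. A usage sketch with a hypothetical bucket and key, assuming the aws-sdk v2 resource API of this gem's era:

    # Hypothetical usage -- bucket/key names are illustrative and this
    # needs real AWS credentials and region configuration to run.
    require 'aws-sdk'
    require 'redshift_connector/s3_data_file'
    require 'redshift_connector/reader'

    object = Aws::S3::Resource.new.bucket('my-bucket').object('export/0000_part_00')
    file = RedshiftConnector::S3DataFile.new(
      object,
      reader_class: RedshiftConnector::Reader.get(:csv)
    )
    file.each_row {|row| p row }   # each_row -> open -> @object.get.body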
data/lib/redshift_connector/s3_data_file_bundle.rb
CHANGED
@@ -32,13 +32,12 @@ module RedshiftConnector
     end
 
     def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
+      super filter: filter, batch_size: batch_size, logger: logger
       @bucket = bucket
       @prefix = prefix
       @format = format
-      @filter = filter || lambda {|*row| row }
-      @batch_size = batch_size
-      @logger = logger
       @reader_class = Reader.get(format)
+      logger.info "reader: #{@reader_class}"
     end
 
     attr_reader :bucket
@@ -61,7 +60,7 @@ module RedshiftConnector
       pref = File.dirname(@prefix) + '/'
       keys = @bucket.objects(prefix: pref).map(&:key)
       unless keys.empty?
-        …
+        logger.info "DELETE #{pref}*"
         @bucket.delete_objects(keys)
       end
     end
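The three duplicated assignments move up into AbstractDataFileBundle via `super`, and the "reader: ..." log line moves from `each_batch` (previous file) to construction time. Centralizing also tightens a default: the parent guards with `batch_size || 1000`, whereas the old inline `@batch_size = batch_size` kept an explicitly passed nil. The pattern in isolation, with illustrative class names:

    # Stand-alone sketch of the keyword-forwarding pattern used here;
    # Base/Child are illustrative names, not from the gem.
    class Base
      def initialize(filter: nil, batch_size: 1000)
        @filter = filter || lambda {|*row| row }
        @batch_size = batch_size || 1000
      end
      attr_reader :batch_size
    end

    class Child < Base
      def initialize(source, filter: nil, batch_size: 1000)
        super filter: filter, batch_size: batch_size   # shared state to parent
        @source = source                               # own state stays here
      end
    end

    p Child.new('s3://bucket/prefix', batch_size: nil).batch_size   # => 1000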
data/lib/redshift_connector/url_data_file.rb
CHANGED
@@ -1,22 +1,23 @@
 require 'redshift_connector/abstract_data_file'
-require 'uri'
-require 'zlib'
 require 'open3'
 
 module RedshiftConnector
   class UrlDataFile < AbstractDataFile
     def initialize(url, reader_class:)
+      super reader_class: reader_class
       @url = url
-      @reader_class = reader_class
     end
 
+    attr_reader :url
+
     def key
-      …
+      @url.path
     end
 
-    def
-      stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
+    def open
+      stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url.to_s)
       stdin.close
+      stderr.close
       stdout
     end
   end
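The requires move to where they are used: zlib is already loaded by abstract_data_file, and uri moves to the bundle below, which now parses URLs. Two fixes land here as well: `Open3.popen3` spawn arguments must be Strings, so the URI object is stringified, and the unused stderr pipe is closed like stdin instead of being leaked. A runnable sketch of the same call shape, with an illustrative URL:

    # Sketch of the fixed call; popen3 requires String arguments,
    # hence url.to_s for a URI object. Needs curl on PATH.
    require 'open3'
    require 'uri'

    url = URI.parse('https://example.com/data.csv')
    stdin, stdout, stderr, wait_th = Open3.popen3('curl', url.to_s)
    stdin.close    # curl reads nothing from us
    stderr.close   # we never read curl's progress output
    stdout.each_line {|line| print line }
    stdout.close
    wait_th.value  # reap the curl process

In the gem, `open` returns `stdout` and leaves closing to `AbstractDataFile#each_row`, whose `ensure` block closes whatever `open` returned.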
data/lib/redshift_connector/url_data_file_bundle.rb
CHANGED
@@ -2,21 +2,18 @@ require 'redshift_connector/reader'
 require 'redshift_connector/abstract_data_file_bundle'
 require 'redshift_connector/url_data_file'
 require 'redshift_connector/logger'
+require 'uri'
 
 module RedshiftConnector
   class UrlDataFileBundle < AbstractDataFileBundle
-    def initialize(
-      …
+    def initialize(urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
+      super filter: filter, batch_size: 1000, logger: logger
+      reader_class = Reader.get(format)
+      raise ArgumentError, 'no URL given' if urls.empty?
+      @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url), reader_class: reader_class) }
+      logger.info "reader: #{reader_class}"
     end
 
-    …
-      @data_file_urls.map do |url|
-        UrlDataFile.new(url, reader_class: @reader_class)
-      end
-    end
+    attr_reader :data_files
   end
 end
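The rewritten constructor takes plain URL strings, parses them itself (hence the new `require 'uri'`), builds its data files eagerly instead of mapping them lazily from @data_file_urls, and rejects an empty list up front with ArgumentError. A usage sketch with hypothetical URLs:

    # Hypothetical usage; the URLs are illustrative. An empty array now
    # raises ArgumentError at construction instead of failing later.
    require 'redshift_connector/url_data_file_bundle'

    urls = [
      'https://example.com/export/0000_part_00',
      'https://example.com/export/0001_part_00',
    ]
    bundle = RedshiftConnector::UrlDataFileBundle.new(urls, format: :redshift_csv)
    bundle.each_row {|row| p row }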
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: redshift-connector-data_file
 version: !ruby/object:Gem::Version
-  version: 7.0.0
+  version: 7.1.0
 platform: ruby
 authors:
 - Hidekazu Kobayashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-
+date: 2017-09-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aws-sdk
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.11
 signing_key:
 specification_version: 4
 summary: Utility classes for exported data files from Redshift