redshift-connector-data_file 7.0.0 → 7.1.0
- checksums.yaml +4 -4
- data/lib/redshift_connector/abstract_data_file.rb +15 -8
- data/lib/redshift_connector/abstract_data_file_bundle.rb +11 -1
- data/lib/redshift_connector/reader/redshift_csv.rb +2 -1
- data/lib/redshift_connector/s3_data_file.rb +2 -2
- data/lib/redshift_connector/s3_data_file_bundle.rb +3 -4
- data/lib/redshift_connector/url_data_file.rb +7 -6
- data/lib/redshift_connector/url_data_file_bundle.rb +8 -11
- data/redshift-connector-data_file.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5f242fc490379db815a588378910c758a3aa2d84
+  data.tar.gz: 2a315d9407eb66562b32d83993ba577a5b5063ac
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 141660793595766a703b5bd798ee4a61c8f9dab6db02ea3084d7b9f5eb92305bf2ad7c6d3164a55eb9dfa5797593675ef161d4789509d1b35928bda840825bc4
+  data.tar.gz: dcfebdb474cd2f78be55f47d4afb2e2488bd3e43f72f88fc3806bcff80ab24619eb0afc4b054845d3f57d4961897b753b32b62aca0c9b99a1d883c3b4387cc08
data/lib/redshift_connector/abstract_data_file.rb
CHANGED
@@ -2,17 +2,24 @@ require 'zlib'
 
 module RedshiftConnector
   class AbstractDataFile
+    def initialize(reader_class:)
+      @reader_class = reader_class
+    end
+
     def each_row(&block)
-      f =
-
-
-
-
-
-
-
+      f = open
+      begin
+        if gzipped_object?
+          f = Zlib::GzipReader.new(f)
+        end
+        @reader_class.new(f).each(&block)
+      ensure
+        f.close
+      end
     end
 
+    # abstract open
+
     def data_object?
       @reader_class.data_object?(key)
     end
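
This change turns `each_row` into a template method: `AbstractDataFile` now owns reader construction and gzip handling, and a concrete subclass only supplies `open` (returning an IO-like stream) and `key`. Below is a minimal sketch of a conforming subclass; the in-memory class is hypothetical, and `gzipped_object?` is assumed to key off the file name since its real definition is not part of this diff.

    require 'stringio'
    require 'redshift_connector/abstract_data_file'

    module RedshiftConnector
      # Hypothetical subclass, for illustration only: serves rows from an
      # in-memory string instead of S3 or a URL.
      class InMemoryDataFile < AbstractDataFile
        def initialize(key, content, reader_class:)
          super(reader_class: reader_class)  # base class stores @reader_class
          @key = key
          @content = content
        end

        attr_reader :key

        # Called by AbstractDataFile#each_row; must return an IO-like object.
        def open
          StringIO.new(@content)
        end

        # Assumed contract: gzip detection by extension (the real definition
        # is not shown in this diff).
        def gzipped_object?
          @key.end_with?('.gz')
        end
      end
    end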
data/lib/redshift_connector/abstract_data_file_bundle.rb
CHANGED
@@ -1,5 +1,14 @@
 module RedshiftConnector
   class AbstractDataFileBundle
+    def initialize(filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
+      @filter = filter || lambda {|*row| row }
+      @batch_size = batch_size || 1000
+      @logger = logger
+    end
+
+    attr_reader :batch_size
+    attr_reader :logger
+
     def each_row(&block)
       each_object do |obj|
         obj.each_row(&block)
@@ -19,10 +28,11 @@ module RedshiftConnector
       data_files.select {|obj| obj.data_object? }
     end
 
+    # abstract data_files
+
     REPORT_SIZE = 10_0000
 
     def each_batch(report: true)
-      @logger.info "reader: #{@reader_class}"
       n = 0
       reported = 0
       do_each_batch(@batch_size) do |rows|
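
The bundle base class gets the same treatment: `@filter`, `@batch_size`, and `@logger` move up into `AbstractDataFileBundle#initialize`, and subclasses only provide `data_files`. Note that `@batch_size = batch_size || 1000` restores the default even when a caller passes `nil` explicitly, and `REPORT_SIZE = 10_0000` equals 100,000 (Ruby ignores underscore placement in integer literals). A minimal sketch of a conforming subclass, again hypothetical:

    require 'redshift_connector/abstract_data_file_bundle'

    module RedshiftConnector
      # Hypothetical bundle, for illustration only: wraps a fixed list of
      # already-constructed data file objects.
      class StaticDataFileBundle < AbstractDataFileBundle
        def initialize(files, **opts)
          super(**opts)  # sets @filter, @batch_size, @logger
          @data_files = files
        end

        # The base class's each_object/each_batch iterate over these.
        attr_reader :data_files
      end
    end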
data/lib/redshift_connector/reader/redshift_csv.rb
CHANGED
@@ -1,5 +1,6 @@
 require 'redshift_connector/reader/abstract'
 require 'redshift_connector/reader/exception'
+require 'strscan'
 
 module RedshiftConnector
   # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
@@ -29,7 +30,7 @@ module RedshiftConnector
       s = StringScanner.new(line)
       s.skip(/\s+/)
       until s.eos?
-        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise MalformedCSVException, "CSV parse error at line #{lineno}"
+        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise Reader::MalformedCSVException, "CSV parse error at line #{lineno}"
         row.push unescape_column(col)
         s.skip(/\s*/)   # skip line terminator on line ends
         s.skip(/,\s*/)
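
The functional fix here is the fully qualified `Reader::MalformedCSVException`; the bare constant likely raised a `NameError` instead of the intended parse error, and `require 'strscan'` makes the `StringScanner` dependency explicit. To show what the scanning loop does, here is a self-contained sketch run on a sample line in the UNLOAD ADDQUOTES ESCAPE style; `parse_line` and its one-line unescaping are simplified stand-ins for the gem's real methods:

    require 'strscan'

    # Simplified stand-in for the scanning loop above; the gem's real
    # unescape_column may handle more escape forms than this gsub does.
    def parse_line(line)
      row = []
      s = StringScanner.new(line)
      s.skip(/\s+/)
      until s.eos?
        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise 'CSV parse error'
        row.push col[1..-2].gsub(/\\(.)/) { $1 }  # drop quotes, undo backslash escapes
        s.skip(/\s*/)
        s.skip(/,\s*/)
      end
      row
    end

    p parse_line(%q("1","Alice","she said \"hi\""))
    # => ["1", "Alice", "she said \"hi\""]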
data/lib/redshift_connector/s3_data_file.rb
CHANGED
@@ -3,15 +3,15 @@ require 'redshift_connector/data_file'
 module RedshiftConnector
   class S3DataFile < AbstractDataFile
     def initialize(object, reader_class:)
+      super reader_class: reader_class
       @object = object
-      @reader_class = reader_class
     end
 
     def key
       @object.key
     end
 
-    def
+    def open
       @object.get.body
     end
 
|
@@ -32,13 +32,12 @@ module RedshiftConnector
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
|
35
|
+
super filter: filter, batch_size: batch_size, logger: logger
|
35
36
|
@bucket = bucket
|
36
37
|
@prefix = prefix
|
37
38
|
@format = format
|
38
|
-
@filter = filter || lambda {|*row| row }
|
39
|
-
@batch_size = batch_size
|
40
|
-
@logger = logger
|
41
39
|
@reader_class = Reader.get(format)
|
40
|
+
logger.info "reader: #{@reader_class}"
|
42
41
|
end
|
43
42
|
|
44
43
|
attr_reader :bucket
|
@@ -61,7 +60,7 @@ module RedshiftConnector
|
|
61
60
|
pref = File.dirname(@prefix) + '/'
|
62
61
|
keys = @bucket.objects(prefix: pref).map(&:key)
|
63
62
|
unless keys.empty?
|
64
|
-
|
63
|
+
logger.info "DELETE #{pref}*"
|
65
64
|
@bucket.delete_objects(keys)
|
66
65
|
end
|
67
66
|
end
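
For context, a hypothetical usage sketch of the rewritten constructor; the bucket object and prefix are placeholders (building the bucket abstraction is outside this diff):

    # bucket must respond to #objects and #delete_objects as used in this class.
    bundle = RedshiftConnector::S3DataFileBundle.new(
      bucket, 'exports/users/0000_part_00',
      format: :csv,
      batch_size: 5000
    )
    bundle.each_batch do |rows|
      # rows: an Array of up to batch_size parsed rows
    end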
|
@@ -1,22 +1,23 @@
|
|
1
1
|
require 'redshift_connector/abstract_data_file'
|
2
|
-
require 'uri'
|
3
|
-
require 'zlib'
|
4
2
|
require 'open3'
|
5
3
|
|
6
4
|
module RedshiftConnector
|
7
5
|
class UrlDataFile < AbstractDataFile
|
8
6
|
def initialize(url, reader_class:)
|
7
|
+
super reader_class: reader_class
|
9
8
|
@url = url
|
10
|
-
@reader_class = reader_class
|
11
9
|
end
|
12
10
|
|
11
|
+
attr_reader :url
|
12
|
+
|
13
13
|
def key
|
14
|
-
|
14
|
+
@url.path
|
15
15
|
end
|
16
16
|
|
17
|
-
def
|
18
|
-
stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
|
17
|
+
def open
|
18
|
+
stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url.to_s)
|
19
19
|
stdin.close
|
20
|
+
stderr.close
|
20
21
|
stdout
|
21
22
|
end
|
22
23
|
end
|
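
Two fixes land in `open`: `@url` is now a `URI` object (parsed by the bundle, next file), so it must be stringified before being passed to curl, and the unused stderr pipe is closed instead of leaking. Since `stdout` is returned without joining the wait thread, a curl failure surfaces as an empty or truncated stream rather than an exception. A self-contained sketch of the same pattern; the helper name and URL are placeholders:

    require 'open3'
    require 'zlib'

    # Hypothetical helper mirroring UrlDataFile#open: spawn curl and hand
    # the caller its stdout as a readable IO.
    def open_url(url_string)
      stdin, stdout, stderr, _wait_thread = Open3.popen3('curl', url_string)
      stdin.close    # nothing is written to curl
      stderr.close   # diagnostics are not consumed
      stdout         # caller reads from this pipe, then closes it
    end

    io = open_url('https://example.com/unload/0000_part_00.gz')
    Zlib::GzipReader.new(io).each_line { |line| puts line }
    io.close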
data/lib/redshift_connector/url_data_file_bundle.rb
CHANGED
@@ -2,21 +2,18 @@ require 'redshift_connector/reader'
 require 'redshift_connector/abstract_data_file_bundle'
 require 'redshift_connector/url_data_file'
 require 'redshift_connector/logger'
+require 'uri'
 
 module RedshiftConnector
   class UrlDataFileBundle < AbstractDataFileBundle
-    def initialize(
-
-
-
-      @
-
+    def initialize(urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
+      super filter: filter, batch_size: 1000, logger: logger
+      reader_class = Reader.get(format)
+      raise ArgumentError, 'no URL given' if urls.empty?
+      @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url), reader_class: reader_class) }
+      logger.info "reader: #{reader_class}"
    end
 
-
-      @data_file_urls.map do |url|
-        UrlDataFile.new(url, reader_class: @reader_class)
-      end
-    end
+    attr_reader :data_files
   end
 end
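
Finally, a hypothetical usage sketch for the rewritten bundle constructor (placeholder URLs): strings are parsed with `URI.parse` internally, the batch size is pinned at 1000 by the `super` call, and an empty URL list now fails fast with an `ArgumentError`.

    require 'redshift_connector/url_data_file_bundle'

    bundle = RedshiftConnector::UrlDataFileBundle.new([
      'https://example.com/unload/0000_part_00.gz',
      'https://example.com/unload/0001_part_00.gz'
    ])
    bundle.each_row do |row|
      # row: an Array of unescaped column strings
    end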
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: redshift-connector-data_file
 version: !ruby/object:Gem::Version
-  version: 7.0.0
+  version: 7.1.0
 platform: ruby
 authors:
 - Hidekazu Kobayashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-
+date: 2017-09-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aws-sdk
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.11
 signing_key:
 specification_version: 4
 summary: Utility classes for exported data files from Redshift