redshift-connector 4.3.1 → 4.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/redshift-connector/abstract_data_file.rb +24 -0
- data/lib/redshift-connector/abstract_data_file_bundle.rb +22 -0
- data/lib/redshift-connector/reader/csv.rb +2 -2
- data/lib/redshift-connector/reader/redshift_csv.rb +2 -2
- data/lib/redshift-connector/reader/tsv.rb +2 -2
- data/lib/redshift-connector/s3_data_file.rb +5 -19
- data/lib/redshift-connector/s3_data_file_bundle.rb +3 -18
- data/lib/redshift-connector/url_data_file.rb +23 -0
- data/lib/redshift-connector/url_data_file_bundle.rb +21 -0
- data/lib/redshift-connector/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b12a5bb9e0027d0251a3d3e98766d60c1a7a2bf1
|
4
|
+
data.tar.gz: 2e27ed62452041a2dd6bac8471014bdbf4702ef6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ff4c494a726ae8749e1f3e3e13bfa070908e79aba4c037742117b5afb500a27ed84474d43a65ea6a2d57b12bfdf379b6126c3a1d76bf3962e625bee419efea7
|
7
|
+
data.tar.gz: 702273bc9689811b99345cd4fa685dd0916252a6695e462201323b77224809deb95dfc9a663076b9a54b1dd4ca1c7abc724ff29faf29d759ad4f9559ec400f77
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
module RedshiftConnector
|
4
|
+
class AbstractDataFile
|
5
|
+
def each_row(&block)
|
6
|
+
f = if gzipped_object?
|
7
|
+
Zlib::GzipReader.new(content)
|
8
|
+
else
|
9
|
+
content
|
10
|
+
end
|
11
|
+
@reader_class.new(f).each(&block)
|
12
|
+
ensure
|
13
|
+
content.close
|
14
|
+
end
|
15
|
+
|
16
|
+
def data_object?
|
17
|
+
@reader_class.data_object?(key)
|
18
|
+
end
|
19
|
+
|
20
|
+
def gzipped_object?
|
21
|
+
File.extname(key) == '.gz'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module RedshiftConnector
|
2
|
+
class AbstractDataFileBundle
|
3
|
+
def each_row(&block)
|
4
|
+
each_object do |obj|
|
5
|
+
obj.each_row(&block)
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
alias each each_row
|
10
|
+
|
11
|
+
def each_object(&block)
|
12
|
+
all_data_objects.each do |obj|
|
13
|
+
@logger.info "processing s3 object: #{obj.key}"
|
14
|
+
yield obj
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def all_data_objects
|
19
|
+
data_files.select {|obj| obj.data_object? }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -8,8 +8,8 @@ module RedshiftConnector
|
|
8
8
|
class Reader::CSV < Reader::Abstract
|
9
9
|
declare_reader :csv
|
10
10
|
|
11
|
-
def self.data_object?(
|
12
|
-
/\.csv(?:\.|\z)/ =~ File.basename(
|
11
|
+
def self.data_object?(key)
|
12
|
+
/\.csv(?:\.|\z)/ =~ File.basename(key)
|
13
13
|
end
|
14
14
|
|
15
15
|
def initialize(f)
|
@@ -7,8 +7,8 @@ module RedshiftConnector
|
|
7
7
|
class Reader::RedshiftCSV < Reader::Abstract
|
8
8
|
declare_reader :redshift_csv
|
9
9
|
|
10
|
-
def self.data_object?(
|
11
|
-
/\.csv(?:\.|\z)/ =~ File.basename(
|
10
|
+
def self.data_object?(key)
|
11
|
+
/\.csv(?:\.|\z)/ =~ File.basename(key)
|
12
12
|
end
|
13
13
|
|
14
14
|
# f :: IO
|
@@ -7,8 +7,8 @@ module RedshiftConnector
|
|
7
7
|
class Reader::TSV < Reader::Abstract
|
8
8
|
declare_reader :tsv
|
9
9
|
|
10
|
-
def self.data_object?(
|
11
|
-
/\.tsv(?:\.|\z)/ =~ File.basename(
|
10
|
+
def self.data_object?(key)
|
11
|
+
/\.tsv(?:\.|\z)/ =~ File.basename(key)
|
12
12
|
end
|
13
13
|
|
14
14
|
def initialize(f)
|
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'zlib'
|
1
|
+
require 'redshift-connector/abstract_data_file'
|
2
2
|
|
3
3
|
module RedshiftConnector
|
4
|
-
class S3DataFile
|
4
|
+
class S3DataFile < AbstractDataFile
|
5
5
|
def initialize(object, reader_class:)
|
6
6
|
@object = object
|
7
7
|
@reader_class = reader_class
|
@@ -11,24 +11,10 @@ module RedshiftConnector
|
|
11
11
|
@object.key
|
12
12
|
end
|
13
13
|
|
14
|
-
def each_row(&block)
|
15
|
-
response = @object.get
|
16
|
-
f = if gzipped_object?
|
17
|
-
Zlib::GzipReader.new(response.body)
|
18
|
-
else
|
19
|
-
response.body
|
20
|
-
end
|
21
|
-
@reader_class.new(f).each(&block)
|
22
|
-
ensure
|
23
|
-
response.body.close if response
|
14
|
+
def content
|
15
|
+
@object.get.body
|
24
16
|
end
|
25
17
|
|
26
|
-
def data_object?
|
27
|
-
@reader_class.data_object?(@object)
|
28
|
-
end
|
29
|
-
|
30
|
-
def gzipped_object?
|
31
|
-
File.extname(@object.key) == '.gz'
|
32
|
-
end
|
18
|
+
delegate :presigned_url, to: :@object
|
33
19
|
end
|
34
20
|
end
|
@@ -2,10 +2,11 @@ require 'redshift-connector/s3_bucket'
|
|
2
2
|
require 'redshift-connector/s3_data_file'
|
3
3
|
require 'redshift-connector/reader'
|
4
4
|
require 'redshift-connector/logger'
|
5
|
+
require 'redshift-connector/abstract_data_file_bundle'
|
5
6
|
require 'aws-sdk'
|
6
7
|
|
7
8
|
module RedshiftConnector
|
8
|
-
class S3DataFileBundle
|
9
|
+
class S3DataFileBundle < AbstractDataFileBundle
|
9
10
|
def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
|
10
11
|
real_prefix = "#{bucket.prefix}/#{prefix}"
|
11
12
|
new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
|
@@ -68,25 +69,9 @@ module RedshiftConnector
|
|
68
69
|
end
|
69
70
|
private :do_each_batch
|
70
71
|
|
71
|
-
def each_row(&block)
|
72
|
-
each_object do |obj|
|
73
|
-
obj.each_row(&block)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
alias each each_row
|
78
|
-
|
79
|
-
def each_object(&block)
|
80
|
-
all_data_objects.each do |obj|
|
81
|
-
@logger.info "processing s3 object: #{obj.key}"
|
82
|
-
yield obj
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
def all_data_objects
|
72
|
+
def data_files
|
87
73
|
@bucket.objects(prefix: @prefix)
|
88
74
|
.map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
|
89
|
-
.select {|obj| obj.data_object? }
|
90
75
|
end
|
91
76
|
|
92
77
|
def clear
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'redshift-connector/abstract_data_file'
|
2
|
+
require 'uri'
|
3
|
+
require 'zlib'
|
4
|
+
require 'open3'
|
5
|
+
|
6
|
+
module RedshiftConnector
|
7
|
+
class UrlDataFile < AbstractDataFile
|
8
|
+
def initialize(url, reader_class:)
|
9
|
+
@url = url
|
10
|
+
@reader_class = reader_class
|
11
|
+
end
|
12
|
+
|
13
|
+
def key
|
14
|
+
URI.parse(@url).path
|
15
|
+
end
|
16
|
+
|
17
|
+
def content
|
18
|
+
stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
|
19
|
+
stdin.close
|
20
|
+
stdout
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'redshift-connector/reader'
|
2
|
+
require 'redshift-connector/logger'
|
3
|
+
require 'redshift-connector/abstract_data_file_bundle'
|
4
|
+
require 'redshift-connector/url_data_file'
|
5
|
+
|
6
|
+
module RedshiftConnector
|
7
|
+
class UrlDataFileBundle < AbstractDataFileBundle
|
8
|
+
def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
|
9
|
+
@data_file_urls = data_file_urls
|
10
|
+
@filter = filter || lambda {|*row| row }
|
11
|
+
@logger = logger
|
12
|
+
@reader_class = Reader.get(format)
|
13
|
+
end
|
14
|
+
|
15
|
+
def data_files
|
16
|
+
@data_file_urls.map do |url|
|
17
|
+
UrlDataFile.new(url, reader_class: @reader_class)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redshift-connector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.3.1
|
4
|
+
version: 4.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -130,6 +130,8 @@ extra_rdoc_files: []
|
|
130
130
|
files:
|
131
131
|
- README.md
|
132
132
|
- lib/redshift-connector.rb
|
133
|
+
- lib/redshift-connector/abstract_data_file.rb
|
134
|
+
- lib/redshift-connector/abstract_data_file_bundle.rb
|
133
135
|
- lib/redshift-connector/connector.rb
|
134
136
|
- lib/redshift-connector/exporter.rb
|
135
137
|
- lib/redshift-connector/importer.rb
|
@@ -149,6 +151,8 @@ files:
|
|
149
151
|
- lib/redshift-connector/s3_bucket.rb
|
150
152
|
- lib/redshift-connector/s3_data_file.rb
|
151
153
|
- lib/redshift-connector/s3_data_file_bundle.rb
|
154
|
+
- lib/redshift-connector/url_data_file.rb
|
155
|
+
- lib/redshift-connector/url_data_file_bundle.rb
|
152
156
|
- lib/redshift-connector/version.rb
|
153
157
|
- test/all.rb
|
154
158
|
- test/config.rb
|