redshift-connector 4.3.1 → 4.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/redshift-connector/abstract_data_file.rb +24 -0
- data/lib/redshift-connector/abstract_data_file_bundle.rb +22 -0
- data/lib/redshift-connector/reader/csv.rb +2 -2
- data/lib/redshift-connector/reader/redshift_csv.rb +2 -2
- data/lib/redshift-connector/reader/tsv.rb +2 -2
- data/lib/redshift-connector/s3_data_file.rb +5 -19
- data/lib/redshift-connector/s3_data_file_bundle.rb +3 -18
- data/lib/redshift-connector/url_data_file.rb +23 -0
- data/lib/redshift-connector/url_data_file_bundle.rb +21 -0
- data/lib/redshift-connector/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b12a5bb9e0027d0251a3d3e98766d60c1a7a2bf1
|
4
|
+
data.tar.gz: 2e27ed62452041a2dd6bac8471014bdbf4702ef6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ff4c494a726ae8749e1f3e3e13bfa070908e79aba4c037742117b5afb500a27ed84474d43a65ea6a2d57b12bfdf379b6126c3a1d76bf3962e625bee419efea7
|
7
|
+
data.tar.gz: 702273bc9689811b99345cd4fa685dd0916252a6695e462201323b77224809deb95dfc9a663076b9a54b1dd4ca1c7abc724ff29faf29d759ad4f9559ec400f77
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
module RedshiftConnector
|
4
|
+
class AbstractDataFile
|
5
|
+
def each_row(&block)
|
6
|
+
f = if gzipped_object?
|
7
|
+
Zlib::GzipReader.new(content)
|
8
|
+
else
|
9
|
+
content
|
10
|
+
end
|
11
|
+
@reader_class.new(f).each(&block)
|
12
|
+
ensure
|
13
|
+
content.close
|
14
|
+
end
|
15
|
+
|
16
|
+
def data_object?
|
17
|
+
@reader_class.data_object?(key)
|
18
|
+
end
|
19
|
+
|
20
|
+
def gzipped_object?
|
21
|
+
File.extname(key) == '.gz'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module RedshiftConnector
|
2
|
+
class AbstractDataFileBundle
|
3
|
+
def each_row(&block)
|
4
|
+
each_object do |obj|
|
5
|
+
obj.each_row(&block)
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
alias each each_row
|
10
|
+
|
11
|
+
def each_object(&block)
|
12
|
+
all_data_objects.each do |obj|
|
13
|
+
@logger.info "processing s3 object: #{obj.key}"
|
14
|
+
yield obj
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def all_data_objects
|
19
|
+
data_files.select {|obj| obj.data_object? }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -8,8 +8,8 @@ module RedshiftConnector
|
|
8
8
|
class Reader::CSV < Reader::Abstract
|
9
9
|
declare_reader :csv
|
10
10
|
|
11
|
-
def self.data_object?(
|
12
|
-
/\.csv(?:\.|\z)/ =~ File.basename(
|
11
|
+
def self.data_object?(key)
|
12
|
+
/\.csv(?:\.|\z)/ =~ File.basename(key)
|
13
13
|
end
|
14
14
|
|
15
15
|
def initialize(f)
|
@@ -7,8 +7,8 @@ module RedshiftConnector
|
|
7
7
|
class Reader::RedshiftCSV < Reader::Abstract
|
8
8
|
declare_reader :redshift_csv
|
9
9
|
|
10
|
-
def self.data_object?(
|
11
|
-
/\.csv(?:\.|\z)/ =~ File.basename(
|
10
|
+
def self.data_object?(key)
|
11
|
+
/\.csv(?:\.|\z)/ =~ File.basename(key)
|
12
12
|
end
|
13
13
|
|
14
14
|
# f :: IO
|
@@ -7,8 +7,8 @@ module RedshiftConnector
|
|
7
7
|
class Reader::TSV < Reader::Abstract
|
8
8
|
declare_reader :tsv
|
9
9
|
|
10
|
-
def self.data_object?(
|
11
|
-
/\.tsv(?:\.|\z)/ =~ File.basename(
|
10
|
+
def self.data_object?(key)
|
11
|
+
/\.tsv(?:\.|\z)/ =~ File.basename(key)
|
12
12
|
end
|
13
13
|
|
14
14
|
def initialize(f)
|
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'zlib'
|
1
|
+
require 'redshift-connector/abstract_data_file'
|
2
2
|
|
3
3
|
module RedshiftConnector
|
4
|
-
class S3DataFile
|
4
|
+
class S3DataFile < AbstractDataFile
|
5
5
|
def initialize(object, reader_class:)
|
6
6
|
@object = object
|
7
7
|
@reader_class = reader_class
|
@@ -11,24 +11,10 @@ module RedshiftConnector
|
|
11
11
|
@object.key
|
12
12
|
end
|
13
13
|
|
14
|
-
def each_row(&block)
|
15
|
-
|
16
|
-
f = if gzipped_object?
|
17
|
-
Zlib::GzipReader.new(response.body)
|
18
|
-
else
|
19
|
-
response.body
|
20
|
-
end
|
21
|
-
@reader_class.new(f).each(&block)
|
22
|
-
ensure
|
23
|
-
response.body.close if response
|
14
|
+
def content
|
15
|
+
@object.get.body
|
24
16
|
end
|
25
17
|
|
26
|
-
|
27
|
-
@reader_class.data_object?(@object)
|
28
|
-
end
|
29
|
-
|
30
|
-
def gzipped_object?
|
31
|
-
File.extname(@object.key) == '.gz'
|
32
|
-
end
|
18
|
+
delegate :presigned_url, to: :@object
|
33
19
|
end
|
34
20
|
end
|
@@ -2,10 +2,11 @@ require 'redshift-connector/s3_bucket'
|
|
2
2
|
require 'redshift-connector/s3_data_file'
|
3
3
|
require 'redshift-connector/reader'
|
4
4
|
require 'redshift-connector/logger'
|
5
|
+
require 'redshift-connector/abstract_data_file_bundle'
|
5
6
|
require 'aws-sdk'
|
6
7
|
|
7
8
|
module RedshiftConnector
|
8
|
-
class S3DataFileBundle
|
9
|
+
class S3DataFileBundle < AbstractDataFileBundle
|
9
10
|
def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
|
10
11
|
real_prefix = "#{bucket.prefix}/#{prefix}"
|
11
12
|
new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
|
@@ -68,25 +69,9 @@ module RedshiftConnector
|
|
68
69
|
end
|
69
70
|
private :do_each_batch
|
70
71
|
|
71
|
-
def each_row(&block)
|
72
|
-
each_object do |obj|
|
73
|
-
obj.each_row(&block)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
alias each each_row
|
78
|
-
|
79
|
-
def each_object(&block)
|
80
|
-
all_data_objects.each do |obj|
|
81
|
-
@logger.info "processing s3 object: #{obj.key}"
|
82
|
-
yield obj
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
def all_data_objects
|
72
|
+
def data_files
|
87
73
|
@bucket.objects(prefix: @prefix)
|
88
74
|
.map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
|
89
|
-
.select {|obj| obj.data_object? }
|
90
75
|
end
|
91
76
|
|
92
77
|
def clear
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'redshift-connector/abstract_data_file'
|
2
|
+
require 'uri'
|
3
|
+
require 'zlib'
|
4
|
+
require 'open3'
|
5
|
+
|
6
|
+
module RedshiftConnector
|
7
|
+
class UrlDataFile < AbstractDataFile
|
8
|
+
def initialize(url, reader_class:)
|
9
|
+
@url = url
|
10
|
+
@reader_class = reader_class
|
11
|
+
end
|
12
|
+
|
13
|
+
def key
|
14
|
+
URI.parse(@url).path
|
15
|
+
end
|
16
|
+
|
17
|
+
def content
|
18
|
+
stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
|
19
|
+
stdin.close
|
20
|
+
stdout
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'redshift-connector/reader'
|
2
|
+
require 'redshift-connector/logger'
|
3
|
+
require 'redshift-connector/abstract_data_file_bundle'
|
4
|
+
require 'redshift-connector/url_data_file'
|
5
|
+
|
6
|
+
module RedshiftConnector
|
7
|
+
class UrlDataFileBundle < AbstractDataFileBundle
|
8
|
+
def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
|
9
|
+
@data_file_urls = data_file_urls
|
10
|
+
@filter = filter || lambda {|*row| row }
|
11
|
+
@logger = logger
|
12
|
+
@reader_class = Reader.get(format)
|
13
|
+
end
|
14
|
+
|
15
|
+
def data_files
|
16
|
+
@data_file_urls.map do |url|
|
17
|
+
UrlDataFile.new(url, reader_class: @reader_class)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redshift-connector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.3.1
|
4
|
+
version: 4.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -130,6 +130,8 @@ extra_rdoc_files: []
|
|
130
130
|
files:
|
131
131
|
- README.md
|
132
132
|
- lib/redshift-connector.rb
|
133
|
+
- lib/redshift-connector/abstract_data_file.rb
|
134
|
+
- lib/redshift-connector/abstract_data_file_bundle.rb
|
133
135
|
- lib/redshift-connector/connector.rb
|
134
136
|
- lib/redshift-connector/exporter.rb
|
135
137
|
- lib/redshift-connector/importer.rb
|
@@ -149,6 +151,8 @@ files:
|
|
149
151
|
- lib/redshift-connector/s3_bucket.rb
|
150
152
|
- lib/redshift-connector/s3_data_file.rb
|
151
153
|
- lib/redshift-connector/s3_data_file_bundle.rb
|
154
|
+
- lib/redshift-connector/url_data_file.rb
|
155
|
+
- lib/redshift-connector/url_data_file_bundle.rb
|
152
156
|
- lib/redshift-connector/version.rb
|
153
157
|
- test/all.rb
|
154
158
|
- test/config.rb
|