redshift-connector-data_file 1.2.0 → 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/redshift-connector-data_file.rb +1 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/abstract_data_file.rb +0 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/abstract_data_file_bundle.rb +0 -0
- data/lib/redshift_connector/data_file.rb +7 -0
- data/lib/redshift_connector/logger.rb +20 -0
- data/lib/redshift_connector/reader.rb +18 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/abstract.rb +0 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/csv.rb +2 -2
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/exception.rb +0 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/redshift_csv.rb +2 -2
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/tsv.rb +2 -2
- data/lib/redshift_connector/s3_bucket.rb +76 -0
- data/lib/redshift_connector/s3_data_file.rb +20 -0
- data/lib/redshift_connector/s3_data_file_bundle.rb +69 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/url_data_file.rb +1 -1
- data/lib/{redshift-connector/data_file → redshift_connector}/url_data_file_bundle.rb +5 -5
- data/redshift-connector-data_file.gemspec +9 -10
- metadata +68 -22
- data/lib/redshift-connector/data_file.rb +0 -9
- data/lib/redshift-connector/data_file/logger.rb +0 -14
- data/lib/redshift-connector/data_file/reader.rb +0 -18
- data/lib/redshift-connector/data_file/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a91f1bb3c4764f126a7cd5db3c97b23626a8f54
|
4
|
+
data.tar.gz: 4d604f99c42e55ed8343a6240139df3ce723e63d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a64f93698aeee74754b6c0b4452b8bd9d06031235e0737a57f17752a0aba36659322a9e6a467e5c3f359f82fcd1e6f8ae1fef128a30c080370e08898d89216e7
|
7
|
+
data.tar.gz: 00e1d01e5da780e861a8c700ef2c45860bbef1e36e56ced8c19c4afc8c79750d1b50674c61474e8826904030ef6f13e4ec0eff82dca4d7bfa1a65caf18843a68
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
A gem to handle data files exported from Redshift
|
4
4
|
|
5
|
-
[redshift-connector](https://github.com/
|
5
|
+
[redshift-connector](https://github.com/bricolages/redshift-connector) uses this gem internally.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -22,4 +22,4 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Contributing
|
24
24
|
|
25
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
25
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/bricolages/redshift-connector-data_file.
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'redshift_connector/data_file'
|
File without changes
|
File without changes
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module RedshiftConnector
  @logger = nil

  # Returns the logger used by this library.
  #
  # Resolution order:
  # 1. the logger explicitly assigned through RedshiftConnector.logger=
  # 2. Rails.logger, when a Rails constant is loaded and provides a logger
  # 3. a shared NullLogger, so callers can always log without nil checks
  #
  # Rails is looked up at call time (not at load time) so this file can be
  # required before Rails has been initialized, or without Rails at all.
  def self.logger
    return @logger if @logger

    rails_logger = ::Rails.logger if defined?(::Rails) && ::Rails.respond_to?(:logger)
    rails_logger || (@null_logger ||= NullLogger.new)
  end

  # Installs +logger+ as the library-wide logger. Assigning nil restores the
  # default lookup described on RedshiftConnector.logger.
  def self.logger=(logger)
    @logger = logger
  end

  # Logger stand-in that accepts the usual severity methods and discards
  # every message.
  class NullLogger
    # Accepts any arguments, does nothing, returns nil.
    def noop(*args) end

    alias fatal noop
    alias error noop
    alias warn noop
    alias info noop
    alias debug noop
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Declare the Reader namespace before loading the individual readers.
# NOTE(review): presumably the reader files below reopen this module and
# therefore need it to exist first -- confirm before reordering.
module RedshiftConnector
  module Reader; end
end

require 'redshift_connector/reader/redshift_csv'
require 'redshift_connector/reader/csv'
require 'redshift_connector/reader/tsv'
require 'redshift_connector/reader/exception'

module RedshiftConnector
  module Reader
    # Looks up the reader class registered for +id+ by delegating to
    # Abstract.get_reader_class.
    def self.get(id)
      Abstract.get_reader_class(id)
    end
  end
end
|
File without changes
|
File without changes
|
@@ -1,5 +1,5 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'redshift_connector/reader/abstract'
|
2
|
+
require 'redshift_connector/reader/exception'
|
3
3
|
|
4
4
|
module RedshiftConnector
|
5
5
|
# Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'aws-sdk'
|
2
|
+
|
3
|
+
module RedshiftConnector
  # Connection settings for one S3 bucket plus a class-level registry of
  # configured buckets, looked up by name.
  class S3Bucket
    # Amazon S3 rejects DeleteObjects requests that carry more keys than this.
    DELETE_BATCH_SIZE = 1000

    @buckets = {}
    @default = nil

    # Creates a bucket from +params+ (see #initialize) and registers it under
    # +name+ (stored as a string). The first bucket registered becomes the
    # default; passing default: true makes this bucket the default regardless.
    def self.add(name, default: false, **params)
      instance = new(**params)
      @buckets[name.to_s] = instance
      @default = instance if @default.nil? || default
    end

    # Returns the default bucket.
    # Raises ArgumentError when no bucket has been registered.
    def self.default
      @default || raise(ArgumentError, "no default S3 bucket configured")
    end

    # Returns the bucket registered under +name+ (symbol or string).
    # Raises ArgumentError when there is no such bucket.
    def self.get(name)
      @buckets[name.to_s] || raise(ArgumentError, "no such S3 bucket configured: #{name.inspect}")
    end

    # bucket:: S3 bucket name (required)
    # prefix:: key prefix used when building #url
    # region, access_key_id, secret_access_key:: passed to Aws::S3::Client
    #   when given; nil values are omitted
    # iam_role:: used only by #credential_string
    def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
      @region = region
      @name = bucket
      @prefix = prefix
      @access_key_id = access_key_id
      @secret_access_key = secret_access_key
      @iam_role = iam_role
    end

    attr_reader :name
    attr_reader :prefix

    # s3:// URL of this bucket's prefix.
    def url
      # Build from the configured name. @bucket is only populated lazily by
      # #bucket, so it must not be dereferenced here.
      "s3://#{@name}/#{@prefix}/"
    end

    # Memoized Aws::S3::Client built from the non-nil connection settings.
    def client
      @client ||= begin
        args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
        Aws::S3::Client.new(**args)
      end
    end

    # Memoized bucket handle obtained from an Aws::S3::Resource using #client.
    def bucket
      @bucket ||= Aws::S3::Resource.new(client: client).bucket(@name)
    end

    # Returns the object handle for +key+.
    def object(key)
      bucket.object(key)
    end

    # Returns the objects whose keys start with +prefix+.
    def objects(prefix:)
      bucket.objects(prefix: prefix)
    end

    # Deletes the objects named by +keys+, issuing one request per
    # DELETE_BATCH_SIZE keys. Returns the response of the last request, or nil
    # when +keys+ is empty (no request is sent in that case).
    def delete_objects(keys)
      keys.each_slice(DELETE_BATCH_SIZE).map {|batch|
        bucket.delete_objects(delete: {objects: batch.map {|k| {key: k} }})
      }.last
    end

    # Credential string built from the IAM role if one is configured,
    # otherwise from the access key pair.
    # Raises ArgumentError when neither is configured.
    def credential_string
      if @iam_role
        "aws_iam_role=#{@iam_role}"
      elsif @access_key_id
        "aws_access_key_id=#{@access_key_id};aws_secret_access_key=#{@secret_access_key}"
      else
        raise ArgumentError, "no credential given for Redshift S3 access"
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'redshift_connector/data_file'
|
2
|
+
|
3
|
+
module RedshiftConnector
  # Data file backed by a single S3 object.
  class S3DataFile < AbstractDataFile
    # object:: S3 object handle; it must respond to #key, #get and
    #          #presigned_url
    # reader_class:: stored in @reader_class
    #                (presumably consumed by AbstractDataFile -- confirm there)
    def initialize(object, reader_class:)
      @object = object
      @reader_class = reader_class
    end

    # Key of the underlying S3 object.
    def key
      @object.key
    end

    # Body of the underlying S3 object, fetched with a get request.
    def content
      @object.get.body
    end

    # Forwards to the underlying object's #presigned_url.
    #
    # Written out by hand instead of using the ActiveSupport-only `delegate`
    # macro, so that loading this class does not require ActiveSupport.
    def presigned_url(*args, &block)
      @object.presigned_url(*args, &block)
    end
    # Keep keyword arguments intact when forwarding on Rubies that separate
    # them from positional arguments; older Rubies lack ruby2_keywords.
    ruby2_keywords(:presigned_url) if respond_to?(:ruby2_keywords, true)
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'redshift_connector/abstract_data_file_bundle'
|
2
|
+
require 'redshift_connector/s3_bucket'
|
3
|
+
require 'redshift_connector/s3_data_file'
|
4
|
+
require 'redshift_connector/logger'
|
5
|
+
require 'aws-sdk'
|
6
|
+
|
7
|
+
module RedshiftConnector
  # Bundle of the data files stored under one key prefix of an S3Bucket.
  class S3DataFileBundle < AbstractDataFileBundle
    class << self
      # Builds a bundle from a params object responding to txn_id, bucket,
      # schema, table, filter and logger. Uses the named bucket when
      # params.bucket is set, the default bucket otherwise.
      # Raises ArgumentError when params.txn_id is missing.
      def for_params(params)
        raise ArgumentError, "cannot create bundle: missing txn_id" unless params.txn_id

        target = if params.bucket
          S3Bucket.get(params.bucket)
        else
          S3Bucket.default
        end
        for_table(
          bucket: target,
          schema: params.schema,
          table: params.table,
          txn_id: params.txn_id,
          filter: params.filter,
          logger: params.logger
        )
      end

      # Builds a bundle for +prefix+ placed under the bucket's own prefix.
      def for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
        new(
          bucket,
          "#{bucket.prefix}/#{prefix}",
          format: format,
          filter: filter,
          batch_size: batch_size,
          logger: logger
        )
      end

      # Builds a :redshift_csv bundle for the export location
      # <bucket prefix>/<schema>_export/<table>/<txn_id>/<table>.csv.
      def for_table(bucket: S3Bucket.default, schema:, table:, txn_id:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
        export_prefix = "#{bucket.prefix}/#{schema}_export/#{table}/#{txn_id}/#{table}.csv."
        new(
          bucket,
          export_prefix,
          format: :redshift_csv,
          filter: filter,
          batch_size: batch_size,
          logger: logger
        )
      end
    end

    # bucket:: S3Bucket holding the files
    # prefix:: key prefix shared by the files of this bundle
    # format:: reader id handed to Reader.get
    # filter:: callable applied to rows; defaults to returning the row as is
    def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
      @bucket, @prefix, @format = bucket, prefix, format
      @filter = filter || ->(*row) { row }
      @batch_size = batch_size
      @logger = logger
      @reader_class = Reader.get(format)
    end

    attr_reader :bucket, :prefix

    # s3:// URL of the bundle prefix.
    def url
      "s3://#{@bucket.name}/#{@prefix}"
    end

    # Credential string of the underlying bucket.
    def credential_string
      @bucket.credential_string
    end

    # One S3DataFile per object found under the bundle prefix.
    def data_files
      found = @bucket.objects(prefix: @prefix)
      found.map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
    end

    # Deletes every object under the directory part of the bundle prefix,
    # logging the deletion first. Does nothing when no object is found.
    def clear
      dir_prefix = "#{File.dirname(@prefix)}/"
      doomed = @bucket.objects(prefix: dir_prefix).map {|obj| obj.key }
      return if doomed.empty?

      @logger.info "DELETE #{dir_prefix}*"
      @bucket.delete_objects(doomed)
    end
  end
end
|
@@ -1,11 +1,11 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require '
|
4
|
-
require '
|
1
|
+
require 'redshift_connector/reader'
|
2
|
+
require 'redshift_connector/abstract_data_file_bundle'
|
3
|
+
require 'redshift_connector/url_data_file'
|
4
|
+
require 'redshift_connector/logger'
|
5
5
|
|
6
6
|
module RedshiftConnector
|
7
7
|
class UrlDataFileBundle < AbstractDataFileBundle
|
8
|
-
def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger:
|
8
|
+
def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
|
9
9
|
raise ArgumentError, 'data_file_urls is empty' if data_file_urls.empty?
|
10
10
|
@data_file_urls = data_file_urls
|
11
11
|
@filter = filter || lambda {|*row| row }
|
@@ -1,24 +1,23 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'redshift-connector/data_file/version'
|
5
|
-
|
6
1
|
Gem::Specification.new do |spec|
|
7
2
|
spec.name = "redshift-connector-data_file"
|
8
|
-
spec.version =
|
3
|
+
spec.version = "7.0.0"
|
9
4
|
spec.authors = ["Hidekazu Kobayashi", "Minero Aoki"]
|
10
|
-
spec.email = ["kobahide789@gmail.com"]
|
5
|
+
spec.email = ["kobahide789@gmail.com", "aamine@loveruby.net"]
|
11
6
|
|
12
7
|
spec.summary = "Utility classes for exported data files from Redshift"
|
13
|
-
spec.homepage = "https://github.com/
|
8
|
+
spec.homepage = "https://github.com/bricolages/redshift-connector-data_file"
|
14
9
|
|
15
10
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
16
11
|
f.match(%r{^(test|spec|features)/})
|
17
12
|
end
|
18
13
|
spec.bindir = "exe"
|
19
|
-
spec.executables = spec.files.grep(%r{
|
14
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) {|f| File.basename(f) }
|
20
15
|
spec.require_paths = ["lib"]
|
21
16
|
|
17
|
+
spec.required_ruby_version = '>= 2.1.0'
|
18
|
+
spec.add_dependency 'aws-sdk', '~> 2.0'
|
22
19
|
spec.add_development_dependency "bundler", "~> 1.14"
|
23
|
-
spec.add_development_dependency "rake"
|
20
|
+
spec.add_development_dependency "rake"
|
21
|
+
spec.add_development_dependency "test-unit"
|
22
|
+
spec.add_development_dependency "pry"
|
24
23
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redshift-connector-data_file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 7.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hidekazu Kobayashi
|
@@ -9,8 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-06-
|
12
|
+
date: 2017-06-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: aws-sdk
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '2.0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '2.0'
|
14
28
|
- !ruby/object:Gem::Dependency
|
15
29
|
name: bundler
|
16
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -29,19 +43,48 @@ dependencies:
|
|
29
43
|
name: rake
|
30
44
|
requirement: !ruby/object:Gem::Requirement
|
31
45
|
requirements:
|
32
|
-
- - "
|
46
|
+
- - ">="
|
33
47
|
- !ruby/object:Gem::Version
|
34
|
-
version: '
|
48
|
+
version: '0'
|
35
49
|
type: :development
|
36
50
|
prerelease: false
|
37
51
|
version_requirements: !ruby/object:Gem::Requirement
|
38
52
|
requirements:
|
39
|
-
- - "
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: test-unit
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
40
61
|
- !ruby/object:Gem::Version
|
41
|
-
version: '
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: pry
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
42
84
|
description:
|
43
85
|
email:
|
44
86
|
- kobahide789@gmail.com
|
87
|
+
- aamine@loveruby.net
|
45
88
|
executables: []
|
46
89
|
extensions: []
|
47
90
|
extra_rdoc_files: []
|
@@ -53,21 +96,24 @@ files:
|
|
53
96
|
- Rakefile
|
54
97
|
- bin/console
|
55
98
|
- bin/setup
|
56
|
-
- lib/redshift-connector
|
57
|
-
- lib/
|
58
|
-
- lib/
|
59
|
-
- lib/
|
60
|
-
- lib/
|
61
|
-
- lib/
|
62
|
-
- lib/
|
63
|
-
- lib/
|
64
|
-
- lib/
|
65
|
-
- lib/
|
66
|
-
- lib/
|
67
|
-
- lib/
|
68
|
-
- lib/
|
99
|
+
- lib/redshift-connector-data_file.rb
|
100
|
+
- lib/redshift_connector/abstract_data_file.rb
|
101
|
+
- lib/redshift_connector/abstract_data_file_bundle.rb
|
102
|
+
- lib/redshift_connector/data_file.rb
|
103
|
+
- lib/redshift_connector/logger.rb
|
104
|
+
- lib/redshift_connector/reader.rb
|
105
|
+
- lib/redshift_connector/reader/abstract.rb
|
106
|
+
- lib/redshift_connector/reader/csv.rb
|
107
|
+
- lib/redshift_connector/reader/exception.rb
|
108
|
+
- lib/redshift_connector/reader/redshift_csv.rb
|
109
|
+
- lib/redshift_connector/reader/tsv.rb
|
110
|
+
- lib/redshift_connector/s3_bucket.rb
|
111
|
+
- lib/redshift_connector/s3_data_file.rb
|
112
|
+
- lib/redshift_connector/s3_data_file_bundle.rb
|
113
|
+
- lib/redshift_connector/url_data_file.rb
|
114
|
+
- lib/redshift_connector/url_data_file_bundle.rb
|
69
115
|
- redshift-connector-data_file.gemspec
|
70
|
-
homepage: https://github.com/
|
116
|
+
homepage: https://github.com/bricolages/redshift-connector-data_file
|
71
117
|
licenses: []
|
72
118
|
metadata: {}
|
73
119
|
post_install_message:
|
@@ -78,7 +124,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
78
124
|
requirements:
|
79
125
|
- - ">="
|
80
126
|
- !ruby/object:Gem::Version
|
81
|
-
version:
|
127
|
+
version: 2.1.0
|
82
128
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
129
|
requirements:
|
84
130
|
- - ">="
|
@@ -86,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
132
|
version: '0'
|
87
133
|
requirements: []
|
88
134
|
rubyforge_project:
|
89
|
-
rubygems_version: 2.6.
|
135
|
+
rubygems_version: 2.6.8
|
90
136
|
signing_key:
|
91
137
|
specification_version: 4
|
92
138
|
summary: Utility classes for exported data files from Redshift
|
@@ -1,18 +0,0 @@
|
|
1
|
-
# create module
|
2
|
-
module RedshiftConnector
|
3
|
-
module Reader
|
4
|
-
end
|
5
|
-
end
|
6
|
-
|
7
|
-
require 'redshift-connector/data_file/reader/redshift_csv'
|
8
|
-
require 'redshift-connector/data_file/reader/csv'
|
9
|
-
require 'redshift-connector/data_file/reader/tsv'
|
10
|
-
require 'redshift-connector/data_file/reader/exception'
|
11
|
-
|
12
|
-
module RedshiftConnector
|
13
|
-
module Reader
|
14
|
-
def Reader.get(id)
|
15
|
-
Abstract.get_reader_class(id)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|