redshift-connector-data_file 1.2.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/redshift-connector-data_file.rb +1 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/abstract_data_file.rb +0 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/abstract_data_file_bundle.rb +0 -0
- data/lib/redshift_connector/data_file.rb +7 -0
- data/lib/redshift_connector/logger.rb +20 -0
- data/lib/redshift_connector/reader.rb +18 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/abstract.rb +0 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/csv.rb +2 -2
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/exception.rb +0 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/redshift_csv.rb +2 -2
- data/lib/{redshift-connector/data_file → redshift_connector}/reader/tsv.rb +2 -2
- data/lib/redshift_connector/s3_bucket.rb +76 -0
- data/lib/redshift_connector/s3_data_file.rb +20 -0
- data/lib/redshift_connector/s3_data_file_bundle.rb +69 -0
- data/lib/{redshift-connector/data_file → redshift_connector}/url_data_file.rb +1 -1
- data/lib/{redshift-connector/data_file → redshift_connector}/url_data_file_bundle.rb +5 -5
- data/redshift-connector-data_file.gemspec +9 -10
- metadata +68 -22
- data/lib/redshift-connector/data_file.rb +0 -9
- data/lib/redshift-connector/data_file/logger.rb +0 -14
- data/lib/redshift-connector/data_file/reader.rb +0 -18
- data/lib/redshift-connector/data_file/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a91f1bb3c4764f126a7cd5db3c97b23626a8f54
|
4
|
+
data.tar.gz: 4d604f99c42e55ed8343a6240139df3ce723e63d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a64f93698aeee74754b6c0b4452b8bd9d06031235e0737a57f17752a0aba36659322a9e6a467e5c3f359f82fcd1e6f8ae1fef128a30c080370e08898d89216e7
|
7
|
+
data.tar.gz: 00e1d01e5da780e861a8c700ef2c45860bbef1e36e56ced8c19c4afc8c79750d1b50674c61474e8826904030ef6f13e4ec0eff82dca4d7bfa1a65caf18843a68
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
A gem to handle data files exported from Redshift
|
4
4
|
|
5
|
-
[redshift-connector](https://github.com/
|
5
|
+
[redshift-connector](https://github.com/bricolages/redshift-connector) uses this gem internally.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -22,4 +22,4 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Contributing
|
24
24
|
|
25
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
25
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/bricolages/redshift-connector-data_file.
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'redshift_connector/data_file'
|
File without changes
|
File without changes
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module RedshiftConnector
  # Logger installed through RedshiftConnector.logger=; nil until one is set.
  @logger = nil

  # Returns the logger used as the library-wide default.
  #
  # Resolution order:
  # 1. the logger assigned with RedshiftConnector.logger=
  # 2. Rails.logger, looked up at call time (not at load time) so this file
  #    can be required before Rails is loaded
  # 3. a shared NullLogger when Rails is not loaded, so the library can be
  #    used outside Rails without raising NameError
  def RedshiftConnector.logger
    return @logger if @logger
    return Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
    @null_logger ||= NullLogger.new
  end

  # Installs +logger+ as the library-wide logger. Assigning nil restores the
  # fallback behaviour described on RedshiftConnector.logger.
  def RedshiftConnector.logger=(logger)
    @logger = logger
  end

  # Logger stand-in whose error/warn/info/debug methods accept any arguments
  # and do nothing.
  class NullLogger
    def noop(*args) end
    alias error noop
    alias warn noop
    alias info noop
    alias debug noop
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Declare the RedshiftConnector::Reader namespace up front, ahead of the
# reader implementation files required below (presumably so that those files
# can reopen it -- confirm against reader/*.rb).
module RedshiftConnector
  module Reader; end
end
|
6
|
+
|
7
|
+
require 'redshift_connector/reader/redshift_csv'
|
8
|
+
require 'redshift_connector/reader/csv'
|
9
|
+
require 'redshift_connector/reader/tsv'
|
10
|
+
require 'redshift_connector/reader/exception'
|
11
|
+
|
12
|
+
module RedshiftConnector
  module Reader
    # Looks up a reader by its identifier.
    #
    # Forwards +id+ unchanged to Abstract.get_reader_class and returns
    # whatever that call returns.
    def self.get(id)
      Abstract.get_reader_class(id)
    end
  end
end
|
File without changes
|
File without changes
|
@@ -1,5 +1,5 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'redshift_connector/reader/abstract'
|
2
|
+
require 'redshift_connector/reader/exception'
|
3
3
|
|
4
4
|
module RedshiftConnector
|
5
5
|
# Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'aws-sdk'
|
2
|
+
|
3
|
+
module RedshiftConnector
  # Settings for one S3 bucket (name, key prefix, region, credentials) plus a
  # class-level registry of named buckets with one default entry.
  class S3Bucket
    # Registry state, kept in class-level instance variables.
    @buckets = {}
    @default = nil

    # Builds a bucket from +params+ (see #initialize) and registers it under
    # +name+ (stored as a String). The first bucket registered becomes the
    # default; a later one replaces it only when +default+ is true.
    def S3Bucket.add(name, default: false, **params)
      instance = new(**params)
      @buckets[name.to_s] = instance
      @default = instance if default || !@default
    end

    # Returns the default bucket.
    # Raises ArgumentError when no bucket has been registered.
    def S3Bucket.default
      @default || raise(ArgumentError, "no default S3 bucket configured")
    end

    # Returns the bucket registered under +name+ (compared as a String).
    # Raises ArgumentError when there is no such entry.
    def S3Bucket.get(name)
      @buckets[name.to_s] || raise(ArgumentError, "no such S3 bucket configured: #{name.inspect}")
    end

    # bucket::            S3 bucket name (required)
    # prefix::            key prefix used by #url
    # region::            passed to Aws::S3::Client.new when not nil
    # access_key_id::     passed to the client and used by #credential_string
    # secret_access_key:: passed to the client and used by #credential_string
    # iam_role::          used by #credential_string, takes precedence over keys
    def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
      @region = region
      @name = bucket
      @prefix = prefix
      @access_key_id = access_key_id
      @secret_access_key = secret_access_key
      @iam_role = iam_role
    end

    attr_reader :name
    attr_reader :prefix

    # Returns "s3://<bucket name>/<prefix>/".
    #
    # Built from the configured bucket name (@name). The SDK bucket object in
    # @bucket is only created lazily by #bucket, so it may still be nil here.
    def url
      "s3://#{@name}/#{@prefix}/"
    end

    # Memoized Aws::S3::Client. Only the options that were actually given are
    # passed on; nil-valued options are dropped before the call.
    def client
      @client ||= begin
        args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
        Aws::S3::Client.new(**args)
      end
    end

    # Memoized SDK bucket object for this bucket name, created through an
    # Aws::S3::Resource that shares #client.
    def bucket
      @bucket ||= begin
        resource = Aws::S3::Resource.new(client: client)
        resource.bucket(@name)
      end
    end

    # Returns the SDK object for +key+ in this bucket.
    def object(key)
      bucket.object(key)
    end

    # Returns the SDK object collection for keys starting with +prefix+.
    def objects(prefix:)
      bucket.objects(prefix: prefix)
    end

    # Deletes the objects named by +keys+ with a single delete_objects call.
    def delete_objects(keys)
      bucket.delete_objects(delete: {objects: keys.map {|k| {key: k} }})
    end

    # Returns the credential string for this bucket: the IAM role form when
    # an iam_role was given, otherwise the access key form.
    # Raises ArgumentError when neither kind of credential was given.
    def credential_string
      if @iam_role
        "aws_iam_role=#{@iam_role}"
      elsif @access_key_id
        "aws_access_key_id=#{@access_key_id};aws_secret_access_key=#{@secret_access_key}"
      else
        raise ArgumentError, "no credential given for Redshift S3 access"
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'redshift_connector/data_file'
|
2
|
+
|
3
|
+
module RedshiftConnector
  # Data file backed by a single S3 object.
  class S3DataFile < AbstractDataFile
    # object::       object responding to key, get and presigned_url
    #                (presumably an AWS SDK S3 object -- confirm against callers)
    # reader_class:: stored in @reader_class; not used by the methods defined here
    def initialize(object, reader_class:)
      @object = object
      @reader_class = reader_class
    end

    # Returns the key of the underlying object.
    def key
      @object.key
    end

    # Fetches the underlying object and returns the body of the response.
    def content
      @object.get.body
    end

    # Forwards to the underlying object's presigned_url with the same
    # arguments. Written as a plain method so the class does not depend on
    # ActiveSupport's Module#delegate, which this library does not load itself.
    def presigned_url(*args, &block)
      @object.presigned_url(*args, &block)
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'redshift_connector/abstract_data_file_bundle'
|
2
|
+
require 'redshift_connector/s3_bucket'
|
3
|
+
require 'redshift_connector/s3_data_file'
|
4
|
+
require 'redshift_connector/logger'
|
5
|
+
require 'aws-sdk'
|
6
|
+
|
7
|
+
module RedshiftConnector
  # Bundle of data files that live under a common key prefix in an S3Bucket.
  class S3DataFileBundle < AbstractDataFileBundle
    # Builds a bundle from a parameter object responding to txn_id, bucket,
    # schema, table, filter and logger. A nil/false params.bucket selects the
    # default S3Bucket; otherwise the bucket is looked up by that name.
    # Raises ArgumentError when params.txn_id is missing.
    def self.for_params(params)
      raise ArgumentError, "cannot create bundle: missing txn_id" unless params.txn_id

      target =
        if params.bucket
          S3Bucket.get(params.bucket)
        else
          S3Bucket.default
        end
      for_table(
        bucket: target,
        schema: params.schema,
        table: params.table,
        txn_id: params.txn_id,
        filter: params.filter,
        logger: params.logger
      )
    end

    # Builds a bundle for the keys under "<bucket.prefix>/<prefix>", read in
    # the given +format+.
    def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
      new(bucket, "#{bucket.prefix}/#{prefix}", format: format, filter: filter, batch_size: batch_size, logger: logger)
    end

    # Builds a :redshift_csv bundle for the keys under
    # "<bucket.prefix>/<schema>_export/<table>/<txn_id>/<table>.csv.".
    def self.for_table(bucket: S3Bucket.default, schema:, table:, txn_id:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
      export_dir = "#{bucket.prefix}/#{schema}_export/#{table}/#{txn_id}"
      new(bucket, "#{export_dir}/#{table}.csv.", format: :redshift_csv, filter: filter, batch_size: batch_size, logger: logger)
    end

    # bucket::     S3Bucket holding the files
    # prefix::     key prefix shared by the files of this bundle
    # format::     reader identifier handed to Reader.get
    # filter::     callable applied to rows; defaults to one returning the row unchanged
    # batch_size:: stored in @batch_size
    # logger::     used by #clear to report deletions
    def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
      @bucket = bucket
      @prefix = prefix
      @format = format
      @filter = filter || ->(*row) { row }
      @batch_size = batch_size
      @logger = logger
      @reader_class = Reader.get(format)
    end

    attr_reader :bucket, :prefix

    # Returns "s3://<bucket name>/<prefix>".
    def url
      "s3://#{@bucket.name}/#{@prefix}"
    end

    # Returns the credential string of the underlying bucket.
    def credential_string
      @bucket.credential_string
    end

    # Returns one S3DataFile per object whose key starts with the prefix.
    def data_files
      @bucket.objects(prefix: @prefix).map do |s3_object|
        S3DataFile.new(s3_object, reader_class: @reader_class)
      end
    end

    # Deletes every object under the directory part of the prefix
    # (File.dirname(prefix) + "/"), not only those matching the full prefix.
    # Does nothing when there are no such objects.
    def clear
      dir_prefix = "#{File.dirname(@prefix)}/"
      doomed_keys = @bucket.objects(prefix: dir_prefix).map(&:key)
      return if doomed_keys.empty?

      @logger.info "DELETE #{dir_prefix}*"
      @bucket.delete_objects(doomed_keys)
    end
  end
end
|
@@ -1,11 +1,11 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require '
|
4
|
-
require '
|
1
|
+
require 'redshift_connector/reader'
|
2
|
+
require 'redshift_connector/abstract_data_file_bundle'
|
3
|
+
require 'redshift_connector/url_data_file'
|
4
|
+
require 'redshift_connector/logger'
|
5
5
|
|
6
6
|
module RedshiftConnector
|
7
7
|
class UrlDataFileBundle < AbstractDataFileBundle
|
8
|
-
def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger:
|
8
|
+
def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
|
9
9
|
raise ArgumentError, 'data_file_urls is empty' if data_file_urls.empty?
|
10
10
|
@data_file_urls = data_file_urls
|
11
11
|
@filter = filter || lambda {|*row| row }
|
@@ -1,24 +1,23 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'redshift-connector/data_file/version'
|
5
|
-
|
6
1
|
Gem::Specification.new do |spec|
|
7
2
|
spec.name = "redshift-connector-data_file"
|
8
|
-
spec.version =
|
3
|
+
spec.version = "7.0.0"
|
9
4
|
spec.authors = ["Hidekazu Kobayashi", "Minero Aoki"]
|
10
|
-
spec.email = ["kobahide789@gmail.com"]
|
5
|
+
spec.email = ["kobahide789@gmail.com", "aamine@loveruby.net"]
|
11
6
|
|
12
7
|
spec.summary = "Utility classes for exported data files from Redshift"
|
13
|
-
spec.homepage = "https://github.com/
|
8
|
+
spec.homepage = "https://github.com/bricolages/redshift-connector-data_file"
|
14
9
|
|
15
10
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
16
11
|
f.match(%r{^(test|spec|features)/})
|
17
12
|
end
|
18
13
|
spec.bindir = "exe"
|
19
|
-
spec.executables = spec.files.grep(%r{
|
14
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) {|f| File.basename(f) }
|
20
15
|
spec.require_paths = ["lib"]
|
21
16
|
|
17
|
+
spec.required_ruby_version = '>= 2.1.0'
|
18
|
+
spec.add_dependency 'aws-sdk', '~> 2.0'
|
22
19
|
spec.add_development_dependency "bundler", "~> 1.14"
|
23
|
-
spec.add_development_dependency "rake"
|
20
|
+
spec.add_development_dependency "rake"
|
21
|
+
spec.add_development_dependency "test-unit"
|
22
|
+
spec.add_development_dependency "pry"
|
24
23
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redshift-connector-data_file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 7.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hidekazu Kobayashi
|
@@ -9,8 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-06-
|
12
|
+
date: 2017-06-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: aws-sdk
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '2.0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '2.0'
|
14
28
|
- !ruby/object:Gem::Dependency
|
15
29
|
name: bundler
|
16
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -29,19 +43,48 @@ dependencies:
|
|
29
43
|
name: rake
|
30
44
|
requirement: !ruby/object:Gem::Requirement
|
31
45
|
requirements:
|
32
|
-
- - "
|
46
|
+
- - ">="
|
33
47
|
- !ruby/object:Gem::Version
|
34
|
-
version: '
|
48
|
+
version: '0'
|
35
49
|
type: :development
|
36
50
|
prerelease: false
|
37
51
|
version_requirements: !ruby/object:Gem::Requirement
|
38
52
|
requirements:
|
39
|
-
- - "
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: test-unit
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
40
61
|
- !ruby/object:Gem::Version
|
41
|
-
version: '
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: pry
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
42
84
|
description:
|
43
85
|
email:
|
44
86
|
- kobahide789@gmail.com
|
87
|
+
- aamine@loveruby.net
|
45
88
|
executables: []
|
46
89
|
extensions: []
|
47
90
|
extra_rdoc_files: []
|
@@ -53,21 +96,24 @@ files:
|
|
53
96
|
- Rakefile
|
54
97
|
- bin/console
|
55
98
|
- bin/setup
|
56
|
-
- lib/redshift-connector
|
57
|
-
- lib/
|
58
|
-
- lib/
|
59
|
-
- lib/
|
60
|
-
- lib/
|
61
|
-
- lib/
|
62
|
-
- lib/
|
63
|
-
- lib/
|
64
|
-
- lib/
|
65
|
-
- lib/
|
66
|
-
- lib/
|
67
|
-
- lib/
|
68
|
-
- lib/
|
99
|
+
- lib/redshift-connector-data_file.rb
|
100
|
+
- lib/redshift_connector/abstract_data_file.rb
|
101
|
+
- lib/redshift_connector/abstract_data_file_bundle.rb
|
102
|
+
- lib/redshift_connector/data_file.rb
|
103
|
+
- lib/redshift_connector/logger.rb
|
104
|
+
- lib/redshift_connector/reader.rb
|
105
|
+
- lib/redshift_connector/reader/abstract.rb
|
106
|
+
- lib/redshift_connector/reader/csv.rb
|
107
|
+
- lib/redshift_connector/reader/exception.rb
|
108
|
+
- lib/redshift_connector/reader/redshift_csv.rb
|
109
|
+
- lib/redshift_connector/reader/tsv.rb
|
110
|
+
- lib/redshift_connector/s3_bucket.rb
|
111
|
+
- lib/redshift_connector/s3_data_file.rb
|
112
|
+
- lib/redshift_connector/s3_data_file_bundle.rb
|
113
|
+
- lib/redshift_connector/url_data_file.rb
|
114
|
+
- lib/redshift_connector/url_data_file_bundle.rb
|
69
115
|
- redshift-connector-data_file.gemspec
|
70
|
-
homepage: https://github.com/
|
116
|
+
homepage: https://github.com/bricolages/redshift-connector-data_file
|
71
117
|
licenses: []
|
72
118
|
metadata: {}
|
73
119
|
post_install_message:
|
@@ -78,7 +124,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
78
124
|
requirements:
|
79
125
|
- - ">="
|
80
126
|
- !ruby/object:Gem::Version
|
81
|
-
version:
|
127
|
+
version: 2.1.0
|
82
128
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
129
|
requirements:
|
84
130
|
- - ">="
|
@@ -86,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
132
|
version: '0'
|
87
133
|
requirements: []
|
88
134
|
rubyforge_project:
|
89
|
-
rubygems_version: 2.6.
|
135
|
+
rubygems_version: 2.6.8
|
90
136
|
signing_key:
|
91
137
|
specification_version: 4
|
92
138
|
summary: Utility classes for exported data files from Redshift
|
@@ -1,18 +0,0 @@
|
|
1
|
-
# create module
|
2
|
-
module RedshiftConnector
|
3
|
-
module Reader
|
4
|
-
end
|
5
|
-
end
|
6
|
-
|
7
|
-
require 'redshift-connector/data_file/reader/redshift_csv'
|
8
|
-
require 'redshift-connector/data_file/reader/csv'
|
9
|
-
require 'redshift-connector/data_file/reader/tsv'
|
10
|
-
require 'redshift-connector/data_file/reader/exception'
|
11
|
-
|
12
|
-
module RedshiftConnector
|
13
|
-
module Reader
|
14
|
-
def Reader.get(id)
|
15
|
-
Abstract.get_reader_class(id)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|