redshift-connector 6.0.0 → 7.0.0
- checksums.yaml +4 -4
- data/lib/redshift-connector.rb +1 -31
- data/lib/redshift_connector.rb +34 -0
- data/lib/redshift_connector/active_record_data_source.rb +23 -0
- data/lib/redshift_connector/active_record_exporter.rb +47 -0
- data/lib/{redshift-connector → redshift_connector}/connector.rb +26 -25
- data/lib/redshift_connector/data_file_bundle_params.rb +28 -0
- data/lib/redshift_connector/exception.rb +5 -0
- data/lib/redshift_connector/exporter.rb +39 -0
- data/lib/redshift_connector/exporter_builder.rb +51 -0
- data/lib/redshift_connector/importer.rb +58 -0
- data/lib/{redshift-connector → redshift_connector}/importer/activerecord-import.rb +0 -0
- data/lib/{redshift-connector → redshift_connector}/importer/insert_delta.rb +8 -9
- data/lib/{redshift-connector → redshift_connector}/importer/rebuild_rename.rb +8 -9
- data/lib/{redshift-connector → redshift_connector}/importer/rebuild_truncate.rb +8 -9
- data/lib/redshift_connector/importer/upsert.rb +24 -0
- data/lib/{redshift-connector → redshift_connector}/query.rb +0 -0
- data/lib/{redshift-connector → redshift_connector}/version.rb +1 -1
- data/test/config.rb +7 -12
- data/test/database.yml +3 -3
- data/test/foreach.rb +5 -0
- metadata +32 -30
- data/lib/redshift-connector/exporter.rb +0 -103
- data/lib/redshift-connector/importer.rb +0 -117
- data/lib/redshift-connector/importer/upsert.rb +0 -25
- data/lib/redshift-connector/logger.rb +0 -20
- data/lib/redshift-connector/s3_bucket.rb +0 -76
- data/lib/redshift-connector/s3_data_file.rb +0 -20
- data/lib/redshift-connector/s3_data_file_bundle.rb +0 -54
- data/test/test_reader.rb +0 -9
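
Before the per-file diffs, a minimal usage sketch of the 7.0.0 entry point: the library file moves from lib/redshift-connector.rb to lib/redshift_connector.rb (the old path becomes a one-line shim), and setup goes through Exporter.default_data_source=, S3Bucket.add and the module-level transport functions shown below. The bucket name, IAM role, schema, table, and column names in this sketch are hypothetical placeholders; only the call shapes come from this diff.

# Minimal 7.0.0 setup sketch -- names marked "hypothetical" are not from this diff.
require 'redshift_connector'   # the old `require 'redshift-connector'` now just loads this file

# Point the exporter at the ActiveRecord class connected to the Redshift cluster.
RedshiftConnector::Exporter.default_data_source =
  RedshiftConnector::ActiveRecordDataSource.new(Redshift)

# Register the S3 bucket used as the UNLOAD/COPY buffer (bucket and role are hypothetical).
RedshiftConnector::S3Bucket.add(
  'copy-buffer',
  bucket: 'example-copy-buffer',
  prefix: 'development',
  iam_role: 'arn:aws:iam::000000000000:role/ExampleRole',
  default: true
)

# Build a delta transport (schema/table/columns/condition are hypothetical),
# then run the export and import tasks shown in connector.rb below.
connector = RedshiftConnector.transport_delta(
  schema: 'production',
  table: 'items',
  columns: %w[id name updated_at],
  condition: "updated_at > dateadd(day, -1, getdate())",
  upsert_columns: %w[name updated_at],
  filter: ->(*row) { row }
)
connector.export   # UNLOAD to S3; stores the resulting data file bundle
connector.import   # batch-import the bundle into the MySQL-side table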
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc258b0cae8475c5c78a9e37ebb80e16d186245d
+  data.tar.gz: 9c8c846b8bfb89714986146b019aa063bcfee9af
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e9001439475fc25dfbc6c0eda67306044dccb7c5eeda5309fa2995df4adeaa5b028e3a177f33ca938b99d150ea10a8e48206479b0532f26f2eb17b59b5bf791
+  data.tar.gz: 1d83a4852630ab542bb14cc4fac376a08f549aec0d4225d92f65852f5af290c06d3c44ba31660991370707d9763322ad9d01daba9ff0a1dad352f88d4c0db6ff
data/lib/redshift-connector.rb
CHANGED
@@ -1,31 +1 @@
-module RedshiftConnector
-end
-
-require 'redshift-connector/connector'
-require 'redshift-connector/exporter'
-require 'redshift-connector/importer'
-require 'redshift-connector/s3_bucket'
-require 'redshift-connector/s3_data_file_bundle'
-require 'redshift-connector/version'
-
-module RedshiftConnector
-  def RedshiftConnector.transport_delta(**params)
-    Connector.transport_delta(**params)
-  end
-
-  def RedshiftConnector.transport_all(**params)
-    Connector.transport_all(**params)
-  end
-
-  def RedshiftConnector.transport_delta_from_s3(**params)
-    Importer.transport_delta_from_s3(**params)
-  end
-
-  def RedshiftConnector.transport_all_from_s3(**params)
-    Importer.transport_all_from_s3(**params)
-  end
-
-  def RedshiftConnector.foreach(**params, &block)
-    Exporter.foreach(**params, &block)
-  end
-end
+require 'redshift_connector'
data/lib/redshift_connector.rb
ADDED
@@ -0,0 +1,34 @@
+module RedshiftConnector
+end
+
+require 'redshift_connector/connector'
+require 'redshift_connector/exporter'
+require 'redshift_connector/active_record_data_source'
+require 'redshift_connector/active_record_exporter'
+require 'redshift_connector/importer'
+require 'redshift_connector/s3_bucket'
+require 'redshift_connector/s3_data_file_bundle'
+require 'redshift_connector/exception'
+require 'redshift_connector/version'
+
+module RedshiftConnector
+  def RedshiftConnector.transport_delta(**params)
+    Connector.transport_delta(**params)
+  end
+
+  def RedshiftConnector.transport_all(**params)
+    Connector.transport_all(**params)
+  end
+
+  def RedshiftConnector.transport_delta_from_s3(**params)
+    Importer.transport_delta_from_s3(**params)
+  end
+
+  def RedshiftConnector.transport_all_from_s3(**params)
+    Importer.transport_all_from_s3(**params)
+  end
+
+  def RedshiftConnector.foreach(**params, &block)
+    Exporter.foreach(**params, &block)
+  end
+end
data/lib/redshift_connector/active_record_data_source.rb
ADDED
@@ -0,0 +1,23 @@
+require 'redshift_connector/exporter_builder'
+
+module RedshiftConnector
+  class ActiveRecordDataSource
+    def ActiveRecordDataSource.for_dao(dao)
+      new(dao)
+    end
+
+    def initialize(dao)
+      @dao = dao
+    end
+
+    def exporter_builder
+      ExporterBuilder.new(ds: self, exporter_class: ActiveRecordExporter)
+    end
+
+    def execute_query(query_str)
+      @dao.connection_pool.with_connection {|conn|
+        conn.execute(query_str)
+      }
+    end
+  end
+end
data/lib/redshift_connector/active_record_exporter.rb
ADDED
@@ -0,0 +1,47 @@
+require 'redshift_connector/s3_data_file_bundle'
+require 'redshift_connector/query'
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  class ActiveRecordExporter
+    def initialize(ds:, query:, bundle_params:, enable_sort: false, logger: RedshiftConnector.logger)
+      @ds = ds
+      @query = query
+      @bundle_params = bundle_params
+      @enable_sort = enable_sort
+      @logger = logger
+
+      @bundle = S3DataFileBundle.for_params(bundle_params)
+    end
+
+    attr_reader :query
+    attr_reader :bundle_params
+    attr_reader :bundle
+    attr_reader :logger
+
+    def execute
+      @bundle.clear
+      unload_query = UnloadQuery.new(query: @query, bundle: @bundle, enable_sort: @enable_sort)
+      @logger.info "EXPORT #{unload_query.description} -> #{@bundle.url}*"
+      stmt = unload_query.to_sql
+      @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
+      @ds.execute_query(batch_job_label + stmt)
+      @bundle
+    end
+
+    def batch_job_label
+      @batch_job_label ||= begin
+        components = Dir.getwd.split('/')
+        app = if components.last == 'current'
+                # is Capistrano environment
+                components[-2]
+              else
+                components[-1]
+              end
+        batch_file = caller.detect {|c| /redshift_connector|active_record/ !~ c }
+        path = batch_file ? batch_file.split(':').first : '?'
+        "/* Job: #{app}:#{path} */ "
+      end
+    end
+  end
+end
data/lib/{redshift-connector → redshift_connector}/connector.rb
CHANGED
@@ -1,7 +1,7 @@
-require '
-require '
-require '
-require '
+require 'redshift_connector/exporter'
+require 'redshift_connector/importer'
+require 'redshift_connector/data_file_bundle_params'
+require 'redshift_connector/logger'
 
 module RedshiftConnector
   class Connector
@@ -15,16 +15,16 @@ module RedshiftConnector
       delete_cond: nil,
       upsert_columns: nil,
       bucket: nil,
-      txn_id
+      txn_id: nil,
+      filter:,
       logger: RedshiftConnector.logger,
       quiet: false
     )
       unless src_table and dest_table
        raise ArgumentError, "missing :table, :src_table or :dest_table"
      end
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
      logger = NullLogger.new if quiet
-
+      bundle_params = DataFileBundleParams.new(
        bucket: bucket,
        schema: schema,
        table: src_table,
@@ -33,18 +33,19 @@ module RedshiftConnector
        logger: logger
      )
      exporter = Exporter.for_table_delta(
-
+        bundle_params: bundle_params,
        schema: schema,
        table: src_table,
        columns: columns,
        condition: condition,
        logger: logger
      )
-      importer = Importer.
-
-
-        delete_cond: delete_cond,
-
+      importer = Importer.for_delta_upsert(
+        table: dest_table,
+        columns: columns,
+        delete_cond: delete_cond,
+        upsert_columns: upsert_columns,
+        logger: logger
      )
      new(exporter: exporter, importer: importer, logger: logger)
    end
@@ -57,33 +58,32 @@ module RedshiftConnector
      dest_table: table,
      columns:,
      bucket: nil,
-      txn_id
+      txn_id: nil,
      filter:,
      logger: RedshiftConnector.logger,
      quiet: false
    )
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
      logger = NullLogger.new if quiet
-
+      bundle_params = DataFileBundleParams.new(
        bucket: bucket,
        schema: schema,
-        table:
+        table: src_table,
        txn_id: txn_id,
        filter: filter,
        logger: logger
      )
      exporter = Exporter.for_table(
-
+        bundle_params: bundle_params,
        schema: schema,
-        table:
+        table: src_table,
        columns: columns,
        logger: logger
      )
-      importer = Importer.
+      importer = Importer.for_rebuild(
        strategy: strategy,
-
-
-        logger: logger
+        table: dest_table,
+        columns: columns,
+        logger: logger
      )
      new(exporter: exporter, importer: importer, logger: logger)
    end
@@ -92,6 +92,7 @@ module RedshiftConnector
      @exporter = exporter
      @importer = importer
      @logger = logger
+      @bundle = nil
    end
 
    def export_enabled?
@@ -109,12 +110,12 @@ module RedshiftConnector
 
    def export
      @logger.info "==== export task =================================================="
-      @exporter.execute
+      @bundle = @exporter.execute
    end
 
    def import
      @logger.info "==== import task =================================================="
-      @importer.execute
+      @importer.execute(@bundle)
    end
  end
 end
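
The substantive change in connector.rb is the hand-off between export and import: Exporter#execute now returns the S3 data file bundle and Importer#execute receives it as an argument, instead of both objects being constructed around a shared bundle. A sketch of that flow using the builders shown in this release; bundle_params and logger are assumed to be set up beforehand, and the table and column names are hypothetical.

exporter = RedshiftConnector::Exporter.for_table(
  bundle_params: bundle_params,      # a DataFileBundleParams, as built in Connector.transport_all
  schema: 'production',
  table: 'items',                    # hypothetical table
  columns: %w[id name updated_at],
  logger: logger
)
importer = RedshiftConnector::Importer.for_rebuild(
  strategy: 'rename',
  table: 'items',
  columns: %w[id name updated_at],
  logger: logger
)

bundle = exporter.execute   # runs UNLOAD and returns the S3 data file bundle
importer.execute(bundle)    # reads the bundle back and rebuilds the destination table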
data/lib/redshift_connector/data_file_bundle_params.rb
ADDED
@@ -0,0 +1,28 @@
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  class DataFileBundleParams
+    def initialize(
+      bucket: nil,
+      schema:,
+      table:,
+      txn_id: nil,
+      filter:,
+      logger: RedshiftConnector.logger
+    )
+      @bucket = bucket
+      @schema = schema
+      @table = table
+      @txn_id = txn_id
+      @filter = filter
+      @logger = logger
+    end
+
+    attr_reader :bucket
+    attr_reader :schema
+    attr_reader :table
+    attr_reader :txn_id
+    attr_reader :filter
+    attr_reader :logger
+  end
+end
data/lib/redshift_connector/exporter.rb
ADDED
@@ -0,0 +1,39 @@
+module RedshiftConnector
+  module Exporter
+    @default_data_source = nil
+
+    def Exporter.default_data_source=(ds)
+      @default_data_source = ds
+    end
+
+    def Exporter.default_data_source
+      @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
+    end
+
+    def Exporter.builder
+      default_data_source.exporter_builder
+    end
+
+    def Exporter.for_table_delta(**params)
+      builder.build_for_table_delta(**params)
+    end
+
+    def Exporter.for_table(**params)
+      builder.build_for_table(**params)
+    end
+
+    def Exporter.for_query(**params)
+      builder.build_for_query(**params)
+    end
+
+    def Exporter.foreach(**params, &block)
+      exporter = for_query(**params)
+      begin
+        exporter.execute
+        exporter.bundle.each_row(&block)
+      ensure
+        exporter.bundle.clear
+      end
+    end
+  end
+end
data/lib/redshift_connector/exporter_builder.rb
ADDED
@@ -0,0 +1,51 @@
+require 'redshift_connector/query'
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  class ExporterBuilder
+    def initialize(ds:, exporter_class:)
+      @ds = ds
+      @exporter_class = exporter_class
+    end
+
+    def build_for_table_delta(schema:, table:, condition:, columns:, bundle_params:, logger: RedshiftConnector.logger)
+      query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
+      @exporter_class.new(ds: @ds, query: query, bundle_params: bundle_params, logger: logger)
+    end
+
+    def build_for_table(schema:, table:, columns:, bundle_params:, logger: RedshiftConnector.logger)
+      query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
+      @exporter_class.new(ds: @ds, query: query, bundle_params: bundle_params, logger: logger)
+    end
+
+    def build_for_query(
+      schema:,
+      table:,
+      bucket: nil,
+      query:,
+      txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
+      filter: nil,
+      enable_sort: false,
+      logger: RedshiftConnector.logger,
+      quiet: false
+    )
+      logger = NullLogger.new if quiet
+      bundle_params = DataFileBundleParams.new(
+        bucket: bucket,
+        schema: schema,
+        table: table,
+        txn_id: txn_id,
+        filter: filter,
+        logger: logger
+      )
+      @exporter_class.new(
+        ds: @ds,
+        query: ArbitraryQuery.new(query),
+        bundle_params: bundle_params,
+        enable_sort: enable_sort,
+        logger: logger
+      )
+    end
+
+  end
+end
data/lib/redshift_connector/importer.rb
ADDED
@@ -0,0 +1,58 @@
+# create module
+module RedshiftConnector
+  module Importer
+  end
+end
+
+require 'redshift_connector/importer/upsert'
+require 'redshift_connector/importer/insert_delta'
+require 'redshift_connector/importer/rebuild_rename'
+require 'redshift_connector/importer/rebuild_truncate'
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  module Importer
+    def Importer.for_delta_upsert(table:, columns:, delete_cond: nil, upsert_columns: nil, logger: RedshiftConnector.logger)
+      if delete_cond and upsert_columns
+        raise ArgumentError, "delete_cond and upsert_columns are exclusive"
+      end
+      importer =
+        if delete_cond
+          Importer::InsertDelta.new(
+            dao: table.classify.constantize,
+            columns: columns,
+            delete_cond: delete_cond,
+            logger: logger
+          )
+        elsif upsert_columns
+          Importer::Upsert.new(
+            dao: table.classify.constantize,
+            columns: columns,
+            upsert_columns: upsert_columns,
+            logger: logger
+          )
+        else
+          raise ArgumentError, "either of delete_cond or upsert_columns is required for delta import"
+        end
+      importer
+    end
+
+    def Importer.for_rebuild(strategy: 'rename', table:, columns:, logger: RedshiftConnector.logger)
+      c = get_rebuild_class(strategy)
+      c.new(
+        dao: table.classify.constantize,
+        columns: columns,
+        logger: logger
+      )
+    end
+
+    def Importer.get_rebuild_class(strategy)
+      case strategy.to_s
+      when 'rename' then RebuildRename
+      when 'truncate' then RebuildTruncate
+      else
+        raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
+      end
+    end
+  end
+end
data/lib/{redshift-connector → redshift_connector}/importer/activerecord-import.rb
File without changes
data/lib/{redshift-connector → redshift_connector}/importer/insert_delta.rb
CHANGED
@@ -1,19 +1,18 @@
-require '
-require '
+require 'redshift_connector/importer/activerecord-import'
+require 'redshift_connector/logger'
 
 module RedshiftConnector
   class Importer::InsertDelta
-    def initialize(dao:,
+    def initialize(dao:, columns:, delete_cond:, logger: RedshiftConnector.logger)
      @dao = dao
-      @bundle = bundle
      @columns = columns
      @delete_cond = delete_cond
      @logger = logger
    end
 
-    def execute
+    def execute(bundle)
      delete_rows(@delete_cond)
-      import
+      import(bundle)
    end
 
    def delete_rows(cond_expr)
@@ -22,9 +21,9 @@ module RedshiftConnector
      @logger.info "deleted."
    end
 
-    def import
-      @logger.info "IMPORT #{
-
+    def import(bundle)
+      @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
+      bundle.each_batch do |rows|
        @dao.import(@columns, rows)
      end
    end
data/lib/{redshift-connector → redshift_connector}/importer/rebuild_rename.rb
CHANGED
@@ -1,16 +1,15 @@
-require '
-require '
+require 'redshift_connector/importer/activerecord-import'
+require 'redshift_connector/logger'
 
 module RedshiftConnector
   class Importer::RebuildRename
-    def initialize(dao:,
+    def initialize(dao:, columns:, logger: RedshiftConnector.logger)
      @dao = dao
-      @bundle = bundle
      @columns = columns
      @logger = logger
    end
 
-    def execute
+    def execute(bundle)
      dest_table = @dao.table_name
      tmp_table = "#{dest_table}_new"
      old_table = "#{dest_table}_old"
@@ -20,7 +19,7 @@ module RedshiftConnector
 
      exec_update "drop table if exists #{tmp_table}"
      exec_update "create table #{tmp_table} like #{dest_table}"
-      import(tmp_dao)
+      import(tmp_dao, bundle)
      exec_update "drop table if exists #{old_table}"
      # Atomic table exchange
      exec_update "rename table #{dest_table} to #{old_table}, #{tmp_table} to #{dest_table}"
@@ -31,9 +30,9 @@ module RedshiftConnector
      @dao.connection.execute(query)
    end
 
-    def import(dao)
-      @logger.info "IMPORT #{
-
+    def import(dao, bundle)
+      @logger.info "IMPORT #{bundle.url}* -> #{dao.table_name} (#{@columns.join(', ')})"
+      bundle.each_batch do |rows|
        dao.import(@columns, rows)
      end
    end
data/lib/{redshift-connector → redshift_connector}/importer/rebuild_truncate.rb
CHANGED
@@ -1,18 +1,17 @@
-require '
-require '
+require 'redshift_connector/importer/activerecord-import'
+require 'redshift_connector/logger'
 
 module RedshiftConnector
   class Importer::RebuildTruncate
-    def initialize(dao:,
+    def initialize(dao:, columns:, logger: RedshiftConnector.logger)
      @dao = dao
-      @bundle = bundle
      @columns = columns
      @logger = logger
    end
 
-    def execute
+    def execute(bundle)
      truncate_table(@dao.table_name)
-      import
+      import(bundle)
    end
 
    def truncate_table(table_name)
@@ -21,9 +20,9 @@ module RedshiftConnector
      @logger.info "truncated."
    end
 
-    def import
-      @logger.info "IMPORT #{
-
+    def import(bundle)
+      @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
+      bundle.each_batch do |rows|
        @dao.import(@columns, rows)
      end
    end
data/lib/redshift_connector/importer/upsert.rb
ADDED
@@ -0,0 +1,24 @@
+require 'redshift_connector/importer/activerecord-import'
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  class Importer::Upsert
+    def initialize(dao:, columns:, upsert_columns:, logger: RedshiftConnector.logger)
+      @dao = dao
+      @columns = columns
+      @upsert_columns = upsert_columns
+      @logger = logger
+    end
+
+    def execute(bundle)
+      import(bundle)
+    end
+
+    def import(bundle)
+      @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')}) upsert (#{@upsert_columns.join(', ')})"
+      bundle.each_batch do |rows|
+        @dao.import(@columns, rows, on_duplicate_key_update: @upsert_columns)
+      end
+    end
+  end
+end
data/lib/{redshift-connector → redshift_connector}/query.rb
File without changes
data/test/config.rb
CHANGED
@@ -1,18 +1,13 @@
-
-# For test only
-$TEST_SCHEMA = 'hidekazukobayashi'
+$TEST_SCHEMA = 'aamine'
 
-
+module RedshiftConnector
+  Exporter.default_data_source = RedshiftConnector::ActiveRecordDataSource.new(Redshift)
 
   S3Bucket.add(
-    '
-    bucket: '
-    prefix: 'development
-
-    #iam_role: 'arn:aws:iam::NNNNNNNNNNNN:role/RRRRRRRRR',
-    # When using explicit access key
-    access_key_id: 'AKIAJJGEKUU2MXO3X4NA',
-    secret_access_key: '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',
+    'redshift-copy-buffer',
+    bucket: 'redshift-copy-buffer',
+    prefix: 'development',
+    iam_role: 'arn:aws:iam::789035092620:role/RedshiftDevelopers',
    default: true
  )
 end
data/test/database.yml
CHANGED
@@ -1,7 +1,7 @@
 mysql:
   adapter: mysql2
   host: localhost
-  username:
+  username: minero-aoki
   database: test
   encoding: utf8
 
@@ -10,6 +10,6 @@ redshift:
   host: dwh.ckpd.co
   port: 5439
   database: production
-  username:
-  password:
+  username: aamine
+  password: "3edCVfr$"
   encoding: utf8
data/test/foreach.rb
ADDED
metadata
CHANGED
@@ -1,31 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: redshift-connector
 version: !ruby/object:Gem::Version
-  version:
+  version: 7.0.0
 platform: ruby
 authors:
 - Minero Aoki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-06-
+date: 2017-06-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name:
+  name: redshift-connector-data_file
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 7.0.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 7.0.0
 - !ruby/object:Gem::Dependency
-  name: activerecord
+  name: activerecord
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -39,33 +39,33 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: redshift
+  name: activerecord-redshift
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: 0.18.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: 0.18.0
 - !ruby/object:Gem::Dependency
   name: activerecord-import
   requirement: !ruby/object:Gem::Requirement
@@ -144,33 +144,35 @@ extra_rdoc_files: []
 files:
 - README.md
 - lib/redshift-connector.rb
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
+- lib/redshift_connector.rb
+- lib/redshift_connector/active_record_data_source.rb
+- lib/redshift_connector/active_record_exporter.rb
+- lib/redshift_connector/connector.rb
+- lib/redshift_connector/data_file_bundle_params.rb
+- lib/redshift_connector/exception.rb
+- lib/redshift_connector/exporter.rb
+- lib/redshift_connector/exporter_builder.rb
+- lib/redshift_connector/importer.rb
+- lib/redshift_connector/importer/activerecord-import.rb
+- lib/redshift_connector/importer/insert_delta.rb
+- lib/redshift_connector/importer/rebuild_rename.rb
+- lib/redshift_connector/importer/rebuild_truncate.rb
+- lib/redshift_connector/importer/upsert.rb
+- lib/redshift_connector/query.rb
+- lib/redshift_connector/version.rb
 - test/all.rb
 - test/config.rb
 - test/config.rb.example
 - test/database.yml
 - test/database.yml.example
+- test/foreach.rb
 - test/helper.rb
 - test/item_pvs.ct.mysql
 - test/item_pvs.ct.redshift
 - test/reader/test_redshift_csv.rb
 - test/test_connector.rb
-- test/test_reader.rb
 - test/test_s3_import.rb
-homepage: https://github.com/
+homepage: https://github.com/bricolages/redshift-connector
 licenses:
 - MIT
 metadata: {}
@@ -190,7 +192,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.8
 signing_key:
 specification_version: 4
 summary: Redshift bulk data connector
data/lib/redshift-connector/exporter.rb
DELETED
@@ -1,103 +0,0 @@
-require 'redshift-connector/query'
-require 'redshift-connector/logger'
-
-module RedshiftConnector
-  class Exporter
-    def Exporter.default_data_source=(ds)
-      @default_data_source = ds
-    end
-
-    def Exporter.default_data_source
-      @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
-    end
-
-    def Exporter.for_table_delta(ds: default_data_source, schema:, table:, condition:, columns:, bundle:, logger: RedshiftConnector.logger)
-      delta_query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
-      unload_query = UnloadQuery.new(query: delta_query, bundle: bundle)
-      new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
-    end
-
-    def Exporter.for_table(ds: default_data_source, schema:, table:, columns:, bundle:, logger: RedshiftConnector.logger)
-      query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
-      unload_query = UnloadQuery.new(query: query, bundle: bundle)
-      new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
-    end
-
-    def Exporter.foreach(**params, &block)
-      exporter = Exporter.for_query(**params)
-      begin
-        exporter.execute
-        exporter.bundle.each_row(&block)
-      ensure
-        exporter.bundle.clear
-      end
-    end
-
-    def Exporter.for_query(
-      ds: default_data_source,
-      schema:,
-      table:,
-      bucket: nil,
-      query:,
-      txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
-      filter: nil,
-      enable_sort: false,
-      logger: RedshiftConnector.logger,
-      quiet: false
-    )
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-      logger = NullLogger.new if quiet
-      bundle = S3DataFileBundle.for_table(
-        bucket: bucket,
-        schema: schema,
-        table: table,
-        txn_id: txn_id,
-        filter: filter,
-        logger: logger
-      )
-      exporter = Exporter.new(
-        ds: ds,
-        query: UnloadQuery.wrap(query: query, bundle: bundle, enable_sort: enable_sort),
-        bundle: bundle,
-        logger: logger
-      )
-      exporter
-    end
-
-    def initialize(ds: self.class.default_data_source, query:, bundle:, logger: RedshiftConnector.logger)
-      @ds = ds
-      @query = query
-      @bundle = bundle
-      @logger = logger
-    end
-
-    attr_reader :query
-    attr_reader :bundle
-    attr_reader :logger
-
-    def execute
-      @bundle.clear
-      @logger.info "EXPORT #{@query.description} -> #{@bundle.url}*"
-      @ds.connection_pool.with_connection do |conn|
-        stmt = @query.to_sql
-        @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
-        conn.execute(batch_job_label + stmt)
-      end
-    end
-
-    def batch_job_label
-      @batch_job_label ||= begin
-        components = Dir.getwd.split('/')
-        app = if components.last == 'current'
-                # is Capistrano environment
-                components[-2]
-              else
-                components[-1]
-              end
-        batch_file = caller.detect {|c| /redshift-connector|active_record/ !~ c }
-        path = batch_file ? batch_file.split(':').first : '?'
-        "/* Job: #{app}:#{path} */ "
-      end
-    end
-  end
-end
data/lib/redshift-connector/importer.rb
DELETED
@@ -1,117 +0,0 @@
-# create module
-module RedshiftConnector
-  module Importer
-  end
-end
-
-require 'redshift-connector/importer/upsert'
-require 'redshift-connector/importer/insert_delta'
-require 'redshift-connector/importer/rebuild_rename'
-require 'redshift-connector/importer/rebuild_truncate'
-
-require 'redshift-connector/s3_data_file_bundle'
-require 'redshift-connector/logger'
-
-module RedshiftConnector
-  module Importer
-    def Importer.transport_delta_from_s3(
-      bucket: nil, prefix:, format:, filter: nil,
-      table:, columns:,
-      delete_cond: nil, upsert_columns: nil,
-      logger: RedshiftConnector.logger, quiet: false)
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-      logger = NullLogger.new if quiet
-      bundle = S3DataFileBundle.for_prefix(
-        bucket: bucket,
-        prefix: prefix,
-        format: format,
-        filter: filter,
-        logger: logger
-      )
-      transport_delta_from_bundle(
-        bundle: bundle,
-        table: table, columns: columns,
-        delete_cond: delete_cond, upsert_columns: upsert_columns,
-        logger: logger, quiet: quiet
-      )
-    end
-
-    def Importer.transport_delta_from_bundle(
-      bundle:,
-      table:, columns:,
-      delete_cond: nil, upsert_columns: nil,
-      logger: RedshiftConnector.logger, quiet: false
-    )
-      if delete_cond and upsert_columns
-        raise ArgumentError, "delete_cond and upsert_columns are exclusive"
-      end
-      importer =
-        if delete_cond
-          Importer::InsertDelta.new(
-            dao: table.classify.constantize,
-            bundle: bundle,
-            columns: columns,
-            delete_cond: delete_cond,
-            logger: logger
-          )
-        elsif upsert_columns
-          Importer::Upsert.new(
-            dao: table.classify.constantize,
-            bundle: bundle,
-            columns: columns,
-            upsert_columns: upsert_columns,
-            logger: logger
-          )
-        else
-          raise ArgumentError, "either of delete_cond or upsert_columns is required for transport_delta"
-        end
-      importer
-    end
-
-    def Importer.transport_all_from_s3(
-      strategy: 'rename',
-      bucket: nil, prefix:, format:, filter: nil,
-      table:, columns:,
-      logger: RedshiftConnector.logger, quiet: false)
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-      logger = NullLogger.new if quiet
-      bundle = S3DataFileBundle.for_prefix(
-        bucket: bucket,
-        prefix: prefix,
-        format: format,
-        filter: filter,
-        logger: logger
-      )
-      transport_all_from_bundle(
-        strategy: strategy,
-        bundle: bundle,
-        table: table, columns: columns,
-        logger: logger, quiet: quiet
-      )
-    end
-
-    def Importer.transport_all_from_bundle(
-      strategy: 'rename',
-      bundle:,
-      table:, columns:,
-      logger: RedshiftConnector.logger, quiet: false
-    )
-      importer = get_rebuild_class(strategy).new(
-        dao: table.classify.constantize,
-        bundle: bundle,
-        columns: columns,
-        logger: logger
-      )
-      importer
-    end
-
-    def Importer.get_rebuild_class(strategy)
-      case strategy.to_s
-      when 'rename' then RebuildRename
-      when 'truncate' then RebuildTruncate
-      else
-        raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
-      end
-    end
-  end
-end
data/lib/redshift-connector/importer/upsert.rb
DELETED
@@ -1,25 +0,0 @@
-require 'redshift-connector/importer/activerecord-import'
-require 'redshift-connector/logger'
-
-module RedshiftConnector
-  class Importer::Upsert
-    def initialize(dao:, bundle:, columns:, upsert_columns:, logger: RedshiftConnector.logger)
-      @dao = dao
-      @bundle = bundle
-      @columns = columns
-      @upsert_columns = upsert_columns
-      @logger = logger
-    end
-
-    def execute
-      import
-    end
-
-    def import
-      @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')}) upsert (#{@upsert_columns.join(', ')})"
-      @bundle.each_batch do |rows|
-        @dao.import(@columns, rows, on_duplicate_key_update: @upsert_columns)
-      end
-    end
-  end
-end
data/lib/redshift-connector/logger.rb
DELETED
@@ -1,20 +0,0 @@
-module RedshiftConnector
-  @logger = nil
-
-  def RedshiftConnector.logger
-    # Defer to access Rails
-    @logger || Rails.logger
-  end
-
-  def RedshiftConnector.logger=(logger)
-    @logger = logger
-  end
-
-  class NullLogger
-    def noop(*args) end
-    alias error noop
-    alias warn noop
-    alias info noop
-    alias debug noop
-  end
-end
data/lib/redshift-connector/s3_bucket.rb
DELETED
@@ -1,76 +0,0 @@
-require 'aws-sdk'
-
-module RedshiftConnector
-  class S3Bucket
-    @buckets = {}
-    @default = nil
-
-    def S3Bucket.add(name, default: false, **params)
-      instance = new(**params)
-      @buckets[name.to_s] = instance
-      if !@default or default
-        @default = instance
-      end
-    end
-
-    def S3Bucket.default
-      @default or raise ArgumentError, "no default S3 bucket configured"
-    end
-
-    def S3Bucket.get(name)
-      @buckets[name.to_s] or raise ArgumentError, "no such S3 bucket configured: #{name.inspect}"
-    end
-
-    def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
-      @region = region
-      @name = bucket
-      @prefix = prefix
-      @access_key_id = access_key_id
-      @secret_access_key = secret_access_key
-      @iam_role = iam_role
-    end
-
-    attr_reader :name
-    attr_reader :prefix
-
-    def url
-      "s3://#{@bucket.name}/#{@prefix}/"
-    end
-
-    def client
-      @client ||= begin
-        args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
-        Aws::S3::Client.new(**args)
-      end
-    end
-
-    def bucket
-      @bucket ||= begin
-        resource = Aws::S3::Resource.new(client: client)
-        resource.bucket(@name)
-      end
-    end
-
-    def object(key)
-      bucket.object(key)
-    end
-
-    def objects(prefix:)
-      bucket.objects(prefix: prefix)
-    end
-
-    def delete_objects(keys)
-      bucket.delete_objects(delete: {objects: keys.map {|k| {key: k} }})
-    end
-
-    def credential_string
-      if @iam_role
-        "aws_iam_role=#{@iam_role}"
-      elsif @access_key_id
-        "aws_access_key_id=#{@access_key_id};aws_secret_access_key=#{@secret_access_key}"
-      else
-        raise ArgumentError, "no credential given for Redshift S3 access"
-      end
-    end
-  end
-end
data/lib/redshift-connector/s3_data_file.rb
DELETED
@@ -1,20 +0,0 @@
-require 'redshift-connector/data_file'
-
-module RedshiftConnector
-  class S3DataFile < AbstractDataFile
-    def initialize(object, reader_class:)
-      @object = object
-      @reader_class = reader_class
-    end
-
-    def key
-      @object.key
-    end
-
-    def content
-      @object.get.body
-    end
-
-    delegate :presigned_url, to: :@object
-  end
-end
data/lib/redshift-connector/s3_data_file_bundle.rb
DELETED
@@ -1,54 +0,0 @@
-require 'redshift-connector/s3_bucket'
-require 'redshift-connector/s3_data_file'
-require 'redshift-connector/logger'
-require 'redshift-connector/data_file'
-require 'aws-sdk'
-
-module RedshiftConnector
-  class S3DataFileBundle < AbstractDataFileBundle
-    def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-      real_prefix = "#{bucket.prefix}/#{prefix}"
-      new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
-    end
-
-    def self.for_table(bucket: S3Bucket.default, schema:, table:, txn_id:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-      prefix = "#{bucket.prefix}/#{schema}_export/#{table}/#{txn_id}/#{table}.csv."
-      new(bucket, prefix, format: :redshift_csv, filter: filter, batch_size: batch_size, logger: logger)
-    end
-
-    def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-      @bucket = bucket
-      @prefix = prefix
-      @format = format
-      @filter = filter || lambda {|*row| row }
-      @batch_size = batch_size
-      @logger = logger
-      @reader_class = Reader.get(format)
-    end
-
-    attr_reader :bucket
-    attr_reader :prefix
-
-    def url
-      "s3://#{@bucket.name}/#{@prefix}"
-    end
-
-    def credential_string
-      @bucket.credential_string
-    end
-
-    def data_files
-      @bucket.objects(prefix: @prefix)
-        .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
-    end
-
-    def clear
-      pref = File.dirname(@prefix) + '/'
-      keys = @bucket.objects(prefix: pref).map(&:key)
-      unless keys.empty?
-        @logger.info "DELETE #{pref}*"
-        @bucket.delete_objects(keys)
-      end
-    end
-  end
-end