redshift-connector 6.0.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/redshift-connector.rb +1 -31
- data/lib/redshift_connector.rb +34 -0
- data/lib/redshift_connector/active_record_data_source.rb +23 -0
- data/lib/redshift_connector/active_record_exporter.rb +47 -0
- data/lib/{redshift-connector → redshift_connector}/connector.rb +26 -25
- data/lib/redshift_connector/data_file_bundle_params.rb +28 -0
- data/lib/redshift_connector/exception.rb +5 -0
- data/lib/redshift_connector/exporter.rb +39 -0
- data/lib/redshift_connector/exporter_builder.rb +51 -0
- data/lib/redshift_connector/importer.rb +58 -0
- data/lib/{redshift-connector → redshift_connector}/importer/activerecord-import.rb +0 -0
- data/lib/{redshift-connector → redshift_connector}/importer/insert_delta.rb +8 -9
- data/lib/{redshift-connector → redshift_connector}/importer/rebuild_rename.rb +8 -9
- data/lib/{redshift-connector → redshift_connector}/importer/rebuild_truncate.rb +8 -9
- data/lib/redshift_connector/importer/upsert.rb +24 -0
- data/lib/{redshift-connector → redshift_connector}/query.rb +0 -0
- data/lib/{redshift-connector → redshift_connector}/version.rb +1 -1
- data/test/config.rb +7 -12
- data/test/database.yml +3 -3
- data/test/foreach.rb +5 -0
- metadata +32 -30
- data/lib/redshift-connector/exporter.rb +0 -103
- data/lib/redshift-connector/importer.rb +0 -117
- data/lib/redshift-connector/importer/upsert.rb +0 -25
- data/lib/redshift-connector/logger.rb +0 -20
- data/lib/redshift-connector/s3_bucket.rb +0 -76
- data/lib/redshift-connector/s3_data_file.rb +0 -20
- data/lib/redshift-connector/s3_data_file_bundle.rb +0 -54
- data/test/test_reader.rb +0 -9
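The headline changes in 7.0.0, as visible in the diffs below: the library moves from lib/redshift-connector/ to lib/redshift_connector/ (the old entry file becomes a one-line shim), exporting goes through a pluggable data source (Exporter.default_data_source plus the new ActiveRecordDataSource, ActiveRecordExporter and ExporterBuilder classes), exporters now return the data file bundle and importers receive it as an argument to execute, and the S3 bucket/data-file classes are removed here while a new runtime dependency, redshift-connector-data_file 7.0.0, appears in the gem metadata. A minimal usage sketch of the 7.0.0 entry points follows; the Redshift model and the schema, table, column and condition values are hypothetical, and only the method names and keywords come from the diffs below.

require 'redshift_connector'

# Register the connection used for UNLOAD. `Redshift` is assumed to be an
# ActiveRecord class bound to the Redshift cluster (see test/config.rb below).
RedshiftConnector::Exporter.default_data_source =
  RedshiftConnector::ActiveRecordDataSource.new(Redshift)

# transport_delta UNLOADs changed rows to S3 and imports them into the
# application database via activerecord-import. filter: is a required
# keyword in 7.0.0 (see connector.rb below); all values here are invented.
RedshiftConnector.transport_delta(
  schema: 'app_mst',                        # hypothetical schema
  table: 'users',                           # hypothetical table
  columns: %w[id name updated_at],
  condition: "updated_at > getdate() - 1",  # hypothetical delta condition
  delete_cond: 'true',
  filter: ->(*row) { row }
)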
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc258b0cae8475c5c78a9e37ebb80e16d186245d
+  data.tar.gz: 9c8c846b8bfb89714986146b019aa063bcfee9af
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e9001439475fc25dfbc6c0eda67306044dccb7c5eeda5309fa2995df4adeaa5b028e3a177f33ca938b99d150ea10a8e48206479b0532f26f2eb17b59b5bf791
+  data.tar.gz: 1d83a4852630ab542bb14cc4fac376a08f549aec0d4225d92f65852f5af290c06d3c44ba31660991370707d9763322ad9d01daba9ff0a1dad352f88d4c0db6ff
data/lib/redshift-connector.rb
CHANGED
@@ -1,31 +1 @@
-
-end
-
-require 'redshift-connector/connector'
-require 'redshift-connector/exporter'
-require 'redshift-connector/importer'
-require 'redshift-connector/s3_bucket'
-require 'redshift-connector/s3_data_file_bundle'
-require 'redshift-connector/version'
-
-module RedshiftConnector
-  def RedshiftConnector.transport_delta(**params)
-    Connector.transport_delta(**params)
-  end
-
-  def RedshiftConnector.transport_all(**params)
-    Connector.transport_all(**params)
-  end
-
-  def RedshiftConnector.transport_delta_from_s3(**params)
-    Importer.transport_delta_from_s3(**params)
-  end
-
-  def RedshiftConnector.transport_all_from_s3(**params)
-    Importer.transport_all_from_s3(**params)
-  end
-
-  def RedshiftConnector.foreach(**params, &block)
-    Exporter.foreach(**params, &block)
-  end
-end
+require 'redshift_connector'
data/lib/redshift_connector.rb
ADDED
@@ -0,0 +1,34 @@
+module RedshiftConnector
+end
+
+require 'redshift_connector/connector'
+require 'redshift_connector/exporter'
+require 'redshift_connector/active_record_data_source'
+require 'redshift_connector/active_record_exporter'
+require 'redshift_connector/importer'
+require 'redshift_connector/s3_bucket'
+require 'redshift_connector/s3_data_file_bundle'
+require 'redshift_connector/exception'
+require 'redshift_connector/version'
+
+module RedshiftConnector
+  def RedshiftConnector.transport_delta(**params)
+    Connector.transport_delta(**params)
+  end
+
+  def RedshiftConnector.transport_all(**params)
+    Connector.transport_all(**params)
+  end
+
+  def RedshiftConnector.transport_delta_from_s3(**params)
+    Importer.transport_delta_from_s3(**params)
+  end
+
+  def RedshiftConnector.transport_all_from_s3(**params)
+    Importer.transport_all_from_s3(**params)
+  end
+
+  def RedshiftConnector.foreach(**params, &block)
+    Exporter.foreach(**params, &block)
+  end
+end
data/lib/redshift_connector/active_record_data_source.rb
ADDED
@@ -0,0 +1,23 @@
+require 'redshift_connector/exporter_builder'
+
+module RedshiftConnector
+  class ActiveRecordDataSource
+    def ActiveRecordDataSource.for_dao(dao)
+      new(dao)
+    end
+
+    def initialize(dao)
+      @dao = dao
+    end
+
+    def exporter_builder
+      ExporterBuilder.new(ds: self, exporter_class: ActiveRecordExporter)
+    end
+
+    def execute_query(query_str)
+      @dao.connection_pool.with_connection {|conn|
+        conn.execute(query_str)
+      }
+    end
+  end
+end
data/lib/redshift_connector/active_record_exporter.rb
ADDED
@@ -0,0 +1,47 @@
+require 'redshift_connector/s3_data_file_bundle'
+require 'redshift_connector/query'
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  class ActiveRecordExporter
+    def initialize(ds:, query:, bundle_params:, enable_sort: false, logger: RedshiftConnector.logger)
+      @ds = ds
+      @query = query
+      @bundle_params = bundle_params
+      @enable_sort = enable_sort
+      @logger = logger
+
+      @bundle = S3DataFileBundle.for_params(bundle_params)
+    end
+
+    attr_reader :query
+    attr_reader :bundle_params
+    attr_reader :bundle
+    attr_reader :logger
+
+    def execute
+      @bundle.clear
+      unload_query = UnloadQuery.new(query: @query, bundle: @bundle, enable_sort: @enable_sort)
+      @logger.info "EXPORT #{unload_query.description} -> #{@bundle.url}*"
+      stmt = unload_query.to_sql
+      @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
+      @ds.execute_query(batch_job_label + stmt)
+      @bundle
+    end
+
+    def batch_job_label
+      @batch_job_label ||= begin
+        components = Dir.getwd.split('/')
+        app = if components.last == 'current'
+          # is Capistrano environment
+          components[-2]
+        else
+          components[-1]
+        end
+        batch_file = caller.detect {|c| /redshift_connector|active_record/ !~ c }
+        path = batch_file ? batch_file.split(':').first : '?'
+        "/* Job: #{app}:#{path} */ "
+      end
+    end
+  end
+end
data/lib/{redshift-connector → redshift_connector}/connector.rb
CHANGED
@@ -1,7 +1,7 @@
-require '
-require '
-require '
-require '
+require 'redshift_connector/exporter'
+require 'redshift_connector/importer'
+require 'redshift_connector/data_file_bundle_params'
+require 'redshift_connector/logger'

 module RedshiftConnector
   class Connector
@@ -15,16 +15,16 @@ module RedshiftConnector
       delete_cond: nil,
       upsert_columns: nil,
       bucket: nil,
-      txn_id
+      txn_id: nil,
+      filter:,
       logger: RedshiftConnector.logger,
       quiet: false
     )
       unless src_table and dest_table
         raise ArgumentError, "missing :table, :src_table or :dest_table"
       end
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
       logger = NullLogger.new if quiet
-
+      bundle_params = DataFileBundleParams.new(
        bucket: bucket,
        schema: schema,
        table: src_table,
@@ -33,18 +33,19 @@ module RedshiftConnector
        logger: logger
      )
      exporter = Exporter.for_table_delta(
-
+       bundle_params: bundle_params,
        schema: schema,
        table: src_table,
        columns: columns,
        condition: condition,
        logger: logger
      )
-     importer = Importer.
-
-
-       delete_cond: delete_cond,
-
+     importer = Importer.for_delta_upsert(
+       table: dest_table,
+       columns: columns,
+       delete_cond: delete_cond,
+       upsert_columns: upsert_columns,
+       logger: logger
      )
      new(exporter: exporter, importer: importer, logger: logger)
    end
@@ -57,33 +58,32 @@ module RedshiftConnector
      dest_table: table,
      columns:,
      bucket: nil,
-     txn_id
+     txn_id: nil,
      filter:,
      logger: RedshiftConnector.logger,
      quiet: false
    )
-     bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
      logger = NullLogger.new if quiet
-
+     bundle_params = DataFileBundleParams.new(
       bucket: bucket,
       schema: schema,
-      table:
+      table: src_table,
       txn_id: txn_id,
       filter: filter,
       logger: logger
     )
     exporter = Exporter.for_table(
-
+      bundle_params: bundle_params,
      schema: schema,
-     table:
+     table: src_table,
      columns: columns,
      logger: logger
    )
-    importer = Importer.
+    importer = Importer.for_rebuild(
      strategy: strategy,
-
-
-     logger: logger
+     table: dest_table,
+     columns: columns,
+     logger: logger
    )
    new(exporter: exporter, importer: importer, logger: logger)
  end
@@ -92,6 +92,7 @@ module RedshiftConnector
      @exporter = exporter
      @importer = importer
      @logger = logger
+     @bundle = nil
    end

    def export_enabled?
@@ -109,12 +110,12 @@ module RedshiftConnector

    def export
      @logger.info "==== export task =================================================="
-     @exporter.execute
+     @bundle = @exporter.execute
    end

    def import
      @logger.info "==== import task =================================================="
-     @importer.execute
+     @importer.execute(@bundle)
    end
  end
end
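The Connector change above rewires the export/import hand-off: export now keeps the bundle returned by the exporter (@bundle = @exporter.execute) and import passes it to the importer (@importer.execute(@bundle)), so importers no longer receive a bundle in their constructors. The sketch below drives the same flow by hand with the new factories; every schema, table and model name is invented, and it assumes Exporter.default_data_source has already been configured.

# Sketch of the new exporter -> importer hand-off introduced in 7.0.0.
bundle_params = RedshiftConnector::DataFileBundleParams.new(
  schema: 'app_mst',           # hypothetical schema
  table: 'users',              # hypothetical table
  filter: ->(*row) { row }
)
exporter = RedshiftConnector::Exporter.for_table(
  bundle_params: bundle_params,
  schema: 'app_mst',
  table: 'users',
  columns: %w[id name]
)
importer = RedshiftConnector::Importer.for_rebuild(
  strategy: 'truncate',
  table: 'users',              # resolved to a User model via classify.constantize
  columns: %w[id name]
)
bundle = exporter.execute      # UNLOAD to S3; returns the data file bundle
importer.execute(bundle)       # truncate, then import the bundle rows in batches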
data/lib/redshift_connector/data_file_bundle_params.rb
ADDED
@@ -0,0 +1,28 @@
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  class DataFileBundleParams
+    def initialize(
+      bucket: nil,
+      schema:,
+      table:,
+      txn_id: nil,
+      filter:,
+      logger: RedshiftConnector.logger
+    )
+      @bucket = bucket
+      @schema = schema
+      @table = table
+      @txn_id = txn_id
+      @filter = filter
+      @logger = logger
+    end
+
+    attr_reader :bucket
+    attr_reader :schema
+    attr_reader :table
+    attr_reader :txn_id
+    attr_reader :filter
+    attr_reader :logger
+  end
+end
data/lib/redshift_connector/exporter.rb
ADDED
@@ -0,0 +1,39 @@
+module RedshiftConnector
+  module Exporter
+    @default_data_source = nil
+
+    def Exporter.default_data_source=(ds)
+      @default_data_source = ds
+    end
+
+    def Exporter.default_data_source
+      @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
+    end
+
+    def Exporter.builder
+      default_data_source.exporter_builder
+    end
+
+    def Exporter.for_table_delta(**params)
+      builder.build_for_table_delta(**params)
+    end
+
+    def Exporter.for_table(**params)
+      builder.build_for_table(**params)
+    end
+
+    def Exporter.for_query(**params)
+      builder.build_for_query(**params)
+    end
+
+    def Exporter.foreach(**params, &block)
+      exporter = for_query(**params)
+      begin
+        exporter.execute
+        exporter.bundle.each_row(&block)
+      ensure
+        exporter.bundle.clear
+      end
+    end
+  end
+end
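Exporter.foreach above builds a one-off query exporter, executes it, streams every row to the block, and clears the intermediate S3 files in an ensure block; the new test/foreach.rb presumably exercises this path. A hedged sketch of calling it through the RedshiftConnector.foreach entry point; the query text and all names are invented, and the keywords mirror ExporterBuilder#build_for_query.

RedshiftConnector.foreach(
  schema: 'app_mst',                             # hypothetical
  table: 'users',                                # hypothetical; names the S3 staging area
  query: 'select id, name from app_mst.users',   # hypothetical query
  enable_sort: true
) do |id, name|
  puts "#{id}\t#{name}"
end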
data/lib/redshift_connector/exporter_builder.rb
ADDED
@@ -0,0 +1,51 @@
+require 'redshift_connector/query'
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  class ExporterBuilder
+    def initialize(ds:, exporter_class:)
+      @ds = ds
+      @exporter_class = exporter_class
+    end
+
+    def build_for_table_delta(schema:, table:, condition:, columns:, bundle_params:, logger: RedshiftConnector.logger)
+      query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
+      @exporter_class.new(ds: @ds, query: query, bundle_params: bundle_params, logger: logger)
+    end
+
+    def build_for_table(schema:, table:, columns:, bundle_params:, logger: RedshiftConnector.logger)
+      query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
+      @exporter_class.new(ds: @ds, query: query, bundle_params: bundle_params, logger: logger)
+    end
+
+    def build_for_query(
+      schema:,
+      table:,
+      bucket: nil,
+      query:,
+      txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
+      filter: nil,
+      enable_sort: false,
+      logger: RedshiftConnector.logger,
+      quiet: false
+    )
+      logger = NullLogger.new if quiet
+      bundle_params = DataFileBundleParams.new(
+        bucket: bucket,
+        schema: schema,
+        table: table,
+        txn_id: txn_id,
+        filter: filter,
+        logger: logger
+      )
+      @exporter_class.new(
+        ds: @ds,
+        query: ArbitraryQuery.new(query),
+        bundle_params: bundle_params,
+        enable_sort: enable_sort,
+        logger: logger
+      )
+    end
+
+  end
+end
data/lib/redshift_connector/importer.rb
ADDED
@@ -0,0 +1,58 @@
+# create module
+module RedshiftConnector
+  module Importer
+  end
+end
+
+require 'redshift_connector/importer/upsert'
+require 'redshift_connector/importer/insert_delta'
+require 'redshift_connector/importer/rebuild_rename'
+require 'redshift_connector/importer/rebuild_truncate'
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  module Importer
+    def Importer.for_delta_upsert(table:, columns:, delete_cond: nil, upsert_columns: nil, logger: RedshiftConnector.logger)
+      if delete_cond and upsert_columns
+        raise ArgumentError, "delete_cond and upsert_columns are exclusive"
+      end
+      importer =
+        if delete_cond
+          Importer::InsertDelta.new(
+            dao: table.classify.constantize,
+            columns: columns,
+            delete_cond: delete_cond,
+            logger: logger
+          )
+        elsif upsert_columns
+          Importer::Upsert.new(
+            dao: table.classify.constantize,
+            columns: columns,
+            upsert_columns: upsert_columns,
+            logger: logger
+          )
+        else
+          raise ArgumentError, "either of delete_cond or upsert_columns is required for delta import"
+        end
+      importer
+    end
+
+    def Importer.for_rebuild(strategy: 'rename', table:, columns:, logger: RedshiftConnector.logger)
+      c = get_rebuild_class(strategy)
+      c.new(
+        dao: table.classify.constantize,
+        columns: columns,
+        logger: logger
+      )
+    end
+
+    def Importer.get_rebuild_class(strategy)
+      case strategy.to_s
+      when 'rename' then RebuildRename
+      when 'truncate' then RebuildTruncate
+      else
+        raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
+      end
+    end
+  end
+end
data/lib/{redshift-connector → redshift_connector}/importer/activerecord-import.rb
File without changes
data/lib/{redshift-connector → redshift_connector}/importer/insert_delta.rb
CHANGED
@@ -1,19 +1,18 @@
-require '
-require '
+require 'redshift_connector/importer/activerecord-import'
+require 'redshift_connector/logger'

 module RedshiftConnector
   class Importer::InsertDelta
-    def initialize(dao:,
+    def initialize(dao:, columns:, delete_cond:, logger: RedshiftConnector.logger)
       @dao = dao
-      @bundle = bundle
       @columns = columns
       @delete_cond = delete_cond
       @logger = logger
     end

-    def execute
+    def execute(bundle)
       delete_rows(@delete_cond)
-      import
+      import(bundle)
     end

     def delete_rows(cond_expr)
@@ -22,9 +21,9 @@ module RedshiftConnector
       @logger.info "deleted."
     end

-    def import
-      @logger.info "IMPORT #{
-
+    def import(bundle)
+      @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
+      bundle.each_batch do |rows|
        @dao.import(@columns, rows)
      end
    end
data/lib/{redshift-connector → redshift_connector}/importer/rebuild_rename.rb
CHANGED
@@ -1,16 +1,15 @@
-require '
-require '
+require 'redshift_connector/importer/activerecord-import'
+require 'redshift_connector/logger'

 module RedshiftConnector
   class Importer::RebuildRename
-    def initialize(dao:,
+    def initialize(dao:, columns:, logger: RedshiftConnector.logger)
       @dao = dao
-      @bundle = bundle
       @columns = columns
       @logger = logger
     end

-    def execute
+    def execute(bundle)
       dest_table = @dao.table_name
       tmp_table = "#{dest_table}_new"
       old_table = "#{dest_table}_old"
@@ -20,7 +19,7 @@ module RedshiftConnector

       exec_update "drop table if exists #{tmp_table}"
       exec_update "create table #{tmp_table} like #{dest_table}"
-      import(tmp_dao)
+      import(tmp_dao, bundle)
       exec_update "drop table if exists #{old_table}"
       # Atomic table exchange
       exec_update "rename table #{dest_table} to #{old_table}, #{tmp_table} to #{dest_table}"
@@ -31,9 +30,9 @@ module RedshiftConnector
       @dao.connection.execute(query)
     end

-    def import(dao)
-      @logger.info "IMPORT #{
-
+    def import(dao, bundle)
+      @logger.info "IMPORT #{bundle.url}* -> #{dao.table_name} (#{@columns.join(', ')})"
+      bundle.each_batch do |rows|
        dao.import(@columns, rows)
      end
    end
data/lib/{redshift-connector → redshift_connector}/importer/rebuild_truncate.rb
CHANGED
@@ -1,18 +1,17 @@
-require '
-require '
+require 'redshift_connector/importer/activerecord-import'
+require 'redshift_connector/logger'

 module RedshiftConnector
   class Importer::RebuildTruncate
-    def initialize(dao:,
+    def initialize(dao:, columns:, logger: RedshiftConnector.logger)
       @dao = dao
-      @bundle = bundle
       @columns = columns
       @logger = logger
     end

-    def execute
+    def execute(bundle)
       truncate_table(@dao.table_name)
-      import
+      import(bundle)
     end

     def truncate_table(table_name)
@@ -21,9 +20,9 @@ module RedshiftConnector
       @logger.info "truncated."
     end

-    def import
-      @logger.info "IMPORT #{
-
+    def import(bundle)
+      @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
+      bundle.each_batch do |rows|
        @dao.import(@columns, rows)
      end
    end
data/lib/redshift_connector/importer/upsert.rb
ADDED
@@ -0,0 +1,24 @@
+require 'redshift_connector/importer/activerecord-import'
+require 'redshift_connector/logger'
+
+module RedshiftConnector
+  class Importer::Upsert
+    def initialize(dao:, columns:, upsert_columns:, logger: RedshiftConnector.logger)
+      @dao = dao
+      @columns = columns
+      @upsert_columns = upsert_columns
+      @logger = logger
+    end
+
+    def execute(bundle)
+      import(bundle)
+    end
+
+    def import(bundle)
+      @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')}) upsert (#{@upsert_columns.join(', ')})"
+      bundle.each_batch do |rows|
+        @dao.import(@columns, rows, on_duplicate_key_update: @upsert_columns)
+      end
+    end
+  end
+end
data/lib/{redshift-connector → redshift_connector}/query.rb
File without changes
data/test/config.rb
CHANGED
@@ -1,18 +1,13 @@
-
-# For test only
-$TEST_SCHEMA = 'hidekazukobayashi'
+$TEST_SCHEMA = 'aamine'

-
+module RedshiftConnector
+  Exporter.default_data_source = RedshiftConnector::ActiveRecordDataSource.new(Redshift)

   S3Bucket.add(
-    '
-    bucket: '
-    prefix: 'development
-
-    #iam_role: 'arn:aws:iam::NNNNNNNNNNNN:role/RRRRRRRRR',
-    # When using explicit access key
-    access_key_id: 'AKIAJJGEKUU2MXO3X4NA',
-    secret_access_key: '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',
+    'redshift-copy-buffer',
+    bucket: 'redshift-copy-buffer',
+    prefix: 'development',
+    iam_role: 'arn:aws:iam::789035092620:role/RedshiftDevelopers',
     default: true
   )
 end
data/test/database.yml
CHANGED
@@ -1,7 +1,7 @@
 mysql:
   adapter: mysql2
   host: localhost
-  username:
+  username: minero-aoki
   database: test
   encoding: utf8

@@ -10,6 +10,6 @@ redshift:
   host: dwh.ckpd.co
   port: 5439
   database: production
-  username:
-  password:
+  username: aamine
+  password: "3edCVfr$"
   encoding: utf8
data/test/foreach.rb
ADDED
metadata
CHANGED
@@ -1,31 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: redshift-connector
 version: !ruby/object:Gem::Version
-  version:
+  version: 7.0.0
 platform: ruby
 authors:
 - Minero Aoki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-06-
+date: 2017-06-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name:
+  name: redshift-connector-data_file
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 7.0.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 7.0.0
 - !ruby/object:Gem::Dependency
-  name: activerecord
+  name: activerecord
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -39,33 +39,33 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: redshift
+  name: activerecord-redshift
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: 0.18.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: 0.18.0
 - !ruby/object:Gem::Dependency
   name: activerecord-import
   requirement: !ruby/object:Gem::Requirement
@@ -144,33 +144,35 @@ extra_rdoc_files: []
 files:
 - README.md
 - lib/redshift-connector.rb
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
-- lib/
+- lib/redshift_connector.rb
+- lib/redshift_connector/active_record_data_source.rb
+- lib/redshift_connector/active_record_exporter.rb
+- lib/redshift_connector/connector.rb
+- lib/redshift_connector/data_file_bundle_params.rb
+- lib/redshift_connector/exception.rb
+- lib/redshift_connector/exporter.rb
+- lib/redshift_connector/exporter_builder.rb
+- lib/redshift_connector/importer.rb
+- lib/redshift_connector/importer/activerecord-import.rb
+- lib/redshift_connector/importer/insert_delta.rb
+- lib/redshift_connector/importer/rebuild_rename.rb
+- lib/redshift_connector/importer/rebuild_truncate.rb
+- lib/redshift_connector/importer/upsert.rb
+- lib/redshift_connector/query.rb
+- lib/redshift_connector/version.rb
 - test/all.rb
 - test/config.rb
 - test/config.rb.example
 - test/database.yml
 - test/database.yml.example
+- test/foreach.rb
 - test/helper.rb
 - test/item_pvs.ct.mysql
 - test/item_pvs.ct.redshift
 - test/reader/test_redshift_csv.rb
 - test/test_connector.rb
-- test/test_reader.rb
 - test/test_s3_import.rb
-homepage: https://github.com/
+homepage: https://github.com/bricolages/redshift-connector
 licenses:
 - MIT
 metadata: {}
@@ -190,7 +192,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.8
 signing_key:
 specification_version: 4
 summary: Redshift bulk data connector
data/lib/redshift-connector/exporter.rb
DELETED
@@ -1,103 +0,0 @@
-require 'redshift-connector/query'
-require 'redshift-connector/logger'
-
-module RedshiftConnector
-  class Exporter
-    def Exporter.default_data_source=(ds)
-      @default_data_source = ds
-    end
-
-    def Exporter.default_data_source
-      @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
-    end
-
-    def Exporter.for_table_delta(ds: default_data_source, schema:, table:, condition:, columns:, bundle:, logger: RedshiftConnector.logger)
-      delta_query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
-      unload_query = UnloadQuery.new(query: delta_query, bundle: bundle)
-      new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
-    end
-
-    def Exporter.for_table(ds: default_data_source, schema:, table:, columns:, bundle:, logger: RedshiftConnector.logger)
-      query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
-      unload_query = UnloadQuery.new(query: query, bundle: bundle)
-      new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
-    end
-
-    def Exporter.foreach(**params, &block)
-      exporter = Exporter.for_query(**params)
-      begin
-        exporter.execute
-        exporter.bundle.each_row(&block)
-      ensure
-        exporter.bundle.clear
-      end
-    end
-
-    def Exporter.for_query(
-      ds: default_data_source,
-      schema:,
-      table:,
-      bucket: nil,
-      query:,
-      txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
-      filter: nil,
-      enable_sort: false,
-      logger: RedshiftConnector.logger,
-      quiet: false
-    )
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-      logger = NullLogger.new if quiet
-      bundle = S3DataFileBundle.for_table(
-        bucket: bucket,
-        schema: schema,
-        table: table,
-        txn_id: txn_id,
-        filter: filter,
-        logger: logger
-      )
-      exporter = Exporter.new(
-        ds: ds,
-        query: UnloadQuery.wrap(query: query, bundle: bundle, enable_sort: enable_sort),
-        bundle: bundle,
-        logger: logger
-      )
-      exporter
-    end
-
-    def initialize(ds: self.class.default_data_source, query:, bundle:, logger: RedshiftConnector.logger)
-      @ds = ds
-      @query = query
-      @bundle = bundle
-      @logger = logger
-    end
-
-    attr_reader :query
-    attr_reader :bundle
-    attr_reader :logger
-
-    def execute
-      @bundle.clear
-      @logger.info "EXPORT #{@query.description} -> #{@bundle.url}*"
-      @ds.connection_pool.with_connection do |conn|
-        stmt = @query.to_sql
-        @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
-        conn.execute(batch_job_label + stmt)
-      end
-    end
-
-    def batch_job_label
-      @batch_job_label ||= begin
-        components = Dir.getwd.split('/')
-        app = if components.last == 'current'
-          # is Capistrano environment
-          components[-2]
-        else
-          components[-1]
-        end
-        batch_file = caller.detect {|c| /redshift-connector|active_record/ !~ c }
-        path = batch_file ? batch_file.split(':').first : '?'
-        "/* Job: #{app}:#{path} */ "
-      end
-    end
-  end
-end
data/lib/redshift-connector/importer.rb
DELETED
@@ -1,117 +0,0 @@
-# create module
-module RedshiftConnector
-  module Importer
-  end
-end
-
-require 'redshift-connector/importer/upsert'
-require 'redshift-connector/importer/insert_delta'
-require 'redshift-connector/importer/rebuild_rename'
-require 'redshift-connector/importer/rebuild_truncate'
-
-require 'redshift-connector/s3_data_file_bundle'
-require 'redshift-connector/logger'
-
-module RedshiftConnector
-  module Importer
-    def Importer.transport_delta_from_s3(
-      bucket: nil, prefix:, format:, filter: nil,
-      table:, columns:,
-      delete_cond: nil, upsert_columns: nil,
-      logger: RedshiftConnector.logger, quiet: false)
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-      logger = NullLogger.new if quiet
-      bundle = S3DataFileBundle.for_prefix(
-        bucket: bucket,
-        prefix: prefix,
-        format: format,
-        filter: filter,
-        logger: logger
-      )
-      transport_delta_from_bundle(
-        bundle: bundle,
-        table: table, columns: columns,
-        delete_cond: delete_cond, upsert_columns: upsert_columns,
-        logger: logger, quiet: quiet
-      )
-    end
-
-    def Importer.transport_delta_from_bundle(
-      bundle:,
-      table:, columns:,
-      delete_cond: nil, upsert_columns: nil,
-      logger: RedshiftConnector.logger, quiet: false
-    )
-      if delete_cond and upsert_columns
-        raise ArgumentError, "delete_cond and upsert_columns are exclusive"
-      end
-      importer =
-        if delete_cond
-          Importer::InsertDelta.new(
-            dao: table.classify.constantize,
-            bundle: bundle,
-            columns: columns,
-            delete_cond: delete_cond,
-            logger: logger
-          )
-        elsif upsert_columns
-          Importer::Upsert.new(
-            dao: table.classify.constantize,
-            bundle: bundle,
-            columns: columns,
-            upsert_columns: upsert_columns,
-            logger: logger
-          )
-        else
-          raise ArgumentError, "either of delete_cond or upsert_columns is required for transport_delta"
-        end
-      importer
-    end
-
-    def Importer.transport_all_from_s3(
-      strategy: 'rename',
-      bucket: nil, prefix:, format:, filter: nil,
-      table:, columns:,
-      logger: RedshiftConnector.logger, quiet: false)
-      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-      logger = NullLogger.new if quiet
-      bundle = S3DataFileBundle.for_prefix(
-        bucket: bucket,
-        prefix: prefix,
-        format: format,
-        filter: filter,
-        logger: logger
-      )
-      transport_all_from_bundle(
-        strategy: strategy,
-        bundle: bundle,
-        table: table, columns: columns,
-        logger: logger, quiet: quiet
-      )
-    end
-
-    def Importer.transport_all_from_bundle(
-      strategy: 'rename',
-      bundle:,
-      table:, columns:,
-      logger: RedshiftConnector.logger, quiet: false
-    )
-      importer = get_rebuild_class(strategy).new(
-        dao: table.classify.constantize,
-        bundle: bundle,
-        columns: columns,
-        logger: logger
-      )
-      importer
-    end
-
-    def Importer.get_rebuild_class(strategy)
-      case strategy.to_s
-      when 'rename' then RebuildRename
-      when 'truncate' then RebuildTruncate
-      else
-        raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
-      end
-    end
-  end
-end
data/lib/redshift-connector/importer/upsert.rb
DELETED
@@ -1,25 +0,0 @@
-require 'redshift-connector/importer/activerecord-import'
-require 'redshift-connector/logger'
-
-module RedshiftConnector
-  class Importer::Upsert
-    def initialize(dao:, bundle:, columns:, upsert_columns:, logger: RedshiftConnector.logger)
-      @dao = dao
-      @bundle = bundle
-      @columns = columns
-      @upsert_columns = upsert_columns
-      @logger = logger
-    end
-
-    def execute
-      import
-    end
-
-    def import
-      @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')}) upsert (#{@upsert_columns.join(', ')})"
-      @bundle.each_batch do |rows|
-        @dao.import(@columns, rows, on_duplicate_key_update: @upsert_columns)
-      end
-    end
-  end
-end
data/lib/redshift-connector/logger.rb
DELETED
@@ -1,20 +0,0 @@
-module RedshiftConnector
-  @logger = nil
-
-  def RedshiftConnector.logger
-    # Defer to access Rails
-    @logger || Rails.logger
-  end
-
-  def RedshiftConnector.logger=(logger)
-    @logger = logger
-  end
-
-  class NullLogger
-    def noop(*args) end
-    alias error noop
-    alias warn noop
-    alias info noop
-    alias debug noop
-  end
-end
data/lib/redshift-connector/s3_bucket.rb
DELETED
@@ -1,76 +0,0 @@
-require 'aws-sdk'
-
-module RedshiftConnector
-  class S3Bucket
-    @buckets = {}
-    @default = nil
-
-    def S3Bucket.add(name, default: false, **params)
-      instance = new(**params)
-      @buckets[name.to_s] = instance
-      if !@default or default
-        @default = instance
-      end
-    end
-
-    def S3Bucket.default
-      @default or raise ArgumentError, "no default S3 bucket configured"
-    end
-
-    def S3Bucket.get(name)
-      @buckets[name.to_s] or raise ArgumentError, "no such S3 bucket configured: #{name.inspect}"
-    end
-
-    def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
-      @region = region
-      @name = bucket
-      @prefix = prefix
-      @access_key_id = access_key_id
-      @secret_access_key = secret_access_key
-      @iam_role = iam_role
-    end
-
-    attr_reader :name
-    attr_reader :prefix
-
-    def url
-      "s3://#{@bucket.name}/#{@prefix}/"
-    end
-
-    def client
-      @client ||= begin
-        args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
-        Aws::S3::Client.new(**args)
-      end
-    end
-
-    def bucket
-      @bucket ||= begin
-        resource = Aws::S3::Resource.new(client: client)
-        resource.bucket(@name)
-      end
-    end
-
-    def object(key)
-      bucket.object(key)
-    end
-
-    def objects(prefix:)
-      bucket.objects(prefix: prefix)
-    end
-
-    def delete_objects(keys)
-      bucket.delete_objects(delete: {objects: keys.map {|k| {key: k} }})
-    end
-
-    def credential_string
-      if @iam_role
-        "aws_iam_role=#{@iam_role}"
-      elsif @access_key_id
-        "aws_access_key_id=#{@access_key_id};aws_secret_access_key=#{@secret_access_key}"
-      else
-        raise ArgumentError, "no credential given for Redshift S3 access"
-      end
-    end
-  end
-end
data/lib/redshift-connector/s3_data_file.rb
DELETED
@@ -1,20 +0,0 @@
-require 'redshift-connector/data_file'
-
-module RedshiftConnector
-  class S3DataFile < AbstractDataFile
-    def initialize(object, reader_class:)
-      @object = object
-      @reader_class = reader_class
-    end
-
-    def key
-      @object.key
-    end
-
-    def content
-      @object.get.body
-    end
-
-    delegate :presigned_url, to: :@object
-  end
-end
data/lib/redshift-connector/s3_data_file_bundle.rb
DELETED
@@ -1,54 +0,0 @@
-require 'redshift-connector/s3_bucket'
-require 'redshift-connector/s3_data_file'
-require 'redshift-connector/logger'
-require 'redshift-connector/data_file'
-require 'aws-sdk'
-
-module RedshiftConnector
-  class S3DataFileBundle < AbstractDataFileBundle
-    def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-      real_prefix = "#{bucket.prefix}/#{prefix}"
-      new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
-    end
-
-    def self.for_table(bucket: S3Bucket.default, schema:, table:, txn_id:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-      prefix = "#{bucket.prefix}/#{schema}_export/#{table}/#{txn_id}/#{table}.csv."
-      new(bucket, prefix, format: :redshift_csv, filter: filter, batch_size: batch_size, logger: logger)
-    end
-
-    def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-      @bucket = bucket
-      @prefix = prefix
-      @format = format
-      @filter = filter || lambda {|*row| row }
-      @batch_size = batch_size
-      @logger = logger
-      @reader_class = Reader.get(format)
-    end
-
-    attr_reader :bucket
-    attr_reader :prefix
-
-    def url
-      "s3://#{@bucket.name}/#{@prefix}"
-    end
-
-    def credential_string
-      @bucket.credential_string
-    end
-
-    def data_files
-      @bucket.objects(prefix: @prefix)
-        .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
-    end
-
-    def clear
-      pref = File.dirname(@prefix) + '/'
-      keys = @bucket.objects(prefix: pref).map(&:key)
-      unless keys.empty?
-        @logger.info "DELETE #{pref}*"
-        @bucket.delete_objects(keys)
-      end
-    end
-  end
-end