redshift-connector 4.3.0
- checksums.yaml +7 -0
- data/README.md +10 -0
- data/lib/redshift-connector.rb +31 -0
- data/lib/redshift-connector/connector.rb +146 -0
- data/lib/redshift-connector/exporter.rb +116 -0
- data/lib/redshift-connector/importer.rb +89 -0
- data/lib/redshift-connector/importer/activerecord-import.rb +2 -0
- data/lib/redshift-connector/importer/insert_delta.rb +32 -0
- data/lib/redshift-connector/importer/rebuild_rename.rb +41 -0
- data/lib/redshift-connector/importer/rebuild_truncate.rb +31 -0
- data/lib/redshift-connector/importer/upsert.rb +25 -0
- data/lib/redshift-connector/logger.rb +20 -0
- data/lib/redshift-connector/query.rb +93 -0
- data/lib/redshift-connector/reader.rb +18 -0
- data/lib/redshift-connector/reader/abstract.rb +18 -0
- data/lib/redshift-connector/reader/csv.rb +24 -0
- data/lib/redshift-connector/reader/exception.rb +3 -0
- data/lib/redshift-connector/reader/redshift_csv.rb +54 -0
- data/lib/redshift-connector/reader/tsv.rb +24 -0
- data/lib/redshift-connector/s3_bucket.rb +72 -0
- data/lib/redshift-connector/s3_data_file.rb +34 -0
- data/lib/redshift-connector/s3_data_file_bundle.rb +101 -0
- data/lib/redshift-connector/version.rb +3 -0
- data/test/all.rb +3 -0
- data/test/config.rb +13 -0
- data/test/config.rb.example +18 -0
- data/test/database.yml +15 -0
- data/test/database.yml.example +15 -0
- data/test/foreach.rb +5 -0
- data/test/helper.rb +25 -0
- data/test/item_pvs.ct.mysql +11 -0
- data/test/item_pvs.ct.redshift +9 -0
- data/test/reader/test_redshift_csv.rb +30 -0
- data/test/test_connector.rb +148 -0
- data/test/test_reader.rb +10 -0
- data/test/test_s3_import.rb +32 -0
- metadata +190 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: c5d5bf307943f19b5187a9244ecd89717241d72f
+  data.tar.gz: 1651cd723205ee50247c5e407f39aaa1eafd287d
+SHA512:
+  metadata.gz: ff3ef2d4b7e647617529313df5b1d24eb9bf537672ba4aeba3de6542d2fa60589576854b308bc7db5b34c853e1cf30a1a517f6cfd1c58b876018dd6c6fcab7c8
+  data.tar.gz: 51de6b97fd7099a2dfaf401b1f230e49f991c623cd43949bbf9e8a4e5f822180040b024638938613aaa89fdbaced93f5c9e27bf6e5e57c8e3d6c24a486f8c06b
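
checksums.yaml records the SHA1 and SHA512 digests of the two archives packed inside the .gem file. A minimal verification sketch, assuming metadata.gz and data.tar.gz have already been extracted from the .gem archive into the current directory (file names here are illustrative, not part of the gem's code):

    require 'digest'

    # Recompute the digests and compare them against the values above.
    %w[metadata.gz data.tar.gz].each do |name|
      puts "#{name}: SHA1=#{Digest::SHA1.file(name).hexdigest}"
      puts "#{name}: SHA512=#{Digest::SHA512.file(name).hexdigest}"
    end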
data/lib/redshift-connector.rb
ADDED
@@ -0,0 +1,31 @@
+module RedshiftConnector
+end
+
+require 'redshift-connector/connector'
+require 'redshift-connector/exporter'
+require 'redshift-connector/importer'
+require 'redshift-connector/s3_bucket'
+require 'redshift-connector/s3_data_file_bundle'
+require 'redshift-connector/version'
+
+module RedshiftConnector
+  def RedshiftConnector.transport_delta(**params)
+    Connector.transport_delta(**params)
+  end
+
+  def RedshiftConnector.transport_all(**params)
+    Connector.transport_all(**params)
+  end
+
+  def RedshiftConnector.transport_delta_from_s3(**params)
+    Importer.transport_delta_from_s3(**params)
+  end
+
+  def RedshiftConnector.transport_all_from_s3(**params)
+    Importer.transport_all_from_s3(**params)
+  end
+
+  def RedshiftConnector.foreach(**params, &block)
+    Exporter.foreach(**params, &block)
+  end
+end
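
data/lib/redshift-connector.rb is the public entry point: each module-level method simply forwards its keyword arguments to Connector, Importer, or Exporter and returns the built object. A hedged usage sketch, not part of the gem; schema, table, and column names are invented, the SQL fragments are placeholders, and a default S3 bucket is assumed to be configured:

    require 'redshift-connector'

    # Delta transport: UNLOAD matching rows from Redshift to S3, then
    # delete + insert them into the MySQL table behind the `items` model.
    connector = RedshiftConnector.transport_delta(
      schema: 'app_mst',                                   # illustrative Redshift schema
      table: 'items',                                      # resolved to the Item ActiveRecord model
      condition: "updated_at > dateadd(day, -1, getdate())",
      delete_cond: "updated_at > now() - interval 1 day",
      columns: %w[id name updated_at],
      txn_id: Time.now.strftime('%Y%m%d_%H%M%S'),
      filter: nil                                          # assumed: nil means no per-row transformation
    )
    connector.execute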
data/lib/redshift-connector/connector.rb
ADDED
@@ -0,0 +1,146 @@
+require 'redshift-connector/exporter'
+require 'redshift-connector/importer'
+require 'redshift-connector/s3_data_file_bundle'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  class Connector
+    def Connector.transport_delta(
+      schema:,
+      table: nil,
+      src_table: table,
+      dest_table: table,
+      condition:,
+      columns:,
+      delete_cond: nil,
+      upsert_columns: nil,
+      bucket: nil,
+      txn_id:, filter:,
+      logger: RedshiftConnector.logger,
+      quiet: false
+    )
+      unless src_table and dest_table
+        raise ArgumentError, "missing :table, :src_table or :dest_table"
+      end
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_table(
+        bucket: bucket,
+        schema: schema,
+        table: src_table,
+        txn_id: txn_id,
+        filter: filter,
+        logger: logger
+      )
+      exporter = Exporter.for_table_delta(
+        bundle: bundle,
+        schema: schema,
+        table: src_table,
+        columns: columns,
+        condition: condition,
+        logger: logger
+      )
+      if delete_cond and upsert_columns
+        raise ArgumentError, "delete_cond and upsert_columns are exclusive"
+      end
+      dao = dest_table.classify.constantize
+      importer =
+        if delete_cond
+          Importer::InsertDelta.new(
+            dao: dao,
+            bundle: bundle,
+            columns: columns,
+            delete_cond: delete_cond,
+            logger: logger
+          )
+        elsif upsert_columns
+          Importer::Upsert.new(
+            dao: dao,
+            bundle: bundle,
+            columns: columns,
+            upsert_columns: upsert_columns,
+            logger: logger
+          )
+        else
+          raise ArgumentError, "either of delete_cond or upsert_columns is required for transport_delta"
+        end
+      new(exporter: exporter, importer: importer, logger: logger)
+    end
+
+    def Connector.transport_all(
+      strategy: 'rename',
+      schema:,
+      table:,
+      src_table: table,
+      dest_table: table,
+      columns:,
+      bucket: nil,
+      txn_id:,
+      filter:,
+      logger: RedshiftConnector.logger,
+      quiet: false
+    )
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_table(
+        bucket: bucket,
+        schema: schema,
+        table: table,
+        txn_id: txn_id,
+        filter: filter,
+        logger: logger
+      )
+      exporter = Exporter.for_table(
+        bundle: bundle,
+        schema: schema,
+        table: table,
+        columns: columns,
+        logger: logger
+      )
+      importer = Importer.get_rebuild_class(strategy).new(
+        dao: table.classify.constantize,
+        bundle: bundle,
+        columns: columns,
+        logger: logger
+      )
+      new(exporter: exporter, importer: importer, logger: logger)
+    end
+
+    def initialize(exporter:, importer:, logger:)
+      @exporter = exporter
+      @importer = importer
+      @logger = logger
+    end
+
+    def export_enabled?
+      not ENV['IMPORT_ONLY']
+    end
+
+    def export_forced?
+      !!(ENV['EXPORT_ONLY'] or ENV['FORCE'])
+    end
+
+    def import_enabled?
+      not ENV['EXPORT_ONLY']
+    end
+
+    def execute
+      export(forced: export_forced?) if export_enabled?
+      import if import_enabled?
+    end
+
+    def export(forced: false)
+      @logger.info "==== export task =================================================="
+      if not forced and @exporter.completed?
+        @logger.info "export task is already executed; skip"
+      else
+        @exporter.execute
+      end
+    end
+
+    def import
+      @logger.info "==== import task =================================================="
+      @importer.execute
+    end
+  end
+end
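
Connector#execute runs the export phase and then the import phase; the environment variables IMPORT_ONLY, EXPORT_ONLY, and FORCE control which phases run and whether an already-completed export (detected via Exporter#completed?) is redone. A sketch of driving the two phases explicitly instead of calling #execute; the construction parameters are illustrative only:

    connector = RedshiftConnector.transport_all(
      schema: 'app_mst',
      table: 'items',
      columns: %w[id name updated_at],
      txn_id: Time.now.strftime('%Y%m%d_%H%M%S'),
      filter: nil                 # assumed: nil disables per-row filtering
    )

    # Same effect as connector.execute, written out:
    connector.export(forced: connector.export_forced?) if connector.export_enabled?
    connector.import if connector.import_enabled?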
data/lib/redshift-connector/exporter.rb
ADDED
@@ -0,0 +1,116 @@
+require 'redshift-connector/query'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  class Exporter
+    def Exporter.default_data_source=(ds)
+      @default_data_source = ds
+    end
+
+    def Exporter.default_data_source
+      @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
+    end
+
+    def Exporter.for_table_delta(ds: default_data_source, schema:, table:, condition:, columns:, bundle:, logger: RedshiftConnector.logger)
+      delta_query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
+      unload_query = UnloadQuery.new(query: delta_query, bundle: bundle)
+      new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
+    end
+
+    def Exporter.for_table(ds: default_data_source, schema:, table:, columns:, bundle:, logger: RedshiftConnector.logger)
+      query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
+      unload_query = UnloadQuery.new(query: query, bundle: bundle)
+      new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
+    end
+
+    def Exporter.foreach(**params, &block)
+      exporter = Exporter.for_query(**params)
+      begin
+        exporter.execute
+        exporter.bundle.each_row(&block)
+      ensure
+        exporter.bundle.clear
+      end
+    end
+
+    def Exporter.for_query(
+      ds: default_data_source,
+      schema:,
+      table:,
+      bucket: nil,
+      query:,
+      txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
+      filter: nil,
+      logger: RedshiftConnector.logger,
+      quiet: false
+    )
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_table(
+        bucket: bucket,
+        schema: schema,
+        table: table,
+        txn_id: txn_id,
+        filter: filter,
+        logger: logger
+      )
+      exporter = Exporter.new(
+        ds: ds,
+        query: UnloadQuery.wrap(query: query, bundle: bundle),
+        bundle: bundle,
+        logger: logger
+      )
+      exporter
+    end
+
+    def initialize(ds: self.class.default_data_source, query:, bundle:, logger: RedshiftConnector.logger)
+      @ds = ds
+      @query = query
+      @bundle = bundle
+      @logger = logger
+    end
+
+    attr_reader :query
+    attr_reader :bundle
+    attr_reader :logger
+
+    def completed?
+      @bundle.bucket.object(flag_object_key).exists?
+    end
+
+    def create_flag_object
+      @logger.info "TOUCH #{flag_object_key}"
+      @bundle.bucket.object(flag_object_key).put(body: "OK")
+    end
+
+    def flag_object_key
+      "#{File.dirname(@bundle.prefix)}/00completed"
+    end
+
+    def execute
+      @bundle.clear
+      @logger.info "EXPORT #{@query.description} -> #{@bundle.url}*"
+      @ds.connection_pool.with_connection do |conn|
+        stmt = @query.to_sql
+        @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
+        conn.execute(batch_job_label + stmt)
+      end
+      create_flag_object
+    end
+
+    def batch_job_label
+      @batch_job_label ||= begin
+        components = Dir.getwd.split('/')
+        app = if components.last == 'current'
+          # is Capistrano environment
+          components[-2]
+        else
+          components[-1]
+        end
+        batch_file = caller.detect {|c| /redshift-connector|active_record/ !~ c }
+        path = batch_file ? batch_file.split(':').first : '?'
+        "/* Job: #{app}:#{path} */ "
+      end
+    end
+  end
+end
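
Exporter.foreach is the streaming path: it UNLOADs an ad-hoc query to S3, yields each result row to the block, and clears the intermediate files in an ensure clause. A sketch, assuming Exporter.default_data_source has been set to a Redshift-backed ActiveRecord class, a default S3 bucket is configured, and that query: accepts a raw SQL string (UnloadQuery.wrap lives in query.rb, which is not shown in this section):

    RedshiftConnector::Exporter.foreach(
      schema: 'app_mst',
      table: 'items',
      query: 'select id, name from app_mst.items'
    ) do |row|
      p row   # each row as read back from the unloaded S3 files
    end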
data/lib/redshift-connector/importer.rb
ADDED
@@ -0,0 +1,89 @@
+# create module
+module RedshiftConnector
+  module Importer
+  end
+end
+
+require 'redshift-connector/importer/upsert'
+require 'redshift-connector/importer/insert_delta'
+require 'redshift-connector/importer/rebuild_rename'
+require 'redshift-connector/importer/rebuild_truncate'
+
+require 'redshift-connector/s3_data_file_bundle'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  module Importer
+    def Importer.transport_delta_from_s3(
+      bucket: nil, prefix:, format:, filter: nil,
+      table:, columns:,
+      delete_cond: nil, upsert_columns: nil,
+      logger: RedshiftConnector.logger, quiet: false)
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_prefix(
+        bucket: bucket,
+        prefix: prefix,
+        format: format,
+        filter: filter,
+        logger: logger
+      )
+      if delete_cond and upsert_columns
+        raise ArgumentError, "delete_cond and upsert_columns are exclusive"
+      end
+      importer =
+        if delete_cond
+          Importer::InsertDelta.new(
+            dao: table.classify.constantize,
+            bundle: bundle,
+            columns: columns,
+            delete_cond: delete_cond,
+            logger: logger
+          )
+        elsif upsert_columns
+          Importer::Upsert.new(
+            dao: table.classify.constantize,
+            bundle: bundle,
+            columns: columns,
+            upsert_columns: upsert_columns,
+            logger: logger
+          )
+        else
+          raise ArgumentError, "either of delete_cond or upsert_columns is required for transport_delta"
+        end
+      importer
+    end
+
+    def Importer.transport_all_from_s3(
+      strategy: 'rename',
+      bucket: nil, prefix:, format:, filter: nil,
+      table:, columns:,
+      logger: RedshiftConnector.logger, quiet: false)
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_prefix(
+        bucket: bucket,
+        prefix: prefix,
+        format: format,
+        filter: filter,
+        logger: logger
+      )
+      importer = get_rebuild_class(strategy).new(
+        dao: table.classify.constantize,
+        bundle: bundle,
+        columns: columns,
+        logger: logger
+      )
+      importer
+    end
+
+    def Importer.get_rebuild_class(strategy)
+      case strategy.to_s
+      when 'rename' then RebuildRename
+      when 'truncate' then RebuildTruncate
+      else
+        raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
+      end
+    end
+  end
+end
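
Importer.transport_delta_from_s3 and transport_all_from_s3 skip the Redshift export entirely and load data files that already exist under an S3 prefix. A sketch; the prefix and column names are invented, and format is assumed to name one of the bundled readers (csv, tsv, redshift_csv):

    importer = RedshiftConnector::Importer.transport_delta_from_s3(
      prefix: 'export/app_mst/items/20170101_000000/',   # illustrative S3 prefix
      format: 'csv',
      table: 'items',
      columns: %w[id name updated_at],
      delete_cond: "updated_at > now() - interval 1 day"
    )
    importer.execute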
data/lib/redshift-connector/importer/insert_delta.rb
ADDED
@@ -0,0 +1,32 @@
+require 'redshift-connector/importer/activerecord-import'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  class Importer::InsertDelta
+    def initialize(dao:, bundle:, columns:, delete_cond:, logger: RedshiftConnector.logger)
+      @dao = dao
+      @bundle = bundle
+      @columns = columns
+      @delete_cond = delete_cond
+      @logger = logger
+    end
+
+    def execute
+      delete_rows(@delete_cond)
+      import
+    end
+
+    def delete_rows(cond_expr)
+      @logger.info "DELETE #{@dao.table_name} where (#{cond_expr})"
+      @dao.connection.execute("delete from #{@dao.table_name} where #{cond_expr}")
+      @logger.info "deleted."
+    end
+
+    def import
+      @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
+      @bundle.each_batch do |rows|
+        @dao.import(@columns, rows)
+      end
+    end
+  end
+end
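
InsertDelta realizes the delta load as a plain DELETE of the affected window followed by batched bulk INSERTs via activerecord-import. A sketch with invented values (Item is a hypothetical ActiveRecord model, bundle an S3DataFileBundle built elsewhere):

    # Effective sequence for dao = Item, delete_cond = "updated_at > '2017-01-01'":
    #   delete from items where updated_at > '2017-01-01'
    #   Item.import(%w[id name updated_at], rows)   # once per batch read from the bundle
    importer = RedshiftConnector::Importer::InsertDelta.new(
      dao: Item,
      bundle: bundle,
      columns: %w[id name updated_at],
      delete_cond: "updated_at > '2017-01-01'"
    )
    importer.execute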
data/lib/redshift-connector/importer/rebuild_rename.rb
ADDED
@@ -0,0 +1,41 @@
+require 'redshift-connector/importer/activerecord-import'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  class Importer::RebuildRename
+    def initialize(dao:, bundle:, columns:, logger: RedshiftConnector.logger)
+      @dao = dao
+      @bundle = bundle
+      @columns = columns
+      @logger = logger
+    end
+
+    def execute
+      dest_table = @dao.table_name
+      tmp_table = "#{dest_table}_new"
+      old_table = "#{dest_table}_old"
+
+      tmp_dao = @dao.dup
+      tmp_dao.table_name = tmp_table
+
+      exec_update "drop table if exists #{tmp_table}"
+      exec_update "create table #{tmp_table} like #{dest_table}"
+      import(tmp_dao)
+      exec_update "drop table if exists #{old_table}"
+      # Atomic table exchange
+      exec_update "rename table #{dest_table} to #{old_table}, #{tmp_table} to #{dest_table}"
+    end
+
+    def exec_update(query)
+      @logger.info query
+      @dao.connection.execute(query)
+    end
+
+    def import(dao)
+      @logger.info "IMPORT #{@bundle.url}* -> #{dao.table_name} (#{@columns.join(', ')})"
+      @bundle.each_batch do |rows|
+        dao.import(@columns, rows)
+      end
+    end
+  end
+end
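
RebuildRename rebuilds the destination table out of place and swaps it in with a single MySQL RENAME TABLE, so readers never observe a half-loaded table; the previous contents survive as <table>_old until the next run drops them. A sketch with invented names (Item and bundle as above):

    # Statements issued by #execute when @dao.table_name == "items":
    #   drop table if exists items_new
    #   create table items_new like items
    #   (bulk import into items_new via activerecord-import)
    #   drop table if exists items_old
    #   rename table items to items_old, items_new to items
    importer = RedshiftConnector::Importer::RebuildRename.new(
      dao: Item,
      bundle: bundle,
      columns: %w[id name updated_at]
    )
    importer.execute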