redshift-connector 4.3.0
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- checksums.yaml +7 -0
- data/README.md +10 -0
- data/lib/redshift-connector.rb +31 -0
- data/lib/redshift-connector/connector.rb +146 -0
- data/lib/redshift-connector/exporter.rb +116 -0
- data/lib/redshift-connector/importer.rb +89 -0
- data/lib/redshift-connector/importer/activerecord-import.rb +2 -0
- data/lib/redshift-connector/importer/insert_delta.rb +32 -0
- data/lib/redshift-connector/importer/rebuild_rename.rb +41 -0
- data/lib/redshift-connector/importer/rebuild_truncate.rb +31 -0
- data/lib/redshift-connector/importer/upsert.rb +25 -0
- data/lib/redshift-connector/logger.rb +20 -0
- data/lib/redshift-connector/query.rb +93 -0
- data/lib/redshift-connector/reader.rb +18 -0
- data/lib/redshift-connector/reader/abstract.rb +18 -0
- data/lib/redshift-connector/reader/csv.rb +24 -0
- data/lib/redshift-connector/reader/exception.rb +3 -0
- data/lib/redshift-connector/reader/redshift_csv.rb +54 -0
- data/lib/redshift-connector/reader/tsv.rb +24 -0
- data/lib/redshift-connector/s3_bucket.rb +72 -0
- data/lib/redshift-connector/s3_data_file.rb +34 -0
- data/lib/redshift-connector/s3_data_file_bundle.rb +101 -0
- data/lib/redshift-connector/version.rb +3 -0
- data/test/all.rb +3 -0
- data/test/config.rb +13 -0
- data/test/config.rb.example +18 -0
- data/test/database.yml +15 -0
- data/test/database.yml.example +15 -0
- data/test/foreach.rb +5 -0
- data/test/helper.rb +25 -0
- data/test/item_pvs.ct.mysql +11 -0
- data/test/item_pvs.ct.redshift +9 -0
- data/test/reader/test_redshift_csv.rb +30 -0
- data/test/test_connector.rb +148 -0
- data/test/test_reader.rb +10 -0
- data/test/test_s3_import.rb +32 -0
- metadata +190 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: c5d5bf307943f19b5187a9244ecd89717241d72f
+  data.tar.gz: 1651cd723205ee50247c5e407f39aaa1eafd287d
+SHA512:
+  metadata.gz: ff3ef2d4b7e647617529313df5b1d24eb9bf537672ba4aeba3de6542d2fa60589576854b308bc7db5b34c853e1cf30a1a517f6cfd1c58b876018dd6c6fcab7c8
+  data.tar.gz: 51de6b97fd7099a2dfaf401b1f230e49f991c623cd43949bbf9e8a4e5f822180040b024638938613aaa89fdbaced93f5c9e27bf6e5e57c8e3d6c24a486f8c06b
data/lib/redshift-connector.rb
ADDED
@@ -0,0 +1,31 @@
+module RedshiftConnector
+end
+
+require 'redshift-connector/connector'
+require 'redshift-connector/exporter'
+require 'redshift-connector/importer'
+require 'redshift-connector/s3_bucket'
+require 'redshift-connector/s3_data_file_bundle'
+require 'redshift-connector/version'
+
+module RedshiftConnector
+  def RedshiftConnector.transport_delta(**params)
+    Connector.transport_delta(**params)
+  end
+
+  def RedshiftConnector.transport_all(**params)
+    Connector.transport_all(**params)
+  end
+
+  def RedshiftConnector.transport_delta_from_s3(**params)
+    Importer.transport_delta_from_s3(**params)
+  end
+
+  def RedshiftConnector.transport_all_from_s3(**params)
+    Importer.transport_all_from_s3(**params)
+  end
+
+  def RedshiftConnector.foreach(**params, &block)
+    Exporter.foreach(**params, &block)
+  end
+end
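This file is the gem's entry point: each module-level method delegates to Connector, Importer, or Exporter. A minimal usage sketch follows; it is not taken from the package, the schema, table, column names, and conditions are invented for illustration, an ActiveRecord model named PageView is assumed to exist on the MySQL side, and the semantics of the filter argument are assumed.

```ruby
require 'redshift-connector'

# Hypothetical delta transport: UNLOAD recent rows from Redshift to S3,
# delete the matching rows in MySQL, then bulk-insert the unloaded data.
connector = RedshiftConnector.transport_delta(
  schema: 'public',
  table: 'page_views',                                   # resolved to the PageView model via classify.constantize
  condition: "updated_at > dateadd(day, -1, getdate())", # Redshift-side delta condition
  columns: %w[id url updated_at],
  delete_cond: "updated_at >= current_date - interval 1 day",  # MySQL-side delete condition
  txn_id: '20170101_000000',                             # distinguishes this run's S3 prefix
  filter: ->(*row) { row }                               # per-row filter while reading unloaded files (assumed semantics)
)
connector.execute
```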
data/lib/redshift-connector/connector.rb
ADDED
@@ -0,0 +1,146 @@
+require 'redshift-connector/exporter'
+require 'redshift-connector/importer'
+require 'redshift-connector/s3_data_file_bundle'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  class Connector
+    def Connector.transport_delta(
+      schema:,
+      table: nil,
+      src_table: table,
+      dest_table: table,
+      condition:,
+      columns:,
+      delete_cond: nil,
+      upsert_columns: nil,
+      bucket: nil,
+      txn_id:, filter:,
+      logger: RedshiftConnector.logger,
+      quiet: false
+    )
+      unless src_table and dest_table
+        raise ArgumentError, "missing :table, :src_table or :dest_table"
+      end
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_table(
+        bucket: bucket,
+        schema: schema,
+        table: src_table,
+        txn_id: txn_id,
+        filter: filter,
+        logger: logger
+      )
+      exporter = Exporter.for_table_delta(
+        bundle: bundle,
+        schema: schema,
+        table: src_table,
+        columns: columns,
+        condition: condition,
+        logger: logger
+      )
+      if delete_cond and upsert_columns
+        raise ArgumentError, "delete_cond and upsert_columns are exclusive"
+      end
+      dao = dest_table.classify.constantize
+      importer =
+        if delete_cond
+          Importer::InsertDelta.new(
+            dao: dao,
+            bundle: bundle,
+            columns: columns,
+            delete_cond: delete_cond,
+            logger: logger
+          )
+        elsif upsert_columns
+          Importer::Upsert.new(
+            dao: dao,
+            bundle: bundle,
+            columns: columns,
+            upsert_columns: upsert_columns,
+            logger: logger
+          )
+        else
+          raise ArgumentError, "either of delete_cond or upsert_columns is required for transport_delta"
+        end
+      new(exporter: exporter, importer: importer, logger: logger)
+    end
+
+    def Connector.transport_all(
+      strategy: 'rename',
+      schema:,
+      table:,
+      src_table: table,
+      dest_table: table,
+      columns:,
+      bucket: nil,
+      txn_id:,
+      filter:,
+      logger: RedshiftConnector.logger,
+      quiet: false
+    )
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_table(
+        bucket: bucket,
+        schema: schema,
+        table: table,
+        txn_id: txn_id,
+        filter: filter,
+        logger: logger
+      )
+      exporter = Exporter.for_table(
+        bundle: bundle,
+        schema: schema,
+        table: table,
+        columns: columns,
+        logger: logger
+      )
+      importer = Importer.get_rebuild_class(strategy).new(
+        dao: table.classify.constantize,
+        bundle: bundle,
+        columns: columns,
+        logger: logger
+      )
+      new(exporter: exporter, importer: importer, logger: logger)
+    end
+
+    def initialize(exporter:, importer:, logger:)
+      @exporter = exporter
+      @importer = importer
+      @logger = logger
+    end
+
+    def export_enabled?
+      not ENV['IMPORT_ONLY']
+    end
+
+    def export_forced?
+      !! (ENV['EXPORT_ONLY'] or ENV['FORCE'])
+    end
+
+    def import_enabled?
+      not ENV['EXPORT_ONLY']
+    end
+
+    def execute
+      export(forced: export_forced?) if export_enabled?
+      import if import_enabled?
+    end
+
+    def export(forced: false)
+      @logger.info "==== export task =================================================="
+      if not forced and @exporter.completed?
+        @logger.info "export task is already executed; skip"
+      else
+        @exporter.execute
+      end
+    end
+
+    def import
+      @logger.info "==== import task =================================================="
+      @importer.execute
+    end
+  end
+end
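Connector#execute consults a few environment variables (see export_enabled?, export_forced?, and import_enabled? above), so the same batch script can be re-run for only one half of the transport. A sketch of the implied behavior, reusing the hypothetical connector from the earlier example:

```ruby
# EXPORT_ONLY: run only the UNLOAD-to-S3 step; it also forces a re-export even if
# the "00completed" flag object already exists (FORCE has the same forcing effect).
ENV['EXPORT_ONLY'] = '1'
connector.execute

# IMPORT_ONLY: skip the export entirely and only load the already-unloaded
# S3 files into MySQL.
ENV.delete('EXPORT_ONLY')
ENV['IMPORT_ONLY'] = '1'
connector.execute
```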
data/lib/redshift-connector/exporter.rb
ADDED
@@ -0,0 +1,116 @@
+require 'redshift-connector/query'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  class Exporter
+    def Exporter.default_data_source=(ds)
+      @default_data_source = ds
+    end
+
+    def Exporter.default_data_source
+      @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
+    end
+
+    def Exporter.for_table_delta(ds: default_data_source, schema:, table:, condition:, columns:, bundle:, logger: RedshiftConnector.logger)
+      delta_query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
+      unload_query = UnloadQuery.new(query: delta_query, bundle: bundle)
+      new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
+    end
+
+    def Exporter.for_table(ds: default_data_source, schema:, table:, columns:, bundle:, logger: RedshiftConnector.logger)
+      query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
+      unload_query = UnloadQuery.new(query: query, bundle: bundle)
+      new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
+    end
+
+    def Exporter.foreach(**params, &block)
+      exporter = Exporter.for_query(**params)
+      begin
+        exporter.execute
+        exporter.bundle.each_row(&block)
+      ensure
+        exporter.bundle.clear
+      end
+    end
+
+    def Exporter.for_query(
+      ds: default_data_source,
+      schema:,
+      table:,
+      bucket: nil,
+      query:,
+      txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
+      filter: nil,
+      logger: RedshiftConnector.logger,
+      quiet: false
+    )
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_table(
+        bucket: bucket,
+        schema: schema,
+        table: table,
+        txn_id: txn_id,
+        filter: filter,
+        logger: logger
+      )
+      exporter = Exporter.new(
+        ds: ds,
+        query: UnloadQuery.wrap(query: query, bundle: bundle),
+        bundle: bundle,
+        logger: logger
+      )
+      exporter
+    end
+
+    def initialize(ds: self.class.default_data_source, query:, bundle:, logger: RedshiftConnector.logger)
+      @ds = ds
+      @query = query
+      @bundle = bundle
+      @logger = logger
+    end
+
+    attr_reader :query
+    attr_reader :bundle
+    attr_reader :logger
+
+    def completed?
+      @bundle.bucket.object(flag_object_key).exists?
+    end
+
+    def create_flag_object
+      @logger.info "TOUCH #{flag_object_key}"
+      @bundle.bucket.object(flag_object_key).put(body: "OK")
+    end
+
+    def flag_object_key
+      "#{File.dirname(@bundle.prefix)}/00completed"
+    end
+
+    def execute
+      @bundle.clear
+      @logger.info "EXPORT #{@query.description} -> #{@bundle.url}*"
+      @ds.connection_pool.with_connection do |conn|
+        stmt = @query.to_sql
+        @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
+        conn.execute(batch_job_label + stmt)
+      end
+      create_flag_object
+    end
+
+    def batch_job_label
+      @batch_job_label ||= begin
+        components = Dir.getwd.split('/')
+        app = if components.last == 'current'
+          # is Capistrano environment
+          components[-2]
+        else
+          components[-1]
+        end
+        batch_file = caller.detect {|c| /redshift-connector|active_record/ !~ c }
+        path = batch_file ? batch_file.split(':').first : '?'
+        "/* Job: #{app}:#{path} */ "
+      end
+    end
+  end
+end
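Exporter.default_data_source must be assigned before any export runs, otherwise default_data_source raises. A hypothetical setup and streaming read through the foreach entry point; the Redshift class name is invented, and a raw SQL string is assumed to be acceptable where UnloadQuery.wrap (defined in query.rb, not shown in this hunk) expects a query:

```ruby
# One-time setup, e.g. in an initializer. Any object exposing an ActiveRecord-style
# #connection_pool for the Redshift connection is assumed to work here.
RedshiftConnector::Exporter.default_data_source = Redshift

# UNLOAD an ad-hoc query to S3, then yield each row from the resulting data files.
RedshiftConnector.foreach(
  schema: 'public',
  table: 'page_views',
  query: 'select id, url from public.page_views'  # raw SQL assumed; exact query type comes from query.rb
) do |row|
  # row comes from S3DataFileBundle#each_row; the temporary files are cleared afterwards
end
```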
data/lib/redshift-connector/importer.rb
ADDED
@@ -0,0 +1,89 @@
+# create module
+module RedshiftConnector
+  module Importer
+  end
+end
+
+require 'redshift-connector/importer/upsert'
+require 'redshift-connector/importer/insert_delta'
+require 'redshift-connector/importer/rebuild_rename'
+require 'redshift-connector/importer/rebuild_truncate'
+
+require 'redshift-connector/s3_data_file_bundle'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  module Importer
+    def Importer.transport_delta_from_s3(
+          bucket: nil, prefix:, format:, filter: nil,
+          table:, columns:,
+          delete_cond: nil, upsert_columns: nil,
+          logger: RedshiftConnector.logger, quiet: false)
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_prefix(
+        bucket: bucket,
+        prefix: prefix,
+        format: format,
+        filter: filter,
+        logger: logger
+      )
+      if delete_cond and upsert_columns
+        raise ArgumentError, "delete_cond and upsert_columns are exclusive"
+      end
+      importer =
+        if delete_cond
+          Importer::InsertDelta.new(
+            dao: table.classify.constantize,
+            bundle: bundle,
+            columns: columns,
+            delete_cond: delete_cond,
+            logger: logger
+          )
+        elsif upsert_columns
+          Importer::Upsert.new(
+            dao: table.classify.constantize,
+            bundle: bundle,
+            columns: columns,
+            upsert_columns: upsert_columns,
+            logger: logger
+          )
+        else
+          raise ArgumentError, "either of delete_cond or upsert_columns is required for transport_delta"
+        end
+      importer
+    end
+
+    def Importer.transport_all_from_s3(
+          strategy: 'rename',
+          bucket: nil, prefix:, format:, filter: nil,
+          table:, columns:,
+          logger: RedshiftConnector.logger, quiet: false)
+      bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
+      logger = NullLogger.new if quiet
+      bundle = S3DataFileBundle.for_prefix(
+        bucket: bucket,
+        prefix: prefix,
+        format: format,
+        filter: filter,
+        logger: logger
+      )
+      importer = get_rebuild_class(strategy).new(
+        dao: table.classify.constantize,
+        bundle: bundle,
+        columns: columns,
+        logger: logger
+      )
+      importer
+    end
+
+    def Importer.get_rebuild_class(strategy)
+      case strategy.to_s
+      when 'rename' then RebuildRename
+      when 'truncate' then RebuildTruncate
+      else
+        raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
+      end
+    end
+  end
+end
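Unlike Connector.transport_delta, these entry points never touch Redshift: they build an importer for data files that already sit under an S3 prefix. A hypothetical call (the prefix, format value, model name, and conditions are assumptions, not taken from the package):

```ruby
importer = RedshiftConnector.transport_delta_from_s3(
  prefix: 'redshift-dumps/page_views/20170101_000000/',  # S3 prefix that already holds unloaded files
  format: 'csv',                    # reader name; csv, tsv, and redshift_csv readers ship with the gem (accepted values assumed)
  table: 'page_views',              # resolved to the PageView model via classify.constantize
  columns: %w[id url updated_at],
  delete_cond: 'updated_at >= current_date'
)
importer.execute                    # delete matching rows, then bulk-insert from the S3 files
```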
data/lib/redshift-connector/importer/insert_delta.rb
ADDED
@@ -0,0 +1,32 @@
+require 'redshift-connector/importer/activerecord-import'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  class Importer::InsertDelta
+    def initialize(dao:, bundle:, columns:, delete_cond:, logger: RedshiftConnector.logger)
+      @dao = dao
+      @bundle = bundle
+      @columns = columns
+      @delete_cond = delete_cond
+      @logger = logger
+    end
+
+    def execute
+      delete_rows(@delete_cond)
+      import
+    end
+
+    def delete_rows(cond_expr)
+      @logger.info "DELETE #{@dao.table_name} where (#{cond_expr})"
+      @dao.connection.execute("delete from #{@dao.table_name} where #{cond_expr}")
+      @logger.info "deleted."
+    end
+
+    def import
+      @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
+      @bundle.each_batch do |rows|
+        @dao.import(@columns, rows)
+      end
+    end
+  end
+end
data/lib/redshift-connector/importer/rebuild_rename.rb
ADDED
@@ -0,0 +1,41 @@
+require 'redshift-connector/importer/activerecord-import'
+require 'redshift-connector/logger'
+
+module RedshiftConnector
+  class Importer::RebuildRename
+    def initialize(dao:, bundle:, columns:, logger: RedshiftConnector.logger)
+      @dao = dao
+      @bundle = bundle
+      @columns = columns
+      @logger = logger
+    end
+
+    def execute
+      dest_table = @dao.table_name
+      tmp_table = "#{dest_table}_new"
+      old_table = "#{dest_table}_old"
+
+      tmp_dao = @dao.dup
+      tmp_dao.table_name = tmp_table
+
+      exec_update "drop table if exists #{tmp_table}"
+      exec_update "create table #{tmp_table} like #{dest_table}"
+      import(tmp_dao)
+      exec_update "drop table if exists #{old_table}"
+      # Atomic table exchange
+      exec_update "rename table #{dest_table} to #{old_table}, #{tmp_table} to #{dest_table}"
+    end
+
+    def exec_update(query)
+      @logger.info query
+      @dao.connection.execute(query)
+    end
+
+    def import(dao)
+      @logger.info "IMPORT #{@bundle.url}* -> #{dao.table_name} (#{@columns.join(', ')})"
+      @bundle.each_batch do |rows|
+        dao.import(@columns, rows)
+      end
+    end
+  end
+end
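RebuildRename is the strategy selected by Importer.get_rebuild_class for transport_all(strategy: 'rename', ...): it loads into a side table and then swaps it in with a single RENAME TABLE, which MySQL performs atomically, so readers never observe a half-loaded table. A hypothetical end-to-end call (names invented; passing nil for the required filter keyword is assumed to mean "no filtering"):

```ruby
RedshiftConnector.transport_all(
  strategy: 'rename',              # 'truncate' would select Importer::RebuildTruncate instead
  schema: 'public',
  table: 'page_views',
  columns: %w[id url updated_at],
  txn_id: '20170101_000000',
  filter: nil
).execute
```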