redshift_connector 8.0.0

Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.gitignore +20 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE +21 -0
  5. data/README.md +42 -0
  6. data/RELEASE.md +89 -0
  7. data/Rakefile +3 -0
  8. data/lib/redshift_connector.rb +35 -0
  9. data/lib/redshift_connector/active_record_data_source.rb +23 -0
  10. data/lib/redshift_connector/active_record_exporter.rb +47 -0
  11. data/lib/redshift_connector/connector.rb +189 -0
  12. data/lib/redshift_connector/data_file.rb +32 -0
  13. data/lib/redshift_connector/data_file_bundle_params.rb +25 -0
  14. data/lib/redshift_connector/data_file_bundle_reader.rb +72 -0
  15. data/lib/redshift_connector/exception.rb +5 -0
  16. data/lib/redshift_connector/exporter.rb +40 -0
  17. data/lib/redshift_connector/exporter_builder.rb +49 -0
  18. data/lib/redshift_connector/immediate_exporter.rb +19 -0
  19. data/lib/redshift_connector/importer.rb +58 -0
  20. data/lib/redshift_connector/importer/activerecord-import.rb +2 -0
  21. data/lib/redshift_connector/importer/insert_delta.rb +31 -0
  22. data/lib/redshift_connector/importer/rebuild_rename.rb +58 -0
  23. data/lib/redshift_connector/importer/rebuild_truncate.rb +30 -0
  24. data/lib/redshift_connector/importer/upsert.rb +24 -0
  25. data/lib/redshift_connector/logger.rb +20 -0
  26. data/lib/redshift_connector/query.rb +95 -0
  27. data/lib/redshift_connector/reader.rb +18 -0
  28. data/lib/redshift_connector/reader/abstract.rb +18 -0
  29. data/lib/redshift_connector/reader/csv.rb +24 -0
  30. data/lib/redshift_connector/reader/exception.rb +3 -0
  31. data/lib/redshift_connector/reader/redshift_csv.rb +25 -0
  32. data/lib/redshift_connector/reader/tsv.rb +24 -0
  33. data/lib/redshift_connector/s3_bucket.rb +76 -0
  34. data/lib/redshift_connector/s3_data_file.rb +20 -0
  35. data/lib/redshift_connector/s3_data_file_bundle.rb +68 -0
  36. data/lib/redshift_connector/version.rb +3 -0
  37. data/redshift_connector.gemspec +27 -0
  38. metadata +190 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: 6dfd6aa65bfc44fca5d3e0dc7e042911344be4213764e267136f7259b3529a90
+   data.tar.gz: f12016b3044c1199e0b0e3bba4c8e8826e6a72b80d40088d849110dd4624607f
+ SHA512:
+   metadata.gz: 68e8a3265255b168cbb40b1b200902922fb9641035533cf561ab69feee1d825d15dd999f3d0fb9dfbfc0b00349be662d713c1ca5ac3da418d49c44ad03771e20
+   data.tar.gz: 59cf47cc0aaec30d8025497dc11347904d50f9be8a5e68086d9c2952f773619b27be3c8ed5edf8a9f6708afe370ba1dfac7cb48c1602c7cbbb23d33e0457b054
data/.gitignore ADDED
@@ -0,0 +1,20 @@
+ # Packaging
+ *.gem
+ /spec/reports/
+ /spec/examples.txt
+
+ # Documents
+ /.yardoc/
+ /_yardoc/
+ /doc/
+ /rdoc/
+
+ # Bundler
+ /.bundle/
+ /vendor/bundle
+ /lib/bundler/man/
+ Gemfile.lock
+
+ # Gem Specific
+ test/database.yml
+ test/config.rb
data/Gemfile ADDED
@@ -0,0 +1,7 @@
+ source "https://rubygems.org"
+ gemspec
+
+ # We need an explicit version specification (4 or 5) here
+ # to resolve version dependencies correctly.
+ gem 'activerecord', '~> 5.0'
+ gem 'mysql2'
data/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2016,2017 Minero Aoki
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,42 @@
+ # Redshift Connector for Rails
+
+ redshift_connector is a Redshift bulk data connector for Rails (ActiveRecord).
+
+ This library was formerly called "redshift-connector",
+ but was renamed to "redshift_connector" to follow the gem naming convention.
+
+ ## Settings
+
+ Add the following line to your Gemfile and run `bundle install`:
+ ```
+ gem 'redshift_connector'
+ ```
+ Then create config/initializers/redshift_connector.rb like the following:
+ ```
+ module RedshiftConnector
+   Exporter.default_data_source = Any_ActiveRecord_Class_Bound_To_Redshift
+
+   S3Bucket.add('primary', default: true,
+     region: 'YOUR_AWS_REGION_NAME',
+     bucket: 'YOUR_BUCKET_NAME',
+     prefix: 'YOUR_PREFIX',
+     iam_role: 'arn:aws:iam::XXXXXXXXXXXX:role/RedshiftReadOnly'
+     # For explicit S3 access, use the following:
+     # aws_access_key_id: 'XXXXXXXXXXXXX',
+     # aws_secret_access_key: 'XXXXXXXXXXXXX'
+   )
+ end
+ ```
+
+ ## Usage
+
+ ### Fetching rows
+
+ ```
+ RedshiftConnector.foreach(schema: 'app_mst', table: 'shops', query: 'select id, name from app_mst.shops') do |id, name|
+   p [id, name]
+ end
+ ```
+ `schema` and `table` give the name of the source table (the one written in the query).
+ This method executes a Redshift UNLOAD statement with the given query,
+ unloads the result to an intermediate S3 location, and then reads the contents back.
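+
+ ### Importing delta rows
+
+ The facade also provides `RedshiftConnector.transport_delta`. The following is
+ a minimal sketch based only on that method's keyword arguments as defined in
+ connector.rb; the schema, table and column names, the condition, and the filter
+ body are placeholder assumptions, not values shipped with this gem.
+ ```
+ # transport_delta returns a Connector; #execute runs the export task,
+ # then the import task.
+ connector = RedshiftConnector.transport_delta(
+   schema: 'app_mst',
+   table: 'shops',    # used as both src_table and dest_table
+   condition: "updated_at > dateadd(day, -1, getdate())",
+   columns: %w[id name updated_at],
+   filter: ->(*row) { row }    # passed through to DataFileBundleReader
+ )
+ connector.execute
+ ```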
data/RELEASE.md ADDED
@@ -0,0 +1,89 @@
+ # Release Notes
+
+ ## version 8.0.0
+
+ - [INCOMPATIBLE] This library has been renamed to "redshift_connector". Just change "redshift-connector" to "redshift_connector" in your Gemfile.
+ - [INCOMPATIBLE] The redshift-connector-data_file gem has been merged in.
+ - [INCOMPATIBLE] (internal) *DataFileBundle#each, #each_row, #each_object, #each_batch and #all_data_objects are removed. Use the DataFileBundleReader class instead.
+ - [INCOMPATIBLE] (internal) The AbstractDataFileBundle class is removed.
+ - [INCOMPATIBLE] (internal) The AbstractDataFile class is removed.
+
+ ## version 7.2.2
+
+ - [fix] RedshiftConnector.transport_all: the src_table/dest_table parameters did not work.
+ - [fix] RedshiftConnector.transport_all (strategy=rename): newer activerecord-import requires the class name.
+
+ ## version 7.2.1
+
+ - no change.
+
+ ## version 7.2.0
+
+ - Removes the aws-sdk dependency.
+
+ ## version 7.0.2
+
+ - [fix] RedshiftConnector.foreach did not work.
+
+ ## version 7.0.1
+
+ - [fix] RedshiftConnector.transport_delta_from_s3 and .transport_all_from_s3 were wrongly dropped; restores them.
+
+ ## version 7.0.0
+
+ - [INCOMPATIBLE] Library hierarchy changed: redshift-connector/* -> redshift_connector/*. redshift-connector.rb still exists as an entry point for bundler.
+ - [new] The exporter becomes pluggable. You can implement your own exporter data source instead of ActiveRecord.
+
+ ## version 6.0.0
+
+ - version number change only.
+
+ ## version 5.6.0
+
+ - Unifies versions 4.x (supports Rails 4) and 5.x (supports Rails 5).
+
+ ## version 4.5.0 / 5.5.0
+
+ - [new] Separates the S3 access layer into another gem: redshift-connector-data_file.
+
+ ## version 4.4.1 / 5.4.1
+
+ - [new] New option enable_sort for Connector.foreach, to enforce global sorting.
+
+ ## version 4.4.0 / 5.4.0
+
+ - [CHANGE] Drops the export-only-once feature (and the FORCE environment switch); it was not very useful.
+   The exporter now always exports data.
+
+ ## version 4.3.2 / 5.3.2
+
+ - [new] Allows reading from S3 signed URLs (for separated export/import processes).
+
+ ## version 4.3.1 / 5.3.1
+
+ - First release for the Rails 5 series.
+ - [fix] Adds an option for AWS multi-region support.
+
+ ## version 4.3.0
+
+ - [new] New method RedshiftConnector.foreach to read rows with UNLOAD.
+
+ ## version 4.2.0
+
+ - [new] New methods RedshiftConnector.transport_delta_from_s3 and .transport_all_from_s3 to read from S3.
+
+ ## version 4.1.0
+
+ - [new] Introduces the rebuild operator. New facade method Connector.transport_all.
+
+ ## version 4.0.2
+
+ - [fix] Correctly parses UNLOAD-generated CSV (dangerous characters are escaped by backslash).
+
+ ## version 4.0.1
+
+ - [new] Allows configuring the default logger via RedshiftConnector.logger=.
+
+ ## version 4.0.0
+
+ - First release for the Rails 4 series.
data/Rakefile ADDED
@@ -0,0 +1,3 @@
+ task :test do
+   load "#{__dir__}/test/all.rb"
+ end
data/lib/redshift_connector.rb ADDED
@@ -0,0 +1,35 @@
+ module RedshiftConnector
+ end
+
+ require 'redshift_connector/connector'
+ require 'redshift_connector/exporter'
+ require 'redshift_connector/active_record_data_source'
+ require 'redshift_connector/active_record_exporter'
+ require 'redshift_connector/immediate_exporter'
+ require 'redshift_connector/importer'
+ require 'redshift_connector/s3_bucket'
+ require 'redshift_connector/s3_data_file_bundle'
+ require 'redshift_connector/exception'
+ require 'redshift_connector/version'
+
+ module RedshiftConnector
+   def RedshiftConnector.transport_delta(**params)
+     Connector.transport_delta(**params)
+   end
+
+   def RedshiftConnector.transport_all(**params)
+     Connector.transport_all(**params)
+   end
+
+   def RedshiftConnector.transport_delta_from_s3(**params)
+     Connector.transport_delta_from_s3(**params)
+   end
+
+   def RedshiftConnector.transport_all_from_s3(**params)
+     Connector.transport_all_from_s3(**params)
+   end
+
+   def RedshiftConnector.foreach(**params, &block)
+     Exporter.foreach(**params, &block)
+   end
+ end
data/lib/redshift_connector/active_record_data_source.rb ADDED
@@ -0,0 +1,23 @@
+ require 'redshift_connector/exporter_builder'
+
+ module RedshiftConnector
+   class ActiveRecordDataSource
+     def ActiveRecordDataSource.for_dao(dao)
+       new(dao)
+     end
+
+     def initialize(dao)
+       @dao = dao
+     end
+
+     def exporter_builder
+       ExporterBuilder.new(ds: self, exporter_class: ActiveRecordExporter)
+     end
+
+     def execute_query(query_str)
+       @dao.connection_pool.with_connection {|conn|
+         conn.execute(query_str)
+       }
+     end
+   end
+ end
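ActiveRecordDataSource is the reference implementation of the pluggable data source interface mentioned in the 7.0.0 release notes: a data source provides an exporter_builder and an execute_query method. As a hedged sketch (not part of this gem), a custom data source wrapping a raw connection object might look like the following; the class name and the #execute call on the wrapped connection are assumptions.

```ruby
# Hypothetical data source backed by any object responding to #execute.
# The two-method contract (exporter_builder, execute_query) is inferred
# from ActiveRecordDataSource above; ActiveRecordExporter only calls
# ds.execute_query, so it works with any such data source.
class RawConnectionDataSource
  def initialize(conn)
    @conn = conn
  end

  def exporter_builder
    RedshiftConnector::ExporterBuilder.new(
      ds: self,
      exporter_class: RedshiftConnector::ActiveRecordExporter
    )
  end

  def execute_query(query_str)
    @conn.execute(query_str)
  end
end
```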
data/lib/redshift_connector/active_record_exporter.rb ADDED
@@ -0,0 +1,47 @@
+ require 'redshift_connector/s3_data_file_bundle'
+ require 'redshift_connector/query'
+ require 'redshift_connector/logger'
+
+ module RedshiftConnector
+   class ActiveRecordExporter
+     def initialize(ds:, query:, bundle_params:, enable_sort: false, logger: RedshiftConnector.logger)
+       @ds = ds
+       @query = query
+       @bundle_params = bundle_params
+       @enable_sort = enable_sort
+       @logger = logger
+
+       @bundle = S3DataFileBundle.for_params(bundle_params)
+     end
+
+     attr_reader :query
+     attr_reader :bundle_params
+     attr_reader :bundle
+     attr_reader :logger
+
+     def execute
+       @bundle.clear
+       unload_query = UnloadQuery.new(query: @query, bundle: @bundle, enable_sort: @enable_sort)
+       @logger.info "EXPORT #{unload_query.description} -> #{@bundle.url}*"
+       stmt = unload_query.to_sql
+       @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
+       @ds.execute_query(batch_job_label + stmt)
+       @bundle
+     end
+
+     # Builds a SQL comment identifying the batch job, prepended to the
+     # UNLOAD statement so it shows up in Redshift query logs.
+     def batch_job_label
+       @batch_job_label ||= begin
+         components = Dir.getwd.split('/')
+         app = if components.last == 'current'
+           # is Capistrano environment
+           components[-2]
+         else
+           components[-1]
+         end
+         batch_file = caller.detect {|c| /redshift_connector|active_record/ !~ c }
+         path = batch_file ? batch_file.split(':').first : '?'
+         "/* Job: #{app}:#{path} */ "
+       end
+     end
+   end
+ end
data/lib/redshift_connector/connector.rb ADDED
@@ -0,0 +1,189 @@
+ require 'redshift_connector/exporter'
+ require 'redshift_connector/immediate_exporter'
+ require 'redshift_connector/importer'
+ require 'redshift_connector/s3_data_file_bundle'
+ require 'redshift_connector/data_file_bundle_params'
+ require 'redshift_connector/data_file_bundle_reader'
+ require 'redshift_connector/logger'
+
+ module RedshiftConnector
+   class Connector
+     def Connector.transport_delta_from_s3(
+       bucket: nil,
+       prefix:,
+       format:,
+       filter: nil,
+       table:,
+       columns:,
+       delete_cond: nil,
+       upsert_columns: nil,
+       logger: RedshiftConnector.logger,
+       quiet: false
+     )
+       logger = NullLogger.new if quiet
+       bundle = S3DataFileBundle.for_prefix(
+         bucket: (bucket ? S3Bucket.get(bucket) : S3Bucket.default),
+         prefix: prefix,
+         format: format,
+         logger: logger
+       )
+       exporter = ImmediateExporter.new(bundle: bundle, logger: logger)
+       importer = Importer.for_delta_upsert(
+         table: table,
+         columns: columns,
+         delete_cond: delete_cond,
+         upsert_columns: upsert_columns,
+         logger: logger
+       )
+       new(exporter: exporter, importer: importer, filter: filter, logger: logger)
+     end
+
+     def Connector.transport_delta(
+       schema:,
+       table: nil,
+       src_table: table,
+       dest_table: table,
+       condition:,
+       columns:,
+       delete_cond: nil,
+       upsert_columns: nil,
+       bucket: nil,
+       txn_id: nil,
+       filter:,
+       logger: RedshiftConnector.logger,
+       quiet: false
+     )
+       unless src_table and dest_table
+         raise ArgumentError, "missing :table, :src_table or :dest_table"
+       end
+       logger = NullLogger.new if quiet
+       bundle_params = DataFileBundleParams.new(
+         bucket: bucket,
+         schema: schema,
+         table: src_table,
+         txn_id: txn_id,
+         logger: logger
+       )
+       exporter = Exporter.for_table_delta(
+         bundle_params: bundle_params,
+         schema: schema,
+         table: src_table,
+         columns: columns,
+         condition: condition,
+         logger: logger
+       )
+       importer = Importer.for_delta_upsert(
+         table: dest_table,
+         columns: columns,
+         delete_cond: delete_cond,
+         upsert_columns: upsert_columns,
+         logger: logger
+       )
+       new(exporter: exporter, importer: importer, filter: filter, logger: logger)
+     end
+
+     def Connector.transport_all_from_s3(
+       strategy: 'rename',
+       table:,
+       columns:,
+       bucket: nil,
+       prefix:,
+       format:,
+       filter: nil,
+       logger: RedshiftConnector.logger,
+       quiet: false
+     )
+       logger = NullLogger.new if quiet
+       bundle = S3DataFileBundle.for_prefix(
+         bucket: (bucket ? S3Bucket.get(bucket) : S3Bucket.default),
+         prefix: prefix,
+         format: format,
+         logger: logger
+       )
+       exporter = ImmediateExporter.new(bundle: bundle, logger: logger)
+       importer = Importer.for_rebuild(
+         strategy: strategy,
+         table: table,
+         columns: columns,
+         logger: logger
+       )
+       new(exporter: exporter, importer: importer, filter: filter, logger: logger)
+     end
+
+     def Connector.transport_all(
+       strategy: 'rename',
+       schema:,
+       table: nil,
+       src_table: table,
+       dest_table: table,
+       columns:,
+       bucket: nil,
+       txn_id: nil,
+       filter:,
+       logger: RedshiftConnector.logger,
+       quiet: false
+     )
+       logger = NullLogger.new if quiet
+       bundle_params = DataFileBundleParams.new(
+         bucket: bucket,
+         schema: schema,
+         table: src_table,
+         txn_id: txn_id,
+         logger: logger
+       )
+       exporter = Exporter.for_table(
+         bundle_params: bundle_params,
+         schema: schema,
+         table: src_table,
+         columns: columns,
+         logger: logger
+       )
+       importer = Importer.for_rebuild(
+         strategy: strategy,
+         table: dest_table,
+         columns: columns,
+         logger: logger
+       )
+       new(exporter: exporter, importer: importer, filter: filter, logger: logger)
+     end
+
+     def initialize(exporter:, importer:, filter: nil, logger:)
+       @exporter = exporter
+       @importer = importer
+       @filter = filter
+       @logger = logger
+       @bundle = nil
+     end
+
+     def export_enabled?
+       not ENV['IMPORT_ONLY']
+     end
+
+     def import_enabled?
+       not ENV['EXPORT_ONLY']
+     end
+
+     def execute
+       export if export_enabled?
+       import if import_enabled?
+     end
+
+     def export
+       @logger.info "==== export task =================================================="
+       @bundle = @exporter.execute
+     end
+
+     DEFAULT_BATCH_SIZE = 1000
+
+     def import
+       @logger.info "==== import task =================================================="
+       r = DataFileBundleReader.new(
+         @bundle,
+         filter: @filter,
+         batch_size: DEFAULT_BATCH_SIZE,
+         logger: @logger
+       )
+       @importer.execute(r)
+     end
+   end
+ end
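Connector#execute runs the export task and then the import task, and either phase can be skipped by setting the IMPORT_ONLY or EXPORT_ONLY environment variable, as the code above shows. A minimal usage sketch of the S3-only path, based on the transport_all_from_s3 keyword arguments; the table, columns, prefix, and format value are placeholder assumptions:

```ruby
# Hypothetical invocation; 'csv' is assumed to be a valid format value
# (a csv reader ships with this gem, but accepted values are not shown here).
connector = RedshiftConnector::Connector.transport_all_from_s3(
  strategy: 'rename',    # or 'truncate', per the importer/rebuild_* files
  table: 'shops',
  columns: %w[id name],
  prefix: 'exports/shops',
  format: 'csv'
)
connector.execute        # export phase, then import phase
```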