redshift-connector 6.0.0 → 7.0.0

This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/lib/redshift-connector.rb +1 -31
  3. data/lib/redshift_connector.rb +34 -0
  4. data/lib/redshift_connector/active_record_data_source.rb +23 -0
  5. data/lib/redshift_connector/active_record_exporter.rb +47 -0
  6. data/lib/{redshift-connector → redshift_connector}/connector.rb +26 -25
  7. data/lib/redshift_connector/data_file_bundle_params.rb +28 -0
  8. data/lib/redshift_connector/exception.rb +5 -0
  9. data/lib/redshift_connector/exporter.rb +39 -0
  10. data/lib/redshift_connector/exporter_builder.rb +51 -0
  11. data/lib/redshift_connector/importer.rb +58 -0
  12. data/lib/{redshift-connector → redshift_connector}/importer/activerecord-import.rb +0 -0
  13. data/lib/{redshift-connector → redshift_connector}/importer/insert_delta.rb +8 -9
  14. data/lib/{redshift-connector → redshift_connector}/importer/rebuild_rename.rb +8 -9
  15. data/lib/{redshift-connector → redshift_connector}/importer/rebuild_truncate.rb +8 -9
  16. data/lib/redshift_connector/importer/upsert.rb +24 -0
  17. data/lib/{redshift-connector → redshift_connector}/query.rb +0 -0
  18. data/lib/{redshift-connector → redshift_connector}/version.rb +1 -1
  19. data/test/config.rb +7 -12
  20. data/test/database.yml +3 -3
  21. data/test/foreach.rb +5 -0
  22. metadata +32 -30
  23. data/lib/redshift-connector/exporter.rb +0 -103
  24. data/lib/redshift-connector/importer.rb +0 -117
  25. data/lib/redshift-connector/importer/upsert.rb +0 -25
  26. data/lib/redshift-connector/logger.rb +0 -20
  27. data/lib/redshift-connector/s3_bucket.rb +0 -76
  28. data/lib/redshift-connector/s3_data_file.rb +0 -20
  29. data/lib/redshift-connector/s3_data_file_bundle.rb +0 -54
  30. data/test/test_reader.rb +0 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: f31b6eadc1161cb752be8f735f6ab3cdf2e87d32
-   data.tar.gz: edb474edc4bd93ee6ab5efc26d9b9770f05bd109
+   metadata.gz: bc258b0cae8475c5c78a9e37ebb80e16d186245d
+   data.tar.gz: 9c8c846b8bfb89714986146b019aa063bcfee9af
  SHA512:
-   metadata.gz: 6d8c0229f3bc0a7beae47859ac7aee5b0df82d82e838ad33803ab9d9e2321e660cd6befd74fee7e800f9394efff425856f20bc0fb77c69dbac407315a65d8333
-   data.tar.gz: c7514f15467cf56f6c3b60c0670a4f338645d0b05b96e418500aa9172b696a3f22c3277d498a4e309a779c2ab3202cd4f5a8dd2ad9fcda3e1527390a6a36b74b
+   metadata.gz: 8e9001439475fc25dfbc6c0eda67306044dccb7c5eeda5309fa2995df4adeaa5b028e3a177f33ca938b99d150ea10a8e48206479b0532f26f2eb17b59b5bf791
+   data.tar.gz: 1d83a4852630ab542bb14cc4fac376a08f549aec0d4225d92f65852f5af290c06d3c44ba31660991370707d9763322ad9d01daba9ff0a1dad352f88d4c0db6ff

data/lib/redshift-connector.rb CHANGED
@@ -1,31 +1 @@
- module RedshiftConnector
- end
-
- require 'redshift-connector/connector'
- require 'redshift-connector/exporter'
- require 'redshift-connector/importer'
- require 'redshift-connector/s3_bucket'
- require 'redshift-connector/s3_data_file_bundle'
- require 'redshift-connector/version'
-
- module RedshiftConnector
-   def RedshiftConnector.transport_delta(**params)
-     Connector.transport_delta(**params)
-   end
-
-   def RedshiftConnector.transport_all(**params)
-     Connector.transport_all(**params)
-   end
-
-   def RedshiftConnector.transport_delta_from_s3(**params)
-     Importer.transport_delta_from_s3(**params)
-   end
-
-   def RedshiftConnector.transport_all_from_s3(**params)
-     Importer.transport_all_from_s3(**params)
-   end
-
-   def RedshiftConnector.foreach(**params, &block)
-     Exporter.foreach(**params, &block)
-   end
- end
+ require 'redshift_connector'

data/lib/redshift_connector.rb ADDED
@@ -0,0 +1,34 @@
+ module RedshiftConnector
+ end
+
+ require 'redshift_connector/connector'
+ require 'redshift_connector/exporter'
+ require 'redshift_connector/active_record_data_source'
+ require 'redshift_connector/active_record_exporter'
+ require 'redshift_connector/importer'
+ require 'redshift_connector/s3_bucket'
+ require 'redshift_connector/s3_data_file_bundle'
+ require 'redshift_connector/exception'
+ require 'redshift_connector/version'
+
+ module RedshiftConnector
+   def RedshiftConnector.transport_delta(**params)
+     Connector.transport_delta(**params)
+   end
+
+   def RedshiftConnector.transport_all(**params)
+     Connector.transport_all(**params)
+   end
+
+   def RedshiftConnector.transport_delta_from_s3(**params)
+     Importer.transport_delta_from_s3(**params)
+   end
+
+   def RedshiftConnector.transport_all_from_s3(**params)
+     Importer.transport_all_from_s3(**params)
+   end
+
+   def RedshiftConnector.foreach(**params, &block)
+     Exporter.foreach(**params, &block)
+   end
+ end
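Note: the top-level API is unchanged apart from the new require path. A minimal usage sketch (assumed, not taken from the gem's docs; the schema, table, columns, and filter here are placeholders, and the keyword arguments are inferred from the Connector diff further below):

    require 'redshift_connector'

    # Export a delta from Redshift to S3, then load it into the MySQL-side table.
    RedshiftConnector.transport_delta(
      schema: 'app',                                       # Redshift schema (placeholder)
      table: 'items',                                      # used for both src_table and dest_table
      columns: %w[id name updated_at],
      condition: "updated_at > getdate() - interval '1 day'",   # Redshift-side delta condition
      delete_cond: "updated_at > now() - interval 1 day",       # rows deleted on the MySQL side first
      filter: ->(*row) { row }                             # per-row transform applied while loading
    )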
data/lib/redshift_connector/active_record_data_source.rb ADDED
@@ -0,0 +1,23 @@
+ require 'redshift_connector/exporter_builder'
+
+ module RedshiftConnector
+   class ActiveRecordDataSource
+     def ActiveRecordDataSource.for_dao(dao)
+       new(dao)
+     end
+
+     def initialize(dao)
+       @dao = dao
+     end
+
+     def exporter_builder
+       ExporterBuilder.new(ds: self, exporter_class: ActiveRecordExporter)
+     end
+
+     def execute_query(query_str)
+       @dao.connection_pool.with_connection {|conn|
+         conn.execute(query_str)
+       }
+     end
+   end
+ end

data/lib/redshift_connector/active_record_exporter.rb ADDED
@@ -0,0 +1,47 @@
+ require 'redshift_connector/s3_data_file_bundle'
+ require 'redshift_connector/query'
+ require 'redshift_connector/logger'
+
+ module RedshiftConnector
+   class ActiveRecordExporter
+     def initialize(ds:, query:, bundle_params:, enable_sort: false, logger: RedshiftConnector.logger)
+       @ds = ds
+       @query = query
+       @bundle_params = bundle_params
+       @enable_sort = enable_sort
+       @logger = logger
+
+       @bundle = S3DataFileBundle.for_params(bundle_params)
+     end
+
+     attr_reader :query
+     attr_reader :bundle_params
+     attr_reader :bundle
+     attr_reader :logger
+
+     def execute
+       @bundle.clear
+       unload_query = UnloadQuery.new(query: @query, bundle: @bundle, enable_sort: @enable_sort)
+       @logger.info "EXPORT #{unload_query.description} -> #{@bundle.url}*"
+       stmt = unload_query.to_sql
+       @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
+       @ds.execute_query(batch_job_label + stmt)
+       @bundle
+     end
+
+     def batch_job_label
+       @batch_job_label ||= begin
+         components = Dir.getwd.split('/')
+         app = if components.last == 'current'
+             # is Capistrano environment
+             components[-2]
+           else
+             components[-1]
+           end
+         batch_file = caller.detect {|c| /redshift_connector|active_record/ !~ c }
+         path = batch_file ? batch_file.split(':').first : '?'
+         "/* Job: #{app}:#{path} */ "
+       end
+     end
+   end
+ end

data/lib/{redshift-connector → redshift_connector}/connector.rb RENAMED
@@ -1,7 +1,7 @@
- require 'redshift-connector/exporter'
- require 'redshift-connector/importer'
- require 'redshift-connector/s3_data_file_bundle'
- require 'redshift-connector/logger'
+ require 'redshift_connector/exporter'
+ require 'redshift_connector/importer'
+ require 'redshift_connector/data_file_bundle_params'
+ require 'redshift_connector/logger'

  module RedshiftConnector
    class Connector
@@ -15,16 +15,16 @@ module RedshiftConnector
        delete_cond: nil,
        upsert_columns: nil,
        bucket: nil,
-       txn_id:, filter:,
+       txn_id: nil,
+       filter:,
        logger: RedshiftConnector.logger,
        quiet: false
      )
      unless src_table and dest_table
        raise ArgumentError, "missing :table, :src_table or :dest_table"
      end
-     bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
      logger = NullLogger.new if quiet
-     bundle = S3DataFileBundle.for_table(
+     bundle_params = DataFileBundleParams.new(
        bucket: bucket,
        schema: schema,
        table: src_table,
@@ -33,18 +33,19 @@ module RedshiftConnector
        logger: logger
      )
      exporter = Exporter.for_table_delta(
-       bundle: bundle,
+       bundle_params: bundle_params,
        schema: schema,
        table: src_table,
        columns: columns,
        condition: condition,
        logger: logger
      )
-     importer = Importer.transport_delta_from_bundle(
-       bundle: bundle,
-       table: dest_table, columns: columns,
-       delete_cond: delete_cond, upsert_columns: upsert_columns,
-       logger: logger, quiet: quiet
+     importer = Importer.for_delta_upsert(
+       table: dest_table,
+       columns: columns,
+       delete_cond: delete_cond,
+       upsert_columns: upsert_columns,
+       logger: logger
      )
      new(exporter: exporter, importer: importer, logger: logger)
    end
@@ -57,33 +58,32 @@ module RedshiftConnector
        dest_table: table,
        columns:,
        bucket: nil,
-       txn_id:,
+       txn_id: nil,
        filter:,
        logger: RedshiftConnector.logger,
        quiet: false
      )
-     bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
      logger = NullLogger.new if quiet
-     bundle = S3DataFileBundle.for_table(
+     bundle_params = DataFileBundleParams.new(
        bucket: bucket,
        schema: schema,
-       table: table,
+       table: src_table,
        txn_id: txn_id,
        filter: filter,
        logger: logger
      )
      exporter = Exporter.for_table(
-       bundle: bundle,
+       bundle_params: bundle_params,
        schema: schema,
-       table: table,
+       table: src_table,
        columns: columns,
        logger: logger
      )
-     importer = Importer.transport_all_from_bundle(
+     importer = Importer.for_rebuild(
        strategy: strategy,
-       bundle: bundle,
-       table: table, columns: columns,
-       logger: logger, quiet: quiet
+       table: dest_table,
+       columns: columns,
+       logger: logger
      )
      new(exporter: exporter, importer: importer, logger: logger)
    end
@@ -92,6 +92,7 @@ module RedshiftConnector
      @exporter = exporter
      @importer = importer
      @logger = logger
+     @bundle = nil
    end

    def export_enabled?
@@ -109,12 +110,12 @@ module RedshiftConnector

    def export
      @logger.info "==== export task =================================================="
-     @exporter.execute
+     @bundle = @exporter.execute
    end

    def import
      @logger.info "==== import task =================================================="
-     @importer.execute
+     @importer.execute(@bundle)
    end
  end
  end
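Note: the Connector now carries the data file bundle between the two phases explicitly: export stores the S3DataFileBundle returned by the exporter, and import hands it to the importer. The same hand-off applies when an exporter and importer are used directly; roughly (a sketch, not from the gem's docs):

    bundle = exporter.execute    # UNLOADs to S3 and returns the S3DataFileBundle it wrote
    importer.execute(bundle)     # reads the bundle back and loads rows via activerecord-import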
data/lib/redshift_connector/data_file_bundle_params.rb ADDED
@@ -0,0 +1,28 @@
+ require 'redshift_connector/logger'
+
+ module RedshiftConnector
+   class DataFileBundleParams
+     def initialize(
+       bucket: nil,
+       schema:,
+       table:,
+       txn_id: nil,
+       filter:,
+       logger: RedshiftConnector.logger
+     )
+       @bucket = bucket
+       @schema = schema
+       @table = table
+       @txn_id = txn_id
+       @filter = filter
+       @logger = logger
+     end
+
+     attr_reader :bucket
+     attr_reader :schema
+     attr_reader :table
+     attr_reader :txn_id
+     attr_reader :filter
+     attr_reader :logger
+   end
+ end

data/lib/redshift_connector/exception.rb ADDED
@@ -0,0 +1,5 @@
+ module RedshiftConnector
+   class Error < ::StandardError; end
+   class ExportError < Error; end
+   class ImportError < Error; end
+ end
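Note: the new exception hierarchy gives callers a single ancestor to rescue; ExportError and ImportError both inherit from RedshiftConnector::Error. A sketch of assumed usage with placeholder arguments:

    begin
      RedshiftConnector.transport_all(
        strategy: 'rename',
        schema: 'app', table: 'items',     # placeholders
        columns: %w[id name],
        filter: ->(*row) { row }
      )
    rescue RedshiftConnector::Error => ex  # covers ExportError and ImportError
      warn "redshift-connector transport failed: #{ex.message}"
    end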
data/lib/redshift_connector/exporter.rb ADDED
@@ -0,0 +1,39 @@
+ module RedshiftConnector
+   module Exporter
+     @default_data_source = nil
+
+     def Exporter.default_data_source=(ds)
+       @default_data_source = ds
+     end
+
+     def Exporter.default_data_source
+       @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
+     end
+
+     def Exporter.builder
+       default_data_source.exporter_builder
+     end
+
+     def Exporter.for_table_delta(**params)
+       builder.build_for_table_delta(**params)
+     end
+
+     def Exporter.for_table(**params)
+       builder.build_for_table(**params)
+     end
+
+     def Exporter.for_query(**params)
+       builder.build_for_query(**params)
+     end
+
+     def Exporter.foreach(**params, &block)
+       exporter = for_query(**params)
+       begin
+         exporter.execute
+         exporter.bundle.each_row(&block)
+       ensure
+         exporter.bundle.clear
+       end
+     end
+   end
+ end
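Note: Exporter is now a facade over a pluggable data source, so an application has to register one before exporting; the test config further below does exactly this. A minimal sketch, assuming the application defines a `Redshift` ActiveRecord class connected to the cluster (class name, file location, and connection name are assumptions):

    # e.g. in an initializer (assumed location)
    require 'redshift_connector'

    class Redshift < ActiveRecord::Base
      self.abstract_class = true
      establish_connection :redshift   # connection entry from database.yml
    end

    RedshiftConnector::Exporter.default_data_source =
      RedshiftConnector::ActiveRecordDataSource.for_dao(Redshift)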
data/lib/redshift_connector/exporter_builder.rb ADDED
@@ -0,0 +1,51 @@
+ require 'redshift_connector/query'
+ require 'redshift_connector/logger'
+
+ module RedshiftConnector
+   class ExporterBuilder
+     def initialize(ds:, exporter_class:)
+       @ds = ds
+       @exporter_class = exporter_class
+     end
+
+     def build_for_table_delta(schema:, table:, condition:, columns:, bundle_params:, logger: RedshiftConnector.logger)
+       query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
+       @exporter_class.new(ds: @ds, query: query, bundle_params: bundle_params, logger: logger)
+     end
+
+     def build_for_table(schema:, table:, columns:, bundle_params:, logger: RedshiftConnector.logger)
+       query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
+       @exporter_class.new(ds: @ds, query: query, bundle_params: bundle_params, logger: logger)
+     end
+
+     def build_for_query(
+       schema:,
+       table:,
+       bucket: nil,
+       query:,
+       txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
+       filter: nil,
+       enable_sort: false,
+       logger: RedshiftConnector.logger,
+       quiet: false
+     )
+       logger = NullLogger.new if quiet
+       bundle_params = DataFileBundleParams.new(
+         bucket: bucket,
+         schema: schema,
+         table: table,
+         txn_id: txn_id,
+         filter: filter,
+         logger: logger
+       )
+       @exporter_class.new(
+         ds: @ds,
+         query: ArbitraryQuery.new(query),
+         bundle_params: bundle_params,
+         enable_sort: enable_sort,
+         logger: logger
+       )
+     end
+
+   end
+ end

data/lib/redshift_connector/importer.rb ADDED
@@ -0,0 +1,58 @@
+ # create module
+ module RedshiftConnector
+   module Importer
+   end
+ end
+
+ require 'redshift_connector/importer/upsert'
+ require 'redshift_connector/importer/insert_delta'
+ require 'redshift_connector/importer/rebuild_rename'
+ require 'redshift_connector/importer/rebuild_truncate'
+ require 'redshift_connector/logger'
+
+ module RedshiftConnector
+   module Importer
+     def Importer.for_delta_upsert(table:, columns:, delete_cond: nil, upsert_columns: nil, logger: RedshiftConnector.logger)
+       if delete_cond and upsert_columns
+         raise ArgumentError, "delete_cond and upsert_columns are exclusive"
+       end
+       importer =
+         if delete_cond
+           Importer::InsertDelta.new(
+             dao: table.classify.constantize,
+             columns: columns,
+             delete_cond: delete_cond,
+             logger: logger
+           )
+         elsif upsert_columns
+           Importer::Upsert.new(
+             dao: table.classify.constantize,
+             columns: columns,
+             upsert_columns: upsert_columns,
+             logger: logger
+           )
+         else
+           raise ArgumentError, "either of delete_cond or upsert_columns is required for delta import"
+         end
+       importer
+     end
+
+     def Importer.for_rebuild(strategy: 'rename', table:, columns:, logger: RedshiftConnector.logger)
+       c = get_rebuild_class(strategy)
+       c.new(
+         dao: table.classify.constantize,
+         columns: columns,
+         logger: logger
+       )
+     end
+
+     def Importer.get_rebuild_class(strategy)
+       case strategy.to_s
+       when 'rename' then RebuildRename
+       when 'truncate' then RebuildTruncate
+       else
+         raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
+       end
+     end
+   end
+ end
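Note: Importer.for_delta_upsert treats delete_cond and upsert_columns as mutually exclusive, so each delta import picks exactly one strategy. A sketch of both variants (table and column names are placeholders; the bundle comes from the exporter, as in Connector#import above):

    # delete-then-insert: remove matching rows first, then insert the exported batch
    importer = RedshiftConnector::Importer.for_delta_upsert(
      table: 'item_pvs',
      columns: %w[item_id date pv],
      delete_cond: "date = curdate()"
    )

    # upsert: let activerecord-import apply ON DUPLICATE KEY UPDATE to the listed columns
    importer = RedshiftConnector::Importer.for_delta_upsert(
      table: 'item_pvs',
      columns: %w[item_id date pv],
      upsert_columns: %w[pv]
    )

    importer.execute(bundle)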
data/lib/{redshift-connector → redshift_connector}/importer/insert_delta.rb RENAMED
@@ -1,19 +1,18 @@
- require 'redshift-connector/importer/activerecord-import'
- require 'redshift-connector/logger'
+ require 'redshift_connector/importer/activerecord-import'
+ require 'redshift_connector/logger'

  module RedshiftConnector
    class Importer::InsertDelta
-     def initialize(dao:, bundle:, columns:, delete_cond:, logger: RedshiftConnector.logger)
+     def initialize(dao:, columns:, delete_cond:, logger: RedshiftConnector.logger)
        @dao = dao
-       @bundle = bundle
        @columns = columns
        @delete_cond = delete_cond
        @logger = logger
      end

-     def execute
+     def execute(bundle)
        delete_rows(@delete_cond)
-       import
+       import(bundle)
      end

      def delete_rows(cond_expr)
@@ -22,9 +21,9 @@ module RedshiftConnector
        @logger.info "deleted."
      end

-     def import
-       @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
-       @bundle.each_batch do |rows|
+     def import(bundle)
+       @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
+       bundle.each_batch do |rows|
          @dao.import(@columns, rows)
        end
      end

data/lib/{redshift-connector → redshift_connector}/importer/rebuild_rename.rb RENAMED
@@ -1,16 +1,15 @@
- require 'redshift-connector/importer/activerecord-import'
- require 'redshift-connector/logger'
+ require 'redshift_connector/importer/activerecord-import'
+ require 'redshift_connector/logger'

  module RedshiftConnector
    class Importer::RebuildRename
-     def initialize(dao:, bundle:, columns:, logger: RedshiftConnector.logger)
+     def initialize(dao:, columns:, logger: RedshiftConnector.logger)
        @dao = dao
-       @bundle = bundle
        @columns = columns
        @logger = logger
      end

-     def execute
+     def execute(bundle)
        dest_table = @dao.table_name
        tmp_table = "#{dest_table}_new"
        old_table = "#{dest_table}_old"
@@ -20,7 +19,7 @@ module RedshiftConnector

        exec_update "drop table if exists #{tmp_table}"
        exec_update "create table #{tmp_table} like #{dest_table}"
-       import(tmp_dao)
+       import(tmp_dao, bundle)
        exec_update "drop table if exists #{old_table}"
        # Atomic table exchange
        exec_update "rename table #{dest_table} to #{old_table}, #{tmp_table} to #{dest_table}"
@@ -31,9 +30,9 @@ module RedshiftConnector
        @dao.connection.execute(query)
      end

-     def import(dao)
-       @logger.info "IMPORT #{@bundle.url}* -> #{dao.table_name} (#{@columns.join(', ')})"
-       @bundle.each_batch do |rows|
+     def import(dao, bundle)
+       @logger.info "IMPORT #{bundle.url}* -> #{dao.table_name} (#{@columns.join(', ')})"
+       bundle.each_batch do |rows|
          dao.import(@columns, rows)
        end
      end

data/lib/{redshift-connector → redshift_connector}/importer/rebuild_truncate.rb RENAMED
@@ -1,18 +1,17 @@
- require 'redshift-connector/importer/activerecord-import'
- require 'redshift-connector/logger'
+ require 'redshift_connector/importer/activerecord-import'
+ require 'redshift_connector/logger'

  module RedshiftConnector
    class Importer::RebuildTruncate
-     def initialize(dao:, bundle:, columns:, logger: RedshiftConnector.logger)
+     def initialize(dao:, columns:, logger: RedshiftConnector.logger)
        @dao = dao
-       @bundle = bundle
        @columns = columns
        @logger = logger
      end

-     def execute
+     def execute(bundle)
        truncate_table(@dao.table_name)
-       import
+       import(bundle)
      end

      def truncate_table(table_name)
@@ -21,9 +20,9 @@ module RedshiftConnector
        @logger.info "truncated."
      end

-     def import
-       @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
-       @bundle.each_batch do |rows|
+     def import(bundle)
+       @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
+       bundle.each_batch do |rows|
          @dao.import(@columns, rows)
        end
      end

data/lib/redshift_connector/importer/upsert.rb ADDED
@@ -0,0 +1,24 @@
+ require 'redshift_connector/importer/activerecord-import'
+ require 'redshift_connector/logger'
+
+ module RedshiftConnector
+   class Importer::Upsert
+     def initialize(dao:, columns:, upsert_columns:, logger: RedshiftConnector.logger)
+       @dao = dao
+       @columns = columns
+       @upsert_columns = upsert_columns
+       @logger = logger
+     end
+
+     def execute(bundle)
+       import(bundle)
+     end
+
+     def import(bundle)
+       @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')}) upsert (#{@upsert_columns.join(', ')})"
+       bundle.each_batch do |rows|
+         @dao.import(@columns, rows, on_duplicate_key_update: @upsert_columns)
+       end
+     end
+   end
+ end

data/lib/{redshift-connector → redshift_connector}/version.rb RENAMED
@@ -1,3 +1,3 @@
  module RedshiftConnector
-   VERSION = '6.0.0'
+   VERSION = '7.0.0'
  end

data/test/config.rb CHANGED
@@ -1,18 +1,13 @@
- module RedshiftConnector
-   # For test only
-   $TEST_SCHEMA = 'hidekazukobayashi'
+ $TEST_SCHEMA = 'aamine'

-   Exporter.default_data_source = Redshift
+ module RedshiftConnector
+   Exporter.default_data_source = RedshiftConnector::ActiveRecordDataSource.new(Redshift)

    S3Bucket.add(
-     'test',
-     bucket: 'misc-internal.ap-northeast-1',
-     prefix: 'development/hidekazu-kobayashi',
-     # When using IAM role
-     #iam_role: 'arn:aws:iam::NNNNNNNNNNNN:role/RRRRRRRRR',
-     # When using explicit access key
-     access_key_id: 'AKIAJJGEKUU2MXO3X4NA',
-     secret_access_key: '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',
+     'redshift-copy-buffer',
+     bucket: 'redshift-copy-buffer',
+     prefix: 'development',
+     iam_role: 'arn:aws:iam::789035092620:role/RedshiftDevelopers',
      default: true
    )
  end

data/test/database.yml CHANGED
@@ -1,7 +1,7 @@
  mysql:
    adapter: mysql2
    host: localhost
-   username: test
+   username: minero-aoki
    database: test
    encoding: utf8

@@ -10,6 +10,6 @@ redshift:
    host: dwh.ckpd.co
    port: 5439
    database: production
-   username: hidekazukobayashi
-   password: '?N6s3oH#jc9k3d+P'
+   username: aamine
+   password: "3edCVfr$"
    encoding: utf8

data/test/foreach.rb ADDED
@@ -0,0 +1,5 @@
+ require_relative 'helper'
+
+ RedshiftConnector.foreach(schema: 'tabemiru', table: 'items', query: 'select id from tabemiru.items where id < 50 order by 1', enable_sort: true) do |row|
+   p row
+ end
metadata CHANGED
@@ -1,31 +1,31 @@
  --- !ruby/object:Gem::Specification
  name: redshift-connector
  version: !ruby/object:Gem::Version
-   version: 6.0.0
+   version: 7.0.0
  platform: ruby
  authors:
  - Minero Aoki
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-06-02 00:00:00.000000000 Z
+ date: 2017-06-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
-   name: activerecord
+   name: redshift-connector-data_file
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 7.0.0
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 7.0.0
  - !ruby/object:Gem::Dependency
-   name: activerecord-redshift
+   name: activerecord
    requirement: !ruby/object:Gem::Requirement
      requirements:
@@ -39,33 +39,33 @@ dependencies:
        - !ruby/object:Gem::Version
          version: '0'
  - !ruby/object:Gem::Dependency
-   name: redshift-connector-data_file
+   name: activerecord-redshift
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - "~>"
+     - - ">="
        - !ruby/object:Gem::Version
-         version: 1.1.0
+         version: '0'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - "~>"
+     - - ">="
        - !ruby/object:Gem::Version
-         version: 1.1.0
+         version: '0'
  - !ruby/object:Gem::Dependency
    name: pg
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.18'
+         version: 0.18.0
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.18'
+         version: 0.18.0
  - !ruby/object:Gem::Dependency
    name: activerecord-import
    requirement: !ruby/object:Gem::Requirement
@@ -144,33 +144,35 @@ extra_rdoc_files: []
  files:
  - README.md
  - lib/redshift-connector.rb
- - lib/redshift-connector/connector.rb
- - lib/redshift-connector/exporter.rb
- - lib/redshift-connector/importer.rb
- - lib/redshift-connector/importer/activerecord-import.rb
- - lib/redshift-connector/importer/insert_delta.rb
- - lib/redshift-connector/importer/rebuild_rename.rb
- - lib/redshift-connector/importer/rebuild_truncate.rb
- - lib/redshift-connector/importer/upsert.rb
- - lib/redshift-connector/logger.rb
- - lib/redshift-connector/query.rb
- - lib/redshift-connector/s3_bucket.rb
- - lib/redshift-connector/s3_data_file.rb
- - lib/redshift-connector/s3_data_file_bundle.rb
- - lib/redshift-connector/version.rb
+ - lib/redshift_connector.rb
+ - lib/redshift_connector/active_record_data_source.rb
+ - lib/redshift_connector/active_record_exporter.rb
+ - lib/redshift_connector/connector.rb
+ - lib/redshift_connector/data_file_bundle_params.rb
+ - lib/redshift_connector/exception.rb
+ - lib/redshift_connector/exporter.rb
+ - lib/redshift_connector/exporter_builder.rb
+ - lib/redshift_connector/importer.rb
+ - lib/redshift_connector/importer/activerecord-import.rb
+ - lib/redshift_connector/importer/insert_delta.rb
+ - lib/redshift_connector/importer/rebuild_rename.rb
+ - lib/redshift_connector/importer/rebuild_truncate.rb
+ - lib/redshift_connector/importer/upsert.rb
+ - lib/redshift_connector/query.rb
+ - lib/redshift_connector/version.rb
  - test/all.rb
  - test/config.rb
  - test/config.rb.example
  - test/database.yml
  - test/database.yml.example
+ - test/foreach.rb
  - test/helper.rb
  - test/item_pvs.ct.mysql
  - test/item_pvs.ct.redshift
  - test/reader/test_redshift_csv.rb
  - test/test_connector.rb
- - test/test_reader.rb
  - test/test_s3_import.rb
- homepage: https://github.com/aamine/redshift-connector
+ homepage: https://github.com/bricolages/redshift-connector
  licenses:
  - MIT
  metadata: {}
@@ -190,7 +192,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
      version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.6.11
+ rubygems_version: 2.6.8
  signing_key:
  specification_version: 4
  summary: Redshift bulk data connector
data/lib/redshift-connector/exporter.rb DELETED
@@ -1,103 +0,0 @@
- require 'redshift-connector/query'
- require 'redshift-connector/logger'
-
- module RedshiftConnector
-   class Exporter
-     def Exporter.default_data_source=(ds)
-       @default_data_source = ds
-     end
-
-     def Exporter.default_data_source
-       @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
-     end
-
-     def Exporter.for_table_delta(ds: default_data_source, schema:, table:, condition:, columns:, bundle:, logger: RedshiftConnector.logger)
-       delta_query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
-       unload_query = UnloadQuery.new(query: delta_query, bundle: bundle)
-       new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
-     end
-
-     def Exporter.for_table(ds: default_data_source, schema:, table:, columns:, bundle:, logger: RedshiftConnector.logger)
-       query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
-       unload_query = UnloadQuery.new(query: query, bundle: bundle)
-       new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
-     end
-
-     def Exporter.foreach(**params, &block)
-       exporter = Exporter.for_query(**params)
-       begin
-         exporter.execute
-         exporter.bundle.each_row(&block)
-       ensure
-         exporter.bundle.clear
-       end
-     end
-
-     def Exporter.for_query(
-       ds: default_data_source,
-       schema:,
-       table:,
-       bucket: nil,
-       query:,
-       txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
-       filter: nil,
-       enable_sort: false,
-       logger: RedshiftConnector.logger,
-       quiet: false
-     )
-       bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-       logger = NullLogger.new if quiet
-       bundle = S3DataFileBundle.for_table(
-         bucket: bucket,
-         schema: schema,
-         table: table,
-         txn_id: txn_id,
-         filter: filter,
-         logger: logger
-       )
-       exporter = Exporter.new(
-         ds: ds,
-         query: UnloadQuery.wrap(query: query, bundle: bundle, enable_sort: enable_sort),
-         bundle: bundle,
-         logger: logger
-       )
-       exporter
-     end
-
-     def initialize(ds: self.class.default_data_source, query:, bundle:, logger: RedshiftConnector.logger)
-       @ds = ds
-       @query = query
-       @bundle = bundle
-       @logger = logger
-     end
-
-     attr_reader :query
-     attr_reader :bundle
-     attr_reader :logger
-
-     def execute
-       @bundle.clear
-       @logger.info "EXPORT #{@query.description} -> #{@bundle.url}*"
-       @ds.connection_pool.with_connection do |conn|
-         stmt = @query.to_sql
-         @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
-         conn.execute(batch_job_label + stmt)
-       end
-     end
-
-     def batch_job_label
-       @batch_job_label ||= begin
-         components = Dir.getwd.split('/')
-         app = if components.last == 'current'
-             # is Capistrano environment
-             components[-2]
-           else
-             components[-1]
-           end
-         batch_file = caller.detect {|c| /redshift-connector|active_record/ !~ c }
-         path = batch_file ? batch_file.split(':').first : '?'
-         "/* Job: #{app}:#{path} */ "
-       end
-     end
-   end
- end

data/lib/redshift-connector/importer.rb DELETED
@@ -1,117 +0,0 @@
- # create module
- module RedshiftConnector
-   module Importer
-   end
- end
-
- require 'redshift-connector/importer/upsert'
- require 'redshift-connector/importer/insert_delta'
- require 'redshift-connector/importer/rebuild_rename'
- require 'redshift-connector/importer/rebuild_truncate'
-
- require 'redshift-connector/s3_data_file_bundle'
- require 'redshift-connector/logger'
-
- module RedshiftConnector
-   module Importer
-     def Importer.transport_delta_from_s3(
-       bucket: nil, prefix:, format:, filter: nil,
-       table:, columns:,
-       delete_cond: nil, upsert_columns: nil,
-       logger: RedshiftConnector.logger, quiet: false)
-       bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-       logger = NullLogger.new if quiet
-       bundle = S3DataFileBundle.for_prefix(
-         bucket: bucket,
-         prefix: prefix,
-         format: format,
-         filter: filter,
-         logger: logger
-       )
-       transport_delta_from_bundle(
-         bundle: bundle,
-         table: table, columns: columns,
-         delete_cond: delete_cond, upsert_columns: upsert_columns,
-         logger: logger, quiet: quiet
-       )
-     end
-
-     def Importer.transport_delta_from_bundle(
-       bundle:,
-       table:, columns:,
-       delete_cond: nil, upsert_columns: nil,
-       logger: RedshiftConnector.logger, quiet: false
-     )
-       if delete_cond and upsert_columns
-         raise ArgumentError, "delete_cond and upsert_columns are exclusive"
-       end
-       importer =
-         if delete_cond
-           Importer::InsertDelta.new(
-             dao: table.classify.constantize,
-             bundle: bundle,
-             columns: columns,
-             delete_cond: delete_cond,
-             logger: logger
-           )
-         elsif upsert_columns
-           Importer::Upsert.new(
-             dao: table.classify.constantize,
-             bundle: bundle,
-             columns: columns,
-             upsert_columns: upsert_columns,
-             logger: logger
-           )
-         else
-           raise ArgumentError, "either of delete_cond or upsert_columns is required for transport_delta"
-         end
-       importer
-     end
-
-     def Importer.transport_all_from_s3(
-       strategy: 'rename',
-       bucket: nil, prefix:, format:, filter: nil,
-       table:, columns:,
-       logger: RedshiftConnector.logger, quiet: false)
-       bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
-       logger = NullLogger.new if quiet
-       bundle = S3DataFileBundle.for_prefix(
-         bucket: bucket,
-         prefix: prefix,
-         format: format,
-         filter: filter,
-         logger: logger
-       )
-       transport_all_from_bundle(
-         strategy: strategy,
-         bundle: bundle,
-         table: table, columns: columns,
-         logger: logger, quiet: quiet
-       )
-     end
-
-     def Importer.transport_all_from_bundle(
-       strategy: 'rename',
-       bundle:,
-       table:, columns:,
-       logger: RedshiftConnector.logger, quiet: false
-     )
-       importer = get_rebuild_class(strategy).new(
-         dao: table.classify.constantize,
-         bundle: bundle,
-         columns: columns,
-         logger: logger
-       )
-       importer
-     end
-
-     def Importer.get_rebuild_class(strategy)
-       case strategy.to_s
-       when 'rename' then RebuildRename
-       when 'truncate' then RebuildTruncate
-       else
-         raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
-       end
-     end
-   end
- end

data/lib/redshift-connector/importer/upsert.rb DELETED
@@ -1,25 +0,0 @@
- require 'redshift-connector/importer/activerecord-import'
- require 'redshift-connector/logger'
-
- module RedshiftConnector
-   class Importer::Upsert
-     def initialize(dao:, bundle:, columns:, upsert_columns:, logger: RedshiftConnector.logger)
-       @dao = dao
-       @bundle = bundle
-       @columns = columns
-       @upsert_columns = upsert_columns
-       @logger = logger
-     end
-
-     def execute
-       import
-     end
-
-     def import
-       @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')}) upsert (#{@upsert_columns.join(', ')})"
-       @bundle.each_batch do |rows|
-         @dao.import(@columns, rows, on_duplicate_key_update: @upsert_columns)
-       end
-     end
-   end
- end

data/lib/redshift-connector/logger.rb DELETED
@@ -1,20 +0,0 @@
- module RedshiftConnector
-   @logger = nil
-
-   def RedshiftConnector.logger
-     # Defer to access Rails
-     @logger || Rails.logger
-   end
-
-   def RedshiftConnector.logger=(logger)
-     @logger = logger
-   end
-
-   class NullLogger
-     def noop(*args) end
-     alias error noop
-     alias warn noop
-     alias info noop
-     alias debug noop
-   end
- end

data/lib/redshift-connector/s3_bucket.rb DELETED
@@ -1,76 +0,0 @@
- require 'aws-sdk'
-
- module RedshiftConnector
-   class S3Bucket
-     @buckets = {}
-     @default = nil
-
-     def S3Bucket.add(name, default: false, **params)
-       instance = new(**params)
-       @buckets[name.to_s] = instance
-       if !@default or default
-         @default = instance
-       end
-     end
-
-     def S3Bucket.default
-       @default or raise ArgumentError, "no default S3 bucket configured"
-     end
-
-     def S3Bucket.get(name)
-       @buckets[name.to_s] or raise ArgumentError, "no such S3 bucket configured: #{name.inspect}"
-     end
-
-     def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
-       @region = region
-       @name = bucket
-       @prefix = prefix
-       @access_key_id = access_key_id
-       @secret_access_key = secret_access_key
-       @iam_role = iam_role
-     end
-
-     attr_reader :name
-     attr_reader :prefix
-
-     def url
-       "s3://#{@bucket.name}/#{@prefix}/"
-     end
-
-     def client
-       @client ||= begin
-         args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
-         Aws::S3::Client.new(**args)
-       end
-     end
-
-     def bucket
-       @bucket ||= begin
-         resource = Aws::S3::Resource.new(client: client)
-         resource.bucket(@name)
-       end
-     end
-
-     def object(key)
-       bucket.object(key)
-     end
-
-     def objects(prefix:)
-       bucket.objects(prefix: prefix)
-     end
-
-     def delete_objects(keys)
-       bucket.delete_objects(delete: {objects: keys.map {|k| {key: k} }})
-     end
-
-     def credential_string
-       if @iam_role
-         "aws_iam_role=#{@iam_role}"
-       elsif @access_key_id
-         "aws_access_key_id=#{@access_key_id};aws_secret_access_key=#{@secret_access_key}"
-       else
-         raise ArgumentError, "no credential given for Redshift S3 access"
-       end
-     end
-   end
- end

data/lib/redshift-connector/s3_data_file.rb DELETED
@@ -1,20 +0,0 @@
- require 'redshift-connector/data_file'
-
- module RedshiftConnector
-   class S3DataFile < AbstractDataFile
-     def initialize(object, reader_class:)
-       @object = object
-       @reader_class = reader_class
-     end
-
-     def key
-       @object.key
-     end
-
-     def content
-       @object.get.body
-     end
-
-     delegate :presigned_url, to: :@object
-   end
- end

data/lib/redshift-connector/s3_data_file_bundle.rb DELETED
@@ -1,54 +0,0 @@
- require 'redshift-connector/s3_bucket'
- require 'redshift-connector/s3_data_file'
- require 'redshift-connector/logger'
- require 'redshift-connector/data_file'
- require 'aws-sdk'
-
- module RedshiftConnector
-   class S3DataFileBundle < AbstractDataFileBundle
-     def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-       real_prefix = "#{bucket.prefix}/#{prefix}"
-       new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
-     end
-
-     def self.for_table(bucket: S3Bucket.default, schema:, table:, txn_id:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-       prefix = "#{bucket.prefix}/#{schema}_export/#{table}/#{txn_id}/#{table}.csv."
-       new(bucket, prefix, format: :redshift_csv, filter: filter, batch_size: batch_size, logger: logger)
-     end
-
-     def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
-       @bucket = bucket
-       @prefix = prefix
-       @format = format
-       @filter = filter || lambda {|*row| row }
-       @batch_size = batch_size
-       @logger = logger
-       @reader_class = Reader.get(format)
-     end
-
-     attr_reader :bucket
-     attr_reader :prefix
-
-     def url
-       "s3://#{@bucket.name}/#{@prefix}"
-     end
-
-     def credential_string
-       @bucket.credential_string
-     end
-
-     def data_files
-       @bucket.objects(prefix: @prefix)
-         .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
-     end
-
-     def clear
-       pref = File.dirname(@prefix) + '/'
-       keys = @bucket.objects(prefix: pref).map(&:key)
-       unless keys.empty?
-         @logger.info "DELETE #{pref}*"
-         @bucket.delete_objects(keys)
-       end
-     end
-   end
- end

data/test/test_reader.rb DELETED
@@ -1,9 +0,0 @@
- require 'test/unit'
-
- module RedshiftConnector
-   class TestReader < Test::Unit::TestCase
-     def test_get
-       assert_equal Reader::RedshiftCSV, Reader.get(:redshift_csv)
-     end
-   end
- end