redshift-connector 6.0.0 → 7.0.0

Files changed (30)
  1. checksums.yaml +4 -4
  2. data/lib/redshift-connector.rb +1 -31
  3. data/lib/redshift_connector.rb +34 -0
  4. data/lib/redshift_connector/active_record_data_source.rb +23 -0
  5. data/lib/redshift_connector/active_record_exporter.rb +47 -0
  6. data/lib/{redshift-connector → redshift_connector}/connector.rb +26 -25
  7. data/lib/redshift_connector/data_file_bundle_params.rb +28 -0
  8. data/lib/redshift_connector/exception.rb +5 -0
  9. data/lib/redshift_connector/exporter.rb +39 -0
  10. data/lib/redshift_connector/exporter_builder.rb +51 -0
  11. data/lib/redshift_connector/importer.rb +58 -0
  12. data/lib/{redshift-connector → redshift_connector}/importer/activerecord-import.rb +0 -0
  13. data/lib/{redshift-connector → redshift_connector}/importer/insert_delta.rb +8 -9
  14. data/lib/{redshift-connector → redshift_connector}/importer/rebuild_rename.rb +8 -9
  15. data/lib/{redshift-connector → redshift_connector}/importer/rebuild_truncate.rb +8 -9
  16. data/lib/redshift_connector/importer/upsert.rb +24 -0
  17. data/lib/{redshift-connector → redshift_connector}/query.rb +0 -0
  18. data/lib/{redshift-connector → redshift_connector}/version.rb +1 -1
  19. data/test/config.rb +7 -12
  20. data/test/database.yml +3 -3
  21. data/test/foreach.rb +5 -0
  22. metadata +32 -30
  23. data/lib/redshift-connector/exporter.rb +0 -103
  24. data/lib/redshift-connector/importer.rb +0 -117
  25. data/lib/redshift-connector/importer/upsert.rb +0 -25
  26. data/lib/redshift-connector/logger.rb +0 -20
  27. data/lib/redshift-connector/s3_bucket.rb +0 -76
  28. data/lib/redshift-connector/s3_data_file.rb +0 -20
  29. data/lib/redshift-connector/s3_data_file_bundle.rb +0 -54
  30. data/test/test_reader.rb +0 -9
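The headline change in 7.0.0 is the rename of every library path from redshift-connector/* to redshift_connector/*; the hyphenated entry file is reduced to a one-line shim, so both spellings still load. A minimal load-path sketch (nothing here is new API, only the require):

  # 6.0.0
  require 'redshift-connector'

  # 7.0.0 -- either line works; lib/redshift-connector.rb now only requires the new file
  require 'redshift_connector'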
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: f31b6eadc1161cb752be8f735f6ab3cdf2e87d32
- data.tar.gz: edb474edc4bd93ee6ab5efc26d9b9770f05bd109
+ metadata.gz: bc258b0cae8475c5c78a9e37ebb80e16d186245d
+ data.tar.gz: 9c8c846b8bfb89714986146b019aa063bcfee9af
  SHA512:
- metadata.gz: 6d8c0229f3bc0a7beae47859ac7aee5b0df82d82e838ad33803ab9d9e2321e660cd6befd74fee7e800f9394efff425856f20bc0fb77c69dbac407315a65d8333
- data.tar.gz: c7514f15467cf56f6c3b60c0670a4f338645d0b05b96e418500aa9172b696a3f22c3277d498a4e309a779c2ab3202cd4f5a8dd2ad9fcda3e1527390a6a36b74b
+ metadata.gz: 8e9001439475fc25dfbc6c0eda67306044dccb7c5eeda5309fa2995df4adeaa5b028e3a177f33ca938b99d150ea10a8e48206479b0532f26f2eb17b59b5bf791
+ data.tar.gz: 1d83a4852630ab542bb14cc4fac376a08f549aec0d4225d92f65852f5af290c06d3c44ba31660991370707d9763322ad9d01daba9ff0a1dad352f88d4c0db6ff
@@ -1,31 +1 @@
1
- module RedshiftConnector
2
- end
3
-
4
- require 'redshift-connector/connector'
5
- require 'redshift-connector/exporter'
6
- require 'redshift-connector/importer'
7
- require 'redshift-connector/s3_bucket'
8
- require 'redshift-connector/s3_data_file_bundle'
9
- require 'redshift-connector/version'
10
-
11
- module RedshiftConnector
12
- def RedshiftConnector.transport_delta(**params)
13
- Connector.transport_delta(**params)
14
- end
15
-
16
- def RedshiftConnector.transport_all(**params)
17
- Connector.transport_all(**params)
18
- end
19
-
20
- def RedshiftConnector.transport_delta_from_s3(**params)
21
- Importer.transport_delta_from_s3(**params)
22
- end
23
-
24
- def RedshiftConnector.transport_all_from_s3(**params)
25
- Importer.transport_all_from_s3(**params)
26
- end
27
-
28
- def RedshiftConnector.foreach(**params, &block)
29
- Exporter.foreach(**params, &block)
30
- end
31
- end
1
+ require 'redshift_connector'
@@ -0,0 +1,34 @@
1
+ module RedshiftConnector
2
+ end
3
+
4
+ require 'redshift_connector/connector'
5
+ require 'redshift_connector/exporter'
6
+ require 'redshift_connector/active_record_data_source'
7
+ require 'redshift_connector/active_record_exporter'
8
+ require 'redshift_connector/importer'
9
+ require 'redshift_connector/s3_bucket'
10
+ require 'redshift_connector/s3_data_file_bundle'
11
+ require 'redshift_connector/exception'
12
+ require 'redshift_connector/version'
13
+
14
+ module RedshiftConnector
15
+ def RedshiftConnector.transport_delta(**params)
16
+ Connector.transport_delta(**params)
17
+ end
18
+
19
+ def RedshiftConnector.transport_all(**params)
20
+ Connector.transport_all(**params)
21
+ end
22
+
23
+ def RedshiftConnector.transport_delta_from_s3(**params)
24
+ Importer.transport_delta_from_s3(**params)
25
+ end
26
+
27
+ def RedshiftConnector.transport_all_from_s3(**params)
28
+ Importer.transport_all_from_s3(**params)
29
+ end
30
+
31
+ def RedshiftConnector.foreach(**params, &block)
32
+ Exporter.foreach(**params, &block)
33
+ end
34
+ end
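The public facade is unchanged: the module functions still hand their keyword arguments to Connector, Importer, and Exporter. A hedged sketch of a delta transport; every identifier is a placeholder, and keyword defaults outside the visible hunks are assumed, not shown:

  connector = RedshiftConnector.transport_delta(
    schema: 'app_export',
    table: 'orders',                        # same table name on both sides here
    condition: "updated_at > dateadd(day, -1, getdate())",
    columns: %w[id amount updated_at],
    upsert_columns: %w[amount updated_at],  # upsert path; delete_cond would pick InsertDelta instead
    filter: nil
  )
  connector.export                          # UNLOAD to S3 (see connector.rb below)
  connector.import                          # bulk-insert the bundle into MySQL via activerecord-import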
@@ -0,0 +1,23 @@
1
+ require 'redshift_connector/exporter_builder'
2
+
3
+ module RedshiftConnector
4
+ class ActiveRecordDataSource
5
+ def ActiveRecordDataSource.for_dao(dao)
6
+ new(dao)
7
+ end
8
+
9
+ def initialize(dao)
10
+ @dao = dao
11
+ end
12
+
13
+ def exporter_builder
14
+ ExporterBuilder.new(ds: self, exporter_class: ActiveRecordExporter)
15
+ end
16
+
17
+ def execute_query(query_str)
18
+ @dao.connection_pool.with_connection {|conn|
19
+ conn.execute(query_str)
20
+ }
21
+ end
22
+ end
23
+ end
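ActiveRecordDataSource adapts an ActiveRecord class that owns the Redshift connection; the bare model can no longer be assigned as the default data source (compare the old test/config.rb near the end of this diff). A sketch, where the Redshift class and the :redshift entry in database.yml are assumptions:

  class Redshift < ActiveRecord::Base
    self.abstract_class = true
    establish_connection :redshift          # Redshift credentials from database.yml
  end

  RedshiftConnector::Exporter.default_data_source =
    RedshiftConnector::ActiveRecordDataSource.for_dao(Redshift)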
@@ -0,0 +1,47 @@
1
+ require 'redshift_connector/s3_data_file_bundle'
2
+ require 'redshift_connector/query'
3
+ require 'redshift_connector/logger'
4
+
5
+ module RedshiftConnector
6
+ class ActiveRecordExporter
7
+ def initialize(ds:, query:, bundle_params:, enable_sort: false, logger: RedshiftConnector.logger)
8
+ @ds = ds
9
+ @query = query
10
+ @bundle_params = bundle_params
11
+ @enable_sort = enable_sort
12
+ @logger = logger
13
+
14
+ @bundle = S3DataFileBundle.for_params(bundle_params)
15
+ end
16
+
17
+ attr_reader :query
18
+ attr_reader :bundle_params
19
+ attr_reader :bundle
20
+ attr_reader :logger
21
+
22
+ def execute
23
+ @bundle.clear
24
+ unload_query = UnloadQuery.new(query: @query, bundle: @bundle, enable_sort: @enable_sort)
25
+ @logger.info "EXPORT #{unload_query.description} -> #{@bundle.url}*"
26
+ stmt = unload_query.to_sql
27
+ @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
28
+ @ds.execute_query(batch_job_label + stmt)
29
+ @bundle
30
+ end
31
+
32
+ def batch_job_label
33
+ @batch_job_label ||= begin
34
+ components = Dir.getwd.split('/')
35
+ app = if components.last == 'current'
36
+ # is Capistrano environment
37
+ components[-2]
38
+ else
39
+ components[-1]
40
+ end
41
+ batch_file = caller.detect {|c| /redshift_connector|active_record/ !~ c }
42
+ path = batch_file ? batch_file.split(':').first : '?'
43
+ "/* Job: #{app}:#{path} */ "
44
+ end
45
+ end
46
+ end
47
+ end
@@ -1,7 +1,7 @@
1
- require 'redshift-connector/exporter'
2
- require 'redshift-connector/importer'
3
- require 'redshift-connector/s3_data_file_bundle'
4
- require 'redshift-connector/logger'
1
+ require 'redshift_connector/exporter'
2
+ require 'redshift_connector/importer'
3
+ require 'redshift_connector/data_file_bundle_params'
4
+ require 'redshift_connector/logger'
5
5
 
6
6
  module RedshiftConnector
7
7
  class Connector
@@ -15,16 +15,16 @@ module RedshiftConnector
15
15
  delete_cond: nil,
16
16
  upsert_columns: nil,
17
17
  bucket: nil,
18
- txn_id:, filter:,
18
+ txn_id: nil,
19
+ filter:,
19
20
  logger: RedshiftConnector.logger,
20
21
  quiet: false
21
22
  )
22
23
  unless src_table and dest_table
23
24
  raise ArgumentError, "missing :table, :src_table or :dest_table"
24
25
  end
25
- bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
26
26
  logger = NullLogger.new if quiet
27
- bundle = S3DataFileBundle.for_table(
27
+ bundle_params = DataFileBundleParams.new(
28
28
  bucket: bucket,
29
29
  schema: schema,
30
30
  table: src_table,
@@ -33,18 +33,19 @@ module RedshiftConnector
33
33
  logger: logger
34
34
  )
35
35
  exporter = Exporter.for_table_delta(
36
- bundle: bundle,
36
+ bundle_params: bundle_params,
37
37
  schema: schema,
38
38
  table: src_table,
39
39
  columns: columns,
40
40
  condition: condition,
41
41
  logger: logger
42
42
  )
43
- importer = Importer.transport_delta_from_bundle(
44
- bundle: bundle,
45
- table: dest_table, columns: columns,
46
- delete_cond: delete_cond, upsert_columns: upsert_columns,
47
- logger: logger, quiet: quiet
43
+ importer = Importer.for_delta_upsert(
44
+ table: dest_table,
45
+ columns: columns,
46
+ delete_cond: delete_cond,
47
+ upsert_columns: upsert_columns,
48
+ logger: logger
48
49
  )
49
50
  new(exporter: exporter, importer: importer, logger: logger)
50
51
  end
@@ -57,33 +58,32 @@ module RedshiftConnector
57
58
  dest_table: table,
58
59
  columns:,
59
60
  bucket: nil,
60
- txn_id:,
61
+ txn_id: nil,
61
62
  filter:,
62
63
  logger: RedshiftConnector.logger,
63
64
  quiet: false
64
65
  )
65
- bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
66
66
  logger = NullLogger.new if quiet
67
- bundle = S3DataFileBundle.for_table(
67
+ bundle_params = DataFileBundleParams.new(
68
68
  bucket: bucket,
69
69
  schema: schema,
70
- table: table,
70
+ table: src_table,
71
71
  txn_id: txn_id,
72
72
  filter: filter,
73
73
  logger: logger
74
74
  )
75
75
  exporter = Exporter.for_table(
76
- bundle: bundle,
76
+ bundle_params: bundle_params,
77
77
  schema: schema,
78
- table: table,
78
+ table: src_table,
79
79
  columns: columns,
80
80
  logger: logger
81
81
  )
82
- importer = Importer.transport_all_from_bundle(
82
+ importer = Importer.for_rebuild(
83
83
  strategy: strategy,
84
- bundle: bundle,
85
- table: table, columns: columns,
86
- logger: logger, quiet: quiet
84
+ table: dest_table,
85
+ columns: columns,
86
+ logger: logger
87
87
  )
88
88
  new(exporter: exporter, importer: importer, logger: logger)
89
89
  end
@@ -92,6 +92,7 @@ module RedshiftConnector
92
92
  @exporter = exporter
93
93
  @importer = importer
94
94
  @logger = logger
95
+ @bundle = nil
95
96
  end
96
97
 
97
98
  def export_enabled?
@@ -109,12 +110,12 @@ module RedshiftConnector
109
110
 
110
111
  def export
111
112
  @logger.info "==== export task =================================================="
112
- @exporter.execute
113
+ @bundle = @exporter.execute
113
114
  end
114
115
 
115
116
  def import
116
117
  @logger.info "==== import task =================================================="
117
- @importer.execute
118
+ @importer.execute(@bundle)
118
119
  end
119
120
  end
120
121
  end
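The connector no longer shares one pre-built bundle between exporter and importer: export stores whatever bundle the exporter returns, and import hands that same bundle to the importer. A sketch of the rebuild path with the new two-phase flow; keywords outside the visible hunk (strategy:, schema:, table:) are assumptions, and all names are placeholders:

  connector = RedshiftConnector.transport_all(
    strategy: 'truncate',                   # or 'rename' for the table-swap rebuild
    schema: 'app_export', table: 'orders',
    columns: %w[id amount updated_at],
    filter: nil
  )
  bundle = connector.export                 # @bundle = @exporter.execute -> S3DataFileBundle
  connector.import                          # @importer.execute(@bundle)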
@@ -0,0 +1,28 @@
1
+ require 'redshift_connector/logger'
2
+
3
+ module RedshiftConnector
4
+ class DataFileBundleParams
5
+ def initialize(
6
+ bucket: nil,
7
+ schema:,
8
+ table:,
9
+ txn_id: nil,
10
+ filter:,
11
+ logger: RedshiftConnector.logger
12
+ )
13
+ @bucket = bucket
14
+ @schema = schema
15
+ @table = table
16
+ @txn_id = txn_id
17
+ @filter = filter
18
+ @logger = logger
19
+ end
20
+
21
+ attr_reader :bucket
22
+ attr_reader :schema
23
+ attr_reader :table
24
+ attr_reader :txn_id
25
+ attr_reader :filter
26
+ attr_reader :logger
27
+ end
28
+ end
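DataFileBundleParams is just a value object; the exporter resolves it into a concrete S3DataFileBundle only when it runs (S3DataFileBundle.for_params in active_record_exporter.rb above). A sketch of building one by hand and feeding it to the builder-backed Exporter.for_table; identifiers are placeholders:

  bundle_params = RedshiftConnector::DataFileBundleParams.new(
    schema: 'app_export',
    table: 'orders',
    filter: nil                             # bucket: and txn_id: are left at their nil defaults
  )
  exporter = RedshiftConnector::Exporter.for_table(
    schema: 'app_export', table: 'orders',
    columns: %w[id amount updated_at],
    bundle_params: bundle_params
  )
  bundle = exporter.execute                 # returns the bundle it unloaded into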
@@ -0,0 +1,5 @@
+ module RedshiftConnector
+ class Error < ::StandardError; end
+ class ExportError < Error; end
+ class ImportError < Error; end
+ end
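exception.rb is new and only defines the hierarchy; this diff does not show where the gem raises these, so rescuing them around a transport is an assumption rather than documented behaviour:

  begin
    connector.export
    connector.import
  rescue RedshiftConnector::Error => e      # common base of ExportError and ImportError
    warn "redshift transport failed: #{e.message}"
    raise
  end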
@@ -0,0 +1,39 @@
1
+ module RedshiftConnector
2
+ module Exporter
3
+ @default_data_source = nil
4
+
5
+ def Exporter.default_data_source=(ds)
6
+ @default_data_source = ds
7
+ end
8
+
9
+ def Exporter.default_data_source
10
+ @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
11
+ end
12
+
13
+ def Exporter.builder
14
+ default_data_source.exporter_builder
15
+ end
16
+
17
+ def Exporter.for_table_delta(**params)
18
+ builder.build_for_table_delta(**params)
19
+ end
20
+
21
+ def Exporter.for_table(**params)
22
+ builder.build_for_table(**params)
23
+ end
24
+
25
+ def Exporter.for_query(**params)
26
+ builder.build_for_query(**params)
27
+ end
28
+
29
+ def Exporter.foreach(**params, &block)
30
+ exporter = for_query(**params)
31
+ begin
32
+ exporter.execute
33
+ exporter.bundle.each_row(&block)
34
+ ensure
35
+ exporter.bundle.clear
36
+ end
37
+ end
38
+ end
39
+ end
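Exporter is now a thin facade over the data source's ExporterBuilder; foreach still unloads an ad-hoc query to S3, yields each row, and clears the bundle afterwards. A sketch matching the new test/foreach.rb below, with placeholder names and an already-registered default data source assumed:

  RedshiftConnector.foreach(
    schema: 'app_export',
    table: 'items',
    query: 'select id, name from app_export.items where id < 100 order by 1',
    enable_sort: true                       # passed through to the UNLOAD query
  ) do |row|
    p row                                   # one exported row per block call
  end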
@@ -0,0 +1,51 @@
1
+ require 'redshift_connector/query'
2
+ require 'redshift_connector/logger'
3
+
4
+ module RedshiftConnector
5
+ class ExporterBuilder
6
+ def initialize(ds:, exporter_class:)
7
+ @ds = ds
8
+ @exporter_class = exporter_class
9
+ end
10
+
11
+ def build_for_table_delta(schema:, table:, condition:, columns:, bundle_params:, logger: RedshiftConnector.logger)
12
+ query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
13
+ @exporter_class.new(ds: @ds, query: query, bundle_params: bundle_params, logger: logger)
14
+ end
15
+
16
+ def build_for_table(schema:, table:, columns:, bundle_params:, logger: RedshiftConnector.logger)
17
+ query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
18
+ @exporter_class.new(ds: @ds, query: query, bundle_params: bundle_params, logger: logger)
19
+ end
20
+
21
+ def build_for_query(
22
+ schema:,
23
+ table:,
24
+ bucket: nil,
25
+ query:,
26
+ txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
27
+ filter: nil,
28
+ enable_sort: false,
29
+ logger: RedshiftConnector.logger,
30
+ quiet: false
31
+ )
32
+ logger = NullLogger.new if quiet
33
+ bundle_params = DataFileBundleParams.new(
34
+ bucket: bucket,
35
+ schema: schema,
36
+ table: table,
37
+ txn_id: txn_id,
38
+ filter: filter,
39
+ logger: logger
40
+ )
41
+ @exporter_class.new(
42
+ ds: @ds,
43
+ query: ArbitraryQuery.new(query),
44
+ bundle_params: bundle_params,
45
+ enable_sort: enable_sort,
46
+ logger: logger
47
+ )
48
+ end
49
+
50
+ end
51
+ end
@@ -0,0 +1,58 @@
1
+ # create module
2
+ module RedshiftConnector
3
+ module Importer
4
+ end
5
+ end
6
+
7
+ require 'redshift_connector/importer/upsert'
8
+ require 'redshift_connector/importer/insert_delta'
9
+ require 'redshift_connector/importer/rebuild_rename'
10
+ require 'redshift_connector/importer/rebuild_truncate'
11
+ require 'redshift_connector/logger'
12
+
13
+ module RedshiftConnector
14
+ module Importer
15
+ def Importer.for_delta_upsert(table:, columns:, delete_cond: nil, upsert_columns: nil, logger: RedshiftConnector.logger)
16
+ if delete_cond and upsert_columns
17
+ raise ArgumentError, "delete_cond and upsert_columns are exclusive"
18
+ end
19
+ importer =
20
+ if delete_cond
21
+ Importer::InsertDelta.new(
22
+ dao: table.classify.constantize,
23
+ columns: columns,
24
+ delete_cond: delete_cond,
25
+ logger: logger
26
+ )
27
+ elsif upsert_columns
28
+ Importer::Upsert.new(
29
+ dao: table.classify.constantize,
30
+ columns: columns,
31
+ upsert_columns: upsert_columns,
32
+ logger: logger
33
+ )
34
+ else
35
+ raise ArgumentError, "either of delete_cond or upsert_columns is required for delta import"
36
+ end
37
+ importer
38
+ end
39
+
40
+ def Importer.for_rebuild(strategy: 'rename', table:, columns:, logger: RedshiftConnector.logger)
41
+ c = get_rebuild_class(strategy)
42
+ c.new(
43
+ dao: table.classify.constantize,
44
+ columns: columns,
45
+ logger: logger
46
+ )
47
+ end
48
+
49
+ def Importer.get_rebuild_class(strategy)
50
+ case strategy.to_s
51
+ when 'rename' then RebuildRename
52
+ when 'truncate' then RebuildTruncate
53
+ else
54
+ raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
55
+ end
56
+ end
57
+ end
58
+ end
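Importer factories now only choose a strategy and build the DAO from the table name; the bundle arrives later through execute. A sketch (the Order model implied by table: 'orders' is an assumption, as is the bundle coming from an earlier export):

  importer = RedshiftConnector::Importer.for_delta_upsert(
    table: 'orders',                        # classify.constantize -> Order
    columns: %w[id amount updated_at],
    upsert_columns: %w[amount updated_at]
  )
  importer.execute(bundle)                  # bundle returned by the exporter's execute

  rebuilder = RedshiftConnector::Importer.for_rebuild(
    strategy: 'rename',                     # 'truncate' is the other supported strategy
    table: 'orders',
    columns: %w[id amount updated_at]
  )
  rebuilder.execute(bundle)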
@@ -1,19 +1,18 @@
1
- require 'redshift-connector/importer/activerecord-import'
2
- require 'redshift-connector/logger'
1
+ require 'redshift_connector/importer/activerecord-import'
2
+ require 'redshift_connector/logger'
3
3
 
4
4
  module RedshiftConnector
5
5
  class Importer::InsertDelta
6
- def initialize(dao:, bundle:, columns:, delete_cond:, logger: RedshiftConnector.logger)
6
+ def initialize(dao:, columns:, delete_cond:, logger: RedshiftConnector.logger)
7
7
  @dao = dao
8
- @bundle = bundle
9
8
  @columns = columns
10
9
  @delete_cond = delete_cond
11
10
  @logger = logger
12
11
  end
13
12
 
14
- def execute
13
+ def execute(bundle)
15
14
  delete_rows(@delete_cond)
16
- import
15
+ import(bundle)
17
16
  end
18
17
 
19
18
  def delete_rows(cond_expr)
@@ -22,9 +21,9 @@ module RedshiftConnector
22
21
  @logger.info "deleted."
23
22
  end
24
23
 
25
- def import
26
- @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
27
- @bundle.each_batch do |rows|
24
+ def import(bundle)
25
+ @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
26
+ bundle.each_batch do |rows|
28
27
  @dao.import(@columns, rows)
29
28
  end
30
29
  end
@@ -1,16 +1,15 @@
1
- require 'redshift-connector/importer/activerecord-import'
2
- require 'redshift-connector/logger'
1
+ require 'redshift_connector/importer/activerecord-import'
2
+ require 'redshift_connector/logger'
3
3
 
4
4
  module RedshiftConnector
5
5
  class Importer::RebuildRename
6
- def initialize(dao:, bundle:, columns:, logger: RedshiftConnector.logger)
6
+ def initialize(dao:, columns:, logger: RedshiftConnector.logger)
7
7
  @dao = dao
8
- @bundle = bundle
9
8
  @columns = columns
10
9
  @logger = logger
11
10
  end
12
11
 
13
- def execute
12
+ def execute(bundle)
14
13
  dest_table = @dao.table_name
15
14
  tmp_table = "#{dest_table}_new"
16
15
  old_table = "#{dest_table}_old"
@@ -20,7 +19,7 @@ module RedshiftConnector
20
19
 
21
20
  exec_update "drop table if exists #{tmp_table}"
22
21
  exec_update "create table #{tmp_table} like #{dest_table}"
23
- import(tmp_dao)
22
+ import(tmp_dao, bundle)
24
23
  exec_update "drop table if exists #{old_table}"
25
24
  # Atomic table exchange
26
25
  exec_update "rename table #{dest_table} to #{old_table}, #{tmp_table} to #{dest_table}"
@@ -31,9 +30,9 @@ module RedshiftConnector
31
30
  @dao.connection.execute(query)
32
31
  end
33
32
 
34
- def import(dao)
35
- @logger.info "IMPORT #{@bundle.url}* -> #{dao.table_name} (#{@columns.join(', ')})"
36
- @bundle.each_batch do |rows|
33
+ def import(dao, bundle)
34
+ @logger.info "IMPORT #{bundle.url}* -> #{dao.table_name} (#{@columns.join(', ')})"
35
+ bundle.each_batch do |rows|
37
36
  dao.import(@columns, rows)
38
37
  end
39
38
  end
@@ -1,18 +1,17 @@
1
- require 'redshift-connector/importer/activerecord-import'
2
- require 'redshift-connector/logger'
1
+ require 'redshift_connector/importer/activerecord-import'
2
+ require 'redshift_connector/logger'
3
3
 
4
4
  module RedshiftConnector
5
5
  class Importer::RebuildTruncate
6
- def initialize(dao:, bundle:, columns:, logger: RedshiftConnector.logger)
6
+ def initialize(dao:, columns:, logger: RedshiftConnector.logger)
7
7
  @dao = dao
8
- @bundle = bundle
9
8
  @columns = columns
10
9
  @logger = logger
11
10
  end
12
11
 
13
- def execute
12
+ def execute(bundle)
14
13
  truncate_table(@dao.table_name)
15
- import
14
+ import(bundle)
16
15
  end
17
16
 
18
17
  def truncate_table(table_name)
@@ -21,9 +20,9 @@ module RedshiftConnector
21
20
  @logger.info "truncated."
22
21
  end
23
22
 
24
- def import
25
- @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
26
- @bundle.each_batch do |rows|
23
+ def import(bundle)
24
+ @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')})"
25
+ bundle.each_batch do |rows|
27
26
  @dao.import(@columns, rows)
28
27
  end
29
28
  end
@@ -0,0 +1,24 @@
1
+ require 'redshift_connector/importer/activerecord-import'
2
+ require 'redshift_connector/logger'
3
+
4
+ module RedshiftConnector
5
+ class Importer::Upsert
6
+ def initialize(dao:, columns:, upsert_columns:, logger: RedshiftConnector.logger)
7
+ @dao = dao
8
+ @columns = columns
9
+ @upsert_columns = upsert_columns
10
+ @logger = logger
11
+ end
12
+
13
+ def execute(bundle)
14
+ import(bundle)
15
+ end
16
+
17
+ def import(bundle)
18
+ @logger.info "IMPORT #{bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')}) upsert (#{@upsert_columns.join(', ')})"
19
+ bundle.each_batch do |rows|
20
+ @dao.import(@columns, rows, on_duplicate_key_update: @upsert_columns)
21
+ end
22
+ end
23
+ end
24
+ end
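Importer::Upsert, like the other importers, now receives the bundle in execute and maps each batch onto activerecord-import's on_duplicate_key_update option. The per-batch call it makes is equivalent to this sketch (model, columns and rows are illustrative):

  Order.import(
    %w[id amount updated_at],                       # @columns
    [[1, 100, Time.now], [2, 250, Time.now]],       # one batch from bundle.each_batch
    on_duplicate_key_update: %w[amount updated_at]  # @upsert_columns
  )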
@@ -1,3 +1,3 @@
  module RedshiftConnector
- VERSION = '6.0.0'
+ VERSION = '7.0.0'
  end
@@ -1,18 +1,13 @@
1
- module RedshiftConnector
2
- # For test only
3
- $TEST_SCHEMA = 'hidekazukobayashi'
1
+ $TEST_SCHEMA = 'aamine'
4
2
 
5
- Exporter.default_data_source = Redshift
3
+ module RedshiftConnector
4
+ Exporter.default_data_source = RedshiftConnector::ActiveRecordDataSource.new(Redshift)
6
5
 
7
6
  S3Bucket.add(
8
- 'test',
9
- bucket: 'misc-internal.ap-northeast-1',
10
- prefix: 'development/hidekazu-kobayashi',
11
- # When using IAM role
12
- #iam_role: 'arn:aws:iam::NNNNNNNNNNNN:role/RRRRRRRRR',
13
- # When using explicit access key
14
- access_key_id: 'AKIAJJGEKUU2MXO3X4NA',
15
- secret_access_key: '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',
7
+ 'redshift-copy-buffer',
8
+ bucket: 'redshift-copy-buffer',
9
+ prefix: 'development',
10
+ iam_role: 'arn:aws:iam::789035092620:role/RedshiftDevelopers',
16
11
  default: true
17
12
  )
18
13
  end
@@ -1,7 +1,7 @@
1
1
  mysql:
2
2
  adapter: mysql2
3
3
  host: localhost
4
- username: test
4
+ username: minero-aoki
5
5
  database: test
6
6
  encoding: utf8
7
7
 
@@ -10,6 +10,6 @@ redshift:
10
10
  host: dwh.ckpd.co
11
11
  port: 5439
12
12
  database: production
13
- username: hidekazukobayashi
14
- password: '?N6s3oH#jc9k3d+P'
13
+ username: aamine
14
+ password: "3edCVfr$"
15
15
  encoding: utf8
@@ -0,0 +1,5 @@
+ require_relative 'helper'
+
+ RedshiftConnector.foreach(schema: 'tabemiru', table: 'items', query: 'select id from tabemiru.items where id < 50 order by 1', enable_sort: true) do |row|
+ p row
+ end
metadata CHANGED
@@ -1,31 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.0.0
4
+ version: 7.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-02 00:00:00.000000000 Z
11
+ date: 2017-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: activerecord
14
+ name: redshift-connector-data_file
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: 7.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: 7.0.0
27
27
  - !ruby/object:Gem::Dependency
28
- name: activerecord-redshift
28
+ name: activerecord
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,33 +39,33 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: redshift-connector-data_file
42
+ name: activerecord-redshift
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 1.1.0
47
+ version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: 1.1.0
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: pg
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.18'
61
+ version: 0.18.0
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0.18'
68
+ version: 0.18.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: activerecord-import
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -144,33 +144,35 @@ extra_rdoc_files: []
144
144
  files:
145
145
  - README.md
146
146
  - lib/redshift-connector.rb
147
- - lib/redshift-connector/connector.rb
148
- - lib/redshift-connector/exporter.rb
149
- - lib/redshift-connector/importer.rb
150
- - lib/redshift-connector/importer/activerecord-import.rb
151
- - lib/redshift-connector/importer/insert_delta.rb
152
- - lib/redshift-connector/importer/rebuild_rename.rb
153
- - lib/redshift-connector/importer/rebuild_truncate.rb
154
- - lib/redshift-connector/importer/upsert.rb
155
- - lib/redshift-connector/logger.rb
156
- - lib/redshift-connector/query.rb
157
- - lib/redshift-connector/s3_bucket.rb
158
- - lib/redshift-connector/s3_data_file.rb
159
- - lib/redshift-connector/s3_data_file_bundle.rb
160
- - lib/redshift-connector/version.rb
147
+ - lib/redshift_connector.rb
148
+ - lib/redshift_connector/active_record_data_source.rb
149
+ - lib/redshift_connector/active_record_exporter.rb
150
+ - lib/redshift_connector/connector.rb
151
+ - lib/redshift_connector/data_file_bundle_params.rb
152
+ - lib/redshift_connector/exception.rb
153
+ - lib/redshift_connector/exporter.rb
154
+ - lib/redshift_connector/exporter_builder.rb
155
+ - lib/redshift_connector/importer.rb
156
+ - lib/redshift_connector/importer/activerecord-import.rb
157
+ - lib/redshift_connector/importer/insert_delta.rb
158
+ - lib/redshift_connector/importer/rebuild_rename.rb
159
+ - lib/redshift_connector/importer/rebuild_truncate.rb
160
+ - lib/redshift_connector/importer/upsert.rb
161
+ - lib/redshift_connector/query.rb
162
+ - lib/redshift_connector/version.rb
161
163
  - test/all.rb
162
164
  - test/config.rb
163
165
  - test/config.rb.example
164
166
  - test/database.yml
165
167
  - test/database.yml.example
168
+ - test/foreach.rb
166
169
  - test/helper.rb
167
170
  - test/item_pvs.ct.mysql
168
171
  - test/item_pvs.ct.redshift
169
172
  - test/reader/test_redshift_csv.rb
170
173
  - test/test_connector.rb
171
- - test/test_reader.rb
172
174
  - test/test_s3_import.rb
173
- homepage: https://github.com/aamine/redshift-connector
175
+ homepage: https://github.com/bricolages/redshift-connector
174
176
  licenses:
175
177
  - MIT
176
178
  metadata: {}
@@ -190,7 +192,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
190
192
  version: '0'
191
193
  requirements: []
192
194
  rubyforge_project:
193
- rubygems_version: 2.6.11
195
+ rubygems_version: 2.6.8
194
196
  signing_key:
195
197
  specification_version: 4
196
198
  summary: Redshift bulk data connector
@@ -1,103 +0,0 @@
1
- require 'redshift-connector/query'
2
- require 'redshift-connector/logger'
3
-
4
- module RedshiftConnector
5
- class Exporter
6
- def Exporter.default_data_source=(ds)
7
- @default_data_source = ds
8
- end
9
-
10
- def Exporter.default_data_source
11
- @default_data_source or raise ArgumentError, "RedshiftConnector::Exporter.default_data_source was not set"
12
- end
13
-
14
- def Exporter.for_table_delta(ds: default_data_source, schema:, table:, condition:, columns:, bundle:, logger: RedshiftConnector.logger)
15
- delta_query = DeltaQuery.new(schema: schema, table: table, columns: columns, condition: condition)
16
- unload_query = UnloadQuery.new(query: delta_query, bundle: bundle)
17
- new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
18
- end
19
-
20
- def Exporter.for_table(ds: default_data_source, schema:, table:, columns:, bundle:, logger: RedshiftConnector.logger)
21
- query = SelectAllQuery.new(schema: schema, table: table, columns: columns)
22
- unload_query = UnloadQuery.new(query: query, bundle: bundle)
23
- new(ds: ds, query: unload_query, bundle: bundle, logger: logger)
24
- end
25
-
26
- def Exporter.foreach(**params, &block)
27
- exporter = Exporter.for_query(**params)
28
- begin
29
- exporter.execute
30
- exporter.bundle.each_row(&block)
31
- ensure
32
- exporter.bundle.clear
33
- end
34
- end
35
-
36
- def Exporter.for_query(
37
- ds: default_data_source,
38
- schema:,
39
- table:,
40
- bucket: nil,
41
- query:,
42
- txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
43
- filter: nil,
44
- enable_sort: false,
45
- logger: RedshiftConnector.logger,
46
- quiet: false
47
- )
48
- bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
49
- logger = NullLogger.new if quiet
50
- bundle = S3DataFileBundle.for_table(
51
- bucket: bucket,
52
- schema: schema,
53
- table: table,
54
- txn_id: txn_id,
55
- filter: filter,
56
- logger: logger
57
- )
58
- exporter = Exporter.new(
59
- ds: ds,
60
- query: UnloadQuery.wrap(query: query, bundle: bundle, enable_sort: enable_sort),
61
- bundle: bundle,
62
- logger: logger
63
- )
64
- exporter
65
- end
66
-
67
- def initialize(ds: self.class.default_data_source, query:, bundle:, logger: RedshiftConnector.logger)
68
- @ds = ds
69
- @query = query
70
- @bundle = bundle
71
- @logger = logger
72
- end
73
-
74
- attr_reader :query
75
- attr_reader :bundle
76
- attr_reader :logger
77
-
78
- def execute
79
- @bundle.clear
80
- @logger.info "EXPORT #{@query.description} -> #{@bundle.url}*"
81
- @ds.connection_pool.with_connection do |conn|
82
- stmt = @query.to_sql
83
- @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
84
- conn.execute(batch_job_label + stmt)
85
- end
86
- end
87
-
88
- def batch_job_label
89
- @batch_job_label ||= begin
90
- components = Dir.getwd.split('/')
91
- app = if components.last == 'current'
92
- # is Capistrano environment
93
- components[-2]
94
- else
95
- components[-1]
96
- end
97
- batch_file = caller.detect {|c| /redshift-connector|active_record/ !~ c }
98
- path = batch_file ? batch_file.split(':').first : '?'
99
- "/* Job: #{app}:#{path} */ "
100
- end
101
- end
102
- end
103
- end
@@ -1,117 +0,0 @@
1
- # create module
2
- module RedshiftConnector
3
- module Importer
4
- end
5
- end
6
-
7
- require 'redshift-connector/importer/upsert'
8
- require 'redshift-connector/importer/insert_delta'
9
- require 'redshift-connector/importer/rebuild_rename'
10
- require 'redshift-connector/importer/rebuild_truncate'
11
-
12
- require 'redshift-connector/s3_data_file_bundle'
13
- require 'redshift-connector/logger'
14
-
15
- module RedshiftConnector
16
- module Importer
17
- def Importer.transport_delta_from_s3(
18
- bucket: nil, prefix:, format:, filter: nil,
19
- table:, columns:,
20
- delete_cond: nil, upsert_columns: nil,
21
- logger: RedshiftConnector.logger, quiet: false)
22
- bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
23
- logger = NullLogger.new if quiet
24
- bundle = S3DataFileBundle.for_prefix(
25
- bucket: bucket,
26
- prefix: prefix,
27
- format: format,
28
- filter: filter,
29
- logger: logger
30
- )
31
- transport_delta_from_bundle(
32
- bundle: bundle,
33
- table: table, columns: columns,
34
- delete_cond: delete_cond, upsert_columns: upsert_columns,
35
- logger: logger, quiet: quiet
36
- )
37
- end
38
-
39
- def Importer.transport_delta_from_bundle(
40
- bundle:,
41
- table:, columns:,
42
- delete_cond: nil, upsert_columns: nil,
43
- logger: RedshiftConnector.logger, quiet: false
44
- )
45
- if delete_cond and upsert_columns
46
- raise ArgumentError, "delete_cond and upsert_columns are exclusive"
47
- end
48
- importer =
49
- if delete_cond
50
- Importer::InsertDelta.new(
51
- dao: table.classify.constantize,
52
- bundle: bundle,
53
- columns: columns,
54
- delete_cond: delete_cond,
55
- logger: logger
56
- )
57
- elsif upsert_columns
58
- Importer::Upsert.new(
59
- dao: table.classify.constantize,
60
- bundle: bundle,
61
- columns: columns,
62
- upsert_columns: upsert_columns,
63
- logger: logger
64
- )
65
- else
66
- raise ArgumentError, "either of delete_cond or upsert_columns is required for transport_delta"
67
- end
68
- importer
69
- end
70
-
71
- def Importer.transport_all_from_s3(
72
- strategy: 'rename',
73
- bucket: nil, prefix:, format:, filter: nil,
74
- table:, columns:,
75
- logger: RedshiftConnector.logger, quiet: false)
76
- bucket = bucket ? S3Bucket.get(bucket) : S3Bucket.default
77
- logger = NullLogger.new if quiet
78
- bundle = S3DataFileBundle.for_prefix(
79
- bucket: bucket,
80
- prefix: prefix,
81
- format: format,
82
- filter: filter,
83
- logger: logger
84
- )
85
- transport_all_from_bundle(
86
- strategy: strategy,
87
- bundle: bundle,
88
- table: table, columns: columns,
89
- logger: logger, quiet: quiet
90
- )
91
- end
92
-
93
- def Importer.transport_all_from_bundle(
94
- strategy: 'rename',
95
- bundle:,
96
- table:, columns:,
97
- logger: RedshiftConnector.logger, quiet: false
98
- )
99
- importer = get_rebuild_class(strategy).new(
100
- dao: table.classify.constantize,
101
- bundle: bundle,
102
- columns: columns,
103
- logger: logger
104
- )
105
- importer
106
- end
107
-
108
- def Importer.get_rebuild_class(strategy)
109
- case strategy.to_s
110
- when 'rename' then RebuildRename
111
- when 'truncate' then RebuildTruncate
112
- else
113
- raise ArgumentError, "unsupported rebuild strategy: #{strategy.inspect}"
114
- end
115
- end
116
- end
117
- end
@@ -1,25 +0,0 @@
1
- require 'redshift-connector/importer/activerecord-import'
2
- require 'redshift-connector/logger'
3
-
4
- module RedshiftConnector
5
- class Importer::Upsert
6
- def initialize(dao:, bundle:, columns:, upsert_columns:, logger: RedshiftConnector.logger)
7
- @dao = dao
8
- @bundle = bundle
9
- @columns = columns
10
- @upsert_columns = upsert_columns
11
- @logger = logger
12
- end
13
-
14
- def execute
15
- import
16
- end
17
-
18
- def import
19
- @logger.info "IMPORT #{@bundle.url}* -> #{@dao.table_name} (#{@columns.join(', ')}) upsert (#{@upsert_columns.join(', ')})"
20
- @bundle.each_batch do |rows|
21
- @dao.import(@columns, rows, on_duplicate_key_update: @upsert_columns)
22
- end
23
- end
24
- end
25
- end
@@ -1,20 +0,0 @@
1
- module RedshiftConnector
2
- @logger = nil
3
-
4
- def RedshiftConnector.logger
5
- # Defer to access Rails
6
- @logger || Rails.logger
7
- end
8
-
9
- def RedshiftConnector.logger=(logger)
10
- @logger = logger
11
- end
12
-
13
- class NullLogger
14
- def noop(*args) end
15
- alias error noop
16
- alias warn noop
17
- alias info noop
18
- alias debug noop
19
- end
20
- end
@@ -1,76 +0,0 @@
1
- require 'aws-sdk'
2
-
3
- module RedshiftConnector
4
- class S3Bucket
5
- @buckets = {}
6
- @default = nil
7
-
8
- def S3Bucket.add(name, default: false, **params)
9
- instance = new(**params)
10
- @buckets[name.to_s] = instance
11
- if !@default or default
12
- @default = instance
13
- end
14
- end
15
-
16
- def S3Bucket.default
17
- @default or raise ArgumentError, "no default S3 bucket configured"
18
- end
19
-
20
- def S3Bucket.get(name)
21
- @buckets[name.to_s] or raise ArgumentError, "no such S3 bucket configured: #{name.inspect}"
22
- end
23
-
24
- def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
25
- @region = region
26
- @name = bucket
27
- @prefix = prefix
28
- @access_key_id = access_key_id
29
- @secret_access_key = secret_access_key
30
- @iam_role = iam_role
31
- end
32
-
33
- attr_reader :name
34
- attr_reader :prefix
35
-
36
- def url
37
- "s3://#{@bucket.name}/#{@prefix}/"
38
- end
39
-
40
- def client
41
- @client ||= begin
42
- args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
43
- Aws::S3::Client.new(**args)
44
- end
45
- end
46
-
47
- def bucket
48
- @bucket ||= begin
49
- resource = Aws::S3::Resource.new(client: client)
50
- resource.bucket(@name)
51
- end
52
- end
53
-
54
- def object(key)
55
- bucket.object(key)
56
- end
57
-
58
- def objects(prefix:)
59
- bucket.objects(prefix: prefix)
60
- end
61
-
62
- def delete_objects(keys)
63
- bucket.delete_objects(delete: {objects: keys.map {|k| {key: k} }})
64
- end
65
-
66
- def credential_string
67
- if @iam_role
68
- "aws_iam_role=#{@iam_role}"
69
- elsif @access_key_id
70
- "aws_access_key_id=#{@access_key_id};aws_secret_access_key=#{@secret_access_key}"
71
- else
72
- raise ArgumentError, "no credential given for Redshift S3 access"
73
- end
74
- end
75
- end
76
- end
@@ -1,20 +0,0 @@
1
- require 'redshift-connector/data_file'
2
-
3
- module RedshiftConnector
4
- class S3DataFile < AbstractDataFile
5
- def initialize(object, reader_class:)
6
- @object = object
7
- @reader_class = reader_class
8
- end
9
-
10
- def key
11
- @object.key
12
- end
13
-
14
- def content
15
- @object.get.body
16
- end
17
-
18
- delegate :presigned_url, to: :@object
19
- end
20
- end
@@ -1,54 +0,0 @@
1
- require 'redshift-connector/s3_bucket'
2
- require 'redshift-connector/s3_data_file'
3
- require 'redshift-connector/logger'
4
- require 'redshift-connector/data_file'
5
- require 'aws-sdk'
6
-
7
- module RedshiftConnector
8
- class S3DataFileBundle < AbstractDataFileBundle
9
- def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
10
- real_prefix = "#{bucket.prefix}/#{prefix}"
11
- new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
12
- end
13
-
14
- def self.for_table(bucket: S3Bucket.default, schema:, table:, txn_id:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
15
- prefix = "#{bucket.prefix}/#{schema}_export/#{table}/#{txn_id}/#{table}.csv."
16
- new(bucket, prefix, format: :redshift_csv, filter: filter, batch_size: batch_size, logger: logger)
17
- end
18
-
19
- def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
20
- @bucket = bucket
21
- @prefix = prefix
22
- @format = format
23
- @filter = filter || lambda {|*row| row }
24
- @batch_size = batch_size
25
- @logger = logger
26
- @reader_class = Reader.get(format)
27
- end
28
-
29
- attr_reader :bucket
30
- attr_reader :prefix
31
-
32
- def url
33
- "s3://#{@bucket.name}/#{@prefix}"
34
- end
35
-
36
- def credential_string
37
- @bucket.credential_string
38
- end
39
-
40
- def data_files
41
- @bucket.objects(prefix: @prefix)
42
- .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
43
- end
44
-
45
- def clear
46
- pref = File.dirname(@prefix) + '/'
47
- keys = @bucket.objects(prefix: pref).map(&:key)
48
- unless keys.empty?
49
- @logger.info "DELETE #{pref}*"
50
- @bucket.delete_objects(keys)
51
- end
52
- end
53
- end
54
- end
@@ -1,9 +0,0 @@
1
- require 'test/unit'
2
-
3
- module RedshiftConnector
4
- class TestReader < Test::Unit::TestCase
5
- def test_get
6
- assert_equal Reader::RedshiftCSV, Reader.get(:redshift_csv)
7
- end
8
- end
9
- end