redshift-connector 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +10 -0
  3. data/lib/redshift-connector.rb +31 -0
  4. data/lib/redshift-connector/connector.rb +146 -0
  5. data/lib/redshift-connector/exporter.rb +116 -0
  6. data/lib/redshift-connector/importer.rb +89 -0
  7. data/lib/redshift-connector/importer/activerecord-import.rb +2 -0
  8. data/lib/redshift-connector/importer/insert_delta.rb +32 -0
  9. data/lib/redshift-connector/importer/rebuild_rename.rb +41 -0
  10. data/lib/redshift-connector/importer/rebuild_truncate.rb +31 -0
  11. data/lib/redshift-connector/importer/upsert.rb +25 -0
  12. data/lib/redshift-connector/logger.rb +20 -0
  13. data/lib/redshift-connector/query.rb +93 -0
  14. data/lib/redshift-connector/reader.rb +18 -0
  15. data/lib/redshift-connector/reader/abstract.rb +18 -0
  16. data/lib/redshift-connector/reader/csv.rb +24 -0
  17. data/lib/redshift-connector/reader/exception.rb +3 -0
  18. data/lib/redshift-connector/reader/redshift_csv.rb +54 -0
  19. data/lib/redshift-connector/reader/tsv.rb +24 -0
  20. data/lib/redshift-connector/s3_bucket.rb +72 -0
  21. data/lib/redshift-connector/s3_data_file.rb +34 -0
  22. data/lib/redshift-connector/s3_data_file_bundle.rb +101 -0
  23. data/lib/redshift-connector/version.rb +3 -0
  24. data/test/all.rb +3 -0
  25. data/test/config.rb +13 -0
  26. data/test/config.rb.example +18 -0
  27. data/test/database.yml +15 -0
  28. data/test/database.yml.example +15 -0
  29. data/test/foreach.rb +5 -0
  30. data/test/helper.rb +25 -0
  31. data/test/item_pvs.ct.mysql +11 -0
  32. data/test/item_pvs.ct.redshift +9 -0
  33. data/test/reader/test_redshift_csv.rb +30 -0
  34. data/test/test_connector.rb +148 -0
  35. data/test/test_reader.rb +10 -0
  36. data/test/test_s3_import.rb +32 -0
  37. metadata +190 -0
@@ -0,0 +1,3 @@
# Namespace of the redshift-connector gem.
module RedshiftConnector
  # Version string of the gem. Frozen so that the shared constant cannot be
  # modified in place at runtime.
  VERSION = '4.3.0'.freeze
end
data/test/all.rb ADDED
@@ -0,0 +1,3 @@
# Loads every test file (test_*.rb) found below this directory, recursively.
# The list is sorted so that the load order does not depend on the order in
# which the file system returns directory entries.
Dir.glob("#{__dir__}/**/test_*.rb").sort.each do |path|
  load path
end
data/test/config.rb ADDED
@@ -0,0 +1,13 @@
# Schema name read by the tests through the $TEST_SCHEMA global.
$TEST_SCHEMA = 'aamine'

# Test settings: sets the default data source of Exporter and registers one
# S3 bucket entry, flagged as the default.
module RedshiftConnector
  Exporter.default_data_source = Redshift

  S3Bucket.add 'redshift-copy-buffer',
               bucket: 'redshift-copy-buffer',
               prefix: 'development',
               iam_role: 'arn:aws:iam::789035092620:role/RedshiftDevelopers',
               default: true
end
@@ -0,0 +1,18 @@
# Example test settings; the upper-case values below are placeholders.
module RedshiftConnector
  # For test only
  $TEST_SCHEMA = 'test'

  Exporter.default_data_source = Redshift

  S3Bucket.add('ENTRY_NAME',
               bucket: 'YOUR_BUCKET_NAME',
               prefix: 'development',
               # When using IAM role
               iam_role: 'arn:aws:iam::NNNNNNNNNNNN:role/RRRRRRRRR',
               # When using explicit access key
               #access_key_id: 'AAAAAAAAAA',
               #secret_access_key: 'SSSSSSSSSS',
               default: true)
end
data/test/database.yml ADDED
@@ -0,0 +1,15 @@
# Connection settings read by test/helper.rb.
mysql:
  adapter: mysql2
  host: localhost
  username: minero-aoki
  database: test
  encoding: utf8

redshift:
  adapter: redshift
  host: dwh.ckpd.co
  port: 5439
  database: production
  username: aamine
  # SECURITY: a real password must never be stored in a file that is shipped
  # with the package. Fill it in locally only (same placeholder as in
  # database.yml.example) and rotate any password that was already published.
  password: PASSWORD
  encoding: utf8
@@ -0,0 +1,15 @@
# Example connection settings; replace the upper-case placeholders.

# MySQL connection.
mysql:
  adapter: mysql2
  host: localhost
  username: USER_NAME
  database: test
  encoding: utf8

# Redshift connection.
redshift:
  adapter: redshift
  host: HOST_NAME
  port: 5439
  database: DATABASE_NAME
  username: USER_NAME
  password: PASSWORD
  encoding: utf8
data/test/foreach.rb ADDED
@@ -0,0 +1,5 @@
require_relative 'helper'

# Prints every row that RedshiftConnector.foreach yields for the query below.
items_query = 'select * from tabemiru.items where id < 10'
RedshiftConnector.foreach(schema: 'tabemiru', table: 'items', query: items_query) { |row| p row }
data/test/helper.rb ADDED
@@ -0,0 +1,25 @@
require 'active_record'
require 'erb'
require 'yaml'
require 'logger'

# Register every entry of database.yml as an ActiveRecord configuration.
# The file is rendered through ERB before it is parsed, so that values such
# as passwords can be written as <%= ENV['...'] %> instead of being stored in
# the file itself. A file without ERB tags is read exactly as before.
database_yaml = File.read("#{__dir__}/database.yml", mode: 'r:bom|utf-8')
YAML.load(ERB.new(database_yaml).result).each do |name, ent|
  ActiveRecord::Base.configurations[name] = ent
end

# Abstract base class connected with the :mysql configuration.
class BaseConn < ActiveRecord::Base
  establish_connection :mysql
  self.abstract_class = true
end

# Model on the MySQL connection (presumably the item_pvs table, by
# ActiveRecord naming convention -- TODO confirm).
class ItemPv < BaseConn
  # Evaluated while the class body is defined; presumably forces the
  # connection to be set up eagerly -- TODO confirm.
  connection
end

# Class connected with the :redshift configuration; config.rb assigns it to
# Exporter.default_data_source.
class Redshift < ActiveRecord::Base
  establish_connection :redshift
end

require 'redshift-connector'
# This IS REQUIRED to ensure to load mysql2 driver, at least outside of Rails.
ActiveRecord::Import.require_adapter 'mysql2'
require_relative 'config'

# Send the connector's log output to standard error.
RedshiftConnector.logger = Logger.new($stderr)
@@ -0,0 +1,11 @@
-- MySQL definition of the item_pvs table.
CREATE TABLE item_pvs (
    id        BIGINT(20) PRIMARY KEY,
    data_date DATE       NOT NULL,
    item_id   INT(11)    NOT NULL,
    pv        BIGINT(20) NOT NULL DEFAULT 0,
    uu        BIGINT(20) NOT NULL DEFAULT 0
);

-- Unique B-tree index over (id, data_date, item_id).
CREATE UNIQUE INDEX item_pvs_idx
    ON item_pvs (id, data_date, item_id)
    USING BTREE;
@@ -0,0 +1,9 @@
-- Redshift definition of the item_pvs table; every column uses LZO encoding
-- and the table is sorted by (data_date, item_id).
CREATE TABLE item_pvs (
    id        BIGINT  ENCODE LZO,
    data_date DATE    ENCODE LZO,
    item_id   INTEGER ENCODE LZO,
    pv        BIGINT  ENCODE LZO,
    uu        BIGINT  ENCODE LZO
)
SORTKEY (data_date, item_id);
@@ -0,0 +1,30 @@
require 'test/unit'
require 'redshift-connector/reader'

module RedshiftConnector
  module Reader
    # Unit tests for RedshiftCSV#parse_row.
    class TestRedshiftCSV < Test::Unit::TestCase
      # Parses one line with a RedshiftCSV instance constructed with nil.
      # The second argument of parse_row is fixed to 1 (presumably a line
      # number -- TODO confirm).
      def parse_row(line)
        RedshiftCSV.new(nil).parse_row(line, 1)
      end

      # Each case is a pair of [expected columns, raw input line].
      def test_parse_row
        cases = [
          [%w[xxx yyyy zzz],
           %Q("xxx","yyyy","zzz"\n)],
          [%w[xxx yyyy zzz],
           %Q( "xxx" , "yyyy","zzz"\t\n)],
          [['x,x', "y\r\ny", 'z"z', 'a\\a'],
           %Q("x\\,x","y\\r\\ny","z\\"z","a\\\\a"\n)],
          [['981179', '2017-01-07', '6', 'show', '99', '3'],
           %Q("981179","2017-01-07","6","show","99","3"\r\n)],
          [['981179', '2017-01-07', '6', '852', 'show', '{"page"=>"4"}', '1', '1'],
           %Q("981179","2017-01-07","6","852","show","{\\"page\\"=>\\"4\\"}","1","1"\n)]
        ]

        cases.each do |expected, line|
          assert_equal expected, parse_row(line)
        end
      end
    end
  end
end
@@ -0,0 +1,148 @@
require_relative 'helper'
require 'test/unit'

# Tests for RedshiftConnector.transport_delta and RedshiftConnector.transport_all
# using the item_pvs table in $TEST_SCHEMA.
class TestConnector < Test::Unit::TestCase
  # Delta transport with upsert_columns.
  def test_connector_upsert
    RedshiftConnector.transport_delta(
      schema: $TEST_SCHEMA,
      table: 'item_pvs',
      txn_id: target_date,
      condition: date_condition,
      columns: item_pv_columns,
      upsert_columns: %w[pv uu],
      filter: integer_filter
    ).execute
  end

  # Delta transport with delete_cond.
  def test_connector_delete_insert
    RedshiftConnector.transport_delta(
      schema: $TEST_SCHEMA,
      table: 'item_pvs',
      txn_id: target_date,
      condition: date_condition,
      delete_cond: date_condition,
      columns: item_pv_columns,
      filter: integer_filter
    ).execute
  end

  # Giving both delete_cond and upsert_columns must raise ArgumentError.
  def test_dup_options
    assert_raise(ArgumentError) do
      RedshiftConnector.transport_delta(
        schema: $TEST_SCHEMA,
        table: 'item_pvs',
        txn_id: target_date,
        condition: date_condition,
        delete_cond: date_condition,
        # Conflicts with delete_cond option
        upsert_columns: %w[pv uu],
        columns: item_pv_columns,
        filter: integer_filter
      )
    end
  end

  # Giving neither delete_cond nor upsert_columns must raise ArgumentError.
  def test_no_required_option
    assert_raise(ArgumentError) do
      RedshiftConnector.transport_delta(
        schema: $TEST_SCHEMA,
        table: 'item_pvs',
        txn_id: target_date,
        condition: date_condition,
        columns: item_pv_columns,
        filter: integer_filter
      )
    end
  end

  # Full transport with the 'truncate' strategy.
  def test_connector_rebuild_truncate
    RedshiftConnector.transport_all(
      strategy: 'truncate',
      schema: $TEST_SCHEMA,
      table: 'item_pvs',
      txn_id: target_date,
      columns: item_pv_columns,
      filter: integer_filter
    ).execute
  end

  # Full transport with the 'rename' strategy.
  def test_connector_rebuild_rename
    RedshiftConnector.transport_all(
      strategy: 'rename',
      schema: $TEST_SCHEMA,
      table: 'item_pvs',
      txn_id: target_date,
      columns: item_pv_columns,
      filter: integer_filter
    ).execute
  end

  # Delta transport that names src_table and dest_table separately.
  def test_connector_src_dest_table
    RedshiftConnector.transport_delta(
      schema: $TEST_SCHEMA,
      src_table: 'item_pvs',
      dest_table: 'item_pvs',
      txn_id: target_date,
      condition: date_condition,
      columns: item_pv_columns,
      upsert_columns: %w[pv uu],
      filter: integer_filter
    ).execute
  end

  # Giving src_table without dest_table must raise ArgumentError.
  def test_connector_missing_src_dest
    assert_raise(ArgumentError) do
      RedshiftConnector.transport_delta(
        schema: $TEST_SCHEMA,
        src_table: 'item_pvs',
        txn_id: target_date,
        condition: date_condition,
        columns: item_pv_columns,
        upsert_columns: %w[pv uu],
        filter: integer_filter
      )
    end
  end

  private

  # Date used as txn_id and inside the conditions of every test.
  def target_date
    '2016-11-03'
  end

  # Condition string selecting the rows of target_date.
  def date_condition
    %Q(data_date = date '#{target_date}')
  end

  # Column list passed as the columns option.
  def item_pv_columns
    %w[id data_date item_id pv uu]
  end

  # Five-argument lambda passed as the filter option: converts every column
  # except data_date with to_i.
  def integer_filter
    lambda do |id, data_date, item_id, pv, uu|
      [id.to_i, data_date, item_id.to_i, pv.to_i, uu.to_i]
    end
  end
end
@@ -0,0 +1,10 @@
require 'test/unit'
require 'redshift-connector/reader'

module RedshiftConnector
  # Test for the lookup done by Reader.get.
  class TestReader < Test::Unit::TestCase
    # Reader.get(:redshift_csv) must return the Reader::RedshiftCSV class.
    def test_get
      expected_class = Reader::RedshiftCSV
      actual_class = Reader.get(:redshift_csv)
      assert_equal expected_class, actual_class
    end
  end
end
@@ -0,0 +1,32 @@
require_relative 'helper'
require 'test/unit'
require 'redshift-connector'

# Tests for RedshiftConnector.transport_delta_from_s3 and
# RedshiftConnector.transport_all_from_s3 with format: :tsv.
class TestS3Import < Test::Unit::TestCase
  # Delta import with upsert_columns.
  def test_import_delta_tsv
    RedshiftConnector.transport_delta_from_s3(
      prefix: tsv_prefix('2016-11-03'),
      format: :tsv,
      table: 'item_pvs',
      columns: item_pv_columns,
      upsert_columns: %w[pv uu]
    ).execute
  end

  # Full import with the 'truncate' strategy.
  def test_import_all
    RedshiftConnector.transport_all_from_s3(
      strategy: 'truncate',
      prefix: tsv_prefix('2016-11-03'),
      format: :tsv,
      table: 'item_pvs',
      columns: item_pv_columns
    ).execute
  end

  private

  # Builds the prefix option from $TEST_SCHEMA and the given date.
  def tsv_prefix(data_date)
    "#{$TEST_SCHEMA}_export/item_pvs_tsv/#{data_date}/item_pvs.tsv."
  end

  # Column list passed as the columns option.
  def item_pv_columns
    %w[id data_date item_id pv uu]
  end
end
metadata ADDED
@@ -0,0 +1,190 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: redshift-connector
3
+ version: !ruby/object:Gem::Version
4
+ version: 4.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Minero Aoki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-02-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "<"
18
+ - !ruby/object:Gem::Version
19
+ version: '5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "<"
25
+ - !ruby/object:Gem::Version
26
+ version: '5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activerecord4-redshift-adapter
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pg
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.18.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.18.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: activerecord-import
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: aws-sdk
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2.0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: test-unit
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rake
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description: redshift-connector is a bulk data connector for Rails (ActiveRecord).
126
+ email: aamine@loveruby.net
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - README.md
132
+ - lib/redshift-connector.rb
133
+ - lib/redshift-connector/connector.rb
134
+ - lib/redshift-connector/exporter.rb
135
+ - lib/redshift-connector/importer.rb
136
+ - lib/redshift-connector/importer/activerecord-import.rb
137
+ - lib/redshift-connector/importer/insert_delta.rb
138
+ - lib/redshift-connector/importer/rebuild_rename.rb
139
+ - lib/redshift-connector/importer/rebuild_truncate.rb
140
+ - lib/redshift-connector/importer/upsert.rb
141
+ - lib/redshift-connector/logger.rb
142
+ - lib/redshift-connector/query.rb
143
+ - lib/redshift-connector/reader.rb
144
+ - lib/redshift-connector/reader/abstract.rb
145
+ - lib/redshift-connector/reader/csv.rb
146
+ - lib/redshift-connector/reader/exception.rb
147
+ - lib/redshift-connector/reader/redshift_csv.rb
148
+ - lib/redshift-connector/reader/tsv.rb
149
+ - lib/redshift-connector/s3_bucket.rb
150
+ - lib/redshift-connector/s3_data_file.rb
151
+ - lib/redshift-connector/s3_data_file_bundle.rb
152
+ - lib/redshift-connector/version.rb
153
+ - test/all.rb
154
+ - test/config.rb
155
+ - test/config.rb.example
156
+ - test/database.yml
157
+ - test/database.yml.example
158
+ - test/foreach.rb
159
+ - test/helper.rb
160
+ - test/item_pvs.ct.mysql
161
+ - test/item_pvs.ct.redshift
162
+ - test/reader/test_redshift_csv.rb
163
+ - test/test_connector.rb
164
+ - test/test_reader.rb
165
+ - test/test_s3_import.rb
166
+ homepage: https://github.com/aamine/redshift-connector
167
+ licenses:
168
+ - MIT
169
+ metadata: {}
170
+ post_install_message:
171
+ rdoc_options: []
172
+ require_paths:
173
+ - lib
174
+ required_ruby_version: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - ">="
177
+ - !ruby/object:Gem::Version
178
+ version: 2.1.0
179
+ required_rubygems_version: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - ">="
182
+ - !ruby/object:Gem::Version
183
+ version: '0'
184
+ requirements: []
185
+ rubyforge_project:
186
+ rubygems_version: 2.6.8
187
+ signing_key:
188
+ specification_version: 4
189
+ summary: Redshift bulk data connector
190
+ test_files: []