redshift-connector 4.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +10 -0
  3. data/lib/redshift-connector.rb +31 -0
  4. data/lib/redshift-connector/connector.rb +146 -0
  5. data/lib/redshift-connector/exporter.rb +116 -0
  6. data/lib/redshift-connector/importer.rb +89 -0
  7. data/lib/redshift-connector/importer/activerecord-import.rb +2 -0
  8. data/lib/redshift-connector/importer/insert_delta.rb +32 -0
  9. data/lib/redshift-connector/importer/rebuild_rename.rb +41 -0
  10. data/lib/redshift-connector/importer/rebuild_truncate.rb +31 -0
  11. data/lib/redshift-connector/importer/upsert.rb +25 -0
  12. data/lib/redshift-connector/logger.rb +20 -0
  13. data/lib/redshift-connector/query.rb +93 -0
  14. data/lib/redshift-connector/reader.rb +18 -0
  15. data/lib/redshift-connector/reader/abstract.rb +18 -0
  16. data/lib/redshift-connector/reader/csv.rb +24 -0
  17. data/lib/redshift-connector/reader/exception.rb +3 -0
  18. data/lib/redshift-connector/reader/redshift_csv.rb +54 -0
  19. data/lib/redshift-connector/reader/tsv.rb +24 -0
  20. data/lib/redshift-connector/s3_bucket.rb +72 -0
  21. data/lib/redshift-connector/s3_data_file.rb +34 -0
  22. data/lib/redshift-connector/s3_data_file_bundle.rb +101 -0
  23. data/lib/redshift-connector/version.rb +3 -0
  24. data/test/all.rb +3 -0
  25. data/test/config.rb +13 -0
  26. data/test/config.rb.example +18 -0
  27. data/test/database.yml +15 -0
  28. data/test/database.yml.example +15 -0
  29. data/test/foreach.rb +5 -0
  30. data/test/helper.rb +25 -0
  31. data/test/item_pvs.ct.mysql +11 -0
  32. data/test/item_pvs.ct.redshift +9 -0
  33. data/test/reader/test_redshift_csv.rb +30 -0
  34. data/test/test_connector.rb +148 -0
  35. data/test/test_reader.rb +10 -0
  36. data/test/test_s3_import.rb +32 -0
  37. metadata +190 -0
@@ -0,0 +1,3 @@
module RedshiftConnector
  # Release version of the gem. Frozen so the shared constant string cannot be
  # mutated in place by any caller.
  VERSION = '4.3.0'.freeze
end
data/test/all.rb ADDED
@@ -0,0 +1,3 @@
# Test-suite entry point: loads every test_*.rb file found in this directory
# or any directory below it.
#
# Dir.glob returns entries in filesystem order on Ruby < 3.0, so the list is
# sorted to get the same load order on every machine.
Dir.glob("#{__dir__}/**/test_*.rb").sort.each do |path|
  load path
end
data/test/config.rb ADDED
@@ -0,0 +1,13 @@
# Local test configuration (loaded by test/helper.rb via require_relative).

# Schema name the tests pass as `schema:` and use to build S3 prefixes.
$TEST_SCHEMA = 'aamine'

module RedshiftConnector
  # Use the `Redshift` constant as the exporter's default data source.
  Exporter.default_data_source = Redshift

  # Options of the S3 bucket entry registered below; `default: true` marks it
  # as the default entry.
  bucket_options = {
    bucket: 'redshift-copy-buffer',
    prefix: 'development',
    iam_role: 'arn:aws:iam::789035092620:role/RedshiftDevelopers',
    default: true
  }

  S3Bucket.add('redshift-copy-buffer', **bucket_options)
end
@@ -0,0 +1,18 @@
# Template for test/config.rb: copy it and replace the placeholder values.
module RedshiftConnector
  # For test only
  $TEST_SCHEMA = 'test'

  # Use the `Redshift` constant as the exporter's default data source.
  Exporter.default_data_source = Redshift

  # Options of the S3 bucket entry registered below.
  bucket_options = {
    bucket: 'YOUR_BUCKET_NAME',
    prefix: 'development',
    # When using IAM role
    iam_role: 'arn:aws:iam::NNNNNNNNNNNN:role/RRRRRRRRR',
    # When using explicit access key
    #access_key_id: 'AAAAAAAAAA',
    #secret_access_key: 'SSSSSSSSSS',
    default: true
  }

  S3Bucket.add('ENTRY_NAME', **bucket_options)
end
data/test/database.yml ADDED
@@ -0,0 +1,15 @@
# Connection settings loaded by test/helper.rb; each top-level key becomes one
# ActiveRecord configuration entry.
#
# SECURITY: never commit or package a real password in this file. Replace the
# PASSWORD placeholder locally only. A credential that was ever published
# here must be treated as compromised and rotated.
mysql:
  adapter: mysql2
  host: localhost
  username: minero-aoki
  database: test
  encoding: utf8

redshift:
  adapter: redshift
  host: dwh.ckpd.co
  port: 5439
  database: production
  username: aamine
  password: PASSWORD
  encoding: utf8
@@ -0,0 +1,15 @@
1
+ mysql:
2
+ adapter: mysql2
3
+ host: localhost
4
+ username: USER_NAME
5
+ database: test
6
+ encoding: utf8
7
+
8
+ redshift:
9
+ adapter: redshift
10
+ host: HOST_NAME
11
+ port: 5439
12
+ database: DATABASE_NAME
13
+ username: USER_NAME
14
+ password: PASSWORD
15
+ encoding: utf8
data/test/foreach.rb ADDED
@@ -0,0 +1,5 @@
require_relative 'helper'

# Manual smoke script: prints every row that RedshiftConnector.foreach yields
# for the query below.
query = 'select * from tabemiru.items where id < 10'

RedshiftConnector.foreach(schema: 'tabemiru', table: 'items', query: query) { |row| p row }
data/test/helper.rb ADDED
@@ -0,0 +1,25 @@
require 'active_record'
require 'yaml'
require 'logger'

# Register every entry of database.yml as an ActiveRecord configuration,
# keyed by its top-level name.
db_settings = YAML.load_file(File.join(__dir__, 'database.yml'))
db_settings.each { |conn_name, settings| ActiveRecord::Base.configurations[conn_name] = settings }

# Abstract base class bound to the :mysql configuration.
class BaseConn < ActiveRecord::Base
  establish_connection :mysql
  self.abstract_class = true
end

# Model on the MySQL side; `connection` is called in the class body, i.e. as
# soon as the class is defined.
class ItemPv < BaseConn
  connection
end

# Class bound to the :redshift configuration.
class Redshift < ActiveRecord::Base
  establish_connection :redshift
end

require 'redshift-connector'
# This IS REQUIRED to ensure to load mysql2 driver, at least outside of Rails.
ActiveRecord::Import.require_adapter 'mysql2'
require_relative 'config'

# Send connector log output to standard error.
RedshiftConnector.logger = Logger.new($stderr)
@@ -0,0 +1,11 @@
-- MySQL definition of the item_pvs test table.
create table item_pvs (
  id        bigint(20) primary key,
  data_date date       not null,
  item_id   int(11)    not null,
  pv        bigint(20) not null default 0,
  uu        bigint(20) not null default 0
);

-- Unique B-tree index over (id, data_date, item_id).
create unique index item_pvs_idx on item_pvs (id, data_date, item_id) using btree;
@@ -0,0 +1,9 @@
-- Redshift definition of the item_pvs test table: every column uses LZO
-- encoding and the sort key is (data_date, item_id).
create table item_pvs (
  id        bigint  encode lzo,
  data_date date    encode lzo,
  item_id   integer encode lzo,
  pv        bigint  encode lzo,
  uu        bigint  encode lzo
)
sortkey (data_date, item_id);
@@ -0,0 +1,30 @@
require 'test/unit'
require 'redshift-connector/reader'

module RedshiftConnector
  module Reader
    # Unit tests for RedshiftCSV#parse_row.
    class TestRedshiftCSV < Test::Unit::TestCase
      # Parses a single raw line with a reader constructed from nil, passing 1
      # as the second argument (presumably the line number -- confirm).
      def parse_row(line)
        RedshiftCSV.new(nil).parse_row(line, 1)
      end

      def test_parse_row
        # Each entry is [expected columns, raw input line].
        cases = [
          # plain quoted fields
          [['xxx', 'yyyy', 'zzz'],
           %Q("xxx","yyyy","zzz"\n)],
          # whitespace around fields and before the line end
          [['xxx', 'yyyy', 'zzz'],
           %Q( "xxx" , "yyyy","zzz"\t\n)],
          # backslash-escaped comma, CR/LF, double quote and backslash
          [['x,x', "y\r\ny", 'z"z', 'a\\a'],
           %Q("x\\,x","y\\r\\ny","z\\"z","a\\\\a"\n)],
          # CRLF line terminator
          [['981179', '2017-01-07', '6', 'show', '99', '3'],
           %Q("981179","2017-01-07","6","show","99","3"\r\n)],
          # escaped quotes inside a field
          [['981179', '2017-01-07', '6', '852', 'show', '{"page"=>"4"}', '1', '1'],
           %Q("981179","2017-01-07","6","852","show","{\\"page\\"=>\\"4\\"}","1","1"\n)]
        ]

        cases.each do |expected, line|
          assert_equal expected, parse_row(line)
        end
      end
    end
  end
end
@@ -0,0 +1,148 @@
require_relative 'helper'
require 'test/unit'

# Exercises RedshiftConnector.transport_delta and transport_all against the
# item_pvs table: successful runs for each strategy, plus option combinations
# that must be rejected with ArgumentError.
class TestConnector < Test::Unit::TestCase
  def test_connector_upsert
    date = '2016-11-03'
    opts = delta_options(date, table: 'item_pvs', upsert_columns: %w[pv uu])
    RedshiftConnector.transport_delta(**opts).execute
  end

  def test_connector_delete_insert
    date = '2016-11-03'
    opts = delta_options(date, table: 'item_pvs', delete_cond: date_condition(date))
    RedshiftConnector.transport_delta(**opts).execute
  end

  def test_dup_options
    date = '2016-11-03'
    opts = delta_options(
      date,
      table: 'item_pvs',
      delete_cond: date_condition(date),
      # Conflicts with delete_cond option
      upsert_columns: %w[pv uu]
    )
    assert_raise(ArgumentError) do
      RedshiftConnector.transport_delta(**opts)
    end
  end

  # Neither delete_cond nor upsert_columns is given here.
  def test_no_required_option
    date = '2016-11-03'
    opts = delta_options(date, table: 'item_pvs')
    assert_raise(ArgumentError) do
      RedshiftConnector.transport_delta(**opts)
    end
  end

  def test_connector_rebuild_truncate
    date = '2016-11-03'
    opts = base_options(date).merge(strategy: 'truncate', table: 'item_pvs')
    RedshiftConnector.transport_all(**opts).execute
  end

  def test_connector_rebuild_rename
    date = '2016-11-03'
    opts = base_options(date).merge(strategy: 'rename', table: 'item_pvs')
    RedshiftConnector.transport_all(**opts).execute
  end

  def test_connector_src_dest_table
    date = '2016-11-03'
    opts = delta_options(
      date,
      src_table: 'item_pvs',
      dest_table: 'item_pvs',
      upsert_columns: %w[pv uu]
    )
    RedshiftConnector.transport_delta(**opts).execute
  end

  # src_table is given without a matching dest_table.
  def test_connector_missing_src_dest
    date = '2016-11-03'
    opts = delta_options(date, src_table: 'item_pvs', upsert_columns: %w[pv uu])
    assert_raise(ArgumentError) do
      RedshiftConnector.transport_delta(**opts)
    end
  end

  private

  # Options shared by every transport call in this file: schema, transaction
  # id, column list and the row filter that converts the numeric columns with
  # to_i. A fresh hash, array and lambda are built on every call.
  def base_options(date)
    {
      schema: $TEST_SCHEMA,
      txn_id: date,
      columns: %w[id data_date item_id pv uu],
      filter: lambda { |id, data_date, item_id, pv, uu|
        [id.to_i, data_date, item_id.to_i, pv.to_i, uu.to_i]
      }
    }
  end

  # SQL condition selecting the rows of the given date.
  def date_condition(date)
    %Q(data_date = date '#{date}')
  end

  # base_options plus the date condition and any test-specific options.
  def delta_options(date, **extra)
    base_options(date).merge(condition: date_condition(date)).merge(extra)
  end
end
@@ -0,0 +1,10 @@
require 'test/unit'
require 'redshift-connector/reader'

module RedshiftConnector
  # Checks reader lookup by format name.
  class TestReader < Test::Unit::TestCase
    # Reader.get(:redshift_csv) must return the Reader::RedshiftCSV class.
    def test_get
      reader_class = Reader.get(:redshift_csv)
      assert_equal(Reader::RedshiftCSV, reader_class)
    end
  end
end
@@ -0,0 +1,32 @@
require_relative 'helper'
require 'test/unit'
require 'redshift-connector'

# Exercises the *_from_s3 transport entry points with TSV data for item_pvs.
class TestS3Import < Test::Unit::TestCase
  def test_import_delta_tsv
    RedshiftConnector.transport_delta_from_s3(
      prefix: tsv_prefix('2016-11-03'),
      format: :tsv,
      table: 'item_pvs',
      columns: %w[id data_date item_id pv uu],
      upsert_columns: %w[pv uu]
    ).execute
  end

  def test_import_all
    RedshiftConnector.transport_all_from_s3(
      strategy: 'truncate',
      prefix: tsv_prefix('2016-11-03'),
      format: :tsv,
      table: 'item_pvs',
      columns: %w[id data_date item_id pv uu]
    ).execute
  end

  private

  # Prefix string for the item_pvs TSV files of the given date, built from
  # $TEST_SCHEMA.
  def tsv_prefix(date)
    "#{$TEST_SCHEMA}_export/item_pvs_tsv/#{date}/item_pvs.tsv."
  end
end
metadata ADDED
@@ -0,0 +1,190 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: redshift-connector
3
+ version: !ruby/object:Gem::Version
4
+ version: 4.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Minero Aoki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-02-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "<"
18
+ - !ruby/object:Gem::Version
19
+ version: '5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "<"
25
+ - !ruby/object:Gem::Version
26
+ version: '5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activerecord4-redshift-adapter
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pg
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.18.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.18.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: activerecord-import
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: aws-sdk
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2.0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: test-unit
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rake
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description: redshift-connector is a bulk data connector for Rails (ActiveRecord).
126
+ email: aamine@loveruby.net
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - README.md
132
+ - lib/redshift-connector.rb
133
+ - lib/redshift-connector/connector.rb
134
+ - lib/redshift-connector/exporter.rb
135
+ - lib/redshift-connector/importer.rb
136
+ - lib/redshift-connector/importer/activerecord-import.rb
137
+ - lib/redshift-connector/importer/insert_delta.rb
138
+ - lib/redshift-connector/importer/rebuild_rename.rb
139
+ - lib/redshift-connector/importer/rebuild_truncate.rb
140
+ - lib/redshift-connector/importer/upsert.rb
141
+ - lib/redshift-connector/logger.rb
142
+ - lib/redshift-connector/query.rb
143
+ - lib/redshift-connector/reader.rb
144
+ - lib/redshift-connector/reader/abstract.rb
145
+ - lib/redshift-connector/reader/csv.rb
146
+ - lib/redshift-connector/reader/exception.rb
147
+ - lib/redshift-connector/reader/redshift_csv.rb
148
+ - lib/redshift-connector/reader/tsv.rb
149
+ - lib/redshift-connector/s3_bucket.rb
150
+ - lib/redshift-connector/s3_data_file.rb
151
+ - lib/redshift-connector/s3_data_file_bundle.rb
152
+ - lib/redshift-connector/version.rb
153
+ - test/all.rb
154
+ - test/config.rb
155
+ - test/config.rb.example
156
+ - test/database.yml
157
+ - test/database.yml.example
158
+ - test/foreach.rb
159
+ - test/helper.rb
160
+ - test/item_pvs.ct.mysql
161
+ - test/item_pvs.ct.redshift
162
+ - test/reader/test_redshift_csv.rb
163
+ - test/test_connector.rb
164
+ - test/test_reader.rb
165
+ - test/test_s3_import.rb
166
+ homepage: https://github.com/aamine/redshift-connector
167
+ licenses:
168
+ - MIT
169
+ metadata: {}
170
+ post_install_message:
171
+ rdoc_options: []
172
+ require_paths:
173
+ - lib
174
+ required_ruby_version: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - ">="
177
+ - !ruby/object:Gem::Version
178
+ version: 2.1.0
179
+ required_rubygems_version: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - ">="
182
+ - !ruby/object:Gem::Version
183
+ version: '0'
184
+ requirements: []
185
+ rubyforge_project:
186
+ rubygems_version: 2.6.8
187
+ signing_key:
188
+ specification_version: 4
189
+ summary: Redshift bulk data connector
190
+ test_files: []