redshift-connector 4.5.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -29
- data/lib/redshift-connector/connector.rb +11 -3
- data/lib/redshift-connector/exporter.rb +15 -2
- data/lib/redshift-connector/query.rb +3 -5
- data/lib/redshift-connector/reader.rb +18 -0
- data/lib/redshift-connector/reader/abstract.rb +18 -0
- data/lib/redshift-connector/reader/csv.rb +24 -0
- data/lib/redshift-connector/reader/exception.rb +3 -0
- data/lib/redshift-connector/reader/redshift_csv.rb +54 -0
- data/lib/redshift-connector/reader/tsv.rb +24 -0
- data/lib/redshift-connector/s3_bucket.rb +2 -6
- data/lib/redshift-connector/s3_data_file.rb +19 -5
- data/lib/redshift-connector/s3_data_file_bundle.rb +19 -3
- data/lib/redshift-connector/version.rb +1 -1
- data/test/foreach.rb +1 -1
- data/test/reader/test_redshift_csv.rb +1 -1
- data/test/test_reader.rb +1 -0
- metadata +13 -21
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b3d56a36e8dba44e03271c37ecedd89871e5f0d2
+  data.tar.gz: 3237a2be89f2b86c524d99ce0064773a3807b41e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0de405e11f29b3efad2e9161436396cd436396ceb525c284d505a42abb3893da882b6d3c1767b88589736776bc26cc8442be350ed77e40ca26e5ed73a1da096c
+  data.tar.gz: 6be60fc0ac03acb59f990dfb605abba2f49711ec32441d11d366ffa322b54835b9625deb3224429ff88f7528f2433a777028283f49fc81132d04f6a79d417cab
```
data/README.md
CHANGED
````diff
@@ -8,32 +8,3 @@ Add following block to your Gemfile and bundle.
 ```
 gem 'redshift-connector'
 ```
-Add config/initializers/redshift-connector.rb like following:
-```
-module RedshiftConnector
-  Exporter.default_data_source = Any_ActiveRecord_Class_Bound_To_Redshift
-
-  S3Bucket.add('primary', default: true,
-    region: 'YOUR_AWS_REGION_NAME',
-    bucket: 'YOUR_BUCKET_NAME',
-    prefix: 'YOUR_PREFIX',
-    iam_role: 'arn:aws:iam::XXXXXXXXXXXX:role/RedshiftReadOnly'
-    # For explicit S3 access, use following:
-    # aws_access_key_id: 'XXXXXXXXXXXXX',
-    # aws_secret_access_key: 'XXXXXXXXXXXXX'
-  )
-end
-```
-
-## Usage
-
-### Fetching rows
-
-```
-RedshiftConnector.foreach(schema: 'app_mst', table: 'shops', query: 'select id, name from app_mst.shops') do |id, name|
-  p [id, name]
-end
-```
-`schema` and `table` is the source table name (written in the query).
-This method executes Redshift UNLOAD statement with given query and
-unload result to the intermediate S3, then read contents.
````
data/lib/redshift-connector/connector.rb
CHANGED

```diff
@@ -116,18 +116,26 @@
       not ENV['IMPORT_ONLY']
     end

+    def export_forced?
+      !! (ENV['EXPORT_ONLY'] or ENV['FORCE'])
+    end
+
     def import_enabled?
       not ENV['EXPORT_ONLY']
     end

     def execute
-      export if export_enabled?
+      export(forced: export_forced?) if export_enabled?
       import if import_enabled?
     end

-    def export
+    def export(forced: false)
       @logger.info "==== export task =================================================="
-      @exporter.execute
+      if not forced and @exporter.completed?
+        @logger.info "export task is already executed; skip"
+      else
+        @exporter.execute
+      end
     end

     def import
```
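Net effect of the three predicates above, as a standalone sketch (not part of the diff; `run_mode` is a hypothetical helper):

```ruby
# Hypothetical helper summarizing Connector's run modes.
def run_mode(env)
  {
    export: !env['IMPORT_ONLY'],                      # export_enabled?
    forced: !!(env['EXPORT_ONLY'] || env['FORCE']),   # export_forced?
    import: !env['EXPORT_ONLY']                       # import_enabled?
  }
end

run_mode({})                   # => {:export=>true, :forced=>false, :import=>true}
run_mode('IMPORT_ONLY' => '1') # => {:export=>false, :forced=>false, :import=>true}
run_mode('FORCE' => '1')       # => {:export=>true, :forced=>true, :import=>true}
```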
data/lib/redshift-connector/exporter.rb
CHANGED

```diff
@@ -41,7 +41,6 @@ module RedshiftConnector
       query:,
       txn_id: "#{Time.now.strftime('%Y%m%d_%H%M%S')}_#{$$}",
       filter: nil,
-      enable_sort: false,
       logger: RedshiftConnector.logger,
       quiet: false
     )
@@ -57,7 +56,7 @@ module RedshiftConnector
     )
     exporter = Exporter.new(
       ds: ds,
-      query: UnloadQuery.wrap(query: query, bundle: bundle, enable_sort: enable_sort),
+      query: UnloadQuery.wrap(query: query, bundle: bundle),
       bundle: bundle,
       logger: logger
     )
@@ -75,6 +74,19 @@ module RedshiftConnector
     attr_reader :bundle
     attr_reader :logger

+    def completed?
+      @bundle.bucket.object(flag_object_key).exists?
+    end
+
+    def create_flag_object
+      @logger.info "TOUCH #{flag_object_key}"
+      @bundle.bucket.object(flag_object_key).put(body: "OK")
+    end
+
+    def flag_object_key
+      "#{File.dirname(@bundle.prefix)}/00completed"
+    end
+
     def execute
       @bundle.clear
       @logger.info "EXPORT #{@query.description} -> #{@bundle.url}*"
@@ -83,6 +95,7 @@ module RedshiftConnector
         @logger.info "[SQL/Redshift] #{batch_job_label}#{stmt.strip}"
         conn.execute(batch_job_label + stmt)
       end
+      create_flag_object
     end

     def batch_job_label
```
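The three new methods make exports idempotent: `execute` now writes a `00completed` marker next to the bundle prefix, and `completed?` lets the connector skip an already-finished export. The same flag-object pattern in isolation, using aws-sdk directly (bucket and key names are made up):

```ruby
require 'aws-sdk'

# Illustrative sketch of the completed-flag pattern above.
flag = Aws::S3::Resource.new
                        .bucket('my-bucket')
                        .object('exports/app_mst_shops/00completed')

unless flag.exists?     # what Exporter#completed? checks
  # ... run the UNLOAD work here ...
  flag.put(body: 'OK')  # what Exporter#create_flag_object writes
end
```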
data/lib/redshift-connector/query.rb
CHANGED

```diff
@@ -44,14 +44,13 @@ module RedshiftConnector
   end

   class UnloadQuery
-    def UnloadQuery.wrap(query:, bundle:, enable_sort: false)
-      new(query: ArbitraryQuery.new(query), bundle: bundle, enable_sort: enable_sort)
+    def UnloadQuery.wrap(query:, bundle:)
+      new(query: ArbitraryQuery.new(query), bundle: bundle)
     end

-    def initialize(query:, bundle:, enable_sort: false)
+    def initialize(query:, bundle:)
       @query = query
       @bundle = bundle
-      @enable_sort = enable_sort
     end

     def table_spec
@@ -69,7 +68,6 @@ module RedshiftConnector
         credentials '#{@bundle.credential_string}'
         gzip
         allowoverwrite
-        parallel #{@enable_sort ? 'off' : 'on'}
         delimiter ',' escape addquotes
       EndSQL
     end
```
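With the `parallel` clause gone, UNLOAD always runs with its default parallel output. For orientation, the statement assembled around the heredoc above now has roughly this shape (the unload/to lines and all values here are illustrative, not taken from the diff):

```ruby
# Illustrative shape of the generated statement (values made up).
sql = <<-EndSQL
  unload ('select id, name from app_mst.shops')
  to 's3://my-bucket/exports/app_mst_shops/'
  credentials 'aws_iam_role=arn:aws:iam::123456789012:role/RedshiftReadOnly'
  gzip
  allowoverwrite
  delimiter ',' escape addquotes
EndSQL
```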
data/lib/redshift-connector/reader.rb
ADDED

```diff
@@ -0,0 +1,18 @@
+# create module
+module RedshiftConnector
+  module Reader
+  end
+end
+
+require 'redshift-connector/reader/redshift_csv'
+require 'redshift-connector/reader/csv'
+require 'redshift-connector/reader/tsv'
+require 'redshift-connector/reader/exception'
+
+module RedshiftConnector
+  module Reader
+    def Reader.get(id)
+      Abstract.get_reader_class(id)
+    end
+  end
+end
```
data/lib/redshift-connector/reader/abstract.rb
ADDED

```diff
@@ -0,0 +1,18 @@
+module RedshiftConnector
+  class Reader::Abstract
+    READER_CLASSES = {}   # {Symbol => Class}
+
+    def self.declare_reader(id)
+      READER_CLASSES[id.to_sym] = self
+    end
+
+    def self.get_reader_class(id)
+      READER_CLASSES[id.to_sym] or
+        raise ArgumentError, "unknown data file reader type: #{id.inspect}"
+    end
+  end
+
+  def self.get_reader_class(id)
+    Reader::Abstract.get_reader_class(id)
+  end
+end
```
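`Reader::Abstract` is a small self-registration registry: each concrete reader calls `declare_reader` at load time, and `Reader.get` resolves a symbol back to the class. The mechanism in isolation (standalone sketch with made-up class names):

```ruby
# Standalone sketch of the declare_reader registry above.
class AbstractReader
  REGISTRY = {}   # Symbol => Class

  def self.declare_reader(id)
    REGISTRY[id.to_sym] = self    # subclass registers itself when loaded
  end

  def self.get_reader_class(id)
    REGISTRY[id.to_sym] or raise ArgumentError, "unknown reader: #{id.inspect}"
  end
end

class TsvReader < AbstractReader
  declare_reader :tsv
end

AbstractReader.get_reader_class(:tsv)   # => TsvReader
```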
data/lib/redshift-connector/reader/csv.rb
ADDED

```diff
@@ -0,0 +1,24 @@
+require 'redshift-connector/reader/abstract'
+require 'redshift-connector/reader/exception'
+require 'csv'
+
+module RedshiftConnector
+  # Parses (standard) CSV files.
+  # For UNLOAD-generated CSV, use RedshiftCSV class.
+  class Reader::CSV < Reader::Abstract
+    declare_reader :csv
+
+    def self.data_object?(obj)
+      /\.csv(?:\.|\z)/ =~ File.basename(obj.key)
+    end
+
+    def initialize(f)
+      @f = f
+    end
+
+    def each(&block)
+      csv = CSV.new(@f)
+      csv.each(&block)
+    end
+  end
+end
```
data/lib/redshift-connector/reader/redshift_csv.rb
ADDED

```diff
@@ -0,0 +1,54 @@
+require 'redshift-connector/reader/abstract'
+require 'redshift-connector/reader/exception'
+
+module RedshiftConnector
+  # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
+  # UNLOAD escapes data by '\' (backslash character), we cannot use standard CSV class.
+  class Reader::RedshiftCSV < Reader::Abstract
+    declare_reader :redshift_csv
+
+    def self.data_object?(obj)
+      /\.csv(?:\.|\z)/ =~ File.basename(obj.key)
+    end
+
+    # f :: IO
+    def initialize(f)
+      @f = f
+    end
+
+    def each
+      # We can use simple #each_line to read single row
+      # because line terminators are always escaped by UNLOAD.
+      @f.each_line do |line|
+        yield parse_row(line, @f.lineno)
+      end
+    end
+
+    def parse_row(line, lineno = nil)
+      row = []
+      s = StringScanner.new(line)
+      s.skip(/\s+/)
+      until s.eos?
+        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise MalformedCSVException, "CSV parse error at line #{lineno}"
+        row.push unescape_column(col)
+        s.skip(/\s*/)   # skip line terminator on line ends
+        s.skip(/,\s*/)
+      end
+      row
+    end
+
+    UNESCAPE_MAP = {
+      '\\"' => '"',
+      "\\'" => "'",
+      '\\,' => ',',
+      '\\r' => "\r",
+      '\\n' => "\n",
+      '\\\\' => '\\'
+    }
+
+    def unescape_column(col)
+      charmap = UNESCAPE_MAP
+      col[1...-1].gsub(/\\./) {|s| charmap[s] }
+    end
+  end
+end
```
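Because UNLOAD with ESCAPE ADDQUOTES escapes quotes, commas, and newlines with backslashes instead of CSV-style quote doubling, `parse_row` scans quoted fields itself and unescapes them through `UNESCAPE_MAP`. An illustrative round trip (assumes the gem's lib directory is on the load path; note `parse_row` relies on `StringScanner`, so `strscan` is required explicitly here):

```ruby
require 'strscan'
require 'redshift-connector/reader/redshift_csv'

reader = RedshiftConnector::Reader::RedshiftCSV.new(nil)   # no IO needed for parse_row
line = %q("1","It\'s a \"pen\"","a,b\nc") + "\n"           # backslash-escaped, as UNLOAD emits
reader.parse_row(line)
# => ["1", "It's a \"pen\"", "a,b\nc"]
```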
data/lib/redshift-connector/reader/tsv.rb
ADDED

```diff
@@ -0,0 +1,24 @@
+require 'redshift-connector/reader/abstract'
+require 'redshift-connector/reader/exception'
+require 'csv'
+
+module RedshiftConnector
+  # Parses TSV (Tab Separated Format) files.
+  class Reader::TSV < Reader::Abstract
+    declare_reader :tsv
+
+    def self.data_object?(obj)
+      /\.tsv(?:\.|\z)/ =~ File.basename(obj.key)
+    end
+
+    def initialize(f)
+      @f = f
+    end
+
+    def each(&block)
+      @f.each_line do |line|
+        yield line.chomp.split("\t", -1)
+      end
+    end
+  end
+end
```
data/lib/redshift-connector/s3_bucket.rb
CHANGED

```diff
@@ -21,8 +21,7 @@ module RedshiftConnector
       @buckets[name.to_s] or raise ArgumentError, "no such S3 bucket configured: #{name.inspect}"
     end

-    def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
-      @region = region
+    def initialize(bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
       @name = bucket
       @prefix = prefix
       @access_key_id = access_key_id
@@ -38,10 +37,7 @@ module RedshiftConnector
     end

     def client
-      @client ||= begin
-        args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
-        Aws::S3::Client.new(**args)
-      end
+      @client ||= Aws::S3::Client.new(access_key_id: @access_key_id, secret_access_key: @secret_access_key)
     end

     def bucket
```
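Note that the rewritten `client` no longer passes `:region`; after this change the region has to come from the environment or shared AWS config that aws-sdk consults by default. Illustrative (values made up):

```ruby
# Illustrative: without an explicit :region, aws-sdk falls back to
# the environment / shared config.
ENV['AWS_REGION'] = 'ap-northeast-1'
client = Aws::S3::Client.new(access_key_id: 'AKIA...', secret_access_key: '...')
client.config.region   # => "ap-northeast-1"
```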
data/lib/redshift-connector/s3_data_file.rb
CHANGED

```diff
@@ -1,7 +1,7 @@
-require 'redshift-connector/data_file'
+require 'zlib'

 module RedshiftConnector
-  class S3DataFile < DataFile
+  class S3DataFile
     def initialize(object, reader_class:)
       @object = object
       @reader_class = reader_class
@@ -11,10 +11,24 @@ module RedshiftConnector
       @object.key
     end

-    def
-      @object.get
+    def each_row(&block)
+      response = @object.get
+      f = if gzipped_object?
+            Zlib::GzipReader.new(response.body)
+          else
+            response.body
+          end
+      @reader_class.new(f).each(&block)
+    ensure
+      response.body.close if response
     end

-
+    def data_object?
+      @reader_class.data_object?(@object)
+    end
+
+    def gzipped_object?
+      File.extname(@object.key) == '.gz'
+    end
   end
 end
```
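`each_row` now streams the S3 response body through `Zlib::GzipReader` when the key ends in `.gz` and hands the resulting IO to the reader, closing the body either way. The same detect-and-wrap pattern against local files (an illustrative helper, not from the gem):

```ruby
require 'zlib'

# Illustrative: extension-based gzip detection, as in S3DataFile#each_row.
def each_line_auto(path, &block)
  f = File.extname(path) == '.gz' ? Zlib::GzipReader.open(path) : File.open(path)
  begin
    f.each_line(&block)
  ensure
    f.close
  end
end

each_line_auto('shops.csv.gz') {|line| print line }
```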
data/lib/redshift-connector/s3_data_file_bundle.rb
CHANGED

```diff
@@ -1,11 +1,11 @@
 require 'redshift-connector/s3_bucket'
 require 'redshift-connector/s3_data_file'
+require 'redshift-connector/reader'
 require 'redshift-connector/logger'
-require 'redshift-connector/data_file'
 require 'aws-sdk'

 module RedshiftConnector
-  class S3DataFileBundle < DataFileBundle
+  class S3DataFileBundle
     def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
       real_prefix = "#{bucket.prefix}/#{prefix}"
       new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
@@ -68,9 +68,25 @@ module RedshiftConnector
     end
     private :do_each_batch

-    def
+    def each_row(&block)
+      each_object do |obj|
+        obj.each_row(&block)
+      end
+    end
+
+    alias each each_row
+
+    def each_object(&block)
+      all_data_objects.each do |obj|
+        @logger.info "processing s3 object: #{obj.key}"
+        yield obj
+      end
+    end
+
+    def all_data_objects
       @bucket.objects(prefix: @prefix)
         .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
+        .select {|obj| obj.data_object? }
     end

     def clear
```
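Object listing now funnels through `all_data_objects`, which keeps only keys the configured reader recognizes via `data_object?`; anything else under the prefix, such as the exporter's new `00completed` flag, is skipped. The filter on its own (made-up keys, using the CSV readers' pattern):

```ruby
# Illustrative: the .csv pattern used by the CSV readers' data_object?.
keys = %w[exports/shops/shops.csv.0000.gz exports/shops/00completed]
keys.select {|k| /\.csv(?:\.|\z)/ =~ File.basename(k) }
# => ["exports/shops/shops.csv.0000.gz"]
```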
data/test/foreach.rb
CHANGED
```diff
@@ -1,5 +1,5 @@
 require_relative 'helper'

-RedshiftConnector.foreach(schema: 'tabemiru', table: 'items', query: 'select
+RedshiftConnector.foreach(schema: 'tabemiru', table: 'items', query: 'select * from tabemiru.items where id < 10') do |row|
   p row
 end
```
data/test/test_reader.rb
CHANGED
metadata
CHANGED
```diff
@@ -1,31 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: redshift-connector
 version: !ruby/object:Gem::Version
-  version: 4.5.0
+  version: 5.0.0
 platform: ruby
 authors:
 - Minero Aoki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-
+date: 2017-02-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activerecord
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '5'
+        version: '5.0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '5'
+        version: '5.0'
 - !ruby/object:Gem::Dependency
-  name:
+  name: activerecord5-redshift-adapter
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -38,20 +38,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-- !ruby/object:Gem::Dependency
-  name: redshift-connector-data_file
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: 1.0.0
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: 1.0.0
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
@@ -154,6 +140,12 @@ files:
 - lib/redshift-connector/importer/upsert.rb
 - lib/redshift-connector/logger.rb
 - lib/redshift-connector/query.rb
+- lib/redshift-connector/reader.rb
+- lib/redshift-connector/reader/abstract.rb
+- lib/redshift-connector/reader/csv.rb
+- lib/redshift-connector/reader/exception.rb
+- lib/redshift-connector/reader/redshift_csv.rb
+- lib/redshift-connector/reader/tsv.rb
 - lib/redshift-connector/s3_bucket.rb
 - lib/redshift-connector/s3_data_file.rb
 - lib/redshift-connector/s3_data_file_bundle.rb
```