redshift-connector 4.4.1 → 4.5.0
- checksums.yaml +4 -4
- data/lib/redshift-connector/s3_data_file.rb +1 -1
- data/lib/redshift-connector/s3_data_file_bundle.rb +1 -2
- data/lib/redshift-connector/version.rb +1 -1
- data/test/reader/test_redshift_csv.rb +1 -1
- data/test/test_reader.rb +0 -1
- metadata +16 -12
- data/lib/redshift-connector/abstract_data_file.rb +0 -24
- data/lib/redshift-connector/abstract_data_file_bundle.rb +0 -22
- data/lib/redshift-connector/reader.rb +0 -18
- data/lib/redshift-connector/reader/abstract.rb +0 -18
- data/lib/redshift-connector/reader/csv.rb +0 -24
- data/lib/redshift-connector/reader/exception.rb +0 -3
- data/lib/redshift-connector/reader/redshift_csv.rb +0 -54
- data/lib/redshift-connector/reader/tsv.rb +0 -24
- data/lib/redshift-connector/url_data_file.rb +0 -23
- data/lib/redshift-connector/url_data_file_bundle.rb +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cb3953798104bd92d6d856b8fd3b2d835ba9d54a
+  data.tar.gz: e2f51e74ec4969e1187ce35e457a2af357bda1bf
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cec111e3ca92096a3a0071b7a76a36b3c259213ab03aecb5f75ca1e3b2cc41fd1b4e140384a5a482944d7f02819cfe8da5010ae0bd8c9e9b0619001e09aa3542
+  data.tar.gz: ac5dbcc924a7ffb5ee7387d4cf3f56a78371c10a870d4b3b3215502d19f256daff0bcc970ed2d7d946b6ecd62f8305a4ab55ecd6adbd0e4b1896cbf42b671067
data/lib/redshift-connector/s3_data_file_bundle.rb
CHANGED
@@ -1,8 +1,7 @@
 require 'redshift-connector/s3_bucket'
 require 'redshift-connector/s3_data_file'
-require 'redshift-connector/reader'
 require 'redshift-connector/logger'
-require 'redshift-connector/
+require 'redshift-connector/data_file'
 require 'aws-sdk'
 
 module RedshiftConnector
data/test/test_reader.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: redshift-connector
 version: !ruby/object:Gem::Version
-  version: 4.4.1
+  version: 4.5.0
 platform: ruby
 authors:
 - Minero Aoki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-
+date: 2017-05-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activerecord
@@ -38,6 +38,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: redshift-connector-data_file
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 1.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 1.0.0
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
@@ -130,8 +144,6 @@ extra_rdoc_files: []
 files:
 - README.md
 - lib/redshift-connector.rb
-- lib/redshift-connector/abstract_data_file.rb
-- lib/redshift-connector/abstract_data_file_bundle.rb
 - lib/redshift-connector/connector.rb
 - lib/redshift-connector/exporter.rb
 - lib/redshift-connector/importer.rb
@@ -142,17 +154,9 @@ files:
 - lib/redshift-connector/importer/upsert.rb
 - lib/redshift-connector/logger.rb
 - lib/redshift-connector/query.rb
-- lib/redshift-connector/reader.rb
-- lib/redshift-connector/reader/abstract.rb
-- lib/redshift-connector/reader/csv.rb
-- lib/redshift-connector/reader/exception.rb
-- lib/redshift-connector/reader/redshift_csv.rb
-- lib/redshift-connector/reader/tsv.rb
 - lib/redshift-connector/s3_bucket.rb
 - lib/redshift-connector/s3_data_file.rb
 - lib/redshift-connector/s3_data_file_bundle.rb
-- lib/redshift-connector/url_data_file.rb
-- lib/redshift-connector/url_data_file_bundle.rb
 - lib/redshift-connector/version.rb
 - test/all.rb
 - test/config.rb
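The redshift-connector-data_file gem added in the metadata above is a new runtime dependency; the reader and data-file classes deleted below now live there. For orientation, a minimal sketch of the gemspec declaration that would produce this metadata entry (the actual .gemspec is not part of this diff, so everything except the add_dependency line is an assumption):

# Hypothetical excerpt of redshift-connector.gemspec; only the
# add_dependency line is implied by the metadata diff above.
Gem::Specification.new do |s|
  s.name    = 'redshift-connector'
  s.version = '4.5.0'
  # Extracted reader / data-file classes, pulled back in as a runtime dependency.
  s.add_dependency 'redshift-connector-data_file', '~> 1.0.0'
end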
data/lib/redshift-connector/abstract_data_file.rb
DELETED
@@ -1,24 +0,0 @@
-require 'zlib'
-
-module RedshiftConnector
-  class AbstractDataFile
-    def each_row(&block)
-      f = if gzipped_object?
-            Zlib::GzipReader.new(content)
-          else
-            content
-          end
-      @reader_class.new(f).each(&block)
-    ensure
-      content.close
-    end
-
-    def data_object?
-      @reader_class.data_object?(key)
-    end
-
-    def gzipped_object?
-      File.extname(key) == '.gz'
-    end
-  end
-end
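AbstractDataFile expected a subclass to supply #key and #content (an IO) and to set @reader_class; #each_row then wrapped .gz objects in Zlib::GzipReader before handing the stream to the reader. A hypothetical minimal subclass, for illustration only (not part of the gem):

require 'redshift-connector/abstract_data_file'

# Illustrative only: reads a local file instead of S3 or a URL.
class LocalDataFile < RedshiftConnector::AbstractDataFile
  def initialize(path, reader_class:)
    @path = path
    @reader_class = reader_class
  end

  def key
    @path                   # used by gzipped_object? and the reader's data_object? check
  end

  def content
    File.open(@path, 'rb')  # any IO; each_row gunzips it when key ends in .gz
  end
end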
data/lib/redshift-connector/abstract_data_file_bundle.rb
DELETED
@@ -1,22 +0,0 @@
-module RedshiftConnector
-  class AbstractDataFileBundle
-    def each_row(&block)
-      each_object do |obj|
-        obj.each_row(&block)
-      end
-    end
-
-    alias each each_row
-
-    def each_object(&block)
-      all_data_objects.each do |obj|
-        @logger.info "processing s3 object: #{obj.key}"
-        yield obj
-      end
-    end
-
-    def all_data_objects
-      data_files.select {|obj| obj.data_object? }
-    end
-  end
-end
data/lib/redshift-connector/reader.rb
DELETED
@@ -1,18 +0,0 @@
-# create module
-module RedshiftConnector
-  module Reader
-  end
-end
-
-require 'redshift-connector/reader/redshift_csv'
-require 'redshift-connector/reader/csv'
-require 'redshift-connector/reader/tsv'
-require 'redshift-connector/reader/exception'
-
-module RedshiftConnector
-  module Reader
-    def Reader.get(id)
-      Abstract.get_reader_class(id)
-    end
-  end
-end
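Reader.get resolved a format symbol to the class registered via declare_reader (see reader/abstract.rb below). A short usage sketch of this now-removed API, assuming the pre-4.5.0 files (or their redshift-connector-data_file equivalents) are on the load path:

require 'redshift-connector/reader'
require 'stringio'

reader_class = RedshiftConnector::Reader.get(:tsv)   # => RedshiftConnector::Reader::TSV
reader_class.new(StringIO.new("1\tfoo\n2\tbar\n")).each do |row|
  p row   # ["1", "foo"], then ["2", "bar"]
end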
data/lib/redshift-connector/reader/abstract.rb
DELETED
@@ -1,18 +0,0 @@
-module RedshiftConnector
-  class Reader::Abstract
-    READER_CLASSES = {}   # {Symbol => Class}
-
-    def self.declare_reader(id)
-      READER_CLASSES[id.to_sym] = self
-    end
-
-    def self.get_reader_class(id)
-      READER_CLASSES[id.to_sym] or
-        raise ArgumentError, "unknown data file reader type: #{id.inspect}"
-    end
-  end
-
-  def self.get_reader_class(id)
-    Reader::Abstract.get_reader_class(id)
-  end
-end
data/lib/redshift-connector/reader/csv.rb
DELETED
@@ -1,24 +0,0 @@
-require 'redshift-connector/reader/abstract'
-require 'redshift-connector/reader/exception'
-require 'csv'
-
-module RedshiftConnector
-  # Parses (standard) CSV files.
-  # For UNLOAD-generated CSV, use RedshiftCSV class.
-  class Reader::CSV < Reader::Abstract
-    declare_reader :csv
-
-    def self.data_object?(key)
-      /\.csv(?:\.|\z)/ =~ File.basename(key)
-    end
-
-    def initialize(f)
-      @f = f
-    end
-
-    def each(&block)
-      csv = CSV.new(@f)
-      csv.each(&block)
-    end
-  end
-end
data/lib/redshift-connector/reader/redshift_csv.rb
DELETED
@@ -1,54 +0,0 @@
-require 'redshift-connector/reader/abstract'
-require 'redshift-connector/reader/exception'
-
-module RedshiftConnector
-  # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
-  # UNLOAD escapes data by '\' (backslash character), we cannot use standard CSV class.
-  class Reader::RedshiftCSV < Reader::Abstract
-    declare_reader :redshift_csv
-
-    def self.data_object?(key)
-      /\.csv(?:\.|\z)/ =~ File.basename(key)
-    end
-
-    # f :: IO
-    def initialize(f)
-      @f = f
-    end
-
-    def each
-      # We can use simple #each_line to read single row
-      # because line terminators are always escaped by UNLOAD.
-      @f.each_line do |line|
-        yield parse_row(line, @f.lineno)
-      end
-    end
-
-    def parse_row(line, lineno = nil)
-      row = []
-      s = StringScanner.new(line)
-      s.skip(/\s+/)
-      until s.eos?
-        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise MalformedCSVException, "CSV parse error at line #{lineno}"
-        row.push unescape_column(col)
-        s.skip(/\s*/)   # skip line terminator on line ends
-        s.skip(/,\s*/)
-      end
-      row
-    end
-
-    UNESCAPE_MAP = {
-      '\\"' => '"',
-      "\\'" => "'",
-      '\\,' => ',',
-      '\\r' => "\r",
-      '\\n' => "\n",
-      '\\\\' => '\\'
-    }
-
-    def unescape_column(col)
-      charmap = UNESCAPE_MAP
-      col[1...-1].gsub(/\\./) {|s| charmap[s] }
-    end
-  end
-end
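The comments above describe the format this reader was built for: UNLOAD ... ADDQUOTES ESCAPE double-quotes every column and backslash-escapes quotes, commas, newlines and backslashes, which is why Ruby's CSV class cannot be used. A hedged sketch of parsing one such line with the removed class (assuming it, or its redshift-connector-data_file equivalent, is loadable; note that parse_row uses StringScanner from strscan):

require 'strscan'
require 'stringio'
require 'redshift-connector/reader/redshift_csv'

# One UNLOAD-style row: three quoted columns with an escaped comma and newline.
io = StringIO.new(%q("1","ab\,c","line1\nline2") + "\n")
RedshiftConnector::Reader::RedshiftCSV.new(io).each do |row|
  p row   # => ["1", "ab,c", "line1\nline2"]  (the escaped \n becomes a real newline)
end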
data/lib/redshift-connector/reader/tsv.rb
DELETED
@@ -1,24 +0,0 @@
-require 'redshift-connector/reader/abstract'
-require 'redshift-connector/reader/exception'
-require 'csv'
-
-module RedshiftConnector
-  # Parses TSV (Tab Separated Format) files.
-  class Reader::TSV < Reader::Abstract
-    declare_reader :tsv
-
-    def self.data_object?(key)
-      /\.tsv(?:\.|\z)/ =~ File.basename(key)
-    end
-
-    def initialize(f)
-      @f = f
-    end
-
-    def each(&block)
-      @f.each_line do |line|
-        yield line.chomp.split("\t", -1)
-      end
-    end
-  end
-end
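The -1 limit passed to String#split is what preserves trailing empty columns; a plain split("\t") would silently drop them:

line = "a\tb\t\n"
line.chomp.split("\t")       # => ["a", "b"]       (trailing empty field dropped)
line.chomp.split("\t", -1)   # => ["a", "b", ""]   (kept, as the reader above does)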
data/lib/redshift-connector/url_data_file.rb
DELETED
@@ -1,23 +0,0 @@
-require 'redshift-connector/abstract_data_file'
-require 'uri'
-require 'zlib'
-require 'open3'
-
-module RedshiftConnector
-  class UrlDataFile < AbstractDataFile
-    def initialize(url, reader_class:)
-      @url = url
-      @reader_class = reader_class
-    end
-
-    def key
-      URI.parse(@url).path
-    end
-
-    def content
-      stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
-      stdin.close
-      stdout
-    end
-  end
-end
data/lib/redshift-connector/url_data_file_bundle.rb
DELETED
@@ -1,21 +0,0 @@
-require 'redshift-connector/reader'
-require 'redshift-connector/logger'
-require 'redshift-connector/abstract_data_file_bundle'
-require 'redshift-connector/url_data_file'
-
-module RedshiftConnector
-  class UrlDataFileBundle < AbstractDataFileBundle
-    def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
-      @data_file_urls = data_file_urls
-      @filter = filter || lambda {|*row| row }
-      @logger = logger
-      @reader_class = Reader.get(format)
-    end
-
-    def data_files
-      @data_file_urls.map do |url|
-        UrlDataFile.new(url, reader_class: @reader_class)
-      end
-    end
-  end
-end
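Putting the removed pieces together: a bundle resolved a reader class from the format symbol, kept only objects the reader recognises via data_object?, gunzipped .gz keys, and streamed rows. A sketch of the pre-4.5.0 usage (the URL is a placeholder; this API now lives in redshift-connector-data_file):

require 'redshift-connector/url_data_file_bundle'

urls = ['https://example.com/unload/0000_part_00.csv.gz']   # placeholder
bundle = RedshiftConnector::UrlDataFileBundle.new(urls, format: :redshift_csv)

# each_row fetches each object with curl, gunzips it (key ends in .gz),
# and yields rows parsed by Reader::RedshiftCSV.
bundle.each_row do |row|
  p row
end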