redshift-connector 5.4.1 → 5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/redshift-connector/s3_data_file.rb +1 -1
- data/lib/redshift-connector/s3_data_file_bundle.rb +1 -2
- data/lib/redshift-connector/version.rb +1 -1
- data/test/reader/test_redshift_csv.rb +1 -1
- data/test/test_reader.rb +0 -1
- metadata +16 -12
- data/lib/redshift-connector/abstract_data_file.rb +0 -24
- data/lib/redshift-connector/abstract_data_file_bundle.rb +0 -22
- data/lib/redshift-connector/reader.rb +0 -18
- data/lib/redshift-connector/reader/abstract.rb +0 -18
- data/lib/redshift-connector/reader/csv.rb +0 -24
- data/lib/redshift-connector/reader/exception.rb +0 -3
- data/lib/redshift-connector/reader/redshift_csv.rb +0 -54
- data/lib/redshift-connector/reader/tsv.rb +0 -24
- data/lib/redshift-connector/url_data_file.rb +0 -23
- data/lib/redshift-connector/url_data_file_bundle.rb +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64ec55df09c992e2e792de2b6dea9554cba4d4be
|
4
|
+
data.tar.gz: 4c37b910361878dfb6830b79da024e968ab78726
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a268d84364402590e59c115831fddce59be352b795676715a07f4d54a966a14fdf5a19a926783ea5f8c38073d5fa283c21822b1ea91600e224061a4932006dfa
|
7
|
+
data.tar.gz: be94cf2ae56df70b8c9e1be2d67619e26f6edb942957a4bb977c824042a55729b550a7a699c6400ac3f6566c1834a243bbcf773e1898a19b4e51da5ecc26e03d
|
@@ -1,8 +1,7 @@
|
|
1
1
|
require 'redshift-connector/s3_bucket'
|
2
2
|
require 'redshift-connector/s3_data_file'
|
3
|
-
require 'redshift-connector/reader'
|
4
3
|
require 'redshift-connector/logger'
|
5
|
-
require 'redshift-connector/abstract_data_file_bundle'
|
4
|
+
require 'redshift-connector/data_file'
|
6
5
|
require 'aws-sdk'
|
7
6
|
|
8
7
|
module RedshiftConnector
|
data/test/test_reader.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redshift-connector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.4.1
|
4
|
+
version: 5.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-05-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: redshift-connector-data_file
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.0.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.0.0
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: activerecord
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -130,8 +144,6 @@ extra_rdoc_files: []
|
|
130
144
|
files:
|
131
145
|
- README.md
|
132
146
|
- lib/redshift-connector.rb
|
133
|
-
- lib/redshift-connector/abstract_data_file.rb
|
134
|
-
- lib/redshift-connector/abstract_data_file_bundle.rb
|
135
147
|
- lib/redshift-connector/connector.rb
|
136
148
|
- lib/redshift-connector/exporter.rb
|
137
149
|
- lib/redshift-connector/importer.rb
|
@@ -142,17 +154,9 @@ files:
|
|
142
154
|
- lib/redshift-connector/importer/upsert.rb
|
143
155
|
- lib/redshift-connector/logger.rb
|
144
156
|
- lib/redshift-connector/query.rb
|
145
|
-
- lib/redshift-connector/reader.rb
|
146
|
-
- lib/redshift-connector/reader/abstract.rb
|
147
|
-
- lib/redshift-connector/reader/csv.rb
|
148
|
-
- lib/redshift-connector/reader/exception.rb
|
149
|
-
- lib/redshift-connector/reader/redshift_csv.rb
|
150
|
-
- lib/redshift-connector/reader/tsv.rb
|
151
157
|
- lib/redshift-connector/s3_bucket.rb
|
152
158
|
- lib/redshift-connector/s3_data_file.rb
|
153
159
|
- lib/redshift-connector/s3_data_file_bundle.rb
|
154
|
-
- lib/redshift-connector/url_data_file.rb
|
155
|
-
- lib/redshift-connector/url_data_file_bundle.rb
|
156
160
|
- lib/redshift-connector/version.rb
|
157
161
|
- test/all.rb
|
158
162
|
- test/config.rb
|
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'zlib'
|
2
|
-
|
3
|
-
module RedshiftConnector
|
4
|
-
class AbstractDataFile
|
5
|
-
def each_row(&block)
|
6
|
-
f = if gzipped_object?
|
7
|
-
Zlib::GzipReader.new(content)
|
8
|
-
else
|
9
|
-
content
|
10
|
-
end
|
11
|
-
@reader_class.new(f).each(&block)
|
12
|
-
ensure
|
13
|
-
content.close
|
14
|
-
end
|
15
|
-
|
16
|
-
def data_object?
|
17
|
-
@reader_class.data_object?(key)
|
18
|
-
end
|
19
|
-
|
20
|
-
def gzipped_object?
|
21
|
-
File.extname(key) == '.gz'
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
@@ -1,22 +0,0 @@
|
|
1
|
-
module RedshiftConnector
|
2
|
-
class AbstractDataFileBundle
|
3
|
-
def each_row(&block)
|
4
|
-
each_object do |obj|
|
5
|
-
obj.each_row(&block)
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
alias each each_row
|
10
|
-
|
11
|
-
def each_object(&block)
|
12
|
-
all_data_objects.each do |obj|
|
13
|
-
@logger.info "processing s3 object: #{obj.key}"
|
14
|
-
yield obj
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def all_data_objects
|
19
|
-
data_files.select {|obj| obj.data_object? }
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
# create module
|
2
|
-
module RedshiftConnector
|
3
|
-
module Reader
|
4
|
-
end
|
5
|
-
end
|
6
|
-
|
7
|
-
require 'redshift-connector/reader/redshift_csv'
|
8
|
-
require 'redshift-connector/reader/csv'
|
9
|
-
require 'redshift-connector/reader/tsv'
|
10
|
-
require 'redshift-connector/reader/exception'
|
11
|
-
|
12
|
-
module RedshiftConnector
|
13
|
-
module Reader
|
14
|
-
def Reader.get(id)
|
15
|
-
Abstract.get_reader_class(id)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module RedshiftConnector
|
2
|
-
class Reader::Abstract
|
3
|
-
READER_CLASSES = {} # {Symbol => Class}
|
4
|
-
|
5
|
-
def self.declare_reader(id)
|
6
|
-
READER_CLASSES[id.to_sym] = self
|
7
|
-
end
|
8
|
-
|
9
|
-
def self.get_reader_class(id)
|
10
|
-
READER_CLASSES[id.to_sym] or
|
11
|
-
raise ArgumentError, "unknown data file reader type: #{id.inspect}"
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.get_reader_class(id)
|
16
|
-
Reader::Abstract.get_reader_class(id)
|
17
|
-
end
|
18
|
-
end
|
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'redshift-connector/reader/abstract'
|
2
|
-
require 'redshift-connector/reader/exception'
|
3
|
-
require 'csv'
|
4
|
-
|
5
|
-
module RedshiftConnector
|
6
|
-
# Parses (standard) CSV files.
|
7
|
-
# For UNLOAD-generated CSV, use RedshiftCSV class.
|
8
|
-
class Reader::CSV < Reader::Abstract
|
9
|
-
declare_reader :csv
|
10
|
-
|
11
|
-
def self.data_object?(key)
|
12
|
-
/\.csv(?:\.|\z)/ =~ File.basename(key)
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize(f)
|
16
|
-
@f = f
|
17
|
-
end
|
18
|
-
|
19
|
-
def each(&block)
|
20
|
-
csv = CSV.new(@f)
|
21
|
-
csv.each(&block)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
@@ -1,54 +0,0 @@
|
|
1
|
-
require 'redshift-connector/reader/abstract'
|
2
|
-
require 'redshift-connector/reader/exception'
|
3
|
-
|
4
|
-
module RedshiftConnector
|
5
|
-
# Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
|
6
|
-
# UNLOAD escapes data by '\' (backslash character), we cannot use standard CSV class.
|
7
|
-
class Reader::RedshiftCSV < Reader::Abstract
|
8
|
-
declare_reader :redshift_csv
|
9
|
-
|
10
|
-
def self.data_object?(key)
|
11
|
-
/\.csv(?:\.|\z)/ =~ File.basename(key)
|
12
|
-
end
|
13
|
-
|
14
|
-
# f :: IO
|
15
|
-
def initialize(f)
|
16
|
-
@f = f
|
17
|
-
end
|
18
|
-
|
19
|
-
def each
|
20
|
-
# We can use simple #each_line to read single row
|
21
|
-
# because line terminators are always escaped by UNLOAD.
|
22
|
-
@f.each_line do |line|
|
23
|
-
yield parse_row(line, @f.lineno)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def parse_row(line, lineno = nil)
|
28
|
-
row = []
|
29
|
-
s = StringScanner.new(line)
|
30
|
-
s.skip(/\s+/)
|
31
|
-
until s.eos?
|
32
|
-
col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise MalformedCSVException, "CSV parse error at line #{lineno}"
|
33
|
-
row.push unescape_column(col)
|
34
|
-
s.skip(/\s*/) # skip line terminator on line ends
|
35
|
-
s.skip(/,\s*/)
|
36
|
-
end
|
37
|
-
row
|
38
|
-
end
|
39
|
-
|
40
|
-
UNESCAPE_MAP = {
|
41
|
-
'\\"' => '"',
|
42
|
-
"\\'" => "'",
|
43
|
-
'\\,' => ',',
|
44
|
-
'\\r' => "\r",
|
45
|
-
'\\n' => "\n",
|
46
|
-
'\\\\' => '\\'
|
47
|
-
}
|
48
|
-
|
49
|
-
def unescape_column(col)
|
50
|
-
charmap = UNESCAPE_MAP
|
51
|
-
col[1...-1].gsub(/\\./) {|s| charmap[s] }
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'redshift-connector/reader/abstract'
|
2
|
-
require 'redshift-connector/reader/exception'
|
3
|
-
require 'csv'
|
4
|
-
|
5
|
-
module RedshiftConnector
|
6
|
-
# Parses TSV (Tab Separated Format) files.
|
7
|
-
class Reader::TSV < Reader::Abstract
|
8
|
-
declare_reader :tsv
|
9
|
-
|
10
|
-
def self.data_object?(key)
|
11
|
-
/\.tsv(?:\.|\z)/ =~ File.basename(key)
|
12
|
-
end
|
13
|
-
|
14
|
-
def initialize(f)
|
15
|
-
@f = f
|
16
|
-
end
|
17
|
-
|
18
|
-
def each(&block)
|
19
|
-
@f.each_line do |line|
|
20
|
-
yield line.chomp.split("\t", -1)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
require 'redshift-connector/abstract_data_file'
|
2
|
-
require 'uri'
|
3
|
-
require 'zlib'
|
4
|
-
require 'open3'
|
5
|
-
|
6
|
-
module RedshiftConnector
|
7
|
-
class UrlDataFile < AbstractDataFile
|
8
|
-
def initialize(url, reader_class:)
|
9
|
-
@url = url
|
10
|
-
@reader_class = reader_class
|
11
|
-
end
|
12
|
-
|
13
|
-
def key
|
14
|
-
URI.parse(@url).path
|
15
|
-
end
|
16
|
-
|
17
|
-
def content
|
18
|
-
stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
|
19
|
-
stdin.close
|
20
|
-
stdout
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'redshift-connector/reader'
|
2
|
-
require 'redshift-connector/logger'
|
3
|
-
require 'redshift-connector/abstract_data_file_bundle'
|
4
|
-
require 'redshift-connector/url_data_file'
|
5
|
-
|
6
|
-
module RedshiftConnector
|
7
|
-
class UrlDataFileBundle < AbstractDataFileBundle
|
8
|
-
def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
|
9
|
-
@data_file_urls = data_file_urls
|
10
|
-
@filter = filter || lambda {|*row| row }
|
11
|
-
@logger = logger
|
12
|
-
@reader_class = Reader.get(format)
|
13
|
-
end
|
14
|
-
|
15
|
-
def data_files
|
16
|
-
@data_file_urls.map do |url|
|
17
|
-
UrlDataFile.new(url, reader_class: @reader_class)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|