redshift-connector 4.4.1 → 4.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 905cd291f9b03a51619ed92e3aa21a88775928df
4
- data.tar.gz: 1d7616a5482652d634a4004c790825f3f638225a
3
+ metadata.gz: cb3953798104bd92d6d856b8fd3b2d835ba9d54a
4
+ data.tar.gz: e2f51e74ec4969e1187ce35e457a2af357bda1bf
5
5
  SHA512:
6
- metadata.gz: d70d4f5ea5a0ba4369b1d9f730b0a861779c3f66b72eb036ca34653817af3c2d32e6d42190f4f61438a9324851a77eba2f2651e3b6472520aff5f13b2f500edf
7
- data.tar.gz: 6d2683f7361f3706a916d6bab4571a5bfa0a6ca8baeff3155c25005174ea1d07468c0b8959765be7548540a2a3c74e316e358e841ba0ff4c42f3e8df373c5a0e
6
+ metadata.gz: cec111e3ca92096a3a0071b7a76a36b3c259213ab03aecb5f75ca1e3b2cc41fd1b4e140384a5a482944d7f02819cfe8da5010ae0bd8c9e9b0619001e09aa3542
7
+ data.tar.gz: ac5dbcc924a7ffb5ee7387d4cf3f56a78371c10a870d4b3b3215502d19f256daff0bcc970ed2d7d946b6ecd62f8305a4ab55ecd6adbd0e4b1896cbf42b671067
@@ -1,4 +1,4 @@
1
- require 'redshift-connector/abstract_data_file'
1
+ require 'redshift-connector/data_file'
2
2
 
3
3
  module RedshiftConnector
4
4
  class S3DataFile < AbstractDataFile
@@ -1,8 +1,7 @@
1
1
  require 'redshift-connector/s3_bucket'
2
2
  require 'redshift-connector/s3_data_file'
3
- require 'redshift-connector/reader'
4
3
  require 'redshift-connector/logger'
5
- require 'redshift-connector/abstract_data_file_bundle'
4
+ require 'redshift-connector/data_file'
6
5
  require 'aws-sdk'
7
6
 
8
7
  module RedshiftConnector
@@ -1,3 +1,3 @@
1
1
  module RedshiftConnector
2
- VERSION = '4.4.1'
2
+ VERSION = '4.5.0'
3
3
  end
@@ -1,5 +1,5 @@
1
1
  require 'test/unit'
2
- require 'redshift-connector/reader'
2
+ require 'redshift-connector/data_file'
3
3
 
4
4
  module RedshiftConnector
5
5
  module Reader
data/test/test_reader.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  require 'test/unit'
2
- require 'redshift-connector/reader'
3
2
 
4
3
  module RedshiftConnector
5
4
  class TestReader < Test::Unit::TestCase
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.4.1
4
+ version: 4.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-04 00:00:00.000000000 Z
11
+ date: 2017-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: redshift-connector-data_file
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.0
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: pg
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -130,8 +144,6 @@ extra_rdoc_files: []
130
144
  files:
131
145
  - README.md
132
146
  - lib/redshift-connector.rb
133
- - lib/redshift-connector/abstract_data_file.rb
134
- - lib/redshift-connector/abstract_data_file_bundle.rb
135
147
  - lib/redshift-connector/connector.rb
136
148
  - lib/redshift-connector/exporter.rb
137
149
  - lib/redshift-connector/importer.rb
@@ -142,17 +154,9 @@ files:
142
154
  - lib/redshift-connector/importer/upsert.rb
143
155
  - lib/redshift-connector/logger.rb
144
156
  - lib/redshift-connector/query.rb
145
- - lib/redshift-connector/reader.rb
146
- - lib/redshift-connector/reader/abstract.rb
147
- - lib/redshift-connector/reader/csv.rb
148
- - lib/redshift-connector/reader/exception.rb
149
- - lib/redshift-connector/reader/redshift_csv.rb
150
- - lib/redshift-connector/reader/tsv.rb
151
157
  - lib/redshift-connector/s3_bucket.rb
152
158
  - lib/redshift-connector/s3_data_file.rb
153
159
  - lib/redshift-connector/s3_data_file_bundle.rb
154
- - lib/redshift-connector/url_data_file.rb
155
- - lib/redshift-connector/url_data_file_bundle.rb
156
160
  - lib/redshift-connector/version.rb
157
161
  - test/all.rb
158
162
  - test/config.rb
@@ -1,24 +0,0 @@
1
- require 'zlib'
2
-
3
- module RedshiftConnector
4
- class AbstractDataFile
5
- def each_row(&block)
6
- f = if gzipped_object?
7
- Zlib::GzipReader.new(content)
8
- else
9
- content
10
- end
11
- @reader_class.new(f).each(&block)
12
- ensure
13
- content.close
14
- end
15
-
16
- def data_object?
17
- @reader_class.data_object?(key)
18
- end
19
-
20
- def gzipped_object?
21
- File.extname(key) == '.gz'
22
- end
23
- end
24
- end
@@ -1,22 +0,0 @@
1
- module RedshiftConnector
2
- class AbstractDataFileBundle
3
- def each_row(&block)
4
- each_object do |obj|
5
- obj.each_row(&block)
6
- end
7
- end
8
-
9
- alias each each_row
10
-
11
- def each_object(&block)
12
- all_data_objects.each do |obj|
13
- @logger.info "processing s3 object: #{obj.key}"
14
- yield obj
15
- end
16
- end
17
-
18
- def all_data_objects
19
- data_files.select {|obj| obj.data_object? }
20
- end
21
- end
22
- end
@@ -1,18 +0,0 @@
1
- # create module
2
- module RedshiftConnector
3
- module Reader
4
- end
5
- end
6
-
7
- require 'redshift-connector/reader/redshift_csv'
8
- require 'redshift-connector/reader/csv'
9
- require 'redshift-connector/reader/tsv'
10
- require 'redshift-connector/reader/exception'
11
-
12
- module RedshiftConnector
13
- module Reader
14
- def Reader.get(id)
15
- Abstract.get_reader_class(id)
16
- end
17
- end
18
- end
@@ -1,18 +0,0 @@
1
- module RedshiftConnector
2
- class Reader::Abstract
3
- READER_CLASSES = {} # {Symbol => Class}
4
-
5
- def self.declare_reader(id)
6
- READER_CLASSES[id.to_sym] = self
7
- end
8
-
9
- def self.get_reader_class(id)
10
- READER_CLASSES[id.to_sym] or
11
- raise ArgumentError, "unknown data file reader type: #{id.inspect}"
12
- end
13
- end
14
-
15
- def self.get_reader_class(id)
16
- Reader::Abstract.get_reader_class(id)
17
- end
18
- end
@@ -1,24 +0,0 @@
1
- require 'redshift-connector/reader/abstract'
2
- require 'redshift-connector/reader/exception'
3
- require 'csv'
4
-
5
- module RedshiftConnector
6
- # Parses (standard) CSV files.
7
- # For UNLOAD-generated CSV, use RedshiftCSV class.
8
- class Reader::CSV < Reader::Abstract
9
- declare_reader :csv
10
-
11
- def self.data_object?(key)
12
- /\.csv(?:\.|\z)/ =~ File.basename(key)
13
- end
14
-
15
- def initialize(f)
16
- @f = f
17
- end
18
-
19
- def each(&block)
20
- csv = CSV.new(@f)
21
- csv.each(&block)
22
- end
23
- end
24
- end
@@ -1,3 +0,0 @@
1
- module RedshiftConnector
2
- class Reader::MalformedCSVException < StandardError; end
3
- end
@@ -1,54 +0,0 @@
1
- require 'redshift-connector/reader/abstract'
2
- require 'redshift-connector/reader/exception'
3
-
4
- module RedshiftConnector
5
- # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
6
- # UNLOAD escapes data by '\' (backslash character), we cannot use standard CSV class.
7
- class Reader::RedshiftCSV < Reader::Abstract
8
- declare_reader :redshift_csv
9
-
10
- def self.data_object?(key)
11
- /\.csv(?:\.|\z)/ =~ File.basename(key)
12
- end
13
-
14
- # f :: IO
15
- def initialize(f)
16
- @f = f
17
- end
18
-
19
- def each
20
- # We can use simple #each_line to read single row
21
- # because line terminators are always escaped by UNLOAD.
22
- @f.each_line do |line|
23
- yield parse_row(line, @f.lineno)
24
- end
25
- end
26
-
27
- def parse_row(line, lineno = nil)
28
- row = []
29
- s = StringScanner.new(line)
30
- s.skip(/\s+/)
31
- until s.eos?
32
- col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise MalformedCSVException, "CSV parse error at line #{lineno}"
33
- row.push unescape_column(col)
34
- s.skip(/\s*/) # skip line terminator on line ends
35
- s.skip(/,\s*/)
36
- end
37
- row
38
- end
39
-
40
- UNESCAPE_MAP = {
41
- '\\"' => '"',
42
- "\\'" => "'",
43
- '\\,' => ',',
44
- '\\r' => "\r",
45
- '\\n' => "\n",
46
- '\\\\' => '\\'
47
- }
48
-
49
- def unescape_column(col)
50
- charmap = UNESCAPE_MAP
51
- col[1...-1].gsub(/\\./) {|s| charmap[s] }
52
- end
53
- end
54
- end
@@ -1,24 +0,0 @@
1
- require 'redshift-connector/reader/abstract'
2
- require 'redshift-connector/reader/exception'
3
- require 'csv'
4
-
5
- module RedshiftConnector
6
- # Parses TSV (Tab Separated Format) files.
7
- class Reader::TSV < Reader::Abstract
8
- declare_reader :tsv
9
-
10
- def self.data_object?(key)
11
- /\.tsv(?:\.|\z)/ =~ File.basename(key)
12
- end
13
-
14
- def initialize(f)
15
- @f = f
16
- end
17
-
18
- def each(&block)
19
- @f.each_line do |line|
20
- yield line.chomp.split("\t", -1)
21
- end
22
- end
23
- end
24
- end
@@ -1,23 +0,0 @@
1
- require 'redshift-connector/abstract_data_file'
2
- require 'uri'
3
- require 'zlib'
4
- require 'open3'
5
-
6
- module RedshiftConnector
7
- class UrlDataFile < AbstractDataFile
8
- def initialize(url, reader_class:)
9
- @url = url
10
- @reader_class = reader_class
11
- end
12
-
13
- def key
14
- URI.parse(@url).path
15
- end
16
-
17
- def content
18
- stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
19
- stdin.close
20
- stdout
21
- end
22
- end
23
- end
@@ -1,21 +0,0 @@
1
- require 'redshift-connector/reader'
2
- require 'redshift-connector/logger'
3
- require 'redshift-connector/abstract_data_file_bundle'
4
- require 'redshift-connector/url_data_file'
5
-
6
- module RedshiftConnector
7
- class UrlDataFileBundle < AbstractDataFileBundle
8
- def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
9
- @data_file_urls = data_file_urls
10
- @filter = filter || lambda {|*row| row }
11
- @logger = logger
12
- @reader_class = Reader.get(format)
13
- end
14
-
15
- def data_files
16
- @data_file_urls.map do |url|
17
- UrlDataFile.new(url, reader_class: @reader_class)
18
- end
19
- end
20
- end
21
- end