redshift-connector 4.3.1 → 4.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0ed37ac9fef3effe7b2e9c2c288be44f2d942d35
4
- data.tar.gz: 5b8b27e51b258609c83b2f06d3df3268e78cccfa
3
+ metadata.gz: b12a5bb9e0027d0251a3d3e98766d60c1a7a2bf1
4
+ data.tar.gz: 2e27ed62452041a2dd6bac8471014bdbf4702ef6
5
5
  SHA512:
6
- metadata.gz: db50e26f55babb5e0e897d08799001e5f3dd5f4a69375711e9275f52edc6d4a6ada9b6543f185ce8de80459d39e51c7576b7de7f1990b3b9f824c70d5fb669e8
7
- data.tar.gz: a1147ff8f8d20624c80674c49857c80a00ac0684235b5bd90c0f0ab8e86e32ab316e343cefd362b5d1fd8139c5fe79ffb9b79c3795de0a478d43a169611eb2b7
6
+ metadata.gz: 2ff4c494a726ae8749e1f3e3e13bfa070908e79aba4c037742117b5afb500a27ed84474d43a65ea6a2d57b12bfdf379b6126c3a1d76bf3962e625bee419efea7
7
+ data.tar.gz: 702273bc9689811b99345cd4fa685dd0916252a6695e462201323b77224809deb95dfc9a663076b9a54b1dd4ca1c7abc724ff29faf29d759ad4f9559ec400f77
@@ -0,0 +1,24 @@
1
+ require 'zlib'
2
+
3
module RedshiftConnector
  # Base class for a single data file whose rows are parsed by a reader class.
  #
  # Subclasses are expected to provide:
  # * +key+           -- the object's name/path (a String).
  # * +content+       -- an IO-like stream of the raw bytes; it is closed
  #                      by #each_row.
  # * +@reader_class+ -- a class responding to +data_object?(key)+ and
  #                      +new(io).each+.
  class AbstractDataFile
    # Parses the file and yields every row produced by the reader.
    #
    # +content+ is called exactly once and that same stream is closed
    # afterwards. Calling +content+ again inside +ensure+ would, for any
    # implementation that opens a fresh stream per call, open a second
    # stream, close that one, and leave the stream actually read open.
    #
    # Returns whatever the reader's +each+ returns.
    def each_row(&block)
      io = content
      source = if gzipped_object?
        Zlib::GzipReader.new(io)
      else
        io
      end
      @reader_class.new(source).each(&block)
    ensure
      # io is still nil when +content+ itself raised; nothing to close then.
      io.close if io
    end

    # True when the reader class recognizes +key+ as a data object.
    def data_object?
      @reader_class.data_object?(key)
    end

    # True when +key+ ends with the ".gz" extension.
    def gzipped_object?
      File.extname(key) == '.gz'
    end
  end
end
@@ -0,0 +1,22 @@
1
module RedshiftConnector
  # Shared iteration logic for a collection of data files.
  #
  # Subclasses supply +data_files+ (a collection of objects responding to
  # +key+, +data_object?+ and +each_row+) and set +@logger+.
  class AbstractDataFileBundle
    # Yields every row of every data object, one object after another.
    def each_row(&block)
      each_object { |data_file| data_file.each_row(&block) }
    end

    alias_method :each, :each_row

    # Logs each data object's key, then yields the object.
    def each_object(&block)
      all_data_objects.each do |data_file|
        @logger.info "processing s3 object: #{data_file.key}"
        yield data_file
      end
    end

    # The members of +data_files+ that report themselves as data objects.
    def all_data_objects
      data_files.select(&:data_object?)
    end
  end
end
@@ -8,8 +8,8 @@ module RedshiftConnector
8
8
  class Reader::CSV < Reader::Abstract
9
9
  declare_reader :csv
10
10
 
11
- def self.data_object?(obj)
12
- /\.csv(?:\.|\z)/ =~ File.basename(obj.key)
11
+ def self.data_object?(key)
12
+ /\.csv(?:\.|\z)/ =~ File.basename(key)
13
13
  end
14
14
 
15
15
  def initialize(f)
@@ -7,8 +7,8 @@ module RedshiftConnector
7
7
  class Reader::RedshiftCSV < Reader::Abstract
8
8
  declare_reader :redshift_csv
9
9
 
10
- def self.data_object?(obj)
11
- /\.csv(?:\.|\z)/ =~ File.basename(obj.key)
10
+ def self.data_object?(key)
11
+ /\.csv(?:\.|\z)/ =~ File.basename(key)
12
12
  end
13
13
 
14
14
  # f :: IO
@@ -7,8 +7,8 @@ module RedshiftConnector
7
7
  class Reader::TSV < Reader::Abstract
8
8
  declare_reader :tsv
9
9
 
10
- def self.data_object?(obj)
11
- /\.tsv(?:\.|\z)/ =~ File.basename(obj.key)
10
+ def self.data_object?(key)
11
+ /\.tsv(?:\.|\z)/ =~ File.basename(key)
12
12
  end
13
13
 
14
14
  def initialize(f)
@@ -1,7 +1,7 @@
1
- require 'zlib'
1
+ require 'redshift-connector/abstract_data_file'
2
2
 
3
3
  module RedshiftConnector
4
- class S3DataFile
4
+ class S3DataFile < AbstractDataFile
5
5
  def initialize(object, reader_class:)
6
6
  @object = object
7
7
  @reader_class = reader_class
@@ -11,24 +11,10 @@ module RedshiftConnector
11
11
  @object.key
12
12
  end
13
13
 
14
- def each_row(&block)
15
- response = @object.get
16
- f = if gzipped_object?
17
- Zlib::GzipReader.new(response.body)
18
- else
19
- response.body
20
- end
21
- @reader_class.new(f).each(&block)
22
- ensure
23
- response.body.close if response
14
+ def content
15
+ @object.get.body
24
16
  end
25
17
 
26
- def data_object?
27
- @reader_class.data_object?(@object)
28
- end
29
-
30
- def gzipped_object?
31
- File.extname(@object.key) == '.gz'
32
- end
18
+ delegate :presigned_url, to: :@object
33
19
  end
34
20
  end
@@ -2,10 +2,11 @@ require 'redshift-connector/s3_bucket'
2
2
  require 'redshift-connector/s3_data_file'
3
3
  require 'redshift-connector/reader'
4
4
  require 'redshift-connector/logger'
5
+ require 'redshift-connector/abstract_data_file_bundle'
5
6
  require 'aws-sdk'
6
7
 
7
8
  module RedshiftConnector
8
- class S3DataFileBundle
9
+ class S3DataFileBundle < AbstractDataFileBundle
9
10
  def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
10
11
  real_prefix = "#{bucket.prefix}/#{prefix}"
11
12
  new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
@@ -68,25 +69,9 @@ module RedshiftConnector
68
69
  end
69
70
  private :do_each_batch
70
71
 
71
- def each_row(&block)
72
- each_object do |obj|
73
- obj.each_row(&block)
74
- end
75
- end
76
-
77
- alias each each_row
78
-
79
- def each_object(&block)
80
- all_data_objects.each do |obj|
81
- @logger.info "processing s3 object: #{obj.key}"
82
- yield obj
83
- end
84
- end
85
-
86
- def all_data_objects
72
+ def data_files
87
73
  @bucket.objects(prefix: @prefix)
88
74
  .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
89
- .select {|obj| obj.data_object? }
90
75
  end
91
76
 
92
77
  def clear
@@ -0,0 +1,23 @@
1
+ require 'redshift-connector/abstract_data_file'
2
+ require 'uri'
3
+ require 'zlib'
4
+ require 'open3'
5
+
6
module RedshiftConnector
  # A data file addressed by URL and fetched by running the +curl+ command.
  class UrlDataFile < AbstractDataFile
    # url          :: String URL handed to curl as-is.
    # reader_class :: reader class used to parse the downloaded stream.
    def initialize(url, reader_class:)
      @url = url
      @reader_class = reader_class
    end

    # The path component of the URL (query string and host excluded).
    def key
      URI.parse(@url).path
    end

    # Starts a new +curl+ process for the URL and returns its stdout stream.
    # Every call spawns a fresh process, so callers should call this once per
    # read and close the returned stream when done.
    #
    # popen2 is used rather than popen3 so that no stderr pipe is created:
    # a stderr pipe that nobody reads can fill up and stall curl, and it
    # would also hide curl's error messages. With popen2, stderr is inherited
    # from this process. --silent turns off the progress meter curl prints
    # when its output is piped; --show-error keeps real error messages.
    #
    # NOTE(review): curl's exit status is not checked here, so a failed
    # transfer is only visible through curl's stderr message.
    def content
      stdin, stdout, _wait_thread = Open3.popen2('curl', '--silent', '--show-error', @url)
      # curl is given no request body; close its stdin right away.
      stdin.close
      stdout
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'redshift-connector/reader'
2
+ require 'redshift-connector/logger'
3
+ require 'redshift-connector/abstract_data_file_bundle'
4
+ require 'redshift-connector/url_data_file'
5
+
6
module RedshiftConnector
  # A bundle of data files, each addressed by a URL.
  class UrlDataFileBundle < AbstractDataFileBundle
    # data_file_urls :: collection of URLs, one per data file.
    # format         :: reader format name, resolved through Reader.get.
    # filter         :: optional callable; defaults to one that returns the
    #                   row it was given.
    # logger         :: logger stored in @logger.
    def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
      @data_file_urls = data_file_urls
      @filter = filter || ->(*row) { row }
      @logger = logger
      @reader_class = Reader.get(format)
    end

    # Builds one UrlDataFile per URL, all sharing this bundle's reader class.
    def data_files
      @data_file_urls.map { |url| UrlDataFile.new(url, reader_class: @reader_class) }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RedshiftConnector
2
- VERSION = '4.3.1'
2
+ VERSION = '4.3.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.3.1
4
+ version: 4.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-20 00:00:00.000000000 Z
11
+ date: 2017-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -130,6 +130,8 @@ extra_rdoc_files: []
130
130
  files:
131
131
  - README.md
132
132
  - lib/redshift-connector.rb
133
+ - lib/redshift-connector/abstract_data_file.rb
134
+ - lib/redshift-connector/abstract_data_file_bundle.rb
133
135
  - lib/redshift-connector/connector.rb
134
136
  - lib/redshift-connector/exporter.rb
135
137
  - lib/redshift-connector/importer.rb
@@ -149,6 +151,8 @@ files:
149
151
  - lib/redshift-connector/s3_bucket.rb
150
152
  - lib/redshift-connector/s3_data_file.rb
151
153
  - lib/redshift-connector/s3_data_file_bundle.rb
154
+ - lib/redshift-connector/url_data_file.rb
155
+ - lib/redshift-connector/url_data_file_bundle.rb
152
156
  - lib/redshift-connector/version.rb
153
157
  - test/all.rb
154
158
  - test/config.rb