redshift-connector 4.3.1 → 4.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0ed37ac9fef3effe7b2e9c2c288be44f2d942d35
4
- data.tar.gz: 5b8b27e51b258609c83b2f06d3df3268e78cccfa
3
+ metadata.gz: b12a5bb9e0027d0251a3d3e98766d60c1a7a2bf1
4
+ data.tar.gz: 2e27ed62452041a2dd6bac8471014bdbf4702ef6
5
5
  SHA512:
6
- metadata.gz: db50e26f55babb5e0e897d08799001e5f3dd5f4a69375711e9275f52edc6d4a6ada9b6543f185ce8de80459d39e51c7576b7de7f1990b3b9f824c70d5fb669e8
7
- data.tar.gz: a1147ff8f8d20624c80674c49857c80a00ac0684235b5bd90c0f0ab8e86e32ab316e343cefd362b5d1fd8139c5fe79ffb9b79c3795de0a478d43a169611eb2b7
6
+ metadata.gz: 2ff4c494a726ae8749e1f3e3e13bfa070908e79aba4c037742117b5afb500a27ed84474d43a65ea6a2d57b12bfdf379b6126c3a1d76bf3962e625bee419efea7
7
+ data.tar.gz: 702273bc9689811b99345cd4fa685dd0916252a6695e462201323b77224809deb95dfc9a663076b9a54b1dd4ca1c7abc724ff29faf29d759ad4f9559ec400f77
@@ -0,0 +1,24 @@
1
+ require 'zlib'
2
+
3
module RedshiftConnector
  # Base class for one data file that can be read row by row.
  #
  # A subclass is expected to provide:
  # * +key+     -- the file's name or path; its extension decides whether the
  #                content is gunzipped, and it is handed to the reader class's
  #                +data_object?+.
  # * +content+ -- an IO-like object (responding to +close+) holding the data.
  # * +@reader_class+ -- a class whose instances are built from an IO-like
  #                object and respond to +each+, and which responds to
  #                +data_object?(key)+.
  class AbstractDataFile
    # Yields every row produced by the reader class for this file.
    #
    # +content+ is called exactly once and the object it returned is the one
    # that gets closed.  Subclasses may create a new resource on every call to
    # +content+, so calling it a second time just to close it would leave the
    # stream that was actually read open.
    #
    # Returns whatever the reader's +each+ returns.
    def each_row(&block)
      gzipped = gzipped_object?
      source = content
      gz = Zlib::GzipReader.new(source) if gzipped
      @reader_class.new(gz || source).each(&block)
    ensure
      # GzipReader#finish releases the zlib state without closing +source+,
      # which is closed explicitly below.
      gz.finish if gz && !gz.closed?
      # Skip the close when the reader already closed the stream itself.
      source.close if source && !(source.respond_to?(:closed?) && source.closed?)
    end

    # Asks the reader class whether this file's key names a data file it
    # can read.
    def data_object?
      @reader_class.data_object?(key)
    end

    # True when the key's extension is exactly ".gz".
    def gzipped_object?
      File.extname(key) == '.gz'
    end
  end
end
@@ -0,0 +1,22 @@
1
module RedshiftConnector
  # Base class for a collection of data files that is read as one stream
  # of rows.
  #
  # A subclass is expected to provide +data_files+ (a collection of objects
  # responding to +key+, +data_object?+ and +each_row+) and to set +@logger+.
  class AbstractDataFileBundle
    # Yields every row of every data object, one file after another.
    # Returns an Enumerator when called without a block.
    def each_row(&block)
      return enum_for(:each_row) unless block

      each_object do |obj|
        obj.each_row(&block)
      end
    end

    alias each each_row

    # Yields each data object in turn, logging its key first.
    # Returns an Enumerator when called without a block; a bare +yield+
    # would otherwise raise LocalJumpError.
    def each_object
      return enum_for(:each_object) unless block_given?

      all_data_objects.each do |obj|
        @logger.info "processing s3 object: #{obj.key}"
        yield obj
      end
    end

    # The entries of +data_files+ that answer true to +data_object?+.
    def all_data_objects
      data_files.select {|obj| obj.data_object? }
    end
  end
end
@@ -8,8 +8,8 @@ module RedshiftConnector
8
8
  class Reader::CSV < Reader::Abstract
9
9
  declare_reader :csv
10
10
 
11
- def self.data_object?(obj)
12
- /\.csv(?:\.|\z)/ =~ File.basename(obj.key)
11
# Tests whether +key+ names a CSV data file: the basename of +key+ must
# contain ".csv" followed either by another "." or by the end of the name.
# Returns the result of =~ (match position, or nil when there is no match).
+ def self.data_object?(key)
12
+ /\.csv(?:\.|\z)/ =~ File.basename(key)
13
13
  end
14
14
 
15
15
  def initialize(f)
@@ -7,8 +7,8 @@ module RedshiftConnector
7
7
  class Reader::RedshiftCSV < Reader::Abstract
8
8
  declare_reader :redshift_csv
9
9
 
10
- def self.data_object?(obj)
11
- /\.csv(?:\.|\z)/ =~ File.basename(obj.key)
10
# Tests whether +key+ names a CSV data file: the basename of +key+ must
# contain ".csv" followed either by another "." or by the end of the name.
# Returns the result of =~ (match position, or nil when there is no match).
+ def self.data_object?(key)
11
+ /\.csv(?:\.|\z)/ =~ File.basename(key)
12
12
  end
13
13
 
14
14
  # f :: IO
@@ -7,8 +7,8 @@ module RedshiftConnector
7
7
  class Reader::TSV < Reader::Abstract
8
8
  declare_reader :tsv
9
9
 
10
- def self.data_object?(obj)
11
- /\.tsv(?:\.|\z)/ =~ File.basename(obj.key)
10
# Tests whether +key+ names a TSV data file: the basename of +key+ must
# contain ".tsv" followed either by another "." or by the end of the name.
# Returns the result of =~ (match position, or nil when there is no match).
+ def self.data_object?(key)
11
+ /\.tsv(?:\.|\z)/ =~ File.basename(key)
12
12
  end
13
13
 
14
14
  def initialize(f)
@@ -1,7 +1,7 @@
1
- require 'zlib'
1
+ require 'redshift-connector/abstract_data_file'
2
2
 
3
3
  module RedshiftConnector
4
- class S3DataFile
4
+ class S3DataFile < AbstractDataFile
5
5
  def initialize(object, reader_class:)
6
6
  @object = object
7
7
  @reader_class = reader_class
@@ -11,24 +11,10 @@ module RedshiftConnector
11
11
  @object.key
12
12
  end
13
13
 
14
- def each_row(&block)
15
- response = @object.get
16
- f = if gzipped_object?
17
- Zlib::GzipReader.new(response.body)
18
- else
19
- response.body
20
- end
21
- @reader_class.new(f).each(&block)
22
- ensure
23
- response.body.close if response
14
# Calls #get on the wrapped object and returns the body of the response.
# Every call to this method invokes #get again, so a caller that needs to
# read and later close the same body must keep the returned value.
+ def content
15
+ @object.get.body
24
16
  end
25
17
 
26
- def data_object?
27
- @reader_class.data_object?(@object)
28
- end
29
-
30
- def gzipped_object?
31
- File.extname(@object.key) == '.gz'
32
- end
18
+ delegate :presigned_url, to: :@object
33
19
  end
34
20
  end
@@ -2,10 +2,11 @@ require 'redshift-connector/s3_bucket'
2
2
  require 'redshift-connector/s3_data_file'
3
3
  require 'redshift-connector/reader'
4
4
  require 'redshift-connector/logger'
5
+ require 'redshift-connector/abstract_data_file_bundle'
5
6
  require 'aws-sdk'
6
7
 
7
8
  module RedshiftConnector
8
- class S3DataFileBundle
9
+ class S3DataFileBundle < AbstractDataFileBundle
9
10
  def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
10
11
  real_prefix = "#{bucket.prefix}/#{prefix}"
11
12
  new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
@@ -68,25 +69,9 @@ module RedshiftConnector
68
69
  end
69
70
  private :do_each_batch
70
71
 
71
- def each_row(&block)
72
- each_object do |obj|
73
- obj.each_row(&block)
74
- end
75
- end
76
-
77
- alias each each_row
78
-
79
- def each_object(&block)
80
- all_data_objects.each do |obj|
81
- @logger.info "processing s3 object: #{obj.key}"
82
- yield obj
83
- end
84
- end
85
-
86
- def all_data_objects
72
# Wraps every object the bucket lists under @prefix in an S3DataFile that
# uses @reader_class.
+ def data_files
87
73
  @bucket.objects(prefix: @prefix)
88
74
  .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
89
- .select {|obj| obj.data_object? }
90
75
  end
91
76
 
92
77
  def clear
@@ -0,0 +1,23 @@
1
+ require 'redshift-connector/abstract_data_file'
2
+ require 'uri'
3
+ require 'zlib'
4
+ require 'open3'
5
+
6
module RedshiftConnector
  # A data file identified by a URL and downloaded by running the external
  # +curl+ command.
  class UrlDataFile < AbstractDataFile
    # url          :: String URL of the data file
    # reader_class :: reader class used to parse the downloaded content
    def initialize(url, reader_class:)
      @url = url
      @reader_class = reader_class
    end

    # The path component of the URL.
    def key
      URI.parse(@url).path
    end

    # Starts a new +curl+ process for the URL and returns an IO reading its
    # standard output.  Each call spawns another process; closing the
    # returned IO also waits for that process to exit.
    #
    # The command is passed as an array, so no shell is involved.  The
    # child's stdin and stderr are attached to the null device rather than
    # to pipes: an undrained stderr pipe leaks a descriptor per call and can
    # stall curl once its progress output fills the pipe buffer.
    #
    # NOTE(review): curl's diagnostics and exit status are not inspected
    # here, so a failed download is not reported by this method.
    def content
      IO.popen(['curl', @url], :in => File::NULL, :err => File::NULL)
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'redshift-connector/reader'
2
+ require 'redshift-connector/logger'
3
+ require 'redshift-connector/abstract_data_file_bundle'
4
+ require 'redshift-connector/url_data_file'
5
+
6
module RedshiftConnector
  # A bundle of data files, each addressed by a URL.
  class UrlDataFileBundle < AbstractDataFileBundle
    # data_file_urls :: collection of URLs, one per data file
    # format         :: reader format name, resolved through Reader.get
    # filter         :: optional callable; when omitted a lambda returning
    #                   its arguments as an array is stored instead
    # logger         :: logger stored for use by the bundle
    def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
      pass_through = ->(*row) { row }

      @data_file_urls = data_file_urls
      @filter = filter ? filter : pass_through
      @logger = logger
      @reader_class = Reader.get(format)
    end

    # Builds one UrlDataFile per URL, all sharing this bundle's reader class.
    def data_files
      @data_file_urls.map { |location| UrlDataFile.new(location, reader_class: @reader_class) }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RedshiftConnector
2
- VERSION = '4.3.1'
2
+ VERSION = '4.3.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.3.1
4
+ version: 4.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-20 00:00:00.000000000 Z
11
+ date: 2017-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -130,6 +130,8 @@ extra_rdoc_files: []
130
130
  files:
131
131
  - README.md
132
132
  - lib/redshift-connector.rb
133
+ - lib/redshift-connector/abstract_data_file.rb
134
+ - lib/redshift-connector/abstract_data_file_bundle.rb
133
135
  - lib/redshift-connector/connector.rb
134
136
  - lib/redshift-connector/exporter.rb
135
137
  - lib/redshift-connector/importer.rb
@@ -149,6 +151,8 @@ files:
149
151
  - lib/redshift-connector/s3_bucket.rb
150
152
  - lib/redshift-connector/s3_data_file.rb
151
153
  - lib/redshift-connector/s3_data_file_bundle.rb
154
+ - lib/redshift-connector/url_data_file.rb
155
+ - lib/redshift-connector/url_data_file_bundle.rb
152
156
  - lib/redshift-connector/version.rb
153
157
  - test/all.rb
154
158
  - test/config.rb