redshift-connector 5.3.1 → 5.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fd434428d24b3d41029d89fbbf48c8415416fc46
4
- data.tar.gz: a81a921c7b1bdde79851684137cf43d3bc5f26e7
3
+ metadata.gz: 92e4cbc013cea23b354fcd85bc91e320efd234ae
4
+ data.tar.gz: b5bbe711725abdf11ba8f138138fee8422fd6822
5
5
  SHA512:
6
- metadata.gz: c2dfd23d53f217190dcc85d99128700a40432eec27f89cf7a4100fbd61df81d3330ea7ddcc25fdd2164afdda3aeb736b5dd416f9fece9bc8c3fff905063925eb
7
- data.tar.gz: 6beed3edae728c20b33fbedaa858b7c27d49439cd86732ecf78d556cd3836cc2885ef83a60d0526935c4a2f05fb9908693ac76995b4a4096605db668953f0101
6
+ metadata.gz: 25731aa8fa269ded37717adb36e4d8a6ce81de171c36e6968e3bb1f29aadad93abfe068b6e3509ce70ff820f6f9b196c2ebc66373feded7bea99f945b0d645f7
7
+ data.tar.gz: ea26a509da98e7e6cf1fe7247b430db216687f216889d8181a1b5ea353de5d3a77a5bbf4de563f5ea4402c1782f8c57751b8cff167a9e1f7eed8e65cad955fcd
@@ -0,0 +1,24 @@
1
+ require 'zlib'
2
+
3
module RedshiftConnector
  # Base class for a single data file that can be read row by row.
  #
  # This class calls the following on itself, so subclasses are expected to
  # supply them:
  # * +key+           - the object's name/path; its extension decides gzip handling
  # * +content+       - an IO-like object holding the raw file data
  # * +@reader_class+ - a reader class responding to +new(io)+ and
  #   +data_object?(key)+
  class AbstractDataFile
    # Yields every row of this file to the given block.
    #
    # The raw stream is obtained from +content+ exactly once and that same
    # stream is closed afterwards. Calling +content+ again in the +ensure+
    # clause would open a second stream (for example another download) and
    # close that one, leaving the stream actually read from open.
    def each_row(&block)
      io = content
      f = gzipped_object? ? Zlib::GzipReader.new(io) : io
      @reader_class.new(f).each(&block)
    ensure
      # +io+ is still nil when +content+ itself raised.
      io.close if io
    end

    # Returns whatever the reader class reports for this file's key.
    def data_object?
      @reader_class.data_object?(key)
    end

    # True when the key's extension is exactly ".gz".
    def gzipped_object?
      File.extname(key) == '.gz'
    end
  end
end
@@ -0,0 +1,22 @@
1
module RedshiftConnector
  # Base class for a collection of data files that is read as one row stream.
  #
  # This class calls +data_files+ and +@logger.info+ on itself, so subclasses
  # are expected to define +data_files+ and to set +@logger+.
  class AbstractDataFileBundle
    # Yields every row of every data object, in the order +each_object+
    # visits them.
    def each_row(&block)
      each_object { |data_file| data_file.each_row(&block) }
    end

    alias each each_row

    # Logs each data object's key and then yields the object.
    def each_object
      all_data_objects.each do |data_file|
        @logger.info "processing s3 object: #{data_file.key}"
        yield data_file
      end
    end

    # The subset of +data_files+ whose +data_object?+ is truthy.
    def all_data_objects
      data_files.select(&:data_object?)
    end
  end
end
@@ -8,8 +8,8 @@ module RedshiftConnector
8
8
  class Reader::CSV < Reader::Abstract
9
9
  declare_reader :csv
10
10
 
11
- def self.data_object?(obj)
12
- /\.csv(?:\.|\z)/ =~ File.basename(obj.key)
11
+ def self.data_object?(key)
12
+ /\.csv(?:\.|\z)/ =~ File.basename(key)
13
13
  end
14
14
 
15
15
  def initialize(f)
@@ -7,8 +7,8 @@ module RedshiftConnector
7
7
  class Reader::RedshiftCSV < Reader::Abstract
8
8
  declare_reader :redshift_csv
9
9
 
10
- def self.data_object?(obj)
11
- /\.csv(?:\.|\z)/ =~ File.basename(obj.key)
10
+ def self.data_object?(key)
11
+ /\.csv(?:\.|\z)/ =~ File.basename(key)
12
12
  end
13
13
 
14
14
  # f :: IO
@@ -7,8 +7,8 @@ module RedshiftConnector
7
7
  class Reader::TSV < Reader::Abstract
8
8
  declare_reader :tsv
9
9
 
10
- def self.data_object?(obj)
11
- /\.tsv(?:\.|\z)/ =~ File.basename(obj.key)
10
+ def self.data_object?(key)
11
+ /\.tsv(?:\.|\z)/ =~ File.basename(key)
12
12
  end
13
13
 
14
14
  def initialize(f)
@@ -1,7 +1,7 @@
1
- require 'zlib'
1
+ require 'redshift-connector/abstract_data_file'
2
2
 
3
3
  module RedshiftConnector
4
- class S3DataFile
4
+ class S3DataFile < AbstractDataFile
5
5
  def initialize(object, reader_class:)
6
6
  @object = object
7
7
  @reader_class = reader_class
@@ -11,24 +11,10 @@ module RedshiftConnector
11
11
  @object.key
12
12
  end
13
13
 
14
- def each_row(&block)
15
- response = @object.get
16
- f = if gzipped_object?
17
- Zlib::GzipReader.new(response.body)
18
- else
19
- response.body
20
- end
21
- @reader_class.new(f).each(&block)
22
- ensure
23
- response.body.close if response
14
+ def content
15
+ @object.get.body
24
16
  end
25
17
 
26
- def data_object?
27
- @reader_class.data_object?(@object)
28
- end
29
-
30
- def gzipped_object?
31
- File.extname(@object.key) == '.gz'
32
- end
18
+ delegate :presigned_url, to: :@object
33
19
  end
34
20
  end
@@ -2,10 +2,11 @@ require 'redshift-connector/s3_bucket'
2
2
  require 'redshift-connector/s3_data_file'
3
3
  require 'redshift-connector/reader'
4
4
  require 'redshift-connector/logger'
5
+ require 'redshift-connector/abstract_data_file_bundle'
5
6
  require 'aws-sdk'
6
7
 
7
8
  module RedshiftConnector
8
- class S3DataFileBundle
9
+ class S3DataFileBundle < AbstractDataFileBundle
9
10
  def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
10
11
  real_prefix = "#{bucket.prefix}/#{prefix}"
11
12
  new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
@@ -68,25 +69,9 @@ module RedshiftConnector
68
69
  end
69
70
  private :do_each_batch
70
71
 
71
- def each_row(&block)
72
- each_object do |obj|
73
- obj.each_row(&block)
74
- end
75
- end
76
-
77
- alias each each_row
78
-
79
- def each_object(&block)
80
- all_data_objects.each do |obj|
81
- @logger.info "processing s3 object: #{obj.key}"
82
- yield obj
83
- end
84
- end
85
-
86
- def all_data_objects
72
+ def data_files
87
73
  @bucket.objects(prefix: @prefix)
88
74
  .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
89
- .select {|obj| obj.data_object? }
90
75
  end
91
76
 
92
77
  def clear
@@ -0,0 +1,23 @@
1
+ require 'redshift-connector/abstract_data_file'
2
+ require 'uri'
3
+ require 'zlib'
4
+ require 'open3'
5
+
6
module RedshiftConnector
  # A data file identified by a URL; its bytes are fetched by running the
  # external +curl+ command.
  class UrlDataFile < AbstractDataFile
    # url          - the URL string to download
    # reader_class - reader class stored in +@reader_class+
    def initialize(url, reader_class:)
      @url = url
      @reader_class = reader_class
    end

    # The path component of the URL.
    def key
      URI.parse(@url).path
    end

    # Starts +curl+ for the URL and returns the read end of its stdout.
    # Every call spawns a new process; the caller must close the returned IO.
    #
    # popen2 is used rather than popen3 so that no stderr pipe is created.
    # Nothing ever read that pipe, so curl's progress meter could fill it and
    # block the download, and the descriptor was never closed. +--silent+
    # turns the progress meter off and +--show-error+ keeps real failures
    # visible on the parent's stderr.
    def content
      stdin, stdout, _wait_thread = Open3.popen2('curl', '--silent', '--show-error', @url)
      stdin.close
      stdout
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'redshift-connector/reader'
2
+ require 'redshift-connector/logger'
3
+ require 'redshift-connector/abstract_data_file_bundle'
4
+ require 'redshift-connector/url_data_file'
5
+
6
module RedshiftConnector
  # A bundle whose data files are given as a list of URLs.
  class UrlDataFileBundle < AbstractDataFileBundle
    # data_file_urls - collection of URL strings, one per data file
    # format         - reader format name passed to +Reader.get+
    # filter         - optional callable; defaults to one that returns the
    #                  row it is given unchanged
    # logger         - stored in +@logger+
    def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
      pass_through = ->(*row) { row }

      @data_file_urls = data_file_urls
      @filter = filter || pass_through
      @logger = logger
      @reader_class = Reader.get(format)
    end

    # Builds one UrlDataFile per URL, all sharing this bundle's reader class.
    def data_files
      @data_file_urls.map { |file_url| UrlDataFile.new(file_url, reader_class: @reader_class) }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RedshiftConnector
2
- VERSION = '5.3.1'
2
+ VERSION = '5.3.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.3.1
4
+ version: 5.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-27 00:00:00.000000000 Z
11
+ date: 2017-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -130,6 +130,8 @@ extra_rdoc_files: []
130
130
  files:
131
131
  - README.md
132
132
  - lib/redshift-connector.rb
133
+ - lib/redshift-connector/abstract_data_file.rb
134
+ - lib/redshift-connector/abstract_data_file_bundle.rb
133
135
  - lib/redshift-connector/connector.rb
134
136
  - lib/redshift-connector/exporter.rb
135
137
  - lib/redshift-connector/importer.rb
@@ -149,6 +151,8 @@ files:
149
151
  - lib/redshift-connector/s3_bucket.rb
150
152
  - lib/redshift-connector/s3_data_file.rb
151
153
  - lib/redshift-connector/s3_data_file_bundle.rb
154
+ - lib/redshift-connector/url_data_file.rb
155
+ - lib/redshift-connector/url_data_file_bundle.rb
152
156
  - lib/redshift-connector/version.rb
153
157
  - test/all.rb
154
158
  - test/config.rb