redshift-connector-data_file 7.0.0 → 7.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5a91f1bb3c4764f126a7cd5db3c97b23626a8f54
-  data.tar.gz: 4d604f99c42e55ed8343a6240139df3ce723e63d
+  metadata.gz: 5f242fc490379db815a588378910c758a3aa2d84
+  data.tar.gz: 2a315d9407eb66562b32d83993ba577a5b5063ac
 SHA512:
-  metadata.gz: a64f93698aeee74754b6c0b4452b8bd9d06031235e0737a57f17752a0aba36659322a9e6a467e5c3f359f82fcd1e6f8ae1fef128a30c080370e08898d89216e7
-  data.tar.gz: 00e1d01e5da780e861a8c700ef2c45860bbef1e36e56ced8c19c4afc8c79750d1b50674c61474e8826904030ef6f13e4ec0eff82dca4d7bfa1a65caf18843a68
+  metadata.gz: 141660793595766a703b5bd798ee4a61c8f9dab6db02ea3084d7b9f5eb92305bf2ad7c6d3164a55eb9dfa5797593675ef161d4789509d1b35928bda840825bc4
+  data.tar.gz: dcfebdb474cd2f78be55f47d4afb2e2488bd3e43f72f88fc3806bcff80ab24619eb0afc4b054845d3f57d4961897b753b32b62aca0c9b99a1d883c3b4387cc08
@@ -2,17 +2,24 @@ require 'zlib'
 
 module RedshiftConnector
   class AbstractDataFile
+    def initialize(reader_class:)
+      @reader_class = reader_class
+    end
+
     def each_row(&block)
-      f = if gzipped_object?
-        Zlib::GzipReader.new(content)
-      else
-        content
-      end
-      @reader_class.new(f).each(&block)
-    ensure
-      content.close
+      f = open
+      begin
+        if gzipped_object?
+          f = Zlib::GzipReader.new(f)
+        end
+        @reader_class.new(f).each(&block)
+      ensure
+        f.close
+      end
     end
 
+    # abstract open
+
     def data_object?
       @reader_class.data_object?(key)
     end
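
In 7.1.0 the reader, gzip handling, and stream closing live in AbstractDataFile, so a concrete data file only needs to supply open (returning an IO) and key. A minimal sketch of a hypothetical subclass under the new contract (LocalDataFile and its path argument are invented for illustration):

    require 'redshift_connector/abstract_data_file'

    module RedshiftConnector
      # Hypothetical data file backed by a local path, for illustration only.
      class LocalDataFile < AbstractDataFile
        def initialize(path, reader_class:)
          super reader_class: reader_class
          @path = path
        end

        def key
          @path
        end

        # each_row (defined in AbstractDataFile) wraps this IO in Zlib::GzipReader
        # when gzipped_object? is true and closes it when iteration finishes.
        def open
          File.open(@path, 'rb')
        end
      end
    end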
@@ -1,5 +1,14 @@
 module RedshiftConnector
   class AbstractDataFileBundle
+    def initialize(filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
+      @filter = filter || lambda {|*row| row }
+      @batch_size = batch_size || 1000
+      @logger = logger
+    end
+
+    attr_reader :batch_size
+    attr_reader :logger
+
     def each_row(&block)
       each_object do |obj|
         obj.each_row(&block)
@@ -19,10 +28,11 @@ module RedshiftConnector
       data_files.select {|obj| obj.data_object? }
     end
 
+    # abstract data_files
+
    REPORT_SIZE = 10_0000
 
     def each_batch(report: true)
-      @logger.info "reader: #{@reader_class}"
       n = 0
       reported = 0
       do_each_batch(@batch_size) do |rows|
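
The bundle side follows the same pattern: filter, batch size, and logger handling move into AbstractDataFileBundle, and a subclass only has to expose data_files. A rough sketch of a hypothetical bundle, reusing the LocalDataFile sketch above:

    require 'redshift_connector/abstract_data_file_bundle'

    module RedshiftConnector
      # Hypothetical bundle over local files; shows only the new constructor contract.
      class LocalDataFileBundle < AbstractDataFileBundle
        def initialize(paths, reader_class:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
          super filter: filter, batch_size: batch_size, logger: logger
          @data_files = paths.map {|path| LocalDataFile.new(path, reader_class: reader_class) }
        end

        attr_reader :data_files
      end
    end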
@@ -1,5 +1,6 @@
 require 'redshift_connector/reader/abstract'
 require 'redshift_connector/reader/exception'
+require 'strscan'
 
 module RedshiftConnector
   # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
@@ -29,7 +30,7 @@ module RedshiftConnector
       s = StringScanner.new(line)
       s.skip(/\s+/)
       until s.eos?
-        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise MalformedCSVException, "CSV parse error at line #{lineno}"
+        col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise Reader::MalformedCSVException, "CSV parse error at line #{lineno}"
         row.push unescape_column(col)
         s.skip(/\s*/)  # skip line terminator on line ends
        s.skip(/,\s*/)
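
The reader change is only the exception's namespace (Reader::MalformedCSVException) plus an explicit require 'strscan'; the scanning logic is untouched. As a standalone illustration (not the gem's reader class), the same pattern splits a line produced by UNLOAD with ADDQUOTES ESCAPE:

    require 'strscan'

    # Hypothetical one-off sketch of the column-splitting loop shown above.
    line = %q("1","foo\"bar","2017-09-19")
    s = StringScanner.new(line)
    cols = []
    until s.eos?
      col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise 'CSV parse error'
      cols.push col[1..-2].gsub(/\\(.)/) { $1 }  # drop quotes, resolve backslash escapes
      s.skip(/\s*/)
      s.skip(/,\s*/)
    end
    p cols  #=> ["1", "foo\"bar", "2017-09-19"]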
@@ -3,15 +3,15 @@ require 'redshift_connector/data_file'
 module RedshiftConnector
   class S3DataFile < AbstractDataFile
     def initialize(object, reader_class:)
+      super reader_class: reader_class
       @object = object
-      @reader_class = reader_class
     end
 
     def key
       @object.key
     end
 
-    def content
+    def open
       @object.get.body
     end
 
@@ -32,13 +32,12 @@ module RedshiftConnector
     end
 
     def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
+      super filter: filter, batch_size: batch_size, logger: logger
       @bucket = bucket
       @prefix = prefix
       @format = format
-      @filter = filter || lambda {|*row| row }
-      @batch_size = batch_size
-      @logger = logger
       @reader_class = Reader.get(format)
+      logger.info "reader: #{@reader_class}"
     end
 
     attr_reader :bucket
@@ -61,7 +60,7 @@ module RedshiftConnector
       pref = File.dirname(@prefix) + '/'
       keys = @bucket.objects(prefix: pref).map(&:key)
       unless keys.empty?
-        @logger.info "DELETE #{pref}*"
+        logger.info "DELETE #{pref}*"
         @bucket.delete_objects(keys)
       end
     end
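
Callers are unaffected by this change: the constructor keywords are the same and are now forwarded to the base class, and the "reader: ..." info log moves from each_batch into the constructor. Assuming this hunk belongs to the S3 bundle class (S3DataFileBundle; the file header is not shown here), usage stays along these lines, with bucket and the prefix as placeholders:

    bundle = RedshiftConnector::S3DataFileBundle.new(bucket, 'export/users/', format: :csv)
    bundle.each_row do |row|
      # row is an array of column strings decoded by the selected reader
    end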
@@ -1,22 +1,23 @@
 require 'redshift_connector/abstract_data_file'
-require 'uri'
-require 'zlib'
 require 'open3'
 
 module RedshiftConnector
   class UrlDataFile < AbstractDataFile
     def initialize(url, reader_class:)
+      super reader_class: reader_class
       @url = url
-      @reader_class = reader_class
     end
 
+    attr_reader :url
+
     def key
-      URI.parse(@url).path
+      @url.path
     end
 
-    def content
-      stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
+    def open
+      stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url.to_s)
       stdin.close
+      stderr.close
       stdout
     end
   end
@@ -2,21 +2,18 @@ require 'redshift_connector/reader'
 require 'redshift_connector/abstract_data_file_bundle'
 require 'redshift_connector/url_data_file'
 require 'redshift_connector/logger'
+require 'uri'
 
 module RedshiftConnector
   class UrlDataFileBundle < AbstractDataFileBundle
-    def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
-      raise ArgumentError, 'data_file_urls is empty' if data_file_urls.empty?
-      @data_file_urls = data_file_urls
-      @filter = filter || lambda {|*row| row }
-      @logger = logger
-      @reader_class = Reader.get(format)
+    def initialize(urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
+      super filter: filter, batch_size: 1000, logger: logger
+      reader_class = Reader.get(format)
+      raise ArgumentError, 'no URL given' if urls.empty?
+      @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url), reader_class: reader_class) }
+      logger.info "reader: #{reader_class}"
     end
 
-    def data_files
-      @data_file_urls.map do |url|
-        UrlDataFile.new(url, reader_class: @reader_class)
-      end
-    end
+    attr_reader :data_files
   end
 end
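
UrlDataFileBundle still accepts plain URL strings; 7.1.0 parses them once with URI.parse, builds the UrlDataFile list eagerly, and exposes it via attr_reader instead of rebuilding it on every data_files call. A minimal usage sketch with placeholder URLs:

    urls = [
      'https://example.com/unload/0000_part_00.gz',
      'https://example.com/unload/0001_part_00.gz'
    ]
    bundle = RedshiftConnector::UrlDataFileBundle.new(urls, format: :redshift_csv)
    bundle.each_row do |row|
      # rows come back through the Reader selected by format
    end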
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "redshift-connector-data_file"
-  spec.version = "7.0.0"
+  spec.version = "7.1.0"
   spec.authors = ["Hidekazu Kobayashi", "Minero Aoki"]
   spec.email = ["kobahide789@gmail.com", "aamine@loveruby.net"]
 
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: redshift-connector-data_file
 version: !ruby/object:Gem::Version
-  version: 7.0.0
+  version: 7.1.0
 platform: ruby
 authors:
 - Hidekazu Kobayashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-06-19 00:00:00.000000000 Z
+date: 2017-09-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aws-sdk
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.8
+rubygems_version: 2.6.11
 signing_key:
 specification_version: 4
 summary: Utility classes for exported data files from Redshift