redshift-connector-data_file 7.0.0 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5a91f1bb3c4764f126a7cd5db3c97b23626a8f54
-  data.tar.gz: 4d604f99c42e55ed8343a6240139df3ce723e63d
+  metadata.gz: 5f242fc490379db815a588378910c758a3aa2d84
+  data.tar.gz: 2a315d9407eb66562b32d83993ba577a5b5063ac
 SHA512:
-  metadata.gz: a64f93698aeee74754b6c0b4452b8bd9d06031235e0737a57f17752a0aba36659322a9e6a467e5c3f359f82fcd1e6f8ae1fef128a30c080370e08898d89216e7
-  data.tar.gz: 00e1d01e5da780e861a8c700ef2c45860bbef1e36e56ced8c19c4afc8c79750d1b50674c61474e8826904030ef6f13e4ec0eff82dca4d7bfa1a65caf18843a68
+  metadata.gz: 141660793595766a703b5bd798ee4a61c8f9dab6db02ea3084d7b9f5eb92305bf2ad7c6d3164a55eb9dfa5797593675ef161d4789509d1b35928bda840825bc4
+  data.tar.gz: dcfebdb474cd2f78be55f47d4afb2e2488bd3e43f72f88fc3806bcff80ab24619eb0afc4b054845d3f57d4961897b753b32b62aca0c9b99a1d883c3b4387cc08
lib/redshift_connector/abstract_data_file.rb CHANGED
@@ -2,17 +2,24 @@ require 'zlib'
 
 module RedshiftConnector
   class AbstractDataFile
+    def initialize(reader_class:)
+      @reader_class = reader_class
+    end
+
     def each_row(&block)
-      f = if gzipped_object?
-        Zlib::GzipReader.new(content)
-      else
-        content
-      end
-      @reader_class.new(f).each(&block)
-    ensure
-      content.close
+      f = open
+      begin
+        if gzipped_object?
+          f = Zlib::GzipReader.new(f)
+        end
+        @reader_class.new(f).each(&block)
+      ensure
+        f.close
+      end
     end
 
+    # abstract open
+
     def data_object?
       @reader_class.data_object?(key)
     end
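
With this change, AbstractDataFile owns @reader_class and the gzip/close handling, and a concrete subclass only needs to call super and implement open (returning an IO) plus key. A minimal sketch of that contract, using a hypothetical LocalDataFile class that is not part of the gem:

  require 'redshift_connector/abstract_data_file'

  module RedshiftConnector
    # Illustrative only: serves rows from a file on local disk.
    class LocalDataFile < AbstractDataFile
      def initialize(path, reader_class:)
        super reader_class: reader_class   # the base class stores @reader_class
        @path = path
      end

      def key
        @path
      end

      # each_row in the base class calls open, wraps the IO in
      # Zlib::GzipReader when gzipped_object? is true, and closes it.
      def open
        File.open(@path, 'rb')
      end
    end
  end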
lib/redshift_connector/abstract_data_file_bundle.rb CHANGED
@@ -1,5 +1,14 @@
 module RedshiftConnector
   class AbstractDataFileBundle
+    def initialize(filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
+      @filter = filter || lambda {|*row| row }
+      @batch_size = batch_size || 1000
+      @logger = logger
+    end
+
+    attr_reader :batch_size
+    attr_reader :logger
+
     def each_row(&block)
       each_object do |obj|
         obj.each_row(&block)
@@ -19,10 +28,11 @@ module RedshiftConnector
       data_files.select {|obj| obj.data_object? }
     end
 
+    # abstract data_files
+
     REPORT_SIZE = 10_0000
 
     def each_batch(report: true)
-      @logger.info "reader: #{@reader_class}"
       n = 0
       reported = 0
       do_each_batch(@batch_size) do |rows|
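
The shared filter, batch_size, and logger handling now lives in this base initializer, and the "reader:" log line moves out of each_batch because the base class no longer knows which reader is in use. A subclass only has to call super and expose data_files; a rough sketch with a hypothetical StaticDataFileBundle that is not part of the gem:

  require 'redshift_connector/abstract_data_file_bundle'

  module RedshiftConnector
    # Illustrative only: wraps an already-built list of data file objects.
    class StaticDataFileBundle < AbstractDataFileBundle
      def initialize(files, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
        super filter: filter, batch_size: batch_size, logger: logger
        @data_files = files
      end

      # the "abstract data_files" the base class iterates over
      attr_reader :data_files
    end
  end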
lib/redshift_connector/reader/redshift_csv.rb CHANGED
@@ -1,5 +1,6 @@
 require 'redshift_connector/reader/abstract'
 require 'redshift_connector/reader/exception'
+require 'strscan'
 
 module RedshiftConnector
   # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
@@ -29,7 +30,7 @@ module RedshiftConnector
         s = StringScanner.new(line)
         s.skip(/\s+/)
         until s.eos?
-          col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise MalformedCSVException, "CSV parse error at line #{lineno}"
+          col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise Reader::MalformedCSVException, "CSV parse error at line #{lineno}"
           row.push unescape_column(col)
           s.skip(/\s*/) # skip line terminator on line ends
           s.skip(/,\s*/)
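
The explicit strscan require and the Reader:: prefix on MalformedCSVException are the only changes here; the quoted-column regex itself is untouched. As a standalone illustration (outside the gem) of what that scan matches against an UNLOAD-style line produced with ADDQUOTES ESCAPE:

  require 'strscan'

  line = %q("1","foo","say \"hi\"")   # made-up sample line
  s = StringScanner.new(line)
  cols = []
  until s.eos?
    col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise "CSV parse error"
    cols.push col          # still quoted and escaped; the reader unescapes each column
    s.skip(/\s*/)
    s.skip(/,\s*/)
  end
  p cols.size              # => 3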
lib/redshift_connector/s3_data_file.rb CHANGED
@@ -3,15 +3,15 @@ require 'redshift_connector/data_file'
 module RedshiftConnector
   class S3DataFile < AbstractDataFile
     def initialize(object, reader_class:)
+      super reader_class: reader_class
       @object = object
-      @reader_class = reader_class
     end
 
     def key
       @object.key
     end
 
-    def content
+    def open
       @object.get.body
     end
 
lib/redshift_connector/s3_data_file_bundle.rb CHANGED
@@ -32,13 +32,12 @@ module RedshiftConnector
     end
 
     def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
+      super filter: filter, batch_size: batch_size, logger: logger
       @bucket = bucket
       @prefix = prefix
       @format = format
-      @filter = filter || lambda {|*row| row }
-      @batch_size = batch_size
-      @logger = logger
       @reader_class = Reader.get(format)
+      logger.info "reader: #{@reader_class}"
     end
 
     attr_reader :bucket
@@ -61,7 +60,7 @@ module RedshiftConnector
       pref = File.dirname(@prefix) + '/'
       keys = @bucket.objects(prefix: pref).map(&:key)
       unless keys.empty?
-        @logger.info "DELETE #{pref}*"
+        logger.info "DELETE #{pref}*"
         @bucket.delete_objects(keys)
       end
     end
lib/redshift_connector/url_data_file.rb CHANGED
@@ -1,22 +1,23 @@
 require 'redshift_connector/abstract_data_file'
-require 'uri'
-require 'zlib'
 require 'open3'
 
 module RedshiftConnector
   class UrlDataFile < AbstractDataFile
     def initialize(url, reader_class:)
+      super reader_class: reader_class
       @url = url
-      @reader_class = reader_class
     end
 
+    attr_reader :url
+
     def key
-      URI.parse(@url).path
+      @url.path
     end
 
-    def content
-      stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url)
+    def open
+      stdin, stdout, stderr, wait_th = Open3.popen3('curl', @url.to_s)
       stdin.close
+      stderr.close
       stdout
     end
   end
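
UrlDataFile now receives an already-parsed URI object (the bundle does the URI.parse), and its open closes curl's stdin and stderr so that only the stdout pipe is handed back for the base class to read and close. Outside the gem, the same pattern looks roughly like this (the URL is made up):

  require 'open3'

  stdin, stdout, stderr, wait_thr = Open3.popen3('curl', 'https://example.com/unload/0000_part_00.gz')
  stdin.close
  stderr.close
  begin
    data = stdout.read   # in the gem, the reader class consumes this IO instead
  ensure
    stdout.close
    wait_thr.join        # reap the curl process
  end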
lib/redshift_connector/url_data_file_bundle.rb CHANGED
@@ -2,21 +2,18 @@ require 'redshift_connector/reader'
 require 'redshift_connector/abstract_data_file_bundle'
 require 'redshift_connector/url_data_file'
 require 'redshift_connector/logger'
+require 'uri'
 
 module RedshiftConnector
   class UrlDataFileBundle < AbstractDataFileBundle
-    def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
-      raise ArgumentError, 'data_file_urls is empty' if data_file_urls.empty?
-      @data_file_urls = data_file_urls
-      @filter = filter || lambda {|*row| row }
-      @logger = logger
-      @reader_class = Reader.get(format)
+    def initialize(urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
+      super filter: filter, batch_size: 1000, logger: logger
+      reader_class = Reader.get(format)
+      raise ArgumentError, 'no URL given' if urls.empty?
+      @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url), reader_class: reader_class) }
+      logger.info "reader: #{reader_class}"
     end
 
-    def data_files
-      @data_file_urls.map do |url|
-        UrlDataFile.new(url, reader_class: @reader_class)
-      end
-    end
+    attr_reader :data_files
   end
 end
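
Callers still pass plain URL strings; the bundle now parses them with URI and builds its UrlDataFile objects once in initialize instead of on every data_files call. A usage sketch (the require path follows the gem's file layout and the URLs are made up):

  require 'redshift_connector/url_data_file_bundle'

  bundle = RedshiftConnector::UrlDataFileBundle.new(
    ['https://example.com/unload/0000_part_00', 'https://example.com/unload/0001_part_00']
  )
  bundle.each_row do |row|
    # each row comes back as an array of column values
  end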
redshift-connector-data_file.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "redshift-connector-data_file"
-  spec.version = "7.0.0"
+  spec.version = "7.1.0"
   spec.authors = ["Hidekazu Kobayashi", "Minero Aoki"]
   spec.email = ["kobahide789@gmail.com", "aamine@loveruby.net"]
 
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: redshift-connector-data_file
 version: !ruby/object:Gem::Version
-  version: 7.0.0
+  version: 7.1.0
 platform: ruby
 authors:
 - Hidekazu Kobayashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-06-19 00:00:00.000000000 Z
+date: 2017-09-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aws-sdk
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.8
+rubygems_version: 2.6.11
 signing_key:
 specification_version: 4
 summary: Utility classes for exported data files from Redshift