redshift-connector-data_file 1.2.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9828598e441b98d6324395601b2da8f1bdaa53c3
4
- data.tar.gz: 8ccfb87c8d318f2eabdb75b9811f2e8bd46fb1be
3
+ metadata.gz: 5a91f1bb3c4764f126a7cd5db3c97b23626a8f54
4
+ data.tar.gz: 4d604f99c42e55ed8343a6240139df3ce723e63d
5
5
  SHA512:
6
- metadata.gz: 1b7141b92a430bb101551bc8d8c91b00f04df2bb5d4b35375c5ed2406265eed541241a0b528fa31ef10752df1bdb38fb74c92d3b450a71b4e34185d1c8edbfde
7
- data.tar.gz: 8d94161588f6fc3d6162fd995dae1700187070f5a4ab516fe6837dc87aa05045dcc1e6b978620714e1062d1a3cb52b7eab10e9596fc10e4c01910b2d637c6d30
6
+ metadata.gz: a64f93698aeee74754b6c0b4452b8bd9d06031235e0737a57f17752a0aba36659322a9e6a467e5c3f359f82fcd1e6f8ae1fef128a30c080370e08898d89216e7
7
+ data.tar.gz: 00e1d01e5da780e861a8c700ef2c45860bbef1e36e56ced8c19c4afc8c79750d1b50674c61474e8826904030ef6f13e4ec0eff82dca4d7bfa1a65caf18843a68
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  A gem to handle data files exported from Redshift
4
4
 
5
- [redshift-connector](https://github.com/aamine/redshift-coonector) uses this internally.
5
+ [redshift-connector](https://github.com/bricolages/redshift-connector) uses this gem internally.
6
6
 
7
7
  ## Installation
8
8
 
@@ -22,4 +22,4 @@ Or install it yourself as:
22
22
 
23
23
  ## Contributing
24
24
 
25
- Bug reports and pull requests are welcome on GitHub at https://github.com/koba789/redshift-connector-data_file.
25
+ Bug reports and pull requests are welcome on GitHub at https://github.com/bricolages/redshift-connector-data_file.
@@ -0,0 +1 @@
1
+ require 'redshift_connector/data_file'
@@ -0,0 +1,7 @@
1
+ # ensure to create module
2
+ module RedshiftConnector
3
+ end
4
+
5
+ require "redshift_connector/reader"
6
+ require "redshift_connector/url_data_file_bundle"
7
+ require "redshift_connector/logger"
@@ -0,0 +1,20 @@
1
module RedshiftConnector
  @logger = nil

  # Returns the logger shared by RedshiftConnector components.
  #
  # Resolution order:
  # 1. the logger assigned through RedshiftConnector.logger=
  # 2. Rails.logger, when the Rails constant is defined
  # 3. a NullLogger, which discards every message
  #
  # Step 3 keeps the library usable outside Rails; without it an
  # unconfigured call would raise NameError on the Rails constant.
  def RedshiftConnector.logger
    return @logger if @logger
    # Defer to access Rails: the constant is resolved at call time,
    # not at load time, so this file can be required before Rails.
    return Rails.logger if defined?(Rails)
    @null_logger ||= NullLogger.new
  end

  # Sets the logger returned by RedshiftConnector.logger.
  # Assigning nil restores the fallback behaviour described there.
  def RedshiftConnector.logger=(logger)
    @logger = logger
  end

  # Logger stand-in whose severity methods accept any arguments,
  # do nothing, and return nil.
  class NullLogger
    def noop(*_args)
    end

    alias error noop
    alias warn noop
    alias info noop
    alias debug noop
    alias fatal noop
  end
end
@@ -0,0 +1,18 @@
1
+ # create module
2
+ module RedshiftConnector
3
+ module Reader
4
+ end
5
+ end
6
+
7
+ require 'redshift_connector/reader/redshift_csv'
8
+ require 'redshift_connector/reader/csv'
9
+ require 'redshift_connector/reader/tsv'
10
+ require 'redshift_connector/reader/exception'
11
+
12
module RedshiftConnector
  module Reader
    class << self
      # Returns whatever Abstract.get_reader_class yields for +id+.
      # NOTE(review): Abstract is defined outside this file section
      # (presumably redshift_connector/reader/abstract) -- the accepted
      # ids and the failure mode for unknown ids are decided there.
      def get(id)
        Abstract.get_reader_class(id)
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
- require 'redshift-connector/data_file/reader/abstract'
2
- require 'redshift-connector/data_file/reader/exception'
1
+ require 'redshift_connector/reader/abstract'
2
+ require 'redshift_connector/reader/exception'
3
3
  require 'csv'
4
4
 
5
5
  module RedshiftConnector
@@ -1,5 +1,5 @@
1
- require 'redshift-connector/data_file/reader/abstract'
2
- require 'redshift-connector/data_file/reader/exception'
1
+ require 'redshift_connector/reader/abstract'
2
+ require 'redshift_connector/reader/exception'
3
3
 
4
4
  module RedshiftConnector
5
5
  # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
@@ -1,5 +1,5 @@
1
- require 'redshift-connector/data_file/reader/abstract'
2
- require 'redshift-connector/data_file/reader/exception'
1
+ require 'redshift_connector/reader/abstract'
2
+ require 'redshift_connector/reader/exception'
3
3
  require 'csv'
4
4
 
5
5
  module RedshiftConnector
@@ -0,0 +1,76 @@
1
+ require 'aws-sdk'
2
+
3
module RedshiftConnector
  # Named configuration for one S3 bucket plus thin accessors to its
  # objects through the AWS SDK.
  #
  # Buckets are registered at class level with S3Bucket.add and looked up
  # with S3Bucket.get / S3Bucket.default.
  class S3Bucket
    @buckets = {}
    @default = nil

    # Registers a bucket configuration under +name+ (stored as a String).
    #
    # The new entry becomes the default when +default+ is true or when no
    # default has been registered yet. Remaining keyword arguments are
    # passed to S3Bucket.new.
    def S3Bucket.add(name, default: false, **params)
      instance = new(**params)
      @buckets[name.to_s] = instance
      if default || !@default
        @default = instance
      end
    end

    # Returns the default bucket.
    # Raises ArgumentError when no bucket has been registered.
    def S3Bucket.default
      @default || raise(ArgumentError, "no default S3 bucket configured")
    end

    # Returns the bucket registered under +name+ (String or Symbol).
    # Raises ArgumentError when the name is unknown.
    def S3Bucket.get(name)
      @buckets[name.to_s] || raise(ArgumentError, "no such S3 bucket configured: #{name.inspect}")
    end

    # bucket:: S3 bucket name (required)
    # prefix:: key prefix used when building #url
    # region, access_key_id, secret_access_key:: passed to the S3 client when not nil
    # iam_role:: preferred over the access key pair by #credential_string
    def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
      @region = region
      @name = bucket
      @prefix = prefix
      @access_key_id = access_key_id
      @secret_access_key = secret_access_key
      @iam_role = iam_role
    end

    attr_reader :name
    attr_reader :prefix

    # Returns the s3:// URL of the configured prefix, with a trailing slash.
    #
    # Built from the configured bucket name (@name). The lazily created
    # SDK handle (@bucket) is nil until #bucket has been called, so it
    # must not be used here.
    def url
      "s3://#{@name}/#{@prefix}/"
    end

    # Returns a memoized Aws::S3::Client. Settings that are nil are left
    # out of the constructor arguments.
    def client
      @client ||= begin
        args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
        Aws::S3::Client.new(**args)
      end
    end

    # Returns the memoized SDK bucket resource for #name.
    def bucket
      @bucket ||= begin
        resource = Aws::S3::Resource.new(client: client)
        resource.bucket(@name)
      end
    end

    # Returns the SDK object handle for +key+.
    def object(key)
      bucket.object(key)
    end

    # Returns the SDK object collection for keys starting with +prefix+.
    def objects(prefix:)
      bucket.objects(prefix: prefix)
    end

    # Deletes the given keys with a single delete_objects request.
    # NOTE(review): the S3 DeleteObjects API is documented to accept at
    # most 1000 keys per request -- confirm callers never pass more.
    def delete_objects(keys)
      bucket.delete_objects(delete: {objects: keys.map {|k| {key: k} }})
    end

    # Returns a credential string built from the IAM role when one is
    # configured, otherwise from the access key pair.
    # Raises ArgumentError when neither is configured.
    def credential_string
      if @iam_role
        "aws_iam_role=#{@iam_role}"
      elsif @access_key_id
        "aws_access_key_id=#{@access_key_id};aws_secret_access_key=#{@secret_access_key}"
      else
        raise ArgumentError, "no credential given for Redshift S3 access"
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'redshift_connector/data_file'
2
+
3
module RedshiftConnector
  # Data file backed by a single S3 object.
  # NOTE(review): AbstractDataFile is defined outside this file section;
  # it presumably consumes #key, #content and @reader_class -- confirm.
  class S3DataFile < AbstractDataFile
    # object:: S3 object handle; must respond to #key, #get and #presigned_url
    # reader_class:: reader class stored for use by the superclass
    def initialize(object, reader_class:)
      @object = object
      @reader_class = reader_class
    end

    # Returns the key of the underlying object.
    def key
      @object.key
    end

    # Fetches the object and returns the body of the response.
    def content
      @object.get.body
    end

    # Forwards to the underlying object's #presigned_url.
    # Written as a plain method rather than with `delegate`, which is an
    # ActiveSupport extension that this file does not require.
    def presigned_url(*args, &block)
      @object.presigned_url(*args, &block)
    end
  end
end
@@ -0,0 +1,69 @@
1
+ require 'redshift_connector/abstract_data_file_bundle'
2
+ require 'redshift_connector/s3_bucket'
3
+ require 'redshift_connector/s3_data_file'
4
+ require 'redshift_connector/logger'
5
+ require 'aws-sdk'
6
+
7
module RedshiftConnector
  # A set of S3 objects sharing one key prefix, exposed as data files.
  # NOTE(review): AbstractDataFileBundle is defined outside this file
  # section; it presumably consumes @filter, @batch_size, @logger and
  # #data_files -- confirm.
  class S3DataFileBundle < AbstractDataFileBundle
    # Maximum number of keys sent in one delete request by #clear
    # (the documented limit of the S3 DeleteObjects API).
    DELETE_BATCH_SIZE = 1000

    # Builds a bundle from a parameter object responding to txn_id,
    # bucket, schema, table, filter and logger.
    # Uses the bucket registered under params.bucket, or the default
    # bucket when it is nil.
    # Raises ArgumentError when params.txn_id is missing.
    def self.for_params(params)
      unless params.txn_id
        raise ArgumentError, "cannot create bundle: missing txn_id"
      end
      s3bucket = params.bucket ? S3Bucket.get(params.bucket) : S3Bucket.default
      for_table(
        bucket: s3bucket,
        schema: params.schema,
        table: params.table,
        txn_id: params.txn_id,
        filter: params.filter,
        logger: params.logger
      )
    end

    # Builds a bundle for the objects under +prefix+, which is taken
    # relative to the bucket's own prefix.
    def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
      real_prefix = "#{bucket.prefix}/#{prefix}"
      new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
    end

    # Builds a :redshift_csv bundle for one table export, located at
    # <bucket prefix>/<schema>_export/<table>/<txn_id>/<table>.csv.*
    def self.for_table(bucket: S3Bucket.default, schema:, table:, txn_id:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
      prefix = "#{bucket.prefix}/#{schema}_export/#{table}/#{txn_id}/#{table}.csv."
      new(bucket, prefix, format: :redshift_csv, filter: filter, batch_size: batch_size, logger: logger)
    end

    # bucket:: an S3Bucket
    # prefix:: full key prefix of the bundle's objects
    # format:: reader id resolved through Reader.get
    # filter:: callable applied to rows; defaults to one returning the row unchanged
    def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
      @bucket = bucket
      @prefix = prefix
      @format = format
      @filter = filter || lambda {|*row| row }
      @batch_size = batch_size
      @logger = logger
      @reader_class = Reader.get(format)
    end

    attr_reader :bucket
    attr_reader :prefix

    # Returns the s3:// URL of the bundle prefix.
    def url
      "s3://#{@bucket.name}/#{@prefix}"
    end

    # Returns the credential string of the underlying bucket.
    def credential_string
      @bucket.credential_string
    end

    # Returns one S3DataFile per object whose key starts with the prefix.
    def data_files
      @bucket.objects(prefix: @prefix)
          .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
    end

    # Deletes every object under the directory part of the prefix.
    # Note that this covers all keys sharing that directory, not only
    # those matching the full prefix.
    #
    # Keys are deleted in slices of DELETE_BATCH_SIZE so that a large
    # bundle does not exceed the per-request key limit. Always returns nil.
    def clear
      pref = File.dirname(@prefix) + '/'
      keys = @bucket.objects(prefix: pref).map(&:key)
      return if keys.empty?
      @logger.info "DELETE #{pref}*"
      keys.each_slice(DELETE_BATCH_SIZE) {|batch| @bucket.delete_objects(batch) }
      nil
    end
  end
end
@@ -1,4 +1,4 @@
1
- require 'redshift-connector/data_file/abstract_data_file'
1
+ require 'redshift_connector/abstract_data_file'
2
2
  require 'uri'
3
3
  require 'zlib'
4
4
  require 'open3'
@@ -1,11 +1,11 @@
1
- require 'redshift-connector/data_file/reader'
2
- require 'redshift-connector/data_file/logger'
3
- require 'redshift-connector/data_file/abstract_data_file_bundle'
4
- require 'redshift-connector/data_file/url_data_file'
1
+ require 'redshift_connector/reader'
2
+ require 'redshift_connector/abstract_data_file_bundle'
3
+ require 'redshift_connector/url_data_file'
4
+ require 'redshift_connector/logger'
5
5
 
6
6
  module RedshiftConnector
7
7
  class UrlDataFileBundle < AbstractDataFileBundle
8
- def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: DataFile.logger)
8
+ def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
9
9
  raise ArgumentError, 'data_file_urls is empty' if data_file_urls.empty?
10
10
  @data_file_urls = data_file_urls
11
11
  @filter = filter || lambda {|*row| row }
@@ -1,24 +1,23 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'redshift-connector/data_file/version'
5
-
6
1
  Gem::Specification.new do |spec|
7
2
  spec.name = "redshift-connector-data_file"
8
- spec.version = RedshiftConnector::DataFile::VERSION
3
+ spec.version = "7.0.0"
9
4
  spec.authors = ["Hidekazu Kobayashi", "Minero Aoki"]
10
- spec.email = ["kobahide789@gmail.com"]
5
+ spec.email = ["kobahide789@gmail.com", "aamine@loveruby.net"]
11
6
 
12
7
  spec.summary = "Utility classes for exported data files from Redshift"
13
- spec.homepage = "https://github.com/koba789/redshift-connector-data_file"
8
+ spec.homepage = "https://github.com/bricolages/redshift-connector-data_file"
14
9
 
15
10
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
16
11
  f.match(%r{^(test|spec|features)/})
17
12
  end
18
13
  spec.bindir = "exe"
19
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
14
+ spec.executables = spec.files.grep(%r{\Aexe/}) {|f| File.basename(f) }
20
15
  spec.require_paths = ["lib"]
21
16
 
17
+ spec.required_ruby_version = '>= 2.1.0'
18
+ spec.add_dependency 'aws-sdk', '~> 2.0'
22
19
  spec.add_development_dependency "bundler", "~> 1.14"
23
- spec.add_development_dependency "rake", "~> 10.0"
20
+ spec.add_development_dependency "rake"
21
+ spec.add_development_dependency "test-unit"
22
+ spec.add_development_dependency "pry"
24
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector-data_file
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 7.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
@@ -9,8 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-06-02 00:00:00.000000000 Z
12
+ date: 2017-06-19 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: aws-sdk
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '2.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '2.0'
14
28
  - !ruby/object:Gem::Dependency
15
29
  name: bundler
16
30
  requirement: !ruby/object:Gem::Requirement
@@ -29,19 +43,48 @@ dependencies:
29
43
  name: rake
30
44
  requirement: !ruby/object:Gem::Requirement
31
45
  requirements:
32
- - - "~>"
46
+ - - ">="
33
47
  - !ruby/object:Gem::Version
34
- version: '10.0'
48
+ version: '0'
35
49
  type: :development
36
50
  prerelease: false
37
51
  version_requirements: !ruby/object:Gem::Requirement
38
52
  requirements:
39
- - - "~>"
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: test-unit
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
40
61
  - !ruby/object:Gem::Version
41
- version: '10.0'
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: pry
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
42
84
  description:
43
85
  email:
44
86
  - kobahide789@gmail.com
87
+ - aamine@loveruby.net
45
88
  executables: []
46
89
  extensions: []
47
90
  extra_rdoc_files: []
@@ -53,21 +96,24 @@ files:
53
96
  - Rakefile
54
97
  - bin/console
55
98
  - bin/setup
56
- - lib/redshift-connector/data_file.rb
57
- - lib/redshift-connector/data_file/abstract_data_file.rb
58
- - lib/redshift-connector/data_file/abstract_data_file_bundle.rb
59
- - lib/redshift-connector/data_file/logger.rb
60
- - lib/redshift-connector/data_file/reader.rb
61
- - lib/redshift-connector/data_file/reader/abstract.rb
62
- - lib/redshift-connector/data_file/reader/csv.rb
63
- - lib/redshift-connector/data_file/reader/exception.rb
64
- - lib/redshift-connector/data_file/reader/redshift_csv.rb
65
- - lib/redshift-connector/data_file/reader/tsv.rb
66
- - lib/redshift-connector/data_file/url_data_file.rb
67
- - lib/redshift-connector/data_file/url_data_file_bundle.rb
68
- - lib/redshift-connector/data_file/version.rb
99
+ - lib/redshift-connector-data_file.rb
100
+ - lib/redshift_connector/abstract_data_file.rb
101
+ - lib/redshift_connector/abstract_data_file_bundle.rb
102
+ - lib/redshift_connector/data_file.rb
103
+ - lib/redshift_connector/logger.rb
104
+ - lib/redshift_connector/reader.rb
105
+ - lib/redshift_connector/reader/abstract.rb
106
+ - lib/redshift_connector/reader/csv.rb
107
+ - lib/redshift_connector/reader/exception.rb
108
+ - lib/redshift_connector/reader/redshift_csv.rb
109
+ - lib/redshift_connector/reader/tsv.rb
110
+ - lib/redshift_connector/s3_bucket.rb
111
+ - lib/redshift_connector/s3_data_file.rb
112
+ - lib/redshift_connector/s3_data_file_bundle.rb
113
+ - lib/redshift_connector/url_data_file.rb
114
+ - lib/redshift_connector/url_data_file_bundle.rb
69
115
  - redshift-connector-data_file.gemspec
70
- homepage: https://github.com/koba789/redshift-connector-data_file
116
+ homepage: https://github.com/bricolages/redshift-connector-data_file
71
117
  licenses: []
72
118
  metadata: {}
73
119
  post_install_message:
@@ -78,7 +124,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
78
124
  requirements:
79
125
  - - ">="
80
126
  - !ruby/object:Gem::Version
81
- version: '0'
127
+ version: 2.1.0
82
128
  required_rubygems_version: !ruby/object:Gem::Requirement
83
129
  requirements:
84
130
  - - ">="
@@ -86,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
132
  version: '0'
87
133
  requirements: []
88
134
  rubyforge_project:
89
- rubygems_version: 2.6.11
135
+ rubygems_version: 2.6.8
90
136
  signing_key:
91
137
  specification_version: 4
92
138
  summary: Utility classes for exported data files from Redshift
@@ -1,9 +0,0 @@
1
- require "redshift-connector/data_file/version"
2
- require "redshift-connector/data_file/logger"
3
- require "redshift-connector/data_file/reader"
4
- require "redshift-connector/data_file/url_data_file_bundle"
5
-
6
- module RedshiftConnector
7
- module DataFile
8
- end
9
- end
@@ -1,14 +0,0 @@
1
- module RedshiftConnector
2
- module DataFile
3
- @logger = nil
4
-
5
- def self.logger
6
- # REVIEW: Reverse dependency
7
- @logger || RedshiftConnector&.logger || Rails.logger
8
- end
9
-
10
- def self.logger=(logger)
11
- @logger = logger
12
- end
13
- end
14
- end
@@ -1,18 +0,0 @@
1
- # create module
2
- module RedshiftConnector
3
- module Reader
4
- end
5
- end
6
-
7
- require 'redshift-connector/data_file/reader/redshift_csv'
8
- require 'redshift-connector/data_file/reader/csv'
9
- require 'redshift-connector/data_file/reader/tsv'
10
- require 'redshift-connector/data_file/reader/exception'
11
-
12
- module RedshiftConnector
13
- module Reader
14
- def Reader.get(id)
15
- Abstract.get_reader_class(id)
16
- end
17
- end
18
- end
@@ -1,5 +0,0 @@
1
- module RedshiftConnector
2
- module DataFile
3
- VERSION = "1.2.0"
4
- end
5
- end