redshift-connector-data_file 1.2.0 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9828598e441b98d6324395601b2da8f1bdaa53c3
4
- data.tar.gz: 8ccfb87c8d318f2eabdb75b9811f2e8bd46fb1be
3
+ metadata.gz: 5a91f1bb3c4764f126a7cd5db3c97b23626a8f54
4
+ data.tar.gz: 4d604f99c42e55ed8343a6240139df3ce723e63d
5
5
  SHA512:
6
- metadata.gz: 1b7141b92a430bb101551bc8d8c91b00f04df2bb5d4b35375c5ed2406265eed541241a0b528fa31ef10752df1bdb38fb74c92d3b450a71b4e34185d1c8edbfde
7
- data.tar.gz: 8d94161588f6fc3d6162fd995dae1700187070f5a4ab516fe6837dc87aa05045dcc1e6b978620714e1062d1a3cb52b7eab10e9596fc10e4c01910b2d637c6d30
6
+ metadata.gz: a64f93698aeee74754b6c0b4452b8bd9d06031235e0737a57f17752a0aba36659322a9e6a467e5c3f359f82fcd1e6f8ae1fef128a30c080370e08898d89216e7
7
+ data.tar.gz: 00e1d01e5da780e861a8c700ef2c45860bbef1e36e56ced8c19c4afc8c79750d1b50674c61474e8826904030ef6f13e4ec0eff82dca4d7bfa1a65caf18843a68
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  A gem to handle data files exported from Redshift
4
4
 
5
- [redshift-connector](https://github.com/aamine/redshift-coonector) uses this internally.
5
+ [redshift-connector](https://github.com/bricolages/redshift-connector) uses this gem internally.
6
6
 
7
7
  ## Installation
8
8
 
@@ -22,4 +22,4 @@ Or install it yourself as:
22
22
 
23
23
  ## Contributing
24
24
 
25
- Bug reports and pull requests are welcome on GitHub at https://github.com/koba789/redshift-connector-data_file.
25
+ Bug reports and pull requests are welcome on GitHub at https://github.com/bricolages/redshift-connector-data_file.
@@ -0,0 +1 @@
1
+ require 'redshift_connector/data_file'
@@ -0,0 +1,7 @@
1
+ # ensure to create module
2
+ module RedshiftConnector
3
+ end
4
+
5
+ require "redshift_connector/reader"
6
+ require "redshift_connector/url_data_file_bundle"
7
+ require "redshift_connector/logger"
@@ -0,0 +1,20 @@
1
+ module RedshiftConnector
2
+ @logger = nil
3
+
4
+ def RedshiftConnector.logger
5
+ # Defer to access Rails
6
+ @logger || Rails.logger
7
+ end
8
+
9
+ def RedshiftConnector.logger=(logger)
10
+ @logger = logger
11
+ end
12
+
13
+ class NullLogger
14
+ def noop(*args) end
15
+ alias error noop
16
+ alias warn noop
17
+ alias info noop
18
+ alias debug noop
19
+ end
20
+ end
@@ -0,0 +1,18 @@
1
+ # create module
2
+ module RedshiftConnector
3
+ module Reader
4
+ end
5
+ end
6
+
7
+ require 'redshift_connector/reader/redshift_csv'
8
+ require 'redshift_connector/reader/csv'
9
+ require 'redshift_connector/reader/tsv'
10
+ require 'redshift_connector/reader/exception'
11
+
12
+ module RedshiftConnector
13
+ module Reader
14
+ def Reader.get(id)
15
+ Abstract.get_reader_class(id)
16
+ end
17
+ end
18
+ end
@@ -1,5 +1,5 @@
1
- require 'redshift-connector/data_file/reader/abstract'
2
- require 'redshift-connector/data_file/reader/exception'
1
+ require 'redshift_connector/reader/abstract'
2
+ require 'redshift_connector/reader/exception'
3
3
  require 'csv'
4
4
 
5
5
  module RedshiftConnector
@@ -1,5 +1,5 @@
1
- require 'redshift-connector/data_file/reader/abstract'
2
- require 'redshift-connector/data_file/reader/exception'
1
+ require 'redshift_connector/reader/abstract'
2
+ require 'redshift_connector/reader/exception'
3
3
 
4
4
  module RedshiftConnector
5
5
  # Reads CSV file generated by Redshift UNLOAD statement (with option ADDQUOTES ESCAPE).
@@ -1,5 +1,5 @@
1
- require 'redshift-connector/data_file/reader/abstract'
2
- require 'redshift-connector/data_file/reader/exception'
1
+ require 'redshift_connector/reader/abstract'
2
+ require 'redshift_connector/reader/exception'
3
3
  require 'csv'
4
4
 
5
5
  module RedshiftConnector
@@ -0,0 +1,76 @@
1
+ require 'aws-sdk'
2
+
3
+ module RedshiftConnector
4
+ class S3Bucket
5
+ @buckets = {}
6
+ @default = nil
7
+
8
+ def S3Bucket.add(name, default: false, **params)
9
+ instance = new(**params)
10
+ @buckets[name.to_s] = instance
11
+ if !@default or default
12
+ @default = instance
13
+ end
14
+ end
15
+
16
+ def S3Bucket.default
17
+ @default or raise ArgumentError, "no default S3 bucket configured"
18
+ end
19
+
20
+ def S3Bucket.get(name)
21
+ @buckets[name.to_s] or raise ArgumentError, "no such S3 bucket configured: #{name.inspect}"
22
+ end
23
+
24
+ def initialize(region: nil, bucket:, prefix: nil, access_key_id: nil, secret_access_key: nil, iam_role: nil)
25
+ @region = region
26
+ @name = bucket
27
+ @prefix = prefix
28
+ @access_key_id = access_key_id
29
+ @secret_access_key = secret_access_key
30
+ @iam_role = iam_role
31
+ end
32
+
33
+ attr_reader :name
34
+ attr_reader :prefix
35
+
36
+ def url
37
+ "s3://#{@bucket.name}/#{@prefix}/"
38
+ end
39
+
40
+ def client
41
+ @client ||= begin
42
+ args = { region: @region, access_key_id: @access_key_id, secret_access_key: @secret_access_key }.reject {|k, v| v.nil? }
43
+ Aws::S3::Client.new(**args)
44
+ end
45
+ end
46
+
47
+ def bucket
48
+ @bucket ||= begin
49
+ resource = Aws::S3::Resource.new(client: client)
50
+ resource.bucket(@name)
51
+ end
52
+ end
53
+
54
+ def object(key)
55
+ bucket.object(key)
56
+ end
57
+
58
+ def objects(prefix:)
59
+ bucket.objects(prefix: prefix)
60
+ end
61
+
62
+ def delete_objects(keys)
63
+ bucket.delete_objects(delete: {objects: keys.map {|k| {key: k} }})
64
+ end
65
+
66
+ def credential_string
67
+ if @iam_role
68
+ "aws_iam_role=#{@iam_role}"
69
+ elsif @access_key_id
70
+ "aws_access_key_id=#{@access_key_id};aws_secret_access_key=#{@secret_access_key}"
71
+ else
72
+ raise ArgumentError, "no credential given for Redshift S3 access"
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,20 @@
1
+ require 'redshift_connector/data_file'
2
+
3
+ module RedshiftConnector
4
+ class S3DataFile < AbstractDataFile
5
+ def initialize(object, reader_class:)
6
+ @object = object
7
+ @reader_class = reader_class
8
+ end
9
+
10
+ def key
11
+ @object.key
12
+ end
13
+
14
+ def content
15
+ @object.get.body
16
+ end
17
+
18
+ delegate :presigned_url, to: :@object
19
+ end
20
+ end
@@ -0,0 +1,69 @@
1
+ require 'redshift_connector/abstract_data_file_bundle'
2
+ require 'redshift_connector/s3_bucket'
3
+ require 'redshift_connector/s3_data_file'
4
+ require 'redshift_connector/logger'
5
+ require 'aws-sdk'
6
+
7
+ module RedshiftConnector
8
+ class S3DataFileBundle < AbstractDataFileBundle
9
+ def self.for_params(params)
10
+ unless params.txn_id
11
+ raise ArgumentError, "cannot create bundle: missing txn_id"
12
+ end
13
+ s3bucket = params.bucket ? S3Bucket.get(params.bucket) : S3Bucket.default
14
+ for_table(
15
+ bucket: s3bucket,
16
+ schema: params.schema,
17
+ table: params.table,
18
+ txn_id: params.txn_id,
19
+ filter: params.filter,
20
+ logger: params.logger
21
+ )
22
+ end
23
+
24
+ def self.for_prefix(bucket: S3Bucket.default, prefix:, format:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
25
+ real_prefix = "#{bucket.prefix}/#{prefix}"
26
+ new(bucket, real_prefix, format: format, filter: filter, batch_size: batch_size, logger: logger)
27
+ end
28
+
29
+ def self.for_table(bucket: S3Bucket.default, schema:, table:, txn_id:, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
30
+ prefix = "#{bucket.prefix}/#{schema}_export/#{table}/#{txn_id}/#{table}.csv."
31
+ new(bucket, prefix, format: :redshift_csv, filter: filter, batch_size: batch_size, logger: logger)
32
+ end
33
+
34
+ def initialize(bucket, prefix, format: :csv, filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
35
+ @bucket = bucket
36
+ @prefix = prefix
37
+ @format = format
38
+ @filter = filter || lambda {|*row| row }
39
+ @batch_size = batch_size
40
+ @logger = logger
41
+ @reader_class = Reader.get(format)
42
+ end
43
+
44
+ attr_reader :bucket
45
+ attr_reader :prefix
46
+
47
+ def url
48
+ "s3://#{@bucket.name}/#{@prefix}"
49
+ end
50
+
51
+ def credential_string
52
+ @bucket.credential_string
53
+ end
54
+
55
+ def data_files
56
+ @bucket.objects(prefix: @prefix)
57
+ .map {|obj| S3DataFile.new(obj, reader_class: @reader_class) }
58
+ end
59
+
60
+ def clear
61
+ pref = File.dirname(@prefix) + '/'
62
+ keys = @bucket.objects(prefix: pref).map(&:key)
63
+ unless keys.empty?
64
+ @logger.info "DELETE #{pref}*"
65
+ @bucket.delete_objects(keys)
66
+ end
67
+ end
68
+ end
69
+ end
@@ -1,4 +1,4 @@
1
- require 'redshift-connector/data_file/abstract_data_file'
1
+ require 'redshift_connector/abstract_data_file'
2
2
  require 'uri'
3
3
  require 'zlib'
4
4
  require 'open3'
@@ -1,11 +1,11 @@
1
- require 'redshift-connector/data_file/reader'
2
- require 'redshift-connector/data_file/logger'
3
- require 'redshift-connector/data_file/abstract_data_file_bundle'
4
- require 'redshift-connector/data_file/url_data_file'
1
+ require 'redshift_connector/reader'
2
+ require 'redshift_connector/abstract_data_file_bundle'
3
+ require 'redshift_connector/url_data_file'
4
+ require 'redshift_connector/logger'
5
5
 
6
6
  module RedshiftConnector
7
7
  class UrlDataFileBundle < AbstractDataFileBundle
8
- def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: DataFile.logger)
8
+ def initialize(data_file_urls, format: :redshift_csv, filter: nil, logger: RedshiftConnector.logger)
9
9
  raise ArgumentError, 'data_file_urls is empty' if data_file_urls.empty?
10
10
  @data_file_urls = data_file_urls
11
11
  @filter = filter || lambda {|*row| row }
@@ -1,24 +1,23 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'redshift-connector/data_file/version'
5
-
6
1
  Gem::Specification.new do |spec|
7
2
  spec.name = "redshift-connector-data_file"
8
- spec.version = RedshiftConnector::DataFile::VERSION
3
+ spec.version = "7.0.0"
9
4
  spec.authors = ["Hidekazu Kobayashi", "Minero Aoki"]
10
- spec.email = ["kobahide789@gmail.com"]
5
+ spec.email = ["kobahide789@gmail.com", "aamine@loveruby.net"]
11
6
 
12
7
  spec.summary = "Utility classes for exported data files from Redshift"
13
- spec.homepage = "https://github.com/koba789/redshift-connector-data_file"
8
+ spec.homepage = "https://github.com/bricolages/redshift-connector-data_file"
14
9
 
15
10
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
16
11
  f.match(%r{^(test|spec|features)/})
17
12
  end
18
13
  spec.bindir = "exe"
19
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
14
+ spec.executables = spec.files.grep(%r{\Aexe/}) {|f| File.basename(f) }
20
15
  spec.require_paths = ["lib"]
21
16
 
17
+ spec.required_ruby_version = '>= 2.1.0'
18
+ spec.add_dependency 'aws-sdk', '~> 2.0'
22
19
  spec.add_development_dependency "bundler", "~> 1.14"
23
- spec.add_development_dependency "rake", "~> 10.0"
20
+ spec.add_development_dependency "rake"
21
+ spec.add_development_dependency "test-unit"
22
+ spec.add_development_dependency "pry"
24
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector-data_file
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 7.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
@@ -9,8 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-06-02 00:00:00.000000000 Z
12
+ date: 2017-06-19 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: aws-sdk
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '2.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '2.0'
14
28
  - !ruby/object:Gem::Dependency
15
29
  name: bundler
16
30
  requirement: !ruby/object:Gem::Requirement
@@ -29,19 +43,48 @@ dependencies:
29
43
  name: rake
30
44
  requirement: !ruby/object:Gem::Requirement
31
45
  requirements:
32
- - - "~>"
46
+ - - ">="
33
47
  - !ruby/object:Gem::Version
34
- version: '10.0'
48
+ version: '0'
35
49
  type: :development
36
50
  prerelease: false
37
51
  version_requirements: !ruby/object:Gem::Requirement
38
52
  requirements:
39
- - - "~>"
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: test-unit
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
40
61
  - !ruby/object:Gem::Version
41
- version: '10.0'
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: pry
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
42
84
  description:
43
85
  email:
44
86
  - kobahide789@gmail.com
87
+ - aamine@loveruby.net
45
88
  executables: []
46
89
  extensions: []
47
90
  extra_rdoc_files: []
@@ -53,21 +96,24 @@ files:
53
96
  - Rakefile
54
97
  - bin/console
55
98
  - bin/setup
56
- - lib/redshift-connector/data_file.rb
57
- - lib/redshift-connector/data_file/abstract_data_file.rb
58
- - lib/redshift-connector/data_file/abstract_data_file_bundle.rb
59
- - lib/redshift-connector/data_file/logger.rb
60
- - lib/redshift-connector/data_file/reader.rb
61
- - lib/redshift-connector/data_file/reader/abstract.rb
62
- - lib/redshift-connector/data_file/reader/csv.rb
63
- - lib/redshift-connector/data_file/reader/exception.rb
64
- - lib/redshift-connector/data_file/reader/redshift_csv.rb
65
- - lib/redshift-connector/data_file/reader/tsv.rb
66
- - lib/redshift-connector/data_file/url_data_file.rb
67
- - lib/redshift-connector/data_file/url_data_file_bundle.rb
68
- - lib/redshift-connector/data_file/version.rb
99
+ - lib/redshift-connector-data_file.rb
100
+ - lib/redshift_connector/abstract_data_file.rb
101
+ - lib/redshift_connector/abstract_data_file_bundle.rb
102
+ - lib/redshift_connector/data_file.rb
103
+ - lib/redshift_connector/logger.rb
104
+ - lib/redshift_connector/reader.rb
105
+ - lib/redshift_connector/reader/abstract.rb
106
+ - lib/redshift_connector/reader/csv.rb
107
+ - lib/redshift_connector/reader/exception.rb
108
+ - lib/redshift_connector/reader/redshift_csv.rb
109
+ - lib/redshift_connector/reader/tsv.rb
110
+ - lib/redshift_connector/s3_bucket.rb
111
+ - lib/redshift_connector/s3_data_file.rb
112
+ - lib/redshift_connector/s3_data_file_bundle.rb
113
+ - lib/redshift_connector/url_data_file.rb
114
+ - lib/redshift_connector/url_data_file_bundle.rb
69
115
  - redshift-connector-data_file.gemspec
70
- homepage: https://github.com/koba789/redshift-connector-data_file
116
+ homepage: https://github.com/bricolages/redshift-connector-data_file
71
117
  licenses: []
72
118
  metadata: {}
73
119
  post_install_message:
@@ -78,7 +124,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
78
124
  requirements:
79
125
  - - ">="
80
126
  - !ruby/object:Gem::Version
81
- version: '0'
127
+ version: 2.1.0
82
128
  required_rubygems_version: !ruby/object:Gem::Requirement
83
129
  requirements:
84
130
  - - ">="
@@ -86,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
132
  version: '0'
87
133
  requirements: []
88
134
  rubyforge_project:
89
- rubygems_version: 2.6.11
135
+ rubygems_version: 2.6.8
90
136
  signing_key:
91
137
  specification_version: 4
92
138
  summary: Utility classes for exported data files from Redshift
@@ -1,9 +0,0 @@
1
- require "redshift-connector/data_file/version"
2
- require "redshift-connector/data_file/logger"
3
- require "redshift-connector/data_file/reader"
4
- require "redshift-connector/data_file/url_data_file_bundle"
5
-
6
- module RedshiftConnector
7
- module DataFile
8
- end
9
- end
@@ -1,14 +0,0 @@
1
- module RedshiftConnector
2
- module DataFile
3
- @logger = nil
4
-
5
- def self.logger
6
- # REVIEW: Reverse dependency
7
- @logger || RedshiftConnector&.logger || Rails.logger
8
- end
9
-
10
- def self.logger=(logger)
11
- @logger = logger
12
- end
13
- end
14
- end
@@ -1,18 +0,0 @@
1
- # create module
2
- module RedshiftConnector
3
- module Reader
4
- end
5
- end
6
-
7
- require 'redshift-connector/data_file/reader/redshift_csv'
8
- require 'redshift-connector/data_file/reader/csv'
9
- require 'redshift-connector/data_file/reader/tsv'
10
- require 'redshift-connector/data_file/reader/exception'
11
-
12
- module RedshiftConnector
13
- module Reader
14
- def Reader.get(id)
15
- Abstract.get_reader_class(id)
16
- end
17
- end
18
- end
@@ -1,5 +0,0 @@
1
- module RedshiftConnector
2
- module DataFile
3
- VERSION = "1.2.0"
4
- end
5
- end