queuery_client 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c873a091add34619c21ef63bd324a41205dfdc4f509a9b4574a2a31c2c121e4b
4
- data.tar.gz: 5b184dba90213ba0f022302ae2dc99e59ec491189e4c7f65c5c05b24d3497ec3
3
+ metadata.gz: 2a5b41cb805e7fc0864fd695adc519c6b7212c294383935af40301d88b5b8d63
4
+ data.tar.gz: d4e970371d23e36bf40d4cafdf9e7da0d886c25bb0882ce9e12fc2a971a0e2c0
5
5
  SHA512:
6
- metadata.gz: 59fb860a84e25e604f160cd8209aecebd742b1a4e36fd84e3591bc4b7a0bc4f6c8ecb3482eb001c22eae379bec109ebf78ebe78a9f435a1c99b05f5e86299a56
7
- data.tar.gz: a56489e0fa77068fc548f9b4c98fc9215f962c80ec2fe197f1958e05538992884ddc7aaa76bd2f86c43980db9c31437f24485f8371ea4739c867ce7837bd7c45
6
+ metadata.gz: f61f86445e64fca2863ba474a6cfb27e87af761516b3fbb5394adbe6f61e23e4f9d81778cded02dc512bf495bb8b711099962b5fd63154880bcc3e746bf25f61
7
+ data.tar.gz: ee0f09b1e92729056857117a7fdf4fa422d13bae5168f521e6d2cd8ca8adc2152334c59fd826aff916369ee8db811c792a49087ddd08dfdb283c0a31445bab78
@@ -4,17 +4,19 @@ module QueueryClient
4
4
  @options = options
5
5
  end
6
6
 
7
- def execute_query(select_stmt, values)
8
- garage_client.post("/v1/queries", q: select_stmt, values: values)
7
+ def execute_query(select_stmt, values, query_options)
8
+ garage_client.post("/v1/queries", q: select_stmt, values: values, enable_metadata: query_options[:enable_cast])
9
9
  end
10
+ alias start_query execute_query
10
11
 
11
- def get_query(id)
12
- garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix')
12
+ def get_query(id, query_options)
13
+ query_option_fields = build_query_option_fields(query_options)
14
+ garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix' + query_option_fields)
13
15
  end
14
16
 
15
- def wait_for(id)
17
+ def wait_for(id, query_options)
16
18
  loop do
17
- query = get_query(id)
19
+ query = get_query(id, query_options)
18
20
  case query.status
19
21
  when 'success', 'failed'
20
22
  return query
@@ -23,17 +25,19 @@ module QueueryClient
23
25
  end
24
26
  end
25
27
 
26
- def query_and_wait(select_stmt, values)
27
- query = execute_query(select_stmt, values)
28
- wait_for(query.id)
28
+ def query_and_wait(select_stmt, values, query_options)
29
+ query = execute_query(select_stmt, values, query_options)
30
+ wait_for(query.id, query_options)
29
31
  end
30
32
 
31
- def query(select_stmt, values)
32
- query = query_and_wait(select_stmt, values)
33
+ def query(select_stmt, values, **query_options)
34
+ query = query_and_wait(select_stmt, values, query_options)
35
+ manifest_file_url = query.manifest_file_url if query_options[:enable_cast]
33
36
  case query.status
34
37
  when 'success'
35
38
  UrlDataFileBundle.new(
36
39
  query.data_file_urls,
40
+ manifest_file_url,
37
41
  s3_prefix: query.s3_prefix,
38
42
  )
39
43
  when 'failed'
@@ -41,13 +45,21 @@ module QueueryClient
41
45
  end
42
46
  end
43
47
 
48
+ # poll_result returns the results only if the query has already succeeded.
49
+ def poll_result(id)
50
+ query = get_query(id)
51
+ get_query_result(query)
52
+ end
53
+
44
54
  def garage_client
45
55
  @garage_client ||= BasicAuthGarageClient.new(
46
56
  endpoint: options.endpoint,
47
57
  path_prefix: '/',
48
58
  login: options.token,
49
59
  password: options.token_secret
50
- )
60
+ ).tap do |client|
61
+ client.headers['Host'] = options.host_header if options.host_header
62
+ end
51
63
  end
52
64
 
53
65
  def options
@@ -57,5 +69,37 @@ module QueueryClient
57
69
  def default_options
58
70
  QueueryClient.configuration
59
71
  end
72
+
73
+ private
74
+
75
+ def get_query_result(query)
76
+ case query.status
77
+ when 'pending', 'running'
78
+ nil
79
+ when 'success'
80
+ UrlDataFileBundle.new(
81
+ query.data_file_urls,
82
+ nil,
83
+ s3_prefix: query.s3_prefix,
84
+ )
85
+ when 'failure'
86
+ raise QueryError.new(query.error)
87
+ end
88
+ end
89
+
90
+ def build_query_option_fields(query_options)
91
+ enable_query_options = query_options.select{ |name, v| name if v }.keys
92
+ return '' if enable_query_options.empty?
93
+ query_option_fields = enable_query_options.map{ |option_name| convert_field_name(option_name) }
94
+ ',' + query_option_fields.join(',')
95
+ end
96
+
97
+ def convert_field_name(option_name)
98
+ case option_name
99
+ when :enable_cast
100
+ 'manifest_file_url'
101
+ # add further options here as needed
102
+ end
103
+ end
60
104
  end
61
105
  end
@@ -1,5 +1,8 @@
1
1
  module QueueryClient
2
2
  class Configuration
3
+ REQUIRED_KEYS = [:endpoint, :token, :token_secret]
4
+ OPTIONAL_KEYS = [:host_header]
5
+
3
6
  def initialize(options = {})
4
7
  @options = options
5
8
  end
@@ -12,11 +15,7 @@ module QueueryClient
12
15
  @options = nil
13
16
  end
14
17
 
15
- [
16
- :endpoint,
17
- :token,
18
- :token_secret,
19
- ].each do |key|
18
+ REQUIRED_KEYS.each do |key|
20
19
  define_method(key) do
21
20
  options.fetch(key)
22
21
  end
@@ -26,6 +25,16 @@ module QueueryClient
26
25
  end
27
26
  end
28
27
 
28
+ OPTIONAL_KEYS.each do |key|
29
+ define_method(key) do
30
+ options[key]
31
+ end
32
+
33
+ define_method("#{key}=") do |value|
34
+ options[key] = value
35
+ end
36
+ end
37
+
29
38
  def merge(other)
30
39
  Configuration.new(to_h.merge(other.to_h))
31
40
  end
@@ -2,7 +2,7 @@ require 'redshift_csv_file'
2
2
  require 'zlib'
3
3
 
4
4
  module QueueryClient
5
- class DataFile
5
+ class DataFile # abstract class
6
6
  def data_object?
7
7
  /\.csv(?:\.|\z)/ =~ File.basename(key)
8
8
  end
@@ -12,15 +12,21 @@ module QueueryClient
12
12
  end
13
13
 
14
14
  def each_row(&block)
15
+ return enum_for(:each_row) if !block_given?
16
+
15
17
  f = open
16
18
  begin
17
19
  if gzipped_object?
18
20
  f = Zlib::GzipReader.new(f)
19
21
  end
20
- RedshiftCsvFile.new(f).each(&block)
22
+ RedshiftCsvFile.new(f).each do |row|
23
+ yield row
24
+ end
21
25
  ensure
22
26
  f.close
23
27
  end
28
+
29
+ self
24
30
  end
25
31
  end
26
32
  end
@@ -1,15 +1,31 @@
1
+ require 'queuery_client/redshift_data_type'
2
+ require 'date'
3
+ require 'time'
4
+
1
5
  module QueueryClient
2
6
  class DataFileBundle
3
7
  # abstract data_files :: [DataFile]
8
+ # abstract manifest_file :: ManifestFile
9
+ # abstract def has_manifest?
4
10
 
5
11
  def each_row(&block)
12
+ return enum_for(:each_row) if !block_given?
13
+
6
14
  data_files.each do |file|
7
15
  if file.data_object?
8
- file.each_row(&block)
16
+ file.each_row do |row|
17
+ if has_manifest?
18
+ yield RedshiftDataType.type_cast(row, manifest_file)
19
+ else
20
+ yield row
21
+ end
22
+ end
9
23
  end
10
24
  end
11
- end
12
25
 
26
+ self
27
+ end
13
28
  alias each each_row
29
+
14
30
  end
15
31
  end
@@ -0,0 +1,20 @@
1
+ require 'json'
2
+
3
+ module QueueryClient
4
+ class ManifestFile # abstract class
5
+ def manifest_object?
6
+ /\.manifest(?:\.|\z)/ =~ File.basename(key)
7
+ end
8
+
9
+ def column_types
10
+ @column_types ||=
11
+ begin
12
+ f = open
13
+ j = JSON.load(f)
14
+ j['schema']['elements'].map{|x| x['type']['base']}
15
+ ensure
16
+ f.close
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,27 @@
1
+
2
+ module QueueryClient
3
+ module RedshiftDataType
4
+ def self.type_cast(row, manifest_file)
5
+ row.zip(manifest_file.column_types).map do |value, type|
6
+ next nil if (value == '' and type != 'character varing') # null becomes '' on unload
7
+
8
+ case type
9
+ when 'smallint', 'integer', 'bigint'
10
+ value.to_i
11
+ when 'numeric', 'double precision'
12
+ value.to_f
13
+ when 'character', 'character varying'
14
+ value
15
+ when 'timestamp without time zone', 'timestamp with time zone'
16
+ Time.parse(value)
17
+ when 'date'
18
+ Date.parse(value)
19
+ when 'boolean'
20
+ value == 'true' ? true : false
21
+ else
22
+ raise "not support data type: #{type}"
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,5 +1,6 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/s3_data_file'
3
+ require 'queuery_client/s3_manifest_file'
3
4
  require 'aws-sdk-s3'
4
5
  require 'logger'
5
6
 
@@ -24,5 +25,15 @@ module QueueryClient
24
25
  b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
25
26
  b.objects(prefix: @prefix).map {|obj| S3DataFile.new(obj) }
26
27
  end
28
+
29
+ def manifest_file
30
+ b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
31
+ obj = b.object("#{@prefix}manifest")
32
+ S3ManifestFile.new(obj)
33
+ end
34
+
35
+ def has_manifest?
36
+ !manifest_file.nil?
37
+ end
27
38
  end
28
39
  end
@@ -0,0 +1,18 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'forwardable'
3
+
4
+ module QueueryClient
5
+ class S3ManifestFile < ManifestFile
6
+ extend Forwardable
7
+
8
+ def initialize(object)
9
+ @object = object
10
+ end
11
+
12
+ def_delegators '@object', :url, :key, :presigned_url
13
+
14
+ def open
15
+ @object.get.body
16
+ end
17
+ end
18
+ end
@@ -1,19 +1,23 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/url_data_file'
3
+ require 'queuery_client/url_manifest_file'
3
4
  require 'uri'
4
5
  require 'logger'
5
6
 
6
7
  module QueueryClient
7
8
  class UrlDataFileBundle < DataFileBundle
8
- def initialize(urls, s3_prefix:, logger: Logger.new($stderr))
9
- raise ArgumentError, 'no URL given' if urls.empty?
10
- @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url)) }
9
+ def initialize(data_urls, manifest_url, s3_prefix:, logger: Logger.new($stderr))
10
+ raise ArgumentError, 'no URL given' if data_urls.empty?
11
+ @data_files = data_urls.map {|url| UrlDataFile.new(URI.parse(url)) }
12
+ @manifest_file = UrlManifestFile.new(URI.parse(manifest_url)) if manifest_url
11
13
  @s3_prefix = s3_prefix
12
14
  @logger = logger
13
15
  end
14
16
 
15
17
  attr_reader :data_files
18
+ attr_reader :manifest_file
16
19
  attr_reader :s3_prefix
20
+ attr_reader :logger
17
21
 
18
22
  def url
19
23
  uri = data_files.first.url.dup
@@ -28,5 +32,9 @@ module QueueryClient
28
32
  prefix = s3_uri.path[1..-1] # trim heading slash
29
33
  S3DataFileBundle.new(bucket, prefix)
30
34
  end
35
+
36
+ def has_manifest?
37
+ !@manifest_file.nil?
38
+ end
31
39
  end
32
40
  end
@@ -0,0 +1,28 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'net/http'
3
+ require 'stringio'
4
+ require 'json'
5
+
6
+ module QueueryClient
7
+ class UrlManifestFile < ManifestFile
8
+ def initialize(url)
9
+ @url = url
10
+ end
11
+
12
+ attr_reader :url
13
+
14
+ def key
15
+ @url.path
16
+ end
17
+
18
+ def open
19
+ http = Net::HTTP.new(@url.host, @url.port)
20
+ http.use_ssl = (@url.scheme.downcase == 'https')
21
+ content = http.start {
22
+ res = http.get(@url.request_uri)
23
+ res.body
24
+ }
25
+ StringIO.new(content)
26
+ end
27
+ end
28
+ end
@@ -1,3 +1,3 @@
1
1
  module QueueryClient
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -16,8 +16,8 @@ module QueueryClient
16
16
  configuration.instance_eval(&block)
17
17
  end
18
18
 
19
- def query(select_stmt, values = [])
20
- Client.new.query(select_stmt, values)
19
+ def query(select_stmt, values = [], enable_cast: false)
20
+ Client.new.query(select_stmt, values, enable_cast: enable_cast)
21
21
  end
22
22
  end
23
23
  end
@@ -32,6 +32,6 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency 'redshift_csv_file'
33
33
  spec.add_dependency 'aws-sdk-s3'
34
34
  spec.add_development_dependency "bundler", "~> 1.13"
35
- spec.add_development_dependency "rake", "~> 10.0"
35
+ spec.add_development_dependency "rake", "~> 12.0"
36
36
  spec.add_development_dependency "pry"
37
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: queuery_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-23 00:00:00.000000000 Z
11
+ date: 2021-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: garage_client
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '10.0'
75
+ version: '12.0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '10.0'
82
+ version: '12.0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: pry
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ description:
98
98
  email:
99
99
  - hidekazu-kobayashi@cookpad.com
100
100
  executables: []
@@ -113,11 +113,15 @@ files:
113
113
  - lib/queuery_client/configuration.rb
114
114
  - lib/queuery_client/data_file.rb
115
115
  - lib/queuery_client/data_file_bundle.rb
116
+ - lib/queuery_client/manifest_file.rb
116
117
  - lib/queuery_client/query_error.rb
118
+ - lib/queuery_client/redshift_data_type.rb
117
119
  - lib/queuery_client/s3_data_file.rb
118
120
  - lib/queuery_client/s3_data_file_bundle.rb
121
+ - lib/queuery_client/s3_manifest_file.rb
119
122
  - lib/queuery_client/url_data_file.rb
120
123
  - lib/queuery_client/url_data_file_bundle.rb
124
+ - lib/queuery_client/url_manifest_file.rb
121
125
  - lib/queuery_client/version.rb
122
126
  - queuery_client.gemspec
123
127
  homepage: https://github.com/bricolages/queuery_client
@@ -125,7 +129,7 @@ licenses:
125
129
  - MIT
126
130
  metadata:
127
131
  allowed_push_host: https://rubygems.org
128
- post_install_message:
132
+ post_install_message:
129
133
  rdoc_options: []
130
134
  require_paths:
131
135
  - lib
@@ -140,8 +144,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
144
  - !ruby/object:Gem::Version
141
145
  version: '0'
142
146
  requirements: []
143
- rubygems_version: 3.0.3
144
- signing_key:
147
+ rubygems_version: 3.1.6
148
+ signing_key:
145
149
  specification_version: 4
146
150
  summary: Client library for Queuery Redshift HTTP API
147
151
  test_files: []