queuery_client 1.0.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: abbdfbc43e113b76608bc2c0a4555bd5e194e9b10c4b7d89f6fe3e78509245f4
4
- data.tar.gz: 15cd76d0c3f02ed04e59254585428255686020e71603fa6633ffb022bd5f5357
3
+ metadata.gz: 680cc3940c5e82d5ae0772d89cd25ffc00310236f9d92b6a686f900dd46f0a88
4
+ data.tar.gz: 468821770777a14d147958b69a3f21b8a88a827e677b787ec2eba6b06b6d1007
5
5
  SHA512:
6
- metadata.gz: 95d02421bdbe1c30b995f9a7a40d86901339380c71dac23979a5653ea0f4038c98e9f8011c6d43a32dbbee482d5ef061639a9b4c3da9a718f36b5eaa46518f9c
7
- data.tar.gz: b558a205e45be08e7401a29aa1c94db29b181c01e950019b58773b9b511dd47fb62601e520da8474dca71dc53aa688cedc73245e4b5f347c0008607a7d12ae08
6
+ metadata.gz: f42b47de00d2f718ce017a5e7a343b1e84a9f681ab3195796e81f7da3c85f5bf21b23cb5f5d4a151c2b0f3a8feba1e8fb01d2645fa17a9393803f3f6f6e0826e
7
+ data.tar.gz: 9139744e2b4f16ea8654e6e32e961270bb67b732eac50dd3633f227aa4d97b74249d3ff8ad91e9d47f2fbade3a47dcdda716b1f7a252ec58d90e7352d755cb0d
@@ -4,17 +4,19 @@ module QueueryClient
4
4
  @options = options
5
5
  end
6
6
 
7
- def execute_query(select_stmt, values)
8
- garage_client.post("/v1/queries", q: select_stmt, values: values)
7
+ def execute_query(select_stmt, values, query_options)
8
+ garage_client.post("/v1/queries", q: select_stmt, values: values, enable_metadata: query_options[:enable_cast])
9
9
  end
10
+ alias start_query execute_query
10
11
 
11
- def get_query(id)
12
- garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix')
12
+ def get_query(id, query_options)
13
+ query_option_fields = build_query_option_fields(query_options)
14
+ garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix' + query_option_fields)
13
15
  end
14
16
 
15
- def wait_for(id)
17
+ def wait_for(id, query_options)
16
18
  loop do
17
- query = get_query(id)
19
+ query = get_query(id, query_options)
18
20
  case query.status
19
21
  when 'success', 'failed'
20
22
  return query
@@ -23,17 +25,19 @@ module QueueryClient
23
25
  end
24
26
  end
25
27
 
26
- def query_and_wait(select_stmt, values)
27
- query = execute_query(select_stmt, values)
28
- wait_for(query.id)
28
+ def query_and_wait(select_stmt, values, query_options)
29
+ query = execute_query(select_stmt, values, query_options)
30
+ wait_for(query.id, query_options)
29
31
  end
30
32
 
31
- def query(select_stmt, values)
32
- query = query_and_wait(select_stmt, values)
33
+ def query(select_stmt, values, **query_options)
34
+ query = query_and_wait(select_stmt, values, query_options)
35
+ manifest_file_url = query.manifest_file_url if query_options[:enable_cast]
33
36
  case query.status
34
37
  when 'success'
35
38
  UrlDataFileBundle.new(
36
39
  query.data_file_urls,
40
+ manifest_file_url,
37
41
  s3_prefix: query.s3_prefix,
38
42
  )
39
43
  when 'failed'
@@ -41,13 +45,21 @@ module QueueryClient
41
45
  end
42
46
  end
43
47
 
48
+ # poll_result returns the results only if the query has already succeeded.
49
+ def poll_result(id)
50
+ query = get_query(id)
51
+ get_query_result(query)
52
+ end
53
+
44
54
  def garage_client
45
55
  @garage_client ||= BasicAuthGarageClient.new(
46
56
  endpoint: options.endpoint,
47
57
  path_prefix: '/',
48
58
  login: options.token,
49
59
  password: options.token_secret
50
- )
60
+ ).tap do |client|
61
+ client.headers['Host'] = options.host_header if options.host_header
62
+ end
51
63
  end
52
64
 
53
65
  def options
@@ -57,5 +69,37 @@ module QueueryClient
57
69
  def default_options
58
70
  QueueryClient.configuration
59
71
  end
72
+
73
+ private
74
+
75
+ def get_query_result(query)
76
+ case query.status
77
+ when 'pending', 'running'
78
+ nil
79
+ when 'success'
80
+ UrlDataFileBundle.new(
81
+ query.data_file_urls,
82
+ nil,
83
+ s3_prefix: query.s3_prefix,
84
+ )
85
+ when 'failure'
86
+ raise QueryError.new(query.error)
87
+ end
88
+ end
89
+
90
+ def build_query_option_fields(query_options)
91
+ enable_query_options = query_options.select{ |name, v| name if v }.keys
92
+ return '' if enable_query_options.empty?
93
+ query_option_fields = enable_query_options.map{ |option_name| convert_field_name(option_name) }
94
+ ',' + query_option_fields.join(',')
95
+ end
96
+
97
+ def convert_field_name(option_name)
98
+ case option_name
99
+ when :enable_cast
100
+ 'manifest_file_url'
101
+ # add other options here as needed
102
+ end
103
+ end
60
104
  end
61
105
  end
@@ -1,5 +1,8 @@
1
1
  module QueueryClient
2
2
  class Configuration
3
+ REQUIRED_KEYS = [:endpoint, :token, :token_secret]
4
+ OPTIONAL_KEYS = [:host_header]
5
+
3
6
  def initialize(options = {})
4
7
  @options = options
5
8
  end
@@ -12,11 +15,7 @@ module QueueryClient
12
15
  @options = nil
13
16
  end
14
17
 
15
- [
16
- :endpoint,
17
- :token,
18
- :token_secret,
19
- ].each do |key|
18
+ REQUIRED_KEYS.each do |key|
20
19
  define_method(key) do
21
20
  options.fetch(key)
22
21
  end
@@ -26,6 +25,16 @@ module QueueryClient
26
25
  end
27
26
  end
28
27
 
28
+ OPTIONAL_KEYS.each do |key|
29
+ define_method(key) do
30
+ options[key]
31
+ end
32
+
33
+ define_method("#{key}=") do |value|
34
+ options[key] = value
35
+ end
36
+ end
37
+
29
38
  def merge(other)
30
39
  Configuration.new(to_h.merge(other.to_h))
31
40
  end
@@ -2,7 +2,7 @@ require 'redshift_csv_file'
2
2
  require 'zlib'
3
3
 
4
4
  module QueueryClient
5
- class DataFile
5
+ class DataFile # abstract class
6
6
  def data_object?
7
7
  /\.csv(?:\.|\z)/ =~ File.basename(key)
8
8
  end
@@ -12,15 +12,21 @@ module QueueryClient
12
12
  end
13
13
 
14
14
  def each_row(&block)
15
+ return enum_for(:each_row) if !block_given?
16
+
15
17
  f = open
16
18
  begin
17
19
  if gzipped_object?
18
20
  f = Zlib::GzipReader.new(f)
19
21
  end
20
- RedshiftCsvFile.new(f).each(&block)
22
+ RedshiftCsvFile.new(f).each do |row|
23
+ yield row
24
+ end
21
25
  ensure
22
26
  f.close
23
27
  end
28
+
29
+ self
24
30
  end
25
31
  end
26
32
  end
@@ -1,15 +1,31 @@
1
+ require 'queuery_client/redshift_data_type'
2
+ require 'date'
3
+ require 'time'
4
+
1
5
  module QueueryClient
2
6
  class DataFileBundle
3
7
  # abstract data_files :: [DataFile]
8
+ # abstract manifest_file :: ManifestFile
9
+ # abstract def has_manifest?
4
10
 
5
11
  def each_row(&block)
12
+ return enum_for(:each_row) if !block_given?
13
+
6
14
  data_files.each do |file|
7
15
  if file.data_object?
8
- file.each_row(&block)
16
+ file.each_row do |row|
17
+ if has_manifest?
18
+ yield RedshiftDataType.type_cast(row, manifest_file)
19
+ else
20
+ yield row
21
+ end
22
+ end
9
23
  end
10
24
  end
11
- end
12
25
 
26
+ self
27
+ end
13
28
  alias each each_row
29
+
14
30
  end
15
31
  end
@@ -0,0 +1,20 @@
1
+ require 'json'
2
+
3
+ module QueueryClient
4
+ class ManifestFile # abstract class
5
+ def manifest_object?
6
+ /\.manifest(?:\.|\z)/ =~ File.basename(key)
7
+ end
8
+
9
+ def column_types
10
+ @column_types ||=
11
+ begin
12
+ f = open
13
+ j = JSON.load(f)
14
+ j['schema']['elements'].map{|x| x['type']['base']}
15
+ ensure
16
+ f.close
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,38 @@
1
+
2
+ module QueueryClient
3
+ module RedshiftDataType
4
+ FALSE_VALUES = [
5
+ false, 0,
6
+ "0", :"0",
7
+ "f", :f,
8
+ "F", :F,
9
+ "false", :false,
10
+ "FALSE", :FALSE,
11
+ "off", :off,
12
+ "OFF", :OFF,
13
+ ].to_set.freeze
14
+
15
+ def self.type_cast(row, manifest_file)
16
+ row.zip(manifest_file.column_types).map do |value, type|
17
+ next nil if (value == '' and type != 'character varing') # null becomes '' on unload
18
+
19
+ case type
20
+ when 'smallint', 'integer', 'bigint'
21
+ value.to_i
22
+ when 'numeric', 'double precision'
23
+ value.to_f
24
+ when 'character', 'character varying'
25
+ value
26
+ when 'timestamp without time zone', 'timestamp with time zone'
27
+ Time.parse(value)
28
+ when 'date'
29
+ Date.parse(value)
30
+ when 'boolean'
31
+ FALSE_VALUES.include?(value) ? false : true
32
+ else
33
+ raise "not support data type: #{type}"
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -1,5 +1,6 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/s3_data_file'
3
+ require 'queuery_client/s3_manifest_file'
3
4
  require 'aws-sdk-s3'
4
5
  require 'logger'
5
6
 
@@ -22,7 +23,23 @@ module QueueryClient
22
23
 
23
24
  def data_files
24
25
  b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
25
- b.objects(prefix: @prefix).map {|obj| S3DataFile.new(obj) }
26
+ b.objects(prefix: @prefix)
27
+ .select {|obj| obj.key.include?('_part_') }
28
+ .map {|obj| S3DataFile.new(obj) }
29
+ end
30
+
31
+ def manifest_file
32
+ b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
33
+ obj = b.object("#{@prefix}manifest")
34
+ if obj.exists?
35
+ S3ManifestFile.new(obj)
36
+ else
37
+ nil
38
+ end
39
+ end
40
+
41
+ def has_manifest?
42
+ !manifest_file.nil?
26
43
  end
27
44
  end
28
45
  end
@@ -0,0 +1,18 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'forwardable'
3
+
4
+ module QueueryClient
5
+ class S3ManifestFile < ManifestFile
6
+ extend Forwardable
7
+
8
+ def initialize(object)
9
+ @object = object
10
+ end
11
+
12
+ def_delegators '@object', :url, :key, :presigned_url
13
+
14
+ def open
15
+ @object.get.body
16
+ end
17
+ end
18
+ end
@@ -1,18 +1,21 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/url_data_file'
3
+ require 'queuery_client/url_manifest_file'
3
4
  require 'uri'
4
5
  require 'logger'
5
6
 
6
7
  module QueueryClient
7
8
  class UrlDataFileBundle < DataFileBundle
8
- def initialize(urls, s3_prefix:, logger: Logger.new($stderr))
9
- raise ArgumentError, 'no URL given' if urls.empty?
10
- @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url)) }
9
+ def initialize(data_urls, manifest_url, s3_prefix:, logger: Logger.new($stderr))
10
+ raise ArgumentError, 'no URL given' if data_urls.empty?
11
+ @data_files = data_urls.map {|url| UrlDataFile.new(URI.parse(url)) }
12
+ @manifest_file = UrlManifestFile.new(URI.parse(manifest_url)) if manifest_url
11
13
  @s3_prefix = s3_prefix
12
14
  @logger = logger
13
15
  end
14
16
 
15
17
  attr_reader :data_files
18
+ attr_reader :manifest_file
16
19
  attr_reader :s3_prefix
17
20
  attr_reader :logger
18
21
 
@@ -29,5 +32,9 @@ module QueueryClient
29
32
  prefix = s3_uri.path[1..-1] # trim heading slash
30
33
  S3DataFileBundle.new(bucket, prefix)
31
34
  end
35
+
36
+ def has_manifest?
37
+ !@manifest_file.nil?
38
+ end
32
39
  end
33
40
  end
@@ -0,0 +1,28 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'net/http'
3
+ require 'stringio'
4
+ require 'json'
5
+
6
+ module QueueryClient
7
+ class UrlManifestFile < ManifestFile
8
+ def initialize(url)
9
+ @url = url
10
+ end
11
+
12
+ attr_reader :url
13
+
14
+ def key
15
+ @url.path
16
+ end
17
+
18
+ def open
19
+ http = Net::HTTP.new(@url.host, @url.port)
20
+ http.use_ssl = (@url.scheme.downcase == 'https')
21
+ content = http.start {
22
+ res = http.get(@url.request_uri)
23
+ res.body
24
+ }
25
+ StringIO.new(content)
26
+ end
27
+ end
28
+ end
@@ -1,3 +1,3 @@
1
1
  module QueueryClient
2
- VERSION = "1.0.1"
2
+ VERSION = "1.1.2"
3
3
  end
@@ -16,8 +16,8 @@ module QueueryClient
16
16
  configuration.instance_eval(&block)
17
17
  end
18
18
 
19
- def query(select_stmt, values = [])
20
- Client.new.query(select_stmt, values)
19
+ def query(select_stmt, values = [], enable_cast: false)
20
+ Client.new.query(select_stmt, values, enable_cast: enable_cast)
21
21
  end
22
22
  end
23
23
  end
@@ -32,6 +32,6 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency 'redshift_csv_file'
33
33
  spec.add_dependency 'aws-sdk-s3'
34
34
  spec.add_development_dependency "bundler", "~> 1.13"
35
- spec.add_development_dependency "rake", "~> 10.0"
35
+ spec.add_development_dependency "rake", "~> 12.0"
36
36
  spec.add_development_dependency "pry"
37
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: queuery_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-08-30 00:00:00.000000000 Z
11
+ date: 2021-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: garage_client
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '10.0'
75
+ version: '12.0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '10.0'
82
+ version: '12.0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: pry
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ description:
98
98
  email:
99
99
  - hidekazu-kobayashi@cookpad.com
100
100
  executables: []
@@ -113,11 +113,15 @@ files:
113
113
  - lib/queuery_client/configuration.rb
114
114
  - lib/queuery_client/data_file.rb
115
115
  - lib/queuery_client/data_file_bundle.rb
116
+ - lib/queuery_client/manifest_file.rb
116
117
  - lib/queuery_client/query_error.rb
118
+ - lib/queuery_client/redshift_data_type.rb
117
119
  - lib/queuery_client/s3_data_file.rb
118
120
  - lib/queuery_client/s3_data_file_bundle.rb
121
+ - lib/queuery_client/s3_manifest_file.rb
119
122
  - lib/queuery_client/url_data_file.rb
120
123
  - lib/queuery_client/url_data_file_bundle.rb
124
+ - lib/queuery_client/url_manifest_file.rb
121
125
  - lib/queuery_client/version.rb
122
126
  - queuery_client.gemspec
123
127
  homepage: https://github.com/bricolages/queuery_client
@@ -125,7 +129,7 @@ licenses:
125
129
  - MIT
126
130
  metadata:
127
131
  allowed_push_host: https://rubygems.org
128
- post_install_message:
132
+ post_install_message:
129
133
  rdoc_options: []
130
134
  require_paths:
131
135
  - lib
@@ -140,8 +144,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
144
  - !ruby/object:Gem::Version
141
145
  version: '0'
142
146
  requirements: []
143
- rubygems_version: 3.0.1
144
- signing_key:
147
+ rubygems_version: 3.1.6
148
+ signing_key:
145
149
  specification_version: 4
146
150
  summary: Client library for Queuery Redshift HTTP API
147
151
  test_files: []