queuery_client 1.0.2 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 893d3e23c931f4f67621e03306cb3523e2bf86affe3a4c7bd6958e5cd4a7aa12
4
- data.tar.gz: 1fbcebdefffabad096aee653ab255329696b8e5b6e2056f4742a619ec15e3810
3
+ metadata.gz: 4c739774fd292daacd00b2b803065c89d8244a309f2ef09936b107360b4b2cbf
4
+ data.tar.gz: 7a2aac170572e82baaf78a8923465e083b6c485e75191842c7a7cad39922e1e1
5
5
  SHA512:
6
- metadata.gz: 4e7ac1fc3d9b850fc0d7d9261c1db44664511997ec17789dd153d9faf9296af7caa689561d30cfde7e07918d43331fbd11b92a1704c9781b489bb3b17704a4e3
7
- data.tar.gz: 59ca2f54c33e271ffa93d59c224fa98ae11b729152407b5b7691413bce1ca2b4a2ec6b14f9a6c6253cade2fe1bee290aa12f6496daa821441e9d7e9092b3cf91
6
+ metadata.gz: a9c74e88514542dba44ca726bd1c085d89ad308b56e2e094c4459055db4e93ae8589d090cb87e2df05d980c4a8d312b1985ab365015c19cbad57ea3ce0e30838
7
+ data.tar.gz: cd8d07630fc414c51aa7bec61d8abf5c3dabc93239db9e824577def9a56f7affcc39dd0e38190644f32081e188d5b13d4551b0e93999c76d90698358a75ed5e9
@@ -4,18 +4,19 @@ module QueueryClient
4
4
  @options = options
5
5
  end
6
6
 
7
- def execute_query(select_stmt, values)
8
- garage_client.post("/v1/queries", q: select_stmt, values: values)
7
+ def execute_query(select_stmt, values, query_options)
8
+ garage_client.post("/v1/queries", q: select_stmt, values: values, enable_metadata: query_options[:enable_cast])
9
9
  end
10
10
  alias start_query execute_query
11
11
 
12
- def get_query(id)
13
- garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix')
12
+ def get_query(id, query_options)
13
+ query_option_fields = build_query_option_fields(query_options)
14
+ garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix' + query_option_fields)
14
15
  end
15
16
 
16
- def wait_for(id)
17
+ def wait_for(id, query_options)
17
18
  loop do
18
- query = get_query(id)
19
+ query = get_query(id, query_options)
19
20
  case query.status
20
21
  when 'success', 'failed'
21
22
  return query
@@ -24,17 +25,19 @@ module QueueryClient
24
25
  end
25
26
  end
26
27
 
27
- def query_and_wait(select_stmt, values)
28
- query = execute_query(select_stmt, values)
29
- wait_for(query.id)
28
+ def query_and_wait(select_stmt, values, query_options)
29
+ query = execute_query(select_stmt, values, query_options)
30
+ wait_for(query.id, query_options)
30
31
  end
31
32
 
32
- def query(select_stmt, values)
33
- query = query_and_wait(select_stmt, values)
33
+ def query(select_stmt, values, **query_options)
34
+ query = query_and_wait(select_stmt, values, query_options)
35
+ manifest_file_url = query.manifest_file_url if query_options[:enable_cast]
34
36
  case query.status
35
37
  when 'success'
36
38
  UrlDataFileBundle.new(
37
39
  query.data_file_urls,
40
+ manifest_file_url,
38
41
  s3_prefix: query.s3_prefix,
39
42
  )
40
43
  when 'failed'
@@ -54,7 +57,9 @@ module QueueryClient
54
57
  path_prefix: '/',
55
58
  login: options.token,
56
59
  password: options.token_secret
57
- )
60
+ ).tap do |client|
61
+ client.headers['Host'] = options.host_header if options.host_header
62
+ end
58
63
  end
59
64
 
60
65
  def options
@@ -74,11 +79,27 @@ module QueueryClient
74
79
  when 'success'
75
80
  UrlDataFileBundle.new(
76
81
  query.data_file_urls,
82
+ nil,
77
83
  s3_prefix: query.s3_prefix,
78
84
  )
79
85
  when 'failure'
80
86
  raise QueryError.new(query.error)
81
87
  end
82
88
  end
89
+
90
# Builds the suffix appended to the `fields` parameter when fetching a query.
#
# Each option whose value is truthy is translated to a field name through
# convert_field_name. Options that have no known field name are dropped, so
# the result never contains empty entries or a dangling comma.
#
# @param query_options [Hash{Symbol => Object}] option name => enabled flag
# @return [String] '' when no field applies, otherwise ',' followed by the
#   comma-separated field names
def build_query_option_fields(query_options)
  enabled_names = query_options.select { |_name, enabled| enabled }.keys
  # convert_field_name returns nil for names it does not know; compact removes them.
  field_names = enabled_names.map { |option_name| convert_field_name(option_name) }.compact
  return '' if field_names.empty?

  ",#{field_names.join(',')}"
end

# Maps a query option name to the field name requested from the API.
#
# @param option_name [Symbol]
# @return [String, nil] the field name, or nil when the option has none
def convert_field_name(option_name)
  case option_name
  when :enable_cast
    'manifest_file_url'
  # add another option here if you need
  end
end
83
104
  end
84
105
  end
@@ -1,5 +1,8 @@
1
1
  module QueueryClient
2
2
  class Configuration
3
+ REQUIRED_KEYS = [:endpoint, :token, :token_secret]
4
+ OPTIONAL_KEYS = [:host_header]
5
+
3
6
  def initialize(options = {})
4
7
  @options = options
5
8
  end
@@ -12,11 +15,7 @@ module QueueryClient
12
15
  @options = nil
13
16
  end
14
17
 
15
- [
16
- :endpoint,
17
- :token,
18
- :token_secret,
19
- ].each do |key|
18
+ REQUIRED_KEYS.each do |key|
20
19
  define_method(key) do
21
20
  options.fetch(key)
22
21
  end
@@ -26,6 +25,16 @@ module QueueryClient
26
25
  end
27
26
  end
28
27
 
28
+ OPTIONAL_KEYS.each do |key|
29
+ define_method(key) do
30
+ options[key]
31
+ end
32
+
33
+ define_method("#{key}=") do |value|
34
+ options[key] = value
35
+ end
36
+ end
37
+
29
38
  def merge(other)
30
39
  Configuration.new(to_h.merge(other.to_h))
31
40
  end
@@ -2,7 +2,7 @@ require 'redshift_csv_file'
2
2
  require 'zlib'
3
3
 
4
4
  module QueueryClient
5
- class DataFile
5
+ class DataFile # abstract class
6
6
  def data_object?
7
7
  /\.csv(?:\.|\z)/ =~ File.basename(key)
8
8
  end
@@ -12,15 +12,21 @@ module QueueryClient
12
12
  end
13
13
 
14
14
  def each_row(&block)
15
+ return enum_for(:each_row) if !block_given?
16
+
15
17
  f = open
16
18
  begin
17
19
  if gzipped_object?
18
20
  f = Zlib::GzipReader.new(f)
19
21
  end
20
- RedshiftCsvFile.new(f).each(&block)
22
+ RedshiftCsvFile.new(f).each do |row|
23
+ yield row
24
+ end
21
25
  ensure
22
26
  f.close
23
27
  end
28
+
29
+ self
24
30
  end
25
31
  end
26
32
  end
@@ -1,15 +1,31 @@
1
+ require 'queuery_client/redshift_data_type'
2
+ require 'date'
3
+ require 'time'
4
+
1
5
  module QueueryClient
2
6
  class DataFileBundle
3
7
  # abstract data_files :: [DataFile]
8
+ # abstract manifest_file :: ManifestFile
9
+ # abstract def has_manifest?
4
10
 
5
11
  def each_row(&block)
12
+ return enum_for(:each_row) if !block_given?
13
+
6
14
  data_files.each do |file|
7
15
  if file.data_object?
8
- file.each_row(&block)
16
+ file.each_row do |row|
17
+ if has_manifest?
18
+ yield RedshiftDataType.type_cast(row, manifest_file)
19
+ else
20
+ yield row
21
+ end
22
+ end
9
23
  end
10
24
  end
11
- end
12
25
 
26
+ self
27
+ end
13
28
  alias each each_row
29
+
14
30
  end
15
31
  end
@@ -0,0 +1,20 @@
1
+ require 'json'
2
+
3
module QueueryClient
  # Base class for manifest files. It calls `key` and `open`, which it does
  # not define itself; concrete subclasses are expected to provide them.
  class ManifestFile # abstract class
    # Tests whether the base name of `key` contains a ".manifest" component
    # (either at the end or followed by another dot).
    #
    # @return [Integer, nil] match position (truthy) or nil when it does not match
    def manifest_object?
      /\.manifest(?:\.|\z)/ =~ File.basename(key)
    end

    # Reads the manifest JSON and returns the base type of every schema element,
    # in the order they appear. The result is memoized after the first read.
    #
    # @return [Array<String>] values of schema.elements[].type.base
    def column_types
      return @column_types if @column_types

      # Open outside the begin/ensure: if opening fails there is nothing to
      # close, and the original error must reach the caller unmasked.
      io = open
      begin
        # JSON.parse never instantiates arbitrary classes from the payload,
        # unlike JSON.load with its default options.
        manifest = JSON.parse(io.read)
        @column_types = manifest['schema']['elements'].map { |element| element['type']['base'] }
      ensure
        io.close
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+
2
require 'set'  # Enumerable#to_set is used for FALSE_VALUES
require 'date' # Date.parse is used for the 'date' type

module QueueryClient
  # Converts the raw values of an unloaded row into Ruby objects, using the
  # column types reported by a manifest file.
  module RedshiftDataType
    # Values that are read as boolean false; any other value is read as true.
    FALSE_VALUES = [
      false, 0,
      "0", :"0",
      "f", :f,
      "F", :F,
      "false", :false,
      "FALSE", :FALSE,
      "off", :off,
      "OFF", :OFF,
    ].to_set.freeze

    # Casts each value of +row+ according to the type at the same position in
    # +manifest_file.column_types+.
    #
    # @param row [Array<String>] raw values of one row
    # @param manifest_file [#column_types] object returning the column type names
    # @return [Array] the cast values, in the same order as +row+
    # @raise [RuntimeError] when a column has a type that is not handled here
    def self.type_cast(row, manifest_file)
      row.zip(manifest_file.column_types).map do |value, type|
        # null becomes '' on unload, so '' is read back as nil -- except for
        # 'character varying' columns, where the empty string is kept as is.
        next nil if value == '' && type != 'character varying'

        case type
        when 'smallint', 'integer', 'bigint'
          value.to_i
        when 'numeric', 'double precision'
          value.to_f
        when 'character', 'character varying'
          value
        when 'timestamp without time zone', 'timestamp with time zone'
          value # Ruby does not have a class without timezone
        when 'date'
          Date.parse(value)
        when 'boolean'
          !FALSE_VALUES.include?(value)
        else
          raise "not support data type: #{type}"
        end
      end
    end
  end
end
@@ -1,5 +1,6 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/s3_data_file'
3
+ require 'queuery_client/s3_manifest_file'
3
4
  require 'aws-sdk-s3'
4
5
  require 'logger'
5
6
 
@@ -22,7 +23,23 @@ module QueueryClient
22
23
 
23
24
  def data_files
24
25
  b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
25
- b.objects(prefix: @prefix).map {|obj| S3DataFile.new(obj) }
26
+ b.objects(prefix: @prefix)
27
+ .select {|obj| obj.key.include?('_part_') }
28
+ .map {|obj| S3DataFile.new(obj) }
29
+ end
30
+
31
# Returns the manifest stored under the key "<prefix>manifest", or nil when
# that object does not exist.
#
# The lookup result (nil included) is memoized. DataFileBundle#each_row asks
# has_manifest? and manifest_file once per row, so without the cache every
# row would repeat the S3 existence check and build a fresh S3ManifestFile.
#
# @return [S3ManifestFile, nil]
def manifest_file
  return @memoized_manifest_file if defined?(@memoized_manifest_file)

  bucket = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
  object = bucket.object("#{@prefix}manifest")
  @memoized_manifest_file = object.exists? ? S3ManifestFile.new(object) : nil
end
40
+
41
+ def has_manifest?
42
+ !manifest_file.nil?
26
43
  end
27
44
  end
28
45
  end
@@ -0,0 +1,18 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'forwardable'
3
+
4
module QueueryClient
  # Manifest file backed by an S3 object. `url`, `key` and `presigned_url`
  # are forwarded to the wrapped object.
  class S3ManifestFile < ManifestFile
    extend Forwardable

    def_delegator :@object, :url
    def_delegator :@object, :key
    def_delegator :@object, :presigned_url

    # @param object the S3 object holding the manifest; it must respond to
    #   the delegated methods and to `get`
    def initialize(object)
      @object = object
    end

    # Fetches the object and returns the body of the response.
    def open
      response = @object.get
      response.body
    end
  end
end
@@ -1,18 +1,21 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/url_data_file'
3
+ require 'queuery_client/url_manifest_file'
3
4
  require 'uri'
4
5
  require 'logger'
5
6
 
6
7
  module QueueryClient
7
8
  class UrlDataFileBundle < DataFileBundle
8
- def initialize(urls, s3_prefix:, logger: Logger.new($stderr))
9
- raise ArgumentError, 'no URL given' if urls.empty?
10
- @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url)) }
9
+ def initialize(data_urls, manifest_url, s3_prefix:, logger: Logger.new($stderr))
10
+ raise ArgumentError, 'no URL given' if data_urls.empty?
11
+ @data_files = data_urls.map {|url| UrlDataFile.new(URI.parse(url)) }
12
+ @manifest_file = UrlManifestFile.new(URI.parse(manifest_url)) if manifest_url
11
13
  @s3_prefix = s3_prefix
12
14
  @logger = logger
13
15
  end
14
16
 
15
17
  attr_reader :data_files
18
+ attr_reader :manifest_file
16
19
  attr_reader :s3_prefix
17
20
  attr_reader :logger
18
21
 
@@ -29,5 +32,9 @@ module QueueryClient
29
32
  prefix = s3_uri.path[1..-1] # trim heading slash
30
33
  S3DataFileBundle.new(bucket, prefix)
31
34
  end
35
+
36
+ def has_manifest?
37
+ !@manifest_file.nil?
38
+ end
32
39
  end
33
40
  end
@@ -0,0 +1,28 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'net/http'
3
+ require 'stringio'
4
+ require 'json'
5
+
6
module QueueryClient
  # Manifest file retrieved with an HTTP GET from a URL.
  class UrlManifestFile < ManifestFile
    attr_reader :url

    # @param url [URI] location of the manifest; host, port, scheme, path and
    #   request_uri are read from it
    def initialize(url)
      @url = url
    end

    # The path component of the URL serves as the key.
    def key
      @url.path
    end

    # Downloads the whole response body and wraps it in a StringIO.
    # TLS is enabled only when the scheme is https (case-insensitive).
    #
    # @return [StringIO]
    def open
      connection = Net::HTTP.new(@url.host, @url.port)
      connection.use_ssl = (@url.scheme.downcase == 'https')
      body = connection.start do
        connection.get(@url.request_uri).body
      end
      StringIO.new(body)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module QueueryClient
2
- VERSION = "1.0.2"
2
+ VERSION = "1.1.3"
3
3
  end
@@ -16,8 +16,8 @@ module QueueryClient
16
16
  configuration.instance_eval(&block)
17
17
  end
18
18
 
19
- def query(select_stmt, values = [])
20
- Client.new.query(select_stmt, values)
19
+ def query(select_stmt, values = [], enable_cast: false)
20
+ Client.new.query(select_stmt, values, enable_cast: enable_cast)
21
21
  end
22
22
  end
23
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: queuery_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-22 00:00:00.000000000 Z
11
+ date: 2021-11-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: garage_client
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ description:
98
98
  email:
99
99
  - hidekazu-kobayashi@cookpad.com
100
100
  executables: []
@@ -113,11 +113,15 @@ files:
113
113
  - lib/queuery_client/configuration.rb
114
114
  - lib/queuery_client/data_file.rb
115
115
  - lib/queuery_client/data_file_bundle.rb
116
+ - lib/queuery_client/manifest_file.rb
116
117
  - lib/queuery_client/query_error.rb
118
+ - lib/queuery_client/redshift_data_type.rb
117
119
  - lib/queuery_client/s3_data_file.rb
118
120
  - lib/queuery_client/s3_data_file_bundle.rb
121
+ - lib/queuery_client/s3_manifest_file.rb
119
122
  - lib/queuery_client/url_data_file.rb
120
123
  - lib/queuery_client/url_data_file_bundle.rb
124
+ - lib/queuery_client/url_manifest_file.rb
121
125
  - lib/queuery_client/version.rb
122
126
  - queuery_client.gemspec
123
127
  homepage: https://github.com/bricolages/queuery_client
@@ -125,7 +129,7 @@ licenses:
125
129
  - MIT
126
130
  metadata:
127
131
  allowed_push_host: https://rubygems.org
128
- post_install_message:
132
+ post_install_message:
129
133
  rdoc_options: []
130
134
  require_paths:
131
135
  - lib
@@ -140,8 +144,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
144
  - !ruby/object:Gem::Version
141
145
  version: '0'
142
146
  requirements: []
143
- rubygems_version: 3.1.2
144
- signing_key:
147
+ rubygems_version: 3.1.6
148
+ signing_key:
145
149
  specification_version: 4
146
150
  summary: Client library for Queuery Redshift HTTP API
147
151
  test_files: []