queuery_client 1.0.3 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5aa7c59714b2972a888c845beaaf38e220fdc0173cd746a0a73f510a1c69afd
4
- data.tar.gz: c876ff24ba9b2d6f489d480de5fc55b74d08ab2aa0229c4cb2a74ca0914053f9
3
+ metadata.gz: 11b32a624cd5e1125520cc7f1d68d6048f551ebbdfbdb34a2d64520cb3ac89c2
4
+ data.tar.gz: a41d2d8a3148035caadaa435ee98a49db50be5382286c67c78d5faeba157dc1b
5
5
  SHA512:
6
- metadata.gz: 0c154295e3c4c676de39ee43b544ec458285044c0d797f7ec199c39589023019c7f407623745a53d2da6016a9eb1b1594240e2a1f74d0093025e9d3d73d0ab56
7
- data.tar.gz: f7340f67c56865d1c179d062801087931db880a6f9ccbc3f5d3109438ab277a2902000e253bf6d45f8ceebf47b91f2d27fb6709e2fe305f90201d6e9fba206c7
6
+ metadata.gz: 4c9303a966bc816b6fc95eede740ae9cc804f95f8538984726d5f54f96e2b2b9f985e9dc7ecf831a59fb9a334dd9759217c782386b68d0523336153649081f55
7
+ data.tar.gz: 61227ed70ae5620421ccee2445c901a6c6875dd0b9c9758a847b6a0091821e2b5f9906f211118c22419a0a3454a4ab373047ecac346c876984ef064f5c4351ae
@@ -4,37 +4,44 @@ module QueueryClient
4
4
  @options = options
5
5
  end
6
6
 
7
- def execute_query(select_stmt, values)
8
- garage_client.post("/v1/queries", q: select_stmt, values: values)
7
+ def execute_query(select_stmt, values, query_options)
8
+ garage_client.post("/v1/queries", q: select_stmt, values: values, enable_metadata: query_options[:enable_cast])
9
9
  end
10
10
  alias start_query execute_query
11
11
 
12
- def get_query(id)
13
- garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix')
12
+ def get_query(id, query_options)
13
+ query_option_fields = build_query_option_fields(query_options)
14
+ garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix' + query_option_fields)
14
15
  end
15
16
 
16
- def wait_for(id)
17
+ MAX_POLLING_INTERVAL = 30
18
+
19
+ def wait_for(id, query_options)
20
+ n = 0
17
21
  loop do
18
- query = get_query(id)
22
+ query = get_query(id, query_options)
19
23
  case query.status
20
24
  when 'success', 'failed'
21
25
  return query
22
26
  end
23
- sleep 3
27
+ n += 1
28
+ sleep [3 * n, MAX_POLLING_INTERVAL].min
24
29
  end
25
30
  end
26
31
 
27
- def query_and_wait(select_stmt, values)
28
- query = execute_query(select_stmt, values)
29
- wait_for(query.id)
32
+ def query_and_wait(select_stmt, values, query_options)
33
+ query = execute_query(select_stmt, values, query_options)
34
+ wait_for(query.id, query_options)
30
35
  end
31
36
 
32
- def query(select_stmt, values)
33
- query = query_and_wait(select_stmt, values)
37
+ def query(select_stmt, values, **query_options)
38
+ query = query_and_wait(select_stmt, values, query_options)
39
+ manifest_file_url = query.manifest_file_url if query_options[:enable_cast]
34
40
  case query.status
35
41
  when 'success'
36
42
  UrlDataFileBundle.new(
37
43
  query.data_file_urls,
44
+ manifest_file_url,
38
45
  s3_prefix: query.s3_prefix,
39
46
  )
40
47
  when 'failed'
@@ -76,11 +83,27 @@ module QueueryClient
76
83
  when 'success'
77
84
  UrlDataFileBundle.new(
78
85
  query.data_file_urls,
86
+ nil,
79
87
  s3_prefix: query.s3_prefix,
80
88
  )
81
89
  when 'failure'
82
90
  raise QueryError.new(query.error)
83
91
  end
84
92
  end
93
+
94
# Builds the suffix appended to the `fields` parameter when fetching a query.
#
# Every option whose value is truthy is translated to an API field name via
# convert_field_name. Options that have no field mapping are dropped, so they
# can no longer leave empty entries (stray commas) in the field list.
#
# @param query_options [Hash{Symbol => Object}] option name => enabled flag
# @return [String] '' when no field is required, otherwise ",field1,field2"
def build_query_option_fields(query_options)
  field_names = query_options
    .select { |_name, enabled| enabled }
    .keys
    .map { |option_name| convert_field_name(option_name) }
    .compact
  return '' if field_names.empty?

  ',' + field_names.join(',')
end

# Maps a query option name to the API field that option requires.
#
# @param option_name [Symbol]
# @return [String, nil] the field name, or nil when the option has no mapping
def convert_field_name(option_name)
  case option_name
  when :enable_cast
    'manifest_file_url'
  # add another option here if you need
  end
end
85
108
  end
86
109
  end
@@ -2,7 +2,7 @@ require 'redshift_csv_file'
2
2
  require 'zlib'
3
3
 
4
4
  module QueueryClient
5
- class DataFile
5
+ class DataFile # abstract class
6
6
  def data_object?
7
7
  /\.csv(?:\.|\z)/ =~ File.basename(key)
8
8
  end
@@ -1,6 +1,12 @@
1
+ require 'queuery_client/redshift_data_type'
2
+ require 'date'
3
+ require 'time'
4
+
1
5
  module QueueryClient
2
6
  class DataFileBundle
3
7
  # abstract data_files :: [DataFile]
8
+ # abstract manifest_file :: ManifestFile
9
+ # abstract def has_manifest?
4
10
 
5
11
  def each_row(&block)
6
12
  return enum_for(:each_row) if !block_given?
@@ -8,14 +14,18 @@ module QueueryClient
8
14
  data_files.each do |file|
9
15
  if file.data_object?
10
16
  file.each_row do |row|
11
- yield row
17
+ if has_manifest?
18
+ yield RedshiftDataType.type_cast(row, manifest_file)
19
+ else
20
+ yield row
21
+ end
12
22
  end
13
23
  end
14
24
  end
15
25
 
16
26
  self
17
27
  end
18
-
19
28
  alias each each_row
29
+
20
30
  end
21
31
  end
@@ -0,0 +1,20 @@
1
+ require 'json'
2
+
3
module QueueryClient
  # Abstract base class for a manifest file that describes the result columns.
  #
  # Subclasses must implement:
  #   key  -> String, path/key of the manifest (used by manifest_object?)
  #   open -> IO-like object responding to #read and #close
  class ManifestFile
    # @return [Integer, nil] match position (truthy) when the basename of
    #   `key` has a ".manifest" extension component, nil otherwise
    def manifest_object?
      /\.manifest(?:\.|\z)/ =~ File.basename(key)
    end

    # Column type names in column order, taken from
    # schema.elements[].type.base of the manifest JSON.
    # The manifest is read once and the result is memoized.
    #
    # @return [Array<String>]
    def column_types
      @column_types ||= read_column_types
    end

    private

    # Opens, parses and closes the manifest.
    def read_column_types
      # Open outside the begin/ensure: if `open` itself fails there is no
      # handle to close, and the original error must not be masked by a
      # NoMethodError on nil.
      io = open
      begin
        # JSON.parse handles plain data only; JSON.load's legacy
        # object-deserialization behavior is not wanted for fetched content.
        manifest = JSON.parse(io.read)
        manifest['schema']['elements'].map { |element| element['type']['base'] }
      ensure
        io.close
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+
2
require 'date' # Date.parse
require 'set'  # Enumerable#to_set (Set is not autoloaded before Ruby 3.2)

module QueueryClient
  # Converts the string values of an unloaded row into Ruby objects,
  # based on the column type names listed in the manifest file.
  module RedshiftDataType
    # Values treated as boolean false; anything else casts to true.
    FALSE_VALUES = [
      false, 0,
      "0", :"0",
      "f", :f,
      "F", :F,
      "false", :false,
      "FALSE", :FALSE,
      "off", :off,
      "OFF", :OFF,
    ].to_set.freeze

    # Casts each value of +row+ according to the column type at the same
    # position in +manifest_file.column_types+.
    #
    # @param row [Array<String>] raw values of one row
    # @param manifest_file [#column_types] source of the column type names
    # @return [Array] the cast values, in column order
    # @raise [RuntimeError] when a column type is not supported
    def self.type_cast(row, manifest_file)
      row.zip(manifest_file.column_types).map do |value, type|
        # null becomes '' on unload, so an empty value means nil for every
        # type except character varying, where '' is kept as an empty string.
        next nil if value == '' && type != 'character varying'

        case type
        when 'smallint', 'integer', 'bigint'
          value.to_i
        when 'numeric', 'double precision'
          value.to_f
        when 'character', 'character varying'
          value
        when 'timestamp without time zone', 'timestamp with time zone'
          value # Ruby does not have a class without timezone
        when 'date'
          Date.parse(value)
        when 'boolean'
          !FALSE_VALUES.include?(value)
        else
          raise "not support data type: #{type}"
        end
      end
    end
  end
end
@@ -1,5 +1,6 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/s3_data_file'
3
+ require 'queuery_client/s3_manifest_file'
3
4
  require 'aws-sdk-s3'
4
5
  require 'logger'
5
6
 
@@ -22,7 +23,23 @@ module QueueryClient
22
23
 
23
24
  def data_files
24
25
  b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
25
- b.objects(prefix: @prefix).map {|obj| S3DataFile.new(obj) }
26
+ b.objects(prefix: @prefix)
27
+ .select {|obj| obj.key.include?('_part_') }
28
+ .map {|obj| S3DataFile.new(obj) }
29
+ end
30
+
31
# Returns the manifest stored at "#{@prefix}manifest", or nil when that
# object does not exist.
#
# The lookup is memoized (including a nil result). DataFileBundle#each_row
# asks has_manifest? / manifest_file for every row; without the cache each
# row would trigger an S3 existence check and create a fresh S3ManifestFile,
# which also throws away ManifestFile#column_types' own cache.
#
# @return [S3ManifestFile, nil]
def manifest_file
  # defined? rather than ||= so that a cached nil is not looked up again.
  return @manifest_file if defined?(@manifest_file)

  bucket = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
  obj = bucket.object("#{@prefix}manifest")
  @manifest_file = obj.exists? ? S3ManifestFile.new(obj) : nil
end
40
+
41
+ def has_manifest?
42
+ !manifest_file.nil?
26
43
  end
27
44
  end
28
45
  end
@@ -0,0 +1,18 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'forwardable'
3
+
4
module QueueryClient
  # ManifestFile whose content lives in a wrapped S3 object.
  class S3ManifestFile < ManifestFile
    extend Forwardable

    # url, key and presigned_url are answered by the wrapped object.
    def_delegators :@object, :url, :key, :presigned_url

    # @param object [#get, #url, #key, #presigned_url] the wrapped S3 object
    def initialize(object)
      @object = object
    end

    # @return the body of the wrapped object's GET response
    def open
      response = @object.get
      response.body
    end
  end
end
@@ -1,18 +1,21 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/url_data_file'
3
+ require 'queuery_client/url_manifest_file'
3
4
  require 'uri'
4
5
  require 'logger'
5
6
 
6
7
  module QueueryClient
7
8
  class UrlDataFileBundle < DataFileBundle
8
- def initialize(urls, s3_prefix:, logger: Logger.new($stderr))
9
- raise ArgumentError, 'no URL given' if urls.empty?
10
- @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url)) }
9
+ def initialize(data_urls, manifest_url, s3_prefix:, logger: Logger.new($stderr))
10
+ raise ArgumentError, 'no URL given' if data_urls.empty?
11
+ @data_files = data_urls.map {|url| UrlDataFile.new(URI.parse(url)) }
12
+ @manifest_file = UrlManifestFile.new(URI.parse(manifest_url)) if manifest_url
11
13
  @s3_prefix = s3_prefix
12
14
  @logger = logger
13
15
  end
14
16
 
15
17
  attr_reader :data_files
18
+ attr_reader :manifest_file
16
19
  attr_reader :s3_prefix
17
20
  attr_reader :logger
18
21
 
@@ -29,5 +32,9 @@ module QueueryClient
29
32
  prefix = s3_uri.path[1..-1] # trim heading slash
30
33
  S3DataFileBundle.new(bucket, prefix)
31
34
  end
35
+
36
+ def has_manifest?
37
+ !@manifest_file.nil?
38
+ end
32
39
  end
33
40
  end
@@ -0,0 +1,28 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'net/http'
3
+ require 'stringio'
4
+ require 'json'
5
+
6
module QueueryClient
  # ManifestFile that is downloaded over HTTP(S) from a URL.
  class UrlManifestFile < ManifestFile
    # @param url [URI::HTTP] parsed URL of the manifest
    def initialize(url)
      @url = url
    end

    attr_reader :url

    # @return [String] path component of the URL
    def key
      @url.path
    end

    # Downloads the manifest and returns its content as an in-memory IO.
    #
    # @return [StringIO]
    # @raise [Net::ProtocolError] (via Net::HTTPResponse#value) when the
    #   server does not answer with a 2xx status
    def open
      use_ssl = @url.scheme.downcase == 'https'
      body = Net::HTTP.start(@url.host, @url.port, use_ssl: use_ssl) do |http|
        response = http.get(@url.request_uri)
        # Fail here on a non-2xx answer (e.g. an expired signed URL) instead
        # of handing the error page to the JSON parser as if it were the
        # manifest.
        response.value
        response.body
      end
      StringIO.new(body)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module QueueryClient
2
- VERSION = "1.0.3"
2
+ VERSION = "1.1.4"
3
3
  end
@@ -16,8 +16,8 @@ module QueueryClient
16
16
  configuration.instance_eval(&block)
17
17
  end
18
18
 
19
- def query(select_stmt, values = [])
20
- Client.new.query(select_stmt, values)
19
+ def query(select_stmt, values = [], enable_cast: false)
20
+ Client.new.query(select_stmt, values, enable_cast: enable_cast)
21
21
  end
22
22
  end
23
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: queuery_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-04 00:00:00.000000000 Z
11
+ date: 2021-12-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: garage_client
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ description:
98
98
  email:
99
99
  - hidekazu-kobayashi@cookpad.com
100
100
  executables: []
@@ -113,11 +113,15 @@ files:
113
113
  - lib/queuery_client/configuration.rb
114
114
  - lib/queuery_client/data_file.rb
115
115
  - lib/queuery_client/data_file_bundle.rb
116
+ - lib/queuery_client/manifest_file.rb
116
117
  - lib/queuery_client/query_error.rb
118
+ - lib/queuery_client/redshift_data_type.rb
117
119
  - lib/queuery_client/s3_data_file.rb
118
120
  - lib/queuery_client/s3_data_file_bundle.rb
121
+ - lib/queuery_client/s3_manifest_file.rb
119
122
  - lib/queuery_client/url_data_file.rb
120
123
  - lib/queuery_client/url_data_file_bundle.rb
124
+ - lib/queuery_client/url_manifest_file.rb
121
125
  - lib/queuery_client/version.rb
122
126
  - queuery_client.gemspec
123
127
  homepage: https://github.com/bricolages/queuery_client
@@ -125,7 +129,7 @@ licenses:
125
129
  - MIT
126
130
  metadata:
127
131
  allowed_push_host: https://rubygems.org
128
- post_install_message:
132
+ post_install_message:
129
133
  rdoc_options: []
130
134
  require_paths:
131
135
  - lib
@@ -140,8 +144,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
144
  - !ruby/object:Gem::Version
141
145
  version: '0'
142
146
  requirements: []
143
- rubygems_version: 3.1.2
144
- signing_key:
147
+ rubygems_version: 3.1.6
148
+ signing_key:
145
149
  specification_version: 4
146
150
  summary: Client library for Queuery Redshift HTTP API
147
151
  test_files: []