queuery_client 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5aa7c59714b2972a888c845beaaf38e220fdc0173cd746a0a73f510a1c69afd
4
- data.tar.gz: c876ff24ba9b2d6f489d480de5fc55b74d08ab2aa0229c4cb2a74ca0914053f9
3
+ metadata.gz: 2a5b41cb805e7fc0864fd695adc519c6b7212c294383935af40301d88b5b8d63
4
+ data.tar.gz: d4e970371d23e36bf40d4cafdf9e7da0d886c25bb0882ce9e12fc2a971a0e2c0
5
5
  SHA512:
6
- metadata.gz: 0c154295e3c4c676de39ee43b544ec458285044c0d797f7ec199c39589023019c7f407623745a53d2da6016a9eb1b1594240e2a1f74d0093025e9d3d73d0ab56
7
- data.tar.gz: f7340f67c56865d1c179d062801087931db880a6f9ccbc3f5d3109438ab277a2902000e253bf6d45f8ceebf47b91f2d27fb6709e2fe305f90201d6e9fba206c7
6
+ metadata.gz: f61f86445e64fca2863ba474a6cfb27e87af761516b3fbb5394adbe6f61e23e4f9d81778cded02dc512bf495bb8b711099962b5fd63154880bcc3e746bf25f61
7
+ data.tar.gz: ee0f09b1e92729056857117a7fdf4fa422d13bae5168f521e6d2cd8ca8adc2152334c59fd826aff916369ee8db811c792a49087ddd08dfdb283c0a31445bab78
@@ -4,18 +4,19 @@ module QueueryClient
4
4
  @options = options
5
5
  end
6
6
 
7
- def execute_query(select_stmt, values)
8
- garage_client.post("/v1/queries", q: select_stmt, values: values)
7
+ def execute_query(select_stmt, values, query_options)
8
+ garage_client.post("/v1/queries", q: select_stmt, values: values, enable_metadata: query_options[:enable_cast])
9
9
  end
10
10
  alias start_query execute_query
11
11
 
12
- def get_query(id)
13
- garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix')
12
+ def get_query(id, query_options)
13
+ query_option_fields = build_query_option_fields(query_options)
14
+ garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix' + query_option_fields)
14
15
  end
15
16
 
16
- def wait_for(id)
17
+ def wait_for(id, query_options)
17
18
  loop do
18
- query = get_query(id)
19
+ query = get_query(id, query_options)
19
20
  case query.status
20
21
  when 'success', 'failed'
21
22
  return query
@@ -24,17 +25,19 @@ module QueueryClient
24
25
  end
25
26
  end
26
27
 
27
- def query_and_wait(select_stmt, values)
28
- query = execute_query(select_stmt, values)
29
- wait_for(query.id)
28
+ def query_and_wait(select_stmt, values, query_options)
29
+ query = execute_query(select_stmt, values, query_options)
30
+ wait_for(query.id, query_options)
30
31
  end
31
32
 
32
- def query(select_stmt, values)
33
- query = query_and_wait(select_stmt, values)
33
+ def query(select_stmt, values, **query_options)
34
+ query = query_and_wait(select_stmt, values, query_options)
35
+ manifest_file_url = query.manifest_file_url if query_options[:enable_cast]
34
36
  case query.status
35
37
  when 'success'
36
38
  UrlDataFileBundle.new(
37
39
  query.data_file_urls,
40
+ manifest_file_url,
38
41
  s3_prefix: query.s3_prefix,
39
42
  )
40
43
  when 'failed'
@@ -76,11 +79,27 @@ module QueueryClient
76
79
  when 'success'
77
80
  UrlDataFileBundle.new(
78
81
  query.data_file_urls,
82
+ nil,
79
83
  s3_prefix: query.s3_prefix,
80
84
  )
81
85
  when 'failure'
82
86
  raise QueryError.new(query.error)
83
87
  end
84
88
  end
89
+
90
+ def build_query_option_fields(query_options)
91
+ enable_query_options = query_options.select{ |name, v| name if v }.keys
92
+ return '' if enable_query_options.empty?
93
+ query_option_fields = enable_query_options.map{ |option_name| convert_field_name(option_name) }
94
+ ',' + query_option_fields.join(',')
95
+ end
96
+
97
+ def convert_field_name(option_name)
98
+ case option_name
99
+ when :enable_cast
100
+ 'manifest_file_url'
101
+ # add another option here if you need
102
+ end
103
+ end
85
104
  end
86
105
  end
@@ -2,7 +2,7 @@ require 'redshift_csv_file'
2
2
  require 'zlib'
3
3
 
4
4
  module QueueryClient
5
- class DataFile
5
+ class DataFile # abstract class
6
6
  def data_object?
7
7
  /\.csv(?:\.|\z)/ =~ File.basename(key)
8
8
  end
@@ -1,6 +1,12 @@
1
+ require 'queuery_client/redshift_data_type'
2
+ require 'date'
3
+ require 'time'
4
+
1
5
  module QueueryClient
2
6
  class DataFileBundle
3
7
  # abstract data_files :: [DataFile]
8
+ # abstract manifest_file :: ManifestFile
9
+ # abstract def has_manifest?
4
10
 
5
11
  def each_row(&block)
6
12
  return enum_for(:each_row) if !block_given?
@@ -8,14 +14,18 @@ module QueueryClient
8
14
  data_files.each do |file|
9
15
  if file.data_object?
10
16
  file.each_row do |row|
11
- yield row
17
+ if has_manifest?
18
+ yield RedshiftDataType.type_cast(row, manifest_file)
19
+ else
20
+ yield row
21
+ end
12
22
  end
13
23
  end
14
24
  end
15
25
 
16
26
  self
17
27
  end
18
-
19
28
  alias each each_row
29
+
20
30
  end
21
31
  end
@@ -0,0 +1,20 @@
1
+ require 'json'
2
+
3
+ module QueueryClient
4
+ class ManifestFile # abstract class
5
+ def manifest_object?
6
+ /\.manifest(?:\.|\z)/ =~ File.basename(key)
7
+ end
8
+
9
+ def column_types
10
+ @column_types ||=
11
+ begin
12
+ f = open
13
+ j = JSON.load(f)
14
+ j['schema']['elements'].map{|x| x['type']['base']}
15
+ ensure
16
+ f.close
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,27 @@
1
+
2
+ module QueueryClient
3
+ module RedshiftDataType
4
+ def self.type_cast(row, manifest_file)
5
+ row.zip(manifest_file.column_types).map do |value, type|
6
+ next nil if (value == '' and type != 'character varing') # null becomes '' on unload
7
+
8
+ case type
9
+ when 'smallint', 'integer', 'bigint'
10
+ value.to_i
11
+ when 'numeric', 'double precision'
12
+ value.to_f
13
+ when 'character', 'character varying'
14
+ value
15
+ when 'timestamp without time zone', 'timestamp with time zone'
16
+ Time.parse(value)
17
+ when 'date'
18
+ Date.parse(value)
19
+ when 'boolean'
20
+ value == 'true' ? true : false
21
+ else
22
+ raise "not support data type: #{type}"
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,5 +1,6 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/s3_data_file'
3
+ require 'queuery_client/s3_manifest_file'
3
4
  require 'aws-sdk-s3'
4
5
  require 'logger'
5
6
 
@@ -24,5 +25,15 @@ module QueueryClient
24
25
  b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
25
26
  b.objects(prefix: @prefix).map {|obj| S3DataFile.new(obj) }
26
27
  end
28
+
29
+ def manifest_file
30
+ b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
31
+ obj = b.object("#{@prefix}manifest")
32
+ S3ManifestFile.new(obj)
33
+ end
34
+
35
+ def has_manifest?
36
+ !manifest_file.nil?
37
+ end
27
38
  end
28
39
  end
@@ -0,0 +1,18 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'forwardable'
3
+
4
+ module QueueryClient
5
+ class S3ManifestFile < ManifestFile
6
+ extend Forwardable
7
+
8
+ def initialize(object)
9
+ @object = object
10
+ end
11
+
12
+ def_delegators '@object', :url, :key, :presigned_url
13
+
14
+ def open
15
+ @object.get.body
16
+ end
17
+ end
18
+ end
@@ -1,18 +1,21 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/url_data_file'
3
+ require 'queuery_client/url_manifest_file'
3
4
  require 'uri'
4
5
  require 'logger'
5
6
 
6
7
  module QueueryClient
7
8
  class UrlDataFileBundle < DataFileBundle
8
- def initialize(urls, s3_prefix:, logger: Logger.new($stderr))
9
- raise ArgumentError, 'no URL given' if urls.empty?
10
- @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url)) }
9
+ def initialize(data_urls, manifest_url, s3_prefix:, logger: Logger.new($stderr))
10
+ raise ArgumentError, 'no URL given' if data_urls.empty?
11
+ @data_files = data_urls.map {|url| UrlDataFile.new(URI.parse(url)) }
12
+ @manifest_file = UrlManifestFile.new(URI.parse(manifest_url)) if manifest_url
11
13
  @s3_prefix = s3_prefix
12
14
  @logger = logger
13
15
  end
14
16
 
15
17
  attr_reader :data_files
18
+ attr_reader :manifest_file
16
19
  attr_reader :s3_prefix
17
20
  attr_reader :logger
18
21
 
@@ -29,5 +32,9 @@ module QueueryClient
29
32
  prefix = s3_uri.path[1..-1] # trim heading slash
30
33
  S3DataFileBundle.new(bucket, prefix)
31
34
  end
35
+
36
+ def has_manifest?
37
+ !@manifest_file.nil?
38
+ end
32
39
  end
33
40
  end
@@ -0,0 +1,28 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'net/http'
3
+ require 'stringio'
4
+ require 'json'
5
+
6
+ module QueueryClient
7
+ class UrlManifestFile < ManifestFile
8
+ def initialize(url)
9
+ @url = url
10
+ end
11
+
12
+ attr_reader :url
13
+
14
+ def key
15
+ @url.path
16
+ end
17
+
18
+ def open
19
+ http = Net::HTTP.new(@url.host, @url.port)
20
+ http.use_ssl = (@url.scheme.downcase == 'https')
21
+ content = http.start {
22
+ res = http.get(@url.request_uri)
23
+ res.body
24
+ }
25
+ StringIO.new(content)
26
+ end
27
+ end
28
+ end
@@ -1,3 +1,3 @@
1
1
  module QueueryClient
2
- VERSION = "1.0.3"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -16,8 +16,8 @@ module QueueryClient
16
16
  configuration.instance_eval(&block)
17
17
  end
18
18
 
19
- def query(select_stmt, values = [])
20
- Client.new.query(select_stmt, values)
19
+ def query(select_stmt, values = [], enable_cast: false)
20
+ Client.new.query(select_stmt, values, enable_cast: enable_cast)
21
21
  end
22
22
  end
23
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: queuery_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-04 00:00:00.000000000 Z
11
+ date: 2021-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: garage_client
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ description:
98
98
  email:
99
99
  - hidekazu-kobayashi@cookpad.com
100
100
  executables: []
@@ -113,11 +113,15 @@ files:
113
113
  - lib/queuery_client/configuration.rb
114
114
  - lib/queuery_client/data_file.rb
115
115
  - lib/queuery_client/data_file_bundle.rb
116
+ - lib/queuery_client/manifest_file.rb
116
117
  - lib/queuery_client/query_error.rb
118
+ - lib/queuery_client/redshift_data_type.rb
117
119
  - lib/queuery_client/s3_data_file.rb
118
120
  - lib/queuery_client/s3_data_file_bundle.rb
121
+ - lib/queuery_client/s3_manifest_file.rb
119
122
  - lib/queuery_client/url_data_file.rb
120
123
  - lib/queuery_client/url_data_file_bundle.rb
124
+ - lib/queuery_client/url_manifest_file.rb
121
125
  - lib/queuery_client/version.rb
122
126
  - queuery_client.gemspec
123
127
  homepage: https://github.com/bricolages/queuery_client
@@ -125,7 +129,7 @@ licenses:
125
129
  - MIT
126
130
  metadata:
127
131
  allowed_push_host: https://rubygems.org
128
- post_install_message:
132
+ post_install_message:
129
133
  rdoc_options: []
130
134
  require_paths:
131
135
  - lib
@@ -140,8 +144,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
144
  - !ruby/object:Gem::Version
141
145
  version: '0'
142
146
  requirements: []
143
- rubygems_version: 3.1.2
144
- signing_key:
147
+ rubygems_version: 3.1.6
148
+ signing_key:
145
149
  specification_version: 4
146
150
  summary: Client library for Queuery Redshift HTTP API
147
151
  test_files: []