queuery_client 1.0.3 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5aa7c59714b2972a888c845beaaf38e220fdc0173cd746a0a73f510a1c69afd
4
- data.tar.gz: c876ff24ba9b2d6f489d480de5fc55b74d08ab2aa0229c4cb2a74ca0914053f9
3
+ metadata.gz: 2a5b41cb805e7fc0864fd695adc519c6b7212c294383935af40301d88b5b8d63
4
+ data.tar.gz: d4e970371d23e36bf40d4cafdf9e7da0d886c25bb0882ce9e12fc2a971a0e2c0
5
5
  SHA512:
6
- metadata.gz: 0c154295e3c4c676de39ee43b544ec458285044c0d797f7ec199c39589023019c7f407623745a53d2da6016a9eb1b1594240e2a1f74d0093025e9d3d73d0ab56
7
- data.tar.gz: f7340f67c56865d1c179d062801087931db880a6f9ccbc3f5d3109438ab277a2902000e253bf6d45f8ceebf47b91f2d27fb6709e2fe305f90201d6e9fba206c7
6
+ metadata.gz: f61f86445e64fca2863ba474a6cfb27e87af761516b3fbb5394adbe6f61e23e4f9d81778cded02dc512bf495bb8b711099962b5fd63154880bcc3e746bf25f61
7
+ data.tar.gz: ee0f09b1e92729056857117a7fdf4fa422d13bae5168f521e6d2cd8ca8adc2152334c59fd826aff916369ee8db811c792a49087ddd08dfdb283c0a31445bab78
@@ -4,18 +4,19 @@ module QueueryClient
4
4
  @options = options
5
5
  end
6
6
 
7
- def execute_query(select_stmt, values)
8
- garage_client.post("/v1/queries", q: select_stmt, values: values)
7
+ def execute_query(select_stmt, values, query_options)
8
+ garage_client.post("/v1/queries", q: select_stmt, values: values, enable_metadata: query_options[:enable_cast])
9
9
  end
10
10
  alias start_query execute_query
11
11
 
12
- def get_query(id)
13
- garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix')
12
+ def get_query(id, query_options)
13
+ query_option_fields = build_query_option_fields(query_options)
14
+ garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix' + query_option_fields)
14
15
  end
15
16
 
16
- def wait_for(id)
17
+ def wait_for(id, query_options)
17
18
  loop do
18
- query = get_query(id)
19
+ query = get_query(id, query_options)
19
20
  case query.status
20
21
  when 'success', 'failed'
21
22
  return query
@@ -24,17 +25,19 @@ module QueueryClient
24
25
  end
25
26
  end
26
27
 
27
- def query_and_wait(select_stmt, values)
28
- query = execute_query(select_stmt, values)
29
- wait_for(query.id)
28
+ def query_and_wait(select_stmt, values, query_options)
29
+ query = execute_query(select_stmt, values, query_options)
30
+ wait_for(query.id, query_options)
30
31
  end
31
32
 
32
- def query(select_stmt, values)
33
- query = query_and_wait(select_stmt, values)
33
+ def query(select_stmt, values, **query_options)
34
+ query = query_and_wait(select_stmt, values, query_options)
35
+ manifest_file_url = query.manifest_file_url if query_options[:enable_cast]
34
36
  case query.status
35
37
  when 'success'
36
38
  UrlDataFileBundle.new(
37
39
  query.data_file_urls,
40
+ manifest_file_url,
38
41
  s3_prefix: query.s3_prefix,
39
42
  )
40
43
  when 'failed'
@@ -76,11 +79,27 @@ module QueueryClient
76
79
  when 'success'
77
80
  UrlDataFileBundle.new(
78
81
  query.data_file_urls,
82
+ nil,
79
83
  s3_prefix: query.s3_prefix,
80
84
  )
81
85
  when 'failure'
82
86
  raise QueryError.new(query.error)
83
87
  end
84
88
  end
89
+
90
+ def build_query_option_fields(query_options)
91
+ enable_query_options = query_options.select{ |name, v| name if v }.keys
92
+ return '' if enable_query_options.empty?
93
+ query_option_fields = enable_query_options.map{ |option_name| convert_field_name(option_name) }
94
+ ',' + query_option_fields.join(',')
95
+ end
96
+
97
+ def convert_field_name(option_name)
98
+ case option_name
99
+ when :enable_cast
100
+ 'manifest_file_url'
101
+ # add another option here if you need
102
+ end
103
+ end
85
104
  end
86
105
  end
@@ -2,7 +2,7 @@ require 'redshift_csv_file'
2
2
  require 'zlib'
3
3
 
4
4
  module QueueryClient
5
- class DataFile
5
+ class DataFile # abstract class
6
6
  def data_object?
7
7
  /\.csv(?:\.|\z)/ =~ File.basename(key)
8
8
  end
@@ -1,6 +1,12 @@
1
+ require 'queuery_client/redshift_data_type'
2
+ require 'date'
3
+ require 'time'
4
+
1
5
  module QueueryClient
2
6
  class DataFileBundle
3
7
  # abstract data_files :: [DataFile]
8
+ # abstract manifest_file :: ManifestFile
9
+ # abstract def has_manifest?
4
10
 
5
11
  def each_row(&block)
6
12
  return enum_for(:each_row) if !block_given?
@@ -8,14 +14,18 @@ module QueueryClient
8
14
  data_files.each do |file|
9
15
  if file.data_object?
10
16
  file.each_row do |row|
11
- yield row
17
+ if has_manifest?
18
+ yield RedshiftDataType.type_cast(row, manifest_file)
19
+ else
20
+ yield row
21
+ end
12
22
  end
13
23
  end
14
24
  end
15
25
 
16
26
  self
17
27
  end
18
-
19
28
  alias each each_row
29
+
20
30
  end
21
31
  end
@@ -0,0 +1,20 @@
1
+ require 'json'
2
+
3
+ module QueueryClient
4
+ class ManifestFile # abstract class
5
+ def manifest_object?
6
+ /\.manifest(?:\.|\z)/ =~ File.basename(key)
7
+ end
8
+
9
+ def column_types
10
+ @column_types ||=
11
+ begin
12
+ f = open
13
+ j = JSON.load(f)
14
+ j['schema']['elements'].map{|x| x['type']['base']}
15
+ ensure
16
+ f.close
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,27 @@
1
+
2
+ module QueueryClient
3
+ module RedshiftDataType
4
+ def self.type_cast(row, manifest_file)
5
+ row.zip(manifest_file.column_types).map do |value, type|
6
+ next nil if (value == '' and type != 'character varing') # null becomes '' on unload
7
+
8
+ case type
9
+ when 'smallint', 'integer', 'bigint'
10
+ value.to_i
11
+ when 'numeric', 'double precision'
12
+ value.to_f
13
+ when 'character', 'character varying'
14
+ value
15
+ when 'timestamp without time zone', 'timestamp with time zone'
16
+ Time.parse(value)
17
+ when 'date'
18
+ Date.parse(value)
19
+ when 'boolean'
20
+ value == 'true' ? true : false
21
+ else
22
+ raise "not support data type: #{type}"
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,5 +1,6 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/s3_data_file'
3
+ require 'queuery_client/s3_manifest_file'
3
4
  require 'aws-sdk-s3'
4
5
  require 'logger'
5
6
 
@@ -24,5 +25,15 @@ module QueueryClient
24
25
  b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
25
26
  b.objects(prefix: @prefix).map {|obj| S3DataFile.new(obj) }
26
27
  end
28
+
29
+ def manifest_file
30
+ b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
31
+ obj = b.object("#{@prefix}manifest")
32
+ S3ManifestFile.new(obj)
33
+ end
34
+
35
+ def has_manifest?
36
+ !manifest_file.nil?
37
+ end
27
38
  end
28
39
  end
@@ -0,0 +1,18 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'forwardable'
3
+
4
+ module QueueryClient
5
+ class S3ManifestFile < ManifestFile
6
+ extend Forwardable
7
+
8
+ def initialize(object)
9
+ @object = object
10
+ end
11
+
12
+ def_delegators '@object', :url, :key, :presigned_url
13
+
14
+ def open
15
+ @object.get.body
16
+ end
17
+ end
18
+ end
@@ -1,18 +1,21 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/url_data_file'
3
+ require 'queuery_client/url_manifest_file'
3
4
  require 'uri'
4
5
  require 'logger'
5
6
 
6
7
  module QueueryClient
7
8
  class UrlDataFileBundle < DataFileBundle
8
- def initialize(urls, s3_prefix:, logger: Logger.new($stderr))
9
- raise ArgumentError, 'no URL given' if urls.empty?
10
- @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url)) }
9
+ def initialize(data_urls, manifest_url, s3_prefix:, logger: Logger.new($stderr))
10
+ raise ArgumentError, 'no URL given' if data_urls.empty?
11
+ @data_files = data_urls.map {|url| UrlDataFile.new(URI.parse(url)) }
12
+ @manifest_file = UrlManifestFile.new(URI.parse(manifest_url)) if manifest_url
11
13
  @s3_prefix = s3_prefix
12
14
  @logger = logger
13
15
  end
14
16
 
15
17
  attr_reader :data_files
18
+ attr_reader :manifest_file
16
19
  attr_reader :s3_prefix
17
20
  attr_reader :logger
18
21
 
@@ -29,5 +32,9 @@ module QueueryClient
29
32
  prefix = s3_uri.path[1..-1] # trim heading slash
30
33
  S3DataFileBundle.new(bucket, prefix)
31
34
  end
35
+
36
+ def has_manifest?
37
+ !@manifest_file.nil?
38
+ end
32
39
  end
33
40
  end
@@ -0,0 +1,28 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'net/http'
3
+ require 'stringio'
4
+ require 'json'
5
+
6
+ module QueueryClient
7
+ class UrlManifestFile < ManifestFile
8
+ def initialize(url)
9
+ @url = url
10
+ end
11
+
12
+ attr_reader :url
13
+
14
+ def key
15
+ @url.path
16
+ end
17
+
18
+ def open
19
+ http = Net::HTTP.new(@url.host, @url.port)
20
+ http.use_ssl = (@url.scheme.downcase == 'https')
21
+ content = http.start {
22
+ res = http.get(@url.request_uri)
23
+ res.body
24
+ }
25
+ StringIO.new(content)
26
+ end
27
+ end
28
+ end
@@ -1,3 +1,3 @@
1
1
  module QueueryClient
2
- VERSION = "1.0.3"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -16,8 +16,8 @@ module QueueryClient
16
16
  configuration.instance_eval(&block)
17
17
  end
18
18
 
19
- def query(select_stmt, values = [])
20
- Client.new.query(select_stmt, values)
19
+ def query(select_stmt, values = [], enable_cast: false)
20
+ Client.new.query(select_stmt, values, enable_cast: enable_cast)
21
21
  end
22
22
  end
23
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: queuery_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-04 00:00:00.000000000 Z
11
+ date: 2021-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: garage_client
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ description:
98
98
  email:
99
99
  - hidekazu-kobayashi@cookpad.com
100
100
  executables: []
@@ -113,11 +113,15 @@ files:
113
113
  - lib/queuery_client/configuration.rb
114
114
  - lib/queuery_client/data_file.rb
115
115
  - lib/queuery_client/data_file_bundle.rb
116
+ - lib/queuery_client/manifest_file.rb
116
117
  - lib/queuery_client/query_error.rb
118
+ - lib/queuery_client/redshift_data_type.rb
117
119
  - lib/queuery_client/s3_data_file.rb
118
120
  - lib/queuery_client/s3_data_file_bundle.rb
121
+ - lib/queuery_client/s3_manifest_file.rb
119
122
  - lib/queuery_client/url_data_file.rb
120
123
  - lib/queuery_client/url_data_file_bundle.rb
124
+ - lib/queuery_client/url_manifest_file.rb
121
125
  - lib/queuery_client/version.rb
122
126
  - queuery_client.gemspec
123
127
  homepage: https://github.com/bricolages/queuery_client
@@ -125,7 +129,7 @@ licenses:
125
129
  - MIT
126
130
  metadata:
127
131
  allowed_push_host: https://rubygems.org
128
- post_install_message:
132
+ post_install_message:
129
133
  rdoc_options: []
130
134
  require_paths:
131
135
  - lib
@@ -140,8 +144,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
144
  - !ruby/object:Gem::Version
141
145
  version: '0'
142
146
  requirements: []
143
- rubygems_version: 3.1.2
144
- signing_key:
147
+ rubygems_version: 3.1.6
148
+ signing_key:
145
149
  specification_version: 4
146
150
  summary: Client library for Queuery Redshift HTTP API
147
151
  test_files: []