queuery_client 1.0.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 893d3e23c931f4f67621e03306cb3523e2bf86affe3a4c7bd6958e5cd4a7aa12
4
- data.tar.gz: 1fbcebdefffabad096aee653ab255329696b8e5b6e2056f4742a619ec15e3810
3
+ metadata.gz: 4c739774fd292daacd00b2b803065c89d8244a309f2ef09936b107360b4b2cbf
4
+ data.tar.gz: 7a2aac170572e82baaf78a8923465e083b6c485e75191842c7a7cad39922e1e1
5
5
  SHA512:
6
- metadata.gz: 4e7ac1fc3d9b850fc0d7d9261c1db44664511997ec17789dd153d9faf9296af7caa689561d30cfde7e07918d43331fbd11b92a1704c9781b489bb3b17704a4e3
7
- data.tar.gz: 59ca2f54c33e271ffa93d59c224fa98ae11b729152407b5b7691413bce1ca2b4a2ec6b14f9a6c6253cade2fe1bee290aa12f6496daa821441e9d7e9092b3cf91
6
+ metadata.gz: a9c74e88514542dba44ca726bd1c085d89ad308b56e2e094c4459055db4e93ae8589d090cb87e2df05d980c4a8d312b1985ab365015c19cbad57ea3ce0e30838
7
+ data.tar.gz: cd8d07630fc414c51aa7bec61d8abf5c3dabc93239db9e824577def9a56f7affcc39dd0e38190644f32081e188d5b13d4551b0e93999c76d90698358a75ed5e9
@@ -4,18 +4,19 @@ module QueueryClient
4
4
  @options = options
5
5
  end
6
6
 
7
- def execute_query(select_stmt, values)
8
- garage_client.post("/v1/queries", q: select_stmt, values: values)
7
+ def execute_query(select_stmt, values, query_options)
8
+ garage_client.post("/v1/queries", q: select_stmt, values: values, enable_metadata: query_options[:enable_cast])
9
9
  end
10
10
  alias start_query execute_query
11
11
 
12
- def get_query(id)
13
- garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix')
12
+ def get_query(id, query_options)
13
+ query_option_fields = build_query_option_fields(query_options)
14
+ garage_client.get("/v1/queries/#{id}", fields: '__default__,s3_prefix' + query_option_fields)
14
15
  end
15
16
 
16
- def wait_for(id)
17
+ def wait_for(id, query_options)
17
18
  loop do
18
- query = get_query(id)
19
+ query = get_query(id, query_options)
19
20
  case query.status
20
21
  when 'success', 'failed'
21
22
  return query
@@ -24,17 +25,19 @@ module QueueryClient
24
25
  end
25
26
  end
26
27
 
27
- def query_and_wait(select_stmt, values)
28
- query = execute_query(select_stmt, values)
29
- wait_for(query.id)
28
+ def query_and_wait(select_stmt, values, query_options)
29
+ query = execute_query(select_stmt, values, query_options)
30
+ wait_for(query.id, query_options)
30
31
  end
31
32
 
32
- def query(select_stmt, values)
33
- query = query_and_wait(select_stmt, values)
33
+ def query(select_stmt, values, **query_options)
34
+ query = query_and_wait(select_stmt, values, query_options)
35
+ manifest_file_url = query.manifest_file_url if query_options[:enable_cast]
34
36
  case query.status
35
37
  when 'success'
36
38
  UrlDataFileBundle.new(
37
39
  query.data_file_urls,
40
+ manifest_file_url,
38
41
  s3_prefix: query.s3_prefix,
39
42
  )
40
43
  when 'failed'
@@ -54,7 +57,9 @@ module QueueryClient
54
57
  path_prefix: '/',
55
58
  login: options.token,
56
59
  password: options.token_secret
57
- )
60
+ ).tap do |client|
61
+ client.headers['Host'] = options.host_header if options.host_header
62
+ end
58
63
  end
59
64
 
60
65
  def options
@@ -74,11 +79,27 @@ module QueueryClient
74
79
  when 'success'
75
80
  UrlDataFileBundle.new(
76
81
  query.data_file_urls,
82
+ nil,
77
83
  s3_prefix: query.s3_prefix,
78
84
  )
79
85
  when 'failure'
80
86
  raise QueryError.new(query.error)
81
87
  end
82
88
  end
89
+
90
# Builds the suffix appended to the `fields` parameter of a query lookup.
#
# query_options - Hash of option name => flag. Every option whose flag is
#                 truthy is translated through convert_field_name.
#
# Options that have no corresponding field are dropped, so they cannot leave
# an empty entry (a stray comma) in the comma-separated list.
#
# Returns '' when no extra field is needed, otherwise a String that starts
# with ',' (for example ',manifest_file_url').
def build_query_option_fields(query_options)
  field_names = query_options
    .select { |_name, enabled| enabled }
    .keys
    .map { |option_name| convert_field_name(option_name) }
    .compact
  return '' if field_names.empty?

  ",#{field_names.join(',')}"
end

# Maps a query option name to the response field it requires.
# Returns nil for an option that needs no additional field.
def convert_field_name(option_name)
  case option_name
  when :enable_cast
    'manifest_file_url'
  # add another option here if you need
  end
end
83
104
  end
84
105
  end
@@ -1,5 +1,8 @@
1
1
  module QueueryClient
2
2
  class Configuration
3
+ REQUIRED_KEYS = [:endpoint, :token, :token_secret]
4
+ OPTIONAL_KEYS = [:host_header]
5
+
3
6
  def initialize(options = {})
4
7
  @options = options
5
8
  end
@@ -12,11 +15,7 @@ module QueueryClient
12
15
  @options = nil
13
16
  end
14
17
 
15
- [
16
- :endpoint,
17
- :token,
18
- :token_secret,
19
- ].each do |key|
18
+ REQUIRED_KEYS.each do |key|
20
19
  define_method(key) do
21
20
  options.fetch(key)
22
21
  end
@@ -26,6 +25,16 @@ module QueueryClient
26
25
  end
27
26
  end
28
27
 
28
+ OPTIONAL_KEYS.each do |key|
29
+ define_method(key) do
30
+ options[key]
31
+ end
32
+
33
+ define_method("#{key}=") do |value|
34
+ options[key] = value
35
+ end
36
+ end
37
+
29
38
  def merge(other)
30
39
  Configuration.new(to_h.merge(other.to_h))
31
40
  end
@@ -2,7 +2,7 @@ require 'redshift_csv_file'
2
2
  require 'zlib'
3
3
 
4
4
  module QueueryClient
5
- class DataFile
5
+ class DataFile # abstract class
6
6
  def data_object?
7
7
  /\.csv(?:\.|\z)/ =~ File.basename(key)
8
8
  end
@@ -12,15 +12,21 @@ module QueueryClient
12
12
  end
13
13
 
14
14
  def each_row(&block)
15
+ return enum_for(:each_row) if !block_given?
16
+
15
17
  f = open
16
18
  begin
17
19
  if gzipped_object?
18
20
  f = Zlib::GzipReader.new(f)
19
21
  end
20
- RedshiftCsvFile.new(f).each(&block)
22
+ RedshiftCsvFile.new(f).each do |row|
23
+ yield row
24
+ end
21
25
  ensure
22
26
  f.close
23
27
  end
28
+
29
+ self
24
30
  end
25
31
  end
26
32
  end
@@ -1,15 +1,31 @@
1
+ require 'queuery_client/redshift_data_type'
2
+ require 'date'
3
+ require 'time'
4
+
1
5
  module QueueryClient
2
6
  class DataFileBundle
3
7
  # abstract data_files :: [DataFile]
8
+ # abstract manifest_file :: ManifestFile
9
+ # abstract def has_manifest?
4
10
 
5
11
  def each_row(&block)
12
+ return enum_for(:each_row) if !block_given?
13
+
6
14
  data_files.each do |file|
7
15
  if file.data_object?
8
- file.each_row(&block)
16
+ file.each_row do |row|
17
+ if has_manifest?
18
+ yield RedshiftDataType.type_cast(row, manifest_file)
19
+ else
20
+ yield row
21
+ end
22
+ end
9
23
  end
10
24
  end
11
- end
12
25
 
26
+ self
27
+ end
13
28
  alias each each_row
29
+
14
30
  end
15
31
  end
@@ -0,0 +1,20 @@
1
+ require 'json'
2
+
3
module QueueryClient
  # Abstract base class for a manifest file.
  #
  # Subclasses provide `key` (the object's path) and `open` (an IO-like
  # object that yields the manifest JSON and responds to `read` and `close`).
  class ManifestFile # abstract class
    # Returns a truthy value when the basename of `key` contains ".manifest"
    # followed by either another "." or the end of the name.
    def manifest_object?
      /\.manifest(?:\.|\z)/ =~ File.basename(key)
    end

    # Returns the base type name of every element in the manifest's schema,
    # in the order they are listed. The manifest is read once and the result
    # is cached on this instance.
    def column_types
      @column_types ||=
        begin
          f = open
          # The inner begin starts only after `open` succeeded, so a failure to
          # open is reported as-is instead of being replaced by a NoMethodError
          # from calling `close` on nil.
          begin
            manifest = JSON.parse(f.read)
          ensure
            f.close
          end
          manifest['schema']['elements'].map { |element| element['type']['base'] }
        end
    end
  end
end
@@ -0,0 +1,38 @@
1
+
2
require 'set' # Enumerable#to_set needs this on Rubies that do not autoload Set
require 'date'

module QueueryClient
  # Converts the raw string fields of an unloaded row into Ruby values, using
  # the column type names reported by a manifest file.
  module RedshiftDataType
    # Values treated as boolean false; anything else in a boolean column is true.
    FALSE_VALUES = [
      false, 0,
      "0", :"0",
      "f", :f,
      "F", :F,
      "false", :false,
      "FALSE", :FALSE,
      "off", :off,
      "OFF", :OFF,
    ].to_set.freeze

    # Casts every field of one row according to its column type.
    #
    # row           - Array of raw field values for one row.
    # manifest_file - object whose #column_types returns the type names in
    #                 column order.
    #
    # Returns a new Array with one converted value per field. A nil field, or
    # an empty string in any column other than 'character varying', becomes nil.
    # Raises RuntimeError when a column has a type this module does not handle.
    def self.type_cast(row, manifest_file)
      row.zip(manifest_file.column_types).map do |value, type|
        next nil if value.nil?
        # null becomes '' on unload; only a varchar column keeps '' as a value
        next nil if value == '' && type != 'character varying'

        case type
        when 'smallint', 'integer', 'bigint'
          value.to_i
        when 'numeric', 'double precision'
          value.to_f
        when 'character', 'character varying'
          value
        when 'timestamp without time zone', 'timestamp with time zone'
          value # Ruby does not have a class without timezone
        when 'date'
          Date.parse(value)
        when 'boolean'
          !FALSE_VALUES.include?(value)
        else
          raise "not support data type: #{type}"
        end
      end
    end
  end
end
@@ -1,5 +1,6 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/s3_data_file'
3
+ require 'queuery_client/s3_manifest_file'
3
4
  require 'aws-sdk-s3'
4
5
  require 'logger'
5
6
 
@@ -22,7 +23,23 @@ module QueueryClient
22
23
 
23
24
  def data_files
24
25
  b = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
25
- b.objects(prefix: @prefix).map {|obj| S3DataFile.new(obj) }
26
+ b.objects(prefix: @prefix)
27
+ .select {|obj| obj.key.include?('_part_') }
28
+ .map {|obj| S3DataFile.new(obj) }
29
+ end
30
+
31
# Returns an S3ManifestFile for the object at "<prefix>manifest", or nil when
# that object does not exist.
#
# The outcome (including nil) is cached on this bundle. DataFileBundle#each_row
# asks for the manifest for every row; without the cache each row would
# trigger another existence check and would receive a fresh S3ManifestFile,
# whose own column-type cache would start out empty again.
def manifest_file
  return @manifest_file if defined?(@manifest_file)

  bucket = Aws::S3::Resource.new(client: @s3_client).bucket(@bucket)
  obj = bucket.object("#{@prefix}manifest")
  @manifest_file = obj.exists? ? S3ManifestFile.new(obj) : nil
end
40
+
41
+ def has_manifest?
42
+ !manifest_file.nil?
26
43
  end
27
44
  end
28
45
  end
@@ -0,0 +1,18 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'forwardable'
3
+
4
module QueueryClient
  # ManifestFile implementation that wraps the object passed to the
  # constructor and reads the manifest body from it.
  class S3ManifestFile < ManifestFile
    extend Forwardable

    # url, key and presigned_url are answered by the wrapped object itself.
    %i[url key presigned_url].each do |delegated|
      def_delegator '@object', delegated
    end

    def initialize(object)
      @object = object
    end

    # Fetches the wrapped object and returns the body of the response.
    def open
      response = @object.get
      response.body
    end
  end
end
@@ -1,18 +1,21 @@
1
1
  require 'queuery_client/data_file_bundle'
2
2
  require 'queuery_client/url_data_file'
3
+ require 'queuery_client/url_manifest_file'
3
4
  require 'uri'
4
5
  require 'logger'
5
6
 
6
7
  module QueueryClient
7
8
  class UrlDataFileBundle < DataFileBundle
8
- def initialize(urls, s3_prefix:, logger: Logger.new($stderr))
9
- raise ArgumentError, 'no URL given' if urls.empty?
10
- @data_files = urls.map {|url| UrlDataFile.new(URI.parse(url)) }
9
+ def initialize(data_urls, manifest_url, s3_prefix:, logger: Logger.new($stderr))
10
+ raise ArgumentError, 'no URL given' if data_urls.empty?
11
+ @data_files = data_urls.map {|url| UrlDataFile.new(URI.parse(url)) }
12
+ @manifest_file = UrlManifestFile.new(URI.parse(manifest_url)) if manifest_url
11
13
  @s3_prefix = s3_prefix
12
14
  @logger = logger
13
15
  end
14
16
 
15
17
  attr_reader :data_files
18
+ attr_reader :manifest_file
16
19
  attr_reader :s3_prefix
17
20
  attr_reader :logger
18
21
 
@@ -29,5 +32,9 @@ module QueueryClient
29
32
  prefix = s3_uri.path[1..-1] # trim heading slash
30
33
  S3DataFileBundle.new(bucket, prefix)
31
34
  end
35
+
36
+ def has_manifest?
37
+ !@manifest_file.nil?
38
+ end
32
39
  end
33
40
  end
@@ -0,0 +1,28 @@
1
+ require 'queuery_client/manifest_file'
2
+ require 'net/http'
3
+ require 'stringio'
4
+ require 'json'
5
+
6
module QueueryClient
  # ManifestFile implementation that downloads the manifest from a URL.
  class UrlManifestFile < ManifestFile
    # url - URI-like object; this class reads its host, port, scheme, path
    #       and request_uri.
    def initialize(url)
      @url = url
    end

    attr_reader :url

    # The path component of the URL serves as the key.
    def key
      @url.path
    end

    # Downloads the manifest and returns its content wrapped in a StringIO.
    #
    # Raises the Net::HTTP error matching the response when the status is not
    # 2xx, so the body of an error response is never returned as if it were
    # manifest content.
    def open
      http = Net::HTTP.new(@url.host, @url.port)
      http.use_ssl = (@url.scheme.downcase == 'https')
      content = http.start do
        res = http.get(@url.request_uri)
        res.value # raises unless the response status is 2xx
        res.body
      end
      StringIO.new(content)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module QueueryClient
2
- VERSION = "1.0.2"
2
+ VERSION = "1.1.3"
3
3
  end
@@ -16,8 +16,8 @@ module QueueryClient
16
16
  configuration.instance_eval(&block)
17
17
  end
18
18
 
19
- def query(select_stmt, values = [])
20
- Client.new.query(select_stmt, values)
19
+ def query(select_stmt, values = [], enable_cast: false)
20
+ Client.new.query(select_stmt, values, enable_cast: enable_cast)
21
21
  end
22
22
  end
23
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: queuery_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-22 00:00:00.000000000 Z
11
+ date: 2021-11-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: garage_client
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ description:
98
98
  email:
99
99
  - hidekazu-kobayashi@cookpad.com
100
100
  executables: []
@@ -113,11 +113,15 @@ files:
113
113
  - lib/queuery_client/configuration.rb
114
114
  - lib/queuery_client/data_file.rb
115
115
  - lib/queuery_client/data_file_bundle.rb
116
+ - lib/queuery_client/manifest_file.rb
116
117
  - lib/queuery_client/query_error.rb
118
+ - lib/queuery_client/redshift_data_type.rb
117
119
  - lib/queuery_client/s3_data_file.rb
118
120
  - lib/queuery_client/s3_data_file_bundle.rb
121
+ - lib/queuery_client/s3_manifest_file.rb
119
122
  - lib/queuery_client/url_data_file.rb
120
123
  - lib/queuery_client/url_data_file_bundle.rb
124
+ - lib/queuery_client/url_manifest_file.rb
121
125
  - lib/queuery_client/version.rb
122
126
  - queuery_client.gemspec
123
127
  homepage: https://github.com/bricolages/queuery_client
@@ -125,7 +129,7 @@ licenses:
125
129
  - MIT
126
130
  metadata:
127
131
  allowed_push_host: https://rubygems.org
128
- post_install_message:
132
+ post_install_message:
129
133
  rdoc_options: []
130
134
  require_paths:
131
135
  - lib
@@ -140,8 +144,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
144
  - !ruby/object:Gem::Version
141
145
  version: '0'
142
146
  requirements: []
143
- rubygems_version: 3.1.2
144
- signing_key:
147
+ rubygems_version: 3.1.6
148
+ signing_key:
145
149
  specification_version: 4
146
150
  summary: Client library for Queuery Redshift HTTP API
147
151
  test_files: []