databricks_sql 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +8 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +215 -0
- data/Rakefile +10 -0
- data/lib/databricks_sql/client.rb +477 -0
- data/lib/databricks_sql/configuration.rb +22 -0
- data/lib/databricks_sql/errors.rb +53 -0
- data/lib/databricks_sql/external_link_handler.rb +89 -0
- data/lib/databricks_sql/result.rb +25 -0
- data/lib/databricks_sql/type_coercer.rb +59 -0
- data/lib/databricks_sql/version.rb +5 -0
- data/lib/databricks_sql.rb +31 -0
- data/sig/databricks_sql.rbs +182 -0
- data/spec/databricks_sql/client_spec.rb +533 -0
- data/spec/databricks_sql_spec.rb +37 -0
- data/spec/spec_helper.rb +16 -0
- metadata +77 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "json"
require "net/http"
require "uri"

module DatabricksSql
  # HTTP client for the Databricks SQL Statement Execution API.
  #
  # Submits SQL statements to a warehouse, optionally polls until a terminal
  # status is reached, fetches result chunks (inline data or external links),
  # and wraps terminal payloads in Result objects. Network and parsing
  # failures are surfaced as DatabricksSql error subclasses.
  class Client
    DEFAULT_STATEMENT_PATH = "/api/2.0/sql/statements"

    # Statuses after which the API will not change the statement again.
    TERMINAL_STATUSES = %w[SUCCEEDED FAILED CANCELED CLOSED].freeze

    attr_reader :host, :warehouse_id

    # Per-instance options fall back to DatabricksSql.configuration, then to
    # built-in defaults (timeout 30s, open_timeout 10s, default path).
    # Raises ConfigurationError when host/token/warehouse_id are missing or
    # the host is not an absolute HTTPS URL.
    def initialize(host: nil, token: nil, warehouse_id: nil, timeout: nil, open_timeout: nil, statement_path: nil)
      global_config = DatabricksSql.configuration

      @host = normalize_host(host || global_config.host)
      @token = (token || global_config.token).to_s
      @warehouse_id = (warehouse_id || global_config.warehouse_id).to_s
      @timeout = timeout || global_config.timeout || 30
      @open_timeout = open_timeout || global_config.open_timeout || 10
      @statement_path = statement_path || global_config.statement_path || DEFAULT_STATEMENT_PATH
      # Only an explicit `false` in the global config disables the HTTPS
      # requirement for external result links.
      @external_link_require_https = global_config.external_link_require_https != false
      @external_link_allowed_hosts = normalize_allowed_hosts(global_config.external_link_allowed_hosts)
      validate_configuration!
    end

    # Synchronous execution: submits the statement, and when the server does
    # not finish it within wait_timeout, polls until terminal. Returns a
    # Result. Raises ExecutionError for FAILED/CANCELED/CLOSED statements and
    # TimeoutError when polling exceeds max_wait.
    def execute_statement(statement:, parameters: nil, format: "JSON_ARRAY", disposition: "INLINE", catalog: nil,
                          schema: nil, column_schema: nil, wait_timeout: "10s", on_wait_timeout: "CONTINUE",
                          poll_interval: 1.0, max_wait: 300, request_timeout: nil, cancel_on_timeout: false,
                          auto_fetch_chunks: true, byte_limit: nil, row_limit: nil, query_tags: nil)
      response = execute_statement_async(
        statement: statement,
        parameters: parameters,
        format: format,
        disposition: disposition,
        catalog: catalog,
        schema: schema,
        wait_timeout: wait_timeout,
        on_wait_timeout: on_wait_timeout,
        request_timeout: request_timeout,
        byte_limit: byte_limit,
        row_limit: row_limit,
        query_tags: query_tags
      )

      statement_id = response.fetch("statement_id")
      if terminal_status?(statement_status(response))
        # Fast path: the server finished within wait_timeout.
        response = hydrate_terminal_response(
          response,
          statement_id: statement_id,
          request_timeout: request_timeout
        )

        return build_result(response, format: format, disposition: disposition, column_schema: column_schema,
                                      auto_fetch_chunks: auto_fetch_chunks,
                                      request_timeout: request_timeout)
      end

      wait_for_statement(
        statement_id: statement_id,
        format: format,
        disposition: disposition,
        column_schema: column_schema,
        poll_interval: poll_interval,
        max_wait: max_wait,
        request_timeout: request_timeout,
        cancel_on_timeout: cancel_on_timeout,
        auto_fetch_chunks: auto_fetch_chunks
      )
    end

    # Fire-and-return submission. Returns the raw API payload (Hash); the
    # caller is responsible for polling via wait_for_statement/get_statement.
    def execute_statement_async(statement:, parameters: nil, format: "JSON_ARRAY", disposition: "INLINE", catalog: nil,
                                schema: nil, wait_timeout: "10s", on_wait_timeout: "CONTINUE", request_timeout: nil,
                                byte_limit: nil, row_limit: nil, query_tags: nil)
      payload = {
        statement: statement,
        warehouse_id: warehouse_id,
        disposition: disposition,
        format: format,
        wait_timeout: wait_timeout,
        on_wait_timeout: on_wait_timeout
      }
      # Optional fields are omitted entirely rather than sent as null.
      payload[:parameters] = parameters if parameters
      payload[:catalog] = catalog if catalog
      payload[:schema] = schema if schema
      payload[:byte_limit] = byte_limit unless byte_limit.nil?
      payload[:row_limit] = row_limit unless row_limit.nil?
      payload[:query_tags] = query_tags if query_tags

      request_json(:post, @statement_path, payload: payload, request_timeout: request_timeout)
    end

    # Fetches the current state/result of a statement as a raw Hash.
    def get_statement(statement_id:, request_timeout: nil)
      path = "#{@statement_path}/#{statement_id}"
      request_json(:get, path, request_timeout: request_timeout)
    end

    # Best-effort cancellation; HTTP failures are swallowed (returns nil) so
    # a failed cancel never masks the error that triggered it.
    def cancel_statement(statement_id:, request_timeout: nil)
      path = "#{@statement_path}/#{statement_id}/cancel"
      request_json(:post, path, payload: {}, request_timeout: request_timeout)
    rescue HTTPError
      nil
    end

    # Fetches one result chunk by index, optionally starting at row_offset.
    def get_statement_chunk(statement_id:, chunk_index:, row_offset: nil, request_timeout: nil)
      path = "#{@statement_path}/#{statement_id}/result/chunks/#{chunk_index}"
      path = "#{path}?row_offset=#{row_offset}" unless row_offset.nil?
      request_json(:get, path, request_timeout: request_timeout)
    end

    # Follows a server-provided internal chunk link (a path relative to host).
    def get_statement_chunk_by_link(next_chunk_internal_link:, request_timeout: nil)
      request_json(:get, next_chunk_internal_link, request_timeout: request_timeout)
    end

    # Fetches a single chunk via its internal link and wraps it in a Result.
    # `columns` must be the schema columns already resolved from the first
    # response, since chunk payloads carry bare data arrays.
    def fetch_next_chunk(statement_id:, next_chunk_internal_link:, columns:, disposition: "INLINE", column_schema: nil,
                         request_timeout: nil)
      payload = get_statement_chunk_by_link(
        next_chunk_internal_link: next_chunk_internal_link,
        request_timeout: request_timeout
      )
      rows = chunk_rows(payload, columns, disposition: disposition, column_schema: column_schema,
                                          request_timeout: request_timeout)

      Result.new(
        statement_id: statement_id,
        status: "SUCCEEDED",
        disposition: normalize_disposition(disposition),
        format: "JSON_ARRAY",
        columns: columns,
        rows: rows,
        manifest: nil,
        raw_response: payload,
        next_chunk_internal_link: extract_next_chunk_internal_link(payload)
      )
    end

    # Polls get_statement every poll_interval seconds until the statement is
    # terminal, then returns a Result. Uses a monotonic clock for the
    # max_wait budget; optionally cancels the statement on timeout.
    def wait_for_statement(statement_id:, format: "JSON_ARRAY", disposition: "INLINE", column_schema: nil,
                           poll_interval: 1.0, max_wait: 300, request_timeout: nil, cancel_on_timeout: false,
                           auto_fetch_chunks: true)
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      loop do
        response = get_statement(statement_id: statement_id, request_timeout: request_timeout)

        if terminal_status?(statement_status(response))
          response = hydrate_terminal_response(
            response,
            statement_id: statement_id,
            request_timeout: request_timeout
          )

          return build_result(response, format: format, disposition: disposition, column_schema: column_schema,
                                        auto_fetch_chunks: auto_fetch_chunks,
                                        request_timeout: request_timeout)
        end

        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
        if elapsed >= max_wait
          cancel_statement(statement_id: statement_id, request_timeout: request_timeout) if cancel_on_timeout
          raise TimeoutError.new("Polling exceeded max_wait of #{max_wait}s for statement #{statement_id}", max_wait)
        end

        sleep(poll_interval)
      end
    end

    # Custom inspect so the bearer token never leaks into logs or consoles.
    def inspect
      "#<#{self.class} " \
        "@external_link_allowed_hosts=#{@external_link_allowed_hosts.inspect}, " \
        "@external_link_require_https=#{@external_link_require_https.inspect}, " \
        "@host=#{@host.inspect}, " \
        "@open_timeout=#{@open_timeout.inspect}, " \
        "@statement_path=#{@statement_path.inspect}, " \
        "@timeout=#{@timeout.inspect}, " \
        "@token=\"[REDACTED]\", " \
        "@warehouse_id=#{@warehouse_id.inspect}>"
    end

    private

    # Ensures all required settings are present and that the host is an
    # absolute HTTPS URL. Raises ConfigurationError otherwise.
    def validate_configuration!
      raise ConfigurationError, "host is required" if @host.empty?
      raise ConfigurationError, "token is required" if @token.empty?
      raise ConfigurationError, "warehouse_id is required" if @warehouse_id.empty?
      raise ConfigurationError, "timeout must be positive" unless @timeout.to_f.positive?
      raise ConfigurationError, "open_timeout must be positive" unless @open_timeout.to_f.positive?

      uri = parse_uri!(@host, field_name: "host")
      raise ConfigurationError, "host must use HTTPS" unless uri.scheme == "https"
    end

    # Strips whitespace and trailing slashes so paths concatenate cleanly.
    def normalize_host(host)
      host.to_s.strip.sub(%r{/+\z}, "")
    end

    # Case-insensitive check against TERMINAL_STATUSES.
    def terminal_status?(status)
      TERMINAL_STATUSES.include?(status.to_s.upcase)
    end

    # Extracts status.state (upcased) from a statement payload.
    # Raises ParseError when the field is missing or blank.
    def statement_status(payload)
      state = payload.dig("status", "state")
      raise ParseError, "Missing required field: status.state" if state.to_s.empty?

      state.to_s.upcase
    end

    # Converts a terminal payload into a Result, raising ExecutionError for
    # unsuccessful terminal statuses. When auto_fetch_chunks is true, follows
    # every next_chunk_internal_link and concatenates the rows.
    def build_result(payload, format:, disposition:, column_schema:, auto_fetch_chunks:, request_timeout:)
      status = statement_status(payload)
      statement_id = payload["statement_id"]

      if %w[FAILED CANCELED CLOSED].include?(status)
        message = payload.dig("status", "error", "message") ||
                  "Statement #{statement_id} ended with status #{status}"
        raise ExecutionError.new(message, statement_id: statement_id, status: status)
      end

      rows, columns = resolve_rows_and_columns(
        payload,
        disposition: disposition,
        column_schema: column_schema,
        request_timeout: request_timeout
      )

      next_chunk_internal_link = extract_next_chunk_internal_link(payload)

      if auto_fetch_chunks
        rows, next_chunk_internal_link = append_remaining_chunks(
          rows,
          columns,
          next_chunk_internal_link: next_chunk_internal_link,
          disposition: disposition,
          column_schema: column_schema,
          request_timeout: request_timeout
        )
      end

      Result.new(
        statement_id: statement_id,
        status: status,
        disposition: normalize_disposition(disposition),
        format: format.to_s.upcase,
        columns: columns,
        rows: rows,
        manifest: payload["manifest"],
        raw_response: payload,
        next_chunk_internal_link: next_chunk_internal_link
      )
    end

    # Resolves rows (as an array of hashes, type-coerced per column_schema)
    # and column names from the first terminal payload.
    def resolve_rows_and_columns(payload, disposition:, column_schema:, request_timeout:)
      effective_disposition = infer_disposition(payload, disposition)
      columns = extract_columns(payload)

      rows = if effective_disposition == "EXTERNAL_LINK"
               fetch_external_rows(payload, request_timeout: request_timeout)
             else
               inline_rows(payload, columns)
             end

      rows = normalize_rows(rows, columns)
      rows = TypeCoercer.coerce_rows(rows, column_schema)

      [rows, columns]
    end

    # Prefers the disposition reported in the payload over the one requested,
    # since the server may override it.
    def infer_disposition(payload, fallback)
      result = payload["result"] || {}
      value = result["disposition"] || result["result_disposition"] || payload["disposition"] || fallback
      normalize_disposition(value)
    end

    # Upcases and collapses the API's plural spelling to the singular form
    # used internally.
    def normalize_disposition(value)
      disposition = value.to_s.upcase
      return "EXTERNAL_LINK" if disposition == "EXTERNAL_LINKS"

      disposition
    end

    # Looks for a continuation link in every location the API has been
    # observed to place it; returns nil when there is no next chunk.
    def extract_next_chunk_internal_link(payload)
      payload.dig("result", "next_chunk_internal_link") ||
        payload.dig("result", "external_links", 0, "next_chunk_internal_link") ||
        payload["next_chunk_internal_link"] ||
        payload.dig("external_links", 0, "next_chunk_internal_link")
    end

    # Walks the chunk chain starting at next_chunk_internal_link, appending
    # each chunk's rows. Returns [all_rows, final_link] where final_link is
    # nil/empty once the chain is exhausted.
    def append_remaining_chunks(rows, columns, next_chunk_internal_link:, disposition:, column_schema:,
                                request_timeout:)
      combined_rows = rows.dup
      next_link = next_chunk_internal_link

      until next_link.to_s.empty?
        payload = get_statement_chunk_by_link(next_chunk_internal_link: next_link, request_timeout: request_timeout)
        combined_rows.concat(
          chunk_rows(payload, columns, disposition: disposition, column_schema: column_schema,
                                       request_timeout: request_timeout)
        )
        next_link = extract_next_chunk_internal_link(payload)
      end

      [combined_rows, next_link]
    end

    # Extracts rows from a chunk payload (which, unlike the first response,
    # may carry data_array at the top level) and coerces them.
    def chunk_rows(payload, columns, disposition:, column_schema:, request_timeout:)
      effective_disposition = normalize_disposition(disposition)

      rows = if effective_disposition == "EXTERNAL_LINK"
               fetch_external_rows(payload, request_timeout: request_timeout)
             else
               data = payload["data_array"] || payload.dig("result", "data_array") || []

               if data.first.is_a?(Hash)
                 data
               else
                 Array(data).map { |row| Array(columns).zip(Array(row)).to_h }
               end
             end

      rows = normalize_rows(rows, columns)
      TypeCoercer.coerce_rows(rows, column_schema)
    end

    # A SUCCEEDED payload returned from the submit call may omit the result
    # body; refetch the statement once so build_result has rows to work with.
    def hydrate_terminal_response(payload, statement_id:, request_timeout:)
      return payload unless statement_status(payload) == "SUCCEEDED"
      return payload if payload["result"].is_a?(Hash)

      get_statement(statement_id: statement_id, request_timeout: request_timeout)
    end

    # Extracts inline rows, converting positional arrays into hashes keyed by
    # column name. Returns [] when there is no data array.
    def inline_rows(payload, columns)
      result = payload["result"] || {}
      data = result["data_array"] || payload["data_array"] || []

      return [] unless data.is_a?(Array)

      if data.first.is_a?(Hash)
        data
      else
        data.map do |row|
          Array(columns).zip(Array(row)).to_h
        end
      end
    end

    # Finds the external download URL (checking every known payload shape)
    # and delegates download+parse to ExternalLinkHandler, enforcing the
    # client's HTTPS and allow-list policy. Raises ParseError without a URL.
    def fetch_external_rows(payload, request_timeout:)
      result = payload["result"] || {}
      url = result["external_link"] ||
            result["external_link_url"] ||
            result.dig("external_links", 0, "external_link") ||
            result.dig("external_links", 0, "external_link_url") ||
            payload.dig("external_links", 0, "external_link") ||
            payload.dig("external_links", 0, "external_link_url") ||
            payload["external_link"]

      raise ParseError, "EXTERNAL_LINK disposition returned without a download URL" if url.to_s.empty?

      ExternalLinkHandler.fetch_and_parse(
        url,
        timeout: request_timeout || @timeout,
        require_https: @external_link_require_https,
        allowed_hosts: @external_link_allowed_hosts
      )
    end

    # Lower-cases, trims, de-blanks, and de-duplicates the allow-list.
    # nil stays nil, which means "no restriction".
    def normalize_allowed_hosts(hosts)
      return nil if hosts.nil?

      Array(hosts).map { |host| host.to_s.strip.downcase }.reject(&:empty?).uniq
    end

    # Parses an absolute URL, converting URI failures into ConfigurationError.
    def parse_uri!(value, field_name:)
      uri = URI.parse(value)
      raise ConfigurationError, "#{field_name} must be an absolute URL" if uri.scheme.nil? || uri.host.nil?

      uri
    rescue URI::InvalidURIError => e
      raise ConfigurationError, "#{field_name} is not a valid URL: #{e.message}"
    end

    # Guarantees rows is an array of hashes: positional rows are zipped with
    # the column names; non-arrays collapse to [].
    def normalize_rows(rows, columns)
      return [] unless rows.is_a?(Array)
      return rows if rows.empty?

      if rows.first.is_a?(Hash)
        rows
      else
        rows.map { |row| Array(columns).zip(Array(row)).to_h }
      end
    end

    # Column names from the result/manifest schema; falls back to the keys of
    # the first hash-shaped row, and finally [].
    def extract_columns(payload)
      result = payload["result"] || {}
      schema_columns = result.dig("schema", "columns") || payload.dig("manifest", "schema", "columns") || []

      columns = schema_columns.map do |column|
        column["name"] || column["column_name"]
      end.compact

      return columns unless columns.empty?

      first_row = (result["data_array"] || payload["data_array"] || []).first
      return first_row.keys if first_row.is_a?(Hash)

      []
    end

    # Performs an authenticated JSON request against @host and returns the
    # parsed body. Maps transport failures to TimeoutError/ConnectionError
    # and bad JSON to ParseError; non-2xx responses raise via
    # validate_http_response!.
    def request_json(method, path, payload: nil, request_timeout: nil)
      uri = URI.parse("#{@host}#{path}")
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = uri.scheme == "https"
      http.open_timeout = @open_timeout
      http.read_timeout = request_timeout || @timeout

      request = build_request(method, uri, payload)
      response = http.request(request)
      validate_http_response!(response)
      parse_json(response.body)
    rescue Timeout::Error, Net::OpenTimeout, Net::ReadTimeout
      timeout = request_timeout || @timeout
      raise TimeoutError.new("HTTP request timed out after #{timeout}s", timeout)
    rescue SocketError => e
      raise ConnectionError, "Connection error: #{e.message}"
    rescue JSON::ParserError => e
      raise ParseError, "Invalid JSON response: #{e.message}"
    end

    # Builds a GET/POST request with auth and JSON headers.
    # Raises ConfigurationError for any other HTTP verb.
    def build_request(method, uri, payload)
      request = case method.to_sym
                when :get
                  Net::HTTP::Get.new(uri.request_uri)
                when :post
                  Net::HTTP::Post.new(uri.request_uri)
                else
                  raise ConfigurationError, "Unsupported HTTP method: #{method}"
                end

      request["Authorization"] = "Bearer #{@token}"
      request["Content-Type"] = "application/json"
      request["User-Agent"] = "databricks_sql/#{DatabricksSql::VERSION}"
      request.body = JSON.generate(payload) if payload
      request
    end

    # Maps non-2xx status codes onto the error hierarchy (401/403/404/429,
    # 5xx, generic HTTPError otherwise). 429 carries Retry-After when present.
    def validate_http_response!(response)
      code = response.code.to_i
      return if code.between?(200, 299)

      message = "Databricks API request failed with HTTP #{code}"

      case code
      when 401
        raise AuthenticationError.new(message, status_code: code, response_body: response.body)
      when 403
        raise AuthorizationError.new(message, status_code: code, response_body: response.body)
      when 404
        raise NotFoundError.new(message, status_code: code, response_body: response.body)
      when 429
        retry_after = response["Retry-After"]&.to_i
        raise RateLimitError.new(
          message,
          status_code: code,
          response_body: response.body,
          retry_after_seconds: retry_after
        )
      when 500..599
        raise ServerError.new(message, status_code: code, response_body: response.body)
      else
        raise HTTPError.new(message, status_code: code, response_body: response.body)
      end
    end

    # Thin JSON.parse wrapper; JSON::ParserError is handled by request_json.
    def parse_json(body)
      JSON.parse(body.to_s)
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true

module DatabricksSql
  # Mutable settings container populated via DatabricksSql.configure and read
  # by Client#initialize as the fallback for per-instance options.
  class Configuration
    attr_accessor :host,
                  :token,
                  :warehouse_id,
                  :timeout,
                  :open_timeout,
                  :statement_path,
                  :external_link_require_https,
                  :external_link_allowed_hosts

    # Seeds only the options that have safe defaults; host/token/warehouse_id
    # start as nil and must be supplied by the user.
    def initialize
      @timeout = 30                              # HTTP read timeout, seconds
      @open_timeout = 10                         # HTTP connect timeout, seconds
      @statement_path = Client::DEFAULT_STATEMENT_PATH
      @external_link_require_https = true        # external result links must be HTTPS
      @external_link_allowed_hosts = nil         # nil means no host allow-list
    end
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true

module DatabricksSql
  # Root of the gem's error hierarchy; rescue this to catch everything the
  # library raises.
  class Error < StandardError; end

  # Missing/invalid client or global configuration.
  class ConfigurationError < Error; end
  # DNS/socket-level connectivity failures.
  class ConnectionError < Error; end
  # Unparseable or structurally invalid API payloads.
  class ParseError < Error; end

  # Raised when an HTTP request or the polling loop exceeds its time budget.
  class TimeoutError < Error
    attr_reader :timeout_seconds

    def initialize(message, timeout_seconds)
      @timeout_seconds = timeout_seconds
      super(message)
    end
  end

  # Base class for non-2xx HTTP responses; carries the status code and the
  # raw response body for diagnostics.
  class HTTPError < Error
    attr_reader :status_code, :response_body

    def initialize(message, status_code:, response_body: nil)
      @status_code = status_code
      @response_body = response_body
      super(message)
    end
  end

  class AuthenticationError < HTTPError; end # 401
  class AuthorizationError < HTTPError; end  # 403
  class NotFoundError < HTTPError; end       # 404

  # 429 responses; retry_after_seconds comes from the Retry-After header
  # when the server sent one.
  class RateLimitError < HTTPError
    attr_reader :retry_after_seconds

    def initialize(message, status_code:, response_body: nil, retry_after_seconds: nil)
      @retry_after_seconds = retry_after_seconds
      super(message, status_code: status_code, response_body: response_body)
    end
  end

  class ServerError < HTTPError; end # 5xx

  # A statement reached a terminal status other than SUCCEEDED.
  class ExecutionError < Error
    attr_reader :statement_id, :status

    def initialize(message, statement_id:, status:)
      @statement_id = statement_id
      @status = status
      super(message)
    end
  end
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "csv"
require "json"
require "net/http"
require "uri"

module DatabricksSql
  # Downloads EXTERNAL_LINK result payloads (typically presigned cloud
  # storage URLs) and parses them into arrays of row hashes.
  class ExternalLinkHandler
    class << self
      # Validates the URL against the HTTPS/allow-list policy, downloads it,
      # and parses the body by content type. Raises TimeoutError,
      # ConnectionError, ConfigurationError, HTTPError, or ParseError.
      def fetch_and_parse(url, timeout: 30, require_https: true, allowed_hosts: nil)
        uri = URI.parse(url)
        validate_external_uri!(uri, require_https: require_https, allowed_hosts: allowed_hosts)

        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = uri.scheme == "https"
        http.open_timeout = timeout
        http.read_timeout = timeout

        response = http.request(Net::HTTP::Get.new(uri.request_uri))
        validate_download_response!(response)
        parse_body(response.body, response["Content-Type"], url)
      rescue Timeout::Error, Net::OpenTimeout, Net::ReadTimeout
        raise TimeoutError.new("Timed out downloading external link after #{timeout}s", timeout)
      rescue SocketError => e
        raise ConnectionError, "Unable to connect to external link host: #{e.message}"
      rescue URI::InvalidURIError => e
        raise ConfigurationError, "Invalid external link URL: #{e.message}"
      end

      private

      # SSRF guard: the link must be absolute, HTTPS (unless the caller opted
      # out), and on the allow-list when one was configured.
      def validate_external_uri!(uri, require_https:, allowed_hosts:)
        raise ConfigurationError, "External link must be an absolute URL" if uri.scheme.nil? || uri.host.nil?

        raise ConfigurationError, "External link must use HTTPS" if require_https && uri.scheme != "https"

        return if allowed_hosts.nil? || allowed_hosts.empty?

        host = uri.host.to_s.downcase
        return if allowed_hosts.include?(host)

        raise ConfigurationError, "External link host is not in the allowed list"
      end

      # Raises HTTPError for any non-2xx download response.
      def validate_download_response!(response)
        return if response.code.to_i.between?(200, 299)

        raise HTTPError.new(
          "External link download failed with HTTP #{response.code}",
          status_code: response.code.to_i,
          response_body: response.body
        )
      end

      # Dispatches on Content-Type (then URL extension), trying JSON first
      # and CSV second for unknown types. Raises ParseError when neither
      # parser accepts the body.
      def parse_body(body, content_type, url)
        ct = content_type.to_s.downcase

        return parse_json(body) if ct.include?("json")
        return parse_csv(body) if ct.include?("csv") || url.downcase.end_with?(".csv")

        # Fallback strategy for unknown content types.
        parse_json(body)
      rescue JSON::ParserError
        parse_csv(body)
      rescue CSV::MalformedCSVError, ArgumentError => e
        raise ParseError, "Unable to parse external link payload: #{e.message}"
      end

      # Normalizes JSON payloads to an array of rows: arrays pass through,
      # hashes yield their "rows" key (or wrap themselves), anything else is
      # a ParseError.
      def parse_json(body)
        parsed = JSON.parse(body)

        case parsed
        when Array
          parsed
        when Hash
          parsed.fetch("rows", [parsed])
        else
          raise ParseError, "Unsupported JSON payload type for external link"
        end
      end

      # Parses header-row CSV into an array of string-keyed hashes.
      def parse_csv(body)
        rows = CSV.parse(body, headers: true)
        rows.map(&:to_h)
      end
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true

module DatabricksSql
  # Immutable-ish value object wrapping one terminal statement response:
  # identity/status metadata, resolved columns and rows, the raw API payload,
  # and (when pagination stopped early) the link to the next chunk.
  class Result
    attr_reader :statement_id, :status, :disposition, :format, :columns, :rows, :manifest, :raw_response,
                :next_chunk_internal_link

    def initialize(statement_id:, status:, disposition:, format:, columns:, rows:, manifest:, raw_response:,
                   next_chunk_internal_link: nil)
      @statement_id = statement_id
      @status = status
      @disposition = disposition
      @format = format
      @columns = columns
      @rows = rows
      @manifest = manifest
      @raw_response = raw_response
      @next_chunk_internal_link = next_chunk_internal_link
    end

    # True only for the SUCCEEDED terminal status.
    def success?
      status == "SUCCEEDED"
    end
  end
end
|