@bitofsky/databricks-sql 1.0.0 → 1.0.1

This diff shows the changes between publicly released versions of this package as they appear in a supported public registry, and is provided for informational purposes only.
package/dist/index.d.cts CHANGED
@@ -19,6 +19,83 @@ type StatementStatus = {
         message: string;
     };
 };
+/**
+ * Query execution metrics from Query History API
+ * @see https://docs.databricks.com/api/workspace/queryhistory/list
+ */
+type QueryMetrics = {
+    /** Total time in milliseconds */
+    total_time_ms?: number;
+    /** Compilation time in milliseconds */
+    compilation_time_ms?: number;
+    /** Execution time in milliseconds */
+    execution_time_ms?: number;
+    /** Result fetch time in milliseconds */
+    result_fetch_time_ms?: number;
+    /** Query execution time in milliseconds */
+    query_execution_time_ms?: number;
+    /** Metadata time in milliseconds */
+    metadata_time_ms?: number;
+    /** Task total time in milliseconds */
+    task_total_time_ms?: number;
+    /** Photon total time in milliseconds */
+    photon_total_time_ms?: number;
+    /** Query compilation start timestamp */
+    query_compilation_start_timestamp?: number;
+    /** Bytes read */
+    read_bytes?: number;
+    /** Remote bytes read */
+    read_remote_bytes?: number;
+    /** Remote bytes written */
+    write_remote_bytes?: number;
+    /** Cache bytes read */
+    read_cache_bytes?: number;
+    /** Bytes spilled to disk */
+    spill_to_disk_bytes?: number;
+    /** Network bytes sent */
+    network_sent_bytes?: number;
+    /** Pruned bytes */
+    pruned_bytes?: number;
+    /** Rows produced count */
+    rows_produced_count?: number;
+    /** Rows read count */
+    rows_read_count?: number;
+    /** Files read count */
+    read_files_count?: number;
+    /** Partitions read count */
+    read_partitions_count?: number;
+    /** Pruned files count */
+    pruned_files_count?: number;
+    /** Whether result is from cache */
+    result_from_cache?: boolean;
+    /** Percentage of bytes read from cache */
+    bytes_read_from_cache_percentage?: number;
+    /** Remote rows written */
+    write_remote_rows?: number;
+    /** Remote files written */
+    write_remote_files?: number;
+};
+/**
+ * Query info from Query History API
+ * @see https://docs.databricks.com/api/workspace/queryhistory/list
+ */
+type QueryInfo = {
+    query_id: string;
+    status: string;
+    query_text: string;
+    query_start_time_ms: number;
+    execution_end_time_ms?: number;
+    query_end_time_ms?: number;
+    user_id: number;
+    user_name: string;
+    endpoint_id: string;
+    warehouse_id: string;
+    rows_produced?: number;
+    metrics?: QueryMetrics;
+    is_final: boolean;
+    duration?: number;
+    statement_type?: string;
+};
 /** Column schema information */
 type ColumnInfo = {
     name: string;
@@ -91,7 +168,9 @@ type StatementParameter = {
  */
 type ExecuteStatementOptions = {
     /** Progress callback (called on each poll) */
-    onProgress?: (status: StatementStatus) => void;
+    onProgress?: (status: StatementStatus, metrics?: QueryMetrics) => void;
+    /** Enable query metrics fetching during polling (default: false) */
+    enableMetrics?: boolean;
     /** Abort signal for cancellation */
     signal?: AbortSignal;
     /** Result byte limit */
@@ -127,7 +206,10 @@ type RowObject = Record<string, unknown>;
 /** Format for fetchRow/fetchAll */
 type FetchRowFormat = 'JSON_ARRAY' | 'JSON_OBJECT';
 /** Options for fetchStream */
-type FetchStreamOptions = SignalOptions;
+type FetchStreamOptions = SignalOptions & {
+    /** Force merge even when there is only a single external link */
+    forceMerge?: boolean;
+};
 /** Options for fetchRow */
 type FetchRowsOptions = SignalOptions & {
     /** Callback for each row */
@@ -153,6 +235,8 @@ type MergeExternalLinksResult = {
 type MergeExternalLinksOptions = SignalOptions & {
     /** Callback to upload merged stream to external link */
     mergeStreamToExternalLink: (stream: Readable) => Promise<MergeExternalLinksResult>;
+    /** Force merge even when there is only a single external link chunk */
+    forceMerge?: boolean;
 };
 /**
  * API request for executeStatement
@@ -247,4 +331,4 @@ declare function fetchStream(statementResult: StatementResult, auth: AuthInfo, o
  */
 declare function mergeExternalLinks(statementResult: StatementResult, auth: AuthInfo, options: MergeExternalLinksOptions): Promise<StatementResult>;
 
-export { AbortError, type AuthInfo, AuthenticationError, type ChunkInfo, type ColumnInfo, DatabricksSqlError, type ExecuteStatementOptions, type ExecuteStatementRequest, type ExternalLinkInfo, type ExternalLinksResultData, type FetchAllOptions, type FetchRowFormat, type FetchRowsOptions, type FetchStreamOptions, type GetChunkResponse, HttpError, type InlineResultData, type MergeExternalLinksOptions, type MergeExternalLinksResult, RateLimitError, type ResultData, type RowArray, type RowObject, type SignalOptions, StatementCancelledError, type StatementManifest, type StatementParameter, type StatementResult, type StatementState, type StatementStatus, executeStatement, fetchAll, fetchRow, fetchStream, mergeExternalLinks };
+export { AbortError, type AuthInfo, AuthenticationError, type ChunkInfo, type ColumnInfo, DatabricksSqlError, type ExecuteStatementOptions, type ExecuteStatementRequest, type ExternalLinkInfo, type ExternalLinksResultData, type FetchAllOptions, type FetchRowFormat, type FetchRowsOptions, type FetchStreamOptions, type GetChunkResponse, HttpError, type InlineResultData, type MergeExternalLinksOptions, type MergeExternalLinksResult, type QueryInfo, type QueryMetrics, RateLimitError, type ResultData, type RowArray, type RowObject, type SignalOptions, StatementCancelledError, type StatementManifest, type StatementParameter, type StatementResult, type StatementState, type StatementStatus, executeStatement, fetchAll, fetchRow, fetchStream, mergeExternalLinks };
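
The typings above are the whole new surface: an opt-in metrics channel threaded through the existing progress callback. A minimal usage sketch; only httpPath is confirmed as an AuthInfo field by this diff, so the host and token fields below are illustrative:

    import { executeStatement } from '@bitofsky/databricks-sql';

    const auth = {
      host: 'https://example.cloud.databricks.com', // assumed field name
      token: process.env.DATABRICKS_TOKEN,          // assumed field name
      httpPath: '/sql/1.0/warehouses/abc123def456',
    };

    const result = await executeStatement('SELECT 1', auth, {
      enableMetrics: true, // default false; without it, metrics is always undefined
      onProgress: (status, metrics) => {
        // Every QueryMetrics field is optional, so guard each access.
        console.log(status.state, metrics?.total_time_ms);
      },
    });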
package/dist/index.d.ts CHANGED
@@ -19,6 +19,83 @@ type StatementStatus = {
         message: string;
     };
 };
+/**
+ * Query execution metrics from Query History API
+ * @see https://docs.databricks.com/api/workspace/queryhistory/list
+ */
+type QueryMetrics = {
+    /** Total time in milliseconds */
+    total_time_ms?: number;
+    /** Compilation time in milliseconds */
+    compilation_time_ms?: number;
+    /** Execution time in milliseconds */
+    execution_time_ms?: number;
+    /** Result fetch time in milliseconds */
+    result_fetch_time_ms?: number;
+    /** Query execution time in milliseconds */
+    query_execution_time_ms?: number;
+    /** Metadata time in milliseconds */
+    metadata_time_ms?: number;
+    /** Task total time in milliseconds */
+    task_total_time_ms?: number;
+    /** Photon total time in milliseconds */
+    photon_total_time_ms?: number;
+    /** Query compilation start timestamp */
+    query_compilation_start_timestamp?: number;
+    /** Bytes read */
+    read_bytes?: number;
+    /** Remote bytes read */
+    read_remote_bytes?: number;
+    /** Remote bytes written */
+    write_remote_bytes?: number;
+    /** Cache bytes read */
+    read_cache_bytes?: number;
+    /** Bytes spilled to disk */
+    spill_to_disk_bytes?: number;
+    /** Network bytes sent */
+    network_sent_bytes?: number;
+    /** Pruned bytes */
+    pruned_bytes?: number;
+    /** Rows produced count */
+    rows_produced_count?: number;
+    /** Rows read count */
+    rows_read_count?: number;
+    /** Files read count */
+    read_files_count?: number;
+    /** Partitions read count */
+    read_partitions_count?: number;
+    /** Pruned files count */
+    pruned_files_count?: number;
+    /** Whether result is from cache */
+    result_from_cache?: boolean;
+    /** Percentage of bytes read from cache */
+    bytes_read_from_cache_percentage?: number;
+    /** Remote rows written */
+    write_remote_rows?: number;
+    /** Remote files written */
+    write_remote_files?: number;
+};
+/**
+ * Query info from Query History API
+ * @see https://docs.databricks.com/api/workspace/queryhistory/list
+ */
+type QueryInfo = {
+    query_id: string;
+    status: string;
+    query_text: string;
+    query_start_time_ms: number;
+    execution_end_time_ms?: number;
+    query_end_time_ms?: number;
+    user_id: number;
+    user_name: string;
+    endpoint_id: string;
+    warehouse_id: string;
+    rows_produced?: number;
+    metrics?: QueryMetrics;
+    is_final: boolean;
+    duration?: number;
+    statement_type?: string;
+};
 /** Column schema information */
 type ColumnInfo = {
     name: string;
@@ -91,7 +168,9 @@ type StatementParameter = {
  */
 type ExecuteStatementOptions = {
     /** Progress callback (called on each poll) */
-    onProgress?: (status: StatementStatus) => void;
+    onProgress?: (status: StatementStatus, metrics?: QueryMetrics) => void;
+    /** Enable query metrics fetching during polling (default: false) */
+    enableMetrics?: boolean;
     /** Abort signal for cancellation */
     signal?: AbortSignal;
     /** Result byte limit */
@@ -127,7 +206,10 @@ type RowObject = Record<string, unknown>;
 /** Format for fetchRow/fetchAll */
 type FetchRowFormat = 'JSON_ARRAY' | 'JSON_OBJECT';
 /** Options for fetchStream */
-type FetchStreamOptions = SignalOptions;
+type FetchStreamOptions = SignalOptions & {
+    /** Force merge even when there is only a single external link */
+    forceMerge?: boolean;
+};
 /** Options for fetchRow */
 type FetchRowsOptions = SignalOptions & {
     /** Callback for each row */
@@ -153,6 +235,8 @@ type MergeExternalLinksResult = {
 type MergeExternalLinksOptions = SignalOptions & {
     /** Callback to upload merged stream to external link */
     mergeStreamToExternalLink: (stream: Readable) => Promise<MergeExternalLinksResult>;
+    /** Force merge even when there is only a single external link chunk */
+    forceMerge?: boolean;
 };
 /**
  * API request for executeStatement
@@ -247,4 +331,4 @@ declare function fetchStream(statementResult: StatementResult, auth: AuthInfo, o
  */
 declare function mergeExternalLinks(statementResult: StatementResult, auth: AuthInfo, options: MergeExternalLinksOptions): Promise<StatementResult>;
 
-export { AbortError, type AuthInfo, AuthenticationError, type ChunkInfo, type ColumnInfo, DatabricksSqlError, type ExecuteStatementOptions, type ExecuteStatementRequest, type ExternalLinkInfo, type ExternalLinksResultData, type FetchAllOptions, type FetchRowFormat, type FetchRowsOptions, type FetchStreamOptions, type GetChunkResponse, HttpError, type InlineResultData, type MergeExternalLinksOptions, type MergeExternalLinksResult, RateLimitError, type ResultData, type RowArray, type RowObject, type SignalOptions, StatementCancelledError, type StatementManifest, type StatementParameter, type StatementResult, type StatementState, type StatementStatus, executeStatement, fetchAll, fetchRow, fetchStream, mergeExternalLinks };
+export { AbortError, type AuthInfo, AuthenticationError, type ChunkInfo, type ColumnInfo, DatabricksSqlError, type ExecuteStatementOptions, type ExecuteStatementRequest, type ExternalLinkInfo, type ExternalLinksResultData, type FetchAllOptions, type FetchRowFormat, type FetchRowsOptions, type FetchStreamOptions, type GetChunkResponse, HttpError, type InlineResultData, type MergeExternalLinksOptions, type MergeExternalLinksResult, type QueryInfo, type QueryMetrics, RateLimitError, type ResultData, type RowArray, type RowObject, type SignalOptions, StatementCancelledError, type StatementManifest, type StatementParameter, type StatementResult, type StatementState, type StatementStatus, executeStatement, fetchAll, fetchRow, fetchStream, mergeExternalLinks };
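
The second declaration file carries the same changes for CommonJS consumers. One piece the typings leave to the caller is the mergeStreamToExternalLink callback. A rough sketch that spools the merged stream to disk; a real implementation would upload to object storage and return a presigned URL, and since MergeExternalLinksResult's fields are not shown in this diff, the returned shape below is an assumption:

    import { createWriteStream } from 'fs';
    import { pipeline } from 'stream/promises';
    import type { Readable } from 'stream';

    async function mergeStreamToExternalLink(stream: Readable) {
      const path = '/tmp/merged-result.ndjson'; // illustrative destination
      await pipeline(stream, createWriteStream(path));
      // Field name assumed; return whatever MergeExternalLinksResult requires.
      return { external_link: `file://${path}` };
    }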
package/dist/index.js CHANGED
@@ -48,6 +48,8 @@ var RateLimitError = class extends HttpError {
 };
 
 // src/util.ts
+import { Readable } from "stream";
+import { pipeline } from "stream/promises";
 function extractWarehouseId(httpPath) {
   const match = httpPath.match(/\/sql\/\d+\.\d+\/warehouses\/([a-zA-Z0-9]+)/);
   if (!match?.[1])
@@ -97,6 +99,24 @@ function validateSucceededResult(statementResult) {
   );
   return statementResult.manifest;
 }
+function isWebReadableStream(body) {
+  return typeof body.getReader === "function";
+}
+async function pipeUrlToOutput(url, output, signal) {
+  if (signal?.aborted)
+    throw new AbortError("Aborted while streaming");
+  const response = await fetch(url, signal ? { signal } : void 0);
+  if (!response.ok) {
+    throw new Error(
+      `Failed to fetch external link: ${response.status} ${response.statusText}`
+    );
+  }
+  if (!response.body)
+    return void output.end();
+  const body = response.body;
+  const input = isWebReadableStream(body) ? Readable.fromWeb(body) : body;
+  await pipeline(input, output);
+}
 
 // src/http.ts
 var MAX_RETRIES = 3;
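
pipeUrlToOutput is the new single-link fast path used by fetchStream further down. It bridges the two stream worlds: Node's built-in fetch yields a WHATWG ReadableStream, which Readable.fromWeb converts into a Node stream for pipeline; the isWebReadableStream guard presumably covers fetch polyfills that already return Node streams. The same bridging pattern in isolation (URL and file path are placeholders):

    import { Readable } from 'stream';
    import { pipeline } from 'stream/promises';
    import { createWriteStream } from 'fs';
    import type { ReadableStream as WebReadableStream } from 'stream/web';

    const res = await fetch('https://example.com/chunk-0.json'); // placeholder URL
    if (res.body) {
      const input = Readable.fromWeb(res.body as unknown as WebReadableStream);
      await pipeline(input, createWriteStream('/tmp/chunk-0.json'));
    }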
@@ -169,6 +189,7 @@ async function httpRequest(auth, options) {
 
 // src/databricks-api.ts
 var BASE_PATH = "/api/2.0/sql/statements";
+var HISTORY_BASE_PATH = "/api/2.0/sql/history/queries";
 async function postStatement(auth, request, signal) {
   return httpRequest(auth, {
     method: "POST",
@@ -198,6 +219,13 @@ async function getChunk(auth, statementId, chunkIndex, signal) {
     ...signal ? { signal } : {}
   });
 }
+async function getQueryMetrics(auth, queryId, signal) {
+  return httpRequest(auth, {
+    method: "GET",
+    path: `${HISTORY_BASE_PATH}/${queryId}?include_metrics=true`,
+    ...signal ? { signal } : {}
+  });
+}
 
 // src/api/executeStatement.ts
 var TERMINAL_STATES = /* @__PURE__ */ new Set([
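
getQueryMetrics calls the Query History API with include_metrics=true; fetchMetrics in the next hunk passes the statement ID as the query ID. A raw-fetch equivalent of the request it issues, assuming a workspace host and token (the library itself routes through httpRequest, which adds auth headers and retry handling):

    const host = 'https://example.cloud.databricks.com'; // hypothetical workspace
    const token = process.env.DATABRICKS_TOKEN;          // hypothetical PAT
    const statementId = '01f0-example';                  // ID returned by executeStatement

    const res = await fetch(
      `${host}/api/2.0/sql/history/queries/${statementId}?include_metrics=true`,
      { headers: { Authorization: `Bearer ${token}` } },
    );
    const queryInfo = await res.json(); // QueryInfo; queryInfo.metrics may be absent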
@@ -206,12 +234,20 @@ var TERMINAL_STATES = /* @__PURE__ */ new Set([
   "CANCELED",
   "CLOSED"
 ]);
-var POLL_INTERVAL_MS = 500;
-var MAX_POLL_INTERVAL_MS = 5e3;
+var POLL_INTERVAL_MS = 5e3;
+async function fetchMetrics(auth, statementId, signal) {
+  try {
+    const queryInfo = await getQueryMetrics(auth, statementId, signal);
+    return queryInfo.metrics;
+  } catch {
+    return void 0;
+  }
+}
 async function executeStatement(query, auth, options = {}) {
   const warehouseId = options.warehouse_id ?? extractWarehouseId(auth.httpPath);
-  const { signal, onProgress } = options;
+  const { signal, onProgress, enableMetrics } = options;
   throwIfAborted(signal, "executeStatement");
+  const emitProgress = onProgress ? async (statementId) => onProgress(result.status, enableMetrics ? await fetchMetrics(auth, statementId, signal) : void 0) : void 0;
   const request = Object.fromEntries(
     Object.entries({
       warehouse_id: warehouseId,
@@ -219,8 +255,8 @@ async function executeStatement(query, auth, options = {}) {
       byte_limit: options.byte_limit,
       disposition: options.disposition,
       format: options.format,
-      on_wait_timeout: options.on_wait_timeout,
-      wait_timeout: options.wait_timeout,
+      on_wait_timeout: options.on_wait_timeout ?? "CONTINUE",
+      wait_timeout: options.wait_timeout ?? "50s",
       row_limit: options.row_limit,
       catalog: options.catalog,
       schema: options.schema,
@@ -228,19 +264,17 @@ async function executeStatement(query, auth, options = {}) {
     }).filter(([, v]) => v !== void 0)
   );
   let result = await postStatement(auth, request, signal);
-  let pollInterval = POLL_INTERVAL_MS;
   while (!TERMINAL_STATES.has(result.status.state)) {
     if (signal?.aborted) {
       await cancelStatement(auth, result.statement_id).catch(() => {
       });
       throw new AbortError("Aborted during polling");
     }
-    onProgress?.(result.status);
-    await delay(pollInterval, signal);
-    pollInterval = Math.min(pollInterval * 1.5, MAX_POLL_INTERVAL_MS);
+    await emitProgress?.(result.statement_id);
+    await delay(POLL_INTERVAL_MS, signal);
     result = await getStatement(auth, result.statement_id, signal);
   }
-  onProgress?.(result.status);
+  await emitProgress?.(result.statement_id);
   if (result.status.state === "SUCCEEDED")
     return result;
   if (result.status.state === "CANCELED")
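
Two behavior changes fall out of these hunks: polling now runs at a fixed 5-second interval (the 500 ms start with 1.5x backoff is gone), and on_wait_timeout/wait_timeout default to "CONTINUE"/"50s", so statements that finish within the server-side wait return straight from the initial POST without entering the loop. Note also that fetchMetrics swallows Query History errors, so a failed metrics lookup degrades to an undefined second argument rather than failing the statement. Cancellation is unchanged; a sketch, reusing auth from the earlier example:

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 30_000); // illustrative cutoff

    try {
      // On abort, the loop above first calls cancelStatement, then throws AbortError.
      await executeStatement('SELECT * FROM big_table', auth, { signal: controller.signal });
    } finally {
      clearTimeout(timer);
    }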
@@ -547,40 +581,55 @@ function convertBoolean(value) {
 import { PassThrough } from "stream";
 import { mergeStreamsFromUrls } from "@bitofsky/merge-streams";
 function fetchStream(statementResult, auth, options = {}) {
-  const { signal } = options;
+  const { signal, forceMerge } = options;
   const manifest = validateSucceededResult(statementResult);
   const format = manifest.format;
+  if (statementResult.result?.data_array) {
+    throw new DatabricksSqlError(
+      "fetchStream only supports EXTERNAL_LINKS results",
+      "UNSUPPORTED_FORMAT",
+      statementResult.statement_id
+    );
+  }
   const output = new PassThrough();
   if (signal) {
-    const onAbort = () => {
-      output.destroy(new AbortError("Stream aborted"));
-    };
+    const onAbort = () => output.destroy(new AbortError("Stream aborted"));
     signal.addEventListener("abort", onAbort, { once: true });
-    output.once("close", () => {
-      signal.removeEventListener("abort", onAbort);
-    });
+    output.once("close", () => signal.removeEventListener("abort", onAbort));
   }
-  mergeChunksToStream(statementResult, auth, manifest, format, output, signal).catch(
-    (err) => {
-      output.destroy(err);
-    }
-  );
+  mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge).catch((err) => output.destroy(err));
   return output;
 }
-async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal) {
-  const result = statementResult.result;
-  let urls = result?.external_links?.map((link) => link.external_link) ?? [];
-  if (urls.length === 0 && manifest.total_chunk_count > 0) {
-    for (let i = 0; i < manifest.total_chunk_count; i++) {
-      if (signal?.aborted) throw new AbortError("Aborted while collecting URLs");
-      const chunkData = await getChunk(auth, statementResult.statement_id, i, signal);
-      const chunkUrls = chunkData.external_links?.map((link) => link.external_link) ?? [];
-      urls.push(...chunkUrls);
-    }
-  }
+async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge) {
+  const urls = await collectExternalUrls(statementResult, auth, manifest, signal);
   if (urls.length === 0)
     return void output.end();
-  await mergeStreamsFromUrls(format, signal ? { urls, output, signal } : { urls, output });
+  if (urls.length === 1 && !forceMerge)
+    return pipeUrlToOutput(urls[0], output, signal);
+  return mergeStreamsFromUrls(format, signal ? { urls, output, signal } : { urls, output });
+}
+async function collectExternalUrls(statementResult, auth, manifest, signal) {
+  const urls = extractExternalLinks(statementResult.result?.external_links);
+  if (urls.length > 0)
+    return urls;
+  if (!manifest.total_chunk_count)
+    return [];
+  const chunkUrls = [];
+  for (let i = 0; i < manifest.total_chunk_count; i++) {
+    if (signal?.aborted)
+      throw new AbortError("Aborted while collecting URLs");
+    const chunkData = await getChunk(auth, statementResult.statement_id, i, signal);
+    chunkUrls.push(...extractExternalLinks(chunkData.external_links));
+  }
+  return chunkUrls;
+}
+function extractExternalLinks(externalLinks) {
+  if (!externalLinks)
+    return [];
+  return externalLinks.map((link) => link.external_link).filter(isNonEmptyString);
+}
+function isNonEmptyString(value) {
+  return typeof value === "string" && value.length > 0;
 }
 
 // src/api/fetchRow.ts
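
fetchStream changes in two ways here: INLINE results (data_array) are now rejected up front, and a single external link is piped straight through pipeUrlToOutput instead of going through @bitofsky/merge-streams, with forceMerge: true restoring the unconditional merge path. A sketch, assuming the statement was executed with an EXTERNAL_LINKS disposition and reusing result and auth from the earlier examples:

    import { fetchStream } from '@bitofsky/databricks-sql';
    import { createWriteStream } from 'fs';
    import { pipeline } from 'stream/promises';

    // With one external link this is a plain passthrough; set forceMerge: true
    // to normalize the output through the merge path anyway.
    const stream = fetchStream(result, auth, { forceMerge: false });
    await pipeline(stream, createWriteStream('/tmp/result.json')); // illustrative path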
@@ -665,10 +714,20 @@ async function fetchAll(statementResult, auth, options = {}) {
 
 // src/api/mergeExternalLinks.ts
 async function mergeExternalLinks(statementResult, auth, options) {
-  const { signal, mergeStreamToExternalLink } = options;
+  const { signal, mergeStreamToExternalLink, forceMerge } = options;
   if (!statementResult.result?.external_links)
     return statementResult;
-  const stream = fetchStream(statementResult, auth, signal ? { signal } : {});
+  if (!forceMerge) {
+    const totalChunks = statementResult.manifest?.total_chunk_count;
+    const externalLinks = statementResult.result.external_links;
+    const isSingleChunk = totalChunks === void 0 ? externalLinks.length <= 1 : totalChunks <= 1;
+    if (isSingleChunk && externalLinks.length <= 1)
+      return statementResult;
+  }
+  const stream = fetchStream(statementResult, auth, {
+    ...signal ? { signal } : {},
+    ...forceMerge !== void 0 ? { forceMerge } : {}
+  });
   const uploadResult = await mergeStreamToExternalLink(stream);
   const manifest = validateSucceededResult(statementResult);
   const totalRowCount = manifest.total_row_count ?? 0;
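
mergeExternalLinks now short-circuits: when the result has at most one chunk and at most one external link, the input StatementResult is returned untouched and the upload callback never runs, unless forceMerge is set. A usage sketch; uploadToStorage is a hypothetical stand-in for the caller's uploader and must resolve to a MergeExternalLinksResult:

    import { mergeExternalLinks } from '@bitofsky/databricks-sql';

    const merged = await mergeExternalLinks(result, auth, {
      // Default behavior: single-chunk results skip the merge entirely.
      mergeStreamToExternalLink: (stream) => uploadToStorage(stream), // hypothetical uploader
    });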