@bitofsky/databricks-sql 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -48,6 +48,8 @@ var RateLimitError = class extends HttpError {
48
48
  };
49
49
 
50
50
  // src/util.ts
51
+ import { Readable } from "stream";
52
+ import { pipeline } from "stream/promises";
51
53
  function extractWarehouseId(httpPath) {
52
54
  const match = httpPath.match(/\/sql\/\d+\.\d+\/warehouses\/([a-zA-Z0-9]+)/);
53
55
  if (!match?.[1])
@@ -97,6 +99,24 @@ function validateSucceededResult(statementResult) {
97
99
  );
98
100
  return statementResult.manifest;
99
101
  }
102
// Distinguishes a WHATWG (web) ReadableStream — which exposes getReader() —
// from a Node Readable, so callers know whether a conversion is required.
function isWebReadableStream(body) {
  const getReader = body.getReader;
  return typeof getReader === "function";
}
105
// Fetches `url` and pipes the response body into the `output` stream.
// Ends `output` when the response has no body. Rejects with AbortError if
// `signal` is already aborted, or with Error on a non-2xx HTTP status.
async function pipeUrlToOutput(url, output, signal) {
  if (signal?.aborted)
    throw new AbortError("Aborted while streaming");
  const response = await fetch(url, signal ? { signal } : void 0);
  if (!response.ok) {
    // Cancel the unread body before throwing so the underlying connection
    // can be released — an unconsumed fetch body keeps the socket alive.
    try {
      await response.body?.cancel();
    } catch {
      // Best-effort cleanup only; the HTTP error below is what matters.
    }
    throw new Error(
      `Failed to fetch external link: ${response.status} ${response.statusText}`
    );
  }
  if (!response.body)
    return void output.end();
  const body = response.body;
  // Node's fetch yields a web ReadableStream; convert it so stream.pipeline
  // can consume it. A Node Readable is passed through unchanged.
  const input = isWebReadableStream(body) ? Readable.fromWeb(body) : body;
  await pipeline(input, output);
}
100
120
 
101
121
  // src/http.ts
102
122
  var MAX_RETRIES = 3;
@@ -169,6 +189,7 @@ async function httpRequest(auth, options) {
169
189
 
170
190
  // src/databricks-api.ts
171
191
  var BASE_PATH = "/api/2.0/sql/statements";
192
+ var HISTORY_BASE_PATH = "/api/2.0/sql/history/queries";
172
193
  async function postStatement(auth, request, signal) {
173
194
  return httpRequest(auth, {
174
195
  method: "POST",
@@ -198,6 +219,13 @@ async function getChunk(auth, statementId, chunkIndex, signal) {
198
219
  ...signal ? { signal } : {}
199
220
  });
200
221
  }
222
// Fetches query-history information (with metrics included) for one query:
// GET /api/2.0/sql/history/queries/{queryId}?include_metrics=true
// Returns the parsed response from httpRequest; `signal` (optional) aborts it.
async function getQueryMetrics(auth, queryId, signal) {
  return httpRequest(auth, {
    method: "GET",
    // Encode the id so unexpected characters cannot corrupt the URL path.
    path: `${HISTORY_BASE_PATH}/${encodeURIComponent(queryId)}?include_metrics=true`,
    ...signal ? { signal } : {}
  });
}
201
229
 
202
230
  // src/api/executeStatement.ts
203
231
  var TERMINAL_STATES = /* @__PURE__ */ new Set([
@@ -206,12 +234,22 @@ var TERMINAL_STATES = /* @__PURE__ */ new Set([
206
234
  "CANCELED",
207
235
  "CLOSED"
208
236
  ]);
209
- var POLL_INTERVAL_MS = 500;
210
- var MAX_POLL_INTERVAL_MS = 5e3;
237
+ var POLL_INTERVAL_MS = 5e3;
238
// Best-effort lookup of query-history metrics for a statement. Returns the
// metrics object, or undefined when the lookup fails for any reason — metrics
// are advisory and must never break the polling loop (deliberate swallow).
async function fetchMetrics(auth, statementId, signal) {
  try {
    const queryInfo = await getQueryMetrics(auth, statementId, signal);
    return queryInfo.metrics;
  } catch {
    return undefined;
  }
}
211
246
  async function executeStatement(query, auth, options = {}) {
212
247
  const warehouseId = options.warehouse_id ?? extractWarehouseId(auth.httpPath);
213
- const { signal, onProgress } = options;
248
+ const { signal, onProgress, enableMetrics, logger } = options;
249
+ const waitTimeout = options.wait_timeout ?? (onProgress ? "0s" : "50s");
250
+ let cancelIssued = false;
214
251
  throwIfAborted(signal, "executeStatement");
252
+ const emitProgress = onProgress ? async (statementId) => onProgress(result, enableMetrics ? await fetchMetrics(auth, statementId, signal) : void 0) : void 0;
215
253
  const request = Object.fromEntries(
216
254
  Object.entries({
217
255
  warehouse_id: warehouseId,
@@ -219,28 +257,51 @@ async function executeStatement(query, auth, options = {}) {
219
257
  byte_limit: options.byte_limit,
220
258
  disposition: options.disposition,
221
259
  format: options.format,
222
- on_wait_timeout: options.on_wait_timeout,
223
- wait_timeout: options.wait_timeout,
260
+ on_wait_timeout: options.on_wait_timeout ?? "CONTINUE",
261
+ wait_timeout: waitTimeout,
224
262
  row_limit: options.row_limit,
225
263
  catalog: options.catalog,
226
264
  schema: options.schema,
227
265
  parameters: options.parameters
228
266
  }).filter(([, v]) => v !== void 0)
229
267
  );
268
+ logger?.info?.(`executeStatement Executing statement on warehouse ${warehouseId}...`);
230
269
  let result = await postStatement(auth, request, signal);
231
- let pollInterval = POLL_INTERVAL_MS;
232
- while (!TERMINAL_STATES.has(result.status.state)) {
233
- if (signal?.aborted) {
234
- await cancelStatement(auth, result.statement_id).catch(() => {
235
- });
270
+ const cancelStatementSafely = async () => {
271
+ if (cancelIssued) return;
272
+ logger?.info?.("executeStatement Abort signal received during executeStatement.");
273
+ cancelIssued = true;
274
+ await cancelStatement(auth, result.statement_id).catch((err) => {
275
+ logger?.error?.("executeStatement Failed to cancel statement after abort.", err);
276
+ });
277
+ };
278
+ if (signal?.aborted) {
279
+ await cancelStatementSafely();
280
+ throw new AbortError("Aborted during polling");
281
+ }
282
+ const onAbort = () => cancelStatementSafely().catch(() => {
283
+ });
284
+ try {
285
+ signal?.addEventListener("abort", onAbort, { once: true });
286
+ while (!TERMINAL_STATES.has(result.status.state)) {
287
+ logger?.info?.(`executeStatement Statement ${result.statement_id} in state ${result.status.state}; polling for status...`);
288
+ await emitProgress?.(result.statement_id);
289
+ await delay(POLL_INTERVAL_MS, signal);
290
+ result = await getStatement(auth, result.statement_id, signal);
291
+ }
292
+ } catch (err) {
293
+ if (err instanceof AbortError || signal?.aborted) {
294
+ logger?.info?.("executeStatement Abort detected in executeStatement polling loop.");
295
+ await cancelStatementSafely();
236
296
  throw new AbortError("Aborted during polling");
237
297
  }
238
- onProgress?.(result.status);
239
- await delay(pollInterval, signal);
240
- pollInterval = Math.min(pollInterval * 1.5, MAX_POLL_INTERVAL_MS);
241
- result = await getStatement(auth, result.statement_id, signal);
298
+ logger?.error?.(`executeStatement Error during executeStatement polling: ${String(err)}`);
299
+ throw err;
300
+ } finally {
301
+ logger?.info?.(`executeStatement Statement ${result.statement_id} reached final state: ${result.status.state}`);
302
+ signal?.removeEventListener("abort", onAbort);
242
303
  }
243
- onProgress?.(result.status);
304
+ await emitProgress?.(result.statement_id);
244
305
  if (result.status.state === "SUCCEEDED")
245
306
  return result;
246
307
  if (result.status.state === "CANCELED")
@@ -547,69 +608,157 @@ function convertBoolean(value) {
547
608
  import { PassThrough } from "stream";
548
609
  import { mergeStreamsFromUrls } from "@bitofsky/merge-streams";
549
610
// Returns a PassThrough stream that is fed asynchronously from the
// statement's EXTERNAL_LINKS chunks. Inline (data_array) results are
// rejected with DatabricksSqlError("UNSUPPORTED_FORMAT"). An abort on
// `options.signal` destroys the stream with AbortError.
function fetchStream(statementResult, auth, options = {}) {
  const { signal, forceMerge, logger } = options;
  const manifest = validateSucceededResult(statementResult);
  const format = manifest.format;
  const statementId = statementResult.statement_id;
  const logMeta = { statementId, manifest, format, forceMerge };

  // Guard: inline results cannot be streamed.
  if (statementResult.result?.data_array) {
    logger?.error?.(
      `fetchStream only supports EXTERNAL_LINKS results for statement ${statementId}.`,
      { ...logMeta, hasDataArray: true }
    );
    throw new DatabricksSqlError(
      "fetchStream only supports EXTERNAL_LINKS results",
      "UNSUPPORTED_FORMAT",
      statementId
    );
  }

  logger?.info?.(`fetchStream creating stream for statement ${statementId}.`, {
    ...logMeta,
    hasExternalLinks: Boolean(statementResult.result?.external_links?.length)
  });

  const output = new PassThrough();

  // Wire abort -> destroy, and clean the listener up once the stream closes.
  if (signal) {
    const onAbort = () => {
      logger?.info?.(`fetchStream abort signal received while streaming statement ${statementId}.`, logMeta);
      output.destroy(new AbortError("Stream aborted"));
    };
    signal.addEventListener("abort", onAbort, { once: true });
    output.once("close", () => signal.removeEventListener("abort", onAbort));
  }

  output.on("error", (err) => {
    if (err instanceof AbortError)
      return;
    // NOTE(review): when this is the only 'error' listener, the error is
    // rethrown from inside the listener, which escalates to an uncaught
    // exception — presumably intentional to mimic Node's default
    // unhandled-'error' crash when the consumer attached no handler; confirm.
    if (output.listenerCount("error") === 1)
      throw err;
  });

  // Fill the stream in the background; any failure destroys the output.
  mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger).catch((err) => {
    logger?.error?.(`fetchStream error while streaming statement ${statementId}.`, {
      ...logMeta,
      error: err
    });
    output.destroy(err);
  });

  return output;
}
570
// Streams the statement's result chunks into `output`:
//  - no URLs: end the stream immediately,
//  - one URL (and forceMerge not set): pipe it directly,
//  - otherwise: merge all URLs via mergeStreamsFromUrls.
async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger) {
  const statementId = statementResult.statement_id;
  const baseLog = { statementId, manifest, format, forceMerge };
  // Local shorthand: log with baseLog, optionally extended per call site.
  const info = (message, extra) =>
    logger?.info?.(message, extra ? { ...baseLog, ...extra } : baseLog);

  info(`fetchStream collecting external links for statement ${statementId}.`);
  const urls = await collectExternalUrls(statementResult, auth, manifest, signal);

  if (!urls.length) {
    info(`fetchStream no external links found for statement ${statementId}.`);
    return void output.end();
  }
  if (urls.length === 1 && !forceMerge) {
    info(`fetchStream piping single external link for statement ${statementId}.`, { urlCount: urls.length });
    return pipeUrlToOutput(urls[0], output, signal);
  }
  info(`fetchStream merging ${urls.length} external links for statement ${statementId}.`, { urlCount: urls.length });
  return mergeStreamsFromUrls(format, signal ? { urls, output, signal } : { urls, output });
}
677
// Gathers every external result URL for a statement, ordered by chunk index.
// Links already present on the statement result are used as-is; any chunk
// still missing is fetched individually via getChunk. Aborts via `signal`.
async function collectExternalUrls(statementResult, auth, manifest, signal) {
  const byChunk = new Map();
  addChunkLinks(byChunk, statementResult.result?.external_links);
  if (manifest.total_chunk_count) {
    for (let chunkIndex = 0; chunkIndex < manifest.total_chunk_count; chunkIndex++) {
      if (byChunk.has(chunkIndex))
        continue;
      if (signal?.aborted)
        throw new AbortError("Aborted while collecting URLs");
      const chunkData = await getChunk(auth, statementResult.statement_id, chunkIndex, signal);
      addChunkLinks(byChunk, chunkData.external_links);
    }
  }
  return flattenChunkUrls(byChunk);
}

// Buckets each non-empty external_link under its chunk_index in `chunkUrls`.
// Tolerates a missing/undefined link list.
function addChunkLinks(chunkUrls, externalLinks) {
  for (const link of externalLinks ?? []) {
    if (!isNonEmptyString(link.external_link))
      continue;
    const bucket = chunkUrls.get(link.chunk_index);
    if (bucket)
      bucket.push(link.external_link);
    else
      chunkUrls.set(link.chunk_index, [link.external_link]);
  }
}

// Flattens the chunk-index -> urls map into one list, sorted by chunk index.
function flattenChunkUrls(chunkUrls) {
  return [...chunkUrls.entries()]
    .sort(([a], [b]) => a - b)
    .flatMap(([, links]) => links);
}

// True only for a string with at least one character.
function isNonEmptyString(value) {
  return typeof value === "string" && value.length > 0;
}
585
719
 
586
720
  // src/api/fetchRow.ts
587
721
  async function fetchRow(statementResult, auth, options = {}) {
588
- const { signal, onEachRow, format } = options;
722
+ const { signal, onEachRow, format, logger } = options;
589
723
  const manifest = validateSucceededResult(statementResult);
724
+ const statementId = statementResult.statement_id;
725
+ const logContext = { statementId, manifest, requestedFormat: format };
590
726
  const mapRow = createRowMapper(manifest, format);
727
+ logger?.info?.(`fetchRow fetching rows for statement ${statementId}.`, {
728
+ ...logContext,
729
+ resultType: statementResult.result?.external_links ? "EXTERNAL_LINKS" : "INLINE"
730
+ });
591
731
  if (statementResult.result?.external_links) {
592
732
  if (manifest.format !== "JSON_ARRAY") {
733
+ logger?.error?.(`fetchRow only supports JSON_ARRAY for external_links; got ${manifest.format}.`, logContext);
593
734
  throw new DatabricksSqlError(
594
735
  `fetchRow only supports JSON_ARRAY for external_links. Received: ${manifest.format}`,
595
736
  "UNSUPPORTED_FORMAT",
596
- statementResult.statement_id
737
+ statementId
597
738
  );
598
739
  }
599
- const stream = fetchStream(statementResult, auth, signal ? { signal } : {});
600
- await consumeJsonArrayStream(stream, mapRow, onEachRow, signal);
740
+ logger?.info?.(`fetchRow streaming external links for statement ${statementId}.`, logContext);
741
+ const stream = fetchStream(statementResult, auth, {
742
+ ...signal ? { signal } : {},
743
+ ...logger ? { logger } : {}
744
+ });
745
+ await consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext);
601
746
  return;
602
747
  }
603
748
  const totalChunks = manifest.total_chunk_count;
604
749
  const dataArray = statementResult.result?.data_array;
605
750
  if (dataArray) {
751
+ logger?.info?.(`fetchRow processing inline rows for statement ${statementId}.`, {
752
+ ...logContext,
753
+ inlineRows: dataArray.length
754
+ });
606
755
  for (const row of dataArray) {
607
756
  if (signal?.aborted) throw new AbortError("Aborted");
608
757
  onEachRow?.(mapRow(row));
609
758
  }
610
759
  }
611
760
  if (totalChunks > 1) {
612
- const statementId = statementResult.statement_id;
761
+ logger?.info?.(`fetchRow processing ${totalChunks} chunks for statement ${statementId}.`, logContext);
613
762
  for (let chunkIndex = 1; chunkIndex < totalChunks; chunkIndex++) {
614
763
  if (signal?.aborted) throw new AbortError("Aborted");
615
764
  const chunk = await getChunk(auth, statementId, chunkIndex, signal);
@@ -628,10 +777,14 @@ async function fetchRow(statementResult, auth, options = {}) {
628
777
  }
629
778
  }
630
779
  }
631
- async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
780
+ async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext) {
632
781
  const jsonStream = stream.pipe(parser()).pipe(streamArray());
633
782
  for await (const item of jsonStream) {
634
783
  if (signal?.aborted) {
784
+ logger?.info?.("fetchRow abort detected while streaming JSON_ARRAY rows.", {
785
+ ...logContext,
786
+ aborted: signal.aborted
787
+ });
635
788
  stream.destroy(new AbortError("Aborted"));
636
789
  throw new AbortError("Aborted");
637
790
  }
@@ -649,34 +802,74 @@ async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
649
802
  // src/api/fetchAll.ts
650
803
// Convenience wrapper over fetchRow that buffers every streamed row into an
// array and returns it. signal/format/logger options are forwarded verbatim.
async function fetchAll(statementResult, auth, options = {}) {
  const rows = [];
  const statementId = statementResult.statement_id;
  const manifest = statementResult.manifest;
  const logContext = { statementId, manifest, requestedFormat: options.format };
  const { logger } = options;
  const fetchOptions = {
    // Collect rows as they are streamed in.
    onEachRow: (row) => {
      rows.push(row);
    }
  };
  logger?.info?.(`fetchAll fetching all rows for statement ${statementId}.`, logContext);
  if (options.signal)
    fetchOptions.signal = options.signal;
  if (options.format)
    fetchOptions.format = options.format;
  if (options.logger)
    fetchOptions.logger = options.logger;
  await fetchRow(statementResult, auth, fetchOptions);
  logger?.info?.(`fetchAll fetched ${rows.length} rows for statement ${statementId}.`, {
    ...logContext,
    rowCount: rows.length,
    resolvedFormat: options.format ?? manifest?.format
  });
  return rows;
}
665
830
 
666
831
  // src/api/mergeExternalLinks.ts
667
832
  async function mergeExternalLinks(statementResult, auth, options) {
668
- const { signal, mergeStreamToExternalLink } = options;
669
- if (!statementResult.result?.external_links)
833
+ const { signal, mergeStreamToExternalLink, forceMerge, logger } = options;
834
+ const statementId = statementResult.statement_id;
835
+ const manifest = statementResult.manifest;
836
+ const externalLinks = statementResult.result?.external_links;
837
+ const totalChunks = manifest?.total_chunk_count ?? 0;
838
+ const logContext = { statementId, manifest, totalChunks, forceMerge };
839
+ if (!externalLinks) {
840
+ logger?.info?.(`mergeExternalLinks no external links to merge for statement ${statementId}.`, logContext);
670
841
  return statementResult;
671
- const stream = fetchStream(statementResult, auth, signal ? { signal } : {});
842
+ }
843
+ if (!forceMerge) {
844
+ const isSingleChunk = totalChunks <= 1;
845
+ if (isSingleChunk) {
846
+ logger?.info?.(`mergeExternalLinks skipping merge for single external link in statement ${statementId}.`, {
847
+ ...logContext,
848
+ totalChunks
849
+ });
850
+ return statementResult;
851
+ }
852
+ }
853
+ logger?.info?.(`mergeExternalLinks merging external links for statement ${statementId}.`, logContext);
854
+ const stream = fetchStream(statementResult, auth, {
855
+ ...signal ? { signal } : {},
856
+ ...forceMerge !== void 0 ? { forceMerge } : {},
857
+ ...logger ? { logger } : {}
858
+ });
859
+ logger?.info?.(`mergeExternalLinks uploading merged external link for statement ${statementId}.`, logContext);
672
860
  const uploadResult = await mergeStreamToExternalLink(stream);
673
- const manifest = validateSucceededResult(statementResult);
674
- const totalRowCount = manifest.total_row_count ?? 0;
861
+ logger?.info?.(`mergeExternalLinks uploaded merged external link for statement ${statementId}.`, {
862
+ ...logContext,
863
+ byteCount: uploadResult.byte_count,
864
+ expiration: uploadResult.expiration
865
+ });
866
+ const validatedManifest = validateSucceededResult(statementResult);
867
+ const totalRowCount = validatedManifest.total_row_count ?? 0;
675
868
  return {
676
869
  statement_id: statementResult.statement_id,
677
870
  status: statementResult.status,
678
871
  manifest: {
679
- ...manifest,
872
+ ...validatedManifest,
680
873
  total_chunk_count: 1,
681
874
  total_byte_count: uploadResult.byte_count,
682
875
  chunks: [