@bitofsky/databricks-sql 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # @bitofsky/databricks-sql
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/@bitofsky/databricks-sql.svg)](https://www.npmjs.com/package/@bitofsky/databricks-sql)
4
+ [![npm downloads](https://img.shields.io/npm/dm/@bitofsky/databricks-sql.svg)](https://www.npmjs.com/package/@bitofsky/databricks-sql)
5
+ [![license](https://img.shields.io/npm/l/@bitofsky/databricks-sql.svg)](https://github.com/bitofsky/databricks-sql/blob/main/LICENSE)
6
+ [![node](https://img.shields.io/node/v/@bitofsky/databricks-sql.svg)](https://nodejs.org)
7
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.9.3-blue.svg)](https://www.typescriptlang.org/)
8
+
3
9
  Databricks SQL client for Node.js that talks directly to the REST API and streams large results efficiently. No SDK lock-in, no warehouse-side streaming bottlenecks.
4
10
 
5
11
  ## Why This Exists
@@ -14,8 +20,10 @@ The goal is simple: stream big results with stable memory usage and without forc
14
20
 
15
21
  ## Highlights
16
22
  - Direct REST calls to Statement Execution API.
17
- - Polls statement execution until completion.
23
+ - Optimized polling with server-side wait (up to 50s) before falling back to client polling.
24
+ - Query metrics support via Query History API (`enableMetrics` option).
18
25
  - Efficient external link handling: merge chunks into a single stream.
26
+ - Handles partial external link responses by fetching missing chunk metadata.
19
27
  - `mergeExternalLinks` supports streaming uploads and returns a new StatementResult with a presigned URL.
20
28
  - `fetchRow`/`fetchAll` support `JSON_OBJECT` (schema-based row mapping).
21
29
  - External links + JSON_ARRAY are supported for row iteration (streaming JSON parsing).
@@ -41,12 +49,16 @@ console.log(rows) // [{ value: 1 }]
41
49
  ```
42
50
 
43
51
  ## Sample (Streaming + Presigned URL)
44
- Stream external links into S3, then return a single presigned URL:
52
+ Stream external links into S3 with gzip compression, then return a single presigned URL:
45
53
 
46
54
  ```ts
47
55
  import { executeStatement, mergeExternalLinks } from '@bitofsky/databricks-sql'
48
- import { GetObjectCommand, PutObjectCommand, S3Client } from '@aws-sdk/client-s3'
56
+ import { GetObjectCommand, HeadObjectCommand, S3Client } from '@aws-sdk/client-s3'
57
+ import { Upload } from '@aws-sdk/lib-storage'
49
58
  import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
59
+ import { createGzip } from 'zlib'
60
+ import { pipeline } from 'stream/promises'
61
+ import { PassThrough } from 'stream'
50
62
 
51
63
  const auth = {
52
64
  token: process.env.DATABRICKS_TOKEN!,
@@ -65,31 +77,71 @@ const result = await executeStatement(
65
77
 
66
78
  const merged = await mergeExternalLinks(result, auth, {
67
79
  mergeStreamToExternalLink: async (stream) => {
68
- const key = `merged-${Date.now()}.csv`
69
- await s3.send(
70
- new PutObjectCommand({
80
+ const key = `merged-${Date.now()}.csv.gz`
81
+ const gzip = createGzip() // Compress with gzip and upload to S3
82
+ const passThrough = new PassThrough()
83
+
84
+ const upload = new Upload({
85
+ client: s3,
86
+ params: {
71
87
  Bucket: bucket,
72
88
  Key: key,
73
- Body: stream,
74
- ContentType: 'text/csv',
75
- })
76
- )
77
-
78
- const externalLink = await getSignedUrl(
79
- s3,
80
- new GetObjectCommand({ Bucket: bucket, Key: key }),
81
- { expiresIn: 3600 }
82
- )
89
+ Body: passThrough,
90
+ ContentType: 'text/csv; charset=utf-8',
91
+ ContentEncoding: 'gzip',
92
+ },
93
+ })
94
+ const uploadPromise = upload.done()
95
+
96
+ await Promise.all([
97
+ pipeline(stream, gzip, passThrough),
98
+ uploadPromise,
99
+ ])
100
+
101
+ // Get actual uploaded size via HeadObject
102
+ const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key }))
103
+ // Generate presigned URL valid for 1 hour
104
+ const externalLink = await getSignedUrl(s3, new GetObjectCommand({ Bucket: bucket, Key: key }), { expiresIn: 3600 })
83
105
 
84
106
  return {
85
- externalLink,
86
- byte_count: 0,
87
- expiration: new Date(Date.now() + 3600 * 1000).toISOString(),
107
+ externalLink, // Presigned URL to merged gzip CSV
108
+ byte_count: head.ContentLength ?? 0, // Actual compressed size
109
+ expiration: new Date(Date.now() + 3600 * 1000).toISOString(), // 1 hour from now
88
110
  }
89
111
  },
90
112
  })
91
113
 
92
- console.log(merged.result?.external_links?.[0].external_link) // Presigned URL to merged CSV
114
+ console.log(merged.result?.external_links?.[0].external_link) // Presigned URL to merged gzip CSV
115
+ console.log(merged.result?.external_links?.[0].byte_count) // Actual compressed size
116
+ ```
117
+
118
+ ## Sample (Progress with Metrics)
119
+ Track query progress with execution metrics:
120
+
121
+ ```ts
122
+ import { executeStatement } from '@bitofsky/databricks-sql'
123
+
124
+ const auth = {
125
+ token: process.env.DATABRICKS_TOKEN!,
126
+ host: process.env.DATABRICKS_HOST!,
127
+ httpPath: process.env.DATABRICKS_HTTP_PATH!,
128
+ }
129
+
130
+ const result = await executeStatement(
131
+ 'SELECT * FROM samples.tpch.lineitem LIMIT 10000',
132
+ auth,
133
+ {
134
+ enableMetrics: true,
135
+ onProgress: (result, metrics) => {
136
+ console.log(`State: ${result.status.state}`)
137
+ if (metrics) { // metrics is optional, only present when enableMetrics: true
138
+ console.log(` Execution time: ${metrics.execution_time_ms}ms`)
139
+ console.log(` Rows produced: ${metrics.rows_produced_count}`)
140
+ console.log(` Bytes read: ${metrics.read_bytes}`)
141
+ }
142
+ },
143
+ }
144
+ )
93
145
  ```
94
146
 
95
147
  ## Sample (Abort)
@@ -141,7 +193,10 @@ function executeStatement(
141
193
  ): Promise<StatementResult>
142
194
  ```
143
195
  - Calls the Databricks Statement Execution API and polls until completion.
144
- - Use `options.onProgress` to receive status updates.
196
+ - Server waits up to 50s (`wait_timeout`) before client-side polling begins.
197
+ - `wait_timeout` defaults to `50s`, or `0s` when `onProgress` is provided, so the client starts polling and reporting progress right away (see the sketch below).
198
+ - Use `options.onProgress` to receive status updates with optional metrics.
199
+ - Set `enableMetrics: true` to fetch query metrics from the Query History API on each poll.
145
200
  - Throws `DatabricksSqlError` on failure, `StatementCancelledError` on cancel, and `AbortError` on abort.
146
201
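A minimal sketch of the two polling modes described above. It uses only `executeStatement` as documented here; the `auth` shape matches the earlier samples and the queries are illustrative:

```ts
import { executeStatement } from '@bitofsky/databricks-sql'

const auth = {
  token: process.env.DATABRICKS_TOKEN!,
  host: process.env.DATABRICKS_HOST!,
  httpPath: process.env.DATABRICKS_HTTP_PATH!,
}

// Without onProgress, wait_timeout defaults to '50s': the server holds the request,
// so short queries usually return without any client-side polling.
const quick = await executeStatement('SELECT 1', auth)

// With onProgress, wait_timeout defaults to '0s': the call returns immediately and the
// client polls, invoking the callback on each poll cycle until a terminal state.
const tracked = await executeStatement('SELECT 1', auth, {
  onProgress: (result) => console.log(result.status.state),
})
```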
 
147
202
  ### fetchRow(statementResult, auth, options?)
@@ -155,6 +210,7 @@ function fetchRow(
155
210
  - Streams each row to `options.onEachRow`.
156
211
  - Use `format: 'JSON_OBJECT'` to map rows into schema-based objects.
157
212
  - Supports `INLINE` results or `JSON_ARRAY` formatted `EXTERNAL_LINKS` only.
213
+ - If only a subset of external links is returned, missing chunk metadata is fetched by index.
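A short `fetchRow` sketch based on the bullets above; the query reuses the table from the metrics sample and `auth` has the same shape as in the other samples:

```ts
import { executeStatement, fetchRow } from '@bitofsky/databricks-sql'

const auth = { token: process.env.DATABRICKS_TOKEN!, host: process.env.DATABRICKS_HOST!, httpPath: process.env.DATABRICKS_HTTP_PATH! }

const result = await executeStatement(
  'SELECT * FROM samples.tpch.lineitem LIMIT 10000',
  auth,
  { disposition: 'EXTERNAL_LINKS', format: 'JSON_ARRAY' }
)

// JSON_OBJECT maps each streamed row onto the manifest schema: { column: value }.
await fetchRow(result, auth, {
  format: 'JSON_OBJECT',
  onEachRow: (row) => console.log(row),
})
```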
158
214
 
159
215
  ### fetchAll(statementResult, auth, options?)
160
216
  ```ts
@@ -166,6 +222,7 @@ function fetchAll(
166
222
  ```
167
223
  - Collects all rows into an array. For large results, prefer `fetchRow`/`fetchStream`.
168
224
  - Supports `INLINE` results or `JSON_ARRAY` formatted `EXTERNAL_LINKS` only.
225
+ - If only a subset of external links is returned, missing chunk metadata is fetched by index.
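A compact `fetchAll` sketch mirroring the Quick Start; fine for small results, while large results should go through `fetchRow`/`fetchStream` as noted above:

```ts
import { executeStatement, fetchAll } from '@bitofsky/databricks-sql'

const auth = { token: process.env.DATABRICKS_TOKEN!, host: process.env.DATABRICKS_HOST!, httpPath: process.env.DATABRICKS_HTTP_PATH! }

const result = await executeStatement('SELECT 1 AS value', auth)
// JSON_OBJECT maps rows to objects using the result schema.
const rows = await fetchAll(result, auth, { format: 'JSON_OBJECT' })
console.log(rows) // e.g. [{ value: 1 }]
```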
169
226
 
170
227
  ### fetchStream(statementResult, auth, options?)
171
228
  ```ts
@@ -177,7 +234,10 @@ function fetchStream(
177
234
  ```
178
235
  - Merges `EXTERNAL_LINKS` into a single binary stream.
179
236
  - Preserves the original format (`JSON_ARRAY`, `CSV`, `ARROW_STREAM`).
237
+ - Throws if the result is `INLINE`.
180
238
  - Ends as an empty stream when no external links exist.
239
+ - `forceMerge: true` forces a merge even when there is only a single external link.
240
+ - If only a subset of external links is returned, missing chunk metadata is fetched by index.
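A sketch of piping the merged stream to a local file. The file name is illustrative, `auth` matches the earlier samples, and `CSV` is one of the formats listed above:

```ts
import { executeStatement, fetchStream } from '@bitofsky/databricks-sql'
import { createWriteStream } from 'fs'
import { pipeline } from 'stream/promises'

const auth = { token: process.env.DATABRICKS_TOKEN!, host: process.env.DATABRICKS_HOST!, httpPath: process.env.DATABRICKS_HTTP_PATH! }

const result = await executeStatement('SELECT * FROM samples.tpch.lineitem LIMIT 10000', auth, {
  disposition: 'EXTERNAL_LINKS',
  format: 'CSV',
})

// fetchStream returns a Node Readable that concatenates all external-link chunks in order.
await pipeline(fetchStream(result, auth), createWriteStream('lineitem.csv'))
```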
181
241
 
182
242
  ### mergeExternalLinks(statementResult, auth, options)
183
243
  ```ts
@@ -190,22 +250,25 @@ function mergeExternalLinks(
190
250
  - Creates a merged stream from `EXTERNAL_LINKS`, uploads it via
191
251
  `options.mergeStreamToExternalLink`, then returns a `StatementResult`
192
252
  with a single external link.
193
- - Returns the original result unchanged when input is `INLINE`.
253
+ - Returns the original result unchanged when the input is `INLINE` or already a
254
+ single external link (unless `forceMerge: true`).
194
255
 
195
256
  ### Options (Summary)
196
257
  ```ts
197
258
  type ExecuteStatementOptions = {
198
- onProgress?: (status: StatementStatus) => void
259
+ onProgress?: (result: StatementResult, metrics?: QueryMetrics) => void
260
+ enableMetrics?: boolean // Fetch metrics from Query History API (default: false)
261
+ logger?: Logger
199
262
  signal?: AbortSignal
200
263
  disposition?: 'INLINE' | 'EXTERNAL_LINKS'
201
264
  format?: 'JSON_ARRAY' | 'ARROW_STREAM' | 'CSV'
202
- wait_timeout?: string
265
+ wait_timeout?: string // Server wait time (default: '50s', or '0s' when onProgress is set; max: '50s')
203
266
  row_limit?: number
204
267
  byte_limit?: number
205
268
  catalog?: string
206
269
  schema?: string
207
270
  parameters?: StatementParameter[]
208
- on_wait_timeout?: 'CONTINUE' | 'CANCEL'
271
+ on_wait_timeout?: 'CONTINUE' | 'CANCEL' // Default: 'CONTINUE'
209
272
  warehouse_id?: string
210
273
  }
211
274
 
@@ -213,19 +276,25 @@ type FetchRowsOptions = {
213
276
  signal?: AbortSignal
214
277
  onEachRow?: (row: RowArray | RowObject) => void
215
278
  format?: 'JSON_ARRAY' | 'JSON_OBJECT'
279
+ logger?: Logger
216
280
  }
217
281
 
218
282
  type FetchAllOptions = {
219
283
  signal?: AbortSignal
220
284
  format?: 'JSON_ARRAY' | 'JSON_OBJECT'
285
+ logger?: Logger
221
286
  }
222
287
 
223
288
  type FetchStreamOptions = {
224
289
  signal?: AbortSignal
290
+ forceMerge?: boolean
291
+ logger?: Logger
225
292
  }
226
293
 
227
294
  type MergeExternalLinksOptions = {
228
295
  signal?: AbortSignal
296
+ forceMerge?: boolean
297
+ logger?: Logger
229
298
  mergeStreamToExternalLink: (stream: Readable) => Promise<{
230
299
  externalLink: string
231
300
  byte_count: number
@@ -237,6 +306,9 @@ type MergeExternalLinksOptions = {
237
306
  ## Notes
238
307
  - Databricks requires `INLINE` results to use `JSON_ARRAY` format. `INLINE + CSV` is rejected by the API.
239
308
  - `EXTERNAL_LINKS` are merged using `@bitofsky/merge-streams`.
309
+ - Query metrics are fetched from `/api/2.0/sql/history/queries/{query_id}?include_metrics=true` when `enableMetrics: true`.
310
+ - Metrics may not be immediately available; `is_final: true` indicates complete metrics.
311
+ - Requires Node.js >= 20 for global `fetch` and Web streams.
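The `logger` option appears throughout the option types above, but its `Logger` type is not shown in this diff. Judging from how the bundled code calls it (`logger?.info?.(...)` and `logger?.error?.(...)`), an object with optional `info`/`error` methods should be enough; a hypothetical console-backed sketch:

```ts
import { executeStatement, fetchAll } from '@bitofsky/databricks-sql'

// Hypothetical logger shape; the published Logger type may differ.
const logger = {
  info: (msg: string, ctx?: unknown) => console.info('[databricks-sql]', msg, ctx ?? ''),
  error: (msg: string, err?: unknown) => console.error('[databricks-sql]', msg, err ?? ''),
}

const auth = { token: process.env.DATABRICKS_TOKEN!, host: process.env.DATABRICKS_HOST!, httpPath: process.env.DATABRICKS_HTTP_PATH! }

const result = await executeStatement('SELECT 1', auth, { logger })
const rows = await fetchAll(result, auth, { logger })
```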
240
312
 
241
313
  ## Development
242
314
  ```bash
package/dist/index.cjs CHANGED
@@ -84,6 +84,8 @@ var RateLimitError = class extends HttpError {
84
84
  };
85
85
 
86
86
  // src/util.ts
87
+ var import_node_stream = require("stream");
88
+ var import_promises = require("stream/promises");
87
89
  function extractWarehouseId(httpPath) {
88
90
  const match = httpPath.match(/\/sql\/\d+\.\d+\/warehouses\/([a-zA-Z0-9]+)/);
89
91
  if (!match?.[1])
@@ -133,6 +135,24 @@ function validateSucceededResult(statementResult) {
133
135
  );
134
136
  return statementResult.manifest;
135
137
  }
138
+ function isWebReadableStream(body) {
139
+ return typeof body.getReader === "function";
140
+ }
141
+ async function pipeUrlToOutput(url, output, signal) {
142
+ if (signal?.aborted)
143
+ throw new AbortError("Aborted while streaming");
144
+ const response = await fetch(url, signal ? { signal } : void 0);
145
+ if (!response.ok) {
146
+ throw new Error(
147
+ `Failed to fetch external link: ${response.status} ${response.statusText}`
148
+ );
149
+ }
150
+ if (!response.body)
151
+ return void output.end();
152
+ const body = response.body;
153
+ const input = isWebReadableStream(body) ? import_node_stream.Readable.fromWeb(body) : body;
154
+ await (0, import_promises.pipeline)(input, output);
155
+ }
136
156
 
137
157
  // src/http.ts
138
158
  var MAX_RETRIES = 3;
@@ -205,6 +225,7 @@ async function httpRequest(auth, options) {
205
225
 
206
226
  // src/databricks-api.ts
207
227
  var BASE_PATH = "/api/2.0/sql/statements";
228
+ var HISTORY_BASE_PATH = "/api/2.0/sql/history/queries";
208
229
  async function postStatement(auth, request, signal) {
209
230
  return httpRequest(auth, {
210
231
  method: "POST",
@@ -234,6 +255,13 @@ async function getChunk(auth, statementId, chunkIndex, signal) {
234
255
  ...signal ? { signal } : {}
235
256
  });
236
257
  }
258
+ async function getQueryMetrics(auth, queryId, signal) {
259
+ return httpRequest(auth, {
260
+ method: "GET",
261
+ path: `${HISTORY_BASE_PATH}/${queryId}?include_metrics=true`,
262
+ ...signal ? { signal } : {}
263
+ });
264
+ }
237
265
 
238
266
  // src/api/executeStatement.ts
239
267
  var TERMINAL_STATES = /* @__PURE__ */ new Set([
@@ -242,12 +270,22 @@ var TERMINAL_STATES = /* @__PURE__ */ new Set([
242
270
  "CANCELED",
243
271
  "CLOSED"
244
272
  ]);
245
- var POLL_INTERVAL_MS = 500;
246
- var MAX_POLL_INTERVAL_MS = 5e3;
273
+ var POLL_INTERVAL_MS = 5e3;
274
+ async function fetchMetrics(auth, statementId, signal) {
275
+ try {
276
+ const queryInfo = await getQueryMetrics(auth, statementId, signal);
277
+ return queryInfo.metrics;
278
+ } catch {
279
+ return void 0;
280
+ }
281
+ }
247
282
  async function executeStatement(query, auth, options = {}) {
248
283
  const warehouseId = options.warehouse_id ?? extractWarehouseId(auth.httpPath);
249
- const { signal, onProgress } = options;
284
+ const { signal, onProgress, enableMetrics, logger } = options;
285
+ const waitTimeout = options.wait_timeout ?? (onProgress ? "0s" : "50s");
286
+ let cancelIssued = false;
250
287
  throwIfAborted(signal, "executeStatement");
288
+ const emitProgress = onProgress ? async (statementId) => onProgress(result, enableMetrics ? await fetchMetrics(auth, statementId, signal) : void 0) : void 0;
251
289
  const request = Object.fromEntries(
252
290
  Object.entries({
253
291
  warehouse_id: warehouseId,
@@ -255,28 +293,51 @@ async function executeStatement(query, auth, options = {}) {
255
293
  byte_limit: options.byte_limit,
256
294
  disposition: options.disposition,
257
295
  format: options.format,
258
- on_wait_timeout: options.on_wait_timeout,
259
- wait_timeout: options.wait_timeout,
296
+ on_wait_timeout: options.on_wait_timeout ?? "CONTINUE",
297
+ wait_timeout: waitTimeout,
260
298
  row_limit: options.row_limit,
261
299
  catalog: options.catalog,
262
300
  schema: options.schema,
263
301
  parameters: options.parameters
264
302
  }).filter(([, v]) => v !== void 0)
265
303
  );
304
+ logger?.info?.(`executeStatement Executing statement on warehouse ${warehouseId}...`);
266
305
  let result = await postStatement(auth, request, signal);
267
- let pollInterval = POLL_INTERVAL_MS;
268
- while (!TERMINAL_STATES.has(result.status.state)) {
269
- if (signal?.aborted) {
270
- await cancelStatement(auth, result.statement_id).catch(() => {
271
- });
306
+ const cancelStatementSafely = async () => {
307
+ if (cancelIssued) return;
308
+ logger?.info?.("executeStatement Abort signal received during executeStatement.");
309
+ cancelIssued = true;
310
+ await cancelStatement(auth, result.statement_id).catch((err) => {
311
+ logger?.error?.("executeStatement Failed to cancel statement after abort.", err);
312
+ });
313
+ };
314
+ if (signal?.aborted) {
315
+ await cancelStatementSafely();
316
+ throw new AbortError("Aborted during polling");
317
+ }
318
+ const onAbort = () => cancelStatementSafely().catch(() => {
319
+ });
320
+ try {
321
+ signal?.addEventListener("abort", onAbort, { once: true });
322
+ while (!TERMINAL_STATES.has(result.status.state)) {
323
+ logger?.info?.(`executeStatement Statement ${result.statement_id} in state ${result.status.state}; polling for status...`);
324
+ await emitProgress?.(result.statement_id);
325
+ await delay(POLL_INTERVAL_MS, signal);
326
+ result = await getStatement(auth, result.statement_id, signal);
327
+ }
328
+ } catch (err) {
329
+ if (err instanceof AbortError || signal?.aborted) {
330
+ logger?.info?.("executeStatement Abort detected in executeStatement polling loop.");
331
+ await cancelStatementSafely();
272
332
  throw new AbortError("Aborted during polling");
273
333
  }
274
- onProgress?.(result.status);
275
- await delay(pollInterval, signal);
276
- pollInterval = Math.min(pollInterval * 1.5, MAX_POLL_INTERVAL_MS);
277
- result = await getStatement(auth, result.statement_id, signal);
334
+ logger?.error?.(`executeStatement Error during executeStatement polling: ${String(err)}`);
335
+ throw err;
336
+ } finally {
337
+ logger?.info?.(`executeStatement Statement ${result.statement_id} reached final state: ${result.status.state}`);
338
+ signal?.removeEventListener("abort", onAbort);
278
339
  }
279
- onProgress?.(result.status);
340
+ await emitProgress?.(result.statement_id);
280
341
  if (result.status.state === "SUCCEEDED")
281
342
  return result;
282
343
  if (result.status.state === "CANCELED")
@@ -580,72 +641,160 @@ function convertBoolean(value) {
580
641
  }
581
642
 
582
643
  // src/api/fetchStream.ts
583
- var import_node_stream = require("stream");
644
+ var import_node_stream2 = require("stream");
584
645
  var import_merge_streams = require("@bitofsky/merge-streams");
585
646
  function fetchStream(statementResult, auth, options = {}) {
586
- const { signal } = options;
647
+ const { signal, forceMerge, logger } = options;
587
648
  const manifest = validateSucceededResult(statementResult);
588
649
  const format = manifest.format;
589
- const output = new import_node_stream.PassThrough();
650
+ const statementId = statementResult.statement_id;
651
+ const baseLog = { statementId, manifest, format, forceMerge };
652
+ if (statementResult.result?.data_array) {
653
+ logger?.error?.(
654
+ `fetchStream only supports EXTERNAL_LINKS results for statement ${statementId}.`,
655
+ { ...baseLog, hasDataArray: true }
656
+ );
657
+ throw new DatabricksSqlError(
658
+ "fetchStream only supports EXTERNAL_LINKS results",
659
+ "UNSUPPORTED_FORMAT",
660
+ statementId
661
+ );
662
+ }
663
+ logger?.info?.(`fetchStream creating stream for statement ${statementId}.`, {
664
+ ...baseLog,
665
+ hasExternalLinks: Boolean(statementResult.result?.external_links?.length)
666
+ });
667
+ const output = new import_node_stream2.PassThrough();
590
668
  if (signal) {
591
669
  const onAbort = () => {
670
+ logger?.info?.(`fetchStream abort signal received while streaming statement ${statementId}.`, baseLog);
592
671
  output.destroy(new AbortError("Stream aborted"));
593
672
  };
594
673
  signal.addEventListener("abort", onAbort, { once: true });
595
- output.once("close", () => {
596
- signal.removeEventListener("abort", onAbort);
597
- });
674
+ output.once("close", () => signal.removeEventListener("abort", onAbort));
598
675
  }
599
- mergeChunksToStream(statementResult, auth, manifest, format, output, signal).catch(
600
- (err) => {
601
- output.destroy(err);
602
- }
603
- );
676
+ output.on("error", (err) => {
677
+ if (err instanceof AbortError)
678
+ return;
679
+ if (output.listenerCount("error") === 1)
680
+ throw err;
681
+ });
682
+ mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger).catch((err) => {
683
+ logger?.error?.(`fetchStream error while streaming statement ${statementId}.`, {
684
+ ...baseLog,
685
+ error: err
686
+ });
687
+ output.destroy(err);
688
+ });
604
689
  return output;
605
690
  }
606
- async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal) {
607
- const result = statementResult.result;
608
- let urls = result?.external_links?.map((link) => link.external_link) ?? [];
609
- if (urls.length === 0 && manifest.total_chunk_count > 0) {
610
- for (let i = 0; i < manifest.total_chunk_count; i++) {
611
- if (signal?.aborted) throw new AbortError("Aborted while collecting URLs");
612
- const chunkData = await getChunk(auth, statementResult.statement_id, i, signal);
613
- const chunkUrls = chunkData.external_links?.map((link) => link.external_link) ?? [];
614
- urls.push(...chunkUrls);
691
+ async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger) {
692
+ const statementId = statementResult.statement_id;
693
+ const baseLog = { statementId, manifest, format, forceMerge };
694
+ logger?.info?.(`fetchStream collecting external links for statement ${statementId}.`, baseLog);
695
+ const urls = await collectExternalUrls(statementResult, auth, manifest, signal);
696
+ if (urls.length === 0) {
697
+ logger?.info?.(`fetchStream no external links found for statement ${statementId}.`, baseLog);
698
+ return void output.end();
699
+ }
700
+ if (urls.length === 1 && !forceMerge) {
701
+ logger?.info?.(`fetchStream piping single external link for statement ${statementId}.`, {
702
+ ...baseLog,
703
+ urlCount: urls.length
704
+ });
705
+ return pipeUrlToOutput(urls[0], output, signal);
706
+ }
707
+ logger?.info?.(`fetchStream merging ${urls.length} external links for statement ${statementId}.`, {
708
+ ...baseLog,
709
+ urlCount: urls.length
710
+ });
711
+ return (0, import_merge_streams.mergeStreamsFromUrls)(format, signal ? { urls, output, signal } : { urls, output });
712
+ }
713
+ async function collectExternalUrls(statementResult, auth, manifest, signal) {
714
+ const chunkUrls = /* @__PURE__ */ new Map();
715
+ addChunkLinks(chunkUrls, statementResult.result?.external_links);
716
+ if (!manifest.total_chunk_count)
717
+ return flattenChunkUrls(chunkUrls);
718
+ for (let i = 0; i < manifest.total_chunk_count; i++) {
719
+ if (chunkUrls.has(i))
720
+ continue;
721
+ if (signal?.aborted)
722
+ throw new AbortError("Aborted while collecting URLs");
723
+ const chunkData = await getChunk(auth, statementResult.statement_id, i, signal);
724
+ addChunkLinks(chunkUrls, chunkData.external_links);
725
+ }
726
+ return flattenChunkUrls(chunkUrls);
727
+ }
728
+ function addChunkLinks(chunkUrls, externalLinks) {
729
+ if (!externalLinks)
730
+ return;
731
+ for (const link of externalLinks) {
732
+ if (!isNonEmptyString(link.external_link))
733
+ continue;
734
+ const existing = chunkUrls.get(link.chunk_index);
735
+ if (existing) {
736
+ existing.push(link.external_link);
737
+ } else {
738
+ chunkUrls.set(link.chunk_index, [link.external_link]);
615
739
  }
616
740
  }
617
- if (urls.length === 0)
618
- return void output.end();
619
- await (0, import_merge_streams.mergeStreamsFromUrls)(format, signal ? { urls, output, signal } : { urls, output });
741
+ }
742
+ function flattenChunkUrls(chunkUrls) {
743
+ if (chunkUrls.size === 0)
744
+ return [];
745
+ const sorted = [...chunkUrls.entries()].sort(([a], [b]) => a - b);
746
+ const urls = [];
747
+ for (const [, links] of sorted) {
748
+ urls.push(...links);
749
+ }
750
+ return urls;
751
+ }
752
+ function isNonEmptyString(value) {
753
+ return typeof value === "string" && value.length > 0;
620
754
  }
621
755
 
622
756
  // src/api/fetchRow.ts
623
757
  async function fetchRow(statementResult, auth, options = {}) {
624
- const { signal, onEachRow, format } = options;
758
+ const { signal, onEachRow, format, logger } = options;
625
759
  const manifest = validateSucceededResult(statementResult);
760
+ const statementId = statementResult.statement_id;
761
+ const logContext = { statementId, manifest, requestedFormat: format };
626
762
  const mapRow = createRowMapper(manifest, format);
763
+ logger?.info?.(`fetchRow fetching rows for statement ${statementId}.`, {
764
+ ...logContext,
765
+ resultType: statementResult.result?.external_links ? "EXTERNAL_LINKS" : "INLINE"
766
+ });
627
767
  if (statementResult.result?.external_links) {
628
768
  if (manifest.format !== "JSON_ARRAY") {
769
+ logger?.error?.(`fetchRow only supports JSON_ARRAY for external_links; got ${manifest.format}.`, logContext);
629
770
  throw new DatabricksSqlError(
630
771
  `fetchRow only supports JSON_ARRAY for external_links. Received: ${manifest.format}`,
631
772
  "UNSUPPORTED_FORMAT",
632
- statementResult.statement_id
773
+ statementId
633
774
  );
634
775
  }
635
- const stream = fetchStream(statementResult, auth, signal ? { signal } : {});
636
- await consumeJsonArrayStream(stream, mapRow, onEachRow, signal);
776
+ logger?.info?.(`fetchRow streaming external links for statement ${statementId}.`, logContext);
777
+ const stream = fetchStream(statementResult, auth, {
778
+ ...signal ? { signal } : {},
779
+ ...logger ? { logger } : {}
780
+ });
781
+ await consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext);
637
782
  return;
638
783
  }
639
784
  const totalChunks = manifest.total_chunk_count;
640
785
  const dataArray = statementResult.result?.data_array;
641
786
  if (dataArray) {
787
+ logger?.info?.(`fetchRow processing inline rows for statement ${statementId}.`, {
788
+ ...logContext,
789
+ inlineRows: dataArray.length
790
+ });
642
791
  for (const row of dataArray) {
643
792
  if (signal?.aborted) throw new AbortError("Aborted");
644
793
  onEachRow?.(mapRow(row));
645
794
  }
646
795
  }
647
796
  if (totalChunks > 1) {
648
- const statementId = statementResult.statement_id;
797
+ logger?.info?.(`fetchRow processing ${totalChunks} chunks for statement ${statementId}.`, logContext);
649
798
  for (let chunkIndex = 1; chunkIndex < totalChunks; chunkIndex++) {
650
799
  if (signal?.aborted) throw new AbortError("Aborted");
651
800
  const chunk = await getChunk(auth, statementId, chunkIndex, signal);
@@ -664,10 +813,14 @@ async function fetchRow(statementResult, auth, options = {}) {
664
813
  }
665
814
  }
666
815
  }
667
- async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
816
+ async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext) {
668
817
  const jsonStream = stream.pipe((0, import_stream_json.parser)()).pipe((0, import_StreamArray.streamArray)());
669
818
  for await (const item of jsonStream) {
670
819
  if (signal?.aborted) {
820
+ logger?.info?.("fetchRow abort detected while streaming JSON_ARRAY rows.", {
821
+ ...logContext,
822
+ aborted: signal.aborted
823
+ });
671
824
  stream.destroy(new AbortError("Aborted"));
672
825
  throw new AbortError("Aborted");
673
826
  }
@@ -685,34 +838,74 @@ async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
685
838
  // src/api/fetchAll.ts
686
839
  async function fetchAll(statementResult, auth, options = {}) {
687
840
  const rows = [];
841
+ const statementId = statementResult.statement_id;
842
+ const manifest = statementResult.manifest;
843
+ const logContext = { statementId, manifest, requestedFormat: options.format };
688
844
  const fetchOptions = {
689
845
  // Collect rows as they are streamed in.
690
846
  onEachRow: (row) => {
691
847
  rows.push(row);
692
848
  }
693
849
  };
850
+ const { logger } = options;
851
+ logger?.info?.(`fetchAll fetching all rows for statement ${statementId}.`, logContext);
694
852
  if (options.signal)
695
853
  fetchOptions.signal = options.signal;
696
854
  if (options.format)
697
855
  fetchOptions.format = options.format;
856
+ if (options.logger)
857
+ fetchOptions.logger = options.logger;
698
858
  await fetchRow(statementResult, auth, fetchOptions);
859
+ logger?.info?.(`fetchAll fetched ${rows.length} rows for statement ${statementId}.`, {
860
+ ...logContext,
861
+ rowCount: rows.length,
862
+ resolvedFormat: options.format ?? manifest?.format
863
+ });
699
864
  return rows;
700
865
  }
701
866
 
702
867
  // src/api/mergeExternalLinks.ts
703
868
  async function mergeExternalLinks(statementResult, auth, options) {
704
- const { signal, mergeStreamToExternalLink } = options;
705
- if (!statementResult.result?.external_links)
869
+ const { signal, mergeStreamToExternalLink, forceMerge, logger } = options;
870
+ const statementId = statementResult.statement_id;
871
+ const manifest = statementResult.manifest;
872
+ const externalLinks = statementResult.result?.external_links;
873
+ const totalChunks = manifest?.total_chunk_count ?? 0;
874
+ const logContext = { statementId, manifest, totalChunks, forceMerge };
875
+ if (!externalLinks) {
876
+ logger?.info?.(`mergeExternalLinks no external links to merge for statement ${statementId}.`, logContext);
706
877
  return statementResult;
707
- const stream = fetchStream(statementResult, auth, signal ? { signal } : {});
878
+ }
879
+ if (!forceMerge) {
880
+ const isSingleChunk = totalChunks <= 1;
881
+ if (isSingleChunk) {
882
+ logger?.info?.(`mergeExternalLinks skipping merge for single external link in statement ${statementId}.`, {
883
+ ...logContext,
884
+ totalChunks
885
+ });
886
+ return statementResult;
887
+ }
888
+ }
889
+ logger?.info?.(`mergeExternalLinks merging external links for statement ${statementId}.`, logContext);
890
+ const stream = fetchStream(statementResult, auth, {
891
+ ...signal ? { signal } : {},
892
+ ...forceMerge !== void 0 ? { forceMerge } : {},
893
+ ...logger ? { logger } : {}
894
+ });
895
+ logger?.info?.(`mergeExternalLinks uploading merged external link for statement ${statementId}.`, logContext);
708
896
  const uploadResult = await mergeStreamToExternalLink(stream);
709
- const manifest = validateSucceededResult(statementResult);
710
- const totalRowCount = manifest.total_row_count ?? 0;
897
+ logger?.info?.(`mergeExternalLinks uploaded merged external link for statement ${statementId}.`, {
898
+ ...logContext,
899
+ byteCount: uploadResult.byte_count,
900
+ expiration: uploadResult.expiration
901
+ });
902
+ const validatedManifest = validateSucceededResult(statementResult);
903
+ const totalRowCount = validatedManifest.total_row_count ?? 0;
711
904
  return {
712
905
  statement_id: statementResult.statement_id,
713
906
  status: statementResult.status,
714
907
  manifest: {
715
- ...manifest,
908
+ ...validatedManifest,
716
909
  total_chunk_count: 1,
717
910
  total_byte_count: uploadResult.byte_count,
718
911
  chunks: [