@bitofsky/databricks-sql 1.0.1 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -22
- package/dist/index.cjs +185 -49
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +17 -2
- package/dist/index.d.ts +17 -2
- package/dist/index.js +185 -49
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/README.md  CHANGED

@@ -23,6 +23,7 @@ The goal is simple: stream big results with stable memory usage and without forc
 - Optimized polling with server-side wait (up to 50s) before falling back to client polling.
 - Query metrics support via Query History API (`enableMetrics` option).
 - Efficient external link handling: merge chunks into a single stream.
+- Handles partial external link responses by fetching missing chunk metadata.
 - `mergeExternalLinks` supports streaming uploads and returns a new StatementResult with a presigned URL.
 - `fetchRow`/`fetchAll` support `JSON_OBJECT` (schema-based row mapping).
 - External links + JSON_ARRAY are supported for row iteration (streaming JSON parsing).
@@ -48,12 +49,16 @@ console.log(rows) // [{ value: 1 }]
 ```
 
 ## Sample (Streaming + Presigned URL)
-Stream external links into S3, then return a single presigned URL
+Stream external links into S3 with gzip compression, then return a single presigned URL.
 
 ```ts
 import { executeStatement, mergeExternalLinks } from '@bitofsky/databricks-sql'
-import { GetObjectCommand,
+import { GetObjectCommand, HeadObjectCommand, S3Client } from '@aws-sdk/client-s3'
+import { Upload } from '@aws-sdk/lib-storage'
 import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
+import { createGzip } from 'zlib'
+import { pipeline } from 'stream/promises'
+import { PassThrough } from 'stream'
 
 const auth = {
   token: process.env.DATABRICKS_TOKEN!,
@@ -72,31 +77,42 @@ const result = await executeStatement(
 
 const merged = await mergeExternalLinks(result, auth, {
   mergeStreamToExternalLink: async (stream) => {
-    const key = `merged-${Date.now()}.csv`
-
-
+    const key = `merged-${Date.now()}.csv.gz`
+    const gzip = createGzip() // Compress with gzip and upload to S3
+    const passThrough = new PassThrough()
+
+    const upload = new Upload({
+      client: s3,
+      params: {
         Bucket: bucket,
         Key: key,
-        Body:
-        ContentType: 'text/csv',
-
-
-
-    const
-
-
-
-
+        Body: passThrough,
+        ContentType: 'text/csv; charset=utf-8',
+        ContentEncoding: 'gzip',
+      },
+    })
+    const uploadPromise = upload.done()
+
+    await Promise.all([
+      pipeline(stream, gzip, passThrough),
+      uploadPromise,
+    ])
+
+    // Get actual uploaded size via HeadObject
+    const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key }))
+    // Generate presigned URL valid for 1 hour
+    const externalLink = await getSignedUrl(s3, new GetObjectCommand({ Bucket: bucket, Key: key }),{ expiresIn: 3600 })
 
     return {
-      externalLink,
-      byte_count: 0,
-      expiration: new Date(Date.now() + 3600 * 1000).toISOString(),
+      externalLink, // Presigned URL to merged gzip CSV
+      byte_count: head.ContentLength ?? 0, // Actual compressed size
+      expiration: new Date(Date.now() + 3600 * 1000).toISOString(), // 1 hour from now
     }
   },
 })
 
-console.log(merged.result?.external_links?.[0].external_link) // Presigned URL to merged CSV
+console.log(merged.result?.external_links?.[0].external_link) // Presigned URL to merged gzip CSV
+console.log(merged.result?.external_links?.[0].byte_count) // Actual compressed size
 ```
 
 ## Sample (Progress with Metrics)
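The `mergeStreamToExternalLink` callback above only has to satisfy the `{ externalLink, byte_count, expiration }` contract shown in the options summary later in this diff; S3 is not required. A minimal sketch of the same contract that gzips the merged stream to a local file instead (the path and the `file://` link are illustrative assumptions, and `result`/`auth` are reused from the sample above):

```ts
import { mergeExternalLinks } from '@bitofsky/databricks-sql'
import { createWriteStream } from 'fs'
import { stat } from 'fs/promises'
import { createGzip } from 'zlib'
import { pipeline } from 'stream/promises'
import { pathToFileURL } from 'url'

const mergedLocally = await mergeExternalLinks(result, auth, {
  mergeStreamToExternalLink: async (stream) => {
    const path = `/tmp/merged-${Date.now()}.csv.gz` // hypothetical local path
    await pipeline(stream, createGzip(), createWriteStream(path)) // compress while writing
    const { size } = await stat(path)
    return {
      externalLink: pathToFileURL(path).href, // any URL your consumers can resolve
      byte_count: size,
      expiration: new Date(Date.now() + 3600 * 1000).toISOString(),
    }
  },
})
```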
@@ -116,8 +132,8 @@ const result = await executeStatement(
   auth,
   {
     enableMetrics: true,
-    onProgress: (
-    console.log(`State: ${status.state}`)
+    onProgress: (result, metrics) => {
+      console.log(`State: ${result.status.state}`)
       if (metrics) { // metrics is optional, only present when enableMetrics: true
         console.log(` Execution time: ${metrics.execution_time_ms}ms`)
         console.log(` Rows produced: ${metrics.rows_produced_count}`)
@@ -178,6 +194,7 @@ function executeStatement(
 ```
 - Calls the Databricks Statement Execution API and polls until completion.
 - Server waits up to 50s (`wait_timeout`) before client-side polling begins.
+- Default `wait_timeout` is `50s`, or `0s` when `onProgress` is provided.
 - Use `options.onProgress` to receive status updates with optional metrics.
 - Set `enableMetrics: true` to fetch query metrics from Query History API on each poll.
 - Throws `DatabricksSqlError` on failure, `StatementCancelledError` on cancel, and `AbortError` on abort.
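Combined with `signal?: AbortSignal` in the options summary below, these bullets suggest a call-site pattern along these lines. A rough sketch, assuming the error classes named above are exported from the package entry point (this diff does not show the exports) and reusing `auth` from the earlier samples; the query and the 30-second cap are illustrative:

```ts
import {
  executeStatement,
  DatabricksSqlError,
  StatementCancelledError,
} from '@bitofsky/databricks-sql'

const controller = new AbortController()
const timer = setTimeout(() => controller.abort(), 30_000) // illustrative client-side cap

try {
  const result = await executeStatement('SELECT 1', auth, { signal: controller.signal })
  console.log(result.status.state) // 'SUCCEEDED'
} catch (err) {
  if (err instanceof StatementCancelledError) {
    // the statement was cancelled on the server
  } else if (err instanceof DatabricksSqlError) {
    // the statement failed; inspect err for details
  } else {
    // AbortError: the signal fired and the statement is cancelled best-effort
  }
} finally {
  clearTimeout(timer)
}
```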
@@ -193,6 +210,7 @@ function fetchRow(
 - Streams each row to `options.onEachRow`.
 - Use `format: 'JSON_OBJECT'` to map rows into schema-based objects.
 - Supports `INLINE` results or `JSON_ARRAY` formatted `EXTERNAL_LINKS` only.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.
 
 ### fetchAll(statementResult, auth, options?)
 ```ts
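A minimal sketch of the row-streaming shape described above, reusing `result` and `auth` from the earlier samples (the counter is illustrative):

```ts
import { fetchRow } from '@bitofsky/databricks-sql'

let count = 0
await fetchRow(result, auth, {
  format: 'JSON_OBJECT', // map each row onto the result schema
  onEachRow: (row) => {
    count += 1 // row is a schema-keyed object, e.g. { value: 1 }
  },
})
console.log(`streamed ${count} rows`)
```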
@@ -204,6 +222,7 @@ function fetchAll(
 ```
 - Collects all rows into an array. For large results, prefer `fetchRow`/`fetchStream`.
 - Supports `INLINE` results or `JSON_ARRAY` formatted `EXTERNAL_LINKS` only.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.
 
 ### fetchStream(statementResult, auth, options?)
 ```ts
@@ -218,6 +237,7 @@ function fetchStream(
 - Throws if the result is `INLINE`.
 - Ends as an empty stream when no external links exist.
 - `forceMerge: true` forces merge even when there is only a single external link.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.
 
 ### mergeExternalLinks(statementResult, auth, options)
 ```ts
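Because `fetchStream` hands back a Node.js readable stream, a common pattern is piping it straight into a sink. A minimal sketch, reusing `result` and `auth` from the earlier samples; the output path is a placeholder assumption:

```ts
import { fetchStream } from '@bitofsky/databricks-sql'
import { createWriteStream } from 'fs'
import { pipeline } from 'stream/promises'

const stream = fetchStream(result, auth, { forceMerge: true }) // one merged stream over all chunks
await pipeline(stream, createWriteStream('/tmp/result.json')) // hypothetical output path
```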
@@ -236,8 +256,9 @@ function mergeExternalLinks(
 ### Options (Summary)
 ```ts
 type ExecuteStatementOptions = {
-  onProgress?: (
+  onProgress?: (result: StatementResult, metrics?: QueryMetrics) => void
   enableMetrics?: boolean // Fetch metrics from Query History API (default: false)
+  logger?: Logger
   signal?: AbortSignal
   disposition?: 'INLINE' | 'EXTERNAL_LINKS'
   format?: 'JSON_ARRAY' | 'ARROW_STREAM' | 'CSV'

@@ -255,21 +276,25 @@ type FetchRowsOptions = {
   signal?: AbortSignal
   onEachRow?: (row: RowArray | RowObject) => void
   format?: 'JSON_ARRAY' | 'JSON_OBJECT'
+  logger?: Logger
 }
 
 type FetchAllOptions = {
   signal?: AbortSignal
   format?: 'JSON_ARRAY' | 'JSON_OBJECT'
+  logger?: Logger
 }
 
 type FetchStreamOptions = {
   signal?: AbortSignal
   forceMerge?: boolean
+  logger?: Logger
 }
 
 type MergeExternalLinksOptions = {
   signal?: AbortSignal
   forceMerge?: boolean
+  logger?: Logger
   mergeStreamToExternalLink: (stream: Readable) => Promise<{
     externalLink: string
     byte_count: number
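The `Logger` type referenced by the new `logger` options is not shown in this diff; the compiled code below only ever calls `logger?.info?.(message, context)` and `logger?.error?.(message, context)`, so any object with optional `info`/`error` methods of that shape should do. A minimal sketch (the interface shape is inferred from those call sites, not from a published type; `result` and `auth` come from the earlier samples):

```ts
import { fetchAll } from '@bitofsky/databricks-sql'

// Assumed shape: optional info/error methods taking (message, context?).
const logger = {
  info: (message: string, context?: unknown) => console.log('[databricks-sql]', message, context ?? ''),
  error: (message: string, context?: unknown) => console.error('[databricks-sql]', message, context ?? ''),
}

const rows = await fetchAll(result, auth, { format: 'JSON_OBJECT', logger })
```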
package/dist/index.cjs  CHANGED

@@ -272,18 +272,22 @@ var TERMINAL_STATES = /* @__PURE__ */ new Set([
 ]);
 var POLL_INTERVAL_MS = 5e3;
 async function fetchMetrics(auth, statementId, signal) {
-
-
-    return queryInfo.metrics;
-  } catch {
-    return void 0;
-  }
+  const queryInfo = await getQueryMetrics(auth, statementId, signal);
+  return queryInfo.metrics;
 }
 async function executeStatement(query, auth, options = {}) {
   const warehouseId = options.warehouse_id ?? extractWarehouseId(auth.httpPath);
-  const { signal, onProgress, enableMetrics } = options;
+  const { signal, onProgress, enableMetrics, logger } = options;
+  const waitTimeout = options.wait_timeout ?? (onProgress ? "0s" : "50s");
+  let cancelIssued = false;
   throwIfAborted(signal, "executeStatement");
-  const emitProgress = onProgress ? async (
+  const emitProgress = onProgress ? async () => result ? onProgress(
+    result,
+    enableMetrics ? await fetchMetrics(auth, result.statement_id, signal).catch((e) => {
+      logger?.error?.(`executeStatement Failed to fetch query metrics for statement ${result?.statement_id}: ${String(e)}`, { statementId: result?.statement_id });
+      return void 0;
+    }) : void 0
+  ) : void 0 : void 0;
   const request = Object.fromEntries(
     Object.entries({
       warehouse_id: warehouseId,
@@ -292,25 +296,50 @@ async function executeStatement(query, auth, options = {}) {
       disposition: options.disposition,
       format: options.format,
       on_wait_timeout: options.on_wait_timeout ?? "CONTINUE",
-      wait_timeout:
+      wait_timeout: waitTimeout,
       row_limit: options.row_limit,
       catalog: options.catalog,
       schema: options.schema,
       parameters: options.parameters
     }).filter(([, v]) => v !== void 0)
   );
+  logger?.info?.(`executeStatement Executing statement on warehouse ${warehouseId}...`);
   let result = await postStatement(auth, request, signal);
-
-  if (
-
-
+  const cancelStatementSafely = async () => {
+    if (cancelIssued) return;
+    logger?.info?.("executeStatement Abort signal received during executeStatement.");
+    cancelIssued = true;
+    await cancelStatement(auth, result.statement_id).catch((err) => {
+      logger?.error?.("executeStatement Failed to cancel statement after abort.", err);
+    });
+  };
+  if (signal?.aborted) {
+    await cancelStatementSafely();
+    throw new AbortError("Aborted during polling");
+  }
+  const onAbort = () => cancelStatementSafely().catch(() => {
+  });
+  try {
+    signal?.addEventListener("abort", onAbort, { once: true });
+    while (!TERMINAL_STATES.has(result.status.state)) {
+      logger?.info?.(`executeStatement Statement ${result.statement_id} in state ${result.status.state}; polling for status...`);
+      await delay(POLL_INTERVAL_MS, signal);
+      result = await getStatement(auth, result.statement_id, signal);
+      await emitProgress?.();
+    }
+  } catch (err) {
+    if (err instanceof AbortError || signal?.aborted) {
+      logger?.info?.("executeStatement Abort detected in executeStatement polling loop.");
+      await cancelStatementSafely();
       throw new AbortError("Aborted during polling");
     }
-
-
-
+    logger?.error?.(`executeStatement Error during executeStatement polling: ${String(err)}`);
+    throw err;
+  } finally {
+    logger?.info?.(`executeStatement Statement ${result.statement_id} reached final state: ${result.status.state}`);
+    signal?.removeEventListener("abort", onAbort);
   }
-  await emitProgress?.(
+  await emitProgress?.();
   if (result.status.state === "SUCCEEDED")
     return result;
   if (result.status.state === "CANCELED")
@@ -617,52 +646,110 @@ function convertBoolean(value) {
 var import_node_stream2 = require("stream");
 var import_merge_streams = require("@bitofsky/merge-streams");
 function fetchStream(statementResult, auth, options = {}) {
-  const { signal, forceMerge } = options;
+  const { signal, forceMerge, logger } = options;
   const manifest = validateSucceededResult(statementResult);
   const format = manifest.format;
+  const statementId = statementResult.statement_id;
+  const baseLog = { statementId, manifest, format, forceMerge };
   if (statementResult.result?.data_array) {
+    logger?.error?.(
+      `fetchStream only supports EXTERNAL_LINKS results for statement ${statementId}.`,
+      { ...baseLog, hasDataArray: true }
+    );
     throw new DatabricksSqlError(
       "fetchStream only supports EXTERNAL_LINKS results",
       "UNSUPPORTED_FORMAT",
-
+      statementId
     );
   }
+  logger?.info?.(`fetchStream creating stream for statement ${statementId}.`, {
+    ...baseLog,
+    hasExternalLinks: Boolean(statementResult.result?.external_links?.length)
+  });
   const output = new import_node_stream2.PassThrough();
   if (signal) {
-    const onAbort = () =>
+    const onAbort = () => {
+      logger?.info?.(`fetchStream abort signal received while streaming statement ${statementId}.`, baseLog);
+      output.destroy(new AbortError("Stream aborted"));
+    };
     signal.addEventListener("abort", onAbort, { once: true });
     output.once("close", () => signal.removeEventListener("abort", onAbort));
   }
-
+  output.on("error", (err) => {
+    if (err instanceof AbortError)
+      return;
+    if (output.listenerCount("error") === 1)
+      throw err;
+  });
+  mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger).catch((err) => {
+    logger?.error?.(`fetchStream error while streaming statement ${statementId}.`, {
+      ...baseLog,
+      error: err
+    });
+    output.destroy(err);
+  });
   return output;
 }
-async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge) {
+async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger) {
+  const statementId = statementResult.statement_id;
+  const baseLog = { statementId, manifest, format, forceMerge };
+  logger?.info?.(`fetchStream collecting external links for statement ${statementId}.`, baseLog);
   const urls = await collectExternalUrls(statementResult, auth, manifest, signal);
-  if (urls.length === 0)
+  if (urls.length === 0) {
+    logger?.info?.(`fetchStream no external links found for statement ${statementId}.`, baseLog);
     return void output.end();
-
+  }
+  if (urls.length === 1 && !forceMerge) {
+    logger?.info?.(`fetchStream piping single external link for statement ${statementId}.`, {
+      ...baseLog,
+      urlCount: urls.length
+    });
     return pipeUrlToOutput(urls[0], output, signal);
+  }
+  logger?.info?.(`fetchStream merging ${urls.length} external links for statement ${statementId}.`, {
+    ...baseLog,
+    urlCount: urls.length
+  });
   return (0, import_merge_streams.mergeStreamsFromUrls)(format, signal ? { urls, output, signal } : { urls, output });
 }
 async function collectExternalUrls(statementResult, auth, manifest, signal) {
-  const
-
-  return urls;
+  const chunkUrls = /* @__PURE__ */ new Map();
+  addChunkLinks(chunkUrls, statementResult.result?.external_links);
   if (!manifest.total_chunk_count)
-    return
-  const chunkUrls = [];
+    return flattenChunkUrls(chunkUrls);
   for (let i = 0; i < manifest.total_chunk_count; i++) {
+    if (chunkUrls.has(i))
+      continue;
     if (signal?.aborted)
       throw new AbortError("Aborted while collecting URLs");
     const chunkData = await getChunk(auth, statementResult.statement_id, i, signal);
-    chunkUrls
+    addChunkLinks(chunkUrls, chunkData.external_links);
   }
-  return chunkUrls;
+  return flattenChunkUrls(chunkUrls);
 }
-function
+function addChunkLinks(chunkUrls, externalLinks) {
   if (!externalLinks)
+    return;
+  for (const link of externalLinks) {
+    if (!isNonEmptyString(link.external_link))
+      continue;
+    const existing = chunkUrls.get(link.chunk_index);
+    if (existing) {
+      existing.push(link.external_link);
+    } else {
+      chunkUrls.set(link.chunk_index, [link.external_link]);
+    }
+  }
+}
+function flattenChunkUrls(chunkUrls) {
+  if (chunkUrls.size === 0)
     return [];
-
+  const sorted = [...chunkUrls.entries()].sort(([a], [b]) => a - b);
+  const urls = [];
+  for (const [, links] of sorted) {
+    urls.push(...links);
+  }
+  return urls;
 }
 function isNonEmptyString(value) {
   return typeof value === "string" && value.length > 0;
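In plain terms, the new URL collection above keeps a map keyed by `chunk_index`, seeds it from whatever `external_links` the statement result already carries, fetches only the missing chunk indexes, and flattens the map in chunk order. A simplified TypeScript sketch of that idea (type names and the `getChunk` signature here are assumptions for illustration, not the library's internals):

```ts
type ExternalLink = { chunk_index: number; external_link?: string }

async function collectUrls(
  seedLinks: ExternalLink[] | undefined,
  totalChunks: number,
  getChunk: (index: number) => Promise<{ external_links?: ExternalLink[] }>,
): Promise<string[]> {
  const byChunk = new Map<number, string[]>()
  const add = (links?: ExternalLink[]) => {
    for (const link of links ?? []) {
      if (!link.external_link) continue
      byChunk.set(link.chunk_index, [...(byChunk.get(link.chunk_index) ?? []), link.external_link])
    }
  }
  add(seedLinks) // seed with links already present on the statement result
  for (let i = 0; i < totalChunks; i++) {
    if (byChunk.has(i)) continue // fetch metadata only for chunks that were not returned
    add((await getChunk(i)).external_links)
  }
  // flatten in ascending chunk order so the merged stream preserves row order
  return [...byChunk.entries()].sort(([a], [b]) => a - b).flatMap(([, urls]) => urls)
}
```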
@@ -670,31 +757,46 @@ function isNonEmptyString(value) {
 
 // src/api/fetchRow.ts
 async function fetchRow(statementResult, auth, options = {}) {
-  const { signal, onEachRow, format } = options;
+  const { signal, onEachRow, format, logger } = options;
   const manifest = validateSucceededResult(statementResult);
+  const statementId = statementResult.statement_id;
+  const logContext = { statementId, manifest, requestedFormat: format };
   const mapRow = createRowMapper(manifest, format);
+  logger?.info?.(`fetchRow fetching rows for statement ${statementId}.`, {
+    ...logContext,
+    resultType: statementResult.result?.external_links ? "EXTERNAL_LINKS" : "INLINE"
+  });
   if (statementResult.result?.external_links) {
     if (manifest.format !== "JSON_ARRAY") {
+      logger?.error?.(`fetchRow only supports JSON_ARRAY for external_links; got ${manifest.format}.`, logContext);
       throw new DatabricksSqlError(
         `fetchRow only supports JSON_ARRAY for external_links. Received: ${manifest.format}`,
         "UNSUPPORTED_FORMAT",
-
+        statementId
       );
     }
-
-
+    logger?.info?.(`fetchRow streaming external links for statement ${statementId}.`, logContext);
+    const stream = fetchStream(statementResult, auth, {
+      ...signal ? { signal } : {},
+      ...logger ? { logger } : {}
+    });
+    await consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext);
     return;
   }
   const totalChunks = manifest.total_chunk_count;
   const dataArray = statementResult.result?.data_array;
   if (dataArray) {
+    logger?.info?.(`fetchRow processing inline rows for statement ${statementId}.`, {
+      ...logContext,
+      inlineRows: dataArray.length
+    });
     for (const row of dataArray) {
       if (signal?.aborted) throw new AbortError("Aborted");
       onEachRow?.(mapRow(row));
     }
   }
   if (totalChunks > 1) {
-
+    logger?.info?.(`fetchRow processing ${totalChunks} chunks for statement ${statementId}.`, logContext);
     for (let chunkIndex = 1; chunkIndex < totalChunks; chunkIndex++) {
       if (signal?.aborted) throw new AbortError("Aborted");
       const chunk = await getChunk(auth, statementId, chunkIndex, signal);
@@ -713,10 +815,14 @@ async function fetchRow(statementResult, auth, options = {}) {
     }
   }
 }
-async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
+async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext) {
   const jsonStream = stream.pipe((0, import_stream_json.parser)()).pipe((0, import_StreamArray.streamArray)());
   for await (const item of jsonStream) {
     if (signal?.aborted) {
+      logger?.info?.("fetchRow abort detected while streaming JSON_ARRAY rows.", {
+        ...logContext,
+        aborted: signal.aborted
+      });
       stream.destroy(new AbortError("Aborted"));
       throw new AbortError("Aborted");
     }
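The JSON_ARRAY consumption above relies on `stream-json`: the byte stream is piped through `parser()` and `streamArray()`, which emits one `{ key, value }` object per array element, so rows can be iterated without buffering the whole payload. A standalone sketch of that pattern (import paths follow stream-json's CommonJS layout as required by the compiled output; the inline payload is illustrative):

```ts
import { Readable } from 'stream'
import { parser } from 'stream-json'
import { streamArray } from 'stream-json/streamers/StreamArray'

// Pretend this string is a large JSON_ARRAY payload downloaded from an external link.
const source = Readable.from(['[[1,"a"],', '[2,"b"]]'])

const rows = source.pipe(parser()).pipe(streamArray())
for await (const item of rows) {
  console.log(item.value) // [1,"a"], then [2,"b"]: one array element at a time
}
```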
@@ -734,44 +840,74 @@ async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
 // src/api/fetchAll.ts
 async function fetchAll(statementResult, auth, options = {}) {
   const rows = [];
+  const statementId = statementResult.statement_id;
+  const manifest = statementResult.manifest;
+  const logContext = { statementId, manifest, requestedFormat: options.format };
   const fetchOptions = {
     // Collect rows as they are streamed in.
     onEachRow: (row) => {
       rows.push(row);
     }
   };
+  const { logger } = options;
+  logger?.info?.(`fetchAll fetching all rows for statement ${statementId}.`, logContext);
   if (options.signal)
     fetchOptions.signal = options.signal;
   if (options.format)
     fetchOptions.format = options.format;
+  if (options.logger)
+    fetchOptions.logger = options.logger;
   await fetchRow(statementResult, auth, fetchOptions);
+  logger?.info?.(`fetchAll fetched ${rows.length} rows for statement ${statementId}.`, {
+    ...logContext,
+    rowCount: rows.length,
+    resolvedFormat: options.format ?? manifest?.format
+  });
   return rows;
 }
 
 // src/api/mergeExternalLinks.ts
 async function mergeExternalLinks(statementResult, auth, options) {
-  const { signal, mergeStreamToExternalLink, forceMerge } = options;
-
+  const { signal, mergeStreamToExternalLink, forceMerge, logger } = options;
+  const statementId = statementResult.statement_id;
+  const manifest = statementResult.manifest;
+  const externalLinks = statementResult.result?.external_links;
+  const totalChunks = manifest?.total_chunk_count ?? 0;
+  const logContext = { statementId, manifest, totalChunks, forceMerge };
+  if (!externalLinks) {
+    logger?.info?.(`mergeExternalLinks no external links to merge for statement ${statementId}.`, logContext);
     return statementResult;
+  }
   if (!forceMerge) {
-    const
-
-
-
+    const isSingleChunk = totalChunks <= 1;
+    if (isSingleChunk) {
+      logger?.info?.(`mergeExternalLinks skipping merge for single external link in statement ${statementId}.`, {
+        ...logContext,
+        totalChunks
+      });
       return statementResult;
+    }
   }
+  logger?.info?.(`mergeExternalLinks merging external links for statement ${statementId}.`, logContext);
   const stream = fetchStream(statementResult, auth, {
     ...signal ? { signal } : {},
-    ...forceMerge !== void 0 ? { forceMerge } : {}
+    ...forceMerge !== void 0 ? { forceMerge } : {},
+    ...logger ? { logger } : {}
   });
+  logger?.info?.(`mergeExternalLinks uploading merged external link for statement ${statementId}.`, logContext);
   const uploadResult = await mergeStreamToExternalLink(stream);
-
-
+  logger?.info?.(`mergeExternalLinks uploaded merged external link for statement ${statementId}.`, {
+    ...logContext,
+    byteCount: uploadResult.byte_count,
+    expiration: uploadResult.expiration
+  });
+  const validatedManifest = validateSucceededResult(statementResult);
+  const totalRowCount = validatedManifest.total_row_count ?? 0;
   return {
     statement_id: statementResult.statement_id,
     status: statementResult.status,
     manifest: {
-      ...
+      ...validatedManifest,
       total_chunk_count: 1,
       total_byte_count: uploadResult.byte_count,
       chunks: [