@bitofsky/databricks-sql 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -22
- package/dist/index.cjs +176 -42
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +17 -2
- package/dist/index.d.ts +17 -2
- package/dist/index.js +176 -42
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
@@ -23,6 +23,7 @@ The goal is simple: stream big results with stable memory usage and without forc
 - Optimized polling with server-side wait (up to 50s) before falling back to client polling.
 - Query metrics support via Query History API (`enableMetrics` option).
 - Efficient external link handling: merge chunks into a single stream.
+- Handles partial external link responses by fetching missing chunk metadata.
 - `mergeExternalLinks` supports streaming uploads and returns a new StatementResult with a presigned URL.
 - `fetchRow`/`fetchAll` support `JSON_OBJECT` (schema-based row mapping).
 - External links + JSON_ARRAY are supported for row iteration (streaming JSON parsing).
@@ -48,12 +49,16 @@ console.log(rows) // [{ value: 1 }]
 ```
 
 ## Sample (Streaming + Presigned URL)
-Stream external links into S3, then return a single presigned URL
+Stream external links into S3 with gzip compression, then return a single presigned URL.
 
 ```ts
 import { executeStatement, mergeExternalLinks } from '@bitofsky/databricks-sql'
-import { GetObjectCommand,
+import { GetObjectCommand, HeadObjectCommand, S3Client } from '@aws-sdk/client-s3'
+import { Upload } from '@aws-sdk/lib-storage'
 import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
+import { createGzip } from 'zlib'
+import { pipeline } from 'stream/promises'
+import { PassThrough } from 'stream'
 
 const auth = {
   token: process.env.DATABRICKS_TOKEN!,
@@ -72,31 +77,42 @@ const result = await executeStatement(
 
 const merged = await mergeExternalLinks(result, auth, {
   mergeStreamToExternalLink: async (stream) => {
-    const key = `merged-${Date.now()}.csv`
-
-
+    const key = `merged-${Date.now()}.csv.gz`
+    const gzip = createGzip() // Compress with gzip and upload to S3
+    const passThrough = new PassThrough()
+
+    const upload = new Upload({
+      client: s3,
+      params: {
         Bucket: bucket,
         Key: key,
-        Body:
-        ContentType: 'text/csv',
-
-
-
-    const
-
-
-
-
+        Body: passThrough,
+        ContentType: 'text/csv; charset=utf-8',
+        ContentEncoding: 'gzip',
+      },
+    })
+    const uploadPromise = upload.done()
+
+    await Promise.all([
+      pipeline(stream, gzip, passThrough),
+      uploadPromise,
+    ])
+
+    // Get actual uploaded size via HeadObject
+    const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key }))
+    // Generate presigned URL valid for 1 hour
+    const externalLink = await getSignedUrl(s3, new GetObjectCommand({ Bucket: bucket, Key: key }),{ expiresIn: 3600 })
 
     return {
-      externalLink,
-      byte_count: 0,
-      expiration: new Date(Date.now() + 3600 * 1000).toISOString(),
+      externalLink, // Presigned URL to merged gzip CSV
+      byte_count: head.ContentLength ?? 0, // Actual compressed size
+      expiration: new Date(Date.now() + 3600 * 1000).toISOString(), // 1 hour from now
     }
   },
 })
 
-console.log(merged.result?.external_links?.[0].external_link) // Presigned URL to merged CSV
+console.log(merged.result?.external_links?.[0].external_link) // Presigned URL to merged gzip CSV
+console.log(merged.result?.external_links?.[0].byte_count) // Actual compressed size
 ```
 
 ## Sample (Progress with Metrics)
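The `mergeStreamToExternalLink` callback is storage-agnostic: the package only requires that it consume the merged `Readable` and resolve with an `externalLink`, a `byte_count`, and an `expiration`. A minimal sketch of the same contract written against the local filesystem instead of S3; the path and `file://` link are purely illustrative, not something the package prescribes.

```ts
// Hypothetical alternative to the S3 example above: satisfy the
// mergeStreamToExternalLink contract by writing the merged stream to disk.
import { createWriteStream } from 'fs'
import { stat } from 'fs/promises'
import { pipeline } from 'stream/promises'
import type { Readable } from 'stream'

async function mergeToLocalFile(stream: Readable) {
  const path = `/tmp/merged-${Date.now()}.csv`
  await pipeline(stream, createWriteStream(path)) // drain the merged chunks to a file
  const { size } = await stat(path)               // report the real byte count
  return {
    externalLink: `file://${path}`, // any URL your consumers can resolve
    byte_count: size,
    expiration: new Date(Date.now() + 3600 * 1000).toISOString(),
  }
}

// const merged = await mergeExternalLinks(result, auth, { mergeStreamToExternalLink: mergeToLocalFile })
```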
@@ -116,8 +132,8 @@ const result = await executeStatement(
   auth,
   {
     enableMetrics: true,
-    onProgress: (
-      console.log(`State: ${status.state}`)
+    onProgress: (result, metrics) => {
+      console.log(`State: ${result.status.state}`)
       if (metrics) { // metrics is optional, only present when enableMetrics: true
         console.log(` Execution time: ${metrics.execution_time_ms}ms`)
         console.log(` Rows produced: ${metrics.rows_produced_count}`)
@@ -178,6 +194,7 @@ function executeStatement(
 ```
 - Calls the Databricks Statement Execution API and polls until completion.
 - Server waits up to 50s (`wait_timeout`) before client-side polling begins.
+- Default `wait_timeout` is `50s`, or `0s` when `onProgress` is provided.
 - Use `options.onProgress` to receive status updates with optional metrics.
 - Set `enableMetrics: true` to fetch query metrics from Query History API on each poll.
 - Throws `DatabricksSqlError` on failure, `StatementCancelledError` on cancel, and `AbortError` on abort.
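The new default described in the added bullet means the two call styles behave differently. A rough sketch, reusing `auth` from the earlier samples; the SQL text and table name are illustrative.

```ts
import { executeStatement } from '@bitofsky/databricks-sql'

// Without onProgress: the server holds the request for up to the default 50s wait_timeout,
// so short queries usually complete in a single round trip.
const quick = await executeStatement('SELECT 1 AS value', auth)

// With onProgress: wait_timeout defaults to 0s, so the client starts polling immediately
// and the callback sees every observed state transition.
const watched = await executeStatement('SELECT * FROM samples.nyctaxi.trips', auth, {
  onProgress: (result) => console.log(result.status.state),
})
```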
@@ -193,6 +210,7 @@ function fetchRow(
 - Streams each row to `options.onEachRow`.
 - Use `format: 'JSON_OBJECT'` to map rows into schema-based objects.
 - Supports `INLINE` results or `JSON_ARRAY` formatted `EXTERNAL_LINKS` only.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.
 
 ### fetchAll(statementResult, auth, options?)
 ```ts
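For example, the `JSON_OBJECT` mapping combines with `onEachRow` to process rows one at a time. A minimal sketch, reusing `result` and `auth` from the earlier samples.

```ts
import { fetchRow } from '@bitofsky/databricks-sql'

await fetchRow(result, auth, {
  format: 'JSON_OBJECT', // rows arrive keyed by column name instead of as positional arrays
  onEachRow: (row) => {
    console.log(row)
  },
})
```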
@@ -204,6 +222,7 @@ function fetchAll(
 ```
 - Collects all rows into an array. For large results, prefer `fetchRow`/`fetchStream`.
 - Supports `INLINE` results or `JSON_ARRAY` formatted `EXTERNAL_LINKS` only.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.
 
 ### fetchStream(statementResult, auth, options?)
 ```ts
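A minimal sketch, reusing `result` and `auth` from the earlier samples; per the note above, this buffers everything in memory, so keep it to small result sets.

```ts
import { fetchAll } from '@bitofsky/databricks-sql'

const allRows = await fetchAll(result, auth, { format: 'JSON_OBJECT' })
console.log(allRows.length)
```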
@@ -218,6 +237,7 @@ function fetchStream(
 - Throws if the result is `INLINE`.
 - Ends as an empty stream when no external links exist.
 - `forceMerge: true` forces merge even when there is only a single external link.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.
 
 ### mergeExternalLinks(statementResult, auth, options)
 ```ts
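A minimal sketch that pipes the merged byte stream to a local file, assuming `result` came from an `EXTERNAL_LINKS` execution as in the earlier samples; the output path is illustrative.

```ts
import { fetchStream } from '@bitofsky/databricks-sql'
import { createWriteStream } from 'fs'
import { pipeline } from 'stream/promises'

// forceMerge: true also routes a single external link through the merge path.
const stream = fetchStream(result, auth, { forceMerge: true })
await pipeline(stream, createWriteStream('/tmp/result.json'))
```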
@@ -236,8 +256,9 @@ function mergeExternalLinks(
 ### Options (Summary)
 ```ts
 type ExecuteStatementOptions = {
-  onProgress?: (
+  onProgress?: (result: StatementResult, metrics?: QueryMetrics) => void
   enableMetrics?: boolean // Fetch metrics from Query History API (default: false)
+  logger?: Logger
   signal?: AbortSignal
   disposition?: 'INLINE' | 'EXTERNAL_LINKS'
   format?: 'JSON_ARRAY' | 'ARROW_STREAM' | 'CSV'
@@ -255,21 +276,25 @@ type FetchRowsOptions = {
   signal?: AbortSignal
   onEachRow?: (row: RowArray | RowObject) => void
   format?: 'JSON_ARRAY' | 'JSON_OBJECT'
+  logger?: Logger
 }
 
 type FetchAllOptions = {
   signal?: AbortSignal
   format?: 'JSON_ARRAY' | 'JSON_OBJECT'
+  logger?: Logger
 }
 
 type FetchStreamOptions = {
   signal?: AbortSignal
   forceMerge?: boolean
+  logger?: Logger
 }
 
 type MergeExternalLinksOptions = {
   signal?: AbortSignal
   forceMerge?: boolean
+  logger?: Logger
   mergeStreamToExternalLink: (stream: Readable) => Promise<{
     externalLink: string
     byte_count: number
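The `Logger` type added across these options is not spelled out in this diff. Judging by how the compiled code calls it (`logger?.info?.(message, meta)` / `logger?.error?.(message, err)`), a plain console-backed object along these lines should satisfy it; treat the exact shape as an assumption.

```ts
import { fetchAll } from '@bitofsky/databricks-sql'

// Assumed Logger shape: optional info/error methods taking a message plus optional metadata.
const consoleLogger = {
  info: (message: string, meta?: unknown) => console.info(message, meta ?? ''),
  error: (message: string, meta?: unknown) => console.error(message, meta ?? ''),
}

// `result` and `auth` come from the earlier samples.
await fetchAll(result, auth, { logger: consoleLogger })
```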
package/dist/index.cjs
CHANGED
@@ -281,9 +281,11 @@ async function fetchMetrics(auth, statementId, signal) {
 }
 async function executeStatement(query, auth, options = {}) {
   const warehouseId = options.warehouse_id ?? extractWarehouseId(auth.httpPath);
-  const { signal, onProgress, enableMetrics } = options;
+  const { signal, onProgress, enableMetrics, logger } = options;
+  const waitTimeout = options.wait_timeout ?? (onProgress ? "0s" : "50s");
+  let cancelIssued = false;
   throwIfAborted(signal, "executeStatement");
-  const emitProgress = onProgress ? async (statementId) => onProgress(result
+  const emitProgress = onProgress ? async (statementId) => onProgress(result, enableMetrics ? await fetchMetrics(auth, statementId, signal) : void 0) : void 0;
   const request = Object.fromEntries(
     Object.entries({
       warehouse_id: warehouseId,
@@ -292,23 +294,48 @@ async function executeStatement(query, auth, options = {}) {
       disposition: options.disposition,
       format: options.format,
       on_wait_timeout: options.on_wait_timeout ?? "CONTINUE",
-      wait_timeout:
+      wait_timeout: waitTimeout,
       row_limit: options.row_limit,
       catalog: options.catalog,
       schema: options.schema,
       parameters: options.parameters
     }).filter(([, v]) => v !== void 0)
   );
+  logger?.info?.(`executeStatement Executing statement on warehouse ${warehouseId}...`);
   let result = await postStatement(auth, request, signal);
-
-  if (
-
-
+  const cancelStatementSafely = async () => {
+    if (cancelIssued) return;
+    logger?.info?.("executeStatement Abort signal received during executeStatement.");
+    cancelIssued = true;
+    await cancelStatement(auth, result.statement_id).catch((err) => {
+      logger?.error?.("executeStatement Failed to cancel statement after abort.", err);
+    });
+  };
+  if (signal?.aborted) {
+    await cancelStatementSafely();
+    throw new AbortError("Aborted during polling");
+  }
+  const onAbort = () => cancelStatementSafely().catch(() => {
+  });
+  try {
+    signal?.addEventListener("abort", onAbort, { once: true });
+    while (!TERMINAL_STATES.has(result.status.state)) {
+      logger?.info?.(`executeStatement Statement ${result.statement_id} in state ${result.status.state}; polling for status...`);
+      await emitProgress?.(result.statement_id);
+      await delay(POLL_INTERVAL_MS, signal);
+      result = await getStatement(auth, result.statement_id, signal);
+    }
+  } catch (err) {
+    if (err instanceof AbortError || signal?.aborted) {
+      logger?.info?.("executeStatement Abort detected in executeStatement polling loop.");
+      await cancelStatementSafely();
       throw new AbortError("Aborted during polling");
     }
-
-
-
+    logger?.error?.(`executeStatement Error during executeStatement polling: ${String(err)}`);
+    throw err;
+  } finally {
+    logger?.info?.(`executeStatement Statement ${result.statement_id} reached final state: ${result.status.state}`);
+    signal?.removeEventListener("abort", onAbort);
   }
   await emitProgress?.(result.statement_id);
   if (result.status.state === "SUCCEEDED")
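From the caller's side, the new polling loop above means an already-aborted or later-fired signal now triggers a single server-side cancel before the promise rejects. A caller-side sketch, reusing `auth` from the README samples; the timeout value is illustrative, and since the exported error classes are not visible in this hunk, the rejection is caught generically.

```ts
import { executeStatement } from '@bitofsky/databricks-sql'

const controller = new AbortController()
const timer = setTimeout(() => controller.abort(), 30_000) // illustrative 30s budget

try {
  const result = await executeStatement('SELECT * FROM samples.nyctaxi.trips', auth, {
    signal: controller.signal,
  })
  console.log(result.status.state)
} catch (err) {
  // Per the loop above: on abort the statement is cancelled server-side once,
  // then an AbortError("Aborted during polling") is thrown here.
  console.warn('executeStatement rejected (possibly aborted):', err)
} finally {
  clearTimeout(timer)
}
```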
@@ -617,52 +644,110 @@ function convertBoolean(value) {
 var import_node_stream2 = require("stream");
 var import_merge_streams = require("@bitofsky/merge-streams");
 function fetchStream(statementResult, auth, options = {}) {
-  const { signal, forceMerge } = options;
+  const { signal, forceMerge, logger } = options;
   const manifest = validateSucceededResult(statementResult);
   const format = manifest.format;
+  const statementId = statementResult.statement_id;
+  const baseLog = { statementId, manifest, format, forceMerge };
   if (statementResult.result?.data_array) {
+    logger?.error?.(
+      `fetchStream only supports EXTERNAL_LINKS results for statement ${statementId}.`,
+      { ...baseLog, hasDataArray: true }
+    );
     throw new DatabricksSqlError(
       "fetchStream only supports EXTERNAL_LINKS results",
       "UNSUPPORTED_FORMAT",
-
+      statementId
     );
   }
+  logger?.info?.(`fetchStream creating stream for statement ${statementId}.`, {
+    ...baseLog,
+    hasExternalLinks: Boolean(statementResult.result?.external_links?.length)
+  });
   const output = new import_node_stream2.PassThrough();
   if (signal) {
-    const onAbort = () =>
+    const onAbort = () => {
+      logger?.info?.(`fetchStream abort signal received while streaming statement ${statementId}.`, baseLog);
+      output.destroy(new AbortError("Stream aborted"));
+    };
     signal.addEventListener("abort", onAbort, { once: true });
     output.once("close", () => signal.removeEventListener("abort", onAbort));
   }
-
+  output.on("error", (err) => {
+    if (err instanceof AbortError)
+      return;
+    if (output.listenerCount("error") === 1)
+      throw err;
+  });
+  mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger).catch((err) => {
+    logger?.error?.(`fetchStream error while streaming statement ${statementId}.`, {
+      ...baseLog,
+      error: err
+    });
+    output.destroy(err);
+  });
   return output;
 }
-async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge) {
+async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger) {
+  const statementId = statementResult.statement_id;
+  const baseLog = { statementId, manifest, format, forceMerge };
+  logger?.info?.(`fetchStream collecting external links for statement ${statementId}.`, baseLog);
   const urls = await collectExternalUrls(statementResult, auth, manifest, signal);
-  if (urls.length === 0)
+  if (urls.length === 0) {
+    logger?.info?.(`fetchStream no external links found for statement ${statementId}.`, baseLog);
     return void output.end();
-
+  }
+  if (urls.length === 1 && !forceMerge) {
+    logger?.info?.(`fetchStream piping single external link for statement ${statementId}.`, {
+      ...baseLog,
+      urlCount: urls.length
+    });
     return pipeUrlToOutput(urls[0], output, signal);
+  }
+  logger?.info?.(`fetchStream merging ${urls.length} external links for statement ${statementId}.`, {
+    ...baseLog,
+    urlCount: urls.length
+  });
   return (0, import_merge_streams.mergeStreamsFromUrls)(format, signal ? { urls, output, signal } : { urls, output });
 }
 async function collectExternalUrls(statementResult, auth, manifest, signal) {
-  const
-
-  return urls;
+  const chunkUrls = /* @__PURE__ */ new Map();
+  addChunkLinks(chunkUrls, statementResult.result?.external_links);
   if (!manifest.total_chunk_count)
-    return
-  const chunkUrls = [];
+    return flattenChunkUrls(chunkUrls);
   for (let i = 0; i < manifest.total_chunk_count; i++) {
+    if (chunkUrls.has(i))
+      continue;
     if (signal?.aborted)
       throw new AbortError("Aborted while collecting URLs");
     const chunkData = await getChunk(auth, statementResult.statement_id, i, signal);
-    chunkUrls
+    addChunkLinks(chunkUrls, chunkData.external_links);
   }
-  return chunkUrls;
+  return flattenChunkUrls(chunkUrls);
 }
-function
+function addChunkLinks(chunkUrls, externalLinks) {
   if (!externalLinks)
+    return;
+  for (const link of externalLinks) {
+    if (!isNonEmptyString(link.external_link))
+      continue;
+    const existing = chunkUrls.get(link.chunk_index);
+    if (existing) {
+      existing.push(link.external_link);
+    } else {
+      chunkUrls.set(link.chunk_index, [link.external_link]);
+    }
+  }
+}
+function flattenChunkUrls(chunkUrls) {
+  if (chunkUrls.size === 0)
     return [];
-
+  const sorted = [...chunkUrls.entries()].sort(([a], [b]) => a - b);
+  const urls = [];
+  for (const [, links] of sorted) {
+    urls.push(...links);
+  }
+  return urls;
 }
 function isNonEmptyString(value) {
   return typeof value === "string" && value.length > 0;
@@ -670,31 +755,46 @@ function isNonEmptyString(value) {
 
 // src/api/fetchRow.ts
 async function fetchRow(statementResult, auth, options = {}) {
-  const { signal, onEachRow, format } = options;
+  const { signal, onEachRow, format, logger } = options;
   const manifest = validateSucceededResult(statementResult);
+  const statementId = statementResult.statement_id;
+  const logContext = { statementId, manifest, requestedFormat: format };
   const mapRow = createRowMapper(manifest, format);
+  logger?.info?.(`fetchRow fetching rows for statement ${statementId}.`, {
+    ...logContext,
+    resultType: statementResult.result?.external_links ? "EXTERNAL_LINKS" : "INLINE"
+  });
   if (statementResult.result?.external_links) {
     if (manifest.format !== "JSON_ARRAY") {
+      logger?.error?.(`fetchRow only supports JSON_ARRAY for external_links; got ${manifest.format}.`, logContext);
       throw new DatabricksSqlError(
         `fetchRow only supports JSON_ARRAY for external_links. Received: ${manifest.format}`,
         "UNSUPPORTED_FORMAT",
-
+        statementId
       );
     }
-
-
+    logger?.info?.(`fetchRow streaming external links for statement ${statementId}.`, logContext);
+    const stream = fetchStream(statementResult, auth, {
+      ...signal ? { signal } : {},
+      ...logger ? { logger } : {}
+    });
+    await consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext);
     return;
   }
   const totalChunks = manifest.total_chunk_count;
   const dataArray = statementResult.result?.data_array;
   if (dataArray) {
+    logger?.info?.(`fetchRow processing inline rows for statement ${statementId}.`, {
+      ...logContext,
+      inlineRows: dataArray.length
+    });
     for (const row of dataArray) {
       if (signal?.aborted) throw new AbortError("Aborted");
       onEachRow?.(mapRow(row));
     }
   }
   if (totalChunks > 1) {
-
+    logger?.info?.(`fetchRow processing ${totalChunks} chunks for statement ${statementId}.`, logContext);
     for (let chunkIndex = 1; chunkIndex < totalChunks; chunkIndex++) {
       if (signal?.aborted) throw new AbortError("Aborted");
       const chunk = await getChunk(auth, statementId, chunkIndex, signal);
@@ -713,10 +813,14 @@ async function fetchRow(statementResult, auth, options = {}) {
     }
   }
 }
-async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
+async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext) {
  const jsonStream = stream.pipe((0, import_stream_json.parser)()).pipe((0, import_StreamArray.streamArray)());
  for await (const item of jsonStream) {
    if (signal?.aborted) {
+      logger?.info?.("fetchRow abort detected while streaming JSON_ARRAY rows.", {
+        ...logContext,
+        aborted: signal.aborted
+      });
      stream.destroy(new AbortError("Aborted"));
      throw new AbortError("Aborted");
    }
@@ -734,44 +838,74 @@ async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
 // src/api/fetchAll.ts
 async function fetchAll(statementResult, auth, options = {}) {
   const rows = [];
+  const statementId = statementResult.statement_id;
+  const manifest = statementResult.manifest;
+  const logContext = { statementId, manifest, requestedFormat: options.format };
   const fetchOptions = {
     // Collect rows as they are streamed in.
     onEachRow: (row) => {
       rows.push(row);
     }
   };
+  const { logger } = options;
+  logger?.info?.(`fetchAll fetching all rows for statement ${statementId}.`, logContext);
   if (options.signal)
     fetchOptions.signal = options.signal;
   if (options.format)
     fetchOptions.format = options.format;
+  if (options.logger)
+    fetchOptions.logger = options.logger;
   await fetchRow(statementResult, auth, fetchOptions);
+  logger?.info?.(`fetchAll fetched ${rows.length} rows for statement ${statementId}.`, {
+    ...logContext,
+    rowCount: rows.length,
+    resolvedFormat: options.format ?? manifest?.format
+  });
   return rows;
 }
 
 // src/api/mergeExternalLinks.ts
 async function mergeExternalLinks(statementResult, auth, options) {
-  const { signal, mergeStreamToExternalLink, forceMerge } = options;
-
+  const { signal, mergeStreamToExternalLink, forceMerge, logger } = options;
+  const statementId = statementResult.statement_id;
+  const manifest = statementResult.manifest;
+  const externalLinks = statementResult.result?.external_links;
+  const totalChunks = manifest?.total_chunk_count ?? 0;
+  const logContext = { statementId, manifest, totalChunks, forceMerge };
+  if (!externalLinks) {
+    logger?.info?.(`mergeExternalLinks no external links to merge for statement ${statementId}.`, logContext);
     return statementResult;
+  }
   if (!forceMerge) {
-    const
-
-
-
+    const isSingleChunk = totalChunks <= 1;
+    if (isSingleChunk) {
+      logger?.info?.(`mergeExternalLinks skipping merge for single external link in statement ${statementId}.`, {
+        ...logContext,
+        totalChunks
+      });
       return statementResult;
+    }
   }
+  logger?.info?.(`mergeExternalLinks merging external links for statement ${statementId}.`, logContext);
   const stream = fetchStream(statementResult, auth, {
     ...signal ? { signal } : {},
-    ...forceMerge !== void 0 ? { forceMerge } : {}
+    ...forceMerge !== void 0 ? { forceMerge } : {},
+    ...logger ? { logger } : {}
   });
+  logger?.info?.(`mergeExternalLinks uploading merged external link for statement ${statementId}.`, logContext);
   const uploadResult = await mergeStreamToExternalLink(stream);
-
-
+  logger?.info?.(`mergeExternalLinks uploaded merged external link for statement ${statementId}.`, {
+    ...logContext,
+    byteCount: uploadResult.byte_count,
+    expiration: uploadResult.expiration
+  });
+  const validatedManifest = validateSucceededResult(statementResult);
+  const totalRowCount = validatedManifest.total_row_count ?? 0;
   return {
     statement_id: statementResult.statement_id,
     status: statementResult.status,
     manifest: {
-      ...
+      ...validatedManifest,
       total_chunk_count: 1,
       total_byte_count: uploadResult.byte_count,
       chunks: [