@bitofsky/databricks-sql 1.0.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -25
- package/dist/index.cjs +243 -50
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +102 -3
- package/dist/index.d.ts +102 -3
- package/dist/index.js +241 -48
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
@@ -1,5 +1,11 @@
 # @bitofsky/databricks-sql

+[](https://www.npmjs.com/package/@bitofsky/databricks-sql)
+[](https://www.npmjs.com/package/@bitofsky/databricks-sql)
+[](https://github.com/bitofsky/databricks-sql/blob/main/LICENSE)
+[](https://nodejs.org)
+[](https://www.typescriptlang.org/)
+
 Databricks SQL client for Node.js that talks directly to the REST API and streams large results efficiently. No SDK lock-in, no warehouse-side streaming bottlenecks.

 ## Why This Exists
@@ -14,8 +20,10 @@ The goal is simple: stream big results with stable memory usage and without forc

 ## Highlights
 - Direct REST calls to Statement Execution API.
--
+- Optimized polling with server-side wait (up to 50s) before falling back to client polling.
+- Query metrics support via Query History API (`enableMetrics` option).
 - Efficient external link handling: merge chunks into a single stream.
+- Handles partial external link responses by fetching missing chunk metadata.
 - `mergeExternalLinks` supports streaming uploads and returns a new StatementResult with a presigned URL.
 - `fetchRow`/`fetchAll` support `JSON_OBJECT` (schema-based row mapping).
 - External links + JSON_ARRAY are supported for row iteration (streaming JSON parsing).
@@ -41,12 +49,16 @@ console.log(rows) // [{ value: 1 }]
 ```

 ## Sample (Streaming + Presigned URL)
-Stream external links into S3, then return a single presigned URL
+Stream external links into S3 with gzip compression, then return a single presigned URL.

 ```ts
 import { executeStatement, mergeExternalLinks } from '@bitofsky/databricks-sql'
-import { GetObjectCommand,
+import { GetObjectCommand, HeadObjectCommand, S3Client } from '@aws-sdk/client-s3'
+import { Upload } from '@aws-sdk/lib-storage'
 import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
+import { createGzip } from 'zlib'
+import { pipeline } from 'stream/promises'
+import { PassThrough } from 'stream'

 const auth = {
   token: process.env.DATABRICKS_TOKEN!,
@@ -65,31 +77,71 @@ const result = await executeStatement(

 const merged = await mergeExternalLinks(result, auth, {
   mergeStreamToExternalLink: async (stream) => {
-    const key = `merged-${Date.now()}.csv`
-
-
+    const key = `merged-${Date.now()}.csv.gz`
+    const gzip = createGzip() // Compress with gzip and upload to S3
+    const passThrough = new PassThrough()
+
+    const upload = new Upload({
+      client: s3,
+      params: {
         Bucket: bucket,
         Key: key,
-        Body:
-        ContentType: 'text/csv',
-
-
-
-    const
-
-
-
-
+        Body: passThrough,
+        ContentType: 'text/csv; charset=utf-8',
+        ContentEncoding: 'gzip',
+      },
+    })
+    const uploadPromise = upload.done()
+
+    await Promise.all([
+      pipeline(stream, gzip, passThrough),
+      uploadPromise,
+    ])
+
+    // Get actual uploaded size via HeadObject
+    const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key }))
+    // Generate presigned URL valid for 1 hour
+    const externalLink = await getSignedUrl(s3, new GetObjectCommand({ Bucket: bucket, Key: key }), { expiresIn: 3600 })

     return {
-      externalLink,
-      byte_count: 0,
-      expiration: new Date(Date.now() + 3600 * 1000).toISOString(),
+      externalLink, // Presigned URL to merged gzip CSV
+      byte_count: head.ContentLength ?? 0, // Actual compressed size
+      expiration: new Date(Date.now() + 3600 * 1000).toISOString(), // 1 hour from now
     }
   },
 })

-console.log(merged.result?.external_links?.[0].external_link) // Presigned URL to merged CSV
+console.log(merged.result?.external_links?.[0].external_link) // Presigned URL to merged gzip CSV
+console.log(merged.result?.external_links?.[0].byte_count) // Actual compressed size
+```
+
+## Sample (Progress with Metrics)
+Track query progress with execution metrics:
+
+```ts
+import { executeStatement } from '@bitofsky/databricks-sql'
+
+const auth = {
+  token: process.env.DATABRICKS_TOKEN!,
+  host: process.env.DATABRICKS_HOST!,
+  httpPath: process.env.DATABRICKS_HTTP_PATH!,
+}
+
+const result = await executeStatement(
+  'SELECT * FROM samples.tpch.lineitem LIMIT 10000',
+  auth,
+  {
+    enableMetrics: true,
+    onProgress: (result, metrics) => {
+      console.log(`State: ${result.status.state}`)
+      if (metrics) { // metrics is optional, only present when enableMetrics: true
+        console.log(` Execution time: ${metrics.execution_time_ms}ms`)
+        console.log(` Rows produced: ${metrics.rows_produced_count}`)
+        console.log(` Bytes read: ${metrics.read_bytes}`)
+      }
+    },
+  }
+)
 ```

 ## Sample (Abort)
@@ -141,7 +193,10 @@ function executeStatement(
 ): Promise<StatementResult>
 ```
 - Calls the Databricks Statement Execution API and polls until completion.
--
+- Server waits up to 50s (`wait_timeout`) before client-side polling begins.
+- Default `wait_timeout` is `50s`, or `0s` when `onProgress` is provided.
+- Use `options.onProgress` to receive status updates with optional metrics.
+- Set `enableMetrics: true` to fetch query metrics from Query History API on each poll.
 - Throws `DatabricksSqlError` on failure, `StatementCancelledError` on cancel, and `AbortError` on abort.

 ### fetchRow(statementResult, auth, options?)
@@ -155,6 +210,7 @@ function fetchRow(
 - Streams each row to `options.onEachRow`.
 - Use `format: 'JSON_OBJECT'` to map rows into schema-based objects.
 - Supports `INLINE` results or `JSON_ARRAY` formatted `EXTERNAL_LINKS` only.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.

 ### fetchAll(statementResult, auth, options?)
 ```ts
@@ -166,6 +222,7 @@ function fetchAll(
 ```
 - Collects all rows into an array. For large results, prefer `fetchRow`/`fetchStream`.
 - Supports `INLINE` results or `JSON_ARRAY` formatted `EXTERNAL_LINKS` only.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.

 ### fetchStream(statementResult, auth, options?)
 ```ts
@@ -177,7 +234,10 @@ function fetchStream(
 ```
 - Merges `EXTERNAL_LINKS` into a single binary stream.
 - Preserves the original format (`JSON_ARRAY`, `CSV`, `ARROW_STREAM`).
+- Throws if the result is `INLINE`.
 - Ends as an empty stream when no external links exist.
+- `forceMerge: true` forces merge even when there is only a single external link.
+- If only a subset of external links is returned, missing chunk metadata is fetched by index.

 ### mergeExternalLinks(statementResult, auth, options)
 ```ts
@@ -190,22 +250,25 @@ function mergeExternalLinks(
 - Creates a merged stream from `EXTERNAL_LINKS`, uploads it via
   `options.mergeStreamToExternalLink`, then returns a `StatementResult`
   with a single external link.
-- Returns the original result unchanged when input is `INLINE
+- Returns the original result unchanged when input is `INLINE` or already a
+  single external link (unless `forceMerge: true`).

 ### Options (Summary)
 ```ts
 type ExecuteStatementOptions = {
-  onProgress?: (
+  onProgress?: (result: StatementResult, metrics?: QueryMetrics) => void
+  enableMetrics?: boolean // Fetch metrics from Query History API (default: false)
+  logger?: Logger
   signal?: AbortSignal
   disposition?: 'INLINE' | 'EXTERNAL_LINKS'
   format?: 'JSON_ARRAY' | 'ARROW_STREAM' | 'CSV'
-  wait_timeout?: string
+  wait_timeout?: string // Server wait time (default: '50s', max: '50s')
   row_limit?: number
   byte_limit?: number
   catalog?: string
   schema?: string
   parameters?: StatementParameter[]
-  on_wait_timeout?: 'CONTINUE' | 'CANCEL'
+  on_wait_timeout?: 'CONTINUE' | 'CANCEL' // Default: 'CONTINUE'
   warehouse_id?: string
 }

@@ -213,19 +276,25 @@ type FetchRowsOptions = {
   signal?: AbortSignal
   onEachRow?: (row: RowArray | RowObject) => void
   format?: 'JSON_ARRAY' | 'JSON_OBJECT'
+  logger?: Logger
 }

 type FetchAllOptions = {
   signal?: AbortSignal
   format?: 'JSON_ARRAY' | 'JSON_OBJECT'
+  logger?: Logger
 }

 type FetchStreamOptions = {
   signal?: AbortSignal
+  forceMerge?: boolean
+  logger?: Logger
 }

 type MergeExternalLinksOptions = {
   signal?: AbortSignal
+  forceMerge?: boolean
+  logger?: Logger
   mergeStreamToExternalLink: (stream: Readable) => Promise<{
     externalLink: string
     byte_count: number
@@ -237,6 +306,9 @@ type MergeExternalLinksOptions = {
 ## Notes
 - Databricks requires `INLINE` results to use `JSON_ARRAY` format. `INLINE + CSV` is rejected by the API.
 - `EXTERNAL_LINKS` are merged using `@bitofsky/merge-streams`.
+- Query metrics are fetched from `/api/2.0/sql/history/queries/{query_id}?include_metrics=true` when `enableMetrics: true`.
+- Metrics may not be immediately available; `is_final: true` indicates complete metrics.
+- Requires Node.js >= 20 for global `fetch` and Web streams.

 ## Development
 ```bash
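The options summary in the README adds `forceMerge` and `logger` to the fetch APIs, but no sample shows them in use. A minimal sketch based on the documented `executeStatement` and `fetchStream` signatures; the query, output file name, and console-backed logger object are illustrative assumptions, not part of the package:

```ts
import { executeStatement, fetchStream } from '@bitofsky/databricks-sql'
import { createWriteStream } from 'fs'
import { pipeline } from 'stream/promises'

const auth = {
  token: process.env.DATABRICKS_TOKEN!,
  host: process.env.DATABRICKS_HOST!,
  httpPath: process.env.DATABRICKS_HTTP_PATH!,
}

// Request EXTERNAL_LINKS + CSV so fetchStream has chunk URLs to merge.
const result = await executeStatement(
  'SELECT * FROM samples.tpch.lineitem LIMIT 100000',
  auth,
  { disposition: 'EXTERNAL_LINKS', format: 'CSV' }
)

// forceMerge: true routes even a single external link through the merge path.
// The logger option is called as logger?.info?.() / logger?.error?.(), so a plain
// object wrapping console methods should satisfy it (assumption).
const stream = fetchStream(result, auth, {
  forceMerge: true,
  logger: { info: console.log, error: console.error },
})

// Write the merged CSV stream to disk with stable memory usage.
await pipeline(stream, createWriteStream('lineitem.csv'))
```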
package/dist/index.cjs
CHANGED
@@ -84,6 +84,8 @@ var RateLimitError = class extends HttpError {
 };

 // src/util.ts
+var import_node_stream = require("stream");
+var import_promises = require("stream/promises");
 function extractWarehouseId(httpPath) {
   const match = httpPath.match(/\/sql\/\d+\.\d+\/warehouses\/([a-zA-Z0-9]+)/);
   if (!match?.[1])
@@ -133,6 +135,24 @@ function validateSucceededResult(statementResult) {
   );
   return statementResult.manifest;
 }
+function isWebReadableStream(body) {
+  return typeof body.getReader === "function";
+}
+async function pipeUrlToOutput(url, output, signal) {
+  if (signal?.aborted)
+    throw new AbortError("Aborted while streaming");
+  const response = await fetch(url, signal ? { signal } : void 0);
+  if (!response.ok) {
+    throw new Error(
+      `Failed to fetch external link: ${response.status} ${response.statusText}`
+    );
+  }
+  if (!response.body)
+    return void output.end();
+  const body = response.body;
+  const input = isWebReadableStream(body) ? import_node_stream.Readable.fromWeb(body) : body;
+  await (0, import_promises.pipeline)(input, output);
+}

 // src/http.ts
 var MAX_RETRIES = 3;
@@ -205,6 +225,7 @@ async function httpRequest(auth, options) {

 // src/databricks-api.ts
 var BASE_PATH = "/api/2.0/sql/statements";
+var HISTORY_BASE_PATH = "/api/2.0/sql/history/queries";
 async function postStatement(auth, request, signal) {
   return httpRequest(auth, {
     method: "POST",
@@ -234,6 +255,13 @@ async function getChunk(auth, statementId, chunkIndex, signal) {
     ...signal ? { signal } : {}
   });
 }
+async function getQueryMetrics(auth, queryId, signal) {
+  return httpRequest(auth, {
+    method: "GET",
+    path: `${HISTORY_BASE_PATH}/${queryId}?include_metrics=true`,
+    ...signal ? { signal } : {}
+  });
+}

 // src/api/executeStatement.ts
 var TERMINAL_STATES = /* @__PURE__ */ new Set([
@@ -242,12 +270,22 @@ var TERMINAL_STATES = /* @__PURE__ */ new Set([
   "CANCELED",
   "CLOSED"
 ]);
-var POLL_INTERVAL_MS =
-
+var POLL_INTERVAL_MS = 5e3;
+async function fetchMetrics(auth, statementId, signal) {
+  try {
+    const queryInfo = await getQueryMetrics(auth, statementId, signal);
+    return queryInfo.metrics;
+  } catch {
+    return void 0;
+  }
+}
 async function executeStatement(query, auth, options = {}) {
   const warehouseId = options.warehouse_id ?? extractWarehouseId(auth.httpPath);
-  const { signal, onProgress } = options;
+  const { signal, onProgress, enableMetrics, logger } = options;
+  const waitTimeout = options.wait_timeout ?? (onProgress ? "0s" : "50s");
+  let cancelIssued = false;
   throwIfAborted(signal, "executeStatement");
+  const emitProgress = onProgress ? async (statementId) => onProgress(result, enableMetrics ? await fetchMetrics(auth, statementId, signal) : void 0) : void 0;
   const request = Object.fromEntries(
     Object.entries({
       warehouse_id: warehouseId,
@@ -255,28 +293,51 @@ async function executeStatement(query, auth, options = {}) {
       byte_limit: options.byte_limit,
       disposition: options.disposition,
       format: options.format,
-      on_wait_timeout: options.on_wait_timeout,
-      wait_timeout:
+      on_wait_timeout: options.on_wait_timeout ?? "CONTINUE",
+      wait_timeout: waitTimeout,
       row_limit: options.row_limit,
       catalog: options.catalog,
       schema: options.schema,
       parameters: options.parameters
     }).filter(([, v]) => v !== void 0)
   );
+  logger?.info?.(`executeStatement Executing statement on warehouse ${warehouseId}...`);
   let result = await postStatement(auth, request, signal);
-
-
-
-
-
+  const cancelStatementSafely = async () => {
+    if (cancelIssued) return;
+    logger?.info?.("executeStatement Abort signal received during executeStatement.");
+    cancelIssued = true;
+    await cancelStatement(auth, result.statement_id).catch((err) => {
+      logger?.error?.("executeStatement Failed to cancel statement after abort.", err);
+    });
+  };
+  if (signal?.aborted) {
+    await cancelStatementSafely();
+    throw new AbortError("Aborted during polling");
+  }
+  const onAbort = () => cancelStatementSafely().catch(() => {
+  });
+  try {
+    signal?.addEventListener("abort", onAbort, { once: true });
+    while (!TERMINAL_STATES.has(result.status.state)) {
+      logger?.info?.(`executeStatement Statement ${result.statement_id} in state ${result.status.state}; polling for status...`);
+      await emitProgress?.(result.statement_id);
+      await delay(POLL_INTERVAL_MS, signal);
+      result = await getStatement(auth, result.statement_id, signal);
+    }
+  } catch (err) {
+    if (err instanceof AbortError || signal?.aborted) {
+      logger?.info?.("executeStatement Abort detected in executeStatement polling loop.");
+      await cancelStatementSafely();
       throw new AbortError("Aborted during polling");
     }
-
-
-
-    result
+    logger?.error?.(`executeStatement Error during executeStatement polling: ${String(err)}`);
+    throw err;
+  } finally {
+    logger?.info?.(`executeStatement Statement ${result.statement_id} reached final state: ${result.status.state}`);
+    signal?.removeEventListener("abort", onAbort);
   }
-
+  await emitProgress?.(result.statement_id);
   if (result.status.state === "SUCCEEDED")
     return result;
   if (result.status.state === "CANCELED")
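The polling loop above registers an abort listener, issues a best-effort cancel of the running statement, and then rethrows `AbortError`. A minimal sketch of driving that path from the caller's side, using only the documented `signal` option; the query and 30-second timeout are illustrative assumptions:

```ts
import { executeStatement } from '@bitofsky/databricks-sql'

const auth = {
  token: process.env.DATABRICKS_TOKEN!,
  host: process.env.DATABRICKS_HOST!,
  httpPath: process.env.DATABRICKS_HTTP_PATH!,
}

// Abort the statement if it has not finished within 30s (illustrative timeout).
const controller = new AbortController()
const timer = setTimeout(() => controller.abort(), 30_000)

try {
  const result = await executeStatement(
    'SELECT * FROM samples.tpch.lineitem',
    auth,
    { signal: controller.signal }
  )
  console.log(result.status.state) // SUCCEEDED
} catch (err) {
  // On abort, the client cancels the statement server-side (cancelStatementSafely
  // in the hunk above) before throwing AbortError.
  console.error('Statement aborted or failed:', err)
} finally {
  clearTimeout(timer)
}
```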
@@ -580,72 +641,160 @@ function convertBoolean(value) {
 }

 // src/api/fetchStream.ts
-var
+var import_node_stream2 = require("stream");
 var import_merge_streams = require("@bitofsky/merge-streams");
 function fetchStream(statementResult, auth, options = {}) {
-  const { signal } = options;
+  const { signal, forceMerge, logger } = options;
   const manifest = validateSucceededResult(statementResult);
   const format = manifest.format;
-  const
+  const statementId = statementResult.statement_id;
+  const baseLog = { statementId, manifest, format, forceMerge };
+  if (statementResult.result?.data_array) {
+    logger?.error?.(
+      `fetchStream only supports EXTERNAL_LINKS results for statement ${statementId}.`,
+      { ...baseLog, hasDataArray: true }
+    );
+    throw new DatabricksSqlError(
+      "fetchStream only supports EXTERNAL_LINKS results",
+      "UNSUPPORTED_FORMAT",
+      statementId
+    );
+  }
+  logger?.info?.(`fetchStream creating stream for statement ${statementId}.`, {
+    ...baseLog,
+    hasExternalLinks: Boolean(statementResult.result?.external_links?.length)
+  });
+  const output = new import_node_stream2.PassThrough();
   if (signal) {
     const onAbort = () => {
+      logger?.info?.(`fetchStream abort signal received while streaming statement ${statementId}.`, baseLog);
       output.destroy(new AbortError("Stream aborted"));
     };
     signal.addEventListener("abort", onAbort, { once: true });
-    output.once("close", () =>
-      signal.removeEventListener("abort", onAbort);
-    });
+    output.once("close", () => signal.removeEventListener("abort", onAbort));
   }
-
-  (err
-
-
-
+  output.on("error", (err) => {
+    if (err instanceof AbortError)
+      return;
+    if (output.listenerCount("error") === 1)
+      throw err;
+  });
+  mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger).catch((err) => {
+    logger?.error?.(`fetchStream error while streaming statement ${statementId}.`, {
+      ...baseLog,
+      error: err
+    });
+    output.destroy(err);
+  });
   return output;
 }
-async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal) {
-  const
-
-
-
-
-
-
-
+async function mergeChunksToStream(statementResult, auth, manifest, format, output, signal, forceMerge, logger) {
+  const statementId = statementResult.statement_id;
+  const baseLog = { statementId, manifest, format, forceMerge };
+  logger?.info?.(`fetchStream collecting external links for statement ${statementId}.`, baseLog);
+  const urls = await collectExternalUrls(statementResult, auth, manifest, signal);
+  if (urls.length === 0) {
+    logger?.info?.(`fetchStream no external links found for statement ${statementId}.`, baseLog);
+    return void output.end();
+  }
+  if (urls.length === 1 && !forceMerge) {
+    logger?.info?.(`fetchStream piping single external link for statement ${statementId}.`, {
+      ...baseLog,
+      urlCount: urls.length
+    });
+    return pipeUrlToOutput(urls[0], output, signal);
+  }
+  logger?.info?.(`fetchStream merging ${urls.length} external links for statement ${statementId}.`, {
+    ...baseLog,
+    urlCount: urls.length
+  });
+  return (0, import_merge_streams.mergeStreamsFromUrls)(format, signal ? { urls, output, signal } : { urls, output });
+}
+async function collectExternalUrls(statementResult, auth, manifest, signal) {
+  const chunkUrls = /* @__PURE__ */ new Map();
+  addChunkLinks(chunkUrls, statementResult.result?.external_links);
+  if (!manifest.total_chunk_count)
+    return flattenChunkUrls(chunkUrls);
+  for (let i = 0; i < manifest.total_chunk_count; i++) {
+    if (chunkUrls.has(i))
+      continue;
+    if (signal?.aborted)
+      throw new AbortError("Aborted while collecting URLs");
+    const chunkData = await getChunk(auth, statementResult.statement_id, i, signal);
+    addChunkLinks(chunkUrls, chunkData.external_links);
+  }
+  return flattenChunkUrls(chunkUrls);
+}
+function addChunkLinks(chunkUrls, externalLinks) {
+  if (!externalLinks)
+    return;
+  for (const link of externalLinks) {
+    if (!isNonEmptyString(link.external_link))
+      continue;
+    const existing = chunkUrls.get(link.chunk_index);
+    if (existing) {
+      existing.push(link.external_link);
+    } else {
+      chunkUrls.set(link.chunk_index, [link.external_link]);
     }
   }
-
-
-
+}
+function flattenChunkUrls(chunkUrls) {
+  if (chunkUrls.size === 0)
+    return [];
+  const sorted = [...chunkUrls.entries()].sort(([a], [b]) => a - b);
+  const urls = [];
+  for (const [, links] of sorted) {
+    urls.push(...links);
+  }
+  return urls;
+}
+function isNonEmptyString(value) {
+  return typeof value === "string" && value.length > 0;
 }

 // src/api/fetchRow.ts
 async function fetchRow(statementResult, auth, options = {}) {
-  const { signal, onEachRow, format } = options;
+  const { signal, onEachRow, format, logger } = options;
   const manifest = validateSucceededResult(statementResult);
+  const statementId = statementResult.statement_id;
+  const logContext = { statementId, manifest, requestedFormat: format };
   const mapRow = createRowMapper(manifest, format);
+  logger?.info?.(`fetchRow fetching rows for statement ${statementId}.`, {
+    ...logContext,
+    resultType: statementResult.result?.external_links ? "EXTERNAL_LINKS" : "INLINE"
+  });
   if (statementResult.result?.external_links) {
     if (manifest.format !== "JSON_ARRAY") {
+      logger?.error?.(`fetchRow only supports JSON_ARRAY for external_links; got ${manifest.format}.`, logContext);
       throw new DatabricksSqlError(
         `fetchRow only supports JSON_ARRAY for external_links. Received: ${manifest.format}`,
         "UNSUPPORTED_FORMAT",
-
+        statementId
       );
     }
-
-
+    logger?.info?.(`fetchRow streaming external links for statement ${statementId}.`, logContext);
+    const stream = fetchStream(statementResult, auth, {
+      ...signal ? { signal } : {},
+      ...logger ? { logger } : {}
+    });
+    await consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext);
     return;
   }
   const totalChunks = manifest.total_chunk_count;
   const dataArray = statementResult.result?.data_array;
   if (dataArray) {
+    logger?.info?.(`fetchRow processing inline rows for statement ${statementId}.`, {
+      ...logContext,
+      inlineRows: dataArray.length
+    });
     for (const row of dataArray) {
       if (signal?.aborted) throw new AbortError("Aborted");
       onEachRow?.(mapRow(row));
     }
   }
   if (totalChunks > 1) {
-
+    logger?.info?.(`fetchRow processing ${totalChunks} chunks for statement ${statementId}.`, logContext);
     for (let chunkIndex = 1; chunkIndex < totalChunks; chunkIndex++) {
       if (signal?.aborted) throw new AbortError("Aborted");
       const chunk = await getChunk(auth, statementId, chunkIndex, signal);
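The `fetchRow` changes above mainly thread the new optional `logger` through `fetchStream` and chunk processing. A minimal usage sketch based on the documented `FetchRowsOptions`; the query is the README's quick-start statement, and the console-backed logger object is an illustrative assumption:

```ts
import { executeStatement, fetchRow } from '@bitofsky/databricks-sql'

const auth = {
  token: process.env.DATABRICKS_TOKEN!,
  host: process.env.DATABRICKS_HOST!,
  httpPath: process.env.DATABRICKS_HTTP_PATH!,
}

const result = await executeStatement('SELECT 1 AS value', auth)

let count = 0
await fetchRow(result, auth, {
  format: 'JSON_OBJECT',         // map each row onto the result schema
  logger: { info: console.log }, // logger is called via logger?.info?.() (assumption: console works)
  onEachRow: (row) => {
    count++
    console.log(row)             // { value: 1 } for the query above
  },
})
console.log(`processed ${count} rows`)
```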
@@ -664,10 +813,14 @@ async function fetchRow(statementResult, auth, options = {}) {
     }
   }
 }
-async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
+async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal, logger, logContext) {
   const jsonStream = stream.pipe((0, import_stream_json.parser)()).pipe((0, import_StreamArray.streamArray)());
   for await (const item of jsonStream) {
     if (signal?.aborted) {
+      logger?.info?.("fetchRow abort detected while streaming JSON_ARRAY rows.", {
+        ...logContext,
+        aborted: signal.aborted
+      });
       stream.destroy(new AbortError("Aborted"));
       throw new AbortError("Aborted");
     }
@@ -685,34 +838,74 @@ async function consumeJsonArrayStream(stream, mapRow, onEachRow, signal) {
 // src/api/fetchAll.ts
 async function fetchAll(statementResult, auth, options = {}) {
   const rows = [];
+  const statementId = statementResult.statement_id;
+  const manifest = statementResult.manifest;
+  const logContext = { statementId, manifest, requestedFormat: options.format };
   const fetchOptions = {
     // Collect rows as they are streamed in.
     onEachRow: (row) => {
       rows.push(row);
     }
   };
+  const { logger } = options;
+  logger?.info?.(`fetchAll fetching all rows for statement ${statementId}.`, logContext);
   if (options.signal)
     fetchOptions.signal = options.signal;
   if (options.format)
     fetchOptions.format = options.format;
+  if (options.logger)
+    fetchOptions.logger = options.logger;
   await fetchRow(statementResult, auth, fetchOptions);
+  logger?.info?.(`fetchAll fetched ${rows.length} rows for statement ${statementId}.`, {
+    ...logContext,
+    rowCount: rows.length,
+    resolvedFormat: options.format ?? manifest?.format
+  });
   return rows;
 }

 // src/api/mergeExternalLinks.ts
 async function mergeExternalLinks(statementResult, auth, options) {
-  const { signal, mergeStreamToExternalLink } = options;
-
+  const { signal, mergeStreamToExternalLink, forceMerge, logger } = options;
+  const statementId = statementResult.statement_id;
+  const manifest = statementResult.manifest;
+  const externalLinks = statementResult.result?.external_links;
+  const totalChunks = manifest?.total_chunk_count ?? 0;
+  const logContext = { statementId, manifest, totalChunks, forceMerge };
+  if (!externalLinks) {
+    logger?.info?.(`mergeExternalLinks no external links to merge for statement ${statementId}.`, logContext);
     return statementResult;
-
+  }
+  if (!forceMerge) {
+    const isSingleChunk = totalChunks <= 1;
+    if (isSingleChunk) {
+      logger?.info?.(`mergeExternalLinks skipping merge for single external link in statement ${statementId}.`, {
+        ...logContext,
+        totalChunks
+      });
+      return statementResult;
+    }
+  }
+  logger?.info?.(`mergeExternalLinks merging external links for statement ${statementId}.`, logContext);
+  const stream = fetchStream(statementResult, auth, {
+    ...signal ? { signal } : {},
+    ...forceMerge !== void 0 ? { forceMerge } : {},
+    ...logger ? { logger } : {}
+  });
+  logger?.info?.(`mergeExternalLinks uploading merged external link for statement ${statementId}.`, logContext);
   const uploadResult = await mergeStreamToExternalLink(stream);
-
-
+  logger?.info?.(`mergeExternalLinks uploaded merged external link for statement ${statementId}.`, {
+    ...logContext,
+    byteCount: uploadResult.byte_count,
+    expiration: uploadResult.expiration
+  });
+  const validatedManifest = validateSucceededResult(statementResult);
+  const totalRowCount = validatedManifest.total_row_count ?? 0;
   return {
     statement_id: statementResult.statement_id,
     status: statementResult.status,
     manifest: {
-      ...
+      ...validatedManifest,
       total_chunk_count: 1,
       total_byte_count: uploadResult.byte_count,
       chunks: [