@databricks/zerobus-ingest-sdk 0.1.1 → 0.2.0

This diff shows the changes between publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the registry.
package/index.d.ts ADDED
@@ -0,0 +1,538 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ /* auto-generated by NAPI-RS */
5
+
6
+ /**
7
+ * Record serialization format.
8
+ *
9
+ * Specifies how records should be encoded when ingested into the stream.
10
+ */
11
+ export const enum RecordType {
12
+ /** JSON encoding - records are JSON-encoded strings */
13
+ Json = 0,
14
+ /** Protocol Buffers encoding - records are binary protobuf messages */
15
+ Proto = 1
16
+ }
17
+ /**
18
+ * Configuration options for the Zerobus stream.
19
+ *
20
+ * These options control stream behavior including recovery, timeouts, and inflight limits.
21
+ */
22
+ export interface StreamConfigurationOptions {
23
+ /**
24
+ * Maximum number of unacknowledged requests that can be in flight.
25
+ * Default: 10,000
26
+ */
27
+ maxInflightRequests?: number
28
+ /**
29
+ * Enable automatic stream recovery on transient failures.
30
+ * Default: true
31
+ */
32
+ recovery?: boolean
33
+ /**
34
+ * Timeout for recovery operations in milliseconds.
35
+ * Default: 15,000 (15 seconds)
36
+ */
37
+ recoveryTimeoutMs?: number
38
+ /**
39
+ * Delay between recovery retry attempts in milliseconds.
40
+ * Default: 2,000 (2 seconds)
41
+ */
42
+ recoveryBackoffMs?: number
43
+ /**
44
+ * Maximum number of recovery attempts before giving up.
45
+ * Default: 4
46
+ */
47
+ recoveryRetries?: number
48
+ /**
49
+ * Timeout for flush operations in milliseconds.
50
+ * Default: 300,000 (5 minutes)
51
+ */
52
+ flushTimeoutMs?: number
53
+ /**
54
+ * Timeout waiting for server acknowledgments in milliseconds.
55
+ * Default: 60,000 (1 minute)
56
+ */
57
+ serverLackOfAckTimeoutMs?: number
58
+ /**
59
+ * Record serialization format.
60
+ * Use RecordType.Json for JSON encoding or RecordType.Proto for Protocol Buffers.
61
+ * Default: RecordType.Proto (Protocol Buffers)
62
+ */
63
+ recordType?: number
64
+ /**
65
+ * Maximum time in milliseconds that acknowledgment callbacks may run after stream closure.
66
+ * - None (undefined): Wait indefinitely for callbacks to complete
67
+ * - Some(0): Don't wait for callbacks at all
68
+ * - Some(x): Wait up to x milliseconds for callbacks
69
+ * Default: None (wait indefinitely)
70
+ */
71
+ callbackMaxWaitTimeMs?: number
72
+ /**
73
+ * Maximum wait time during graceful stream close in milliseconds.
74
+ * When the server signals stream closure, this controls how long to wait
75
+ * for in-flight records to be acknowledged.
76
+ * - None (undefined): Wait for full server-specified duration
77
+ * - Some(0): Immediately trigger recovery without waiting
78
+ * - Some(x): Wait up to min(x, server_duration) milliseconds
79
+ * Default: None (wait for full server duration)
80
+ */
81
+ streamPausedMaxWaitTimeMs?: number
82
+ }
83
+ /**
84
+ * Properties of the target Delta table for ingestion.
85
+ *
86
+ * Specifies which Unity Catalog table to write to and optionally the schema descriptor
87
+ * for Protocol Buffers encoding.
88
+ */
89
+ export interface TableProperties {
90
+ /** Full table name in Unity Catalog (e.g., "catalog.schema.table") */
91
+ tableName: string
92
+ /**
93
+ * Optional Protocol Buffer descriptor as a base64-encoded string.
94
+ * If not provided, JSON encoding will be used.
95
+ */
96
+ descriptorProto?: string
97
+ }
98
+ /**
99
+ * JavaScript headers provider callback wrapper.
100
+ *
101
+ * Allows TypeScript code to provide custom authentication headers
102
+ * by implementing a getHeaders() function.
103
+ */
104
+ export interface JsHeadersProvider {
105
+ /** JavaScript function: () => Promise<Array<[string, string]>> */
106
+ getHeadersCallback: (...args: any[]) => any
107
+ }
108
+ /**
109
+ * JavaScript acknowledgment callback wrapper.
110
+ *
111
+ * Allows TypeScript code to receive notifications when records are acknowledged
112
+ * or when errors occur.
113
+ */
114
+ export interface JsAckCallback {
115
+ /** JavaScript function called when a record is acknowledged: (offsetId: bigint) => void */
116
+ onAck?: (...args: any[]) => any
117
+ /** JavaScript function called when an error occurs: (offsetId: bigint, errorMessage: string) => void */
118
+ onError?: (...args: any[]) => any
119
+ }
120
+ /**
121
+ * Custom error type for Zerobus operations.
122
+ *
123
+ * This error type includes information about whether the error is retryable,
124
+ * which helps determine if automatic recovery can resolve the issue.
125
+ */
126
+ export declare class ZerobusError {
127
+ /** Returns true if this error can be automatically retried by the SDK. */
128
+ get isRetryable(): boolean
129
+ /** Get the error message. */
130
+ get message(): string
131
+ }
132
+ /**
133
+ * A stream for ingesting data into a Databricks Delta table.
134
+ *
135
+ * The stream manages a bidirectional gRPC connection, handles acknowledgments,
136
+ * and provides automatic recovery on transient failures.
137
+ *
138
+ * # Example
139
+ *
140
+ * ```typescript
141
+ * const stream = await sdk.createStream(tableProps, clientId, clientSecret, options);
142
+  * const ackPromise = stream.ingestRecord(Buffer.from([1, 2, 3]));
143
+ * const offset = await ackPromise;
144
+ * await stream.close();
145
+ * ```
146
+ */
147
+ export declare class ZerobusStream {
148
+ /**
149
+ * Ingests a single record into the stream.
150
+ *
151
+ * **@deprecated** Use `ingestRecordOffset()` instead, which returns the offset directly
152
+ * after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
153
+ *
154
+ * This method accepts either:
155
+ * - A Protocol Buffer encoded record as a Buffer (Vec<u8>)
156
+ * - A JSON string
157
+ *
158
+ * This method BLOCKS until the record is sent to the SDK's internal landing zone,
159
+ * then returns a Promise for the server acknowledgment. This allows you to send
160
+ * many records immediately without waiting for acknowledgments:
161
+ *
162
+ * ```typescript
163
+ * let lastAckPromise;
164
+ * for (let i = 0; i < 1000; i++) {
165
+ * // This call blocks until record is sent (in SDK)
166
+ * lastAckPromise = stream.ingestRecord(record);
167
+ * }
168
+ * // All 1000 records are now in the SDK's internal queue
169
+ * // Wait for the last acknowledgment
170
+ * await lastAckPromise;
171
+ * // Flush to ensure all records are acknowledged
172
+ * await stream.flush();
173
+ * ```
174
+ *
175
+ * # Arguments
176
+ *
177
+ * * `payload` - The record data. Accepts:
178
+ * - Buffer (low-level proto bytes)
179
+ * - string (low-level JSON string)
180
+ * - Protobuf message object with .encode() method (high-level, auto-serializes)
181
+ * - Plain JavaScript object (high-level, auto-stringifies to JSON)
182
+ *
183
+ * # Returns
184
+ *
185
+ * A Promise that resolves to the offset ID when the server acknowledges the record.
186
+ */
187
+ ingestRecord(payload: unknown): Promise<bigint>
188
+ /**
189
+ * Ingests multiple records as a single atomic batch.
190
+ *
191
+ * **@deprecated** Use `ingestRecordsOffset()` instead, which returns the offset directly
192
+ * after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
193
+ *
194
+ * This method accepts an array of records (Protocol Buffer buffers or JSON strings)
195
+ * and ingests them as a batch. The batch receives a single acknowledgment from
196
+ * the server with all-or-nothing semantics.
197
+ *
198
+ * Similar to ingestRecord(), this BLOCKS until the batch is sent to the SDK's
199
+ * internal landing zone, then returns a Promise for the server acknowledgment.
200
+ *
201
+ * # Arguments
202
+ *
203
+ * * `records` - Array of record data (Buffer for protobuf, string for JSON)
204
+ *
205
+ * # Returns
206
+ *
207
+ * Promise resolving to:
208
+ * - `bigint`: offset ID for non-empty batches
209
+ * - `null`: for empty batches
210
+ *
211
+ * # Example
212
+ *
213
+ * ```typescript
214
+ * const buffers = records.map(r => Buffer.from(encode(r)));
215
+ * const offsetId = await stream.ingestRecords(buffers);
216
+ *
217
+ * if (offsetId !== null) {
218
+ * console.log('Batch acknowledged at offset:', offsetId);
219
+ * }
220
+ * ```
221
+ */
222
+ ingestRecords(records: Array<unknown>): Promise<bigint | null>
223
+ /**
224
+ * Ingests a single record and returns the offset ID directly after queuing.
225
+ *
226
+ * Unlike `ingestRecord()`, this method returns the offset ID immediately after
227
+ * the record is queued, without waiting for server acknowledgment. Use
228
+ * `waitForOffset()` to wait for acknowledgment when needed.
229
+ *
230
+ * This is the recommended API for high-throughput scenarios where you want to
231
+ * decouple record ingestion from acknowledgment tracking.
232
+ *
233
+ * # Arguments
234
+ *
235
+ * * `payload` - The record data (Buffer, string, protobuf message, or plain object)
236
+ *
237
+ * # Returns
238
+ *
239
+ * The offset ID (bigint) assigned to this record.
240
+ *
241
+ * # Example
242
+ *
243
+ * ```typescript
244
+ * const offset1 = await stream.ingestRecordOffset(record1);
245
+ * const offset2 = await stream.ingestRecordOffset(record2);
246
+ * // Wait for both to be acknowledged
247
+ * await stream.waitForOffset(offset2);
248
+ * ```
249
+ */
250
+ ingestRecordOffset(payload: unknown): Promise<bigint>
251
+ /**
252
+ * Ingests multiple records as a batch and returns the offset ID directly after queuing.
253
+ *
254
+ * Unlike `ingestRecords()`, this method returns the offset ID immediately after
255
+ * the batch is queued, without waiting for server acknowledgment. Use
256
+ * `waitForOffset()` to wait for acknowledgment when needed.
257
+ *
258
+ * # Arguments
259
+ *
260
+ * * `records` - Array of record data
261
+ *
262
+ * # Returns
263
+ *
264
+ * The offset ID (bigint) for the batch, or null for empty batches.
265
+ *
266
+ * # Example
267
+ *
268
+ * ```typescript
269
+ * const offset = await stream.ingestRecordsOffset(batch);
270
+ * if (offset !== null) {
271
+ * await stream.waitForOffset(offset);
272
+ * }
273
+ * ```
274
+ */
275
+ ingestRecordsOffset(records: Array<unknown>): Promise<bigint | null>
276
+ /**
277
+ * Waits for a specific offset to be acknowledged by the server.
278
+ *
279
+ * Use this method with `ingestRecordOffset()` and `ingestRecordsOffset()` to
280
+ * selectively wait for acknowledgments. This allows you to ingest many records
281
+ * quickly and then wait only for specific offsets when needed.
282
+ *
283
+ * # Arguments
284
+ *
285
+ * * `offset_id` - The offset ID to wait for (returned by ingestRecordOffset/ingestRecordsOffset)
286
+ *
287
+ * # Errors
288
+ *
289
+ * - Timeout if acknowledgment takes too long
290
+ * - Server errors propagated immediately (no waiting for timeout)
291
+ *
292
+ * # Example
293
+ *
294
+ * ```typescript
295
+ * const offsets = [];
296
+ * for (const record of records) {
297
+ * offsets.push(await stream.ingestRecordOffset(record));
298
+ * }
299
+ * // Wait for the last offset (implies all previous are also acknowledged)
300
+ * await stream.waitForOffset(offsets[offsets.length - 1]);
301
+ * ```
302
+ */
303
+ waitForOffset(offsetId: bigint): Promise<void>
304
+ /**
305
+ * Flushes all pending records and waits for acknowledgments.
306
+ *
307
+ * This method ensures all previously ingested records have been sent to the server
308
+ * and acknowledged. It's useful for checkpointing or ensuring data durability.
309
+ *
310
+ * # Errors
311
+ *
312
+ * - Timeout errors if flush takes longer than configured timeout
313
+ * - Network errors if the connection fails during flush
314
+ */
315
+ flush(): Promise<void>
316
+ /**
317
+ * Closes the stream gracefully.
318
+ *
319
+ * This method flushes all pending records, waits for acknowledgments, and then
320
+ * closes the underlying gRPC connection. Always call this method when done with
321
+ * the stream to ensure data integrity.
322
+ *
323
+ * # Errors
324
+ *
325
+ * - Returns an error if some records could not be acknowledged
326
+ * - Network errors during the close operation
327
+ */
328
+ close(): Promise<void>
329
+ /**
330
+ * Gets the list of unacknowledged records.
331
+ *
332
+ * This method should only be called after a stream failure to retrieve records
333
+ * that were sent but not acknowledged by the server. These records can be
334
+ * re-ingested into a new stream.
335
+ *
336
+ * # Returns
337
+ *
338
+ * An array of Buffers containing the unacknowledged record payloads.
339
+ */
340
+ getUnackedRecords(): Promise<Array<Buffer>>
341
+ /**
342
+ * Gets unacknowledged records grouped by their original batches.
343
+ *
344
+ * This preserves the batch structure from ingestion:
345
+ * - Each ingestRecord() call → 1-element batch
346
+ * - Each ingestRecords() call → N-element batch
347
+ *
348
+ * Should only be called after stream failure. All records returned as Buffers
349
+ * (JSON strings are converted to UTF-8 bytes).
350
+ *
351
+ * # Returns
352
+ *
353
+ * Array of batches, where each batch is an array of Buffers
354
+ *
355
+ * # Example
356
+ *
357
+ * ```typescript
358
+ * try {
359
+ * await stream.ingestRecords(batch1);
360
+ * await stream.ingestRecords(batch2);
361
+ * } catch (error) {
362
+ * const unackedBatches = await stream.getUnackedBatches();
363
+ *
364
+ * // Re-ingest with new stream
365
+ * for (const batch of unackedBatches) {
366
+ * await newStream.ingestRecords(batch);
367
+ * }
368
+ * }
369
+ * ```
370
+ */
371
+ getUnackedBatches(): Promise<Array<Array<Buffer>>>
372
+ }
373
+ /**
374
+ * The main SDK for interacting with the Databricks Zerobus service.
375
+ *
376
+ * This is the entry point for creating ingestion streams to Delta tables.
377
+ *
378
+ * # Example
379
+ *
380
+ * ```typescript
381
+ * const sdk = new ZerobusSdk(
382
+ * "https://workspace-id.zerobus.region.cloud.databricks.com",
383
+ * "https://workspace.cloud.databricks.com"
384
+ * );
385
+ *
386
+ * const stream = await sdk.createStream(
387
+ * { tableName: "catalog.schema.table" },
388
+ * "client-id",
389
+ * "client-secret"
390
+ * );
391
+ * ```
392
+ */
393
+ export declare class ZerobusSdk {
394
+ /**
395
+ * Creates a new Zerobus SDK instance.
396
+ *
397
+ * # Arguments
398
+ *
399
+ * * `zerobus_endpoint` - The Zerobus API endpoint URL
400
+ * (e.g., "https://workspace-id.zerobus.region.cloud.databricks.com")
401
+ * * `unity_catalog_url` - The Unity Catalog endpoint URL
402
+ * (e.g., "https://workspace.cloud.databricks.com")
403
+ *
404
+ * # Errors
405
+ *
406
+ * - Invalid endpoint URLs
407
+ * - Failed to extract workspace ID from the endpoint
408
+ */
409
+ constructor(zerobusEndpoint: string, unityCatalogUrl: string)
410
+ /**
411
+ * Creates a new ingestion stream to a Delta table.
412
+ *
413
+ * This method establishes a bidirectional gRPC connection to the Zerobus service
414
+ * and prepares it for data ingestion. By default, it uses OAuth 2.0 Client Credentials
415
+ * authentication. For custom authentication (e.g., Personal Access Tokens), provide
416
+ * a custom headers_provider.
417
+ *
418
+ * # Arguments
419
+ *
420
+ * * `table_properties` - Properties of the target table including name and optional schema
421
+ * * `client_id` - OAuth 2.0 client ID (ignored if headers_provider is provided)
422
+ * * `client_secret` - OAuth 2.0 client secret (ignored if headers_provider is provided)
423
+ * * `options` - Optional stream configuration (uses defaults if not provided)
424
+ * * `headers_provider` - Optional custom headers provider for authentication.
425
+ * If not provided, uses OAuth with client_id and client_secret.
426
+ * * `ack_callback` - Optional callback for receiving acknowledgment notifications.
427
+ * Called when records are acknowledged or when errors occur.
428
+ *
429
+ * # Returns
430
+ *
431
+ * A Promise that resolves to a ZerobusStream ready for data ingestion.
432
+ *
433
+ * # Errors
434
+ *
435
+ * - Authentication failures (invalid credentials)
436
+ * - Invalid table name or insufficient permissions
437
+ * - Network connectivity issues
438
+ * - Schema validation errors
439
+ *
440
+ * # Examples
441
+ *
442
+ * OAuth authentication (default):
443
+ * ```typescript
444
+ * const stream = await sdk.createStream(
445
+ * { tableName: "catalog.schema.table" },
446
+ * "client-id",
447
+ * "client-secret"
448
+ * );
449
+ * ```
450
+ *
451
+ * Custom authentication with headers provider:
452
+ * ```typescript
453
+ * const headersProvider = {
454
+ * getHeadersCallback: async () => [
455
+ * ["authorization", `Bearer ${myToken}`],
456
+ * ["x-databricks-zerobus-table-name", tableName]
457
+ * ]
458
+ * };
459
+ * const stream = await sdk.createStream(
460
+ * { tableName: "catalog.schema.table" },
461
+ * "", // ignored
462
+ * "", // ignored
463
+ * undefined,
464
+ * headersProvider
465
+ * );
466
+ * ```
467
+ *
468
+ * With acknowledgment callbacks:
469
+ * ```typescript
470
+ * const ackCallback = {
471
+ * onAck: (offsetId) => console.log(`Record ${offsetId} acknowledged`),
472
+ * onError: (offsetId, error) => console.error(`Record ${offsetId} failed: ${error}`)
473
+ * };
474
+ * const stream = await sdk.createStream(
475
+ * { tableName: "catalog.schema.table" },
476
+ * "client-id",
477
+ * "client-secret",
478
+ * undefined,
479
+ * undefined,
480
+ * ackCallback
481
+ * );
482
+ * ```
483
+ */
484
+ createStream(tableProperties: TableProperties, clientId: string, clientSecret: string, options?: StreamConfigurationOptions | undefined | null, headersProvider?: JsHeadersProvider | undefined | null, ackCallback?: JsAckCallback | undefined | null): Promise<ZerobusStream>
485
+ /**
486
+ * Creates a new ingestion stream for local testing without authentication.
487
+ *
488
+ * **Warning**: This should only be used for local development/testing
489
+ * where the server does not require authentication.
490
+ *
491
+ * # Arguments
492
+ *
493
+ * * `table_properties` - Properties of the target table
494
+ * * `options` - Optional stream configuration
495
+ *
496
+ * # Returns
497
+ *
498
+  * A Promise that resolves to a ZerobusStream ready for data ingestion.
+  * NOTE(review): the generated signature types this return as plain `object`, not
+  * `Promise<ZerobusStream>` — callers must cast; confirm against the NAPI-RS bindings.
499
+ */
500
+ createStreamLocal(tableProperties: TableProperties, options?: StreamConfigurationOptions | undefined | null): object
501
+ /**
502
+ * Recreates a stream with the same configuration and re-ingests unacknowledged batches.
503
+ *
504
+ * This method is the recommended approach for recovering from stream failures. It:
505
+ * 1. Retrieves all unacknowledged batches from the failed stream
506
+ * 2. Creates a new stream with identical configuration
507
+ * 3. Re-ingests all unacknowledged batches in order
508
+ * 4. Returns the new stream ready for continued ingestion
509
+ *
510
+ * # Arguments
511
+ *
512
+ * * `stream` - The failed or closed stream to recreate
513
+ *
514
+ * # Returns
515
+ *
516
+ * A Promise that resolves to a new ZerobusStream with all unacknowledged batches re-ingested.
517
+ *
518
+ * # Errors
519
+ *
520
+ * - Failed to retrieve unacknowledged batches from the original stream
521
+ * - Authentication failures when creating the new stream
522
+ * - Network connectivity issues during re-ingestion
523
+ *
524
+ * # Examples
525
+ *
526
+ * ```typescript
527
+ * try {
528
+ * await stream.ingestRecords(batch);
529
+ * } catch (error) {
530
+ * await stream.close();
531
+ * // Recreate stream with all unacked batches re-ingested
532
+ * const newStream = await sdk.recreateStream(stream);
533
+ * // Continue ingesting with newStream
534
+ * }
535
+ * ```
536
+ */
537
+ recreateStream(stream: ZerobusStream): Promise<ZerobusStream>
538
+ }