@databricks/zerobus-ingest-sdk 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE DELETED
@@ -1,69 +0,0 @@
1
- Databricks License
2
- Copyright (2022) Databricks, Inc.
3
-
4
- Definitions.
5
-
6
- Agreement: The agreement between Databricks, Inc., and you governing
7
- the use of the Databricks Services, as that term is defined in
8
- the Master Cloud Services Agreement (MCSA) located at
9
- www.databricks.com/legal/mcsa.
10
-
11
- Licensed Materials: The source code, object code, data, and/or other
12
- works to which this license applies.
13
-
14
- Scope of Use. You may not use the Licensed Materials except in
15
- connection with your use of the Databricks Services pursuant to
16
- the Agreement. Your use of the Licensed Materials must comply at all
17
- times with any restrictions applicable to the Databricks Services,
18
- generally, and must be used in accordance with any applicable
19
- documentation. You may view, use, copy, modify, publish, and/or
20
- distribute the Licensed Materials solely for the purposes of using
21
- the Licensed Materials within or connecting to the Databricks Services.
22
- If you do not agree to these terms, you may not view, use, copy,
23
- modify, publish, and/or distribute the Licensed Materials.
24
-
25
- Redistribution. You may redistribute and sublicense the Licensed
26
- Materials so long as all use is in compliance with these terms.
27
- In addition:
28
-
29
- - You must give any other recipients a copy of this License;
30
- - You must cause any modified files to carry prominent notices
31
- stating that you changed the files;
32
- - You must retain, in any derivative works that you distribute,
33
- all copyright, patent, trademark, and attribution notices,
34
- excluding those notices that do not pertain to any part of
35
- the derivative works; and
36
- - If a "NOTICE" text file is provided as part of its
37
- distribution, then any derivative works that you distribute
38
- must include a readable copy of the attribution notices
39
- contained within such NOTICE file, excluding those notices
40
- that do not pertain to any part of the derivative works.
41
-
42
- You may add your own copyright statement to your modifications and may
43
- provide additional license terms and conditions for use, reproduction,
44
- or distribution of your modifications, or for any such derivative works
45
- as a whole, provided your use, reproduction, and distribution of
46
- the Licensed Materials otherwise complies with the conditions stated
47
- in this License.
48
-
49
- Termination. This license terminates automatically upon your breach of
50
- these terms or upon the termination of your Agreement. Additionally,
51
- Databricks may terminate this license at any time on notice. Upon
52
- termination, you must permanently delete the Licensed Materials and
53
- all copies thereof.
54
-
55
- DISCLAIMER; LIMITATION OF LIABILITY.
56
-
57
- THE LICENSED MATERIALS ARE PROVIDED “AS-IS” AND WITH ALL FAULTS.
58
- DATABRICKS, ON BEHALF OF ITSELF AND ITS LICENSORS, SPECIFICALLY
59
- DISCLAIMS ALL WARRANTIES RELATING TO THE LICENSED MATERIALS, EXPRESS
60
- AND IMPLIED, INCLUDING, WITHOUT LIMITATION, IMPLIED WARRANTIES,
61
- CONDITIONS AND OTHER TERMS OF MERCHANTABILITY, SATISFACTORY QUALITY OR
62
- FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. DATABRICKS AND
63
- ITS LICENSORS TOTAL AGGREGATE LIABILITY RELATING TO OR ARISING OUT OF
64
- YOUR USE OF OR DATABRICKS’ PROVISIONING OF THE LICENSED MATERIALS SHALL
65
- BE LIMITED TO ONE THOUSAND ($1,000) DOLLARS. IN NO EVENT SHALL
66
- THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
67
- OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
68
- ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED MATERIALS OR
69
- THE USE OR OTHER DEALINGS IN THE LICENSED MATERIALS.
package/index.d.ts DELETED
@@ -1,538 +0,0 @@
1
- /* tslint:disable */
2
- /* eslint-disable */
3
-
4
- /* auto-generated by NAPI-RS */
5
-
6
- /**
7
- * Record serialization format.
8
- *
9
- * Specifies how records should be encoded when ingested into the stream.
10
- */
11
- export const enum RecordType {
12
- /** JSON encoding - records are JSON-encoded strings */
13
- Json = 0,
14
- /** Protocol Buffers encoding - records are binary protobuf messages */
15
- Proto = 1
16
- }
17
- /**
18
- * Configuration options for the Zerobus stream.
19
- *
20
- * These options control stream behavior including recovery, timeouts, and inflight limits.
21
- */
22
- export interface StreamConfigurationOptions {
23
- /**
24
- * Maximum number of unacknowledged requests that can be in flight.
25
- * Default: 10,000
26
- */
27
- maxInflightRequests?: number
28
- /**
29
- * Enable automatic stream recovery on transient failures.
30
- * Default: true
31
- */
32
- recovery?: boolean
33
- /**
34
- * Timeout for recovery operations in milliseconds.
35
- * Default: 15,000 (15 seconds)
36
- */
37
- recoveryTimeoutMs?: number
38
- /**
39
- * Delay between recovery retry attempts in milliseconds.
40
- * Default: 2,000 (2 seconds)
41
- */
42
- recoveryBackoffMs?: number
43
- /**
44
- * Maximum number of recovery attempts before giving up.
45
- * Default: 4
46
- */
47
- recoveryRetries?: number
48
- /**
49
- * Timeout for flush operations in milliseconds.
50
- * Default: 300,000 (5 minutes)
51
- */
52
- flushTimeoutMs?: number
53
- /**
54
- * Timeout waiting for server acknowledgments in milliseconds.
55
- * Default: 60,000 (1 minute)
56
- */
57
- serverLackOfAckTimeoutMs?: number
58
- /**
59
- * Record serialization format.
60
- * Use RecordType.Json for JSON encoding or RecordType.Proto for Protocol Buffers.
61
- * Default: RecordType.Proto (Protocol Buffers)
62
- */
63
- recordType?: number
64
- /**
65
- * Maximum time in milliseconds that acknowledgment callbacks may run after stream closure.
66
- * - undefined (omitted): Wait indefinitely for callbacks to complete
67
- * - 0: Don't wait for callbacks at all
68
- * - x: Wait up to x milliseconds for callbacks
69
- * Default: undefined (wait indefinitely)
70
- */
71
- callbackMaxWaitTimeMs?: number
72
- /**
73
- * Maximum wait time during graceful stream close in milliseconds.
74
- * When the server signals stream closure, this controls how long to wait
75
- * for in-flight records to be acknowledged.
76
- * - undefined (omitted): Wait for full server-specified duration
77
- * - 0: Immediately trigger recovery without waiting
78
- * - x: Wait up to min(x, server_duration) milliseconds
79
- * Default: undefined (wait for full server duration)
80
- */
81
- streamPausedMaxWaitTimeMs?: number
82
- }
83
- /**
84
- * Properties of the target Delta table for ingestion.
85
- *
86
- * Specifies which Unity Catalog table to write to and optionally the schema descriptor
87
- * for Protocol Buffers encoding.
88
- */
89
- export interface TableProperties {
90
- /** Full table name in Unity Catalog (e.g., "catalog.schema.table") */
91
- tableName: string
92
- /**
93
- * Optional Protocol Buffer descriptor as a base64-encoded string.
94
- * If not provided, JSON encoding will be used.
95
- */
96
- descriptorProto?: string
97
- }
98
- /**
99
- * JavaScript headers provider callback wrapper.
100
- *
101
- * Allows TypeScript code to provide custom authentication headers
102
- * by implementing a getHeaders() function.
103
- */
104
- export interface JsHeadersProvider {
105
- /** JavaScript function: () => Promise<Array<[string, string]>> */
106
- getHeadersCallback: (...args: any[]) => any
107
- }
108
- /**
109
- * JavaScript acknowledgment callback wrapper.
110
- *
111
- * Allows TypeScript code to receive notifications when records are acknowledged
112
- * or when errors occur.
113
- */
114
- export interface JsAckCallback {
115
- /** JavaScript function called when a record is acknowledged: (offsetId: bigint) => void */
116
- onAck?: (...args: any[]) => any
117
- /** JavaScript function called when an error occurs: (offsetId: bigint, errorMessage: string) => void */
118
- onError?: (...args: any[]) => any
119
- }
120
- /**
121
- * Custom error type for Zerobus operations.
122
- *
123
- * This error type includes information about whether the error is retryable,
124
- * which helps determine if automatic recovery can resolve the issue.
125
- */
126
- export declare class ZerobusError {
127
- /** Returns true if this error can be automatically retried by the SDK. */
128
- get isRetryable(): boolean
129
- /** Get the error message. */
130
- get message(): string
131
- }
132
- /**
133
- * A stream for ingesting data into a Databricks Delta table.
134
- *
135
- * The stream manages a bidirectional gRPC connection, handles acknowledgments,
136
- * and provides automatic recovery on transient failures.
137
- *
138
- * # Example
139
- *
140
- * ```typescript
141
- * const stream = await sdk.createStream(tableProps, clientId, clientSecret, options);
142
- * const ackPromise = stream.ingestRecord(Buffer.from([1, 2, 3]));
143
- * const offset = await ackPromise;
144
- * await stream.close();
145
- * ```
146
- */
147
- export declare class ZerobusStream {
148
- /**
149
- * Ingests a single record into the stream.
150
- *
151
- * **@deprecated** Use `ingestRecordOffset()` instead, which returns the offset directly
152
- * after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
153
- *
154
- * This method accepts either:
155
- * - A Protocol Buffer encoded record as a Buffer
156
- * - A JSON string
157
- *
158
- * This method BLOCKS until the record is sent to the SDK's internal landing zone,
159
- * then returns a Promise for the server acknowledgment. This allows you to send
160
- * many records immediately without waiting for acknowledgments:
161
- *
162
- * ```typescript
163
- * let lastAckPromise;
164
- * for (let i = 0; i < 1000; i++) {
165
- * // This call blocks until record is sent (in SDK)
166
- * lastAckPromise = stream.ingestRecord(record);
167
- * }
168
- * // All 1000 records are now in the SDK's internal queue
169
- * // Wait for the last acknowledgment
170
- * await lastAckPromise;
171
- * // Flush to ensure all records are acknowledged
172
- * await stream.flush();
173
- * ```
174
- *
175
- * # Arguments
176
- *
177
- * * `payload` - The record data. Accepts:
178
- * - Buffer (low-level proto bytes)
179
- * - string (low-level JSON string)
180
- * - Protobuf message object with .encode() method (high-level, auto-serializes)
181
- * - Plain JavaScript object (high-level, auto-stringifies to JSON)
182
- *
183
- * # Returns
184
- *
185
- * A Promise that resolves to the offset ID when the server acknowledges the record.
186
- */
187
- ingestRecord(payload: unknown): Promise<bigint>
188
- /**
189
- * Ingests multiple records as a single atomic batch.
190
- *
191
- * **@deprecated** Use `ingestRecordsOffset()` instead, which returns the offset directly
192
- * after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
193
- *
194
- * This method accepts an array of records (Protocol Buffer buffers or JSON strings)
195
- * and ingests them as a batch. The batch receives a single acknowledgment from
196
- * the server with all-or-nothing semantics.
197
- *
198
- * Similar to ingestRecord(), this BLOCKS until the batch is sent to the SDK's
199
- * internal landing zone, then returns a Promise for the server acknowledgment.
200
- *
201
- * # Arguments
202
- *
203
- * * `records` - Array of record data (Buffer for protobuf, string for JSON)
204
- *
205
- * # Returns
206
- *
207
- * Promise resolving to:
208
- * - `bigint`: offset ID for non-empty batches
209
- * - `null`: for empty batches
210
- *
211
- * # Example
212
- *
213
- * ```typescript
214
- * const buffers = records.map(r => Buffer.from(encode(r)));
215
- * const offsetId = await stream.ingestRecords(buffers);
216
- *
217
- * if (offsetId !== null) {
218
- * console.log('Batch acknowledged at offset:', offsetId);
219
- * }
220
- * ```
221
- */
222
- ingestRecords(records: Array<unknown>): Promise<bigint | null>
223
- /**
224
- * Ingests a single record and returns the offset ID directly after queuing.
225
- *
226
- * Unlike `ingestRecord()`, this method returns the offset ID immediately after
227
- * the record is queued, without waiting for server acknowledgment. Use
228
- * `waitForOffset()` to wait for acknowledgment when needed.
229
- *
230
- * This is the recommended API for high-throughput scenarios where you want to
231
- * decouple record ingestion from acknowledgment tracking.
232
- *
233
- * # Arguments
234
- *
235
- * * `payload` - The record data (Buffer, string, protobuf message, or plain object)
236
- *
237
- * # Returns
238
- *
239
- * The offset ID (bigint) assigned to this record.
240
- *
241
- * # Example
242
- *
243
- * ```typescript
244
- * const offset1 = await stream.ingestRecordOffset(record1);
245
- * const offset2 = await stream.ingestRecordOffset(record2);
246
- * // Wait for both to be acknowledged
247
- * await stream.waitForOffset(offset2);
248
- * ```
249
- */
250
- ingestRecordOffset(payload: unknown): Promise<bigint>
251
- /**
252
- * Ingests multiple records as a batch and returns the offset ID directly after queuing.
253
- *
254
- * Unlike `ingestRecords()`, this method returns the offset ID immediately after
255
- * the batch is queued, without waiting for server acknowledgment. Use
256
- * `waitForOffset()` to wait for acknowledgment when needed.
257
- *
258
- * # Arguments
259
- *
260
- * * `records` - Array of record data
261
- *
262
- * # Returns
263
- *
264
- * The offset ID (bigint) for the batch, or null for empty batches.
265
- *
266
- * # Example
267
- *
268
- * ```typescript
269
- * const offset = await stream.ingestRecordsOffset(batch);
270
- * if (offset !== null) {
271
- * await stream.waitForOffset(offset);
272
- * }
273
- * ```
274
- */
275
- ingestRecordsOffset(records: Array<unknown>): Promise<bigint | null>
276
- /**
277
- * Waits for a specific offset to be acknowledged by the server.
278
- *
279
- * Use this method with `ingestRecordOffset()` and `ingestRecordsOffset()` to
280
- * selectively wait for acknowledgments. This allows you to ingest many records
281
- * quickly and then wait only for specific offsets when needed.
282
- *
283
- * # Arguments
284
- *
285
- * * `offsetId` - The offset ID to wait for (returned by ingestRecordOffset/ingestRecordsOffset)
286
- *
287
- * # Errors
288
- *
289
- * - Timeout if acknowledgment takes too long
290
- * - Server errors propagated immediately (no waiting for timeout)
291
- *
292
- * # Example
293
- *
294
- * ```typescript
295
- * const offsets = [];
296
- * for (const record of records) {
297
- * offsets.push(await stream.ingestRecordOffset(record));
298
- * }
299
- * // Wait for the last offset (implies all previous are also acknowledged)
300
- * await stream.waitForOffset(offsets[offsets.length - 1]);
301
- * ```
302
- */
303
- waitForOffset(offsetId: bigint): Promise<void>
304
- /**
305
- * Flushes all pending records and waits for acknowledgments.
306
- *
307
- * This method ensures all previously ingested records have been sent to the server
308
- * and acknowledged. It's useful for checkpointing or ensuring data durability.
309
- *
310
- * # Errors
311
- *
312
- * - Timeout errors if flush takes longer than configured timeout
313
- * - Network errors if the connection fails during flush
314
- */
315
- flush(): Promise<void>
316
- /**
317
- * Closes the stream gracefully.
318
- *
319
- * This method flushes all pending records, waits for acknowledgments, and then
320
- * closes the underlying gRPC connection. Always call this method when done with
321
- * the stream to ensure data integrity.
322
- *
323
- * # Errors
324
- *
325
- * - Returns an error if some records could not be acknowledged
326
- * - Network errors during the close operation
327
- */
328
- close(): Promise<void>
329
- /**
330
- * Gets the list of unacknowledged records.
331
- *
332
- * This method should only be called after a stream failure to retrieve records
333
- * that were sent but not acknowledged by the server. These records can be
334
- * re-ingested into a new stream.
335
- *
336
- * # Returns
337
- *
338
- * An array of Buffers containing the unacknowledged record payloads.
339
- */
340
- getUnackedRecords(): Promise<Array<Buffer>>
341
- /**
342
- * Gets unacknowledged records grouped by their original batches.
343
- *
344
- * This preserves the batch structure from ingestion:
345
- * - Each ingestRecord() call → 1-element batch
346
- * - Each ingestRecords() call → N-element batch
347
- *
348
- * Should only be called after stream failure. All records returned as Buffers
349
- * (JSON strings are converted to UTF-8 bytes).
350
- *
351
- * # Returns
352
- *
353
- * Array of batches, where each batch is an array of Buffers
354
- *
355
- * # Example
356
- *
357
- * ```typescript
358
- * try {
359
- * await stream.ingestRecords(batch1);
360
- * await stream.ingestRecords(batch2);
361
- * } catch (error) {
362
- * const unackedBatches = await stream.getUnackedBatches();
363
- *
364
- * // Re-ingest with new stream
365
- * for (const batch of unackedBatches) {
366
- * await newStream.ingestRecords(batch);
367
- * }
368
- * }
369
- * ```
370
- */
371
- getUnackedBatches(): Promise<Array<Array<Buffer>>>
372
- }
373
- /**
374
- * The main SDK for interacting with the Databricks Zerobus service.
375
- *
376
- * This is the entry point for creating ingestion streams to Delta tables.
377
- *
378
- * # Example
379
- *
380
- * ```typescript
381
- * const sdk = new ZerobusSdk(
382
- * "https://workspace-id.zerobus.region.cloud.databricks.com",
383
- * "https://workspace.cloud.databricks.com"
384
- * );
385
- *
386
- * const stream = await sdk.createStream(
387
- * { tableName: "catalog.schema.table" },
388
- * "client-id",
389
- * "client-secret"
390
- * );
391
- * ```
392
- */
393
- export declare class ZerobusSdk {
394
- /**
395
- * Creates a new Zerobus SDK instance.
396
- *
397
- * # Arguments
398
- *
399
- * * `zerobusEndpoint` - The Zerobus API endpoint URL
400
- * (e.g., "https://workspace-id.zerobus.region.cloud.databricks.com")
401
- * * `unityCatalogUrl` - The Unity Catalog endpoint URL
402
- * (e.g., "https://workspace.cloud.databricks.com")
403
- *
404
- * # Errors
405
- *
406
- * - Invalid endpoint URLs
407
- * - Failed to extract workspace ID from the endpoint
408
- */
409
- constructor(zerobusEndpoint: string, unityCatalogUrl: string)
410
- /**
411
- * Creates a new ingestion stream to a Delta table.
412
- *
413
- * This method establishes a bidirectional gRPC connection to the Zerobus service
414
- * and prepares it for data ingestion. By default, it uses OAuth 2.0 Client Credentials
415
- * authentication. For custom authentication (e.g., Personal Access Tokens), provide
416
- * a custom headers_provider.
417
- *
418
- * # Arguments
419
- *
420
- * * `tableProperties` - Properties of the target table including name and optional schema
421
- * * `clientId` - OAuth 2.0 client ID (ignored if headersProvider is provided)
422
- * * `clientSecret` - OAuth 2.0 client secret (ignored if headersProvider is provided)
423
- * * `options` - Optional stream configuration (uses defaults if not provided)
424
- * * `headersProvider` - Optional custom headers provider for authentication.
425
- * If not provided, uses OAuth with clientId and clientSecret.
426
- * * `ackCallback` - Optional callback for receiving acknowledgment notifications.
427
- * Called when records are acknowledged or when errors occur.
428
- *
429
- * # Returns
430
- *
431
- * A Promise that resolves to a ZerobusStream ready for data ingestion.
432
- *
433
- * # Errors
434
- *
435
- * - Authentication failures (invalid credentials)
436
- * - Invalid table name or insufficient permissions
437
- * - Network connectivity issues
438
- * - Schema validation errors
439
- *
440
- * # Examples
441
- *
442
- * OAuth authentication (default):
443
- * ```typescript
444
- * const stream = await sdk.createStream(
445
- * { tableName: "catalog.schema.table" },
446
- * "client-id",
447
- * "client-secret"
448
- * );
449
- * ```
450
- *
451
- * Custom authentication with headers provider:
452
- * ```typescript
453
- * const headersProvider = {
454
- * getHeadersCallback: async () => [
455
- * ["authorization", `Bearer ${myToken}`],
456
- * ["x-databricks-zerobus-table-name", tableName]
457
- * ]
458
- * };
459
- * const stream = await sdk.createStream(
460
- * { tableName: "catalog.schema.table" },
461
- * "", // ignored
462
- * "", // ignored
463
- * undefined,
464
- * headersProvider
465
- * );
466
- * ```
467
- *
468
- * With acknowledgment callbacks:
469
- * ```typescript
470
- * const ackCallback = {
471
- * onAck: (offsetId) => console.log(`Record ${offsetId} acknowledged`),
472
- * onError: (offsetId, error) => console.error(`Record ${offsetId} failed: ${error}`)
473
- * };
474
- * const stream = await sdk.createStream(
475
- * { tableName: "catalog.schema.table" },
476
- * "client-id",
477
- * "client-secret",
478
- * undefined,
479
- * undefined,
480
- * ackCallback
481
- * );
482
- * ```
483
- */
484
- createStream(tableProperties: TableProperties, clientId: string, clientSecret: string, options?: StreamConfigurationOptions | undefined | null, headersProvider?: JsHeadersProvider | undefined | null, ackCallback?: JsAckCallback | undefined | null): Promise<ZerobusStream>
485
- /**
486
- * Creates a new ingestion stream for local testing without authentication.
487
- *
488
- * **Warning**: This should only be used for local development/testing
489
- * where the server does not require authentication.
490
- *
491
- * # Arguments
492
- *
493
- * * `tableProperties` - Properties of the target table
494
- * * `options` - Optional stream configuration
495
- *
496
- * # Returns
497
- *
498
- * A Promise that resolves to a ZerobusStream ready for data ingestion.
499
- */
500
- createStreamLocal(tableProperties: TableProperties, options?: StreamConfigurationOptions | undefined | null): object
501
- /**
502
- * Recreates a stream with the same configuration and re-ingests unacknowledged batches.
503
- *
504
- * This method is the recommended approach for recovering from stream failures. It:
505
- * 1. Retrieves all unacknowledged batches from the failed stream
506
- * 2. Creates a new stream with identical configuration
507
- * 3. Re-ingests all unacknowledged batches in order
508
- * 4. Returns the new stream ready for continued ingestion
509
- *
510
- * # Arguments
511
- *
512
- * * `stream` - The failed or closed stream to recreate
513
- *
514
- * # Returns
515
- *
516
- * A Promise that resolves to a new ZerobusStream with all unacknowledged batches re-ingested.
517
- *
518
- * # Errors
519
- *
520
- * - Failed to retrieve unacknowledged batches from the original stream
521
- * - Authentication failures when creating the new stream
522
- * - Network connectivity issues during re-ingestion
523
- *
524
- * # Examples
525
- *
526
- * ```typescript
527
- * try {
528
- * await stream.ingestRecords(batch);
529
- * } catch (error) {
530
- * await stream.close();
531
- * // Recreate stream with all unacked batches re-ingested
532
- * const newStream = await sdk.recreateStream(stream);
533
- * // Continue ingesting with newStream
534
- * }
535
- * ```
536
- */
537
- recreateStream(stream: ZerobusStream): Promise<ZerobusStream>
538
- }