@databricks/zerobus-ingest-sdk 0.1.1 → 0.2.0

This diff shows the changes between publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the registry.
package/index.d.ts ADDED
@@ -0,0 +1,538 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ /* auto-generated by NAPI-RS */
5
+
6
+ /**
7
+ * Record serialization format.
8
+ *
9
+ * Specifies how records should be encoded when ingested into the stream.
10
+ */
11
+ export const enum RecordType {
12
+ /** JSON encoding - records are JSON-encoded strings */
13
+ Json = 0,
14
+ /** Protocol Buffers encoding - records are binary protobuf messages */
15
+ Proto = 1
16
+ }
17
+ /**
18
+ * Configuration options for the Zerobus stream.
19
+ *
20
+ * These options control stream behavior including recovery, timeouts, and inflight limits.
21
+ */
22
+ export interface StreamConfigurationOptions {
23
+ /**
24
+ * Maximum number of unacknowledged requests that can be in flight.
25
+ * Default: 10,000
26
+ */
27
+ maxInflightRequests?: number
28
+ /**
29
+ * Enable automatic stream recovery on transient failures.
30
+ * Default: true
31
+ */
32
+ recovery?: boolean
33
+ /**
34
+ * Timeout for recovery operations in milliseconds.
35
+ * Default: 15,000 (15 seconds)
36
+ */
37
+ recoveryTimeoutMs?: number
38
+ /**
39
+ * Delay between recovery retry attempts in milliseconds.
40
+ * Default: 2,000 (2 seconds)
41
+ */
42
+ recoveryBackoffMs?: number
43
+ /**
44
+ * Maximum number of recovery attempts before giving up.
45
+ * Default: 4
46
+ */
47
+ recoveryRetries?: number
48
+ /**
49
+ * Timeout for flush operations in milliseconds.
50
+ * Default: 300,000 (5 minutes)
51
+ */
52
+ flushTimeoutMs?: number
53
+ /**
54
+ * Timeout waiting for server acknowledgments in milliseconds.
55
+ * Default: 60,000 (1 minute)
56
+ */
57
+ serverLackOfAckTimeoutMs?: number
58
+ /**
59
+ * Record serialization format.
60
+ * Use RecordType.Json for JSON encoding or RecordType.Proto for Protocol Buffers.
61
+ * Default: RecordType.Proto (Protocol Buffers)
62
+ */
63
+ recordType?: number
64
+ /**
65
+ * Maximum time in milliseconds that acknowledgment callbacks may run after stream closure.
66
+ * - None (undefined): Wait indefinitely for callbacks to complete
67
+ * - Some(0): Don't wait for callbacks at all
68
+ * - Some(x): Wait up to x milliseconds for callbacks
69
+ * Default: None (wait indefinitely)
70
+ */
71
+ callbackMaxWaitTimeMs?: number
72
+ /**
73
+ * Maximum wait time during graceful stream close in milliseconds.
74
+ * When the server signals stream closure, this controls how long to wait
75
+ * for in-flight records to be acknowledged.
76
+ * - None (undefined): Wait for full server-specified duration
77
+ * - Some(0): Immediately trigger recovery without waiting
78
+ * - Some(x): Wait up to min(x, server_duration) milliseconds
79
+ * Default: None (wait for full server duration)
80
+ */
81
+ streamPausedMaxWaitTimeMs?: number
82
+ }
83
+ /**
84
+ * Properties of the target Delta table for ingestion.
85
+ *
86
+ * Specifies which Unity Catalog table to write to and optionally the schema descriptor
87
+ * for Protocol Buffers encoding.
88
+ */
89
+ export interface TableProperties {
90
+ /** Full table name in Unity Catalog (e.g., "catalog.schema.table") */
91
+ tableName: string
92
+ /**
93
+ * Optional Protocol Buffer descriptor as a base64-encoded string.
94
+ * If not provided, JSON encoding will be used.
95
+ */
96
+ descriptorProto?: string
97
+ }
98
+ /**
99
+ * JavaScript headers provider callback wrapper.
100
+ *
101
+ * Allows TypeScript code to provide custom authentication headers
102
+ * by implementing a getHeaders() function.
103
+ */
104
+ export interface JsHeadersProvider {
105
+ /** JavaScript function: () => Promise<Array<[string, string]>> */
106
+ getHeadersCallback: (...args: any[]) => any
107
+ }
108
+ /**
109
+ * JavaScript acknowledgment callback wrapper.
110
+ *
111
+ * Allows TypeScript code to receive notifications when records are acknowledged
112
+ * or when errors occur.
113
+ */
114
+ export interface JsAckCallback {
115
+ /** JavaScript function called when a record is acknowledged: (offsetId: bigint) => void */
116
+ onAck?: (...args: any[]) => any
117
+ /** JavaScript function called when an error occurs: (offsetId: bigint, errorMessage: string) => void */
118
+ onError?: (...args: any[]) => any
119
+ }
120
+ /**
121
+ * Custom error type for Zerobus operations.
122
+ *
123
+ * This error type includes information about whether the error is retryable,
124
+ * which helps determine if automatic recovery can resolve the issue.
125
+ */
126
+ export declare class ZerobusError {
127
+ /** Returns true if this error can be automatically retried by the SDK. */
128
+ get isRetryable(): boolean
129
+ /** Get the error message. */
130
+ get message(): string
131
+ }
132
+ /**
133
+ * A stream for ingesting data into a Databricks Delta table.
134
+ *
135
+ * The stream manages a bidirectional gRPC connection, handles acknowledgments,
136
+ * and provides automatic recovery on transient failures.
137
+ *
138
+ * # Example
139
+ *
140
+ * ```typescript
141
+ * const stream = await sdk.createStream(tableProps, clientId, clientSecret, options);
142
+  * const ackPromise = stream.ingestRecord(Buffer.from([1, 2, 3]));
143
+ * const offset = await ackPromise;
144
+ * await stream.close();
145
+ * ```
146
+ */
147
+ export declare class ZerobusStream {
148
+ /**
149
+ * Ingests a single record into the stream.
150
+ *
151
+ * **@deprecated** Use `ingestRecordOffset()` instead, which returns the offset directly
152
+ * after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
153
+ *
154
+ * This method accepts either:
155
+ * - A Protocol Buffer encoded record as a Buffer (Vec<u8>)
156
+ * - A JSON string
157
+ *
158
+ * This method BLOCKS until the record is sent to the SDK's internal landing zone,
159
+ * then returns a Promise for the server acknowledgment. This allows you to send
160
+ * many records immediately without waiting for acknowledgments:
161
+ *
162
+ * ```typescript
163
+ * let lastAckPromise;
164
+ * for (let i = 0; i < 1000; i++) {
165
+ * // This call blocks until record is sent (in SDK)
166
+ * lastAckPromise = stream.ingestRecord(record);
167
+ * }
168
+ * // All 1000 records are now in the SDK's internal queue
169
+ * // Wait for the last acknowledgment
170
+ * await lastAckPromise;
171
+ * // Flush to ensure all records are acknowledged
172
+ * await stream.flush();
173
+ * ```
174
+ *
175
+ * # Arguments
176
+ *
177
+ * * `payload` - The record data. Accepts:
178
+ * - Buffer (low-level proto bytes)
179
+ * - string (low-level JSON string)
180
+ * - Protobuf message object with .encode() method (high-level, auto-serializes)
181
+ * - Plain JavaScript object (high-level, auto-stringifies to JSON)
182
+ *
183
+ * # Returns
184
+ *
185
+ * A Promise that resolves to the offset ID when the server acknowledges the record.
186
+ */
187
+ ingestRecord(payload: unknown): Promise<bigint>
188
+ /**
189
+ * Ingests multiple records as a single atomic batch.
190
+ *
191
+ * **@deprecated** Use `ingestRecordsOffset()` instead, which returns the offset directly
192
+ * after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
193
+ *
194
+ * This method accepts an array of records (Protocol Buffer buffers or JSON strings)
195
+ * and ingests them as a batch. The batch receives a single acknowledgment from
196
+ * the server with all-or-nothing semantics.
197
+ *
198
+ * Similar to ingestRecord(), this BLOCKS until the batch is sent to the SDK's
199
+ * internal landing zone, then returns a Promise for the server acknowledgment.
200
+ *
201
+ * # Arguments
202
+ *
203
+ * * `records` - Array of record data (Buffer for protobuf, string for JSON)
204
+ *
205
+ * # Returns
206
+ *
207
+ * Promise resolving to:
208
+ * - `bigint`: offset ID for non-empty batches
209
+ * - `null`: for empty batches
210
+ *
211
+ * # Example
212
+ *
213
+ * ```typescript
214
+ * const buffers = records.map(r => Buffer.from(encode(r)));
215
+ * const offsetId = await stream.ingestRecords(buffers);
216
+ *
217
+ * if (offsetId !== null) {
218
+ * console.log('Batch acknowledged at offset:', offsetId);
219
+ * }
220
+ * ```
221
+ */
222
+ ingestRecords(records: Array<unknown>): Promise<bigint | null>
223
+ /**
224
+ * Ingests a single record and returns the offset ID directly after queuing.
225
+ *
226
+ * Unlike `ingestRecord()`, this method returns the offset ID immediately after
227
+ * the record is queued, without waiting for server acknowledgment. Use
228
+ * `waitForOffset()` to wait for acknowledgment when needed.
229
+ *
230
+ * This is the recommended API for high-throughput scenarios where you want to
231
+ * decouple record ingestion from acknowledgment tracking.
232
+ *
233
+ * # Arguments
234
+ *
235
+ * * `payload` - The record data (Buffer, string, protobuf message, or plain object)
236
+ *
237
+ * # Returns
238
+ *
239
+ * The offset ID (bigint) assigned to this record.
240
+ *
241
+ * # Example
242
+ *
243
+ * ```typescript
244
+ * const offset1 = await stream.ingestRecordOffset(record1);
245
+ * const offset2 = await stream.ingestRecordOffset(record2);
246
+ * // Wait for both to be acknowledged
247
+ * await stream.waitForOffset(offset2);
248
+ * ```
249
+ */
250
+ ingestRecordOffset(payload: unknown): Promise<bigint>
251
+ /**
252
+ * Ingests multiple records as a batch and returns the offset ID directly after queuing.
253
+ *
254
+ * Unlike `ingestRecords()`, this method returns the offset ID immediately after
255
+ * the batch is queued, without waiting for server acknowledgment. Use
256
+ * `waitForOffset()` to wait for acknowledgment when needed.
257
+ *
258
+ * # Arguments
259
+ *
260
+ * * `records` - Array of record data
261
+ *
262
+ * # Returns
263
+ *
264
+ * The offset ID (bigint) for the batch, or null for empty batches.
265
+ *
266
+ * # Example
267
+ *
268
+ * ```typescript
269
+ * const offset = await stream.ingestRecordsOffset(batch);
270
+ * if (offset !== null) {
271
+ * await stream.waitForOffset(offset);
272
+ * }
273
+ * ```
274
+ */
275
+ ingestRecordsOffset(records: Array<unknown>): Promise<bigint | null>
276
+ /**
277
+ * Waits for a specific offset to be acknowledged by the server.
278
+ *
279
+ * Use this method with `ingestRecordOffset()` and `ingestRecordsOffset()` to
280
+ * selectively wait for acknowledgments. This allows you to ingest many records
281
+ * quickly and then wait only for specific offsets when needed.
282
+ *
283
+ * # Arguments
284
+ *
285
+ * * `offset_id` - The offset ID to wait for (returned by ingestRecordOffset/ingestRecordsOffset)
286
+ *
287
+ * # Errors
288
+ *
289
+ * - Timeout if acknowledgment takes too long
290
+ * - Server errors propagated immediately (no waiting for timeout)
291
+ *
292
+ * # Example
293
+ *
294
+ * ```typescript
295
+ * const offsets = [];
296
+ * for (const record of records) {
297
+ * offsets.push(await stream.ingestRecordOffset(record));
298
+ * }
299
+ * // Wait for the last offset (implies all previous are also acknowledged)
300
+ * await stream.waitForOffset(offsets[offsets.length - 1]);
301
+ * ```
302
+ */
303
+ waitForOffset(offsetId: bigint): Promise<void>
304
+ /**
305
+ * Flushes all pending records and waits for acknowledgments.
306
+ *
307
+ * This method ensures all previously ingested records have been sent to the server
308
+ * and acknowledged. It's useful for checkpointing or ensuring data durability.
309
+ *
310
+ * # Errors
311
+ *
312
+ * - Timeout errors if flush takes longer than configured timeout
313
+ * - Network errors if the connection fails during flush
314
+ */
315
+ flush(): Promise<void>
316
+ /**
317
+ * Closes the stream gracefully.
318
+ *
319
+ * This method flushes all pending records, waits for acknowledgments, and then
320
+ * closes the underlying gRPC connection. Always call this method when done with
321
+ * the stream to ensure data integrity.
322
+ *
323
+ * # Errors
324
+ *
325
+ * - Returns an error if some records could not be acknowledged
326
+ * - Network errors during the close operation
327
+ */
328
+ close(): Promise<void>
329
+ /**
330
+ * Gets the list of unacknowledged records.
331
+ *
332
+ * This method should only be called after a stream failure to retrieve records
333
+ * that were sent but not acknowledged by the server. These records can be
334
+ * re-ingested into a new stream.
335
+ *
336
+ * # Returns
337
+ *
338
+ * An array of Buffers containing the unacknowledged record payloads.
339
+ */
340
+ getUnackedRecords(): Promise<Array<Buffer>>
341
+ /**
342
+ * Gets unacknowledged records grouped by their original batches.
343
+ *
344
+ * This preserves the batch structure from ingestion:
345
+ * - Each ingestRecord() call → 1-element batch
346
+ * - Each ingestRecords() call → N-element batch
347
+ *
348
+ * Should only be called after stream failure. All records returned as Buffers
349
+ * (JSON strings are converted to UTF-8 bytes).
350
+ *
351
+ * # Returns
352
+ *
353
+ * Array of batches, where each batch is an array of Buffers
354
+ *
355
+ * # Example
356
+ *
357
+ * ```typescript
358
+ * try {
359
+ * await stream.ingestRecords(batch1);
360
+ * await stream.ingestRecords(batch2);
361
+ * } catch (error) {
362
+ * const unackedBatches = await stream.getUnackedBatches();
363
+ *
364
+ * // Re-ingest with new stream
365
+ * for (const batch of unackedBatches) {
366
+ * await newStream.ingestRecords(batch);
367
+ * }
368
+ * }
369
+ * ```
370
+ */
371
+ getUnackedBatches(): Promise<Array<Array<Buffer>>>
372
+ }
373
+ /**
374
+ * The main SDK for interacting with the Databricks Zerobus service.
375
+ *
376
+ * This is the entry point for creating ingestion streams to Delta tables.
377
+ *
378
+ * # Example
379
+ *
380
+ * ```typescript
381
+ * const sdk = new ZerobusSdk(
382
+ * "https://workspace-id.zerobus.region.cloud.databricks.com",
383
+ * "https://workspace.cloud.databricks.com"
384
+ * );
385
+ *
386
+ * const stream = await sdk.createStream(
387
+ * { tableName: "catalog.schema.table" },
388
+ * "client-id",
389
+ * "client-secret"
390
+ * );
391
+ * ```
392
+ */
393
+ export declare class ZerobusSdk {
394
+ /**
395
+ * Creates a new Zerobus SDK instance.
396
+ *
397
+ * # Arguments
398
+ *
399
+ * * `zerobus_endpoint` - The Zerobus API endpoint URL
400
+ * (e.g., "https://workspace-id.zerobus.region.cloud.databricks.com")
401
+ * * `unity_catalog_url` - The Unity Catalog endpoint URL
402
+ * (e.g., "https://workspace.cloud.databricks.com")
403
+ *
404
+ * # Errors
405
+ *
406
+ * - Invalid endpoint URLs
407
+ * - Failed to extract workspace ID from the endpoint
408
+ */
409
+ constructor(zerobusEndpoint: string, unityCatalogUrl: string)
410
+ /**
411
+ * Creates a new ingestion stream to a Delta table.
412
+ *
413
+ * This method establishes a bidirectional gRPC connection to the Zerobus service
414
+ * and prepares it for data ingestion. By default, it uses OAuth 2.0 Client Credentials
415
+ * authentication. For custom authentication (e.g., Personal Access Tokens), provide
416
+ * a custom headers_provider.
417
+ *
418
+ * # Arguments
419
+ *
420
+ * * `table_properties` - Properties of the target table including name and optional schema
421
+ * * `client_id` - OAuth 2.0 client ID (ignored if headers_provider is provided)
422
+ * * `client_secret` - OAuth 2.0 client secret (ignored if headers_provider is provided)
423
+ * * `options` - Optional stream configuration (uses defaults if not provided)
424
+ * * `headers_provider` - Optional custom headers provider for authentication.
425
+ * If not provided, uses OAuth with client_id and client_secret.
426
+ * * `ack_callback` - Optional callback for receiving acknowledgment notifications.
427
+ * Called when records are acknowledged or when errors occur.
428
+ *
429
+ * # Returns
430
+ *
431
+ * A Promise that resolves to a ZerobusStream ready for data ingestion.
432
+ *
433
+ * # Errors
434
+ *
435
+ * - Authentication failures (invalid credentials)
436
+ * - Invalid table name or insufficient permissions
437
+ * - Network connectivity issues
438
+ * - Schema validation errors
439
+ *
440
+ * # Examples
441
+ *
442
+ * OAuth authentication (default):
443
+ * ```typescript
444
+ * const stream = await sdk.createStream(
445
+ * { tableName: "catalog.schema.table" },
446
+ * "client-id",
447
+ * "client-secret"
448
+ * );
449
+ * ```
450
+ *
451
+ * Custom authentication with headers provider:
452
+ * ```typescript
453
+ * const headersProvider = {
454
+ * getHeadersCallback: async () => [
455
+ * ["authorization", `Bearer ${myToken}`],
456
+ * ["x-databricks-zerobus-table-name", tableName]
457
+ * ]
458
+ * };
459
+ * const stream = await sdk.createStream(
460
+ * { tableName: "catalog.schema.table" },
461
+ * "", // ignored
462
+ * "", // ignored
463
+ * undefined,
464
+ * headersProvider
465
+ * );
466
+ * ```
467
+ *
468
+ * With acknowledgment callbacks:
469
+ * ```typescript
470
+ * const ackCallback = {
471
+ * onAck: (offsetId) => console.log(`Record ${offsetId} acknowledged`),
472
+ * onError: (offsetId, error) => console.error(`Record ${offsetId} failed: ${error}`)
473
+ * };
474
+ * const stream = await sdk.createStream(
475
+ * { tableName: "catalog.schema.table" },
476
+ * "client-id",
477
+ * "client-secret",
478
+ * undefined,
479
+ * undefined,
480
+ * ackCallback
481
+ * );
482
+ * ```
483
+ */
484
+ createStream(tableProperties: TableProperties, clientId: string, clientSecret: string, options?: StreamConfigurationOptions | undefined | null, headersProvider?: JsHeadersProvider | undefined | null, ackCallback?: JsAckCallback | undefined | null): Promise<ZerobusStream>
485
+ /**
486
+ * Creates a new ingestion stream for local testing without authentication.
487
+ *
488
+ * **Warning**: This should only be used for local development/testing
489
+ * where the server does not require authentication.
490
+ *
491
+ * # Arguments
492
+ *
493
+ * * `table_properties` - Properties of the target table
494
+ * * `options` - Optional stream configuration
495
+ *
496
+ * # Returns
497
+ *
498
+  * A Promise that resolves to a ZerobusStream ready for data ingestion.
+  * NOTE(review): the generated signature types this return as plain `object`, not
+  * `Promise<ZerobusStream>` — callers must cast; confirm against the NAPI-RS bindings.
499
+ */
500
+ createStreamLocal(tableProperties: TableProperties, options?: StreamConfigurationOptions | undefined | null): object
501
+ /**
502
+ * Recreates a stream with the same configuration and re-ingests unacknowledged batches.
503
+ *
504
+ * This method is the recommended approach for recovering from stream failures. It:
505
+ * 1. Retrieves all unacknowledged batches from the failed stream
506
+ * 2. Creates a new stream with identical configuration
507
+ * 3. Re-ingests all unacknowledged batches in order
508
+ * 4. Returns the new stream ready for continued ingestion
509
+ *
510
+ * # Arguments
511
+ *
512
+ * * `stream` - The failed or closed stream to recreate
513
+ *
514
+ * # Returns
515
+ *
516
+ * A Promise that resolves to a new ZerobusStream with all unacknowledged batches re-ingested.
517
+ *
518
+ * # Errors
519
+ *
520
+ * - Failed to retrieve unacknowledged batches from the original stream
521
+ * - Authentication failures when creating the new stream
522
+ * - Network connectivity issues during re-ingestion
523
+ *
524
+ * # Examples
525
+ *
526
+ * ```typescript
527
+ * try {
528
+ * await stream.ingestRecords(batch);
529
+ * } catch (error) {
530
+ * await stream.close();
531
+ * // Recreate stream with all unacked batches re-ingested
532
+ * const newStream = await sdk.recreateStream(stream);
533
+ * // Continue ingesting with newStream
534
+ * }
535
+ * ```
536
+ */
537
+ recreateStream(stream: ZerobusStream): Promise<ZerobusStream>
538
+ }