npm - @databricks/zerobus-ingest-sdk - Versions diffs - 0.2.0 → 0.3.0 - Mend

@databricks/zerobus-ingest-sdk 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/Cargo.toml +3 -3
package/README.md +22 -82
package/package.json +13 -14
package/src/lib.rs +4 -1
package/zerobus-ingest-sdk.linux-arm64-gnu.node +0 -0
package/zerobus-ingest-sdk.linux-x64-gnu.node +0 -0
package/zerobus-ingest-sdk.win32-x64-msvc.node +0 -0
package/Cargo.lock +0 -2735
package/LICENSE +0 -69
package/index.d.ts +0 -538
package/index.js +0 -318
package/schemas/air_quality_descriptor.pb +0 -9
package/schemas/edge_cases_descriptor.pb +0 -41

package/LICENSE DELETED Viewed

@@ -1,69 +0,0 @@
-                               Databricks License
-                        Copyright (2022) Databricks, Inc.
-    Definitions.
-    Agreement: The agreement between Databricks, Inc., and you governing
-    the use of the Databricks Services, as that term is defined in
-    the Master Cloud Services Agreement (MCSA) located at
-    www.databricks.com/legal/mcsa.
-    Licensed Materials: The source code, object code, data, and/or other
-    works to which this license applies.
-    Scope of Use. You may not use the Licensed Materials except in
-    connection with your use of the Databricks Services pursuant to
-    the Agreement. Your use of the Licensed Materials must comply at all
-    times with any restrictions applicable to the Databricks Services,
-    generally, and must be used in accordance with any applicable
-    documentation. You may view, use, copy, modify, publish, and/or
-    distribute the Licensed Materials solely for the purposes of using
-    the Licensed Materials within or connecting to the Databricks Services.
-    If you do not agree to these terms, you may not view, use, copy,
-    modify, publish, and/or distribute the Licensed Materials.
-    Redistribution. You may redistribute and sublicense the Licensed
-    Materials so long as all use is in compliance with these terms.
-    In addition:
-        -   You must give any other recipients a copy of this License;
-        -   You must cause any modified files to carry prominent notices
-            stating that you changed the files;
-        -   You must retain, in any derivative works that you distribute,
-            all copyright, patent, trademark, and attribution notices,
-            excluding those notices that do not pertain to any part of
-            the derivative works; and
-        -   If a "NOTICE" text file is provided as part of its
-            distribution, then any derivative works that you distribute
-            must include a readable copy of the attribution notices
-            contained within such NOTICE file, excluding those notices
-            that do not pertain to any part of the derivative works.
-    You may add your own copyright statement to your modifications and may
-    provide additional license terms and conditions for use, reproduction,
-    or distribution of your modifications, or for any such derivative works
-    as a whole, provided your use, reproduction, and distribution of
-    the Licensed Materials otherwise complies with the conditions stated
-    in this License.
-    Termination. This license terminates automatically upon your breach of
-    these terms or upon the termination of your Agreement. Additionally,
-    Databricks may terminate this license at any time on notice. Upon
-    termination, you must permanently delete the Licensed Materials and
-    all copies thereof.
-    DISCLAIMER; LIMITATION OF LIABILITY.
-    THE LICENSED MATERIALS ARE PROVIDED “AS-IS” AND WITH ALL FAULTS.
-    DATABRICKS, ON BEHALF OF ITSELF AND ITS LICENSORS, SPECIFICALLY
-    DISCLAIMS ALL WARRANTIES RELATING TO THE LICENSED MATERIALS, EXPRESS
-    AND IMPLIED, INCLUDING, WITHOUT LIMITATION, IMPLIED WARRANTIES,
-    CONDITIONS AND OTHER TERMS OF MERCHANTABILITY, SATISFACTORY QUALITY OR
-    FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. DATABRICKS AND
-    ITS LICENSORS TOTAL AGGREGATE LIABILITY RELATING TO OR ARISING OUT OF
-    YOUR USE OF OR DATABRICKS’ PROVISIONING OF THE LICENSED MATERIALS SHALL
-    BE LIMITED TO ONE THOUSAND ($1,000) DOLLARS.  IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-    ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED MATERIALS OR
-    THE USE OR OTHER DEALINGS IN THE LICENSED MATERIALS.

package/index.d.ts DELETED Viewed

@@ -1,538 +0,0 @@
-/* tslint:disable */
-/* eslint-disable */
-/* auto-generated by NAPI-RS */
-/**
- * Record serialization format.
- *
- * Specifies how records should be encoded when ingested into the stream.
- *
- * The members are numeric (`Json = 0`, `Proto = 1`). The
- * `StreamConfigurationOptions.recordType` field is declared as a plain
- * `number`, so these are the values it is expected to carry.
- *
- * NOTE(review): this is an ambient `const enum`, so members are inlined at
- * compile time. Consumers building with `isolatedModules` may be unable to
- * reference it and would have to pass the literal values instead - confirm.
- */
-export const enum RecordType {
-  /** JSON encoding - records are JSON-encoded strings */
-  Json = 0,
-  /** Protocol Buffers encoding - records are binary protobuf messages */
-  Proto = 1
-}
-/**
- * Configuration options for the Zerobus stream.
- *
- * These options control stream behavior including recovery, timeouts, and inflight limits.
- * Every field is optional; each field's `Default:` line gives the value
- * documented for the case where the field is omitted.
- */
-export interface StreamConfigurationOptions {
-  /**
-   * Maximum number of unacknowledged requests that can be in flight.
-   * Default: 10,000
-   */
-  maxInflightRequests?: number
-  /**
-   * Enable automatic stream recovery on transient failures.
-   * Default: true
-   */
-  recovery?: boolean
-  /**
-   * Timeout for recovery operations in milliseconds.
-   * Default: 15,000 (15 seconds)
-   */
-  recoveryTimeoutMs?: number
-  /**
-   * Delay between recovery retry attempts in milliseconds.
-   * Default: 2,000 (2 seconds)
-   */
-  recoveryBackoffMs?: number
-  /**
-   * Maximum number of recovery attempts before giving up.
-   * Default: 4
-   */
-  recoveryRetries?: number
-  /**
-   * Timeout for flush operations in milliseconds.
-   * Default: 300,000 (5 minutes)
-   */
-  flushTimeoutMs?: number
-  /**
-   * Timeout waiting for server acknowledgments in milliseconds.
-   * Default: 60,000 (1 minute)
-   */
-  serverLackOfAckTimeoutMs?: number
-  /**
-   * Record serialization format.
-   * Use RecordType.Json for JSON encoding or RecordType.Proto for Protocol Buffers.
-   * The field is declared as `number`, so the compiler does not restrict it
-   * to the `RecordType` values (0 = Json, 1 = Proto).
-   * Default: RecordType.Proto (Protocol Buffers)
-   */
-  recordType?: number
-  /**
-   * Maximum time in milliseconds that acknowledgment callbacks may run after stream closure.
-   * - `undefined` (field omitted): Wait indefinitely for callbacks to complete
-   * - `0`: Don't wait for callbacks at all
-   * - `x`: Wait up to x milliseconds for callbacks
-   * Default: `undefined` (wait indefinitely)
-   */
-  callbackMaxWaitTimeMs?: number
-  /**
-   * Maximum wait time during graceful stream close in milliseconds.
-   * When the server signals stream closure, this controls how long to wait
-   * for in-flight records to be acknowledged.
-   * - `undefined` (field omitted): Wait for full server-specified duration
-   * - `0`: Immediately trigger recovery without waiting
-   * - `x`: Wait up to min(x, server_duration) milliseconds
-   * Default: `undefined` (wait for full server duration)
-   */
-  streamPausedMaxWaitTimeMs?: number
-}
-/**
- * Properties of the target Delta table for ingestion.
- *
- * Specifies which Unity Catalog table to write to and optionally the schema descriptor
- * for Protocol Buffers encoding.
- */
-export interface TableProperties {
-  /** Full table name in Unity Catalog (e.g., "catalog.schema.table") */
-  tableName: string
-  /**
-   * Optional Protocol Buffer descriptor as a base64-encoded string.
-   * If not provided, JSON encoding will be used.
-   *
-   * NOTE(review): `StreamConfigurationOptions.recordType` documents a default
-   * of `RecordType.Proto`. How that default interacts with an omitted
-   * descriptor is not visible in this declaration file - confirm against the
-   * implementation before relying on either statement.
-   */
-  descriptorProto?: string
-}
-/**
- * JavaScript headers provider callback wrapper.
- *
- * Allows TypeScript code to provide custom authentication headers
- * by supplying a `getHeadersCallback` function.
- */
-export interface JsHeadersProvider {
-  /**
-   * JavaScript function: () => Promise<Array<[string, string]>>
-   *
-   * Each tuple is a `[headerName, headerValue]` pair. The declared type is
-   * the loose `(...args: any[]) => any`, so the compiler does not check
-   * this shape; callers must return it correctly themselves.
-   */
-  getHeadersCallback: (...args: any[]) => any
-}
-/**
- * JavaScript acknowledgment callback wrapper.
- *
- * Allows TypeScript code to receive notifications when records are acknowledged
- * or when errors occur. Both callbacks are optional and may be supplied
- * independently. Their declared type is the loose `(...args: any[]) => any`,
- * so the signatures documented on each member are not checked by the compiler.
- */
-export interface JsAckCallback {
-  /** JavaScript function called when a record is acknowledged: (offsetId: bigint) => void */
-  onAck?: (...args: any[]) => any
-  /** JavaScript function called when an error occurs: (offsetId: bigint, errorMessage: string) => void */
-  onError?: (...args: any[]) => any
-}
-/**
- * Custom error type for Zerobus operations.
- *
- * This error type includes information about whether the error is retryable,
- * which helps determine if automatic recovery can resolve the issue.
- *
- * NOTE(review): this declaration does not extend `Error`, so the typings
- * expose only the two getters below (no `name` or `stack`). Whether thrown
- * values are instances of this class is not visible here - confirm before
- * relying on `instanceof ZerobusError` in a `catch` block.
- */
-export declare class ZerobusError {
-  /** Returns true if this error can be automatically retried by the SDK. */
-  get isRetryable(): boolean
-  /** Get the error message. */
-  get message(): string
-}
-/**
- * A stream for ingesting data into a Databricks Delta table.
- *
- * The stream manages a bidirectional gRPC connection, handles acknowledgments,
- * and provides automatic recovery on transient failures.
- *
- * # Example
- *
- * ```typescript
- * const stream = await sdk.createStream(tableProps, clientId, clientSecret, options);
- * // Queue the record, then wait for the server to acknowledge its offset
- * const offset = await stream.ingestRecordOffset(Buffer.from([1, 2, 3]));
- * await stream.waitForOffset(offset);
- * await stream.close();
- * ```
- */
-export declare class ZerobusStream {
-  /**
-   * Ingests a single record into the stream.
-   *
-   * **Deprecated:** Use `ingestRecordOffset()` instead, which returns the offset directly
-   * after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
-   *
-   * At the low level the payload is either:
-   * - A Protocol Buffer encoded record as a Buffer
-   * - A JSON string
-   *
-   * (See Arguments below for the full list of accepted forms.)
-   *
-   * This method BLOCKS until the record is sent to the SDK's internal landing zone,
-   * then returns a Promise for the server acknowledgment. This allows you to send
-   * many records immediately without waiting for acknowledgments:
-   *
-   * ```typescript
-   * let lastAckPromise;
-   * for (let i = 0; i < 1000; i++) {
-   *     // This call blocks until record is sent (in SDK)
-   *     lastAckPromise = stream.ingestRecord(record);
-   * }
-   * // All 1000 records are now in the SDK's internal queue
-   * // Wait for the last acknowledgment
-   * await lastAckPromise;
-   * // Flush to ensure all records are acknowledged
-   * await stream.flush();
-   * ```
-   *
-   * # Arguments
-   *
-   * * `payload` - The record data. Accepts:
-   *   - Buffer (low-level proto bytes)
-   *   - string (low-level JSON string)
-   *   - Protobuf message object with .encode() method (high-level, auto-serializes)
-   *   - Plain JavaScript object (high-level, auto-stringifies to JSON)
-   *
-   * # Returns
-   *
-   * A Promise that resolves to the offset ID when the server acknowledges the record.
-   *
-   * @deprecated Use `ingestRecordOffset()` together with `waitForOffset()` instead.
-   */
-  ingestRecord(payload: unknown): Promise<bigint>
-  /**
-   * Ingests multiple records as a single atomic batch.
-   *
-   * **Deprecated:** Use `ingestRecordsOffset()` instead, which returns the offset directly
-   * after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
-   *
-   * This method accepts an array of records (Protocol Buffer buffers or JSON strings)
-   * and ingests them as a batch. The batch receives a single acknowledgment from
-   * the server with all-or-nothing semantics.
-   *
-   * Similar to ingestRecord(), this BLOCKS until the batch is sent to the SDK's
-   * internal landing zone, then returns a Promise for the server acknowledgment.
-   *
-   * # Arguments
-   *
-   * * `records` - Array of record data (Buffer for protobuf, string for JSON)
-   *
-   * # Returns
-   *
-   * Promise resolving to:
-   * - `bigint`: offset ID for non-empty batches
-   * - `null`: for empty batches
-   *
-   * # Example
-   *
-   * ```typescript
-   * const buffers = records.map(r => Buffer.from(encode(r)));
-   * const offsetId = await stream.ingestRecords(buffers);
-   *
-   * if (offsetId !== null) {
-   *   console.log('Batch acknowledged at offset:', offsetId);
-   * }
-   * ```
-   *
-   * @deprecated Use `ingestRecordsOffset()` together with `waitForOffset()` instead.
-   */
-  ingestRecords(records: Array<unknown>): Promise<bigint | null>
-  /**
-   * Ingests a single record and returns the offset ID directly after queuing.
-   *
-   * Unlike `ingestRecord()`, this method returns the offset ID immediately after
-   * the record is queued, without waiting for server acknowledgment. Use
-   * `waitForOffset()` to wait for acknowledgment when needed.
-   *
-   * This is the recommended API for high-throughput scenarios where you want to
-   * decouple record ingestion from acknowledgment tracking.
-   *
-   * # Arguments
-   *
-   * * `payload` - The record data (Buffer, string, protobuf message, or plain object)
-   *
-   * # Returns
-   *
-   * A Promise that resolves to the offset ID (bigint) assigned to this record.
-   *
-   * # Example
-   *
-   * ```typescript
-   * const offset1 = await stream.ingestRecordOffset(record1);
-   * const offset2 = await stream.ingestRecordOffset(record2);
-   * // Wait for both to be acknowledged
-   * await stream.waitForOffset(offset2);
-   * ```
-   */
-  ingestRecordOffset(payload: unknown): Promise<bigint>
-  /**
-   * Ingests multiple records as a batch and returns the offset ID directly after queuing.
-   *
-   * Unlike `ingestRecords()`, this method returns the offset ID immediately after
-   * the batch is queued, without waiting for server acknowledgment. Use
-   * `waitForOffset()` to wait for acknowledgment when needed.
-   *
-   * # Arguments
-   *
-   * * `records` - Array of record data
-   *
-   * # Returns
-   *
-   * A Promise that resolves to the offset ID (bigint) for the batch, or null
-   * for empty batches.
-   *
-   * # Example
-   *
-   * ```typescript
-   * const offset = await stream.ingestRecordsOffset(batch);
-   * if (offset !== null) {
-   *   await stream.waitForOffset(offset);
-   * }
-   * ```
-   */
-  ingestRecordsOffset(records: Array<unknown>): Promise<bigint | null>
-  /**
-   * Waits for a specific offset to be acknowledged by the server.
-   *
-   * Use this method with `ingestRecordOffset()` and `ingestRecordsOffset()` to
-   * selectively wait for acknowledgments. This allows you to ingest many records
-   * quickly and then wait only for specific offsets when needed.
-   *
-   * # Arguments
-   *
-   * * `offsetId` - The offset ID to wait for (returned by ingestRecordOffset/ingestRecordsOffset)
-   *
-   * # Errors
-   *
-   * - Timeout if acknowledgment takes too long
-   * - Server errors propagated immediately (no waiting for timeout)
-   *
-   * # Example
-   *
-   * ```typescript
-   * const offsets = [];
-   * for (const record of records) {
-   *   offsets.push(await stream.ingestRecordOffset(record));
-   * }
-   * // Wait for the last offset (implies all previous are also acknowledged)
-   * await stream.waitForOffset(offsets[offsets.length - 1]);
-   * ```
-   */
-  waitForOffset(offsetId: bigint): Promise<void>
-  /**
-   * Flushes all pending records and waits for acknowledgments.
-   *
-   * This method ensures all previously ingested records have been sent to the server
-   * and acknowledged. It's useful for checkpointing or ensuring data durability.
-   *
-   * # Errors
-   *
-   * - Timeout errors if flush takes longer than configured timeout
-   * - Network errors if the connection fails during flush
-   */
-  flush(): Promise<void>
-  /**
-   * Closes the stream gracefully.
-   *
-   * This method flushes all pending records, waits for acknowledgments, and then
-   * closes the underlying gRPC connection. Always call this method when done with
-   * the stream to ensure data integrity.
-   *
-   * # Errors
-   *
-   * - Returns an error if some records could not be acknowledged
-   * - Network errors during the close operation
-   */
-  close(): Promise<void>
-  /**
-   * Gets the list of unacknowledged records.
-   *
-   * This method should only be called after a stream failure to retrieve records
-   * that were sent but not acknowledged by the server. These records can be
-   * re-ingested into a new stream.
-   *
-   * # Returns
-   *
-   * A Promise that resolves to an array of Buffers containing the
-   * unacknowledged record payloads.
-   */
-  getUnackedRecords(): Promise<Array<Buffer>>
-  /**
-   * Gets unacknowledged records grouped by their original batches.
-   *
-   * This preserves the batch structure from ingestion:
-   * - Each ingestRecord() call → 1-element batch
-   * - Each ingestRecords() call → N-element batch
-   *
-   * Should only be called after stream failure. All records returned as Buffers
-   * (JSON strings are converted to UTF-8 bytes).
-   *
-   * # Returns
-   *
-   * A Promise that resolves to an array of batches, where each batch is an
-   * array of Buffers
-   *
-   * # Example
-   *
-   * ```typescript
-   * try {
-   *   await stream.ingestRecords(batch1);
-   *   await stream.ingestRecords(batch2);
-   * } catch (error) {
-   *   const unackedBatches = await stream.getUnackedBatches();
-   *
-   *   // Re-ingest with new stream
-   *   for (const batch of unackedBatches) {
-   *     await newStream.ingestRecords(batch);
-   *   }
-   * }
-   * ```
-   */
-  getUnackedBatches(): Promise<Array<Array<Buffer>>>
-}
-/**
- * The main SDK for interacting with the Databricks Zerobus service.
- *
- * This is the entry point for creating ingestion streams to Delta tables.
- *
- * # Example
- *
- * ```typescript
- * const sdk = new ZerobusSdk(
- *   "https://workspace-id.zerobus.region.cloud.databricks.com",
- *   "https://workspace.cloud.databricks.com"
- * );
- *
- * const stream = await sdk.createStream(
- *   { tableName: "catalog.schema.table" },
- *   "client-id",
- *   "client-secret"
- * );
- * ```
- */
-export declare class ZerobusSdk {
-  /**
-   * Creates a new Zerobus SDK instance.
-   *
-   * # Arguments
-   *
-   * * `zerobusEndpoint` - The Zerobus API endpoint URL
-   *   (e.g., "https://workspace-id.zerobus.region.cloud.databricks.com")
-   * * `unityCatalogUrl` - The Unity Catalog endpoint URL
-   *   (e.g., "https://workspace.cloud.databricks.com")
-   *
-   * # Errors
-   *
-   * - Invalid endpoint URLs
-   * - Failed to extract workspace ID from the endpoint
-   */
-  constructor(zerobusEndpoint: string, unityCatalogUrl: string)
-  /**
-   * Creates a new ingestion stream to a Delta table.
-   *
-   * This method establishes a bidirectional gRPC connection to the Zerobus service
-   * and prepares it for data ingestion. By default, it uses OAuth 2.0 Client Credentials
-   * authentication. For custom authentication (e.g., Personal Access Tokens), provide
-   * a custom `headersProvider`.
-   *
-   * # Arguments
-   *
-   * * `tableProperties` - Properties of the target table including name and optional schema
-   * * `clientId` - OAuth 2.0 client ID (ignored if `headersProvider` is provided)
-   * * `clientSecret` - OAuth 2.0 client secret (ignored if `headersProvider` is provided)
-   * * `options` - Optional stream configuration (uses defaults if not provided)
-   * * `headersProvider` - Optional custom headers provider for authentication.
-   *   If not provided, uses OAuth with `clientId` and `clientSecret`.
-   * * `ackCallback` - Optional callback for receiving acknowledgment notifications.
-   *   Called when records are acknowledged or when errors occur.
-   *
-   * # Returns
-   *
-   * A Promise that resolves to a ZerobusStream ready for data ingestion.
-   *
-   * # Errors
-   *
-   * - Authentication failures (invalid credentials)
-   * - Invalid table name or insufficient permissions
-   * - Network connectivity issues
-   * - Schema validation errors
-   *
-   * # Examples
-   *
-   * OAuth authentication (default):
-   * ```typescript
-   * const stream = await sdk.createStream(
-   *   { tableName: "catalog.schema.table" },
-   *   "client-id",
-   *   "client-secret"
-   * );
-   * ```
-   *
-   * Custom authentication with headers provider:
-   * ```typescript
-   * const headersProvider = {
-   *   getHeadersCallback: async () => [
-   *     ["authorization", `Bearer ${myToken}`],
-   *     ["x-databricks-zerobus-table-name", tableName]
-   *   ]
-   * };
-   * const stream = await sdk.createStream(
-   *   { tableName: "catalog.schema.table" },
-   *   "", // ignored
-   *   "", // ignored
-   *   undefined,
-   *   headersProvider
-   * );
-   * ```
-   *
-   * With acknowledgment callbacks:
-   * ```typescript
-   * const ackCallback = {
-   *   onAck: (offsetId) => console.log(`Record ${offsetId} acknowledged`),
-   *   onError: (offsetId, error) => console.error(`Record ${offsetId} failed: ${error}`)
-   * };
-   * const stream = await sdk.createStream(
-   *   { tableName: "catalog.schema.table" },
-   *   "client-id",
-   *   "client-secret",
-   *   undefined,
-   *   undefined,
-   *   ackCallback
-   * );
-   * ```
-   */
-  createStream(tableProperties: TableProperties, clientId: string, clientSecret: string, options?: StreamConfigurationOptions | undefined | null, headersProvider?: JsHeadersProvider | undefined | null, ackCallback?: JsAckCallback | undefined | null): Promise<ZerobusStream>
-  /**
-   * Creates a new ingestion stream for local testing without authentication.
-   *
-   * **Warning**: This should only be used for local development/testing
-   * where the server does not require authentication.
-   *
-   * # Arguments
-   *
-   * * `tableProperties` - Properties of the target table
-   * * `options` - Optional stream configuration
-   *
-   * # Returns
-   *
-   * A Promise that resolves to a ZerobusStream ready for data ingestion.
-   *
-   * NOTE(review): the declared return type is `object`, not
-   * `Promise<ZerobusStream>`, so TypeScript callers get no stream methods on
-   * the result without narrowing or asserting it. Whether the runtime value
-   * really is a Promise of a ZerobusStream is not visible in this
-   * declaration file - confirm against the native implementation.
-   */
-  createStreamLocal(tableProperties: TableProperties, options?: StreamConfigurationOptions | undefined | null): object
-  /**
-   * Recreates a stream with the same configuration and re-ingests unacknowledged batches.
-   *
-   * This method is the recommended approach for recovering from stream failures. It:
-   * 1. Retrieves all unacknowledged batches from the failed stream
-   * 2. Creates a new stream with identical configuration
-   * 3. Re-ingests all unacknowledged batches in order
-   * 4. Returns the new stream ready for continued ingestion
-   *
-   * # Arguments
-   *
-   * * `stream` - The failed or closed stream to recreate
-   *
-   * # Returns
-   *
-   * A Promise that resolves to a new ZerobusStream with all unacknowledged batches re-ingested.
-   *
-   * # Errors
-   *
-   * - Failed to retrieve unacknowledged batches from the original stream
-   * - Authentication failures when creating the new stream
-   * - Network connectivity issues during re-ingestion
-   *
-   * # Examples
-   *
-   * ```typescript
-   * try {
-   *   await stream.ingestRecords(batch);
-   * } catch (error) {
-   *   await stream.close();
-   *   // Recreate stream with all unacked batches re-ingested
-   *   const newStream = await sdk.recreateStream(stream);
-   *   // Continue ingesting with newStream
-   * }
-   * ```
-   */
-  recreateStream(stream: ZerobusStream): Promise<ZerobusStream>
-}