@databricks/zerobus-ingest-sdk 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -1,11 +1,11 @@
1
1
  [package]
2
2
  name = "zerobus-sdk-ts"
3
- version = "0.1.0"
3
+ version = "0.3.0"
4
4
  authors = ["Databricks"]
5
5
  edition = "2021"
6
6
  license-file = "LICENSE"
7
7
  description = "TypeScript/Node.js bindings for the Databricks Zerobus SDK"
8
- repository = "https://github.com/databricks/zerobus-sdk-ts"
8
+ repository = "https://github.com/databricks/zerobus-sdk"
9
9
 
10
10
  [lib]
11
11
  crate-type = ["cdylib"]
@@ -15,8 +15,8 @@ crate-type = ["cdylib"]
15
15
  napi = { version = "2", features = ["async", "tokio_rt", "serde-json"] }
16
16
  napi-derive = "2"
17
17
 
18
- # The Rust SDK we're wrapping (using local path for development)
19
- databricks-zerobus-ingest-sdk = { git = "https://github.com/databricks/zerobus-sdk-rs", tag = "v0.2.0" }
18
+ # The Rust SDK we're wrapping
19
+ databricks-zerobus-ingest-sdk = "0.6.0"
20
20
 
21
21
  # Async runtime (same as the Rust SDK)
22
22
  tokio = { version = "1.42", features = ["macros", "rt-multi-thread"] }
@@ -38,9 +38,24 @@ base64 = "0.21"
38
38
  # Async trait support
39
39
  async-trait = "0.1"
40
40
 
41
+ # Arrow dependencies (only used when arrow-flight feature is enabled)
42
+ arrow-array = { version = "56.2.0", optional = true }
43
+ arrow-schema = { version = "56.2.0", optional = true }
44
+ arrow-ipc = { version = "56.2.0", features = ["lz4", "zstd"], optional = true }
45
+
41
46
  [build-dependencies]
42
47
  napi-build = "2"
43
48
 
49
+ [features]
50
+ default = []
51
+ # Arrow Flight is experimental/unsupported - enable with: npm run build:arrow
52
+ arrow-flight = [
53
+ "databricks-zerobus-ingest-sdk/arrow-flight",
54
+ "dep:arrow-array",
55
+ "dep:arrow-schema",
56
+ "dep:arrow-ipc"
57
+ ]
58
+
44
59
  [profile.release]
45
60
  lto = true
46
61
  strip = true
package/README.md CHANGED
@@ -1,17 +1,12 @@
1
1
  # Databricks Zerobus Ingest SDK for TypeScript
2
2
 
3
- [Public Preview](https://docs.databricks.com/release-notes/release-types.html): This SDK is supported for production use cases and is available to all customers. Databricks is actively working on stabilizing the Zerobus Ingest SDK for TypeScript. Minor version updates may include backwards-incompatible changes.
4
-
5
- We are keen to hear feedback from you on this SDK. Please [file issues](https://github.com/databricks/zerobus-sdk-ts/issues), and we will address them.
6
-
7
- The Databricks Zerobus Ingest SDK for TypeScript provides a high-performance client for ingesting data directly into Databricks Delta tables using the Zerobus streaming protocol. This SDK wraps the high-performance [Rust SDK](https://github.com/databricks/zerobus-sdk-rs) using native bindings for optimal performance. | See also the [SDK for Rust](https://github.com/databricks/zerobus-sdk-rs) | See also the [SDK for Python](https://github.com/databricks/zerobus-sdk-py) | See also the [SDK for Java](https://github.com/databricks/zerobus-sdk-java) | See also the [SDK for Go](https://github.com/databricks/zerobus-sdk-go)
3
+ The Databricks Zerobus Ingest SDK for TypeScript provides a high-performance client for ingesting data directly into Databricks Delta tables using the Zerobus streaming protocol. This SDK wraps the high-performance [Rust SDK](https://github.com/databricks/zerobus-sdk/tree/main/rust) using native bindings for optimal performance.
8
4
 
9
5
  ## Table of Contents
10
6
 
11
7
  - [Features](#features)
12
8
  - [Requirements](#requirements)
13
9
  - [Quick Start User Guide](#quick-start-user-guide)
14
- - [Prerequisites](#prerequisites)
15
10
  - [Installation](#installation)
16
11
  - [Choose Your Serialization Format](#choose-your-serialization-format)
17
12
  - [Option 1: Using JSON (Quick Start)](#option-1-using-json-quick-start)
@@ -25,8 +20,8 @@ The Databricks Zerobus Ingest SDK for TypeScript provides a high-performance cli
25
20
  - [Best Practices](#best-practices)
26
21
  - [Platform Support](#platform-support)
27
22
  - [Architecture](#architecture)
28
- - [Contributing](#contributing)
29
- - [Related Projects](#related-projects)
23
+ - [Community and Contributing](#community-and-contributing)
24
+ - [License](#license)
30
25
 
31
26
  ## Features
32
27
 
@@ -67,65 +62,7 @@ These will be installed automatically:
67
62
 
68
63
  ### Prerequisites
69
64
 
70
- Before using the SDK, you'll need the following:
71
-
72
- #### 1. Workspace URL and Workspace ID
73
-
74
- After logging into your Databricks workspace, look at the browser URL:
75
-
76
- ```
77
- https://<databricks-instance>.cloud.databricks.com/?o=<workspace-id>
78
- ```
79
-
80
- - **Workspace URL**: The part before `/?o=` → `https://<databricks-instance>.cloud.databricks.com`
81
- - **Workspace ID**: The part after `?o=` → `<workspace-id>`
82
- - **Zerobus Endpoint**: `https://<workspace-id>.zerobus.<region>.cloud.databricks.com`
83
-
84
- > **Note:** The examples above show AWS endpoints (`.cloud.databricks.com`). For Azure deployments, the workspace URL will be `https://<databricks-instance>.azuredatabricks.net` and Zerobus endpoint will use `.azuredatabricks.net`.
85
-
86
- Example:
87
- - Full URL: `https://dbc-a1b2c3d4-e5f6.cloud.databricks.com/?o=1234567890123456`
88
- - Workspace URL: `https://dbc-a1b2c3d4-e5f6.cloud.databricks.com`
89
- - Workspace ID: `1234567890123456`
90
- - Zerobus Endpoint: `https://1234567890123456.zerobus.us-west-2.cloud.databricks.com`
91
-
92
- #### 2. Create a Delta Table
93
-
94
- Create a table using Databricks SQL:
95
-
96
- ```sql
97
- CREATE TABLE <catalog_name>.default.air_quality (
98
- device_name STRING,
99
- temp INT,
100
- humidity BIGINT
101
- )
102
- USING DELTA;
103
- ```
104
-
105
- Replace `<catalog_name>` with your catalog name (e.g., `main`).
106
-
107
- #### 3. Create a Service Principal
108
-
109
- 1. Navigate to **Settings > Identity and Access** in your Databricks workspace
110
- 2. Click **Service principals** and create a new service principal
111
- 3. Generate a new secret for the service principal and save it securely
112
- 4. Grant the following permissions:
113
- - `USE_CATALOG` on the catalog (e.g., `main`)
114
- - `USE_SCHEMA` on the schema (e.g., `default`)
115
- - `MODIFY` and `SELECT` on the table (e.g., `air_quality`)
116
-
117
- Grant permissions using SQL:
118
-
119
- ```sql
120
- -- Grant catalog permission
121
- GRANT USE CATALOG ON CATALOG <catalog_name> TO `<service-principal-application-id>`;
122
-
123
- -- Grant schema permission
124
- GRANT USE SCHEMA ON SCHEMA <catalog_name>.default TO `<service-principal-application-id>`;
125
-
126
- -- Grant table permissions
127
- GRANT SELECT, MODIFY ON TABLE <catalog_name>.default.air_quality TO `<service-principal-application-id>`;
128
- ```
65
+ Before using the SDK, you need a Databricks workspace URL, a Delta table, and a service principal. See the [monorepo prerequisites](https://github.com/databricks/zerobus-sdk/blob/main/README.md#prerequisites) for detailed setup instructions.
129
66
 
130
67
  ### Installation
131
68
 
@@ -202,10 +139,10 @@ source $HOME/.cargo/env
202
139
 
203
140
  **Note for macOS users**: Pre-built binaries are not available. The package will automatically build from source during `npm install`. Ensure you have Rust toolchain and Xcode Command Line Tools installed (see prerequisites above).
204
141
 
205
- 1. Extract the SDK package:
142
+ 1. Clone the repository:
206
143
  ```bash
207
- unzip zerobus-sdk-ts.zip
208
- cd zerobus-sdk-ts
144
+ git clone https://github.com/databricks/zerobus-sdk.git
145
+ cd zerobus-sdk/typescript
209
146
  ```
210
147
 
211
148
  2. Install dependencies:
@@ -256,7 +193,7 @@ import { ZerobusSdk, RecordType } from '@databricks/zerobus-ingest-sdk';
256
193
 
257
194
  // Configuration
258
195
  // For AWS:
259
- const zerobusEndpoint = '<workspace-id>.zerobus.<region>.cloud.databricks.com';
196
+ const zerobusEndpoint = 'https://<workspace-id>.zerobus.<region>.cloud.databricks.com';
260
197
  const workspaceUrl = 'https://<workspace-name>.cloud.databricks.com';
261
198
  // For Azure:
262
199
  // const zerobusEndpoint = 'https://<workspace-id>.zerobus.<region>.azuredatabricks.net';
@@ -288,35 +225,24 @@ const stream = await sdk.createStream(
288
225
  );
289
226
 
290
227
  try {
291
- let lastAckPromise;
228
+ let lastOffset!: bigint; // definitely assigned inside the loop below
292
229
 
293
230
  // Send all records
294
231
  for (let i = 0; i < 100; i++) {
295
- // Create JSON record
296
232
  const record = {
297
233
  device_name: `sensor-${i % 10}`,
298
234
  temp: 20 + (i % 15),
299
235
  humidity: 50 + (i % 40)
300
236
  };
301
237
 
302
- // JSON supports 2 types:
303
- // 1. object (high-level) - SDK auto-stringifies
304
- lastAckPromise = stream.ingestRecord(record);
305
- // 2. string (low-level) - pre-serialized JSON
306
- // lastAckPromise = stream.ingestRecord(JSON.stringify(record));
238
+ // ingestRecordOffset returns immediately after queuing
239
+ lastOffset = await stream.ingestRecordOffset(record);
307
240
  }
308
241
 
309
- console.log('All records sent. Waiting for last acknowledgment...');
310
-
311
- // Wait for the last record's acknowledgment
312
- const lastOffset = await lastAckPromise;
313
- console.log(`Last record offset: ${lastOffset}`);
314
-
315
- // Flush to ensure all records are acknowledged
316
- await stream.flush();
242
+ // Wait for all records to be acknowledged
243
+ await stream.waitForOffset(lastOffset);
317
244
  console.log('Successfully ingested 100 records!');
318
245
  } finally {
319
- // Always close the stream
320
246
  await stream.close();
321
247
  }
322
248
  ```
@@ -430,7 +356,7 @@ import * as airQuality from './examples/generated/air_quality';
430
356
  import { loadDescriptorProto } from '@databricks/zerobus-ingest-sdk/utils/descriptor';
431
357
 
432
358
  // Configuration
433
- const zerobusEndpoint = '<workspace-id>.zerobus.<region>.cloud.databricks.com';
359
+ const zerobusEndpoint = 'https://<workspace-id>.zerobus.<region>.cloud.databricks.com';
434
360
  const workspaceUrl = 'https://<workspace-name>.cloud.databricks.com';
435
361
  const tableName = 'main.default.air_quality';
436
362
  const clientId = process.env.DATABRICKS_CLIENT_ID!;
@@ -464,7 +390,7 @@ const stream = await sdk.createStream(tableProperties, clientId, clientSecret, o
464
390
 
465
391
  try {
466
392
  const AirQuality = airQuality.examples.AirQuality;
467
- let lastAckPromise;
393
+ let lastOffset!: bigint; // definitely assigned inside the loop below
468
394
 
469
395
  // Send all records
470
396
  for (let i = 0; i < 100; i++) {
@@ -474,22 +400,12 @@ try {
474
400
  humidity: 50 + i
475
401
  });
476
402
 
477
- // Protobuf supports 2 types:
478
- // 1. Message object (high-level) - SDK calls .encode().finish()
479
- lastAckPromise = stream.ingestRecord(record);
480
- // 2. Buffer (low-level) - pre-serialized bytes
481
- // const buffer = Buffer.from(AirQuality.encode(record).finish());
482
- // lastAckPromise = stream.ingestRecord(buffer);
403
+ // ingestRecordOffset returns immediately after queuing
404
+ lastOffset = await stream.ingestRecordOffset(record);
483
405
  }
484
406
 
485
- console.log('All records sent. Waiting for last acknowledgment...');
486
-
487
- // Wait for the last record's acknowledgment
488
- const lastOffset = await lastAckPromise;
489
- console.log(`Last record offset: ${lastOffset}`);
490
-
491
- // Flush to ensure all records are acknowledged
492
- await stream.flush();
407
+ // Wait for all records to be acknowledged
408
+ await stream.waitForOffset(lastOffset);
493
409
  console.log('Successfully ingested 100 records!');
494
410
  } finally {
495
411
  await stream.close();
@@ -610,7 +526,7 @@ npm run build:proto
610
526
  protoc --descriptor_set_out=schemas/air_quality_descriptor.pb --include_imports schemas/air_quality.proto
611
527
 
612
528
  # Run example
613
- npx tsx examples/proto.ts
529
+ npm run example:proto:single
614
530
  ```
615
531
 
616
532
  #### Why Two Steps (TypeScript + Descriptor)?
@@ -635,21 +551,23 @@ See the `examples/` directory for complete, runnable examples. See [examples/REA
635
551
 
636
552
  ```bash
637
553
  # Set environment variables
638
- export ZEROBUS_SERVER_ENDPOINT="<workspace-id>.zerobus.<region>.cloud.databricks.com"
554
+ export ZEROBUS_SERVER_ENDPOINT="https://<workspace-id>.zerobus.<region>.cloud.databricks.com"
639
555
  export DATABRICKS_WORKSPACE_URL="https://<workspace-name>.cloud.databricks.com"
640
556
  export DATABRICKS_CLIENT_ID="your-client-id"
641
557
  export DATABRICKS_CLIENT_SECRET="your-client-secret"
642
558
  export ZEROBUS_TABLE_NAME="main.default.air_quality"
643
559
 
644
- # Run JSON example
645
- npx tsx examples/json.ts
560
+ # Run JSON examples
561
+ npm run example:json:single
562
+ npm run example:json:batch
646
563
 
647
564
  # For Protocol Buffers, generate TypeScript code and descriptor
648
565
  npm run build:proto
649
566
  protoc --descriptor_set_out=schemas/air_quality_descriptor.pb --include_imports schemas/air_quality.proto
650
567
 
651
- # Run Protocol Buffers example
652
- npx tsx examples/proto.ts
568
+ # Run Protocol Buffers examples
569
+ npm run example:proto:single
570
+ npm run example:proto:batch
653
571
  ```
654
572
 
655
573
  ### Batch Ingestion
@@ -664,13 +582,14 @@ const records = Array.from({ length: 1000 }, (_, i) =>
664
582
  );
665
583
 
666
584
  // Protobuf Type 1: Message objects (high-level) - SDK auto-serializes
667
- const offsetId = await stream.ingestRecords(records);
585
+ const offsetId = await stream.ingestRecordsOffset(records);
668
586
 
669
587
  // Protobuf Type 2: Buffers (low-level) - pre-serialized bytes
670
588
  // const buffers = records.map(r => Buffer.from(AirQuality.encode(r).finish()));
671
- // const offsetId = await stream.ingestRecords(buffers);
589
+ // const offsetId = await stream.ingestRecordsOffset(buffers);
672
590
 
673
591
  if (offsetId !== null) {
592
+ await stream.waitForOffset(offsetId);
674
593
  console.log(`Batch acknowledged at offset ${offsetId}`);
675
594
  }
676
595
  ```
@@ -685,11 +604,15 @@ const records = Array.from({ length: 1000 }, (_, i) => ({
685
604
  }));
686
605
 
687
606
  // JSON Type 1: objects (high-level) - SDK auto-stringifies
688
- const offsetId = await stream.ingestRecords(records);
607
+ const offsetId = await stream.ingestRecordsOffset(records);
689
608
 
690
609
  // JSON Type 2: strings (low-level) - pre-serialized JSON
691
610
  // const jsonRecords = records.map(r => JSON.stringify(r));
692
- // const offsetId = await stream.ingestRecords(jsonRecords);
611
+ // const offsetId = await stream.ingestRecordsOffset(jsonRecords);
612
+
613
+ if (offsetId !== null) {
614
+ await stream.waitForOffset(offsetId);
615
+ }
693
616
  ```
694
617
 
695
618
  **Type Widening Support:**
@@ -703,7 +626,7 @@ const offsetId = await stream.ingestRecords(records);
703
626
  - Use `recreateStream()` for recovery - it automatically handles unacknowledged batches
704
627
 
705
628
  **Examples:**
706
- Both `json.ts` and `proto.ts` examples demonstrate batch ingestion.
629
+ See `examples/json/batch.ts` and `examples/proto/batch.ts` for batch ingestion examples.
707
630
 
708
631
  ## Authentication
709
632
 
@@ -732,29 +655,25 @@ The SDK automatically fetches access tokens and includes these headers:
732
655
  Beyond OAuth, you can use custom headers for Personal Access Tokens (PAT) or other auth methods:
733
656
 
734
657
  ```typescript
735
- import { ZerobusSdk } from '@databricks/zerobus-ingest-sdk';
736
- import { HeadersProvider } from '@databricks/zerobus-ingest-sdk/src/headers_provider';
737
-
738
- class CustomHeadersProvider implements HeadersProvider {
739
- async getHeaders(): Promise<Array<[string, string]>> {
740
- return [
741
- ["authorization", `Bearer ${myToken}`],
742
- ["x-databricks-zerobus-table-name", tableName]
743
- ];
744
- }
745
- }
746
-
747
- const headersProvider = new CustomHeadersProvider();
748
658
  const stream = await sdk.createStream(
749
659
  tableProperties,
750
660
  '', // client_id (ignored when headers_provider is provided)
751
661
  '', // client_secret (ignored when headers_provider is provided)
752
662
  options,
753
- { getHeadersCallback: headersProvider.getHeaders.bind(headersProvider) }
663
+ {
664
+ getHeadersCallback: async () => [
665
+ ["authorization", `Bearer ${myToken}`],
666
+ ["x-databricks-zerobus-table-name", tableName]
667
+ ]
668
+ }
754
669
  );
755
670
  ```
756
671
 
757
- **Note:** Custom authentication is integrated into the main `createStream()` method. See the API Reference for details.
672
+ **Required headers:**
673
+ - `authorization` - Bearer token or other auth header
674
+ - `x-databricks-zerobus-table-name` - The fully qualified table name
675
+
676
+ **Note:** The SDK automatically adds the `user-agent` header if not provided.
758
677
 
759
678
  ## Configuration
760
679
 
@@ -770,6 +689,7 @@ const stream = await sdk.createStream(
770
689
  | `recoveryRetries` | 4 | Maximum number of recovery attempts |
771
690
  | `flushTimeoutMs` | 300,000 | Timeout for flush operations (ms) |
772
691
  | `serverLackOfAckTimeoutMs` | 60,000 | Server acknowledgment timeout (ms) |
692
+ | `streamPausedMaxWaitTimeMs` | undefined | Max wait time during graceful stream close (ms) |
773
693
 
774
694
  ### Example Configuration
775
695
 
@@ -839,7 +759,8 @@ The SDK includes automatic recovery for transient failures (enabled by default w
839
759
 
840
760
  ```typescript
841
761
  try {
842
- const offset = await stream.ingestRecord(JSON.stringify(record));
762
+ const offset = await stream.ingestRecordOffset(record);
763
+ await stream.waitForOffset(offset);
843
764
  console.log(`Success: offset ${offset}`);
844
765
  } catch (error) {
845
766
  console.error('Ingestion failed:', error);
@@ -889,7 +810,7 @@ new ZerobusSdk(zerobusEndpoint: string, unityCatalogUrl: string)
889
810
  ```
890
811
 
891
812
  **Parameters:**
892
- - `zerobusEndpoint` (string) - The Zerobus gRPC endpoint (e.g., `<workspace-id>.zerobus.<region>.cloud.databricks.com` for AWS, or `<workspace-id>.zerobus.<region>.azuredatabricks.net` for Azure)
813
+ - `zerobusEndpoint` (string) - The Zerobus gRPC endpoint (e.g., `https://<workspace-id>.zerobus.<region>.cloud.databricks.com` for AWS, or `https://<workspace-id>.zerobus.<region>.azuredatabricks.net` for Azure)
893
814
  - `unityCatalogUrl` (string) - The Unity Catalog endpoint (your workspace URL)
894
815
 
895
816
  **Methods:**
@@ -952,11 +873,44 @@ Represents an active ingestion stream.
952
873
 
953
874
  **Methods:**
954
875
 
876
+ ```typescript
877
+ async ingestRecordOffset(payload: Buffer | string | object): Promise<bigint>
878
+ ```
879
+
880
+ **(Recommended)** Ingests a single record. The Promise resolves immediately after the record is queued (before server acknowledgment). Use `waitForOffset()` to wait for acknowledgment when needed.
881
+
882
+ ```typescript
883
+ // High-throughput pattern: send many, wait once
884
+ const offset1 = await stream.ingestRecordOffset(record1); // Resolves immediately
885
+ const offset2 = await stream.ingestRecordOffset(record2); // Resolves immediately
886
+ await stream.waitForOffset(offset2); // Waits for server to acknowledge all records up to offset2
887
+ ```
888
+
889
+ ---
890
+
891
+ ```typescript
892
+ async ingestRecordsOffset(payloads: Array<Buffer | string | object>): Promise<bigint | null>
893
+ ```
894
+
895
+ **(Recommended)** Ingests multiple records as a batch. The Promise resolves immediately after the batch is queued (before server acknowledgment). Returns `null` for empty batches.
896
+
897
+ ---
898
+
899
+ ```typescript
900
+ async waitForOffset(offsetId: bigint): Promise<void>
901
+ ```
902
+
903
+ Waits for the server to acknowledge all records up to and including the specified offset ID.
904
+
905
+ ---
906
+
955
907
  ```typescript
956
908
  async ingestRecord(payload: Buffer | string | object): Promise<bigint>
957
909
  ```
958
910
 
959
- Ingests a single record. This method **blocks** until the record is sent to the SDK's internal landing zone, then returns a Promise for the server acknowledgment. This allows you to send many records without waiting for individual acknowledgments.
911
+ **@deprecated** Use `ingestRecordOffset()` instead.
912
+
913
+ Ingests a single record. Unlike `ingestRecordOffset()`, the Promise only resolves **after the server acknowledges** the record. This is slower for high-throughput scenarios.
960
914
 
961
915
  **Parameters:**
962
916
  - `payload` - Record data. The SDK supports 4 input types for flexibility:
@@ -994,7 +948,9 @@ await stream.ingestRecord(buffer);
994
948
  async ingestRecords(payloads: Array<Buffer | string | object>): Promise<bigint | null>
995
949
  ```
996
950
 
997
- Ingests multiple records as a batch. All records in a batch are acknowledged together atomically. This method **blocks** until all records are sent to the SDK's internal landing zone, then returns a Promise for the server acknowledgment.
951
+ **@deprecated** Use `ingestRecordsOffset()` instead.
952
+
953
+ Ingests multiple records as a batch. Unlike `ingestRecordsOffset()`, the Promise only resolves **after the server acknowledges** the batch. This is slower for high-throughput scenarios.
998
954
 
999
955
  **Parameters:**
1000
956
  - `payloads` - Array of record data. Supports the same 4 types as `ingestRecord()`:
@@ -1135,14 +1091,15 @@ Configuration options for stream behavior.
1135
1091
 
1136
1092
  ```typescript
1137
1093
  interface StreamConfigurationOptions {
1138
- recordType?: RecordType; // RecordType.Json or RecordType.Proto. Default: RecordType.Proto
1139
- maxInflightRequests?: number; // Default: 10,000
1140
- recovery?: boolean; // Default: true
1141
- recoveryTimeoutMs?: number; // Default: 15,000
1142
- recoveryBackoffMs?: number; // Default: 2,000
1143
- recoveryRetries?: number; // Default: 4
1144
- flushTimeoutMs?: number; // Default: 300,000
1145
- serverLackOfAckTimeoutMs?: number; // Default: 60,000
1094
+ recordType?: RecordType; // RecordType.Json or RecordType.Proto. Default: RecordType.Proto
1095
+ maxInflightRequests?: number; // Default: 10,000
1096
+ recovery?: boolean; // Default: true
1097
+ recoveryTimeoutMs?: number; // Default: 15,000
1098
+ recoveryBackoffMs?: number; // Default: 2,000
1099
+ recoveryRetries?: number; // Default: 4
1100
+ flushTimeoutMs?: number; // Default: 300,000
1101
+ serverLackOfAckTimeoutMs?: number; // Default: 60,000
1102
+ streamPausedMaxWaitTimeMs?: number; // Default: undefined (wait for full server duration)
1146
1103
  }
1147
1104
 
1148
1105
  enum RecordType {
@@ -1159,7 +1116,7 @@ enum RecordType {
1159
1116
  4. **Error handling**: The stream handles errors internally with automatic retry. Only use `recreateStream()` for persistent failures after internal retries are exhausted.
1160
1117
  5. **Use Protocol Buffers for production**: Protocol Buffers (the default) provides better performance and schema validation. Use JSON only when you need schema flexibility or for quick prototyping.
1161
1118
  6. **Store credentials securely**: Use environment variables, never hardcode credentials
1162
- 7. **Use batch ingestion**: For high-throughput scenarios, use `ingestRecords()` instead of individual `ingestRecord()` calls
1119
+ 7. **Use batch ingestion**: For high-throughput scenarios, use `ingestRecordsOffset()` instead of individual `ingestRecordOffset()` calls
1163
1120
 
1164
1121
  ## Platform Support
1165
1122
 
@@ -1183,7 +1140,7 @@ The build process happens automatically during installation and typically takes
1183
1140
 
1184
1141
  ## Architecture
1185
1142
 
1186
- This SDK wraps the high-performance [Rust Zerobus SDK](https://github.com/databricks/zerobus-sdk-rs) using [NAPI-RS](https://napi.rs):
1143
+ This SDK wraps the high-performance [Rust Zerobus SDK](https://github.com/databricks/zerobus-sdk/tree/main/rust) using [NAPI-RS](https://napi.rs):
1187
1144
 
1188
1145
  ```
1189
1146
  ┌─────────────────────────────┐
@@ -1203,18 +1160,21 @@ This SDK wraps the high-performance [Rust Zerobus SDK](https://github.com/databr
1203
1160
  ```
1204
1161
 
1205
1162
  **Benefits:**
1206
- - **Zero-copy data transfer** between JavaScript and Rust
1163
+ - **Native performance** - Rust implementation for high-throughput ingestion
1207
1164
  - **Native async/await support** - Rust futures become JavaScript Promises
1208
1165
  - **Automatic memory management** - No manual cleanup required
1209
1166
  - **Type safety** - Compile-time checks on both sides
1210
1167
 
1211
- ## Contributing
1168
+ ## Community and Contributing
1169
+
1170
+ This is an open source project. We welcome contributions, feedback, and bug reports.
1212
1171
 
1213
- We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details.
1172
+ - **[Contributing Guide](https://github.com/databricks/zerobus-sdk/blob/main/typescript/CONTRIBUTING.md)**: TypeScript-specific development setup and workflow.
1173
+ - **[General Contributing Guide](https://github.com/databricks/zerobus-sdk/blob/main/CONTRIBUTING.md)**: Pull request process, commit requirements, and policies.
1174
+ - **[Changelog](https://github.com/databricks/zerobus-sdk/blob/main/typescript/CHANGELOG.md)**: See the history of changes in the SDK.
1175
+ - **[Security Policy](https://github.com/databricks/zerobus-sdk/blob/main/SECURITY.md)**: Read about our security process and how to report vulnerabilities.
1176
+ - **[Developer Certificate of Origin (DCO)](https://github.com/databricks/zerobus-sdk/blob/main/DCO)**: Understand the agreement for contributions.
1214
1177
 
1215
- ## Related Projects
1178
+ ## License
1216
1179
 
1217
- - [Zerobus Rust SDK](https://github.com/databricks/zerobus-sdk-rs) - The underlying Rust implementation
1218
- - [Zerobus Python SDK](https://github.com/databricks/zerobus-sdk-py) - Python SDK for Zerobus
1219
- - [Zerobus Java SDK](https://github.com/databricks/zerobus-sdk-java) - Java SDK for Zerobus
1220
- - [NAPI-RS](https://napi.rs) - Rust/Node.js binding framework
1180
+ This SDK is licensed under the Databricks License. See the [LICENSE](https://github.com/databricks/zerobus-sdk/blob/main/LICENSE) file for the full license text. The license is also available online at [https://www.databricks.com/legal/db-license](https://www.databricks.com/legal/db-license).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@databricks/zerobus-ingest-sdk",
3
- "version": "0.1.1",
3
+ "version": "0.3.0",
4
4
  "description": "TypeScript/Node.js SDK for streaming data ingestion into Databricks Delta tables using Zerobus",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -15,7 +15,7 @@
15
15
  ],
16
16
  "repository": {
17
17
  "type": "git",
18
- "url": "https://github.com/databricks/zerobus-sdk-ts.git"
18
+ "url": "https://github.com/databricks/zerobus-sdk"
19
19
  },
20
20
  "license": "Databricks License",
21
21
  "files": [
@@ -34,12 +34,13 @@
34
34
  "napi": {
35
35
  "name": "zerobus-ingest-sdk",
36
36
  "triples": {
37
- "defaults": true,
37
+ "defaults": false,
38
38
  "additional": [
39
- "x86_64-unknown-linux-musl",
39
+ "x86_64-unknown-linux-gnu",
40
40
  "aarch64-unknown-linux-gnu",
41
- "aarch64-apple-darwin",
42
- "aarch64-unknown-linux-musl"
41
+ "x86_64-pc-windows-msvc",
42
+ "x86_64-apple-darwin",
43
+ "aarch64-apple-darwin"
43
44
  ]
44
45
  }
45
46
  },
@@ -49,7 +50,9 @@
49
50
  "scripts": {
50
51
  "artifacts": "napi artifacts",
51
52
  "build": "napi build --platform --release",
53
+ "build:arrow": "napi build --platform --release --features arrow-flight",
52
54
  "build:debug": "napi build --platform",
55
+ "build:debug:arrow": "napi build --platform --features arrow-flight",
53
56
  "build:proto": "mkdir -p examples/generated && pbjs -t static-module -w commonjs -o examples/generated/air_quality.js schemas/air_quality.proto && pbts -o examples/generated/air_quality.d.ts examples/generated/air_quality.js && protoc --descriptor_set_out=schemas/air_quality_descriptor.pb --include_imports schemas/air_quality.proto",
54
57
  "prepublishOnly": "napi prepublish -t npm",
55
58
  "test": "tsx --test test/unit.test.ts test/integration.test.ts",
@@ -57,21 +60,29 @@
57
60
  "test:integration": "tsx --test test/integration.test.ts",
58
61
  "universal": "napi universal",
59
62
  "version": "napi version",
60
- "example:json": "tsx examples/json.ts",
61
- "example:proto": "tsx examples/proto.ts",
62
- "example:parallel": "tsx examples/parallel_streams.ts"
63
+ "example:json:single": "tsx examples/json/single.ts",
64
+ "example:json:batch": "tsx examples/json/batch.ts",
65
+ "example:proto:single": "tsx examples/proto/single.ts",
66
+ "example:proto:batch": "tsx examples/proto/batch.ts",
67
+ "example:arrow:single": "tsx examples/arrow/single.ts",
68
+ "example:arrow:batch": "tsx examples/arrow/batch.ts"
63
69
  },
64
70
  "peerDependencies": {
65
- "protobufjs": "^7.0.0"
71
+ "protobufjs": "^7.0.0",
72
+ "apache-arrow": "^18.1.0"
66
73
  },
67
74
  "peerDependenciesMeta": {
68
75
  "protobufjs": {
69
76
  "optional": true
77
+ },
78
+ "apache-arrow": {
79
+ "optional": true
70
80
  }
71
81
  },
72
82
  "devDependencies": {
73
83
  "@napi-rs/cli": "^2.18.0",
74
84
  "@types/node": "^20.0.0",
85
+ "apache-arrow": "^18.1.0",
75
86
  "dotenv": "^17.2.3",
76
87
  "protobufjs": "^7.5.4",
77
88
  "protobufjs-cli": "^2.0.0",
@@ -82,12 +93,10 @@
82
93
  "glob": "^10.0.0"
83
94
  },
84
95
  "optionalDependencies": {
85
- "@databricks/zerobus-ingest-sdk-win32-x64-msvc": "0.1.1",
86
- "@databricks/zerobus-ingest-sdk-darwin-x64": "0.1.1",
87
- "@databricks/zerobus-ingest-sdk-linux-x64-gnu": "0.1.1",
88
- "@databricks/zerobus-ingest-sdk-linux-x64-musl": "0.1.1",
89
- "@databricks/zerobus-ingest-sdk-linux-arm64-gnu": "0.1.1",
90
- "@databricks/zerobus-ingest-sdk-darwin-arm64": "0.1.1",
91
- "@databricks/zerobus-ingest-sdk-linux-arm64-musl": "0.1.1"
96
+ "@databricks/zerobus-ingest-sdk-linux-x64-gnu": "0.3.0",
97
+ "@databricks/zerobus-ingest-sdk-linux-arm64-gnu": "0.3.0",
98
+ "@databricks/zerobus-ingest-sdk-win32-x64-msvc": "0.3.0",
99
+ "@databricks/zerobus-ingest-sdk-darwin-x64": "0.3.0",
100
+ "@databricks/zerobus-ingest-sdk-darwin-arm64": "0.3.0"
92
101
  }
93
102
  }