@databricks/zerobus-ingest-sdk 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +19 -4
- package/README.md +111 -151
- package/package.json +26 -17
- package/src/lib.rs +890 -38
- package/zerobus-ingest-sdk.linux-arm64-gnu.node +0 -0
- package/zerobus-ingest-sdk.linux-x64-gnu.node +0 -0
- package/zerobus-ingest-sdk.win32-x64-msvc.node +0 -0
- package/Cargo.lock +0 -2233
- package/LICENSE +0 -69
package/Cargo.toml
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "zerobus-sdk-ts"
|
|
3
|
-
version = "0.1.1"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
authors = ["Databricks"]
|
|
5
5
|
edition = "2021"
|
|
6
6
|
license-file = "LICENSE"
|
|
7
7
|
description = "TypeScript/Node.js bindings for the Databricks Zerobus SDK"
|
|
8
|
-
repository = "https://github.com/databricks/zerobus-sdk
|
|
8
|
+
repository = "https://github.com/databricks/zerobus-sdk"
|
|
9
9
|
|
|
10
10
|
[lib]
|
|
11
11
|
crate-type = ["cdylib"]
|
|
@@ -15,8 +15,8 @@ crate-type = ["cdylib"]
|
|
|
15
15
|
napi = { version = "2", features = ["async", "tokio_rt", "serde-json"] }
|
|
16
16
|
napi-derive = "2"
|
|
17
17
|
|
|
18
|
-
# The Rust SDK we're wrapping
|
|
19
|
-
databricks-zerobus-ingest-sdk =
|
|
18
|
+
# The Rust SDK we're wrapping
|
|
19
|
+
databricks-zerobus-ingest-sdk = "0.6.0"
|
|
20
20
|
|
|
21
21
|
# Async runtime (same as the Rust SDK)
|
|
22
22
|
tokio = { version = "1.42", features = ["macros", "rt-multi-thread"] }
|
|
@@ -38,9 +38,24 @@ base64 = "0.21"
|
|
|
38
38
|
# Async trait support
|
|
39
39
|
async-trait = "0.1"
|
|
40
40
|
|
|
41
|
+
# Arrow dependencies (only used when arrow-flight feature is enabled)
|
|
42
|
+
arrow-array = { version = "56.2.0", optional = true }
|
|
43
|
+
arrow-schema = { version = "56.2.0", optional = true }
|
|
44
|
+
arrow-ipc = { version = "56.2.0", features = ["lz4", "zstd"], optional = true }
|
|
45
|
+
|
|
41
46
|
[build-dependencies]
|
|
42
47
|
napi-build = "2"
|
|
43
48
|
|
|
49
|
+
[features]
|
|
50
|
+
default = []
|
|
51
|
+
# Arrow Flight is experimental/unsupported - enable with: npm run build:arrow
|
|
52
|
+
arrow-flight = [
|
|
53
|
+
"databricks-zerobus-ingest-sdk/arrow-flight",
|
|
54
|
+
"dep:arrow-array",
|
|
55
|
+
"dep:arrow-schema",
|
|
56
|
+
"dep:arrow-ipc"
|
|
57
|
+
]
|
|
58
|
+
|
|
44
59
|
[profile.release]
|
|
45
60
|
lto = true
|
|
46
61
|
strip = true
|
package/README.md
CHANGED
|
@@ -1,17 +1,12 @@
|
|
|
1
1
|
# Databricks Zerobus Ingest SDK for TypeScript
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
We are keen to hear feedback from you on this SDK. Please [file issues](https://github.com/databricks/zerobus-sdk-ts/issues), and we will address them.
|
|
6
|
-
|
|
7
|
-
The Databricks Zerobus Ingest SDK for TypeScript provides a high-performance client for ingesting data directly into Databricks Delta tables using the Zerobus streaming protocol. This SDK wraps the high-performance [Rust SDK](https://github.com/databricks/zerobus-sdk-rs) using native bindings for optimal performance. | See also the [SDK for Rust](https://github.com/databricks/zerobus-sdk-rs) | See also the [SDK for Python](https://github.com/databricks/zerobus-sdk-py) | See also the [SDK for Java](https://github.com/databricks/zerobus-sdk-java) | See also the [SDK for Go](https://github.com/databricks/zerobus-sdk-go)
|
|
3
|
+
The Databricks Zerobus Ingest SDK for TypeScript provides a high-performance client for ingesting data directly into Databricks Delta tables using the Zerobus streaming protocol. This SDK wraps the high-performance [Rust SDK](https://github.com/databricks/zerobus-sdk/tree/main/rust) using native bindings for optimal performance.
|
|
8
4
|
|
|
9
5
|
## Table of Contents
|
|
10
6
|
|
|
11
7
|
- [Features](#features)
|
|
12
8
|
- [Requirements](#requirements)
|
|
13
9
|
- [Quick Start User Guide](#quick-start-user-guide)
|
|
14
|
-
- [Prerequisites](#prerequisites)
|
|
15
10
|
- [Installation](#installation)
|
|
16
11
|
- [Choose Your Serialization Format](#choose-your-serialization-format)
|
|
17
12
|
- [Option 1: Using JSON (Quick Start)](#option-1-using-json-quick-start)
|
|
@@ -25,8 +20,8 @@ The Databricks Zerobus Ingest SDK for TypeScript provides a high-performance cli
|
|
|
25
20
|
- [Best Practices](#best-practices)
|
|
26
21
|
- [Platform Support](#platform-support)
|
|
27
22
|
- [Architecture](#architecture)
|
|
28
|
-
- [Contributing](#contributing)
|
|
29
|
-
- [
|
|
23
|
+
- [Community and Contributing](#community-and-contributing)
|
|
24
|
+
- [License](#license)
|
|
30
25
|
|
|
31
26
|
## Features
|
|
32
27
|
|
|
@@ -67,65 +62,7 @@ These will be installed automatically:
|
|
|
67
62
|
|
|
68
63
|
### Prerequisites
|
|
69
64
|
|
|
70
|
-
Before using the SDK, you
|
|
71
|
-
|
|
72
|
-
#### 1. Workspace URL and Workspace ID
|
|
73
|
-
|
|
74
|
-
After logging into your Databricks workspace, look at the browser URL:
|
|
75
|
-
|
|
76
|
-
```
|
|
77
|
-
https://<databricks-instance>.cloud.databricks.com/?o=<workspace-id>
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
- **Workspace URL**: The part before `/?o=` → `https://<databricks-instance>.cloud.databricks.com`
|
|
81
|
-
- **Workspace ID**: The part after `?o=` → `<workspace-id>`
|
|
82
|
-
- **Zerobus Endpoint**: `https://<workspace-id>.zerobus.<region>.cloud.databricks.com`
|
|
83
|
-
|
|
84
|
-
> **Note:** The examples above show AWS endpoints (`.cloud.databricks.com`). For Azure deployments, the workspace URL will be `https://<databricks-instance>.azuredatabricks.net` and Zerobus endpoint will use `.azuredatabricks.net`.
|
|
85
|
-
|
|
86
|
-
Example:
|
|
87
|
-
- Full URL: `https://dbc-a1b2c3d4-e5f6.cloud.databricks.com/?o=1234567890123456`
|
|
88
|
-
- Workspace URL: `https://dbc-a1b2c3d4-e5f6.cloud.databricks.com`
|
|
89
|
-
- Workspace ID: `1234567890123456`
|
|
90
|
-
- Zerobus Endpoint: `https://1234567890123456.zerobus.us-west-2.cloud.databricks.com`
|
|
91
|
-
|
|
92
|
-
#### 2. Create a Delta Table
|
|
93
|
-
|
|
94
|
-
Create a table using Databricks SQL:
|
|
95
|
-
|
|
96
|
-
```sql
|
|
97
|
-
CREATE TABLE <catalog_name>.default.air_quality (
|
|
98
|
-
device_name STRING,
|
|
99
|
-
temp INT,
|
|
100
|
-
humidity BIGINT
|
|
101
|
-
)
|
|
102
|
-
USING DELTA;
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
Replace `<catalog_name>` with your catalog name (e.g., `main`).
|
|
106
|
-
|
|
107
|
-
#### 3. Create a Service Principal
|
|
108
|
-
|
|
109
|
-
1. Navigate to **Settings > Identity and Access** in your Databricks workspace
|
|
110
|
-
2. Click **Service principals** and create a new service principal
|
|
111
|
-
3. Generate a new secret for the service principal and save it securely
|
|
112
|
-
4. Grant the following permissions:
|
|
113
|
-
- `USE_CATALOG` on the catalog (e.g., `main`)
|
|
114
|
-
- `USE_SCHEMA` on the schema (e.g., `default`)
|
|
115
|
-
- `MODIFY` and `SELECT` on the table (e.g., `air_quality`)
|
|
116
|
-
|
|
117
|
-
Grant permissions using SQL:
|
|
118
|
-
|
|
119
|
-
```sql
|
|
120
|
-
-- Grant catalog permission
|
|
121
|
-
GRANT USE CATALOG ON CATALOG <catalog_name> TO `<service-principal-application-id>`;
|
|
122
|
-
|
|
123
|
-
-- Grant schema permission
|
|
124
|
-
GRANT USE SCHEMA ON SCHEMA <catalog_name>.default TO `<service-principal-application-id>`;
|
|
125
|
-
|
|
126
|
-
-- Grant table permissions
|
|
127
|
-
GRANT SELECT, MODIFY ON TABLE <catalog_name>.default.air_quality TO `<service-principal-application-id>`;
|
|
128
|
-
```
|
|
65
|
+
Before using the SDK, you need a Databricks workspace URL, a Delta table, and a service principal. See the [monorepo prerequisites](https://github.com/databricks/zerobus-sdk/blob/main/README.md#prerequisites) for detailed setup instructions.
|
|
129
66
|
|
|
130
67
|
### Installation
|
|
131
68
|
|
|
@@ -202,10 +139,10 @@ source $HOME/.cargo/env
|
|
|
202
139
|
|
|
203
140
|
**Note for macOS users**: Pre-built binaries are not available. The package will automatically build from source during `npm install`. Ensure you have Rust toolchain and Xcode Command Line Tools installed (see prerequisites above).
|
|
204
141
|
|
|
205
|
-
1.
|
|
142
|
+
1. Clone the repository:
|
|
206
143
|
```bash
|
|
207
|
-
|
|
208
|
-
cd zerobus-sdk
|
|
144
|
+
git clone https://github.com/databricks/zerobus-sdk.git
|
|
145
|
+
cd zerobus-sdk/typescript
|
|
209
146
|
```
|
|
210
147
|
|
|
211
148
|
2. Install dependencies:
|
|
@@ -256,7 +193,7 @@ import { ZerobusSdk, RecordType } from '@databricks/zerobus-ingest-sdk';
|
|
|
256
193
|
|
|
257
194
|
// Configuration
|
|
258
195
|
// For AWS:
|
|
259
|
-
const zerobusEndpoint = '
|
|
196
|
+
const zerobusEndpoint = 'https://<workspace-id>.zerobus.<region>.cloud.databricks.com';
|
|
260
197
|
const workspaceUrl = 'https://<workspace-name>.cloud.databricks.com';
|
|
261
198
|
// For Azure:
|
|
262
199
|
// const zerobusEndpoint = 'https://<workspace-id>.zerobus.<region>.azuredatabricks.net';
|
|
@@ -288,35 +225,24 @@ const stream = await sdk.createStream(
|
|
|
288
225
|
);
|
|
289
226
|
|
|
290
227
|
try {
|
|
291
|
-
let
|
|
228
|
+
let lastOffset: bigint;
|
|
292
229
|
|
|
293
230
|
// Send all records
|
|
294
231
|
for (let i = 0; i < 100; i++) {
|
|
295
|
-
// Create JSON record
|
|
296
232
|
const record = {
|
|
297
233
|
device_name: `sensor-${i % 10}`,
|
|
298
234
|
temp: 20 + (i % 15),
|
|
299
235
|
humidity: 50 + (i % 40)
|
|
300
236
|
};
|
|
301
237
|
|
|
302
|
-
//
|
|
303
|
-
|
|
304
|
-
lastAckPromise = stream.ingestRecord(record);
|
|
305
|
-
// 2. string (low-level) - pre-serialized JSON
|
|
306
|
-
// lastAckPromise = stream.ingestRecord(JSON.stringify(record));
|
|
238
|
+
// ingestRecordOffset returns immediately after queuing
|
|
239
|
+
lastOffset = await stream.ingestRecordOffset(record);
|
|
307
240
|
}
|
|
308
241
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
// Wait for the last record's acknowledgment
|
|
312
|
-
const lastOffset = await lastAckPromise;
|
|
313
|
-
console.log(`Last record offset: ${lastOffset}`);
|
|
314
|
-
|
|
315
|
-
// Flush to ensure all records are acknowledged
|
|
316
|
-
await stream.flush();
|
|
242
|
+
// Wait for all records to be acknowledged
|
|
243
|
+
await stream.waitForOffset(lastOffset);
|
|
317
244
|
console.log('Successfully ingested 100 records!');
|
|
318
245
|
} finally {
|
|
319
|
-
// Always close the stream
|
|
320
246
|
await stream.close();
|
|
321
247
|
}
|
|
322
248
|
```
|
|
@@ -430,7 +356,7 @@ import * as airQuality from './examples/generated/air_quality';
|
|
|
430
356
|
import { loadDescriptorProto } from '@databricks/zerobus-ingest-sdk/utils/descriptor';
|
|
431
357
|
|
|
432
358
|
// Configuration
|
|
433
|
-
const zerobusEndpoint = '
|
|
359
|
+
const zerobusEndpoint = 'https://<workspace-id>.zerobus.<region>.cloud.databricks.com';
|
|
434
360
|
const workspaceUrl = 'https://<workspace-name>.cloud.databricks.com';
|
|
435
361
|
const tableName = 'main.default.air_quality';
|
|
436
362
|
const clientId = process.env.DATABRICKS_CLIENT_ID!;
|
|
@@ -464,7 +390,7 @@ const stream = await sdk.createStream(tableProperties, clientId, clientSecret, o
|
|
|
464
390
|
|
|
465
391
|
try {
|
|
466
392
|
const AirQuality = airQuality.examples.AirQuality;
|
|
467
|
-
let
|
|
393
|
+
let lastOffset: bigint;
|
|
468
394
|
|
|
469
395
|
// Send all records
|
|
470
396
|
for (let i = 0; i < 100; i++) {
|
|
@@ -474,22 +400,12 @@ try {
|
|
|
474
400
|
humidity: 50 + i
|
|
475
401
|
});
|
|
476
402
|
|
|
477
|
-
//
|
|
478
|
-
|
|
479
|
-
lastAckPromise = stream.ingestRecord(record);
|
|
480
|
-
// 2. Buffer (low-level) - pre-serialized bytes
|
|
481
|
-
// const buffer = Buffer.from(AirQuality.encode(record).finish());
|
|
482
|
-
// lastAckPromise = stream.ingestRecord(buffer);
|
|
403
|
+
// ingestRecordOffset returns immediately after queuing
|
|
404
|
+
lastOffset = await stream.ingestRecordOffset(record);
|
|
483
405
|
}
|
|
484
406
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
// Wait for the last record's acknowledgment
|
|
488
|
-
const lastOffset = await lastAckPromise;
|
|
489
|
-
console.log(`Last record offset: ${lastOffset}`);
|
|
490
|
-
|
|
491
|
-
// Flush to ensure all records are acknowledged
|
|
492
|
-
await stream.flush();
|
|
407
|
+
// Wait for all records to be acknowledged
|
|
408
|
+
await stream.waitForOffset(lastOffset);
|
|
493
409
|
console.log('Successfully ingested 100 records!');
|
|
494
410
|
} finally {
|
|
495
411
|
await stream.close();
|
|
@@ -610,7 +526,7 @@ npm run build:proto
|
|
|
610
526
|
protoc --descriptor_set_out=schemas/air_quality_descriptor.pb --include_imports schemas/air_quality.proto
|
|
611
527
|
|
|
612
528
|
# Run example
|
|
613
|
-
|
|
529
|
+
npm run example:proto:single
|
|
614
530
|
```
|
|
615
531
|
|
|
616
532
|
#### Why Two Steps (TypeScript + Descriptor)?
|
|
@@ -635,21 +551,23 @@ See the `examples/` directory for complete, runnable examples. See [examples/REA
|
|
|
635
551
|
|
|
636
552
|
```bash
|
|
637
553
|
# Set environment variables
|
|
638
|
-
export ZEROBUS_SERVER_ENDPOINT="
|
|
554
|
+
export ZEROBUS_SERVER_ENDPOINT="https://<workspace-id>.zerobus.<region>.cloud.databricks.com"
|
|
639
555
|
export DATABRICKS_WORKSPACE_URL="https://<workspace-name>.cloud.databricks.com"
|
|
640
556
|
export DATABRICKS_CLIENT_ID="your-client-id"
|
|
641
557
|
export DATABRICKS_CLIENT_SECRET="your-client-secret"
|
|
642
558
|
export ZEROBUS_TABLE_NAME="main.default.air_quality"
|
|
643
559
|
|
|
644
|
-
# Run JSON
|
|
645
|
-
|
|
560
|
+
# Run JSON examples
|
|
561
|
+
npm run example:json:single
|
|
562
|
+
npm run example:json:batch
|
|
646
563
|
|
|
647
564
|
# For Protocol Buffers, generate TypeScript code and descriptor
|
|
648
565
|
npm run build:proto
|
|
649
566
|
protoc --descriptor_set_out=schemas/air_quality_descriptor.pb --include_imports schemas/air_quality.proto
|
|
650
567
|
|
|
651
|
-
# Run Protocol Buffers
|
|
652
|
-
|
|
568
|
+
# Run Protocol Buffers examples
|
|
569
|
+
npm run example:proto:single
|
|
570
|
+
npm run example:proto:batch
|
|
653
571
|
```
|
|
654
572
|
|
|
655
573
|
### Batch Ingestion
|
|
@@ -664,13 +582,14 @@ const records = Array.from({ length: 1000 }, (_, i) =>
|
|
|
664
582
|
);
|
|
665
583
|
|
|
666
584
|
// Protobuf Type 1: Message objects (high-level) - SDK auto-serializes
|
|
667
|
-
const offsetId = await stream.ingestRecords(records);
|
|
585
|
+
const offsetId = await stream.ingestRecordsOffset(records);
|
|
668
586
|
|
|
669
587
|
// Protobuf Type 2: Buffers (low-level) - pre-serialized bytes
|
|
670
588
|
// const buffers = records.map(r => Buffer.from(AirQuality.encode(r).finish()));
|
|
671
|
-
// const offsetId = await stream.
|
|
589
|
+
// const offsetId = await stream.ingestRecordsOffset(buffers);
|
|
672
590
|
|
|
673
591
|
if (offsetId !== null) {
|
|
592
|
+
await stream.waitForOffset(offsetId);
|
|
674
593
|
console.log(`Batch acknowledged at offset ${offsetId}`);
|
|
675
594
|
}
|
|
676
595
|
```
|
|
@@ -685,11 +604,15 @@ const records = Array.from({ length: 1000 }, (_, i) => ({
|
|
|
685
604
|
}));
|
|
686
605
|
|
|
687
606
|
// JSON Type 1: objects (high-level) - SDK auto-stringifies
|
|
688
|
-
const offsetId = await stream.ingestRecords(records);
|
|
607
|
+
const offsetId = await stream.ingestRecordsOffset(records);
|
|
689
608
|
|
|
690
609
|
// JSON Type 2: strings (low-level) - pre-serialized JSON
|
|
691
610
|
// const jsonRecords = records.map(r => JSON.stringify(r));
|
|
692
|
-
// const offsetId = await stream.
|
|
611
|
+
// const offsetId = await stream.ingestRecordsOffset(jsonRecords);
|
|
612
|
+
|
|
613
|
+
if (offsetId !== null) {
|
|
614
|
+
await stream.waitForOffset(offsetId);
|
|
615
|
+
}
|
|
693
616
|
```
|
|
694
617
|
|
|
695
618
|
**Type Widening Support:**
|
|
@@ -703,7 +626,7 @@ const offsetId = await stream.ingestRecords(records);
|
|
|
703
626
|
- Use `recreateStream()` for recovery - it automatically handles unacknowledged batches
|
|
704
627
|
|
|
705
628
|
**Examples:**
|
|
706
|
-
|
|
629
|
+
See `examples/json/batch.ts` and `examples/proto/batch.ts` for batch ingestion examples.
|
|
707
630
|
|
|
708
631
|
## Authentication
|
|
709
632
|
|
|
@@ -732,29 +655,25 @@ The SDK automatically fetches access tokens and includes these headers:
|
|
|
732
655
|
Beyond OAuth, you can use custom headers for Personal Access Tokens (PAT) or other auth methods:
|
|
733
656
|
|
|
734
657
|
```typescript
|
|
735
|
-
import { ZerobusSdk } from '@databricks/zerobus-ingest-sdk';
|
|
736
|
-
import { HeadersProvider } from '@databricks/zerobus-ingest-sdk/src/headers_provider';
|
|
737
|
-
|
|
738
|
-
class CustomHeadersProvider implements HeadersProvider {
|
|
739
|
-
async getHeaders(): Promise<Array<[string, string]>> {
|
|
740
|
-
return [
|
|
741
|
-
["authorization", `Bearer ${myToken}`],
|
|
742
|
-
["x-databricks-zerobus-table-name", tableName]
|
|
743
|
-
];
|
|
744
|
-
}
|
|
745
|
-
}
|
|
746
|
-
|
|
747
|
-
const headersProvider = new CustomHeadersProvider();
|
|
748
658
|
const stream = await sdk.createStream(
|
|
749
659
|
tableProperties,
|
|
750
660
|
'', // client_id (ignored when headers_provider is provided)
|
|
751
661
|
'', // client_secret (ignored when headers_provider is provided)
|
|
752
662
|
options,
|
|
753
|
-
{
|
|
663
|
+
{
|
|
664
|
+
getHeadersCallback: async () => [
|
|
665
|
+
["authorization", `Bearer ${myToken}`],
|
|
666
|
+
["x-databricks-zerobus-table-name", tableName]
|
|
667
|
+
]
|
|
668
|
+
}
|
|
754
669
|
);
|
|
755
670
|
```
|
|
756
671
|
|
|
757
|
-
**
|
|
672
|
+
**Required headers:**
|
|
673
|
+
- `authorization` - Bearer token or other auth header
|
|
674
|
+
- `x-databricks-zerobus-table-name` - The fully qualified table name
|
|
675
|
+
|
|
676
|
+
**Note:** The SDK automatically adds the `user-agent` header if not provided.
|
|
758
677
|
|
|
759
678
|
## Configuration
|
|
760
679
|
|
|
@@ -770,6 +689,7 @@ const stream = await sdk.createStream(
|
|
|
770
689
|
| `recoveryRetries` | 4 | Maximum number of recovery attempts |
|
|
771
690
|
| `flushTimeoutMs` | 300,000 | Timeout for flush operations (ms) |
|
|
772
691
|
| `serverLackOfAckTimeoutMs` | 60,000 | Server acknowledgment timeout (ms) |
|
|
692
|
+
| `streamPausedMaxWaitTimeMs` | undefined | Max wait time during graceful stream close (ms) |
|
|
773
693
|
|
|
774
694
|
### Example Configuration
|
|
775
695
|
|
|
@@ -839,7 +759,8 @@ The SDK includes automatic recovery for transient failures (enabled by default w
|
|
|
839
759
|
|
|
840
760
|
```typescript
|
|
841
761
|
try {
|
|
842
|
-
const offset = await stream.ingestRecord(record);
|
|
762
|
+
const offset = await stream.ingestRecordOffset(record);
|
|
763
|
+
await stream.waitForOffset(offset);
|
|
843
764
|
console.log(`Success: offset ${offset}`);
|
|
844
765
|
} catch (error) {
|
|
845
766
|
console.error('Ingestion failed:', error);
|
|
@@ -889,7 +810,7 @@ new ZerobusSdk(zerobusEndpoint: string, unityCatalogUrl: string)
|
|
|
889
810
|
```
|
|
890
811
|
|
|
891
812
|
**Parameters:**
|
|
892
|
-
- `zerobusEndpoint` (string) - The Zerobus gRPC endpoint (e.g.,
|
|
813
|
+
- `zerobusEndpoint` (string) - The Zerobus gRPC endpoint (e.g., `https://<workspace-id>.zerobus.<region>.cloud.databricks.com` for AWS, or `https://<workspace-id>.zerobus.<region>.azuredatabricks.net` for Azure)
|
|
893
814
|
- `unityCatalogUrl` (string) - The Unity Catalog endpoint (your workspace URL)
|
|
894
815
|
|
|
895
816
|
**Methods:**
|
|
@@ -952,11 +873,44 @@ Represents an active ingestion stream.
|
|
|
952
873
|
|
|
953
874
|
**Methods:**
|
|
954
875
|
|
|
876
|
+
```typescript
|
|
877
|
+
async ingestRecordOffset(payload: Buffer | string | object): Promise<bigint>
|
|
878
|
+
```
|
|
879
|
+
|
|
880
|
+
**(Recommended)** Ingests a single record. The Promise resolves immediately after the record is queued (before server acknowledgment). Use `waitForOffset()` to wait for acknowledgment when needed.
|
|
881
|
+
|
|
882
|
+
```typescript
|
|
883
|
+
// High-throughput pattern: send many, wait once
|
|
884
|
+
const offset1 = await stream.ingestRecordOffset(record1); // Resolves immediately
|
|
885
|
+
const offset2 = await stream.ingestRecordOffset(record2); // Resolves immediately
|
|
886
|
+
await stream.waitForOffset(offset2); // Waits for server to acknowledge all records up to offset2
|
|
887
|
+
```
|
|
888
|
+
|
|
889
|
+
---
|
|
890
|
+
|
|
891
|
+
```typescript
|
|
892
|
+
async ingestRecordsOffset(payloads: Array<Buffer | string | object>): Promise<bigint | null>
|
|
893
|
+
```
|
|
894
|
+
|
|
895
|
+
**(Recommended)** Ingests multiple records as a batch. The Promise resolves immediately after the batch is queued (before server acknowledgment). Returns `null` for empty batches.
|
|
896
|
+
|
|
897
|
+
---
|
|
898
|
+
|
|
899
|
+
```typescript
|
|
900
|
+
async waitForOffset(offsetId: bigint): Promise<void>
|
|
901
|
+
```
|
|
902
|
+
|
|
903
|
+
Waits for the server to acknowledge all records up to and including the specified offset ID.
|
|
904
|
+
|
|
905
|
+
---
|
|
906
|
+
|
|
955
907
|
```typescript
|
|
956
908
|
async ingestRecord(payload: Buffer | string | object): Promise<bigint>
|
|
957
909
|
```
|
|
958
910
|
|
|
959
|
-
|
|
911
|
+
**@deprecated** Use `ingestRecordOffset()` instead.
|
|
912
|
+
|
|
913
|
+
Ingests a single record. Unlike `ingestRecordOffset()`, the Promise only resolves **after the server acknowledges** the record. This is slower for high-throughput scenarios.
|
|
960
914
|
|
|
961
915
|
**Parameters:**
|
|
962
916
|
- `payload` - Record data. The SDK supports 4 input types for flexibility:
|
|
@@ -994,7 +948,9 @@ await stream.ingestRecord(buffer);
|
|
|
994
948
|
async ingestRecords(payloads: Array<Buffer | string | object>): Promise<bigint | null>
|
|
995
949
|
```
|
|
996
950
|
|
|
997
|
-
|
|
951
|
+
**@deprecated** Use `ingestRecordsOffset()` instead.
|
|
952
|
+
|
|
953
|
+
Ingests multiple records as a batch. Unlike `ingestRecordsOffset()`, the Promise only resolves **after the server acknowledges** the batch. This is slower for high-throughput scenarios.
|
|
998
954
|
|
|
999
955
|
**Parameters:**
|
|
1000
956
|
- `payloads` - Array of record data. Supports the same 4 types as `ingestRecord()`:
|
|
@@ -1135,14 +1091,15 @@ Configuration options for stream behavior.
|
|
|
1135
1091
|
|
|
1136
1092
|
```typescript
|
|
1137
1093
|
interface StreamConfigurationOptions {
|
|
1138
|
-
recordType?: RecordType;
|
|
1139
|
-
maxInflightRequests?: number;
|
|
1140
|
-
recovery?: boolean;
|
|
1141
|
-
recoveryTimeoutMs?: number;
|
|
1142
|
-
recoveryBackoffMs?: number;
|
|
1143
|
-
recoveryRetries?: number;
|
|
1144
|
-
flushTimeoutMs?: number;
|
|
1145
|
-
serverLackOfAckTimeoutMs?: number;
|
|
1094
|
+
recordType?: RecordType; // RecordType.Json or RecordType.Proto. Default: RecordType.Proto
|
|
1095
|
+
maxInflightRequests?: number; // Default: 10,000
|
|
1096
|
+
recovery?: boolean; // Default: true
|
|
1097
|
+
recoveryTimeoutMs?: number; // Default: 15,000
|
|
1098
|
+
recoveryBackoffMs?: number; // Default: 2,000
|
|
1099
|
+
recoveryRetries?: number; // Default: 4
|
|
1100
|
+
flushTimeoutMs?: number; // Default: 300,000
|
|
1101
|
+
serverLackOfAckTimeoutMs?: number; // Default: 60,000
|
|
1102
|
+
streamPausedMaxWaitTimeMs?: number; // Default: undefined (wait for full server duration)
|
|
1146
1103
|
}
|
|
1147
1104
|
|
|
1148
1105
|
enum RecordType {
|
|
@@ -1159,7 +1116,7 @@ enum RecordType {
|
|
|
1159
1116
|
4. **Error handling**: The stream handles errors internally with automatic retry. Only use `recreateStream()` for persistent failures after internal retries are exhausted.
|
|
1160
1117
|
5. **Use Protocol Buffers for production**: Protocol Buffers (the default) provides better performance and schema validation. Use JSON only when you need schema flexibility or for quick prototyping.
|
|
1161
1118
|
6. **Store credentials securely**: Use environment variables, never hardcode credentials
|
|
1162
|
-
7. **Use batch ingestion**: For high-throughput scenarios, use `
|
|
1119
|
+
7. **Use batch ingestion**: For high-throughput scenarios, use `ingestRecordsOffset()` instead of individual `ingestRecordOffset()` calls
|
|
1163
1120
|
|
|
1164
1121
|
## Platform Support
|
|
1165
1122
|
|
|
@@ -1183,7 +1140,7 @@ The build process happens automatically during installation and typically takes
|
|
|
1183
1140
|
|
|
1184
1141
|
## Architecture
|
|
1185
1142
|
|
|
1186
|
-
This SDK wraps the high-performance [Rust Zerobus SDK](https://github.com/databricks/zerobus-sdk
|
|
1143
|
+
This SDK wraps the high-performance [Rust Zerobus SDK](https://github.com/databricks/zerobus-sdk/tree/main/rust) using [NAPI-RS](https://napi.rs):
|
|
1187
1144
|
|
|
1188
1145
|
```
|
|
1189
1146
|
┌─────────────────────────────┐
|
|
@@ -1203,18 +1160,21 @@ This SDK wraps the high-performance [Rust Zerobus SDK](https://github.com/databr
|
|
|
1203
1160
|
```
|
|
1204
1161
|
|
|
1205
1162
|
**Benefits:**
|
|
1206
|
-
- **
|
|
1163
|
+
- **Native performance** - Rust implementation for high-throughput ingestion
|
|
1207
1164
|
- **Native async/await support** - Rust futures become JavaScript Promises
|
|
1208
1165
|
- **Automatic memory management** - No manual cleanup required
|
|
1209
1166
|
- **Type safety** - Compile-time checks on both sides
|
|
1210
1167
|
|
|
1211
|
-
## Contributing
|
|
1168
|
+
## Community and Contributing
|
|
1169
|
+
|
|
1170
|
+
This is an open source project. We welcome contributions, feedback, and bug reports.
|
|
1212
1171
|
|
|
1213
|
-
|
|
1172
|
+
- **[Contributing Guide](https://github.com/databricks/zerobus-sdk/blob/main/typescript/CONTRIBUTING.md)**: TypeScript-specific development setup and workflow.
|
|
1173
|
+
- **[General Contributing Guide](https://github.com/databricks/zerobus-sdk/blob/main/CONTRIBUTING.md)**: Pull request process, commit requirements, and policies.
|
|
1174
|
+
- **[Changelog](https://github.com/databricks/zerobus-sdk/blob/main/typescript/CHANGELOG.md)**: See the history of changes in the SDK.
|
|
1175
|
+
- **[Security Policy](https://github.com/databricks/zerobus-sdk/blob/main/SECURITY.md)**: Read about our security process and how to report vulnerabilities.
|
|
1176
|
+
- **[Developer Certificate of Origin (DCO)](https://github.com/databricks/zerobus-sdk/blob/main/DCO)**: Understand the agreement for contributions.
|
|
1214
1177
|
|
|
1215
|
-
##
|
|
1178
|
+
## License
|
|
1216
1179
|
|
|
1217
|
-
|
|
1218
|
-
- [Zerobus Python SDK](https://github.com/databricks/zerobus-sdk-py) - Python SDK for Zerobus
|
|
1219
|
-
- [Zerobus Java SDK](https://github.com/databricks/zerobus-sdk-java) - Java SDK for Zerobus
|
|
1220
|
-
- [NAPI-RS](https://napi.rs) - Rust/Node.js binding framework
|
|
1180
|
+
This SDK is licensed under the Databricks License. See the [LICENSE](https://github.com/databricks/zerobus-sdk/blob/main/LICENSE) file for the full license text. The license is also available online at [https://www.databricks.com/legal/db-license](https://www.databricks.com/legal/db-license).
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@databricks/zerobus-ingest-sdk",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "TypeScript/Node.js SDK for streaming data ingestion into Databricks Delta tables using Zerobus",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
],
|
|
16
16
|
"repository": {
|
|
17
17
|
"type": "git",
|
|
18
|
-
"url": "https://github.com/databricks/zerobus-sdk
|
|
18
|
+
"url": "https://github.com/databricks/zerobus-sdk"
|
|
19
19
|
},
|
|
20
20
|
"license": "Databricks License",
|
|
21
21
|
"files": [
|
|
@@ -34,12 +34,13 @@
|
|
|
34
34
|
"napi": {
|
|
35
35
|
"name": "zerobus-ingest-sdk",
|
|
36
36
|
"triples": {
|
|
37
|
-
"defaults":
|
|
37
|
+
"defaults": false,
|
|
38
38
|
"additional": [
|
|
39
|
-
"x86_64-unknown-linux-
|
|
39
|
+
"x86_64-unknown-linux-gnu",
|
|
40
40
|
"aarch64-unknown-linux-gnu",
|
|
41
|
-
"
|
|
42
|
-
"
|
|
41
|
+
"x86_64-pc-windows-msvc",
|
|
42
|
+
"x86_64-apple-darwin",
|
|
43
|
+
"aarch64-apple-darwin"
|
|
43
44
|
]
|
|
44
45
|
}
|
|
45
46
|
},
|
|
@@ -49,7 +50,9 @@
|
|
|
49
50
|
"scripts": {
|
|
50
51
|
"artifacts": "napi artifacts",
|
|
51
52
|
"build": "napi build --platform --release",
|
|
53
|
+
"build:arrow": "napi build --platform --release --features arrow-flight",
|
|
52
54
|
"build:debug": "napi build --platform",
|
|
55
|
+
"build:debug:arrow": "napi build --platform --features arrow-flight",
|
|
53
56
|
"build:proto": "mkdir -p examples/generated && pbjs -t static-module -w commonjs -o examples/generated/air_quality.js schemas/air_quality.proto && pbts -o examples/generated/air_quality.d.ts examples/generated/air_quality.js && protoc --descriptor_set_out=schemas/air_quality_descriptor.pb --include_imports schemas/air_quality.proto",
|
|
54
57
|
"prepublishOnly": "napi prepublish -t npm",
|
|
55
58
|
"test": "tsx --test test/unit.test.ts test/integration.test.ts",
|
|
@@ -57,21 +60,29 @@
|
|
|
57
60
|
"test:integration": "tsx --test test/integration.test.ts",
|
|
58
61
|
"universal": "napi universal",
|
|
59
62
|
"version": "napi version",
|
|
60
|
-
"example:json": "tsx examples/json.ts",
|
|
61
|
-
"example:
|
|
62
|
-
"example:
|
|
63
|
+
"example:json:single": "tsx examples/json/single.ts",
|
|
64
|
+
"example:json:batch": "tsx examples/json/batch.ts",
|
|
65
|
+
"example:proto:single": "tsx examples/proto/single.ts",
|
|
66
|
+
"example:proto:batch": "tsx examples/proto/batch.ts",
|
|
67
|
+
"example:arrow:single": "tsx examples/arrow/single.ts",
|
|
68
|
+
"example:arrow:batch": "tsx examples/arrow/batch.ts"
|
|
63
69
|
},
|
|
64
70
|
"peerDependencies": {
|
|
65
|
-
"protobufjs": "^7.0.0"
|
|
71
|
+
"protobufjs": "^7.0.0",
|
|
72
|
+
"apache-arrow": "^18.1.0"
|
|
66
73
|
},
|
|
67
74
|
"peerDependenciesMeta": {
|
|
68
75
|
"protobufjs": {
|
|
69
76
|
"optional": true
|
|
77
|
+
},
|
|
78
|
+
"apache-arrow": {
|
|
79
|
+
"optional": true
|
|
70
80
|
}
|
|
71
81
|
},
|
|
72
82
|
"devDependencies": {
|
|
73
83
|
"@napi-rs/cli": "^2.18.0",
|
|
74
84
|
"@types/node": "^20.0.0",
|
|
85
|
+
"apache-arrow": "^18.1.0",
|
|
75
86
|
"dotenv": "^17.2.3",
|
|
76
87
|
"protobufjs": "^7.5.4",
|
|
77
88
|
"protobufjs-cli": "^2.0.0",
|
|
@@ -82,12 +93,10 @@
|
|
|
82
93
|
"glob": "^10.0.0"
|
|
83
94
|
},
|
|
84
95
|
"optionalDependencies": {
|
|
85
|
-
"@databricks/zerobus-ingest-sdk-
|
|
86
|
-
"@databricks/zerobus-ingest-sdk-
|
|
87
|
-
"@databricks/zerobus-ingest-sdk-
|
|
88
|
-
"@databricks/zerobus-ingest-sdk-
|
|
89
|
-
"@databricks/zerobus-ingest-sdk-
|
|
90
|
-
"@databricks/zerobus-ingest-sdk-darwin-arm64": "0.1.1",
|
|
91
|
-
"@databricks/zerobus-ingest-sdk-linux-arm64-musl": "0.1.1"
|
|
96
|
+
"@databricks/zerobus-ingest-sdk-linux-x64-gnu": "0.3.0",
|
|
97
|
+
"@databricks/zerobus-ingest-sdk-linux-arm64-gnu": "0.3.0",
|
|
98
|
+
"@databricks/zerobus-ingest-sdk-win32-x64-msvc": "0.3.0",
|
|
99
|
+
"@databricks/zerobus-ingest-sdk-darwin-x64": "0.3.0",
|
|
100
|
+
"@databricks/zerobus-ingest-sdk-darwin-arm64": "0.3.0"
|
|
92
101
|
}
|
|
93
102
|
}
|