@databricks/zerobus-ingest-sdk 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +2233 -0
- package/Cargo.toml +46 -0
- package/LICENSE +69 -0
- package/README.md +1220 -0
- package/build.rs +5 -0
- package/index.d.ts +387 -0
- package/index.js +318 -0
- package/package.json +88 -6
- package/schemas/air_quality.proto +10 -0
- package/schemas/air_quality_descriptor.pb +9 -0
- package/src/headers_provider.ts +82 -0
- package/src/lib.rs +815 -0
- package/utils/descriptor.ts +103 -0
- package/zerobus-sdk-ts.linux-x64-gnu.node +0 -0
package/src/lib.rs
ADDED
@@ -0,0 +1,815 @@
// Zerobus TypeScript SDK - NAPI-RS Bindings
//
// This file provides the Node.js/TypeScript bindings for the Rust Zerobus SDK
// using NAPI-RS. It exposes a TypeScript-friendly API while leveraging the
// high-performance Rust implementation underneath.
//
// The binding layer handles:
// - Type conversions between JavaScript and Rust types
// - Async/await bridging (Rust futures → JavaScript Promises)
// - Memory management and thread safety
// - Error propagation

#![deny(clippy::all)]

use napi::bindgen_prelude::*;
use napi::threadsafe_function::{ThreadsafeFunction, ErrorStrategy};
use napi::{Env, JsObject, JsFunction, JsUnknown, JsString, JsGlobal, ValueType};
use napi_derive::napi;

use databricks_zerobus_ingest_sdk::{
    EncodedRecord as RustRecordPayload,
    StreamConfigurationOptions as RustStreamOptions,
    TableProperties as RustTableProperties, ZerobusSdk as RustZerobusSdk,
    ZerobusStream as RustZerobusStream,
    HeadersProvider as RustHeadersProvider,
    ZerobusError as RustZerobusError,
    ZerobusResult as RustZerobusResult,
};
use databricks_zerobus_ingest_sdk::databricks::zerobus::RecordType as RustRecordType;
use async_trait::async_trait;
use prost_types;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;

/// Record serialization format.
///
/// Specifies how records should be encoded when ingested into the stream.
#[napi]
pub enum RecordType {
    /// JSON encoding - records are JSON-encoded strings
    Json = 0,
    /// Protocol Buffers encoding - records are binary protobuf messages
    Proto = 1,
}

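// Example (illustrative sketch): selecting an encoding via
// StreamConfigurationOptions. The pairing of RecordType.Json with string/object
// payloads and RecordType.Proto with Buffer/message payloads is an inference
// from the payload conversion helper below, not a documented contract.
//
// ```typescript
// import { RecordType } from '@databricks/zerobus-ingest-sdk';
//
// const jsonOptions = { recordType: RecordType.Json };   // JSON-encoded records
// const protoOptions = { recordType: RecordType.Proto }; // protobuf records (the default)
// ```
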
/// Configuration options for the Zerobus stream.
///
/// These options control stream behavior including recovery, timeouts, and inflight limits.
#[napi(object)]
#[derive(Debug, Clone)]
pub struct StreamConfigurationOptions {
    /// Maximum number of unacknowledged requests that can be in flight.
    /// Default: 10,000
    pub max_inflight_requests: Option<u32>,

    /// Enable automatic stream recovery on transient failures.
    /// Default: true
    pub recovery: Option<bool>,

    /// Timeout for recovery operations in milliseconds.
    /// Default: 15,000 (15 seconds)
    pub recovery_timeout_ms: Option<u32>,

    /// Delay between recovery retry attempts in milliseconds.
    /// Default: 2,000 (2 seconds)
    pub recovery_backoff_ms: Option<u32>,

    /// Maximum number of recovery attempts before giving up.
    /// Default: 4
    pub recovery_retries: Option<u32>,

    /// Timeout for flush operations in milliseconds.
    /// Default: 300,000 (5 minutes)
    pub flush_timeout_ms: Option<u32>,

    /// Timeout waiting for server acknowledgments in milliseconds.
    /// Default: 60,000 (1 minute)
    pub server_lack_of_ack_timeout_ms: Option<u32>,

    /// Record serialization format.
    /// Use RecordType.Json for JSON encoding or RecordType.Proto for Protocol Buffers.
    /// Default: RecordType.Proto (Protocol Buffers)
    pub record_type: Option<i32>,
}

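// Example (illustrative sketch): constructing StreamConfigurationOptions from
// TypeScript. All fields are optional; unset fields fall back to the defaults
// documented above. Assumes the package's generated index module exports these
// types with NAPI-RS's usual camelCase field names.
//
// ```typescript
// import { RecordType, StreamConfigurationOptions } from '@databricks/zerobus-ingest-sdk';
//
// const options: StreamConfigurationOptions = {
//   maxInflightRequests: 50_000,   // allow more unacked requests in flight
//   recovery: true,                // auto-recover on transient failures
//   recoveryRetries: 8,            // retry recovery up to 8 times
//   recordType: RecordType.Json,   // send JSON-encoded records
// };
// ```
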
impl From<StreamConfigurationOptions> for RustStreamOptions {
    fn from(opts: StreamConfigurationOptions) -> Self {
        let default = RustStreamOptions::default();

        let record_type = match opts.record_type {
            Some(0) => RustRecordType::Json,
            Some(1) => RustRecordType::Proto,
            _ => RustRecordType::Proto,
        };

        RustStreamOptions {
            max_inflight_requests: opts.max_inflight_requests.unwrap_or(default.max_inflight_requests as u32) as usize,
            recovery: opts.recovery.unwrap_or(default.recovery),
            recovery_timeout_ms: opts.recovery_timeout_ms.map(|v| v as u64).unwrap_or(default.recovery_timeout_ms),
            recovery_backoff_ms: opts.recovery_backoff_ms.map(|v| v as u64).unwrap_or(default.recovery_backoff_ms),
            recovery_retries: opts.recovery_retries.unwrap_or(default.recovery_retries),
            flush_timeout_ms: opts.flush_timeout_ms.map(|v| v as u64).unwrap_or(default.flush_timeout_ms),
            server_lack_of_ack_timeout_ms: opts.server_lack_of_ack_timeout_ms.map(|v| v as u64).unwrap_or(default.server_lack_of_ack_timeout_ms),
            record_type,
        }
    }
}

/// Properties of the target Delta table for ingestion.
///
/// Specifies which Unity Catalog table to write to and optionally the schema descriptor
/// for Protocol Buffers encoding.
#[napi(object)]
#[derive(Debug, Clone)]
pub struct TableProperties {
    /// Full table name in Unity Catalog (e.g., "catalog.schema.table")
    pub table_name: String,

    /// Optional Protocol Buffer descriptor as a base64-encoded string.
    /// If not provided, JSON encoding will be used.
    pub descriptor_proto: Option<String>,
}

impl TableProperties {
    fn to_rust(&self) -> Result<RustTableProperties> {
        let descriptor: Option<prost_types::DescriptorProto> = if let Some(ref desc_str) = self.descriptor_proto {
            let bytes = base64_decode(desc_str)
                .map_err(|e| Error::from_reason(format!("Failed to decode descriptor: {}", e)))?;

            let descriptor_proto: prost_types::DescriptorProto = prost::Message::decode(&bytes[..])
                .map_err(|e| Error::from_reason(format!("Failed to parse descriptor proto: {}", e)))?;

            Some(descriptor_proto)
        } else {
            None
        };

        Ok(RustTableProperties {
            table_name: self.table_name.clone(),
            descriptor_proto: descriptor,
        })
    }
}

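// Example (illustrative sketch): building TableProperties from TypeScript. The
// descriptorProto string must be a base64-encoded serialized DescriptorProto;
// whether schemas/air_quality_descriptor.pb holds exactly that is an assumption
// here (the package's utils/descriptor.ts is the supported way to produce one).
//
// ```typescript
// import { readFileSync } from 'fs';
// import { TableProperties } from '@databricks/zerobus-ingest-sdk';
//
// // JSON ingestion: no descriptor needed.
// const jsonTable: TableProperties = { tableName: 'main.sensors.air_quality' };
//
// // Protobuf ingestion: supply the base64-encoded DescriptorProto.
// const protoTable: TableProperties = {
//   tableName: 'main.sensors.air_quality',
//   descriptorProto: readFileSync('schemas/air_quality_descriptor.pb').toString('base64'),
// };
// ```
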
/// Custom error type for Zerobus operations.
///
/// This error type includes information about whether the error is retryable,
/// which helps determine if automatic recovery can resolve the issue.
#[napi]
pub struct ZerobusError {
    message: String,
    is_retryable: bool,
}

#[napi]
impl ZerobusError {
    /// Returns true if this error can be automatically retried by the SDK.
    #[napi(getter)]
    pub fn is_retryable(&self) -> bool {
        self.is_retryable
    }

    /// Get the error message.
    #[napi(getter)]
    pub fn message(&self) -> String {
        self.message.clone()
    }
}

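// Example (illustrative sketch): reacting to retryability from TypeScript. This
// assumes the caller receives a ZerobusError instance; errors raised as plain
// reason strings by this binding layer will not carry the getter.
//
// ```typescript
// try {
//   await stream.ingestRecord(record);
// } catch (err) {
//   if (err instanceof ZerobusError && err.isRetryable) {
//     // transient failure: recreate the stream and retry
//   } else {
//     throw err; // permanent failure: surface to the caller
//   }
// }
// ```
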
/// Helper function to convert a JavaScript value to a RustRecordPayload.
///
/// Supports:
/// - Buffer (low-level proto bytes)
/// - string (low-level JSON string)
/// - Protobuf message object with .encode() method (high-level, auto-serializes)
/// - Plain JavaScript object (high-level, auto-stringifies to JSON)
fn convert_js_to_record_payload(env: &Env, payload: Unknown) -> Result<RustRecordPayload> {
    let value_type = payload.get_type()?;

    match value_type {
        ValueType::Object => {
            let js_value: JsUnknown = payload.try_into()?;
            if js_value.is_buffer()? {
                let buffer: Buffer = Buffer::from_unknown(js_value)?;
                return Ok(RustRecordPayload::Proto(buffer.to_vec()));
            }

            let obj: JsObject = JsObject::from_unknown(js_value)?;

            let constructor: JsFunction = obj.get_named_property("constructor")?;
            let constructor_obj = JsObject::from_unknown(constructor.into_unknown())?;

            if constructor_obj.has_named_property("encode")? {
                let encode_fn: JsFunction = constructor_obj.get_named_property("encode")?;
                let obj_as_unknown = obj.into_unknown();
                let encode_result: JsUnknown = encode_fn.call::<JsUnknown>(Some(&constructor_obj), &[obj_as_unknown])?;
                let encode_obj = JsObject::from_unknown(encode_result)?;

                if encode_obj.has_named_property("finish")? {
                    let finish_fn: JsFunction = encode_obj.get_named_property("finish")?;
                    let buffer_result: JsUnknown = finish_fn.call::<JsUnknown>(Some(&encode_obj), &[])?;
                    let buffer: Buffer = Buffer::from_unknown(buffer_result)?;
                    Ok(RustRecordPayload::Proto(buffer.to_vec()))
                } else {
                    Err(Error::from_reason(
                        "Protobuf message .encode() must return an object with .finish() method"
                    ))
                }
            } else {
                let global: JsGlobal = env.get_global()?;
                let json_obj: JsObject = global.get_named_property("JSON")?;
                let stringify: JsFunction = json_obj.get_named_property("stringify")?;
                let obj_as_unknown = obj.into_unknown();
                let str_result: JsUnknown = stringify.call::<JsUnknown>(Some(&json_obj), &[obj_as_unknown])?;
                let js_string = JsString::from_unknown(str_result)?;
                let json_string = js_string.into_utf8()?.as_str()?.to_string();

                Ok(RustRecordPayload::Json(json_string))
            }
        }
        ValueType::String => {
            let js_value: JsUnknown = payload.try_into()?;
            let js_string = JsString::from_unknown(js_value)?;
            let json_string = js_string.into_utf8()?.as_str()?.to_string();
            Ok(RustRecordPayload::Json(json_string))
        }
        _ => {
            Err(Error::from_reason(
                "Payload must be a Buffer, string, protobuf message object, or plain JavaScript object"
            ))
        }
    }
}

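// Example (illustrative sketch): the four payload shapes the converter accepts,
// as seen from TypeScript. `AirQuality` is assumed to be a protobufjs-style
// generated class whose static encode() returns a writer with finish().
//
// ```typescript
// // 1. Buffer: pre-encoded protobuf bytes, passed through as-is.
// await stream.ingestRecord(Buffer.from(encodedBytes));
//
// // 2. string: treated as an already-serialized JSON record.
// await stream.ingestRecord('{"device_name":"sensor-1","temp":21}');
//
// // 3. Protobuf message object: AirQuality.encode(msg).finish() is called for you.
// await stream.ingestRecord(AirQuality.create({ deviceName: 'sensor-1', temp: 21 }));
//
// // 4. Plain object: JSON.stringify() is applied automatically.
// await stream.ingestRecord({ device_name: 'sensor-1', temp: 21 });
// ```
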
/// A stream for ingesting data into a Databricks Delta table.
///
/// The stream manages a bidirectional gRPC connection, handles acknowledgments,
/// and provides automatic recovery on transient failures.
///
/// # Example
///
/// ```typescript
/// const stream = await sdk.createStream(tableProps, clientId, clientSecret, options);
/// const ackPromise = stream.ingestRecord(Buffer.from([1, 2, 3]));
/// const offset = await ackPromise;
/// await stream.close();
/// ```
#[napi]
pub struct ZerobusStream {
    inner: Arc<Mutex<Option<RustZerobusStream>>>,
}

#[napi]
impl ZerobusStream {
    /// Ingests a single record into the stream.
    ///
    /// This method accepts either:
    /// - A Protocol Buffer encoded record as a Buffer (Vec<u8>)
    /// - A JSON string
    ///
    /// This method BLOCKS until the record is sent to the SDK's internal landing zone,
    /// then returns a Promise for the server acknowledgment. This allows you to send
    /// many records immediately without waiting for acknowledgments:
    ///
    /// ```typescript
    /// let lastAckPromise;
    /// for (let i = 0; i < 1000; i++) {
    ///   // This call blocks until the record is sent (inside the SDK)
    ///   lastAckPromise = stream.ingestRecord(record);
    /// }
    /// // All 1000 records are now in the SDK's internal queue.
    /// // Wait for the last acknowledgment.
    /// await lastAckPromise;
    /// // Flush to ensure all records are acknowledged.
    /// await stream.flush();
    /// ```
    ///
    /// # Arguments
    ///
    /// * `payload` - The record data. Accepts:
    ///   - Buffer (low-level proto bytes)
    ///   - string (low-level JSON string)
    ///   - Protobuf message object with .encode() method (high-level, auto-serializes)
    ///   - Plain JavaScript object (high-level, auto-stringifies to JSON)
    ///
    /// # Returns
    ///
    /// A Promise that resolves to the offset ID when the server acknowledges the record.
    #[napi(ts_return_type = "Promise<bigint>")]
    pub fn ingest_record(&self, env: Env, payload: Unknown) -> Result<JsObject> {
        let record_payload = convert_js_to_record_payload(&env, payload)?;

        let ack_future = {
            let handle = tokio::runtime::Handle::current();
            let stream = self.inner.clone();

            handle.block_on(async move {
                let mut guard = stream.lock().await;
                let stream_ref = guard
                    .as_mut()
                    .ok_or_else(|| Error::from_reason("Stream has been closed"))?;

                stream_ref
                    .ingest_record(record_payload)
                    .await
                    .map_err(|e| Error::from_reason(format!("Failed to ingest record: {}", e)))
            })?
        };

        env.execute_tokio_future(
            async move {
                ack_future
                    .await
                    .map_err(|e| napi::Error::from_reason(format!("Acknowledgment failed: {}", e)))
            },
            |env, result| {
                // Offsets can exceed Number.MAX_SAFE_INTEGER, so convert via the
                // global BigInt constructor from the string representation.
                let result_str = result.to_string();
                let global: JsGlobal = env.get_global()?;
                let bigint_ctor: JsFunction = global.get_named_property("BigInt")?;
                let js_str = env.create_string(&result_str)?;
                bigint_ctor.call(None, &[js_str.into_unknown()])
            },
        )
    }

    /// Ingests multiple records as a single atomic batch.
    ///
    /// This method accepts an array of records (Protocol Buffer buffers or JSON strings)
    /// and ingests them as a batch. The batch receives a single acknowledgment from
    /// the server with all-or-nothing semantics.
    ///
    /// Similar to ingestRecord(), this BLOCKS until the batch is sent to the SDK's
    /// internal landing zone, then returns a Promise for the server acknowledgment.
    ///
    /// # Arguments
    ///
    /// * `records` - Array of record data (Buffer for protobuf, string for JSON)
    ///
    /// # Returns
    ///
    /// Promise resolving to:
    /// - `bigint`: offset ID for non-empty batches
    /// - `null`: for empty batches
    ///
    /// # Example
    ///
    /// ```typescript
    /// const buffers = records.map(r => Buffer.from(encode(r)));
    /// const offsetId = await stream.ingestRecords(buffers);
    ///
    /// if (offsetId !== null) {
    ///   console.log('Batch acknowledged at offset:', offsetId);
    /// }
    /// ```
    #[napi(ts_return_type = "Promise<bigint | null>")]
    pub fn ingest_records(&self, env: Env, records: Vec<Unknown>) -> Result<JsObject> {
        let record_payloads: Result<Vec<RustRecordPayload>> = records
            .into_iter()
            .map(|payload| convert_js_to_record_payload(&env, payload))
            .collect();

        let record_payloads = record_payloads?;

        let ack_future_option = {
            let handle = tokio::runtime::Handle::current();
            let stream = self.inner.clone();

            handle.block_on(async move {
                let mut guard = stream.lock().await;
                let stream_ref = guard
                    .as_mut()
                    .ok_or_else(|| Error::from_reason("Stream has been closed"))?;

                // Send batch to SDK
                stream_ref
                    .ingest_records(record_payloads)
                    .await
                    .map_err(|e| Error::from_reason(format!("Failed to ingest batch: {}", e)))
            })?
        };

        env.execute_tokio_future(
            async move {
                match ack_future_option.await {
                    Ok(Some(offset_id)) => Ok(Some(offset_id)),
                    Ok(None) => Ok(None),
                    Err(e) => Err(napi::Error::from_reason(
                        format!("Batch acknowledgment failed: {}", e)
                    )),
                }
            },
            |env, result| match result {
                Some(offset_id) => {
                    let offset_str = offset_id.to_string();
                    let global: JsGlobal = env.get_global()?;
                    let bigint_ctor: JsFunction = global.get_named_property("BigInt")?;
                    let js_str = env.create_string(&offset_str)?;
                    let bigint = bigint_ctor.call(None, &[js_str.into_unknown()])?;
                    Ok(bigint.into_unknown())
                },
                None => env.get_null().map(|v| v.into_unknown()),
            },
        )
    }

    /// Flushes all pending records and waits for acknowledgments.
    ///
    /// This method ensures all previously ingested records have been sent to the server
    /// and acknowledged. It's useful for checkpointing or ensuring data durability.
    ///
    /// # Errors
    ///
    /// - Timeout errors if the flush takes longer than the configured timeout
    /// - Network errors if the connection fails during flush
    #[napi]
    pub async fn flush(&self) -> Result<()> {
        let guard = self.inner.lock().await;
        let stream = guard
            .as_ref()
            .ok_or_else(|| Error::from_reason("Stream has been closed"))?;

        stream
            .flush()
            .await
            .map_err(|e| Error::from_reason(format!("Failed to flush stream: {}", e)))
    }

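    // Example (illustrative sketch): periodic flush as a durability checkpoint.
    // The 10_000-record checkpoint interval is an assumption, not an SDK
    // requirement.
    //
    // ```typescript
    // for (let i = 0; i < records.length; i++) {
    //   stream.ingestRecord(records[i]);
    //   if ((i + 1) % 10_000 === 0) {
    //     await stream.flush(); // all records so far are now acknowledged
    //   }
    // }
    // await stream.flush();
    // ```
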
    /// Closes the stream gracefully.
    ///
    /// This method flushes all pending records, waits for acknowledgments, and then
    /// closes the underlying gRPC connection. Always call this method when done with
    /// the stream to ensure data integrity.
    ///
    /// # Errors
    ///
    /// - Returns an error if some records could not be acknowledged
    /// - Network errors during the close operation
    #[napi]
    pub async fn close(&self) -> Result<()> {
        let mut guard = self.inner.lock().await;
        if let Some(mut stream) = guard.take() {
            stream
                .close()
                .await
                .map_err(|e| Error::from_reason(format!("Failed to close stream: {}", e)))?;
        }
        Ok(())
    }

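    // Example (illustrative sketch): a try/finally pattern that guarantees the
    // stream is closed even when ingestion throws.
    //
    // ```typescript
    // const stream = await sdk.createStream(tableProps, clientId, clientSecret);
    // try {
    //   await stream.ingestRecord(record);
    // } finally {
    //   await stream.close(); // flushes pending records, then closes the connection
    // }
    // ```
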
    /// Gets the list of unacknowledged records.
    ///
    /// This method should only be called after a stream failure to retrieve records
    /// that were sent but not acknowledged by the server. These records can be
    /// re-ingested into a new stream.
    ///
    /// # Returns
    ///
    /// An array of Buffers containing the unacknowledged record payloads.
    #[napi]
    pub async fn get_unacked_records(&self) -> Result<Vec<Buffer>> {
        let guard = self.inner.lock().await;
        let stream = guard
            .as_ref()
            .ok_or_else(|| Error::from_reason("Stream has been closed"))?;

        let unacked = stream
            .get_unacked_records()
            .await
            .map_err(|e| Error::from_reason(format!("Failed to get unacked records: {}", e)))?;

        Ok(unacked
            .into_iter()
            .map(|payload| match payload {
                RustRecordPayload::Proto(vec) => vec.into(),
                RustRecordPayload::Json(s) => s.into_bytes().into(),
            })
            .collect())
    }

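    // Example (illustrative sketch): manual recovery by replaying flat records
    // into a fresh stream. Note that JSON records come back as UTF-8 Buffers,
    // which ingestRecord() treats as protobuf bytes; for JSON streams, prefer
    // getUnackedBatches() or convert with buf.toString('utf8') first (an
    // inference from the conversion above, not documented behavior).
    //
    // ```typescript
    // try {
    //   await stream.flush();
    // } catch (error) {
    //   const unacked = await stream.getUnackedRecords();
    //   const newStream = await sdk.createStream(tableProps, clientId, clientSecret);
    //   for (const record of unacked) {
    //     newStream.ingestRecord(record);
    //   }
    //   await newStream.flush();
    // }
    // ```
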
    /// Gets unacknowledged records grouped by their original batches.
    ///
    /// This preserves the batch structure from ingestion:
    /// - Each ingestRecord() call → 1-element batch
    /// - Each ingestRecords() call → N-element batch
    ///
    /// Should only be called after a stream failure. All records are returned as Buffers
    /// (JSON strings are converted to UTF-8 bytes).
    ///
    /// # Returns
    ///
    /// Array of batches, where each batch is an array of Buffers
    ///
    /// # Example
    ///
    /// ```typescript
    /// try {
    ///   await stream.ingestRecords(batch1);
    ///   await stream.ingestRecords(batch2);
    /// } catch (error) {
    ///   const unackedBatches = await stream.getUnackedBatches();
    ///
    ///   // Re-ingest with a new stream
    ///   for (const batch of unackedBatches) {
    ///     await newStream.ingestRecords(batch);
    ///   }
    /// }
    /// ```
    #[napi]
    pub async fn get_unacked_batches(&self) -> Result<Vec<Vec<Buffer>>> {
        let guard = self.inner.lock().await;
        let stream = guard
            .as_ref()
            .ok_or_else(|| Error::from_reason("Stream has been closed"))?;

        let unacked_batches = stream
            .get_unacked_batches()
            .await
            .map_err(|e| Error::from_reason(format!("Failed to get unacked batches: {}", e)))?;

        Ok(unacked_batches
            .into_iter()
            .map(|batch| {
                batch
                    .into_iter()
                    .map(|record| match record {
                        RustRecordPayload::Proto(vec) => vec.into(),
                        RustRecordPayload::Json(s) => s.into_bytes().into(),
                    })
                    .collect()
            })
            .collect())
    }
}

/// JavaScript headers provider callback wrapper.
///
/// Allows TypeScript code to provide custom authentication headers
/// by implementing a getHeaders() function.
#[napi(object)]
pub struct JsHeadersProvider {
    /// JavaScript function: () => Promise<Array<[string, string]>>
    pub get_headers_callback: JsFunction,
}

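// Example (illustrative sketch): a provider for Personal Access Token
// authentication. Both headers shown are required by the validation in
// StaticHeadersProvider::new below; `myPat` and `tableName` are placeholders.
//
// ```typescript
// const headersProvider = {
//   getHeadersCallback: async (): Promise<Array<[string, string]>> => [
//     ['authorization', `Bearer ${myPat}`],
//     ['x-databricks-zerobus-table-name', tableName],
//   ],
// };
// ```
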
/// Internal adapter that wraps static headers as a HeadersProvider.
/// This is used for custom authentication in the TypeScript SDK.
struct StaticHeadersProvider {
    headers: HashMap<&'static str, String>,
}

impl StaticHeadersProvider {
    fn new(headers: Vec<(String, String)>) -> RustZerobusResult<Self> {
        // Convert Vec<(String, String)> to HashMap<&'static str, String>.
        // We need to leak the strings to get a 'static lifetime for the keys.
        let mut map = HashMap::new();
        for (k, v) in headers {
            let static_key: &'static str = Box::leak(k.into_boxed_str());
            map.insert(static_key, v);
        }

        if !map.contains_key("authorization") {
            return Err(RustZerobusError::InvalidArgument(
                "HeadersProvider must include 'authorization' header with Bearer token".to_string()
            ));
        }
        if !map.contains_key("x-databricks-zerobus-table-name") {
            return Err(RustZerobusError::InvalidArgument(
                "HeadersProvider must include 'x-databricks-zerobus-table-name' header".to_string()
            ));
        }

        Ok(Self { headers: map })
    }
}

#[async_trait]
impl RustHeadersProvider for StaticHeadersProvider {
    async fn get_headers(&self) -> RustZerobusResult<HashMap<&'static str, String>> {
        Ok(self.headers.clone())
    }
}

/// Helper to create a threadsafe function from a JavaScript callback.
fn create_headers_tsfn(js_func: JsFunction) -> Result<ThreadsafeFunction<(), ErrorStrategy::Fatal>> {
    js_func.create_threadsafe_function(0, |ctx| Ok(vec![ctx.value]))
}

/// Helper to call the headers callback and await its result.
async fn call_headers_tsfn(tsfn: ThreadsafeFunction<(), ErrorStrategy::Fatal>) -> Result<Vec<(String, String)>> {
    tsfn.call_async::<Vec<(String, String)>>(())
        .await
        .map_err(|e| Error::from_reason(format!("Failed to call headers callback: {}", e)))
}

/// The main SDK for interacting with the Databricks Zerobus service.
///
/// This is the entry point for creating ingestion streams to Delta tables.
///
/// # Example
///
/// ```typescript
/// const sdk = new ZerobusSdk(
///   "https://workspace-id.zerobus.region.cloud.databricks.com",
///   "https://workspace.cloud.databricks.com"
/// );
///
/// const stream = await sdk.createStream(
///   { tableName: "catalog.schema.table" },
///   "client-id",
///   "client-secret"
/// );
/// ```
#[napi]
pub struct ZerobusSdk {
    inner: Arc<RustZerobusSdk>,
}

#[napi]
impl ZerobusSdk {
    /// Creates a new Zerobus SDK instance.
    ///
    /// # Arguments
    ///
    /// * `zerobus_endpoint` - The Zerobus API endpoint URL
    ///   (e.g., "https://workspace-id.zerobus.region.cloud.databricks.com")
    /// * `unity_catalog_url` - The Unity Catalog endpoint URL
    ///   (e.g., "https://workspace.cloud.databricks.com")
    ///
    /// # Errors
    ///
    /// - Invalid endpoint URLs
    /// - Failure to extract the workspace ID from the endpoint
    #[napi(constructor)]
    pub fn new(zerobus_endpoint: String, unity_catalog_url: String) -> Result<Self> {
        let inner = RustZerobusSdk::new(zerobus_endpoint, unity_catalog_url)
            .map_err(|e| Error::from_reason(format!("Failed to create SDK: {}", e)))?;

        Ok(ZerobusSdk { inner: Arc::new(inner) })
    }

    /// Creates a new ingestion stream to a Delta table.
    ///
    /// This method establishes a bidirectional gRPC connection to the Zerobus service
    /// and prepares it for data ingestion. By default, it uses OAuth 2.0 Client Credentials
    /// authentication. For custom authentication (e.g., Personal Access Tokens), provide
    /// a custom headers_provider.
    ///
    /// # Arguments
    ///
    /// * `table_properties` - Properties of the target table, including name and optional schema
    /// * `client_id` - OAuth 2.0 client ID (ignored if headers_provider is provided)
    /// * `client_secret` - OAuth 2.0 client secret (ignored if headers_provider is provided)
    /// * `options` - Optional stream configuration (uses defaults if not provided)
    /// * `headers_provider` - Optional custom headers provider for authentication.
    ///   If not provided, uses OAuth with client_id and client_secret.
    ///
    /// # Returns
    ///
    /// A Promise that resolves to a ZerobusStream ready for data ingestion.
    ///
    /// # Errors
    ///
    /// - Authentication failures (invalid credentials)
    /// - Invalid table name or insufficient permissions
    /// - Network connectivity issues
    /// - Schema validation errors
    ///
    /// # Examples
    ///
    /// OAuth authentication (default):
    /// ```typescript
    /// const stream = await sdk.createStream(
    ///   { tableName: "catalog.schema.table" },
    ///   "client-id",
    ///   "client-secret"
    /// );
    /// ```
    ///
    /// Custom authentication with a headers provider:
    /// ```typescript
    /// const headersProvider = {
    ///   getHeadersCallback: async () => [
    ///     ["authorization", `Bearer ${myToken}`],
    ///     ["x-databricks-zerobus-table-name", tableName]
    ///   ]
    /// };
    /// const stream = await sdk.createStream(
    ///   { tableName: "catalog.schema.table" },
    ///   "", // ignored
    ///   "", // ignored
    ///   undefined,
    ///   headersProvider
    /// );
    /// ```
    #[napi(ts_return_type = "Promise<ZerobusStream>")]
    pub fn create_stream(
        &self,
        env: Env,
        table_properties: TableProperties,
        client_id: String,
        client_secret: String,
        options: Option<StreamConfigurationOptions>,
        headers_provider: Option<JsHeadersProvider>,
    ) -> Result<JsObject> {
        let rust_table_props = table_properties.to_rust()?;
        let rust_options = options.map(|o| o.into());

        let headers_tsfn = match headers_provider {
            Some(JsHeadersProvider { get_headers_callback }) => {
                Some(create_headers_tsfn(get_headers_callback)?)
            }
            None => None,
        };

        let sdk = self.inner.clone();

        env.execute_tokio_future(
            async move {
                let headers_provider_arc = if let Some(tsfn) = headers_tsfn {
                    let headers = call_headers_tsfn(tsfn).await
                        .map_err(|e| napi::Error::from_reason(format!("Headers callback failed: {}", e)))?;

                    let static_provider = StaticHeadersProvider::new(headers)
                        .map_err(|e| napi::Error::from_reason(format!("Invalid headers: {}", e)))?;

                    Some(Arc::new(static_provider) as Arc<dyn RustHeadersProvider>)
                } else {
                    None
                };

                let stream = if let Some(provider) = headers_provider_arc {
                    sdk
                        .create_stream_with_headers_provider(
                            rust_table_props,
                            provider,
                            rust_options,
                        )
                        .await
                        .map_err(|e| napi::Error::from_reason(format!("Failed to create stream: {}", e)))?
                } else {
                    sdk
                        .create_stream(rust_table_props, client_id, client_secret, rust_options)
                        .await
                        .map_err(|e| napi::Error::from_reason(format!("Failed to create stream: {}", e)))?
                };

                Ok(ZerobusStream {
                    inner: Arc::new(Mutex::new(Some(stream))),
                })
            },
            |_env, stream| Ok(stream),
        )
    }

    /// Recreates a stream with the same configuration and re-ingests unacknowledged batches.
    ///
    /// This method is the recommended approach for recovering from stream failures. It:
    /// 1. Retrieves all unacknowledged batches from the failed stream
    /// 2. Creates a new stream with identical configuration
    /// 3. Re-ingests all unacknowledged batches in order
    /// 4. Returns the new stream, ready for continued ingestion
    ///
    /// # Arguments
    ///
    /// * `stream` - The failed stream to recreate (it must not have been close()d,
    ///   since closing discards the inner stream state)
    ///
    /// # Returns
    ///
    /// A Promise that resolves to a new ZerobusStream with all unacknowledged batches re-ingested.
    ///
    /// # Errors
    ///
    /// - Failure to retrieve unacknowledged batches from the original stream
    /// - Authentication failures when creating the new stream
    /// - Network connectivity issues during re-ingestion
    ///
    /// # Examples
    ///
    /// ```typescript
    /// try {
    ///   await stream.ingestRecords(batch);
    /// } catch (error) {
    ///   // Recreate the stream with all unacked batches re-ingested
    ///   const newStream = await sdk.recreateStream(stream);
    ///   // Continue ingesting with newStream
    /// }
    /// ```
    #[napi]
    pub async fn recreate_stream(&self, stream: &ZerobusStream) -> Result<ZerobusStream> {
        let inner_guard = stream.inner.lock().await;
        let rust_stream = inner_guard
            .as_ref()
            .ok_or_else(|| Error::from_reason("Stream has been closed"))?;

        let new_rust_stream = self
            .inner
            .recreate_stream(rust_stream)
            .await
            .map_err(|e| Error::from_reason(format!("Failed to recreate stream: {}", e)))?;

        Ok(ZerobusStream {
            inner: Arc::new(Mutex::new(Some(new_rust_stream))),
        })
    }
}

/// Helper function to decode base64 strings.
fn base64_decode(input: &str) -> std::result::Result<Vec<u8>, String> {
    use base64::{engine::general_purpose::STANDARD, Engine};
    STANDARD
        .decode(input)
        .map_err(|e| format!("Base64 decode error: {}", e))
}