@databricks/zerobus-ingest-sdk 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib.rs ADDED
@@ -0,0 +1,815 @@
+ // Zerobus TypeScript SDK - NAPI-RS Bindings
+ //
+ // This file provides the Node.js/TypeScript bindings for the Rust Zerobus SDK
+ // using NAPI-RS. It exposes a TypeScript-friendly API while leveraging the
+ // high-performance Rust implementation underneath.
+ //
+ // The binding layer handles:
+ // - Type conversions between JavaScript and Rust types
+ // - Async/await bridging (Rust futures → JavaScript Promises)
+ // - Memory management and thread safety
+ // - Error propagation
+
+ #![deny(clippy::all)]
+
+ use napi::bindgen_prelude::*;
+ use napi::threadsafe_function::{ThreadsafeFunction, ErrorStrategy};
+ use napi::{Env, JsObject, JsFunction, JsUnknown, JsString, JsGlobal, ValueType};
+ use napi_derive::napi;
+
+ use databricks_zerobus_ingest_sdk::{
+     EncodedRecord as RustRecordPayload,
+     StreamConfigurationOptions as RustStreamOptions,
+     TableProperties as RustTableProperties, ZerobusSdk as RustZerobusSdk,
+     ZerobusStream as RustZerobusStream,
+     HeadersProvider as RustHeadersProvider,
+     ZerobusError as RustZerobusError,
+     ZerobusResult as RustZerobusResult,
+ };
+ use databricks_zerobus_ingest_sdk::databricks::zerobus::RecordType as RustRecordType;
+ use async_trait::async_trait;
+ use prost_types;
+ use std::collections::HashMap;
+ use std::sync::Arc;
+ use tokio::sync::Mutex;
+
+ /// Record serialization format.
+ ///
+ /// Specifies how records should be encoded when ingested into the stream.
+ #[napi]
+ pub enum RecordType {
+     /// JSON encoding - records are JSON-encoded strings
+     Json = 0,
+     /// Protocol Buffers encoding - records are binary protobuf messages
+     Proto = 1,
+ }
+
+ /// Configuration options for the Zerobus stream.
+ ///
+ /// These options control stream behavior including recovery, timeouts, and inflight limits.
+ #[napi(object)]
+ #[derive(Debug, Clone)]
+ pub struct StreamConfigurationOptions {
+     /// Maximum number of unacknowledged requests that can be in flight.
+     /// Default: 10,000
+     pub max_inflight_requests: Option<u32>,
+
+     /// Enable automatic stream recovery on transient failures.
+     /// Default: true
+     pub recovery: Option<bool>,
+
+     /// Timeout for recovery operations in milliseconds.
+     /// Default: 15,000 (15 seconds)
+     pub recovery_timeout_ms: Option<u32>,
+
+     /// Delay between recovery retry attempts in milliseconds.
+     /// Default: 2,000 (2 seconds)
+     pub recovery_backoff_ms: Option<u32>,
+
+     /// Maximum number of recovery attempts before giving up.
+     /// Default: 4
+     pub recovery_retries: Option<u32>,
+
+     /// Timeout for flush operations in milliseconds.
+     /// Default: 300,000 (5 minutes)
+     pub flush_timeout_ms: Option<u32>,
+
+     /// Timeout waiting for server acknowledgments in milliseconds.
+     /// Default: 60,000 (1 minute)
+     pub server_lack_of_ack_timeout_ms: Option<u32>,
+
+     /// Record serialization format.
+     /// Use RecordType.Json for JSON encoding or RecordType.Proto for Protocol Buffers.
+     /// Default: RecordType.Proto (Protocol Buffers)
+     pub record_type: Option<i32>,
+ }
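+
+ // A minimal usage sketch from TypeScript. All fields are optional; anything left
+ // unset falls back to the Rust defaults above. Field names assume NAPI-RS's default
+ // camelCase conversion for #[napi(object)] structs:
+ //
+ // ```typescript
+ // const options: StreamConfigurationOptions = {
+ //   maxInflightRequests: 50_000,  // widen the inflight window for throughput
+ //   recoveryRetries: 2,           // give up sooner than the default of 4
+ //   recordType: RecordType.Json,  // default is RecordType.Proto
+ // };
+ // ```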
+
+ impl From<StreamConfigurationOptions> for RustStreamOptions {
+     fn from(opts: StreamConfigurationOptions) -> Self {
+         let default = RustStreamOptions::default();
+
+         let record_type = match opts.record_type {
+             Some(0) => RustRecordType::Json,
+             Some(1) => RustRecordType::Proto,
+             _ => RustRecordType::Proto,
+         };
+
+         RustStreamOptions {
+             max_inflight_requests: opts.max_inflight_requests.unwrap_or(default.max_inflight_requests as u32) as usize,
+             recovery: opts.recovery.unwrap_or(default.recovery),
+             recovery_timeout_ms: opts.recovery_timeout_ms.map(|v| v as u64).unwrap_or(default.recovery_timeout_ms),
+             recovery_backoff_ms: opts.recovery_backoff_ms.map(|v| v as u64).unwrap_or(default.recovery_backoff_ms),
+             recovery_retries: opts.recovery_retries.unwrap_or(default.recovery_retries),
+             flush_timeout_ms: opts.flush_timeout_ms.map(|v| v as u64).unwrap_or(default.flush_timeout_ms),
+             server_lack_of_ack_timeout_ms: opts.server_lack_of_ack_timeout_ms.map(|v| v as u64).unwrap_or(default.server_lack_of_ack_timeout_ms),
+             record_type,
+         }
+     }
+ }
+
+ /// Properties of the target Delta table for ingestion.
+ ///
+ /// Specifies which Unity Catalog table to write to and optionally the schema descriptor
+ /// for Protocol Buffers encoding.
+ #[napi(object)]
+ #[derive(Debug, Clone)]
+ pub struct TableProperties {
+     /// Full table name in Unity Catalog (e.g., "catalog.schema.table")
+     pub table_name: String,
+
+     /// Optional Protocol Buffer descriptor as a base64-encoded string.
+     /// If not provided, JSON encoding will be used.
+     pub descriptor_proto: Option<String>,
+ }
+
+ impl TableProperties {
+     fn to_rust(&self) -> Result<RustTableProperties> {
+         let descriptor: Option<prost_types::DescriptorProto> = if let Some(ref desc_str) = self.descriptor_proto {
+             let bytes = base64_decode(desc_str)
+                 .map_err(|e| Error::from_reason(format!("Failed to decode descriptor: {}", e)))?;
+
+             let descriptor_proto: prost_types::DescriptorProto = prost::Message::decode(&bytes[..])
+                 .map_err(|e| Error::from_reason(format!("Failed to parse descriptor proto: {}", e)))?;
+
+             Some(descriptor_proto)
+         } else {
+             None
+         };
+
+         Ok(RustTableProperties {
+             table_name: self.table_name.clone(),
+             descriptor_proto: descriptor,
+         })
+     }
+ }
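+
+ // A construction sketch from TypeScript. The base64 string must decode to a serialized
+ // google.protobuf.DescriptorProto for the record message; how you produce those bytes
+ // depends on your protobuf toolchain, and the file name below is purely illustrative:
+ //
+ // ```typescript
+ // import { readFileSync } from "fs";
+ //
+ // const tableProps: TableProperties = {
+ //   tableName: "catalog.schema.table",
+ //   // Omit descriptorProto entirely to fall back to JSON encoding.
+ //   descriptorProto: readFileSync("record_descriptor.bin").toString("base64"),
+ // };
+ // ```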
+
+ /// Custom error type for Zerobus operations.
+ ///
+ /// This error type includes information about whether the error is retryable,
+ /// which helps determine if automatic recovery can resolve the issue.
+ #[napi]
+ pub struct ZerobusError {
+     message: String,
+     is_retryable: bool,
+ }
+
+ #[napi]
+ impl ZerobusError {
+     /// Returns true if this error can be automatically retried by the SDK.
+     #[napi(getter)]
+     pub fn is_retryable(&self) -> bool {
+         self.is_retryable
+     }
+
+     /// Get the error message.
+     #[napi(getter)]
+     pub fn message(&self) -> String {
+         self.message.clone()
+     }
+ }
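+
+ // A sketch of how the exposed getters could drive caller-side retry logic, assuming a
+ // failure surfaces as a ZerobusError instance (note the bindings in this file report
+ // failures via plain Error reasons and never construct ZerobusError themselves, so
+ // treat this as illustrative only):
+ //
+ // ```typescript
+ // try {
+ //   await stream.ingestRecord(record);
+ // } catch (err) {
+ //   if (err instanceof ZerobusError && err.isRetryable) {
+ //     // transient failure: recreate the stream and re-ingest
+ //   } else {
+ //     throw err; // permanent failure: propagate
+ //   }
+ // }
+ // ```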
+
+ /// Helper function to convert a JavaScript value to a RustRecordPayload.
+ ///
+ /// Supports:
+ /// - Buffer (low-level proto bytes)
+ /// - string (low-level JSON string)
+ /// - Protobuf message object with .encode() method (high-level, auto-serializes)
+ /// - Plain JavaScript object (high-level, auto-stringifies to JSON)
+ fn convert_js_to_record_payload(env: &Env, payload: Unknown) -> Result<RustRecordPayload> {
+     let value_type = payload.get_type()?;
+
+     match value_type {
+         ValueType::Object => {
+             let js_value: JsUnknown = payload.try_into()?;
+             if js_value.is_buffer()? {
+                 let buffer: Buffer = Buffer::from_unknown(js_value)?;
+                 return Ok(RustRecordPayload::Proto(buffer.to_vec()));
+             }
+
+             let obj: JsObject = JsObject::from_unknown(js_value)?;
+
+             let constructor: JsFunction = obj.get_named_property("constructor")?;
+             let constructor_obj = JsObject::from_unknown(constructor.into_unknown())?;
+
+             if constructor_obj.has_named_property("encode")? {
+                 let encode_fn: JsFunction = constructor_obj.get_named_property("encode")?;
+                 let obj_as_unknown = obj.into_unknown();
+                 let encode_result: JsUnknown = encode_fn.call::<JsUnknown>(Some(&constructor_obj), &[obj_as_unknown])?;
+                 let encode_obj = JsObject::from_unknown(encode_result)?;
+
+                 if encode_obj.has_named_property("finish")? {
+                     let finish_fn: JsFunction = encode_obj.get_named_property("finish")?;
+                     let buffer_result: JsUnknown = finish_fn.call::<JsUnknown>(Some(&encode_obj), &[])?;
+                     let buffer: Buffer = Buffer::from_unknown(buffer_result)?;
+                     Ok(RustRecordPayload::Proto(buffer.to_vec()))
+                 } else {
+                     Err(Error::from_reason(
+                         "Protobuf message .encode() must return an object with .finish() method"
+                     ))
+                 }
+             } else {
+                 let global: JsGlobal = env.get_global()?;
+                 let json_obj: JsObject = global.get_named_property("JSON")?;
+                 let stringify: JsFunction = json_obj.get_named_property("stringify")?;
+                 let obj_as_unknown = obj.into_unknown();
+                 let str_result: JsUnknown = stringify.call::<JsUnknown>(Some(&json_obj), &[obj_as_unknown])?;
+                 let js_string = JsString::from_unknown(str_result)?;
+                 let json_string = js_string.into_utf8()?.as_str()?.to_string();
+
+                 Ok(RustRecordPayload::Json(json_string))
+             }
+         }
+         ValueType::String => {
+             let js_value: JsUnknown = payload.try_into()?;
+             let js_string = JsString::from_unknown(js_value)?;
+             let json_string = js_string.into_utf8()?.as_str()?.to_string();
+             Ok(RustRecordPayload::Json(json_string))
+         }
+         _ => {
+             Err(Error::from_reason(
+                 "Payload must be a Buffer, string, protobuf message object, or plain JavaScript object"
+             ))
+         }
+     }
+ }
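+
+ // The four accepted payload shapes, seen from the JavaScript side. The third branch
+ // matches the static `Message.encode(msg).finish()` convention of protobufjs-style
+ // generated classes (a sketch; `MyMessage` stands in for your generated message type):
+ //
+ // ```typescript
+ // await stream.ingestRecord(Buffer.from([0x08, 0x01]));    // raw proto bytes
+ // await stream.ingestRecord('{"id": 1}');                  // raw JSON string
+ // await stream.ingestRecord(MyMessage.create({ id: 1 }));  // protobufjs message, auto-encoded
+ // await stream.ingestRecord({ id: 1 });                    // plain object, auto-stringified
+ // ```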
+
+ /// A stream for ingesting data into a Databricks Delta table.
+ ///
+ /// The stream manages a bidirectional gRPC connection, handles acknowledgments,
+ /// and provides automatic recovery on transient failures.
+ ///
+ /// # Example
+ ///
+ /// ```typescript
+ /// const stream = await sdk.createStream(tableProps, clientId, clientSecret, options);
+ /// const ackPromise = stream.ingestRecord(Buffer.from([1, 2, 3]));
+ /// const offset = await ackPromise;
+ /// await stream.close();
+ /// ```
+ #[napi]
+ pub struct ZerobusStream {
+     inner: Arc<Mutex<Option<RustZerobusStream>>>,
+ }
+
+ #[napi]
+ impl ZerobusStream {
+     /// Ingests a single record into the stream.
+     ///
+     /// This method accepts either:
+     /// - A Protocol Buffer encoded record as a Buffer (Vec<u8>)
+     /// - A JSON string
+     ///
+     /// This method BLOCKS until the record is sent to the SDK's internal landing zone,
+     /// then returns a Promise for the server acknowledgment. This allows you to send
+     /// many records immediately without waiting for acknowledgments:
+     ///
+     /// ```typescript
+     /// let lastAckPromise;
+     /// for (let i = 0; i < 1000; i++) {
+     ///   // This call blocks until the record is sent (in the SDK)
+     ///   lastAckPromise = stream.ingestRecord(record);
+     /// }
+     /// // All 1000 records are now in the SDK's internal queue
+     /// // Wait for the last acknowledgment
+     /// await lastAckPromise;
+     /// // Flush to ensure all records are acknowledged
+     /// await stream.flush();
+     /// ```
+     ///
+     /// # Arguments
+     ///
+     /// * `payload` - The record data. Accepts:
+     ///   - Buffer (low-level proto bytes)
+     ///   - string (low-level JSON string)
+     ///   - Protobuf message object with .encode() method (high-level, auto-serializes)
+     ///   - Plain JavaScript object (high-level, auto-stringifies to JSON)
+     ///
+     /// # Returns
+     ///
+     /// A Promise that resolves to the offset ID when the server acknowledges the record.
+     #[napi(ts_return_type = "Promise<bigint>")]
+     pub fn ingest_record(&self, env: Env, payload: Unknown) -> Result<JsObject> {
+         let record_payload = convert_js_to_record_payload(&env, payload)?;
+
+         let ack_future = {
+             let handle = tokio::runtime::Handle::current();
+             let stream = self.inner.clone();
+
+             handle.block_on(async move {
+                 let mut guard = stream.lock().await;
+                 let stream_ref = guard
+                     .as_mut()
+                     .ok_or_else(|| Error::from_reason("Stream has been closed"))?;
+
+                 stream_ref
+                     .ingest_record(record_payload)
+                     .await
+                     .map_err(|e| Error::from_reason(format!("Failed to ingest record: {}", e)))
+             })?
+         };
+
+         env.execute_tokio_future(
+             async move {
+                 ack_future
+                     .await
+                     .map_err(|e| napi::Error::from_reason(format!("Acknowledgment failed: {}", e)))
+             },
+             |env, result| {
+                 let result_str = result.to_string();
+                 let global: JsGlobal = env.get_global()?;
+                 let bigint_ctor: JsFunction = global.get_named_property("BigInt")?;
+                 let js_str = env.create_string(&result_str)?;
+                 bigint_ctor.call(None, &[js_str.into_unknown()])
+             },
+         )
+     }
+
+     /// Ingests multiple records as a single atomic batch.
+     ///
+     /// This method accepts an array of records (Protocol Buffer buffers or JSON strings)
+     /// and ingests them as a batch. The batch receives a single acknowledgment from
+     /// the server with all-or-nothing semantics.
+     ///
+     /// Similar to ingestRecord(), this BLOCKS until the batch is sent to the SDK's
+     /// internal landing zone, then returns a Promise for the server acknowledgment.
+     ///
+     /// # Arguments
+     ///
+     /// * `records` - Array of record data (Buffer for protobuf, string for JSON)
+     ///
+     /// # Returns
+     ///
+     /// Promise resolving to:
+     /// - `bigint`: offset ID for non-empty batches
+     /// - `null`: for empty batches
+     ///
+     /// # Example
+     ///
+     /// ```typescript
+     /// const buffers = records.map(r => Buffer.from(encode(r)));
+     /// const offsetId = await stream.ingestRecords(buffers);
+     ///
+     /// if (offsetId !== null) {
+     ///   console.log('Batch acknowledged at offset:', offsetId);
+     /// }
+     /// ```
+     #[napi(ts_return_type = "Promise<bigint | null>")]
+     pub fn ingest_records(&self, env: Env, records: Vec<Unknown>) -> Result<JsObject> {
+         let record_payloads: Result<Vec<RustRecordPayload>> = records
+             .into_iter()
+             .map(|payload| convert_js_to_record_payload(&env, payload))
+             .collect();
+
+         let record_payloads = record_payloads?;
+
+         let ack_future_option = {
+             let handle = tokio::runtime::Handle::current();
+             let stream = self.inner.clone();
+
+             handle.block_on(async move {
+                 let mut guard = stream.lock().await;
+                 let stream_ref = guard
+                     .as_mut()
+                     .ok_or_else(|| Error::from_reason("Stream has been closed"))?;
+
+                 // Send batch to SDK
+                 stream_ref
+                     .ingest_records(record_payloads)
+                     .await
+                     .map_err(|e| Error::from_reason(format!("Failed to ingest batch: {}", e)))
+             })?
+         };
+
+         env.execute_tokio_future(
+             async move {
+                 match ack_future_option.await {
+                     Ok(Some(offset_id)) => Ok(Some(offset_id)),
+                     Ok(None) => Ok(None),
+                     Err(e) => Err(napi::Error::from_reason(
+                         format!("Batch acknowledgment failed: {}", e)
+                     )),
+                 }
+             },
+             |env, result| match result {
+                 Some(offset_id) => {
+                     let offset_str = offset_id.to_string();
+                     let global: JsGlobal = env.get_global()?;
+                     let bigint_ctor: JsFunction = global.get_named_property("BigInt")?;
+                     let js_str = env.create_string(&offset_str)?;
+                     let bigint = bigint_ctor.call(None, &[js_str.into_unknown()])?;
+                     Ok(bigint.into_unknown())
+                 },
+                 None => env.get_null().map(|v| v.into_unknown()),
+             },
+         )
+     }
+
+     /// Flushes all pending records and waits for acknowledgments.
+     ///
+     /// This method ensures all previously ingested records have been sent to the server
+     /// and acknowledged. It's useful for checkpointing or ensuring data durability.
+     ///
+     /// # Errors
+     ///
+     /// - Timeout errors if the flush takes longer than the configured timeout
+     /// - Network errors if the connection fails during flush
+     #[napi]
+     pub async fn flush(&self) -> Result<()> {
+         let guard = self.inner.lock().await;
+         let stream = guard
+             .as_ref()
+             .ok_or_else(|| Error::from_reason("Stream has been closed"))?;
+
+         stream
+             .flush()
+             .await
+             .map_err(|e| Error::from_reason(format!("Failed to flush stream: {}", e)))
+     }
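+
+     // A checkpointing sketch: flush periodically so that at most one window of records
+     // is ever unacknowledged. The window size and variable names are illustrative, and
+     // the un-awaited ingestRecord() calls follow the pipelining pattern documented above:
+     //
+     // ```typescript
+     // for (let i = 0; i < records.length; i++) {
+     //   stream.ingestRecord(records[i]);
+     //   if (i % 10_000 === 9_999) {
+     //     await stream.flush(); // durability checkpoint every 10k records
+     //   }
+     // }
+     // await stream.flush(); // drain whatever remains
+     // ```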
+
+     /// Closes the stream gracefully.
+     ///
+     /// This method flushes all pending records, waits for acknowledgments, and then
+     /// closes the underlying gRPC connection. Always call this method when done with
+     /// the stream to ensure data integrity.
+     ///
+     /// # Errors
+     ///
+     /// - Returns an error if some records could not be acknowledged
+     /// - Network errors during the close operation
+     #[napi]
+     pub async fn close(&self) -> Result<()> {
+         let mut guard = self.inner.lock().await;
+         if let Some(mut stream) = guard.take() {
+             stream
+                 .close()
+                 .await
+                 .map_err(|e| Error::from_reason(format!("Failed to close stream: {}", e)))?;
+         }
+         Ok(())
+     }
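+
+     // To guarantee the connection is released even when ingestion throws, close in a
+     // `finally` block (a minimal sketch):
+     //
+     // ```typescript
+     // const stream = await sdk.createStream(tableProps, clientId, clientSecret);
+     // try {
+     //   await stream.ingestRecord(record);
+     // } finally {
+     //   await stream.close();
+     // }
+     // ```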
+
+     /// Gets the list of unacknowledged records.
+     ///
+     /// This method should only be called after a stream failure to retrieve records
+     /// that were sent but not acknowledged by the server. These records can be
+     /// re-ingested into a new stream.
+     ///
+     /// # Returns
+     ///
+     /// An array of Buffers containing the unacknowledged record payloads.
+     #[napi]
+     pub async fn get_unacked_records(&self) -> Result<Vec<Buffer>> {
+         let guard = self.inner.lock().await;
+         let stream = guard
+             .as_ref()
+             .ok_or_else(|| Error::from_reason("Stream has been closed"))?;
+
+         let unacked = stream
+             .get_unacked_records()
+             .await
+             .map_err(|e| Error::from_reason(format!("Failed to get unacked records: {}", e)))?;
+
+         Ok(unacked
+             .into_iter()
+             .map(|payload| match payload {
+                 RustRecordPayload::Proto(vec) => vec.into(),
+                 RustRecordPayload::Json(s) => s.into_bytes().into(),
+             })
+             .collect())
+     }
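+
+     // A manual recovery sketch using this method (recreateStream on the SDK automates
+     // the same idea at batch granularity). Note that JSON payloads come back as UTF-8
+     // Buffers, and a Buffer is re-ingested down the proto path by the converter above,
+     // so convert them back to strings on a JSON stream. `isJsonStream` is a placeholder
+     // for however your application tracks the stream's record type:
+     //
+     // ```typescript
+     // const unacked = await failedStream.getUnackedRecords();
+     // const newStream = await sdk.createStream(tableProps, clientId, clientSecret);
+     // for (const record of unacked) {
+     //   newStream.ingestRecord(isJsonStream ? record.toString("utf8") : record);
+     // }
+     // await newStream.flush();
+     // ```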
+
+     /// Gets unacknowledged records grouped by their original batches.
+     ///
+     /// This preserves the batch structure from ingestion:
+     /// - Each ingestRecord() call → 1-element batch
+     /// - Each ingestRecords() call → N-element batch
+     ///
+     /// Should only be called after a stream failure. All records are returned as Buffers
+     /// (JSON strings are converted to UTF-8 bytes).
+     ///
+     /// # Returns
+     ///
+     /// Array of batches, where each batch is an array of Buffers
+     ///
+     /// # Example
+     ///
+     /// ```typescript
+     /// try {
+     ///   await stream.ingestRecords(batch1);
+     ///   await stream.ingestRecords(batch2);
+     /// } catch (error) {
+     ///   const unackedBatches = await stream.getUnackedBatches();
+     ///
+     ///   // Re-ingest with new stream
+     ///   for (const batch of unackedBatches) {
+     ///     await newStream.ingestRecords(batch);
+     ///   }
+     /// }
+     /// ```
+     #[napi]
+     pub async fn get_unacked_batches(&self) -> Result<Vec<Vec<Buffer>>> {
+         let guard = self.inner.lock().await;
+         let stream = guard
+             .as_ref()
+             .ok_or_else(|| Error::from_reason("Stream has been closed"))?;
+
+         let unacked_batches = stream
+             .get_unacked_batches()
+             .await
+             .map_err(|e| Error::from_reason(format!("Failed to get unacked batches: {}", e)))?;
+
+         Ok(unacked_batches
+             .into_iter()
+             .map(|batch| {
+                 batch
+                     .into_iter()
+                     .map(|record| match record {
+                         RustRecordPayload::Proto(vec) => vec.into(),
+                         RustRecordPayload::Json(s) => s.into_bytes().into(),
+                     })
+                     .collect()
+             })
+             .collect())
+     }
+ }
+
+ /// JavaScript headers provider callback wrapper.
+ ///
+ /// Allows TypeScript code to provide custom authentication headers
+ /// by supplying a getHeadersCallback function.
+ #[napi(object)]
+ pub struct JsHeadersProvider {
+     /// JavaScript function: () => Promise<Array<[string, string]>>
+     pub get_headers_callback: JsFunction,
+ }
+
+ /// Internal adapter that wraps static headers as a HeadersProvider.
+ /// This is used for custom authentication in the TypeScript SDK.
+ struct StaticHeadersProvider {
+     headers: HashMap<&'static str, String>,
+ }
+
+ impl StaticHeadersProvider {
+     fn new(headers: Vec<(String, String)>) -> RustZerobusResult<Self> {
+         // Convert Vec<(String, String)> to HashMap<&'static str, String>.
+         // The keys are leaked (Box::leak) to obtain the 'static lifetime the
+         // trait signature requires; they live for the rest of the process.
+         let mut map = HashMap::new();
+         for (k, v) in headers {
+             let static_key: &'static str = Box::leak(k.into_boxed_str());
+             map.insert(static_key, v);
+         }
+
+         if !map.contains_key("authorization") {
+             return Err(RustZerobusError::InvalidArgument(
+                 "HeadersProvider must include 'authorization' header with Bearer token".to_string()
+             ));
+         }
+         if !map.contains_key("x-databricks-zerobus-table-name") {
+             return Err(RustZerobusError::InvalidArgument(
+                 "HeadersProvider must include 'x-databricks-zerobus-table-name' header".to_string()
+             ));
+         }
+
+         Ok(Self { headers: map })
+     }
+ }
+
+ #[async_trait]
+ impl RustHeadersProvider for StaticHeadersProvider {
+     async fn get_headers(&self) -> RustZerobusResult<HashMap<&'static str, String>> {
+         Ok(self.headers.clone())
+     }
+ }
+
+ /// Helper to create a threadsafe function from a JavaScript callback.
+ fn create_headers_tsfn(js_func: JsFunction) -> Result<ThreadsafeFunction<(), ErrorStrategy::Fatal>> {
+     js_func.create_threadsafe_function(0, |ctx| Ok(vec![ctx.value]))
+ }
+
+ /// Helper to call the headers callback and await its result.
+ async fn call_headers_tsfn(tsfn: ThreadsafeFunction<(), ErrorStrategy::Fatal>) -> Result<Vec<(String, String)>> {
+     tsfn.call_async::<Vec<(String, String)>>(())
+         .await
+         .map_err(|e| Error::from_reason(format!("Failed to call headers callback: {}", e)))
+ }
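+
+ // The callback contract these helpers bridge, seen from the TypeScript side: return
+ // the headers as an array of [name, value] tuples. Both headers shown are required by
+ // StaticHeadersProvider::new above (a sketch; where the token comes from is up to the
+ // caller, and the env variable name is illustrative):
+ //
+ // ```typescript
+ // const provider: JsHeadersProvider = {
+ //   getHeadersCallback: async () => [
+ //     ["authorization", `Bearer ${process.env.DATABRICKS_TOKEN}`],
+ //     ["x-databricks-zerobus-table-name", "catalog.schema.table"],
+ //   ],
+ // };
+ // ```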
595
+
596
+ /// The main SDK for interacting with the Databricks Zerobus service.
597
+ ///
598
+ /// This is the entry point for creating ingestion streams to Delta tables.
599
+ ///
600
+ /// # Example
601
+ ///
602
+ /// ```typescript
603
+ /// const sdk = new ZerobusSdk(
604
+ /// "https://workspace-id.zerobus.region.cloud.databricks.com",
605
+ /// "https://workspace.cloud.databricks.com"
606
+ /// );
607
+ ///
608
+ /// const stream = await sdk.createStream(
609
+ /// { tableName: "catalog.schema.table" },
610
+ /// "client-id",
611
+ /// "client-secret"
612
+ /// );
613
+ /// ```
614
+ #[napi]
615
+ pub struct ZerobusSdk {
616
+ inner: Arc<RustZerobusSdk>,
617
+ }
618
+
619
+ #[napi]
620
+ impl ZerobusSdk {
621
+ /// Creates a new Zerobus SDK instance.
622
+ ///
623
+ /// # Arguments
624
+ ///
625
+ /// * `zerobus_endpoint` - The Zerobus API endpoint URL
626
+ /// (e.g., "https://workspace-id.zerobus.region.cloud.databricks.com")
627
+ /// * `unity_catalog_url` - The Unity Catalog endpoint URL
628
+ /// (e.g., "https://workspace.cloud.databricks.com")
629
+ ///
630
+ /// # Errors
631
+ ///
632
+ /// - Invalid endpoint URLs
633
+ /// - Failed to extract workspace ID from the endpoint
634
+ #[napi(constructor)]
635
+ pub fn new(zerobus_endpoint: String, unity_catalog_url: String) -> Result<Self> {
636
+ let inner = RustZerobusSdk::new(zerobus_endpoint, unity_catalog_url)
637
+ .map_err(|e| Error::from_reason(format!("Failed to create SDK: {}", e)))?;
638
+
639
+ Ok(ZerobusSdk { inner: Arc::new(inner) })
640
+ }
+
+     /// Creates a new ingestion stream to a Delta table.
+     ///
+     /// This method establishes a bidirectional gRPC connection to the Zerobus service
+     /// and prepares it for data ingestion. By default, it uses OAuth 2.0 Client Credentials
+     /// authentication. For custom authentication (e.g., Personal Access Tokens), provide
+     /// a custom headers_provider.
+     ///
+     /// # Arguments
+     ///
+     /// * `table_properties` - Properties of the target table including name and optional schema
+     /// * `client_id` - OAuth 2.0 client ID (ignored if headers_provider is provided)
+     /// * `client_secret` - OAuth 2.0 client secret (ignored if headers_provider is provided)
+     /// * `options` - Optional stream configuration (uses defaults if not provided)
+     /// * `headers_provider` - Optional custom headers provider for authentication.
+     ///   If not provided, uses OAuth with client_id and client_secret.
+     ///
+     /// # Returns
+     ///
+     /// A Promise that resolves to a ZerobusStream ready for data ingestion.
+     ///
+     /// # Errors
+     ///
+     /// - Authentication failures (invalid credentials)
+     /// - Invalid table name or insufficient permissions
+     /// - Network connectivity issues
+     /// - Schema validation errors
+     ///
+     /// # Examples
+     ///
+     /// OAuth authentication (default):
+     /// ```typescript
+     /// const stream = await sdk.createStream(
+     ///   { tableName: "catalog.schema.table" },
+     ///   "client-id",
+     ///   "client-secret"
+     /// );
+     /// ```
+     ///
+     /// Custom authentication with a headers provider:
+     /// ```typescript
+     /// const headersProvider = {
+     ///   getHeadersCallback: async () => [
+     ///     ["authorization", `Bearer ${myToken}`],
+     ///     ["x-databricks-zerobus-table-name", tableName]
+     ///   ]
+     /// };
+     /// const stream = await sdk.createStream(
+     ///   { tableName: "catalog.schema.table" },
+     ///   "", // ignored
+     ///   "", // ignored
+     ///   undefined,
+     ///   headersProvider
+     /// );
+     /// ```
+     #[napi(ts_return_type = "Promise<ZerobusStream>")]
+     pub fn create_stream(
+         &self,
+         env: Env,
+         table_properties: TableProperties,
+         client_id: String,
+         client_secret: String,
+         options: Option<StreamConfigurationOptions>,
+         headers_provider: Option<JsHeadersProvider>,
+     ) -> Result<JsObject> {
+         let rust_table_props = table_properties.to_rust()?;
+         let rust_options = options.map(|o| o.into());
+
+         let headers_tsfn = match headers_provider {
+             Some(JsHeadersProvider { get_headers_callback }) => {
+                 Some(create_headers_tsfn(get_headers_callback)?)
+             }
+             None => None,
+         };
+
+         let sdk = self.inner.clone();
+
+         env.execute_tokio_future(
+             async move {
+                 let headers_provider_arc = if let Some(tsfn) = headers_tsfn {
+                     let headers = call_headers_tsfn(tsfn).await
+                         .map_err(|e| napi::Error::from_reason(format!("Headers callback failed: {}", e)))?;
+
+                     let static_provider = StaticHeadersProvider::new(headers)
+                         .map_err(|e| napi::Error::from_reason(format!("Invalid headers: {}", e)))?;
+
+                     Some(Arc::new(static_provider) as Arc<dyn RustHeadersProvider>)
+                 } else {
+                     None
+                 };
+
+                 let stream = if let Some(provider) = headers_provider_arc {
+                     sdk
+                         .create_stream_with_headers_provider(
+                             rust_table_props,
+                             provider,
+                             rust_options,
+                         )
+                         .await
+                         .map_err(|e| napi::Error::from_reason(format!("Failed to create stream: {}", e)))?
+                 } else {
+                     sdk
+                         .create_stream(rust_table_props, client_id, client_secret, rust_options)
+                         .await
+                         .map_err(|e| napi::Error::from_reason(format!("Failed to create stream: {}", e)))?
+                 };
+
+                 Ok(ZerobusStream {
+                     inner: Arc::new(Mutex::new(Some(stream))),
+                 })
+             },
+             |_env, stream| Ok(stream),
+         )
+     }
+
+     /// Recreates a stream with the same configuration and re-ingests unacknowledged batches.
+     ///
+     /// This method is the recommended approach for recovering from stream failures. It:
+     /// 1. Retrieves all unacknowledged batches from the failed stream
+     /// 2. Creates a new stream with identical configuration
+     /// 3. Re-ingests all unacknowledged batches in order
+     /// 4. Returns the new stream ready for continued ingestion
+     ///
+     /// # Arguments
+     ///
+     /// * `stream` - The failed stream to recreate. Note that a close()d stream cannot
+     ///   be recreated, since close() consumes the underlying stream handle.
+     ///
+     /// # Returns
+     ///
+     /// A Promise that resolves to a new ZerobusStream with all unacknowledged batches re-ingested.
+     ///
+     /// # Errors
+     ///
+     /// - Failed to retrieve unacknowledged batches from the original stream
+     /// - Authentication failures when creating the new stream
+     /// - Network connectivity issues during re-ingestion
+     ///
+     /// # Examples
+     ///
+     /// ```typescript
+     /// try {
+     ///   await stream.ingestRecords(batch);
+     /// } catch (error) {
+     ///   // Don't close() the failed stream first; recreateStream needs its handle.
+     ///   // Recreate the stream with all unacked batches re-ingested
+     ///   const newStream = await sdk.recreateStream(stream);
+     ///   // Continue ingesting with newStream
+     /// }
+     /// ```
+     #[napi]
+     pub async fn recreate_stream(&self, stream: &ZerobusStream) -> Result<ZerobusStream> {
+         let inner_guard = stream.inner.lock().await;
+         let rust_stream = inner_guard
+             .as_ref()
+             .ok_or_else(|| Error::from_reason("Stream has been closed"))?;
+
+         let new_rust_stream = self
+             .inner
+             .recreate_stream(rust_stream)
+             .await
+             .map_err(|e| Error::from_reason(format!("Failed to recreate stream: {}", e)))?;
+
+         Ok(ZerobusStream {
+             inner: Arc::new(Mutex::new(Some(new_rust_stream))),
+         })
+     }
+ }
+
+ /// Helper function to decode base64 strings.
+ fn base64_decode(input: &str) -> std::result::Result<Vec<u8>, String> {
+     use base64::{engine::general_purpose::STANDARD, Engine};
+     STANDARD
+         .decode(input)
+         .map_err(|e| format!("Base64 decode error: {}", e))
+ }