@databricks/zerobus-ingest-sdk 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +19 -4
- package/README.md +111 -151
- package/package.json +26 -17
- package/src/lib.rs +890 -38
- package/zerobus-ingest-sdk.linux-arm64-gnu.node +0 -0
- package/zerobus-ingest-sdk.linux-x64-gnu.node +0 -0
- package/zerobus-ingest-sdk.win32-x64-msvc.node +0 -0
- package/Cargo.lock +0 -2233
- package/LICENSE +0 -69
package/src/lib.rs
CHANGED
|
@@ -23,8 +23,9 @@ use databricks_zerobus_ingest_sdk::{
|
|
|
23
23
|
TableProperties as RustTableProperties, ZerobusSdk as RustZerobusSdk,
|
|
24
24
|
ZerobusStream as RustZerobusStream,
|
|
25
25
|
HeadersProvider as RustHeadersProvider,
|
|
26
|
-
ZerobusError as RustZerobusError,
|
|
27
26
|
ZerobusResult as RustZerobusResult,
|
|
27
|
+
ZerobusError as RustZerobusError,
|
|
28
|
+
DefaultTokenFactory,
|
|
28
29
|
};
|
|
29
30
|
use databricks_zerobus_ingest_sdk::databricks::zerobus::RecordType as RustRecordType;
|
|
30
31
|
use async_trait::async_trait;
|
|
@@ -33,6 +34,9 @@ use std::collections::HashMap;
|
|
|
33
34
|
use std::sync::Arc;
|
|
34
35
|
use tokio::sync::Mutex;
|
|
35
36
|
|
|
37
|
+
/// User-Agent header value for TypeScript SDK requests.
|
|
38
|
+
const TS_SDK_USER_AGENT: &str = concat!("zerobus-sdk-ts/", env!("CARGO_PKG_VERSION"));
|
|
39
|
+
|
|
36
40
|
/// Record serialization format.
|
|
37
41
|
///
|
|
38
42
|
/// Specifies how records should be encoded when ingested into the stream.
|
|
@@ -48,7 +52,6 @@ pub enum RecordType {
|
|
|
48
52
|
///
|
|
49
53
|
/// These options control stream behavior including recovery, timeouts, and inflight limits.
|
|
50
54
|
#[napi(object)]
|
|
51
|
-
#[derive(Debug, Clone)]
|
|
52
55
|
pub struct StreamConfigurationOptions {
|
|
53
56
|
/// Maximum number of unacknowledged requests that can be in flight.
|
|
54
57
|
/// Default: 10,000
|
|
@@ -82,6 +85,15 @@ pub struct StreamConfigurationOptions {
|
|
|
82
85
|
/// Use RecordType.Json for JSON encoding or RecordType.Proto for Protocol Buffers.
|
|
83
86
|
/// Default: RecordType.Proto (Protocol Buffers)
|
|
84
87
|
pub record_type: Option<i32>,
|
|
88
|
+
|
|
89
|
+
/// Maximum wait time during graceful stream close in milliseconds.
|
|
90
|
+
/// When the server signals stream closure, this controls how long to wait
|
|
91
|
+
/// for in-flight records to be acknowledged.
|
|
92
|
+
/// - None (undefined): Wait for full server-specified duration
|
|
93
|
+
/// - Some(0): Immediately trigger recovery without waiting
|
|
94
|
+
/// - Some(x): Wait up to min(x, server_duration) milliseconds
|
|
95
|
+
/// Default: None (wait for full server duration)
|
|
96
|
+
pub stream_paused_max_wait_time_ms: Option<u32>,
|
|
85
97
|
}
|
|
86
98
|
|
|
87
99
|
impl From<StreamConfigurationOptions> for RustStreamOptions {
|
|
@@ -103,6 +115,9 @@ impl From<StreamConfigurationOptions> for RustStreamOptions {
|
|
|
103
115
|
flush_timeout_ms: opts.flush_timeout_ms.map(|v| v as u64).unwrap_or(default.flush_timeout_ms),
|
|
104
116
|
server_lack_of_ack_timeout_ms: opts.server_lack_of_ack_timeout_ms.map(|v| v as u64).unwrap_or(default.server_lack_of_ack_timeout_ms),
|
|
105
117
|
record_type,
|
|
118
|
+
callback_max_wait_time_ms: None, // Callbacks not supported in TS SDK
|
|
119
|
+
stream_paused_max_wait_time_ms: opts.stream_paused_max_wait_time_ms.map(|v| v as u64),
|
|
120
|
+
ack_callback: None, // Callbacks not supported in TS SDK
|
|
106
121
|
}
|
|
107
122
|
}
|
|
108
123
|
}
|
|
@@ -255,6 +270,9 @@ pub struct ZerobusStream {
|
|
|
255
270
|
impl ZerobusStream {
|
|
256
271
|
/// Ingests a single record into the stream.
|
|
257
272
|
///
|
|
273
|
+
/// **@deprecated** Use `ingestRecordOffset()` instead, which returns the offset directly
|
|
274
|
+
/// after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
|
|
275
|
+
///
|
|
258
276
|
/// This method accepts either:
|
|
259
277
|
/// - A Protocol Buffer encoded record as a Buffer (Vec<u8>)
|
|
260
278
|
/// - A JSON string
|
|
@@ -288,6 +306,7 @@ impl ZerobusStream {
|
|
|
288
306
|
///
|
|
289
307
|
/// A Promise that resolves to the offset ID when the server acknowledges the record.
|
|
290
308
|
#[napi(ts_return_type = "Promise<bigint>")]
|
|
309
|
+
#[allow(deprecated)]
|
|
291
310
|
pub fn ingest_record(&self, env: Env, payload: Unknown) -> Result<JsObject> {
|
|
292
311
|
let record_payload = convert_js_to_record_payload(&env, payload)?;
|
|
293
312
|
|
|
@@ -326,6 +345,9 @@ impl ZerobusStream {
|
|
|
326
345
|
|
|
327
346
|
/// Ingests multiple records as a single atomic batch.
|
|
328
347
|
///
|
|
348
|
+
/// **@deprecated** Use `ingestRecordsOffset()` instead, which returns the offset directly
|
|
349
|
+
/// after queuing. Then use `waitForOffset()` to wait for acknowledgment when needed.
|
|
350
|
+
///
|
|
329
351
|
/// This method accepts an array of records (Protocol Buffer buffers or JSON strings)
|
|
330
352
|
/// and ingests them as a batch. The batch receives a single acknowledgment from
|
|
331
353
|
/// the server with all-or-nothing semantics.
|
|
@@ -354,6 +376,7 @@ impl ZerobusStream {
|
|
|
354
376
|
/// }
|
|
355
377
|
/// ```
|
|
356
378
|
#[napi(ts_return_type = "Promise<bigint | null>")]
|
|
379
|
+
#[allow(deprecated)]
|
|
357
380
|
pub fn ingest_records(&self, env: Env, records: Vec<Unknown>) -> Result<JsObject> {
|
|
358
381
|
let record_payloads: Result<Vec<RustRecordPayload>> = records
|
|
359
382
|
.into_iter()
|
|
@@ -404,6 +427,172 @@ impl ZerobusStream {
|
|
|
404
427
|
)
|
|
405
428
|
}
|
|
406
429
|
|
|
430
|
+
/// Ingests a single record and returns a future that resolves to the offset ID after queuing.
|
|
431
|
+
///
|
|
432
|
+
/// Unlike `ingestRecord()`, this method's Promise resolves immediately after
|
|
433
|
+
/// the record is queued, without waiting for server acknowledgment. Use
|
|
434
|
+
/// `waitForOffset()` to wait for acknowledgment when needed.
|
|
435
|
+
///
|
|
436
|
+
/// This is the recommended API for high-throughput scenarios where you want to
|
|
437
|
+
/// decouple record ingestion from acknowledgment tracking.
|
|
438
|
+
///
|
|
439
|
+
/// # Arguments
|
|
440
|
+
///
|
|
441
|
+
/// * `payload` - The record data (Buffer, string, protobuf message, or plain object)
|
|
442
|
+
///
|
|
443
|
+
/// # Returns
|
|
444
|
+
///
|
|
445
|
+
/// `Promise<bigint>` - Resolves to the offset ID immediately after the record is queued
|
|
446
|
+
/// (does not wait for server acknowledgment).
|
|
447
|
+
///
|
|
448
|
+
/// # Example
|
|
449
|
+
///
|
|
450
|
+
/// ```typescript
|
|
451
|
+
/// // Promise resolves immediately with offset (before server ack)
|
|
452
|
+
/// const offset1 = await stream.ingestRecordOffset(record1);
|
|
453
|
+
/// const offset2 = await stream.ingestRecordOffset(record2);
|
|
454
|
+
/// // Wait for both to be acknowledged
|
|
455
|
+
/// await stream.waitForOffset(offset2);
|
|
456
|
+
/// ```
|
|
457
|
+
#[napi(ts_return_type = "Promise<bigint>")]
|
|
458
|
+
pub fn ingest_record_offset(&self, env: Env, payload: Unknown) -> Result<JsObject> {
|
|
459
|
+
let record_payload = convert_js_to_record_payload(&env, payload)?;
|
|
460
|
+
|
|
461
|
+
let stream = self.inner.clone();
|
|
462
|
+
|
|
463
|
+
env.execute_tokio_future(
|
|
464
|
+
async move {
|
|
465
|
+
let mut guard = stream.lock().await;
|
|
466
|
+
let stream_ref = guard
|
|
467
|
+
.as_mut()
|
|
468
|
+
.ok_or_else(|| napi::Error::from_reason("Stream has been closed"))?;
|
|
469
|
+
|
|
470
|
+
stream_ref
|
|
471
|
+
.ingest_record_offset(record_payload)
|
|
472
|
+
.await
|
|
473
|
+
.map_err(|e| napi::Error::from_reason(format!("Failed to ingest record: {}", e)))
|
|
474
|
+
},
|
|
475
|
+
|env, offset_id| {
|
|
476
|
+
let offset_str = offset_id.to_string();
|
|
477
|
+
let global: JsGlobal = env.get_global()?;
|
|
478
|
+
let bigint_ctor: JsFunction = global.get_named_property("BigInt")?;
|
|
479
|
+
let js_str = env.create_string(&offset_str)?;
|
|
480
|
+
bigint_ctor.call(None, &[js_str.into_unknown()])
|
|
481
|
+
},
|
|
482
|
+
)
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
/// Ingests multiple records as a batch and returns a future that resolves to the offset ID after queuing.
|
|
486
|
+
///
|
|
487
|
+
/// Unlike `ingestRecords()`, this method's Promise resolves immediately after
|
|
488
|
+
/// the batch is queued, without waiting for server acknowledgment. Use
|
|
489
|
+
/// `waitForOffset()` to wait for acknowledgment when needed.
|
|
490
|
+
///
|
|
491
|
+
/// # Arguments
|
|
492
|
+
///
|
|
493
|
+
/// * `records` - Array of record data
|
|
494
|
+
///
|
|
495
|
+
/// # Returns
|
|
496
|
+
///
|
|
497
|
+
/// `Promise<bigint | null>` - Resolves to the offset ID immediately after the batch
|
|
498
|
+
/// is queued (does not wait for server acknowledgment). Returns null for empty batches.
|
|
499
|
+
///
|
|
500
|
+
/// # Example
|
|
501
|
+
///
|
|
502
|
+
/// ```typescript
|
|
503
|
+
/// // Promise resolves immediately with offset (before server ack)
|
|
504
|
+
/// const offset = await stream.ingestRecordsOffset(batch);
|
|
505
|
+
/// if (offset !== null) {
|
|
506
|
+
/// await stream.waitForOffset(offset);
|
|
507
|
+
/// }
|
|
508
|
+
/// ```
|
|
509
|
+
#[napi(ts_return_type = "Promise<bigint | null>")]
|
|
510
|
+
pub fn ingest_records_offset(&self, env: Env, records: Vec<Unknown>) -> Result<JsObject> {
|
|
511
|
+
let record_payloads: Result<Vec<RustRecordPayload>> = records
|
|
512
|
+
.into_iter()
|
|
513
|
+
.map(|payload| convert_js_to_record_payload(&env, payload))
|
|
514
|
+
.collect();
|
|
515
|
+
|
|
516
|
+
let record_payloads = record_payloads?;
|
|
517
|
+
|
|
518
|
+
let stream = self.inner.clone();
|
|
519
|
+
|
|
520
|
+
env.execute_tokio_future(
|
|
521
|
+
async move {
|
|
522
|
+
let mut guard = stream.lock().await;
|
|
523
|
+
let stream_ref = guard
|
|
524
|
+
.as_mut()
|
|
525
|
+
.ok_or_else(|| napi::Error::from_reason("Stream has been closed"))?;
|
|
526
|
+
|
|
527
|
+
stream_ref
|
|
528
|
+
.ingest_records_offset(record_payloads)
|
|
529
|
+
.await
|
|
530
|
+
.map_err(|e| napi::Error::from_reason(format!("Failed to ingest batch: {}", e)))
|
|
531
|
+
},
|
|
532
|
+
|env, result| match result {
|
|
533
|
+
Some(offset_id) => {
|
|
534
|
+
let offset_str = offset_id.to_string();
|
|
535
|
+
let global: JsGlobal = env.get_global()?;
|
|
536
|
+
let bigint_ctor: JsFunction = global.get_named_property("BigInt")?;
|
|
537
|
+
let js_str = env.create_string(&offset_str)?;
|
|
538
|
+
let bigint = bigint_ctor.call(None, &[js_str.into_unknown()])?;
|
|
539
|
+
Ok(bigint.into_unknown())
|
|
540
|
+
},
|
|
541
|
+
None => env.get_null().map(|v| v.into_unknown()),
|
|
542
|
+
},
|
|
543
|
+
)
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
/// Waits for a specific offset to be acknowledged by the server.
|
|
547
|
+
///
|
|
548
|
+
/// Use this method with `ingestRecordOffset()` and `ingestRecordsOffset()` to
|
|
549
|
+
/// selectively wait for acknowledgments. This allows you to ingest many records
|
|
550
|
+
/// quickly and then wait only for specific offsets when needed.
|
|
551
|
+
///
|
|
552
|
+
/// # Arguments
|
|
553
|
+
///
|
|
554
|
+
/// * `offset_id` - The offset ID to wait for (returned by ingestRecordOffset/ingestRecordsOffset)
|
|
555
|
+
///
|
|
556
|
+
/// # Errors
|
|
557
|
+
///
|
|
558
|
+
/// - Timeout if acknowledgment takes too long
|
|
559
|
+
/// - Server errors propagated immediately (no waiting for timeout)
|
|
560
|
+
///
|
|
561
|
+
/// # Example
|
|
562
|
+
///
|
|
563
|
+
/// ```typescript
|
|
564
|
+
/// const offsets = [];
|
|
565
|
+
/// for (const record of records) {
|
|
566
|
+
/// offsets.push(await stream.ingestRecordOffset(record));
|
|
567
|
+
/// }
|
|
568
|
+
/// // Wait for the last offset (implies all previous are also acknowledged)
|
|
569
|
+
/// await stream.waitForOffset(offsets[offsets.length - 1]);
|
|
570
|
+
/// ```
|
|
571
|
+
#[napi(ts_args_type = "offsetId: bigint", ts_return_type = "Promise<void>")]
|
|
572
|
+
pub fn wait_for_offset(&self, env: Env, offset_id: JsUnknown) -> Result<JsObject> {
|
|
573
|
+
let global: JsGlobal = env.get_global()?;
|
|
574
|
+
let number_ctor: JsFunction = global.get_named_property("Number")?;
|
|
575
|
+
let num_result: JsUnknown = number_ctor.call(None, &[offset_id])?;
|
|
576
|
+
let offset: i64 = num_result.coerce_to_number()?.get_int64()?;
|
|
577
|
+
|
|
578
|
+
let stream = self.inner.clone();
|
|
579
|
+
|
|
580
|
+
env.execute_tokio_future(
|
|
581
|
+
async move {
|
|
582
|
+
let guard = stream.lock().await;
|
|
583
|
+
let stream_ref = guard
|
|
584
|
+
.as_ref()
|
|
585
|
+
.ok_or_else(|| napi::Error::from_reason("Stream has been closed"))?;
|
|
586
|
+
|
|
587
|
+
stream_ref
|
|
588
|
+
.wait_for_offset(offset)
|
|
589
|
+
.await
|
|
590
|
+
.map_err(|e| napi::Error::from_reason(format!("Failed to wait for offset: {}", e)))
|
|
591
|
+
},
|
|
592
|
+
|_env, _| Ok(()),
|
|
593
|
+
)
|
|
594
|
+
}
|
|
595
|
+
|
|
407
596
|
/// Flushes all pending records and waits for acknowledgments.
|
|
408
597
|
///
|
|
409
598
|
/// This method ensures all previously ingested records have been sent to the server
|
|
@@ -570,6 +759,11 @@ impl StaticHeadersProvider {
|
|
|
570
759
|
));
|
|
571
760
|
}
|
|
572
761
|
|
|
762
|
+
// Add TS user agent if not provided
|
|
763
|
+
if !map.contains_key("user-agent") {
|
|
764
|
+
map.insert("user-agent", TS_SDK_USER_AGENT.to_string());
|
|
765
|
+
}
|
|
766
|
+
|
|
573
767
|
Ok(Self { headers: map })
|
|
574
768
|
}
|
|
575
769
|
}
|
|
@@ -588,9 +782,70 @@ fn create_headers_tsfn(js_func: JsFunction) -> Result<ThreadsafeFunction<(), Err
|
|
|
588
782
|
|
|
589
783
|
/// Helper to call headers callback and get result
|
|
590
784
|
async fn call_headers_tsfn(tsfn: ThreadsafeFunction<(), ErrorStrategy::Fatal>) -> Result<Vec<(String, String)>> {
|
|
591
|
-
|
|
785
|
+
let raw_headers: Vec<Vec<String>> = tsfn.call_async(())
|
|
592
786
|
.await
|
|
593
|
-
.map_err(|e| Error::from_reason(format!("Failed to call headers callback: {}", e)))
|
|
787
|
+
.map_err(|e| Error::from_reason(format!("Failed to call headers callback: {}", e)))?;
|
|
788
|
+
|
|
789
|
+
let headers: Vec<(String, String)> = raw_headers
|
|
790
|
+
.into_iter()
|
|
791
|
+
.filter_map(|pair| {
|
|
792
|
+
if pair.len() >= 2 {
|
|
793
|
+
Some((pair[0].clone(), pair[1].clone()))
|
|
794
|
+
} else {
|
|
795
|
+
None
|
|
796
|
+
}
|
|
797
|
+
})
|
|
798
|
+
.collect();
|
|
799
|
+
|
|
800
|
+
Ok(headers)
|
|
801
|
+
}
|
|
802
|
+
|
|
803
|
+
/// OAuth headers provider for TypeScript SDK.
|
|
804
|
+
/// Uses the Rust SDK's token factory but with the TS user agent.
|
|
805
|
+
struct TsOAuthHeadersProvider {
|
|
806
|
+
client_id: String,
|
|
807
|
+
client_secret: String,
|
|
808
|
+
table_name: String,
|
|
809
|
+
workspace_id: String,
|
|
810
|
+
unity_catalog_url: String,
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
impl TsOAuthHeadersProvider {
|
|
814
|
+
fn new(
|
|
815
|
+
client_id: String,
|
|
816
|
+
client_secret: String,
|
|
817
|
+
table_name: String,
|
|
818
|
+
workspace_id: String,
|
|
819
|
+
unity_catalog_url: String,
|
|
820
|
+
) -> Self {
|
|
821
|
+
Self {
|
|
822
|
+
client_id,
|
|
823
|
+
client_secret,
|
|
824
|
+
table_name,
|
|
825
|
+
workspace_id,
|
|
826
|
+
unity_catalog_url,
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
#[async_trait]
|
|
832
|
+
impl RustHeadersProvider for TsOAuthHeadersProvider {
|
|
833
|
+
async fn get_headers(&self) -> RustZerobusResult<HashMap<&'static str, String>> {
|
|
834
|
+
let token = DefaultTokenFactory::get_token(
|
|
835
|
+
&self.unity_catalog_url,
|
|
836
|
+
&self.table_name,
|
|
837
|
+
&self.client_id,
|
|
838
|
+
&self.client_secret,
|
|
839
|
+
&self.workspace_id,
|
|
840
|
+
)
|
|
841
|
+
.await?;
|
|
842
|
+
|
|
843
|
+
let mut headers = HashMap::new();
|
|
844
|
+
headers.insert("authorization", format!("Bearer {}", token));
|
|
845
|
+
headers.insert("x-databricks-zerobus-table-name", self.table_name.clone());
|
|
846
|
+
headers.insert("user-agent", TS_SDK_USER_AGENT.to_string());
|
|
847
|
+
Ok(headers)
|
|
848
|
+
}
|
|
594
849
|
}
|
|
595
850
|
|
|
596
851
|
/// The main SDK for interacting with the Databricks Zerobus service.
|
|
@@ -614,6 +869,10 @@ async fn call_headers_tsfn(tsfn: ThreadsafeFunction<(), ErrorStrategy::Fatal>) -
|
|
|
614
869
|
#[napi]
|
|
615
870
|
pub struct ZerobusSdk {
|
|
616
871
|
inner: Arc<RustZerobusSdk>,
|
|
872
|
+
/// Stored for creating TsOAuthHeadersProvider
|
|
873
|
+
workspace_id: String,
|
|
874
|
+
/// Stored for creating TsOAuthHeadersProvider
|
|
875
|
+
unity_catalog_url: String,
|
|
617
876
|
}
|
|
618
877
|
|
|
619
878
|
#[napi]
|
|
@@ -633,10 +892,26 @@ impl ZerobusSdk {
|
|
|
633
892
|
/// - Failed to extract workspace ID from the endpoint
|
|
634
893
|
#[napi(constructor)]
|
|
635
894
|
pub fn new(zerobus_endpoint: String, unity_catalog_url: String) -> Result<Self> {
|
|
636
|
-
let
|
|
895
|
+
let workspace_id = zerobus_endpoint
|
|
896
|
+
.strip_prefix("https://")
|
|
897
|
+
.or_else(|| zerobus_endpoint.strip_prefix("http://"))
|
|
898
|
+
.and_then(|s| s.split('.').next())
|
|
899
|
+
.map(|s| s.to_string())
|
|
900
|
+
.ok_or_else(|| {
|
|
901
|
+
Error::from_reason("Failed to extract workspace_id from zerobus_endpoint".to_string())
|
|
902
|
+
})?;
|
|
903
|
+
|
|
904
|
+
let inner = RustZerobusSdk::builder()
|
|
905
|
+
.endpoint(&zerobus_endpoint)
|
|
906
|
+
.unity_catalog_url(&unity_catalog_url)
|
|
907
|
+
.build()
|
|
637
908
|
.map_err(|e| Error::from_reason(format!("Failed to create SDK: {}", e)))?;
|
|
638
909
|
|
|
639
|
-
Ok(ZerobusSdk {
|
|
910
|
+
Ok(ZerobusSdk {
|
|
911
|
+
inner: Arc::new(inner),
|
|
912
|
+
workspace_id,
|
|
913
|
+
unity_catalog_url,
|
|
914
|
+
})
|
|
640
915
|
}
|
|
641
916
|
|
|
642
917
|
/// Creates a new ingestion stream to a Delta table.
|
|
@@ -651,7 +926,7 @@ impl ZerobusSdk {
|
|
|
651
926
|
/// * `table_properties` - Properties of the target table including name and optional schema
|
|
652
927
|
/// * `client_id` - OAuth 2.0 client ID (ignored if headers_provider is provided)
|
|
653
928
|
/// * `client_secret` - OAuth 2.0 client secret (ignored if headers_provider is provided)
|
|
654
|
-
/// * `options` - Optional stream configuration (
|
|
929
|
+
/// * `options` - Optional stream configuration (timeouts, recovery settings, etc.)
|
|
655
930
|
/// * `headers_provider` - Optional custom headers provider for authentication.
|
|
656
931
|
/// If not provided, uses OAuth with client_id and client_secret.
|
|
657
932
|
///
|
|
@@ -666,31 +941,28 @@ impl ZerobusSdk {
|
|
|
666
941
|
/// - Network connectivity issues
|
|
667
942
|
/// - Schema validation errors
|
|
668
943
|
///
|
|
669
|
-
/// #
|
|
944
|
+
/// # Example
|
|
670
945
|
///
|
|
671
|
-
/// OAuth authentication (default):
|
|
672
946
|
/// ```typescript
|
|
947
|
+
/// // OAuth authentication (default)
|
|
673
948
|
/// const stream = await sdk.createStream(
|
|
674
949
|
/// { tableName: "catalog.schema.table" },
|
|
675
950
|
/// "client-id",
|
|
676
951
|
/// "client-secret"
|
|
677
952
|
/// );
|
|
678
|
-
/// ```
|
|
679
953
|
///
|
|
680
|
-
/// Custom authentication with headers provider
|
|
681
|
-
/// ```typescript
|
|
682
|
-
/// const headersProvider = {
|
|
683
|
-
/// getHeadersCallback: async () => [
|
|
684
|
-
/// ["authorization", `Bearer ${myToken}`],
|
|
685
|
-
/// ["x-databricks-zerobus-table-name", tableName]
|
|
686
|
-
/// ]
|
|
687
|
-
/// };
|
|
954
|
+
/// // Custom authentication with headers provider
|
|
688
955
|
/// const stream = await sdk.createStream(
|
|
689
956
|
/// { tableName: "catalog.schema.table" },
|
|
690
957
|
/// "", // ignored
|
|
691
958
|
/// "", // ignored
|
|
692
959
|
/// undefined,
|
|
693
|
-
///
|
|
960
|
+
/// {
|
|
961
|
+
/// getHeadersCallback: async () => [
|
|
962
|
+
/// ["authorization", `Bearer ${myToken}`],
|
|
963
|
+
/// ["x-databricks-zerobus-table-name", tableName]
|
|
964
|
+
/// ]
|
|
965
|
+
/// }
|
|
694
966
|
/// );
|
|
695
967
|
/// ```
|
|
696
968
|
#[napi(ts_return_type = "Promise<ZerobusStream>")]
|
|
@@ -704,7 +976,7 @@ impl ZerobusSdk {
|
|
|
704
976
|
headers_provider: Option<JsHeadersProvider>,
|
|
705
977
|
) -> Result<JsObject> {
|
|
706
978
|
let rust_table_props = table_properties.to_rust()?;
|
|
707
|
-
let rust_options = options.map(|o| o.into());
|
|
979
|
+
let rust_options: RustStreamOptions = options.map(|o| o.into()).unwrap_or_default();
|
|
708
980
|
|
|
709
981
|
let headers_tsfn = match headers_provider {
|
|
710
982
|
Some(JsHeadersProvider { get_headers_callback }) => {
|
|
@@ -714,36 +986,40 @@ impl ZerobusSdk {
|
|
|
714
986
|
};
|
|
715
987
|
|
|
716
988
|
let sdk = self.inner.clone();
|
|
989
|
+
let workspace_id = self.workspace_id.clone();
|
|
990
|
+
let unity_catalog_url = self.unity_catalog_url.clone();
|
|
991
|
+
let table_name = table_properties.table_name.clone();
|
|
717
992
|
|
|
718
993
|
env.execute_tokio_future(
|
|
719
994
|
async move {
|
|
720
|
-
let headers_provider_arc = if let Some(tsfn) = headers_tsfn {
|
|
995
|
+
let headers_provider_arc: Arc<dyn RustHeadersProvider> = if let Some(tsfn) = headers_tsfn {
|
|
996
|
+
// Custom headers provider from JavaScript callback
|
|
721
997
|
let headers = call_headers_tsfn(tsfn).await
|
|
722
998
|
.map_err(|e| napi::Error::from_reason(format!("Headers callback failed: {}", e)))?;
|
|
723
999
|
|
|
724
1000
|
let static_provider = StaticHeadersProvider::new(headers)
|
|
725
1001
|
.map_err(|e| napi::Error::from_reason(format!("Invalid headers: {}", e)))?;
|
|
726
1002
|
|
|
727
|
-
|
|
1003
|
+
Arc::new(static_provider)
|
|
728
1004
|
} else {
|
|
729
|
-
|
|
1005
|
+
// Default OAuth with TS user agent
|
|
1006
|
+
Arc::new(TsOAuthHeadersProvider::new(
|
|
1007
|
+
client_id,
|
|
1008
|
+
client_secret,
|
|
1009
|
+
table_name,
|
|
1010
|
+
workspace_id,
|
|
1011
|
+
unity_catalog_url,
|
|
1012
|
+
))
|
|
730
1013
|
};
|
|
731
1014
|
|
|
732
|
-
let stream =
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
.map_err(|e| napi::Error::from_reason(format!("Failed to create stream: {}", e)))?
|
|
741
|
-
} else {
|
|
742
|
-
sdk
|
|
743
|
-
.create_stream(rust_table_props, client_id, client_secret, rust_options)
|
|
744
|
-
.await
|
|
745
|
-
.map_err(|e| napi::Error::from_reason(format!("Failed to create stream: {}", e)))?
|
|
746
|
-
};
|
|
1015
|
+
let stream = sdk
|
|
1016
|
+
.create_stream_with_headers_provider(
|
|
1017
|
+
rust_table_props,
|
|
1018
|
+
headers_provider_arc,
|
|
1019
|
+
Some(rust_options),
|
|
1020
|
+
)
|
|
1021
|
+
.await
|
|
1022
|
+
.map_err(|e| napi::Error::from_reason(format!("Failed to create stream: {}", e)))?;
|
|
747
1023
|
|
|
748
1024
|
Ok(ZerobusStream {
|
|
749
1025
|
inner: Arc::new(Mutex::new(Some(stream))),
|
|
@@ -813,3 +1089,579 @@ fn base64_decode(input: &str) -> std::result::Result<Vec<u8>, String> {
|
|
|
813
1089
|
.decode(input)
|
|
814
1090
|
.map_err(|e| format!("Base64 decode error: {}", e))
|
|
815
1091
|
}
|
|
1092
|
+
|
|
1093
|
+
// =============================================================================
|
|
1094
|
+
// Arrow Flight Support (Experimental/Unsupported)
|
|
1095
|
+
// Enabled with feature flag: cargo build --features arrow-flight
|
|
1096
|
+
// =============================================================================
|
|
1097
|
+
|
|
1098
|
+
#[cfg(feature = "arrow-flight")]
|
|
1099
|
+
use databricks_zerobus_ingest_sdk::{
|
|
1100
|
+
ArrowStreamConfigurationOptions as RustArrowStreamOptions,
|
|
1101
|
+
ArrowTableProperties as RustArrowTableProperties,
|
|
1102
|
+
ZerobusArrowStream as RustZerobusArrowStream,
|
|
1103
|
+
ArrowSchema as RustArrowSchema,
|
|
1104
|
+
RecordBatch as RustRecordBatch,
|
|
1105
|
+
Field as RustField,
|
|
1106
|
+
DataType as RustDataType,
|
|
1107
|
+
};
|
|
1108
|
+
#[cfg(feature = "arrow-flight")]
|
|
1109
|
+
use arrow_ipc::reader::StreamReader;
|
|
1110
|
+
#[cfg(feature = "arrow-flight")]
|
|
1111
|
+
use arrow_ipc::writer::StreamWriter;
|
|
1112
|
+
#[cfg(feature = "arrow-flight")]
|
|
1113
|
+
use std::io::Cursor;
|
|
1114
|
+
|
|
1115
|
+
/// IPC compression type for Arrow Flight streams.
|
|
1116
|
+
///
|
|
1117
|
+
/// **Experimental/Unsupported**: Arrow Flight support is experimental and not yet
|
|
1118
|
+
/// supported for production use. The API may change in future releases.
|
|
1119
|
+
#[cfg(feature = "arrow-flight")]
|
|
1120
|
+
#[napi]
|
|
1121
|
+
pub enum IpcCompressionType {
|
|
1122
|
+
/// LZ4 frame compression - fast compression with moderate ratio
|
|
1123
|
+
Lz4Frame = 0,
|
|
1124
|
+
/// Zstandard compression - better compression ratio, slightly slower
|
|
1125
|
+
Zstd = 1,
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
/// Configuration options for Arrow Flight streams.
|
|
1129
|
+
///
|
|
1130
|
+
/// **Experimental/Unsupported**: Arrow Flight support is experimental and not yet
|
|
1131
|
+
/// supported for production use. The API may change in future releases.
|
|
1132
|
+
#[cfg(feature = "arrow-flight")]
|
|
1133
|
+
#[napi(object)]
|
|
1134
|
+
#[derive(Debug, Clone)]
|
|
1135
|
+
pub struct ArrowStreamConfigurationOptions {
|
|
1136
|
+
/// Maximum number of batches that can be in-flight (sent but not acknowledged).
|
|
1137
|
+
/// Default: 1,000
|
|
1138
|
+
pub max_inflight_batches: Option<u32>,
|
|
1139
|
+
|
|
1140
|
+
/// Whether to enable automatic stream recovery on failure.
|
|
1141
|
+
/// Default: true
|
|
1142
|
+
pub recovery: Option<bool>,
|
|
1143
|
+
|
|
1144
|
+
/// Timeout for recovery operations in milliseconds.
|
|
1145
|
+
/// Default: 15,000 (15 seconds)
|
|
1146
|
+
pub recovery_timeout_ms: Option<u32>,
|
|
1147
|
+
|
|
1148
|
+
/// Delay between recovery retry attempts in milliseconds.
|
|
1149
|
+
/// Default: 2,000 (2 seconds)
|
|
1150
|
+
pub recovery_backoff_ms: Option<u32>,
|
|
1151
|
+
|
|
1152
|
+
/// Maximum number of recovery attempts before giving up.
|
|
1153
|
+
/// Default: 4
|
|
1154
|
+
pub recovery_retries: Option<u32>,
|
|
1155
|
+
|
|
1156
|
+
/// Timeout waiting for server acknowledgments in milliseconds.
|
|
1157
|
+
/// Default: 60,000 (1 minute)
|
|
1158
|
+
pub server_lack_of_ack_timeout_ms: Option<u32>,
|
|
1159
|
+
|
|
1160
|
+
/// Timeout for flush operations in milliseconds.
|
|
1161
|
+
/// Default: 300,000 (5 minutes)
|
|
1162
|
+
pub flush_timeout_ms: Option<u32>,
|
|
1163
|
+
|
|
1164
|
+
/// Timeout for connection establishment in milliseconds.
|
|
1165
|
+
/// Default: 30,000 (30 seconds)
|
|
1166
|
+
pub connection_timeout_ms: Option<u32>,
|
|
1167
|
+
|
|
1168
|
+
/// Optional IPC compression type (0 = LZ4Frame, 1 = Zstd, undefined = no compression)
|
|
1169
|
+
pub ipc_compression: Option<i32>,
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
#[cfg(feature = "arrow-flight")]
|
|
1173
|
+
impl From<ArrowStreamConfigurationOptions> for RustArrowStreamOptions {
|
|
1174
|
+
fn from(opts: ArrowStreamConfigurationOptions) -> Self {
|
|
1175
|
+
let default = RustArrowStreamOptions::default();
|
|
1176
|
+
|
|
1177
|
+
let ipc_compression = match opts.ipc_compression {
|
|
1178
|
+
Some(0) => Some(arrow_ipc::CompressionType::LZ4_FRAME),
|
|
1179
|
+
Some(1) => Some(arrow_ipc::CompressionType::ZSTD),
|
|
1180
|
+
_ => None,
|
|
1181
|
+
};
|
|
1182
|
+
|
|
1183
|
+
RustArrowStreamOptions {
|
|
1184
|
+
max_inflight_batches: opts.max_inflight_batches.unwrap_or(default.max_inflight_batches as u32) as usize,
|
|
1185
|
+
recovery: opts.recovery.unwrap_or(default.recovery),
|
|
1186
|
+
recovery_timeout_ms: opts.recovery_timeout_ms.map(|v| v as u64).unwrap_or(default.recovery_timeout_ms),
|
|
1187
|
+
recovery_backoff_ms: opts.recovery_backoff_ms.map(|v| v as u64).unwrap_or(default.recovery_backoff_ms),
|
|
1188
|
+
recovery_retries: opts.recovery_retries.unwrap_or(default.recovery_retries),
|
|
1189
|
+
server_lack_of_ack_timeout_ms: opts.server_lack_of_ack_timeout_ms.map(|v| v as u64).unwrap_or(default.server_lack_of_ack_timeout_ms),
|
|
1190
|
+
flush_timeout_ms: opts.flush_timeout_ms.map(|v| v as u64).unwrap_or(default.flush_timeout_ms),
|
|
1191
|
+
connection_timeout_ms: opts.connection_timeout_ms.map(|v| v as u64).unwrap_or(default.connection_timeout_ms),
|
|
1192
|
+
ipc_compression,
|
|
1193
|
+
}
|
|
1194
|
+
}
|
|
1195
|
+
}
|
|
1196
|
+
|
|
1197
|
+
/// Arrow data type enum for schema definition.
|
|
1198
|
+
///
|
|
1199
|
+
/// **Experimental/Unsupported**: Arrow Flight support is experimental.
|
|
1200
|
+
#[cfg(feature = "arrow-flight")]
|
|
1201
|
+
#[napi]
|
|
1202
|
+
pub enum ArrowDataType {
|
|
1203
|
+
/// Boolean type
|
|
1204
|
+
Boolean = 0,
|
|
1205
|
+
/// Signed 8-bit integer
|
|
1206
|
+
Int8 = 1,
|
|
1207
|
+
/// Signed 16-bit integer
|
|
1208
|
+
Int16 = 2,
|
|
1209
|
+
/// Signed 32-bit integer
|
|
1210
|
+
Int32 = 3,
|
|
1211
|
+
/// Signed 64-bit integer
|
|
1212
|
+
Int64 = 4,
|
|
1213
|
+
/// Unsigned 8-bit integer
|
|
1214
|
+
UInt8 = 5,
|
|
1215
|
+
/// Unsigned 16-bit integer
|
|
1216
|
+
UInt16 = 6,
|
|
1217
|
+
/// Unsigned 32-bit integer
|
|
1218
|
+
UInt32 = 7,
|
|
1219
|
+
/// Unsigned 64-bit integer
|
|
1220
|
+
UInt64 = 8,
|
|
1221
|
+
/// 32-bit floating point
|
|
1222
|
+
Float32 = 9,
|
|
1223
|
+
/// 64-bit floating point
|
|
1224
|
+
Float64 = 10,
|
|
1225
|
+
/// UTF-8 encoded string
|
|
1226
|
+
Utf8 = 11,
|
|
1227
|
+
/// Large UTF-8 encoded string (64-bit offsets)
|
|
1228
|
+
LargeUtf8 = 12,
|
|
1229
|
+
/// Binary data
|
|
1230
|
+
Binary = 13,
|
|
1231
|
+
/// Large binary data (64-bit offsets)
|
|
1232
|
+
LargeBinary = 14,
|
|
1233
|
+
/// Date (32-bit days since epoch)
|
|
1234
|
+
Date32 = 15,
|
|
1235
|
+
/// Date (64-bit milliseconds since epoch)
|
|
1236
|
+
Date64 = 16,
|
|
1237
|
+
/// Timestamp with microsecond precision (UTC)
|
|
1238
|
+
TimestampMicros = 17,
|
|
1239
|
+
/// Timestamp with nanosecond precision (UTC)
|
|
1240
|
+
TimestampNanos = 18,
|
|
1241
|
+
}
|
|
1242
|
+
|
|
1243
|
+
#[cfg(feature = "arrow-flight")]
|
|
1244
|
+
fn convert_arrow_data_type(dt: i32) -> RustDataType {
|
|
1245
|
+
match dt {
|
|
1246
|
+
0 => RustDataType::Boolean,
|
|
1247
|
+
1 => RustDataType::Int8,
|
|
1248
|
+
2 => RustDataType::Int16,
|
|
1249
|
+
3 => RustDataType::Int32,
|
|
1250
|
+
4 => RustDataType::Int64,
|
|
1251
|
+
5 => RustDataType::UInt8,
|
|
1252
|
+
6 => RustDataType::UInt16,
|
|
1253
|
+
7 => RustDataType::UInt32,
|
|
1254
|
+
8 => RustDataType::UInt64,
|
|
1255
|
+
9 => RustDataType::Float32,
|
|
1256
|
+
10 => RustDataType::Float64,
|
|
1257
|
+
11 => RustDataType::Utf8,
|
|
1258
|
+
12 => RustDataType::LargeUtf8,
|
|
1259
|
+
13 => RustDataType::Binary,
|
|
1260
|
+
14 => RustDataType::LargeBinary,
|
|
1261
|
+
15 => RustDataType::Date32,
|
|
1262
|
+
16 => RustDataType::Date64,
|
|
1263
|
+
17 => RustDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, Some("UTC".into())),
|
|
1264
|
+
18 => RustDataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, Some("UTC".into())),
|
|
1265
|
+
_ => RustDataType::Utf8,
|
|
1266
|
+
}
|
|
1267
|
+
}
|
|
1268
|
+
|
|
1269
|
+
/// Arrow field definition for schema.
|
|
1270
|
+
///
|
|
1271
|
+
/// **Experimental/Unsupported**: Arrow Flight support is experimental.
|
|
1272
|
+
#[cfg(feature = "arrow-flight")]
|
|
1273
|
+
#[napi(object)]
|
|
1274
|
+
#[derive(Debug, Clone)]
|
|
1275
|
+
pub struct ArrowField {
|
|
1276
|
+
/// Field name
|
|
1277
|
+
pub name: String,
|
|
1278
|
+
/// Field data type (ArrowDataType enum value)
|
|
1279
|
+
pub data_type: i32,
|
|
1280
|
+
/// Whether the field is nullable
|
|
1281
|
+
pub nullable: Option<bool>,
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1284
|
+
/// Properties of the target Delta table for Arrow Flight ingestion.
///
/// Unlike `TableProperties` which uses Protocol Buffers, Arrow Flight streams
/// require an Arrow schema definition.
///
/// **Experimental/Unsupported**: Arrow Flight support is experimental and not yet
/// supported for production use. The API may change in future releases.
#[cfg(feature = "arrow-flight")]
#[napi(object)]
#[derive(Debug, Clone)]
pub struct ArrowTableProperties {
    /// Full table name in Unity Catalog (e.g., "catalog.schema.table")
    pub table_name: String,
    /// Arrow schema fields, one per column of the target table
    pub schema_fields: Vec<ArrowField>,
}
|
|
1300
|
+
|
|
1301
|
+
#[cfg(feature = "arrow-flight")]
impl ArrowTableProperties {
    /// Converts the JS-facing table properties into the Rust SDK
    /// representation, building an Arrow schema from the declared fields.
    fn to_rust(&self) -> Result<RustArrowTableProperties> {
        // Translate each JS field descriptor into an Arrow `Field`.
        // Nullability defaults to `true` when not specified.
        let mut fields = Vec::with_capacity(self.schema_fields.len());
        for field in &self.schema_fields {
            fields.push(RustField::new(
                &field.name,
                convert_arrow_data_type(field.data_type),
                field.nullable.unwrap_or(true),
            ));
        }

        Ok(RustArrowTableProperties {
            table_name: self.table_name.clone(),
            schema: Arc::new(RustArrowSchema::new(fields)),
        })
    }
}
|
|
1320
|
+
|
|
1321
|
+
/// An Arrow Flight stream for ingesting Arrow RecordBatches into a Delta table.
///
/// This stream provides a high-performance interface for streaming Arrow data
/// to Databricks Delta tables using the Arrow Flight protocol.
///
/// **Experimental/Unsupported**: Arrow Flight support is experimental and not yet
/// supported for production use. The API may change in future releases.
///
/// # Lifecycle
///
/// 1. Create a stream via `sdk.createArrowStream()`
/// 2. Ingest Arrow IPC buffers with `ingestBatch()`
/// 3. Use `waitForOffset()` to wait for acknowledgments
/// 4. Call `flush()` to ensure all batches are persisted
/// 5. Close the stream with `close()`
///
/// # Example
///
/// ```typescript
/// import { tableToIPC } from 'apache-arrow';
///
/// const arrowStream = await sdk.createArrowStream(
///   arrowTableProps,
///   clientId,
///   clientSecret,
///   options
/// );
///
/// const ipcBuffer = tableToIPC(arrowTable, 'stream');
/// const offset = await arrowStream.ingestBatch(Buffer.from(ipcBuffer));
/// await arrowStream.waitForOffset(offset);
/// await arrowStream.close();
/// ```
#[cfg(feature = "arrow-flight")]
#[napi]
pub struct ZerobusArrowStream {
    // Underlying Rust stream. `None` once `close()` has taken it; every other
    // method treats `None` as "stream has been closed".
    inner: Arc<Mutex<Option<RustZerobusArrowStream>>>,
    // Arrow schema the stream was created with; handed to IPC parsing in
    // `ingestBatch` (currently unused there — the schema embedded in the IPC
    // buffer is what gets decoded).
    schema: Arc<RustArrowSchema>,
}
|
|
1360
|
+
|
|
1361
|
+
/// Helper to parse Arrow IPC buffer to RecordBatch.
///
/// The `_expected_schema` parameter is currently unused; the schema embedded in
/// the IPC stream itself drives decoding. NOTE(review): consider validating the
/// decoded schema against it.
///
/// # Errors
///
/// Returns an error if the buffer is not a valid Arrow IPC stream, if any
/// record batch fails to decode, or if the stream contains no record batches.
#[cfg(feature = "arrow-flight")]
fn parse_arrow_ipc_to_batch(ipc_buffer: &[u8], _expected_schema: &RustArrowSchema) -> Result<RustRecordBatch> {
    let cursor = Cursor::new(ipc_buffer);
    let reader = StreamReader::try_new(cursor, None)
        .map_err(|e| Error::from_reason(format!("Failed to parse Arrow IPC: {}", e)))?;

    // Collect all batches (typically just one). Propagate decode errors instead
    // of silently dropping them (`filter_map(|r| r.ok())` previously made a
    // corrupt buffer surface as the misleading "no record batches" error).
    let batches: Vec<RustRecordBatch> = reader
        .collect::<std::result::Result<Vec<_>, _>>()
        .map_err(|e| Error::from_reason(format!("Failed to parse Arrow IPC: {}", e)))?;

    // Return the first batch; any additional batches in the buffer are ignored.
    batches
        .into_iter()
        .next()
        .ok_or_else(|| Error::from_reason("Arrow IPC buffer contains no record batches"))
}
|
|
1380
|
+
|
|
1381
|
+
/// Helper to serialize a RecordBatch into an Arrow IPC stream-format buffer.
#[cfg(feature = "arrow-flight")]
fn serialize_batch_to_ipc(batch: &RustRecordBatch) -> Result<Vec<u8>> {
    let mut ipc_bytes = Vec::new();

    // Scope the writer so its mutable borrow of `ipc_bytes` ends before we
    // hand the buffer back to the caller.
    {
        let schema = batch.schema();
        let mut writer = StreamWriter::try_new(&mut ipc_bytes, schema.as_ref())
            .map_err(|e| Error::from_reason(format!("Failed to create Arrow IPC writer: {}", e)))?;

        writer
            .write(batch)
            .map_err(|e| Error::from_reason(format!("Failed to write batch to IPC: {}", e)))?;

        // Emit the end-of-stream marker.
        writer
            .finish()
            .map_err(|e| Error::from_reason(format!("Failed to finish IPC stream: {}", e)))?;
    }

    Ok(ipc_bytes)
}
|
|
1395
|
+
|
|
1396
|
+
#[cfg(feature = "arrow-flight")]
#[napi]
impl ZerobusArrowStream {
    /// Ingests a single Arrow IPC buffer into the stream.
    ///
    /// The buffer should be an Arrow IPC stream format containing one or more RecordBatches.
    /// You can create this using `tableToIPC(table, 'stream')` from the apache-arrow package.
    /// Note: only the first RecordBatch decoded from the buffer is ingested
    /// (see `parse_arrow_ipc_to_batch`).
    ///
    /// # Arguments
    ///
    /// * `ipc_buffer` - Arrow IPC stream format buffer
    ///
    /// # Returns
    ///
    /// The offset ID (bigint) assigned to this batch.
    ///
    /// # Example
    ///
    /// ```typescript
    /// const table = tableFromArrays({
    ///   device_name: ['sensor-1'],
    ///   temp: [25],
    ///   humidity: [60]
    /// });
    /// const ipcBuffer = tableToIPC(table, 'stream');
    /// const offset = await stream.ingestBatch(Buffer.from(ipcBuffer));
    /// await stream.waitForOffset(offset);
    /// ```
    #[napi(ts_return_type = "Promise<bigint>")]
    pub fn ingest_batch(&self, env: Env, ipc_buffer: Buffer) -> Result<JsObject> {
        // Clone the Arc handles and copy the JS-owned buffer so the async
        // block is self-contained and can be moved onto the tokio runtime.
        let schema = self.schema.clone();
        let stream = self.inner.clone();
        let buffer_vec = ipc_buffer.to_vec();

        env.execute_tokio_future(
            async move {
                let batch = parse_arrow_ipc_to_batch(&buffer_vec, &schema)?;

                // `None` means the stream was closed via `close()`.
                let mut guard = stream.lock().await;
                let stream_ref = guard
                    .as_mut()
                    .ok_or_else(|| napi::Error::from_reason("Arrow stream has been closed"))?;

                stream_ref
                    .ingest_batch(batch)
                    .await
                    .map_err(|e| napi::Error::from_reason(format!("Failed to ingest batch: {}", e)))
            },
            // Resolver: build a JS bigint by round-tripping the offset through
            // its decimal string and the global BigInt constructor, avoiding
            // the precision loss a plain JS Number would have above 2^53 - 1.
            |env, offset_id| {
                let offset_str = offset_id.to_string();
                let global: JsGlobal = env.get_global()?;
                let bigint_ctor: JsFunction = global.get_named_property("BigInt")?;
                let js_str = env.create_string(&offset_str)?;
                bigint_ctor.call(None, &[js_str.into_unknown()])
            },
        )
    }

    /// Waits for a specific offset to be acknowledged by the server.
    ///
    /// Use this method with `ingestBatch()` to selectively wait for acknowledgments.
    ///
    /// # Arguments
    ///
    /// * `offset_id` - The offset ID to wait for (returned by ingestBatch)
    #[napi(ts_args_type = "offsetId: bigint", ts_return_type = "Promise<void>")]
    pub fn wait_for_offset(&self, env: Env, offset_id: JsUnknown) -> Result<JsObject> {
        // Coerce the JS bigint to a Number, then narrow to i64.
        // NOTE(review): Number(bigint) loses precision above 2^53 - 1; if
        // offsets can exceed that, read the bigint's i64 value directly
        // instead of going through Number — confirm.
        let global: JsGlobal = env.get_global()?;
        let number_ctor: JsFunction = global.get_named_property("Number")?;
        let num_result: JsUnknown = number_ctor.call(None, &[offset_id])?;
        let offset: i64 = num_result.coerce_to_number()?.get_int64()?;

        let stream = self.inner.clone();

        env.execute_tokio_future(
            async move {
                // `None` means the stream was closed via `close()`.
                let guard = stream.lock().await;
                let stream_ref = guard
                    .as_ref()
                    .ok_or_else(|| napi::Error::from_reason("Arrow stream has been closed"))?;

                stream_ref
                    .wait_for_offset(offset)
                    .await
                    .map_err(|e| napi::Error::from_reason(format!("Failed to wait for offset: {}", e)))
            },
            // Resolves to `undefined` on the JS side.
            |_env, _| Ok(()),
        )
    }

    /// Flushes all pending batches and waits for acknowledgments.
    ///
    /// Fails if the stream has already been closed.
    #[napi]
    pub async fn flush(&self) -> Result<()> {
        let guard = self.inner.lock().await;
        let stream = guard
            .as_ref()
            .ok_or_else(|| Error::from_reason("Arrow stream has been closed"))?;

        stream
            .flush()
            .await
            .map_err(|e| Error::from_reason(format!("Failed to flush arrow stream: {}", e)))
    }

    /// Closes the stream gracefully.
    ///
    /// Idempotent: closing an already-closed stream is a no-op.
    #[napi]
    pub async fn close(&self) -> Result<()> {
        let mut guard = self.inner.lock().await;
        // `take()` leaves `None` behind, which all other methods report as
        // "Arrow stream has been closed".
        if let Some(mut stream) = guard.take() {
            stream
                .close()
                .await
                .map_err(|e| Error::from_reason(format!("Failed to close arrow stream: {}", e)))?;
        }
        Ok(())
    }

    /// Returns whether the stream has been closed.
    #[napi(getter)]
    pub fn is_closed(&self) -> bool {
        // Check synchronously using try_lock; a held lock means some operation
        // is in flight, so the stream cannot have been closed yet.
        match self.inner.try_lock() {
            Ok(guard) => guard.is_none(),
            Err(_) => false, // If we can't acquire lock, assume not closed
        }
    }

    /// Returns the table name for this stream.
    ///
    /// Fails with "Stream is busy" if an async operation currently holds the
    /// stream lock, or with "Stream has been closed" after `close()`.
    #[napi(getter)]
    pub fn table_name(&self) -> Result<String> {
        let guard = self.inner.try_lock()
            .map_err(|_| Error::from_reason("Stream is busy"))?;
        let stream = guard
            .as_ref()
            .ok_or_else(|| Error::from_reason("Stream has been closed"))?;
        Ok(stream.table_name().to_string())
    }

    /// Gets unacknowledged batches as Arrow IPC buffers.
    ///
    /// This method should only be called after a stream failure to retrieve batches
    /// that were sent but not acknowledged. These can be re-ingested into a new stream.
    ///
    /// # Returns
    ///
    /// An array of Buffers containing the unacknowledged batches in Arrow IPC format.
    #[napi]
    pub async fn get_unacked_batches(&self) -> Result<Vec<Buffer>> {
        let guard = self.inner.lock().await;
        let stream = guard
            .as_ref()
            .ok_or_else(|| Error::from_reason("Stream has been closed"))?;

        let batches = stream
            .get_unacked_batches()
            .await
            .map_err(|e| Error::from_reason(format!("Failed to get unacked batches: {}", e)))?;

        // Convert each RecordBatch back to IPC format so JS can re-ingest
        // them via `ingestBatch()` on a fresh stream.
        batches
            .iter()
            .map(|batch| {
                let ipc_bytes = serialize_batch_to_ipc(batch)?;
                Ok(ipc_bytes.into())
            })
            .collect()
    }
}
|
|
1564
|
+
|
|
1565
|
+
// Add Arrow stream methods to ZerobusSdk
#[cfg(feature = "arrow-flight")]
#[napi]
impl ZerobusSdk {
    /// Creates a new Arrow Flight stream to a Delta table.
    ///
    /// **Experimental/Unsupported**: Arrow Flight support is experimental and not yet
    /// supported for production use. The API may change in future releases.
    ///
    /// This method establishes an Arrow Flight connection to the Zerobus service
    /// for high-performance columnar data ingestion.
    ///
    /// # Arguments
    ///
    /// * `table_properties` - Properties of the target table including name and Arrow schema
    /// * `client_id` - OAuth 2.0 client ID
    /// * `client_secret` - OAuth 2.0 client secret
    /// * `options` - Optional stream configuration
    ///
    /// # Returns
    ///
    /// A Promise that resolves to a ZerobusArrowStream ready for data ingestion.
    ///
    /// # Example
    ///
    /// ```typescript
    /// const tableProps = {
    ///   tableName: 'catalog.schema.table',
    ///   schemaFields: [
    ///     { name: 'device_name', dataType: ArrowDataType.Utf8 },
    ///     { name: 'temp', dataType: ArrowDataType.Int32 },
    ///     { name: 'humidity', dataType: ArrowDataType.Int64 }
    ///   ]
    /// };
    ///
    /// const arrowStream = await sdk.createArrowStream(
    ///   tableProps,
    ///   clientId,
    ///   clientSecret,
    ///   { maxInflightBatches: 100 }
    /// );
    /// ```
    #[napi(ts_return_type = "Promise<ZerobusArrowStream>")]
    pub fn create_arrow_stream(
        &self,
        env: Env,
        table_properties: ArrowTableProperties,
        client_id: String,
        client_secret: String,
        options: Option<ArrowStreamConfigurationOptions>,
    ) -> Result<JsObject> {
        // Convert JS-facing properties up front so schema errors surface
        // synchronously, before the async work starts.
        let rust_table_props = table_properties.to_rust()?;
        // Keep a handle to the schema for the wrapper; `ingestBatch` passes it
        // to IPC parsing.
        let schema = rust_table_props.schema.clone();
        let rust_options: Option<RustArrowStreamOptions> = options.map(|o| o.into());

        let sdk = self.inner.clone();

        env.execute_tokio_future(
            async move {
                let stream = sdk
                    .create_arrow_stream(rust_table_props, client_id, client_secret, rust_options)
                    .await
                    .map_err(|e| napi::Error::from_reason(format!("Failed to create arrow stream: {}", e)))?;

                Ok(ZerobusArrowStream {
                    inner: Arc::new(Mutex::new(Some(stream))),
                    schema,
                })
            },
            |_env, stream| Ok(stream),
        )
    }

    /// Recreates an Arrow stream with the same configuration and re-ingests unacknowledged batches.
    ///
    /// **Experimental/Unsupported**: Arrow Flight support is experimental.
    ///
    /// # Arguments
    ///
    /// * `stream` - The failed or closed Arrow stream to recreate
    ///
    /// # Returns
    ///
    /// A Promise that resolves to a new ZerobusArrowStream with all unacknowledged batches re-ingested.
    #[napi]
    pub async fn recreate_arrow_stream(&self, stream: &ZerobusArrowStream) -> Result<ZerobusArrowStream> {
        // Holds the old stream's lock for the whole recreation, so concurrent
        // operations on the old stream block until this completes. Fails if
        // the old stream was already closed via `close()` (inner is `None`).
        let inner_guard = stream.inner.lock().await;
        let rust_stream = inner_guard
            .as_ref()
            .ok_or_else(|| Error::from_reason("Arrow stream has been closed"))?;

        let new_rust_stream = self
            .inner
            .recreate_arrow_stream(rust_stream)
            .await
            .map_err(|e| Error::from_reason(format!("Failed to recreate arrow stream: {}", e)))?;

        // The new wrapper reuses the original stream's schema handle.
        Ok(ZerobusArrowStream {
            inner: Arc::new(Mutex::new(Some(new_rust_stream))),
            schema: stream.schema.clone(),
        })
    }
}
|