deltalake-rb 0.2.5 → 0.2.7

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -4,13 +4,6 @@ mod merge;
4
4
  mod schema;
5
5
  mod utils;
6
6
 
7
- use std::cell::RefCell;
8
- use std::collections::{HashMap, HashSet};
9
- use std::future::IntoFuture;
10
- use std::str::FromStr;
11
- use std::sync::Arc;
12
- use std::time;
13
-
14
7
  use chrono::{DateTime, Duration, FixedOffset, Utc};
15
8
  use delta_kernel::schema::StructField;
16
9
  use delta_kernel::table_properties::DataSkippingNumIndexedCols;
@@ -26,35 +19,31 @@ use deltalake::kernel::{scalars::ScalarExt, Transaction};
26
19
  use deltalake::kernel::{EagerSnapshot, StructDataExt};
27
20
  use deltalake::logstore::IORuntime;
28
21
  use deltalake::logstore::LogStoreRef;
29
- use deltalake::operations::add_column::AddColumnBuilder;
30
- use deltalake::operations::add_feature::AddTableFeatureBuilder;
31
22
  use deltalake::operations::collect_sendable_stream;
32
- use deltalake::operations::constraints::ConstraintBuilder;
33
- use deltalake::operations::delete::DeleteBuilder;
34
- use deltalake::operations::drop_constraints::DropConstraintBuilder;
35
- use deltalake::operations::filesystem_check::FileSystemCheckBuilder;
36
- use deltalake::operations::load_cdf::CdfLoadBuilder;
37
- use deltalake::operations::optimize::{OptimizeBuilder, OptimizeType};
38
- use deltalake::operations::restore::RestoreBuilder;
39
- use deltalake::operations::set_tbl_properties::SetTablePropertiesBuilder;
40
- use deltalake::operations::vacuum::VacuumBuilder;
23
+ use deltalake::operations::optimize::{create_session_state_for_optimize, OptimizeType};
41
24
  use deltalake::parquet::basic::Compression;
42
25
  use deltalake::parquet::errors::ParquetError;
43
26
  use deltalake::parquet::file::properties::WriterProperties;
44
27
  use deltalake::partitions::PartitionFilter;
45
28
  use deltalake::table::config::TablePropertiesExt;
46
- use deltalake::{DeltaOps, DeltaResult};
29
+ use deltalake::table::state::DeltaTableState;
30
+ use deltalake::{DeltaResult, DeltaTable};
47
31
  use error::DeltaError;
48
32
  use futures::future::join_all;
49
33
  use futures::TryStreamExt;
50
-
51
34
  use magnus::{
52
35
  function, method, prelude::*, try_convert::TryConvertOwned, typed_data::Obj, Error as RbErr,
53
36
  Integer, Module, RArray, Ruby, TryConvert, Value,
54
37
  };
55
38
  use serde_json::Map;
39
+ use std::collections::{HashMap, HashSet};
40
+ use std::future::IntoFuture;
41
+ use std::str::FromStr;
42
+ use std::sync::{Arc, Mutex};
43
+ use std::time;
44
+ use uuid::Uuid;
56
45
 
57
- use crate::error::{RbValueError, RubyError};
46
+ use crate::error::{to_rt_err, RbRuntimeError, RbValueError, RubyError};
58
47
  use crate::features::TableFeatures;
59
48
  use crate::merge::RbMergeBuilder;
60
49
  use crate::schema::{schema_to_rbobject, Field};
@@ -81,7 +70,7 @@ unsafe impl TryConvertOwned for PartitionFilterValue {}
81
70
 
82
71
  #[magnus::wrap(class = "DeltaLake::RawDeltaTable")]
83
72
  struct RawDeltaTable {
84
- _table: RefCell<deltalake::DeltaTable>,
73
+ _table: Arc<Mutex<deltalake::DeltaTable>>,
85
74
  }
86
75
 
87
76
  #[magnus::wrap(class = "DeltaLake::RawDeltaTableMetaData")]
@@ -124,7 +113,10 @@ type StringVec = Vec<String>;
124
113
 
125
114
  impl RawDeltaTable {
126
115
  fn with_table<T>(&self, func: impl Fn(&deltalake::DeltaTable) -> RbResult<T>) -> RbResult<T> {
127
- func(&self._table.borrow())
116
+ match self._table.lock() {
117
+ Ok(table) => func(&table),
118
+ Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
119
+ }
128
120
  }
129
121
 
130
122
  fn cloned_state(&self) -> RbResult<EagerSnapshot> {
@@ -140,6 +132,15 @@ impl RawDeltaTable {
140
132
  fn log_store(&self) -> RbResult<LogStoreRef> {
141
133
  self.with_table(|t| Ok(t.log_store().clone()))
142
134
  }
135
+
136
+ fn set_state(&self, state: Option<DeltaTableState>) -> RbResult<()> {
137
+ let mut original = self
138
+ ._table
139
+ .lock()
140
+ .map_err(|e| RbRuntimeError::new_err(e.to_string()))?;
141
+ original.state = state;
142
+ Ok(())
143
+ }
143
144
  }
144
145
 
145
146
  impl RawDeltaTable {
@@ -152,7 +153,7 @@ impl RawDeltaTable {
152
153
  ) -> RbResult<Self> {
153
154
  let table_url = deltalake::table::builder::parse_table_uri(table_uri)
154
155
  .map_err(error::RubyError::from)?;
155
- let mut builder = deltalake::DeltaTableBuilder::from_uri(table_url)
156
+ let mut builder = deltalake::DeltaTableBuilder::from_url(table_url)
156
157
  .map_err(error::RubyError::from)?
157
158
  .with_io_runtime(IORuntime::default());
158
159
 
@@ -173,7 +174,7 @@ impl RawDeltaTable {
173
174
 
174
175
  let table = rt().block_on(builder.load()).map_err(RubyError::from)?;
175
176
  Ok(RawDeltaTable {
176
- _table: RefCell::new(table),
177
+ _table: Arc::new(Mutex::new(table)),
177
178
  })
178
179
  }
179
180
 
@@ -183,7 +184,7 @@ impl RawDeltaTable {
183
184
  ) -> RbResult<bool> {
184
185
  let table_url = deltalake::table::builder::ensure_table_uri(table_uri)
185
186
  .map_err(|_| RbValueError::new_err("Invalid table URI"))?;
186
- let mut builder = deltalake::DeltaTableBuilder::from_uri(table_url)
187
+ let mut builder = deltalake::DeltaTableBuilder::from_url(table_url)
187
188
  .map_err(|_| RbValueError::new_err("Failed to create table builder"))?;
188
189
  if let Some(storage_options) = storage_options {
189
190
  builder = builder.with_storage_options(storage_options)
@@ -199,7 +200,7 @@ impl RawDeltaTable {
199
200
  }
200
201
 
201
202
  pub fn table_uri(&self) -> RbResult<String> {
202
- self.with_table(|t| Ok(t.table_uri()))
203
+ self.with_table(|t| Ok(t.table_url().to_string()))
203
204
  }
204
205
 
205
206
  pub fn version(&self) -> RbResult<Option<i64>> {
@@ -255,15 +256,32 @@ impl RawDeltaTable {
255
256
  }
256
257
 
257
258
  pub fn load_version(&self, version: i64) -> RbResult<()> {
258
- Ok(rt()
259
- .block_on(self._table.borrow_mut().load_version(version))
260
- .map_err(RubyError::from)?)
259
+ #[allow(clippy::await_holding_lock)]
260
+ rt().block_on(async {
261
+ let mut table = self
262
+ ._table
263
+ .lock()
264
+ .map_err(|e| RbRuntimeError::new_err(e.to_string()))?;
265
+ (*table)
266
+ .load_version(version)
267
+ .await
268
+ .map_err(RubyError::from)
269
+ .map_err(RbErr::from)
270
+ })
261
271
  }
262
272
 
263
273
  pub fn get_latest_version(&self) -> RbResult<i64> {
264
- Ok(rt()
265
- .block_on(self._table.borrow().get_latest_version())
266
- .map_err(RubyError::from)?)
274
+ #[allow(clippy::await_holding_lock)]
275
+ rt().block_on(async {
276
+ match self._table.lock() {
277
+ Ok(table) => table
278
+ .get_latest_version()
279
+ .await
280
+ .map_err(RubyError::from)
281
+ .map_err(RbErr::from),
282
+ Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
283
+ }
284
+ })
267
285
  }
268
286
 
269
287
  pub fn get_num_index_cols(&self) -> RbResult<i32> {
@@ -296,9 +314,18 @@ impl RawDeltaTable {
296
314
  DateTime::<Utc>::from(DateTime::<FixedOffset>::parse_from_rfc3339(&ds).map_err(
297
315
  |err| RbValueError::new_err(format!("Failed to parse datetime string: {err}")),
298
316
  )?);
299
- Ok(rt()
300
- .block_on(self._table.borrow_mut().load_with_datetime(datetime))
301
- .map_err(RubyError::from)?)
317
+ #[allow(clippy::await_holding_lock)]
318
+ rt().block_on(async {
319
+ let mut table = self
320
+ ._table
321
+ .lock()
322
+ .map_err(|e| RbRuntimeError::new_err(e.to_string()))?;
323
+ (*table)
324
+ .load_with_datetime(datetime)
325
+ .await
326
+ .map_err(RubyError::from)
327
+ .map_err(RbErr::from)
328
+ })
302
329
  }
303
330
 
304
331
  pub fn files(
@@ -324,14 +351,14 @@ impl RawDeltaTable {
324
351
  .map(|p| p.to_string())
325
352
  .collect())
326
353
  } else {
327
- Ok(self
328
- ._table
329
- .borrow()
330
- .snapshot()
331
- .map_err(RubyError::from)?
332
- .file_paths_iter()
333
- .map(|f| f.to_string())
334
- .collect())
354
+ match self._table.lock() {
355
+ Ok(table) => Ok(table
356
+ .get_file_uris()
357
+ .map_err(RubyError::from)?
358
+ .map(|f| f.to_string())
359
+ .collect()),
360
+ Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
361
+ }
335
362
  }
336
363
  }
337
364
 
@@ -379,19 +406,12 @@ impl RawDeltaTable {
379
406
  commit_properties: Option<RbCommitProperties>,
380
407
  post_commithook_properties: Option<RbPostCommitHookProperties>,
381
408
  ) -> RbResult<Vec<String>> {
382
- let snapshot = self
383
- ._table
384
- .borrow()
385
- .snapshot()
386
- .cloned()
387
- .map_err(RubyError::from)
388
- .map_err(RbErr::from)?;
389
- let mut cmd = VacuumBuilder::new(
390
- self._table.borrow().log_store(),
391
- snapshot.snapshot().clone(),
392
- )
393
- .with_enforce_retention_duration(enforce_retention_duration)
394
- .with_dry_run(dry_run);
409
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
410
+ let mut cmd = table
411
+ .vacuum()
412
+ .with_enforce_retention_duration(enforce_retention_duration)
413
+ .with_dry_run(dry_run);
414
+
395
415
  if let Some(retention_period) = retention_hours {
396
416
  cmd = cmd.with_retention_period(Duration::hours(retention_period as i64));
397
417
  }
@@ -402,7 +422,7 @@ impl RawDeltaTable {
402
422
  cmd = cmd.with_commit_properties(commit_properties);
403
423
  }
404
424
  let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
405
- self._table.borrow_mut().state = table.state;
425
+ self.set_state(table.state)?;
406
426
  Ok(metrics.files_deleted)
407
427
  }
408
428
 
@@ -417,8 +437,11 @@ impl RawDeltaTable {
417
437
  commit_properties: Option<RbCommitProperties>,
418
438
  post_commithook_properties: Option<RbPostCommitHookProperties>,
419
439
  ) -> RbResult<String> {
420
- let mut cmd = OptimizeBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
440
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
441
+ let mut cmd = table
442
+ .optimize()
421
443
  .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
444
+
422
445
  if let Some(size) = target_size {
423
446
  cmd = cmd.with_target_size(size);
424
447
  }
@@ -443,7 +466,7 @@ impl RawDeltaTable {
443
466
  cmd = cmd.with_filters(&converted_filters);
444
467
 
445
468
  let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
446
- self._table.borrow_mut().state = table.state;
469
+ self.set_state(table.state)?;
447
470
  Ok(serde_json::to_string(&metrics).unwrap())
448
471
  }
449
472
 
@@ -454,16 +477,26 @@ impl RawDeltaTable {
454
477
  partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
455
478
  target_size: Option<u64>,
456
479
  max_concurrent_tasks: Option<usize>,
457
- max_spill_size: usize,
480
+ max_spill_size: Option<usize>,
481
+ max_temp_directory_size: Option<u64>,
458
482
  min_commit_interval: Option<u64>,
459
483
  writer_properties: Option<RbWriterProperties>,
460
484
  commit_properties: Option<RbCommitProperties>,
461
485
  post_commithook_properties: Option<RbPostCommitHookProperties>,
462
486
  ) -> RbResult<String> {
463
- let mut cmd = OptimizeBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
487
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
488
+ let mut cmd = table
489
+ .clone()
490
+ .optimize()
464
491
  .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
465
- .with_max_spill_size(max_spill_size)
466
492
  .with_type(OptimizeType::ZOrder(z_order_columns));
493
+
494
+ if max_spill_size.is_some() || max_temp_directory_size.is_some() {
495
+ let session =
496
+ create_session_state_for_optimize(max_spill_size, max_temp_directory_size);
497
+ cmd = cmd.with_session_state(Arc::new(session));
498
+ }
499
+
467
500
  if let Some(size) = target_size {
468
501
  cmd = cmd.with_target_size(size);
469
502
  }
@@ -488,13 +521,15 @@ impl RawDeltaTable {
488
521
  cmd = cmd.with_filters(&converted_filters);
489
522
 
490
523
  let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
491
- self._table.borrow_mut().state = table.state;
524
+ self.set_state(table.state)?;
492
525
  Ok(serde_json::to_string(&metrics).unwrap())
493
526
  }
494
527
 
495
528
  pub fn add_columns(&self, fields: RArray) -> RbResult<()> {
496
529
  let fields = fields.typecheck::<Obj<Field>>()?;
497
- let mut cmd = AddColumnBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
530
+
531
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
532
+ let mut cmd = table.add_columns();
498
533
 
499
534
  let new_fields = fields
500
535
  .iter()
@@ -504,7 +539,7 @@ impl RawDeltaTable {
504
539
  cmd = cmd.with_fields(new_fields);
505
540
 
506
541
  let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
507
- self._table.borrow_mut().state = table.state;
542
+ self.set_state(table.state)?;
508
543
  Ok(())
509
544
  }
510
545
 
@@ -517,71 +552,76 @@ impl RawDeltaTable {
517
552
  .into_iter()
518
553
  .map(TableFeatures::try_convert)
519
554
  .collect::<RbResult<Vec<_>>>()?;
520
- let cmd =
521
- AddTableFeatureBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
522
- .with_features(feature)
523
- .with_allow_protocol_versions_increase(allow_protocol_versions_increase);
555
+
556
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
557
+ let cmd = table
558
+ .add_feature()
559
+ .with_features(feature)
560
+ .with_allow_protocol_versions_increase(allow_protocol_versions_increase);
524
561
 
525
562
  let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
526
- self._table.borrow_mut().state = table.state;
563
+ self.set_state(table.state)?;
527
564
  Ok(())
528
565
  }
529
566
 
530
567
  pub fn add_constraints(&self, constraints: HashMap<String, String>) -> RbResult<()> {
531
- let mut cmd =
532
- ConstraintBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
568
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
569
+ let mut cmd = table.add_constraint();
533
570
 
534
571
  for (col_name, expression) in constraints {
535
572
  cmd = cmd.with_constraint(col_name.clone(), expression.clone());
536
573
  }
537
574
 
538
575
  let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
539
- self._table.borrow_mut().state = table.state;
576
+ self.set_state(table.state)?;
540
577
  Ok(())
541
578
  }
542
579
 
543
580
  pub fn drop_constraints(&self, name: String, raise_if_not_exists: bool) -> RbResult<()> {
544
- let cmd =
545
- DropConstraintBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
546
- .with_constraint(name)
547
- .with_raise_if_not_exists(raise_if_not_exists);
581
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
582
+ let cmd = table
583
+ .drop_constraints()
584
+ .with_constraint(name)
585
+ .with_raise_if_not_exists(raise_if_not_exists);
548
586
 
549
587
  let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
550
- self._table.borrow_mut().state = table.state;
588
+ self.set_state(table.state)?;
551
589
  Ok(())
552
590
  }
553
591
 
554
592
  pub fn load_cdf(
555
593
  &self,
556
- starting_version: i64,
594
+ starting_version: Option<i64>,
557
595
  ending_version: Option<i64>,
558
596
  starting_timestamp: Option<String>,
559
597
  ending_timestamp: Option<String>,
560
598
  columns: Option<Vec<String>>,
561
599
  ) -> RbResult<ArrowArrayStream> {
562
600
  let ctx = SessionContext::new();
563
- let mut cdf_read =
564
- CdfLoadBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
565
- .with_starting_version(starting_version);
601
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
602
+ let mut cmd = table.scan_cdf();
566
603
 
604
+ if let Some(sv) = starting_version {
605
+ cmd = cmd.with_starting_version(sv);
606
+ }
567
607
  if let Some(ev) = ending_version {
568
- cdf_read = cdf_read.with_ending_version(ev);
608
+ cmd = cmd.with_ending_version(ev);
569
609
  }
570
610
  if let Some(st) = starting_timestamp {
571
611
  let starting_ts: DateTime<Utc> = DateTime::<Utc>::from_str(&st)
572
612
  .map_err(|pe| RbValueError::new_err(pe.to_string()))?
573
613
  .to_utc();
574
- cdf_read = cdf_read.with_starting_timestamp(starting_ts);
614
+ cmd = cmd.with_starting_timestamp(starting_ts);
575
615
  }
576
616
  if let Some(et) = ending_timestamp {
577
617
  let ending_ts = DateTime::<Utc>::from_str(&et)
578
618
  .map_err(|pe| RbValueError::new_err(pe.to_string()))?
579
619
  .to_utc();
580
- cdf_read = cdf_read.with_starting_timestamp(ending_ts);
620
+ cmd = cmd.with_starting_timestamp(ending_ts);
581
621
  }
582
622
 
583
623
  let table_provider: Arc<dyn TableProvider> =
584
- Arc::new(DeltaCdfTableProvider::try_new(cdf_read).map_err(RubyError::from)?);
624
+ Arc::new(DeltaCdfTableProvider::try_new(cmd).map_err(RubyError::from)?);
585
625
 
586
626
  let plan = rt()
587
627
  .block_on(async {
@@ -630,7 +670,7 @@ impl RawDeltaTable {
630
670
  commit_properties: Option<RbCommitProperties>,
631
671
  ) -> RbResult<RbMergeBuilder> {
632
672
  Ok(RbMergeBuilder::new(
633
- self._table.borrow().log_store(),
673
+ self.log_store()?,
634
674
  self.cloned_state()?,
635
675
  source.0,
636
676
  predicate,
@@ -646,7 +686,7 @@ impl RawDeltaTable {
646
686
 
647
687
  pub fn merge_execute(&self, merge_builder: &RbMergeBuilder) -> RbResult<String> {
648
688
  let (table, metrics) = merge_builder.execute().map_err(RubyError::from)?;
649
- self._table.borrow_mut().state = table.state;
689
+ self.set_state(table.state)?;
650
690
  Ok(metrics)
651
691
  }
652
692
 
@@ -657,7 +697,8 @@ impl RawDeltaTable {
657
697
  protocol_downgrade_allowed: bool,
658
698
  commit_properties: Option<RbCommitProperties>,
659
699
  ) -> RbResult<String> {
660
- let mut cmd = RestoreBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
700
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
701
+ let mut cmd = table.restore();
661
702
  if let Some(val) = target {
662
703
  if let Some(version) = Integer::from_value(val) {
663
704
  cmd = cmd.with_version_to_restore(version.to_i64()?)
@@ -679,23 +720,37 @@ impl RawDeltaTable {
679
720
  }
680
721
 
681
722
  let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
682
- self._table.borrow_mut().state = table.state;
723
+ self.set_state(table.state)?;
683
724
  Ok(serde_json::to_string(&metrics).unwrap())
684
725
  }
685
726
 
686
727
  pub fn history(&self, limit: Option<usize>) -> RbResult<Vec<String>> {
687
- let history = rt()
688
- .block_on(self._table.borrow().history(limit))
689
- .map_err(RubyError::from)?;
728
+ #[allow(clippy::await_holding_lock)]
729
+ let history = rt().block_on(async {
730
+ match self._table.lock() {
731
+ Ok(table) => table
732
+ .history(limit)
733
+ .await
734
+ .map_err(RubyError::from)
735
+ .map_err(RbErr::from),
736
+ Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
737
+ }
738
+ })?;
690
739
  Ok(history
691
740
  .map(|c| serde_json::to_string(&c).unwrap())
692
741
  .collect())
693
742
  }
694
743
 
695
744
  pub fn update_incremental(&self) -> RbResult<()> {
696
- #[allow(deprecated)]
745
+ #[allow(clippy::await_holding_lock)]
697
746
  Ok(rt()
698
- .block_on(self._table.borrow_mut().update_incremental(None))
747
+ .block_on(async {
748
+ let mut table = self
749
+ ._table
750
+ .lock()
751
+ .map_err(|e| DeltaTableError::Generic(e.to_string()))?;
752
+ (*table).update_incremental(None).await
753
+ })
699
754
  .map_err(RubyError::from)?)
700
755
  }
701
756
 
@@ -755,15 +810,61 @@ impl RawDeltaTable {
755
810
  }
756
811
 
757
812
  pub fn create_checkpoint(&self) -> RbResult<()> {
758
- rt().block_on(create_checkpoint(&self._table.borrow(), None))
759
- .map_err(RubyError::from)?;
813
+ let operation_id = Uuid::new_v4();
814
+
815
+ #[allow(clippy::await_holding_lock)]
816
+ let _result = rt().block_on(async {
817
+ match self._table.lock() {
818
+ Ok(table) => create_checkpoint(&table, Some(operation_id))
819
+ .await
820
+ .map_err(RubyError::from)
821
+ .map_err(RbErr::from),
822
+ Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
823
+ }
824
+ });
760
825
 
761
826
  Ok(())
762
827
  }
763
828
 
764
829
  pub fn cleanup_metadata(&self) -> RbResult<()> {
765
- rt().block_on(cleanup_metadata(&self._table.borrow(), None))
766
- .map_err(RubyError::from)?;
830
+ let (_result, new_state) = {
831
+ let operation_id = Uuid::new_v4();
832
+
833
+ #[allow(clippy::await_holding_lock)]
834
+ let result = rt().block_on(async {
835
+ match self._table.lock() {
836
+ Ok(table) => {
837
+ let result = cleanup_metadata(&table, Some(operation_id))
838
+ .await
839
+ .map_err(RubyError::from)
840
+ .map_err(RbErr::from)?;
841
+
842
+ let new_state = if result > 0 {
843
+ Some(
844
+ DeltaTableState::try_new(
845
+ &table.log_store(),
846
+ table.config.clone(),
847
+ table.version(),
848
+ )
849
+ .await
850
+ .map_err(RubyError::from)?,
851
+ )
852
+ } else {
853
+ None
854
+ };
855
+
856
+ Ok((result, new_state))
857
+ }
858
+ Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
859
+ }
860
+ });
861
+
862
+ result
863
+ }?;
864
+
865
+ if new_state.is_some() {
866
+ self.set_state(new_state)?;
867
+ }
767
868
 
768
869
  Ok(())
769
870
  }
@@ -792,7 +893,8 @@ impl RawDeltaTable {
792
893
  commit_properties: Option<RbCommitProperties>,
793
894
  post_commithook_properties: Option<RbPostCommitHookProperties>,
794
895
  ) -> RbResult<String> {
795
- let mut cmd = DeleteBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
896
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
897
+ let mut cmd = table.delete();
796
898
  if let Some(predicate) = predicate {
797
899
  cmd = cmd.with_predicate(predicate);
798
900
  }
@@ -808,7 +910,7 @@ impl RawDeltaTable {
808
910
  }
809
911
 
810
912
  let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
811
- self._table.borrow_mut().state = table.state;
913
+ self.set_state(table.state)?;
812
914
  Ok(serde_json::to_string(&metrics).unwrap())
813
915
  }
814
916
 
@@ -817,13 +919,14 @@ impl RawDeltaTable {
817
919
  properties: HashMap<String, String>,
818
920
  raise_if_not_exists: bool,
819
921
  ) -> RbResult<()> {
820
- let cmd =
821
- SetTablePropertiesBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
822
- .with_properties(properties)
823
- .with_raise_if_not_exists(raise_if_not_exists);
922
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
923
+ let cmd = table
924
+ .set_tbl_properties()
925
+ .with_properties(properties)
926
+ .with_raise_if_not_exists(raise_if_not_exists);
824
927
 
825
928
  let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
826
- self._table.borrow_mut().state = table.state;
929
+ self.set_state(table.state)?;
827
930
  Ok(())
828
931
  }
829
932
 
@@ -833,9 +936,8 @@ impl RawDeltaTable {
833
936
  commit_properties: Option<RbCommitProperties>,
834
937
  post_commithook_properties: Option<RbPostCommitHookProperties>,
835
938
  ) -> RbResult<String> {
836
- let mut cmd =
837
- FileSystemCheckBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
838
- .with_dry_run(dry_run);
939
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
940
+ let mut cmd = table.filesystem_check().with_dry_run(dry_run);
839
941
 
840
942
  if let Some(commit_properties) =
841
943
  maybe_create_commit_properties(commit_properties, post_commithook_properties)
@@ -844,7 +946,7 @@ impl RawDeltaTable {
844
946
  }
845
947
 
846
948
  let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
847
- self._table.borrow_mut().state = table.state;
949
+ self.set_state(table.state)?;
848
950
  Ok(serde_json::to_string(&metrics).unwrap())
849
951
  }
850
952
 
@@ -856,6 +958,77 @@ impl RawDeltaTable {
856
958
  .block_on(snapshot.transaction_version(log_store.as_ref(), app_id))
857
959
  .map_err(RubyError::from)?)
858
960
  }
961
+
962
+ #[allow(clippy::too_many_arguments)]
963
+ pub fn write(
964
+ &self,
965
+ data: RbArrowType<ArrowArrayStreamReader>,
966
+ mode: String,
967
+ schema_mode: Option<String>,
968
+ partition_by: Option<Vec<String>>,
969
+ predicate: Option<String>,
970
+ target_file_size: Option<usize>,
971
+ name: Option<String>,
972
+ description: Option<String>,
973
+ configuration: Option<HashMap<String, Option<String>>>,
974
+ writer_properties: Option<RbWriterProperties>,
975
+ commit_properties: Option<RbCommitProperties>,
976
+ post_commithook_properties: Option<RbPostCommitHookProperties>,
977
+ ) -> RbResult<()> {
978
+ let table = {
979
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
980
+ let batches = data.0.map(|batch| batch.unwrap()).collect::<Vec<_>>();
981
+
982
+ let save_mode = mode.parse().map_err(RubyError::from)?;
983
+ let mut builder = table.write(batches).with_save_mode(save_mode);
984
+
985
+ if let Some(schema_mode) = schema_mode {
986
+ builder = builder.with_schema_mode(schema_mode.parse().map_err(RubyError::from)?);
987
+ }
988
+ if let Some(partition_columns) = partition_by {
989
+ builder = builder.with_partition_columns(partition_columns);
990
+ }
991
+
992
+ if let Some(writer_props) = writer_properties {
993
+ builder = builder.with_writer_properties(
994
+ set_writer_properties(writer_props).map_err(RubyError::from)?,
995
+ );
996
+ }
997
+
998
+ if let Some(name) = &name {
999
+ builder = builder.with_table_name(name);
1000
+ };
1001
+
1002
+ if let Some(description) = &description {
1003
+ builder = builder.with_description(description);
1004
+ };
1005
+
1006
+ if let Some(predicate) = predicate {
1007
+ builder = builder.with_replace_where(predicate);
1008
+ };
1009
+
1010
+ if let Some(target_file_size) = target_file_size {
1011
+ builder = builder.with_target_file_size(target_file_size)
1012
+ };
1013
+
1014
+ if let Some(config) = configuration {
1015
+ builder = builder.with_configuration(config);
1016
+ };
1017
+
1018
+ if let Some(commit_properties) =
1019
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
1020
+ {
1021
+ builder = builder.with_commit_properties(commit_properties);
1022
+ };
1023
+
1024
+ rt().block_on(builder.into_future())
1025
+ .map_err(RubyError::from)
1026
+ .map_err(RbErr::from)
1027
+ }?;
1028
+
1029
+ self.set_state(table.state)?;
1030
+ Ok(())
1031
+ }
859
1032
  }
860
1033
 
861
1034
  fn set_post_commithook_properties(
@@ -1141,7 +1314,6 @@ fn write_to_deltalake(
1141
1314
  table_uri: String,
1142
1315
  data: RbArrowType<ArrowArrayStreamReader>,
1143
1316
  mode: String,
1144
- table: Option<&RawDeltaTable>,
1145
1317
  schema_mode: Option<String>,
1146
1318
  partition_by: Option<Vec<String>>,
1147
1319
  predicate: Option<String>,
@@ -1158,16 +1330,14 @@ fn write_to_deltalake(
1158
1330
  let save_mode = mode.parse().map_err(RubyError::from)?;
1159
1331
 
1160
1332
  let options = storage_options.clone().unwrap_or_default();
1161
- let table = if let Some(table) = table {
1162
- DeltaOps(table._table.borrow().clone())
1163
- } else {
1164
- let table_url =
1165
- deltalake::table::builder::ensure_table_uri(&table_uri).map_err(RubyError::from)?;
1166
- rt().block_on(DeltaOps::try_from_uri_with_storage_options(
1167
- table_url, options,
1333
+ let table_url =
1334
+ deltalake::table::builder::ensure_table_uri(&table_uri).map_err(RubyError::from)?;
1335
+ let table = rt()
1336
+ .block_on(DeltaTable::try_from_url_with_storage_options(
1337
+ table_url.clone(),
1338
+ options.clone(),
1168
1339
  ))
1169
- .map_err(RubyError::from)?
1170
- };
1340
+ .map_err(RubyError::from)?;
1171
1341
 
1172
1342
  let mut builder = table.write(batches).with_save_mode(save_mode);
1173
1343
  if let Some(schema_mode) = schema_mode {
@@ -1249,7 +1419,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1249
1419
  deltalake::gcp::register_handlers(None);
1250
1420
 
1251
1421
  let module = ruby.define_module("DeltaLake")?;
1252
- module.define_singleton_method("write_deltalake_rust", function!(write_to_deltalake, 15))?;
1422
+ module.define_singleton_method("write_deltalake_rust", function!(write_to_deltalake, 14))?;
1253
1423
  module.define_singleton_method("rust_core_version", function!(rust_core_version, 0))?;
1254
1424
 
1255
1425
  let class = module.define_class("RawDeltaTable", ruby.class_object())?;
@@ -1290,7 +1460,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1290
1460
  )?;
1291
1461
  class.define_method(
1292
1462
  "z_order_optimize",
1293
- method!(RawDeltaTable::z_order_optimize, 9),
1463
+ method!(RawDeltaTable::z_order_optimize, 10),
1294
1464
  )?;
1295
1465
  class.define_method("add_columns", method!(RawDeltaTable::add_columns, 1))?;
1296
1466
  class.define_method("add_feature", method!(RawDeltaTable::add_feature, 2))?;
@@ -1340,6 +1510,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1340
1510
  "transaction_version",
1341
1511
  method!(RawDeltaTable::transaction_version, 1),
1342
1512
  )?;
1513
+ class.define_method("write", method!(RawDeltaTable::write, 12))?;
1343
1514
 
1344
1515
  let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;
1345
1516
  class.define_method("id", method!(RawDeltaTableMetaData::id, 0))?;