deltalake-rb 0.2.9 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,30 +1,33 @@
1
1
  mod error;
2
2
  mod features;
3
3
  mod merge;
4
+ mod ruby;
4
5
  mod schema;
5
6
  mod utils;
6
7
 
7
8
  use chrono::{DateTime, Duration, FixedOffset, Utc};
8
- use delta_kernel::schema::StructField;
9
+ use delta_kernel::schema::{MetadataValue, StructField};
9
10
  use delta_kernel::table_properties::DataSkippingNumIndexedCols;
10
11
  use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
11
12
  use deltalake::arrow::record_batch::RecordBatchIterator;
12
13
  use deltalake::checkpoints::{cleanup_metadata, create_checkpoint};
13
14
  use deltalake::datafusion::catalog::TableProvider;
14
15
  use deltalake::datafusion::prelude::SessionContext;
15
- use deltalake::delta_datafusion::DeltaCdfTableProvider;
16
+ use deltalake::delta_datafusion::{create_session_state_with_spill_config, DeltaCdfTableProvider};
16
17
  use deltalake::errors::DeltaTableError;
17
18
  use deltalake::kernel::transaction::{CommitProperties, TableReference};
18
19
  use deltalake::kernel::{scalars::ScalarExt, Transaction};
19
- use deltalake::kernel::{EagerSnapshot, StructDataExt};
20
+ use deltalake::kernel::{EagerSnapshot, StructDataExt, Version};
20
21
  use deltalake::logstore::IORuntime;
21
22
  use deltalake::logstore::LogStoreRef;
22
23
  use deltalake::operations::collect_sendable_stream;
23
- use deltalake::operations::optimize::{create_session_state_for_optimize, OptimizeType};
24
+ use deltalake::operations::optimize::OptimizeType;
25
+ use deltalake::operations::update_table_metadata::TableMetadataUpdate;
24
26
  use deltalake::parquet::basic::Compression;
25
27
  use deltalake::parquet::errors::ParquetError;
26
28
  use deltalake::parquet::file::properties::WriterProperties;
27
29
  use deltalake::partitions::PartitionFilter;
30
+ use deltalake::protocol::log_compaction::compact_logs;
28
31
  use deltalake::table::config::TablePropertiesExt;
29
32
  use deltalake::table::state::DeltaTableState;
30
33
  use deltalake::{DeltaResult, DeltaTable};
@@ -32,20 +35,22 @@ use error::DeltaError;
32
35
  use futures::future::join_all;
33
36
  use futures::TryStreamExt;
34
37
  use magnus::{
35
- function, method, prelude::*, try_convert::TryConvertOwned, typed_data::Obj, Error as RbErr,
36
- Integer, Module, RArray, Ruby, TryConvert, Value,
38
+ function, method, prelude::*, try_convert::TryConvertOwned, Error as RbErr, Integer, Module,
39
+ RArray, Ruby, TryConvert, Value,
37
40
  };
38
41
  use serde_json::Map;
39
42
  use std::collections::{HashMap, HashSet};
40
43
  use std::future::IntoFuture;
44
+ use std::num::NonZeroU64;
41
45
  use std::str::FromStr;
42
46
  use std::sync::{Arc, Mutex};
43
47
  use std::time;
44
48
  use uuid::Uuid;
45
49
 
46
- use crate::error::{to_rt_err, RbRuntimeError, RbValueError, RubyError};
50
+ use crate::error::{to_rt_err, to_rt_err2, RubyError};
47
51
  use crate::features::TableFeatures;
48
52
  use crate::merge::RbMergeBuilder;
53
+ use crate::ruby::{GvlExt, RbRuntimeError, RbValueError};
49
54
  use crate::schema::{schema_to_rbobject, Field};
50
55
  use crate::utils::rt;
51
56
 
@@ -111,6 +116,24 @@ impl RawDeltaTableMetaData {
111
116
 
112
117
  type StringVec = Vec<String>;
113
118
 
119
+ const MAX_OPTIMIZE_TARGET_SIZE: u64 = i64::MAX as u64;
120
+
121
+ fn parse_optimize_target_size(target_size: u64) -> Result<NonZeroU64, RubyError> {
122
+ let target_size = NonZeroU64::new(target_size).ok_or_else(|| {
123
+ RubyError::ValueError(format!(
124
+ "target_file_size must be between 1 and {MAX_OPTIMIZE_TARGET_SIZE}"
125
+ ))
126
+ })?;
127
+
128
+ if target_size.get() > MAX_OPTIMIZE_TARGET_SIZE {
129
+ return Err(RubyError::ValueError(format!(
130
+ "target_file_size must be between 1 and {MAX_OPTIMIZE_TARGET_SIZE}"
131
+ )));
132
+ }
133
+
134
+ Ok(target_size)
135
+ }
136
+
114
137
  impl RawDeltaTable {
115
138
  fn with_table<T>(&self, func: impl Fn(&deltalake::DeltaTable) -> RbResult<T>) -> RbResult<T> {
116
139
  match self._table.lock() {
@@ -119,6 +142,16 @@ impl RawDeltaTable {
119
142
  }
120
143
  }
121
144
 
145
+ fn with_table2<T>(
146
+ &self,
147
+ func: impl Fn(&deltalake::DeltaTable) -> Result<T, RubyError>,
148
+ ) -> Result<T, RubyError> {
149
+ match self._table.lock() {
150
+ Ok(table) => func(&table),
151
+ Err(e) => Err(RubyError::RuntimeError(e.to_string())),
152
+ }
153
+ }
154
+
122
155
  fn cloned_state(&self) -> RbResult<EagerSnapshot> {
123
156
  self.with_table(|t| {
124
157
  t.snapshot()
@@ -145,37 +178,41 @@ impl RawDeltaTable {
145
178
 
146
179
  impl RawDeltaTable {
147
180
  pub fn new(
181
+ rb: &Ruby,
148
182
  table_uri: String,
149
- version: Option<i64>,
183
+ version: Option<Version>,
150
184
  storage_options: Option<HashMap<String, String>>,
151
185
  without_files: bool,
152
186
  log_buffer_size: Option<usize>,
153
187
  ) -> RbResult<Self> {
154
- let table_url = deltalake::table::builder::parse_table_uri(table_uri)
155
- .map_err(error::RubyError::from)?;
156
- let mut builder = deltalake::DeltaTableBuilder::from_url(table_url)
157
- .map_err(error::RubyError::from)?
158
- .with_io_runtime(IORuntime::default());
159
-
160
- if let Some(storage_options) = storage_options {
161
- builder = builder.with_storage_options(storage_options)
162
- }
163
- if let Some(version) = version {
164
- builder = builder.with_version(version)
165
- }
166
- if without_files {
167
- builder = builder.without_files()
168
- }
169
- if let Some(buf_size) = log_buffer_size {
170
- builder = builder
171
- .with_log_buffer_size(buf_size)
172
- .map_err(RubyError::from)?;
173
- }
188
+ rb.detach(|| {
189
+ let table_url = deltalake::table::builder::parse_table_uri(table_uri)
190
+ .map_err(error::RubyError::from)?;
191
+ let mut builder = deltalake::DeltaTableBuilder::from_url(table_url)
192
+ .map_err(error::RubyError::from)?
193
+ .with_io_runtime(IORuntime::default());
194
+
195
+ if let Some(storage_options) = storage_options {
196
+ builder = builder.with_storage_options(storage_options)
197
+ }
198
+ if let Some(version) = version {
199
+ builder = builder.with_version(version)
200
+ }
201
+ if without_files {
202
+ builder = builder.without_files()
203
+ }
204
+ if let Some(buf_size) = log_buffer_size {
205
+ builder = builder
206
+ .with_log_buffer_size(buf_size)
207
+ .map_err(RubyError::from)?;
208
+ }
174
209
 
175
- let table = rt().block_on(builder.load()).map_err(RubyError::from)?;
176
- Ok(RawDeltaTable {
177
- _table: Arc::new(Mutex::new(table)),
210
+ let table = rt().block_on(builder.load()).map_err(RubyError::from)?;
211
+ Ok::<_, RubyError>(RawDeltaTable {
212
+ _table: Arc::new(Mutex::new(table)),
213
+ })
178
214
  })
215
+ .map_err(RbErr::from)
179
216
  }
180
217
 
181
218
  pub fn is_deltatable(
@@ -203,7 +240,7 @@ impl RawDeltaTable {
203
240
  self.with_table(|t| Ok(t.table_url().to_string()))
204
241
  }
205
242
 
206
- pub fn version(&self) -> RbResult<Option<i64>> {
243
+ pub fn version(&self) -> RbResult<Option<Version>> {
207
244
  self.with_table(|t| Ok(t.version()))
208
245
  }
209
246
 
@@ -220,7 +257,7 @@ impl RawDeltaTable {
220
257
  id: metadata.id().to_string(),
221
258
  name: metadata.name().map(String::from),
222
259
  description: metadata.description().map(String::from),
223
- partition_columns: metadata.partition_columns().clone(),
260
+ partition_columns: metadata.partition_columns().to_vec(),
224
261
  created_time: metadata.created_time(),
225
262
  configuration: metadata.configuration().clone(),
226
263
  })
@@ -255,33 +292,34 @@ impl RawDeltaTable {
255
292
  ))
256
293
  }
257
294
 
258
- pub fn load_version(&self, version: i64) -> RbResult<()> {
259
- #[allow(clippy::await_holding_lock)]
260
- rt().block_on(async {
261
- let mut table = self
262
- ._table
263
- .lock()
264
- .map_err(|e| RbRuntimeError::new_err(e.to_string()))?;
265
- (*table)
266
- .load_version(version)
267
- .await
268
- .map_err(RubyError::from)
269
- .map_err(RbErr::from)
295
+ pub fn load_version(rb: &Ruby, self_: &Self, version: Version) -> RbResult<()> {
296
+ rb.detach(|| {
297
+ #[allow(clippy::await_holding_lock)]
298
+ rt().block_on(async {
299
+ let mut table = self_
300
+ ._table
301
+ .lock()
302
+ .map_err(|e| RubyError::RuntimeError(e.to_string()))?;
303
+ (*table)
304
+ .load_version(version)
305
+ .await
306
+ .map_err(RubyError::from)
307
+ })
270
308
  })
309
+ .map_err(RbErr::from)
271
310
  }
272
311
 
273
- pub fn get_latest_version(&self) -> RbResult<i64> {
274
- #[allow(clippy::await_holding_lock)]
275
- rt().block_on(async {
276
- match self._table.lock() {
277
- Ok(table) => table
278
- .get_latest_version()
279
- .await
280
- .map_err(RubyError::from)
281
- .map_err(RbErr::from),
282
- Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
283
- }
312
+ pub fn get_latest_version(rb: &Ruby, self_: &Self) -> RbResult<Version> {
313
+ rb.detach(|| {
314
+ #[allow(clippy::await_holding_lock)]
315
+ rt().block_on(async {
316
+ match self_._table.lock() {
317
+ Ok(table) => table.get_latest_version().await.map_err(RubyError::from),
318
+ Err(e) => Err(RubyError::RuntimeError(e.to_string())),
319
+ }
320
+ })
284
321
  })
322
+ .map_err(RbErr::from)
285
323
  }
286
324
 
287
325
  pub fn get_num_index_cols(&self) -> RbResult<i32> {
@@ -309,57 +347,61 @@ impl RawDeltaTable {
309
347
  })
310
348
  }
311
349
 
312
- pub fn load_with_datetime(&self, ds: String) -> RbResult<()> {
313
- let datetime =
314
- DateTime::<Utc>::from(DateTime::<FixedOffset>::parse_from_rfc3339(&ds).map_err(
315
- |err| RbValueError::new_err(format!("Failed to parse datetime string: {err}")),
316
- )?);
317
- #[allow(clippy::await_holding_lock)]
318
- rt().block_on(async {
319
- let mut table = self
320
- ._table
321
- .lock()
322
- .map_err(|e| RbRuntimeError::new_err(e.to_string()))?;
323
- (*table)
324
- .load_with_datetime(datetime)
325
- .await
326
- .map_err(RubyError::from)
327
- .map_err(RbErr::from)
350
+ pub fn load_with_datetime(rb: &Ruby, self_: &Self, ds: String) -> RbResult<()> {
351
+ rb.detach(|| {
352
+ let datetime =
353
+ DateTime::<Utc>::from(DateTime::<FixedOffset>::parse_from_rfc3339(&ds).map_err(
354
+ |err| RubyError::ValueError(format!("Failed to parse datetime string: {err}")),
355
+ )?);
356
+ #[allow(clippy::await_holding_lock)]
357
+ rt().block_on(async {
358
+ let mut table = self_
359
+ ._table
360
+ .lock()
361
+ .map_err(|e| RubyError::RuntimeError(e.to_string()))?;
362
+ (*table)
363
+ .load_with_datetime(datetime)
364
+ .await
365
+ .map_err(RubyError::from)
366
+ })
328
367
  })
368
+ .map_err(RbErr::from)
329
369
  }
330
370
 
331
371
  pub fn files(
332
- &self,
372
+ rb: &Ruby,
373
+ self_: &Self,
333
374
  partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
334
375
  ) -> RbResult<Vec<String>> {
335
- if !self.has_files()? {
376
+ if !self_.has_files()? {
336
377
  return Err(DeltaError::new_err("Table is instantiated without files."));
337
378
  }
338
-
339
- if let Some(filters) = partition_filters {
340
- let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
341
- Ok(self
342
- .with_table(|t| {
343
- rt().block_on(async {
344
- t.get_files_by_partitions(&filters)
345
- .await
346
- .map_err(RubyError::from)
347
- .map_err(RbErr::from)
348
- })
349
- })?
350
- .into_iter()
351
- .map(|p| p.to_string())
352
- .collect())
353
- } else {
354
- match self._table.lock() {
355
- Ok(table) => Ok(table
356
- .get_file_uris()
357
- .map_err(RubyError::from)?
358
- .map(|f| f.to_string())
359
- .collect()),
360
- Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
379
+ rb.detach(|| {
380
+ if let Some(filters) = partition_filters {
381
+ let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
382
+ Ok(self_
383
+ .with_table2(|t| {
384
+ rt().block_on(async {
385
+ t.get_files_by_partitions(&filters)
386
+ .await
387
+ .map_err(RubyError::from)
388
+ })
389
+ })?
390
+ .into_iter()
391
+ .map(|p| p.to_string())
392
+ .collect())
393
+ } else {
394
+ match self_._table.lock() {
395
+ Ok(table) => Ok(table
396
+ .get_file_uris()
397
+ .map_err(RubyError::from)?
398
+ .map(|f| f.to_string())
399
+ .collect()),
400
+ Err(e) => Err(RubyError::RuntimeError(e.to_string())),
401
+ }
361
402
  }
362
- }
403
+ })
404
+ .map_err(RbErr::from)
363
405
  }
364
406
 
365
407
  pub fn file_uris(
@@ -399,36 +441,84 @@ impl RawDeltaTable {
399
441
  }
400
442
 
401
443
  pub fn vacuum(
402
- &self,
444
+ rb: &Ruby,
445
+ self_: &Self,
403
446
  dry_run: bool,
404
447
  retention_hours: Option<u64>,
405
448
  enforce_retention_duration: bool,
406
449
  commit_properties: Option<RbCommitProperties>,
407
450
  post_commithook_properties: Option<RbPostCommitHookProperties>,
408
451
  ) -> RbResult<Vec<String>> {
409
- let table = self._table.lock().map_err(to_rt_err)?.clone();
410
- let mut cmd = table
411
- .vacuum()
412
- .with_enforce_retention_duration(enforce_retention_duration)
413
- .with_dry_run(dry_run);
452
+ let (table, metrics) = rb.detach(|| {
453
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
454
+ let mut cmd = table
455
+ .vacuum()
456
+ .with_enforce_retention_duration(enforce_retention_duration)
457
+ .with_dry_run(dry_run);
458
+
459
+ if let Some(retention_period) = retention_hours {
460
+ cmd = cmd.with_retention_period(Duration::hours(retention_period as i64));
461
+ }
414
462
 
415
- if let Some(retention_period) = retention_hours {
416
- cmd = cmd.with_retention_period(Duration::hours(retention_period as i64));
417
- }
463
+ if let Some(commit_properties) =
464
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
465
+ {
466
+ cmd = cmd.with_commit_properties(commit_properties);
467
+ }
418
468
 
419
- if let Some(commit_properties) =
420
- maybe_create_commit_properties(commit_properties, post_commithook_properties)
421
- {
422
- cmd = cmd.with_commit_properties(commit_properties);
423
- }
424
- let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
425
- self.set_state(table.state)?;
469
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
470
+ })?;
471
+ self_.set_state(table.state)?;
426
472
  Ok(metrics.files_deleted)
427
473
  }
428
474
 
475
+ #[allow(clippy::too_many_arguments)]
476
+ pub fn update(
477
+ rb: &Ruby,
478
+ self_: &Self,
479
+ updates: HashMap<String, String>,
480
+ predicate: Option<String>,
481
+ writer_properties: Option<RbWriterProperties>,
482
+ safe_cast: bool,
483
+ commit_properties: Option<RbCommitProperties>,
484
+ post_commithook_properties: Option<RbPostCommitHookProperties>,
485
+ ) -> RbResult<String> {
486
+ let (table, metrics) = rb
487
+ .detach(|| {
488
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
489
+ let mut cmd = table.update().with_safe_cast(safe_cast);
490
+
491
+ if let Some(writer_props) = writer_properties {
492
+ cmd = cmd.with_writer_properties(
493
+ set_writer_properties(writer_props).map_err(RubyError::from)?,
494
+ );
495
+ }
496
+
497
+ for (col_name, expression) in updates {
498
+ cmd = cmd.with_update(col_name.clone(), expression.clone());
499
+ }
500
+
501
+ if let Some(update_predicate) = predicate {
502
+ cmd = cmd.with_predicate(update_predicate);
503
+ }
504
+
505
+ if let Some(commit_properties) =
506
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
507
+ {
508
+ cmd = cmd.with_commit_properties(commit_properties);
509
+ }
510
+
511
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
512
+ })
513
+ .map_err(RbErr::from)?;
514
+ self_.set_state(table.state)?;
515
+ Ok(serde_json::to_string(&metrics).unwrap())
516
+ }
517
+
429
518
  #[allow(clippy::too_many_arguments)]
430
519
  pub fn compact_optimize(
431
- &self,
520
+ rb: &Ruby,
521
+ self_: &Self,
432
522
  partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
433
523
  target_size: Option<u64>,
434
524
  max_concurrent_tasks: Option<usize>,
@@ -437,42 +527,47 @@ impl RawDeltaTable {
437
527
  commit_properties: Option<RbCommitProperties>,
438
528
  post_commithook_properties: Option<RbPostCommitHookProperties>,
439
529
  ) -> RbResult<String> {
440
- let table = self._table.lock().map_err(to_rt_err)?.clone();
441
- let mut cmd = table
442
- .optimize()
443
- .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
444
-
445
- if let Some(size) = target_size {
446
- cmd = cmd.with_target_size(size);
447
- }
448
- if let Some(commit_interval) = min_commit_interval {
449
- cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
450
- }
530
+ let (table, metrics) = rb.detach(|| {
531
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
532
+ let mut cmd = table
533
+ .optimize()
534
+ .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
535
+
536
+ if let Some(target_size) = target_size {
537
+ let target_size = parse_optimize_target_size(target_size)?;
538
+ cmd = cmd.with_target_size(target_size);
539
+ }
540
+ if let Some(commit_interval) = min_commit_interval {
541
+ cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
542
+ }
451
543
 
452
- if let Some(writer_props) = writer_properties {
453
- cmd = cmd.with_writer_properties(
454
- set_writer_properties(writer_props).map_err(RubyError::from)?,
455
- );
456
- }
544
+ if let Some(writer_props) = writer_properties {
545
+ cmd = cmd.with_writer_properties(
546
+ set_writer_properties(writer_props).map_err(RubyError::from)?,
547
+ );
548
+ }
457
549
 
458
- if let Some(commit_properties) =
459
- maybe_create_commit_properties(commit_properties, post_commithook_properties)
460
- {
461
- cmd = cmd.with_commit_properties(commit_properties);
462
- }
550
+ if let Some(commit_properties) =
551
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
552
+ {
553
+ cmd = cmd.with_commit_properties(commit_properties);
554
+ }
463
555
 
464
- let converted_filters = convert_partition_filters(partition_filters.unwrap_or_default())
465
- .map_err(RubyError::from)?;
466
- cmd = cmd.with_filters(&converted_filters);
556
+ let converted_filters =
557
+ convert_partition_filters(partition_filters.unwrap_or_default())
558
+ .map_err(RubyError::from)?;
559
+ cmd = cmd.with_filters(&converted_filters);
467
560
 
468
- let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
469
- self.set_state(table.state)?;
561
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
562
+ })?;
563
+ self_.set_state(table.state)?;
470
564
  Ok(serde_json::to_string(&metrics).unwrap())
471
565
  }
472
566
 
473
567
  #[allow(clippy::too_many_arguments)]
474
568
  pub fn z_order_optimize(
475
- &self,
569
+ rb: &Ruby,
570
+ self_: &Self,
476
571
  z_order_columns: Vec<String>,
477
572
  partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
478
573
  target_size: Option<u64>,
@@ -484,67 +579,77 @@ impl RawDeltaTable {
484
579
  commit_properties: Option<RbCommitProperties>,
485
580
  post_commithook_properties: Option<RbPostCommitHookProperties>,
486
581
  ) -> RbResult<String> {
487
- let table = self._table.lock().map_err(to_rt_err)?.clone();
488
- let mut cmd = table
489
- .clone()
490
- .optimize()
491
- .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
492
- .with_type(OptimizeType::ZOrder(z_order_columns));
493
-
494
- if max_spill_size.is_some() || max_temp_directory_size.is_some() {
495
- let session =
496
- create_session_state_for_optimize(max_spill_size, max_temp_directory_size);
497
- cmd = cmd.with_session_state(Arc::new(session));
498
- }
582
+ let (table, metrics) = rb.detach(|| {
583
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
584
+ let mut cmd = table
585
+ .clone()
586
+ .optimize()
587
+ .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
588
+ .with_type(OptimizeType::ZOrder(z_order_columns));
589
+
590
+ if max_spill_size.is_some() || max_temp_directory_size.is_some() {
591
+ let session =
592
+ create_session_state_with_spill_config(max_spill_size, max_temp_directory_size);
593
+ cmd = cmd.with_session_state(Arc::new(session));
594
+ }
499
595
 
500
- if let Some(size) = target_size {
501
- cmd = cmd.with_target_size(size);
502
- }
503
- if let Some(commit_interval) = min_commit_interval {
504
- cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
505
- }
596
+ if let Some(target_size) = target_size {
597
+ let target_size = parse_optimize_target_size(target_size)?;
598
+ cmd = cmd.with_target_size(target_size);
599
+ }
600
+ if let Some(commit_interval) = min_commit_interval {
601
+ cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
602
+ }
506
603
 
507
- if let Some(writer_props) = writer_properties {
508
- cmd = cmd.with_writer_properties(
509
- set_writer_properties(writer_props).map_err(RubyError::from)?,
510
- );
511
- }
604
+ if let Some(writer_props) = writer_properties {
605
+ cmd = cmd.with_writer_properties(
606
+ set_writer_properties(writer_props).map_err(RubyError::from)?,
607
+ );
608
+ }
512
609
 
513
- if let Some(commit_properties) =
514
- maybe_create_commit_properties(commit_properties, post_commithook_properties)
515
- {
516
- cmd = cmd.with_commit_properties(commit_properties);
517
- }
610
+ if let Some(commit_properties) =
611
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
612
+ {
613
+ cmd = cmd.with_commit_properties(commit_properties);
614
+ }
518
615
 
519
- let converted_filters = convert_partition_filters(partition_filters.unwrap_or_default())
520
- .map_err(RubyError::from)?;
521
- cmd = cmd.with_filters(&converted_filters);
616
+ let converted_filters =
617
+ convert_partition_filters(partition_filters.unwrap_or_default())
618
+ .map_err(RubyError::from)?;
619
+ cmd = cmd.with_filters(&converted_filters);
522
620
 
523
- let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
524
- self.set_state(table.state)?;
621
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
622
+ })?;
623
+ self_.set_state(table.state)?;
525
624
  Ok(serde_json::to_string(&metrics).unwrap())
526
625
  }
527
626
 
528
- pub fn add_columns(&self, fields: RArray) -> RbResult<()> {
529
- let fields = fields.typecheck::<Obj<Field>>()?;
627
+ pub fn add_columns(rb: &Ruby, self_: &Self, fields: RArray) -> RbResult<()> {
628
+ let fields = fields
629
+ .into_iter()
630
+ .map(|v| <&Field>::try_convert(v).cloned())
631
+ .collect::<RbResult<Vec<_>>>()?;
530
632
 
531
- let table = self._table.lock().map_err(to_rt_err)?.clone();
532
- let mut cmd = table.add_columns();
633
+ let table = rb.detach(|| {
634
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
635
+ let mut cmd = table.add_columns();
533
636
 
534
- let new_fields = fields
535
- .iter()
536
- .map(|v| v.inner.clone())
537
- .collect::<Vec<StructField>>();
637
+ let new_fields = fields
638
+ .iter()
639
+ .map(|v| v.inner.clone())
640
+ .collect::<Vec<StructField>>();
538
641
 
539
- cmd = cmd.with_fields(new_fields);
642
+ cmd = cmd.with_fields(new_fields);
540
643
 
541
- let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
542
- self.set_state(table.state)?;
644
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
645
+ })?;
646
+ self_.set_state(table.state)?;
543
647
  Ok(())
544
648
  }
545
649
 
546
650
  pub fn add_feature(
547
- &self,
651
+ rb: &Ruby,
652
+ self_: &Self,
548
653
  feature: RArray,
549
654
  allow_protocol_versions_increase: bool,
550
655
  ) -> RbResult<()> {
@@ -553,52 +658,75 @@ impl RawDeltaTable {
553
658
  .map(TableFeatures::try_convert)
554
659
  .collect::<RbResult<Vec<_>>>()?;
555
660
 
556
- let table = self._table.lock().map_err(to_rt_err)?.clone();
557
- let cmd = table
558
- .add_feature()
559
- .with_features(feature)
560
- .with_allow_protocol_versions_increase(allow_protocol_versions_increase);
661
+ let table = rb.detach(|| {
662
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
663
+ let cmd = table
664
+ .add_feature()
665
+ .with_features(feature)
666
+ .with_allow_protocol_versions_increase(allow_protocol_versions_increase);
561
667
 
562
- let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
563
- self.set_state(table.state)?;
668
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
669
+ })?;
670
+ self_.set_state(table.state)?;
564
671
  Ok(())
565
672
  }
566
673
 
567
- pub fn add_constraints(&self, constraints: HashMap<String, String>) -> RbResult<()> {
568
- let table = self._table.lock().map_err(to_rt_err)?.clone();
569
- let mut cmd = table.add_constraint();
674
+ pub fn add_constraints(
675
+ rb: &Ruby,
676
+ self_: &Self,
677
+ constraints: HashMap<String, String>,
678
+ ) -> RbResult<()> {
679
+ let table = rb.detach(|| {
680
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
681
+ let mut cmd = table.add_constraint();
570
682
 
571
- for (col_name, expression) in constraints {
572
- cmd = cmd.with_constraint(col_name.clone(), expression.clone());
573
- }
683
+ for (col_name, expression) in constraints {
684
+ cmd = cmd.with_constraint(col_name.clone(), expression.clone());
685
+ }
574
686
 
575
- let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
576
- self.set_state(table.state)?;
687
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
688
+ })?;
689
+ self_.set_state(table.state)?;
577
690
  Ok(())
578
691
  }
579
692
 
580
- pub fn drop_constraints(&self, name: String, raise_if_not_exists: bool) -> RbResult<()> {
581
- let table = self._table.lock().map_err(to_rt_err)?.clone();
582
- let cmd = table
583
- .drop_constraints()
584
- .with_constraint(name)
585
- .with_raise_if_not_exists(raise_if_not_exists);
693
+ pub fn drop_constraints(
694
+ rb: &Ruby,
695
+ self_: &Self,
696
+ name: String,
697
+ raise_if_not_exists: bool,
698
+ ) -> RbResult<()> {
699
+ let table = rb.detach(|| {
700
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
701
+ let cmd = table
702
+ .drop_constraints()
703
+ .with_constraint(name)
704
+ .with_raise_if_not_exists(raise_if_not_exists);
705
+
706
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
707
+ })?;
708
+ self_.set_state(table.state)?;
709
+ Ok(())
710
+ }
586
711
 
587
- let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
588
- self.set_state(table.state)?;
712
+ pub fn generate(&self) -> RbResult<()> {
713
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
714
+ rt().block_on(async { table.generate().await })
715
+ .map_err(RubyError::from)?;
589
716
  Ok(())
590
717
  }
591
718
 
592
719
  pub fn load_cdf(
593
- &self,
594
- starting_version: Option<i64>,
595
- ending_version: Option<i64>,
720
+ rb: &Ruby,
721
+ self_: &Self,
722
+ starting_version: Option<Version>,
723
+ ending_version: Option<Version>,
596
724
  starting_timestamp: Option<String>,
597
725
  ending_timestamp: Option<String>,
598
726
  columns: Option<Vec<String>>,
599
727
  ) -> RbResult<ArrowArrayStream> {
600
728
  let ctx = SessionContext::new();
601
- let table = self._table.lock().map_err(to_rt_err)?.clone();
729
+ let table = self_._table.lock().map_err(to_rt_err)?.clone();
602
730
  let mut cmd = table.scan_cdf();
603
731
 
604
732
  if let Some(sv) = starting_version {
@@ -623,38 +751,41 @@ impl RawDeltaTable {
623
751
  let table_provider: Arc<dyn TableProvider> =
624
752
  Arc::new(DeltaCdfTableProvider::try_new(cmd).map_err(RubyError::from)?);
625
753
 
626
- let plan = rt()
627
- .block_on(async {
628
- let mut df = ctx.read_table(table_provider)?;
629
- if let Some(columns) = columns {
630
- let cols: Vec<_> = columns.iter().map(|c| c.as_ref()).collect();
631
- df = df.select_columns(&cols)?;
632
- }
633
- df.create_physical_plan().await
634
- })
635
- .map_err(RubyError::from)?;
754
+ rb.detach(|| {
755
+ let plan = rt()
756
+ .block_on(async {
757
+ let mut df = ctx.read_table(table_provider)?;
758
+ if let Some(columns) = columns {
759
+ let cols: Vec<_> = columns.iter().map(|c| c.as_ref()).collect();
760
+ df = df.select_columns(&cols)?;
761
+ }
762
+ df.create_physical_plan().await
763
+ })
764
+ .map_err(RubyError::from)?;
636
765
 
637
- let mut tasks = vec![];
638
- for p in 0..plan.properties().output_partitioning().partition_count() {
639
- let inner_plan = plan.clone();
640
- let partition_batch = inner_plan.execute(p, ctx.task_ctx()).unwrap();
641
- let handle = rt().spawn(collect_sendable_stream(partition_batch));
642
- tasks.push(handle);
643
- }
766
+ let mut tasks = vec![];
767
+ for p in 0..plan.properties().output_partitioning().partition_count() {
768
+ let inner_plan = plan.clone();
769
+ let partition_batch = inner_plan.execute(p, ctx.task_ctx()).unwrap();
770
+ let handle = rt().spawn(collect_sendable_stream(partition_batch));
771
+ tasks.push(handle);
772
+ }
644
773
 
645
- // This is unfortunate.
646
- let batches = rt()
647
- .block_on(join_all(tasks))
648
- .into_iter()
649
- .flatten()
650
- .collect::<Result<Vec<Vec<_>>, _>>()
651
- .unwrap()
652
- .into_iter()
653
- .flatten()
654
- .map(Ok);
655
- let batch_iter = RecordBatchIterator::new(batches, plan.schema());
656
- let ffi_stream = FFI_ArrowArrayStream::new(Box::new(batch_iter));
657
- Ok(ArrowArrayStream { stream: ffi_stream })
774
+ // This is unfortunate.
775
+ let batches = rt()
776
+ .block_on(join_all(tasks))
777
+ .into_iter()
778
+ .flatten()
779
+ .collect::<Result<Vec<Vec<_>>, _>>()
780
+ .unwrap()
781
+ .into_iter()
782
+ .flatten()
783
+ .map(Ok);
784
+ let batch_iter = RecordBatchIterator::new(batches, plan.schema());
785
+ let ffi_stream = FFI_ArrowArrayStream::new(Box::new(batch_iter));
786
+ Ok::<_, RubyError>(ArrowArrayStream { stream: ffi_stream })
787
+ })
788
+ .map_err(RbErr::from)
658
789
  }
659
790
 
660
791
  #[allow(clippy::too_many_arguments)]
@@ -701,7 +832,7 @@ impl RawDeltaTable {
701
832
  let mut cmd = table.restore();
702
833
  if let Some(val) = target {
703
834
  if let Some(version) = Integer::from_value(val) {
704
- cmd = cmd.with_version_to_restore(version.to_i64()?)
835
+ cmd = cmd.with_version_to_restore(version.to_u64()?)
705
836
  }
706
837
  if let Ok(ds) = String::try_convert(val) {
707
838
  let datetime = DateTime::<Utc>::from(
@@ -809,35 +940,64 @@ impl RawDeltaTable {
809
940
  Ok(ruby.ary_from_iter(active_partitions))
810
941
  }
811
942
 
812
- pub fn create_checkpoint(&self) -> RbResult<()> {
813
- let operation_id = Uuid::new_v4();
943
+ pub fn create_checkpoint(rb: &Ruby, self_: &Self) -> RbResult<()> {
944
+ rb.detach(|| {
945
+ let operation_id = Uuid::new_v4();
814
946
 
815
- #[allow(clippy::await_holding_lock)]
816
- let _result = rt().block_on(async {
817
- match self._table.lock() {
818
- Ok(table) => create_checkpoint(&table, Some(operation_id))
819
- .await
820
- .map_err(RubyError::from)
821
- .map_err(RbErr::from),
822
- Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
823
- }
824
- });
947
+ #[allow(clippy::await_holding_lock)]
948
+ let _result = rt().block_on(async {
949
+ match self_._table.lock() {
950
+ Ok(table) => create_checkpoint(&table, Some(operation_id))
951
+ .await
952
+ .map_err(RubyError::from),
953
+ Err(e) => Err(RubyError::RuntimeError(e.to_string())),
954
+ }
955
+ });
956
+
957
+ Ok::<_, RubyError>(())
958
+ })
959
+ .map_err(RbErr::from)
960
+ }
961
+
962
+ pub fn compact_logs(
963
+ rb: &Ruby,
964
+ self_: &Self,
965
+ starting_version: u64,
966
+ ending_version: u64,
967
+ ) -> RbResult<()> {
968
+ rb.detach(|| {
969
+ let operation_id = Uuid::new_v4();
970
+
971
+ #[allow(clippy::await_holding_lock)]
972
+ let result = rt().block_on(async {
973
+ match self_._table.lock() {
974
+ Ok(table) => {
975
+ compact_logs(&table, starting_version, ending_version, Some(operation_id))
976
+ .await
977
+ .map_err(RubyError::from)
978
+ }
979
+ Err(e) => Err(RubyError::RuntimeError(e.to_string())),
980
+ }
981
+ });
982
+
983
+ result
984
+ })
985
+ .map_err(RbErr::from)?;
825
986
 
826
987
  Ok(())
827
988
  }
828
989
 
829
- pub fn cleanup_metadata(&self) -> RbResult<()> {
830
- let (_result, new_state) = {
990
+ pub fn cleanup_metadata(rb: &Ruby, self_: &Self) -> RbResult<()> {
991
+ let (_result, new_state) = rb.detach(|| {
831
992
  let operation_id = Uuid::new_v4();
832
993
 
833
994
  #[allow(clippy::await_holding_lock)]
834
995
  let result = rt().block_on(async {
835
- match self._table.lock() {
996
+ match self_._table.lock() {
836
997
  Ok(table) => {
837
998
  let result = cleanup_metadata(&table, Some(operation_id))
838
999
  .await
839
- .map_err(RubyError::from)
840
- .map_err(RbErr::from)?;
1000
+ .map_err(RubyError::from)?;
841
1001
 
842
1002
  let new_state = if result > 0 {
843
1003
  Some(
@@ -855,15 +1015,15 @@ impl RawDeltaTable {
855
1015
 
856
1016
  Ok((result, new_state))
857
1017
  }
858
- Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
1018
+ Err(e) => Err(RubyError::RuntimeError(e.to_string())),
859
1019
  }
860
1020
  });
861
1021
 
862
1022
  result
863
- }?;
1023
+ })?;
864
1024
 
865
1025
  if new_state.is_some() {
866
- self.set_state(new_state)?;
1026
+ self_.set_state(new_state)?;
867
1027
  }
868
1028
 
869
1029
  Ok(())
@@ -887,30 +1047,33 @@ impl RawDeltaTable {
887
1047
  }
888
1048
 
889
1049
  pub fn delete(
890
- &self,
1050
+ rb: &Ruby,
1051
+ self_: &Self,
891
1052
  predicate: Option<String>,
892
1053
  writer_properties: Option<RbWriterProperties>,
893
1054
  commit_properties: Option<RbCommitProperties>,
894
1055
  post_commithook_properties: Option<RbPostCommitHookProperties>,
895
1056
  ) -> RbResult<String> {
896
- let table = self._table.lock().map_err(to_rt_err)?.clone();
897
- let mut cmd = table.delete();
898
- if let Some(predicate) = predicate {
899
- cmd = cmd.with_predicate(predicate);
900
- }
901
- if let Some(writer_props) = writer_properties {
902
- cmd = cmd.with_writer_properties(
903
- set_writer_properties(writer_props).map_err(RubyError::from)?,
904
- );
905
- }
906
- if let Some(commit_properties) =
907
- maybe_create_commit_properties(commit_properties, post_commithook_properties)
908
- {
909
- cmd = cmd.with_commit_properties(commit_properties);
910
- }
1057
+ let (table, metrics) = rb.detach(|| {
1058
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
1059
+ let mut cmd = table.delete();
1060
+ if let Some(predicate) = predicate {
1061
+ cmd = cmd.with_predicate(predicate);
1062
+ }
1063
+ if let Some(writer_props) = writer_properties {
1064
+ cmd = cmd.with_writer_properties(
1065
+ set_writer_properties(writer_props).map_err(RubyError::from)?,
1066
+ );
1067
+ }
1068
+ if let Some(commit_properties) =
1069
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
1070
+ {
1071
+ cmd = cmd.with_commit_properties(commit_properties);
1072
+ }
911
1073
 
912
- let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
913
- self.set_state(table.state)?;
1074
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
1075
+ })?;
1076
+ self_.set_state(table.state)?;
914
1077
  Ok(serde_json::to_string(&metrics).unwrap())
915
1078
  }
916
1079
 
@@ -930,6 +1093,54 @@ impl RawDeltaTable {
930
1093
  Ok(())
931
1094
  }
932
1095
 
1096
+ pub fn set_table_name(
1097
+ &self,
1098
+ name: String,
1099
+ commit_properties: Option<RbCommitProperties>,
1100
+ post_commithook_properties: Option<RbPostCommitHookProperties>,
1101
+ ) -> RbResult<()> {
1102
+ let update = TableMetadataUpdate {
1103
+ name: Some(name),
1104
+ description: None,
1105
+ };
1106
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
1107
+ let mut cmd = table.update_table_metadata().with_update(update);
1108
+
1109
+ if let Some(commit_properties) =
1110
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
1111
+ {
1112
+ cmd = cmd.with_commit_properties(commit_properties);
1113
+ }
1114
+
1115
+ let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
1116
+ self.set_state(table.state)?;
1117
+ Ok(())
1118
+ }
1119
+
1120
+ pub fn set_table_description(
1121
+ &self,
1122
+ description: String,
1123
+ commit_properties: Option<RbCommitProperties>,
1124
+ post_commithook_properties: Option<RbPostCommitHookProperties>,
1125
+ ) -> RbResult<()> {
1126
+ let update = TableMetadataUpdate {
1127
+ name: None,
1128
+ description: Some(description),
1129
+ };
1130
+ let table = self._table.lock().map_err(to_rt_err)?.clone();
1131
+ let mut cmd = table.update_table_metadata().with_update(update);
1132
+
1133
+ if let Some(commit_properties) =
1134
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
1135
+ {
1136
+ cmd = cmd.with_commit_properties(commit_properties);
1137
+ }
1138
+
1139
+ let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
1140
+ self.set_state(table.state)?;
1141
+ Ok(())
1142
+ }
1143
+
933
1144
  pub fn repair(
934
1145
  &self,
935
1146
  dry_run: bool,
@@ -959,15 +1170,50 @@ impl RawDeltaTable {
959
1170
  .map_err(RubyError::from)?)
960
1171
  }
961
1172
 
1173
+ pub fn set_column_metadata(
1174
+ rb: &Ruby,
1175
+ self_: &Self,
1176
+ field_name: String,
1177
+ metadata: HashMap<String, String>,
1178
+ commit_properties: Option<RbCommitProperties>,
1179
+ post_commithook_properties: Option<RbPostCommitHookProperties>,
1180
+ ) -> RbResult<()> {
1181
+ let table = rb
1182
+ .detach(|| {
1183
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
1184
+ let mut cmd = table
1185
+ .update_field_metadata()
1186
+ .with_field_name(&field_name)
1187
+ .with_metadata(
1188
+ metadata
1189
+ .iter()
1190
+ .map(|(k, v)| (k.clone(), MetadataValue::String(v.clone())))
1191
+ .collect(),
1192
+ );
1193
+
1194
+ if let Some(commit_properties) =
1195
+ maybe_create_commit_properties(commit_properties, post_commithook_properties)
1196
+ {
1197
+ cmd = cmd.with_commit_properties(commit_properties)
1198
+ }
1199
+
1200
+ rt().block_on(cmd.into_future()).map_err(RubyError::from)
1201
+ })
1202
+ .map_err(RbErr::from)?;
1203
+ self_.set_state(table.state)?;
1204
+ Ok(())
1205
+ }
1206
+
962
1207
  #[allow(clippy::too_many_arguments)]
963
1208
  pub fn write(
964
- &self,
1209
+ rb: &Ruby,
1210
+ self_: &Self,
965
1211
  data: RbArrowType<ArrowArrayStreamReader>,
966
1212
  mode: String,
967
1213
  schema_mode: Option<String>,
968
1214
  partition_by: Option<Vec<String>>,
969
1215
  predicate: Option<String>,
970
- target_file_size: Option<usize>,
1216
+ target_file_size: Option<u64>,
971
1217
  name: Option<String>,
972
1218
  description: Option<String>,
973
1219
  configuration: Option<HashMap<String, Option<String>>>,
@@ -975,8 +1221,8 @@ impl RawDeltaTable {
975
1221
  commit_properties: Option<RbCommitProperties>,
976
1222
  post_commithook_properties: Option<RbPostCommitHookProperties>,
977
1223
  ) -> RbResult<()> {
978
- let table = {
979
- let table = self._table.lock().map_err(to_rt_err)?.clone();
1224
+ let table = rb.detach(|| {
1225
+ let table = self_._table.lock().map_err(to_rt_err2)?.clone();
980
1226
  let batches = data.0.map(|batch| batch.unwrap()).collect::<Vec<_>>();
981
1227
 
982
1228
  let save_mode = mode.parse().map_err(RubyError::from)?;
@@ -1008,7 +1254,10 @@ impl RawDeltaTable {
1008
1254
  };
1009
1255
 
1010
1256
  if let Some(target_file_size) = target_file_size {
1011
- builder = builder.with_target_file_size(target_file_size)
1257
+ let target_file_size = NonZeroU64::new(target_file_size).ok_or_else(|| {
1258
+ RubyError::ValueError("target_file_size must be greater than 0".to_string())
1259
+ })?;
1260
+ builder = builder.with_target_file_size(Some(target_file_size))
1012
1261
  };
1013
1262
 
1014
1263
  if let Some(config) = configuration {
@@ -1023,10 +1272,9 @@ impl RawDeltaTable {
1023
1272
 
1024
1273
  rt().block_on(builder.into_future())
1025
1274
  .map_err(RubyError::from)
1026
- .map_err(RbErr::from)
1027
- }?;
1275
+ })?;
1028
1276
 
1029
- self.set_state(table.state)?;
1277
+ self_.set_state(table.state)?;
1030
1278
  Ok(())
1031
1279
  }
1032
1280
  }
@@ -1067,7 +1315,7 @@ fn set_writer_properties(writer_properties: RbWriterProperties) -> DeltaResult<W
1067
1315
  properties = properties.set_write_batch_size(batch_size);
1068
1316
  }
1069
1317
  if let Some(row_group_size) = max_row_group_size {
1070
- properties = properties.set_max_row_group_size(row_group_size);
1318
+ properties = properties.set_max_row_group_row_count(Some(row_group_size));
1071
1319
  }
1072
1320
  properties = properties.set_statistics_truncate_length(statistics_truncate_length);
1073
1321
 
@@ -1311,13 +1559,14 @@ impl TryConvert for RbCommitProperties {
1311
1559
 
1312
1560
  #[allow(clippy::too_many_arguments)]
1313
1561
  fn write_to_deltalake(
1562
+ rb: &Ruby,
1314
1563
  table_uri: String,
1315
1564
  data: RbArrowType<ArrowArrayStreamReader>,
1316
1565
  mode: String,
1317
1566
  schema_mode: Option<String>,
1318
1567
  partition_by: Option<Vec<String>>,
1319
1568
  predicate: Option<String>,
1320
- target_file_size: Option<usize>,
1569
+ target_file_size: Option<u64>,
1321
1570
  name: Option<String>,
1322
1571
  description: Option<String>,
1323
1572
  configuration: Option<HashMap<String, Option<String>>>,
@@ -1326,62 +1575,36 @@ fn write_to_deltalake(
1326
1575
  commit_properties: Option<RbCommitProperties>,
1327
1576
  post_commithook_properties: Option<RbPostCommitHookProperties>,
1328
1577
  ) -> RbResult<()> {
1329
- let batches = data.0.map(|batch| batch.unwrap()).collect::<Vec<_>>();
1330
- let save_mode = mode.parse().map_err(RubyError::from)?;
1331
-
1332
- let options = storage_options.clone().unwrap_or_default();
1333
- let table_url =
1334
- deltalake::table::builder::ensure_table_uri(&table_uri).map_err(RubyError::from)?;
1335
- let table = rt()
1336
- .block_on(DeltaTable::try_from_url_with_storage_options(
1578
+ let raw_table: DeltaResult<RawDeltaTable> = rb.detach(|| {
1579
+ let options = storage_options.clone().unwrap_or_default();
1580
+ let table_url = deltalake::table::builder::ensure_table_uri(&table_uri)?;
1581
+ let table = rt().block_on(DeltaTable::try_from_url_with_storage_options(
1337
1582
  table_url.clone(),
1338
1583
  options.clone(),
1339
- ))
1340
- .map_err(RubyError::from)?;
1341
-
1342
- let mut builder = table.write(batches).with_save_mode(save_mode);
1343
- if let Some(schema_mode) = schema_mode {
1344
- builder = builder.with_schema_mode(schema_mode.parse().map_err(RubyError::from)?);
1345
- }
1346
- if let Some(partition_columns) = partition_by {
1347
- builder = builder.with_partition_columns(partition_columns);
1348
- }
1349
-
1350
- if let Some(writer_props) = writer_properties {
1351
- builder = builder
1352
- .with_writer_properties(set_writer_properties(writer_props).map_err(RubyError::from)?);
1353
- }
1354
-
1355
- if let Some(name) = &name {
1356
- builder = builder.with_table_name(name);
1357
- };
1358
-
1359
- if let Some(description) = &description {
1360
- builder = builder.with_description(description);
1361
- };
1362
-
1363
- if let Some(predicate) = predicate {
1364
- builder = builder.with_replace_where(predicate);
1365
- };
1366
-
1367
- if let Some(target_file_size) = target_file_size {
1368
- builder = builder.with_target_file_size(target_file_size)
1369
- };
1584
+ ))?;
1370
1585
 
1371
- if let Some(config) = configuration {
1372
- builder = builder.with_configuration(config);
1373
- };
1374
-
1375
- if let Some(commit_properties) =
1376
- maybe_create_commit_properties(commit_properties, post_commithook_properties)
1377
- {
1378
- builder = builder.with_commit_properties(commit_properties);
1379
- };
1380
-
1381
- rt().block_on(builder.into_future())
1382
- .map_err(RubyError::from)?;
1383
-
1384
- Ok(())
1586
+ let raw_table = RawDeltaTable {
1587
+ _table: Arc::new(Mutex::new(table)),
1588
+ };
1589
+ Ok(raw_table)
1590
+ });
1591
+
1592
+ RawDeltaTable::write(
1593
+ rb,
1594
+ &raw_table.map_err(RubyError::from)?,
1595
+ data,
1596
+ mode,
1597
+ schema_mode,
1598
+ partition_by,
1599
+ predicate,
1600
+ target_file_size,
1601
+ name,
1602
+ description,
1603
+ configuration,
1604
+ writer_properties,
1605
+ commit_properties,
1606
+ post_commithook_properties,
1607
+ )
1385
1608
  }
1386
1609
 
1387
1610
  pub struct RbArrowType<T>(pub T);
@@ -1454,6 +1677,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1454
1677
  class.define_method("file_uris", method!(RawDeltaTable::file_uris, 1))?;
1455
1678
  class.define_method("schema", method!(RawDeltaTable::schema, 0))?;
1456
1679
  class.define_method("vacuum", method!(RawDeltaTable::vacuum, 5))?;
1680
+ class.define_method("update", method!(RawDeltaTable::update, 6))?;
1457
1681
  class.define_method(
1458
1682
  "compact_optimize",
1459
1683
  method!(RawDeltaTable::compact_optimize, 7),
@@ -1472,6 +1696,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1472
1696
  "drop_constraints",
1473
1697
  method!(RawDeltaTable::drop_constraints, 2),
1474
1698
  )?;
1699
+ class.define_method("generate", method!(RawDeltaTable::generate, 0))?;
1475
1700
  class.define_method("load_cdf", method!(RawDeltaTable::load_cdf, 5))?;
1476
1701
  class.define_method(
1477
1702
  "create_merge_builder",
@@ -1492,6 +1717,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1492
1717
  "create_checkpoint",
1493
1718
  method!(RawDeltaTable::create_checkpoint, 0),
1494
1719
  )?;
1720
+ class.define_method("compact_logs", method!(RawDeltaTable::compact_logs, 2))?;
1495
1721
  class.define_method(
1496
1722
  "cleanup_metadata",
1497
1723
  method!(RawDeltaTable::cleanup_metadata, 0),
@@ -1505,11 +1731,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1505
1731
  "set_table_properties",
1506
1732
  method!(RawDeltaTable::set_table_properties, 2),
1507
1733
  )?;
1734
+ class.define_method("set_table_name", method!(RawDeltaTable::set_table_name, 3))?;
1735
+ class.define_method(
1736
+ "set_table_description",
1737
+ method!(RawDeltaTable::set_table_description, 3),
1738
+ )?;
1508
1739
  class.define_method("repair", method!(RawDeltaTable::repair, 3))?;
1509
1740
  class.define_method(
1510
1741
  "transaction_version",
1511
1742
  method!(RawDeltaTable::transaction_version, 1),
1512
1743
  )?;
1744
+ class.define_method(
1745
+ "set_column_metadata",
1746
+ method!(RawDeltaTable::set_column_metadata, 4),
1747
+ )?;
1513
1748
  class.define_method("write", method!(RawDeltaTable::write, 12))?;
1514
1749
 
1515
1750
  let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;