deltalake-rb 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +442 -323
- data/ext/deltalake/Cargo.toml +6 -5
- data/ext/deltalake/src/error.rs +5 -0
- data/ext/deltalake/src/lib.rs +294 -123
- data/ext/deltalake/src/merge.rs +2 -2
- data/lib/deltalake/table_optimizer.rb +2 -0
- data/lib/deltalake/version.rb +1 -1
- data/lib/deltalake.rb +31 -19
- metadata +1 -2
- data/lib/deltalake/deltalake.so +0 -0
data/ext/deltalake/src/lib.rs
CHANGED
|
@@ -4,13 +4,6 @@ mod merge;
|
|
|
4
4
|
mod schema;
|
|
5
5
|
mod utils;
|
|
6
6
|
|
|
7
|
-
use std::cell::RefCell;
|
|
8
|
-
use std::collections::{HashMap, HashSet};
|
|
9
|
-
use std::future::IntoFuture;
|
|
10
|
-
use std::str::FromStr;
|
|
11
|
-
use std::sync::Arc;
|
|
12
|
-
use std::time;
|
|
13
|
-
|
|
14
7
|
use chrono::{DateTime, Duration, FixedOffset, Utc};
|
|
15
8
|
use delta_kernel::schema::StructField;
|
|
16
9
|
use delta_kernel::table_properties::DataSkippingNumIndexedCols;
|
|
@@ -26,35 +19,31 @@ use deltalake::kernel::{scalars::ScalarExt, Transaction};
|
|
|
26
19
|
use deltalake::kernel::{EagerSnapshot, StructDataExt};
|
|
27
20
|
use deltalake::logstore::IORuntime;
|
|
28
21
|
use deltalake::logstore::LogStoreRef;
|
|
29
|
-
use deltalake::operations::add_column::AddColumnBuilder;
|
|
30
|
-
use deltalake::operations::add_feature::AddTableFeatureBuilder;
|
|
31
22
|
use deltalake::operations::collect_sendable_stream;
|
|
32
|
-
use deltalake::operations::
|
|
33
|
-
use deltalake::operations::delete::DeleteBuilder;
|
|
34
|
-
use deltalake::operations::drop_constraints::DropConstraintBuilder;
|
|
35
|
-
use deltalake::operations::filesystem_check::FileSystemCheckBuilder;
|
|
36
|
-
use deltalake::operations::load_cdf::CdfLoadBuilder;
|
|
37
|
-
use deltalake::operations::optimize::{OptimizeBuilder, OptimizeType};
|
|
38
|
-
use deltalake::operations::restore::RestoreBuilder;
|
|
39
|
-
use deltalake::operations::set_tbl_properties::SetTablePropertiesBuilder;
|
|
40
|
-
use deltalake::operations::vacuum::VacuumBuilder;
|
|
23
|
+
use deltalake::operations::optimize::{create_session_state_for_optimize, OptimizeType};
|
|
41
24
|
use deltalake::parquet::basic::Compression;
|
|
42
25
|
use deltalake::parquet::errors::ParquetError;
|
|
43
26
|
use deltalake::parquet::file::properties::WriterProperties;
|
|
44
27
|
use deltalake::partitions::PartitionFilter;
|
|
45
28
|
use deltalake::table::config::TablePropertiesExt;
|
|
46
|
-
use deltalake::
|
|
29
|
+
use deltalake::table::state::DeltaTableState;
|
|
30
|
+
use deltalake::{DeltaResult, DeltaTable};
|
|
47
31
|
use error::DeltaError;
|
|
48
32
|
use futures::future::join_all;
|
|
49
33
|
use futures::TryStreamExt;
|
|
50
|
-
|
|
51
34
|
use magnus::{
|
|
52
35
|
function, method, prelude::*, try_convert::TryConvertOwned, typed_data::Obj, Error as RbErr,
|
|
53
36
|
Integer, Module, RArray, Ruby, TryConvert, Value,
|
|
54
37
|
};
|
|
55
38
|
use serde_json::Map;
|
|
39
|
+
use std::collections::{HashMap, HashSet};
|
|
40
|
+
use std::future::IntoFuture;
|
|
41
|
+
use std::str::FromStr;
|
|
42
|
+
use std::sync::{Arc, Mutex};
|
|
43
|
+
use std::time;
|
|
44
|
+
use uuid::Uuid;
|
|
56
45
|
|
|
57
|
-
use crate::error::{RbValueError, RubyError};
|
|
46
|
+
use crate::error::{to_rt_err, RbRuntimeError, RbValueError, RubyError};
|
|
58
47
|
use crate::features::TableFeatures;
|
|
59
48
|
use crate::merge::RbMergeBuilder;
|
|
60
49
|
use crate::schema::{schema_to_rbobject, Field};
|
|
@@ -81,7 +70,7 @@ unsafe impl TryConvertOwned for PartitionFilterValue {}
|
|
|
81
70
|
|
|
82
71
|
#[magnus::wrap(class = "DeltaLake::RawDeltaTable")]
|
|
83
72
|
struct RawDeltaTable {
|
|
84
|
-
_table:
|
|
73
|
+
_table: Arc<Mutex<deltalake::DeltaTable>>,
|
|
85
74
|
}
|
|
86
75
|
|
|
87
76
|
#[magnus::wrap(class = "DeltaLake::RawDeltaTableMetaData")]
|
|
@@ -124,7 +113,10 @@ type StringVec = Vec<String>;
|
|
|
124
113
|
|
|
125
114
|
impl RawDeltaTable {
|
|
126
115
|
fn with_table<T>(&self, func: impl Fn(&deltalake::DeltaTable) -> RbResult<T>) -> RbResult<T> {
|
|
127
|
-
|
|
116
|
+
match self._table.lock() {
|
|
117
|
+
Ok(table) => func(&table),
|
|
118
|
+
Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
|
|
119
|
+
}
|
|
128
120
|
}
|
|
129
121
|
|
|
130
122
|
fn cloned_state(&self) -> RbResult<EagerSnapshot> {
|
|
@@ -140,6 +132,15 @@ impl RawDeltaTable {
|
|
|
140
132
|
fn log_store(&self) -> RbResult<LogStoreRef> {
|
|
141
133
|
self.with_table(|t| Ok(t.log_store().clone()))
|
|
142
134
|
}
|
|
135
|
+
|
|
136
|
+
fn set_state(&self, state: Option<DeltaTableState>) -> RbResult<()> {
|
|
137
|
+
let mut original = self
|
|
138
|
+
._table
|
|
139
|
+
.lock()
|
|
140
|
+
.map_err(|e| RbRuntimeError::new_err(e.to_string()))?;
|
|
141
|
+
original.state = state;
|
|
142
|
+
Ok(())
|
|
143
|
+
}
|
|
143
144
|
}
|
|
144
145
|
|
|
145
146
|
impl RawDeltaTable {
|
|
@@ -152,7 +153,7 @@ impl RawDeltaTable {
|
|
|
152
153
|
) -> RbResult<Self> {
|
|
153
154
|
let table_url = deltalake::table::builder::parse_table_uri(table_uri)
|
|
154
155
|
.map_err(error::RubyError::from)?;
|
|
155
|
-
let mut builder = deltalake::DeltaTableBuilder::
|
|
156
|
+
let mut builder = deltalake::DeltaTableBuilder::from_url(table_url)
|
|
156
157
|
.map_err(error::RubyError::from)?
|
|
157
158
|
.with_io_runtime(IORuntime::default());
|
|
158
159
|
|
|
@@ -173,7 +174,7 @@ impl RawDeltaTable {
|
|
|
173
174
|
|
|
174
175
|
let table = rt().block_on(builder.load()).map_err(RubyError::from)?;
|
|
175
176
|
Ok(RawDeltaTable {
|
|
176
|
-
_table:
|
|
177
|
+
_table: Arc::new(Mutex::new(table)),
|
|
177
178
|
})
|
|
178
179
|
}
|
|
179
180
|
|
|
@@ -183,7 +184,7 @@ impl RawDeltaTable {
|
|
|
183
184
|
) -> RbResult<bool> {
|
|
184
185
|
let table_url = deltalake::table::builder::ensure_table_uri(table_uri)
|
|
185
186
|
.map_err(|_| RbValueError::new_err("Invalid table URI"))?;
|
|
186
|
-
let mut builder = deltalake::DeltaTableBuilder::
|
|
187
|
+
let mut builder = deltalake::DeltaTableBuilder::from_url(table_url)
|
|
187
188
|
.map_err(|_| RbValueError::new_err("Failed to create table builder"))?;
|
|
188
189
|
if let Some(storage_options) = storage_options {
|
|
189
190
|
builder = builder.with_storage_options(storage_options)
|
|
@@ -199,7 +200,7 @@ impl RawDeltaTable {
|
|
|
199
200
|
}
|
|
200
201
|
|
|
201
202
|
pub fn table_uri(&self) -> RbResult<String> {
|
|
202
|
-
self.with_table(|t| Ok(t.
|
|
203
|
+
self.with_table(|t| Ok(t.table_url().to_string()))
|
|
203
204
|
}
|
|
204
205
|
|
|
205
206
|
pub fn version(&self) -> RbResult<Option<i64>> {
|
|
@@ -255,15 +256,32 @@ impl RawDeltaTable {
|
|
|
255
256
|
}
|
|
256
257
|
|
|
257
258
|
pub fn load_version(&self, version: i64) -> RbResult<()> {
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
259
|
+
#[allow(clippy::await_holding_lock)]
|
|
260
|
+
rt().block_on(async {
|
|
261
|
+
let mut table = self
|
|
262
|
+
._table
|
|
263
|
+
.lock()
|
|
264
|
+
.map_err(|e| RbRuntimeError::new_err(e.to_string()))?;
|
|
265
|
+
(*table)
|
|
266
|
+
.load_version(version)
|
|
267
|
+
.await
|
|
268
|
+
.map_err(RubyError::from)
|
|
269
|
+
.map_err(RbErr::from)
|
|
270
|
+
})
|
|
261
271
|
}
|
|
262
272
|
|
|
263
273
|
pub fn get_latest_version(&self) -> RbResult<i64> {
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
.
|
|
274
|
+
#[allow(clippy::await_holding_lock)]
|
|
275
|
+
rt().block_on(async {
|
|
276
|
+
match self._table.lock() {
|
|
277
|
+
Ok(table) => table
|
|
278
|
+
.get_latest_version()
|
|
279
|
+
.await
|
|
280
|
+
.map_err(RubyError::from)
|
|
281
|
+
.map_err(RbErr::from),
|
|
282
|
+
Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
|
|
283
|
+
}
|
|
284
|
+
})
|
|
267
285
|
}
|
|
268
286
|
|
|
269
287
|
pub fn get_num_index_cols(&self) -> RbResult<i32> {
|
|
@@ -296,9 +314,18 @@ impl RawDeltaTable {
|
|
|
296
314
|
DateTime::<Utc>::from(DateTime::<FixedOffset>::parse_from_rfc3339(&ds).map_err(
|
|
297
315
|
|err| RbValueError::new_err(format!("Failed to parse datetime string: {err}")),
|
|
298
316
|
)?);
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
317
|
+
#[allow(clippy::await_holding_lock)]
|
|
318
|
+
rt().block_on(async {
|
|
319
|
+
let mut table = self
|
|
320
|
+
._table
|
|
321
|
+
.lock()
|
|
322
|
+
.map_err(|e| RbRuntimeError::new_err(e.to_string()))?;
|
|
323
|
+
(*table)
|
|
324
|
+
.load_with_datetime(datetime)
|
|
325
|
+
.await
|
|
326
|
+
.map_err(RubyError::from)
|
|
327
|
+
.map_err(RbErr::from)
|
|
328
|
+
})
|
|
302
329
|
}
|
|
303
330
|
|
|
304
331
|
pub fn files(
|
|
@@ -324,14 +351,14 @@ impl RawDeltaTable {
|
|
|
324
351
|
.map(|p| p.to_string())
|
|
325
352
|
.collect())
|
|
326
353
|
} else {
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
354
|
+
match self._table.lock() {
|
|
355
|
+
Ok(table) => Ok(table
|
|
356
|
+
.get_file_uris()
|
|
357
|
+
.map_err(RubyError::from)?
|
|
358
|
+
.map(|f| f.to_string())
|
|
359
|
+
.collect()),
|
|
360
|
+
Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
|
|
361
|
+
}
|
|
335
362
|
}
|
|
336
363
|
}
|
|
337
364
|
|
|
@@ -379,19 +406,12 @@ impl RawDeltaTable {
|
|
|
379
406
|
commit_properties: Option<RbCommitProperties>,
|
|
380
407
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
381
408
|
) -> RbResult<Vec<String>> {
|
|
382
|
-
let
|
|
383
|
-
|
|
384
|
-
.
|
|
385
|
-
.
|
|
386
|
-
.
|
|
387
|
-
|
|
388
|
-
.map_err(RbErr::from)?;
|
|
389
|
-
let mut cmd = VacuumBuilder::new(
|
|
390
|
-
self._table.borrow().log_store(),
|
|
391
|
-
snapshot.snapshot().clone(),
|
|
392
|
-
)
|
|
393
|
-
.with_enforce_retention_duration(enforce_retention_duration)
|
|
394
|
-
.with_dry_run(dry_run);
|
|
409
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
410
|
+
let mut cmd = table
|
|
411
|
+
.vacuum()
|
|
412
|
+
.with_enforce_retention_duration(enforce_retention_duration)
|
|
413
|
+
.with_dry_run(dry_run);
|
|
414
|
+
|
|
395
415
|
if let Some(retention_period) = retention_hours {
|
|
396
416
|
cmd = cmd.with_retention_period(Duration::hours(retention_period as i64));
|
|
397
417
|
}
|
|
@@ -402,7 +422,7 @@ impl RawDeltaTable {
|
|
|
402
422
|
cmd = cmd.with_commit_properties(commit_properties);
|
|
403
423
|
}
|
|
404
424
|
let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
405
|
-
self.
|
|
425
|
+
self.set_state(table.state)?;
|
|
406
426
|
Ok(metrics.files_deleted)
|
|
407
427
|
}
|
|
408
428
|
|
|
@@ -417,8 +437,11 @@ impl RawDeltaTable {
|
|
|
417
437
|
commit_properties: Option<RbCommitProperties>,
|
|
418
438
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
419
439
|
) -> RbResult<String> {
|
|
420
|
-
let
|
|
440
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
441
|
+
let mut cmd = table
|
|
442
|
+
.optimize()
|
|
421
443
|
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
|
|
444
|
+
|
|
422
445
|
if let Some(size) = target_size {
|
|
423
446
|
cmd = cmd.with_target_size(size);
|
|
424
447
|
}
|
|
@@ -443,7 +466,7 @@ impl RawDeltaTable {
|
|
|
443
466
|
cmd = cmd.with_filters(&converted_filters);
|
|
444
467
|
|
|
445
468
|
let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
446
|
-
self.
|
|
469
|
+
self.set_state(table.state)?;
|
|
447
470
|
Ok(serde_json::to_string(&metrics).unwrap())
|
|
448
471
|
}
|
|
449
472
|
|
|
@@ -454,16 +477,26 @@ impl RawDeltaTable {
|
|
|
454
477
|
partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
|
|
455
478
|
target_size: Option<u64>,
|
|
456
479
|
max_concurrent_tasks: Option<usize>,
|
|
457
|
-
max_spill_size: usize
|
|
480
|
+
max_spill_size: Option<usize>,
|
|
481
|
+
max_temp_directory_size: Option<u64>,
|
|
458
482
|
min_commit_interval: Option<u64>,
|
|
459
483
|
writer_properties: Option<RbWriterProperties>,
|
|
460
484
|
commit_properties: Option<RbCommitProperties>,
|
|
461
485
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
462
486
|
) -> RbResult<String> {
|
|
463
|
-
let
|
|
487
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
488
|
+
let mut cmd = table
|
|
489
|
+
.clone()
|
|
490
|
+
.optimize()
|
|
464
491
|
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
|
|
465
|
-
.with_max_spill_size(max_spill_size)
|
|
466
492
|
.with_type(OptimizeType::ZOrder(z_order_columns));
|
|
493
|
+
|
|
494
|
+
if max_spill_size.is_some() || max_temp_directory_size.is_some() {
|
|
495
|
+
let session =
|
|
496
|
+
create_session_state_for_optimize(max_spill_size, max_temp_directory_size);
|
|
497
|
+
cmd = cmd.with_session_state(Arc::new(session));
|
|
498
|
+
}
|
|
499
|
+
|
|
467
500
|
if let Some(size) = target_size {
|
|
468
501
|
cmd = cmd.with_target_size(size);
|
|
469
502
|
}
|
|
@@ -488,13 +521,15 @@ impl RawDeltaTable {
|
|
|
488
521
|
cmd = cmd.with_filters(&converted_filters);
|
|
489
522
|
|
|
490
523
|
let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
491
|
-
self.
|
|
524
|
+
self.set_state(table.state)?;
|
|
492
525
|
Ok(serde_json::to_string(&metrics).unwrap())
|
|
493
526
|
}
|
|
494
527
|
|
|
495
528
|
pub fn add_columns(&self, fields: RArray) -> RbResult<()> {
|
|
496
529
|
let fields = fields.typecheck::<Obj<Field>>()?;
|
|
497
|
-
|
|
530
|
+
|
|
531
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
532
|
+
let mut cmd = table.add_columns();
|
|
498
533
|
|
|
499
534
|
let new_fields = fields
|
|
500
535
|
.iter()
|
|
@@ -504,7 +539,7 @@ impl RawDeltaTable {
|
|
|
504
539
|
cmd = cmd.with_fields(new_fields);
|
|
505
540
|
|
|
506
541
|
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
507
|
-
self.
|
|
542
|
+
self.set_state(table.state)?;
|
|
508
543
|
Ok(())
|
|
509
544
|
}
|
|
510
545
|
|
|
@@ -517,71 +552,76 @@ impl RawDeltaTable {
|
|
|
517
552
|
.into_iter()
|
|
518
553
|
.map(TableFeatures::try_convert)
|
|
519
554
|
.collect::<RbResult<Vec<_>>>()?;
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
555
|
+
|
|
556
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
557
|
+
let cmd = table
|
|
558
|
+
.add_feature()
|
|
559
|
+
.with_features(feature)
|
|
560
|
+
.with_allow_protocol_versions_increase(allow_protocol_versions_increase);
|
|
524
561
|
|
|
525
562
|
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
526
|
-
self.
|
|
563
|
+
self.set_state(table.state)?;
|
|
527
564
|
Ok(())
|
|
528
565
|
}
|
|
529
566
|
|
|
530
567
|
pub fn add_constraints(&self, constraints: HashMap<String, String>) -> RbResult<()> {
|
|
531
|
-
let
|
|
532
|
-
|
|
568
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
569
|
+
let mut cmd = table.add_constraint();
|
|
533
570
|
|
|
534
571
|
for (col_name, expression) in constraints {
|
|
535
572
|
cmd = cmd.with_constraint(col_name.clone(), expression.clone());
|
|
536
573
|
}
|
|
537
574
|
|
|
538
575
|
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
539
|
-
self.
|
|
576
|
+
self.set_state(table.state)?;
|
|
540
577
|
Ok(())
|
|
541
578
|
}
|
|
542
579
|
|
|
543
580
|
pub fn drop_constraints(&self, name: String, raise_if_not_exists: bool) -> RbResult<()> {
|
|
544
|
-
let
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
581
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
582
|
+
let cmd = table
|
|
583
|
+
.drop_constraints()
|
|
584
|
+
.with_constraint(name)
|
|
585
|
+
.with_raise_if_not_exists(raise_if_not_exists);
|
|
548
586
|
|
|
549
587
|
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
550
|
-
self.
|
|
588
|
+
self.set_state(table.state)?;
|
|
551
589
|
Ok(())
|
|
552
590
|
}
|
|
553
591
|
|
|
554
592
|
pub fn load_cdf(
|
|
555
593
|
&self,
|
|
556
|
-
starting_version: i64
|
|
594
|
+
starting_version: Option<i64>,
|
|
557
595
|
ending_version: Option<i64>,
|
|
558
596
|
starting_timestamp: Option<String>,
|
|
559
597
|
ending_timestamp: Option<String>,
|
|
560
598
|
columns: Option<Vec<String>>,
|
|
561
599
|
) -> RbResult<ArrowArrayStream> {
|
|
562
600
|
let ctx = SessionContext::new();
|
|
563
|
-
let
|
|
564
|
-
|
|
565
|
-
.with_starting_version(starting_version);
|
|
601
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
602
|
+
let mut cmd = table.scan_cdf();
|
|
566
603
|
|
|
604
|
+
if let Some(sv) = starting_version {
|
|
605
|
+
cmd = cmd.with_starting_version(sv);
|
|
606
|
+
}
|
|
567
607
|
if let Some(ev) = ending_version {
|
|
568
|
-
|
|
608
|
+
cmd = cmd.with_ending_version(ev);
|
|
569
609
|
}
|
|
570
610
|
if let Some(st) = starting_timestamp {
|
|
571
611
|
let starting_ts: DateTime<Utc> = DateTime::<Utc>::from_str(&st)
|
|
572
612
|
.map_err(|pe| RbValueError::new_err(pe.to_string()))?
|
|
573
613
|
.to_utc();
|
|
574
|
-
|
|
614
|
+
cmd = cmd.with_starting_timestamp(starting_ts);
|
|
575
615
|
}
|
|
576
616
|
if let Some(et) = ending_timestamp {
|
|
577
617
|
let ending_ts = DateTime::<Utc>::from_str(&et)
|
|
578
618
|
.map_err(|pe| RbValueError::new_err(pe.to_string()))?
|
|
579
619
|
.to_utc();
|
|
580
|
-
|
|
620
|
+
cmd = cmd.with_starting_timestamp(ending_ts);
|
|
581
621
|
}
|
|
582
622
|
|
|
583
623
|
let table_provider: Arc<dyn TableProvider> =
|
|
584
|
-
Arc::new(DeltaCdfTableProvider::try_new(
|
|
624
|
+
Arc::new(DeltaCdfTableProvider::try_new(cmd).map_err(RubyError::from)?);
|
|
585
625
|
|
|
586
626
|
let plan = rt()
|
|
587
627
|
.block_on(async {
|
|
@@ -630,7 +670,7 @@ impl RawDeltaTable {
|
|
|
630
670
|
commit_properties: Option<RbCommitProperties>,
|
|
631
671
|
) -> RbResult<RbMergeBuilder> {
|
|
632
672
|
Ok(RbMergeBuilder::new(
|
|
633
|
-
self.
|
|
673
|
+
self.log_store()?,
|
|
634
674
|
self.cloned_state()?,
|
|
635
675
|
source.0,
|
|
636
676
|
predicate,
|
|
@@ -646,7 +686,7 @@ impl RawDeltaTable {
|
|
|
646
686
|
|
|
647
687
|
pub fn merge_execute(&self, merge_builder: &RbMergeBuilder) -> RbResult<String> {
|
|
648
688
|
let (table, metrics) = merge_builder.execute().map_err(RubyError::from)?;
|
|
649
|
-
self.
|
|
689
|
+
self.set_state(table.state)?;
|
|
650
690
|
Ok(metrics)
|
|
651
691
|
}
|
|
652
692
|
|
|
@@ -657,7 +697,8 @@ impl RawDeltaTable {
|
|
|
657
697
|
protocol_downgrade_allowed: bool,
|
|
658
698
|
commit_properties: Option<RbCommitProperties>,
|
|
659
699
|
) -> RbResult<String> {
|
|
660
|
-
let
|
|
700
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
701
|
+
let mut cmd = table.restore();
|
|
661
702
|
if let Some(val) = target {
|
|
662
703
|
if let Some(version) = Integer::from_value(val) {
|
|
663
704
|
cmd = cmd.with_version_to_restore(version.to_i64()?)
|
|
@@ -679,23 +720,37 @@ impl RawDeltaTable {
|
|
|
679
720
|
}
|
|
680
721
|
|
|
681
722
|
let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
682
|
-
self.
|
|
723
|
+
self.set_state(table.state)?;
|
|
683
724
|
Ok(serde_json::to_string(&metrics).unwrap())
|
|
684
725
|
}
|
|
685
726
|
|
|
686
727
|
pub fn history(&self, limit: Option<usize>) -> RbResult<Vec<String>> {
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
.
|
|
728
|
+
#[allow(clippy::await_holding_lock)]
|
|
729
|
+
let history = rt().block_on(async {
|
|
730
|
+
match self._table.lock() {
|
|
731
|
+
Ok(table) => table
|
|
732
|
+
.history(limit)
|
|
733
|
+
.await
|
|
734
|
+
.map_err(RubyError::from)
|
|
735
|
+
.map_err(RbErr::from),
|
|
736
|
+
Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
|
|
737
|
+
}
|
|
738
|
+
})?;
|
|
690
739
|
Ok(history
|
|
691
740
|
.map(|c| serde_json::to_string(&c).unwrap())
|
|
692
741
|
.collect())
|
|
693
742
|
}
|
|
694
743
|
|
|
695
744
|
pub fn update_incremental(&self) -> RbResult<()> {
|
|
696
|
-
#[allow(
|
|
745
|
+
#[allow(clippy::await_holding_lock)]
|
|
697
746
|
Ok(rt()
|
|
698
|
-
.block_on(
|
|
747
|
+
.block_on(async {
|
|
748
|
+
let mut table = self
|
|
749
|
+
._table
|
|
750
|
+
.lock()
|
|
751
|
+
.map_err(|e| DeltaTableError::Generic(e.to_string()))?;
|
|
752
|
+
(*table).update_incremental(None).await
|
|
753
|
+
})
|
|
699
754
|
.map_err(RubyError::from)?)
|
|
700
755
|
}
|
|
701
756
|
|
|
@@ -755,15 +810,61 @@ impl RawDeltaTable {
|
|
|
755
810
|
}
|
|
756
811
|
|
|
757
812
|
pub fn create_checkpoint(&self) -> RbResult<()> {
|
|
758
|
-
|
|
759
|
-
|
|
813
|
+
let operation_id = Uuid::new_v4();
|
|
814
|
+
|
|
815
|
+
#[allow(clippy::await_holding_lock)]
|
|
816
|
+
let _result = rt().block_on(async {
|
|
817
|
+
match self._table.lock() {
|
|
818
|
+
Ok(table) => create_checkpoint(&table, Some(operation_id))
|
|
819
|
+
.await
|
|
820
|
+
.map_err(RubyError::from)
|
|
821
|
+
.map_err(RbErr::from),
|
|
822
|
+
Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
|
|
823
|
+
}
|
|
824
|
+
});
|
|
760
825
|
|
|
761
826
|
Ok(())
|
|
762
827
|
}
|
|
763
828
|
|
|
764
829
|
pub fn cleanup_metadata(&self) -> RbResult<()> {
|
|
765
|
-
|
|
766
|
-
|
|
830
|
+
let (_result, new_state) = {
|
|
831
|
+
let operation_id = Uuid::new_v4();
|
|
832
|
+
|
|
833
|
+
#[allow(clippy::await_holding_lock)]
|
|
834
|
+
let result = rt().block_on(async {
|
|
835
|
+
match self._table.lock() {
|
|
836
|
+
Ok(table) => {
|
|
837
|
+
let result = cleanup_metadata(&table, Some(operation_id))
|
|
838
|
+
.await
|
|
839
|
+
.map_err(RubyError::from)
|
|
840
|
+
.map_err(RbErr::from)?;
|
|
841
|
+
|
|
842
|
+
let new_state = if result > 0 {
|
|
843
|
+
Some(
|
|
844
|
+
DeltaTableState::try_new(
|
|
845
|
+
&table.log_store(),
|
|
846
|
+
table.config.clone(),
|
|
847
|
+
table.version(),
|
|
848
|
+
)
|
|
849
|
+
.await
|
|
850
|
+
.map_err(RubyError::from)?,
|
|
851
|
+
)
|
|
852
|
+
} else {
|
|
853
|
+
None
|
|
854
|
+
};
|
|
855
|
+
|
|
856
|
+
Ok((result, new_state))
|
|
857
|
+
}
|
|
858
|
+
Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
|
|
859
|
+
}
|
|
860
|
+
});
|
|
861
|
+
|
|
862
|
+
result
|
|
863
|
+
}?;
|
|
864
|
+
|
|
865
|
+
if new_state.is_some() {
|
|
866
|
+
self.set_state(new_state)?;
|
|
867
|
+
}
|
|
767
868
|
|
|
768
869
|
Ok(())
|
|
769
870
|
}
|
|
@@ -792,7 +893,8 @@ impl RawDeltaTable {
|
|
|
792
893
|
commit_properties: Option<RbCommitProperties>,
|
|
793
894
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
794
895
|
) -> RbResult<String> {
|
|
795
|
-
let
|
|
896
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
897
|
+
let mut cmd = table.delete();
|
|
796
898
|
if let Some(predicate) = predicate {
|
|
797
899
|
cmd = cmd.with_predicate(predicate);
|
|
798
900
|
}
|
|
@@ -808,7 +910,7 @@ impl RawDeltaTable {
|
|
|
808
910
|
}
|
|
809
911
|
|
|
810
912
|
let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
811
|
-
self.
|
|
913
|
+
self.set_state(table.state)?;
|
|
812
914
|
Ok(serde_json::to_string(&metrics).unwrap())
|
|
813
915
|
}
|
|
814
916
|
|
|
@@ -817,13 +919,14 @@ impl RawDeltaTable {
|
|
|
817
919
|
properties: HashMap<String, String>,
|
|
818
920
|
raise_if_not_exists: bool,
|
|
819
921
|
) -> RbResult<()> {
|
|
820
|
-
let
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
922
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
923
|
+
let cmd = table
|
|
924
|
+
.set_tbl_properties()
|
|
925
|
+
.with_properties(properties)
|
|
926
|
+
.with_raise_if_not_exists(raise_if_not_exists);
|
|
824
927
|
|
|
825
928
|
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
826
|
-
self.
|
|
929
|
+
self.set_state(table.state)?;
|
|
827
930
|
Ok(())
|
|
828
931
|
}
|
|
829
932
|
|
|
@@ -833,9 +936,8 @@ impl RawDeltaTable {
|
|
|
833
936
|
commit_properties: Option<RbCommitProperties>,
|
|
834
937
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
835
938
|
) -> RbResult<String> {
|
|
836
|
-
let
|
|
837
|
-
|
|
838
|
-
.with_dry_run(dry_run);
|
|
939
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
940
|
+
let mut cmd = table.filesystem_check().with_dry_run(dry_run);
|
|
839
941
|
|
|
840
942
|
if let Some(commit_properties) =
|
|
841
943
|
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
@@ -844,7 +946,7 @@ impl RawDeltaTable {
|
|
|
844
946
|
}
|
|
845
947
|
|
|
846
948
|
let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
847
|
-
self.
|
|
949
|
+
self.set_state(table.state)?;
|
|
848
950
|
Ok(serde_json::to_string(&metrics).unwrap())
|
|
849
951
|
}
|
|
850
952
|
|
|
@@ -856,6 +958,77 @@ impl RawDeltaTable {
|
|
|
856
958
|
.block_on(snapshot.transaction_version(log_store.as_ref(), app_id))
|
|
857
959
|
.map_err(RubyError::from)?)
|
|
858
960
|
}
|
|
961
|
+
|
|
962
|
+
#[allow(clippy::too_many_arguments)]
|
|
963
|
+
pub fn write(
|
|
964
|
+
&self,
|
|
965
|
+
data: RbArrowType<ArrowArrayStreamReader>,
|
|
966
|
+
mode: String,
|
|
967
|
+
schema_mode: Option<String>,
|
|
968
|
+
partition_by: Option<Vec<String>>,
|
|
969
|
+
predicate: Option<String>,
|
|
970
|
+
target_file_size: Option<usize>,
|
|
971
|
+
name: Option<String>,
|
|
972
|
+
description: Option<String>,
|
|
973
|
+
configuration: Option<HashMap<String, Option<String>>>,
|
|
974
|
+
writer_properties: Option<RbWriterProperties>,
|
|
975
|
+
commit_properties: Option<RbCommitProperties>,
|
|
976
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
977
|
+
) -> RbResult<()> {
|
|
978
|
+
let table = {
|
|
979
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
980
|
+
let batches = data.0.map(|batch| batch.unwrap()).collect::<Vec<_>>();
|
|
981
|
+
|
|
982
|
+
let save_mode = mode.parse().map_err(RubyError::from)?;
|
|
983
|
+
let mut builder = table.write(batches).with_save_mode(save_mode);
|
|
984
|
+
|
|
985
|
+
if let Some(schema_mode) = schema_mode {
|
|
986
|
+
builder = builder.with_schema_mode(schema_mode.parse().map_err(RubyError::from)?);
|
|
987
|
+
}
|
|
988
|
+
if let Some(partition_columns) = partition_by {
|
|
989
|
+
builder = builder.with_partition_columns(partition_columns);
|
|
990
|
+
}
|
|
991
|
+
|
|
992
|
+
if let Some(writer_props) = writer_properties {
|
|
993
|
+
builder = builder.with_writer_properties(
|
|
994
|
+
set_writer_properties(writer_props).map_err(RubyError::from)?,
|
|
995
|
+
);
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
if let Some(name) = &name {
|
|
999
|
+
builder = builder.with_table_name(name);
|
|
1000
|
+
};
|
|
1001
|
+
|
|
1002
|
+
if let Some(description) = &description {
|
|
1003
|
+
builder = builder.with_description(description);
|
|
1004
|
+
};
|
|
1005
|
+
|
|
1006
|
+
if let Some(predicate) = predicate {
|
|
1007
|
+
builder = builder.with_replace_where(predicate);
|
|
1008
|
+
};
|
|
1009
|
+
|
|
1010
|
+
if let Some(target_file_size) = target_file_size {
|
|
1011
|
+
builder = builder.with_target_file_size(target_file_size)
|
|
1012
|
+
};
|
|
1013
|
+
|
|
1014
|
+
if let Some(config) = configuration {
|
|
1015
|
+
builder = builder.with_configuration(config);
|
|
1016
|
+
};
|
|
1017
|
+
|
|
1018
|
+
if let Some(commit_properties) =
|
|
1019
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1020
|
+
{
|
|
1021
|
+
builder = builder.with_commit_properties(commit_properties);
|
|
1022
|
+
};
|
|
1023
|
+
|
|
1024
|
+
rt().block_on(builder.into_future())
|
|
1025
|
+
.map_err(RubyError::from)
|
|
1026
|
+
.map_err(RbErr::from)
|
|
1027
|
+
}?;
|
|
1028
|
+
|
|
1029
|
+
self.set_state(table.state)?;
|
|
1030
|
+
Ok(())
|
|
1031
|
+
}
|
|
859
1032
|
}
|
|
860
1033
|
|
|
861
1034
|
fn set_post_commithook_properties(
|
|
@@ -1141,7 +1314,6 @@ fn write_to_deltalake(
|
|
|
1141
1314
|
table_uri: String,
|
|
1142
1315
|
data: RbArrowType<ArrowArrayStreamReader>,
|
|
1143
1316
|
mode: String,
|
|
1144
|
-
table: Option<&RawDeltaTable>,
|
|
1145
1317
|
schema_mode: Option<String>,
|
|
1146
1318
|
partition_by: Option<Vec<String>>,
|
|
1147
1319
|
predicate: Option<String>,
|
|
@@ -1158,16 +1330,14 @@ fn write_to_deltalake(
|
|
|
1158
1330
|
let save_mode = mode.parse().map_err(RubyError::from)?;
|
|
1159
1331
|
|
|
1160
1332
|
let options = storage_options.clone().unwrap_or_default();
|
|
1161
|
-
let
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
table_url, options,
|
|
1333
|
+
let table_url =
|
|
1334
|
+
deltalake::table::builder::ensure_table_uri(&table_uri).map_err(RubyError::from)?;
|
|
1335
|
+
let table = rt()
|
|
1336
|
+
.block_on(DeltaTable::try_from_url_with_storage_options(
|
|
1337
|
+
table_url.clone(),
|
|
1338
|
+
options.clone(),
|
|
1168
1339
|
))
|
|
1169
|
-
.map_err(RubyError::from)
|
|
1170
|
-
};
|
|
1340
|
+
.map_err(RubyError::from)?;
|
|
1171
1341
|
|
|
1172
1342
|
let mut builder = table.write(batches).with_save_mode(save_mode);
|
|
1173
1343
|
if let Some(schema_mode) = schema_mode {
|
|
@@ -1249,7 +1419,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1249
1419
|
deltalake::gcp::register_handlers(None);
|
|
1250
1420
|
|
|
1251
1421
|
let module = ruby.define_module("DeltaLake")?;
|
|
1252
|
-
module.define_singleton_method("write_deltalake_rust", function!(write_to_deltalake,
|
|
1422
|
+
module.define_singleton_method("write_deltalake_rust", function!(write_to_deltalake, 14))?;
|
|
1253
1423
|
module.define_singleton_method("rust_core_version", function!(rust_core_version, 0))?;
|
|
1254
1424
|
|
|
1255
1425
|
let class = module.define_class("RawDeltaTable", ruby.class_object())?;
|
|
@@ -1290,7 +1460,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1290
1460
|
)?;
|
|
1291
1461
|
class.define_method(
|
|
1292
1462
|
"z_order_optimize",
|
|
1293
|
-
method!(RawDeltaTable::z_order_optimize,
|
|
1463
|
+
method!(RawDeltaTable::z_order_optimize, 10),
|
|
1294
1464
|
)?;
|
|
1295
1465
|
class.define_method("add_columns", method!(RawDeltaTable::add_columns, 1))?;
|
|
1296
1466
|
class.define_method("add_feature", method!(RawDeltaTable::add_feature, 2))?;
|
|
@@ -1340,6 +1510,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1340
1510
|
"transaction_version",
|
|
1341
1511
|
method!(RawDeltaTable::transaction_version, 1),
|
|
1342
1512
|
)?;
|
|
1513
|
+
class.define_method("write", method!(RawDeltaTable::write, 12))?;
|
|
1343
1514
|
|
|
1344
1515
|
let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;
|
|
1345
1516
|
class.define_method("id", method!(RawDeltaTableMetaData::id, 0))?;
|