deltalake-rb 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +333 -295
- data/ext/deltalake/Cargo.toml +5 -5
- data/ext/deltalake/src/lib.rs +58 -123
- data/ext/deltalake/src/merge.rs +2 -2
- data/ext/deltalake/src/schema.rs +2 -1
- data/lib/deltalake/version.rb +1 -1
- metadata +1 -1
data/ext/deltalake/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "deltalake"
|
3
|
-
version = "0.2.2"
|
3
|
+
version = "0.2.3"
|
4
4
|
license = "Apache-2.0"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -11,10 +11,10 @@ publish = false
|
|
11
11
|
crate-type = ["cdylib"]
|
12
12
|
|
13
13
|
[dependencies]
|
14
|
-
arrow = { version = "
|
15
|
-
arrow-schema = { version = "
|
14
|
+
arrow = { version = "56", features = ["ffi"] }
|
15
|
+
arrow-schema = { version = "56", features = ["serde"] }
|
16
16
|
chrono = "0.4"
|
17
|
-
delta_kernel = { version = "0.
|
17
|
+
delta_kernel = { version = "0.16", features = ["arrow-56", "default-engine-rustls"] }
|
18
18
|
futures = "0.3"
|
19
19
|
magnus = "0.8"
|
20
20
|
num_cpus = "1"
|
@@ -23,7 +23,7 @@ serde_json = "1"
|
|
23
23
|
tokio = { version = "1", features = ["rt-multi-thread"] }
|
24
24
|
|
25
25
|
[dependencies.deltalake]
|
26
|
-
version = "=0.
|
26
|
+
version = "=0.29.0"
|
27
27
|
features = [
|
28
28
|
"azure",
|
29
29
|
"datafusion",
|
data/ext/deltalake/src/lib.rs
CHANGED
@@ -22,8 +22,8 @@ use deltalake::datafusion::prelude::SessionContext;
|
|
22
22
|
use deltalake::delta_datafusion::DeltaCdfTableProvider;
|
23
23
|
use deltalake::errors::DeltaTableError;
|
24
24
|
use deltalake::kernel::transaction::{CommitProperties, TableReference};
|
25
|
-
use deltalake::kernel::
|
26
|
-
use deltalake::kernel::{
|
25
|
+
use deltalake::kernel::{scalars::ScalarExt, Transaction};
|
26
|
+
use deltalake::kernel::{EagerSnapshot, StructDataExt};
|
27
27
|
use deltalake::logstore::IORuntime;
|
28
28
|
use deltalake::logstore::LogStoreRef;
|
29
29
|
use deltalake::operations::add_column::AddColumnBuilder;
|
@@ -43,7 +43,6 @@ use deltalake::parquet::errors::ParquetError;
|
|
43
43
|
use deltalake::parquet::file::properties::WriterProperties;
|
44
44
|
use deltalake::partitions::PartitionFilter;
|
45
45
|
use deltalake::table::config::TablePropertiesExt;
|
46
|
-
use deltalake::table::state::DeltaTableState;
|
47
46
|
use deltalake::{DeltaOps, DeltaResult};
|
48
47
|
use error::DeltaError;
|
49
48
|
use futures::future::join_all;
|
@@ -128,9 +127,10 @@ impl RawDeltaTable {
|
|
128
127
|
func(&self._table.borrow())
|
129
128
|
}
|
130
129
|
|
131
|
-
fn cloned_state(&self) -> RbResult<DeltaTableState> {
|
130
|
+
fn cloned_state(&self) -> RbResult<EagerSnapshot> {
|
132
131
|
self.with_table(|t| {
|
133
132
|
t.snapshot()
|
133
|
+
.map(|snapshot| snapshot.snapshot())
|
134
134
|
.cloned()
|
135
135
|
.map_err(RubyError::from)
|
136
136
|
.map_err(RbErr::from)
|
@@ -150,7 +150,10 @@ impl RawDeltaTable {
|
|
150
150
|
without_files: bool,
|
151
151
|
log_buffer_size: Option<usize>,
|
152
152
|
) -> RbResult<Self> {
|
153
|
-
let
|
153
|
+
let table_url = deltalake::table::builder::parse_table_uri(table_uri)
|
154
|
+
.map_err(error::RubyError::from)?;
|
155
|
+
let mut builder = deltalake::DeltaTableBuilder::from_uri(table_url)
|
156
|
+
.map_err(error::RubyError::from)?
|
154
157
|
.with_io_runtime(IORuntime::default());
|
155
158
|
|
156
159
|
if let Some(storage_options) = storage_options {
|
@@ -178,7 +181,10 @@ impl RawDeltaTable {
|
|
178
181
|
table_uri: String,
|
179
182
|
storage_options: Option<HashMap<String, String>>,
|
180
183
|
) -> RbResult<bool> {
|
181
|
-
let
|
184
|
+
let table_url = deltalake::table::builder::ensure_table_uri(table_uri)
|
185
|
+
.map_err(|_| RbValueError::new_err("Invalid table URI"))?;
|
186
|
+
let mut builder = deltalake::DeltaTableBuilder::from_uri(table_url)
|
187
|
+
.map_err(|_| RbValueError::new_err("Failed to create table builder"))?;
|
182
188
|
if let Some(storage_options) = storage_options {
|
183
189
|
builder = builder.with_storage_options(storage_options)
|
184
190
|
}
|
@@ -358,11 +364,11 @@ impl RawDeltaTable {
|
|
358
364
|
}
|
359
365
|
|
360
366
|
pub fn schema(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
|
361
|
-
let schema
|
367
|
+
let schema = rb_self.with_table(|t| {
|
362
368
|
let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
|
363
369
|
Ok(snapshot.schema().clone())
|
364
370
|
})?;
|
365
|
-
schema_to_rbobject(schema
|
371
|
+
schema_to_rbobject(schema, ruby)
|
366
372
|
}
|
367
373
|
|
368
374
|
pub fn vacuum(
|
@@ -373,13 +379,16 @@ impl RawDeltaTable {
|
|
373
379
|
commit_properties: Option<RbCommitProperties>,
|
374
380
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
375
381
|
) -> RbResult<Vec<String>> {
|
382
|
+
let snapshot = self
|
383
|
+
._table
|
384
|
+
.borrow()
|
385
|
+
.snapshot()
|
386
|
+
.cloned()
|
387
|
+
.map_err(RubyError::from)
|
388
|
+
.map_err(RbErr::from)?;
|
376
389
|
let mut cmd = VacuumBuilder::new(
|
377
390
|
self._table.borrow().log_store(),
|
378
|
-
|
379
|
-
.borrow()
|
380
|
-
.snapshot()
|
381
|
-
.map_err(RubyError::from)?
|
382
|
-
.clone(),
|
391
|
+
snapshot.snapshot().clone(),
|
383
392
|
)
|
384
393
|
.with_enforce_retention_duration(enforce_retention_duration)
|
385
394
|
.with_dry_run(dry_run);
|
@@ -408,15 +417,8 @@ impl RawDeltaTable {
|
|
408
417
|
commit_properties: Option<RbCommitProperties>,
|
409
418
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
410
419
|
) -> RbResult<String> {
|
411
|
-
let mut cmd = OptimizeBuilder::new(
|
412
|
-
|
413
|
-
self._table
|
414
|
-
.borrow()
|
415
|
-
.snapshot()
|
416
|
-
.map_err(RubyError::from)?
|
417
|
-
.clone(),
|
418
|
-
)
|
419
|
-
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
|
420
|
+
let mut cmd = OptimizeBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
|
421
|
+
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
|
420
422
|
if let Some(size) = target_size {
|
421
423
|
cmd = cmd.with_target_size(size);
|
422
424
|
}
|
@@ -458,17 +460,10 @@ impl RawDeltaTable {
|
|
458
460
|
commit_properties: Option<RbCommitProperties>,
|
459
461
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
460
462
|
) -> RbResult<String> {
|
461
|
-
let mut cmd = OptimizeBuilder::new(
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
.snapshot()
|
466
|
-
.map_err(RubyError::from)?
|
467
|
-
.clone(),
|
468
|
-
)
|
469
|
-
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
|
470
|
-
.with_max_spill_size(max_spill_size)
|
471
|
-
.with_type(OptimizeType::ZOrder(z_order_columns));
|
463
|
+
let mut cmd = OptimizeBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
|
464
|
+
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
|
465
|
+
.with_max_spill_size(max_spill_size)
|
466
|
+
.with_type(OptimizeType::ZOrder(z_order_columns));
|
472
467
|
if let Some(size) = target_size {
|
473
468
|
cmd = cmd.with_target_size(size);
|
474
469
|
}
|
@@ -499,14 +494,7 @@ impl RawDeltaTable {
|
|
499
494
|
|
500
495
|
pub fn add_columns(&self, fields: RArray) -> RbResult<()> {
|
501
496
|
let fields = fields.typecheck::<Obj<Field>>()?;
|
502
|
-
let mut cmd = AddColumnBuilder::new(
|
503
|
-
self._table.borrow().log_store(),
|
504
|
-
self._table
|
505
|
-
.borrow()
|
506
|
-
.snapshot()
|
507
|
-
.map_err(RubyError::from)?
|
508
|
-
.clone(),
|
509
|
-
);
|
497
|
+
let mut cmd = AddColumnBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
|
510
498
|
|
511
499
|
let new_fields = fields
|
512
500
|
.iter()
|
@@ -529,16 +517,10 @@ impl RawDeltaTable {
|
|
529
517
|
.into_iter()
|
530
518
|
.map(TableFeatures::try_convert)
|
531
519
|
.collect::<RbResult<Vec<_>>>()?;
|
532
|
-
let cmd =
|
533
|
-
self._table.borrow().log_store(),
|
534
|
-
|
535
|
-
.
|
536
|
-
.snapshot()
|
537
|
-
.map_err(RubyError::from)?
|
538
|
-
.clone(),
|
539
|
-
)
|
540
|
-
.with_features(feature)
|
541
|
-
.with_allow_protocol_versions_increase(allow_protocol_versions_increase);
|
520
|
+
let cmd =
|
521
|
+
AddTableFeatureBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
|
522
|
+
.with_features(feature)
|
523
|
+
.with_allow_protocol_versions_increase(allow_protocol_versions_increase);
|
542
524
|
|
543
525
|
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
544
526
|
self._table.borrow_mut().state = table.state;
|
@@ -546,14 +528,8 @@ impl RawDeltaTable {
|
|
546
528
|
}
|
547
529
|
|
548
530
|
pub fn add_constraints(&self, constraints: HashMap<String, String>) -> RbResult<()> {
|
549
|
-
let mut cmd =
|
550
|
-
self._table.borrow().log_store(),
|
551
|
-
self._table
|
552
|
-
.borrow()
|
553
|
-
.snapshot()
|
554
|
-
.map_err(RubyError::from)?
|
555
|
-
.clone(),
|
556
|
-
);
|
531
|
+
let mut cmd =
|
532
|
+
ConstraintBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
|
557
533
|
|
558
534
|
for (col_name, expression) in constraints {
|
559
535
|
cmd = cmd.with_constraint(col_name.clone(), expression.clone());
|
@@ -565,16 +541,10 @@ impl RawDeltaTable {
|
|
565
541
|
}
|
566
542
|
|
567
543
|
pub fn drop_constraints(&self, name: String, raise_if_not_exists: bool) -> RbResult<()> {
|
568
|
-
let cmd =
|
569
|
-
self._table.borrow().log_store(),
|
570
|
-
|
571
|
-
.
|
572
|
-
.snapshot()
|
573
|
-
.map_err(RubyError::from)?
|
574
|
-
.clone(),
|
575
|
-
)
|
576
|
-
.with_constraint(name)
|
577
|
-
.with_raise_if_not_exists(raise_if_not_exists);
|
544
|
+
let cmd =
|
545
|
+
DropConstraintBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
|
546
|
+
.with_constraint(name)
|
547
|
+
.with_raise_if_not_exists(raise_if_not_exists);
|
578
548
|
|
579
549
|
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
580
550
|
self._table.borrow_mut().state = table.state;
|
@@ -590,15 +560,9 @@ impl RawDeltaTable {
|
|
590
560
|
columns: Option<Vec<String>>,
|
591
561
|
) -> RbResult<ArrowArrayStream> {
|
592
562
|
let ctx = SessionContext::new();
|
593
|
-
let mut cdf_read =
|
594
|
-
self._table.borrow().log_store(),
|
595
|
-
|
596
|
-
.borrow()
|
597
|
-
.snapshot()
|
598
|
-
.map_err(RubyError::from)?
|
599
|
-
.clone(),
|
600
|
-
)
|
601
|
-
.with_starting_version(starting_version);
|
563
|
+
let mut cdf_read =
|
564
|
+
CdfLoadBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
|
565
|
+
.with_starting_version(starting_version);
|
602
566
|
|
603
567
|
if let Some(ev) = ending_version {
|
604
568
|
cdf_read = cdf_read.with_ending_version(ev);
|
@@ -667,11 +631,7 @@ impl RawDeltaTable {
|
|
667
631
|
) -> RbResult<RbMergeBuilder> {
|
668
632
|
Ok(RbMergeBuilder::new(
|
669
633
|
self._table.borrow().log_store(),
|
670
|
-
self.
|
671
|
-
.borrow()
|
672
|
-
.snapshot()
|
673
|
-
.map_err(RubyError::from)?
|
674
|
-
.clone(),
|
634
|
+
self.cloned_state()?,
|
675
635
|
source.0,
|
676
636
|
predicate,
|
677
637
|
source_alias,
|
@@ -697,14 +657,7 @@ impl RawDeltaTable {
|
|
697
657
|
protocol_downgrade_allowed: bool,
|
698
658
|
commit_properties: Option<RbCommitProperties>,
|
699
659
|
) -> RbResult<String> {
|
700
|
-
let mut cmd = RestoreBuilder::new(
|
701
|
-
self._table.borrow().log_store(),
|
702
|
-
self._table
|
703
|
-
.borrow()
|
704
|
-
.snapshot()
|
705
|
-
.map_err(RubyError::from)?
|
706
|
-
.clone(),
|
707
|
-
);
|
660
|
+
let mut cmd = RestoreBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
|
708
661
|
if let Some(val) = target {
|
709
662
|
if let Some(version) = Integer::from_value(val) {
|
710
663
|
cmd = cmd.with_version_to_restore(version.to_i64()?)
|
@@ -735,8 +688,7 @@ impl RawDeltaTable {
|
|
735
688
|
.block_on(self._table.borrow().history(limit))
|
736
689
|
.map_err(RubyError::from)?;
|
737
690
|
Ok(history
|
738
|
-
.
|
739
|
-
.map(|c| serde_json::to_string(c).unwrap())
|
691
|
+
.map(|c| serde_json::to_string(&c).unwrap())
|
740
692
|
.collect())
|
741
693
|
}
|
742
694
|
|
@@ -773,7 +725,7 @@ impl RawDeltaTable {
|
|
773
725
|
let adds: Vec<_> = rt()
|
774
726
|
.block_on(async {
|
775
727
|
state
|
776
|
-
.
|
728
|
+
.file_views_by_partitions(&log_store, &converted_filters)
|
777
729
|
.try_collect()
|
778
730
|
.await
|
779
731
|
})
|
@@ -823,7 +775,7 @@ impl RawDeltaTable {
|
|
823
775
|
.block_on(async {
|
824
776
|
t.snapshot()?
|
825
777
|
.snapshot()
|
826
|
-
.
|
778
|
+
.file_views(&log_store, None)
|
827
779
|
.map_ok(|f| (f.path().to_string(), f.size()))
|
828
780
|
.try_collect()
|
829
781
|
.await
|
@@ -840,14 +792,7 @@ impl RawDeltaTable {
|
|
840
792
|
commit_properties: Option<RbCommitProperties>,
|
841
793
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
842
794
|
) -> RbResult<String> {
|
843
|
-
let mut cmd = DeleteBuilder::new(
|
844
|
-
self._table.borrow().log_store(),
|
845
|
-
self._table
|
846
|
-
.borrow()
|
847
|
-
.snapshot()
|
848
|
-
.map_err(RubyError::from)?
|
849
|
-
.clone(),
|
850
|
-
);
|
795
|
+
let mut cmd = DeleteBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
|
851
796
|
if let Some(predicate) = predicate {
|
852
797
|
cmd = cmd.with_predicate(predicate);
|
853
798
|
}
|
@@ -872,16 +817,10 @@ impl RawDeltaTable {
|
|
872
817
|
properties: HashMap<String, String>,
|
873
818
|
raise_if_not_exists: bool,
|
874
819
|
) -> RbResult<()> {
|
875
|
-
let cmd =
|
876
|
-
self._table.borrow().log_store(),
|
877
|
-
|
878
|
-
.
|
879
|
-
.snapshot()
|
880
|
-
.map_err(RubyError::from)?
|
881
|
-
.clone(),
|
882
|
-
)
|
883
|
-
.with_properties(properties)
|
884
|
-
.with_raise_if_not_exists(raise_if_not_exists);
|
820
|
+
let cmd =
|
821
|
+
SetTablePropertiesBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
|
822
|
+
.with_properties(properties)
|
823
|
+
.with_raise_if_not_exists(raise_if_not_exists);
|
885
824
|
|
886
825
|
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
887
826
|
self._table.borrow_mut().state = table.state;
|
@@ -894,15 +833,9 @@ impl RawDeltaTable {
|
|
894
833
|
commit_properties: Option<RbCommitProperties>,
|
895
834
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
896
835
|
) -> RbResult<String> {
|
897
|
-
let mut cmd =
|
898
|
-
self._table.borrow().log_store(),
|
899
|
-
|
900
|
-
.borrow()
|
901
|
-
.snapshot()
|
902
|
-
.map_err(RubyError::from)?
|
903
|
-
.clone(),
|
904
|
-
)
|
905
|
-
.with_dry_run(dry_run);
|
836
|
+
let mut cmd =
|
837
|
+
FileSystemCheckBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
|
838
|
+
.with_dry_run(dry_run);
|
906
839
|
|
907
840
|
if let Some(commit_properties) =
|
908
841
|
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
@@ -1228,8 +1161,10 @@ fn write_to_deltalake(
|
|
1228
1161
|
let table = if let Some(table) = table {
|
1229
1162
|
DeltaOps(table._table.borrow().clone())
|
1230
1163
|
} else {
|
1164
|
+
let table_url =
|
1165
|
+
deltalake::table::builder::ensure_table_uri(&table_uri).map_err(RubyError::from)?;
|
1231
1166
|
rt().block_on(DeltaOps::try_from_uri_with_storage_options(
|
1232
|
-
|
1167
|
+
table_url, options,
|
1233
1168
|
))
|
1234
1169
|
.map_err(RubyError::from)?
|
1235
1170
|
};
|
data/ext/deltalake/src/merge.rs
CHANGED
@@ -4,9 +4,9 @@ use deltalake::arrow::ffi_stream::ArrowArrayStreamReader;
|
|
4
4
|
use deltalake::datafusion::catalog::TableProvider;
|
5
5
|
use deltalake::datafusion::datasource::MemTable;
|
6
6
|
use deltalake::datafusion::prelude::SessionContext;
|
7
|
+
use deltalake::kernel::EagerSnapshot;
|
7
8
|
use deltalake::logstore::LogStoreRef;
|
8
9
|
use deltalake::operations::merge::MergeBuilder;
|
9
|
-
use deltalake::table::state::DeltaTableState;
|
10
10
|
use deltalake::{DeltaResult, DeltaTable};
|
11
11
|
use std::cell::RefCell;
|
12
12
|
use std::collections::HashMap;
|
@@ -45,7 +45,7 @@ impl RbMergeBuilder {
|
|
45
45
|
#[allow(clippy::too_many_arguments)]
|
46
46
|
pub fn new(
|
47
47
|
log_store: LogStoreRef,
|
48
|
-
snapshot: DeltaTableState,
|
48
|
+
snapshot: EagerSnapshot,
|
49
49
|
source: ArrowArrayStreamReader,
|
50
50
|
predicate: String,
|
51
51
|
source_alias: Option<String>,
|
data/ext/deltalake/src/schema.rs
CHANGED
@@ -2,10 +2,11 @@ use deltalake::kernel::{
|
|
2
2
|
DataType, PrimitiveType as DeltaPrimitive, StructField, StructType as DeltaStructType,
|
3
3
|
};
|
4
4
|
use magnus::{value::ReprValue, Module, RModule, Ruby, TryConvert, Value};
|
5
|
+
use std::sync::Arc;
|
5
6
|
|
6
7
|
use crate::{RbResult, RbValueError};
|
7
8
|
|
8
|
-
pub fn schema_to_rbobject(schema: DeltaStructType
|
9
|
+
pub fn schema_to_rbobject(schema: Arc<DeltaStructType>, ruby: &Ruby) -> RbResult<Value> {
|
9
10
|
let fields = schema.fields().map(|field| Field {
|
10
11
|
inner: field.clone(),
|
11
12
|
});
|
data/lib/deltalake/version.rb
CHANGED