deltalake-rb 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +78 -17
- data/ext/deltalake/Cargo.toml +3 -3
- data/ext/deltalake/src/lib.rs +227 -49
- data/ext/deltalake/src/merge.rs +13 -13
- data/lib/deltalake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 16f0dd7fef3c4d05ba4f20bfb9b3580e7fd0b970c29df1e09f8c6419751fec76
|
|
4
|
+
data.tar.gz: 5c283e45ba6bfc0e329c84e04cc289a9e9093a4587de02a64fb9309dfdfb441d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 47f7b19281515b0289de294165d02fe26d931f0d8b7fee59666772688acbe99effa3465a43605a72686d6c3f42d8403957954ee66cf3d62966265db8781d19cb
|
|
7
|
+
data.tar.gz: 938ca39e060cd9c1815b63510b3238d9da5afedcda3e3ab4adce177d0d89fffea29172a867fbef2786daf9c7c4cdf3992d7c7bd9a90f549e111222e70264f2fd
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
|
@@ -306,9 +306,9 @@ dependencies = [
|
|
|
306
306
|
|
|
307
307
|
[[package]]
|
|
308
308
|
name = "async-compression"
|
|
309
|
-
version = "0.4.
|
|
309
|
+
version = "0.4.42"
|
|
310
310
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
311
|
-
checksum = "
|
|
311
|
+
checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac"
|
|
312
312
|
dependencies = [
|
|
313
313
|
"compression-codecs",
|
|
314
314
|
"compression-core",
|
|
@@ -902,9 +902,9 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
|
|
|
902
902
|
|
|
903
903
|
[[package]]
|
|
904
904
|
name = "buoyant_kernel"
|
|
905
|
-
version = "0.
|
|
905
|
+
version = "0.22.0"
|
|
906
906
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
907
|
-
checksum = "
|
|
907
|
+
checksum = "0d3ca37afa82755db7b4fd51a4eab9e53eeb0aa1898fae15d373bd6df4bdf0f8"
|
|
908
908
|
dependencies = [
|
|
909
909
|
"arrow",
|
|
910
910
|
"buoyant_kernel_derive",
|
|
@@ -916,6 +916,7 @@ dependencies = [
|
|
|
916
916
|
"itertools 0.14.0",
|
|
917
917
|
"object_store",
|
|
918
918
|
"parquet",
|
|
919
|
+
"percent-encoding",
|
|
919
920
|
"rand 0.9.4",
|
|
920
921
|
"reqwest 0.13.2",
|
|
921
922
|
"roaring",
|
|
@@ -926,6 +927,7 @@ dependencies = [
|
|
|
926
927
|
"thiserror 2.0.12",
|
|
927
928
|
"tokio",
|
|
928
929
|
"tracing",
|
|
930
|
+
"tracing-subscriber",
|
|
929
931
|
"url",
|
|
930
932
|
"uuid",
|
|
931
933
|
"z85",
|
|
@@ -1101,9 +1103,9 @@ dependencies = [
|
|
|
1101
1103
|
|
|
1102
1104
|
[[package]]
|
|
1103
1105
|
name = "compression-codecs"
|
|
1104
|
-
version = "0.4.
|
|
1106
|
+
version = "0.4.38"
|
|
1105
1107
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1106
|
-
checksum = "
|
|
1108
|
+
checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf"
|
|
1107
1109
|
dependencies = [
|
|
1108
1110
|
"bzip2",
|
|
1109
1111
|
"compression-core",
|
|
@@ -1116,9 +1118,9 @@ dependencies = [
|
|
|
1116
1118
|
|
|
1117
1119
|
[[package]]
|
|
1118
1120
|
name = "compression-core"
|
|
1119
|
-
version = "0.4.
|
|
1121
|
+
version = "0.4.32"
|
|
1120
1122
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1121
|
-
checksum = "
|
|
1123
|
+
checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789"
|
|
1122
1124
|
|
|
1123
1125
|
[[package]]
|
|
1124
1126
|
name = "const-oid"
|
|
@@ -2061,9 +2063,9 @@ dependencies = [
|
|
|
2061
2063
|
|
|
2062
2064
|
[[package]]
|
|
2063
2065
|
name = "deltalake"
|
|
2064
|
-
version = "0.32.
|
|
2066
|
+
version = "0.32.4"
|
|
2065
2067
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2066
|
-
checksum = "
|
|
2068
|
+
checksum = "2695b91fd02afd5b4726a190fd58019674492d824e30598391390b0307644da8"
|
|
2067
2069
|
dependencies = [
|
|
2068
2070
|
"buoyant_kernel",
|
|
2069
2071
|
"ctor",
|
|
@@ -2116,9 +2118,9 @@ dependencies = [
|
|
|
2116
2118
|
|
|
2117
2119
|
[[package]]
|
|
2118
2120
|
name = "deltalake-core"
|
|
2119
|
-
version = "0.32.
|
|
2121
|
+
version = "0.32.4"
|
|
2120
2122
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2121
|
-
checksum = "
|
|
2123
|
+
checksum = "4588e95ff3b2ccdba56d9ec262bd3467c0593000f729402528706f62be8be1ca"
|
|
2122
2124
|
dependencies = [
|
|
2123
2125
|
"arrow",
|
|
2124
2126
|
"arrow-arith",
|
|
@@ -2201,7 +2203,7 @@ dependencies = [
|
|
|
2201
2203
|
|
|
2202
2204
|
[[package]]
|
|
2203
2205
|
name = "deltalake-ruby"
|
|
2204
|
-
version = "0.3.
|
|
2206
|
+
version = "0.3.2"
|
|
2205
2207
|
dependencies = [
|
|
2206
2208
|
"arrow",
|
|
2207
2209
|
"arrow-schema",
|
|
@@ -3183,9 +3185,9 @@ dependencies = [
|
|
|
3183
3185
|
|
|
3184
3186
|
[[package]]
|
|
3185
3187
|
name = "link-section"
|
|
3186
|
-
version = "0.2.
|
|
3188
|
+
version = "0.2.1"
|
|
3187
3189
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3188
|
-
checksum = "
|
|
3190
|
+
checksum = "b685d66585d646efe09fec763d796c291049c8b6bf84e04954bffc8748341f0d"
|
|
3189
3191
|
|
|
3190
3192
|
[[package]]
|
|
3191
3193
|
name = "linux-raw-sys"
|
|
@@ -3312,6 +3314,15 @@ dependencies = [
|
|
|
3312
3314
|
"minimal-lexical",
|
|
3313
3315
|
]
|
|
3314
3316
|
|
|
3317
|
+
[[package]]
|
|
3318
|
+
name = "nu-ansi-term"
|
|
3319
|
+
version = "0.50.3"
|
|
3320
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3321
|
+
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
|
3322
|
+
dependencies = [
|
|
3323
|
+
"windows-sys 0.61.2",
|
|
3324
|
+
]
|
|
3325
|
+
|
|
3315
3326
|
[[package]]
|
|
3316
3327
|
name = "num-bigint"
|
|
3317
3328
|
version = "0.4.6"
|
|
@@ -4097,9 +4108,9 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"
|
|
|
4097
4108
|
|
|
4098
4109
|
[[package]]
|
|
4099
4110
|
name = "rustls-webpki"
|
|
4100
|
-
version = "0.103.
|
|
4111
|
+
version = "0.103.13"
|
|
4101
4112
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4102
|
-
checksum = "
|
|
4113
|
+
checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
|
|
4103
4114
|
dependencies = [
|
|
4104
4115
|
"aws-lc-rs",
|
|
4105
4116
|
"ring",
|
|
@@ -4254,6 +4265,15 @@ dependencies = [
|
|
|
4254
4265
|
"digest 0.11.2",
|
|
4255
4266
|
]
|
|
4256
4267
|
|
|
4268
|
+
[[package]]
|
|
4269
|
+
name = "sharded-slab"
|
|
4270
|
+
version = "0.1.7"
|
|
4271
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4272
|
+
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
|
|
4273
|
+
dependencies = [
|
|
4274
|
+
"lazy_static",
|
|
4275
|
+
]
|
|
4276
|
+
|
|
4257
4277
|
[[package]]
|
|
4258
4278
|
name = "shell-words"
|
|
4259
4279
|
version = "1.1.0"
|
|
@@ -4536,6 +4556,15 @@ dependencies = [
|
|
|
4536
4556
|
"syn",
|
|
4537
4557
|
]
|
|
4538
4558
|
|
|
4559
|
+
[[package]]
|
|
4560
|
+
name = "thread_local"
|
|
4561
|
+
version = "1.1.9"
|
|
4562
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4563
|
+
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
|
|
4564
|
+
dependencies = [
|
|
4565
|
+
"cfg-if",
|
|
4566
|
+
]
|
|
4567
|
+
|
|
4539
4568
|
[[package]]
|
|
4540
4569
|
name = "thrift"
|
|
4541
4570
|
version = "0.17.0"
|
|
@@ -4749,6 +4778,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
4749
4778
|
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
|
|
4750
4779
|
dependencies = [
|
|
4751
4780
|
"once_cell",
|
|
4781
|
+
"valuable",
|
|
4782
|
+
]
|
|
4783
|
+
|
|
4784
|
+
[[package]]
|
|
4785
|
+
name = "tracing-log"
|
|
4786
|
+
version = "0.2.0"
|
|
4787
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4788
|
+
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
|
|
4789
|
+
dependencies = [
|
|
4790
|
+
"log",
|
|
4791
|
+
"once_cell",
|
|
4792
|
+
"tracing-core",
|
|
4793
|
+
]
|
|
4794
|
+
|
|
4795
|
+
[[package]]
|
|
4796
|
+
name = "tracing-subscriber"
|
|
4797
|
+
version = "0.3.23"
|
|
4798
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4799
|
+
checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319"
|
|
4800
|
+
dependencies = [
|
|
4801
|
+
"nu-ansi-term",
|
|
4802
|
+
"sharded-slab",
|
|
4803
|
+
"smallvec",
|
|
4804
|
+
"thread_local",
|
|
4805
|
+
"tracing-core",
|
|
4806
|
+
"tracing-log",
|
|
4752
4807
|
]
|
|
4753
4808
|
|
|
4754
4809
|
[[package]]
|
|
@@ -4886,6 +4941,12 @@ dependencies = [
|
|
|
4886
4941
|
"syn",
|
|
4887
4942
|
]
|
|
4888
4943
|
|
|
4944
|
+
[[package]]
|
|
4945
|
+
name = "valuable"
|
|
4946
|
+
version = "0.1.1"
|
|
4947
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4948
|
+
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
|
|
4949
|
+
|
|
4889
4950
|
[[package]]
|
|
4890
4951
|
name = "version_check"
|
|
4891
4952
|
version = "0.9.5"
|
data/ext/deltalake/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "deltalake-ruby"
|
|
3
|
-
version = "0.3.
|
|
3
|
+
version = "0.3.2"
|
|
4
4
|
license = "Apache-2.0"
|
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
|
6
6
|
edition = "2021"
|
|
@@ -15,7 +15,7 @@ crate-type = ["cdylib"]
|
|
|
15
15
|
arrow = { version = "58", features = ["ffi"] }
|
|
16
16
|
arrow-schema = { version = "58", features = ["serde"] }
|
|
17
17
|
chrono = "0.4"
|
|
18
|
-
delta_kernel = { package = "buoyant_kernel", version = "0.
|
|
18
|
+
delta_kernel = { package = "buoyant_kernel", version = "0.22", features = ["arrow-58", "default-engine-rustls"] }
|
|
19
19
|
futures = "0.3"
|
|
20
20
|
magnus = "0.8"
|
|
21
21
|
num_cpus = "1"
|
|
@@ -26,7 +26,7 @@ tokio = { version = "1", features = ["rt-multi-thread"] }
|
|
|
26
26
|
uuid = { version = "1", features = ["serde", "v4"] }
|
|
27
27
|
|
|
28
28
|
[dependencies.deltalake]
|
|
29
|
-
version = "=0.32.
|
|
29
|
+
version = "=0.32.4"
|
|
30
30
|
features = [
|
|
31
31
|
"azure",
|
|
32
32
|
"datafusion",
|
data/ext/deltalake/src/lib.rs
CHANGED
|
@@ -6,7 +6,7 @@ mod schema;
|
|
|
6
6
|
mod utils;
|
|
7
7
|
|
|
8
8
|
use chrono::{DateTime, Duration, FixedOffset, Utc};
|
|
9
|
-
use delta_kernel::schema::StructField;
|
|
9
|
+
use delta_kernel::schema::{MetadataValue, StructField};
|
|
10
10
|
use delta_kernel::table_properties::DataSkippingNumIndexedCols;
|
|
11
11
|
use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
|
|
12
12
|
use deltalake::arrow::record_batch::RecordBatchIterator;
|
|
@@ -22,10 +22,12 @@ use deltalake::logstore::IORuntime;
|
|
|
22
22
|
use deltalake::logstore::LogStoreRef;
|
|
23
23
|
use deltalake::operations::collect_sendable_stream;
|
|
24
24
|
use deltalake::operations::optimize::OptimizeType;
|
|
25
|
+
use deltalake::operations::update_table_metadata::TableMetadataUpdate;
|
|
25
26
|
use deltalake::parquet::basic::Compression;
|
|
26
27
|
use deltalake::parquet::errors::ParquetError;
|
|
27
28
|
use deltalake::parquet::file::properties::WriterProperties;
|
|
28
29
|
use deltalake::partitions::PartitionFilter;
|
|
30
|
+
use deltalake::protocol::log_compaction::compact_logs;
|
|
29
31
|
use deltalake::table::config::TablePropertiesExt;
|
|
30
32
|
use deltalake::table::state::DeltaTableState;
|
|
31
33
|
use deltalake::{DeltaResult, DeltaTable};
|
|
@@ -191,13 +193,13 @@ impl RawDeltaTable {
|
|
|
191
193
|
.with_io_runtime(IORuntime::default());
|
|
192
194
|
|
|
193
195
|
if let Some(storage_options) = storage_options {
|
|
194
|
-
builder = builder.with_storage_options(storage_options)
|
|
196
|
+
builder = builder.with_storage_options(storage_options);
|
|
195
197
|
}
|
|
196
198
|
if let Some(version) = version {
|
|
197
|
-
builder = builder.with_version(version)
|
|
199
|
+
builder = builder.with_version(version);
|
|
198
200
|
}
|
|
199
201
|
if without_files {
|
|
200
|
-
builder = builder.without_files()
|
|
202
|
+
builder = builder.without_files();
|
|
201
203
|
}
|
|
202
204
|
if let Some(buf_size) = log_buffer_size {
|
|
203
205
|
builder = builder
|
|
@@ -222,7 +224,7 @@ impl RawDeltaTable {
|
|
|
222
224
|
let mut builder = deltalake::DeltaTableBuilder::from_url(table_url)
|
|
223
225
|
.map_err(|_| RbValueError::new_err("Failed to create table builder"))?;
|
|
224
226
|
if let Some(storage_options) = storage_options {
|
|
225
|
-
builder = builder.with_storage_options(storage_options)
|
|
227
|
+
builder = builder.with_storage_options(storage_options);
|
|
226
228
|
}
|
|
227
229
|
Ok(rt()
|
|
228
230
|
.block_on(async {
|
|
@@ -470,6 +472,49 @@ impl RawDeltaTable {
|
|
|
470
472
|
Ok(metrics.files_deleted)
|
|
471
473
|
}
|
|
472
474
|
|
|
475
|
+
#[allow(clippy::too_many_arguments)]
|
|
476
|
+
pub fn update(
|
|
477
|
+
rb: &Ruby,
|
|
478
|
+
self_: &Self,
|
|
479
|
+
updates: HashMap<String, String>,
|
|
480
|
+
predicate: Option<String>,
|
|
481
|
+
writer_properties: Option<RbWriterProperties>,
|
|
482
|
+
safe_cast: bool,
|
|
483
|
+
commit_properties: Option<RbCommitProperties>,
|
|
484
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
485
|
+
) -> RbResult<String> {
|
|
486
|
+
let (table, metrics) = rb
|
|
487
|
+
.detach(|| {
|
|
488
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
489
|
+
let mut cmd = table.update().with_safe_cast(safe_cast);
|
|
490
|
+
|
|
491
|
+
if let Some(writer_props) = writer_properties {
|
|
492
|
+
cmd = cmd.with_writer_properties(
|
|
493
|
+
set_writer_properties(writer_props).map_err(RubyError::from)?,
|
|
494
|
+
);
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
for (col_name, expression) in updates {
|
|
498
|
+
cmd = cmd.with_update(col_name.clone(), expression.clone());
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
if let Some(update_predicate) = predicate {
|
|
502
|
+
cmd = cmd.with_predicate(update_predicate);
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
if let Some(commit_properties) =
|
|
506
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
507
|
+
{
|
|
508
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
512
|
+
})
|
|
513
|
+
.map_err(RbErr::from)?;
|
|
514
|
+
self_.set_state(table.state)?;
|
|
515
|
+
Ok(serde_json::to_string(&metrics).unwrap())
|
|
516
|
+
}
|
|
517
|
+
|
|
473
518
|
#[allow(clippy::too_many_arguments)]
|
|
474
519
|
pub fn compact_optimize(
|
|
475
520
|
rb: &Ruby,
|
|
@@ -664,8 +709,16 @@ impl RawDeltaTable {
|
|
|
664
709
|
Ok(())
|
|
665
710
|
}
|
|
666
711
|
|
|
712
|
+
pub fn generate(&self) -> RbResult<()> {
|
|
713
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
714
|
+
rt().block_on(async { table.generate().await })
|
|
715
|
+
.map_err(RubyError::from)?;
|
|
716
|
+
Ok(())
|
|
717
|
+
}
|
|
718
|
+
|
|
667
719
|
pub fn load_cdf(
|
|
668
|
-
&
|
|
720
|
+
rb: &Ruby,
|
|
721
|
+
self_: &Self,
|
|
669
722
|
starting_version: Option<Version>,
|
|
670
723
|
ending_version: Option<Version>,
|
|
671
724
|
starting_timestamp: Option<String>,
|
|
@@ -673,7 +726,7 @@ impl RawDeltaTable {
|
|
|
673
726
|
columns: Option<Vec<String>>,
|
|
674
727
|
) -> RbResult<ArrowArrayStream> {
|
|
675
728
|
let ctx = SessionContext::new();
|
|
676
|
-
let table =
|
|
729
|
+
let table = self_._table.lock().map_err(to_rt_err)?.clone();
|
|
677
730
|
let mut cmd = table.scan_cdf();
|
|
678
731
|
|
|
679
732
|
if let Some(sv) = starting_version {
|
|
@@ -698,38 +751,41 @@ impl RawDeltaTable {
|
|
|
698
751
|
let table_provider: Arc<dyn TableProvider> =
|
|
699
752
|
Arc::new(DeltaCdfTableProvider::try_new(cmd).map_err(RubyError::from)?);
|
|
700
753
|
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
let
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
754
|
+
rb.detach(|| {
|
|
755
|
+
let plan = rt()
|
|
756
|
+
.block_on(async {
|
|
757
|
+
let mut df = ctx.read_table(table_provider)?;
|
|
758
|
+
if let Some(columns) = columns {
|
|
759
|
+
let cols: Vec<_> = columns.iter().map(|c| c.as_ref()).collect();
|
|
760
|
+
df = df.select_columns(&cols)?;
|
|
761
|
+
}
|
|
762
|
+
df.create_physical_plan().await
|
|
763
|
+
})
|
|
764
|
+
.map_err(RubyError::from)?;
|
|
711
765
|
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
766
|
+
let mut tasks = vec![];
|
|
767
|
+
for p in 0..plan.properties().output_partitioning().partition_count() {
|
|
768
|
+
let inner_plan = plan.clone();
|
|
769
|
+
let partition_batch = inner_plan.execute(p, ctx.task_ctx()).unwrap();
|
|
770
|
+
let handle = rt().spawn(collect_sendable_stream(partition_batch));
|
|
771
|
+
tasks.push(handle);
|
|
772
|
+
}
|
|
719
773
|
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
774
|
+
// This is unfortunate.
|
|
775
|
+
let batches = rt()
|
|
776
|
+
.block_on(join_all(tasks))
|
|
777
|
+
.into_iter()
|
|
778
|
+
.flatten()
|
|
779
|
+
.collect::<Result<Vec<Vec<_>>, _>>()
|
|
780
|
+
.unwrap()
|
|
781
|
+
.into_iter()
|
|
782
|
+
.flatten()
|
|
783
|
+
.map(Ok);
|
|
784
|
+
let batch_iter = RecordBatchIterator::new(batches, plan.schema());
|
|
785
|
+
let ffi_stream = FFI_ArrowArrayStream::new(Box::new(batch_iter));
|
|
786
|
+
Ok::<_, RubyError>(ArrowArrayStream { stream: ffi_stream })
|
|
787
|
+
})
|
|
788
|
+
.map_err(RbErr::from)
|
|
733
789
|
}
|
|
734
790
|
|
|
735
791
|
#[allow(clippy::too_many_arguments)]
|
|
@@ -776,7 +832,7 @@ impl RawDeltaTable {
|
|
|
776
832
|
let mut cmd = table.restore();
|
|
777
833
|
if let Some(val) = target {
|
|
778
834
|
if let Some(version) = Integer::from_value(val) {
|
|
779
|
-
cmd = cmd.with_version_to_restore(version.to_u64()?)
|
|
835
|
+
cmd = cmd.with_version_to_restore(version.to_u64()?);
|
|
780
836
|
}
|
|
781
837
|
if let Ok(ds) = String::try_convert(val) {
|
|
782
838
|
let datetime = DateTime::<Utc>::from(
|
|
@@ -784,7 +840,7 @@ impl RawDeltaTable {
|
|
|
784
840
|
RbValueError::new_err(format!("Failed to parse datetime string: {err}"))
|
|
785
841
|
})?,
|
|
786
842
|
);
|
|
787
|
-
cmd = cmd.with_datetime_to_restore(datetime)
|
|
843
|
+
cmd = cmd.with_datetime_to_restore(datetime);
|
|
788
844
|
}
|
|
789
845
|
}
|
|
790
846
|
cmd = cmd.with_ignore_missing_files(ignore_missing_files);
|
|
@@ -903,6 +959,34 @@ impl RawDeltaTable {
|
|
|
903
959
|
.map_err(RbErr::from)
|
|
904
960
|
}
|
|
905
961
|
|
|
962
|
+
pub fn compact_logs(
|
|
963
|
+
rb: &Ruby,
|
|
964
|
+
self_: &Self,
|
|
965
|
+
starting_version: u64,
|
|
966
|
+
ending_version: u64,
|
|
967
|
+
) -> RbResult<()> {
|
|
968
|
+
rb.detach(|| {
|
|
969
|
+
let operation_id = Uuid::new_v4();
|
|
970
|
+
|
|
971
|
+
#[allow(clippy::await_holding_lock)]
|
|
972
|
+
let result = rt().block_on(async {
|
|
973
|
+
match self_._table.lock() {
|
|
974
|
+
Ok(table) => {
|
|
975
|
+
compact_logs(&table, starting_version, ending_version, Some(operation_id))
|
|
976
|
+
.await
|
|
977
|
+
.map_err(RubyError::from)
|
|
978
|
+
}
|
|
979
|
+
Err(e) => Err(RubyError::RuntimeError(e.to_string())),
|
|
980
|
+
}
|
|
981
|
+
});
|
|
982
|
+
|
|
983
|
+
result
|
|
984
|
+
})
|
|
985
|
+
.map_err(RbErr::from)?;
|
|
986
|
+
|
|
987
|
+
Ok(())
|
|
988
|
+
}
|
|
989
|
+
|
|
906
990
|
pub fn cleanup_metadata(rb: &Ruby, self_: &Self) -> RbResult<()> {
|
|
907
991
|
let (_result, new_state) = rb.detach(|| {
|
|
908
992
|
let operation_id = Uuid::new_v4();
|
|
@@ -1009,6 +1093,54 @@ impl RawDeltaTable {
|
|
|
1009
1093
|
Ok(())
|
|
1010
1094
|
}
|
|
1011
1095
|
|
|
1096
|
+
pub fn set_table_name(
|
|
1097
|
+
&self,
|
|
1098
|
+
name: String,
|
|
1099
|
+
commit_properties: Option<RbCommitProperties>,
|
|
1100
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
1101
|
+
) -> RbResult<()> {
|
|
1102
|
+
let update = TableMetadataUpdate {
|
|
1103
|
+
name: Some(name),
|
|
1104
|
+
description: None,
|
|
1105
|
+
};
|
|
1106
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
1107
|
+
let mut cmd = table.update_table_metadata().with_update(update);
|
|
1108
|
+
|
|
1109
|
+
if let Some(commit_properties) =
|
|
1110
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1111
|
+
{
|
|
1112
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
1113
|
+
}
|
|
1114
|
+
|
|
1115
|
+
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
1116
|
+
self.set_state(table.state)?;
|
|
1117
|
+
Ok(())
|
|
1118
|
+
}
|
|
1119
|
+
|
|
1120
|
+
pub fn set_table_description(
|
|
1121
|
+
&self,
|
|
1122
|
+
description: String,
|
|
1123
|
+
commit_properties: Option<RbCommitProperties>,
|
|
1124
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
1125
|
+
) -> RbResult<()> {
|
|
1126
|
+
let update = TableMetadataUpdate {
|
|
1127
|
+
name: None,
|
|
1128
|
+
description: Some(description),
|
|
1129
|
+
};
|
|
1130
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
1131
|
+
let mut cmd = table.update_table_metadata().with_update(update);
|
|
1132
|
+
|
|
1133
|
+
if let Some(commit_properties) =
|
|
1134
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1135
|
+
{
|
|
1136
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
1140
|
+
self.set_state(table.state)?;
|
|
1141
|
+
Ok(())
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1012
1144
|
pub fn repair(
|
|
1013
1145
|
&self,
|
|
1014
1146
|
dry_run: bool,
|
|
@@ -1038,6 +1170,40 @@ impl RawDeltaTable {
|
|
|
1038
1170
|
.map_err(RubyError::from)?)
|
|
1039
1171
|
}
|
|
1040
1172
|
|
|
1173
|
+
pub fn set_column_metadata(
|
|
1174
|
+
rb: &Ruby,
|
|
1175
|
+
self_: &Self,
|
|
1176
|
+
field_name: String,
|
|
1177
|
+
metadata: HashMap<String, String>,
|
|
1178
|
+
commit_properties: Option<RbCommitProperties>,
|
|
1179
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
1180
|
+
) -> RbResult<()> {
|
|
1181
|
+
let table = rb
|
|
1182
|
+
.detach(|| {
|
|
1183
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
1184
|
+
let mut cmd = table
|
|
1185
|
+
.update_field_metadata()
|
|
1186
|
+
.with_field_name(&field_name)
|
|
1187
|
+
.with_metadata(
|
|
1188
|
+
metadata
|
|
1189
|
+
.iter()
|
|
1190
|
+
.map(|(k, v)| (k.clone(), MetadataValue::String(v.clone())))
|
|
1191
|
+
.collect(),
|
|
1192
|
+
);
|
|
1193
|
+
|
|
1194
|
+
if let Some(commit_properties) =
|
|
1195
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1196
|
+
{
|
|
1197
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
1201
|
+
})
|
|
1202
|
+
.map_err(RbErr::from)?;
|
|
1203
|
+
self_.set_state(table.state)?;
|
|
1204
|
+
Ok(())
|
|
1205
|
+
}
|
|
1206
|
+
|
|
1041
1207
|
#[allow(clippy::too_many_arguments)]
|
|
1042
1208
|
pub fn write(
|
|
1043
1209
|
rb: &Ruby,
|
|
@@ -1077,32 +1243,32 @@ impl RawDeltaTable {
|
|
|
1077
1243
|
|
|
1078
1244
|
if let Some(name) = &name {
|
|
1079
1245
|
builder = builder.with_table_name(name);
|
|
1080
|
-
}
|
|
1246
|
+
}
|
|
1081
1247
|
|
|
1082
1248
|
if let Some(description) = &description {
|
|
1083
1249
|
builder = builder.with_description(description);
|
|
1084
|
-
}
|
|
1250
|
+
}
|
|
1085
1251
|
|
|
1086
1252
|
if let Some(predicate) = predicate {
|
|
1087
1253
|
builder = builder.with_replace_where(predicate);
|
|
1088
|
-
}
|
|
1254
|
+
}
|
|
1089
1255
|
|
|
1090
1256
|
if let Some(target_file_size) = target_file_size {
|
|
1091
1257
|
let target_file_size = NonZeroU64::new(target_file_size).ok_or_else(|| {
|
|
1092
1258
|
RubyError::ValueError("target_file_size must be greater than 0".to_string())
|
|
1093
1259
|
})?;
|
|
1094
|
-
builder = builder.with_target_file_size(Some(target_file_size))
|
|
1095
|
-
}
|
|
1260
|
+
builder = builder.with_target_file_size(Some(target_file_size));
|
|
1261
|
+
}
|
|
1096
1262
|
|
|
1097
1263
|
if let Some(config) = configuration {
|
|
1098
1264
|
builder = builder.with_configuration(config);
|
|
1099
|
-
}
|
|
1265
|
+
}
|
|
1100
1266
|
|
|
1101
1267
|
if let Some(commit_properties) =
|
|
1102
1268
|
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1103
1269
|
{
|
|
1104
1270
|
builder = builder.with_commit_properties(commit_properties);
|
|
1105
|
-
}
|
|
1271
|
+
}
|
|
1106
1272
|
|
|
1107
1273
|
rt().block_on(builder.into_future())
|
|
1108
1274
|
.map_err(RubyError::from)
|
|
@@ -1249,11 +1415,11 @@ fn maybe_create_commit_properties(
|
|
|
1249
1415
|
let json_metadata: Map<String, serde_json::Value> =
|
|
1250
1416
|
metadata.into_iter().map(|(k, v)| (k, v.into())).collect();
|
|
1251
1417
|
commit_properties = commit_properties.with_metadata(json_metadata);
|
|
1252
|
-
}
|
|
1418
|
+
}
|
|
1253
1419
|
|
|
1254
1420
|
if let Some(max_retries) = commit_props.max_commit_retries {
|
|
1255
1421
|
commit_properties = commit_properties.with_max_retries(max_retries);
|
|
1256
|
-
}
|
|
1422
|
+
}
|
|
1257
1423
|
|
|
1258
1424
|
if let Some(app_transactions) = commit_props.app_transactions {
|
|
1259
1425
|
let app_transactions = app_transactions.iter().map(Transaction::from).collect();
|
|
@@ -1263,7 +1429,7 @@ fn maybe_create_commit_properties(
|
|
|
1263
1429
|
|
|
1264
1430
|
if let Some(post_commit_hook_props) = post_commithook_properties {
|
|
1265
1431
|
commit_properties =
|
|
1266
|
-
set_post_commithook_properties(commit_properties, post_commit_hook_props)
|
|
1432
|
+
set_post_commithook_properties(commit_properties, post_commit_hook_props);
|
|
1267
1433
|
}
|
|
1268
1434
|
Some(commit_properties)
|
|
1269
1435
|
}
|
|
@@ -1511,6 +1677,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1511
1677
|
class.define_method("file_uris", method!(RawDeltaTable::file_uris, 1))?;
|
|
1512
1678
|
class.define_method("schema", method!(RawDeltaTable::schema, 0))?;
|
|
1513
1679
|
class.define_method("vacuum", method!(RawDeltaTable::vacuum, 5))?;
|
|
1680
|
+
class.define_method("update", method!(RawDeltaTable::update, 6))?;
|
|
1514
1681
|
class.define_method(
|
|
1515
1682
|
"compact_optimize",
|
|
1516
1683
|
method!(RawDeltaTable::compact_optimize, 7),
|
|
@@ -1529,6 +1696,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1529
1696
|
"drop_constraints",
|
|
1530
1697
|
method!(RawDeltaTable::drop_constraints, 2),
|
|
1531
1698
|
)?;
|
|
1699
|
+
class.define_method("generate", method!(RawDeltaTable::generate, 0))?;
|
|
1532
1700
|
class.define_method("load_cdf", method!(RawDeltaTable::load_cdf, 5))?;
|
|
1533
1701
|
class.define_method(
|
|
1534
1702
|
"create_merge_builder",
|
|
@@ -1549,6 +1717,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1549
1717
|
"create_checkpoint",
|
|
1550
1718
|
method!(RawDeltaTable::create_checkpoint, 0),
|
|
1551
1719
|
)?;
|
|
1720
|
+
class.define_method("compact_logs", method!(RawDeltaTable::compact_logs, 2))?;
|
|
1552
1721
|
class.define_method(
|
|
1553
1722
|
"cleanup_metadata",
|
|
1554
1723
|
method!(RawDeltaTable::cleanup_metadata, 0),
|
|
@@ -1562,11 +1731,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1562
1731
|
"set_table_properties",
|
|
1563
1732
|
method!(RawDeltaTable::set_table_properties, 2),
|
|
1564
1733
|
)?;
|
|
1734
|
+
class.define_method("set_table_name", method!(RawDeltaTable::set_table_name, 3))?;
|
|
1735
|
+
class.define_method(
|
|
1736
|
+
"set_table_description",
|
|
1737
|
+
method!(RawDeltaTable::set_table_description, 3),
|
|
1738
|
+
)?;
|
|
1565
1739
|
class.define_method("repair", method!(RawDeltaTable::repair, 3))?;
|
|
1566
1740
|
class.define_method(
|
|
1567
1741
|
"transaction_version",
|
|
1568
1742
|
method!(RawDeltaTable::transaction_version, 1),
|
|
1569
1743
|
)?;
|
|
1744
|
+
class.define_method(
|
|
1745
|
+
"set_column_metadata",
|
|
1746
|
+
method!(RawDeltaTable::set_column_metadata, 4),
|
|
1747
|
+
)?;
|
|
1570
1748
|
class.define_method("write", method!(RawDeltaTable::write, 12))?;
|
|
1571
1749
|
|
|
1572
1750
|
let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;
|
data/ext/deltalake/src/merge.rs
CHANGED
|
@@ -108,11 +108,11 @@ impl RbMergeBuilder {
|
|
|
108
108
|
Some(cmd) => Some(
|
|
109
109
|
cmd.when_matched_update(|mut update| {
|
|
110
110
|
for (column, expression) in updates {
|
|
111
|
-
update = update.update(column, expression)
|
|
111
|
+
update = update.update(column, expression);
|
|
112
112
|
}
|
|
113
113
|
if let Some(predicate) = predicate {
|
|
114
|
-
update = update.predicate(predicate)
|
|
115
|
-
}
|
|
114
|
+
update = update.predicate(predicate);
|
|
115
|
+
}
|
|
116
116
|
update
|
|
117
117
|
})
|
|
118
118
|
.map_err(RubyError::from)?,
|
|
@@ -128,8 +128,8 @@ impl RbMergeBuilder {
|
|
|
128
128
|
Some(cmd) => Some(
|
|
129
129
|
cmd.when_matched_delete(|mut delete| {
|
|
130
130
|
if let Some(predicate) = predicate {
|
|
131
|
-
delete = delete.predicate(predicate)
|
|
132
|
-
}
|
|
131
|
+
delete = delete.predicate(predicate);
|
|
132
|
+
}
|
|
133
133
|
delete
|
|
134
134
|
})
|
|
135
135
|
.map_err(RubyError::from)?,
|
|
@@ -149,11 +149,11 @@ impl RbMergeBuilder {
|
|
|
149
149
|
Some(cmd) => Some(
|
|
150
150
|
cmd.when_not_matched_insert(|mut insert| {
|
|
151
151
|
for (column, expression) in updates {
|
|
152
|
-
insert = insert.set(column, expression)
|
|
152
|
+
insert = insert.set(column, expression);
|
|
153
153
|
}
|
|
154
154
|
if let Some(predicate) = predicate {
|
|
155
|
-
insert = insert.predicate(predicate)
|
|
156
|
-
}
|
|
155
|
+
insert = insert.predicate(predicate);
|
|
156
|
+
}
|
|
157
157
|
insert
|
|
158
158
|
})
|
|
159
159
|
.map_err(RubyError::from)?,
|
|
@@ -173,11 +173,11 @@ impl RbMergeBuilder {
|
|
|
173
173
|
Some(cmd) => Some(
|
|
174
174
|
cmd.when_not_matched_by_source_update(|mut update| {
|
|
175
175
|
for (column, expression) in updates {
|
|
176
|
-
update = update.update(column, expression)
|
|
176
|
+
update = update.update(column, expression);
|
|
177
177
|
}
|
|
178
178
|
if let Some(predicate) = predicate {
|
|
179
|
-
update = update.predicate(predicate)
|
|
180
|
-
}
|
|
179
|
+
update = update.predicate(predicate);
|
|
180
|
+
}
|
|
181
181
|
update
|
|
182
182
|
})
|
|
183
183
|
.map_err(RubyError::from)?,
|
|
@@ -193,8 +193,8 @@ impl RbMergeBuilder {
|
|
|
193
193
|
Some(cmd) => Some(
|
|
194
194
|
cmd.when_not_matched_by_source_delete(|mut delete| {
|
|
195
195
|
if let Some(predicate) = predicate {
|
|
196
|
-
delete = delete.predicate(predicate)
|
|
197
|
-
}
|
|
196
|
+
delete = delete.predicate(predicate);
|
|
197
|
+
}
|
|
198
198
|
delete
|
|
199
199
|
})
|
|
200
200
|
.map_err(RubyError::from)?,
|
data/lib/deltalake/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: deltalake-rb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -72,7 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
72
72
|
- !ruby/object:Gem::Version
|
|
73
73
|
version: '0'
|
|
74
74
|
requirements: []
|
|
75
|
-
rubygems_version: 4.0.
|
|
75
|
+
rubygems_version: 4.0.10
|
|
76
76
|
specification_version: 4
|
|
77
77
|
summary: Delta Lake for Ruby
|
|
78
78
|
test_files: []
|