deltalake-rb 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +700 -384
- data/ext/deltalake/Cargo.toml +7 -6
- data/ext/deltalake/src/error.rs +11 -22
- data/ext/deltalake/src/lib.rs +581 -346
- data/ext/deltalake/src/ruby.rs +74 -0
- data/ext/deltalake/src/schema.rs +1 -0
- data/lib/deltalake/version.rb +1 -1
- metadata +4 -3
data/ext/deltalake/src/lib.rs
CHANGED
|
@@ -1,30 +1,33 @@
|
|
|
1
1
|
mod error;
|
|
2
2
|
mod features;
|
|
3
3
|
mod merge;
|
|
4
|
+
mod ruby;
|
|
4
5
|
mod schema;
|
|
5
6
|
mod utils;
|
|
6
7
|
|
|
7
8
|
use chrono::{DateTime, Duration, FixedOffset, Utc};
|
|
8
|
-
use delta_kernel::schema::StructField;
|
|
9
|
+
use delta_kernel::schema::{MetadataValue, StructField};
|
|
9
10
|
use delta_kernel::table_properties::DataSkippingNumIndexedCols;
|
|
10
11
|
use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
|
|
11
12
|
use deltalake::arrow::record_batch::RecordBatchIterator;
|
|
12
13
|
use deltalake::checkpoints::{cleanup_metadata, create_checkpoint};
|
|
13
14
|
use deltalake::datafusion::catalog::TableProvider;
|
|
14
15
|
use deltalake::datafusion::prelude::SessionContext;
|
|
15
|
-
use deltalake::delta_datafusion::DeltaCdfTableProvider;
|
|
16
|
+
use deltalake::delta_datafusion::{create_session_state_with_spill_config, DeltaCdfTableProvider};
|
|
16
17
|
use deltalake::errors::DeltaTableError;
|
|
17
18
|
use deltalake::kernel::transaction::{CommitProperties, TableReference};
|
|
18
19
|
use deltalake::kernel::{scalars::ScalarExt, Transaction};
|
|
19
|
-
use deltalake::kernel::{EagerSnapshot, StructDataExt};
|
|
20
|
+
use deltalake::kernel::{EagerSnapshot, StructDataExt, Version};
|
|
20
21
|
use deltalake::logstore::IORuntime;
|
|
21
22
|
use deltalake::logstore::LogStoreRef;
|
|
22
23
|
use deltalake::operations::collect_sendable_stream;
|
|
23
|
-
use deltalake::operations::optimize::
|
|
24
|
+
use deltalake::operations::optimize::OptimizeType;
|
|
25
|
+
use deltalake::operations::update_table_metadata::TableMetadataUpdate;
|
|
24
26
|
use deltalake::parquet::basic::Compression;
|
|
25
27
|
use deltalake::parquet::errors::ParquetError;
|
|
26
28
|
use deltalake::parquet::file::properties::WriterProperties;
|
|
27
29
|
use deltalake::partitions::PartitionFilter;
|
|
30
|
+
use deltalake::protocol::log_compaction::compact_logs;
|
|
28
31
|
use deltalake::table::config::TablePropertiesExt;
|
|
29
32
|
use deltalake::table::state::DeltaTableState;
|
|
30
33
|
use deltalake::{DeltaResult, DeltaTable};
|
|
@@ -32,20 +35,22 @@ use error::DeltaError;
|
|
|
32
35
|
use futures::future::join_all;
|
|
33
36
|
use futures::TryStreamExt;
|
|
34
37
|
use magnus::{
|
|
35
|
-
function, method, prelude::*, try_convert::TryConvertOwned,
|
|
36
|
-
|
|
38
|
+
function, method, prelude::*, try_convert::TryConvertOwned, Error as RbErr, Integer, Module,
|
|
39
|
+
RArray, Ruby, TryConvert, Value,
|
|
37
40
|
};
|
|
38
41
|
use serde_json::Map;
|
|
39
42
|
use std::collections::{HashMap, HashSet};
|
|
40
43
|
use std::future::IntoFuture;
|
|
44
|
+
use std::num::NonZeroU64;
|
|
41
45
|
use std::str::FromStr;
|
|
42
46
|
use std::sync::{Arc, Mutex};
|
|
43
47
|
use std::time;
|
|
44
48
|
use uuid::Uuid;
|
|
45
49
|
|
|
46
|
-
use crate::error::{to_rt_err,
|
|
50
|
+
use crate::error::{to_rt_err, to_rt_err2, RubyError};
|
|
47
51
|
use crate::features::TableFeatures;
|
|
48
52
|
use crate::merge::RbMergeBuilder;
|
|
53
|
+
use crate::ruby::{GvlExt, RbRuntimeError, RbValueError};
|
|
49
54
|
use crate::schema::{schema_to_rbobject, Field};
|
|
50
55
|
use crate::utils::rt;
|
|
51
56
|
|
|
@@ -111,6 +116,24 @@ impl RawDeltaTableMetaData {
|
|
|
111
116
|
|
|
112
117
|
type StringVec = Vec<String>;
|
|
113
118
|
|
|
119
|
+
const MAX_OPTIMIZE_TARGET_SIZE: u64 = i64::MAX as u64;
|
|
120
|
+
|
|
121
|
+
fn parse_optimize_target_size(target_size: u64) -> Result<NonZeroU64, RubyError> {
|
|
122
|
+
let target_size = NonZeroU64::new(target_size).ok_or_else(|| {
|
|
123
|
+
RubyError::ValueError(format!(
|
|
124
|
+
"target_file_size must be between 1 and {MAX_OPTIMIZE_TARGET_SIZE}"
|
|
125
|
+
))
|
|
126
|
+
})?;
|
|
127
|
+
|
|
128
|
+
if target_size.get() > MAX_OPTIMIZE_TARGET_SIZE {
|
|
129
|
+
return Err(RubyError::ValueError(format!(
|
|
130
|
+
"target_file_size must be between 1 and {MAX_OPTIMIZE_TARGET_SIZE}"
|
|
131
|
+
)));
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
Ok(target_size)
|
|
135
|
+
}
|
|
136
|
+
|
|
114
137
|
impl RawDeltaTable {
|
|
115
138
|
fn with_table<T>(&self, func: impl Fn(&deltalake::DeltaTable) -> RbResult<T>) -> RbResult<T> {
|
|
116
139
|
match self._table.lock() {
|
|
@@ -119,6 +142,16 @@ impl RawDeltaTable {
|
|
|
119
142
|
}
|
|
120
143
|
}
|
|
121
144
|
|
|
145
|
+
fn with_table2<T>(
|
|
146
|
+
&self,
|
|
147
|
+
func: impl Fn(&deltalake::DeltaTable) -> Result<T, RubyError>,
|
|
148
|
+
) -> Result<T, RubyError> {
|
|
149
|
+
match self._table.lock() {
|
|
150
|
+
Ok(table) => func(&table),
|
|
151
|
+
Err(e) => Err(RubyError::RuntimeError(e.to_string())),
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
122
155
|
fn cloned_state(&self) -> RbResult<EagerSnapshot> {
|
|
123
156
|
self.with_table(|t| {
|
|
124
157
|
t.snapshot()
|
|
@@ -145,37 +178,41 @@ impl RawDeltaTable {
|
|
|
145
178
|
|
|
146
179
|
impl RawDeltaTable {
|
|
147
180
|
pub fn new(
|
|
181
|
+
rb: &Ruby,
|
|
148
182
|
table_uri: String,
|
|
149
|
-
version: Option<
|
|
183
|
+
version: Option<Version>,
|
|
150
184
|
storage_options: Option<HashMap<String, String>>,
|
|
151
185
|
without_files: bool,
|
|
152
186
|
log_buffer_size: Option<usize>,
|
|
153
187
|
) -> RbResult<Self> {
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
188
|
+
rb.detach(|| {
|
|
189
|
+
let table_url = deltalake::table::builder::parse_table_uri(table_uri)
|
|
190
|
+
.map_err(error::RubyError::from)?;
|
|
191
|
+
let mut builder = deltalake::DeltaTableBuilder::from_url(table_url)
|
|
192
|
+
.map_err(error::RubyError::from)?
|
|
193
|
+
.with_io_runtime(IORuntime::default());
|
|
194
|
+
|
|
195
|
+
if let Some(storage_options) = storage_options {
|
|
196
|
+
builder = builder.with_storage_options(storage_options)
|
|
197
|
+
}
|
|
198
|
+
if let Some(version) = version {
|
|
199
|
+
builder = builder.with_version(version)
|
|
200
|
+
}
|
|
201
|
+
if without_files {
|
|
202
|
+
builder = builder.without_files()
|
|
203
|
+
}
|
|
204
|
+
if let Some(buf_size) = log_buffer_size {
|
|
205
|
+
builder = builder
|
|
206
|
+
.with_log_buffer_size(buf_size)
|
|
207
|
+
.map_err(RubyError::from)?;
|
|
208
|
+
}
|
|
174
209
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
210
|
+
let table = rt().block_on(builder.load()).map_err(RubyError::from)?;
|
|
211
|
+
Ok::<_, RubyError>(RawDeltaTable {
|
|
212
|
+
_table: Arc::new(Mutex::new(table)),
|
|
213
|
+
})
|
|
178
214
|
})
|
|
215
|
+
.map_err(RbErr::from)
|
|
179
216
|
}
|
|
180
217
|
|
|
181
218
|
pub fn is_deltatable(
|
|
@@ -203,7 +240,7 @@ impl RawDeltaTable {
|
|
|
203
240
|
self.with_table(|t| Ok(t.table_url().to_string()))
|
|
204
241
|
}
|
|
205
242
|
|
|
206
|
-
pub fn version(&self) -> RbResult<Option<
|
|
243
|
+
pub fn version(&self) -> RbResult<Option<Version>> {
|
|
207
244
|
self.with_table(|t| Ok(t.version()))
|
|
208
245
|
}
|
|
209
246
|
|
|
@@ -220,7 +257,7 @@ impl RawDeltaTable {
|
|
|
220
257
|
id: metadata.id().to_string(),
|
|
221
258
|
name: metadata.name().map(String::from),
|
|
222
259
|
description: metadata.description().map(String::from),
|
|
223
|
-
partition_columns: metadata.partition_columns().
|
|
260
|
+
partition_columns: metadata.partition_columns().to_vec(),
|
|
224
261
|
created_time: metadata.created_time(),
|
|
225
262
|
configuration: metadata.configuration().clone(),
|
|
226
263
|
})
|
|
@@ -255,33 +292,34 @@ impl RawDeltaTable {
|
|
|
255
292
|
))
|
|
256
293
|
}
|
|
257
294
|
|
|
258
|
-
pub fn load_version(&
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
295
|
+
pub fn load_version(rb: &Ruby, self_: &Self, version: Version) -> RbResult<()> {
|
|
296
|
+
rb.detach(|| {
|
|
297
|
+
#[allow(clippy::await_holding_lock)]
|
|
298
|
+
rt().block_on(async {
|
|
299
|
+
let mut table = self_
|
|
300
|
+
._table
|
|
301
|
+
.lock()
|
|
302
|
+
.map_err(|e| RubyError::RuntimeError(e.to_string()))?;
|
|
303
|
+
(*table)
|
|
304
|
+
.load_version(version)
|
|
305
|
+
.await
|
|
306
|
+
.map_err(RubyError::from)
|
|
307
|
+
})
|
|
270
308
|
})
|
|
309
|
+
.map_err(RbErr::from)
|
|
271
310
|
}
|
|
272
311
|
|
|
273
|
-
pub fn get_latest_version(&
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
.get_latest_version()
|
|
279
|
-
.
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
Err(e) => Err(RbRuntimeError::new_err(e.to_string())),
|
|
283
|
-
}
|
|
312
|
+
pub fn get_latest_version(rb: &Ruby, self_: &Self) -> RbResult<Version> {
|
|
313
|
+
rb.detach(|| {
|
|
314
|
+
#[allow(clippy::await_holding_lock)]
|
|
315
|
+
rt().block_on(async {
|
|
316
|
+
match self_._table.lock() {
|
|
317
|
+
Ok(table) => table.get_latest_version().await.map_err(RubyError::from),
|
|
318
|
+
Err(e) => Err(RubyError::RuntimeError(e.to_string())),
|
|
319
|
+
}
|
|
320
|
+
})
|
|
284
321
|
})
|
|
322
|
+
.map_err(RbErr::from)
|
|
285
323
|
}
|
|
286
324
|
|
|
287
325
|
pub fn get_num_index_cols(&self) -> RbResult<i32> {
|
|
@@ -309,57 +347,61 @@ impl RawDeltaTable {
|
|
|
309
347
|
})
|
|
310
348
|
}
|
|
311
349
|
|
|
312
|
-
pub fn load_with_datetime(&
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
350
|
+
pub fn load_with_datetime(rb: &Ruby, self_: &Self, ds: String) -> RbResult<()> {
|
|
351
|
+
rb.detach(|| {
|
|
352
|
+
let datetime =
|
|
353
|
+
DateTime::<Utc>::from(DateTime::<FixedOffset>::parse_from_rfc3339(&ds).map_err(
|
|
354
|
+
|err| RubyError::ValueError(format!("Failed to parse datetime string: {err}")),
|
|
355
|
+
)?);
|
|
356
|
+
#[allow(clippy::await_holding_lock)]
|
|
357
|
+
rt().block_on(async {
|
|
358
|
+
let mut table = self_
|
|
359
|
+
._table
|
|
360
|
+
.lock()
|
|
361
|
+
.map_err(|e| RubyError::RuntimeError(e.to_string()))?;
|
|
362
|
+
(*table)
|
|
363
|
+
.load_with_datetime(datetime)
|
|
364
|
+
.await
|
|
365
|
+
.map_err(RubyError::from)
|
|
366
|
+
})
|
|
328
367
|
})
|
|
368
|
+
.map_err(RbErr::from)
|
|
329
369
|
}
|
|
330
370
|
|
|
331
371
|
pub fn files(
|
|
332
|
-
&
|
|
372
|
+
rb: &Ruby,
|
|
373
|
+
self_: &Self,
|
|
333
374
|
partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
|
|
334
375
|
) -> RbResult<Vec<String>> {
|
|
335
|
-
if !
|
|
376
|
+
if !self_.has_files()? {
|
|
336
377
|
return Err(DeltaError::new_err("Table is instantiated without files."));
|
|
337
378
|
}
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
})
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
.
|
|
360
|
-
|
|
379
|
+
rb.detach(|| {
|
|
380
|
+
if let Some(filters) = partition_filters {
|
|
381
|
+
let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
|
|
382
|
+
Ok(self_
|
|
383
|
+
.with_table2(|t| {
|
|
384
|
+
rt().block_on(async {
|
|
385
|
+
t.get_files_by_partitions(&filters)
|
|
386
|
+
.await
|
|
387
|
+
.map_err(RubyError::from)
|
|
388
|
+
})
|
|
389
|
+
})?
|
|
390
|
+
.into_iter()
|
|
391
|
+
.map(|p| p.to_string())
|
|
392
|
+
.collect())
|
|
393
|
+
} else {
|
|
394
|
+
match self_._table.lock() {
|
|
395
|
+
Ok(table) => Ok(table
|
|
396
|
+
.get_file_uris()
|
|
397
|
+
.map_err(RubyError::from)?
|
|
398
|
+
.map(|f| f.to_string())
|
|
399
|
+
.collect()),
|
|
400
|
+
Err(e) => Err(RubyError::RuntimeError(e.to_string())),
|
|
401
|
+
}
|
|
361
402
|
}
|
|
362
|
-
}
|
|
403
|
+
})
|
|
404
|
+
.map_err(RbErr::from)
|
|
363
405
|
}
|
|
364
406
|
|
|
365
407
|
pub fn file_uris(
|
|
@@ -399,36 +441,84 @@ impl RawDeltaTable {
|
|
|
399
441
|
}
|
|
400
442
|
|
|
401
443
|
pub fn vacuum(
|
|
402
|
-
&
|
|
444
|
+
rb: &Ruby,
|
|
445
|
+
self_: &Self,
|
|
403
446
|
dry_run: bool,
|
|
404
447
|
retention_hours: Option<u64>,
|
|
405
448
|
enforce_retention_duration: bool,
|
|
406
449
|
commit_properties: Option<RbCommitProperties>,
|
|
407
450
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
408
451
|
) -> RbResult<Vec<String>> {
|
|
409
|
-
let table =
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
452
|
+
let (table, metrics) = rb.detach(|| {
|
|
453
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
454
|
+
let mut cmd = table
|
|
455
|
+
.vacuum()
|
|
456
|
+
.with_enforce_retention_duration(enforce_retention_duration)
|
|
457
|
+
.with_dry_run(dry_run);
|
|
458
|
+
|
|
459
|
+
if let Some(retention_period) = retention_hours {
|
|
460
|
+
cmd = cmd.with_retention_period(Duration::hours(retention_period as i64));
|
|
461
|
+
}
|
|
414
462
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
463
|
+
if let Some(commit_properties) =
|
|
464
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
465
|
+
{
|
|
466
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
467
|
+
}
|
|
418
468
|
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
cmd = cmd.with_commit_properties(commit_properties);
|
|
423
|
-
}
|
|
424
|
-
let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
425
|
-
self.set_state(table.state)?;
|
|
469
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
470
|
+
})?;
|
|
471
|
+
self_.set_state(table.state)?;
|
|
426
472
|
Ok(metrics.files_deleted)
|
|
427
473
|
}
|
|
428
474
|
|
|
475
|
+
#[allow(clippy::too_many_arguments)]
|
|
476
|
+
pub fn update(
|
|
477
|
+
rb: &Ruby,
|
|
478
|
+
self_: &Self,
|
|
479
|
+
updates: HashMap<String, String>,
|
|
480
|
+
predicate: Option<String>,
|
|
481
|
+
writer_properties: Option<RbWriterProperties>,
|
|
482
|
+
safe_cast: bool,
|
|
483
|
+
commit_properties: Option<RbCommitProperties>,
|
|
484
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
485
|
+
) -> RbResult<String> {
|
|
486
|
+
let (table, metrics) = rb
|
|
487
|
+
.detach(|| {
|
|
488
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
489
|
+
let mut cmd = table.update().with_safe_cast(safe_cast);
|
|
490
|
+
|
|
491
|
+
if let Some(writer_props) = writer_properties {
|
|
492
|
+
cmd = cmd.with_writer_properties(
|
|
493
|
+
set_writer_properties(writer_props).map_err(RubyError::from)?,
|
|
494
|
+
);
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
for (col_name, expression) in updates {
|
|
498
|
+
cmd = cmd.with_update(col_name.clone(), expression.clone());
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
if let Some(update_predicate) = predicate {
|
|
502
|
+
cmd = cmd.with_predicate(update_predicate);
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
if let Some(commit_properties) =
|
|
506
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
507
|
+
{
|
|
508
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
512
|
+
})
|
|
513
|
+
.map_err(RbErr::from)?;
|
|
514
|
+
self_.set_state(table.state)?;
|
|
515
|
+
Ok(serde_json::to_string(&metrics).unwrap())
|
|
516
|
+
}
|
|
517
|
+
|
|
429
518
|
#[allow(clippy::too_many_arguments)]
|
|
430
519
|
pub fn compact_optimize(
|
|
431
|
-
&
|
|
520
|
+
rb: &Ruby,
|
|
521
|
+
self_: &Self,
|
|
432
522
|
partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
|
|
433
523
|
target_size: Option<u64>,
|
|
434
524
|
max_concurrent_tasks: Option<usize>,
|
|
@@ -437,42 +527,47 @@ impl RawDeltaTable {
|
|
|
437
527
|
commit_properties: Option<RbCommitProperties>,
|
|
438
528
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
439
529
|
) -> RbResult<String> {
|
|
440
|
-
let table =
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
530
|
+
let (table, metrics) = rb.detach(|| {
|
|
531
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
532
|
+
let mut cmd = table
|
|
533
|
+
.optimize()
|
|
534
|
+
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
|
|
535
|
+
|
|
536
|
+
if let Some(target_size) = target_size {
|
|
537
|
+
let target_size = parse_optimize_target_size(target_size)?;
|
|
538
|
+
cmd = cmd.with_target_size(target_size);
|
|
539
|
+
}
|
|
540
|
+
if let Some(commit_interval) = min_commit_interval {
|
|
541
|
+
cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
|
|
542
|
+
}
|
|
451
543
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
544
|
+
if let Some(writer_props) = writer_properties {
|
|
545
|
+
cmd = cmd.with_writer_properties(
|
|
546
|
+
set_writer_properties(writer_props).map_err(RubyError::from)?,
|
|
547
|
+
);
|
|
548
|
+
}
|
|
457
549
|
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
550
|
+
if let Some(commit_properties) =
|
|
551
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
552
|
+
{
|
|
553
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
554
|
+
}
|
|
463
555
|
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
556
|
+
let converted_filters =
|
|
557
|
+
convert_partition_filters(partition_filters.unwrap_or_default())
|
|
558
|
+
.map_err(RubyError::from)?;
|
|
559
|
+
cmd = cmd.with_filters(&converted_filters);
|
|
467
560
|
|
|
468
|
-
|
|
469
|
-
|
|
561
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
562
|
+
})?;
|
|
563
|
+
self_.set_state(table.state)?;
|
|
470
564
|
Ok(serde_json::to_string(&metrics).unwrap())
|
|
471
565
|
}
|
|
472
566
|
|
|
473
567
|
#[allow(clippy::too_many_arguments)]
|
|
474
568
|
pub fn z_order_optimize(
|
|
475
|
-
&
|
|
569
|
+
rb: &Ruby,
|
|
570
|
+
self_: &Self,
|
|
476
571
|
z_order_columns: Vec<String>,
|
|
477
572
|
partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
|
|
478
573
|
target_size: Option<u64>,
|
|
@@ -484,67 +579,77 @@ impl RawDeltaTable {
|
|
|
484
579
|
commit_properties: Option<RbCommitProperties>,
|
|
485
580
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
486
581
|
) -> RbResult<String> {
|
|
487
|
-
let table =
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
582
|
+
let (table, metrics) = rb.detach(|| {
|
|
583
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
584
|
+
let mut cmd = table
|
|
585
|
+
.clone()
|
|
586
|
+
.optimize()
|
|
587
|
+
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
|
|
588
|
+
.with_type(OptimizeType::ZOrder(z_order_columns));
|
|
589
|
+
|
|
590
|
+
if max_spill_size.is_some() || max_temp_directory_size.is_some() {
|
|
591
|
+
let session =
|
|
592
|
+
create_session_state_with_spill_config(max_spill_size, max_temp_directory_size);
|
|
593
|
+
cmd = cmd.with_session_state(Arc::new(session));
|
|
594
|
+
}
|
|
499
595
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
596
|
+
if let Some(target_size) = target_size {
|
|
597
|
+
let target_size = parse_optimize_target_size(target_size)?;
|
|
598
|
+
cmd = cmd.with_target_size(target_size);
|
|
599
|
+
}
|
|
600
|
+
if let Some(commit_interval) = min_commit_interval {
|
|
601
|
+
cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
|
|
602
|
+
}
|
|
506
603
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
604
|
+
if let Some(writer_props) = writer_properties {
|
|
605
|
+
cmd = cmd.with_writer_properties(
|
|
606
|
+
set_writer_properties(writer_props).map_err(RubyError::from)?,
|
|
607
|
+
);
|
|
608
|
+
}
|
|
512
609
|
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
610
|
+
if let Some(commit_properties) =
|
|
611
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
612
|
+
{
|
|
613
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
614
|
+
}
|
|
518
615
|
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
616
|
+
let converted_filters =
|
|
617
|
+
convert_partition_filters(partition_filters.unwrap_or_default())
|
|
618
|
+
.map_err(RubyError::from)?;
|
|
619
|
+
cmd = cmd.with_filters(&converted_filters);
|
|
522
620
|
|
|
523
|
-
|
|
524
|
-
|
|
621
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
622
|
+
})?;
|
|
623
|
+
self_.set_state(table.state)?;
|
|
525
624
|
Ok(serde_json::to_string(&metrics).unwrap())
|
|
526
625
|
}
|
|
527
626
|
|
|
528
|
-
pub fn add_columns(&
|
|
529
|
-
let fields = fields
|
|
627
|
+
pub fn add_columns(rb: &Ruby, self_: &Self, fields: RArray) -> RbResult<()> {
|
|
628
|
+
let fields = fields
|
|
629
|
+
.into_iter()
|
|
630
|
+
.map(|v| <&Field>::try_convert(v).cloned())
|
|
631
|
+
.collect::<RbResult<Vec<_>>>()?;
|
|
530
632
|
|
|
531
|
-
let table =
|
|
532
|
-
|
|
633
|
+
let table = rb.detach(|| {
|
|
634
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
635
|
+
let mut cmd = table.add_columns();
|
|
533
636
|
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
637
|
+
let new_fields = fields
|
|
638
|
+
.iter()
|
|
639
|
+
.map(|v| v.inner.clone())
|
|
640
|
+
.collect::<Vec<StructField>>();
|
|
538
641
|
|
|
539
|
-
|
|
642
|
+
cmd = cmd.with_fields(new_fields);
|
|
540
643
|
|
|
541
|
-
|
|
542
|
-
|
|
644
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
645
|
+
})?;
|
|
646
|
+
self_.set_state(table.state)?;
|
|
543
647
|
Ok(())
|
|
544
648
|
}
|
|
545
649
|
|
|
546
650
|
pub fn add_feature(
|
|
547
|
-
&
|
|
651
|
+
rb: &Ruby,
|
|
652
|
+
self_: &Self,
|
|
548
653
|
feature: RArray,
|
|
549
654
|
allow_protocol_versions_increase: bool,
|
|
550
655
|
) -> RbResult<()> {
|
|
@@ -553,52 +658,75 @@ impl RawDeltaTable {
|
|
|
553
658
|
.map(TableFeatures::try_convert)
|
|
554
659
|
.collect::<RbResult<Vec<_>>>()?;
|
|
555
660
|
|
|
556
|
-
let table =
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
661
|
+
let table = rb.detach(|| {
|
|
662
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
663
|
+
let cmd = table
|
|
664
|
+
.add_feature()
|
|
665
|
+
.with_features(feature)
|
|
666
|
+
.with_allow_protocol_versions_increase(allow_protocol_versions_increase);
|
|
561
667
|
|
|
562
|
-
|
|
563
|
-
|
|
668
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
669
|
+
})?;
|
|
670
|
+
self_.set_state(table.state)?;
|
|
564
671
|
Ok(())
|
|
565
672
|
}
|
|
566
673
|
|
|
567
|
-
pub fn add_constraints(
|
|
568
|
-
|
|
569
|
-
|
|
674
|
+
pub fn add_constraints(
|
|
675
|
+
rb: &Ruby,
|
|
676
|
+
self_: &Self,
|
|
677
|
+
constraints: HashMap<String, String>,
|
|
678
|
+
) -> RbResult<()> {
|
|
679
|
+
let table = rb.detach(|| {
|
|
680
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
681
|
+
let mut cmd = table.add_constraint();
|
|
570
682
|
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
683
|
+
for (col_name, expression) in constraints {
|
|
684
|
+
cmd = cmd.with_constraint(col_name.clone(), expression.clone());
|
|
685
|
+
}
|
|
574
686
|
|
|
575
|
-
|
|
576
|
-
|
|
687
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
688
|
+
})?;
|
|
689
|
+
self_.set_state(table.state)?;
|
|
577
690
|
Ok(())
|
|
578
691
|
}
|
|
579
692
|
|
|
580
|
-
pub fn drop_constraints(
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
693
|
+
pub fn drop_constraints(
|
|
694
|
+
rb: &Ruby,
|
|
695
|
+
self_: &Self,
|
|
696
|
+
name: String,
|
|
697
|
+
raise_if_not_exists: bool,
|
|
698
|
+
) -> RbResult<()> {
|
|
699
|
+
let table = rb.detach(|| {
|
|
700
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
701
|
+
let cmd = table
|
|
702
|
+
.drop_constraints()
|
|
703
|
+
.with_constraint(name)
|
|
704
|
+
.with_raise_if_not_exists(raise_if_not_exists);
|
|
705
|
+
|
|
706
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
707
|
+
})?;
|
|
708
|
+
self_.set_state(table.state)?;
|
|
709
|
+
Ok(())
|
|
710
|
+
}
|
|
586
711
|
|
|
587
|
-
|
|
588
|
-
self.
|
|
712
|
+
pub fn generate(&self) -> RbResult<()> {
|
|
713
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
714
|
+
rt().block_on(async { table.generate().await })
|
|
715
|
+
.map_err(RubyError::from)?;
|
|
589
716
|
Ok(())
|
|
590
717
|
}
|
|
591
718
|
|
|
592
719
|
pub fn load_cdf(
|
|
593
|
-
&
|
|
594
|
-
|
|
595
|
-
|
|
720
|
+
rb: &Ruby,
|
|
721
|
+
self_: &Self,
|
|
722
|
+
starting_version: Option<Version>,
|
|
723
|
+
ending_version: Option<Version>,
|
|
596
724
|
starting_timestamp: Option<String>,
|
|
597
725
|
ending_timestamp: Option<String>,
|
|
598
726
|
columns: Option<Vec<String>>,
|
|
599
727
|
) -> RbResult<ArrowArrayStream> {
|
|
600
728
|
let ctx = SessionContext::new();
|
|
601
|
-
let table =
|
|
729
|
+
let table = self_._table.lock().map_err(to_rt_err)?.clone();
|
|
602
730
|
let mut cmd = table.scan_cdf();
|
|
603
731
|
|
|
604
732
|
if let Some(sv) = starting_version {
|
|
@@ -623,38 +751,41 @@ impl RawDeltaTable {
|
|
|
623
751
|
let table_provider: Arc<dyn TableProvider> =
|
|
624
752
|
Arc::new(DeltaCdfTableProvider::try_new(cmd).map_err(RubyError::from)?);
|
|
625
753
|
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
let
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
754
|
+
rb.detach(|| {
|
|
755
|
+
let plan = rt()
|
|
756
|
+
.block_on(async {
|
|
757
|
+
let mut df = ctx.read_table(table_provider)?;
|
|
758
|
+
if let Some(columns) = columns {
|
|
759
|
+
let cols: Vec<_> = columns.iter().map(|c| c.as_ref()).collect();
|
|
760
|
+
df = df.select_columns(&cols)?;
|
|
761
|
+
}
|
|
762
|
+
df.create_physical_plan().await
|
|
763
|
+
})
|
|
764
|
+
.map_err(RubyError::from)?;
|
|
636
765
|
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
766
|
+
let mut tasks = vec![];
|
|
767
|
+
for p in 0..plan.properties().output_partitioning().partition_count() {
|
|
768
|
+
let inner_plan = plan.clone();
|
|
769
|
+
let partition_batch = inner_plan.execute(p, ctx.task_ctx()).unwrap();
|
|
770
|
+
let handle = rt().spawn(collect_sendable_stream(partition_batch));
|
|
771
|
+
tasks.push(handle);
|
|
772
|
+
}
|
|
644
773
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
774
|
+
// This is unfortunate.
|
|
775
|
+
let batches = rt()
|
|
776
|
+
.block_on(join_all(tasks))
|
|
777
|
+
.into_iter()
|
|
778
|
+
.flatten()
|
|
779
|
+
.collect::<Result<Vec<Vec<_>>, _>>()
|
|
780
|
+
.unwrap()
|
|
781
|
+
.into_iter()
|
|
782
|
+
.flatten()
|
|
783
|
+
.map(Ok);
|
|
784
|
+
let batch_iter = RecordBatchIterator::new(batches, plan.schema());
|
|
785
|
+
let ffi_stream = FFI_ArrowArrayStream::new(Box::new(batch_iter));
|
|
786
|
+
Ok::<_, RubyError>(ArrowArrayStream { stream: ffi_stream })
|
|
787
|
+
})
|
|
788
|
+
.map_err(RbErr::from)
|
|
658
789
|
}
|
|
659
790
|
|
|
660
791
|
#[allow(clippy::too_many_arguments)]
|
|
@@ -701,7 +832,7 @@ impl RawDeltaTable {
|
|
|
701
832
|
let mut cmd = table.restore();
|
|
702
833
|
if let Some(val) = target {
|
|
703
834
|
if let Some(version) = Integer::from_value(val) {
|
|
704
|
-
cmd = cmd.with_version_to_restore(version.
|
|
835
|
+
cmd = cmd.with_version_to_restore(version.to_u64()?)
|
|
705
836
|
}
|
|
706
837
|
if let Ok(ds) = String::try_convert(val) {
|
|
707
838
|
let datetime = DateTime::<Utc>::from(
|
|
@@ -809,35 +940,64 @@ impl RawDeltaTable {
|
|
|
809
940
|
Ok(ruby.ary_from_iter(active_partitions))
|
|
810
941
|
}
|
|
811
942
|
|
|
812
|
-
pub fn create_checkpoint(&
|
|
813
|
-
|
|
943
|
+
pub fn create_checkpoint(rb: &Ruby, self_: &Self) -> RbResult<()> {
|
|
944
|
+
rb.detach(|| {
|
|
945
|
+
let operation_id = Uuid::new_v4();
|
|
814
946
|
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
.
|
|
822
|
-
|
|
823
|
-
}
|
|
824
|
-
|
|
947
|
+
#[allow(clippy::await_holding_lock)]
|
|
948
|
+
let _result = rt().block_on(async {
|
|
949
|
+
match self_._table.lock() {
|
|
950
|
+
Ok(table) => create_checkpoint(&table, Some(operation_id))
|
|
951
|
+
.await
|
|
952
|
+
.map_err(RubyError::from),
|
|
953
|
+
Err(e) => Err(RubyError::RuntimeError(e.to_string())),
|
|
954
|
+
}
|
|
955
|
+
});
|
|
956
|
+
|
|
957
|
+
Ok::<_, RubyError>(())
|
|
958
|
+
})
|
|
959
|
+
.map_err(RbErr::from)
|
|
960
|
+
}
|
|
961
|
+
|
|
962
|
+
pub fn compact_logs(
|
|
963
|
+
rb: &Ruby,
|
|
964
|
+
self_: &Self,
|
|
965
|
+
starting_version: u64,
|
|
966
|
+
ending_version: u64,
|
|
967
|
+
) -> RbResult<()> {
|
|
968
|
+
rb.detach(|| {
|
|
969
|
+
let operation_id = Uuid::new_v4();
|
|
970
|
+
|
|
971
|
+
#[allow(clippy::await_holding_lock)]
|
|
972
|
+
let result = rt().block_on(async {
|
|
973
|
+
match self_._table.lock() {
|
|
974
|
+
Ok(table) => {
|
|
975
|
+
compact_logs(&table, starting_version, ending_version, Some(operation_id))
|
|
976
|
+
.await
|
|
977
|
+
.map_err(RubyError::from)
|
|
978
|
+
}
|
|
979
|
+
Err(e) => Err(RubyError::RuntimeError(e.to_string())),
|
|
980
|
+
}
|
|
981
|
+
});
|
|
982
|
+
|
|
983
|
+
result
|
|
984
|
+
})
|
|
985
|
+
.map_err(RbErr::from)?;
|
|
825
986
|
|
|
826
987
|
Ok(())
|
|
827
988
|
}
|
|
828
989
|
|
|
829
|
-
pub fn cleanup_metadata(&
|
|
830
|
-
let (_result, new_state) = {
|
|
990
|
+
pub fn cleanup_metadata(rb: &Ruby, self_: &Self) -> RbResult<()> {
|
|
991
|
+
let (_result, new_state) = rb.detach(|| {
|
|
831
992
|
let operation_id = Uuid::new_v4();
|
|
832
993
|
|
|
833
994
|
#[allow(clippy::await_holding_lock)]
|
|
834
995
|
let result = rt().block_on(async {
|
|
835
|
-
match
|
|
996
|
+
match self_._table.lock() {
|
|
836
997
|
Ok(table) => {
|
|
837
998
|
let result = cleanup_metadata(&table, Some(operation_id))
|
|
838
999
|
.await
|
|
839
|
-
.map_err(RubyError::from)
|
|
840
|
-
.map_err(RbErr::from)?;
|
|
1000
|
+
.map_err(RubyError::from)?;
|
|
841
1001
|
|
|
842
1002
|
let new_state = if result > 0 {
|
|
843
1003
|
Some(
|
|
@@ -855,15 +1015,15 @@ impl RawDeltaTable {
|
|
|
855
1015
|
|
|
856
1016
|
Ok((result, new_state))
|
|
857
1017
|
}
|
|
858
|
-
Err(e) => Err(
|
|
1018
|
+
Err(e) => Err(RubyError::RuntimeError(e.to_string())),
|
|
859
1019
|
}
|
|
860
1020
|
});
|
|
861
1021
|
|
|
862
1022
|
result
|
|
863
|
-
}?;
|
|
1023
|
+
})?;
|
|
864
1024
|
|
|
865
1025
|
if new_state.is_some() {
|
|
866
|
-
|
|
1026
|
+
self_.set_state(new_state)?;
|
|
867
1027
|
}
|
|
868
1028
|
|
|
869
1029
|
Ok(())
|
|
@@ -887,30 +1047,33 @@ impl RawDeltaTable {
|
|
|
887
1047
|
}
|
|
888
1048
|
|
|
889
1049
|
pub fn delete(
|
|
890
|
-
&
|
|
1050
|
+
rb: &Ruby,
|
|
1051
|
+
self_: &Self,
|
|
891
1052
|
predicate: Option<String>,
|
|
892
1053
|
writer_properties: Option<RbWriterProperties>,
|
|
893
1054
|
commit_properties: Option<RbCommitProperties>,
|
|
894
1055
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
895
1056
|
) -> RbResult<String> {
|
|
896
|
-
let table =
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
1057
|
+
let (table, metrics) = rb.detach(|| {
|
|
1058
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
1059
|
+
let mut cmd = table.delete();
|
|
1060
|
+
if let Some(predicate) = predicate {
|
|
1061
|
+
cmd = cmd.with_predicate(predicate);
|
|
1062
|
+
}
|
|
1063
|
+
if let Some(writer_props) = writer_properties {
|
|
1064
|
+
cmd = cmd.with_writer_properties(
|
|
1065
|
+
set_writer_properties(writer_props).map_err(RubyError::from)?,
|
|
1066
|
+
);
|
|
1067
|
+
}
|
|
1068
|
+
if let Some(commit_properties) =
|
|
1069
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1070
|
+
{
|
|
1071
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
1072
|
+
}
|
|
911
1073
|
|
|
912
|
-
|
|
913
|
-
|
|
1074
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
1075
|
+
})?;
|
|
1076
|
+
self_.set_state(table.state)?;
|
|
914
1077
|
Ok(serde_json::to_string(&metrics).unwrap())
|
|
915
1078
|
}
|
|
916
1079
|
|
|
@@ -930,6 +1093,54 @@ impl RawDeltaTable {
|
|
|
930
1093
|
Ok(())
|
|
931
1094
|
}
|
|
932
1095
|
|
|
1096
|
+
pub fn set_table_name(
|
|
1097
|
+
&self,
|
|
1098
|
+
name: String,
|
|
1099
|
+
commit_properties: Option<RbCommitProperties>,
|
|
1100
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
1101
|
+
) -> RbResult<()> {
|
|
1102
|
+
let update = TableMetadataUpdate {
|
|
1103
|
+
name: Some(name),
|
|
1104
|
+
description: None,
|
|
1105
|
+
};
|
|
1106
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
1107
|
+
let mut cmd = table.update_table_metadata().with_update(update);
|
|
1108
|
+
|
|
1109
|
+
if let Some(commit_properties) =
|
|
1110
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1111
|
+
{
|
|
1112
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
1113
|
+
}
|
|
1114
|
+
|
|
1115
|
+
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
1116
|
+
self.set_state(table.state)?;
|
|
1117
|
+
Ok(())
|
|
1118
|
+
}
|
|
1119
|
+
|
|
1120
|
+
pub fn set_table_description(
|
|
1121
|
+
&self,
|
|
1122
|
+
description: String,
|
|
1123
|
+
commit_properties: Option<RbCommitProperties>,
|
|
1124
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
1125
|
+
) -> RbResult<()> {
|
|
1126
|
+
let update = TableMetadataUpdate {
|
|
1127
|
+
name: None,
|
|
1128
|
+
description: Some(description),
|
|
1129
|
+
};
|
|
1130
|
+
let table = self._table.lock().map_err(to_rt_err)?.clone();
|
|
1131
|
+
let mut cmd = table.update_table_metadata().with_update(update);
|
|
1132
|
+
|
|
1133
|
+
if let Some(commit_properties) =
|
|
1134
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1135
|
+
{
|
|
1136
|
+
cmd = cmd.with_commit_properties(commit_properties);
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
|
|
1140
|
+
self.set_state(table.state)?;
|
|
1141
|
+
Ok(())
|
|
1142
|
+
}
|
|
1143
|
+
|
|
933
1144
|
pub fn repair(
|
|
934
1145
|
&self,
|
|
935
1146
|
dry_run: bool,
|
|
@@ -959,15 +1170,50 @@ impl RawDeltaTable {
|
|
|
959
1170
|
.map_err(RubyError::from)?)
|
|
960
1171
|
}
|
|
961
1172
|
|
|
1173
|
+
pub fn set_column_metadata(
|
|
1174
|
+
rb: &Ruby,
|
|
1175
|
+
self_: &Self,
|
|
1176
|
+
field_name: String,
|
|
1177
|
+
metadata: HashMap<String, String>,
|
|
1178
|
+
commit_properties: Option<RbCommitProperties>,
|
|
1179
|
+
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
1180
|
+
) -> RbResult<()> {
|
|
1181
|
+
let table = rb
|
|
1182
|
+
.detach(|| {
|
|
1183
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
1184
|
+
let mut cmd = table
|
|
1185
|
+
.update_field_metadata()
|
|
1186
|
+
.with_field_name(&field_name)
|
|
1187
|
+
.with_metadata(
|
|
1188
|
+
metadata
|
|
1189
|
+
.iter()
|
|
1190
|
+
.map(|(k, v)| (k.clone(), MetadataValue::String(v.clone())))
|
|
1191
|
+
.collect(),
|
|
1192
|
+
);
|
|
1193
|
+
|
|
1194
|
+
if let Some(commit_properties) =
|
|
1195
|
+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
|
|
1196
|
+
{
|
|
1197
|
+
cmd = cmd.with_commit_properties(commit_properties)
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
rt().block_on(cmd.into_future()).map_err(RubyError::from)
|
|
1201
|
+
})
|
|
1202
|
+
.map_err(RbErr::from)?;
|
|
1203
|
+
self_.set_state(table.state)?;
|
|
1204
|
+
Ok(())
|
|
1205
|
+
}
|
|
1206
|
+
|
|
962
1207
|
#[allow(clippy::too_many_arguments)]
|
|
963
1208
|
pub fn write(
|
|
964
|
-
&
|
|
1209
|
+
rb: &Ruby,
|
|
1210
|
+
self_: &Self,
|
|
965
1211
|
data: RbArrowType<ArrowArrayStreamReader>,
|
|
966
1212
|
mode: String,
|
|
967
1213
|
schema_mode: Option<String>,
|
|
968
1214
|
partition_by: Option<Vec<String>>,
|
|
969
1215
|
predicate: Option<String>,
|
|
970
|
-
target_file_size: Option<
|
|
1216
|
+
target_file_size: Option<u64>,
|
|
971
1217
|
name: Option<String>,
|
|
972
1218
|
description: Option<String>,
|
|
973
1219
|
configuration: Option<HashMap<String, Option<String>>>,
|
|
@@ -975,8 +1221,8 @@ impl RawDeltaTable {
|
|
|
975
1221
|
commit_properties: Option<RbCommitProperties>,
|
|
976
1222
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
977
1223
|
) -> RbResult<()> {
|
|
978
|
-
let table = {
|
|
979
|
-
let table =
|
|
1224
|
+
let table = rb.detach(|| {
|
|
1225
|
+
let table = self_._table.lock().map_err(to_rt_err2)?.clone();
|
|
980
1226
|
let batches = data.0.map(|batch| batch.unwrap()).collect::<Vec<_>>();
|
|
981
1227
|
|
|
982
1228
|
let save_mode = mode.parse().map_err(RubyError::from)?;
|
|
@@ -1008,7 +1254,10 @@ impl RawDeltaTable {
|
|
|
1008
1254
|
};
|
|
1009
1255
|
|
|
1010
1256
|
if let Some(target_file_size) = target_file_size {
|
|
1011
|
-
|
|
1257
|
+
let target_file_size = NonZeroU64::new(target_file_size).ok_or_else(|| {
|
|
1258
|
+
RubyError::ValueError("target_file_size must be greater than 0".to_string())
|
|
1259
|
+
})?;
|
|
1260
|
+
builder = builder.with_target_file_size(Some(target_file_size))
|
|
1012
1261
|
};
|
|
1013
1262
|
|
|
1014
1263
|
if let Some(config) = configuration {
|
|
@@ -1023,10 +1272,9 @@ impl RawDeltaTable {
|
|
|
1023
1272
|
|
|
1024
1273
|
rt().block_on(builder.into_future())
|
|
1025
1274
|
.map_err(RubyError::from)
|
|
1026
|
-
|
|
1027
|
-
}?;
|
|
1275
|
+
})?;
|
|
1028
1276
|
|
|
1029
|
-
|
|
1277
|
+
self_.set_state(table.state)?;
|
|
1030
1278
|
Ok(())
|
|
1031
1279
|
}
|
|
1032
1280
|
}
|
|
@@ -1067,7 +1315,7 @@ fn set_writer_properties(writer_properties: RbWriterProperties) -> DeltaResult<W
|
|
|
1067
1315
|
properties = properties.set_write_batch_size(batch_size);
|
|
1068
1316
|
}
|
|
1069
1317
|
if let Some(row_group_size) = max_row_group_size {
|
|
1070
|
-
properties = properties.
|
|
1318
|
+
properties = properties.set_max_row_group_row_count(Some(row_group_size));
|
|
1071
1319
|
}
|
|
1072
1320
|
properties = properties.set_statistics_truncate_length(statistics_truncate_length);
|
|
1073
1321
|
|
|
@@ -1311,13 +1559,14 @@ impl TryConvert for RbCommitProperties {
|
|
|
1311
1559
|
|
|
1312
1560
|
#[allow(clippy::too_many_arguments)]
|
|
1313
1561
|
fn write_to_deltalake(
|
|
1562
|
+
rb: &Ruby,
|
|
1314
1563
|
table_uri: String,
|
|
1315
1564
|
data: RbArrowType<ArrowArrayStreamReader>,
|
|
1316
1565
|
mode: String,
|
|
1317
1566
|
schema_mode: Option<String>,
|
|
1318
1567
|
partition_by: Option<Vec<String>>,
|
|
1319
1568
|
predicate: Option<String>,
|
|
1320
|
-
target_file_size: Option<
|
|
1569
|
+
target_file_size: Option<u64>,
|
|
1321
1570
|
name: Option<String>,
|
|
1322
1571
|
description: Option<String>,
|
|
1323
1572
|
configuration: Option<HashMap<String, Option<String>>>,
|
|
@@ -1326,62 +1575,36 @@ fn write_to_deltalake(
|
|
|
1326
1575
|
commit_properties: Option<RbCommitProperties>,
|
|
1327
1576
|
post_commithook_properties: Option<RbPostCommitHookProperties>,
|
|
1328
1577
|
) -> RbResult<()> {
|
|
1329
|
-
let
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
let table_url =
|
|
1334
|
-
deltalake::table::builder::ensure_table_uri(&table_uri).map_err(RubyError::from)?;
|
|
1335
|
-
let table = rt()
|
|
1336
|
-
.block_on(DeltaTable::try_from_url_with_storage_options(
|
|
1578
|
+
let raw_table: DeltaResult<RawDeltaTable> = rb.detach(|| {
|
|
1579
|
+
let options = storage_options.clone().unwrap_or_default();
|
|
1580
|
+
let table_url = deltalake::table::builder::ensure_table_uri(&table_uri)?;
|
|
1581
|
+
let table = rt().block_on(DeltaTable::try_from_url_with_storage_options(
|
|
1337
1582
|
table_url.clone(),
|
|
1338
1583
|
options.clone(),
|
|
1339
|
-
))
|
|
1340
|
-
.map_err(RubyError::from)?;
|
|
1341
|
-
|
|
1342
|
-
let mut builder = table.write(batches).with_save_mode(save_mode);
|
|
1343
|
-
if let Some(schema_mode) = schema_mode {
|
|
1344
|
-
builder = builder.with_schema_mode(schema_mode.parse().map_err(RubyError::from)?);
|
|
1345
|
-
}
|
|
1346
|
-
if let Some(partition_columns) = partition_by {
|
|
1347
|
-
builder = builder.with_partition_columns(partition_columns);
|
|
1348
|
-
}
|
|
1349
|
-
|
|
1350
|
-
if let Some(writer_props) = writer_properties {
|
|
1351
|
-
builder = builder
|
|
1352
|
-
.with_writer_properties(set_writer_properties(writer_props).map_err(RubyError::from)?);
|
|
1353
|
-
}
|
|
1354
|
-
|
|
1355
|
-
if let Some(name) = &name {
|
|
1356
|
-
builder = builder.with_table_name(name);
|
|
1357
|
-
};
|
|
1358
|
-
|
|
1359
|
-
if let Some(description) = &description {
|
|
1360
|
-
builder = builder.with_description(description);
|
|
1361
|
-
};
|
|
1362
|
-
|
|
1363
|
-
if let Some(predicate) = predicate {
|
|
1364
|
-
builder = builder.with_replace_where(predicate);
|
|
1365
|
-
};
|
|
1366
|
-
|
|
1367
|
-
if let Some(target_file_size) = target_file_size {
|
|
1368
|
-
builder = builder.with_target_file_size(target_file_size)
|
|
1369
|
-
};
|
|
1584
|
+
))?;
|
|
1370
1585
|
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1586
|
+
let raw_table = RawDeltaTable {
|
|
1587
|
+
_table: Arc::new(Mutex::new(table)),
|
|
1588
|
+
};
|
|
1589
|
+
Ok(raw_table)
|
|
1590
|
+
});
|
|
1591
|
+
|
|
1592
|
+
RawDeltaTable::write(
|
|
1593
|
+
rb,
|
|
1594
|
+
&raw_table.map_err(RubyError::from)?,
|
|
1595
|
+
data,
|
|
1596
|
+
mode,
|
|
1597
|
+
schema_mode,
|
|
1598
|
+
partition_by,
|
|
1599
|
+
predicate,
|
|
1600
|
+
target_file_size,
|
|
1601
|
+
name,
|
|
1602
|
+
description,
|
|
1603
|
+
configuration,
|
|
1604
|
+
writer_properties,
|
|
1605
|
+
commit_properties,
|
|
1606
|
+
post_commithook_properties,
|
|
1607
|
+
)
|
|
1385
1608
|
}
|
|
1386
1609
|
|
|
1387
1610
|
pub struct RbArrowType<T>(pub T);
|
|
@@ -1454,6 +1677,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1454
1677
|
class.define_method("file_uris", method!(RawDeltaTable::file_uris, 1))?;
|
|
1455
1678
|
class.define_method("schema", method!(RawDeltaTable::schema, 0))?;
|
|
1456
1679
|
class.define_method("vacuum", method!(RawDeltaTable::vacuum, 5))?;
|
|
1680
|
+
class.define_method("update", method!(RawDeltaTable::update, 6))?;
|
|
1457
1681
|
class.define_method(
|
|
1458
1682
|
"compact_optimize",
|
|
1459
1683
|
method!(RawDeltaTable::compact_optimize, 7),
|
|
@@ -1472,6 +1696,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1472
1696
|
"drop_constraints",
|
|
1473
1697
|
method!(RawDeltaTable::drop_constraints, 2),
|
|
1474
1698
|
)?;
|
|
1699
|
+
class.define_method("generate", method!(RawDeltaTable::generate, 0))?;
|
|
1475
1700
|
class.define_method("load_cdf", method!(RawDeltaTable::load_cdf, 5))?;
|
|
1476
1701
|
class.define_method(
|
|
1477
1702
|
"create_merge_builder",
|
|
@@ -1492,6 +1717,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1492
1717
|
"create_checkpoint",
|
|
1493
1718
|
method!(RawDeltaTable::create_checkpoint, 0),
|
|
1494
1719
|
)?;
|
|
1720
|
+
class.define_method("compact_logs", method!(RawDeltaTable::compact_logs, 2))?;
|
|
1495
1721
|
class.define_method(
|
|
1496
1722
|
"cleanup_metadata",
|
|
1497
1723
|
method!(RawDeltaTable::cleanup_metadata, 0),
|
|
@@ -1505,11 +1731,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1505
1731
|
"set_table_properties",
|
|
1506
1732
|
method!(RawDeltaTable::set_table_properties, 2),
|
|
1507
1733
|
)?;
|
|
1734
|
+
class.define_method("set_table_name", method!(RawDeltaTable::set_table_name, 3))?;
|
|
1735
|
+
class.define_method(
|
|
1736
|
+
"set_table_description",
|
|
1737
|
+
method!(RawDeltaTable::set_table_description, 3),
|
|
1738
|
+
)?;
|
|
1508
1739
|
class.define_method("repair", method!(RawDeltaTable::repair, 3))?;
|
|
1509
1740
|
class.define_method(
|
|
1510
1741
|
"transaction_version",
|
|
1511
1742
|
method!(RawDeltaTable::transaction_version, 1),
|
|
1512
1743
|
)?;
|
|
1744
|
+
class.define_method(
|
|
1745
|
+
"set_column_metadata",
|
|
1746
|
+
method!(RawDeltaTable::set_column_metadata, 4),
|
|
1747
|
+
)?;
|
|
1513
1748
|
class.define_method("write", method!(RawDeltaTable::write, 12))?;
|
|
1514
1749
|
|
|
1515
1750
|
let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;
|