deltalake-rb 0.1.7 → 0.2.1
This diff shows the changes between publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +636 -547
- data/ext/deltalake/Cargo.toml +15 -7
- data/ext/deltalake/src/error.rs +9 -35
- data/ext/deltalake/src/lib.rs +176 -139
- data/ext/deltalake/src/schema.rs +47 -7
- data/ext/deltalake/src/utils.rs +2 -3
- data/lib/deltalake/table.rb +11 -3
- data/lib/deltalake/version.rb +1 -1
- metadata +3 -3
data/ext/deltalake/Cargo.toml
CHANGED
@@ -1,24 +1,32 @@
 [package]
 name = "deltalake"
-version = "0.1
+version = "0.2.1"
 license = "Apache-2.0"
 authors = ["Andrew Kane <andrew@ankane.org>"]
 edition = "2021"
-rust-version = "1.82
+rust-version = "1.82"
 publish = false

 [lib]
 crate-type = ["cdylib"]

 [dependencies]
-arrow = { version = "55", features = ["ffi"] }
-arrow-schema = { version = "55", features = ["serde"] }
+arrow = { version = "55.2", features = ["ffi"] }
+arrow-schema = { version = "55.2", features = ["serde"] }
 chrono = "0.4"
-delta_kernel = "
-deltalake = { version = "=0.26.0", features = ["azure", "datafusion", "gcs", "s3"] }
+delta_kernel = { version = "0.14", features = ["arrow-55", "default-engine-rustls"] }
 futures = "0.3"
-magnus = "0.
+magnus = "0.8"
 num_cpus = "1"
 serde = "1"
 serde_json = "1"
 tokio = { version = "1", features = ["rt-multi-thread"] }
+
+[dependencies.deltalake]
+version = "=0.28.0"
+features = [
+  "azure",
+  "datafusion",
+  "gcs",
+  "s3"
+]
data/ext/deltalake/src/error.rs
CHANGED
@@ -1,8 +1,7 @@
 use arrow_schema::ArrowError;
 use deltalake::datafusion::error::DataFusionError;
-use deltalake::protocol::ProtocolError;
 use deltalake::{errors::DeltaTableError, ObjectStoreError};
-use magnus::{
+use magnus::{Error as RbErr, Module, RModule, Ruby};
 use std::borrow::Cow;

 macro_rules! create_exception {
@@ -42,7 +41,7 @@ fn inner_to_rb_err(err: DeltaTableError) -> RbErr {
         DeltaTableError::InvalidJsonLog { .. } => DeltaProtocolError::new_err(err.to_string()),
         DeltaTableError::InvalidStatsJson { .. } => DeltaProtocolError::new_err(err.to_string()),
         DeltaTableError::InvalidData { violations } => {
-            DeltaProtocolError::new_err(format!("Invariant violations: {:?}"
+            DeltaProtocolError::new_err(format!("Invariant violations: {violations:?}"))
         }

         // commit errors
@@ -81,31 +80,12 @@ fn arrow_to_rb(err: ArrowError) -> RbErr {
     }
 }

-fn checkpoint_to_rb(err: ProtocolError) -> RbErr {
-    match err {
-        ProtocolError::Arrow { source } => arrow_to_rb(source),
-        ProtocolError::ObjectStore { source } => object_store_to_rb(source),
-        ProtocolError::EndOfLog => DeltaProtocolError::new_err("End of log"),
-        ProtocolError::NoMetaData => DeltaProtocolError::new_err("Table metadata missing"),
-        ProtocolError::CheckpointNotFound => DeltaProtocolError::new_err(err.to_string()),
-        ProtocolError::InvalidField(err) => RbValueError::new_err(err),
-        ProtocolError::InvalidRow(err) => RbValueError::new_err(err),
-        ProtocolError::InvalidDeletionVectorStorageType(err) => RbValueError::new_err(err),
-        ProtocolError::SerializeOperation { source } => RbValueError::new_err(source.to_string()),
-        ProtocolError::ParquetParseError { source } => RbIOError::new_err(source.to_string()),
-        ProtocolError::IO { source } => RbIOError::new_err(source.to_string()),
-        ProtocolError::Generic(msg) => DeltaError::new_err(msg),
-        ProtocolError::Kernel { source } => DeltaError::new_err(source.to_string()),
-    }
-}
-
 fn datafusion_to_rb(err: DataFusionError) -> RbErr {
     DeltaError::new_err(err.to_string())
 }

 pub enum RubyError {
     DeltaTable(DeltaTableError),
-    Protocol(ProtocolError),
     DataFusion(DataFusionError),
 }

@@ -115,12 +95,6 @@ impl From<DeltaTableError> for RubyError {
     }
 }

-impl From<ProtocolError> for RubyError {
-    fn from(err: ProtocolError) -> Self {
-        RubyError::Protocol(err)
-    }
-}
-
 impl From<DataFusionError> for RubyError {
     fn from(err: DataFusionError) -> Self {
         RubyError::DataFusion(err)
@@ -131,14 +105,13 @@ impl From<RubyError> for RbErr {
     fn from(value: RubyError) -> Self {
         match value {
             RubyError::DeltaTable(err) => inner_to_rb_err(err),
-            RubyError::Protocol(err) => checkpoint_to_rb(err),
             RubyError::DataFusion(err) => datafusion_to_rb(err),
         }
     }
 }

 macro_rules! create_builtin_exception {
-    ($type:ident, $
+    ($type:ident, $method:ident) => {
         pub struct $type {}

         impl $type {
@@ -146,13 +119,14 @@ macro_rules! create_builtin_exception {
             where
                 T: Into<Cow<'static, str>>,
             {
-
+                let ruby = Ruby::get().unwrap();
+                RbErr::new(ruby.$method(), message)
             }
         }
     };
 }

-create_builtin_exception!(RbException,
-create_builtin_exception!(RbIOError,
-create_builtin_exception!(RbNotImplementedError,
-create_builtin_exception!(RbValueError,
+create_builtin_exception!(RbException, exception_runtime_error);
+create_builtin_exception!(RbIOError, exception_io_error);
+create_builtin_exception!(RbNotImplementedError, exception_not_imp_error);
+create_builtin_exception!(RbValueError, exception_arg_error);
data/ext/deltalake/src/lib.rs
CHANGED
@@ -13,6 +13,7 @@ use std::time;

 use chrono::{DateTime, Duration, FixedOffset, Utc};
 use delta_kernel::schema::StructField;
+use delta_kernel::table_properties::DataSkippingNumIndexedCols;
 use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
 use deltalake::arrow::record_batch::RecordBatchIterator;
 use deltalake::checkpoints::{cleanup_metadata, create_checkpoint};
@@ -21,8 +22,10 @@ use deltalake::datafusion::prelude::SessionContext;
 use deltalake::delta_datafusion::DeltaCdfTableProvider;
 use deltalake::errors::DeltaTableError;
 use deltalake::kernel::transaction::{CommitProperties, TableReference};
+use deltalake::kernel::StructDataExt;
 use deltalake::kernel::{scalars::ScalarExt, StructType, Transaction};
 use deltalake::logstore::IORuntime;
+use deltalake::logstore::LogStoreRef;
 use deltalake::operations::add_column::AddColumnBuilder;
 use deltalake::operations::add_feature::AddTableFeatureBuilder;
 use deltalake::operations::collect_sendable_stream;
@@ -39,25 +42,26 @@ use deltalake::parquet::basic::Compression;
 use deltalake::parquet::errors::ParquetError;
 use deltalake::parquet::file::properties::WriterProperties;
 use deltalake::partitions::PartitionFilter;
+use deltalake::table::config::TablePropertiesExt;
+use deltalake::table::state::DeltaTableState;
 use deltalake::{DeltaOps, DeltaResult};
 use error::DeltaError;
 use futures::future::join_all;
+use futures::TryStreamExt;

 use magnus::{
-    function, method, prelude::*, typed_data::Obj, Error
-    TryConvert, Value,
+    function, method, prelude::*, try_convert::TryConvertOwned, typed_data::Obj, Error as RbErr,
+    Integer, Module, RArray, Ruby, TryConvert, Value,
 };
 use serde_json::Map;

-use crate::error::
-use crate::error::RbValueError;
-use crate::error::RubyError;
+use crate::error::{RbValueError, RubyError};
 use crate::features::TableFeatures;
 use crate::merge::RbMergeBuilder;
 use crate::schema::{schema_to_rbobject, Field};
 use crate::utils::rt;

-type RbResult<T> = Result<T,
+type RbResult<T> = Result<T, RbErr>;

 enum PartitionFilterValue {
     Single(String),
@@ -74,6 +78,8 @@ impl TryConvert for PartitionFilterValue {
     }
 }

+unsafe impl TryConvertOwned for PartitionFilterValue {}
+
 #[magnus::wrap(class = "DeltaLake::RawDeltaTable")]
 struct RawDeltaTable {
     _table: RefCell<deltalake::DeltaTable>,
@@ -86,7 +92,7 @@ struct RawDeltaTableMetaData {
     description: Option<String>,
     partition_columns: Vec<String>,
     created_time: Option<i64>,
-    configuration: HashMap<String,
+    configuration: HashMap<String, String>,
 }

 impl RawDeltaTableMetaData {
@@ -110,13 +116,32 @@ impl RawDeltaTableMetaData {
         self.created_time
     }

-    fn configuration(&self) -> HashMap<String,
+    fn configuration(&self) -> HashMap<String, String> {
         self.configuration.clone()
     }
 }

 type StringVec = Vec<String>;

+impl RawDeltaTable {
+    fn with_table<T>(&self, func: impl Fn(&deltalake::DeltaTable) -> RbResult<T>) -> RbResult<T> {
+        func(&self._table.borrow())
+    }
+
+    fn cloned_state(&self) -> RbResult<DeltaTableState> {
+        self.with_table(|t| {
+            t.snapshot()
+                .cloned()
+                .map_err(RubyError::from)
+                .map_err(RbErr::from)
+        })
+    }
+
+    fn log_store(&self) -> RbResult<LogStoreRef> {
+        self.with_table(|t| Ok(t.log_store().clone()))
+    }
+}
+
 impl RawDeltaTable {
     pub fn new(
         table_uri: String,
@@ -168,60 +193,58 @@ impl RawDeltaTable {
     }

     pub fn table_uri(&self) -> RbResult<String> {
-
+        self.with_table(|t| Ok(t.table_uri()))
     }

-    pub fn version(&self) -> RbResult<i64
-
+    pub fn version(&self) -> RbResult<Option<i64>> {
+        self.with_table(|t| Ok(t.version()))
     }

     pub fn has_files(&self) -> RbResult<bool> {
-
+        self.with_table(|t| Ok(t.config.require_files))
     }

     pub fn metadata(&self) -> RbResult<RawDeltaTableMetaData> {
-        let
-
+        let metadata = self.with_table(|t| {
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.metadata().clone())
+        })?;
         Ok(RawDeltaTableMetaData {
-            id: metadata.id.
-            name: metadata.name.
-            description: metadata.description.
-            partition_columns: metadata.partition_columns.clone(),
-            created_time: metadata.created_time,
-            configuration: metadata.configuration.clone(),
+            id: metadata.id().to_string(),
+            name: metadata.name().map(String::from),
+            description: metadata.description().map(String::from),
+            partition_columns: metadata.partition_columns().clone(),
+            created_time: metadata.created_time(),
+            configuration: metadata.configuration().clone(),
         })
     }

     pub fn protocol_versions(&self) -> RbResult<(i32, i32, Option<StringVec>, Option<StringVec>)> {
-        let
-
+        let table_protocol = self.with_table(|t| {
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.protocol().clone())
+        })?;
         Ok((
-            table_protocol.min_reader_version,
-            table_protocol.min_writer_version,
-            table_protocol
-                .
-                .
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    features
-                        .iter()
-                        .map(|v| v.to_string())
-                        .collect::<Vec<String>>()
-                })
-            }),
+            table_protocol.min_reader_version(),
+            table_protocol.min_writer_version(),
+            table_protocol.writer_features().and_then(|features| {
+                let empty_set = !features.is_empty();
+                empty_set.then(|| {
+                    features
+                        .iter()
+                        .map(|v| v.to_string())
+                        .collect::<Vec<String>>()
+                })
+            }),
+            table_protocol.reader_features().and_then(|features| {
+                let empty_set = !features.is_empty();
+                empty_set.then(|| {
+                    features
+                        .iter()
+                        .map(|v| v.to_string())
+                        .collect::<Vec<String>>()
+                })
+            }),
         ))
     }

@@ -237,31 +260,29 @@ impl RawDeltaTable {
             .map_err(RubyError::from)?)
     }

-    pub fn get_earliest_version(&self) -> RbResult<i64> {
-        Ok(rt()
-            .block_on(self._table.borrow().get_earliest_version())
-            .map_err(RubyError::from)?)
-    }
-
     pub fn get_num_index_cols(&self) -> RbResult<i32> {
-
-
-
-
-
-
-
+        self.with_table(|t| {
+            let n_cols = t
+                .snapshot()
+                .map_err(RubyError::from)?
+                .config()
+                .num_indexed_cols();
+            Ok(match n_cols {
+                DataSkippingNumIndexedCols::NumColumns(n_cols) => n_cols as i32,
+                DataSkippingNumIndexedCols::AllColumns => -1,
+            })
+        })
     }

     pub fn get_stats_columns(&self) -> RbResult<Option<Vec<String>>> {
-
-        .
-
-
-
-
-
-
+        self.with_table(|t| {
+            Ok(t.snapshot()
+                .map_err(RubyError::from)?
+                .config()
+                .data_skipping_stats_columns
+                .as_ref()
+                .map(|v| v.iter().map(|s| s.to_string()).collect::<Vec<String>>()))
+        })
     }

     pub fn load_with_datetime(&self, ds: String) -> RbResult<()> {
@@ -285,10 +306,14 @@ impl RawDeltaTable {
         if let Some(filters) = partition_filters {
             let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
             Ok(self
-                .
-
-
-
+                .with_table(|t| {
+                    rt().block_on(async {
+                        t.get_files_by_partitions(&filters)
+                            .await
+                            .map_err(RubyError::from)
+                            .map_err(RbErr::from)
+                    })
+                })?
                 .into_iter()
                 .map(|p| p.to_string())
                 .collect())
@@ -296,8 +321,9 @@ impl RawDeltaTable {
             Ok(self
                 ._table
                 .borrow()
-                .
+                .snapshot()
                 .map_err(RubyError::from)?
+                .file_paths_iter()
                 .map(|f| f.to_string())
                 .collect())
         }
@@ -307,31 +333,36 @@ impl RawDeltaTable {
         &self,
         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
     ) -> RbResult<Vec<String>> {
-        if !self.
+        if !self.with_table(|t| Ok(t.config.require_files))? {
             return Err(DeltaError::new_err("Table is initiated without files."));
         }

         if let Some(filters) = partition_filters {
             let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
-
-            .
-
-
-
+            self.with_table(|t| {
+                rt().block_on(async {
+                    t.get_file_uris_by_partitions(&filters)
+                        .await
+                        .map_err(RubyError::from)
+                        .map_err(RbErr::from)
+                })
+            })
         } else {
-
-            .
-
-
-
-
+            self.with_table(|t| {
+                Ok(t.get_file_uris()
+                    .map_err(RubyError::from)
+                    .map_err(RbErr::from)?
+                    .collect::<Vec<String>>())
+            })
         }
     }

-    pub fn schema(&
-        let
-
-
+    pub fn schema(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
+        let schema: StructType = rb_self.with_table(|t| {
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.schema().clone())
+        })?;
+        schema_to_rbobject(schema.to_owned(), ruby)
     }

     pub fn vacuum(
@@ -370,7 +401,7 @@ impl RawDeltaTable {
     pub fn compact_optimize(
         &self,
         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
-        target_size: Option<
+        target_size: Option<u64>,
         max_concurrent_tasks: Option<usize>,
         min_commit_interval: Option<u64>,
         writer_properties: Option<RbWriterProperties>,
@@ -419,7 +450,7 @@ impl RawDeltaTable {
         &self,
         z_order_columns: Vec<String>,
         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
-        target_size: Option<
+        target_size: Option<u64>,
         max_concurrent_tasks: Option<usize>,
         max_spill_size: usize,
         min_commit_interval: Option<u64>,
@@ -716,18 +747,19 @@ impl RawDeltaTable {
             .map_err(RubyError::from)?)
     }

-    fn get_active_partitions(&
-        let
-
-            .
-
-
-            .
-            .
-
-
-            .
-
+    fn get_active_partitions(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
+        let schema = rb_self.with_table(|t| {
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.schema().clone())
+        })?;
+        let metadata = rb_self.with_table(|t| {
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.metadata().clone())
+        })?;
+        let _column_names: HashSet<&str> =
+            schema.fields().map(|field| field.name().as_str()).collect();
+        let partition_columns: HashSet<&str> = metadata
+            .partition_columns()
             .iter()
             .map(|col| col.as_str())
             .collect();
@@ -736,12 +768,15 @@ impl RawDeltaTable {

         let partition_columns: Vec<&str> = partition_columns.into_iter().collect();

-        let
-
-
-            .
-
-
+        let state = rb_self.cloned_state()?;
+        let log_store = rb_self.log_store()?;
+        let adds: Vec<_> = rt()
+            .block_on(async {
+                state
+                    .get_active_add_actions_by_partitions(&log_store, &converted_filters)
+                    .try_collect()
+                    .await
+            })
             .map_err(RubyError::from)?;
         let active_partitions: HashSet<Vec<(&str, Option<String>)>> = adds
             .iter()
@@ -749,21 +784,22 @@ impl RawDeltaTable {
                 Ok::<_, RubyError>(
                     partition_columns
                         .iter()
-                        .
-
+                        .map(|col| {
+                            (
                                 *col,
                                 add.partition_values()
-                                    .
-
+                                    .and_then(|v| {
+                                        v.index_of(col).and_then(|idx| v.value(idx).cloned())
+                                    })
                                     .map(|v| v.serialize()),
-                            )
+                            )
                         })
                         .collect(),
                 )
             })
             .collect();

-        Ok(
+        Ok(ruby.ary_from_iter(active_partitions))
     }

     pub fn create_checkpoint(&self) -> RbResult<()> {
@@ -781,15 +817,20 @@ impl RawDeltaTable {
     }

     pub fn get_add_file_sizes(&self) -> RbResult<HashMap<String, i64>> {
-
-        .
-
-
-
-
-
-
-
+        self.with_table(|t| {
+            let log_store = t.log_store();
+            let sizes: HashMap<String, i64> = rt()
+                .block_on(async {
+                    t.snapshot()?
+                        .snapshot()
+                        .files(&log_store, None)
+                        .map_ok(|f| (f.path().to_string(), f.size()))
+                        .try_collect()
+                        .await
+                })
+                .map_err(RubyError::from)?;
+            Ok(sizes)
+        })
     }

     pub fn delete(
@@ -874,14 +915,13 @@ impl RawDeltaTable {
         Ok(serde_json::to_string(&metrics).unwrap())
     }

-    pub fn
-
-
-
-
-
-
-    )
+    pub fn transaction_version(&self, app_id: String) -> RbResult<Option<i64>> {
+        // NOTE: this will simplify once we have moved logstore onto state.
+        let log_store = self.log_store()?;
+        let snapshot = self.with_table(|t| Ok(t.snapshot().map_err(RubyError::from)?.clone()))?;
+        Ok(rt()
+            .block_on(snapshot.transaction_version(log_store.as_ref(), app_id))
+            .map_err(RubyError::from)?)
     }
 }

@@ -1293,10 +1333,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
         "get_latest_version",
         method!(RawDeltaTable::get_latest_version, 0),
     )?;
-    class.define_method(
-        "get_earliest_version",
-        method!(RawDeltaTable::get_earliest_version, 0),
-    )?;
     class.define_method(
         "get_num_index_cols",
         method!(RawDeltaTable::get_num_index_cols, 0),
@@ -1366,8 +1402,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     )?;
     class.define_method("repair", method!(RawDeltaTable::repair, 3))?;
     class.define_method(
-        "
-        method!(RawDeltaTable::
+        "transaction_version",
+        method!(RawDeltaTable::transaction_version, 1),
     )?;

     let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;
@@ -1394,6 +1430,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     class.define_method("to_i", method!(ArrowArrayStream::to_i, 0))?;

     let class = module.define_class("Field", ruby.class_object())?;
+    class.define_singleton_method("new", function!(Field::new, 2))?;
     class.define_method("name", method!(Field::name, 0))?;
     class.define_method("type", method!(Field::get_type, 0))?;
     class.define_method("nullable", method!(Field::nullable, 0))?;
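
The recurring with_table accessor added in lib.rs funnels every read of the RefCell-wrapped table through one closure-taking helper, so each borrow stays short-lived and the error mapping lives in one place. A minimal standalone sketch of that pattern, using hypothetical simplified types rather than the gem's actual structs:

use std::cell::RefCell;

// Stand-in for deltalake::DeltaTable (hypothetical, for illustration only).
struct Table {
    version: Option<i64>,
}

struct RawTable {
    _table: RefCell<Table>,
}

impl RawTable {
    // Borrow the table only for the duration of the closure.
    fn with_table<T>(&self, func: impl Fn(&Table) -> Result<T, String>) -> Result<T, String> {
        func(&self._table.borrow())
    }

    // Callers never hold the RefCell borrow directly.
    fn version(&self) -> Result<Option<i64>, String> {
        self.with_table(|t| Ok(t.version))
    }
}

fn main() {
    let raw = RawTable {
        _table: RefCell::new(Table { version: Some(3) }),
    };
    assert_eq!(raw.version().unwrap(), Some(3));
}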