deltalake-rb 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +228 -122
- data/ext/deltalake/Cargo.toml +12 -4
- data/ext/deltalake/src/error.rs +9 -8
- data/ext/deltalake/src/lib.rs +96 -62
- data/ext/deltalake/src/schema.rs +47 -7
- data/ext/deltalake/src/utils.rs +2 -3
- data/lib/deltalake/table.rb +3 -1
- data/lib/deltalake/version.rb +1 -1
- metadata +2 -2
data/ext/deltalake/Cargo.toml
CHANGED
```diff
@@ -1,6 +1,6 @@
 [package]
 name = "deltalake"
-version = "0.2.0"
+version = "0.2.2"
 license = "Apache-2.0"
 authors = ["Andrew Kane <andrew@ankane.org>"]
 edition = "2021"
@@ -14,11 +14,19 @@ crate-type = ["cdylib"]
 arrow = { version = "55.2", features = ["ffi"] }
 arrow-schema = { version = "55.2", features = ["serde"] }
 chrono = "0.4"
-delta_kernel = "0.
-deltalake = { version = "=0.27.0", features = ["azure", "datafusion", "gcs", "s3"] }
+delta_kernel = { version = "0.15", features = ["arrow-55", "default-engine-rustls"] }
 futures = "0.3"
-magnus = "0.
+magnus = "0.8"
 num_cpus = "1"
 serde = "1"
 serde_json = "1"
 tokio = { version = "1", features = ["rt-multi-thread"] }
+
+[dependencies.deltalake]
+version = "=0.28.1"
+features = [
+  "azure",
+  "datafusion",
+  "gcs",
+  "s3"
+]
```
data/ext/deltalake/src/error.rs
CHANGED
```diff
@@ -1,7 +1,7 @@
 use arrow_schema::ArrowError;
 use deltalake::datafusion::error::DataFusionError;
 use deltalake::{errors::DeltaTableError, ObjectStoreError};
-use magnus::{
+use magnus::{Error as RbErr, Module, RModule, Ruby};
 use std::borrow::Cow;
 
 macro_rules! create_exception {
@@ -41,7 +41,7 @@ fn inner_to_rb_err(err: DeltaTableError) -> RbErr {
         DeltaTableError::InvalidJsonLog { .. } => DeltaProtocolError::new_err(err.to_string()),
         DeltaTableError::InvalidStatsJson { .. } => DeltaProtocolError::new_err(err.to_string()),
         DeltaTableError::InvalidData { violations } => {
-            DeltaProtocolError::new_err(format!("Invariant violations: {:?}", violations))
+            DeltaProtocolError::new_err(format!("Invariant violations: {violations:?}"))
         }
 
         // commit errors
@@ -111,7 +111,7 @@ impl From<RubyError> for RbErr {
 }
 
 macro_rules! create_builtin_exception {
-    ($type:ident, $
+    ($type:ident, $method:ident) => {
         pub struct $type {}
 
         impl $type {
@@ -119,13 +119,14 @@ macro_rules! create_builtin_exception {
             where
                 T: Into<Cow<'static, str>>,
             {
-
+                let ruby = Ruby::get().unwrap();
+                RbErr::new(ruby.$method(), message)
             }
         }
     };
 }
 
-create_builtin_exception!(RbException,
-create_builtin_exception!(RbIOError,
-create_builtin_exception!(RbNotImplementedError,
-create_builtin_exception!(RbValueError,
+create_builtin_exception!(RbException, exception_runtime_error);
+create_builtin_exception!(RbIOError, exception_io_error);
+create_builtin_exception!(RbNotImplementedError, exception_not_imp_error);
+create_builtin_exception!(RbValueError, exception_arg_error);
```
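The rewritten macro resolves built-in exception classes through a `Ruby` handle, which is the magnus 0.8 way of reaching interpreter state. A minimal standalone sketch of the pattern (the `value_error` helper is hypothetical and simply mirrors what the generated `new_err` methods do):

```rust
use magnus::{Error, Ruby};

// Hypothetical helper showing the magnus 0.8 pattern: fetch the Ruby
// handle for the current thread, then build an Error against one of
// the interpreter's built-in exception classes.
fn value_error(msg: String) -> Error {
    // Ruby::get() only succeeds on a Ruby thread, which is why the
    // macro above unwraps it.
    let ruby = Ruby::get().unwrap();
    Error::new(ruby.exception_arg_error(), msg)
}
```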
data/ext/deltalake/src/lib.rs
CHANGED
```diff
@@ -13,6 +13,7 @@ use std::time;
 
 use chrono::{DateTime, Duration, FixedOffset, Utc};
 use delta_kernel::schema::StructField;
+use delta_kernel::table_properties::DataSkippingNumIndexedCols;
 use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
 use deltalake::arrow::record_batch::RecordBatchIterator;
 use deltalake::checkpoints::{cleanup_metadata, create_checkpoint};
@@ -21,6 +22,7 @@ use deltalake::datafusion::prelude::SessionContext;
 use deltalake::delta_datafusion::DeltaCdfTableProvider;
 use deltalake::errors::DeltaTableError;
 use deltalake::kernel::transaction::{CommitProperties, TableReference};
+use deltalake::kernel::StructDataExt;
 use deltalake::kernel::{scalars::ScalarExt, StructType, Transaction};
 use deltalake::logstore::IORuntime;
 use deltalake::logstore::LogStoreRef;
@@ -40,17 +42,20 @@ use deltalake::parquet::basic::Compression;
 use deltalake::parquet::errors::ParquetError;
 use deltalake::parquet::file::properties::WriterProperties;
 use deltalake::partitions::PartitionFilter;
+use deltalake::table::config::TablePropertiesExt;
+use deltalake::table::state::DeltaTableState;
 use deltalake::{DeltaOps, DeltaResult};
 use error::DeltaError;
 use futures::future::join_all;
+use futures::TryStreamExt;
 
 use magnus::{
-    function, method, prelude::*, typed_data::Obj, Error as RbErr,
-    TryConvert, Value,
+    function, method, prelude::*, try_convert::TryConvertOwned, typed_data::Obj, Error as RbErr,
+    Integer, Module, RArray, Ruby, TryConvert, Value,
 };
 use serde_json::Map;
 
-use crate::error::{
+use crate::error::{RbValueError, RubyError};
 use crate::features::TableFeatures;
 use crate::merge::RbMergeBuilder;
 use crate::schema::{schema_to_rbobject, Field};
@@ -73,6 +78,8 @@ impl TryConvert for PartitionFilterValue {
     }
 }
 
+unsafe impl TryConvertOwned for PartitionFilterValue {}
+
 #[magnus::wrap(class = "DeltaLake::RawDeltaTable")]
 struct RawDeltaTable {
     _table: RefCell<deltalake::DeltaTable>,
@@ -121,6 +128,15 @@ impl RawDeltaTable {
         func(&self._table.borrow())
     }
 
+    fn cloned_state(&self) -> RbResult<DeltaTableState> {
+        self.with_table(|t| {
+            t.snapshot()
+                .cloned()
+                .map_err(RubyError::from)
+                .map_err(RbErr::from)
+        })
+    }
+
     fn log_store(&self) -> RbResult<LogStoreRef> {
         self.with_table(|t| Ok(t.log_store().clone()))
     }
@@ -190,10 +206,8 @@ impl RawDeltaTable {
 
     pub fn metadata(&self) -> RbResult<RawDeltaTableMetaData> {
         let metadata = self.with_table(|t| {
-            t.
-
-                .map_err(RubyError::from)
-                .map_err(RbErr::from)
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.metadata().clone())
         })?;
         Ok(RawDeltaTableMetaData {
             id: metadata.id().to_string(),
@@ -207,10 +221,8 @@ impl RawDeltaTable {
 
     pub fn protocol_versions(&self) -> RbResult<(i32, i32, Option<StringVec>, Option<StringVec>)> {
         let table_protocol = self.with_table(|t| {
-            t.
-
-                .map_err(RubyError::from)
-                .map_err(RbErr::from)
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.protocol().clone())
         })?;
         Ok((
             table_protocol.min_reader_version(),
@@ -250,10 +262,15 @@ impl RawDeltaTable {
 
     pub fn get_num_index_cols(&self) -> RbResult<i32> {
         self.with_table(|t| {
-
+            let n_cols = t
+                .snapshot()
                 .map_err(RubyError::from)?
                 .config()
-                .num_indexed_cols()
+                .num_indexed_cols();
+            Ok(match n_cols {
+                DataSkippingNumIndexedCols::NumColumns(n_cols) => n_cols as i32,
+                DataSkippingNumIndexedCols::AllColumns => -1,
+            })
         })
     }
```
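`num_indexed_cols()` now returns delta_kernel's `DataSkippingNumIndexedCols` enum rather than a plain integer, so the binding maps it back to the `i32` convention the Ruby side already expects (-1 meaning all columns). The mapping in isolation, as a sketch assuming only the two variants shown above:

```rust
use delta_kernel::table_properties::DataSkippingNumIndexedCols;

// Map the kernel enum back to the integer convention exposed to Ruby:
// a concrete count passes through, "index all columns" stays -1.
fn to_ruby_convention(n_cols: DataSkippingNumIndexedCols) -> i32 {
    match n_cols {
        DataSkippingNumIndexedCols::NumColumns(n) => n as i32,
        DataSkippingNumIndexedCols::AllColumns => -1,
    }
}
```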
```diff
@@ -262,7 +279,8 @@ impl RawDeltaTable {
             Ok(t.snapshot()
                 .map_err(RubyError::from)?
                 .config()
-                .
+                .data_skipping_stats_columns
+                .as_ref()
                 .map(|v| v.iter().map(|s| s.to_string()).collect::<Vec<String>>()))
         })
     }
@@ -289,9 +307,12 @@ impl RawDeltaTable {
         let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
         Ok(self
             .with_table(|t| {
-
-                .
-
+                rt().block_on(async {
+                    t.get_files_by_partitions(&filters)
+                        .await
+                        .map_err(RubyError::from)
+                        .map_err(RbErr::from)
+                })
             })?
             .into_iter()
             .map(|p| p.to_string())
@@ -300,8 +321,9 @@ impl RawDeltaTable {
         Ok(self
             ._table
             .borrow()
-            .
+            .snapshot()
             .map_err(RubyError::from)?
+            .file_paths_iter()
             .map(|f| f.to_string())
             .collect())
     }
@@ -318,9 +340,12 @@ impl RawDeltaTable {
         if let Some(filters) = partition_filters {
             let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
             self.with_table(|t| {
-
-                .
-
+                rt().block_on(async {
+                    t.get_file_uris_by_partitions(&filters)
+                        .await
+                        .map_err(RubyError::from)
+                        .map_err(RbErr::from)
+                })
             })
         } else {
             self.with_table(|t| {
```
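deltalake 0.28 makes the partition file-listing APIs async, so the synchronous Ruby-facing methods now bridge them onto the gem's shared tokio runtime with `rt().block_on`. A self-contained sketch of that bridge, using a local runtime in place of the gem's `rt()` helper:

```rust
use tokio::runtime::Runtime;

fn main() {
    // The gem keeps one process-wide runtime (see utils.rs below);
    // a local one stands in for it here.
    let rt = Runtime::new().expect("Failed to create a tokio runtime.");

    // block_on drives a future to completion on the current thread,
    // turning an async API into a synchronous call site.
    let answer = rt.block_on(async { 40 + 2 });
    assert_eq!(answer, 42);
}
```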
```diff
@@ -332,14 +357,12 @@ impl RawDeltaTable {
         }
     }
 
-    pub fn schema(&
-        let schema: StructType =
-            t.
-
-                .map_err(RbErr::from)
-                .map(|s| s.to_owned())
+    pub fn schema(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
+        let schema: StructType = rb_self.with_table(|t| {
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.schema().clone())
         })?;
-        schema_to_rbobject(schema.to_owned())
+        schema_to_rbobject(schema.to_owned(), ruby)
     }
 
     pub fn vacuum(
@@ -378,7 +401,7 @@ impl RawDeltaTable {
     pub fn compact_optimize(
         &self,
         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
-        target_size: Option<
+        target_size: Option<u64>,
         max_concurrent_tasks: Option<usize>,
         min_commit_interval: Option<u64>,
         writer_properties: Option<RbWriterProperties>,
@@ -427,7 +450,7 @@ impl RawDeltaTable {
         &self,
         z_order_columns: Vec<String>,
         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
-        target_size: Option<
+        target_size: Option<u64>,
         max_concurrent_tasks: Option<usize>,
         max_spill_size: usize,
         min_commit_interval: Option<u64>,
@@ -724,17 +747,18 @@ impl RawDeltaTable {
             .map_err(RubyError::from)?)
     }
 
-    fn get_active_partitions(&
-        let
-
-            .
-
-
-            .
-            .
-
-
-            .
+    fn get_active_partitions(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
+        let schema = rb_self.with_table(|t| {
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.schema().clone())
+        })?;
+        let metadata = rb_self.with_table(|t| {
+            let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
+            Ok(snapshot.metadata().clone())
+        })?;
+        let _column_names: HashSet<&str> =
+            schema.fields().map(|field| field.name().as_str()).collect();
+        let partition_columns: HashSet<&str> = metadata
             .partition_columns()
             .iter()
             .map(|col| col.as_str())
@@ -744,12 +768,15 @@ impl RawDeltaTable {
 
         let partition_columns: Vec<&str> = partition_columns.into_iter().collect();
 
-        let
-
-
-            .
-
-
+        let state = rb_self.cloned_state()?;
+        let log_store = rb_self.log_store()?;
+        let adds: Vec<_> = rt()
+            .block_on(async {
+                state
+                    .get_active_add_actions_by_partitions(&log_store, &converted_filters)
+                    .try_collect()
+                    .await
+            })
             .map_err(RubyError::from)?;
         let active_partitions: HashSet<Vec<(&str, Option<String>)>> = adds
             .iter()
@@ -757,21 +784,22 @@ impl RawDeltaTable {
                 Ok::<_, RubyError>(
                     partition_columns
                         .iter()
-                        .
-
+                        .map(|col| {
+                            (
                                 *col,
                                 add.partition_values()
-                                    .
-
+                                    .and_then(|v| {
+                                        v.index_of(col).and_then(|idx| v.value(idx).cloned())
+                                    })
                                     .map(|v| v.serialize()),
-                            )
+                            )
                         })
                         .collect(),
                 )
             })
             .collect();
 
-        Ok(
+        Ok(ruby.ary_from_iter(active_partitions))
     }
 
     pub fn create_checkpoint(&self) -> RbResult<()> {
@@ -789,15 +817,20 @@ impl RawDeltaTable {
     }
 
     pub fn get_add_file_sizes(&self) -> RbResult<HashMap<String, i64>> {
-
-        .
-
-
-
-
-
-
-
+        self.with_table(|t| {
+            let log_store = t.log_store();
+            let sizes: HashMap<String, i64> = rt()
+                .block_on(async {
+                    t.snapshot()?
+                        .snapshot()
+                        .files(&log_store, None)
+                        .map_ok(|f| (f.path().to_string(), f.size()))
+                        .try_collect()
+                        .await
+                })
+                .map_err(RubyError::from)?;
+            Ok(sizes)
+        })
     }
 
     pub fn delete(
```
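Both `get_active_partitions` and `get_add_file_sizes` now read from stream-based APIs and gather the results with `futures::TryStreamExt::try_collect`. A minimal sketch of the combinator over a toy stream (not the deltalake types):

```rust
use futures::{stream, TryStreamExt};

// try_collect gathers a stream of Result items into a Result of a
// collection, short-circuiting on the first error - the same shape
// as collecting add actions or (path, size) pairs above.
async fn collect_sizes() -> Result<Vec<(String, i64)>, std::io::Error> {
    let s = stream::iter(vec![
        Ok::<(String, i64), std::io::Error>(("part-0000.parquet".into(), 1024)),
        Ok(("part-0001.parquet".into(), 2048)),
    ]);
    s.try_collect().await
}
```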
```diff
@@ -1397,6 +1430,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     class.define_method("to_i", method!(ArrowArrayStream::to_i, 0))?;
 
     let class = module.define_class("Field", ruby.class_object())?;
+    class.define_singleton_method("new", function!(Field::new, 2))?;
     class.define_method("name", method!(Field::name, 0))?;
     class.define_method("type", method!(Field::get_type, 0))?;
     class.define_method("nullable", method!(Field::nullable, 0))?;
```
data/ext/deltalake/src/schema.rs
CHANGED
```diff
@@ -1,20 +1,53 @@
-use deltalake::kernel::{
-
+use deltalake::kernel::{
+    DataType, PrimitiveType as DeltaPrimitive, StructField, StructType as DeltaStructType,
+};
+use magnus::{value::ReprValue, Module, RModule, Ruby, TryConvert, Value};
 
-use crate::RbResult;
+use crate::{RbResult, RbValueError};
 
-pub fn schema_to_rbobject(schema: DeltaStructType) -> RbResult<Value> {
+pub fn schema_to_rbobject(schema: DeltaStructType, ruby: &Ruby) -> RbResult<Value> {
     let fields = schema.fields().map(|field| Field {
         inner: field.clone(),
     });
 
-    let rb_schema: Value =
-        .unwrap()
+    let rb_schema: Value = ruby
         .class_object()
         .const_get::<_, RModule>("DeltaLake")?
         .const_get("Schema")?;
 
-    rb_schema.funcall("new", (
+    rb_schema.funcall("new", (ruby.ary_from_iter(fields),))
+}
+
+fn ruby_type_to_schema(ob: Value) -> RbResult<DataType> {
+    if let Ok(raw_primitive) = String::try_convert(ob) {
+        // Pass through PrimitiveType::new() to do validation
+        return PrimitiveType::new(raw_primitive)
+            .map(|data_type| DataType::Primitive(data_type.inner_type));
+    }
+    Err(RbValueError::new_err("Invalid data type"))
+}
+
+pub struct PrimitiveType {
+    inner_type: DeltaPrimitive,
+}
+
+impl PrimitiveType {
+    fn new(data_type: String) -> RbResult<Self> {
+        let data_type: DeltaPrimitive =
+            serde_json::from_str(&format!("\"{data_type}\"")).map_err(|_| {
+                if data_type.starts_with("decimal") {
+                    RbValueError::new_err(format!(
+                        "invalid type string: {data_type}, precision/scale can't be larger than 38"
+                    ))
+                } else {
+                    RbValueError::new_err(format!("invalid type string: {data_type}"))
+                }
+            })?;
+
+        Ok(Self {
+            inner_type: data_type,
+        })
+    }
 }
 
 #[magnus::wrap(class = "DeltaLake::Field")]
@@ -23,6 +56,13 @@ pub struct Field {
 }
 
 impl Field {
+    pub fn new(name: String, r#type: Value) -> RbResult<Self> {
+        let ty = ruby_type_to_schema(r#type)?;
+        Ok(Self {
+            inner: StructField::new(name, ty, true),
+        })
+    }
+
     pub fn name(&self) -> String {
         self.inner.name().to_string()
     }
```
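`PrimitiveType::new` validates a type string by round-tripping it through serde: quoting the raw name produces the JSON form the kernel's `PrimitiveType` deserializes from, so the kernel's own parser decides what is legal. The same mechanism against a toy enum (the kernel types are not needed to see it):

```rust
use serde::Deserialize;

// Toy stand-in for delta_kernel's PrimitiveType: serde derives a
// deserializer that accepts the lowercase JSON names.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "lowercase")]
enum ToyType {
    String,
    Long,
}

fn main() {
    // Wrapping the raw name in quotes makes it a JSON string literal,
    // so the derived parser performs the validation.
    assert!(serde_json::from_str::<ToyType>("\"string\"").is_ok());
    assert!(serde_json::from_str::<ToyType>("\"bogus\"").is_err());
}
```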
data/ext/deltalake/src/utils.rs
CHANGED
```diff
@@ -10,11 +10,10 @@ pub fn rt() -> &'static Runtime {
     let runtime_pid = *PID.get_or_init(|| pid);
     if pid != runtime_pid {
         panic!(
-            "Forked process detected - current PID is {} but the tokio runtime was created by {}. The tokio \
+            "Forked process detected - current PID is {pid} but the tokio runtime was created by {runtime_pid}. The tokio \
             runtime does not support forked processes https://github.com/tokio-rs/tokio/issues/4301. If you are \
             seeing this message while using Ruby multithreading make sure to use the `spawn` or `forkserver` \
-            mode.",
-            pid, runtime_pid
+            mode."
         );
     }
     TOKIO_RT.get_or_init(|| Runtime::new().expect("Failed to create a tokio runtime."))
```
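This change, like the `{violations:?}` one in error.rs, is mechanical: Rust 2021 inline format arguments capture identifiers directly in the format string. A two-assertion illustration:

```rust
fn main() {
    let pid = 123;
    let runtime_pid = 456;
    // Positional arguments and inline captures format identically.
    let old_style = format!("current PID is {} created by {}", pid, runtime_pid);
    let new_style = format!("current PID is {pid} created by {runtime_pid}");
    assert_eq!(old_style, new_style);
}
```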
data/lib/deltalake/table.rb
CHANGED
```diff
@@ -195,6 +195,7 @@ module DeltaLake
 
       sources = file_uris
       if sources.empty?
+        # TODO pass schema
        lf = Polars::LazyFrame.new
       else
         delta_keys = [
@@ -209,7 +210,8 @@ module DeltaLake
           sources,
           hive_partitioning: true,
           storage_options: storage_options,
-          rechunk: rechunk
+          rechunk: rechunk,
+          allow_missing_columns: true
         )
 
         if columns
```
data/lib/deltalake/version.rb
CHANGED

```diff
@@ -1,3 +1,3 @@
 module DeltaLake
-  VERSION = "0.2.0"
+  VERSION = "0.2.2"
 end
```
metadata
CHANGED
```diff
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: deltalake-rb
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.2
 platform: ruby
 authors:
 - Andrew Kane
@@ -71,7 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.6.
+rubygems_version: 3.6.9
 specification_version: 4
 summary: Delta Lake for Ruby
 test_files: []
```