deltalake-rb 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.2.1"
3
+ version = "0.2.3"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,10 +11,10 @@ publish = false
11
11
  crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
- arrow = { version = "55.2", features = ["ffi"] }
15
- arrow-schema = { version = "55.2", features = ["serde"] }
14
+ arrow = { version = "56", features = ["ffi"] }
15
+ arrow-schema = { version = "56", features = ["serde"] }
16
16
  chrono = "0.4"
17
- delta_kernel = { version = "0.14", features = ["arrow-55", "default-engine-rustls"] }
17
+ delta_kernel = { version = "0.16", features = ["arrow-56", "default-engine-rustls"] }
18
18
  futures = "0.3"
19
19
  magnus = "0.8"
20
20
  num_cpus = "1"
@@ -23,7 +23,7 @@ serde_json = "1"
23
23
  tokio = { version = "1", features = ["rt-multi-thread"] }
24
24
 
25
25
  [dependencies.deltalake]
26
- version = "=0.28.0"
26
+ version = "=0.29.0"
27
27
  features = [
28
28
  "azure",
29
29
  "datafusion",
@@ -22,8 +22,8 @@ use deltalake::datafusion::prelude::SessionContext;
22
22
  use deltalake::delta_datafusion::DeltaCdfTableProvider;
23
23
  use deltalake::errors::DeltaTableError;
24
24
  use deltalake::kernel::transaction::{CommitProperties, TableReference};
25
- use deltalake::kernel::StructDataExt;
26
- use deltalake::kernel::{scalars::ScalarExt, StructType, Transaction};
25
+ use deltalake::kernel::{scalars::ScalarExt, Transaction};
26
+ use deltalake::kernel::{EagerSnapshot, StructDataExt};
27
27
  use deltalake::logstore::IORuntime;
28
28
  use deltalake::logstore::LogStoreRef;
29
29
  use deltalake::operations::add_column::AddColumnBuilder;
@@ -43,7 +43,6 @@ use deltalake::parquet::errors::ParquetError;
43
43
  use deltalake::parquet::file::properties::WriterProperties;
44
44
  use deltalake::partitions::PartitionFilter;
45
45
  use deltalake::table::config::TablePropertiesExt;
46
- use deltalake::table::state::DeltaTableState;
47
46
  use deltalake::{DeltaOps, DeltaResult};
48
47
  use error::DeltaError;
49
48
  use futures::future::join_all;
@@ -128,9 +127,10 @@ impl RawDeltaTable {
128
127
  func(&self._table.borrow())
129
128
  }
130
129
 
131
- fn cloned_state(&self) -> RbResult<DeltaTableState> {
130
+ fn cloned_state(&self) -> RbResult<EagerSnapshot> {
132
131
  self.with_table(|t| {
133
132
  t.snapshot()
133
+ .map(|snapshot| snapshot.snapshot())
134
134
  .cloned()
135
135
  .map_err(RubyError::from)
136
136
  .map_err(RbErr::from)
@@ -150,7 +150,10 @@ impl RawDeltaTable {
150
150
  without_files: bool,
151
151
  log_buffer_size: Option<usize>,
152
152
  ) -> RbResult<Self> {
153
- let mut builder = deltalake::DeltaTableBuilder::from_uri(&table_uri)
153
+ let table_url = deltalake::table::builder::parse_table_uri(table_uri)
154
+ .map_err(error::RubyError::from)?;
155
+ let mut builder = deltalake::DeltaTableBuilder::from_uri(table_url)
156
+ .map_err(error::RubyError::from)?
154
157
  .with_io_runtime(IORuntime::default());
155
158
 
156
159
  if let Some(storage_options) = storage_options {
@@ -178,7 +181,10 @@ impl RawDeltaTable {
178
181
  table_uri: String,
179
182
  storage_options: Option<HashMap<String, String>>,
180
183
  ) -> RbResult<bool> {
181
- let mut builder = deltalake::DeltaTableBuilder::from_uri(&table_uri);
184
+ let table_url = deltalake::table::builder::ensure_table_uri(table_uri)
185
+ .map_err(|_| RbValueError::new_err("Invalid table URI"))?;
186
+ let mut builder = deltalake::DeltaTableBuilder::from_uri(table_url)
187
+ .map_err(|_| RbValueError::new_err("Failed to create table builder"))?;
182
188
  if let Some(storage_options) = storage_options {
183
189
  builder = builder.with_storage_options(storage_options)
184
190
  }
@@ -358,11 +364,11 @@ impl RawDeltaTable {
358
364
  }
359
365
 
360
366
  pub fn schema(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
361
- let schema: StructType = rb_self.with_table(|t| {
367
+ let schema = rb_self.with_table(|t| {
362
368
  let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
363
369
  Ok(snapshot.schema().clone())
364
370
  })?;
365
- schema_to_rbobject(schema.to_owned(), ruby)
371
+ schema_to_rbobject(schema, ruby)
366
372
  }
367
373
 
368
374
  pub fn vacuum(
@@ -373,13 +379,16 @@ impl RawDeltaTable {
373
379
  commit_properties: Option<RbCommitProperties>,
374
380
  post_commithook_properties: Option<RbPostCommitHookProperties>,
375
381
  ) -> RbResult<Vec<String>> {
382
+ let snapshot = self
383
+ ._table
384
+ .borrow()
385
+ .snapshot()
386
+ .cloned()
387
+ .map_err(RubyError::from)
388
+ .map_err(RbErr::from)?;
376
389
  let mut cmd = VacuumBuilder::new(
377
390
  self._table.borrow().log_store(),
378
- self._table
379
- .borrow()
380
- .snapshot()
381
- .map_err(RubyError::from)?
382
- .clone(),
391
+ snapshot.snapshot().clone(),
383
392
  )
384
393
  .with_enforce_retention_duration(enforce_retention_duration)
385
394
  .with_dry_run(dry_run);
@@ -408,15 +417,8 @@ impl RawDeltaTable {
408
417
  commit_properties: Option<RbCommitProperties>,
409
418
  post_commithook_properties: Option<RbPostCommitHookProperties>,
410
419
  ) -> RbResult<String> {
411
- let mut cmd = OptimizeBuilder::new(
412
- self._table.borrow().log_store(),
413
- self._table
414
- .borrow()
415
- .snapshot()
416
- .map_err(RubyError::from)?
417
- .clone(),
418
- )
419
- .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
420
+ let mut cmd = OptimizeBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
421
+ .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
420
422
  if let Some(size) = target_size {
421
423
  cmd = cmd.with_target_size(size);
422
424
  }
@@ -458,17 +460,10 @@ impl RawDeltaTable {
458
460
  commit_properties: Option<RbCommitProperties>,
459
461
  post_commithook_properties: Option<RbPostCommitHookProperties>,
460
462
  ) -> RbResult<String> {
461
- let mut cmd = OptimizeBuilder::new(
462
- self._table.borrow().log_store(),
463
- self._table
464
- .borrow()
465
- .snapshot()
466
- .map_err(RubyError::from)?
467
- .clone(),
468
- )
469
- .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
470
- .with_max_spill_size(max_spill_size)
471
- .with_type(OptimizeType::ZOrder(z_order_columns));
463
+ let mut cmd = OptimizeBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
464
+ .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
465
+ .with_max_spill_size(max_spill_size)
466
+ .with_type(OptimizeType::ZOrder(z_order_columns));
472
467
  if let Some(size) = target_size {
473
468
  cmd = cmd.with_target_size(size);
474
469
  }
@@ -499,14 +494,7 @@ impl RawDeltaTable {
499
494
 
500
495
  pub fn add_columns(&self, fields: RArray) -> RbResult<()> {
501
496
  let fields = fields.typecheck::<Obj<Field>>()?;
502
- let mut cmd = AddColumnBuilder::new(
503
- self._table.borrow().log_store(),
504
- self._table
505
- .borrow()
506
- .snapshot()
507
- .map_err(RubyError::from)?
508
- .clone(),
509
- );
497
+ let mut cmd = AddColumnBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
510
498
 
511
499
  let new_fields = fields
512
500
  .iter()
@@ -529,16 +517,10 @@ impl RawDeltaTable {
529
517
  .into_iter()
530
518
  .map(TableFeatures::try_convert)
531
519
  .collect::<RbResult<Vec<_>>>()?;
532
- let cmd = AddTableFeatureBuilder::new(
533
- self._table.borrow().log_store(),
534
- self._table
535
- .borrow()
536
- .snapshot()
537
- .map_err(RubyError::from)?
538
- .clone(),
539
- )
540
- .with_features(feature)
541
- .with_allow_protocol_versions_increase(allow_protocol_versions_increase);
520
+ let cmd =
521
+ AddTableFeatureBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
522
+ .with_features(feature)
523
+ .with_allow_protocol_versions_increase(allow_protocol_versions_increase);
542
524
 
543
525
  let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
544
526
  self._table.borrow_mut().state = table.state;
@@ -546,14 +528,8 @@ impl RawDeltaTable {
546
528
  }
547
529
 
548
530
  pub fn add_constraints(&self, constraints: HashMap<String, String>) -> RbResult<()> {
549
- let mut cmd = ConstraintBuilder::new(
550
- self._table.borrow().log_store(),
551
- self._table
552
- .borrow()
553
- .snapshot()
554
- .map_err(RubyError::from)?
555
- .clone(),
556
- );
531
+ let mut cmd =
532
+ ConstraintBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
557
533
 
558
534
  for (col_name, expression) in constraints {
559
535
  cmd = cmd.with_constraint(col_name.clone(), expression.clone());
@@ -565,16 +541,10 @@ impl RawDeltaTable {
565
541
  }
566
542
 
567
543
  pub fn drop_constraints(&self, name: String, raise_if_not_exists: bool) -> RbResult<()> {
568
- let cmd = DropConstraintBuilder::new(
569
- self._table.borrow().log_store(),
570
- self._table
571
- .borrow()
572
- .snapshot()
573
- .map_err(RubyError::from)?
574
- .clone(),
575
- )
576
- .with_constraint(name)
577
- .with_raise_if_not_exists(raise_if_not_exists);
544
+ let cmd =
545
+ DropConstraintBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
546
+ .with_constraint(name)
547
+ .with_raise_if_not_exists(raise_if_not_exists);
578
548
 
579
549
  let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
580
550
  self._table.borrow_mut().state = table.state;
@@ -590,15 +560,9 @@ impl RawDeltaTable {
590
560
  columns: Option<Vec<String>>,
591
561
  ) -> RbResult<ArrowArrayStream> {
592
562
  let ctx = SessionContext::new();
593
- let mut cdf_read = CdfLoadBuilder::new(
594
- self._table.borrow().log_store(),
595
- self._table
596
- .borrow()
597
- .snapshot()
598
- .map_err(RubyError::from)?
599
- .clone(),
600
- )
601
- .with_starting_version(starting_version);
563
+ let mut cdf_read =
564
+ CdfLoadBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
565
+ .with_starting_version(starting_version);
602
566
 
603
567
  if let Some(ev) = ending_version {
604
568
  cdf_read = cdf_read.with_ending_version(ev);
@@ -667,11 +631,7 @@ impl RawDeltaTable {
667
631
  ) -> RbResult<RbMergeBuilder> {
668
632
  Ok(RbMergeBuilder::new(
669
633
  self._table.borrow().log_store(),
670
- self._table
671
- .borrow()
672
- .snapshot()
673
- .map_err(RubyError::from)?
674
- .clone(),
634
+ self.cloned_state()?,
675
635
  source.0,
676
636
  predicate,
677
637
  source_alias,
@@ -697,14 +657,7 @@ impl RawDeltaTable {
697
657
  protocol_downgrade_allowed: bool,
698
658
  commit_properties: Option<RbCommitProperties>,
699
659
  ) -> RbResult<String> {
700
- let mut cmd = RestoreBuilder::new(
701
- self._table.borrow().log_store(),
702
- self._table
703
- .borrow()
704
- .snapshot()
705
- .map_err(RubyError::from)?
706
- .clone(),
707
- );
660
+ let mut cmd = RestoreBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
708
661
  if let Some(val) = target {
709
662
  if let Some(version) = Integer::from_value(val) {
710
663
  cmd = cmd.with_version_to_restore(version.to_i64()?)
@@ -735,8 +688,7 @@ impl RawDeltaTable {
735
688
  .block_on(self._table.borrow().history(limit))
736
689
  .map_err(RubyError::from)?;
737
690
  Ok(history
738
- .iter()
739
- .map(|c| serde_json::to_string(c).unwrap())
691
+ .map(|c| serde_json::to_string(&c).unwrap())
740
692
  .collect())
741
693
  }
742
694
 
@@ -773,7 +725,7 @@ impl RawDeltaTable {
773
725
  let adds: Vec<_> = rt()
774
726
  .block_on(async {
775
727
  state
776
- .get_active_add_actions_by_partitions(&log_store, &converted_filters)
728
+ .file_views_by_partitions(&log_store, &converted_filters)
777
729
  .try_collect()
778
730
  .await
779
731
  })
@@ -823,7 +775,7 @@ impl RawDeltaTable {
823
775
  .block_on(async {
824
776
  t.snapshot()?
825
777
  .snapshot()
826
- .files(&log_store, None)
778
+ .file_views(&log_store, None)
827
779
  .map_ok(|f| (f.path().to_string(), f.size()))
828
780
  .try_collect()
829
781
  .await
@@ -840,14 +792,7 @@ impl RawDeltaTable {
840
792
  commit_properties: Option<RbCommitProperties>,
841
793
  post_commithook_properties: Option<RbPostCommitHookProperties>,
842
794
  ) -> RbResult<String> {
843
- let mut cmd = DeleteBuilder::new(
844
- self._table.borrow().log_store(),
845
- self._table
846
- .borrow()
847
- .snapshot()
848
- .map_err(RubyError::from)?
849
- .clone(),
850
- );
795
+ let mut cmd = DeleteBuilder::new(self._table.borrow().log_store(), self.cloned_state()?);
851
796
  if let Some(predicate) = predicate {
852
797
  cmd = cmd.with_predicate(predicate);
853
798
  }
@@ -872,16 +817,10 @@ impl RawDeltaTable {
872
817
  properties: HashMap<String, String>,
873
818
  raise_if_not_exists: bool,
874
819
  ) -> RbResult<()> {
875
- let cmd = SetTablePropertiesBuilder::new(
876
- self._table.borrow().log_store(),
877
- self._table
878
- .borrow()
879
- .snapshot()
880
- .map_err(RubyError::from)?
881
- .clone(),
882
- )
883
- .with_properties(properties)
884
- .with_raise_if_not_exists(raise_if_not_exists);
820
+ let cmd =
821
+ SetTablePropertiesBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
822
+ .with_properties(properties)
823
+ .with_raise_if_not_exists(raise_if_not_exists);
885
824
 
886
825
  let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
887
826
  self._table.borrow_mut().state = table.state;
@@ -894,15 +833,9 @@ impl RawDeltaTable {
894
833
  commit_properties: Option<RbCommitProperties>,
895
834
  post_commithook_properties: Option<RbPostCommitHookProperties>,
896
835
  ) -> RbResult<String> {
897
- let mut cmd = FileSystemCheckBuilder::new(
898
- self._table.borrow().log_store(),
899
- self._table
900
- .borrow()
901
- .snapshot()
902
- .map_err(RubyError::from)?
903
- .clone(),
904
- )
905
- .with_dry_run(dry_run);
836
+ let mut cmd =
837
+ FileSystemCheckBuilder::new(self._table.borrow().log_store(), self.cloned_state()?)
838
+ .with_dry_run(dry_run);
906
839
 
907
840
  if let Some(commit_properties) =
908
841
  maybe_create_commit_properties(commit_properties, post_commithook_properties)
@@ -1228,8 +1161,10 @@ fn write_to_deltalake(
1228
1161
  let table = if let Some(table) = table {
1229
1162
  DeltaOps(table._table.borrow().clone())
1230
1163
  } else {
1164
+ let table_url =
1165
+ deltalake::table::builder::ensure_table_uri(&table_uri).map_err(RubyError::from)?;
1231
1166
  rt().block_on(DeltaOps::try_from_uri_with_storage_options(
1232
- &table_uri, options,
1167
+ table_url, options,
1233
1168
  ))
1234
1169
  .map_err(RubyError::from)?
1235
1170
  };
@@ -4,9 +4,9 @@ use deltalake::arrow::ffi_stream::ArrowArrayStreamReader;
4
4
  use deltalake::datafusion::catalog::TableProvider;
5
5
  use deltalake::datafusion::datasource::MemTable;
6
6
  use deltalake::datafusion::prelude::SessionContext;
7
+ use deltalake::kernel::EagerSnapshot;
7
8
  use deltalake::logstore::LogStoreRef;
8
9
  use deltalake::operations::merge::MergeBuilder;
9
- use deltalake::table::state::DeltaTableState;
10
10
  use deltalake::{DeltaResult, DeltaTable};
11
11
  use std::cell::RefCell;
12
12
  use std::collections::HashMap;
@@ -45,7 +45,7 @@ impl RbMergeBuilder {
45
45
  #[allow(clippy::too_many_arguments)]
46
46
  pub fn new(
47
47
  log_store: LogStoreRef,
48
- snapshot: DeltaTableState,
48
+ snapshot: EagerSnapshot,
49
49
  source: ArrowArrayStreamReader,
50
50
  predicate: String,
51
51
  source_alias: Option<String>,
@@ -2,10 +2,11 @@ use deltalake::kernel::{
2
2
  DataType, PrimitiveType as DeltaPrimitive, StructField, StructType as DeltaStructType,
3
3
  };
4
4
  use magnus::{value::ReprValue, Module, RModule, Ruby, TryConvert, Value};
5
+ use std::sync::Arc;
5
6
 
6
7
  use crate::{RbResult, RbValueError};
7
8
 
8
- pub fn schema_to_rbobject(schema: DeltaStructType, ruby: &Ruby) -> RbResult<Value> {
9
+ pub fn schema_to_rbobject(schema: Arc<DeltaStructType>, ruby: &Ruby) -> RbResult<Value> {
9
10
  let fields = schema.fields().map(|field| Field {
10
11
  inner: field.clone(),
11
12
  });
@@ -1,3 +1,3 @@
1
1
  module DeltaLake
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deltalake-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane