deltalake-rb 0.1.7 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,24 +1,32 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.1.7"
3
+ version = "0.2.1"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
7
- rust-version = "1.82.0"
7
+ rust-version = "1.82"
8
8
  publish = false
9
9
 
10
10
  [lib]
11
11
  crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
- arrow = { version = "55", features = ["ffi"] }
15
- arrow-schema = { version = "55", features = ["serde"] }
14
+ arrow = { version = "55.2", features = ["ffi"] }
15
+ arrow-schema = { version = "55.2", features = ["serde"] }
16
16
  chrono = "0.4"
17
- delta_kernel = "=0.10.0"
18
- deltalake = { version = "=0.26.0", features = ["azure", "datafusion", "gcs", "s3"] }
17
+ delta_kernel = { version = "0.14", features = ["arrow-55", "default-engine-rustls"] }
19
18
  futures = "0.3"
20
- magnus = "0.7"
19
+ magnus = "0.8"
21
20
  num_cpus = "1"
22
21
  serde = "1"
23
22
  serde_json = "1"
24
23
  tokio = { version = "1", features = ["rt-multi-thread"] }
24
+
25
+ [dependencies.deltalake]
26
+ version = "=0.28.0"
27
+ features = [
28
+ "azure",
29
+ "datafusion",
30
+ "gcs",
31
+ "s3"
32
+ ]
@@ -1,8 +1,7 @@
1
1
  use arrow_schema::ArrowError;
2
2
  use deltalake::datafusion::error::DataFusionError;
3
- use deltalake::protocol::ProtocolError;
4
3
  use deltalake::{errors::DeltaTableError, ObjectStoreError};
5
- use magnus::{exception, Error as RbErr, Module, RModule, Ruby};
4
+ use magnus::{Error as RbErr, Module, RModule, Ruby};
6
5
  use std::borrow::Cow;
7
6
 
8
7
  macro_rules! create_exception {
@@ -42,7 +41,7 @@ fn inner_to_rb_err(err: DeltaTableError) -> RbErr {
42
41
  DeltaTableError::InvalidJsonLog { .. } => DeltaProtocolError::new_err(err.to_string()),
43
42
  DeltaTableError::InvalidStatsJson { .. } => DeltaProtocolError::new_err(err.to_string()),
44
43
  DeltaTableError::InvalidData { violations } => {
45
- DeltaProtocolError::new_err(format!("Invariant violations: {:?}", violations))
44
+ DeltaProtocolError::new_err(format!("Invariant violations: {violations:?}"))
46
45
  }
47
46
 
48
47
  // commit errors
@@ -81,31 +80,12 @@ fn arrow_to_rb(err: ArrowError) -> RbErr {
81
80
  }
82
81
  }
83
82
 
84
- fn checkpoint_to_rb(err: ProtocolError) -> RbErr {
85
- match err {
86
- ProtocolError::Arrow { source } => arrow_to_rb(source),
87
- ProtocolError::ObjectStore { source } => object_store_to_rb(source),
88
- ProtocolError::EndOfLog => DeltaProtocolError::new_err("End of log"),
89
- ProtocolError::NoMetaData => DeltaProtocolError::new_err("Table metadata missing"),
90
- ProtocolError::CheckpointNotFound => DeltaProtocolError::new_err(err.to_string()),
91
- ProtocolError::InvalidField(err) => RbValueError::new_err(err),
92
- ProtocolError::InvalidRow(err) => RbValueError::new_err(err),
93
- ProtocolError::InvalidDeletionVectorStorageType(err) => RbValueError::new_err(err),
94
- ProtocolError::SerializeOperation { source } => RbValueError::new_err(source.to_string()),
95
- ProtocolError::ParquetParseError { source } => RbIOError::new_err(source.to_string()),
96
- ProtocolError::IO { source } => RbIOError::new_err(source.to_string()),
97
- ProtocolError::Generic(msg) => DeltaError::new_err(msg),
98
- ProtocolError::Kernel { source } => DeltaError::new_err(source.to_string()),
99
- }
100
- }
101
-
102
83
  fn datafusion_to_rb(err: DataFusionError) -> RbErr {
103
84
  DeltaError::new_err(err.to_string())
104
85
  }
105
86
 
106
87
  pub enum RubyError {
107
88
  DeltaTable(DeltaTableError),
108
- Protocol(ProtocolError),
109
89
  DataFusion(DataFusionError),
110
90
  }
111
91
 
@@ -115,12 +95,6 @@ impl From<DeltaTableError> for RubyError {
115
95
  }
116
96
  }
117
97
 
118
- impl From<ProtocolError> for RubyError {
119
- fn from(err: ProtocolError) -> Self {
120
- RubyError::Protocol(err)
121
- }
122
- }
123
-
124
98
  impl From<DataFusionError> for RubyError {
125
99
  fn from(err: DataFusionError) -> Self {
126
100
  RubyError::DataFusion(err)
@@ -131,14 +105,13 @@ impl From<RubyError> for RbErr {
131
105
  fn from(value: RubyError) -> Self {
132
106
  match value {
133
107
  RubyError::DeltaTable(err) => inner_to_rb_err(err),
134
- RubyError::Protocol(err) => checkpoint_to_rb(err),
135
108
  RubyError::DataFusion(err) => datafusion_to_rb(err),
136
109
  }
137
110
  }
138
111
  }
139
112
 
140
113
  macro_rules! create_builtin_exception {
141
- ($type:ident, $class:expr) => {
114
+ ($type:ident, $method:ident) => {
142
115
  pub struct $type {}
143
116
 
144
117
  impl $type {
@@ -146,13 +119,14 @@ macro_rules! create_builtin_exception {
146
119
  where
147
120
  T: Into<Cow<'static, str>>,
148
121
  {
149
- RbErr::new($class, message)
122
+ let ruby = Ruby::get().unwrap();
123
+ RbErr::new(ruby.$method(), message)
150
124
  }
151
125
  }
152
126
  };
153
127
  }
154
128
 
155
- create_builtin_exception!(RbException, exception::runtime_error());
156
- create_builtin_exception!(RbIOError, exception::io_error());
157
- create_builtin_exception!(RbNotImplementedError, exception::not_imp_error());
158
- create_builtin_exception!(RbValueError, exception::arg_error());
129
+ create_builtin_exception!(RbException, exception_runtime_error);
130
+ create_builtin_exception!(RbIOError, exception_io_error);
131
+ create_builtin_exception!(RbNotImplementedError, exception_not_imp_error);
132
+ create_builtin_exception!(RbValueError, exception_arg_error);
@@ -13,6 +13,7 @@ use std::time;
13
13
 
14
14
  use chrono::{DateTime, Duration, FixedOffset, Utc};
15
15
  use delta_kernel::schema::StructField;
16
+ use delta_kernel::table_properties::DataSkippingNumIndexedCols;
16
17
  use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
17
18
  use deltalake::arrow::record_batch::RecordBatchIterator;
18
19
  use deltalake::checkpoints::{cleanup_metadata, create_checkpoint};
@@ -21,8 +22,10 @@ use deltalake::datafusion::prelude::SessionContext;
21
22
  use deltalake::delta_datafusion::DeltaCdfTableProvider;
22
23
  use deltalake::errors::DeltaTableError;
23
24
  use deltalake::kernel::transaction::{CommitProperties, TableReference};
25
+ use deltalake::kernel::StructDataExt;
24
26
  use deltalake::kernel::{scalars::ScalarExt, StructType, Transaction};
25
27
  use deltalake::logstore::IORuntime;
28
+ use deltalake::logstore::LogStoreRef;
26
29
  use deltalake::operations::add_column::AddColumnBuilder;
27
30
  use deltalake::operations::add_feature::AddTableFeatureBuilder;
28
31
  use deltalake::operations::collect_sendable_stream;
@@ -39,25 +42,26 @@ use deltalake::parquet::basic::Compression;
39
42
  use deltalake::parquet::errors::ParquetError;
40
43
  use deltalake::parquet::file::properties::WriterProperties;
41
44
  use deltalake::partitions::PartitionFilter;
45
+ use deltalake::table::config::TablePropertiesExt;
46
+ use deltalake::table::state::DeltaTableState;
42
47
  use deltalake::{DeltaOps, DeltaResult};
43
48
  use error::DeltaError;
44
49
  use futures::future::join_all;
50
+ use futures::TryStreamExt;
45
51
 
46
52
  use magnus::{
47
- function, method, prelude::*, typed_data::Obj, Error, Integer, Module, RArray, RHash, Ruby,
48
- TryConvert, Value,
53
+ function, method, prelude::*, try_convert::TryConvertOwned, typed_data::Obj, Error as RbErr,
54
+ Integer, Module, RArray, Ruby, TryConvert, Value,
49
55
  };
50
56
  use serde_json::Map;
51
57
 
52
- use crate::error::DeltaProtocolError;
53
- use crate::error::RbValueError;
54
- use crate::error::RubyError;
58
+ use crate::error::{RbValueError, RubyError};
55
59
  use crate::features::TableFeatures;
56
60
  use crate::merge::RbMergeBuilder;
57
61
  use crate::schema::{schema_to_rbobject, Field};
58
62
  use crate::utils::rt;
59
63
 
60
- type RbResult<T> = Result<T, Error>;
64
+ type RbResult<T> = Result<T, RbErr>;
61
65
 
62
66
  enum PartitionFilterValue {
63
67
  Single(String),
@@ -74,6 +78,8 @@ impl TryConvert for PartitionFilterValue {
74
78
  }
75
79
  }
76
80
 
81
+ unsafe impl TryConvertOwned for PartitionFilterValue {}
82
+
77
83
  #[magnus::wrap(class = "DeltaLake::RawDeltaTable")]
78
84
  struct RawDeltaTable {
79
85
  _table: RefCell<deltalake::DeltaTable>,
@@ -86,7 +92,7 @@ struct RawDeltaTableMetaData {
86
92
  description: Option<String>,
87
93
  partition_columns: Vec<String>,
88
94
  created_time: Option<i64>,
89
- configuration: HashMap<String, Option<String>>,
95
+ configuration: HashMap<String, String>,
90
96
  }
91
97
 
92
98
  impl RawDeltaTableMetaData {
@@ -110,13 +116,32 @@ impl RawDeltaTableMetaData {
110
116
  self.created_time
111
117
  }
112
118
 
113
- fn configuration(&self) -> HashMap<String, Option<String>> {
119
+ fn configuration(&self) -> HashMap<String, String> {
114
120
  self.configuration.clone()
115
121
  }
116
122
  }
117
123
 
118
124
  type StringVec = Vec<String>;
119
125
 
126
+ impl RawDeltaTable {
127
+ fn with_table<T>(&self, func: impl Fn(&deltalake::DeltaTable) -> RbResult<T>) -> RbResult<T> {
128
+ func(&self._table.borrow())
129
+ }
130
+
131
+ fn cloned_state(&self) -> RbResult<DeltaTableState> {
132
+ self.with_table(|t| {
133
+ t.snapshot()
134
+ .cloned()
135
+ .map_err(RubyError::from)
136
+ .map_err(RbErr::from)
137
+ })
138
+ }
139
+
140
+ fn log_store(&self) -> RbResult<LogStoreRef> {
141
+ self.with_table(|t| Ok(t.log_store().clone()))
142
+ }
143
+ }
144
+
120
145
  impl RawDeltaTable {
121
146
  pub fn new(
122
147
  table_uri: String,
@@ -168,60 +193,58 @@ impl RawDeltaTable {
168
193
  }
169
194
 
170
195
  pub fn table_uri(&self) -> RbResult<String> {
171
- Ok(self._table.borrow().table_uri())
196
+ self.with_table(|t| Ok(t.table_uri()))
172
197
  }
173
198
 
174
- pub fn version(&self) -> RbResult<i64> {
175
- Ok(self._table.borrow().version())
199
+ pub fn version(&self) -> RbResult<Option<i64>> {
200
+ self.with_table(|t| Ok(t.version()))
176
201
  }
177
202
 
178
203
  pub fn has_files(&self) -> RbResult<bool> {
179
- Ok(self._table.borrow().config.require_files)
204
+ self.with_table(|t| Ok(t.config.require_files))
180
205
  }
181
206
 
182
207
  pub fn metadata(&self) -> RbResult<RawDeltaTableMetaData> {
183
- let binding = self._table.borrow();
184
- let metadata = binding.metadata().map_err(RubyError::from)?;
208
+ let metadata = self.with_table(|t| {
209
+ let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
210
+ Ok(snapshot.metadata().clone())
211
+ })?;
185
212
  Ok(RawDeltaTableMetaData {
186
- id: metadata.id.clone(),
187
- name: metadata.name.clone(),
188
- description: metadata.description.clone(),
189
- partition_columns: metadata.partition_columns.clone(),
190
- created_time: metadata.created_time,
191
- configuration: metadata.configuration.clone(),
213
+ id: metadata.id().to_string(),
214
+ name: metadata.name().map(String::from),
215
+ description: metadata.description().map(String::from),
216
+ partition_columns: metadata.partition_columns().clone(),
217
+ created_time: metadata.created_time(),
218
+ configuration: metadata.configuration().clone(),
192
219
  })
193
220
  }
194
221
 
195
222
  pub fn protocol_versions(&self) -> RbResult<(i32, i32, Option<StringVec>, Option<StringVec>)> {
196
- let binding = self._table.borrow();
197
- let table_protocol = binding.protocol().map_err(RubyError::from)?;
223
+ let table_protocol = self.with_table(|t| {
224
+ let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
225
+ Ok(snapshot.protocol().clone())
226
+ })?;
198
227
  Ok((
199
- table_protocol.min_reader_version,
200
- table_protocol.min_writer_version,
201
- table_protocol
202
- .writer_features
203
- .as_ref()
204
- .and_then(|features| {
205
- let empty_set = !features.is_empty();
206
- empty_set.then(|| {
207
- features
208
- .iter()
209
- .map(|v| v.to_string())
210
- .collect::<Vec<String>>()
211
- })
212
- }),
213
- table_protocol
214
- .reader_features
215
- .as_ref()
216
- .and_then(|features| {
217
- let empty_set = !features.is_empty();
218
- empty_set.then(|| {
219
- features
220
- .iter()
221
- .map(|v| v.to_string())
222
- .collect::<Vec<String>>()
223
- })
224
- }),
228
+ table_protocol.min_reader_version(),
229
+ table_protocol.min_writer_version(),
230
+ table_protocol.writer_features().and_then(|features| {
231
+ let empty_set = !features.is_empty();
232
+ empty_set.then(|| {
233
+ features
234
+ .iter()
235
+ .map(|v| v.to_string())
236
+ .collect::<Vec<String>>()
237
+ })
238
+ }),
239
+ table_protocol.reader_features().and_then(|features| {
240
+ let empty_set = !features.is_empty();
241
+ empty_set.then(|| {
242
+ features
243
+ .iter()
244
+ .map(|v| v.to_string())
245
+ .collect::<Vec<String>>()
246
+ })
247
+ }),
225
248
  ))
226
249
  }
227
250
 
@@ -237,31 +260,29 @@ impl RawDeltaTable {
237
260
  .map_err(RubyError::from)?)
238
261
  }
239
262
 
240
- pub fn get_earliest_version(&self) -> RbResult<i64> {
241
- Ok(rt()
242
- .block_on(self._table.borrow().get_earliest_version())
243
- .map_err(RubyError::from)?)
244
- }
245
-
246
263
  pub fn get_num_index_cols(&self) -> RbResult<i32> {
247
- Ok(self
248
- ._table
249
- .borrow()
250
- .snapshot()
251
- .map_err(RubyError::from)?
252
- .config()
253
- .num_indexed_cols())
264
+ self.with_table(|t| {
265
+ let n_cols = t
266
+ .snapshot()
267
+ .map_err(RubyError::from)?
268
+ .config()
269
+ .num_indexed_cols();
270
+ Ok(match n_cols {
271
+ DataSkippingNumIndexedCols::NumColumns(n_cols) => n_cols as i32,
272
+ DataSkippingNumIndexedCols::AllColumns => -1,
273
+ })
274
+ })
254
275
  }
255
276
 
256
277
  pub fn get_stats_columns(&self) -> RbResult<Option<Vec<String>>> {
257
- Ok(self
258
- ._table
259
- .borrow()
260
- .snapshot()
261
- .map_err(RubyError::from)?
262
- .config()
263
- .stats_columns()
264
- .map(|v| v.iter().map(|v| v.to_string()).collect::<Vec<String>>()))
278
+ self.with_table(|t| {
279
+ Ok(t.snapshot()
280
+ .map_err(RubyError::from)?
281
+ .config()
282
+ .data_skipping_stats_columns
283
+ .as_ref()
284
+ .map(|v| v.iter().map(|s| s.to_string()).collect::<Vec<String>>()))
285
+ })
265
286
  }
266
287
 
267
288
  pub fn load_with_datetime(&self, ds: String) -> RbResult<()> {
@@ -285,10 +306,14 @@ impl RawDeltaTable {
285
306
  if let Some(filters) = partition_filters {
286
307
  let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
287
308
  Ok(self
288
- ._table
289
- .borrow()
290
- .get_files_by_partitions(&filters)
291
- .map_err(RubyError::from)?
309
+ .with_table(|t| {
310
+ rt().block_on(async {
311
+ t.get_files_by_partitions(&filters)
312
+ .await
313
+ .map_err(RubyError::from)
314
+ .map_err(RbErr::from)
315
+ })
316
+ })?
292
317
  .into_iter()
293
318
  .map(|p| p.to_string())
294
319
  .collect())
@@ -296,8 +321,9 @@ impl RawDeltaTable {
296
321
  Ok(self
297
322
  ._table
298
323
  .borrow()
299
- .get_files_iter()
324
+ .snapshot()
300
325
  .map_err(RubyError::from)?
326
+ .file_paths_iter()
301
327
  .map(|f| f.to_string())
302
328
  .collect())
303
329
  }
@@ -307,31 +333,36 @@ impl RawDeltaTable {
307
333
  &self,
308
334
  partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
309
335
  ) -> RbResult<Vec<String>> {
310
- if !self._table.borrow().config.require_files {
336
+ if !self.with_table(|t| Ok(t.config.require_files))? {
311
337
  return Err(DeltaError::new_err("Table is initiated without files."));
312
338
  }
313
339
 
314
340
  if let Some(filters) = partition_filters {
315
341
  let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
316
- Ok(self
317
- ._table
318
- .borrow()
319
- .get_file_uris_by_partitions(&filters)
320
- .map_err(RubyError::from)?)
342
+ self.with_table(|t| {
343
+ rt().block_on(async {
344
+ t.get_file_uris_by_partitions(&filters)
345
+ .await
346
+ .map_err(RubyError::from)
347
+ .map_err(RbErr::from)
348
+ })
349
+ })
321
350
  } else {
322
- Ok(self
323
- ._table
324
- .borrow()
325
- .get_file_uris()
326
- .map_err(RubyError::from)?
327
- .collect())
351
+ self.with_table(|t| {
352
+ Ok(t.get_file_uris()
353
+ .map_err(RubyError::from)
354
+ .map_err(RbErr::from)?
355
+ .collect::<Vec<String>>())
356
+ })
328
357
  }
329
358
  }
330
359
 
331
- pub fn schema(&self) -> RbResult<Value> {
332
- let binding = self._table.borrow();
333
- let schema: &StructType = binding.get_schema().map_err(RubyError::from)?;
334
- schema_to_rbobject(schema.to_owned())
360
+ pub fn schema(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
361
+ let schema: StructType = rb_self.with_table(|t| {
362
+ let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
363
+ Ok(snapshot.schema().clone())
364
+ })?;
365
+ schema_to_rbobject(schema.to_owned(), ruby)
335
366
  }
336
367
 
337
368
  pub fn vacuum(
@@ -370,7 +401,7 @@ impl RawDeltaTable {
370
401
  pub fn compact_optimize(
371
402
  &self,
372
403
  partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
373
- target_size: Option<i64>,
404
+ target_size: Option<u64>,
374
405
  max_concurrent_tasks: Option<usize>,
375
406
  min_commit_interval: Option<u64>,
376
407
  writer_properties: Option<RbWriterProperties>,
@@ -419,7 +450,7 @@ impl RawDeltaTable {
419
450
  &self,
420
451
  z_order_columns: Vec<String>,
421
452
  partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
422
- target_size: Option<i64>,
453
+ target_size: Option<u64>,
423
454
  max_concurrent_tasks: Option<usize>,
424
455
  max_spill_size: usize,
425
456
  min_commit_interval: Option<u64>,
@@ -716,18 +747,19 @@ impl RawDeltaTable {
716
747
  .map_err(RubyError::from)?)
717
748
  }
718
749
 
719
- fn get_active_partitions(&self) -> RbResult<RArray> {
720
- let binding = self._table.borrow();
721
- let _column_names: HashSet<&str> = binding
722
- .get_schema()
723
- .map_err(|_| DeltaProtocolError::new_err("table does not yet have a schema"))?
724
- .fields()
725
- .map(|field| field.name().as_str())
726
- .collect();
727
- let partition_columns: HashSet<&str> = binding
728
- .metadata()
729
- .map_err(RubyError::from)?
730
- .partition_columns
750
+ fn get_active_partitions(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
751
+ let schema = rb_self.with_table(|t| {
752
+ let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
753
+ Ok(snapshot.schema().clone())
754
+ })?;
755
+ let metadata = rb_self.with_table(|t| {
756
+ let snapshot = t.snapshot().map_err(RubyError::from).map_err(RbErr::from)?;
757
+ Ok(snapshot.metadata().clone())
758
+ })?;
759
+ let _column_names: HashSet<&str> =
760
+ schema.fields().map(|field| field.name().as_str()).collect();
761
+ let partition_columns: HashSet<&str> = metadata
762
+ .partition_columns()
731
763
  .iter()
732
764
  .map(|col| col.as_str())
733
765
  .collect();
@@ -736,12 +768,15 @@ impl RawDeltaTable {
736
768
 
737
769
  let partition_columns: Vec<&str> = partition_columns.into_iter().collect();
738
770
 
739
- let adds = binding
740
- .snapshot()
741
- .map_err(RubyError::from)?
742
- .get_active_add_actions_by_partitions(&converted_filters)
743
- .map_err(RubyError::from)?
744
- .collect::<Result<Vec<_>, _>>()
771
+ let state = rb_self.cloned_state()?;
772
+ let log_store = rb_self.log_store()?;
773
+ let adds: Vec<_> = rt()
774
+ .block_on(async {
775
+ state
776
+ .get_active_add_actions_by_partitions(&log_store, &converted_filters)
777
+ .try_collect()
778
+ .await
779
+ })
745
780
  .map_err(RubyError::from)?;
746
781
  let active_partitions: HashSet<Vec<(&str, Option<String>)>> = adds
747
782
  .iter()
@@ -749,21 +784,22 @@ impl RawDeltaTable {
749
784
  Ok::<_, RubyError>(
750
785
  partition_columns
751
786
  .iter()
752
- .flat_map(|col| {
753
- Ok::<_, RubyError>((
787
+ .map(|col| {
788
+ (
754
789
  *col,
755
790
  add.partition_values()
756
- .map_err(RubyError::from)?
757
- .get(*col)
791
+ .and_then(|v| {
792
+ v.index_of(col).and_then(|idx| v.value(idx).cloned())
793
+ })
758
794
  .map(|v| v.serialize()),
759
- ))
795
+ )
760
796
  })
761
797
  .collect(),
762
798
  )
763
799
  })
764
800
  .collect();
765
801
 
766
- Ok(RArray::from_iter(active_partitions))
802
+ Ok(ruby.ary_from_iter(active_partitions))
767
803
  }
768
804
 
769
805
  pub fn create_checkpoint(&self) -> RbResult<()> {
@@ -781,15 +817,20 @@ impl RawDeltaTable {
781
817
  }
782
818
 
783
819
  pub fn get_add_file_sizes(&self) -> RbResult<HashMap<String, i64>> {
784
- Ok(self
785
- ._table
786
- .borrow()
787
- .snapshot()
788
- .map_err(RubyError::from)?
789
- .eager_snapshot()
790
- .files()
791
- .map(|f| (f.path().to_string(), f.size()))
792
- .collect::<HashMap<String, i64>>())
820
+ self.with_table(|t| {
821
+ let log_store = t.log_store();
822
+ let sizes: HashMap<String, i64> = rt()
823
+ .block_on(async {
824
+ t.snapshot()?
825
+ .snapshot()
826
+ .files(&log_store, None)
827
+ .map_ok(|f| (f.path().to_string(), f.size()))
828
+ .try_collect()
829
+ .await
830
+ })
831
+ .map_err(RubyError::from)?;
832
+ Ok(sizes)
833
+ })
793
834
  }
794
835
 
795
836
  pub fn delete(
@@ -874,14 +915,13 @@ impl RawDeltaTable {
874
915
  Ok(serde_json::to_string(&metrics).unwrap())
875
916
  }
876
917
 
877
- pub fn transaction_versions(&self) -> RHash {
878
- RHash::from_iter(
879
- self._table
880
- .borrow()
881
- .get_app_transaction_version()
882
- .into_iter()
883
- .map(|(app_id, transaction)| (app_id, RbTransaction::from(transaction))),
884
- )
918
+ pub fn transaction_version(&self, app_id: String) -> RbResult<Option<i64>> {
919
+ // NOTE: this will simplify once we have moved logstore onto state.
920
+ let log_store = self.log_store()?;
921
+ let snapshot = self.with_table(|t| Ok(t.snapshot().map_err(RubyError::from)?.clone()))?;
922
+ Ok(rt()
923
+ .block_on(snapshot.transaction_version(log_store.as_ref(), app_id))
924
+ .map_err(RubyError::from)?)
885
925
  }
886
926
  }
887
927
 
@@ -1293,10 +1333,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1293
1333
  "get_latest_version",
1294
1334
  method!(RawDeltaTable::get_latest_version, 0),
1295
1335
  )?;
1296
- class.define_method(
1297
- "get_earliest_version",
1298
- method!(RawDeltaTable::get_earliest_version, 0),
1299
- )?;
1300
1336
  class.define_method(
1301
1337
  "get_num_index_cols",
1302
1338
  method!(RawDeltaTable::get_num_index_cols, 0),
@@ -1366,8 +1402,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1366
1402
  )?;
1367
1403
  class.define_method("repair", method!(RawDeltaTable::repair, 3))?;
1368
1404
  class.define_method(
1369
- "transaction_versions",
1370
- method!(RawDeltaTable::transaction_versions, 0),
1405
+ "transaction_version",
1406
+ method!(RawDeltaTable::transaction_version, 1),
1371
1407
  )?;
1372
1408
 
1373
1409
  let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;
@@ -1394,6 +1430,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1394
1430
  class.define_method("to_i", method!(ArrowArrayStream::to_i, 0))?;
1395
1431
 
1396
1432
  let class = module.define_class("Field", ruby.class_object())?;
1433
+ class.define_singleton_method("new", function!(Field::new, 2))?;
1397
1434
  class.define_method("name", method!(Field::name, 0))?;
1398
1435
  class.define_method("type", method!(Field::get_type, 0))?;
1399
1436
  class.define_method("nullable", method!(Field::nullable, 0))?;