deltalake-rb 0.1.0 → 0.1.2

This diff shows the changes between two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
@@ -1,28 +1,77 @@
  mod error;
+ mod features;
+ mod merge;
  mod schema;
  mod utils;

  use std::cell::RefCell;
- use std::collections::HashMap;
+ use std::collections::{HashMap, HashSet};
  use std::future::IntoFuture;
+ use std::str::FromStr;
+ use std::time;

- use chrono::Duration;
+ use chrono::{DateTime, Duration, FixedOffset, Utc};
+ use delta_kernel::schema::StructField;
  use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
- use deltalake::kernel::StructType;
+ use deltalake::arrow::record_batch::RecordBatchIterator;
+ use deltalake::checkpoints::{cleanup_metadata, create_checkpoint};
+ use deltalake::datafusion::physical_plan::ExecutionPlan;
+ use deltalake::datafusion::prelude::SessionContext;
+ use deltalake::errors::DeltaTableError;
+ use deltalake::kernel::{scalars::ScalarExt, StructType, Transaction};
+ use deltalake::operations::add_column::AddColumnBuilder;
+ use deltalake::operations::add_feature::AddTableFeatureBuilder;
+ use deltalake::operations::collect_sendable_stream;
+ use deltalake::operations::constraints::ConstraintBuilder;
  use deltalake::operations::delete::DeleteBuilder;
+ use deltalake::operations::drop_constraints::DropConstraintBuilder;
+ use deltalake::operations::filesystem_check::FileSystemCheckBuilder;
+ use deltalake::operations::load_cdf::CdfLoadBuilder;
+ use deltalake::operations::optimize::{OptimizeBuilder, OptimizeType};
+ use deltalake::operations::restore::RestoreBuilder;
+ use deltalake::operations::set_tbl_properties::SetTablePropertiesBuilder;
+ use deltalake::operations::transaction::{CommitProperties, TableReference};
  use deltalake::operations::vacuum::VacuumBuilder;
+ use deltalake::parquet::basic::Compression;
+ use deltalake::parquet::errors::ParquetError;
+ use deltalake::parquet::file::properties::WriterProperties;
+ use deltalake::partitions::PartitionFilter;
  use deltalake::storage::IORuntime;
- use deltalake::DeltaOps;
+ use deltalake::{DeltaOps, DeltaResult};
  use error::DeltaError;
+ use futures::future::join_all;

- use magnus::{function, method, prelude::*, Error, Module, Ruby, Value};
+ use magnus::{
+     function, method, prelude::*, typed_data::Obj, Error, Integer, Module, RArray, RHash, Ruby,
+     TryConvert, Value,
+ };
+ use serde_json::Map;

+ use crate::error::DeltaProtocolError;
+ use crate::error::RbValueError;
  use crate::error::RubyError;
+ use crate::features::TableFeatures;
+ use crate::merge::RbMergeBuilder;
  use crate::schema::{schema_to_rbobject, Field};
  use crate::utils::rt;

  type RbResult<T> = Result<T, Error>;

+ enum PartitionFilterValue {
+     Single(String),
+     Multiple(Vec<String>),
+ }
+
+ impl TryConvert for PartitionFilterValue {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         if let Ok(v) = Vec::<String>::try_convert(val) {
+             Ok(PartitionFilterValue::Multiple(v))
+         } else {
+             Ok(PartitionFilterValue::Single(String::try_convert(val)?))
+         }
+     }
+ }
+
  #[magnus::wrap(class = "DeltaLake::RawDeltaTable")]
  struct RawDeltaTable {
      _table: RefCell<deltalake::DeltaTable>,
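
Note: PartitionFilterValue's TryConvert impl duck-types a single Ruby argument, trying the Array-of-Strings conversion first and falling back to String. A minimal sketch of the same magnus pattern, using a hypothetical IntOrText type that is not part of this diff:

    use magnus::{Error, TryConvert, Value};

    enum IntOrText {
        Int(i64),
        Text(String),
    }

    impl TryConvert for IntOrText {
        fn try_convert(val: Value) -> Result<Self, Error> {
            // Try the more specific conversion first, then fall back;
            // only the final fallback propagates a conversion error.
            if let Ok(i) = i64::try_convert(val) {
                Ok(IntOrText::Int(i))
            } else {
                Ok(IntOrText::Text(String::try_convert(val)?))
            }
        }
    }
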
@@ -38,6 +87,34 @@ struct RawDeltaTableMetaData {
      configuration: HashMap<String, Option<String>>,
  }

+ impl RawDeltaTableMetaData {
+     fn id(&self) -> String {
+         self.id.clone()
+     }
+
+     fn name(&self) -> Option<String> {
+         self.name.clone()
+     }
+
+     fn description(&self) -> Option<String> {
+         self.description.clone()
+     }
+
+     fn partition_columns(&self) -> Vec<String> {
+         self.partition_columns.clone()
+     }
+
+     fn created_time(&self) -> Option<i64> {
+         self.created_time
+     }
+
+     fn configuration(&self) -> HashMap<String, Option<String>> {
+         self.configuration.clone()
+     }
+ }
+
+ type StringVec = Vec<String>;
+
  impl RawDeltaTable {
      pub fn new(
          table_uri: String,
@@ -113,37 +190,140 @@ impl RawDeltaTable {
          })
      }

+     pub fn protocol_versions(&self) -> RbResult<(i32, i32, Option<StringVec>, Option<StringVec>)> {
+         let binding = self._table.borrow();
+         let table_protocol = binding.protocol().map_err(RubyError::from)?;
+         Ok((
+             table_protocol.min_reader_version,
+             table_protocol.min_writer_version,
+             table_protocol
+                 .writer_features
+                 .as_ref()
+                 .and_then(|features| {
+                     let not_empty = !features.is_empty();
+                     not_empty.then(|| {
+                         features
+                             .iter()
+                             .map(|v| v.to_string())
+                             .collect::<Vec<String>>()
+                     })
+                 }),
+             table_protocol
+                 .reader_features
+                 .as_ref()
+                 .and_then(|features| {
+                     let not_empty = !features.is_empty();
+                     not_empty.then(|| {
+                         features
+                             .iter()
+                             .map(|v| v.to_string())
+                             .collect::<Vec<String>>()
+                     })
+                 }),
+         ))
+     }
+
      pub fn load_version(&self, version: i64) -> RbResult<()> {
          Ok(rt()
              .block_on(self._table.borrow_mut().load_version(version))
              .map_err(RubyError::from)?)
      }

-     pub fn files(&self) -> RbResult<Vec<String>> {
-         if !self.has_files()? {
-             return Err(DeltaError::new_err("Table is instantiated without files."));
-         }
+     pub fn get_latest_version(&self) -> RbResult<i64> {
+         Ok(rt()
+             .block_on(self._table.borrow().get_latest_version())
+             .map_err(RubyError::from)?)
+     }

+     pub fn get_earliest_version(&self) -> RbResult<i64> {
+         Ok(rt()
+             .block_on(self._table.borrow().get_earliest_version())
+             .map_err(RubyError::from)?)
+     }
+
+     pub fn get_num_index_cols(&self) -> RbResult<i32> {
          Ok(self
              ._table
              .borrow()
-             .get_files_iter()
+             .snapshot()
              .map_err(RubyError::from)?
-             .map(|f| f.to_string())
-             .collect())
+             .config()
+             .num_indexed_cols())
      }

-     pub fn file_uris(&self) -> RbResult<Vec<String>> {
-         if !self._table.borrow().config.require_files {
-             return Err(DeltaError::new_err("Table is initiated without files."));
-         }
-
+     pub fn get_stats_columns(&self) -> RbResult<Option<Vec<String>>> {
          Ok(self
              ._table
              .borrow()
-             .get_file_uris()
+             .snapshot()
              .map_err(RubyError::from)?
-             .collect())
+             .config()
+             .stats_columns()
+             .map(|v| v.iter().map(|v| v.to_string()).collect::<Vec<String>>()))
+     }
+
+     pub fn load_with_datetime(&self, ds: String) -> RbResult<()> {
+         let datetime =
+             DateTime::<Utc>::from(DateTime::<FixedOffset>::parse_from_rfc3339(&ds).map_err(
+                 |err| RbValueError::new_err(format!("Failed to parse datetime string: {err}")),
+             )?);
+         Ok(rt()
+             .block_on(self._table.borrow_mut().load_with_datetime(datetime))
+             .map_err(RubyError::from)?)
+     }
+
+     pub fn files(
+         &self,
+         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
+     ) -> RbResult<Vec<String>> {
+         if !self.has_files()? {
+             return Err(DeltaError::new_err("Table is instantiated without files."));
+         }
+
+         if let Some(filters) = partition_filters {
+             let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
+             Ok(self
+                 ._table
+                 .borrow()
+                 .get_files_by_partitions(&filters)
+                 .map_err(RubyError::from)?
+                 .into_iter()
+                 .map(|p| p.to_string())
+                 .collect())
+         } else {
+             Ok(self
+                 ._table
+                 .borrow()
+                 .get_files_iter()
+                 .map_err(RubyError::from)?
+                 .map(|f| f.to_string())
+                 .collect())
+         }
+     }
+
+     pub fn file_uris(
+         &self,
+         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
+     ) -> RbResult<Vec<String>> {
+         if !self._table.borrow().config.require_files {
+             return Err(DeltaError::new_err("Table is instantiated without files."));
+         }
+
+         if let Some(filters) = partition_filters {
+             let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
+             Ok(self
+                 ._table
+                 .borrow()
+                 .get_file_uris_by_partitions(&filters)
+                 .map_err(RubyError::from)?)
+         } else {
+             Ok(self
+                 ._table
+                 .borrow()
+                 .get_file_uris()
+                 .map_err(RubyError::from)?
+                 .collect())
+         }
      }

      pub fn schema(&self) -> RbResult<Value> {
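
Note: files and file_uris now take an optional list of (column, op, value) tuples. A sketch of how the two PartitionFilterValue variants map onto deltalake's PartitionFilter (illustrative column names and values):

    use deltalake::errors::DeltaTableError;
    use deltalake::partitions::PartitionFilter;

    fn example_filters() -> Result<(), DeltaTableError> {
        // Ruby ["year", "=", "2024"] arrives as PartitionFilterValue::Single.
        let _single = PartitionFilter::try_from(("year", "=", "2024"))?;
        // Ruby ["year", "in", ["2023", "2024"]] arrives as PartitionFilterValue::Multiple.
        let _multiple = PartitionFilter::try_from(("year", "in", vec!["2023", "2024"].as_slice()))?;
        Ok(())
    }
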
@@ -157,6 +337,8 @@ impl RawDeltaTable {
          dry_run: bool,
          retention_hours: Option<u64>,
          enforce_retention_duration: bool,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
      ) -> RbResult<Vec<String>> {
          let mut cmd = VacuumBuilder::new(
              self._table.borrow().log_store(),
@@ -172,11 +354,350 @@ impl RawDeltaTable {
              cmd = cmd.with_retention_period(Duration::hours(retention_period as i64));
          }

+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
          let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
          self._table.borrow_mut().state = table.state;
          Ok(metrics.files_deleted)
      }

+     pub fn compact_optimize(
+         &self,
+         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
+         target_size: Option<i64>,
+         max_concurrent_tasks: Option<usize>,
+         min_commit_interval: Option<u64>,
+         writer_properties: Option<RbWriterProperties>,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+     ) -> RbResult<String> {
+         let mut cmd = OptimizeBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
+         if let Some(size) = target_size {
+             cmd = cmd.with_target_size(size);
+         }
+         if let Some(commit_interval) = min_commit_interval {
+             cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
+         }
+
+         if let Some(writer_props) = writer_properties {
+             cmd = cmd.with_writer_properties(
+                 set_writer_properties(writer_props).map_err(RubyError::from)?,
+             );
+         }
+
+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
+
+         let converted_filters = convert_partition_filters(partition_filters.unwrap_or_default())
+             .map_err(RubyError::from)?;
+         cmd = cmd.with_filters(&converted_filters);
+
+         let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(serde_json::to_string(&metrics).unwrap())
+     }
+
+     pub fn z_order_optimize(
+         &self,
+         z_order_columns: Vec<String>,
+         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
+         target_size: Option<i64>,
+         max_concurrent_tasks: Option<usize>,
+         max_spill_size: usize,
+         min_commit_interval: Option<u64>,
+         writer_properties: Option<RbWriterProperties>,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+     ) -> RbResult<String> {
+         let mut cmd = OptimizeBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
+         .with_max_spill_size(max_spill_size)
+         .with_type(OptimizeType::ZOrder(z_order_columns));
+         if let Some(size) = target_size {
+             cmd = cmd.with_target_size(size);
+         }
+         if let Some(commit_interval) = min_commit_interval {
+             cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
+         }
+
+         if let Some(writer_props) = writer_properties {
+             cmd = cmd.with_writer_properties(
+                 set_writer_properties(writer_props).map_err(RubyError::from)?,
+             );
+         }
+
+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
+
+         let converted_filters = convert_partition_filters(partition_filters.unwrap_or_default())
+             .map_err(RubyError::from)?;
+         cmd = cmd.with_filters(&converted_filters);
+
+         let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(serde_json::to_string(&metrics).unwrap())
+     }
+
+     pub fn add_columns(&self, fields: RArray) -> RbResult<()> {
+         let fields = fields.typecheck::<Obj<Field>>()?;
+         let mut cmd = AddColumnBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         );
+
+         let new_fields = fields
+             .iter()
+             .map(|v| v.inner.clone())
+             .collect::<Vec<StructField>>();
+
+         cmd = cmd.with_fields(new_fields);
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn add_feature(
+         &self,
+         feature: RArray,
+         allow_protocol_versions_increase: bool,
+     ) -> RbResult<()> {
+         let feature = feature
+             .into_iter()
+             .map(|v| TableFeatures::try_convert(v))
+             .collect::<RbResult<Vec<_>>>()?;
+         let cmd = AddTableFeatureBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_features(feature)
+         .with_allow_protocol_versions_increase(allow_protocol_versions_increase);
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn add_constraints(&self, constraints: HashMap<String, String>) -> RbResult<()> {
+         let mut cmd = ConstraintBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         );
+
+         for (col_name, expression) in constraints {
+             cmd = cmd.with_constraint(col_name.clone(), expression.clone());
+         }
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn drop_constraints(&self, name: String, raise_if_not_exists: bool) -> RbResult<()> {
+         let cmd = DropConstraintBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_constraint(name)
+         .with_raise_if_not_exists(raise_if_not_exists);
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn load_cdf(
+         &self,
+         starting_version: i64,
+         ending_version: Option<i64>,
+         starting_timestamp: Option<String>,
+         ending_timestamp: Option<String>,
+         columns: Option<Vec<String>>,
+     ) -> RbResult<ArrowArrayStream> {
+         let ctx = SessionContext::new();
+         let mut cdf_read = CdfLoadBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_starting_version(starting_version);
+
+         if let Some(ev) = ending_version {
+             cdf_read = cdf_read.with_ending_version(ev);
+         }
+         if let Some(st) = starting_timestamp {
+             let starting_ts: DateTime<Utc> = DateTime::<Utc>::from_str(&st)
+                 .map_err(|pe| RbValueError::new_err(pe.to_string()))?
+                 .to_utc();
+             cdf_read = cdf_read.with_starting_timestamp(starting_ts);
+         }
+         if let Some(et) = ending_timestamp {
+             let ending_ts = DateTime::<Utc>::from_str(&et)
+                 .map_err(|pe| RbValueError::new_err(pe.to_string()))?
+                 .to_utc();
+             cdf_read = cdf_read.with_ending_timestamp(ending_ts);
+         }
+
+         if let Some(columns) = columns {
+             cdf_read = cdf_read.with_columns(columns);
+         }
+
+         cdf_read = cdf_read.with_session_ctx(ctx.clone());
+
+         let plan = rt().block_on(cdf_read.build()).map_err(RubyError::from)?;
+
+         let mut tasks = vec![];
+         for p in 0..plan.properties().output_partitioning().partition_count() {
+             let inner_plan = plan.clone();
+             let partition_batch = inner_plan.execute(p, ctx.task_ctx()).unwrap();
+             let handle = rt().spawn(collect_sendable_stream(partition_batch));
+             tasks.push(handle);
+         }
+
+         // This is unfortunate: all partitions are drained into memory before streaming to Ruby.
+         let batches = rt()
+             .block_on(join_all(tasks))
+             .into_iter()
+             .flatten()
+             .collect::<Result<Vec<Vec<_>>, _>>()
+             .unwrap()
+             .into_iter()
+             .flatten()
+             .map(Ok);
+         let batch_iter = RecordBatchIterator::new(batches, plan.schema());
+         let ffi_stream = FFI_ArrowArrayStream::new(Box::new(batch_iter));
+         Ok(ArrowArrayStream { stream: ffi_stream })
+     }
+
+     pub fn create_merge_builder(
+         &self,
+         source: RbArrowType<ArrowArrayStreamReader>,
+         predicate: String,
+         source_alias: Option<String>,
+         target_alias: Option<String>,
+         safe_cast: bool,
+         writer_properties: Option<RbWriterProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+         commit_properties: Option<RbCommitProperties>,
+     ) -> RbResult<RbMergeBuilder> {
+         Ok(RbMergeBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+             source.0,
+             predicate,
+             source_alias,
+             target_alias,
+             safe_cast,
+             writer_properties,
+             post_commithook_properties,
+             commit_properties,
+         )
+         .map_err(RubyError::from)?)
+     }
+
+     pub fn merge_execute(&self, merge_builder: &RbMergeBuilder) -> RbResult<String> {
+         let (table, metrics) = merge_builder.execute().map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(metrics)
+     }
+
+     pub fn restore(
+         &self,
+         target: Option<Value>,
+         ignore_missing_files: bool,
+         protocol_downgrade_allowed: bool,
+         commit_properties: Option<RbCommitProperties>,
+     ) -> RbResult<String> {
+         let mut cmd = RestoreBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         );
+         if let Some(val) = target {
+             if let Some(version) = Integer::from_value(val) {
+                 cmd = cmd.with_version_to_restore(version.to_i64()?)
+             }
+             if let Ok(ds) = String::try_convert(val) {
+                 let datetime = DateTime::<Utc>::from(
+                     DateTime::<FixedOffset>::parse_from_rfc3339(ds.as_ref()).map_err(|err| {
+                         RbValueError::new_err(format!("Failed to parse datetime string: {err}"))
+                     })?,
+                 );
+                 cmd = cmd.with_datetime_to_restore(datetime)
+             }
+         }
+         cmd = cmd.with_ignore_missing_files(ignore_missing_files);
+         cmd = cmd.with_protocol_downgrade_allowed(protocol_downgrade_allowed);
+
+         if let Some(commit_properties) = maybe_create_commit_properties(commit_properties, None) {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
+
+         let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(serde_json::to_string(&metrics).unwrap())
+     }
+
+     pub fn history(&self, limit: Option<usize>) -> RbResult<Vec<String>> {
+         let history = rt()
+             .block_on(self._table.borrow().history(limit))
+             .map_err(RubyError::from)?;
+         Ok(history
+             .iter()
+             .map(|c| serde_json::to_string(c).unwrap())
+             .collect())
+     }
+
      pub fn update_incremental(&self) -> RbResult<()> {
          #[allow(deprecated)]
          Ok(rt()
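
Note: load_cdf executes every DataFusion partition to completion, then re-exports the collected batches over the Arrow C stream interface so the Ruby side can pull them without copying. A reduced sketch of that export step, using deltalake's arrow re-exports:

    use std::sync::Arc;

    use deltalake::arrow::datatypes::Schema;
    use deltalake::arrow::ffi_stream::FFI_ArrowArrayStream;
    use deltalake::arrow::record_batch::{RecordBatch, RecordBatchIterator};

    // Wrap already-collected batches in a C-ABI stream; the consumer drives
    // the stream's callbacks and releases it when done.
    fn export(batches: Vec<RecordBatch>, schema: Arc<Schema>) -> FFI_ArrowArrayStream {
        let reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
        FFI_ArrowArrayStream::new(Box::new(reader))
    }
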
@@ -184,7 +705,89 @@ impl RawDeltaTable {
              .map_err(RubyError::from)?)
      }

-     pub fn delete(&self, predicate: Option<String>) -> RbResult<String> {
+     fn get_active_partitions(&self) -> RbResult<RArray> {
+         let binding = self._table.borrow();
+         let _column_names: HashSet<&str> = binding
+             .get_schema()
+             .map_err(|_| DeltaProtocolError::new_err("table does not yet have a schema"))?
+             .fields()
+             .map(|field| field.name().as_str())
+             .collect();
+         let partition_columns: HashSet<&str> = binding
+             .metadata()
+             .map_err(RubyError::from)?
+             .partition_columns
+             .iter()
+             .map(|col| col.as_str())
+             .collect();
+
+         let converted_filters = Vec::new();
+
+         let partition_columns: Vec<&str> = partition_columns.into_iter().collect();
+
+         let adds = binding
+             .snapshot()
+             .map_err(RubyError::from)?
+             .get_active_add_actions_by_partitions(&converted_filters)
+             .map_err(RubyError::from)?
+             .collect::<Result<Vec<_>, _>>()
+             .map_err(RubyError::from)?;
+         let active_partitions: HashSet<Vec<(&str, Option<String>)>> = adds
+             .iter()
+             .flat_map(|add| {
+                 Ok::<_, RubyError>(
+                     partition_columns
+                         .iter()
+                         .flat_map(|col| {
+                             Ok::<_, RubyError>((
+                                 *col,
+                                 add.partition_values()
+                                     .map_err(RubyError::from)?
+                                     .get(*col)
+                                     .map(|v| v.serialize()),
+                             ))
+                         })
+                         .collect(),
+                 )
+             })
+             .collect();
+
+         Ok(RArray::from_iter(active_partitions))
+     }
+
+     pub fn create_checkpoint(&self) -> RbResult<()> {
+         rt().block_on(create_checkpoint(&self._table.borrow()))
+             .map_err(RubyError::from)?;
+
+         Ok(())
+     }
+
+     pub fn cleanup_metadata(&self) -> RbResult<()> {
+         rt().block_on(cleanup_metadata(&self._table.borrow()))
+             .map_err(RubyError::from)?;
+
+         Ok(())
+     }
+
+     pub fn get_add_file_sizes(&self) -> RbResult<HashMap<String, i64>> {
+         Ok(self
+             ._table
+             .borrow()
+             .snapshot()
+             .map_err(RubyError::from)?
+             .eager_snapshot()
+             .files()
+             .map(|f| (f.path().to_string(), f.size()))
+             .collect::<HashMap<String, i64>>())
+     }
+
+     pub fn delete(
+         &self,
+         predicate: Option<String>,
+         writer_properties: Option<RbWriterProperties>,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+     ) -> RbResult<String> {
          let mut cmd = DeleteBuilder::new(
              self._table.borrow().log_store(),
              self._table
@@ -196,43 +799,366 @@ impl RawDeltaTable {
          if let Some(predicate) = predicate {
              cmd = cmd.with_predicate(predicate);
          }
+         if let Some(writer_props) = writer_properties {
+             cmd = cmd.with_writer_properties(
+                 set_writer_properties(writer_props).map_err(RubyError::from)?,
+             );
+         }
+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
+
+         let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(serde_json::to_string(&metrics).unwrap())
+     }
+
+     pub fn set_table_properties(
+         &self,
+         properties: HashMap<String, String>,
+         raise_if_not_exists: bool,
+     ) -> RbResult<()> {
+         let cmd = SetTablePropertiesBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_properties(properties)
+         .with_raise_if_not_exists(raise_if_not_exists);
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn repair(
+         &self,
+         dry_run: bool,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+     ) -> RbResult<String> {
+         let mut cmd = FileSystemCheckBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_dry_run(dry_run);
+
+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }

          let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
          self._table.borrow_mut().state = table.state;
          Ok(serde_json::to_string(&metrics).unwrap())
      }
+
+     pub fn transaction_versions(&self) -> RHash {
+         RHash::from_iter(
+             self._table
+                 .borrow()
+                 .get_app_transaction_version()
+                 .into_iter()
+                 .map(|(app_id, transaction)| (app_id, RbTransaction::from(transaction))),
+         )
+     }
  }

- impl RawDeltaTableMetaData {
-     fn id(&self) -> String {
-         self.id.clone()
+ fn set_post_commithook_properties(
+     mut commit_properties: CommitProperties,
+     post_commithook_properties: RbPostCommitHookProperties,
+ ) -> CommitProperties {
+     commit_properties =
+         commit_properties.with_create_checkpoint(post_commithook_properties.create_checkpoint);
+     commit_properties = commit_properties
+         .with_cleanup_expired_logs(post_commithook_properties.cleanup_expired_logs);
+     commit_properties
+ }
+
+ fn set_writer_properties(writer_properties: RbWriterProperties) -> DeltaResult<WriterProperties> {
+     let mut properties = WriterProperties::builder();
+     let data_page_size_limit = writer_properties.data_page_size_limit;
+     let dictionary_page_size_limit = writer_properties.dictionary_page_size_limit;
+     let data_page_row_count_limit = writer_properties.data_page_row_count_limit;
+     let write_batch_size = writer_properties.write_batch_size;
+     let max_row_group_size = writer_properties.max_row_group_size;
+     let compression = writer_properties.compression;
+     let statistics_truncate_length = writer_properties.statistics_truncate_length;
+     let default_column_properties = writer_properties.default_column_properties;
+     let column_properties = writer_properties.column_properties;
+
+     if let Some(data_page_size) = data_page_size_limit {
+         properties = properties.set_data_page_size_limit(data_page_size);
+     }
+     if let Some(dictionary_page_size) = dictionary_page_size_limit {
+         properties = properties.set_dictionary_page_size_limit(dictionary_page_size);
+     }
+     if let Some(data_page_row_count) = data_page_row_count_limit {
+         properties = properties.set_data_page_row_count_limit(data_page_row_count);
      }
+     if let Some(batch_size) = write_batch_size {
+         properties = properties.set_write_batch_size(batch_size);
+     }
+     if let Some(row_group_size) = max_row_group_size {
+         properties = properties.set_max_row_group_size(row_group_size);
+     }
+     properties = properties.set_statistics_truncate_length(statistics_truncate_length);

-     fn name(&self) -> Option<String> {
-         self.name.clone()
+     if let Some(compression) = compression {
+         let compress: Compression = compression
+             .parse()
+             .map_err(|err: ParquetError| DeltaTableError::Generic(err.to_string()))?;
+
+         properties = properties.set_compression(compress);
      }

-     fn description(&self) -> Option<String> {
-         self.description.clone()
+     if let Some(default_column_properties) = default_column_properties {
+         if let Some(dictionary_enabled) = default_column_properties.dictionary_enabled {
+             properties = properties.set_dictionary_enabled(dictionary_enabled);
+         }
+         if let Some(max_statistics_size) = default_column_properties.max_statistics_size {
+             properties = properties.set_max_statistics_size(max_statistics_size);
+         }
+         if let Some(bloom_filter_properties) = default_column_properties.bloom_filter_properties {
+             if let Some(set_bloom_filter_enabled) = bloom_filter_properties.set_bloom_filter_enabled
+             {
+                 properties = properties.set_bloom_filter_enabled(set_bloom_filter_enabled);
+             }
+             if let Some(bloom_filter_fpp) = bloom_filter_properties.fpp {
+                 properties = properties.set_bloom_filter_fpp(bloom_filter_fpp);
+             }
+             if let Some(bloom_filter_ndv) = bloom_filter_properties.ndv {
+                 properties = properties.set_bloom_filter_ndv(bloom_filter_ndv);
+             }
+         }
+     }
+     if let Some(column_properties) = column_properties {
+         for (column_name, column_prop) in column_properties {
+             if let Some(column_prop) = column_prop {
+                 if let Some(dictionary_enabled) = column_prop.dictionary_enabled {
+                     properties = properties.set_column_dictionary_enabled(
+                         column_name.clone().into(),
+                         dictionary_enabled,
+                     );
+                 }
+                 if let Some(bloom_filter_properties) = column_prop.bloom_filter_properties {
+                     if let Some(set_bloom_filter_enabled) =
+                         bloom_filter_properties.set_bloom_filter_enabled
+                     {
+                         properties = properties.set_column_bloom_filter_enabled(
+                             column_name.clone().into(),
+                             set_bloom_filter_enabled,
+                         );
+                     }
+                     if let Some(bloom_filter_fpp) = bloom_filter_properties.fpp {
+                         properties = properties.set_column_bloom_filter_fpp(
+                             column_name.clone().into(),
+                             bloom_filter_fpp,
+                         );
+                     }
+                     if let Some(bloom_filter_ndv) = bloom_filter_properties.ndv {
+                         properties = properties
+                             .set_column_bloom_filter_ndv(column_name.into(), bloom_filter_ndv);
+                     }
+                 }
+             }
+         }
      }
+     Ok(properties.build())
+ }

-     fn partition_columns(&self) -> Vec<String> {
-         self.partition_columns.clone()
+ fn convert_partition_filters(
+     partitions_filters: Vec<(String, String, PartitionFilterValue)>,
+ ) -> Result<Vec<PartitionFilter>, DeltaTableError> {
+     partitions_filters
+         .into_iter()
+         .map(|filter| match filter {
+             (key, op, PartitionFilterValue::Single(v)) => {
+                 let key: &'_ str = key.as_ref();
+                 let op: &'_ str = op.as_ref();
+                 let v: &'_ str = v.as_ref();
+                 PartitionFilter::try_from((key, op, v))
+             }
+             (key, op, PartitionFilterValue::Multiple(v)) => {
+                 let key: &'_ str = key.as_ref();
+                 let op: &'_ str = op.as_ref();
+                 let v: Vec<&'_ str> = v.iter().map(|v| v.as_ref()).collect();
+                 PartitionFilter::try_from((key, op, v.as_slice()))
+             }
+         })
+         .collect()
+ }
+
+ fn maybe_create_commit_properties(
+     maybe_commit_properties: Option<RbCommitProperties>,
+     post_commithook_properties: Option<RbPostCommitHookProperties>,
+ ) -> Option<CommitProperties> {
+     if maybe_commit_properties.is_none() && post_commithook_properties.is_none() {
+         return None;
      }
+     let mut commit_properties = CommitProperties::default();

-     fn created_time(&self) -> Option<i64> {
-         self.created_time
+     if let Some(commit_props) = maybe_commit_properties {
+         if let Some(metadata) = commit_props.custom_metadata {
+             let json_metadata: Map<String, serde_json::Value> =
+                 metadata.into_iter().map(|(k, v)| (k, v.into())).collect();
+             commit_properties = commit_properties.with_metadata(json_metadata);
+         };
+
+         if let Some(max_retries) = commit_props.max_commit_retries {
+             commit_properties = commit_properties.with_max_retries(max_retries);
+         };
+
+         if let Some(app_transactions) = commit_props.app_transactions {
+             let app_transactions = app_transactions.iter().map(Transaction::from).collect();
+             commit_properties = commit_properties.with_application_transactions(app_transactions);
+         }
      }

-     fn configuration(&self) -> HashMap<String, Option<String>> {
-         self.configuration.clone()
+     if let Some(post_commit_hook_props) = post_commithook_properties {
+         commit_properties =
+             set_post_commithook_properties(commit_properties, post_commit_hook_props)
+     }
+     Some(commit_properties)
+ }
+
+ fn rust_core_version() -> String {
+     deltalake::crate_version().to_string()
+ }
+
+ pub struct BloomFilterProperties {
+     pub set_bloom_filter_enabled: Option<bool>,
+     pub fpp: Option<f64>,
+     pub ndv: Option<u64>,
+ }
+
+ impl TryConvert for BloomFilterProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(BloomFilterProperties {
+             set_bloom_filter_enabled: val.funcall("set_bloom_filter_enabled", ())?,
+             fpp: val.funcall("fpp", ())?,
+             ndv: val.funcall("ndv", ())?,
+         })
+     }
+ }
+
+ pub struct ColumnProperties {
+     pub dictionary_enabled: Option<bool>,
+     pub max_statistics_size: Option<usize>,
+     pub bloom_filter_properties: Option<BloomFilterProperties>,
+ }
+
+ impl TryConvert for ColumnProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(ColumnProperties {
+             dictionary_enabled: val.funcall("dictionary_enabled", ())?,
+             max_statistics_size: val.funcall("max_statistics_size", ())?,
+             bloom_filter_properties: val.funcall("bloom_filter_properties", ())?,
+         })
+     }
+ }
+
+ pub struct RbWriterProperties {
+     data_page_size_limit: Option<usize>,
+     dictionary_page_size_limit: Option<usize>,
+     data_page_row_count_limit: Option<usize>,
+     write_batch_size: Option<usize>,
+     max_row_group_size: Option<usize>,
+     statistics_truncate_length: Option<usize>,
+     compression: Option<String>,
+     default_column_properties: Option<ColumnProperties>,
+     column_properties: Option<HashMap<String, Option<ColumnProperties>>>,
+ }
+
+ impl TryConvert for RbWriterProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(RbWriterProperties {
+             data_page_size_limit: val.funcall("data_page_size_limit", ())?,
+             dictionary_page_size_limit: val.funcall("dictionary_page_size_limit", ())?,
+             data_page_row_count_limit: val.funcall("data_page_row_count_limit", ())?,
+             write_batch_size: val.funcall("write_batch_size", ())?,
+             max_row_group_size: val.funcall("max_row_group_size", ())?,
+             statistics_truncate_length: val.funcall("statistics_truncate_length", ())?,
+             compression: val.funcall("compression", ())?,
+             default_column_properties: val.funcall("default_column_properties", ())?,
+             // TODO fix
+             column_properties: None,
+         })
+     }
+ }
+
+ pub struct RbPostCommitHookProperties {
+     create_checkpoint: bool,
+     cleanup_expired_logs: Option<bool>,
+ }
+
+ impl TryConvert for RbPostCommitHookProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(RbPostCommitHookProperties {
+             create_checkpoint: val.funcall("create_checkpoint", ())?,
+             cleanup_expired_logs: val.funcall("cleanup_expired_logs", ())?,
+         })
+     }
+ }
+
+ #[magnus::wrap(class = "DeltaLake::Transaction")]
+ pub struct RbTransaction {
+     pub app_id: String,
+     pub version: i64,
+     pub last_updated: Option<i64>,
+ }
+
+ impl From<Transaction> for RbTransaction {
+     fn from(value: Transaction) -> Self {
+         RbTransaction {
+             app_id: value.app_id,
+             version: value.version,
+             last_updated: value.last_updated,
+         }
+     }
+ }
+
+ impl From<&RbTransaction> for Transaction {
+     fn from(value: &RbTransaction) -> Self {
+         Transaction {
+             app_id: value.app_id.clone(),
+             version: value.version,
+             last_updated: value.last_updated,
+         }
+     }
+ }
+
+ pub struct RbCommitProperties {
+     custom_metadata: Option<HashMap<String, String>>,
+     max_commit_retries: Option<usize>,
+     app_transactions: Option<Vec<RbTransaction>>,
+ }
+
+ impl TryConvert for RbCommitProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(RbCommitProperties {
+             custom_metadata: val.funcall("custom_metadata", ())?,
+             max_commit_retries: val.funcall("max_commit_retries", ())?,
+             // TODO fix
+             app_transactions: None,
+         })
      }
  }

  #[allow(clippy::too_many_arguments)]
  fn write_to_deltalake(
      table_uri: String,
-     data: Value,
+     data: RbArrowType<ArrowArrayStreamReader>,
      mode: String,
      table: Option<&RawDeltaTable>,
      schema_mode: Option<String>,
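
Note: every mutating operation above funnels its optional commit settings through maybe_create_commit_properties. A sketch of what that helper effectively builds when Ruby supplies custom metadata and a retry budget (illustrative values):

    use deltalake::operations::transaction::CommitProperties;
    use serde_json::Map;

    fn example_commit_properties() -> CommitProperties {
        // Custom metadata is recorded in the commitInfo of the Delta log entry.
        let mut metadata = Map::new();
        metadata.insert("userName".to_string(), "etl-job".into());
        CommitProperties::default()
            .with_metadata(metadata)
            .with_max_retries(3)
    }
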
@@ -243,16 +1169,11 @@ fn write_to_deltalake(
      description: Option<String>,
      configuration: Option<HashMap<String, Option<String>>>,
      storage_options: Option<HashMap<String, String>>,
+     writer_properties: Option<RbWriterProperties>,
+     commit_properties: Option<RbCommitProperties>,
+     post_commithook_properties: Option<RbPostCommitHookProperties>,
  ) -> RbResult<()> {
-     let capsule_pointer: usize = data.funcall("to_i", ())?;
-
-     // use similar approach as Polars to avoid copy
-     let stream_ptr =
-         Box::new(unsafe { std::ptr::replace(capsule_pointer as _, FFI_ArrowArrayStream::empty()) });
-     let stream = ArrowArrayStreamReader::try_new(*stream_ptr)
-         .map_err(|err| DeltaError::new_err(err.to_string()))?;
-
-     let batches = stream.map(|batch| batch.unwrap()).collect::<Vec<_>>();
+     let batches = data.0.map(|batch| batch.unwrap()).collect::<Vec<_>>();
      let save_mode = mode.parse().map_err(RubyError::from)?;

      let options = storage_options.clone().unwrap_or_default();
@@ -273,6 +1194,12 @@ fn write_to_deltalake(
          builder = builder.with_partition_columns(partition_columns);
      }

+     if let Some(writer_props) = writer_properties {
+         builder = builder.with_writer_properties(
+             set_writer_properties(writer_props).map_err(RubyError::from)?,
+         );
+     }
+
      if let Some(name) = &name {
          builder = builder.with_table_name(name);
      };
@@ -293,18 +1220,55 @@ fn write_to_deltalake(
          builder = builder.with_configuration(config);
      };

+     if let Some(commit_properties) =
+         maybe_create_commit_properties(commit_properties, post_commithook_properties)
+     {
+         builder = builder.with_commit_properties(commit_properties);
+     };
+
      rt().block_on(builder.into_future())
          .map_err(RubyError::from)?;

      Ok(())
  }

+ pub struct RbArrowType<T>(pub T);
+
+ impl TryConvert for RbArrowType<ArrowArrayStreamReader> {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         let addr: usize = val.funcall("to_i", ())?;
+
+         // use similar approach as Polars to consume pointer and avoid copy
+         let stream_ptr =
+             Box::new(unsafe { std::ptr::replace(addr as _, FFI_ArrowArrayStream::empty()) });
+
+         Ok(RbArrowType(
+             ArrowArrayStreamReader::try_new(*stream_ptr)
+                 .map_err(|err| DeltaError::new_err(err.to_string()))?,
+         ))
+     }
+ }
+
+ #[magnus::wrap(class = "DeltaLake::ArrowArrayStream")]
+ pub struct ArrowArrayStream {
+     stream: FFI_ArrowArrayStream,
+ }
+
+ impl ArrowArrayStream {
+     pub fn to_i(&self) -> usize {
+         (&self.stream as *const _) as usize
+     }
+ }
+
  #[magnus::init]
  fn init(ruby: &Ruby) -> RbResult<()> {
      deltalake::aws::register_handlers(None);
+     deltalake::azure::register_handlers(None);
+     deltalake::gcp::register_handlers(None);

      let module = ruby.define_module("DeltaLake")?;
-     module.define_singleton_method("write_deltalake_rust", function!(write_to_deltalake, 12))?;
+     module.define_singleton_method("write_deltalake_rust", function!(write_to_deltalake, 15))?;
+     module.define_singleton_method("rust_core_version", function!(rust_core_version, 0))?;

      let class = module.define_class("RawDeltaTable", ruby.class_object())?;
      class.define_singleton_method("new", function!(RawDeltaTable::new, 5))?;
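
Note: RbArrowType and ArrowArrayStream move Arrow data across the Ruby/Rust boundary as the raw address of an FFI_ArrowArrayStream; std::ptr::replace swaps an empty stream into the exporter's slot so the stream's release callback runs exactly once. A reduced sketch of the import side (assumes addr really is the address of a live stream whose ownership the caller gives up):

    use deltalake::arrow::ffi_stream::FFI_ArrowArrayStream;

    // SAFETY: `addr` must point at a valid FFI_ArrowArrayStream that the
    // exporter will no longer use or free.
    unsafe fn take_stream(addr: usize) -> FFI_ArrowArrayStream {
        // The exporter is left holding an empty stream, so dropping its
        // copy releases nothing.
        std::ptr::replace(addr as *mut FFI_ArrowArrayStream, FFI_ArrowArrayStream::empty())
    }
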
@@ -313,16 +1277,91 @@ fn init(ruby: &Ruby) -> RbResult<()> {
      class.define_method("version", method!(RawDeltaTable::version, 0))?;
      class.define_method("has_files", method!(RawDeltaTable::has_files, 0))?;
      class.define_method("metadata", method!(RawDeltaTable::metadata, 0))?;
+     class.define_method(
+         "protocol_versions",
+         method!(RawDeltaTable::protocol_versions, 0),
+     )?;
      class.define_method("load_version", method!(RawDeltaTable::load_version, 1))?;
-     class.define_method("files", method!(RawDeltaTable::files, 0))?;
-     class.define_method("file_uris", method!(RawDeltaTable::file_uris, 0))?;
+     class.define_method(
+         "get_latest_version",
+         method!(RawDeltaTable::get_latest_version, 0),
+     )?;
+     class.define_method(
+         "get_earliest_version",
+         method!(RawDeltaTable::get_earliest_version, 0),
+     )?;
+     class.define_method(
+         "get_num_index_cols",
+         method!(RawDeltaTable::get_num_index_cols, 0),
+     )?;
+     class.define_method(
+         "get_stats_columns",
+         method!(RawDeltaTable::get_stats_columns, 0),
+     )?;
+     class.define_method(
+         "load_with_datetime",
+         method!(RawDeltaTable::load_with_datetime, 1),
+     )?;
+     class.define_method("files", method!(RawDeltaTable::files, 1))?;
+     class.define_method("file_uris", method!(RawDeltaTable::file_uris, 1))?;
      class.define_method("schema", method!(RawDeltaTable::schema, 0))?;
-     class.define_method("vacuum", method!(RawDeltaTable::vacuum, 3))?;
+     class.define_method("vacuum", method!(RawDeltaTable::vacuum, 5))?;
+     class.define_method(
+         "compact_optimize",
+         method!(RawDeltaTable::compact_optimize, 7),
+     )?;
+     class.define_method(
+         "z_order_optimize",
+         method!(RawDeltaTable::z_order_optimize, 9),
+     )?;
+     class.define_method("add_columns", method!(RawDeltaTable::add_columns, 1))?;
+     class.define_method("add_feature", method!(RawDeltaTable::add_feature, 2))?;
+     class.define_method(
+         "add_constraints",
+         method!(RawDeltaTable::add_constraints, 1),
+     )?;
+     class.define_method(
+         "drop_constraints",
+         method!(RawDeltaTable::drop_constraints, 2),
+     )?;
+     class.define_method("load_cdf", method!(RawDeltaTable::load_cdf, 5))?;
+     class.define_method(
+         "create_merge_builder",
+         method!(RawDeltaTable::create_merge_builder, 8),
+     )?;
+     class.define_method("merge_execute", method!(RawDeltaTable::merge_execute, 1))?;
+     class.define_method("restore", method!(RawDeltaTable::restore, 4))?;
+     class.define_method("history", method!(RawDeltaTable::history, 1))?;
      class.define_method(
          "update_incremental",
          method!(RawDeltaTable::update_incremental, 0),
      )?;
-     class.define_method("delete", method!(RawDeltaTable::delete, 1))?;
+     class.define_method(
+         "get_active_partitions",
+         method!(RawDeltaTable::get_active_partitions, 0),
+     )?;
+     class.define_method(
+         "create_checkpoint",
+         method!(RawDeltaTable::create_checkpoint, 0),
+     )?;
+     class.define_method(
+         "cleanup_metadata",
+         method!(RawDeltaTable::cleanup_metadata, 0),
+     )?;
+     class.define_method(
+         "get_add_file_sizes",
+         method!(RawDeltaTable::get_add_file_sizes, 0),
+     )?;
+     class.define_method("delete", method!(RawDeltaTable::delete, 4))?;
+     class.define_method(
+         "set_table_properties",
+         method!(RawDeltaTable::set_table_properties, 2),
+     )?;
+     class.define_method("repair", method!(RawDeltaTable::repair, 3))?;
+     class.define_method(
+         "transaction_versions",
+         method!(RawDeltaTable::transaction_versions, 0),
+     )?;

      let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;
      class.define_method("id", method!(RawDeltaTableMetaData::id, 0))?;
@@ -344,10 +1383,37 @@ fn init(ruby: &Ruby) -> RbResult<()> {
          method!(RawDeltaTableMetaData::configuration, 0),
      )?;

+     let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
+     class.define_method("to_i", method!(ArrowArrayStream::to_i, 0))?;
+
      let class = module.define_class("Field", ruby.class_object())?;
      class.define_method("name", method!(Field::name, 0))?;
      class.define_method("type", method!(Field::get_type, 0))?;
      class.define_method("nullable", method!(Field::nullable, 0))?;

+     let class = module.define_class("RbMergeBuilder", ruby.class_object())?;
+     class.define_method("source_alias", method!(RbMergeBuilder::source_alias, 0))?;
+     class.define_method("target_alias", method!(RbMergeBuilder::target_alias, 0))?;
+     class.define_method(
+         "when_matched_update",
+         method!(RbMergeBuilder::when_matched_update, 2),
+     )?;
+     class.define_method(
+         "when_matched_delete",
+         method!(RbMergeBuilder::when_matched_delete, 1),
+     )?;
+     class.define_method(
+         "when_not_matched_insert",
+         method!(RbMergeBuilder::when_not_matched_insert, 2),
+     )?;
+     class.define_method(
+         "when_not_matched_by_source_update",
+         method!(RbMergeBuilder::when_not_matched_by_source_update, 2),
+     )?;
+     class.define_method(
+         "when_not_matched_by_source_delete",
+         method!(RbMergeBuilder::when_not_matched_by_source_delete, 1),
+     )?;
+
      Ok(())
  }
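
Note: the arity argument to magnus's function!/method! macros counts Ruby-visible parameters (self excluded) and must match the Rust signature, which is why write_deltalake_rust moves from 12 to 15 as the three property arguments are added. A minimal sketch of the registration pattern, using a hypothetical Example module that is not part of this diff:

    use magnus::{function, method, prelude::*, Error, Ruby};

    #[magnus::wrap(class = "Example::Counter")]
    struct Counter {
        start: i64,
    }

    impl Counter {
        fn add(&self, n: i64) -> i64 {
            self.start + n
        }
    }

    fn build(start: i64) -> Counter {
        Counter { start }
    }

    #[magnus::init]
    fn init(ruby: &Ruby) -> Result<(), Error> {
        let module = ruby.define_module("Example")?;
        let class = module.define_class("Counter", ruby.class_object())?;
        // build takes one Ruby argument; add takes one (self not counted).
        class.define_singleton_method("build", function!(build, 1))?;
        class.define_method("add", method!(Counter::add, 1))?;
        Ok(())
    }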