deltalake-rb 0.1.0 → 0.1.2

@@ -1,28 +1,77 @@
  mod error;
+ mod features;
+ mod merge;
  mod schema;
  mod utils;

  use std::cell::RefCell;
- use std::collections::HashMap;
+ use std::collections::{HashMap, HashSet};
  use std::future::IntoFuture;
+ use std::str::FromStr;
+ use std::time;

- use chrono::Duration;
+ use chrono::{DateTime, Duration, FixedOffset, Utc};
+ use delta_kernel::schema::StructField;
  use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
- use deltalake::kernel::StructType;
+ use deltalake::arrow::record_batch::RecordBatchIterator;
+ use deltalake::checkpoints::{cleanup_metadata, create_checkpoint};
+ use deltalake::datafusion::physical_plan::ExecutionPlan;
+ use deltalake::datafusion::prelude::SessionContext;
+ use deltalake::errors::DeltaTableError;
+ use deltalake::kernel::{scalars::ScalarExt, StructType, Transaction};
+ use deltalake::operations::add_column::AddColumnBuilder;
+ use deltalake::operations::add_feature::AddTableFeatureBuilder;
+ use deltalake::operations::collect_sendable_stream;
+ use deltalake::operations::constraints::ConstraintBuilder;
  use deltalake::operations::delete::DeleteBuilder;
+ use deltalake::operations::drop_constraints::DropConstraintBuilder;
+ use deltalake::operations::filesystem_check::FileSystemCheckBuilder;
+ use deltalake::operations::load_cdf::CdfLoadBuilder;
+ use deltalake::operations::optimize::{OptimizeBuilder, OptimizeType};
+ use deltalake::operations::restore::RestoreBuilder;
+ use deltalake::operations::set_tbl_properties::SetTablePropertiesBuilder;
+ use deltalake::operations::transaction::{CommitProperties, TableReference};
  use deltalake::operations::vacuum::VacuumBuilder;
+ use deltalake::parquet::basic::Compression;
+ use deltalake::parquet::errors::ParquetError;
+ use deltalake::parquet::file::properties::WriterProperties;
+ use deltalake::partitions::PartitionFilter;
  use deltalake::storage::IORuntime;
- use deltalake::DeltaOps;
+ use deltalake::{DeltaOps, DeltaResult};
  use error::DeltaError;
+ use futures::future::join_all;

- use magnus::{function, method, prelude::*, Error, Module, Ruby, Value};
+ use magnus::{
+     function, method, prelude::*, typed_data::Obj, Error, Integer, Module, RArray, RHash, Ruby,
+     TryConvert, Value,
+ };
+ use serde_json::Map;

+ use crate::error::DeltaProtocolError;
+ use crate::error::RbValueError;
  use crate::error::RubyError;
+ use crate::features::TableFeatures;
+ use crate::merge::RbMergeBuilder;
  use crate::schema::{schema_to_rbobject, Field};
  use crate::utils::rt;

  type RbResult<T> = Result<T, Error>;

+ enum PartitionFilterValue {
+     Single(String),
+     Multiple(Vec<String>),
+ }
+
+ impl TryConvert for PartitionFilterValue {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         if let Ok(v) = Vec::<String>::try_convert(val) {
+             Ok(PartitionFilterValue::Multiple(v))
+         } else {
+             Ok(PartitionFilterValue::Single(String::try_convert(val)?))
+         }
+     }
+ }
+
  #[magnus::wrap(class = "DeltaLake::RawDeltaTable")]
  struct RawDeltaTable {
      _table: RefCell<deltalake::DeltaTable>,
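The new `PartitionFilterValue` lets the Ruby side pass either a single string or an array of strings as a partition filter value: `try_convert` attempts the `Vec<String>` conversion first and falls back to a single `String`. A dependency-free sketch of that fallback order (`RawValue` is a hypothetical stand-in for `magnus::Value`, which needs a live Ruby VM):

```rust
#[derive(Debug)]
enum PartitionFilterValue {
    Single(String),
    Multiple(Vec<String>),
}

// Hypothetical stand-in for magnus::Value: either conversion can succeed.
enum RawValue {
    Str(String),
    List(Vec<String>),
}

fn try_convert(val: RawValue) -> PartitionFilterValue {
    // Same order as the impl above: prefer the array conversion,
    // fall back to a single string.
    match val {
        RawValue::List(v) => PartitionFilterValue::Multiple(v),
        RawValue::Str(s) => PartitionFilterValue::Single(s),
    }
}

fn main() {
    println!("{:?}", try_convert(RawValue::Str("2024".into())));
    println!("{:?}", try_convert(RawValue::List(vec!["01".into(), "02".into()])));
}
```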
@@ -38,6 +87,34 @@ struct RawDeltaTableMetaData {
      configuration: HashMap<String, Option<String>>,
  }

+ impl RawDeltaTableMetaData {
+     fn id(&self) -> String {
+         self.id.clone()
+     }
+
+     fn name(&self) -> Option<String> {
+         self.name.clone()
+     }
+
+     fn description(&self) -> Option<String> {
+         self.description.clone()
+     }
+
+     fn partition_columns(&self) -> Vec<String> {
+         self.partition_columns.clone()
+     }
+
+     fn created_time(&self) -> Option<i64> {
+         self.created_time
+     }
+
+     fn configuration(&self) -> HashMap<String, Option<String>> {
+         self.configuration.clone()
+     }
+ }
+
+ type StringVec = Vec<String>;
+
  impl RawDeltaTable {
      pub fn new(
          table_uri: String,
@@ -113,37 +190,140 @@ impl RawDeltaTable {
          })
      }

+     pub fn protocol_versions(&self) -> RbResult<(i32, i32, Option<StringVec>, Option<StringVec>)> {
+         let binding = self._table.borrow();
+         let table_protocol = binding.protocol().map_err(RubyError::from)?;
+         Ok((
+             table_protocol.min_reader_version,
+             table_protocol.min_writer_version,
+             table_protocol
+                 .writer_features
+                 .as_ref()
+                 .and_then(|features| {
+                     let has_features = !features.is_empty();
+                     has_features.then(|| {
+                         features
+                             .iter()
+                             .map(|v| v.to_string())
+                             .collect::<Vec<String>>()
+                     })
+                 }),
+             table_protocol
+                 .reader_features
+                 .as_ref()
+                 .and_then(|features| {
+                     let has_features = !features.is_empty();
+                     has_features.then(|| {
+                         features
+                             .iter()
+                             .map(|v| v.to_string())
+                             .collect::<Vec<String>>()
+                     })
+                 }),
+         ))
+     }
+
      pub fn load_version(&self, version: i64) -> RbResult<()> {
          Ok(rt()
              .block_on(self._table.borrow_mut().load_version(version))
              .map_err(RubyError::from)?)
      }

-     pub fn files(&self) -> RbResult<Vec<String>> {
-         if !self.has_files()? {
-             return Err(DeltaError::new_err("Table is instantiated without files."));
-         }
+     pub fn get_latest_version(&self) -> RbResult<i64> {
+         Ok(rt()
+             .block_on(self._table.borrow().get_latest_version())
+             .map_err(RubyError::from)?)
+     }

+     pub fn get_earliest_version(&self) -> RbResult<i64> {
+         Ok(rt()
+             .block_on(self._table.borrow().get_earliest_version())
+             .map_err(RubyError::from)?)
+     }
+
+     pub fn get_num_index_cols(&self) -> RbResult<i32> {
          Ok(self
              ._table
              .borrow()
-             .get_files_iter()
+             .snapshot()
              .map_err(RubyError::from)?
-             .map(|f| f.to_string())
-             .collect())
+             .config()
+             .num_indexed_cols())
      }

-     pub fn file_uris(&self) -> RbResult<Vec<String>> {
-         if !self._table.borrow().config.require_files {
-             return Err(DeltaError::new_err("Table is initiated without files."));
-         }
-
+     pub fn get_stats_columns(&self) -> RbResult<Option<Vec<String>>> {
          Ok(self
              ._table
              .borrow()
-             .get_file_uris()
+             .snapshot()
              .map_err(RubyError::from)?
-             .collect())
+             .config()
+             .stats_columns()
+             .map(|v| v.iter().map(|v| v.to_string()).collect::<Vec<String>>()))
+     }
+
+     pub fn load_with_datetime(&self, ds: String) -> RbResult<()> {
+         let datetime =
+             DateTime::<Utc>::from(DateTime::<FixedOffset>::parse_from_rfc3339(&ds).map_err(
+                 |err| RbValueError::new_err(format!("Failed to parse datetime string: {err}")),
+             )?);
+         Ok(rt()
+             .block_on(self._table.borrow_mut().load_with_datetime(datetime))
+             .map_err(RubyError::from)?)
+     }
+
+     pub fn files(
+         &self,
+         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
+     ) -> RbResult<Vec<String>> {
+         if !self.has_files()? {
+             return Err(DeltaError::new_err("Table is instantiated without files."));
+         }
+
+         if let Some(filters) = partition_filters {
+             let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
+             Ok(self
+                 ._table
+                 .borrow()
+                 .get_files_by_partitions(&filters)
+                 .map_err(RubyError::from)?
+                 .into_iter()
+                 .map(|p| p.to_string())
+                 .collect())
+         } else {
+             Ok(self
+                 ._table
+                 .borrow()
+                 .get_files_iter()
+                 .map_err(RubyError::from)?
+                 .map(|f| f.to_string())
+                 .collect())
+         }
+     }
+
+     pub fn file_uris(
+         &self,
+         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
+     ) -> RbResult<Vec<String>> {
+         if !self._table.borrow().config.require_files {
+             return Err(DeltaError::new_err("Table is initiated without files."));
+         }
+
+         if let Some(filters) = partition_filters {
+             let filters = convert_partition_filters(filters).map_err(RubyError::from)?;
+             Ok(self
+                 ._table
+                 .borrow()
+                 .get_file_uris_by_partitions(&filters)
+                 .map_err(RubyError::from)?)
+         } else {
+             Ok(self
+                 ._table
+                 .borrow()
+                 .get_file_uris()
+                 .map_err(RubyError::from)?
+                 .collect())
+         }
      }

      pub fn schema(&self) -> RbResult<Value> {
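Those `(column, op, value)` triples are turned into `PartitionFilter`s by `convert_partition_filters` further down in this diff, using `TryFrom` impls that `deltalake` already ships. A small sketch, assuming the `deltalake` crate as a dependency:

```rust
use deltalake::errors::DeltaTableError;
use deltalake::partitions::PartitionFilter;

fn main() -> Result<(), DeltaTableError> {
    // PartitionFilterValue::Single maps to the (&str, &str, &str) form.
    let single = PartitionFilter::try_from(("year", "=", "2024"))?;
    // PartitionFilterValue::Multiple maps to the slice form, for "in"/"not in".
    let multiple = PartitionFilter::try_from(("month", "in", ["01", "02"].as_slice()))?;
    println!("{single:?}\n{multiple:?}");
    Ok(())
}
```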
@@ -157,6 +337,8 @@ impl RawDeltaTable {
          dry_run: bool,
          retention_hours: Option<u64>,
          enforce_retention_duration: bool,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
      ) -> RbResult<Vec<String>> {
          let mut cmd = VacuumBuilder::new(
              self._table.borrow().log_store(),
@@ -172,11 +354,350 @@ impl RawDeltaTable {
              cmd = cmd.with_retention_period(Duration::hours(retention_period as i64));
          }

+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
          let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
          self._table.borrow_mut().state = table.state;
          Ok(metrics.files_deleted)
      }

+     pub fn compact_optimize(
+         &self,
+         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
+         target_size: Option<i64>,
+         max_concurrent_tasks: Option<usize>,
+         min_commit_interval: Option<u64>,
+         writer_properties: Option<RbWriterProperties>,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+     ) -> RbResult<String> {
+         let mut cmd = OptimizeBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get));
+         if let Some(size) = target_size {
+             cmd = cmd.with_target_size(size);
+         }
+         if let Some(commit_interval) = min_commit_interval {
+             cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
+         }
+
+         if let Some(writer_props) = writer_properties {
+             cmd = cmd.with_writer_properties(
+                 set_writer_properties(writer_props).map_err(RubyError::from)?,
+             );
+         }
+
+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
+
+         let converted_filters = convert_partition_filters(partition_filters.unwrap_or_default())
+             .map_err(RubyError::from)?;
+         cmd = cmd.with_filters(&converted_filters);
+
+         let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(serde_json::to_string(&metrics).unwrap())
+     }
+
+     pub fn z_order_optimize(
+         &self,
+         z_order_columns: Vec<String>,
+         partition_filters: Option<Vec<(String, String, PartitionFilterValue)>>,
+         target_size: Option<i64>,
+         max_concurrent_tasks: Option<usize>,
+         max_spill_size: usize,
+         min_commit_interval: Option<u64>,
+         writer_properties: Option<RbWriterProperties>,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+     ) -> RbResult<String> {
+         let mut cmd = OptimizeBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
+         .with_max_spill_size(max_spill_size)
+         .with_type(OptimizeType::ZOrder(z_order_columns));
+         if let Some(size) = target_size {
+             cmd = cmd.with_target_size(size);
+         }
+         if let Some(commit_interval) = min_commit_interval {
+             cmd = cmd.with_min_commit_interval(time::Duration::from_secs(commit_interval));
+         }
+
+         if let Some(writer_props) = writer_properties {
+             cmd = cmd.with_writer_properties(
+                 set_writer_properties(writer_props).map_err(RubyError::from)?,
+             );
+         }
+
+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
+
+         let converted_filters = convert_partition_filters(partition_filters.unwrap_or_default())
+             .map_err(RubyError::from)?;
+         cmd = cmd.with_filters(&converted_filters);
+
+         let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(serde_json::to_string(&metrics).unwrap())
+     }
+
+     pub fn add_columns(&self, fields: RArray) -> RbResult<()> {
+         let fields = fields.typecheck::<Obj<Field>>()?;
+         let mut cmd = AddColumnBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         );
+
+         let new_fields = fields
+             .iter()
+             .map(|v| v.inner.clone())
+             .collect::<Vec<StructField>>();
+
+         cmd = cmd.with_fields(new_fields);
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn add_feature(
+         &self,
+         feature: RArray,
+         allow_protocol_versions_increase: bool,
+     ) -> RbResult<()> {
+         let feature = feature
+             .into_iter()
+             .map(|v| TableFeatures::try_convert(v))
+             .collect::<RbResult<Vec<_>>>()?;
+         let cmd = AddTableFeatureBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_features(feature)
+         .with_allow_protocol_versions_increase(allow_protocol_versions_increase);
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn add_constraints(&self, constraints: HashMap<String, String>) -> RbResult<()> {
+         let mut cmd = ConstraintBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         );
+
+         for (col_name, expression) in constraints {
+             cmd = cmd.with_constraint(col_name.clone(), expression.clone());
+         }
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn drop_constraints(&self, name: String, raise_if_not_exists: bool) -> RbResult<()> {
+         let cmd = DropConstraintBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_constraint(name)
+         .with_raise_if_not_exists(raise_if_not_exists);
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn load_cdf(
+         &self,
+         starting_version: i64,
+         ending_version: Option<i64>,
+         starting_timestamp: Option<String>,
+         ending_timestamp: Option<String>,
+         columns: Option<Vec<String>>,
+     ) -> RbResult<ArrowArrayStream> {
+         let ctx = SessionContext::new();
+         let mut cdf_read = CdfLoadBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_starting_version(starting_version);
+
+         if let Some(ev) = ending_version {
+             cdf_read = cdf_read.with_ending_version(ev);
+         }
+         if let Some(st) = starting_timestamp {
+             let starting_ts: DateTime<Utc> = DateTime::<Utc>::from_str(&st)
+                 .map_err(|pe| RbValueError::new_err(pe.to_string()))?
+                 .to_utc();
+             cdf_read = cdf_read.with_starting_timestamp(starting_ts);
+         }
+         if let Some(et) = ending_timestamp {
+             let ending_ts = DateTime::<Utc>::from_str(&et)
+                 .map_err(|pe| RbValueError::new_err(pe.to_string()))?
+                 .to_utc();
+             cdf_read = cdf_read.with_ending_timestamp(ending_ts);
+         }
+
+         if let Some(columns) = columns {
+             cdf_read = cdf_read.with_columns(columns);
+         }
+
+         cdf_read = cdf_read.with_session_ctx(ctx.clone());
+
+         let plan = rt().block_on(cdf_read.build()).map_err(RubyError::from)?;
+
+         let mut tasks = vec![];
+         for p in 0..plan.properties().output_partitioning().partition_count() {
+             let inner_plan = plan.clone();
+             let partition_batch = inner_plan.execute(p, ctx.task_ctx()).unwrap();
+             let handle = rt().spawn(collect_sendable_stream(partition_batch));
+             tasks.push(handle);
+         }
+
+         // This is unfortunate: blocking on join_all buffers every partition's
+         // batches in memory before Ruby sees a single one.
+         let batches = rt()
+             .block_on(join_all(tasks))
+             .into_iter()
+             .flatten()
+             .collect::<Result<Vec<Vec<_>>, _>>()
+             .unwrap()
+             .into_iter()
+             .flatten()
+             .map(Ok);
+         let batch_iter = RecordBatchIterator::new(batches, plan.schema());
+         let ffi_stream = FFI_ArrowArrayStream::new(Box::new(batch_iter));
+         Ok(ArrowArrayStream { stream: ffi_stream })
+     }
+
+     pub fn create_merge_builder(
+         &self,
+         source: RbArrowType<ArrowArrayStreamReader>,
+         predicate: String,
+         source_alias: Option<String>,
+         target_alias: Option<String>,
+         safe_cast: bool,
+         writer_properties: Option<RbWriterProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+         commit_properties: Option<RbCommitProperties>,
+     ) -> RbResult<RbMergeBuilder> {
+         Ok(RbMergeBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+             source.0,
+             predicate,
+             source_alias,
+             target_alias,
+             safe_cast,
+             writer_properties,
+             post_commithook_properties,
+             commit_properties,
+         )
+         .map_err(RubyError::from)?)
+     }
+
+     pub fn merge_execute(&self, merge_builder: &RbMergeBuilder) -> RbResult<String> {
+         let (table, metrics) = merge_builder.execute().map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(metrics)
+     }
+
+     pub fn restore(
+         &self,
+         target: Option<Value>,
+         ignore_missing_files: bool,
+         protocol_downgrade_allowed: bool,
+         commit_properties: Option<RbCommitProperties>,
+     ) -> RbResult<String> {
+         let mut cmd = RestoreBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         );
+         if let Some(val) = target {
+             if let Some(version) = Integer::from_value(val) {
+                 cmd = cmd.with_version_to_restore(version.to_i64()?)
+             }
+             if let Ok(ds) = String::try_convert(val) {
+                 let datetime = DateTime::<Utc>::from(
+                     DateTime::<FixedOffset>::parse_from_rfc3339(ds.as_ref()).map_err(|err| {
+                         RbValueError::new_err(format!("Failed to parse datetime string: {err}"))
+                     })?,
+                 );
+                 cmd = cmd.with_datetime_to_restore(datetime)
+             }
+         }
+         cmd = cmd.with_ignore_missing_files(ignore_missing_files);
+         cmd = cmd.with_protocol_downgrade_allowed(protocol_downgrade_allowed);
+
+         if let Some(commit_properties) = maybe_create_commit_properties(commit_properties, None) {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
+
+         let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(serde_json::to_string(&metrics).unwrap())
+     }
+
+     pub fn history(&self, limit: Option<usize>) -> RbResult<Vec<String>> {
+         let history = rt()
+             .block_on(self._table.borrow().history(limit))
+             .map_err(RubyError::from)?;
+         Ok(history
+             .iter()
+             .map(|c| serde_json::to_string(c).unwrap())
+             .collect())
+     }
+
      pub fn update_incremental(&self) -> RbResult<()> {
          #[allow(deprecated)]
          Ok(rt()
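Both `load_with_datetime` and `restore` parse their timestamp arguments with chrono's RFC 3339 parser and normalize to UTC before handing them to delta-rs. The conversion in isolation (chrono only):

```rust
use chrono::{DateTime, FixedOffset, Utc};

fn main() {
    // Parse with whatever offset the caller supplied...
    let ds = "2024-01-15T09:30:00+02:00";
    let parsed = DateTime::<FixedOffset>::parse_from_rfc3339(ds)
        .expect("Failed to parse datetime string");
    // ...then normalize to UTC, as load_with_datetime does.
    let utc: DateTime<Utc> = DateTime::<Utc>::from(parsed);
    assert_eq!(utc.to_rfc3339(), "2024-01-15T07:30:00+00:00");
}
```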
@@ -184,7 +705,89 @@ impl RawDeltaTable {
          .map_err(RubyError::from)?)
      }

-     pub fn delete(&self, predicate: Option<String>) -> RbResult<String> {
+     fn get_active_partitions(&self) -> RbResult<RArray> {
+         let binding = self._table.borrow();
+         let _column_names: HashSet<&str> = binding
+             .get_schema()
+             .map_err(|_| DeltaProtocolError::new_err("table does not yet have a schema"))?
+             .fields()
+             .map(|field| field.name().as_str())
+             .collect();
+         let partition_columns: HashSet<&str> = binding
+             .metadata()
+             .map_err(RubyError::from)?
+             .partition_columns
+             .iter()
+             .map(|col| col.as_str())
+             .collect();
+
+         let converted_filters = Vec::new();
+
+         let partition_columns: Vec<&str> = partition_columns.into_iter().collect();
+
+         let adds = binding
+             .snapshot()
+             .map_err(RubyError::from)?
+             .get_active_add_actions_by_partitions(&converted_filters)
+             .map_err(RubyError::from)?
+             .collect::<Result<Vec<_>, _>>()
+             .map_err(RubyError::from)?;
+         let active_partitions: HashSet<Vec<(&str, Option<String>)>> = adds
+             .iter()
+             .flat_map(|add| {
+                 Ok::<_, RubyError>(
+                     partition_columns
+                         .iter()
+                         .flat_map(|col| {
+                             Ok::<_, RubyError>((
+                                 *col,
+                                 add.partition_values()
+                                     .map_err(RubyError::from)?
+                                     .get(*col)
+                                     .map(|v| v.serialize()),
+                             ))
+                         })
+                         .collect(),
+                 )
+             })
+             .collect();
+
+         Ok(RArray::from_iter(active_partitions))
+     }
+
+     pub fn create_checkpoint(&self) -> RbResult<()> {
+         rt().block_on(create_checkpoint(&self._table.borrow()))
+             .map_err(RubyError::from)?;
+
+         Ok(())
+     }
+
+     pub fn cleanup_metadata(&self) -> RbResult<()> {
+         rt().block_on(cleanup_metadata(&self._table.borrow()))
+             .map_err(RubyError::from)?;
+
+         Ok(())
+     }
+
+     pub fn get_add_file_sizes(&self) -> RbResult<HashMap<String, i64>> {
+         Ok(self
+             ._table
+             .borrow()
+             .snapshot()
+             .map_err(RubyError::from)?
+             .eager_snapshot()
+             .files()
+             .map(|f| (f.path().to_string(), f.size()))
+             .collect::<HashMap<String, i64>>())
+     }
+
+     pub fn delete(
+         &self,
+         predicate: Option<String>,
+         writer_properties: Option<RbWriterProperties>,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+     ) -> RbResult<String> {
          let mut cmd = DeleteBuilder::new(
              self._table.borrow().log_store(),
              self._table
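`get_active_partitions` builds one `(column, value)` row per add action and leans on `HashSet` to keep only the distinct combinations, with `None` standing in for a null partition value. The dedup step on its own:

```rust
use std::collections::HashSet;

fn main() {
    // One entry per add action; two files can share a partition.
    let adds = vec![
        vec![("year", Some("2024".to_string()))],
        vec![("year", Some("2024".to_string()))],
        vec![("year", None)],
    ];
    // Collecting into a HashSet leaves one row per distinct partition.
    let active: HashSet<Vec<(&str, Option<String>)>> = adds.into_iter().collect();
    assert_eq!(active.len(), 2);
}
```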
@@ -196,43 +799,366 @@ impl RawDeltaTable {
          if let Some(predicate) = predicate {
              cmd = cmd.with_predicate(predicate);
          }
+         if let Some(writer_props) = writer_properties {
+             cmd = cmd.with_writer_properties(
+                 set_writer_properties(writer_props).map_err(RubyError::from)?,
+             );
+         }
+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }
+
+         let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(serde_json::to_string(&metrics).unwrap())
+     }
+
+     pub fn set_table_properties(
+         &self,
+         properties: HashMap<String, String>,
+         raise_if_not_exists: bool,
+     ) -> RbResult<()> {
+         let cmd = SetTablePropertiesBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_properties(properties)
+         .with_raise_if_not_exists(raise_if_not_exists);
+
+         let table = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
+         self._table.borrow_mut().state = table.state;
+         Ok(())
+     }
+
+     pub fn repair(
+         &self,
+         dry_run: bool,
+         commit_properties: Option<RbCommitProperties>,
+         post_commithook_properties: Option<RbPostCommitHookProperties>,
+     ) -> RbResult<String> {
+         let mut cmd = FileSystemCheckBuilder::new(
+             self._table.borrow().log_store(),
+             self._table
+                 .borrow()
+                 .snapshot()
+                 .map_err(RubyError::from)?
+                 .clone(),
+         )
+         .with_dry_run(dry_run);
+
+         if let Some(commit_properties) =
+             maybe_create_commit_properties(commit_properties, post_commithook_properties)
+         {
+             cmd = cmd.with_commit_properties(commit_properties);
+         }

          let (table, metrics) = rt().block_on(cmd.into_future()).map_err(RubyError::from)?;
          self._table.borrow_mut().state = table.state;
          Ok(serde_json::to_string(&metrics).unwrap())
      }
+
+     pub fn transaction_versions(&self) -> RHash {
+         RHash::from_iter(
+             self._table
+                 .borrow()
+                 .get_app_transaction_version()
+                 .into_iter()
+                 .map(|(app_id, transaction)| (app_id, RbTransaction::from(transaction))),
+         )
+     }
  }

- impl RawDeltaTableMetaData {
-     fn id(&self) -> String {
-         self.id.clone()
+ fn set_post_commithook_properties(
+     mut commit_properties: CommitProperties,
+     post_commithook_properties: RbPostCommitHookProperties,
+ ) -> CommitProperties {
+     commit_properties =
+         commit_properties.with_create_checkpoint(post_commithook_properties.create_checkpoint);
+     commit_properties = commit_properties
+         .with_cleanup_expired_logs(post_commithook_properties.cleanup_expired_logs);
+     commit_properties
+ }
+
+ fn set_writer_properties(writer_properties: RbWriterProperties) -> DeltaResult<WriterProperties> {
+     let mut properties = WriterProperties::builder();
+     let data_page_size_limit = writer_properties.data_page_size_limit;
+     let dictionary_page_size_limit = writer_properties.dictionary_page_size_limit;
+     let data_page_row_count_limit = writer_properties.data_page_row_count_limit;
+     let write_batch_size = writer_properties.write_batch_size;
+     let max_row_group_size = writer_properties.max_row_group_size;
+     let compression = writer_properties.compression;
+     let statistics_truncate_length = writer_properties.statistics_truncate_length;
+     let default_column_properties = writer_properties.default_column_properties;
+     let column_properties = writer_properties.column_properties;
+
+     if let Some(data_page_size) = data_page_size_limit {
+         properties = properties.set_data_page_size_limit(data_page_size);
+     }
+     if let Some(dictionary_page_size) = dictionary_page_size_limit {
+         properties = properties.set_dictionary_page_size_limit(dictionary_page_size);
+     }
+     if let Some(data_page_row_count) = data_page_row_count_limit {
+         properties = properties.set_data_page_row_count_limit(data_page_row_count);
      }
+     if let Some(batch_size) = write_batch_size {
+         properties = properties.set_write_batch_size(batch_size);
+     }
+     if let Some(row_group_size) = max_row_group_size {
+         properties = properties.set_max_row_group_size(row_group_size);
+     }
+     properties = properties.set_statistics_truncate_length(statistics_truncate_length);

-     fn name(&self) -> Option<String> {
-         self.name.clone()
+     if let Some(compression) = compression {
+         let compress: Compression = compression
+             .parse()
+             .map_err(|err: ParquetError| DeltaTableError::Generic(err.to_string()))?;
+
+         properties = properties.set_compression(compress);
      }

-     fn description(&self) -> Option<String> {
-         self.description.clone()
+     if let Some(default_column_properties) = default_column_properties {
+         if let Some(dictionary_enabled) = default_column_properties.dictionary_enabled {
+             properties = properties.set_dictionary_enabled(dictionary_enabled);
+         }
+         if let Some(max_statistics_size) = default_column_properties.max_statistics_size {
+             properties = properties.set_max_statistics_size(max_statistics_size);
+         }
+         if let Some(bloom_filter_properties) = default_column_properties.bloom_filter_properties {
+             if let Some(set_bloom_filter_enabled) = bloom_filter_properties.set_bloom_filter_enabled
+             {
+                 properties = properties.set_bloom_filter_enabled(set_bloom_filter_enabled);
+             }
+             if let Some(bloom_filter_fpp) = bloom_filter_properties.fpp {
+                 properties = properties.set_bloom_filter_fpp(bloom_filter_fpp);
+             }
+             if let Some(bloom_filter_ndv) = bloom_filter_properties.ndv {
+                 properties = properties.set_bloom_filter_ndv(bloom_filter_ndv);
+             }
+         }
+     }
+     if let Some(column_properties) = column_properties {
+         for (column_name, column_prop) in column_properties {
+             if let Some(column_prop) = column_prop {
+                 if let Some(dictionary_enabled) = column_prop.dictionary_enabled {
+                     properties = properties.set_column_dictionary_enabled(
+                         column_name.clone().into(),
+                         dictionary_enabled,
+                     );
+                 }
+                 if let Some(bloom_filter_properties) = column_prop.bloom_filter_properties {
+                     if let Some(set_bloom_filter_enabled) =
+                         bloom_filter_properties.set_bloom_filter_enabled
+                     {
+                         properties = properties.set_column_bloom_filter_enabled(
+                             column_name.clone().into(),
+                             set_bloom_filter_enabled,
+                         );
+                     }
+                     if let Some(bloom_filter_fpp) = bloom_filter_properties.fpp {
+                         properties = properties.set_column_bloom_filter_fpp(
+                             column_name.clone().into(),
+                             bloom_filter_fpp,
+                         );
+                     }
+                     if let Some(bloom_filter_ndv) = bloom_filter_properties.ndv {
+                         properties = properties
+                             .set_column_bloom_filter_ndv(column_name.into(), bloom_filter_ndv);
+                     }
+                 }
+             }
+         }
      }
+     Ok(properties.build())
+ }

-     fn partition_columns(&self) -> Vec<String> {
-         self.partition_columns.clone()
+ fn convert_partition_filters(
+     partitions_filters: Vec<(String, String, PartitionFilterValue)>,
+ ) -> Result<Vec<PartitionFilter>, DeltaTableError> {
+     partitions_filters
+         .into_iter()
+         .map(|filter| match filter {
+             (key, op, PartitionFilterValue::Single(v)) => {
+                 let key: &'_ str = key.as_ref();
+                 let op: &'_ str = op.as_ref();
+                 let v: &'_ str = v.as_ref();
+                 PartitionFilter::try_from((key, op, v))
+             }
+             (key, op, PartitionFilterValue::Multiple(v)) => {
+                 let key: &'_ str = key.as_ref();
+                 let op: &'_ str = op.as_ref();
+                 let v: Vec<&'_ str> = v.iter().map(|v| v.as_ref()).collect();
+                 PartitionFilter::try_from((key, op, v.as_slice()))
+             }
+         })
+         .collect()
+ }
+
+ fn maybe_create_commit_properties(
+     maybe_commit_properties: Option<RbCommitProperties>,
+     post_commithook_properties: Option<RbPostCommitHookProperties>,
+ ) -> Option<CommitProperties> {
+     if maybe_commit_properties.is_none() && post_commithook_properties.is_none() {
+         return None;
      }
+     let mut commit_properties = CommitProperties::default();

-     fn created_time(&self) -> Option<i64> {
-         self.created_time
+     if let Some(commit_props) = maybe_commit_properties {
+         if let Some(metadata) = commit_props.custom_metadata {
+             let json_metadata: Map<String, serde_json::Value> =
+                 metadata.into_iter().map(|(k, v)| (k, v.into())).collect();
+             commit_properties = commit_properties.with_metadata(json_metadata);
+         };
+
+         if let Some(max_retries) = commit_props.max_commit_retries {
+             commit_properties = commit_properties.with_max_retries(max_retries);
+         };
+
+         if let Some(app_transactions) = commit_props.app_transactions {
+             let app_transactions = app_transactions.iter().map(Transaction::from).collect();
+             commit_properties = commit_properties.with_application_transactions(app_transactions);
+         }
      }

-     fn configuration(&self) -> HashMap<String, Option<String>> {
-         self.configuration.clone()
+     if let Some(post_commit_hook_props) = post_commithook_properties {
+         commit_properties =
+             set_post_commithook_properties(commit_properties, post_commit_hook_props)
+     }
+     Some(commit_properties)
+ }
+
+ fn rust_core_version() -> String {
+     deltalake::crate_version().to_string()
+ }
+
+ pub struct BloomFilterProperties {
+     pub set_bloom_filter_enabled: Option<bool>,
+     pub fpp: Option<f64>,
+     pub ndv: Option<u64>,
+ }
+
+ impl TryConvert for BloomFilterProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(BloomFilterProperties {
+             set_bloom_filter_enabled: val.funcall("set_bloom_filter_enabled", ())?,
+             fpp: val.funcall("fpp", ())?,
+             ndv: val.funcall("ndv", ())?,
+         })
+     }
+ }
+
+ pub struct ColumnProperties {
+     pub dictionary_enabled: Option<bool>,
+     pub max_statistics_size: Option<usize>,
+     pub bloom_filter_properties: Option<BloomFilterProperties>,
+ }
+
+ impl TryConvert for ColumnProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(ColumnProperties {
+             dictionary_enabled: val.funcall("dictionary_enabled", ())?,
+             max_statistics_size: val.funcall("max_statistics_size", ())?,
+             bloom_filter_properties: val.funcall("bloom_filter_properties", ())?,
+         })
+     }
+ }
+
+ pub struct RbWriterProperties {
+     data_page_size_limit: Option<usize>,
+     dictionary_page_size_limit: Option<usize>,
+     data_page_row_count_limit: Option<usize>,
+     write_batch_size: Option<usize>,
+     max_row_group_size: Option<usize>,
+     statistics_truncate_length: Option<usize>,
+     compression: Option<String>,
+     default_column_properties: Option<ColumnProperties>,
+     column_properties: Option<HashMap<String, Option<ColumnProperties>>>,
+ }
+
+ impl TryConvert for RbWriterProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(RbWriterProperties {
+             data_page_size_limit: val.funcall("data_page_size_limit", ())?,
+             dictionary_page_size_limit: val.funcall("dictionary_page_size_limit", ())?,
+             data_page_row_count_limit: val.funcall("data_page_row_count_limit", ())?,
+             write_batch_size: val.funcall("write_batch_size", ())?,
+             max_row_group_size: val.funcall("max_row_group_size", ())?,
+             statistics_truncate_length: val.funcall("statistics_truncate_length", ())?,
+             compression: val.funcall("compression", ())?,
+             default_column_properties: val.funcall("default_column_properties", ())?,
+             // TODO fix
+             column_properties: None,
+         })
+     }
+ }
+
+ pub struct RbPostCommitHookProperties {
+     create_checkpoint: bool,
+     cleanup_expired_logs: Option<bool>,
+ }
+
+ impl TryConvert for RbPostCommitHookProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(RbPostCommitHookProperties {
+             create_checkpoint: val.funcall("create_checkpoint", ())?,
+             cleanup_expired_logs: val.funcall("cleanup_expired_logs", ())?,
+         })
+     }
+ }
+
+ #[magnus::wrap(class = "DeltaLake::Transaction")]
+ pub struct RbTransaction {
+     pub app_id: String,
+     pub version: i64,
+     pub last_updated: Option<i64>,
+ }
+
+ impl From<Transaction> for RbTransaction {
+     fn from(value: Transaction) -> Self {
+         RbTransaction {
+             app_id: value.app_id,
+             version: value.version,
+             last_updated: value.last_updated,
+         }
+     }
+ }
+
+ impl From<&RbTransaction> for Transaction {
+     fn from(value: &RbTransaction) -> Self {
+         Transaction {
+             app_id: value.app_id.clone(),
+             version: value.version,
+             last_updated: value.last_updated,
+         }
+     }
+ }
+
+ pub struct RbCommitProperties {
+     custom_metadata: Option<HashMap<String, String>>,
+     max_commit_retries: Option<usize>,
+     app_transactions: Option<Vec<RbTransaction>>,
+ }
+
+ impl TryConvert for RbCommitProperties {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         Ok(RbCommitProperties {
+             custom_metadata: val.funcall("custom_metadata", ())?,
+             max_commit_retries: val.funcall("max_commit_retries", ())?,
+             // TODO fix
+             app_transactions: None,
+         })
      }
  }

  #[allow(clippy::too_many_arguments)]
  fn write_to_deltalake(
      table_uri: String,
-     data: Value,
+     data: RbArrowType<ArrowArrayStreamReader>,
      mode: String,
      table: Option<&RawDeltaTable>,
      schema_mode: Option<String>,
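`set_writer_properties` maps the Ruby-side options onto parquet's `WriterProperties` builder; the compression codec arrives as a string and goes through `str::parse::<Compression>`. A trimmed sketch against the same re-exported API:

```rust
use deltalake::parquet::basic::Compression;
use deltalake::parquet::file::properties::WriterProperties;

fn main() {
    // Codec strings may carry a level, e.g. "SNAPPY", "GZIP(6)", "ZSTD(3)";
    // an unknown string surfaces as a ParquetError, which the binding wraps
    // into DeltaTableError::Generic.
    let compression: Compression = "ZSTD(3)".parse().expect("unknown codec");

    let _props = WriterProperties::builder()
        .set_compression(compression)
        .set_max_row_group_size(128 * 1024)
        .build();
    println!("{compression:?}");
}
```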
@@ -243,16 +1169,11 @@ fn write_to_deltalake(
      description: Option<String>,
      configuration: Option<HashMap<String, Option<String>>>,
      storage_options: Option<HashMap<String, String>>,
+     writer_properties: Option<RbWriterProperties>,
+     commit_properties: Option<RbCommitProperties>,
+     post_commithook_properties: Option<RbPostCommitHookProperties>,
  ) -> RbResult<()> {
-     let capsule_pointer: usize = data.funcall("to_i", ())?;
-
-     // use similar approach as Polars to avoid copy
-     let stream_ptr =
-         Box::new(unsafe { std::ptr::replace(capsule_pointer as _, FFI_ArrowArrayStream::empty()) });
-     let stream = ArrowArrayStreamReader::try_new(*stream_ptr)
-         .map_err(|err| DeltaError::new_err(err.to_string()))?;
-
-     let batches = stream.map(|batch| batch.unwrap()).collect::<Vec<_>>();
+     let batches = data.0.map(|batch| batch.unwrap()).collect::<Vec<_>>();
      let save_mode = mode.parse().map_err(RubyError::from)?;

      let options = storage_options.clone().unwrap_or_default();
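`mode.parse()` leans on type inference: the write builder expects a `SaveMode`, whose `FromStr` accepts the mode strings the Ruby API exposes. A sketch, assuming `deltalake::protocol::SaveMode` as the inferred target:

```rust
use deltalake::protocol::SaveMode;

fn main() {
    // "append", "overwrite", "error" and "ignore" all parse; anything else
    // is rejected, which write_to_deltalake maps into a Ruby error.
    let mode: SaveMode = "overwrite".parse().expect("invalid save mode");
    println!("{mode:?}");
}
```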
@@ -273,6 +1194,12 @@ fn write_to_deltalake(
          builder = builder.with_partition_columns(partition_columns);
      }

+     if let Some(writer_props) = writer_properties {
+         builder = builder.with_writer_properties(
+             set_writer_properties(writer_props).map_err(RubyError::from)?,
+         );
+     }
+
      if let Some(name) = &name {
          builder = builder.with_table_name(name);
      };
@@ -293,18 +1220,55 @@ fn write_to_deltalake(
          builder = builder.with_configuration(config);
      };

+     if let Some(commit_properties) =
+         maybe_create_commit_properties(commit_properties, post_commithook_properties)
+     {
+         builder = builder.with_commit_properties(commit_properties);
+     };
+
      rt().block_on(builder.into_future())
          .map_err(RubyError::from)?;

      Ok(())
  }

+ pub struct RbArrowType<T>(pub T);
+
+ impl TryConvert for RbArrowType<ArrowArrayStreamReader> {
+     fn try_convert(val: Value) -> RbResult<Self> {
+         let addr: usize = val.funcall("to_i", ())?;
+
+         // use similar approach as Polars to consume pointer and avoid copy
+         let stream_ptr =
+             Box::new(unsafe { std::ptr::replace(addr as _, FFI_ArrowArrayStream::empty()) });
+
+         Ok(RbArrowType(
+             ArrowArrayStreamReader::try_new(*stream_ptr)
+                 .map_err(|err| DeltaError::new_err(err.to_string()))?,
+         ))
+     }
+ }
+
+ #[magnus::wrap(class = "DeltaLake::ArrowArrayStream")]
+ pub struct ArrowArrayStream {
+     stream: FFI_ArrowArrayStream,
+ }
+
+ impl ArrowArrayStream {
+     pub fn to_i(&self) -> usize {
+         (&self.stream as *const _) as usize
+     }
+ }
+
  #[magnus::init]
  fn init(ruby: &Ruby) -> RbResult<()> {
      deltalake::aws::register_handlers(None);
+     deltalake::azure::register_handlers(None);
+     deltalake::gcp::register_handlers(None);

      let module = ruby.define_module("DeltaLake")?;
-     module.define_singleton_method("write_deltalake_rust", function!(write_to_deltalake, 12))?;
+     module.define_singleton_method("write_deltalake_rust", function!(write_to_deltalake, 15))?;
+     module.define_singleton_method("rust_core_version", function!(rust_core_version, 0))?;

      let class = module.define_class("RawDeltaTable", ruby.class_object())?;
      class.define_singleton_method("new", function!(RawDeltaTable::new, 5))?;
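`RbArrowType::try_convert` and `ArrowArrayStream#to_i` pass a raw `FFI_ArrowArrayStream` address across the Ruby boundary, and the importer takes ownership by swapping an empty stream into the slot. An in-process round trip of the same export/import pair (no Ruby involved), using the arrow re-exports already imported in this file:

```rust
use std::sync::Arc;

use deltalake::arrow::array::{ArrayRef, Int64Array};
use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
use deltalake::arrow::record_batch::{RecordBatch, RecordBatchIterator};

fn main() {
    // Producer side, as in load_cdf: wrap a batch reader in the C Data
    // Interface struct whose address ArrowArrayStream#to_i would expose.
    let col: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3]));
    let batch = RecordBatch::try_from_iter([("a", col)]).unwrap();
    let schema = batch.schema();
    let reader = RecordBatchIterator::new([Ok(batch)], schema);
    let mut stream = FFI_ArrowArrayStream::new(Box::new(reader));

    // Consumer side, as in RbArrowType::try_convert: importing moves the
    // stream out of the struct, mirroring the ptr::replace above.
    let imported = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }.unwrap();
    for batch in imported {
        println!("{} rows", batch.unwrap().num_rows());
    }
}
```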
@@ -313,16 +1277,91 @@ fn init(ruby: &Ruby) -> RbResult<()> {
      class.define_method("version", method!(RawDeltaTable::version, 0))?;
      class.define_method("has_files", method!(RawDeltaTable::has_files, 0))?;
      class.define_method("metadata", method!(RawDeltaTable::metadata, 0))?;
+     class.define_method(
+         "protocol_versions",
+         method!(RawDeltaTable::protocol_versions, 0),
+     )?;
      class.define_method("load_version", method!(RawDeltaTable::load_version, 1))?;
-     class.define_method("files", method!(RawDeltaTable::files, 0))?;
-     class.define_method("file_uris", method!(RawDeltaTable::file_uris, 0))?;
+     class.define_method(
+         "get_latest_version",
+         method!(RawDeltaTable::get_latest_version, 0),
+     )?;
+     class.define_method(
+         "get_earliest_version",
+         method!(RawDeltaTable::get_earliest_version, 0),
+     )?;
+     class.define_method(
+         "get_num_index_cols",
+         method!(RawDeltaTable::get_num_index_cols, 0),
+     )?;
+     class.define_method(
+         "get_stats_columns",
+         method!(RawDeltaTable::get_stats_columns, 0),
+     )?;
+     class.define_method(
+         "load_with_datetime",
+         method!(RawDeltaTable::load_with_datetime, 1),
+     )?;
+     class.define_method("files", method!(RawDeltaTable::files, 1))?;
+     class.define_method("file_uris", method!(RawDeltaTable::file_uris, 1))?;
      class.define_method("schema", method!(RawDeltaTable::schema, 0))?;
-     class.define_method("vacuum", method!(RawDeltaTable::vacuum, 3))?;
+     class.define_method("vacuum", method!(RawDeltaTable::vacuum, 5))?;
+     class.define_method(
+         "compact_optimize",
+         method!(RawDeltaTable::compact_optimize, 7),
+     )?;
+     class.define_method(
+         "z_order_optimize",
+         method!(RawDeltaTable::z_order_optimize, 9),
+     )?;
+     class.define_method("add_columns", method!(RawDeltaTable::add_columns, 1))?;
+     class.define_method("add_feature", method!(RawDeltaTable::add_feature, 2))?;
+     class.define_method(
+         "add_constraints",
+         method!(RawDeltaTable::add_constraints, 1),
+     )?;
+     class.define_method(
+         "drop_constraints",
+         method!(RawDeltaTable::drop_constraints, 2),
+     )?;
+     class.define_method("load_cdf", method!(RawDeltaTable::load_cdf, 5))?;
+     class.define_method(
+         "create_merge_builder",
+         method!(RawDeltaTable::create_merge_builder, 8),
+     )?;
+     class.define_method("merge_execute", method!(RawDeltaTable::merge_execute, 1))?;
+     class.define_method("restore", method!(RawDeltaTable::restore, 4))?;
+     class.define_method("history", method!(RawDeltaTable::history, 1))?;
      class.define_method(
          "update_incremental",
          method!(RawDeltaTable::update_incremental, 0),
      )?;
-     class.define_method("delete", method!(RawDeltaTable::delete, 1))?;
+     class.define_method(
+         "get_active_partitions",
+         method!(RawDeltaTable::get_active_partitions, 0),
+     )?;
+     class.define_method(
+         "create_checkpoint",
+         method!(RawDeltaTable::create_checkpoint, 0),
+     )?;
+     class.define_method(
+         "cleanup_metadata",
+         method!(RawDeltaTable::cleanup_metadata, 0),
+     )?;
+     class.define_method(
+         "get_add_file_sizes",
+         method!(RawDeltaTable::get_add_file_sizes, 0),
+     )?;
+     class.define_method("delete", method!(RawDeltaTable::delete, 4))?;
+     class.define_method(
+         "set_table_properties",
+         method!(RawDeltaTable::set_table_properties, 2),
+     )?;
+     class.define_method("repair", method!(RawDeltaTable::repair, 3))?;
+     class.define_method(
+         "transaction_versions",
+         method!(RawDeltaTable::transaction_versions, 0),
+     )?;

      let class = module.define_class("RawDeltaTableMetaData", ruby.class_object())?;
      class.define_method("id", method!(RawDeltaTableMetaData::id, 0))?;
@@ -344,10 +1383,37 @@ fn init(ruby: &Ruby) -> RbResult<()> {
          method!(RawDeltaTableMetaData::configuration, 0),
      )?;

+     let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
+     class.define_method("to_i", method!(ArrowArrayStream::to_i, 0))?;
+
      let class = module.define_class("Field", ruby.class_object())?;
      class.define_method("name", method!(Field::name, 0))?;
      class.define_method("type", method!(Field::get_type, 0))?;
      class.define_method("nullable", method!(Field::nullable, 0))?;

+     let class = module.define_class("RbMergeBuilder", ruby.class_object())?;
+     class.define_method("source_alias", method!(RbMergeBuilder::source_alias, 0))?;
+     class.define_method("target_alias", method!(RbMergeBuilder::target_alias, 0))?;
+     class.define_method(
+         "when_matched_update",
+         method!(RbMergeBuilder::when_matched_update, 2),
+     )?;
+     class.define_method(
+         "when_matched_delete",
+         method!(RbMergeBuilder::when_matched_delete, 1),
+     )?;
+     class.define_method(
+         "when_not_matched_insert",
+         method!(RbMergeBuilder::when_not_matched_insert, 2),
+     )?;
+     class.define_method(
+         "when_not_matched_by_source_update",
+         method!(RbMergeBuilder::when_not_matched_by_source_update, 2),
+     )?;
+     class.define_method(
+         "when_not_matched_by_source_delete",
+         method!(RbMergeBuilder::when_not_matched_by_source_delete, 1),
+     )?;
+
      Ok(())
  }