deltalake-rb 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -14,7 +14,7 @@ Add this line to your application’s Gemfile:
14
14
  gem "deltalake-rb"
15
15
  ```
16
16
 
17
- It can take a few minutes to compile the gem.
17
+ It can take 5-10 minutes to compile the gem.
18
18
 
19
19
  ## Getting Started
20
20
 
@@ -50,6 +50,18 @@ Overwrite a table
50
50
  DeltaLake.write("./data/delta", df, mode: "overwrite")
51
51
  ```
52
52
 
53
+ Add a constraint
54
+
55
+ ```ruby
56
+ dt.alter.add_constraint({"a_gt_0" => "a > 0"})
57
+ ```
58
+
59
+ Drop a constraint
60
+
61
+ ```ruby
62
+ dt.alter.drop_constraint("a_gt_0")
63
+ ```
64
+
53
65
  Delete rows
54
66
 
55
67
  ```ruby
@@ -62,6 +74,18 @@ Vacuum
62
74
  dt.vacuum(dry_run: false)
63
75
  ```
64
76
 
77
+ Perform small file compaction
78
+
79
+ ```ruby
80
+ dt.optimize.compact
81
+ ```
82
+
83
+ Colocate similar data in the same files
84
+
85
+ ```ruby
86
+ dt.optimize.z_order(["a"])
87
+ ```
88
+
65
89
  Load a previous version of a table
66
90
 
67
91
  ```ruby
@@ -70,16 +94,22 @@ dt = DeltaLake::Table.new("./data/delta", version: 1)
70
94
  dt.load_as_version(1)
71
95
  ```
72
96
 
97
+ Get the schema
98
+
99
+ ```ruby
100
+ dt.schema
101
+ ```
102
+
73
103
  Get metadata
74
104
 
75
105
  ```ruby
76
106
  dt.metadata
77
107
  ```
78
108
 
79
- Get the schema
109
+ Get history
80
110
 
81
111
  ```ruby
82
- dt.schema
112
+ dt.history
83
113
  ```
84
114
 
85
115
  ## API
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.1.0"
3
+ version = "0.1.2"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,11 +11,14 @@ publish = false
11
11
  crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
- arrow = { version = "52", features = ["ffi"] }
15
- arrow-schema = { version = "52", features = ["serde"] }
14
+ arrow = { version = "53", features = ["ffi"] }
15
+ arrow-schema = { version = "53", features = ["serde"] }
16
16
  chrono = "0.4"
17
- deltalake = { version = "=0.21.0", features = ["datafusion", "s3"] }
17
+ delta_kernel = "0.4"
18
+ deltalake = { version = "=0.22.2", features = ["azure", "datafusion", "gcs", "s3"] }
19
+ futures = "0.3"
18
20
  magnus = "0.7"
21
+ num_cpus = "1"
19
22
  serde = "1"
20
23
  serde_json = "1"
21
24
  tokio = { version = "1", features = ["rt-multi-thread"] }
@@ -1,6 +1,7 @@
1
1
  use arrow_schema::ArrowError;
2
+ use deltalake::protocol::ProtocolError;
2
3
  use deltalake::{errors::DeltaTableError, ObjectStoreError};
3
- use magnus::{exception, Error, Module, RModule, Ruby};
4
+ use magnus::{exception, Error as RbErr, Module, RModule, Ruby};
4
5
  use std::borrow::Cow;
5
6
 
6
7
  macro_rules! create_exception {
@@ -8,7 +9,7 @@ macro_rules! create_exception {
8
9
  pub struct $type {}
9
10
 
10
11
  impl $type {
11
- pub fn new_err<T>(message: T) -> Error
12
+ pub fn new_err<T>(message: T) -> RbErr
12
13
  where
13
14
  T: Into<Cow<'static, str>>,
14
15
  {
@@ -19,7 +20,7 @@ macro_rules! create_exception {
19
20
  .unwrap()
20
21
  .const_get($name)
21
22
  .unwrap();
22
- Error::new(class, message)
23
+ RbErr::new(class, message)
23
24
  }
24
25
  }
25
26
  };
@@ -31,7 +32,7 @@ create_exception!(DeltaProtocolError, "DeltaProtocolError");
31
32
  create_exception!(CommitFailedError, "CommitFailedError");
32
33
  create_exception!(SchemaMismatchError, "SchemaMismatchError");
33
34
 
34
- fn inner_to_rb_err(err: DeltaTableError) -> Error {
35
+ fn inner_to_rb_err(err: DeltaTableError) -> RbErr {
35
36
  match err {
36
37
  DeltaTableError::NotATable(msg) => TableNotFoundError::new_err(msg),
37
38
  DeltaTableError::InvalidTableLocation(msg) => TableNotFoundError::new_err(msg),
@@ -48,7 +49,7 @@ fn inner_to_rb_err(err: DeltaTableError) -> Error {
48
49
 
49
50
  // ruby exceptions
50
51
  DeltaTableError::ObjectStore { source } => object_store_to_rb(source),
51
- DeltaTableError::Io { source } => Error::new(exception::io_error(), source.to_string()),
52
+ DeltaTableError::Io { source } => RbIOError::new_err(source.to_string()),
52
53
 
53
54
  DeltaTableError::Arrow { source } => arrow_to_rb(source),
54
55
 
@@ -56,31 +57,50 @@ fn inner_to_rb_err(err: DeltaTableError) -> Error {
56
57
  }
57
58
  }
58
59
 
59
- fn object_store_to_rb(err: ObjectStoreError) -> Error {
60
+ fn object_store_to_rb(err: ObjectStoreError) -> RbErr {
60
61
  match err {
61
- ObjectStoreError::NotFound { .. } => Error::new(exception::io_error(), err.to_string()),
62
+ ObjectStoreError::NotFound { .. } => RbIOError::new_err(err.to_string()),
62
63
  ObjectStoreError::Generic { source, .. }
63
64
  if source.to_string().contains("AWS_S3_ALLOW_UNSAFE_RENAME") =>
64
65
  {
65
66
  DeltaProtocolError::new_err(source.to_string())
66
67
  }
67
- _ => Error::new(exception::io_error(), err.to_string()),
68
+ _ => RbIOError::new_err(err.to_string()),
68
69
  }
69
70
  }
70
71
 
71
- fn arrow_to_rb(err: ArrowError) -> Error {
72
+ fn arrow_to_rb(err: ArrowError) -> RbErr {
72
73
  match err {
73
- ArrowError::IoError(msg, _) => Error::new(exception::io_error(), msg),
74
- ArrowError::DivideByZero => Error::new(exception::arg_error(), "division by zero"),
75
- ArrowError::InvalidArgumentError(msg) => Error::new(exception::arg_error(), msg),
76
- ArrowError::NotYetImplemented(msg) => Error::new(exception::not_imp_error(), msg),
74
+ ArrowError::IoError(msg, _) => RbIOError::new_err(msg),
75
+ ArrowError::DivideByZero => RbValueError::new_err("division by zero"),
76
+ ArrowError::InvalidArgumentError(msg) => RbValueError::new_err(msg),
77
+ ArrowError::NotYetImplemented(msg) => RbNotImplementedError::new_err(msg),
77
78
  ArrowError::SchemaError(msg) => SchemaMismatchError::new_err(msg),
78
- other => Error::new(exception::runtime_error(), other.to_string()),
79
+ other => RbException::new_err(other.to_string()),
80
+ }
81
+ }
82
+
83
+ fn checkpoint_to_rb(err: ProtocolError) -> RbErr {
84
+ match err {
85
+ ProtocolError::Arrow { source } => arrow_to_rb(source),
86
+ ProtocolError::ObjectStore { source } => object_store_to_rb(source),
87
+ ProtocolError::EndOfLog => DeltaProtocolError::new_err("End of log"),
88
+ ProtocolError::NoMetaData => DeltaProtocolError::new_err("Table metadata missing"),
89
+ ProtocolError::CheckpointNotFound => DeltaProtocolError::new_err(err.to_string()),
90
+ ProtocolError::InvalidField(err) => RbValueError::new_err(err),
91
+ ProtocolError::InvalidRow(err) => RbValueError::new_err(err),
92
+ ProtocolError::InvalidDeletionVectorStorageType(err) => RbValueError::new_err(err),
93
+ ProtocolError::SerializeOperation { source } => RbValueError::new_err(source.to_string()),
94
+ ProtocolError::ParquetParseError { source } => RbIOError::new_err(source.to_string()),
95
+ ProtocolError::IO { source } => RbIOError::new_err(source.to_string()),
96
+ ProtocolError::Generic(msg) => DeltaError::new_err(msg),
97
+ ProtocolError::Kernel { source } => DeltaError::new_err(source.to_string()),
79
98
  }
80
99
  }
81
100
 
82
101
  pub enum RubyError {
83
102
  DeltaTable(DeltaTableError),
103
+ Protocol(ProtocolError),
84
104
  }
85
105
 
86
106
  impl From<DeltaTableError> for RubyError {
@@ -89,10 +109,37 @@ impl From<DeltaTableError> for RubyError {
89
109
  }
90
110
  }
91
111
 
92
- impl From<RubyError> for Error {
112
+ impl From<ProtocolError> for RubyError {
113
+ fn from(err: ProtocolError) -> Self {
114
+ RubyError::Protocol(err)
115
+ }
116
+ }
117
+
118
+ impl From<RubyError> for RbErr {
93
119
  fn from(value: RubyError) -> Self {
94
120
  match value {
95
121
  RubyError::DeltaTable(err) => inner_to_rb_err(err),
122
+ RubyError::Protocol(err) => checkpoint_to_rb(err),
96
123
  }
97
124
  }
98
125
  }
126
+
127
+ macro_rules! create_builtin_exception {
128
+ ($type:ident, $class:expr) => {
129
+ pub struct $type {}
130
+
131
+ impl $type {
132
+ pub fn new_err<T>(message: T) -> RbErr
133
+ where
134
+ T: Into<Cow<'static, str>>,
135
+ {
136
+ RbErr::new($class, message)
137
+ }
138
+ }
139
+ };
140
+ }
141
+
142
+ create_builtin_exception!(RbException, exception::runtime_error());
143
+ create_builtin_exception!(RbIOError, exception::io_error());
144
+ create_builtin_exception!(RbNotImplementedError, exception::not_imp_error());
145
+ create_builtin_exception!(RbValueError, exception::arg_error());
@@ -0,0 +1,67 @@
1
+ use crate::{RbResult, RbValueError};
2
+ use deltalake::kernel::TableFeatures as KernelTableFeatures;
3
+ use magnus::{prelude::*, TryConvert, Value};
4
+
5
+ /// High level table features
6
+ #[derive(Clone)]
7
+ pub enum TableFeatures {
8
+ /// Mapping of one column to another
9
+ ColumnMapping,
10
+ /// Deletion vectors for merge, update, delete
11
+ DeletionVectors,
12
+ /// timestamps without timezone support
13
+ TimestampWithoutTimezone,
14
+ /// version 2 of checkpointing
15
+ V2Checkpoint,
16
+ /// Append Only Tables
17
+ AppendOnly,
18
+ /// Table invariants
19
+ Invariants,
20
+ /// Check constraints on columns
21
+ CheckConstraints,
22
+ /// CDF on a table
23
+ ChangeDataFeed,
24
+ /// Columns with generated values
25
+ GeneratedColumns,
26
+ /// ID Columns
27
+ IdentityColumns,
28
+ /// Row tracking on tables
29
+ RowTracking,
30
+ /// domain specific metadata
31
+ DomainMetadata,
32
+ /// Iceberg compatibility support
33
+ IcebergCompatV1,
34
+ }
35
+
36
+ impl From<TableFeatures> for KernelTableFeatures {
37
+ fn from(value: TableFeatures) -> Self {
38
+ match value {
39
+ TableFeatures::ColumnMapping => KernelTableFeatures::ColumnMapping,
40
+ TableFeatures::DeletionVectors => KernelTableFeatures::DeletionVectors,
41
+ TableFeatures::TimestampWithoutTimezone => {
42
+ KernelTableFeatures::TimestampWithoutTimezone
43
+ }
44
+ TableFeatures::V2Checkpoint => KernelTableFeatures::V2Checkpoint,
45
+ TableFeatures::AppendOnly => KernelTableFeatures::AppendOnly,
46
+ TableFeatures::Invariants => KernelTableFeatures::Invariants,
47
+ TableFeatures::CheckConstraints => KernelTableFeatures::CheckConstraints,
48
+ TableFeatures::ChangeDataFeed => KernelTableFeatures::ChangeDataFeed,
49
+ TableFeatures::GeneratedColumns => KernelTableFeatures::GeneratedColumns,
50
+ TableFeatures::IdentityColumns => KernelTableFeatures::IdentityColumns,
51
+ TableFeatures::RowTracking => KernelTableFeatures::RowTracking,
52
+ TableFeatures::DomainMetadata => KernelTableFeatures::DomainMetadata,
53
+ TableFeatures::IcebergCompatV1 => KernelTableFeatures::IcebergCompatV1,
54
+ }
55
+ }
56
+ }
57
+
58
+ impl TryConvert for TableFeatures {
59
+ fn try_convert(val: Value) -> RbResult<Self> {
60
+ // TODO add more features
61
+ let feature = match unsafe { val.to_r_string()?.as_str()? } {
62
+ "append_only" => TableFeatures::AppendOnly,
63
+ _ => return Err(RbValueError::new_err("Invalid feature")),
64
+ };
65
+ Ok(feature)
66
+ }
67
+ }