deltalake-rb 0.1.1 → 0.1.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
21
21
  Write data
22
22
 
23
23
  ```ruby
24
- df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3.0, 4.0]})
25
- DeltaLake.write("./data/delta", df)
24
+ df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
25
+ DeltaLake.write("./events", df)
26
26
  ```
27
27
 
28
28
  Load a table
29
29
 
30
30
  ```ruby
31
- dt = DeltaLake::Table.new("./data/delta")
32
- df2 = dt.to_polars
31
+ dt = DeltaLake::Table.new("./events")
32
+ df = dt.to_polars
33
33
  ```
34
34
 
35
35
  Get a lazy frame
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
41
41
  Append rows
42
42
 
43
43
  ```ruby
44
- DeltaLake.write("./data/delta", df, mode: "append")
44
+ DeltaLake.write("./events", df, mode: "append")
45
45
  ```
46
46
 
47
47
  Overwrite a table
48
48
 
49
49
  ```ruby
50
- DeltaLake.write("./data/delta", df, mode: "overwrite")
50
+ DeltaLake.write("./events", df, mode: "overwrite")
51
51
  ```
52
52
 
53
53
  Add a constraint
54
54
 
55
55
  ```ruby
56
- dt.alter.add_constraint({"a_gt_0" => "a > 0"})
56
+ dt.alter.add_constraint({"id_gt_0" => "id > 0"})
57
57
  ```
58
58
 
59
59
  Drop a constraint
60
60
 
61
61
  ```ruby
62
- dt.alter.drop_constraint("a_gt_0")
62
+ dt.alter.drop_constraint("id_gt_0")
63
63
  ```
64
64
 
65
65
  Delete rows
66
66
 
67
67
  ```ruby
68
- dt.delete("a > 1")
68
+ dt.delete("id > 1")
69
69
  ```
70
70
 
71
71
  Vacuum
@@ -83,13 +83,13 @@ dt.optimize.compact
83
83
  Colocate similar data in the same files
84
84
 
85
85
  ```ruby
86
- dt.optimize.z_order(["a"])
86
+ dt.optimize.z_order(["category"])
87
87
  ```
88
88
 
89
89
  Load a previous version of a table
90
90
 
91
91
  ```ruby
92
- dt = DeltaLake::Table.new("./data/delta", version: 1)
92
+ dt = DeltaLake::Table.new("./events", version: 1)
93
93
  # or
94
94
  dt.load_as_version(1)
95
95
  ```
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.1.1"
3
+ version = "0.1.3"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,10 +11,11 @@ publish = false
11
11
  crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
- arrow = { version = "52", features = ["ffi"] }
15
- arrow-schema = { version = "52", features = ["serde"] }
14
+ arrow = { version = "53", features = ["ffi"] }
15
+ arrow-schema = { version = "53", features = ["serde"] }
16
16
  chrono = "0.4"
17
- deltalake = { version = "=0.21.0", features = ["datafusion", "s3"] }
17
+ delta_kernel = "0.4"
18
+ deltalake = { version = "=0.22.3", features = ["azure", "datafusion", "gcs", "s3"] }
18
19
  futures = "0.3"
19
20
  magnus = "0.7"
20
21
  num_cpus = "1"
@@ -1,6 +1,7 @@
1
1
  use arrow_schema::ArrowError;
2
+ use deltalake::protocol::ProtocolError;
2
3
  use deltalake::{errors::DeltaTableError, ObjectStoreError};
3
- use magnus::{exception, Error, Module, RModule, Ruby};
4
+ use magnus::{exception, Error as RbErr, Module, RModule, Ruby};
4
5
  use std::borrow::Cow;
5
6
 
6
7
  macro_rules! create_exception {
@@ -8,7 +9,7 @@ macro_rules! create_exception {
8
9
  pub struct $type {}
9
10
 
10
11
  impl $type {
11
- pub fn new_err<T>(message: T) -> Error
12
+ pub fn new_err<T>(message: T) -> RbErr
12
13
  where
13
14
  T: Into<Cow<'static, str>>,
14
15
  {
@@ -19,7 +20,7 @@ macro_rules! create_exception {
19
20
  .unwrap()
20
21
  .const_get($name)
21
22
  .unwrap();
22
- Error::new(class, message)
23
+ RbErr::new(class, message)
23
24
  }
24
25
  }
25
26
  };
@@ -31,7 +32,7 @@ create_exception!(DeltaProtocolError, "DeltaProtocolError");
31
32
  create_exception!(CommitFailedError, "CommitFailedError");
32
33
  create_exception!(SchemaMismatchError, "SchemaMismatchError");
33
34
 
34
- fn inner_to_rb_err(err: DeltaTableError) -> Error {
35
+ fn inner_to_rb_err(err: DeltaTableError) -> RbErr {
35
36
  match err {
36
37
  DeltaTableError::NotATable(msg) => TableNotFoundError::new_err(msg),
37
38
  DeltaTableError::InvalidTableLocation(msg) => TableNotFoundError::new_err(msg),
@@ -48,7 +49,7 @@ fn inner_to_rb_err(err: DeltaTableError) -> Error {
48
49
 
49
50
  // ruby exceptions
50
51
  DeltaTableError::ObjectStore { source } => object_store_to_rb(source),
51
- DeltaTableError::Io { source } => Error::new(exception::io_error(), source.to_string()),
52
+ DeltaTableError::Io { source } => RbIOError::new_err(source.to_string()),
52
53
 
53
54
  DeltaTableError::Arrow { source } => arrow_to_rb(source),
54
55
 
@@ -56,31 +57,50 @@ fn inner_to_rb_err(err: DeltaTableError) -> Error {
56
57
  }
57
58
  }
58
59
 
59
- fn object_store_to_rb(err: ObjectStoreError) -> Error {
60
+ fn object_store_to_rb(err: ObjectStoreError) -> RbErr {
60
61
  match err {
61
- ObjectStoreError::NotFound { .. } => Error::new(exception::io_error(), err.to_string()),
62
+ ObjectStoreError::NotFound { .. } => RbIOError::new_err(err.to_string()),
62
63
  ObjectStoreError::Generic { source, .. }
63
64
  if source.to_string().contains("AWS_S3_ALLOW_UNSAFE_RENAME") =>
64
65
  {
65
66
  DeltaProtocolError::new_err(source.to_string())
66
67
  }
67
- _ => Error::new(exception::io_error(), err.to_string()),
68
+ _ => RbIOError::new_err(err.to_string()),
68
69
  }
69
70
  }
70
71
 
71
- fn arrow_to_rb(err: ArrowError) -> Error {
72
+ fn arrow_to_rb(err: ArrowError) -> RbErr {
72
73
  match err {
73
- ArrowError::IoError(msg, _) => Error::new(exception::io_error(), msg),
74
- ArrowError::DivideByZero => Error::new(exception::arg_error(), "division by zero"),
75
- ArrowError::InvalidArgumentError(msg) => Error::new(exception::arg_error(), msg),
76
- ArrowError::NotYetImplemented(msg) => Error::new(exception::not_imp_error(), msg),
74
+ ArrowError::IoError(msg, _) => RbIOError::new_err(msg),
75
+ ArrowError::DivideByZero => RbValueError::new_err("division by zero"),
76
+ ArrowError::InvalidArgumentError(msg) => RbValueError::new_err(msg),
77
+ ArrowError::NotYetImplemented(msg) => RbNotImplementedError::new_err(msg),
77
78
  ArrowError::SchemaError(msg) => SchemaMismatchError::new_err(msg),
78
- other => Error::new(exception::runtime_error(), other.to_string()),
79
+ other => RbException::new_err(other.to_string()),
80
+ }
81
+ }
82
+
83
+ fn checkpoint_to_rb(err: ProtocolError) -> RbErr {
84
+ match err {
85
+ ProtocolError::Arrow { source } => arrow_to_rb(source),
86
+ ProtocolError::ObjectStore { source } => object_store_to_rb(source),
87
+ ProtocolError::EndOfLog => DeltaProtocolError::new_err("End of log"),
88
+ ProtocolError::NoMetaData => DeltaProtocolError::new_err("Table metadata missing"),
89
+ ProtocolError::CheckpointNotFound => DeltaProtocolError::new_err(err.to_string()),
90
+ ProtocolError::InvalidField(err) => RbValueError::new_err(err),
91
+ ProtocolError::InvalidRow(err) => RbValueError::new_err(err),
92
+ ProtocolError::InvalidDeletionVectorStorageType(err) => RbValueError::new_err(err),
93
+ ProtocolError::SerializeOperation { source } => RbValueError::new_err(source.to_string()),
94
+ ProtocolError::ParquetParseError { source } => RbIOError::new_err(source.to_string()),
95
+ ProtocolError::IO { source } => RbIOError::new_err(source.to_string()),
96
+ ProtocolError::Generic(msg) => DeltaError::new_err(msg),
97
+ ProtocolError::Kernel { source } => DeltaError::new_err(source.to_string()),
79
98
  }
80
99
  }
81
100
 
82
101
  pub enum RubyError {
83
102
  DeltaTable(DeltaTableError),
103
+ Protocol(ProtocolError),
84
104
  }
85
105
 
86
106
  impl From<DeltaTableError> for RubyError {
@@ -89,10 +109,37 @@ impl From<DeltaTableError> for RubyError {
89
109
  }
90
110
  }
91
111
 
92
- impl From<RubyError> for Error {
112
+ impl From<ProtocolError> for RubyError {
113
+ fn from(err: ProtocolError) -> Self {
114
+ RubyError::Protocol(err)
115
+ }
116
+ }
117
+
118
+ impl From<RubyError> for RbErr {
93
119
  fn from(value: RubyError) -> Self {
94
120
  match value {
95
121
  RubyError::DeltaTable(err) => inner_to_rb_err(err),
122
+ RubyError::Protocol(err) => checkpoint_to_rb(err),
96
123
  }
97
124
  }
98
125
  }
126
+
127
+ macro_rules! create_builtin_exception {
128
+ ($type:ident, $class:expr) => {
129
+ pub struct $type {}
130
+
131
+ impl $type {
132
+ pub fn new_err<T>(message: T) -> RbErr
133
+ where
134
+ T: Into<Cow<'static, str>>,
135
+ {
136
+ RbErr::new($class, message)
137
+ }
138
+ }
139
+ };
140
+ }
141
+
142
+ create_builtin_exception!(RbException, exception::runtime_error());
143
+ create_builtin_exception!(RbIOError, exception::io_error());
144
+ create_builtin_exception!(RbNotImplementedError, exception::not_imp_error());
145
+ create_builtin_exception!(RbValueError, exception::arg_error());
@@ -0,0 +1,67 @@
1
+ use crate::{RbResult, RbValueError};
2
+ use deltalake::kernel::TableFeatures as KernelTableFeatures;
3
+ use magnus::{prelude::*, TryConvert, Value};
4
+
5
+ /// High level table features
6
+ #[derive(Clone)]
7
+ pub enum TableFeatures {
8
+ /// Mapping of one column to another
9
+ ColumnMapping,
10
+ /// Deletion vectors for merge, update, delete
11
+ DeletionVectors,
12
+ /// timestamps without timezone support
13
+ TimestampWithoutTimezone,
14
+ /// version 2 of checkpointing
15
+ V2Checkpoint,
16
+ /// Append Only Tables
17
+ AppendOnly,
18
+ /// Table invariants
19
+ Invariants,
20
+ /// Check constraints on columns
21
+ CheckConstraints,
22
+ /// CDF on a table
23
+ ChangeDataFeed,
24
+ /// Columns with generated values
25
+ GeneratedColumns,
26
+ /// ID Columns
27
+ IdentityColumns,
28
+ /// Row tracking on tables
29
+ RowTracking,
30
+ /// domain specific metadata
31
+ DomainMetadata,
32
+ /// Iceberg compatibility support
33
+ IcebergCompatV1,
34
+ }
35
+
36
+ impl From<TableFeatures> for KernelTableFeatures {
37
+ fn from(value: TableFeatures) -> Self {
38
+ match value {
39
+ TableFeatures::ColumnMapping => KernelTableFeatures::ColumnMapping,
40
+ TableFeatures::DeletionVectors => KernelTableFeatures::DeletionVectors,
41
+ TableFeatures::TimestampWithoutTimezone => {
42
+ KernelTableFeatures::TimestampWithoutTimezone
43
+ }
44
+ TableFeatures::V2Checkpoint => KernelTableFeatures::V2Checkpoint,
45
+ TableFeatures::AppendOnly => KernelTableFeatures::AppendOnly,
46
+ TableFeatures::Invariants => KernelTableFeatures::Invariants,
47
+ TableFeatures::CheckConstraints => KernelTableFeatures::CheckConstraints,
48
+ TableFeatures::ChangeDataFeed => KernelTableFeatures::ChangeDataFeed,
49
+ TableFeatures::GeneratedColumns => KernelTableFeatures::GeneratedColumns,
50
+ TableFeatures::IdentityColumns => KernelTableFeatures::IdentityColumns,
51
+ TableFeatures::RowTracking => KernelTableFeatures::RowTracking,
52
+ TableFeatures::DomainMetadata => KernelTableFeatures::DomainMetadata,
53
+ TableFeatures::IcebergCompatV1 => KernelTableFeatures::IcebergCompatV1,
54
+ }
55
+ }
56
+ }
57
+
58
+ impl TryConvert for TableFeatures {
59
+ fn try_convert(val: Value) -> RbResult<Self> {
60
+ // TODO add more features
61
+ let feature = match unsafe { val.to_r_string()?.as_str()? } {
62
+ "append_only" => TableFeatures::AppendOnly,
63
+ _ => return Err(RbValueError::new_err("Invalid feature")),
64
+ };
65
+ Ok(feature)
66
+ }
67
+ }