deltalake-rb 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
21
21
  Write data
22
22
 
23
23
  ```ruby
24
- df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3.0, 4.0]})
25
- DeltaLake.write("./data/delta", df)
24
+ df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
25
+ DeltaLake.write("./events", df)
26
26
  ```
27
27
 
28
28
  Load a table
29
29
 
30
30
  ```ruby
31
- dt = DeltaLake::Table.new("./data/delta")
32
- df2 = dt.to_polars
31
+ dt = DeltaLake::Table.new("./events")
32
+ df = dt.to_polars
33
33
  ```
34
34
 
35
35
  Get a lazy frame
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
41
41
  Append rows
42
42
 
43
43
  ```ruby
44
- DeltaLake.write("./data/delta", df, mode: "append")
44
+ DeltaLake.write("./events", df, mode: "append")
45
45
  ```
46
46
 
47
47
  Overwrite a table
48
48
 
49
49
  ```ruby
50
- DeltaLake.write("./data/delta", df, mode: "overwrite")
50
+ DeltaLake.write("./events", df, mode: "overwrite")
51
51
  ```
52
52
 
53
53
  Add a constraint
54
54
 
55
55
  ```ruby
56
- dt.alter.add_constraint({"a_gt_0" => "a > 0"})
56
+ dt.alter.add_constraint({"id_gt_0" => "id > 0"})
57
57
  ```
58
58
 
59
59
  Drop a constraint
60
60
 
61
61
  ```ruby
62
- dt.alter.drop_constraint("a_gt_0")
62
+ dt.alter.drop_constraint("id_gt_0")
63
63
  ```
64
64
 
65
65
  Delete rows
66
66
 
67
67
  ```ruby
68
- dt.delete("a > 1")
68
+ dt.delete("id > 1")
69
69
  ```
70
70
 
71
71
  Vacuum
@@ -83,13 +83,13 @@ dt.optimize.compact
83
83
  Colocate similar data in the same files
84
84
 
85
85
  ```ruby
86
- dt.optimize.z_order(["a"])
86
+ dt.optimize.z_order(["category"])
87
87
  ```
88
88
 
89
89
  Load a previous version of a table
90
90
 
91
91
  ```ruby
92
- dt = DeltaLake::Table.new("./data/delta", version: 1)
92
+ dt = DeltaLake::Table.new("./events", version: 1)
93
93
  # or
94
94
  dt.load_as_version(1)
95
95
  ```
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.1.1"
3
+ version = "0.1.3"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,10 +11,11 @@ publish = false
11
11
  crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
- arrow = { version = "52", features = ["ffi"] }
15
- arrow-schema = { version = "52", features = ["serde"] }
14
+ arrow = { version = "53", features = ["ffi"] }
15
+ arrow-schema = { version = "53", features = ["serde"] }
16
16
  chrono = "0.4"
17
- deltalake = { version = "=0.21.0", features = ["datafusion", "s3"] }
17
+ delta_kernel = "0.4"
18
+ deltalake = { version = "=0.22.3", features = ["azure", "datafusion", "gcs", "s3"] }
18
19
  futures = "0.3"
19
20
  magnus = "0.7"
20
21
  num_cpus = "1"
@@ -1,6 +1,7 @@
1
1
  use arrow_schema::ArrowError;
2
+ use deltalake::protocol::ProtocolError;
2
3
  use deltalake::{errors::DeltaTableError, ObjectStoreError};
3
- use magnus::{exception, Error, Module, RModule, Ruby};
4
+ use magnus::{exception, Error as RbErr, Module, RModule, Ruby};
4
5
  use std::borrow::Cow;
5
6
 
6
7
  macro_rules! create_exception {
@@ -8,7 +9,7 @@ macro_rules! create_exception {
8
9
  pub struct $type {}
9
10
 
10
11
  impl $type {
11
- pub fn new_err<T>(message: T) -> Error
12
+ pub fn new_err<T>(message: T) -> RbErr
12
13
  where
13
14
  T: Into<Cow<'static, str>>,
14
15
  {
@@ -19,7 +20,7 @@ macro_rules! create_exception {
19
20
  .unwrap()
20
21
  .const_get($name)
21
22
  .unwrap();
22
- Error::new(class, message)
23
+ RbErr::new(class, message)
23
24
  }
24
25
  }
25
26
  };
@@ -31,7 +32,7 @@ create_exception!(DeltaProtocolError, "DeltaProtocolError");
31
32
  create_exception!(CommitFailedError, "CommitFailedError");
32
33
  create_exception!(SchemaMismatchError, "SchemaMismatchError");
33
34
 
34
- fn inner_to_rb_err(err: DeltaTableError) -> Error {
35
+ fn inner_to_rb_err(err: DeltaTableError) -> RbErr {
35
36
  match err {
36
37
  DeltaTableError::NotATable(msg) => TableNotFoundError::new_err(msg),
37
38
  DeltaTableError::InvalidTableLocation(msg) => TableNotFoundError::new_err(msg),
@@ -48,7 +49,7 @@ fn inner_to_rb_err(err: DeltaTableError) -> Error {
48
49
 
49
50
  // ruby exceptions
50
51
  DeltaTableError::ObjectStore { source } => object_store_to_rb(source),
51
- DeltaTableError::Io { source } => Error::new(exception::io_error(), source.to_string()),
52
+ DeltaTableError::Io { source } => RbIOError::new_err(source.to_string()),
52
53
 
53
54
  DeltaTableError::Arrow { source } => arrow_to_rb(source),
54
55
 
@@ -56,31 +57,50 @@ fn inner_to_rb_err(err: DeltaTableError) -> Error {
56
57
  }
57
58
  }
58
59
 
59
- fn object_store_to_rb(err: ObjectStoreError) -> Error {
60
+ fn object_store_to_rb(err: ObjectStoreError) -> RbErr {
60
61
  match err {
61
- ObjectStoreError::NotFound { .. } => Error::new(exception::io_error(), err.to_string()),
62
+ ObjectStoreError::NotFound { .. } => RbIOError::new_err(err.to_string()),
62
63
  ObjectStoreError::Generic { source, .. }
63
64
  if source.to_string().contains("AWS_S3_ALLOW_UNSAFE_RENAME") =>
64
65
  {
65
66
  DeltaProtocolError::new_err(source.to_string())
66
67
  }
67
- _ => Error::new(exception::io_error(), err.to_string()),
68
+ _ => RbIOError::new_err(err.to_string()),
68
69
  }
69
70
  }
70
71
 
71
- fn arrow_to_rb(err: ArrowError) -> Error {
72
+ fn arrow_to_rb(err: ArrowError) -> RbErr {
72
73
  match err {
73
- ArrowError::IoError(msg, _) => Error::new(exception::io_error(), msg),
74
- ArrowError::DivideByZero => Error::new(exception::arg_error(), "division by zero"),
75
- ArrowError::InvalidArgumentError(msg) => Error::new(exception::arg_error(), msg),
76
- ArrowError::NotYetImplemented(msg) => Error::new(exception::not_imp_error(), msg),
74
+ ArrowError::IoError(msg, _) => RbIOError::new_err(msg),
75
+ ArrowError::DivideByZero => RbValueError::new_err("division by zero"),
76
+ ArrowError::InvalidArgumentError(msg) => RbValueError::new_err(msg),
77
+ ArrowError::NotYetImplemented(msg) => RbNotImplementedError::new_err(msg),
77
78
  ArrowError::SchemaError(msg) => SchemaMismatchError::new_err(msg),
78
- other => Error::new(exception::runtime_error(), other.to_string()),
79
+ other => RbException::new_err(other.to_string()),
80
+ }
81
+ }
82
+
83
// Review note: converts a deltalake `ProtocolError` into the Ruby error type (`RbErr`).
// Arrow and object-store sources are delegated to `arrow_to_rb` / `object_store_to_rb`;
// every other variant is mapped directly to DeltaProtocolError, RbValueError, RbIOError
// or DeltaError. The match has no wildcard arm, so it must list every variant of the
// enum; Cargo.toml pins deltalake to "=0.22.3" in this same diff.
+ fn checkpoint_to_rb(err: ProtocolError) -> RbErr {
84
+ match err {
85
+ ProtocolError::Arrow { source } => arrow_to_rb(source),
86
+ ProtocolError::ObjectStore { source } => object_store_to_rb(source),
87
+ ProtocolError::EndOfLog => DeltaProtocolError::new_err("End of log"),
88
+ ProtocolError::NoMetaData => DeltaProtocolError::new_err("Table metadata missing"),
89
// Review note: this arm binds nothing, so `err` below is still the function argument.
+ ProtocolError::CheckpointNotFound => DeltaProtocolError::new_err(err.to_string()),
90
// Review note: in the next three arms `err` is the variant payload and shadows the argument.
+ ProtocolError::InvalidField(err) => RbValueError::new_err(err),
91
+ ProtocolError::InvalidRow(err) => RbValueError::new_err(err),
92
+ ProtocolError::InvalidDeletionVectorStorageType(err) => RbValueError::new_err(err),
93
+ ProtocolError::SerializeOperation { source } => RbValueError::new_err(source.to_string()),
94
+ ProtocolError::ParquetParseError { source } => RbIOError::new_err(source.to_string()),
95
+ ProtocolError::IO { source } => RbIOError::new_err(source.to_string()),
96
+ ProtocolError::Generic(msg) => DeltaError::new_err(msg),
97
+ ProtocolError::Kernel { source } => DeltaError::new_err(source.to_string()),
79
98
  }
80
99
  }
81
100
 
82
101
  pub enum RubyError {
83
102
  DeltaTable(DeltaTableError),
103
+ Protocol(ProtocolError),
84
104
  }
85
105
 
86
106
  impl From<DeltaTableError> for RubyError {
@@ -89,10 +109,37 @@ impl From<DeltaTableError> for RubyError {
89
109
  }
90
110
  }
91
111
 
92
- impl From<RubyError> for Error {
112
// Review note: wraps a `ProtocolError` in `RubyError::Protocol`; the
// `From<RubyError> for RbErr` impl in this file routes that variant to `checkpoint_to_rb`.
+ impl From<ProtocolError> for RubyError {
113
+ fn from(err: ProtocolError) -> Self {
114
+ RubyError::Protocol(err)
115
+ }
116
+ }
117
+
118
+ impl From<RubyError> for RbErr {
93
119
  fn from(value: RubyError) -> Self {
94
120
  match value {
95
121
  RubyError::DeltaTable(err) => inner_to_rb_err(err),
122
+ RubyError::Protocol(err) => checkpoint_to_rb(err),
96
123
  }
97
124
  }
98
125
  }
126
+
127
// Review note: generates an empty marker struct `$type` with one associated function,
// `new_err(message)`, which builds an `RbErr` from the Ruby exception class given by the
// `$class` expression. `$class` is pasted into the body of `new_err`, so it is evaluated
// on every call rather than once at definition time.
+ macro_rules! create_builtin_exception {
128
+ ($type:ident, $class:expr) => {
129
+ pub struct $type {}
130
+
131
+ impl $type {
132
+ pub fn new_err<T>(message: T) -> RbErr
133
+ where
134
+ T: Into<Cow<'static, str>>,
135
+ {
136
+ RbErr::new($class, message)
137
+ }
138
+ }
139
+ };
140
+ }
141
+
142
// Review note: the wrapper names do not mirror the Ruby class names one-to-one:
// `RbException` uses `exception::runtime_error()` and `RbValueError` uses
// `exception::arg_error()`.
+ create_builtin_exception!(RbException, exception::runtime_error());
143
+ create_builtin_exception!(RbIOError, exception::io_error());
144
+ create_builtin_exception!(RbNotImplementedError, exception::not_imp_error());
145
+ create_builtin_exception!(RbValueError, exception::arg_error());
@@ -0,0 +1,67 @@
1
+ use crate::{RbResult, RbValueError};
2
+ use deltalake::kernel::TableFeatures as KernelTableFeatures;
3
+ use magnus::{prelude::*, TryConvert, Value};
4
+
5
+ /// High-level table features; each variant has a same-named `KernelTableFeatures` counterpart.
6
+ #[derive(Clone)]
7
+ pub enum TableFeatures {
8
+ /// Mapping of one column to another
9
+ ColumnMapping,
10
+ /// Deletion vectors for merge, update, delete
11
+ DeletionVectors,
12
+ /// Timestamps without timezone support
13
+ TimestampWithoutTimezone,
14
+ /// Version 2 of checkpointing
15
+ V2Checkpoint,
16
+ /// Append-only tables
17
+ AppendOnly,
18
+ /// Table invariants
19
+ Invariants,
20
+ /// Check constraints on columns
21
+ CheckConstraints,
22
+ /// Change data feed (CDF) on a table
23
+ ChangeDataFeed,
24
+ /// Columns with generated values
25
+ GeneratedColumns,
26
+ /// Identity (ID) columns
27
+ IdentityColumns,
28
+ /// Row tracking on tables
29
+ RowTracking,
30
+ /// Domain-specific metadata
31
+ DomainMetadata,
32
+ /// Iceberg compatibility support
33
+ IcebergCompatV1,
34
+ }
35
+
36
// Review note: one-to-one conversion from this crate's `TableFeatures` to deltalake's
// `KernelTableFeatures`; every variant maps to the variant of the same name. The match
// has no wildcard arm, so a variant added to `TableFeatures` must also be handled here.
+ impl From<TableFeatures> for KernelTableFeatures {
37
+ fn from(value: TableFeatures) -> Self {
38
+ match value {
39
+ TableFeatures::ColumnMapping => KernelTableFeatures::ColumnMapping,
40
+ TableFeatures::DeletionVectors => KernelTableFeatures::DeletionVectors,
41
+ TableFeatures::TimestampWithoutTimezone => {
42
+ KernelTableFeatures::TimestampWithoutTimezone
43
+ }
44
+ TableFeatures::V2Checkpoint => KernelTableFeatures::V2Checkpoint,
45
+ TableFeatures::AppendOnly => KernelTableFeatures::AppendOnly,
46
+ TableFeatures::Invariants => KernelTableFeatures::Invariants,
47
+ TableFeatures::CheckConstraints => KernelTableFeatures::CheckConstraints,
48
+ TableFeatures::ChangeDataFeed => KernelTableFeatures::ChangeDataFeed,
49
+ TableFeatures::GeneratedColumns => KernelTableFeatures::GeneratedColumns,
50
+ TableFeatures::IdentityColumns => KernelTableFeatures::IdentityColumns,
51
+ TableFeatures::RowTracking => KernelTableFeatures::RowTracking,
52
+ TableFeatures::DomainMetadata => KernelTableFeatures::DomainMetadata,
53
+ TableFeatures::IcebergCompatV1 => KernelTableFeatures::IcebergCompatV1,
54
+ }
55
+ }
56
+ }
57
+
58
// Review note: converts a Ruby value into a `TableFeatures` variant by comparing its
// string form against known feature names. Any other string yields
// `RbValueError` ("Invalid feature"); failures of `to_r_string` / `as_str` propagate via `?`.
+ impl TryConvert for TableFeatures {
59
+ fn try_convert(val: Value) -> RbResult<Self> {
60
+ // TODO add more features (only "append_only" is accepted so far; the other variants cannot be produced from Ruby through this impl)
61
// SAFETY (review, TODO confirm): the `&str` borrowed from the Ruby string is only
// compared against string literals inside this `match`; presumably nothing here can
// mutate or free the underlying Ruby string while the borrow is alive.
+ let feature = match unsafe { val.to_r_string()?.as_str()? } {
62
+ "append_only" => TableFeatures::AppendOnly,
63
+ _ => return Err(RbValueError::new_err("Invalid feature")),
64
+ };
65
+ Ok(feature)
66
+ }
67
+ }