deltalake-rb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Cargo.toml ADDED
@@ -0,0 +1,6 @@
1
+ [workspace]
2
+ members = ["ext/deltalake"]
3
+ resolver = "2"
4
+
5
+ [profile.release]
6
+ strip = true
data/LICENSE.txt ADDED
@@ -0,0 +1,179 @@
1
+ Copyright (2020) QP Hou and a number of other contributors. All rights reserved.
2
+
3
+
4
+ Apache License
5
+ Version 2.0, January 2004
6
+ http://www.apache.org/licenses/
7
+
8
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
9
+
10
+ 1. Definitions.
11
+
12
+ "License" shall mean the terms and conditions for use, reproduction,
13
+ and distribution as defined by Sections 1 through 9 of this document.
14
+
15
+ "Licensor" shall mean the copyright owner or entity authorized by
16
+ the copyright owner that is granting the License.
17
+
18
+ "Legal Entity" shall mean the union of the acting entity and all
19
+ other entities that control, are controlled by, or are under common
20
+ control with that entity. For the purposes of this definition,
21
+ "control" means (i) the power, direct or indirect, to cause the
22
+ direction or management of such entity, whether by contract or
23
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
24
+ outstanding shares, or (iii) beneficial ownership of such entity.
25
+
26
+ "You" (or "Your") shall mean an individual or Legal Entity
27
+ exercising permissions granted by this License.
28
+
29
+ "Source" form shall mean the preferred form for making modifications,
30
+ including but not limited to software source code, documentation
31
+ source, and configuration files.
32
+
33
+ "Object" form shall mean any form resulting from mechanical
34
+ transformation or translation of a Source form, including but
35
+ not limited to compiled object code, generated documentation,
36
+ and conversions to other media types.
37
+
38
+ "Work" shall mean the work of authorship, whether in Source or
39
+ Object form, made available under the License, as indicated by a
40
+ copyright notice that is included in or attached to the work
41
+ (an example is provided in the Appendix below).
42
+
43
+ "Derivative Works" shall mean any work, whether in Source or Object
44
+ form, that is based on (or derived from) the Work and for which the
45
+ editorial revisions, annotations, elaborations, or other modifications
46
+ represent, as a whole, an original work of authorship. For the purposes
47
+ of this License, Derivative Works shall not include works that remain
48
+ separable from, or merely link (or bind by name) to the interfaces of,
49
+ the Work and Derivative Works thereof.
50
+
51
+ "Contribution" shall mean any work of authorship, including
52
+ the original version of the Work and any modifications or additions
53
+ to that Work or Derivative Works thereof, that is intentionally
54
+ submitted to Licensor for inclusion in the Work by the copyright owner
55
+ or by an individual or Legal Entity authorized to submit on behalf of
56
+ the copyright owner. For the purposes of this definition, "submitted"
57
+ means any form of electronic, verbal, or written communication sent
58
+ to the Licensor or its representatives, including but not limited to
59
+ communication on electronic mailing lists, source code control systems,
60
+ and issue tracking systems that are managed by, or on behalf of, the
61
+ Licensor for the purpose of discussing and improving the Work, but
62
+ excluding communication that is conspicuously marked or otherwise
63
+ designated in writing by the copyright owner as "Not a Contribution."
64
+
65
+ "Contributor" shall mean Licensor and any individual or Legal Entity
66
+ on behalf of whom a Contribution has been received by Licensor and
67
+ subsequently incorporated within the Work.
68
+
69
+ 2. Grant of Copyright License. Subject to the terms and conditions of
70
+ this License, each Contributor hereby grants to You a perpetual,
71
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
72
+ copyright license to reproduce, prepare Derivative Works of,
73
+ publicly display, publicly perform, sublicense, and distribute the
74
+ Work and such Derivative Works in Source or Object form.
75
+
76
+ 3. Grant of Patent License. Subject to the terms and conditions of
77
+ this License, each Contributor hereby grants to You a perpetual,
78
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
79
+ (except as stated in this section) patent license to make, have made,
80
+ use, offer to sell, sell, import, and otherwise transfer the Work,
81
+ where such license applies only to those patent claims licensable
82
+ by such Contributor that are necessarily infringed by their
83
+ Contribution(s) alone or by combination of their Contribution(s)
84
+ with the Work to which such Contribution(s) was submitted. If You
85
+ institute patent litigation against any entity (including a
86
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
87
+ or a Contribution incorporated within the Work constitutes direct
88
+ or contributory patent infringement, then any patent licenses
89
+ granted to You under this License for that Work shall terminate
90
+ as of the date such litigation is filed.
91
+
92
+ 4. Redistribution. You may reproduce and distribute copies of the
93
+ Work or Derivative Works thereof in any medium, with or without
94
+ modifications, and in Source or Object form, provided that You
95
+ meet the following conditions:
96
+
97
+ (a) You must give any other recipients of the Work or
98
+ Derivative Works a copy of this License; and
99
+
100
+ (b) You must cause any modified files to carry prominent notices
101
+ stating that You changed the files; and
102
+
103
+ (c) You must retain, in the Source form of any Derivative Works
104
+ that You distribute, all copyright, patent, trademark, and
105
+ attribution notices from the Source form of the Work,
106
+ excluding those notices that do not pertain to any part of
107
+ the Derivative Works; and
108
+
109
+ (d) If the Work includes a "NOTICE" text file as part of its
110
+ distribution, then any Derivative Works that You distribute must
111
+ include a readable copy of the attribution notices contained
112
+ within such NOTICE file, excluding those notices that do not
113
+ pertain to any part of the Derivative Works, in at least one
114
+ of the following places: within a NOTICE text file distributed
115
+ as part of the Derivative Works; within the Source form or
116
+ documentation, if provided along with the Derivative Works; or,
117
+ within a display generated by the Derivative Works, if and
118
+ wherever such third-party notices normally appear. The contents
119
+ of the NOTICE file are for informational purposes only and
120
+ do not modify the License. You may add Your own attribution
121
+ notices within Derivative Works that You distribute, alongside
122
+ or as an addendum to the NOTICE text from the Work, provided
123
+ that such additional attribution notices cannot be construed
124
+ as modifying the License.
125
+
126
+ You may add Your own copyright statement to Your modifications and
127
+ may provide additional or different license terms and conditions
128
+ for use, reproduction, or distribution of Your modifications, or
129
+ for any such Derivative Works as a whole, provided Your use,
130
+ reproduction, and distribution of the Work otherwise complies with
131
+ the conditions stated in this License.
132
+
133
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
134
+ any Contribution intentionally submitted for inclusion in the Work
135
+ by You to the Licensor shall be under the terms and conditions of
136
+ this License, without any additional terms or conditions.
137
+ Notwithstanding the above, nothing herein shall supersede or modify
138
+ the terms of any separate license agreement you may have executed
139
+ with Licensor regarding such Contributions.
140
+
141
+ 6. Trademarks. This License does not grant permission to use the trade
142
+ names, trademarks, service marks, or product names of the Licensor,
143
+ except as required for reasonable and customary use in describing the
144
+ origin of the Work and reproducing the content of the NOTICE file.
145
+
146
+ 7. Disclaimer of Warranty. Unless required by applicable law or
147
+ agreed to in writing, Licensor provides the Work (and each
148
+ Contributor provides its Contributions) on an "AS IS" BASIS,
149
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
150
+ implied, including, without limitation, any warranties or conditions
151
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
152
+ PARTICULAR PURPOSE. You are solely responsible for determining the
153
+ appropriateness of using or redistributing the Work and assume any
154
+ risks associated with Your exercise of permissions under this License.
155
+
156
+ 8. Limitation of Liability. In no event and under no legal theory,
157
+ whether in tort (including negligence), contract, or otherwise,
158
+ unless required by applicable law (such as deliberate and grossly
159
+ negligent acts) or agreed to in writing, shall any Contributor be
160
+ liable to You for damages, including any direct, indirect, special,
161
+ incidental, or consequential damages of any character arising as a
162
+ result of this License or out of the use or inability to use the
163
+ Work (including but not limited to damages for loss of goodwill,
164
+ work stoppage, computer failure or malfunction, or any and all
165
+ other commercial damages or losses), even if such Contributor
166
+ has been advised of the possibility of such damages.
167
+
168
+ 9. Accepting Warranty or Additional Liability. While redistributing
169
+ the Work or Derivative Works thereof, You may choose to offer,
170
+ and charge a fee for, acceptance of support, warranty, indemnity,
171
+ or other liability obligations and/or rights consistent with this
172
+ License. However, in accepting such obligations, You may act only
173
+ on Your own behalf and on Your sole responsibility, not on behalf
174
+ of any other Contributor, and only if You agree to indemnify,
175
+ defend, and hold each Contributor harmless for any liability
176
+ incurred by, or claims asserted against, such Contributor by reason
177
+ of your accepting any such warranty or additional liability.
178
+
179
+ END OF TERMS AND CONDITIONS
data/README.md ADDED
@@ -0,0 +1,110 @@
1
+ # delta-ruby
2
+
3
+ [Delta Lake](https://delta.io/) for Ruby
4
+
5
+ Supports local files and Amazon S3
6
+
7
+ [![Build Status](https://github.com/ankane/delta-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/delta-ruby/actions)
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application’s Gemfile:
12
+
13
+ ```ruby
14
+ gem "deltalake-rb"
15
+ ```
16
+
17
+ It can take a few minutes to compile the gem.
18
+
19
+ ## Getting Started
20
+
21
+ Write data
22
+
23
+ ```ruby
24
+ df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3.0, 4.0]})
25
+ DeltaLake.write("./data/delta", df)
26
+ ```
27
+
28
+ Load a table
29
+
30
+ ```ruby
31
+ dt = DeltaLake::Table.new("./data/delta")
32
+ df2 = dt.to_polars
33
+ ```
34
+
35
+ Get a lazy frame
36
+
37
+ ```ruby
38
+ lf = dt.to_polars(eager: false)
39
+ ```
40
+
41
+ Append rows
42
+
43
+ ```ruby
44
+ DeltaLake.write("./data/delta", df, mode: "append")
45
+ ```
46
+
47
+ Overwrite a table
48
+
49
+ ```ruby
50
+ DeltaLake.write("./data/delta", df, mode: "overwrite")
51
+ ```
52
+
53
+ Delete rows
54
+
55
+ ```ruby
56
+ dt.delete("a > 1")
57
+ ```
58
+
59
+ Vacuum
60
+
61
+ ```ruby
62
+ dt.vacuum(dry_run: false)
63
+ ```
64
+
65
+ Load a previous version of a table
66
+
67
+ ```ruby
68
+ dt = DeltaLake::Table.new("./data/delta", version: 1)
69
+ # or
70
+ dt.load_as_version(1)
71
+ ```
72
+
73
+ Get metadata
74
+
75
+ ```ruby
76
+ dt.metadata
77
+ ```
78
+
79
+ Get the schema
80
+
81
+ ```ruby
82
+ dt.schema
83
+ ```
84
+
85
+ ## API
86
+
87
+ This library follows the [Delta Lake Python API](https://delta-io.github.io/delta-rs/) (with a few changes to make it more Ruby-like). You can follow Python tutorials and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
88
+
89
+ ## History
90
+
91
+ View the [changelog](https://github.com/ankane/delta-ruby/blob/master/CHANGELOG.md)
92
+
93
+ ## Contributing
94
+
95
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
96
+
97
+ - [Report bugs](https://github.com/ankane/delta-ruby/issues)
98
+ - Fix bugs and [submit pull requests](https://github.com/ankane/delta-ruby/pulls)
99
+ - Write, clarify, or fix documentation
100
+ - Suggest or add new features
101
+
102
+ To get started with development:
103
+
104
+ ```sh
105
+ git clone https://github.com/ankane/delta-ruby.git
106
+ cd delta-ruby
107
+ bundle install
108
+ bundle exec rake compile
109
+ bundle exec rake test
110
+ ```
@@ -0,0 +1,21 @@
1
+ [package]
2
+ name = "deltalake"
3
+ version = "0.1.0"
4
+ license = "Apache-2.0"
5
+ authors = ["Andrew Kane <andrew@ankane.org>"]
6
+ edition = "2021"
7
+ rust-version = "1.70.0"
8
+ publish = false
9
+
10
+ [lib]
11
+ crate-type = ["cdylib"]
12
+
13
+ [dependencies]
14
+ arrow = { version = "52", features = ["ffi"] }
15
+ arrow-schema = { version = "52", features = ["serde"] }
16
+ chrono = "0.4"
17
+ deltalake = { version = "=0.21.0", features = ["datafusion", "s3"] }
18
+ magnus = "0.7"
19
+ serde = "1"
20
+ serde_json = "1"
21
+ tokio = { version = "1", features = ["rt-multi-thread"] }
@@ -0,0 +1,4 @@
1
# Extension build script: loads mkmf and rb_sys's mkmf integration, then
# calls rb_sys's `create_rust_makefile` for the "deltalake/deltalake" target.
# NOTE(review): presumably this produces the Makefile used to compile the
# Rust crate into the gem's native library — confirm against rb_sys docs.
require "mkmf"
require "rb_sys/mkmf"

create_rust_makefile("deltalake/deltalake")
@@ -0,0 +1,98 @@
1
+ use arrow_schema::ArrowError;
2
+ use deltalake::{errors::DeltaTableError, ObjectStoreError};
3
+ use magnus::{exception, Error, Module, RModule, Ruby};
4
+ use std::borrow::Cow;
5
+
6
/// Declares an empty marker struct `$type` whose `new_err` constructor builds
/// a `magnus::Error` using the exception class stored in the constant `$name`
/// under the Ruby `DeltaLake` module.
///
/// # Panics
///
/// `new_err` panics if `Ruby::get()` returns an error, or if either the
/// `DeltaLake` module or the `$name` constant cannot be resolved.
macro_rules! create_exception {
    ($type:ident, $name:expr) => {
        pub struct $type {}

        impl $type {
            /// Builds an error of this exception class carrying `message`.
            pub fn new_err<T>(message: T) -> Error
            where
                T: Into<Cow<'static, str>>,
            {
                let ruby = Ruby::get().unwrap();
                // Resolve `DeltaLake` first, then the exception constant inside it.
                let namespace: RModule = ruby.class_object().const_get("DeltaLake").unwrap();
                Error::new(namespace.const_get($name).unwrap(), message)
            }
        }
    };
}
27
+
28
+ create_exception!(DeltaError, "Error");
29
+ create_exception!(TableNotFoundError, "TableNotFoundError");
30
+ create_exception!(DeltaProtocolError, "DeltaProtocolError");
31
+ create_exception!(CommitFailedError, "CommitFailedError");
32
+ create_exception!(SchemaMismatchError, "SchemaMismatchError");
33
+
34
+ fn inner_to_rb_err(err: DeltaTableError) -> Error {
35
+ match err {
36
+ DeltaTableError::NotATable(msg) => TableNotFoundError::new_err(msg),
37
+ DeltaTableError::InvalidTableLocation(msg) => TableNotFoundError::new_err(msg),
38
+
39
+ // protocol errors
40
+ DeltaTableError::InvalidJsonLog { .. } => DeltaProtocolError::new_err(err.to_string()),
41
+ DeltaTableError::InvalidStatsJson { .. } => DeltaProtocolError::new_err(err.to_string()),
42
+ DeltaTableError::InvalidData { violations } => {
43
+ DeltaProtocolError::new_err(format!("Invariant violations: {:?}", violations))
44
+ }
45
+
46
+ // commit errors
47
+ DeltaTableError::Transaction { source } => CommitFailedError::new_err(source.to_string()),
48
+
49
+ // ruby exceptions
50
+ DeltaTableError::ObjectStore { source } => object_store_to_rb(source),
51
+ DeltaTableError::Io { source } => Error::new(exception::io_error(), source.to_string()),
52
+
53
+ DeltaTableError::Arrow { source } => arrow_to_rb(source),
54
+
55
+ _ => DeltaError::new_err(err.to_string()),
56
+ }
57
+ }
58
+
59
+ fn object_store_to_rb(err: ObjectStoreError) -> Error {
60
+ match err {
61
+ ObjectStoreError::NotFound { .. } => Error::new(exception::io_error(), err.to_string()),
62
+ ObjectStoreError::Generic { source, .. }
63
+ if source.to_string().contains("AWS_S3_ALLOW_UNSAFE_RENAME") =>
64
+ {
65
+ DeltaProtocolError::new_err(source.to_string())
66
+ }
67
+ _ => Error::new(exception::io_error(), err.to_string()),
68
+ }
69
+ }
70
+
71
+ fn arrow_to_rb(err: ArrowError) -> Error {
72
+ match err {
73
+ ArrowError::IoError(msg, _) => Error::new(exception::io_error(), msg),
74
+ ArrowError::DivideByZero => Error::new(exception::arg_error(), "division by zero"),
75
+ ArrowError::InvalidArgumentError(msg) => Error::new(exception::arg_error(), msg),
76
+ ArrowError::NotYetImplemented(msg) => Error::new(exception::not_imp_error(), msg),
77
+ ArrowError::SchemaError(msg) => SchemaMismatchError::new_err(msg),
78
+ other => Error::new(exception::runtime_error(), other.to_string()),
79
+ }
80
+ }
81
+
82
+ pub enum RubyError {
83
+ DeltaTable(DeltaTableError),
84
+ }
85
+
86
+ impl From<DeltaTableError> for RubyError {
87
+ fn from(err: DeltaTableError) -> Self {
88
+ RubyError::DeltaTable(err)
89
+ }
90
+ }
91
+
92
+ impl From<RubyError> for Error {
93
+ fn from(value: RubyError) -> Self {
94
+ match value {
95
+ RubyError::DeltaTable(err) => inner_to_rb_err(err),
96
+ }
97
+ }
98
+ }