deltalake-rb 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +547 -314
- data/README.md +11 -11
- data/ext/deltalake/Cargo.toml +3 -3
- data/lib/deltalake/table.rb +18 -13
- data/lib/deltalake/version.rb +1 -1
- metadata +3 -7
data/README.md
CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
|
|
21
21
|
Write data
|
22
22
|
|
23
23
|
```ruby
|
24
|
-
df = Polars::DataFrame.new({"
|
25
|
-
DeltaLake.write("./
|
24
|
+
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
|
25
|
+
DeltaLake.write("./events", df)
|
26
26
|
```
|
27
27
|
|
28
28
|
Load a table
|
29
29
|
|
30
30
|
```ruby
|
31
|
-
dt = DeltaLake::Table.new("./
|
32
|
-
|
31
|
+
dt = DeltaLake::Table.new("./events")
|
32
|
+
df = dt.to_polars
|
33
33
|
```
|
34
34
|
|
35
35
|
Get a lazy frame
|
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
|
|
41
41
|
Append rows
|
42
42
|
|
43
43
|
```ruby
|
44
|
-
DeltaLake.write("./
|
44
|
+
DeltaLake.write("./events", df, mode: "append")
|
45
45
|
```
|
46
46
|
|
47
47
|
Overwrite a table
|
48
48
|
|
49
49
|
```ruby
|
50
|
-
DeltaLake.write("./
|
50
|
+
DeltaLake.write("./events", df, mode: "overwrite")
|
51
51
|
```
|
52
52
|
|
53
53
|
Add a constraint
|
54
54
|
|
55
55
|
```ruby
|
56
|
-
dt.alter.add_constraint({"
|
56
|
+
dt.alter.add_constraint({"id_gt_0" => "id > 0"})
|
57
57
|
```
|
58
58
|
|
59
59
|
Drop a constraint
|
60
60
|
|
61
61
|
```ruby
|
62
|
-
dt.alter.drop_constraint("
|
62
|
+
dt.alter.drop_constraint("id_gt_0")
|
63
63
|
```
|
64
64
|
|
65
65
|
Delete rows
|
66
66
|
|
67
67
|
```ruby
|
68
|
-
dt.delete("
|
68
|
+
dt.delete("id > 1")
|
69
69
|
```
|
70
70
|
|
71
71
|
Vacuum
|
@@ -83,13 +83,13 @@ dt.optimize.compact
|
|
83
83
|
Colocate similar data in the same files
|
84
84
|
|
85
85
|
```ruby
|
86
|
-
dt.optimize.z_order(["
|
86
|
+
dt.optimize.z_order(["category"])
|
87
87
|
```
|
88
88
|
|
89
89
|
Load a previous version of a table
|
90
90
|
|
91
91
|
```ruby
|
92
|
-
dt = DeltaLake::Table.new("./
|
92
|
+
dt = DeltaLake::Table.new("./events", version: 1)
|
93
93
|
# or
|
94
94
|
dt.load_as_version(1)
|
95
95
|
```
|
data/ext/deltalake/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "deltalake"
|
3
|
-
version = "0.1.2"
|
3
|
+
version = "0.1.4"
|
4
4
|
license = "Apache-2.0"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -14,8 +14,8 @@ crate-type = ["cdylib"]
|
|
14
14
|
arrow = { version = "53", features = ["ffi"] }
|
15
15
|
arrow-schema = { version = "53", features = ["serde"] }
|
16
16
|
chrono = "0.4"
|
17
|
-
delta_kernel = "0.
|
18
|
-
deltalake = { version = "=0.
|
17
|
+
delta_kernel = "0.6"
|
18
|
+
deltalake = { version = "=0.23.0", features = ["azure", "datafusion", "gcs", "s3"] }
|
19
19
|
futures = "0.3"
|
20
20
|
magnus = "0.7"
|
21
21
|
num_cpus = "1"
|
data/lib/deltalake/table.rb
CHANGED
@@ -188,23 +188,28 @@ module DeltaLake
|
|
188
188
|
JSON.parse(metrics)
|
189
189
|
end
|
190
190
|
|
191
|
-
def to_polars(eager: true)
|
191
|
+
def to_polars(eager: true, rechunk: false, columns: nil)
|
192
192
|
require "polars-df"
|
193
193
|
|
194
194
|
sources = file_uris
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
195
|
+
if sources.empty?
|
196
|
+
lf = Polars::LazyFrame.new
|
197
|
+
else
|
198
|
+
delta_keys = [
|
199
|
+
"AWS_S3_ALLOW_UNSAFE_RENAME",
|
200
|
+
"AWS_S3_LOCKING_PROVIDER",
|
201
|
+
"CONDITIONAL_PUT",
|
202
|
+
"DELTA_DYNAMO_TABLE_NAME"
|
203
|
+
]
|
204
|
+
storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
|
205
|
+
lf = Polars.scan_parquet(sources, storage_options: storage_options, rechunk: rechunk)
|
206
|
+
|
207
|
+
if columns
|
208
|
+
# by_name requires polars-df > 0.15.0
|
209
|
+
lf = lf.select(Polars.cs.by_name(*columns))
|
207
210
|
end
|
211
|
+
end
|
212
|
+
|
208
213
|
eager ? lf.collect : lf
|
209
214
|
end
|
210
215
|
|
data/lib/deltalake/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deltalake-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: rb_sys
|
@@ -24,7 +23,6 @@ dependencies:
|
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
25
|
version: '0'
|
27
|
-
description:
|
28
26
|
email: andrew@ankane.org
|
29
27
|
executables: []
|
30
28
|
extensions:
|
@@ -59,7 +57,6 @@ homepage: https://github.com/ankane/delta-ruby
|
|
59
57
|
licenses:
|
60
58
|
- Apache-2.0
|
61
59
|
metadata: {}
|
62
|
-
post_install_message:
|
63
60
|
rdoc_options: []
|
64
61
|
require_paths:
|
65
62
|
- lib
|
@@ -74,8 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
74
71
|
- !ruby/object:Gem::Version
|
75
72
|
version: '0'
|
76
73
|
requirements: []
|
77
|
-
rubygems_version: 3.
|
78
|
-
signing_key:
|
74
|
+
rubygems_version: 3.6.2
|
79
75
|
specification_version: 4
|
80
76
|
summary: Delta Lake for Ruby
|
81
77
|
test_files: []
|