deltalake-rb 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +8 -8
- data/README.md +11 -11
- data/ext/deltalake/Cargo.toml +2 -2
- data/lib/deltalake/table.rb +18 -13
- data/lib/deltalake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74cdc17a6f4237378f874c7c2142ef5bcd233d7da9273296c5e2dbb24d925add
|
4
|
+
data.tar.gz: ddf786922725bddc6adeadeaa9e734d3c3cd974ba009e319740393ade76a44a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d97802a6b1443cd1b85802fae78e81eff9657fe2b524631cc5a3cb4a09d18f01c408af388f52bad37a23b78dbc4816e605ae3690ef013ec904b6e47fd0a88ee0
|
7
|
+
data.tar.gz: fe3ffa3a6babd7fd154670ab587c77febb3be4f801d2a5421be373b6c165a198904f50980df7ef79e7dab155b01dddac6cba52a7f1d07225087462812c2996f0
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -1572,13 +1572,13 @@ dependencies = [
|
|
1572
1572
|
|
1573
1573
|
[[package]]
|
1574
1574
|
name = "deltalake"
|
1575
|
-
version = "0.1.2"
|
1575
|
+
version = "0.1.3"
|
1576
1576
|
dependencies = [
|
1577
1577
|
"arrow",
|
1578
1578
|
"arrow-schema",
|
1579
1579
|
"chrono",
|
1580
1580
|
"delta_kernel",
|
1581
|
-
"deltalake 0.22.
|
1581
|
+
"deltalake 0.22.3",
|
1582
1582
|
"futures",
|
1583
1583
|
"magnus",
|
1584
1584
|
"num_cpus",
|
@@ -1589,9 +1589,9 @@ dependencies = [
|
|
1589
1589
|
|
1590
1590
|
[[package]]
|
1591
1591
|
name = "deltalake"
|
1592
|
-
version = "0.22.
|
1592
|
+
version = "0.22.3"
|
1593
1593
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1594
|
-
checksum = "
|
1594
|
+
checksum = "9e289d4760a09c95d44b978363b4466b0aacf7c82561e773f2f12eb5bbf257e9"
|
1595
1595
|
dependencies = [
|
1596
1596
|
"deltalake-aws",
|
1597
1597
|
"deltalake-azure",
|
@@ -3174,18 +3174,18 @@ dependencies = [
|
|
3174
3174
|
|
3175
3175
|
[[package]]
|
3176
3176
|
name = "rb-sys"
|
3177
|
-
version = "0.9.
|
3177
|
+
version = "0.9.105"
|
3178
3178
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3179
|
-
checksum = "
|
3179
|
+
checksum = "4b3a1f3ce8e7c36d777d52fe7a99039fe4fea7c8ec355a4c4f3a17f92a14029f"
|
3180
3180
|
dependencies = [
|
3181
3181
|
"rb-sys-build",
|
3182
3182
|
]
|
3183
3183
|
|
3184
3184
|
[[package]]
|
3185
3185
|
name = "rb-sys-build"
|
3186
|
-
version = "0.9.
|
3186
|
+
version = "0.9.105"
|
3187
3187
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3188
|
-
checksum = "
|
3188
|
+
checksum = "3e6b246c29c0809e1cbe60a1ba9e093da72a4676d02adc68469297d1e589bbf0"
|
3189
3189
|
dependencies = [
|
3190
3190
|
"bindgen",
|
3191
3191
|
"lazy_static",
|
data/README.md
CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
|
|
21
21
|
Write data
|
22
22
|
|
23
23
|
```ruby
|
24
|
-
df = Polars::DataFrame.new({"
|
25
|
-
DeltaLake.write("./
|
24
|
+
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
|
25
|
+
DeltaLake.write("./events", df)
|
26
26
|
```
|
27
27
|
|
28
28
|
Load a table
|
29
29
|
|
30
30
|
```ruby
|
31
|
-
dt = DeltaLake::Table.new("./
|
32
|
-
|
31
|
+
dt = DeltaLake::Table.new("./events")
|
32
|
+
df = dt.to_polars
|
33
33
|
```
|
34
34
|
|
35
35
|
Get a lazy frame
|
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
|
|
41
41
|
Append rows
|
42
42
|
|
43
43
|
```ruby
|
44
|
-
DeltaLake.write("./
|
44
|
+
DeltaLake.write("./events", df, mode: "append")
|
45
45
|
```
|
46
46
|
|
47
47
|
Overwrite a table
|
48
48
|
|
49
49
|
```ruby
|
50
|
-
DeltaLake.write("./
|
50
|
+
DeltaLake.write("./events", df, mode: "overwrite")
|
51
51
|
```
|
52
52
|
|
53
53
|
Add a constraint
|
54
54
|
|
55
55
|
```ruby
|
56
|
-
dt.alter.add_constraint({"
|
56
|
+
dt.alter.add_constraint({"id_gt_0" => "id > 0"})
|
57
57
|
```
|
58
58
|
|
59
59
|
Drop a constraint
|
60
60
|
|
61
61
|
```ruby
|
62
|
-
dt.alter.drop_constraint("
|
62
|
+
dt.alter.drop_constraint("id_gt_0")
|
63
63
|
```
|
64
64
|
|
65
65
|
Delete rows
|
66
66
|
|
67
67
|
```ruby
|
68
|
-
dt.delete("
|
68
|
+
dt.delete("id > 1")
|
69
69
|
```
|
70
70
|
|
71
71
|
Vacuum
|
@@ -83,13 +83,13 @@ dt.optimize.compact
|
|
83
83
|
Colocate similar data in the same files
|
84
84
|
|
85
85
|
```ruby
|
86
|
-
dt.optimize.z_order(["
|
86
|
+
dt.optimize.z_order(["category"])
|
87
87
|
```
|
88
88
|
|
89
89
|
Load a previous version of a table
|
90
90
|
|
91
91
|
```ruby
|
92
|
-
dt = DeltaLake::Table.new("./
|
92
|
+
dt = DeltaLake::Table.new("./events", version: 1)
|
93
93
|
# or
|
94
94
|
dt.load_as_version(1)
|
95
95
|
```
|
data/ext/deltalake/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "deltalake"
|
3
|
-
version = "0.1.2"
|
3
|
+
version = "0.1.3"
|
4
4
|
license = "Apache-2.0"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -15,7 +15,7 @@ arrow = { version = "53", features = ["ffi"] }
|
|
15
15
|
arrow-schema = { version = "53", features = ["serde"] }
|
16
16
|
chrono = "0.4"
|
17
17
|
delta_kernel = "0.4"
|
18
|
-
deltalake = { version = "=0.22.
|
18
|
+
deltalake = { version = "=0.22.3", features = ["azure", "datafusion", "gcs", "s3"] }
|
19
19
|
futures = "0.3"
|
20
20
|
magnus = "0.7"
|
21
21
|
num_cpus = "1"
|
data/lib/deltalake/table.rb
CHANGED
@@ -188,23 +188,28 @@ module DeltaLake
|
|
188
188
|
JSON.parse(metrics)
|
189
189
|
end
|
190
190
|
|
191
|
-
def to_polars(eager: true)
|
191
|
+
def to_polars(eager: true, rechunk: false, columns: nil)
|
192
192
|
require "polars-df"
|
193
193
|
|
194
194
|
sources = file_uris
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
195
|
+
if sources.empty?
|
196
|
+
lf = Polars::LazyFrame.new
|
197
|
+
else
|
198
|
+
delta_keys = [
|
199
|
+
"AWS_S3_ALLOW_UNSAFE_RENAME",
|
200
|
+
"AWS_S3_LOCKING_PROVIDER",
|
201
|
+
"CONDITIONAL_PUT",
|
202
|
+
"DELTA_DYNAMO_TABLE_NAME"
|
203
|
+
]
|
204
|
+
storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
|
205
|
+
lf = Polars.scan_parquet(sources, storage_options: storage_options, rechunk: rechunk)
|
206
|
+
|
207
|
+
if columns
|
208
|
+
# by_name requires polars-df > 0.15.0
|
209
|
+
lf = lf.select(Polars.cs.by_name(*columns))
|
207
210
|
end
|
211
|
+
end
|
212
|
+
|
208
213
|
eager ? lf.collect : lf
|
209
214
|
end
|
210
215
|
|
data/lib/deltalake/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deltalake-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|