deltalake-rb 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +8 -8
- data/README.md +11 -11
- data/ext/deltalake/Cargo.toml +2 -2
- data/lib/deltalake/table.rb +18 -13
- data/lib/deltalake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74cdc17a6f4237378f874c7c2142ef5bcd233d7da9273296c5e2dbb24d925add
|
4
|
+
data.tar.gz: ddf786922725bddc6adeadeaa9e734d3c3cd974ba009e319740393ade76a44a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d97802a6b1443cd1b85802fae78e81eff9657fe2b524631cc5a3cb4a09d18f01c408af388f52bad37a23b78dbc4816e605ae3690ef013ec904b6e47fd0a88ee0
|
7
|
+
data.tar.gz: fe3ffa3a6babd7fd154670ab587c77febb3be4f801d2a5421be373b6c165a198904f50980df7ef79e7dab155b01dddac6cba52a7f1d07225087462812c2996f0
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -1572,13 +1572,13 @@ dependencies = [
|
|
1572
1572
|
|
1573
1573
|
[[package]]
|
1574
1574
|
name = "deltalake"
|
1575
|
-
version = "0.1.2"
|
1575
|
+
version = "0.1.3"
|
1576
1576
|
dependencies = [
|
1577
1577
|
"arrow",
|
1578
1578
|
"arrow-schema",
|
1579
1579
|
"chrono",
|
1580
1580
|
"delta_kernel",
|
1581
|
-
"deltalake 0.22.
|
1581
|
+
"deltalake 0.22.3",
|
1582
1582
|
"futures",
|
1583
1583
|
"magnus",
|
1584
1584
|
"num_cpus",
|
@@ -1589,9 +1589,9 @@ dependencies = [
|
|
1589
1589
|
|
1590
1590
|
[[package]]
|
1591
1591
|
name = "deltalake"
|
1592
|
-
version = "0.22.
|
1592
|
+
version = "0.22.3"
|
1593
1593
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1594
|
-
checksum = "
|
1594
|
+
checksum = "9e289d4760a09c95d44b978363b4466b0aacf7c82561e773f2f12eb5bbf257e9"
|
1595
1595
|
dependencies = [
|
1596
1596
|
"deltalake-aws",
|
1597
1597
|
"deltalake-azure",
|
@@ -3174,18 +3174,18 @@ dependencies = [
|
|
3174
3174
|
|
3175
3175
|
[[package]]
|
3176
3176
|
name = "rb-sys"
|
3177
|
-
version = "0.9.
|
3177
|
+
version = "0.9.105"
|
3178
3178
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3179
|
-
checksum = "
|
3179
|
+
checksum = "4b3a1f3ce8e7c36d777d52fe7a99039fe4fea7c8ec355a4c4f3a17f92a14029f"
|
3180
3180
|
dependencies = [
|
3181
3181
|
"rb-sys-build",
|
3182
3182
|
]
|
3183
3183
|
|
3184
3184
|
[[package]]
|
3185
3185
|
name = "rb-sys-build"
|
3186
|
-
version = "0.9.
|
3186
|
+
version = "0.9.105"
|
3187
3187
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3188
|
-
checksum = "
|
3188
|
+
checksum = "3e6b246c29c0809e1cbe60a1ba9e093da72a4676d02adc68469297d1e589bbf0"
|
3189
3189
|
dependencies = [
|
3190
3190
|
"bindgen",
|
3191
3191
|
"lazy_static",
|
data/README.md
CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
|
|
21
21
|
Write data
|
22
22
|
|
23
23
|
```ruby
|
24
|
-
df = Polars::DataFrame.new({"
|
25
|
-
DeltaLake.write("./
|
24
|
+
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
|
25
|
+
DeltaLake.write("./events", df)
|
26
26
|
```
|
27
27
|
|
28
28
|
Load a table
|
29
29
|
|
30
30
|
```ruby
|
31
|
-
dt = DeltaLake::Table.new("./
|
32
|
-
|
31
|
+
dt = DeltaLake::Table.new("./events")
|
32
|
+
df = dt.to_polars
|
33
33
|
```
|
34
34
|
|
35
35
|
Get a lazy frame
|
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
|
|
41
41
|
Append rows
|
42
42
|
|
43
43
|
```ruby
|
44
|
-
DeltaLake.write("./
|
44
|
+
DeltaLake.write("./events", df, mode: "append")
|
45
45
|
```
|
46
46
|
|
47
47
|
Overwrite a table
|
48
48
|
|
49
49
|
```ruby
|
50
|
-
DeltaLake.write("./
|
50
|
+
DeltaLake.write("./events", df, mode: "overwrite")
|
51
51
|
```
|
52
52
|
|
53
53
|
Add a constraint
|
54
54
|
|
55
55
|
```ruby
|
56
|
-
dt.alter.add_constraint({"
|
56
|
+
dt.alter.add_constraint({"id_gt_0" => "id > 0"})
|
57
57
|
```
|
58
58
|
|
59
59
|
Drop a constraint
|
60
60
|
|
61
61
|
```ruby
|
62
|
-
dt.alter.drop_constraint("
|
62
|
+
dt.alter.drop_constraint("id_gt_0")
|
63
63
|
```
|
64
64
|
|
65
65
|
Delete rows
|
66
66
|
|
67
67
|
```ruby
|
68
|
-
dt.delete("
|
68
|
+
dt.delete("id > 1")
|
69
69
|
```
|
70
70
|
|
71
71
|
Vacuum
|
@@ -83,13 +83,13 @@ dt.optimize.compact
|
|
83
83
|
Colocate similar data in the same files
|
84
84
|
|
85
85
|
```ruby
|
86
|
-
dt.optimize.z_order(["
|
86
|
+
dt.optimize.z_order(["category"])
|
87
87
|
```
|
88
88
|
|
89
89
|
Load a previous version of a table
|
90
90
|
|
91
91
|
```ruby
|
92
|
-
dt = DeltaLake::Table.new("./
|
92
|
+
dt = DeltaLake::Table.new("./events", version: 1)
|
93
93
|
# or
|
94
94
|
dt.load_as_version(1)
|
95
95
|
```
|
data/ext/deltalake/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "deltalake"
|
3
|
-
version = "0.1.2"
|
3
|
+
version = "0.1.3"
|
4
4
|
license = "Apache-2.0"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -15,7 +15,7 @@ arrow = { version = "53", features = ["ffi"] }
|
|
15
15
|
arrow-schema = { version = "53", features = ["serde"] }
|
16
16
|
chrono = "0.4"
|
17
17
|
delta_kernel = "0.4"
|
18
|
-
deltalake = { version = "=0.22.
|
18
|
+
deltalake = { version = "=0.22.3", features = ["azure", "datafusion", "gcs", "s3"] }
|
19
19
|
futures = "0.3"
|
20
20
|
magnus = "0.7"
|
21
21
|
num_cpus = "1"
|
data/lib/deltalake/table.rb
CHANGED
@@ -188,23 +188,28 @@ module DeltaLake
|
|
188
188
|
JSON.parse(metrics)
|
189
189
|
end
|
190
190
|
|
191
|
-
def to_polars(eager: true)
|
191
|
+
def to_polars(eager: true, rechunk: false, columns: nil)
|
192
192
|
require "polars-df"
|
193
193
|
|
194
194
|
sources = file_uris
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
195
|
+
if sources.empty?
|
196
|
+
lf = Polars::LazyFrame.new
|
197
|
+
else
|
198
|
+
delta_keys = [
|
199
|
+
"AWS_S3_ALLOW_UNSAFE_RENAME",
|
200
|
+
"AWS_S3_LOCKING_PROVIDER",
|
201
|
+
"CONDITIONAL_PUT",
|
202
|
+
"DELTA_DYNAMO_TABLE_NAME"
|
203
|
+
]
|
204
|
+
storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
|
205
|
+
lf = Polars.scan_parquet(sources, storage_options: storage_options, rechunk: rechunk)
|
206
|
+
|
207
|
+
if columns
|
208
|
+
# by_name requires polars-df > 0.15.0
|
209
|
+
lf = lf.select(Polars.cs.by_name(*columns))
|
207
210
|
end
|
211
|
+
end
|
212
|
+
|
208
213
|
eager ? lf.collect : lf
|
209
214
|
end
|
210
215
|
|
data/lib/deltalake/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deltalake-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|