deltalake-rb 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +547 -314
- data/README.md +11 -11
- data/ext/deltalake/Cargo.toml +3 -3
- data/lib/deltalake/table.rb +18 -13
- data/lib/deltalake/version.rb +1 -1
- metadata +3 -7
data/README.md
CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
|
|
21
21
|
Write data
|
22
22
|
|
23
23
|
```ruby
|
24
|
-
df = Polars::DataFrame.new({"
|
25
|
-
DeltaLake.write("./
|
24
|
+
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
|
25
|
+
DeltaLake.write("./events", df)
|
26
26
|
```
|
27
27
|
|
28
28
|
Load a table
|
29
29
|
|
30
30
|
```ruby
|
31
|
-
dt = DeltaLake::Table.new("./
|
32
|
-
|
31
|
+
dt = DeltaLake::Table.new("./events")
|
32
|
+
df = dt.to_polars
|
33
33
|
```
|
34
34
|
|
35
35
|
Get a lazy frame
|
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
|
|
41
41
|
Append rows
|
42
42
|
|
43
43
|
```ruby
|
44
|
-
DeltaLake.write("./
|
44
|
+
DeltaLake.write("./events", df, mode: "append")
|
45
45
|
```
|
46
46
|
|
47
47
|
Overwrite a table
|
48
48
|
|
49
49
|
```ruby
|
50
|
-
DeltaLake.write("./
|
50
|
+
DeltaLake.write("./events", df, mode: "overwrite")
|
51
51
|
```
|
52
52
|
|
53
53
|
Add a constraint
|
54
54
|
|
55
55
|
```ruby
|
56
|
-
dt.alter.add_constraint({"
|
56
|
+
dt.alter.add_constraint({"id_gt_0" => "id > 0"})
|
57
57
|
```
|
58
58
|
|
59
59
|
Drop a constraint
|
60
60
|
|
61
61
|
```ruby
|
62
|
-
dt.alter.drop_constraint("
|
62
|
+
dt.alter.drop_constraint("id_gt_0")
|
63
63
|
```
|
64
64
|
|
65
65
|
Delete rows
|
66
66
|
|
67
67
|
```ruby
|
68
|
-
dt.delete("
|
68
|
+
dt.delete("id > 1")
|
69
69
|
```
|
70
70
|
|
71
71
|
Vacuum
|
@@ -83,13 +83,13 @@ dt.optimize.compact
|
|
83
83
|
Colocate similar data in the same files
|
84
84
|
|
85
85
|
```ruby
|
86
|
-
dt.optimize.z_order(["
|
86
|
+
dt.optimize.z_order(["category"])
|
87
87
|
```
|
88
88
|
|
89
89
|
Load a previous version of a table
|
90
90
|
|
91
91
|
```ruby
|
92
|
-
dt = DeltaLake::Table.new("./
|
92
|
+
dt = DeltaLake::Table.new("./events", version: 1)
|
93
93
|
# or
|
94
94
|
dt.load_as_version(1)
|
95
95
|
```
|
data/ext/deltalake/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "deltalake"
|
3
|
-
version = "0.1.2"
|
3
|
+
version = "0.1.4"
|
4
4
|
license = "Apache-2.0"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -14,8 +14,8 @@ crate-type = ["cdylib"]
|
|
14
14
|
arrow = { version = "53", features = ["ffi"] }
|
15
15
|
arrow-schema = { version = "53", features = ["serde"] }
|
16
16
|
chrono = "0.4"
|
17
|
-
delta_kernel = "0.
|
18
|
-
deltalake = { version = "=0.
|
17
|
+
delta_kernel = "0.6"
|
18
|
+
deltalake = { version = "=0.23.0", features = ["azure", "datafusion", "gcs", "s3"] }
|
19
19
|
futures = "0.3"
|
20
20
|
magnus = "0.7"
|
21
21
|
num_cpus = "1"
|
data/lib/deltalake/table.rb
CHANGED
@@ -188,23 +188,28 @@ module DeltaLake
|
|
188
188
|
JSON.parse(metrics)
|
189
189
|
end
|
190
190
|
|
191
|
-
def to_polars(eager: true)
|
191
|
+
def to_polars(eager: true, rechunk: false, columns: nil)
|
192
192
|
require "polars-df"
|
193
193
|
|
194
194
|
sources = file_uris
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
195
|
+
if sources.empty?
|
196
|
+
lf = Polars::LazyFrame.new
|
197
|
+
else
|
198
|
+
delta_keys = [
|
199
|
+
"AWS_S3_ALLOW_UNSAFE_RENAME",
|
200
|
+
"AWS_S3_LOCKING_PROVIDER",
|
201
|
+
"CONDITIONAL_PUT",
|
202
|
+
"DELTA_DYNAMO_TABLE_NAME"
|
203
|
+
]
|
204
|
+
storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
|
205
|
+
lf = Polars.scan_parquet(sources, storage_options: storage_options, rechunk: rechunk)
|
206
|
+
|
207
|
+
if columns
|
208
|
+
# by_name requires polars-df > 0.15.0
|
209
|
+
lf = lf.select(Polars.cs.by_name(*columns))
|
207
210
|
end
|
211
|
+
end
|
212
|
+
|
208
213
|
eager ? lf.collect : lf
|
209
214
|
end
|
210
215
|
|
data/lib/deltalake/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deltalake-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: rb_sys
|
@@ -24,7 +23,6 @@ dependencies:
|
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
25
|
version: '0'
|
27
|
-
description:
|
28
26
|
email: andrew@ankane.org
|
29
27
|
executables: []
|
30
28
|
extensions:
|
@@ -59,7 +57,6 @@ homepage: https://github.com/ankane/delta-ruby
|
|
59
57
|
licenses:
|
60
58
|
- Apache-2.0
|
61
59
|
metadata: {}
|
62
|
-
post_install_message:
|
63
60
|
rdoc_options: []
|
64
61
|
require_paths:
|
65
62
|
- lib
|
@@ -74,8 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
74
71
|
- !ruby/object:Gem::Version
|
75
72
|
version: '0'
|
76
73
|
requirements: []
|
77
|
-
rubygems_version: 3.
|
78
|
-
signing_key:
|
74
|
+
rubygems_version: 3.6.2
|
79
75
|
specification_version: 4
|
80
76
|
summary: Delta Lake for Ruby
|
81
77
|
test_files: []
|