deltalake-rb 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd4f4c8d4d173c5b44a1758ba798624c43724e702941f3044763d762ee53681e
4
- data.tar.gz: 0c1616f7555ce13dc5d24837b6c8b47148e9a5308e615dee69d9b8b5f58b2116
3
+ metadata.gz: 74cdc17a6f4237378f874c7c2142ef5bcd233d7da9273296c5e2dbb24d925add
4
+ data.tar.gz: ddf786922725bddc6adeadeaa9e734d3c3cd974ba009e319740393ade76a44a6
5
5
  SHA512:
6
- metadata.gz: f6fdce2f189671bb76328f7c3ef4cc2405897bafae9b3f31cfcbe95a60842890d643ad8e4da7af773a3e10ea993b0c7be86e3de5ff5ea836d523872e670fb25e
7
- data.tar.gz: 728f32cf6c891ee82cb2690248197841c8a6b561d6fc4319969799117afafc1d6ea355782a7d906c07d65ca525da28c96a22051e0213dd8d09cbbaa410b07ed0
6
+ metadata.gz: d97802a6b1443cd1b85802fae78e81eff9657fe2b524631cc5a3cb4a09d18f01c408af388f52bad37a23b78dbc4816e605ae3690ef013ec904b6e47fd0a88ee0
7
+ data.tar.gz: fe3ffa3a6babd7fd154670ab587c77febb3be4f801d2a5421be373b6c165a198904f50980df7ef79e7dab155b01dddac6cba52a7f1d07225087462812c2996f0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.1.3 (2024-12-28)
2
+
3
+ - Updated `deltalake` to 0.22.3
4
+ - Added support for Ruby 3.4
5
+ - Added `rechunk` and `columns` options to `to_polars` method
6
+
1
7
  ## 0.1.2 (2024-12-03)
2
8
 
3
9
  - Updated `deltalake` to 0.22.2
data/Cargo.lock CHANGED
@@ -1572,13 +1572,13 @@ dependencies = [
1572
1572
 
1573
1573
  [[package]]
1574
1574
  name = "deltalake"
1575
- version = "0.1.2"
1575
+ version = "0.1.3"
1576
1576
  dependencies = [
1577
1577
  "arrow",
1578
1578
  "arrow-schema",
1579
1579
  "chrono",
1580
1580
  "delta_kernel",
1581
- "deltalake 0.22.2",
1581
+ "deltalake 0.22.3",
1582
1582
  "futures",
1583
1583
  "magnus",
1584
1584
  "num_cpus",
@@ -1589,9 +1589,9 @@ dependencies = [
1589
1589
 
1590
1590
  [[package]]
1591
1591
  name = "deltalake"
1592
- version = "0.22.2"
1592
+ version = "0.22.3"
1593
1593
  source = "registry+https://github.com/rust-lang/crates.io-index"
1594
- checksum = "950788ee777d7fa22043fd329854809cbbf418d25dc04456509e56e885973a7a"
1594
+ checksum = "9e289d4760a09c95d44b978363b4466b0aacf7c82561e773f2f12eb5bbf257e9"
1595
1595
  dependencies = [
1596
1596
  "deltalake-aws",
1597
1597
  "deltalake-azure",
@@ -3174,18 +3174,18 @@ dependencies = [
3174
3174
 
3175
3175
  [[package]]
3176
3176
  name = "rb-sys"
3177
- version = "0.9.103"
3177
+ version = "0.9.105"
3178
3178
  source = "registry+https://github.com/rust-lang/crates.io-index"
3179
- checksum = "91dbe37ab6ac2fba187480fb6544b92445e41e5c6f553bf0c33743f3c450a1df"
3179
+ checksum = "4b3a1f3ce8e7c36d777d52fe7a99039fe4fea7c8ec355a4c4f3a17f92a14029f"
3180
3180
  dependencies = [
3181
3181
  "rb-sys-build",
3182
3182
  ]
3183
3183
 
3184
3184
  [[package]]
3185
3185
  name = "rb-sys-build"
3186
- version = "0.9.103"
3186
+ version = "0.9.105"
3187
3187
  source = "registry+https://github.com/rust-lang/crates.io-index"
3188
- checksum = "c4d56a49dcb646b70b758789c0d16c055a386a4f2a3346333abb69850fa860ce"
3188
+ checksum = "3e6b246c29c0809e1cbe60a1ba9e093da72a4676d02adc68469297d1e589bbf0"
3189
3189
  dependencies = [
3190
3190
  "bindgen",
3191
3191
  "lazy_static",
data/README.md CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
21
21
  Write data
22
22
 
23
23
  ```ruby
24
- df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3.0, 4.0]})
25
- DeltaLake.write("./data/delta", df)
24
+ df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
25
+ DeltaLake.write("./events", df)
26
26
  ```
27
27
 
28
28
  Load a table
29
29
 
30
30
  ```ruby
31
- dt = DeltaLake::Table.new("./data/delta")
32
- df2 = dt.to_polars
31
+ dt = DeltaLake::Table.new("./events")
32
+ df = dt.to_polars
33
33
  ```
34
34
 
35
35
  Get a lazy frame
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
41
41
  Append rows
42
42
 
43
43
  ```ruby
44
- DeltaLake.write("./data/delta", df, mode: "append")
44
+ DeltaLake.write("./events", df, mode: "append")
45
45
  ```
46
46
 
47
47
  Overwrite a table
48
48
 
49
49
  ```ruby
50
- DeltaLake.write("./data/delta", df, mode: "overwrite")
50
+ DeltaLake.write("./events", df, mode: "overwrite")
51
51
  ```
52
52
 
53
53
  Add a constraint
54
54
 
55
55
  ```ruby
56
- dt.alter.add_constraint({"a_gt_0" => "a > 0"})
56
+ dt.alter.add_constraint({"id_gt_0" => "id > 0"})
57
57
  ```
58
58
 
59
59
  Drop a constraint
60
60
 
61
61
  ```ruby
62
- dt.alter.drop_constraint("a_gt_0")
62
+ dt.alter.drop_constraint("id_gt_0")
63
63
  ```
64
64
 
65
65
  Delete rows
66
66
 
67
67
  ```ruby
68
- dt.delete("a > 1")
68
+ dt.delete("id > 1")
69
69
  ```
70
70
 
71
71
  Vacuum
@@ -83,13 +83,13 @@ dt.optimize.compact
83
83
  Colocate similar data in the same files
84
84
 
85
85
  ```ruby
86
- dt.optimize.z_order(["a"])
86
+ dt.optimize.z_order(["category"])
87
87
  ```
88
88
 
89
89
  Load a previous version of a table
90
90
 
91
91
  ```ruby
92
- dt = DeltaLake::Table.new("./data/delta", version: 1)
92
+ dt = DeltaLake::Table.new("./events", version: 1)
93
93
  # or
94
94
  dt.load_as_version(1)
95
95
  ```
data/ext/deltalake/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.1.2"
3
+ version = "0.1.3"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -15,7 +15,7 @@ arrow = { version = "53", features = ["ffi"] }
15
15
  arrow-schema = { version = "53", features = ["serde"] }
16
16
  chrono = "0.4"
17
17
  delta_kernel = "0.4"
18
- deltalake = { version = "=0.22.2", features = ["azure", "datafusion", "gcs", "s3"] }
18
+ deltalake = { version = "=0.22.3", features = ["azure", "datafusion", "gcs", "s3"] }
19
19
  futures = "0.3"
20
20
  magnus = "0.7"
21
21
  num_cpus = "1"
data/lib/deltalake/table.rb CHANGED
@@ -188,23 +188,28 @@ module DeltaLake
188
188
  JSON.parse(metrics)
189
189
  end
190
190
 
191
- def to_polars(eager: true)
191
+ def to_polars(eager: true, rechunk: false, columns: nil)
192
192
  require "polars-df"
193
193
 
194
194
  sources = file_uris
195
- lf =
196
- if sources.empty?
197
- Polars::LazyFrame.new
198
- else
199
- delta_keys = [
200
- "AWS_S3_ALLOW_UNSAFE_RENAME",
201
- "AWS_S3_LOCKING_PROVIDER",
202
- "CONDITIONAL_PUT",
203
- "DELTA_DYNAMO_TABLE_NAME"
204
- ]
205
- storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
206
- Polars.scan_parquet(sources, storage_options: storage_options)
195
+ if sources.empty?
196
+ lf = Polars::LazyFrame.new
197
+ else
198
+ delta_keys = [
199
+ "AWS_S3_ALLOW_UNSAFE_RENAME",
200
+ "AWS_S3_LOCKING_PROVIDER",
201
+ "CONDITIONAL_PUT",
202
+ "DELTA_DYNAMO_TABLE_NAME"
203
+ ]
204
+ storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
205
+ lf = Polars.scan_parquet(sources, storage_options: storage_options, rechunk: rechunk)
206
+
207
+ if columns
208
+ # by_name requires polars-df > 0.15.0
209
+ lf = lf.select(Polars.cs.by_name(*columns))
207
210
  end
211
+ end
212
+
208
213
  eager ? lf.collect : lf
209
214
  end
210
215
 
data/lib/deltalake/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module DeltaLake
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deltalake-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-03 00:00:00.000000000 Z
11
+ date: 2024-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys