deltalake-rb 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd4f4c8d4d173c5b44a1758ba798624c43724e702941f3044763d762ee53681e
4
- data.tar.gz: 0c1616f7555ce13dc5d24837b6c8b47148e9a5308e615dee69d9b8b5f58b2116
3
+ metadata.gz: 74cdc17a6f4237378f874c7c2142ef5bcd233d7da9273296c5e2dbb24d925add
4
+ data.tar.gz: ddf786922725bddc6adeadeaa9e734d3c3cd974ba009e319740393ade76a44a6
5
5
  SHA512:
6
- metadata.gz: f6fdce2f189671bb76328f7c3ef4cc2405897bafae9b3f31cfcbe95a60842890d643ad8e4da7af773a3e10ea993b0c7be86e3de5ff5ea836d523872e670fb25e
7
- data.tar.gz: 728f32cf6c891ee82cb2690248197841c8a6b561d6fc4319969799117afafc1d6ea355782a7d906c07d65ca525da28c96a22051e0213dd8d09cbbaa410b07ed0
6
+ metadata.gz: d97802a6b1443cd1b85802fae78e81eff9657fe2b524631cc5a3cb4a09d18f01c408af388f52bad37a23b78dbc4816e605ae3690ef013ec904b6e47fd0a88ee0
7
+ data.tar.gz: fe3ffa3a6babd7fd154670ab587c77febb3be4f801d2a5421be373b6c165a198904f50980df7ef79e7dab155b01dddac6cba52a7f1d07225087462812c2996f0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.1.3 (2024-12-28)
2
+
3
+ - Updated `deltalake` to 0.22.3
4
+ - Added support for Ruby 3.4
5
+ - Added `rechunk` and `columns` options to `to_polars` method
6
+
1
7
  ## 0.1.2 (2024-12-03)
2
8
 
3
9
  - Updated `deltalake` to 0.22.2
data/Cargo.lock CHANGED
@@ -1572,13 +1572,13 @@ dependencies = [
1572
1572
 
1573
1573
  [[package]]
1574
1574
  name = "deltalake"
1575
- version = "0.1.2"
1575
+ version = "0.1.3"
1576
1576
  dependencies = [
1577
1577
  "arrow",
1578
1578
  "arrow-schema",
1579
1579
  "chrono",
1580
1580
  "delta_kernel",
1581
- "deltalake 0.22.2",
1581
+ "deltalake 0.22.3",
1582
1582
  "futures",
1583
1583
  "magnus",
1584
1584
  "num_cpus",
@@ -1589,9 +1589,9 @@ dependencies = [
1589
1589
 
1590
1590
  [[package]]
1591
1591
  name = "deltalake"
1592
- version = "0.22.2"
1592
+ version = "0.22.3"
1593
1593
  source = "registry+https://github.com/rust-lang/crates.io-index"
1594
- checksum = "950788ee777d7fa22043fd329854809cbbf418d25dc04456509e56e885973a7a"
1594
+ checksum = "9e289d4760a09c95d44b978363b4466b0aacf7c82561e773f2f12eb5bbf257e9"
1595
1595
  dependencies = [
1596
1596
  "deltalake-aws",
1597
1597
  "deltalake-azure",
@@ -3174,18 +3174,18 @@ dependencies = [
3174
3174
 
3175
3175
  [[package]]
3176
3176
  name = "rb-sys"
3177
- version = "0.9.103"
3177
+ version = "0.9.105"
3178
3178
  source = "registry+https://github.com/rust-lang/crates.io-index"
3179
- checksum = "91dbe37ab6ac2fba187480fb6544b92445e41e5c6f553bf0c33743f3c450a1df"
3179
+ checksum = "4b3a1f3ce8e7c36d777d52fe7a99039fe4fea7c8ec355a4c4f3a17f92a14029f"
3180
3180
  dependencies = [
3181
3181
  "rb-sys-build",
3182
3182
  ]
3183
3183
 
3184
3184
  [[package]]
3185
3185
  name = "rb-sys-build"
3186
- version = "0.9.103"
3186
+ version = "0.9.105"
3187
3187
  source = "registry+https://github.com/rust-lang/crates.io-index"
3188
- checksum = "c4d56a49dcb646b70b758789c0d16c055a386a4f2a3346333abb69850fa860ce"
3188
+ checksum = "3e6b246c29c0809e1cbe60a1ba9e093da72a4676d02adc68469297d1e589bbf0"
3189
3189
  dependencies = [
3190
3190
  "bindgen",
3191
3191
  "lazy_static",
data/README.md CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
21
21
  Write data
22
22
 
23
23
  ```ruby
24
- df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3.0, 4.0]})
25
- DeltaLake.write("./data/delta", df)
24
+ df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
25
+ DeltaLake.write("./events", df)
26
26
  ```
27
27
 
28
28
  Load a table
29
29
 
30
30
  ```ruby
31
- dt = DeltaLake::Table.new("./data/delta")
32
- df2 = dt.to_polars
31
+ dt = DeltaLake::Table.new("./events")
32
+ df = dt.to_polars
33
33
  ```
34
34
 
35
35
  Get a lazy frame
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
41
41
  Append rows
42
42
 
43
43
  ```ruby
44
- DeltaLake.write("./data/delta", df, mode: "append")
44
+ DeltaLake.write("./events", df, mode: "append")
45
45
  ```
46
46
 
47
47
  Overwrite a table
48
48
 
49
49
  ```ruby
50
- DeltaLake.write("./data/delta", df, mode: "overwrite")
50
+ DeltaLake.write("./events", df, mode: "overwrite")
51
51
  ```
52
52
 
53
53
  Add a constraint
54
54
 
55
55
  ```ruby
56
- dt.alter.add_constraint({"a_gt_0" => "a > 0"})
56
+ dt.alter.add_constraint({"id_gt_0" => "id > 0"})
57
57
  ```
58
58
 
59
59
  Drop a constraint
60
60
 
61
61
  ```ruby
62
- dt.alter.drop_constraint("a_gt_0")
62
+ dt.alter.drop_constraint("id_gt_0")
63
63
  ```
64
64
 
65
65
  Delete rows
66
66
 
67
67
  ```ruby
68
- dt.delete("a > 1")
68
+ dt.delete("id > 1")
69
69
  ```
70
70
 
71
71
  Vacuum
@@ -83,13 +83,13 @@ dt.optimize.compact
83
83
  Colocate similar data in the same files
84
84
 
85
85
  ```ruby
86
- dt.optimize.z_order(["a"])
86
+ dt.optimize.z_order(["category"])
87
87
  ```
88
88
 
89
89
  Load a previous version of a table
90
90
 
91
91
  ```ruby
92
- dt = DeltaLake::Table.new("./data/delta", version: 1)
92
+ dt = DeltaLake::Table.new("./events", version: 1)
93
93
  # or
94
94
  dt.load_as_version(1)
95
95
  ```
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.1.2"
3
+ version = "0.1.3"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -15,7 +15,7 @@ arrow = { version = "53", features = ["ffi"] }
15
15
  arrow-schema = { version = "53", features = ["serde"] }
16
16
  chrono = "0.4"
17
17
  delta_kernel = "0.4"
18
- deltalake = { version = "=0.22.2", features = ["azure", "datafusion", "gcs", "s3"] }
18
+ deltalake = { version = "=0.22.3", features = ["azure", "datafusion", "gcs", "s3"] }
19
19
  futures = "0.3"
20
20
  magnus = "0.7"
21
21
  num_cpus = "1"
@@ -188,23 +188,28 @@ module DeltaLake
188
188
  JSON.parse(metrics)
189
189
  end
190
190
 
191
- def to_polars(eager: true)
191
+ def to_polars(eager: true, rechunk: false, columns: nil)
192
192
  require "polars-df"
193
193
 
194
194
  sources = file_uris
195
- lf =
196
- if sources.empty?
197
- Polars::LazyFrame.new
198
- else
199
- delta_keys = [
200
- "AWS_S3_ALLOW_UNSAFE_RENAME",
201
- "AWS_S3_LOCKING_PROVIDER",
202
- "CONDITIONAL_PUT",
203
- "DELTA_DYNAMO_TABLE_NAME"
204
- ]
205
- storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
206
- Polars.scan_parquet(sources, storage_options: storage_options)
195
+ if sources.empty?
196
+ lf = Polars::LazyFrame.new
197
+ else
198
+ delta_keys = [
199
+ "AWS_S3_ALLOW_UNSAFE_RENAME",
200
+ "AWS_S3_LOCKING_PROVIDER",
201
+ "CONDITIONAL_PUT",
202
+ "DELTA_DYNAMO_TABLE_NAME"
203
+ ]
204
+ storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
205
+ lf = Polars.scan_parquet(sources, storage_options: storage_options, rechunk: rechunk)
206
+
207
+ if columns
208
+ # by_name requires polars-df > 0.15.0
209
+ lf = lf.select(Polars.cs.by_name(*columns))
207
210
  end
211
+ end
212
+
208
213
  eager ? lf.collect : lf
209
214
  end
210
215
 
@@ -1,3 +1,3 @@
1
1
  module DeltaLake
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deltalake-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-03 00:00:00.000000000 Z
11
+ date: 2024-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys