deltalake-rb 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
21
21
  Write data
22
22
 
23
23
  ```ruby
24
- df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3.0, 4.0]})
25
- DeltaLake.write("./data/delta", df)
24
+ df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
25
+ DeltaLake.write("./events", df)
26
26
  ```
27
27
 
28
28
  Load a table
29
29
 
30
30
  ```ruby
31
- dt = DeltaLake::Table.new("./data/delta")
32
- df2 = dt.to_polars
31
+ dt = DeltaLake::Table.new("./events")
32
+ df = dt.to_polars
33
33
  ```
34
34
 
35
35
  Get a lazy frame
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
41
41
  Append rows
42
42
 
43
43
  ```ruby
44
- DeltaLake.write("./data/delta", df, mode: "append")
44
+ DeltaLake.write("./events", df, mode: "append")
45
45
  ```
46
46
 
47
47
  Overwrite a table
48
48
 
49
49
  ```ruby
50
- DeltaLake.write("./data/delta", df, mode: "overwrite")
50
+ DeltaLake.write("./events", df, mode: "overwrite")
51
51
  ```
52
52
 
53
53
  Add a constraint
54
54
 
55
55
  ```ruby
56
- dt.alter.add_constraint({"a_gt_0" => "a > 0"})
56
+ dt.alter.add_constraint({"id_gt_0" => "id > 0"})
57
57
  ```
58
58
 
59
59
  Drop a constraint
60
60
 
61
61
  ```ruby
62
- dt.alter.drop_constraint("a_gt_0")
62
+ dt.alter.drop_constraint("id_gt_0")
63
63
  ```
64
64
 
65
65
  Delete rows
66
66
 
67
67
  ```ruby
68
- dt.delete("a > 1")
68
+ dt.delete("id > 1")
69
69
  ```
70
70
 
71
71
  Vacuum
@@ -83,13 +83,13 @@ dt.optimize.compact
83
83
  Colocate similar data in the same files
84
84
 
85
85
  ```ruby
86
- dt.optimize.z_order(["a"])
86
+ dt.optimize.z_order(["category"])
87
87
  ```
88
88
 
89
89
  Load a previous version of a table
90
90
 
91
91
  ```ruby
92
- dt = DeltaLake::Table.new("./data/delta", version: 1)
92
+ dt = DeltaLake::Table.new("./events", version: 1)
93
93
  # or
94
94
  dt.load_as_version(1)
95
95
  ```
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.1.2"
3
+ version = "0.1.4"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -14,8 +14,8 @@ crate-type = ["cdylib"]
14
14
  arrow = { version = "53", features = ["ffi"] }
15
15
  arrow-schema = { version = "53", features = ["serde"] }
16
16
  chrono = "0.4"
17
- delta_kernel = "0.4"
18
- deltalake = { version = "=0.22.2", features = ["azure", "datafusion", "gcs", "s3"] }
17
+ delta_kernel = "0.6"
18
+ deltalake = { version = "=0.23.0", features = ["azure", "datafusion", "gcs", "s3"] }
19
19
  futures = "0.3"
20
20
  magnus = "0.7"
21
21
  num_cpus = "1"
@@ -188,23 +188,28 @@ module DeltaLake
188
188
  JSON.parse(metrics)
189
189
  end
190
190
 
191
- def to_polars(eager: true)
191
+ def to_polars(eager: true, rechunk: false, columns: nil)
192
192
  require "polars-df"
193
193
 
194
194
  sources = file_uris
195
- lf =
196
- if sources.empty?
197
- Polars::LazyFrame.new
198
- else
199
- delta_keys = [
200
- "AWS_S3_ALLOW_UNSAFE_RENAME",
201
- "AWS_S3_LOCKING_PROVIDER",
202
- "CONDITIONAL_PUT",
203
- "DELTA_DYNAMO_TABLE_NAME"
204
- ]
205
- storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
206
- Polars.scan_parquet(sources, storage_options: storage_options)
195
+ if sources.empty?
196
+ lf = Polars::LazyFrame.new
197
+ else
198
+ delta_keys = [
199
+ "AWS_S3_ALLOW_UNSAFE_RENAME",
200
+ "AWS_S3_LOCKING_PROVIDER",
201
+ "CONDITIONAL_PUT",
202
+ "DELTA_DYNAMO_TABLE_NAME"
203
+ ]
204
+ storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
205
+ lf = Polars.scan_parquet(sources, storage_options: storage_options, rechunk: rechunk)
206
+
207
+ if columns
208
+ # by_name requires polars-df > 0.15.0
209
+ lf = lf.select(Polars.cs.by_name(*columns))
207
210
  end
211
+ end
212
+
208
213
  eager ? lf.collect : lf
209
214
  end
210
215
 
@@ -1,3 +1,3 @@
1
1
  module DeltaLake
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deltalake-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-12-03 00:00:00.000000000 Z
10
+ date: 2025-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rb_sys
@@ -24,7 +23,6 @@ dependencies:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
25
  version: '0'
27
- description:
28
26
  email: andrew@ankane.org
29
27
  executables: []
30
28
  extensions:
@@ -59,7 +57,6 @@ homepage: https://github.com/ankane/delta-ruby
59
57
  licenses:
60
58
  - Apache-2.0
61
59
  metadata: {}
62
- post_install_message:
63
60
  rdoc_options: []
64
61
  require_paths:
65
62
  - lib
@@ -74,8 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
74
71
  - !ruby/object:Gem::Version
75
72
  version: '0'
76
73
  requirements: []
77
- rubygems_version: 3.5.22
78
- signing_key:
74
+ rubygems_version: 3.6.2
79
75
  specification_version: 4
80
76
  summary: Delta Lake for Ruby
81
77
  test_files: []