deltalake-rb 0.1.2 → 0.1.4

Sign up to get free protection for your applications and access to all features.
data/README.md CHANGED
@@ -21,15 +21,15 @@ It can take 5-10 minutes to compile the gem.
21
21
  Write data
22
22
 
23
23
  ```ruby
24
- df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3.0, 4.0]})
25
- DeltaLake.write("./data/delta", df)
24
+ df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
25
+ DeltaLake.write("./events", df)
26
26
  ```
27
27
 
28
28
  Load a table
29
29
 
30
30
  ```ruby
31
- dt = DeltaLake::Table.new("./data/delta")
32
- df2 = dt.to_polars
31
+ dt = DeltaLake::Table.new("./events")
32
+ df = dt.to_polars
33
33
  ```
34
34
 
35
35
  Get a lazy frame
@@ -41,31 +41,31 @@ lf = dt.to_polars(eager: false)
41
41
  Append rows
42
42
 
43
43
  ```ruby
44
- DeltaLake.write("./data/delta", df, mode: "append")
44
+ DeltaLake.write("./events", df, mode: "append")
45
45
  ```
46
46
 
47
47
  Overwrite a table
48
48
 
49
49
  ```ruby
50
- DeltaLake.write("./data/delta", df, mode: "overwrite")
50
+ DeltaLake.write("./events", df, mode: "overwrite")
51
51
  ```
52
52
 
53
53
  Add a constraint
54
54
 
55
55
  ```ruby
56
- dt.alter.add_constraint({"a_gt_0" => "a > 0"})
56
+ dt.alter.add_constraint({"id_gt_0" => "id > 0"})
57
57
  ```
58
58
 
59
59
  Drop a constraint
60
60
 
61
61
  ```ruby
62
- dt.alter.drop_constraint("a_gt_0")
62
+ dt.alter.drop_constraint("id_gt_0")
63
63
  ```
64
64
 
65
65
  Delete rows
66
66
 
67
67
  ```ruby
68
- dt.delete("a > 1")
68
+ dt.delete("id > 1")
69
69
  ```
70
70
 
71
71
  Vacuum
@@ -83,13 +83,13 @@ dt.optimize.compact
83
83
  Colocate similar data in the same files
84
84
 
85
85
  ```ruby
86
- dt.optimize.z_order(["a"])
86
+ dt.optimize.z_order(["category"])
87
87
  ```
88
88
 
89
89
  Load a previous version of a table
90
90
 
91
91
  ```ruby
92
- dt = DeltaLake::Table.new("./data/delta", version: 1)
92
+ dt = DeltaLake::Table.new("./events", version: 1)
93
93
  # or
94
94
  dt.load_as_version(1)
95
95
  ```
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "deltalake"
3
- version = "0.1.2"
3
+ version = "0.1.4"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -14,8 +14,8 @@ crate-type = ["cdylib"]
14
14
  arrow = { version = "53", features = ["ffi"] }
15
15
  arrow-schema = { version = "53", features = ["serde"] }
16
16
  chrono = "0.4"
17
- delta_kernel = "0.4"
18
- deltalake = { version = "=0.22.2", features = ["azure", "datafusion", "gcs", "s3"] }
17
+ delta_kernel = "0.6"
18
+ deltalake = { version = "=0.23.0", features = ["azure", "datafusion", "gcs", "s3"] }
19
19
  futures = "0.3"
20
20
  magnus = "0.7"
21
21
  num_cpus = "1"
@@ -188,23 +188,28 @@ module DeltaLake
188
188
  JSON.parse(metrics)
189
189
  end
190
190
 
191
- def to_polars(eager: true)
191
+ def to_polars(eager: true, rechunk: false, columns: nil)
192
192
  require "polars-df"
193
193
 
194
194
  sources = file_uris
195
- lf =
196
- if sources.empty?
197
- Polars::LazyFrame.new
198
- else
199
- delta_keys = [
200
- "AWS_S3_ALLOW_UNSAFE_RENAME",
201
- "AWS_S3_LOCKING_PROVIDER",
202
- "CONDITIONAL_PUT",
203
- "DELTA_DYNAMO_TABLE_NAME"
204
- ]
205
- storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
206
- Polars.scan_parquet(sources, storage_options: storage_options)
195
+ if sources.empty?
196
+ lf = Polars::LazyFrame.new
197
+ else
198
+ delta_keys = [
199
+ "AWS_S3_ALLOW_UNSAFE_RENAME",
200
+ "AWS_S3_LOCKING_PROVIDER",
201
+ "CONDITIONAL_PUT",
202
+ "DELTA_DYNAMO_TABLE_NAME"
203
+ ]
204
+ storage_options = @storage_options&.reject { |k, _| delta_keys.include?(k.to_s.upcase) }
205
+ lf = Polars.scan_parquet(sources, storage_options: storage_options, rechunk: rechunk)
206
+
207
+ if columns
208
+ # by_name requires polars-df > 0.15.0
209
+ lf = lf.select(Polars.cs.by_name(*columns))
207
210
  end
211
+ end
212
+
208
213
  eager ? lf.collect : lf
209
214
  end
210
215
 
@@ -1,3 +1,3 @@
1
1
  module DeltaLake
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deltalake-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-12-03 00:00:00.000000000 Z
10
+ date: 2025-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rb_sys
@@ -24,7 +23,6 @@ dependencies:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
25
  version: '0'
27
- description:
28
26
  email: andrew@ankane.org
29
27
  executables: []
30
28
  extensions:
@@ -59,7 +57,6 @@ homepage: https://github.com/ankane/delta-ruby
59
57
  licenses:
60
58
  - Apache-2.0
61
59
  metadata: {}
62
- post_install_message:
63
60
  rdoc_options: []
64
61
  require_paths:
65
62
  - lib
@@ -74,8 +71,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
74
71
  - !ruby/object:Gem::Version
75
72
  version: '0'
76
73
  requirements: []
77
- rubygems_version: 3.5.22
78
- signing_key:
74
+ rubygems_version: 3.6.2
79
75
  specification_version: 4
80
76
  summary: Delta Lake for Ruby
81
77
  test_files: []