polars-df 0.15.0-arm64-darwin → 0.16.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +588 -456
- data/LICENSE-THIRD-PARTY.txt +14994 -16055
- data/README.md +37 -2
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
- data/lib/polars/data_frame.rb +179 -51
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/lazy.rb +7 -3
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/lazy_frame.rb +35 -5
- data/lib/polars/selectors.rb +85 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -0
- metadata +7 -7
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem "polars-df"
|
|
14
14
|
|
15
15
|
## Getting Started
|
16
16
|
|
17
|
-
This library follows the [Polars Python API](https://pola
|
17
|
+
This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
20
|
Polars.scan_csv("iris.csv")
|
@@ -24,7 +24,7 @@ Polars.scan_csv("iris.csv")
|
|
24
24
|
.collect
|
25
25
|
```
|
26
26
|
|
27
|
-
You can follow [Polars tutorials](https://pola
|
27
|
+
You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
28
28
|
|
29
29
|
## Reference
|
30
30
|
|
@@ -88,6 +88,15 @@ From Avro
|
|
88
88
|
Polars.read_avro("file.avro")
|
89
89
|
```
|
90
90
|
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental, unreleased]
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
Polars.read_delta("./table")
|
95
|
+
|
96
|
+
# or lazily with
|
97
|
+
Polars.scan_delta("./table")
|
98
|
+
```
|
99
|
+
|
91
100
|
From a hash
|
92
101
|
|
93
102
|
```ruby
|
@@ -336,6 +345,32 @@ Parquet
|
|
336
345
|
df.write_parquet("file.parquet")
|
337
346
|
```
|
338
347
|
|
348
|
+
JSON
|
349
|
+
|
350
|
+
```ruby
|
351
|
+
df.write_json("file.json")
|
352
|
+
# or
|
353
|
+
df.write_ndjson("file.ndjson")
|
354
|
+
```
|
355
|
+
|
356
|
+
Feather / Arrow IPC
|
357
|
+
|
358
|
+
```ruby
|
359
|
+
df.write_ipc("file.arrow")
|
360
|
+
```
|
361
|
+
|
362
|
+
Avro
|
363
|
+
|
364
|
+
```ruby
|
365
|
+
df.write_avro("file.avro")
|
366
|
+
```
|
367
|
+
|
368
|
+
Delta Lake [experimental, unreleased]
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
df.write_delta("./table")
|
372
|
+
```
|
373
|
+
|
339
374
|
Numo array
|
340
375
|
|
341
376
|
```ruby
|
Binary file
|
Binary file
|
Binary file
|
data/lib/polars/data_frame.rb
CHANGED
@@ -831,7 +831,13 @@ module Polars
|
|
831
831
|
# Compression method. Defaults to "uncompressed".
|
832
832
|
#
|
833
833
|
# @return [nil]
|
834
|
-
def write_ipc(
|
834
|
+
def write_ipc(
|
835
|
+
file,
|
836
|
+
compression: "uncompressed",
|
837
|
+
compat_level: nil,
|
838
|
+
storage_options: nil,
|
839
|
+
retries: 2
|
840
|
+
)
|
835
841
|
return_bytes = file.nil?
|
836
842
|
if return_bytes
|
837
843
|
file = StringIO.new
|
@@ -849,7 +855,13 @@ module Polars
|
|
849
855
|
compression = "uncompressed"
|
850
856
|
end
|
851
857
|
|
852
|
-
|
858
|
+
if storage_options&.any?
|
859
|
+
storage_options = storage_options.to_a
|
860
|
+
else
|
861
|
+
storage_options = nil
|
862
|
+
end
|
863
|
+
|
864
|
+
_df.write_ipc(file, compression, compat_level, storage_options, retries)
|
853
865
|
return_bytes ? file.string : nil
|
854
866
|
end
|
855
867
|
|
@@ -961,6 +973,61 @@ module Polars
|
|
961
973
|
)
|
962
974
|
end
|
963
975
|
|
976
|
+
# Write DataFrame as delta table.
#
# @param target [Object]
#   URI of a table or a DeltaTable object.
# @param mode ["error", "append", "overwrite", "ignore", "merge"]
#   How to handle existing data.
# @param storage_options [Hash]
#   Extra options for the storage backends supported by `deltalake-rb`.
# @param delta_write_options [Hash]
#   Additional keyword arguments while writing a Delta lake Table.
# @param delta_merge_options [Hash]
#   Keyword arguments which are required to `MERGE` a Delta lake Table.
#   Must contain a `:predicate` entry when `mode` is "merge". The hash
#   passed in is left untouched.
#
# @return [Object]
#   In "merge" mode, whatever `merge` on the Delta table returns;
#   otherwise whatever `DeltaLake.write` returns.
#
# @raise [ArgumentError]
#   If `mode` is "merge" and no `:predicate` is supplied through
#   `delta_merge_options`.
def write_delta(
  target,
  mode: "error",
  storage_options: nil,
  delta_write_options: nil,
  delta_merge_options: nil
)
  Polars.send(:_check_if_delta_available)

  if Utils.pathlike?(target)
    target = Polars.send(:_resolve_delta_lake_uri, target.to_s, strict: false)
  end

  data = self

  if mode == "merge"
    if delta_merge_options.nil? || !delta_merge_options.key?(:predicate)
      msg = "You need to pass delta_merge_options with at least a given predicate for `MERGE` to work."
      raise ArgumentError, msg
    end

    dt =
      if target.is_a?(::String)
        DeltaLake::Table.new(target, storage_options: storage_options)
      else
        target
      end

    # Split the predicate off a copy so the caller's hash keeps its
    # :predicate entry and can be reused for another call.
    merge_options = delta_merge_options.dup
    predicate = merge_options.delete(:predicate)
    dt.merge(data, predicate, **merge_options)
  else
    delta_write_options ||= {}

    DeltaLake.write(
      target,
      data,
      mode: mode,
      storage_options: storage_options,
      **delta_write_options
    )
  end
end
|
1030
|
+
|
964
1031
|
# Return an estimation of the total (heap) allocated size of the DataFrame.
|
965
1032
|
#
|
966
1033
|
# Estimated size is given in the specified unit (bytes by default).
|
@@ -3939,14 +4006,32 @@ module Polars
|
|
3939
4006
|
# # ╞═════╪═════╪═════╡
|
3940
4007
|
# # │ 3 ┆ 8 ┆ c │
|
3941
4008
|
# # └─────┴─────┴─────┘
|
3942
|
-
def max
|
3943
|
-
|
3944
|
-
|
3945
|
-
|
3946
|
-
|
3947
|
-
|
3948
|
-
|
3949
|
-
|
4009
|
+
def max
  # Build the aggregation on the lazy frame, then collect it.
  maxima = lazy.max
  maxima.collect(_eager: true)
end
|
4012
|
+
|
4013
|
+
# Compute the maximum across columns, one value per row.
#
# @return [Series]
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "foo" => [1, 2, 3],
#       "bar" => [4.0, 5.0, 6.0]
#     }
#   )
#   df.max_horizontal
#   # =>
#   # shape: (3,)
#   # Series: 'max' [f64]
#   # [
#   #         4.0
#   #         5.0
#   #         6.0
#   # ]
def max_horizontal
  row_max = F.max_horizontal(F.all)
  select(max: row_max).to_series
end
|
3951
4036
|
|
3952
4037
|
# Aggregate the columns of this DataFrame to their minimum value.
|
@@ -3971,22 +4056,35 @@ module Polars
|
|
3971
4056
|
# # ╞═════╪═════╪═════╡
|
3972
4057
|
# # │ 1 ┆ 6 ┆ a │
|
3973
4058
|
# # └─────┴─────┴─────┘
|
3974
|
-
def min
|
3975
|
-
|
3976
|
-
lazy.min.collect(_eager: true)
|
3977
|
-
elsif axis == 1
|
3978
|
-
Utils.wrap_s(_df.min_horizontal)
|
3979
|
-
else
|
3980
|
-
raise ArgumentError, "Axis should be 0 or 1."
|
3981
|
-
end
|
4059
|
+
def min
  # Build the aggregation on the lazy frame, then collect it.
  minima = lazy.min
  minima.collect(_eager: true)
end
|
3983
4062
|
|
3984
|
-
#
|
4063
|
+
# Compute the minimum across columns, one value per row.
#
# @return [Series]
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "foo" => [1, 2, 3],
#       "bar" => [4.0, 5.0, 6.0]
#     }
#   )
#   df.min_horizontal
#   # =>
#   # shape: (3,)
#   # Series: 'min' [f64]
#   # [
#   #         1.0
#   #         2.0
#   #         3.0
#   # ]
def min_horizontal
  row_min = F.min_horizontal(F.all)
  select(min: row_min).to_series
end
|
4086
|
+
|
4087
|
+
# Aggregate the columns of this DataFrame to their sum value.
|
3990
4088
|
#
|
3991
4089
|
# @return [DataFrame]
|
3992
4090
|
#
|
@@ -4008,35 +4106,42 @@ module Polars
|
|
4008
4106
|
# # ╞═════╪═════╪══════╡
|
4009
4107
|
# # │ 6 ┆ 21 ┆ null │
|
4010
4108
|
# # └─────┴─────┴──────┘
|
4109
|
+
def sum
  # Build the aggregation on the lazy frame, then collect it.
  totals = lazy.sum
  totals.collect(_eager: true)
end
|
4112
|
+
|
4113
|
+
# Add up the values across columns, one total per row.
#
# @param ignore_nulls [Boolean]
#   Ignore null values (default).
#   If set to `false`, any null value in the input will lead to a null output.
#
# @return [Series]
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "foo" => [1, 2, 3],
#       "bar" => [4.0, 5.0, 6.0]
#     }
#   )
#   df.sum_horizontal
#   # =>
#   # shape: (3,)
#   # Series: 'sum' [f64]
#   # [
#   #         5.0
#   #         7.0
#   #         9.0
#   # ]
def sum_horizontal(ignore_nulls: true)
  row_sum = F.sum_horizontal(F.all, ignore_nulls: ignore_nulls)
  select(sum: row_sum).to_series
end
|
4032
4142
|
|
4033
4143
|
# Aggregate the columns of this DataFrame to their mean value.
|
4034
4144
|
#
|
4035
|
-
# @param axis [Integer]
|
4036
|
-
# Either 0 or 1.
|
4037
|
-
# @param null_strategy ["ignore", "propagate"]
|
4038
|
-
# This argument is only used if axis == 1.
|
4039
|
-
#
|
4040
4145
|
# @return [DataFrame]
|
4041
4146
|
#
|
4042
4147
|
# @example
|
@@ -4057,15 +4162,38 @@ module Polars
|
|
4057
4162
|
# # ╞═════╪═════╪══════╡
|
4058
4163
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4059
4164
|
# # └─────┴─────┴──────┘
|
4060
|
-
def mean
|
4061
|
-
|
4062
|
-
|
4063
|
-
|
4064
|
-
|
4065
|
-
|
4066
|
-
|
4067
|
-
|
4068
|
-
|
4165
|
+
def mean
  # Build the aggregation on the lazy frame, then collect it.
  averages = lazy.mean
  averages.collect(_eager: true)
end
|
4168
|
+
|
4169
|
+
# Average the values across columns, one mean per row.
#
# @param ignore_nulls [Boolean]
#   Ignore null values (default).
#   If set to `false`, any null value in the input will lead to a null output.
#
# @return [Series]
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "foo" => [1, 2, 3],
#       "bar" => [4.0, 5.0, 6.0]
#     }
#   )
#   df.mean_horizontal
#   # =>
#   # shape: (3,)
#   # Series: 'mean' [f64]
#   # [
#   #         2.5
#   #         3.5
#   #         4.5
#   # ]
def mean_horizontal(ignore_nulls: true)
  row_mean = F.mean_horizontal(F.all, ignore_nulls: ignore_nulls)
  select(mean: row_mean).to_series
end
|
4070
4198
|
|
4071
4199
|
# Aggregate the columns of this DataFrame to their standard deviation value.
|
data/lib/polars/data_types.rb
CHANGED
@@ -143,6 +143,9 @@ module Polars
|
|
143
143
|
# @param exprs [Array]
|
144
144
|
# Column(s) to use in the aggregation. Accepts expression input. Strings are
|
145
145
|
# parsed as column names, other non-expression inputs are parsed as literals.
|
146
|
+
# @param ignore_nulls [Boolean]
|
147
|
+
# Ignore null values (default).
|
148
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
146
149
|
#
|
147
150
|
# @return [Expr]
|
148
151
|
#
|
@@ -166,9 +169,9 @@ module Polars
|
|
166
169
|
# # │ 8 ┆ 5 ┆ y ┆ 13 │
|
167
170
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
168
171
|
# # └─────┴──────┴─────┴─────┘
|
169
|
-
def sum_horizontal(*exprs)
|
172
|
+
def sum_horizontal(*exprs, ignore_nulls: true)
  # Parse the inputs into expressions, then wrap the native result.
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  summed = Plr.sum_horizontal(parsed, ignore_nulls)
  Utils.wrap_expr(summed)
end
|
173
176
|
|
174
177
|
# Compute the mean of all values horizontally across columns.
|
@@ -176,6 +179,9 @@ module Polars
|
|
176
179
|
# @param exprs [Array]
|
177
180
|
# Column(s) to use in the aggregation. Accepts expression input. Strings are
|
178
181
|
# parsed as column names, other non-expression inputs are parsed as literals.
|
182
|
+
# @param ignore_nulls [Boolean]
|
183
|
+
# Ignore null values (default).
|
184
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
179
185
|
#
|
180
186
|
# @return [Expr]
|
181
187
|
#
|
@@ -199,9 +205,9 @@ module Polars
|
|
199
205
|
# # │ 8 ┆ 5 ┆ y ┆ 6.5 │
|
200
206
|
# # │ 3 ┆ null ┆ z ┆ 3.0 │
|
201
207
|
# # └─────┴──────┴─────┴──────┘
|
202
|
-
def mean_horizontal(*exprs)
|
208
|
+
def mean_horizontal(*exprs, ignore_nulls: true)
  # Parse the inputs into expressions, then wrap the native result.
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  averaged = Plr.mean_horizontal(parsed, ignore_nulls)
  Utils.wrap_expr(averaged)
end
|
206
212
|
|
207
213
|
# Cumulatively sum all values horizontally across columns.
|
@@ -729,16 +729,20 @@ module Polars
|
|
729
729
|
a,
|
730
730
|
b,
|
731
731
|
method: "pearson",
|
732
|
-
ddof:
|
732
|
+
ddof: nil,
|
733
733
|
propagate_nans: false
|
734
734
|
)
|
735
|
+
if !ddof.nil?
|
736
|
+
warn "The `ddof` parameter has no effect. Do not use it."
|
737
|
+
end
|
738
|
+
|
735
739
|
a = Utils.parse_into_expression(a)
|
736
740
|
b = Utils.parse_into_expression(b)
|
737
741
|
|
738
742
|
if method == "pearson"
|
739
|
-
Utils.wrap_expr(Plr.pearson_corr(a, b
|
743
|
+
Utils.wrap_expr(Plr.pearson_corr(a, b))
|
740
744
|
elsif method == "spearman"
|
741
|
-
Utils.wrap_expr(Plr.spearman_rank_corr(a, b,
|
745
|
+
Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
|
742
746
|
else
|
743
747
|
msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
|
744
748
|
raise ArgumentError, msg
|
@@ -0,0 +1,126 @@
|
|
1
|
+
module Polars
  module IO
    # Reads into a DataFrame from a Delta lake table.
    #
    # @param source [Object]
    #   DeltaTable or a Path or URI to the root of the Delta lake table.
    # @param version [Object]
    #   Numerical version or timestamp version of the Delta lake table.
    # @param columns [Array]
    #   Columns to select. Accepts a list of column names.
    # @param rechunk [Boolean]
    #   Make sure that all columns are contiguous in memory by
    #   aggregating the chunks into a single array.
    # @param storage_options [Hash]
    #   Extra options for the storage backends supported by `deltalake-rb`.
    # @param delta_table_options [Hash]
    #   Additional keyword arguments while reading a Delta lake Table.
    #
    # @return [DataFrame]
    def read_delta(
      source,
      version: nil,
      columns: nil,
      rechunk: false,
      storage_options: nil,
      delta_table_options: nil
    )
      dl_tbl =
        _get_delta_lake_table(
          source,
          version: version,
          storage_options: storage_options,
          delta_table_options: delta_table_options
        )

      dl_tbl.to_polars(columns: columns, rechunk: rechunk)
    end

    # Lazily read from a Delta lake table.
    #
    # @param source [Object]
    #   DeltaTable or a Path or URI to the root of the Delta lake table.
    # @param version [Object]
    #   Numerical version or timestamp version of the Delta lake table.
    # @param storage_options [Hash]
    #   Extra options for the storage backends supported by `deltalake-rb`.
    # @param delta_table_options [Hash]
    #   Additional keyword arguments while reading a Delta lake Table.
    #
    # @return [LazyFrame]
    def scan_delta(
      source,
      version: nil,
      storage_options: nil,
      delta_table_options: nil
    )
      dl_tbl =
        _get_delta_lake_table(
          source,
          version: version,
          storage_options: storage_options,
          delta_table_options: delta_table_options
        )

      dl_tbl.to_polars(eager: false)
    end

    private

    # Turn a table location into the string handed to `DeltaLake::Table`.
    #
    # `strict` is accepted for interface compatibility; this method does
    # not read it.
    def _resolve_delta_lake_uri(table_uri, strict: true)
      require "uri"

      begin
        parsed_result = URI(table_uri)
      rescue URI::InvalidURIError
        # Not parseable as a URI (e.g. a local path containing spaces):
        # hand the location through unchanged instead of failing here.
        return table_uri
      end

      # NOTE(review): URI#scheme is nil (not "") for scheme-less input such
      # as "./table", so the first branch is not taken for plain paths.
      # Confirm that Utils.normalize_filepath accepts directories before
      # widening this check to cover nil.
      if parsed_result.scheme == ""
        Utils.normalize_filepath(table_uri)
      else
        table_uri
      end
    end

    # Return `table_path` itself when it already is a `DeltaLake::Table`,
    # otherwise open the table at that location with the requested
    # version and options.
    def _get_delta_lake_table(
      table_path,
      version: nil,
      storage_options: nil,
      delta_table_options: nil
    )
      _check_if_delta_available

      return table_path if table_path.is_a?(DeltaLake::Table)

      delta_table_options ||= {}
      # to_s so path-like objects are accepted, matching write_delta
      resolved_uri = _resolve_delta_lake_uri(table_path.to_s)

      if version.is_a?(::String) || version.is_a?(::Time)
        # String/Time versions are loaded after the table is opened
        dl_tbl =
          DeltaLake::Table.new(
            resolved_uri,
            storage_options: storage_options,
            **delta_table_options
          )
        dl_tbl.load_as_version(version)
      else
        dl_tbl =
          DeltaLake::Table.new(
            resolved_uri,
            version: version,
            storage_options: storage_options,
            **delta_table_options
          )
      end

      dl_tbl
    end

    # Raise unless the `DeltaLake` constant is defined.
    def _check_if_delta_available
      raise Error, "Delta Lake not available" unless defined?(DeltaLake)
    end
  end
end
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -431,7 +431,9 @@ module Polars
|
|
431
431
|
projection_pushdown: true,
|
432
432
|
simplify_expression: true,
|
433
433
|
no_optimization: false,
|
434
|
-
slice_pushdown: true
|
434
|
+
slice_pushdown: true,
|
435
|
+
storage_options: nil,
|
436
|
+
retries: 2
|
435
437
|
)
|
436
438
|
lf = _set_sink_optimizations(
|
437
439
|
type_coercion: type_coercion,
|
@@ -460,6 +462,12 @@ module Polars
|
|
460
462
|
}
|
461
463
|
end
|
462
464
|
|
465
|
+
if storage_options&.any?
|
466
|
+
storage_options = storage_options.to_a
|
467
|
+
else
|
468
|
+
storage_options = nil
|
469
|
+
end
|
470
|
+
|
463
471
|
lf.sink_parquet(
|
464
472
|
path,
|
465
473
|
compression,
|
@@ -467,7 +475,9 @@ module Polars
|
|
467
475
|
statistics,
|
468
476
|
row_group_size,
|
469
477
|
data_pagesize_limit,
|
470
|
-
maintain_order
|
478
|
+
maintain_order,
|
479
|
+
storage_options,
|
480
|
+
retries
|
471
481
|
)
|
472
482
|
end
|
473
483
|
|
@@ -512,6 +522,10 @@ module Polars
|
|
512
522
|
slice_pushdown: true,
|
513
523
|
no_optimization: false
|
514
524
|
)
|
525
|
+
# TODO support storage options in Rust
|
526
|
+
storage_options = nil
|
527
|
+
retries = 2
|
528
|
+
|
515
529
|
lf = _set_sink_optimizations(
|
516
530
|
type_coercion: type_coercion,
|
517
531
|
predicate_pushdown: predicate_pushdown,
|
@@ -521,10 +535,18 @@ module Polars
|
|
521
535
|
no_optimization: no_optimization
|
522
536
|
)
|
523
537
|
|
538
|
+
if storage_options&.any?
|
539
|
+
storage_options = storage_options.to_a
|
540
|
+
else
|
541
|
+
storage_options = nil
|
542
|
+
end
|
543
|
+
|
524
544
|
lf.sink_ipc(
|
525
545
|
path,
|
526
546
|
compression,
|
527
|
-
maintain_order
|
547
|
+
maintain_order,
|
548
|
+
storage_options,
|
549
|
+
retries
|
528
550
|
)
|
529
551
|
end
|
530
552
|
|
@@ -692,7 +714,9 @@ module Polars
|
|
692
714
|
projection_pushdown: true,
|
693
715
|
simplify_expression: true,
|
694
716
|
slice_pushdown: true,
|
695
|
-
no_optimization: false
|
717
|
+
no_optimization: false,
|
718
|
+
storage_options: nil,
|
719
|
+
retries: 2
|
696
720
|
)
|
697
721
|
lf = _set_sink_optimizations(
|
698
722
|
type_coercion: type_coercion,
|
@@ -703,7 +727,13 @@ module Polars
|
|
703
727
|
no_optimization: no_optimization
|
704
728
|
)
|
705
729
|
|
706
|
-
|
730
|
+
if storage_options&.any?
|
731
|
+
storage_options = storage_options.to_a
|
732
|
+
else
|
733
|
+
storage_options = nil
|
734
|
+
end
|
735
|
+
|
736
|
+
lf.sink_json(path, maintain_order, storage_options, retries)
|
707
737
|
end
|
708
738
|
|
709
739
|
# @private
|