polars-df 0.15.0-arm64-darwin → 0.16.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +588 -456
- data/LICENSE-THIRD-PARTY.txt +14994 -16055
- data/README.md +37 -2
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
- data/lib/polars/data_frame.rb +179 -51
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/lazy.rb +7 -3
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/lazy_frame.rb +35 -5
- data/lib/polars/selectors.rb +85 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -0
- metadata +7 -7
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem "polars-df"
 
 ## Getting Started
 
-This library follows the [Polars Python API](https://pola
+This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
 
 ```ruby
 Polars.scan_csv("iris.csv")
@@ -24,7 +24,7 @@ Polars.scan_csv("iris.csv")
   .collect
 ```
 
-You can follow [Polars tutorials](https://pola
+You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
 
 ## Reference
 
@@ -88,6 +88,15 @@ From Avro
 Polars.read_avro("file.avro")
 ```
 
+From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental, unreleased]
+
+```ruby
+Polars.read_delta("./table")
+
+# or lazily with
+Polars.scan_delta("./table")
+```
+
 From a hash
 
 ```ruby
@@ -336,6 +345,32 @@ Parquet
 df.write_parquet("file.parquet")
 ```
 
+JSON
+
+```ruby
+df.write_json("file.json")
+# or
+df.write_ndjson("file.ndjson")
+```
+
+Feather / Arrow IPC
+
+```ruby
+df.write_ipc("file.arrow")
+```
+
+Avro
+
+```ruby
+df.write_avro("file.avro")
+```
+
+Delta Lake [experimental, unreleased]
+
+```ruby
+df.write_delta("./table")
+```
+
 Numo array
 
 ```ruby
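Taken together, the README additions describe a read/write round trip for the new Delta Lake support. A minimal sketch of how the pieces fit, assuming the deltalake-rb gem is installed (the require name and the local table path are illustrative, not from this diff):

```ruby
require "polars-df"
require "deltalake" # assumed require name for the deltalake-rb gem

df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["x", "y", "z"]})

# write an eager frame to a local Delta table, then read it back
df.write_delta("./example_table", mode: "overwrite")
Polars.read_delta("./example_table")

# or defer the work and filter before collecting
Polars.scan_delta("./example_table")
  .filter(Polars.col("a") > 1)
  .collect
```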
Binary file
Binary file
Binary file
data/lib/polars/data_frame.rb
CHANGED
@@ -831,7 +831,13 @@ module Polars
     #   Compression method. Defaults to "uncompressed".
     #
     # @return [nil]
-    def write_ipc(
+    def write_ipc(
+      file,
+      compression: "uncompressed",
+      compat_level: nil,
+      storage_options: nil,
+      retries: 2
+    )
       return_bytes = file.nil?
       if return_bytes
         file = StringIO.new
@@ -849,7 +855,13 @@ module Polars
         compression = "uncompressed"
       end
 
-
+      if storage_options&.any?
+        storage_options = storage_options.to_a
+      else
+        storage_options = nil
+      end
+
+      _df.write_ipc(file, compression, compat_level, storage_options, retries)
       return_bytes ? file.string : nil
     end
 
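The reworked `write_ipc` keeps its old behavior for local paths and adds cloud-oriented keywords that are forwarded to the Rust writer. A hedged sketch of the new call shape (the bucket URI and `storage_options` keys are placeholders; the keys that are actually accepted depend on the object-store backend):

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3]})

# same as before: local file, optional compression
df.write_ipc("out.arrow", compression: "zstd")

# new in 0.16.0: storage_options and retries
df.write_ipc(
  "s3://my-bucket/out.arrow",                      # placeholder URI
  storage_options: {"aws_region" => "us-east-1"},  # placeholder key/value
  retries: 3
)
```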
@@ -961,6 +973,61 @@ module Polars
       )
     end
 
+    # Write DataFrame as delta table.
+    #
+    # @param target [Object]
+    #   URI of a table or a DeltaTable object.
+    # @param mode ["error", "append", "overwrite", "ignore", "merge"]
+    #   How to handle existing data.
+    # @param storage_options [Hash]
+    #   Extra options for the storage backends supported by `deltalake-rb`.
+    # @param delta_write_options [Hash]
+    #   Additional keyword arguments while writing a Delta lake Table.
+    # @param delta_merge_options [Hash]
+    #   Keyword arguments which are required to `MERGE` a Delta lake Table.
+    #
+    # @return [nil]
+    def write_delta(
+      target,
+      mode: "error",
+      storage_options: nil,
+      delta_write_options: nil,
+      delta_merge_options: nil
+    )
+      Polars.send(:_check_if_delta_available)
+
+      if Utils.pathlike?(target)
+        target = Polars.send(:_resolve_delta_lake_uri, target.to_s, strict: false)
+      end
+
+      data = self
+
+      if mode == "merge"
+        if delta_merge_options.nil?
+          msg = "You need to pass delta_merge_options with at least a given predicate for `MERGE` to work."
+          raise ArgumentError, msg
+        end
+        if target.is_a?(::String)
+          dt = DeltaLake::Table.new(target, storage_options: storage_options)
+        else
+          dt = target
+        end
+
+        predicate = delta_merge_options.delete(:predicate)
+        dt.merge(data, predicate, **delta_merge_options)
+      else
+        delta_write_options ||= {}
+
+        DeltaLake.write(
+          target,
+          data,
+          mode: mode,
+          storage_options: storage_options,
+          **delta_write_options
+        )
+      end
+    end
+
     # Return an estimation of the total (heap) allocated size of the DataFrame.
     #
     # Estimated size is given in the specified unit (bytes by default).
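`write_delta` branches on `mode`: anything other than `"merge"` goes to `DeltaLake.write`, while `"merge"` requires `delta_merge_options` with at least a `:predicate` and forwards the remaining keys to `DeltaLake::Table#merge`. A hedged sketch; the predicate syntax, the alias option names, and whatever the merge call returns all come from deltalake-rb and are assumptions here, not facts from this diff:

```ruby
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [10, 20]})

# plain writes go through DeltaLake.write
df.write_delta("./example_table")                    # mode: "error" raises if the table exists
df.write_delta("./example_table", mode: "append")
df.write_delta("./example_table", mode: "overwrite")

# merge requires a predicate; other keys are forwarded to DeltaLake::Table#merge
df.write_delta(
  "./example_table",
  mode: "merge",
  delta_merge_options: {
    predicate: "t.id = s.id",  # predicate syntax assumed from deltalake-rb
    source_alias: "s",         # assumed option names, forwarded as-is
    target_alias: "t"
  }
)
# the merge branch returns whatever DeltaLake::Table#merge returns, which may
# need further chaining before the merge is executed (assumption)
```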
@@ -3939,14 +4006,32 @@ module Polars
     #   # ╞═════╪═════╪═════╡
     #   # │ 3   ┆ 8   ┆ c   │
     #   # └─────┴─────┴─────┘
-    def max
-
-
-
-
-
-
-
+    def max
+      lazy.max.collect(_eager: true)
+    end
+
+    # Get the maximum value horizontally across columns.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [4.0, 5.0, 6.0]
+    #     }
+    #   )
+    #   df.max_horizontal
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'max' [f64]
+    #   # [
+    #   #         4.0
+    #   #         5.0
+    #   #         6.0
+    #   # ]
+    def max_horizontal
+      select(max: F.max_horizontal(F.all)).to_series
     end
 
     # Aggregate the columns of this DataFrame to their minimum value.
@@ -3971,22 +4056,35 @@ module Polars
     #   # ╞═════╪═════╪═════╡
     #   # │ 1   ┆ 6   ┆ a   │
     #   # └─────┴─────┴─────┘
-    def min
-
-        lazy.min.collect(_eager: true)
-      elsif axis == 1
-        Utils.wrap_s(_df.min_horizontal)
-      else
-        raise ArgumentError, "Axis should be 0 or 1."
-      end
+    def min
+      lazy.min.collect(_eager: true)
     end
 
-    #
+    # Get the minimum value horizontally across columns.
     #
-    # @
-    #
-    # @
-    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [4.0, 5.0, 6.0]
+    #     }
+    #   )
+    #   df.min_horizontal
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'min' [f64]
+    #   # [
+    #   #         1.0
+    #   #         2.0
+    #   #         3.0
+    #   # ]
+    def min_horizontal
+      select(min: F.min_horizontal(F.all)).to_series
+    end
+
+    # Aggregate the columns of this DataFrame to their sum value.
     #
     # @return [DataFrame]
     #
@@ -4008,35 +4106,42 @@ module Polars
     #   # ╞═════╪═════╪══════╡
     #   # │ 6   ┆ 21  ┆ null │
     #   # └─────┴─────┴──────┘
+    def sum
+      lazy.sum.collect(_eager: true)
+    end
+
+    # Sum all values horizontally across columns.
+    #
+    # @param ignore_nulls [Boolean]
+    #   Ignore null values (default).
+    #   If set to `false`, any null value in the input will lead to a null output.
+    #
+    # @return [Series]
     #
     # @example
-    #   df.
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [4.0, 5.0, 6.0]
+    #     }
+    #   )
+    #   df.sum_horizontal
     #   # =>
     #   # shape: (3,)
-    #   # Series: '
+    #   # Series: 'sum' [f64]
     #   # [
-    #   #
-    #   #
-    #   #
+    #   #         5.0
+    #   #         7.0
+    #   #         9.0
     #   # ]
-    def
-
-
-
-      when 1
-        Utils.wrap_s(_df.sum_horizontal(null_strategy))
-      else
-        raise ArgumentError, "Axis should be 0 or 1."
-      end
+    def sum_horizontal(ignore_nulls: true)
+      select(
+        sum: F.sum_horizontal(F.all, ignore_nulls: ignore_nulls)
+      ).to_series
     end
 
     # Aggregate the columns of this DataFrame to their mean value.
     #
-    # @param axis [Integer]
-    #   Either 0 or 1.
-    # @param null_strategy ["ignore", "propagate"]
-    #   This argument is only used if axis == 1.
-    #
     # @return [DataFrame]
     #
     # @example
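`sum_horizontal` is a thin wrapper over `select` plus the expression-level helper, so the new `ignore_nulls:` keyword controls how nulls propagate row-wise. A hedged sketch; the expected values in the comments follow Polars' documented null semantics:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [10, nil, nil]})

df.sum_horizontal                      # nulls skipped: roughly [11, 2, 0]
df.sum_horizontal(ignore_nulls: false) # any null makes the row null: [11, nil, nil]
```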
@@ -4057,15 +4162,38 @@ module Polars
     #   # ╞═════╪═════╪══════╡
     #   # │ 2.0 ┆ 7.0 ┆ null │
     #   # └─────┴─────┴──────┘
-    def mean
-
-
-
-
-
-
-
-
+    def mean
+      lazy.mean.collect(_eager: true)
+    end
+
+    # Take the mean of all values horizontally across columns.
+    #
+    # @param ignore_nulls [Boolean]
+    #   Ignore null values (default).
+    #   If set to `false`, any null value in the input will lead to a null output.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [4.0, 5.0, 6.0]
+    #     }
+    #   )
+    #   df.mean_horizontal
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'mean' [f64]
+    #   # [
+    #   #         2.5
+    #   #         3.5
+    #   #         4.5
+    #   # ]
+    def mean_horizontal(ignore_nulls: true)
+      select(
+        mean: F.mean_horizontal(F.all, ignore_nulls: ignore_nulls)
+      ).to_series
     end
 
     # Aggregate the columns of this DataFrame to their standard deviation value.
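Like `sum_horizontal`, the new `mean_horizontal` defers to `F.mean_horizontal(F.all)` under a `select`, so `ignore_nulls:` has the same meaning. A short sketch with expected values in comments:

```ruby
df = Polars::DataFrame.new({"a" => [1.0, nil], "b" => [3.0, 5.0]})

df.mean_horizontal                      # per-row mean over non-null values: [2.0, 5.0]
df.mean_horizontal(ignore_nulls: false) # nulls propagate: [2.0, nil]
```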
data/lib/polars/functions/aggregation/horizontal.rb
CHANGED
@@ -143,6 +143,9 @@ module Polars
     # @param exprs [Array]
     #   Column(s) to use in the aggregation. Accepts expression input. Strings are
     #   parsed as column names, other non-expression inputs are parsed as literals.
+    # @param ignore_nulls [Boolean]
+    #   Ignore null values (default).
+    #   If set to `false`, any null value in the input will lead to a null output.
     #
     # @return [Expr]
     #
@@ -166,9 +169,9 @@ module Polars
     #   # │ 8   ┆ 5    ┆ y   ┆ 13  │
     #   # │ 3   ┆ null ┆ z   ┆ 3   │
     #   # └─────┴──────┴─────┴─────┘
-    def sum_horizontal(*exprs)
+    def sum_horizontal(*exprs, ignore_nulls: true)
       rbexprs = Utils.parse_into_list_of_expressions(*exprs)
-      Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
+      Utils.wrap_expr(Plr.sum_horizontal(rbexprs, ignore_nulls))
     end
 
     # Compute the mean of all values horizontally across columns.
@@ -176,6 +179,9 @@ module Polars
     # @param exprs [Array]
     #   Column(s) to use in the aggregation. Accepts expression input. Strings are
     #   parsed as column names, other non-expression inputs are parsed as literals.
+    # @param ignore_nulls [Boolean]
+    #   Ignore null values (default).
+    #   If set to `false`, any null value in the input will lead to a null output.
     #
     # @return [Expr]
     #
@@ -199,9 +205,9 @@ module Polars
     #   # │ 8   ┆ 5    ┆ y   ┆ 6.5  │
     #   # │ 3   ┆ null ┆ z   ┆ 3.0  │
     #   # └─────┴──────┴─────┴──────┘
-    def mean_horizontal(*exprs)
+    def mean_horizontal(*exprs, ignore_nulls: true)
       rbexprs = Utils.parse_into_list_of_expressions(*exprs)
-      Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
+      Utils.wrap_expr(Plr.mean_horizontal(rbexprs, ignore_nulls))
     end
 
     # Cumulatively sum all values horizontally across columns.
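At the expression level the same flag is threaded through to the Rust functions (`Plr.sum_horizontal` / `Plr.mean_horizontal`), so it can be combined with any column selection. A hedged sketch using the module-level functions:

```ruby
df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, nil]})

df.select(
  Polars.sum_horizontal("a", "b").alias("sum"),
  Polars.mean_horizontal("a", "b", ignore_nulls: false).alias("mean")
)
```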
data/lib/polars/functions/lazy.rb
CHANGED
@@ -729,16 +729,20 @@ module Polars
       a,
       b,
       method: "pearson",
-      ddof:
+      ddof: nil,
       propagate_nans: false
     )
+      if !ddof.nil?
+        warn "The `ddof` parameter has no effect. Do not use it."
+      end
+
       a = Utils.parse_into_expression(a)
       b = Utils.parse_into_expression(b)
 
       if method == "pearson"
-        Utils.wrap_expr(Plr.pearson_corr(a, b
+        Utils.wrap_expr(Plr.pearson_corr(a, b))
       elsif method == "spearman"
-        Utils.wrap_expr(Plr.spearman_rank_corr(a, b,
+        Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
       else
         msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
         raise ArgumentError, msg
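`corr` now accepts `ddof:` only to warn that it has no effect, and the Rust calls drop the extra arguments. A quick sketch of the visible behavior:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [2, 4, 7]})

df.select(Polars.corr("a", "b"))                       # Pearson by default
df.select(Polars.corr("a", "b", method: "spearman"))
df.select(Polars.corr("a", "b", ddof: 1))              # emits the "no effect" warning
```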
data/lib/polars/io/delta.rb
ADDED
@@ -0,0 +1,126 @@
+module Polars
+  module IO
+    # Reads into a DataFrame from a Delta lake table.
+    #
+    # @param source [Object]
+    #   DeltaTable or a Path or URI to the root of the Delta lake table.
+    # @param version [Object]
+    #   Numerical version or timestamp version of the Delta lake table.
+    # @param columns [Array]
+    #   Columns to select. Accepts a list of column names.
+    # @param rechunk [Boolean]
+    #   Make sure that all columns are contiguous in memory by
+    #   aggregating the chunks into a single array.
+    # @param storage_options [Hash]
+    #   Extra options for the storage backends supported by `deltalake-rb`.
+    # @param delta_table_options [Hash]
+    #   Additional keyword arguments while reading a Delta lake Table.
+    #
+    # @return [DataFrame]
+    def read_delta(
+      source,
+      version: nil,
+      columns: nil,
+      rechunk: false,
+      storage_options: nil,
+      delta_table_options: nil
+    )
+      dl_tbl =
+        _get_delta_lake_table(
+          source,
+          version: version,
+          storage_options: storage_options,
+          delta_table_options: delta_table_options
+        )
+
+      dl_tbl.to_polars(columns: columns, rechunk: rechunk)
+    end
+
+    # Lazily read from a Delta lake table.
+    #
+    # @param source [Object]
+    #   DeltaTable or a Path or URI to the root of the Delta lake table.
+    # @param version [Object]
+    #   Numerical version or timestamp version of the Delta lake table.
+    # @param storage_options [Hash]
+    #   Extra options for the storage backends supported by `deltalake-rb`.
+    # @param delta_table_options [Hash]
+    #   Additional keyword arguments while reading a Delta lake Table.
+    #
+    # @return [LazyFrame]
+    def scan_delta(
+      source,
+      version: nil,
+      storage_options: nil,
+      delta_table_options: nil
+    )
+      dl_tbl =
+        _get_delta_lake_table(
+          source,
+          version: version,
+          storage_options: storage_options,
+          delta_table_options: delta_table_options
+        )
+
+      dl_tbl.to_polars(eager: false)
+    end
+
+    private
+
+    def _resolve_delta_lake_uri(table_uri, strict: true)
+      require "uri"
+
+      parsed_result = URI(table_uri)
+
+      resolved_uri =
+        if parsed_result.scheme == ""
+          Utils.normalize_filepath(table_uri)
+        else
+          table_uri
+        end
+
+      resolved_uri
+    end
+
+    def _get_delta_lake_table(
+      table_path,
+      version: nil,
+      storage_options: nil,
+      delta_table_options: nil
+    )
+      _check_if_delta_available
+
+      if table_path.is_a?(DeltaLake::Table)
+        return table_path
+      end
+      delta_table_options ||= {}
+      resolved_uri = _resolve_delta_lake_uri(table_path)
+      if !version.is_a?(::String) && !version.is_a?(::Time)
+        dl_tbl =
+          DeltaLake::Table.new(
+            resolved_uri,
+            version: version,
+            storage_options: storage_options,
+            **delta_table_options
+          )
+      else
+        dl_tbl =
+          DeltaLake::Table.new(
+            resolved_uri,
+            storage_options: storage_options,
+            **delta_table_options
+          )
+        dl_tbl.load_as_version(version)
+      end
+
+      dl_tbl = DeltaLake::Table.new(table_path)
+      dl_tbl
+    end
+
+    def _check_if_delta_available
+      if !defined?(DeltaLake)
+        raise Error, "Delta Lake not available"
+      end
+    end
+  end
+end
data/lib/polars/lazy_frame.rb
CHANGED
@@ -431,7 +431,9 @@ module Polars
       projection_pushdown: true,
       simplify_expression: true,
       no_optimization: false,
-      slice_pushdown: true
+      slice_pushdown: true,
+      storage_options: nil,
+      retries: 2
     )
       lf = _set_sink_optimizations(
         type_coercion: type_coercion,
@@ -460,6 +462,12 @@ module Polars
         }
       end
 
+      if storage_options&.any?
+        storage_options = storage_options.to_a
+      else
+        storage_options = nil
+      end
+
       lf.sink_parquet(
         path,
         compression,
@@ -467,7 +475,9 @@ module Polars
         statistics,
         row_group_size,
         data_pagesize_limit,
-        maintain_order
+        maintain_order,
+        storage_options,
+        retries
       )
     end
 
@@ -512,6 +522,10 @@ module Polars
       slice_pushdown: true,
       no_optimization: false
     )
+      # TODO support storage options in Rust
+      storage_options = nil
+      retries = 2
+
       lf = _set_sink_optimizations(
         type_coercion: type_coercion,
         predicate_pushdown: predicate_pushdown,
@@ -521,10 +535,18 @@ module Polars
         no_optimization: no_optimization
       )
 
+      if storage_options&.any?
+        storage_options = storage_options.to_a
+      else
+        storage_options = nil
+      end
+
       lf.sink_ipc(
         path,
         compression,
-        maintain_order
+        maintain_order,
+        storage_options,
+        retries
      )
     end
 
@@ -692,7 +714,9 @@ module Polars
       projection_pushdown: true,
       simplify_expression: true,
       slice_pushdown: true,
-      no_optimization: false
+      no_optimization: false,
+      storage_options: nil,
+      retries: 2
     )
       lf = _set_sink_optimizations(
         type_coercion: type_coercion,
@@ -703,7 +727,13 @@ module Polars
         no_optimization: no_optimization
       )
 
-
+      if storage_options&.any?
+        storage_options = storage_options.to_a
+      else
+        storage_options = nil
+      end
+
+      lf.sink_json(path, maintain_order, storage_options, retries)
     end
 
     # @private