polars-df 0.15.0-aarch64-linux-musl → 0.17.0-aarch64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +664 -539
- data/LICENSE-THIRD-PARTY.txt +15035 -16728
- data/LICENSE.txt +1 -1
- data/README.md +37 -2
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
- data/lib/polars/data_frame.rb +196 -68
- data/lib/polars/data_types.rb +5 -1
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/lazy.rb +7 -3
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/lazy_frame.rb +49 -7
- data/lib/polars/selectors.rb +85 -3
- data/lib/polars/series.rb +6 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -0
- metadata +8 -8
data/LICENSE.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
Copyright (c) 2020 Ritchie Vink
|
2
|
-
Copyright (c) 2022-
|
2
|
+
Copyright (c) 2022-2025 Andrew Kane
|
3
3
|
Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem "polars-df"
|
|
14
14
|
|
15
15
|
## Getting Started
|
16
16
|
|
17
|
-
This library follows the [Polars Python API](https://pola
|
17
|
+
This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
20
|
Polars.scan_csv("iris.csv")
|
@@ -24,7 +24,7 @@ Polars.scan_csv("iris.csv")
|
|
24
24
|
.collect
|
25
25
|
```
|
26
26
|
|
27
|
-
You can follow [Polars tutorials](https://pola
|
27
|
+
You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
28
28
|
|
29
29
|
## Reference
|
30
30
|
|
@@ -88,6 +88,15 @@ From Avro
|
|
88
88
|
Polars.read_avro("file.avro")
|
89
89
|
```
|
90
90
|
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental]
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
Polars.read_delta("./table")
|
95
|
+
|
96
|
+
# or lazily with
|
97
|
+
Polars.scan_delta("./table")
|
98
|
+
```
|
99
|
+
|
91
100
|
From a hash
|
92
101
|
|
93
102
|
```ruby
|
@@ -336,6 +345,32 @@ Parquet
|
|
336
345
|
df.write_parquet("file.parquet")
|
337
346
|
```
|
338
347
|
|
348
|
+
JSON
|
349
|
+
|
350
|
+
```ruby
|
351
|
+
df.write_json("file.json")
|
352
|
+
# or
|
353
|
+
df.write_ndjson("file.ndjson")
|
354
|
+
```
|
355
|
+
|
356
|
+
Feather / Arrow IPC
|
357
|
+
|
358
|
+
```ruby
|
359
|
+
df.write_ipc("file.arrow")
|
360
|
+
```
|
361
|
+
|
362
|
+
Avro
|
363
|
+
|
364
|
+
```ruby
|
365
|
+
df.write_avro("file.avro")
|
366
|
+
```
|
367
|
+
|
368
|
+
Delta Lake [experimental]
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
df.write_delta("./table")
|
372
|
+
```
|
373
|
+
|
339
374
|
Numo array
|
340
375
|
|
341
376
|
```ruby
|
data/lib/polars/3.2/polars.so
CHANGED
Binary file
|
data/lib/polars/3.3/polars.so
CHANGED
Binary file
|
Binary file
|
data/lib/polars/data_frame.rb
CHANGED
@@ -604,10 +604,6 @@ module Polars
|
|
604
604
|
#
|
605
605
|
# @param file [String]
|
606
606
|
# File path to which the result should be written.
|
607
|
-
# @param pretty [Boolean]
|
608
|
-
# Pretty serialize json.
|
609
|
-
# @param row_oriented [Boolean]
|
610
|
-
# Write to row oriented json. This is slower, but more common.
|
611
607
|
#
|
612
608
|
# @return [nil]
|
613
609
|
#
|
@@ -619,16 +615,8 @@ module Polars
|
|
619
615
|
# }
|
620
616
|
# )
|
621
617
|
# df.write_json
|
622
|
-
# # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
|
623
|
-
#
|
624
|
-
# @example
|
625
|
-
# df.write_json(row_oriented: true)
|
626
618
|
# # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
|
627
|
-
def write_json(
|
628
|
-
file = nil,
|
629
|
-
pretty: false,
|
630
|
-
row_oriented: false
|
631
|
-
)
|
619
|
+
def write_json(file = nil)
|
632
620
|
if Utils.pathlike?(file)
|
633
621
|
file = Utils.normalize_filepath(file)
|
634
622
|
end
|
@@ -636,7 +624,7 @@ module Polars
|
|
636
624
|
if file.nil? || to_string_io
|
637
625
|
buf = StringIO.new
|
638
626
|
buf.set_encoding(Encoding::BINARY)
|
639
|
-
_df.write_json(buf
|
627
|
+
_df.write_json(buf)
|
640
628
|
json_bytes = buf.string
|
641
629
|
|
642
630
|
json_str = json_bytes.force_encoding(Encoding::UTF_8)
|
@@ -646,7 +634,7 @@ module Polars
|
|
646
634
|
return json_str
|
647
635
|
end
|
648
636
|
else
|
649
|
-
_df.write_json(file
|
637
|
+
_df.write_json(file)
|
650
638
|
end
|
651
639
|
nil
|
652
640
|
end
|
@@ -831,7 +819,13 @@ module Polars
|
|
831
819
|
# Compression method. Defaults to "uncompressed".
|
832
820
|
#
|
833
821
|
# @return [nil]
|
834
|
-
def write_ipc(
|
822
|
+
def write_ipc(
|
823
|
+
file,
|
824
|
+
compression: "uncompressed",
|
825
|
+
compat_level: nil,
|
826
|
+
storage_options: nil,
|
827
|
+
retries: 2
|
828
|
+
)
|
835
829
|
return_bytes = file.nil?
|
836
830
|
if return_bytes
|
837
831
|
file = StringIO.new
|
@@ -849,7 +843,13 @@ module Polars
|
|
849
843
|
compression = "uncompressed"
|
850
844
|
end
|
851
845
|
|
852
|
-
|
846
|
+
if storage_options&.any?
|
847
|
+
storage_options = storage_options.to_a
|
848
|
+
else
|
849
|
+
storage_options = nil
|
850
|
+
end
|
851
|
+
|
852
|
+
_df.write_ipc(file, compression, compat_level, storage_options, retries)
|
853
853
|
return_bytes ? file.string : nil
|
854
854
|
end
|
855
855
|
|
@@ -961,6 +961,61 @@ module Polars
|
|
961
961
|
)
|
962
962
|
end
|
963
963
|
|
964
|
+
# Write DataFrame as delta table.
|
965
|
+
#
|
966
|
+
# @param target [Object]
|
967
|
+
# URI of a table or a DeltaTable object.
|
968
|
+
# @param mode ["error", "append", "overwrite", "ignore", "merge"]
|
969
|
+
# How to handle existing data.
|
970
|
+
# @param storage_options [Hash]
|
971
|
+
# Extra options for the storage backends supported by `deltalake-rb`.
|
972
|
+
# @param delta_write_options [Hash]
|
973
|
+
# Additional keyword arguments while writing a Delta lake Table.
|
974
|
+
# @param delta_merge_options [Hash]
|
975
|
+
# Keyword arguments which are required to `MERGE` a Delta lake Table.
|
976
|
+
#
|
977
|
+
# @return [nil]
|
978
|
+
def write_delta(
|
979
|
+
target,
|
980
|
+
mode: "error",
|
981
|
+
storage_options: nil,
|
982
|
+
delta_write_options: nil,
|
983
|
+
delta_merge_options: nil
|
984
|
+
)
|
985
|
+
Polars.send(:_check_if_delta_available)
|
986
|
+
|
987
|
+
if Utils.pathlike?(target)
|
988
|
+
target = Polars.send(:_resolve_delta_lake_uri, target.to_s, strict: false)
|
989
|
+
end
|
990
|
+
|
991
|
+
data = self
|
992
|
+
|
993
|
+
if mode == "merge"
|
994
|
+
if delta_merge_options.nil?
|
995
|
+
msg = "You need to pass delta_merge_options with at least a given predicate for `MERGE` to work."
|
996
|
+
raise ArgumentError, msg
|
997
|
+
end
|
998
|
+
if target.is_a?(::String)
|
999
|
+
dt = DeltaLake::Table.new(target, storage_options: storage_options)
|
1000
|
+
else
|
1001
|
+
dt = target
|
1002
|
+
end
|
1003
|
+
|
1004
|
+
predicate = delta_merge_options.delete(:predicate)
|
1005
|
+
dt.merge(data, predicate, **delta_merge_options)
|
1006
|
+
else
|
1007
|
+
delta_write_options ||= {}
|
1008
|
+
|
1009
|
+
DeltaLake.write(
|
1010
|
+
target,
|
1011
|
+
data,
|
1012
|
+
mode: mode,
|
1013
|
+
storage_options: storage_options,
|
1014
|
+
**delta_write_options
|
1015
|
+
)
|
1016
|
+
end
|
1017
|
+
end
|
1018
|
+
|
964
1019
|
# Return an estimation of the total (heap) allocated size of the DataFrame.
|
965
1020
|
#
|
966
1021
|
# Estimated size is given in the specified unit (bytes by default).
|
@@ -2227,6 +2282,14 @@ module Polars
|
|
2227
2282
|
# keys are within this distance. If an asof join is done on columns of dtype
|
2228
2283
|
# "Date", "Datetime", "Duration" or "Time" you use the following string
|
2229
2284
|
# language:
|
2285
|
+
# @param allow_exact_matches [Boolean]
|
2286
|
+
# Whether exact matches are valid join predicates.
|
2287
|
+
# - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
|
2288
|
+
# - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
|
2289
|
+
# @param check_sortedness [Boolean]
|
2290
|
+
# Check the sortedness of the asof keys. If the keys are not sorted Polars
|
2291
|
+
# will error, or in case of 'by' argument raise a warning. This might become
|
2292
|
+
# a hard error in the future.
|
2230
2293
|
#
|
2231
2294
|
# - 1ns (1 nanosecond)
|
2232
2295
|
# - 1us (1 microsecond)
|
@@ -2308,7 +2371,9 @@ module Polars
|
|
2308
2371
|
tolerance: nil,
|
2309
2372
|
allow_parallel: true,
|
2310
2373
|
force_parallel: false,
|
2311
|
-
coalesce: true
|
2374
|
+
coalesce: true,
|
2375
|
+
allow_exact_matches: true,
|
2376
|
+
check_sortedness: true
|
2312
2377
|
)
|
2313
2378
|
lazy
|
2314
2379
|
.join_asof(
|
@@ -2324,7 +2389,9 @@ module Polars
|
|
2324
2389
|
tolerance: tolerance,
|
2325
2390
|
allow_parallel: allow_parallel,
|
2326
2391
|
force_parallel: force_parallel,
|
2327
|
-
coalesce: coalesce
|
2392
|
+
coalesce: coalesce,
|
2393
|
+
allow_exact_matches: allow_exact_matches,
|
2394
|
+
check_sortedness: check_sortedness
|
2328
2395
|
)
|
2329
2396
|
.collect(no_optimization: true)
|
2330
2397
|
end
|
@@ -3939,14 +4006,32 @@ module Polars
|
|
3939
4006
|
# # ╞═════╪═════╪═════╡
|
3940
4007
|
# # │ 3 ┆ 8 ┆ c │
|
3941
4008
|
# # └─────┴─────┴─────┘
|
3942
|
-
def max
|
3943
|
-
|
3944
|
-
|
3945
|
-
|
3946
|
-
|
3947
|
-
|
3948
|
-
|
3949
|
-
|
4009
|
+
def max
|
4010
|
+
lazy.max.collect(_eager: true)
|
4011
|
+
end
|
4012
|
+
|
4013
|
+
# Get the maximum value horizontally across columns.
|
4014
|
+
#
|
4015
|
+
# @return [Series]
|
4016
|
+
#
|
4017
|
+
# @example
|
4018
|
+
# df = Polars::DataFrame.new(
|
4019
|
+
# {
|
4020
|
+
# "foo" => [1, 2, 3],
|
4021
|
+
# "bar" => [4.0, 5.0, 6.0]
|
4022
|
+
# }
|
4023
|
+
# )
|
4024
|
+
# df.max_horizontal
|
4025
|
+
# # =>
|
4026
|
+
# # shape: (3,)
|
4027
|
+
# # Series: 'max' [f64]
|
4028
|
+
# # [
|
4029
|
+
# # 4.0
|
4030
|
+
# # 5.0
|
4031
|
+
# # 6.0
|
4032
|
+
# # ]
|
4033
|
+
def max_horizontal
|
4034
|
+
select(max: F.max_horizontal(F.all)).to_series
|
3950
4035
|
end
|
3951
4036
|
|
3952
4037
|
# Aggregate the columns of this DataFrame to their minimum value.
|
@@ -3971,22 +4056,35 @@ module Polars
|
|
3971
4056
|
# # ╞═════╪═════╪═════╡
|
3972
4057
|
# # │ 1 ┆ 6 ┆ a │
|
3973
4058
|
# # └─────┴─────┴─────┘
|
3974
|
-
def min
|
3975
|
-
|
3976
|
-
lazy.min.collect(_eager: true)
|
3977
|
-
elsif axis == 1
|
3978
|
-
Utils.wrap_s(_df.min_horizontal)
|
3979
|
-
else
|
3980
|
-
raise ArgumentError, "Axis should be 0 or 1."
|
3981
|
-
end
|
4059
|
+
def min
|
4060
|
+
lazy.min.collect(_eager: true)
|
3982
4061
|
end
|
3983
4062
|
|
3984
|
-
#
|
4063
|
+
# Get the minimum value horizontally across columns.
|
3985
4064
|
#
|
3986
|
-
# @
|
3987
|
-
#
|
3988
|
-
# @
|
3989
|
-
#
|
4065
|
+
# @return [Series]
|
4066
|
+
#
|
4067
|
+
# @example
|
4068
|
+
# df = Polars::DataFrame.new(
|
4069
|
+
# {
|
4070
|
+
# "foo" => [1, 2, 3],
|
4071
|
+
# "bar" => [4.0, 5.0, 6.0]
|
4072
|
+
# }
|
4073
|
+
# )
|
4074
|
+
# df.min_horizontal
|
4075
|
+
# # =>
|
4076
|
+
# # shape: (3,)
|
4077
|
+
# # Series: 'min' [f64]
|
4078
|
+
# # [
|
4079
|
+
# # 1.0
|
4080
|
+
# # 2.0
|
4081
|
+
# # 3.0
|
4082
|
+
# # ]
|
4083
|
+
def min_horizontal
|
4084
|
+
select(min: F.min_horizontal(F.all)).to_series
|
4085
|
+
end
|
4086
|
+
|
4087
|
+
# Aggregate the columns of this DataFrame to their sum value.
|
3990
4088
|
#
|
3991
4089
|
# @return [DataFrame]
|
3992
4090
|
#
|
@@ -4008,35 +4106,42 @@ module Polars
|
|
4008
4106
|
# # ╞═════╪═════╪══════╡
|
4009
4107
|
# # │ 6 ┆ 21 ┆ null │
|
4010
4108
|
# # └─────┴─────┴──────┘
|
4109
|
+
def sum
|
4110
|
+
lazy.sum.collect(_eager: true)
|
4111
|
+
end
|
4112
|
+
|
4113
|
+
# Sum all values horizontally across columns.
|
4114
|
+
#
|
4115
|
+
# @param ignore_nulls [Boolean]
|
4116
|
+
# Ignore null values (default).
|
4117
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
4118
|
+
#
|
4119
|
+
# @return [Series]
|
4011
4120
|
#
|
4012
4121
|
# @example
|
4013
|
-
# df.
|
4122
|
+
# df = Polars::DataFrame.new(
|
4123
|
+
# {
|
4124
|
+
# "foo" => [1, 2, 3],
|
4125
|
+
# "bar" => [4.0, 5.0, 6.0]
|
4126
|
+
# }
|
4127
|
+
# )
|
4128
|
+
# df.sum_horizontal
|
4014
4129
|
# # =>
|
4015
4130
|
# # shape: (3,)
|
4016
|
-
# # Series: '
|
4131
|
+
# # Series: 'sum' [f64]
|
4017
4132
|
# # [
|
4018
|
-
# #
|
4019
|
-
# #
|
4020
|
-
# #
|
4133
|
+
# # 5.0
|
4134
|
+
# # 7.0
|
4135
|
+
# # 9.0
|
4021
4136
|
# # ]
|
4022
|
-
def
|
4023
|
-
|
4024
|
-
|
4025
|
-
|
4026
|
-
when 1
|
4027
|
-
Utils.wrap_s(_df.sum_horizontal(null_strategy))
|
4028
|
-
else
|
4029
|
-
raise ArgumentError, "Axis should be 0 or 1."
|
4030
|
-
end
|
4137
|
+
def sum_horizontal(ignore_nulls: true)
|
4138
|
+
select(
|
4139
|
+
sum: F.sum_horizontal(F.all, ignore_nulls: ignore_nulls)
|
4140
|
+
).to_series
|
4031
4141
|
end
|
4032
4142
|
|
4033
4143
|
# Aggregate the columns of this DataFrame to their mean value.
|
4034
4144
|
#
|
4035
|
-
# @param axis [Integer]
|
4036
|
-
# Either 0 or 1.
|
4037
|
-
# @param null_strategy ["ignore", "propagate"]
|
4038
|
-
# This argument is only used if axis == 1.
|
4039
|
-
#
|
4040
4145
|
# @return [DataFrame]
|
4041
4146
|
#
|
4042
4147
|
# @example
|
@@ -4057,15 +4162,38 @@ module Polars
|
|
4057
4162
|
# # ╞═════╪═════╪══════╡
|
4058
4163
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4059
4164
|
# # └─────┴─────┴──────┘
|
4060
|
-
def mean
|
4061
|
-
|
4062
|
-
|
4063
|
-
|
4064
|
-
|
4065
|
-
|
4066
|
-
|
4067
|
-
|
4068
|
-
|
4165
|
+
def mean
|
4166
|
+
lazy.mean.collect(_eager: true)
|
4167
|
+
end
|
4168
|
+
|
4169
|
+
# Take the mean of all values horizontally across columns.
|
4170
|
+
#
|
4171
|
+
# @param ignore_nulls [Boolean]
|
4172
|
+
# Ignore null values (default).
|
4173
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
4174
|
+
#
|
4175
|
+
# @return [Series]
|
4176
|
+
#
|
4177
|
+
# @example
|
4178
|
+
# df = Polars::DataFrame.new(
|
4179
|
+
# {
|
4180
|
+
# "foo" => [1, 2, 3],
|
4181
|
+
# "bar" => [4.0, 5.0, 6.0]
|
4182
|
+
# }
|
4183
|
+
# )
|
4184
|
+
# df.mean_horizontal
|
4185
|
+
# # =>
|
4186
|
+
# # shape: (3,)
|
4187
|
+
# # Series: 'mean' [f64]
|
4188
|
+
# # [
|
4189
|
+
# # 2.5
|
4190
|
+
# # 3.5
|
4191
|
+
# # 4.5
|
4192
|
+
# # ]
|
4193
|
+
def mean_horizontal(ignore_nulls: true)
|
4194
|
+
select(
|
4195
|
+
mean: F.mean_horizontal(F.all, ignore_nulls: ignore_nulls)
|
4196
|
+
).to_series
|
4069
4197
|
end
|
4070
4198
|
|
4071
4199
|
# Aggregate the columns of this DataFrame to their standard deviation value.
|
data/lib/polars/data_types.rb
CHANGED
@@ -167,6 +167,10 @@ module Polars
|
|
167
167
|
class Int64 < SignedIntegerType
|
168
168
|
end
|
169
169
|
|
170
|
+
# 128-bit signed integer type.
|
171
|
+
class Int128 < SignedIntegerType
|
172
|
+
end
|
173
|
+
|
170
174
|
# 8-bit unsigned integer type.
|
171
175
|
class UInt8 < UnsignedIntegerType
|
172
176
|
end
|
@@ -311,7 +315,7 @@ module Polars
|
|
311
315
|
end
|
312
316
|
|
313
317
|
if categories.empty?
|
314
|
-
|
318
|
+
@categories = Series.new("category", [], dtype: String)
|
315
319
|
return
|
316
320
|
end
|
317
321
|
|
@@ -143,6 +143,9 @@ module Polars
|
|
143
143
|
# @param exprs [Array]
|
144
144
|
# Column(s) to use in the aggregation. Accepts expression input. Strings are
|
145
145
|
# parsed as column names, other non-expression inputs are parsed as literals.
|
146
|
+
# @param ignore_nulls [Boolean]
|
147
|
+
# Ignore null values (default).
|
148
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
146
149
|
#
|
147
150
|
# @return [Expr]
|
148
151
|
#
|
@@ -166,9 +169,9 @@ module Polars
|
|
166
169
|
# # │ 8 ┆ 5 ┆ y ┆ 13 │
|
167
170
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
168
171
|
# # └─────┴──────┴─────┴─────┘
|
169
|
-
def sum_horizontal(*exprs)
|
172
|
+
def sum_horizontal(*exprs, ignore_nulls: true)
|
170
173
|
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
171
|
-
Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
|
174
|
+
Utils.wrap_expr(Plr.sum_horizontal(rbexprs, ignore_nulls))
|
172
175
|
end
|
173
176
|
|
174
177
|
# Compute the mean of all values horizontally across columns.
|
@@ -176,6 +179,9 @@ module Polars
|
|
176
179
|
# @param exprs [Array]
|
177
180
|
# Column(s) to use in the aggregation. Accepts expression input. Strings are
|
178
181
|
# parsed as column names, other non-expression inputs are parsed as literals.
|
182
|
+
# @param ignore_nulls [Boolean]
|
183
|
+
# Ignore null values (default).
|
184
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
179
185
|
#
|
180
186
|
# @return [Expr]
|
181
187
|
#
|
@@ -199,9 +205,9 @@ module Polars
|
|
199
205
|
# # │ 8 ┆ 5 ┆ y ┆ 6.5 │
|
200
206
|
# # │ 3 ┆ null ┆ z ┆ 3.0 │
|
201
207
|
# # └─────┴──────┴─────┴──────┘
|
202
|
-
def mean_horizontal(*exprs)
|
208
|
+
def mean_horizontal(*exprs, ignore_nulls: true)
|
203
209
|
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
204
|
-
Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
|
210
|
+
Utils.wrap_expr(Plr.mean_horizontal(rbexprs, ignore_nulls))
|
205
211
|
end
|
206
212
|
|
207
213
|
# Cumulatively sum all values horizontally across columns.
|
@@ -729,16 +729,20 @@ module Polars
|
|
729
729
|
a,
|
730
730
|
b,
|
731
731
|
method: "pearson",
|
732
|
-
ddof:
|
732
|
+
ddof: nil,
|
733
733
|
propagate_nans: false
|
734
734
|
)
|
735
|
+
if !ddof.nil?
|
736
|
+
warn "The `ddof` parameter has no effect. Do not use it."
|
737
|
+
end
|
738
|
+
|
735
739
|
a = Utils.parse_into_expression(a)
|
736
740
|
b = Utils.parse_into_expression(b)
|
737
741
|
|
738
742
|
if method == "pearson"
|
739
|
-
Utils.wrap_expr(Plr.pearson_corr(a, b
|
743
|
+
Utils.wrap_expr(Plr.pearson_corr(a, b))
|
740
744
|
elsif method == "spearman"
|
741
|
-
Utils.wrap_expr(Plr.spearman_rank_corr(a, b,
|
745
|
+
Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
|
742
746
|
else
|
743
747
|
msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
|
744
748
|
raise ArgumentError, msg
|
@@ -0,0 +1,126 @@
|
|
1
|
+
module Polars
|
2
|
+
module IO
|
3
|
+
# Reads into a DataFrame from a Delta lake table.
|
4
|
+
#
|
5
|
+
# @param source [Object]
|
6
|
+
# DeltaTable or a Path or URI to the root of the Delta lake table.
|
7
|
+
# @param version [Object]
|
8
|
+
# Numerical version or timestamp version of the Delta lake table.
|
9
|
+
# @param columns [Array]
|
10
|
+
# Columns to select. Accepts a list of column names.
|
11
|
+
# @param rechunk [Boolean]
|
12
|
+
# Make sure that all columns are contiguous in memory by
|
13
|
+
# aggregating the chunks into a single array.
|
14
|
+
# @param storage_options [Hash]
|
15
|
+
# Extra options for the storage backends supported by `deltalake-rb`.
|
16
|
+
# @param delta_table_options [Hash]
|
17
|
+
# Additional keyword arguments while reading a Delta lake Table.
|
18
|
+
#
|
19
|
+
# @return [DataFrame]
|
20
|
+
def read_delta(
|
21
|
+
source,
|
22
|
+
version: nil,
|
23
|
+
columns: nil,
|
24
|
+
rechunk: false,
|
25
|
+
storage_options: nil,
|
26
|
+
delta_table_options: nil
|
27
|
+
)
|
28
|
+
dl_tbl =
|
29
|
+
_get_delta_lake_table(
|
30
|
+
source,
|
31
|
+
version: version,
|
32
|
+
storage_options: storage_options,
|
33
|
+
delta_table_options: delta_table_options
|
34
|
+
)
|
35
|
+
|
36
|
+
dl_tbl.to_polars(columns: columns, rechunk: rechunk)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Lazily read from a Delta lake table.
|
40
|
+
#
|
41
|
+
# @param source [Object]
|
42
|
+
# DeltaTable or a Path or URI to the root of the Delta lake table.
|
43
|
+
# @param version [Object]
|
44
|
+
# Numerical version or timestamp version of the Delta lake table.
|
45
|
+
# @param storage_options [Hash]
|
46
|
+
# Extra options for the storage backends supported by `deltalake-rb`.
|
47
|
+
# @param delta_table_options [Hash]
|
48
|
+
# Additional keyword arguments while reading a Delta lake Table.
|
49
|
+
#
|
50
|
+
# @return [LazyFrame]
|
51
|
+
def scan_delta(
|
52
|
+
source,
|
53
|
+
version: nil,
|
54
|
+
storage_options: nil,
|
55
|
+
delta_table_options: nil
|
56
|
+
)
|
57
|
+
dl_tbl =
|
58
|
+
_get_delta_lake_table(
|
59
|
+
source,
|
60
|
+
version: version,
|
61
|
+
storage_options: storage_options,
|
62
|
+
delta_table_options: delta_table_options
|
63
|
+
)
|
64
|
+
|
65
|
+
dl_tbl.to_polars(eager: false)
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
def _resolve_delta_lake_uri(table_uri, strict: true)
|
71
|
+
require "uri"
|
72
|
+
|
73
|
+
parsed_result = URI(table_uri)
|
74
|
+
|
75
|
+
resolved_uri =
|
76
|
+
if parsed_result.scheme == ""
|
77
|
+
Utils.normalize_filepath(table_uri)
|
78
|
+
else
|
79
|
+
table_uri
|
80
|
+
end
|
81
|
+
|
82
|
+
resolved_uri
|
83
|
+
end
|
84
|
+
|
85
|
+
def _get_delta_lake_table(
|
86
|
+
table_path,
|
87
|
+
version: nil,
|
88
|
+
storage_options: nil,
|
89
|
+
delta_table_options: nil
|
90
|
+
)
|
91
|
+
_check_if_delta_available
|
92
|
+
|
93
|
+
if table_path.is_a?(DeltaLake::Table)
|
94
|
+
return table_path
|
95
|
+
end
|
96
|
+
delta_table_options ||= {}
|
97
|
+
resolved_uri = _resolve_delta_lake_uri(table_path)
|
98
|
+
if !version.is_a?(::String) && !version.is_a?(::Time)
|
99
|
+
dl_tbl =
|
100
|
+
DeltaLake::Table.new(
|
101
|
+
resolved_uri,
|
102
|
+
version: version,
|
103
|
+
storage_options: storage_options,
|
104
|
+
**delta_table_options
|
105
|
+
)
|
106
|
+
else
|
107
|
+
dl_tbl =
|
108
|
+
DeltaLake::Table.new(
|
109
|
+
resolved_uri,
|
110
|
+
storage_options: storage_options,
|
111
|
+
**delta_table_options
|
112
|
+
)
|
113
|
+
dl_tbl.load_as_version(version)
|
114
|
+
end
|
115
|
+
|
116
|
+
dl_tbl = DeltaLake::Table.new(table_path)
|
117
|
+
dl_tbl
|
118
|
+
end
|
119
|
+
|
120
|
+
def _check_if_delta_available
|
121
|
+
if !defined?(DeltaLake)
|
122
|
+
raise Error, "Delta Lake not available"
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|