polars-df 0.11.0-arm64-darwin → 0.13.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/Cargo.lock +428 -450
- data/LICENSE-THIRD-PARTY.txt +2212 -1952
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +35 -7
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +15 -8
- data/lib/polars/lazy_frame.rb +123 -105
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +108 -191
- data/lib/polars/string_expr.rb +51 -76
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +7 -2
data/lib/polars/lazy_frame.rb
CHANGED
@@ -63,7 +63,7 @@ module Polars
|
|
63
63
|
# df.columns
|
64
64
|
# # => ["foo", "bar"]
|
65
65
|
def columns
|
66
|
-
_ldf.
|
66
|
+
_ldf.collect_schema.keys
|
67
67
|
end
|
68
68
|
|
69
69
|
# Get dtypes of columns in LazyFrame.
|
@@ -81,7 +81,7 @@ module Polars
|
|
81
81
|
# lf.dtypes
|
82
82
|
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
83
83
|
def dtypes
|
84
|
-
_ldf.
|
84
|
+
_ldf.collect_schema.values
|
85
85
|
end
|
86
86
|
|
87
87
|
# Get the schema.
|
@@ -99,7 +99,7 @@ module Polars
|
|
99
99
|
# lf.schema
|
100
100
|
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
101
101
|
def schema
|
102
|
-
_ldf.
|
102
|
+
_ldf.collect_schema
|
103
103
|
end
|
104
104
|
|
105
105
|
# Get the width of the LazyFrame.
|
@@ -111,7 +111,7 @@ module Polars
|
|
111
111
|
# lf.width
|
112
112
|
# # => 2
|
113
113
|
def width
|
114
|
-
_ldf.
|
114
|
+
_ldf.collect_schema.length
|
115
115
|
end
|
116
116
|
|
117
117
|
# Check if LazyFrame includes key.
|
@@ -261,16 +261,23 @@ module Polars
|
|
261
261
|
# # │ 2 ┆ 7.0 ┆ b │
|
262
262
|
# # │ 1 ┆ 6.0 ┆ a │
|
263
263
|
# # └─────┴─────┴─────┘
|
264
|
-
def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
|
265
|
-
if by.is_a?(::String)
|
266
|
-
return _from_rbldf(
|
267
|
-
|
268
|
-
|
269
|
-
|
264
|
+
def sort(by, *more_by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
|
265
|
+
if by.is_a?(::String) && more_by.empty?
|
266
|
+
return _from_rbldf(
|
267
|
+
_ldf.sort(
|
268
|
+
by, reverse, nulls_last, maintain_order, multithreaded
|
269
|
+
)
|
270
|
+
)
|
270
271
|
end
|
271
272
|
|
272
|
-
by = Utils.
|
273
|
-
|
273
|
+
by = Utils.parse_into_list_of_expressions(by, *more_by)
|
274
|
+
reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
|
275
|
+
nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
|
276
|
+
_from_rbldf(
|
277
|
+
_ldf.sort_by_exprs(
|
278
|
+
by, reverse, nulls_last, maintain_order, multithreaded
|
279
|
+
)
|
280
|
+
)
|
274
281
|
end
|
275
282
|
|
276
283
|
# def profile
|
@@ -415,7 +422,7 @@ module Polars
|
|
415
422
|
path,
|
416
423
|
compression: "zstd",
|
417
424
|
compression_level: nil,
|
418
|
-
statistics:
|
425
|
+
statistics: true,
|
419
426
|
row_group_size: nil,
|
420
427
|
data_pagesize_limit: nil,
|
421
428
|
maintain_order: true,
|
@@ -435,6 +442,24 @@ module Polars
|
|
435
442
|
no_optimization: no_optimization
|
436
443
|
)
|
437
444
|
|
445
|
+
if statistics == true
|
446
|
+
statistics = {
|
447
|
+
min: true,
|
448
|
+
max: true,
|
449
|
+
distinct_count: false,
|
450
|
+
null_count: true
|
451
|
+
}
|
452
|
+
elsif statistics == false
|
453
|
+
statistics = {}
|
454
|
+
elsif statistics == "full"
|
455
|
+
statistics = {
|
456
|
+
min: true,
|
457
|
+
max: true,
|
458
|
+
distinct_count: true,
|
459
|
+
null_count: true
|
460
|
+
}
|
461
|
+
end
|
462
|
+
|
438
463
|
lf.sink_parquet(
|
439
464
|
path,
|
440
465
|
compression,
|
@@ -589,6 +614,7 @@ module Polars
|
|
589
614
|
datetime_format: nil,
|
590
615
|
date_format: nil,
|
591
616
|
time_format: nil,
|
617
|
+
float_scientific: nil,
|
592
618
|
float_precision: nil,
|
593
619
|
null_value: nil,
|
594
620
|
quote_style: nil,
|
@@ -623,6 +649,7 @@ module Polars
|
|
623
649
|
datetime_format,
|
624
650
|
date_format,
|
625
651
|
time_format,
|
652
|
+
float_scientific,
|
626
653
|
float_precision,
|
627
654
|
null_value,
|
628
655
|
quote_style,
|
@@ -907,7 +934,7 @@ module Polars
|
|
907
934
|
def filter(predicate)
|
908
935
|
_from_rbldf(
|
909
936
|
_ldf.filter(
|
910
|
-
Utils.
|
937
|
+
Utils.parse_into_expression(predicate, str_as_lit: false)
|
911
938
|
)
|
912
939
|
)
|
913
940
|
end
|
@@ -1003,7 +1030,7 @@ module Polars
|
|
1003
1030
|
def select(*exprs, **named_exprs)
|
1004
1031
|
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
|
1005
1032
|
|
1006
|
-
rbexprs = Utils.
|
1033
|
+
rbexprs = Utils.parse_into_list_of_expressions(
|
1007
1034
|
*exprs, **named_exprs, __structify: structify
|
1008
1035
|
)
|
1009
1036
|
_from_rbldf(_ldf.select(rbexprs))
|
@@ -1011,12 +1038,14 @@ module Polars
|
|
1011
1038
|
|
1012
1039
|
# Start a group by operation.
|
1013
1040
|
#
|
1014
|
-
# @param by [
|
1041
|
+
# @param by [Array]
|
1015
1042
|
# Column(s) to group by.
|
1016
1043
|
# @param maintain_order [Boolean]
|
1017
1044
|
# Make sure that the order of the groups remain consistent. This is more
|
1018
1045
|
# expensive than a default group by.
|
1019
|
-
#
|
1046
|
+
# @param named_by [Hash]
|
1047
|
+
# Additional columns to group by, specified as keyword arguments.
|
1048
|
+
# The columns will be renamed to the keyword used.
|
1020
1049
|
# @return [LazyGroupBy]
|
1021
1050
|
#
|
1022
1051
|
# @example
|
@@ -1039,9 +1068,9 @@ module Polars
|
|
1039
1068
|
# # │ b ┆ 11 │
|
1040
1069
|
# # │ c ┆ 6 │
|
1041
1070
|
# # └─────┴─────┘
|
1042
|
-
def group_by(by, maintain_order: false)
|
1043
|
-
|
1044
|
-
lgb = _ldf.group_by(
|
1071
|
+
def group_by(*by, maintain_order: false, **named_by)
|
1072
|
+
exprs = Utils.parse_into_list_of_expressions(*by, **named_by)
|
1073
|
+
lgb = _ldf.group_by(exprs, maintain_order)
|
1045
1074
|
LazyGroupBy.new(lgb)
|
1046
1075
|
end
|
1047
1076
|
alias_method :groupby, :group_by
|
@@ -1095,12 +1124,6 @@ module Polars
|
|
1095
1124
|
# Define whether the temporal window interval is closed or not.
|
1096
1125
|
# @param by [Object]
|
1097
1126
|
# Also group by this column/these columns.
|
1098
|
-
# @param check_sorted [Boolean]
|
1099
|
-
# When the `by` argument is given, polars can not check sortedness
|
1100
|
-
# by the metadata and has to do a full scan on the index column to
|
1101
|
-
# verify data is sorted. This is expensive. If you are sure the
|
1102
|
-
# data within the by groups is sorted, you can set this to `false`.
|
1103
|
-
# Doing so incorrectly will lead to incorrect output
|
1104
1127
|
#
|
1105
1128
|
# @return [LazyFrame]
|
1106
1129
|
#
|
@@ -1142,21 +1165,20 @@ module Polars
|
|
1142
1165
|
period:,
|
1143
1166
|
offset: nil,
|
1144
1167
|
closed: "right",
|
1145
|
-
by: nil
|
1146
|
-
check_sorted: true
|
1168
|
+
by: nil
|
1147
1169
|
)
|
1148
|
-
index_column = Utils.
|
1170
|
+
index_column = Utils.parse_into_expression(index_column)
|
1149
1171
|
if offset.nil?
|
1150
|
-
offset =
|
1172
|
+
offset = Utils.negate_duration_string(Utils.parse_as_duration_string(period))
|
1151
1173
|
end
|
1152
1174
|
|
1153
|
-
rbexprs_by =
|
1154
|
-
|
1155
|
-
offset = Utils._timedelta_to_pl_duration(offset)
|
1156
|
-
|
1157
|
-
lgb = _ldf.rolling(
|
1158
|
-
index_column, period, offset, closed, rbexprs_by, check_sorted
|
1175
|
+
rbexprs_by = (
|
1176
|
+
!by.nil? ? Utils.parse_into_list_of_expressions(by) : []
|
1159
1177
|
)
|
1178
|
+
period = Utils.parse_as_duration_string(period)
|
1179
|
+
offset = Utils.parse_as_duration_string(offset)
|
1180
|
+
|
1181
|
+
lgb = _ldf.rolling(index_column, period, offset, closed, rbexprs_by)
|
1160
1182
|
LazyGroupBy.new(lgb)
|
1161
1183
|
end
|
1162
1184
|
alias_method :group_by_rolling, :rolling
|
@@ -1224,22 +1246,18 @@ module Polars
|
|
1224
1246
|
# Define whether the temporal window interval is closed or not.
|
1225
1247
|
# @param by [Object]
|
1226
1248
|
# Also group by this column/these columns
|
1227
|
-
# @param check_sorted [Boolean]
|
1228
|
-
# When the `by` argument is given, polars can not check sortedness
|
1229
|
-
# by the metadata and has to do a full scan on the index column to
|
1230
|
-
# verify data is sorted. This is expensive. If you are sure the
|
1231
|
-
# data within the by groups is sorted, you can set this to `false`.
|
1232
|
-
# Doing so incorrectly will lead to incorrect output.
|
1233
1249
|
#
|
1234
1250
|
# @return [DataFrame]
|
1235
1251
|
#
|
1236
1252
|
# @example
|
1237
1253
|
# df = Polars::DataFrame.new(
|
1238
1254
|
# {
|
1239
|
-
# "time" => Polars.
|
1255
|
+
# "time" => Polars.datetime_range(
|
1240
1256
|
# DateTime.new(2021, 12, 16),
|
1241
1257
|
# DateTime.new(2021, 12, 16, 3),
|
1242
|
-
# "30m"
|
1258
|
+
# "30m",
|
1259
|
+
# time_unit: "us",
|
1260
|
+
# eager: true
|
1243
1261
|
# ),
|
1244
1262
|
# "n" => 0..6
|
1245
1263
|
# }
|
@@ -1338,10 +1356,12 @@ module Polars
|
|
1338
1356
|
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
1339
1357
|
# df = Polars::DataFrame.new(
|
1340
1358
|
# {
|
1341
|
-
# "time" => Polars.
|
1359
|
+
# "time" => Polars.datetime_range(
|
1342
1360
|
# DateTime.new(2021, 12, 16),
|
1343
1361
|
# DateTime.new(2021, 12, 16, 3),
|
1344
|
-
# "30m"
|
1362
|
+
# "30m",
|
1363
|
+
# time_unit: "us",
|
1364
|
+
# eager: true
|
1345
1365
|
# ),
|
1346
1366
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
1347
1367
|
# }
|
@@ -1405,14 +1425,13 @@ module Polars
|
|
1405
1425
|
closed: "left",
|
1406
1426
|
label: "left",
|
1407
1427
|
by: nil,
|
1408
|
-
start_by: "window"
|
1409
|
-
check_sorted: true
|
1428
|
+
start_by: "window"
|
1410
1429
|
)
|
1411
1430
|
if !truncate.nil?
|
1412
1431
|
label = truncate ? "left" : "datapoint"
|
1413
1432
|
end
|
1414
1433
|
|
1415
|
-
index_column = Utils.
|
1434
|
+
index_column = Utils.parse_into_expression(index_column, str_as_lit: false)
|
1416
1435
|
if offset.nil?
|
1417
1436
|
offset = period.nil? ? "-#{every}" : "0ns"
|
1418
1437
|
end
|
@@ -1421,13 +1440,13 @@ module Polars
|
|
1421
1440
|
period = every
|
1422
1441
|
end
|
1423
1442
|
|
1424
|
-
period = Utils.
|
1425
|
-
offset = Utils.
|
1426
|
-
every = Utils.
|
1443
|
+
period = Utils.parse_as_duration_string(period)
|
1444
|
+
offset = Utils.parse_as_duration_string(offset)
|
1445
|
+
every = Utils.parse_as_duration_string(every)
|
1427
1446
|
|
1428
|
-
rbexprs_by = by.nil? ? [] : Utils.
|
1447
|
+
rbexprs_by = by.nil? ? [] : Utils.parse_into_list_of_expressions(by)
|
1429
1448
|
lgb = _ldf.group_by_dynamic(
|
1430
|
-
index_column
|
1449
|
+
index_column,
|
1431
1450
|
every,
|
1432
1451
|
period,
|
1433
1452
|
offset,
|
@@ -1435,8 +1454,7 @@ module Polars
|
|
1435
1454
|
include_boundaries,
|
1436
1455
|
closed,
|
1437
1456
|
rbexprs_by,
|
1438
|
-
start_by
|
1439
|
-
check_sorted
|
1457
|
+
start_by
|
1440
1458
|
)
|
1441
1459
|
LazyGroupBy.new(lgb)
|
1442
1460
|
end
|
@@ -1587,7 +1605,7 @@ module Polars
|
|
1587
1605
|
# @param on Object
|
1588
1606
|
# Join column of both DataFrames. If set, `left_on` and `right_on` should be
|
1589
1607
|
# None.
|
1590
|
-
# @param how ["inner", "left", "
|
1608
|
+
# @param how ["inner", "left", "full", "semi", "anti", "cross"]
|
1591
1609
|
# Join strategy.
|
1592
1610
|
# @param suffix [String]
|
1593
1611
|
# Suffix to append to columns with a duplicate name.
|
@@ -1629,7 +1647,7 @@ module Polars
|
|
1629
1647
|
# # └─────┴─────┴─────┴───────┘
|
1630
1648
|
#
|
1631
1649
|
# @example
|
1632
|
-
# df.join(other_df, on: "ham", how: "
|
1650
|
+
# df.join(other_df, on: "ham", how: "full").collect
|
1633
1651
|
# # =>
|
1634
1652
|
# # shape: (4, 5)
|
1635
1653
|
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
@@ -1696,7 +1714,9 @@ module Polars
|
|
1696
1714
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
1697
1715
|
end
|
1698
1716
|
|
1699
|
-
if how == "
|
1717
|
+
if how == "outer"
|
1718
|
+
how = "full"
|
1719
|
+
elsif how == "cross"
|
1700
1720
|
return _from_rbldf(
|
1701
1721
|
_ldf.join(
|
1702
1722
|
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
@@ -1705,12 +1725,12 @@ module Polars
|
|
1705
1725
|
end
|
1706
1726
|
|
1707
1727
|
if !on.nil?
|
1708
|
-
rbexprs = Utils.
|
1728
|
+
rbexprs = Utils.parse_into_list_of_expressions(on)
|
1709
1729
|
rbexprs_left = rbexprs
|
1710
1730
|
rbexprs_right = rbexprs
|
1711
1731
|
elsif !left_on.nil? && !right_on.nil?
|
1712
|
-
rbexprs_left = Utils.
|
1713
|
-
rbexprs_right = Utils.
|
1732
|
+
rbexprs_left = Utils.parse_into_list_of_expressions(left_on)
|
1733
|
+
rbexprs_right = Utils.parse_into_list_of_expressions(right_on)
|
1714
1734
|
else
|
1715
1735
|
raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
|
1716
1736
|
end
|
@@ -1765,7 +1785,8 @@ module Polars
|
|
1765
1785
|
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
1766
1786
|
def with_columns(*exprs, **named_exprs)
|
1767
1787
|
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
|
1768
|
-
|
1788
|
+
|
1789
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs, __structify: structify)
|
1769
1790
|
|
1770
1791
|
_from_rbldf(_ldf.with_columns(rbexprs))
|
1771
1792
|
end
|
@@ -1926,9 +1947,9 @@ module Polars
|
|
1926
1947
|
# # └──────┴──────┘
|
1927
1948
|
def shift(n, fill_value: nil)
|
1928
1949
|
if !fill_value.nil?
|
1929
|
-
fill_value = Utils.
|
1950
|
+
fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
|
1930
1951
|
end
|
1931
|
-
n = Utils.
|
1952
|
+
n = Utils.parse_into_expression(n)
|
1932
1953
|
_from_rbldf(_ldf.shift(n, fill_value))
|
1933
1954
|
end
|
1934
1955
|
|
@@ -2125,7 +2146,7 @@ module Polars
|
|
2125
2146
|
# # │ 3 ┆ 7 │
|
2126
2147
|
# # └─────┴─────┘
|
2127
2148
|
def take_every(n)
|
2128
|
-
select(
|
2149
|
+
select(F.col("*").take_every(n))
|
2129
2150
|
end
|
2130
2151
|
|
2131
2152
|
# Fill null values using the specified value or strategy.
|
@@ -2168,7 +2189,7 @@ module Polars
|
|
2168
2189
|
# # └──────┴──────┘
|
2169
2190
|
def fill_nan(fill_value)
|
2170
2191
|
if !fill_value.is_a?(Expr)
|
2171
|
-
fill_value =
|
2192
|
+
fill_value = F.lit(fill_value)
|
2172
2193
|
end
|
2173
2194
|
_from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
|
2174
2195
|
end
|
@@ -2359,8 +2380,8 @@ module Polars
|
|
2359
2380
|
# # │ 3.0 ┆ 1.0 │
|
2360
2381
|
# # └─────┴─────┘
|
2361
2382
|
def quantile(quantile, interpolation: "nearest")
|
2362
|
-
quantile = Utils.
|
2363
|
-
_from_rbldf(_ldf.quantile(quantile
|
2383
|
+
quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
|
2384
|
+
_from_rbldf(_ldf.quantile(quantile, interpolation))
|
2364
2385
|
end
|
2365
2386
|
|
2366
2387
|
# Explode lists to long format.
|
@@ -2392,7 +2413,7 @@ module Polars
|
|
2392
2413
|
# # │ c ┆ 8 │
|
2393
2414
|
# # └─────────┴─────────┘
|
2394
2415
|
def explode(columns)
|
2395
|
-
columns = Utils.
|
2416
|
+
columns = Utils.parse_into_list_of_expressions(columns)
|
2396
2417
|
_from_rbldf(_ldf.explode(columns))
|
2397
2418
|
end
|
2398
2419
|
|
@@ -2455,35 +2476,35 @@ module Polars
|
|
2455
2476
|
# Optionally leaves identifiers set.
|
2456
2477
|
#
|
2457
2478
|
# This function is useful to massage a DataFrame into a format where one or more
|
2458
|
-
# columns are identifier variables (
|
2459
|
-
# measured variables (
|
2479
|
+
# columns are identifier variables (index) while all other columns, considered
|
2480
|
+
# measured variables (on), are "unpivoted" to the row axis leaving just
|
2460
2481
|
# two non-identifier columns, 'variable' and 'value'.
|
2461
2482
|
#
|
2462
|
-
# @param
|
2463
|
-
#
|
2464
|
-
#
|
2465
|
-
#
|
2466
|
-
#
|
2483
|
+
# @param on [Object]
|
2484
|
+
# Column(s) or selector(s) to use as values variables; if `on`
|
2485
|
+
# is empty all columns that are not in `index` will be used.
|
2486
|
+
# @param index [Object]
|
2487
|
+
# Column(s) or selector(s) to use as identifier variables.
|
2467
2488
|
# @param variable_name [String]
|
2468
|
-
# Name to give to the `
|
2489
|
+
# Name to give to the `variable` column. Defaults to "variable"
|
2469
2490
|
# @param value_name [String]
|
2470
2491
|
# Name to give to the `value` column. Defaults to "value"
|
2471
2492
|
# @param streamable [Boolean]
|
2472
2493
|
# Allow this node to run in the streaming engine.
|
2473
|
-
# If this runs in streaming, the output of the
|
2494
|
+
# If this runs in streaming, the output of the unpivot operation
|
2474
2495
|
# will not have a stable ordering.
|
2475
2496
|
#
|
2476
2497
|
# @return [LazyFrame]
|
2477
2498
|
#
|
2478
2499
|
# @example
|
2479
|
-
#
|
2500
|
+
# lf = Polars::LazyFrame.new(
|
2480
2501
|
# {
|
2481
2502
|
# "a" => ["x", "y", "z"],
|
2482
2503
|
# "b" => [1, 3, 5],
|
2483
2504
|
# "c" => [2, 4, 6]
|
2484
2505
|
# }
|
2485
|
-
# )
|
2486
|
-
#
|
2506
|
+
# )
|
2507
|
+
# lf.unpivot(Polars::Selectors.numeric, index: "a").collect
|
2487
2508
|
# # =>
|
2488
2509
|
# # shape: (6, 3)
|
2489
2510
|
# # ┌─────┬──────────┬───────┐
|
@@ -2498,23 +2519,25 @@ module Polars
|
|
2498
2519
|
# # │ y ┆ c ┆ 4 │
|
2499
2520
|
# # │ z ┆ c ┆ 6 │
|
2500
2521
|
# # └─────┴──────────┴───────┘
|
2501
|
-
def
|
2502
|
-
|
2503
|
-
|
2504
|
-
|
2505
|
-
|
2506
|
-
|
2507
|
-
|
2508
|
-
if
|
2509
|
-
|
2510
|
-
end
|
2511
|
-
if id_vars.nil?
|
2512
|
-
id_vars = []
|
2522
|
+
def unpivot(
|
2523
|
+
on,
|
2524
|
+
index: nil,
|
2525
|
+
variable_name: nil,
|
2526
|
+
value_name: nil,
|
2527
|
+
streamable: true
|
2528
|
+
)
|
2529
|
+
if !streamable
|
2530
|
+
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
2513
2531
|
end
|
2532
|
+
|
2533
|
+
on = on.nil? ? [] : Utils._expand_selectors(self, on)
|
2534
|
+
index = index.nil? ? [] : Utils._expand_selectors(self, index)
|
2535
|
+
|
2514
2536
|
_from_rbldf(
|
2515
|
-
_ldf.
|
2537
|
+
_ldf.unpivot(on, index, value_name, variable_name)
|
2516
2538
|
)
|
2517
2539
|
end
|
2540
|
+
alias_method :melt, :unpivot
|
2518
2541
|
|
2519
2542
|
# def map
|
2520
2543
|
# end
|
@@ -2545,7 +2568,7 @@ module Polars
|
|
2545
2568
|
# # │ 10.0 ┆ null ┆ 9.0 │
|
2546
2569
|
# # └──────┴──────┴──────────┘
|
2547
2570
|
def interpolate
|
2548
|
-
select(
|
2571
|
+
select(F.col("*").interpolate)
|
2549
2572
|
end
|
2550
2573
|
|
2551
2574
|
# Decompose a struct into its fields.
|
@@ -2652,24 +2675,19 @@ module Polars
|
|
2652
2675
|
#
|
2653
2676
|
# @param column [Object]
|
2654
2677
|
# Columns that are sorted
|
2655
|
-
# @param more_columns [Object]
|
2656
|
-
# Additional columns that are sorted, specified as positional arguments.
|
2657
2678
|
# @param descending [Boolean]
|
2658
2679
|
# Whether the columns are sorted in descending order.
|
2659
2680
|
#
|
2660
2681
|
# @return [LazyFrame]
|
2661
2682
|
def set_sorted(
|
2662
2683
|
column,
|
2663
|
-
*more_columns,
|
2664
2684
|
descending: false
|
2665
2685
|
)
|
2666
|
-
|
2667
|
-
|
2668
|
-
|
2686
|
+
if !Utils.strlike?(column)
|
2687
|
+
msg = "expected a 'str' for argument 'column' in 'set_sorted'"
|
2688
|
+
raise TypeError, msg
|
2669
2689
|
end
|
2670
|
-
with_columns(
|
2671
|
-
columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
|
2672
|
-
)
|
2690
|
+
with_columns(F.col(column).set_sorted(descending: descending))
|
2673
2691
|
end
|
2674
2692
|
|
2675
2693
|
# TODO
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -107,7 +107,7 @@ module Polars
|
|
107
107
|
# # │ b ┆ 5 ┆ 10.0 │
|
108
108
|
# # └─────┴───────┴────────────────┘
|
109
109
|
def agg(*aggs, **named_aggs)
|
110
|
-
rbexprs = Utils.
|
110
|
+
rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
|
111
111
|
Utils.wrap_ldf(@lgb.agg(rbexprs))
|
112
112
|
end
|
113
113
|
|
data/lib/polars/list_expr.rb
CHANGED
@@ -146,7 +146,7 @@ module Polars
|
|
146
146
|
end
|
147
147
|
|
148
148
|
if !fraction.nil?
|
149
|
-
fraction = Utils.
|
149
|
+
fraction = Utils.parse_into_expression(fraction)
|
150
150
|
return Utils.wrap_expr(
|
151
151
|
_rbexpr.list_sample_fraction(
|
152
152
|
fraction, with_replacement, shuffle, seed
|
@@ -155,7 +155,7 @@ module Polars
|
|
155
155
|
end
|
156
156
|
|
157
157
|
n = 1 if n.nil?
|
158
|
-
n = Utils.
|
158
|
+
n = Utils.parse_into_expression(n)
|
159
159
|
Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
|
160
160
|
end
|
161
161
|
|
@@ -387,7 +387,7 @@ module Polars
|
|
387
387
|
# # │ 1 │
|
388
388
|
# # └──────┘
|
389
389
|
def get(index, null_on_oob: true)
|
390
|
-
index = Utils.
|
390
|
+
index = Utils.parse_into_expression(index)
|
391
391
|
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
392
392
|
end
|
393
393
|
|
@@ -431,7 +431,7 @@ module Polars
|
|
431
431
|
if index.is_a?(::Array)
|
432
432
|
index = Series.new(index)
|
433
433
|
end
|
434
|
-
index = Utils.
|
434
|
+
index = Utils.parse_into_expression(index, str_as_lit: false)
|
435
435
|
Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
|
436
436
|
end
|
437
437
|
alias_method :take, :gather
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ true │
|
503
503
|
# # └───────┘
|
504
504
|
def contains(item)
|
505
|
-
Utils.wrap_expr(_rbexpr.list_contains(Utils.
|
505
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
|
506
506
|
end
|
507
507
|
|
508
508
|
# Join all string items in a sublist and place a separator between them.
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ x y │
|
531
531
|
# # └───────┘
|
532
532
|
def join(separator, ignore_nulls: true)
|
533
|
-
separator = Utils.
|
533
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
|
535
535
|
end
|
536
536
|
|
@@ -625,7 +625,7 @@ module Polars
|
|
625
625
|
# # [null, 10, 2]
|
626
626
|
# # ]
|
627
627
|
def shift(n = 1)
|
628
|
-
n = Utils.
|
628
|
+
n = Utils.parse_into_expression(n)
|
629
629
|
Utils.wrap_expr(_rbexpr.list_shift(n))
|
630
630
|
end
|
631
631
|
|
@@ -650,8 +650,8 @@ module Polars
|
|
650
650
|
# # [2, 1]
|
651
651
|
# # ]
|
652
652
|
def slice(offset, length = nil)
|
653
|
-
offset = Utils.
|
654
|
-
length = Utils.
|
653
|
+
offset = Utils.parse_into_expression(offset, str_as_lit: false)
|
654
|
+
length = Utils.parse_into_expression(length, str_as_lit: false)
|
655
655
|
Utils.wrap_expr(_rbexpr.list_slice(offset, length))
|
656
656
|
end
|
657
657
|
|
@@ -694,7 +694,7 @@ module Polars
|
|
694
694
|
# # [2, 1]
|
695
695
|
# # ]
|
696
696
|
def tail(n = 5)
|
697
|
-
n = Utils.
|
697
|
+
n = Utils.parse_into_expression(n)
|
698
698
|
Utils.wrap_expr(_rbexpr.list_tail(n))
|
699
699
|
end
|
700
700
|
|
@@ -722,7 +722,7 @@ module Polars
|
|
722
722
|
# # │ 0 │
|
723
723
|
# # └────────────────┘
|
724
724
|
def count_matches(element)
|
725
|
-
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.
|
725
|
+
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
|
726
726
|
end
|
727
727
|
alias_method :count_match, :count_matches
|
728
728
|
|
@@ -197,9 +197,13 @@ module Polars
|
|
197
197
|
#
|
198
198
|
# @param index [Integer]
|
199
199
|
# Index to return per sublist
|
200
|
+
# @param null_on_oob [Boolean]
|
201
|
+
# Behavior if an index is out of bounds:
|
202
|
+
# true -> set as null
|
203
|
+
# false -> raise an error
|
200
204
|
#
|
201
205
|
# @return [Series]
|
202
|
-
def get(index)
|
206
|
+
def get(index, null_on_oob: false)
|
203
207
|
super
|
204
208
|
end
|
205
209
|
|
@@ -10,25 +10,23 @@ module Polars
|
|
10
10
|
period,
|
11
11
|
offset,
|
12
12
|
closed,
|
13
|
-
|
14
|
-
check_sorted
|
13
|
+
group_by
|
15
14
|
)
|
16
|
-
period = Utils.
|
17
|
-
offset = Utils.
|
15
|
+
period = Utils.parse_as_duration_string(period)
|
16
|
+
offset = Utils.parse_as_duration_string(offset)
|
18
17
|
|
19
18
|
@df = df
|
20
19
|
@time_column = index_column
|
21
20
|
@period = period
|
22
21
|
@offset = offset
|
23
22
|
@closed = closed
|
24
|
-
@
|
25
|
-
@check_sorted = check_sorted
|
23
|
+
@group_by = group_by
|
26
24
|
end
|
27
25
|
|
28
26
|
def agg(*aggs, **named_aggs)
|
29
27
|
@df.lazy
|
30
28
|
.group_by_rolling(
|
31
|
-
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @
|
29
|
+
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
|
32
30
|
)
|
33
31
|
.agg(*aggs, **named_aggs)
|
34
32
|
.collect(no_optimization: true, string_cache: false)
|