polars-df 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/lib/polars/lazy_frame.rb
CHANGED
@@ -63,7 +63,7 @@ module Polars
|
|
63
63
|
# df.columns
|
64
64
|
# # => ["foo", "bar"]
|
65
65
|
def columns
|
66
|
-
_ldf.
|
66
|
+
_ldf.collect_schema.keys
|
67
67
|
end
|
68
68
|
|
69
69
|
# Get dtypes of columns in LazyFrame.
|
@@ -81,7 +81,7 @@ module Polars
|
|
81
81
|
# lf.dtypes
|
82
82
|
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
83
83
|
def dtypes
|
84
|
-
_ldf.
|
84
|
+
_ldf.collect_schema.values
|
85
85
|
end
|
86
86
|
|
87
87
|
# Get the schema.
|
@@ -99,7 +99,7 @@ module Polars
|
|
99
99
|
# lf.schema
|
100
100
|
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
101
101
|
def schema
|
102
|
-
_ldf.
|
102
|
+
_ldf.collect_schema
|
103
103
|
end
|
104
104
|
|
105
105
|
# Get the width of the LazyFrame.
|
@@ -111,7 +111,7 @@ module Polars
|
|
111
111
|
# lf.width
|
112
112
|
# # => 2
|
113
113
|
def width
|
114
|
-
_ldf.
|
114
|
+
_ldf.collect_schema.length
|
115
115
|
end
|
116
116
|
|
117
117
|
# Check if LazyFrame includes key.
|
@@ -261,16 +261,23 @@ module Polars
|
|
261
261
|
# # │ 2 ┆ 7.0 ┆ b │
|
262
262
|
# # │ 1 ┆ 6.0 ┆ a │
|
263
263
|
# # └─────┴─────┴─────┘
|
264
|
-
def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
|
265
|
-
if by.is_a?(::String)
|
266
|
-
return _from_rbldf(
|
267
|
-
|
268
|
-
|
269
|
-
|
264
|
+
def sort(by, *more_by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
|
265
|
+
if by.is_a?(::String) && more_by.empty?
|
266
|
+
return _from_rbldf(
|
267
|
+
_ldf.sort(
|
268
|
+
by, reverse, nulls_last, maintain_order, multithreaded
|
269
|
+
)
|
270
|
+
)
|
270
271
|
end
|
271
272
|
|
272
|
-
by = Utils.
|
273
|
-
|
273
|
+
by = Utils.parse_into_list_of_expressions(by, *more_by)
|
274
|
+
reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
|
275
|
+
nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
|
276
|
+
_from_rbldf(
|
277
|
+
_ldf.sort_by_exprs(
|
278
|
+
by, reverse, nulls_last, maintain_order, multithreaded
|
279
|
+
)
|
280
|
+
)
|
274
281
|
end
|
275
282
|
|
276
283
|
# def profile
|
@@ -415,7 +422,7 @@ module Polars
|
|
415
422
|
path,
|
416
423
|
compression: "zstd",
|
417
424
|
compression_level: nil,
|
418
|
-
statistics:
|
425
|
+
statistics: true,
|
419
426
|
row_group_size: nil,
|
420
427
|
data_pagesize_limit: nil,
|
421
428
|
maintain_order: true,
|
@@ -435,6 +442,24 @@ module Polars
|
|
435
442
|
no_optimization: no_optimization
|
436
443
|
)
|
437
444
|
|
445
|
+
if statistics == true
|
446
|
+
statistics = {
|
447
|
+
min: true,
|
448
|
+
max: true,
|
449
|
+
distinct_count: false,
|
450
|
+
null_count: true
|
451
|
+
}
|
452
|
+
elsif statistics == false
|
453
|
+
statistics = {}
|
454
|
+
elsif statistics == "full"
|
455
|
+
statistics = {
|
456
|
+
min: true,
|
457
|
+
max: true,
|
458
|
+
distinct_count: true,
|
459
|
+
null_count: true
|
460
|
+
}
|
461
|
+
end
|
462
|
+
|
438
463
|
lf.sink_parquet(
|
439
464
|
path,
|
440
465
|
compression,
|
@@ -589,6 +614,7 @@ module Polars
|
|
589
614
|
datetime_format: nil,
|
590
615
|
date_format: nil,
|
591
616
|
time_format: nil,
|
617
|
+
float_scientific: nil,
|
592
618
|
float_precision: nil,
|
593
619
|
null_value: nil,
|
594
620
|
quote_style: nil,
|
@@ -623,6 +649,7 @@ module Polars
|
|
623
649
|
datetime_format,
|
624
650
|
date_format,
|
625
651
|
time_format,
|
652
|
+
float_scientific,
|
626
653
|
float_precision,
|
627
654
|
null_value,
|
628
655
|
quote_style,
|
@@ -907,7 +934,7 @@ module Polars
|
|
907
934
|
def filter(predicate)
|
908
935
|
_from_rbldf(
|
909
936
|
_ldf.filter(
|
910
|
-
Utils.
|
937
|
+
Utils.parse_into_expression(predicate, str_as_lit: false)
|
911
938
|
)
|
912
939
|
)
|
913
940
|
end
|
@@ -1003,7 +1030,7 @@ module Polars
|
|
1003
1030
|
def select(*exprs, **named_exprs)
|
1004
1031
|
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
|
1005
1032
|
|
1006
|
-
rbexprs = Utils.
|
1033
|
+
rbexprs = Utils.parse_into_list_of_expressions(
|
1007
1034
|
*exprs, **named_exprs, __structify: structify
|
1008
1035
|
)
|
1009
1036
|
_from_rbldf(_ldf.select(rbexprs))
|
@@ -1011,12 +1038,14 @@ module Polars
|
|
1011
1038
|
|
1012
1039
|
# Start a group by operation.
|
1013
1040
|
#
|
1014
|
-
# @param by [
|
1041
|
+
# @param by [Array]
|
1015
1042
|
# Column(s) to group by.
|
1016
1043
|
# @param maintain_order [Boolean]
|
1017
1044
|
# Make sure that the order of the groups remain consistent. This is more
|
1018
1045
|
# expensive than a default group by.
|
1019
|
-
#
|
1046
|
+
# @param named_by [Hash]
|
1047
|
+
# Additional columns to group by, specified as keyword arguments.
|
1048
|
+
# The columns will be renamed to the keyword used.
|
1020
1049
|
# @return [LazyGroupBy]
|
1021
1050
|
#
|
1022
1051
|
# @example
|
@@ -1039,9 +1068,9 @@ module Polars
|
|
1039
1068
|
# # │ b ┆ 11 │
|
1040
1069
|
# # │ c ┆ 6 │
|
1041
1070
|
# # └─────┴─────┘
|
1042
|
-
def group_by(by, maintain_order: false)
|
1043
|
-
|
1044
|
-
lgb = _ldf.group_by(
|
1071
|
+
def group_by(*by, maintain_order: false, **named_by)
|
1072
|
+
exprs = Utils.parse_into_list_of_expressions(*by, **named_by)
|
1073
|
+
lgb = _ldf.group_by(exprs, maintain_order)
|
1045
1074
|
LazyGroupBy.new(lgb)
|
1046
1075
|
end
|
1047
1076
|
alias_method :groupby, :group_by
|
@@ -1095,12 +1124,6 @@ module Polars
|
|
1095
1124
|
# Define whether the temporal window interval is closed or not.
|
1096
1125
|
# @param by [Object]
|
1097
1126
|
# Also group by this column/these columns.
|
1098
|
-
# @param check_sorted [Boolean]
|
1099
|
-
# When the `by` argument is given, polars can not check sortedness
|
1100
|
-
# by the metadata and has to do a full scan on the index column to
|
1101
|
-
# verify data is sorted. This is expensive. If you are sure the
|
1102
|
-
# data within the by groups is sorted, you can set this to `false`.
|
1103
|
-
# Doing so incorrectly will lead to incorrect output
|
1104
1127
|
#
|
1105
1128
|
# @return [LazyFrame]
|
1106
1129
|
#
|
@@ -1142,21 +1165,20 @@ module Polars
|
|
1142
1165
|
period:,
|
1143
1166
|
offset: nil,
|
1144
1167
|
closed: "right",
|
1145
|
-
by: nil
|
1146
|
-
check_sorted: true
|
1168
|
+
by: nil
|
1147
1169
|
)
|
1148
|
-
index_column = Utils.
|
1170
|
+
index_column = Utils.parse_into_expression(index_column)
|
1149
1171
|
if offset.nil?
|
1150
|
-
offset =
|
1172
|
+
offset = Utils.negate_duration_string(Utils.parse_as_duration_string(period))
|
1151
1173
|
end
|
1152
1174
|
|
1153
|
-
rbexprs_by =
|
1154
|
-
|
1155
|
-
offset = Utils._timedelta_to_pl_duration(offset)
|
1156
|
-
|
1157
|
-
lgb = _ldf.rolling(
|
1158
|
-
index_column, period, offset, closed, rbexprs_by, check_sorted
|
1175
|
+
rbexprs_by = (
|
1176
|
+
!by.nil? ? Utils.parse_into_list_of_expressions(by) : []
|
1159
1177
|
)
|
1178
|
+
period = Utils.parse_as_duration_string(period)
|
1179
|
+
offset = Utils.parse_as_duration_string(offset)
|
1180
|
+
|
1181
|
+
lgb = _ldf.rolling(index_column, period, offset, closed, rbexprs_by)
|
1160
1182
|
LazyGroupBy.new(lgb)
|
1161
1183
|
end
|
1162
1184
|
alias_method :group_by_rolling, :rolling
|
@@ -1224,22 +1246,18 @@ module Polars
|
|
1224
1246
|
# Define whether the temporal window interval is closed or not.
|
1225
1247
|
# @param by [Object]
|
1226
1248
|
# Also group by this column/these columns
|
1227
|
-
# @param check_sorted [Boolean]
|
1228
|
-
# When the `by` argument is given, polars can not check sortedness
|
1229
|
-
# by the metadata and has to do a full scan on the index column to
|
1230
|
-
# verify data is sorted. This is expensive. If you are sure the
|
1231
|
-
# data within the by groups is sorted, you can set this to `false`.
|
1232
|
-
# Doing so incorrectly will lead to incorrect output.
|
1233
1249
|
#
|
1234
1250
|
# @return [DataFrame]
|
1235
1251
|
#
|
1236
1252
|
# @example
|
1237
1253
|
# df = Polars::DataFrame.new(
|
1238
1254
|
# {
|
1239
|
-
# "time" => Polars.
|
1255
|
+
# "time" => Polars.datetime_range(
|
1240
1256
|
# DateTime.new(2021, 12, 16),
|
1241
1257
|
# DateTime.new(2021, 12, 16, 3),
|
1242
|
-
# "30m"
|
1258
|
+
# "30m",
|
1259
|
+
# time_unit: "us",
|
1260
|
+
# eager: true
|
1243
1261
|
# ),
|
1244
1262
|
# "n" => 0..6
|
1245
1263
|
# }
|
@@ -1338,10 +1356,12 @@ module Polars
|
|
1338
1356
|
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
1339
1357
|
# df = Polars::DataFrame.new(
|
1340
1358
|
# {
|
1341
|
-
# "time" => Polars.
|
1359
|
+
# "time" => Polars.datetime_range(
|
1342
1360
|
# DateTime.new(2021, 12, 16),
|
1343
1361
|
# DateTime.new(2021, 12, 16, 3),
|
1344
|
-
# "30m"
|
1362
|
+
# "30m",
|
1363
|
+
# time_unit: "us",
|
1364
|
+
# eager: true
|
1345
1365
|
# ),
|
1346
1366
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
1347
1367
|
# }
|
@@ -1405,14 +1425,13 @@ module Polars
|
|
1405
1425
|
closed: "left",
|
1406
1426
|
label: "left",
|
1407
1427
|
by: nil,
|
1408
|
-
start_by: "window"
|
1409
|
-
check_sorted: true
|
1428
|
+
start_by: "window"
|
1410
1429
|
)
|
1411
1430
|
if !truncate.nil?
|
1412
1431
|
label = truncate ? "left" : "datapoint"
|
1413
1432
|
end
|
1414
1433
|
|
1415
|
-
index_column = Utils.
|
1434
|
+
index_column = Utils.parse_into_expression(index_column, str_as_lit: false)
|
1416
1435
|
if offset.nil?
|
1417
1436
|
offset = period.nil? ? "-#{every}" : "0ns"
|
1418
1437
|
end
|
@@ -1421,13 +1440,13 @@ module Polars
|
|
1421
1440
|
period = every
|
1422
1441
|
end
|
1423
1442
|
|
1424
|
-
period = Utils.
|
1425
|
-
offset = Utils.
|
1426
|
-
every = Utils.
|
1443
|
+
period = Utils.parse_as_duration_string(period)
|
1444
|
+
offset = Utils.parse_as_duration_string(offset)
|
1445
|
+
every = Utils.parse_as_duration_string(every)
|
1427
1446
|
|
1428
|
-
rbexprs_by = by.nil? ? [] : Utils.
|
1447
|
+
rbexprs_by = by.nil? ? [] : Utils.parse_into_list_of_expressions(by)
|
1429
1448
|
lgb = _ldf.group_by_dynamic(
|
1430
|
-
index_column
|
1449
|
+
index_column,
|
1431
1450
|
every,
|
1432
1451
|
period,
|
1433
1452
|
offset,
|
@@ -1435,8 +1454,7 @@ module Polars
|
|
1435
1454
|
include_boundaries,
|
1436
1455
|
closed,
|
1437
1456
|
rbexprs_by,
|
1438
|
-
start_by
|
1439
|
-
check_sorted
|
1457
|
+
start_by
|
1440
1458
|
)
|
1441
1459
|
LazyGroupBy.new(lgb)
|
1442
1460
|
end
|
@@ -1587,7 +1605,7 @@ module Polars
|
|
1587
1605
|
# @param on Object
|
1588
1606
|
# Join column of both DataFrames. If set, `left_on` and `right_on` should be
|
1589
1607
|
# None.
|
1590
|
-
# @param how ["inner", "left", "
|
1608
|
+
# @param how ["inner", "left", "full", "semi", "anti", "cross"]
|
1591
1609
|
# Join strategy.
|
1592
1610
|
# @param suffix [String]
|
1593
1611
|
# Suffix to append to columns with a duplicate name.
|
@@ -1629,7 +1647,7 @@ module Polars
|
|
1629
1647
|
# # └─────┴─────┴─────┴───────┘
|
1630
1648
|
#
|
1631
1649
|
# @example
|
1632
|
-
# df.join(other_df, on: "ham", how: "
|
1650
|
+
# df.join(other_df, on: "ham", how: "full").collect
|
1633
1651
|
# # =>
|
1634
1652
|
# # shape: (4, 5)
|
1635
1653
|
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
@@ -1696,7 +1714,9 @@ module Polars
|
|
1696
1714
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
1697
1715
|
end
|
1698
1716
|
|
1699
|
-
if how == "
|
1717
|
+
if how == "outer"
|
1718
|
+
how = "full"
|
1719
|
+
elsif how == "cross"
|
1700
1720
|
return _from_rbldf(
|
1701
1721
|
_ldf.join(
|
1702
1722
|
other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
|
@@ -1705,12 +1725,12 @@ module Polars
|
|
1705
1725
|
end
|
1706
1726
|
|
1707
1727
|
if !on.nil?
|
1708
|
-
rbexprs = Utils.
|
1728
|
+
rbexprs = Utils.parse_into_list_of_expressions(on)
|
1709
1729
|
rbexprs_left = rbexprs
|
1710
1730
|
rbexprs_right = rbexprs
|
1711
1731
|
elsif !left_on.nil? && !right_on.nil?
|
1712
|
-
rbexprs_left = Utils.
|
1713
|
-
rbexprs_right = Utils.
|
1732
|
+
rbexprs_left = Utils.parse_into_list_of_expressions(left_on)
|
1733
|
+
rbexprs_right = Utils.parse_into_list_of_expressions(right_on)
|
1714
1734
|
else
|
1715
1735
|
raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
|
1716
1736
|
end
|
@@ -1765,7 +1785,8 @@ module Polars
|
|
1765
1785
|
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
1766
1786
|
def with_columns(*exprs, **named_exprs)
|
1767
1787
|
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
|
1768
|
-
|
1788
|
+
|
1789
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs, __structify: structify)
|
1769
1790
|
|
1770
1791
|
_from_rbldf(_ldf.with_columns(rbexprs))
|
1771
1792
|
end
|
@@ -1926,9 +1947,9 @@ module Polars
|
|
1926
1947
|
# # └──────┴──────┘
|
1927
1948
|
def shift(n, fill_value: nil)
|
1928
1949
|
if !fill_value.nil?
|
1929
|
-
fill_value = Utils.
|
1950
|
+
fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
|
1930
1951
|
end
|
1931
|
-
n = Utils.
|
1952
|
+
n = Utils.parse_into_expression(n)
|
1932
1953
|
_from_rbldf(_ldf.shift(n, fill_value))
|
1933
1954
|
end
|
1934
1955
|
|
@@ -2125,7 +2146,7 @@ module Polars
|
|
2125
2146
|
# # │ 3 ┆ 7 │
|
2126
2147
|
# # └─────┴─────┘
|
2127
2148
|
def take_every(n)
|
2128
|
-
select(
|
2149
|
+
select(F.col("*").take_every(n))
|
2129
2150
|
end
|
2130
2151
|
|
2131
2152
|
# Fill null values using the specified value or strategy.
|
@@ -2168,7 +2189,7 @@ module Polars
|
|
2168
2189
|
# # └──────┴──────┘
|
2169
2190
|
def fill_nan(fill_value)
|
2170
2191
|
if !fill_value.is_a?(Expr)
|
2171
|
-
fill_value =
|
2192
|
+
fill_value = F.lit(fill_value)
|
2172
2193
|
end
|
2173
2194
|
_from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
|
2174
2195
|
end
|
@@ -2359,8 +2380,8 @@ module Polars
|
|
2359
2380
|
# # │ 3.0 ┆ 1.0 │
|
2360
2381
|
# # └─────┴─────┘
|
2361
2382
|
def quantile(quantile, interpolation: "nearest")
|
2362
|
-
quantile = Utils.
|
2363
|
-
_from_rbldf(_ldf.quantile(quantile
|
2383
|
+
quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
|
2384
|
+
_from_rbldf(_ldf.quantile(quantile, interpolation))
|
2364
2385
|
end
|
2365
2386
|
|
2366
2387
|
# Explode lists to long format.
|
@@ -2392,7 +2413,7 @@ module Polars
|
|
2392
2413
|
# # │ c ┆ 8 │
|
2393
2414
|
# # └─────────┴─────────┘
|
2394
2415
|
def explode(columns)
|
2395
|
-
columns = Utils.
|
2416
|
+
columns = Utils.parse_into_list_of_expressions(columns)
|
2396
2417
|
_from_rbldf(_ldf.explode(columns))
|
2397
2418
|
end
|
2398
2419
|
|
@@ -2455,35 +2476,35 @@ module Polars
|
|
2455
2476
|
# Optionally leaves identifiers set.
|
2456
2477
|
#
|
2457
2478
|
# This function is useful to massage a DataFrame into a format where one or more
|
2458
|
-
# columns are identifier variables (
|
2459
|
-
# measured variables (
|
2479
|
+
# columns are identifier variables (index) while all other columns, considered
|
2480
|
+
# measured variables (on), are "unpivoted" to the row axis leaving just
|
2460
2481
|
# two non-identifier columns, 'variable' and 'value'.
|
2461
2482
|
#
|
2462
|
-
# @param
|
2463
|
-
#
|
2464
|
-
#
|
2465
|
-
#
|
2466
|
-
#
|
2483
|
+
# @param on [Object]
|
2484
|
+
# Column(s) or selector(s) to use as values variables; if `on`
|
2485
|
+
# is empty all columns that are not in `index` will be used.
|
2486
|
+
# @param index [Object]
|
2487
|
+
# Column(s) or selector(s) to use as identifier variables.
|
2467
2488
|
# @param variable_name [String]
|
2468
|
-
# Name to give to the `
|
2489
|
+
# Name to give to the `variable` column. Defaults to "variable"
|
2469
2490
|
# @param value_name [String]
|
2470
2491
|
# Name to give to the `value` column. Defaults to "value"
|
2471
2492
|
# @param streamable [Boolean]
|
2472
2493
|
# Allow this node to run in the streaming engine.
|
2473
|
-
# If this runs in streaming, the output of the
|
2494
|
+
# If this runs in streaming, the output of the unpivot operation
|
2474
2495
|
# will not have a stable ordering.
|
2475
2496
|
#
|
2476
2497
|
# @return [LazyFrame]
|
2477
2498
|
#
|
2478
2499
|
# @example
|
2479
|
-
#
|
2500
|
+
# lf = Polars::LazyFrame.new(
|
2480
2501
|
# {
|
2481
2502
|
# "a" => ["x", "y", "z"],
|
2482
2503
|
# "b" => [1, 3, 5],
|
2483
2504
|
# "c" => [2, 4, 6]
|
2484
2505
|
# }
|
2485
|
-
# )
|
2486
|
-
#
|
2506
|
+
# )
|
2507
|
+
# lf.unpivot(Polars::Selectors.numeric, index: "a").collect
|
2487
2508
|
# # =>
|
2488
2509
|
# # shape: (6, 3)
|
2489
2510
|
# # ┌─────┬──────────┬───────┐
|
@@ -2498,23 +2519,21 @@ module Polars
|
|
2498
2519
|
# # │ y ┆ c ┆ 4 │
|
2499
2520
|
# # │ z ┆ c ┆ 6 │
|
2500
2521
|
# # └─────┴──────────┴───────┘
|
2501
|
-
def
|
2502
|
-
|
2503
|
-
|
2504
|
-
|
2505
|
-
|
2506
|
-
|
2507
|
-
|
2508
|
-
|
2509
|
-
|
2510
|
-
|
2511
|
-
if id_vars.nil?
|
2512
|
-
id_vars = []
|
2513
|
-
end
|
2522
|
+
def unpivot(
|
2523
|
+
on,
|
2524
|
+
index: nil,
|
2525
|
+
variable_name: nil,
|
2526
|
+
value_name: nil,
|
2527
|
+
streamable: true
|
2528
|
+
)
|
2529
|
+
on = on.nil? ? [] : Utils._expand_selectors(self, on)
|
2530
|
+
index = index.nil? ? [] : Utils._expand_selectors(self, index)
|
2531
|
+
|
2514
2532
|
_from_rbldf(
|
2515
|
-
_ldf.
|
2533
|
+
_ldf.unpivot(on, index, value_name, variable_name, streamable)
|
2516
2534
|
)
|
2517
2535
|
end
|
2536
|
+
alias_method :melt, :unpivot
|
2518
2537
|
|
2519
2538
|
# def map
|
2520
2539
|
# end
|
@@ -2545,7 +2564,7 @@ module Polars
|
|
2545
2564
|
# # │ 10.0 ┆ null ┆ 9.0 │
|
2546
2565
|
# # └──────┴──────┴──────────┘
|
2547
2566
|
def interpolate
|
2548
|
-
select(
|
2567
|
+
select(F.col("*").interpolate)
|
2549
2568
|
end
|
2550
2569
|
|
2551
2570
|
# Decompose a struct into its fields.
|
@@ -2652,24 +2671,19 @@ module Polars
|
|
2652
2671
|
#
|
2653
2672
|
# @param column [Object]
|
2654
2673
|
# Columns that are sorted
|
2655
|
-
# @param more_columns [Object]
|
2656
|
-
# Additional columns that are sorted, specified as positional arguments.
|
2657
2674
|
# @param descending [Boolean]
|
2658
2675
|
# Whether the columns are sorted in descending order.
|
2659
2676
|
#
|
2660
2677
|
# @return [LazyFrame]
|
2661
2678
|
def set_sorted(
|
2662
2679
|
column,
|
2663
|
-
*more_columns,
|
2664
2680
|
descending: false
|
2665
2681
|
)
|
2666
|
-
|
2667
|
-
|
2668
|
-
|
2682
|
+
if !Utils.strlike?(column)
|
2683
|
+
msg = "expected a 'str' for argument 'column' in 'set_sorted'"
|
2684
|
+
raise TypeError, msg
|
2669
2685
|
end
|
2670
|
-
with_columns(
|
2671
|
-
columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
|
2672
|
-
)
|
2686
|
+
with_columns(F.col(column).set_sorted(descending: descending))
|
2673
2687
|
end
|
2674
2688
|
|
2675
2689
|
# TODO
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -107,7 +107,7 @@ module Polars
|
|
107
107
|
# # │ b ┆ 5 ┆ 10.0 │
|
108
108
|
# # └─────┴───────┴────────────────┘
|
109
109
|
def agg(*aggs, **named_aggs)
|
110
|
-
rbexprs = Utils.
|
110
|
+
rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
|
111
111
|
Utils.wrap_ldf(@lgb.agg(rbexprs))
|
112
112
|
end
|
113
113
|
|
data/lib/polars/list_expr.rb
CHANGED
@@ -146,7 +146,7 @@ module Polars
|
|
146
146
|
end
|
147
147
|
|
148
148
|
if !fraction.nil?
|
149
|
-
fraction = Utils.
|
149
|
+
fraction = Utils.parse_into_expression(fraction)
|
150
150
|
return Utils.wrap_expr(
|
151
151
|
_rbexpr.list_sample_fraction(
|
152
152
|
fraction, with_replacement, shuffle, seed
|
@@ -155,7 +155,7 @@ module Polars
|
|
155
155
|
end
|
156
156
|
|
157
157
|
n = 1 if n.nil?
|
158
|
-
n = Utils.
|
158
|
+
n = Utils.parse_into_expression(n)
|
159
159
|
Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
|
160
160
|
end
|
161
161
|
|
@@ -387,7 +387,7 @@ module Polars
|
|
387
387
|
# # │ 1 │
|
388
388
|
# # └──────┘
|
389
389
|
def get(index, null_on_oob: true)
|
390
|
-
index = Utils.
|
390
|
+
index = Utils.parse_into_expression(index)
|
391
391
|
Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
|
392
392
|
end
|
393
393
|
|
@@ -431,7 +431,7 @@ module Polars
|
|
431
431
|
if index.is_a?(::Array)
|
432
432
|
index = Series.new(index)
|
433
433
|
end
|
434
|
-
index = Utils.
|
434
|
+
index = Utils.parse_into_expression(index, str_as_lit: false)
|
435
435
|
Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
|
436
436
|
end
|
437
437
|
alias_method :take, :gather
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ true │
|
503
503
|
# # └───────┘
|
504
504
|
def contains(item)
|
505
|
-
Utils.wrap_expr(_rbexpr.list_contains(Utils.
|
505
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
|
506
506
|
end
|
507
507
|
|
508
508
|
# Join all string items in a sublist and place a separator between them.
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ x y │
|
531
531
|
# # └───────┘
|
532
532
|
def join(separator, ignore_nulls: true)
|
533
|
-
separator = Utils.
|
533
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
|
535
535
|
end
|
536
536
|
|
@@ -625,7 +625,7 @@ module Polars
|
|
625
625
|
# # [null, 10, 2]
|
626
626
|
# # ]
|
627
627
|
def shift(n = 1)
|
628
|
-
n = Utils.
|
628
|
+
n = Utils.parse_into_expression(n)
|
629
629
|
Utils.wrap_expr(_rbexpr.list_shift(n))
|
630
630
|
end
|
631
631
|
|
@@ -650,8 +650,8 @@ module Polars
|
|
650
650
|
# # [2, 1]
|
651
651
|
# # ]
|
652
652
|
def slice(offset, length = nil)
|
653
|
-
offset = Utils.
|
654
|
-
length = Utils.
|
653
|
+
offset = Utils.parse_into_expression(offset, str_as_lit: false)
|
654
|
+
length = Utils.parse_into_expression(length, str_as_lit: false)
|
655
655
|
Utils.wrap_expr(_rbexpr.list_slice(offset, length))
|
656
656
|
end
|
657
657
|
|
@@ -694,7 +694,7 @@ module Polars
|
|
694
694
|
# # [2, 1]
|
695
695
|
# # ]
|
696
696
|
def tail(n = 5)
|
697
|
-
n = Utils.
|
697
|
+
n = Utils.parse_into_expression(n)
|
698
698
|
Utils.wrap_expr(_rbexpr.list_tail(n))
|
699
699
|
end
|
700
700
|
|
@@ -722,7 +722,7 @@ module Polars
|
|
722
722
|
# # │ 0 │
|
723
723
|
# # └────────────────┘
|
724
724
|
def count_matches(element)
|
725
|
-
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.
|
725
|
+
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
|
726
726
|
end
|
727
727
|
alias_method :count_match, :count_matches
|
728
728
|
|
@@ -197,9 +197,13 @@ module Polars
|
|
197
197
|
#
|
198
198
|
# @param index [Integer]
|
199
199
|
# Index to return per sublist
|
200
|
+
# @param null_on_oob [Boolean]
|
201
|
+
# Behavior if an index is out of bounds:
|
202
|
+
# true -> set as null
|
203
|
+
# false -> raise an error
|
200
204
|
#
|
201
205
|
# @return [Series]
|
202
|
-
def get(index)
|
206
|
+
def get(index, null_on_oob: false)
|
203
207
|
super
|
204
208
|
end
|
205
209
|
|
@@ -10,25 +10,23 @@ module Polars
|
|
10
10
|
period,
|
11
11
|
offset,
|
12
12
|
closed,
|
13
|
-
|
14
|
-
check_sorted
|
13
|
+
group_by
|
15
14
|
)
|
16
|
-
period = Utils.
|
17
|
-
offset = Utils.
|
15
|
+
period = Utils.parse_as_duration_string(period)
|
16
|
+
offset = Utils.parse_as_duration_string(offset)
|
18
17
|
|
19
18
|
@df = df
|
20
19
|
@time_column = index_column
|
21
20
|
@period = period
|
22
21
|
@offset = offset
|
23
22
|
@closed = closed
|
24
|
-
@
|
25
|
-
@check_sorted = check_sorted
|
23
|
+
@group_by = group_by
|
26
24
|
end
|
27
25
|
|
28
26
|
def agg(*aggs, **named_aggs)
|
29
27
|
@df.lazy
|
30
28
|
.group_by_rolling(
|
31
|
-
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @
|
29
|
+
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
|
32
30
|
)
|
33
31
|
.agg(*aggs, **named_aggs)
|
34
32
|
.collect(no_optimization: true, string_cache: false)
|