polars-df 0.5.0-x86_64-linux → 0.6.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/LICENSE-THIRD-PARTY.txt +1161 -832
- data/README.md +4 -3
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +4 -2
data/lib/polars/lazy_frame.rb
CHANGED
@@ -4,6 +4,22 @@ module Polars
|
|
4
4
|
# @private
|
5
5
|
attr_accessor :_ldf
|
6
6
|
|
7
|
+
# Create a new LazyFrame.
|
8
|
+
def initialize(data = nil, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
9
|
+
self._ldf = (
|
10
|
+
DataFrame.new(
|
11
|
+
data,
|
12
|
+
schema: schema,
|
13
|
+
schema_overrides: schema_overrides,
|
14
|
+
orient: orient,
|
15
|
+
infer_schema_length: infer_schema_length,
|
16
|
+
nan_to_null: nan_to_null
|
17
|
+
)
|
18
|
+
.lazy
|
19
|
+
._ldf
|
20
|
+
)
|
21
|
+
end
|
22
|
+
|
7
23
|
# @private
|
8
24
|
def self._from_rbldf(rb_ldf)
|
9
25
|
ldf = LazyFrame.allocate
|
@@ -379,16 +395,16 @@ module Polars
|
|
379
395
|
# # │ 2 ┆ 7.0 ┆ b │
|
380
396
|
# # │ 1 ┆ 6.0 ┆ a │
|
381
397
|
# # └─────┴─────┴─────┘
|
382
|
-
def sort(by, reverse: false, nulls_last: false)
|
398
|
+
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
383
399
|
if by.is_a?(String)
|
384
|
-
_from_rbldf(_ldf.sort(by, reverse, nulls_last))
|
400
|
+
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
385
401
|
end
|
386
402
|
if Utils.bool?(reverse)
|
387
403
|
reverse = [reverse]
|
388
404
|
end
|
389
405
|
|
390
406
|
by = Utils.selection_to_rbexpr_list(by)
|
391
|
-
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
|
407
|
+
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
|
392
408
|
end
|
393
409
|
|
394
410
|
# def profile
|
@@ -921,6 +937,12 @@ module Polars
|
|
921
937
|
# Define whether the temporal window interval is closed or not.
|
922
938
|
# @param by [Object]
|
923
939
|
# Also group by this column/these columns.
|
940
|
+
# @param check_sorted [Boolean]
|
941
|
+
# When the `by` argument is given, polars cannot check sortedness
|
942
|
+
# by the metadata and has to do a full scan on the index column to
|
943
|
+
# verify data is sorted. This is expensive. If you are sure the
|
944
|
+
# data within the by groups is sorted, you can set this to `false`.
|
945
|
+
# Doing so incorrectly will lead to incorrect output.
|
924
946
|
#
|
925
947
|
# @return [LazyFrame]
|
926
948
|
#
|
@@ -933,8 +955,8 @@ module Polars
|
|
933
955
|
# "2020-01-03 19:45:32",
|
934
956
|
# "2020-01-08 23:16:43"
|
935
957
|
# ]
|
936
|
-
# df = Polars::
|
937
|
-
# Polars.col("dt").str.strptime(Polars::Datetime)
|
958
|
+
# df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
959
|
+
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
938
960
|
# )
|
939
961
|
# df.groupby_rolling(index_column: "dt", period: "2d").agg(
|
940
962
|
# [
|
@@ -942,7 +964,7 @@ module Polars
|
|
942
964
|
# Polars.min("a").alias("min_a"),
|
943
965
|
# Polars.max("a").alias("max_a")
|
944
966
|
# ]
|
945
|
-
# )
|
967
|
+
# ).collect
|
946
968
|
# # =>
|
947
969
|
# # shape: (6, 4)
|
948
970
|
# # ┌─────────────────────┬───────┬───────┬───────┐
|
@@ -962,7 +984,8 @@ module Polars
|
|
962
984
|
period:,
|
963
985
|
offset: nil,
|
964
986
|
closed: "right",
|
965
|
-
by: nil
|
987
|
+
by: nil,
|
988
|
+
check_sorted: true
|
966
989
|
)
|
967
990
|
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
968
991
|
if offset.nil?
|
@@ -974,7 +997,7 @@ module Polars
|
|
974
997
|
offset = Utils._timedelta_to_pl_duration(offset)
|
975
998
|
|
976
999
|
lgb = _ldf.groupby_rolling(
|
977
|
-
index_column._rbexpr, period, offset, closed, rbexprs_by
|
1000
|
+
index_column._rbexpr, period, offset, closed, rbexprs_by, check_sorted
|
978
1001
|
)
|
979
1002
|
LazyGroupBy.new(lgb, self.class)
|
980
1003
|
end
|
@@ -1112,21 +1135,21 @@ module Polars
|
|
1112
1135
|
# df.groupby_dynamic("time", every: "1h", closed: "left").agg(
|
1113
1136
|
# [
|
1114
1137
|
# Polars.col("time").count.alias("time_count"),
|
1115
|
-
# Polars.col("time").
|
1138
|
+
# Polars.col("time").alias("time_agg_list")
|
1116
1139
|
# ]
|
1117
1140
|
# )
|
1118
1141
|
# # =>
|
1119
1142
|
# # shape: (4, 3)
|
1120
|
-
# #
|
1121
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
1122
|
-
# # │ --- ┆ --- ┆ ---
|
1123
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
1124
|
-
# #
|
1125
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16
|
1126
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16
|
1127
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16
|
1128
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
1129
|
-
# #
|
1143
|
+
# # ┌─────────────────────┬────────────┬───────────────────────────────────┐
|
1144
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1145
|
+
# # │ --- ┆ --- ┆ --- │
|
1146
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1147
|
+
# # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
|
1148
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16… │
|
1149
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16… │
|
1150
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16… │
|
1151
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1152
|
+
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
1130
1153
|
#
|
1131
1154
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1132
1155
|
# df.groupby_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -1193,7 +1216,7 @@ module Polars
|
|
1193
1216
|
# period: "3i",
|
1194
1217
|
# include_boundaries: true,
|
1195
1218
|
# closed: "right"
|
1196
|
-
# ).agg(Polars.col("A").
|
1219
|
+
# ).agg(Polars.col("A").alias("A_agg_list"))
|
1197
1220
|
# # =>
|
1198
1221
|
# # shape: (3, 4)
|
1199
1222
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
@@ -1216,12 +1239,9 @@ module Polars
|
|
1216
1239
|
by: nil,
|
1217
1240
|
start_by: "window"
|
1218
1241
|
)
|
1242
|
+
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
1219
1243
|
if offset.nil?
|
1220
|
-
|
1221
|
-
offset = "-#{every}"
|
1222
|
-
else
|
1223
|
-
offset = "0ns"
|
1224
|
-
end
|
1244
|
+
offset = period.nil? ? "-#{every}" : "0ns"
|
1225
1245
|
end
|
1226
1246
|
|
1227
1247
|
if period.nil?
|
@@ -1234,7 +1254,7 @@ module Polars
|
|
1234
1254
|
|
1235
1255
|
rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
|
1236
1256
|
lgb = _ldf.groupby_dynamic(
|
1237
|
-
index_column,
|
1257
|
+
index_column._rbexpr,
|
1238
1258
|
every,
|
1239
1259
|
period,
|
1240
1260
|
offset,
|
@@ -1351,7 +1371,7 @@ module Polars
|
|
1351
1371
|
if by.is_a?(String)
|
1352
1372
|
by_left_ = [by]
|
1353
1373
|
by_right_ = [by]
|
1354
|
-
elsif by.is_a?(Array)
|
1374
|
+
elsif by.is_a?(::Array)
|
1355
1375
|
by_left_ = by
|
1356
1376
|
by_right_ = by
|
1357
1377
|
end
|
@@ -1619,7 +1639,7 @@ module Polars
|
|
1619
1639
|
# # │ null │
|
1620
1640
|
# # └──────┘
|
1621
1641
|
def with_context(other)
|
1622
|
-
if !other.is_a?(Array)
|
1642
|
+
if !other.is_a?(::Array)
|
1623
1643
|
other = [other]
|
1624
1644
|
end
|
1625
1645
|
|
@@ -2228,7 +2248,7 @@ module Polars
|
|
2228
2248
|
#
|
2229
2249
|
# @return [LazyFrame]
|
2230
2250
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
2231
|
-
if !subset.nil? && !subset.is_a?(Array)
|
2251
|
+
if !subset.nil? && !subset.is_a?(::Array)
|
2232
2252
|
subset = [subset]
|
2233
2253
|
end
|
2234
2254
|
_from_rbldf(_ldf.unique(maintain_order, subset, keep))
|
@@ -2261,7 +2281,7 @@ module Polars
|
|
2261
2281
|
# # │ 3 ┆ 8 ┆ c │
|
2262
2282
|
# # └─────┴─────┴─────┘
|
2263
2283
|
def drop_nulls(subset: nil)
|
2264
|
-
if !subset.nil? && !subset.is_a?(Array)
|
2284
|
+
if !subset.nil? && !subset.is_a?(::Array)
|
2265
2285
|
subset = [subset]
|
2266
2286
|
end
|
2267
2287
|
_from_rbldf(_ldf.drop_nulls(subset))
|
@@ -2423,6 +2443,38 @@ module Polars
|
|
2423
2443
|
_from_rbldf(_ldf.unnest(names))
|
2424
2444
|
end
|
2425
2445
|
|
2446
|
+
# TODO
|
2447
|
+
# def merge_sorted
|
2448
|
+
# end
|
2449
|
+
|
2450
|
+
# Indicate that one or multiple columns are sorted.
|
2451
|
+
#
|
2452
|
+
# @param column [Object]
|
2453
|
+
# Columns that are sorted
|
2454
|
+
# @param more_columns [Object]
|
2455
|
+
# Additional columns that are sorted, specified as positional arguments.
|
2456
|
+
# @param descending [Boolean]
|
2457
|
+
# Whether the columns are sorted in descending order.
|
2458
|
+
#
|
2459
|
+
# @return [LazyFrame]
|
2460
|
+
def set_sorted(
|
2461
|
+
column,
|
2462
|
+
*more_columns,
|
2463
|
+
descending: false
|
2464
|
+
)
|
2465
|
+
columns = Utils.selection_to_rbexpr_list(column)
|
2466
|
+
if more_columns.any?
|
2467
|
+
columns.concat(Utils.selection_to_rbexpr_list(more_columns))
|
2468
|
+
end
|
2469
|
+
with_columns(
|
2470
|
+
columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
|
2471
|
+
)
|
2472
|
+
end
|
2473
|
+
|
2474
|
+
# TODO
|
2475
|
+
# def update
|
2476
|
+
# end
|
2477
|
+
|
2426
2478
|
private
|
2427
2479
|
|
2428
2480
|
def initialize_copy(other)
|
@@ -14,7 +14,7 @@ module Polars
|
|
14
14
|
|
15
15
|
if name.is_a?(DataType)
|
16
16
|
Utils.wrap_expr(_dtype_cols([name]))
|
17
|
-
elsif name.is_a?(Array)
|
17
|
+
elsif name.is_a?(::Array)
|
18
18
|
if name.length == 0 || Utils.strlike?(name[0])
|
19
19
|
name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
|
20
20
|
Utils.wrap_expr(RbExpr.cols(name))
|
@@ -36,7 +36,7 @@ module Polars
|
|
36
36
|
# @example A horizontal rank computation by taking the elements of a list
|
37
37
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
38
38
|
# df.with_column(
|
39
|
-
# Polars.concat_list(["a", "b"]).
|
39
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
40
40
|
# )
|
41
41
|
# # =>
|
42
42
|
# # shape: (3, 3)
|
@@ -156,9 +156,8 @@ module Polars
|
|
156
156
|
column.sum
|
157
157
|
elsif Utils.strlike?(column)
|
158
158
|
col(column.to_s).sum
|
159
|
-
elsif column.is_a?(Array)
|
159
|
+
elsif column.is_a?(::Array)
|
160
160
|
exprs = Utils.selection_to_rbexpr_list(column)
|
161
|
-
# TODO
|
162
161
|
Utils.wrap_expr(_sum_exprs(exprs))
|
163
162
|
else
|
164
163
|
fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
|
@@ -283,18 +282,33 @@ module Polars
|
|
283
282
|
# Return an expression representing a literal value.
|
284
283
|
#
|
285
284
|
# @return [Expr]
|
286
|
-
def lit(value)
|
287
|
-
if value.is_a?(
|
285
|
+
def lit(value, dtype: nil, allow_object: nil)
|
286
|
+
if value.is_a?(::Time) || value.is_a?(::DateTime)
|
287
|
+
time_unit = dtype&.time_unit || "ns"
|
288
|
+
time_zone = dtype&.time_zone
|
289
|
+
e = lit(Utils._datetime_to_pl_timestamp(value, time_unit)).cast(Datetime.new(time_unit))
|
290
|
+
if time_zone
|
291
|
+
return e.dt.replace_time_zone(time_zone.to_s)
|
292
|
+
else
|
293
|
+
return e
|
294
|
+
end
|
295
|
+
elsif value.is_a?(::Date)
|
296
|
+
return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
|
297
|
+
elsif value.is_a?(Polars::Series)
|
288
298
|
name = value.name
|
289
299
|
value = value._s
|
290
|
-
e = Utils.wrap_expr(RbExpr.lit(value))
|
300
|
+
e = Utils.wrap_expr(RbExpr.lit(value, allow_object))
|
291
301
|
if name == ""
|
292
302
|
return e
|
293
303
|
end
|
294
304
|
return e.alias(name)
|
305
|
+
elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
|
306
|
+
return lit(Series.new("", value))
|
307
|
+
elsif dtype
|
308
|
+
return Utils.wrap_expr(RbExpr.lit(value, allow_object)).cast(dtype)
|
295
309
|
end
|
296
310
|
|
297
|
-
Utils.wrap_expr(RbExpr.lit(value))
|
311
|
+
Utils.wrap_expr(RbExpr.lit(value, allow_object))
|
298
312
|
end
|
299
313
|
|
300
314
|
# Cumulatively sum values in a column/Series, or horizontally across list of columns/expressions.
|
@@ -625,23 +639,42 @@ module Polars
|
|
625
639
|
# @return [Expr, Series]
|
626
640
|
#
|
627
641
|
# @example
|
628
|
-
#
|
642
|
+
# Polars.arange(0, 3, eager: true)
|
643
|
+
# # =>
|
644
|
+
# # shape: (3,)
|
645
|
+
# # Series: 'arange' [i64]
|
646
|
+
# # [
|
647
|
+
# # 0
|
648
|
+
# # 1
|
649
|
+
# # 2
|
650
|
+
# # ]
|
651
|
+
#
|
652
|
+
# @example
|
653
|
+
# df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
|
654
|
+
# df.select(Polars.arange(Polars.col("a"), Polars.col("b")))
|
655
|
+
# # =>
|
656
|
+
# # shape: (2, 1)
|
657
|
+
# # ┌───────────┐
|
658
|
+
# # │ arange │
|
659
|
+
# # │ --- │
|
660
|
+
# # │ list[i64] │
|
661
|
+
# # ╞═══════════╡
|
662
|
+
# # │ [1, 2] │
|
663
|
+
# # │ [2, 3] │
|
664
|
+
# # └───────────┘
|
629
665
|
def arange(low, high, step: 1, eager: false, dtype: nil)
|
630
666
|
low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
|
631
667
|
high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
|
632
668
|
range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))
|
633
669
|
|
634
|
-
if !dtype.nil? &&
|
670
|
+
if !dtype.nil? && !["i64", Int64].include?(dtype)
|
635
671
|
range_expr = range_expr.cast(dtype)
|
636
672
|
end
|
637
673
|
|
638
674
|
if !eager
|
639
675
|
range_expr
|
640
676
|
else
|
641
|
-
DataFrame.new
|
642
|
-
.select(range_expr)
|
643
|
-
.to_series
|
644
|
-
.rename("arange", in_place: true)
|
677
|
+
DataFrame.new.select(range_expr.alias("arange")).to_series
|
645
678
|
end
|
646
679
|
end
|
647
680
|
|
@@ -658,7 +691,7 @@ module Polars
|
|
658
691
|
#
|
659
692
|
# @return [Expr]
|
660
693
|
def arg_sort_by(exprs, reverse: false)
|
661
|
-
if !exprs.is_a?(Array)
|
694
|
+
if !exprs.is_a?(::Array)
|
662
695
|
exprs = [exprs]
|
663
696
|
end
|
664
697
|
if reverse == true || reverse == false
|
@@ -997,19 +1030,24 @@ module Polars
|
|
997
1030
|
# Only used in `eager` mode. As expression, use `alias`.
|
998
1031
|
#
|
999
1032
|
# @return [Expr]
|
1000
|
-
def repeat(value, n, eager: false, name: nil)
|
1033
|
+
def repeat(value, n, dtype: nil, eager: false, name: nil)
|
1034
|
+
if !name.nil?
|
1035
|
+
warn "the `name` argument is deprecated. Use the `alias` method instead."
|
1036
|
+
end
|
1037
|
+
|
1038
|
+
if n.is_a?(Integer)
|
1039
|
+
n = lit(n)
|
1040
|
+
end
|
1041
|
+
|
1042
|
+
value = Utils.parse_as_expression(value, str_as_lit: true)
|
1043
|
+
expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr, dtype))
|
1044
|
+
if !name.nil?
|
1045
|
+
expr = expr.alias(name)
|
1046
|
+
end
|
1001
1047
|
if eager
|
1002
|
-
|
1003
|
-
name = ""
|
1004
|
-
end
|
1005
|
-
dtype = py_type_to_dtype(type(value))
|
1006
|
-
Series._repeat(name, value, n, dtype)
|
1007
|
-
else
|
1008
|
-
if n.is_a?(Integer)
|
1009
|
-
n = lit(n)
|
1010
|
-
end
|
1011
|
-
Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
|
1048
|
+
return select(expr).to_series
|
1012
1049
|
end
|
1050
|
+
expr
|
1013
1051
|
end
|
1014
1052
|
|
1015
1053
|
# Return indices where `condition` evaluates `true`.
|
@@ -1124,13 +1162,11 @@ module Polars
|
|
1124
1162
|
end
|
1125
1163
|
|
1126
1164
|
if unit == "d"
|
1127
|
-
expr = column.cast(
|
1165
|
+
expr = column.cast(Date)
|
1128
1166
|
elsif unit == "s"
|
1129
|
-
|
1130
|
-
# expr = (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
|
1167
|
+
expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
|
1131
1168
|
elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
|
1132
|
-
|
1133
|
-
# expr = column.cast(Datetime(unit))
|
1169
|
+
expr = column.cast(Datetime.new(unit))
|
1134
1170
|
else
|
1135
1171
|
raise ArgumentError, "'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '#{unit}'."
|
1136
1172
|
end
|
data/lib/polars/list_expr.rb
CHANGED
@@ -15,7 +15,7 @@ module Polars
|
|
15
15
|
#
|
16
16
|
# @example
|
17
17
|
# df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
|
18
|
-
# df.select(Polars.col("bar").
|
18
|
+
# df.select(Polars.col("bar").list.lengths)
|
19
19
|
# # =>
|
20
20
|
# # shape: (2, 1)
|
21
21
|
# # ┌─────┐
|
@@ -36,7 +36,7 @@ module Polars
|
|
36
36
|
#
|
37
37
|
# @example
|
38
38
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
39
|
-
# df.select(Polars.col("values").
|
39
|
+
# df.select(Polars.col("values").list.sum)
|
40
40
|
# # =>
|
41
41
|
# # shape: (2, 1)
|
42
42
|
# # ┌────────┐
|
@@ -57,7 +57,7 @@ module Polars
|
|
57
57
|
#
|
58
58
|
# @example
|
59
59
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
60
|
-
# df.select(Polars.col("values").
|
60
|
+
# df.select(Polars.col("values").list.max)
|
61
61
|
# # =>
|
62
62
|
# # shape: (2, 1)
|
63
63
|
# # ┌────────┐
|
@@ -78,7 +78,7 @@ module Polars
|
|
78
78
|
#
|
79
79
|
# @example
|
80
80
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
81
|
-
# df.select(Polars.col("values").
|
81
|
+
# df.select(Polars.col("values").list.min)
|
82
82
|
# # =>
|
83
83
|
# # shape: (2, 1)
|
84
84
|
# # ┌────────┐
|
@@ -99,7 +99,7 @@ module Polars
|
|
99
99
|
#
|
100
100
|
# @example
|
101
101
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
102
|
-
# df.select(Polars.col("values").
|
102
|
+
# df.select(Polars.col("values").list.mean)
|
103
103
|
# # =>
|
104
104
|
# # shape: (2, 1)
|
105
105
|
# # ┌────────┐
|
@@ -124,7 +124,7 @@ module Polars
|
|
124
124
|
# "a" => [[3, 2, 1], [9, 1, 2]]
|
125
125
|
# }
|
126
126
|
# )
|
127
|
-
# df.select(Polars.col("a").
|
127
|
+
# df.select(Polars.col("a").list.sort)
|
128
128
|
# # =>
|
129
129
|
# # shape: (2, 1)
|
130
130
|
# # ┌───────────┐
|
@@ -149,7 +149,7 @@ module Polars
|
|
149
149
|
# "a" => [[3, 2, 1], [9, 1, 2]]
|
150
150
|
# }
|
151
151
|
# )
|
152
|
-
# df.select(Polars.col("a").
|
152
|
+
# df.select(Polars.col("a").list.reverse)
|
153
153
|
# # =>
|
154
154
|
# # shape: (2, 1)
|
155
155
|
# # ┌───────────┐
|
@@ -174,7 +174,7 @@ module Polars
|
|
174
174
|
# "a" => [[1, 1, 2]]
|
175
175
|
# }
|
176
176
|
# )
|
177
|
-
# df.select(Polars.col("a").
|
177
|
+
# df.select(Polars.col("a").list.unique)
|
178
178
|
# # =>
|
179
179
|
# # shape: (1, 1)
|
180
180
|
# # ┌───────────┐
|
@@ -202,7 +202,7 @@ module Polars
|
|
202
202
|
# "b" => [["b", "c"], ["y", "z"]]
|
203
203
|
# }
|
204
204
|
# )
|
205
|
-
# df.select(Polars.col("a").
|
205
|
+
# df.select(Polars.col("a").list.concat("b"))
|
206
206
|
# # =>
|
207
207
|
# # shape: (2, 1)
|
208
208
|
# # ┌─────────────────┐
|
@@ -214,11 +214,11 @@ module Polars
|
|
214
214
|
# # │ ["x", "y", "z"] │
|
215
215
|
# # └─────────────────┘
|
216
216
|
def concat(other)
|
217
|
-
if other.is_a?(Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
217
|
+
if other.is_a?(::Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
218
218
|
return concat(Series.new([other]))
|
219
219
|
end
|
220
220
|
|
221
|
-
if !other.is_a?(Array)
|
221
|
+
if !other.is_a?(::Array)
|
222
222
|
other_list = [other]
|
223
223
|
else
|
224
224
|
other_list = other.dup
|
@@ -241,7 +241,7 @@ module Polars
|
|
241
241
|
#
|
242
242
|
# @example
|
243
243
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
244
|
-
# df.select(Polars.col("foo").
|
244
|
+
# df.select(Polars.col("foo").list.get(0))
|
245
245
|
# # =>
|
246
246
|
# # shape: (3, 1)
|
247
247
|
# # ┌──────┐
|
@@ -254,7 +254,7 @@ module Polars
|
|
254
254
|
# # │ 1 │
|
255
255
|
# # └──────┘
|
256
256
|
def get(index)
|
257
|
-
index = Utils.
|
257
|
+
index = Utils.parse_as_expression(index)
|
258
258
|
Utils.wrap_expr(_rbexpr.list_get(index))
|
259
259
|
end
|
260
260
|
|
@@ -280,7 +280,7 @@ module Polars
|
|
280
280
|
#
|
281
281
|
# @return [Expr]
|
282
282
|
def take(index, null_on_oob: false)
|
283
|
-
if index.is_a?(Array)
|
283
|
+
if index.is_a?(::Array)
|
284
284
|
index = Series.new(index)
|
285
285
|
end
|
286
286
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
@@ -293,7 +293,7 @@ module Polars
|
|
293
293
|
#
|
294
294
|
# @example
|
295
295
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
296
|
-
# df.select(Polars.col("foo").
|
296
|
+
# df.select(Polars.col("foo").list.first)
|
297
297
|
# # =>
|
298
298
|
# # shape: (3, 1)
|
299
299
|
# # ┌──────┐
|
@@ -315,7 +315,7 @@ module Polars
|
|
315
315
|
#
|
316
316
|
# @example
|
317
317
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
318
|
-
# df.select(Polars.col("foo").
|
318
|
+
# df.select(Polars.col("foo").list.last)
|
319
319
|
# # =>
|
320
320
|
# # shape: (3, 1)
|
321
321
|
# # ┌──────┐
|
@@ -340,7 +340,7 @@ module Polars
|
|
340
340
|
#
|
341
341
|
# @example
|
342
342
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
343
|
-
# df.select(Polars.col("foo").
|
343
|
+
# df.select(Polars.col("foo").list.contains(1))
|
344
344
|
# # =>
|
345
345
|
# # shape: (3, 1)
|
346
346
|
# # ┌───────┐
|
@@ -367,7 +367,7 @@ module Polars
|
|
367
367
|
#
|
368
368
|
# @example
|
369
369
|
# df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
|
370
|
-
# df.select(Polars.col("s").
|
370
|
+
# df.select(Polars.col("s").list.join(" "))
|
371
371
|
# # =>
|
372
372
|
# # shape: (2, 1)
|
373
373
|
# # ┌───────┐
|
@@ -392,7 +392,7 @@ module Polars
|
|
392
392
|
# "a" => [[1, 2], [2, 1]]
|
393
393
|
# }
|
394
394
|
# )
|
395
|
-
# df.select(Polars.col("a").
|
395
|
+
# df.select(Polars.col("a").list.arg_min)
|
396
396
|
# # =>
|
397
397
|
# # shape: (2, 1)
|
398
398
|
# # ┌─────┐
|
@@ -417,7 +417,7 @@ module Polars
|
|
417
417
|
# "a" => [[1, 2], [2, 1]]
|
418
418
|
# }
|
419
419
|
# )
|
420
|
-
# df.select(Polars.col("a").
|
420
|
+
# df.select(Polars.col("a").list.arg_max)
|
421
421
|
# # =>
|
422
422
|
# # shape: (2, 1)
|
423
423
|
# # ┌─────┐
|
@@ -443,7 +443,7 @@ module Polars
|
|
443
443
|
#
|
444
444
|
# @example
|
445
445
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
446
|
-
# s.
|
446
|
+
# s.list.diff
|
447
447
|
# # =>
|
448
448
|
# # shape: (2,)
|
449
449
|
# # Series: 'a' [list[i64]]
|
@@ -464,7 +464,7 @@ module Polars
|
|
464
464
|
#
|
465
465
|
# @example
|
466
466
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
467
|
-
# s.
|
467
|
+
# s.list.shift
|
468
468
|
# # =>
|
469
469
|
# # shape: (2,)
|
470
470
|
# # Series: 'a' [list[i64]]
|
@@ -488,7 +488,7 @@ module Polars
|
|
488
488
|
#
|
489
489
|
# @example
|
490
490
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
491
|
-
# s.
|
491
|
+
# s.list.slice(1, 2)
|
492
492
|
# # =>
|
493
493
|
# # shape: (2,)
|
494
494
|
# # Series: 'a' [list[i64]]
|
@@ -511,7 +511,7 @@ module Polars
|
|
511
511
|
#
|
512
512
|
# @example
|
513
513
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
514
|
-
# s.
|
514
|
+
# s.list.head(2)
|
515
515
|
# # =>
|
516
516
|
# # shape: (2,)
|
517
517
|
# # Series: 'a' [list[i64]]
|
@@ -532,7 +532,7 @@ module Polars
|
|
532
532
|
#
|
533
533
|
# @example
|
534
534
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
535
|
-
# s.
|
535
|
+
# s.list.tail(2)
|
536
536
|
# # =>
|
537
537
|
# # shape: (2,)
|
538
538
|
# # Series: 'a' [list[i64]]
|
@@ -554,7 +554,7 @@ module Polars
|
|
554
554
|
#
|
555
555
|
# @example
|
556
556
|
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
557
|
-
# df.select(Polars.col("listcol").
|
557
|
+
# df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
|
558
558
|
# # =>
|
559
559
|
# # shape: (5, 1)
|
560
560
|
# # ┌────────────────┐
|
@@ -584,7 +584,7 @@ module Polars
|
|
584
584
|
#
|
585
585
|
# @example
|
586
586
|
# df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
|
587
|
-
# df.select([Polars.col("a").
|
587
|
+
# df.select([Polars.col("a").list.to_struct])
|
588
588
|
# # =>
|
589
589
|
# # shape: (2, 1)
|
590
590
|
# # ┌────────────┐
|
@@ -617,7 +617,7 @@ module Polars
|
|
617
617
|
# @example
|
618
618
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
619
619
|
# df.with_column(
|
620
|
-
# Polars.concat_list(["a", "b"]).
|
620
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
621
621
|
# )
|
622
622
|
# # =>
|
623
623
|
# # shape: (3, 3)
|