polars-df 0.5.0-x86_64-linux → 0.6.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/LICENSE-THIRD-PARTY.txt +1161 -832
- data/README.md +4 -3
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +4 -2
data/lib/polars/lazy_frame.rb
CHANGED
@@ -4,6 +4,22 @@ module Polars
|
|
4
4
|
# @private
|
5
5
|
attr_accessor :_ldf
|
6
6
|
|
7
|
+
# Create a new LazyFrame.
|
8
|
+
def initialize(data = nil, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
9
|
+
self._ldf = (
|
10
|
+
DataFrame.new(
|
11
|
+
data,
|
12
|
+
schema: schema,
|
13
|
+
schema_overrides: schema_overrides,
|
14
|
+
orient: orient,
|
15
|
+
infer_schema_length: infer_schema_length,
|
16
|
+
nan_to_null: nan_to_null
|
17
|
+
)
|
18
|
+
.lazy
|
19
|
+
._ldf
|
20
|
+
)
|
21
|
+
end
|
22
|
+
|
7
23
|
# @private
|
8
24
|
def self._from_rbldf(rb_ldf)
|
9
25
|
ldf = LazyFrame.allocate
|
@@ -379,16 +395,16 @@ module Polars
|
|
379
395
|
# # │ 2 ┆ 7.0 ┆ b │
|
380
396
|
# # │ 1 ┆ 6.0 ┆ a │
|
381
397
|
# # └─────┴─────┴─────┘
|
382
|
-
def sort(by, reverse: false, nulls_last: false)
|
398
|
+
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
383
399
|
if by.is_a?(String)
|
384
|
-
_from_rbldf(_ldf.sort(by, reverse, nulls_last))
|
400
|
+
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
385
401
|
end
|
386
402
|
if Utils.bool?(reverse)
|
387
403
|
reverse = [reverse]
|
388
404
|
end
|
389
405
|
|
390
406
|
by = Utils.selection_to_rbexpr_list(by)
|
391
|
-
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
|
407
|
+
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
|
392
408
|
end
|
393
409
|
|
394
410
|
# def profile
|
@@ -921,6 +937,12 @@ module Polars
|
|
921
937
|
# Define whether the temporal window interval is closed or not.
|
922
938
|
# @param by [Object]
|
923
939
|
# Also group by this column/these columns.
|
940
|
+
# @param check_sorted [Boolean]
|
941
|
+
# When the `by` argument is given, polars can not check sortedness
|
942
|
+
# by the metadata and has to do a full scan on the index column to
|
943
|
+
# verify data is sorted. This is expensive. If you are sure the
|
944
|
+
# data within the by groups is sorted, you can set this to `false`.
|
945
|
+
# Doing so incorrectly will lead to incorrect output.
|
924
946
|
#
|
925
947
|
# @return [LazyFrame]
|
926
948
|
#
|
@@ -933,8 +955,8 @@ module Polars
|
|
933
955
|
# "2020-01-03 19:45:32",
|
934
956
|
# "2020-01-08 23:16:43"
|
935
957
|
# ]
|
936
|
-
# df = Polars::
|
937
|
-
# Polars.col("dt").str.strptime(Polars::Datetime)
|
958
|
+
# df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
959
|
+
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
938
960
|
# )
|
939
961
|
# df.groupby_rolling(index_column: "dt", period: "2d").agg(
|
940
962
|
# [
|
@@ -942,7 +964,7 @@ module Polars
|
|
942
964
|
# Polars.min("a").alias("min_a"),
|
943
965
|
# Polars.max("a").alias("max_a")
|
944
966
|
# ]
|
945
|
-
# )
|
967
|
+
# ).collect
|
946
968
|
# # =>
|
947
969
|
# # shape: (6, 4)
|
948
970
|
# # ┌─────────────────────┬───────┬───────┬───────┐
|
@@ -962,7 +984,8 @@ module Polars
|
|
962
984
|
period:,
|
963
985
|
offset: nil,
|
964
986
|
closed: "right",
|
965
|
-
by: nil
|
987
|
+
by: nil,
|
988
|
+
check_sorted: true
|
966
989
|
)
|
967
990
|
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
968
991
|
if offset.nil?
|
@@ -974,7 +997,7 @@ module Polars
|
|
974
997
|
offset = Utils._timedelta_to_pl_duration(offset)
|
975
998
|
|
976
999
|
lgb = _ldf.groupby_rolling(
|
977
|
-
index_column._rbexpr, period, offset, closed, rbexprs_by
|
1000
|
+
index_column._rbexpr, period, offset, closed, rbexprs_by, check_sorted
|
978
1001
|
)
|
979
1002
|
LazyGroupBy.new(lgb, self.class)
|
980
1003
|
end
|
@@ -1112,21 +1135,21 @@ module Polars
|
|
1112
1135
|
# df.groupby_dynamic("time", every: "1h", closed: "left").agg(
|
1113
1136
|
# [
|
1114
1137
|
# Polars.col("time").count.alias("time_count"),
|
1115
|
-
# Polars.col("time").
|
1138
|
+
# Polars.col("time").alias("time_agg_list")
|
1116
1139
|
# ]
|
1117
1140
|
# )
|
1118
1141
|
# # =>
|
1119
1142
|
# # shape: (4, 3)
|
1120
|
-
# #
|
1121
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
1122
|
-
# # │ --- ┆ --- ┆ ---
|
1123
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
1124
|
-
# #
|
1125
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16
|
1126
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16
|
1127
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16
|
1128
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
1129
|
-
# #
|
1143
|
+
# # ┌─────────────────────┬────────────┬───────────────────────────────────┐
|
1144
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1145
|
+
# # │ --- ┆ --- ┆ --- │
|
1146
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1147
|
+
# # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
|
1148
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16… │
|
1149
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16… │
|
1150
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16… │
|
1151
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1152
|
+
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
1130
1153
|
#
|
1131
1154
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1132
1155
|
# df.groupby_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -1193,7 +1216,7 @@ module Polars
|
|
1193
1216
|
# period: "3i",
|
1194
1217
|
# include_boundaries: true,
|
1195
1218
|
# closed: "right"
|
1196
|
-
# ).agg(Polars.col("A").
|
1219
|
+
# ).agg(Polars.col("A").alias("A_agg_list"))
|
1197
1220
|
# # =>
|
1198
1221
|
# # shape: (3, 4)
|
1199
1222
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
@@ -1216,12 +1239,9 @@ module Polars
|
|
1216
1239
|
by: nil,
|
1217
1240
|
start_by: "window"
|
1218
1241
|
)
|
1242
|
+
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
1219
1243
|
if offset.nil?
|
1220
|
-
|
1221
|
-
offset = "-#{every}"
|
1222
|
-
else
|
1223
|
-
offset = "0ns"
|
1224
|
-
end
|
1244
|
+
offset = period.nil? ? "-#{every}" : "0ns"
|
1225
1245
|
end
|
1226
1246
|
|
1227
1247
|
if period.nil?
|
@@ -1234,7 +1254,7 @@ module Polars
|
|
1234
1254
|
|
1235
1255
|
rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
|
1236
1256
|
lgb = _ldf.groupby_dynamic(
|
1237
|
-
index_column,
|
1257
|
+
index_column._rbexpr,
|
1238
1258
|
every,
|
1239
1259
|
period,
|
1240
1260
|
offset,
|
@@ -1351,7 +1371,7 @@ module Polars
|
|
1351
1371
|
if by.is_a?(String)
|
1352
1372
|
by_left_ = [by]
|
1353
1373
|
by_right_ = [by]
|
1354
|
-
elsif by.is_a?(Array)
|
1374
|
+
elsif by.is_a?(::Array)
|
1355
1375
|
by_left_ = by
|
1356
1376
|
by_right_ = by
|
1357
1377
|
end
|
@@ -1619,7 +1639,7 @@ module Polars
|
|
1619
1639
|
# # │ null │
|
1620
1640
|
# # └──────┘
|
1621
1641
|
def with_context(other)
|
1622
|
-
if !other.is_a?(Array)
|
1642
|
+
if !other.is_a?(::Array)
|
1623
1643
|
other = [other]
|
1624
1644
|
end
|
1625
1645
|
|
@@ -2228,7 +2248,7 @@ module Polars
|
|
2228
2248
|
#
|
2229
2249
|
# @return [LazyFrame]
|
2230
2250
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
2231
|
-
if !subset.nil? && !subset.is_a?(Array)
|
2251
|
+
if !subset.nil? && !subset.is_a?(::Array)
|
2232
2252
|
subset = [subset]
|
2233
2253
|
end
|
2234
2254
|
_from_rbldf(_ldf.unique(maintain_order, subset, keep))
|
@@ -2261,7 +2281,7 @@ module Polars
|
|
2261
2281
|
# # │ 3 ┆ 8 ┆ c │
|
2262
2282
|
# # └─────┴─────┴─────┘
|
2263
2283
|
def drop_nulls(subset: nil)
|
2264
|
-
if !subset.nil? && !subset.is_a?(Array)
|
2284
|
+
if !subset.nil? && !subset.is_a?(::Array)
|
2265
2285
|
subset = [subset]
|
2266
2286
|
end
|
2267
2287
|
_from_rbldf(_ldf.drop_nulls(subset))
|
@@ -2423,6 +2443,38 @@ module Polars
|
|
2423
2443
|
_from_rbldf(_ldf.unnest(names))
|
2424
2444
|
end
|
2425
2445
|
|
2446
|
+
# TODO
|
2447
|
+
# def merge_sorted
|
2448
|
+
# end
|
2449
|
+
|
2450
|
+
# Indicate that one or multiple columns are sorted.
|
2451
|
+
#
|
2452
|
+
# @param column [Object]
|
2453
|
+
# Columns that are sorted
|
2454
|
+
# @param more_columns [Object]
|
2455
|
+
# Additional columns that are sorted, specified as positional arguments.
|
2456
|
+
# @param descending [Boolean]
|
2457
|
+
# Whether the columns are sorted in descending order.
|
2458
|
+
#
|
2459
|
+
# @return [LazyFrame]
|
2460
|
+
def set_sorted(
|
2461
|
+
column,
|
2462
|
+
*more_columns,
|
2463
|
+
descending: false
|
2464
|
+
)
|
2465
|
+
columns = Utils.selection_to_rbexpr_list(column)
|
2466
|
+
if more_columns.any?
|
2467
|
+
columns.concat(Utils.selection_to_rbexpr_list(more_columns))
|
2468
|
+
end
|
2469
|
+
with_columns(
|
2470
|
+
columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
|
2471
|
+
)
|
2472
|
+
end
|
2473
|
+
|
2474
|
+
# TODO
|
2475
|
+
# def update
|
2476
|
+
# end
|
2477
|
+
|
2426
2478
|
private
|
2427
2479
|
|
2428
2480
|
def initialize_copy(other)
|
data/lib/polars/lazy_functions.rb
CHANGED
@@ -14,7 +14,7 @@ module Polars
|
|
14
14
|
|
15
15
|
if name.is_a?(DataType)
|
16
16
|
Utils.wrap_expr(_dtype_cols([name]))
|
17
|
-
elsif name.is_a?(Array)
|
17
|
+
elsif name.is_a?(::Array)
|
18
18
|
if name.length == 0 || Utils.strlike?(name[0])
|
19
19
|
name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
|
20
20
|
Utils.wrap_expr(RbExpr.cols(name))
|
@@ -36,7 +36,7 @@ module Polars
|
|
36
36
|
# @example A horizontal rank computation by taking the elements of a list
|
37
37
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
38
38
|
# df.with_column(
|
39
|
-
# Polars.concat_list(["a", "b"]).
|
39
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
40
40
|
# )
|
41
41
|
# # =>
|
42
42
|
# # shape: (3, 3)
|
@@ -156,9 +156,8 @@ module Polars
|
|
156
156
|
column.sum
|
157
157
|
elsif Utils.strlike?(column)
|
158
158
|
col(column.to_s).sum
|
159
|
-
elsif column.is_a?(Array)
|
159
|
+
elsif column.is_a?(::Array)
|
160
160
|
exprs = Utils.selection_to_rbexpr_list(column)
|
161
|
-
# TODO
|
162
161
|
Utils.wrap_expr(_sum_exprs(exprs))
|
163
162
|
else
|
164
163
|
fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
|
@@ -283,18 +282,33 @@ module Polars
|
|
283
282
|
# Return an expression representing a literal value.
|
284
283
|
#
|
285
284
|
# @return [Expr]
|
286
|
-
def lit(value)
|
287
|
-
if value.is_a?(
|
285
|
+
def lit(value, dtype: nil, allow_object: nil)
|
286
|
+
if value.is_a?(::Time) || value.is_a?(::DateTime)
|
287
|
+
time_unit = dtype&.time_unit || "ns"
|
288
|
+
time_zone = dtype&.time_zone
|
289
|
+
e = lit(Utils._datetime_to_pl_timestamp(value, time_unit)).cast(Datetime.new(time_unit))
|
290
|
+
if time_zone
|
291
|
+
return e.dt.replace_time_zone(time_zone.to_s)
|
292
|
+
else
|
293
|
+
return e
|
294
|
+
end
|
295
|
+
elsif value.is_a?(::Date)
|
296
|
+
return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
|
297
|
+
elsif value.is_a?(Polars::Series)
|
288
298
|
name = value.name
|
289
299
|
value = value._s
|
290
|
-
e = Utils.wrap_expr(RbExpr.lit(value))
|
300
|
+
e = Utils.wrap_expr(RbExpr.lit(value, allow_object))
|
291
301
|
if name == ""
|
292
302
|
return e
|
293
303
|
end
|
294
304
|
return e.alias(name)
|
305
|
+
elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
|
306
|
+
return lit(Series.new("", value))
|
307
|
+
elsif dtype
|
308
|
+
return Utils.wrap_expr(RbExpr.lit(value, allow_object)).cast(dtype)
|
295
309
|
end
|
296
310
|
|
297
|
-
Utils.wrap_expr(RbExpr.lit(value))
|
311
|
+
Utils.wrap_expr(RbExpr.lit(value, allow_object))
|
298
312
|
end
|
299
313
|
|
300
314
|
# Cumulatively sum values in a column/Series, or horizontally across list of columns/expressions.
|
@@ -625,23 +639,42 @@ module Polars
|
|
625
639
|
# @return [Expr, Series]
|
626
640
|
#
|
627
641
|
# @example
|
628
|
-
#
|
642
|
+
# Polars.arange(0, 3, eager: true)
|
643
|
+
# # =>
|
644
|
+
# # shape: (3,)
|
645
|
+
# # Series: 'arange' [i64]
|
646
|
+
# # [
|
647
|
+
# # 0
|
648
|
+
# # 1
|
649
|
+
# # 2
|
650
|
+
# # ]
|
651
|
+
#
|
652
|
+
# @example
|
653
|
+
# df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
|
654
|
+
# df.select(Polars.arange(Polars.col("a"), Polars.col("b")))
|
655
|
+
# # =>
|
656
|
+
# # shape: (2, 1)
|
657
|
+
# # ┌───────────┐
|
658
|
+
# # │ arange │
|
659
|
+
# # │ --- │
|
660
|
+
# # │ list[i64] │
|
661
|
+
# # ╞═══════════╡
|
662
|
+
# # │ [1, 2] │
|
663
|
+
# # │ [2, 3] │
|
664
|
+
# # └───────────┘
|
629
665
|
def arange(low, high, step: 1, eager: false, dtype: nil)
|
630
666
|
low = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
|
631
667
|
high = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
|
632
668
|
range_expr = Utils.wrap_expr(RbExpr.arange(low._rbexpr, high._rbexpr, step))
|
633
669
|
|
634
|
-
if !dtype.nil? &&
|
670
|
+
if !dtype.nil? && !["i64", Int64].include?(dtype)
|
635
671
|
range_expr = range_expr.cast(dtype)
|
636
672
|
end
|
637
673
|
|
638
674
|
if !eager
|
639
675
|
range_expr
|
640
676
|
else
|
641
|
-
DataFrame.new
|
642
|
-
.select(range_expr)
|
643
|
-
.to_series
|
644
|
-
.rename("arange", in_place: true)
|
677
|
+
DataFrame.new.select(range_expr.alias("arange")).to_series
|
645
678
|
end
|
646
679
|
end
|
647
680
|
|
@@ -658,7 +691,7 @@ module Polars
|
|
658
691
|
#
|
659
692
|
# @return [Expr]
|
660
693
|
def arg_sort_by(exprs, reverse: false)
|
661
|
-
if !exprs.is_a?(Array)
|
694
|
+
if !exprs.is_a?(::Array)
|
662
695
|
exprs = [exprs]
|
663
696
|
end
|
664
697
|
if reverse == true || reverse == false
|
@@ -997,19 +1030,24 @@ module Polars
|
|
997
1030
|
# Only used in `eager` mode. As expression, use `alias`.
|
998
1031
|
#
|
999
1032
|
# @return [Expr]
|
1000
|
-
def repeat(value, n, eager: false, name: nil)
|
1033
|
+
def repeat(value, n, dtype: nil, eager: false, name: nil)
|
1034
|
+
if !name.nil?
|
1035
|
+
warn "the `name` argument is deprecated. Use the `alias` method instead."
|
1036
|
+
end
|
1037
|
+
|
1038
|
+
if n.is_a?(Integer)
|
1039
|
+
n = lit(n)
|
1040
|
+
end
|
1041
|
+
|
1042
|
+
value = Utils.parse_as_expression(value, str_as_lit: true)
|
1043
|
+
expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr, dtype))
|
1044
|
+
if !name.nil?
|
1045
|
+
expr = expr.alias(name)
|
1046
|
+
end
|
1001
1047
|
if eager
|
1002
|
-
|
1003
|
-
name = ""
|
1004
|
-
end
|
1005
|
-
dtype = py_type_to_dtype(type(value))
|
1006
|
-
Series._repeat(name, value, n, dtype)
|
1007
|
-
else
|
1008
|
-
if n.is_a?(Integer)
|
1009
|
-
n = lit(n)
|
1010
|
-
end
|
1011
|
-
Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
|
1048
|
+
return select(expr).to_series
|
1012
1049
|
end
|
1050
|
+
expr
|
1013
1051
|
end
|
1014
1052
|
|
1015
1053
|
# Return indices where `condition` evaluates `true`.
|
@@ -1124,13 +1162,11 @@ module Polars
|
|
1124
1162
|
end
|
1125
1163
|
|
1126
1164
|
if unit == "d"
|
1127
|
-
expr = column.cast(
|
1165
|
+
expr = column.cast(Date)
|
1128
1166
|
elsif unit == "s"
|
1129
|
-
|
1130
|
-
# expr = (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
|
1167
|
+
expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
|
1131
1168
|
elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
|
1132
|
-
|
1133
|
-
# expr = column.cast(Datetime(unit))
|
1169
|
+
expr = column.cast(Datetime.new(unit))
|
1134
1170
|
else
|
1135
1171
|
raise ArgumentError, "'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '#{unit}'."
|
1136
1172
|
end
|
data/lib/polars/list_expr.rb
CHANGED
@@ -15,7 +15,7 @@ module Polars
|
|
15
15
|
#
|
16
16
|
# @example
|
17
17
|
# df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
|
18
|
-
# df.select(Polars.col("bar").
|
18
|
+
# df.select(Polars.col("bar").list.lengths)
|
19
19
|
# # =>
|
20
20
|
# # shape: (2, 1)
|
21
21
|
# # ┌─────┐
|
@@ -36,7 +36,7 @@ module Polars
|
|
36
36
|
#
|
37
37
|
# @example
|
38
38
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
39
|
-
# df.select(Polars.col("values").
|
39
|
+
# df.select(Polars.col("values").list.sum)
|
40
40
|
# # =>
|
41
41
|
# # shape: (2, 1)
|
42
42
|
# # ┌────────┐
|
@@ -57,7 +57,7 @@ module Polars
|
|
57
57
|
#
|
58
58
|
# @example
|
59
59
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
60
|
-
# df.select(Polars.col("values").
|
60
|
+
# df.select(Polars.col("values").list.max)
|
61
61
|
# # =>
|
62
62
|
# # shape: (2, 1)
|
63
63
|
# # ┌────────┐
|
@@ -78,7 +78,7 @@ module Polars
|
|
78
78
|
#
|
79
79
|
# @example
|
80
80
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
81
|
-
# df.select(Polars.col("values").
|
81
|
+
# df.select(Polars.col("values").list.min)
|
82
82
|
# # =>
|
83
83
|
# # shape: (2, 1)
|
84
84
|
# # ┌────────┐
|
@@ -99,7 +99,7 @@ module Polars
|
|
99
99
|
#
|
100
100
|
# @example
|
101
101
|
# df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
|
102
|
-
# df.select(Polars.col("values").
|
102
|
+
# df.select(Polars.col("values").list.mean)
|
103
103
|
# # =>
|
104
104
|
# # shape: (2, 1)
|
105
105
|
# # ┌────────┐
|
@@ -124,7 +124,7 @@ module Polars
|
|
124
124
|
# "a" => [[3, 2, 1], [9, 1, 2]]
|
125
125
|
# }
|
126
126
|
# )
|
127
|
-
# df.select(Polars.col("a").
|
127
|
+
# df.select(Polars.col("a").list.sort)
|
128
128
|
# # =>
|
129
129
|
# # shape: (2, 1)
|
130
130
|
# # ┌───────────┐
|
@@ -149,7 +149,7 @@ module Polars
|
|
149
149
|
# "a" => [[3, 2, 1], [9, 1, 2]]
|
150
150
|
# }
|
151
151
|
# )
|
152
|
-
# df.select(Polars.col("a").
|
152
|
+
# df.select(Polars.col("a").list.reverse)
|
153
153
|
# # =>
|
154
154
|
# # shape: (2, 1)
|
155
155
|
# # ┌───────────┐
|
@@ -174,7 +174,7 @@ module Polars
|
|
174
174
|
# "a" => [[1, 1, 2]]
|
175
175
|
# }
|
176
176
|
# )
|
177
|
-
# df.select(Polars.col("a").
|
177
|
+
# df.select(Polars.col("a").list.unique)
|
178
178
|
# # =>
|
179
179
|
# # shape: (1, 1)
|
180
180
|
# # ┌───────────┐
|
@@ -202,7 +202,7 @@ module Polars
|
|
202
202
|
# "b" => [["b", "c"], ["y", "z"]]
|
203
203
|
# }
|
204
204
|
# )
|
205
|
-
# df.select(Polars.col("a").
|
205
|
+
# df.select(Polars.col("a").list.concat("b"))
|
206
206
|
# # =>
|
207
207
|
# # shape: (2, 1)
|
208
208
|
# # ┌─────────────────┐
|
@@ -214,11 +214,11 @@ module Polars
|
|
214
214
|
# # │ ["x", "y", "z"] │
|
215
215
|
# # └─────────────────┘
|
216
216
|
def concat(other)
|
217
|
-
if other.is_a?(Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
217
|
+
if other.is_a?(::Array) && ![Expr, String, Series].any? { |c| other[0].is_a?(c) }
|
218
218
|
return concat(Series.new([other]))
|
219
219
|
end
|
220
220
|
|
221
|
-
if !other.is_a?(Array)
|
221
|
+
if !other.is_a?(::Array)
|
222
222
|
other_list = [other]
|
223
223
|
else
|
224
224
|
other_list = other.dup
|
@@ -241,7 +241,7 @@ module Polars
|
|
241
241
|
#
|
242
242
|
# @example
|
243
243
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
244
|
-
# df.select(Polars.col("foo").
|
244
|
+
# df.select(Polars.col("foo").list.get(0))
|
245
245
|
# # =>
|
246
246
|
# # shape: (3, 1)
|
247
247
|
# # ┌──────┐
|
@@ -254,7 +254,7 @@ module Polars
|
|
254
254
|
# # │ 1 │
|
255
255
|
# # └──────┘
|
256
256
|
def get(index)
|
257
|
-
index = Utils.
|
257
|
+
index = Utils.parse_as_expression(index)
|
258
258
|
Utils.wrap_expr(_rbexpr.list_get(index))
|
259
259
|
end
|
260
260
|
|
@@ -280,7 +280,7 @@ module Polars
|
|
280
280
|
#
|
281
281
|
# @return [Expr]
|
282
282
|
def take(index, null_on_oob: false)
|
283
|
-
if index.is_a?(Array)
|
283
|
+
if index.is_a?(::Array)
|
284
284
|
index = Series.new(index)
|
285
285
|
end
|
286
286
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
@@ -293,7 +293,7 @@ module Polars
|
|
293
293
|
#
|
294
294
|
# @example
|
295
295
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
296
|
-
# df.select(Polars.col("foo").
|
296
|
+
# df.select(Polars.col("foo").list.first)
|
297
297
|
# # =>
|
298
298
|
# # shape: (3, 1)
|
299
299
|
# # ┌──────┐
|
@@ -315,7 +315,7 @@ module Polars
|
|
315
315
|
#
|
316
316
|
# @example
|
317
317
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
318
|
-
# df.select(Polars.col("foo").
|
318
|
+
# df.select(Polars.col("foo").list.last)
|
319
319
|
# # =>
|
320
320
|
# # shape: (3, 1)
|
321
321
|
# # ┌──────┐
|
@@ -340,7 +340,7 @@ module Polars
|
|
340
340
|
#
|
341
341
|
# @example
|
342
342
|
# df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
|
343
|
-
# df.select(Polars.col("foo").
|
343
|
+
# df.select(Polars.col("foo").list.contains(1))
|
344
344
|
# # =>
|
345
345
|
# # shape: (3, 1)
|
346
346
|
# # ┌───────┐
|
@@ -367,7 +367,7 @@ module Polars
|
|
367
367
|
#
|
368
368
|
# @example
|
369
369
|
# df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
|
370
|
-
# df.select(Polars.col("s").
|
370
|
+
# df.select(Polars.col("s").list.join(" "))
|
371
371
|
# # =>
|
372
372
|
# # shape: (2, 1)
|
373
373
|
# # ┌───────┐
|
@@ -392,7 +392,7 @@ module Polars
|
|
392
392
|
# "a" => [[1, 2], [2, 1]]
|
393
393
|
# }
|
394
394
|
# )
|
395
|
-
# df.select(Polars.col("a").
|
395
|
+
# df.select(Polars.col("a").list.arg_min)
|
396
396
|
# # =>
|
397
397
|
# # shape: (2, 1)
|
398
398
|
# # ┌─────┐
|
@@ -417,7 +417,7 @@ module Polars
|
|
417
417
|
# "a" => [[1, 2], [2, 1]]
|
418
418
|
# }
|
419
419
|
# )
|
420
|
-
# df.select(Polars.col("a").
|
420
|
+
# df.select(Polars.col("a").list.arg_max)
|
421
421
|
# # =>
|
422
422
|
# # shape: (2, 1)
|
423
423
|
# # ┌─────┐
|
@@ -443,7 +443,7 @@ module Polars
|
|
443
443
|
#
|
444
444
|
# @example
|
445
445
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
446
|
-
# s.
|
446
|
+
# s.list.diff
|
447
447
|
# # =>
|
448
448
|
# # shape: (2,)
|
449
449
|
# # Series: 'a' [list[i64]]
|
@@ -464,7 +464,7 @@ module Polars
|
|
464
464
|
#
|
465
465
|
# @example
|
466
466
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
467
|
-
# s.
|
467
|
+
# s.list.shift
|
468
468
|
# # =>
|
469
469
|
# # shape: (2,)
|
470
470
|
# # Series: 'a' [list[i64]]
|
@@ -488,7 +488,7 @@ module Polars
|
|
488
488
|
#
|
489
489
|
# @example
|
490
490
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
491
|
-
# s.
|
491
|
+
# s.list.slice(1, 2)
|
492
492
|
# # =>
|
493
493
|
# # shape: (2,)
|
494
494
|
# # Series: 'a' [list[i64]]
|
@@ -511,7 +511,7 @@ module Polars
|
|
511
511
|
#
|
512
512
|
# @example
|
513
513
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
514
|
-
# s.
|
514
|
+
# s.list.head(2)
|
515
515
|
# # =>
|
516
516
|
# # shape: (2,)
|
517
517
|
# # Series: 'a' [list[i64]]
|
@@ -532,7 +532,7 @@ module Polars
|
|
532
532
|
#
|
533
533
|
# @example
|
534
534
|
# s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
|
535
|
-
# s.
|
535
|
+
# s.list.tail(2)
|
536
536
|
# # =>
|
537
537
|
# # shape: (2,)
|
538
538
|
# # Series: 'a' [list[i64]]
|
@@ -554,7 +554,7 @@ module Polars
|
|
554
554
|
#
|
555
555
|
# @example
|
556
556
|
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
557
|
-
# df.select(Polars.col("listcol").
|
557
|
+
# df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
|
558
558
|
# # =>
|
559
559
|
# # shape: (5, 1)
|
560
560
|
# # ┌────────────────┐
|
@@ -584,7 +584,7 @@ module Polars
|
|
584
584
|
#
|
585
585
|
# @example
|
586
586
|
# df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
|
587
|
-
# df.select([Polars.col("a").
|
587
|
+
# df.select([Polars.col("a").list.to_struct])
|
588
588
|
# # =>
|
589
589
|
# # shape: (2, 1)
|
590
590
|
# # ┌────────────┐
|
@@ -617,7 +617,7 @@ module Polars
|
|
617
617
|
# @example
|
618
618
|
# df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
|
619
619
|
# df.with_column(
|
620
|
-
# Polars.concat_list(["a", "b"]).
|
620
|
+
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
621
621
|
# )
|
622
622
|
# # =>
|
623
623
|
# # shape: (3, 3)
|