polars-df 0.4.0-x86_64-darwin → 0.5.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2043 -1202
- data/README.md +2 -2
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +2 -2
data/lib/polars/expr.rb
CHANGED
@@ -1308,8 +1308,6 @@ module Polars
|
|
1308
1308
|
#
|
1309
1309
|
# @param k [Integer]
|
1310
1310
|
# Number of elements to return.
|
1311
|
-
# @param reverse [Boolean]
|
1312
|
-
# Return the smallest elements.
|
1313
1311
|
#
|
1314
1312
|
# @return [Expr]
|
1315
1313
|
#
|
@@ -1322,7 +1320,7 @@ module Polars
|
|
1322
1320
|
# df.select(
|
1323
1321
|
# [
|
1324
1322
|
# Polars.col("value").top_k.alias("top_k"),
|
1325
|
-
# Polars.col("value").
|
1323
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1326
1324
|
# ]
|
1327
1325
|
# )
|
1328
1326
|
# # =>
|
@@ -1338,8 +1336,46 @@ module Polars
|
|
1338
1336
|
# # │ 3 ┆ 4 │
|
1339
1337
|
# # │ 2 ┆ 98 │
|
1340
1338
|
# # └───────┴──────────┘
|
1341
|
-
def top_k(k: 5
|
1342
|
-
wrap_expr(_rbexpr.top_k(k
|
1339
|
+
def top_k(k: 5)
|
1340
|
+
wrap_expr(_rbexpr.top_k(k))
|
1341
|
+
end
|
1342
|
+
|
1343
|
+
# Return the `k` smallest elements.
|
1344
|
+
#
|
1345
|
+
# If `reverse: true` the smallest elements will be given.
|
1346
|
+
#
|
1347
|
+
# @param k [Integer]
|
1348
|
+
# Number of elements to return.
|
1349
|
+
#
|
1350
|
+
# @return [Expr]
|
1351
|
+
#
|
1352
|
+
# @example
|
1353
|
+
# df = Polars::DataFrame.new(
|
1354
|
+
# {
|
1355
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1356
|
+
# }
|
1357
|
+
# )
|
1358
|
+
# df.select(
|
1359
|
+
# [
|
1360
|
+
# Polars.col("value").top_k.alias("top_k"),
|
1361
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1362
|
+
# ]
|
1363
|
+
# )
|
1364
|
+
# # =>
|
1365
|
+
# # shape: (5, 2)
|
1366
|
+
# # ┌───────┬──────────┐
|
1367
|
+
# # │ top_k ┆ bottom_k │
|
1368
|
+
# # │ --- ┆ --- │
|
1369
|
+
# # │ i64 ┆ i64 │
|
1370
|
+
# # ╞═══════╪══════════╡
|
1371
|
+
# # │ 99 ┆ 1 │
|
1372
|
+
# # │ 98 ┆ 2 │
|
1373
|
+
# # │ 4 ┆ 3 │
|
1374
|
+
# # │ 3 ┆ 4 │
|
1375
|
+
# # │ 2 ┆ 98 │
|
1376
|
+
# # └───────┴──────────┘
|
1377
|
+
def bottom_k(k: 5)
|
1378
|
+
wrap_expr(_rbexpr.bottom_k(k))
|
1343
1379
|
end
|
1344
1380
|
|
1345
1381
|
# Get the index values that would sort this column.
|
@@ -2008,6 +2044,28 @@ module Polars
|
|
2008
2044
|
wrap_expr(_rbexpr.n_unique)
|
2009
2045
|
end
|
2010
2046
|
|
2047
|
+
# Approximate count of unique values.
|
2048
|
+
#
|
2049
|
+
# This is done using the HyperLogLog++ algorithm for cardinality estimation.
|
2050
|
+
#
|
2051
|
+
# @return [Expr]
|
2052
|
+
#
|
2053
|
+
# @example
|
2054
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2055
|
+
# df.select(Polars.col("a").approx_unique)
|
2056
|
+
# # =>
|
2057
|
+
# # shape: (1, 1)
|
2058
|
+
# # ┌─────┐
|
2059
|
+
# # │ a │
|
2060
|
+
# # │ --- │
|
2061
|
+
# # │ u32 │
|
2062
|
+
# # ╞═════╡
|
2063
|
+
# # │ 2 │
|
2064
|
+
# # └─────┘
|
2065
|
+
def approx_unique
|
2066
|
+
wrap_expr(_rbexpr.approx_unique)
|
2067
|
+
end
|
2068
|
+
|
2011
2069
|
# Count null values.
|
2012
2070
|
#
|
2013
2071
|
# @return [Expr]
|
@@ -2194,7 +2252,7 @@ module Polars
|
|
2194
2252
|
# # │ 4 │
|
2195
2253
|
# # │ 6 │
|
2196
2254
|
# # │ 6 │
|
2197
|
-
# # │
|
2255
|
+
# # │ 4 │
|
2198
2256
|
# # │ 6 │
|
2199
2257
|
# # │ 6 │
|
2200
2258
|
# # │ 6 │
|
@@ -2751,6 +2809,7 @@ module Polars
|
|
2751
2809
|
end
|
2752
2810
|
wrap_expr(_rbexpr.is_in(other._rbexpr))
|
2753
2811
|
end
|
2812
|
+
alias_method :in?, :is_in
|
2754
2813
|
|
2755
2814
|
# Repeat the elements in this Series as specified in the given expression.
|
2756
2815
|
#
|
@@ -3914,8 +3973,8 @@ module Polars
|
|
3914
3973
|
# # │ 2 │
|
3915
3974
|
# # │ 5 │
|
3916
3975
|
# # └─────┘
|
3917
|
-
def rank(method: "average", reverse: false)
|
3918
|
-
wrap_expr(_rbexpr.rank(method, reverse))
|
3976
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3977
|
+
wrap_expr(_rbexpr.rank(method, reverse, seed))
|
3919
3978
|
end
|
3920
3979
|
|
3921
3980
|
# Calculate the n-th discrete difference.
|
@@ -4916,9 +4975,10 @@ module Polars
|
|
4916
4975
|
# # ╞═══════════╪═══════════╡
|
4917
4976
|
# # │ [1, 2, 3] ┆ [4, 5, 6] │
|
4918
4977
|
# # └───────────┴───────────┘
|
4919
|
-
def
|
4920
|
-
wrap_expr(_rbexpr.
|
4978
|
+
def implode
|
4979
|
+
wrap_expr(_rbexpr.implode)
|
4921
4980
|
end
|
4981
|
+
alias_method :list, :implode
|
4922
4982
|
|
4923
4983
|
# Shrink numeric columns to the minimal required datatype.
|
4924
4984
|
#
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Polars
|
2
|
-
# Representation of a Lazy computation graph/query
|
2
|
+
# Representation of a Lazy computation graph/query against a DataFrame.
|
3
3
|
class LazyFrame
|
4
4
|
# @private
|
5
5
|
attr_accessor :_ldf
|
@@ -934,7 +934,7 @@ module Polars
|
|
934
934
|
# "2020-01-08 23:16:43"
|
935
935
|
# ]
|
936
936
|
# df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
937
|
-
# Polars.col("dt").str.strptime(
|
937
|
+
# Polars.col("dt").str.strptime(Polars::Datetime)
|
938
938
|
# )
|
939
939
|
# df.groupby_rolling(index_column: "dt", period: "2d").agg(
|
940
940
|
# [
|
@@ -964,6 +964,7 @@ module Polars
|
|
964
964
|
closed: "right",
|
965
965
|
by: nil
|
966
966
|
)
|
967
|
+
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
967
968
|
if offset.nil?
|
968
969
|
offset = "-#{period}"
|
969
970
|
end
|
@@ -973,7 +974,7 @@ module Polars
|
|
973
974
|
offset = Utils._timedelta_to_pl_duration(offset)
|
974
975
|
|
975
976
|
lgb = _ldf.groupby_rolling(
|
976
|
-
index_column, period, offset, closed, rbexprs_by
|
977
|
+
index_column._rbexpr, period, offset, closed, rbexprs_by
|
977
978
|
)
|
978
979
|
LazyGroupBy.new(lgb, self.class)
|
979
980
|
end
|
@@ -1077,8 +1077,11 @@ module Polars
|
|
1077
1077
|
# # │ null ┆ null ┆ 3.0 ┆ 3.0 │
|
1078
1078
|
# # │ null ┆ null ┆ null ┆ 99.9 │
|
1079
1079
|
# # └──────┴──────┴──────┴──────┘
|
1080
|
-
def coalesce(exprs)
|
1080
|
+
def coalesce(exprs, *more_exprs)
|
1081
1081
|
exprs = Utils.selection_to_rbexpr_list(exprs)
|
1082
|
+
if more_exprs.any?
|
1083
|
+
exprs.concat(Utils.selection_to_rbexpr_list(more_exprs))
|
1084
|
+
end
|
1082
1085
|
Utils.wrap_expr(_coalesce_exprs(exprs))
|
1083
1086
|
end
|
1084
1087
|
|
data/lib/polars/list_expr.rb
CHANGED
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
# # │ 1 │
|
28
28
|
# # └─────┘
|
29
29
|
def lengths
|
30
|
-
Utils.wrap_expr(_rbexpr.
|
30
|
+
Utils.wrap_expr(_rbexpr.list_lengths)
|
31
31
|
end
|
32
32
|
|
33
33
|
# Sum all the lists in the array.
|
@@ -48,7 +48,7 @@ module Polars
|
|
48
48
|
# # │ 5 │
|
49
49
|
# # └────────┘
|
50
50
|
def sum
|
51
|
-
Utils.wrap_expr(_rbexpr.
|
51
|
+
Utils.wrap_expr(_rbexpr.list_sum)
|
52
52
|
end
|
53
53
|
|
54
54
|
# Compute the max value of the lists in the array.
|
@@ -69,7 +69,7 @@ module Polars
|
|
69
69
|
# # │ 3 │
|
70
70
|
# # └────────┘
|
71
71
|
def max
|
72
|
-
Utils.wrap_expr(_rbexpr.
|
72
|
+
Utils.wrap_expr(_rbexpr.list_max)
|
73
73
|
end
|
74
74
|
|
75
75
|
# Compute the min value of the lists in the array.
|
@@ -90,7 +90,7 @@ module Polars
|
|
90
90
|
# # │ 2 │
|
91
91
|
# # └────────┘
|
92
92
|
def min
|
93
|
-
Utils.wrap_expr(_rbexpr.
|
93
|
+
Utils.wrap_expr(_rbexpr.list_min)
|
94
94
|
end
|
95
95
|
|
96
96
|
# Compute the mean value of the lists in the array.
|
@@ -111,7 +111,7 @@ module Polars
|
|
111
111
|
# # │ 2.5 │
|
112
112
|
# # └────────┘
|
113
113
|
def mean
|
114
|
-
Utils.wrap_expr(_rbexpr.
|
114
|
+
Utils.wrap_expr(_rbexpr.list_mean)
|
115
115
|
end
|
116
116
|
|
117
117
|
# Sort the arrays in the list.
|
@@ -136,7 +136,7 @@ module Polars
|
|
136
136
|
# # │ [1, 2, 9] │
|
137
137
|
# # └───────────┘
|
138
138
|
def sort(reverse: false)
|
139
|
-
Utils.wrap_expr(_rbexpr.
|
139
|
+
Utils.wrap_expr(_rbexpr.list_sort(reverse))
|
140
140
|
end
|
141
141
|
|
142
142
|
# Reverse the arrays in the list.
|
@@ -161,7 +161,7 @@ module Polars
|
|
161
161
|
# # │ [2, 1, 9] │
|
162
162
|
# # └───────────┘
|
163
163
|
def reverse
|
164
|
-
Utils.wrap_expr(_rbexpr.
|
164
|
+
Utils.wrap_expr(_rbexpr.list_reverse)
|
165
165
|
end
|
166
166
|
|
167
167
|
# Get the unique/distinct values in the list.
|
@@ -184,8 +184,8 @@ module Polars
|
|
184
184
|
# # ╞═══════════╡
|
185
185
|
# # │ [1, 2] │
|
186
186
|
# # └───────────┘
|
187
|
-
def unique
|
188
|
-
Utils.wrap_expr(_rbexpr.
|
187
|
+
def unique(maintain_order: false)
|
188
|
+
Utils.wrap_expr(_rbexpr.list_unique(maintain_order))
|
189
189
|
end
|
190
190
|
|
191
191
|
# Concat the arrays in a Series dtype List in linear time.
|
@@ -255,7 +255,7 @@ module Polars
|
|
255
255
|
# # └──────┘
|
256
256
|
def get(index)
|
257
257
|
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
258
|
-
Utils.wrap_expr(_rbexpr.
|
258
|
+
Utils.wrap_expr(_rbexpr.list_get(index))
|
259
259
|
end
|
260
260
|
|
261
261
|
# Get the value by index in the sublists.
|
@@ -265,6 +265,28 @@ module Polars
|
|
265
265
|
get(item)
|
266
266
|
end
|
267
267
|
|
268
|
+
# Take sublists by multiple indices.
|
269
|
+
#
|
270
|
+
# The indices may be defined in a single column, or by sublists in another
|
271
|
+
# column of dtype `List`.
|
272
|
+
#
|
273
|
+
# @param index [Object]
|
274
|
+
# Indices to return per sublist
|
275
|
+
# @param null_on_oob [Boolean]
|
276
|
+
# Behavior if an index is out of bounds:
|
277
|
+
# True -> set as null
|
278
|
+
# False -> raise an error
|
279
|
+
# Note that defaulting to raising an error is much cheaper
|
280
|
+
#
|
281
|
+
# @return [Expr]
|
282
|
+
def take(index, null_on_oob: false)
|
283
|
+
if index.is_a?(Array)
|
284
|
+
index = Series.new(index)
|
285
|
+
end
|
286
|
+
index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
|
287
|
+
Utils.wrap_expr(_rbexpr.list_take(index, null_on_oob))
|
288
|
+
end
|
289
|
+
|
268
290
|
# Get the first value of the sublists.
|
269
291
|
#
|
270
292
|
# @return [Expr]
|
@@ -331,7 +353,7 @@ module Polars
|
|
331
353
|
# # │ true │
|
332
354
|
# # └───────┘
|
333
355
|
def contains(item)
|
334
|
-
Utils.wrap_expr(_rbexpr.
|
356
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
|
335
357
|
end
|
336
358
|
|
337
359
|
# Join all string items in a sublist and place a separator between them.
|
@@ -357,7 +379,7 @@ module Polars
|
|
357
379
|
# # │ x y │
|
358
380
|
# # └───────┘
|
359
381
|
def join(separator)
|
360
|
-
Utils.wrap_expr(_rbexpr.
|
382
|
+
Utils.wrap_expr(_rbexpr.list_join(separator))
|
361
383
|
end
|
362
384
|
|
363
385
|
# Retrieve the index of the minimal value in every sublist.
|
@@ -382,7 +404,7 @@ module Polars
|
|
382
404
|
# # │ 1 │
|
383
405
|
# # └─────┘
|
384
406
|
def arg_min
|
385
|
-
Utils.wrap_expr(_rbexpr.
|
407
|
+
Utils.wrap_expr(_rbexpr.list_arg_min)
|
386
408
|
end
|
387
409
|
|
388
410
|
# Retrieve the index of the maximum value in every sublist.
|
@@ -407,7 +429,7 @@ module Polars
|
|
407
429
|
# # │ 0 │
|
408
430
|
# # └─────┘
|
409
431
|
def arg_max
|
410
|
-
Utils.wrap_expr(_rbexpr.
|
432
|
+
Utils.wrap_expr(_rbexpr.list_arg_max)
|
411
433
|
end
|
412
434
|
|
413
435
|
# Calculate the n-th discrete difference of every sublist.
|
@@ -430,7 +452,7 @@ module Polars
|
|
430
452
|
# # [null, -8, -1]
|
431
453
|
# # ]
|
432
454
|
def diff(n: 1, null_behavior: "ignore")
|
433
|
-
Utils.wrap_expr(_rbexpr.
|
455
|
+
Utils.wrap_expr(_rbexpr.list_diff(n, null_behavior))
|
434
456
|
end
|
435
457
|
|
436
458
|
# Shift values by the given period.
|
@@ -451,7 +473,7 @@ module Polars
|
|
451
473
|
# # [null, 10, 2]
|
452
474
|
# # ]
|
453
475
|
def shift(periods = 1)
|
454
|
-
Utils.wrap_expr(_rbexpr.
|
476
|
+
Utils.wrap_expr(_rbexpr.list_shift(periods))
|
455
477
|
end
|
456
478
|
|
457
479
|
# Slice every sublist.
|
@@ -477,7 +499,7 @@ module Polars
|
|
477
499
|
def slice(offset, length = nil)
|
478
500
|
offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
|
479
501
|
length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
|
480
|
-
Utils.wrap_expr(_rbexpr.
|
502
|
+
Utils.wrap_expr(_rbexpr.list_slice(offset, length))
|
481
503
|
end
|
482
504
|
|
483
505
|
# Slice the first `n` values of every sublist.
|
@@ -523,6 +545,33 @@ module Polars
|
|
523
545
|
slice(offset, n)
|
524
546
|
end
|
525
547
|
|
548
|
+
# Count how often the value produced by `element` occurs.
|
549
|
+
#
|
550
|
+
# @param element [Expr]
|
551
|
+
# An expression that produces a single value
|
552
|
+
#
|
553
|
+
# @return [Expr]
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
|
557
|
+
# df.select(Polars.col("listcol").arr.count_match(2).alias("number_of_twos"))
|
558
|
+
# # =>
|
559
|
+
# # shape: (5, 1)
|
560
|
+
# # ┌────────────────┐
|
561
|
+
# # │ number_of_twos │
|
562
|
+
# # │ --- │
|
563
|
+
# # │ u32 │
|
564
|
+
# # ╞════════════════╡
|
565
|
+
# # │ 0 │
|
566
|
+
# # │ 0 │
|
567
|
+
# # │ 2 │
|
568
|
+
# # │ 1 │
|
569
|
+
# # │ 0 │
|
570
|
+
# # └────────────────┘
|
571
|
+
def count_match(element)
|
572
|
+
Utils.wrap_expr(_rbexpr.list_count_match(Utils.expr_to_lit_or_expr(element)._rbexpr))
|
573
|
+
end
|
574
|
+
|
526
575
|
# Convert the series of type `List` to a series of type `Struct`.
|
527
576
|
#
|
528
577
|
# @param n_field_strategy ["first_non_null", "max_width"]
|
@@ -548,7 +597,7 @@ module Polars
|
|
548
597
|
# # └────────────┘
|
549
598
|
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
|
550
599
|
raise Todo if name_generator
|
551
|
-
Utils.wrap_expr(_rbexpr.
|
600
|
+
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
|
552
601
|
end
|
553
602
|
|
554
603
|
# Run any polars expression against the lists' elements.
|
@@ -582,7 +631,7 @@ module Polars
|
|
582
631
|
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
583
632
|
# # └─────┴─────┴────────────┘
|
584
633
|
def eval(expr, parallel: false)
|
585
|
-
|
634
|
+
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
|
586
635
|
end
|
587
636
|
end
|
588
637
|
end
|