polars-df 0.4.0-x86_64-darwin → 0.5.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/expr.rb CHANGED
@@ -1308,8 +1308,6 @@ module Polars
1308
1308
  #
1309
1309
  # @param k [Integer]
1310
1310
  # Number of elements to return.
1311
- # @param reverse [Boolean]
1312
- # Return the smallest elements.
1313
1311
  #
1314
1312
  # @return [Expr]
1315
1313
  #
@@ -1322,7 +1320,7 @@ module Polars
1322
1320
  # df.select(
1323
1321
  # [
1324
1322
  # Polars.col("value").top_k.alias("top_k"),
1325
- # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1323
+ # Polars.col("value").bottom_k.alias("bottom_k")
1326
1324
  # ]
1327
1325
  # )
1328
1326
  # # =>
@@ -1338,8 +1336,46 @@ module Polars
1338
1336
  # # │ 3 ┆ 4 │
1339
1337
  # # │ 2 ┆ 98 │
1340
1338
  # # └───────┴──────────┘
1341
- def top_k(k: 5, reverse: false)
1342
- wrap_expr(_rbexpr.top_k(k, reverse))
1339
+ def top_k(k: 5)
1340
+ wrap_expr(_rbexpr.top_k(k))
1341
+ end
1342
+
1343
+ # Return the `k` smallest elements.
1344
+ #
1345
+ # This is the counterpart of `top_k`, which returns the `k` largest elements.
1346
+ #
1347
+ # @param k [Integer]
1348
+ # Number of elements to return.
1349
+ #
1350
+ # @return [Expr]
1351
+ #
1352
+ # @example
1353
+ # df = Polars::DataFrame.new(
1354
+ # {
1355
+ # "value" => [1, 98, 2, 3, 99, 4]
1356
+ # }
1357
+ # )
1358
+ # df.select(
1359
+ # [
1360
+ # Polars.col("value").top_k.alias("top_k"),
1361
+ # Polars.col("value").bottom_k.alias("bottom_k")
1362
+ # ]
1363
+ # )
1364
+ # # =>
1365
+ # # shape: (5, 2)
1366
+ # # ┌───────┬──────────┐
1367
+ # # │ top_k ┆ bottom_k │
1368
+ # # │ --- ┆ --- │
1369
+ # # │ i64 ┆ i64 │
1370
+ # # ╞═══════╪══════════╡
1371
+ # # │ 99 ┆ 1 │
1372
+ # # │ 98 ┆ 2 │
1373
+ # # │ 4 ┆ 3 │
1374
+ # # │ 3 ┆ 4 │
1375
+ # # │ 2 ┆ 98 │
1376
+ # # └───────┴──────────┘
1377
+ def bottom_k(k: 5)
1378
+ wrap_expr(_rbexpr.bottom_k(k))
1343
1379
  end
1344
1380
 
1345
1381
  # Get the index values that would sort this column.
@@ -2008,6 +2044,28 @@ module Polars
2008
2044
  wrap_expr(_rbexpr.n_unique)
2009
2045
  end
2010
2046
 
2047
+ # Approximate count of unique values.
2048
+ #
2049
+ # This is done using the HyperLogLog++ algorithm for cardinality estimation.
2050
+ #
2051
+ # @return [Expr]
2052
+ #
2053
+ # @example
2054
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2055
+ # df.select(Polars.col("a").approx_unique)
2056
+ # # =>
2057
+ # # shape: (1, 1)
2058
+ # # ┌─────┐
2059
+ # # │ a │
2060
+ # # │ --- │
2061
+ # # │ u32 │
2062
+ # # ╞═════╡
2063
+ # # │ 2 │
2064
+ # # └─────┘
2065
+ def approx_unique
2066
+ wrap_expr(_rbexpr.approx_unique)
2067
+ end
2068
+
2011
2069
  # Count null values.
2012
2070
  #
2013
2071
  # @return [Expr]
@@ -2194,7 +2252,7 @@ module Polars
2194
2252
  # # │ 4 │
2195
2253
  # # │ 6 │
2196
2254
  # # │ 6 │
2197
- # # │
2255
+ # # │ 4 │
2198
2256
  # # │ 6 │
2199
2257
  # # │ 6 │
2200
2258
  # # │ 6 │
@@ -2751,6 +2809,7 @@ module Polars
2751
2809
  end
2752
2810
  wrap_expr(_rbexpr.is_in(other._rbexpr))
2753
2811
  end
2812
+ alias_method :in?, :is_in
2754
2813
 
2755
2814
  # Repeat the elements in this Series as specified in the given expression.
2756
2815
  #
@@ -3914,8 +3973,8 @@ module Polars
3914
3973
  # # │ 2 │
3915
3974
  # # │ 5 │
3916
3975
  # # └─────┘
3917
- def rank(method: "average", reverse: false)
3918
- wrap_expr(_rbexpr.rank(method, reverse))
3976
+ def rank(method: "average", reverse: false, seed: nil)
3977
+ wrap_expr(_rbexpr.rank(method, reverse, seed))
3919
3978
  end
3920
3979
 
3921
3980
  # Calculate the n-th discrete difference.
@@ -4916,9 +4975,10 @@ module Polars
4916
4975
  # # ╞═══════════╪═══════════╡
4917
4976
  # # │ [1, 2, 3] ┆ [4, 5, 6] │
4918
4977
  # # └───────────┴───────────┘
4919
- def list
4920
- wrap_expr(_rbexpr.list)
4978
+ def implode
4979
+ wrap_expr(_rbexpr.implode)
4921
4980
  end
4981
+ alias_method :list, :implode
4922
4982
 
4923
4983
  # Shrink numeric columns to the minimal required datatype.
4924
4984
  #
@@ -1,5 +1,5 @@
1
1
  module Polars
2
- # Representation of a Lazy computation graph/query againat a DataFrame.
2
+ # Representation of a Lazy computation graph/query against a DataFrame.
3
3
  class LazyFrame
4
4
  # @private
5
5
  attr_accessor :_ldf
@@ -934,7 +934,7 @@ module Polars
934
934
  # "2020-01-08 23:16:43"
935
935
  # ]
936
936
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
937
- # Polars.col("dt").str.strptime(:datetime)
937
+ # Polars.col("dt").str.strptime(Polars::Datetime)
938
938
  # )
939
939
  # df.groupby_rolling(index_column: "dt", period: "2d").agg(
940
940
  # [
@@ -964,6 +964,7 @@ module Polars
964
964
  closed: "right",
965
965
  by: nil
966
966
  )
967
+ index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
967
968
  if offset.nil?
968
969
  offset = "-#{period}"
969
970
  end
@@ -973,7 +974,7 @@ module Polars
973
974
  offset = Utils._timedelta_to_pl_duration(offset)
974
975
 
975
976
  lgb = _ldf.groupby_rolling(
976
- index_column, period, offset, closed, rbexprs_by
977
+ index_column._rbexpr, period, offset, closed, rbexprs_by
977
978
  )
978
979
  LazyGroupBy.new(lgb, self.class)
979
980
  end
@@ -1077,8 +1077,11 @@ module Polars
1077
1077
  # # │ null ┆ null ┆ 3.0 ┆ 3.0 │
1078
1078
  # # │ null ┆ null ┆ null ┆ 99.9 │
1079
1079
  # # └──────┴──────┴──────┴──────┘
1080
- def coalesce(exprs)
1080
+ def coalesce(exprs, *more_exprs)
1081
1081
  exprs = Utils.selection_to_rbexpr_list(exprs)
1082
+ if more_exprs.any?
1083
+ exprs.concat(Utils.selection_to_rbexpr_list(more_exprs))
1084
+ end
1082
1085
  Utils.wrap_expr(_coalesce_exprs(exprs))
1083
1086
  end
1084
1087
 
@@ -27,7 +27,7 @@ module Polars
27
27
  # # │ 1 │
28
28
  # # └─────┘
29
29
  def lengths
30
- Utils.wrap_expr(_rbexpr.arr_lengths)
30
+ Utils.wrap_expr(_rbexpr.list_lengths)
31
31
  end
32
32
 
33
33
  # Sum all the lists in the array.
@@ -48,7 +48,7 @@ module Polars
48
48
  # # │ 5 │
49
49
  # # └────────┘
50
50
  def sum
51
- Utils.wrap_expr(_rbexpr.lst_sum)
51
+ Utils.wrap_expr(_rbexpr.list_sum)
52
52
  end
53
53
 
54
54
  # Compute the max value of the lists in the array.
@@ -69,7 +69,7 @@ module Polars
69
69
  # # │ 3 │
70
70
  # # └────────┘
71
71
  def max
72
- Utils.wrap_expr(_rbexpr.lst_max)
72
+ Utils.wrap_expr(_rbexpr.list_max)
73
73
  end
74
74
 
75
75
  # Compute the min value of the lists in the array.
@@ -90,7 +90,7 @@ module Polars
90
90
  # # │ 2 │
91
91
  # # └────────┘
92
92
  def min
93
- Utils.wrap_expr(_rbexpr.lst_min)
93
+ Utils.wrap_expr(_rbexpr.list_min)
94
94
  end
95
95
 
96
96
  # Compute the mean value of the lists in the array.
@@ -111,7 +111,7 @@ module Polars
111
111
  # # │ 2.5 │
112
112
  # # └────────┘
113
113
  def mean
114
- Utils.wrap_expr(_rbexpr.lst_mean)
114
+ Utils.wrap_expr(_rbexpr.list_mean)
115
115
  end
116
116
 
117
117
  # Sort the arrays in the list.
@@ -136,7 +136,7 @@ module Polars
136
136
  # # │ [1, 2, 9] │
137
137
  # # └───────────┘
138
138
  def sort(reverse: false)
139
- Utils.wrap_expr(_rbexpr.lst_sort(reverse))
139
+ Utils.wrap_expr(_rbexpr.list_sort(reverse))
140
140
  end
141
141
 
142
142
  # Reverse the arrays in the list.
@@ -161,7 +161,7 @@ module Polars
161
161
  # # │ [2, 1, 9] │
162
162
  # # └───────────┘
163
163
  def reverse
164
- Utils.wrap_expr(_rbexpr.lst_reverse)
164
+ Utils.wrap_expr(_rbexpr.list_reverse)
165
165
  end
166
166
 
167
167
  # Get the unique/distinct values in the list.
@@ -184,8 +184,8 @@ module Polars
184
184
  # # ╞═══════════╡
185
185
  # # │ [1, 2] │
186
186
  # # └───────────┘
187
- def unique
188
- Utils.wrap_expr(_rbexpr.lst_unique)
187
+ def unique(maintain_order: false)
188
+ Utils.wrap_expr(_rbexpr.list_unique(maintain_order))
189
189
  end
190
190
 
191
191
  # Concat the arrays in a Series dtype List in linear time.
@@ -255,7 +255,7 @@ module Polars
255
255
  # # └──────┘
256
256
  def get(index)
257
257
  index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
258
- Utils.wrap_expr(_rbexpr.lst_get(index))
258
+ Utils.wrap_expr(_rbexpr.list_get(index))
259
259
  end
260
260
 
261
261
  # Get the value by index in the sublists.
@@ -265,6 +265,28 @@ module Polars
265
265
  get(item)
266
266
  end
267
267
 
268
+ # Take sublists by multiple indices.
269
+ #
270
+ # The indices may be defined in a single column, or by sublists in another
271
+ # column of dtype `List`.
272
+ #
273
+ # @param index [Object]
274
+ # Indices to return per sublist
275
+ # @param null_on_oob [Boolean]
276
+ # Behavior if an index is out of bounds:
277
+ # true -> set as null
278
+ # false -> raise an error
279
+ # Note that defaulting to raising an error is much cheaper
280
+ #
281
+ # @return [Expr]
282
+ def take(index, null_on_oob: false)
283
+ if index.is_a?(Array)
284
+ index = Series.new(index)
285
+ end
286
+ index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
287
+ Utils.wrap_expr(_rbexpr.list_take(index, null_on_oob))
288
+ end
289
+
268
290
  # Get the first value of the sublists.
269
291
  #
270
292
  # @return [Expr]
@@ -331,7 +353,7 @@ module Polars
331
353
  # # │ true │
332
354
  # # └───────┘
333
355
  def contains(item)
334
- Utils.wrap_expr(_rbexpr.arr_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
356
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
335
357
  end
336
358
 
337
359
  # Join all string items in a sublist and place a separator between them.
@@ -357,7 +379,7 @@ module Polars
357
379
  # # │ x y │
358
380
  # # └───────┘
359
381
  def join(separator)
360
- Utils.wrap_expr(_rbexpr.lst_join(separator))
382
+ Utils.wrap_expr(_rbexpr.list_join(separator))
361
383
  end
362
384
 
363
385
  # Retrieve the index of the minimal value in every sublist.
@@ -382,7 +404,7 @@ module Polars
382
404
  # # │ 1 │
383
405
  # # └─────┘
384
406
  def arg_min
385
- Utils.wrap_expr(_rbexpr.lst_arg_min)
407
+ Utils.wrap_expr(_rbexpr.list_arg_min)
386
408
  end
387
409
 
388
410
  # Retrieve the index of the maximum value in every sublist.
@@ -407,7 +429,7 @@ module Polars
407
429
  # # │ 0 │
408
430
  # # └─────┘
409
431
  def arg_max
410
- Utils.wrap_expr(_rbexpr.lst_arg_max)
432
+ Utils.wrap_expr(_rbexpr.list_arg_max)
411
433
  end
412
434
 
413
435
  # Calculate the n-th discrete difference of every sublist.
@@ -430,7 +452,7 @@ module Polars
430
452
  # # [null, -8, -1]
431
453
  # # ]
432
454
  def diff(n: 1, null_behavior: "ignore")
433
- Utils.wrap_expr(_rbexpr.lst_diff(n, null_behavior))
455
+ Utils.wrap_expr(_rbexpr.list_diff(n, null_behavior))
434
456
  end
435
457
 
436
458
  # Shift values by the given period.
@@ -451,7 +473,7 @@ module Polars
451
473
  # # [null, 10, 2]
452
474
  # # ]
453
475
  def shift(periods = 1)
454
- Utils.wrap_expr(_rbexpr.lst_shift(periods))
476
+ Utils.wrap_expr(_rbexpr.list_shift(periods))
455
477
  end
456
478
 
457
479
  # Slice every sublist.
@@ -477,7 +499,7 @@ module Polars
477
499
  def slice(offset, length = nil)
478
500
  offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
479
501
  length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
480
- Utils.wrap_expr(_rbexpr.lst_slice(offset, length))
502
+ Utils.wrap_expr(_rbexpr.list_slice(offset, length))
481
503
  end
482
504
 
483
505
  # Slice the first `n` values of every sublist.
@@ -523,6 +545,33 @@ module Polars
523
545
  slice(offset, n)
524
546
  end
525
547
 
548
+ # Count how often the value produced by `element` occurs.
549
+ #
550
+ # @param element [Expr]
551
+ # An expression that produces a single value
552
+ #
553
+ # @return [Expr]
554
+ #
555
+ # @example
556
+ # df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
557
+ # df.select(Polars.col("listcol").arr.count_match(2).alias("number_of_twos"))
558
+ # # =>
559
+ # # shape: (5, 1)
560
+ # # ┌────────────────┐
561
+ # # │ number_of_twos │
562
+ # # │ --- │
563
+ # # │ u32 │
564
+ # # ╞════════════════╡
565
+ # # │ 0 │
566
+ # # │ 0 │
567
+ # # │ 2 │
568
+ # # │ 1 │
569
+ # # │ 0 │
570
+ # # └────────────────┘
571
+ def count_match(element)
572
+ Utils.wrap_expr(_rbexpr.list_count_match(Utils.expr_to_lit_or_expr(element)._rbexpr))
573
+ end
574
+
526
575
  # Convert the series of type `List` to a series of type `Struct`.
527
576
  #
528
577
  # @param n_field_strategy ["first_non_null", "max_width"]
@@ -548,7 +597,7 @@ module Polars
548
597
  # # └────────────┘
549
598
  def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
550
599
  raise Todo if name_generator
551
- Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator, 0))
600
+ Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
552
601
  end
553
602
 
554
603
  # Run any polars expression against the lists' elements.
@@ -582,7 +631,7 @@ module Polars
582
631
  # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
583
632
  # # └─────┴─────┴────────────┘
584
633
  def eval(expr, parallel: false)
585
- Utils.wrap_expr(_rbexpr.lst_eval(expr._rbexpr, parallel))
634
+ Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
586
635
  end
587
636
  end
588
637
  end