polars-df 0.4.0-x86_64-darwin → 0.5.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/expr.rb CHANGED
@@ -1308,8 +1308,6 @@ module Polars
1308
1308
  #
1309
1309
  # @param k [Integer]
1310
1310
  # Number of elements to return.
1311
- # @param reverse [Boolean]
1312
- # Return the smallest elements.
1313
1311
  #
1314
1312
  # @return [Expr]
1315
1313
  #
@@ -1322,7 +1320,7 @@ module Polars
1322
1320
  # df.select(
1323
1321
  # [
1324
1322
  # Polars.col("value").top_k.alias("top_k"),
1325
- # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1323
+ # Polars.col("value").bottom_k.alias("bottom_k")
1326
1324
  # ]
1327
1325
  # )
1328
1326
  # # =>
@@ -1338,8 +1336,46 @@ module Polars
1338
1336
  # # │ 3 ┆ 4 │
1339
1337
  # # │ 2 ┆ 98 │
1340
1338
  # # └───────┴──────────┘
1341
- def top_k(k: 5, reverse: false)
1342
- wrap_expr(_rbexpr.top_k(k, reverse))
1339
+ def top_k(k: 5)
1340
+ wrap_expr(_rbexpr.top_k(k))
1341
+ end
1342
+
1343
+ # Return the `k` smallest elements.
1344
+ #
1345
+ # The smallest elements are always returned; this method has no `reverse` option.
1346
+ #
1347
+ # @param k [Integer]
1348
+ # Number of elements to return.
1349
+ #
1350
+ # @return [Expr]
1351
+ #
1352
+ # @example
1353
+ # df = Polars::DataFrame.new(
1354
+ # {
1355
+ # "value" => [1, 98, 2, 3, 99, 4]
1356
+ # }
1357
+ # )
1358
+ # df.select(
1359
+ # [
1360
+ # Polars.col("value").top_k.alias("top_k"),
1361
+ # Polars.col("value").bottom_k.alias("bottom_k")
1362
+ # ]
1363
+ # )
1364
+ # # =>
1365
+ # # shape: (5, 2)
1366
+ # # ┌───────┬──────────┐
1367
+ # # │ top_k ┆ bottom_k │
1368
+ # # │ --- ┆ --- │
1369
+ # # │ i64 ┆ i64 │
1370
+ # # ╞═══════╪══════════╡
1371
+ # # │ 99 ┆ 1 │
1372
+ # # │ 98 ┆ 2 │
1373
+ # # │ 4 ┆ 3 │
1374
+ # # │ 3 ┆ 4 │
1375
+ # # │ 2 ┆ 98 │
1376
+ # # └───────┴──────────┘
1377
+ def bottom_k(k: 5)
1378
+ wrap_expr(_rbexpr.bottom_k(k))
1343
1379
  end
1344
1380
 
1345
1381
  # Get the index values that would sort this column.
@@ -2008,6 +2044,28 @@ module Polars
2008
2044
  wrap_expr(_rbexpr.n_unique)
2009
2045
  end
2010
2046
 
2047
+ # Approximate count of unique values.
2048
+ #
2049
+ # This is done using the HyperLogLog++ algorithm for cardinality estimation.
2050
+ #
2051
+ # @return [Expr]
2052
+ #
2053
+ # @example
2054
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2055
+ # df.select(Polars.col("a").approx_unique)
2056
+ # # =>
2057
+ # # shape: (1, 1)
2058
+ # # ┌─────┐
2059
+ # # │ a │
2060
+ # # │ --- │
2061
+ # # │ u32 │
2062
+ # # ╞═════╡
2063
+ # # │ 2 │
2064
+ # # └─────┘
2065
+ def approx_unique
2066
+ wrap_expr(_rbexpr.approx_unique)
2067
+ end
2068
+
2011
2069
  # Count null values.
2012
2070
  #
2013
2071
  # @return [Expr]
@@ -2194,7 +2252,7 @@ module Polars
2194
2252
  # # │ 4 │
2195
2253
  # # │ 6 │
2196
2254
  # # │ 6 │
2197
- # # │
2255
+ # # │ 4 │
2198
2256
  # # │ 6 │
2199
2257
  # # │ 6 │
2200
2258
  # # │ 6 │
@@ -2751,6 +2809,7 @@ module Polars
2751
2809
  end
2752
2810
  wrap_expr(_rbexpr.is_in(other._rbexpr))
2753
2811
  end
2812
+ alias_method :in?, :is_in
2754
2813
 
2755
2814
  # Repeat the elements in this Series as specified in the given expression.
2756
2815
  #
@@ -3914,8 +3973,8 @@ module Polars
3914
3973
  # # │ 2 │
3915
3974
  # # │ 5 │
3916
3975
  # # └─────┘
3917
- def rank(method: "average", reverse: false)
3918
- wrap_expr(_rbexpr.rank(method, reverse))
3976
+ def rank(method: "average", reverse: false, seed: nil)
3977
+ wrap_expr(_rbexpr.rank(method, reverse, seed))
3919
3978
  end
3920
3979
 
3921
3980
  # Calculate the n-th discrete difference.
@@ -4916,9 +4975,10 @@ module Polars
4916
4975
  # # ╞═══════════╪═══════════╡
4917
4976
  # # │ [1, 2, 3] ┆ [4, 5, 6] │
4918
4977
  # # └───────────┴───────────┘
4919
- def list
4920
- wrap_expr(_rbexpr.list)
4978
+ def implode
4979
+ wrap_expr(_rbexpr.implode)
4921
4980
  end
4981
+ alias_method :list, :implode
4922
4982
 
4923
4983
  # Shrink numeric columns to the minimal required datatype.
4924
4984
  #
@@ -1,5 +1,5 @@
1
1
  module Polars
2
- # Representation of a Lazy computation graph/query againat a DataFrame.
2
+ # Representation of a Lazy computation graph/query against a DataFrame.
3
3
  class LazyFrame
4
4
  # @private
5
5
  attr_accessor :_ldf
@@ -934,7 +934,7 @@ module Polars
934
934
  # "2020-01-08 23:16:43"
935
935
  # ]
936
936
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
937
- # Polars.col("dt").str.strptime(:datetime)
937
+ # Polars.col("dt").str.strptime(Polars::Datetime)
938
938
  # )
939
939
  # df.groupby_rolling(index_column: "dt", period: "2d").agg(
940
940
  # [
@@ -964,6 +964,7 @@ module Polars
964
964
  closed: "right",
965
965
  by: nil
966
966
  )
967
+ index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
967
968
  if offset.nil?
968
969
  offset = "-#{period}"
969
970
  end
@@ -973,7 +974,7 @@ module Polars
973
974
  offset = Utils._timedelta_to_pl_duration(offset)
974
975
 
975
976
  lgb = _ldf.groupby_rolling(
976
- index_column, period, offset, closed, rbexprs_by
977
+ index_column._rbexpr, period, offset, closed, rbexprs_by
977
978
  )
978
979
  LazyGroupBy.new(lgb, self.class)
979
980
  end
@@ -1077,8 +1077,11 @@ module Polars
1077
1077
  # # │ null ┆ null ┆ 3.0 ┆ 3.0 │
1078
1078
  # # │ null ┆ null ┆ null ┆ 99.9 │
1079
1079
  # # └──────┴──────┴──────┴──────┘
1080
- def coalesce(exprs)
1080
+ def coalesce(exprs, *more_exprs)
1081
1081
  exprs = Utils.selection_to_rbexpr_list(exprs)
1082
+ if more_exprs.any?
1083
+ exprs.concat(Utils.selection_to_rbexpr_list(more_exprs))
1084
+ end
1082
1085
  Utils.wrap_expr(_coalesce_exprs(exprs))
1083
1086
  end
1084
1087
 
@@ -27,7 +27,7 @@ module Polars
27
27
  # # │ 1 │
28
28
  # # └─────┘
29
29
  def lengths
30
- Utils.wrap_expr(_rbexpr.arr_lengths)
30
+ Utils.wrap_expr(_rbexpr.list_lengths)
31
31
  end
32
32
 
33
33
  # Sum all the lists in the array.
@@ -48,7 +48,7 @@ module Polars
48
48
  # # │ 5 │
49
49
  # # └────────┘
50
50
  def sum
51
- Utils.wrap_expr(_rbexpr.lst_sum)
51
+ Utils.wrap_expr(_rbexpr.list_sum)
52
52
  end
53
53
 
54
54
  # Compute the max value of the lists in the array.
@@ -69,7 +69,7 @@ module Polars
69
69
  # # │ 3 │
70
70
  # # └────────┘
71
71
  def max
72
- Utils.wrap_expr(_rbexpr.lst_max)
72
+ Utils.wrap_expr(_rbexpr.list_max)
73
73
  end
74
74
 
75
75
  # Compute the min value of the lists in the array.
@@ -90,7 +90,7 @@ module Polars
90
90
  # # │ 2 │
91
91
  # # └────────┘
92
92
  def min
93
- Utils.wrap_expr(_rbexpr.lst_min)
93
+ Utils.wrap_expr(_rbexpr.list_min)
94
94
  end
95
95
 
96
96
  # Compute the mean value of the lists in the array.
@@ -111,7 +111,7 @@ module Polars
111
111
  # # │ 2.5 │
112
112
  # # └────────┘
113
113
  def mean
114
- Utils.wrap_expr(_rbexpr.lst_mean)
114
+ Utils.wrap_expr(_rbexpr.list_mean)
115
115
  end
116
116
 
117
117
  # Sort the arrays in the list.
@@ -136,7 +136,7 @@ module Polars
136
136
  # # │ [1, 2, 9] │
137
137
  # # └───────────┘
138
138
  def sort(reverse: false)
139
- Utils.wrap_expr(_rbexpr.lst_sort(reverse))
139
+ Utils.wrap_expr(_rbexpr.list_sort(reverse))
140
140
  end
141
141
 
142
142
  # Reverse the arrays in the list.
@@ -161,7 +161,7 @@ module Polars
161
161
  # # │ [2, 1, 9] │
162
162
  # # └───────────┘
163
163
  def reverse
164
- Utils.wrap_expr(_rbexpr.lst_reverse)
164
+ Utils.wrap_expr(_rbexpr.list_reverse)
165
165
  end
166
166
 
167
167
  # Get the unique/distinct values in the list.
@@ -184,8 +184,8 @@ module Polars
184
184
  # # ╞═══════════╡
185
185
  # # │ [1, 2] │
186
186
  # # └───────────┘
187
- def unique
188
- Utils.wrap_expr(_rbexpr.lst_unique)
187
+ def unique(maintain_order: false)
188
+ Utils.wrap_expr(_rbexpr.list_unique(maintain_order))
189
189
  end
190
190
 
191
191
  # Concat the arrays in a Series dtype List in linear time.
@@ -255,7 +255,7 @@ module Polars
255
255
  # # └──────┘
256
256
  def get(index)
257
257
  index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
258
- Utils.wrap_expr(_rbexpr.lst_get(index))
258
+ Utils.wrap_expr(_rbexpr.list_get(index))
259
259
  end
260
260
 
261
261
  # Get the value by index in the sublists.
@@ -265,6 +265,28 @@ module Polars
265
265
  get(item)
266
266
  end
267
267
 
268
+ # Take sublists by multiple indices.
269
+ #
270
+ # The indices may be defined in a single column, or by sublists in another
271
+ # column of dtype `List`.
272
+ #
273
+ # @param index [Object]
274
+ # Indices to return per sublist
275
+ # @param null_on_oob [Boolean]
276
+ # Behavior if an index is out of bounds:
277
+ # true -> set as null
277
+ # false -> raise an error
279
+ # Note that defaulting to raising an error is much cheaper
280
+ #
281
+ # @return [Expr]
282
+ def take(index, null_on_oob: false)
283
+ if index.is_a?(Array)
284
+ index = Series.new(index)
285
+ end
286
+ index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
287
+ Utils.wrap_expr(_rbexpr.list_take(index, null_on_oob))
288
+ end
289
+
268
290
  # Get the first value of the sublists.
269
291
  #
270
292
  # @return [Expr]
@@ -331,7 +353,7 @@ module Polars
331
353
  # # │ true │
332
354
  # # └───────┘
333
355
  def contains(item)
334
- Utils.wrap_expr(_rbexpr.arr_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
356
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
335
357
  end
336
358
 
337
359
  # Join all string items in a sublist and place a separator between them.
@@ -357,7 +379,7 @@ module Polars
357
379
  # # │ x y │
358
380
  # # └───────┘
359
381
  def join(separator)
360
- Utils.wrap_expr(_rbexpr.lst_join(separator))
382
+ Utils.wrap_expr(_rbexpr.list_join(separator))
361
383
  end
362
384
 
363
385
  # Retrieve the index of the minimal value in every sublist.
@@ -382,7 +404,7 @@ module Polars
382
404
  # # │ 1 │
383
405
  # # └─────┘
384
406
  def arg_min
385
- Utils.wrap_expr(_rbexpr.lst_arg_min)
407
+ Utils.wrap_expr(_rbexpr.list_arg_min)
386
408
  end
387
409
 
388
410
  # Retrieve the index of the maximum value in every sublist.
@@ -407,7 +429,7 @@ module Polars
407
429
  # # │ 0 │
408
430
  # # └─────┘
409
431
  def arg_max
410
- Utils.wrap_expr(_rbexpr.lst_arg_max)
432
+ Utils.wrap_expr(_rbexpr.list_arg_max)
411
433
  end
412
434
 
413
435
  # Calculate the n-th discrete difference of every sublist.
@@ -430,7 +452,7 @@ module Polars
430
452
  # # [null, -8, -1]
431
453
  # # ]
432
454
  def diff(n: 1, null_behavior: "ignore")
433
- Utils.wrap_expr(_rbexpr.lst_diff(n, null_behavior))
455
+ Utils.wrap_expr(_rbexpr.list_diff(n, null_behavior))
434
456
  end
435
457
 
436
458
  # Shift values by the given period.
@@ -451,7 +473,7 @@ module Polars
451
473
  # # [null, 10, 2]
452
474
  # # ]
453
475
  def shift(periods = 1)
454
- Utils.wrap_expr(_rbexpr.lst_shift(periods))
476
+ Utils.wrap_expr(_rbexpr.list_shift(periods))
455
477
  end
456
478
 
457
479
  # Slice every sublist.
@@ -477,7 +499,7 @@ module Polars
477
499
  def slice(offset, length = nil)
478
500
  offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
479
501
  length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
480
- Utils.wrap_expr(_rbexpr.lst_slice(offset, length))
502
+ Utils.wrap_expr(_rbexpr.list_slice(offset, length))
481
503
  end
482
504
 
483
505
  # Slice the first `n` values of every sublist.
@@ -523,6 +545,33 @@ module Polars
523
545
  slice(offset, n)
524
546
  end
525
547
 
548
+ # Count how often the value produced by `element` occurs.
549
+ #
550
+ # @param element [Expr]
551
+ # An expression that produces a single value
552
+ #
553
+ # @return [Expr]
554
+ #
555
+ # @example
556
+ # df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
557
+ # df.select(Polars.col("listcol").arr.count_match(2).alias("number_of_twos"))
558
+ # # =>
559
+ # # shape: (5, 1)
560
+ # # ┌────────────────┐
561
+ # # │ number_of_twos │
562
+ # # │ --- │
563
+ # # │ u32 │
564
+ # # ╞════════════════╡
565
+ # # │ 0 │
566
+ # # │ 0 │
567
+ # # │ 2 │
568
+ # # │ 1 │
569
+ # # │ 0 │
570
+ # # └────────────────┘
571
+ def count_match(element)
572
+ Utils.wrap_expr(_rbexpr.list_count_match(Utils.expr_to_lit_or_expr(element)._rbexpr))
573
+ end
574
+
526
575
  # Convert the series of type `List` to a series of type `Struct`.
527
576
  #
528
577
  # @param n_field_strategy ["first_non_null", "max_width"]
@@ -548,7 +597,7 @@ module Polars
548
597
  # # └────────────┘
549
598
  def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
550
599
  raise Todo if name_generator
551
- Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator, 0))
600
+ Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
552
601
  end
553
602
 
554
603
  # Run any polars expression against the lists' elements.
@@ -582,7 +631,7 @@ module Polars
582
631
  # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
583
632
  # # └─────┴─────┴────────────┘
584
633
  def eval(expr, parallel: false)
585
- Utils.wrap_expr(_rbexpr.lst_eval(expr._rbexpr, parallel))
634
+ Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
586
635
  end
587
636
  end
588
637
  end