polars-df 0.4.0-arm64-darwin → 0.6.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/expr.rb CHANGED
@@ -362,7 +362,7 @@ module Polars
362
362
  if columns.is_a?(String)
363
363
  columns = [columns]
364
364
  return wrap_expr(_rbexpr.exclude(columns))
365
- elsif !columns.is_a?(Array)
365
+ elsif !columns.is_a?(::Array)
366
366
  columns = [columns]
367
367
  return wrap_expr(_rbexpr.exclude_dtype(columns))
368
368
  end
@@ -820,18 +820,18 @@ module Polars
820
820
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
821
821
  # # =>
822
822
  # # shape: (6, 1)
823
- # # ┌─────────┐
824
- # # │ literal
825
- # # │ ---
826
- # # │ i64
827
- # # ╞═════════╡
828
- # # │ null
829
- # # │ null
830
- # # │ null
831
- # # │ 1
832
- # # │ 1
833
- # # │ 2
834
- # # └─────────┘
823
+ # # ┌────────┐
824
+ # # │ repeat
825
+ # # │ ---
826
+ # # │ i64
827
+ # # ╞════════╡
828
+ # # │ null
829
+ # # │ null
830
+ # # │ null
831
+ # # │ 1
832
+ # # │ 1
833
+ # # │ 2
834
+ # # └────────┘
835
835
  def rechunk
836
836
  wrap_expr(_rbexpr.rechunk)
837
837
  end
@@ -1308,8 +1308,6 @@ module Polars
1308
1308
  #
1309
1309
  # @param k [Integer]
1310
1310
  # Number of elements to return.
1311
- # @param reverse [Boolean]
1312
- # Return the smallest elements.
1313
1311
  #
1314
1312
  # @return [Expr]
1315
1313
  #
@@ -1322,7 +1320,45 @@ module Polars
1322
1320
  # df.select(
1323
1321
  # [
1324
1322
  # Polars.col("value").top_k.alias("top_k"),
1325
- # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1323
+ # Polars.col("value").bottom_k.alias("bottom_k")
1324
+ # ]
1325
+ # )
1326
+ # # =>
1327
+ # # shape: (5, 2)
1328
+ # # ┌───────┬──────────┐
1329
+ # # │ top_k ┆ bottom_k │
1330
+ # # │ --- ┆ --- │
1331
+ # # │ i64 ┆ i64 │
1332
+ # # ╞═══════╪══════════╡
1333
+ # # │ 99 ┆ 1 │
1334
+ # # │ 98 ┆ 2 │
1335
+ # # │ 4 ┆ 3 │
1336
+ # # │ 3 ┆ 4 │
1337
+ # # │ 2 ┆ 98 │
1338
+ # # └───────┴──────────┘
1339
+ def top_k(k: 5)
1340
+ wrap_expr(_rbexpr.top_k(k))
1341
+ end
1342
+
1343
+ # Return the `k` smallest elements.
1344
+ #
1345
+ # There is no `reverse` option; use `top_k` to get the largest elements instead.
1346
+ #
1347
+ # @param k [Integer]
1348
+ # Number of elements to return.
1349
+ #
1350
+ # @return [Expr]
1351
+ #
1352
+ # @example
1353
+ # df = Polars::DataFrame.new(
1354
+ # {
1355
+ # "value" => [1, 98, 2, 3, 99, 4]
1356
+ # }
1357
+ # )
1358
+ # df.select(
1359
+ # [
1360
+ # Polars.col("value").top_k.alias("top_k"),
1361
+ # Polars.col("value").bottom_k.alias("bottom_k")
1326
1362
  # ]
1327
1363
  # )
1328
1364
  # # =>
@@ -1338,8 +1374,8 @@ module Polars
1338
1374
  # # │ 3 ┆ 4 │
1339
1375
  # # │ 2 ┆ 98 │
1340
1376
  # # └───────┴──────────┘
1341
- def top_k(k: 5, reverse: false)
1342
- wrap_expr(_rbexpr.top_k(k, reverse))
1377
+ def bottom_k(k: 5)
1378
+ wrap_expr(_rbexpr.bottom_k(k))
1343
1379
  end
1344
1380
 
1345
1381
  # Get the index values that would sort this column.
@@ -1498,10 +1534,10 @@ module Polars
1498
1534
  # # │ two │
1499
1535
  # # └───────┘
1500
1536
  def sort_by(by, reverse: false)
1501
- if !by.is_a?(Array)
1537
+ if !by.is_a?(::Array)
1502
1538
  by = [by]
1503
1539
  end
1504
- if !reverse.is_a?(Array)
1540
+ if !reverse.is_a?(::Array)
1505
1541
  reverse = [reverse]
1506
1542
  end
1507
1543
  by = Utils.selection_to_rbexpr_list(by)
@@ -1542,7 +1578,7 @@ module Polars
1542
1578
  # # │ two ┆ 99 │
1543
1579
  # # └───────┴───────┘
1544
1580
  def take(indices)
1545
- if indices.is_a?(Array)
1581
+ if indices.is_a?(::Array)
1546
1582
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1547
1583
  else
1548
1584
  indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
@@ -2008,6 +2044,28 @@ module Polars
2008
2044
  wrap_expr(_rbexpr.n_unique)
2009
2045
  end
2010
2046
 
2047
+ # Approximate count of unique values.
2048
+ #
2049
+ # This is done using the HyperLogLog++ algorithm for cardinality estimation.
2050
+ #
2051
+ # @return [Expr]
2052
+ #
2053
+ # @example
2054
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2055
+ # df.select(Polars.col("a").approx_unique)
2056
+ # # =>
2057
+ # # shape: (1, 1)
2058
+ # # ┌─────┐
2059
+ # # │ a │
2060
+ # # │ --- │
2061
+ # # │ u32 │
2062
+ # # ╞═════╡
2063
+ # # │ 2 │
2064
+ # # └─────┘
2065
+ def approx_unique
2066
+ wrap_expr(_rbexpr.approx_unique)
2067
+ end
2068
+
2011
2069
  # Count null values.
2012
2070
  #
2013
2071
  # @return [Expr]
@@ -2194,7 +2252,7 @@ module Polars
2194
2252
  # # │ 4 │
2195
2253
  # # │ 6 │
2196
2254
  # # │ 6 │
2197
- # # │
2255
+ # # │ 4 │
2198
2256
  # # │ 6 │
2199
2257
  # # │ 6 │
2200
2258
  # # │ 6 │
@@ -2378,14 +2436,14 @@ module Polars
2378
2436
  # ).sort("group_col")
2379
2437
  # # =>
2380
2438
  # # shape: (2, 3)
2381
- # # ┌───────────┬──────┬─────┐
2382
- # # │ group_col ┆ lt ┆ gte │
2383
- # # │ --- ┆ --- ┆ --- │
2384
- # # │ str ┆ i64 ┆ i64 │
2385
- # # ╞═══════════╪══════╪═════╡
2386
- # # │ g1 ┆ 1 ┆ 2 │
2387
- # # │ g2 ┆ null ┆ 3 │
2388
- # # └───────────┴──────┴─────┘
2439
+ # # ┌───────────┬─────┬─────┐
2440
+ # # │ group_col ┆ lt ┆ gte │
2441
+ # # │ --- ┆ --- ┆ --- │
2442
+ # # │ str ┆ i64 ┆ i64 │
2443
+ # # ╞═══════════╪═════╪═════╡
2444
+ # # │ g1 ┆ 1 ┆ 2 │
2445
+ # # │ g2 ┆ 0 ┆ 3 │
2446
+ # # └───────────┴─────┴─────┘
2389
2447
  def filter(predicate)
2390
2448
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
2391
2449
  end
@@ -2416,14 +2474,14 @@ module Polars
2416
2474
  # ).sort("group_col")
2417
2475
  # # =>
2418
2476
  # # shape: (2, 3)
2419
- # # ┌───────────┬──────┬─────┐
2420
- # # │ group_col ┆ lt ┆ gte │
2421
- # # │ --- ┆ --- ┆ --- │
2422
- # # │ str ┆ i64 ┆ i64 │
2423
- # # ╞═══════════╪══════╪═════╡
2424
- # # │ g1 ┆ 1 ┆ 2 │
2425
- # # │ g2 ┆ null ┆ 3 │
2426
- # # └───────────┴──────┴─────┘
2477
+ # # ┌───────────┬─────┬─────┐
2478
+ # # │ group_col ┆ lt ┆ gte │
2479
+ # # │ --- ┆ --- ┆ --- │
2480
+ # # │ str ┆ i64 ┆ i64 │
2481
+ # # ╞═══════════╪═════╪═════╡
2482
+ # # │ g1 ┆ 1 ┆ 2 │
2483
+ # # │ g2 ┆ 0 ┆ 3 │
2484
+ # # └───────────┴─────┴─────┘
2427
2485
  def where(predicate)
2428
2486
  filter(predicate)
2429
2487
  end
@@ -2558,25 +2616,23 @@ module Polars
2558
2616
  # @return [Expr]
2559
2617
  #
2560
2618
  # @example
2561
- # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2562
- # df.select(Polars.col("foo").flatten)
2563
- # # =>
2564
- # # shape: (10, 1)
2565
- # # ┌─────┐
2566
- # # │ foo │
2567
- # # │ --- │
2568
- # # │ str │
2569
- # # ╞═════╡
2570
- # # │ h │
2571
- # # │ e
2572
- # # │ l
2573
- # # │ l
2574
- # # │ … │
2575
- # # │ o
2576
- # # │ r
2577
- # # │ l │
2578
- # # │ d │
2579
- # # └─────┘
2619
+ # df = Polars::DataFrame.new(
2620
+ # {
2621
+ # "group" => ["a", "b", "b"],
2622
+ # "values" => [[1, 2], [2, 3], [4]]
2623
+ # }
2624
+ # )
2625
+ # df.groupby("group").agg(Polars.col("values").flatten)
2626
+ # # =>
2627
+ # # shape: (2, 2)
2628
+ # # ┌───────┬───────────┐
2629
+ # # │ group ┆ values
2630
+ # # │ --- ┆ ---
2631
+ # # │ str ┆ list[i64]
2632
+ # # ╞═══════╪═══════════╡
2633
+ # # │ a ┆ [1, 2]
2634
+ # # │ b ┆ [2, 3, 4]
2635
+ # # └───────┴───────────┘
2580
2636
  def flatten
2581
2637
  wrap_expr(_rbexpr.explode)
2582
2638
  end
@@ -2740,7 +2796,7 @@ module Polars
2740
2796
  # # │ false │
2741
2797
  # # └──────────┘
2742
2798
  def is_in(other)
2743
- if other.is_a?(Array)
2799
+ if other.is_a?(::Array)
2744
2800
  if other.length == 0
2745
2801
  other = Polars.lit(nil)
2746
2802
  else
@@ -2751,6 +2807,7 @@ module Polars
2751
2807
  end
2752
2808
  wrap_expr(_rbexpr.is_in(other._rbexpr))
2753
2809
  end
2810
+ alias_method :in?, :is_in
2754
2811
 
2755
2812
  # Repeat the elements in this Series as specified in the given expression.
2756
2813
  #
@@ -3443,14 +3500,15 @@ module Polars
3443
3500
  min_periods: nil,
3444
3501
  center: false,
3445
3502
  by: nil,
3446
- closed: "left"
3503
+ closed: "left",
3504
+ ddof: 1
3447
3505
  )
3448
3506
  window_size, min_periods = _prepare_rolling_window_args(
3449
3507
  window_size, min_periods
3450
3508
  )
3451
3509
  wrap_expr(
3452
3510
  _rbexpr.rolling_std(
3453
- window_size, weights, min_periods, center, by, closed
3511
+ window_size, weights, min_periods, center, by, closed, ddof
3454
3512
  )
3455
3513
  )
3456
3514
  end
@@ -3532,14 +3590,15 @@ module Polars
3532
3590
  min_periods: nil,
3533
3591
  center: false,
3534
3592
  by: nil,
3535
- closed: "left"
3593
+ closed: "left",
3594
+ ddof: 1
3536
3595
  )
3537
3596
  window_size, min_periods = _prepare_rolling_window_args(
3538
3597
  window_size, min_periods
3539
3598
  )
3540
3599
  wrap_expr(
3541
3600
  _rbexpr.rolling_var(
3542
- window_size, weights, min_periods, center, by, closed
3601
+ window_size, weights, min_periods, center, by, closed, ddof
3543
3602
  )
3544
3603
  )
3545
3604
  end
@@ -3914,8 +3973,8 @@ module Polars
3914
3973
  # # │ 2 │
3915
3974
  # # │ 5 │
3916
3975
  # # └─────┘
3917
- def rank(method: "average", reverse: false)
3918
- wrap_expr(_rbexpr.rank(method, reverse))
3976
+ def rank(method: "average", reverse: false, seed: nil)
3977
+ wrap_expr(_rbexpr.rank(method, reverse, seed))
3919
3978
  end
3920
3979
 
3921
3980
  # Calculate the n-th discrete difference.
@@ -4499,11 +4558,11 @@ module Polars
4499
4558
  # # │ 1 │
4500
4559
  # # │ 3 │
4501
4560
  # # └─────┘
4502
- def shuffle(seed: nil)
4561
+ def shuffle(seed: nil, fixed_seed: false)
4503
4562
  if seed.nil?
4504
4563
  seed = rand(10000)
4505
4564
  end
4506
- wrap_expr(_rbexpr.shuffle(seed))
4565
+ wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
4507
4566
  end
4508
4567
 
4509
4568
  # Sample from this expression.
@@ -4541,21 +4600,22 @@ module Polars
4541
4600
  with_replacement: true,
4542
4601
  shuffle: false,
4543
4602
  seed: nil,
4544
- n: nil
4603
+ n: nil,
4604
+ fixed_seed: false
4545
4605
  )
4546
4606
  if !n.nil? && !frac.nil?
4547
4607
  raise ArgumentError, "cannot specify both `n` and `frac`"
4548
4608
  end
4549
4609
 
4550
4610
  if !n.nil? && frac.nil?
4551
- return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4611
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
4552
4612
  end
4553
4613
 
4554
4614
  if frac.nil?
4555
4615
  frac = 1.0
4556
4616
  end
4557
4617
  wrap_expr(
4558
- _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4618
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
4559
4619
  )
4560
4620
  end
4561
4621
 
@@ -4870,8 +4930,8 @@ module Polars
4870
4930
  #
4871
4931
  # Enables downstream code to use fast paths for sorted arrays.
4872
4932
  #
4873
- # @param reverse [Boolean]
4874
- # If the `Series` order is reversed, e.g. descending.
4933
+ # @param descending [Boolean]
4934
+ # Whether the `Series` order is descending.
4875
4935
  #
4876
4936
  # @return [Expr]
4877
4937
  #
@@ -4891,9 +4951,9 @@ module Polars
4891
4951
  # # ╞════════╡
4892
4952
  # # │ 3 │
4893
4953
  # # └────────┘
4894
- # def set_sorted(reverse: false)
4895
- # map { |s| s.set_sorted(reverse) }
4896
- # end
4954
+ def set_sorted(descending: false)
4955
+ wrap_expr(_rbexpr.set_sorted_flag(descending))
4956
+ end
4897
4957
 
4898
4958
  # Aggregate to list.
4899
4959
  #
@@ -4906,7 +4966,7 @@ module Polars
4906
4966
  # "b" => [4, 5, 6]
4907
4967
  # }
4908
4968
  # )
4909
- # df.select(Polars.all.list)
4969
+ # df.select(Polars.all.implode)
4910
4970
  # # =>
4911
4971
  # # shape: (1, 2)
4912
4972
  # # ┌───────────┬───────────┐
@@ -4916,8 +4976,8 @@ module Polars
4916
4976
  # # ╞═══════════╪═══════════╡
4917
4977
  # # │ [1, 2, 3] ┆ [4, 5, 6] │
4918
4978
  # # └───────────┴───────────┘
4919
- def list
4920
- wrap_expr(_rbexpr.list)
4979
+ def implode
4980
+ wrap_expr(_rbexpr.implode)
4921
4981
  end
4922
4982
 
4923
4983
  # Shrink numeric columns to the minimal required datatype.
@@ -4958,10 +5018,17 @@ module Polars
4958
5018
  # Create an object namespace of all list related methods.
4959
5019
  #
4960
5020
  # @return [ListExpr]
4961
- def arr
5021
+ def list
4962
5022
  ListExpr.new(self)
4963
5023
  end
4964
5024
 
5025
+ # Create an object namespace of all array related methods.
5026
+ #
5027
+ # @return [ArrayExpr]
5028
+ def arr
5029
+ ArrayExpr.new(self)
5030
+ end
5031
+
4965
5032
  # Create an object namespace of all binary related methods.
4966
5033
  #
4967
5034
  # @return [BinaryExpr]
@@ -43,7 +43,6 @@ module Polars
43
43
  # # │ i64 ┆ i64 │
44
44
  # # ╞═════╪═════╡
45
45
  # # │ 1 ┆ 3 │
46
- # # ├╌╌╌╌╌┼╌╌╌╌╌┤
47
46
  # # │ 2 ┆ 4 │
48
47
  # # └─────┴─────┘
49
48
  def concat(items, rechunk: true, how: "vertical", parallel: true)
@@ -551,32 +551,11 @@ module Polars
551
551
  agg(Polars.all.median)
552
552
  end
553
553
 
554
- # Aggregate the groups into Series.
555
- #
556
- # @return [DataFrame]
557
- #
558
- # @example
559
- # df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
560
- # df.groupby("a", maintain_order: true).agg_list
561
- # # =>
562
- # # shape: (2, 2)
563
- # # ┌─────┬─────────────────┐
564
- # # │ a ┆ b │
565
- # # │ --- ┆ --- │
566
- # # │ str ┆ list[list[i64]] │
567
- # # ╞═════╪═════════════════╡
568
- # # │ one ┆ [[1, 3]] │
569
- # # │ two ┆ [[2, 4]] │
570
- # # └─────┴─────────────────┘
571
- def agg_list
572
- agg(Polars.all.list)
573
- end
574
-
575
554
  # Plot data.
576
555
  #
577
556
  # @return [Vega::LiteChart]
578
557
  def plot(*args, **options)
579
- raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
558
+ raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
580
559
  # same message as Ruby
581
560
  raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
582
561