polars-df 0.4.0-x86_64-darwin → 0.6.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/expr.rb CHANGED
@@ -362,7 +362,7 @@ module Polars
362
362
  if columns.is_a?(String)
363
363
  columns = [columns]
364
364
  return wrap_expr(_rbexpr.exclude(columns))
365
- elsif !columns.is_a?(Array)
365
+ elsif !columns.is_a?(::Array)
366
366
  columns = [columns]
367
367
  return wrap_expr(_rbexpr.exclude_dtype(columns))
368
368
  end
@@ -820,18 +820,18 @@ module Polars
820
820
  # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
821
821
  # # =>
822
822
  # # shape: (6, 1)
823
- # # ┌─────────┐
824
- # # │ literal
825
- # # │ ---
826
- # # │ i64
827
- # # ╞═════════╡
828
- # # │ null
829
- # # │ null
830
- # # │ null
831
- # # │ 1
832
- # # │ 1
833
- # # │ 2
834
- # # └─────────┘
823
+ # # ┌────────┐
824
+ # # │ repeat
825
+ # # │ ---
826
+ # # │ i64
827
+ # # ╞════════╡
828
+ # # │ null
829
+ # # │ null
830
+ # # │ null
831
+ # # │ 1
832
+ # # │ 1
833
+ # # │ 2
834
+ # # └────────┘
835
835
  def rechunk
836
836
  wrap_expr(_rbexpr.rechunk)
837
837
  end
@@ -1308,8 +1308,6 @@ module Polars
1308
1308
  #
1309
1309
  # @param k [Integer]
1310
1310
  # Number of elements to return.
1311
- # @param reverse [Boolean]
1312
- # Return the smallest elements.
1313
1311
  #
1314
1312
  # @return [Expr]
1315
1313
  #
@@ -1322,7 +1320,45 @@ module Polars
1322
1320
  # df.select(
1323
1321
  # [
1324
1322
  # Polars.col("value").top_k.alias("top_k"),
1325
- # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1323
+ # Polars.col("value").bottom_k.alias("bottom_k")
1324
+ # ]
1325
+ # )
1326
+ # # =>
1327
+ # # shape: (5, 2)
1328
+ # # ┌───────┬──────────┐
1329
+ # # │ top_k ┆ bottom_k │
1330
+ # # │ --- ┆ --- │
1331
+ # # │ i64 ┆ i64 │
1332
+ # # ╞═══════╪══════════╡
1333
+ # # │ 99 ┆ 1 │
1334
+ # # │ 98 ┆ 2 │
1335
+ # # │ 4 ┆ 3 │
1336
+ # # │ 3 ┆ 4 │
1337
+ # # │ 2 ┆ 98 │
1338
+ # # └───────┴──────────┘
1339
+ def top_k(k: 5)
1340
+ wrap_expr(_rbexpr.top_k(k))
1341
+ end
1342
+
1343
+ # Return the `k` smallest elements.
1344
+ #
1345
+ # This is the counterpart of `top_k`: the smallest elements will be given.
1346
+ #
1347
+ # @param k [Integer]
1348
+ # Number of elements to return.
1349
+ #
1350
+ # @return [Expr]
1351
+ #
1352
+ # @example
1353
+ # df = Polars::DataFrame.new(
1354
+ # {
1355
+ # "value" => [1, 98, 2, 3, 99, 4]
1356
+ # }
1357
+ # )
1358
+ # df.select(
1359
+ # [
1360
+ # Polars.col("value").top_k.alias("top_k"),
1361
+ # Polars.col("value").bottom_k.alias("bottom_k")
1326
1362
  # ]
1327
1363
  # )
1328
1364
  # # =>
@@ -1338,8 +1374,8 @@ module Polars
1338
1374
  # # │ 3 ┆ 4 │
1339
1375
  # # │ 2 ┆ 98 │
1340
1376
  # # └───────┴──────────┘
1341
- def top_k(k: 5, reverse: false)
1342
- wrap_expr(_rbexpr.top_k(k, reverse))
1377
+ def bottom_k(k: 5)
1378
+ wrap_expr(_rbexpr.bottom_k(k))
1343
1379
  end
1344
1380
 
1345
1381
  # Get the index values that would sort this column.
@@ -1498,10 +1534,10 @@ module Polars
1498
1534
  # # │ two │
1499
1535
  # # └───────┘
1500
1536
  def sort_by(by, reverse: false)
1501
- if !by.is_a?(Array)
1537
+ if !by.is_a?(::Array)
1502
1538
  by = [by]
1503
1539
  end
1504
- if !reverse.is_a?(Array)
1540
+ if !reverse.is_a?(::Array)
1505
1541
  reverse = [reverse]
1506
1542
  end
1507
1543
  by = Utils.selection_to_rbexpr_list(by)
@@ -1542,7 +1578,7 @@ module Polars
1542
1578
  # # │ two ┆ 99 │
1543
1579
  # # └───────┴───────┘
1544
1580
  def take(indices)
1545
- if indices.is_a?(Array)
1581
+ if indices.is_a?(::Array)
1546
1582
  indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
1547
1583
  else
1548
1584
  indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
@@ -2008,6 +2044,28 @@ module Polars
2008
2044
  wrap_expr(_rbexpr.n_unique)
2009
2045
  end
2010
2046
 
2047
+ # Approximate count of unique values.
2048
+ #
2049
+ # This is done using the HyperLogLog++ algorithm for cardinality estimation.
2050
+ #
2051
+ # @return [Expr]
2052
+ #
2053
+ # @example
2054
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2055
+ # df.select(Polars.col("a").approx_unique)
2056
+ # # =>
2057
+ # # shape: (1, 1)
2058
+ # # ┌─────┐
2059
+ # # │ a │
2060
+ # # │ --- │
2061
+ # # │ u32 │
2062
+ # # ╞═════╡
2063
+ # # │ 2 │
2064
+ # # └─────┘
2065
+ def approx_unique
2066
+ wrap_expr(_rbexpr.approx_unique)
2067
+ end
2068
+
2011
2069
  # Count null values.
2012
2070
  #
2013
2071
  # @return [Expr]
@@ -2194,7 +2252,7 @@ module Polars
2194
2252
  # # │ 4 │
2195
2253
  # # │ 6 │
2196
2254
  # # │ 6 │
2197
- # # │
2255
+ # # │ 4
2198
2256
  # # │ 6 │
2199
2257
  # # │ 6 │
2200
2258
  # # │ 6 │
@@ -2378,14 +2436,14 @@ module Polars
2378
2436
  # ).sort("group_col")
2379
2437
  # # =>
2380
2438
  # # shape: (2, 3)
2381
- # # ┌───────────┬──────┬─────┐
2382
- # # │ group_col ┆ lt ┆ gte │
2383
- # # │ --- ┆ --- ┆ --- │
2384
- # # │ str ┆ i64 ┆ i64 │
2385
- # # ╞═══════════╪══════╪═════╡
2386
- # # │ g1 ┆ 1 ┆ 2 │
2387
- # # │ g2 ┆ null ┆ 3 │
2388
- # # └───────────┴──────┴─────┘
2439
+ # # ┌───────────┬─────┬─────┐
2440
+ # # │ group_col ┆ lt ┆ gte │
2441
+ # # │ --- ┆ --- ┆ --- │
2442
+ # # │ str ┆ i64 ┆ i64 │
2443
+ # # ╞═══════════╪═════╪═════╡
2444
+ # # │ g1 ┆ 1 ┆ 2 │
2445
+ # # │ g2 ┆ 0 ┆ 3 │
2446
+ # # └───────────┴─────┴─────┘
2389
2447
  def filter(predicate)
2390
2448
  wrap_expr(_rbexpr.filter(predicate._rbexpr))
2391
2449
  end
@@ -2416,14 +2474,14 @@ module Polars
2416
2474
  # ).sort("group_col")
2417
2475
  # # =>
2418
2476
  # # shape: (2, 3)
2419
- # # ┌───────────┬──────┬─────┐
2420
- # # │ group_col ┆ lt ┆ gte │
2421
- # # │ --- ┆ --- ┆ --- │
2422
- # # │ str ┆ i64 ┆ i64 │
2423
- # # ╞═══════════╪══════╪═════╡
2424
- # # │ g1 ┆ 1 ┆ 2 │
2425
- # # │ g2 ┆ null ┆ 3 │
2426
- # # └───────────┴──────┴─────┘
2477
+ # # ┌───────────┬─────┬─────┐
2478
+ # # │ group_col ┆ lt ┆ gte │
2479
+ # # │ --- ┆ --- ┆ --- │
2480
+ # # │ str ┆ i64 ┆ i64 │
2481
+ # # ╞═══════════╪═════╪═════╡
2482
+ # # │ g1 ┆ 1 ┆ 2 │
2483
+ # # │ g2 ┆ 0 ┆ 3 │
2484
+ # # └───────────┴─────┴─────┘
2427
2485
  def where(predicate)
2428
2486
  filter(predicate)
2429
2487
  end
@@ -2558,25 +2616,23 @@ module Polars
2558
2616
  # @return [Expr]
2559
2617
  #
2560
2618
  # @example
2561
- # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
2562
- # df.select(Polars.col("foo").flatten)
2563
- # # =>
2564
- # # shape: (10, 1)
2565
- # # ┌─────┐
2566
- # # │ foo │
2567
- # # │ --- │
2568
- # # │ str │
2569
- # # ╞═════╡
2570
- # # │ h │
2571
- # # │ e
2572
- # # │ l
2573
- # # │ l
2574
- # # │ … │
2575
- # # │ o
2576
- # # │ r
2577
- # # │ l │
2578
- # # │ d │
2579
- # # └─────┘
2619
+ # df = Polars::DataFrame.new(
2620
+ # {
2621
+ # "group" => ["a", "b", "b"],
2622
+ # "values" => [[1, 2], [2, 3], [4]]
2623
+ # }
2624
+ # )
2625
+ # df.groupby("group").agg(Polars.col("values").flatten)
2626
+ # # =>
2627
+ # # shape: (2, 2)
2628
+ # # ┌───────┬───────────┐
2629
+ # # │ group ┆ values
2630
+ # # │ --- ┆ ---
2631
+ # # │ str ┆ list[i64]
2632
+ # # ╞═══════╪═══════════╡
2633
+ # # │ a ┆ [1, 2]
2634
+ # # │ b ┆ [2, 3, 4]
2635
+ # # └───────┴───────────┘
2580
2636
  def flatten
2581
2637
  wrap_expr(_rbexpr.explode)
2582
2638
  end
@@ -2740,7 +2796,7 @@ module Polars
2740
2796
  # # │ false │
2741
2797
  # # └──────────┘
2742
2798
  def is_in(other)
2743
- if other.is_a?(Array)
2799
+ if other.is_a?(::Array)
2744
2800
  if other.length == 0
2745
2801
  other = Polars.lit(nil)
2746
2802
  else
@@ -2751,6 +2807,7 @@ module Polars
2751
2807
  end
2752
2808
  wrap_expr(_rbexpr.is_in(other._rbexpr))
2753
2809
  end
2810
+ alias_method :in?, :is_in
2754
2811
 
2755
2812
  # Repeat the elements in this Series as specified in the given expression.
2756
2813
  #
@@ -3443,14 +3500,15 @@ module Polars
3443
3500
  min_periods: nil,
3444
3501
  center: false,
3445
3502
  by: nil,
3446
- closed: "left"
3503
+ closed: "left",
3504
+ ddof: 1
3447
3505
  )
3448
3506
  window_size, min_periods = _prepare_rolling_window_args(
3449
3507
  window_size, min_periods
3450
3508
  )
3451
3509
  wrap_expr(
3452
3510
  _rbexpr.rolling_std(
3453
- window_size, weights, min_periods, center, by, closed
3511
+ window_size, weights, min_periods, center, by, closed, ddof
3454
3512
  )
3455
3513
  )
3456
3514
  end
@@ -3532,14 +3590,15 @@ module Polars
3532
3590
  min_periods: nil,
3533
3591
  center: false,
3534
3592
  by: nil,
3535
- closed: "left"
3593
+ closed: "left",
3594
+ ddof: 1
3536
3595
  )
3537
3596
  window_size, min_periods = _prepare_rolling_window_args(
3538
3597
  window_size, min_periods
3539
3598
  )
3540
3599
  wrap_expr(
3541
3600
  _rbexpr.rolling_var(
3542
- window_size, weights, min_periods, center, by, closed
3601
+ window_size, weights, min_periods, center, by, closed, ddof
3543
3602
  )
3544
3603
  )
3545
3604
  end
@@ -3914,8 +3973,8 @@ module Polars
3914
3973
  # # │ 2 │
3915
3974
  # # │ 5 │
3916
3975
  # # └─────┘
3917
- def rank(method: "average", reverse: false)
3918
- wrap_expr(_rbexpr.rank(method, reverse))
3976
+ def rank(method: "average", reverse: false, seed: nil)
3977
+ wrap_expr(_rbexpr.rank(method, reverse, seed))
3919
3978
  end
3920
3979
 
3921
3980
  # Calculate the n-th discrete difference.
@@ -4499,11 +4558,11 @@ module Polars
4499
4558
  # # │ 1 │
4500
4559
  # # │ 3 │
4501
4560
  # # └─────┘
4502
- def shuffle(seed: nil)
4561
+ def shuffle(seed: nil, fixed_seed: false)
4503
4562
  if seed.nil?
4504
4563
  seed = rand(10000)
4505
4564
  end
4506
- wrap_expr(_rbexpr.shuffle(seed))
4565
+ wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
4507
4566
  end
4508
4567
 
4509
4568
  # Sample from this expression.
@@ -4541,21 +4600,22 @@ module Polars
4541
4600
  with_replacement: true,
4542
4601
  shuffle: false,
4543
4602
  seed: nil,
4544
- n: nil
4603
+ n: nil,
4604
+ fixed_seed: false
4545
4605
  )
4546
4606
  if !n.nil? && !frac.nil?
4547
4607
  raise ArgumentError, "cannot specify both `n` and `frac`"
4548
4608
  end
4549
4609
 
4550
4610
  if !n.nil? && frac.nil?
4551
- return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
4611
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
4552
4612
  end
4553
4613
 
4554
4614
  if frac.nil?
4555
4615
  frac = 1.0
4556
4616
  end
4557
4617
  wrap_expr(
4558
- _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
4618
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
4559
4619
  )
4560
4620
  end
4561
4621
 
@@ -4870,8 +4930,8 @@ module Polars
4870
4930
  #
4871
4931
  # Enables downstream code to use fast paths for sorted arrays.
4872
4932
  #
4873
- # @param reverse [Boolean]
4874
- # If the `Series` order is reversed, e.g. descending.
4933
+ # @param descending [Boolean]
4934
+ # Whether the `Series` order is descending.
4875
4935
  #
4876
4936
  # @return [Expr]
4877
4937
  #
@@ -4891,9 +4951,9 @@ module Polars
4891
4951
  # # ╞════════╡
4892
4952
  # # │ 3 │
4893
4953
  # # └────────┘
4894
- # def set_sorted(reverse: false)
4895
- # map { |s| s.set_sorted(reverse) }
4896
- # end
4954
+ def set_sorted(descending: false)
4955
+ wrap_expr(_rbexpr.set_sorted_flag(descending))
4956
+ end
4897
4957
 
4898
4958
  # Aggregate to list.
4899
4959
  #
@@ -4906,7 +4966,7 @@ module Polars
4906
4966
  # "b" => [4, 5, 6]
4907
4967
  # }
4908
4968
  # )
4909
- # df.select(Polars.all.list)
4969
+ # df.select(Polars.all.implode)
4910
4970
  # # =>
4911
4971
  # # shape: (1, 2)
4912
4972
  # # ┌───────────┬───────────┐
@@ -4916,8 +4976,8 @@ module Polars
4916
4976
  # # ╞═══════════╪═══════════╡
4917
4977
  # # │ [1, 2, 3] ┆ [4, 5, 6] │
4918
4978
  # # └───────────┴───────────┘
4919
- def list
4920
- wrap_expr(_rbexpr.list)
4979
+ def implode
4980
+ wrap_expr(_rbexpr.implode)
4921
4981
  end
4922
4982
 
4923
4983
  # Shrink numeric columns to the minimal required datatype.
@@ -4958,10 +5018,17 @@ module Polars
4958
5018
  # Create an object namespace of all list related methods.
4959
5019
  #
4960
5020
  # @return [ListExpr]
4961
- def arr
5021
+ def list
4962
5022
  ListExpr.new(self)
4963
5023
  end
4964
5024
 
5025
+ # Create an object namespace of all array related methods.
5026
+ #
5027
+ # @return [ArrayExpr]
5028
+ def arr
5029
+ ArrayExpr.new(self)
5030
+ end
5031
+
4965
5032
  # Create an object namespace of all binary related methods.
4966
5033
  #
4967
5034
  # @return [BinaryExpr]
@@ -43,7 +43,6 @@ module Polars
43
43
  # # │ i64 ┆ i64 │
44
44
  # # ╞═════╪═════╡
45
45
  # # │ 1 ┆ 3 │
46
- # # ├╌╌╌╌╌┼╌╌╌╌╌┤
47
46
  # # │ 2 ┆ 4 │
48
47
  # # └─────┴─────┘
49
48
  def concat(items, rechunk: true, how: "vertical", parallel: true)
@@ -551,32 +551,11 @@ module Polars
551
551
  agg(Polars.all.median)
552
552
  end
553
553
 
554
- # Aggregate the groups into Series.
555
- #
556
- # @return [DataFrame]
557
- #
558
- # @example
559
- # df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
560
- # df.groupby("a", maintain_order: true).agg_list
561
- # # =>
562
- # # shape: (2, 2)
563
- # # ┌─────┬─────────────────┐
564
- # # │ a ┆ b │
565
- # # │ --- ┆ --- │
566
- # # │ str ┆ list[list[i64]] │
567
- # # ╞═════╪═════════════════╡
568
- # # │ one ┆ [[1, 3]] │
569
- # # │ two ┆ [[2, 4]] │
570
- # # └─────┴─────────────────┘
571
- def agg_list
572
- agg(Polars.all.list)
573
- end
574
-
575
554
  # Plot data.
576
555
  #
577
556
  # @return [Vega::LiteChart]
578
557
  def plot(*args, **options)
579
- raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
558
+ raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
580
559
  # same message as Ruby
581
560
  raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
582
561