polars-df 0.4.0-x86_64-linux → 0.6.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2386 -1216
- data/README.md +6 -5
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +4 -2
data/lib/polars/expr.rb
CHANGED
@@ -362,7 +362,7 @@ module Polars
|
|
362
362
|
if columns.is_a?(String)
|
363
363
|
columns = [columns]
|
364
364
|
return wrap_expr(_rbexpr.exclude(columns))
|
365
|
-
elsif !columns.is_a?(Array)
|
365
|
+
elsif !columns.is_a?(::Array)
|
366
366
|
columns = [columns]
|
367
367
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
368
368
|
end
|
@@ -820,18 +820,18 @@ module Polars
|
|
820
820
|
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
821
821
|
# # =>
|
822
822
|
# # shape: (6, 1)
|
823
|
-
# #
|
824
|
-
# # │
|
825
|
-
# # │ ---
|
826
|
-
# # │ i64
|
827
|
-
# #
|
828
|
-
# # │ null
|
829
|
-
# # │ null
|
830
|
-
# # │ null
|
831
|
-
# # │ 1
|
832
|
-
# # │ 1
|
833
|
-
# # │ 2
|
834
|
-
# #
|
823
|
+
# # ┌────────┐
|
824
|
+
# # │ repeat │
|
825
|
+
# # │ --- │
|
826
|
+
# # │ i64 │
|
827
|
+
# # ╞════════╡
|
828
|
+
# # │ null │
|
829
|
+
# # │ null │
|
830
|
+
# # │ null │
|
831
|
+
# # │ 1 │
|
832
|
+
# # │ 1 │
|
833
|
+
# # │ 2 │
|
834
|
+
# # └────────┘
|
835
835
|
def rechunk
|
836
836
|
wrap_expr(_rbexpr.rechunk)
|
837
837
|
end
|
@@ -1308,8 +1308,6 @@ module Polars
|
|
1308
1308
|
#
|
1309
1309
|
# @param k [Integer]
|
1310
1310
|
# Number of elements to return.
|
1311
|
-
# @param reverse [Boolean]
|
1312
|
-
# Return the smallest elements.
|
1313
1311
|
#
|
1314
1312
|
# @return [Expr]
|
1315
1313
|
#
|
@@ -1322,7 +1320,45 @@ module Polars
|
|
1322
1320
|
# df.select(
|
1323
1321
|
# [
|
1324
1322
|
# Polars.col("value").top_k.alias("top_k"),
|
1325
|
-
# Polars.col("value").
|
1323
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1324
|
+
# ]
|
1325
|
+
# )
|
1326
|
+
# # =>
|
1327
|
+
# # shape: (5, 2)
|
1328
|
+
# # ┌───────┬──────────┐
|
1329
|
+
# # │ top_k ┆ bottom_k │
|
1330
|
+
# # │ --- ┆ --- │
|
1331
|
+
# # │ i64 ┆ i64 │
|
1332
|
+
# # ╞═══════╪══════════╡
|
1333
|
+
# # │ 99 ┆ 1 │
|
1334
|
+
# # │ 98 ┆ 2 │
|
1335
|
+
# # │ 4 ┆ 3 │
|
1336
|
+
# # │ 3 ┆ 4 │
|
1337
|
+
# # │ 2 ┆ 98 │
|
1338
|
+
# # └───────┴──────────┘
|
1339
|
+
def top_k(k: 5)
|
1340
|
+
wrap_expr(_rbexpr.top_k(k))
|
1341
|
+
end
|
1342
|
+
|
1343
|
+
# Return the `k` smallest elements.
|
1344
|
+
#
|
1345
|
+
# If `reverse: true` the smallest elements will be given.
|
1346
|
+
#
|
1347
|
+
# @param k [Integer]
|
1348
|
+
# Number of elements to return.
|
1349
|
+
#
|
1350
|
+
# @return [Expr]
|
1351
|
+
#
|
1352
|
+
# @example
|
1353
|
+
# df = Polars::DataFrame.new(
|
1354
|
+
# {
|
1355
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1356
|
+
# }
|
1357
|
+
# )
|
1358
|
+
# df.select(
|
1359
|
+
# [
|
1360
|
+
# Polars.col("value").top_k.alias("top_k"),
|
1361
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1326
1362
|
# ]
|
1327
1363
|
# )
|
1328
1364
|
# # =>
|
@@ -1338,8 +1374,8 @@ module Polars
|
|
1338
1374
|
# # │ 3 ┆ 4 │
|
1339
1375
|
# # │ 2 ┆ 98 │
|
1340
1376
|
# # └───────┴──────────┘
|
1341
|
-
def
|
1342
|
-
wrap_expr(_rbexpr.
|
1377
|
+
def bottom_k(k: 5)
|
1378
|
+
wrap_expr(_rbexpr.bottom_k(k))
|
1343
1379
|
end
|
1344
1380
|
|
1345
1381
|
# Get the index values that would sort this column.
|
@@ -1498,10 +1534,10 @@ module Polars
|
|
1498
1534
|
# # │ two │
|
1499
1535
|
# # └───────┘
|
1500
1536
|
def sort_by(by, reverse: false)
|
1501
|
-
if !by.is_a?(Array)
|
1537
|
+
if !by.is_a?(::Array)
|
1502
1538
|
by = [by]
|
1503
1539
|
end
|
1504
|
-
if !reverse.is_a?(Array)
|
1540
|
+
if !reverse.is_a?(::Array)
|
1505
1541
|
reverse = [reverse]
|
1506
1542
|
end
|
1507
1543
|
by = Utils.selection_to_rbexpr_list(by)
|
@@ -1542,7 +1578,7 @@ module Polars
|
|
1542
1578
|
# # │ two ┆ 99 │
|
1543
1579
|
# # └───────┴───────┘
|
1544
1580
|
def take(indices)
|
1545
|
-
if indices.is_a?(Array)
|
1581
|
+
if indices.is_a?(::Array)
|
1546
1582
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1547
1583
|
else
|
1548
1584
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
@@ -2008,6 +2044,28 @@ module Polars
|
|
2008
2044
|
wrap_expr(_rbexpr.n_unique)
|
2009
2045
|
end
|
2010
2046
|
|
2047
|
+
# Approx count unique values.
|
2048
|
+
#
|
2049
|
+
# This is done using the HyperLogLog++ algorithm for cardinality estimation.
|
2050
|
+
#
|
2051
|
+
# @return [Expr]
|
2052
|
+
#
|
2053
|
+
# @example
|
2054
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2055
|
+
# df.select(Polars.col("a").approx_unique)
|
2056
|
+
# # =>
|
2057
|
+
# # shape: (1, 1)
|
2058
|
+
# # ┌─────┐
|
2059
|
+
# # │ a │
|
2060
|
+
# # │ --- │
|
2061
|
+
# # │ u32 │
|
2062
|
+
# # ╞═════╡
|
2063
|
+
# # │ 2 │
|
2064
|
+
# # └─────┘
|
2065
|
+
def approx_unique
|
2066
|
+
wrap_expr(_rbexpr.approx_unique)
|
2067
|
+
end
|
2068
|
+
|
2011
2069
|
# Count null values.
|
2012
2070
|
#
|
2013
2071
|
# @return [Expr]
|
@@ -2194,7 +2252,7 @@ module Polars
|
|
2194
2252
|
# # │ 4 │
|
2195
2253
|
# # │ 6 │
|
2196
2254
|
# # │ 6 │
|
2197
|
-
# # │
|
2255
|
+
# # │ 4 │
|
2198
2256
|
# # │ 6 │
|
2199
2257
|
# # │ 6 │
|
2200
2258
|
# # │ 6 │
|
@@ -2378,14 +2436,14 @@ module Polars
|
|
2378
2436
|
# ).sort("group_col")
|
2379
2437
|
# # =>
|
2380
2438
|
# # shape: (2, 3)
|
2381
|
-
# #
|
2382
|
-
# # │ group_col ┆ lt
|
2383
|
-
# # │ --- ┆ ---
|
2384
|
-
# # │ str ┆ i64
|
2385
|
-
# #
|
2386
|
-
# # │ g1 ┆ 1
|
2387
|
-
# # │ g2 ┆
|
2388
|
-
# #
|
2439
|
+
# # ┌───────────┬─────┬─────┐
|
2440
|
+
# # │ group_col ┆ lt ┆ gte │
|
2441
|
+
# # │ --- ┆ --- ┆ --- │
|
2442
|
+
# # │ str ┆ i64 ┆ i64 │
|
2443
|
+
# # ╞═══════════╪═════╪═════╡
|
2444
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2445
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2446
|
+
# # └───────────┴─────┴─────┘
|
2389
2447
|
def filter(predicate)
|
2390
2448
|
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
2391
2449
|
end
|
@@ -2416,14 +2474,14 @@ module Polars
|
|
2416
2474
|
# ).sort("group_col")
|
2417
2475
|
# # =>
|
2418
2476
|
# # shape: (2, 3)
|
2419
|
-
# #
|
2420
|
-
# # │ group_col ┆ lt
|
2421
|
-
# # │ --- ┆ ---
|
2422
|
-
# # │ str ┆ i64
|
2423
|
-
# #
|
2424
|
-
# # │ g1 ┆ 1
|
2425
|
-
# # │ g2 ┆
|
2426
|
-
# #
|
2477
|
+
# # ┌───────────┬─────┬─────┐
|
2478
|
+
# # │ group_col ┆ lt ┆ gte │
|
2479
|
+
# # │ --- ┆ --- ┆ --- │
|
2480
|
+
# # │ str ┆ i64 ┆ i64 │
|
2481
|
+
# # ╞═══════════╪═════╪═════╡
|
2482
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2483
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2484
|
+
# # └───────────┴─────┴─────┘
|
2427
2485
|
def where(predicate)
|
2428
2486
|
filter(predicate)
|
2429
2487
|
end
|
@@ -2558,25 +2616,23 @@ module Polars
|
|
2558
2616
|
# @return [Expr]
|
2559
2617
|
#
|
2560
2618
|
# @example
|
2561
|
-
#
|
2562
|
-
#
|
2563
|
-
#
|
2564
|
-
#
|
2565
|
-
#
|
2566
|
-
#
|
2567
|
-
#
|
2568
|
-
#
|
2569
|
-
#
|
2570
|
-
#
|
2571
|
-
#
|
2572
|
-
#
|
2573
|
-
#
|
2574
|
-
#
|
2575
|
-
#
|
2576
|
-
#
|
2577
|
-
#
|
2578
|
-
# # │ d │
|
2579
|
-
# # └─────┘
|
2619
|
+
# df = Polars::DataFrame.new(
|
2620
|
+
# {
|
2621
|
+
# "group" => ["a", "b", "b"],
|
2622
|
+
# "values" => [[1, 2], [2, 3], [4]]
|
2623
|
+
# }
|
2624
|
+
# )
|
2625
|
+
# df.groupby("group").agg(Polars.col("values").flatten)
|
2626
|
+
# # =>
|
2627
|
+
# # shape: (2, 2)
|
2628
|
+
# # ┌───────┬───────────┐
|
2629
|
+
# # │ group ┆ values │
|
2630
|
+
# # │ --- ┆ --- │
|
2631
|
+
# # │ str ┆ list[i64] │
|
2632
|
+
# # ╞═══════╪═══════════╡
|
2633
|
+
# # │ a ┆ [1, 2] │
|
2634
|
+
# # │ b ┆ [2, 3, 4] │
|
2635
|
+
# # └───────┴───────────┘
|
2580
2636
|
def flatten
|
2581
2637
|
wrap_expr(_rbexpr.explode)
|
2582
2638
|
end
|
@@ -2740,7 +2796,7 @@ module Polars
|
|
2740
2796
|
# # │ false │
|
2741
2797
|
# # └──────────┘
|
2742
2798
|
def is_in(other)
|
2743
|
-
if other.is_a?(Array)
|
2799
|
+
if other.is_a?(::Array)
|
2744
2800
|
if other.length == 0
|
2745
2801
|
other = Polars.lit(nil)
|
2746
2802
|
else
|
@@ -2751,6 +2807,7 @@ module Polars
|
|
2751
2807
|
end
|
2752
2808
|
wrap_expr(_rbexpr.is_in(other._rbexpr))
|
2753
2809
|
end
|
2810
|
+
alias_method :in?, :is_in
|
2754
2811
|
|
2755
2812
|
# Repeat the elements in this Series as specified in the given expression.
|
2756
2813
|
#
|
@@ -3443,14 +3500,15 @@ module Polars
|
|
3443
3500
|
min_periods: nil,
|
3444
3501
|
center: false,
|
3445
3502
|
by: nil,
|
3446
|
-
closed: "left"
|
3503
|
+
closed: "left",
|
3504
|
+
ddof: 1
|
3447
3505
|
)
|
3448
3506
|
window_size, min_periods = _prepare_rolling_window_args(
|
3449
3507
|
window_size, min_periods
|
3450
3508
|
)
|
3451
3509
|
wrap_expr(
|
3452
3510
|
_rbexpr.rolling_std(
|
3453
|
-
window_size, weights, min_periods, center, by, closed
|
3511
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3454
3512
|
)
|
3455
3513
|
)
|
3456
3514
|
end
|
@@ -3532,14 +3590,15 @@ module Polars
|
|
3532
3590
|
min_periods: nil,
|
3533
3591
|
center: false,
|
3534
3592
|
by: nil,
|
3535
|
-
closed: "left"
|
3593
|
+
closed: "left",
|
3594
|
+
ddof: 1
|
3536
3595
|
)
|
3537
3596
|
window_size, min_periods = _prepare_rolling_window_args(
|
3538
3597
|
window_size, min_periods
|
3539
3598
|
)
|
3540
3599
|
wrap_expr(
|
3541
3600
|
_rbexpr.rolling_var(
|
3542
|
-
window_size, weights, min_periods, center, by, closed
|
3601
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3543
3602
|
)
|
3544
3603
|
)
|
3545
3604
|
end
|
@@ -3914,8 +3973,8 @@ module Polars
|
|
3914
3973
|
# # │ 2 │
|
3915
3974
|
# # │ 5 │
|
3916
3975
|
# # └─────┘
|
3917
|
-
def rank(method: "average", reverse: false)
|
3918
|
-
wrap_expr(_rbexpr.rank(method, reverse))
|
3976
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3977
|
+
wrap_expr(_rbexpr.rank(method, reverse, seed))
|
3919
3978
|
end
|
3920
3979
|
|
3921
3980
|
# Calculate the n-th discrete difference.
|
@@ -4499,11 +4558,11 @@ module Polars
|
|
4499
4558
|
# # │ 1 │
|
4500
4559
|
# # │ 3 │
|
4501
4560
|
# # └─────┘
|
4502
|
-
def shuffle(seed: nil)
|
4561
|
+
def shuffle(seed: nil, fixed_seed: false)
|
4503
4562
|
if seed.nil?
|
4504
4563
|
seed = rand(10000)
|
4505
4564
|
end
|
4506
|
-
wrap_expr(_rbexpr.shuffle(seed))
|
4565
|
+
wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
|
4507
4566
|
end
|
4508
4567
|
|
4509
4568
|
# Sample from this expression.
|
@@ -4541,21 +4600,22 @@ module Polars
|
|
4541
4600
|
with_replacement: true,
|
4542
4601
|
shuffle: false,
|
4543
4602
|
seed: nil,
|
4544
|
-
n: nil
|
4603
|
+
n: nil,
|
4604
|
+
fixed_seed: false
|
4545
4605
|
)
|
4546
4606
|
if !n.nil? && !frac.nil?
|
4547
4607
|
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4548
4608
|
end
|
4549
4609
|
|
4550
4610
|
if !n.nil? && frac.nil?
|
4551
|
-
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4611
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
|
4552
4612
|
end
|
4553
4613
|
|
4554
4614
|
if frac.nil?
|
4555
4615
|
frac = 1.0
|
4556
4616
|
end
|
4557
4617
|
wrap_expr(
|
4558
|
-
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4618
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
|
4559
4619
|
)
|
4560
4620
|
end
|
4561
4621
|
|
@@ -4870,8 +4930,8 @@ module Polars
|
|
4870
4930
|
#
|
4871
4931
|
# Enables downstream code to use fast paths for sorted arrays.
|
4872
4932
|
#
|
4873
|
-
# @param
|
4874
|
-
#
|
4933
|
+
# @param descending [Boolean]
|
4934
|
+
# Whether the `Series` order is descending.
|
4875
4935
|
#
|
4876
4936
|
# @return [Expr]
|
4877
4937
|
#
|
@@ -4891,9 +4951,9 @@ module Polars
|
|
4891
4951
|
# # ╞════════╡
|
4892
4952
|
# # │ 3 │
|
4893
4953
|
# # └────────┘
|
4894
|
-
|
4895
|
-
|
4896
|
-
|
4954
|
+
def set_sorted(descending: false)
|
4955
|
+
wrap_expr(_rbexpr.set_sorted_flag(descending))
|
4956
|
+
end
|
4897
4957
|
|
4898
4958
|
# Aggregate to list.
|
4899
4959
|
#
|
@@ -4906,7 +4966,7 @@ module Polars
|
|
4906
4966
|
# "b" => [4, 5, 6]
|
4907
4967
|
# }
|
4908
4968
|
# )
|
4909
|
-
# df.select(Polars.all.
|
4969
|
+
# df.select(Polars.all.implode)
|
4910
4970
|
# # =>
|
4911
4971
|
# # shape: (1, 2)
|
4912
4972
|
# # ┌───────────┬───────────┐
|
@@ -4916,8 +4976,8 @@ module Polars
|
|
4916
4976
|
# # ╞═══════════╪═══════════╡
|
4917
4977
|
# # │ [1, 2, 3] ┆ [4, 5, 6] │
|
4918
4978
|
# # └───────────┴───────────┘
|
4919
|
-
def
|
4920
|
-
wrap_expr(_rbexpr.
|
4979
|
+
def implode
|
4980
|
+
wrap_expr(_rbexpr.implode)
|
4921
4981
|
end
|
4922
4982
|
|
4923
4983
|
# Shrink numeric columns to the minimal required datatype.
|
@@ -4958,10 +5018,17 @@ module Polars
|
|
4958
5018
|
# Create an object namespace of all list related methods.
|
4959
5019
|
#
|
4960
5020
|
# @return [ListExpr]
|
4961
|
-
def
|
5021
|
+
def list
|
4962
5022
|
ListExpr.new(self)
|
4963
5023
|
end
|
4964
5024
|
|
5025
|
+
# Create an object namespace of all array related methods.
|
5026
|
+
#
|
5027
|
+
# @return [ArrayExpr]
|
5028
|
+
def arr
|
5029
|
+
ArrayExpr.new(self)
|
5030
|
+
end
|
5031
|
+
|
4965
5032
|
# Create an object namespace of all binary related methods.
|
4966
5033
|
#
|
4967
5034
|
# @return [BinaryExpr]
|
data/lib/polars/functions.rb
CHANGED
data/lib/polars/group_by.rb
CHANGED
@@ -551,32 +551,11 @@ module Polars
|
|
551
551
|
agg(Polars.all.median)
|
552
552
|
end
|
553
553
|
|
554
|
-
# Aggregate the groups into Series.
|
555
|
-
#
|
556
|
-
# @return [DataFrame]
|
557
|
-
#
|
558
|
-
# @example
|
559
|
-
# df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
|
560
|
-
# df.groupby("a", maintain_order: true).agg_list
|
561
|
-
# # =>
|
562
|
-
# # shape: (2, 2)
|
563
|
-
# # ┌─────┬─────────────────┐
|
564
|
-
# # │ a ┆ b │
|
565
|
-
# # │ --- ┆ --- │
|
566
|
-
# # │ str ┆ list[list[i64]] │
|
567
|
-
# # ╞═════╪═════════════════╡
|
568
|
-
# # │ one ┆ [[1, 3]] │
|
569
|
-
# # │ two ┆ [[2, 4]] │
|
570
|
-
# # └─────┴─────────────────┘
|
571
|
-
def agg_list
|
572
|
-
agg(Polars.all.list)
|
573
|
-
end
|
574
|
-
|
575
554
|
# Plot data.
|
576
555
|
#
|
577
556
|
# @return [Vega::LiteChart]
|
578
557
|
def plot(*args, **options)
|
579
|
-
raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
|
558
|
+
raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
|
580
559
|
# same message as Ruby
|
581
560
|
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
582
561
|
|