polars-df 0.4.0-x86_64-linux → 0.6.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2386 -1216
- data/README.md +6 -5
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +4 -2
data/lib/polars/expr.rb
CHANGED
@@ -362,7 +362,7 @@ module Polars
|
|
362
362
|
if columns.is_a?(String)
|
363
363
|
columns = [columns]
|
364
364
|
return wrap_expr(_rbexpr.exclude(columns))
|
365
|
-
elsif !columns.is_a?(Array)
|
365
|
+
elsif !columns.is_a?(::Array)
|
366
366
|
columns = [columns]
|
367
367
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
368
368
|
end
|
@@ -820,18 +820,18 @@ module Polars
|
|
820
820
|
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
821
821
|
# # =>
|
822
822
|
# # shape: (6, 1)
|
823
|
-
# #
|
824
|
-
# # │
|
825
|
-
# # │ ---
|
826
|
-
# # │ i64
|
827
|
-
# #
|
828
|
-
# # │ null
|
829
|
-
# # │ null
|
830
|
-
# # │ null
|
831
|
-
# # │ 1
|
832
|
-
# # │ 1
|
833
|
-
# # │ 2
|
834
|
-
# #
|
823
|
+
# # ┌────────┐
|
824
|
+
# # │ repeat │
|
825
|
+
# # │ --- │
|
826
|
+
# # │ i64 │
|
827
|
+
# # ╞════════╡
|
828
|
+
# # │ null │
|
829
|
+
# # │ null │
|
830
|
+
# # │ null │
|
831
|
+
# # │ 1 │
|
832
|
+
# # │ 1 │
|
833
|
+
# # │ 2 │
|
834
|
+
# # └────────┘
|
835
835
|
def rechunk
|
836
836
|
wrap_expr(_rbexpr.rechunk)
|
837
837
|
end
|
@@ -1308,8 +1308,6 @@ module Polars
|
|
1308
1308
|
#
|
1309
1309
|
# @param k [Integer]
|
1310
1310
|
# Number of elements to return.
|
1311
|
-
# @param reverse [Boolean]
|
1312
|
-
# Return the smallest elements.
|
1313
1311
|
#
|
1314
1312
|
# @return [Expr]
|
1315
1313
|
#
|
@@ -1322,7 +1320,45 @@ module Polars
|
|
1322
1320
|
# df.select(
|
1323
1321
|
# [
|
1324
1322
|
# Polars.col("value").top_k.alias("top_k"),
|
1325
|
-
# Polars.col("value").
|
1323
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1324
|
+
# ]
|
1325
|
+
# )
|
1326
|
+
# # =>
|
1327
|
+
# # shape: (5, 2)
|
1328
|
+
# # ┌───────┬──────────┐
|
1329
|
+
# # │ top_k ┆ bottom_k │
|
1330
|
+
# # │ --- ┆ --- │
|
1331
|
+
# # │ i64 ┆ i64 │
|
1332
|
+
# # ╞═══════╪══════════╡
|
1333
|
+
# # │ 99 ┆ 1 │
|
1334
|
+
# # │ 98 ┆ 2 │
|
1335
|
+
# # │ 4 ┆ 3 │
|
1336
|
+
# # │ 3 ┆ 4 │
|
1337
|
+
# # │ 2 ┆ 98 │
|
1338
|
+
# # └───────┴──────────┘
|
1339
|
+
def top_k(k: 5)
|
1340
|
+
wrap_expr(_rbexpr.top_k(k))
|
1341
|
+
end
|
1342
|
+
|
1343
|
+
# Return the `k` smallest elements.
|
1344
|
+
#
|
1345
|
+
# If `reverse: true`, the smallest elements will be given.
|
1346
|
+
#
|
1347
|
+
# @param k [Integer]
|
1348
|
+
# Number of elements to return.
|
1349
|
+
#
|
1350
|
+
# @return [Expr]
|
1351
|
+
#
|
1352
|
+
# @example
|
1353
|
+
# df = Polars::DataFrame.new(
|
1354
|
+
# {
|
1355
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1356
|
+
# }
|
1357
|
+
# )
|
1358
|
+
# df.select(
|
1359
|
+
# [
|
1360
|
+
# Polars.col("value").top_k.alias("top_k"),
|
1361
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1326
1362
|
# ]
|
1327
1363
|
# )
|
1328
1364
|
# # =>
|
@@ -1338,8 +1374,8 @@ module Polars
|
|
1338
1374
|
# # │ 3 ┆ 4 │
|
1339
1375
|
# # │ 2 ┆ 98 │
|
1340
1376
|
# # └───────┴──────────┘
|
1341
|
-
def
|
1342
|
-
wrap_expr(_rbexpr.
|
1377
|
+
def bottom_k(k: 5)
|
1378
|
+
wrap_expr(_rbexpr.bottom_k(k))
|
1343
1379
|
end
|
1344
1380
|
|
1345
1381
|
# Get the index values that would sort this column.
|
@@ -1498,10 +1534,10 @@ module Polars
|
|
1498
1534
|
# # │ two │
|
1499
1535
|
# # └───────┘
|
1500
1536
|
def sort_by(by, reverse: false)
|
1501
|
-
if !by.is_a?(Array)
|
1537
|
+
if !by.is_a?(::Array)
|
1502
1538
|
by = [by]
|
1503
1539
|
end
|
1504
|
-
if !reverse.is_a?(Array)
|
1540
|
+
if !reverse.is_a?(::Array)
|
1505
1541
|
reverse = [reverse]
|
1506
1542
|
end
|
1507
1543
|
by = Utils.selection_to_rbexpr_list(by)
|
@@ -1542,7 +1578,7 @@ module Polars
|
|
1542
1578
|
# # │ two ┆ 99 │
|
1543
1579
|
# # └───────┴───────┘
|
1544
1580
|
def take(indices)
|
1545
|
-
if indices.is_a?(Array)
|
1581
|
+
if indices.is_a?(::Array)
|
1546
1582
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1547
1583
|
else
|
1548
1584
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
@@ -2008,6 +2044,28 @@ module Polars
|
|
2008
2044
|
wrap_expr(_rbexpr.n_unique)
|
2009
2045
|
end
|
2010
2046
|
|
2047
|
+
# Approx count unique values.
|
2048
|
+
#
|
2049
|
+
# This is done using the HyperLogLog++ algorithm for cardinality estimation.
|
2050
|
+
#
|
2051
|
+
# @return [Expr]
|
2052
|
+
#
|
2053
|
+
# @example
|
2054
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2055
|
+
# df.select(Polars.col("a").approx_unique)
|
2056
|
+
# # =>
|
2057
|
+
# # shape: (1, 1)
|
2058
|
+
# # ┌─────┐
|
2059
|
+
# # │ a │
|
2060
|
+
# # │ --- │
|
2061
|
+
# # │ u32 │
|
2062
|
+
# # ╞═════╡
|
2063
|
+
# # │ 2 │
|
2064
|
+
# # └─────┘
|
2065
|
+
def approx_unique
|
2066
|
+
wrap_expr(_rbexpr.approx_unique)
|
2067
|
+
end
|
2068
|
+
|
2011
2069
|
# Count null values.
|
2012
2070
|
#
|
2013
2071
|
# @return [Expr]
|
@@ -2194,7 +2252,7 @@ module Polars
|
|
2194
2252
|
# # │ 4 │
|
2195
2253
|
# # │ 6 │
|
2196
2254
|
# # │ 6 │
|
2197
|
-
# # │
|
2255
|
+
# # │ 4 │
|
2198
2256
|
# # │ 6 │
|
2199
2257
|
# # │ 6 │
|
2200
2258
|
# # │ 6 │
|
@@ -2378,14 +2436,14 @@ module Polars
|
|
2378
2436
|
# ).sort("group_col")
|
2379
2437
|
# # =>
|
2380
2438
|
# # shape: (2, 3)
|
2381
|
-
# #
|
2382
|
-
# # │ group_col ┆ lt
|
2383
|
-
# # │ --- ┆ ---
|
2384
|
-
# # │ str ┆ i64
|
2385
|
-
# #
|
2386
|
-
# # │ g1 ┆ 1
|
2387
|
-
# # │ g2 ┆
|
2388
|
-
# #
|
2439
|
+
# # ┌───────────┬─────┬─────┐
|
2440
|
+
# # │ group_col ┆ lt ┆ gte │
|
2441
|
+
# # │ --- ┆ --- ┆ --- │
|
2442
|
+
# # │ str ┆ i64 ┆ i64 │
|
2443
|
+
# # ╞═══════════╪═════╪═════╡
|
2444
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2445
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2446
|
+
# # └───────────┴─────┴─────┘
|
2389
2447
|
def filter(predicate)
|
2390
2448
|
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
2391
2449
|
end
|
@@ -2416,14 +2474,14 @@ module Polars
|
|
2416
2474
|
# ).sort("group_col")
|
2417
2475
|
# # =>
|
2418
2476
|
# # shape: (2, 3)
|
2419
|
-
# #
|
2420
|
-
# # │ group_col ┆ lt
|
2421
|
-
# # │ --- ┆ ---
|
2422
|
-
# # │ str ┆ i64
|
2423
|
-
# #
|
2424
|
-
# # │ g1 ┆ 1
|
2425
|
-
# # │ g2 ┆
|
2426
|
-
# #
|
2477
|
+
# # ┌───────────┬─────┬─────┐
|
2478
|
+
# # │ group_col ┆ lt ┆ gte │
|
2479
|
+
# # │ --- ┆ --- ┆ --- │
|
2480
|
+
# # │ str ┆ i64 ┆ i64 │
|
2481
|
+
# # ╞═══════════╪═════╪═════╡
|
2482
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2483
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2484
|
+
# # └───────────┴─────┴─────┘
|
2427
2485
|
def where(predicate)
|
2428
2486
|
filter(predicate)
|
2429
2487
|
end
|
@@ -2558,25 +2616,23 @@ module Polars
|
|
2558
2616
|
# @return [Expr]
|
2559
2617
|
#
|
2560
2618
|
# @example
|
2561
|
-
#
|
2562
|
-
#
|
2563
|
-
#
|
2564
|
-
#
|
2565
|
-
#
|
2566
|
-
#
|
2567
|
-
#
|
2568
|
-
#
|
2569
|
-
#
|
2570
|
-
#
|
2571
|
-
#
|
2572
|
-
#
|
2573
|
-
#
|
2574
|
-
#
|
2575
|
-
#
|
2576
|
-
#
|
2577
|
-
#
|
2578
|
-
# # │ d │
|
2579
|
-
# # └─────┘
|
2619
|
+
# df = Polars::DataFrame.new(
|
2620
|
+
# {
|
2621
|
+
# "group" => ["a", "b", "b"],
|
2622
|
+
# "values" => [[1, 2], [2, 3], [4]]
|
2623
|
+
# }
|
2624
|
+
# )
|
2625
|
+
# df.groupby("group").agg(Polars.col("values").flatten)
|
2626
|
+
# # =>
|
2627
|
+
# # shape: (2, 2)
|
2628
|
+
# # ┌───────┬───────────┐
|
2629
|
+
# # │ group ┆ values │
|
2630
|
+
# # │ --- ┆ --- │
|
2631
|
+
# # │ str ┆ list[i64] │
|
2632
|
+
# # ╞═══════╪═══════════╡
|
2633
|
+
# # │ a ┆ [1, 2] │
|
2634
|
+
# # │ b ┆ [2, 3, 4] │
|
2635
|
+
# # └───────┴───────────┘
|
2580
2636
|
def flatten
|
2581
2637
|
wrap_expr(_rbexpr.explode)
|
2582
2638
|
end
|
@@ -2740,7 +2796,7 @@ module Polars
|
|
2740
2796
|
# # │ false │
|
2741
2797
|
# # └──────────┘
|
2742
2798
|
def is_in(other)
|
2743
|
-
if other.is_a?(Array)
|
2799
|
+
if other.is_a?(::Array)
|
2744
2800
|
if other.length == 0
|
2745
2801
|
other = Polars.lit(nil)
|
2746
2802
|
else
|
@@ -2751,6 +2807,7 @@ module Polars
|
|
2751
2807
|
end
|
2752
2808
|
wrap_expr(_rbexpr.is_in(other._rbexpr))
|
2753
2809
|
end
|
2810
|
+
alias_method :in?, :is_in
|
2754
2811
|
|
2755
2812
|
# Repeat the elements in this Series as specified in the given expression.
|
2756
2813
|
#
|
@@ -3443,14 +3500,15 @@ module Polars
|
|
3443
3500
|
min_periods: nil,
|
3444
3501
|
center: false,
|
3445
3502
|
by: nil,
|
3446
|
-
closed: "left"
|
3503
|
+
closed: "left",
|
3504
|
+
ddof: 1
|
3447
3505
|
)
|
3448
3506
|
window_size, min_periods = _prepare_rolling_window_args(
|
3449
3507
|
window_size, min_periods
|
3450
3508
|
)
|
3451
3509
|
wrap_expr(
|
3452
3510
|
_rbexpr.rolling_std(
|
3453
|
-
window_size, weights, min_periods, center, by, closed
|
3511
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3454
3512
|
)
|
3455
3513
|
)
|
3456
3514
|
end
|
@@ -3532,14 +3590,15 @@ module Polars
|
|
3532
3590
|
min_periods: nil,
|
3533
3591
|
center: false,
|
3534
3592
|
by: nil,
|
3535
|
-
closed: "left"
|
3593
|
+
closed: "left",
|
3594
|
+
ddof: 1
|
3536
3595
|
)
|
3537
3596
|
window_size, min_periods = _prepare_rolling_window_args(
|
3538
3597
|
window_size, min_periods
|
3539
3598
|
)
|
3540
3599
|
wrap_expr(
|
3541
3600
|
_rbexpr.rolling_var(
|
3542
|
-
window_size, weights, min_periods, center, by, closed
|
3601
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3543
3602
|
)
|
3544
3603
|
)
|
3545
3604
|
end
|
@@ -3914,8 +3973,8 @@ module Polars
|
|
3914
3973
|
# # │ 2 │
|
3915
3974
|
# # │ 5 │
|
3916
3975
|
# # └─────┘
|
3917
|
-
def rank(method: "average", reverse: false)
|
3918
|
-
wrap_expr(_rbexpr.rank(method, reverse))
|
3976
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3977
|
+
wrap_expr(_rbexpr.rank(method, reverse, seed))
|
3919
3978
|
end
|
3920
3979
|
|
3921
3980
|
# Calculate the n-th discrete difference.
|
@@ -4499,11 +4558,11 @@ module Polars
|
|
4499
4558
|
# # │ 1 │
|
4500
4559
|
# # │ 3 │
|
4501
4560
|
# # └─────┘
|
4502
|
-
def shuffle(seed: nil)
|
4561
|
+
def shuffle(seed: nil, fixed_seed: false)
|
4503
4562
|
if seed.nil?
|
4504
4563
|
seed = rand(10000)
|
4505
4564
|
end
|
4506
|
-
wrap_expr(_rbexpr.shuffle(seed))
|
4565
|
+
wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
|
4507
4566
|
end
|
4508
4567
|
|
4509
4568
|
# Sample from this expression.
|
@@ -4541,21 +4600,22 @@ module Polars
|
|
4541
4600
|
with_replacement: true,
|
4542
4601
|
shuffle: false,
|
4543
4602
|
seed: nil,
|
4544
|
-
n: nil
|
4603
|
+
n: nil,
|
4604
|
+
fixed_seed: false
|
4545
4605
|
)
|
4546
4606
|
if !n.nil? && !frac.nil?
|
4547
4607
|
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4548
4608
|
end
|
4549
4609
|
|
4550
4610
|
if !n.nil? && frac.nil?
|
4551
|
-
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4611
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
|
4552
4612
|
end
|
4553
4613
|
|
4554
4614
|
if frac.nil?
|
4555
4615
|
frac = 1.0
|
4556
4616
|
end
|
4557
4617
|
wrap_expr(
|
4558
|
-
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4618
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
|
4559
4619
|
)
|
4560
4620
|
end
|
4561
4621
|
|
@@ -4870,8 +4930,8 @@ module Polars
|
|
4870
4930
|
#
|
4871
4931
|
# Enables downstream code to use fast paths for sorted arrays.
|
4872
4932
|
#
|
4873
|
-
# @param
|
4874
|
-
#
|
4933
|
+
# @param descending [Boolean]
|
4934
|
+
# Whether the `Series` order is descending.
|
4875
4935
|
#
|
4876
4936
|
# @return [Expr]
|
4877
4937
|
#
|
@@ -4891,9 +4951,9 @@ module Polars
|
|
4891
4951
|
# # ╞════════╡
|
4892
4952
|
# # │ 3 │
|
4893
4953
|
# # └────────┘
|
4894
|
-
|
4895
|
-
|
4896
|
-
|
4954
|
+
def set_sorted(descending: false)
|
4955
|
+
wrap_expr(_rbexpr.set_sorted_flag(descending))
|
4956
|
+
end
|
4897
4957
|
|
4898
4958
|
# Aggregate to list.
|
4899
4959
|
#
|
@@ -4906,7 +4966,7 @@ module Polars
|
|
4906
4966
|
# "b" => [4, 5, 6]
|
4907
4967
|
# }
|
4908
4968
|
# )
|
4909
|
-
# df.select(Polars.all.
|
4969
|
+
# df.select(Polars.all.implode)
|
4910
4970
|
# # =>
|
4911
4971
|
# # shape: (1, 2)
|
4912
4972
|
# # ┌───────────┬───────────┐
|
@@ -4916,8 +4976,8 @@ module Polars
|
|
4916
4976
|
# # ╞═══════════╪═══════════╡
|
4917
4977
|
# # │ [1, 2, 3] ┆ [4, 5, 6] │
|
4918
4978
|
# # └───────────┴───────────┘
|
4919
|
-
def
|
4920
|
-
wrap_expr(_rbexpr.
|
4979
|
+
def implode
|
4980
|
+
wrap_expr(_rbexpr.implode)
|
4921
4981
|
end
|
4922
4982
|
|
4923
4983
|
# Shrink numeric columns to the minimal required datatype.
|
@@ -4958,10 +5018,17 @@ module Polars
|
|
4958
5018
|
# Create an object namespace of all list related methods.
|
4959
5019
|
#
|
4960
5020
|
# @return [ListExpr]
|
4961
|
-
def
|
5021
|
+
def list
|
4962
5022
|
ListExpr.new(self)
|
4963
5023
|
end
|
4964
5024
|
|
5025
|
+
# Create an object namespace of all array related methods.
|
5026
|
+
#
|
5027
|
+
# @return [ArrayExpr]
|
5028
|
+
def arr
|
5029
|
+
ArrayExpr.new(self)
|
5030
|
+
end
|
5031
|
+
|
4965
5032
|
# Create an object namespace of all binary related methods.
|
4966
5033
|
#
|
4967
5034
|
# @return [BinaryExpr]
|
data/lib/polars/functions.rb
CHANGED
data/lib/polars/group_by.rb
CHANGED
@@ -551,32 +551,11 @@ module Polars
|
|
551
551
|
agg(Polars.all.median)
|
552
552
|
end
|
553
553
|
|
554
|
-
# Aggregate the groups into Series.
|
555
|
-
#
|
556
|
-
# @return [DataFrame]
|
557
|
-
#
|
558
|
-
# @example
|
559
|
-
# df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
|
560
|
-
# df.groupby("a", maintain_order: true).agg_list
|
561
|
-
# # =>
|
562
|
-
# # shape: (2, 2)
|
563
|
-
# # ┌─────┬─────────────────┐
|
564
|
-
# # │ a ┆ b │
|
565
|
-
# # │ --- ┆ --- │
|
566
|
-
# # │ str ┆ list[list[i64]] │
|
567
|
-
# # ╞═════╪═════════════════╡
|
568
|
-
# # │ one ┆ [[1, 3]] │
|
569
|
-
# # │ two ┆ [[2, 4]] │
|
570
|
-
# # └─────┴─────────────────┘
|
571
|
-
def agg_list
|
572
|
-
agg(Polars.all.list)
|
573
|
-
end
|
574
|
-
|
575
554
|
# Plot data.
|
576
555
|
#
|
577
556
|
# @return [Vega::LiteChart]
|
578
557
|
def plot(*args, **options)
|
579
|
-
raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
|
558
|
+
raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
|
580
559
|
# same message as Ruby
|
581
560
|
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
582
561
|
|