polars-df 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/README.md +4 -3
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/mod.rs +7 -3
- data/ext/polars/src/conversion.rs +171 -63
- data/ext/polars/src/dataframe.rs +19 -23
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/general.rs +39 -9
- data/ext/polars/src/expr/list.rs +27 -22
- data/ext/polars/src/expr/string.rs +10 -9
- data/ext/polars/src/expr.rs +1 -0
- data/ext/polars/src/functions/lazy.rs +61 -21
- data/ext/polars/src/lazyframe.rs +14 -2
- data/ext/polars/src/lib.rs +25 -20
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +4 -0
- data/ext/polars/src/series/construction.rs +28 -2
- data/ext/polars/src/series.rs +57 -17
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +5 -2
data/lib/polars/expr.rb
CHANGED
@@ -362,7 +362,7 @@ module Polars
|
|
362
362
|
if columns.is_a?(String)
|
363
363
|
columns = [columns]
|
364
364
|
return wrap_expr(_rbexpr.exclude(columns))
|
365
|
-
elsif !columns.is_a?(Array)
|
365
|
+
elsif !columns.is_a?(::Array)
|
366
366
|
columns = [columns]
|
367
367
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
368
368
|
end
|
@@ -820,18 +820,18 @@ module Polars
|
|
820
820
|
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
821
821
|
# # =>
|
822
822
|
# # shape: (6, 1)
|
823
|
-
# #
|
824
|
-
# # │
|
825
|
-
# # │ ---
|
826
|
-
# # │ i64
|
827
|
-
# #
|
828
|
-
# # │ null
|
829
|
-
# # │ null
|
830
|
-
# # │ null
|
831
|
-
# # │ 1
|
832
|
-
# # │ 1
|
833
|
-
# # │ 2
|
834
|
-
# #
|
823
|
+
# # ┌────────┐
|
824
|
+
# # │ repeat │
|
825
|
+
# # │ --- │
|
826
|
+
# # │ i64 │
|
827
|
+
# # ╞════════╡
|
828
|
+
# # │ null │
|
829
|
+
# # │ null │
|
830
|
+
# # │ null │
|
831
|
+
# # │ 1 │
|
832
|
+
# # │ 1 │
|
833
|
+
# # │ 2 │
|
834
|
+
# # └────────┘
|
835
835
|
def rechunk
|
836
836
|
wrap_expr(_rbexpr.rechunk)
|
837
837
|
end
|
@@ -1534,10 +1534,10 @@ module Polars
|
|
1534
1534
|
# # │ two │
|
1535
1535
|
# # └───────┘
|
1536
1536
|
def sort_by(by, reverse: false)
|
1537
|
-
if !by.is_a?(Array)
|
1537
|
+
if !by.is_a?(::Array)
|
1538
1538
|
by = [by]
|
1539
1539
|
end
|
1540
|
-
if !reverse.is_a?(Array)
|
1540
|
+
if !reverse.is_a?(::Array)
|
1541
1541
|
reverse = [reverse]
|
1542
1542
|
end
|
1543
1543
|
by = Utils.selection_to_rbexpr_list(by)
|
@@ -1578,7 +1578,7 @@ module Polars
|
|
1578
1578
|
# # │ two ┆ 99 │
|
1579
1579
|
# # └───────┴───────┘
|
1580
1580
|
def take(indices)
|
1581
|
-
if indices.is_a?(Array)
|
1581
|
+
if indices.is_a?(::Array)
|
1582
1582
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1583
1583
|
else
|
1584
1584
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
@@ -2436,14 +2436,14 @@ module Polars
|
|
2436
2436
|
# ).sort("group_col")
|
2437
2437
|
# # =>
|
2438
2438
|
# # shape: (2, 3)
|
2439
|
-
# #
|
2440
|
-
# # │ group_col ┆ lt
|
2441
|
-
# # │ --- ┆ ---
|
2442
|
-
# # │ str ┆ i64
|
2443
|
-
# #
|
2444
|
-
# # │ g1 ┆ 1
|
2445
|
-
# # │ g2 ┆
|
2446
|
-
# #
|
2439
|
+
# # ┌───────────┬─────┬─────┐
|
2440
|
+
# # │ group_col ┆ lt ┆ gte │
|
2441
|
+
# # │ --- ┆ --- ┆ --- │
|
2442
|
+
# # │ str ┆ i64 ┆ i64 │
|
2443
|
+
# # ╞═══════════╪═════╪═════╡
|
2444
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2445
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2446
|
+
# # └───────────┴─────┴─────┘
|
2447
2447
|
def filter(predicate)
|
2448
2448
|
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
2449
2449
|
end
|
@@ -2474,14 +2474,14 @@ module Polars
|
|
2474
2474
|
# ).sort("group_col")
|
2475
2475
|
# # =>
|
2476
2476
|
# # shape: (2, 3)
|
2477
|
-
# #
|
2478
|
-
# # │ group_col ┆ lt
|
2479
|
-
# # │ --- ┆ ---
|
2480
|
-
# # │ str ┆ i64
|
2481
|
-
# #
|
2482
|
-
# # │ g1 ┆ 1
|
2483
|
-
# # │ g2 ┆
|
2484
|
-
# #
|
2477
|
+
# # ┌───────────┬─────┬─────┐
|
2478
|
+
# # │ group_col ┆ lt ┆ gte │
|
2479
|
+
# # │ --- ┆ --- ┆ --- │
|
2480
|
+
# # │ str ┆ i64 ┆ i64 │
|
2481
|
+
# # ╞═══════════╪═════╪═════╡
|
2482
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2483
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2484
|
+
# # └───────────┴─────┴─────┘
|
2485
2485
|
def where(predicate)
|
2486
2486
|
filter(predicate)
|
2487
2487
|
end
|
@@ -2616,25 +2616,23 @@ module Polars
|
|
2616
2616
|
# @return [Expr]
|
2617
2617
|
#
|
2618
2618
|
# @example
|
2619
|
-
#
|
2620
|
-
#
|
2621
|
-
#
|
2622
|
-
#
|
2623
|
-
#
|
2624
|
-
#
|
2625
|
-
#
|
2626
|
-
#
|
2627
|
-
#
|
2628
|
-
#
|
2629
|
-
#
|
2630
|
-
#
|
2631
|
-
#
|
2632
|
-
#
|
2633
|
-
#
|
2634
|
-
#
|
2635
|
-
#
|
2636
|
-
# # │ d │
|
2637
|
-
# # └─────┘
|
2619
|
+
# df = Polars::DataFrame.new(
|
2620
|
+
# {
|
2621
|
+
# "group" => ["a", "b", "b"],
|
2622
|
+
# "values" => [[1, 2], [2, 3], [4]]
|
2623
|
+
# }
|
2624
|
+
# )
|
2625
|
+
# df.groupby("group").agg(Polars.col("values").flatten)
|
2626
|
+
# # =>
|
2627
|
+
# # shape: (2, 2)
|
2628
|
+
# # ┌───────┬───────────┐
|
2629
|
+
# # │ group ┆ values │
|
2630
|
+
# # │ --- ┆ --- │
|
2631
|
+
# # │ str ┆ list[i64] │
|
2632
|
+
# # ╞═══════╪═══════════╡
|
2633
|
+
# # │ a ┆ [1, 2] │
|
2634
|
+
# # │ b ┆ [2, 3, 4] │
|
2635
|
+
# # └───────┴───────────┘
|
2638
2636
|
def flatten
|
2639
2637
|
wrap_expr(_rbexpr.explode)
|
2640
2638
|
end
|
@@ -2798,7 +2796,7 @@ module Polars
|
|
2798
2796
|
# # │ false │
|
2799
2797
|
# # └──────────┘
|
2800
2798
|
def is_in(other)
|
2801
|
-
if other.is_a?(Array)
|
2799
|
+
if other.is_a?(::Array)
|
2802
2800
|
if other.length == 0
|
2803
2801
|
other = Polars.lit(nil)
|
2804
2802
|
else
|
@@ -3502,14 +3500,15 @@ module Polars
|
|
3502
3500
|
min_periods: nil,
|
3503
3501
|
center: false,
|
3504
3502
|
by: nil,
|
3505
|
-
closed: "left"
|
3503
|
+
closed: "left",
|
3504
|
+
ddof: 1
|
3506
3505
|
)
|
3507
3506
|
window_size, min_periods = _prepare_rolling_window_args(
|
3508
3507
|
window_size, min_periods
|
3509
3508
|
)
|
3510
3509
|
wrap_expr(
|
3511
3510
|
_rbexpr.rolling_std(
|
3512
|
-
window_size, weights, min_periods, center, by, closed
|
3511
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3513
3512
|
)
|
3514
3513
|
)
|
3515
3514
|
end
|
@@ -3591,14 +3590,15 @@ module Polars
|
|
3591
3590
|
min_periods: nil,
|
3592
3591
|
center: false,
|
3593
3592
|
by: nil,
|
3594
|
-
closed: "left"
|
3593
|
+
closed: "left",
|
3594
|
+
ddof: 1
|
3595
3595
|
)
|
3596
3596
|
window_size, min_periods = _prepare_rolling_window_args(
|
3597
3597
|
window_size, min_periods
|
3598
3598
|
)
|
3599
3599
|
wrap_expr(
|
3600
3600
|
_rbexpr.rolling_var(
|
3601
|
-
window_size, weights, min_periods, center, by, closed
|
3601
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3602
3602
|
)
|
3603
3603
|
)
|
3604
3604
|
end
|
@@ -4558,11 +4558,11 @@ module Polars
|
|
4558
4558
|
# # │ 1 │
|
4559
4559
|
# # │ 3 │
|
4560
4560
|
# # └─────┘
|
4561
|
-
def shuffle(seed: nil)
|
4561
|
+
def shuffle(seed: nil, fixed_seed: false)
|
4562
4562
|
if seed.nil?
|
4563
4563
|
seed = rand(10000)
|
4564
4564
|
end
|
4565
|
-
wrap_expr(_rbexpr.shuffle(seed))
|
4565
|
+
wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
|
4566
4566
|
end
|
4567
4567
|
|
4568
4568
|
# Sample from this expression.
|
@@ -4600,21 +4600,22 @@ module Polars
|
|
4600
4600
|
with_replacement: true,
|
4601
4601
|
shuffle: false,
|
4602
4602
|
seed: nil,
|
4603
|
-
n: nil
|
4603
|
+
n: nil,
|
4604
|
+
fixed_seed: false
|
4604
4605
|
)
|
4605
4606
|
if !n.nil? && !frac.nil?
|
4606
4607
|
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4607
4608
|
end
|
4608
4609
|
|
4609
4610
|
if !n.nil? && frac.nil?
|
4610
|
-
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4611
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
|
4611
4612
|
end
|
4612
4613
|
|
4613
4614
|
if frac.nil?
|
4614
4615
|
frac = 1.0
|
4615
4616
|
end
|
4616
4617
|
wrap_expr(
|
4617
|
-
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4618
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
|
4618
4619
|
)
|
4619
4620
|
end
|
4620
4621
|
|
@@ -4929,8 +4930,8 @@ module Polars
|
|
4929
4930
|
#
|
4930
4931
|
# Enables downstream code to use fast paths for sorted arrays.
|
4931
4932
|
#
|
4932
|
-
# @param
|
4933
|
-
#
|
4933
|
+
# @param descending [Boolean]
|
4934
|
+
# Whether the `Series` order is descending.
|
4934
4935
|
#
|
4935
4936
|
# @return [Expr]
|
4936
4937
|
#
|
@@ -4950,9 +4951,9 @@ module Polars
|
|
4950
4951
|
# # ╞════════╡
|
4951
4952
|
# # │ 3 │
|
4952
4953
|
# # └────────┘
|
4953
|
-
|
4954
|
-
|
4955
|
-
|
4954
|
+
def set_sorted(descending: false)
|
4955
|
+
wrap_expr(_rbexpr.set_sorted_flag(descending))
|
4956
|
+
end
|
4956
4957
|
|
4957
4958
|
# Aggregate to list.
|
4958
4959
|
#
|
@@ -4965,7 +4966,7 @@ module Polars
|
|
4965
4966
|
# "b" => [4, 5, 6]
|
4966
4967
|
# }
|
4967
4968
|
# )
|
4968
|
-
# df.select(Polars.all.
|
4969
|
+
# df.select(Polars.all.implode)
|
4969
4970
|
# # =>
|
4970
4971
|
# # shape: (1, 2)
|
4971
4972
|
# # ┌───────────┬───────────┐
|
@@ -4978,7 +4979,6 @@ module Polars
|
|
4978
4979
|
def implode
|
4979
4980
|
wrap_expr(_rbexpr.implode)
|
4980
4981
|
end
|
4981
|
-
alias_method :list, :implode
|
4982
4982
|
|
4983
4983
|
# Shrink numeric columns to the minimal required datatype.
|
4984
4984
|
#
|
@@ -5018,10 +5018,17 @@ module Polars
|
|
5018
5018
|
# Create an object namespace of all list related methods.
|
5019
5019
|
#
|
5020
5020
|
# @return [ListExpr]
|
5021
|
-
def
|
5021
|
+
def list
|
5022
5022
|
ListExpr.new(self)
|
5023
5023
|
end
|
5024
5024
|
|
5025
|
+
# Create an object namespace of all array related methods.
|
5026
|
+
#
|
5027
|
+
# @return [ArrayExpr]
|
5028
|
+
def arr
|
5029
|
+
ArrayExpr.new(self)
|
5030
|
+
end
|
5031
|
+
|
5025
5032
|
# Create an object namespace of all binary related methods.
|
5026
5033
|
#
|
5027
5034
|
# @return [BinaryExpr]
|
data/lib/polars/functions.rb
CHANGED
data/lib/polars/group_by.rb
CHANGED
@@ -551,32 +551,11 @@ module Polars
|
|
551
551
|
agg(Polars.all.median)
|
552
552
|
end
|
553
553
|
|
554
|
-
# Aggregate the groups into Series.
|
555
|
-
#
|
556
|
-
# @return [DataFrame]
|
557
|
-
#
|
558
|
-
# @example
|
559
|
-
# df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
|
560
|
-
# df.groupby("a", maintain_order: true).agg_list
|
561
|
-
# # =>
|
562
|
-
# # shape: (2, 2)
|
563
|
-
# # ┌─────┬─────────────────┐
|
564
|
-
# # │ a ┆ b │
|
565
|
-
# # │ --- ┆ --- │
|
566
|
-
# # │ str ┆ list[list[i64]] │
|
567
|
-
# # ╞═════╪═════════════════╡
|
568
|
-
# # │ one ┆ [[1, 3]] │
|
569
|
-
# # │ two ┆ [[2, 4]] │
|
570
|
-
# # └─────┴─────────────────┘
|
571
|
-
def agg_list
|
572
|
-
agg(Polars.all.list)
|
573
|
-
end
|
574
|
-
|
575
554
|
# Plot data.
|
576
555
|
#
|
577
556
|
# @return [Vega::LiteChart]
|
578
557
|
def plot(*args, **options)
|
579
|
-
raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
|
558
|
+
raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
|
580
559
|
# same message as Ruby
|
581
560
|
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
582
561
|
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -4,6 +4,22 @@ module Polars
|
|
4
4
|
# @private
|
5
5
|
attr_accessor :_ldf
|
6
6
|
|
7
|
+
# Create a new LazyFrame.
|
8
|
+
def initialize(data = nil, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
9
|
+
self._ldf = (
|
10
|
+
DataFrame.new(
|
11
|
+
data,
|
12
|
+
schema: schema,
|
13
|
+
schema_overrides: schema_overrides,
|
14
|
+
orient: orient,
|
15
|
+
infer_schema_length: infer_schema_length,
|
16
|
+
nan_to_null: nan_to_null
|
17
|
+
)
|
18
|
+
.lazy
|
19
|
+
._ldf
|
20
|
+
)
|
21
|
+
end
|
22
|
+
|
7
23
|
# @private
|
8
24
|
def self._from_rbldf(rb_ldf)
|
9
25
|
ldf = LazyFrame.allocate
|
@@ -379,16 +395,16 @@ module Polars
|
|
379
395
|
# # │ 2 ┆ 7.0 ┆ b │
|
380
396
|
# # │ 1 ┆ 6.0 ┆ a │
|
381
397
|
# # └─────┴─────┴─────┘
|
382
|
-
def sort(by, reverse: false, nulls_last: false)
|
398
|
+
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
383
399
|
if by.is_a?(String)
|
384
|
-
_from_rbldf(_ldf.sort(by, reverse, nulls_last))
|
400
|
+
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
385
401
|
end
|
386
402
|
if Utils.bool?(reverse)
|
387
403
|
reverse = [reverse]
|
388
404
|
end
|
389
405
|
|
390
406
|
by = Utils.selection_to_rbexpr_list(by)
|
391
|
-
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
|
407
|
+
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
|
392
408
|
end
|
393
409
|
|
394
410
|
# def profile
|
@@ -921,6 +937,12 @@ module Polars
|
|
921
937
|
# Define whether the temporal window interval is closed or not.
|
922
938
|
# @param by [Object]
|
923
939
|
# Also group by this column/these columns.
|
940
|
+
# @param check_sorted [Boolean]
|
941
|
+
# When the `by` argument is given, polars cannot check sortedness
|
942
|
+
# by the metadata and has to do a full scan on the index column to
|
943
|
+
# verify data is sorted. This is expensive. If you are sure the
|
944
|
+
# data within the by groups is sorted, you can set this to `false`.
|
945
|
+
# Doing so incorrectly will lead to incorrect output.
|
924
946
|
#
|
925
947
|
# @return [LazyFrame]
|
926
948
|
#
|
@@ -933,8 +955,8 @@ module Polars
|
|
933
955
|
# "2020-01-03 19:45:32",
|
934
956
|
# "2020-01-08 23:16:43"
|
935
957
|
# ]
|
936
|
-
# df = Polars::
|
937
|
-
# Polars.col("dt").str.strptime(Polars::Datetime)
|
958
|
+
# df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
959
|
+
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
938
960
|
# )
|
939
961
|
# df.groupby_rolling(index_column: "dt", period: "2d").agg(
|
940
962
|
# [
|
@@ -942,7 +964,7 @@ module Polars
|
|
942
964
|
# Polars.min("a").alias("min_a"),
|
943
965
|
# Polars.max("a").alias("max_a")
|
944
966
|
# ]
|
945
|
-
# )
|
967
|
+
# ).collect
|
946
968
|
# # =>
|
947
969
|
# # shape: (6, 4)
|
948
970
|
# # ┌─────────────────────┬───────┬───────┬───────┐
|
@@ -962,7 +984,8 @@ module Polars
|
|
962
984
|
period:,
|
963
985
|
offset: nil,
|
964
986
|
closed: "right",
|
965
|
-
by: nil
|
987
|
+
by: nil,
|
988
|
+
check_sorted: true
|
966
989
|
)
|
967
990
|
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
968
991
|
if offset.nil?
|
@@ -974,7 +997,7 @@ module Polars
|
|
974
997
|
offset = Utils._timedelta_to_pl_duration(offset)
|
975
998
|
|
976
999
|
lgb = _ldf.groupby_rolling(
|
977
|
-
index_column._rbexpr, period, offset, closed, rbexprs_by
|
1000
|
+
index_column._rbexpr, period, offset, closed, rbexprs_by, check_sorted
|
978
1001
|
)
|
979
1002
|
LazyGroupBy.new(lgb, self.class)
|
980
1003
|
end
|
@@ -1112,21 +1135,21 @@ module Polars
|
|
1112
1135
|
# df.groupby_dynamic("time", every: "1h", closed: "left").agg(
|
1113
1136
|
# [
|
1114
1137
|
# Polars.col("time").count.alias("time_count"),
|
1115
|
-
# Polars.col("time").
|
1138
|
+
# Polars.col("time").alias("time_agg_list")
|
1116
1139
|
# ]
|
1117
1140
|
# )
|
1118
1141
|
# # =>
|
1119
1142
|
# # shape: (4, 3)
|
1120
|
-
# #
|
1121
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
1122
|
-
# # │ --- ┆ --- ┆ ---
|
1123
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
1124
|
-
# #
|
1125
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16
|
1126
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16
|
1127
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16
|
1128
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
1129
|
-
# #
|
1143
|
+
# # ┌─────────────────────┬────────────┬───────────────────────────────────┐
|
1144
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1145
|
+
# # │ --- ┆ --- ┆ --- │
|
1146
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1147
|
+
# # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
|
1148
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16… │
|
1149
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16… │
|
1150
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16… │
|
1151
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1152
|
+
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
1130
1153
|
#
|
1131
1154
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1132
1155
|
# df.groupby_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -1193,7 +1216,7 @@ module Polars
|
|
1193
1216
|
# period: "3i",
|
1194
1217
|
# include_boundaries: true,
|
1195
1218
|
# closed: "right"
|
1196
|
-
# ).agg(Polars.col("A").
|
1219
|
+
# ).agg(Polars.col("A").alias("A_agg_list"))
|
1197
1220
|
# # =>
|
1198
1221
|
# # shape: (3, 4)
|
1199
1222
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
@@ -1216,12 +1239,9 @@ module Polars
|
|
1216
1239
|
by: nil,
|
1217
1240
|
start_by: "window"
|
1218
1241
|
)
|
1242
|
+
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
1219
1243
|
if offset.nil?
|
1220
|
-
|
1221
|
-
offset = "-#{every}"
|
1222
|
-
else
|
1223
|
-
offset = "0ns"
|
1224
|
-
end
|
1244
|
+
offset = period.nil? ? "-#{every}" : "0ns"
|
1225
1245
|
end
|
1226
1246
|
|
1227
1247
|
if period.nil?
|
@@ -1234,7 +1254,7 @@ module Polars
|
|
1234
1254
|
|
1235
1255
|
rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
|
1236
1256
|
lgb = _ldf.groupby_dynamic(
|
1237
|
-
index_column,
|
1257
|
+
index_column._rbexpr,
|
1238
1258
|
every,
|
1239
1259
|
period,
|
1240
1260
|
offset,
|
@@ -1351,7 +1371,7 @@ module Polars
|
|
1351
1371
|
if by.is_a?(String)
|
1352
1372
|
by_left_ = [by]
|
1353
1373
|
by_right_ = [by]
|
1354
|
-
elsif by.is_a?(Array)
|
1374
|
+
elsif by.is_a?(::Array)
|
1355
1375
|
by_left_ = by
|
1356
1376
|
by_right_ = by
|
1357
1377
|
end
|
@@ -1619,7 +1639,7 @@ module Polars
|
|
1619
1639
|
# # │ null │
|
1620
1640
|
# # └──────┘
|
1621
1641
|
def with_context(other)
|
1622
|
-
if !other.is_a?(Array)
|
1642
|
+
if !other.is_a?(::Array)
|
1623
1643
|
other = [other]
|
1624
1644
|
end
|
1625
1645
|
|
@@ -2228,7 +2248,7 @@ module Polars
|
|
2228
2248
|
#
|
2229
2249
|
# @return [LazyFrame]
|
2230
2250
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
2231
|
-
if !subset.nil? && !subset.is_a?(Array)
|
2251
|
+
if !subset.nil? && !subset.is_a?(::Array)
|
2232
2252
|
subset = [subset]
|
2233
2253
|
end
|
2234
2254
|
_from_rbldf(_ldf.unique(maintain_order, subset, keep))
|
@@ -2261,7 +2281,7 @@ module Polars
|
|
2261
2281
|
# # │ 3 ┆ 8 ┆ c │
|
2262
2282
|
# # └─────┴─────┴─────┘
|
2263
2283
|
def drop_nulls(subset: nil)
|
2264
|
-
if !subset.nil? && !subset.is_a?(Array)
|
2284
|
+
if !subset.nil? && !subset.is_a?(::Array)
|
2265
2285
|
subset = [subset]
|
2266
2286
|
end
|
2267
2287
|
_from_rbldf(_ldf.drop_nulls(subset))
|
@@ -2423,6 +2443,38 @@ module Polars
|
|
2423
2443
|
_from_rbldf(_ldf.unnest(names))
|
2424
2444
|
end
|
2425
2445
|
|
2446
|
+
# TODO
|
2447
|
+
# def merge_sorted
|
2448
|
+
# end
|
2449
|
+
|
2450
|
+
# Indicate that one or multiple columns are sorted.
|
2451
|
+
#
|
2452
|
+
# @param column [Object]
|
2453
|
+
# Columns that are sorted
|
2454
|
+
# @param more_columns [Object]
|
2455
|
+
# Additional columns that are sorted, specified as positional arguments.
|
2456
|
+
# @param descending [Boolean]
|
2457
|
+
# Whether the columns are sorted in descending order.
|
2458
|
+
#
|
2459
|
+
# @return [LazyFrame]
|
2460
|
+
def set_sorted(
|
2461
|
+
column,
|
2462
|
+
*more_columns,
|
2463
|
+
descending: false
|
2464
|
+
)
|
2465
|
+
columns = Utils.selection_to_rbexpr_list(column)
|
2466
|
+
if more_columns.any?
|
2467
|
+
columns.concat(Utils.selection_to_rbexpr_list(more_columns))
|
2468
|
+
end
|
2469
|
+
with_columns(
|
2470
|
+
columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
|
2471
|
+
)
|
2472
|
+
end
|
2473
|
+
|
2474
|
+
# TODO
|
2475
|
+
# def update
|
2476
|
+
# end
|
2477
|
+
|
2426
2478
|
private
|
2427
2479
|
|
2428
2480
|
def initialize_copy(other)
|