polars-df 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/README.md +4 -3
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/mod.rs +7 -3
- data/ext/polars/src/conversion.rs +171 -63
- data/ext/polars/src/dataframe.rs +19 -23
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/general.rs +39 -9
- data/ext/polars/src/expr/list.rs +27 -22
- data/ext/polars/src/expr/string.rs +10 -9
- data/ext/polars/src/expr.rs +1 -0
- data/ext/polars/src/functions/lazy.rs +61 -21
- data/ext/polars/src/lazyframe.rs +14 -2
- data/ext/polars/src/lib.rs +25 -20
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +4 -0
- data/ext/polars/src/series/construction.rs +28 -2
- data/ext/polars/src/series.rs +57 -17
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +5 -2
data/lib/polars/expr.rb
CHANGED
@@ -362,7 +362,7 @@ module Polars
|
|
362
362
|
if columns.is_a?(String)
|
363
363
|
columns = [columns]
|
364
364
|
return wrap_expr(_rbexpr.exclude(columns))
|
365
|
-
elsif !columns.is_a?(Array)
|
365
|
+
elsif !columns.is_a?(::Array)
|
366
366
|
columns = [columns]
|
367
367
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
368
368
|
end
|
@@ -820,18 +820,18 @@ module Polars
|
|
820
820
|
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
|
821
821
|
# # =>
|
822
822
|
# # shape: (6, 1)
|
823
|
-
# #
|
824
|
-
# # │
|
825
|
-
# # │ ---
|
826
|
-
# # │ i64
|
827
|
-
# #
|
828
|
-
# # │ null
|
829
|
-
# # │ null
|
830
|
-
# # │ null
|
831
|
-
# # │ 1
|
832
|
-
# # │ 1
|
833
|
-
# # │ 2
|
834
|
-
# #
|
823
|
+
# # ┌────────┐
|
824
|
+
# # │ repeat │
|
825
|
+
# # │ --- │
|
826
|
+
# # │ i64 │
|
827
|
+
# # ╞════════╡
|
828
|
+
# # │ null │
|
829
|
+
# # │ null │
|
830
|
+
# # │ null │
|
831
|
+
# # │ 1 │
|
832
|
+
# # │ 1 │
|
833
|
+
# # │ 2 │
|
834
|
+
# # └────────┘
|
835
835
|
def rechunk
|
836
836
|
wrap_expr(_rbexpr.rechunk)
|
837
837
|
end
|
@@ -1534,10 +1534,10 @@ module Polars
|
|
1534
1534
|
# # │ two │
|
1535
1535
|
# # └───────┘
|
1536
1536
|
def sort_by(by, reverse: false)
|
1537
|
-
if !by.is_a?(Array)
|
1537
|
+
if !by.is_a?(::Array)
|
1538
1538
|
by = [by]
|
1539
1539
|
end
|
1540
|
-
if !reverse.is_a?(Array)
|
1540
|
+
if !reverse.is_a?(::Array)
|
1541
1541
|
reverse = [reverse]
|
1542
1542
|
end
|
1543
1543
|
by = Utils.selection_to_rbexpr_list(by)
|
@@ -1578,7 +1578,7 @@ module Polars
|
|
1578
1578
|
# # │ two ┆ 99 │
|
1579
1579
|
# # └───────┴───────┘
|
1580
1580
|
def take(indices)
|
1581
|
-
if indices.is_a?(Array)
|
1581
|
+
if indices.is_a?(::Array)
|
1582
1582
|
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
|
1583
1583
|
else
|
1584
1584
|
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
|
@@ -2436,14 +2436,14 @@ module Polars
|
|
2436
2436
|
# ).sort("group_col")
|
2437
2437
|
# # =>
|
2438
2438
|
# # shape: (2, 3)
|
2439
|
-
# #
|
2440
|
-
# # │ group_col ┆ lt
|
2441
|
-
# # │ --- ┆ ---
|
2442
|
-
# # │ str ┆ i64
|
2443
|
-
# #
|
2444
|
-
# # │ g1 ┆ 1
|
2445
|
-
# # │ g2 ┆
|
2446
|
-
# #
|
2439
|
+
# # ┌───────────┬─────┬─────┐
|
2440
|
+
# # │ group_col ┆ lt ┆ gte │
|
2441
|
+
# # │ --- ┆ --- ┆ --- │
|
2442
|
+
# # │ str ┆ i64 ┆ i64 │
|
2443
|
+
# # ╞═══════════╪═════╪═════╡
|
2444
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2445
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2446
|
+
# # └───────────┴─────┴─────┘
|
2447
2447
|
def filter(predicate)
|
2448
2448
|
wrap_expr(_rbexpr.filter(predicate._rbexpr))
|
2449
2449
|
end
|
@@ -2474,14 +2474,14 @@ module Polars
|
|
2474
2474
|
# ).sort("group_col")
|
2475
2475
|
# # =>
|
2476
2476
|
# # shape: (2, 3)
|
2477
|
-
# #
|
2478
|
-
# # │ group_col ┆ lt
|
2479
|
-
# # │ --- ┆ ---
|
2480
|
-
# # │ str ┆ i64
|
2481
|
-
# #
|
2482
|
-
# # │ g1 ┆ 1
|
2483
|
-
# # │ g2 ┆
|
2484
|
-
# #
|
2477
|
+
# # ┌───────────┬─────┬─────┐
|
2478
|
+
# # │ group_col ┆ lt ┆ gte │
|
2479
|
+
# # │ --- ┆ --- ┆ --- │
|
2480
|
+
# # │ str ┆ i64 ┆ i64 │
|
2481
|
+
# # ╞═══════════╪═════╪═════╡
|
2482
|
+
# # │ g1 ┆ 1 ┆ 2 │
|
2483
|
+
# # │ g2 ┆ 0 ┆ 3 │
|
2484
|
+
# # └───────────┴─────┴─────┘
|
2485
2485
|
def where(predicate)
|
2486
2486
|
filter(predicate)
|
2487
2487
|
end
|
@@ -2616,25 +2616,23 @@ module Polars
|
|
2616
2616
|
# @return [Expr]
|
2617
2617
|
#
|
2618
2618
|
# @example
|
2619
|
-
#
|
2620
|
-
#
|
2621
|
-
#
|
2622
|
-
#
|
2623
|
-
#
|
2624
|
-
#
|
2625
|
-
#
|
2626
|
-
#
|
2627
|
-
#
|
2628
|
-
#
|
2629
|
-
#
|
2630
|
-
#
|
2631
|
-
#
|
2632
|
-
#
|
2633
|
-
#
|
2634
|
-
#
|
2635
|
-
#
|
2636
|
-
# # │ d │
|
2637
|
-
# # └─────┘
|
2619
|
+
# df = Polars::DataFrame.new(
|
2620
|
+
# {
|
2621
|
+
# "group" => ["a", "b", "b"],
|
2622
|
+
# "values" => [[1, 2], [2, 3], [4]]
|
2623
|
+
# }
|
2624
|
+
# )
|
2625
|
+
# df.groupby("group").agg(Polars.col("values").flatten)
|
2626
|
+
# # =>
|
2627
|
+
# # shape: (2, 2)
|
2628
|
+
# # ┌───────┬───────────┐
|
2629
|
+
# # │ group ┆ values │
|
2630
|
+
# # │ --- ┆ --- │
|
2631
|
+
# # │ str ┆ list[i64] │
|
2632
|
+
# # ╞═══════╪═══════════╡
|
2633
|
+
# # │ a ┆ [1, 2] │
|
2634
|
+
# # │ b ┆ [2, 3, 4] │
|
2635
|
+
# # └───────┴───────────┘
|
2638
2636
|
def flatten
|
2639
2637
|
wrap_expr(_rbexpr.explode)
|
2640
2638
|
end
|
@@ -2798,7 +2796,7 @@ module Polars
|
|
2798
2796
|
# # │ false │
|
2799
2797
|
# # └──────────┘
|
2800
2798
|
def is_in(other)
|
2801
|
-
if other.is_a?(Array)
|
2799
|
+
if other.is_a?(::Array)
|
2802
2800
|
if other.length == 0
|
2803
2801
|
other = Polars.lit(nil)
|
2804
2802
|
else
|
@@ -3502,14 +3500,15 @@ module Polars
|
|
3502
3500
|
min_periods: nil,
|
3503
3501
|
center: false,
|
3504
3502
|
by: nil,
|
3505
|
-
closed: "left"
|
3503
|
+
closed: "left",
|
3504
|
+
ddof: 1
|
3506
3505
|
)
|
3507
3506
|
window_size, min_periods = _prepare_rolling_window_args(
|
3508
3507
|
window_size, min_periods
|
3509
3508
|
)
|
3510
3509
|
wrap_expr(
|
3511
3510
|
_rbexpr.rolling_std(
|
3512
|
-
window_size, weights, min_periods, center, by, closed
|
3511
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3513
3512
|
)
|
3514
3513
|
)
|
3515
3514
|
end
|
@@ -3591,14 +3590,15 @@ module Polars
|
|
3591
3590
|
min_periods: nil,
|
3592
3591
|
center: false,
|
3593
3592
|
by: nil,
|
3594
|
-
closed: "left"
|
3593
|
+
closed: "left",
|
3594
|
+
ddof: 1
|
3595
3595
|
)
|
3596
3596
|
window_size, min_periods = _prepare_rolling_window_args(
|
3597
3597
|
window_size, min_periods
|
3598
3598
|
)
|
3599
3599
|
wrap_expr(
|
3600
3600
|
_rbexpr.rolling_var(
|
3601
|
-
window_size, weights, min_periods, center, by, closed
|
3601
|
+
window_size, weights, min_periods, center, by, closed, ddof
|
3602
3602
|
)
|
3603
3603
|
)
|
3604
3604
|
end
|
@@ -4558,11 +4558,11 @@ module Polars
|
|
4558
4558
|
# # │ 1 │
|
4559
4559
|
# # │ 3 │
|
4560
4560
|
# # └─────┘
|
4561
|
-
def shuffle(seed: nil)
|
4561
|
+
def shuffle(seed: nil, fixed_seed: false)
|
4562
4562
|
if seed.nil?
|
4563
4563
|
seed = rand(10000)
|
4564
4564
|
end
|
4565
|
-
wrap_expr(_rbexpr.shuffle(seed))
|
4565
|
+
wrap_expr(_rbexpr.shuffle(seed, fixed_seed))
|
4566
4566
|
end
|
4567
4567
|
|
4568
4568
|
# Sample from this expression.
|
@@ -4600,21 +4600,22 @@ module Polars
|
|
4600
4600
|
with_replacement: true,
|
4601
4601
|
shuffle: false,
|
4602
4602
|
seed: nil,
|
4603
|
-
n: nil
|
4603
|
+
n: nil,
|
4604
|
+
fixed_seed: false
|
4604
4605
|
)
|
4605
4606
|
if !n.nil? && !frac.nil?
|
4606
4607
|
raise ArgumentError, "cannot specify both `n` and `frac`"
|
4607
4608
|
end
|
4608
4609
|
|
4609
4610
|
if !n.nil? && frac.nil?
|
4610
|
-
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
|
4611
|
+
return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed, fixed_seed))
|
4611
4612
|
end
|
4612
4613
|
|
4613
4614
|
if frac.nil?
|
4614
4615
|
frac = 1.0
|
4615
4616
|
end
|
4616
4617
|
wrap_expr(
|
4617
|
-
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
|
4618
|
+
_rbexpr.sample_frac(frac, with_replacement, shuffle, seed, fixed_seed)
|
4618
4619
|
)
|
4619
4620
|
end
|
4620
4621
|
|
@@ -4929,8 +4930,8 @@ module Polars
|
|
4929
4930
|
#
|
4930
4931
|
# Enables downstream code to use fast paths for sorted arrays.
|
4931
4932
|
#
|
4932
|
-
# @param
|
4933
|
-
#
|
4933
|
+
# @param descending [Boolean]
|
4934
|
+
# Whether the `Series` order is descending.
|
4934
4935
|
#
|
4935
4936
|
# @return [Expr]
|
4936
4937
|
#
|
@@ -4950,9 +4951,9 @@ module Polars
|
|
4950
4951
|
# # ╞════════╡
|
4951
4952
|
# # │ 3 │
|
4952
4953
|
# # └────────┘
|
4953
|
-
|
4954
|
-
|
4955
|
-
|
4954
|
+
def set_sorted(descending: false)
|
4955
|
+
wrap_expr(_rbexpr.set_sorted_flag(descending))
|
4956
|
+
end
|
4956
4957
|
|
4957
4958
|
# Aggregate to list.
|
4958
4959
|
#
|
@@ -4965,7 +4966,7 @@ module Polars
|
|
4965
4966
|
# "b" => [4, 5, 6]
|
4966
4967
|
# }
|
4967
4968
|
# )
|
4968
|
-
# df.select(Polars.all.list)
|
4969
|
+
# df.select(Polars.all.implode)
|
4969
4970
|
# # =>
|
4970
4971
|
# # shape: (1, 2)
|
4971
4972
|
# # ┌───────────┬───────────┐
|
@@ -4978,7 +4979,6 @@ module Polars
|
|
4978
4979
|
def implode
|
4979
4980
|
wrap_expr(_rbexpr.implode)
|
4980
4981
|
end
|
4981
|
-
alias_method :list, :implode
|
4982
4982
|
|
4983
4983
|
# Shrink numeric columns to the minimal required datatype.
|
4984
4984
|
#
|
@@ -5018,10 +5018,17 @@ module Polars
|
|
5018
5018
|
# Create an object namespace of all list related methods.
|
5019
5019
|
#
|
5020
5020
|
# @return [ListExpr]
|
5021
|
-
def arr
|
5021
|
+
def list
|
5022
5022
|
ListExpr.new(self)
|
5023
5023
|
end
|
5024
5024
|
|
5025
|
+
# Create an object namespace of all array related methods.
|
5026
|
+
#
|
5027
|
+
# @return [ArrayExpr]
|
5028
|
+
def arr
|
5029
|
+
ArrayExpr.new(self)
|
5030
|
+
end
|
5031
|
+
|
5025
5032
|
# Create an object namespace of all binary related methods.
|
5026
5033
|
#
|
5027
5034
|
# @return [BinaryExpr]
|
data/lib/polars/functions.rb
CHANGED
data/lib/polars/group_by.rb
CHANGED
@@ -551,32 +551,11 @@ module Polars
|
|
551
551
|
agg(Polars.all.median)
|
552
552
|
end
|
553
553
|
|
554
|
-
# Aggregate the groups into Series.
|
555
|
-
#
|
556
|
-
# @return [DataFrame]
|
557
|
-
#
|
558
|
-
# @example
|
559
|
-
# df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
|
560
|
-
# df.groupby("a", maintain_order: true).agg_list
|
561
|
-
# # =>
|
562
|
-
# # shape: (2, 2)
|
563
|
-
# # ┌─────┬─────────────────┐
|
564
|
-
# # │ a ┆ b │
|
565
|
-
# # │ --- ┆ --- │
|
566
|
-
# # │ str ┆ list[list[i64]] │
|
567
|
-
# # ╞═════╪═════════════════╡
|
568
|
-
# # │ one ┆ [[1, 3]] │
|
569
|
-
# # │ two ┆ [[2, 4]] │
|
570
|
-
# # └─────┴─────────────────┘
|
571
|
-
def agg_list
|
572
|
-
agg(Polars.all.list)
|
573
|
-
end
|
574
|
-
|
575
554
|
# Plot data.
|
576
555
|
#
|
577
556
|
# @return [Vega::LiteChart]
|
578
557
|
def plot(*args, **options)
|
579
|
-
raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
|
558
|
+
raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
|
580
559
|
# same message as Ruby
|
581
560
|
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
582
561
|
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -4,6 +4,22 @@ module Polars
|
|
4
4
|
# @private
|
5
5
|
attr_accessor :_ldf
|
6
6
|
|
7
|
+
# Create a new LazyFrame.
|
8
|
+
def initialize(data = nil, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
9
|
+
self._ldf = (
|
10
|
+
DataFrame.new(
|
11
|
+
data,
|
12
|
+
schema: schema,
|
13
|
+
schema_overrides: schema_overrides,
|
14
|
+
orient: orient,
|
15
|
+
infer_schema_length: infer_schema_length,
|
16
|
+
nan_to_null: nan_to_null
|
17
|
+
)
|
18
|
+
.lazy
|
19
|
+
._ldf
|
20
|
+
)
|
21
|
+
end
|
22
|
+
|
7
23
|
# @private
|
8
24
|
def self._from_rbldf(rb_ldf)
|
9
25
|
ldf = LazyFrame.allocate
|
@@ -379,16 +395,16 @@ module Polars
|
|
379
395
|
# # │ 2 ┆ 7.0 ┆ b │
|
380
396
|
# # │ 1 ┆ 6.0 ┆ a │
|
381
397
|
# # └─────┴─────┴─────┘
|
382
|
-
def sort(by, reverse: false, nulls_last: false)
|
398
|
+
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
383
399
|
if by.is_a?(String)
|
384
|
-
_from_rbldf(_ldf.sort(by, reverse, nulls_last))
|
400
|
+
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
385
401
|
end
|
386
402
|
if Utils.bool?(reverse)
|
387
403
|
reverse = [reverse]
|
388
404
|
end
|
389
405
|
|
390
406
|
by = Utils.selection_to_rbexpr_list(by)
|
391
|
-
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last))
|
407
|
+
_from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
|
392
408
|
end
|
393
409
|
|
394
410
|
# def profile
|
@@ -921,6 +937,12 @@ module Polars
|
|
921
937
|
# Define whether the temporal window interval is closed or not.
|
922
938
|
# @param by [Object]
|
923
939
|
# Also group by this column/these columns.
|
940
|
+
# @param check_sorted [Boolean]
|
941
|
+
# When the `by` argument is given, polars cannot check sortedness
|
942
|
+
# by the metadata and has to do a full scan on the index column to
|
943
|
+
# verify data is sorted. This is expensive. If you are sure the
|
944
|
+
# data within the by groups is sorted, you can set this to `false`.
|
945
|
+
# Doing so incorrectly will lead to incorrect output.
|
924
946
|
#
|
925
947
|
# @return [LazyFrame]
|
926
948
|
#
|
@@ -933,8 +955,8 @@ module Polars
|
|
933
955
|
# "2020-01-03 19:45:32",
|
934
956
|
# "2020-01-08 23:16:43"
|
935
957
|
# ]
|
936
|
-
# df = Polars::
|
937
|
-
# Polars.col("dt").str.strptime(Polars::Datetime)
|
958
|
+
# df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
959
|
+
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
938
960
|
# )
|
939
961
|
# df.groupby_rolling(index_column: "dt", period: "2d").agg(
|
940
962
|
# [
|
@@ -942,7 +964,7 @@ module Polars
|
|
942
964
|
# Polars.min("a").alias("min_a"),
|
943
965
|
# Polars.max("a").alias("max_a")
|
944
966
|
# ]
|
945
|
-
# )
|
967
|
+
# ).collect
|
946
968
|
# # =>
|
947
969
|
# # shape: (6, 4)
|
948
970
|
# # ┌─────────────────────┬───────┬───────┬───────┐
|
@@ -962,7 +984,8 @@ module Polars
|
|
962
984
|
period:,
|
963
985
|
offset: nil,
|
964
986
|
closed: "right",
|
965
|
-
by: nil
|
987
|
+
by: nil,
|
988
|
+
check_sorted: true
|
966
989
|
)
|
967
990
|
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
968
991
|
if offset.nil?
|
@@ -974,7 +997,7 @@ module Polars
|
|
974
997
|
offset = Utils._timedelta_to_pl_duration(offset)
|
975
998
|
|
976
999
|
lgb = _ldf.groupby_rolling(
|
977
|
-
index_column._rbexpr, period, offset, closed, rbexprs_by
|
1000
|
+
index_column._rbexpr, period, offset, closed, rbexprs_by, check_sorted
|
978
1001
|
)
|
979
1002
|
LazyGroupBy.new(lgb, self.class)
|
980
1003
|
end
|
@@ -1112,21 +1135,21 @@ module Polars
|
|
1112
1135
|
# df.groupby_dynamic("time", every: "1h", closed: "left").agg(
|
1113
1136
|
# [
|
1114
1137
|
# Polars.col("time").count.alias("time_count"),
|
1115
|
-
# Polars.col("time").
|
1138
|
+
# Polars.col("time").alias("time_agg_list")
|
1116
1139
|
# ]
|
1117
1140
|
# )
|
1118
1141
|
# # =>
|
1119
1142
|
# # shape: (4, 3)
|
1120
|
-
# #
|
1121
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
1122
|
-
# # │ --- ┆ --- ┆ ---
|
1123
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
1124
|
-
# #
|
1125
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16
|
1126
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16
|
1127
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16
|
1128
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
1129
|
-
# #
|
1143
|
+
# # ┌─────────────────────┬────────────┬───────────────────────────────────┐
|
1144
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1145
|
+
# # │ --- ┆ --- ┆ --- │
|
1146
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1147
|
+
# # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
|
1148
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16… │
|
1149
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16… │
|
1150
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16… │
|
1151
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1152
|
+
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
1130
1153
|
#
|
1131
1154
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1132
1155
|
# df.groupby_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -1193,7 +1216,7 @@ module Polars
|
|
1193
1216
|
# period: "3i",
|
1194
1217
|
# include_boundaries: true,
|
1195
1218
|
# closed: "right"
|
1196
|
-
# ).agg(Polars.col("A").
|
1219
|
+
# ).agg(Polars.col("A").alias("A_agg_list"))
|
1197
1220
|
# # =>
|
1198
1221
|
# # shape: (3, 4)
|
1199
1222
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
@@ -1216,12 +1239,9 @@ module Polars
|
|
1216
1239
|
by: nil,
|
1217
1240
|
start_by: "window"
|
1218
1241
|
)
|
1242
|
+
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
1219
1243
|
if offset.nil?
|
1220
|
-
|
1221
|
-
offset = "-#{every}"
|
1222
|
-
else
|
1223
|
-
offset = "0ns"
|
1224
|
-
end
|
1244
|
+
offset = period.nil? ? "-#{every}" : "0ns"
|
1225
1245
|
end
|
1226
1246
|
|
1227
1247
|
if period.nil?
|
@@ -1234,7 +1254,7 @@ module Polars
|
|
1234
1254
|
|
1235
1255
|
rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
|
1236
1256
|
lgb = _ldf.groupby_dynamic(
|
1237
|
-
index_column,
|
1257
|
+
index_column._rbexpr,
|
1238
1258
|
every,
|
1239
1259
|
period,
|
1240
1260
|
offset,
|
@@ -1351,7 +1371,7 @@ module Polars
|
|
1351
1371
|
if by.is_a?(String)
|
1352
1372
|
by_left_ = [by]
|
1353
1373
|
by_right_ = [by]
|
1354
|
-
elsif by.is_a?(Array)
|
1374
|
+
elsif by.is_a?(::Array)
|
1355
1375
|
by_left_ = by
|
1356
1376
|
by_right_ = by
|
1357
1377
|
end
|
@@ -1619,7 +1639,7 @@ module Polars
|
|
1619
1639
|
# # │ null │
|
1620
1640
|
# # └──────┘
|
1621
1641
|
def with_context(other)
|
1622
|
-
if !other.is_a?(Array)
|
1642
|
+
if !other.is_a?(::Array)
|
1623
1643
|
other = [other]
|
1624
1644
|
end
|
1625
1645
|
|
@@ -2228,7 +2248,7 @@ module Polars
|
|
2228
2248
|
#
|
2229
2249
|
# @return [LazyFrame]
|
2230
2250
|
def unique(maintain_order: true, subset: nil, keep: "first")
|
2231
|
-
if !subset.nil? && !subset.is_a?(Array)
|
2251
|
+
if !subset.nil? && !subset.is_a?(::Array)
|
2232
2252
|
subset = [subset]
|
2233
2253
|
end
|
2234
2254
|
_from_rbldf(_ldf.unique(maintain_order, subset, keep))
|
@@ -2261,7 +2281,7 @@ module Polars
|
|
2261
2281
|
# # │ 3 ┆ 8 ┆ c │
|
2262
2282
|
# # └─────┴─────┴─────┘
|
2263
2283
|
def drop_nulls(subset: nil)
|
2264
|
-
if !subset.nil? && !subset.is_a?(Array)
|
2284
|
+
if !subset.nil? && !subset.is_a?(::Array)
|
2265
2285
|
subset = [subset]
|
2266
2286
|
end
|
2267
2287
|
_from_rbldf(_ldf.drop_nulls(subset))
|
@@ -2423,6 +2443,38 @@ module Polars
|
|
2423
2443
|
_from_rbldf(_ldf.unnest(names))
|
2424
2444
|
end
|
2425
2445
|
|
2446
|
+
# TODO
|
2447
|
+
# def merge_sorted
|
2448
|
+
# end
|
2449
|
+
|
2450
|
+
# Indicate that one or multiple columns are sorted.
|
2451
|
+
#
|
2452
|
+
# @param column [Object]
|
2453
|
+
# Columns that are sorted
|
2454
|
+
# @param more_columns [Object]
|
2455
|
+
# Additional columns that are sorted, specified as positional arguments.
|
2456
|
+
# @param descending [Boolean]
|
2457
|
+
# Whether the columns are sorted in descending order.
|
2458
|
+
#
|
2459
|
+
# @return [LazyFrame]
|
2460
|
+
def set_sorted(
|
2461
|
+
column,
|
2462
|
+
*more_columns,
|
2463
|
+
descending: false
|
2464
|
+
)
|
2465
|
+
columns = Utils.selection_to_rbexpr_list(column)
|
2466
|
+
if more_columns.any?
|
2467
|
+
columns.concat(Utils.selection_to_rbexpr_list(more_columns))
|
2468
|
+
end
|
2469
|
+
with_columns(
|
2470
|
+
columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
|
2471
|
+
)
|
2472
|
+
end
|
2473
|
+
|
2474
|
+
# TODO
|
2475
|
+
# def update
|
2476
|
+
# end
|
2477
|
+
|
2426
2478
|
private
|
2427
2479
|
|
2428
2480
|
def initialize_copy(other)
|