polars-df 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
@@ -0,0 +1,122 @@
|
|
1
|
+
module Polars
|
2
|
+
# Base class for all Polars data types.
|
3
|
+
class DataType
|
4
|
+
end
|
5
|
+
|
6
|
+
# 8-bit signed integer type.
|
7
|
+
class Int8 < DataType
|
8
|
+
end
|
9
|
+
|
10
|
+
# 16-bit signed integer type.
|
11
|
+
class Int16 < DataType
|
12
|
+
end
|
13
|
+
|
14
|
+
# 32-bit signed integer type.
|
15
|
+
class Int32 < DataType
|
16
|
+
end
|
17
|
+
|
18
|
+
# 64-bit signed integer type.
|
19
|
+
class Int64 < DataType
|
20
|
+
end
|
21
|
+
|
22
|
+
# 8-bit unsigned integer type.
|
23
|
+
class UInt8 < DataType
|
24
|
+
end
|
25
|
+
|
26
|
+
# 16-bit unsigned integer type.
|
27
|
+
class UInt16 < DataType
|
28
|
+
end
|
29
|
+
|
30
|
+
# 32-bit unsigned integer type.
|
31
|
+
class UInt32 < DataType
|
32
|
+
end
|
33
|
+
|
34
|
+
# 64-bit unsigned integer type.
|
35
|
+
class UInt64 < DataType
|
36
|
+
end
|
37
|
+
|
38
|
+
# 32-bit floating point type.
|
39
|
+
class Float32 < DataType
|
40
|
+
end
|
41
|
+
|
42
|
+
# 64-bit floating point type.
|
43
|
+
class Float64 < DataType
|
44
|
+
end
|
45
|
+
|
46
|
+
# Boolean type.
|
47
|
+
class Boolean < DataType
|
48
|
+
end
|
49
|
+
|
50
|
+
# UTF-8 encoded string type.
|
51
|
+
class Utf8 < DataType
|
52
|
+
end
|
53
|
+
|
54
|
+
# Binary type.
|
55
|
+
class Binary < DataType
|
56
|
+
end
|
57
|
+
|
58
|
+
# Type representing Null / nil values.
|
59
|
+
class Null < DataType
|
60
|
+
end
|
61
|
+
|
62
|
+
# Type representing Datatype values that could not be determined statically.
|
63
|
+
class Unknown < DataType
|
64
|
+
end
|
65
|
+
|
66
|
+
# Nested list/array type.
|
67
|
+
class List < DataType
|
68
|
+
def initialize(inner)
|
69
|
+
@inner = Utils.rb_type_to_dtype(inner)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# Calendar date type.
|
74
|
+
class Date < DataType
|
75
|
+
end
|
76
|
+
|
77
|
+
# Calendar date and time type.
|
78
|
+
class Datetime < DataType
|
79
|
+
def initialize(time_unit = "us", time_zone = nil)
|
80
|
+
@tu = time_unit || "us"
|
81
|
+
@time_zone = time_zone
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Time duration/delta type.
|
86
|
+
class Duration < DataType
|
87
|
+
def initialize(time_unit = "us")
|
88
|
+
@tu = time_unit
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Time of day type.
|
93
|
+
class Time < DataType
|
94
|
+
end
|
95
|
+
|
96
|
+
# Type for wrapping arbitrary Ruby objects.
|
97
|
+
class Object < DataType
|
98
|
+
end
|
99
|
+
|
100
|
+
# A categorical encoding of a set of strings.
|
101
|
+
class Categorical < DataType
|
102
|
+
end
|
103
|
+
|
104
|
+
# Definition of a single field within a `Struct` DataType.
|
105
|
+
class Field < DataType
|
106
|
+
def initialize(name, dtype)
|
107
|
+
@name = name
|
108
|
+
@dtype = Utils.rb_type_to_dtype(dtype)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# Struct composite type.
|
113
|
+
class Struct < DataType
|
114
|
+
def initialize(fields)
|
115
|
+
if fields.is_a?(Hash)
|
116
|
+
@fields = fields.map { |n, d| Field.new(n, d) }
|
117
|
+
else
|
118
|
+
@fields = fields
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
@@ -465,7 +465,7 @@ module Polars
|
|
465
465
|
#
|
466
466
|
# Applies to Date and Datetime columns.
|
467
467
|
#
|
468
|
-
# Returns the weekday number where monday = 0 and sunday = 6
|
468
|
+
# Returns the ISO weekday number where monday = 1 and sunday = 7
|
469
469
|
#
|
470
470
|
# @return [Expr]
|
471
471
|
#
|
@@ -502,11 +502,11 @@ module Polars
|
|
502
502
|
# # │ --- ┆ --- ┆ --- │
|
503
503
|
# # │ u32 ┆ u32 ┆ u32 │
|
504
504
|
# # ╞═════════╪══════════════╪═════════════╡
|
505
|
-
# # │
|
505
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
506
506
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
507
|
-
# # │
|
507
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
508
508
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
509
|
-
# # │
|
509
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
510
510
|
# # └─────────┴──────────────┴─────────────┘
|
511
511
|
def weekday
|
512
512
|
Utils.wrap_expr(_rbexpr.weekday)
|
@@ -554,11 +554,11 @@ module Polars
|
|
554
554
|
# # │ --- ┆ --- ┆ --- │
|
555
555
|
# # │ u32 ┆ u32 ┆ u32 │
|
556
556
|
# # ╞═════════╪══════════════╪═════════════╡
|
557
|
-
# # │
|
557
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
558
558
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
559
|
-
# # │
|
559
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
560
560
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
561
|
-
# # │
|
561
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
562
562
|
# # └─────────┴──────────────┴─────────────┘
|
563
563
|
def day
|
564
564
|
Utils.wrap_expr(_rbexpr.day)
|
@@ -606,11 +606,11 @@ module Polars
|
|
606
606
|
# # │ --- ┆ --- ┆ --- │
|
607
607
|
# # │ u32 ┆ u32 ┆ u32 │
|
608
608
|
# # ╞═════════╪══════════════╪═════════════╡
|
609
|
-
# # │
|
609
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
610
610
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
611
|
-
# # │
|
611
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
612
612
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
613
|
-
# # │
|
613
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
614
614
|
# # └─────────┴──────────────┴─────────────┘
|
615
615
|
def ordinal_day
|
616
616
|
Utils.wrap_expr(_rbexpr.ordinal_day)
|
@@ -317,7 +317,7 @@ module Polars
|
|
317
317
|
#
|
318
318
|
# Applies to Date and Datetime columns.
|
319
319
|
#
|
320
|
-
# Returns the weekday number where monday = 0 and sunday = 6
|
320
|
+
# Returns the ISO weekday number where monday = 1 and sunday = 7
|
321
321
|
#
|
322
322
|
# @return [Series]
|
323
323
|
#
|
@@ -344,13 +344,13 @@ module Polars
|
|
344
344
|
# # shape: (7,)
|
345
345
|
# # Series: '' [u32]
|
346
346
|
# # [
|
347
|
-
# # 0
|
348
347
|
# # 1
|
349
348
|
# # 2
|
350
349
|
# # 3
|
351
350
|
# # 4
|
352
351
|
# # 5
|
353
352
|
# # 6
|
353
|
+
# # 7
|
354
354
|
# # ]
|
355
355
|
def weekday
|
356
356
|
super
|
@@ -973,9 +973,9 @@ module Polars
|
|
973
973
|
# # shape: (3,)
|
974
974
|
# # Series: 'NYC' [datetime[μs, America/New_York]]
|
975
975
|
# # [
|
976
|
-
# # 2020-
|
977
|
-
# # 2020-
|
978
|
-
# # 2020-
|
976
|
+
# # 2020-03-01 00:00:00 EST
|
977
|
+
# # 2020-04-01 01:00:00 EDT
|
978
|
+
# # 2020-05-01 01:00:00 EDT
|
979
979
|
# # ]
|
980
980
|
#
|
981
981
|
# @example Timestamps have changed after cast_time_zone
|
@@ -984,9 +984,9 @@ module Polars
|
|
984
984
|
# # shape: (3,)
|
985
985
|
# # Series: 'NYC' [i64]
|
986
986
|
# # [
|
987
|
-
# #
|
988
|
-
# #
|
989
|
-
# #
|
987
|
+
# # 1583038800
|
988
|
+
# # 1585717200
|
989
|
+
# # 1588309200
|
990
990
|
# # ]
|
991
991
|
def cast_time_zone(tz)
|
992
992
|
super
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Polars
|
2
|
+
# A dynamic grouper.
|
3
|
+
#
|
4
|
+
# This has an `.agg` method which allows you to run all polars expressions in a
|
5
|
+
# groupby context.
|
6
|
+
class DynamicGroupBy
|
7
|
+
def initialize(
|
8
|
+
df,
|
9
|
+
index_column,
|
10
|
+
every,
|
11
|
+
period,
|
12
|
+
offset,
|
13
|
+
truncate,
|
14
|
+
include_boundaries,
|
15
|
+
closed,
|
16
|
+
by,
|
17
|
+
start_by
|
18
|
+
)
|
19
|
+
period = Utils._timedelta_to_pl_duration(period)
|
20
|
+
offset = Utils._timedelta_to_pl_duration(offset)
|
21
|
+
every = Utils._timedelta_to_pl_duration(every)
|
22
|
+
|
23
|
+
@df = df
|
24
|
+
@time_column = index_column
|
25
|
+
@every = every
|
26
|
+
@period = period
|
27
|
+
@offset = offset
|
28
|
+
@truncate = truncate
|
29
|
+
@include_boundaries = include_boundaries
|
30
|
+
@closed = closed
|
31
|
+
@by = by
|
32
|
+
@start_by = start_by
|
33
|
+
end
|
34
|
+
|
35
|
+
def agg(aggs)
|
36
|
+
@df.lazy
|
37
|
+
.groupby_dynamic(
|
38
|
+
@time_column,
|
39
|
+
every: @every,
|
40
|
+
period: @period,
|
41
|
+
offset: @offset,
|
42
|
+
truncate: @truncate,
|
43
|
+
include_boundaries: @include_boundaries,
|
44
|
+
closed: @closed,
|
45
|
+
by: @by,
|
46
|
+
start_by: @start_by
|
47
|
+
)
|
48
|
+
.agg(aggs)
|
49
|
+
.collect(no_optimization: true, string_cache: false)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/polars/expr.rb
CHANGED
@@ -432,8 +432,34 @@ module Polars
|
|
432
432
|
wrap_expr(_rbexpr.suffix(suffix))
|
433
433
|
end
|
434
434
|
|
435
|
-
#
|
436
|
-
#
|
435
|
+
# Rename the output of an expression by mapping a function over the root name.
|
436
|
+
#
|
437
|
+
# @return [Expr]
|
438
|
+
#
|
439
|
+
# @example
|
440
|
+
# df = Polars::DataFrame.new(
|
441
|
+
# {
|
442
|
+
# "A" => [1, 2],
|
443
|
+
# "B" => [3, 4]
|
444
|
+
# }
|
445
|
+
# )
|
446
|
+
# df.select(
|
447
|
+
# Polars.all.reverse.map_alias { |colName| colName + "_reverse" }
|
448
|
+
# )
|
449
|
+
# # =>
|
450
|
+
# # shape: (2, 2)
|
451
|
+
# # ┌───────────┬───────────┐
|
452
|
+
# # │ A_reverse ┆ B_reverse │
|
453
|
+
# # │ --- ┆ --- │
|
454
|
+
# # │ i64 ┆ i64 │
|
455
|
+
# # ╞═══════════╪═══════════╡
|
456
|
+
# # │ 2 ┆ 4 │
|
457
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
458
|
+
# # │ 1 ┆ 3 │
|
459
|
+
# # └───────────┴───────────┘
|
460
|
+
def map_alias(&f)
|
461
|
+
Utils.wrap_expr(_rbexpr.map_alias(f))
|
462
|
+
end
|
437
463
|
|
438
464
|
# Negate a boolean expression.
|
439
465
|
#
|
@@ -2460,7 +2486,8 @@ module Polars
|
|
2460
2486
|
# # │ 1.5 │
|
2461
2487
|
# # └─────┘
|
2462
2488
|
def quantile(quantile, interpolation: "nearest")
|
2463
|
-
|
2489
|
+
quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
|
2490
|
+
wrap_expr(_rbexpr.quantile(quantile._rbexpr, interpolation))
|
2464
2491
|
end
|
2465
2492
|
|
2466
2493
|
# Filter a single column.
|
@@ -2575,14 +2602,98 @@ module Polars
|
|
2575
2602
|
# # ╞══════╪════════╡
|
2576
2603
|
# # │ 1 ┆ 0 │
|
2577
2604
|
# # └──────┴────────┘
|
2578
|
-
# def map(return_dtype: nil, agg_list: false, &
|
2605
|
+
# def map(return_dtype: nil, agg_list: false, &f)
|
2579
2606
|
# if !return_dtype.nil?
|
2580
2607
|
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2581
2608
|
# end
|
2582
|
-
# wrap_expr(_rbexpr.map(return_dtype, agg_list
|
2609
|
+
# wrap_expr(_rbexpr.map(f, return_dtype, agg_list))
|
2583
2610
|
# end
|
2584
2611
|
|
2585
|
-
#
|
2612
|
+
# Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
|
2613
|
+
#
|
2614
|
+
# Depending on the context it has the following behavior:
|
2615
|
+
#
|
2616
|
+
# * Selection
|
2617
|
+
# Expects `f` to be a block that takes a single value and returns a value.
|
2618
|
+
# Applies a Ruby function over each individual value in the column.
|
2619
|
+
# * GroupBy
|
2620
|
+
# Expects `f` to be a block that takes a Series and returns a Series.
|
2621
|
+
# Applies a Ruby function over each group.
|
2622
|
+
#
|
2623
|
+
# Implementing logic using a Ruby function is almost always _significantly_
|
2624
|
+
# slower and more memory intensive than implementing the same logic using
|
2625
|
+
# the native expression API because:
|
2626
|
+
#
|
2627
|
+
# - The native expression engine runs in Rust; UDFs run in Ruby.
|
2628
|
+
# - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
|
2629
|
+
# - Polars-native expressions can be parallelised (UDFs cannot).
|
2630
|
+
# - Polars-native expressions can be logically optimised (UDFs cannot).
|
2631
|
+
#
|
2632
|
+
# Wherever possible you should strongly prefer the native expression API
|
2633
|
+
# to achieve the best performance.
|
2634
|
+
#
|
2635
|
+
# @param return_dtype [Symbol]
|
2636
|
+
# Dtype of the output Series.
|
2637
|
+
# If not set, polars will assume that
|
2638
|
+
# the dtype remains unchanged.
|
2639
|
+
#
|
2640
|
+
# @return [Expr]
|
2641
|
+
#
|
2642
|
+
# @example
|
2643
|
+
# df = Polars::DataFrame.new(
|
2644
|
+
# {
|
2645
|
+
# "a" => [1, 2, 3, 1],
|
2646
|
+
# "b" => ["a", "b", "c", "c"]
|
2647
|
+
# }
|
2648
|
+
# )
|
2649
|
+
#
|
2650
|
+
# @example In a selection context, the function is applied by row.
|
2651
|
+
# df.with_column(
|
2652
|
+
# Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
|
2653
|
+
# )
|
2654
|
+
# # =>
|
2655
|
+
# # shape: (4, 3)
|
2656
|
+
# # ┌─────┬─────┬───────────┐
|
2657
|
+
# # │ a ┆ b ┆ a_times_2 │
|
2658
|
+
# # │ --- ┆ --- ┆ --- │
|
2659
|
+
# # │ i64 ┆ str ┆ i64 │
|
2660
|
+
# # ╞═════╪═════╪═══════════╡
|
2661
|
+
# # │ 1 ┆ a ┆ 2 │
|
2662
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2663
|
+
# # │ 2 ┆ b ┆ 4 │
|
2664
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2665
|
+
# # │ 3 ┆ c ┆ 6 │
|
2666
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2667
|
+
# # │ 1 ┆ c ┆ 2 │
|
2668
|
+
# # └─────┴─────┴───────────┘
|
2669
|
+
#
|
2670
|
+
# @example In a GroupBy context the function is applied by group:
|
2671
|
+
# df.lazy
|
2672
|
+
# .groupby("b", maintain_order: true)
|
2673
|
+
# .agg(
|
2674
|
+
# [
|
2675
|
+
# Polars.col("a").apply { |x| x.sum }
|
2676
|
+
# ]
|
2677
|
+
# )
|
2678
|
+
# .collect
|
2679
|
+
# # =>
|
2680
|
+
# # shape: (3, 2)
|
2681
|
+
# # ┌─────┬─────┐
|
2682
|
+
# # │ b ┆ a │
|
2683
|
+
# # │ --- ┆ --- │
|
2684
|
+
# # │ str ┆ i64 │
|
2685
|
+
# # ╞═════╪═════╡
|
2686
|
+
# # │ a ┆ 1 │
|
2687
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2688
|
+
# # │ b ┆ 2 │
|
2689
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2690
|
+
# # │ c ┆ 4 │
|
2691
|
+
# # └─────┴─────┘
|
2692
|
+
# def apply(return_dtype: nil, &f)
|
2693
|
+
# wrap_f = lambda do |x|
|
2694
|
+
# x.apply(return_dtype: return_dtype, &f)
|
2695
|
+
# end
|
2696
|
+
# map(agg_list: true, return_dtype: return_dtype, &wrap_f)
|
2586
2697
|
# end
|
2587
2698
|
|
2588
2699
|
# Explode a list or utf8 Series. This means that every item is expanded to a new
|
@@ -2898,8 +3009,49 @@ module Polars
|
|
2898
3009
|
end
|
2899
3010
|
end
|
2900
3011
|
|
2901
|
-
#
|
2902
|
-
#
|
3012
|
+
# Hash the elements in the selection.
|
3013
|
+
#
|
3014
|
+
# The hash value is of type `:u64`.
|
3015
|
+
#
|
3016
|
+
# @param seed [Integer]
|
3017
|
+
# Random seed parameter. Defaults to 0.
|
3018
|
+
# @param seed_1 [Integer]
|
3019
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3020
|
+
# @param seed_2 [Integer]
|
3021
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3022
|
+
# @param seed_3 [Integer]
|
3023
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3024
|
+
#
|
3025
|
+
# @return [Expr]
|
3026
|
+
#
|
3027
|
+
# @example
|
3028
|
+
# df = Polars::DataFrame.new(
|
3029
|
+
# {
|
3030
|
+
# "a" => [1, 2, nil],
|
3031
|
+
# "b" => ["x", nil, "z"]
|
3032
|
+
# }
|
3033
|
+
# )
|
3034
|
+
# df.with_column(Polars.all._hash(10, 20, 30, 40))
|
3035
|
+
# # =>
|
3036
|
+
# # shape: (3, 2)
|
3037
|
+
# # ┌──────────────────────┬──────────────────────┐
|
3038
|
+
# # │ a ┆ b │
|
3039
|
+
# # │ --- ┆ --- │
|
3040
|
+
# # │ u64 ┆ u64 │
|
3041
|
+
# # ╞══════════════════════╪══════════════════════╡
|
3042
|
+
# # │ 4629889412789719550 ┆ 6959506404929392568 │
|
3043
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3044
|
+
# # │ 16386608652769605760 ┆ 11638928888656214026 │
|
3045
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3046
|
+
# # │ 11638928888656214026 ┆ 11040941213715918520 │
|
3047
|
+
# # └──────────────────────┴──────────────────────┘
|
3048
|
+
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
3049
|
+
k0 = seed
|
3050
|
+
k1 = seed_1.nil? ? seed : seed_1
|
3051
|
+
k2 = seed_2.nil? ? seed : seed_2
|
3052
|
+
k3 = seed_3.nil? ? seed : seed_3
|
3053
|
+
wrap_expr(_rbexpr._hash(k0, k1, k2, k3))
|
3054
|
+
end
|
2903
3055
|
|
2904
3056
|
# Reinterpret the underlying bits as a signed/unsigned integer.
|
2905
3057
|
#
|
@@ -2937,7 +3089,40 @@ module Polars
|
|
2937
3089
|
wrap_expr(_rbexpr.reinterpret(signed))
|
2938
3090
|
end
|
2939
3091
|
|
2940
|
-
#
|
3092
|
+
# Print the value that this expression evaluates to and pass on the value.
|
3093
|
+
#
|
3094
|
+
# @return [Expr]
|
3095
|
+
#
|
3096
|
+
# @example
|
3097
|
+
# df = Polars::DataFrame.new({"foo" => [1, 1, 2]})
|
3098
|
+
# df.select(Polars.col("foo").cumsum._inspect("value is: %s").alias("bar"))
|
3099
|
+
# # =>
|
3100
|
+
# # value is: shape: (3,)
|
3101
|
+
# # Series: 'foo' [i64]
|
3102
|
+
# # [
|
3103
|
+
# # 1
|
3104
|
+
# # 2
|
3105
|
+
# # 4
|
3106
|
+
# # ]
|
3107
|
+
# # shape: (3, 1)
|
3108
|
+
# # ┌─────┐
|
3109
|
+
# # │ bar │
|
3110
|
+
# # │ --- │
|
3111
|
+
# # │ i64 │
|
3112
|
+
# # ╞═════╡
|
3113
|
+
# # │ 1 │
|
3114
|
+
# # ├╌╌╌╌╌┤
|
3115
|
+
# # │ 2 │
|
3116
|
+
# # ├╌╌╌╌╌┤
|
3117
|
+
# # │ 4 │
|
3118
|
+
# # └─────┘
|
3119
|
+
# def _inspect(fmt = "%s")
|
3120
|
+
# inspect = lambda do |s|
|
3121
|
+
# puts(fmt % [s])
|
3122
|
+
# s
|
3123
|
+
# end
|
3124
|
+
|
3125
|
+
# map(return_dtype: nil, agg_list: true, &inspect)
|
2941
3126
|
# end
|
2942
3127
|
|
2943
3128
|
# Fill nulls with linear interpolation over missing values.
|
@@ -2967,8 +3152,8 @@ module Polars
|
|
2967
3152
|
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2968
3153
|
# # │ 3 ┆ 3.0 │
|
2969
3154
|
# # └─────┴─────┘
|
2970
|
-
def interpolate
|
2971
|
-
wrap_expr(_rbexpr.interpolate)
|
3155
|
+
def interpolate(method: "linear")
|
3156
|
+
wrap_expr(_rbexpr.interpolate(method))
|
2972
3157
|
end
|
2973
3158
|
|
2974
3159
|
# Apply a rolling min (moving min) over the values in this array.
|
@@ -3721,7 +3906,72 @@ module Polars
|
|
3721
3906
|
)
|
3722
3907
|
end
|
3723
3908
|
|
3724
|
-
#
|
3909
|
+
# Apply a custom rolling window function.
|
3910
|
+
#
|
3911
|
+
# Prefer the specific rolling window functions over this one, as they are faster.
|
3912
|
+
#
|
3913
|
+
# Prefer:
|
3914
|
+
# * rolling_min
|
3915
|
+
# * rolling_max
|
3916
|
+
# * rolling_mean
|
3917
|
+
# * rolling_sum
|
3918
|
+
#
|
3919
|
+
# @param window_size [Integer]
|
3920
|
+
# The length of the window.
|
3921
|
+
# @param weights [Object]
|
3922
|
+
# An optional slice with the same length as the window that will be multiplied
|
3923
|
+
# elementwise with the values in the window.
|
3924
|
+
# @param min_periods [Integer]
|
3925
|
+
# The number of values in the window that should be non-null before computing
|
3926
|
+
# a result. If nil, it will be set equal to window size.
|
3927
|
+
# @param center [Boolean]
|
3928
|
+
# Set the labels at the center of the window.
|
3929
|
+
#
|
3930
|
+
# @return [Expr]
|
3931
|
+
#
|
3932
|
+
# @example
|
3933
|
+
# df = Polars::DataFrame.new(
|
3934
|
+
# {
|
3935
|
+
# "A" => [1.0, 2.0, 9.0, 2.0, 13.0]
|
3936
|
+
# }
|
3937
|
+
# )
|
3938
|
+
# df.select(
|
3939
|
+
# [
|
3940
|
+
# Polars.col("A").rolling_apply(window_size: 3) { |s| s.std }
|
3941
|
+
# ]
|
3942
|
+
# )
|
3943
|
+
# # =>
|
3944
|
+
# # shape: (5, 1)
|
3945
|
+
# # ┌──────────┐
|
3946
|
+
# # │ A │
|
3947
|
+
# # │ --- │
|
3948
|
+
# # │ f64 │
|
3949
|
+
# # ╞══════════╡
|
3950
|
+
# # │ null │
|
3951
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3952
|
+
# # │ null │
|
3953
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3954
|
+
# # │ 4.358899 │
|
3955
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3956
|
+
# # │ 4.041452 │
|
3957
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3958
|
+
# # │ 5.567764 │
|
3959
|
+
# # └──────────┘
|
3960
|
+
# def rolling_apply(
|
3961
|
+
# window_size:,
|
3962
|
+
# weights: nil,
|
3963
|
+
# min_periods: nil,
|
3964
|
+
# center: false,
|
3965
|
+
# &function
|
3966
|
+
# )
|
3967
|
+
# if min_periods.nil?
|
3968
|
+
# min_periods = window_size
|
3969
|
+
# end
|
3970
|
+
# wrap_expr(
|
3971
|
+
# _rbexpr.rolling_apply(
|
3972
|
+
# function, window_size, weights, min_periods, center
|
3973
|
+
# )
|
3974
|
+
# )
|
3725
3975
|
# end
|
3726
3976
|
|
3727
3977
|
# Compute a rolling skew.
|