polars-df 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
@@ -0,0 +1,122 @@
|
|
1
|
+
module Polars
|
2
|
+
# Base class for all Polars data types.
|
3
|
+
class DataType
|
4
|
+
end
|
5
|
+
|
6
|
+
# 8-bit signed integer type.
|
7
|
+
class Int8 < DataType
|
8
|
+
end
|
9
|
+
|
10
|
+
# 16-bit signed integer type.
|
11
|
+
class Int16 < DataType
|
12
|
+
end
|
13
|
+
|
14
|
+
# 32-bit signed integer type.
|
15
|
+
class Int32 < DataType
|
16
|
+
end
|
17
|
+
|
18
|
+
# 64-bit signed integer type.
|
19
|
+
class Int64 < DataType
|
20
|
+
end
|
21
|
+
|
22
|
+
# 8-bit unsigned integer type.
|
23
|
+
class UInt8 < DataType
|
24
|
+
end
|
25
|
+
|
26
|
+
# 16-bit unsigned integer type.
|
27
|
+
class UInt16 < DataType
|
28
|
+
end
|
29
|
+
|
30
|
+
# 32-bit unsigned integer type.
|
31
|
+
class UInt32 < DataType
|
32
|
+
end
|
33
|
+
|
34
|
+
# 64-bit unsigned integer type.
|
35
|
+
class UInt64 < DataType
|
36
|
+
end
|
37
|
+
|
38
|
+
# 32-bit floating point type.
|
39
|
+
class Float32 < DataType
|
40
|
+
end
|
41
|
+
|
42
|
+
# 64-bit floating point type.
|
43
|
+
class Float64 < DataType
|
44
|
+
end
|
45
|
+
|
46
|
+
# Boolean type.
|
47
|
+
class Boolean < DataType
|
48
|
+
end
|
49
|
+
|
50
|
+
# UTF-8 encoded string type.
|
51
|
+
class Utf8 < DataType
|
52
|
+
end
|
53
|
+
|
54
|
+
# Binary type.
|
55
|
+
class Binary < DataType
|
56
|
+
end
|
57
|
+
|
58
|
+
# Type representing Null / None values.
|
59
|
+
class Null < DataType
|
60
|
+
end
|
61
|
+
|
62
|
+
# Type representing Datatype values that could not be determined statically.
|
63
|
+
class Unknown < DataType
|
64
|
+
end
|
65
|
+
|
66
|
+
# Nested list/array type.
|
67
|
+
class List < DataType
|
68
|
+
def initialize(inner)
|
69
|
+
@inner = Utils.rb_type_to_dtype(inner)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# Calendar date type.
|
74
|
+
class Date < DataType
|
75
|
+
end
|
76
|
+
|
77
|
+
# Calendar date and time type.
|
78
|
+
class Datetime < DataType
|
79
|
+
def initialize(time_unit = "us", time_zone = nil)
|
80
|
+
@tu = time_unit || "us"
|
81
|
+
@time_zone = time_zone
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Time duration/delta type.
|
86
|
+
class Duration < DataType
|
87
|
+
def initialize(time_unit = "us")
|
88
|
+
@tu = time_unit
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Time of day type.
|
93
|
+
class Time < DataType
|
94
|
+
end
|
95
|
+
|
96
|
+
# Type for wrapping arbitrary Ruby objects.
|
97
|
+
class Object < DataType
|
98
|
+
end
|
99
|
+
|
100
|
+
# A categorical encoding of a set of strings.
|
101
|
+
class Categorical < DataType
|
102
|
+
end
|
103
|
+
|
104
|
+
# Definition of a single field within a `Struct` DataType.
|
105
|
+
class Field < DataType
|
106
|
+
def initialize(name, dtype)
|
107
|
+
@name = name
|
108
|
+
@dtype = Utils.rb_type_to_dtype(dtype)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# Struct composite type.
|
113
|
+
class Struct < DataType
|
114
|
+
def initialize(fields)
|
115
|
+
if fields.is_a?(Hash)
|
116
|
+
@fields = fields.map { |n, d| Field.new(n, d) }
|
117
|
+
else
|
118
|
+
@fields = fields
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
@@ -465,7 +465,7 @@ module Polars
|
|
465
465
|
#
|
466
466
|
# Applies to Date and Datetime columns.
|
467
467
|
#
|
468
|
-
# Returns the weekday number where monday =
|
468
|
+
# Returns the ISO weekday number, where Monday = 1 and Sunday = 7.
|
469
469
|
#
|
470
470
|
# @return [Expr]
|
471
471
|
#
|
@@ -502,11 +502,11 @@ module Polars
|
|
502
502
|
# # │ --- ┆ --- ┆ --- │
|
503
503
|
# # │ u32 ┆ u32 ┆ u32 │
|
504
504
|
# # ╞═════════╪══════════════╪═════════════╡
|
505
|
-
# # │
|
505
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
506
506
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
507
|
-
# # │
|
507
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
508
508
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
509
|
-
# # │
|
509
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
510
510
|
# # └─────────┴──────────────┴─────────────┘
|
511
511
|
def weekday
|
512
512
|
Utils.wrap_expr(_rbexpr.weekday)
|
@@ -554,11 +554,11 @@ module Polars
|
|
554
554
|
# # │ --- ┆ --- ┆ --- │
|
555
555
|
# # │ u32 ┆ u32 ┆ u32 │
|
556
556
|
# # ╞═════════╪══════════════╪═════════════╡
|
557
|
-
# # │
|
557
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
558
558
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
559
|
-
# # │
|
559
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
560
560
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
561
|
-
# # │
|
561
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
562
562
|
# # └─────────┴──────────────┴─────────────┘
|
563
563
|
def day
|
564
564
|
Utils.wrap_expr(_rbexpr.day)
|
@@ -606,11 +606,11 @@ module Polars
|
|
606
606
|
# # │ --- ┆ --- ┆ --- │
|
607
607
|
# # │ u32 ┆ u32 ┆ u32 │
|
608
608
|
# # ╞═════════╪══════════════╪═════════════╡
|
609
|
-
# # │
|
609
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
610
610
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
611
|
-
# # │
|
611
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
612
612
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
613
|
-
# # │
|
613
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
614
614
|
# # └─────────┴──────────────┴─────────────┘
|
615
615
|
def ordinal_day
|
616
616
|
Utils.wrap_expr(_rbexpr.ordinal_day)
|
@@ -317,7 +317,7 @@ module Polars
|
|
317
317
|
#
|
318
318
|
# Applies to Date and Datetime columns.
|
319
319
|
#
|
320
|
-
# Returns the weekday number where monday =
|
320
|
+
# Returns the ISO weekday number, where Monday = 1 and Sunday = 7.
|
321
321
|
#
|
322
322
|
# @return [Series]
|
323
323
|
#
|
@@ -344,13 +344,13 @@ module Polars
|
|
344
344
|
# # shape: (7,)
|
345
345
|
# # Series: '' [u32]
|
346
346
|
# # [
|
347
|
-
# # 0
|
348
347
|
# # 1
|
349
348
|
# # 2
|
350
349
|
# # 3
|
351
350
|
# # 4
|
352
351
|
# # 5
|
353
352
|
# # 6
|
353
|
+
# # 7
|
354
354
|
# # ]
|
355
355
|
def weekday
|
356
356
|
super
|
@@ -973,9 +973,9 @@ module Polars
|
|
973
973
|
# # shape: (3,)
|
974
974
|
# # Series: 'NYC' [datetime[μs, America/New_York]]
|
975
975
|
# # [
|
976
|
-
# # 2020-
|
977
|
-
# # 2020-
|
978
|
-
# # 2020-
|
976
|
+
# # 2020-03-01 00:00:00 EST
|
977
|
+
# # 2020-04-01 01:00:00 EDT
|
978
|
+
# # 2020-05-01 01:00:00 EDT
|
979
979
|
# # ]
|
980
980
|
#
|
981
981
|
# @example Timestamps have changed after cast_time_zone
|
@@ -984,9 +984,9 @@ module Polars
|
|
984
984
|
# # shape: (3,)
|
985
985
|
# # Series: 'NYC' [i64]
|
986
986
|
# # [
|
987
|
-
# #
|
988
|
-
# #
|
989
|
-
# #
|
987
|
+
# # 1583038800
|
988
|
+
# # 1585717200
|
989
|
+
# # 1588309200
|
990
990
|
# # ]
|
991
991
|
def cast_time_zone(tz)
|
992
992
|
super
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Polars
|
2
|
+
# A dynamic grouper.
|
3
|
+
#
|
4
|
+
# This has an `.agg` method which allows you to run all polars expressions in a
|
5
|
+
# groupby context.
|
6
|
+
class DynamicGroupBy
|
7
|
+
def initialize(
|
8
|
+
df,
|
9
|
+
index_column,
|
10
|
+
every,
|
11
|
+
period,
|
12
|
+
offset,
|
13
|
+
truncate,
|
14
|
+
include_boundaries,
|
15
|
+
closed,
|
16
|
+
by,
|
17
|
+
start_by
|
18
|
+
)
|
19
|
+
period = Utils._timedelta_to_pl_duration(period)
|
20
|
+
offset = Utils._timedelta_to_pl_duration(offset)
|
21
|
+
every = Utils._timedelta_to_pl_duration(every)
|
22
|
+
|
23
|
+
@df = df
|
24
|
+
@time_column = index_column
|
25
|
+
@every = every
|
26
|
+
@period = period
|
27
|
+
@offset = offset
|
28
|
+
@truncate = truncate
|
29
|
+
@include_boundaries = include_boundaries
|
30
|
+
@closed = closed
|
31
|
+
@by = by
|
32
|
+
@start_by = start_by
|
33
|
+
end
|
34
|
+
|
35
|
+
def agg(aggs)
|
36
|
+
@df.lazy
|
37
|
+
.groupby_dynamic(
|
38
|
+
@time_column,
|
39
|
+
every: @every,
|
40
|
+
period: @period,
|
41
|
+
offset: @offset,
|
42
|
+
truncate: @truncate,
|
43
|
+
include_boundaries: @include_boundaries,
|
44
|
+
closed: @closed,
|
45
|
+
by: @by,
|
46
|
+
start_by: @start_by
|
47
|
+
)
|
48
|
+
.agg(aggs)
|
49
|
+
.collect(no_optimization: true, string_cache: false)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/polars/expr.rb
CHANGED
@@ -432,8 +432,34 @@ module Polars
|
|
432
432
|
wrap_expr(_rbexpr.suffix(suffix))
|
433
433
|
end
|
434
434
|
|
435
|
-
#
|
436
|
-
#
|
435
|
+
# Rename the output of an expression by mapping a function over the root name.
|
436
|
+
#
|
437
|
+
# @return [Expr]
|
438
|
+
#
|
439
|
+
# @example
|
440
|
+
# df = Polars::DataFrame.new(
|
441
|
+
# {
|
442
|
+
# "A" => [1, 2],
|
443
|
+
# "B" => [3, 4]
|
444
|
+
# }
|
445
|
+
# )
|
446
|
+
# df.select(
|
447
|
+
# Polars.all.reverse.map_alias { |colName| colName + "_reverse" }
|
448
|
+
# )
|
449
|
+
# # =>
|
450
|
+
# # shape: (2, 2)
|
451
|
+
# # ┌───────────┬───────────┐
|
452
|
+
# # │ A_reverse ┆ B_reverse │
|
453
|
+
# # │ --- ┆ --- │
|
454
|
+
# # │ i64 ┆ i64 │
|
455
|
+
# # ╞═══════════╪═══════════╡
|
456
|
+
# # │ 2 ┆ 4 │
|
457
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
458
|
+
# # │ 1 ┆ 3 │
|
459
|
+
# # └───────────┴───────────┘
|
460
|
+
def map_alias(&f)
|
461
|
+
Utils.wrap_expr(_rbexpr.map_alias(f))
|
462
|
+
end
|
437
463
|
|
438
464
|
# Negate a boolean expression.
|
439
465
|
#
|
@@ -2460,7 +2486,8 @@ module Polars
|
|
2460
2486
|
# # │ 1.5 │
|
2461
2487
|
# # └─────┘
|
2462
2488
|
def quantile(quantile, interpolation: "nearest")
|
2463
|
-
|
2489
|
+
quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
|
2490
|
+
wrap_expr(_rbexpr.quantile(quantile._rbexpr, interpolation))
|
2464
2491
|
end
|
2465
2492
|
|
2466
2493
|
# Filter a single column.
|
@@ -2575,14 +2602,98 @@ module Polars
|
|
2575
2602
|
# # ╞══════╪════════╡
|
2576
2603
|
# # │ 1 ┆ 0 │
|
2577
2604
|
# # └──────┴────────┘
|
2578
|
-
# def map(return_dtype: nil, agg_list: false, &
|
2605
|
+
# def map(return_dtype: nil, agg_list: false, &f)
|
2579
2606
|
# if !return_dtype.nil?
|
2580
2607
|
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2581
2608
|
# end
|
2582
|
-
# wrap_expr(_rbexpr.map(return_dtype, agg_list
|
2609
|
+
# wrap_expr(_rbexpr.map(f, return_dtype, agg_list))
|
2583
2610
|
# end
|
2584
2611
|
|
2585
|
-
#
|
2612
|
+
# Apply a custom/user-defined function (UDF) in a GroupBy or Projection context.
|
2613
|
+
#
|
2614
|
+
# Depending on the context it has the following behavior:
|
2615
|
+
#
|
2616
|
+
# * Selection
|
2617
|
+
# Expects `f` to be of type Callable[[Any], Any].
|
2618
|
+
# Applies a Ruby function over each individual value in the column.
|
2619
|
+
# * GroupBy
|
2620
|
+
# Expects `f` to be of type Callable[[Series], Series].
|
2621
|
+
# Applies a Ruby function over each group.
|
2622
|
+
#
|
2623
|
+
# Implementing logic using a Ruby function is almost always _significantly_
|
2624
|
+
# slower and more memory intensive than implementing the same logic using
|
2625
|
+
# the native expression API because:
|
2626
|
+
#
|
2627
|
+
# - The native expression engine runs in Rust; UDFs run in Ruby.
|
2628
|
+
# - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
|
2629
|
+
# - Polars-native expressions can be parallelised (UDFs cannot).
|
2630
|
+
# - Polars-native expressions can be logically optimised (UDFs cannot).
|
2631
|
+
#
|
2632
|
+
# Wherever possible you should strongly prefer the native expression API
|
2633
|
+
# to achieve the best performance.
|
2634
|
+
#
|
2635
|
+
# @param return_dtype [Symbol]
|
2636
|
+
# Dtype of the output Series.
|
2637
|
+
# If not set, polars will assume that
|
2638
|
+
# the dtype remains unchanged.
|
2639
|
+
#
|
2640
|
+
# @return [Expr]
|
2641
|
+
#
|
2642
|
+
# @example
|
2643
|
+
# df = Polars::DataFrame.new(
|
2644
|
+
# {
|
2645
|
+
# "a" => [1, 2, 3, 1],
|
2646
|
+
# "b" => ["a", "b", "c", "c"]
|
2647
|
+
# }
|
2648
|
+
# )
|
2649
|
+
#
|
2650
|
+
# @example In a selection context, the function is applied by row.
|
2651
|
+
# df.with_column(
|
2652
|
+
# Polars.col("a").apply { |x| x * 2 }.alias("a_times_2")
|
2653
|
+
# )
|
2654
|
+
# # =>
|
2655
|
+
# # shape: (4, 3)
|
2656
|
+
# # ┌─────┬─────┬───────────┐
|
2657
|
+
# # │ a ┆ b ┆ a_times_2 │
|
2658
|
+
# # │ --- ┆ --- ┆ --- │
|
2659
|
+
# # │ i64 ┆ str ┆ i64 │
|
2660
|
+
# # ╞═════╪═════╪═══════════╡
|
2661
|
+
# # │ 1 ┆ a ┆ 2 │
|
2662
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2663
|
+
# # │ 2 ┆ b ┆ 4 │
|
2664
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2665
|
+
# # │ 3 ┆ c ┆ 6 │
|
2666
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
2667
|
+
# # │ 1 ┆ c ┆ 2 │
|
2668
|
+
# # └─────┴─────┴───────────┘
|
2669
|
+
#
|
2670
|
+
# @example In a GroupBy context the function is applied by group:
|
2671
|
+
# df.lazy
|
2672
|
+
# .groupby("b", maintain_order: true)
|
2673
|
+
# .agg(
|
2674
|
+
# [
|
2675
|
+
# Polars.col("a").apply { |x| x.sum }
|
2676
|
+
# ]
|
2677
|
+
# )
|
2678
|
+
# .collect
|
2679
|
+
# # =>
|
2680
|
+
# # shape: (3, 2)
|
2681
|
+
# # ┌─────┬─────┐
|
2682
|
+
# # │ b ┆ a │
|
2683
|
+
# # │ --- ┆ --- │
|
2684
|
+
# # │ str ┆ i64 │
|
2685
|
+
# # ╞═════╪═════╡
|
2686
|
+
# # │ a ┆ 1 │
|
2687
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2688
|
+
# # │ b ┆ 2 │
|
2689
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2690
|
+
# # │ c ┆ 4 │
|
2691
|
+
# # └─────┴─────┘
|
2692
|
+
# def apply(return_dtype: nil, &f)
|
2693
|
+
# wrap_f = lambda do |x|
|
2694
|
+
# x.apply(return_dtype: return_dtype, &f)
|
2695
|
+
# end
|
2696
|
+
# map(agg_list: true, return_dtype: return_dtype, &wrap_f)
|
2586
2697
|
# end
|
2587
2698
|
|
2588
2699
|
# Explode a list or utf8 Series. This means that every item is expanded to a new
|
@@ -2898,8 +3009,49 @@ module Polars
|
|
2898
3009
|
end
|
2899
3010
|
end
|
2900
3011
|
|
2901
|
-
#
|
2902
|
-
#
|
3012
|
+
# Hash the elements in the selection.
|
3013
|
+
#
|
3014
|
+
# The hash value is of type `:u64`.
|
3015
|
+
#
|
3016
|
+
# @param seed [Integer]
|
3017
|
+
# Random seed parameter. Defaults to 0.
|
3018
|
+
# @param seed_1 [Integer]
|
3019
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3020
|
+
# @param seed_2 [Integer]
|
3021
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3022
|
+
# @param seed_3 [Integer]
|
3023
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3024
|
+
#
|
3025
|
+
# @return [Expr]
|
3026
|
+
#
|
3027
|
+
# @example
|
3028
|
+
# df = Polars::DataFrame.new(
|
3029
|
+
# {
|
3030
|
+
# "a" => [1, 2, nil],
|
3031
|
+
# "b" => ["x", nil, "z"]
|
3032
|
+
# }
|
3033
|
+
# )
|
3034
|
+
# df.with_column(Polars.all._hash(10, 20, 30, 40))
|
3035
|
+
# # =>
|
3036
|
+
# # shape: (3, 2)
|
3037
|
+
# # ┌──────────────────────┬──────────────────────┐
|
3038
|
+
# # │ a ┆ b │
|
3039
|
+
# # │ --- ┆ --- │
|
3040
|
+
# # │ u64 ┆ u64 │
|
3041
|
+
# # ╞══════════════════════╪══════════════════════╡
|
3042
|
+
# # │ 4629889412789719550 ┆ 6959506404929392568 │
|
3043
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3044
|
+
# # │ 16386608652769605760 ┆ 11638928888656214026 │
|
3045
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
3046
|
+
# # │ 11638928888656214026 ┆ 11040941213715918520 │
|
3047
|
+
# # └──────────────────────┴──────────────────────┘
|
3048
|
+
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
3049
|
+
k0 = seed
|
3050
|
+
k1 = seed_1.nil? ? seed : seed_1
|
3051
|
+
k2 = seed_2.nil? ? seed : seed_2
|
3052
|
+
k3 = seed_3.nil? ? seed : seed_3
|
3053
|
+
wrap_expr(_rbexpr._hash(k0, k1, k2, k3))
|
3054
|
+
end
|
2903
3055
|
|
2904
3056
|
# Reinterpret the underlying bits as a signed/unsigned integer.
|
2905
3057
|
#
|
@@ -2937,7 +3089,40 @@ module Polars
|
|
2937
3089
|
wrap_expr(_rbexpr.reinterpret(signed))
|
2938
3090
|
end
|
2939
3091
|
|
2940
|
-
#
|
3092
|
+
# Print the value that this expression evaluates to and pass on the value.
|
3093
|
+
#
|
3094
|
+
# @return [Expr]
|
3095
|
+
#
|
3096
|
+
# @example
|
3097
|
+
# df = Polars::DataFrame.new({"foo" => [1, 1, 2]})
|
3098
|
+
# df.select(Polars.col("foo").cumsum._inspect("value is: %s").alias("bar"))
|
3099
|
+
# # =>
|
3100
|
+
# # value is: shape: (3,)
|
3101
|
+
# # Series: 'foo' [i64]
|
3102
|
+
# # [
|
3103
|
+
# # 1
|
3104
|
+
# # 2
|
3105
|
+
# # 4
|
3106
|
+
# # ]
|
3107
|
+
# # shape: (3, 1)
|
3108
|
+
# # ┌─────┐
|
3109
|
+
# # │ bar │
|
3110
|
+
# # │ --- │
|
3111
|
+
# # │ i64 │
|
3112
|
+
# # ╞═════╡
|
3113
|
+
# # │ 1 │
|
3114
|
+
# # ├╌╌╌╌╌┤
|
3115
|
+
# # │ 2 │
|
3116
|
+
# # ├╌╌╌╌╌┤
|
3117
|
+
# # │ 4 │
|
3118
|
+
# # └─────┘
|
3119
|
+
# def _inspect(fmt = "%s")
|
3120
|
+
# inspect = lambda do |s|
|
3121
|
+
# puts(fmt % [s])
|
3122
|
+
# s
|
3123
|
+
# end
|
3124
|
+
|
3125
|
+
# map(return_dtype: nil, agg_list: true, &inspect)
|
2941
3126
|
# end
|
2942
3127
|
|
2943
3128
|
# Fill nulls with linear interpolation over missing values.
|
@@ -2967,8 +3152,8 @@ module Polars
|
|
2967
3152
|
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
2968
3153
|
# # │ 3 ┆ 3.0 │
|
2969
3154
|
# # └─────┴─────┘
|
2970
|
-
def interpolate
|
2971
|
-
wrap_expr(_rbexpr.interpolate)
|
3155
|
+
def interpolate(method: "linear")
|
3156
|
+
wrap_expr(_rbexpr.interpolate(method))
|
2972
3157
|
end
|
2973
3158
|
|
2974
3159
|
# Apply a rolling min (moving min) over the values in this array.
|
@@ -3721,7 +3906,72 @@ module Polars
|
|
3721
3906
|
)
|
3722
3907
|
end
|
3723
3908
|
|
3724
|
-
#
|
3909
|
+
# Apply a custom rolling window function.
|
3910
|
+
#
|
3911
|
+
# Prefer the specific rolling window functions over this one, as they are faster.
|
3912
|
+
#
|
3913
|
+
# Prefer:
|
3914
|
+
# * rolling_min
|
3915
|
+
# * rolling_max
|
3916
|
+
# * rolling_mean
|
3917
|
+
# * rolling_sum
|
3918
|
+
#
|
3919
|
+
# @param window_size [Integer]
|
3920
|
+
# The length of the window.
|
3921
|
+
# @param weights [Object]
|
3922
|
+
# An optional slice with the same length as the window that will be multiplied
|
3923
|
+
# elementwise with the values in the window.
|
3924
|
+
# @param min_periods [Integer]
|
3925
|
+
# The number of values in the window that should be non-null before computing
|
3926
|
+
# a result. If nil, it will be set equal to window size.
|
3927
|
+
# @param center [Boolean]
|
3928
|
+
# Set the labels at the center of the window
|
3929
|
+
#
|
3930
|
+
# @return [Expr]
|
3931
|
+
#
|
3932
|
+
# @example
|
3933
|
+
# df = Polars::DataFrame.new(
|
3934
|
+
# {
|
3935
|
+
# "A" => [1.0, 2.0, 9.0, 2.0, 13.0]
|
3936
|
+
# }
|
3937
|
+
# )
|
3938
|
+
# df.select(
|
3939
|
+
# [
|
3940
|
+
# Polars.col("A").rolling_apply(window_size: 3) { |s| s.std }
|
3941
|
+
# ]
|
3942
|
+
# )
|
3943
|
+
# # =>
|
3944
|
+
# # shape: (5, 1)
|
3945
|
+
# # ┌──────────┐
|
3946
|
+
# # │ A │
|
3947
|
+
# # │ --- │
|
3948
|
+
# # │ f64 │
|
3949
|
+
# # ╞══════════╡
|
3950
|
+
# # │ null │
|
3951
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3952
|
+
# # │ null │
|
3953
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3954
|
+
# # │ 4.358899 │
|
3955
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3956
|
+
# # │ 4.041452 │
|
3957
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
3958
|
+
# # │ 5.567764 │
|
3959
|
+
# # └──────────┘
|
3960
|
+
# def rolling_apply(
|
3961
|
+
# window_size:,
|
3962
|
+
# weights: nil,
|
3963
|
+
# min_periods: nil,
|
3964
|
+
# center: false,
|
3965
|
+
# &function
|
3966
|
+
# )
|
3967
|
+
# if min_periods.nil?
|
3968
|
+
# min_periods = window_size
|
3969
|
+
# end
|
3970
|
+
# wrap_expr(
|
3971
|
+
# _rbexpr.rolling_apply(
|
3972
|
+
# function, window_size, weights, min_periods, center
|
3973
|
+
# )
|
3974
|
+
# )
|
3725
3975
|
# end
|
3726
3976
|
|
3727
3977
|
# Compute a rolling skew.
|