polars-df 0.10.0-x86_64-linux → 0.12.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/LICENSE-THIRD-PARTY.txt +1125 -865
- data/README.md +6 -6
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +17 -4
data/lib/polars/expr_dispatch.rb
CHANGED
@@ -32,7 +32,7 @@ module Polars
|
|
32
32
|
# # │ null ┆ null ┆ z ┆ null │
|
33
33
|
# # └───────┴───────┴─────┴───────┘
|
34
34
|
def all_horizontal(*exprs)
|
35
|
-
rbexprs = Utils.
|
35
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
36
36
|
Utils.wrap_expr(Plr.all_horizontal(rbexprs))
|
37
37
|
end
|
38
38
|
|
@@ -68,7 +68,7 @@ module Polars
|
|
68
68
|
# # │ null ┆ null ┆ z ┆ null │
|
69
69
|
# # └───────┴───────┴─────┴───────┘
|
70
70
|
def any_horizontal(*exprs)
|
71
|
-
rbexprs = Utils.
|
71
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
72
72
|
Utils.wrap_expr(Plr.any_horizontal(rbexprs))
|
73
73
|
end
|
74
74
|
|
@@ -101,7 +101,7 @@ module Polars
|
|
101
101
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
102
102
|
# # └─────┴──────┴─────┴─────┘
|
103
103
|
def max_horizontal(*exprs)
|
104
|
-
rbexprs = Utils.
|
104
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
105
105
|
Utils.wrap_expr(Plr.max_horizontal(rbexprs))
|
106
106
|
end
|
107
107
|
|
@@ -134,7 +134,7 @@ module Polars
|
|
134
134
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
135
135
|
# # └─────┴──────┴─────┴─────┘
|
136
136
|
def min_horizontal(*exprs)
|
137
|
-
rbexprs = Utils.
|
137
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
138
138
|
Utils.wrap_expr(Plr.min_horizontal(rbexprs))
|
139
139
|
end
|
140
140
|
|
@@ -167,7 +167,7 @@ module Polars
|
|
167
167
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
168
168
|
# # └─────┴──────┴─────┴─────┘
|
169
169
|
def sum_horizontal(*exprs)
|
170
|
-
rbexprs = Utils.
|
170
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
171
171
|
Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
|
172
172
|
end
|
173
173
|
|
@@ -200,7 +200,7 @@ module Polars
|
|
200
200
|
# # │ 3 ┆ null ┆ z ┆ 3.0 │
|
201
201
|
# # └─────┴──────┴─────┴──────┘
|
202
202
|
def mean_horizontal(*exprs)
|
203
|
-
rbexprs = Utils.
|
203
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
204
204
|
Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
|
205
205
|
end
|
206
206
|
|
@@ -233,11 +233,11 @@ module Polars
|
|
233
233
|
# # │ 3 ┆ null ┆ z ┆ {3,null} │
|
234
234
|
# # └─────┴──────┴─────┴───────────┘
|
235
235
|
def cum_sum_horizontal(*exprs)
|
236
|
-
rbexprs = Utils.
|
236
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
237
237
|
exprs_wrapped = rbexprs.map { |e| Utils.wrap_expr(e) }
|
238
238
|
|
239
239
|
# (Expr): use u32 as that will not cast to float as eagerly
|
240
|
-
Polars.cum_fold(Polars.lit(0).cast(UInt32), ->
|
240
|
+
Polars.cum_fold(Polars.lit(0).cast(UInt32), ->(a, b) { a + b }, exprs_wrapped).alias(
|
241
241
|
"cum_sum"
|
242
242
|
)
|
243
243
|
end
|
@@ -44,28 +44,28 @@ module Polars
|
|
44
44
|
time_unit: "us"
|
45
45
|
)
|
46
46
|
if !weeks.nil?
|
47
|
-
weeks = Utils.
|
47
|
+
weeks = Utils.parse_into_expression(weeks, str_as_lit: false)
|
48
48
|
end
|
49
49
|
if !days.nil?
|
50
|
-
days = Utils.
|
50
|
+
days = Utils.parse_into_expression(days, str_as_lit: false)
|
51
51
|
end
|
52
52
|
if !hours.nil?
|
53
|
-
hours = Utils.
|
53
|
+
hours = Utils.parse_into_expression(hours, str_as_lit: false)
|
54
54
|
end
|
55
55
|
if !minutes.nil?
|
56
|
-
minutes = Utils.
|
56
|
+
minutes = Utils.parse_into_expression(minutes, str_as_lit: false)
|
57
57
|
end
|
58
58
|
if !seconds.nil?
|
59
|
-
seconds = Utils.
|
59
|
+
seconds = Utils.parse_into_expression(seconds, str_as_lit: false)
|
60
60
|
end
|
61
61
|
if !milliseconds.nil?
|
62
|
-
milliseconds = Utils.
|
62
|
+
milliseconds = Utils.parse_into_expression(milliseconds, str_as_lit: false)
|
63
63
|
end
|
64
64
|
if !microseconds.nil?
|
65
|
-
microseconds = Utils.
|
65
|
+
microseconds = Utils.parse_into_expression(microseconds, str_as_lit: false)
|
66
66
|
end
|
67
67
|
if !nanoseconds.nil?
|
68
|
-
nanoseconds = Utils.
|
68
|
+
nanoseconds = Utils.parse_into_expression(nanoseconds, str_as_lit: false)
|
69
69
|
end
|
70
70
|
|
71
71
|
Utils.wrap_expr(
|
@@ -87,28 +87,38 @@ module Polars
|
|
87
87
|
#
|
88
88
|
# @return [Expr]
|
89
89
|
def concat_list(exprs)
|
90
|
-
exprs = Utils.
|
90
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
91
91
|
Utils.wrap_expr(Plr.concat_list(exprs))
|
92
92
|
end
|
93
93
|
|
94
94
|
# Collect several columns into a Series of dtype Struct.
|
95
95
|
#
|
96
|
-
# @param exprs [
|
97
|
-
#
|
96
|
+
# @param exprs [Array]
|
97
|
+
# Column(s) to collect into a struct column, specified as positional arguments.
|
98
|
+
# Accepts expression input. Strings are parsed as column names,
|
99
|
+
# other non-expression inputs are parsed as literals.
|
100
|
+
# @param schema [Hash]
|
101
|
+
# Optional schema that explicitly defines the struct field dtypes. If no columns
|
102
|
+
# or expressions are provided, schema keys are used to define columns.
|
98
103
|
# @param eager [Boolean]
|
99
|
-
# Evaluate immediately
|
104
|
+
# Evaluate immediately and return a `Series`. If set to `false` (default),
|
105
|
+
# return an expression instead.
|
106
|
+
# @param named_exprs [Hash]
|
107
|
+
# Additional columns to collect into the struct column, specified as keyword
|
108
|
+
# arguments. The columns will be renamed to the keyword used.
|
100
109
|
#
|
101
110
|
# @return [Object]
|
102
111
|
#
|
103
112
|
# @example
|
104
|
-
# Polars::DataFrame.new(
|
113
|
+
# df = Polars::DataFrame.new(
|
105
114
|
# {
|
106
115
|
# "int" => [1, 2],
|
107
116
|
# "str" => ["a", "b"],
|
108
117
|
# "bool" => [true, nil],
|
109
118
|
# "list" => [[1, 2], [3]],
|
110
119
|
# }
|
111
|
-
# )
|
120
|
+
# )
|
121
|
+
# df.select([Polars.struct(Polars.all).alias("my_struct")])
|
112
122
|
# # =>
|
113
123
|
# # shape: (2, 1)
|
114
124
|
# # ┌─────────────────────┐
|
@@ -120,29 +130,42 @@ module Polars
|
|
120
130
|
# # │ {2,"b",null,[3]} │
|
121
131
|
# # └─────────────────────┘
|
122
132
|
#
|
123
|
-
# @example
|
124
|
-
# df
|
125
|
-
# {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
|
126
|
-
# )
|
127
|
-
# df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
|
133
|
+
# @example Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.
|
134
|
+
# df.select(Polars.struct("int", false).alias("my_struct"))
|
128
135
|
# # =>
|
129
|
-
# # shape: (
|
130
|
-
# #
|
131
|
-
# # │
|
132
|
-
# # │ ---
|
133
|
-
# # │
|
134
|
-
# #
|
135
|
-
# # │
|
136
|
-
# # │
|
137
|
-
# #
|
138
|
-
#
|
139
|
-
#
|
140
|
-
|
136
|
+
# # shape: (2, 1)
|
137
|
+
# # ┌───────────┐
|
138
|
+
# # │ my_struct │
|
139
|
+
# # │ --- │
|
140
|
+
# # │ struct[2] │
|
141
|
+
# # ╞═══════════╡
|
142
|
+
# # │ {1,false} │
|
143
|
+
# # │ {2,false} │
|
144
|
+
# # └───────────┘
|
145
|
+
#
|
146
|
+
# @example Use keyword arguments to easily name each struct field.
|
147
|
+
# df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
|
148
|
+
# # => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}
|
149
|
+
def struct(*exprs, schema: nil, eager: false, **named_exprs)
|
150
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
|
151
|
+
expr = Utils.wrap_expr(Plr.as_struct(rbexprs))
|
152
|
+
|
153
|
+
if !schema.nil? && !schema.empty?
|
154
|
+
if !exprs.any?
|
155
|
+
# no columns or expressions provided; create one from schema keys
|
156
|
+
expr =
|
157
|
+
Utils.wrap_expr(
|
158
|
+
Plr.as_struct(Utils.parse_into_list_of_expressions(schema.keys))
|
159
|
+
)
|
160
|
+
expr = expr.cast(Struct.new(schema), strict: false)
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
141
164
|
if eager
|
142
|
-
Polars.select(
|
165
|
+
Polars.select(expr).to_series
|
166
|
+
else
|
167
|
+
expr
|
143
168
|
end
|
144
|
-
exprs = Utils.selection_to_rbexpr_list(exprs)
|
145
|
-
Utils.wrap_expr(Plr.as_struct(exprs))
|
146
169
|
end
|
147
170
|
|
148
171
|
# Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
|
@@ -188,13 +211,13 @@ module Polars
|
|
188
211
|
# # │ 3 ┆ null ┆ walk ┆ null │
|
189
212
|
# # └─────┴──────┴──────┴───────────────┘
|
190
213
|
def concat_str(exprs, sep: "", ignore_nulls: false)
|
191
|
-
exprs = Utils.
|
214
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
192
215
|
Utils.wrap_expr(Plr.concat_str(exprs, sep, ignore_nulls))
|
193
216
|
end
|
194
217
|
|
195
218
|
# Format expressions as a string.
|
196
219
|
#
|
197
|
-
# @param
|
220
|
+
# @param f_string [String]
|
198
221
|
# A string that with placeholders.
|
199
222
|
# For example: "hello_{}" or "{}_world
|
200
223
|
# @param args [Object]
|
@@ -225,17 +248,17 @@ module Polars
|
|
225
248
|
# # │ foo_b_bar_2 │
|
226
249
|
# # │ foo_c_bar_3 │
|
227
250
|
# # └─────────────┘
|
228
|
-
def format(
|
229
|
-
if
|
251
|
+
def format(f_string, *args)
|
252
|
+
if f_string.scan("{}").length != args.length
|
230
253
|
raise ArgumentError, "number of placeholders should equal the number of arguments"
|
231
254
|
end
|
232
255
|
|
233
256
|
exprs = []
|
234
257
|
|
235
258
|
arguments = args.each
|
236
|
-
|
259
|
+
f_string.split(/(\{\})/).each do |s|
|
237
260
|
if s == "{}"
|
238
|
-
e = Utils.
|
261
|
+
e = Utils.wrap_expr(Utils.parse_into_expression(arguments.next))
|
239
262
|
exprs << e
|
240
263
|
elsif s.length > 0
|
241
264
|
exprs << lit(s)
|
@@ -524,6 +524,55 @@ module Polars
|
|
524
524
|
col(*columns).last
|
525
525
|
end
|
526
526
|
|
527
|
+
# Get the nth column(s) of the context.
|
528
|
+
#
|
529
|
+
# @param indices [Array]
|
530
|
+
# One or more indices representing the columns to retrieve.
|
531
|
+
#
|
532
|
+
# @return [Expr]
|
533
|
+
#
|
534
|
+
# @example
|
535
|
+
# df = Polars::DataFrame.new(
|
536
|
+
# {
|
537
|
+
# "a" => [1, 8, 3],
|
538
|
+
# "b" => [4, 5, 2],
|
539
|
+
# "c" => ["foo", "bar", "baz"]
|
540
|
+
# }
|
541
|
+
# )
|
542
|
+
# df.select(Polars.nth(1))
|
543
|
+
# # =>
|
544
|
+
# # shape: (3, 1)
|
545
|
+
# # ┌─────┐
|
546
|
+
# # │ b │
|
547
|
+
# # │ --- │
|
548
|
+
# # │ i64 │
|
549
|
+
# # ╞═════╡
|
550
|
+
# # │ 4 │
|
551
|
+
# # │ 5 │
|
552
|
+
# # │ 2 │
|
553
|
+
# # └─────┘
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# df.select(Polars.nth(2, 0))
|
557
|
+
# # =>
|
558
|
+
# # shape: (3, 2)
|
559
|
+
# # ┌─────┬─────┐
|
560
|
+
# # │ c ┆ a │
|
561
|
+
# # │ --- ┆ --- │
|
562
|
+
# # │ str ┆ i64 │
|
563
|
+
# # ╞═════╪═════╡
|
564
|
+
# # │ foo ┆ 1 │
|
565
|
+
# # │ bar ┆ 8 │
|
566
|
+
# # │ baz ┆ 3 │
|
567
|
+
# # └─────┴─────┘
|
568
|
+
def nth(*indices)
|
569
|
+
if indices.length == 1 && indices[0].is_a?(Array)
|
570
|
+
indices = indices[0]
|
571
|
+
end
|
572
|
+
|
573
|
+
Utils.wrap_expr(Plr.index_cols(indices))
|
574
|
+
end
|
575
|
+
|
527
576
|
# Get the first `n` rows.
|
528
577
|
#
|
529
578
|
# This function is syntactic sugar for `col(column).head(n)`.
|
@@ -683,8 +732,8 @@ module Polars
|
|
683
732
|
ddof: 1,
|
684
733
|
propagate_nans: false
|
685
734
|
)
|
686
|
-
a = Utils.
|
687
|
-
b = Utils.
|
735
|
+
a = Utils.parse_into_expression(a)
|
736
|
+
b = Utils.parse_into_expression(b)
|
688
737
|
|
689
738
|
if method == "pearson"
|
690
739
|
Utils.wrap_expr(Plr.pearson_corr(a, b, ddof))
|
@@ -728,8 +777,8 @@ module Polars
|
|
728
777
|
# # │ 3.0 │
|
729
778
|
# # └─────┘
|
730
779
|
def cov(a, b, ddof: 1)
|
731
|
-
a = Utils.
|
732
|
-
b = Utils.
|
780
|
+
a = Utils.parse_into_expression(a)
|
781
|
+
b = Utils.parse_into_expression(b)
|
733
782
|
Utils.wrap_expr(Plr.cov(a, b, ddof))
|
734
783
|
end
|
735
784
|
|
@@ -743,13 +792,13 @@ module Polars
|
|
743
792
|
#
|
744
793
|
# @return [Expr]
|
745
794
|
def fold(acc, f, exprs)
|
746
|
-
acc = Utils.
|
795
|
+
acc = Utils.parse_into_expression(acc, str_as_lit: true)
|
747
796
|
if exprs.is_a?(Expr)
|
748
797
|
exprs = [exprs]
|
749
798
|
end
|
750
799
|
|
751
|
-
exprs = Utils.
|
752
|
-
Utils.wrap_expr(Plr.fold(acc
|
800
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
801
|
+
Utils.wrap_expr(Plr.fold(acc, f, exprs))
|
753
802
|
end
|
754
803
|
|
755
804
|
# def reduce
|
@@ -776,13 +825,13 @@ module Polars
|
|
776
825
|
# If you simply want the first encountered expression as accumulator,
|
777
826
|
# consider using `cumreduce`.
|
778
827
|
def cum_fold(acc, f, exprs, include_init: false)
|
779
|
-
acc = Utils.
|
828
|
+
acc = Utils.parse_into_expression(acc, str_as_lit: true)
|
780
829
|
if exprs.is_a?(Expr)
|
781
830
|
exprs = [exprs]
|
782
831
|
end
|
783
832
|
|
784
|
-
exprs = Utils.
|
785
|
-
Utils.wrap_expr(Plr.cum_fold(acc
|
833
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
834
|
+
Utils.wrap_expr(Plr.cum_fold(acc, f, exprs, include_init))
|
786
835
|
end
|
787
836
|
alias_method :cumfold, :cum_fold
|
788
837
|
|
@@ -982,7 +1031,7 @@ module Polars
|
|
982
1031
|
if reverse == true || reverse == false
|
983
1032
|
reverse = [reverse] * exprs.length
|
984
1033
|
end
|
985
|
-
exprs = Utils.
|
1034
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
986
1035
|
Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse))
|
987
1036
|
end
|
988
1037
|
alias_method :argsort_by, :arg_sort_by
|
@@ -1119,8 +1168,8 @@ module Polars
|
|
1119
1168
|
end
|
1120
1169
|
condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
|
1121
1170
|
else
|
1122
|
-
condition = Utils.
|
1123
|
-
Utils.wrap_expr(Plr.arg_where(condition
|
1171
|
+
condition = Utils.parse_into_expression(condition, str_as_lit: true)
|
1172
|
+
Utils.wrap_expr(Plr.arg_where(condition))
|
1124
1173
|
end
|
1125
1174
|
end
|
1126
1175
|
|
@@ -1171,7 +1220,7 @@ module Polars
|
|
1171
1220
|
# # │ null ┆ null ┆ null ┆ 10.0 │
|
1172
1221
|
# # └──────┴──────┴──────┴──────┘
|
1173
1222
|
def coalesce(exprs, *more_exprs)
|
1174
|
-
exprs = Utils.
|
1223
|
+
exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
|
1175
1224
|
Utils.wrap_expr(Plr.coalesce(exprs))
|
1176
1225
|
end
|
1177
1226
|
|
data/lib/polars/functions/lit.rb
CHANGED
@@ -7,7 +7,7 @@ module Polars
|
|
7
7
|
if value.is_a?(::Time) || value.is_a?(::DateTime)
|
8
8
|
time_unit = dtype&.time_unit || "ns"
|
9
9
|
time_zone = dtype.&time_zone
|
10
|
-
e = lit(Utils.
|
10
|
+
e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit))
|
11
11
|
if time_zone
|
12
12
|
return e.dt.replace_time_zone(time_zone.to_s)
|
13
13
|
else
|
@@ -9,16 +9,11 @@ module Polars
|
|
9
9
|
# @param interval [Object]
|
10
10
|
# Interval periods. It can be a polars duration string, such as `3d12h4m25s`
|
11
11
|
# representing 3 days, 12 hours, 4 minutes, and 25 seconds.
|
12
|
-
# @param lazy [Boolean]
|
13
|
-
# Return an expression.
|
14
12
|
# @param closed ["both", "left", "right", "none"]
|
15
13
|
# Define whether the temporal window interval is closed or not.
|
16
|
-
# @param
|
17
|
-
#
|
18
|
-
#
|
19
|
-
# Set the time unit.
|
20
|
-
# @param time_zone [String]
|
21
|
-
# Optional timezone
|
14
|
+
# @param eager [Boolean]
|
15
|
+
# Evaluate immediately and return a `Series`.
|
16
|
+
# If set to `false` (default), return an expression instead.
|
22
17
|
#
|
23
18
|
# @return [Object]
|
24
19
|
#
|
@@ -28,73 +23,111 @@ module Polars
|
|
28
23
|
# type date. All other permutations return a datetime Series.
|
29
24
|
#
|
30
25
|
# @example Using polars duration string to specify the interval
|
31
|
-
# Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo",
|
26
|
+
# Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
|
27
|
+
# "date"
|
28
|
+
# )
|
32
29
|
# # =>
|
33
30
|
# # shape: (3,)
|
34
|
-
# # Series: '
|
31
|
+
# # Series: 'date' [date]
|
35
32
|
# # [
|
36
33
|
# # 2022-01-01
|
37
34
|
# # 2022-02-01
|
38
35
|
# # 2022-03-01
|
39
36
|
# # ]
|
40
|
-
#
|
41
|
-
# @example Using `timedelta` object to specify the interval:
|
42
|
-
# Polars.date_range(
|
43
|
-
# DateTime.new(1985, 1, 1),
|
44
|
-
# DateTime.new(1985, 1, 10),
|
45
|
-
# "1d12h",
|
46
|
-
# time_unit: "ms"
|
47
|
-
# )
|
48
|
-
# # =>
|
49
|
-
# # shape: (7,)
|
50
|
-
# # Series: '' [datetime[ms]]
|
51
|
-
# # [
|
52
|
-
# # 1985-01-01 00:00:00
|
53
|
-
# # 1985-01-02 12:00:00
|
54
|
-
# # 1985-01-04 00:00:00
|
55
|
-
# # 1985-01-05 12:00:00
|
56
|
-
# # 1985-01-07 00:00:00
|
57
|
-
# # 1985-01-08 12:00:00
|
58
|
-
# # 1985-01-10 00:00:00
|
59
|
-
# # ]
|
60
37
|
def date_range(
|
61
38
|
start,
|
62
39
|
stop,
|
63
|
-
interval,
|
64
|
-
lazy: false,
|
40
|
+
interval = "1d",
|
65
41
|
closed: "both",
|
66
|
-
|
67
|
-
time_unit: nil,
|
68
|
-
time_zone: nil
|
42
|
+
eager: false
|
69
43
|
)
|
70
|
-
|
71
|
-
raise Todo
|
72
|
-
else
|
73
|
-
interval = interval.to_s
|
74
|
-
if interval.include?(" ")
|
75
|
-
interval = interval.gsub(" ", "")
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
if time_unit.nil?
|
80
|
-
if interval.include?("ns")
|
81
|
-
time_unit = "ns"
|
82
|
-
else
|
83
|
-
time_unit = "us"
|
84
|
-
end
|
85
|
-
end
|
44
|
+
interval = Utils.parse_interval_argument(interval)
|
86
45
|
|
87
|
-
start_rbexpr = Utils.
|
88
|
-
|
46
|
+
start_rbexpr = Utils.parse_into_expression(start)
|
47
|
+
end_rbexpr = Utils.parse_into_expression(stop)
|
89
48
|
|
90
49
|
result = Utils.wrap_expr(
|
91
|
-
Plr.date_range(start_rbexpr,
|
50
|
+
Plr.date_range(start_rbexpr, end_rbexpr, interval, closed)
|
92
51
|
)
|
93
52
|
|
94
|
-
|
53
|
+
if eager
|
54
|
+
return F.select(result).to_series
|
55
|
+
end
|
56
|
+
|
57
|
+
result
|
58
|
+
end
|
59
|
+
|
60
|
+
# Create a column of date ranges.
|
61
|
+
#
|
62
|
+
# @param start [Object]
|
63
|
+
# Lower bound of the date range.
|
64
|
+
# @param stop [Object]
|
65
|
+
# Upper bound of the date range.
|
66
|
+
# @param interval [Object]
|
67
|
+
# Interval of the range periods, specified using the Polars duration string language (see "Notes" section below).
|
68
|
+
# @param closed ["both", "left", "right", "none"]
|
69
|
+
# Define which sides of the range are closed (inclusive).
|
70
|
+
# @param eager [Boolean]
|
71
|
+
# Evaluate immediately and return a `Series`.
|
72
|
+
# If set to `false` (default), return an expression instead.
|
73
|
+
#
|
74
|
+
# @return [Object]
|
75
|
+
#
|
76
|
+
# @note
|
77
|
+
# `interval` is created according to the following string language:
|
78
|
+
#
|
79
|
+
# - 1ns (1 nanosecond)
|
80
|
+
# - 1us (1 microsecond)
|
81
|
+
# - 1ms (1 millisecond)
|
82
|
+
# - 1s (1 second)
|
83
|
+
# - 1m (1 minute)
|
84
|
+
# - 1h (1 hour)
|
85
|
+
# - 1d (1 calendar day)
|
86
|
+
# - 1w (1 calendar week)
|
87
|
+
# - 1mo (1 calendar month)
|
88
|
+
# - 1q (1 calendar quarter)
|
89
|
+
# - 1y (1 calendar year)
|
90
|
+
#
|
91
|
+
# Or combine them:
|
92
|
+
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
93
|
+
#
|
94
|
+
# By "calendar day", we mean the corresponding time on the next day (which may
|
95
|
+
# not be 24 hours, due to daylight savings). Similarly for "calendar week",
|
96
|
+
# "calendar month", "calendar quarter", and "calendar year".
|
97
|
+
#
|
98
|
+
# @example
|
99
|
+
# df = Polars::DataFrame.new(
|
100
|
+
# {
|
101
|
+
# "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
|
102
|
+
# "end" => Date.new(2022, 1, 3)
|
103
|
+
# }
|
104
|
+
# )
|
105
|
+
# df.with_columns(date_range: Polars.date_ranges("start", "end"))
|
106
|
+
# # =>
|
107
|
+
# # shape: (2, 3)
|
108
|
+
# # ┌────────────┬────────────┬─────────────────────────────────┐
|
109
|
+
# # │ start ┆ end ┆ date_range │
|
110
|
+
# # │ --- ┆ --- ┆ --- │
|
111
|
+
# # │ date ┆ date ┆ list[date] │
|
112
|
+
# # ╞════════════╪════════════╪═════════════════════════════════╡
|
113
|
+
# # │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
|
114
|
+
# # │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03] │
|
115
|
+
# # └────────────┴────────────┴─────────────────────────────────┘
|
116
|
+
def date_ranges(
|
117
|
+
start,
|
118
|
+
stop,
|
119
|
+
interval = "1d",
|
120
|
+
closed: "both",
|
121
|
+
eager: false
|
122
|
+
)
|
123
|
+
interval = Utils.parse_interval_argument(interval)
|
124
|
+
start_rbexpr = Utils.parse_into_expression(start)
|
125
|
+
end_rbexpr = Utils.parse_into_expression(stop)
|
126
|
+
|
127
|
+
result = Utils.wrap_expr(Plr.date_ranges(start_rbexpr, end_rbexpr, interval, closed))
|
95
128
|
|
96
|
-
if
|
97
|
-
return select(result).to_series
|
129
|
+
if eager
|
130
|
+
return F.select(result).to_series
|
98
131
|
end
|
99
132
|
|
100
133
|
result
|