polars-df 0.10.0-x86_64-darwin → 0.12.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/LICENSE-THIRD-PARTY.txt +1127 -867
- data/README.md +6 -6
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +17 -4
data/lib/polars/expr_dispatch.rb
CHANGED
@@ -32,7 +32,7 @@ module Polars
|
|
32
32
|
# # │ null ┆ null ┆ z ┆ null │
|
33
33
|
# # └───────┴───────┴─────┴───────┘
|
34
34
|
def all_horizontal(*exprs)
|
35
|
-
rbexprs = Utils.
|
35
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
36
36
|
Utils.wrap_expr(Plr.all_horizontal(rbexprs))
|
37
37
|
end
|
38
38
|
|
@@ -68,7 +68,7 @@ module Polars
|
|
68
68
|
# # │ null ┆ null ┆ z ┆ null │
|
69
69
|
# # └───────┴───────┴─────┴───────┘
|
70
70
|
def any_horizontal(*exprs)
|
71
|
-
rbexprs = Utils.
|
71
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
72
72
|
Utils.wrap_expr(Plr.any_horizontal(rbexprs))
|
73
73
|
end
|
74
74
|
|
@@ -101,7 +101,7 @@ module Polars
|
|
101
101
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
102
102
|
# # └─────┴──────┴─────┴─────┘
|
103
103
|
def max_horizontal(*exprs)
|
104
|
-
rbexprs = Utils.
|
104
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
105
105
|
Utils.wrap_expr(Plr.max_horizontal(rbexprs))
|
106
106
|
end
|
107
107
|
|
@@ -134,7 +134,7 @@ module Polars
|
|
134
134
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
135
135
|
# # └─────┴──────┴─────┴─────┘
|
136
136
|
def min_horizontal(*exprs)
|
137
|
-
rbexprs = Utils.
|
137
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
138
138
|
Utils.wrap_expr(Plr.min_horizontal(rbexprs))
|
139
139
|
end
|
140
140
|
|
@@ -167,7 +167,7 @@ module Polars
|
|
167
167
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
168
168
|
# # └─────┴──────┴─────┴─────┘
|
169
169
|
def sum_horizontal(*exprs)
|
170
|
-
rbexprs = Utils.
|
170
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
171
171
|
Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
|
172
172
|
end
|
173
173
|
|
@@ -200,7 +200,7 @@ module Polars
|
|
200
200
|
# # │ 3 ┆ null ┆ z ┆ 3.0 │
|
201
201
|
# # └─────┴──────┴─────┴──────┘
|
202
202
|
def mean_horizontal(*exprs)
|
203
|
-
rbexprs = Utils.
|
203
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
204
204
|
Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
|
205
205
|
end
|
206
206
|
|
@@ -233,11 +233,11 @@ module Polars
|
|
233
233
|
# # │ 3 ┆ null ┆ z ┆ {3,null} │
|
234
234
|
# # └─────┴──────┴─────┴───────────┘
|
235
235
|
def cum_sum_horizontal(*exprs)
|
236
|
-
rbexprs = Utils.
|
236
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
237
237
|
exprs_wrapped = rbexprs.map { |e| Utils.wrap_expr(e) }
|
238
238
|
|
239
239
|
# (Expr): use u32 as that will not cast to float as eagerly
|
240
|
-
Polars.cum_fold(Polars.lit(0).cast(UInt32), ->
|
240
|
+
Polars.cum_fold(Polars.lit(0).cast(UInt32), ->(a, b) { a + b }, exprs_wrapped).alias(
|
241
241
|
"cum_sum"
|
242
242
|
)
|
243
243
|
end
|
@@ -44,28 +44,28 @@ module Polars
|
|
44
44
|
time_unit: "us"
|
45
45
|
)
|
46
46
|
if !weeks.nil?
|
47
|
-
weeks = Utils.
|
47
|
+
weeks = Utils.parse_into_expression(weeks, str_as_lit: false)
|
48
48
|
end
|
49
49
|
if !days.nil?
|
50
|
-
days = Utils.
|
50
|
+
days = Utils.parse_into_expression(days, str_as_lit: false)
|
51
51
|
end
|
52
52
|
if !hours.nil?
|
53
|
-
hours = Utils.
|
53
|
+
hours = Utils.parse_into_expression(hours, str_as_lit: false)
|
54
54
|
end
|
55
55
|
if !minutes.nil?
|
56
|
-
minutes = Utils.
|
56
|
+
minutes = Utils.parse_into_expression(minutes, str_as_lit: false)
|
57
57
|
end
|
58
58
|
if !seconds.nil?
|
59
|
-
seconds = Utils.
|
59
|
+
seconds = Utils.parse_into_expression(seconds, str_as_lit: false)
|
60
60
|
end
|
61
61
|
if !milliseconds.nil?
|
62
|
-
milliseconds = Utils.
|
62
|
+
milliseconds = Utils.parse_into_expression(milliseconds, str_as_lit: false)
|
63
63
|
end
|
64
64
|
if !microseconds.nil?
|
65
|
-
microseconds = Utils.
|
65
|
+
microseconds = Utils.parse_into_expression(microseconds, str_as_lit: false)
|
66
66
|
end
|
67
67
|
if !nanoseconds.nil?
|
68
|
-
nanoseconds = Utils.
|
68
|
+
nanoseconds = Utils.parse_into_expression(nanoseconds, str_as_lit: false)
|
69
69
|
end
|
70
70
|
|
71
71
|
Utils.wrap_expr(
|
@@ -87,28 +87,38 @@ module Polars
|
|
87
87
|
#
|
88
88
|
# @return [Expr]
|
89
89
|
def concat_list(exprs)
|
90
|
-
exprs = Utils.
|
90
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
91
91
|
Utils.wrap_expr(Plr.concat_list(exprs))
|
92
92
|
end
|
93
93
|
|
94
94
|
# Collect several columns into a Series of dtype Struct.
|
95
95
|
#
|
96
|
-
# @param exprs [
|
97
|
-
#
|
96
|
+
# @param exprs [Array]
|
97
|
+
# Column(s) to collect into a struct column, specified as positional arguments.
|
98
|
+
# Accepts expression input. Strings are parsed as column names,
|
99
|
+
# other non-expression inputs are parsed as literals.
|
100
|
+
# @param schema [Hash]
|
101
|
+
# Optional schema that explicitly defines the struct field dtypes. If no columns
|
102
|
+
# or expressions are provided, schema keys are used to define columns.
|
98
103
|
# @param eager [Boolean]
|
99
|
-
# Evaluate immediately
|
104
|
+
# Evaluate immediately and return a `Series`. If set to `false` (default),
|
105
|
+
# return an expression instead.
|
106
|
+
# @param named_exprs [Hash]
|
107
|
+
# Additional columns to collect into the struct column, specified as keyword
|
108
|
+
# arguments. The columns will be renamed to the keyword used.
|
100
109
|
#
|
101
110
|
# @return [Object]
|
102
111
|
#
|
103
112
|
# @example
|
104
|
-
# Polars::DataFrame.new(
|
113
|
+
# df = Polars::DataFrame.new(
|
105
114
|
# {
|
106
115
|
# "int" => [1, 2],
|
107
116
|
# "str" => ["a", "b"],
|
108
117
|
# "bool" => [true, nil],
|
109
118
|
# "list" => [[1, 2], [3]],
|
110
119
|
# }
|
111
|
-
# )
|
120
|
+
# )
|
121
|
+
# df.select([Polars.struct(Polars.all).alias("my_struct")])
|
112
122
|
# # =>
|
113
123
|
# # shape: (2, 1)
|
114
124
|
# # ┌─────────────────────┐
|
@@ -120,29 +130,42 @@ module Polars
|
|
120
130
|
# # │ {2,"b",null,[3]} │
|
121
131
|
# # └─────────────────────┘
|
122
132
|
#
|
123
|
-
# @example
|
124
|
-
# df
|
125
|
-
# {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
|
126
|
-
# )
|
127
|
-
# df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
|
133
|
+
# @example Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.
|
134
|
+
# df.select(Polars.struct("int", false).alias("my_struct"))
|
128
135
|
# # =>
|
129
|
-
# # shape: (
|
130
|
-
# #
|
131
|
-
# # │
|
132
|
-
# # │ ---
|
133
|
-
# # │
|
134
|
-
# #
|
135
|
-
# # │
|
136
|
-
# # │
|
137
|
-
# #
|
138
|
-
#
|
139
|
-
#
|
140
|
-
|
136
|
+
# # shape: (2, 1)
|
137
|
+
# # ┌───────────┐
|
138
|
+
# # │ my_struct │
|
139
|
+
# # │ --- │
|
140
|
+
# # │ struct[2] │
|
141
|
+
# # ╞═══════════╡
|
142
|
+
# # │ {1,false} │
|
143
|
+
# # │ {2,false} │
|
144
|
+
# # └───────────┘
|
145
|
+
#
|
146
|
+
# @example Use keyword arguments to easily name each struct field.
|
147
|
+
# df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
|
148
|
+
# # => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}
|
149
|
+
def struct(*exprs, schema: nil, eager: false, **named_exprs)
|
150
|
+
rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
|
151
|
+
expr = Utils.wrap_expr(Plr.as_struct(rbexprs))
|
152
|
+
|
153
|
+
if !schema.nil? && !schema.empty?
|
154
|
+
if !exprs.any?
|
155
|
+
# no columns or expressions provided; create one from schema keys
|
156
|
+
expr =
|
157
|
+
Utils.wrap_expr(
|
158
|
+
Plr.as_struct(Utils.parse_into_list_of_expressions(schema.keys))
|
159
|
+
)
|
160
|
+
expr = expr.cast(Struct.new(schema), strict: false)
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
141
164
|
if eager
|
142
|
-
Polars.select(
|
165
|
+
Polars.select(expr).to_series
|
166
|
+
else
|
167
|
+
expr
|
143
168
|
end
|
144
|
-
exprs = Utils.selection_to_rbexpr_list(exprs)
|
145
|
-
Utils.wrap_expr(Plr.as_struct(exprs))
|
146
169
|
end
|
147
170
|
|
148
171
|
# Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
|
@@ -188,13 +211,13 @@ module Polars
|
|
188
211
|
# # │ 3 ┆ null ┆ walk ┆ null │
|
189
212
|
# # └─────┴──────┴──────┴───────────────┘
|
190
213
|
def concat_str(exprs, sep: "", ignore_nulls: false)
|
191
|
-
exprs = Utils.
|
214
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
192
215
|
Utils.wrap_expr(Plr.concat_str(exprs, sep, ignore_nulls))
|
193
216
|
end
|
194
217
|
|
195
218
|
# Format expressions as a string.
|
196
219
|
#
|
197
|
-
# @param
|
220
|
+
# @param f_string [String]
|
198
221
|
# A string with placeholders.
|
199
222
|
# For example: "hello_{}" or "{}_world"
|
200
223
|
# @param args [Object]
|
@@ -225,17 +248,17 @@ module Polars
|
|
225
248
|
# # │ foo_b_bar_2 │
|
226
249
|
# # │ foo_c_bar_3 │
|
227
250
|
# # └─────────────┘
|
228
|
-
def format(
|
229
|
-
if
|
251
|
+
def format(f_string, *args)
|
252
|
+
if f_string.scan("{}").length != args.length
|
230
253
|
raise ArgumentError, "number of placeholders should equal the number of arguments"
|
231
254
|
end
|
232
255
|
|
233
256
|
exprs = []
|
234
257
|
|
235
258
|
arguments = args.each
|
236
|
-
|
259
|
+
f_string.split(/(\{\})/).each do |s|
|
237
260
|
if s == "{}"
|
238
|
-
e = Utils.
|
261
|
+
e = Utils.wrap_expr(Utils.parse_into_expression(arguments.next))
|
239
262
|
exprs << e
|
240
263
|
elsif s.length > 0
|
241
264
|
exprs << lit(s)
|
@@ -524,6 +524,55 @@ module Polars
|
|
524
524
|
col(*columns).last
|
525
525
|
end
|
526
526
|
|
527
|
+
# Get the nth column(s) of the context.
|
528
|
+
#
|
529
|
+
# @param indices [Array]
|
530
|
+
# One or more indices representing the columns to retrieve.
|
531
|
+
#
|
532
|
+
# @return [Expr]
|
533
|
+
#
|
534
|
+
# @example
|
535
|
+
# df = Polars::DataFrame.new(
|
536
|
+
# {
|
537
|
+
# "a" => [1, 8, 3],
|
538
|
+
# "b" => [4, 5, 2],
|
539
|
+
# "c" => ["foo", "bar", "baz"]
|
540
|
+
# }
|
541
|
+
# )
|
542
|
+
# df.select(Polars.nth(1))
|
543
|
+
# # =>
|
544
|
+
# # shape: (3, 1)
|
545
|
+
# # ┌─────┐
|
546
|
+
# # │ b │
|
547
|
+
# # │ --- │
|
548
|
+
# # │ i64 │
|
549
|
+
# # ╞═════╡
|
550
|
+
# # │ 4 │
|
551
|
+
# # │ 5 │
|
552
|
+
# # │ 2 │
|
553
|
+
# # └─────┘
|
554
|
+
#
|
555
|
+
# @example
|
556
|
+
# df.select(Polars.nth(2, 0))
|
557
|
+
# # =>
|
558
|
+
# # shape: (3, 2)
|
559
|
+
# # ┌─────┬─────┐
|
560
|
+
# # │ c ┆ a │
|
561
|
+
# # │ --- ┆ --- │
|
562
|
+
# # │ str ┆ i64 │
|
563
|
+
# # ╞═════╪═════╡
|
564
|
+
# # │ foo ┆ 1 │
|
565
|
+
# # │ bar ┆ 8 │
|
566
|
+
# # │ baz ┆ 3 │
|
567
|
+
# # └─────┴─────┘
|
568
|
+
def nth(*indices)
|
569
|
+
if indices.length == 1 && indices[0].is_a?(Array)
|
570
|
+
indices = indices[0]
|
571
|
+
end
|
572
|
+
|
573
|
+
Utils.wrap_expr(Plr.index_cols(indices))
|
574
|
+
end
|
575
|
+
|
527
576
|
# Get the first `n` rows.
|
528
577
|
#
|
529
578
|
# This function is syntactic sugar for `col(column).head(n)`.
|
@@ -683,8 +732,8 @@ module Polars
|
|
683
732
|
ddof: 1,
|
684
733
|
propagate_nans: false
|
685
734
|
)
|
686
|
-
a = Utils.
|
687
|
-
b = Utils.
|
735
|
+
a = Utils.parse_into_expression(a)
|
736
|
+
b = Utils.parse_into_expression(b)
|
688
737
|
|
689
738
|
if method == "pearson"
|
690
739
|
Utils.wrap_expr(Plr.pearson_corr(a, b, ddof))
|
@@ -728,8 +777,8 @@ module Polars
|
|
728
777
|
# # │ 3.0 │
|
729
778
|
# # └─────┘
|
730
779
|
def cov(a, b, ddof: 1)
|
731
|
-
a = Utils.
|
732
|
-
b = Utils.
|
780
|
+
a = Utils.parse_into_expression(a)
|
781
|
+
b = Utils.parse_into_expression(b)
|
733
782
|
Utils.wrap_expr(Plr.cov(a, b, ddof))
|
734
783
|
end
|
735
784
|
|
@@ -743,13 +792,13 @@ module Polars
|
|
743
792
|
#
|
744
793
|
# @return [Expr]
|
745
794
|
def fold(acc, f, exprs)
|
746
|
-
acc = Utils.
|
795
|
+
acc = Utils.parse_into_expression(acc, str_as_lit: true)
|
747
796
|
if exprs.is_a?(Expr)
|
748
797
|
exprs = [exprs]
|
749
798
|
end
|
750
799
|
|
751
|
-
exprs = Utils.
|
752
|
-
Utils.wrap_expr(Plr.fold(acc
|
800
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
801
|
+
Utils.wrap_expr(Plr.fold(acc, f, exprs))
|
753
802
|
end
|
754
803
|
|
755
804
|
# def reduce
|
@@ -776,13 +825,13 @@ module Polars
|
|
776
825
|
# If you simply want the first encountered expression as accumulator,
|
777
826
|
# consider using `cumreduce`.
|
778
827
|
def cum_fold(acc, f, exprs, include_init: false)
|
779
|
-
acc = Utils.
|
828
|
+
acc = Utils.parse_into_expression(acc, str_as_lit: true)
|
780
829
|
if exprs.is_a?(Expr)
|
781
830
|
exprs = [exprs]
|
782
831
|
end
|
783
832
|
|
784
|
-
exprs = Utils.
|
785
|
-
Utils.wrap_expr(Plr.cum_fold(acc
|
833
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
834
|
+
Utils.wrap_expr(Plr.cum_fold(acc, f, exprs, include_init))
|
786
835
|
end
|
787
836
|
alias_method :cumfold, :cum_fold
|
788
837
|
|
@@ -982,7 +1031,7 @@ module Polars
|
|
982
1031
|
if reverse == true || reverse == false
|
983
1032
|
reverse = [reverse] * exprs.length
|
984
1033
|
end
|
985
|
-
exprs = Utils.
|
1034
|
+
exprs = Utils.parse_into_list_of_expressions(exprs)
|
986
1035
|
Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse))
|
987
1036
|
end
|
988
1037
|
alias_method :argsort_by, :arg_sort_by
|
@@ -1119,8 +1168,8 @@ module Polars
|
|
1119
1168
|
end
|
1120
1169
|
condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
|
1121
1170
|
else
|
1122
|
-
condition = Utils.
|
1123
|
-
Utils.wrap_expr(Plr.arg_where(condition
|
1171
|
+
condition = Utils.parse_into_expression(condition, str_as_lit: true)
|
1172
|
+
Utils.wrap_expr(Plr.arg_where(condition))
|
1124
1173
|
end
|
1125
1174
|
end
|
1126
1175
|
|
@@ -1171,7 +1220,7 @@ module Polars
|
|
1171
1220
|
# # │ null ┆ null ┆ null ┆ 10.0 │
|
1172
1221
|
# # └──────┴──────┴──────┴──────┘
|
1173
1222
|
def coalesce(exprs, *more_exprs)
|
1174
|
-
exprs = Utils.
|
1223
|
+
exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
|
1175
1224
|
Utils.wrap_expr(Plr.coalesce(exprs))
|
1176
1225
|
end
|
1177
1226
|
|
data/lib/polars/functions/lit.rb
CHANGED
@@ -7,7 +7,7 @@ module Polars
|
|
7
7
|
if value.is_a?(::Time) || value.is_a?(::DateTime)
|
8
8
|
time_unit = dtype&.time_unit || "ns"
|
9
9
|
time_zone = dtype.&time_zone
|
10
|
-
e = lit(Utils.
|
10
|
+
e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit))
|
11
11
|
if time_zone
|
12
12
|
return e.dt.replace_time_zone(time_zone.to_s)
|
13
13
|
else
|
@@ -9,16 +9,11 @@ module Polars
|
|
9
9
|
# @param interval [Object]
|
10
10
|
# Interval periods. It can be a polars duration string, such as `3d12h4m25s`
|
11
11
|
# representing 3 days, 12 hours, 4 minutes, and 25 seconds.
|
12
|
-
# @param lazy [Boolean]
|
13
|
-
# Return an expression.
|
14
12
|
# @param closed ["both", "left", "right", "none"]
|
15
13
|
# Define whether the temporal window interval is closed or not.
|
16
|
-
# @param
|
17
|
-
#
|
18
|
-
#
|
19
|
-
# Set the time unit.
|
20
|
-
# @param time_zone [String]
|
21
|
-
# Optional timezone
|
14
|
+
# @param eager [Boolean]
|
15
|
+
# Evaluate immediately and return a `Series`.
|
16
|
+
# If set to `false` (default), return an expression instead.
|
22
17
|
#
|
23
18
|
# @return [Object]
|
24
19
|
#
|
@@ -28,73 +23,111 @@ module Polars
|
|
28
23
|
# type date. All other permutations return a datetime Series.
|
29
24
|
#
|
30
25
|
# @example Using polars duration string to specify the interval
|
31
|
-
# Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo",
|
26
|
+
# Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
|
27
|
+
# "date"
|
28
|
+
# )
|
32
29
|
# # =>
|
33
30
|
# # shape: (3,)
|
34
|
-
# # Series: '
|
31
|
+
# # Series: 'date' [date]
|
35
32
|
# # [
|
36
33
|
# # 2022-01-01
|
37
34
|
# # 2022-02-01
|
38
35
|
# # 2022-03-01
|
39
36
|
# # ]
|
40
|
-
#
|
41
|
-
# @example Using `timedelta` object to specify the interval:
|
42
|
-
# Polars.date_range(
|
43
|
-
# DateTime.new(1985, 1, 1),
|
44
|
-
# DateTime.new(1985, 1, 10),
|
45
|
-
# "1d12h",
|
46
|
-
# time_unit: "ms"
|
47
|
-
# )
|
48
|
-
# # =>
|
49
|
-
# # shape: (7,)
|
50
|
-
# # Series: '' [datetime[ms]]
|
51
|
-
# # [
|
52
|
-
# # 1985-01-01 00:00:00
|
53
|
-
# # 1985-01-02 12:00:00
|
54
|
-
# # 1985-01-04 00:00:00
|
55
|
-
# # 1985-01-05 12:00:00
|
56
|
-
# # 1985-01-07 00:00:00
|
57
|
-
# # 1985-01-08 12:00:00
|
58
|
-
# # 1985-01-10 00:00:00
|
59
|
-
# # ]
|
60
37
|
def date_range(
|
61
38
|
start,
|
62
39
|
stop,
|
63
|
-
interval,
|
64
|
-
lazy: false,
|
40
|
+
interval = "1d",
|
65
41
|
closed: "both",
|
66
|
-
|
67
|
-
time_unit: nil,
|
68
|
-
time_zone: nil
|
42
|
+
eager: false
|
69
43
|
)
|
70
|
-
|
71
|
-
raise Todo
|
72
|
-
else
|
73
|
-
interval = interval.to_s
|
74
|
-
if interval.include?(" ")
|
75
|
-
interval = interval.gsub(" ", "")
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
if time_unit.nil?
|
80
|
-
if interval.include?("ns")
|
81
|
-
time_unit = "ns"
|
82
|
-
else
|
83
|
-
time_unit = "us"
|
84
|
-
end
|
85
|
-
end
|
44
|
+
interval = Utils.parse_interval_argument(interval)
|
86
45
|
|
87
|
-
start_rbexpr = Utils.
|
88
|
-
|
46
|
+
start_rbexpr = Utils.parse_into_expression(start)
|
47
|
+
end_rbexpr = Utils.parse_into_expression(stop)
|
89
48
|
|
90
49
|
result = Utils.wrap_expr(
|
91
|
-
Plr.date_range(start_rbexpr,
|
50
|
+
Plr.date_range(start_rbexpr, end_rbexpr, interval, closed)
|
92
51
|
)
|
93
52
|
|
94
|
-
|
53
|
+
if eager
|
54
|
+
return F.select(result).to_series
|
55
|
+
end
|
56
|
+
|
57
|
+
result
|
58
|
+
end
|
59
|
+
|
60
|
+
# Create a column of date ranges.
|
61
|
+
#
|
62
|
+
# @param start [Object]
|
63
|
+
# Lower bound of the date range.
|
64
|
+
# @param stop [Object]
|
65
|
+
# Upper bound of the date range.
|
66
|
+
# @param interval [Object]
|
67
|
+
# Interval of the range periods, specified using the Polars duration string language (see "Notes" section below).
|
68
|
+
# @param closed ["both", "left", "right", "none"]
|
69
|
+
# Define which sides of the range are closed (inclusive).
|
70
|
+
# @param eager [Boolean]
|
71
|
+
# Evaluate immediately and return a `Series`.
|
72
|
+
# If set to `false` (default), return an expression instead.
|
73
|
+
#
|
74
|
+
# @return [Object]
|
75
|
+
#
|
76
|
+
# @note
|
77
|
+
# `interval` is created according to the following string language:
|
78
|
+
#
|
79
|
+
# - 1ns (1 nanosecond)
|
80
|
+
# - 1us (1 microsecond)
|
81
|
+
# - 1ms (1 millisecond)
|
82
|
+
# - 1s (1 second)
|
83
|
+
# - 1m (1 minute)
|
84
|
+
# - 1h (1 hour)
|
85
|
+
# - 1d (1 calendar day)
|
86
|
+
# - 1w (1 calendar week)
|
87
|
+
# - 1mo (1 calendar month)
|
88
|
+
# - 1q (1 calendar quarter)
|
89
|
+
# - 1y (1 calendar year)
|
90
|
+
#
|
91
|
+
# Or combine them:
|
92
|
+
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
93
|
+
#
|
94
|
+
# By "calendar day", we mean the corresponding time on the next day (which may
|
95
|
+
# not be 24 hours, due to daylight savings). Similarly for "calendar week",
|
96
|
+
# "calendar month", "calendar quarter", and "calendar year".
|
97
|
+
#
|
98
|
+
# @example
|
99
|
+
# df = Polars::DataFrame.new(
|
100
|
+
# {
|
101
|
+
# "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
|
102
|
+
# "end" => Date.new(2022, 1, 3)
|
103
|
+
# }
|
104
|
+
# )
|
105
|
+
# df.with_columns(date_range: Polars.date_ranges("start", "end"))
|
106
|
+
# # =>
|
107
|
+
# # shape: (2, 3)
|
108
|
+
# # ┌────────────┬────────────┬─────────────────────────────────┐
|
109
|
+
# # │ start ┆ end ┆ date_range │
|
110
|
+
# # │ --- ┆ --- ┆ --- │
|
111
|
+
# # │ date ┆ date ┆ list[date] │
|
112
|
+
# # ╞════════════╪════════════╪═════════════════════════════════╡
|
113
|
+
# # │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
|
114
|
+
# # │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03] │
|
115
|
+
# # └────────────┴────────────┴─────────────────────────────────┘
|
116
|
+
def date_ranges(
|
117
|
+
start,
|
118
|
+
stop,
|
119
|
+
interval = "1d",
|
120
|
+
closed: "both",
|
121
|
+
eager: false
|
122
|
+
)
|
123
|
+
interval = Utils.parse_interval_argument(interval)
|
124
|
+
start_rbexpr = Utils.parse_into_expression(start)
|
125
|
+
end_rbexpr = Utils.parse_into_expression(stop)
|
126
|
+
|
127
|
+
result = Utils.wrap_expr(Plr.date_ranges(start_rbexpr, end_rbexpr, interval, closed))
|
95
128
|
|
96
|
-
if
|
97
|
-
return select(result).to_series
|
129
|
+
if eager
|
130
|
+
return F.select(result).to_series
|
98
131
|
end
|
99
132
|
|
100
133
|
result
|