polars-df 0.10.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -14,7 +14,7 @@ module Polars
14
14
  namespace = self.class._accessor
15
15
 
16
16
  s = Utils.wrap_s(_s)
17
- expr = Utils.col(s.name)
17
+ expr = F.col(s.name)
18
18
  expr = expr.send(namespace) if namespace
19
19
  s.to_frame.select(expr.send(method, ...)).to_series
20
20
  end
@@ -32,7 +32,7 @@ module Polars
32
32
  # # │ null ┆ null ┆ z ┆ null │
33
33
  # # └───────┴───────┴─────┴───────┘
34
34
  def all_horizontal(*exprs)
35
- rbexprs = Utils.parse_as_list_of_expressions(*exprs)
35
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs)
36
36
  Utils.wrap_expr(Plr.all_horizontal(rbexprs))
37
37
  end
38
38
 
@@ -68,7 +68,7 @@ module Polars
68
68
  # # │ null ┆ null ┆ z ┆ null │
69
69
  # # └───────┴───────┴─────┴───────┘
70
70
  def any_horizontal(*exprs)
71
- rbexprs = Utils.parse_as_list_of_expressions(*exprs)
71
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs)
72
72
  Utils.wrap_expr(Plr.any_horizontal(rbexprs))
73
73
  end
74
74
 
@@ -101,7 +101,7 @@ module Polars
101
101
  # # │ 3 ┆ null ┆ z ┆ 3 │
102
102
  # # └─────┴──────┴─────┴─────┘
103
103
  def max_horizontal(*exprs)
104
- rbexprs = Utils.parse_as_list_of_expressions(*exprs)
104
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs)
105
105
  Utils.wrap_expr(Plr.max_horizontal(rbexprs))
106
106
  end
107
107
 
@@ -134,7 +134,7 @@ module Polars
134
134
  # # │ 3 ┆ null ┆ z ┆ 3 │
135
135
  # # └─────┴──────┴─────┴─────┘
136
136
  def min_horizontal(*exprs)
137
- rbexprs = Utils.parse_as_list_of_expressions(*exprs)
137
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs)
138
138
  Utils.wrap_expr(Plr.min_horizontal(rbexprs))
139
139
  end
140
140
 
@@ -167,7 +167,7 @@ module Polars
167
167
  # # │ 3 ┆ null ┆ z ┆ 3 │
168
168
  # # └─────┴──────┴─────┴─────┘
169
169
  def sum_horizontal(*exprs)
170
- rbexprs = Utils.parse_as_list_of_expressions(*exprs)
170
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs)
171
171
  Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
172
172
  end
173
173
 
@@ -200,7 +200,7 @@ module Polars
200
200
  # # │ 3 ┆ null ┆ z ┆ 3.0 │
201
201
  # # └─────┴──────┴─────┴──────┘
202
202
  def mean_horizontal(*exprs)
203
- rbexprs = Utils.parse_as_list_of_expressions(*exprs)
203
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs)
204
204
  Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
205
205
  end
206
206
 
@@ -233,11 +233,11 @@ module Polars
233
233
  # # │ 3 ┆ null ┆ z ┆ {3,null} │
234
234
  # # └─────┴──────┴─────┴───────────┘
235
235
  def cum_sum_horizontal(*exprs)
236
- rbexprs = Utils.parse_as_list_of_expressions(*exprs)
236
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs)
237
237
  exprs_wrapped = rbexprs.map { |e| Utils.wrap_expr(e) }
238
238
 
239
239
  # (Expr): use u32 as that will not cast to float as eagerly
240
- Polars.cum_fold(Polars.lit(0).cast(UInt32), -> (a, b) { a + b }, exprs_wrapped).alias(
240
+ Polars.cum_fold(Polars.lit(0).cast(UInt32), ->(a, b) { a + b }, exprs_wrapped).alias(
241
241
  "cum_sum"
242
242
  )
243
243
  end
@@ -44,28 +44,28 @@ module Polars
44
44
  time_unit: "us"
45
45
  )
46
46
  if !weeks.nil?
47
- weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
47
+ weeks = Utils.parse_into_expression(weeks, str_as_lit: false)
48
48
  end
49
49
  if !days.nil?
50
- days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
50
+ days = Utils.parse_into_expression(days, str_as_lit: false)
51
51
  end
52
52
  if !hours.nil?
53
- hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
53
+ hours = Utils.parse_into_expression(hours, str_as_lit: false)
54
54
  end
55
55
  if !minutes.nil?
56
- minutes = Utils.expr_to_lit_or_expr(minutes, str_to_lit: false)._rbexpr
56
+ minutes = Utils.parse_into_expression(minutes, str_as_lit: false)
57
57
  end
58
58
  if !seconds.nil?
59
- seconds = Utils.expr_to_lit_or_expr(seconds, str_to_lit: false)._rbexpr
59
+ seconds = Utils.parse_into_expression(seconds, str_as_lit: false)
60
60
  end
61
61
  if !milliseconds.nil?
62
- milliseconds = Utils.expr_to_lit_or_expr(milliseconds, str_to_lit: false)._rbexpr
62
+ milliseconds = Utils.parse_into_expression(milliseconds, str_as_lit: false)
63
63
  end
64
64
  if !microseconds.nil?
65
- microseconds = Utils.expr_to_lit_or_expr(microseconds, str_to_lit: false)._rbexpr
65
+ microseconds = Utils.parse_into_expression(microseconds, str_as_lit: false)
66
66
  end
67
67
  if !nanoseconds.nil?
68
- nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
68
+ nanoseconds = Utils.parse_into_expression(nanoseconds, str_as_lit: false)
69
69
  end
70
70
 
71
71
  Utils.wrap_expr(
@@ -87,28 +87,38 @@ module Polars
87
87
  #
88
88
  # @return [Expr]
89
89
  def concat_list(exprs)
90
- exprs = Utils.selection_to_rbexpr_list(exprs)
90
+ exprs = Utils.parse_into_list_of_expressions(exprs)
91
91
  Utils.wrap_expr(Plr.concat_list(exprs))
92
92
  end
93
93
 
94
94
  # Collect several columns into a Series of dtype Struct.
95
95
  #
96
- # @param exprs [Object]
97
- # Columns/Expressions to collect into a Struct
96
+ # @param exprs [Array]
97
+ # Column(s) to collect into a struct column, specified as positional arguments.
98
+ # Accepts expression input. Strings are parsed as column names,
99
+ # other non-expression inputs are parsed as literals.
100
+ # @param schema [Hash]
101
+ # Optional schema that explicitly defines the struct field dtypes. If no columns
102
+ # or expressions are provided, schema keys are used to define columns.
98
103
  # @param eager [Boolean]
99
- # Evaluate immediately
104
+ # Evaluate immediately and return a `Series`. If set to `false` (default),
105
+ # return an expression instead.
106
+ # @param named_exprs [Hash]
107
+ # Additional columns to collect into the struct column, specified as keyword
108
+ # arguments. The columns will be renamed to the keyword used.
100
109
  #
101
110
  # @return [Object]
102
111
  #
103
112
  # @example
104
- # Polars::DataFrame.new(
113
+ # df = Polars::DataFrame.new(
105
114
  # {
106
115
  # "int" => [1, 2],
107
116
  # "str" => ["a", "b"],
108
117
  # "bool" => [true, nil],
109
118
  # "list" => [[1, 2], [3]],
110
119
  # }
111
- # ).select([Polars.struct(Polars.all).alias("my_struct")])
120
+ # )
121
+ # df.select([Polars.struct(Polars.all).alias("my_struct")])
112
122
  # # =>
113
123
  # # shape: (2, 1)
114
124
  # # ┌─────────────────────┐
@@ -120,29 +130,42 @@ module Polars
120
130
  # # │ {2,"b",null,[3]} │
121
131
  # # └─────────────────────┘
122
132
  #
123
- # @example Only collect specific columns as a struct:
124
- # df = Polars::DataFrame.new(
125
- # {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
126
- # )
127
- # df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
133
+ # @example Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.
134
+ # df.select(Polars.struct("int", false).alias("my_struct"))
128
135
  # # =>
129
- # # shape: (4, 4)
130
- # # ┌─────┬───────┬─────┬─────────────┐
131
- # # │ a ┆ b ┆ c ┆ a_and_b
132
- # # │ --- ┆ --- ┆ --- ┆ ---
133
- # # │ i64 ┆ str ┆ i64 ┆ struct[2]
134
- # # ╞═════╪═══════╪═════╪═════════════╡
135
- # # │ 1 ┆ one ┆ 9 ┆ {1,"one"}
136
- # # │ 2 ┆ two ┆ 8 ┆ {2,"two"}
137
- # # │ 3 ┆ three ┆ 7 ┆ {3,"three"} │
138
- # # │ 4 ┆ four ┆ 6 ┆ {4,"four"} │
139
- # # └─────┴───────┴─────┴─────────────┘
140
- def struct(exprs, eager: false)
136
+ # # shape: (2, 1)
137
+ # # ┌───────────┐
138
+ # # │ my_struct
139
+ # # │ ---
140
+ # # │ struct[2]
141
+ # # ╞═══════════╡
142
+ # # │ {1,false}
143
+ # # │ {2,false}
144
+ # # └───────────┘
145
+ #
146
+ # @example Use keyword arguments to easily name each struct field.
147
+ # df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
148
+ # # => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}
149
+ def struct(*exprs, schema: nil, eager: false, **named_exprs)
150
+ rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
151
+ expr = Utils.wrap_expr(Plr.as_struct(rbexprs))
152
+
153
+ if !schema.nil? && !schema.empty?
154
+ if !exprs.any?
155
+ # no columns or expressions provided; create one from schema keys
156
+ expr =
157
+ Utils.wrap_expr(
158
+ Plr.as_struct(Utils.parse_into_list_of_expressions(schema.keys))
159
+ )
160
+ expr = expr.cast(Struct.new(schema), strict: false)
161
+ end
162
+ end
163
+
141
164
  if eager
142
- Polars.select(struct(exprs, eager: false)).to_series
165
+ Polars.select(expr).to_series
166
+ else
167
+ expr
143
168
  end
144
- exprs = Utils.selection_to_rbexpr_list(exprs)
145
- Utils.wrap_expr(Plr.as_struct(exprs))
146
169
  end
147
170
 
148
171
  # Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
@@ -188,13 +211,13 @@ module Polars
188
211
  # # │ 3 ┆ null ┆ walk ┆ null │
189
212
  # # └─────┴──────┴──────┴───────────────┘
190
213
  def concat_str(exprs, sep: "", ignore_nulls: false)
191
- exprs = Utils.selection_to_rbexpr_list(exprs)
214
+ exprs = Utils.parse_into_list_of_expressions(exprs)
192
215
  Utils.wrap_expr(Plr.concat_str(exprs, sep, ignore_nulls))
193
216
  end
194
217
 
195
218
  # Format expressions as a string.
196
219
  #
197
- # @param fstring [String]
220
+ # @param f_string [String]
198
221
  # A string that with placeholders.
199
222
  # For example: "hello_{}" or "{}_world
200
223
  # @param args [Object]
@@ -225,17 +248,17 @@ module Polars
225
248
  # # │ foo_b_bar_2 │
226
249
  # # │ foo_c_bar_3 │
227
250
  # # └─────────────┘
228
- def format(fstring, *args)
229
- if fstring.scan("{}").length != args.length
251
+ def format(f_string, *args)
252
+ if f_string.scan("{}").length != args.length
230
253
  raise ArgumentError, "number of placeholders should equal the number of arguments"
231
254
  end
232
255
 
233
256
  exprs = []
234
257
 
235
258
  arguments = args.each
236
- fstring.split(/(\{\})/).each do |s|
259
+ f_string.split(/(\{\})/).each do |s|
237
260
  if s == "{}"
238
- e = Utils.expr_to_lit_or_expr(arguments.next, str_to_lit: false)
261
+ e = Utils.wrap_expr(Utils.parse_into_expression(arguments.next))
239
262
  exprs << e
240
263
  elsif s.length > 0
241
264
  exprs << lit(s)
@@ -524,6 +524,55 @@ module Polars
524
524
  col(*columns).last
525
525
  end
526
526
 
527
+ # Get the nth column(s) of the context.
528
+ #
529
+ # @param indices [Array]
530
+ # One or more indices representing the columns to retrieve.
531
+ #
532
+ # @return [Expr]
533
+ #
534
+ # @example
535
+ # df = Polars::DataFrame.new(
536
+ # {
537
+ # "a" => [1, 8, 3],
538
+ # "b" => [4, 5, 2],
539
+ # "c" => ["foo", "bar", "baz"]
540
+ # }
541
+ # )
542
+ # df.select(Polars.nth(1))
543
+ # # =>
544
+ # # shape: (3, 1)
545
+ # # ┌─────┐
546
+ # # │ b │
547
+ # # │ --- │
548
+ # # │ i64 │
549
+ # # ╞═════╡
550
+ # # │ 4 │
551
+ # # │ 5 │
552
+ # # │ 2 │
553
+ # # └─────┘
554
+ #
555
+ # @example
556
+ # df.select(Polars.nth(2, 0))
557
+ # # =>
558
+ # # shape: (3, 2)
559
+ # # ┌─────┬─────┐
560
+ # # │ c ┆ a │
561
+ # # │ --- ┆ --- │
562
+ # # │ str ┆ i64 │
563
+ # # ╞═════╪═════╡
564
+ # # │ foo ┆ 1 │
565
+ # # │ bar ┆ 8 │
566
+ # # │ baz ┆ 3 │
567
+ # # └─────┴─────┘
568
+ def nth(*indices)
569
+ if indices.length == 1 && indices[0].is_a?(Array)
570
+ indices = indices[0]
571
+ end
572
+
573
+ Utils.wrap_expr(Plr.index_cols(indices))
574
+ end
575
+
527
576
  # Get the first `n` rows.
528
577
  #
529
578
  # This function is syntactic sugar for `col(column).head(n)`.
@@ -683,8 +732,8 @@ module Polars
683
732
  ddof: 1,
684
733
  propagate_nans: false
685
734
  )
686
- a = Utils.parse_as_expression(a)
687
- b = Utils.parse_as_expression(b)
735
+ a = Utils.parse_into_expression(a)
736
+ b = Utils.parse_into_expression(b)
688
737
 
689
738
  if method == "pearson"
690
739
  Utils.wrap_expr(Plr.pearson_corr(a, b, ddof))
@@ -728,8 +777,8 @@ module Polars
728
777
  # # │ 3.0 │
729
778
  # # └─────┘
730
779
  def cov(a, b, ddof: 1)
731
- a = Utils.parse_as_expression(a)
732
- b = Utils.parse_as_expression(b)
780
+ a = Utils.parse_into_expression(a)
781
+ b = Utils.parse_into_expression(b)
733
782
  Utils.wrap_expr(Plr.cov(a, b, ddof))
734
783
  end
735
784
 
@@ -743,13 +792,13 @@ module Polars
743
792
  #
744
793
  # @return [Expr]
745
794
  def fold(acc, f, exprs)
746
- acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
795
+ acc = Utils.parse_into_expression(acc, str_as_lit: true)
747
796
  if exprs.is_a?(Expr)
748
797
  exprs = [exprs]
749
798
  end
750
799
 
751
- exprs = Utils.selection_to_rbexpr_list(exprs)
752
- Utils.wrap_expr(Plr.fold(acc._rbexpr, f, exprs))
800
+ exprs = Utils.parse_into_list_of_expressions(exprs)
801
+ Utils.wrap_expr(Plr.fold(acc, f, exprs))
753
802
  end
754
803
 
755
804
  # def reduce
@@ -776,13 +825,13 @@ module Polars
776
825
  # If you simply want the first encountered expression as accumulator,
777
826
  # consider using `cumreduce`.
778
827
  def cum_fold(acc, f, exprs, include_init: false)
779
- acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
828
+ acc = Utils.parse_into_expression(acc, str_as_lit: true)
780
829
  if exprs.is_a?(Expr)
781
830
  exprs = [exprs]
782
831
  end
783
832
 
784
- exprs = Utils.selection_to_rbexpr_list(exprs)
785
- Utils.wrap_expr(Plr.cum_fold(acc._rbexpr, f, exprs, include_init))
833
+ exprs = Utils.parse_into_list_of_expressions(exprs)
834
+ Utils.wrap_expr(Plr.cum_fold(acc, f, exprs, include_init))
786
835
  end
787
836
  alias_method :cumfold, :cum_fold
788
837
 
@@ -982,7 +1031,7 @@ module Polars
982
1031
  if reverse == true || reverse == false
983
1032
  reverse = [reverse] * exprs.length
984
1033
  end
985
- exprs = Utils.selection_to_rbexpr_list(exprs)
1034
+ exprs = Utils.parse_into_list_of_expressions(exprs)
986
1035
  Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse))
987
1036
  end
988
1037
  alias_method :argsort_by, :arg_sort_by
@@ -1119,8 +1168,8 @@ module Polars
1119
1168
  end
1120
1169
  condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
1121
1170
  else
1122
- condition = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
1123
- Utils.wrap_expr(Plr.arg_where(condition._rbexpr))
1171
+ condition = Utils.parse_into_expression(condition, str_as_lit: true)
1172
+ Utils.wrap_expr(Plr.arg_where(condition))
1124
1173
  end
1125
1174
  end
1126
1175
 
@@ -1171,7 +1220,7 @@ module Polars
1171
1220
  # # │ null ┆ null ┆ null ┆ 10.0 │
1172
1221
  # # └──────┴──────┴──────┴──────┘
1173
1222
  def coalesce(exprs, *more_exprs)
1174
- exprs = Utils.parse_as_list_of_expressions(exprs, *more_exprs)
1223
+ exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
1175
1224
  Utils.wrap_expr(Plr.coalesce(exprs))
1176
1225
  end
1177
1226
 
@@ -7,7 +7,7 @@ module Polars
7
7
  if value.is_a?(::Time) || value.is_a?(::DateTime)
8
8
  time_unit = dtype&.time_unit || "ns"
9
9
  time_zone = dtype.&time_zone
10
- e = lit(Utils._datetime_to_pl_timestamp(value, time_unit)).cast(Datetime.new(time_unit))
10
+ e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit))
11
11
  if time_zone
12
12
  return e.dt.replace_time_zone(time_zone.to_s)
13
13
  else
@@ -9,16 +9,11 @@ module Polars
9
9
  # @param interval [Object]
10
10
  # Interval periods. It can be a polars duration string, such as `3d12h4m25s`
11
11
  # representing 3 days, 12 hours, 4 minutes, and 25 seconds.
12
- # @param lazy [Boolean]
13
- # Return an expression.
14
12
  # @param closed ["both", "left", "right", "none"]
15
13
  # Define whether the temporal window interval is closed or not.
16
- # @param name [String]
17
- # Name of the output Series.
18
- # @param time_unit [nil, "ns", "us", "ms"]
19
- # Set the time unit.
20
- # @param time_zone [String]
21
- # Optional timezone
14
+ # @param eager [Boolean]
15
+ # Evaluate immediately and return a `Series`.
16
+ # If set to `false` (default), return an expression instead.
22
17
  #
23
18
  # @return [Object]
24
19
  #
@@ -28,73 +23,111 @@ module Polars
28
23
  # type date. All other permutations return a datetime Series.
29
24
  #
30
25
  # @example Using polars duration string to specify the interval
31
- # Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", name: "drange")
26
+ # Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
27
+ # "date"
28
+ # )
32
29
  # # =>
33
30
  # # shape: (3,)
34
- # # Series: 'drange' [date]
31
+ # # Series: 'date' [date]
35
32
  # # [
36
33
  # # 2022-01-01
37
34
  # # 2022-02-01
38
35
  # # 2022-03-01
39
36
  # # ]
40
- #
41
- # @example Using `timedelta` object to specify the interval:
42
- # Polars.date_range(
43
- # DateTime.new(1985, 1, 1),
44
- # DateTime.new(1985, 1, 10),
45
- # "1d12h",
46
- # time_unit: "ms"
47
- # )
48
- # # =>
49
- # # shape: (7,)
50
- # # Series: '' [datetime[ms]]
51
- # # [
52
- # # 1985-01-01 00:00:00
53
- # # 1985-01-02 12:00:00
54
- # # 1985-01-04 00:00:00
55
- # # 1985-01-05 12:00:00
56
- # # 1985-01-07 00:00:00
57
- # # 1985-01-08 12:00:00
58
- # # 1985-01-10 00:00:00
59
- # # ]
60
37
  def date_range(
61
38
  start,
62
39
  stop,
63
- interval,
64
- lazy: false,
40
+ interval = "1d",
65
41
  closed: "both",
66
- name: nil,
67
- time_unit: nil,
68
- time_zone: nil
42
+ eager: false
69
43
  )
70
- if defined?(ActiveSupport::Duration) && interval.is_a?(ActiveSupport::Duration)
71
- raise Todo
72
- else
73
- interval = interval.to_s
74
- if interval.include?(" ")
75
- interval = interval.gsub(" ", "")
76
- end
77
- end
78
-
79
- if time_unit.nil?
80
- if interval.include?("ns")
81
- time_unit = "ns"
82
- else
83
- time_unit = "us"
84
- end
85
- end
44
+ interval = Utils.parse_interval_argument(interval)
86
45
 
87
- start_rbexpr = Utils.parse_as_expression(start)
88
- stop_rbexpr = Utils.parse_as_expression(stop)
46
+ start_rbexpr = Utils.parse_into_expression(start)
47
+ end_rbexpr = Utils.parse_into_expression(stop)
89
48
 
90
49
  result = Utils.wrap_expr(
91
- Plr.date_range(start_rbexpr, stop_rbexpr, interval, closed, time_unit, time_zone)
50
+ Plr.date_range(start_rbexpr, end_rbexpr, interval, closed)
92
51
  )
93
52
 
94
- result = result.alias(name.to_s)
53
+ if eager
54
+ return F.select(result).to_series
55
+ end
56
+
57
+ result
58
+ end
59
+
60
+ # Create a column of date ranges.
61
+ #
62
+ # @param start [Object]
63
+ # Lower bound of the date range.
64
+ # @param stop [Object]
65
+ # Upper bound of the date range.
66
+ # @param interval [Object]
67
+ # Interval of the range periods, specified using the Polars duration string language (see "Notes" section below).
68
+ # @param closed ["both", "left", "right", "none"]
69
+ # Define which sides of the range are closed (inclusive).
70
+ # @param eager [Boolean]
71
+ # Evaluate immediately and return a `Series`.
72
+ # If set to `false` (default), return an expression instead.
73
+ #
74
+ # @return [Object]
75
+ #
76
+ # @note
77
+ # `interval` is created according to the following string language:
78
+ #
79
+ # - 1ns (1 nanosecond)
80
+ # - 1us (1 microsecond)
81
+ # - 1ms (1 millisecond)
82
+ # - 1s (1 second)
83
+ # - 1m (1 minute)
84
+ # - 1h (1 hour)
85
+ # - 1d (1 calendar day)
86
+ # - 1w (1 calendar week)
87
+ # - 1mo (1 calendar month)
88
+ # - 1q (1 calendar quarter)
89
+ # - 1y (1 calendar year)
90
+ #
91
+ # Or combine them:
92
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
93
+ #
94
+ # By "calendar day", we mean the corresponding time on the next day (which may
95
+ # not be 24 hours, due to daylight savings). Similarly for "calendar week",
96
+ # "calendar month", "calendar quarter", and "calendar year".
97
+ #
98
+ # @example
99
+ # df = Polars::DataFrame.new(
100
+ # {
101
+ # "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
102
+ # "end" => Date.new(2022, 1, 3)
103
+ # }
104
+ # )
105
+ # df.with_columns(date_range: Polars.date_ranges("start", "end"))
106
+ # # =>
107
+ # # shape: (2, 3)
108
+ # # ┌────────────┬────────────┬─────────────────────────────────┐
109
+ # # │ start ┆ end ┆ date_range │
110
+ # # │ --- ┆ --- ┆ --- │
111
+ # # │ date ┆ date ┆ list[date] │
112
+ # # ╞════════════╪════════════╪═════════════════════════════════╡
113
+ # # │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
114
+ # # │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03] │
115
+ # # └────────────┴────────────┴─────────────────────────────────┘
116
+ def date_ranges(
117
+ start,
118
+ stop,
119
+ interval = "1d",
120
+ closed: "both",
121
+ eager: false
122
+ )
123
+ interval = Utils.parse_interval_argument(interval)
124
+ start_rbexpr = Utils.parse_into_expression(start)
125
+ end_rbexpr = Utils.parse_into_expression(stop)
126
+
127
+ result = Utils.wrap_expr(Plr.date_ranges(start_rbexpr, end_rbexpr, interval, closed))
95
128
 
96
- if !lazy
97
- return select(result).to_series
129
+ if eager
130
+ return F.select(result).to_series
98
131
  end
99
132
 
100
133
  result