polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -247,6 +247,5 @@ module Polars
247
247
  "cum_sum"
248
248
  )
249
249
  end
250
- alias_method :cumsum_horizontal, :cum_sum_horizontal
251
250
  end
252
251
  end
@@ -46,7 +46,7 @@ module Polars
46
46
  return col("*")
47
47
  end
48
48
 
49
- col(*names).all(drop_nulls: ignore_nulls)
49
+ col(*names).all(ignore_nulls: ignore_nulls)
50
50
  end
51
51
 
52
52
  # Evaluate a bitwise OR operation.
@@ -78,7 +78,7 @@ module Polars
78
78
  # # │ true │
79
79
  # # └──────┘
80
80
  def any(*names, ignore_nulls: true)
81
- col(*names).any(drop_nulls: ignore_nulls)
81
+ col(*names).any(ignore_nulls: ignore_nulls)
82
82
  end
83
83
 
84
84
  # Get the maximum value.
@@ -277,6 +277,5 @@ module Polars
277
277
  def cum_sum(*names)
278
278
  col(*names).cum_sum
279
279
  end
280
- alias_method :cumsum, :cum_sum
281
280
  end
282
281
  end
@@ -1,5 +1,233 @@
1
1
  module Polars
2
2
  module Functions
3
+ # Create a Polars literal expression of type Datetime.
4
+ #
5
+ # @param year [Object]
6
+ # Column or literal.
7
+ # @param month [Object]
8
+ # Column or literal, ranging from 1-12.
9
+ # @param day [Object]
10
+ # Column or literal, ranging from 1-31.
11
+ # @param hour [Object]
12
+ # Column or literal, ranging from 0-23.
13
+ # @param minute [Object]
14
+ # Column or literal, ranging from 0-59.
15
+ # @param second [Object]
16
+ # Column or literal, ranging from 0-59.
17
+ # @param microsecond [Object]
18
+ # Column or literal, ranging from 0-999999.
19
+ # @param time_unit ['us', 'ms', 'ns']
20
+ # Time unit of the resulting expression.
21
+ # @param time_zone [Object]
22
+ # Time zone of the resulting expression.
23
+ # @param ambiguous ['raise', 'earliest', 'latest', 'null']
24
+ # Determine how to deal with ambiguous datetimes:
25
+ #
26
+ # - `'raise'` (default): raise
27
+ # - `'earliest'`: use the earliest datetime
28
+ # - `'latest'`: use the latest datetime
29
+ # - `'null'`: set to null
30
+ #
31
+ # @return [Expr]
32
+ #
33
+ # @example
34
+ # df = Polars::DataFrame.new(
35
+ # {
36
+ # "month" => [1, 2, 3],
37
+ # "day" => [4, 5, 6],
38
+ # "hour" => [12, 13, 14],
39
+ # "minute" => [15, 30, 45]
40
+ # }
41
+ # )
42
+ # df.with_columns(
43
+ # Polars.datetime(
44
+ # 2024,
45
+ # Polars.col("month"),
46
+ # Polars.col("day"),
47
+ # Polars.col("hour"),
48
+ # Polars.col("minute"),
49
+ # time_zone: "Australia/Sydney"
50
+ # )
51
+ # )
52
+ # # =>
53
+ # # shape: (3, 5)
54
+ # # ┌───────┬─────┬──────┬────────┬────────────────────────────────┐
55
+ # # │ month ┆ day ┆ hour ┆ minute ┆ datetime │
56
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
57
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ datetime[μs, Australia/Sydney] │
58
+ # # ╞═══════╪═════╪══════╪════════╪════════════════════════════════╡
59
+ # # │ 1 ┆ 4 ┆ 12 ┆ 15 ┆ 2024-01-04 12:15:00 AEDT │
60
+ # # │ 2 ┆ 5 ┆ 13 ┆ 30 ┆ 2024-02-05 13:30:00 AEDT │
61
+ # # │ 3 ┆ 6 ┆ 14 ┆ 45 ┆ 2024-03-06 14:45:00 AEDT │
62
+ # # └───────┴─────┴──────┴────────┴────────────────────────────────┘
63
+ #
64
+ # @example We can also use `Polars.datetime` for filtering:
65
+ # df = Polars::DataFrame.new(
66
+ # {
67
+ # "start" => [
68
+ # DateTime.new(2024, 1, 1, 0, 0, 0),
69
+ # DateTime.new(2024, 1, 1, 0, 0, 0),
70
+ # DateTime.new(2024, 1, 1, 0, 0, 0)
71
+ # ],
72
+ # "end" => [
73
+ # DateTime.new(2024, 5, 1, 20, 15, 10),
74
+ # DateTime.new(2024, 7, 1, 21, 25, 20),
75
+ # DateTime.new(2024, 9, 1, 22, 35, 30)
76
+ # ]
77
+ # }
78
+ # )
79
+ # df.filter(Polars.col("end") > Polars.datetime(2024, 6, 1))
80
+ # # =>
81
+ # # shape: (2, 2)
82
+ # # ┌─────────────────────┬─────────────────────┐
83
+ # # │ start ┆ end │
84
+ # # │ --- ┆ --- │
85
+ # # │ datetime[ns] ┆ datetime[ns] │
86
+ # # ╞═════════════════════╪═════════════════════╡
87
+ # # │ 2024-01-01 00:00:00 ┆ 2024-07-01 21:25:20 │
88
+ # # │ 2024-01-01 00:00:00 ┆ 2024-09-01 22:35:30 │
89
+ # # └─────────────────────┴─────────────────────┘
90
def datetime(
  year,
  month,
  day,
  hour = nil,
  minute = nil,
  second = nil,
  microsecond = nil,
  time_unit: "us",
  time_zone: nil,
  ambiguous: "raise"
)
  # Optional components are forwarded as nil when omitted; only supplied
  # values are parsed into expressions.
  parse_if_given = lambda do |component|
    component.nil? ? nil : Utils.parse_into_expression(component)
  end

  # `ambiguous` is parsed with str_as_lit: true, so a plain string such as
  # "raise" is handed over as a literal.
  ambiguous_rbexpr = Utils.parse_into_expression(ambiguous, str_as_lit: true)
  date_parts = [year, month, day].map { |component| Utils.parse_into_expression(component) }
  time_parts = [hour, minute, second, microsecond].map(&parse_if_given)

  rbexpr = Plr.datetime(*date_parts, *time_parts, time_unit, time_zone, ambiguous_rbexpr)
  Utils.wrap_expr(rbexpr)
end
129
+
130
+ # Create a Polars literal expression of type Date.
131
+ #
132
+ # @param year [Object]
133
+ # column or literal.
134
+ # @param month [Object]
135
+ # column or literal, ranging from 1-12.
136
+ # @param day [Object]
137
+ # column or literal, ranging from 1-31.
138
+ #
139
+ # @return [Expr]
140
+ #
141
+ # @example
142
+ # df = Polars::DataFrame.new(
143
+ # {
144
+ # "month" => [1, 2, 3],
145
+ # "day" => [4, 5, 6]
146
+ # }
147
+ # )
148
+ # df.with_columns(Polars.date(2024, Polars.col("month"), Polars.col("day")))
149
+ # # =>
150
+ # # shape: (3, 3)
151
+ # # ┌───────┬─────┬────────────┐
152
+ # # │ month ┆ day ┆ date │
153
+ # # │ --- ┆ --- ┆ --- │
154
+ # # │ i64 ┆ i64 ┆ date │
155
+ # # ╞═══════╪═════╪════════════╡
156
+ # # │ 1 ┆ 4 ┆ 2024-01-04 │
157
+ # # │ 2 ┆ 5 ┆ 2024-02-05 │
158
+ # # │ 3 ┆ 6 ┆ 2024-03-06 │
159
+ # # └───────┴─────┴────────────┘
160
+ #
161
+ # @example We can also use `Polars.date` for filtering:
162
+ # df = Polars::DataFrame.new(
163
+ # {
164
+ # "start" => [Date.new(2024, 1, 1), Date.new(2024, 1, 1), Date.new(2024, 1, 1)],
165
+ # "end" => [Date.new(2024, 5, 1), Date.new(2024, 7, 1), Date.new(2024, 9, 1)]
166
+ # }
167
+ # )
168
+ # df.filter(Polars.col("end") > Polars.date(2024, 6, 1))
169
+ # # =>
170
+ # # shape: (2, 2)
171
+ # # ┌────────────┬────────────┐
172
+ # # │ start ┆ end │
173
+ # # │ --- ┆ --- │
174
+ # # │ date ┆ date │
175
+ # # ╞════════════╪════════════╡
176
+ # # │ 2024-01-01 ┆ 2024-07-01 │
177
+ # # │ 2024-01-01 ┆ 2024-09-01 │
178
+ # # └────────────┴────────────┘
179
def date(
  year,
  month,
  day
)
  # Build a datetime expression from the three components, then cast it to
  # Date and name the result "date".
  as_datetime = datetime(year, month, day)
  as_datetime.cast(Date).alias("date")
end
186
+
187
+ # Create a Polars literal expression of type Time.
188
+ #
189
+ # @param hour [Object]
190
+ # column or literal, ranging from 0-23.
191
+ # @param minute [Object]
192
+ # column or literal, ranging from 0-59.
193
+ # @param second [Object]
194
+ # column or literal, ranging from 0-59.
195
+ # @param microsecond [Object]
196
+ # column or literal, ranging from 0-999999.
197
+ #
198
+ # @return [Expr]
199
+ #
200
+ # @example
201
+ # df = Polars::DataFrame.new(
202
+ # {
203
+ # "hour" => [12, 13, 14],
204
+ # "minute" => [15, 30, 45]
205
+ # }
206
+ # )
207
+ # df.with_columns(Polars.time(Polars.col("hour"), Polars.col("minute")))
208
+ # # =>
209
+ # # shape: (3, 3)
210
+ # # ┌──────┬────────┬──────────┐
211
+ # # │ hour ┆ minute ┆ time │
212
+ # # │ --- ┆ --- ┆ --- │
213
+ # # │ i64 ┆ i64 ┆ time │
214
+ # # ╞══════╪════════╪══════════╡
215
+ # # │ 12 ┆ 15 ┆ 12:15:00 │
216
+ # # │ 13 ┆ 30 ┆ 13:30:00 │
217
+ # # │ 14 ┆ 45 ┆ 14:45:00 │
218
+ # # └──────┴────────┴──────────┘
219
def time(
  hour = nil,
  minute = nil,
  second = nil,
  microsecond = nil
)
  # Build a datetime expression on 1970-01-01 with the given time components,
  # then cast it to Time and name the result "time".
  on_epoch_day = datetime(1970, 1, 1, hour, minute, second, microsecond)
  on_epoch_day.cast(Time).alias("time")
end
230
+
3
231
  # Create polars `Duration` from distinct time components.
4
232
  #
5
233
  # @return [Expr]
@@ -41,8 +269,12 @@ module Polars
41
269
  milliseconds: nil,
42
270
  microseconds: nil,
43
271
  nanoseconds: nil,
44
- time_unit: "us"
272
+ time_unit: nil
45
273
  )
274
+ if !nanoseconds.nil? && time_unit.nil?
275
+ time_unit = "ns"
276
+ end
277
+
46
278
  if !weeks.nil?
47
279
  weeks = Utils.parse_into_expression(weeks, str_as_lit: false)
48
280
  end
@@ -68,6 +300,10 @@ module Polars
68
300
  nanoseconds = Utils.parse_into_expression(nanoseconds, str_as_lit: false)
69
301
  end
70
302
 
303
+ if time_unit.nil?
304
+ time_unit = "us"
305
+ end
306
+
71
307
  Utils.wrap_expr(
72
308
  Plr.duration(
73
309
  weeks,
@@ -140,6 +376,49 @@ module Polars
140
376
  Utils.wrap_expr(Plr.concat_list(exprs))
141
377
  end
142
378
 
379
+ # Horizontally concatenate columns into a single array column.
380
+ #
381
+ # Non-array columns are reshaped to a unit-width array. All columns must have
382
+ # a dtype of either `Polars::Array.new(<DataType>, width)` or `Polars::<DataType>`.
383
+ #
384
+ # @note
385
+ # This functionality is considered **unstable**. It may be changed
386
+ # at any point without it being considered a breaking change.
387
+ #
388
+ # @param exprs [Object]
389
+ # Columns to concatenate into a single array column. Accepts expression input.
390
+ # Strings are parsed as column names, other non-expression inputs are parsed as
391
+ # literals.
392
+ # @param more_exprs [Array]
393
+ # Additional columns to concatenate into a single array column, specified as
394
+ # positional arguments.
395
+ #
396
+ # @return [Expr]
397
+ #
398
+ # @example Concatenate 2 array columns:
399
+ # Polars.select(
400
+ # a: Polars::Series.new([[1], [3], nil], dtype: Polars::Array.new(Polars::Int64, 1)),
401
+ # b: Polars::Series.new([[3], [nil], [5]], dtype: Polars::Array.new(Polars::Int64, 1))
402
+ # ).with_columns(
403
+ # Polars.concat_arr("a", "b").alias("concat_arr(a, b)"),
404
+ # Polars.concat_arr("a", Polars.first("b")).alias("concat_arr(a, first(b))")
405
+ # )
406
+ # # =>
407
+ # # shape: (3, 4)
408
+ # # ┌───────────────┬───────────────┬──────────────────┬─────────────────────────┐
409
+ # # │ a ┆ b ┆ concat_arr(a, b) ┆ concat_arr(a, first(b)) │
410
+ # # │ --- ┆ --- ┆ --- ┆ --- │
411
+ # # │ array[i64, 1] ┆ array[i64, 1] ┆ array[i64, 2] ┆ array[i64, 2] │
412
+ # # ╞═══════════════╪═══════════════╪══════════════════╪═════════════════════════╡
413
+ # # │ [1] ┆ [3] ┆ [1, 3] ┆ [1, 3] │
414
+ # # │ [3] ┆ [null] ┆ [3, null] ┆ [3, 3] │
415
+ # # │ null ┆ [5] ┆ null ┆ null │
416
+ # # └───────────────┴───────────────┴──────────────────┴─────────────────────────┘
417
def concat_arr(exprs, *more_exprs)
  # Parse every input into an expression, concatenate them natively, and
  # wrap the native result back into an Expr.
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  concatenated = Plr.concat_arr(rbexprs)
  Utils.wrap_expr(concatenated)
end
421
+
143
422
  # Collect several columns into a Series of dtype Struct.
144
423
  #
145
424
  # @param exprs [Array]
@@ -194,7 +473,7 @@ module Polars
194
473
  #
195
474
  # @example Use keyword arguments to easily name each struct field.
196
475
  # df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
197
- # # => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}
476
+ # # => Polars::Schema({"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})})
198
477
  def struct(*exprs, schema: nil, eager: false, **named_exprs)
199
478
  rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
200
479
  expr = Utils.wrap_expr(Plr.as_struct(rbexprs))
@@ -221,7 +500,10 @@ module Polars
221
500
  #
222
501
  # @param exprs [Object]
223
502
  # Columns to concat into a Utf8 Series.
224
- # @param sep [String]
503
+ # @param more_exprs [Array]
504
+ # Additional columns to concatenate into a single string column, specified as
505
+ # positional arguments.
506
+ # @param separator [String]
225
507
  # String value that will be used to separate the values.
226
508
  # @param ignore_nulls [Boolean]
227
509
  # Ignore null values (default).
@@ -244,7 +526,7 @@ module Polars
244
526
  # Polars.col("b"),
245
527
  # Polars.col("c")
246
528
  # ],
247
- # sep: " "
529
+ # separator: " "
248
530
  # ).alias("full_sentence")
249
531
  # ]
250
532
  # )
@@ -259,9 +541,9 @@ module Polars
259
541
  # # │ 2 ┆ cats ┆ swim ┆ 4 cats swim │
260
542
  # # │ 3 ┆ null ┆ walk ┆ null │
261
543
  # # └─────┴──────┴──────┴───────────────┘
262
- def concat_str(exprs, sep: "", ignore_nulls: false)
263
- exprs = Utils.parse_into_list_of_expressions(exprs)
264
- Utils.wrap_expr(Plr.concat_str(exprs, sep, ignore_nulls))
544
+ def concat_str(exprs, *more_exprs, separator: "", ignore_nulls: false)
545
+ exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
546
+ Utils.wrap_expr(Plr.concat_str(exprs, separator, ignore_nulls))
265
547
  end
266
548
 
267
549
  # Format expressions as a string.
@@ -314,7 +596,7 @@ module Polars
314
596
  end
315
597
  end
316
598
 
317
- concat_str(exprs, sep: "")
599
+ concat_str(exprs, separator: "")
318
600
  end
319
601
  end
320
602
  end
@@ -93,7 +93,7 @@ module Polars
93
93
  # # │ 2 ┆ 4 ┆ 5 ┆ null │
94
94
  # # │ 3 ┆ null ┆ 6 ┆ 8 │
95
95
  # # └─────┴──────┴──────┴──────┘
96
- def concat(items, rechunk: true, how: "vertical", parallel: true)
96
+ def concat(items, rechunk: false, how: "vertical", parallel: true)
97
97
  elems = items.to_a
98
98
 
99
99
  if elems.empty?
@@ -152,7 +152,7 @@ module Polars
152
152
  parallel,
153
153
  true
154
154
  )
155
- ).collect(no_optimization: true)
155
+ ).collect(optimizations: QueryOptFlags._eager)
156
156
  elsif how == "diagonal"
157
157
  out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
158
158
  elsif how == "diagonal_relaxed"
@@ -163,7 +163,7 @@ module Polars
163
163
  parallel,
164
164
  true
165
165
  )
166
- ).collect(no_optimization: true)
166
+ ).collect(optimizations: QueryOptFlags._eager)
167
167
  elsif how == "horizontal"
168
168
  out = Utils.wrap_df(Plr.concat_df_horizontal(elems))
169
169
  else
@@ -206,7 +206,194 @@ module Polars
206
206
  end
207
207
  end
208
208
 
209
- # Align a sequence of frames using the unique values from one or more columns as a key.
209
+ # Combine multiple DataFrames, LazyFrames, or Series into a single object.
210
+ #
211
+ # @note
212
+ # This function does not guarantee any specific ordering of rows in the result.
213
+ # If you need predictable row ordering, use `Polars.concat` instead.
214
+ #
215
+ # @param items [Array]
216
+ # DataFrames, LazyFrames, or Series to concatenate.
217
+ # @param how ['vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align', 'align_full', 'align_inner', 'align_left', 'align_right']
218
+ # Note that `Series` only support the `vertical` strategy.
219
+ #
220
+ # * vertical: Applies multiple `vstack` operations.
221
+ # * vertical_relaxed: Same as `vertical`, but additionally coerces columns to
222
+ # their common supertype *if* they are mismatched (eg: Int32 → Int64).
223
+ # * diagonal: Finds a union between the column schemas and fills missing column
224
+ # values with `null`.
225
+ # * diagonal_relaxed: Same as `diagonal`, but additionally coerces columns to
226
+ # their common supertype *if* they are mismatched (eg: Int32 → Int64).
227
+ # * horizontal: Stacks Series from DataFrames horizontally and fills with `null`
228
+ # if the lengths don't match.
229
+ # * align, align_full, align_inner, align_left, align_right: Combines frames horizontally,
230
+ # auto-determining the common key columns and aligning rows using the same
231
+ # logic as `align_frames` (note that "align" is an alias for "align_full").
232
+ # The "align" strategy determines the type of join used to align the frames,
233
+ # equivalent to the "how" parameter on `align_frames`. Note that the common
234
+ # join columns are automatically coalesced, but other column collisions
235
+ # will raise an error (if you need more control over this you should use
236
+ # a suitable `join` method directly).
237
+ #
238
+ # @return [Object]
239
+ #
240
+ # @example
241
+ # df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
242
+ # df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
243
+ # Polars.union([df1, df2])
244
+ # # =>
245
+ # # shape: (2, 2)
246
+ # # ┌─────┬─────┐
247
+ # # │ a ┆ b │
248
+ # # │ --- ┆ --- │
249
+ # # │ i64 ┆ i64 │
250
+ # # ╞═════╪═════╡
251
+ # # │ 1 ┆ 3 │
252
+ # # │ 2 ┆ 4 │
253
+ # # └─────┴─────┘
254
+ #
255
+ # @example
256
+ # df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
257
+ # df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
258
+ # Polars.union([df1, df2], how: "vertical_relaxed")
259
+ # # =>
260
+ # # shape: (2, 2)
261
+ # # ┌─────┬─────┐
262
+ # # │ a ┆ b │
263
+ # # │ --- ┆ --- │
264
+ # # │ f64 ┆ i64 │
265
+ # # ╞═════╪═════╡
266
+ # # │ 1.0 ┆ 3 │
267
+ # # │ 2.5 ┆ 4 │
268
+ # # └─────┴─────┘
269
+ #
270
+ # @example
271
+ # df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
272
+ # df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
273
+ # Polars.union([df_h1, df_h2], how: "horizontal")
274
+ # # =>
275
+ # # shape: (2, 5)
276
+ # # ┌─────┬─────┬─────┬─────┬─────┐
277
+ # # │ l1 ┆ l2 ┆ r1 ┆ r2 ┆ r3 │
278
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
279
+ # # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
280
+ # # ╞═════╪═════╪═════╪═════╪═════╡
281
+ # # │ 1 ┆ 3 ┆ 5 ┆ 7 ┆ 9 │
282
+ # # │ 2 ┆ 4 ┆ 6 ┆ 8 ┆ 10 │
283
+ # # └─────┴─────┴─────┴─────┴─────┘
284
+ #
285
+ # @example The "diagonal" strategy allows for some frames to have missing columns, the values for which are filled with `null`:
286
+ # df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
287
+ # df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
288
+ # Polars.union([df_d1, df_d2], how: "diagonal")
289
+ # # =>
290
+ # # shape: (2, 3)
291
+ # # ┌─────┬──────┬──────┐
292
+ # # │ a ┆ b ┆ c │
293
+ # # │ --- ┆ --- ┆ --- │
294
+ # # │ i64 ┆ i64 ┆ i64 │
295
+ # # ╞═════╪══════╪══════╡
296
+ # # │ 1 ┆ 3 ┆ null │
297
+ # # │ 2 ┆ null ┆ 4 │
298
+ # # └─────┴──────┴──────┘
299
# Combines DataFrames, LazyFrames, Series, or Exprs into a single object,
# dispatching on the class of the first element.
#
# Raises ArgumentError for empty input, for an unknown `how` on frames, and
# for any non-"vertical" `how` on Series; raises Todo for the (not yet
# implemented) "align*" strategies; raises TypeError for unsupported element
# types.
def union(
  items,
  how: "vertical"
)
  elems = items.to_a

  if elems.empty?
    raise ArgumentError, "cannot concat empty list"
  elsif elems.length == 1 && [DataFrame, Series, LazyFrame].any? { |klass| elems[0].is_a?(klass) }
    # A lone frame or series needs no concatenation.
    return elems[0]
  end

  # The align* strategies are documented but not implemented yet.
  raise Todo if how.start_with?("align")

  # Rendered into the error message when an unknown strategy is requested.
  allowed = %w[
    vertical vertical_relaxed diagonal diagonal_relaxed horizontal
    align align_full align_inner align_left align_right
  ].map(&:inspect).join(", ")
  relaxed = how.end_with?("relaxed")
  first = elems[0]

  case first
  when DataFrame
    # Vertical/diagonal strategies go through the lazy engine and are
    # collected immediately with the eager optimization flags.
    case how
    when "vertical", "vertical_relaxed"
      Utils.wrap_ldf(
        Plr.concat_lf(elems.map(&:lazy), false, true, relaxed)
      ).collect(optimizations: QueryOptFlags._eager)
    when "diagonal", "diagonal_relaxed"
      Utils.wrap_ldf(
        Plr.concat_lf_diagonal(elems.map(&:lazy), false, true, relaxed)
      ).collect(optimizations: QueryOptFlags._eager)
    when "horizontal"
      Utils.wrap_df(Plr.concat_df_horizontal(elems))
    else
      raise ArgumentError, "DataFrame `how` must be one of {#{allowed}}, got #{how.inspect}"
    end
  when LazyFrame
    # LazyFrames stay lazy: the combined plan is returned uncollected.
    case how
    when "vertical", "vertical_relaxed"
      Utils.wrap_ldf(Plr.concat_lf(elems, false, true, relaxed))
    when "diagonal", "diagonal_relaxed"
      Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, false, true, relaxed))
    when "horizontal"
      Utils.wrap_ldf(Plr.concat_lf_horizontal(elems, true))
    else
      raise ArgumentError, "LazyFrame `how` must be one of {#{allowed}}, got #{how.inspect}"
    end
  when Series
    unless how == "vertical"
      raise ArgumentError, "Series only supports 'vertical' concat strategy"
    end

    Utils.wrap_s(Plr.concat_series(elems))
  when Expr
    Utils.wrap_expr(Plr.concat_expr(elems.map { |e| e._rbexpr }, false))
  else
    raise TypeError, "did not expect type: #{first.class.name.inspect} in `union`"
  end
end
395
+
396
+ # Align an array of frames using the unique values from one or more columns as a key.
210
397
  #
211
398
  # Frames that do not contain the given key values have rows injected (with nulls
212
399
  # filling the non-key columns), and each resulting frame is sorted by the key.
@@ -219,13 +406,13 @@ module Polars
219
406
  # the same number of rows.
220
407
  #
221
408
  # @param frames [Array]
222
- # Sequence of DataFrames or LazyFrames.
409
+ # Array of DataFrames or LazyFrames.
223
410
  # @param on [Object]
224
411
  # One or more columns whose unique values will be used to align the frames.
225
412
  # @param select [Object]
226
413
  # Optional post-alignment column select to constrain and/or order
227
414
  # the columns returned from the newly aligned frames.
228
- # @param reverse [Object]
415
+ # @param descending [Object]
229
416
  # Sort the alignment column values in descending order; can be a single
230
417
  # boolean or a list of booleans associated with each column in `on`.
231
418
  #
@@ -254,7 +441,7 @@ module Polars
254
441
  # }
255
442
  # )
256
443
  # af1, af2, af3 = Polars.align_frames(
257
- # df1, df2, df3, on: "dt", select: ["x", "y"]
444
+ # df1, df2, df3, on: "dt", how: "left", select: ["x", "y"]
258
445
  # )
259
446
  # (af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
260
447
  # # =>
@@ -271,9 +458,16 @@ module Polars
271
458
  def align_frames(
272
459
  *frames,
273
460
  on:,
461
+ how: nil,
274
462
  select: nil,
275
- reverse: false
463
+ descending: false
276
464
  )
465
+ # TODO update
466
+ if how.nil?
467
+ warn "The default `how` for `align_frames` method will change from `left` to `full` in a future version"
468
+ how = "left"
469
+ end
470
+
277
471
  if frames.empty?
278
472
  return []
279
473
  elsif frames.map(&:class).uniq.length != 1
@@ -285,7 +479,7 @@ module Polars
285
479
  alignment_frame = (
286
480
  concat(frames.map { |df| df.lazy.select(on) })
287
481
  .unique(maintain_order: false)
288
- .sort(on, reverse: reverse)
482
+ .sort(on, descending: descending)
289
483
  )
290
484
  alignment_frame = (
291
485
  eager ? alignment_frame.collect.lazy : alignment_frame.cache
@@ -296,7 +490,7 @@ module Polars
296
490
  alignment_frame.join(
297
491
  df.lazy,
298
492
  on: alignment_frame.columns,
299
- how: "left"
493
+ how: how
300
494
  ).select(df.columns)
301
495
  end
302
496
  if !select.nil?
@@ -0,0 +1,21 @@
1
module Polars
  module Functions
    # Escapes string regex meta characters.
    #
    # @param s [String]
    #   The string whose meta characters will be escaped.
    #
    # @return [String]
    #
    # @raise [TypeError]
    #   If `s` is an `Expr` or any other non-String value.
    def escape_regex(s)
      if s.is_a?(Expr)
        msg = "escape_regex function is unsupported for `Expr`, you may want to use `Expr.str.escape_regex` instead"
        raise TypeError, msg
      elsif !s.is_a?(::String)
        # `::String` pins the check to Ruby's core String class, regardless of
        # any `String` constant visible inside the Polars namespace.
        msg = "escape_regex function supports only `String` type, got `#{s.class.name}`"
        raise TypeError, msg
      end

      Plr.escape_regex(s)
    end
  end
end