polars-df 0.13.0-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,271 @@
1
+ module Polars
2
+ module Functions
3
+ # Create polars `Duration` from distinct time components.
4
+ #
5
+ # @return [Expr]
6
+ #
7
+ # @example
8
+ # df = Polars::DataFrame.new(
9
+ # {
10
+ # "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
11
+ # "add" => [1, 2]
12
+ # }
13
+ # )
14
+ # df.select(
15
+ # [
16
+ # (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
17
+ # (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
18
+ # (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
19
+ # (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
20
+ # "add_milliseconds"
21
+ # ),
22
+ # (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
23
+ # ]
24
+ # )
25
+ # # =>
26
+ # # shape: (2, 5)
27
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
28
+ # # │ add_weeks ┆ add_days ┆ add_seconds ┆ add_milliseconds ┆ add_hours │
29
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
30
+ # # │ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] │
31
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
32
+ # # │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
33
+ # # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
34
+ # # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
35
def duration(
  weeks: nil,
  days: nil,
  hours: nil,
  minutes: nil,
  seconds: nil,
  milliseconds: nil,
  microseconds: nil,
  nanoseconds: nil,
  time_unit: "us"
)
  # Components are handled from the largest unit to the smallest, which is
  # also the positional order expected by Plr.duration.
  components = [
    weeks,
    days,
    hours,
    minutes,
    seconds,
    milliseconds,
    microseconds,
    nanoseconds
  ].map do |component|
    # A component left as nil is forwarded untouched; anything else is
    # parsed with str_as_lit: false (per the documented examples, a String
    # such as "add" refers to a column).
    component.nil? ? nil : Utils.parse_into_expression(component, str_as_lit: false)
  end

  Utils.wrap_expr(Plr.duration(*components, time_unit))
end
85
+
86
+ # Concat the arrays in a Series dtype List in linear time.
87
+ #
88
+ # @return [Expr]
89
def concat_list(exprs)
  # Parse the inputs into native expressions, concatenate them natively,
  # then wrap the result.
  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  concatenated = Plr.concat_list(rbexprs)
  Utils.wrap_expr(concatenated)
end
93
+
94
+ # Collect several columns into a Series of dtype Struct.
95
+ #
96
+ # @param exprs [Array]
97
+ # Column(s) to collect into a struct column, specified as positional arguments.
98
+ # Accepts expression input. Strings are parsed as column names,
99
+ # other non-expression inputs are parsed as literals.
100
+ # @param schema [Hash]
101
+ # Optional schema that explicitly defines the struct field dtypes. If no columns
102
+ # or expressions are provided, schema keys are used to define columns.
103
+ # @param eager [Boolean]
104
+ # Evaluate immediately and return a `Series`. If set to `false` (default),
105
+ # return an expression instead.
106
+ # @param named_exprs [Hash]
107
+ # Additional columns to collect into the struct column, specified as keyword
108
+ # arguments. The columns will be renamed to the keyword used.
109
+ #
110
+ # @return [Object]
111
+ #
112
+ # @example
113
+ # df = Polars::DataFrame.new(
114
+ # {
115
+ # "int" => [1, 2],
116
+ # "str" => ["a", "b"],
117
+ # "bool" => [true, nil],
118
+ # "list" => [[1, 2], [3]],
119
+ # }
120
+ # )
121
+ # df.select([Polars.struct(Polars.all).alias("my_struct")])
122
+ # # =>
123
+ # # shape: (2, 1)
124
+ # # ┌─────────────────────┐
125
+ # # │ my_struct │
126
+ # # │ --- │
127
+ # # │ struct[4] │
128
+ # # ╞═════════════════════╡
129
+ # # │ {1,"a",true,[1, 2]} │
130
+ # # │ {2,"b",null,[3]} │
131
+ # # └─────────────────────┘
132
+ #
133
+ # @example Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.
134
+ # df.select(Polars.struct("int", false).alias("my_struct"))
135
+ # # =>
136
+ # # shape: (2, 1)
137
+ # # ┌───────────┐
138
+ # # │ my_struct │
139
+ # # │ --- │
140
+ # # │ struct[2] │
141
+ # # ╞═══════════╡
142
+ # # │ {1,false} │
143
+ # # │ {2,false} │
144
+ # # └───────────┘
145
+ #
146
+ # @example Use keyword arguments to easily name each struct field.
147
+ # df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
148
+ # # => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}
149
def struct(*exprs, schema: nil, eager: false, **named_exprs)
  has_schema = !schema.nil? && !schema.empty?

  # Fall back to the schema keys only when the caller supplied no inputs at
  # all. `empty?` is used on purpose: `any?` would treat falsy literals such
  # as `false` or `nil` as "nothing provided", and keyword-named expressions
  # must count as inputs too.
  rbexprs =
    if has_schema && exprs.empty? && named_exprs.empty?
      Utils.parse_into_list_of_expressions(schema.keys)
    else
      Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
    end

  expr = Utils.wrap_expr(Plr.as_struct(rbexprs))

  # A schema explicitly defines the struct field dtypes, so it applies
  # whether the fields came from the caller's expressions or from the
  # schema keys.
  expr = expr.cast(Polars::Struct.new(schema), strict: false) if has_schema

  # eager: evaluate immediately and return a Series instead of an Expr.
  eager ? Polars.select(expr).to_series : expr
end
170
+
171
+ # Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
172
+ #
173
+ # @param exprs [Object]
174
+ # Columns to concat into a Utf8 Series.
175
+ # @param sep [String]
176
+ # String value that will be used to separate the values.
177
+ # @param ignore_nulls [Boolean]
178
+ # Ignore null values (default is `false`). When `false`, a null in any input makes the result for that row null.
179
+ #
180
+ # @return [Expr]
181
+ #
182
+ # @example
183
+ # df = Polars::DataFrame.new(
184
+ # {
185
+ # "a" => [1, 2, 3],
186
+ # "b" => ["dogs", "cats", nil],
187
+ # "c" => ["play", "swim", "walk"]
188
+ # }
189
+ # )
190
+ # df.with_columns(
191
+ # [
192
+ # Polars.concat_str(
193
+ # [
194
+ # Polars.col("a") * 2,
195
+ # Polars.col("b"),
196
+ # Polars.col("c")
197
+ # ],
198
+ # sep: " "
199
+ # ).alias("full_sentence")
200
+ # ]
201
+ # )
202
+ # # =>
203
+ # # shape: (3, 4)
204
+ # # ┌─────┬──────┬──────┬───────────────┐
205
+ # # │ a ┆ b ┆ c ┆ full_sentence │
206
+ # # │ --- ┆ --- ┆ --- ┆ --- │
207
+ # # │ i64 ┆ str ┆ str ┆ str │
208
+ # # ╞═════╪══════╪══════╪═══════════════╡
209
+ # # │ 1 ┆ dogs ┆ play ┆ 2 dogs play │
210
+ # # │ 2 ┆ cats ┆ swim ┆ 4 cats swim │
211
+ # # │ 3 ┆ null ┆ walk ┆ null │
212
+ # # └─────┴──────┴──────┴───────────────┘
213
def concat_str(exprs, sep: "", ignore_nulls: false)
  # Parse the inputs, concatenate them natively with the separator and
  # null-handling flag, then wrap the result.
  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  joined = Plr.concat_str(rbexprs, sep, ignore_nulls)
  Utils.wrap_expr(joined)
end
217
+
218
+ # Format expressions as a string.
219
+ #
220
+ # @param f_string [String]
221
+ # A string with `{}` placeholders.
222
+ # For example: "hello_{}" or "{}_world"
223
+ # @param args [Object]
224
+ # Expression(s) that fill the placeholders
225
+ #
226
+ # @return [Expr]
227
+ #
228
+ # @example
229
+ # df = Polars::DataFrame.new(
230
+ # {
231
+ # "a": ["a", "b", "c"],
232
+ # "b": [1, 2, 3]
233
+ # }
234
+ # )
235
+ # df.select(
236
+ # [
237
+ # Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
238
+ # ]
239
+ # )
240
+ # # =>
241
+ # # shape: (3, 1)
242
+ # # ┌─────────────┐
243
+ # # │ fmt │
244
+ # # │ --- │
245
+ # # │ str │
246
+ # # ╞═════════════╡
247
+ # # │ foo_a_bar_1 │
248
+ # # │ foo_b_bar_2 │
249
+ # # │ foo_c_bar_3 │
250
+ # # └─────────────┘
251
def format(f_string, *args)
  placeholder_count = f_string.scan("{}").length
  unless placeholder_count == args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  # Splitting on a captured group keeps the "{}" markers in the result, so
  # literal text and placeholders come back interleaved in their original
  # order.
  next_arg = 0
  exprs = []
  f_string.split(/(\{\})/).each do |segment|
    if segment == "{}"
      # Each placeholder consumes the next positional argument.
      argument = args[next_arg]
      next_arg += 1
      exprs << Utils.wrap_expr(Utils.parse_into_expression(argument))
    elsif !segment.empty?
      # Non-empty text between placeholders becomes a literal.
      exprs << lit(segment)
    end
  end

  concat_str(exprs, sep: "")
end
270
+ end
271
+ end
@@ -0,0 +1,47 @@
1
+ module Polars
2
+ module Functions
3
+ # Return an expression representing a column in a DataFrame.
4
+ #
5
+ # @return [Expr]
6
def col(name, *more_names)
  if more_names.any?
    # Several positional arguments: the first one decides whether they are
    # all treated as column names or all as data types.
    every_input = [name, *more_names]
    return Utils.wrap_expr(Plr.cols(every_input.map(&:to_s))) if Utils.strlike?(name)
    return Utils.wrap_expr(Plr.dtype_cols(every_input)) if Utils.is_polars_dtype(name)

    raise TypeError, "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
  end

  # Single argument: a name, a data type, or an array of either.
  return Utils.wrap_expr(Plr.col(name.to_s)) if Utils.strlike?(name)
  return Utils.wrap_expr(Plr.dtype_cols([name])) if Utils.is_polars_dtype(name)

  unless name.is_a?(::Array)
    raise TypeError, "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
  end

  # An empty array is passed straight to Plr.cols.
  return Utils.wrap_expr(Plr.cols(name)) if name.empty?

  # For a non-empty array, the first element decides how the whole array
  # is interpreted.
  leading = name[0]
  if Utils.strlike?(leading)
    Utils.wrap_expr(Plr.cols(name.map(&:to_s)))
  elsif Utils.is_polars_dtype(leading)
    Utils.wrap_expr(Plr.dtype_cols(name))
  else
    raise TypeError, "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{leading.class.name}."
  end
end
46
+ end
47
+ end
@@ -0,0 +1,182 @@
1
+ module Polars
2
+ module Functions
3
+ # Aggregate multiple Dataframes/Series to a single DataFrame/Series.
4
+ #
5
+ # @param items [Object]
6
+ # DataFrames/Series/LazyFrames to concatenate.
7
+ # @param rechunk [Boolean]
8
+ # Make sure that all data is in contiguous memory.
9
+ # @param how ["vertical", "vertical_relaxed", "diagonal", "horizontal"]
10
+ # LazyFrames do not support the `horizontal` strategy.
11
+ #
12
+ # - Vertical: applies multiple `vstack` operations.
13
+ # - Diagonal: finds a union between the column schemas and fills missing column values with null.
14
+ # - Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
15
+ # @param parallel [Boolean]
16
+ # Only relevant for LazyFrames. This determines if the concatenated
17
+ # lazy computations may be executed in parallel.
18
+ #
19
+ # @return [Object]
20
+ #
21
+ # @example
22
+ # df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
23
+ # df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
24
+ # Polars.concat([df1, df2])
25
+ # # =>
26
+ # # shape: (2, 2)
27
+ # # ┌─────┬─────┐
28
+ # # │ a ┆ b │
29
+ # # │ --- ┆ --- │
30
+ # # │ i64 ┆ i64 │
31
+ # # ╞═════╪═════╡
32
+ # # │ 1 ┆ 3 │
33
+ # # │ 2 ┆ 4 │
34
+ # # └─────┴─────┘
35
def concat(items, rechunk: true, how: "vertical", parallel: true)
  raise ArgumentError, "cannot concat empty list" if items.empty?

  # The first item decides which concatenation path is taken.
  first = items[0]

  if first.is_a?(DataFrame)
    out =
      case how
      when "vertical"
        Utils.wrap_df(Plr.concat_df(items))
      when "diagonal"
        Utils.wrap_df(Plr.concat_df_diagonal(items))
      when "horizontal"
        Utils.wrap_df(Plr.concat_df_horizontal(items))
      else
        raise ArgumentError, "how must be one of {'vertical', 'diagonal', 'horizontal'}, got #{how}"
      end
  elsif first.is_a?(LazyFrame)
    # Lazy concatenation receives `rechunk` and `parallel` directly, so the
    # result is returned as-is without the rechunk step below.
    case how
    when "vertical"
      return Utils.wrap_ldf(Plr.concat_lf(items, rechunk, parallel, false))
    when "vertical_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf(items, rechunk, parallel, true))
    when "diagonal"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(items, rechunk, parallel, false))
    else
      raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', and 'diagonal' concat strategy."
    end
  elsif first.is_a?(Series)
    # TODO: `how` is not consulted for Series inputs.
    out = Utils.wrap_s(Plr.concat_series(items))
  elsif first.is_a?(Expr)
    # Expressions are chained by appending each one to the running result.
    out = items[1..].reduce(first) { |combined, expr| combined.append(expr) }
  else
    raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
  end

  rechunk ? out.rechunk : out
end
79
+
80
+ # Align a sequence of frames using the unique values from one or more columns as a key.
81
+ #
82
+ # Frames that do not contain the given key values have rows injected (with nulls
83
+ # filling the non-key columns), and each resulting frame is sorted by the key.
84
+ #
85
+ # The original column order of input frames is not changed unless ``select`` is
86
+ # specified (in which case the final column order is determined from that).
87
+ #
88
+ # Note that this does not result in a joined frame - you receive the same number
89
+ # of frames back that you passed in, but each is now aligned by key and has
90
+ # the same number of rows.
91
+ #
92
+ # @param frames [Array]
93
+ # Sequence of DataFrames or LazyFrames.
94
+ # @param on [Object]
95
+ # One or more columns whose unique values will be used to align the frames.
96
+ # @param select [Object]
97
+ # Optional post-alignment column select to constrain and/or order
98
+ # the columns returned from the newly aligned frames.
99
+ # @param reverse [Object]
100
+ # Sort the alignment column values in descending order; can be a single
101
+ # boolean or a list of booleans associated with each column in `on`.
102
+ #
103
+ # @return [Object]
104
+ #
105
+ # @example
106
+ # df1 = Polars::DataFrame.new(
107
+ # {
108
+ # "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
109
+ # "x" => [3.5, 4.0, 1.0],
110
+ # "y" => [10.0, 2.5, 1.5]
111
+ # }
112
+ # )
113
+ # df2 = Polars::DataFrame.new(
114
+ # {
115
+ # "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
116
+ # "x" => [8.0, 1.0, 3.5],
117
+ # "y" => [1.5, 12.0, 5.0]
118
+ # }
119
+ # )
120
+ # df3 = Polars::DataFrame.new(
121
+ # {
122
+ # "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
123
+ # "x" => [2.0, 5.0],
124
+ # "y" => [2.5, 2.0]
125
+ # }
126
+ # )
127
+ # af1, af2, af3 = Polars.align_frames(
128
+ # df1, df2, df3, on: "dt", select: ["x", "y"]
129
+ # )
130
+ # (af1 * af2 * af3).fill_null(0).select(Polars.sum(Polars.col("*")).alias("dot"))
131
+ # # =>
132
+ # # shape: (3, 1)
133
+ # # ┌───────┐
134
+ # # │ dot │
135
+ # # │ --- │
136
+ # # │ f64 │
137
+ # # ╞═══════╡
138
+ # # │ 0.0 │
139
+ # # ├╌╌╌╌╌╌╌┤
140
+ # # │ 167.5 │
141
+ # # ├╌╌╌╌╌╌╌┤
142
+ # # │ 47.0 │
143
+ # # └───────┘
144
def align_frames(
  *frames,
  on:,
  select: nil,
  reverse: false
)
  return [] if frames.empty?

  if frames.map(&:class).uniq.size > 1
    raise TypeError, "Input frames must be of a consistent type (all LazyFrame or all DataFrame)"
  end

  # The first frame decides whether results are collected before returning.
  eager = frames[0].is_a?(DataFrame)

  # Build the sorted superset of all key values found across the frames.
  key_frames = frames.map { |frame| frame.lazy.select(on) }
  alignment_frame = concat(key_frames).unique(maintain_order: false).sort(on, reverse: reverse)

  # Eager input: collect the key frame once and wrap it lazily again.
  # Lazy input: call `cache` on it instead.
  alignment_frame =
    if eager
      alignment_frame.collect.lazy
    else
      alignment_frame.cache
    end

  # Left-join every frame onto the key frame, then restore each frame's own
  # column order.
  aligned_frames = frames.map do |frame|
    joined = alignment_frame.join(frame.lazy, on: alignment_frame.columns, how: "left")
    joined.select(frame.columns)
  end

  # Optional post-alignment column selection.
  unless select.nil?
    aligned_frames = aligned_frames.map { |aligned| aligned.select(select) }
  end

  return aligned_frames unless eager

  aligned_frames.map(&:collect)
end
181
+ end
182
+ end