polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
data/lib/polars/lazy_frame.rb
CHANGED
@@ -5,12 +5,13 @@ module Polars
     attr_accessor :_ldf

     # Create a new LazyFrame.
-    def initialize(data = nil, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length:
+    def initialize(data = nil, schema: nil, schema_overrides: nil, strict: true, orient: nil, infer_schema_length: N_INFER_DEFAULT, nan_to_null: false)
       self._ldf = (
         DataFrame.new(
           data,
           schema: schema,
           schema_overrides: schema_overrides,
+          strict: strict,
           orient: orient,
           infer_schema_length: infer_schema_length,
           nan_to_null: nan_to_null
@@ -27,25 +28,16 @@ module Polars
       ldf
     end

-    # Read a logical plan from a JSON file to construct a LazyFrame.
-    #
-    # @param file [String]
-    #   Path to a file or a file-like object.
-    #
-    # @return [LazyFrame]
-    def self.read_json(file)
-      if Utils.pathlike?(file)
-        file = Utils.normalize_filepath(file)
-      end
-
-      Utils.wrap_ldf(RbLazyFrame.deserialize_json(file))
-    end
-
     # Read a logical plan from a file to construct a LazyFrame.
     #
     # @param source [Object]
     #   Path to a file or a file-like object (by file-like object, we refer to
     #   objects that have a `read` method, such as a file handler or `StringIO`).
+    # @param format ['binary', 'json']
+    #   The format with which the LazyFrame was serialized. Options:
+    #
+    #   - `"binary"`: Deserialize from binary format (bytes). This is the default.
+    #   - `"json"`: Deserialize from JSON format (string).
     #
     # @return [LazyFrame]
     #
@@ -71,14 +63,20 @@ module Polars
     #   # ╞═════╡
     #   # │ 6   │
     #   # └─────┘
-    def self.deserialize(source)
-      raise Todo unless RbLazyFrame.respond_to?(:deserialize_binary)
-
+    def self.deserialize(source, format: "binary")
       if Utils.pathlike?(source)
         source = Utils.normalize_filepath(source)
       end

-
+      if format == "binary"
+        raise Todo unless RbLazyFrame.respond_to?(:deserialize_binary)
+        deserializer = RbLazyFrame.method(:deserialize_binary)
+      elsif format == "json"
+        deserializer = RbLazyFrame.method(:deserialize_json)
+      else
+        msg = "`format` must be one of {{'binary', 'json'}}, got #{format.inspect}"
+        raise ArgumentError, msg
+      end

       _from_rbldf(deserializer.(source))
     end
@@ -170,31 +168,22 @@ module Polars
     # @return [String]
     def to_s
       <<~EOS
-        naive plan: (run LazyFrame#
+        naive plan: (run LazyFrame#explain(optimized: true) to see the optimized plan)

-        #{
+        #{explain(optimized: false)}
       EOS
     end

-    # Write the logical plan of this LazyFrame to a file or string in JSON format.
-    #
-    # @param file [String]
-    #   File path to which the result should be written.
-    #
-    # @return [nil]
-    def write_json(file)
-      if Utils.pathlike?(file)
-        file = Utils.normalize_filepath(file)
-      end
-      _ldf.write_json(file)
-      nil
-    end
-
     # Serialize the logical plan of this LazyFrame to a file or string.
     #
     # @param file [Object]
     #   File path to which the result should be written. If set to `nil`
     #   (default), the output is returned as a string instead.
+    # @param format ['binary', 'json']
+    #   The format in which to serialize. Options:
+    #
+    #   - `"binary"`: Serialize to binary format (bytes). This is the default.
+    #   - `"json"`: Serialize to JSON format (string) (deprecated).
     #
     # @return [Object]
     #
@@ -215,16 +204,25 @@ module Polars
     #   # ╞═════╡
     #   # │ 6   │
     #   # └─────┘
-    def serialize(file = nil)
-
+    def serialize(file = nil, format: "binary")
+      if format == "binary"
+        raise Todo unless _ldf.respond_to?(:serialize_binary)
+        serializer = _ldf.method(:serialize_binary)
+      elsif format == "json"
+        msg = "'json' serialization format of LazyFrame is deprecated"
+        warn msg
+        serializer = _ldf.method(:serialize_json)
+      else
+        msg = "`format` must be one of {{'binary', 'json'}}, got #{format.inspect}"
+        raise ArgumentError, msg
+      end

-      serializer = _ldf.method(:serialize_binary)
       Utils.serialize_polars_object(serializer, file)
     end

     # Offers a structured way to apply a sequence of user-defined functions (UDFs).
     #
-    # @param
+    # @param function [Object]
     #   Callable; will receive the frame as the first parameter,
     #   followed by any given args/kwargs.
     # @param args [Object]
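The `serialize`/`deserialize` pair above now selects the representation through a `format:` keyword instead of separate JSON methods. A minimal round-trip sketch under that API (the frame contents and the use of `StringIO` are illustrative, not taken from the diff):

```ruby
require "stringio"

lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).select(Polars.col("a").sum)

bin = lf.serialize                                   # binary is the default format
restored = Polars::LazyFrame.deserialize(StringIO.new(bin))

json = lf.serialize(format: "json")                  # still accepted, but warns as deprecated
```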
@@ -236,7 +234,7 @@ module Polars
     #
     # @example
     #   cast_str_to_int = lambda do |data, col_name:|
-    #     data.
+    #     data.with_columns(Polars.col(col_name).cast(Polars::Int64))
     #   end
     #
     #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => ["10", "20", "30", "40"]}).lazy
@@ -253,47 +251,344 @@ module Polars
     #   #   │ 3   ┆ 30  │
     #   #   │ 4   ┆ 40  │
     #   #   └─────┴─────┘
-    def pipe(
-
+    def pipe(function, *args, **kwargs, &block)
+      function.(self, *args, **kwargs, &block)
     end

-    #
+    # Creates a summary of statistics for a LazyFrame, returning a DataFrame.
     #
-    # @
-
-
+    # @param percentiles [Array]
+    #   One or more percentiles to include in the summary statistics.
+    #   All values must be in the range `[0, 1]`.
+    # @param interpolation ['nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable']
+    #   Interpolation method used when calculating percentiles.
+    #
+    # @return [DataFrame]
+    #
+    # @note
+    #   The median is included by default as the 50% percentile.
+    #
+    # @note
+    #   This method does *not* maintain the laziness of the frame, and will `collect`
+    #   the final result. This could potentially be an expensive operation.
+    #
+    # @note
+    #   We do not guarantee the output of `describe` to be stable. It will show
+    #   statistics that we deem informative, and may be updated in the future.
+    #   Using `describe` programmatically (versus interactive exploration) is
+    #   not recommended for this reason.
+    #
+    # @example Show default frame statistics:
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "float" => [1.0, 2.8, 3.0],
+    #       "int" => [40, 50, nil],
+    #       "bool" => [true, false, true],
+    #       "str" => ["zz", "xx", "yy"],
+    #       "date" => [Date.new(2020, 1, 1), Date.new(2021, 7, 5), Date.new(2022, 12, 31)]
+    #     }
+    #   )
+    #   lf.describe
+    #   # =>
+    #   # shape: (9, 6)
+    #   # ┌────────────┬──────────┬──────────┬──────────┬──────┬─────────────────────────┐
+    #   # │ statistic  ┆ float    ┆ int      ┆ bool     ┆ str  ┆ date                    │
+    #   # │ ---        ┆ ---      ┆ ---      ┆ ---      ┆ ---  ┆ ---                     │
+    #   # │ str        ┆ f64      ┆ f64      ┆ f64      ┆ str  ┆ str                     │
+    #   # ╞════════════╪══════════╪══════════╪══════════╪══════╪═════════════════════════╡
+    #   # │ count      ┆ 3.0      ┆ 2.0      ┆ 3.0      ┆ 3    ┆ 3                       │
+    #   # │ null_count ┆ 0.0      ┆ 1.0      ┆ 0.0      ┆ 0    ┆ 0                       │
+    #   # │ mean       ┆ 2.266667 ┆ 45.0     ┆ 0.666667 ┆ null ┆ 2021-07-02 16:00:00 UTC │
+    #   # │ std        ┆ 1.101514 ┆ 7.071068 ┆ null     ┆ null ┆ null                    │
+    #   # │ min        ┆ 1.0      ┆ 40.0     ┆ 0.0      ┆ xx   ┆ 2020-01-01              │
+    #   # │ 25%        ┆ 2.8      ┆ 40.0     ┆ null     ┆ null ┆ 2021-07-05              │
+    #   # │ 50%        ┆ 2.8      ┆ 50.0     ┆ null     ┆ null ┆ 2021-07-05              │
+    #   # │ 75%        ┆ 3.0      ┆ 50.0     ┆ null     ┆ null ┆ 2022-12-31              │
+    #   # │ max        ┆ 3.0      ┆ 50.0     ┆ 1.0      ┆ zz   ┆ 2022-12-31              │
+    #   # └────────────┴──────────┴──────────┴──────────┴──────┴─────────────────────────┘
+    #
+    # @example Customize which percentiles are displayed, applying linear interpolation:
+    #   lf.describe(
+    #     percentiles: [0.1, 0.3, 0.5, 0.7, 0.9],
+    #     interpolation: "linear"
+    #   )
+    #   # =>
+    #   # shape: (11, 6)
+    #   # ┌────────────┬──────────┬──────────┬──────────┬──────┬─────────────────────────┐
+    #   # │ statistic  ┆ float    ┆ int      ┆ bool     ┆ str  ┆ date                    │
+    #   # │ ---        ┆ ---      ┆ ---      ┆ ---      ┆ ---  ┆ ---                     │
+    #   # │ str        ┆ f64      ┆ f64      ┆ f64      ┆ str  ┆ str                     │
+    #   # ╞════════════╪══════════╪══════════╪══════════╪══════╪═════════════════════════╡
+    #   # │ count      ┆ 3.0      ┆ 2.0      ┆ 3.0      ┆ 3    ┆ 3                       │
+    #   # │ null_count ┆ 0.0      ┆ 1.0      ┆ 0.0      ┆ 0    ┆ 0                       │
+    #   # │ mean       ┆ 2.266667 ┆ 45.0     ┆ 0.666667 ┆ null ┆ 2021-07-02 16:00:00 UTC │
+    #   # │ std        ┆ 1.101514 ┆ 7.071068 ┆ null     ┆ null ┆ null                    │
+    #   # │ min        ┆ 1.0      ┆ 40.0     ┆ 0.0      ┆ xx   ┆ 2020-01-01              │
+    #   # │ …          ┆ …        ┆ …        ┆ …        ┆ …    ┆ …                       │
+    #   # │ 30%        ┆ 2.08     ┆ 43.0     ┆ null     ┆ null ┆ 2020-11-26              │
+    #   # │ 50%        ┆ 2.8      ┆ 45.0     ┆ null     ┆ null ┆ 2021-07-05              │
+    #   # │ 70%        ┆ 2.88     ┆ 47.0     ┆ null     ┆ null ┆ 2022-02-07              │
+    #   # │ 90%        ┆ 2.96     ┆ 49.0     ┆ null     ┆ null ┆ 2022-09-13              │
+    #   # │ max        ┆ 3.0      ┆ 50.0     ┆ 1.0      ┆ zz   ┆ 2022-12-31              │
+    #   # └────────────┴──────────┴──────────┴──────────┴──────┴─────────────────────────┘
+    def describe(
+      percentiles: [0.25, 0.5, 0.75],
+      interpolation: "nearest"
+    )
+      schema = collect_schema.to_h
+
+      if schema.empty?
+        msg = "cannot describe a LazyFrame that has no columns"
+        raise TypeError, msg
+      end
+
+      # create list of metrics
+      metrics = ["count", "null_count", "mean", "std", "min"]
+      if (quantiles = Utils.parse_percentiles(percentiles)).any?
+        metrics.concat(quantiles.map { |q| "%g%%" % [q * 100] })
+      end
+      metrics.append("max")
+
+      skip_minmax = lambda do |dt|
+        dt.nested? || [Categorical, Enum, Null, Object, Unknown].include?(dt)
+      end
+
+      # determine which columns will produce std/mean/percentile/etc
+      # statistics in a single pass over the frame schema
+      has_numeric_result, sort_cols = Set.new, Set.new
+      metric_exprs = []
+      null = F.lit(nil)
+
+      schema.each do |c, dtype|
+        is_numeric = dtype.numeric?
+        is_temporal = !is_numeric && dtype.temporal?
+
+        # counts
+        count_exprs = [
+          F.col(c).count.name.prefix("count:"),
+          F.col(c).null_count.name.prefix("null_count:")
+        ]
+        # mean
+        mean_expr =
+          if is_temporal || is_numeric || dtype == Boolean
+            F.col(c).mean
+          else
+            null
+          end
+
+        # standard deviation, min, max
+        expr_std = is_numeric ? F.col(c).std : null
+        min_expr = !skip_minmax.(dtype) ? F.col(c).min : null
+        max_expr = !skip_minmax.(dtype) ? F.col(c).max : null
+
+        # percentiles
+        pct_exprs = []
+        quantiles.each do |p|
+          if is_numeric || is_temporal
+            pct_expr =
+              if is_temporal
+                F.col(c).to_physical.quantile(p, interpolation: interpolation).cast(dtype)
+              else
+                F.col(c).quantile(p, interpolation: interpolation)
+              end
+            sort_cols.add(c)
+          else
+            pct_expr = null
+          end
+          pct_exprs << pct_expr.alias("#{p}:#{c}")
+        end
+
+        if is_numeric || dtype.nested? || [Null, Boolean].include?(dtype)
+          has_numeric_result.add(c)
+        end
+
+        # add column expressions (in end-state 'metrics' list order)
+        metric_exprs.concat(
+          [
+            *count_exprs,
+            mean_expr.alias("mean:#{c}"),
+            expr_std.alias("std:#{c}"),
+            min_expr.alias("min:#{c}"),
+            *pct_exprs,
+            max_expr.alias("max:#{c}")
+          ]
+        )
+      end
+
+      # calculate requested metrics in parallel, then collect the result
+      df_metrics = (
+        (
+          # if more than one quantile, sort the relevant columns to make them O(1)
+          # TODO: drop sort once we have efficient retrieval of multiple quantiles
+          sort_cols ? with_columns(sort_cols.map { |c| F.col(c).sort }) : self
+        )
+        .select(*metric_exprs)
+        .collect
+      )
+
+      # reshape wide result
+      n_metrics = metrics.length
+      column_metrics =
+        schema.length.times.map do |n|
+          df_metrics.row(0)[(n * n_metrics)...((n + 1) * n_metrics)]
+        end
+
+      summary = schema.keys.zip(column_metrics).to_h
+
+      # cast by column type (numeric/bool -> float), (other -> string)
+      schema.each_key do |c|
+        summary[c] =
+          summary[c].map do |v|
+            if v.nil? || v.is_a?(Hash)
+              nil
+            else
+              if has_numeric_result.include?(c)
+                if v == true
+                  1.0
+                elsif v == false
+                  0.0
+                else
+                  v.to_f
+                end
+              else
+                "#{v}"
+              end
+            end
+          end
+      end
+
+      # return results as a DataFrame
+      df_summary = Polars.from_hash(summary)
+      df_summary.insert_column(0, Polars::Series.new("statistic", metrics))
+      df_summary
     end

-    # Create a string representation of the
+    # Create a string representation of the query plan.
+    #
+    # Different optimizations can be turned on or off.
     #
     # @return [String]
-
-
-
-
-
-
-
-
-
+    #
+    # @example
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [1, 2, 3, 4, 5, 6],
+    #       "c" => [6, 5, 4, 3, 2, 1]
+    #     }
+    #   )
+    #   lf.group_by("a", maintain_order: true).agg(Polars.all.sum).sort(
+    #     "a"
+    #   ).explain
+    def explain(
+      format: "plain",
+      optimized: true,
+      engine: "auto",
+      optimizations: DEFAULT_QUERY_OPT_FLAGS
     )
-
-
-
-
-
-        slice_pushdown,
-        common_subplan_elimination,
-        comm_subexpr_elim,
-        allow_streaming,
-        false
-      )
+      engine = _select_engine(engine)
+
+      if engine == "streaming"
+        Utils.issue_unstable_warning("streaming mode is considered unstable.")
+      end

-
+      if optimized
+        ldf = _ldf.with_optimizations(optimizations._rboptflags)
+        if format == "tree"
+          return ldf.describe_optimized_plan_tree
+        else
+          return ldf.describe_optimized_plan
+        end
+      end
+
+      if format == "tree"
+        _ldf.describe_plan_tree
+      else
+        _ldf.describe_plan
+      end
     end

-    #
-    #
+    # Show a plot of the query plan.
+    #
+    # Note that Graphviz must be installed to render the visualization (if not
+    #   already present, you can download it here: https://graphviz.org/download.
+    #
+    # @param optimized [Boolean]
+    #   Optimize the query plan.
+    # @param show [Boolean]
+    #   Show the figure.
+    # @param output_path [String]
+    #   Write the figure to disk.
+    # @param raw_output [Boolean]
+    #   Return dot syntax. This cannot be combined with `show` and/or `output_path`.
+    # @param engine [String]
+    #   Select the engine used to process the query, optional.
+    #   At the moment, if set to `"auto"` (default), the query
+    #   is run using the polars in-memory engine. Polars will also
+    #   attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
+    #   environment variable. If it cannot run the query using the
+    #   selected engine, the query is run using the polars in-memory
+    #   engine.
+    # @param plan_stage ['ir', 'physical']
+    #   Select the stage to display. Currently only the streaming engine has a
+    #   separate physical stage, for the other engines both IR and physical are the
+    #   same.
+    # @param optimizations [Object]
+    #   The set of the optimizations considered during query optimization.
+    #
+    # @return [Object]
+    #
+    # @example
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [1, 2, 3, 4, 5, 6],
+    #       "c" => [6, 5, 4, 3, 2, 1]
+    #     }
+    #   )
+    #   lf.group_by("a", maintain_order: true).agg(Polars.all.sum).sort(
+    #     "a"
+    #   ).show_graph
+    def show_graph(
+      optimized: true,
+      show: true,
+      output_path: nil,
+      raw_output: false,
+      figsize: [16.0, 12.0],
+      engine: "auto",
+      plan_stage: "ir",
+      optimizations: DEFAULT_QUERY_OPT_FLAGS
+    )
+      engine = _select_engine(engine)
+
+      if engine == "streaming"
+        issue_unstable_warning("streaming mode is considered unstable.")
+      end
+
+      optimizations = optimizations.dup
+      optimizations._rboptflags.streaming = engine == "streaming"
+      _ldf = self._ldf.with_optimizations(optimizations._rboptflags)
+
+      if plan_stage == "ir"
+        dot = _ldf.to_dot(optimized)
+      elsif plan_stage == "physical"
+        if engine == "streaming"
+          dot = _ldf.to_dot_streaming_phys(optimized)
+        else
+          dot = _ldf.to_dot(optimized)
+        end
+      else
+        error_msg = "invalid plan stage '#{plan_stage}'"
+        raise TypeError, error_msg
+      end
+
+      Utils.display_dot_graph(
+        dot: dot,
+        show: show,
+        output_path: output_path,
+        raw_output: raw_output
+      )
+    end

     # Sort the DataFrame.
     #
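The `describe` and `explain` additions above already carry worked examples; the behavioural change worth noting is that `explain` (like the other query entry points in this release) now takes an `optimizations:` flags object and an `engine:` keyword instead of one boolean per optimization pass. A small sketch of the two plan views (data is illustrative, not from the diff):

```ruby
lf = Polars::LazyFrame.new({"a" => ["x", "y", "x"], "b" => [1, 2, 3]})
lf = lf.group_by("a").agg(Polars.col("b").sum).filter(Polars.col("a") != "y")

puts lf.explain(optimized: false)   # naive plan, the same text LazyFrame#to_s prints
puts lf.explain                     # plan after the default optimization passes
```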
@@ -307,7 +602,7 @@ module Polars
     #   Column (expressions) to sort by.
     # @param more_by [Array]
     #   Additional columns to sort by, specified as positional arguments.
-    # @param
+    # @param descending [Boolean]
     #   Sort in descending order.
     # @param nulls_last [Boolean]
     #   Place null values last. Can only be used if sorted by a single column.
@@ -328,7 +623,7 @@ module Polars
     #       "ham" => ["a", "b", "c"]
     #     }
     #   ).lazy
-    #   df.sort("foo",
+    #   df.sort("foo", descending: true).collect
     #   # =>
     #   # shape: (3, 3)
     #   # ┌─────┬─────┬─────┐
@@ -340,21 +635,21 @@ module Polars
     #   # │ 2   ┆ 7.0 ┆ b   │
     #   # │ 1   ┆ 6.0 ┆ a   │
     #   # └─────┴─────┴─────┘
-    def sort(by, *more_by,
+    def sort(by, *more_by, descending: false, nulls_last: false, maintain_order: false, multithreaded: true)
       if by.is_a?(::String) && more_by.empty?
         return _from_rbldf(
           _ldf.sort(
-            by,
+            by, descending, nulls_last, maintain_order, multithreaded
           )
         )
       end

       by = Utils.parse_into_list_of_expressions(by, *more_by)
-
+      descending = Utils.extend_bool(descending, by.length, "descending", "by")
       nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
       _from_rbldf(
         _ldf.sort_by_exprs(
-          by,
+          by, descending, nulls_last, maintain_order, multithreaded
         )
       )
     end
@@ -440,7 +735,7 @@ module Polars
     #   Accepts expression input. Strings are parsed as column names.
     # @param reverse [Object]
     #   Consider the `k` smallest elements of the `by` column(s) (instead of the `k`
-    #   largest). This can be specified per column by passing
+    #   largest). This can be specified per column by passing an array of
     #   booleans.
     #
     # @return [LazyFrame]
@@ -504,7 +799,7 @@ module Polars
     #   Accepts expression input. Strings are parsed as column names.
     # @param reverse [Object]
     #   Consider the `k` largest elements of the `by` column(s) (instead of the `k`
-    #   smallest). This can be specified per column by passing
+    #   smallest). This can be specified per column by passing an array of
    #   booleans.
     #
     # @return [LazyFrame]
@@ -554,35 +849,90 @@ module Polars
       _from_rbldf(_ldf.bottom_k(k, by, reverse))
     end

-    #
-    #
+    # Profile a LazyFrame.
+    #
+    # This will run the query and return a tuple
+    # containing the materialized DataFrame and a DataFrame that
+    # contains profiling information of each node that is executed.
+    #
+    # The units of the timings are microseconds.
+    #
+    # @param engine [String]
+    #   Select the engine used to process the query, optional.
+    #   At the moment, if set to `"auto"` (default), the query
+    #   is run using the polars in-memory engine. Polars will also
+    #   attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
+    #   environment variable. If it cannot run the query using the
+    #   selected engine, the query is run using the polars in-memory
+    #   engine.
+    # @param optimizations [Object]
+    #   The optimization passes done during query optimization.
+    #
+    # @return [Array]
+    #
+    # @example
+    #   lf = Polars::LazyFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [1, 2, 3, 4, 5, 6],
+    #       "c" => [6, 5, 4, 3, 2, 1]
+    #     }
+    #   )
+    #   lf.group_by("a", maintain_order: true).agg(Polars.all.sum).sort(
+    #     "a"
+    #   ).profile
+    #   # =>
+    #   # [shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ a   ┆ b   ┆ c   │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ str ┆ i64 ┆ i64 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ a   ┆ 4   ┆ 10  │
+    #   # │ b   ┆ 11  ┆ 10  │
+    #   # │ c   ┆ 6   ┆ 1   │
+    #   # └─────┴─────┴─────┘,
+    #   # shape: (3, 3)
+    #   # ┌──────────────┬───────┬─────┐
+    #   # │ node         ┆ start ┆ end │
+    #   # │ ---          ┆ ---   ┆ --- │
+    #   # │ str          ┆ u64   ┆ u64 │
+    #   # ╞══════════════╪═══════╪═════╡
+    #   # │ optimization ┆ 0     ┆ 67  │
+    #   # │ sort(a)      ┆ 67    ┆ 79  │
+    #   # └──────────────┴───────┴─────┘]
+    def profile(
+      engine: "auto",
+      optimizations: DEFAULT_QUERY_OPT_FLAGS
+    )
+      engine = _select_engine(engine)
+
+      ldf = _ldf.with_optimizations(optimizations._rboptflags)
+
+      df_rb, timings_rb = ldf.profile
+      [Utils.wrap_df(df_rb), Utils.wrap_df(timings_rb)]
+    end

-    #
-    #
-    #
-    #
-    #
-    # @param
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    # @param
-    #
-    #
-    # @param
-    #
-    #
-    #
-    # @param common_subplan_elimination [Boolean]
-    #   Will try to cache branching subplans that occur on self-joins or unions.
-    # @param comm_subexpr_elim [Boolean]
-    #   Common subexpressions will be cached and reused.
-    # @param allow_streaming [Boolean]
-    #   Run parts of the query in a streaming fashion (this is in an alpha state)
+    # Materialize this LazyFrame into a DataFrame.
+    #
+    # By default, all query optimizations are enabled. Individual optimizations may
+    #   be disabled by setting the corresponding parameter to `false`.
+    #
+    # @param engine
+    #   Select the engine used to process the query, optional.
+    #   At the moment, if set to `"auto"` (default), the query is run
+    #   using the polars streaming engine. Polars will also
+    #   attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
+    #   environment variable. If it cannot run the query using the
+    #   selected engine, the query is run using the polars streaming
+    #   engine.
+    # @param background [Boolean]
+    #   Run the query in the background and get a handle to the query.
+    #   This handle can be used to fetch the result or cancel the query.
+    # @param optimizations
+    #   The optimization passes done during query optimization.
+    #
+    #   This has no effect if `lazy` is set to `true`.
     #
     # @return [DataFrame]
     #
@@ -607,42 +957,23 @@ module Polars
     #   #   │ c   ┆ 6   ┆ 1   │
     #   #   └─────┴─────┴─────┘
     def collect(
-
-
-
-      simplify_expression: true,
-      string_cache: false,
-      no_optimization: false,
-      slice_pushdown: true,
-      common_subplan_elimination: true,
-      comm_subexpr_elim: true,
-      allow_streaming: false,
-      _eager: false
+      engine: "auto",
+      background: false,
+      optimizations: DEFAULT_QUERY_OPT_FLAGS
     )
-
-
-
-
-        common_subplan_elimination = false
-        comm_subexpr_elim = false
+      engine = _select_engine(engine)
+
+      if engine == "streaming"
+        Utils.issue_unstable_warning("streaming mode is considered unstable.")
      end

-
-
+      ldf = _ldf.with_optimizations(optimizations._rboptflags)
+      if background
+        Utils.issue_unstable_warning("background mode is considered unstable.")
+        return InProcessQuery.new(ldf.collect_concurrently)
      end

-      ldf
-        type_coercion,
-        predicate_pushdown,
-        projection_pushdown,
-        simplify_expression,
-        slice_pushdown,
-        common_subplan_elimination,
-        comm_subexpr_elim,
-        allow_streaming,
-        _eager
-      )
-      Utils.wrap_df(ldf.collect)
+      Utils.wrap_df(ldf.collect(engine))
     end

     # Resolve the schema of this LazyFrame.
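`collect` therefore replaces the old per-flag keywords with `engine:`, `background:`, and `optimizations:`. A minimal usage sketch (frame contents are illustrative; the exact methods exposed by the new `InProcessQuery` handle are not shown in this diff):

```ruby
lf = Polars::LazyFrame.new({"a" => [1, 2, 3]}).with_columns((Polars.col("a") * 2).alias("b"))

df = lf.collect                        # "auto" engine (in-memory today)
df = lf.collect(engine: "streaming")   # opt in to the streaming engine (marked unstable)

handle = lf.collect(background: true)  # returns an InProcessQuery handle (marked unstable)
```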
@@ -705,24 +1036,12 @@ module Polars
     #   If `nil` (default), the chunks of the `DataFrame` are
     #   used. Writing in smaller chunks may reduce memory pressure and improve
     #   writing speeds.
-    # @param
+    # @param data_page_size [Integer]
     #   Size limit of individual data pages.
     #   If not set defaults to 1024 * 1024 bytes
     # @param maintain_order [Boolean]
     #   Maintain the order in which data is processed.
     #   Setting this to `false` will be slightly faster.
-    # @param type_coercion [Boolean]
-    #   Do type coercion optimization.
-    # @param predicate_pushdown [Boolean]
-    #   Do predicate pushdown optimization.
-    # @param projection_pushdown [Boolean]
-    #   Do projection pushdown optimization.
-    # @param simplify_expression [Boolean]
-    #   Run simplify expressions optimization.
-    # @param no_optimization [Boolean]
-    #   Turn off (certain) optimizations.
-    # @param slice_pushdown [Boolean]
-    #   Slice pushdown optimization.
     # @param storage_options [String]
     #   Options that indicate how to connect to a cloud provider.
     #
@@ -748,6 +1067,18 @@ module Polars
     #   Recursively create all the directories in the path.
     # @param lazy [Boolean]
     #   Wait to start execution until `collect` is called.
+    # @param engine
+    #   Select the engine used to process the query, optional.
+    #   At the moment, if set to `"auto"` (default), the query is run
+    #   using the polars streaming engine. Polars will also
+    #   attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
+    #   environment variable. If it cannot run the query using the
+    #   selected engine, the query is run using the polars streaming
+    #   engine.
+    # @param optimizations
+    #   The optimization passes done during query optimization.
+    #
+    #   This has no effect if `lazy` is set to `true`.
     #
     # @return [DataFrame]
     #
@@ -760,28 +1091,20 @@ module Polars
       compression_level: nil,
       statistics: true,
       row_group_size: nil,
-
+      data_page_size: nil,
       maintain_order: true,
-      type_coercion: true,
-      predicate_pushdown: true,
-      projection_pushdown: true,
-      simplify_expression: true,
-      no_optimization: false,
-      slice_pushdown: true,
       storage_options: nil,
+      credential_provider: "auto",
       retries: 2,
       sync_on_close: nil,
+      metadata: nil,
       mkdir: false,
-      lazy: false
+      lazy: false,
+      field_overwrites: nil,
+      engine: "auto",
+      optimizations: DEFAULT_QUERY_OPT_FLAGS
     )
-
-        type_coercion: type_coercion,
-        predicate_pushdown: predicate_pushdown,
-        projection_pushdown: projection_pushdown,
-        simplify_expression: simplify_expression,
-        slice_pushdown: slice_pushdown,
-        no_optimization: no_optimization
-      )
+      engine = _select_engine(engine, path)

       if statistics == true
         statistics = {
@@ -801,6 +1124,12 @@ module Polars
         }
       end

+      _init_credential_provider_builder = Polars.method(:_init_credential_provider_builder)
+
+      credential_provider_builder = _init_credential_provider_builder.(
+        credential_provider, path, storage_options, "sink_parquet"
+      )
+
       if storage_options&.any?
         storage_options = storage_options.to_a
       else
@@ -813,24 +1142,33 @@ module Polars
         "mkdir" => mkdir
       }

-
+      field_overwrites_dicts = []
+      if !field_overwrites.nil?
+        raise Todo
+      end
+
+      ldf_rb = _ldf.sink_parquet(
         path,
         compression,
         compression_level,
         statistics,
         row_group_size,
-
+        data_page_size,
         storage_options,
+        credential_provider_builder,
         retries,
-        sink_options
+        sink_options,
+        metadata,
+        field_overwrites_dicts
       )
-      lf = LazyFrame._from_rbldf(lf)

       if !lazy
-
+        ldf_rb = ldf_rb.with_optimizations(optimizations._rboptflags)
+        ldf = LazyFrame._from_rbldf(ldf_rb)
+        ldf.collect(engine: engine)
         return nil
       end
-
+      LazyFrame._from_rbldf(ldf_rb)
     end

     # Evaluate the query in streaming mode and write to an IPC file.
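The sink methods gain `credential_provider:`, `engine:`, and `optimizations:` parameters (plus `metadata:` and `field_overwrites:` for Parquet), and the `lazy:` flag now returns a LazyFrame that holds the sink instead of running it. A sketch of the eager vs. lazy behaviour shown in the code above (paths are illustrative):

```ruby
lf = Polars::LazyFrame.new({"a" => [1, 2, 3]})

# Eager (default): applies the optimization flags, runs the query and writes the file.
lf.sink_parquet("out.parquet")

# Lazy: nothing is written until the returned frame is collected.
sink = lf.sink_parquet("out_lazy.parquet", lazy: true)
sink.collect
```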
@@ -860,18 +1198,6 @@ module Polars
     #   information from environment variables.
     # @param retries [Integer]
     #   Number of retries if accessing a cloud instance fails.
-    # @param type_coercion [Boolean]
-    #   Do type coercion optimization.
-    # @param predicate_pushdown [Boolean]
-    #   Do predicate pushdown optimization.
-    # @param projection_pushdown [Boolean]
-    #   Do projection pushdown optimization.
-    # @param simplify_expression [Boolean]
-    #   Run simplify expressions optimization.
-    # @param slice_pushdown [Boolean]
-    #   Slice pushdown optimization.
-    # @param no_optimization [Boolean]
-    #   Turn off (certain) optimizations.
     # @param sync_on_close ['data', 'all']
     #   Sync to disk when before closing a file.
     #
@@ -882,6 +1208,18 @@ module Polars
     #   Recursively create all the directories in the path.
     # @param lazy [Boolean]
     #   Wait to start execution until `collect` is called.
+    # @param engine
+    #   Select the engine used to process the query, optional.
+    #   At the moment, if set to `"auto"` (default), the query is run
+    #   using the polars streaming engine. Polars will also
+    #   attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
+    #   environment variable. If it cannot run the query using the
+    #   selected engine, the query is run using the polars streaming
+    #   engine.
+    # @param optimizations
+    #   The optimization passes done during query optimization.
+    #
+    #   This has no effect if `lazy` is set to `true`.
     #
     # @return [DataFrame]
     #
@@ -890,27 +1228,24 @@ module Polars
     #   lf.sink_ipc("out.arrow")
     def sink_ipc(
       path,
-      compression: "
+      compression: "uncompressed",
+      compat_level: nil,
       maintain_order: true,
       storage_options: nil,
+      credential_provider: "auto",
       retries: 2,
-      type_coercion: true,
-      predicate_pushdown: true,
-      projection_pushdown: true,
-      simplify_expression: true,
-      slice_pushdown: true,
-      no_optimization: false,
       sync_on_close: nil,
       mkdir: false,
-      lazy: false
+      lazy: false,
+      engine: "auto",
+      optimizations: DEFAULT_QUERY_OPT_FLAGS
     )
-
-
-
-
-
-
-        no_optimization: no_optimization
+      engine = _select_engine(engine, path)
+
+      _init_credential_provider_builder = Polars.method(:_init_credential_provider_builder)
+
+      credential_provider_builder = _init_credential_provider_builder.(
+        credential_provider, path, storage_options, "sink_ipc"
       )

       if storage_options&.any?
@@ -925,20 +1260,34 @@ module Polars
         "mkdir" => mkdir
       }

-
+      compat_level_rb = nil
+      if compat_level.nil?
+        compat_level_rb = true
+      else
+        raise Todo
+      end
+
+      if compression.nil?
+        compression = "uncompressed"
+      end
+
+      ldf_rb = _ldf.sink_ipc(
         path,
         compression,
+        compat_level_rb,
         storage_options,
+        credential_provider_builder,
         retries,
         sink_options
       )
-      lf = LazyFrame._from_rbldf(lf)

       if !lazy
-
+        ldf_rb = ldf_rb.with_optimizations(optimizations._rboptflags)
+        ldf = LazyFrame._from_rbldf(ldf_rb)
+        ldf.collect(engine: engine)
         return nil
       end
-
+      LazyFrame._from_rbldf(ldf_rb)
     end

     # Evaluate the query in streaming mode and write to a CSV file.
@@ -1004,32 +1353,32 @@ module Polars
     # @param maintain_order [Boolean]
     #   Maintain the order in which data is processed.
     #   Setting this to `false` will be slightly faster.
-    # @param type_coercion [Boolean]
-    #   Do type coercion optimization.
-    # @param predicate_pushdown [Boolean]
-    #   Do predicate pushdown optimization.
-    # @param projection_pushdown [Boolean]
-    #   Do projection pushdown optimization.
-    # @param simplify_expression [Boolean]
-    #   Run simplify expressions optimization.
-    # @param slice_pushdown [Boolean]
-    #   Slice pushdown optimization.
-    # @param no_optimization [Boolean]
-    #   Turn off (certain) optimizations.
     # @param storage_options [Object]
     #   Options that indicate how to connect to a cloud provider.
     # @param retries [Integer]
     #   Number of retries if accessing a cloud instance fails.
     # @param sync_on_close ['data', 'all']
-    #
+    #   Sync to disk when before closing a file.
     #
-    #
-    #
-    #
+    #   * `nil` does not sync.
+    #   * `data` syncs the file contents.
+    #   * `all` syncs the file contents and metadata.
     # @param mkdir [Boolean]
-    #
+    #   Recursively create all the directories in the path.
     # @param lazy [Boolean]
-    #
+    #   Wait to start execution until `collect` is called.
+    # @param engine
+    #   Select the engine used to process the query, optional.
+    #   At the moment, if set to `"auto"` (default), the query is run
+    #   using the polars streaming engine. Polars will also
+    #   attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
+    #   environment variable. If it cannot run the query using the
+    #   selected engine, the query is run using the polars streaming
+    #   engine.
+    # @param optimizations
+    #   The optimization passes done during query optimization.
+    #
+    #   This has no effect if `lazy` is set to `true`.
     #
     # @return [DataFrame]
     #
@@ -1053,28 +1402,23 @@ module Polars
       null_value: nil,
       quote_style: nil,
       maintain_order: true,
-      type_coercion: true,
-      predicate_pushdown: true,
-      projection_pushdown: true,
-      simplify_expression: true,
-      slice_pushdown: true,
-      no_optimization: false,
       storage_options: nil,
+      credential_provider: "auto",
      retries: 2,
       sync_on_close: nil,
       mkdir: false,
-      lazy: false
+      lazy: false,
+      engine: "auto",
+      optimizations: DEFAULT_QUERY_OPT_FLAGS
     )
       Utils._check_arg_is_1byte("separator", separator, false)
       Utils._check_arg_is_1byte("quote_char", quote_char, false)
+      engine = _select_engine(engine, path)

-
-
-
-
-        simplify_expression: simplify_expression,
-        slice_pushdown: slice_pushdown,
-        no_optimization: no_optimization
+      _init_credential_provider_builder = Polars.method(:_init_credential_provider_builder)
+
+      credential_provider_builder = _init_credential_provider_builder.(
+        credential_provider, path, storage_options, "sink_csv"
       )

       if storage_options&.any?
@@ -1089,7 +1433,7 @@ module Polars
         "mkdir" => mkdir
       }

-
+      ldf_rb = _ldf.sink_csv(
         path,
         include_bom,
         include_header,
@@ -1106,16 +1450,18 @@ module Polars
         null_value,
         quote_style,
         storage_options,
+        credential_provider_builder,
         retries,
         sink_options
       )
-      lf = LazyFrame._from_rbldf(lf)

       if !lazy
-
+        ldf_rb = ldf_rb.with_optimizations(optimizations._rboptflags)
+        ldf = LazyFrame._from_rbldf(ldf_rb)
+        ldf.collect(engine: engine)
         return nil
       end
-
+      LazyFrame._from_rbldf(ldf_rb)
     end

     # Evaluate the query in streaming mode and write to an NDJSON file.
@@ -1127,18 +1473,6 @@ module Polars
     # @param maintain_order [Boolean]
     #   Maintain the order in which data is processed.
     #   Setting this to `false` will be slightly faster.
-    # @param type_coercion [Boolean]
-    #   Do type coercion optimization.
-    # @param predicate_pushdown [Boolean]
-    #   Do predicate pushdown optimization.
-    # @param projection_pushdown [Boolean]
-    #   Do projection pushdown optimization.
-    # @param simplify_expression [Boolean]
-    #   Run simplify expressions optimization.
-    # @param slice_pushdown [Boolean]
-    #   Slice pushdown optimization.
-    # @param no_optimization [Boolean]
-    #   Turn off (certain) optimizations.
     # @param storage_options [String]
     #   Options that indicate how to connect to a cloud provider.
     #
@@ -1164,6 +1498,18 @@ module Polars
     #   Recursively create all the directories in the path.
     # @param lazy [Boolean]
     #   Wait to start execution until `collect` is called.
+    # @param engine
+    #   Select the engine used to process the query, optional.
+    #   At the moment, if set to `"auto"` (default), the query is run
+    #   using the polars streaming engine. Polars will also
+    #   attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
+    #   environment variable. If it cannot run the query using the
+    #   selected engine, the query is run using the polars streaming
+    #   engine.
+    # @param optimizations
+    #   The optimization passes done during query optimization.
+    #
+    #   This has no effect if `lazy` is set to `true`.
     #
     # @return [DataFrame]
     #
@@ -1173,25 +1519,21 @@ module Polars
     def sink_ndjson(
       path,
       maintain_order: true,
-      type_coercion: true,
-      predicate_pushdown: true,
-      projection_pushdown: true,
-      simplify_expression: true,
-      slice_pushdown: true,
-      no_optimization: false,
       storage_options: nil,
+      credential_provider: "auto",
       retries: 2,
       sync_on_close: nil,
       mkdir: false,
-      lazy: false
+      lazy: false,
+      engine: "auto",
+      optimizations: DEFAULT_QUERY_OPT_FLAGS
     )
-
-
-
-
-
-
-        no_optimization: no_optimization
+      engine = _select_engine(engine, path)
+
+      _init_credential_provider_builder = Polars.method(:_init_credential_provider_builder)
+
+      credential_provider_builder = _init_credential_provider_builder.(
+        credential_provider, path, storage_options, "sink_ndjson"
      )

       if storage_options&.any?
@@ -1206,80 +1548,21 @@ module Polars
         "mkdir" => mkdir
       }

-
-
+      ldf_rb = _ldf.sink_json(
+        path,
+        storage_options,
+        credential_provider_builder,
+        retries,
+        sink_options
+      )

       if !lazy
-
+        ldf_rb = ldf_rb.with_optimizations(optimizations._rboptflags)
+        ldf = LazyFrame._from_rbldf(ldf_rb)
+        ldf.collect(engine: engine)
         return nil
       end
-
-    end
-
-    # @private
-    def _set_sink_optimizations(
-      type_coercion: true,
-      predicate_pushdown: true,
-      projection_pushdown: true,
-      simplify_expression: true,
-      slice_pushdown: true,
-      no_optimization: false
-    )
-      if no_optimization
-        predicate_pushdown = false
-        projection_pushdown = false
-        slice_pushdown = false
-      end
-
-      _ldf.optimization_toggle(
-        type_coercion,
-        predicate_pushdown,
-        projection_pushdown,
-        simplify_expression,
-        slice_pushdown,
-        false,
-        false,
-        true,
-        false
-      )
-    end
-
-    # Collect a small number of rows for debugging purposes.
-    #
-    # Fetch is like a {#collect} operation, but it overwrites the number of rows
-    # read by every scan operation. This is a utility that helps debug a query on a
-    # smaller number of rows.
-    #
-    # Note that the fetch does not guarantee the final number of rows in the
-    # DataFrame. Filter, join operations and a lower number of rows available in the
-    # scanned file influence the final number of rows.
-    #
-    # @param n_rows [Integer]
-    #   Collect n_rows from the data sources.
-    #
-    # @return [DataFrame]
-    #
-    # @example
-    #   df = Polars::DataFrame.new(
-    #     {
-    #       "a" => ["a", "b", "a", "b", "b", "c"],
-    #       "b" => [1, 2, 3, 4, 5, 6],
-    #       "c" => [6, 5, 4, 3, 2, 1]
-    #     }
-    #   ).lazy
-    #   df.group_by("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
-    #   # =>
-    #   # shape: (2, 3)
-    #   # ┌─────┬─────┬─────┐
-    #   # │ a   ┆ b   ┆ c   │
-    #   # │ --- ┆ --- ┆ --- │
-    #   # │ str ┆ i64 ┆ i64 │
-    #   # ╞═════╪═════╪═════╡
-    #   # │ a   ┆ 4   ┆ 10  │
-    #   # │ b   ┆ 11  ┆ 10  │
-    #   # └─────┴─────┴─────┘
-    def fetch(n_rows = 500, **kwargs)
-      head(n_rows).collect(**kwargs)
+      LazyFrame._from_rbldf(ldf_rb)
     end

     # Return lazy representation, i.e. itself.
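Note that the hunk above removes both the private `_set_sink_optimizations` helper and the `fetch` debugging method. `fetch(n)` was implemented as `head(n_rows).collect`, so existing debug code can be rewritten directly in those terms:

```ruby
# 0.23.0
# df = lf.fetch(2)

# 0.24.0 equivalent (fetch was simply head(n_rows).collect)
df = lf.head(2).collect
```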
@@ -1388,7 +1671,7 @@ module Polars
     #       "c" => [true, true, false, nil],
     #     }
     #   ).lazy
-    #   lf.clear.
+    #   lf.clear.collect
     #   # =>
     #   # shape: (0, 3)
     #   # ┌─────┬─────┬──────┐
@@ -1399,7 +1682,7 @@ module Polars
     #   # └─────┴─────┴──────┘
     #
     # @example
-    #   lf.clear(2).
+    #   lf.clear(2).collect
     #   # =>
     #   # shape: (2, 3)
     #   # ┌──────┬──────┬──────┐
@@ -1413,24 +1696,82 @@ module Polars
     def clear(n = 0)
       DataFrame.new(schema: schema).clear(n).lazy
     end
-    alias_method :cleared, :clear

     # Filter the rows in the DataFrame based on a predicate expression.
     #
-    # @param
-    #   Expression that
+    # @param predicates [Array]
+    #   Expression(s) that evaluate to a boolean Series.
+    # @param constraints [Hash]
+    #   Column filters; use `name = value` to filter columns using the supplied
+    #   value. Each constraint behaves the same as `Polars.col(name).eq(value)`,
+    #   and is implicitly joined with the other filter conditions using `&`.
     #
     # @return [LazyFrame]
     #
     # @example Filter on one condition:
-    #   lf = Polars::
+    #   lf = Polars::LazyFrame.new(
     #     {
-    #       "foo" => [1, 2, 3],
-    #       "bar" => [6, 7, 8],
-    #       "ham" => ["a", "b", "c"]
+    #       "foo" => [1, 2, 3, nil, 4, nil, 0],
+    #       "bar" => [6, 7, 8, nil, nil, 9, 0],
+    #       "ham" => ["a", "b", "c", nil, "d", "e", "f"]
     #     }
-    #   )
-    #   lf.filter(Polars.col("foo")
+    #   )
+    #   lf.filter(Polars.col("foo") > 1).collect
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬──────┬─────┐
+    #   # │ foo ┆ bar  ┆ ham │
+    #   # │ --- ┆ ---  ┆ --- │
+    #   # │ i64 ┆ i64  ┆ str │
+    #   # ╞═════╪══════╪═════╡
+    #   # │ 2   ┆ 7    ┆ b   │
+    #   # │ 3   ┆ 8    ┆ c   │
+    #   # │ 4   ┆ null ┆ d   │
+    #   # └─────┴──────┴─────┘
+    #
+    # @example Filter on multiple conditions:
+    #   lf.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a")).collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example Provide multiple filters using `*args` syntax:
+    #   lf.filter(
+    #     Polars.col("foo") == 1,
+    #     Polars.col("ham") == "a"
+    #   ).collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example Provide multiple filters using `**kwargs` syntax:
+    #   lf.filter(foo: 1, ham: "a").collect
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example Filter on an OR condition:
+    #   lf.filter(
+    #     (Polars.col("foo") == 1) | (Polars.col("ham") == "c")
+    #   ).collect
     #   # =>
     #   # shape: (2, 3)
     #   # ┌─────┬─────┬─────┐
@@ -1439,11 +1780,13 @@ module Polars
     #   # │ i64 ┆ i64 ┆ str │
     #   # ╞═════╪═════╪═════╡
     #   # │ 1   ┆ 6   ┆ a   │
-    #   # │
+    #   # │ 3   ┆ 8   ┆ c   │
     #   # └─────┴─────┴─────┘
     #
-    # @example Filter
-    #   lf.filter(
+    # @example Filter by comparing two columns against each other
+    #   lf.filter(
+    #     Polars.col("foo") == Polars.col("bar")
+    #   ).collect
     #   # =>
     #   # shape: (1, 3)
     #   # ┌─────┬─────┬─────┐
@@ -1451,13 +1794,39 @@ module Polars
     #   # │ --- ┆ --- ┆ --- │
     #   # │ i64 ┆ i64 ┆ str │
     #   # ╞═════╪═════╪═════╡
+    #   # │ 0   ┆ 0   ┆ f   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example
+    #   lf.filter(
+    #     Polars.col("foo") != Polars.col("bar")
+    #   ).collect
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
     #   # │ 1   ┆ 6   ┆ a   │
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # │ 3   ┆ 8   ┆ c   │
     #   # └─────┴─────┴─────┘
-    def filter(
-
-
-
+    def filter(*predicates, **constraints)
+      if constraints.empty?
+        # early-exit conditions (exclude/include all rows)
+        if predicates.empty? || (predicates.length == 1 && predicates[0].is_a?(TrueClass))
+          return dup
+        end
+        if predicates.length == 1 && predicates[0].is_a?(FalseClass)
+          return clear
+        end
+      end
+
+      _filter(
+        predicates: predicates,
+        constraints: constraints,
+        invert: false
       )
     end

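A minimal usage sketch of the reworked `filter` shown in the hunk above (the frame `lf` and its columns `foo`/`ham` are illustrative, not part of the diff); boolean literals hit the new early-exit paths, and keyword constraints compile to equality predicates joined with `&`:

    lf = Polars::LazyFrame.new({"foo" => [1, 2, 3], "ham" => ["a", "b", "c"]})
    lf.filter(true).collect                              # early exit: all rows, unchanged
    lf.filter(false).collect                             # early exit: empty frame with the same schema
    lf.filter(Polars.col("foo") > 1, ham: "c").collect   # expression plus keyword constraint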
@@ -1752,13 +2121,9 @@ module Polars
       lgb = _ldf.group_by(exprs, maintain_order)
       LazyGroupBy.new(lgb)
     end
-    alias_method :groupby, :group_by
-    alias_method :group, :group_by

     # Create rolling groups based on a time column.
     #
-    # Also works for index values of type `:i32` or `:i64`.
-    #
     # Different from a `dynamic_group_by` the windows are now determined by the
     # individual values and are not of constant intervals. For constant intervals
     # use *group_by_dynamic*.
@@ -1793,15 +2158,15 @@ module Polars
     #   make sense.
     #
     #   In case of a rolling group by on indices, dtype needs to be one of
-    #
-    #   performance matters use an
+    #   \\{UInt32, UInt64, Int32, Int64}. Note that the first three get temporarily
+    #   cast to Int64, so if performance matters use an Int64 column.
     # @param period [Object]
     #   Length of the window.
     # @param offset [Object]
     #   Offset of the window. Default is -period.
     # @param closed ["right", "left", "both", "none"]
     #   Define whether the temporal window interval is closed or not.
-    # @param
+    # @param group_by [Object]
     #   Also group by this column/these columns.
     #
     # @return [LazyFrame]
@@ -1815,7 +2180,7 @@ module Polars
     #       "2020-01-03 19:45:32",
     #       "2020-01-08 23:16:43"
     #     ]
-    #   df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).
+    #   df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_columns(
     #     Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
     #   )
     #   df.rolling(index_column: "dt", period: "2d").agg(
@@ -1844,7 +2209,7 @@ module Polars
       period:,
       offset: nil,
       closed: "right",
-
+      group_by: nil
     )
       index_column = Utils.parse_into_expression(index_column)
       if offset.nil?
@@ -1852,7 +2217,7 @@ module Polars
       end

       rbexprs_by = (
-        !
+        !group_by.nil? ? Utils.parse_into_list_of_expressions(group_by) : []
       )
       period = Utils.parse_as_duration_string(period)
       offset = Utils.parse_as_duration_string(offset)
@@ -1860,10 +2225,8 @@ module Polars
       lgb = _ldf.rolling(index_column, period, offset, closed, rbexprs_by)
       LazyGroupBy.new(lgb)
     end
-    alias_method :group_by_rolling, :rolling
-    alias_method :groupby_rolling, :rolling

-    # Group based on a time value (or index value of type
+    # Group based on a time value (or index value of type Int32, Int64).
     #
     # Time windows are calculated and rows are assigned to windows. Different from a
     # normal group by is that a row can be member of multiple groups. The time/index
@@ -1906,8 +2269,8 @@ module Polars
     #   make sense.
     #
     #   In case of a dynamic group by on indices, dtype needs to be one of
-    #
-    #   performance matters use an
+    #   \\{Int32, Int64}. Note that Int32 gets temporarily cast to Int64, so if
+    #   performance matters use an Int64 column.
     # @param every [Object]
     #   Interval of the window.
     # @param period [Object]
@@ -1915,8 +2278,6 @@ module Polars
     # @param offset [Object]
     #   Offset of the window if nil and period is nil it will be equal to negative
     #   `every`.
-    # @param truncate [Boolean]
-    #   Truncate the time value to the window lower bound.
     # @param include_boundaries [Boolean]
     #   Add the lower and upper bound of the window to the "_lower_bound" and
     #   "_upper_bound" columns. This will impact performance because it's harder to
@@ -1931,7 +2292,7 @@ module Polars
     #   - 'datapoint': the first value of the index column in the given window.
     #     If you don't need the label to be at one of the boundaries, choose this
     #     option for maximum performance
-    # @param
+    # @param group_by [Object]
     #   Also group by this column/these columns
     # @param start_by ['window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
     #   The strategy to determine the start of the first window by.
@@ -2073,7 +2434,7 @@ module Polars
     #     "time",
     #     every: "1h",
     #     closed: "both",
-    #
+    #     group_by: "groups",
     #     include_boundaries: true
     #   ).agg([Polars.col("time").count.alias("time_count")])
     #   # =>
@@ -2123,17 +2484,12 @@ module Polars
       every:,
       period: nil,
       offset: nil,
-      truncate: nil,
       include_boundaries: false,
       closed: "left",
       label: "left",
-
+      group_by: nil,
       start_by: "window"
     )
-      if !truncate.nil?
-        label = truncate ? "left" : "datapoint"
-      end
-
       index_column = Utils.parse_into_expression(index_column, str_as_lit: false)
       if offset.nil?
         offset = period.nil? ? "-#{every}" : "0ns"
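Since the removed code above mapped `truncate: true` to `label: "left"` and `truncate: false` to `label: "datapoint"`, the same effect is now expressed directly through `label:`. A hedged sketch (the `"time"` index column is illustrative):

    # was: group_by_dynamic("time", every: "1h", truncate: false)
    lf.group_by_dynamic("time", every: "1h", label: "datapoint")
      .agg(Polars.col("time").count.alias("time_count"))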
@@ -2147,7 +2503,7 @@ module Polars
       offset = Utils.parse_as_duration_string(offset)
       every = Utils.parse_as_duration_string(every)

-      rbexprs_by =
+      rbexprs_by = group_by.nil? ? [] : Utils.parse_into_list_of_expressions(group_by)
       lgb = _ldf.group_by_dynamic(
         index_column,
         every,
@@ -2161,7 +2517,6 @@ module Polars
       )
       LazyGroupBy.new(lgb)
     end
-    alias_method :groupby_dynamic, :group_by_dynamic

     # Perform an asof join.
     #
@@ -2521,7 +2876,7 @@ module Polars
     #   * *one_to_one* - “1:1”: check if join keys are unique in both left and right datasets
     #   * *one_to_many* - “1:m”: check if join keys are unique in left dataset
     #   * *many_to_one* - “m:1”: check if join keys are unique in right dataset
-    # @param
+    # @param nulls_equal [Boolean]
     #   Join on null values. By default null values will never produce matches.
     # @param allow_parallel [Boolean]
     #   Allow the physical plan to optionally evaluate the computation of both
@@ -2643,7 +2998,7 @@ module Polars
       how: "inner",
       suffix: "_right",
       validate: "m:m",
-
+      nulls_equal: false,
       allow_parallel: true,
       force_parallel: false,
       coalesce: nil,
@@ -2666,7 +3021,7 @@ module Polars
           [],
           [],
           allow_parallel,
-
+          nulls_equal,
           force_parallel,
           how,
           suffix,
@@ -2695,7 +3050,7 @@ module Polars
         rbexprs_right,
         allow_parallel,
         force_parallel,
-
+        nulls_equal,
         how,
         suffix,
         validate,
@@ -2875,87 +3230,6 @@ module Polars
       _from_rbldf(_ldf.with_columns_seq(rbexprs))
     end

-    # Add an external context to the computation graph.
-    #
-    # This allows expressions to also access columns from DataFrames
-    # that are not part of this one.
-    #
-    # @param other [Object]
-    #   Lazy DataFrame to join with.
-    #
-    # @return [LazyFrame]
-    #
-    # @example
-    #   df_a = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["a", "c", nil]}).lazy
-    #   df_other = Polars::DataFrame.new({"c" => ["foo", "ham"]})
-    #   (
-    #     df_a.with_context(df_other.lazy).select(
-    #       [Polars.col("b") + Polars.col("c").first]
-    #     )
-    #   ).collect
-    #   # =>
-    #   # shape: (3, 1)
-    #   # ┌──────┐
-    #   # │ b    │
-    #   # │ ---  │
-    #   # │ str  │
-    #   # ╞══════╡
-    #   # │ afoo │
-    #   # │ cfoo │
-    #   # │ null │
-    #   # └──────┘
-    def with_context(other)
-      if !other.is_a?(::Array)
-        other = [other]
-      end
-
-      _from_rbldf(_ldf.with_context(other.map(&:_ldf)))
-    end
-
-    # Add or overwrite column in a DataFrame.
-    #
-    # @param column [Object]
-    #   Expression that evaluates to column or a Series to use.
-    #
-    # @return [LazyFrame]
-    #
-    # @example
-    #   df = Polars::DataFrame.new(
-    #     {
-    #       "a" => [1, 3, 5],
-    #       "b" => [2, 4, 6]
-    #     }
-    #   ).lazy
-    #   df.with_column((Polars.col("b") ** 2).alias("b_squared")).collect
-    #   # =>
-    #   # shape: (3, 3)
-    #   # ┌─────┬─────┬───────────┐
-    #   # │ a   ┆ b   ┆ b_squared │
-    #   # │ --- ┆ --- ┆ ---       │
-    #   # │ i64 ┆ i64 ┆ i64       │
-    #   # ╞═════╪═════╪═══════════╡
-    #   # │ 1   ┆ 2   ┆ 4         │
-    #   # │ 3   ┆ 4   ┆ 16        │
-    #   # │ 5   ┆ 6   ┆ 36        │
-    #   # └─────┴─────┴───────────┘
-    #
-    # @example
-    #   df.with_column(Polars.col("a") ** 2).collect
-    #   # =>
-    #   # shape: (3, 2)
-    #   # ┌─────┬─────┐
-    #   # │ a   ┆ b   │
-    #   # │ --- ┆ --- │
-    #   # │ i64 ┆ i64 │
-    #   # ╞═════╪═════╡
-    #   # │ 1   ┆ 2   │
-    #   # │ 9   ┆ 4   │
-    #   # │ 25  ┆ 6   │
-    #   # └─────┴─────┘
-    def with_column(column)
-      with_columns([column])
-    end
-
     # Remove one or multiple columns from a DataFrame.
     #
     # @param columns [Object]
@@ -3060,20 +3334,6 @@ module Polars
     #   # │ 2     ┆ 7   ┆ b   │
     #   # │ 3     ┆ 8   ┆ c   │
     #   # └───────┴─────┴─────┘
-    #
-    # @example
-    #   lf.rename(->(column_name) { "c" + column_name[1..] }).collect
-    #   # =>
-    #   # shape: (3, 3)
-    #   # ┌─────┬─────┬─────┐
-    #   # │ coo ┆ car ┆ cam │
-    #   # │ --- ┆ --- ┆ --- │
-    #   # │ i64 ┆ i64 ┆ str │
-    #   # ╞═════╪═════╪═════╡
-    #   # │ 1   ┆ 6   ┆ a   │
-    #   # │ 2   ┆ 7   ┆ b   │
-    #   # │ 3   ┆ 8   ┆ c   │
-    #   # └─────┴─────┴─────┘
     def rename(mapping, strict: true)
       if mapping.respond_to?(:call)
         select(F.all.name.map(&mapping))
@@ -3153,7 +3413,7 @@ module Polars
     #   # │ 5    ┆ 6    │
     #   # │ null ┆ null │
     #   # └──────┴──────┘
-    def shift(n, fill_value: nil)
+    def shift(n = 1, fill_value: nil)
       if !fill_value.nil?
         fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
       end
@@ -3161,52 +3421,6 @@ module Polars
       _from_rbldf(_ldf.shift(n, fill_value))
     end

-    # Shift the values by a given period and fill the resulting null values.
-    #
-    # @param periods [Integer]
-    #   Number of places to shift (may be negative).
-    # @param fill_value [Object]
-    #   Fill `nil` values with the result of this expression.
-    #
-    # @return [LazyFrame]
-    #
-    # @example
-    #   df = Polars::DataFrame.new(
-    #     {
-    #       "a" => [1, 3, 5],
-    #       "b" => [2, 4, 6]
-    #     }
-    #   ).lazy
-    #   df.shift_and_fill(1, 0).collect
-    #   # =>
-    #   # shape: (3, 2)
-    #   # ┌─────┬─────┐
-    #   # │ a   ┆ b   │
-    #   # │ --- ┆ --- │
-    #   # │ i64 ┆ i64 │
-    #   # ╞═════╪═════╡
-    #   # │ 0   ┆ 0   │
-    #   # │ 1   ┆ 2   │
-    #   # │ 3   ┆ 4   │
-    #   # └─────┴─────┘
-    #
-    # @example
-    #   df.shift_and_fill(-1, 0).collect
-    #   # =>
-    #   # shape: (3, 2)
-    #   # ┌─────┬─────┐
-    #   # │ a   ┆ b   │
-    #   # │ --- ┆ --- │
-    #   # │ i64 ┆ i64 │
-    #   # ╞═════╪═════╡
-    #   # │ 3   ┆ 4   │
-    #   # │ 5   ┆ 6   │
-    #   # │ 0   ┆ 0   │
-    #   # └─────┴─────┘
-    def shift_and_fill(periods, fill_value)
-      shift(periods, fill_value: fill_value)
-    end
-
     # Get a slice of this DataFrame.
     #
     # @param offset [Integer]
@@ -3252,11 +3466,6 @@ module Polars
     #
     # @return [LazyFrame]
     #
-    # @note
-    #   Consider using the {#fetch} operation if you only want to test your
-    #   query. The {#fetch} operation will load the first `n` rows at the scan
-    #   level, whereas the {#head}/{#limit} are applied at the end.
-    #
     # @example
     #   lf = Polars::LazyFrame.new(
     #     {
@@ -3302,11 +3511,6 @@ module Polars
     #
     # @return [LazyFrame]
     #
-    # @note
-    #   Consider using the {#fetch} operation if you only want to test your
-    #   query. The {#fetch} operation will load the first `n` rows at the scan
-    #   level, whereas the {#head}/{#limit} are applied at the end.
-    #
     # @example
     #   lf = Polars::LazyFrame.new(
     #     {
@@ -3475,10 +3679,14 @@ module Polars
     def with_row_index(name: "index", offset: 0)
       _from_rbldf(_ldf.with_row_index(name, offset))
     end
-    alias_method :with_row_count, :with_row_index

     # Take every nth row in the LazyFrame and return as a new LazyFrame.
     #
+    # @param n [Integer]
+    #   Gather every *n*-th row.
+    # @param offset [Integer]
+    #   Starting index.
+    #
     # @return [LazyFrame]
     #
     # @example
@@ -3494,10 +3702,9 @@ module Polars
     #   # │ 1   ┆ 5   │
     #   # │ 3   ┆ 7   │
     #   # └─────┴─────┘
-    def gather_every(n)
-      select(F.col("*").gather_every(n))
+    def gather_every(n, offset: 0)
+      select(F.col("*").gather_every(n, offset))
     end
-    alias_method :take_every, :gather_every

     # Fill null values using the specified value or strategy.
     #
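A short sketch of the widened `gather_every` signature from the hunk above (the frame and column names are illustrative):

    lf = Polars::LazyFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
    lf.gather_every(2).collect              # rows 0 and 2, as before
    lf.gather_every(2, offset: 1).collect   # start at row 1, then take every 2nd row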
@@ -3568,13 +3775,53 @@ module Polars
     #   # │ 0   ┆ 0.0  │
     #   # │ 4   ┆ 13.0 │
     #   # └─────┴──────┘
-    def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype:
+    def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
+      if !value.nil?
+        if value.is_a?(Expr)
+          dtypes = nil
+        elsif value.is_a?(TrueClass) || value.is_a?(FalseClass)
+          dtypes = [Boolean]
+        elsif matches_supertype && (value.is_a?(Integer) || value.is_a?(Float))
+          dtypes = [
+            Int8,
+            Int16,
+            Int32,
+            Int64,
+            Int128,
+            UInt8,
+            UInt16,
+            UInt32,
+            UInt64,
+            Float32,
+            Float64,
+            Decimal.new
+          ]
+        elsif value.is_a?(Integer)
+          dtypes = [Int64]
+        elsif value.is_a?(Float)
+          dtypes = [Float64]
+        elsif value.is_a?(::Date)
+          dtypes = [Date]
+        elsif value.is_a?(::String)
+          dtypes = [String, Categorical]
+        else
+          # fallback; anything not explicitly handled above
+          dtypes = nil
+        end
+
+        if dtypes
+          return with_columns(
+            F.col(dtypes).fill_null(value, strategy: strategy, limit: limit)
+          )
+        end
+      end
+
       select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
     end

     # Fill floating point NaN values.
     #
-    # @param
+    # @param value [Object]
     #   Value to fill the NaN values with.
     #
     # @return [LazyFrame]
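A minimal sketch of how the dtype matching added to `fill_null` above behaves (the frame is illustrative): a scalar fill value is only applied to columns whose dtype matches it, so other columns keep their nulls:

    lf = Polars::LazyFrame.new({"a" => [1, nil, 3], "b" => ["x", nil, "z"]})
    lf.fill_null(0).collect          # fills the null in numeric column "a"; "b" keeps its null
    lf.fill_null("missing").collect  # fills String/Categorical columns only, here "b"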
@@ -3603,11 +3850,11 @@ module Polars
     #   # │ 99.0 ┆ 99.0 │
     #   # │ 4.0  ┆ 13.0 │
     #   # └──────┴──────┘
-    def fill_nan(
-      if !
-
+    def fill_nan(value)
+      if !value.is_a?(Expr)
+        value = F.lit(value)
       end
-      _from_rbldf(_ldf.fill_nan(
+      _from_rbldf(_ldf.fill_nan(value._rbexpr))
     end

     # Aggregate the columns in the DataFrame to their standard deviation value.
@@ -3922,7 +4169,7 @@ module Polars
     #   # │ 3   ┆ a   ┆ b   │
     #   # │ 1   ┆ a   ┆ b   │
     #   # └─────┴─────┴─────┘
-    def unique(maintain_order:
+    def unique(maintain_order: false, subset: nil, keep: "any")
       selector_subset = nil
       if !subset.nil?
         selector_subset = Utils.parse_list_into_selector(subset)._rbselector
@@ -4078,7 +4325,7 @@ module Polars
     #   # │ z   ┆ c        ┆ 6     │
     #   # └─────┴──────────┴───────┘
     def unpivot(
-      on,
+      on = nil,
       index: nil,
       variable_name: nil,
       value_name: nil,
@@ -4100,7 +4347,6 @@ module Polars
         )
       )
     end
-    alias_method :melt, :unpivot

     # def map
     # end
@@ -4166,7 +4412,7 @@ module Polars
     #       ["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"]
     #     )
     #   )
-    #   df.
+    #   df.collect
     #   # =>
     #   # shape: (2, 3)
     #   # ┌────────┬─────────────────────┬───────┐
@@ -4179,7 +4425,7 @@ module Polars
     #   # └────────┴─────────────────────┴───────┘
     #
     # @example
-    #   df.unnest("t_struct").
+    #   df.unnest("t_struct").collect
     #   # =>
     #   # shape: (2, 6)
     #   # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
@@ -4248,19 +4494,41 @@ module Polars
     #
     # @param column [Object]
     #   Column that is sorted.
+    # @param more_columns [Array]
+    #   Columns that are sorted over after `column`.
     # @param descending [Boolean]
     #   Whether the column is sorted in descending order.
+    # @param nulls_last [Boolean]
+    #   Whether the nulls are at the end.
     #
     # @return [LazyFrame]
     def set_sorted(
       column,
-
+      *more_columns,
+      descending: false,
+      nulls_last: false
     )
       if !Utils.strlike?(column)
         msg = "expected a 'str' for argument 'column' in 'set_sorted'"
         raise TypeError, msg
       end
-
+
+      if Utils.bool?(descending)
+        ds = [descending]
+      else
+        ds = descending
+      end
+      if Utils.bool?(nulls_last)
+        nl = [nulls_last]
+      else
+        nl = nulls_last
+      end
+
+      _from_rbldf(
+        _ldf.hint_sorted(
+          [column] + more_columns, ds, nl
+        )
+      )
     end

     # Update the values in this `LazyFrame` with the values in `other`.
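A hedged usage sketch of the widened `set_sorted` signature above (column names are illustrative; the Ruby shim wraps a single boolean flag into a one-element array before handing it to the native side):

    lf.set_sorted("a")                                   # flag one column as sorted ascending
    lf.set_sorted("a", "b", descending: [false, true])   # several columns with per-column order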
@@ -4297,7 +4565,7 @@ module Polars
     # @note
     #   This is syntactic sugar for a left/inner join that preserves the order
     #   of the left `DataFrame` by default, with an optional coalesce when
-    #   `include_nulls:
+    #   `include_nulls: false`.
     #
     # @example Update `df` values with the non-null values in `new_df`, by row index:
     #   lf = Polars::LazyFrame.new(
@@ -4451,7 +4719,7 @@ module Polars
       # only use non-idx right columns present in left frame
       right_other = Set.new(right_schema.to_h.keys).intersection(left_schema.to_h.keys) - Set.new(right_on)

-      # When include_nulls is
+      # When include_nulls is true, we need to distinguish records after the join that
       # were originally null in the right frame, as opposed to records that were null
       # because the key was missing from the right frame.
       # Add a validity column to track whether row was matched or not.
@@ -4574,11 +4842,29 @@ module Polars
       end

       # if multiple predicates, combine as 'horizontal' expression
-      combined_predicate =
+      combined_predicate =
+        if all_predicates.any?
+          if all_predicates.length > 1
+            F.all_horizontal(*all_predicates)
+          else
+            all_predicates[0]
+          end
+        else
+          nil
+        end

       # apply reduced boolean mask first, if applicable, then predicates
       if boolean_masks.any?
-
+        if boolean_masks.length > 1
+          raise Todo
+        end
+        mask_expr = F.lit(boolean_masks[0])
+        combined_predicate =
+          if combined_predicate.nil?
+            mask_expr
+          else
+            mask_expr & combined_predicate
+          end
       end

       if combined_predicate.nil?
@@ -4588,5 +4874,10 @@ module Polars
       filter_method = invert ? _ldf.method(:remove) : _ldf.method(:filter)
       _from_rbldf(filter_method.(combined_predicate._rbexpr))
     end
+
+    def _select_engine(engine, path = nil)
+      engine = Plr.get_engine_affinity if engine == "auto"
+      engine == "auto" && !path.is_a?(::String) && !path.nil? ? "in-memory" : engine
+    end
   end
 end