polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -77,7 +77,6 @@ module Polars
|
|
|
77
77
|
def len
|
|
78
78
|
super
|
|
79
79
|
end
|
|
80
|
-
alias_method :lengths, :len
|
|
81
80
|
|
|
82
81
|
# Drop all null values in the list.
|
|
83
82
|
#
|
|
@@ -259,6 +258,11 @@ module Polars
|
|
|
259
258
|
|
|
260
259
|
# Sort the arrays in the list.
|
|
261
260
|
#
|
|
261
|
+
# @param descending [Boolean]
|
|
262
|
+
# Sort in descending order.
|
|
263
|
+
# @param nulls_last [Boolean]
|
|
264
|
+
# Place null values last.
|
|
265
|
+
#
|
|
262
266
|
# @return [Series]
|
|
263
267
|
#
|
|
264
268
|
# @example
|
|
@@ -273,7 +277,7 @@ module Polars
|
|
|
273
277
|
# # ]
|
|
274
278
|
#
|
|
275
279
|
# @example
|
|
276
|
-
# s.list.sort(
|
|
280
|
+
# s.list.sort(descending: true)
|
|
277
281
|
# # =>
|
|
278
282
|
# # shape: (2,)
|
|
279
283
|
# # Series: 'a' [list[i64]]
|
|
@@ -281,7 +285,7 @@ module Polars
|
|
|
281
285
|
# # [3, 2, 1]
|
|
282
286
|
# # [9, 2, 1]
|
|
283
287
|
# # ]
|
|
284
|
-
def sort(
|
|
288
|
+
def sort(descending: false, nulls_last: false)
|
|
285
289
|
super
|
|
286
290
|
end
|
|
287
291
|
|
|
@@ -305,6 +309,9 @@ module Polars
|
|
|
305
309
|
|
|
306
310
|
# Get the unique/distinct values in the list.
|
|
307
311
|
#
|
|
312
|
+
# @param maintain_order [Boolean]
|
|
313
|
+
# Maintain order of data. This requires more work.
|
|
314
|
+
#
|
|
308
315
|
# @return [Series]
|
|
309
316
|
#
|
|
310
317
|
# @example
|
|
@@ -317,7 +324,7 @@ module Polars
|
|
|
317
324
|
# # [1, 2]
|
|
318
325
|
# # [2, 3]
|
|
319
326
|
# # ]
|
|
320
|
-
def unique
|
|
327
|
+
def unique(maintain_order: false)
|
|
321
328
|
super
|
|
322
329
|
end
|
|
323
330
|
|
|
@@ -400,8 +407,8 @@ module Polars
|
|
|
400
407
|
# Indices to return per sublist
|
|
401
408
|
# @param null_on_oob [Boolean]
|
|
402
409
|
# Behavior if an index is out of bounds:
|
|
403
|
-
#
|
|
404
|
-
#
|
|
410
|
+
# true -> set as null
|
|
411
|
+
# false -> raise an error
|
|
405
412
|
# Note that defaulting to raising an error is much cheaper
|
|
406
413
|
#
|
|
407
414
|
# @return [Series]
|
|
@@ -461,6 +468,11 @@ module Polars
|
|
|
461
468
|
#
|
|
462
469
|
# @param separator [String]
|
|
463
470
|
# string to separate the items with
|
|
471
|
+
# @param ignore_nulls [Boolean]
|
|
472
|
+
# Ignore null values (default).
|
|
473
|
+
#
|
|
474
|
+
# If set to `false`, null values will be propagated.
|
|
475
|
+
# If the sub-list contains any null values, the output is `nil`.
|
|
464
476
|
#
|
|
465
477
|
# @return [Series]
|
|
466
478
|
#
|
|
@@ -474,7 +486,7 @@ module Polars
|
|
|
474
486
|
# # "foo-bar"
|
|
475
487
|
# # "hello-world"
|
|
476
488
|
# # ]
|
|
477
|
-
def join(separator)
|
|
489
|
+
def join(separator, ignore_nulls: true)
|
|
478
490
|
super
|
|
479
491
|
end
|
|
480
492
|
|
|
@@ -516,10 +528,33 @@ module Polars
|
|
|
516
528
|
super
|
|
517
529
|
end
|
|
518
530
|
|
|
531
|
+
# Get the single value of the sublists.
|
|
532
|
+
#
|
|
533
|
+
# This errors if the sublist length is not exactly one.
|
|
534
|
+
#
|
|
535
|
+
# @return [Series]
|
|
536
|
+
#
|
|
537
|
+
# @example
|
|
538
|
+
# s = Polars::Series.new("a", [[1], [4], [6]])
|
|
539
|
+
# s.list.item
|
|
540
|
+
# # =>
|
|
541
|
+
# # shape: (3,)
|
|
542
|
+
# # Series: 'a' [i64]
|
|
543
|
+
# # [
|
|
544
|
+
# # 1
|
|
545
|
+
# # 4
|
|
546
|
+
# # 6
|
|
547
|
+
# # ]
|
|
548
|
+
def item
|
|
549
|
+
super
|
|
550
|
+
end
|
|
551
|
+
|
|
519
552
|
# Check if sublists contain the given item.
|
|
520
553
|
#
|
|
521
554
|
# @param item [Object]
|
|
522
555
|
# Item that will be checked for membership.
|
|
556
|
+
# @param nulls_equal [Boolean]
|
|
557
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
|
523
558
|
#
|
|
524
559
|
# @return [Series]
|
|
525
560
|
#
|
|
@@ -534,7 +569,7 @@ module Polars
|
|
|
534
569
|
# # false
|
|
535
570
|
# # true
|
|
536
571
|
# # ]
|
|
537
|
-
def contains(item)
|
|
572
|
+
def contains(item, nulls_equal: true)
|
|
538
573
|
super
|
|
539
574
|
end
|
|
540
575
|
|
|
@@ -599,7 +634,7 @@ module Polars
|
|
|
599
634
|
|
|
600
635
|
# Shift values by the given period.
|
|
601
636
|
#
|
|
602
|
-
# @param
|
|
637
|
+
# @param n [Integer]
|
|
603
638
|
# Number of places to shift (may be negative).
|
|
604
639
|
#
|
|
605
640
|
# @return [Series]
|
|
@@ -614,7 +649,7 @@ module Polars
|
|
|
614
649
|
# # [null, 1, … 3]
|
|
615
650
|
# # [null, 10, 2]
|
|
616
651
|
# # ]
|
|
617
|
-
def shift(
|
|
652
|
+
def shift(n = 1)
|
|
618
653
|
super
|
|
619
654
|
end
|
|
620
655
|
|
|
@@ -799,22 +834,51 @@ module Polars
|
|
|
799
834
|
# @return [Series]
|
|
800
835
|
#
|
|
801
836
|
# @example
|
|
802
|
-
#
|
|
803
|
-
#
|
|
804
|
-
# Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
|
|
805
|
-
# )
|
|
837
|
+
# s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]])
|
|
838
|
+
# s.list.eval(Polars.element.rank)
|
|
806
839
|
# # =>
|
|
807
|
-
# # shape: (3,
|
|
808
|
-
# #
|
|
809
|
-
# #
|
|
810
|
-
# #
|
|
811
|
-
# #
|
|
812
|
-
# #
|
|
813
|
-
# #
|
|
814
|
-
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
|
815
|
-
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
|
816
|
-
# # └─────┴─────┴────────────┘
|
|
840
|
+
# # shape: (3,)
|
|
841
|
+
# # Series: 'a' [list[f64]]
|
|
842
|
+
# # [
|
|
843
|
+
# # [1.0, 2.0]
|
|
844
|
+
# # [2.0, 1.0]
|
|
845
|
+
# # [2.0, 1.0]
|
|
846
|
+
# # ]
|
|
817
847
|
def eval(expr)
|
|
848
|
+
s = Utils.wrap_s(_s)
|
|
849
|
+
s.to_frame.select(F.col(s.name).list.eval(expr)).to_series
|
|
850
|
+
end
|
|
851
|
+
|
|
852
|
+
# Run any polars aggregation expression against the list's elements.
|
|
853
|
+
#
|
|
854
|
+
# @param expr [Expr]
|
|
855
|
+
# Expression to run. Note that you can select an element with `Polars.element`.
|
|
856
|
+
#
|
|
857
|
+
# @return [Series]
|
|
858
|
+
#
|
|
859
|
+
# @example
|
|
860
|
+
# s = Polars::Series.new("a", [[1, nil], [42, 13], [nil, nil]])
|
|
861
|
+
# s.list.agg(Polars.element.null_count)
|
|
862
|
+
# # =>
|
|
863
|
+
# # shape: (3,)
|
|
864
|
+
# # Series: 'a' [u32]
|
|
865
|
+
# # [
|
|
866
|
+
# # 1
|
|
867
|
+
# # 0
|
|
868
|
+
# # 2
|
|
869
|
+
# # ]
|
|
870
|
+
#
|
|
871
|
+
# @example
|
|
872
|
+
# s.list.agg(Polars.element.drop_nulls)
|
|
873
|
+
# # =>
|
|
874
|
+
# # shape: (3,)
|
|
875
|
+
# # Series: 'a' [list[i64]]
|
|
876
|
+
# # [
|
|
877
|
+
# # [1]
|
|
878
|
+
# # [42, 13]
|
|
879
|
+
# # []
|
|
880
|
+
# # ]
|
|
881
|
+
def agg(expr)
|
|
818
882
|
super
|
|
819
883
|
end
|
|
820
884
|
|
data/lib/polars/meta_expr.rb
CHANGED
|
@@ -180,8 +180,13 @@ module Polars
|
|
|
180
180
|
# # => "foo"
|
|
181
181
|
# Polars.len.meta.output_name
|
|
182
182
|
# # => "len"
|
|
183
|
-
def output_name
|
|
183
|
+
def output_name(raise_if_undetermined: true)
|
|
184
184
|
_rbexpr.meta_output_name
|
|
185
|
+
rescue Polars::ComputeError
|
|
186
|
+
if !raise_if_undetermined
|
|
187
|
+
return nil
|
|
188
|
+
end
|
|
189
|
+
raise
|
|
185
190
|
end
|
|
186
191
|
|
|
187
192
|
# Pop the latest expression and return the input(s) of the popped expression.
|
|
@@ -292,5 +297,41 @@ module Polars
|
|
|
292
297
|
nil
|
|
293
298
|
end
|
|
294
299
|
end
|
|
300
|
+
|
|
301
|
+
# Format the expression as a Graphviz graph.
|
|
302
|
+
#
|
|
303
|
+
# Note that Graphviz must be installed to render the visualization (if not
|
|
304
|
+
# already present, you can download it here: https://graphviz.org/download).
|
|
305
|
+
#
|
|
306
|
+
# @param show [Boolean]
|
|
307
|
+
# Show the figure.
|
|
308
|
+
# @param output_path [String]
|
|
309
|
+
# Write the figure to disk.
|
|
310
|
+
# @param raw_output [Boolean]
|
|
311
|
+
# Return dot syntax. This cannot be combined with `show` and/or `output_path`.
|
|
312
|
+
# @param figsize [Array]
|
|
313
|
+
# Passed to matplotlib if `show == true`.
|
|
314
|
+
#
|
|
315
|
+
# @return [Object]
|
|
316
|
+
#
|
|
317
|
+
# @example
|
|
318
|
+
# e = (Polars.col("foo") * Polars.col("bar")).sum.over(Polars.col("ham")) / 2
|
|
319
|
+
# e.meta.show_graph
|
|
320
|
+
def show_graph(
|
|
321
|
+
show: true,
|
|
322
|
+
output_path: nil,
|
|
323
|
+
raw_output: false,
|
|
324
|
+
figsize: [16.0, 12.0],
|
|
325
|
+
schema: nil
|
|
326
|
+
)
|
|
327
|
+
dot = _rbexpr.meta_show_graph(schema)
|
|
328
|
+
Utils.display_dot_graph(
|
|
329
|
+
dot: dot,
|
|
330
|
+
show: show,
|
|
331
|
+
output_path: output_path,
|
|
332
|
+
raw_output: raw_output,
|
|
333
|
+
figsize: figsize
|
|
334
|
+
)
|
|
335
|
+
end
|
|
295
336
|
end
|
|
296
337
|
end
|
data/lib/polars/name_expr.rb
CHANGED
|
@@ -77,8 +77,11 @@ module Polars
|
|
|
77
77
|
# # │ 2 ┆ y ┆ 2 ┆ y │
|
|
78
78
|
# # │ 1 ┆ x ┆ 3 ┆ z │
|
|
79
79
|
# # └───────────┴───────────┴─────┴─────┘
|
|
80
|
-
def map(&
|
|
81
|
-
|
|
80
|
+
def map(&function)
|
|
81
|
+
# need to mark function for GC
|
|
82
|
+
raise Todo
|
|
83
|
+
|
|
84
|
+
Utils.wrap_expr(_rbexpr.name_map(function))
|
|
82
85
|
end
|
|
83
86
|
|
|
84
87
|
# Add a prefix to the root column name of the expression.
|
|
@@ -208,11 +211,45 @@ module Polars
|
|
|
208
211
|
# @example
|
|
209
212
|
# df = Polars::DataFrame.new({"x" => {"a" => 1, "b" => 2}})
|
|
210
213
|
# df.select(Polars.col("x").name.prefix_fields("prefix_")).schema
|
|
211
|
-
# # => {"x"=>Polars::Struct({"prefix_a"=>Polars::Int64, "prefix_b"=>Polars::Int64})}
|
|
214
|
+
# # => Polars::Schema({"x"=>Polars::Struct({"prefix_a"=>Polars::Int64, "prefix_b"=>Polars::Int64})})
|
|
212
215
|
def prefix_fields(prefix)
|
|
213
216
|
Utils.wrap_expr(_rbexpr.name_prefix_fields(prefix))
|
|
214
217
|
end
|
|
215
218
|
|
|
219
|
+
# Replace matching regex/literal substring in the name with a new value.
|
|
220
|
+
#
|
|
221
|
+
# @param pattern [String]
|
|
222
|
+
# A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
|
223
|
+
# @param value [String]
|
|
224
|
+
# String that will replace the matched substring.
|
|
225
|
+
# @param literal [Boolean]
|
|
226
|
+
# Treat `pattern` as a literal string, not a regex.
|
|
227
|
+
#
|
|
228
|
+
# @return [Expr]
|
|
229
|
+
#
|
|
230
|
+
# @example
|
|
231
|
+
# df = Polars::DataFrame.new(
|
|
232
|
+
# {
|
|
233
|
+
# "n_foo" => [1, 2, 3],
|
|
234
|
+
# "n_bar" => ["x", "y", "z"]
|
|
235
|
+
# }
|
|
236
|
+
# )
|
|
237
|
+
# df.select(Polars.all.name.replace("^n_", "col_"))
|
|
238
|
+
# # =>
|
|
239
|
+
# # shape: (3, 2)
|
|
240
|
+
# # ┌─────────┬─────────┐
|
|
241
|
+
# # │ col_foo ┆ col_bar │
|
|
242
|
+
# # │ --- ┆ --- │
|
|
243
|
+
# # │ i64 ┆ str │
|
|
244
|
+
# # ╞═════════╪═════════╡
|
|
245
|
+
# # │ 1 ┆ x │
|
|
246
|
+
# # │ 2 ┆ y │
|
|
247
|
+
# # │ 3 ┆ z │
|
|
248
|
+
# # └─────────┴─────────┘
|
|
249
|
+
def replace(pattern, value, literal: false)
|
|
250
|
+
Utils.wrap_expr(_rbexpr.name_replace(pattern, value, literal))
|
|
251
|
+
end
|
|
252
|
+
|
|
216
253
|
# Add a suffix to all field names of a struct.
|
|
217
254
|
#
|
|
218
255
|
# @note
|
|
@@ -226,7 +263,7 @@ module Polars
|
|
|
226
263
|
# @example
|
|
227
264
|
# df = Polars::DataFrame.new({"x" => {"a" => 1, "b" => 2}})
|
|
228
265
|
# df.select(Polars.col("x").name.suffix_fields("_suffix")).schema
|
|
229
|
-
# # => {"x"=>Polars::Struct({"a_suffix"=>Polars::Int64, "b_suffix"=>Polars::Int64})}
|
|
266
|
+
# # => Polars::Schema({"x"=>Polars::Struct({"a_suffix"=>Polars::Int64, "b_suffix"=>Polars::Int64})})
|
|
230
267
|
def suffix_fields(suffix)
|
|
231
268
|
Utils.wrap_expr(_rbexpr.name_suffix_fields(suffix))
|
|
232
269
|
end
|
|
@@ -5,6 +5,9 @@ module Polars
|
|
|
5
5
|
# This functionality is considered **unstable**. It may be changed
|
|
6
6
|
# at any point without it being considered a breaking change.
|
|
7
7
|
class QueryOptFlags
|
|
8
|
+
# @private
|
|
9
|
+
attr_accessor :_rboptflags
|
|
10
|
+
|
|
8
11
|
def initialize(
|
|
9
12
|
predicate_pushdown: nil,
|
|
10
13
|
projection_pushdown: nil,
|
|
@@ -17,7 +20,7 @@ module Polars
|
|
|
17
20
|
check_order_observe: nil,
|
|
18
21
|
fast_projection: nil
|
|
19
22
|
)
|
|
20
|
-
|
|
23
|
+
self._rboptflags = RbOptFlags.default
|
|
21
24
|
update(
|
|
22
25
|
predicate_pushdown: predicate_pushdown,
|
|
23
26
|
projection_pushdown: projection_pushdown,
|
|
@@ -32,6 +35,35 @@ module Polars
|
|
|
32
35
|
)
|
|
33
36
|
end
|
|
34
37
|
|
|
38
|
+
# Create a new empty set of optimizations.
|
|
39
|
+
def self.none(
|
|
40
|
+
predicate_pushdown: nil,
|
|
41
|
+
projection_pushdown: nil,
|
|
42
|
+
simplify_expression: nil,
|
|
43
|
+
slice_pushdown: nil,
|
|
44
|
+
comm_subplan_elim: nil,
|
|
45
|
+
comm_subexpr_elim: nil,
|
|
46
|
+
cluster_with_columns: nil,
|
|
47
|
+
collapse_joins: nil,
|
|
48
|
+
check_order_observe: nil,
|
|
49
|
+
fast_projection: nil
|
|
50
|
+
)
|
|
51
|
+
optflags = QueryOptFlags.new
|
|
52
|
+
optflags.no_optimizations
|
|
53
|
+
optflags.update(
|
|
54
|
+
predicate_pushdown: predicate_pushdown,
|
|
55
|
+
projection_pushdown: projection_pushdown,
|
|
56
|
+
simplify_expression: simplify_expression,
|
|
57
|
+
slice_pushdown: slice_pushdown,
|
|
58
|
+
comm_subplan_elim: comm_subplan_elim,
|
|
59
|
+
comm_subexpr_elim: comm_subexpr_elim,
|
|
60
|
+
cluster_with_columns: cluster_with_columns,
|
|
61
|
+
collapse_joins: collapse_joins,
|
|
62
|
+
check_order_observe: check_order_observe,
|
|
63
|
+
fast_projection: fast_projection
|
|
64
|
+
)
|
|
65
|
+
end
|
|
66
|
+
|
|
35
67
|
def update(
|
|
36
68
|
predicate_pushdown: nil,
|
|
37
69
|
projection_pushdown: nil,
|
|
@@ -44,7 +76,171 @@ module Polars
|
|
|
44
76
|
check_order_observe: nil,
|
|
45
77
|
fast_projection: nil
|
|
46
78
|
)
|
|
47
|
-
|
|
79
|
+
if !predicate_pushdown.nil?
|
|
80
|
+
self.predicate_pushdown = predicate_pushdown
|
|
81
|
+
end
|
|
82
|
+
if !projection_pushdown.nil?
|
|
83
|
+
self.projection_pushdown = projection_pushdown
|
|
84
|
+
end
|
|
85
|
+
if !simplify_expression.nil?
|
|
86
|
+
self.simplify_expression = simplify_expression
|
|
87
|
+
end
|
|
88
|
+
if !slice_pushdown.nil?
|
|
89
|
+
self.slice_pushdown = slice_pushdown
|
|
90
|
+
end
|
|
91
|
+
if !comm_subplan_elim.nil?
|
|
92
|
+
self.comm_subplan_elim = comm_subplan_elim
|
|
93
|
+
end
|
|
94
|
+
if !comm_subexpr_elim.nil?
|
|
95
|
+
self.comm_subexpr_elim = comm_subexpr_elim
|
|
96
|
+
end
|
|
97
|
+
if !cluster_with_columns.nil?
|
|
98
|
+
self.cluster_with_columns = cluster_with_columns
|
|
99
|
+
end
|
|
100
|
+
if !collapse_joins.nil?
|
|
101
|
+
Utils.issue_deprecation_warning(
|
|
102
|
+
"the `collapse_joins` parameter for `QueryOptFlags` is deprecated. " +
|
|
103
|
+
"Use `predicate_pushdown` instead."
|
|
104
|
+
)
|
|
105
|
+
if !collapse_joins
|
|
106
|
+
self.predicate_pushdown = false
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
if !check_order_observe.nil?
|
|
110
|
+
self.check_order_observe = check_order_observe
|
|
111
|
+
end
|
|
112
|
+
if !fast_projection.nil?
|
|
113
|
+
self.fast_projection = fast_projection
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
self
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Create a new set of optimizations for eager execution.
|
|
120
|
+
def self._eager
|
|
121
|
+
optflags = QueryOptFlags.new
|
|
122
|
+
optflags.no_optimizations
|
|
123
|
+
optflags._rboptflags.eager = true
|
|
124
|
+
optflags.simplify_expression = true
|
|
125
|
+
optflags
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Remove selected optimizations.
|
|
129
|
+
def no_optimizations
|
|
130
|
+
_rboptflags.no_optimizations
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Only read columns that are used later in the query.
|
|
134
|
+
def projection_pushdown
|
|
135
|
+
_rboptflags.projection_pushdown
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
def projection_pushdown=(value)
|
|
139
|
+
_rboptflags.projection_pushdown = value
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Apply predicates/filters as early as possible.
|
|
143
|
+
def predicate_pushdown
|
|
144
|
+
_rboptflags.predicate_pushdown
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def predicate_pushdown=(value)
|
|
148
|
+
_rboptflags.predicate_pushdown = value
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Cluster sequential `with_columns` calls to independent calls.
|
|
152
|
+
def cluster_with_columns
|
|
153
|
+
_rboptflags.cluster_with_columns
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def cluster_with_columns=(value)
|
|
157
|
+
_rboptflags.cluster_with_columns = value
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Run many expression optimization rules until fixed point.
|
|
161
|
+
def simplify_expression
|
|
162
|
+
_rboptflags.simplify_expression
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def simplify_expression=(value)
|
|
166
|
+
_rboptflags.simplify_expression = value
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# Pushdown slices/limits.
|
|
170
|
+
def slice_pushdown
|
|
171
|
+
_rboptflags.slice_pushdown
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
def slice_pushdown=(value)
|
|
175
|
+
_rboptflags.slice_pushdown = value
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
# Elide duplicate plans and cache their outputs.
|
|
179
|
+
def comm_subplan_elim
|
|
180
|
+
_rboptflags.comm_subplan_elim
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
def comm_subplan_elim=(value)
|
|
184
|
+
_rboptflags.comm_subplan_elim = value
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# Elide duplicate expressions and cache their outputs.
|
|
188
|
+
def comm_subexpr_elim
|
|
189
|
+
_rboptflags.comm_subexpr_elim
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def comm_subexpr_elim=(value)
|
|
193
|
+
_rboptflags.comm_subexpr_elim = value
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
# Do not maintain order if the order would not be observed.
|
|
197
|
+
def check_order_observe
|
|
198
|
+
_rboptflags.check_order_observe
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
def check_order_observe=(value)
|
|
202
|
+
_rboptflags.check_order_observe = value
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
# Replace simple projections with a faster inlined projection that skips the expression engine.
|
|
206
|
+
def fast_projection
|
|
207
|
+
_rboptflags.fast_projection
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def fast_projection=(value)
|
|
211
|
+
_rboptflags.fast_projection = value
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def to_s
|
|
215
|
+
<<~STR
|
|
216
|
+
QueryOptFlags {
|
|
217
|
+
type_coercion: #{_rboptflags.type_coercion}
|
|
218
|
+
type_check: #{_rboptflags.type_check}
|
|
219
|
+
|
|
220
|
+
predicate_pushdown: #{predicate_pushdown}
|
|
221
|
+
projection_pushdown: #{projection_pushdown}
|
|
222
|
+
simplify_expression: #{simplify_expression}
|
|
223
|
+
slice_pushdown: #{slice_pushdown}
|
|
224
|
+
comm_subplan_elim: #{comm_subplan_elim}
|
|
225
|
+
comm_subexpr_elim: #{comm_subexpr_elim}
|
|
226
|
+
cluster_with_columns: #{cluster_with_columns}
|
|
227
|
+
check_order_observe: #{check_order_observe}
|
|
228
|
+
fast_projection: #{fast_projection}
|
|
229
|
+
|
|
230
|
+
eager: #{_rboptflags.eager}
|
|
231
|
+
streaming: #{_rboptflags.streaming}
|
|
232
|
+
}
|
|
233
|
+
STR
|
|
234
|
+
end
|
|
235
|
+
alias_method :inspect, :to_s
|
|
236
|
+
|
|
237
|
+
private
|
|
238
|
+
|
|
239
|
+
def initialize_copy(other)
|
|
240
|
+
super
|
|
241
|
+
self._rboptflags = _rboptflags.copy
|
|
48
242
|
end
|
|
49
243
|
end
|
|
244
|
+
|
|
245
|
+
DEFAULT_QUERY_OPT_FLAGS = QueryOptFlags.new
|
|
50
246
|
end
|
|
@@ -25,11 +25,11 @@ module Polars
|
|
|
25
25
|
|
|
26
26
|
def agg(*aggs, **named_aggs)
|
|
27
27
|
@df.lazy
|
|
28
|
-
.
|
|
29
|
-
index_column: @time_column, period: @period, offset: @offset, closed: @closed,
|
|
28
|
+
.rolling(
|
|
29
|
+
index_column: @time_column, period: @period, offset: @offset, closed: @closed, group_by: @group_by
|
|
30
30
|
)
|
|
31
31
|
.agg(*aggs, **named_aggs)
|
|
32
|
-
.collect(
|
|
32
|
+
.collect(optimizations: QueryOptFlags.none)
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
end
|
data/lib/polars/schema.rb
CHANGED
|
@@ -1,13 +1,21 @@
|
|
|
1
1
|
module Polars
|
|
2
2
|
class Schema
|
|
3
|
+
include Enumerable
|
|
4
|
+
|
|
3
5
|
# Ordered mapping of column names to their data type.
|
|
4
6
|
#
|
|
5
7
|
# @param schema [Object]
|
|
6
8
|
# The schema definition given by column names and their associated
|
|
7
9
|
# Polars data type. Accepts a mapping or an enumerable of arrays.
|
|
8
10
|
def initialize(schema = nil, check_dtypes: true)
|
|
9
|
-
input = schema || {}
|
|
10
11
|
@schema = {}
|
|
12
|
+
|
|
13
|
+
if schema.respond_to?(:arrow_c_schema) && !schema.is_a?(Schema)
|
|
14
|
+
Plr.init_polars_schema_from_arrow_c_schema(@schema, schema)
|
|
15
|
+
return
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
input = schema || {}
|
|
11
19
|
input.each do |name, tp|
|
|
12
20
|
if !check_dtypes
|
|
13
21
|
@schema[name] = tp
|
|
@@ -34,6 +42,11 @@ module Polars
|
|
|
34
42
|
@schema[name] = dtype
|
|
35
43
|
end
|
|
36
44
|
|
|
45
|
+
# @private
|
|
46
|
+
def arrow_c_schema
|
|
47
|
+
Plr.polars_schema_to_rbcapsule(self)
|
|
48
|
+
end
|
|
49
|
+
|
|
37
50
|
# Get the column names of the schema.
|
|
38
51
|
#
|
|
39
52
|
# @return [Array]
|
|
@@ -78,14 +91,19 @@ module Polars
|
|
|
78
91
|
end
|
|
79
92
|
alias_method :inspect, :to_s
|
|
80
93
|
|
|
94
|
+
# @private
|
|
95
|
+
def each(&block)
|
|
96
|
+
@schema.each(&block)
|
|
97
|
+
end
|
|
98
|
+
|
|
81
99
|
# @private
|
|
82
100
|
def include?(name)
|
|
83
101
|
@schema.include?(name)
|
|
84
102
|
end
|
|
85
103
|
|
|
86
104
|
# @private
|
|
87
|
-
def to_h
|
|
88
|
-
@schema.to_h
|
|
105
|
+
def to_h(&block)
|
|
106
|
+
@schema.to_h(&block)
|
|
89
107
|
end
|
|
90
108
|
|
|
91
109
|
private
|
data/lib/polars/selector.rb
CHANGED
|
@@ -127,14 +127,49 @@ module Polars
|
|
|
127
127
|
# Only works after a wildcard or regex column selection, and you cannot provide
|
|
128
128
|
# both string column names *and* dtypes (you may prefer to use selectors instead).
|
|
129
129
|
#
|
|
130
|
-
# @return [Selector]
|
|
131
|
-
#
|
|
132
130
|
# @param columns [Object]
|
|
133
131
|
# The name or datatype of the column(s) to exclude. Accepts regular expression
|
|
134
132
|
# input. Regular expressions should start with `^` and end with `$`.
|
|
135
133
|
# @param more_columns [Array]
|
|
136
134
|
# Additional names or datatypes of columns to exclude, specified as positional
|
|
137
135
|
# arguments.
|
|
136
|
+
#
|
|
137
|
+
# @return [Selector]
|
|
138
|
+
#
|
|
139
|
+
# @example Exclude by column name(s):
|
|
140
|
+
# df = Polars::DataFrame.new(
|
|
141
|
+
# {
|
|
142
|
+
# "aa" => [1, 2, 3],
|
|
143
|
+
# "ba" => ["a", "b", nil],
|
|
144
|
+
# "cc" => [nil, 2.5, 1.5]
|
|
145
|
+
# }
|
|
146
|
+
# )
|
|
147
|
+
# df.select(Polars.cs.exclude("ba", "xx"))
|
|
148
|
+
# # =>
|
|
149
|
+
# # shape: (3, 2)
|
|
150
|
+
# # ┌─────┬──────┐
|
|
151
|
+
# # │ aa ┆ cc │
|
|
152
|
+
# # │ --- ┆ --- │
|
|
153
|
+
# # │ i64 ┆ f64 │
|
|
154
|
+
# # ╞═════╪══════╡
|
|
155
|
+
# # │ 1 ┆ null │
|
|
156
|
+
# # │ 2 ┆ 2.5 │
|
|
157
|
+
# # │ 3 ┆ 1.5 │
|
|
158
|
+
# # └─────┴──────┘
|
|
159
|
+
#
|
|
160
|
+
# @example Exclude using a column name, a selector, and a dtype:
|
|
161
|
+
# df.select(Polars.cs.exclude("aa", Polars.cs.string, Polars::UInt32))
|
|
162
|
+
# # =>
|
|
163
|
+
# # shape: (3, 1)
|
|
164
|
+
# # ┌──────┐
|
|
165
|
+
# # │ cc │
|
|
166
|
+
# # │ --- │
|
|
167
|
+
# # │ f64 │
|
|
168
|
+
# # ╞══════╡
|
|
169
|
+
# # │ null │
|
|
170
|
+
# # │ 2.5 │
|
|
171
|
+
# # │ 1.5 │
|
|
172
|
+
# # └──────┘
|
|
138
173
|
def exclude(columns, *more_columns)
|
|
139
174
|
exclude_cols = []
|
|
140
175
|
exclude_dtypes = []
|