polars-df 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,13 +8,18 @@ module Polars
8
8
  name = name.to_a
9
9
  end
10
10
 
11
- if name.is_a?(Array)
11
+ if name.is_a?(Class) && name < DataType
12
+ name = [name]
13
+ end
14
+
15
+ if name.is_a?(DataType)
16
+ Utils.wrap_expr(_dtype_cols([name]))
17
+ elsif name.is_a?(Array)
12
18
  if name.length == 0 || name[0].is_a?(String) || name[0].is_a?(Symbol)
13
19
  name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
14
20
  Utils.wrap_expr(RbExpr.cols(name))
15
21
  elsif Utils.is_polars_dtype(name[0])
16
- raise Todo
17
- # Utils.wrap_expr(_dtype_cols(name))
22
+ Utils.wrap_expr(_dtype_cols(name))
18
23
  else
19
24
  raise ArgumentError, "Expected list values to be all `str` or all `DataType`"
20
25
  end
@@ -158,7 +163,7 @@ module Polars
158
163
  # TODO
159
164
  Utils.wrap_expr(_sum_exprs(exprs))
160
165
  else
161
- raise Todo
166
+ fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
162
167
  end
163
168
  end
164
169
 
@@ -294,8 +299,69 @@ module Polars
294
299
  Utils.wrap_expr(RbExpr.lit(value))
295
300
  end
296
301
 
297
- # def cumsum
298
- # end
302
+ # Cumulatively sum values in a column/Series, or horizontally across a list of columns/expressions.
303
+ #
304
+ # @param column [Object]
305
+ # Column(s) to be used in aggregation.
306
+ #
307
+ # @return [Object]
308
+ #
309
+ # @example
310
+ # df = Polars::DataFrame.new(
311
+ # {
312
+ # "a" => [1, 2],
313
+ # "b" => [3, 4],
314
+ # "c" => [5, 6]
315
+ # }
316
+ # )
317
+ # # =>
318
+ # # shape: (2, 3)
319
+ # # ┌─────┬─────┬─────┐
320
+ # # │ a ┆ b ┆ c │
321
+ # # │ --- ┆ --- ┆ --- │
322
+ # # │ i64 ┆ i64 ┆ i64 │
323
+ # # ╞═════╪═════╪═════╡
324
+ # # │ 1 ┆ 3 ┆ 5 │
325
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
326
+ # # │ 2 ┆ 4 ┆ 6 │
327
+ # # └─────┴─────┴─────┘
328
+ #
329
+ # @example Cumulatively sum a column by name:
330
+ # df.select(Polars.cumsum("a"))
331
+ # # =>
332
+ # # shape: (2, 1)
333
+ # # ┌─────┐
334
+ # # │ a │
335
+ # # │ --- │
336
+ # # │ i64 │
337
+ # # ╞═════╡
338
+ # # │ 1 │
339
+ # # ├╌╌╌╌╌┤
340
+ # # │ 3 │
341
+ # # └─────┘
342
+ #
343
+ # @example Cumulatively sum a list of columns/expressions horizontally:
344
+ # df.with_column(Polars.cumsum(["a", "c"]))
345
+ # # =>
346
+ # # shape: (2, 4)
347
+ # # ┌─────┬─────┬─────┬───────────┐
348
+ # # │ a ┆ b ┆ c ┆ cumsum │
349
+ # # │ --- ┆ --- ┆ --- ┆ --- │
350
+ # # │ i64 ┆ i64 ┆ i64 ┆ struct[2] │
351
+ # # ╞═════╪═════╪═════╪═══════════╡
352
+ # # │ 1 ┆ 3 ┆ 5 ┆ {1,6} │
353
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
354
+ # # │ 2 ┆ 4 ┆ 6 ┆ {2,8} │
355
+ # # └─────┴─────┴─────┴───────────┘
356
+ def cumsum(column)
357
+ if column.is_a?(Series)
358
+ column.cumsum
359
+ elsif column.is_a?(String)
360
+ col(column).cumsum
361
+ else
362
+ cumfold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("cumsum")
363
+ end
364
+ end
299
365
 
300
366
  # Compute the spearman rank correlation between two columns.
301
367
  #
@@ -367,7 +433,7 @@ module Polars
367
433
  # def apply
368
434
  # end
369
435
 
370
- # Accumulate over multiple columns horizontally/ row wise with a left fold.
436
+ # Accumulate over multiple columns horizontally/row wise with a left fold.
371
437
  #
372
438
  # @return [Expr]
373
439
  def fold(acc, f, exprs)
@@ -383,17 +449,118 @@ module Polars
383
449
  # def reduce
384
450
  # end
385
451
 
386
- # def cumfold
387
- # end
452
+ # Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
453
+ #
454
+ # Every cumulative result is added as a separate field in a Struct column.
455
+ #
456
+ # @param acc [Object]
457
+ # Accumulator Expression. This is the value that will be initialized when the fold
458
+ # starts. For a sum this could for instance be lit(0).
459
+ # @param f [Object]
460
+ # Function to apply over the accumulator and the value.
461
+ # Fn(acc, value) -> new_value
462
+ # @param exprs [Object]
463
+ # Expressions to aggregate over. May also be a wildcard expression.
464
+ # @param include_init [Boolean]
465
+ # Include the initial accumulator state as struct field.
466
+ #
467
+ # @return [Object]
468
+ #
469
+ # @note
470
+ # If you simply want the first encountered expression as accumulator,
471
+ # consider using `cumreduce`.
472
+ def cumfold(acc, f, exprs, include_init: false)
473
+ acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
474
+ if exprs.is_a?(Expr)
475
+ exprs = [exprs]
476
+ end
477
+
478
+ exprs = Utils.selection_to_rbexpr_list(exprs)
479
+ Utils.wrap_expr(RbExpr.cumfold(acc._rbexpr, f, exprs, include_init))
480
+ end
388
481
 
389
482
  # def cumreduce
390
483
  # end
391
484
 
392
- # def any
393
- # end
485
+ # Evaluate columnwise or elementwise with a bitwise OR operation.
486
+ #
487
+ # @return [Expr]
488
+ def any(name)
489
+ if name.is_a?(String)
490
+ col(name).any
491
+ else
492
+ fold(lit(false), ->(a, b) { a.cast(:bool) | b.cast(:bool) }, name).alias("any")
493
+ end
494
+ end
394
495
 
395
- # def exclude
396
- # end
496
+ # Exclude certain columns from a wildcard/regex selection.
497
+ #
498
+ # @param columns [Object]
499
+ # Column(s) to exclude from selection.
500
+ # This can be:
501
+ #
502
+ # - a column name, or multiple column names
503
+ # - a regular expression starting with `^` and ending with `$`
504
+ # - a dtype or multiple dtypes
505
+ #
506
+ # @return [Object]
507
+ #
508
+ # @example
509
+ # df = Polars::DataFrame.new(
510
+ # {
511
+ # "aa" => [1, 2, 3],
512
+ # "ba" => ["a", "b", nil],
513
+ # "cc" => [nil, 2.5, 1.5]
514
+ # }
515
+ # )
516
+ # # =>
517
+ # # shape: (3, 3)
518
+ # # ┌─────┬──────┬──────┐
519
+ # # │ aa ┆ ba ┆ cc │
520
+ # # │ --- ┆ --- ┆ --- │
521
+ # # │ i64 ┆ str ┆ f64 │
522
+ # # ╞═════╪══════╪══════╡
523
+ # # │ 1 ┆ a ┆ null │
524
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
525
+ # # │ 2 ┆ b ┆ 2.5 │
526
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
527
+ # # │ 3 ┆ null ┆ 1.5 │
528
+ # # └─────┴──────┴──────┘
529
+ #
530
+ # @example Exclude by column name(s):
531
+ # df.select(Polars.exclude("ba"))
532
+ # # =>
533
+ # # shape: (3, 2)
534
+ # # ┌─────┬──────┐
535
+ # # │ aa ┆ cc │
536
+ # # │ --- ┆ --- │
537
+ # # │ i64 ┆ f64 │
538
+ # # ╞═════╪══════╡
539
+ # # │ 1 ┆ null │
540
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
541
+ # # │ 2 ┆ 2.5 │
542
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
543
+ # # │ 3 ┆ 1.5 │
544
+ # # └─────┴──────┘
545
+ #
546
+ # @example Exclude by regex, e.g. removing all columns whose names end with the letter "a":
547
+ # df.select(Polars.exclude("^.*a$"))
548
+ # # =>
549
+ # # shape: (3, 1)
550
+ # # ┌──────┐
551
+ # # │ cc │
552
+ # # │ --- │
553
+ # # │ f64 │
554
+ # # ╞══════╡
555
+ # # │ null │
556
+ # # ├╌╌╌╌╌╌┤
557
+ # # │ 2.5 │
558
+ # # ├╌╌╌╌╌╌┤
559
+ # # │ 1.5 │
560
+ # # └──────┘
561
+ def exclude(columns)
562
+ col("*").exclude(columns)
563
+ end
397
564
 
398
565
  # Do one of two things.
399
566
  #
@@ -429,11 +596,26 @@ module Polars
429
596
  end
430
597
  end
431
598
 
432
- # def groups
433
- # end
599
+ # Syntactic sugar for `Polars.col("foo").agg_groups`.
600
+ #
601
+ # @return [Object]
602
+ def groups(column)
603
+ col(column).agg_groups
604
+ end
434
605
 
435
- # def quantile
436
- # end
606
+ # Syntactic sugar for `Polars.col("foo").quantile(...)`.
607
+ #
608
+ # @param column [String]
609
+ # Column name.
610
+ # @param quantile [Float]
611
+ # Quantile between 0.0 and 1.0.
612
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
613
+ # Interpolation method.
614
+ #
615
+ # @return [Expr]
616
+ def quantile(column, quantile, interpolation: "nearest")
617
+ col(column).quantile(quantile, interpolation: interpolation)
618
+ end
437
619
 
438
620
  # Create a range expression (or Series).
439
621
  #
@@ -449,7 +631,7 @@ module Polars
449
631
  # @param eager [Boolean]
450
632
  # If eager evaluation is `true`, a Series is returned instead of an Expr.
451
633
  # @param dtype [Symbol]
452
- # Apply an explicit integer dtype to the resulting expression (default is Int64).
634
+ # Apply an explicit integer dtype to the resulting expression (default is `:i64`).
453
635
  #
454
636
  # @return [Expr, Series]
455
637
  #
@@ -474,14 +656,212 @@ module Polars
474
656
  end
475
657
  end
476
658
 
477
- # def argsort_by
478
- # end
659
+ # Find the indexes that would sort the columns.
660
+ #
661
+ # Argsort by multiple columns. The first column will be used for the ordering.
662
+ # If there are duplicates in the first column, the second column will be used to
663
+ # determine the ordering and so on.
664
+ #
665
+ # @param exprs [Object]
666
+ # Columns used to determine the ordering.
667
+ # @param reverse [Boolean]
668
+ # Default is ascending.
669
+ #
670
+ # @return [Expr]
671
+ def argsort_by(exprs, reverse: false)
672
+ if !exprs.is_a?(Array)
673
+ exprs = [exprs]
674
+ end
675
+ if reverse == true || reverse == false
676
+ reverse = [reverse] * exprs.length
677
+ end
678
+ exprs = Utils.selection_to_rbexpr_list(exprs)
679
+ Utils.wrap_expr(RbExpr.argsort_by(exprs, reverse))
680
+ end
479
681
 
480
- # def duration
481
- # end
682
+ # Create polars `Duration` from distinct time components.
683
+ #
684
+ # @return [Expr]
685
+ #
686
+ # @example
687
+ # df = Polars::DataFrame.new(
688
+ # {
689
+ # "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
690
+ # "add" => [1, 2]
691
+ # }
692
+ # )
693
+ # df.select(
694
+ # [
695
+ # (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
696
+ # (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
697
+ # (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
698
+ # (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
699
+ # "add_milliseconds"
700
+ # ),
701
+ # (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
702
+ # ]
703
+ # )
704
+ # # =>
705
+ # # shape: (2, 5)
706
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
707
+ # # │ add_weeks ┆ add_days ┆ add_seconds ┆ add_milliseconds ┆ add_hours │
708
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
709
+ # # │ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] │
710
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
711
+ # # │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
712
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
713
+ # # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
714
+ # # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
715
+ def duration(
716
+ days: nil,
717
+ seconds: nil,
718
+ nanoseconds: nil,
719
+ microseconds: nil,
720
+ milliseconds: nil,
721
+ minutes: nil,
722
+ hours: nil,
723
+ weeks: nil
724
+ )
725
+ if !hours.nil?
726
+ hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
727
+ end
728
+ if !minutes.nil?
729
+ minutes = Utils.expr_to_lit_or_expr(minutes, str_to_lit: false)._rbexpr
730
+ end
731
+ if !seconds.nil?
732
+ seconds = Utils.expr_to_lit_or_expr(seconds, str_to_lit: false)._rbexpr
733
+ end
734
+ if !milliseconds.nil?
735
+ milliseconds = Utils.expr_to_lit_or_expr(milliseconds, str_to_lit: false)._rbexpr
736
+ end
737
+ if !microseconds.nil?
738
+ microseconds = Utils.expr_to_lit_or_expr(microseconds, str_to_lit: false)._rbexpr
739
+ end
740
+ if !nanoseconds.nil?
741
+ nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
742
+ end
743
+ if !days.nil?
744
+ days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
745
+ end
746
+ if !weeks.nil?
747
+ weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
748
+ end
482
749
 
483
- # def format
484
- # end
750
+ Utils.wrap_expr(
751
+ _rb_duration(
752
+ days,
753
+ seconds,
754
+ nanoseconds,
755
+ microseconds,
756
+ milliseconds,
757
+ minutes,
758
+ hours,
759
+ weeks
760
+ )
761
+ )
762
+ end
763
+
764
+ # Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
765
+ #
766
+ # @param exprs [Object]
767
+ # Columns to concat into a Utf8 Series.
768
+ # @param sep [String]
769
+ # String value that will be used to separate the values.
770
+ #
771
+ # @return [Expr]
772
+ #
773
+ # @example
774
+ # df = Polars::DataFrame.new(
775
+ # {
776
+ # "a" => [1, 2, 3],
777
+ # "b" => ["dogs", "cats", nil],
778
+ # "c" => ["play", "swim", "walk"]
779
+ # }
780
+ # )
781
+ # df.with_columns(
782
+ # [
783
+ # Polars.concat_str(
784
+ # [
785
+ # Polars.col("a") * 2,
786
+ # Polars.col("b"),
787
+ # Polars.col("c")
788
+ # ],
789
+ # sep: " "
790
+ # ).alias("full_sentence")
791
+ # ]
792
+ # )
793
+ # # =>
794
+ # # shape: (3, 4)
795
+ # # ┌─────┬──────┬──────┬───────────────┐
796
+ # # │ a ┆ b ┆ c ┆ full_sentence │
797
+ # # │ --- ┆ --- ┆ --- ┆ --- │
798
+ # # │ i64 ┆ str ┆ str ┆ str │
799
+ # # ╞═════╪══════╪══════╪═══════════════╡
800
+ # # │ 1 ┆ dogs ┆ play ┆ 2 dogs play │
801
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
802
+ # # │ 2 ┆ cats ┆ swim ┆ 4 cats swim │
803
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
804
+ # # │ 3 ┆ null ┆ walk ┆ null │
805
+ # # └─────┴──────┴──────┴───────────────┘
806
+ def concat_str(exprs, sep: "")
807
+ exprs = Utils.selection_to_rbexpr_list(exprs)
808
+ return Utils.wrap_expr(RbExpr.concat_str(exprs, sep))
809
+ end
810
+
811
+ # Format expressions as a string.
812
+ #
813
+ # @param fstring [String]
814
+ # A string with placeholders.
815
+ # For example: "hello_{}" or "{}_world".
816
+ # @param args [Object]
817
+ # Expression(s) that fill the placeholders
818
+ #
819
+ # @return [Expr]
820
+ #
821
+ # @example
822
+ # df = Polars::DataFrame.new(
823
+ # {
824
+ # "a": ["a", "b", "c"],
825
+ # "b": [1, 2, 3]
826
+ # }
827
+ # )
828
+ # df.select(
829
+ # [
830
+ # Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
831
+ # ]
832
+ # )
833
+ # # =>
834
+ # # shape: (3, 1)
835
+ # # ┌─────────────┐
836
+ # # │ fmt │
837
+ # # │ --- │
838
+ # # │ str │
839
+ # # ╞═════════════╡
840
+ # # │ foo_a_bar_1 │
841
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
842
+ # # │ foo_b_bar_2 │
843
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
844
+ # # │ foo_c_bar_3 │
845
+ # # └─────────────┘
846
+ def format(fstring, *args)
847
+ if fstring.scan("{}").length != args.length
848
+ raise ArgumentError, "number of placeholders should equal the number of arguments"
849
+ end
850
+
851
+ exprs = []
852
+
853
+ arguments = args.each
854
+ fstring.split(/(\{\})/).each do |s|
855
+ if s == "{}"
856
+ e = Utils.expr_to_lit_or_expr(arguments.next, str_to_lit: false)
857
+ exprs << e
858
+ elsif s.length > 0
859
+ exprs << lit(s)
860
+ end
861
+ end
862
+
863
+ concat_str(exprs, sep: "")
864
+ end
485
865
 
486
866
  # Concat the arrays in a Series dtype List in linear time.
487
867
  #
@@ -491,8 +871,73 @@ module Polars
491
871
  Utils.wrap_expr(RbExpr.concat_lst(exprs))
492
872
  end
493
873
 
494
- # def collect_all
495
- # end
874
+ # Collect multiple LazyFrames at the same time.
875
+ #
876
+ # This runs all the computation graphs in parallel on the Polars thread pool.
877
+ #
878
+ # @param lazy_frames [Array<LazyFrame>]
879
+ # A list of LazyFrames to collect.
880
+ # @param type_coercion [Boolean]
881
+ # Do type coercion optimization.
882
+ # @param predicate_pushdown [Boolean]
883
+ # Do predicate pushdown optimization.
884
+ # @param projection_pushdown [Boolean]
885
+ # Do projection pushdown optimization.
886
+ # @param simplify_expression [Boolean]
887
+ # Run simplify expressions optimization.
888
+ # @param string_cache [Boolean]
889
+ # This argument is deprecated and will be ignored
890
+ # @param no_optimization [Boolean]
891
+ # Turn off optimizations.
892
+ # @param slice_pushdown [Boolean]
893
+ # Slice pushdown optimization.
894
+ # @param common_subplan_elimination [Boolean]
895
+ # Will try to cache branching subplans that occur on self-joins or unions.
896
+ # @param allow_streaming [Boolean]
897
+ # Run parts of the query in a streaming fashion (this is in an alpha state)
898
+ #
899
+ # @return [Array]
900
+ def collect_all(
901
+ lazy_frames,
902
+ type_coercion: true,
903
+ predicate_pushdown: true,
904
+ projection_pushdown: true,
905
+ simplify_expression: true,
906
+ string_cache: false,
907
+ no_optimization: false,
908
+ slice_pushdown: true,
909
+ common_subplan_elimination: true,
910
+ allow_streaming: false
911
+ )
912
+ if no_optimization
913
+ predicate_pushdown = false
914
+ projection_pushdown = false
915
+ slice_pushdown = false
916
+ common_subplan_elimination = false
917
+ end
918
+
919
+ prepared = []
920
+
921
+ lazy_frames.each do |lf|
922
+ ldf = lf._ldf.optimization_toggle(
923
+ type_coercion,
924
+ predicate_pushdown,
925
+ projection_pushdown,
926
+ simplify_expression,
927
+ slice_pushdown,
928
+ common_subplan_elimination,
929
+ allow_streaming
930
+ )
931
+ prepared << ldf
932
+ end
933
+
934
+ out = _collect_all(prepared)
935
+
936
+ # wrap the rbdataframes into dataframe
937
+ result = out.map { |rbdf| Utils.wrap_df(rbdf) }
938
+
939
+ result
940
+ end
496
941
 
497
942
  # Run polars expressions without a context.
498
943
  #
@@ -621,11 +1066,105 @@ module Polars
621
1066
  end
622
1067
  end
623
1068
 
624
- # def coalesce
625
- # end
1069
+ # Folds the expressions from left to right, keeping the first non-null value.
1070
+ #
1071
+ # @param exprs [Object]
1072
+ # Expressions to coalesce.
1073
+ #
1074
+ # @return [Expr]
1075
+ #
1076
+ # @example
1077
+ # df = Polars::DataFrame.new(
1078
+ # [
1079
+ # [nil, 1.0, 1.0],
1080
+ # [nil, 2.0, 2.0],
1081
+ # [nil, nil, 3.0],
1082
+ # [nil, nil, nil]
1083
+ # ],
1084
+ # columns: [["a", :f64], ["b", :f64], ["c", :f64]]
1085
+ # )
1086
+ # df.with_column(Polars.coalesce(["a", "b", "c", 99.9]).alias("d"))
1087
+ # # =>
1088
+ # # shape: (4, 4)
1089
+ # # ┌──────┬──────┬──────┬──────┐
1090
+ # # │ a ┆ b ┆ c ┆ d │
1091
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1092
+ # # │ f64 ┆ f64 ┆ f64 ┆ f64 │
1093
+ # # ╞══════╪══════╪══════╪══════╡
1094
+ # # │ null ┆ 1.0 ┆ 1.0 ┆ 1.0 │
1095
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1096
+ # # │ null ┆ 2.0 ┆ 2.0 ┆ 2.0 │
1097
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1098
+ # # │ null ┆ null ┆ 3.0 ┆ 3.0 │
1099
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1100
+ # # │ null ┆ null ┆ null ┆ 99.9 │
1101
+ # # └──────┴──────┴──────┴──────┘
1102
+ def coalesce(exprs)
1103
+ exprs = Utils.selection_to_rbexpr_list(exprs)
1104
+ Utils.wrap_expr(_coalesce_exprs(exprs))
1105
+ end
626
1106
 
627
- # def from_epoch
628
- # end
1107
+ # Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
1108
+ #
1109
+ # Depending on the `unit` provided, this function will return a different dtype:
1110
+ # - unit: "d" returns pl.Date
1111
+ # - unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
1112
+ # - unit: "ms" returns pl.Datetime["ms"]
1113
+ # - unit: "us" returns pl.Datetime["us"]
1114
+ # - unit: "ns" returns pl.Datetime["ns"]
1115
+ #
1116
+ # @param column [Object]
1117
+ # Series or expression to parse integers to pl.Datetime.
1118
+ # @param unit [String]
1119
+ # The unit of the timesteps since epoch time.
1120
+ # @param eager [Boolean]
1121
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
1122
+ #
1123
+ # @return [Object]
1124
+ #
1125
+ # @example
1126
+ # df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
1127
+ # df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
1128
+ # # =>
1129
+ # # shape: (2, 1)
1130
+ # # ┌─────────────────────┐
1131
+ # # │ timestamp │
1132
+ # # │ --- │
1133
+ # # │ datetime[μs] │
1134
+ # # ╞═════════════════════╡
1135
+ # # │ 2022-10-25 07:31:17 │
1136
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1137
+ # # │ 2022-10-25 07:31:39 │
1138
+ # # └─────────────────────┘
1139
+ def from_epoch(column, unit: "s", eager: false)
1140
+ if column.is_a?(String)
1141
+ column = col(column)
1142
+ elsif !column.is_a?(Series) && !column.is_a?(Expr)
1143
+ column = Series.new(column)
1144
+ end
1145
+
1146
+ if unit == "d"
1147
+ expr = column.cast(:date)
1148
+ elsif unit == "s"
1149
+ raise Todo
1150
+ # expr = (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
1151
+ elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
1152
+ raise Todo
1153
+ # expr = column.cast(Datetime(unit))
1154
+ else
1155
+ raise ArgumentError, "'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '#{unit}'."
1156
+ end
1157
+
1158
+ if eager
1159
+ if !column.is_a?(Series)
1160
+ raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
1161
+ else
1162
+ column.to_frame.select(expr).to_series
1163
+ end
1164
+ else
1165
+ expr
1166
+ end
1167
+ end
629
1168
 
630
1169
  # Start a "when, then, otherwise" expression.
631
1170
  #
@@ -568,7 +568,7 @@ module Polars
568
568
  # # └────────────┘
569
569
  def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
570
570
  raise Todo if name_generator
571
- Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator))
571
+ Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator, 0))
572
572
  end
573
573
 
574
574
  # Run any polars expression against the lists' elements.