polars-df 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -158,7 +158,7 @@ module Polars
158
158
  # TODO
159
159
  Utils.wrap_expr(_sum_exprs(exprs))
160
160
  else
161
- raise Todo
161
+ fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
162
162
  end
163
163
  end
164
164
 
@@ -294,8 +294,69 @@ module Polars
294
294
  Utils.wrap_expr(RbExpr.lit(value))
295
295
  end
296
296
 
297
- # def cumsum
298
- # end
297
# Cumulatively sum values in a column/Series, or horizontally across a
# list of columns/expressions.
#
# When given a list of columns, every cumulative result becomes a separate
# field in a Struct column named "cumsum".
#
# @param column [Object]
#   Column(s) to be used in aggregation.
#
# @return [Object]
#
# @example Cumulatively sum a column by name:
#   df.select(Polars.cumsum("a"))
#
# @example Cumulatively sum a list of columns/expressions horizontally:
#   df.with_column(Polars.cumsum(["a", "c"]))
def cumsum(column)
  case column
  when Series
    column.cumsum
  when String
    col(column).cumsum
  else
    # Horizontal case: left fold starting at 0 (u32), one struct field per step.
    cumfold(lit(0).cast(:u32), ->(acc, x) { acc + x }, column).alias("cumsum")
  end
end
299
360
 
300
361
  # Compute the spearman rank correlation between two columns.
301
362
  #
@@ -367,7 +428,7 @@ module Polars
367
428
  # def apply
368
429
  # end
369
430
 
370
- # Accumulate over multiple columns horizontally/ row wise with a left fold.
431
+ # Accumulate over multiple columns horizontally/row wise with a left fold.
371
432
  #
372
433
  # @return [Expr]
373
434
  def fold(acc, f, exprs)
@@ -383,17 +444,118 @@ module Polars
383
444
  # def reduce
384
445
  # end
385
446
 
386
- # def cumfold
387
- # end
447
# Cumulatively accumulate over multiple columns horizontally/row wise with
# a left fold.
#
# Every cumulative result is added as a separate field in a Struct column.
#
# @param acc [Object]
#   Accumulator expression — the value the fold starts from
#   (for a sum this could for instance be `lit(0)`).
# @param f [Object]
#   Function applied over the accumulator and the value:
#   `f.(acc, value) -> new_value`.
# @param exprs [Object]
#   Expressions to aggregate over. May also be a wildcard expression.
# @param include_init [Boolean]
#   Include the initial accumulator state as a struct field.
#
# @return [Object]
#
# @note
#   If you simply want the first encountered expression as accumulator,
#   consider using `cumreduce`.
def cumfold(acc, f, exprs, include_init: false)
  acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  exprs = [exprs] if exprs.is_a?(Expr)
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(RbExpr.cumfold(acc._rbexpr, f, rbexprs, include_init))
end
388
476
 
389
477
  # def cumreduce
390
478
  # end
391
479
 
392
- # def any
393
- # end
480
# Evaluate columnwise or elementwise with a bitwise OR operation.
#
# A string is treated as a column name; anything else is folded
# horizontally with `|` after casting each value to boolean.
#
# @return [Expr]
def any(name)
  return col(name).any if name.is_a?(String)

  fold(lit(false), ->(acc, x) { acc.cast(:bool) | x.cast(:bool) }, name).alias("any")
end
394
490
 
395
- # def exclude
396
- # end
491
# Exclude certain columns from a wildcard/regex selection.
#
# @param columns [Object]
#   Column(s) to exclude from selection. This can be:
#
#   - a column name, or multiple column names
#   - a regular expression starting with `^` and ending with `$`
#   - a dtype or multiple dtypes
#
# @return [Object]
#
# @example Exclude by column name(s):
#   df.select(Polars.exclude("ba"))
#
# @example Exclude by regex, e.g. removing all columns whose names end with "a":
#   df.select(Polars.exclude("^.*a$"))
def exclude(columns)
  # Delegate to Expr#exclude on a wildcard selection of all columns.
  col("*").exclude(columns)
end
397
559
 
398
560
  # Do one of two things.
399
561
  #
@@ -429,11 +591,26 @@ module Polars
429
591
  end
430
592
  end
431
593
 
432
- # def groups
433
- # end
594
# Syntactic sugar for `Polars.col("foo").agg_groups`.
#
# @param column [String]
#   Column name.
#
# @return [Object]
def groups(column)
  col(column).agg_groups
end
434
600
 
435
- # def quantile
436
- # end
601
# Syntactic sugar for `Polars.col("foo").quantile(...)`.
#
# @param column [String]
#   Column name.
# @param quantile [Float]
#   Quantile between 0.0 and 1.0.
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
#   Interpolation method.
#
# @return [Expr]
def quantile(column, quantile, interpolation: "nearest")
  col(column).quantile(quantile, interpolation: interpolation)
end
437
614
 
438
615
  # Create a range expression (or Series).
439
616
  #
@@ -449,7 +626,7 @@ module Polars
449
626
  # @param eager [Boolean]
450
627
  # If eager evaluation is `True`, a Series is returned instead of an Expr.
451
628
  # @param dtype [Symbol]
452
- # Apply an explicit integer dtype to the resulting expression (default is Int64).
629
+ # Apply an explicit integer dtype to the resulting expression (default is `:i64`).
453
630
  #
454
631
  # @return [Expr, Series]
455
632
  #
@@ -474,14 +651,212 @@ module Polars
474
651
  end
475
652
  end
476
653
 
477
- # def argsort_by
478
- # end
654
# Find the indexes that would sort the columns.
#
# Argsort by multiple columns. The first column provides the primary
# ordering; ties are broken by each following column in turn.
#
# @param exprs [Object]
#   Columns used to determine the ordering.
# @param reverse [Boolean]
#   Default is ascending.
#
# @return [Expr]
def argsort_by(exprs, reverse: false)
  exprs = [exprs] unless exprs.is_a?(Array)
  # A single boolean is broadcast to one flag per column.
  reverse = [reverse] * exprs.length if [true, false].include?(reverse)
  Utils.wrap_expr(RbExpr.argsort_by(Utils.selection_to_rbexpr_list(exprs), reverse))
end
479
676
 
480
- # def duration
481
- # end
677
# Create polars `Duration` from distinct time components.
#
# Each component may be an integer literal, a column name, or an
# expression; components left as `nil` are omitted.
#
# @return [Expr]
#
# @example
#   df.select(
#     (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks")
#   )
def duration(
  days: nil,
  seconds: nil,
  nanoseconds: nil,
  microseconds: nil,
  milliseconds: nil,
  minutes: nil,
  hours: nil,
  weeks: nil
)
  # Convert a provided component to its underlying RbExpr; nil passes through.
  to_rbexpr = ->(v) { v.nil? ? nil : Utils.expr_to_lit_or_expr(v, str_to_lit: false)._rbexpr }

  Utils.wrap_expr(
    _rb_duration(
      to_rbexpr.(days),
      to_rbexpr.(seconds),
      to_rbexpr.(nanoseconds),
      to_rbexpr.(microseconds),
      to_rbexpr.(milliseconds),
      to_rbexpr.(minutes),
      to_rbexpr.(hours),
      to_rbexpr.(weeks)
    )
  )
end
758
+
759
# Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast
# to Utf8.
#
# @param exprs [Object]
#   Columns to concat into a Utf8 Series.
# @param sep [String]
#   String value that will be used to separate the values.
#
# @return [Expr]
#
# @example
#   df.with_columns(
#     [
#       Polars.concat_str(
#         [Polars.col("a") * 2, Polars.col("b"), Polars.col("c")],
#         sep: " "
#       ).alias("full_sentence")
#     ]
#   )
def concat_str(exprs, sep: "")
  exprs = Utils.selection_to_rbexpr_list(exprs)
  # Implicit return of the last expression (idiomatic Ruby; the explicit
  # `return` was redundant here).
  Utils.wrap_expr(RbExpr.concat_str(exprs, sep))
end
805
+
806
# Format expressions as a string.
#
# @param fstring [String]
#   A string with "{}" placeholders, e.g. "hello_{}" or "{}_world".
# @param args [Object]
#   Expression(s) that fill the placeholders.
#
# @return [Expr]
#
# @example
#   df.select(
#     [
#       Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
#     ]
#   )
def format(fstring, *args)
  if fstring.scan("{}").length != args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  remaining = args.each
  # Split keeping the "{}" separators, substitute each placeholder with the
  # next argument, and turn remaining text fragments into literals.
  exprs = fstring.split(/(\{\})/).filter_map do |part|
    if part == "{}"
      Utils.expr_to_lit_or_expr(remaining.next, str_to_lit: false)
    elsif !part.empty?
      lit(part)
    end
  end

  concat_str(exprs, sep: "")
end
485
860
 
486
861
  # Concat the arrays in a Series dtype List in linear time.
487
862
  #
@@ -491,8 +866,73 @@ module Polars
491
866
  Utils.wrap_expr(RbExpr.concat_lst(exprs))
492
867
  end
493
868
 
494
- # def collect_all
495
- # end
869
# Collect multiple LazyFrames at the same time.
#
# This runs all the computation graphs in parallel on the Polars threadpool.
#
# @param lazy_frames [Array]
#   A list of LazyFrames to collect.
# @param type_coercion [Boolean]
#   Do type coercion optimization.
# @param predicate_pushdown [Boolean]
#   Do predicate pushdown optimization.
# @param projection_pushdown [Boolean]
#   Do projection pushdown optimization.
# @param simplify_expression [Boolean]
#   Run simplify expressions optimization.
# @param string_cache [Boolean]
#   This argument is deprecated and will be ignored.
# @param no_optimization [Boolean]
#   Turn off optimizations.
# @param slice_pushdown [Boolean]
#   Slice pushdown optimization.
# @param common_subplan_elimination [Boolean]
#   Will try to cache branching subplans that occur on self-joins or unions.
# @param allow_streaming [Boolean]
#   Run parts of the query in a streaming fashion (this is in an alpha state).
#
# @return [Array]
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  if no_optimization
    predicate_pushdown = false
    projection_pushdown = false
    slice_pushdown = false
    common_subplan_elimination = false
  end

  prepared = lazy_frames.map do |lf|
    lf._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming
    )
  end

  # Wrap the returned RbDataFrames into DataFrames.
  _collect_all(prepared).map { |rbdf| Utils.wrap_df(rbdf) }
end
496
936
 
497
937
  # Run polars expressions without a context.
498
938
  #
@@ -621,11 +1061,105 @@ module Polars
621
1061
  end
622
1062
  end
623
1063
 
624
- # def coalesce
625
- # end
1064
# Folds the expressions from left to right, keeping the first non-null value.
#
# @param exprs [Object]
#   Expressions to coalesce.
#
# @return [Expr]
#
# @example
#   df.with_column(Polars.coalesce(["a", "b", "c", 99.9]).alias("d"))
def coalesce(exprs)
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(_coalesce_exprs(rbexprs))
end
626
1101
 
627
- # def from_epoch
628
- # end
1102
# Utility function that parses an epoch timestamp (or Unix time) to a
# Polars Date(time).
#
# Depending on the `unit` provided, this function will return a different dtype:
# - unit: "d" returns a Date
# - unit: "s" returns a Datetime["us"] (Datetime's default)
# - unit: "ms" returns a Datetime["ms"]
# - unit: "us" returns a Datetime["us"]
# - unit: "ns" returns a Datetime["ns"]
#
# @param column [Object]
#   Series or expression to parse integers to Datetime.
# @param unit [String]
#   The unit of the timesteps since epoch time.
# @param eager [Boolean]
#   If eager evaluation is `true`, a Series is returned instead of an Expr.
#
# @return [Object]
#
# @example
#   df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
#   df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
def from_epoch(column, unit: "s", eager: false)
  if column.is_a?(String)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    # Assume it's an array-like of integers.
    column = Series.new(column)
  end

  if unit == "d"
    expr = column.cast(:date)
  elsif unit == "s"
    raise Todo
    # expr = (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
  elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
    raise Todo
    # expr = column.cast(Datetime(unit))
  else
    # Fixed: the message previously contained "{{...}}" — Python f-string
    # escape residue that printed literal double braces in Ruby.
    raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
  end

  if eager
    if !column.is_a?(Series)
      raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
    else
      column.to_frame.select(expr).to_series
    end
  else
    expr
  end
end
629
1163
 
630
1164
  # Start a "when, then, otherwise" expression.
631
1165
  #
@@ -0,0 +1,35 @@
1
module Polars
  # A rolling grouper.
  #
  # This has an `.agg` method which will allow you to run all polars
  # expressions in a groupby context.
  class RollingGroupBy
    # @param df [Object] DataFrame to group.
    # @param index_column [Object] Column used as the rolling index.
    # @param period [Object] Window period (normalized to a polars duration string).
    # @param offset [Object] Window offset (normalized to a polars duration string).
    # @param closed [Object] Which side(s) of the window are closed.
    # @param by [Object] Additional grouping column(s).
    def initialize(
      df,
      index_column,
      period,
      offset,
      closed,
      by
    )
      @df = df
      @time_column = index_column
      @period = Utils._timedelta_to_pl_duration(period)
      @offset = Utils._timedelta_to_pl_duration(offset)
      @closed = closed
      @by = by
    end

    # Run the given aggregations in a rolling groupby context and collect.
    def agg(aggs)
      @df.lazy
        .groupby_rolling(
          index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by
        )
        .agg(aggs)
        .collect(no_optimization: true, string_cache: false)
    end
  end
end