polars-df 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -158,7 +158,7 @@ module Polars
158
158
  # TODO
159
159
  Utils.wrap_expr(_sum_exprs(exprs))
160
160
  else
161
- raise Todo
161
+ fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
162
162
  end
163
163
  end
164
164
 
@@ -294,8 +294,69 @@ module Polars
294
294
  Utils.wrap_expr(RbExpr.lit(value))
295
295
  end
296
296
 
297
- # def cumsum
298
- # end
297
+ # Cumulatively sum values in a column/Series, or horizontally across list of columns/expressions.
298
+ #
299
+ # @param column [Object]
300
+ # Column(s) to be used in aggregation.
301
+ #
302
+ # @return [Object]
303
+ #
304
+ # @example
305
+ # df = Polars::DataFrame.new(
306
+ # {
307
+ # "a" => [1, 2],
308
+ # "b" => [3, 4],
309
+ # "c" => [5, 6]
310
+ # }
311
+ # )
312
+ # # =>
313
+ # # shape: (2, 3)
314
+ # # ┌─────┬─────┬─────┐
315
+ # # │ a ┆ b ┆ c │
316
+ # # │ --- ┆ --- ┆ --- │
317
+ # # │ i64 ┆ i64 ┆ i64 │
318
+ # # ╞═════╪═════╪═════╡
319
+ # # │ 1 ┆ 3 ┆ 5 │
320
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
321
+ # # │ 2 ┆ 4 ┆ 6 │
322
+ # # └─────┴─────┴─────┘
323
+ #
324
+ # @example Cumulatively sum a column by name:
325
+ # df.select(Polars.cumsum("a"))
326
+ # # =>
327
+ # # shape: (2, 1)
328
+ # # ┌─────┐
329
+ # # │ a │
330
+ # # │ --- │
331
+ # # │ i64 │
332
+ # # ╞═════╡
333
+ # # │ 1 │
334
+ # # ├╌╌╌╌╌┤
335
+ # # │ 3 │
336
+ # # └─────┘
337
+ #
338
+ # @example Cumulatively sum a list of columns/expressions horizontally:
339
+ # df.with_column(Polars.cumsum(["a", "c"]))
340
+ # # =>
341
+ # # shape: (2, 4)
342
+ # # ┌─────┬─────┬─────┬───────────┐
343
+ # # │ a ┆ b ┆ c ┆ cumsum │
344
+ # # │ --- ┆ --- ┆ --- ┆ --- │
345
+ # # │ i64 ┆ i64 ┆ i64 ┆ struct[2] │
346
+ # # ╞═════╪═════╪═════╪═══════════╡
347
+ # # │ 1 ┆ 3 ┆ 5 ┆ {1,6} │
348
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
349
+ # # │ 2 ┆ 4 ┆ 6 ┆ {2,8} │
350
+ # # └─────┴─────┴─────┴───────────┘
351
def cumsum(column)
  # A Series already knows how to cumulatively sum itself.
  return column.cumsum if column.is_a?(Series)

  # A String is treated as a column name.
  return col(column).cumsum if column.is_a?(String)

  # Anything else is handed to cumfold with a u32 zero as the starting
  # accumulator and plain addition as the step; the result is named "cumsum".
  zero = lit(0).cast(:u32)
  add = ->(running, value) { running + value }
  cumfold(zero, add, column).alias("cumsum")
end
299
360
 
300
361
  # Compute the spearman rank correlation between two columns.
301
362
  #
@@ -367,7 +428,7 @@ module Polars
367
428
  # def apply
368
429
  # end
369
430
 
370
- # Accumulate over multiple columns horizontally/ row wise with a left fold.
431
+ # Accumulate over multiple columns horizontally/row wise with a left fold.
371
432
  #
372
433
  # @return [Expr]
373
434
  def fold(acc, f, exprs)
@@ -383,17 +444,118 @@ module Polars
383
444
  # def reduce
384
445
  # end
385
446
 
386
- # def cumfold
387
- # end
447
+ # Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
448
+ #
449
+ # Every cumulative result is added as a separate field in a Struct column.
450
+ #
451
+ # @param acc [Object]
452
+ # Accumulator Expression. This is the value that will be initialized when the fold
453
+ # starts. For a sum this could for instance be lit(0).
454
+ # @param f [Object]
455
+ # Function to apply over the accumulator and the value.
456
+ # Fn(acc, value) -> new_value
457
+ # @param exprs [Object]
458
+ # Expressions to aggregate over. May also be a wildcard expression.
459
+ # @param include_init [Boolean]
460
+ # Include the initial accumulator state as struct field.
461
+ #
462
+ # @return [Object]
463
+ #
464
+ # @note
465
+ # If you simply want the first encountered expression as accumulator,
466
+ # consider using `cumreduce`.
467
def cumfold(acc, f, exprs, include_init: false)
  # The accumulator may be given as a plain value; strings become literals here.
  acc_expr = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)

  # A lone expression is wrapped so the conversion below always sees a list.
  exprs = [exprs] if exprs.is_a?(Expr)

  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  folded = RbExpr.cumfold(acc_expr._rbexpr, f, rbexprs, include_init)
  Utils.wrap_expr(folded)
end
388
476
 
389
477
  # def cumreduce
390
478
  # end
391
479
 
392
- # def any
393
- # end
480
# Evaluate columnwise or elementwise with a bitwise OR operation.
#
# A String is treated as a column name and delegated to `col(name).any`.
# Any other input is folded horizontally, starting from `lit(false)`, OR-ing
# the accumulator and each value after casting both to `:bool`; that result
# is aliased "any".
#
# @param name [Object]
#   A column name, or the expression(s) to fold over.
#
# @return [Expr]
def any(name)
  return col(name).any if name.is_a?(String)

  seed = lit(false)
  bool_or = ->(left, right) { left.cast(:bool) | right.cast(:bool) }
  fold(seed, bool_or, name).alias("any")
end
394
490
 
395
- # def exclude
396
- # end
491
+ # Exclude certain columns from a wildcard/regex selection.
492
+ #
493
+ # @param columns [Object]
494
+ # Column(s) to exclude from selection
495
+ # This can be:
496
+ #
497
+ # - a column name, or multiple column names
498
+ # - a regular expression starting with `^` and ending with `$`
499
+ # - a dtype or multiple dtypes
500
+ #
501
+ # @return [Object]
502
+ #
503
+ # @example
504
+ # df = Polars::DataFrame.new(
505
+ # {
506
+ # "aa" => [1, 2, 3],
507
+ # "ba" => ["a", "b", nil],
508
+ # "cc" => [nil, 2.5, 1.5]
509
+ # }
510
+ # )
511
+ # # =>
512
+ # # shape: (3, 3)
513
+ # # ┌─────┬──────┬──────┐
514
+ # # │ aa ┆ ba ┆ cc │
515
+ # # │ --- ┆ --- ┆ --- │
516
+ # # │ i64 ┆ str ┆ f64 │
517
+ # # ╞═════╪══════╪══════╡
518
+ # # │ 1 ┆ a ┆ null │
519
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
520
+ # # │ 2 ┆ b ┆ 2.5 │
521
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
522
+ # # │ 3 ┆ null ┆ 1.5 │
523
+ # # └─────┴──────┴──────┘
524
+ #
525
+ # @example Exclude by column name(s):
526
+ # df.select(Polars.exclude("ba"))
527
+ # # =>
528
+ # # shape: (3, 2)
529
+ # # ┌─────┬──────┐
530
+ # # │ aa ┆ cc │
531
+ # # │ --- ┆ --- │
532
+ # # │ i64 ┆ f64 │
533
+ # # ╞═════╪══════╡
534
+ # # │ 1 ┆ null │
535
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
536
+ # # │ 2 ┆ 2.5 │
537
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
538
+ # # │ 3 ┆ 1.5 │
539
+ # # └─────┴──────┘
540
+ #
541
+ # @example Exclude by regex, e.g. removing all columns whose names end with the letter "a":
542
+ # df.select(Polars.exclude("^.*a$"))
543
+ # # =>
544
+ # # shape: (3, 1)
545
+ # # ┌──────┐
546
+ # # │ cc │
547
+ # # │ --- │
548
+ # # │ f64 │
549
+ # # ╞══════╡
550
+ # # │ null │
551
+ # # ├╌╌╌╌╌╌┤
552
+ # # │ 2.5 │
553
+ # # ├╌╌╌╌╌╌┤
554
+ # # │ 1.5 │
555
+ # # └──────┘
556
def exclude(columns)
  # Start from the wildcard selection, then drop the requested columns.
  everything = col("*")
  everything.exclude(columns)
end
397
559
 
398
560
  # Do one of two things.
399
561
  #
@@ -429,11 +591,26 @@ module Polars
429
591
  end
430
592
  end
431
593
 
432
- # def groups
433
- # end
594
# Shorthand for `Polars.col(column).agg_groups`.
#
# @param column [Object]
#   Passed unchanged to `col` to select the column.
#
# @return [Object]
def groups(column)
  selected = col(column)
  selected.agg_groups
end
434
600
 
435
- # def quantile
436
- # end
601
# Shorthand for `Polars.col(column).quantile(quantile, interpolation: interpolation)`.
#
# @param column [String]
#   Column name.
# @param quantile [Float]
#   Quantile between 0.0 and 1.0.
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
#   Interpolation method.
#
# @return [Expr]
def quantile(column, quantile, interpolation: "nearest")
  selected = col(column)
  selected.quantile(quantile, interpolation: interpolation)
end
437
614
 
438
615
  # Create a range expression (or Series).
439
616
  #
@@ -449,7 +626,7 @@ module Polars
449
626
  # @param eager [Boolean]
450
627
  # If eager evaluation is `true`, a Series is returned instead of an Expr.
451
628
  # @param dtype [Symbol]
452
- # Apply an explicit integer dtype to the resulting expression (default is Int64).
629
+ # Apply an explicit integer dtype to the resulting expression (default is `:i64`).
453
630
  #
454
631
  # @return [Expr, Series]
455
632
  #
@@ -474,14 +651,212 @@ module Polars
474
651
  end
475
652
  end
476
653
 
477
- # def argsort_by
478
- # end
654
+ # Find the indexes that would sort the columns.
655
+ #
656
+ # Argsort by multiple columns. The first column will be used for the ordering.
657
+ # If there are duplicates in the first column, the second column will be used to
658
+ # determine the ordering and so on.
659
+ #
660
+ # @param exprs [Object]
661
+ # Columns used to determine the ordering.
662
+ # @param reverse [Boolean]
663
+ # Default is ascending.
664
+ #
665
+ # @return [Expr]
666
def argsort_by(exprs, reverse: false)
  # Accept a single expression as well as an Array of them.
  exprs = [exprs] unless exprs.is_a?(Array)

  # A single boolean applies to every column: expand it to one flag per expression.
  reverse = Array.new(exprs.length, reverse) if [true, false].include?(reverse)

  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  sorted_indexes = RbExpr.argsort_by(rbexprs, reverse)
  Utils.wrap_expr(sorted_indexes)
end
479
676
 
480
- # def duration
481
- # end
677
+ # Create polars `Duration` from distinct time components.
678
+ #
679
+ # @return [Expr]
680
+ #
681
+ # @example
682
+ # df = Polars::DataFrame.new(
683
+ # {
684
+ # "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
685
+ # "add" => [1, 2]
686
+ # }
687
+ # )
688
+ # df.select(
689
+ # [
690
+ # (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
691
+ # (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
692
+ # (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
693
+ # (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
694
+ # "add_milliseconds"
695
+ # ),
696
+ # (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
697
+ # ]
698
+ # )
699
+ # # =>
700
+ # # shape: (2, 5)
701
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
702
+ # # │ add_weeks ┆ add_days ┆ add_seconds ┆ add_milliseconds ┆ add_hours │
703
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
704
+ # # │ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] │
705
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
706
+ # # │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
707
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
708
+ # # │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
709
+ # # └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘
710
def duration(
  days: nil,
  seconds: nil,
  nanoseconds: nil,
  microseconds: nil,
  milliseconds: nil,
  minutes: nil,
  hours: nil,
  weeks: nil
)
  # Components left as nil stay nil; every other value is converted with
  # str_to_lit: false and unwrapped to its underlying `_rbexpr`.
  to_rbexpr = lambda do |component|
    component.nil? ? nil : Utils.expr_to_lit_or_expr(component, str_to_lit: false)._rbexpr
  end

  # Conversion order: hours, minutes, seconds, milliseconds, microseconds,
  # nanoseconds, days, weeks.
  hours, minutes, seconds, milliseconds, microseconds, nanoseconds, days, weeks =
    [hours, minutes, seconds, milliseconds, microseconds, nanoseconds, days, weeks].map do |component|
      to_rbexpr.call(component)
    end

  # The native helper takes the components positionally, in signature order.
  Utils.wrap_expr(
    _rb_duration(days, seconds, nanoseconds, microseconds, milliseconds, minutes, hours, weeks)
  )
end
758
+
759
+ # Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.
760
+ #
761
+ # @param exprs [Object]
762
+ # Columns to concat into a Utf8 Series.
763
+ # @param sep [String]
764
+ # String value that will be used to separate the values.
765
+ #
766
+ # @return [Expr]
767
+ #
768
+ # @example
769
+ # df = Polars::DataFrame.new(
770
+ # {
771
+ # "a" => [1, 2, 3],
772
+ # "b" => ["dogs", "cats", nil],
773
+ # "c" => ["play", "swim", "walk"]
774
+ # }
775
+ # )
776
+ # df.with_columns(
777
+ # [
778
+ # Polars.concat_str(
779
+ # [
780
+ # Polars.col("a") * 2,
781
+ # Polars.col("b"),
782
+ # Polars.col("c")
783
+ # ],
784
+ # sep: " "
785
+ # ).alias("full_sentence")
786
+ # ]
787
+ # )
788
+ # # =>
789
+ # # shape: (3, 4)
790
+ # # ┌─────┬──────┬──────┬───────────────┐
791
+ # # │ a ┆ b ┆ c ┆ full_sentence │
792
+ # # │ --- ┆ --- ┆ --- ┆ --- │
793
+ # # │ i64 ┆ str ┆ str ┆ str │
794
+ # # ╞═════╪══════╪══════╪═══════════════╡
795
+ # # │ 1 ┆ dogs ┆ play ┆ 2 dogs play │
796
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
797
+ # # │ 2 ┆ cats ┆ swim ┆ 4 cats swim │
798
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
799
+ # # │ 3 ┆ null ┆ walk ┆ null │
800
+ # # └─────┴──────┴──────┴───────────────┘
801
def concat_str(exprs, sep: "")
  # Normalise the selection, concatenate with the separator, then wrap the result.
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  joined = RbExpr.concat_str(rbexprs, sep)
  Utils.wrap_expr(joined)
end
805
+
806
+ # Format expressions as a string.
807
+ #
808
+ # @param fstring [String]
809
+ # A string with placeholders.
810
+ # For example: "hello_{}" or "{}_world"
811
+ # @param args [Object]
812
+ # Expression(s) that fill the placeholders
813
+ #
814
+ # @return [Expr]
815
+ #
816
+ # @example
817
+ # df = Polars::DataFrame.new(
818
+ # {
819
+ # "a": ["a", "b", "c"],
820
+ # "b": [1, 2, 3]
821
+ # }
822
+ # )
823
+ # df.select(
824
+ # [
825
+ # Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
826
+ # ]
827
+ # )
828
+ # # =>
829
+ # # shape: (3, 1)
830
+ # # ┌─────────────┐
831
+ # # │ fmt │
832
+ # # │ --- │
833
+ # # │ str │
834
+ # # ╞═════════════╡
835
+ # # │ foo_a_bar_1 │
836
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
837
+ # # │ foo_b_bar_2 │
838
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
839
+ # # │ foo_c_bar_3 │
840
+ # # └─────────────┘
841
def format(fstring, *args)
  placeholder_count = fstring.scan("{}").length
  unless placeholder_count == args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  # Splitting on a captured "{}" keeps the placeholders in the result, so the
  # pieces alternate between literal text and "{}" markers (empty pieces dropped).
  next_arg = 0
  pieces = fstring.split(/(\{\})/).reject(&:empty?).map do |piece|
    if piece == "{}"
      # Each placeholder consumes the next positional argument, left to right.
      value = args[next_arg]
      next_arg += 1
      Utils.expr_to_lit_or_expr(value, str_to_lit: false)
    else
      lit(piece)
    end
  end

  concat_str(pieces, sep: "")
end
485
860
 
486
861
  # Concat the arrays in a Series dtype List in linear time.
487
862
  #
@@ -491,8 +866,73 @@ module Polars
491
866
  Utils.wrap_expr(RbExpr.concat_lst(exprs))
492
867
  end
493
868
 
494
- # def collect_all
495
- # end
869
+ # Collect multiple LazyFrames at the same time.
870
+ #
871
+ # This runs all the computation graphs in parallel on Polars threadpool.
872
+ #
873
+ # @param lazy_frames [Array]
874
+ # A list of LazyFrames to collect.
875
+ # @param type_coercion [Boolean]
876
+ # Do type coercion optimization.
877
+ # @param predicate_pushdown [Boolean]
878
+ # Do predicate pushdown optimization.
879
+ # @param projection_pushdown [Boolean]
880
+ # Do projection pushdown optimization.
881
+ # @param simplify_expression [Boolean]
882
+ # Run simplify expressions optimization.
883
+ # @param string_cache [Boolean]
884
+ # This argument is deprecated and will be ignored
885
+ # @param no_optimization [Boolean]
886
+ # Turn off optimizations.
887
+ # @param slice_pushdown [Boolean]
888
+ # Slice pushdown optimization.
889
+ # @param common_subplan_elimination [Boolean]
890
+ # Will try to cache branching subplans that occur on self-joins or unions.
891
+ # @param allow_streaming [Boolean]
892
+ # Run parts of the query in a streaming fashion (this is in an alpha state)
893
+ #
894
+ # @return [Array]
895
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  # no_optimization overrides these four toggles; type_coercion,
  # simplify_expression and allow_streaming are passed through as given.
  if no_optimization
    predicate_pushdown = projection_pushdown = slice_pushdown = common_subplan_elimination = false
  end

  # Apply the same optimization settings to each frame's underlying plan.
  prepared = lazy_frames.map do |lf|
    lf._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming
    )
  end

  # Collect everything in one call, then wrap each raw result as a DataFrame.
  _collect_all(prepared).map { |rbdf| Utils.wrap_df(rbdf) }
end
496
936
 
497
937
  # Run polars expressions without a context.
498
938
  #
@@ -621,11 +1061,105 @@ module Polars
621
1061
  end
622
1062
  end
623
1063
 
624
- # def coalesce
625
- # end
1064
+ # Folds the expressions from left to right, keeping the first non-null value.
1065
+ #
1066
+ # @param exprs [Object]
1067
+ # Expressions to coalesce.
1068
+ #
1069
+ # @return [Expr]
1070
+ #
1071
+ # @example
1072
+ # df = Polars::DataFrame.new(
1073
+ # [
1074
+ # [nil, 1.0, 1.0],
1075
+ # [nil, 2.0, 2.0],
1076
+ # [nil, nil, 3.0],
1077
+ # [nil, nil, nil]
1078
+ # ],
1079
+ # columns: [["a", :f64], ["b", :f64], ["c", :f64]]
1080
+ # )
1081
+ # df.with_column(Polars.coalesce(["a", "b", "c", 99.9]).alias("d"))
1082
+ # # =>
1083
+ # # shape: (4, 4)
1084
+ # # ┌──────┬──────┬──────┬──────┐
1085
+ # # │ a ┆ b ┆ c ┆ d │
1086
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1087
+ # # │ f64 ┆ f64 ┆ f64 ┆ f64 │
1088
+ # # ╞══════╪══════╪══════╪══════╡
1089
+ # # │ null ┆ 1.0 ┆ 1.0 ┆ 1.0 │
1090
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1091
+ # # │ null ┆ 2.0 ┆ 2.0 ┆ 2.0 │
1092
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1093
+ # # │ null ┆ null ┆ 3.0 ┆ 3.0 │
1094
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1095
+ # # │ null ┆ null ┆ null ┆ 99.9 │
1096
+ # # └──────┴──────┴──────┴──────┘
1097
def coalesce(exprs)
  # Normalise the selection, coalesce natively, then wrap the result.
  rbexprs = Utils.selection_to_rbexpr_list(exprs)
  coalesced = _coalesce_exprs(rbexprs)
  Utils.wrap_expr(coalesced)
end
626
1101
 
627
- # def from_epoch
628
- # end
1102
+ # Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
1103
+ #
1104
+ # Depending on the `unit` provided, this function will return a different dtype:
1105
+ # - unit: "d" returns pl.Date
1106
+ # - unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
1107
+ # - unit: "ms" returns pl.Datetime["ms"]
1108
+ # - unit: "us" returns pl.Datetime["us"]
1109
+ # - unit: "ns" returns pl.Datetime["ns"]
1110
+ #
1111
+ # @param column [Object]
1112
+ # Series or expression to parse integers to pl.Datetime.
1113
+ # @param unit [String]
1114
+ # The unit of the timesteps since epoch time.
1115
+ # @param eager [Boolean]
1116
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
1117
+ #
1118
+ # @return [Object]
1119
+ #
1120
+ # @example
1121
+ # df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
1122
+ # df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
1123
+ # # =>
1124
+ # # shape: (2, 1)
1125
+ # # ┌─────────────────────┐
1126
+ # # │ timestamp │
1127
+ # # │ --- │
1128
+ # # │ datetime[μs] │
1129
+ # # ╞═════════════════════╡
1130
+ # # │ 2022-10-25 07:31:17 │
1131
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1132
+ # # │ 2022-10-25 07:31:39 │
1133
+ # # └─────────────────────┘
1134
def from_epoch(column, unit: "s", eager: false)
  # Normalise the input: a String is a column name, a Series or Expr is used
  # as-is, and anything else is turned into a Series.
  if column.is_a?(String)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  if unit == "d"
    expr = column.cast(:date)
  elsif unit == "s"
    # Not implemented yet, so the default unit currently raises.
    # Intended: (column.cast(:i64) * 1_000_000) cast to a "us" datetime.
    raise Todo
  elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
    # Not implemented yet. Intended: column cast to a datetime with this unit.
    raise Todo
  else
    # Single braces: the doubled "{{ }}" was a leftover Python f-string escape
    # that Ruby would print literally.
    raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
  end

  return expr unless eager

  # Eager evaluation needs concrete data, so an expression input is rejected.
  unless column.is_a?(Series)
    raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
  end

  column.to_frame.select(expr).to_series
end
629
1163
 
630
1164
  # Start a "when, then, otherwise" expression.
631
1165
  #
@@ -0,0 +1,35 @@
1
module Polars
  # A rolling grouper.
  #
  # Stores a data frame together with rolling-window settings and exposes
  # `agg`, which runs polars expressions in a groupby context.
  class RollingGroupBy
    # `period` and `offset` are converted with `Utils._timedelta_to_pl_duration`
    # before being stored; every other argument is kept as given.
    def initialize(df, index_column, period, offset, closed, by)
      @period = Utils._timedelta_to_pl_duration(period)
      @offset = Utils._timedelta_to_pl_duration(offset)
      @df = df
      @time_column = index_column
      @closed = closed
      @by = by
    end

    # Runs `aggs` over the rolling groups: builds the query on the lazy form
    # of the frame and collects it with optimizations turned off.
    def agg(aggs)
      rolling_options = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @by
      }

      rolling = @df.lazy.groupby_rolling(**rolling_options)
      rolling.agg(aggs).collect(no_optimization: true, string_cache: false)
    end
  end
end