polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -6,6 +6,20 @@ module Polars
6
6
  # Name of the field(s) to select.
7
7
  #
8
8
  # @return [Expr]
9
+ #
10
+ # @example
11
+ # df = Polars::DataFrame.new({"a" => [{"x" => 5, "y" => 2}, {"x" => 3, "y" => 4}]})
12
+ # df.select(Polars.col("a").struct.with_fields(Polars.field("x") ** 2))
13
+ # # =>
14
+ # # shape: (2, 1)
15
+ # # ┌───────────┐
16
+ # # │ a │
17
+ # # │ --- │
18
+ # # │ struct[2] │
19
+ # # ╞═══════════╡
20
+ # # │ {25,2} │
21
+ # # │ {9,4} │
22
+ # # └───────────┘
9
23
  def field(name)
10
24
  if name.is_a?(::String)
11
25
  name = [name]
@@ -19,7 +33,7 @@ module Polars
19
33
  #
20
34
  # @example A horizontal rank computation by taking the elements of a list
21
35
  # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
22
- # df.with_column(
36
+ # df.with_columns(
23
37
  # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
24
38
  # )
25
39
  # # =>
@@ -285,7 +299,6 @@ module Polars
285
299
  def mean(*columns)
286
300
  col(*columns).mean
287
301
  end
288
- alias_method :avg, :mean
289
302
 
290
303
  # Get the median value.
291
304
  #
@@ -692,8 +705,12 @@ module Polars
692
705
  # By default ddof is 1.
693
706
  # @param propagate_nans [Boolean]
694
707
  # If `true` any `NaN` encountered will lead to `NaN` in the output.
695
- # Defaults to `False` where `NaN` are regarded as larger than any finite number
708
+ # Defaults to `false` where `NaN` are regarded as larger than any finite number
696
709
  # and thus lead to the highest rank.
710
+ # @param eager [Boolean]
711
+ # Evaluate immediately and return a `Series`; this requires that at least one
712
+ # of the given arguments is a `Series`. If set to `false` (default), return
713
+ # an expression instead.
697
714
  #
698
715
  # @return [Expr]
699
716
  #
@@ -734,27 +751,63 @@ module Polars
734
751
  # # ╞═════╡
735
752
  # # │ 0.5 │
736
753
  # # └─────┘
754
+ #
755
+ # @example Eager evaluation:
756
+ # s1 = Polars::Series.new("a", [1, 8, 3])
757
+ # s2 = Polars::Series.new("b", [4, 5, 2])
758
+ # Polars.corr(s1, s2, eager: true)
759
+ # # =>
760
+ # # shape: (1,)
761
+ # # Series: 'a' [f64]
762
+ # # [
763
+ # # 0.544705
764
+ # # ]
765
+ #
766
+ # @example
767
+ # Polars.corr(s1, s2, method: "spearman", eager: true)
768
+ # # =>
769
+ # # shape: (1,)
770
+ # # Series: 'a' [f64]
771
+ # # [
772
+ # # 0.5
773
+ # # ]
737
774
  def corr(
738
775
  a,
739
776
  b,
740
777
  method: "pearson",
741
778
  ddof: nil,
742
- propagate_nans: false
779
+ propagate_nans: false,
780
+ eager: false
743
781
  )
744
782
  if !ddof.nil?
745
- warn "The `ddof` parameter has no effect. Do not use it."
783
+ Utils.issue_deprecation_warning(
784
+ "The `ddof` parameter has no effect. Do not use it."
785
+ )
746
786
  end
747
787
 
748
- a = Utils.parse_into_expression(a)
749
- b = Utils.parse_into_expression(b)
788
+ if eager
789
+ if !(a.is_a?(Series) || b.is_a?(Series))
790
+ msg = "expected at least one Series in 'corr' inputs if 'eager: true'"
791
+ raise ArgumentError, msg
792
+ end
750
793
 
751
- if method == "pearson"
752
- Utils.wrap_expr(Plr.pearson_corr(a, b))
753
- elsif method == "spearman"
754
- Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
794
+ frame = Polars::DataFrame.new([a, b].filter_map { |e| e if e.is_a?(Series) })
795
+ exprs = [a, b].map { |e| e.is_a?(Series) ? e.name : e }
796
+ frame.select(
797
+ corr(*exprs, eager: false, method: method, propagate_nans: propagate_nans)
798
+ ).to_series
755
799
  else
756
- msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
757
- raise ArgumentError, msg
800
+ a = Utils.parse_into_expression(a)
801
+ b = Utils.parse_into_expression(b)
802
+
803
+ if method == "pearson"
804
+ Utils.wrap_expr(Plr.pearson_corr(a, b))
805
+ elsif method == "spearman"
806
+ Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
807
+ else
808
+ msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
809
+ raise ArgumentError, msg
810
+ end
758
811
  end
759
812
  end
760
813
 
@@ -768,6 +821,10 @@ module Polars
768
821
  # "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
769
822
  # where N represents the number of elements.
770
823
  # By default ddof is 1.
824
+ # @param eager [Boolean]
825
+ # Evaluate immediately and return a `Series`; this requires that at least one
826
+ # of the given arguments is a `Series`. If set to `false` (default), return
827
+ # an expression instead.
771
828
  #
772
829
  # @return [Expr]
773
830
  #
@@ -789,10 +846,32 @@ module Polars
789
846
  # # ╞═════╡
790
847
  # # │ 3.0 │
791
848
  # # └─────┘
792
- def cov(a, b, ddof: 1)
793
- a = Utils.parse_into_expression(a)
794
- b = Utils.parse_into_expression(b)
795
- Utils.wrap_expr(Plr.cov(a, b, ddof))
849
+ #
850
+ # @example Eager evaluation:
851
+ # s1 = Polars::Series.new("a", [1, 8, 3])
852
+ # s2 = Polars::Series.new("b", [4, 5, 2])
853
+ # Polars.cov(s1, s2, eager: true)
854
+ # # =>
855
+ # # shape: (1,)
856
+ # # Series: 'a' [f64]
857
+ # # [
858
+ # # 3.0
859
+ # # ]
860
+ def cov(a, b, ddof: 1, eager: false)
861
+ if eager
862
+ if !(a.is_a?(Series) || b.is_a?(Series))
863
+ msg = "expected at least one Series in 'cov' inputs if 'eager: true'"
864
+ raise ArgumentError, msg
865
+ end
866
+
867
+ frame = Polars::DataFrame.new([a, b].filter_map { |e| e if e.is_a?(Series) })
868
+ exprs = [a, b].map { |e| e.is_a?(Series) ? e.name : e }
869
+ frame.select(cov(*exprs, eager: false, ddof: ddof)).to_series
870
+ else
871
+ a_rbexpr = Utils.parse_into_expression(a)
872
+ b_rbexpr = Utils.parse_into_expression(b)
873
+ Utils.wrap_expr(Plr.cov(a_rbexpr, b_rbexpr, ddof))
874
+ end
796
875
  end
797
876
 
798
877
  # def map
@@ -860,6 +939,9 @@ module Polars
860
939
  returns_scalar: false,
861
940
  return_dtype: nil
862
941
  )
942
+ # need to mark function for GC
943
+ raise Todo
944
+
863
945
  acc = Utils.parse_into_expression(acc, str_as_lit: true)
864
946
  if exprs.is_a?(Expr)
865
947
  exprs = [exprs]
@@ -882,8 +964,72 @@ module Polars
882
964
  )
883
965
  end
884
966
 
885
- # def reduce
886
- # end
967
+ # Accumulate over multiple columns horizontally/ row wise with a left fold.
968
+ #
969
+ # @param function [Object]
970
+ # Function to apply over the accumulator and the value.
971
+ # Fn(acc, value) -> new_value
972
+ # @param exprs [Object]
973
+ # Expressions to aggregate over. May also be a wildcard expression.
974
+ # @param returns_scalar [Boolean]
975
+ # Whether or not `function` applied returns a scalar. This must be set correctly
976
+ # by the user.
977
+ # @param return_dtype [Object]
978
+ # Output datatype.
979
+ # If not set, the dtype will be inferred based on the dtype of the input
980
+ # expressions.
981
+ #
982
+ # @return [Expr]
983
+ #
984
+ # @example Horizontally sum over all columns.
985
+ # df = Polars::DataFrame.new(
986
+ # {
987
+ # "a" => [1, 2, 3],
988
+ # "b" => [0, 1, 2]
989
+ # }
990
+ # )
991
+ # df.select(
992
+ # Polars.reduce(function: ->(acc, x) { acc + x }, exprs: Polars.col("*")).alias("sum")
993
+ # )
994
+ # # =>
995
+ # # shape: (3, 1)
996
+ # # ┌─────┐
997
+ # # │ sum │
998
+ # # │ --- │
999
+ # # │ i64 │
1000
+ # # ╞═════╡
1001
+ # # │ 1 │
1002
+ # # │ 3 │
1003
+ # # │ 5 │
1004
+ # # └─────┘
1005
+ def reduce(
1006
+ function:,
1007
+ exprs:,
1008
+ returns_scalar: false,
1009
+ return_dtype: nil
1010
+ )
1011
+ # need to mark function for GC
1012
+ raise Todo
1013
+
1014
+ if exprs.is_a?(Expr)
1015
+ exprs = [exprs]
1016
+ end
1017
+
1018
+ rt = nil
1019
+ if !return_dtype.nil?
1020
+ rt = Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
1021
+ end
1022
+
1023
+ rbexprs = Utils.parse_into_list_of_expressions(exprs)
1024
+ Utils.wrap_expr(
1025
+ Plr.reduce(
1026
+ _wrap_acc_lamba(function),
1027
+ rbexprs,
1028
+ returns_scalar,
1029
+ rt
1030
+ )
1031
+ )
1032
+ end
887
1033
 
888
1034
  # Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
889
1035
  #
@@ -942,6 +1088,9 @@ module Polars
942
1088
  return_dtype: nil,
943
1089
  include_init: false
944
1090
  )
1091
+ # need to mark function for GC
1092
+ raise Todo
1093
+
945
1094
  acc = Utils.parse_into_expression(acc, str_as_lit: true)
946
1095
  if exprs.is_a?(Expr)
947
1096
  exprs = [exprs]
@@ -964,60 +1113,79 @@ module Polars
964
1113
  )._alias("cum_fold")
965
1114
  )
966
1115
  end
967
- alias_method :cumfold, :cum_fold
968
-
969
- # def cum_reduce
970
- # end
971
1116
 
972
- # Compute two argument arctan in radians.
1117
+ # Cumulatively reduce horizontally across columns with a left fold.
973
1118
  #
974
- # Returns the angle (in radians) in the plane between the
975
- # positive x-axis and the ray from the origin to (x,y).
1119
+ # Every cumulative result is added as a separate field in a Struct column.
976
1120
  #
977
- # @param y [Object]
978
- # Column name or Expression.
979
- # @param x [Object]
980
- # Column name or Expression.
1121
+ # @param function [Object]
1122
+ # Function to apply over the accumulator and the value.
1123
+ # Fn(acc, value) -> new_value
1124
+ # @param exprs [Object]
1125
+ # Expressions to aggregate over. May also be a wildcard expression.
1126
+ # @param returns_scalar [Boolean]
1127
+ # Whether or not `function` applied returns a scalar. This must be set correctly
1128
+ # by the user.
1129
+ # @param return_dtype [Object]
1130
+ # Output datatype.
1131
+ # If not set, the dtype will be inferred based on the dtype of the input
1132
+ # expressions.
981
1133
  #
982
1134
  # @return [Expr]
983
1135
  #
984
1136
  # @example
985
- # twoRootTwo = Math.sqrt(2) / 2
986
1137
  # df = Polars::DataFrame.new(
987
1138
  # {
988
- # "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
989
- # "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
1139
+ # "a" => [1, 2, 3],
1140
+ # "b" => [3, 4, 5],
1141
+ # "c" => [5, 6, 7]
990
1142
  # }
991
1143
  # )
992
- # df.select(
993
- # Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
994
- # )
1144
+ # df.with_columns(Polars.cum_reduce(function: ->(acc, x) { acc + x }, exprs: Polars.all))
995
1145
  # # =>
996
- # # shape: (4, 2)
997
- # # ┌────────┬───────────┐
998
- # # │ atan2d ┆ atan2 │
999
- # # │ --- ┆ --- │
1000
- # # │ f64 ┆ f64 │
1001
- # # ╞════════╪═══════════╡
1002
- # # │ 45.0 ┆ 0.785398 │
1003
- # # │ -45.0 ┆ -0.785398 │
1004
- # # │ 135.0 ┆ 2.356194 │
1005
- # # │ -135.0 ┆ -2.356194 │
1006
- # # └────────┴───────────┘
1007
- def arctan2(y, x)
1008
- if Utils.strlike?(y)
1009
- y = col(y)
1146
+ # # shape: (3, 4)
1147
+ # # ┌─────┬─────┬─────┬────────────┐
1148
+ # # │ a ┆ b ┆ c ┆ cum_reduce │
1149
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1150
+ # # │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
1151
+ # # ╞═════╪═════╪═════╪════════════╡
1152
+ # # │ 1 ┆ 3 ┆ 5 ┆ {1,4,9} │
1153
+ # # │ 2 ┆ 4 ┆ 6 ┆ {2,6,12} │
1154
+ # # │ 3 ┆ 5 ┆ 7 ┆ {3,8,15} │
1155
+ # # └─────┴─────┴─────┴────────────┘
1156
+ def cum_reduce(
1157
+ function:,
1158
+ exprs:,
1159
+ returns_scalar: false,
1160
+ return_dtype: nil
1161
+ )
1162
+ # need to mark function for GC
1163
+ raise Todo
1164
+
1165
+ if exprs.is_a?(Expr)
1166
+ exprs = [exprs]
1010
1167
  end
1011
- if Utils.strlike?(x)
1012
- x = col(x)
1168
+
1169
+ rt = nil
1170
+ if !return_dtype.nil?
1171
+ rt = Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
1013
1172
  end
1014
- Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
1173
+
1174
+ rbexprs = Utils.parse_into_list_of_expressions(exprs)
1175
+ Utils.wrap_expr(
1176
+ Plr.cum_reduce(
1177
+ _wrap_acc_lamba(function),
1178
+ rbexprs,
1179
+ returns_scalar,
1180
+ rt
1181
+ ).alias("cum_reduce")
1182
+ )
1015
1183
  end
1016
1184
 
1017
- # Compute two argument arctan in degrees.
1185
+ # Compute two argument arctan in radians.
1018
1186
  #
1019
- # Returns the angle (in degrees) in the plane between the positive x-axis
1020
- # and the ray from the origin to (x,y).
1187
+ # Returns the angle (in radians) in the plane between the
1188
+ # positive x-axis and the ray from the origin to (x,y).
1021
1189
  #
1022
1190
  # @param y [Object]
1023
1191
  # Column name or Expression.
@@ -1027,47 +1195,44 @@ module Polars
1027
1195
  # @return [Expr]
1028
1196
  #
1029
1197
  # @example
1030
- # twoRootTwo = Math.sqrt(2) / 2
1198
+ # c = Math.sqrt(2) / 2
1031
1199
  # df = Polars::DataFrame.new(
1032
1200
  # {
1033
- # "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
1034
- # "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
1201
+ # "y" => [c, -c, c, -c],
1202
+ # "x" => [c, c, -c, -c]
1035
1203
  # }
1036
1204
  # )
1037
- # df.select(
1038
- # Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
1039
- # )
1205
+ # df.with_columns(Polars.arctan2("y", "x").alias("atan2"))
1040
1206
  # # =>
1041
- # # shape: (4, 2)
1042
- # # ┌────────┬───────────┐
1043
- # # │ atan2d ┆ atan2 │
1044
- # # │ --- ┆ --- │
1045
- # # │ f64 ┆ f64 │
1046
- # # ╞════════╪═══════════╡
1047
- # # │ 45.0 ┆ 0.785398 │
1048
- # # │ -45.0 ┆ -0.785398 │
1049
- # # │ 135.0 ┆ 2.356194 │
1050
- # # │ -135.0 ┆ -2.356194 │
1051
- # # └────────┴───────────┘
1052
- def arctan2d(y, x)
1207
+ # # shape: (4, 3)
1208
+ # # ┌───────────┬───────────┬───────────┐
1209
+ # # │ y ┆ x ┆ atan2 │
1210
+ # # │ --- ┆ --- ┆ --- │
1211
+ # # │ f64 ┆ f64 ┆ f64 │
1212
+ # # ╞═══════════╪═══════════╪═══════════╡
1213
+ # # │ 0.707107 ┆ 0.707107 ┆ 0.785398 │
1214
+ # # │ -0.707107 ┆ 0.707107 ┆ -0.785398 │
1215
+ # # │ 0.707107 ┆ -0.707107 ┆ 2.356194 │
1216
+ # # │ -0.707107 ┆ -0.707107 ┆ -2.356194 │
1217
+ # # └───────────┴───────────┴───────────┘
1218
+ def arctan2(y, x)
1053
1219
  if Utils.strlike?(y)
1054
1220
  y = col(y)
1055
1221
  end
1056
1222
  if Utils.strlike?(x)
1057
1223
  x = col(x)
1058
1224
  end
1059
- Utils.wrap_expr(Plr.arctan2d(y._rbexpr, x._rbexpr))
1225
+ Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
1060
1226
  end
1061
1227
 
1062
1228
  # Exclude certain columns from a wildcard/regex selection.
1063
1229
  #
1064
1230
  # @param columns [Object]
1065
- # Column(s) to exclude from selection
1066
- # This can be:
1067
- #
1068
- # - a column name, or multiple column names
1069
- # - a regular expression starting with `^` and ending with `$`
1070
- # - a dtype or multiple dtypes
1231
+ # The name or datatype of the column(s) to exclude. Accepts regular expression
1232
+ # input. Regular expressions should start with `^` and end with `$`.
1233
+ # @param more_columns [Array]
1234
+ # Additional names or datatypes of columns to exclude, specified as positional
1235
+ # arguments.
1071
1236
  #
1072
1237
  # @return [Object]
1073
1238
  #
@@ -1118,8 +1283,8 @@ module Polars
1118
1283
  # # │ 2.5 │
1119
1284
  # # │ 1.5 │
1120
1285
  # # └──────┘
1121
- def exclude(columns)
1122
- col("*").exclude(columns)
1286
+ def exclude(columns, *more_columns)
1287
+ col("*").exclude(columns, *more_columns)
1123
1288
  end
1124
1289
 
1125
1290
  # Syntactic sugar for `Polars.col("foo").agg_groups`.
@@ -1153,7 +1318,7 @@ module Polars
1153
1318
  # Columns use to determine the ordering.
1154
1319
  # @param more_exprs [Array]
1155
1320
  # Additional columns to arg sort by, specified as positional arguments.
1156
- # @param reverse [Boolean]
1321
+ # @param descending [Boolean]
1157
1322
  # Default is ascending.
1158
1323
  # @param nulls_last [Boolean]
1159
1324
  # Place null values last.
@@ -1187,7 +1352,7 @@ module Polars
1187
1352
  # # └─────┘
1188
1353
  #
1189
1354
  # @example Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.
1190
- # df.select(Polars.arg_sort_by(["a", "b"], reverse: true))
1355
+ # df.select(Polars.arg_sort_by(["a", "b"], descending: true))
1191
1356
  # # =>
1192
1357
  # # shape: (4, 1)
1193
1358
  # # ┌─────┐
@@ -1218,17 +1383,16 @@ module Polars
1218
1383
  def arg_sort_by(
1219
1384
  exprs,
1220
1385
  *more_exprs,
1221
- reverse: false,
1386
+ descending: false,
1222
1387
  nulls_last: false,
1223
1388
  multithreaded: true,
1224
1389
  maintain_order: false
1225
1390
  )
1226
1391
  exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
1227
- reverse = Utils.extend_bool(reverse, exprs.length, "reverse", "exprs")
1392
+ descending = Utils.extend_bool(descending, exprs.length, "descending", "exprs")
1228
1393
  nulls_last = Utils.extend_bool(nulls_last, exprs.length, "nulls_last", "exprs")
1229
- Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse, nulls_last, multithreaded, maintain_order))
1394
+ Utils.wrap_expr(Plr.arg_sort_by(exprs, descending, nulls_last, multithreaded, maintain_order))
1230
1395
  end
1231
- alias_method :argsort_by, :arg_sort_by
1232
1396
 
1233
1397
  # Collect multiple LazyFrames at the same time.
1234
1398
  #
@@ -1236,62 +1400,31 @@ module Polars
1236
1400
  #
1237
1401
  # @param lazy_frames [Boolean]
1238
1402
  # A list of LazyFrames to collect.
1239
- # @param type_coercion [Boolean]
1240
- # Do type coercion optimization.
1241
- # @param predicate_pushdown [Boolean]
1242
- # Do predicate pushdown optimization.
1243
- # @param projection_pushdown [Boolean]
1244
- # Do projection pushdown optimization.
1245
- # @param simplify_expression [Boolean]
1246
- # Run simplify expressions optimization.
1247
- # @param string_cache [Boolean]
1248
- # This argument is deprecated and will be ignored
1249
- # @param no_optimization [Boolean]
1250
- # Turn off optimizations.
1251
- # @param slice_pushdown [Boolean]
1252
- # Slice pushdown optimization.
1253
- # @param common_subplan_elimination [Boolean]
1254
- # Will try to cache branching subplans that occur on self-joins or unions.
1255
- # @param allow_streaming [Boolean]
1256
- # Run parts of the query in a streaming fashion (this is in an alpha state)
1403
+ # @param optimizations
1404
+ # The optimization passes done during query optimization.
1405
+ #
1406
+ # This has no effect if `lazy` is set to `true`.
1407
+ # @param engine
1408
+ # Select the engine used to process the query, optional.
1409
+ # At the moment, if set to `"auto"` (default), the query is run
1410
+ # using the polars streaming engine. Polars will also
1411
+ # attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
1412
+ # environment variable. If it cannot run the query using the
1413
+ # selected engine, the query is run using the polars streaming
1414
+ # engine.
1257
1415
  #
1258
1416
  # @return [Array]
1259
1417
  def collect_all(
1260
1418
  lazy_frames,
1261
- type_coercion: true,
1262
- predicate_pushdown: true,
1263
- projection_pushdown: true,
1264
- simplify_expression: true,
1265
- string_cache: false,
1266
- no_optimization: false,
1267
- slice_pushdown: true,
1268
- common_subplan_elimination: true,
1269
- allow_streaming: false
1419
+ optimizations: DEFAULT_QUERY_OPT_FLAGS,
1420
+ engine: "auto"
1270
1421
  )
1271
- if no_optimization
1272
- predicate_pushdown = false
1273
- projection_pushdown = false
1274
- slice_pushdown = false
1275
- common_subplan_elimination = false
1422
+ if engine == "streaming"
1423
+ Utils.issue_unstable_warning("streaming mode is considered unstable.")
1276
1424
  end
1277
1425
 
1278
- prepared = []
1279
-
1280
- lazy_frames.each do |lf|
1281
- ldf = lf._ldf.optimization_toggle(
1282
- type_coercion,
1283
- predicate_pushdown,
1284
- projection_pushdown,
1285
- simplify_expression,
1286
- slice_pushdown,
1287
- common_subplan_elimination,
1288
- allow_streaming,
1289
- false
1290
- )
1291
- prepared << ldf
1292
- end
1293
-
1294
- out = Plr.collect_all(prepared)
1426
+ lfs = lazy_frames.map { |lf| lf._ldf }
1427
+ out = Plr.collect_all(lfs, engine, optimizations._rboptflags)
1295
1428
 
1296
1429
  # wrap the rbdataframes into dataframe
1297
1430
  result = out.map { |rbdf| Utils.wrap_df(rbdf) }
@@ -1307,6 +1440,9 @@ module Polars
1307
1440
  # Column(s) to select, specified as positional arguments.
1308
1441
  # Accepts expression input. Strings are parsed as column names,
1309
1442
  # other non-expression inputs are parsed as literals.
1443
+ # @param eager [Boolean]
1444
+ # Evaluate immediately and return a `DataFrame` (default); if set to `false`,
1445
+ # return a `LazyFrame` instead.
1310
1446
  # @param named_exprs [Hash]
1311
1447
  # Additional columns to select, specified as keyword arguments.
1312
1448
  # The columns will be renamed to the keyword used.
@@ -1328,8 +1464,9 @@ module Polars
1328
1464
  # # │ 2 │
1329
1465
  # # │ 1 │
1330
1466
  # # └─────┘
1331
- def select(*exprs, **named_exprs)
1332
- DataFrame.new([]).select(*exprs, **named_exprs)
1467
+ def select(*exprs, eager: true, **named_exprs)
1468
+ empty_frame = eager ? Polars::DataFrame.new : Polars::LazyFrame.new
1469
+ empty_frame.select(*exprs, **named_exprs)
1333
1470
  end
1334
1471
 
1335
1472
  # Return indices where `condition` evaluates `true`.
@@ -1374,6 +1511,10 @@ module Polars
1374
1511
  # names, other non-expression inputs are parsed as literals.
1375
1512
  # @param more_exprs [Hash]
1376
1513
  # Additional columns to coalesce, specified as positional arguments.
1514
+ # @param eager [Boolean]
1515
+ # Evaluate immediately and return a `Series`; this requires that at least one
1516
+ # of the given arguments is a `Series`. If set to `false` (default), return
1517
+ # an expression instead.
1377
1518
  #
1378
1519
  # @return [Expr]
1379
1520
  #
@@ -1413,32 +1554,55 @@ module Polars
1413
1554
  # # │ null ┆ null ┆ 3 ┆ 3.0 │
1414
1555
  # # │ null ┆ null ┆ null ┆ 10.0 │
1415
1556
  # # └──────┴──────┴──────┴──────┘
1416
- def coalesce(exprs, *more_exprs)
1417
- exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
1418
- Utils.wrap_expr(Plr.coalesce(exprs))
1557
+ #
1558
+ # @example
1559
+ # s1 = Polars::Series.new("a", [nil, 2, nil])
1560
+ # s2 = Polars::Series.new("b", [1, nil, 3])
1561
+ # Polars.coalesce(s1, s2, eager: true)
1562
+ # # =>
1563
+ # # shape: (3,)
1564
+ # # Series: 'a' [i64]
1565
+ # # [
1566
+ # # 1
1567
+ # # 2
1568
+ # # 3
1569
+ # # ]
1570
+ def coalesce(exprs, *more_exprs, eager: false)
1571
+ if eager
1572
+ exprs = [exprs] + more_exprs
1573
+ series = exprs.filter_map { |e| e if e.is_a?(Series) }
1574
+ if !series.any?
1575
+ msg = "expected at least one Series in 'coalesce' if 'eager: true'"
1576
+ raise ArgumentError, msg
1577
+ end
1578
+
1579
+ exprs = exprs.map { |e| e.is_a?(Series) ? e.name : e }
1580
+ Polars::DataFrame.new(series).select(coalesce(exprs, eager: false)).to_series
1581
+ else
1582
+ exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
1583
+ Utils.wrap_expr(Plr.coalesce(exprs))
1584
+ end
1419
1585
  end
1420
1586
 
1421
1587
  # Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
1422
1588
  #
1423
1589
  # Depending on the `unit` provided, this function will return a different dtype:
1424
- # - unit: "d" returns pl.Date
1425
- # - unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
1426
- # - unit: "ms" returns pl.Datetime["ms"]
1427
- # - unit: "us" returns pl.Datetime["us"]
1428
- # - unit: "ns" returns pl.Datetime["ns"]
1590
+ # - time_unit: "d" returns pl.Date
1591
+ # - time_unit: "s" returns pl.Datetime["us"] (pl.Datetime's default)
1592
+ # - time_unit: "ms" returns pl.Datetime["ms"]
1593
+ # - time_unit: "us" returns pl.Datetime["us"]
1594
+ # - time_unit: "ns" returns pl.Datetime["ns"]
1429
1595
  #
1430
1596
  # @param column [Object]
1431
1597
  # Series or expression to parse integers to pl.Datetime.
1432
- # @param unit [String]
1598
+ # @param time_unit [String]
1433
1599
  # The unit of the timesteps since epoch time.
1434
- # @param eager [Boolean]
1435
- # If eager evaluation is `true`, a Series is returned instead of an Expr.
1436
1600
  #
1437
1601
  # @return [Object]
1438
1602
  #
1439
1603
  # @example
1440
1604
  # df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
1441
- # df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
1605
+ # df.select(Polars.from_epoch(Polars.col("timestamp"), time_unit: "s")).collect
1442
1606
  # # =>
1443
1607
  # # shape: (2, 1)
1444
1608
  # # ┌─────────────────────┐
@@ -1449,32 +1613,102 @@ module Polars
1449
1613
  # # │ 2022-10-25 07:31:17 │
1450
1614
  # # │ 2022-10-25 07:31:39 │
1451
1615
  # # └─────────────────────┘
1452
- def from_epoch(column, unit: "s", eager: false)
1616
+ def from_epoch(column, time_unit: "s")
1453
1617
  if Utils.strlike?(column)
1454
- column = col(column)
1618
+ column = F.col(column)
1455
1619
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
1456
1620
  column = Series.new(column)
1457
1621
  end
1458
1622
 
1459
- if unit == "d"
1460
- expr = column.cast(Date)
1461
- elsif unit == "s"
1462
- expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
1463
- elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
1464
- expr = column.cast(Datetime.new(unit))
1623
+ if time_unit == "d"
1624
+ column.cast(Date)
1625
+ elsif time_unit == "s"
1626
+ (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
1627
+ elsif Utils::DTYPE_TEMPORAL_UNITS.include?(time_unit)
1628
+ column.cast(Datetime.new(time_unit))
1465
1629
  else
1466
- raise ArgumentError, "'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '#{unit}'."
1630
+ raise ArgumentError, "`time_unit` must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got #{time_unit.inspect}."
1467
1631
  end
1632
+ end
1468
1633
 
1469
- if eager
1470
- if !column.is_a?(Series)
1471
- raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
1472
- else
1473
- column.to_frame.select(expr).to_series
1474
- end
1475
- else
1476
- expr
1634
+ # Compute the rolling covariance between two columns/ expressions.
1635
+ #
1636
+ # The window at a given row includes the row itself and the
1637
+ # `window_size - 1` elements before it.
1638
+ #
1639
+ # @param a [Object]
1640
+ # Column name or Expression.
1641
+ # @param b [Object]
1642
+ # Column name or Expression.
1643
+ # @param window_size [Integer]
1644
+ # The length of the window.
1645
+ # @param min_samples [Integer]
1646
+ # The number of values in the window that should be non-null before computing
1647
+ # a result. If nil, it will be set equal to window size.
1648
+ # @param ddof [Integer]
1649
+ # Delta degrees of freedom. The divisor used in calculations
1650
+ # is `N - ddof`, where `N` represents the number of elements.
1651
+ #
1652
+ # @return [Expr]
1653
+ def rolling_cov(
1654
+ a,
1655
+ b,
1656
+ window_size:,
1657
+ min_samples: nil,
1658
+ ddof: 1
1659
+ )
1660
+ if min_samples.nil?
1661
+ min_samples = window_size
1477
1662
  end
1663
+ if Utils.strlike?(a)
1664
+ a = F.col(a)
1665
+ end
1666
+ if Utils.strlike?(b)
1667
+ b = F.col(b)
1668
+ end
1669
+ Utils.wrap_expr(
1670
+ Plr.rolling_cov(a._rbexpr, b._rbexpr, window_size, min_samples, ddof)
1671
+ )
1672
+ end
1673
+
1674
+ # Compute the rolling correlation between two columns/ expressions.
1675
+ #
1676
+ # The window at a given row includes the row itself and the
1677
+ # `window_size - 1` elements before it.
1678
+ #
1679
+ # @param a [Object]
1680
+ # Column name or Expression.
1681
+ # @param b [Object]
1682
+ # Column name or Expression.
1683
+ # @param window_size [Integer]
1684
+ # The length of the window.
1685
+ # @param min_samples [Integer]
1686
+ # The number of values in the window that should be non-null before computing
1687
+ # a result. If nil, it will be set equal to window size.
1688
+ # @param ddof [Integer]
1689
+ # Delta degrees of freedom. The divisor used in calculations
1690
+ # is `N - ddof`, where `N` represents the number of elements.
1691
+ #
1692
+ # @return [Expr]
1693
+ def rolling_corr(
1694
+ a,
1695
+ b,
1696
+ window_size:,
1697
+ min_samples: nil,
1698
+ ddof: 1
1699
+ )
1700
+ if min_samples.nil?
1701
+ min_samples = window_size
1702
+ end
1703
+ if Utils.strlike?(a)
1704
+ a = F.col(a)
1705
+ end
1706
+ if Utils.strlike?(b)
1707
+ b = F.col(b)
1708
+ end
1709
+ Utils.wrap_expr(
1710
+ Plr.rolling_corr(a._rbexpr, b._rbexpr, window_size, min_samples, ddof)
1711
+ )
1478
1712
  end
1479
1713
 
1480
1714
  # Parse one or more SQL expressions to polars expression(s).
@@ -1519,5 +1753,11 @@ module Polars
1519
1753
  sql.map { |q| Utils.wrap_expr(Plr.sql_expr(q)) }
1520
1754
  end
1521
1755
  end
1756
+
1757
+ private
1758
+
1759
+ def _wrap_acc_lamba(function)
1760
+ raise Todo
1761
+ end
1522
1762
  end
1523
1763
  end