polars-df 0.14.0-arm64-darwin → 0.16.0-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE-THIRD-PARTY.txt +25665 -14861
  5. data/LICENSE.txt +1 -0
  6. data/README.md +38 -4
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +452 -101
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +3 -1
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  21. data/lib/polars/functions/as_datatype.rb +51 -2
  22. data/lib/polars/functions/col.rb +1 -1
  23. data/lib/polars/functions/eager.rb +1 -3
  24. data/lib/polars/functions/lazy.rb +95 -13
  25. data/lib/polars/functions/range/time_range.rb +21 -21
  26. data/lib/polars/io/csv.rb +14 -16
  27. data/lib/polars/io/database.rb +2 -2
  28. data/lib/polars/io/delta.rb +126 -0
  29. data/lib/polars/io/ipc.rb +14 -4
  30. data/lib/polars/io/ndjson.rb +10 -0
  31. data/lib/polars/io/parquet.rb +168 -111
  32. data/lib/polars/lazy_frame.rb +684 -20
  33. data/lib/polars/list_name_space.rb +169 -0
  34. data/lib/polars/selectors.rb +1226 -0
  35. data/lib/polars/series.rb +465 -35
  36. data/lib/polars/string_cache.rb +27 -1
  37. data/lib/polars/string_expr.rb +0 -1
  38. data/lib/polars/string_name_space.rb +73 -3
  39. data/lib/polars/struct_name_space.rb +31 -7
  40. data/lib/polars/utils/various.rb +5 -1
  41. data/lib/polars/utils.rb +45 -10
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +17 -1
  44. metadata +9 -8
  45. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,28 @@
1
+ module Polars
2
+ class DataTypeGroup < Set
3
+ end
4
+
5
+ SIGNED_INTEGER_DTYPES = DataTypeGroup.new(
6
+ [
7
+ Int8,
8
+ Int16,
9
+ Int32,
10
+ Int64
11
+ ]
12
+ )
13
+ UNSIGNED_INTEGER_DTYPES = DataTypeGroup.new(
14
+ [
15
+ UInt8,
16
+ UInt16,
17
+ UInt32,
18
+ UInt64
19
+ ]
20
+ )
21
+ INTEGER_DTYPES = (
22
+ SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
23
+ )
24
+ FLOAT_DTYPES = DataTypeGroup.new([Float32, Float64])
25
+ NUMERIC_DTYPES = DataTypeGroup.new(
26
+ FLOAT_DTYPES + INTEGER_DTYPES | [Decimal]
27
+ )
28
+ end
@@ -292,6 +292,8 @@ module Polars
292
292
 
293
293
  # A categorical encoding of a set of strings.
294
294
  class Categorical < DataType
295
+ attr_reader :ordering
296
+
295
297
  def initialize(ordering = "physical")
296
298
  @ordering = ordering
297
299
  end
@@ -309,7 +311,7 @@ module Polars
309
311
  end
310
312
 
311
313
  if categories.empty?
312
- self.categories = Series.new("category", [], dtype: String)
314
+ @categories = Series.new("category", [], dtype: String)
313
315
  return
314
316
  end
315
317
 
@@ -269,6 +269,50 @@ module Polars
269
269
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
270
270
  #
271
271
  # @return [Expr]
272
+ #
273
+ # @example
274
+ # df = Polars::DataFrame.new(
275
+ # {
276
+ # "datetime" => [
277
+ # Time.utc(2020, 3, 1),
278
+ # Time.utc(2020, 4, 1),
279
+ # Time.utc(2020, 5, 1)
280
+ # ]
281
+ # }
282
+ # )
283
+ # df.with_columns(
284
+ # Polars.col("datetime")
285
+ # .dt.strftime("%Y/%m/%d %H:%M:%S")
286
+ # .alias("datetime_string")
287
+ # )
288
+ # # =>
289
+ # # shape: (3, 2)
290
+ # # ┌─────────────────────┬─────────────────────┐
291
+ # # │ datetime ┆ datetime_string │
292
+ # # │ --- ┆ --- │
293
+ # # │ datetime[ns] ┆ str │
294
+ # # ╞═════════════════════╪═════════════════════╡
295
+ # # │ 2020-03-01 00:00:00 ┆ 2020/03/01 00:00:00 │
296
+ # # │ 2020-04-01 00:00:00 ┆ 2020/04/01 00:00:00 │
297
+ # # │ 2020-05-01 00:00:00 ┆ 2020/05/01 00:00:00 │
298
+ # # └─────────────────────┴─────────────────────┘
299
+ #
300
+ # @example If you're interested in the day name / month name, you can use `'%A'` / `'%B'`:
301
+ # df.with_columns(
302
+ # day_name: Polars.col("datetime").dt.strftime("%A"),
303
+ # month_name: Polars.col("datetime").dt.strftime("%B")
304
+ # )
305
+ # # =>
306
+ # # shape: (3, 3)
307
+ # # ┌─────────────────────┬───────────┬────────────┐
308
+ # # │ datetime ┆ day_name ┆ month_name │
309
+ # # │ --- ┆ --- ┆ --- │
310
+ # # │ datetime[ns] ┆ str ┆ str │
311
+ # # ╞═════════════════════╪═══════════╪════════════╡
312
+ # # │ 2020-03-01 00:00:00 ┆ Sunday ┆ March │
313
+ # # │ 2020-04-01 00:00:00 ┆ Wednesday ┆ April │
314
+ # # │ 2020-05-01 00:00:00 ┆ Friday ┆ May │
315
+ # # └─────────────────────┴───────────┴────────────┘
272
316
  def strftime(fmt)
273
317
  Utils.wrap_expr(_rbexpr.strftime(fmt))
274
318
  end
@@ -574,6 +618,29 @@ module Polars
574
618
  # Date
575
619
  #
576
620
  # @return [Expr]
621
+ #
622
+ # @example
623
+ # df = Polars::DataFrame.new(
624
+ # {
625
+ # "datetime" => [
626
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
627
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
628
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000)
629
+ # ]
630
+ # }
631
+ # )
632
+ # df.with_columns(Polars.col("datetime").dt.date.alias("date"))
633
+ # # =>
634
+ # # shape: (3, 2)
635
+ # # ┌─────────────────────────┬────────────┐
636
+ # # │ datetime ┆ date │
637
+ # # │ --- ┆ --- │
638
+ # # │ datetime[ns] ┆ date │
639
+ # # ╞═════════════════════════╪════════════╡
640
+ # # │ 1978-01-01 01:01:01 ┆ 1978-01-01 │
641
+ # # │ 2024-10-13 05:30:14.500 ┆ 2024-10-13 │
642
+ # # │ 2065-01-01 10:20:30.060 ┆ 2065-01-01 │
643
+ # # └─────────────────────────┴────────────┘
577
644
  def date
578
645
  Utils.wrap_expr(_rbexpr.dt_date)
579
646
  end
@@ -732,6 +799,34 @@ module Polars
732
799
  # Applies to Datetime columns.
733
800
  #
734
801
  # @return [Expr]
802
+ #
803
+ # @example
804
+ # df = Polars::DataFrame.new(
805
+ # {
806
+ # "datetime": [
807
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
808
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
809
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
810
+ # ]
811
+ # }
812
+ # )
813
+ # df.with_columns(
814
+ # Polars.col("datetime").dt.hour.alias("hour"),
815
+ # Polars.col("datetime").dt.minute.alias("minute"),
816
+ # Polars.col("datetime").dt.second.alias("second"),
817
+ # Polars.col("datetime").dt.millisecond.alias("millisecond")
818
+ # )
819
+ # # =>
820
+ # # shape: (3, 5)
821
+ # # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
822
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
823
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
824
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
825
+ # # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
826
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
827
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500 │
828
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60 │
829
+ # # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
735
830
  def millisecond
736
831
  Utils.wrap_expr(_rbexpr.dt_millisecond)
737
832
  end
@@ -741,6 +836,34 @@ module Polars
741
836
  # Applies to Datetime columns.
742
837
  #
743
838
  # @return [Expr]
839
+ #
840
+ # @example
841
+ # df = Polars::DataFrame.new(
842
+ # {
843
+ # "datetime": [
844
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
845
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
846
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
847
+ # ]
848
+ # }
849
+ # )
850
+ # df.with_columns(
851
+ # Polars.col("datetime").dt.hour.alias("hour"),
852
+ # Polars.col("datetime").dt.minute.alias("minute"),
853
+ # Polars.col("datetime").dt.second.alias("second"),
854
+ # Polars.col("datetime").dt.microsecond.alias("microsecond")
855
+ # )
856
+ # # =>
857
+ # # shape: (3, 5)
858
+ # # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
859
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
860
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
861
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
862
+ # # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
863
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
864
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000 │
865
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000 │
866
+ # # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
744
867
  def microsecond
745
868
  Utils.wrap_expr(_rbexpr.dt_microsecond)
746
869
  end
@@ -750,6 +873,34 @@ module Polars
750
873
  # Applies to Datetime columns.
751
874
  #
752
875
  # @return [Expr]
876
+ #
877
+ # @example
878
+ # df = Polars::DataFrame.new(
879
+ # {
880
+ # "datetime": [
881
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
882
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
883
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
884
+ # ]
885
+ # }
886
+ # )
887
+ # df.with_columns(
888
+ # Polars.col("datetime").dt.hour.alias("hour"),
889
+ # Polars.col("datetime").dt.minute.alias("minute"),
890
+ # Polars.col("datetime").dt.second.alias("second"),
891
+ # Polars.col("datetime").dt.nanosecond.alias("nanosecond")
892
+ # )
893
+ # # =>
894
+ # # shape: (3, 5)
895
+ # # ┌─────────────────────────┬──────┬────────┬────────┬────────────┐
896
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
897
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
898
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
899
+ # # ╞═════════════════════════╪══════╪════════╪════════╪════════════╡
900
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
901
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
902
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
903
+ # # └─────────────────────────┴──────┴────────┴────────┴────────────┘
753
904
  def nanosecond
754
905
  Utils.wrap_expr(_rbexpr.dt_nanosecond)
755
906
  end
@@ -835,6 +986,34 @@ module Polars
835
986
  # Time unit for the `Datetime` Series.
836
987
  #
837
988
  # @return [Expr]
989
+ #
990
+ # @example
991
+ # df = Polars::DataFrame.new(
992
+ # {
993
+ # "date" => Polars.datetime_range(
994
+ # Time.utc(2001, 1, 1),
995
+ # Time.utc(2001, 1, 3),
996
+ # "1d",
997
+ # time_unit: "ns",
998
+ # eager: true
999
+ # )
1000
+ # }
1001
+ # )
1002
+ # df.select(
1003
+ # Polars.col("date"),
1004
+ # Polars.col("date").dt.with_time_unit("us").alias("time_unit_us")
1005
+ # )
1006
+ # # =>
1007
+ # # shape: (3, 2)
1008
+ # # ┌─────────────────────┬───────────────────────┐
1009
+ # # │ date ┆ time_unit_us │
1010
+ # # │ --- ┆ --- │
1011
+ # # │ datetime[ns] ┆ datetime[μs] │
1012
+ # # ╞═════════════════════╪═══════════════════════╡
1013
+ # # │ 2001-01-01 00:00:00 ┆ +32971-04-28 00:00:00 │
1014
+ # # │ 2001-01-02 00:00:00 ┆ +32974-01-22 00:00:00 │
1015
+ # # │ 2001-01-03 00:00:00 ┆ +32976-10-18 00:00:00 │
1016
+ # # └─────────────────────┴───────────────────────┘
838
1017
  def with_time_unit(time_unit)
839
1018
  Utils.wrap_expr(_rbexpr.dt_with_time_unit(time_unit))
840
1019
  end
@@ -931,6 +1110,71 @@ module Polars
931
1110
  # Determine how to deal with non-existent datetimes.
932
1111
  #
933
1112
  # @return [Expr]
1113
+ #
1114
+ # @example
1115
+ # df = Polars::DataFrame.new(
1116
+ # {
1117
+ # "london_timezone": Polars.datetime_range(
1118
+ # Time.utc(2020, 3, 1),
1119
+ # Time.utc(2020, 7, 1),
1120
+ # "1mo",
1121
+ # time_zone: "UTC",
1122
+ # eager: true,
1123
+ # ).dt.convert_time_zone("Europe/London")
1124
+ # }
1125
+ # )
1126
+ # df.select(
1127
+ # [
1128
+ # Polars.col("london_timezone"),
1129
+ # Polars.col("london_timezone")
1130
+ # .dt.replace_time_zone("Europe/Amsterdam")
1131
+ # .alias("London_to_Amsterdam")
1132
+ # ]
1133
+ # )
1134
+ # # =>
1135
+ # # shape: (5, 2)
1136
+ # # ┌─────────────────────────────┬────────────────────────────────┐
1137
+ # # │ london_timezone ┆ London_to_Amsterdam │
1138
+ # # │ --- ┆ --- │
1139
+ # # │ datetime[ns, Europe/London] ┆ datetime[ns, Europe/Amsterdam] │
1140
+ # # ╞═════════════════════════════╪════════════════════════════════╡
1141
+ # # │ 2020-03-01 00:00:00 GMT ┆ 2020-03-01 00:00:00 CET │
1142
+ # # │ 2020-04-01 01:00:00 BST ┆ 2020-04-01 01:00:00 CEST │
1143
+ # # │ 2020-05-01 01:00:00 BST ┆ 2020-05-01 01:00:00 CEST │
1144
+ # # │ 2020-06-01 01:00:00 BST ┆ 2020-06-01 01:00:00 CEST │
1145
+ # # │ 2020-07-01 01:00:00 BST ┆ 2020-07-01 01:00:00 CEST │
1146
+ # # └─────────────────────────────┴────────────────────────────────┘
1147
+ #
1148
+ # @example You can use `ambiguous` to deal with ambiguous datetimes:
1149
+ # dates = [
1150
+ # "2018-10-28 01:30",
1151
+ # "2018-10-28 02:00",
1152
+ # "2018-10-28 02:30",
1153
+ # "2018-10-28 02:00"
1154
+ # ]
1155
+ # df = Polars::DataFrame.new(
1156
+ # {
1157
+ # "ts" => Polars::Series.new(dates).str.strptime(Polars::Datetime),
1158
+ # "ambiguous" => ["earliest", "earliest", "latest", "latest"]
1159
+ # }
1160
+ # )
1161
+ # df.with_columns(
1162
+ # ts_localized: Polars.col("ts").dt.replace_time_zone(
1163
+ # "Europe/Brussels", ambiguous: Polars.col("ambiguous")
1164
+ # )
1165
+ # )
1166
+ # # =>
1167
+ # # shape: (4, 3)
1168
+ # # ┌─────────────────────┬───────────┬───────────────────────────────┐
1169
+ # # │ ts ┆ ambiguous ┆ ts_localized │
1170
+ # # │ --- ┆ --- ┆ --- │
1171
+ # # │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │
1172
+ # # ╞═════════════════════╪═══════════╪═══════════════════════════════╡
1173
+ # # │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │
1174
+ # # │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │
1175
+ # # │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │
1176
+ # # │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │
1177
+ # # └─────────────────────┴───────────┴───────────────────────────────┘
934
1178
  def replace_time_zone(time_zone, ambiguous: "raise", non_existent: "raise")
935
1179
  unless ambiguous.is_a?(Expr)
936
1180
  ambiguous = Polars.lit(ambiguous)
@@ -1150,6 +1150,47 @@ module Polars
1150
1150
  # Every interval start and period length.
1151
1151
  #
1152
1152
  # @return [Series]
1153
+ #
1154
+ # @example
1155
+ # s = Polars.datetime_range(
1156
+ # Time.utc(2001, 1, 1),
1157
+ # Time.utc(2001, 1, 2),
1158
+ # "165m",
1159
+ # eager: true
1160
+ # ).alias("datetime")
1161
+ # s.dt.truncate("1h")
1162
+ # # =>
1163
+ # # shape: (9,)
1164
+ # # Series: 'datetime' [datetime[ns]]
1165
+ # # [
1166
+ # # 2001-01-01 00:00:00
1167
+ # # 2001-01-01 02:00:00
1168
+ # # 2001-01-01 05:00:00
1169
+ # # 2001-01-01 08:00:00
1170
+ # # 2001-01-01 11:00:00
1171
+ # # 2001-01-01 13:00:00
1172
+ # # 2001-01-01 16:00:00
1173
+ # # 2001-01-01 19:00:00
1174
+ # # 2001-01-01 22:00:00
1175
+ # # ]
1176
+ #
1177
+ # @example
1178
+ # s = Polars.datetime_range(
1179
+ # Time.utc(2001, 1, 1), Time.utc(2001, 1, 1, 1), "10m", eager: true
1180
+ # ).alias("datetime")
1181
+ # s.dt.truncate("30m")
1182
+ # # =>
1183
+ # # shape: (7,)
1184
+ # # Series: 'datetime' [datetime[ns]]
1185
+ # # [
1186
+ # # 2001-01-01 00:00:00
1187
+ # # 2001-01-01 00:00:00
1188
+ # # 2001-01-01 00:00:00
1189
+ # # 2001-01-01 00:30:00
1190
+ # # 2001-01-01 00:30:00
1191
+ # # 2001-01-01 00:30:00
1192
+ # # 2001-01-01 01:00:00
1193
+ # # ]
1153
1194
  def truncate(every)
1154
1195
  super
1155
1196
  end
@@ -1185,6 +1226,52 @@ module Polars
1185
1226
  # @note
1186
1227
  # This functionality is currently experimental and may
1187
1228
  # change without it being considered a breaking change.
1229
+ #
1230
+ # @example
1231
+ # start = Time.utc(2001, 1, 1)
1232
+ # stop = Time.utc(2001, 1, 2)
1233
+ # s = Polars.datetime_range(
1234
+ # start, stop, "165m", eager: true
1235
+ # ).alias("datetime")
1236
+ # s.dt.round("1h")
1237
+ # # =>
1238
+ # # shape: (9,)
1239
+ # # Series: 'datetime' [datetime[ns]]
1240
+ # # [
1241
+ # # 2001-01-01 00:00:00
1242
+ # # 2001-01-01 03:00:00
1243
+ # # 2001-01-01 06:00:00
1244
+ # # 2001-01-01 08:00:00
1245
+ # # 2001-01-01 11:00:00
1246
+ # # 2001-01-01 14:00:00
1247
+ # # 2001-01-01 17:00:00
1248
+ # # 2001-01-01 19:00:00
1249
+ # # 2001-01-01 22:00:00
1250
+ # # ]
1251
+ #
1252
+ # @example
1253
+ # round_str = s.dt.round("1h")
1254
+ # round_td = s.dt.round("1h")
1255
+ # round_str.equals(round_td)
1256
+ # # => true
1257
+ #
1258
+ # @example
1259
+ # start = Time.utc(2001, 1, 1)
1260
+ # stop = Time.utc(2001, 1, 1, 1)
1261
+ # s = Polars.datetime_range(start, stop, "10m", eager: true).alias("datetime")
1262
+ # s.dt.round("30m")
1263
+ # # =>
1264
+ # # shape: (7,)
1265
+ # # Series: 'datetime' [datetime[ns]]
1266
+ # # [
1267
+ # # 2001-01-01 00:00:00
1268
+ # # 2001-01-01 00:00:00
1269
+ # # 2001-01-01 00:30:00
1270
+ # # 2001-01-01 00:30:00
1271
+ # # 2001-01-01 00:30:00
1272
+ # # 2001-01-01 01:00:00
1273
+ # # 2001-01-01 01:00:00
1274
+ # # ]
1188
1275
  def round(every)
1189
1276
  super
1190
1277
  end
data/lib/polars/expr.rb CHANGED
@@ -411,6 +411,26 @@ module Polars
411
411
  # Add a prefix to the root column name of the expression.
412
412
  #
413
413
  # @return [Expr]
414
+ #
415
+ # @example
416
+ # df = Polars::DataFrame.new(
417
+ # {
418
+ # "a" => [1, 2, 3],
419
+ # "b" => ["x", "y", "z"]
420
+ # }
421
+ # )
422
+ # df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
423
+ # # =>
424
+ # # shape: (3, 4)
425
+ # # ┌─────┬─────┬───────────┬───────────┐
426
+ # # │ a ┆ b ┆ reverse_a ┆ reverse_b │
427
+ # # │ --- ┆ --- ┆ --- ┆ --- │
428
+ # # │ i64 ┆ str ┆ i64 ┆ str │
429
+ # # ╞═════╪═════╪═══════════╪═══════════╡
430
+ # # │ 1 ┆ x ┆ 3 ┆ z │
431
+ # # │ 2 ┆ y ┆ 2 ┆ y │
432
+ # # │ 3 ┆ z ┆ 1 ┆ x │
433
+ # # └─────┴─────┴───────────┴───────────┘
414
434
  def prefix(prefix)
415
435
  name.prefix(prefix)
416
436
  end
@@ -418,6 +438,26 @@ module Polars
418
438
  # Add a suffix to the root column name of the expression.
419
439
  #
420
440
  # @return [Expr]
441
+ #
442
+ # @example
443
+ # df = Polars::DataFrame.new(
444
+ # {
445
+ # "a" => [1, 2, 3],
446
+ # "b" => ["x", "y", "z"]
447
+ # }
448
+ # )
449
+ # df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
450
+ # # =>
451
+ # # shape: (3, 4)
452
+ # # ┌─────┬─────┬───────────┬───────────┐
453
+ # # │ a ┆ b ┆ a_reverse ┆ b_reverse │
454
+ # # │ --- ┆ --- ┆ --- ┆ --- │
455
+ # # │ i64 ┆ str ┆ i64 ┆ str │
456
+ # # ╞═════╪═════╪═══════════╪═══════════╡
457
+ # # │ 1 ┆ x ┆ 3 ┆ z │
458
+ # # │ 2 ┆ y ┆ 2 ┆ y │
459
+ # # │ 3 ┆ z ┆ 1 ┆ x │
460
+ # # └─────┴─────┴───────────┴───────────┘
421
461
  def suffix(suffix)
422
462
  name.suffix(suffix)
423
463
  end
@@ -1863,6 +1903,35 @@ module Polars
1863
1903
  # Reverse the selection.
1864
1904
  #
1865
1905
  # @return [Expr]
1906
+ #
1907
+ # @example
1908
+ # df = Polars::DataFrame.new(
1909
+ # {
1910
+ # "A" => [1, 2, 3, 4, 5],
1911
+ # "fruits" => ["banana", "banana", "apple", "apple", "banana"],
1912
+ # "B" => [5, 4, 3, 2, 1],
1913
+ # "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
1914
+ # }
1915
+ # )
1916
+ # df.select(
1917
+ # [
1918
+ # Polars.all,
1919
+ # Polars.all.reverse.name.suffix("_reverse")
1920
+ # ]
1921
+ # )
1922
+ # # =>
1923
+ # # shape: (5, 8)
1924
+ # # ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
1925
+ # # │ A ┆ fruits ┆ B ┆ cars ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
1926
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1927
+ # # │ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str │
1928
+ # # ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
1929
+ # # │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 5 ┆ banana ┆ 1 ┆ beetle │
1930
+ # # │ 2 ┆ banana ┆ 4 ┆ audi ┆ 4 ┆ apple ┆ 2 ┆ beetle │
1931
+ # # │ 3 ┆ apple ┆ 3 ┆ beetle ┆ 3 ┆ apple ┆ 3 ┆ beetle │
1932
+ # # │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 ┆ banana ┆ 4 ┆ audi │
1933
+ # # │ 5 ┆ banana ┆ 1 ┆ beetle ┆ 1 ┆ banana ┆ 5 ┆ beetle │
1934
+ # # └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘
1866
1935
  def reverse
1867
1936
  _from_rbexpr(_rbexpr.reverse)
1868
1937
  end
@@ -2825,7 +2894,7 @@ module Polars
2825
2894
  # # ╞══════╪════════╡
2826
2895
  # # │ 1 ┆ 0 │
2827
2896
  # # └──────┴────────┘
2828
- # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
2897
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
2829
2898
  # if !return_dtype.nil?
2830
2899
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2831
2900
  # end
@@ -2835,7 +2904,8 @@ module Polars
2835
2904
  # f,
2836
2905
  # return_dtype,
2837
2906
  # agg_list,
2838
- # is_elementwise
2907
+ # is_elementwise,
2908
+ # returns_scalar
2839
2909
  # )
2840
2910
  # )
2841
2911
  # end
@@ -3071,6 +3141,21 @@ module Polars
3071
3141
  # Number of rows to return.
3072
3142
  #
3073
3143
  # @return [Expr]
3144
+ #
3145
+ # @example
3146
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
3147
+ # df.select(Polars.col("foo").limit(3))
3148
+ # # =>
3149
+ # # shape: (3, 1)
3150
+ # # ┌─────┐
3151
+ # # │ foo │
3152
+ # # │ --- │
3153
+ # # │ i64 │
3154
+ # # ╞═════╡
3155
+ # # │ 1 │
3156
+ # # │ 2 │
3157
+ # # │ 3 │
3158
+ # # └─────┘
3074
3159
  def limit(n = 10)
3075
3160
  head(n)
3076
3161
  end
@@ -5601,6 +5686,22 @@ module Polars
5601
5686
  # If false, the calculations are corrected for statistical bias.
5602
5687
  #
5603
5688
  # @return [Expr]
5689
+ #
5690
+ # @example
5691
+ # df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
5692
+ # df.select(Polars.col("a").rolling_skew(3))
5693
+ # # =>
5694
+ # # shape: (4, 1)
5695
+ # # ┌──────────┐
5696
+ # # │ a │
5697
+ # # │ --- │
5698
+ # # │ f64 │
5699
+ # # ╞══════════╡
5700
+ # # │ null │
5701
+ # # │ null │
5702
+ # # │ 0.381802 │
5703
+ # # │ 0.47033 │
5704
+ # # └──────────┘
5604
5705
  def rolling_skew(window_size, bias: true)
5605
5706
  _from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
5606
5707
  end
@@ -143,6 +143,9 @@ module Polars
143
143
  # @param exprs [Array]
144
144
  # Column(s) to use in the aggregation. Accepts expression input. Strings are
145
145
  # parsed as column names, other non-expression inputs are parsed as literals.
146
+ # @param ignore_nulls [Boolean]
147
+ # Ignore null values (default).
148
+ # If set to `false`, any null value in the input will lead to a null output.
146
149
  #
147
150
  # @return [Expr]
148
151
  #
@@ -166,9 +169,9 @@ module Polars
166
169
  # # │ 8 ┆ 5 ┆ y ┆ 13 │
167
170
  # # │ 3 ┆ null ┆ z ┆ 3 │
168
171
  # # └─────┴──────┴─────┴─────┘
169
- def sum_horizontal(*exprs)
172
+ def sum_horizontal(*exprs, ignore_nulls: true)
170
173
  rbexprs = Utils.parse_into_list_of_expressions(*exprs)
171
- Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
174
+ Utils.wrap_expr(Plr.sum_horizontal(rbexprs, ignore_nulls))
172
175
  end
173
176
 
174
177
  # Compute the mean of all values horizontally across columns.
@@ -176,6 +179,9 @@ module Polars
176
179
  # @param exprs [Array]
177
180
  # Column(s) to use in the aggregation. Accepts expression input. Strings are
178
181
  # parsed as column names, other non-expression inputs are parsed as literals.
182
+ # @param ignore_nulls [Boolean]
183
+ # Ignore null values (default).
184
+ # If set to `false`, any null value in the input will lead to a null output.
179
185
  #
180
186
  # @return [Expr]
181
187
  #
@@ -199,9 +205,9 @@ module Polars
199
205
  # # │ 8 ┆ 5 ┆ y ┆ 6.5 │
200
206
  # # │ 3 ┆ null ┆ z ┆ 3.0 │
201
207
  # # └─────┴──────┴─────┴──────┘
202
- def mean_horizontal(*exprs)
208
+ def mean_horizontal(*exprs, ignore_nulls: true)
203
209
  rbexprs = Utils.parse_into_list_of_expressions(*exprs)
204
- Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
210
+ Utils.wrap_expr(Plr.mean_horizontal(rbexprs, ignore_nulls))
205
211
  end
206
212
 
207
213
  # Cumulatively sum all values horizontally across columns.