polars-df 0.14.0-x64-mingw-ucrt → 0.16.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE-THIRD-PARTY.txt +24369 -14580
  5. data/LICENSE.txt +1 -0
  6. data/README.md +38 -4
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +452 -101
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +3 -1
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  21. data/lib/polars/functions/as_datatype.rb +51 -2
  22. data/lib/polars/functions/col.rb +1 -1
  23. data/lib/polars/functions/eager.rb +1 -3
  24. data/lib/polars/functions/lazy.rb +95 -13
  25. data/lib/polars/functions/range/time_range.rb +21 -21
  26. data/lib/polars/io/csv.rb +14 -16
  27. data/lib/polars/io/database.rb +2 -2
  28. data/lib/polars/io/delta.rb +126 -0
  29. data/lib/polars/io/ipc.rb +14 -4
  30. data/lib/polars/io/ndjson.rb +10 -0
  31. data/lib/polars/io/parquet.rb +168 -111
  32. data/lib/polars/lazy_frame.rb +684 -20
  33. data/lib/polars/list_name_space.rb +169 -0
  34. data/lib/polars/selectors.rb +1226 -0
  35. data/lib/polars/series.rb +465 -35
  36. data/lib/polars/string_cache.rb +27 -1
  37. data/lib/polars/string_expr.rb +0 -1
  38. data/lib/polars/string_name_space.rb +73 -3
  39. data/lib/polars/struct_name_space.rb +31 -7
  40. data/lib/polars/utils/various.rb +5 -1
  41. data/lib/polars/utils.rb +45 -10
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +17 -1
  44. metadata +9 -8
  45. data/lib/polars/functions.rb +0 -57
data/lib/polars/data_type_group.rb ADDED
@@ -0,0 +1,28 @@
1
+ module Polars
2
+ class DataTypeGroup < Set
3
+ end
4
+
5
+ SIGNED_INTEGER_DTYPES = DataTypeGroup.new(
6
+ [
7
+ Int8,
8
+ Int16,
9
+ Int32,
10
+ Int64
11
+ ]
12
+ )
13
+ UNSIGNED_INTEGER_DTYPES = DataTypeGroup.new(
14
+ [
15
+ UInt8,
16
+ UInt16,
17
+ UInt32,
18
+ UInt64
19
+ ]
20
+ )
21
+ INTEGER_DTYPES = (
22
+ SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
23
+ )
24
+ FLOAT_DTYPES = DataTypeGroup.new([Float32, Float64])
25
+ NUMERIC_DTYPES = DataTypeGroup.new(
26
+ FLOAT_DTYPES + INTEGER_DTYPES | [Decimal]
27
+ )
28
+ end
data/lib/polars/data_types.rb CHANGED
@@ -292,6 +292,8 @@ module Polars
292
292
 
293
293
  # A categorical encoding of a set of strings.
294
294
  class Categorical < DataType
295
+ attr_reader :ordering
296
+
295
297
  def initialize(ordering = "physical")
296
298
  @ordering = ordering
297
299
  end
@@ -309,7 +311,7 @@ module Polars
309
311
  end
310
312
 
311
313
  if categories.empty?
312
- self.categories = Series.new("category", [], dtype: String)
314
+ @categories = Series.new("category", [], dtype: String)
313
315
  return
314
316
  end
315
317
 
data/lib/polars/date_time_expr.rb CHANGED
@@ -269,6 +269,50 @@ module Polars
269
269
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
270
270
  #
271
271
  # @return [Expr]
272
+ #
273
+ # @example
274
+ # df = Polars::DataFrame.new(
275
+ # {
276
+ # "datetime" => [
277
+ # Time.utc(2020, 3, 1),
278
+ # Time.utc(2020, 4, 1),
279
+ # Time.utc(2020, 5, 1)
280
+ # ]
281
+ # }
282
+ # )
283
+ # df.with_columns(
284
+ # Polars.col("datetime")
285
+ # .dt.strftime("%Y/%m/%d %H:%M:%S")
286
+ # .alias("datetime_string")
287
+ # )
288
+ # # =>
289
+ # # shape: (3, 2)
290
+ # # ┌─────────────────────┬─────────────────────┐
291
+ # # │ datetime ┆ datetime_string │
292
+ # # │ --- ┆ --- │
293
+ # # │ datetime[ns] ┆ str │
294
+ # # ╞═════════════════════╪═════════════════════╡
295
+ # # │ 2020-03-01 00:00:00 ┆ 2020/03/01 00:00:00 │
296
+ # # │ 2020-04-01 00:00:00 ┆ 2020/04/01 00:00:00 │
297
+ # # │ 2020-05-01 00:00:00 ┆ 2020/05/01 00:00:00 │
298
+ # # └─────────────────────┴─────────────────────┘
299
+ #
300
+ # @example If you're interested in the day name / month name, you can use `'%A'` / `'%B'`:
301
+ # df.with_columns(
302
+ # day_name: Polars.col("datetime").dt.strftime("%A"),
303
+ # month_name: Polars.col("datetime").dt.strftime("%B")
304
+ # )
305
+ # # =>
306
+ # # shape: (3, 3)
307
+ # # ┌─────────────────────┬───────────┬────────────┐
308
+ # # │ datetime ┆ day_name ┆ month_name │
309
+ # # │ --- ┆ --- ┆ --- │
310
+ # # │ datetime[ns] ┆ str ┆ str │
311
+ # # ╞═════════════════════╪═══════════╪════════════╡
312
+ # # │ 2020-03-01 00:00:00 ┆ Sunday ┆ March │
313
+ # # │ 2020-04-01 00:00:00 ┆ Wednesday ┆ April │
314
+ # # │ 2020-05-01 00:00:00 ┆ Friday ┆ May │
315
+ # # └─────────────────────┴───────────┴────────────┘
272
316
  def strftime(fmt)
273
317
  Utils.wrap_expr(_rbexpr.strftime(fmt))
274
318
  end
@@ -574,6 +618,29 @@ module Polars
574
618
  # Date
575
619
  #
576
620
  # @return [Expr]
621
+ #
622
+ # @example
623
+ # df = Polars::DataFrame.new(
624
+ # {
625
+ # "datetime" => [
626
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
627
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
628
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000)
629
+ # ]
630
+ # }
631
+ # )
632
+ # df.with_columns(Polars.col("datetime").dt.date.alias("date"))
633
+ # # =>
634
+ # # shape: (3, 2)
635
+ # # ┌─────────────────────────┬────────────┐
636
+ # # │ datetime ┆ date │
637
+ # # │ --- ┆ --- │
638
+ # # │ datetime[ns] ┆ date │
639
+ # # ╞═════════════════════════╪════════════╡
640
+ # # │ 1978-01-01 01:01:01 ┆ 1978-01-01 │
641
+ # # │ 2024-10-13 05:30:14.500 ┆ 2024-10-13 │
642
+ # # │ 2065-01-01 10:20:30.060 ┆ 2065-01-01 │
643
+ # # └─────────────────────────┴────────────┘
577
644
  def date
578
645
  Utils.wrap_expr(_rbexpr.dt_date)
579
646
  end
@@ -732,6 +799,34 @@ module Polars
732
799
  # Applies to Datetime columns.
733
800
  #
734
801
  # @return [Expr]
802
+ #
803
+ # @example
804
+ # df = Polars::DataFrame.new(
805
+ # {
806
+ # "datetime": [
807
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
808
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
809
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
810
+ # ]
811
+ # }
812
+ # )
813
+ # df.with_columns(
814
+ # Polars.col("datetime").dt.hour.alias("hour"),
815
+ # Polars.col("datetime").dt.minute.alias("minute"),
816
+ # Polars.col("datetime").dt.second.alias("second"),
817
+ # Polars.col("datetime").dt.millisecond.alias("millisecond")
818
+ # )
819
+ # # =>
820
+ # # shape: (3, 5)
821
+ # # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
822
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
823
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
824
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
825
+ # # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
826
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
827
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500 │
828
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60 │
829
+ # # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
735
830
  def millisecond
736
831
  Utils.wrap_expr(_rbexpr.dt_millisecond)
737
832
  end
@@ -741,6 +836,34 @@ module Polars
741
836
  # Applies to Datetime columns.
742
837
  #
743
838
  # @return [Expr]
839
+ #
840
+ # @example
841
+ # df = Polars::DataFrame.new(
842
+ # {
843
+ # "datetime": [
844
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
845
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
846
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
847
+ # ]
848
+ # }
849
+ # )
850
+ # df.with_columns(
851
+ # Polars.col("datetime").dt.hour.alias("hour"),
852
+ # Polars.col("datetime").dt.minute.alias("minute"),
853
+ # Polars.col("datetime").dt.second.alias("second"),
854
+ # Polars.col("datetime").dt.microsecond.alias("microsecond")
855
+ # )
856
+ # # =>
857
+ # # shape: (3, 5)
858
+ # # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
859
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
860
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
861
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
862
+ # # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
863
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
864
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000 │
865
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000 │
866
+ # # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
744
867
  def microsecond
745
868
  Utils.wrap_expr(_rbexpr.dt_microsecond)
746
869
  end
@@ -750,6 +873,34 @@ module Polars
750
873
  # Applies to Datetime columns.
751
874
  #
752
875
  # @return [Expr]
876
+ #
877
+ # @example
878
+ # df = Polars::DataFrame.new(
879
+ # {
880
+ # "datetime": [
881
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
882
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
883
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
884
+ # ]
885
+ # }
886
+ # )
887
+ # df.with_columns(
888
+ # Polars.col("datetime").dt.hour.alias("hour"),
889
+ # Polars.col("datetime").dt.minute.alias("minute"),
890
+ # Polars.col("datetime").dt.second.alias("second"),
891
+ # Polars.col("datetime").dt.nanosecond.alias("nanosecond")
892
+ # )
893
+ # # =>
894
+ # # shape: (3, 5)
895
+ # # ┌─────────────────────────┬──────┬────────┬────────┬────────────┐
896
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
897
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
898
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
899
+ # # ╞═════════════════════════╪══════╪════════╪════════╪════════════╡
900
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
901
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
902
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
903
+ # # └─────────────────────────┴──────┴────────┴────────┴────────────┘
753
904
  def nanosecond
754
905
  Utils.wrap_expr(_rbexpr.dt_nanosecond)
755
906
  end
@@ -835,6 +986,34 @@ module Polars
835
986
  # Time unit for the `Datetime` Series.
836
987
  #
837
988
  # @return [Expr]
989
+ #
990
+ # @example
991
+ # df = Polars::DataFrame.new(
992
+ # {
993
+ # "date" => Polars.datetime_range(
994
+ # Time.utc(2001, 1, 1),
995
+ # Time.utc(2001, 1, 3),
996
+ # "1d",
997
+ # time_unit: "ns",
998
+ # eager: true
999
+ # )
1000
+ # }
1001
+ # )
1002
+ # df.select(
1003
+ # Polars.col("date"),
1004
+ # Polars.col("date").dt.with_time_unit("us").alias("time_unit_us")
1005
+ # )
1006
+ # # =>
1007
+ # # shape: (3, 2)
1008
+ # # ┌─────────────────────┬───────────────────────┐
1009
+ # # │ date ┆ time_unit_us │
1010
+ # # │ --- ┆ --- │
1011
+ # # │ datetime[ns] ┆ datetime[μs] │
1012
+ # # ╞═════════════════════╪═══════════════════════╡
1013
+ # # │ 2001-01-01 00:00:00 ┆ +32971-04-28 00:00:00 │
1014
+ # # │ 2001-01-02 00:00:00 ┆ +32974-01-22 00:00:00 │
1015
+ # # │ 2001-01-03 00:00:00 ┆ +32976-10-18 00:00:00 │
1016
+ # # └─────────────────────┴───────────────────────┘
838
1017
  def with_time_unit(time_unit)
839
1018
  Utils.wrap_expr(_rbexpr.dt_with_time_unit(time_unit))
840
1019
  end
@@ -931,6 +1110,71 @@ module Polars
931
1110
  # Determine how to deal with non-existent datetimes.
932
1111
  #
933
1112
  # @return [Expr]
1113
+ #
1114
+ # @example
1115
+ # df = Polars::DataFrame.new(
1116
+ # {
1117
+ # "london_timezone": Polars.datetime_range(
1118
+ # Time.utc(2020, 3, 1),
1119
+ # Time.utc(2020, 7, 1),
1120
+ # "1mo",
1121
+ # time_zone: "UTC",
1122
+ # eager: true,
1123
+ # ).dt.convert_time_zone("Europe/London")
1124
+ # }
1125
+ # )
1126
+ # df.select(
1127
+ # [
1128
+ # Polars.col("london_timezone"),
1129
+ # Polars.col("london_timezone")
1130
+ # .dt.replace_time_zone("Europe/Amsterdam")
1131
+ # .alias("London_to_Amsterdam")
1132
+ # ]
1133
+ # )
1134
+ # # =>
1135
+ # # shape: (5, 2)
1136
+ # # ┌─────────────────────────────┬────────────────────────────────┐
1137
+ # # │ london_timezone ┆ London_to_Amsterdam │
1138
+ # # │ --- ┆ --- │
1139
+ # # │ datetime[ns, Europe/London] ┆ datetime[ns, Europe/Amsterdam] │
1140
+ # # ╞═════════════════════════════╪════════════════════════════════╡
1141
+ # # │ 2020-03-01 00:00:00 GMT ┆ 2020-03-01 00:00:00 CET │
1142
+ # # │ 2020-04-01 01:00:00 BST ┆ 2020-04-01 01:00:00 CEST │
1143
+ # # │ 2020-05-01 01:00:00 BST ┆ 2020-05-01 01:00:00 CEST │
1144
+ # # │ 2020-06-01 01:00:00 BST ┆ 2020-06-01 01:00:00 CEST │
1145
+ # # │ 2020-07-01 01:00:00 BST ┆ 2020-07-01 01:00:00 CEST │
1146
+ # # └─────────────────────────────┴────────────────────────────────┘
1147
+ #
1148
+ # @example You can use `ambiguous` to deal with ambiguous datetimes:
1149
+ # dates = [
1150
+ # "2018-10-28 01:30",
1151
+ # "2018-10-28 02:00",
1152
+ # "2018-10-28 02:30",
1153
+ # "2018-10-28 02:00"
1154
+ # ]
1155
+ # df = Polars::DataFrame.new(
1156
+ # {
1157
+ # "ts" => Polars::Series.new(dates).str.strptime(Polars::Datetime),
1158
+ # "ambiguous" => ["earliest", "earliest", "latest", "latest"]
1159
+ # }
1160
+ # )
1161
+ # df.with_columns(
1162
+ # ts_localized: Polars.col("ts").dt.replace_time_zone(
1163
+ # "Europe/Brussels", ambiguous: Polars.col("ambiguous")
1164
+ # )
1165
+ # )
1166
+ # # =>
1167
+ # # shape: (4, 3)
1168
+ # # ┌─────────────────────┬───────────┬───────────────────────────────┐
1169
+ # # │ ts ┆ ambiguous ┆ ts_localized │
1170
+ # # │ --- ┆ --- ┆ --- │
1171
+ # # │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │
1172
+ # # ╞═════════════════════╪═══════════╪═══════════════════════════════╡
1173
+ # # │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │
1174
+ # # │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │
1175
+ # # │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │
1176
+ # # │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │
1177
+ # # └─────────────────────┴───────────┴───────────────────────────────┘
934
1178
  def replace_time_zone(time_zone, ambiguous: "raise", non_existent: "raise")
935
1179
  unless ambiguous.is_a?(Expr)
936
1180
  ambiguous = Polars.lit(ambiguous)
data/lib/polars/date_time_name_space.rb CHANGED
@@ -1150,6 +1150,47 @@ module Polars
1150
1150
  # Every interval start and period length.
1151
1151
  #
1152
1152
  # @return [Series]
1153
+ #
1154
+ # @example
1155
+ # s = Polars.datetime_range(
1156
+ # Time.utc(2001, 1, 1),
1157
+ # Time.utc(2001, 1, 2),
1158
+ # "165m",
1159
+ # eager: true
1160
+ # ).alias("datetime")
1161
+ # s.dt.truncate("1h")
1162
+ # # =>
1163
+ # # shape: (9,)
1164
+ # # Series: 'datetime' [datetime[ns]]
1165
+ # # [
1166
+ # # 2001-01-01 00:00:00
1167
+ # # 2001-01-01 02:00:00
1168
+ # # 2001-01-01 05:00:00
1169
+ # # 2001-01-01 08:00:00
1170
+ # # 2001-01-01 11:00:00
1171
+ # # 2001-01-01 13:00:00
1172
+ # # 2001-01-01 16:00:00
1173
+ # # 2001-01-01 19:00:00
1174
+ # # 2001-01-01 22:00:00
1175
+ # # ]
1176
+ #
1177
+ # @example
1178
+ # s = Polars.datetime_range(
1179
+ # Time.utc(2001, 1, 1), Time.utc(2001, 1, 1, 1), "10m", eager: true
1180
+ # ).alias("datetime")
1181
+ # s.dt.truncate("30m")
1182
+ # # =>
1183
+ # # shape: (7,)
1184
+ # # Series: 'datetime' [datetime[ns]]
1185
+ # # [
1186
+ # # 2001-01-01 00:00:00
1187
+ # # 2001-01-01 00:00:00
1188
+ # # 2001-01-01 00:00:00
1189
+ # # 2001-01-01 00:30:00
1190
+ # # 2001-01-01 00:30:00
1191
+ # # 2001-01-01 00:30:00
1192
+ # # 2001-01-01 01:00:00
1193
+ # # ]
1153
1194
  def truncate(every)
1154
1195
  super
1155
1196
  end
@@ -1185,6 +1226,52 @@ module Polars
1185
1226
  # @note
1186
1227
  # This functionality is currently experimental and may
1187
1228
  # change without it being considered a breaking change.
1229
+ #
1230
+ # @example
1231
+ # start = Time.utc(2001, 1, 1)
1232
+ # stop = Time.utc(2001, 1, 2)
1233
+ # s = Polars.datetime_range(
1234
+ # start, stop, "165m", eager: true
1235
+ # ).alias("datetime")
1236
+ # s.dt.round("1h")
1237
+ # # =>
1238
+ # # shape: (9,)
1239
+ # # Series: 'datetime' [datetime[ns]]
1240
+ # # [
1241
+ # # 2001-01-01 00:00:00
1242
+ # # 2001-01-01 03:00:00
1243
+ # # 2001-01-01 06:00:00
1244
+ # # 2001-01-01 08:00:00
1245
+ # # 2001-01-01 11:00:00
1246
+ # # 2001-01-01 14:00:00
1247
+ # # 2001-01-01 17:00:00
1248
+ # # 2001-01-01 19:00:00
1249
+ # # 2001-01-01 22:00:00
1250
+ # # ]
1251
+ #
1252
+ # @example
1253
+ # round_str = s.dt.round("1h")
1254
+ # round_td = s.dt.round("1h")
1255
+ # round_str.equals(round_td)
1256
+ # # => true
1257
+ #
1258
+ # @example
1259
+ # start = Time.utc(2001, 1, 1)
1260
+ # stop = Time.utc(2001, 1, 1, 1)
1261
+ # s = Polars.datetime_range(start, stop, "10m", eager: true).alias("datetime")
1262
+ # s.dt.round("30m")
1263
+ # # =>
1264
+ # # shape: (7,)
1265
+ # # Series: 'datetime' [datetime[ns]]
1266
+ # # [
1267
+ # # 2001-01-01 00:00:00
1268
+ # # 2001-01-01 00:00:00
1269
+ # # 2001-01-01 00:30:00
1270
+ # # 2001-01-01 00:30:00
1271
+ # # 2001-01-01 00:30:00
1272
+ # # 2001-01-01 01:00:00
1273
+ # # 2001-01-01 01:00:00
1274
+ # # ]
1188
1275
  def round(every)
1189
1276
  super
1190
1277
  end
data/lib/polars/expr.rb CHANGED
@@ -411,6 +411,26 @@ module Polars
411
411
  # Add a prefix to the root column name of the expression.
412
412
  #
413
413
  # @return [Expr]
414
+ #
415
+ # @example
416
+ # df = Polars::DataFrame.new(
417
+ # {
418
+ # "a" => [1, 2, 3],
419
+ # "b" => ["x", "y", "z"]
420
+ # }
421
+ # )
422
+ # df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
423
+ # # =>
424
+ # # shape: (3, 4)
425
+ # # ┌─────┬─────┬───────────┬───────────┐
426
+ # # │ a ┆ b ┆ reverse_a ┆ reverse_b │
427
+ # # │ --- ┆ --- ┆ --- ┆ --- │
428
+ # # │ i64 ┆ str ┆ i64 ┆ str │
429
+ # # ╞═════╪═════╪═══════════╪═══════════╡
430
+ # # │ 1 ┆ x ┆ 3 ┆ z │
431
+ # # │ 2 ┆ y ┆ 2 ┆ y │
432
+ # # │ 3 ┆ z ┆ 1 ┆ x │
433
+ # # └─────┴─────┴───────────┴───────────┘
414
434
  def prefix(prefix)
415
435
  name.prefix(prefix)
416
436
  end
@@ -418,6 +438,26 @@ module Polars
418
438
  # Add a suffix to the root column name of the expression.
419
439
  #
420
440
  # @return [Expr]
441
+ #
442
+ # @example
443
+ # df = Polars::DataFrame.new(
444
+ # {
445
+ # "a" => [1, 2, 3],
446
+ # "b" => ["x", "y", "z"]
447
+ # }
448
+ # )
449
+ # df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
450
+ # # =>
451
+ # # shape: (3, 4)
452
+ # # ┌─────┬─────┬───────────┬───────────┐
453
+ # # │ a ┆ b ┆ a_reverse ┆ b_reverse │
454
+ # # │ --- ┆ --- ┆ --- ┆ --- │
455
+ # # │ i64 ┆ str ┆ i64 ┆ str │
456
+ # # ╞═════╪═════╪═══════════╪═══════════╡
457
+ # # │ 1 ┆ x ┆ 3 ┆ z │
458
+ # # │ 2 ┆ y ┆ 2 ┆ y │
459
+ # # │ 3 ┆ z ┆ 1 ┆ x │
460
+ # # └─────┴─────┴───────────┴───────────┘
421
461
  def suffix(suffix)
422
462
  name.suffix(suffix)
423
463
  end
@@ -1863,6 +1903,35 @@ module Polars
1863
1903
  # Reverse the selection.
1864
1904
  #
1865
1905
  # @return [Expr]
1906
+ #
1907
+ # @example
1908
+ # df = Polars::DataFrame.new(
1909
+ # {
1910
+ # "A" => [1, 2, 3, 4, 5],
1911
+ # "fruits" => ["banana", "banana", "apple", "apple", "banana"],
1912
+ # "B" => [5, 4, 3, 2, 1],
1913
+ # "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
1914
+ # }
1915
+ # )
1916
+ # df.select(
1917
+ # [
1918
+ # Polars.all,
1919
+ # Polars.all.reverse.name.suffix("_reverse")
1920
+ # ]
1921
+ # )
1922
+ # # =>
1923
+ # # shape: (5, 8)
1924
+ # # ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
1925
+ # # │ A ┆ fruits ┆ B ┆ cars ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
1926
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1927
+ # # │ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str │
1928
+ # # ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
1929
+ # # │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 5 ┆ banana ┆ 1 ┆ beetle │
1930
+ # # │ 2 ┆ banana ┆ 4 ┆ audi ┆ 4 ┆ apple ┆ 2 ┆ beetle │
1931
+ # # │ 3 ┆ apple ┆ 3 ┆ beetle ┆ 3 ┆ apple ┆ 3 ┆ beetle │
1932
+ # # │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 ┆ banana ┆ 4 ┆ audi │
1933
+ # # │ 5 ┆ banana ┆ 1 ┆ beetle ┆ 1 ┆ banana ┆ 5 ┆ beetle │
1934
+ # # └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘
1866
1935
  def reverse
1867
1936
  _from_rbexpr(_rbexpr.reverse)
1868
1937
  end
@@ -2825,7 +2894,7 @@ module Polars
2825
2894
  # # ╞══════╪════════╡
2826
2895
  # # │ 1 ┆ 0 │
2827
2896
  # # └──────┴────────┘
2828
- # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
2897
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
2829
2898
  # if !return_dtype.nil?
2830
2899
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2831
2900
  # end
@@ -2835,7 +2904,8 @@ module Polars
2835
2904
  # f,
2836
2905
  # return_dtype,
2837
2906
  # agg_list,
2838
- # is_elementwise
2907
+ # is_elementwise,
2908
+ # returns_scalar
2839
2909
  # )
2840
2910
  # )
2841
2911
  # end
@@ -3071,6 +3141,21 @@ module Polars
3071
3141
  # Number of rows to return.
3072
3142
  #
3073
3143
  # @return [Expr]
3144
+ #
3145
+ # @example
3146
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
3147
+ # df.select(Polars.col("foo").limit(3))
3148
+ # # =>
3149
+ # # shape: (3, 1)
3150
+ # # ┌─────┐
3151
+ # # │ foo │
3152
+ # # │ --- │
3153
+ # # │ i64 │
3154
+ # # ╞═════╡
3155
+ # # │ 1 │
3156
+ # # │ 2 │
3157
+ # # │ 3 │
3158
+ # # └─────┘
3074
3159
  def limit(n = 10)
3075
3160
  head(n)
3076
3161
  end
@@ -5601,6 +5686,22 @@ module Polars
5601
5686
  # If false, the calculations are corrected for statistical bias.
5602
5687
  #
5603
5688
  # @return [Expr]
5689
+ #
5690
+ # @example
5691
+ # df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
5692
+ # df.select(Polars.col("a").rolling_skew(3))
5693
+ # # =>
5694
+ # # shape: (4, 1)
5695
+ # # ┌──────────┐
5696
+ # # │ a │
5697
+ # # │ --- │
5698
+ # # │ f64 │
5699
+ # # ╞══════════╡
5700
+ # # │ null │
5701
+ # # │ null │
5702
+ # # │ 0.381802 │
5703
+ # # │ 0.47033 │
5704
+ # # └──────────┘
5604
5705
  def rolling_skew(window_size, bias: true)
5605
5706
  _from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
5606
5707
  end
data/lib/polars/functions/aggregation/horizontal.rb CHANGED
@@ -143,6 +143,9 @@ module Polars
143
143
  # @param exprs [Array]
144
144
  # Column(s) to use in the aggregation. Accepts expression input. Strings are
145
145
  # parsed as column names, other non-expression inputs are parsed as literals.
146
+ # @param ignore_nulls [Boolean]
147
+ # Ignore null values (default).
148
+ # If set to `false`, any null value in the input will lead to a null output.
146
149
  #
147
150
  # @return [Expr]
148
151
  #
@@ -166,9 +169,9 @@ module Polars
166
169
  # # │ 8 ┆ 5 ┆ y ┆ 13 │
167
170
  # # │ 3 ┆ null ┆ z ┆ 3 │
168
171
  # # └─────┴──────┴─────┴─────┘
169
- def sum_horizontal(*exprs)
172
+ def sum_horizontal(*exprs, ignore_nulls: true)
170
173
  rbexprs = Utils.parse_into_list_of_expressions(*exprs)
171
- Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
174
+ Utils.wrap_expr(Plr.sum_horizontal(rbexprs, ignore_nulls))
172
175
  end
173
176
 
174
177
  # Compute the mean of all values horizontally across columns.
@@ -176,6 +179,9 @@ module Polars
176
179
  # @param exprs [Array]
177
180
  # Column(s) to use in the aggregation. Accepts expression input. Strings are
178
181
  # parsed as column names, other non-expression inputs are parsed as literals.
182
+ # @param ignore_nulls [Boolean]
183
+ # Ignore null values (default).
184
+ # If set to `false`, any null value in the input will lead to a null output.
179
185
  #
180
186
  # @return [Expr]
181
187
  #
@@ -199,9 +205,9 @@ module Polars
199
205
  # # │ 8 ┆ 5 ┆ y ┆ 6.5 │
200
206
  # # │ 3 ┆ null ┆ z ┆ 3.0 │
201
207
  # # └─────┴──────┴─────┴──────┘
202
- def mean_horizontal(*exprs)
208
+ def mean_horizontal(*exprs, ignore_nulls: true)
203
209
  rbexprs = Utils.parse_into_list_of_expressions(*exprs)
204
- Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
210
+ Utils.wrap_expr(Plr.mean_horizontal(rbexprs, ignore_nulls))
205
211
  end
206
212
 
207
213
  # Cumulatively sum all values horizontally across columns.