polars-df 0.13.0-arm64-darwin → 0.15.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE-THIRD-PARTY.txt +24439 -12853
  5. data/LICENSE.txt +1 -0
  6. data/README.md +1 -2
  7. data/lib/polars/3.1/polars.bundle +0 -0
  8. data/lib/polars/3.2/polars.bundle +0 -0
  9. data/lib/polars/3.3/polars.bundle +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +285 -62
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +2 -0
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +109 -8
  20. data/lib/polars/functions/as_datatype.rb +51 -2
  21. data/lib/polars/functions/col.rb +1 -1
  22. data/lib/polars/functions/eager.rb +1 -3
  23. data/lib/polars/functions/lazy.rb +88 -10
  24. data/lib/polars/functions/range/time_range.rb +21 -21
  25. data/lib/polars/io/csv.rb +14 -16
  26. data/lib/polars/io/database.rb +2 -2
  27. data/lib/polars/io/ipc.rb +14 -12
  28. data/lib/polars/io/ndjson.rb +10 -0
  29. data/lib/polars/io/parquet.rb +168 -111
  30. data/lib/polars/lazy_frame.rb +649 -15
  31. data/lib/polars/list_name_space.rb +169 -0
  32. data/lib/polars/selectors.rb +1144 -0
  33. data/lib/polars/series.rb +470 -40
  34. data/lib/polars/string_cache.rb +27 -1
  35. data/lib/polars/string_expr.rb +0 -1
  36. data/lib/polars/string_name_space.rb +73 -3
  37. data/lib/polars/struct_name_space.rb +31 -7
  38. data/lib/polars/utils/various.rb +5 -1
  39. data/lib/polars/utils.rb +45 -10
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +2 -1
  42. metadata +4 -3
  43. data/lib/polars/functions.rb +0 -57
data/lib/polars/date_time_expr.rb CHANGED
@@ -269,6 +269,50 @@ module Polars
269
269
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
270
270
  #
271
271
  # @return [Expr]
272
+ #
273
+ # @example
274
+ # df = Polars::DataFrame.new(
275
+ # {
276
+ # "datetime" => [
277
+ # Time.utc(2020, 3, 1),
278
+ # Time.utc(2020, 4, 1),
279
+ # Time.utc(2020, 5, 1)
280
+ # ]
281
+ # }
282
+ # )
283
+ # df.with_columns(
284
+ # Polars.col("datetime")
285
+ # .dt.strftime("%Y/%m/%d %H:%M:%S")
286
+ # .alias("datetime_string")
287
+ # )
288
+ # # =>
289
+ # # shape: (3, 2)
290
+ # # ┌─────────────────────┬─────────────────────┐
291
+ # # │ datetime ┆ datetime_string │
292
+ # # │ --- ┆ --- │
293
+ # # │ datetime[ns] ┆ str │
294
+ # # ╞═════════════════════╪═════════════════════╡
295
+ # # │ 2020-03-01 00:00:00 ┆ 2020/03/01 00:00:00 │
296
+ # # │ 2020-04-01 00:00:00 ┆ 2020/04/01 00:00:00 │
297
+ # # │ 2020-05-01 00:00:00 ┆ 2020/05/01 00:00:00 │
298
+ # # └─────────────────────┴─────────────────────┘
299
+ #
300
+ # @example If you're interested in the day name / month name, you can use `'%A'` / `'%B'`:
301
+ # df.with_columns(
302
+ # day_name: Polars.col("datetime").dt.strftime("%A"),
303
+ # month_name: Polars.col("datetime").dt.strftime("%B")
304
+ # )
305
+ # # =>
306
+ # # shape: (3, 3)
307
+ # # ┌─────────────────────┬───────────┬────────────┐
308
+ # # │ datetime ┆ day_name ┆ month_name │
309
+ # # │ --- ┆ --- ┆ --- │
310
+ # # │ datetime[ns] ┆ str ┆ str │
311
+ # # ╞═════════════════════╪═══════════╪════════════╡
312
+ # # │ 2020-03-01 00:00:00 ┆ Sunday ┆ March │
313
+ # # │ 2020-04-01 00:00:00 ┆ Wednesday ┆ April │
314
+ # # │ 2020-05-01 00:00:00 ┆ Friday ┆ May │
315
+ # # └─────────────────────┴───────────┴────────────┘
272
316
  def strftime(fmt)
273
317
  Utils.wrap_expr(_rbexpr.strftime(fmt))
274
318
  end
@@ -574,6 +618,29 @@ module Polars
574
618
  # Date
575
619
  #
576
620
  # @return [Expr]
621
+ #
622
+ # @example
623
+ # df = Polars::DataFrame.new(
624
+ # {
625
+ # "datetime" => [
626
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
627
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
628
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000)
629
+ # ]
630
+ # }
631
+ # )
632
+ # df.with_columns(Polars.col("datetime").dt.date.alias("date"))
633
+ # # =>
634
+ # # shape: (3, 2)
635
+ # # ┌─────────────────────────┬────────────┐
636
+ # # │ datetime ┆ date │
637
+ # # │ --- ┆ --- │
638
+ # # │ datetime[ns] ┆ date │
639
+ # # ╞═════════════════════════╪════════════╡
640
+ # # │ 1978-01-01 01:01:01 ┆ 1978-01-01 │
641
+ # # │ 2024-10-13 05:30:14.500 ┆ 2024-10-13 │
642
+ # # │ 2065-01-01 10:20:30.060 ┆ 2065-01-01 │
643
+ # # └─────────────────────────┴────────────┘
577
644
  def date
578
645
  Utils.wrap_expr(_rbexpr.dt_date)
579
646
  end
@@ -732,6 +799,34 @@ module Polars
732
799
  # Applies to Datetime columns.
733
800
  #
734
801
  # @return [Expr]
802
+ #
803
+ # @example
804
+ # df = Polars::DataFrame.new(
805
+ # {
806
+ # "datetime": [
807
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
808
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
809
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
810
+ # ]
811
+ # }
812
+ # )
813
+ # df.with_columns(
814
+ # Polars.col("datetime").dt.hour.alias("hour"),
815
+ # Polars.col("datetime").dt.minute.alias("minute"),
816
+ # Polars.col("datetime").dt.second.alias("second"),
817
+ # Polars.col("datetime").dt.millisecond.alias("millisecond")
818
+ # )
819
+ # # =>
820
+ # # shape: (3, 5)
821
+ # # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
822
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
823
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
824
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
825
+ # # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
826
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
827
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500 │
828
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60 │
829
+ # # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
735
830
  def millisecond
736
831
  Utils.wrap_expr(_rbexpr.dt_millisecond)
737
832
  end
@@ -741,6 +836,34 @@ module Polars
741
836
  # Applies to Datetime columns.
742
837
  #
743
838
  # @return [Expr]
839
+ #
840
+ # @example
841
+ # df = Polars::DataFrame.new(
842
+ # {
843
+ # "datetime": [
844
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
845
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
846
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
847
+ # ]
848
+ # }
849
+ # )
850
+ # df.with_columns(
851
+ # Polars.col("datetime").dt.hour.alias("hour"),
852
+ # Polars.col("datetime").dt.minute.alias("minute"),
853
+ # Polars.col("datetime").dt.second.alias("second"),
854
+ # Polars.col("datetime").dt.microsecond.alias("microsecond")
855
+ # )
856
+ # # =>
857
+ # # shape: (3, 5)
858
+ # # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
859
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
860
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
861
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
862
+ # # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
863
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
864
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000 │
865
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000 │
866
+ # # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
744
867
  def microsecond
745
868
  Utils.wrap_expr(_rbexpr.dt_microsecond)
746
869
  end
@@ -750,6 +873,34 @@ module Polars
750
873
  # Applies to Datetime columns.
751
874
  #
752
875
  # @return [Expr]
876
+ #
877
+ # @example
878
+ # df = Polars::DataFrame.new(
879
+ # {
880
+ # "datetime": [
881
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
882
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
883
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
884
+ # ]
885
+ # }
886
+ # )
887
+ # df.with_columns(
888
+ # Polars.col("datetime").dt.hour.alias("hour"),
889
+ # Polars.col("datetime").dt.minute.alias("minute"),
890
+ # Polars.col("datetime").dt.second.alias("second"),
891
+ # Polars.col("datetime").dt.nanosecond.alias("nanosecond")
892
+ # )
893
+ # # =>
894
+ # # shape: (3, 5)
895
+ # # ┌─────────────────────────┬──────┬────────┬────────┬────────────┐
896
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
897
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
898
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
899
+ # # ╞═════════════════════════╪══════╪════════╪════════╪════════════╡
900
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
901
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
902
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
903
+ # # └─────────────────────────┴──────┴────────┴────────┴────────────┘
753
904
  def nanosecond
754
905
  Utils.wrap_expr(_rbexpr.dt_nanosecond)
755
906
  end
@@ -835,6 +986,34 @@ module Polars
835
986
  # Time unit for the `Datetime` Series.
836
987
  #
837
988
  # @return [Expr]
989
+ #
990
+ # @example
991
+ # df = Polars::DataFrame.new(
992
+ # {
993
+ # "date" => Polars.datetime_range(
994
+ # Time.utc(2001, 1, 1),
995
+ # Time.utc(2001, 1, 3),
996
+ # "1d",
997
+ # time_unit: "ns",
998
+ # eager: true
999
+ # )
1000
+ # }
1001
+ # )
1002
+ # df.select(
1003
+ # Polars.col("date"),
1004
+ # Polars.col("date").dt.with_time_unit("us").alias("time_unit_us")
1005
+ # )
1006
+ # # =>
1007
+ # # shape: (3, 2)
1008
+ # # ┌─────────────────────┬───────────────────────┐
1009
+ # # │ date ┆ time_unit_us │
1010
+ # # │ --- ┆ --- │
1011
+ # # │ datetime[ns] ┆ datetime[μs] │
1012
+ # # ╞═════════════════════╪═══════════════════════╡
1013
+ # # │ 2001-01-01 00:00:00 ┆ +32971-04-28 00:00:00 │
1014
+ # # │ 2001-01-02 00:00:00 ┆ +32974-01-22 00:00:00 │
1015
+ # # │ 2001-01-03 00:00:00 ┆ +32976-10-18 00:00:00 │
1016
+ # # └─────────────────────┴───────────────────────┘
838
1017
  def with_time_unit(time_unit)
839
1018
  Utils.wrap_expr(_rbexpr.dt_with_time_unit(time_unit))
840
1019
  end
@@ -931,6 +1110,71 @@ module Polars
931
1110
  # Determine how to deal with non-existent datetimes.
932
1111
  #
933
1112
  # @return [Expr]
1113
+ #
1114
+ # @example
1115
+ # df = Polars::DataFrame.new(
1116
+ # {
1117
+ # "london_timezone": Polars.datetime_range(
1118
+ # Time.utc(2020, 3, 1),
1119
+ # Time.utc(2020, 7, 1),
1120
+ # "1mo",
1121
+ # time_zone: "UTC",
1122
+ # eager: true,
1123
+ # ).dt.convert_time_zone("Europe/London")
1124
+ # }
1125
+ # )
1126
+ # df.select(
1127
+ # [
1128
+ # Polars.col("london_timezone"),
1129
+ # Polars.col("london_timezone")
1130
+ # .dt.replace_time_zone("Europe/Amsterdam")
1131
+ # .alias("London_to_Amsterdam")
1132
+ # ]
1133
+ # )
1134
+ # # =>
1135
+ # # shape: (5, 2)
1136
+ # # ┌─────────────────────────────┬────────────────────────────────┐
1137
+ # # │ london_timezone ┆ London_to_Amsterdam │
1138
+ # # │ --- ┆ --- │
1139
+ # # │ datetime[ns, Europe/London] ┆ datetime[ns, Europe/Amsterdam] │
1140
+ # # ╞═════════════════════════════╪════════════════════════════════╡
1141
+ # # │ 2020-03-01 00:00:00 GMT ┆ 2020-03-01 00:00:00 CET │
1142
+ # # │ 2020-04-01 01:00:00 BST ┆ 2020-04-01 01:00:00 CEST │
1143
+ # # │ 2020-05-01 01:00:00 BST ┆ 2020-05-01 01:00:00 CEST │
1144
+ # # │ 2020-06-01 01:00:00 BST ┆ 2020-06-01 01:00:00 CEST │
1145
+ # # │ 2020-07-01 01:00:00 BST ┆ 2020-07-01 01:00:00 CEST │
1146
+ # # └─────────────────────────────┴────────────────────────────────┘
1147
+ #
1148
+ # @example You can use `ambiguous` to deal with ambiguous datetimes:
1149
+ # dates = [
1150
+ # "2018-10-28 01:30",
1151
+ # "2018-10-28 02:00",
1152
+ # "2018-10-28 02:30",
1153
+ # "2018-10-28 02:00"
1154
+ # ]
1155
+ # df = Polars::DataFrame.new(
1156
+ # {
1157
+ # "ts" => Polars::Series.new(dates).str.strptime(Polars::Datetime),
1158
+ # "ambiguous" => ["earliest", "earliest", "latest", "latest"]
1159
+ # }
1160
+ # )
1161
+ # df.with_columns(
1162
+ # ts_localized: Polars.col("ts").dt.replace_time_zone(
1163
+ # "Europe/Brussels", ambiguous: Polars.col("ambiguous")
1164
+ # )
1165
+ # )
1166
+ # # =>
1167
+ # # shape: (4, 3)
1168
+ # # ┌─────────────────────┬───────────┬───────────────────────────────┐
1169
+ # # │ ts ┆ ambiguous ┆ ts_localized │
1170
+ # # │ --- ┆ --- ┆ --- │
1171
+ # # │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │
1172
+ # # ╞═════════════════════╪═══════════╪═══════════════════════════════╡
1173
+ # # │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │
1174
+ # # │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │
1175
+ # # │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │
1176
+ # # │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │
1177
+ # # └─────────────────────┴───────────┴───────────────────────────────┘
934
1178
  def replace_time_zone(time_zone, ambiguous: "raise", non_existent: "raise")
935
1179
  unless ambiguous.is_a?(Expr)
936
1180
  ambiguous = Polars.lit(ambiguous)
data/lib/polars/date_time_name_space.rb CHANGED
@@ -1150,6 +1150,47 @@ module Polars
1150
1150
  # Every interval start and period length.
1151
1151
  #
1152
1152
  # @return [Series]
1153
+ #
1154
+ # @example
1155
+ # s = Polars.datetime_range(
1156
+ # Time.utc(2001, 1, 1),
1157
+ # Time.utc(2001, 1, 2),
1158
+ # "165m",
1159
+ # eager: true
1160
+ # ).alias("datetime")
1161
+ # s.dt.truncate("1h")
1162
+ # # =>
1163
+ # # shape: (9,)
1164
+ # # Series: 'datetime' [datetime[ns]]
1165
+ # # [
1166
+ # # 2001-01-01 00:00:00
1167
+ # # 2001-01-01 02:00:00
1168
+ # # 2001-01-01 05:00:00
1169
+ # # 2001-01-01 08:00:00
1170
+ # # 2001-01-01 11:00:00
1171
+ # # 2001-01-01 13:00:00
1172
+ # # 2001-01-01 16:00:00
1173
+ # # 2001-01-01 19:00:00
1174
+ # # 2001-01-01 22:00:00
1175
+ # # ]
1176
+ #
1177
+ # @example
1178
+ # s = Polars.datetime_range(
1179
+ # Time.utc(2001, 1, 1), Time.utc(2001, 1, 1, 1), "10m", eager: true
1180
+ # ).alias("datetime")
1181
+ # s.dt.truncate("30m")
1182
+ # # =>
1183
+ # # shape: (7,)
1184
+ # # Series: 'datetime' [datetime[ns]]
1185
+ # # [
1186
+ # # 2001-01-01 00:00:00
1187
+ # # 2001-01-01 00:00:00
1188
+ # # 2001-01-01 00:00:00
1189
+ # # 2001-01-01 00:30:00
1190
+ # # 2001-01-01 00:30:00
1191
+ # # 2001-01-01 00:30:00
1192
+ # # 2001-01-01 01:00:00
1193
+ # # ]
1153
1194
  def truncate(every)
1154
1195
  super
1155
1196
  end
@@ -1185,6 +1226,52 @@ module Polars
1185
1226
  # @note
1186
1227
  # This functionality is currently experimental and may
1187
1228
  # change without it being considered a breaking change.
1229
+ #
1230
+ # @example
1231
+ # start = Time.utc(2001, 1, 1)
1232
+ # stop = Time.utc(2001, 1, 2)
1233
+ # s = Polars.datetime_range(
1234
+ # start, stop, "165m", eager: true
1235
+ # ).alias("datetime")
1236
+ # s.dt.round("1h")
1237
+ # # =>
1238
+ # # shape: (9,)
1239
+ # # Series: 'datetime' [datetime[ns]]
1240
+ # # [
1241
+ # # 2001-01-01 00:00:00
1242
+ # # 2001-01-01 03:00:00
1243
+ # # 2001-01-01 06:00:00
1244
+ # # 2001-01-01 08:00:00
1245
+ # # 2001-01-01 11:00:00
1246
+ # # 2001-01-01 14:00:00
1247
+ # # 2001-01-01 17:00:00
1248
+ # # 2001-01-01 19:00:00
1249
+ # # 2001-01-01 22:00:00
1250
+ # # ]
1251
+ #
1252
+ # @example
1253
+ # round_str = s.dt.round("1h")
1254
+ # round_td = s.dt.round("1h")
1255
+ # round_str.equals(round_td)
1256
+ # # => true
1257
+ #
1258
+ # @example
1259
+ # start = Time.utc(2001, 1, 1)
1260
+ # stop = Time.utc(2001, 1, 1, 1)
1261
+ # s = Polars.datetime_range(start, stop, "10m", eager: true).alias("datetime")
1262
+ # s.dt.round("30m")
1263
+ # # =>
1264
+ # # shape: (7,)
1265
+ # # Series: 'datetime' [datetime[ns]]
1266
+ # # [
1267
+ # # 2001-01-01 00:00:00
1268
+ # # 2001-01-01 00:00:00
1269
+ # # 2001-01-01 00:30:00
1270
+ # # 2001-01-01 00:30:00
1271
+ # # 2001-01-01 00:30:00
1272
+ # # 2001-01-01 01:00:00
1273
+ # # 2001-01-01 01:00:00
1274
+ # # ]
1188
1275
  def round(every)
1189
1276
  super
1190
1277
  end
data/lib/polars/expr.rb CHANGED
@@ -411,6 +411,26 @@ module Polars
411
411
  # Add a prefix to the root column name of the expression.
412
412
  #
413
413
  # @return [Expr]
414
+ #
415
+ # @example
416
+ # df = Polars::DataFrame.new(
417
+ # {
418
+ # "a" => [1, 2, 3],
419
+ # "b" => ["x", "y", "z"]
420
+ # }
421
+ # )
422
+ # df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
423
+ # # =>
424
+ # # shape: (3, 4)
425
+ # # ┌─────┬─────┬───────────┬───────────┐
426
+ # # │ a ┆ b ┆ reverse_a ┆ reverse_b │
427
+ # # │ --- ┆ --- ┆ --- ┆ --- │
428
+ # # │ i64 ┆ str ┆ i64 ┆ str │
429
+ # # ╞═════╪═════╪═══════════╪═══════════╡
430
+ # # │ 1 ┆ x ┆ 3 ┆ z │
431
+ # # │ 2 ┆ y ┆ 2 ┆ y │
432
+ # # │ 3 ┆ z ┆ 1 ┆ x │
433
+ # # └─────┴─────┴───────────┴───────────┘
414
434
  def prefix(prefix)
415
435
  name.prefix(prefix)
416
436
  end
@@ -418,6 +438,26 @@ module Polars
418
438
  # Add a suffix to the root column name of the expression.
419
439
  #
420
440
  # @return [Expr]
441
+ #
442
+ # @example
443
+ # df = Polars::DataFrame.new(
444
+ # {
445
+ # "a" => [1, 2, 3],
446
+ # "b" => ["x", "y", "z"]
447
+ # }
448
+ # )
449
+ # df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
450
+ # # =>
451
+ # # shape: (3, 4)
452
+ # # ┌─────┬─────┬───────────┬───────────┐
453
+ # # │ a ┆ b ┆ a_reverse ┆ b_reverse │
454
+ # # │ --- ┆ --- ┆ --- ┆ --- │
455
+ # # │ i64 ┆ str ┆ i64 ┆ str │
456
+ # # ╞═════╪═════╪═══════════╪═══════════╡
457
+ # # │ 1 ┆ x ┆ 3 ┆ z │
458
+ # # │ 2 ┆ y ┆ 2 ┆ y │
459
+ # # │ 3 ┆ z ┆ 1 ┆ x │
460
+ # # └─────┴─────┴───────────┴───────────┘
421
461
  def suffix(suffix)
422
462
  name.suffix(suffix)
423
463
  end
@@ -1182,7 +1222,7 @@ module Polars
1182
1222
  # "b" => [1, 1, 2, 2]
1183
1223
  # }
1184
1224
  # )
1185
- # df.select(Polars.all.mode)
1225
+ # df.select(Polars.all.mode.first)
1186
1226
  # # =>
1187
1227
  # # shape: (2, 2)
1188
1228
  # # ┌─────┬─────┐
@@ -1863,6 +1903,35 @@ module Polars
1863
1903
  # Reverse the selection.
1864
1904
  #
1865
1905
  # @return [Expr]
1906
+ #
1907
+ # @example
1908
+ # df = Polars::DataFrame.new(
1909
+ # {
1910
+ # "A" => [1, 2, 3, 4, 5],
1911
+ # "fruits" => ["banana", "banana", "apple", "apple", "banana"],
1912
+ # "B" => [5, 4, 3, 2, 1],
1913
+ # "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
1914
+ # }
1915
+ # )
1916
+ # df.select(
1917
+ # [
1918
+ # Polars.all,
1919
+ # Polars.all.reverse.name.suffix("_reverse")
1920
+ # ]
1921
+ # )
1922
+ # # =>
1923
+ # # shape: (5, 8)
1924
+ # # ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
1925
+ # # │ A ┆ fruits ┆ B ┆ cars ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
1926
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1927
+ # # │ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str │
1928
+ # # ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
1929
+ # # │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 5 ┆ banana ┆ 1 ┆ beetle │
1930
+ # # │ 2 ┆ banana ┆ 4 ┆ audi ┆ 4 ┆ apple ┆ 2 ┆ beetle │
1931
+ # # │ 3 ┆ apple ┆ 3 ┆ beetle ┆ 3 ┆ apple ┆ 3 ┆ beetle │
1932
+ # # │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 ┆ banana ┆ 4 ┆ audi │
1933
+ # # │ 5 ┆ banana ┆ 1 ┆ beetle ┆ 1 ┆ banana ┆ 5 ┆ beetle │
1934
+ # # └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘
1866
1935
  def reverse
1867
1936
  _from_rbexpr(_rbexpr.reverse)
1868
1937
  end
@@ -2825,7 +2894,7 @@ module Polars
2825
2894
  # # ╞══════╪════════╡
2826
2895
  # # │ 1 ┆ 0 │
2827
2896
  # # └──────┴────────┘
2828
- # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
2897
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
2829
2898
  # if !return_dtype.nil?
2830
2899
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2831
2900
  # end
@@ -2835,7 +2904,8 @@ module Polars
2835
2904
  # f,
2836
2905
  # return_dtype,
2837
2906
  # agg_list,
2838
- # is_elementwise
2907
+ # is_elementwise,
2908
+ # returns_scalar
2839
2909
  # )
2840
2910
  # )
2841
2911
  # end
@@ -3071,6 +3141,21 @@ module Polars
3071
3141
  # Number of rows to return.
3072
3142
  #
3073
3143
  # @return [Expr]
3144
+ #
3145
+ # @example
3146
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
3147
+ # df.select(Polars.col("foo").limit(3))
3148
+ # # =>
3149
+ # # shape: (3, 1)
3150
+ # # ┌─────┐
3151
+ # # │ foo │
3152
+ # # │ --- │
3153
+ # # │ i64 │
3154
+ # # ╞═════╡
3155
+ # # │ 1 │
3156
+ # # │ 2 │
3157
+ # # │ 3 │
3158
+ # # └─────┘
3074
3159
  def limit(n = 10)
3075
3160
  head(n)
3076
3161
  end
@@ -5601,6 +5686,22 @@ module Polars
5601
5686
  # If false, the calculations are corrected for statistical bias.
5602
5687
  #
5603
5688
  # @return [Expr]
5689
+ #
5690
+ # @example
5691
+ # df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
5692
+ # df.select(Polars.col("a").rolling_skew(3))
5693
+ # # =>
5694
+ # # shape: (4, 1)
5695
+ # # ┌──────────┐
5696
+ # # │ a │
5697
+ # # │ --- │
5698
+ # # │ f64 │
5699
+ # # ╞══════════╡
5700
+ # # │ null │
5701
+ # # │ null │
5702
+ # # │ 0.381802 │
5703
+ # # │ 0.47033 │
5704
+ # # └──────────┘
5604
5705
  def rolling_skew(window_size, bias: true)
5605
5706
  _from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
5606
5707
  end
@@ -6015,12 +6116,12 @@ module Polars
6015
6116
  # # ┌──────┐
6016
6117
  # # │ a │
6017
6118
  # # │ --- │
6018
- # # │ i64
6119
+ # # │ f64
6019
6120
  # # ╞══════╡
6020
- # # │ -1
6021
- # # │ 0
6022
- # # │ 0
6023
- # # │ 1
6121
+ # # │ -1.0
6122
+ # # │ -0.0
6123
+ # # │ 0.0
6124
+ # # │ 1.0
6024
6125
  # # │ null │
6025
6126
  # # └──────┘
6026
6127
  def sign
data/lib/polars/functions/as_datatype.rb CHANGED
@@ -86,8 +86,57 @@ module Polars
86
86
  # Concat the arrays in a Series dtype List in linear time.
87
87
  #
88
88
  # @return [Expr]
89
- def concat_list(exprs)
90
- exprs = Utils.parse_into_list_of_expressions(exprs)
89
+ #
90
+ # @example Concatenate two existing list columns. Null values are propagated.
91
+ # df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
92
+ # df.with_columns(concat_list: Polars.concat_list("a", "b"))
93
+ # # =>
94
+ # # shape: (3, 3)
95
+ # # ┌───────────┬───────────┬─────────────┐
96
+ # # │ a ┆ b ┆ concat_list │
97
+ # # │ --- ┆ --- ┆ --- │
98
+ # # │ list[i64] ┆ list[i64] ┆ list[i64] │
99
+ # # ╞═══════════╪═══════════╪═════════════╡
100
+ # # │ [1, 2] ┆ [4] ┆ [1, 2, 4] │
101
+ # # │ [3] ┆ [] ┆ [3] │
102
+ # # │ [4, 5] ┆ null ┆ null │
103
+ # # └───────────┴───────────┴─────────────┘
104
+ #
105
+ # @example Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.
106
+ # df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
107
+ # # =>
108
+ # # shape: (3, 2)
109
+ # # ┌───────────┬─────────────────┐
110
+ # # │ a ┆ concat_list │
111
+ # # │ --- ┆ --- │
112
+ # # │ list[i64] ┆ list[str] │
113
+ # # ╞═══════════╪═════════════════╡
114
+ # # │ [1, 2] ┆ ["1", "2", "x"] │
115
+ # # │ [3] ┆ ["3", "x"] │
116
+ # # │ [4, 5] ┆ ["4", "5", "x"] │
117
+ # # └───────────┴─────────────────┘
118
+ #
119
+ # @example Create lagged columns and collect them into a list. This mimics a rolling window.
120
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
121
+ # df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
122
+ # df.select(
123
+ # Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
124
+ # )
125
+ # # =>
126
+ # # shape: (5, 1)
127
+ # # ┌───────────────────┐
128
+ # # │ A_rolling │
129
+ # # │ --- │
130
+ # # │ list[f64] │
131
+ # # ╞═══════════════════╡
132
+ # # │ [null, null, 1.0] │
133
+ # # │ [null, 1.0, 2.0] │
134
+ # # │ [1.0, 2.0, 9.0] │
135
+ # # │ [2.0, 9.0, 2.0] │
136
+ # # │ [9.0, 2.0, 13.0] │
137
+ # # └───────────────────┘
138
+ def concat_list(exprs, *more_exprs)
139
+ exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
91
140
  Utils.wrap_expr(Plr.concat_list(exprs))
92
141
  end
93
142
 
data/lib/polars/functions/col.rb CHANGED
@@ -23,7 +23,7 @@ module Polars
23
23
  Utils.wrap_expr(Plr.col(name.to_s))
24
24
  elsif Utils.is_polars_dtype(name)
25
25
  Utils.wrap_expr(Plr.dtype_cols([name]))
26
- elsif name.is_a?(::Array)
26
+ elsif name.is_a?(::Array) || name.is_a?(::Set)
27
27
  names = Array(name)
28
28
  if names.empty?
29
29
  return Utils.wrap_expr(Plr.cols(names))
data/lib/polars/functions/eager.rb CHANGED
@@ -127,7 +127,7 @@ module Polars
127
127
  # af1, af2, af3 = Polars.align_frames(
128
128
  # df1, df2, df3, on: "dt", select: ["x", "y"]
129
129
  # )
130
- # (af1 * af2 * af3).fill_null(0).select(Polars.sum(Polars.col("*")).alias("dot"))
130
+ # (af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
131
131
  # # =>
132
132
  # # shape: (3, 1)
133
133
  # # ┌───────┐
@@ -136,9 +136,7 @@ module Polars
136
136
  # # │ f64 │
137
137
  # # ╞═══════╡
138
138
  # # │ 0.0 │
139
- # # ├╌╌╌╌╌╌╌┤
140
139
  # # │ 167.5 │
141
- # # ├╌╌╌╌╌╌╌┤
142
140
  # # │ 47.0 │
143
141
  # # └───────┘
144
142
  def align_frames(