polars-df 0.13.0-x86_64-linux → 0.15.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE-THIRD-PARTY.txt +24801 -13447
  5. data/LICENSE.txt +1 -0
  6. data/README.md +1 -2
  7. data/lib/polars/3.1/polars.so +0 -0
  8. data/lib/polars/3.2/polars.so +0 -0
  9. data/lib/polars/3.3/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +285 -62
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +2 -0
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +109 -8
  20. data/lib/polars/functions/as_datatype.rb +51 -2
  21. data/lib/polars/functions/col.rb +1 -1
  22. data/lib/polars/functions/eager.rb +1 -3
  23. data/lib/polars/functions/lazy.rb +88 -10
  24. data/lib/polars/functions/range/time_range.rb +21 -21
  25. data/lib/polars/io/csv.rb +14 -16
  26. data/lib/polars/io/database.rb +2 -2
  27. data/lib/polars/io/ipc.rb +14 -12
  28. data/lib/polars/io/ndjson.rb +10 -0
  29. data/lib/polars/io/parquet.rb +168 -111
  30. data/lib/polars/lazy_frame.rb +649 -15
  31. data/lib/polars/list_name_space.rb +169 -0
  32. data/lib/polars/selectors.rb +1144 -0
  33. data/lib/polars/series.rb +470 -40
  34. data/lib/polars/string_cache.rb +27 -1
  35. data/lib/polars/string_expr.rb +0 -1
  36. data/lib/polars/string_name_space.rb +73 -3
  37. data/lib/polars/struct_name_space.rb +31 -7
  38. data/lib/polars/utils/various.rb +5 -1
  39. data/lib/polars/utils.rb +45 -10
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +2 -1
  42. metadata +4 -3
  43. data/lib/polars/functions.rb +0 -57
data/lib/polars/date_time_expr.rb CHANGED
@@ -269,6 +269,50 @@ module Polars
269
269
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
270
270
  #
271
271
  # @return [Expr]
272
+ #
273
+ # @example
274
+ # df = Polars::DataFrame.new(
275
+ # {
276
+ # "datetime" => [
277
+ # Time.utc(2020, 3, 1),
278
+ # Time.utc(2020, 4, 1),
279
+ # Time.utc(2020, 5, 1)
280
+ # ]
281
+ # }
282
+ # )
283
+ # df.with_columns(
284
+ # Polars.col("datetime")
285
+ # .dt.strftime("%Y/%m/%d %H:%M:%S")
286
+ # .alias("datetime_string")
287
+ # )
288
+ # # =>
289
+ # # shape: (3, 2)
290
+ # # ┌─────────────────────┬─────────────────────┐
291
+ # # │ datetime ┆ datetime_string │
292
+ # # │ --- ┆ --- │
293
+ # # │ datetime[ns] ┆ str │
294
+ # # ╞═════════════════════╪═════════════════════╡
295
+ # # │ 2020-03-01 00:00:00 ┆ 2020/03/01 00:00:00 │
296
+ # # │ 2020-04-01 00:00:00 ┆ 2020/04/01 00:00:00 │
297
+ # # │ 2020-05-01 00:00:00 ┆ 2020/05/01 00:00:00 │
298
+ # # └─────────────────────┴─────────────────────┘
299
+ #
300
+ # @example If you're interested in the day name / month name, you can use `'%A'` / `'%B'`:
301
+ # df.with_columns(
302
+ # day_name: Polars.col("datetime").dt.strftime("%A"),
303
+ # month_name: Polars.col("datetime").dt.strftime("%B")
304
+ # )
305
+ # # =>
306
+ # # shape: (3, 3)
307
+ # # ┌─────────────────────┬───────────┬────────────┐
308
+ # # │ datetime ┆ day_name ┆ month_name │
309
+ # # │ --- ┆ --- ┆ --- │
310
+ # # │ datetime[ns] ┆ str ┆ str │
311
+ # # ╞═════════════════════╪═══════════╪════════════╡
312
+ # # │ 2020-03-01 00:00:00 ┆ Sunday ┆ March │
313
+ # # │ 2020-04-01 00:00:00 ┆ Wednesday ┆ April │
314
+ # # │ 2020-05-01 00:00:00 ┆ Friday ┆ May │
315
+ # # └─────────────────────┴───────────┴────────────┘
272
316
  def strftime(fmt)
273
317
  Utils.wrap_expr(_rbexpr.strftime(fmt))
274
318
  end
@@ -574,6 +618,29 @@ module Polars
574
618
  # Date
575
619
  #
576
620
  # @return [Expr]
621
+ #
622
+ # @example
623
+ # df = Polars::DataFrame.new(
624
+ # {
625
+ # "datetime" => [
626
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
627
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
628
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000)
629
+ # ]
630
+ # }
631
+ # )
632
+ # df.with_columns(Polars.col("datetime").dt.date.alias("date"))
633
+ # # =>
634
+ # # shape: (3, 2)
635
+ # # ┌─────────────────────────┬────────────┐
636
+ # # │ datetime ┆ date │
637
+ # # │ --- ┆ --- │
638
+ # # │ datetime[ns] ┆ date │
639
+ # # ╞═════════════════════════╪════════════╡
640
+ # # │ 1978-01-01 01:01:01 ┆ 1978-01-01 │
641
+ # # │ 2024-10-13 05:30:14.500 ┆ 2024-10-13 │
642
+ # # │ 2065-01-01 10:20:30.060 ┆ 2065-01-01 │
643
+ # # └─────────────────────────┴────────────┘
577
644
  def date
578
645
  Utils.wrap_expr(_rbexpr.dt_date)
579
646
  end
@@ -732,6 +799,34 @@ module Polars
732
799
  # Applies to Datetime columns.
733
800
  #
734
801
  # @return [Expr]
802
+ #
803
+ # @example
804
+ # df = Polars::DataFrame.new(
805
+ # {
806
+ # "datetime": [
807
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
808
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
809
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
810
+ # ]
811
+ # }
812
+ # )
813
+ # df.with_columns(
814
+ # Polars.col("datetime").dt.hour.alias("hour"),
815
+ # Polars.col("datetime").dt.minute.alias("minute"),
816
+ # Polars.col("datetime").dt.second.alias("second"),
817
+ # Polars.col("datetime").dt.millisecond.alias("millisecond")
818
+ # )
819
+ # # =>
820
+ # # shape: (3, 5)
821
+ # # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
822
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
823
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
824
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
825
+ # # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
826
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
827
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500 │
828
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60 │
829
+ # # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
735
830
  def millisecond
736
831
  Utils.wrap_expr(_rbexpr.dt_millisecond)
737
832
  end
@@ -741,6 +836,34 @@ module Polars
741
836
  # Applies to Datetime columns.
742
837
  #
743
838
  # @return [Expr]
839
+ #
840
+ # @example
841
+ # df = Polars::DataFrame.new(
842
+ # {
843
+ # "datetime": [
844
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
845
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
846
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
847
+ # ]
848
+ # }
849
+ # )
850
+ # df.with_columns(
851
+ # Polars.col("datetime").dt.hour.alias("hour"),
852
+ # Polars.col("datetime").dt.minute.alias("minute"),
853
+ # Polars.col("datetime").dt.second.alias("second"),
854
+ # Polars.col("datetime").dt.microsecond.alias("microsecond")
855
+ # )
856
+ # # =>
857
+ # # shape: (3, 5)
858
+ # # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
859
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
860
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
861
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
862
+ # # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
863
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
864
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000 │
865
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000 │
866
+ # # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
744
867
  def microsecond
745
868
  Utils.wrap_expr(_rbexpr.dt_microsecond)
746
869
  end
@@ -750,6 +873,34 @@ module Polars
750
873
  # Applies to Datetime columns.
751
874
  #
752
875
  # @return [Expr]
876
+ #
877
+ # @example
878
+ # df = Polars::DataFrame.new(
879
+ # {
880
+ # "datetime": [
881
+ # Time.utc(1978, 1, 1, 1, 1, 1, 0),
882
+ # Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
883
+ # Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
884
+ # ]
885
+ # }
886
+ # )
887
+ # df.with_columns(
888
+ # Polars.col("datetime").dt.hour.alias("hour"),
889
+ # Polars.col("datetime").dt.minute.alias("minute"),
890
+ # Polars.col("datetime").dt.second.alias("second"),
891
+ # Polars.col("datetime").dt.nanosecond.alias("nanosecond")
892
+ # )
893
+ # # =>
894
+ # # shape: (3, 5)
895
+ # # ┌─────────────────────────┬──────┬────────┬────────┬────────────┐
896
+ # # │ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
897
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
898
+ # # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
899
+ # # ╞═════════════════════════╪══════╪════════╪════════╪════════════╡
900
+ # # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
901
+ # # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
902
+ # # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
903
+ # # └─────────────────────────┴──────┴────────┴────────┴────────────┘
753
904
  def nanosecond
754
905
  Utils.wrap_expr(_rbexpr.dt_nanosecond)
755
906
  end
@@ -835,6 +986,34 @@ module Polars
835
986
  # Time unit for the `Datetime` Series.
836
987
  #
837
988
  # @return [Expr]
989
+ #
990
+ # @example
991
+ # df = Polars::DataFrame.new(
992
+ # {
993
+ # "date" => Polars.datetime_range(
994
+ # Time.utc(2001, 1, 1),
995
+ # Time.utc(2001, 1, 3),
996
+ # "1d",
997
+ # time_unit: "ns",
998
+ # eager: true
999
+ # )
1000
+ # }
1001
+ # )
1002
+ # df.select(
1003
+ # Polars.col("date"),
1004
+ # Polars.col("date").dt.with_time_unit("us").alias("time_unit_us")
1005
+ # )
1006
+ # # =>
1007
+ # # shape: (3, 2)
1008
+ # # ┌─────────────────────┬───────────────────────┐
1009
+ # # │ date ┆ time_unit_us │
1010
+ # # │ --- ┆ --- │
1011
+ # # │ datetime[ns] ┆ datetime[μs] │
1012
+ # # ╞═════════════════════╪═══════════════════════╡
1013
+ # # │ 2001-01-01 00:00:00 ┆ +32971-04-28 00:00:00 │
1014
+ # # │ 2001-01-02 00:00:00 ┆ +32974-01-22 00:00:00 │
1015
+ # # │ 2001-01-03 00:00:00 ┆ +32976-10-18 00:00:00 │
1016
+ # # └─────────────────────┴───────────────────────┘
838
1017
  def with_time_unit(time_unit)
839
1018
  Utils.wrap_expr(_rbexpr.dt_with_time_unit(time_unit))
840
1019
  end
@@ -931,6 +1110,71 @@ module Polars
931
1110
  # Determine how to deal with non-existent datetimes.
932
1111
  #
933
1112
  # @return [Expr]
1113
+ #
1114
+ # @example
1115
+ # df = Polars::DataFrame.new(
1116
+ # {
1117
+ # "london_timezone": Polars.datetime_range(
1118
+ # Time.utc(2020, 3, 1),
1119
+ # Time.utc(2020, 7, 1),
1120
+ # "1mo",
1121
+ # time_zone: "UTC",
1122
+ # eager: true,
1123
+ # ).dt.convert_time_zone("Europe/London")
1124
+ # }
1125
+ # )
1126
+ # df.select(
1127
+ # [
1128
+ # Polars.col("london_timezone"),
1129
+ # Polars.col("london_timezone")
1130
+ # .dt.replace_time_zone("Europe/Amsterdam")
1131
+ # .alias("London_to_Amsterdam")
1132
+ # ]
1133
+ # )
1134
+ # # =>
1135
+ # # shape: (5, 2)
1136
+ # # ┌─────────────────────────────┬────────────────────────────────┐
1137
+ # # │ london_timezone ┆ London_to_Amsterdam │
1138
+ # # │ --- ┆ --- │
1139
+ # # │ datetime[ns, Europe/London] ┆ datetime[ns, Europe/Amsterdam] │
1140
+ # # ╞═════════════════════════════╪════════════════════════════════╡
1141
+ # # │ 2020-03-01 00:00:00 GMT ┆ 2020-03-01 00:00:00 CET │
1142
+ # # │ 2020-04-01 01:00:00 BST ┆ 2020-04-01 01:00:00 CEST │
1143
+ # # │ 2020-05-01 01:00:00 BST ┆ 2020-05-01 01:00:00 CEST │
1144
+ # # │ 2020-06-01 01:00:00 BST ┆ 2020-06-01 01:00:00 CEST │
1145
+ # # │ 2020-07-01 01:00:00 BST ┆ 2020-07-01 01:00:00 CEST │
1146
+ # # └─────────────────────────────┴────────────────────────────────┘
1147
+ #
1148
+ # @example You can use `ambiguous` to deal with ambiguous datetimes:
1149
+ # dates = [
1150
+ # "2018-10-28 01:30",
1151
+ # "2018-10-28 02:00",
1152
+ # "2018-10-28 02:30",
1153
+ # "2018-10-28 02:00"
1154
+ # ]
1155
+ # df = Polars::DataFrame.new(
1156
+ # {
1157
+ # "ts" => Polars::Series.new(dates).str.strptime(Polars::Datetime),
1158
+ # "ambiguous" => ["earliest", "earliest", "latest", "latest"]
1159
+ # }
1160
+ # )
1161
+ # df.with_columns(
1162
+ # ts_localized: Polars.col("ts").dt.replace_time_zone(
1163
+ # "Europe/Brussels", ambiguous: Polars.col("ambiguous")
1164
+ # )
1165
+ # )
1166
+ # # =>
1167
+ # # shape: (4, 3)
1168
+ # # ┌─────────────────────┬───────────┬───────────────────────────────┐
1169
+ # # │ ts ┆ ambiguous ┆ ts_localized │
1170
+ # # │ --- ┆ --- ┆ --- │
1171
+ # # │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │
1172
+ # # ╞═════════════════════╪═══════════╪═══════════════════════════════╡
1173
+ # # │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │
1174
+ # # │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │
1175
+ # # │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │
1176
+ # # │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │
1177
+ # # └─────────────────────┴───────────┴───────────────────────────────┘
934
1178
  def replace_time_zone(time_zone, ambiguous: "raise", non_existent: "raise")
935
1179
  unless ambiguous.is_a?(Expr)
936
1180
  ambiguous = Polars.lit(ambiguous)
data/lib/polars/date_time_name_space.rb CHANGED
@@ -1150,6 +1150,47 @@ module Polars
1150
1150
  # Every interval start and period length.
1151
1151
  #
1152
1152
  # @return [Series]
1153
+ #
1154
+ # @example
1155
+ # s = Polars.datetime_range(
1156
+ # Time.utc(2001, 1, 1),
1157
+ # Time.utc(2001, 1, 2),
1158
+ # "165m",
1159
+ # eager: true
1160
+ # ).alias("datetime")
1161
+ # s.dt.truncate("1h")
1162
+ # # =>
1163
+ # # shape: (9,)
1164
+ # # Series: 'datetime' [datetime[ns]]
1165
+ # # [
1166
+ # # 2001-01-01 00:00:00
1167
+ # # 2001-01-01 02:00:00
1168
+ # # 2001-01-01 05:00:00
1169
+ # # 2001-01-01 08:00:00
1170
+ # # 2001-01-01 11:00:00
1171
+ # # 2001-01-01 13:00:00
1172
+ # # 2001-01-01 16:00:00
1173
+ # # 2001-01-01 19:00:00
1174
+ # # 2001-01-01 22:00:00
1175
+ # # ]
1176
+ #
1177
+ # @example
1178
+ # s = Polars.datetime_range(
1179
+ # Time.utc(2001, 1, 1), Time.utc(2001, 1, 1, 1), "10m", eager: true
1180
+ # ).alias("datetime")
1181
+ # s.dt.truncate("30m")
1182
+ # # =>
1183
+ # # shape: (7,)
1184
+ # # Series: 'datetime' [datetime[ns]]
1185
+ # # [
1186
+ # # 2001-01-01 00:00:00
1187
+ # # 2001-01-01 00:00:00
1188
+ # # 2001-01-01 00:00:00
1189
+ # # 2001-01-01 00:30:00
1190
+ # # 2001-01-01 00:30:00
1191
+ # # 2001-01-01 00:30:00
1192
+ # # 2001-01-01 01:00:00
1193
+ # # ]
1153
1194
  def truncate(every)
1154
1195
  super
1155
1196
  end
@@ -1185,6 +1226,52 @@ module Polars
1185
1226
  # @note
1186
1227
  # This functionality is currently experimental and may
1187
1228
  # change without it being considered a breaking change.
1229
+ #
1230
+ # @example
1231
+ # start = Time.utc(2001, 1, 1)
1232
+ # stop = Time.utc(2001, 1, 2)
1233
+ # s = Polars.datetime_range(
1234
+ # start, stop, "165m", eager: true
1235
+ # ).alias("datetime")
1236
+ # s.dt.round("1h")
1237
+ # # =>
1238
+ # # shape: (9,)
1239
+ # # Series: 'datetime' [datetime[ns]]
1240
+ # # [
1241
+ # # 2001-01-01 00:00:00
1242
+ # # 2001-01-01 03:00:00
1243
+ # # 2001-01-01 06:00:00
1244
+ # # 2001-01-01 08:00:00
1245
+ # # 2001-01-01 11:00:00
1246
+ # # 2001-01-01 14:00:00
1247
+ # # 2001-01-01 17:00:00
1248
+ # # 2001-01-01 19:00:00
1249
+ # # 2001-01-01 22:00:00
1250
+ # # ]
1251
+ #
1252
+ # @example
1253
+ # round_str = s.dt.round("1h")
1254
+ # round_td = s.dt.round("1h")
1255
+ # round_str.equals(round_td)
1256
+ # # => true
1257
+ #
1258
+ # @example
1259
+ # start = Time.utc(2001, 1, 1)
1260
+ # stop = Time.utc(2001, 1, 1, 1)
1261
+ # s = Polars.datetime_range(start, stop, "10m", eager: true).alias("datetime")
1262
+ # s.dt.round("30m")
1263
+ # # =>
1264
+ # # shape: (7,)
1265
+ # # Series: 'datetime' [datetime[ns]]
1266
+ # # [
1267
+ # # 2001-01-01 00:00:00
1268
+ # # 2001-01-01 00:00:00
1269
+ # # 2001-01-01 00:30:00
1270
+ # # 2001-01-01 00:30:00
1271
+ # # 2001-01-01 00:30:00
1272
+ # # 2001-01-01 01:00:00
1273
+ # # 2001-01-01 01:00:00
1274
+ # # ]
1188
1275
  def round(every)
1189
1276
  super
1190
1277
  end
data/lib/polars/expr.rb CHANGED
@@ -411,6 +411,26 @@ module Polars
411
411
  # Add a prefix to the root column name of the expression.
412
412
  #
413
413
  # @return [Expr]
414
+ #
415
+ # @example
416
+ # df = Polars::DataFrame.new(
417
+ # {
418
+ # "a" => [1, 2, 3],
419
+ # "b" => ["x", "y", "z"]
420
+ # }
421
+ # )
422
+ # df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
423
+ # # =>
424
+ # # shape: (3, 4)
425
+ # # ┌─────┬─────┬───────────┬───────────┐
426
+ # # │ a ┆ b ┆ reverse_a ┆ reverse_b │
427
+ # # │ --- ┆ --- ┆ --- ┆ --- │
428
+ # # │ i64 ┆ str ┆ i64 ┆ str │
429
+ # # ╞═════╪═════╪═══════════╪═══════════╡
430
+ # # │ 1 ┆ x ┆ 3 ┆ z │
431
+ # # │ 2 ┆ y ┆ 2 ┆ y │
432
+ # # │ 3 ┆ z ┆ 1 ┆ x │
433
+ # # └─────┴─────┴───────────┴───────────┘
414
434
  def prefix(prefix)
415
435
  name.prefix(prefix)
416
436
  end
@@ -418,6 +438,26 @@ module Polars
418
438
  # Add a suffix to the root column name of the expression.
419
439
  #
420
440
  # @return [Expr]
441
+ #
442
+ # @example
443
+ # df = Polars::DataFrame.new(
444
+ # {
445
+ # "a" => [1, 2, 3],
446
+ # "b" => ["x", "y", "z"]
447
+ # }
448
+ # )
449
+ # df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
450
+ # # =>
451
+ # # shape: (3, 4)
452
+ # # ┌─────┬─────┬───────────┬───────────┐
453
+ # # │ a ┆ b ┆ a_reverse ┆ b_reverse │
454
+ # # │ --- ┆ --- ┆ --- ┆ --- │
455
+ # # │ i64 ┆ str ┆ i64 ┆ str │
456
+ # # ╞═════╪═════╪═══════════╪═══════════╡
457
+ # # │ 1 ┆ x ┆ 3 ┆ z │
458
+ # # │ 2 ┆ y ┆ 2 ┆ y │
459
+ # # │ 3 ┆ z ┆ 1 ┆ x │
460
+ # # └─────┴─────┴───────────┴───────────┘
421
461
  def suffix(suffix)
422
462
  name.suffix(suffix)
423
463
  end
@@ -1182,7 +1222,7 @@ module Polars
1182
1222
  # "b" => [1, 1, 2, 2]
1183
1223
  # }
1184
1224
  # )
1185
- # df.select(Polars.all.mode)
1225
+ # df.select(Polars.all.mode.first)
1186
1226
  # # =>
1187
1227
  # # shape: (2, 2)
1188
1228
  # # ┌─────┬─────┐
@@ -1863,6 +1903,35 @@ module Polars
1863
1903
  # Reverse the selection.
1864
1904
  #
1865
1905
  # @return [Expr]
1906
+ #
1907
+ # @example
1908
+ # df = Polars::DataFrame.new(
1909
+ # {
1910
+ # "A" => [1, 2, 3, 4, 5],
1911
+ # "fruits" => ["banana", "banana", "apple", "apple", "banana"],
1912
+ # "B" => [5, 4, 3, 2, 1],
1913
+ # "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
1914
+ # }
1915
+ # )
1916
+ # df.select(
1917
+ # [
1918
+ # Polars.all,
1919
+ # Polars.all.reverse.name.suffix("_reverse")
1920
+ # ]
1921
+ # )
1922
+ # # =>
1923
+ # # shape: (5, 8)
1924
+ # # ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
1925
+ # # │ A ┆ fruits ┆ B ┆ cars ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
1926
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1927
+ # # │ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str │
1928
+ # # ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
1929
+ # # │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 5 ┆ banana ┆ 1 ┆ beetle │
1930
+ # # │ 2 ┆ banana ┆ 4 ┆ audi ┆ 4 ┆ apple ┆ 2 ┆ beetle │
1931
+ # # │ 3 ┆ apple ┆ 3 ┆ beetle ┆ 3 ┆ apple ┆ 3 ┆ beetle │
1932
+ # # │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 ┆ banana ┆ 4 ┆ audi │
1933
+ # # │ 5 ┆ banana ┆ 1 ┆ beetle ┆ 1 ┆ banana ┆ 5 ┆ beetle │
1934
+ # # └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘
1866
1935
  def reverse
1867
1936
  _from_rbexpr(_rbexpr.reverse)
1868
1937
  end
@@ -2825,7 +2894,7 @@ module Polars
2825
2894
  # # ╞══════╪════════╡
2826
2895
  # # │ 1 ┆ 0 │
2827
2896
  # # └──────┴────────┘
2828
- # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
2897
+ # def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
2829
2898
  # if !return_dtype.nil?
2830
2899
  # return_dtype = Utils.rb_type_to_dtype(return_dtype)
2831
2900
  # end
@@ -2835,7 +2904,8 @@ module Polars
2835
2904
  # f,
2836
2905
  # return_dtype,
2837
2906
  # agg_list,
2838
- # is_elementwise
2907
+ # is_elementwise,
2908
+ # returns_scalar
2839
2909
  # )
2840
2910
  # )
2841
2911
  # end
@@ -3071,6 +3141,21 @@ module Polars
3071
3141
  # Number of rows to return.
3072
3142
  #
3073
3143
  # @return [Expr]
3144
+ #
3145
+ # @example
3146
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
3147
+ # df.select(Polars.col("foo").limit(3))
3148
+ # # =>
3149
+ # # shape: (3, 1)
3150
+ # # ┌─────┐
3151
+ # # │ foo │
3152
+ # # │ --- │
3153
+ # # │ i64 │
3154
+ # # ╞═════╡
3155
+ # # │ 1 │
3156
+ # # │ 2 │
3157
+ # # │ 3 │
3158
+ # # └─────┘
3074
3159
  def limit(n = 10)
3075
3160
  head(n)
3076
3161
  end
@@ -5601,6 +5686,22 @@ module Polars
5601
5686
  # If false, the calculations are corrected for statistical bias.
5602
5687
  #
5603
5688
  # @return [Expr]
5689
+ #
5690
+ # @example
5691
+ # df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
5692
+ # df.select(Polars.col("a").rolling_skew(3))
5693
+ # # =>
5694
+ # # shape: (4, 1)
5695
+ # # ┌──────────┐
5696
+ # # │ a │
5697
+ # # │ --- │
5698
+ # # │ f64 │
5699
+ # # ╞══════════╡
5700
+ # # │ null │
5701
+ # # │ null │
5702
+ # # │ 0.381802 │
5703
+ # # │ 0.47033 │
5704
+ # # └──────────┘
5604
5705
  def rolling_skew(window_size, bias: true)
5605
5706
  _from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
5606
5707
  end
@@ -6015,12 +6116,12 @@ module Polars
6015
6116
  # # ┌──────┐
6016
6117
  # # │ a │
6017
6118
  # # │ --- │
6018
- # # │ i64
6119
+ # # │ f64
6019
6120
  # # ╞══════╡
6020
- # # │ -1
6021
- # # │ 0
6022
- # # │ 0
6023
- # # │ 1
6121
+ # # │ -1.0
6122
+ # # │ -0.0
6123
+ # # │ 0.0
6124
+ # # │ 1.0
6024
6125
  # # │ null │
6025
6126
  # # └──────┘
6026
6127
  def sign
data/lib/polars/functions/as_datatype.rb CHANGED
@@ -86,8 +86,57 @@ module Polars
86
86
  # Concat the arrays in a Series dtype List in linear time.
87
87
  #
88
88
  # @return [Expr]
89
- def concat_list(exprs)
90
- exprs = Utils.parse_into_list_of_expressions(exprs)
89
+ #
90
+ # @example Concatenate two existing list columns. Null values are propagated.
91
+ # df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
92
+ # df.with_columns(concat_list: Polars.concat_list("a", "b"))
93
+ # # =>
94
+ # # shape: (3, 3)
95
+ # # ┌───────────┬───────────┬─────────────┐
96
+ # # │ a ┆ b ┆ concat_list │
97
+ # # │ --- ┆ --- ┆ --- │
98
+ # # │ list[i64] ┆ list[i64] ┆ list[i64] │
99
+ # # ╞═══════════╪═══════════╪═════════════╡
100
+ # # │ [1, 2] ┆ [4] ┆ [1, 2, 4] │
101
+ # # │ [3] ┆ [] ┆ [3] │
102
+ # # │ [4, 5] ┆ null ┆ null │
103
+ # # └───────────┴───────────┴─────────────┘
104
+ #
105
+ # @example Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.
106
+ # df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
107
+ # # =>
108
+ # # shape: (3, 2)
109
+ # # ┌───────────┬─────────────────┐
110
+ # # │ a ┆ concat_list │
111
+ # # │ --- ┆ --- │
112
+ # # │ list[i64] ┆ list[str] │
113
+ # # ╞═══════════╪═════════════════╡
114
+ # # │ [1, 2] ┆ ["1", "2", "x"] │
115
+ # # │ [3] ┆ ["3", "x"] │
116
+ # # │ [4, 5] ┆ ["4", "5", "x"] │
117
+ # # └───────────┴─────────────────┘
118
+ #
119
+ # @example Create lagged columns and collect them into a list. This mimics a rolling window.
120
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
121
+ # df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
122
+ # df.select(
123
+ # Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
124
+ # )
125
+ # # =>
126
+ # # shape: (5, 1)
127
+ # # ┌───────────────────┐
128
+ # # │ A_rolling │
129
+ # # │ --- │
130
+ # # │ list[f64] │
131
+ # # ╞═══════════════════╡
132
+ # # │ [null, null, 1.0] │
133
+ # # │ [null, 1.0, 2.0] │
134
+ # # │ [1.0, 2.0, 9.0] │
135
+ # # │ [2.0, 9.0, 2.0] │
136
+ # # │ [9.0, 2.0, 13.0] │
137
+ # # └───────────────────┘
138
+ def concat_list(exprs, *more_exprs)
139
+ exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
91
140
  Utils.wrap_expr(Plr.concat_list(exprs))
92
141
  end
93
142
 
data/lib/polars/functions/col.rb CHANGED
@@ -23,7 +23,7 @@ module Polars
23
23
  Utils.wrap_expr(Plr.col(name.to_s))
24
24
  elsif Utils.is_polars_dtype(name)
25
25
  Utils.wrap_expr(Plr.dtype_cols([name]))
26
- elsif name.is_a?(::Array)
26
+ elsif name.is_a?(::Array) || name.is_a?(::Set)
27
27
  names = Array(name)
28
28
  if names.empty?
29
29
  return Utils.wrap_expr(Plr.cols(names))
data/lib/polars/functions/eager.rb CHANGED
@@ -127,7 +127,7 @@ module Polars
127
127
  # af1, af2, af3 = Polars.align_frames(
128
128
  # df1, df2, df3, on: "dt", select: ["x", "y"]
129
129
  # )
130
- # (af1 * af2 * af3).fill_null(0).select(Polars.sum(Polars.col("*")).alias("dot"))
130
+ # (af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
131
131
  # # =>
132
132
  # # shape: (3, 1)
133
133
  # # ┌───────┐
@@ -136,9 +136,7 @@ module Polars
136
136
  # # │ f64 │
137
137
  # # ╞═══════╡
138
138
  # # │ 0.0 │
139
- # # ├╌╌╌╌╌╌╌┤
140
139
  # # │ 167.5 │
141
- # # ├╌╌╌╌╌╌╌┤
142
140
  # # │ 47.0 │
143
141
  # # └───────┘
144
142
  def align_frames(