polars-df 0.13.0-arm64-darwin → 0.15.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE-THIRD-PARTY.txt +24439 -12853
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -3
- data/lib/polars/functions.rb +0 -57
data/lib/polars/date_time_expr.rb
CHANGED
@@ -269,6 +269,50 @@ module Polars
|
|
269
269
|
# See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
|
270
270
|
#
|
271
271
|
# @return [Expr]
|
272
|
+
#
|
273
|
+
# @example
|
274
|
+
# df = Polars::DataFrame.new(
|
275
|
+
# {
|
276
|
+
# "datetime" => [
|
277
|
+
# Time.utc(2020, 3, 1),
|
278
|
+
# Time.utc(2020, 4, 1),
|
279
|
+
# Time.utc(2020, 5, 1)
|
280
|
+
# ]
|
281
|
+
# }
|
282
|
+
# )
|
283
|
+
# df.with_columns(
|
284
|
+
# Polars.col("datetime")
|
285
|
+
# .dt.strftime("%Y/%m/%d %H:%M:%S")
|
286
|
+
# .alias("datetime_string")
|
287
|
+
# )
|
288
|
+
# # =>
|
289
|
+
# # shape: (3, 2)
|
290
|
+
# # ┌─────────────────────┬─────────────────────┐
|
291
|
+
# # │ datetime ┆ datetime_string │
|
292
|
+
# # │ --- ┆ --- │
|
293
|
+
# # │ datetime[ns] ┆ str │
|
294
|
+
# # ╞═════════════════════╪═════════════════════╡
|
295
|
+
# # │ 2020-03-01 00:00:00 ┆ 2020/03/01 00:00:00 │
|
296
|
+
# # │ 2020-04-01 00:00:00 ┆ 2020/04/01 00:00:00 │
|
297
|
+
# # │ 2020-05-01 00:00:00 ┆ 2020/05/01 00:00:00 │
|
298
|
+
# # └─────────────────────┴─────────────────────┘
|
299
|
+
#
|
300
|
+
# @example If you're interested in the day name / month name, you can use `'%A'` / `'%B'`:
|
301
|
+
# df.with_columns(
|
302
|
+
# day_name: Polars.col("datetime").dt.strftime("%A"),
|
303
|
+
# month_name: Polars.col("datetime").dt.strftime("%B")
|
304
|
+
# )
|
305
|
+
# # =>
|
306
|
+
# # shape: (3, 3)
|
307
|
+
# # ┌─────────────────────┬───────────┬────────────┐
|
308
|
+
# # │ datetime ┆ day_name ┆ month_name │
|
309
|
+
# # │ --- ┆ --- ┆ --- │
|
310
|
+
# # │ datetime[ns] ┆ str ┆ str │
|
311
|
+
# # ╞═════════════════════╪═══════════╪════════════╡
|
312
|
+
# # │ 2020-03-01 00:00:00 ┆ Sunday ┆ March │
|
313
|
+
# # │ 2020-04-01 00:00:00 ┆ Wednesday ┆ April │
|
314
|
+
# # │ 2020-05-01 00:00:00 ┆ Friday ┆ May │
|
315
|
+
# # └─────────────────────┴───────────┴────────────┘
|
272
316
|
def strftime(fmt)
|
273
317
|
Utils.wrap_expr(_rbexpr.strftime(fmt))
|
274
318
|
end
|
@@ -574,6 +618,29 @@ module Polars
|
|
574
618
|
# Date
|
575
619
|
#
|
576
620
|
# @return [Expr]
|
621
|
+
#
|
622
|
+
# @example
|
623
|
+
# df = Polars::DataFrame.new(
|
624
|
+
# {
|
625
|
+
# "datetime" => [
|
626
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
627
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
628
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000)
|
629
|
+
# ]
|
630
|
+
# }
|
631
|
+
# )
|
632
|
+
# df.with_columns(Polars.col("datetime").dt.date.alias("date"))
|
633
|
+
# # =>
|
634
|
+
# # shape: (3, 2)
|
635
|
+
# # ┌─────────────────────────┬────────────┐
|
636
|
+
# # │ datetime ┆ date │
|
637
|
+
# # │ --- ┆ --- │
|
638
|
+
# # │ datetime[ns] ┆ date │
|
639
|
+
# # ╞═════════════════════════╪════════════╡
|
640
|
+
# # │ 1978-01-01 01:01:01 ┆ 1978-01-01 │
|
641
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 2024-10-13 │
|
642
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 2065-01-01 │
|
643
|
+
# # └─────────────────────────┴────────────┘
|
577
644
|
def date
|
578
645
|
Utils.wrap_expr(_rbexpr.dt_date)
|
579
646
|
end
|
@@ -732,6 +799,34 @@ module Polars
|
|
732
799
|
# Applies to Datetime columns.
|
733
800
|
#
|
734
801
|
# @return [Expr]
|
802
|
+
#
|
803
|
+
# @example
|
804
|
+
# df = Polars::DataFrame.new(
|
805
|
+
# {
|
806
|
+
# "datetime": [
|
807
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
808
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
809
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
810
|
+
# ]
|
811
|
+
# }
|
812
|
+
# )
|
813
|
+
# df.with_columns(
|
814
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
815
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
816
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
817
|
+
# Polars.col("datetime").dt.millisecond.alias("millisecond")
|
818
|
+
# )
|
819
|
+
# # =>
|
820
|
+
# # shape: (3, 5)
|
821
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
|
822
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
|
823
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
824
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
825
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
|
826
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
827
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500 │
|
828
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60 │
|
829
|
+
# # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
|
735
830
|
def millisecond
|
736
831
|
Utils.wrap_expr(_rbexpr.dt_millisecond)
|
737
832
|
end
|
@@ -741,6 +836,34 @@ module Polars
|
|
741
836
|
# Applies to Datetime columns.
|
742
837
|
#
|
743
838
|
# @return [Expr]
|
839
|
+
#
|
840
|
+
# @example
|
841
|
+
# df = Polars::DataFrame.new(
|
842
|
+
# {
|
843
|
+
# "datetime": [
|
844
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
845
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
846
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
847
|
+
# ]
|
848
|
+
# }
|
849
|
+
# )
|
850
|
+
# df.with_columns(
|
851
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
852
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
853
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
854
|
+
# Polars.col("datetime").dt.microsecond.alias("microsecond")
|
855
|
+
# )
|
856
|
+
# # =>
|
857
|
+
# # shape: (3, 5)
|
858
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
|
859
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
|
860
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
861
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
862
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
|
863
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
864
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000 │
|
865
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000 │
|
866
|
+
# # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
|
744
867
|
def microsecond
|
745
868
|
Utils.wrap_expr(_rbexpr.dt_microsecond)
|
746
869
|
end
|
@@ -750,6 +873,34 @@ module Polars
|
|
750
873
|
# Applies to Datetime columns.
|
751
874
|
#
|
752
875
|
# @return [Expr]
|
876
|
+
#
|
877
|
+
# @example
|
878
|
+
# df = Polars::DataFrame.new(
|
879
|
+
# {
|
880
|
+
# "datetime": [
|
881
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
882
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
883
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
884
|
+
# ]
|
885
|
+
# }
|
886
|
+
# )
|
887
|
+
# df.with_columns(
|
888
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
889
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
890
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
891
|
+
# Polars.col("datetime").dt.nanosecond.alias("nanosecond")
|
892
|
+
# )
|
893
|
+
# # =>
|
894
|
+
# # shape: (3, 5)
|
895
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬────────────┐
|
896
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
|
897
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
898
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
899
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪════════════╡
|
900
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
901
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
|
902
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
|
903
|
+
# # └─────────────────────────┴──────┴────────┴────────┴────────────┘
|
753
904
|
def nanosecond
|
754
905
|
Utils.wrap_expr(_rbexpr.dt_nanosecond)
|
755
906
|
end
|
@@ -835,6 +986,34 @@ module Polars
|
|
835
986
|
# Time unit for the `Datetime` Series.
|
836
987
|
#
|
837
988
|
# @return [Expr]
|
989
|
+
#
|
990
|
+
# @example
|
991
|
+
# df = Polars::DataFrame.new(
|
992
|
+
# {
|
993
|
+
# "date" => Polars.datetime_range(
|
994
|
+
# Time.utc(2001, 1, 1),
|
995
|
+
# Time.utc(2001, 1, 3),
|
996
|
+
# "1d",
|
997
|
+
# time_unit: "ns",
|
998
|
+
# eager: true
|
999
|
+
# )
|
1000
|
+
# }
|
1001
|
+
# )
|
1002
|
+
# df.select(
|
1003
|
+
# Polars.col("date"),
|
1004
|
+
# Polars.col("date").dt.with_time_unit("us").alias("time_unit_us")
|
1005
|
+
# )
|
1006
|
+
# # =>
|
1007
|
+
# # shape: (3, 2)
|
1008
|
+
# # ┌─────────────────────┬───────────────────────┐
|
1009
|
+
# # │ date ┆ time_unit_us │
|
1010
|
+
# # │ --- ┆ --- │
|
1011
|
+
# # │ datetime[ns] ┆ datetime[μs] │
|
1012
|
+
# # ╞═════════════════════╪═══════════════════════╡
|
1013
|
+
# # │ 2001-01-01 00:00:00 ┆ +32971-04-28 00:00:00 │
|
1014
|
+
# # │ 2001-01-02 00:00:00 ┆ +32974-01-22 00:00:00 │
|
1015
|
+
# # │ 2001-01-03 00:00:00 ┆ +32976-10-18 00:00:00 │
|
1016
|
+
# # └─────────────────────┴───────────────────────┘
|
838
1017
|
def with_time_unit(time_unit)
|
839
1018
|
Utils.wrap_expr(_rbexpr.dt_with_time_unit(time_unit))
|
840
1019
|
end
|
@@ -931,6 +1110,71 @@ module Polars
|
|
931
1110
|
# Determine how to deal with non-existent datetimes.
|
932
1111
|
#
|
933
1112
|
# @return [Expr]
|
1113
|
+
#
|
1114
|
+
# @example
|
1115
|
+
# df = Polars::DataFrame.new(
|
1116
|
+
# {
|
1117
|
+
# "london_timezone": Polars.datetime_range(
|
1118
|
+
# Time.utc(2020, 3, 1),
|
1119
|
+
# Time.utc(2020, 7, 1),
|
1120
|
+
# "1mo",
|
1121
|
+
# time_zone: "UTC",
|
1122
|
+
# eager: true,
|
1123
|
+
# ).dt.convert_time_zone("Europe/London")
|
1124
|
+
# }
|
1125
|
+
# )
|
1126
|
+
# df.select(
|
1127
|
+
# [
|
1128
|
+
# Polars.col("london_timezone"),
|
1129
|
+
# Polars.col("london_timezone")
|
1130
|
+
# .dt.replace_time_zone("Europe/Amsterdam")
|
1131
|
+
# .alias("London_to_Amsterdam")
|
1132
|
+
# ]
|
1133
|
+
# )
|
1134
|
+
# # =>
|
1135
|
+
# # shape: (5, 2)
|
1136
|
+
# # ┌─────────────────────────────┬────────────────────────────────┐
|
1137
|
+
# # │ london_timezone ┆ London_to_Amsterdam │
|
1138
|
+
# # │ --- ┆ --- │
|
1139
|
+
# # │ datetime[ns, Europe/London] ┆ datetime[ns, Europe/Amsterdam] │
|
1140
|
+
# # ╞═════════════════════════════╪════════════════════════════════╡
|
1141
|
+
# # │ 2020-03-01 00:00:00 GMT ┆ 2020-03-01 00:00:00 CET │
|
1142
|
+
# # │ 2020-04-01 01:00:00 BST ┆ 2020-04-01 01:00:00 CEST │
|
1143
|
+
# # │ 2020-05-01 01:00:00 BST ┆ 2020-05-01 01:00:00 CEST │
|
1144
|
+
# # │ 2020-06-01 01:00:00 BST ┆ 2020-06-01 01:00:00 CEST │
|
1145
|
+
# # │ 2020-07-01 01:00:00 BST ┆ 2020-07-01 01:00:00 CEST │
|
1146
|
+
# # └─────────────────────────────┴────────────────────────────────┘
|
1147
|
+
#
|
1148
|
+
# @example You can use `ambiguous` to deal with ambiguous datetimes:
|
1149
|
+
# dates = [
|
1150
|
+
# "2018-10-28 01:30",
|
1151
|
+
# "2018-10-28 02:00",
|
1152
|
+
# "2018-10-28 02:30",
|
1153
|
+
# "2018-10-28 02:00"
|
1154
|
+
# ]
|
1155
|
+
# df = Polars::DataFrame.new(
|
1156
|
+
# {
|
1157
|
+
# "ts" => Polars::Series.new(dates).str.strptime(Polars::Datetime),
|
1158
|
+
# "ambiguous" => ["earliest", "earliest", "latest", "latest"]
|
1159
|
+
# }
|
1160
|
+
# )
|
1161
|
+
# df.with_columns(
|
1162
|
+
# ts_localized: Polars.col("ts").dt.replace_time_zone(
|
1163
|
+
# "Europe/Brussels", ambiguous: Polars.col("ambiguous")
|
1164
|
+
# )
|
1165
|
+
# )
|
1166
|
+
# # =>
|
1167
|
+
# # shape: (4, 3)
|
1168
|
+
# # ┌─────────────────────┬───────────┬───────────────────────────────┐
|
1169
|
+
# # │ ts ┆ ambiguous ┆ ts_localized │
|
1170
|
+
# # │ --- ┆ --- ┆ --- │
|
1171
|
+
# # │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │
|
1172
|
+
# # ╞═════════════════════╪═══════════╪═══════════════════════════════╡
|
1173
|
+
# # │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │
|
1174
|
+
# # │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │
|
1175
|
+
# # │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │
|
1176
|
+
# # │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │
|
1177
|
+
# # └─────────────────────┴───────────┴───────────────────────────────┘
|
934
1178
|
def replace_time_zone(time_zone, ambiguous: "raise", non_existent: "raise")
|
935
1179
|
unless ambiguous.is_a?(Expr)
|
936
1180
|
ambiguous = Polars.lit(ambiguous)
|
data/lib/polars/date_time_name_space.rb
CHANGED
@@ -1150,6 +1150,47 @@ module Polars
|
|
1150
1150
|
# Every interval start and period length.
|
1151
1151
|
#
|
1152
1152
|
# @return [Series]
|
1153
|
+
#
|
1154
|
+
# @example
|
1155
|
+
# s = Polars.datetime_range(
|
1156
|
+
# Time.utc(2001, 1, 1),
|
1157
|
+
# Time.utc(2001, 1, 2),
|
1158
|
+
# "165m",
|
1159
|
+
# eager: true
|
1160
|
+
# ).alias("datetime")
|
1161
|
+
# s.dt.truncate("1h")
|
1162
|
+
# # =>
|
1163
|
+
# # shape: (9,)
|
1164
|
+
# # Series: 'datetime' [datetime[ns]]
|
1165
|
+
# # [
|
1166
|
+
# # 2001-01-01 00:00:00
|
1167
|
+
# # 2001-01-01 02:00:00
|
1168
|
+
# # 2001-01-01 05:00:00
|
1169
|
+
# # 2001-01-01 08:00:00
|
1170
|
+
# # 2001-01-01 11:00:00
|
1171
|
+
# # 2001-01-01 13:00:00
|
1172
|
+
# # 2001-01-01 16:00:00
|
1173
|
+
# # 2001-01-01 19:00:00
|
1174
|
+
# # 2001-01-01 22:00:00
|
1175
|
+
# # ]
|
1176
|
+
#
|
1177
|
+
# @example
|
1178
|
+
# s = Polars.datetime_range(
|
1179
|
+
# Time.utc(2001, 1, 1), Time.utc(2001, 1, 1, 1), "10m", eager: true
|
1180
|
+
# ).alias("datetime")
|
1181
|
+
# s.dt.truncate("30m")
|
1182
|
+
# # =>
|
1183
|
+
# # shape: (7,)
|
1184
|
+
# # Series: 'datetime' [datetime[ns]]
|
1185
|
+
# # [
|
1186
|
+
# # 2001-01-01 00:00:00
|
1187
|
+
# # 2001-01-01 00:00:00
|
1188
|
+
# # 2001-01-01 00:00:00
|
1189
|
+
# # 2001-01-01 00:30:00
|
1190
|
+
# # 2001-01-01 00:30:00
|
1191
|
+
# # 2001-01-01 00:30:00
|
1192
|
+
# # 2001-01-01 01:00:00
|
1193
|
+
# # ]
|
1153
1194
|
def truncate(every)
|
1154
1195
|
super
|
1155
1196
|
end
|
@@ -1185,6 +1226,52 @@ module Polars
|
|
1185
1226
|
# @note
|
1186
1227
|
# This functionality is currently experimental and may
|
1187
1228
|
# change without it being considered a breaking change.
|
1229
|
+
#
|
1230
|
+
# @example
|
1231
|
+
# start = Time.utc(2001, 1, 1)
|
1232
|
+
# stop = Time.utc(2001, 1, 2)
|
1233
|
+
# s = Polars.datetime_range(
|
1234
|
+
# start, stop, "165m", eager: true
|
1235
|
+
# ).alias("datetime")
|
1236
|
+
# s.dt.round("1h")
|
1237
|
+
# # =>
|
1238
|
+
# # shape: (9,)
|
1239
|
+
# # Series: 'datetime' [datetime[ns]]
|
1240
|
+
# # [
|
1241
|
+
# # 2001-01-01 00:00:00
|
1242
|
+
# # 2001-01-01 03:00:00
|
1243
|
+
# # 2001-01-01 06:00:00
|
1244
|
+
# # 2001-01-01 08:00:00
|
1245
|
+
# # 2001-01-01 11:00:00
|
1246
|
+
# # 2001-01-01 14:00:00
|
1247
|
+
# # 2001-01-01 17:00:00
|
1248
|
+
# # 2001-01-01 19:00:00
|
1249
|
+
# # 2001-01-01 22:00:00
|
1250
|
+
# # ]
|
1251
|
+
#
|
1252
|
+
# @example
|
1253
|
+
# round_str = s.dt.round("1h")
|
1254
|
+
# round_td = s.dt.round("1h")
|
1255
|
+
# round_str.equals(round_td)
|
1256
|
+
# # => true
|
1257
|
+
#
|
1258
|
+
# @example
|
1259
|
+
# start = Time.utc(2001, 1, 1)
|
1260
|
+
# stop = Time.utc(2001, 1, 1, 1)
|
1261
|
+
# s = Polars.datetime_range(start, stop, "10m", eager: true).alias("datetime")
|
1262
|
+
# s.dt.round("30m")
|
1263
|
+
# # =>
|
1264
|
+
# # shape: (7,)
|
1265
|
+
# # Series: 'datetime' [datetime[ns]]
|
1266
|
+
# # [
|
1267
|
+
# # 2001-01-01 00:00:00
|
1268
|
+
# # 2001-01-01 00:00:00
|
1269
|
+
# # 2001-01-01 00:30:00
|
1270
|
+
# # 2001-01-01 00:30:00
|
1271
|
+
# # 2001-01-01 00:30:00
|
1272
|
+
# # 2001-01-01 01:00:00
|
1273
|
+
# # 2001-01-01 01:00:00
|
1274
|
+
# # ]
|
1188
1275
|
def round(every)
|
1189
1276
|
super
|
1190
1277
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -411,6 +411,26 @@ module Polars
|
|
411
411
|
# Add a prefix to the root column name of the expression.
|
412
412
|
#
|
413
413
|
# @return [Expr]
|
414
|
+
#
|
415
|
+
# @example
|
416
|
+
# df = Polars::DataFrame.new(
|
417
|
+
# {
|
418
|
+
# "a" => [1, 2, 3],
|
419
|
+
# "b" => ["x", "y", "z"]
|
420
|
+
# }
|
421
|
+
# )
|
422
|
+
# df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
|
423
|
+
# # =>
|
424
|
+
# # shape: (3, 4)
|
425
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
426
|
+
# # │ a ┆ b ┆ reverse_a ┆ reverse_b │
|
427
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
428
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
429
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
430
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
431
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
432
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
433
|
+
# # └─────┴─────┴───────────┴───────────┘
|
414
434
|
def prefix(prefix)
|
415
435
|
name.prefix(prefix)
|
416
436
|
end
|
@@ -418,6 +438,26 @@ module Polars
|
|
418
438
|
# Add a suffix to the root column name of the expression.
|
419
439
|
#
|
420
440
|
# @return [Expr]
|
441
|
+
#
|
442
|
+
# @example
|
443
|
+
# df = Polars::DataFrame.new(
|
444
|
+
# {
|
445
|
+
# "a" => [1, 2, 3],
|
446
|
+
# "b" => ["x", "y", "z"]
|
447
|
+
# }
|
448
|
+
# )
|
449
|
+
# df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
|
450
|
+
# # =>
|
451
|
+
# # shape: (3, 4)
|
452
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
453
|
+
# # │ a ┆ b ┆ a_reverse ┆ b_reverse │
|
454
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
455
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
456
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
457
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
458
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
459
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
460
|
+
# # └─────┴─────┴───────────┴───────────┘
|
421
461
|
def suffix(suffix)
|
422
462
|
name.suffix(suffix)
|
423
463
|
end
|
@@ -1182,7 +1222,7 @@ module Polars
|
|
1182
1222
|
# "b" => [1, 1, 2, 2]
|
1183
1223
|
# }
|
1184
1224
|
# )
|
1185
|
-
# df.select(Polars.all.mode)
|
1225
|
+
# df.select(Polars.all.mode.first)
|
1186
1226
|
# # =>
|
1187
1227
|
# # shape: (2, 2)
|
1188
1228
|
# # ┌─────┬─────┐
|
@@ -1863,6 +1903,35 @@ module Polars
|
|
1863
1903
|
# Reverse the selection.
|
1864
1904
|
#
|
1865
1905
|
# @return [Expr]
|
1906
|
+
#
|
1907
|
+
# @example
|
1908
|
+
# df = Polars::DataFrame.new(
|
1909
|
+
# {
|
1910
|
+
# "A" => [1, 2, 3, 4, 5],
|
1911
|
+
# "fruits" => ["banana", "banana", "apple", "apple", "banana"],
|
1912
|
+
# "B" => [5, 4, 3, 2, 1],
|
1913
|
+
# "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
|
1914
|
+
# }
|
1915
|
+
# )
|
1916
|
+
# df.select(
|
1917
|
+
# [
|
1918
|
+
# Polars.all,
|
1919
|
+
# Polars.all.reverse.name.suffix("_reverse")
|
1920
|
+
# ]
|
1921
|
+
# )
|
1922
|
+
# # =>
|
1923
|
+
# # shape: (5, 8)
|
1924
|
+
# # ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
|
1925
|
+
# # │ A ┆ fruits ┆ B ┆ cars ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
|
1926
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1927
|
+
# # │ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str │
|
1928
|
+
# # ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
|
1929
|
+
# # │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 5 ┆ banana ┆ 1 ┆ beetle │
|
1930
|
+
# # │ 2 ┆ banana ┆ 4 ┆ audi ┆ 4 ┆ apple ┆ 2 ┆ beetle │
|
1931
|
+
# # │ 3 ┆ apple ┆ 3 ┆ beetle ┆ 3 ┆ apple ┆ 3 ┆ beetle │
|
1932
|
+
# # │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 ┆ banana ┆ 4 ┆ audi │
|
1933
|
+
# # │ 5 ┆ banana ┆ 1 ┆ beetle ┆ 1 ┆ banana ┆ 5 ┆ beetle │
|
1934
|
+
# # └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘
|
1866
1935
|
def reverse
|
1867
1936
|
_from_rbexpr(_rbexpr.reverse)
|
1868
1937
|
end
|
@@ -2825,7 +2894,7 @@ module Polars
|
|
2825
2894
|
# # ╞══════╪════════╡
|
2826
2895
|
# # │ 1 ┆ 0 │
|
2827
2896
|
# # └──────┴────────┘
|
2828
|
-
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
|
2897
|
+
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
|
2829
2898
|
# if !return_dtype.nil?
|
2830
2899
|
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2831
2900
|
# end
|
@@ -2835,7 +2904,8 @@ module Polars
|
|
2835
2904
|
# f,
|
2836
2905
|
# return_dtype,
|
2837
2906
|
# agg_list,
|
2838
|
-
# is_elementwise
|
2907
|
+
# is_elementwise,
|
2908
|
+
# returns_scalar
|
2839
2909
|
# )
|
2840
2910
|
# )
|
2841
2911
|
# end
|
@@ -3071,6 +3141,21 @@ module Polars
|
|
3071
3141
|
# Number of rows to return.
|
3072
3142
|
#
|
3073
3143
|
# @return [Expr]
|
3144
|
+
#
|
3145
|
+
# @example
|
3146
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
3147
|
+
# df.select(Polars.col("foo").limit(3))
|
3148
|
+
# # =>
|
3149
|
+
# # shape: (3, 1)
|
3150
|
+
# # ┌─────┐
|
3151
|
+
# # │ foo │
|
3152
|
+
# # │ --- │
|
3153
|
+
# # │ i64 │
|
3154
|
+
# # ╞═════╡
|
3155
|
+
# # │ 1 │
|
3156
|
+
# # │ 2 │
|
3157
|
+
# # │ 3 │
|
3158
|
+
# # └─────┘
|
3074
3159
|
def limit(n = 10)
|
3075
3160
|
head(n)
|
3076
3161
|
end
|
@@ -5601,6 +5686,22 @@ module Polars
|
|
5601
5686
|
# If false, the calculations are corrected for statistical bias.
|
5602
5687
|
#
|
5603
5688
|
# @return [Expr]
|
5689
|
+
#
|
5690
|
+
# @example
|
5691
|
+
# df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
|
5692
|
+
# df.select(Polars.col("a").rolling_skew(3))
|
5693
|
+
# # =>
|
5694
|
+
# # shape: (4, 1)
|
5695
|
+
# # ┌──────────┐
|
5696
|
+
# # │ a │
|
5697
|
+
# # │ --- │
|
5698
|
+
# # │ f64 │
|
5699
|
+
# # ╞══════════╡
|
5700
|
+
# # │ null │
|
5701
|
+
# # │ null │
|
5702
|
+
# # │ 0.381802 │
|
5703
|
+
# # │ 0.47033 │
|
5704
|
+
# # └──────────┘
|
5604
5705
|
def rolling_skew(window_size, bias: true)
|
5605
5706
|
_from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
|
5606
5707
|
end
|
@@ -6015,12 +6116,12 @@ module Polars
|
|
6015
6116
|
# # ┌──────┐
|
6016
6117
|
# # │ a │
|
6017
6118
|
# # │ --- │
|
6018
|
-
# # │
|
6119
|
+
# # │ f64 │
|
6019
6120
|
# # ╞══════╡
|
6020
|
-
# # │ -1
|
6021
|
-
# # │ 0
|
6022
|
-
# # │ 0
|
6023
|
-
# # │ 1
|
6121
|
+
# # │ -1.0 │
|
6122
|
+
# # │ -0.0 │
|
6123
|
+
# # │ 0.0 │
|
6124
|
+
# # │ 1.0 │
|
6024
6125
|
# # │ null │
|
6025
6126
|
# # └──────┘
|
6026
6127
|
def sign
|
data/lib/polars/functions/as_datatype.rb
CHANGED
@@ -86,8 +86,57 @@ module Polars
|
|
86
86
|
# Concat the arrays in a Series dtype List in linear time.
|
87
87
|
#
|
88
88
|
# @return [Expr]
|
89
|
-
|
90
|
-
|
89
|
+
#
|
90
|
+
# @example Concatenate two existing list columns. Null values are propagated.
|
91
|
+
# df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
|
92
|
+
# df.with_columns(concat_list: Polars.concat_list("a", "b"))
|
93
|
+
# # =>
|
94
|
+
# # shape: (3, 3)
|
95
|
+
# # ┌───────────┬───────────┬─────────────┐
|
96
|
+
# # │ a ┆ b ┆ concat_list │
|
97
|
+
# # │ --- ┆ --- ┆ --- │
|
98
|
+
# # │ list[i64] ┆ list[i64] ┆ list[i64] │
|
99
|
+
# # ╞═══════════╪═══════════╪═════════════╡
|
100
|
+
# # │ [1, 2] ┆ [4] ┆ [1, 2, 4] │
|
101
|
+
# # │ [3] ┆ [] ┆ [3] │
|
102
|
+
# # │ [4, 5] ┆ null ┆ null │
|
103
|
+
# # └───────────┴───────────┴─────────────┘
|
104
|
+
#
|
105
|
+
# @example Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.
|
106
|
+
# df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
|
107
|
+
# # =>
|
108
|
+
# # shape: (3, 2)
|
109
|
+
# # ┌───────────┬─────────────────┐
|
110
|
+
# # │ a ┆ concat_list │
|
111
|
+
# # │ --- ┆ --- │
|
112
|
+
# # │ list[i64] ┆ list[str] │
|
113
|
+
# # ╞═══════════╪═════════════════╡
|
114
|
+
# # │ [1, 2] ┆ ["1", "2", "x"] │
|
115
|
+
# # │ [3] ┆ ["3", "x"] │
|
116
|
+
# # │ [4, 5] ┆ ["4", "5", "x"] │
|
117
|
+
# # └───────────┴─────────────────┘
|
118
|
+
#
|
119
|
+
# @example Create lagged columns and collect them into a list. This mimics a rolling window.
|
120
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
|
121
|
+
# df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
|
122
|
+
# df.select(
|
123
|
+
# Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
|
124
|
+
# )
|
125
|
+
# # =>
|
126
|
+
# # shape: (5, 1)
|
127
|
+
# # ┌───────────────────┐
|
128
|
+
# # │ A_rolling │
|
129
|
+
# # │ --- │
|
130
|
+
# # │ list[f64] │
|
131
|
+
# # ╞═══════════════════╡
|
132
|
+
# # │ [null, null, 1.0] │
|
133
|
+
# # │ [null, 1.0, 2.0] │
|
134
|
+
# # │ [1.0, 2.0, 9.0] │
|
135
|
+
# # │ [2.0, 9.0, 2.0] │
|
136
|
+
# # │ [9.0, 2.0, 13.0] │
|
137
|
+
# # └───────────────────┘
|
138
|
+
def concat_list(exprs, *more_exprs)
|
139
|
+
exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
|
91
140
|
Utils.wrap_expr(Plr.concat_list(exprs))
|
92
141
|
end
|
93
142
|
|
data/lib/polars/functions/col.rb
CHANGED
@@ -23,7 +23,7 @@ module Polars
|
|
23
23
|
Utils.wrap_expr(Plr.col(name.to_s))
|
24
24
|
elsif Utils.is_polars_dtype(name)
|
25
25
|
Utils.wrap_expr(Plr.dtype_cols([name]))
|
26
|
-
elsif name.is_a?(::Array)
|
26
|
+
elsif name.is_a?(::Array) || name.is_a?(::Set)
|
27
27
|
names = Array(name)
|
28
28
|
if names.empty?
|
29
29
|
return Utils.wrap_expr(Plr.cols(names))
|
data/lib/polars/functions/eager.rb
CHANGED
@@ -127,7 +127,7 @@ module Polars
|
|
127
127
|
# af1, af2, af3 = Polars.align_frames(
|
128
128
|
# df1, df2, df3, on: "dt", select: ["x", "y"]
|
129
129
|
# )
|
130
|
-
# (af1 * af2 * af3).fill_null(0).select(Polars.
|
130
|
+
# (af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
|
131
131
|
# # =>
|
132
132
|
# # shape: (3, 1)
|
133
133
|
# # ┌───────┐
|
@@ -136,9 +136,7 @@ module Polars
|
|
136
136
|
# # │ f64 │
|
137
137
|
# # ╞═══════╡
|
138
138
|
# # │ 0.0 │
|
139
|
-
# # ├╌╌╌╌╌╌╌┤
|
140
139
|
# # │ 167.5 │
|
141
|
-
# # ├╌╌╌╌╌╌╌┤
|
142
140
|
# # │ 47.0 │
|
143
141
|
# # └───────┘
|
144
142
|
def align_frames(
|