polars-df 0.14.0-x86_64-linux → 0.15.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Cargo.lock +1296 -283
- data/LICENSE-THIRD-PARTY.txt +24793 -13160
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +275 -52
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -3
- data/lib/polars/functions.rb +0 -57
data/lib/polars/date_time_expr.rb
CHANGED
@@ -269,6 +269,50 @@ module Polars
|
|
269
269
|
# See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
|
270
270
|
#
|
271
271
|
# @return [Expr]
|
272
|
+
#
|
273
|
+
# @example
|
274
|
+
# df = Polars::DataFrame.new(
|
275
|
+
# {
|
276
|
+
# "datetime" => [
|
277
|
+
# Time.utc(2020, 3, 1),
|
278
|
+
# Time.utc(2020, 4, 1),
|
279
|
+
# Time.utc(2020, 5, 1)
|
280
|
+
# ]
|
281
|
+
# }
|
282
|
+
# )
|
283
|
+
# df.with_columns(
|
284
|
+
# Polars.col("datetime")
|
285
|
+
# .dt.strftime("%Y/%m/%d %H:%M:%S")
|
286
|
+
# .alias("datetime_string")
|
287
|
+
# )
|
288
|
+
# # =>
|
289
|
+
# # shape: (3, 2)
|
290
|
+
# # ┌─────────────────────┬─────────────────────┐
|
291
|
+
# # │ datetime ┆ datetime_string │
|
292
|
+
# # │ --- ┆ --- │
|
293
|
+
# # │ datetime[ns] ┆ str │
|
294
|
+
# # ╞═════════════════════╪═════════════════════╡
|
295
|
+
# # │ 2020-03-01 00:00:00 ┆ 2020/03/01 00:00:00 │
|
296
|
+
# # │ 2020-04-01 00:00:00 ┆ 2020/04/01 00:00:00 │
|
297
|
+
# # │ 2020-05-01 00:00:00 ┆ 2020/05/01 00:00:00 │
|
298
|
+
# # └─────────────────────┴─────────────────────┘
|
299
|
+
#
|
300
|
+
# @example If you're interested in the day name / month name, you can use `'%A'` / `'%B'`:
|
301
|
+
# df.with_columns(
|
302
|
+
# day_name: Polars.col("datetime").dt.strftime("%A"),
|
303
|
+
# month_name: Polars.col("datetime").dt.strftime("%B")
|
304
|
+
# )
|
305
|
+
# # =>
|
306
|
+
# # shape: (3, 3)
|
307
|
+
# # ┌─────────────────────┬───────────┬────────────┐
|
308
|
+
# # │ datetime ┆ day_name ┆ month_name │
|
309
|
+
# # │ --- ┆ --- ┆ --- │
|
310
|
+
# # │ datetime[ns] ┆ str ┆ str │
|
311
|
+
# # ╞═════════════════════╪═══════════╪════════════╡
|
312
|
+
# # │ 2020-03-01 00:00:00 ┆ Sunday ┆ March │
|
313
|
+
# # │ 2020-04-01 00:00:00 ┆ Wednesday ┆ April │
|
314
|
+
# # │ 2020-05-01 00:00:00 ┆ Friday ┆ May │
|
315
|
+
# # └─────────────────────┴───────────┴────────────┘
|
272
316
|
def strftime(fmt)
|
273
317
|
Utils.wrap_expr(_rbexpr.strftime(fmt))
|
274
318
|
end
|
@@ -574,6 +618,29 @@ module Polars
|
|
574
618
|
# Date
|
575
619
|
#
|
576
620
|
# @return [Expr]
|
621
|
+
#
|
622
|
+
# @example
|
623
|
+
# df = Polars::DataFrame.new(
|
624
|
+
# {
|
625
|
+
# "datetime" => [
|
626
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
627
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
628
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000)
|
629
|
+
# ]
|
630
|
+
# }
|
631
|
+
# )
|
632
|
+
# df.with_columns(Polars.col("datetime").dt.date.alias("date"))
|
633
|
+
# # =>
|
634
|
+
# # shape: (3, 2)
|
635
|
+
# # ┌─────────────────────────┬────────────┐
|
636
|
+
# # │ datetime ┆ date │
|
637
|
+
# # │ --- ┆ --- │
|
638
|
+
# # │ datetime[ns] ┆ date │
|
639
|
+
# # ╞═════════════════════════╪════════════╡
|
640
|
+
# # │ 1978-01-01 01:01:01 ┆ 1978-01-01 │
|
641
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 2024-10-13 │
|
642
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 2065-01-01 │
|
643
|
+
# # └─────────────────────────┴────────────┘
|
577
644
|
def date
|
578
645
|
Utils.wrap_expr(_rbexpr.dt_date)
|
579
646
|
end
|
@@ -732,6 +799,34 @@ module Polars
|
|
732
799
|
# Applies to Datetime columns.
|
733
800
|
#
|
734
801
|
# @return [Expr]
|
802
|
+
#
|
803
|
+
# @example
|
804
|
+
# df = Polars::DataFrame.new(
|
805
|
+
# {
|
806
|
+
# "datetime": [
|
807
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
808
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
809
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
810
|
+
# ]
|
811
|
+
# }
|
812
|
+
# )
|
813
|
+
# df.with_columns(
|
814
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
815
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
816
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
817
|
+
# Polars.col("datetime").dt.millisecond.alias("millisecond")
|
818
|
+
# )
|
819
|
+
# # =>
|
820
|
+
# # shape: (3, 5)
|
821
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
|
822
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
|
823
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
824
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
825
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
|
826
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
827
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500 │
|
828
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60 │
|
829
|
+
# # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
|
735
830
|
def millisecond
|
736
831
|
Utils.wrap_expr(_rbexpr.dt_millisecond)
|
737
832
|
end
|
@@ -741,6 +836,34 @@ module Polars
|
|
741
836
|
# Applies to Datetime columns.
|
742
837
|
#
|
743
838
|
# @return [Expr]
|
839
|
+
#
|
840
|
+
# @example
|
841
|
+
# df = Polars::DataFrame.new(
|
842
|
+
# {
|
843
|
+
# "datetime": [
|
844
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
845
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
846
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
847
|
+
# ]
|
848
|
+
# }
|
849
|
+
# )
|
850
|
+
# df.with_columns(
|
851
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
852
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
853
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
854
|
+
# Polars.col("datetime").dt.microsecond.alias("microsecond")
|
855
|
+
# )
|
856
|
+
# # =>
|
857
|
+
# # shape: (3, 5)
|
858
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
|
859
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
|
860
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
861
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
862
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
|
863
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
864
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000 │
|
865
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000 │
|
866
|
+
# # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
|
744
867
|
def microsecond
|
745
868
|
Utils.wrap_expr(_rbexpr.dt_microsecond)
|
746
869
|
end
|
@@ -750,6 +873,34 @@ module Polars
|
|
750
873
|
# Applies to Datetime columns.
|
751
874
|
#
|
752
875
|
# @return [Expr]
|
876
|
+
#
|
877
|
+
# @example
|
878
|
+
# df = Polars::DataFrame.new(
|
879
|
+
# {
|
880
|
+
# "datetime": [
|
881
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
882
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
883
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
884
|
+
# ]
|
885
|
+
# }
|
886
|
+
# )
|
887
|
+
# df.with_columns(
|
888
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
889
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
890
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
891
|
+
# Polars.col("datetime").dt.nanosecond.alias("nanosecond")
|
892
|
+
# )
|
893
|
+
# # =>
|
894
|
+
# # shape: (3, 5)
|
895
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬────────────┐
|
896
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
|
897
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
898
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
899
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪════════════╡
|
900
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
901
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
|
902
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
|
903
|
+
# # └─────────────────────────┴──────┴────────┴────────┴────────────┘
|
753
904
|
def nanosecond
|
754
905
|
Utils.wrap_expr(_rbexpr.dt_nanosecond)
|
755
906
|
end
|
@@ -835,6 +986,34 @@ module Polars
|
|
835
986
|
# Time unit for the `Datetime` Series.
|
836
987
|
#
|
837
988
|
# @return [Expr]
|
989
|
+
#
|
990
|
+
# @example
|
991
|
+
# df = Polars::DataFrame.new(
|
992
|
+
# {
|
993
|
+
# "date" => Polars.datetime_range(
|
994
|
+
# Time.utc(2001, 1, 1),
|
995
|
+
# Time.utc(2001, 1, 3),
|
996
|
+
# "1d",
|
997
|
+
# time_unit: "ns",
|
998
|
+
# eager: true
|
999
|
+
# )
|
1000
|
+
# }
|
1001
|
+
# )
|
1002
|
+
# df.select(
|
1003
|
+
# Polars.col("date"),
|
1004
|
+
# Polars.col("date").dt.with_time_unit("us").alias("time_unit_us")
|
1005
|
+
# )
|
1006
|
+
# # =>
|
1007
|
+
# # shape: (3, 2)
|
1008
|
+
# # ┌─────────────────────┬───────────────────────┐
|
1009
|
+
# # │ date ┆ time_unit_us │
|
1010
|
+
# # │ --- ┆ --- │
|
1011
|
+
# # │ datetime[ns] ┆ datetime[μs] │
|
1012
|
+
# # ╞═════════════════════╪═══════════════════════╡
|
1013
|
+
# # │ 2001-01-01 00:00:00 ┆ +32971-04-28 00:00:00 │
|
1014
|
+
# # │ 2001-01-02 00:00:00 ┆ +32974-01-22 00:00:00 │
|
1015
|
+
# # │ 2001-01-03 00:00:00 ┆ +32976-10-18 00:00:00 │
|
1016
|
+
# # └─────────────────────┴───────────────────────┘
|
838
1017
|
def with_time_unit(time_unit)
|
839
1018
|
Utils.wrap_expr(_rbexpr.dt_with_time_unit(time_unit))
|
840
1019
|
end
|
@@ -931,6 +1110,71 @@ module Polars
|
|
931
1110
|
# Determine how to deal with non-existent datetimes.
|
932
1111
|
#
|
933
1112
|
# @return [Expr]
|
1113
|
+
#
|
1114
|
+
# @example
|
1115
|
+
# df = Polars::DataFrame.new(
|
1116
|
+
# {
|
1117
|
+
# "london_timezone": Polars.datetime_range(
|
1118
|
+
# Time.utc(2020, 3, 1),
|
1119
|
+
# Time.utc(2020, 7, 1),
|
1120
|
+
# "1mo",
|
1121
|
+
# time_zone: "UTC",
|
1122
|
+
# eager: true,
|
1123
|
+
# ).dt.convert_time_zone("Europe/London")
|
1124
|
+
# }
|
1125
|
+
# )
|
1126
|
+
# df.select(
|
1127
|
+
# [
|
1128
|
+
# Polars.col("london_timezone"),
|
1129
|
+
# Polars.col("london_timezone")
|
1130
|
+
# .dt.replace_time_zone("Europe/Amsterdam")
|
1131
|
+
# .alias("London_to_Amsterdam")
|
1132
|
+
# ]
|
1133
|
+
# )
|
1134
|
+
# # =>
|
1135
|
+
# # shape: (5, 2)
|
1136
|
+
# # ┌─────────────────────────────┬────────────────────────────────┐
|
1137
|
+
# # │ london_timezone ┆ London_to_Amsterdam │
|
1138
|
+
# # │ --- ┆ --- │
|
1139
|
+
# # │ datetime[ns, Europe/London] ┆ datetime[ns, Europe/Amsterdam] │
|
1140
|
+
# # ╞═════════════════════════════╪════════════════════════════════╡
|
1141
|
+
# # │ 2020-03-01 00:00:00 GMT ┆ 2020-03-01 00:00:00 CET │
|
1142
|
+
# # │ 2020-04-01 01:00:00 BST ┆ 2020-04-01 01:00:00 CEST │
|
1143
|
+
# # │ 2020-05-01 01:00:00 BST ┆ 2020-05-01 01:00:00 CEST │
|
1144
|
+
# # │ 2020-06-01 01:00:00 BST ┆ 2020-06-01 01:00:00 CEST │
|
1145
|
+
# # │ 2020-07-01 01:00:00 BST ┆ 2020-07-01 01:00:00 CEST │
|
1146
|
+
# # └─────────────────────────────┴────────────────────────────────┘
|
1147
|
+
#
|
1148
|
+
# @example You can use `ambiguous` to deal with ambiguous datetimes:
|
1149
|
+
# dates = [
|
1150
|
+
# "2018-10-28 01:30",
|
1151
|
+
# "2018-10-28 02:00",
|
1152
|
+
# "2018-10-28 02:30",
|
1153
|
+
# "2018-10-28 02:00"
|
1154
|
+
# ]
|
1155
|
+
# df = Polars::DataFrame.new(
|
1156
|
+
# {
|
1157
|
+
# "ts" => Polars::Series.new(dates).str.strptime(Polars::Datetime),
|
1158
|
+
# "ambiguous" => ["earliest", "earliest", "latest", "latest"]
|
1159
|
+
# }
|
1160
|
+
# )
|
1161
|
+
# df.with_columns(
|
1162
|
+
# ts_localized: Polars.col("ts").dt.replace_time_zone(
|
1163
|
+
# "Europe/Brussels", ambiguous: Polars.col("ambiguous")
|
1164
|
+
# )
|
1165
|
+
# )
|
1166
|
+
# # =>
|
1167
|
+
# # shape: (4, 3)
|
1168
|
+
# # ┌─────────────────────┬───────────┬───────────────────────────────┐
|
1169
|
+
# # │ ts ┆ ambiguous ┆ ts_localized │
|
1170
|
+
# # │ --- ┆ --- ┆ --- │
|
1171
|
+
# # │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │
|
1172
|
+
# # ╞═════════════════════╪═══════════╪═══════════════════════════════╡
|
1173
|
+
# # │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │
|
1174
|
+
# # │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │
|
1175
|
+
# # │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │
|
1176
|
+
# # │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │
|
1177
|
+
# # └─────────────────────┴───────────┴───────────────────────────────┘
|
934
1178
|
def replace_time_zone(time_zone, ambiguous: "raise", non_existent: "raise")
|
935
1179
|
unless ambiguous.is_a?(Expr)
|
936
1180
|
ambiguous = Polars.lit(ambiguous)
|
data/lib/polars/date_time_name_space.rb
CHANGED
@@ -1150,6 +1150,47 @@ module Polars
|
|
1150
1150
|
# Every interval start and period length.
|
1151
1151
|
#
|
1152
1152
|
# @return [Series]
|
1153
|
+
#
|
1154
|
+
# @example
|
1155
|
+
# s = Polars.datetime_range(
|
1156
|
+
# Time.utc(2001, 1, 1),
|
1157
|
+
# Time.utc(2001, 1, 2),
|
1158
|
+
# "165m",
|
1159
|
+
# eager: true
|
1160
|
+
# ).alias("datetime")
|
1161
|
+
# s.dt.truncate("1h")
|
1162
|
+
# # =>
|
1163
|
+
# # shape: (9,)
|
1164
|
+
# # Series: 'datetime' [datetime[ns]]
|
1165
|
+
# # [
|
1166
|
+
# # 2001-01-01 00:00:00
|
1167
|
+
# # 2001-01-01 02:00:00
|
1168
|
+
# # 2001-01-01 05:00:00
|
1169
|
+
# # 2001-01-01 08:00:00
|
1170
|
+
# # 2001-01-01 11:00:00
|
1171
|
+
# # 2001-01-01 13:00:00
|
1172
|
+
# # 2001-01-01 16:00:00
|
1173
|
+
# # 2001-01-01 19:00:00
|
1174
|
+
# # 2001-01-01 22:00:00
|
1175
|
+
# # ]
|
1176
|
+
#
|
1177
|
+
# @example
|
1178
|
+
# s = Polars.datetime_range(
|
1179
|
+
# Time.utc(2001, 1, 1), Time.utc(2001, 1, 1, 1), "10m", eager: true
|
1180
|
+
# ).alias("datetime")
|
1181
|
+
# s.dt.truncate("30m")
|
1182
|
+
# # =>
|
1183
|
+
# # shape: (7,)
|
1184
|
+
# # Series: 'datetime' [datetime[ns]]
|
1185
|
+
# # [
|
1186
|
+
# # 2001-01-01 00:00:00
|
1187
|
+
# # 2001-01-01 00:00:00
|
1188
|
+
# # 2001-01-01 00:00:00
|
1189
|
+
# # 2001-01-01 00:30:00
|
1190
|
+
# # 2001-01-01 00:30:00
|
1191
|
+
# # 2001-01-01 00:30:00
|
1192
|
+
# # 2001-01-01 01:00:00
|
1193
|
+
# # ]
|
1153
1194
|
def truncate(every)
|
1154
1195
|
super
|
1155
1196
|
end
|
@@ -1185,6 +1226,52 @@ module Polars
|
|
1185
1226
|
# @note
|
1186
1227
|
# This functionality is currently experimental and may
|
1187
1228
|
# change without it being considered a breaking change.
|
1229
|
+
#
|
1230
|
+
# @example
|
1231
|
+
# start = Time.utc(2001, 1, 1)
|
1232
|
+
# stop = Time.utc(2001, 1, 2)
|
1233
|
+
# s = Polars.datetime_range(
|
1234
|
+
# start, stop, "165m", eager: true
|
1235
|
+
# ).alias("datetime")
|
1236
|
+
# s.dt.round("1h")
|
1237
|
+
# # =>
|
1238
|
+
# # shape: (9,)
|
1239
|
+
# # Series: 'datetime' [datetime[ns]]
|
1240
|
+
# # [
|
1241
|
+
# # 2001-01-01 00:00:00
|
1242
|
+
# # 2001-01-01 03:00:00
|
1243
|
+
# # 2001-01-01 06:00:00
|
1244
|
+
# # 2001-01-01 08:00:00
|
1245
|
+
# # 2001-01-01 11:00:00
|
1246
|
+
# # 2001-01-01 14:00:00
|
1247
|
+
# # 2001-01-01 17:00:00
|
1248
|
+
# # 2001-01-01 19:00:00
|
1249
|
+
# # 2001-01-01 22:00:00
|
1250
|
+
# # ]
|
1251
|
+
#
|
1252
|
+
# @example
|
1253
|
+
# round_str = s.dt.round("1h")
|
1254
|
+
# round_td = s.dt.round("1h")
|
1255
|
+
# round_str.equals(round_td)
|
1256
|
+
# # => true
|
1257
|
+
#
|
1258
|
+
# @example
|
1259
|
+
# start = Time.utc(2001, 1, 1)
|
1260
|
+
# stop = Time.utc(2001, 1, 1, 1)
|
1261
|
+
# s = Polars.datetime_range(start, stop, "10m", eager: true).alias("datetime")
|
1262
|
+
# s.dt.round("30m")
|
1263
|
+
# # =>
|
1264
|
+
# # shape: (7,)
|
1265
|
+
# # Series: 'datetime' [datetime[ns]]
|
1266
|
+
# # [
|
1267
|
+
# # 2001-01-01 00:00:00
|
1268
|
+
# # 2001-01-01 00:00:00
|
1269
|
+
# # 2001-01-01 00:30:00
|
1270
|
+
# # 2001-01-01 00:30:00
|
1271
|
+
# # 2001-01-01 00:30:00
|
1272
|
+
# # 2001-01-01 01:00:00
|
1273
|
+
# # 2001-01-01 01:00:00
|
1274
|
+
# # ]
|
1188
1275
|
def round(every)
|
1189
1276
|
super
|
1190
1277
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -411,6 +411,26 @@ module Polars
|
|
411
411
|
# Add a prefix to the root column name of the expression.
|
412
412
|
#
|
413
413
|
# @return [Expr]
|
414
|
+
#
|
415
|
+
# @example
|
416
|
+
# df = Polars::DataFrame.new(
|
417
|
+
# {
|
418
|
+
# "a" => [1, 2, 3],
|
419
|
+
# "b" => ["x", "y", "z"]
|
420
|
+
# }
|
421
|
+
# )
|
422
|
+
# df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
|
423
|
+
# # =>
|
424
|
+
# # shape: (3, 4)
|
425
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
426
|
+
# # │ a ┆ b ┆ reverse_a ┆ reverse_b │
|
427
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
428
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
429
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
430
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
431
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
432
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
433
|
+
# # └─────┴─────┴───────────┴───────────┘
|
414
434
|
def prefix(prefix)
|
415
435
|
name.prefix(prefix)
|
416
436
|
end
|
@@ -418,6 +438,26 @@ module Polars
|
|
418
438
|
# Add a suffix to the root column name of the expression.
|
419
439
|
#
|
420
440
|
# @return [Expr]
|
441
|
+
#
|
442
|
+
# @example
|
443
|
+
# df = Polars::DataFrame.new(
|
444
|
+
# {
|
445
|
+
# "a" => [1, 2, 3],
|
446
|
+
# "b" => ["x", "y", "z"]
|
447
|
+
# }
|
448
|
+
# )
|
449
|
+
# df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
|
450
|
+
# # =>
|
451
|
+
# # shape: (3, 4)
|
452
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
453
|
+
# # │ a ┆ b ┆ a_reverse ┆ b_reverse │
|
454
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
455
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
456
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
457
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
458
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
459
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
460
|
+
# # └─────┴─────┴───────────┴───────────┘
|
421
461
|
def suffix(suffix)
|
422
462
|
name.suffix(suffix)
|
423
463
|
end
|
@@ -1863,6 +1903,35 @@ module Polars
|
|
1863
1903
|
# Reverse the selection.
|
1864
1904
|
#
|
1865
1905
|
# @return [Expr]
|
1906
|
+
#
|
1907
|
+
# @example
|
1908
|
+
# df = Polars::DataFrame.new(
|
1909
|
+
# {
|
1910
|
+
# "A" => [1, 2, 3, 4, 5],
|
1911
|
+
# "fruits" => ["banana", "banana", "apple", "apple", "banana"],
|
1912
|
+
# "B" => [5, 4, 3, 2, 1],
|
1913
|
+
# "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
|
1914
|
+
# }
|
1915
|
+
# )
|
1916
|
+
# df.select(
|
1917
|
+
# [
|
1918
|
+
# Polars.all,
|
1919
|
+
# Polars.all.reverse.name.suffix("_reverse")
|
1920
|
+
# ]
|
1921
|
+
# )
|
1922
|
+
# # =>
|
1923
|
+
# # shape: (5, 8)
|
1924
|
+
# # ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
|
1925
|
+
# # │ A ┆ fruits ┆ B ┆ cars ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
|
1926
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1927
|
+
# # │ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str │
|
1928
|
+
# # ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
|
1929
|
+
# # │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 5 ┆ banana ┆ 1 ┆ beetle │
|
1930
|
+
# # │ 2 ┆ banana ┆ 4 ┆ audi ┆ 4 ┆ apple ┆ 2 ┆ beetle │
|
1931
|
+
# # │ 3 ┆ apple ┆ 3 ┆ beetle ┆ 3 ┆ apple ┆ 3 ┆ beetle │
|
1932
|
+
# # │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 ┆ banana ┆ 4 ┆ audi │
|
1933
|
+
# # │ 5 ┆ banana ┆ 1 ┆ beetle ┆ 1 ┆ banana ┆ 5 ┆ beetle │
|
1934
|
+
# # └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘
|
1866
1935
|
def reverse
|
1867
1936
|
_from_rbexpr(_rbexpr.reverse)
|
1868
1937
|
end
|
@@ -2825,7 +2894,7 @@ module Polars
|
|
2825
2894
|
# # ╞══════╪════════╡
|
2826
2895
|
# # │ 1 ┆ 0 │
|
2827
2896
|
# # └──────┴────────┘
|
2828
|
-
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
|
2897
|
+
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
|
2829
2898
|
# if !return_dtype.nil?
|
2830
2899
|
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2831
2900
|
# end
|
@@ -2835,7 +2904,8 @@ module Polars
|
|
2835
2904
|
# f,
|
2836
2905
|
# return_dtype,
|
2837
2906
|
# agg_list,
|
2838
|
-
# is_elementwise
|
2907
|
+
# is_elementwise,
|
2908
|
+
# returns_scalar
|
2839
2909
|
# )
|
2840
2910
|
# )
|
2841
2911
|
# end
|
@@ -3071,6 +3141,21 @@ module Polars
|
|
3071
3141
|
# Number of rows to return.
|
3072
3142
|
#
|
3073
3143
|
# @return [Expr]
|
3144
|
+
#
|
3145
|
+
# @example
|
3146
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
3147
|
+
# df.select(Polars.col("foo").limit(3))
|
3148
|
+
# # =>
|
3149
|
+
# # shape: (3, 1)
|
3150
|
+
# # ┌─────┐
|
3151
|
+
# # │ foo │
|
3152
|
+
# # │ --- │
|
3153
|
+
# # │ i64 │
|
3154
|
+
# # ╞═════╡
|
3155
|
+
# # │ 1 │
|
3156
|
+
# # │ 2 │
|
3157
|
+
# # │ 3 │
|
3158
|
+
# # └─────┘
|
3074
3159
|
def limit(n = 10)
|
3075
3160
|
head(n)
|
3076
3161
|
end
|
@@ -5601,6 +5686,22 @@ module Polars
|
|
5601
5686
|
# If false, the calculations are corrected for statistical bias.
|
5602
5687
|
#
|
5603
5688
|
# @return [Expr]
|
5689
|
+
#
|
5690
|
+
# @example
|
5691
|
+
# df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
|
5692
|
+
# df.select(Polars.col("a").rolling_skew(3))
|
5693
|
+
# # =>
|
5694
|
+
# # shape: (4, 1)
|
5695
|
+
# # ┌──────────┐
|
5696
|
+
# # │ a │
|
5697
|
+
# # │ --- │
|
5698
|
+
# # │ f64 │
|
5699
|
+
# # ╞══════════╡
|
5700
|
+
# # │ null │
|
5701
|
+
# # │ null │
|
5702
|
+
# # │ 0.381802 │
|
5703
|
+
# # │ 0.47033 │
|
5704
|
+
# # └──────────┘
|
5604
5705
|
def rolling_skew(window_size, bias: true)
|
5605
5706
|
_from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
|
5606
5707
|
end
|
data/lib/polars/functions/as_datatype.rb
CHANGED
@@ -86,8 +86,57 @@ module Polars
|
|
86
86
|
# Concat the arrays in a Series dtype List in linear time.
|
87
87
|
#
|
88
88
|
# @return [Expr]
|
89
|
-
|
90
|
-
|
89
|
+
#
|
90
|
+
# @example Concatenate two existing list columns. Null values are propagated.
|
91
|
+
# df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
|
92
|
+
# df.with_columns(concat_list: Polars.concat_list("a", "b"))
|
93
|
+
# # =>
|
94
|
+
# # shape: (3, 3)
|
95
|
+
# # ┌───────────┬───────────┬─────────────┐
|
96
|
+
# # │ a ┆ b ┆ concat_list │
|
97
|
+
# # │ --- ┆ --- ┆ --- │
|
98
|
+
# # │ list[i64] ┆ list[i64] ┆ list[i64] │
|
99
|
+
# # ╞═══════════╪═══════════╪═════════════╡
|
100
|
+
# # │ [1, 2] ┆ [4] ┆ [1, 2, 4] │
|
101
|
+
# # │ [3] ┆ [] ┆ [3] │
|
102
|
+
# # │ [4, 5] ┆ null ┆ null │
|
103
|
+
# # └───────────┴───────────┴─────────────┘
|
104
|
+
#
|
105
|
+
# @example Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.
|
106
|
+
# df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
|
107
|
+
# # =>
|
108
|
+
# # shape: (3, 2)
|
109
|
+
# # ┌───────────┬─────────────────┐
|
110
|
+
# # │ a ┆ concat_list │
|
111
|
+
# # │ --- ┆ --- │
|
112
|
+
# # │ list[i64] ┆ list[str] │
|
113
|
+
# # ╞═══════════╪═════════════════╡
|
114
|
+
# # │ [1, 2] ┆ ["1", "2", "x"] │
|
115
|
+
# # │ [3] ┆ ["3", "x"] │
|
116
|
+
# # │ [4, 5] ┆ ["4", "5", "x"] │
|
117
|
+
# # └───────────┴─────────────────┘
|
118
|
+
#
|
119
|
+
# @example Create lagged columns and collect them into a list. This mimics a rolling window.
|
120
|
+
# df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
|
121
|
+
# df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
|
122
|
+
# df.select(
|
123
|
+
# Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
|
124
|
+
# )
|
125
|
+
# # =>
|
126
|
+
# # shape: (5, 1)
|
127
|
+
# # ┌───────────────────┐
|
128
|
+
# # │ A_rolling │
|
129
|
+
# # │ --- │
|
130
|
+
# # │ list[f64] │
|
131
|
+
# # ╞═══════════════════╡
|
132
|
+
# # │ [null, null, 1.0] │
|
133
|
+
# # │ [null, 1.0, 2.0] │
|
134
|
+
# # │ [1.0, 2.0, 9.0] │
|
135
|
+
# # │ [2.0, 9.0, 2.0] │
|
136
|
+
# # │ [9.0, 2.0, 13.0] │
|
137
|
+
# # └───────────────────┘
|
138
|
+
def concat_list(exprs, *more_exprs)
|
139
|
+
exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
|
91
140
|
Utils.wrap_expr(Plr.concat_list(exprs))
|
92
141
|
end
|
93
142
|
|
data/lib/polars/functions/col.rb
CHANGED
@@ -23,7 +23,7 @@ module Polars
|
|
23
23
|
Utils.wrap_expr(Plr.col(name.to_s))
|
24
24
|
elsif Utils.is_polars_dtype(name)
|
25
25
|
Utils.wrap_expr(Plr.dtype_cols([name]))
|
26
|
-
elsif name.is_a?(::Array)
|
26
|
+
elsif name.is_a?(::Array) || name.is_a?(::Set)
|
27
27
|
names = Array(name)
|
28
28
|
if names.empty?
|
29
29
|
return Utils.wrap_expr(Plr.cols(names))
|
data/lib/polars/functions/eager.rb
CHANGED
@@ -127,7 +127,7 @@ module Polars
|
|
127
127
|
# af1, af2, af3 = Polars.align_frames(
|
128
128
|
# df1, df2, df3, on: "dt", select: ["x", "y"]
|
129
129
|
# )
|
130
|
-
# (af1 * af2 * af3).fill_null(0).select(Polars.
|
130
|
+
# (af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
|
131
131
|
# # =>
|
132
132
|
# # shape: (3, 1)
|
133
133
|
# # ┌───────┐
|
@@ -136,9 +136,7 @@ module Polars
|
|
136
136
|
# # │ f64 │
|
137
137
|
# # ╞═══════╡
|
138
138
|
# # │ 0.0 │
|
139
|
-
# # ├╌╌╌╌╌╌╌┤
|
140
139
|
# # │ 167.5 │
|
141
|
-
# # ├╌╌╌╌╌╌╌┤
|
142
140
|
# # │ 47.0 │
|
143
141
|
# # └───────┘
|
144
142
|
def align_frames(
|