polars-df 0.14.0-x86_64-darwin → 0.16.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE-THIRD-PARTY.txt +24956 -14152
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +9 -8
- data/lib/polars/functions.rb +0 -57
data/lib/polars/data_type_group.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module Polars
|
2
|
+
class DataTypeGroup < Set
|
3
|
+
end
|
4
|
+
|
5
|
+
SIGNED_INTEGER_DTYPES = DataTypeGroup.new(
|
6
|
+
[
|
7
|
+
Int8,
|
8
|
+
Int16,
|
9
|
+
Int32,
|
10
|
+
Int64
|
11
|
+
]
|
12
|
+
)
|
13
|
+
UNSIGNED_INTEGER_DTYPES = DataTypeGroup.new(
|
14
|
+
[
|
15
|
+
UInt8,
|
16
|
+
UInt16,
|
17
|
+
UInt32,
|
18
|
+
UInt64
|
19
|
+
]
|
20
|
+
)
|
21
|
+
INTEGER_DTYPES = (
|
22
|
+
SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
|
23
|
+
)
|
24
|
+
FLOAT_DTYPES = DataTypeGroup.new([Float32, Float64])
|
25
|
+
NUMERIC_DTYPES = DataTypeGroup.new(
|
26
|
+
FLOAT_DTYPES + INTEGER_DTYPES | [Decimal]
|
27
|
+
)
|
28
|
+
end
|
data/lib/polars/data_types.rb
CHANGED
@@ -292,6 +292,8 @@ module Polars
|
|
292
292
|
|
293
293
|
# A categorical encoding of a set of strings.
|
294
294
|
class Categorical < DataType
|
295
|
+
attr_reader :ordering
|
296
|
+
|
295
297
|
def initialize(ordering = "physical")
|
296
298
|
@ordering = ordering
|
297
299
|
end
|
@@ -309,7 +311,7 @@ module Polars
|
|
309
311
|
end
|
310
312
|
|
311
313
|
if categories.empty?
|
312
|
-
|
314
|
+
@categories = Series.new("category", [], dtype: String)
|
313
315
|
return
|
314
316
|
end
|
315
317
|
|
data/lib/polars/date_time_expr.rb
CHANGED
@@ -269,6 +269,50 @@ module Polars
|
|
269
269
|
# See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
|
270
270
|
#
|
271
271
|
# @return [Expr]
|
272
|
+
#
|
273
|
+
# @example
|
274
|
+
# df = Polars::DataFrame.new(
|
275
|
+
# {
|
276
|
+
# "datetime" => [
|
277
|
+
# Time.utc(2020, 3, 1),
|
278
|
+
# Time.utc(2020, 4, 1),
|
279
|
+
# Time.utc(2020, 5, 1)
|
280
|
+
# ]
|
281
|
+
# }
|
282
|
+
# )
|
283
|
+
# df.with_columns(
|
284
|
+
# Polars.col("datetime")
|
285
|
+
# .dt.strftime("%Y/%m/%d %H:%M:%S")
|
286
|
+
# .alias("datetime_string")
|
287
|
+
# )
|
288
|
+
# # =>
|
289
|
+
# # shape: (3, 2)
|
290
|
+
# # ┌─────────────────────┬─────────────────────┐
|
291
|
+
# # │ datetime ┆ datetime_string │
|
292
|
+
# # │ --- ┆ --- │
|
293
|
+
# # │ datetime[ns] ┆ str │
|
294
|
+
# # ╞═════════════════════╪═════════════════════╡
|
295
|
+
# # │ 2020-03-01 00:00:00 ┆ 2020/03/01 00:00:00 │
|
296
|
+
# # │ 2020-04-01 00:00:00 ┆ 2020/04/01 00:00:00 │
|
297
|
+
# # │ 2020-05-01 00:00:00 ┆ 2020/05/01 00:00:00 │
|
298
|
+
# # └─────────────────────┴─────────────────────┘
|
299
|
+
#
|
300
|
+
# @example If you're interested in the day name / month name, you can use `'%A'` / `'%B'`:
|
301
|
+
# df.with_columns(
|
302
|
+
# day_name: Polars.col("datetime").dt.strftime("%A"),
|
303
|
+
# month_name: Polars.col("datetime").dt.strftime("%B")
|
304
|
+
# )
|
305
|
+
# # =>
|
306
|
+
# # shape: (3, 3)
|
307
|
+
# # ┌─────────────────────┬───────────┬────────────┐
|
308
|
+
# # │ datetime ┆ day_name ┆ month_name │
|
309
|
+
# # │ --- ┆ --- ┆ --- │
|
310
|
+
# # │ datetime[ns] ┆ str ┆ str │
|
311
|
+
# # ╞═════════════════════╪═══════════╪════════════╡
|
312
|
+
# # │ 2020-03-01 00:00:00 ┆ Sunday ┆ March │
|
313
|
+
# # │ 2020-04-01 00:00:00 ┆ Wednesday ┆ April │
|
314
|
+
# # │ 2020-05-01 00:00:00 ┆ Friday ┆ May │
|
315
|
+
# # └─────────────────────┴───────────┴────────────┘
|
272
316
|
def strftime(fmt)
|
273
317
|
Utils.wrap_expr(_rbexpr.strftime(fmt))
|
274
318
|
end
|
@@ -574,6 +618,29 @@ module Polars
|
|
574
618
|
# Date
|
575
619
|
#
|
576
620
|
# @return [Expr]
|
621
|
+
#
|
622
|
+
# @example
|
623
|
+
# df = Polars::DataFrame.new(
|
624
|
+
# {
|
625
|
+
# "datetime" => [
|
626
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
627
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
628
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000)
|
629
|
+
# ]
|
630
|
+
# }
|
631
|
+
# )
|
632
|
+
# df.with_columns(Polars.col("datetime").dt.date.alias("date"))
|
633
|
+
# # =>
|
634
|
+
# # shape: (3, 2)
|
635
|
+
# # ┌─────────────────────────┬────────────┐
|
636
|
+
# # │ datetime ┆ date │
|
637
|
+
# # │ --- ┆ --- │
|
638
|
+
# # │ datetime[ns] ┆ date │
|
639
|
+
# # ╞═════════════════════════╪════════════╡
|
640
|
+
# # │ 1978-01-01 01:01:01 ┆ 1978-01-01 │
|
641
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 2024-10-13 │
|
642
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 2065-01-01 │
|
643
|
+
# # └─────────────────────────┴────────────┘
|
577
644
|
def date
|
578
645
|
Utils.wrap_expr(_rbexpr.dt_date)
|
579
646
|
end
|
@@ -732,6 +799,34 @@ module Polars
|
|
732
799
|
# Applies to Datetime columns.
|
733
800
|
#
|
734
801
|
# @return [Expr]
|
802
|
+
#
|
803
|
+
# @example
|
804
|
+
# df = Polars::DataFrame.new(
|
805
|
+
# {
|
806
|
+
# "datetime": [
|
807
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
808
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
809
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
810
|
+
# ]
|
811
|
+
# }
|
812
|
+
# )
|
813
|
+
# df.with_columns(
|
814
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
815
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
816
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
817
|
+
# Polars.col("datetime").dt.millisecond.alias("millisecond")
|
818
|
+
# )
|
819
|
+
# # =>
|
820
|
+
# # shape: (3, 5)
|
821
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
|
822
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
|
823
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
824
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
825
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
|
826
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
827
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500 │
|
828
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60 │
|
829
|
+
# # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
|
735
830
|
def millisecond
|
736
831
|
Utils.wrap_expr(_rbexpr.dt_millisecond)
|
737
832
|
end
|
@@ -741,6 +836,34 @@ module Polars
|
|
741
836
|
# Applies to Datetime columns.
|
742
837
|
#
|
743
838
|
# @return [Expr]
|
839
|
+
#
|
840
|
+
# @example
|
841
|
+
# df = Polars::DataFrame.new(
|
842
|
+
# {
|
843
|
+
# "datetime": [
|
844
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
845
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
846
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
847
|
+
# ]
|
848
|
+
# }
|
849
|
+
# )
|
850
|
+
# df.with_columns(
|
851
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
852
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
853
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
854
|
+
# Polars.col("datetime").dt.microsecond.alias("microsecond")
|
855
|
+
# )
|
856
|
+
# # =>
|
857
|
+
# # shape: (3, 5)
|
858
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
|
859
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
|
860
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
861
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
862
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
|
863
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
864
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000 │
|
865
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000 │
|
866
|
+
# # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
|
744
867
|
def microsecond
|
745
868
|
Utils.wrap_expr(_rbexpr.dt_microsecond)
|
746
869
|
end
|
@@ -750,6 +873,34 @@ module Polars
|
|
750
873
|
# Applies to Datetime columns.
|
751
874
|
#
|
752
875
|
# @return [Expr]
|
876
|
+
#
|
877
|
+
# @example
|
878
|
+
# df = Polars::DataFrame.new(
|
879
|
+
# {
|
880
|
+
# "datetime": [
|
881
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
882
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
883
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
884
|
+
# ]
|
885
|
+
# }
|
886
|
+
# )
|
887
|
+
# df.with_columns(
|
888
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
889
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
890
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
891
|
+
# Polars.col("datetime").dt.nanosecond.alias("nanosecond")
|
892
|
+
# )
|
893
|
+
# # =>
|
894
|
+
# # shape: (3, 5)
|
895
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬────────────┐
|
896
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
|
897
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
898
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
899
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪════════════╡
|
900
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
901
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
|
902
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
|
903
|
+
# # └─────────────────────────┴──────┴────────┴────────┴────────────┘
|
753
904
|
def nanosecond
|
754
905
|
Utils.wrap_expr(_rbexpr.dt_nanosecond)
|
755
906
|
end
|
@@ -835,6 +986,34 @@ module Polars
|
|
835
986
|
# Time unit for the `Datetime` Series.
|
836
987
|
#
|
837
988
|
# @return [Expr]
|
989
|
+
#
|
990
|
+
# @example
|
991
|
+
# df = Polars::DataFrame.new(
|
992
|
+
# {
|
993
|
+
# "date" => Polars.datetime_range(
|
994
|
+
# Time.utc(2001, 1, 1),
|
995
|
+
# Time.utc(2001, 1, 3),
|
996
|
+
# "1d",
|
997
|
+
# time_unit: "ns",
|
998
|
+
# eager: true
|
999
|
+
# )
|
1000
|
+
# }
|
1001
|
+
# )
|
1002
|
+
# df.select(
|
1003
|
+
# Polars.col("date"),
|
1004
|
+
# Polars.col("date").dt.with_time_unit("us").alias("time_unit_us")
|
1005
|
+
# )
|
1006
|
+
# # =>
|
1007
|
+
# # shape: (3, 2)
|
1008
|
+
# # ┌─────────────────────┬───────────────────────┐
|
1009
|
+
# # │ date ┆ time_unit_us │
|
1010
|
+
# # │ --- ┆ --- │
|
1011
|
+
# # │ datetime[ns] ┆ datetime[μs] │
|
1012
|
+
# # ╞═════════════════════╪═══════════════════════╡
|
1013
|
+
# # │ 2001-01-01 00:00:00 ┆ +32971-04-28 00:00:00 │
|
1014
|
+
# # │ 2001-01-02 00:00:00 ┆ +32974-01-22 00:00:00 │
|
1015
|
+
# # │ 2001-01-03 00:00:00 ┆ +32976-10-18 00:00:00 │
|
1016
|
+
# # └─────────────────────┴───────────────────────┘
|
838
1017
|
def with_time_unit(time_unit)
|
839
1018
|
Utils.wrap_expr(_rbexpr.dt_with_time_unit(time_unit))
|
840
1019
|
end
|
@@ -931,6 +1110,71 @@ module Polars
|
|
931
1110
|
# Determine how to deal with non-existent datetimes.
|
932
1111
|
#
|
933
1112
|
# @return [Expr]
|
1113
|
+
#
|
1114
|
+
# @example
|
1115
|
+
# df = Polars::DataFrame.new(
|
1116
|
+
# {
|
1117
|
+
# "london_timezone": Polars.datetime_range(
|
1118
|
+
# Time.utc(2020, 3, 1),
|
1119
|
+
# Time.utc(2020, 7, 1),
|
1120
|
+
# "1mo",
|
1121
|
+
# time_zone: "UTC",
|
1122
|
+
# eager: true,
|
1123
|
+
# ).dt.convert_time_zone("Europe/London")
|
1124
|
+
# }
|
1125
|
+
# )
|
1126
|
+
# df.select(
|
1127
|
+
# [
|
1128
|
+
# Polars.col("london_timezone"),
|
1129
|
+
# Polars.col("london_timezone")
|
1130
|
+
# .dt.replace_time_zone("Europe/Amsterdam")
|
1131
|
+
# .alias("London_to_Amsterdam")
|
1132
|
+
# ]
|
1133
|
+
# )
|
1134
|
+
# # =>
|
1135
|
+
# # shape: (5, 2)
|
1136
|
+
# # ┌─────────────────────────────┬────────────────────────────────┐
|
1137
|
+
# # │ london_timezone ┆ London_to_Amsterdam │
|
1138
|
+
# # │ --- ┆ --- │
|
1139
|
+
# # │ datetime[ns, Europe/London] ┆ datetime[ns, Europe/Amsterdam] │
|
1140
|
+
# # ╞═════════════════════════════╪════════════════════════════════╡
|
1141
|
+
# # │ 2020-03-01 00:00:00 GMT ┆ 2020-03-01 00:00:00 CET │
|
1142
|
+
# # │ 2020-04-01 01:00:00 BST ┆ 2020-04-01 01:00:00 CEST │
|
1143
|
+
# # │ 2020-05-01 01:00:00 BST ┆ 2020-05-01 01:00:00 CEST │
|
1144
|
+
# # │ 2020-06-01 01:00:00 BST ┆ 2020-06-01 01:00:00 CEST │
|
1145
|
+
# # │ 2020-07-01 01:00:00 BST ┆ 2020-07-01 01:00:00 CEST │
|
1146
|
+
# # └─────────────────────────────┴────────────────────────────────┘
|
1147
|
+
#
|
1148
|
+
# @example You can use `ambiguous` to deal with ambiguous datetimes:
|
1149
|
+
# dates = [
|
1150
|
+
# "2018-10-28 01:30",
|
1151
|
+
# "2018-10-28 02:00",
|
1152
|
+
# "2018-10-28 02:30",
|
1153
|
+
# "2018-10-28 02:00"
|
1154
|
+
# ]
|
1155
|
+
# df = Polars::DataFrame.new(
|
1156
|
+
# {
|
1157
|
+
# "ts" => Polars::Series.new(dates).str.strptime(Polars::Datetime),
|
1158
|
+
# "ambiguous" => ["earliest", "earliest", "latest", "latest"]
|
1159
|
+
# }
|
1160
|
+
# )
|
1161
|
+
# df.with_columns(
|
1162
|
+
# ts_localized: Polars.col("ts").dt.replace_time_zone(
|
1163
|
+
# "Europe/Brussels", ambiguous: Polars.col("ambiguous")
|
1164
|
+
# )
|
1165
|
+
# )
|
1166
|
+
# # =>
|
1167
|
+
# # shape: (4, 3)
|
1168
|
+
# # ┌─────────────────────┬───────────┬───────────────────────────────┐
|
1169
|
+
# # │ ts ┆ ambiguous ┆ ts_localized │
|
1170
|
+
# # │ --- ┆ --- ┆ --- │
|
1171
|
+
# # │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │
|
1172
|
+
# # ╞═════════════════════╪═══════════╪═══════════════════════════════╡
|
1173
|
+
# # │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │
|
1174
|
+
# # │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │
|
1175
|
+
# # │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │
|
1176
|
+
# # │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │
|
1177
|
+
# # └─────────────────────┴───────────┴───────────────────────────────┘
|
934
1178
|
def replace_time_zone(time_zone, ambiguous: "raise", non_existent: "raise")
|
935
1179
|
unless ambiguous.is_a?(Expr)
|
936
1180
|
ambiguous = Polars.lit(ambiguous)
|
data/lib/polars/date_time_name_space.rb
CHANGED
@@ -1150,6 +1150,47 @@ module Polars
|
|
1150
1150
|
# Every interval start and period length.
|
1151
1151
|
#
|
1152
1152
|
# @return [Series]
|
1153
|
+
#
|
1154
|
+
# @example
|
1155
|
+
# s = Polars.datetime_range(
|
1156
|
+
# Time.utc(2001, 1, 1),
|
1157
|
+
# Time.utc(2001, 1, 2),
|
1158
|
+
# "165m",
|
1159
|
+
# eager: true
|
1160
|
+
# ).alias("datetime")
|
1161
|
+
# s.dt.truncate("1h")
|
1162
|
+
# # =>
|
1163
|
+
# # shape: (9,)
|
1164
|
+
# # Series: 'datetime' [datetime[ns]]
|
1165
|
+
# # [
|
1166
|
+
# # 2001-01-01 00:00:00
|
1167
|
+
# # 2001-01-01 02:00:00
|
1168
|
+
# # 2001-01-01 05:00:00
|
1169
|
+
# # 2001-01-01 08:00:00
|
1170
|
+
# # 2001-01-01 11:00:00
|
1171
|
+
# # 2001-01-01 13:00:00
|
1172
|
+
# # 2001-01-01 16:00:00
|
1173
|
+
# # 2001-01-01 19:00:00
|
1174
|
+
# # 2001-01-01 22:00:00
|
1175
|
+
# # ]
|
1176
|
+
#
|
1177
|
+
# @example
|
1178
|
+
# s = Polars.datetime_range(
|
1179
|
+
# Time.utc(2001, 1, 1), Time.utc(2001, 1, 1, 1), "10m", eager: true
|
1180
|
+
# ).alias("datetime")
|
1181
|
+
# s.dt.truncate("30m")
|
1182
|
+
# # =>
|
1183
|
+
# # shape: (7,)
|
1184
|
+
# # Series: 'datetime' [datetime[ns]]
|
1185
|
+
# # [
|
1186
|
+
# # 2001-01-01 00:00:00
|
1187
|
+
# # 2001-01-01 00:00:00
|
1188
|
+
# # 2001-01-01 00:00:00
|
1189
|
+
# # 2001-01-01 00:30:00
|
1190
|
+
# # 2001-01-01 00:30:00
|
1191
|
+
# # 2001-01-01 00:30:00
|
1192
|
+
# # 2001-01-01 01:00:00
|
1193
|
+
# # ]
|
1153
1194
|
def truncate(every)
|
1154
1195
|
super
|
1155
1196
|
end
|
@@ -1185,6 +1226,52 @@ module Polars
|
|
1185
1226
|
# @note
|
1186
1227
|
# This functionality is currently experimental and may
|
1187
1228
|
# change without it being considered a breaking change.
|
1229
|
+
#
|
1230
|
+
# @example
|
1231
|
+
# start = Time.utc(2001, 1, 1)
|
1232
|
+
# stop = Time.utc(2001, 1, 2)
|
1233
|
+
# s = Polars.datetime_range(
|
1234
|
+
# start, stop, "165m", eager: true
|
1235
|
+
# ).alias("datetime")
|
1236
|
+
# s.dt.round("1h")
|
1237
|
+
# # =>
|
1238
|
+
# # shape: (9,)
|
1239
|
+
# # Series: 'datetime' [datetime[ns]]
|
1240
|
+
# # [
|
1241
|
+
# # 2001-01-01 00:00:00
|
1242
|
+
# # 2001-01-01 03:00:00
|
1243
|
+
# # 2001-01-01 06:00:00
|
1244
|
+
# # 2001-01-01 08:00:00
|
1245
|
+
# # 2001-01-01 11:00:00
|
1246
|
+
# # 2001-01-01 14:00:00
|
1247
|
+
# # 2001-01-01 17:00:00
|
1248
|
+
# # 2001-01-01 19:00:00
|
1249
|
+
# # 2001-01-01 22:00:00
|
1250
|
+
# # ]
|
1251
|
+
#
|
1252
|
+
# @example
|
1253
|
+
# round_str = s.dt.round("1h")
|
1254
|
+
# round_td = s.dt.round("1h")
|
1255
|
+
# round_str.equals(round_td)
|
1256
|
+
# # => true
|
1257
|
+
#
|
1258
|
+
# @example
|
1259
|
+
# start = Time.utc(2001, 1, 1)
|
1260
|
+
# stop = Time.utc(2001, 1, 1, 1)
|
1261
|
+
# s = Polars.datetime_range(start, stop, "10m", eager: true).alias("datetime")
|
1262
|
+
# s.dt.round("30m")
|
1263
|
+
# # =>
|
1264
|
+
# # shape: (7,)
|
1265
|
+
# # Series: 'datetime' [datetime[ns]]
|
1266
|
+
# # [
|
1267
|
+
# # 2001-01-01 00:00:00
|
1268
|
+
# # 2001-01-01 00:00:00
|
1269
|
+
# # 2001-01-01 00:30:00
|
1270
|
+
# # 2001-01-01 00:30:00
|
1271
|
+
# # 2001-01-01 00:30:00
|
1272
|
+
# # 2001-01-01 01:00:00
|
1273
|
+
# # 2001-01-01 01:00:00
|
1274
|
+
# # ]
|
1188
1275
|
def round(every)
|
1189
1276
|
super
|
1190
1277
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -411,6 +411,26 @@ module Polars
|
|
411
411
|
# Add a prefix to the root column name of the expression.
|
412
412
|
#
|
413
413
|
# @return [Expr]
|
414
|
+
#
|
415
|
+
# @example
|
416
|
+
# df = Polars::DataFrame.new(
|
417
|
+
# {
|
418
|
+
# "a" => [1, 2, 3],
|
419
|
+
# "b" => ["x", "y", "z"]
|
420
|
+
# }
|
421
|
+
# )
|
422
|
+
# df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
|
423
|
+
# # =>
|
424
|
+
# # shape: (3, 4)
|
425
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
426
|
+
# # │ a ┆ b ┆ reverse_a ┆ reverse_b │
|
427
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
428
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
429
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
430
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
431
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
432
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
433
|
+
# # └─────┴─────┴───────────┴───────────┘
|
414
434
|
def prefix(prefix)
|
415
435
|
name.prefix(prefix)
|
416
436
|
end
|
@@ -418,6 +438,26 @@ module Polars
|
|
418
438
|
# Add a suffix to the root column name of the expression.
|
419
439
|
#
|
420
440
|
# @return [Expr]
|
441
|
+
#
|
442
|
+
# @example
|
443
|
+
# df = Polars::DataFrame.new(
|
444
|
+
# {
|
445
|
+
# "a" => [1, 2, 3],
|
446
|
+
# "b" => ["x", "y", "z"]
|
447
|
+
# }
|
448
|
+
# )
|
449
|
+
# df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
|
450
|
+
# # =>
|
451
|
+
# # shape: (3, 4)
|
452
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
453
|
+
# # │ a ┆ b ┆ a_reverse ┆ b_reverse │
|
454
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
455
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
456
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
457
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
458
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
459
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
460
|
+
# # └─────┴─────┴───────────┴───────────┘
|
421
461
|
def suffix(suffix)
|
422
462
|
name.suffix(suffix)
|
423
463
|
end
|
@@ -1863,6 +1903,35 @@ module Polars
|
|
1863
1903
|
# Reverse the selection.
|
1864
1904
|
#
|
1865
1905
|
# @return [Expr]
|
1906
|
+
#
|
1907
|
+
# @example
|
1908
|
+
# df = Polars::DataFrame.new(
|
1909
|
+
# {
|
1910
|
+
# "A" => [1, 2, 3, 4, 5],
|
1911
|
+
# "fruits" => ["banana", "banana", "apple", "apple", "banana"],
|
1912
|
+
# "B" => [5, 4, 3, 2, 1],
|
1913
|
+
# "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
|
1914
|
+
# }
|
1915
|
+
# )
|
1916
|
+
# df.select(
|
1917
|
+
# [
|
1918
|
+
# Polars.all,
|
1919
|
+
# Polars.all.reverse.name.suffix("_reverse")
|
1920
|
+
# ]
|
1921
|
+
# )
|
1922
|
+
# # =>
|
1923
|
+
# # shape: (5, 8)
|
1924
|
+
# # ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
|
1925
|
+
# # │ A ┆ fruits ┆ B ┆ cars ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
|
1926
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1927
|
+
# # │ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str │
|
1928
|
+
# # ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
|
1929
|
+
# # │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 5 ┆ banana ┆ 1 ┆ beetle │
|
1930
|
+
# # │ 2 ┆ banana ┆ 4 ┆ audi ┆ 4 ┆ apple ┆ 2 ┆ beetle │
|
1931
|
+
# # │ 3 ┆ apple ┆ 3 ┆ beetle ┆ 3 ┆ apple ┆ 3 ┆ beetle │
|
1932
|
+
# # │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 ┆ banana ┆ 4 ┆ audi │
|
1933
|
+
# # │ 5 ┆ banana ┆ 1 ┆ beetle ┆ 1 ┆ banana ┆ 5 ┆ beetle │
|
1934
|
+
# # └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘
|
1866
1935
|
def reverse
|
1867
1936
|
_from_rbexpr(_rbexpr.reverse)
|
1868
1937
|
end
|
@@ -2825,7 +2894,7 @@ module Polars
|
|
2825
2894
|
# # ╞══════╪════════╡
|
2826
2895
|
# # │ 1 ┆ 0 │
|
2827
2896
|
# # └──────┴────────┘
|
2828
|
-
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
|
2897
|
+
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
|
2829
2898
|
# if !return_dtype.nil?
|
2830
2899
|
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2831
2900
|
# end
|
@@ -2835,7 +2904,8 @@ module Polars
|
|
2835
2904
|
# f,
|
2836
2905
|
# return_dtype,
|
2837
2906
|
# agg_list,
|
2838
|
-
# is_elementwise
|
2907
|
+
# is_elementwise,
|
2908
|
+
# returns_scalar
|
2839
2909
|
# )
|
2840
2910
|
# )
|
2841
2911
|
# end
|
@@ -3071,6 +3141,21 @@ module Polars
|
|
3071
3141
|
# Number of rows to return.
|
3072
3142
|
#
|
3073
3143
|
# @return [Expr]
|
3144
|
+
#
|
3145
|
+
# @example
|
3146
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
3147
|
+
# df.select(Polars.col("foo").limit(3))
|
3148
|
+
# # =>
|
3149
|
+
# # shape: (3, 1)
|
3150
|
+
# # ┌─────┐
|
3151
|
+
# # │ foo │
|
3152
|
+
# # │ --- │
|
3153
|
+
# # │ i64 │
|
3154
|
+
# # ╞═════╡
|
3155
|
+
# # │ 1 │
|
3156
|
+
# # │ 2 │
|
3157
|
+
# # │ 3 │
|
3158
|
+
# # └─────┘
|
3074
3159
|
def limit(n = 10)
|
3075
3160
|
head(n)
|
3076
3161
|
end
|
@@ -5601,6 +5686,22 @@ module Polars
|
|
5601
5686
|
# If false, the calculations are corrected for statistical bias.
|
5602
5687
|
#
|
5603
5688
|
# @return [Expr]
|
5689
|
+
#
|
5690
|
+
# @example
|
5691
|
+
# df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
|
5692
|
+
# df.select(Polars.col("a").rolling_skew(3))
|
5693
|
+
# # =>
|
5694
|
+
# # shape: (4, 1)
|
5695
|
+
# # ┌──────────┐
|
5696
|
+
# # │ a │
|
5697
|
+
# # │ --- │
|
5698
|
+
# # │ f64 │
|
5699
|
+
# # ╞══════════╡
|
5700
|
+
# # │ null │
|
5701
|
+
# # │ null │
|
5702
|
+
# # │ 0.381802 │
|
5703
|
+
# # │ 0.47033 │
|
5704
|
+
# # └──────────┘
|
5604
5705
|
def rolling_skew(window_size, bias: true)
|
5605
5706
|
_from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
|
5606
5707
|
end
|
data/lib/polars/functions/aggregation/horizontal.rb
CHANGED
@@ -143,6 +143,9 @@ module Polars
|
|
143
143
|
# @param exprs [Array]
|
144
144
|
# Column(s) to use in the aggregation. Accepts expression input. Strings are
|
145
145
|
# parsed as column names, other non-expression inputs are parsed as literals.
|
146
|
+
# @param ignore_nulls [Boolean]
|
147
|
+
# Ignore null values (default).
|
148
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
146
149
|
#
|
147
150
|
# @return [Expr]
|
148
151
|
#
|
@@ -166,9 +169,9 @@ module Polars
|
|
166
169
|
# # │ 8 ┆ 5 ┆ y ┆ 13 │
|
167
170
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
168
171
|
# # └─────┴──────┴─────┴─────┘
|
169
|
-
def sum_horizontal(*exprs)
|
172
|
+
def sum_horizontal(*exprs, ignore_nulls: true)
|
170
173
|
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
171
|
-
Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
|
174
|
+
Utils.wrap_expr(Plr.sum_horizontal(rbexprs, ignore_nulls))
|
172
175
|
end
|
173
176
|
|
174
177
|
# Compute the mean of all values horizontally across columns.
|
@@ -176,6 +179,9 @@ module Polars
|
|
176
179
|
# @param exprs [Array]
|
177
180
|
# Column(s) to use in the aggregation. Accepts expression input. Strings are
|
178
181
|
# parsed as column names, other non-expression inputs are parsed as literals.
|
182
|
+
# @param ignore_nulls [Boolean]
|
183
|
+
# Ignore null values (default).
|
184
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
179
185
|
#
|
180
186
|
# @return [Expr]
|
181
187
|
#
|
@@ -199,9 +205,9 @@ module Polars
|
|
199
205
|
# # │ 8 ┆ 5 ┆ y ┆ 6.5 │
|
200
206
|
# # │ 3 ┆ null ┆ z ┆ 3.0 │
|
201
207
|
# # └─────┴──────┴─────┴──────┘
|
202
|
-
def mean_horizontal(*exprs)
|
208
|
+
def mean_horizontal(*exprs, ignore_nulls: true)
|
203
209
|
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
204
|
-
Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
|
210
|
+
Utils.wrap_expr(Plr.mean_horizontal(rbexprs, ignore_nulls))
|
205
211
|
end
|
206
212
|
|
207
213
|
# Cumulatively sum all values horizontally across columns.
|