polars-df 0.14.0-x64-mingw-ucrt → 0.16.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE-THIRD-PARTY.txt +24369 -14580
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +9 -8
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,28 @@
|
|
1
|
+
module Polars
|
2
|
+
class DataTypeGroup < Set
|
3
|
+
end
|
4
|
+
|
5
|
+
SIGNED_INTEGER_DTYPES = DataTypeGroup.new(
|
6
|
+
[
|
7
|
+
Int8,
|
8
|
+
Int16,
|
9
|
+
Int32,
|
10
|
+
Int64
|
11
|
+
]
|
12
|
+
)
|
13
|
+
UNSIGNED_INTEGER_DTYPES = DataTypeGroup.new(
|
14
|
+
[
|
15
|
+
UInt8,
|
16
|
+
UInt16,
|
17
|
+
UInt32,
|
18
|
+
UInt64
|
19
|
+
]
|
20
|
+
)
|
21
|
+
INTEGER_DTYPES = (
|
22
|
+
SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
|
23
|
+
)
|
24
|
+
FLOAT_DTYPES = DataTypeGroup.new([Float32, Float64])
|
25
|
+
NUMERIC_DTYPES = DataTypeGroup.new(
|
26
|
+
FLOAT_DTYPES + INTEGER_DTYPES | [Decimal]
|
27
|
+
)
|
28
|
+
end
|
data/lib/polars/data_types.rb
CHANGED
@@ -292,6 +292,8 @@ module Polars
|
|
292
292
|
|
293
293
|
# A categorical encoding of a set of strings.
|
294
294
|
class Categorical < DataType
|
295
|
+
attr_reader :ordering
|
296
|
+
|
295
297
|
def initialize(ordering = "physical")
|
296
298
|
@ordering = ordering
|
297
299
|
end
|
@@ -309,7 +311,7 @@ module Polars
|
|
309
311
|
end
|
310
312
|
|
311
313
|
if categories.empty?
|
312
|
-
|
314
|
+
@categories = Series.new("category", [], dtype: String)
|
313
315
|
return
|
314
316
|
end
|
315
317
|
|
@@ -269,6 +269,50 @@ module Polars
|
|
269
269
|
# See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
|
270
270
|
#
|
271
271
|
# @return [Expr]
|
272
|
+
#
|
273
|
+
# @example
|
274
|
+
# df = Polars::DataFrame.new(
|
275
|
+
# {
|
276
|
+
# "datetime" => [
|
277
|
+
# Time.utc(2020, 3, 1),
|
278
|
+
# Time.utc(2020, 4, 1),
|
279
|
+
# Time.utc(2020, 5, 1)
|
280
|
+
# ]
|
281
|
+
# }
|
282
|
+
# )
|
283
|
+
# df.with_columns(
|
284
|
+
# Polars.col("datetime")
|
285
|
+
# .dt.strftime("%Y/%m/%d %H:%M:%S")
|
286
|
+
# .alias("datetime_string")
|
287
|
+
# )
|
288
|
+
# # =>
|
289
|
+
# # shape: (3, 2)
|
290
|
+
# # ┌─────────────────────┬─────────────────────┐
|
291
|
+
# # │ datetime ┆ datetime_string │
|
292
|
+
# # │ --- ┆ --- │
|
293
|
+
# # │ datetime[ns] ┆ str │
|
294
|
+
# # ╞═════════════════════╪═════════════════════╡
|
295
|
+
# # │ 2020-03-01 00:00:00 ┆ 2020/03/01 00:00:00 │
|
296
|
+
# # │ 2020-04-01 00:00:00 ┆ 2020/04/01 00:00:00 │
|
297
|
+
# # │ 2020-05-01 00:00:00 ┆ 2020/05/01 00:00:00 │
|
298
|
+
# # └─────────────────────┴─────────────────────┘
|
299
|
+
#
|
300
|
+
# @example If you're interested in the day name / month name, you can use `'%A'` / `'%B'`:
|
301
|
+
# df.with_columns(
|
302
|
+
# day_name: Polars.col("datetime").dt.strftime("%A"),
|
303
|
+
# month_name: Polars.col("datetime").dt.strftime("%B")
|
304
|
+
# )
|
305
|
+
# # =>
|
306
|
+
# # shape: (3, 3)
|
307
|
+
# # ┌─────────────────────┬───────────┬────────────┐
|
308
|
+
# # │ datetime ┆ day_name ┆ month_name │
|
309
|
+
# # │ --- ┆ --- ┆ --- │
|
310
|
+
# # │ datetime[ns] ┆ str ┆ str │
|
311
|
+
# # ╞═════════════════════╪═══════════╪════════════╡
|
312
|
+
# # │ 2020-03-01 00:00:00 ┆ Sunday ┆ March │
|
313
|
+
# # │ 2020-04-01 00:00:00 ┆ Wednesday ┆ April │
|
314
|
+
# # │ 2020-05-01 00:00:00 ┆ Friday ┆ May │
|
315
|
+
# # └─────────────────────┴───────────┴────────────┘
|
272
316
|
def strftime(fmt)
|
273
317
|
Utils.wrap_expr(_rbexpr.strftime(fmt))
|
274
318
|
end
|
@@ -574,6 +618,29 @@ module Polars
|
|
574
618
|
# Date
|
575
619
|
#
|
576
620
|
# @return [Expr]
|
621
|
+
#
|
622
|
+
# @example
|
623
|
+
# df = Polars::DataFrame.new(
|
624
|
+
# {
|
625
|
+
# "datetime" => [
|
626
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
627
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
628
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000)
|
629
|
+
# ]
|
630
|
+
# }
|
631
|
+
# )
|
632
|
+
# df.with_columns(Polars.col("datetime").dt.date.alias("date"))
|
633
|
+
# # =>
|
634
|
+
# # shape: (3, 2)
|
635
|
+
# # ┌─────────────────────────┬────────────┐
|
636
|
+
# # │ datetime ┆ date │
|
637
|
+
# # │ --- ┆ --- │
|
638
|
+
# # │ datetime[ns] ┆ date │
|
639
|
+
# # ╞═════════════════════════╪════════════╡
|
640
|
+
# # │ 1978-01-01 01:01:01 ┆ 1978-01-01 │
|
641
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 2024-10-13 │
|
642
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 2065-01-01 │
|
643
|
+
# # └─────────────────────────┴────────────┘
|
577
644
|
def date
|
578
645
|
Utils.wrap_expr(_rbexpr.dt_date)
|
579
646
|
end
|
@@ -732,6 +799,34 @@ module Polars
|
|
732
799
|
# Applies to Datetime columns.
|
733
800
|
#
|
734
801
|
# @return [Expr]
|
802
|
+
#
|
803
|
+
# @example
|
804
|
+
# df = Polars::DataFrame.new(
|
805
|
+
# {
|
806
|
+
# "datetime" => [
|
807
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
808
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
809
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
810
|
+
# ]
|
811
|
+
# }
|
812
|
+
# )
|
813
|
+
# df.with_columns(
|
814
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
815
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
816
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
817
|
+
# Polars.col("datetime").dt.millisecond.alias("millisecond")
|
818
|
+
# )
|
819
|
+
# # =>
|
820
|
+
# # shape: (3, 5)
|
821
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
|
822
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
|
823
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
824
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
825
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
|
826
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
827
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500 │
|
828
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60 │
|
829
|
+
# # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
|
735
830
|
def millisecond
|
736
831
|
Utils.wrap_expr(_rbexpr.dt_millisecond)
|
737
832
|
end
|
@@ -741,6 +836,34 @@ module Polars
|
|
741
836
|
# Applies to Datetime columns.
|
742
837
|
#
|
743
838
|
# @return [Expr]
|
839
|
+
#
|
840
|
+
# @example
|
841
|
+
# df = Polars::DataFrame.new(
|
842
|
+
# {
|
843
|
+
# "datetime" => [
|
844
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
845
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
846
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
847
|
+
# ]
|
848
|
+
# }
|
849
|
+
# )
|
850
|
+
# df.with_columns(
|
851
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
852
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
853
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
854
|
+
# Polars.col("datetime").dt.microsecond.alias("microsecond")
|
855
|
+
# )
|
856
|
+
# # =>
|
857
|
+
# # shape: (3, 5)
|
858
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
|
859
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
|
860
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
861
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
862
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
|
863
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
864
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000 │
|
865
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000 │
|
866
|
+
# # └─────────────────────────┴──────┴────────┴────────┴─────────────┘
|
744
867
|
def microsecond
|
745
868
|
Utils.wrap_expr(_rbexpr.dt_microsecond)
|
746
869
|
end
|
@@ -750,6 +873,34 @@ module Polars
|
|
750
873
|
# Applies to Datetime columns.
|
751
874
|
#
|
752
875
|
# @return [Expr]
|
876
|
+
#
|
877
|
+
# @example
|
878
|
+
# df = Polars::DataFrame.new(
|
879
|
+
# {
|
880
|
+
# "datetime" => [
|
881
|
+
# Time.utc(1978, 1, 1, 1, 1, 1, 0),
|
882
|
+
# Time.utc(2024, 10, 13, 5, 30, 14, 500_000),
|
883
|
+
# Time.utc(2065, 1, 1, 10, 20, 30, 60_000),
|
884
|
+
# ]
|
885
|
+
# }
|
886
|
+
# )
|
887
|
+
# df.with_columns(
|
888
|
+
# Polars.col("datetime").dt.hour.alias("hour"),
|
889
|
+
# Polars.col("datetime").dt.minute.alias("minute"),
|
890
|
+
# Polars.col("datetime").dt.second.alias("second"),
|
891
|
+
# Polars.col("datetime").dt.nanosecond.alias("nanosecond")
|
892
|
+
# )
|
893
|
+
# # =>
|
894
|
+
# # shape: (3, 5)
|
895
|
+
# # ┌─────────────────────────┬──────┬────────┬────────┬────────────┐
|
896
|
+
# # │ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
|
897
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
898
|
+
# # │ datetime[ns] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
|
899
|
+
# # ╞═════════════════════════╪══════╪════════╪════════╪════════════╡
|
900
|
+
# # │ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
|
901
|
+
# # │ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
|
902
|
+
# # │ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
|
903
|
+
# # └─────────────────────────┴──────┴────────┴────────┴────────────┘
|
753
904
|
def nanosecond
|
754
905
|
Utils.wrap_expr(_rbexpr.dt_nanosecond)
|
755
906
|
end
|
@@ -835,6 +986,34 @@ module Polars
|
|
835
986
|
# Time unit for the `Datetime` Series.
|
836
987
|
#
|
837
988
|
# @return [Expr]
|
989
|
+
#
|
990
|
+
# @example
|
991
|
+
# df = Polars::DataFrame.new(
|
992
|
+
# {
|
993
|
+
# "date" => Polars.datetime_range(
|
994
|
+
# Time.utc(2001, 1, 1),
|
995
|
+
# Time.utc(2001, 1, 3),
|
996
|
+
# "1d",
|
997
|
+
# time_unit: "ns",
|
998
|
+
# eager: true
|
999
|
+
# )
|
1000
|
+
# }
|
1001
|
+
# )
|
1002
|
+
# df.select(
|
1003
|
+
# Polars.col("date"),
|
1004
|
+
# Polars.col("date").dt.with_time_unit("us").alias("time_unit_us")
|
1005
|
+
# )
|
1006
|
+
# # =>
|
1007
|
+
# # shape: (3, 2)
|
1008
|
+
# # ┌─────────────────────┬───────────────────────┐
|
1009
|
+
# # │ date ┆ time_unit_us │
|
1010
|
+
# # │ --- ┆ --- │
|
1011
|
+
# # │ datetime[ns] ┆ datetime[μs] │
|
1012
|
+
# # ╞═════════════════════╪═══════════════════════╡
|
1013
|
+
# # │ 2001-01-01 00:00:00 ┆ +32971-04-28 00:00:00 │
|
1014
|
+
# # │ 2001-01-02 00:00:00 ┆ +32974-01-22 00:00:00 │
|
1015
|
+
# # │ 2001-01-03 00:00:00 ┆ +32976-10-18 00:00:00 │
|
1016
|
+
# # └─────────────────────┴───────────────────────┘
|
838
1017
|
def with_time_unit(time_unit)
|
839
1018
|
Utils.wrap_expr(_rbexpr.dt_with_time_unit(time_unit))
|
840
1019
|
end
|
@@ -931,6 +1110,71 @@ module Polars
|
|
931
1110
|
# Determine how to deal with non-existent datetimes.
|
932
1111
|
#
|
933
1112
|
# @return [Expr]
|
1113
|
+
#
|
1114
|
+
# @example
|
1115
|
+
# df = Polars::DataFrame.new(
|
1116
|
+
# {
|
1117
|
+
# "london_timezone" => Polars.datetime_range(
|
1118
|
+
# Time.utc(2020, 3, 1),
|
1119
|
+
# Time.utc(2020, 7, 1),
|
1120
|
+
# "1mo",
|
1121
|
+
# time_zone: "UTC",
|
1122
|
+
# eager: true,
|
1123
|
+
# ).dt.convert_time_zone("Europe/London")
|
1124
|
+
# }
|
1125
|
+
# )
|
1126
|
+
# df.select(
|
1127
|
+
# [
|
1128
|
+
# Polars.col("london_timezone"),
|
1129
|
+
# Polars.col("london_timezone")
|
1130
|
+
# .dt.replace_time_zone("Europe/Amsterdam")
|
1131
|
+
# .alias("London_to_Amsterdam")
|
1132
|
+
# ]
|
1133
|
+
# )
|
1134
|
+
# # =>
|
1135
|
+
# # shape: (5, 2)
|
1136
|
+
# # ┌─────────────────────────────┬────────────────────────────────┐
|
1137
|
+
# # │ london_timezone ┆ London_to_Amsterdam │
|
1138
|
+
# # │ --- ┆ --- │
|
1139
|
+
# # │ datetime[ns, Europe/London] ┆ datetime[ns, Europe/Amsterdam] │
|
1140
|
+
# # ╞═════════════════════════════╪════════════════════════════════╡
|
1141
|
+
# # │ 2020-03-01 00:00:00 GMT ┆ 2020-03-01 00:00:00 CET │
|
1142
|
+
# # │ 2020-04-01 01:00:00 BST ┆ 2020-04-01 01:00:00 CEST │
|
1143
|
+
# # │ 2020-05-01 01:00:00 BST ┆ 2020-05-01 01:00:00 CEST │
|
1144
|
+
# # │ 2020-06-01 01:00:00 BST ┆ 2020-06-01 01:00:00 CEST │
|
1145
|
+
# # │ 2020-07-01 01:00:00 BST ┆ 2020-07-01 01:00:00 CEST │
|
1146
|
+
# # └─────────────────────────────┴────────────────────────────────┘
|
1147
|
+
#
|
1148
|
+
# @example You can use `ambiguous` to deal with ambiguous datetimes:
|
1149
|
+
# dates = [
|
1150
|
+
# "2018-10-28 01:30",
|
1151
|
+
# "2018-10-28 02:00",
|
1152
|
+
# "2018-10-28 02:30",
|
1153
|
+
# "2018-10-28 02:00"
|
1154
|
+
# ]
|
1155
|
+
# df = Polars::DataFrame.new(
|
1156
|
+
# {
|
1157
|
+
# "ts" => Polars::Series.new(dates).str.strptime(Polars::Datetime),
|
1158
|
+
# "ambiguous" => ["earliest", "earliest", "latest", "latest"]
|
1159
|
+
# }
|
1160
|
+
# )
|
1161
|
+
# df.with_columns(
|
1162
|
+
# ts_localized: Polars.col("ts").dt.replace_time_zone(
|
1163
|
+
# "Europe/Brussels", ambiguous: Polars.col("ambiguous")
|
1164
|
+
# )
|
1165
|
+
# )
|
1166
|
+
# # =>
|
1167
|
+
# # shape: (4, 3)
|
1168
|
+
# # ┌─────────────────────┬───────────┬───────────────────────────────┐
|
1169
|
+
# # │ ts ┆ ambiguous ┆ ts_localized │
|
1170
|
+
# # │ --- ┆ --- ┆ --- │
|
1171
|
+
# # │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │
|
1172
|
+
# # ╞═════════════════════╪═══════════╪═══════════════════════════════╡
|
1173
|
+
# # │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │
|
1174
|
+
# # │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │
|
1175
|
+
# # │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │
|
1176
|
+
# # │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │
|
1177
|
+
# # └─────────────────────┴───────────┴───────────────────────────────┘
|
934
1178
|
def replace_time_zone(time_zone, ambiguous: "raise", non_existent: "raise")
|
935
1179
|
unless ambiguous.is_a?(Expr)
|
936
1180
|
ambiguous = Polars.lit(ambiguous)
|
@@ -1150,6 +1150,47 @@ module Polars
|
|
1150
1150
|
# Every interval start and period length.
|
1151
1151
|
#
|
1152
1152
|
# @return [Series]
|
1153
|
+
#
|
1154
|
+
# @example
|
1155
|
+
# s = Polars.datetime_range(
|
1156
|
+
# Time.utc(2001, 1, 1),
|
1157
|
+
# Time.utc(2001, 1, 2),
|
1158
|
+
# "165m",
|
1159
|
+
# eager: true
|
1160
|
+
# ).alias("datetime")
|
1161
|
+
# s.dt.truncate("1h")
|
1162
|
+
# # =>
|
1163
|
+
# # shape: (9,)
|
1164
|
+
# # Series: 'datetime' [datetime[ns]]
|
1165
|
+
# # [
|
1166
|
+
# # 2001-01-01 00:00:00
|
1167
|
+
# # 2001-01-01 02:00:00
|
1168
|
+
# # 2001-01-01 05:00:00
|
1169
|
+
# # 2001-01-01 08:00:00
|
1170
|
+
# # 2001-01-01 11:00:00
|
1171
|
+
# # 2001-01-01 13:00:00
|
1172
|
+
# # 2001-01-01 16:00:00
|
1173
|
+
# # 2001-01-01 19:00:00
|
1174
|
+
# # 2001-01-01 22:00:00
|
1175
|
+
# # ]
|
1176
|
+
#
|
1177
|
+
# @example
|
1178
|
+
# s = Polars.datetime_range(
|
1179
|
+
# Time.utc(2001, 1, 1), Time.utc(2001, 1, 1, 1), "10m", eager: true
|
1180
|
+
# ).alias("datetime")
|
1181
|
+
# s.dt.truncate("30m")
|
1182
|
+
# # =>
|
1183
|
+
# # shape: (7,)
|
1184
|
+
# # Series: 'datetime' [datetime[ns]]
|
1185
|
+
# # [
|
1186
|
+
# # 2001-01-01 00:00:00
|
1187
|
+
# # 2001-01-01 00:00:00
|
1188
|
+
# # 2001-01-01 00:00:00
|
1189
|
+
# # 2001-01-01 00:30:00
|
1190
|
+
# # 2001-01-01 00:30:00
|
1191
|
+
# # 2001-01-01 00:30:00
|
1192
|
+
# # 2001-01-01 01:00:00
|
1193
|
+
# # ]
|
1153
1194
|
def truncate(every)
|
1154
1195
|
super
|
1155
1196
|
end
|
@@ -1185,6 +1226,52 @@ module Polars
|
|
1185
1226
|
# @note
|
1186
1227
|
# This functionality is currently experimental and may
|
1187
1228
|
# change without it being considered a breaking change.
|
1229
|
+
#
|
1230
|
+
# @example
|
1231
|
+
# start = Time.utc(2001, 1, 1)
|
1232
|
+
# stop = Time.utc(2001, 1, 2)
|
1233
|
+
# s = Polars.datetime_range(
|
1234
|
+
# start, stop, "165m", eager: true
|
1235
|
+
# ).alias("datetime")
|
1236
|
+
# s.dt.round("1h")
|
1237
|
+
# # =>
|
1238
|
+
# # shape: (9,)
|
1239
|
+
# # Series: 'datetime' [datetime[ns]]
|
1240
|
+
# # [
|
1241
|
+
# # 2001-01-01 00:00:00
|
1242
|
+
# # 2001-01-01 03:00:00
|
1243
|
+
# # 2001-01-01 06:00:00
|
1244
|
+
# # 2001-01-01 08:00:00
|
1245
|
+
# # 2001-01-01 11:00:00
|
1246
|
+
# # 2001-01-01 14:00:00
|
1247
|
+
# # 2001-01-01 17:00:00
|
1248
|
+
# # 2001-01-01 19:00:00
|
1249
|
+
# # 2001-01-01 22:00:00
|
1250
|
+
# # ]
|
1251
|
+
#
|
1252
|
+
# @example
|
1253
|
+
# round_str = s.dt.round("1h")
|
1254
|
+
# round_td = s.dt.round("1h")
|
1255
|
+
# round_str.equals(round_td)
|
1256
|
+
# # => true
|
1257
|
+
#
|
1258
|
+
# @example
|
1259
|
+
# start = Time.utc(2001, 1, 1)
|
1260
|
+
# stop = Time.utc(2001, 1, 1, 1)
|
1261
|
+
# s = Polars.datetime_range(start, stop, "10m", eager: true).alias("datetime")
|
1262
|
+
# s.dt.round("30m")
|
1263
|
+
# # =>
|
1264
|
+
# # shape: (7,)
|
1265
|
+
# # Series: 'datetime' [datetime[ns]]
|
1266
|
+
# # [
|
1267
|
+
# # 2001-01-01 00:00:00
|
1268
|
+
# # 2001-01-01 00:00:00
|
1269
|
+
# # 2001-01-01 00:30:00
|
1270
|
+
# # 2001-01-01 00:30:00
|
1271
|
+
# # 2001-01-01 00:30:00
|
1272
|
+
# # 2001-01-01 01:00:00
|
1273
|
+
# # 2001-01-01 01:00:00
|
1274
|
+
# # ]
|
1188
1275
|
def round(every)
|
1189
1276
|
super
|
1190
1277
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -411,6 +411,26 @@ module Polars
|
|
411
411
|
# Add a prefix to the root column name of the expression.
|
412
412
|
#
|
413
413
|
# @return [Expr]
|
414
|
+
#
|
415
|
+
# @example
|
416
|
+
# df = Polars::DataFrame.new(
|
417
|
+
# {
|
418
|
+
# "a" => [1, 2, 3],
|
419
|
+
# "b" => ["x", "y", "z"]
|
420
|
+
# }
|
421
|
+
# )
|
422
|
+
# df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
|
423
|
+
# # =>
|
424
|
+
# # shape: (3, 4)
|
425
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
426
|
+
# # │ a ┆ b ┆ reverse_a ┆ reverse_b │
|
427
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
428
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
429
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
430
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
431
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
432
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
433
|
+
# # └─────┴─────┴───────────┴───────────┘
|
414
434
|
def prefix(prefix)
|
415
435
|
name.prefix(prefix)
|
416
436
|
end
|
@@ -418,6 +438,26 @@ module Polars
|
|
418
438
|
# Add a suffix to the root column name of the expression.
|
419
439
|
#
|
420
440
|
# @return [Expr]
|
441
|
+
#
|
442
|
+
# @example
|
443
|
+
# df = Polars::DataFrame.new(
|
444
|
+
# {
|
445
|
+
# "a" => [1, 2, 3],
|
446
|
+
# "b" => ["x", "y", "z"]
|
447
|
+
# }
|
448
|
+
# )
|
449
|
+
# df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
|
450
|
+
# # =>
|
451
|
+
# # shape: (3, 4)
|
452
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
453
|
+
# # │ a ┆ b ┆ a_reverse ┆ b_reverse │
|
454
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
455
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
456
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
457
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
458
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
459
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
460
|
+
# # └─────┴─────┴───────────┴───────────┘
|
421
461
|
def suffix(suffix)
|
422
462
|
name.suffix(suffix)
|
423
463
|
end
|
@@ -1863,6 +1903,35 @@ module Polars
|
|
1863
1903
|
# Reverse the selection.
|
1864
1904
|
#
|
1865
1905
|
# @return [Expr]
|
1906
|
+
#
|
1907
|
+
# @example
|
1908
|
+
# df = Polars::DataFrame.new(
|
1909
|
+
# {
|
1910
|
+
# "A" => [1, 2, 3, 4, 5],
|
1911
|
+
# "fruits" => ["banana", "banana", "apple", "apple", "banana"],
|
1912
|
+
# "B" => [5, 4, 3, 2, 1],
|
1913
|
+
# "cars" => ["beetle", "audi", "beetle", "beetle", "beetle"]
|
1914
|
+
# }
|
1915
|
+
# )
|
1916
|
+
# df.select(
|
1917
|
+
# [
|
1918
|
+
# Polars.all,
|
1919
|
+
# Polars.all.reverse.name.suffix("_reverse")
|
1920
|
+
# ]
|
1921
|
+
# )
|
1922
|
+
# # =>
|
1923
|
+
# # shape: (5, 8)
|
1924
|
+
# # ┌─────┬────────┬─────┬────────┬───────────┬────────────────┬───────────┬──────────────┐
|
1925
|
+
# # │ A ┆ fruits ┆ B ┆ cars ┆ A_reverse ┆ fruits_reverse ┆ B_reverse ┆ cars_reverse │
|
1926
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1927
|
+
# # │ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str ┆ i64 ┆ str │
|
1928
|
+
# # ╞═════╪════════╪═════╪════════╪═══════════╪════════════════╪═══════════╪══════════════╡
|
1929
|
+
# # │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 5 ┆ banana ┆ 1 ┆ beetle │
|
1930
|
+
# # │ 2 ┆ banana ┆ 4 ┆ audi ┆ 4 ┆ apple ┆ 2 ┆ beetle │
|
1931
|
+
# # │ 3 ┆ apple ┆ 3 ┆ beetle ┆ 3 ┆ apple ┆ 3 ┆ beetle │
|
1932
|
+
# # │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 ┆ banana ┆ 4 ┆ audi │
|
1933
|
+
# # │ 5 ┆ banana ┆ 1 ┆ beetle ┆ 1 ┆ banana ┆ 5 ┆ beetle │
|
1934
|
+
# # └─────┴────────┴─────┴────────┴───────────┴────────────────┴───────────┴──────────────┘
|
1866
1935
|
def reverse
|
1867
1936
|
_from_rbexpr(_rbexpr.reverse)
|
1868
1937
|
end
|
@@ -2825,7 +2894,7 @@ module Polars
|
|
2825
2894
|
# # ╞══════╪════════╡
|
2826
2895
|
# # │ 1 ┆ 0 │
|
2827
2896
|
# # └──────┴────────┘
|
2828
|
-
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, &f)
|
2897
|
+
# def map_batches(return_dtype: nil, agg_list: false, is_elementwise: false, returns_scalar: false, &f)
|
2829
2898
|
# if !return_dtype.nil?
|
2830
2899
|
# return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2831
2900
|
# end
|
@@ -2835,7 +2904,8 @@ module Polars
|
|
2835
2904
|
# f,
|
2836
2905
|
# return_dtype,
|
2837
2906
|
# agg_list,
|
2838
|
-
# is_elementwise
|
2907
|
+
# is_elementwise,
|
2908
|
+
# returns_scalar
|
2839
2909
|
# )
|
2840
2910
|
# )
|
2841
2911
|
# end
|
@@ -3071,6 +3141,21 @@ module Polars
|
|
3071
3141
|
# Number of rows to return.
|
3072
3142
|
#
|
3073
3143
|
# @return [Expr]
|
3144
|
+
#
|
3145
|
+
# @example
|
3146
|
+
# df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
|
3147
|
+
# df.select(Polars.col("foo").limit(3))
|
3148
|
+
# # =>
|
3149
|
+
# # shape: (3, 1)
|
3150
|
+
# # ┌─────┐
|
3151
|
+
# # │ foo │
|
3152
|
+
# # │ --- │
|
3153
|
+
# # │ i64 │
|
3154
|
+
# # ╞═════╡
|
3155
|
+
# # │ 1 │
|
3156
|
+
# # │ 2 │
|
3157
|
+
# # │ 3 │
|
3158
|
+
# # └─────┘
|
3074
3159
|
def limit(n = 10)
|
3075
3160
|
head(n)
|
3076
3161
|
end
|
@@ -5601,6 +5686,22 @@ module Polars
|
|
5601
5686
|
# If false, the calculations are corrected for statistical bias.
|
5602
5687
|
#
|
5603
5688
|
# @return [Expr]
|
5689
|
+
#
|
5690
|
+
# @example
|
5691
|
+
# df = Polars::DataFrame.new({"a" => [1, 4, 2, 9]})
|
5692
|
+
# df.select(Polars.col("a").rolling_skew(3))
|
5693
|
+
# # =>
|
5694
|
+
# # shape: (4, 1)
|
5695
|
+
# # ┌──────────┐
|
5696
|
+
# # │ a │
|
5697
|
+
# # │ --- │
|
5698
|
+
# # │ f64 │
|
5699
|
+
# # ╞══════════╡
|
5700
|
+
# # │ null │
|
5701
|
+
# # │ null │
|
5702
|
+
# # │ 0.381802 │
|
5703
|
+
# # │ 0.47033 │
|
5704
|
+
# # └──────────┘
|
5604
5705
|
def rolling_skew(window_size, bias: true)
|
5605
5706
|
_from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
|
5606
5707
|
end
|
@@ -143,6 +143,9 @@ module Polars
|
|
143
143
|
# @param exprs [Array]
|
144
144
|
# Column(s) to use in the aggregation. Accepts expression input. Strings are
|
145
145
|
# parsed as column names, other non-expression inputs are parsed as literals.
|
146
|
+
# @param ignore_nulls [Boolean]
|
147
|
+
# Ignore null values (default).
|
148
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
146
149
|
#
|
147
150
|
# @return [Expr]
|
148
151
|
#
|
@@ -166,9 +169,9 @@ module Polars
|
|
166
169
|
# # │ 8 ┆ 5 ┆ y ┆ 13 │
|
167
170
|
# # │ 3 ┆ null ┆ z ┆ 3 │
|
168
171
|
# # └─────┴──────┴─────┴─────┘
|
169
|
-
def sum_horizontal(*exprs)
|
172
|
+
def sum_horizontal(*exprs, ignore_nulls: true)
|
170
173
|
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
171
|
-
Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
|
174
|
+
Utils.wrap_expr(Plr.sum_horizontal(rbexprs, ignore_nulls))
|
172
175
|
end
|
173
176
|
|
174
177
|
# Compute the mean of all values horizontally across columns.
|
@@ -176,6 +179,9 @@ module Polars
|
|
176
179
|
# @param exprs [Array]
|
177
180
|
# Column(s) to use in the aggregation. Accepts expression input. Strings are
|
178
181
|
# parsed as column names, other non-expression inputs are parsed as literals.
|
182
|
+
# @param ignore_nulls [Boolean]
|
183
|
+
# Ignore null values (default).
|
184
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
179
185
|
#
|
180
186
|
# @return [Expr]
|
181
187
|
#
|
@@ -199,9 +205,9 @@ module Polars
|
|
199
205
|
# # │ 8 ┆ 5 ┆ y ┆ 6.5 │
|
200
206
|
# # │ 3 ┆ null ┆ z ┆ 3.0 │
|
201
207
|
# # └─────┴──────┴─────┴──────┘
|
202
|
-
def mean_horizontal(*exprs)
|
208
|
+
def mean_horizontal(*exprs, ignore_nulls: true)
|
203
209
|
rbexprs = Utils.parse_into_list_of_expressions(*exprs)
|
204
|
-
Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
|
210
|
+
Utils.wrap_expr(Plr.mean_horizontal(rbexprs, ignore_nulls))
|
205
211
|
end
|
206
212
|
|
207
213
|
# Cumulatively sum all values horizontally across columns.
|