polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +5523 -6947
- data/README.md +8 -7
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +8 -5
data/lib/polars/lazy_frame.rb
CHANGED
@@ -97,7 +97,8 @@ module Polars
|
|
97
97
|
row_count_offset: 0,
|
98
98
|
storage_options: nil,
|
99
99
|
low_memory: false,
|
100
|
-
use_statistics: true
|
100
|
+
use_statistics: true,
|
101
|
+
hive_partitioning: true
|
101
102
|
)
|
102
103
|
_from_rbldf(
|
103
104
|
RbLazyFrame.new_from_parquet(
|
@@ -108,7 +109,8 @@ module Polars
|
|
108
109
|
rechunk,
|
109
110
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
110
111
|
low_memory,
|
111
|
-
use_statistics
|
112
|
+
use_statistics,
|
113
|
+
hive_partitioning
|
112
114
|
)
|
113
115
|
)
|
114
116
|
end
|
@@ -216,7 +218,7 @@ module Polars
|
|
216
218
|
# }
|
217
219
|
# ).lazy
|
218
220
|
# lf.dtypes
|
219
|
-
# # => [Polars::Int64, Polars::Float64, Polars::
|
221
|
+
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
220
222
|
def dtypes
|
221
223
|
_ldf.dtypes
|
222
224
|
end
|
@@ -234,7 +236,7 @@ module Polars
|
|
234
236
|
# }
|
235
237
|
# ).lazy
|
236
238
|
# lf.schema
|
237
|
-
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::
|
239
|
+
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
238
240
|
def schema
|
239
241
|
_ldf.schema
|
240
242
|
end
|
@@ -350,6 +352,7 @@ module Polars
|
|
350
352
|
slice_pushdown,
|
351
353
|
common_subplan_elimination,
|
352
354
|
allow_streaming,
|
355
|
+
false
|
353
356
|
)
|
354
357
|
|
355
358
|
ldf.describe_optimized_plan
|
@@ -396,7 +399,7 @@ module Polars
|
|
396
399
|
# # │ 1 ┆ 6.0 ┆ a │
|
397
400
|
# # └─────┴─────┴─────┘
|
398
401
|
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
399
|
-
if by.is_a?(String)
|
402
|
+
if by.is_a?(::String)
|
400
403
|
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
401
404
|
end
|
402
405
|
if Utils.bool?(reverse)
|
@@ -445,7 +448,7 @@ module Polars
|
|
445
448
|
# "c" => [6, 5, 4, 3, 2, 1]
|
446
449
|
# }
|
447
450
|
# ).lazy
|
448
|
-
# df.
|
451
|
+
# df.group_by("a", maintain_order: true).agg(Polars.all.sum).collect
|
449
452
|
# # =>
|
450
453
|
# # shape: (3, 3)
|
451
454
|
# # ┌─────┬─────┬─────┐
|
@@ -466,7 +469,8 @@ module Polars
|
|
466
469
|
no_optimization: false,
|
467
470
|
slice_pushdown: true,
|
468
471
|
common_subplan_elimination: true,
|
469
|
-
allow_streaming: false
|
472
|
+
allow_streaming: false,
|
473
|
+
_eager: false
|
470
474
|
)
|
471
475
|
if no_optimization
|
472
476
|
predicate_pushdown = false
|
@@ -486,7 +490,8 @@ module Polars
|
|
486
490
|
simplify_expression,
|
487
491
|
slice_pushdown,
|
488
492
|
common_subplan_elimination,
|
489
|
-
allow_streaming
|
493
|
+
allow_streaming,
|
494
|
+
_eager
|
490
495
|
)
|
491
496
|
Utils.wrap_df(ldf.collect)
|
492
497
|
end
|
@@ -568,7 +573,8 @@ module Polars
|
|
568
573
|
simplify_expression,
|
569
574
|
slice_pushdown,
|
570
575
|
false,
|
571
|
-
true
|
576
|
+
true,
|
577
|
+
false
|
572
578
|
)
|
573
579
|
lf.sink_parquet(
|
574
580
|
path,
|
@@ -623,7 +629,7 @@ module Polars
|
|
623
629
|
# "c" => [6, 5, 4, 3, 2, 1]
|
624
630
|
# }
|
625
631
|
# ).lazy
|
626
|
-
# df.
|
632
|
+
# df.group_by("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
|
627
633
|
# # =>
|
628
634
|
# # shape: (2, 3)
|
629
635
|
# # ┌─────┬─────┬─────┐
|
@@ -660,7 +666,8 @@ module Polars
|
|
660
666
|
simplify_expression,
|
661
667
|
slice_pushdown,
|
662
668
|
common_subplan_elimination,
|
663
|
-
allow_streaming
|
669
|
+
allow_streaming,
|
670
|
+
false
|
664
671
|
)
|
665
672
|
Utils.wrap_df(ldf.fetch(n_rows))
|
666
673
|
end
|
@@ -853,13 +860,13 @@ module Polars
|
|
853
860
|
_from_rbldf(_ldf.select(exprs))
|
854
861
|
end
|
855
862
|
|
856
|
-
# Start a
|
863
|
+
# Start a group by operation.
|
857
864
|
#
|
858
865
|
# @param by [Object]
|
859
866
|
# Column(s) to group by.
|
860
867
|
# @param maintain_order [Boolean]
|
861
868
|
# Make sure that the order of the groups remains consistent. This is more
|
862
|
-
# expensive than a default
|
869
|
+
# expensive than a default group by.
|
863
870
|
#
|
864
871
|
# @return [LazyGroupBy]
|
865
872
|
#
|
@@ -871,7 +878,7 @@ module Polars
|
|
871
878
|
# "c" => [6, 5, 4, 3, 2, 1]
|
872
879
|
# }
|
873
880
|
# ).lazy
|
874
|
-
# df.
|
881
|
+
# df.group_by("a", maintain_order: true).agg(Polars.col("b").sum).collect
|
875
882
|
# # =>
|
876
883
|
# # shape: (3, 2)
|
877
884
|
# # ┌─────┬─────┐
|
@@ -883,19 +890,21 @@ module Polars
|
|
883
890
|
# # │ b ┆ 11 │
|
884
891
|
# # │ c ┆ 6 │
|
885
892
|
# # └─────┴─────┘
|
886
|
-
def
|
893
|
+
def group_by(by, maintain_order: false)
|
887
894
|
rbexprs_by = Utils.selection_to_rbexpr_list(by)
|
888
|
-
lgb = _ldf.
|
889
|
-
LazyGroupBy.new(lgb
|
895
|
+
lgb = _ldf.group_by(rbexprs_by, maintain_order)
|
896
|
+
LazyGroupBy.new(lgb)
|
890
897
|
end
|
898
|
+
alias_method :groupby, :group_by
|
899
|
+
alias_method :group, :group_by
|
891
900
|
|
892
901
|
# Create rolling groups based on a time column.
|
893
902
|
#
|
894
903
|
# Also works for index values of type `:i32` or `:i64`.
|
895
904
|
#
|
896
|
-
# Different from a `
|
905
|
+
# Different from a `group_by_dynamic` the windows are now determined by the
|
897
906
|
# individual values and are not of constant intervals. For constant intervals
|
898
|
-
# use *
|
907
|
+
# use *group_by_dynamic*.
|
899
908
|
#
|
900
909
|
# The `period` and `offset` arguments are created either from a timedelta, or
|
901
910
|
# by using the following string language:
|
@@ -915,7 +924,7 @@ module Polars
|
|
915
924
|
# Or combine them:
|
916
925
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
917
926
|
#
|
918
|
-
# In case of a
|
927
|
+
# In case of a group_by_rolling on an integer column, the windows are defined by:
|
919
928
|
#
|
920
929
|
# - "1i" # length 1
|
921
930
|
# - "10i" # length 10
|
@@ -926,7 +935,7 @@ module Polars
|
|
926
935
|
# This column must be sorted in ascending order. If not the output will not
|
927
936
|
# make sense.
|
928
937
|
#
|
929
|
-
# In case of a rolling
|
938
|
+
# In case of a rolling group by on indices, dtype needs to be one of
|
930
939
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
931
940
|
# performance matters use an `:i64` column.
|
932
941
|
# @param period [Object]
|
@@ -958,7 +967,7 @@ module Polars
|
|
958
967
|
# df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
959
968
|
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
960
969
|
# )
|
961
|
-
# df.
|
970
|
+
# df.group_by_rolling(index_column: "dt", period: "2d").agg(
|
962
971
|
# [
|
963
972
|
# Polars.sum("a").alias("sum_a"),
|
964
973
|
# Polars.min("a").alias("min_a"),
|
@@ -979,7 +988,7 @@ module Polars
|
|
979
988
|
# # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
|
980
989
|
# # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
|
981
990
|
# # └─────────────────────┴───────┴───────┴───────┘
|
982
|
-
def
|
991
|
+
def group_by_rolling(
|
983
992
|
index_column:,
|
984
993
|
period:,
|
985
994
|
offset: nil,
|
@@ -987,7 +996,7 @@ module Polars
|
|
987
996
|
by: nil,
|
988
997
|
check_sorted: true
|
989
998
|
)
|
990
|
-
index_column = Utils.
|
999
|
+
index_column = Utils.parse_as_expression(index_column)
|
991
1000
|
if offset.nil?
|
992
1001
|
offset = "-#{period}"
|
993
1002
|
end
|
@@ -996,16 +1005,17 @@ module Polars
|
|
996
1005
|
period = Utils._timedelta_to_pl_duration(period)
|
997
1006
|
offset = Utils._timedelta_to_pl_duration(offset)
|
998
1007
|
|
999
|
-
lgb = _ldf.
|
1000
|
-
index_column
|
1008
|
+
lgb = _ldf.group_by_rolling(
|
1009
|
+
index_column, period, offset, closed, rbexprs_by, check_sorted
|
1001
1010
|
)
|
1002
|
-
LazyGroupBy.new(lgb
|
1011
|
+
LazyGroupBy.new(lgb)
|
1003
1012
|
end
|
1013
|
+
alias_method :groupby_rolling, :group_by_rolling
|
1004
1014
|
|
1005
1015
|
# Group based on a time value (or index value of type `:i32`, `:i64`).
|
1006
1016
|
#
|
1007
1017
|
# Time windows are calculated and rows are assigned to windows. Different from a
|
1008
|
-
# normal
|
1018
|
+
# normal group by is that a row can be member of multiple groups. The time/index
|
1009
1019
|
# window could be seen as a rolling window, with a window size determined by
|
1010
1020
|
# dates/times/values instead of slots in the DataFrame.
|
1011
1021
|
#
|
@@ -1033,37 +1043,43 @@ module Polars
|
|
1033
1043
|
# Or combine them:
|
1034
1044
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1035
1045
|
#
|
1036
|
-
# In case of a
|
1046
|
+
# In case of a group_by_dynamic on an integer column, the windows are defined by:
|
1037
1047
|
#
|
1038
1048
|
# - "1i" # length 1
|
1039
1049
|
# - "10i" # length 10
|
1040
1050
|
#
|
1041
|
-
# @param index_column
|
1051
|
+
# @param index_column [Object]
|
1042
1052
|
# Column used to group based on the time window.
|
1043
1053
|
# Often of type Date/Datetime
|
1044
1054
|
# This column must be sorted in ascending order. If not the output will not
|
1045
1055
|
# make sense.
|
1046
1056
|
#
|
1047
|
-
# In case of a dynamic
|
1057
|
+
# In case of a dynamic group by on indices, dtype needs to be one of
|
1048
1058
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
1049
1059
|
# performance matters use an `:i64` column.
|
1050
|
-
# @param every
|
1060
|
+
# @param every [Object]
|
1051
1061
|
# Interval of the window.
|
1052
|
-
# @param period
|
1062
|
+
# @param period [Object]
|
1053
1063
|
# Length of the window; if nil, it is equal to 'every'.
|
1054
|
-
# @param offset
|
1064
|
+
# @param offset [Object]
|
1055
1065
|
# Offset of the window. If nil and period is nil, it will be equal to negative
|
1056
1066
|
# `every`.
|
1057
|
-
# @param truncate
|
1067
|
+
# @param truncate [Boolean]
|
1058
1068
|
# Truncate the time value to the window lower bound.
|
1059
|
-
# @param include_boundaries
|
1069
|
+
# @param include_boundaries [Boolean]
|
1060
1070
|
# Add the lower and upper bound of the window to the "_lower_bound" and
|
1061
1071
|
# "_upper_bound" columns. This will impact performance because it's harder to
|
1062
1072
|
# parallelize
|
1063
1073
|
# @param closed ["right", "left", "both", "none"]
|
1064
1074
|
# Define whether the temporal window interval is closed or not.
|
1065
|
-
# @param by
|
1075
|
+
# @param by [Object]
|
1066
1076
|
# Also group by this column/these columns
|
1077
|
+
# @param check_sorted [Boolean]
|
1078
|
+
# When the `by` argument is given, polars can not check sortedness
|
1079
|
+
# by the metadata and has to do a full scan on the index column to
|
1080
|
+
# verify data is sorted. This is expensive. If you are sure the
|
1081
|
+
# data within the by groups is sorted, you can set this to `false`.
|
1082
|
+
# Doing so incorrectly will lead to incorrect output.
|
1067
1083
|
#
|
1068
1084
|
# @return [LazyGroupBy]
|
1069
1085
|
#
|
@@ -1095,7 +1111,7 @@ module Polars
|
|
1095
1111
|
# # └─────────────────────┴─────┘
|
1096
1112
|
#
|
1097
1113
|
# @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
|
1098
|
-
# df.
|
1114
|
+
# df.group_by_dynamic("time", every: "1h", closed: "right").agg(
|
1099
1115
|
# [
|
1100
1116
|
# Polars.col("time").min.alias("time_min"),
|
1101
1117
|
# Polars.col("time").max.alias("time_max")
|
@@ -1115,7 +1131,7 @@ module Polars
|
|
1115
1131
|
# # └─────────────────────┴─────────────────────┴─────────────────────┘
|
1116
1132
|
#
|
1117
1133
|
# @example The window boundaries can also be added to the aggregation result.
|
1118
|
-
# df.
|
1134
|
+
# df.group_by_dynamic(
|
1119
1135
|
# "time", every: "1h", include_boundaries: true, closed: "right"
|
1120
1136
|
# ).agg([Polars.col("time").count.alias("time_count")])
|
1121
1137
|
# # =>
|
@@ -1132,7 +1148,7 @@ module Polars
|
|
1132
1148
|
# # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
1133
1149
|
#
|
1134
1150
|
# @example When closed="left", should not include right end of interval.
|
1135
|
-
# df.
|
1151
|
+
# df.group_by_dynamic("time", every: "1h", closed: "left").agg(
|
1136
1152
|
# [
|
1137
1153
|
# Polars.col("time").count.alias("time_count"),
|
1138
1154
|
# Polars.col("time").alias("time_agg_list")
|
@@ -1152,7 +1168,7 @@ module Polars
|
|
1152
1168
|
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
1153
1169
|
#
|
1154
1170
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1155
|
-
# df.
|
1171
|
+
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
1156
1172
|
# [Polars.col("time").count.alias("time_count")]
|
1157
1173
|
# )
|
1158
1174
|
# # =>
|
@@ -1169,7 +1185,7 @@ module Polars
|
|
1169
1185
|
# # │ 2021-12-16 03:00:00 ┆ 1 │
|
1170
1186
|
# # └─────────────────────┴────────────┘
|
1171
1187
|
#
|
1172
|
-
# @example Dynamic
|
1188
|
+
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
1173
1189
|
# df = Polars::DataFrame.new(
|
1174
1190
|
# {
|
1175
1191
|
# "time" => Polars.date_range(
|
@@ -1180,7 +1196,7 @@ module Polars
|
|
1180
1196
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
1181
1197
|
# }
|
1182
1198
|
# )
|
1183
|
-
# df.
|
1199
|
+
# df.group_by_dynamic(
|
1184
1200
|
# "time",
|
1185
1201
|
# every: "1h",
|
1186
1202
|
# closed: "both",
|
@@ -1203,14 +1219,14 @@ module Polars
|
|
1203
1219
|
# # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
|
1204
1220
|
# # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
1205
1221
|
#
|
1206
|
-
# @example Dynamic
|
1222
|
+
# @example Dynamic group by on an index column.
|
1207
1223
|
# df = Polars::DataFrame.new(
|
1208
1224
|
# {
|
1209
1225
|
# "idx" => Polars.arange(0, 6, eager: true),
|
1210
1226
|
# "A" => ["A", "A", "B", "B", "B", "C"]
|
1211
1227
|
# }
|
1212
1228
|
# )
|
1213
|
-
# df.
|
1229
|
+
# df.group_by_dynamic(
|
1214
1230
|
# "idx",
|
1215
1231
|
# every: "2i",
|
1216
1232
|
# period: "3i",
|
@@ -1228,17 +1244,23 @@ module Polars
|
|
1228
1244
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
1229
1245
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
1230
1246
|
# # └─────────────────┴─────────────────┴─────┴─────────────────┘
|
1231
|
-
def
|
1247
|
+
def group_by_dynamic(
|
1232
1248
|
index_column,
|
1233
1249
|
every:,
|
1234
1250
|
period: nil,
|
1235
1251
|
offset: nil,
|
1236
|
-
truncate:
|
1252
|
+
truncate: nil,
|
1237
1253
|
include_boundaries: false,
|
1238
1254
|
closed: "left",
|
1255
|
+
label: "left",
|
1239
1256
|
by: nil,
|
1240
|
-
start_by: "window"
|
1257
|
+
start_by: "window",
|
1258
|
+
check_sorted: true
|
1241
1259
|
)
|
1260
|
+
if !truncate.nil?
|
1261
|
+
label = truncate ? "left" : "datapoint"
|
1262
|
+
end
|
1263
|
+
|
1242
1264
|
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
1243
1265
|
if offset.nil?
|
1244
1266
|
offset = period.nil? ? "-#{every}" : "0ns"
|
@@ -1253,19 +1275,21 @@ module Polars
|
|
1253
1275
|
every = Utils._timedelta_to_pl_duration(every)
|
1254
1276
|
|
1255
1277
|
rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
|
1256
|
-
lgb = _ldf.
|
1278
|
+
lgb = _ldf.group_by_dynamic(
|
1257
1279
|
index_column._rbexpr,
|
1258
1280
|
every,
|
1259
1281
|
period,
|
1260
1282
|
offset,
|
1261
|
-
|
1283
|
+
label,
|
1262
1284
|
include_boundaries,
|
1263
1285
|
closed,
|
1264
1286
|
rbexprs_by,
|
1265
|
-
start_by
|
1287
|
+
start_by,
|
1288
|
+
check_sorted
|
1266
1289
|
)
|
1267
|
-
LazyGroupBy.new(lgb
|
1290
|
+
LazyGroupBy.new(lgb)
|
1268
1291
|
end
|
1292
|
+
alias_method :groupby_dynamic, :group_by_dynamic
|
1269
1293
|
|
1270
1294
|
# Perform an asof join.
|
1271
1295
|
#
|
@@ -1347,7 +1371,7 @@ module Polars
|
|
1347
1371
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
1348
1372
|
end
|
1349
1373
|
|
1350
|
-
if on.is_a?(String)
|
1374
|
+
if on.is_a?(::String)
|
1351
1375
|
left_on = on
|
1352
1376
|
right_on = on
|
1353
1377
|
end
|
@@ -1356,19 +1380,19 @@ module Polars
|
|
1356
1380
|
raise ArgumentError, "You should pass the column to join on as an argument."
|
1357
1381
|
end
|
1358
1382
|
|
1359
|
-
if by_left.is_a?(String) || by_left.is_a?(Expr)
|
1383
|
+
if by_left.is_a?(::String) || by_left.is_a?(Expr)
|
1360
1384
|
by_left_ = [by_left]
|
1361
1385
|
else
|
1362
1386
|
by_left_ = by_left
|
1363
1387
|
end
|
1364
1388
|
|
1365
|
-
if by_right.is_a?(String) || by_right.is_a?(Expr)
|
1389
|
+
if by_right.is_a?(::String) || by_right.is_a?(Expr)
|
1366
1390
|
by_right_ = [by_right]
|
1367
1391
|
else
|
1368
1392
|
by_right_ = by_right
|
1369
1393
|
end
|
1370
1394
|
|
1371
|
-
if by.is_a?(String)
|
1395
|
+
if by.is_a?(::String)
|
1372
1396
|
by_left_ = [by]
|
1373
1397
|
by_right_ = [by]
|
1374
1398
|
elsif by.is_a?(::Array)
|
@@ -1378,7 +1402,7 @@ module Polars
|
|
1378
1402
|
|
1379
1403
|
tolerance_str = nil
|
1380
1404
|
tolerance_num = nil
|
1381
|
-
if tolerance.is_a?(String)
|
1405
|
+
if tolerance.is_a?(::String)
|
1382
1406
|
tolerance_str = tolerance
|
1383
1407
|
else
|
1384
1408
|
tolerance_num = tolerance
|
@@ -1454,17 +1478,17 @@ module Polars
|
|
1454
1478
|
# @example
|
1455
1479
|
# df.join(other_df, on: "ham", how: "outer").collect
|
1456
1480
|
# # =>
|
1457
|
-
# # shape: (4,
|
1458
|
-
# #
|
1459
|
-
# # │ foo ┆ bar ┆ ham
|
1460
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
1461
|
-
# # │ i64 ┆ f64 ┆ str ┆ str
|
1462
|
-
# #
|
1463
|
-
# # │ 1 ┆ 6.0 ┆ a
|
1464
|
-
# # │ 2 ┆ 7.0 ┆ b
|
1465
|
-
# # │ null ┆ null ┆
|
1466
|
-
# # │ 3 ┆ 8.0 ┆ c
|
1467
|
-
# #
|
1481
|
+
# # shape: (4, 5)
|
1482
|
+
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
1483
|
+
# # │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
|
1484
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1485
|
+
# # │ i64 ┆ f64 ┆ str ┆ str ┆ str │
|
1486
|
+
# # ╞══════╪══════╪══════╪═══════╪═══════════╡
|
1487
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │
|
1488
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │
|
1489
|
+
# # │ null ┆ null ┆ null ┆ z ┆ d │
|
1490
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
|
1491
|
+
# # └──────┴──────┴──────┴───────┴───────────┘
|
1468
1492
|
#
|
1469
1493
|
# @example
|
1470
1494
|
# df.join(other_df, on: "ham", how: "left").collect
|
@@ -1698,7 +1722,7 @@ module Polars
|
|
1698
1722
|
#
|
1699
1723
|
# @return [LazyFrame]
|
1700
1724
|
def drop(columns)
|
1701
|
-
if columns.is_a?(String)
|
1725
|
+
if columns.is_a?(::String)
|
1702
1726
|
columns = [columns]
|
1703
1727
|
end
|
1704
1728
|
_from_rbldf(_ldf.drop_columns(columns))
|
@@ -1725,8 +1749,10 @@ module Polars
|
|
1725
1749
|
|
1726
1750
|
# Shift the values by a given period.
|
1727
1751
|
#
|
1728
|
-
# @param
|
1752
|
+
# @param n [Integer]
|
1729
1753
|
# Number of places to shift (may be negative).
|
1754
|
+
# @param fill_value [Object]
|
1755
|
+
# Fill the resulting null values with this value.
|
1730
1756
|
#
|
1731
1757
|
# @return [LazyFrame]
|
1732
1758
|
#
|
@@ -1763,8 +1789,12 @@ module Polars
|
|
1763
1789
|
# # │ 5 ┆ 6 │
|
1764
1790
|
# # │ null ┆ null │
|
1765
1791
|
# # └──────┴──────┘
|
1766
|
-
def shift(
|
1767
|
-
|
1792
|
+
def shift(n, fill_value: nil)
|
1793
|
+
if !fill_value.nil?
|
1794
|
+
fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
|
1795
|
+
end
|
1796
|
+
n = Utils.parse_as_expression(n)
|
1797
|
+
_from_rbldf(_ldf.shift(n, fill_value))
|
1768
1798
|
end
|
1769
1799
|
|
1770
1800
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -1810,10 +1840,7 @@ module Polars
|
|
1810
1840
|
# # │ 0 ┆ 0 │
|
1811
1841
|
# # └─────┴─────┘
|
1812
1842
|
def shift_and_fill(periods, fill_value)
|
1813
|
-
|
1814
|
-
fill_value = Polars.lit(fill_value)
|
1815
|
-
end
|
1816
|
-
_from_rbldf(_ldf.shift_and_fill(periods, fill_value._rbexpr))
|
1843
|
+
shift(periods, fill_value: fill_value)
|
1817
1844
|
end
|
1818
1845
|
|
1819
1846
|
# Get a slice of this DataFrame.
|
@@ -2336,10 +2363,10 @@ module Polars
|
|
2336
2363
|
# # │ z ┆ c ┆ 6 │
|
2337
2364
|
# # └─────┴──────────┴───────┘
|
2338
2365
|
def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil, streamable: true)
|
2339
|
-
if value_vars.is_a?(String)
|
2366
|
+
if value_vars.is_a?(::String)
|
2340
2367
|
value_vars = [value_vars]
|
2341
2368
|
end
|
2342
|
-
if id_vars.is_a?(String)
|
2369
|
+
if id_vars.is_a?(::String)
|
2343
2370
|
id_vars = [id_vars]
|
2344
2371
|
end
|
2345
2372
|
if value_vars.nil?
|
@@ -2371,16 +2398,16 @@ module Polars
|
|
2371
2398
|
# df.interpolate.collect
|
2372
2399
|
# # =>
|
2373
2400
|
# # shape: (4, 3)
|
2374
|
-
# #
|
2375
|
-
# # │ foo
|
2376
|
-
# # │ ---
|
2377
|
-
# # │
|
2378
|
-
# #
|
2379
|
-
# # │ 1
|
2380
|
-
# # │ 5
|
2381
|
-
# # │ 9
|
2382
|
-
# # │ 10
|
2383
|
-
# #
|
2401
|
+
# # ┌──────┬──────┬──────────┐
|
2402
|
+
# # │ foo ┆ bar ┆ baz │
|
2403
|
+
# # │ --- ┆ --- ┆ --- │
|
2404
|
+
# # │ f64 ┆ f64 ┆ f64 │
|
2405
|
+
# # ╞══════╪══════╪══════════╡
|
2406
|
+
# # │ 1.0 ┆ 6.0 ┆ 1.0 │
|
2407
|
+
# # │ 5.0 ┆ 7.0 ┆ 3.666667 │
|
2408
|
+
# # │ 9.0 ┆ 9.0 ┆ 6.333333 │
|
2409
|
+
# # │ 10.0 ┆ null ┆ 9.0 │
|
2410
|
+
# # └──────┴──────┴──────────┘
|
2384
2411
|
def interpolate
|
2385
2412
|
select(Utils.col("*").interpolate)
|
2386
2413
|
end
|
@@ -2437,7 +2464,7 @@ module Polars
|
|
2437
2464
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
2438
2465
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
2439
2466
|
def unnest(names)
|
2440
|
-
if names.is_a?(String)
|
2467
|
+
if names.is_a?(::String)
|
2441
2468
|
names = [names]
|
2442
2469
|
end
|
2443
2470
|
_from_rbldf(_ldf.unnest(names))
|