polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +5523 -6947
- data/README.md +8 -7
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +8 -5
data/lib/polars/lazy_frame.rb
CHANGED
@@ -97,7 +97,8 @@ module Polars
|
|
97
97
|
row_count_offset: 0,
|
98
98
|
storage_options: nil,
|
99
99
|
low_memory: false,
|
100
|
-
use_statistics: true
|
100
|
+
use_statistics: true,
|
101
|
+
hive_partitioning: true
|
101
102
|
)
|
102
103
|
_from_rbldf(
|
103
104
|
RbLazyFrame.new_from_parquet(
|
@@ -108,7 +109,8 @@ module Polars
|
|
108
109
|
rechunk,
|
109
110
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
110
111
|
low_memory,
|
111
|
-
use_statistics
|
112
|
+
use_statistics,
|
113
|
+
hive_partitioning
|
112
114
|
)
|
113
115
|
)
|
114
116
|
end
|
@@ -216,7 +218,7 @@ module Polars
|
|
216
218
|
# }
|
217
219
|
# ).lazy
|
218
220
|
# lf.dtypes
|
219
|
-
# # => [Polars::Int64, Polars::Float64, Polars::
|
221
|
+
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
220
222
|
def dtypes
|
221
223
|
_ldf.dtypes
|
222
224
|
end
|
@@ -234,7 +236,7 @@ module Polars
|
|
234
236
|
# }
|
235
237
|
# ).lazy
|
236
238
|
# lf.schema
|
237
|
-
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::
|
239
|
+
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
238
240
|
def schema
|
239
241
|
_ldf.schema
|
240
242
|
end
|
@@ -350,6 +352,7 @@ module Polars
|
|
350
352
|
slice_pushdown,
|
351
353
|
common_subplan_elimination,
|
352
354
|
allow_streaming,
|
355
|
+
false
|
353
356
|
)
|
354
357
|
|
355
358
|
ldf.describe_optimized_plan
|
@@ -396,7 +399,7 @@ module Polars
|
|
396
399
|
# # │ 1 ┆ 6.0 ┆ a │
|
397
400
|
# # └─────┴─────┴─────┘
|
398
401
|
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
399
|
-
if by.is_a?(String)
|
402
|
+
if by.is_a?(::String)
|
400
403
|
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
401
404
|
end
|
402
405
|
if Utils.bool?(reverse)
|
@@ -445,7 +448,7 @@ module Polars
|
|
445
448
|
# "c" => [6, 5, 4, 3, 2, 1]
|
446
449
|
# }
|
447
450
|
# ).lazy
|
448
|
-
# df.
|
451
|
+
# df.group_by("a", maintain_order: true).agg(Polars.all.sum).collect
|
449
452
|
# # =>
|
450
453
|
# # shape: (3, 3)
|
451
454
|
# # ┌─────┬─────┬─────┐
|
@@ -466,7 +469,8 @@ module Polars
|
|
466
469
|
no_optimization: false,
|
467
470
|
slice_pushdown: true,
|
468
471
|
common_subplan_elimination: true,
|
469
|
-
allow_streaming: false
|
472
|
+
allow_streaming: false,
|
473
|
+
_eager: false
|
470
474
|
)
|
471
475
|
if no_optimization
|
472
476
|
predicate_pushdown = false
|
@@ -486,7 +490,8 @@ module Polars
|
|
486
490
|
simplify_expression,
|
487
491
|
slice_pushdown,
|
488
492
|
common_subplan_elimination,
|
489
|
-
allow_streaming
|
493
|
+
allow_streaming,
|
494
|
+
_eager
|
490
495
|
)
|
491
496
|
Utils.wrap_df(ldf.collect)
|
492
497
|
end
|
@@ -568,7 +573,8 @@ module Polars
|
|
568
573
|
simplify_expression,
|
569
574
|
slice_pushdown,
|
570
575
|
false,
|
571
|
-
true
|
576
|
+
true,
|
577
|
+
false
|
572
578
|
)
|
573
579
|
lf.sink_parquet(
|
574
580
|
path,
|
@@ -623,7 +629,7 @@ module Polars
|
|
623
629
|
# "c" => [6, 5, 4, 3, 2, 1]
|
624
630
|
# }
|
625
631
|
# ).lazy
|
626
|
-
# df.
|
632
|
+
# df.group_by("a", maintain_order: true).agg(Polars.all.sum).fetch(2)
|
627
633
|
# # =>
|
628
634
|
# # shape: (2, 3)
|
629
635
|
# # ┌─────┬─────┬─────┐
|
@@ -660,7 +666,8 @@ module Polars
|
|
660
666
|
simplify_expression,
|
661
667
|
slice_pushdown,
|
662
668
|
common_subplan_elimination,
|
663
|
-
allow_streaming
|
669
|
+
allow_streaming,
|
670
|
+
false
|
664
671
|
)
|
665
672
|
Utils.wrap_df(ldf.fetch(n_rows))
|
666
673
|
end
|
@@ -853,13 +860,13 @@ module Polars
|
|
853
860
|
_from_rbldf(_ldf.select(exprs))
|
854
861
|
end
|
855
862
|
|
856
|
-
# Start a
|
863
|
+
# Start a group by operation.
|
857
864
|
#
|
858
865
|
# @param by [Object]
|
859
866
|
# Column(s) to group by.
|
860
867
|
# @param maintain_order [Boolean]
|
861
868
|
# Make sure that the order of the groups remains consistent. This is more
|
862
|
-
# expensive than a default
|
869
|
+
# expensive than a default group by.
|
863
870
|
#
|
864
871
|
# @return [LazyGroupBy]
|
865
872
|
#
|
@@ -871,7 +878,7 @@ module Polars
|
|
871
878
|
# "c" => [6, 5, 4, 3, 2, 1]
|
872
879
|
# }
|
873
880
|
# ).lazy
|
874
|
-
# df.
|
881
|
+
# df.group_by("a", maintain_order: true).agg(Polars.col("b").sum).collect
|
875
882
|
# # =>
|
876
883
|
# # shape: (3, 2)
|
877
884
|
# # ┌─────┬─────┐
|
@@ -883,19 +890,21 @@ module Polars
|
|
883
890
|
# # │ b ┆ 11 │
|
884
891
|
# # │ c ┆ 6 │
|
885
892
|
# # └─────┴─────┘
|
886
|
-
def
|
893
|
+
def group_by(by, maintain_order: false)
|
887
894
|
rbexprs_by = Utils.selection_to_rbexpr_list(by)
|
888
|
-
lgb = _ldf.
|
889
|
-
LazyGroupBy.new(lgb
|
895
|
+
lgb = _ldf.group_by(rbexprs_by, maintain_order)
|
896
|
+
LazyGroupBy.new(lgb)
|
890
897
|
end
|
898
|
+
alias_method :groupby, :group_by
|
899
|
+
alias_method :group, :group_by
|
891
900
|
|
892
901
|
# Create rolling groups based on a time column.
|
893
902
|
#
|
894
903
|
# Also works for index values of type `:i32` or `:i64`.
|
895
904
|
#
|
896
|
-
# Different from a `
|
905
|
+
# Different from a `group_by_dynamic`, the windows are now determined by the
|
897
906
|
# individual values and are not of constant intervals. For constant intervals
|
898
|
-
# use *
|
907
|
+
# use *group_by_dynamic*.
|
899
908
|
#
|
900
909
|
# The `period` and `offset` arguments are created either from a timedelta, or
|
901
910
|
# by using the following string language:
|
@@ -915,7 +924,7 @@ module Polars
|
|
915
924
|
# Or combine them:
|
916
925
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
917
926
|
#
|
918
|
-
# In case of a
|
927
|
+
# In case of a group_by_rolling on an integer column, the windows are defined by:
|
919
928
|
#
|
920
929
|
# - "1i" # length 1
|
921
930
|
# - "10i" # length 10
|
@@ -926,7 +935,7 @@ module Polars
|
|
926
935
|
# This column must be sorted in ascending order. If not the output will not
|
927
936
|
# make sense.
|
928
937
|
#
|
929
|
-
# In case of a rolling
|
938
|
+
# In case of a rolling group by on indices, dtype needs to be one of
|
930
939
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
931
940
|
# performance matters use an `:i64` column.
|
932
941
|
# @param period [Object]
|
@@ -958,7 +967,7 @@ module Polars
|
|
958
967
|
# df = Polars::LazyFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
959
968
|
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
960
969
|
# )
|
961
|
-
# df.
|
970
|
+
# df.group_by_rolling(index_column: "dt", period: "2d").agg(
|
962
971
|
# [
|
963
972
|
# Polars.sum("a").alias("sum_a"),
|
964
973
|
# Polars.min("a").alias("min_a"),
|
@@ -979,7 +988,7 @@ module Polars
|
|
979
988
|
# # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
|
980
989
|
# # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
|
981
990
|
# # └─────────────────────┴───────┴───────┴───────┘
|
982
|
-
def
|
991
|
+
def group_by_rolling(
|
983
992
|
index_column:,
|
984
993
|
period:,
|
985
994
|
offset: nil,
|
@@ -987,7 +996,7 @@ module Polars
|
|
987
996
|
by: nil,
|
988
997
|
check_sorted: true
|
989
998
|
)
|
990
|
-
index_column = Utils.
|
999
|
+
index_column = Utils.parse_as_expression(index_column)
|
991
1000
|
if offset.nil?
|
992
1001
|
offset = "-#{period}"
|
993
1002
|
end
|
@@ -996,16 +1005,17 @@ module Polars
|
|
996
1005
|
period = Utils._timedelta_to_pl_duration(period)
|
997
1006
|
offset = Utils._timedelta_to_pl_duration(offset)
|
998
1007
|
|
999
|
-
lgb = _ldf.
|
1000
|
-
index_column
|
1008
|
+
lgb = _ldf.group_by_rolling(
|
1009
|
+
index_column, period, offset, closed, rbexprs_by, check_sorted
|
1001
1010
|
)
|
1002
|
-
LazyGroupBy.new(lgb
|
1011
|
+
LazyGroupBy.new(lgb)
|
1003
1012
|
end
|
1013
|
+
alias_method :groupby_rolling, :group_by_rolling
|
1004
1014
|
|
1005
1015
|
# Group based on a time value (or index value of type `:i32`, `:i64`).
|
1006
1016
|
#
|
1007
1017
|
# Time windows are calculated and rows are assigned to windows. Different from a
|
1008
|
-
# normal
|
1018
|
+
# normal group by is that a row can be member of multiple groups. The time/index
|
1009
1019
|
# window could be seen as a rolling window, with a window size determined by
|
1010
1020
|
# dates/times/values instead of slots in the DataFrame.
|
1011
1021
|
#
|
@@ -1033,37 +1043,43 @@ module Polars
|
|
1033
1043
|
# Or combine them:
|
1034
1044
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1035
1045
|
#
|
1036
|
-
# In case of a
|
1046
|
+
# In case of a group_by_dynamic on an integer column, the windows are defined by:
|
1037
1047
|
#
|
1038
1048
|
# - "1i" # length 1
|
1039
1049
|
# - "10i" # length 10
|
1040
1050
|
#
|
1041
|
-
# @param index_column
|
1051
|
+
# @param index_column [Object]
|
1042
1052
|
# Column used to group based on the time window.
|
1043
1053
|
# Often of type Date/Datetime
|
1044
1054
|
# This column must be sorted in ascending order. If not the output will not
|
1045
1055
|
# make sense.
|
1046
1056
|
#
|
1047
|
-
# In case of a dynamic
|
1057
|
+
# In case of a dynamic group by on indices, dtype needs to be one of
|
1048
1058
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
1049
1059
|
# performance matters use an `:i64` column.
|
1050
|
-
# @param every
|
1060
|
+
# @param every [Object]
|
1051
1061
|
# Interval of the window.
|
1052
|
-
# @param period
|
1062
|
+
# @param period [Object]
|
1053
1063
|
# Length of the window; if nil, it is equal to `every`.
|
1054
|
-
# @param offset
|
1064
|
+
# @param offset [Object]
|
1055
1065
|
# Offset of the window. If nil and period is also nil, it will be equal to negative
|
1056
1066
|
# `every`.
|
1057
|
-
# @param truncate
|
1067
|
+
# @param truncate [Boolean]
|
1058
1068
|
# Truncate the time value to the window lower bound.
|
1059
|
-
# @param include_boundaries
|
1069
|
+
# @param include_boundaries [Boolean]
|
1060
1070
|
# Add the lower and upper bound of the window to the "_lower_bound" and
|
1061
1071
|
# "_upper_bound" columns. This will impact performance because it's harder to
|
1062
1072
|
# parallelize.
|
1063
1073
|
# @param closed ["right", "left", "both", "none"]
|
1064
1074
|
# Define whether the temporal window interval is closed or not.
|
1065
|
-
# @param by
|
1075
|
+
# @param by [Object]
|
1066
1076
|
# Also group by this column/these columns
|
1077
|
+
# @param check_sorted [Boolean]
|
1078
|
+
# When the `by` argument is given, polars can not check sortedness
|
1079
|
+
# by the metadata and has to do a full scan on the index column to
|
1080
|
+
# verify data is sorted. This is expensive. If you are sure the
|
1081
|
+
# data within the by groups is sorted, you can set this to `false`.
|
1082
|
+
# Doing so incorrectly will lead to incorrect output.
|
1067
1083
|
#
|
1068
1084
|
# @return [LazyGroupBy]
|
1069
1085
|
#
|
@@ -1095,7 +1111,7 @@ module Polars
|
|
1095
1111
|
# # └─────────────────────┴─────┘
|
1096
1112
|
#
|
1097
1113
|
# @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
|
1098
|
-
# df.
|
1114
|
+
# df.group_by_dynamic("time", every: "1h", closed: "right").agg(
|
1099
1115
|
# [
|
1100
1116
|
# Polars.col("time").min.alias("time_min"),
|
1101
1117
|
# Polars.col("time").max.alias("time_max")
|
@@ -1115,7 +1131,7 @@ module Polars
|
|
1115
1131
|
# # └─────────────────────┴─────────────────────┴─────────────────────┘
|
1116
1132
|
#
|
1117
1133
|
# @example The window boundaries can also be added to the aggregation result.
|
1118
|
-
# df.
|
1134
|
+
# df.group_by_dynamic(
|
1119
1135
|
# "time", every: "1h", include_boundaries: true, closed: "right"
|
1120
1136
|
# ).agg([Polars.col("time").count.alias("time_count")])
|
1121
1137
|
# # =>
|
@@ -1132,7 +1148,7 @@ module Polars
|
|
1132
1148
|
# # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
1133
1149
|
#
|
1134
1150
|
# @example When closed="left", should not include right end of interval.
|
1135
|
-
# df.
|
1151
|
+
# df.group_by_dynamic("time", every: "1h", closed: "left").agg(
|
1136
1152
|
# [
|
1137
1153
|
# Polars.col("time").count.alias("time_count"),
|
1138
1154
|
# Polars.col("time").alias("time_agg_list")
|
@@ -1152,7 +1168,7 @@ module Polars
|
|
1152
1168
|
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
1153
1169
|
#
|
1154
1170
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1155
|
-
# df.
|
1171
|
+
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
1156
1172
|
# [Polars.col("time").count.alias("time_count")]
|
1157
1173
|
# )
|
1158
1174
|
# # =>
|
@@ -1169,7 +1185,7 @@ module Polars
|
|
1169
1185
|
# # │ 2021-12-16 03:00:00 ┆ 1 │
|
1170
1186
|
# # └─────────────────────┴────────────┘
|
1171
1187
|
#
|
1172
|
-
# @example Dynamic
|
1188
|
+
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
1173
1189
|
# df = Polars::DataFrame.new(
|
1174
1190
|
# {
|
1175
1191
|
# "time" => Polars.date_range(
|
@@ -1180,7 +1196,7 @@ module Polars
|
|
1180
1196
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
1181
1197
|
# }
|
1182
1198
|
# )
|
1183
|
-
# df.
|
1199
|
+
# df.group_by_dynamic(
|
1184
1200
|
# "time",
|
1185
1201
|
# every: "1h",
|
1186
1202
|
# closed: "both",
|
@@ -1203,14 +1219,14 @@ module Polars
|
|
1203
1219
|
# # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
|
1204
1220
|
# # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
1205
1221
|
#
|
1206
|
-
# @example Dynamic
|
1222
|
+
# @example Dynamic group by on an index column.
|
1207
1223
|
# df = Polars::DataFrame.new(
|
1208
1224
|
# {
|
1209
1225
|
# "idx" => Polars.arange(0, 6, eager: true),
|
1210
1226
|
# "A" => ["A", "A", "B", "B", "B", "C"]
|
1211
1227
|
# }
|
1212
1228
|
# )
|
1213
|
-
# df.
|
1229
|
+
# df.group_by_dynamic(
|
1214
1230
|
# "idx",
|
1215
1231
|
# every: "2i",
|
1216
1232
|
# period: "3i",
|
@@ -1228,17 +1244,23 @@ module Polars
|
|
1228
1244
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
1229
1245
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
1230
1246
|
# # └─────────────────┴─────────────────┴─────┴─────────────────┘
|
1231
|
-
def
|
1247
|
+
def group_by_dynamic(
|
1232
1248
|
index_column,
|
1233
1249
|
every:,
|
1234
1250
|
period: nil,
|
1235
1251
|
offset: nil,
|
1236
|
-
truncate:
|
1252
|
+
truncate: nil,
|
1237
1253
|
include_boundaries: false,
|
1238
1254
|
closed: "left",
|
1255
|
+
label: "left",
|
1239
1256
|
by: nil,
|
1240
|
-
start_by: "window"
|
1257
|
+
start_by: "window",
|
1258
|
+
check_sorted: true
|
1241
1259
|
)
|
1260
|
+
if !truncate.nil?
|
1261
|
+
label = truncate ? "left" : "datapoint"
|
1262
|
+
end
|
1263
|
+
|
1242
1264
|
index_column = Utils.expr_to_lit_or_expr(index_column, str_to_lit: false)
|
1243
1265
|
if offset.nil?
|
1244
1266
|
offset = period.nil? ? "-#{every}" : "0ns"
|
@@ -1253,19 +1275,21 @@ module Polars
|
|
1253
1275
|
every = Utils._timedelta_to_pl_duration(every)
|
1254
1276
|
|
1255
1277
|
rbexprs_by = by.nil? ? [] : Utils.selection_to_rbexpr_list(by)
|
1256
|
-
lgb = _ldf.
|
1278
|
+
lgb = _ldf.group_by_dynamic(
|
1257
1279
|
index_column._rbexpr,
|
1258
1280
|
every,
|
1259
1281
|
period,
|
1260
1282
|
offset,
|
1261
|
-
|
1283
|
+
label,
|
1262
1284
|
include_boundaries,
|
1263
1285
|
closed,
|
1264
1286
|
rbexprs_by,
|
1265
|
-
start_by
|
1287
|
+
start_by,
|
1288
|
+
check_sorted
|
1266
1289
|
)
|
1267
|
-
LazyGroupBy.new(lgb
|
1290
|
+
LazyGroupBy.new(lgb)
|
1268
1291
|
end
|
1292
|
+
alias_method :groupby_dynamic, :group_by_dynamic
|
1269
1293
|
|
1270
1294
|
# Perform an asof join.
|
1271
1295
|
#
|
@@ -1347,7 +1371,7 @@ module Polars
|
|
1347
1371
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
1348
1372
|
end
|
1349
1373
|
|
1350
|
-
if on.is_a?(String)
|
1374
|
+
if on.is_a?(::String)
|
1351
1375
|
left_on = on
|
1352
1376
|
right_on = on
|
1353
1377
|
end
|
@@ -1356,19 +1380,19 @@ module Polars
|
|
1356
1380
|
raise ArgumentError, "You should pass the column to join on as an argument."
|
1357
1381
|
end
|
1358
1382
|
|
1359
|
-
if by_left.is_a?(String) || by_left.is_a?(Expr)
|
1383
|
+
if by_left.is_a?(::String) || by_left.is_a?(Expr)
|
1360
1384
|
by_left_ = [by_left]
|
1361
1385
|
else
|
1362
1386
|
by_left_ = by_left
|
1363
1387
|
end
|
1364
1388
|
|
1365
|
-
if by_right.is_a?(String) || by_right.is_a?(Expr)
|
1389
|
+
if by_right.is_a?(::String) || by_right.is_a?(Expr)
|
1366
1390
|
by_right_ = [by_right]
|
1367
1391
|
else
|
1368
1392
|
by_right_ = by_right
|
1369
1393
|
end
|
1370
1394
|
|
1371
|
-
if by.is_a?(String)
|
1395
|
+
if by.is_a?(::String)
|
1372
1396
|
by_left_ = [by]
|
1373
1397
|
by_right_ = [by]
|
1374
1398
|
elsif by.is_a?(::Array)
|
@@ -1378,7 +1402,7 @@ module Polars
|
|
1378
1402
|
|
1379
1403
|
tolerance_str = nil
|
1380
1404
|
tolerance_num = nil
|
1381
|
-
if tolerance.is_a?(String)
|
1405
|
+
if tolerance.is_a?(::String)
|
1382
1406
|
tolerance_str = tolerance
|
1383
1407
|
else
|
1384
1408
|
tolerance_num = tolerance
|
@@ -1454,17 +1478,17 @@ module Polars
|
|
1454
1478
|
# @example
|
1455
1479
|
# df.join(other_df, on: "ham", how: "outer").collect
|
1456
1480
|
# # =>
|
1457
|
-
# # shape: (4,
|
1458
|
-
# #
|
1459
|
-
# # │ foo ┆ bar ┆ ham
|
1460
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
1461
|
-
# # │ i64 ┆ f64 ┆ str ┆ str
|
1462
|
-
# #
|
1463
|
-
# # │ 1 ┆ 6.0 ┆ a
|
1464
|
-
# # │ 2 ┆ 7.0 ┆ b
|
1465
|
-
# # │ null ┆ null ┆
|
1466
|
-
# # │ 3 ┆ 8.0 ┆ c
|
1467
|
-
# #
|
1481
|
+
# # shape: (4, 5)
|
1482
|
+
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
1483
|
+
# # │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
|
1484
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1485
|
+
# # │ i64 ┆ f64 ┆ str ┆ str ┆ str │
|
1486
|
+
# # ╞══════╪══════╪══════╪═══════╪═══════════╡
|
1487
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │
|
1488
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │
|
1489
|
+
# # │ null ┆ null ┆ null ┆ z ┆ d │
|
1490
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
|
1491
|
+
# # └──────┴──────┴──────┴───────┴───────────┘
|
1468
1492
|
#
|
1469
1493
|
# @example
|
1470
1494
|
# df.join(other_df, on: "ham", how: "left").collect
|
@@ -1698,7 +1722,7 @@ module Polars
|
|
1698
1722
|
#
|
1699
1723
|
# @return [LazyFrame]
|
1700
1724
|
def drop(columns)
|
1701
|
-
if columns.is_a?(String)
|
1725
|
+
if columns.is_a?(::String)
|
1702
1726
|
columns = [columns]
|
1703
1727
|
end
|
1704
1728
|
_from_rbldf(_ldf.drop_columns(columns))
|
@@ -1725,8 +1749,10 @@ module Polars
|
|
1725
1749
|
|
1726
1750
|
# Shift the values by a given period.
|
1727
1751
|
#
|
1728
|
-
# @param
|
1752
|
+
# @param n [Integer]
|
1729
1753
|
# Number of places to shift (may be negative).
|
1754
|
+
# @param fill_value [Object]
|
1755
|
+
# Fill the resulting null values with this value.
|
1730
1756
|
#
|
1731
1757
|
# @return [LazyFrame]
|
1732
1758
|
#
|
@@ -1763,8 +1789,12 @@ module Polars
|
|
1763
1789
|
# # │ 5 ┆ 6 │
|
1764
1790
|
# # │ null ┆ null │
|
1765
1791
|
# # └──────┴──────┘
|
1766
|
-
def shift(
|
1767
|
-
|
1792
|
+
def shift(n, fill_value: nil)
|
1793
|
+
if !fill_value.nil?
|
1794
|
+
fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true)
|
1795
|
+
end
|
1796
|
+
n = Utils.parse_as_expression(n)
|
1797
|
+
_from_rbldf(_ldf.shift(n, fill_value))
|
1768
1798
|
end
|
1769
1799
|
|
1770
1800
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -1810,10 +1840,7 @@ module Polars
|
|
1810
1840
|
# # │ 0 ┆ 0 │
|
1811
1841
|
# # └─────┴─────┘
|
1812
1842
|
def shift_and_fill(periods, fill_value)
|
1813
|
-
|
1814
|
-
fill_value = Polars.lit(fill_value)
|
1815
|
-
end
|
1816
|
-
_from_rbldf(_ldf.shift_and_fill(periods, fill_value._rbexpr))
|
1843
|
+
shift(periods, fill_value: fill_value)
|
1817
1844
|
end
|
1818
1845
|
|
1819
1846
|
# Get a slice of this DataFrame.
|
@@ -2336,10 +2363,10 @@ module Polars
|
|
2336
2363
|
# # │ z ┆ c ┆ 6 │
|
2337
2364
|
# # └─────┴──────────┴───────┘
|
2338
2365
|
def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil, streamable: true)
|
2339
|
-
if value_vars.is_a?(String)
|
2366
|
+
if value_vars.is_a?(::String)
|
2340
2367
|
value_vars = [value_vars]
|
2341
2368
|
end
|
2342
|
-
if id_vars.is_a?(String)
|
2369
|
+
if id_vars.is_a?(::String)
|
2343
2370
|
id_vars = [id_vars]
|
2344
2371
|
end
|
2345
2372
|
if value_vars.nil?
|
@@ -2371,16 +2398,16 @@ module Polars
|
|
2371
2398
|
# df.interpolate.collect
|
2372
2399
|
# # =>
|
2373
2400
|
# # shape: (4, 3)
|
2374
|
-
# #
|
2375
|
-
# # │ foo
|
2376
|
-
# # │ ---
|
2377
|
-
# # │
|
2378
|
-
# #
|
2379
|
-
# # │ 1
|
2380
|
-
# # │ 5
|
2381
|
-
# # │ 9
|
2382
|
-
# # │ 10
|
2383
|
-
# #
|
2401
|
+
# # ┌──────┬──────┬──────────┐
|
2402
|
+
# # │ foo ┆ bar ┆ baz │
|
2403
|
+
# # │ --- ┆ --- ┆ --- │
|
2404
|
+
# # │ f64 ┆ f64 ┆ f64 │
|
2405
|
+
# # ╞══════╪══════╪══════════╡
|
2406
|
+
# # │ 1.0 ┆ 6.0 ┆ 1.0 │
|
2407
|
+
# # │ 5.0 ┆ 7.0 ┆ 3.666667 │
|
2408
|
+
# # │ 9.0 ┆ 9.0 ┆ 6.333333 │
|
2409
|
+
# # │ 10.0 ┆ null ┆ 9.0 │
|
2410
|
+
# # └──────┴──────┴──────────┘
|
2384
2411
|
def interpolate
|
2385
2412
|
select(Utils.col("*").interpolate)
|
2386
2413
|
end
|
@@ -2437,7 +2464,7 @@ module Polars
|
|
2437
2464
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
2438
2465
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
2439
2466
|
def unnest(names)
|
2440
|
-
if names.is_a?(String)
|
2467
|
+
if names.is_a?(::String)
|
2441
2468
|
names = [names]
|
2442
2469
|
end
|
2443
2470
|
_from_rbldf(_ldf.unnest(names))
|