polars-df 0.11.0-x86_64-linux → 0.12.0-x86_64-linux
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/LICENSE-THIRD-PARTY.txt +1065 -878
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +7 -2
data/lib/polars/3.1/polars.so
CHANGED
Binary file
data/lib/polars/3.2/polars.so
CHANGED
Binary file
data/lib/polars/3.3/polars.so
CHANGED
Binary file
data/lib/polars/array_expr.rb
CHANGED
@@ -358,7 +358,7 @@ module Polars
  # # │ [7, 8, 9] ┆ 4 ┆ null │
  # # └───────────────┴─────┴──────┘
  def get(index, null_on_oob: true)
-   index = Utils.
+   index = Utils.parse_into_expression(index)
    Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
  end

@@ -446,7 +446,7 @@ module Polars
  # # │ ["x", "y"] ┆ _ ┆ x_y │
  # # └───────────────┴───────────┴──────┘
  def join(separator, ignore_nulls: true)
-   separator = Utils.
+   separator = Utils.parse_into_expression(separator, str_as_lit: true)
    Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
  end

@@ -502,7 +502,7 @@ module Polars
  # # │ ["a", "c"] ┆ true │
  # # └───────────────┴──────────┘
  def contains(item)
-   item = Utils.
+   item = Utils.parse_into_expression(item, str_as_lit: true)
    Utils.wrap_expr(_rbexpr.arr_contains(item))
  end

@@ -530,7 +530,7 @@ module Polars
  # # │ [2, 2] ┆ 2 │
  # # └───────────────┴────────────────┘
  def count_matches(element)
-   element = Utils.
+   element = Utils.parse_into_expression(element, str_as_lit: true)
    Utils.wrap_expr(_rbexpr.arr_count_matches(element))
  end
end
data/lib/polars/batched_csv_reader.rb
CHANGED
@@ -42,7 +42,7 @@ module Polars
  if !dtypes.nil?
    if dtypes.is_a?(Hash)
      dtype_list = []
-     dtypes.each do|k, v|
+     dtypes.each do |k, v|
        dtype_list << [k, Utils.rb_type_to_dtype(v)]
      end
    elsif dtypes.is_a?(::Array)
@@ -78,7 +78,7 @@ module Polars
  missing_utf8_is_empty_string,
  parse_dates,
  skip_rows_after_header,
- Utils.
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
  sample_size,
  eol_char,
  raise_if_empty,
data/lib/polars/cat_expr.rb
CHANGED
@@ -9,42 +9,6 @@ module Polars
    self._rbexpr = expr._rbexpr
  end

- # Determine how this categorical series should be sorted.
- #
- # @param ordering ["physical", "lexical"]
- #   Ordering type:
- #
- #   - 'physical' -> Use the physical representation of the categories to determine the order (default).
- #   - 'lexical' -> Use the string values to determine the ordering.
- #
- # @return [Expr]
- #
- # @example
- #   df = Polars::DataFrame.new(
- #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
- #   ).with_columns(
- #     [
- #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
- #     ]
- #   )
- #   df.sort(["cats", "vals"])
- #   # =>
- #   # shape: (5, 2)
- #   # ┌──────┬──────┐
- #   # │ cats ┆ vals │
- #   # │ --- ┆ --- │
- #   # │ cat ┆ i64 │
- #   # ╞══════╪══════╡
- #   # │ a ┆ 2 │
- #   # │ b ┆ 3 │
- #   # │ k ┆ 2 │
- #   # │ z ┆ 1 │
- #   # │ z ┆ 3 │
- #   # └──────┴──────┘
- def set_ordering(ordering)
-   Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
- end
-
  # Get the categories stored in this data type.
  #
  # @return [Expr]
data/lib/polars/cat_name_space.rb
CHANGED
@@ -10,43 +10,6 @@ module Polars
    self._s = series._s
  end

- # Determine how this categorical series should be sorted.
- #
- # @param ordering ["physical", "lexical"]
- #   Ordering type:
- #
- #   - 'physical' -> Use the physical representation of the categories to
- #     determine the order (default).
- #   - 'lexical' -> Use the string values to determine the ordering.
- #
- # @return [Series]
- #
- # @example
- #   df = Polars::DataFrame.new(
- #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
- #   ).with_columns(
- #     [
- #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
- #     ]
- #   )
- #   df.sort(["cats", "vals"])
- #   # =>
- #   # shape: (5, 2)
- #   # ┌──────┬──────┐
- #   # │ cats ┆ vals │
- #   # │ --- ┆ --- │
- #   # │ cat ┆ i64 │
- #   # ╞══════╪══════╡
- #   # │ a ┆ 2 │
- #   # │ b ┆ 3 │
- #   # │ k ┆ 2 │
- #   # │ z ┆ 1 │
- #   # │ z ┆ 3 │
- #   # └──────┴──────┘
- def set_ordering(ordering)
-   super
- end
-
  # Get the categories stored in this data type.
  #
  # @return [Series]
data/lib/polars/data_frame.rb
CHANGED
@@ -622,7 +622,7 @@ module Polars
  #     "bar" => [6, 7, 8]
  #   }
  # )
- # df.write_ndjson
+ # df.write_ndjson
  # # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
  def write_ndjson(file = nil)
    if Utils.pathlike?(file)
@@ -883,6 +883,24 @@ module Polars
    file = Utils.normalize_filepath(file)
  end

+ if statistics == true
+   statistics = {
+     min: true,
+     max: true,
+     distinct_count: false,
+     null_count: true
+   }
+ elsif statistics == false
+   statistics = {}
+ elsif statistics == "full"
+   statistics = {
+     min: true,
+     max: true,
+     distinct_count: true,
+     null_count: true
+   }
+ end
+
  _df.write_parquet(
    file, compression, compression_level, statistics, row_group_size, data_page_size
  )
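Taken together, the write_parquet hunk above means the `statistics` option is now normalized in Ruby before reaching the native writer: `true` and `"full"` expand to a hash of per-statistic flags, and `false` becomes an empty hash. A minimal usage sketch under that reading; the keyword-style call and the explicit hash form are assumptions drawn from the signature and normalization shown above, not taken verbatim from the release:

require "polars-df"

df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [6, 7, 8]})

# Shorthand forms handled by the new normalization above:
df.write_parquet("default_stats.parquet", statistics: true)   # min, max, null_count (no distinct_count)
df.write_parquet("no_stats.parquet", statistics: false)       # no column statistics written
df.write_parquet("full_stats.parquet", statistics: "full")    # all statistics, including distinct_count

# Assumed: a hash with these keys passes through to the writer unchanged.
df.write_parquet("custom_stats.parquet", statistics: {min: true, max: true, distinct_count: false, null_count: false})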
@@ -1724,12 +1742,6 @@ module Polars
  #   Define whether the temporal window interval is closed or not.
  # @param by [Object]
  #   Also group by this column/these columns.
- # @param check_sorted [Boolean]
- #   When the `by` argument is given, polars can not check sortedness
- #   by the metadata and has to do a full scan on the index column to
- #   verify data is sorted. This is expensive. If you are sure the
- #   data within the by groups is sorted, you can set this to `false`.
- #   Doing so incorrectly will lead to incorrect output
  #
  # @return [RollingGroupBy]
  #
@@ -1745,7 +1757,7 @@ module Polars
  #   df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
  #     Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
  #   )
- #   df.
+ #   df.rolling(index_column: "dt", period: "2d").agg(
  #     [
  #       Polars.sum("a").alias("sum_a"),
  #       Polars.min("a").alias("min_a"),
@@ -1766,17 +1778,17 @@ module Polars
  # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
  # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
  # # └─────────────────────┴───────┴───────┴───────┘
- def
+ def rolling(
    index_column:,
    period:,
    offset: nil,
    closed: "right",
-   by: nil
-   check_sorted: true
+   by: nil
  )
-   RollingGroupBy.new(self, index_column, period, offset, closed, by
+   RollingGroupBy.new(self, index_column, period, offset, closed, by)
  end
- alias_method :groupby_rolling, :
+ alias_method :groupby_rolling, :rolling
+ alias_method :group_by_rolling, :rolling

  # Group based on a time value (or index value of type `:i32`, `:i64`).
  #
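In the three hunks above, the temporal grouper loses its `check_sorted` parameter and is exposed as `rolling`, with `groupby_rolling` and a new `group_by_rolling` kept as aliases. A hedged migration sketch using only the keywords visible in the new signature (the sample data is illustrative):

require "polars-df"

dates = ["2020-01-01 13:45:48", "2020-01-01 16:42:13", "2020-01-02 18:12:48"]
df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5]}).with_column(
  Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
)

# 0.12.0 name:
df.rolling(index_column: "dt", period: "2d").agg([Polars.sum("a").alias("sum_a")])

# The old spellings still resolve through the aliases added above:
df.groupby_rolling(index_column: "dt", period: "2d").agg([Polars.sum("a").alias("sum_a")])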
@@ -1846,10 +1858,12 @@ module Polars
  # @example
  #   df = Polars::DataFrame.new(
  #     {
- #       "time" => Polars.
+ #       "time" => Polars.datetime_range(
  #         DateTime.new(2021, 12, 16),
  #         DateTime.new(2021, 12, 16, 3),
- #         "30m"
+ #         "30m",
+ #         time_unit: "us",
+ #         eager: true
  #       ),
  #       "n" => 0..6
  #     }
@@ -1948,10 +1962,12 @@ module Polars
  # @example Dynamic group bys can also be combined with grouping on normal keys.
  #   df = Polars::DataFrame.new(
  #     {
- #       "time" => Polars.
+ #       "time" => Polars.datetime_range(
  #         DateTime.new(2021, 12, 16),
  #         DateTime.new(2021, 12, 16, 3),
- #         "30m"
+ #         "30m",
+ #         time_unit: "us",
+ #         eager: true
  #       ),
  #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
  #     }
@@ -2038,8 +2054,6 @@ module Polars
  #   Note that this column has to be sorted for the output to make sense.
  # @param every [String]
  #   interval will start 'every' duration
- # @param offset [String]
- #   change the start of the date_range by this offset.
  # @param by [Object]
  #   First group by these columns and then upsample for every group
  # @param maintain_order [Boolean]
@@ -2099,7 +2113,6 @@ module Polars
  def upsample(
    time_column:,
    every:,
-   offset: nil,
    by: nil,
    maintain_order: false
  )
@@ -2109,15 +2122,11 @@ module Polars
    if by.is_a?(::String)
      by = [by]
    end
-   if offset.nil?
-     offset = "0ns"
-   end

-   every = Utils.
-   offset = Utils._timedelta_to_pl_duration(offset)
+   every = Utils.parse_as_duration_string(every)

    _from_rbdf(
-     _df.upsample(by, time_column, every,
+     _df.upsample(by, time_column, every, maintain_order)
    )
  end

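The upsample hunks above drop the `offset` parameter entirely and route `every` through `Utils.parse_as_duration_string`. A minimal sketch of the remaining call shape, assuming the time column is already sorted; the sample data is illustrative, not from the diff:

require "date"
require "polars-df"

df = Polars::DataFrame.new(
  {
    "time" => [DateTime.new(2021, 2, 1), DateTime.new(2021, 4, 1), DateTime.new(2021, 6, 1)],
    "values" => [1, 2, 3]
  }
)

# 0.12.0 keywords: time_column:, every:, by: (optional), maintain_order: (optional); offset: is gone.
df.upsample(time_column: "time", every: "1mo")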
@@ -2264,7 +2273,7 @@ module Polars
  #   Name(s) of the right join column(s).
  # @param on [Object]
  #   Name(s) of the join columns in both DataFrames.
- # @param how ["inner", "left", "
+ # @param how ["inner", "left", "full", "semi", "anti", "cross"]
  #   Join strategy.
  # @param suffix [String]
  #   Suffix to append to columns with a duplicate name.
@@ -2300,7 +2309,7 @@ module Polars
  # # └─────┴─────┴─────┴───────┘
  #
  # @example
- #   df.join(other_df, on: "ham", how: "
+ #   df.join(other_df, on: "ham", how: "full")
  # # =>
  # # shape: (4, 5)
  # # ┌──────┬──────┬──────┬───────┬───────────┐
@@ -2957,9 +2966,9 @@ module Polars
  #   arguments contains multiple columns as well
  # @param index [Object]
  #   One or multiple keys to group by
- # @param
+ # @param on [Object]
  #   Columns whose values will be used as the header of the output DataFrame
- # @param
+ # @param aggregate_function ["first", "sum", "max", "min", "mean", "median", "last", "count"]
  #   A predefined aggregate function str or an expression.
  # @param maintain_order [Object]
  #   Sort the grouped keys so that the output order is predictable.
@@ -2971,66 +2980,62 @@ module Polars
  # @example
  #   df = Polars::DataFrame.new(
  #     {
- #       "foo" => ["one", "one", "
- #       "bar" => ["
+ #       "foo" => ["one", "one", "two", "two", "one", "two"],
+ #       "bar" => ["y", "y", "y", "x", "x", "x"],
  #       "baz" => [1, 2, 3, 4, 5, 6]
  #     }
  #   )
- #   df.pivot(
+ #   df.pivot("bar", index: "foo", values: "baz", aggregate_function: "sum")
  #   # =>
- #   # shape: (2,
- #   #
- #   # │ foo ┆
- #   # │ --- ┆ --- ┆ ---
- #   # │ str ┆ i64 ┆ i64
- #   #
- #   # │ one ┆
- #   # │ two ┆
- #   #
+ #   # shape: (2, 3)
+ #   # ┌─────┬─────┬─────┐
+ #   # │ foo ┆ y ┆ x │
+ #   # │ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ i64 │
+ #   # ╞═════╪═════╪═════╡
+ #   # │ one ┆ 3 ┆ 5 │
+ #   # │ two ┆ 3 ┆ 10 │
+ #   # └─────┴─────┴─────┘
  def pivot(
-
-   index
-
-
+   on,
+   index: nil,
+   values: nil,
+   aggregate_function: nil,
    maintain_order: true,
    sort_columns: false,
    separator: "_"
  )
-
-
-
-
-     index = [index]
-   end
-   if columns.is_a?(::String)
-     columns = [columns]
+   index = Utils._expand_selectors(self, index)
+   on = Utils._expand_selectors(self, on)
+   if !values.nil?
+     values = Utils._expand_selectors(self, values)
    end

-   if
-   case
+   if aggregate_function.is_a?(::String)
+     case aggregate_function
      when "first"
-       aggregate_expr =
+       aggregate_expr = F.element.first._rbexpr
      when "sum"
-       aggregate_expr =
+       aggregate_expr = F.element.sum._rbexpr
      when "max"
-       aggregate_expr =
+       aggregate_expr = F.element.max._rbexpr
      when "min"
-       aggregate_expr =
+       aggregate_expr = F.element.min._rbexpr
      when "mean"
-       aggregate_expr =
+       aggregate_expr = F.element.mean._rbexpr
      when "median"
-       aggregate_expr =
+       aggregate_expr = F.element.median._rbexpr
      when "last"
-       aggregate_expr =
+       aggregate_expr = F.element.last._rbexpr
      when "len"
-       aggregate_expr =
+       aggregate_expr = F.len._rbexpr
      when "count"
        warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
-       aggregate_expr =
+       aggregate_expr = F.len._rbexpr
      else
        raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
      end
-   elsif
+   elsif aggregate_function.nil?
      aggregate_expr = nil
    else
      aggregate_expr = aggregate_function._rbexpr
@@ -3038,8 +3043,8 @@ module Polars

    _from_rbdf(
      _df.pivot_expr(
+       on,
        index,
-       columns,
        values,
        maintain_order,
        sort_columns,
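The pivot hunks above change the calling convention: the columns to pivot on are now the first positional argument, while `index`, `values`, and `aggregate_function` become keywords and are expanded through `Utils._expand_selectors`. The sketch below restates the new doc example from the hunk; the final call passing an expression instead of a string is an assumption based on the `aggregate_function._rbexpr` branch shown above:

require "polars-df"

df = Polars::DataFrame.new(
  {
    "foo" => ["one", "one", "two", "two", "one", "two"],
    "bar" => ["y", "y", "y", "x", "x", "x"],
    "baz" => [1, 2, 3, 4, 5, 6]
  }
)

# String aggregate names map to the element-based expressions shown above.
df.pivot("bar", index: "foo", values: "baz", aggregate_function: "sum")

# Assumed: an expression over Polars.element works as well.
df.pivot("bar", index: "foo", values: "baz", aggregate_function: Polars.element.sum)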
@@ -3054,18 +3059,18 @@ module Polars
  # Optionally leaves identifiers set.
  #
  # This function is useful to massage a DataFrame into a format where one or more
- # columns are identifier variables (
- # measured variables (
+ # columns are identifier variables (index) while all other columns, considered
+ # measured variables (on), are "unpivoted" to the row axis leaving just
  # two non-identifier columns, 'variable' and 'value'.
  #
- # @param
- #
- #
- #
- #
- # @param variable_name [
- #   Name to give to the `
- # @param value_name [
+ # @param on [Object]
+ #   Column(s) or selector(s) to use as values variables; if `on`
+ #   is empty all columns that are not in `index` will be used.
+ # @param index [Object]
+ #   Column(s) or selector(s) to use as identifier variables.
+ # @param variable_name [Object]
+ #   Name to give to the `variable` column. Defaults to "variable"
+ # @param value_name [Object]
  #   Name to give to the `value` column. Defaults to "value"
  #
  # @return [DataFrame]
@@ -3078,7 +3083,7 @@ module Polars
  #     "c" => [2, 4, 6]
  #   }
  # )
- # df.
+ # df.unpivot(Polars::Selectors.numeric, index: "a")
  # # =>
  # # shape: (6, 3)
  # # ┌─────┬──────────┬───────┐
@@ -3093,23 +3098,13 @@ module Polars
  # # │ y ┆ c ┆ 4 │
  # # │ z ┆ c ┆ 6 │
  # # └─────┴──────────┴───────┘
- def
-
-
-
-
-   id_vars = [id_vars]
- end
- if value_vars.nil?
-   value_vars = []
- end
- if id_vars.nil?
-   id_vars = []
- end
- _from_rbdf(
-   _df.melt(id_vars, value_vars, value_name, variable_name)
- )
+ def unpivot(on, index: nil, variable_name: nil, value_name: nil)
+   on = on.nil? ? [] : Utils._expand_selectors(self, on)
+   index = index.nil? ? [] : Utils._expand_selectors(self, index)
+
+   _from_rbdf(_df.unpivot(on, index, value_name, variable_name))
  end
+ alias_method :melt, :unpivot

  # Unstack a long table to a wide form without doing an aggregation.
  #
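As the hunks above show, `melt` is now a thin alias for `unpivot`, which takes the value columns positionally (`on`) and the identifier columns as `index:`, with optional `variable_name:` and `value_name:`. A short sketch built from the new doc example; passing a plain column list through the selector expansion is an assumption:

require "polars-df"

df = Polars::DataFrame.new(
  {
    "a" => ["x", "y", "z"],
    "b" => [1, 3, 5],
    "c" => [2, 4, 6]
  }
)

# New API, as in the doc example above:
df.unpivot(Polars::Selectors.numeric, index: "a")

# Assumed: explicit column names also work, and the old name still resolves via the alias.
df.melt(["b", "c"], index: "a", variable_name: "key", value_name: "val")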
@@ -4143,7 +4138,7 @@ module Polars
    end

    if subset.is_a?(::Array) && subset.length == 1
-     expr = Utils.
+     expr = Utils.wrap_expr(Utils.parse_into_expression(subset[0], str_as_lit: false))
    else
      struct_fields = subset.nil? ? Polars.all : subset
      expr = Polars.struct(struct_fields)
@@ -4561,7 +4556,7 @@ module Polars
  # # │ 3 ┆ 7 │
  # # └─────┴─────┘
  def gather_every(n, offset = 0)
-   select(
+   select(F.col("*").gather_every(n, offset))
  end
  alias_method :take_every, :gather_every

@@ -4631,7 +4626,7 @@ module Polars
  # # │ 10.0 ┆ null ┆ 9.0 │
  # # └──────┴──────┴──────────┘
  def interpolate
-   select(
+   select(F.col("*").interpolate)
  end

  # Check if the dataframe is empty.
@@ -4767,19 +4762,16 @@ module Polars
  #
  # @param column [Object]
  #   Columns that are sorted
- # @param more_columns [Object]
- #   Additional columns that are sorted, specified as positional arguments.
  # @param descending [Boolean]
  #   Whether the columns are sorted in descending order.
  #
  # @return [DataFrame]
  def set_sorted(
    column,
-   *more_columns,
    descending: false
  )
    lazy
-     .set_sorted(column,
+     .set_sorted(column, descending: descending)
      .collect(no_optimization: true)
  end

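Finally, `set_sorted` above now flags a single column per call (the `*more_columns` splat is removed) and still round-trips through the lazy frame. A minimal sketch; calling it once per sorted column is an assumption about the intended migration, not something stated in the diff:

require "polars-df"

df = Polars::DataFrame.new({"dt" => [1, 2, 3], "values" => [6, 5, 4]})

# 0.12.0: one column, plus the descending: keyword.
df = df.set_sorted("dt")
df = df.set_sorted("values", descending: true)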