polars-df 0.11.0-x86_64-linux-musl → 0.13.0-x86_64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/Cargo.lock +428 -450
- data/LICENSE-THIRD-PARTY.txt +2502 -2242
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +35 -7
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +15 -8
- data/lib/polars/lazy_frame.rb +123 -105
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +108 -191
- data/lib/polars/string_expr.rb +51 -76
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +7 -2
data/lib/polars/3.1/polars.so
CHANGED
Binary file
|
data/lib/polars/3.2/polars.so
CHANGED
Binary file
|
data/lib/polars/3.3/polars.so
CHANGED
Binary file
|
data/lib/polars/array_expr.rb
CHANGED
@@ -358,7 +358,7 @@ module Polars
|
|
358
358
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
359
|
# # └───────────────┴─────┴──────┘
|
360
360
|
def get(index, null_on_oob: true)
|
361
|
-
index = Utils.
|
361
|
+
index = Utils.parse_into_expression(index)
|
362
362
|
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
363
363
|
end
|
364
364
|
|
@@ -446,7 +446,7 @@ module Polars
|
|
446
446
|
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
447
|
# # └───────────────┴───────────┴──────┘
|
448
448
|
def join(separator, ignore_nulls: true)
|
449
|
-
separator = Utils.
|
449
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
450
450
|
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
451
451
|
end
|
452
452
|
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ ["a", "c"] ┆ true │
|
503
503
|
# # └───────────────┴──────────┘
|
504
504
|
def contains(item)
|
505
|
-
item = Utils.
|
505
|
+
item = Utils.parse_into_expression(item, str_as_lit: true)
|
506
506
|
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
507
507
|
end
|
508
508
|
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ [2, 2] ┆ 2 │
|
531
531
|
# # └───────────────┴────────────────┘
|
532
532
|
def count_matches(element)
|
533
|
-
element = Utils.
|
533
|
+
element = Utils.parse_into_expression(element, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
535
535
|
end
|
536
536
|
end
|
@@ -42,7 +42,7 @@ module Polars
|
|
42
42
|
if !dtypes.nil?
|
43
43
|
if dtypes.is_a?(Hash)
|
44
44
|
dtype_list = []
|
45
|
-
dtypes.each do|k, v|
|
45
|
+
dtypes.each do |k, v|
|
46
46
|
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
47
47
|
end
|
48
48
|
elsif dtypes.is_a?(::Array)
|
@@ -78,7 +78,7 @@ module Polars
|
|
78
78
|
missing_utf8_is_empty_string,
|
79
79
|
parse_dates,
|
80
80
|
skip_rows_after_header,
|
81
|
-
Utils.
|
81
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
82
82
|
sample_size,
|
83
83
|
eol_char,
|
84
84
|
raise_if_empty,
|
data/lib/polars/cat_expr.rb
CHANGED
@@ -9,42 +9,6 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
-
# Determine how this categorical series should be sorted.
|
13
|
-
#
|
14
|
-
# @param ordering ["physical", "lexical"]
|
15
|
-
# Ordering type:
|
16
|
-
#
|
17
|
-
# - 'physical' -> Use the physical representation of the categories to determine the order (default).
|
18
|
-
# - 'lexical' -> Use the string values to determine the ordering.
|
19
|
-
#
|
20
|
-
# @return [Expr]
|
21
|
-
#
|
22
|
-
# @example
|
23
|
-
# df = Polars::DataFrame.new(
|
24
|
-
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
25
|
-
# ).with_columns(
|
26
|
-
# [
|
27
|
-
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
28
|
-
# ]
|
29
|
-
# )
|
30
|
-
# df.sort(["cats", "vals"])
|
31
|
-
# # =>
|
32
|
-
# # shape: (5, 2)
|
33
|
-
# # ┌──────┬──────┐
|
34
|
-
# # │ cats ┆ vals │
|
35
|
-
# # │ --- ┆ --- │
|
36
|
-
# # │ cat ┆ i64 │
|
37
|
-
# # ╞══════╪══════╡
|
38
|
-
# # │ a ┆ 2 │
|
39
|
-
# # │ b ┆ 3 │
|
40
|
-
# # │ k ┆ 2 │
|
41
|
-
# # │ z ┆ 1 │
|
42
|
-
# # │ z ┆ 3 │
|
43
|
-
# # └──────┴──────┘
|
44
|
-
def set_ordering(ordering)
|
45
|
-
Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
|
46
|
-
end
|
47
|
-
|
48
12
|
# Get the categories stored in this data type.
|
49
13
|
#
|
50
14
|
# @return [Expr]
|
@@ -10,43 +10,6 @@ module Polars
|
|
10
10
|
self._s = series._s
|
11
11
|
end
|
12
12
|
|
13
|
-
# Determine how this categorical series should be sorted.
|
14
|
-
#
|
15
|
-
# @param ordering ["physical", "lexical"]
|
16
|
-
# Ordering type:
|
17
|
-
#
|
18
|
-
# - 'physical' -> Use the physical representation of the categories to
|
19
|
-
# determine the order (default).
|
20
|
-
# - 'lexical' -> Use the string values to determine the ordering.
|
21
|
-
#
|
22
|
-
# @return [Series]
|
23
|
-
#
|
24
|
-
# @example
|
25
|
-
# df = Polars::DataFrame.new(
|
26
|
-
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
27
|
-
# ).with_columns(
|
28
|
-
# [
|
29
|
-
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
30
|
-
# ]
|
31
|
-
# )
|
32
|
-
# df.sort(["cats", "vals"])
|
33
|
-
# # =>
|
34
|
-
# # shape: (5, 2)
|
35
|
-
# # ┌──────┬──────┐
|
36
|
-
# # │ cats ┆ vals │
|
37
|
-
# # │ --- ┆ --- │
|
38
|
-
# # │ cat ┆ i64 │
|
39
|
-
# # ╞══════╪══════╡
|
40
|
-
# # │ a ┆ 2 │
|
41
|
-
# # │ b ┆ 3 │
|
42
|
-
# # │ k ┆ 2 │
|
43
|
-
# # │ z ┆ 1 │
|
44
|
-
# # │ z ┆ 3 │
|
45
|
-
# # └──────┴──────┘
|
46
|
-
def set_ordering(ordering)
|
47
|
-
super
|
48
|
-
end
|
49
|
-
|
50
13
|
# Get the categories stored in this data type.
|
51
14
|
#
|
52
15
|
# @return [Series]
|
data/lib/polars/data_frame.rb
CHANGED
@@ -622,7 +622,7 @@ module Polars
|
|
622
622
|
# "bar" => [6, 7, 8]
|
623
623
|
# }
|
624
624
|
# )
|
625
|
-
# df.write_ndjson
|
625
|
+
# df.write_ndjson
|
626
626
|
# # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
|
627
627
|
def write_ndjson(file = nil)
|
628
628
|
if Utils.pathlike?(file)
|
@@ -883,6 +883,24 @@ module Polars
|
|
883
883
|
file = Utils.normalize_filepath(file)
|
884
884
|
end
|
885
885
|
|
886
|
+
if statistics == true
|
887
|
+
statistics = {
|
888
|
+
min: true,
|
889
|
+
max: true,
|
890
|
+
distinct_count: false,
|
891
|
+
null_count: true
|
892
|
+
}
|
893
|
+
elsif statistics == false
|
894
|
+
statistics = {}
|
895
|
+
elsif statistics == "full"
|
896
|
+
statistics = {
|
897
|
+
min: true,
|
898
|
+
max: true,
|
899
|
+
distinct_count: true,
|
900
|
+
null_count: true
|
901
|
+
}
|
902
|
+
end
|
903
|
+
|
886
904
|
_df.write_parquet(
|
887
905
|
file, compression, compression_level, statistics, row_group_size, data_page_size
|
888
906
|
)
|
@@ -1724,12 +1742,6 @@ module Polars
|
|
1724
1742
|
# Define whether the temporal window interval is closed or not.
|
1725
1743
|
# @param by [Object]
|
1726
1744
|
# Also group by this column/these columns.
|
1727
|
-
# @param check_sorted [Boolean]
|
1728
|
-
# When the `by` argument is given, polars can not check sortedness
|
1729
|
-
# by the metadata and has to do a full scan on the index column to
|
1730
|
-
# verify data is sorted. This is expensive. If you are sure the
|
1731
|
-
# data within the by groups is sorted, you can set this to `false`.
|
1732
|
-
# Doing so incorrectly will lead to incorrect output
|
1733
1745
|
#
|
1734
1746
|
# @return [RollingGroupBy]
|
1735
1747
|
#
|
@@ -1745,7 +1757,7 @@ module Polars
|
|
1745
1757
|
# df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
1746
1758
|
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
1747
1759
|
# )
|
1748
|
-
# df.
|
1760
|
+
# df.rolling(index_column: "dt", period: "2d").agg(
|
1749
1761
|
# [
|
1750
1762
|
# Polars.sum("a").alias("sum_a"),
|
1751
1763
|
# Polars.min("a").alias("min_a"),
|
@@ -1766,17 +1778,17 @@ module Polars
|
|
1766
1778
|
# # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
|
1767
1779
|
# # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
|
1768
1780
|
# # └─────────────────────┴───────┴───────┴───────┘
|
1769
|
-
def
|
1781
|
+
def rolling(
|
1770
1782
|
index_column:,
|
1771
1783
|
period:,
|
1772
1784
|
offset: nil,
|
1773
1785
|
closed: "right",
|
1774
|
-
by: nil
|
1775
|
-
check_sorted: true
|
1786
|
+
by: nil
|
1776
1787
|
)
|
1777
|
-
RollingGroupBy.new(self, index_column, period, offset, closed, by
|
1788
|
+
RollingGroupBy.new(self, index_column, period, offset, closed, by)
|
1778
1789
|
end
|
1779
|
-
alias_method :groupby_rolling, :
|
1790
|
+
alias_method :groupby_rolling, :rolling
|
1791
|
+
alias_method :group_by_rolling, :rolling
|
1780
1792
|
|
1781
1793
|
# Group based on a time value (or index value of type `:i32`, `:i64`).
|
1782
1794
|
#
|
@@ -1846,10 +1858,12 @@ module Polars
|
|
1846
1858
|
# @example
|
1847
1859
|
# df = Polars::DataFrame.new(
|
1848
1860
|
# {
|
1849
|
-
# "time" => Polars.
|
1861
|
+
# "time" => Polars.datetime_range(
|
1850
1862
|
# DateTime.new(2021, 12, 16),
|
1851
1863
|
# DateTime.new(2021, 12, 16, 3),
|
1852
|
-
# "30m"
|
1864
|
+
# "30m",
|
1865
|
+
# time_unit: "us",
|
1866
|
+
# eager: true
|
1853
1867
|
# ),
|
1854
1868
|
# "n" => 0..6
|
1855
1869
|
# }
|
@@ -1948,10 +1962,12 @@ module Polars
|
|
1948
1962
|
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
1949
1963
|
# df = Polars::DataFrame.new(
|
1950
1964
|
# {
|
1951
|
-
# "time" => Polars.
|
1965
|
+
# "time" => Polars.datetime_range(
|
1952
1966
|
# DateTime.new(2021, 12, 16),
|
1953
1967
|
# DateTime.new(2021, 12, 16, 3),
|
1954
|
-
# "30m"
|
1968
|
+
# "30m",
|
1969
|
+
# time_unit: "us",
|
1970
|
+
# eager: true
|
1955
1971
|
# ),
|
1956
1972
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
1957
1973
|
# }
|
@@ -2038,8 +2054,6 @@ module Polars
|
|
2038
2054
|
# Note that this column has to be sorted for the output to make sense.
|
2039
2055
|
# @param every [String]
|
2040
2056
|
# interval will start 'every' duration
|
2041
|
-
# @param offset [String]
|
2042
|
-
# change the start of the date_range by this offset.
|
2043
2057
|
# @param by [Object]
|
2044
2058
|
# First group by these columns and then upsample for every group
|
2045
2059
|
# @param maintain_order [Boolean]
|
@@ -2099,7 +2113,6 @@ module Polars
|
|
2099
2113
|
def upsample(
|
2100
2114
|
time_column:,
|
2101
2115
|
every:,
|
2102
|
-
offset: nil,
|
2103
2116
|
by: nil,
|
2104
2117
|
maintain_order: false
|
2105
2118
|
)
|
@@ -2109,15 +2122,11 @@ module Polars
|
|
2109
2122
|
if by.is_a?(::String)
|
2110
2123
|
by = [by]
|
2111
2124
|
end
|
2112
|
-
if offset.nil?
|
2113
|
-
offset = "0ns"
|
2114
|
-
end
|
2115
2125
|
|
2116
|
-
every = Utils.
|
2117
|
-
offset = Utils._timedelta_to_pl_duration(offset)
|
2126
|
+
every = Utils.parse_as_duration_string(every)
|
2118
2127
|
|
2119
2128
|
_from_rbdf(
|
2120
|
-
_df.upsample(by, time_column, every,
|
2129
|
+
_df.upsample(by, time_column, every, maintain_order)
|
2121
2130
|
)
|
2122
2131
|
end
|
2123
2132
|
|
@@ -2264,7 +2273,7 @@ module Polars
|
|
2264
2273
|
# Name(s) of the right join column(s).
|
2265
2274
|
# @param on [Object]
|
2266
2275
|
# Name(s) of the join columns in both DataFrames.
|
2267
|
-
# @param how ["inner", "left", "
|
2276
|
+
# @param how ["inner", "left", "full", "semi", "anti", "cross"]
|
2268
2277
|
# Join strategy.
|
2269
2278
|
# @param suffix [String]
|
2270
2279
|
# Suffix to append to columns with a duplicate name.
|
@@ -2300,7 +2309,7 @@ module Polars
|
|
2300
2309
|
# # └─────┴─────┴─────┴───────┘
|
2301
2310
|
#
|
2302
2311
|
# @example
|
2303
|
-
# df.join(other_df, on: "ham", how: "
|
2312
|
+
# df.join(other_df, on: "ham", how: "full")
|
2304
2313
|
# # =>
|
2305
2314
|
# # shape: (4, 5)
|
2306
2315
|
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
@@ -2957,9 +2966,9 @@ module Polars
|
|
2957
2966
|
# arguments contains multiple columns as well
|
2958
2967
|
# @param index [Object]
|
2959
2968
|
# One or multiple keys to group by
|
2960
|
-
# @param
|
2969
|
+
# @param on [Object]
|
2961
2970
|
# Columns whose values will be used as the header of the output DataFrame
|
2962
|
-
# @param
|
2971
|
+
# @param aggregate_function ["first", "sum", "max", "min", "mean", "median", "last", "count"]
|
2963
2972
|
# A predefined aggregate function str or an expression.
|
2964
2973
|
# @param maintain_order [Object]
|
2965
2974
|
# Sort the grouped keys so that the output order is predictable.
|
@@ -2971,66 +2980,62 @@ module Polars
|
|
2971
2980
|
# @example
|
2972
2981
|
# df = Polars::DataFrame.new(
|
2973
2982
|
# {
|
2974
|
-
# "foo" => ["one", "one", "
|
2975
|
-
# "bar" => ["
|
2983
|
+
# "foo" => ["one", "one", "two", "two", "one", "two"],
|
2984
|
+
# "bar" => ["y", "y", "y", "x", "x", "x"],
|
2976
2985
|
# "baz" => [1, 2, 3, 4, 5, 6]
|
2977
2986
|
# }
|
2978
2987
|
# )
|
2979
|
-
# df.pivot(
|
2988
|
+
# df.pivot("bar", index: "foo", values: "baz", aggregate_function: "sum")
|
2980
2989
|
# # =>
|
2981
|
-
# # shape: (2,
|
2982
|
-
# #
|
2983
|
-
# # │ foo ┆
|
2984
|
-
# # │ --- ┆ --- ┆ ---
|
2985
|
-
# # │ str ┆ i64 ┆ i64
|
2986
|
-
# #
|
2987
|
-
# # │ one ┆
|
2988
|
-
# # │ two ┆
|
2989
|
-
# #
|
2990
|
+
# # shape: (2, 3)
|
2991
|
+
# # ┌─────┬─────┬─────┐
|
2992
|
+
# # │ foo ┆ y ┆ x │
|
2993
|
+
# # │ --- ┆ --- ┆ --- │
|
2994
|
+
# # │ str ┆ i64 ┆ i64 │
|
2995
|
+
# # ╞═════╪═════╪═════╡
|
2996
|
+
# # │ one ┆ 3 ┆ 5 │
|
2997
|
+
# # │ two ┆ 3 ┆ 10 │
|
2998
|
+
# # └─────┴─────┴─────┘
|
2990
2999
|
def pivot(
|
2991
|
-
|
2992
|
-
index
|
2993
|
-
|
2994
|
-
|
3000
|
+
on,
|
3001
|
+
index: nil,
|
3002
|
+
values: nil,
|
3003
|
+
aggregate_function: nil,
|
2995
3004
|
maintain_order: true,
|
2996
3005
|
sort_columns: false,
|
2997
3006
|
separator: "_"
|
2998
3007
|
)
|
2999
|
-
|
3000
|
-
|
3001
|
-
|
3002
|
-
|
3003
|
-
index = [index]
|
3004
|
-
end
|
3005
|
-
if columns.is_a?(::String)
|
3006
|
-
columns = [columns]
|
3008
|
+
index = Utils._expand_selectors(self, index)
|
3009
|
+
on = Utils._expand_selectors(self, on)
|
3010
|
+
if !values.nil?
|
3011
|
+
values = Utils._expand_selectors(self, values)
|
3007
3012
|
end
|
3008
3013
|
|
3009
|
-
if
|
3010
|
-
case
|
3014
|
+
if aggregate_function.is_a?(::String)
|
3015
|
+
case aggregate_function
|
3011
3016
|
when "first"
|
3012
|
-
aggregate_expr =
|
3017
|
+
aggregate_expr = F.element.first._rbexpr
|
3013
3018
|
when "sum"
|
3014
|
-
aggregate_expr =
|
3019
|
+
aggregate_expr = F.element.sum._rbexpr
|
3015
3020
|
when "max"
|
3016
|
-
aggregate_expr =
|
3021
|
+
aggregate_expr = F.element.max._rbexpr
|
3017
3022
|
when "min"
|
3018
|
-
aggregate_expr =
|
3023
|
+
aggregate_expr = F.element.min._rbexpr
|
3019
3024
|
when "mean"
|
3020
|
-
aggregate_expr =
|
3025
|
+
aggregate_expr = F.element.mean._rbexpr
|
3021
3026
|
when "median"
|
3022
|
-
aggregate_expr =
|
3027
|
+
aggregate_expr = F.element.median._rbexpr
|
3023
3028
|
when "last"
|
3024
|
-
aggregate_expr =
|
3029
|
+
aggregate_expr = F.element.last._rbexpr
|
3025
3030
|
when "len"
|
3026
|
-
aggregate_expr =
|
3031
|
+
aggregate_expr = F.len._rbexpr
|
3027
3032
|
when "count"
|
3028
3033
|
warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
|
3029
|
-
aggregate_expr =
|
3034
|
+
aggregate_expr = F.len._rbexpr
|
3030
3035
|
else
|
3031
3036
|
raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
|
3032
3037
|
end
|
3033
|
-
elsif
|
3038
|
+
elsif aggregate_function.nil?
|
3034
3039
|
aggregate_expr = nil
|
3035
3040
|
else
|
3036
3041
|
aggregate_expr = aggregate_function._rbexpr
|
@@ -3038,8 +3043,8 @@ module Polars
|
|
3038
3043
|
|
3039
3044
|
_from_rbdf(
|
3040
3045
|
_df.pivot_expr(
|
3046
|
+
on,
|
3041
3047
|
index,
|
3042
|
-
columns,
|
3043
3048
|
values,
|
3044
3049
|
maintain_order,
|
3045
3050
|
sort_columns,
|
@@ -3054,18 +3059,18 @@ module Polars
|
|
3054
3059
|
# Optionally leaves identifiers set.
|
3055
3060
|
#
|
3056
3061
|
# This function is useful to massage a DataFrame into a format where one or more
|
3057
|
-
# columns are identifier variables (
|
3058
|
-
# measured variables (
|
3062
|
+
# columns are identifier variables (index) while all other columns, considered
|
3063
|
+
# measured variables (on), are "unpivoted" to the row axis leaving just
|
3059
3064
|
# two non-identifier columns, 'variable' and 'value'.
|
3060
3065
|
#
|
3061
|
-
# @param
|
3062
|
-
#
|
3063
|
-
#
|
3064
|
-
#
|
3065
|
-
#
|
3066
|
-
# @param variable_name [
|
3067
|
-
# Name to give to the `
|
3068
|
-
# @param value_name [
|
3066
|
+
# @param on [Object]
|
3067
|
+
# Column(s) or selector(s) to use as values variables; if `on`
|
3068
|
+
# is empty all columns that are not in `index` will be used.
|
3069
|
+
# @param index [Object]
|
3070
|
+
# Column(s) or selector(s) to use as identifier variables.
|
3071
|
+
# @param variable_name [Object]
|
3072
|
+
# Name to give to the `variable` column. Defaults to "variable"
|
3073
|
+
# @param value_name [Object]
|
3069
3074
|
# Name to give to the `value` column. Defaults to "value"
|
3070
3075
|
#
|
3071
3076
|
# @return [DataFrame]
|
@@ -3078,7 +3083,7 @@ module Polars
|
|
3078
3083
|
# "c" => [2, 4, 6]
|
3079
3084
|
# }
|
3080
3085
|
# )
|
3081
|
-
# df.
|
3086
|
+
# df.unpivot(Polars::Selectors.numeric, index: "a")
|
3082
3087
|
# # =>
|
3083
3088
|
# # shape: (6, 3)
|
3084
3089
|
# # ┌─────┬──────────┬───────┐
|
@@ -3093,23 +3098,13 @@ module Polars
|
|
3093
3098
|
# # │ y ┆ c ┆ 4 │
|
3094
3099
|
# # │ z ┆ c ┆ 6 │
|
3095
3100
|
# # └─────┴──────────┴───────┘
|
3096
|
-
def
|
3097
|
-
|
3098
|
-
|
3099
|
-
|
3100
|
-
|
3101
|
-
id_vars = [id_vars]
|
3102
|
-
end
|
3103
|
-
if value_vars.nil?
|
3104
|
-
value_vars = []
|
3105
|
-
end
|
3106
|
-
if id_vars.nil?
|
3107
|
-
id_vars = []
|
3108
|
-
end
|
3109
|
-
_from_rbdf(
|
3110
|
-
_df.melt(id_vars, value_vars, value_name, variable_name)
|
3111
|
-
)
|
3101
|
+
def unpivot(on, index: nil, variable_name: nil, value_name: nil)
|
3102
|
+
on = on.nil? ? [] : Utils._expand_selectors(self, on)
|
3103
|
+
index = index.nil? ? [] : Utils._expand_selectors(self, index)
|
3104
|
+
|
3105
|
+
_from_rbdf(_df.unpivot(on, index, value_name, variable_name))
|
3112
3106
|
end
|
3107
|
+
alias_method :melt, :unpivot
|
3113
3108
|
|
3114
3109
|
# Unstack a long table to a wide form without doing an aggregation.
|
3115
3110
|
#
|
@@ -4143,7 +4138,7 @@ module Polars
|
|
4143
4138
|
end
|
4144
4139
|
|
4145
4140
|
if subset.is_a?(::Array) && subset.length == 1
|
4146
|
-
expr = Utils.
|
4141
|
+
expr = Utils.wrap_expr(Utils.parse_into_expression(subset[0], str_as_lit: false))
|
4147
4142
|
else
|
4148
4143
|
struct_fields = subset.nil? ? Polars.all : subset
|
4149
4144
|
expr = Polars.struct(struct_fields)
|
@@ -4561,7 +4556,7 @@ module Polars
|
|
4561
4556
|
# # │ 3 ┆ 7 │
|
4562
4557
|
# # └─────┴─────┘
|
4563
4558
|
def gather_every(n, offset = 0)
|
4564
|
-
select(
|
4559
|
+
select(F.col("*").gather_every(n, offset))
|
4565
4560
|
end
|
4566
4561
|
alias_method :take_every, :gather_every
|
4567
4562
|
|
@@ -4631,7 +4626,7 @@ module Polars
|
|
4631
4626
|
# # │ 10.0 ┆ null ┆ 9.0 │
|
4632
4627
|
# # └──────┴──────┴──────────┘
|
4633
4628
|
def interpolate
|
4634
|
-
select(
|
4629
|
+
select(F.col("*").interpolate)
|
4635
4630
|
end
|
4636
4631
|
|
4637
4632
|
# Check if the dataframe is empty.
|
@@ -4767,19 +4762,16 @@ module Polars
|
|
4767
4762
|
#
|
4768
4763
|
# @param column [Object]
|
4769
4764
|
# Columns that are sorted
|
4770
|
-
# @param more_columns [Object]
|
4771
|
-
# Additional columns that are sorted, specified as positional arguments.
|
4772
4765
|
# @param descending [Boolean]
|
4773
4766
|
# Whether the columns are sorted in descending order.
|
4774
4767
|
#
|
4775
4768
|
# @return [DataFrame]
|
4776
4769
|
def set_sorted(
|
4777
4770
|
column,
|
4778
|
-
*more_columns,
|
4779
4771
|
descending: false
|
4780
4772
|
)
|
4781
4773
|
lazy
|
4782
|
-
.set_sorted(column,
|
4774
|
+
.set_sorted(column, descending: descending)
|
4783
4775
|
.collect(no_optimization: true)
|
4784
4776
|
end
|
4785
4777
|
|