polars-df 0.5.0-x86_64-linux → 0.6.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/LICENSE-THIRD-PARTY.txt +1161 -832
- data/README.md +4 -3
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +4 -2
data/README.md
CHANGED
@@ -25,7 +25,7 @@ Polars.read_csv("iris.csv")
|
|
25
25
|
.collect
|
26
26
|
```
|
27
27
|
|
28
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/
|
28
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
29
|
|
30
30
|
## Reference
|
31
31
|
|
@@ -348,7 +348,7 @@ df.to_numo
|
|
348
348
|
You can specify column types when creating a data frame
|
349
349
|
|
350
350
|
```ruby
|
351
|
-
Polars::DataFrame.new(data,
|
351
|
+
Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
|
352
352
|
```
|
353
353
|
|
354
354
|
Supported types are:
|
@@ -357,8 +357,9 @@ Supported types are:
|
|
357
357
|
- float - `Float64`, `Float32`
|
358
358
|
- integer - `Int64`, `Int32`, `Int16`, `Int8`
|
359
359
|
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
360
|
-
- string - `Utf8`, `Categorical`
|
360
|
+
- string - `Utf8`, `Binary`, `Categorical`
|
361
361
|
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
362
|
+
- other - `Object`, `List`, `Struct`, `Array` [unreleased]
|
362
363
|
|
363
364
|
Get column types
|
364
365
|
|
data/lib/polars/3.0/polars.so
CHANGED
Binary file
|
data/lib/polars/3.1/polars.so
CHANGED
Binary file
|
data/lib/polars/3.2/polars.so
CHANGED
Binary file
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Polars
|
2
|
+
# Namespace for array related expressions.
|
3
|
+
class ArrayExpr
|
4
|
+
# @private
|
5
|
+
attr_accessor :_rbexpr
|
6
|
+
|
7
|
+
# @private
|
8
|
+
def initialize(expr)
|
9
|
+
self._rbexpr = expr._rbexpr
|
10
|
+
end
|
11
|
+
|
12
|
+
# Compute the min values of the sub-arrays.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# df = Polars::DataFrame.new(
|
18
|
+
# {"a" => [[1, 2], [4, 3]]},
|
19
|
+
# schema: {"a" => Polars::Array.new(2, Polars::Int64)}
|
20
|
+
# )
|
21
|
+
# df.select(Polars.col("a").arr.min)
|
22
|
+
# # =>
|
23
|
+
# # shape: (2, 1)
|
24
|
+
# # ┌─────┐
|
25
|
+
# # │ a │
|
26
|
+
# # │ --- │
|
27
|
+
# # │ i64 │
|
28
|
+
# # ╞═════╡
|
29
|
+
# # │ 1 │
|
30
|
+
# # │ 3 │
|
31
|
+
# # └─────┘
|
32
|
+
def min
|
33
|
+
Utils.wrap_expr(_rbexpr.array_min)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Compute the max values of the sub-arrays.
|
37
|
+
#
|
38
|
+
# @return [Expr]
|
39
|
+
#
|
40
|
+
# @example
|
41
|
+
# df = Polars::DataFrame.new(
|
42
|
+
# {"a" => [[1, 2], [4, 3]]},
|
43
|
+
# schema: {"a" => Polars::Array.new(2, Polars::Int64)}
|
44
|
+
# )
|
45
|
+
# df.select(Polars.col("a").arr.max)
|
46
|
+
# # =>
|
47
|
+
# # shape: (2, 1)
|
48
|
+
# # ┌─────┐
|
49
|
+
# # │ a │
|
50
|
+
# # │ --- │
|
51
|
+
# # │ i64 │
|
52
|
+
# # ╞═════╡
|
53
|
+
# # │ 2 │
|
54
|
+
# # │ 4 │
|
55
|
+
# # └─────┘
|
56
|
+
def max
|
57
|
+
Utils.wrap_expr(_rbexpr.array_max)
|
58
|
+
end
|
59
|
+
|
60
|
+
# Compute the sum values of the sub-arrays.
|
61
|
+
#
|
62
|
+
# @return [Expr]
|
63
|
+
#
|
64
|
+
# @example
|
65
|
+
# df = Polars::DataFrame.new(
|
66
|
+
# {"a" => [[1, 2], [4, 3]]},
|
67
|
+
# schema: {"a" => Polars::Array.new(2, Polars::Int64)}
|
68
|
+
# )
|
69
|
+
# df.select(Polars.col("a").arr.sum)
|
70
|
+
# # =>
|
71
|
+
# # shape: (2, 1)
|
72
|
+
# # ┌─────┐
|
73
|
+
# # │ a │
|
74
|
+
# # │ --- │
|
75
|
+
# # │ i64 │
|
76
|
+
# # ╞═════╡
|
77
|
+
# # │ 3 │
|
78
|
+
# # │ 7 │
|
79
|
+
# # └─────┘
|
80
|
+
def sum
|
81
|
+
Utils.wrap_expr(_rbexpr.array_sum)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Polars
|
2
|
+
# Series.arr namespace.
|
3
|
+
class ArrayNameSpace
|
4
|
+
include ExprDispatch
|
5
|
+
|
6
|
+
self._accessor = "arr"
|
7
|
+
|
8
|
+
# @private
|
9
|
+
def initialize(series)
|
10
|
+
self._s = series._s
|
11
|
+
end
|
12
|
+
|
13
|
+
# Compute the min values of the sub-arrays.
|
14
|
+
#
|
15
|
+
# @return [Series]
|
16
|
+
#
|
17
|
+
# @example
|
18
|
+
# s = Polars::Series.new(
|
19
|
+
# "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
|
20
|
+
# )
|
21
|
+
# s.arr.min
|
22
|
+
# # =>
|
23
|
+
# # shape: (2,)
|
24
|
+
# # Series: 'a' [i64]
|
25
|
+
# # [
|
26
|
+
# # 1
|
27
|
+
# # 3
|
28
|
+
# # ]
|
29
|
+
def min
|
30
|
+
super
|
31
|
+
end
|
32
|
+
|
33
|
+
# Compute the max values of the sub-arrays.
|
34
|
+
#
|
35
|
+
# @return [Series]
|
36
|
+
#
|
37
|
+
# @example
|
38
|
+
# s = Polars::Series.new(
|
39
|
+
# "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
|
40
|
+
# )
|
41
|
+
# s.arr.max
|
42
|
+
# # =>
|
43
|
+
# # shape: (2,)
|
44
|
+
# # Series: 'a' [i64]
|
45
|
+
# # [
|
46
|
+
# # 2
|
47
|
+
# # 4
|
48
|
+
# # ]
|
49
|
+
def max
|
50
|
+
super
|
51
|
+
end
|
52
|
+
|
53
|
+
# Compute the sum values of the sub-arrays.
|
54
|
+
#
|
55
|
+
# @return [Series]
|
56
|
+
#
|
57
|
+
# @example
|
58
|
+
# df = Polars::DataFrame.new(
|
59
|
+
# {"a" => [[1, 2], [4, 3]]},
|
60
|
+
# schema: {"a" => Polars::Array.new(2, Polars::Int64)}
|
61
|
+
# )
|
62
|
+
# df.select(Polars.col("a").arr.sum)
|
63
|
+
# # =>
|
64
|
+
# # shape: (2, 1)
|
65
|
+
# # ┌─────┐
|
66
|
+
# # │ a │
|
67
|
+
# # │ --- │
|
68
|
+
# # │ i64 │
|
69
|
+
# # ╞═════╡
|
70
|
+
# # │ 3 │
|
71
|
+
# # │ 7 │
|
72
|
+
# # └─────┘
|
73
|
+
def sum
|
74
|
+
super
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/polars/data_frame.rb
CHANGED
@@ -36,7 +36,7 @@ module Polars
|
|
36
36
|
elsif data.is_a?(Hash)
|
37
37
|
data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
|
38
38
|
self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
|
39
|
-
elsif data.is_a?(Array)
|
39
|
+
elsif data.is_a?(::Array)
|
40
40
|
self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
|
41
41
|
elsif data.is_a?(Series)
|
42
42
|
self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
|
@@ -116,7 +116,7 @@ module Polars
|
|
116
116
|
dtypes.each do|k, v|
|
117
117
|
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
118
118
|
end
|
119
|
-
elsif dtypes.is_a?(Array)
|
119
|
+
elsif dtypes.is_a?(::Array)
|
120
120
|
dtype_slice = dtypes
|
121
121
|
else
|
122
122
|
raise ArgumentError, "dtype arg should be list or dict"
|
@@ -590,7 +590,7 @@ module Polars
|
|
590
590
|
|
591
591
|
# df[2, ..] (select row as df)
|
592
592
|
if row_selection.is_a?(Integer)
|
593
|
-
if col_selection.is_a?(Array)
|
593
|
+
if col_selection.is_a?(::Array)
|
594
594
|
df = self[0.., col_selection]
|
595
595
|
return df.slice(row_selection, 1)
|
596
596
|
end
|
@@ -611,7 +611,7 @@ module Polars
|
|
611
611
|
return series[row_selection]
|
612
612
|
end
|
613
613
|
|
614
|
-
if col_selection.is_a?(Array)
|
614
|
+
if col_selection.is_a?(::Array)
|
615
615
|
# df[.., [1, 2]]
|
616
616
|
if Utils.is_int_sequence(col_selection)
|
617
617
|
series_list = col_selection.map { |i| to_series(i) }
|
@@ -641,7 +641,7 @@ module Polars
|
|
641
641
|
return Slice.new(self).apply(item)
|
642
642
|
end
|
643
643
|
|
644
|
-
if item.is_a?(Array) && item.all? { |v| Utils.strlike?(v) }
|
644
|
+
if item.is_a?(::Array) && item.all? { |v| Utils.strlike?(v) }
|
645
645
|
# select multiple columns
|
646
646
|
# df[["foo", "bar"]]
|
647
647
|
return _from_rbdf(_df.select(item.map(&:to_s)))
|
@@ -684,13 +684,13 @@ module Polars
|
|
684
684
|
end
|
685
685
|
|
686
686
|
if Utils.strlike?(key)
|
687
|
-
if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
|
687
|
+
if value.is_a?(::Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
|
688
688
|
value = Series.new(value)
|
689
689
|
elsif !value.is_a?(Series)
|
690
690
|
value = Polars.lit(value)
|
691
691
|
end
|
692
692
|
self._df = with_column(value.alias(key.to_s))._df
|
693
|
-
elsif key.is_a?(Array)
|
693
|
+
elsif key.is_a?(::Array)
|
694
694
|
row_selection, col_selection = key
|
695
695
|
|
696
696
|
if Utils.strlike?(col_selection)
|
@@ -994,14 +994,21 @@ module Polars
|
|
994
994
|
#
|
995
995
|
# @return [String, nil]
|
996
996
|
def write_ipc(file, compression: "uncompressed")
|
997
|
-
|
998
|
-
|
997
|
+
return_bytes = file.nil?
|
998
|
+
if return_bytes
|
999
|
+
file = StringIO.new
|
1000
|
+
file.set_encoding(Encoding::BINARY)
|
999
1001
|
end
|
1000
1002
|
if Utils.pathlike?(file)
|
1001
1003
|
file = Utils.normalise_filepath(file)
|
1002
1004
|
end
|
1003
1005
|
|
1006
|
+
if compression.nil?
|
1007
|
+
compression = "uncompressed"
|
1008
|
+
end
|
1009
|
+
|
1004
1010
|
_df.write_ipc(file, compression)
|
1011
|
+
return_bytes ? file.string : nil
|
1005
1012
|
end
|
1006
1013
|
|
1007
1014
|
# Write to Apache Parquet file.
|
@@ -1491,13 +1498,9 @@ module Polars
|
|
1491
1498
|
# # │ 1 ┆ 6.0 ┆ a │
|
1492
1499
|
# # └─────┴─────┴─────┘
|
1493
1500
|
def sort(by, reverse: false, nulls_last: false)
|
1494
|
-
|
1495
|
-
|
1496
|
-
|
1497
|
-
.collect(no_optimization: true, string_cache: false)
|
1498
|
-
else
|
1499
|
-
_from_rbdf(_df.sort(by, reverse, nulls_last))
|
1500
|
-
end
|
1501
|
+
lazy
|
1502
|
+
.sort(by, reverse: reverse, nulls_last: nulls_last)
|
1503
|
+
.collect(no_optimization: true)
|
1501
1504
|
end
|
1502
1505
|
|
1503
1506
|
# Sort the DataFrame by column in-place.
|
@@ -1899,6 +1902,12 @@ module Polars
|
|
1899
1902
|
# Define whether the temporal window interval is closed or not.
|
1900
1903
|
# @param by [Object]
|
1901
1904
|
# Also group by this column/these columns.
|
1905
|
+
# @param check_sorted [Boolean]
|
1906
|
+
# When the `by` argument is given, Polars cannot check sortedness
|
1907
|
+
# by the metadata and has to do a full scan on the index column to
|
1908
|
+
# verify data is sorted. This is expensive. If you are sure the
|
1909
|
+
# data within the by groups is sorted, you can set this to `false`.
|
1910
|
+
# Doing so incorrectly will lead to incorrect output.
|
1902
1911
|
#
|
1903
1912
|
# @return [RollingGroupBy]
|
1904
1913
|
#
|
@@ -1912,7 +1921,7 @@ module Polars
|
|
1912
1921
|
# "2020-01-08 23:16:43"
|
1913
1922
|
# ]
|
1914
1923
|
# df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
1915
|
-
# Polars.col("dt").str.strptime(Polars::Datetime)
|
1924
|
+
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
1916
1925
|
# )
|
1917
1926
|
# df.groupby_rolling(index_column: "dt", period: "2d").agg(
|
1918
1927
|
# [
|
@@ -1940,9 +1949,10 @@ module Polars
|
|
1940
1949
|
period:,
|
1941
1950
|
offset: nil,
|
1942
1951
|
closed: "right",
|
1943
|
-
by: nil
|
1952
|
+
by: nil,
|
1953
|
+
check_sorted: true
|
1944
1954
|
)
|
1945
|
-
RollingGroupBy.new(self, index_column, period, offset, closed, by)
|
1955
|
+
RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
|
1946
1956
|
end
|
1947
1957
|
|
1948
1958
|
# Group based on a time value (or index value of type `:i32`, `:i64`).
|
@@ -2078,21 +2088,21 @@ module Polars
|
|
2078
2088
|
# df.groupby_dynamic("time", every: "1h", closed: "left").agg(
|
2079
2089
|
# [
|
2080
2090
|
# Polars.col("time").count.alias("time_count"),
|
2081
|
-
# Polars.col("time").
|
2091
|
+
# Polars.col("time").alias("time_agg_list")
|
2082
2092
|
# ]
|
2083
2093
|
# )
|
2084
2094
|
# # =>
|
2085
2095
|
# # shape: (4, 3)
|
2086
|
-
# #
|
2087
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
2088
|
-
# # │ --- ┆ --- ┆ ---
|
2089
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
2090
|
-
# #
|
2091
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16
|
2092
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16
|
2093
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16
|
2094
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
2095
|
-
# #
|
2096
|
+
# # ┌─────────────────────┬────────────┬───────────────────────────────────┐
|
2097
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
2098
|
+
# # │ --- ┆ --- ┆ --- │
|
2099
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
2100
|
+
# # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
|
2101
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16… │
|
2102
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16… │
|
2103
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16… │
|
2104
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
2105
|
+
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
2096
2106
|
#
|
2097
2107
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
2098
2108
|
# df.groupby_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -2159,7 +2169,7 @@ module Polars
|
|
2159
2169
|
# period: "3i",
|
2160
2170
|
# include_boundaries: true,
|
2161
2171
|
# closed: "right"
|
2162
|
-
# ).agg(Polars.col("A").
|
2172
|
+
# ).agg(Polars.col("A").alias("A_agg_list"))
|
2163
2173
|
# # =>
|
2164
2174
|
# # shape: (3, 4)
|
2165
2175
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
@@ -2242,7 +2252,7 @@ module Polars
|
|
2242
2252
|
# "groups" => ["A", "B", "A", "B"],
|
2243
2253
|
# "values" => [0, 1, 2, 3]
|
2244
2254
|
# }
|
2245
|
-
# )
|
2255
|
+
# ).set_sorted("time")
|
2246
2256
|
# df.upsample(
|
2247
2257
|
# time_column: "time", every: "1mo", by: "groups", maintain_order: true
|
2248
2258
|
# ).select(Polars.all.forward_fill)
|
@@ -2360,7 +2370,7 @@ module Polars
|
|
2360
2370
|
# ], # note record date: Jan 1st (sorted!)
|
2361
2371
|
# "gdp" => [4164, 4411, 4566, 4696]
|
2362
2372
|
# }
|
2363
|
-
# )
|
2373
|
+
# ).set_sorted("date")
|
2364
2374
|
# population = Polars::DataFrame.new(
|
2365
2375
|
# {
|
2366
2376
|
# "date" => [
|
@@ -2371,7 +2381,7 @@ module Polars
|
|
2371
2381
|
# ], # note record date: May 12th (sorted!)
|
2372
2382
|
# "population" => [82.19, 82.66, 83.12, 83.52]
|
2373
2383
|
# }
|
2374
|
-
# )
|
2384
|
+
# ).set_sorted("date")
|
2375
2385
|
# population.join_asof(
|
2376
2386
|
# gdp, left_on: "date", right_on: "date", strategy: "backward"
|
2377
2387
|
# )
|
@@ -2674,7 +2684,7 @@ module Polars
|
|
2674
2684
|
# # │ 3 ┆ 8 ┆ c ┆ 30 │
|
2675
2685
|
# # └─────┴─────┴─────┴───────┘
|
2676
2686
|
def hstack(columns, in_place: false)
|
2677
|
-
if !columns.is_a?(Array)
|
2687
|
+
if !columns.is_a?(::Array)
|
2678
2688
|
columns = columns.get_columns
|
2679
2689
|
end
|
2680
2690
|
if in_place
|
@@ -2804,7 +2814,7 @@ module Polars
|
|
2804
2814
|
# # │ 3 ┆ 8.0 │
|
2805
2815
|
# # └─────┴─────┘
|
2806
2816
|
def drop(columns)
|
2807
|
-
if columns.is_a?(Array)
|
2817
|
+
if columns.is_a?(::Array)
|
2808
2818
|
df = clone
|
2809
2819
|
columns.each do |n|
|
2810
2820
|
df._df.drop_in_place(n)
|
@@ -3317,7 +3327,7 @@ module Polars
|
|
3317
3327
|
n_fill = n_cols * n_rows - height
|
3318
3328
|
|
3319
3329
|
if n_fill > 0
|
3320
|
-
if !fill_values.is_a?(Array)
|
3330
|
+
if !fill_values.is_a?(::Array)
|
3321
3331
|
fill_values = [fill_values] * df.width
|
3322
3332
|
end
|
3323
3333
|
|
@@ -3426,29 +3436,29 @@ module Polars
|
|
3426
3436
|
# # ╞═════╪═════╪═════╡
|
3427
3437
|
# # │ C ┆ 2 ┆ l │
|
3428
3438
|
# # └─────┴─────┴─────┘}
|
3429
|
-
def partition_by(groups, maintain_order: true, as_dict: false)
|
3439
|
+
def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
|
3430
3440
|
if groups.is_a?(String)
|
3431
3441
|
groups = [groups]
|
3432
|
-
elsif !groups.is_a?(Array)
|
3442
|
+
elsif !groups.is_a?(::Array)
|
3433
3443
|
groups = Array(groups)
|
3434
3444
|
end
|
3435
3445
|
|
3436
3446
|
if as_dict
|
3437
3447
|
out = {}
|
3438
3448
|
if groups.length == 1
|
3439
|
-
_df.partition_by(groups, maintain_order).each do |df|
|
3449
|
+
_df.partition_by(groups, maintain_order, include_key).each do |df|
|
3440
3450
|
df = _from_rbdf(df)
|
3441
3451
|
out[df[groups][0, 0]] = df
|
3442
3452
|
end
|
3443
3453
|
else
|
3444
|
-
_df.partition_by(groups, maintain_order).each do |df|
|
3454
|
+
_df.partition_by(groups, maintain_order, include_key).each do |df|
|
3445
3455
|
df = _from_rbdf(df)
|
3446
3456
|
out[df[groups].row(0)] = df
|
3447
3457
|
end
|
3448
3458
|
end
|
3449
3459
|
out
|
3450
3460
|
else
|
3451
|
-
_df.partition_by(groups, maintain_order).map { |df| _from_rbdf(df) }
|
3461
|
+
_df.partition_by(groups, maintain_order, include_key).map { |df| _from_rbdf(df) }
|
3452
3462
|
end
|
3453
3463
|
end
|
3454
3464
|
|
@@ -3716,7 +3726,7 @@ module Polars
|
|
3716
3726
|
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
|
3717
3727
|
# # └─────┴──────┴───────┴──────┴──────┴───────┘
|
3718
3728
|
def with_columns(exprs)
|
3719
|
-
if !exprs.nil? && !exprs.is_a?(Array)
|
3729
|
+
if !exprs.nil? && !exprs.is_a?(::Array)
|
3720
3730
|
exprs = [exprs]
|
3721
3731
|
end
|
3722
3732
|
lazy
|
@@ -4097,11 +4107,11 @@ module Polars
|
|
4097
4107
|
# # │ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 │
|
4098
4108
|
# # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
|
4099
4109
|
# # └───────┴───────┴───────┴───────┴───────┴───────┘
|
4100
|
-
def to_dummies(columns: nil, separator: "_")
|
4110
|
+
def to_dummies(columns: nil, separator: "_", drop_first: false)
|
4101
4111
|
if columns.is_a?(String)
|
4102
4112
|
columns = [columns]
|
4103
4113
|
end
|
4104
|
-
_from_rbdf(_df.to_dummies(columns, separator))
|
4114
|
+
_from_rbdf(_df.to_dummies(columns, separator, drop_first))
|
4105
4115
|
end
|
4106
4116
|
|
4107
4117
|
# Drop duplicate rows from this DataFrame.
|
@@ -4189,7 +4199,7 @@ module Polars
|
|
4189
4199
|
subset = [subset]
|
4190
4200
|
end
|
4191
4201
|
|
4192
|
-
if subset.is_a?(Array) && subset.length == 1
|
4202
|
+
if subset.is_a?(::Array) && subset.length == 1
|
4193
4203
|
expr = Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
|
4194
4204
|
else
|
4195
4205
|
struct_fields = subset.nil? ? Polars.all : subset
|
@@ -4758,6 +4768,38 @@ module Polars
|
|
4758
4768
|
_from_rbdf(_df.unnest(names))
|
4759
4769
|
end
|
4760
4770
|
|
4771
|
+
# TODO
|
4772
|
+
# def corr
|
4773
|
+
# end
|
4774
|
+
|
4775
|
+
# TODO
|
4776
|
+
# def merge_sorted
|
4777
|
+
# end
|
4778
|
+
|
4779
|
+
# Indicate that one or multiple columns are sorted.
|
4780
|
+
#
|
4781
|
+
# @param column [Object]
|
4782
|
+
# Columns that are sorted
|
4783
|
+
# @param more_columns [Object]
|
4784
|
+
# Additional columns that are sorted, specified as positional arguments.
|
4785
|
+
# @param descending [Boolean]
|
4786
|
+
# Whether the columns are sorted in descending order.
|
4787
|
+
#
|
4788
|
+
# @return [DataFrame]
|
4789
|
+
def set_sorted(
|
4790
|
+
column,
|
4791
|
+
*more_columns,
|
4792
|
+
descending: false
|
4793
|
+
)
|
4794
|
+
lazy
|
4795
|
+
.set_sorted(column, *more_columns, descending: descending)
|
4796
|
+
.collect(no_optimization: true)
|
4797
|
+
end
|
4798
|
+
|
4799
|
+
# TODO
|
4800
|
+
# def update
|
4801
|
+
# end
|
4802
|
+
|
4761
4803
|
private
|
4762
4804
|
|
4763
4805
|
def initialize_copy(other)
|
@@ -4967,7 +5009,7 @@ module Polars
|
|
4967
5009
|
columns.each do |col, i|
|
4968
5010
|
if dtypes[col] == Categorical # != rbdf_dtypes[i]
|
4969
5011
|
column_casts << Polars.col(col).cast(Categorical)._rbexpr
|
4970
|
-
elsif structs
|
5012
|
+
elsif structs&.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
|
4971
5013
|
column_casts << Polars.col(col).cast(structs[col])._rbexpr
|
4972
5014
|
elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
|
4973
5015
|
column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
|
@@ -5012,7 +5054,7 @@ module Polars
|
|
5012
5054
|
rbdf = _post_apply_columns(rbdf, column_names)
|
5013
5055
|
end
|
5014
5056
|
return rbdf
|
5015
|
-
elsif data[0].is_a?(Array)
|
5057
|
+
elsif data[0].is_a?(::Array)
|
5016
5058
|
if orient.nil? && !columns.nil?
|
5017
5059
|
orient = columns.length == data.length ? "col" : "row"
|
5018
5060
|
end
|
@@ -5117,7 +5159,7 @@ module Polars
|
|
5117
5159
|
|
5118
5160
|
def _prepare_other_arg(other)
|
5119
5161
|
if !other.is_a?(Series)
|
5120
|
-
if other.is_a?(Array)
|
5162
|
+
if other.is_a?(::Array)
|
5121
5163
|
raise ArgumentError, "Operation not supported."
|
5122
5164
|
end
|
5123
5165
|
|