polars-df 0.21.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion/mod.rs +92 -4
- data/ext/polars/src/exceptions.rs +1 -0
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +24 -1
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +84 -5
- data/ext/polars/src/expr/list.rs +24 -0
- data/ext/polars/src/expr/meta.rs +11 -0
- data/ext/polars/src/expr/mod.rs +1 -0
- data/ext/polars/src/expr/name.rs +8 -0
- data/ext/polars/src/expr/rolling.rs +20 -0
- data/ext/polars/src/expr/string.rs +59 -0
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/functions/io.rs +19 -0
- data/ext/polars/src/functions/lazy.rs +4 -0
- data/ext/polars/src/lazyframe/general.rs +51 -0
- data/ext/polars/src/lib.rs +119 -10
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/series.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/general.rs +64 -4
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +684 -19
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +14 -2
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1213 -180
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +13 -0
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +27 -5
- data/lib/polars/lazy_frame.rb +936 -20
- data/lib/polars/list_expr.rb +196 -4
- data/lib/polars/list_name_space.rb +201 -4
- data/lib/polars/meta_expr.rb +64 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1051 -54
- data/lib/polars/string_expr.rb +411 -6
- data/lib/polars/string_name_space.rb +722 -49
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -0
- metadata +4 -1
data/lib/polars/array_expr.rb
CHANGED
@@ -9,6 +9,181 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Return the number of elements in each array.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# df = Polars::DataFrame.new(
|
18
|
+
# {"a" => [[1, 2], [4, 3]]},
|
19
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
20
|
+
# )
|
21
|
+
# df.select(Polars.col("a").arr.len)
|
22
|
+
# # =>
|
23
|
+
# # shape: (2, 1)
|
24
|
+
# # ┌─────┐
|
25
|
+
# # │ a │
|
26
|
+
# # │ --- │
|
27
|
+
# # │ u32 │
|
28
|
+
# # ╞═════╡
|
29
|
+
# # │ 2 │
|
30
|
+
# # │ 2 │
|
31
|
+
# # └─────┘
|
32
|
+
def len
  # Ask the native expression for the element count of each array,
  # then wrap the native result back into an Expr.
  native = _rbexpr.arr_len
  Utils.wrap_expr(native)
end
|
35
|
+
|
36
|
+
# Slice every subarray.
|
37
|
+
#
|
38
|
+
# @param offset [Integer]
|
39
|
+
# Start index. Negative indexing is supported.
|
40
|
+
# @param length [Integer]
|
41
|
+
# Length of the slice. If set to `nil` (default), the slice is taken to the
|
42
|
+
# end of the array.
|
43
|
+
# @param as_array [Boolean]
|
44
|
+
# Return result as a fixed-length `Array`, otherwise as a `List`.
|
45
|
+
# If true `length` and `offset` must be constant values.
|
46
|
+
#
|
47
|
+
# @return [Expr]
|
48
|
+
#
|
49
|
+
# @example
|
50
|
+
# df = Polars::DataFrame.new(
|
51
|
+
# {"a" => [[1, 2], [4, 3]]},
|
52
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
53
|
+
# )
|
54
|
+
# df.select(Polars.col("a").arr.slice(0, 1))
|
55
|
+
# # =>
|
56
|
+
# # shape: (2, 1)
|
57
|
+
# # ┌───────────┐
|
58
|
+
# # │ a │
|
59
|
+
# # │ --- │
|
60
|
+
# # │ list[i64] │
|
61
|
+
# # ╞═══════════╡
|
62
|
+
# # │ [1] │
|
63
|
+
# # │ [4] │
|
64
|
+
# # └───────────┘
|
65
|
+
#
|
66
|
+
# @example
|
67
|
+
# df = Polars::DataFrame.new(
|
68
|
+
# {"a" => [[1, 2], [4, 3]]},
|
69
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
70
|
+
# )
|
71
|
+
# df.select(Polars.col("a").arr.slice(0, 1, as_array: true))
|
72
|
+
# # =>
|
73
|
+
# # shape: (2, 1)
|
74
|
+
# # ┌───────────────┐
|
75
|
+
# # │ a │
|
76
|
+
# # │ --- │
|
77
|
+
# # │ array[i64, 1] │
|
78
|
+
# # ╞═══════════════╡
|
79
|
+
# # │ [1] │
|
80
|
+
# # │ [4] │
|
81
|
+
# # └───────────────┘
|
82
|
+
def slice(offset, length = nil, as_array: false)
  # `offset` is always converted to an expression; `length` is converted
  # only when it was given, so a nil length reaches the native call as nil.
  start_expr = Utils.parse_into_expression(offset)
  length_expr = length.nil? ? nil : Utils.parse_into_expression(length)

  sliced = _rbexpr.arr_slice(start_expr, length_expr, as_array)
  Utils.wrap_expr(sliced)
end
|
91
|
+
|
92
|
+
# Get the first `n` elements of the sub-arrays.
|
93
|
+
#
|
94
|
+
# @param n [Integer]
|
95
|
+
# Number of values to return for each sub-array.
|
96
|
+
# @param as_array [Boolean]
|
97
|
+
# Return result as a fixed-length `Array`, otherwise as a `List`.
|
98
|
+
# If true `n` must be a constant value.
|
99
|
+
#
|
100
|
+
# @return [Expr]
|
101
|
+
#
|
102
|
+
# @example
|
103
|
+
# df = Polars::DataFrame.new(
|
104
|
+
# {"a" => [[1, 2], [4, 3]]},
|
105
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
106
|
+
# )
|
107
|
+
# df.select(Polars.col("a").arr.head(1))
|
108
|
+
# # =>
|
109
|
+
# # shape: (2, 1)
|
110
|
+
# # ┌───────────┐
|
111
|
+
# # │ a │
|
112
|
+
# # │ --- │
|
113
|
+
# # │ list[i64] │
|
114
|
+
# # ╞═══════════╡
|
115
|
+
# # │ [1] │
|
116
|
+
# # │ [4] │
|
117
|
+
# # └───────────┘
|
118
|
+
#
|
119
|
+
# @example
|
120
|
+
# df = Polars::DataFrame.new(
|
121
|
+
# {"a" => [[1, 2], [4, 3]]},
|
122
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
123
|
+
# )
|
124
|
+
# df.select(Polars.col("a").arr.head(1, as_array: true))
|
125
|
+
# # =>
|
126
|
+
# # shape: (2, 1)
|
127
|
+
# # ┌───────────────┐
|
128
|
+
# # │ a │
|
129
|
+
# # │ --- │
|
130
|
+
# # │ array[i64, 1] │
|
131
|
+
# # ╞═══════════════╡
|
132
|
+
# # │ [1] │
|
133
|
+
# # │ [4] │
|
134
|
+
# # └───────────────┘
|
135
|
+
def head(n = 5, as_array: false)
  # The first `n` elements are the slice that starts at offset 0.
  first_index = 0
  slice(first_index, n, as_array: as_array)
end
|
138
|
+
|
139
|
+
# Get the last `n` elements of the sub-arrays.
|
140
|
+
#
|
141
|
+
# @param n [Integer]
|
142
|
+
# Number of values to return for each sub-array.
|
143
|
+
# @param as_array [Boolean]
|
144
|
+
# Return result as a fixed-length `Array`, otherwise as a `List`.
|
145
|
+
# If true `n` must be a constant value.
|
146
|
+
#
|
147
|
+
# @return [Expr]
|
148
|
+
#
|
149
|
+
# @example
|
150
|
+
# df = Polars::DataFrame.new(
|
151
|
+
# {"a" => [[1, 2], [4, 3]]},
|
152
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
153
|
+
# )
|
154
|
+
# df.select(Polars.col("a").arr.tail(1))
|
155
|
+
# # =>
|
156
|
+
# # shape: (2, 1)
|
157
|
+
# # ┌───────────┐
|
158
|
+
# # │ a │
|
159
|
+
# # │ --- │
|
160
|
+
# # │ list[i64] │
|
161
|
+
# # ╞═══════════╡
|
162
|
+
# # │ [2] │
|
163
|
+
# # │ [3] │
|
164
|
+
# # └───────────┘
|
165
|
+
#
|
166
|
+
# @example
|
167
|
+
# df = Polars::DataFrame.new(
|
168
|
+
# {"a" => [[1, 2], [4, 3]]},
|
169
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
170
|
+
# )
|
171
|
+
# df.select(Polars.col("a").arr.tail(1, as_array: true))
|
172
|
+
# # =>
|
173
|
+
# # shape: (2, 1)
|
174
|
+
# # ┌───────────────┐
|
175
|
+
# # │ a │
|
176
|
+
# # │ --- │
|
177
|
+
# # │ array[i64, 1] │
|
178
|
+
# # ╞═══════════════╡
|
179
|
+
# # │ [2] │
|
180
|
+
# # │ [3] │
|
181
|
+
# # └───────────────┘
|
182
|
+
def tail(n = 5, as_array: false)
  # Convert `n` to an expression before handing it to the native call.
  count_expr = Utils.parse_into_expression(n)
  tail_expr = _rbexpr.arr_tail(count_expr, as_array)
  Utils.wrap_expr(tail_expr)
end
|
186
|
+
|
12
187
|
# Compute the min values of the sub-arrays.
|
13
188
|
#
|
14
189
|
# @return [Expr]
|
@@ -30,7 +205,7 @@ module Polars
|
|
30
205
|
# # │ 3 │
|
31
206
|
# # └─────┘
|
32
207
|
def min
|
33
|
-
Utils.wrap_expr(_rbexpr.
|
208
|
+
Utils.wrap_expr(_rbexpr.arr_min)
|
34
209
|
end
|
35
210
|
|
36
211
|
# Compute the max values of the sub-arrays.
|
@@ -54,7 +229,7 @@ module Polars
|
|
54
229
|
# # │ 4 │
|
55
230
|
# # └─────┘
|
56
231
|
def max
|
57
|
-
Utils.wrap_expr(_rbexpr.
|
232
|
+
Utils.wrap_expr(_rbexpr.arr_max)
|
58
233
|
end
|
59
234
|
|
60
235
|
# Compute the sum values of the sub-arrays.
|
@@ -78,7 +253,103 @@ module Polars
|
|
78
253
|
# # │ 7 │
|
79
254
|
# # └─────┘
|
80
255
|
def sum
|
81
|
-
Utils.wrap_expr(_rbexpr.
|
256
|
+
Utils.wrap_expr(_rbexpr.arr_sum)
|
257
|
+
end
|
258
|
+
|
259
|
+
# Compute the std of the values of the sub-arrays.
|
260
|
+
#
|
261
|
+
# @return [Expr]
|
262
|
+
#
|
263
|
+
# @example
|
264
|
+
# df = Polars::DataFrame.new(
|
265
|
+
# {"a" => [[1, 2], [4, 3]]},
|
266
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
267
|
+
# )
|
268
|
+
# df.select(Polars.col("a").arr.std)
|
269
|
+
# # =>
|
270
|
+
# # shape: (2, 1)
|
271
|
+
# # ┌──────────┐
|
272
|
+
# # │ a │
|
273
|
+
# # │ --- │
|
274
|
+
# # │ f64 │
|
275
|
+
# # ╞══════════╡
|
276
|
+
# # │ 0.707107 │
|
277
|
+
# # │ 0.707107 │
|
278
|
+
# # └──────────┘
|
279
|
+
def std(ddof: 1)
  # `ddof` is forwarded unchanged to the native expression.
  native = _rbexpr.arr_std(ddof)
  Utils.wrap_expr(native)
end
|
282
|
+
|
283
|
+
# Compute the var of the values of the sub-arrays.
|
284
|
+
#
|
285
|
+
# @return [Expr]
|
286
|
+
#
|
287
|
+
# @example
|
288
|
+
# df = Polars::DataFrame.new(
|
289
|
+
# {"a" => [[1, 2], [4, 3]]},
|
290
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
291
|
+
# )
|
292
|
+
# df.select(Polars.col("a").arr.var)
|
293
|
+
# # =>
|
294
|
+
# # shape: (2, 1)
|
295
|
+
# # ┌─────┐
|
296
|
+
# # │ a │
|
297
|
+
# # │ --- │
|
298
|
+
# # │ f64 │
|
299
|
+
# # ╞═════╡
|
300
|
+
# # │ 0.5 │
|
301
|
+
# # │ 0.5 │
|
302
|
+
# # └─────┘
|
303
|
+
def var(ddof: 1)
  # `ddof` is forwarded unchanged to the native expression.
  native = _rbexpr.arr_var(ddof)
  Utils.wrap_expr(native)
end
|
306
|
+
|
307
|
+
# Compute the mean of the values of the sub-arrays.
|
308
|
+
#
|
309
|
+
# @return [Expr]
|
310
|
+
#
|
311
|
+
# @example
|
312
|
+
# df = Polars::DataFrame.new(
|
313
|
+
# {"a" => [[1, 2, 3], [1, 1, 16]]},
|
314
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
315
|
+
# )
|
316
|
+
# df.select(Polars.col("a").arr.mean)
|
317
|
+
# # =>
|
318
|
+
# # shape: (2, 1)
|
319
|
+
# # ┌─────┐
|
320
|
+
# # │ a │
|
321
|
+
# # │ --- │
|
322
|
+
# # │ f64 │
|
323
|
+
# # ╞═════╡
|
324
|
+
# # │ 2.0 │
|
325
|
+
# # │ 6.0 │
|
326
|
+
# # └─────┘
|
327
|
+
def mean
  # Delegate to the native expression and wrap the result as an Expr.
  native = _rbexpr.arr_mean
  Utils.wrap_expr(native)
end
|
330
|
+
|
331
|
+
# Compute the median of the values of the sub-arrays.
|
332
|
+
#
|
333
|
+
# @return [Expr]
|
334
|
+
#
|
335
|
+
# @example
|
336
|
+
# df = Polars::DataFrame.new(
|
337
|
+
# {"a" => [[1, 2], [4, 3]]},
|
338
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
339
|
+
# )
|
340
|
+
# df.select(Polars.col("a").arr.median)
|
341
|
+
# # =>
|
342
|
+
# # shape: (2, 1)
|
343
|
+
# # ┌─────┐
|
344
|
+
# # │ a │
|
345
|
+
# # │ --- │
|
346
|
+
# # │ f64 │
|
347
|
+
# # ╞═════╡
|
348
|
+
# # │ 1.5 │
|
349
|
+
# # │ 3.5 │
|
350
|
+
# # └─────┘
|
351
|
+
def median
  # Delegate to the native expression and wrap the result as an Expr.
  native = _rbexpr.arr_median
  Utils.wrap_expr(native)
end
|
83
354
|
|
84
355
|
# Get the unique/distinct values in the array.
|
@@ -109,6 +380,32 @@ module Polars
|
|
109
380
|
Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
|
110
381
|
end
|
111
382
|
|
383
|
+
# Count the number of unique values in every sub-array.
|
384
|
+
#
|
385
|
+
# @return [Expr]
|
386
|
+
#
|
387
|
+
# @example
|
388
|
+
# df = Polars::DataFrame.new(
|
389
|
+
# {
|
390
|
+
# "a" => [[1, 1, 2], [2, 3, 4]],
|
391
|
+
# },
|
392
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
393
|
+
# )
|
394
|
+
# df.with_columns(n_unique: Polars.col("a").arr.n_unique)
|
395
|
+
# # =>
|
396
|
+
# # shape: (2, 2)
|
397
|
+
# # ┌───────────────┬──────────┐
|
398
|
+
# # │ a ┆ n_unique │
|
399
|
+
# # │ --- ┆ --- │
|
400
|
+
# # │ array[i64, 3] ┆ u32 │
|
401
|
+
# # ╞═══════════════╪══════════╡
|
402
|
+
# # │ [1, 1, 2] ┆ 2 │
|
403
|
+
# # │ [2, 3, 4] ┆ 3 │
|
404
|
+
# # └───────────────┴──────────┘
|
405
|
+
def n_unique
  # Delegate to the native expression and wrap the result as an Expr.
  native = _rbexpr.arr_n_unique
  Utils.wrap_expr(native)
end
|
408
|
+
|
112
409
|
# Convert an Array column into a List column with the same inner data type.
|
113
410
|
#
|
114
411
|
# @return [Expr]
|
@@ -535,5 +832,87 @@ module Polars
|
|
535
832
|
element = Utils.parse_into_expression(element, str_as_lit: true)
|
536
833
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
537
834
|
end
|
835
|
+
|
836
|
+
# Convert the Series of type `Array` to a Series of type `Struct`.
|
837
|
+
#
|
838
|
+
# @param fields [Object]
|
839
|
+
# If the names and number of the desired fields are known in advance
|
840
|
+
# a list of field names can be given, which will be assigned by index.
|
841
|
+
# Otherwise, to dynamically assign field names, a custom function can be
|
842
|
+
# used; if neither is set, fields will be `field_0, field_1 .. field_n`. NOTE: any truthy `fields` currently raises `Todo`.
|
843
|
+
#
|
844
|
+
# @return [Expr]
|
845
|
+
#
|
846
|
+
# @example Convert array to struct with default field name assignment:
|
847
|
+
# df = Polars::DataFrame.new(
|
848
|
+
# {"n" => [[0, 1, 2], [3, 4, 5]]}, schema: {"n" => Polars::Array.new(Polars::Int8, 3)}
|
849
|
+
# )
|
850
|
+
# df.with_columns(struct: Polars.col("n").arr.to_struct)
|
851
|
+
# # =>
|
852
|
+
# # shape: (2, 2)
|
853
|
+
# # ┌──────────────┬───────────┐
|
854
|
+
# # │ n ┆ struct │
|
855
|
+
# # │ --- ┆ --- │
|
856
|
+
# # │ array[i8, 3] ┆ struct[3] │
|
857
|
+
# # ╞══════════════╪═══════════╡
|
858
|
+
# # │ [0, 1, 2] ┆ {0,1,2} │
|
859
|
+
# # │ [3, 4, 5] ┆ {3,4,5} │
|
860
|
+
# # └──────────────┴───────────┘
|
861
|
+
def to_struct(fields: nil)
  # Custom field naming is not implemented yet: any truthy `fields` is rejected.
  raise Todo if fields

  # Without an enumerable of names, hand `fields` straight to the native call.
  return Utils.wrap_expr(_rbexpr.arr_to_struct(fields)) unless fields.is_a?(Enumerable)

  # NOTE(review): this path cannot be reached while the guard above raises for
  # every truthy `fields`; it is kept as the intended implementation for a
  # list of field names (convert with default names, then rename by index).
  names = fields.to_a
  default_struct = _rbexpr.arr_to_struct(nil)
  Utils.wrap_expr(default_struct).struct.rename_fields(names)
end
|
872
|
+
|
873
|
+
# Shift array values by the given number of indices.
|
874
|
+
#
|
875
|
+
# @param n [Integer]
|
876
|
+
# Number of indices to shift forward. If a negative value is passed, values
|
877
|
+
# are shifted in the opposite direction instead.
|
878
|
+
#
|
879
|
+
# @return [Expr]
|
880
|
+
#
|
881
|
+
# @note
|
882
|
+
# This method is similar to the `LAG` operation in SQL when the value for `n`
|
883
|
+
# is positive. With a negative value for `n`, it is similar to `LEAD`.
|
884
|
+
#
|
885
|
+
# @example By default, array values are shifted forward by one index.
|
886
|
+
# df = Polars::DataFrame.new(
|
887
|
+
# {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
888
|
+
# )
|
889
|
+
# df.with_columns(shift: Polars.col("a").arr.shift)
|
890
|
+
# # =>
|
891
|
+
# # shape: (2, 2)
|
892
|
+
# # ┌───────────────┬───────────────┐
|
893
|
+
# # │ a ┆ shift │
|
894
|
+
# # │ --- ┆ --- │
|
895
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
896
|
+
# # ╞═══════════════╪═══════════════╡
|
897
|
+
# # │ [1, 2, 3] ┆ [null, 1, 2] │
|
898
|
+
# # │ [4, 5, 6] ┆ [null, 4, 5] │
|
899
|
+
# # └───────────────┴───────────────┘
|
900
|
+
#
|
901
|
+
# @example Pass a negative value to shift in the opposite direction instead.
|
902
|
+
# df.with_columns(shift: Polars.col("a").arr.shift(-2))
|
903
|
+
# # =>
|
904
|
+
# # shape: (2, 2)
|
905
|
+
# # ┌───────────────┬─────────────────┐
|
906
|
+
# # │ a ┆ shift │
|
907
|
+
# # │ --- ┆ --- │
|
908
|
+
# # │ array[i64, 3] ┆ array[i64, 3] │
|
909
|
+
# # ╞═══════════════╪═════════════════╡
|
910
|
+
# # │ [1, 2, 3] ┆ [3, null, null] │
|
911
|
+
# # │ [4, 5, 6] ┆ [6, null, null] │
|
912
|
+
# # └───────────────┴─────────────────┘
|
913
|
+
def shift(n = 1)
  # Convert `n` to an expression before handing it to the native call.
  offset_expr = Utils.parse_into_expression(n)
  shifted = _rbexpr.arr_shift(offset_expr)
  Utils.wrap_expr(shifted)
end
|
538
917
|
end
|
539
918
|
end
|