polars-df 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,346 @@
1
+ module Polars
2
+ # Series.arr namespace.
3
+ class ListNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "arr"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Get the length of the arrays as UInt32.
14
+ #
15
+ # @return [Series]
16
+ #
17
+ # @example
18
+ # s = Polars::Series.new([[1, 2, 3], [5]])
19
+ # s.arr.lengths
20
+ # # =>
21
+ # # shape: (2,)
22
+ # # Series: '' [u32]
23
+ # # [
24
+ # # 3
25
+ # # 1
26
+ # # ]
27
+ def lengths
28
+ super
29
+ end
30
+
31
+ # Sum all the arrays in the list.
32
+ #
33
+ # @return [Series]
34
+ def sum
35
+ super
36
+ end
37
+
38
+ # Compute the max value of the arrays in the list.
39
+ #
40
+ # @return [Series]
41
+ def max
42
+ super
43
+ end
44
+
45
+ # Compute the min value of the arrays in the list.
46
+ #
47
+ # @return [Series]
48
+ def min
49
+ super
50
+ end
51
+
52
+ # Compute the mean value of the arrays in the list.
53
+ #
54
+ # @return [Series]
55
+ def mean
56
+ super
57
+ end
58
+
59
+ # Sort the arrays in the list.
60
+ #
61
+ # @return [Series]
62
+ def sort(reverse: false)
63
+ super
64
+ end
65
+
66
+ # Reverse the arrays in the list.
67
+ #
68
+ # @return [Series]
69
+ def reverse
70
+ super
71
+ end
72
+
73
+ # Get the unique/distinct values in the list.
74
+ #
75
+ # @return [Series]
76
+ def unique
77
+ super
78
+ end
79
+
80
+ # Concatenate the arrays in a Series of dtype List in linear time.
81
+ #
82
+ # @param other [Object]
83
+ # Columns to concat into a List Series
84
+ #
85
+ # @return [Series]
86
+ def concat(other)
87
+ super
88
+ end
89
+
90
+ # Get the value by index in the sublists.
91
+ #
92
+ # So index `0` would return the first item of every sublist
93
+ # and index `-1` would return the last item of every sublist.
94
+ # If an index is out of bounds, it will return a null value.
95
+ #
96
+ # @param index [Integer]
97
+ # Index to return per sublist
98
+ #
99
+ # @return [Series]
100
+ def get(index)
101
+ super
102
+ end
103
+
104
+ # Get the value by index in the sublists.
105
+ #
106
+ # @return [Series]
107
+ def [](item)
108
+ get(item)
109
+ end
110
+
111
+ # Join all string items in a sublist and place a separator between them.
112
+ #
113
+ # This errors if the inner type of the list is not `Utf8`.
114
+ #
115
+ # @param separator [String]
116
+ # String to separate the items with.
117
+ #
118
+ # @return [Series]
119
+ #
120
+ # @example
121
+ # s = Polars::Series.new([["foo", "bar"], ["hello", "world"]])
122
+ # s.arr.join("-")
123
+ # # =>
124
+ # # shape: (2,)
125
+ # # Series: '' [str]
126
+ # # [
127
+ # # "foo-bar"
128
+ # # "hello-world"
129
+ # # ]
130
+ def join(separator)
131
+ super
132
+ end
133
+
134
+ # Get the first value of the sublists.
135
+ #
136
+ # @return [Series]
137
+ def first
138
+ super
139
+ end
140
+
141
+ # Get the last value of the sublists.
142
+ #
143
+ # @return [Series]
144
+ def last
145
+ super
146
+ end
147
+
148
+ # Check if sublists contain the given item.
149
+ #
150
+ # @param item [Object]
151
+ # Item that will be checked for membership.
152
+ #
153
+ # @return [Series]
154
+ def contains(item)
155
+ super
156
+ end
157
+
158
+ # Retrieve the index of the minimal value in every sublist.
159
+ #
160
+ # @return [Series]
161
+ def arg_min
162
+ super
163
+ end
164
+
165
+ # Retrieve the index of the maximum value in every sublist.
166
+ #
167
+ # @return [Series]
168
+ def arg_max
169
+ super
170
+ end
171
+
172
+ # Calculate the n-th discrete difference of every sublist.
173
+ #
174
+ # @param n [Integer]
175
+ # Number of slots to shift.
176
+ # @param null_behavior ["ignore", "drop"]
177
+ # How to handle null values.
178
+ #
179
+ # @return [Series]
180
+ #
181
+ # @example
182
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
183
+ # s.arr.diff
184
+ # # =>
185
+ # # shape: (2,)
186
+ # # Series: 'a' [list]
187
+ # # [
188
+ # # [null, 1, ... 1]
189
+ # # [null, -8, -1]
190
+ # # ]
191
+ def diff(n: 1, null_behavior: "ignore")
192
+ super
193
+ end
194
+
195
+ # Shift values by the given period.
196
+ #
197
+ # @param periods [Integer]
198
+ # Number of places to shift (may be negative).
199
+ #
200
+ # @return [Series]
201
+ #
202
+ # @example
203
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
204
+ # s.arr.shift
205
+ # # =>
206
+ # # shape: (2,)
207
+ # # Series: 'a' [list]
208
+ # # [
209
+ # # [null, 1, ... 3]
210
+ # # [null, 10, 2]
211
+ # # ]
212
+ def shift(periods = 1)
213
+ super
214
+ end
215
+
216
+ # Slice every sublist.
217
+ #
218
+ # @param offset [Integer]
219
+ # Start index. Negative indexing is supported.
220
+ # @param length [Integer]
221
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
222
+ # end of the list.
223
+ #
224
+ # @return [Series]
225
+ #
226
+ # @example
227
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
228
+ # s.arr.slice(1, 2)
229
+ # # =>
230
+ # # shape: (2,)
231
+ # # Series: 'a' [list]
232
+ # # [
233
+ # # [2, 3]
234
+ # # [2, 1]
235
+ # # ]
236
+ def slice(offset, length = nil)
237
+ super
238
+ end
239
+
240
+ # Slice the first `n` values of every sublist.
241
+ #
242
+ # @param n [Integer]
243
+ # Number of values to return for each sublist.
244
+ #
245
+ # @return [Series]
246
+ #
247
+ # @example
248
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
249
+ # s.arr.head(2)
250
+ # # =>
251
+ # # shape: (2,)
252
+ # # Series: 'a' [list]
253
+ # # [
254
+ # # [1, 2]
255
+ # # [10, 2]
256
+ # # ]
257
+ def head(n = 5)
258
+ super
259
+ end
260
+
261
+ # Slice the last `n` values of every sublist.
262
+ #
263
+ # @param n [Integer]
264
+ # Number of values to return for each sublist.
265
+ #
266
+ # @return [Series]
267
+ #
268
+ # @example
269
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
270
+ # s.arr.tail(2)
271
+ # # =>
272
+ # # shape: (2,)
273
+ # # Series: 'a' [list]
274
+ # # [
275
+ # # [3, 4]
276
+ # # [2, 1]
277
+ # # ]
278
+ def tail(n = 5)
279
+ super
280
+ end
281
+
282
+ # Convert the series of type `List` to a series of type `Struct`.
283
+ #
284
+ # @param n_field_strategy ["first_non_null", "max_width"]
285
+ # Strategy to determine the number of fields of the struct.
286
+ # @param name_generator [Object]
287
+ # A custom function that can be used to generate the field names.
288
+ # Default field names are `field_0, field_1 .. field_n`
289
+ #
290
+ # @return [Series]
291
+ #
292
+ # @example
293
+ # df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
294
+ # df.select([Polars.col("a").arr.to_struct])
295
+ # # =>
296
+ # # shape: (2, 1)
297
+ # # ┌────────────┐
298
+ # # │ a │
299
+ # # │ --- │
300
+ # # │ struct[3] │
301
+ # # ╞════════════╡
302
+ # # │ {1,2,3} │
303
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
304
+ # # │ {1,2,null} │
305
+ # # └────────────┘
306
+ def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
307
+ super
308
+ end
309
+
310
+ # Run any polars expression against the lists' elements.
311
+ #
312
+ # @param expr [Expr]
313
+ # Expression to run. Note that you can select an element with `Polars.first`, or
314
+ # `Polars.col`
315
+ # @param parallel [Boolean]
316
+ # Run all expressions in parallel. Don't activate this blindly.
317
+ # Parallelism is worth it if there is enough work to do per thread.
318
+ #
319
+ # This likely should not be used in the groupby context, because we already
320
+ # parallelize execution per group.
321
+ #
322
+ # @return [Series]
323
+ #
324
+ # @example
325
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
326
+ # df.with_column(
327
+ # Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
328
+ # )
329
+ # # =>
330
+ # # shape: (3, 3)
331
+ # # ┌─────┬─────┬────────────┐
332
+ # # │ a ┆ b ┆ rank │
333
+ # # │ --- ┆ --- ┆ --- │
334
+ # # │ i64 ┆ i64 ┆ list[f32] │
335
+ # # ╞═════╪═════╪════════════╡
336
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
337
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
338
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
339
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
340
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
341
+ # # └─────┴─────┴────────────┘
342
+ def eval(expr, parallel: false)
343
+ super
344
+ end
345
+ end
346
+ end