polars-df 0.2.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +33 -0
- data/Cargo.lock +2230 -0
- data/Cargo.toml +10 -0
- data/LICENSE-THIRD-PARTY.txt +38856 -0
- data/LICENSE.txt +20 -0
- data/README.md +91 -0
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +96 -0
- data/lib/polars/cat_expr.rb +52 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +4833 -0
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +1418 -0
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +5307 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +453 -0
- data/lib/polars/group_by.rb +558 -0
- data/lib/polars/io.rb +814 -0
- data/lib/polars/lazy_frame.rb +2442 -0
- data/lib/polars/lazy_functions.rb +1195 -0
- data/lib/polars/lazy_group_by.rb +93 -0
- data/lib/polars/list_expr.rb +610 -0
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +54 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +3730 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +972 -0
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +100 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +192 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/when.rb +16 -0
- data/lib/polars/when_then.rb +19 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +50 -0
- metadata +89 -0
@@ -0,0 +1,346 @@
|
|
1
|
+
module Polars
  # Series.arr namespace.
  #
  # Groups the list operations that are reached through `Series#arr`
  # (for example `s.arr.lengths`).
  #
  # NOTE(review): every public method below only calls `super`; the actual
  # dispatch is presumably provided by `ExprDispatch` (not visible in this
  # file) together with the `"arr"` accessor set below — confirm there.
  class ListNameSpace
    include ExprDispatch

    self._accessor = "arr"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Get the length of the arrays as UInt32.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new([[1, 2, 3], [5]])
    #   s.arr.lengths
    #   # =>
    #   # shape: (2,)
    #   # Series: '' [u32]
    #   # [
    #   #         3
    #   #         1
    #   # ]
    def lengths
      super
    end

    # Sum all the arrays in the list.
    #
    # @return [Series]
    def sum
      super
    end

    # Compute the max value of the arrays in the list.
    #
    # @return [Series]
    def max
      super
    end

    # Compute the min value of the arrays in the list.
    #
    # @return [Series]
    def min
      super
    end

    # Compute the mean value of the arrays in the list.
    #
    # @return [Series]
    def mean
      super
    end

    # Sort the arrays in the list.
    #
    # @param reverse [Boolean]
    #   Reverse the sort order (defaults to `false`).
    #
    # @return [Series]
    def sort(reverse: false)
      super
    end

    # Reverse the arrays in the list.
    #
    # @return [Series]
    def reverse
      super
    end

    # Get the unique/distinct values in the list.
    #
    # @return [Series]
    def unique
      super
    end

    # Concat the arrays in a Series dtype List in linear time.
    #
    # @param other [Object]
    #   Columns to concat into a List Series
    #
    # @return [Series]
    def concat(other)
      super
    end

    # Get the value by index in the sublists.
    #
    # Index `0` returns the first item of every sublist and index `-1`
    # returns the last item of every sublist. If an index is out of bounds,
    # a null value is returned for that sublist.
    #
    # @param index [Integer]
    #   Index to return per sublist
    #
    # @return [Series]
    def get(index)
      super
    end

    # Get the value by index in the sublists.
    #
    # Shorthand for {#get}.
    #
    # @param item [Integer]
    #   Index to return per sublist
    #
    # @return [Series]
    def [](item)
      get(item)
    end

    # Join all string items in a sublist and place a separator between them.
    #
    # This errors if inner type of list `!= Utf8`.
    #
    # @param separator [String]
    #   string to separate the items with
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new([["foo", "bar"], ["hello", "world"]])
    #   s.arr.join("-")
    #   # =>
    #   # shape: (2,)
    #   # Series: '' [str]
    #   # [
    #   #         "foo-bar"
    #   #         "hello-world"
    #   # ]
    def join(separator)
      super
    end

    # Get the first value of the sublists.
    #
    # @return [Series]
    def first
      super
    end

    # Get the last value of the sublists.
    #
    # @return [Series]
    def last
      super
    end

    # Check if sublists contain the given item.
    #
    # @param item [Object]
    #   Item that will be checked for membership.
    #
    # @return [Series]
    def contains(item)
      super
    end

    # Retrieve the index of the minimal value in every sublist.
    #
    # @return [Series]
    def arg_min
      super
    end

    # Retrieve the index of the maximum value in every sublist.
    #
    # @return [Series]
    def arg_max
      super
    end

    # Calculate the n-th discrete difference of every sublist.
    #
    # @param n [Integer]
    #   Number of slots to shift (defaults to `1`).
    # @param null_behavior ["ignore", "drop"]
    #   How to handle null values (defaults to `"ignore"`).
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.diff
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [null, 1, ... 1]
    #   #         [null, -8, -1]
    #   # ]
    def diff(n: 1, null_behavior: "ignore")
      super
    end

    # Shift values by the given period.
    #
    # @param periods [Integer]
    #   Number of places to shift (may be negative). Defaults to `1`.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.shift
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [null, 1, ... 3]
    #   #         [null, 10, 2]
    #   # ]
    def shift(periods = 1)
      super
    end

    # Slice every sublist.
    #
    # @param offset [Integer]
    #   Start index. Negative indexing is supported.
    # @param length [Integer]
    #   Length of the slice. If set to `nil` (default), the slice is taken to the
    #   end of the list.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.slice(1, 2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [2, 3]
    #   #         [2, 1]
    #   # ]
    def slice(offset, length = nil)
      super
    end

    # Slice the first `n` values of every sublist.
    #
    # @param n [Integer]
    #   Number of values to return for each sublist (defaults to `5`).
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.head(2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [1, 2]
    #   #         [10, 2]
    #   # ]
    def head(n = 5)
      super
    end

    # Slice the last `n` values of every sublist.
    #
    # @param n [Integer]
    #   Number of values to return for each sublist (defaults to `5`).
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.tail(2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [3, 4]
    #   #         [2, 1]
    #   # ]
    def tail(n = 5)
      super
    end

    # Convert the series of type `List` to a series of type `Struct`.
    #
    # @param n_field_strategy ["first_non_null", "max_width"]
    #   Strategy to determine the number of fields of the struct.
    # @param name_generator [Object]
    #   A custom function that can be used to generate the field names.
    #   Default field names are `field_0, field_1 .. field_n`
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
    #   df.select([Polars.col("a").arr.to_struct])
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────────┐
    #   # │ a          │
    #   # │ ---        │
    #   # │ struct[3]  │
    #   # ╞════════════╡
    #   # │ {1,2,3}    │
    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
    #   # │ {1,2,null} │
    #   # └────────────┘
    def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
      super
    end

    # Run any polars expression against the lists' elements.
    #
    # @param expr [Expr]
    #   Expression to run. Note that you can select an element with `Polars.first`, or
    #   `Polars.col`
    # @param parallel [Boolean]
    #   Run all expressions in parallel. Don't activate this blindly.
    #   Parallelism is worth it if there is enough work to do per thread.
    #
    #   This likely should not be used in the groupby context, because there is
    #   already parallel execution per group.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
    #   df.with_column(
    #     Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
    #   )
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬────────────┐
    #   # │ a   ┆ b   ┆ rank       │
    #   # │ --- ┆ --- ┆ ---        │
    #   # │ i64 ┆ i64 ┆ list[f32]  │
    #   # ╞═════╪═════╪════════════╡
    #   # │ 1   ┆ 4   ┆ [1.0, 2.0] │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    #   # │ 8   ┆ 5   ┆ [2.0, 1.0] │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    #   # │ 3   ┆ 2   ┆ [2.0, 1.0] │
    #   # └─────┴─────┴────────────┘
    def eval(expr, parallel: false)
      super
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Polars
  # Namespace for expressions on a meta level.
  #
  # Wraps the low-level `_rbexpr` handle of an expression and forwards the
  # `meta_*` calls to it.
  class MetaExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Equal.
    #
    # Delegates to `meta_eq` on the wrapped handle. Objects that do not expose
    # an `_rbexpr` handle (for example `nil`) can never be equal, so `false`
    # is returned for them instead of raising `NoMethodError`.
    #
    # @param other [Object]
    #   Object to compare with; anything responding to `_rbexpr` is compared
    #   through `meta_eq`.
    #
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:_rbexpr)

      _rbexpr.meta_eq(other._rbexpr)
    end

    # Not equal.
    #
    # @param other [Object]
    #   Object to compare with.
    #
    # @return [Boolean]
    def !=(other)
      !(self == other)
    end

    # Pop the latest expression and return the input(s) of the popped expression.
    #
    # Each element returned by `meta_pop` is wrapped with `Utils.wrap_expr`.
    #
    # @return [Array]
    def pop
      _rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
    end

    # Get a list with the root column name.
    #
    # @return [Array]
    def root_names
      _rbexpr.meta_roots
    end

    # Get the column name that this expression would produce.
    #
    # @return [String]
    def output_name
      _rbexpr.meta_output_name
    end

    # Undo any renaming operation like `alias` or `keep_name`.
    #
    # @return [Expr]
    def undo_aliases
      Utils.wrap_expr(_rbexpr.meta_undo_aliases)
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Polars
  # A rolling grouper.
  #
  # Its `agg` method runs polars expressions in a rolling groupby context.
  class RollingGroupBy
    # Remember the frame and the rolling-window settings for a later `agg`.
    #
    # `period` and `offset` are passed through
    # `Utils._timedelta_to_pl_duration` before being stored; every other
    # argument is stored as given.
    #
    # @param df [Object]
    #   Frame to aggregate; it must respond to `lazy`.
    # @param index_column [Object]
    #   Forwarded as `index_column:` to `groupby_rolling`.
    # @param period [Object]
    #   Window length, converted with `Utils._timedelta_to_pl_duration`.
    # @param offset [Object]
    #   Window offset, converted with `Utils._timedelta_to_pl_duration`.
    # @param closed [Object]
    #   Forwarded as `closed:` to `groupby_rolling`.
    # @param by [Object]
    #   Forwarded as `by:` to `groupby_rolling`.
    def initialize(df, index_column, period, offset, closed, by)
      @df = df
      @time_column = index_column
      @period = Utils._timedelta_to_pl_duration(period)
      @offset = Utils._timedelta_to_pl_duration(offset)
      @closed = closed
      @by = by
    end

    # Aggregate the rolling groups.
    #
    # Builds the query on the lazy version of the stored frame and collects it
    # with `no_optimization: true` and `string_cache: false`.
    #
    # @param aggs [Object]
    #   Aggregation(s), handed unchanged to the lazy group-by's `agg`.
    #
    # @return [Object]
    #   Whatever `collect` returns — presumably a DataFrame; confirm against
    #   the lazy frame implementation.
    def agg(aggs)
      rolling_options = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @by
      }

      rolling_groups = @df.lazy.groupby_rolling(**rolling_options)
      rolling_groups.agg(aggs).collect(no_optimization: true, string_cache: false)
    end
  end
end
|