polars-df 0.8.0-aarch64-linux → 0.10.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -1
- data/Cargo.lock +159 -66
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +3112 -1613
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +453 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +306 -96
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +41 -18
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +898 -215
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +36 -31
- data/lib/polars/lazy_frame.rb +405 -88
- data/lib/polars/list_expr.rb +158 -8
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +282 -41
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +413 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +106 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +16 -4
- metadata +34 -6
- data/lib/polars/lazy_functions.rb +0 -1181
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -73,5 +73,351 @@ module Polars
|
|
73
73
|
def sum
|
74
74
|
super
|
75
75
|
end
|
76
|
+
|
77
|
+
# Get the unique/distinct values in the array.
|
78
|
+
#
|
79
|
+
# @param maintain_order [Boolean]
|
80
|
+
# Maintain order of data. This requires more work.
|
81
|
+
#
|
82
|
+
# @return [Series]
|
83
|
+
#
|
84
|
+
# @example
|
85
|
+
# df = Polars::DataFrame.new(
|
86
|
+
# {
|
87
|
+
# "a" => [[1, 1, 2]]
|
88
|
+
# },
|
89
|
+
# schema_overrides: {"a" => Polars::Array.new(Polars::Int64, 3)}
|
90
|
+
# )
|
91
|
+
# df.select(Polars.col("a").arr.unique)
|
92
|
+
# # =>
|
93
|
+
# # shape: (1, 1)
|
94
|
+
# # ┌───────────┐
|
95
|
+
# # │ a │
|
96
|
+
# # │ --- │
|
97
|
+
# # │ list[i64] │
|
98
|
+
# # ╞═══════════╡
|
99
|
+
# # │ [1, 2] │
|
100
|
+
# # └───────────┘
|
101
|
+
def unique(maintain_order: false)
|
102
|
+
super
|
103
|
+
end
|
104
|
+
|
105
|
+
# Convert an Array column into a List column with the same inner data type.
|
106
|
+
#
|
107
|
+
# @return [Series]
|
108
|
+
#
|
109
|
+
# @example
|
110
|
+
# s = Polars::Series.new([[1, 2], [3, 4]], dtype: Polars::Array.new(Polars::Int8, 2))
|
111
|
+
# s.arr.to_list
|
112
|
+
# # =>
|
113
|
+
# # shape: (2,)
|
114
|
+
# # Series: '' [list[i8]]
|
115
|
+
# # [
|
116
|
+
# # [1, 2]
|
117
|
+
# # [3, 4]
|
118
|
+
# # ]
|
119
|
+
def to_list
|
120
|
+
super
|
121
|
+
end
|
122
|
+
|
123
|
+
# Evaluate whether any boolean value is true for every subarray.
|
124
|
+
#
|
125
|
+
# @return [Series]
|
126
|
+
#
|
127
|
+
# @example
|
128
|
+
# s = Polars::Series.new(
|
129
|
+
# [[true, true], [false, true], [false, false], [nil, nil], nil],
|
130
|
+
# dtype: Polars::Array.new(Polars::Boolean, 2)
|
131
|
+
# )
|
132
|
+
# s.arr.any
|
133
|
+
# # =>
|
134
|
+
# # shape: (5,)
|
135
|
+
# # Series: '' [bool]
|
136
|
+
# # [
|
137
|
+
# # true
|
138
|
+
# # true
|
139
|
+
# # false
|
140
|
+
# # false
|
141
|
+
# # null
|
142
|
+
# # ]
|
143
|
+
def any
|
144
|
+
super
|
145
|
+
end
|
146
|
+
|
147
|
+
# Evaluate whether all boolean values are true for every subarray.
|
148
|
+
#
|
149
|
+
# @return [Series]
|
150
|
+
#
|
151
|
+
# @example
|
152
|
+
# s = Polars::Series.new(
|
153
|
+
# [[true, true], [false, true], [false, false], [nil, nil], nil],
|
154
|
+
# dtype: Polars::Array.new(Polars::Boolean, 2)
|
155
|
+
# )
|
156
|
+
# s.arr.all
|
157
|
+
# # =>
|
158
|
+
# # shape: (5,)
|
159
|
+
# # Series: '' [bool]
|
160
|
+
# # [
|
161
|
+
# # true
|
162
|
+
# # false
|
163
|
+
# # false
|
164
|
+
# # true
|
165
|
+
# # null
|
166
|
+
# # ]
|
167
|
+
def all
|
168
|
+
super
|
169
|
+
end
|
170
|
+
|
171
|
+
# Sort the arrays in this column.
|
172
|
+
#
|
173
|
+
# @param descending [Boolean]
|
174
|
+
# Sort in descending order.
|
175
|
+
# @param nulls_last [Boolean]
|
176
|
+
# Place null values last.
|
177
|
+
#
|
178
|
+
# @return [Series]
|
179
|
+
#
|
180
|
+
# @example
|
181
|
+
# s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
|
182
|
+
# s.arr.sort
|
183
|
+
# # =>
|
184
|
+
# # shape: (2,)
|
185
|
+
# # Series: 'a' [array[i64, 3]]
|
186
|
+
# # [
|
187
|
+
# # [1, 2, 3]
|
188
|
+
# # [1, 2, 9]
|
189
|
+
# # ]
|
190
|
+
#
|
191
|
+
# @example
|
192
|
+
# s.arr.sort(descending: true)
|
193
|
+
# # =>
|
194
|
+
# # shape: (2,)
|
195
|
+
# # Series: 'a' [array[i64, 3]]
|
196
|
+
# # [
|
197
|
+
# # [3, 2, 1]
|
198
|
+
# # [9, 2, 1]
|
199
|
+
# # ]
|
200
|
+
def sort(descending: false, nulls_last: false)
|
201
|
+
super
|
202
|
+
end
|
203
|
+
|
204
|
+
# Reverse the arrays in this column.
|
205
|
+
#
|
206
|
+
# @return [Series]
|
207
|
+
#
|
208
|
+
# @example
|
209
|
+
# s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
|
210
|
+
# s.arr.reverse
|
211
|
+
# # =>
|
212
|
+
# # shape: (2,)
|
213
|
+
# # Series: 'a' [array[i64, 3]]
|
214
|
+
# # [
|
215
|
+
# # [1, 2, 3]
|
216
|
+
# # [2, 1, 9]
|
217
|
+
# # ]
|
218
|
+
def reverse
|
219
|
+
super
|
220
|
+
end
|
221
|
+
|
222
|
+
# Retrieve the index of the minimal value in every sub-array.
|
223
|
+
#
|
224
|
+
# @return [Series]
|
225
|
+
#
|
226
|
+
# @example
|
227
|
+
# s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
|
228
|
+
# s.arr.arg_min
|
229
|
+
# # =>
|
230
|
+
# # shape: (2,)
|
231
|
+
# # Series: 'a' [u32]
|
232
|
+
# # [
|
233
|
+
# # 2
|
234
|
+
# # 1
|
235
|
+
# # ]
|
236
|
+
def arg_min
|
237
|
+
super
|
238
|
+
end
|
239
|
+
|
240
|
+
# Retrieve the index of the maximum value in every sub-array.
|
241
|
+
#
|
242
|
+
# @return [Series]
|
243
|
+
#
|
244
|
+
# @example
|
245
|
+
# s = Polars::Series.new("a", [[0, 9, 3], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
|
246
|
+
# s.arr.arg_max
|
247
|
+
# # =>
|
248
|
+
# # shape: (2,)
|
249
|
+
# # Series: 'a' [u32]
|
250
|
+
# # [
|
251
|
+
# # 1
|
252
|
+
# # 0
|
253
|
+
# # ]
|
254
|
+
def arg_max
|
255
|
+
super
|
256
|
+
end
|
257
|
+
|
258
|
+
# Get the value by index in the sub-arrays.
|
259
|
+
#
|
260
|
+
# So index `0` would return the first item of every sub-array
|
261
|
+
# and index `-1` would return the last item of every sub-array.
|
262
|
+
# If an index is out of bounds, it will return `nil`.
|
263
|
+
#
|
264
|
+
# @param index [Integer]
|
265
|
+
# Index to return per sub-array
|
266
|
+
#
|
267
|
+
# @return [Series]
|
268
|
+
#
|
269
|
+
# @example
|
270
|
+
# s = Polars::Series.new(
|
271
|
+
# "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
|
272
|
+
# )
|
273
|
+
# s.arr.get(Polars::Series.new([1, -2, 4]))
|
274
|
+
# # =>
|
275
|
+
# # shape: (3,)
|
276
|
+
# # Series: 'a' [i32]
|
277
|
+
# # [
|
278
|
+
# # 2
|
279
|
+
# # 5
|
280
|
+
# # null
|
281
|
+
# # ]
|
282
|
+
def get(index)
|
283
|
+
super
|
284
|
+
end
|
285
|
+
|
286
|
+
# Get the first value of the sub-arrays.
|
287
|
+
#
|
288
|
+
# @return [Series]
|
289
|
+
#
|
290
|
+
# @example
|
291
|
+
# s = Polars::Series.new(
|
292
|
+
# "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
|
293
|
+
# )
|
294
|
+
# s.arr.first
|
295
|
+
# # =>
|
296
|
+
# # shape: (3,)
|
297
|
+
# # Series: 'a' [i32]
|
298
|
+
# # [
|
299
|
+
# # 1
|
300
|
+
# # 4
|
301
|
+
# # 7
|
302
|
+
# # ]
|
303
|
+
def first
|
304
|
+
super
|
305
|
+
end
|
306
|
+
|
307
|
+
# Get the last value of the sub-arrays.
|
308
|
+
#
|
309
|
+
# @return [Series]
|
310
|
+
#
|
311
|
+
# @example
|
312
|
+
# s = Polars::Series.new(
|
313
|
+
# "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
|
314
|
+
# )
|
315
|
+
# s.arr.last
|
316
|
+
# # =>
|
317
|
+
# # shape: (3,)
|
318
|
+
# # Series: 'a' [i32]
|
319
|
+
# # [
|
320
|
+
# # 3
|
321
|
+
# # 6
|
322
|
+
# # 9
|
323
|
+
# # ]
|
324
|
+
def last
|
325
|
+
super
|
326
|
+
end
|
327
|
+
|
328
|
+
# Join all string items in a sub-array and place a separator between them.
|
329
|
+
#
|
330
|
+
# This errors if inner type of array `!= String`.
|
331
|
+
#
|
332
|
+
# @param separator [String]
|
333
|
+
# string to separate the items with
|
334
|
+
# @param ignore_nulls [Boolean]
|
335
|
+
# Ignore null values (default).
|
336
|
+
#
|
337
|
+
# If set to `false`, null values will be propagated.
|
338
|
+
# If the sub-array contains any null values, the output is `nil`.
|
339
|
+
#
|
340
|
+
# @return [Series]
|
341
|
+
#
|
342
|
+
# @example
|
343
|
+
# s = Polars::Series.new([["x", "y"], ["a", "b"]], dtype: Polars::Array.new(Polars::String, 2))
|
344
|
+
# s.arr.join("-")
|
345
|
+
# # =>
|
346
|
+
# # shape: (2,)
|
347
|
+
# # Series: '' [str]
|
348
|
+
# # [
|
349
|
+
# # "x-y"
|
350
|
+
# # "a-b"
|
351
|
+
# # ]
|
352
|
+
def join(separator, ignore_nulls: true)
|
353
|
+
super
|
354
|
+
end
|
355
|
+
|
356
|
+
# Returns a column with a separate row for every array element.
|
357
|
+
#
|
358
|
+
# @return [Series]
|
359
|
+
#
|
360
|
+
# @example
|
361
|
+
# s = Polars::Series.new("a", [[1, 2, 3], [4, 5, 6]], dtype: Polars::Array.new(Polars::Int64, 3))
|
362
|
+
# s.arr.explode
|
363
|
+
# # =>
|
364
|
+
# # shape: (6,)
|
365
|
+
# # Series: 'a' [i64]
|
366
|
+
# # [
|
367
|
+
# # 1
|
368
|
+
# # 2
|
369
|
+
# # 3
|
370
|
+
# # 4
|
371
|
+
# # 5
|
372
|
+
# # 6
|
373
|
+
# # ]
|
374
|
+
def explode
|
375
|
+
super
|
376
|
+
end
|
377
|
+
|
378
|
+
# Check if sub-arrays contain the given item.
|
379
|
+
#
|
380
|
+
# @param item [Object]
|
381
|
+
# Item that will be checked for membership
|
382
|
+
#
|
383
|
+
# @return [Series]
|
384
|
+
#
|
385
|
+
# @example
|
386
|
+
# s = Polars::Series.new(
|
387
|
+
# "a", [[3, 2, 1], [1, 2, 3], [4, 5, 6]], dtype: Polars::Array.new(Polars::Int32, 3)
|
388
|
+
# )
|
389
|
+
# s.arr.contains(1)
|
390
|
+
# # =>
|
391
|
+
# # shape: (3,)
|
392
|
+
# # Series: 'a' [bool]
|
393
|
+
# # [
|
394
|
+
# # true
|
395
|
+
# # true
|
396
|
+
# # false
|
397
|
+
# # ]
|
398
|
+
def contains(item)
|
399
|
+
super
|
400
|
+
end
|
401
|
+
|
402
|
+
# Count how often the value produced by `element` occurs.
|
403
|
+
#
|
404
|
+
# @param element [Object]
|
405
|
+
# An expression that produces a single value
|
406
|
+
#
|
407
|
+
# @return [Series]
|
408
|
+
#
|
409
|
+
# @example
|
410
|
+
# s = Polars::Series.new("a", [[1, 2, 3], [2, 2, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
|
411
|
+
# s.arr.count_matches(2)
|
412
|
+
# # =>
|
413
|
+
# # shape: (2,)
|
414
|
+
# # Series: 'a' [u32]
|
415
|
+
# # [
|
416
|
+
# # 1
|
417
|
+
# # 3
|
418
|
+
# # ]
|
419
|
+
def count_matches(element)
|
420
|
+
super
|
421
|
+
end
|
76
422
|
end
|
77
423
|
end
|
@@ -27,7 +27,8 @@ module Polars
|
|
27
27
|
row_count_offset: 0,
|
28
28
|
sample_size: 1024,
|
29
29
|
eol_char: "\n",
|
30
|
-
new_columns: nil
|
30
|
+
new_columns: nil,
|
31
|
+
truncate_ragged_lines: false
|
31
32
|
)
|
32
33
|
if Utils.pathlike?(file)
|
33
34
|
path = Utils.normalise_filepath(file)
|
@@ -75,7 +76,8 @@ module Polars
|
|
75
76
|
skip_rows_after_header,
|
76
77
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
77
78
|
sample_size,
|
78
|
-
eol_char
|
79
|
+
eol_char,
|
80
|
+
truncate_ragged_lines
|
79
81
|
)
|
80
82
|
self.new_columns = new_columns
|
81
83
|
end
|
data/lib/polars/cat_expr.rb
CHANGED
@@ -44,5 +44,29 @@ module Polars
|
|
44
44
|
def set_ordering(ordering)
|
45
45
|
Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
|
46
46
|
end
|
47
|
+
|
48
|
+
# Get the categories stored in this data type.
|
49
|
+
#
|
50
|
+
# @return [Expr]
|
51
|
+
#
|
52
|
+
# @example
|
53
|
+
# df = Polars::Series.new(
|
54
|
+
# "cats", ["foo", "bar", "foo", "foo", "ham"], dtype: Polars::Categorical
|
55
|
+
# ).to_frame
|
56
|
+
# df.select(Polars.col("cats").cat.get_categories)
|
57
|
+
# # =>
|
58
|
+
# # shape: (3, 1)
|
59
|
+
# # ┌──────┐
|
60
|
+
# # │ cats │
|
61
|
+
# # │ --- │
|
62
|
+
# # │ str │
|
63
|
+
# # ╞══════╡
|
64
|
+
# # │ foo │
|
65
|
+
# # │ bar │
|
66
|
+
# # │ ham │
|
67
|
+
# # └──────┘
|
68
|
+
def get_categories
|
69
|
+
Utils.wrap_expr(_rbexpr.cat_get_categories)
|
70
|
+
end
|
47
71
|
end
|
48
72
|
end
|
@@ -46,5 +46,80 @@ module Polars
|
|
46
46
|
def set_ordering(ordering)
|
47
47
|
super
|
48
48
|
end
|
49
|
+
|
50
|
+
# Get the categories stored in this data type.
|
51
|
+
#
|
52
|
+
# @return [Series]
|
53
|
+
#
|
54
|
+
# @example
|
55
|
+
# s = Polars::Series.new(["foo", "bar", "foo", "foo", "ham"], dtype: Polars::Categorical)
|
56
|
+
# s.cat.get_categories
|
57
|
+
# # =>
|
58
|
+
# # shape: (3,)
|
59
|
+
# # Series: '' [str]
|
60
|
+
# # [
|
61
|
+
# # "foo"
|
62
|
+
# # "bar"
|
63
|
+
# # "ham"
|
64
|
+
# # ]
|
65
|
+
def get_categories
|
66
|
+
super
|
67
|
+
end
|
68
|
+
|
69
|
+
# Return whether or not the column is a local categorical.
|
70
|
+
#
|
71
|
+
# @return [Boolean]
|
72
|
+
#
|
73
|
+
# @example Categoricals constructed without a string cache are considered local.
|
74
|
+
# s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
|
75
|
+
# s.cat.is_local
|
76
|
+
# # => true
|
77
|
+
#
|
78
|
+
# @example Categoricals constructed with a string cache are considered global.
|
79
|
+
# s = nil
|
80
|
+
# Polars::StringCache.new do
|
81
|
+
# s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
|
82
|
+
# end
|
83
|
+
# s.cat.is_local
|
84
|
+
# # => false
|
85
|
+
def is_local
|
86
|
+
_s.cat_is_local
|
87
|
+
end
|
88
|
+
|
89
|
+
# Convert a categorical column to its local representation.
|
90
|
+
#
|
91
|
+
# This may change the underlying physical representation of the column.
|
92
|
+
#
|
93
|
+
# @return [Series]
|
94
|
+
#
|
95
|
+
# @example Compare the global and local representations of a categorical.
|
96
|
+
# s = nil
|
97
|
+
# Polars::StringCache.new do
|
98
|
+
# _ = Polars::Series.new("x", ["a", "b", "a"], dtype: Polars::Categorical)
|
99
|
+
# s = Polars::Series.new("y", ["c", "b", "d"], dtype: Polars::Categorical)
|
100
|
+
# end
|
101
|
+
# s.to_physical
|
102
|
+
# # =>
|
103
|
+
# # shape: (3,)
|
104
|
+
# # Series: 'y' [u32]
|
105
|
+
# # [
|
106
|
+
# # 2
|
107
|
+
# # 1
|
108
|
+
# # 3
|
109
|
+
# # ]
|
110
|
+
#
|
111
|
+
# @example
|
112
|
+
# s.cat.to_local.to_physical
|
113
|
+
# # =>
|
114
|
+
# # shape: (3,)
|
115
|
+
# # Series: 'y' [u32]
|
116
|
+
# # [
|
117
|
+
# # 0
|
118
|
+
# # 1
|
119
|
+
# # 2
|
120
|
+
# # ]
|
121
|
+
def to_local
|
122
|
+
Utils.wrap_s(_s.cat_to_local)
|
123
|
+
end
|
49
124
|
end
|
50
125
|
end
|
data/lib/polars/config.rb
CHANGED
@@ -21,7 +21,7 @@ module Polars
|
|
21
21
|
"POLARS_VERBOSE"
|
22
22
|
]
|
23
23
|
|
24
|
-
POLARS_CFG_DIRECT_VARS = {"set_fmt_float" =>
|
24
|
+
POLARS_CFG_DIRECT_VARS = {"set_fmt_float" => Plr.method(:get_float_fmt)}
|
25
25
|
|
26
26
|
# Initialize a Config object instance for context manager usage.
|
27
27
|
def initialize(restore_defaults: false, **options)
|
@@ -163,7 +163,7 @@ module Polars
|
|
163
163
|
#
|
164
164
|
# @return [Config]
|
165
165
|
def self.set_fmt_float(fmt = "mixed")
|
166
|
-
|
166
|
+
Plr.set_float_fmt(fmt)
|
167
167
|
self
|
168
168
|
end
|
169
169
|
|