polars-df 0.8.0-aarch64-linux → 0.10.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +3112 -1613
  6. data/LICENSE.txt +1 -1
  7. data/README.md +3 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +453 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/batched_csv_reader.rb +4 -2
  14. data/lib/polars/cat_expr.rb +24 -0
  15. data/lib/polars/cat_name_space.rb +75 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/data_frame.rb +306 -96
  18. data/lib/polars/data_types.rb +191 -28
  19. data/lib/polars/date_time_expr.rb +41 -18
  20. data/lib/polars/date_time_name_space.rb +9 -3
  21. data/lib/polars/exceptions.rb +12 -1
  22. data/lib/polars/expr.rb +898 -215
  23. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  24. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  25. data/lib/polars/functions/as_datatype.rb +248 -0
  26. data/lib/polars/functions/col.rb +47 -0
  27. data/lib/polars/functions/eager.rb +182 -0
  28. data/lib/polars/functions/lazy.rb +1280 -0
  29. data/lib/polars/functions/len.rb +49 -0
  30. data/lib/polars/functions/lit.rb +35 -0
  31. data/lib/polars/functions/random.rb +16 -0
  32. data/lib/polars/functions/range/date_range.rb +103 -0
  33. data/lib/polars/functions/range/int_range.rb +51 -0
  34. data/lib/polars/functions/repeat.rb +144 -0
  35. data/lib/polars/functions/whenthen.rb +96 -0
  36. data/lib/polars/functions.rb +29 -416
  37. data/lib/polars/group_by.rb +2 -2
  38. data/lib/polars/io.rb +36 -31
  39. data/lib/polars/lazy_frame.rb +405 -88
  40. data/lib/polars/list_expr.rb +158 -8
  41. data/lib/polars/list_name_space.rb +102 -0
  42. data/lib/polars/meta_expr.rb +175 -7
  43. data/lib/polars/series.rb +282 -41
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +413 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/testing.rb +507 -0
  48. data/lib/polars/utils.rb +106 -8
  49. data/lib/polars/version.rb +1 -1
  50. data/lib/polars/whenthen.rb +83 -0
  51. data/lib/polars.rb +16 -4
  52. metadata +34 -6
  53. data/lib/polars/lazy_functions.rb +0 -1181
  54. data/lib/polars/when.rb +0 -16
  55. data/lib/polars/when_then.rb +0 -19
data/LICENSE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  Copyright (c) 2020 Ritchie Vink
2
- Copyright (c) 2022-2023 Andrew Kane
2
+ Copyright (c) 2022-2024 Andrew Kane
3
3
 
4
4
  Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  :fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
4
4
 
5
- [![Build Status](https://github.com/ankane/polars-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/polars-ruby/actions)
5
+ [![Build Status](https://github.com/ankane/polars-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/polars-ruby/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -357,7 +357,7 @@ Supported types are:
357
357
  - float - `Float64`, `Float32`
358
358
  - integer - `Int64`, `Int32`, `Int16`, `Int8`
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
- - string - `Utf8`, `Binary`, `Categorical`
360
+ - string - `String`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
362
  - nested - `List`, `Struct`, `Array`
363
363
  - other - `Object`, `Null`
@@ -433,4 +433,5 @@ cd polars-ruby
433
433
  bundle install
434
434
  bundle exec rake compile
435
435
  bundle exec rake test
436
+ bundle exec rake test:docs
436
437
  ```
Binary file
Binary file
Binary file
@@ -80,5 +80,458 @@ module Polars
80
80
  def sum
81
81
  Utils.wrap_expr(_rbexpr.array_sum)
82
82
  end
83
+
84
+ # Get the unique/distinct values in the array.
85
+ #
86
+ # @param maintain_order [Boolean]
87
+ # Maintain order of data. This requires more work.
88
+ #
89
+ # @return [Expr]
90
+ #
91
+ # @example
92
+ # df = Polars::DataFrame.new(
93
+ # {
94
+ # "a" => [[1, 1, 2]]
95
+ # },
96
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
97
+ # )
98
+ # df.select(Polars.col("a").arr.unique)
99
+ # # =>
100
+ # # shape: (1, 1)
101
+ # # ┌───────────┐
102
+ # # │ a │
103
+ # # │ --- │
104
+ # # │ list[i64] │
105
+ # # ╞═══════════╡
106
+ # # │ [1, 2] │
107
+ # # └───────────┘
108
+ def unique(maintain_order: false)
109
+ Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
110
+ end
111
+
112
+ # Convert an Array column into a List column with the same inner data type.
113
+ #
114
+ # @return [Expr]
115
+ #
116
+ # @example
117
+ # df = Polars::DataFrame.new(
118
+ # {"a" => [[1, 2], [3, 4]]},
119
+ # schema: {"a" => Polars::Array.new(Polars::Int8, 2)}
120
+ # )
121
+ # df.select(Polars.col("a").arr.to_list)
122
+ # # =>
123
+ # # shape: (2, 1)
124
+ # # ┌──────────┐
125
+ # # │ a │
126
+ # # │ --- │
127
+ # # │ list[i8] │
128
+ # # ╞══════════╡
129
+ # # │ [1, 2] │
130
+ # # │ [3, 4] │
131
+ # # └──────────┘
132
+ def to_list
133
+ Utils.wrap_expr(_rbexpr.arr_to_list)
134
+ end
135
+
136
+ # Evaluate whether any boolean value is true for every subarray.
137
+ #
138
+ # @return [Expr]
139
+ #
140
+ # @example
141
+ # df = Polars::DataFrame.new(
142
+ # {
143
+ # "a": [
144
+ # [true, true],
145
+ # [false, true],
146
+ # [false, false],
147
+ # [nil, nil],
148
+ # nil
149
+ # ]
150
+ # },
151
+ # schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
152
+ # )
153
+ # df.with_columns(any: Polars.col("a").arr.any)
154
+ # # =>
155
+ # # shape: (5, 2)
156
+ # # ┌────────────────┬───────┐
157
+ # # │ a ┆ any │
158
+ # # │ --- ┆ --- │
159
+ # # │ array[bool, 2] ┆ bool │
160
+ # # ╞════════════════╪═══════╡
161
+ # # │ [true, true] ┆ true │
162
+ # # │ [false, true] ┆ true │
163
+ # # │ [false, false] ┆ false │
164
+ # # │ [null, null] ┆ false │
165
+ # # │ null ┆ null │
166
+ # # └────────────────┴───────┘
167
+ def any
168
+ Utils.wrap_expr(_rbexpr.arr_any)
169
+ end
170
+
171
+ # Evaluate whether all boolean values are true for every subarray.
172
+ #
173
+ # @return [Expr]
174
+ #
175
+ # @example
176
+ # df = Polars::DataFrame.new(
177
+ # {
178
+ # "a": [
179
+ # [true, true],
180
+ # [false, true],
181
+ # [false, false],
182
+ # [nil, nil],
183
+ # nil
184
+ # ]
185
+ # },
186
+ # schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
187
+ # )
188
+ # df.with_columns(all: Polars.col("a").arr.all)
189
+ # # =>
190
+ # # shape: (5, 2)
191
+ # # ┌────────────────┬───────┐
192
+ # # │ a ┆ all │
193
+ # # │ --- ┆ --- │
194
+ # # │ array[bool, 2] ┆ bool │
195
+ # # ╞════════════════╪═══════╡
196
+ # # │ [true, true] ┆ true │
197
+ # # │ [false, true] ┆ false │
198
+ # # │ [false, false] ┆ false │
199
+ # # │ [null, null] ┆ true │
200
+ # # │ null ┆ null │
201
+ # # └────────────────┴───────┘
202
+ def all
203
+ Utils.wrap_expr(_rbexpr.arr_all)
204
+ end
205
+
206
+ # Sort the arrays in this column.
207
+ #
208
+ # @param descending [Boolean]
209
+ # Sort in descending order.
210
+ # @param nulls_last [Boolean]
211
+ # Place null values last.
212
+ #
213
+ # @return [Expr]
214
+ #
215
+ # @example
216
+ # df = Polars::DataFrame.new(
217
+ # {
218
+ # "a" => [[3, 2, 1], [9, 1, 2]],
219
+ # },
220
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
221
+ # )
222
+ # df.with_columns(sort: Polars.col("a").arr.sort)
223
+ # # =>
224
+ # # shape: (2, 2)
225
+ # # ┌───────────────┬───────────────┐
226
+ # # │ a ┆ sort │
227
+ # # │ --- ┆ --- │
228
+ # # │ array[i64, 3] ┆ array[i64, 3] │
229
+ # # ╞═══════════════╪═══════════════╡
230
+ # # │ [3, 2, 1] ┆ [1, 2, 3] │
231
+ # # │ [9, 1, 2] ┆ [1, 2, 9] │
232
+ # # └───────────────┴───────────────┘
233
+ #
234
+ # @example
235
+ # df.with_columns(sort: Polars.col("a").arr.sort(descending: true))
236
+ # # =>
237
+ # # shape: (2, 2)
238
+ # # ┌───────────────┬───────────────┐
239
+ # # │ a ┆ sort │
240
+ # # │ --- ┆ --- │
241
+ # # │ array[i64, 3] ┆ array[i64, 3] │
242
+ # # ╞═══════════════╪═══════════════╡
243
+ # # │ [3, 2, 1] ┆ [3, 2, 1] │
244
+ # # │ [9, 1, 2] ┆ [9, 2, 1] │
245
+ # # └───────────────┴───────────────┘
246
+ def sort(descending: false, nulls_last: false)
247
+ Utils.wrap_expr(_rbexpr.arr_sort(descending, nulls_last))
248
+ end
249
+
250
+ # Reverse the arrays in this column.
251
+ #
252
+ # @return [Expr]
253
+ #
254
+ # @example
255
+ # df = Polars::DataFrame.new(
256
+ # {
257
+ # "a" => [[3, 2, 1], [9, 1, 2]]
258
+ # },
259
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
260
+ # )
261
+ # df.with_columns(reverse: Polars.col("a").arr.reverse)
262
+ # # =>
263
+ # # shape: (2, 2)
264
+ # # ┌───────────────┬───────────────┐
265
+ # # │ a ┆ reverse │
266
+ # # │ --- ┆ --- │
267
+ # # │ array[i64, 3] ┆ array[i64, 3] │
268
+ # # ╞═══════════════╪═══════════════╡
269
+ # # │ [3, 2, 1] ┆ [1, 2, 3] │
270
+ # # │ [9, 1, 2] ┆ [2, 1, 9] │
271
+ # # └───────────────┴───────────────┘
272
+ def reverse
273
+ Utils.wrap_expr(_rbexpr.arr_reverse)
274
+ end
275
+
276
+ # Retrieve the index of the minimal value in every sub-array.
277
+ #
278
+ # @return [Expr]
279
+ #
280
+ # @example
281
+ # df = Polars::DataFrame.new(
282
+ # {
283
+ # "a" => [[1, 2], [2, 1]]
284
+ # },
285
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
286
+ # )
287
+ # df.with_columns(arg_min: Polars.col("a").arr.arg_min)
288
+ # # =>
289
+ # # shape: (2, 2)
290
+ # # ┌───────────────┬─────────┐
291
+ # # │ a ┆ arg_min │
292
+ # # │ --- ┆ --- │
293
+ # # │ array[i64, 2] ┆ u32 │
294
+ # # ╞═══════════════╪═════════╡
295
+ # # │ [1, 2] ┆ 0 │
296
+ # # │ [2, 1] ┆ 1 │
297
+ # # └───────────────┴─────────┘
298
+ def arg_min
299
+ Utils.wrap_expr(_rbexpr.arr_arg_min)
300
+ end
301
+
302
+ # Retrieve the index of the maximum value in every sub-array.
303
+ #
304
+ # @return [Expr]
305
+ #
306
+ # @example
307
+ # df = Polars::DataFrame.new(
308
+ # {
309
+ # "a" => [[1, 2], [2, 1]]
310
+ # },
311
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
312
+ # )
313
+ # df.with_columns(arg_max: Polars.col("a").arr.arg_max)
314
+ # # =>
315
+ # # shape: (2, 2)
316
+ # # ┌───────────────┬─────────┐
317
+ # # │ a ┆ arg_max │
318
+ # # │ --- ┆ --- │
319
+ # # │ array[i64, 2] ┆ u32 │
320
+ # # ╞═══════════════╪═════════╡
321
+ # # │ [1, 2] ┆ 1 │
322
+ # # │ [2, 1] ┆ 0 │
323
+ # # └───────────────┴─────────┘
324
+ def arg_max
325
+ Utils.wrap_expr(_rbexpr.arr_arg_max)
326
+ end
327
+
328
+ # Get the value by index in the sub-arrays.
329
+ #
330
+ # So index `0` would return the first item of every sub-array
331
+ # and index `-1` would return the last item of every sub-array.
332
+ # If an index is out of bounds, it will return `nil` (or raise an error when `null_on_oob` is `false`).
333
+ #
334
+ # @param index [Integer]
335
+ # Index to return per sub-array
336
+ # @param null_on_oob [Boolean]
337
+ # Behavior if an index is out of bounds:
338
+ # true -> set as null
339
+ # false -> raise an error
340
+ #
341
+ # @return [Expr]
342
+ #
343
+ # @example
344
+ # df = Polars::DataFrame.new(
345
+ # {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
346
+ # schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
347
+ # )
348
+ # df.with_columns(get: Polars.col("arr").arr.get("idx"))
349
+ # # =>
350
+ # # shape: (3, 3)
351
+ # # ┌───────────────┬─────┬──────┐
352
+ # # │ arr ┆ idx ┆ get │
353
+ # # │ --- ┆ --- ┆ --- │
354
+ # # │ array[i32, 3] ┆ i32 ┆ i32 │
355
+ # # ╞═══════════════╪═════╪══════╡
356
+ # # │ [1, 2, 3] ┆ 1 ┆ 2 │
357
+ # # │ [4, 5, 6] ┆ -2 ┆ 5 │
358
+ # # │ [7, 8, 9] ┆ 4 ┆ null │
359
+ # # └───────────────┴─────┴──────┘
360
+ def get(index, null_on_oob: true)
361
+ index = Utils.parse_as_expression(index)
362
+ Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
363
+ end
364
+
365
+ # Get the first value of the sub-arrays.
366
+ #
367
+ # @return [Expr]
368
+ #
369
+ # @example
370
+ # df = Polars::DataFrame.new(
371
+ # {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
372
+ # schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
373
+ # )
374
+ # df.with_columns(first: Polars.col("a").arr.first)
375
+ # # =>
376
+ # # shape: (3, 2)
377
+ # # ┌───────────────┬───────┐
378
+ # # │ a ┆ first │
379
+ # # │ --- ┆ --- │
380
+ # # │ array[i32, 3] ┆ i32 │
381
+ # # ╞═══════════════╪═══════╡
382
+ # # │ [1, 2, 3] ┆ 1 │
383
+ # # │ [4, 5, 6] ┆ 4 │
384
+ # # │ [7, 8, 9] ┆ 7 │
385
+ # # └───────────────┴───────┘
386
+ def first
387
+ get(0)
388
+ end
389
+
390
+ # Get the last value of the sub-arrays.
391
+ #
392
+ # @return [Expr]
393
+ #
394
+ # @example
395
+ # df = Polars::DataFrame.new(
396
+ # {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
397
+ # schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
398
+ # )
399
+ # df.with_columns(last: Polars.col("a").arr.last)
400
+ # # =>
401
+ # # shape: (3, 2)
402
+ # # ┌───────────────┬──────┐
403
+ # # │ a ┆ last │
404
+ # # │ --- ┆ --- │
405
+ # # │ array[i32, 3] ┆ i32 │
406
+ # # ╞═══════════════╪══════╡
407
+ # # │ [1, 2, 3] ┆ 3 │
408
+ # # │ [4, 5, 6] ┆ 6 │
409
+ # # │ [7, 8, 9] ┆ 9 │
410
+ # # └───────────────┴──────┘
411
+ def last
412
+ get(-1)
413
+ end
414
+
415
+ # Join all string items in a sub-array and place a separator between them.
416
+ #
417
+ # This errors if inner type of array `!= String`.
418
+ #
419
+ # @param separator [String]
420
+ # string to separate the items with
421
+ # @param ignore_nulls [Boolean]
422
+ # Ignore null values (default).
423
+ #
424
+ # If set to `false`, null values will be propagated.
425
+ # If the sub-array contains any null values, the output is `nil`.
426
+ #
427
+ # @return [Expr]
428
+ #
429
+ # @example
430
+ # df = Polars::DataFrame.new(
431
+ # {"s" => [["a", "b"], ["x", "y"]], "separator" => ["*", "_"]},
432
+ # schema: {
433
+ # "s" => Polars::Array.new(Polars::String, 2),
434
+ # "separator" => Polars::String
435
+ # }
436
+ # )
437
+ # df.with_columns(join: Polars.col("s").arr.join(Polars.col("separator")))
438
+ # # =>
439
+ # # shape: (2, 3)
440
+ # # ┌───────────────┬───────────┬──────┐
441
+ # # │ s ┆ separator ┆ join │
442
+ # # │ --- ┆ --- ┆ --- │
443
+ # # │ array[str, 2] ┆ str ┆ str │
444
+ # # ╞═══════════════╪═══════════╪══════╡
445
+ # # │ ["a", "b"] ┆ * ┆ a*b │
446
+ # # │ ["x", "y"] ┆ _ ┆ x_y │
447
+ # # └───────────────┴───────────┴──────┘
448
+ def join(separator, ignore_nulls: true)
449
+ separator = Utils.parse_as_expression(separator, str_as_lit: true)
450
+ Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
451
+ end
452
+
453
+ # Returns a column with a separate row for every array element.
454
+ #
455
+ # @return [Expr]
456
+ #
457
+ # @example
458
+ # df = Polars::DataFrame.new(
459
+ # {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
460
+ # )
461
+ # df.select(Polars.col("a").arr.explode)
462
+ # # =>
463
+ # # shape: (6, 1)
464
+ # # ┌─────┐
465
+ # # │ a │
466
+ # # │ --- │
467
+ # # │ i64 │
468
+ # # ╞═════╡
469
+ # # │ 1 │
470
+ # # │ 2 │
471
+ # # │ 3 │
472
+ # # │ 4 │
473
+ # # │ 5 │
474
+ # # │ 6 │
475
+ # # └─────┘
476
+ def explode
477
+ Utils.wrap_expr(_rbexpr.explode)
478
+ end
479
+
480
+ # Check if sub-arrays contain the given item.
481
+ #
482
+ # @param item [Object]
483
+ # Item that will be checked for membership
484
+ #
485
+ # @return [Expr]
486
+ #
487
+ # @example
488
+ # df = Polars::DataFrame.new(
489
+ # {"a" => [["a", "b"], ["x", "y"], ["a", "c"]]},
490
+ # schema: {"a" => Polars::Array.new(Polars::String, 2)}
491
+ # )
492
+ # df.with_columns(contains: Polars.col("a").arr.contains("a"))
493
+ # # =>
494
+ # # shape: (3, 2)
495
+ # # ┌───────────────┬──────────┐
496
+ # # │ a ┆ contains │
497
+ # # │ --- ┆ --- │
498
+ # # │ array[str, 2] ┆ bool │
499
+ # # ╞═══════════════╪══════════╡
500
+ # # │ ["a", "b"] ┆ true │
501
+ # # │ ["x", "y"] ┆ false │
502
+ # # │ ["a", "c"] ┆ true │
503
+ # # └───────────────┴──────────┘
504
+ def contains(item)
505
+ item = Utils.parse_as_expression(item, str_as_lit: true)
506
+ Utils.wrap_expr(_rbexpr.arr_contains(item))
507
+ end
508
+
509
+ # Count how often the value produced by `element` occurs.
510
+ #
511
+ # @param element [Object]
512
+ # An expression that produces a single value
513
+ #
514
+ # @return [Expr]
515
+ #
516
+ # @example
517
+ # df = Polars::DataFrame.new(
518
+ # {"a" => [[1, 2], [1, 1], [2, 2]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
519
+ # )
520
+ # df.with_columns(number_of_twos: Polars.col("a").arr.count_matches(2))
521
+ # # =>
522
+ # # shape: (3, 2)
523
+ # # ┌───────────────┬────────────────┐
524
+ # # │ a ┆ number_of_twos │
525
+ # # │ --- ┆ --- │
526
+ # # │ array[i64, 2] ┆ u32 │
527
+ # # ╞═══════════════╪════════════════╡
528
+ # # │ [1, 2] ┆ 1 │
529
+ # # │ [1, 1] ┆ 0 │
530
+ # # │ [2, 2] ┆ 2 │
531
+ # # └───────────────┴────────────────┘
532
+ def count_matches(element)
533
+ element = Utils.parse_as_expression(element, str_as_lit: true)
534
+ Utils.wrap_expr(_rbexpr.arr_count_matches(element))
535
+ end
83
536
  end
84
537
  end