polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -0
  3. data/Cargo.lock +353 -237
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1978 -1459
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +248 -108
  17. data/lib/polars/data_types.rb +195 -29
  18. data/lib/polars/date_time_expr.rb +41 -24
  19. data/lib/polars/date_time_name_space.rb +12 -12
  20. data/lib/polars/exceptions.rb +12 -1
  21. data/lib/polars/expr.rb +1080 -195
  22. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  23. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  24. data/lib/polars/functions/as_datatype.rb +248 -0
  25. data/lib/polars/functions/col.rb +47 -0
  26. data/lib/polars/functions/eager.rb +182 -0
  27. data/lib/polars/functions/lazy.rb +1280 -0
  28. data/lib/polars/functions/len.rb +49 -0
  29. data/lib/polars/functions/lit.rb +35 -0
  30. data/lib/polars/functions/random.rb +16 -0
  31. data/lib/polars/functions/range/date_range.rb +103 -0
  32. data/lib/polars/functions/range/int_range.rb +51 -0
  33. data/lib/polars/functions/repeat.rb +144 -0
  34. data/lib/polars/functions/whenthen.rb +27 -0
  35. data/lib/polars/functions.rb +29 -416
  36. data/lib/polars/group_by.rb +3 -3
  37. data/lib/polars/io.rb +21 -28
  38. data/lib/polars/lazy_frame.rb +390 -76
  39. data/lib/polars/list_expr.rb +152 -6
  40. data/lib/polars/list_name_space.rb +102 -0
  41. data/lib/polars/meta_expr.rb +175 -7
  42. data/lib/polars/series.rb +557 -59
  43. data/lib/polars/sql_context.rb +1 -1
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +412 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/struct_expr.rb +1 -1
  48. data/lib/polars/struct_name_space.rb +1 -1
  49. data/lib/polars/testing.rb +507 -0
  50. data/lib/polars/utils.rb +64 -20
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +15 -2
  53. metadata +36 -7
  54. data/lib/polars/lazy_functions.rb +0 -1197
data/LICENSE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  Copyright (c) 2020 Ritchie Vink
2
- Copyright (c) 2022-2023 Andrew Kane
2
+ Copyright (c) 2022-2024 Andrew Kane
3
3
 
4
4
  Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  :fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
4
4
 
5
- [![Build Status](https://github.com/ankane/polars-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/polars-ruby/actions)
5
+ [![Build Status](https://github.com/ankane/polars-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/polars-ruby/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -357,7 +357,7 @@ Supported types are:
357
357
  - float - `Float64`, `Float32`
358
358
  - integer - `Int64`, `Int32`, `Int16`, `Int8`
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
- - string - `Utf8`, `Binary`, `Categorical`
360
+ - string - `String`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
362
  - nested - `List`, `Struct`, `Array`
363
363
  - other - `Object`, `Null`
Binary file
Binary file
Binary file
@@ -80,5 +80,454 @@ module Polars
80
80
  def sum
81
81
  Utils.wrap_expr(_rbexpr.array_sum)
82
82
  end
83
+
84
+ # Get the unique/distinct values in the array.
85
+ #
86
+ # @param maintain_order [Boolean]
87
+ # Maintain order of data. This requires more work.
88
+ #
89
+ # @return [Expr]
90
+ #
91
+ # @example
92
+ # df = Polars::DataFrame.new(
93
+ # {
94
+ # "a" => [[1, 1, 2]]
95
+ # },
96
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
97
+ # )
98
+ # df.select(Polars.col("a").arr.unique)
99
+ # # =>
100
+ # # shape: (1, 1)
101
+ # # ┌───────────┐
102
+ # # │ a │
103
+ # # │ --- │
104
+ # # │ list[i64] │
105
+ # # ╞═══════════╡
106
+ # # │ [1, 2] │
107
+ # # └───────────┘
108
+ def unique(maintain_order: false)
109
+ Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
110
+ end
111
+
112
+ # Convert an Array column into a List column with the same inner data type.
113
+ #
114
+ # @return [Expr]
115
+ #
116
+ # @example
117
+ # df = Polars::DataFrame.new(
118
+ # {"a" => [[1, 2], [3, 4]]},
119
+ # schema: {"a" => Polars::Array.new(Polars::Int8, 2)}
120
+ # )
121
+ # df.select(Polars.col("a").arr.to_list)
122
+ # # =>
123
+ # # shape: (2, 1)
124
+ # # ┌──────────┐
125
+ # # │ a │
126
+ # # │ --- │
127
+ # # │ list[i8] │
128
+ # # ╞══════════╡
129
+ # # │ [1, 2] │
130
+ # # │ [3, 4] │
131
+ # # └──────────┘
132
+ def to_list
133
+ Utils.wrap_expr(_rbexpr.arr_to_list)
134
+ end
135
+
136
+ # Evaluate whether any boolean value is true for every subarray.
137
+ #
138
+ # @return [Expr]
139
+ #
140
+ # @example
141
+ # df = Polars::DataFrame.new(
142
+ # {
143
+ # "a": [
144
+ # [true, true],
145
+ # [false, true],
146
+ # [false, false],
147
+ # [nil, nil],
148
+ # nil
149
+ # ]
150
+ # },
151
+ # schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
152
+ # )
153
+ # df.with_columns(any: Polars.col("a").arr.any)
154
+ # # =>
155
+ # # shape: (5, 2)
156
+ # # ┌────────────────┬───────┐
157
+ # # │ a ┆ any │
158
+ # # │ --- ┆ --- │
159
+ # # │ array[bool, 2] ┆ bool │
160
+ # # ╞════════════════╪═══════╡
161
+ # # │ [true, true] ┆ true │
162
+ # # │ [false, true] ┆ true │
163
+ # # │ [false, false] ┆ false │
164
+ # # │ [null, null] ┆ false │
165
+ # # │ null ┆ null │
166
+ # # └────────────────┴───────┘
167
+ def any
168
+ Utils.wrap_expr(_rbexpr.arr_any)
169
+ end
170
+
171
+ # Evaluate whether all boolean values are true for every subarray.
172
+ #
173
+ # @return [Expr]
174
+ #
175
+ # @example
176
+ # df = Polars::DataFrame.new(
177
+ # {
178
+ # "a": [
179
+ # [true, true],
180
+ # [false, true],
181
+ # [false, false],
182
+ # [nil, nil],
183
+ # nil
184
+ # ]
185
+ # },
186
+ # schema: {"a" => Polars::Array.new(Polars::Boolean, 2)}
187
+ # )
188
+ # df.with_columns(all: Polars.col("a").arr.all)
189
+ # # =>
190
+ # # shape: (5, 2)
191
+ # # ┌────────────────┬───────┐
192
+ # # │ a ┆ all │
193
+ # # │ --- ┆ --- │
194
+ # # │ array[bool, 2] ┆ bool │
195
+ # # ╞════════════════╪═══════╡
196
+ # # │ [true, true] ┆ true │
197
+ # # │ [false, true] ┆ false │
198
+ # # │ [false, false] ┆ false │
199
+ # # │ [null, null] ┆ true │
200
+ # # │ null ┆ null │
201
+ # # └────────────────┴───────┘
202
+ def all
203
+ Utils.wrap_expr(_rbexpr.arr_all)
204
+ end
205
+
206
+ # Sort the arrays in this column.
207
+ #
208
+ # @param descending [Boolean]
209
+ # Sort in descending order.
210
+ # @param nulls_last [Boolean]
211
+ # Place null values last.
212
+ #
213
+ # @return [Expr]
214
+ #
215
+ # @example
216
+ # df = Polars::DataFrame.new(
217
+ # {
218
+ # "a" => [[3, 2, 1], [9, 1, 2]],
219
+ # },
220
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
221
+ # )
222
+ # df.with_columns(sort: Polars.col("a").arr.sort)
223
+ # # =>
224
+ # # shape: (2, 2)
225
+ # # ┌───────────────┬───────────────┐
226
+ # # │ a ┆ sort │
227
+ # # │ --- ┆ --- │
228
+ # # │ array[i64, 3] ┆ array[i64, 3] │
229
+ # # ╞═══════════════╪═══════════════╡
230
+ # # │ [3, 2, 1] ┆ [1, 2, 3] │
231
+ # # │ [9, 1, 2] ┆ [1, 2, 9] │
232
+ # # └───────────────┴───────────────┘
233
+ #
234
+ # @example
235
+ # df.with_columns(sort: Polars.col("a").arr.sort(descending: true))
236
+ # # =>
237
+ # # shape: (2, 2)
238
+ # # ┌───────────────┬───────────────┐
239
+ # # │ a ┆ sort │
240
+ # # │ --- ┆ --- │
241
+ # # │ array[i64, 3] ┆ array[i64, 3] │
242
+ # # ╞═══════════════╪═══════════════╡
243
+ # # │ [3, 2, 1] ┆ [3, 2, 1] │
244
+ # # │ [9, 1, 2] ┆ [9, 2, 1] │
245
+ # # └───────────────┴───────────────┘
246
+ def sort(descending: false, nulls_last: false)
247
+ Utils.wrap_expr(_rbexpr.arr_sort(descending, nulls_last))
248
+ end
249
+
250
+ # Reverse the arrays in this column.
251
+ #
252
+ # @return [Expr]
253
+ #
254
+ # @example
255
+ # df = Polars::DataFrame.new(
256
+ # {
257
+ # "a" => [[3, 2, 1], [9, 1, 2]]
258
+ # },
259
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
260
+ # )
261
+ # df.with_columns(reverse: Polars.col("a").arr.reverse)
262
+ # # =>
263
+ # # shape: (2, 2)
264
+ # # ┌───────────────┬───────────────┐
265
+ # # │ a ┆ reverse │
266
+ # # │ --- ┆ --- │
267
+ # # │ array[i64, 3] ┆ array[i64, 3] │
268
+ # # ╞═══════════════╪═══════════════╡
269
+ # # │ [3, 2, 1] ┆ [1, 2, 3] │
270
+ # # │ [9, 1, 2] ┆ [2, 1, 9] │
271
+ # # └───────────────┴───────────────┘
272
+ def reverse
273
+ Utils.wrap_expr(_rbexpr.arr_reverse)
274
+ end
275
+
276
+ # Retrieve the index of the minimal value in every sub-array.
277
+ #
278
+ # @return [Expr]
279
+ #
280
+ # @example
281
+ # df = Polars::DataFrame.new(
282
+ # {
283
+ # "a" => [[1, 2], [2, 1]]
284
+ # },
285
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
286
+ # )
287
+ # df.with_columns(arg_min: Polars.col("a").arr.arg_min)
288
+ # # =>
289
+ # # shape: (2, 2)
290
+ # # ┌───────────────┬─────────┐
291
+ # # │ a ┆ arg_min │
292
+ # # │ --- ┆ --- │
293
+ # # │ array[i64, 2] ┆ u32 │
294
+ # # ╞═══════════════╪═════════╡
295
+ # # │ [1, 2] ┆ 0 │
296
+ # # │ [2, 1] ┆ 1 │
297
+ # # └───────────────┴─────────┘
298
+ def arg_min
299
+ Utils.wrap_expr(_rbexpr.arr_arg_min)
300
+ end
301
+
302
+ # Retrieve the index of the maximum value in every sub-array.
303
+ #
304
+ # @return [Expr]
305
+ #
306
+ # @example
307
+ # df = Polars::DataFrame.new(
308
+ # {
309
+ # "a" => [[1, 2], [2, 1]]
310
+ # },
311
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
312
+ # )
313
+ # df.with_columns(arg_max: Polars.col("a").arr.arg_max)
314
+ # # =>
315
+ # # shape: (2, 2)
316
+ # # ┌───────────────┬─────────┐
317
+ # # │ a ┆ arg_max │
318
+ # # │ --- ┆ --- │
319
+ # # │ array[i64, 2] ┆ u32 │
320
+ # # ╞═══════════════╪═════════╡
321
+ # # │ [1, 2] ┆ 1 │
322
+ # # │ [2, 1] ┆ 0 │
323
+ # # └───────────────┴─────────┘
324
+ def arg_max
325
+ Utils.wrap_expr(_rbexpr.arr_arg_max)
326
+ end
327
+
328
+ # Get the value by index in the sub-arrays.
329
+ #
330
+ # So index `0` would return the first item of every sub-array
331
+ # and index `-1` would return the last item of every sub-array.
332
+ # If an index is out of bounds, it will return `nil`.
333
+ #
334
+ # @param index [Integer, String, Expr]
335
+ # Index to return per sub-array; a string is interpreted as a column name
336
+ #
337
+ # @return [Expr]
338
+ #
339
+ # @example
340
+ # df = Polars::DataFrame.new(
341
+ # {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
342
+ # schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
343
+ # )
344
+ # df.with_columns(get: Polars.col("arr").arr.get("idx"))
345
+ # # =>
346
+ # # shape: (3, 3)
347
+ # # ┌───────────────┬─────┬──────┐
348
+ # # │ arr ┆ idx ┆ get │
349
+ # # │ --- ┆ --- ┆ --- │
350
+ # # │ array[i32, 3] ┆ i32 ┆ i32 │
351
+ # # ╞═══════════════╪═════╪══════╡
352
+ # # │ [1, 2, 3] ┆ 1 ┆ 2 │
353
+ # # │ [4, 5, 6] ┆ -2 ┆ 5 │
354
+ # # │ [7, 8, 9] ┆ 4 ┆ null │
355
+ # # └───────────────┴─────┴──────┘
356
+ def get(index)
357
+ index = Utils.parse_as_expression(index)
358
+ Utils.wrap_expr(_rbexpr.arr_get(index))
359
+ end
360
+
361
+ # Get the first value of the sub-arrays.
362
+ #
363
+ # @return [Expr]
364
+ #
365
+ # @example
366
+ # df = Polars::DataFrame.new(
367
+ # {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
368
+ # schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
369
+ # )
370
+ # df.with_columns(first: Polars.col("a").arr.first)
371
+ # # =>
372
+ # # shape: (3, 2)
373
+ # # ┌───────────────┬───────┐
374
+ # # │ a ┆ first │
375
+ # # │ --- ┆ --- │
376
+ # # │ array[i32, 3] ┆ i32 │
377
+ # # ╞═══════════════╪═══════╡
378
+ # # │ [1, 2, 3] ┆ 1 │
379
+ # # │ [4, 5, 6] ┆ 4 │
380
+ # # │ [7, 8, 9] ┆ 7 │
381
+ # # └───────────────┴───────┘
382
+ def first
383
+ get(0)
384
+ end
385
+
386
+ # Get the last value of the sub-arrays.
387
+ #
388
+ # @return [Expr]
389
+ #
390
+ # @example
391
+ # df = Polars::DataFrame.new(
392
+ # {"a" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
393
+ # schema: {"a" => Polars::Array.new(Polars::Int32, 3)}
394
+ # )
395
+ # df.with_columns(last: Polars.col("a").arr.last)
396
+ # # =>
397
+ # # shape: (3, 2)
398
+ # # ┌───────────────┬──────┐
399
+ # # │ a ┆ last │
400
+ # # │ --- ┆ --- │
401
+ # # │ array[i32, 3] ┆ i32 │
402
+ # # ╞═══════════════╪══════╡
403
+ # # │ [1, 2, 3] ┆ 3 │
404
+ # # │ [4, 5, 6] ┆ 6 │
405
+ # # │ [7, 8, 9] ┆ 9 │
406
+ # # └───────────────┴──────┘
407
+ def last
408
+ get(-1)
409
+ end
410
+
411
+ # Join all string items in a sub-array and place a separator between them.
412
+ #
413
+ # This errors if the inner type of the array is not `String`.
414
+ #
415
+ # @param separator [String]
416
+ # String to separate the items with
417
+ # @param ignore_nulls [Boolean]
418
+ # Ignore null values (default).
419
+ #
420
+ # If set to `false`, null values will be propagated.
421
+ # If the sub-array contains any null values, the output is `nil`.
422
+ #
423
+ # @return [Expr]
424
+ #
425
+ # @example
426
+ # df = Polars::DataFrame.new(
427
+ # {"s" => [["a", "b"], ["x", "y"]], "separator" => ["*", "_"]},
428
+ # schema: {
429
+ # "s" => Polars::Array.new(Polars::String, 2),
430
+ # "separator" => Polars::String
431
+ # }
432
+ # )
433
+ # df.with_columns(join: Polars.col("s").arr.join(Polars.col("separator")))
434
+ # # =>
435
+ # # shape: (2, 3)
436
+ # # ┌───────────────┬───────────┬──────┐
437
+ # # │ s ┆ separator ┆ join │
438
+ # # │ --- ┆ --- ┆ --- │
439
+ # # │ array[str, 2] ┆ str ┆ str │
440
+ # # ╞═══════════════╪═══════════╪══════╡
441
+ # # │ ["a", "b"] ┆ * ┆ a*b │
442
+ # # │ ["x", "y"] ┆ _ ┆ x_y │
443
+ # # └───────────────┴───────────┴──────┘
444
+ def join(separator, ignore_nulls: true)
445
+ separator = Utils.parse_as_expression(separator, str_as_lit: true)
446
+ Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
447
+ end
448
+
449
+ # Returns a column with a separate row for every array element.
450
+ #
451
+ # @return [Expr]
452
+ #
453
+ # @example
454
+ # df = Polars::DataFrame.new(
455
+ # {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
456
+ # )
457
+ # df.select(Polars.col("a").arr.explode)
458
+ # # =>
459
+ # # shape: (6, 1)
460
+ # # ┌─────┐
461
+ # # │ a │
462
+ # # │ --- │
463
+ # # │ i64 │
464
+ # # ╞═════╡
465
+ # # │ 1 │
466
+ # # │ 2 │
467
+ # # │ 3 │
468
+ # # │ 4 │
469
+ # # │ 5 │
470
+ # # │ 6 │
471
+ # # └─────┘
472
+ def explode
473
+ Utils.wrap_expr(_rbexpr.explode)
474
+ end
475
+
476
+ # Check if sub-arrays contain the given item.
477
+ #
478
+ # @param item [Object]
479
+ # Item that will be checked for membership
480
+ #
481
+ # @return [Expr]
482
+ #
483
+ # @example
484
+ # df = Polars::DataFrame.new(
485
+ # {"a" => [["a", "b"], ["x", "y"], ["a", "c"]]},
486
+ # schema: {"a" => Polars::Array.new(Polars::String, 2)}
487
+ # )
488
+ # df.with_columns(contains: Polars.col("a").arr.contains("a"))
489
+ # # =>
490
+ # # shape: (3, 2)
491
+ # # ┌───────────────┬──────────┐
492
+ # # │ a ┆ contains │
493
+ # # │ --- ┆ --- │
494
+ # # │ array[str, 2] ┆ bool │
495
+ # # ╞═══════════════╪══════════╡
496
+ # # │ ["a", "b"] ┆ true │
497
+ # # │ ["x", "y"] ┆ false │
498
+ # # │ ["a", "c"] ┆ true │
499
+ # # └───────────────┴──────────┘
500
+ def contains(item)
501
+ item = Utils.parse_as_expression(item, str_as_lit: true)
502
+ Utils.wrap_expr(_rbexpr.arr_contains(item))
503
+ end
504
+
505
+ # Count how often the value produced by `element` occurs.
506
+ #
507
+ # @param element [Object]
508
+ # An expression that produces a single value
509
+ #
510
+ # @return [Expr]
511
+ #
512
+ # @example
513
+ # df = Polars::DataFrame.new(
514
+ # {"a" => [[1, 2], [1, 1], [2, 2]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
515
+ # )
516
+ # df.with_columns(number_of_twos: Polars.col("a").arr.count_matches(2))
517
+ # # =>
518
+ # # shape: (3, 2)
519
+ # # ┌───────────────┬────────────────┐
520
+ # # │ a ┆ number_of_twos │
521
+ # # │ --- ┆ --- │
522
+ # # │ array[i64, 2] ┆ u32 │
523
+ # # ╞═══════════════╪════════════════╡
524
+ # # │ [1, 2] ┆ 1 │
525
+ # # │ [1, 1] ┆ 0 │
526
+ # # │ [2, 2] ┆ 2 │
527
+ # # └───────────────┴────────────────┘
528
+ def count_matches(element)
529
+ element = Utils.parse_as_expression(element, str_as_lit: true)
530
+ Utils.wrap_expr(_rbexpr.arr_count_matches(element))
531
+ end
83
532
  end
84
533
  end