polars-df 0.21.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +1 -1
  4. data/ext/polars/Cargo.toml +7 -1
  5. data/ext/polars/src/conversion/mod.rs +92 -4
  6. data/ext/polars/src/exceptions.rs +1 -0
  7. data/ext/polars/src/expr/array.rs +73 -4
  8. data/ext/polars/src/expr/binary.rs +26 -1
  9. data/ext/polars/src/expr/bitwise.rs +39 -0
  10. data/ext/polars/src/expr/categorical.rs +20 -0
  11. data/ext/polars/src/expr/datatype.rs +24 -1
  12. data/ext/polars/src/expr/datetime.rs +58 -0
  13. data/ext/polars/src/expr/general.rs +84 -5
  14. data/ext/polars/src/expr/list.rs +24 -0
  15. data/ext/polars/src/expr/meta.rs +11 -0
  16. data/ext/polars/src/expr/mod.rs +1 -0
  17. data/ext/polars/src/expr/name.rs +8 -0
  18. data/ext/polars/src/expr/rolling.rs +20 -0
  19. data/ext/polars/src/expr/string.rs +59 -0
  20. data/ext/polars/src/expr/struct.rs +9 -1
  21. data/ext/polars/src/functions/io.rs +19 -0
  22. data/ext/polars/src/functions/lazy.rs +4 -0
  23. data/ext/polars/src/lazyframe/general.rs +51 -0
  24. data/ext/polars/src/lib.rs +119 -10
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/map/series.rs +1 -1
  27. data/ext/polars/src/series/aggregation.rs +44 -0
  28. data/ext/polars/src/series/general.rs +64 -4
  29. data/lib/polars/array_expr.rb +382 -3
  30. data/lib/polars/array_name_space.rb +281 -0
  31. data/lib/polars/binary_expr.rb +67 -0
  32. data/lib/polars/binary_name_space.rb +43 -0
  33. data/lib/polars/cat_expr.rb +224 -0
  34. data/lib/polars/cat_name_space.rb +138 -0
  35. data/lib/polars/config.rb +2 -2
  36. data/lib/polars/convert.rb +6 -6
  37. data/lib/polars/data_frame.rb +684 -19
  38. data/lib/polars/data_type_expr.rb +52 -0
  39. data/lib/polars/data_types.rb +14 -2
  40. data/lib/polars/date_time_expr.rb +251 -0
  41. data/lib/polars/date_time_name_space.rb +299 -0
  42. data/lib/polars/expr.rb +1213 -180
  43. data/lib/polars/functions/datatype.rb +21 -0
  44. data/lib/polars/functions/lazy.rb +13 -0
  45. data/lib/polars/io/csv.rb +1 -1
  46. data/lib/polars/io/json.rb +4 -4
  47. data/lib/polars/io/ndjson.rb +4 -4
  48. data/lib/polars/io/parquet.rb +27 -5
  49. data/lib/polars/lazy_frame.rb +936 -20
  50. data/lib/polars/list_expr.rb +196 -4
  51. data/lib/polars/list_name_space.rb +201 -4
  52. data/lib/polars/meta_expr.rb +64 -0
  53. data/lib/polars/name_expr.rb +36 -0
  54. data/lib/polars/schema.rb +79 -3
  55. data/lib/polars/selector.rb +72 -0
  56. data/lib/polars/selectors.rb +3 -3
  57. data/lib/polars/series.rb +1051 -54
  58. data/lib/polars/string_expr.rb +411 -6
  59. data/lib/polars/string_name_space.rb +722 -49
  60. data/lib/polars/struct_expr.rb +103 -0
  61. data/lib/polars/struct_name_space.rb +19 -1
  62. data/lib/polars/utils/various.rb +18 -1
  63. data/lib/polars/utils.rb +5 -1
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +2 -0
  66. metadata +4 -1
@@ -9,6 +9,181 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
+ # Return the number of elements in each array.
13
+ #
14
+ # @return [Expr]
15
+ #
16
+ # @example
17
+ # df = Polars::DataFrame.new(
18
+ # {"a" => [[1, 2], [4, 3]]},
19
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
20
+ # )
21
+ # df.select(Polars.col("a").arr.len)
22
+ # # =>
23
+ # # shape: (2, 1)
24
+ # # ┌─────┐
25
+ # # │ a │
26
+ # # │ --- │
27
+ # # │ u32 │
28
+ # # ╞═════╡
29
+ # # │ 2 │
30
+ # # │ 2 │
31
+ # # └─────┘
32
+ def len
33
+ Utils.wrap_expr(_rbexpr.arr_len)
34
+ end
35
+
36
+ # Slice every subarray.
37
+ #
38
+ # @param offset [Integer]
39
+ # Start index. Negative indexing is supported.
40
+ # @param length [Integer]
41
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
42
+ # end of the array.
43
+ # @param as_array [Boolean]
44
+ # Return result as a fixed-length `Array`, otherwise as a `List`.
45
+ # If true `length` and `offset` must be constant values.
46
+ #
47
+ # @return [Expr]
48
+ #
49
+ # @example
50
+ # df = Polars::DataFrame.new(
51
+ # {"a" => [[1, 2], [4, 3]]},
52
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
53
+ # )
54
+ # df.select(Polars.col("a").arr.slice(0, 1))
55
+ # # =>
56
+ # # shape: (2, 1)
57
+ # # ┌───────────┐
58
+ # # │ a │
59
+ # # │ --- │
60
+ # # │ list[i64] │
61
+ # # ╞═══════════╡
62
+ # # │ [1] │
63
+ # # │ [4] │
64
+ # # └───────────┘
65
+ #
66
+ # @example
67
+ # df = Polars::DataFrame.new(
68
+ # {"a" => [[1, 2], [4, 3]]},
69
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
70
+ # )
71
+ # df.select(Polars.col("a").arr.slice(0, 1, as_array: true))
72
+ # # =>
73
+ # # shape: (2, 1)
74
+ # # ┌───────────────┐
75
+ # # │ a │
76
+ # # │ --- │
77
+ # # │ array[i64, 1] │
78
+ # # ╞═══════════════╡
79
+ # # │ [1] │
80
+ # # │ [4] │
81
+ # # └───────────────┘
82
+ def slice(
83
+ offset,
84
+ length = nil,
85
+ as_array: false
86
+ )
87
+ offset = Utils.parse_into_expression(offset)
88
+ length = !length.nil? ? Utils.parse_into_expression(length) : nil
89
+ Utils.wrap_expr(_rbexpr.arr_slice(offset, length, as_array))
90
+ end
91
+
92
+ # Get the first `n` elements of the sub-arrays.
93
+ #
94
+ # @param n [Integer]
95
+ # Number of values to return for each sub-array.
96
+ # @param as_array [Boolean]
97
+ # Return result as a fixed-length `Array`, otherwise as a `List`.
98
+ # If true `n` must be a constant value.
99
+ #
100
+ # @return [Expr]
101
+ #
102
+ # @example
103
+ # df = Polars::DataFrame.new(
104
+ # {"a" => [[1, 2], [4, 3]]},
105
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
106
+ # )
107
+ # df.select(Polars.col("a").arr.head(1))
108
+ # # =>
109
+ # # shape: (2, 1)
110
+ # # ┌───────────┐
111
+ # # │ a │
112
+ # # │ --- │
113
+ # # │ list[i64] │
114
+ # # ╞═══════════╡
115
+ # # │ [1] │
116
+ # # │ [4] │
117
+ # # └───────────┘
118
+ #
119
+ # @example
120
+ # df = Polars::DataFrame.new(
121
+ # {"a" => [[1, 2], [4, 3]]},
122
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
123
+ # )
124
+ # df.select(Polars.col("a").arr.head(1, as_array: true))
125
+ # # =>
126
+ # # shape: (2, 1)
127
+ # # ┌───────────────┐
128
+ # # │ a │
129
+ # # │ --- │
130
+ # # │ array[i64, 1] │
131
+ # # ╞═══════════════╡
132
+ # # │ [1] │
133
+ # # │ [4] │
134
+ # # └───────────────┘
135
+ def head(n = 5, as_array: false)
136
+ slice(0, n, as_array: as_array)
137
+ end
138
+
139
+ # Get the last `n` elements of the sub-arrays.
140
+ #
141
+ # @param n [Integer]
142
+ # Number of values to return for each sub-array.
143
+ # @param as_array [Boolean]
144
+ # Return result as a fixed-length `Array`, otherwise as a `List`.
145
+ # If true `n` must be a constant value.
146
+ #
147
+ # @return [Expr]
148
+ #
149
+ # @example
150
+ # df = Polars::DataFrame.new(
151
+ # {"a" => [[1, 2], [4, 3]]},
152
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
153
+ # )
154
+ # df.select(Polars.col("a").arr.tail(1))
155
+ # # =>
156
+ # # shape: (2, 1)
157
+ # # ┌───────────┐
158
+ # # │ a │
159
+ # # │ --- │
160
+ # # │ list[i64] │
161
+ # # ╞═══════════╡
162
+ # # │ [2] │
163
+ # # │ [3] │
164
+ # # └───────────┘
165
+ #
166
+ # @example
167
+ # df = Polars::DataFrame.new(
168
+ # {"a" => [[1, 2], [4, 3]]},
169
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
170
+ # )
171
+ # df.select(Polars.col("a").arr.tail(1, as_array: true))
172
+ # # =>
173
+ # # shape: (2, 1)
174
+ # # ┌───────────────┐
175
+ # # │ a │
176
+ # # │ --- │
177
+ # # │ array[i64, 1] │
178
+ # # ╞═══════════════╡
179
+ # # │ [2] │
180
+ # # │ [3] │
181
+ # # └───────────────┘
182
+ def tail(n = 5, as_array: false)
183
+ n = Utils.parse_into_expression(n)
184
+ Utils.wrap_expr(_rbexpr.arr_tail(n, as_array))
185
+ end
186
+
12
187
  # Compute the min values of the sub-arrays.
13
188
  #
14
189
  # @return [Expr]
@@ -30,7 +205,7 @@ module Polars
30
205
  # # │ 3 │
31
206
  # # └─────┘
32
207
  def min
33
- Utils.wrap_expr(_rbexpr.array_min)
208
+ Utils.wrap_expr(_rbexpr.arr_min)
34
209
  end
35
210
 
36
211
  # Compute the max values of the sub-arrays.
@@ -54,7 +229,7 @@ module Polars
54
229
  # # │ 4 │
55
230
  # # └─────┘
56
231
  def max
57
- Utils.wrap_expr(_rbexpr.array_max)
232
+ Utils.wrap_expr(_rbexpr.arr_max)
58
233
  end
59
234
 
60
235
  # Compute the sum values of the sub-arrays.
@@ -78,7 +253,103 @@ module Polars
78
253
  # # │ 7 │
79
254
  # # └─────┘
80
255
  def sum
81
- Utils.wrap_expr(_rbexpr.array_sum)
256
+ Utils.wrap_expr(_rbexpr.arr_sum)
257
+ end
258
+
259
+ # Compute the std of the values of the sub-arrays.
260
+ #
261
+ # @return [Expr]
262
+ #
263
+ # @example
264
+ # df = Polars::DataFrame.new(
265
+ # {"a" => [[1, 2], [4, 3]]},
266
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
267
+ # )
268
+ # df.select(Polars.col("a").arr.std)
269
+ # # =>
270
+ # # shape: (2, 1)
271
+ # # ┌──────────┐
272
+ # # │ a │
273
+ # # │ --- │
274
+ # # │ f64 │
275
+ # # ╞══════════╡
276
+ # # │ 0.707107 │
277
+ # # │ 0.707107 │
278
+ # # └──────────┘
279
+ def std(ddof: 1)
280
+ Utils.wrap_expr(_rbexpr.arr_std(ddof))
281
+ end
282
+
283
+ # Compute the var of the values of the sub-arrays.
284
+ #
285
+ # @return [Expr]
286
+ #
287
+ # @example
288
+ # df = Polars::DataFrame.new(
289
+ # {"a" => [[1, 2], [4, 3]]},
290
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
291
+ # )
292
+ # df.select(Polars.col("a").arr.var)
293
+ # # =>
294
+ # # shape: (2, 1)
295
+ # # ┌─────┐
296
+ # # │ a │
297
+ # # │ --- │
298
+ # # │ f64 │
299
+ # # ╞═════╡
300
+ # # │ 0.5 │
301
+ # # │ 0.5 │
302
+ # # └─────┘
303
+ def var(ddof: 1)
304
+ Utils.wrap_expr(_rbexpr.arr_var(ddof))
305
+ end
306
+
307
+ # Compute the mean of the values of the sub-arrays.
308
+ #
309
+ # @return [Expr]
310
+ #
311
+ # @example
312
+ # df = Polars::DataFrame.new(
313
+ # {"a" => [[1, 2, 3], [1, 1, 16]]},
314
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
315
+ # )
316
+ # df.select(Polars.col("a").arr.mean)
317
+ # # =>
318
+ # # shape: (2, 1)
319
+ # # ┌─────┐
320
+ # # │ a │
321
+ # # │ --- │
322
+ # # │ f64 │
323
+ # # ╞═════╡
324
+ # # │ 2.0 │
325
+ # # │ 6.0 │
326
+ # # └─────┘
327
+ def mean
328
+ Utils.wrap_expr(_rbexpr.arr_mean)
329
+ end
330
+
331
+ # Compute the median of the values of the sub-arrays.
332
+ #
333
+ # @return [Expr]
334
+ #
335
+ # @example
336
+ # df = Polars::DataFrame.new(
337
+ # {"a" => [[1, 2], [4, 3]]},
338
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
339
+ # )
340
+ # df.select(Polars.col("a").arr.median)
341
+ # # =>
342
+ # # shape: (2, 1)
343
+ # # ┌─────┐
344
+ # # │ a │
345
+ # # │ --- │
346
+ # # │ f64 │
347
+ # # ╞═════╡
348
+ # # │ 1.5 │
349
+ # # │ 3.5 │
350
+ # # └─────┘
351
+ def median
352
+ Utils.wrap_expr(_rbexpr.arr_median)
82
353
  end
83
354
 
84
355
  # Get the unique/distinct values in the array.
@@ -109,6 +380,32 @@ module Polars
109
380
  Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
110
381
  end
111
382
 
383
+ # Count the number of unique values in every sub-array.
384
+ #
385
+ # @return [Expr]
386
+ #
387
+ # @example
388
+ # df = Polars::DataFrame.new(
389
+ # {
390
+ # "a" => [[1, 1, 2], [2, 3, 4]],
391
+ # },
392
+ # schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
393
+ # )
394
+ # df.with_columns(n_unique: Polars.col("a").arr.n_unique)
395
+ # # =>
396
+ # # shape: (2, 2)
397
+ # # ┌───────────────┬──────────┐
398
+ # # │ a ┆ n_unique │
399
+ # # │ --- ┆ --- │
400
+ # # │ array[i64, 3] ┆ u32 │
401
+ # # ╞═══════════════╪══════════╡
402
+ # # │ [1, 1, 2] ┆ 2 │
403
+ # # │ [2, 3, 4] ┆ 3 │
404
+ # # └───────────────┴──────────┘
405
+ def n_unique
406
+ Utils.wrap_expr(_rbexpr.arr_n_unique)
407
+ end
408
+
112
409
  # Convert an Array column into a List column with the same inner data type.
113
410
  #
114
411
  # @return [Expr]
@@ -535,5 +832,87 @@ module Polars
535
832
  element = Utils.parse_into_expression(element, str_as_lit: true)
536
833
  Utils.wrap_expr(_rbexpr.arr_count_matches(element))
537
834
  end
835
+
836
+ # Convert the Series of type `Array` to a Series of type `Struct`.
837
+ #
838
+ # @param fields [Object]
839
+ # If the names and number of the desired fields are known in advance,
840
+ # a list of field names can be given, which will be assigned by index.
841
+ # Otherwise, to dynamically assign field names, a custom function can be
842
+ # used; if neither is set, fields will be `field_0, field_1 .. field_n`.
+ # NOTE: passing `fields` is not supported yet and currently raises `Todo`.
843
+ #
844
+ # @return [Expr]
845
+ #
846
+ # @example Convert array to struct with default field name assignment:
847
+ # df = Polars::DataFrame.new(
848
+ # {"n" => [[0, 1, 2], [3, 4, 5]]}, schema: {"n" => Polars::Array.new(Polars::Int8, 3)}
849
+ # )
850
+ # df.with_columns(struct: Polars.col("n").arr.to_struct)
851
+ # # =>
852
+ # # shape: (2, 2)
853
+ # # ┌──────────────┬───────────┐
854
+ # # │ n ┆ struct │
855
+ # # │ --- ┆ --- │
856
+ # # │ array[i8, 3] ┆ struct[3] │
857
+ # # ╞══════════════╪═══════════╡
858
+ # # │ [0, 1, 2] ┆ {0,1,2} │
859
+ # # │ [3, 4, 5] ┆ {3,4,5} │
860
+ # # └──────────────┴───────────┘
861
+ def to_struct(fields: nil)
862
+ raise Todo if fields
863
+ if fields.is_a?(Enumerable)
864
+ field_names = fields.to_a
865
+ rbexpr = _rbexpr.arr_to_struct(nil)
866
+ Utils.wrap_expr(rbexpr).struct.rename_fields(field_names)
867
+ else
868
+ rbexpr = _rbexpr.arr_to_struct(fields)
869
+ Utils.wrap_expr(rbexpr)
870
+ end
871
+ end
872
+
873
+ # Shift array values by the given number of indices.
874
+ #
875
+ # @param n [Integer]
876
+ # Number of indices to shift forward. If a negative value is passed, values
877
+ # are shifted in the opposite direction instead.
878
+ #
879
+ # @return [Expr]
880
+ #
881
+ # @note
882
+ # This method is similar to the `LAG` operation in SQL when the value for `n`
883
+ # is positive. With a negative value for `n`, it is similar to `LEAD`.
884
+ #
885
+ # @example By default, array values are shifted forward by one index.
886
+ # df = Polars::DataFrame.new(
887
+ # {"a" => [[1, 2, 3], [4, 5, 6]]}, schema: {"a" => Polars::Array.new(Polars::Int64, 3)}
888
+ # )
889
+ # df.with_columns(shift: Polars.col("a").arr.shift)
890
+ # # =>
891
+ # # shape: (2, 2)
892
+ # # ┌───────────────┬───────────────┐
893
+ # # │ a ┆ shift │
894
+ # # │ --- ┆ --- │
895
+ # # │ array[i64, 3] ┆ array[i64, 3] │
896
+ # # ╞═══════════════╪═══════════════╡
897
+ # # │ [1, 2, 3] ┆ [null, 1, 2] │
898
+ # # │ [4, 5, 6] ┆ [null, 4, 5] │
899
+ # # └───────────────┴───────────────┘
900
+ #
901
+ # @example Pass a negative value to shift in the opposite direction instead.
902
+ # df.with_columns(shift: Polars.col("a").arr.shift(-2))
903
+ # # =>
904
+ # # shape: (2, 2)
905
+ # # ┌───────────────┬─────────────────┐
906
+ # # │ a ┆ shift │
907
+ # # │ --- ┆ --- │
908
+ # # │ array[i64, 3] ┆ array[i64, 3] │
909
+ # # ╞═══════════════╪═════════════════╡
910
+ # # │ [1, 2, 3] ┆ [3, null, null] │
911
+ # # │ [4, 5, 6] ┆ [6, null, null] │
912
+ # # └───────────────┴─────────────────┘
913
+ def shift(n = 1)
914
+ n = Utils.parse_into_expression(n)
915
+ Utils.wrap_expr(_rbexpr.arr_shift(n))
916
+ end
538
917
  end
539
918
  end