polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -0
  3. data/Cargo.lock +353 -237
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1978 -1459
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +248 -108
  17. data/lib/polars/data_types.rb +195 -29
  18. data/lib/polars/date_time_expr.rb +41 -24
  19. data/lib/polars/date_time_name_space.rb +12 -12
  20. data/lib/polars/exceptions.rb +12 -1
  21. data/lib/polars/expr.rb +1080 -195
  22. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  23. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  24. data/lib/polars/functions/as_datatype.rb +248 -0
  25. data/lib/polars/functions/col.rb +47 -0
  26. data/lib/polars/functions/eager.rb +182 -0
  27. data/lib/polars/functions/lazy.rb +1280 -0
  28. data/lib/polars/functions/len.rb +49 -0
  29. data/lib/polars/functions/lit.rb +35 -0
  30. data/lib/polars/functions/random.rb +16 -0
  31. data/lib/polars/functions/range/date_range.rb +103 -0
  32. data/lib/polars/functions/range/int_range.rb +51 -0
  33. data/lib/polars/functions/repeat.rb +144 -0
  34. data/lib/polars/functions/whenthen.rb +27 -0
  35. data/lib/polars/functions.rb +29 -416
  36. data/lib/polars/group_by.rb +3 -3
  37. data/lib/polars/io.rb +21 -28
  38. data/lib/polars/lazy_frame.rb +390 -76
  39. data/lib/polars/list_expr.rb +152 -6
  40. data/lib/polars/list_name_space.rb +102 -0
  41. data/lib/polars/meta_expr.rb +175 -7
  42. data/lib/polars/series.rb +557 -59
  43. data/lib/polars/sql_context.rb +1 -1
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +412 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/struct_expr.rb +1 -1
  48. data/lib/polars/struct_name_space.rb +1 -1
  49. data/lib/polars/testing.rb +507 -0
  50. data/lib/polars/utils.rb +64 -20
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +15 -2
  53. metadata +36 -7
  54. data/lib/polars/lazy_functions.rb +0 -1197
@@ -73,5 +73,351 @@ module Polars
73
73
  def sum
74
74
  super
75
75
  end
76
+
77
+ # Get the unique/distinct values in the array.
78
+ #
79
+ # @param maintain_order [Boolean]
80
+ # Maintain order of data. This requires more work.
81
+ #
82
+ # @return [Series]
83
+ #
84
+ # @example
85
+ # df = Polars::DataFrame.new(
86
+ # {
87
+ # "a" => [[1, 1, 2]]
88
+ # },
89
+ # schema_overrides: {"a" => Polars::Array.new(Polars::Int64, 3)}
90
+ # )
91
+ # df.select(Polars.col("a").arr.unique)
92
+ # # =>
93
+ # # shape: (1, 1)
94
+ # # ┌───────────┐
95
+ # # │ a │
96
+ # # │ --- │
97
+ # # │ list[i64] │
98
+ # # ╞═══════════╡
99
+ # # │ [1, 2] │
100
+ # # └───────────┘
101
+ def unique(maintain_order: false)
102
+ super
103
+ end
104
+
105
+ # Convert an Array column into a List column with the same inner data type.
106
+ #
107
+ # @return [Series]
108
+ #
109
+ # @example
110
+ # s = Polars::Series.new([[1, 2], [3, 4]], dtype: Polars::Array.new(Polars::Int8, 2))
111
+ # s.arr.to_list
112
+ # # =>
113
+ # # shape: (2,)
114
+ # # Series: '' [list[i8]]
115
+ # # [
116
+ # # [1, 2]
117
+ # # [3, 4]
118
+ # # ]
119
+ def to_list
120
+ super
121
+ end
122
+
123
+ # Evaluate whether any boolean value is true for every subarray.
124
+ #
125
+ # @return [Series]
126
+ #
127
+ # @example
128
+ # s = Polars::Series.new(
129
+ # [[true, true], [false, true], [false, false], [nil, nil], nil],
130
+ # dtype: Polars::Array.new(Polars::Boolean, 2)
131
+ # )
132
+ # s.arr.any
133
+ # # =>
134
+ # # shape: (5,)
135
+ # # Series: '' [bool]
136
+ # # [
137
+ # # true
138
+ # # true
139
+ # # false
140
+ # # false
141
+ # # null
142
+ # # ]
143
+ def any
144
+ super
145
+ end
146
+
147
+ # Evaluate whether all boolean values are true for every subarray.
148
+ #
149
+ # @return [Series]
150
+ #
151
+ # @example
152
+ # s = Polars::Series.new(
153
+ # [[true, true], [false, true], [false, false], [nil, nil], nil],
154
+ # dtype: Polars::Array.new(Polars::Boolean, 2)
155
+ # )
156
+ # s.arr.all
157
+ # # =>
158
+ # # shape: (5,)
159
+ # # Series: '' [bool]
160
+ # # [
161
+ # # true
162
+ # # false
163
+ # # false
164
+ # # true
165
+ # # null
166
+ # # ]
167
+ def all
168
+ super
169
+ end
170
+
171
+ # Sort the arrays in this column.
172
+ #
173
+ # @param descending [Boolean]
174
+ # Sort in descending order.
175
+ # @param nulls_last [Boolean]
176
+ # Place null values last.
177
+ #
178
+ # @return [Series]
179
+ #
180
+ # @example
181
+ # s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
182
+ # s.arr.sort
183
+ # # =>
184
+ # # shape: (2,)
185
+ # # Series: 'a' [array[i64, 3]]
186
+ # # [
187
+ # # [1, 2, 3]
188
+ # # [1, 2, 9]
189
+ # # ]
190
+ #
191
+ # @example
192
+ # s.arr.sort(descending: true)
193
+ # # =>
194
+ # # shape: (2,)
195
+ # # Series: 'a' [array[i64, 3]]
196
+ # # [
197
+ # # [3, 2, 1]
198
+ # # [9, 2, 1]
199
+ # # ]
200
+ def sort(descending: false, nulls_last: false)
201
+ super
202
+ end
203
+
204
+ # Reverse the arrays in this column.
205
+ #
206
+ # @return [Series]
207
+ #
208
+ # @example
209
+ # s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
210
+ # s.arr.reverse
211
+ # # =>
212
+ # # shape: (2,)
213
+ # # Series: 'a' [array[i64, 3]]
214
+ # # [
215
+ # # [1, 2, 3]
216
+ # # [2, 1, 9]
217
+ # # ]
218
+ def reverse
219
+ super
220
+ end
221
+
222
+ # Retrieve the index of the minimal value in every sub-array.
223
+ #
224
+ # @return [Series]
225
+ #
226
+ # @example
227
+ # s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
228
+ # s.arr.arg_min
229
+ # # =>
230
+ # # shape: (2,)
231
+ # # Series: 'a' [u32]
232
+ # # [
233
+ # # 2
234
+ # # 1
235
+ # # ]
236
+ def arg_min
237
+ super
238
+ end
239
+
240
+ # Retrieve the index of the maximum value in every sub-array.
241
+ #
242
+ # @return [Series]
243
+ #
244
+ # @example
245
+ # s = Polars::Series.new("a", [[0, 9, 3], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
246
+ # s.arr.arg_max
247
+ # # =>
248
+ # # shape: (2,)
249
+ # # Series: 'a' [u32]
250
+ # # [
251
+ # # 1
252
+ # # 0
253
+ # # ]
254
+ def arg_max
255
+ super
256
+ end
257
+
258
+ # Get the value by index in the sub-arrays.
259
+ #
260
+ # So index `0` would return the first item of every sub-array
261
+ # and index `-1` would return the last item of every sub-array.
262
+ # If an index is out of bounds, it will return `nil`.
263
+ #
264
+ # @param index [Integer, Series]
265
+ # Index to return per sub-array
266
+ #
267
+ # @return [Series]
268
+ #
269
+ # @example
270
+ # s = Polars::Series.new(
271
+ # "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
272
+ # )
273
+ # s.arr.get(Polars::Series.new([1, -2, 4]))
274
+ # # =>
275
+ # # shape: (3,)
276
+ # # Series: 'a' [i32]
277
+ # # [
278
+ # # 2
279
+ # # 5
280
+ # # null
281
+ # # ]
282
+ def get(index)
283
+ super
284
+ end
285
+
286
+ # Get the first value of the sub-arrays.
287
+ #
288
+ # @return [Series]
289
+ #
290
+ # @example
291
+ # s = Polars::Series.new(
292
+ # "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
293
+ # )
294
+ # s.arr.first
295
+ # # =>
296
+ # # shape: (3,)
297
+ # # Series: 'a' [i32]
298
+ # # [
299
+ # # 1
300
+ # # 4
301
+ # # 7
302
+ # # ]
303
+ def first
304
+ super
305
+ end
306
+
307
+ # Get the last value of the sub-arrays.
308
+ #
309
+ # @return [Series]
310
+ #
311
+ # @example
312
+ # s = Polars::Series.new(
313
+ # "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
314
+ # )
315
+ # s.arr.last
316
+ # # =>
317
+ # # shape: (3,)
318
+ # # Series: 'a' [i32]
319
+ # # [
320
+ # # 3
321
+ # # 6
322
+ # # 9
323
+ # # ]
324
+ def last
325
+ super
326
+ end
327
+
328
+ # Join all string items in a sub-array and place a separator between them.
329
+ #
330
+ # This errors if inner type of array `!= String`.
331
+ #
332
+ # @param separator [String]
333
+ # string to separate the items with
334
+ # @param ignore_nulls [Boolean]
335
+ # Ignore null values (default).
336
+ #
337
+ # If set to `false`, null values will be propagated.
338
+ # If the sub-array contains any null values, the output is `nil`.
339
+ #
340
+ # @return [Series]
341
+ #
342
+ # @example
343
+ # s = Polars::Series.new([["x", "y"], ["a", "b"]], dtype: Polars::Array.new(Polars::String, 2))
344
+ # s.arr.join("-")
345
+ # # =>
346
+ # # shape: (2,)
347
+ # # Series: '' [str]
348
+ # # [
349
+ # # "x-y"
350
+ # # "a-b"
351
+ # # ]
352
+ def join(separator, ignore_nulls: true)
353
+ super
354
+ end
355
+
356
+ # Returns a column with a separate row for every array element.
357
+ #
358
+ # @return [Series]
359
+ #
360
+ # @example
361
+ # s = Polars::Series.new("a", [[1, 2, 3], [4, 5, 6]], dtype: Polars::Array.new(Polars::Int64, 3))
362
+ # s.arr.explode
363
+ # # =>
364
+ # # shape: (6,)
365
+ # # Series: 'a' [i64]
366
+ # # [
367
+ # # 1
368
+ # # 2
369
+ # # 3
370
+ # # 4
371
+ # # 5
372
+ # # 6
373
+ # # ]
374
+ def explode
375
+ super
376
+ end
377
+
378
+ # Check if sub-arrays contain the given item.
379
+ #
380
+ # @param item [Object]
381
+ # Item that will be checked for membership
382
+ #
383
+ # @return [Series]
384
+ #
385
+ # @example
386
+ # s = Polars::Series.new(
387
+ # "a", [[3, 2, 1], [1, 2, 3], [4, 5, 6]], dtype: Polars::Array.new(Polars::Int32, 3)
388
+ # )
389
+ # s.arr.contains(1)
390
+ # # =>
391
+ # # shape: (3,)
392
+ # # Series: 'a' [bool]
393
+ # # [
394
+ # # true
395
+ # # true
396
+ # # false
397
+ # # ]
398
+ def contains(item)
399
+ super
400
+ end
401
+
402
+ # Count how often the value produced by `element` occurs.
403
+ #
404
+ # @param element [Object]
405
+ # An expression that produces a single value
406
+ #
407
+ # @return [Series]
408
+ #
409
+ # @example
410
+ # s = Polars::Series.new("a", [[1, 2, 3], [2, 2, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
411
+ # s.arr.count_matches(2)
412
+ # # =>
413
+ # # shape: (2,)
414
+ # # Series: 'a' [u32]
415
+ # # [
416
+ # # 1
417
+ # # 3
418
+ # # ]
419
+ def count_matches(element)
420
+ super
421
+ end
76
422
  end
77
423
  end
@@ -44,5 +44,29 @@ module Polars
44
44
  def set_ordering(ordering)
45
45
  Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
46
46
  end
47
+
48
+ # Get the categories stored in this data type.
49
+ #
50
+ # @return [Expr]
51
+ #
52
+ # @example
53
+ # df = Polars::Series.new(
54
+ # "cats", ["foo", "bar", "foo", "foo", "ham"], dtype: Polars::Categorical
55
+ # ).to_frame
56
+ # df.select(Polars.col("cats").cat.get_categories)
57
+ # # =>
58
+ # # shape: (3, 1)
59
+ # # ┌──────┐
60
+ # # │ cats │
61
+ # # │ --- │
62
+ # # │ str │
63
+ # # ╞══════╡
64
+ # # │ foo │
65
+ # # │ bar │
66
+ # # │ ham │
67
+ # # └──────┘
68
+ def get_categories
69
+ Utils.wrap_expr(_rbexpr.cat_get_categories)
70
+ end
47
71
  end
48
72
  end
@@ -46,5 +46,80 @@ module Polars
46
46
  def set_ordering(ordering)
47
47
  super
48
48
  end
49
+
50
+ # Get the categories stored in this data type.
51
+ #
52
+ # @return [Series]
53
+ #
54
+ # @example
55
+ # s = Polars::Series.new(["foo", "bar", "foo", "foo", "ham"], dtype: Polars::Categorical)
56
+ # s.cat.get_categories
57
+ # # =>
58
+ # # shape: (3,)
59
+ # # Series: '' [str]
60
+ # # [
61
+ # # "foo"
62
+ # # "bar"
63
+ # # "ham"
64
+ # # ]
65
+ def get_categories
66
+ super
67
+ end
68
+
69
+ # Return whether or not the column is a local categorical.
70
+ #
71
+ # @return [Boolean]
72
+ #
73
+ # @example Categoricals constructed without a string cache are considered local.
74
+ # s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
75
+ # s.cat.is_local
76
+ # # => true
77
+ #
78
+ # @example Categoricals constructed with a string cache are considered global.
79
+ # s = nil
80
+ # Polars::StringCache.new do
81
+ # s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
82
+ # end
83
+ # s.cat.is_local
84
+ # # => false
85
+ def is_local
86
+ _s.cat_is_local
87
+ end
88
+
89
+ # Convert a categorical column to its local representation.
90
+ #
91
+ # This may change the underlying physical representation of the column.
92
+ #
93
+ # @return [Series]
94
+ #
95
+ # @example Compare the global and local representations of a categorical.
96
+ # s = nil
97
+ # Polars::StringCache.new do
98
+ # _ = Polars::Series.new("x", ["a", "b", "a"], dtype: Polars::Categorical)
99
+ # s = Polars::Series.new("y", ["c", "b", "d"], dtype: Polars::Categorical)
100
+ # end
101
+ # s.to_physical
102
+ # # =>
103
+ # # shape: (3,)
104
+ # # Series: 'y' [u32]
105
+ # # [
106
+ # # 2
107
+ # # 1
108
+ # # 3
109
+ # # ]
110
+ #
111
+ # @example
112
+ # s.cat.to_local.to_physical
113
+ # # =>
114
+ # # shape: (3,)
115
+ # # Series: 'y' [u32]
116
+ # # [
117
+ # # 0
118
+ # # 1
119
+ # # 2
120
+ # # ]
121
+ def to_local
122
+ Utils.wrap_s(_s.cat_to_local)
123
+ end
49
124
  end
50
125
  end
data/lib/polars/config.rb CHANGED
@@ -21,7 +21,7 @@ module Polars
21
21
  "POLARS_VERBOSE"
22
22
  ]
23
23
 
24
- POLARS_CFG_DIRECT_VARS = {"set_fmt_float" => Polars.method(:_get_float_fmt)}
24
+ POLARS_CFG_DIRECT_VARS = {"set_fmt_float" => Plr.method(:get_float_fmt)}
25
25
 
26
26
  # Initialize a Config object instance for context manager usage.
27
27
  def initialize(restore_defaults: false, **options)
@@ -163,7 +163,7 @@ module Polars
163
163
  #
164
164
  # @return [Config]
165
165
  def self.set_fmt_float(fmt = "mixed")
166
- Polars._set_float_fmt(fmt)
166
+ Plr.set_float_fmt(fmt)
167
167
  self
168
168
  end
169
169