polars-df 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +8 -0
  4. data/Cargo.lock +2 -1
  5. data/README.md +1 -1
  6. data/ext/polars/Cargo.toml +7 -1
  7. data/ext/polars/src/batched_csv.rs +120 -0
  8. data/ext/polars/src/conversion.rs +139 -6
  9. data/ext/polars/src/dataframe.rs +360 -15
  10. data/ext/polars/src/error.rs +9 -0
  11. data/ext/polars/src/file.rs +8 -7
  12. data/ext/polars/src/lazy/apply.rs +7 -0
  13. data/ext/polars/src/lazy/dataframe.rs +135 -3
  14. data/ext/polars/src/lazy/dsl.rs +97 -2
  15. data/ext/polars/src/lazy/meta.rs +1 -1
  16. data/ext/polars/src/lazy/mod.rs +1 -0
  17. data/ext/polars/src/lib.rs +227 -12
  18. data/ext/polars/src/series.rs +190 -38
  19. data/ext/polars/src/set.rs +91 -0
  20. data/ext/polars/src/utils.rs +19 -0
  21. data/lib/polars/batched_csv_reader.rb +96 -0
  22. data/lib/polars/cat_expr.rb +39 -0
  23. data/lib/polars/data_frame.rb +2813 -100
  24. data/lib/polars/date_time_expr.rb +1282 -7
  25. data/lib/polars/exceptions.rb +20 -0
  26. data/lib/polars/expr.rb +631 -11
  27. data/lib/polars/expr_dispatch.rb +14 -0
  28. data/lib/polars/functions.rb +219 -0
  29. data/lib/polars/group_by.rb +517 -0
  30. data/lib/polars/io.rb +763 -4
  31. data/lib/polars/lazy_frame.rb +1415 -67
  32. data/lib/polars/lazy_functions.rb +430 -9
  33. data/lib/polars/lazy_group_by.rb +79 -0
  34. data/lib/polars/list_expr.rb +5 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2244 -192
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +663 -2
  39. data/lib/polars/struct_expr.rb +73 -0
  40. data/lib/polars/utils.rb +76 -3
  41. data/lib/polars/version.rb +2 -1
  42. data/lib/polars/when.rb +1 -0
  43. data/lib/polars/when_then.rb +1 -0
  44. data/lib/polars.rb +8 -2
  45. metadata +12 -2
data/lib/polars/expr.rb CHANGED
@@ -1,78 +1,139 @@
1
1
  module Polars
2
+ # Expressions that can be used in various contexts.
2
3
  class Expr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def self._from_rbexpr(rbexpr)
6
9
  expr = Expr.allocate
7
10
  expr._rbexpr = rbexpr
8
11
  expr
9
12
  end
10
13
 
14
+ # Returns a string representing the Expr.
15
+ #
16
+ # @return [String]
11
17
  def to_s
12
18
  _rbexpr.to_str
13
19
  end
14
20
  alias_method :inspect, :to_s
15
21
 
22
+ # Bitwise XOR.
23
+ #
24
+ # @return [Expr]
16
25
  def ^(other)
17
26
  wrap_expr(_rbexpr._xor(_to_rbexpr(other)))
18
27
  end
19
28
 
29
+ # Bitwise AND.
30
+ #
31
+ # @return [Expr]
20
32
  def &(other)
21
33
  wrap_expr(_rbexpr._and(_to_rbexpr(other)))
22
34
  end
23
35
 
36
+ # Bitwise OR.
37
+ #
38
+ # @return [Expr]
24
39
  def |(other)
25
40
  wrap_expr(_rbexpr._or(_to_rbexpr(other)))
26
41
  end
27
42
 
43
+ # Performs addition.
44
+ #
45
+ # @return [Expr]
28
46
  def +(other)
29
47
  wrap_expr(_rbexpr + _to_rbexpr(other))
30
48
  end
31
49
 
50
+ # Performs subtraction.
51
+ #
52
+ # @return [Expr]
32
53
  def -(other)
33
54
  wrap_expr(_rbexpr - _to_rbexpr(other))
34
55
  end
35
56
 
57
+ # Performs multiplication.
58
+ #
59
+ # @return [Expr]
36
60
  def *(other)
37
61
  wrap_expr(_rbexpr * _to_rbexpr(other))
38
62
  end
39
63
 
64
+ # Performs division.
65
+ #
66
+ # @return [Expr]
40
67
  def /(other)
41
68
  wrap_expr(_rbexpr / _to_rbexpr(other))
42
69
  end
43
70
 
71
+ # Performs floor division.
72
+ #
73
+ # @return [Expr]
74
+ def floordiv(other)
75
+ wrap_expr(_rbexpr.floordiv(_to_rbexpr(other)))
76
+ end
77
+
78
+ # Returns the modulo.
79
+ #
80
+ # @return [Expr]
44
81
  def %(other)
45
82
  wrap_expr(_rbexpr % _to_rbexpr(other))
46
83
  end
47
84
 
85
+ # Raises to the power of exponent.
86
+ #
87
+ # @return [Expr]
48
88
  def **(power)
49
89
  pow(power)
50
90
  end
51
91
 
92
+ # Greater than or equal.
93
+ #
94
+ # @return [Expr]
52
95
  def >=(other)
53
96
  wrap_expr(_rbexpr.gt_eq(_to_expr(other)._rbexpr))
54
97
  end
55
98
 
99
+ # Less than or equal.
100
+ #
101
+ # @return [Expr]
56
102
  def <=(other)
57
103
  wrap_expr(_rbexpr.lt_eq(_to_expr(other)._rbexpr))
58
104
  end
59
105
 
106
+ # Equal.
107
+ #
108
+ # @return [Expr]
60
109
  def ==(other)
61
110
  wrap_expr(_rbexpr.eq(_to_expr(other)._rbexpr))
62
111
  end
63
112
 
113
+ # Not equal.
114
+ #
115
+ # @return [Expr]
64
116
  def !=(other)
65
117
  wrap_expr(_rbexpr.neq(_to_expr(other)._rbexpr))
66
118
  end
67
119
 
120
+ # Less than.
121
+ #
122
+ # @return [Expr]
68
123
  def <(other)
69
124
  wrap_expr(_rbexpr.lt(_to_expr(other)._rbexpr))
70
125
  end
71
126
 
127
+ # Greater than.
128
+ #
129
+ # @return [Expr]
72
130
  def >(other)
73
131
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
74
132
  end
75
133
 
134
+ # Performs negation.
135
+ #
136
+ # @return [Expr]
76
137
  def -@
77
138
  Utils.lit(0) - self
78
139
  end
@@ -80,22 +141,119 @@ module Polars
80
141
  # def to_physical
81
142
  # end
82
143
 
144
+ # Check if any boolean value in a Boolean column is `true`.
145
+ #
146
+ # @return [Expr]
147
+ #
148
+ # @example
149
+ # df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
150
+ # df.select(Polars.all.any)
151
+ # # =>
152
+ # # shape: (1, 2)
153
+ # # ┌──────┬───────┐
154
+ # # │ TF ┆ FF │
155
+ # # │ --- ┆ --- │
156
+ # # │ bool ┆ bool │
157
+ # # ╞══════╪═══════╡
158
+ # # │ true ┆ false │
159
+ # # └──────┴───────┘
83
160
  def any
84
161
  wrap_expr(_rbexpr.any)
85
162
  end
86
163
 
164
+ # Check if all boolean values in a Boolean column are `true`.
165
+ #
166
+ # This method is an expression - not to be confused with
167
+ # `Polars.all` which is a function to select all columns.
168
+ #
169
+ # @return [Expr]
170
+ #
171
+ # @example
172
+ # df = Polars::DataFrame.new(
173
+ # {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
174
+ # )
175
+ # df.select(Polars.col("*").all)
176
+ # # =>
177
+ # # shape: (1, 3)
178
+ # # ┌──────┬───────┬───────┐
179
+ # # │ TT ┆ TF ┆ FF │
180
+ # # │ --- ┆ --- ┆ --- │
181
+ # # │ bool ┆ bool ┆ bool │
182
+ # # ╞══════╪═══════╪═══════╡
183
+ # # │ true ┆ false ┆ false │
184
+ # # └──────┴───────┴───────┘
87
185
  def all
88
186
  wrap_expr(_rbexpr.all)
89
187
  end
90
188
 
189
+ # Compute the square root of the elements.
190
+ #
191
+ # @return [Expr]
192
+ #
193
+ # @example
194
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
195
+ # df.select(Polars.col("values").sqrt)
196
+ # # =>
197
+ # # shape: (3, 1)
198
+ # # ┌──────────┐
199
+ # # │ values │
200
+ # # │ --- │
201
+ # # │ f64 │
202
+ # # ╞══════════╡
203
+ # # │ 1.0 │
204
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
205
+ # # │ 1.414214 │
206
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
207
+ # # │ 2.0 │
208
+ # # └──────────┘
91
209
  def sqrt
92
- self ** 0.5
93
- end
94
-
210
+ self**0.5
211
+ end
212
+
213
+ # Compute the base 10 logarithm of the input array, element-wise.
214
+ #
215
+ # @return [Expr]
216
+ #
217
+ # @example
218
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
219
+ # df.select(Polars.col("values").log10)
220
+ # # =>
221
+ # # shape: (3, 1)
222
+ # # ┌─────────┐
223
+ # # │ values │
224
+ # # │ --- │
225
+ # # │ f64 │
226
+ # # ╞═════════╡
227
+ # # │ 0.0 │
228
+ # # ├╌╌╌╌╌╌╌╌╌┤
229
+ # # │ 0.30103 │
230
+ # # ├╌╌╌╌╌╌╌╌╌┤
231
+ # # │ 0.60206 │
232
+ # # └─────────┘
95
233
  def log10
96
234
  log(10)
97
235
  end
98
236
 
237
+ # Compute the exponential, element-wise.
238
+ #
239
+ # @return [Expr]
240
+ #
241
+ # @example
242
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
243
+ # df.select(Polars.col("values").exp)
244
+ # # =>
245
+ # # shape: (3, 1)
246
+ # # ┌──────────┐
247
+ # # │ values │
248
+ # # │ --- │
249
+ # # │ f64 │
250
+ # # ╞══════════╡
251
+ # # │ 2.718282 │
252
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
253
+ # # │ 7.389056 │
254
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
255
+ # # │ 54.59815 │
256
+ # # └──────────┘
99
257
  def exp
100
258
  wrap_expr(_rbexpr.exp)
101
259
  end
@@ -104,7 +262,9 @@ module Polars
104
262
  wrap_expr(_rbexpr._alias(name))
105
263
  end
106
264
 
107
- # TODO support symbols
265
+ # TODO support symbols for exclude
266
+
267
+ #
108
268
  def exclude(columns)
109
269
  if columns.is_a?(String)
110
270
  columns = [columns]
@@ -140,22 +300,166 @@ module Polars
140
300
  # def map_alias
141
301
  # end
142
302
 
303
+ # Negate a boolean expression.
304
+ #
305
+ # @return [Expr]
306
+ #
307
+ # @example
308
+ # df = Polars::DataFrame.new(
309
+ # {
310
+ # "a" => [true, false, false],
311
+ # "b" => ["a", "b", nil]
312
+ # }
313
+ # )
314
+ # # =>
315
+ # # shape: (3, 2)
316
+ # # ┌───────┬──────┐
317
+ # # │ a ┆ b │
318
+ # # │ --- ┆ --- │
319
+ # # │ bool ┆ str │
320
+ # # ╞═══════╪══════╡
321
+ # # │ true ┆ a │
322
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
323
+ # # │ false ┆ b │
324
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
325
+ # # │ false ┆ null │
326
+ # # └───────┴──────┘
327
+ #
328
+ # @example
329
+ # df.select(Polars.col("a").is_not)
330
+ # # =>
331
+ # # shape: (3, 1)
332
+ # # ┌───────┐
333
+ # # │ a │
334
+ # # │ --- │
335
+ # # │ bool │
336
+ # # ╞═══════╡
337
+ # # │ false │
338
+ # # ├╌╌╌╌╌╌╌┤
339
+ # # │ true │
340
+ # # ├╌╌╌╌╌╌╌┤
341
+ # # │ true │
342
+ # # └───────┘
143
343
  def is_not
144
344
  wrap_expr(_rbexpr.is_not)
145
345
  end
146
346
 
347
+ # Returns a boolean Series indicating which values are null.
348
+ #
349
+ # @return [Expr]
350
+ #
351
+ # @example
352
+ # df = Polars::DataFrame.new(
353
+ # {
354
+ # "a" => [1, 2, nil, 1, 5],
355
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
356
+ # }
357
+ # )
358
+ # df.with_column(Polars.all.is_null.suffix("_isnull"))
359
+ # # =>
360
+ # # shape: (5, 4)
361
+ # # ┌──────┬─────┬──────────┬──────────┐
362
+ # # │ a ┆ b ┆ a_isnull ┆ b_isnull │
363
+ # # │ --- ┆ --- ┆ --- ┆ --- │
364
+ # # │ i64 ┆ f64 ┆ bool ┆ bool │
365
+ # # ╞══════╪═════╪══════════╪══════════╡
366
+ # # │ 1 ┆ 1.0 ┆ false ┆ false │
367
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
368
+ # # │ 2 ┆ 2.0 ┆ false ┆ false │
369
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
370
+ # # │ null ┆ NaN ┆ true ┆ false │
371
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
372
+ # # │ 1 ┆ 1.0 ┆ false ┆ false │
373
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
374
+ # # │ 5 ┆ 5.0 ┆ false ┆ false │
375
+ # # └──────┴─────┴──────────┴──────────┘
147
376
  def is_null
148
377
  wrap_expr(_rbexpr.is_null)
149
378
  end
150
379
 
380
+ # Returns a boolean Series indicating which values are not null.
381
+ #
382
+ # @return [Expr]
383
+ #
384
+ # @example
385
+ # df = Polars::DataFrame.new(
386
+ # {
387
+ # "a" => [1, 2, nil, 1, 5],
388
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
389
+ # }
390
+ # )
391
+ # df.with_column(Polars.all.is_not_null.suffix("_not_null"))
392
+ # # =>
393
+ # # shape: (5, 4)
394
+ # # ┌──────┬─────┬────────────┬────────────┐
395
+ # # │ a ┆ b ┆ a_not_null ┆ b_not_null │
396
+ # # │ --- ┆ --- ┆ --- ┆ --- │
397
+ # # │ i64 ┆ f64 ┆ bool ┆ bool │
398
+ # # ╞══════╪═════╪════════════╪════════════╡
399
+ # # │ 1 ┆ 1.0 ┆ true ┆ true │
400
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
401
+ # # │ 2 ┆ 2.0 ┆ true ┆ true │
402
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
403
+ # # │ null ┆ NaN ┆ false ┆ true │
404
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
405
+ # # │ 1 ┆ 1.0 ┆ true ┆ true │
406
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
407
+ # # │ 5 ┆ 5.0 ┆ true ┆ true │
408
+ # # └──────┴─────┴────────────┴────────────┘
151
409
  def is_not_null
152
410
  wrap_expr(_rbexpr.is_not_null)
153
411
  end
154
412
 
413
+ # Returns a boolean Series indicating which values are finite.
414
+ #
415
+ # @return [Expr]
416
+ #
417
+ # @example
418
+ # df = Polars::DataFrame.new(
419
+ # {
420
+ # "A" => [1.0, 2],
421
+ # "B" => [3.0, Float::INFINITY]
422
+ # }
423
+ # )
424
+ # df.select(Polars.all.is_finite)
425
+ # # =>
426
+ # # shape: (2, 2)
427
+ # # ┌──────┬───────┐
428
+ # # │ A ┆ B │
429
+ # # │ --- ┆ --- │
430
+ # # │ bool ┆ bool │
431
+ # # ╞══════╪═══════╡
432
+ # # │ true ┆ true │
433
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
434
+ # # │ true ┆ false │
435
+ # # └──────┴───────┘
155
436
  def is_finite
156
437
  wrap_expr(_rbexpr.is_finite)
157
438
  end
158
439
 
440
+ # Returns a boolean Series indicating which values are infinite.
441
+ #
442
+ # @return [Expr]
443
+ #
444
+ # @example
445
+ # df = Polars::DataFrame.new(
446
+ # {
447
+ # "A" => [1.0, 2],
448
+ # "B" => [3.0, Float::INFINITY]
449
+ # }
450
+ # )
451
+ # df.select(Polars.all.is_infinite)
452
+ # # =>
453
+ # # shape: (2, 2)
454
+ # # ┌───────┬───────┐
455
+ # # │ A ┆ B │
456
+ # # │ --- ┆ --- │
457
+ # # │ bool ┆ bool │
458
+ # # ╞═══════╪═══════╡
459
+ # # │ false ┆ false │
460
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
461
+ # # │ false ┆ true │
462
+ # # └───────┴───────┘
159
463
  def is_infinite
160
464
  wrap_expr(_rbexpr.is_infinite)
161
465
  end
@@ -172,14 +476,77 @@ module Polars
172
476
  wrap_expr(_rbexpr.agg_groups)
173
477
  end
174
478
 
479
+ # Count the number of values in this expression.
480
+ #
481
+ # @return [Expr]
482
+ #
483
+ # @example
484
+ # df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
485
+ # df.select(Polars.all.count)
486
+ # # =>
487
+ # # shape: (1, 2)
488
+ # # ┌─────┬─────┐
489
+ # # │ a ┆ b │
490
+ # # │ --- ┆ --- │
491
+ # # │ u32 ┆ u32 │
492
+ # # ╞═════╪═════╡
493
+ # # │ 3 ┆ 3 │
494
+ # # └─────┴─────┘
175
495
  def count
176
496
  wrap_expr(_rbexpr.count)
177
497
  end
178
498
 
499
+ # Count the number of values in this expression.
500
+ #
501
+ # Alias for {#count}.
502
+ #
503
+ # @return [Expr]
504
+ #
505
+ # @example
506
+ # df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
507
+ # df.select(Polars.all.len)
508
+ # # =>
509
+ # # shape: (1, 2)
510
+ # # ┌─────┬─────┐
511
+ # # │ a ┆ b │
512
+ # # │ --- ┆ --- │
513
+ # # │ u32 ┆ u32 │
514
+ # # ╞═════╪═════╡
515
+ # # │ 3 ┆ 3 │
516
+ # # └─────┴─────┘
179
517
  def len
180
518
  count
181
519
  end
182
520
 
521
+ # Get a slice of this expression.
522
+ #
523
+ # @param offset [Integer]
524
+ # Start index. Negative indexing is supported.
525
+ # @param length [Integer]
526
+ # Length of the slice. If set to `nil`, all rows starting at the offset
527
+ # will be selected.
528
+ #
529
+ # @return [Expr]
530
+ #
531
+ # @example
532
+ # df = Polars::DataFrame.new(
533
+ # {
534
+ # "a" => [8, 9, 10, 11],
535
+ # "b" => [nil, 4, 4, 4]
536
+ # }
537
+ # )
538
+ # df.select(Polars.all.slice(1, 2))
539
+ # # =>
540
+ # # shape: (2, 2)
541
+ # # ┌─────┬─────┐
542
+ # # │ a ┆ b │
543
+ # # │ --- ┆ --- │
544
+ # # │ i64 ┆ i64 │
545
+ # # ╞═════╪═════╡
546
+ # # │ 9 ┆ 4 │
547
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
548
+ # # │ 10 ┆ 4 │
549
+ # # └─────┴─────┘
183
550
  def slice(offset, length = nil)
184
551
  if !offset.is_a?(Expr)
185
552
  offset = Polars.lit(offset)
@@ -195,14 +562,90 @@ module Polars
195
562
  wrap_expr(_rbexpr.append(other._rbexpr, upcast))
196
563
  end
197
564
 
565
+ # Create a single chunk of memory for this Series.
566
+ #
567
+ # @return [Expr]
568
+ #
569
+ # @example Create a Series with 3 nulls, append column a then rechunk
570
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
571
+ # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
572
+ # # =>
573
+ # # shape: (6, 1)
574
+ # # ┌─────────┐
575
+ # # │ literal │
576
+ # # │ --- │
577
+ # # │ i64 │
578
+ # # ╞═════════╡
579
+ # # │ null │
580
+ # # ├╌╌╌╌╌╌╌╌╌┤
581
+ # # │ null │
582
+ # # ├╌╌╌╌╌╌╌╌╌┤
583
+ # # │ null │
584
+ # # ├╌╌╌╌╌╌╌╌╌┤
585
+ # # │ 1 │
586
+ # # ├╌╌╌╌╌╌╌╌╌┤
587
+ # # │ 1 │
588
+ # # ├╌╌╌╌╌╌╌╌╌┤
589
+ # # │ 2 │
590
+ # # └─────────┘
198
591
  def rechunk
199
592
  wrap_expr(_rbexpr.rechunk)
200
593
  end
201
594
 
595
+ # Drop null values.
596
+ #
597
+ # @return [Expr]
598
+ #
599
+ # @example
600
+ # df = Polars::DataFrame.new(
601
+ # {
602
+ # "a" => [8, 9, 10, 11],
603
+ # "b" => [nil, 4.0, 4.0, Float::NAN]
604
+ # }
605
+ # )
606
+ # df.select(Polars.col("b").drop_nulls)
607
+ # # =>
608
+ # # shape: (3, 1)
609
+ # # ┌─────┐
610
+ # # │ b │
611
+ # # │ --- │
612
+ # # │ f64 │
613
+ # # ╞═════╡
614
+ # # │ 4.0 │
615
+ # # ├╌╌╌╌╌┤
616
+ # # │ 4.0 │
617
+ # # ├╌╌╌╌╌┤
618
+ # # │ NaN │
619
+ # # └─────┘
202
620
  def drop_nulls
203
621
  wrap_expr(_rbexpr.drop_nulls)
204
622
  end
205
623
 
624
+ # Drop floating point NaN values.
625
+ #
626
+ # @return [Expr]
627
+ #
628
+ # @example
629
+ # df = Polars::DataFrame.new(
630
+ # {
631
+ # "a" => [8, 9, 10, 11],
632
+ # "b" => [nil, 4.0, 4.0, Float::NAN]
633
+ # }
634
+ # )
635
+ # df.select(Polars.col("b").drop_nans)
636
+ # # =>
637
+ # # shape: (3, 1)
638
+ # # ┌──────┐
639
+ # # │ b │
640
+ # # │ --- │
641
+ # # │ f64 │
642
+ # # ╞══════╡
643
+ # # │ null │
644
+ # # ├╌╌╌╌╌╌┤
645
+ # # │ 4.0 │
646
+ # # ├╌╌╌╌╌╌┤
647
+ # # │ 4.0 │
648
+ # # └──────┘
206
649
  def drop_nans
207
650
  wrap_expr(_rbexpr.drop_nans)
208
651
  end
@@ -227,14 +670,87 @@ module Polars
227
670
  wrap_expr(_rbexpr.cumcount(reverse))
228
671
  end
229
672
 
673
+ # Rounds down to the nearest integer value.
674
+ #
675
+ # Only works on floating point Series.
676
+ #
677
+ # @return [Expr]
678
+ #
679
+ # @example
680
+ # df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
681
+ # df.select(Polars.col("a").floor)
682
+ # # =>
683
+ # # shape: (4, 1)
684
+ # # ┌─────┐
685
+ # # │ a │
686
+ # # │ --- │
687
+ # # │ f64 │
688
+ # # ╞═════╡
689
+ # # │ 0.0 │
690
+ # # ├╌╌╌╌╌┤
691
+ # # │ 0.0 │
692
+ # # ├╌╌╌╌╌┤
693
+ # # │ 1.0 │
694
+ # # ├╌╌╌╌╌┤
695
+ # # │ 1.0 │
696
+ # # └─────┘
230
697
  def floor
231
698
  wrap_expr(_rbexpr.floor)
232
699
  end
233
700
 
701
+ # Rounds up to the nearest integer value.
702
+ #
703
+ # Only works on floating point Series.
704
+ #
705
+ # @return [Expr]
706
+ #
707
+ # @example
708
+ # df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
709
+ # df.select(Polars.col("a").ceil)
710
+ # # =>
711
+ # # shape: (4, 1)
712
+ # # ┌─────┐
713
+ # # │ a │
714
+ # # │ --- │
715
+ # # │ f64 │
716
+ # # ╞═════╡
717
+ # # │ 1.0 │
718
+ # # ├╌╌╌╌╌┤
719
+ # # │ 1.0 │
720
+ # # ├╌╌╌╌╌┤
721
+ # # │ 1.0 │
722
+ # # ├╌╌╌╌╌┤
723
+ # # │ 2.0 │
724
+ # # └─────┘
234
725
  def ceil
235
726
  wrap_expr(_rbexpr.ceil)
236
727
  end
237
728
 
729
+ # Round underlying floating point data by `decimals` digits.
730
+ #
731
+ # @param decimals [Integer]
732
+ # Number of decimals to round by.
733
+ #
734
+ # @return [Expr]
735
+ #
736
+ # @example
737
+ # df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
738
+ # df.select(Polars.col("a").round(1))
739
+ # # =>
740
+ # # shape: (4, 1)
741
+ # # ┌─────┐
742
+ # # │ a │
743
+ # # │ --- │
744
+ # # │ f64 │
745
+ # # ╞═════╡
746
+ # # │ 0.3 │
747
+ # # ├╌╌╌╌╌┤
748
+ # # │ 0.5 │
749
+ # # ├╌╌╌╌╌┤
750
+ # # │ 1.0 │
751
+ # # ├╌╌╌╌╌┤
752
+ # # │ 1.2 │
753
+ # # └─────┘
238
754
  def round(decimals = 0)
239
755
  wrap_expr(_rbexpr.round(decimals))
240
756
  end
@@ -244,6 +760,31 @@ module Polars
244
760
  wrap_expr(_rbexpr.dot(other._rbexpr))
245
761
  end
246
762
 
763
+ # Compute the most occurring value(s).
764
+ #
765
+ # Can return multiple values.
766
+ #
767
+ # @return [Expr]
768
+ #
769
+ # @example
770
+ # df = Polars::DataFrame.new(
771
+ # {
772
+ # "a" => [1, 1, 2, 3],
773
+ # "b" => [1, 1, 2, 2]
774
+ # }
775
+ # )
776
+ # df.select(Polars.all.mode)
777
+ # # =>
778
+ # # shape: (2, 2)
779
+ # # ┌─────┬─────┐
780
+ # # │ a ┆ b │
781
+ # # │ --- ┆ --- │
782
+ # # │ i64 ┆ i64 │
783
+ # # ╞═════╪═════╡
784
+ # # │ 1 ┆ 1 │
785
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
786
+ # # │ 1 ┆ 2 │
787
+ # # └─────┴─────┘
247
788
  def mode
248
789
  wrap_expr(_rbexpr.mode)
249
790
  end
@@ -265,10 +806,50 @@ module Polars
265
806
  wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
266
807
  end
267
808
 
809
+ # Get the index of the maximal value.
810
+ #
811
+ # @return [Expr]
812
+ #
813
+ # @example
814
+ # df = Polars::DataFrame.new(
815
+ # {
816
+ # "a" => [20, 10, 30]
817
+ # }
818
+ # )
819
+ # df.select(Polars.col("a").arg_max)
820
+ # # =>
821
+ # # shape: (1, 1)
822
+ # # ┌─────┐
823
+ # # │ a │
824
+ # # │ --- │
825
+ # # │ u32 │
826
+ # # ╞═════╡
827
+ # # │ 2 │
828
+ # # └─────┘
268
829
  def arg_max
269
830
  wrap_expr(_rbexpr.arg_max)
270
831
  end
271
832
 
833
+ # Get the index of the minimal value.
834
+ #
835
+ # @return [Expr]
836
+ #
837
+ # @example
838
+ # df = Polars::DataFrame.new(
839
+ # {
840
+ # "a" => [20, 10, 30]
841
+ # }
842
+ # )
843
+ # df.select(Polars.col("a").arg_min)
844
+ # # =>
845
+ # # shape: (1, 1)
846
+ # # ┌─────┐
847
+ # # │ a │
848
+ # # │ --- │
849
+ # # │ u32 │
850
+ # # ╞═════╡
851
+ # # │ 1 │
852
+ # # └─────┘
272
853
  def arg_min
273
854
  wrap_expr(_rbexpr.arg_min)
274
855
  end
@@ -290,10 +871,41 @@ module Polars
290
871
  wrap_expr(_rbexpr.sort_by(by, reverse))
291
872
  end
292
873
 
293
- # def take
294
- # end
295
-
296
- def shift(periods)
874
+ def take(indices)
875
+ if indices.is_a?(Array)
876
+ indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
877
+ else
878
+ indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
879
+ end
880
+ wrap_expr(_rbexpr.take(indices_lit._rbexpr))
881
+ end
882
+
883
+ # Shift the values by a given period.
884
+ #
885
+ # @param periods [Integer]
886
+ # Number of places to shift (may be negative).
887
+ #
888
+ # @return [Expr]
889
+ #
890
+ # @example
891
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
892
+ # df.select(Polars.col("foo").shift(1))
893
+ # # =>
894
+ # # shape: (4, 1)
895
+ # # ┌──────┐
896
+ # # │ foo │
897
+ # # │ --- │
898
+ # # │ i64 │
899
+ # # ╞══════╡
900
+ # # │ null │
901
+ # # ├╌╌╌╌╌╌┤
902
+ # # │ 1 │
903
+ # # ├╌╌╌╌╌╌┤
904
+ # # │ 2 │
905
+ # # ├╌╌╌╌╌╌┤
906
+ # # │ 3 │
907
+ # # └──────┘
908
+ def shift(periods = 1)
297
909
  wrap_expr(_rbexpr.shift(periods))
298
910
  end
299
911
 
@@ -439,6 +1051,7 @@ module Polars
439
1051
  # def apply
440
1052
  # end
441
1053
 
1054
+ #
442
1055
  def flatten
443
1056
  wrap_expr(_rbexpr.explode)
444
1057
  end
@@ -471,6 +1084,7 @@ module Polars
471
1084
  # def is_in
472
1085
  # end
473
1086
 
1087
+ #
474
1088
  def repeat_by(by)
475
1089
  by = Utils.expr_to_lit_or_expr(by, false)
476
1090
  wrap_expr(_rbexpr.repeat_by(by._rbexpr))
@@ -482,6 +1096,7 @@ module Polars
482
1096
  # def _hash
483
1097
  # end
484
1098
 
1099
+ #
485
1100
  def reinterpret(signed: false)
486
1101
  wrap_expr(_rbexpr.reinterpret(signed))
487
1102
  end
@@ -489,6 +1104,7 @@ module Polars
489
1104
  # def _inspect
490
1105
  # end
491
1106
 
1107
+ #
492
1108
  def interpolate
493
1109
  wrap_expr(_rbexpr.interpolate)
494
1110
  end
@@ -520,6 +1136,7 @@ module Polars
520
1136
  # def rolling_apply
521
1137
  # end
522
1138
 
1139
+ #
523
1140
  def rolling_skew(window_size, bias: true)
524
1141
  wrap_expr(_rbexpr.rolling_skew(window_size, bias))
525
1142
  end
@@ -647,8 +1264,10 @@ module Polars
647
1264
  # def ewm_var
648
1265
  # end
649
1266
 
650
- # def extend_constant
651
- # end
1267
+ #
1268
+ def extend_constant(value, n)
1269
+ wrap_expr(_rbexpr.extend_constant(value, n))
1270
+ end
652
1271
 
653
1272
  def value_counts(multithreaded: false, sort: false)
654
1273
  wrap_expr(_rbexpr.value_counts(multithreaded, sort))
@@ -659,7 +1278,7 @@ module Polars
659
1278
  end
660
1279
 
661
1280
  def log(base = Math::E)
662
- wrap_expr(self._rbexpr.log(base))
1281
+ wrap_expr(_rbexpr.log(base))
663
1282
  end
664
1283
 
665
1284
  def entropy(base: 2, normalize: false)
@@ -672,6 +1291,7 @@ module Polars
672
1291
  # def set_sorted
673
1292
  # end
674
1293
 
1294
+ #
675
1295
  def list
676
1296
  wrap_expr(_rbexpr.list)
677
1297
  end