polars-df 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +8 -0
  4. data/Cargo.lock +2 -1
  5. data/README.md +1 -1
  6. data/ext/polars/Cargo.toml +7 -1
  7. data/ext/polars/src/batched_csv.rs +120 -0
  8. data/ext/polars/src/conversion.rs +139 -6
  9. data/ext/polars/src/dataframe.rs +360 -15
  10. data/ext/polars/src/error.rs +9 -0
  11. data/ext/polars/src/file.rs +8 -7
  12. data/ext/polars/src/lazy/apply.rs +7 -0
  13. data/ext/polars/src/lazy/dataframe.rs +135 -3
  14. data/ext/polars/src/lazy/dsl.rs +97 -2
  15. data/ext/polars/src/lazy/meta.rs +1 -1
  16. data/ext/polars/src/lazy/mod.rs +1 -0
  17. data/ext/polars/src/lib.rs +227 -12
  18. data/ext/polars/src/series.rs +190 -38
  19. data/ext/polars/src/set.rs +91 -0
  20. data/ext/polars/src/utils.rs +19 -0
  21. data/lib/polars/batched_csv_reader.rb +96 -0
  22. data/lib/polars/cat_expr.rb +39 -0
  23. data/lib/polars/data_frame.rb +2813 -100
  24. data/lib/polars/date_time_expr.rb +1282 -7
  25. data/lib/polars/exceptions.rb +20 -0
  26. data/lib/polars/expr.rb +631 -11
  27. data/lib/polars/expr_dispatch.rb +14 -0
  28. data/lib/polars/functions.rb +219 -0
  29. data/lib/polars/group_by.rb +517 -0
  30. data/lib/polars/io.rb +763 -4
  31. data/lib/polars/lazy_frame.rb +1415 -67
  32. data/lib/polars/lazy_functions.rb +430 -9
  33. data/lib/polars/lazy_group_by.rb +79 -0
  34. data/lib/polars/list_expr.rb +5 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2244 -192
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +663 -2
  39. data/lib/polars/struct_expr.rb +73 -0
  40. data/lib/polars/utils.rb +76 -3
  41. data/lib/polars/version.rb +2 -1
  42. data/lib/polars/when.rb +1 -0
  43. data/lib/polars/when_then.rb +1 -0
  44. data/lib/polars.rb +8 -2
  45. metadata +12 -2
data/lib/polars/expr.rb CHANGED
@@ -1,78 +1,139 @@
1
1
  module Polars
2
+ # Expressions that can be used in various contexts.
2
3
  class Expr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def self._from_rbexpr(rbexpr)
6
9
  expr = Expr.allocate
7
10
  expr._rbexpr = rbexpr
8
11
  expr
9
12
  end
10
13
 
14
+ # Returns a string representing the Expr.
15
+ #
16
+ # @return [String]
11
17
  def to_s
12
18
  _rbexpr.to_str
13
19
  end
14
20
  alias_method :inspect, :to_s
15
21
 
22
+ # Bitwise XOR.
23
+ #
24
+ # @return [Expr]
16
25
  def ^(other)
17
26
  wrap_expr(_rbexpr._xor(_to_rbexpr(other)))
18
27
  end
19
28
 
29
+ # Bitwise AND.
30
+ #
31
+ # @return [Expr]
20
32
  def &(other)
21
33
  wrap_expr(_rbexpr._and(_to_rbexpr(other)))
22
34
  end
23
35
 
36
+ # Bitwise OR.
37
+ #
38
+ # @return [Expr]
24
39
  def |(other)
25
40
  wrap_expr(_rbexpr._or(_to_rbexpr(other)))
26
41
  end
27
42
 
43
+ # Performs addition.
44
+ #
45
+ # @return [Expr]
28
46
  def +(other)
29
47
  wrap_expr(_rbexpr + _to_rbexpr(other))
30
48
  end
31
49
 
50
+ # Performs subtraction.
51
+ #
52
+ # @return [Expr]
32
53
  def -(other)
33
54
  wrap_expr(_rbexpr - _to_rbexpr(other))
34
55
  end
35
56
 
57
+ # Performs multiplication.
58
+ #
59
+ # @return [Expr]
36
60
  def *(other)
37
61
  wrap_expr(_rbexpr * _to_rbexpr(other))
38
62
  end
39
63
 
64
+ # Performs division.
65
+ #
66
+ # @return [Expr]
40
67
  def /(other)
41
68
  wrap_expr(_rbexpr / _to_rbexpr(other))
42
69
  end
43
70
 
71
+ # Performs floor division.
72
+ #
73
+ # @return [Expr]
74
+ def floordiv(other)
75
+ wrap_expr(_rbexpr.floordiv(_to_rbexpr(other)))
76
+ end
77
+
78
+ # Returns the modulo.
79
+ #
80
+ # @return [Expr]
44
81
  def %(other)
45
82
  wrap_expr(_rbexpr % _to_rbexpr(other))
46
83
  end
47
84
 
85
+ # Raises to the power of exponent.
86
+ #
87
+ # @return [Expr]
48
88
  def **(power)
49
89
  pow(power)
50
90
  end
51
91
 
92
+ # Greater than or equal.
93
+ #
94
+ # @return [Expr]
52
95
  def >=(other)
53
96
  wrap_expr(_rbexpr.gt_eq(_to_expr(other)._rbexpr))
54
97
  end
55
98
 
99
+ # Less than or equal.
100
+ #
101
+ # @return [Expr]
56
102
  def <=(other)
57
103
  wrap_expr(_rbexpr.lt_eq(_to_expr(other)._rbexpr))
58
104
  end
59
105
 
106
+ # Equal.
107
+ #
108
+ # @return [Expr]
60
109
  def ==(other)
61
110
  wrap_expr(_rbexpr.eq(_to_expr(other)._rbexpr))
62
111
  end
63
112
 
113
+ # Not equal.
114
+ #
115
+ # @return [Expr]
64
116
  def !=(other)
65
117
  wrap_expr(_rbexpr.neq(_to_expr(other)._rbexpr))
66
118
  end
67
119
 
120
+ # Less than.
121
+ #
122
+ # @return [Expr]
68
123
  def <(other)
69
124
  wrap_expr(_rbexpr.lt(_to_expr(other)._rbexpr))
70
125
  end
71
126
 
127
+ # Greater than.
128
+ #
129
+ # @return [Expr]
72
130
  def >(other)
73
131
  wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr))
74
132
  end
75
133
 
134
+ # Performs negation.
135
+ #
136
+ # @return [Expr]
76
137
  def -@
77
138
  Utils.lit(0) - self
78
139
  end
@@ -80,22 +141,119 @@ module Polars
80
141
  # def to_physical
81
142
  # end
82
143
 
144
+ # Check if any boolean value in a Boolean column is `true`.
145
+ #
146
+ # @return [Expr]
147
+ #
148
+ # @example
149
+ # df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
150
+ # df.select(Polars.all.any)
151
+ # # =>
152
+ # # shape: (1, 2)
153
+ # # ┌──────┬───────┐
154
+ # # │ TF ┆ FF │
155
+ # # │ --- ┆ --- │
156
+ # # │ bool ┆ bool │
157
+ # # ╞══════╪═══════╡
158
+ # # │ true ┆ false │
159
+ # # └──────┴───────┘
83
160
  def any
84
161
  wrap_expr(_rbexpr.any)
85
162
  end
86
163
 
164
+ # Check if all boolean values in a Boolean column are `true`.
165
+ #
166
+ # This method is an expression - not to be confused with
167
+ # `Polars.all` which is a function to select all columns.
168
+ #
169
+ # @return [Expr]
170
+ #
171
+ # @example
172
+ # df = Polars::DataFrame.new(
173
+ # {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
174
+ # )
175
+ # df.select(Polars.col("*").all)
176
+ # # =>
177
+ # # shape: (1, 3)
178
+ # # ┌──────┬───────┬───────┐
179
+ # # │ TT ┆ TF ┆ FF │
180
+ # # │ --- ┆ --- ┆ --- │
181
+ # # │ bool ┆ bool ┆ bool │
182
+ # # ╞══════╪═══════╪═══════╡
183
+ # # │ true ┆ false ┆ false │
184
+ # # └──────┴───────┴───────┘
87
185
  def all
88
186
  wrap_expr(_rbexpr.all)
89
187
  end
90
188
 
189
+ # Compute the square root of the elements.
190
+ #
191
+ # @return [Expr]
192
+ #
193
+ # @example
194
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
195
+ # df.select(Polars.col("values").sqrt)
196
+ # # =>
197
+ # # shape: (3, 1)
198
+ # # ┌──────────┐
199
+ # # │ values │
200
+ # # │ --- │
201
+ # # │ f64 │
202
+ # # ╞══════════╡
203
+ # # │ 1.0 │
204
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
205
+ # # │ 1.414214 │
206
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
207
+ # # │ 2.0 │
208
+ # # └──────────┘
91
209
  def sqrt
92
- self ** 0.5
93
- end
94
-
210
+ self**0.5
211
+ end
212
+
213
+ # Compute the base 10 logarithm of the input array, element-wise.
214
+ #
215
+ # @return [Expr]
216
+ #
217
+ # @example
218
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
219
+ # df.select(Polars.col("values").log10)
220
+ # # =>
221
+ # # shape: (3, 1)
222
+ # # ┌─────────┐
223
+ # # │ values │
224
+ # # │ --- │
225
+ # # │ f64 │
226
+ # # ╞═════════╡
227
+ # # │ 0.0 │
228
+ # # ├╌╌╌╌╌╌╌╌╌┤
229
+ # # │ 0.30103 │
230
+ # # ├╌╌╌╌╌╌╌╌╌┤
231
+ # # │ 0.60206 │
232
+ # # └─────────┘
95
233
  def log10
96
234
  log(10)
97
235
  end
98
236
 
237
+ # Compute the exponential, element-wise.
238
+ #
239
+ # @return [Expr]
240
+ #
241
+ # @example
242
+ # df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
243
+ # df.select(Polars.col("values").exp)
244
+ # # =>
245
+ # # shape: (3, 1)
246
+ # # ┌──────────┐
247
+ # # │ values │
248
+ # # │ --- │
249
+ # # │ f64 │
250
+ # # ╞══════════╡
251
+ # # │ 2.718282 │
252
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
253
+ # # │ 7.389056 │
254
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
255
+ # # │ 54.59815 │
256
+ # # └──────────┘
99
257
  def exp
100
258
  wrap_expr(_rbexpr.exp)
101
259
  end
@@ -104,7 +262,9 @@ module Polars
104
262
  wrap_expr(_rbexpr._alias(name))
105
263
  end
106
264
 
107
- # TODO support symbols
265
+ # TODO support symbols for exclude
266
+
267
+ #
108
268
  def exclude(columns)
109
269
  if columns.is_a?(String)
110
270
  columns = [columns]
@@ -140,22 +300,166 @@ module Polars
140
300
  # def map_alias
141
301
  # end
142
302
 
303
+ # Negate a boolean expression.
304
+ #
305
+ # @return [Expr]
306
+ #
307
+ # @example
308
+ # df = Polars::DataFrame.new(
309
+ # {
310
+ # "a" => [true, false, false],
311
+ # "b" => ["a", "b", nil]
312
+ # }
313
+ # )
314
+ # # =>
315
+ # # shape: (3, 2)
316
+ # # ┌───────┬──────┐
317
+ # # │ a ┆ b │
318
+ # # │ --- ┆ --- │
319
+ # # │ bool ┆ str │
320
+ # # ╞═══════╪══════╡
321
+ # # │ true ┆ a │
322
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
323
+ # # │ false ┆ b │
324
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
325
+ # # │ false ┆ null │
326
+ # # └───────┴──────┘
327
+ #
328
+ # @example
329
+ # df.select(Polars.col("a").is_not)
330
+ # # =>
331
+ # # shape: (3, 1)
332
+ # # ┌───────┐
333
+ # # │ a │
334
+ # # │ --- │
335
+ # # │ bool │
336
+ # # ╞═══════╡
337
+ # # │ false │
338
+ # # ├╌╌╌╌╌╌╌┤
339
+ # # │ true │
340
+ # # ├╌╌╌╌╌╌╌┤
341
+ # # │ true │
342
+ # # └───────┘
143
343
  def is_not
144
344
  wrap_expr(_rbexpr.is_not)
145
345
  end
146
346
 
347
+ # Returns a boolean Series indicating which values are null.
348
+ #
349
+ # @return [Expr]
350
+ #
351
+ # @example
352
+ # df = Polars::DataFrame.new(
353
+ # {
354
+ # "a" => [1, 2, nil, 1, 5],
355
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
356
+ # }
357
+ # )
358
+ # df.with_column(Polars.all.is_null.suffix("_isnull"))
359
+ # # =>
360
+ # # shape: (5, 4)
361
+ # # ┌──────┬─────┬──────────┬──────────┐
362
+ # # │ a ┆ b ┆ a_isnull ┆ b_isnull │
363
+ # # │ --- ┆ --- ┆ --- ┆ --- │
364
+ # # │ i64 ┆ f64 ┆ bool ┆ bool │
365
+ # # ╞══════╪═════╪══════════╪══════════╡
366
+ # # │ 1 ┆ 1.0 ┆ false ┆ false │
367
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
368
+ # # │ 2 ┆ 2.0 ┆ false ┆ false │
369
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
370
+ # # │ null ┆ NaN ┆ true ┆ false │
371
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
372
+ # # │ 1 ┆ 1.0 ┆ false ┆ false │
373
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
374
+ # # │ 5 ┆ 5.0 ┆ false ┆ false │
375
+ # # └──────┴─────┴──────────┴──────────┘
147
376
  def is_null
148
377
  wrap_expr(_rbexpr.is_null)
149
378
  end
150
379
 
380
+ # Returns a boolean Series indicating which values are not null.
381
+ #
382
+ # @return [Expr]
383
+ #
384
+ # @example
385
+ # df = Polars::DataFrame.new(
386
+ # {
387
+ # "a" => [1, 2, nil, 1, 5],
388
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
389
+ # }
390
+ # )
391
+ # df.with_column(Polars.all.is_not_null.suffix("_not_null"))
392
+ # # =>
393
+ # # shape: (5, 4)
394
+ # # ┌──────┬─────┬────────────┬────────────┐
395
+ # # │ a ┆ b ┆ a_not_null ┆ b_not_null │
396
+ # # │ --- ┆ --- ┆ --- ┆ --- │
397
+ # # │ i64 ┆ f64 ┆ bool ┆ bool │
398
+ # # ╞══════╪═════╪════════════╪════════════╡
399
+ # # │ 1 ┆ 1.0 ┆ true ┆ true │
400
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
401
+ # # │ 2 ┆ 2.0 ┆ true ┆ true │
402
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
403
+ # # │ null ┆ NaN ┆ false ┆ true │
404
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
405
+ # # │ 1 ┆ 1.0 ┆ true ┆ true │
406
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
407
+ # # │ 5 ┆ 5.0 ┆ true ┆ true │
408
+ # # └──────┴─────┴────────────┴────────────┘
151
409
  def is_not_null
152
410
  wrap_expr(_rbexpr.is_not_null)
153
411
  end
154
412
 
413
+ # Returns a boolean Series indicating which values are finite.
414
+ #
415
+ # @return [Expr]
416
+ #
417
+ # @example
418
+ # df = Polars::DataFrame.new(
419
+ # {
420
+ # "A" => [1.0, 2],
421
+ # "B" => [3.0, Float::INFINITY]
422
+ # }
423
+ # )
424
+ # df.select(Polars.all.is_finite)
425
+ # # =>
426
+ # # shape: (2, 2)
427
+ # # ┌──────┬───────┐
428
+ # # │ A ┆ B │
429
+ # # │ --- ┆ --- │
430
+ # # │ bool ┆ bool │
431
+ # # ╞══════╪═══════╡
432
+ # # │ true ┆ true │
433
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
434
+ # # │ true ┆ false │
435
+ # # └──────┴───────┘
155
436
  def is_finite
156
437
  wrap_expr(_rbexpr.is_finite)
157
438
  end
158
439
 
440
+ # Returns a boolean Series indicating which values are infinite.
441
+ #
442
+ # @return [Expr]
443
+ #
444
+ # @example
445
+ # df = Polars::DataFrame.new(
446
+ # {
447
+ # "A" => [1.0, 2],
448
+ # "B" => [3.0, Float::INFINITY]
449
+ # }
450
+ # )
451
+ # df.select(Polars.all.is_infinite)
452
+ # # =>
453
+ # # shape: (2, 2)
454
+ # # ┌───────┬───────┐
455
+ # # │ A ┆ B │
456
+ # # │ --- ┆ --- │
457
+ # # │ bool ┆ bool │
458
+ # # ╞═══════╪═══════╡
459
+ # # │ false ┆ false │
460
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
461
+ # # │ false ┆ true │
462
+ # # └───────┴───────┘
159
463
  def is_infinite
160
464
  wrap_expr(_rbexpr.is_infinite)
161
465
  end
@@ -172,14 +476,77 @@ module Polars
172
476
  wrap_expr(_rbexpr.agg_groups)
173
477
  end
174
478
 
479
+ # Count the number of values in this expression.
480
+ #
481
+ # @return [Expr]
482
+ #
483
+ # @example
484
+ # df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
485
+ # df.select(Polars.all.count)
486
+ # # =>
487
+ # # shape: (1, 2)
488
+ # # ┌─────┬─────┐
489
+ # # │ a ┆ b │
490
+ # # │ --- ┆ --- │
491
+ # # │ u32 ┆ u32 │
492
+ # # ╞═════╪═════╡
493
+ # # │ 3 ┆ 3 │
494
+ # # └─────┴─────┘
175
495
  def count
176
496
  wrap_expr(_rbexpr.count)
177
497
  end
178
498
 
499
+ # Count the number of values in this expression.
500
+ #
501
+ # Alias for {#count}.
502
+ #
503
+ # @return [Expr]
504
+ #
505
+ # @example
506
+ # df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
507
+ # df.select(Polars.all.len)
508
+ # # =>
509
+ # # shape: (1, 2)
510
+ # # ┌─────┬─────┐
511
+ # # │ a ┆ b │
512
+ # # │ --- ┆ --- │
513
+ # # │ u32 ┆ u32 │
514
+ # # ╞═════╪═════╡
515
+ # # │ 3 ┆ 3 │
516
+ # # └─────┴─────┘
179
517
  def len
180
518
  count
181
519
  end
182
520
 
521
+ # Get a slice of this expression.
522
+ #
523
+ # @param offset [Integer]
524
+ # Start index. Negative indexing is supported.
525
+ # @param length [Integer]
526
+ # Length of the slice. If set to `nil`, all rows starting at the offset
527
+ # will be selected.
528
+ #
529
+ # @return [Expr]
530
+ #
531
+ # @example
532
+ # df = Polars::DataFrame.new(
533
+ # {
534
+ # "a" => [8, 9, 10, 11],
535
+ # "b" => [nil, 4, 4, 4]
536
+ # }
537
+ # )
538
+ # df.select(Polars.all.slice(1, 2))
539
+ # # =>
540
+ # # shape: (2, 2)
541
+ # # ┌─────┬─────┐
542
+ # # │ a ┆ b │
543
+ # # │ --- ┆ --- │
544
+ # # │ i64 ┆ i64 │
545
+ # # ╞═════╪═════╡
546
+ # # │ 9 ┆ 4 │
547
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
548
+ # # │ 10 ┆ 4 │
549
+ # # └─────┴─────┘
183
550
  def slice(offset, length = nil)
184
551
  if !offset.is_a?(Expr)
185
552
  offset = Polars.lit(offset)
@@ -195,14 +562,90 @@ module Polars
195
562
  wrap_expr(_rbexpr.append(other._rbexpr, upcast))
196
563
  end
197
564
 
565
+ # Create a single chunk of memory for this Series.
566
+ #
567
+ # @return [Expr]
568
+ #
569
+ # @example Create a Series with 3 nulls, append column a then rechunk
570
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
571
+ # df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
572
+ # # =>
573
+ # # shape: (6, 1)
574
+ # # ┌─────────┐
575
+ # # │ literal │
576
+ # # │ --- │
577
+ # # │ i64 │
578
+ # # ╞═════════╡
579
+ # # │ null │
580
+ # # ├╌╌╌╌╌╌╌╌╌┤
581
+ # # │ null │
582
+ # # ├╌╌╌╌╌╌╌╌╌┤
583
+ # # │ null │
584
+ # # ├╌╌╌╌╌╌╌╌╌┤
585
+ # # │ 1 │
586
+ # # ├╌╌╌╌╌╌╌╌╌┤
587
+ # # │ 1 │
588
+ # # ├╌╌╌╌╌╌╌╌╌┤
589
+ # # │ 2 │
590
+ # # └─────────┘
198
591
  def rechunk
199
592
  wrap_expr(_rbexpr.rechunk)
200
593
  end
201
594
 
595
+ # Drop null values.
596
+ #
597
+ # @return [Expr]
598
+ #
599
+ # @example
600
+ # df = Polars::DataFrame.new(
601
+ # {
602
+ # "a" => [8, 9, 10, 11],
603
+ # "b" => [nil, 4.0, 4.0, Float::NAN]
604
+ # }
605
+ # )
606
+ # df.select(Polars.col("b").drop_nulls)
607
+ # # =>
608
+ # # shape: (3, 1)
609
+ # # ┌─────┐
610
+ # # │ b │
611
+ # # │ --- │
612
+ # # │ f64 │
613
+ # # ╞═════╡
614
+ # # │ 4.0 │
615
+ # # ├╌╌╌╌╌┤
616
+ # # │ 4.0 │
617
+ # # ├╌╌╌╌╌┤
618
+ # # │ NaN │
619
+ # # └─────┘
202
620
  def drop_nulls
203
621
  wrap_expr(_rbexpr.drop_nulls)
204
622
  end
205
623
 
624
+ # Drop floating point NaN values.
625
+ #
626
+ # @return [Expr]
627
+ #
628
+ # @example
629
+ # df = Polars::DataFrame.new(
630
+ # {
631
+ # "a" => [8, 9, 10, 11],
632
+ # "b" => [nil, 4.0, 4.0, Float::NAN]
633
+ # }
634
+ # )
635
+ # df.select(Polars.col("b").drop_nans)
636
+ # # =>
637
+ # # shape: (3, 1)
638
+ # # ┌──────┐
639
+ # # │ b │
640
+ # # │ --- │
641
+ # # │ f64 │
642
+ # # ╞══════╡
643
+ # # │ null │
644
+ # # ├╌╌╌╌╌╌┤
645
+ # # │ 4.0 │
646
+ # # ├╌╌╌╌╌╌┤
647
+ # # │ 4.0 │
648
+ # # └──────┘
206
649
  def drop_nans
207
650
  wrap_expr(_rbexpr.drop_nans)
208
651
  end
@@ -227,14 +670,87 @@ module Polars
227
670
  wrap_expr(_rbexpr.cumcount(reverse))
228
671
  end
229
672
 
673
+ # Rounds down to the nearest integer value.
674
+ #
675
+ # Only works on floating point Series.
676
+ #
677
+ # @return [Expr]
678
+ #
679
+ # @example
680
+ # df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
681
+ # df.select(Polars.col("a").floor)
682
+ # # =>
683
+ # # shape: (4, 1)
684
+ # # ┌─────┐
685
+ # # │ a │
686
+ # # │ --- │
687
+ # # │ f64 │
688
+ # # ╞═════╡
689
+ # # │ 0.0 │
690
+ # # ├╌╌╌╌╌┤
691
+ # # │ 0.0 │
692
+ # # ├╌╌╌╌╌┤
693
+ # # │ 1.0 │
694
+ # # ├╌╌╌╌╌┤
695
+ # # │ 1.0 │
696
+ # # └─────┘
230
697
  def floor
231
698
  wrap_expr(_rbexpr.floor)
232
699
  end
233
700
 
701
+ # Rounds up to the nearest integer value.
702
+ #
703
+ # Only works on floating point Series.
704
+ #
705
+ # @return [Expr]
706
+ #
707
+ # @example
708
+ # df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
709
+ # df.select(Polars.col("a").ceil)
710
+ # # =>
711
+ # # shape: (4, 1)
712
+ # # ┌─────┐
713
+ # # │ a │
714
+ # # │ --- │
715
+ # # │ f64 │
716
+ # # ╞═════╡
717
+ # # │ 1.0 │
718
+ # # ├╌╌╌╌╌┤
719
+ # # │ 1.0 │
720
+ # # ├╌╌╌╌╌┤
721
+ # # │ 1.0 │
722
+ # # ├╌╌╌╌╌┤
723
+ # # │ 2.0 │
724
+ # # └─────┘
234
725
  def ceil
235
726
  wrap_expr(_rbexpr.ceil)
236
727
  end
237
728
 
729
+ # Round underlying floating point data by `decimals` digits.
730
+ #
731
+ # @param decimals [Integer]
732
+ # Number of decimals to round by.
733
+ #
734
+ # @return [Expr]
735
+ #
736
+ # @example
737
+ # df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
738
+ # df.select(Polars.col("a").round(1))
739
+ # # =>
740
+ # # shape: (4, 1)
741
+ # # ┌─────┐
742
+ # # │ a │
743
+ # # │ --- │
744
+ # # │ f64 │
745
+ # # ╞═════╡
746
+ # # │ 0.3 │
747
+ # # ├╌╌╌╌╌┤
748
+ # # │ 0.5 │
749
+ # # ├╌╌╌╌╌┤
750
+ # # │ 1.0 │
751
+ # # ├╌╌╌╌╌┤
752
+ # # │ 1.2 │
753
+ # # └─────┘
238
754
  def round(decimals = 0)
239
755
  wrap_expr(_rbexpr.round(decimals))
240
756
  end
@@ -244,6 +760,31 @@ module Polars
244
760
  wrap_expr(_rbexpr.dot(other._rbexpr))
245
761
  end
246
762
 
763
+ # Compute the most occurring value(s).
764
+ #
765
+ # Can return multiple values.
766
+ #
767
+ # @return [Expr]
768
+ #
769
+ # @example
770
+ # df = Polars::DataFrame.new(
771
+ # {
772
+ # "a" => [1, 1, 2, 3],
773
+ # "b" => [1, 1, 2, 2]
774
+ # }
775
+ # )
776
+ # df.select(Polars.all.mode)
777
+ # # =>
778
+ # # shape: (2, 2)
779
+ # # ┌─────┬─────┐
780
+ # # │ a ┆ b │
781
+ # # │ --- ┆ --- │
782
+ # # │ i64 ┆ i64 │
783
+ # # ╞═════╪═════╡
784
+ # # │ 1 ┆ 1 │
785
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
786
+ # # │ 1 ┆ 2 │
787
+ # # └─────┴─────┘
247
788
  def mode
248
789
  wrap_expr(_rbexpr.mode)
249
790
  end
@@ -265,10 +806,50 @@ module Polars
265
806
  wrap_expr(_rbexpr.arg_sort(reverse, nulls_last))
266
807
  end
267
808
 
809
+ # Get the index of the maximal value.
810
+ #
811
+ # @return [Expr]
812
+ #
813
+ # @example
814
+ # df = Polars::DataFrame.new(
815
+ # {
816
+ # "a" => [20, 10, 30]
817
+ # }
818
+ # )
819
+ # df.select(Polars.col("a").arg_max)
820
+ # # =>
821
+ # # shape: (1, 1)
822
+ # # ┌─────┐
823
+ # # │ a │
824
+ # # │ --- │
825
+ # # │ u32 │
826
+ # # ╞═════╡
827
+ # # │ 2 │
828
+ # # └─────┘
268
829
  def arg_max
269
830
  wrap_expr(_rbexpr.arg_max)
270
831
  end
271
832
 
833
+ # Get the index of the minimal value.
834
+ #
835
+ # @return [Expr]
836
+ #
837
+ # @example
838
+ # df = Polars::DataFrame.new(
839
+ # {
840
+ # "a" => [20, 10, 30]
841
+ # }
842
+ # )
843
+ # df.select(Polars.col("a").arg_min)
844
+ # # =>
845
+ # # shape: (1, 1)
846
+ # # ┌─────┐
847
+ # # │ a │
848
+ # # │ --- │
849
+ # # │ u32 │
850
+ # # ╞═════╡
851
+ # # │ 1 │
852
+ # # └─────┘
272
853
  def arg_min
273
854
  wrap_expr(_rbexpr.arg_min)
274
855
  end
@@ -290,10 +871,41 @@ module Polars
290
871
  wrap_expr(_rbexpr.sort_by(by, reverse))
291
872
  end
292
873
 
293
- # def take
294
- # end
295
-
296
- def shift(periods)
874
+ def take(indices)
875
+ if indices.is_a?(Array)
876
+ indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
877
+ else
878
+ indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
879
+ end
880
+ wrap_expr(_rbexpr.take(indices_lit._rbexpr))
881
+ end
882
+
883
+ # Shift the values by a given period.
884
+ #
885
+ # @param periods [Integer]
886
+ # Number of places to shift (may be negative).
887
+ #
888
+ # @return [Expr]
889
+ #
890
+ # @example
891
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
892
+ # df.select(Polars.col("foo").shift(1))
893
+ # # =>
894
+ # # shape: (4, 1)
895
+ # # ┌──────┐
896
+ # # │ foo │
897
+ # # │ --- │
898
+ # # │ i64 │
899
+ # # ╞══════╡
900
+ # # │ null │
901
+ # # ├╌╌╌╌╌╌┤
902
+ # # │ 1 │
903
+ # # ├╌╌╌╌╌╌┤
904
+ # # │ 2 │
905
+ # # ├╌╌╌╌╌╌┤
906
+ # # │ 3 │
907
+ # # └──────┘
908
+ def shift(periods = 1)
297
909
  wrap_expr(_rbexpr.shift(periods))
298
910
  end
299
911
 
@@ -439,6 +1051,7 @@ module Polars
439
1051
  # def apply
440
1052
  # end
441
1053
 
1054
+ #
442
1055
  def flatten
443
1056
  wrap_expr(_rbexpr.explode)
444
1057
  end
@@ -471,6 +1084,7 @@ module Polars
471
1084
  # def is_in
472
1085
  # end
473
1086
 
1087
+ #
474
1088
  def repeat_by(by)
475
1089
  by = Utils.expr_to_lit_or_expr(by, false)
476
1090
  wrap_expr(_rbexpr.repeat_by(by._rbexpr))
@@ -482,6 +1096,7 @@ module Polars
482
1096
  # def _hash
483
1097
  # end
484
1098
 
1099
+ #
485
1100
  def reinterpret(signed: false)
486
1101
  wrap_expr(_rbexpr.reinterpret(signed))
487
1102
  end
@@ -489,6 +1104,7 @@ module Polars
489
1104
  # def _inspect
490
1105
  # end
491
1106
 
1107
+ #
492
1108
  def interpolate
493
1109
  wrap_expr(_rbexpr.interpolate)
494
1110
  end
@@ -520,6 +1136,7 @@ module Polars
520
1136
  # def rolling_apply
521
1137
  # end
522
1138
 
1139
+ #
523
1140
  def rolling_skew(window_size, bias: true)
524
1141
  wrap_expr(_rbexpr.rolling_skew(window_size, bias))
525
1142
  end
@@ -647,8 +1264,10 @@ module Polars
647
1264
  # def ewm_var
648
1265
  # end
649
1266
 
650
- # def extend_constant
651
- # end
1267
+ #
1268
+ def extend_constant(value, n)
1269
+ wrap_expr(_rbexpr.extend_constant(value, n))
1270
+ end
652
1271
 
653
1272
  def value_counts(multithreaded: false, sort: false)
654
1273
  wrap_expr(_rbexpr.value_counts(multithreaded, sort))
@@ -659,7 +1278,7 @@ module Polars
659
1278
  end
660
1279
 
661
1280
  def log(base = Math::E)
662
- wrap_expr(self._rbexpr.log(base))
1281
+ wrap_expr(_rbexpr.log(base))
663
1282
  end
664
1283
 
665
1284
  def entropy(base: 2, normalize: false)
@@ -672,6 +1291,7 @@ module Polars
672
1291
  # def set_sorted
673
1292
  # end
674
1293
 
1294
+ #
675
1295
  def list
676
1296
  wrap_expr(_rbexpr.list)
677
1297
  end