polars-df 0.2.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38828 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.so +0 -0
  10. data/lib/polars/3.1/polars.so +0 -0
  11. data/lib/polars/3.2/polars.so +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,4833 @@
1
+ module Polars
2
+ # Two-dimensional data structure representing data as a table with rows and columns.
3
+ class DataFrame
4
+ # @private
5
+ attr_accessor :_df
6
+
7
+ # Create a new DataFrame.
8
+ #
9
+ # @param data [Hash, Array, Series, nil]
10
+ # Two-dimensional data in various forms. Hash must contain Arrays.
11
+ # Array may contain Series.
12
+ # @param columns [Array, Hash, nil]
13
+ # Column labels to use for resulting DataFrame. If specified, overrides any
14
+ # labels already present in the data. Must match data dimensions.
15
+ # @param orient ["col", "row", nil]
16
+ # Whether to interpret two-dimensional data as columns or as rows. If `nil`,
17
+ # the orientation is inferred by matching the columns and data dimensions. If
18
+ # this does not yield conclusive results, column orientation is used.
19
def initialize(data = nil, columns: nil, orient: nil)
  # ActiveRecord relations/results are first flattened into a
  # column-name => values hash, which the Hash branch below then consumes.
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
    result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
    data = result.columns.each_with_index.to_h do |name, idx|
      [name, result.rows.map { |row| row[idx] }]
    end
  end

  self._df =
    case data
    when nil
      self.class.hash_to_rbdf({}, columns: columns)
    when Hash
      # Symbol keys are normalised to strings; other keys pass through untouched.
      stringified = data.transform_keys { |key| key.is_a?(Symbol) ? key.to_s : key }
      self.class.hash_to_rbdf(stringified, columns: columns)
    when Array
      self.class.sequence_to_rbdf(data, columns: columns, orient: orient)
    when Series
      self.class.series_to_rbdf(data, columns: columns)
    else
      raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
    end
end
41
+
42
+ # @private
43
def self._from_rbdf(rb_df)
  # Bypass #initialize: the native frame is attached directly.
  DataFrame.allocate.tap { |frame| frame._df = rb_df }
end
48
+
49
+ # @private
50
def self._from_hashes(data, infer_schema_length: 100, schema: nil)
  # Hand the array of row hashes to the native reader, then wrap the result.
  _from_rbdf(RbDataFrame.read_hashes(data, infer_schema_length, schema))
end
54
+
55
+ # @private
56
def self._from_hash(data, columns: nil)
  native = hash_to_rbdf(data, columns: columns)
  _from_rbdf(native)
end
59
+
60
+ # def self._from_records
61
+ # end
62
+
63
+ # def self._from_numo
64
+ # end
65
+
66
+ # no self._from_arrow
67
+
68
+ # no self._from_pandas
69
+
70
+ # @private
71
def self._read_csv(
  file,
  has_header: true,
  columns: nil,
  sep: ",",
  comment_char: nil,
  quote_char: '"',
  skip_rows: 0,
  dtypes: nil,
  null_values: nil,
  ignore_errors: false,
  parse_dates: false,
  n_threads: nil,
  infer_schema_length: 100,
  batch_size: 8192,
  n_rows: nil,
  encoding: "utf8",
  low_memory: false,
  rechunk: true,
  skip_rows_after_header: 0,
  row_count_name: nil,
  row_count_offset: 0,
  sample_size: 1024,
  eol_char: "\n"
)
  # Reads CSV through the native reader and wraps the result in a DataFrame.
  #
  # Raises ArgumentError when `dtypes` is neither a Hash nor an Array, and
  # Todo when `file` is a String containing "*" (glob patterns).

  # String/Pathname inputs are additionally forwarded as a formatted `path`;
  # for any other input `path` stays nil and `file` is handed over as-is.
  path = nil
  if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
    path = Utils.format_path(file)
  end

  # A Hash of dtypes becomes [name, dtype] pairs; an Array is forwarded
  # unchanged as a positional dtype list.
  dtype_list = nil
  dtype_slice = nil
  unless dtypes.nil?
    case dtypes
    when Hash
      dtype_list = dtypes.map { |name, type| [name, Utils.rb_type_to_dtype(type)] }
    when Array
      dtype_slice = dtypes
    else
      raise ArgumentError, "dtype arg should be list or dict"
    end
  end

  processed_null_values = Utils._process_null_values(null_values)

  # A single column name is treated as a one-element selection.
  columns = [columns] if columns.is_a?(String)

  raise Todo if file.is_a?(String) && file.include?("*")

  projection, columns = Utils.handle_projection_columns(columns)

  _from_rbdf(
    RbDataFrame.read_csv(
      file,
      infer_schema_length,
      batch_size,
      has_header,
      ignore_errors,
      n_rows,
      skip_rows,
      projection,
      sep,
      rechunk,
      columns,
      encoding,
      n_threads,
      path,
      dtype_list,
      dtype_slice,
      low_memory,
      comment_char,
      quote_char,
      processed_null_values,
      parse_dates,
      skip_rows_after_header,
      Utils._prepare_row_count_args(row_count_name, row_count_offset),
      sample_size,
      eol_char
    )
  )
end
161
+
162
+ # @private
163
def self._read_parquet(
  file,
  columns: nil,
  n_rows: nil,
  parallel: "auto",
  row_count_name: nil,
  row_count_offset: 0,
  low_memory: false
)
  # Path-like inputs are normalised; anything else is passed through untouched.
  path_like = file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
  file = Utils.format_path(file) if path_like

  # Glob patterns are not handled here.
  raise Todo if file.is_a?(String) && file.include?("*")

  projection, columns = Utils.handle_projection_columns(columns)
  row_count = Utils._prepare_row_count_args(row_count_name, row_count_offset)
  native = RbDataFrame.read_parquet(
    file, columns, projection, n_rows, parallel, row_count, low_memory
  )
  _from_rbdf(native)
end
193
+
194
+ # @private
195
def self._read_avro(file, columns: nil, n_rows: nil)
  path_like = file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
  file = Utils.format_path(file) if path_like

  projection, columns = Utils.handle_projection_columns(columns)
  native = RbDataFrame.read_avro(file, columns, projection, n_rows)
  _from_rbdf(native)
end
202
+
203
+ # @private
204
def self._read_ipc(
  file,
  columns: nil,
  n_rows: nil,
  row_count_name: nil,
  row_count_offset: 0,
  rechunk: true,
  memory_map: true
)
  # NOTE(review): `rechunk` is accepted but not forwarded to the native
  # reader below — confirm whether that is intentional.
  path_like = file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
  file = Utils.format_path(file) if path_like

  # A single column name is treated as a one-element selection.
  columns = [columns] if columns.is_a?(String)

  # Glob patterns are not handled here.
  raise Todo if file.is_a?(String) && file.include?("*")

  projection, columns = Utils.handle_projection_columns(columns)
  row_count = Utils._prepare_row_count_args(row_count_name, row_count_offset)
  native = RbDataFrame.read_ipc(file, columns, projection, n_rows, row_count, memory_map)
  _from_rbdf(native)
end
236
+
237
+ # @private
238
def self._read_json(file)
  path_like = file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
  file = Utils.format_path(file) if path_like

  native = RbDataFrame.read_json(file)
  _from_rbdf(native)
end
245
+
246
+ # @private
247
def self._read_ndjson(file)
  path_like = file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
  file = Utils.format_path(file) if path_like

  native = RbDataFrame.read_ndjson(file)
  _from_rbdf(native)
end
254
+
255
+ # Get the shape of the DataFrame.
256
+ #
257
+ # @return [Array]
258
+ #
259
+ # @example
260
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5]})
261
+ # df.shape
262
+ # # => [5, 1]
263
def shape
  # Delegates to the native frame.
  native = _df
  native.shape
end
266
+
267
+ # Get the height of the DataFrame.
268
+ #
269
+ # @return [Integer]
270
+ #
271
+ # @example
272
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5]})
273
+ # df.height
274
+ # # => 5
275
def height
  # Delegates to the native frame.
  native = _df
  native.height
end
278
+
279
+ # Get the width of the DataFrame.
280
+ #
281
+ # @return [Integer]
282
+ #
283
+ # @example
284
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5]})
285
+ # df.width
286
+ # # => 1
287
def width
  # Delegates to the native frame.
  native = _df
  native.width
end
290
+
291
+ # Get column names.
292
+ #
293
+ # @return [Array]
294
+ #
295
+ # @example
296
+ # df = Polars::DataFrame.new(
297
+ # {
298
+ # "foo" => [1, 2, 3],
299
+ # "bar" => [6, 7, 8],
300
+ # "ham" => ["a", "b", "c"]
301
+ # }
302
+ # )
303
+ # df.columns
304
+ # # => ["foo", "bar", "ham"]
305
def columns
  # Delegates to the native frame.
  native = _df
  native.columns
end
308
+
309
+ # Change the column names of the DataFrame.
310
+ #
311
+ # @param columns [Array]
312
+ # A list with new names for the DataFrame.
313
+ # The length of the list should be equal to the width of the DataFrame.
314
+ #
315
+ # @return [Object]
316
+ #
317
+ # @example
318
+ # df = Polars::DataFrame.new(
319
+ # {
320
+ # "foo" => [1, 2, 3],
321
+ # "bar" => [6, 7, 8],
322
+ # "ham" => ["a", "b", "c"]
323
+ # }
324
+ # )
325
+ # df.columns = ["apple", "banana", "orange"]
326
+ # df
327
+ # # =>
328
+ # # shape: (3, 3)
329
+ # # ┌───────┬────────┬────────┐
330
+ # # │ apple ┆ banana ┆ orange │
331
+ # # │ --- ┆ --- ┆ --- │
332
+ # # │ i64 ┆ i64 ┆ str │
333
+ # # ╞═══════╪════════╪════════╡
334
+ # # │ 1 ┆ 6 ┆ a │
335
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
336
+ # # │ 2 ┆ 7 ┆ b │
337
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
338
+ # # │ 3 ┆ 8 ┆ c │
339
+ # # └───────┴────────┴────────┘
340
def columns=(columns)
  # Renames the columns of the native frame.
  native = _df
  native.set_column_names(columns)
end
343
+
344
+ # Get dtypes of columns in DataFrame. Dtypes can also be found in column headers when printing the DataFrame.
345
+ #
346
+ # @return [Array]
347
+ #
348
+ # @example
349
+ # df = Polars::DataFrame.new(
350
+ # {
351
+ # "foo" => [1, 2, 3],
352
+ # "bar" => [6.0, 7.0, 8.0],
353
+ # "ham" => ["a", "b", "c"]
354
+ # }
355
+ # )
356
+ # df.dtypes
357
+ # # => [Polars::Int64, Polars::Float64, Polars::Utf8]
358
def dtypes
  # Delegates to the native frame.
  native = _df
  native.dtypes
end
361
+
362
+ # Get the schema.
363
+ #
364
+ # @return [Hash]
365
+ #
366
+ # @example
367
+ # df = Polars::DataFrame.new(
368
+ # {
369
+ # "foo" => [1, 2, 3],
370
+ # "bar" => [6.0, 7.0, 8.0],
371
+ # "ham" => ["a", "b", "c"]
372
+ # }
373
+ # )
374
+ # df.schema
375
+ # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::Utf8}
376
def schema
  # Pair each column name with the dtype at the same position.
  names = columns
  types = dtypes
  names.zip(types).to_h
end
379
+
380
+ # Equal.
381
+ #
382
+ # @return [DataFrame]
383
def ==(other)
  # Delegates to `_comp` with the "eq" operator name.
  operator = "eq"
  _comp(other, operator)
end
386
+
387
+ # Not equal.
388
+ #
389
+ # @return [DataFrame]
390
def !=(other)
  # Delegates to `_comp` with the "neq" operator name.
  operator = "neq"
  _comp(other, operator)
end
393
+
394
+ # Greater than.
395
+ #
396
+ # @return [DataFrame]
397
def >(other)
  # Delegates to `_comp` with the "gt" operator name.
  operator = "gt"
  _comp(other, operator)
end
400
+
401
+ # Less than.
402
+ #
403
+ # @return [DataFrame]
404
def <(other)
  # Delegates to `_comp` with the "lt" operator name.
  operator = "lt"
  _comp(other, operator)
end
407
+
408
+ # Greater than or equal.
409
+ #
410
+ # @return [DataFrame]
411
def >=(other)
  # Delegates to `_comp` with the "gt_eq" operator name.
  operator = "gt_eq"
  _comp(other, operator)
end
414
+
415
+ # Less than or equal.
416
+ #
417
+ # @return [DataFrame]
418
def <=(other)
  # Delegates to `_comp` with the "lt_eq" operator name.
  operator = "lt_eq"
  _comp(other, operator)
end
421
+
422
+ # Performs multiplication.
423
+ #
424
+ # @return [DataFrame]
425
def *(other)
  # Frame-by-frame multiplication uses the dedicated native entry point.
  return _from_rbdf(_df.mul_df(other._df)) if other.is_a?(DataFrame)

  # Any other operand is first converted by `_prepare_other_arg`.
  operand = _prepare_other_arg(other)
  _from_rbdf(_df.mul(operand._s))
end
433
+
434
+ # Performs division.
435
+ #
436
+ # @return [DataFrame]
437
def /(other)
  # Frame-by-frame division uses the dedicated native entry point.
  return _from_rbdf(_df.div_df(other._df)) if other.is_a?(DataFrame)

  # Any other operand is first converted by `_prepare_other_arg`.
  operand = _prepare_other_arg(other)
  _from_rbdf(_df.div(operand._s))
end
445
+
446
+ # Performs addition.
447
+ #
448
+ # @return [DataFrame]
449
def +(other)
  # Frame-by-frame addition uses the dedicated native entry point.
  return _from_rbdf(_df.add_df(other._df)) if other.is_a?(DataFrame)

  # Any other operand is first converted by `_prepare_other_arg`.
  operand = _prepare_other_arg(other)
  _from_rbdf(_df.add(operand._s))
end
457
+
458
+ # Performs subtraction.
459
+ #
460
+ # @return [DataFrame]
461
def -(other)
  # Frame-by-frame subtraction uses the dedicated native entry point.
  return _from_rbdf(_df.sub_df(other._df)) if other.is_a?(DataFrame)

  # Any other operand is first converted by `_prepare_other_arg`.
  operand = _prepare_other_arg(other)
  _from_rbdf(_df.sub(operand._s))
end
469
+
470
+ # Returns the modulo.
471
+ #
472
+ # @return [DataFrame]
473
def %(other)
  # Frame-by-frame remainder uses the dedicated native entry point.
  return _from_rbdf(_df.rem_df(other._df)) if other.is_a?(DataFrame)

  # Any other operand is first converted by `_prepare_other_arg`.
  operand = _prepare_other_arg(other)
  _from_rbdf(_df.rem(operand._s))
end
481
+
482
+ # Returns a string representing the DataFrame.
483
+ #
484
+ # @return [String]
485
def to_s
  # The native frame renders its own string form.
  native = _df
  native.to_s
end
# `inspect` shares the same rendering.
alias inspect to_s
489
+
490
+ # Check if DataFrame includes column.
491
+ #
492
+ # @return [Boolean]
493
def include?(name)
  # True when `name` is one of the column names.
  names = columns
  names.include?(name)
end
496
+
497
+ # def each
498
+ # end
499
+
500
+ # Returns subset of the DataFrame.
501
+ #
502
+ # @return [Object]
503
def [](*args)
  # Supported forms:
  #   df[rows, cols] - two selectors (row selection, column selection)
  #   df["foo"]      - single column as a Series
  #   df[idx]        - single row as a one-row DataFrame
  #   df[range]      - row slice
  #   df[["a", "b"]] - multiple columns as a DataFrame
  #
  # Raises ArgumentError for an unsupported selector type or for a call
  # with neither one nor two selectors; raises Todo for df[range, range].
  if args.size == 2
    row_selection, col_selection = args

    # df[.., ..] (slicing both axes) is not supported yet
    if row_selection.is_a?(Range) && col_selection.is_a?(Range)
      raise Todo
    end

    if row_selection.is_a?(Integer)
      # df[2, ["a", "b"]] (select row as df)
      if col_selection.is_a?(Array)
        df = self[0.., col_selection]
        return df.slice(row_selection, 1)
      end
      # df[2, "a"]
      if col_selection.is_a?(String)
        return self[col_selection][row_selection]
      end
    end

    # column selection can be "a" and ["a", "b"]
    col_selection = [col_selection] if col_selection.is_a?(String)

    # df[.., 1]
    if col_selection.is_a?(Integer)
      series = to_series(col_selection)
      return series[row_selection]
    end

    # df[.., [1, 2]]
    # NOTE(review): `is_int_sequence` is called without a receiver while the
    # string check further down goes through `Utils.is_str_sequence` —
    # confirm a helper with this name is reachable from this class.
    if col_selection.is_a?(Array) && is_int_sequence(col_selection)
      series_list = col_selection.map { |i| to_series(i) }
      df = self.class.new(series_list)
      return df[row_selection]
    end

    df = self[col_selection]
    return df[row_selection]
  elsif args.size == 1
    item = args[0]

    # select single column
    # df["foo"]
    return Utils.wrap_s(_df.column(item)) if item.is_a?(String)

    # df[idx]
    return slice(_pos_idx(item, 0), 1) if item.is_a?(Integer)

    # df[..]
    return Slice.new(self).apply(item) if item.is_a?(Range)

    # select multiple columns
    # df[["foo", "bar"]]
    if Utils.is_str_sequence(item, allow_str: false)
      return _from_rbdf(_df.select(item))
    end

    raise ArgumentError, "Cannot get item of type: #{item.class.name}"
  end

  # Only reached with zero or more than two selectors. The type-based
  # message does not apply here because there is no single item to describe.
  raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 1..2)"
end
579
+
580
+ # Set item.
581
+ #
582
+ # @return [Object]
583
+ # def []=(key, value)
584
+ # if key.is_a?(String)
585
+ # raise TypeError, "'DataFrame' object does not support 'Series' assignment by index. Use 'DataFrame.with_columns'"
586
+ # end
587
+
588
+ # raise Todo
589
+ # end
590
+
591
+ # no to_arrow
592
+
593
+ # Convert DataFrame to a hash mapping column name to values.
594
+ #
595
+ # @return [Hash]
596
def to_h(as_series: true)
  # Keys are column names; values are the Series themselves or, when
  # `as_series` is false, their array form.
  get_columns.to_h do |series|
    [series.name, as_series ? series : series.to_a]
  end
end
603
+
604
+ # Convert every row to a hash mapping column name to value.
605
+ #
606
+ # Note that this is slow.
607
+ #
608
+ # @return [Array]
609
+ #
610
+ # @example
611
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
612
+ # df.to_hashes
613
+ # # => [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
614
def to_hashes
  native = _df
  keys = columns

  # One hash per row, pairing each column name with that row's value.
  (0...height).map do |row_idx|
    keys.zip(native.row_tuple(row_idx)).to_h
  end
end
622
+
623
+ # def to_numo
624
+ # end
625
+
626
+ # no to_pandas
627
+
628
+ # Select column as Series at index location.
629
+ #
630
+ # @param index [Integer]
631
+ # Location of selection.
632
+ #
633
+ # @return [Series]
634
+ #
635
+ # @example
636
+ # df = Polars::DataFrame.new(
637
+ # {
638
+ # "foo" => [1, 2, 3],
639
+ # "bar" => [6, 7, 8],
640
+ # "ham" => ["a", "b", "c"]
641
+ # }
642
+ # )
643
+ # df.to_series(1)
644
+ # # =>
645
+ # # shape: (3,)
646
+ # # Series: 'bar' [i64]
647
+ # # [
648
+ # # 6
649
+ # # 7
650
+ # # 8
651
+ # # ]
652
def to_series(index = 0)
  # Negative indexes count back from the last column.
  index += columns.length if index < 0
  Utils.wrap_s(_df.select_at_idx(index))
end
658
+
659
+ # Serialize to JSON representation.
660
+ #
661
+ # @return [nil]
662
+ #
663
+ # @param file [String]
664
+ # File path to which the result should be written.
665
+ # @param pretty [Boolean]
666
+ # Pretty serialize json.
667
+ # @param row_oriented [Boolean]
668
+ # Write to row oriented json. This is slower, but more common.
669
+ #
670
+ # @see #write_ndjson
671
def write_json(
  file,
  pretty: false,
  row_oriented: false
)
  path_like = file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
  file = Utils.format_path(file) if path_like

  _df.write_json(file, pretty, row_oriented)
  nil
end
683
+
684
+ # Serialize to newline delimited JSON representation.
685
+ #
686
+ # @param file [String]
687
+ # File path to which the result should be written.
688
+ #
689
+ # @return [nil]
690
def write_ndjson(file)
  path_like = file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
  file = Utils.format_path(file) if path_like

  _df.write_ndjson(file)
  nil
end
698
+
699
+ # Write to comma-separated values (CSV) file.
700
+ #
701
+ # @param file [String, nil]
702
+ # File path to which the result should be written. If set to `nil`
703
+ # (default), the output is returned as a string instead.
704
+ # @param has_header [Boolean]
705
+ # Whether to include header in the CSV output.
706
+ # @param sep [String]
707
+ # Separate CSV fields with this symbol.
708
+ # @param quote [String]
709
+ # Byte to use as quoting character.
710
+ # @param batch_size [Integer]
711
+ # Number of rows that will be processed per thread.
712
+ # @param datetime_format [String, nil]
713
+ # A format string, with the specifiers defined by the
714
+ # [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
715
+ # Rust crate. If no format specified, the default fractional-second
716
+ # precision is inferred from the maximum timeunit found in the frame's
717
+ # Datetime cols (if any).
718
+ # @param date_format [String, nil]
719
+ # A format string, with the specifiers defined by the
720
+ # [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
721
+ # Rust crate.
722
+ # @param time_format [String, nil]
723
+ # A format string, with the specifiers defined by the
724
+ # [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
725
+ # Rust crate.
726
+ # @param float_precision [Integer, nil]
727
+ # Number of decimal places to write, applied to both `:f32` and
728
+ # `:f64` datatypes.
729
+ # @param null_value [String, nil]
730
+ # A string representing null values (defaulting to the empty string).
731
+ #
732
+ # @return [String, nil]
733
+ #
734
+ # @example
735
+ # df = Polars::DataFrame.new(
736
+ # {
737
+ # "foo" => [1, 2, 3, 4, 5],
738
+ # "bar" => [6, 7, 8, 9, 10],
739
+ # "ham" => ["a", "b", "c", "d", "e"]
740
+ # }
741
+ # )
742
+ # df.write_csv("file.csv")
743
def write_csv(
  file = nil,
  has_header: true,
  sep: ",",
  quote: '"',
  batch_size: 1024,
  datetime_format: nil,
  date_format: nil,
  time_format: nil,
  float_precision: nil,
  null_value: nil
)
  # `sep` and `quote` are forwarded as one integer each (`ord`) and the error
  # text promises a single byte, so validate the byte size. A character
  # count would let a lone multi-byte character through and would leave an
  # empty string to fail later inside `ord` with an unrelated message.
  if sep.bytesize != 1
    raise ArgumentError, "only single byte separator is allowed"
  elsif quote.bytesize != 1
    raise ArgumentError, "only single byte quote char is allowed"
  elsif null_value == ""
    # An empty null marker is passed on as nil.
    null_value = nil
  end

  # With no file, write into an in-memory binary buffer and return its
  # contents; otherwise write to the (formatted) path or the given object.
  to_string = file.nil?
  target =
    if to_string
      buffer = StringIO.new
      buffer.set_encoding(Encoding::BINARY)
      buffer
    elsif file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
      Utils.format_path(file)
    else
      file
    end

  _df.write_csv(
    target,
    has_header,
    sep.ord,
    quote.ord,
    batch_size,
    datetime_format,
    date_format,
    time_format,
    float_precision,
    null_value
  )

  to_string ? target.string.force_encoding(Encoding::UTF_8) : nil
end
799
+
800
+ # Write to Apache Avro file.
801
+ #
802
+ # @param file [String]
803
+ # File path to which the file should be written.
804
+ # @param compression ["uncompressed", "snappy", "deflate"]
805
+ # Compression method. Defaults to "uncompressed".
806
+ #
807
+ # @return [nil]
808
def write_avro(file, compression = "uncompressed")
  # An explicit nil falls back to the default compression.
  compression = "uncompressed" if compression.nil?
  if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
    file = Utils.format_path(file)
  end

  _df.write_avro(file, compression)
  # Return nil as documented (matching write_json/write_ndjson/write_csv)
  # rather than whatever the native call returns.
  nil
end
818
+
819
+ # Write to Arrow IPC binary stream or Feather file.
820
+ #
821
+ # @param file [String]
822
+ # File path to which the file should be written.
823
+ # @param compression ["uncompressed", "lz4", "zstd"]
824
+ # Compression method. Defaults to "uncompressed".
825
+ #
826
+ # @return [nil]
827
def write_ipc(file, compression: "uncompressed")
  # An explicit nil falls back to the default compression.
  compression = "uncompressed" if compression.nil?
  if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
    file = Utils.format_path(file)
  end

  _df.write_ipc(file, compression)
  # Return nil as documented (matching write_json/write_ndjson/write_csv)
  # rather than whatever the native call returns.
  nil
end
837
+
838
+ # Write to Apache Parquet file.
839
+ #
840
+ # @param file [String]
841
+ # File path to which the file should be written.
842
+ # @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
843
+ # Choose "zstd" for good compression performance.
844
+ # Choose "lz4" for fast compression/decompression.
845
+ # Choose "snappy" for more backwards compatibility guarantees
846
+ # when you deal with older parquet readers.
847
+ # @param compression_level [Integer, nil]
848
+ # The level of compression to use. Higher compression means smaller files on
849
+ # disk.
850
+ #
851
+ # - "gzip" : min-level: 0, max-level: 10.
852
+ # - "brotli" : min-level: 0, max-level: 11.
853
+ # - "zstd" : min-level: 1, max-level: 22.
854
+ # @param statistics [Boolean]
855
+ # Write statistics to the parquet headers. This requires extra compute.
856
+ # @param row_group_size [Integer, nil]
857
+ # Size of the row groups in number of rows.
858
+ # If `nil` (default), the chunks of the DataFrame are
859
+ # used. Writing in smaller chunks may reduce memory pressure and improve
860
+ # writing speeds.
861
+ #
862
+ # @return [nil]
863
def write_parquet(
  file,
  compression: "zstd",
  compression_level: nil,
  statistics: false,
  row_group_size: nil
)
  # An explicit nil compression means "uncompressed" (the keyword default is "zstd").
  compression = "uncompressed" if compression.nil?
  if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
    file = Utils.format_path(file)
  end

  _df.write_parquet(
    file, compression, compression_level, statistics, row_group_size
  )
  # Return nil as documented (matching write_json/write_ndjson/write_csv)
  # rather than whatever the native call returns.
  nil
end
881
+
882
+ # Return an estimation of the total (heap) allocated size of the DataFrame.
883
+ #
884
+ # Estimated size is given in the specified unit (bytes by default).
885
+ #
886
+ # This estimation is the sum of the size of its buffers, validity, including
887
+ # nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
888
+ # size of 2 arrays is not the sum of the sizes computed from this function. In
889
+ # particular, StructArray's size is an upper bound.
890
+ #
891
+ # When an array is sliced, its allocated size remains constant because the buffer
892
+ # is unchanged. However, this function will yield a smaller number. This is because
893
+ # this function returns the visible size of the buffer, not its total capacity.
894
+ #
895
+ # FFI buffers are included in this estimation.
896
+ #
897
+ # @param unit ["b", "kb", "mb", "gb", "tb"]
898
+ # Scale the returned size to the given unit.
899
+ #
900
+ # @return [Numeric]
901
+ #
902
+ # @example
903
+ # df = Polars::DataFrame.new(
904
+ # {
905
+ # "x" => 1_000_000.times.to_a.reverse,
906
+ # "y" => 1_000_000.times.map { |v| v / 1000.0 },
907
+ # "z" => 1_000_000.times.map(&:to_s)
908
+ # },
909
+ # columns: {"x" => :u32, "y" => :f64, "z" => :str}
910
+ # )
911
+ # df.estimated_size
912
+ # # => 25888898
913
+ # df.estimated_size("mb")
914
+ # # => 24.689577102661133
915
def estimated_size(unit = "b")
  # The native size estimate is rescaled to the requested unit.
  Utils.scale_bytes(_df.estimated_size, to: unit)
end
919
+
920
+ # Transpose a DataFrame over the diagonal.
921
+ #
922
+ # @param include_header [Boolean]
923
+ # If set, the column names will be added as first column.
924
+ # @param header_name [String]
925
+ # If `include_header` is set, this determines the name of the column that will
926
+ # be inserted.
927
+ # @param column_names [Array]
928
+ # Optional generator/iterator that yields column names. Will be used to
929
+ # replace the columns in the DataFrame.
930
+ #
931
+ # @return [DataFrame]
932
+ #
933
+ # @note
934
+ # This is a very expensive operation. Perhaps you can do it differently.
935
+ #
936
+ # @example
937
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [1, 2, 3]})
938
+ # df.transpose(include_header: true)
939
+ # # =>
940
+ # # shape: (2, 4)
941
+ # # ┌────────┬──────────┬──────────┬──────────┐
942
+ # # │ column ┆ column_0 ┆ column_1 ┆ column_2 │
943
+ # # │ --- ┆ --- ┆ --- ┆ --- │
944
+ # # │ str ┆ i64 ┆ i64 ┆ i64 │
945
+ # # ╞════════╪══════════╪══════════╪══════════╡
946
+ # # │ a ┆ 1 ┆ 2 ┆ 3 │
947
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
948
+ # # │ b ┆ 1 ┆ 2 ┆ 3 │
949
+ # # └────────┴──────────┴──────────┴──────────┘
950
+ #
951
+ # @example Replace the auto-generated column names with a list
952
+ # df.transpose(include_header: false, column_names: ["a", "b", "c"])
953
+ # # =>
954
+ # # shape: (2, 3)
955
+ # # ┌─────┬─────┬─────┐
956
+ # # │ a ┆ b ┆ c │
957
+ # # │ --- ┆ --- ┆ --- │
958
+ # # │ i64 ┆ i64 ┆ i64 │
959
+ # # ╞═════╪═════╪═════╡
960
+ # # │ 1 ┆ 2 ┆ 3 │
961
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
962
+ # # │ 1 ┆ 2 ┆ 3 │
963
+ # # └─────┴─────┴─────┘
964
+ #
965
+ # @example Include the header as a separate column
966
+ # df.transpose(
967
+ # include_header: true, header_name: "foo", column_names: ["a", "b", "c"]
968
+ # )
969
+ # # =>
970
+ # # shape: (2, 4)
971
+ # # ┌─────┬─────┬─────┬─────┐
972
+ # # │ foo ┆ a ┆ b ┆ c │
973
+ # # │ --- ┆ --- ┆ --- ┆ --- │
974
+ # # │ str ┆ i64 ┆ i64 ┆ i64 │
975
+ # # ╞═════╪═════╪═════╪═════╡
976
+ # # │ a ┆ 1 ┆ 2 ┆ 3 │
977
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
978
+ # # │ b ┆ 1 ┆ 2 ┆ 3 │
979
+ # # └─────┴─────┴─────┴─────┘
980
def transpose(include_header: false, header_name: "column", column_names: nil)
  transposed = _from_rbdf(_df.transpose(include_header, header_name))
  return transposed if column_names.nil?

  # When the header column is present it keeps `header_name`; every other
  # column takes the next name pulled from `column_names`.
  remaining = transposed.width
  labels = []
  if include_header
    labels << header_name
    remaining -= 1
  end

  name_source = column_names.each
  remaining.times { labels << name_source.next }

  transposed.columns = labels
  transposed
end
998
+
999
+ # Reverse the DataFrame.
1000
+ #
1001
+ # @return [DataFrame]
1002
+ #
1003
+ # @example
1004
+ # df = Polars::DataFrame.new(
1005
+ # {
1006
+ # "key" => ["a", "b", "c"],
1007
+ # "val" => [1, 2, 3]
1008
+ # }
1009
+ # )
1010
+ # df.reverse
1011
+ # # =>
1012
+ # # shape: (3, 2)
1013
+ # # ┌─────┬─────┐
1014
+ # # │ key ┆ val │
1015
+ # # │ --- ┆ --- │
1016
+ # # │ str ┆ i64 │
1017
+ # # ╞═════╪═════╡
1018
+ # # │ c ┆ 3 │
1019
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1020
+ # # │ b ┆ 2 │
1021
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1022
+ # # │ a ┆ 1 │
1023
+ # # └─────┴─────┘
1024
def reverse
  # Select every column with its values reversed.
  all_reversed = Polars.col("*").reverse
  select(all_reversed)
end
1027
+
1028
+ # Rename column names.
1029
+ #
1030
+ # @param mapping [Hash]
1031
+ # Key value pairs that map from old name to new name.
1032
+ #
1033
+ # @return [DataFrame]
1034
+ #
1035
+ # @example
1036
+ # df = Polars::DataFrame.new(
1037
+ # {
1038
+ # "foo" => [1, 2, 3],
1039
+ # "bar" => [6, 7, 8],
1040
+ # "ham" => ["a", "b", "c"]
1041
+ # }
1042
+ # )
1043
+ # df.rename({"foo" => "apple"})
1044
+ # # =>
1045
+ # # shape: (3, 3)
1046
+ # # ┌───────┬─────┬─────┐
1047
+ # # │ apple ┆ bar ┆ ham │
1048
+ # # │ --- ┆ --- ┆ --- │
1049
+ # # │ i64 ┆ i64 ┆ str │
1050
+ # # ╞═══════╪═════╪═════╡
1051
+ # # │ 1 ┆ 6 ┆ a │
1052
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1053
+ # # │ 2 ┆ 7 ┆ b │
1054
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1055
+ # # │ 3 ┆ 8 ┆ c │
1056
+ # # └───────┴─────┴─────┘
1057
def rename(mapping)
  # Run the rename through the lazy API and collect without optimization.
  renamed = lazy.rename(mapping)
  renamed.collect(no_optimization: true)
end
1060
+
1061
+ # Insert a Series at a certain column index. This operation is in place.
1062
+ #
1063
+ # @param index [Integer]
1064
+ # Column index at which to insert the new `Series`.
1065
+ # @param series [Series]
1066
+ # `Series` to insert.
1067
+ #
1068
+ # @return [DataFrame]
1069
+ #
1070
+ # @example
1071
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1072
+ # s = Polars::Series.new("baz", [97, 98, 99])
1073
+ # df.insert_at_idx(1, s)
1074
+ # # =>
1075
+ # # shape: (3, 3)
1076
+ # # ┌─────┬─────┬─────┐
1077
+ # # │ foo ┆ baz ┆ bar │
1078
+ # # │ --- ┆ --- ┆ --- │
1079
+ # # │ i64 ┆ i64 ┆ i64 │
1080
+ # # ╞═════╪═════╪═════╡
1081
+ # # │ 1 ┆ 97 ┆ 4 │
1082
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1083
+ # # │ 2 ┆ 98 ┆ 5 │
1084
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1085
+ # # │ 3 ┆ 99 ┆ 6 │
1086
+ # # └─────┴─────┴─────┘
1087
+ #
1088
+ # @example
1089
+ # df = Polars::DataFrame.new(
1090
+ # {
1091
+ # "a" => [1, 2, 3, 4],
1092
+ # "b" => [0.5, 4, 10, 13],
1093
+ # "c" => [true, true, false, true]
1094
+ # }
1095
+ # )
1096
+ # s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
1097
+ # df.insert_at_idx(3, s)
1098
+ # # =>
1099
+ # # shape: (4, 4)
1100
+ # # ┌─────┬──────┬───────┬──────┐
1101
+ # # │ a ┆ b ┆ c ┆ d │
1102
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1103
+ # # │ i64 ┆ f64 ┆ bool ┆ f64 │
1104
+ # # ╞═════╪══════╪═══════╪══════╡
1105
+ # # │ 1 ┆ 0.5 ┆ true ┆ -2.5 │
1106
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1107
+ # # │ 2 ┆ 4.0 ┆ true ┆ 15.0 │
1108
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1109
+ # # │ 3 ┆ 10.0 ┆ false ┆ 20.5 │
1110
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1111
+ # # │ 4 ┆ 13.0 ┆ true ┆ 0.0 │
1112
+ # # └─────┴──────┴───────┴──────┘
1113
def insert_at_idx(index, series)
  # A negative index is shifted by the current number of columns.
  index += columns.length if index < 0
  _df.insert_at_idx(index, series._s)
  # Returns this same DataFrame object after the insert call on `_df`.
  self
end
1120
+
1121
+ # Filter the rows in the DataFrame based on a predicate expression.
1122
+ #
1123
+ # @param predicate [Expr]
1124
+ # Expression that evaluates to a boolean Series.
1125
+ #
1126
+ # @return [DataFrame]
1127
+ #
1128
+ # @example Filter on one condition:
1129
+ # df = Polars::DataFrame.new(
1130
+ # {
1131
+ # "foo" => [1, 2, 3],
1132
+ # "bar" => [6, 7, 8],
1133
+ # "ham" => ["a", "b", "c"]
1134
+ # }
1135
+ # )
1136
+ # df.filter(Polars.col("foo") < 3)
1137
+ # # =>
1138
+ # # shape: (2, 3)
1139
+ # # ┌─────┬─────┬─────┐
1140
+ # # │ foo ┆ bar ┆ ham │
1141
+ # # │ --- ┆ --- ┆ --- │
1142
+ # # │ i64 ┆ i64 ┆ str │
1143
+ # # ╞═════╪═════╪═════╡
1144
+ # # │ 1 ┆ 6 ┆ a │
1145
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1146
+ # # │ 2 ┆ 7 ┆ b │
1147
+ # # └─────┴─────┴─────┘
1148
+ #
1149
+ # @example Filter on multiple conditions:
1150
+ # df.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a"))
1151
+ # # =>
1152
+ # # shape: (1, 3)
1153
+ # # ┌─────┬─────┬─────┐
1154
+ # # │ foo ┆ bar ┆ ham │
1155
+ # # │ --- ┆ --- ┆ --- │
1156
+ # # │ i64 ┆ i64 ┆ str │
1157
+ # # ╞═════╪═════╪═════╡
1158
+ # # │ 1 ┆ 6 ┆ a │
1159
+ # # └─────┴─────┴─────┘
1160
def filter(predicate)
  # Apply the predicate on the lazy frame, then collect the result.
  filtered = lazy.filter(predicate)
  filtered.collect
end
1163
+
1164
+ # Summary statistics for a DataFrame.
1165
+ #
1166
+ # @return [DataFrame]
1167
+ #
1168
+ # @example
1169
+ # df = Polars::DataFrame.new(
1170
+ # {
1171
+ # "a" => [1.0, 2.8, 3.0],
1172
+ # "b" => [4, 5, nil],
1173
+ # "c" => [true, false, true],
1174
+ # "d" => [nil, "b", "c"],
1175
+ # "e" => ["usd", "eur", nil]
1176
+ # }
1177
+ # )
1178
+ # df.describe
1179
+ # # =>
1180
+ # # shape: (7, 6)
1181
+ # # ┌────────────┬──────────┬──────────┬──────────┬──────┬──────┐
1182
+ # # │ describe ┆ a ┆ b ┆ c ┆ d ┆ e │
1183
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1184
+ # # │ str ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ str │
1185
+ # # ╞════════════╪══════════╪══════════╪══════════╪══════╪══════╡
1186
+ # # │ count ┆ 3.0 ┆ 3.0 ┆ 3.0 ┆ 3 ┆ 3 │
1187
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1188
+ # # │ null_count ┆ 0.0 ┆ 1.0 ┆ 0.0 ┆ 1 ┆ 1 │
1189
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1190
+ # # │ mean ┆ 2.266667 ┆ 4.5 ┆ 0.666667 ┆ null ┆ null │
1191
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1192
+ # # │ std ┆ 1.101514 ┆ 0.707107 ┆ 0.57735 ┆ null ┆ null │
1193
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1194
+ # # │ min ┆ 1.0 ┆ 4.0 ┆ 0.0 ┆ b ┆ eur │
1195
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1196
+ # # │ max ┆ 3.0 ┆ 5.0 ┆ 1.0 ┆ c ┆ usd │
1197
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
1198
+ # # │ median ┆ 2.8 ┆ 4.5 ┆ 1.0 ┆ null ┆ null │
1199
+ # # └────────────┴──────────┴──────────┴──────────┴──────┴──────┘
1200
def describe
  # Builds a frame from `stat` in which every column is cast to :f64 when the
  # matching column of this frame is numeric or boolean, and to :str otherwise
  # (dates, strings, etc.) so that all statistics can be shown.
  describe_cast = lambda do |stat|
    casted = columns.each_with_index.map do |name, idx|
      source = self[name]
      target = (source.is_numeric || source.is_boolean) ? :f64 : :str
      stat[0.., idx].cast(target)
    end
    self.class.new(casted)
  end

  # The "count" row repeats this frame's height for every column.
  row_counts = self.class.new(columns.to_h { |c| [c, [height]] })
  stat_frames = [
    describe_cast.call(row_counts),
    describe_cast.call(null_count),
    describe_cast.call(mean),
    describe_cast.call(std),
    describe_cast.call(min),
    describe_cast.call(max),
    describe_cast.call(median)
  ]
  summary = _from_rbdf(Polars.concat(stat_frames)._df)

  # Label each statistics row; the order matches `stat_frames`.
  labels = Polars::Series.new(
    "describe",
    ["count", "null_count", "mean", "std", "min", "max", "median"]
  )
  summary.insert_at_idx(0, labels)
  summary
end
1239
+
1240
+ # Find the index of a column by name.
1241
+ #
1242
+ # @param name [String]
1243
+ # Name of the column to find.
1244
+ #
1245
+ # @return [Integer]
1246
+ #
1247
+ # @example
1248
+ # df = Polars::DataFrame.new(
1249
+ # {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
1250
+ # )
1251
+ # df.find_idx_by_name("ham")
1252
+ # # => 2
1253
def find_idx_by_name(name)
  # Delegates the lookup to the wrapped `_df` object.
  position = _df.find_idx_by_name(name)
  position
end
1256
+
1257
+ # Replace a column at an index location.
1258
+ #
1259
+ # @param index [Integer]
1260
+ # Column index.
1261
+ # @param series [Series]
1262
+ # Series that will replace the column.
1263
+ #
1264
+ # @return [DataFrame]
1265
+ #
1266
+ # @example
1267
+ # df = Polars::DataFrame.new(
1268
+ # {
1269
+ # "foo" => [1, 2, 3],
1270
+ # "bar" => [6, 7, 8],
1271
+ # "ham" => ["a", "b", "c"]
1272
+ # }
1273
+ # )
1274
+ # s = Polars::Series.new("apple", [10, 20, 30])
1275
+ # df.replace_at_idx(0, s)
1276
+ # # =>
1277
+ # # shape: (3, 3)
1278
+ # # ┌───────┬─────┬─────┐
1279
+ # # │ apple ┆ bar ┆ ham │
1280
+ # # │ --- ┆ --- ┆ --- │
1281
+ # # │ i64 ┆ i64 ┆ str │
1282
+ # # ╞═══════╪═════╪═════╡
1283
+ # # │ 10 ┆ 6 ┆ a │
1284
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1285
+ # # │ 20 ┆ 7 ┆ b │
1286
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1287
+ # # │ 30 ┆ 8 ┆ c │
1288
+ # # └───────┴─────┴─────┘
1289
def replace_at_idx(index, series)
  # A negative index is shifted by the current number of columns.
  position = index < 0 ? columns.length + index : index
  _df.replace_at_idx(position, series._s)
  # Returns this same DataFrame object after the replace call on `_df`.
  self
end
1296
+
1297
+ # Sort the DataFrame by column.
1298
+ #
1299
+ # @param by [String]
1300
+ # By which column to sort.
1301
+ # @param reverse [Boolean]
1302
+ # Reverse/descending sort.
1303
+ # @param nulls_last [Boolean]
1304
+ # Place null values last. Can only be used if sorted by a single column.
1305
+ #
1306
+ # @return [DataFrame]
1307
+ #
1308
+ # @example
1309
+ # df = Polars::DataFrame.new(
1310
+ # {
1311
+ # "foo" => [1, 2, 3],
1312
+ # "bar" => [6.0, 7.0, 8.0],
1313
+ # "ham" => ["a", "b", "c"]
1314
+ # }
1315
+ # )
1316
+ # df.sort("foo", reverse: true)
1317
+ # # =>
1318
+ # # shape: (3, 3)
1319
+ # # ┌─────┬─────┬─────┐
1320
+ # # │ foo ┆ bar ┆ ham │
1321
+ # # │ --- ┆ --- ┆ --- │
1322
+ # # │ i64 ┆ f64 ┆ str │
1323
+ # # ╞═════╪═════╪═════╡
1324
+ # # │ 3 ┆ 8.0 ┆ c │
1325
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1326
+ # # │ 2 ┆ 7.0 ┆ b │
1327
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1328
+ # # │ 1 ┆ 6.0 ┆ a │
1329
+ # # └─────┴─────┴─────┘
1330
+ #
1331
+ # @example Sort by multiple columns.
1332
+ # df.sort(
1333
+ # [Polars.col("foo"), Polars.col("bar")**2],
1334
+ # reverse: [true, false]
1335
+ # )
1336
+ # # =>
1337
+ # # shape: (3, 3)
1338
+ # # ┌─────┬─────┬─────┐
1339
+ # # │ foo ┆ bar ┆ ham │
1340
+ # # │ --- ┆ --- ┆ --- │
1341
+ # # │ i64 ┆ f64 ┆ str │
1342
+ # # ╞═════╪═════╪═════╡
1343
+ # # │ 3 ┆ 8.0 ┆ c │
1344
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1345
+ # # │ 2 ┆ 7.0 ┆ b │
1346
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1347
+ # # │ 1 ┆ 6.0 ┆ a │
1348
+ # # └─────┴─────┴─────┘
1349
def sort(by, reverse: false, nulls_last: false)
  # Anything other than an Array or an Expr is sorted directly on `_df`.
  unless by.is_a?(Array) || by.is_a?(Expr)
    return _from_rbdf(_df.sort(by, reverse, nulls_last))
  end

  # Arrays and expressions go through the lazy API, collected without
  # optimization and without the string cache.
  sorted = lazy.sort(by, reverse: reverse, nulls_last: nulls_last)
  sorted.collect(no_optimization: true, string_cache: false)
end
1358
+
1359
+ # Check if DataFrame is equal to other.
1360
+ #
1361
+ # @param other [DataFrame]
1362
+ # DataFrame to compare with.
1363
+ # @param null_equal [Boolean]
1364
+ # Consider null values as equal.
1365
+ #
1366
+ # @return [Boolean]
1367
+ #
1368
+ # @example
1369
+ # df1 = Polars::DataFrame.new(
1370
+ # {
1371
+ # "foo" => [1, 2, 3],
1372
+ # "bar" => [6.0, 7.0, 8.0],
1373
+ # "ham" => ["a", "b", "c"]
1374
+ # }
1375
+ # )
1376
+ # df2 = Polars::DataFrame.new(
1377
+ # {
1378
+ # "foo" => [3, 2, 1],
1379
+ # "bar" => [8.0, 7.0, 6.0],
1380
+ # "ham" => ["c", "b", "a"]
1381
+ # }
1382
+ # )
1383
+ # df1.frame_equal(df1)
1384
+ # # => true
1385
+ # df1.frame_equal(df2)
1386
+ # # => false
1387
def frame_equal(other, null_equal: true)
  # Comparison is delegated to the wrapped `_df` objects of both frames.
  mine = _df
  theirs = other._df
  mine.frame_equal(theirs, null_equal)
end
1390
+
1391
+ # Replace a column by a new Series.
1392
+ #
1393
+ # @param column [String]
1394
+ # Column to replace.
1395
+ # @param new_col [Series]
1396
+ # New column to insert.
1397
+ #
1398
+ # @return [DataFrame]
1399
+ #
1400
+ # @example
1401
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1402
+ # s = Polars::Series.new([10, 20, 30])
1403
+ # df.replace("foo", s)
1404
+ # # =>
1405
+ # # shape: (3, 2)
1406
+ # # ┌─────┬─────┐
1407
+ # # │ foo ┆ bar │
1408
+ # # │ --- ┆ --- │
1409
+ # # │ i64 ┆ i64 │
1410
+ # # ╞═════╪═════╡
1411
+ # # │ 10 ┆ 4 │
1412
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1413
+ # # │ 20 ┆ 5 │
1414
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1415
+ # # │ 30 ┆ 6 │
1416
+ # # └─────┴─────┘
1417
def replace(column, new_col)
  # Replace the column on the wrapped `_df`, then hand back this DataFrame.
  tap { _df.replace(column, new_col._s) }
end
1421
+
1422
+ # Get a slice of this DataFrame.
1423
+ #
1424
+ # @param offset [Integer]
1425
+ # Start index. Negative indexing is supported.
1426
+ # @param length [Integer, nil]
1427
+ # Length of the slice. If set to `nil`, all rows starting at the offset
1428
+ # will be selected.
1429
+ #
1430
+ # @return [DataFrame]
1431
+ #
1432
+ # @example
1433
+ # df = Polars::DataFrame.new(
1434
+ # {
1435
+ # "foo" => [1, 2, 3],
1436
+ # "bar" => [6.0, 7.0, 8.0],
1437
+ # "ham" => ["a", "b", "c"]
1438
+ # }
1439
+ # )
1440
+ # df.slice(1, 2)
1441
+ # # =>
1442
+ # # shape: (2, 3)
1443
+ # # ┌─────┬─────┬─────┐
1444
+ # # │ foo ┆ bar ┆ ham │
1445
+ # # │ --- ┆ --- ┆ --- │
1446
+ # # │ i64 ┆ f64 ┆ str │
1447
+ # # ╞═════╪═════╪═════╡
1448
+ # # │ 2 ┆ 7.0 ┆ b │
1449
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1450
+ # # │ 3 ┆ 8.0 ┆ c │
1451
+ # # └─────┴─────┴─────┘
1452
def slice(offset, length = nil)
  if !length.nil? && length < 0
    # A negative length means "stop that many rows before the end", so it must
    # be measured from the resolved, non-negative start row. Using the raw
    # (possibly negative) offset here would over-count the rows.
    # NOTE(review): assumes the native slice clamps an out-of-range offset to
    # the frame bounds - confirm against the extension.
    start = offset < 0 ? [height + offset, 0].max : [offset, height].min
    # Never hand a negative length to the native call; an empty slice is the
    # natural result when the window is exhausted.
    length = [height - start + length, 0].max
  end
  _from_rbdf(_df.slice(offset, length))
end
1458
+
1459
+ # Get the first `n` rows.
1460
+ #
1461
+ # Alias for {#head}.
1462
+ #
1463
+ # @param n [Integer]
1464
+ # Number of rows to return.
1465
+ #
1466
+ # @return [DataFrame]
1467
+ #
1468
+ # @example
1469
+ # df = Polars::DataFrame.new(
1470
+ # {"foo" => [1, 2, 3, 4, 5, 6], "bar" => ["a", "b", "c", "d", "e", "f"]}
1471
+ # )
1472
+ # df.limit(4)
1473
+ # # =>
1474
+ # # shape: (4, 2)
1475
+ # # ┌─────┬─────┐
1476
+ # # │ foo ┆ bar │
1477
+ # # │ --- ┆ --- │
1478
+ # # │ i64 ┆ str │
1479
+ # # ╞═════╪═════╡
1480
+ # # │ 1 ┆ a │
1481
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1482
+ # # │ 2 ┆ b │
1483
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1484
+ # # │ 3 ┆ c │
1485
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1486
+ # # │ 4 ┆ d │
1487
+ # # └─────┴─────┘
1488
def limit(n = 5)
  # Forwards straight to #head with the same row count.
  first_rows = head(n)
  first_rows
end
1491
+
1492
+ # Get the first `n` rows.
1493
+ #
1494
+ # @param n [Integer]
1495
+ # Number of rows to return.
1496
+ #
1497
+ # @return [DataFrame]
1498
+ #
1499
+ # @example
1500
+ # df = Polars::DataFrame.new(
1501
+ # {
1502
+ # "foo" => [1, 2, 3, 4, 5],
1503
+ # "bar" => [6, 7, 8, 9, 10],
1504
+ # "ham" => ["a", "b", "c", "d", "e"]
1505
+ # }
1506
+ # )
1507
+ # df.head(3)
1508
+ # # =>
1509
+ # # shape: (3, 3)
1510
+ # # ┌─────┬─────┬─────┐
1511
+ # # │ foo ┆ bar ┆ ham │
1512
+ # # │ --- ┆ --- ┆ --- │
1513
+ # # │ i64 ┆ i64 ┆ str │
1514
+ # # ╞═════╪═════╪═════╡
1515
+ # # │ 1 ┆ 6 ┆ a │
1516
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1517
+ # # │ 2 ┆ 7 ┆ b │
1518
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1519
+ # # │ 3 ┆ 8 ┆ c │
1520
+ # # └─────┴─────┴─────┘
1521
def head(n = 5)
  # Take the leading rows from the wrapped `_df` and re-wrap the result.
  leading = _df.head(n)
  _from_rbdf(leading)
end
1524
+
1525
+ # Get the last `n` rows.
1526
+ #
1527
+ # @param n [Integer]
1528
+ # Number of rows to return.
1529
+ #
1530
+ # @return [DataFrame]
1531
+ #
1532
+ # @example
1533
+ # df = Polars::DataFrame.new(
1534
+ # {
1535
+ # "foo" => [1, 2, 3, 4, 5],
1536
+ # "bar" => [6, 7, 8, 9, 10],
1537
+ # "ham" => ["a", "b", "c", "d", "e"]
1538
+ # }
1539
+ # )
1540
+ # df.tail(3)
1541
+ # # =>
1542
+ # # shape: (3, 3)
1543
+ # # ┌─────┬─────┬─────┐
1544
+ # # │ foo ┆ bar ┆ ham │
1545
+ # # │ --- ┆ --- ┆ --- │
1546
+ # # │ i64 ┆ i64 ┆ str │
1547
+ # # ╞═════╪═════╪═════╡
1548
+ # # │ 3 ┆ 8 ┆ c │
1549
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1550
+ # # │ 4 ┆ 9 ┆ d │
1551
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1552
+ # # │ 5 ┆ 10 ┆ e │
1553
+ # # └─────┴─────┴─────┘
1554
def tail(n = 5)
  # Take the trailing rows from the wrapped `_df` and re-wrap the result.
  trailing = _df.tail(n)
  _from_rbdf(trailing)
end
1557
+
1558
+ # Return a new DataFrame where the null values are dropped.
1559
+ #
1560
+ # @param subset [Object]
1561
+ # Subset of column(s) on which `drop_nulls` will be applied.
1562
+ #
1563
+ # @return [DataFrame]
1564
+ #
1565
+ # @example
1566
+ # df = Polars::DataFrame.new(
1567
+ # {
1568
+ # "foo" => [1, 2, 3],
1569
+ # "bar" => [6, nil, 8],
1570
+ # "ham" => ["a", "b", "c"]
1571
+ # }
1572
+ # )
1573
+ # df.drop_nulls
1574
+ # # =>
1575
+ # # shape: (2, 3)
1576
+ # # ┌─────┬─────┬─────┐
1577
+ # # │ foo ┆ bar ┆ ham │
1578
+ # # │ --- ┆ --- ┆ --- │
1579
+ # # │ i64 ┆ i64 ┆ str │
1580
+ # # ╞═════╪═════╪═════╡
1581
+ # # │ 1 ┆ 6 ┆ a │
1582
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1583
+ # # │ 3 ┆ 8 ┆ c │
1584
+ # # └─────┴─────┴─────┘
1585
def drop_nulls(subset: nil)
  # A lone String column name is wrapped in an Array; any other value
  # (including nil) is passed through unchanged.
  subset = [subset] if subset.is_a?(String)
  cleaned = _df.drop_nulls(subset)
  _from_rbdf(cleaned)
end
1591
+
1592
+ # Offers a structured way to apply a sequence of user-defined functions (UDFs).
1593
+ #
1594
+ # @param func [Object]
1595
+ # Callable; will receive the frame as the first parameter,
1596
+ # followed by any given args/kwargs.
1597
+ # @param args [Object]
1598
+ # Arguments to pass to the UDF.
1599
+ # @param kwargs [Object]
1600
+ # Keyword arguments to pass to the UDF.
1601
+ #
1602
+ # @return [Object]
1603
+ #
1604
+ # @note
1605
+ # It is recommended to use LazyFrame when piping operations, in order
1606
+ # to fully take advantage of query optimization and parallelization.
1607
+ # See {#lazy}.
1608
+ #
1609
+ # @example
1610
+ # cast_str_to_int = lambda do |data, col_name:|
1611
+ # data.with_column(Polars.col(col_name).cast(:i64))
1612
+ # end
1613
+ #
1614
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => ["10", "20", "30", "40"]})
1615
+ # df.pipe(cast_str_to_int, col_name: "b")
1616
+ # # =>
1617
+ # # shape: (4, 2)
1618
+ # # ┌─────┬─────┐
1619
+ # # │ a ┆ b │
1620
+ # # │ --- ┆ --- │
1621
+ # # │ i64 ┆ i64 │
1622
+ # # ╞═════╪═════╡
1623
+ # # │ 1 ┆ 10 │
1624
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1625
+ # # │ 2 ┆ 20 │
1626
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1627
+ # # │ 3 ┆ 30 │
1628
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1629
+ # # │ 4 ┆ 40 │
1630
+ # # └─────┴─────┘
1631
def pipe(func, *args, **kwargs, &block)
  # The frame itself is always the first positional argument handed to `func`;
  # remaining positional, keyword and block arguments are forwarded as given.
  func.(self, *args, **kwargs, &block)
end
1634
+
1635
+ # Add a column at index 0 that counts the rows.
1636
+ #
1637
+ # @param name [String]
1638
+ # Name of the column to add.
1639
+ # @param offset [Integer]
1640
+ # Start the row count at this offset.
1641
+ #
1642
+ # @return [DataFrame]
1643
+ #
1644
+ # @example
1645
+ # df = Polars::DataFrame.new(
1646
+ # {
1647
+ # "a" => [1, 3, 5],
1648
+ # "b" => [2, 4, 6]
1649
+ # }
1650
+ # )
1651
+ # df.with_row_count
1652
+ # # =>
1653
+ # # shape: (3, 3)
1654
+ # # ┌────────┬─────┬─────┐
1655
+ # # │ row_nr ┆ a ┆ b │
1656
+ # # │ --- ┆ --- ┆ --- │
1657
+ # # │ u32 ┆ i64 ┆ i64 │
1658
+ # # ╞════════╪═════╪═════╡
1659
+ # # │ 0 ┆ 1 ┆ 2 │
1660
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1661
+ # # │ 1 ┆ 3 ┆ 4 │
1662
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
1663
+ # # │ 2 ┆ 5 ┆ 6 │
1664
+ # # └────────┴─────┴─────┘
1665
def with_row_count(name: "row_nr", offset: 0)
  # The wrapped `_df` builds the counted frame; re-wrap it before returning.
  counted = _df.with_row_count(name, offset)
  _from_rbdf(counted)
end
1668
+
1669
+ # Start a groupby operation.
1670
+ #
1671
+ # @param by [Object]
1672
+ # Column(s) to group by.
1673
+ # @param maintain_order [Boolean]
1674
+ # Make sure that the order of the groups remain consistent. This is more
1675
+ # expensive than a default groupby. Note that this only works in expression
1676
+ # aggregations.
1677
+ #
1678
+ # @return [GroupBy]
1679
+ #
1680
+ # @example
1681
+ # df = Polars::DataFrame.new(
1682
+ # {
1683
+ # "a" => ["a", "b", "a", "b", "b", "c"],
1684
+ # "b" => [1, 2, 3, 4, 5, 6],
1685
+ # "c" => [6, 5, 4, 3, 2, 1]
1686
+ # }
1687
+ # )
1688
+ # df.groupby("a").agg(Polars.col("b").sum).sort("a")
1689
+ # # =>
1690
+ # # shape: (3, 2)
1691
+ # # ┌─────┬─────┐
1692
+ # # │ a ┆ b │
1693
+ # # │ --- ┆ --- │
1694
+ # # │ str ┆ i64 │
1695
+ # # ╞═════╪═════╡
1696
+ # # │ a ┆ 4 │
1697
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1698
+ # # │ b ┆ 11 │
1699
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
1700
+ # # │ c ┆ 6 │
1701
+ # # └─────┴─────┘
1702
def groupby(by, maintain_order: false)
  # Reject anything Utils.bool? does not accept before building the GroupBy.
  unless Utils.bool?(maintain_order)
    raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
  end

  # A single String column name is wrapped in an Array.
  by = [by] if by.is_a?(String)

  GroupBy.new(_df, by, self.class, maintain_order: maintain_order)
end
1716
+
1717
+ # Create rolling groups based on a time column.
1718
+ #
1719
+ # Also works for index values of type `:i32` or `:i64`.
1720
+ #
1721
+ # Unlike {#groupby_dynamic}, the windows are determined by the individual
1722
+ # values and are not of constant intervals. For constant intervals, use
1723
+ # {#groupby_dynamic}.
1724
+ #
1725
+ # The `period` and `offset` arguments are created either from a timedelta, or
1726
+ # by using the following string language:
1727
+ #
1728
+ # - 1ns (1 nanosecond)
1729
+ # - 1us (1 microsecond)
1730
+ # - 1ms (1 millisecond)
1731
+ # - 1s (1 second)
1732
+ # - 1m (1 minute)
1733
+ # - 1h (1 hour)
1734
+ # - 1d (1 day)
1735
+ # - 1w (1 week)
1736
+ # - 1mo (1 calendar month)
1737
+ # - 1y (1 calendar year)
1738
+ # - 1i (1 index count)
1739
+ #
1740
+ # Or combine them:
1741
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1742
+ #
1743
+ # In case of a groupby_rolling on an integer column, the windows are defined by:
1744
+ #
1745
+ # - **"1i" # length 1**
1746
+ # - **"10i" # length 10**
1747
+ #
1748
+ # @param index_column [Object]
1749
+ # Column used to group based on the time window.
1750
+ # Often of type Date/Datetime.
1751
+ # This column must be sorted in ascending order. If not the output will not
1752
+ # make sense.
1753
+ #
1754
+ # In case of a rolling groupby on indices, dtype needs to be one of
1755
+ # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
1756
+ # performance matters use an `:i64` column.
1757
+ # @param period [Object]
1758
+ # Length of the window.
1759
+ # @param offset [Object]
1760
+ # Offset of the window. Default is -period.
1761
+ # @param closed ["right", "left", "both", "none"]
1762
+ # Define whether the temporal window interval is closed or not.
1763
+ # @param by [Object]
1764
+ # Also group by this column/these columns.
1765
+ #
1766
+ # @return [RollingGroupBy]
1767
+ #
1768
+ # @example
1769
+ # dates = [
1770
+ # "2020-01-01 13:45:48",
1771
+ # "2020-01-01 16:42:13",
1772
+ # "2020-01-01 16:45:09",
1773
+ # "2020-01-02 18:12:48",
1774
+ # "2020-01-03 19:45:32",
1775
+ # "2020-01-08 23:16:43"
1776
+ # ]
1777
+ # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
1778
+ # Polars.col("dt").str.strptime(:datetime)
1779
+ # )
1780
+ # df.groupby_rolling(index_column: "dt", period: "2d").agg(
1781
+ # [
1782
+ # Polars.sum("a").alias("sum_a"),
1783
+ # Polars.min("a").alias("min_a"),
1784
+ # Polars.max("a").alias("max_a")
1785
+ # ]
1786
+ # )
1787
+ # # =>
1788
+ # # shape: (6, 4)
1789
+ # # ┌─────────────────────┬───────┬───────┬───────┐
1790
+ # # │ dt ┆ sum_a ┆ min_a ┆ max_a │
1791
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1792
+ # # │ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │
1793
+ # # ╞═════════════════════╪═══════╪═══════╪═══════╡
1794
+ # # │ 2020-01-01 13:45:48 ┆ 3 ┆ 3 ┆ 3 │
1795
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1796
+ # # │ 2020-01-01 16:42:13 ┆ 10 ┆ 3 ┆ 7 │
1797
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1798
+ # # │ 2020-01-01 16:45:09 ┆ 15 ┆ 3 ┆ 7 │
1799
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1800
+ # # │ 2020-01-02 18:12:48 ┆ 24 ┆ 3 ┆ 9 │
1801
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1802
+ # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
1803
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
1804
+ # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
1805
+ # # └─────────────────────┴───────┴───────┴───────┘
1806
def groupby_rolling(index_column:, period:, offset: nil, closed: "right", by: nil)
  # Positional order passed to RollingGroupBy.new after the frame itself.
  window_options = [index_column, period, offset, closed, by]
  RollingGroupBy.new(self, *window_options)
end
1815
+
1816
+ # Group based on a time value (or index value of type `:i32`, `:i64`).
1817
+ #
1818
+ # Time windows are calculated and rows are assigned to windows. Different from a
1819
+ # normal groupby is that a row can be member of multiple groups. The time/index
1820
+ # window could be seen as a rolling window, with a window size determined by
1821
+ # dates/times/values instead of slots in the DataFrame.
1822
+ #
1823
+ # A window is defined by:
1824
+ #
1825
+ # - every: interval of the window
1826
+ # - period: length of the window
1827
+ # - offset: offset of the window
1828
+ #
1829
+ # The `every`, `period` and `offset` arguments are created with
1830
+ # the following string language:
1831
+ #
1832
+ # - 1ns (1 nanosecond)
1833
+ # - 1us (1 microsecond)
1834
+ # - 1ms (1 millisecond)
1835
+ # - 1s (1 second)
1836
+ # - 1m (1 minute)
1837
+ # - 1h (1 hour)
1838
+ # - 1d (1 day)
1839
+ # - 1w (1 week)
1840
+ # - 1mo (1 calendar month)
1841
+ # - 1y (1 calendar year)
1842
+ # - 1i (1 index count)
1843
+ #
1844
+ # Or combine them:
1845
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1846
+ #
1847
+ # In case of a groupby_dynamic on an integer column, the windows are defined by:
1848
+ #
1849
+ # - "1i" # length 1
1850
+ # - "10i" # length 10
1851
+ #
1852
+ # @param index_column
1853
+ # Column used to group based on the time window.
1854
+ # Often of type Date/Datetime.
1855
+ # This column must be sorted in ascending order. If not the output will not
1856
+ # make sense.
1857
+ #
1858
+ # In case of a dynamic groupby on indices, dtype needs to be one of
1859
+ # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
1860
+ # performance matters use an `:i64` column.
1861
+ # @param every
1862
+ # Interval of the window.
1863
+ # @param period
1864
+ # Length of the window. If `nil`, it is equal to `every`.
1865
+ # @param offset
1866
+ # Offset of the window. If `nil` and `period` is `nil`, it will be equal to
1867
+ # negative `every`.
1868
+ # @param truncate
1869
+ # Truncate the time value to the window lower bound.
1870
+ # @param include_boundaries
1871
+ # Add the lower and upper bound of the window to the "_lower_boundary" and
1872
+ # "_upper_boundary" columns. This will impact performance because it's harder to
1873
+ # parallelize.
1874
+ # @param closed ["right", "left", "both", "none"]
1875
+ # Define whether the temporal window interval is closed or not.
1876
+ # @param by
1877
+ # Also group by this column/these columns
1878
+ #
1879
+ # @return [DynamicGroupBy]
1880
+ #
1881
+ # @example
1882
+ # df = Polars::DataFrame.new(
1883
+ # {
1884
+ # "time" => Polars.date_range(
1885
+ # DateTime.new(2021, 12, 16),
1886
+ # DateTime.new(2021, 12, 16, 3),
1887
+ # "30m"
1888
+ # ),
1889
+ # "n" => 0..6
1890
+ # }
1891
+ # )
1892
+ # # =>
1893
+ # # shape: (7, 2)
1894
+ # # ┌─────────────────────┬─────┐
1895
+ # # │ time ┆ n │
1896
+ # # │ --- ┆ --- │
1897
+ # # │ datetime[μs] ┆ i64 │
1898
+ # # ╞═════════════════════╪═════╡
1899
+ # # │ 2021-12-16 00:00:00 ┆ 0 │
1900
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1901
+ # # │ 2021-12-16 00:30:00 ┆ 1 │
1902
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1903
+ # # │ 2021-12-16 01:00:00 ┆ 2 │
1904
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1905
+ # # │ 2021-12-16 01:30:00 ┆ 3 │
1906
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1907
+ # # │ 2021-12-16 02:00:00 ┆ 4 │
1908
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1909
+ # # │ 2021-12-16 02:30:00 ┆ 5 │
1910
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
1911
+ # # │ 2021-12-16 03:00:00 ┆ 6 │
1912
+ # # └─────────────────────┴─────┘
1913
+ #
1914
+ # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
1915
+ # df.groupby_dynamic("time", every: "1h", closed: "right").agg(
1916
+ # [
1917
+ # Polars.col("time").min.alias("time_min"),
1918
+ # Polars.col("time").max.alias("time_max")
1919
+ # ]
1920
+ # )
1921
+ # # =>
1922
+ # # shape: (4, 3)
1923
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┐
1924
+ # # │ time ┆ time_min ┆ time_max │
1925
+ # # │ --- ┆ --- ┆ --- │
1926
+ # # │ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] │
1927
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╡
1928
+ # # │ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-16 00:00:00 │
1929
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1930
+ # # │ 2021-12-16 00:00:00 ┆ 2021-12-16 00:30:00 ┆ 2021-12-16 01:00:00 │
1931
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1932
+ # # │ 2021-12-16 01:00:00 ┆ 2021-12-16 01:30:00 ┆ 2021-12-16 02:00:00 │
1933
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1934
+ # # │ 2021-12-16 02:00:00 ┆ 2021-12-16 02:30:00 ┆ 2021-12-16 03:00:00 │
1935
+ # # └─────────────────────┴─────────────────────┴─────────────────────┘
1936
+ #
1937
+ # @example The window boundaries can also be added to the aggregation result.
1938
+ # df.groupby_dynamic(
1939
+ # "time", every: "1h", include_boundaries: true, closed: "right"
1940
+ # ).agg([Polars.col("time").count.alias("time_count")])
1941
+ # # =>
1942
+ # # shape: (4, 4)
1943
+ # # ┌─────────────────────┬─────────────────────┬─────────────────────┬────────────┐
1944
+ # # │ _lower_boundary ┆ _upper_boundary ┆ time ┆ time_count │
1945
+ # # │ --- ┆ --- ┆ --- ┆ --- │
1946
+ # # │ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ u32 │
1947
+ # # ╞═════════════════════╪═════════════════════╪═════════════════════╪════════════╡
1948
+ # # │ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-15 23:00:00 ┆ 1 │
1949
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1950
+ # # │ 2021-12-16 00:00:00 ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 00:00:00 ┆ 2 │
1951
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1952
+ # # │ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 2 │
1953
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1954
+ # # │ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 2 │
1955
+ # # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
1956
+ #
1957
+ # @example When closed="left", should not include right end of interval.
1958
+ # df.groupby_dynamic("time", every: "1h", closed: "left").agg(
1959
+ # [
1960
+ # Polars.col("time").count.alias("time_count"),
1961
+ # Polars.col("time").list.alias("time_agg_list")
1962
+ # ]
1963
+ # )
1964
+ # # =>
1965
+ # # shape: (4, 3)
1966
+ # # ┌─────────────────────┬────────────┬─────────────────────────────────────┐
1967
+ # # │ time ┆ time_count ┆ time_agg_list │
1968
+ # # │ --- ┆ --- ┆ --- │
1969
+ # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
1970
+ # # ╞═════════════════════╪════════════╪═════════════════════════════════════╡
1971
+ # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16... │
1972
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1973
+ # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16... │
1974
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1975
+ # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16... │
1976
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
1977
+ # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
1978
+ # # └─────────────────────┴────────────┴─────────────────────────────────────┘
1979
+ #
1980
+ # @example When closed="both" the time values at the window boundaries belong to 2 groups.
1981
+ # df.groupby_dynamic("time", every: "1h", closed: "both").agg(
1982
+ # [Polars.col("time").count.alias("time_count")]
1983
+ # )
1984
+ # # =>
1985
+ # # shape: (5, 2)
1986
+ # # ┌─────────────────────┬────────────┐
1987
+ # # │ time ┆ time_count │
1988
+ # # │ --- ┆ --- │
1989
+ # # │ datetime[μs] ┆ u32 │
1990
+ # # ╞═════════════════════╪════════════╡
1991
+ # # │ 2021-12-15 23:00:00 ┆ 1 │
1992
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1993
+ # # │ 2021-12-16 00:00:00 ┆ 3 │
1994
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1995
+ # # │ 2021-12-16 01:00:00 ┆ 3 │
1996
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1997
+ # # │ 2021-12-16 02:00:00 ┆ 3 │
1998
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
1999
+ # # │ 2021-12-16 03:00:00 ┆ 1 │
2000
+ # # └─────────────────────┴────────────┘
2001
+ #
2002
+ # @example Dynamic groupbys can also be combined with grouping on normal keys.
2003
+ # df = Polars::DataFrame.new(
2004
+ # {
2005
+ # "time" => Polars.date_range(
2006
+ # DateTime.new(2021, 12, 16),
2007
+ # DateTime.new(2021, 12, 16, 3),
2008
+ # "30m"
2009
+ # ),
2010
+ # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
2011
+ # }
2012
+ # )
2013
+ # df.groupby_dynamic(
2014
+ # "time",
2015
+ # every: "1h",
2016
+ # closed: "both",
2017
+ # by: "groups",
2018
+ # include_boundaries: true
2019
+ # ).agg([Polars.col("time").count.alias("time_count")])
2020
+ # # =>
2021
+ # # shape: (7, 5)
2022
+ # # ┌────────┬─────────────────────┬─────────────────────┬─────────────────────┬────────────┐
2023
+ # # │ groups ┆ _lower_boundary ┆ _upper_boundary ┆ time ┆ time_count │
2024
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
2025
+ # # │ str ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ u32 │
2026
+ # # ╞════════╪═════════════════════╪═════════════════════╪═════════════════════╪════════════╡
2027
+ # # │ a ┆ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-15 23:00:00 ┆ 1 │
2028
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2029
+ # # │ a ┆ 2021-12-16 00:00:00 ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 00:00:00 ┆ 3 │
2030
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2031
+ # # │ a ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 1 │
2032
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2033
+ # # │ a ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 2 │
2034
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2035
+ # # │ a ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 04:00:00 ┆ 2021-12-16 03:00:00 ┆ 1 │
2036
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2037
+ # # │ b ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 2 │
2038
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
2039
+ # # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
2040
+ # # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
2041
+ #
2042
+ # @example Dynamic groupby on an index column.
2043
+ # df = Polars::DataFrame.new(
2044
+ # {
2045
+ # "idx" => Polars.arange(0, 6, eager: true),
2046
+ # "A" => ["A", "A", "B", "B", "B", "C"]
2047
+ # }
2048
+ # )
2049
+ # df.groupby_dynamic(
2050
+ # "idx",
2051
+ # every: "2i",
2052
+ # period: "3i",
2053
+ # include_boundaries: true,
2054
+ # closed: "right"
2055
+ # ).agg(Polars.col("A").list.alias("A_agg_list"))
2056
+ # # =>
2057
+ # # shape: (3, 4)
2058
+ # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
2059
+ # # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
2060
+ # # │ --- ┆ --- ┆ --- ┆ --- │
2061
+ # # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
2062
+ # # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
2063
+ # # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
2064
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2065
+ # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
2066
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
2067
+ # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
2068
+ # # └─────────────────┴─────────────────┴─────┴─────────────────┘
2069
def groupby_dynamic(
  index_column,
  every:,
  period: nil,
  offset: nil,
  truncate: true,
  include_boundaries: false,
  closed: "left",
  by: nil,
  start_by: "window"
)
  # Build a DynamicGroupBy for this frame; nothing is evaluated here.
  # The window settings are forwarded positionally, in exactly this order.
  window_settings = [
    every, period, offset, truncate, include_boundaries, closed, by, start_by
  ]
  DynamicGroupBy.new(self, index_column, *window_settings)
end
2093
+
2094
+ # Upsample a DataFrame at a regular frequency.
2095
+ #
2096
+ # @param time_column [Object]
2097
+ # time column will be used to determine a date_range.
2098
+ # Note that this column has to be sorted for the output to make sense.
2099
+ # @param every [String]
2100
+ # interval will start 'every' duration
2101
+ # @param offset [String]
2102
+ # change the start of the date_range by this offset.
2103
+ # @param by [Object]
2104
+ # First group by these columns and then upsample for every group
2105
+ # @param maintain_order [Boolean]
2106
+ # Keep the ordering predictable. This is slower.
2107
+ #
2108
+ # The `every` and `offset` arguments are created with
2109
+ # the following string language:
2110
+ #
2111
+ # - 1ns (1 nanosecond)
2112
+ # - 1us (1 microsecond)
2113
+ # - 1ms (1 millisecond)
2114
+ # - 1s (1 second)
2115
+ # - 1m (1 minute)
2116
+ # - 1h (1 hour)
2117
+ # - 1d (1 day)
2118
+ # - 1w (1 week)
2119
+ # - 1mo (1 calendar month)
2120
+ # - 1y (1 calendar year)
2121
+ # - 1i (1 index count)
2122
+ #
2123
+ # Or combine them:
2124
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
2125
+ #
2126
+ # @return [DataFrame]
2127
+ #
2128
+ # @example Upsample a DataFrame by a certain interval.
2129
+ # df = Polars::DataFrame.new(
2130
+ # {
2131
+ # "time" => [
2132
+ # DateTime.new(2021, 2, 1),
2133
+ # DateTime.new(2021, 4, 1),
2134
+ # DateTime.new(2021, 5, 1),
2135
+ # DateTime.new(2021, 6, 1)
2136
+ # ],
2137
+ # "groups" => ["A", "B", "A", "B"],
2138
+ # "values" => [0, 1, 2, 3]
2139
+ # }
2140
+ # )
2141
+ # df.upsample(
2142
+ # time_column: "time", every: "1mo", by: "groups", maintain_order: true
2143
+ # ).select(Polars.all.forward_fill)
2144
+ # # =>
2145
+ # # shape: (7, 3)
2146
+ # # ┌─────────────────────┬────────┬────────┐
2147
+ # # │ time ┆ groups ┆ values │
2148
+ # # │ --- ┆ --- ┆ --- │
2149
+ # # │ datetime[ns] ┆ str ┆ i64 │
2150
+ # # ╞═════════════════════╪════════╪════════╡
2151
+ # # │ 2021-02-01 00:00:00 ┆ A ┆ 0 │
2152
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
2153
+ # # │ 2021-03-01 00:00:00 ┆ A ┆ 0 │
2154
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
2155
+ # # │ 2021-04-01 00:00:00 ┆ A ┆ 0 │
2156
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
2157
+ # # │ 2021-05-01 00:00:00 ┆ A ┆ 2 │
2158
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
2159
+ # # │ 2021-04-01 00:00:00 ┆ B ┆ 1 │
2160
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
2161
+ # # │ 2021-05-01 00:00:00 ┆ B ┆ 1 │
2162
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
2163
+ # # │ 2021-06-01 00:00:00 ┆ B ┆ 3 │
2164
+ # # └─────────────────────┴────────┴────────┘
2165
def upsample(
  time_column:,
  every:,
  offset: nil,
  by: nil,
  maintain_order: false
)
  # Normalise `by` to a list: nil becomes an empty list, a single String is wrapped.
  group_columns =
    case by
    when nil then []
    when String then [by]
    else by
    end

  # A missing offset is forwarded as the "0ns" duration string.
  every_duration = Utils._timedelta_to_pl_duration(every)
  offset_duration = Utils._timedelta_to_pl_duration(offset.nil? ? "0ns" : offset)

  upsampled = _df.upsample(
    group_columns, time_column, every_duration, offset_duration, maintain_order
  )
  _from_rbdf(upsampled)
end
2189
+
2190
+ # Perform an asof join.
2191
+ #
2192
+ # This is similar to a left-join except that we match on nearest key rather than
2193
+ # equal keys.
2194
+ #
2195
+ # Both DataFrames must be sorted by the asof_join key.
2196
+ #
2197
+ # For each row in the left DataFrame:
2198
+ #
2199
+ # - A "backward" search selects the last row in the right DataFrame whose 'on' key is less than or equal to the left's key.
2200
+ # - A "forward" search selects the first row in the right DataFrame whose 'on' key is greater than or equal to the left's key.
2201
+ #
2202
+ # The default is "backward".
2203
+ #
2204
+ # @param other [DataFrame]
2205
+ # DataFrame to join with.
2206
+ # @param left_on [String]
2207
+ # Join column of the left DataFrame.
2208
+ # @param right_on [String]
2209
+ # Join column of the right DataFrame.
2210
+ # @param on [String]
2211
+ # Join column of both DataFrames. If set, `left_on` and `right_on` should be
2212
+ # nil.
2213
+ # @param by [Object]
2214
+ # join on these columns before doing asof join
2215
+ # @param by_left [Object]
2216
+ # join on these columns before doing asof join
2217
+ # @param by_right [Object]
2218
+ # join on these columns before doing asof join
2219
+ # @param strategy ["backward", "forward"]
2220
+ # Join strategy.
2221
+ # @param suffix [String]
2222
+ # Suffix to append to columns with a duplicate name.
2223
+ # @param tolerance [Object]
2224
+ # Numeric tolerance. By setting this the join will only be done if the near
2225
+ # keys are within this distance. If an asof join is done on columns of dtype
2226
+ # "Date", "Datetime", "Duration" or "Time" you use the following string
2227
+ # language:
2228
+ #
2229
+ # - 1ns (1 nanosecond)
2230
+ # - 1us (1 microsecond)
2231
+ # - 1ms (1 millisecond)
2232
+ # - 1s (1 second)
2233
+ # - 1m (1 minute)
2234
+ # - 1h (1 hour)
2235
+ # - 1d (1 day)
2236
+ # - 1w (1 week)
2237
+ # - 1mo (1 calendar month)
2238
+ # - 1y (1 calendar year)
2239
+ # - 1i (1 index count)
2240
+ #
2241
+ # Or combine them:
2242
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
2243
+ #
2244
+ # @param allow_parallel [Boolean]
2245
+ # Allow the physical plan to optionally evaluate the computation of both
2246
+ # DataFrames up to the join in parallel.
2247
+ # @param force_parallel [Boolean]
2248
+ # Force the physical plan to evaluate the computation of both DataFrames up to
2249
+ # the join in parallel.
2250
+ #
2251
+ # @return [DataFrame]
2252
+ #
2253
+ # @example
2254
+ # gdp = Polars::DataFrame.new(
2255
+ # {
2256
+ # "date" => [
2257
+ # DateTime.new(2016, 1, 1),
2258
+ # DateTime.new(2017, 1, 1),
2259
+ # DateTime.new(2018, 1, 1),
2260
+ # DateTime.new(2019, 1, 1),
2261
+ # ], # note record date: Jan 1st (sorted!)
2262
+ # "gdp" => [4164, 4411, 4566, 4696]
2263
+ # }
2264
+ # )
2265
+ # population = Polars::DataFrame.new(
2266
+ # {
2267
+ # "date" => [
2268
+ # DateTime.new(2016, 5, 12),
2269
+ # DateTime.new(2017, 5, 12),
2270
+ # DateTime.new(2018, 5, 12),
2271
+ # DateTime.new(2019, 5, 12),
2272
+ # ], # note record date: May 12th (sorted!)
2273
+ # "population" => [82.19, 82.66, 83.12, 83.52]
2274
+ # }
2275
+ # )
2276
+ # population.join_asof(
2277
+ # gdp, left_on: "date", right_on: "date", strategy: "backward"
2278
+ # )
2279
+ # # =>
2280
+ # # shape: (4, 3)
2281
+ # # ┌─────────────────────┬────────────┬──────┐
2282
+ # # │ date ┆ population ┆ gdp │
2283
+ # # │ --- ┆ --- ┆ --- │
2284
+ # # │ datetime[ns] ┆ f64 ┆ i64 │
2285
+ # # ╞═════════════════════╪════════════╪══════╡
2286
+ # # │ 2016-05-12 00:00:00 ┆ 82.19 ┆ 4164 │
2287
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2288
+ # # │ 2017-05-12 00:00:00 ┆ 82.66 ┆ 4411 │
2289
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2290
+ # # │ 2018-05-12 00:00:00 ┆ 83.12 ┆ 4566 │
2291
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2292
+ # # │ 2019-05-12 00:00:00 ┆ 83.52 ┆ 4696 │
2293
+ # # └─────────────────────┴────────────┴──────┘
2294
def join_asof(
  other,
  left_on: nil,
  right_on: nil,
  on: nil,
  by_left: nil,
  by_right: nil,
  by: nil,
  strategy: "backward",
  suffix: "_right",
  tolerance: nil,
  allow_parallel: true,
  force_parallel: false
)
  # Every keyword is handed unchanged to the lazy frame's join_asof.
  asof_options = {
    left_on: left_on,
    right_on: right_on,
    on: on,
    by_left: by_left,
    by_right: by_right,
    by: by,
    strategy: strategy,
    suffix: suffix,
    tolerance: tolerance,
    allow_parallel: allow_parallel,
    force_parallel: force_parallel
  }
  joined = lazy.join_asof(other.lazy, **asof_options)
  joined.collect(no_optimization: true)
end
2325
+
2326
+ # Join in SQL-like fashion.
2327
+ #
2328
+ # @param other [DataFrame]
2329
+ # DataFrame to join with.
2330
+ # @param left_on [Object]
2331
+ # Name(s) of the left join column(s).
2332
+ # @param right_on [Object]
2333
+ # Name(s) of the right join column(s).
2334
+ # @param on [Object]
2335
+ # Name(s) of the join columns in both DataFrames.
2336
+ # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
2337
+ # Join strategy.
2338
+ # @param suffix [String]
2339
+ # Suffix to append to columns with a duplicate name.
2340
+ #
2341
+ # @return [DataFrame]
2342
+ #
2343
+ # @example
2344
+ # df = Polars::DataFrame.new(
2345
+ # {
2346
+ # "foo" => [1, 2, 3],
2347
+ # "bar" => [6.0, 7.0, 8.0],
2348
+ # "ham" => ["a", "b", "c"]
2349
+ # }
2350
+ # )
2351
+ # other_df = Polars::DataFrame.new(
2352
+ # {
2353
+ # "apple" => ["x", "y", "z"],
2354
+ # "ham" => ["a", "b", "d"]
2355
+ # }
2356
+ # )
2357
+ # df.join(other_df, on: "ham")
2358
+ # # =>
2359
+ # # shape: (2, 4)
2360
+ # # ┌─────┬─────┬─────┬───────┐
2361
+ # # │ foo ┆ bar ┆ ham ┆ apple │
2362
+ # # │ --- ┆ --- ┆ --- ┆ --- │
2363
+ # # │ i64 ┆ f64 ┆ str ┆ str │
2364
+ # # ╞═════╪═════╪═════╪═══════╡
2365
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
2366
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2367
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
2368
+ # # └─────┴─────┴─────┴───────┘
2369
+ #
2370
+ # @example
2371
+ # df.join(other_df, on: "ham", how: "outer")
2372
+ # # =>
2373
+ # # shape: (4, 4)
2374
+ # # ┌──────┬──────┬─────┬───────┐
2375
+ # # │ foo ┆ bar ┆ ham ┆ apple │
2376
+ # # │ --- ┆ --- ┆ --- ┆ --- │
2377
+ # # │ i64 ┆ f64 ┆ str ┆ str │
2378
+ # # ╞══════╪══════╪═════╪═══════╡
2379
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
2380
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2381
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
2382
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2383
+ # # │ null ┆ null ┆ d ┆ z │
2384
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2385
+ # # │ 3 ┆ 8.0 ┆ c ┆ null │
2386
+ # # └──────┴──────┴─────┴───────┘
2387
+ #
2388
+ # @example
2389
+ # df.join(other_df, on: "ham", how: "left")
2390
+ # # =>
2391
+ # # shape: (3, 4)
2392
+ # # ┌─────┬─────┬─────┬───────┐
2393
+ # # │ foo ┆ bar ┆ ham ┆ apple │
2394
+ # # │ --- ┆ --- ┆ --- ┆ --- │
2395
+ # # │ i64 ┆ f64 ┆ str ┆ str │
2396
+ # # ╞═════╪═════╪═════╪═══════╡
2397
+ # # │ 1 ┆ 6.0 ┆ a ┆ x │
2398
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2399
+ # # │ 2 ┆ 7.0 ┆ b ┆ y │
2400
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2401
+ # # │ 3 ┆ 8.0 ┆ c ┆ null │
2402
+ # # └─────┴─────┴─────┴───────┘
2403
+ #
2404
+ # @example
2405
+ # df.join(other_df, on: "ham", how: "semi")
2406
+ # # =>
2407
+ # # shape: (2, 3)
2408
+ # # ┌─────┬─────┬─────┐
2409
+ # # │ foo ┆ bar ┆ ham │
2410
+ # # │ --- ┆ --- ┆ --- │
2411
+ # # │ i64 ┆ f64 ┆ str │
2412
+ # # ╞═════╪═════╪═════╡
2413
+ # # │ 1 ┆ 6.0 ┆ a │
2414
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2415
+ # # │ 2 ┆ 7.0 ┆ b │
2416
+ # # └─────┴─────┴─────┘
2417
+ #
2418
+ # @example
2419
+ # df.join(other_df, on: "ham", how: "anti")
2420
+ # # =>
2421
+ # # shape: (1, 3)
2422
+ # # ┌─────┬─────┬─────┐
2423
+ # # │ foo ┆ bar ┆ ham │
2424
+ # # │ --- ┆ --- ┆ --- │
2425
+ # # │ i64 ┆ f64 ┆ str │
2426
+ # # ╞═════╪═════╪═════╡
2427
+ # # │ 3 ┆ 8.0 ┆ c │
2428
+ # # └─────┴─────┴─────┘
2429
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
  # Delegate to the lazy frame's join and collect the result right away.
  join_options = {left_on: left_on, right_on: right_on, on: on, how: how, suffix: suffix}
  joined = lazy.join(other.lazy, **join_options)
  joined.collect(no_optimization: true)
end
2441
+
2442
+ # Apply a custom/user-defined function (UDF) over the rows of the DataFrame.
2443
+ #
2444
+ # The UDF will receive each row as a tuple of values: `udf(row)`.
2445
+ #
2446
+ # Implementing logic using a Ruby function is almost always _significantly_
2447
+ # slower and more memory intensive than implementing the same logic using
2448
+ # the native expression API because:
2449
+ #
2450
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
2451
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
2452
+ # - Polars-native expressions can be parallelised (UDFs cannot).
2453
+ # - Polars-native expressions can be logically optimised (UDFs cannot).
2454
+ #
2455
+ # Wherever possible you should strongly prefer the native expression API
2456
+ # to achieve the best performance.
2457
+ #
2458
+ # @param return_dtype [Symbol]
2459
+ # Output type of the operation. If none given, Polars tries to infer the type.
2460
+ # @param inference_size [Integer]
2461
+ # Only used in the case when the custom function returns rows.
2462
+ # This uses the first `n` rows to determine the output schema
2463
+ #
2464
+ # @return [Object]
2465
+ #
2466
+ # @note
2467
+ # The frame-level `apply` cannot track column names (as the UDF is a black-box
2468
+ # that may arbitrarily drop, rearrange, transform, or add new columns); if you
2469
+ # want to apply a UDF such that column names are preserved, you should use the
2470
+ # expression-level `apply` syntax instead.
2471
+ #
2472
+ # @example
2473
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [-1, 5, 8]})
2474
+ #
2475
+ # @example Return a DataFrame by mapping each row to a tuple:
2476
+ # df.apply { |t| [t[0] * 2, t[1] * 3] }
2477
+ # # =>
2478
+ # # shape: (3, 2)
2479
+ # # ┌──────────┬──────────┐
2480
+ # # │ column_0 ┆ column_1 │
2481
+ # # │ --- ┆ --- │
2482
+ # # │ i64 ┆ i64 │
2483
+ # # ╞══════════╪══════════╡
2484
+ # # │ 2 ┆ -3 │
2485
+ # # ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2486
+ # # │ 4 ┆ 15 │
2487
+ # # ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
2488
+ # # │ 6 ┆ 24 │
2489
+ # # └──────────┴──────────┘
2490
+ #
2491
+ # @example Return a Series by mapping each row to a scalar:
2492
+ # df.apply { |t| t[0] * 2 + t[1] }
2493
+ # # =>
2494
+ # # shape: (3, 1)
2495
+ # # ┌───────┐
2496
+ # # │ apply │
2497
+ # # │ --- │
2498
+ # # │ i64 │
2499
+ # # ╞═══════╡
2500
+ # # │ 1 │
2501
+ # # ├╌╌╌╌╌╌╌┤
2502
+ # # │ 9 │
2503
+ # # ├╌╌╌╌╌╌╌┤
2504
+ # # │ 14 │
2505
+ # # └───────┘
2506
def apply(return_dtype: nil, inference_size: 256, &f)
  # The native call returns the result plus a flag saying whether it is a frame.
  result, frame_result = _df.apply(f, return_dtype, inference_size)
  # Otherwise wrap the result as a Series and take the native handle of its
  # frame form, so both paths return through _from_rbdf.
  result = Utils.wrap_s(result).to_frame._df unless frame_result
  _from_rbdf(result)
end
2514
+
2515
+ # Return a new DataFrame with the column added or replaced.
2516
+ #
2517
+ # @param column [Object]
2518
+ # Series, where the name of the Series refers to the column in the DataFrame.
2519
+ #
2520
+ # @return [DataFrame]
2521
+ #
2522
+ # @example Added
2523
+ # df = Polars::DataFrame.new(
2524
+ # {
2525
+ # "a" => [1, 3, 5],
2526
+ # "b" => [2, 4, 6]
2527
+ # }
2528
+ # )
2529
+ # df.with_column((Polars.col("b") ** 2).alias("b_squared"))
2530
+ # # =>
2531
+ # # shape: (3, 3)
2532
+ # # ┌─────┬─────┬───────────┐
2533
+ # # │ a ┆ b ┆ b_squared │
2534
+ # # │ --- ┆ --- ┆ --- │
2535
+ # # │ i64 ┆ i64 ┆ f64 │
2536
+ # # ╞═════╪═════╪═══════════╡
2537
+ # # │ 1 ┆ 2 ┆ 4.0 │
2538
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
2539
+ # # │ 3 ┆ 4 ┆ 16.0 │
2540
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
2541
+ # # │ 5 ┆ 6 ┆ 36.0 │
2542
+ # # └─────┴─────┴───────────┘
2543
+ #
2544
+ # @example Replaced
2545
+ # df.with_column(Polars.col("a") ** 2)
2546
+ # # =>
2547
+ # # shape: (3, 2)
2548
+ # # ┌──────┬─────┐
2549
+ # # │ a ┆ b │
2550
+ # # │ --- ┆ --- │
2551
+ # # │ f64 ┆ i64 │
2552
+ # # ╞══════╪═════╡
2553
+ # # │ 1.0 ┆ 2 │
2554
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
2555
+ # # │ 9.0 ┆ 4 │
2556
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
2557
+ # # │ 25.0 ┆ 6 │
2558
+ # # └──────┴─────┘
2559
def with_column(column)
  # Add the column on the lazy frame, then collect with these fixed options.
  plan = lazy.with_column(column)
  plan.collect(no_optimization: true, string_cache: false)
end
2564
+
2565
+ # Return a new DataFrame grown horizontally by stacking multiple Series to it.
2566
+ #
2567
+ # @param columns [Object]
2568
+ # Series to stack.
2569
+ # @param in_place [Boolean]
2570
+ # Modify in place.
2571
+ #
2572
+ # @return [DataFrame]
2573
+ #
2574
+ # @example
2575
+ # df = Polars::DataFrame.new(
2576
+ # {
2577
+ # "foo" => [1, 2, 3],
2578
+ # "bar" => [6, 7, 8],
2579
+ # "ham" => ["a", "b", "c"]
2580
+ # }
2581
+ # )
2582
+ # x = Polars::Series.new("apple", [10, 20, 30])
2583
+ # df.hstack([x])
2584
+ # # =>
2585
+ # # shape: (3, 4)
2586
+ # # ┌─────┬─────┬─────┬───────┐
2587
+ # # │ foo ┆ bar ┆ ham ┆ apple │
2588
+ # # │ --- ┆ --- ┆ --- ┆ --- │
2589
+ # # │ i64 ┆ i64 ┆ str ┆ i64 │
2590
+ # # ╞═════╪═════╪═════╪═══════╡
2591
+ # # │ 1 ┆ 6 ┆ a ┆ 10 │
2592
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2593
+ # # │ 2 ┆ 7 ┆ b ┆ 20 │
2594
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
2595
+ # # │ 3 ┆ 8 ┆ c ┆ 30 │
2596
+ # # └─────┴─────┴─────┴───────┘
2597
def hstack(columns, in_place: false)
  # Anything that is not an Array is asked for its columns via get_columns.
  series_list = columns.is_a?(Array) ? columns : columns.get_columns
  native_series = series_list.map(&:_s)

  return _from_rbdf(_df.hstack(native_series)) unless in_place

  # In-place variant: mutate the underlying frame and hand back the receiver.
  _df.hstack_mut(native_series)
  self
end
2608
+
2609
+ # Grow this DataFrame vertically by stacking a DataFrame to it.
2610
+ #
2611
+ # @param df [DataFrame]
2612
+ # DataFrame to stack.
2613
+ # @param in_place [Boolean]
2614
+ # Modify in place
2615
+ #
2616
+ # @return [DataFrame]
2617
+ #
2618
+ # @example
2619
+ # df1 = Polars::DataFrame.new(
2620
+ # {
2621
+ # "foo" => [1, 2],
2622
+ # "bar" => [6, 7],
2623
+ # "ham" => ["a", "b"]
2624
+ # }
2625
+ # )
2626
+ # df2 = Polars::DataFrame.new(
2627
+ # {
2628
+ # "foo" => [3, 4],
2629
+ # "bar" => [8, 9],
2630
+ # "ham" => ["c", "d"]
2631
+ # }
2632
+ # )
2633
+ # df1.vstack(df2)
2634
+ # # =>
2635
+ # # shape: (4, 3)
2636
+ # # ┌─────┬─────┬─────┐
2637
+ # # │ foo ┆ bar ┆ ham │
2638
+ # # │ --- ┆ --- ┆ --- │
2639
+ # # │ i64 ┆ i64 ┆ str │
2640
+ # # ╞═════╪═════╪═════╡
2641
+ # # │ 1 ┆ 6 ┆ a │
2642
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2643
+ # # │ 2 ┆ 7 ┆ b │
2644
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2645
+ # # │ 3 ┆ 8 ┆ c │
2646
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
2647
+ # # │ 4 ┆ 9 ┆ d │
2648
+ # # └─────┴─────┴─────┘
2649
def vstack(df, in_place: false)
  # Default path: build a new frame from the stacked native result.
  return _from_rbdf(_df.vstack(df._df)) unless in_place

  # In-place variant: mutate the underlying frame and hand back the receiver.
  _df.vstack_mut(df._df)
  self
end
2657
+
2658
+ # Extend the memory backed by this `DataFrame` with the values from `other`.
2659
+ #
2660
+ # Different from `vstack` which adds the chunks from `other` to the chunks of this
2661
+ # `DataFrame`, `extend` appends the data from `other` to the underlying memory
2662
+ # locations and thus may cause a reallocation.
2663
+ #
2664
+ # If this does not cause a reallocation, the resulting data structure will not
2665
+ # have any extra chunks and thus will yield faster queries.
2666
+ #
2667
+ # Prefer `extend` over `vstack` when you want to do a query after a single append.
2668
+ # For instance during online operations where you add `n` rows and rerun a query.
2669
+ #
2670
+ # Prefer `vstack` over `extend` when you want to append many times before doing a
2671
+ # query. For instance when you read in multiple files and want to store them in a
2672
+ # single `DataFrame`. In the latter case, finish the sequence of `vstack`
2673
+ # operations with a `rechunk`.
2674
+ #
2675
+ # @param other [DataFrame]
2676
+ # DataFrame to vertically add.
2677
+ #
2678
+ # @return [DataFrame]
2679
+ #
2680
+ # @example
2681
+ # df1 = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
2682
+ # df2 = Polars::DataFrame.new({"foo" => [10, 20, 30], "bar" => [40, 50, 60]})
2683
+ # df1.extend(df2)
2684
+ # # =>
2685
+ # # shape: (6, 2)
2686
+ # # ┌─────┬─────┐
2687
+ # # │ foo ┆ bar │
2688
+ # # │ --- ┆ --- │
2689
+ # # │ i64 ┆ i64 │
2690
+ # # ╞═════╪═════╡
2691
+ # # │ 1 ┆ 4 │
2692
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2693
+ # # │ 2 ┆ 5 │
2694
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2695
+ # # │ 3 ┆ 6 │
2696
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2697
+ # # │ 10 ┆ 40 │
2698
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2699
+ # # │ 20 ┆ 50 │
2700
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2701
+ # # │ 30 ┆ 60 │
2702
+ # # └─────┴─────┘
2703
def extend(other)
  # Extend the underlying frame with `other`'s, then return the receiver itself.
  tap { _df.extend(other._df) }
end
2707
+
2708
+ # Remove column from DataFrame and return as new.
2709
+ #
2710
+ # @param columns [Object]
2711
+ # Column(s) to drop.
2712
+ #
2713
+ # @return [DataFrame]
2714
+ #
2715
+ # @example
2716
+ # df = Polars::DataFrame.new(
2717
+ # {
2718
+ # "foo" => [1, 2, 3],
2719
+ # "bar" => [6.0, 7.0, 8.0],
2720
+ # "ham" => ["a", "b", "c"]
2721
+ # }
2722
+ # )
2723
+ # df.drop("ham")
2724
+ # # =>
2725
+ # # shape: (3, 2)
2726
+ # # ┌─────┬─────┐
2727
+ # # │ foo ┆ bar │
2728
+ # # │ --- ┆ --- │
2729
+ # # │ i64 ┆ f64 │
2730
+ # # ╞═════╪═════╡
2731
+ # # │ 1 ┆ 6.0 │
2732
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2733
+ # # │ 2 ┆ 7.0 │
2734
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
2735
+ # # │ 3 ┆ 8.0 │
2736
+ # # └─────┴─────┘
2737
def drop(columns)
  # A single (non-Array) column goes straight to the native drop.
  return _from_rbdf(_df.drop(columns)) unless columns.is_a?(Array)

  # Several names: drop each one in place on a clone and return that clone.
  clone.tap do |copy|
    columns.each { |column_name| copy._df.drop_in_place(column_name) }
  end
end
2748
+
2749
+ # Drop in place.
2750
+ #
2751
+ # @param name [Object]
2752
+ # Column to drop.
2753
+ #
2754
+ # @return [Series]
2755
+ #
2756
+ # @example
2757
+ # df = Polars::DataFrame.new(
2758
+ # {
2759
+ # "foo" => [1, 2, 3],
2760
+ # "bar" => [6, 7, 8],
2761
+ # "ham" => ["a", "b", "c"]
2762
+ # }
2763
+ # )
2764
+ # df.drop_in_place("ham")
2765
+ # # =>
2766
+ # # shape: (3,)
2767
+ # # Series: 'ham' [str]
2768
+ # # [
2769
+ # # "a"
2770
+ # # "b"
2771
+ # # "c"
2772
+ # # ]
2773
def drop_in_place(name)
  # Drop the column on the underlying frame and wrap what the call returns.
  removed = _df.drop_in_place(name)
  Utils.wrap_s(removed)
end
2776
+
2777
+ # Create an empty copy of the current DataFrame.
2778
+ #
2779
+ # Returns a DataFrame with identical schema but no data.
2780
+ #
2781
+ # @return [DataFrame]
2782
+ #
2783
+ # @example
2784
+ # df = Polars::DataFrame.new(
2785
+ # {
2786
+ # "a" => [nil, 2, 3, 4],
2787
+ # "b" => [0.5, nil, 2.5, 13],
2788
+ # "c" => [true, true, false, nil]
2789
+ # }
2790
+ # )
2791
+ # df.cleared
2792
+ # # =>
2793
+ # # shape: (0, 3)
2794
+ # # ┌─────┬─────┬──────┐
2795
+ # # │ a ┆ b ┆ c │
2796
+ # # │ --- ┆ --- ┆ --- │
2797
+ # # │ i64 ┆ f64 ┆ bool │
2798
+ # # ╞═════╪═════╪══════╡
2799
+ # # └─────┴─────┴──────┘
2800
def cleared
  # With rows present, take a zero-row head; otherwise return a clone.
  if height > 0
    head(0)
  else
    clone
  end
end
2803
+
2804
+ # clone handled by initialize_copy
2805
+
2806
+ # Get the DataFrame as an Array of Series.
2807
+ #
2808
+ # @return [Array]
2809
def get_columns
  # Pass every column of the underlying frame through Utils.wrap_s.
  native_columns = _df.get_columns
  native_columns.map do |native_series|
    Utils.wrap_s(native_series)
  end
end
2812
+
2813
+ # Get a single column as Series by name.
2814
+ #
2815
+ # @param name [String]
2816
+ # Name of the column to retrieve.
2817
+ #
2818
+ # @return [Series]
2819
+ #
2820
+ # @example
2821
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
2822
+ # df.get_column("foo")
2823
+ # # =>
2824
+ # # shape: (3,)
2825
+ # # Series: 'foo' [i64]
2826
+ # # [
2827
+ # # 1
2828
+ # # 2
2829
+ # # 3
2830
+ # # ]
2831
def get_column(name)
  # Plain alias for the frame's own index operator.
  self.[](name)
end
2834
+
2835
+ # Fill null values using the specified value or strategy.
2836
+ #
2837
+ # @param value [Numeric]
2838
+ # Value used to fill null values.
2839
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2840
+ # Strategy used to fill null values.
2841
+ # @param limit [Integer]
2842
+ # Number of consecutive null values to fill when using the 'forward' or
2843
+ # 'backward' strategy.
2844
+ # @param matches_supertype [Boolean]
2845
+ # Fill all matching supertype of the fill `value`.
2846
+ #
2847
+ # @return [DataFrame]
2848
+ #
2849
+ # @example
2850
+ # df = Polars::DataFrame.new(
2851
+ # {
2852
+ # "a" => [1, 2, nil, 4],
2853
+ # "b" => [0.5, 4, nil, 13]
2854
+ # }
2855
+ # )
2856
+ # df.fill_null(99)
2857
+ # # =>
2858
+ # # shape: (4, 2)
2859
+ # # ┌─────┬──────┐
2860
+ # # │ a ┆ b │
2861
+ # # │ --- ┆ --- │
2862
+ # # │ i64 ┆ f64 │
2863
+ # # ╞═════╪══════╡
2864
+ # # │ 1 ┆ 0.5 │
2865
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2866
+ # # │ 2 ┆ 4.0 │
2867
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2868
+ # # │ 99 ┆ 99.0 │
2869
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2870
+ # # │ 4 ┆ 13.0 │
2871
+ # # └─────┴──────┘
2872
+ #
2873
+ # @example
2874
+ # df.fill_null(strategy: "forward")
2875
+ # # =>
2876
+ # # shape: (4, 2)
2877
+ # # ┌─────┬──────┐
2878
+ # # │ a ┆ b │
2879
+ # # │ --- ┆ --- │
2880
+ # # │ i64 ┆ f64 │
2881
+ # # ╞═════╪══════╡
2882
+ # # │ 1 ┆ 0.5 │
2883
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2884
+ # # │ 2 ┆ 4.0 │
2885
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2886
+ # # │ 2 ┆ 4.0 │
2887
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2888
+ # # │ 4 ┆ 13.0 │
2889
+ # # └─────┴──────┘
2890
+ #
2891
+ # @example
2892
+ # df.fill_null(strategy: "max")
2893
+ # # =>
2894
+ # # shape: (4, 2)
2895
+ # # ┌─────┬──────┐
2896
+ # # │ a ┆ b │
2897
+ # # │ --- ┆ --- │
2898
+ # # │ i64 ┆ f64 │
2899
+ # # ╞═════╪══════╡
2900
+ # # │ 1 ┆ 0.5 │
2901
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2902
+ # # │ 2 ┆ 4.0 │
2903
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2904
+ # # │ 4 ┆ 13.0 │
2905
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2906
+ # # │ 4 ┆ 13.0 │
2907
+ # # └─────┴──────┘
2908
+ #
2909
+ # @example
2910
+ # df.fill_null(strategy: "zero")
2911
+ # # =>
2912
+ # # shape: (4, 2)
2913
+ # # ┌─────┬──────┐
2914
+ # # │ a ┆ b │
2915
+ # # │ --- ┆ --- │
2916
+ # # │ i64 ┆ f64 │
2917
+ # # ╞═════╪══════╡
2918
+ # # │ 1 ┆ 0.5 │
2919
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2920
+ # # │ 2 ┆ 4.0 │
2921
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2922
+ # # │ 0 ┆ 0.0 │
2923
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2924
+ # # │ 4 ┆ 13.0 │
2925
+ # # └─────┴──────┘
2926
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
  # Do the fill on the lazy frame and collect the result.
  filled = lazy.fill_null(
    value,
    strategy: strategy,
    limit: limit,
    matches_supertype: matches_supertype
  ).collect(no_optimization: true)

  # Re-wrap the collected frame's native handle through _from_rbdf.
  _from_rbdf(filled._df)
end
2934
+
2935
+ # Fill floating point NaN values by an Expression evaluation.
2936
+ #
2937
+ # @param fill_value [Object]
2938
+ # Value to fill NaN with.
2939
+ #
2940
+ # @return [DataFrame]
2941
+ #
2942
+ # @note
2943
+ # Note that floating point NaNs (Not a Number) are not missing values!
2944
+ # To replace missing values, use `fill_null`.
2945
+ #
2946
+ # @example
2947
+ # df = Polars::DataFrame.new(
2948
+ # {
2949
+ # "a" => [1.5, 2, Float::NAN, 4],
2950
+ # "b" => [0.5, 4, Float::NAN, 13]
2951
+ # }
2952
+ # )
2953
+ # df.fill_nan(99)
2954
+ # # =>
2955
+ # # shape: (4, 2)
2956
+ # # ┌──────┬──────┐
2957
+ # # │ a ┆ b │
2958
+ # # │ --- ┆ --- │
2959
+ # # │ f64 ┆ f64 │
2960
+ # # ╞══════╪══════╡
2961
+ # # │ 1.5 ┆ 0.5 │
2962
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2963
+ # # │ 2.0 ┆ 4.0 │
2964
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2965
+ # # │ 99.0 ┆ 99.0 │
2966
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
2967
+ # # │ 4.0 ┆ 13.0 │
2968
+ # # └──────┴──────┘
2969
def fill_nan(fill_value)
  # Delegate to the lazy frame's fill_nan and collect the result.
  plan = lazy.fill_nan(fill_value)
  plan.collect(no_optimization: true)
end
2972
+
2973
+ # Explode `DataFrame` to long format by exploding a column with Lists.
2974
+ #
2975
+ # @param columns [Object]
2976
+ # Column of LargeList type.
2977
+ #
2978
+ # @return [DataFrame]
2979
+ #
2980
+ # @example
2981
+ # df = Polars::DataFrame.new(
2982
+ # {
2983
+ # "letters" => ["a", "a", "b", "c"],
2984
+ # "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]]
2985
+ # }
2986
+ # )
2987
+ # df.explode("numbers")
2988
+ # # =>
2989
+ # # shape: (8, 2)
2990
+ # # ┌─────────┬─────────┐
2991
+ # # │ letters ┆ numbers │
2992
+ # # │ --- ┆ --- │
2993
+ # # │ str ┆ i64 │
2994
+ # # ╞═════════╪═════════╡
2995
+ # # │ a ┆ 1 │
2996
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2997
+ # # │ a ┆ 2 │
2998
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
2999
+ # # │ a ┆ 3 │
3000
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
3001
+ # # │ b ┆ 4 │
3002
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
3003
+ # # │ b ┆ 5 │
3004
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
3005
+ # # │ c ┆ 6 │
3006
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
3007
+ # # │ c ┆ 7 │
3008
+ # # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
3009
+ # # │ c ┆ 8 │
3010
+ # # └─────────┴─────────┘
3011
def explode(columns)
  # Delegate to the lazy frame's explode and collect the result.
  plan = lazy.explode(columns)
  plan.collect(no_optimization: true)
end
3014
+
3015
+ # Create a spreadsheet-style pivot table as a DataFrame.
3016
+ #
3017
+ # @param values [Object]
3018
+ # Column values to aggregate. Can be multiple columns if the *columns*
3019
+ # arguments contains multiple columns as well
3020
+ # @param index [Object]
3021
+ # One or multiple keys to group by
3022
+ # @param columns [Object]
3023
+ # Columns whose values will be used as the header of the output DataFrame
3024
+ # @param aggregate_fn ["first", "sum", "max", "min", "mean", "median", "last", "count"]
3025
+ # A predefined aggregate function str or an expression.
3026
+ # @param maintain_order [Object]
3027
+ # Sort the grouped keys so that the output order is predictable.
3028
+ # @param sort_columns [Object]
3029
+ # Sort the transposed columns by name. Default is by order of discovery.
3030
+ #
3031
+ # @return [DataFrame]
3032
+ #
3033
+ # @example
3034
+ # df = Polars::DataFrame.new(
3035
+ # {
3036
+ # "foo" => ["one", "one", "one", "two", "two", "two"],
3037
+ # "bar" => ["A", "B", "C", "A", "B", "C"],
3038
+ # "baz" => [1, 2, 3, 4, 5, 6]
3039
+ # }
3040
+ # )
3041
+ # df.pivot(values: "baz", index: "foo", columns: "bar")
3042
+ # # =>
3043
+ # # shape: (2, 4)
3044
+ # # ┌─────┬─────┬─────┬─────┐
3045
+ # # │ foo ┆ A ┆ B ┆ C │
3046
+ # # │ --- ┆ --- ┆ --- ┆ --- │
3047
+ # # │ str ┆ i64 ┆ i64 ┆ i64 │
3048
+ # # ╞═════╪═════╪═════╪═════╡
3049
+ # # │ one ┆ 1 ┆ 2 ┆ 3 │
3050
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3051
+ # # │ two ┆ 4 ┆ 5 ┆ 6 │
3052
+ # # └─────┴─────┴─────┴─────┘
3053
def pivot(
  values:,
  index:,
  columns:,
  aggregate_fn: "first",
  maintain_order: true,
  sort_columns: false
)
  # A single column name is accepted for each selector and wrapped in a list.
  values = [values] if values.is_a?(String)
  index = [index] if index.is_a?(String)
  columns = [columns] if columns.is_a?(String)

  # A String names a predefined aggregation and is replaced by its expression;
  # any non-String value is used as given.
  if aggregate_fn.is_a?(String)
    aggregate_fn =
      case aggregate_fn
      when "first" then Polars.element.first
      when "sum" then Polars.element.sum
      when "max" then Polars.element.max
      when "min" then Polars.element.min
      when "mean" then Polars.element.mean
      when "median" then Polars.element.median
      when "last" then Polars.element.last
      when "count" then Polars.count
      else
        raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
      end
  end

  pivoted = _df.pivot_expr(
    values, index, columns, aggregate_fn._rbexpr, maintain_order, sort_columns
  )
  _from_rbdf(pivoted)
end
3105
+
3106
+ # Unpivot a DataFrame from wide to long format.
3107
+ #
3108
+ # Optionally leaves identifiers set.
3109
+ #
3110
+ # This function is useful to massage a DataFrame into a format where one or more
3111
+ # columns are identifier variables (id_vars), while all other columns, considered
3112
+ # measured variables (value_vars), are "unpivoted" to the row axis, leaving just
3113
+ # two non-identifier columns, 'variable' and 'value'.
3114
+ #
3115
+ # @param id_vars [Object]
3116
+ # Columns to use as identifier variables.
3117
+ # @param value_vars [Object]
3118
+ # Columns to use as value variables.
3119
+ # If `value_vars` is empty all columns that are not in `id_vars` will be used.
3120
+ # @param variable_name [String]
3121
+ # Name to give to the `variable` column. Defaults to "variable"
3122
+ # @param value_name [String]
3123
+ # Name to give to the `value` column. Defaults to "value"
3124
+ #
3125
+ # @return [DataFrame]
3126
+ #
3127
+ # @example
3128
+ # df = Polars::DataFrame.new(
3129
+ # {
3130
+ # "a" => ["x", "y", "z"],
3131
+ # "b" => [1, 3, 5],
3132
+ # "c" => [2, 4, 6]
3133
+ # }
3134
+ # )
3135
+ # df.melt(id_vars: "a", value_vars: ["b", "c"])
3136
+ # # =>
3137
+ # # shape: (6, 3)
3138
+ # # ┌─────┬──────────┬───────┐
3139
+ # # │ a ┆ variable ┆ value │
3140
+ # # │ --- ┆ --- ┆ --- │
3141
+ # # │ str ┆ str ┆ i64 │
3142
+ # # ╞═════╪══════════╪═══════╡
3143
+ # # │ x ┆ b ┆ 1 │
3144
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3145
+ # # │ y ┆ b ┆ 3 │
3146
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3147
+ # # │ z ┆ b ┆ 5 │
3148
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3149
+ # # │ x ┆ c ┆ 2 │
3150
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3151
+ # # │ y ┆ c ┆ 4 │
3152
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3153
+ # # │ z ┆ c ┆ 6 │
3154
+ # # └─────┴──────────┴───────┘
3155
def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
  # Either selector may arrive as a single name, a list, or nil;
  # the underlying call is always handed a list.
  as_list = lambda do |cols|
    case cols
    when String then [cols]
    when nil then []
    else cols
    end
  end

  # Note the argument order: `value_name` is passed before `variable_name`.
  melted = _df.melt(as_list.call(id_vars), as_list.call(value_vars), value_name, variable_name)
  _from_rbdf(melted)
end
3172
+
3173
+ # Unstack a long table to a wide form without doing an aggregation.
3174
+ #
3175
+ # This can be much faster than a pivot, because it can skip the grouping phase.
3176
+ #
3177
+ # @note
3178
+ # This functionality is experimental and may be subject to changes
3179
+ # without it being considered a breaking change.
3180
+ #
3181
+ # @param step [Integer]
3182
+ # Number of rows in the unstacked frame.
3183
+ # @param how ["vertical", "horizontal"]
3184
+ # Direction of the unstack.
3185
+ # @param columns [Object]
3186
+ # Column to include in the operation.
3187
+ # @param fill_values [Object]
3188
+ # Fill values that don't fit the new size with this value.
3189
+ #
3190
+ # @return [DataFrame]
3191
+ #
3192
+ # @example
3193
+ # df = Polars::DataFrame.new(
3194
+ # {
3195
+ # "col1" => "A".."I",
3196
+ # "col2" => Polars.arange(0, 9, eager: true)
3197
+ # }
3198
+ # )
3199
+ # # =>
3200
+ # # shape: (9, 2)
3201
+ # # ┌──────┬──────┐
3202
+ # # │ col1 ┆ col2 │
3203
+ # # │ --- ┆ --- │
3204
+ # # │ str ┆ i64 │
3205
+ # # ╞══════╪══════╡
3206
+ # # │ A ┆ 0 │
3207
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3208
+ # # │ B ┆ 1 │
3209
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3210
+ # # │ C ┆ 2 │
3211
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3212
+ # # │ D ┆ 3 │
3213
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3214
+ # # │ ... ┆ ... │
3215
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3216
+ # # │ F ┆ 5 │
3217
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3218
+ # # │ G ┆ 6 │
3219
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3220
+ # # │ H ┆ 7 │
3221
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3222
+ # # │ I ┆ 8 │
3223
+ # # └──────┴──────┘
3224
+ #
3225
+ # @example
3226
+ # df.unstack(step: 3, how: "vertical")
3227
+ # # =>
3228
+ # # shape: (3, 6)
3229
+ # # ┌────────┬────────┬────────┬────────┬────────┬────────┐
3230
+ # # │ col1_0 ┆ col1_1 ┆ col1_2 ┆ col2_0 ┆ col2_1 ┆ col2_2 │
3231
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3232
+ # # │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 │
3233
+ # # ╞════════╪════════╪════════╪════════╪════════╪════════╡
3234
+ # # │ A ┆ D ┆ G ┆ 0 ┆ 3 ┆ 6 │
3235
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
3236
+ # # │ B ┆ E ┆ H ┆ 1 ┆ 4 ┆ 7 │
3237
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
3238
+ # # │ C ┆ F ┆ I ┆ 2 ┆ 5 ┆ 8 │
3239
+ # # └────────┴────────┴────────┴────────┴────────┴────────┘
3240
+ #
3241
+ # @example
3242
+ # df.unstack(step: 3, how: "horizontal")
3243
+ # # =>
3244
+ # # shape: (3, 6)
3245
+ # # ┌────────┬────────┬────────┬────────┬────────┬────────┐
3246
+ # # │ col1_0 ┆ col1_1 ┆ col1_2 ┆ col2_0 ┆ col2_1 ┆ col2_2 │
3247
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3248
+ # # │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 │
3249
+ # # ╞════════╪════════╪════════╪════════╪════════╪════════╡
3250
+ # # │ A ┆ B ┆ C ┆ 0 ┆ 1 ┆ 2 │
3251
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
3252
+ # # │ D ┆ E ┆ F ┆ 3 ┆ 4 ┆ 5 │
3253
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
3254
+ # # │ G ┆ H ┆ I ┆ 6 ┆ 7 ┆ 8 │
3255
+ # # └────────┴────────┴────────┴────────┴────────┴────────┘
3256
def unstack(step:, how: "vertical", columns: nil, fill_values: nil)
  # Work on the requested columns only, or on the whole frame.
  frame = columns.nil? ? self : select(columns)

  total = frame.height
  # `step` fixes one dimension of the result; the other is derived by
  # rounding up so every source row has a cell.
  if how == "vertical"
    n_rows = step
    n_cols = (total / n_rows.to_f).ceil
  else
    n_cols = step
    n_rows = (total / n_cols.to_f).ceil
  end

  # Number of cells that have no source row and need a fill value.
  missing = n_cols * n_rows - total
  if missing > 0
    # A scalar fill value is repeated once per column.
    fill_values = [fill_values] * frame.width unless fill_values.is_a?(Array)

    padded = frame.get_columns.zip(fill_values).map do |col, filler|
      col.extend_constant(filler, missing)
    end
    frame = frame.select(padded)
  end

  if how == "horizontal"
    # Reorder rows by (row index % n_cols) via a temporary helper column.
    order = (Polars.arange(0, n_cols * n_rows, eager: true) % n_cols).alias("__sort_order")
    frame = frame.with_column(order).sort("__sort_order").drop("__sort_order")
  end

  # Zero-padding width used for the numeric suffix of the new column names.
  digits = Math.log10(n_cols).floor + 1
  pieces =
    frame.get_columns.flat_map do |col|
      n_cols.times.map do |idx|
        col.slice(idx * n_rows, n_rows).alias("%s_%0#{digits}d" % [col.name, idx])
      end
    end

  _from_rbdf(DataFrame.new(pieces)._df)
end
3308
+
3309
+ # Split into multiple DataFrames partitioned by groups.
3310
+ #
3311
+ # @param groups [Object]
3312
+ # Groups to partition by.
3313
+ # @param maintain_order [Boolean]
3314
+ # Keep predictable output order. This is slower as it requires an extra sort
3315
+ # operation.
3316
+ # @param as_dict [Boolean]
3317
+ # If true, return the partitions in a dictionary keyed by the distinct group
3318
+ # values instead of a list.
3319
+ #
3320
+ # @return [Object]
3321
+ #
3322
+ # @example
3323
+ # df = Polars::DataFrame.new(
3324
+ # {
3325
+ # "foo" => ["A", "A", "B", "B", "C"],
3326
+ # "N" => [1, 2, 2, 4, 2],
3327
+ # "bar" => ["k", "l", "m", "m", "l"]
3328
+ # }
3329
+ # )
3330
+ # df.partition_by("foo", maintain_order: true)
3331
+ # # =>
3332
+ # # [shape: (2, 3)
3333
+ # # ┌─────┬─────┬─────┐
3334
+ # # │ foo ┆ N ┆ bar │
3335
+ # # │ --- ┆ --- ┆ --- │
3336
+ # # │ str ┆ i64 ┆ str │
3337
+ # # ╞═════╪═════╪═════╡
3338
+ # # │ A ┆ 1 ┆ k │
3339
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3340
+ # # │ A ┆ 2 ┆ l │
3341
+ # # └─────┴─────┴─────┘, shape: (2, 3)
3342
+ # # ┌─────┬─────┬─────┐
3343
+ # # │ foo ┆ N ┆ bar │
3344
+ # # │ --- ┆ --- ┆ --- │
3345
+ # # │ str ┆ i64 ┆ str │
3346
+ # # ╞═════╪═════╪═════╡
3347
+ # # │ B ┆ 2 ┆ m │
3348
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3349
+ # # │ B ┆ 4 ┆ m │
3350
+ # # └─────┴─────┴─────┘, shape: (1, 3)
3351
+ # # ┌─────┬─────┬─────┐
3352
+ # # │ foo ┆ N ┆ bar │
3353
+ # # │ --- ┆ --- ┆ --- │
3354
+ # # │ str ┆ i64 ┆ str │
3355
+ # # ╞═════╪═════╪═════╡
3356
+ # # │ C ┆ 2 ┆ l │
3357
+ # # └─────┴─────┴─────┘]
3358
+ #
3359
+ # @example
3360
+ # df.partition_by("foo", maintain_order: true, as_dict: true)
3361
+ # # =>
3362
+ # # {"A"=>shape: (2, 3)
3363
+ # # ┌─────┬─────┬─────┐
3364
+ # # │ foo ┆ N ┆ bar │
3365
+ # # │ --- ┆ --- ┆ --- │
3366
+ # # │ str ┆ i64 ┆ str │
3367
+ # # ╞═════╪═════╪═════╡
3368
+ # # │ A ┆ 1 ┆ k │
3369
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3370
+ # # │ A ┆ 2 ┆ l │
3371
+ # # └─────┴─────┴─────┘, "B"=>shape: (2, 3)
3372
+ # # ┌─────┬─────┬─────┐
3373
+ # # │ foo ┆ N ┆ bar │
3374
+ # # │ --- ┆ --- ┆ --- │
3375
+ # # │ str ┆ i64 ┆ str │
3376
+ # # ╞═════╪═════╪═════╡
3377
+ # # │ B ┆ 2 ┆ m │
3378
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3379
+ # # │ B ┆ 4 ┆ m │
3380
+ # # └─────┴─────┴─────┘, "C"=>shape: (1, 3)
3381
+ # # ┌─────┬─────┬─────┐
3382
+ # # │ foo ┆ N ┆ bar │
3383
+ # # │ --- ┆ --- ┆ --- │
3384
+ # # │ str ┆ i64 ┆ str │
3385
+ # # ╞═════╪═════╪═════╡
3386
+ # # │ C ┆ 2 ┆ l │
3387
+ # # └─────┴─────┴─────┘}
3388
def partition_by(groups, maintain_order: true, as_dict: false)
  # Always hand the underlying call an array of group keys.
  groups = [groups] if groups.is_a?(String)
  groups = Array(groups) unless groups.is_a?(Array)

  parts = _df.partition_by(groups, maintain_order).map { |rbdf| _from_rbdf(rbdf) }
  return parts unless as_dict

  # With one group column the key is the scalar in the first row;
  # with several it is the whole first row of the group columns.
  single_key = groups.length == 1
  parts.each_with_object({}) do |part, by_key|
    key_frame = part[groups]
    key = single_key ? key_frame[0, 0] : key_frame.row(0)
    by_key[key] = part
  end
end
3413
+
3414
+ # Shift values by the given period.
3415
+ #
3416
+ # @param periods [Integer]
3417
+ # Number of places to shift (may be negative).
3418
+ #
3419
+ # @return [DataFrame]
3420
+ #
3421
+ # @example
3422
+ # df = Polars::DataFrame.new(
3423
+ # {
3424
+ # "foo" => [1, 2, 3],
3425
+ # "bar" => [6, 7, 8],
3426
+ # "ham" => ["a", "b", "c"]
3427
+ # }
3428
+ # )
3429
+ # df.shift(1)
3430
+ # # =>
3431
+ # # shape: (3, 3)
3432
+ # # ┌──────┬──────┬──────┐
3433
+ # # │ foo ┆ bar ┆ ham │
3434
+ # # │ --- ┆ --- ┆ --- │
3435
+ # # │ i64 ┆ i64 ┆ str │
3436
+ # # ╞══════╪══════╪══════╡
3437
+ # # │ null ┆ null ┆ null │
3438
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3439
+ # # │ 1 ┆ 6 ┆ a │
3440
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3441
+ # # │ 2 ┆ 7 ┆ b │
3442
+ # # └──────┴──────┴──────┘
3443
+ #
3444
+ # @example
3445
+ # df.shift(-1)
3446
+ # # =>
3447
+ # # shape: (3, 3)
3448
+ # # ┌──────┬──────┬──────┐
3449
+ # # │ foo ┆ bar ┆ ham │
3450
+ # # │ --- ┆ --- ┆ --- │
3451
+ # # │ i64 ┆ i64 ┆ str │
3452
+ # # ╞══════╪══════╪══════╡
3453
+ # # │ 2 ┆ 7 ┆ b │
3454
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3455
+ # # │ 3 ┆ 8 ┆ c │
3456
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
3457
+ # # │ null ┆ null ┆ null │
3458
+ # # └──────┴──────┴──────┘
3459
def shift(periods)
  shifted = _df.shift(periods)
  _from_rbdf(shifted)
end
3462
+
3463
+ # Shift the values by a given period and fill the resulting null values.
3464
+ #
3465
+ # @param periods [Integer]
3466
+ # Number of places to shift (may be negative).
3467
+ # @param fill_value [Object]
3468
+ # fill nil values with this value.
3469
+ #
3470
+ # @return [DataFrame]
3471
+ #
3472
+ # @example
3473
+ # df = Polars::DataFrame.new(
3474
+ # {
3475
+ # "foo" => [1, 2, 3],
3476
+ # "bar" => [6, 7, 8],
3477
+ # "ham" => ["a", "b", "c"]
3478
+ # }
3479
+ # )
3480
+ # df.shift_and_fill(1, 0)
3481
+ # # =>
3482
+ # # shape: (3, 3)
3483
+ # # ┌─────┬─────┬─────┐
3484
+ # # │ foo ┆ bar ┆ ham │
3485
+ # # │ --- ┆ --- ┆ --- │
3486
+ # # │ i64 ┆ i64 ┆ str │
3487
+ # # ╞═════╪═════╪═════╡
3488
+ # # │ 0 ┆ 0 ┆ 0 │
3489
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3490
+ # # │ 1 ┆ 6 ┆ a │
3491
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3492
+ # # │ 2 ┆ 7 ┆ b │
3493
+ # # └─────┴─────┴─────┘
3494
def shift_and_fill(periods, fill_value)
  # Built through the lazy API and collected immediately.
  plan = lazy.shift_and_fill(periods, fill_value)
  plan.collect(no_optimization: true, string_cache: false)
end
3499
+
3500
+ # Get a mask of all duplicated rows in this DataFrame.
3501
+ #
3502
+ # @return [Series]
3503
+ #
3504
+ # @example
3505
+ # df = Polars::DataFrame.new(
3506
+ # {
3507
+ # "a" => [1, 2, 3, 1],
3508
+ # "b" => ["x", "y", "z", "x"],
3509
+ # }
3510
+ # )
3511
+ # df.is_duplicated
3512
+ # # =>
3513
+ # # shape: (4,)
3514
+ # # Series: '' [bool]
3515
+ # # [
3516
+ # # true
3517
+ # # false
3518
+ # # false
3519
+ # # true
3520
+ # # ]
3521
def is_duplicated
  mask = _df.is_duplicated
  Utils.wrap_s(mask)
end
3524
+
3525
+ # Get a mask of all unique rows in this DataFrame.
3526
+ #
3527
+ # @return [Series]
3528
+ #
3529
+ # @example
3530
+ # df = Polars::DataFrame.new(
3531
+ # {
3532
+ # "a" => [1, 2, 3, 1],
3533
+ # "b" => ["x", "y", "z", "x"]
3534
+ # }
3535
+ # )
3536
+ # df.is_unique
3537
+ # # =>
3538
+ # # shape: (4,)
3539
+ # # Series: '' [bool]
3540
+ # # [
3541
+ # # false
3542
+ # # true
3543
+ # # true
3544
+ # # false
3545
+ # # ]
3546
def is_unique
  mask = _df.is_unique
  Utils.wrap_s(mask)
end
3549
+
3550
+ # Start a lazy query from this point.
3551
+ #
3552
+ # @return [LazyFrame]
3553
def lazy
  lazy_rbdf = _df.lazy
  wrap_ldf(lazy_rbdf)
end
3556
+
3557
+ # Select columns from this DataFrame.
3558
+ #
3559
+ # @param exprs [Object]
3560
+ # Column or columns to select.
3561
+ #
3562
+ # @return [DataFrame]
3563
+ #
3564
+ # @example
3565
+ # df = Polars::DataFrame.new(
3566
+ # {
3567
+ # "foo" => [1, 2, 3],
3568
+ # "bar" => [6, 7, 8],
3569
+ # "ham" => ["a", "b", "c"]
3570
+ # }
3571
+ # )
3572
+ # df.select("foo")
3573
+ # # =>
3574
+ # # shape: (3, 1)
3575
+ # # ┌─────┐
3576
+ # # │ foo │
3577
+ # # │ --- │
3578
+ # # │ i64 │
3579
+ # # ╞═════╡
3580
+ # # │ 1 │
3581
+ # # ├╌╌╌╌╌┤
3582
+ # # │ 2 │
3583
+ # # ├╌╌╌╌╌┤
3584
+ # # │ 3 │
3585
+ # # └─────┘
3586
+ #
3587
+ # @example
3588
+ # df.select(["foo", "bar"])
3589
+ # # =>
3590
+ # # shape: (3, 2)
3591
+ # # ┌─────┬─────┐
3592
+ # # │ foo ┆ bar │
3593
+ # # │ --- ┆ --- │
3594
+ # # │ i64 ┆ i64 │
3595
+ # # ╞═════╪═════╡
3596
+ # # │ 1 ┆ 6 │
3597
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3598
+ # # │ 2 ┆ 7 │
3599
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3600
+ # # │ 3 ┆ 8 │
3601
+ # # └─────┴─────┘
3602
+ #
3603
+ # @example
3604
+ # df.select(Polars.col("foo") + 1)
3605
+ # # =>
3606
+ # # shape: (3, 1)
3607
+ # # ┌─────┐
3608
+ # # │ foo │
3609
+ # # │ --- │
3610
+ # # │ i64 │
3611
+ # # ╞═════╡
3612
+ # # │ 2 │
3613
+ # # ├╌╌╌╌╌┤
3614
+ # # │ 3 │
3615
+ # # ├╌╌╌╌╌┤
3616
+ # # │ 4 │
3617
+ # # └─────┘
3618
+ #
3619
+ # @example
3620
+ # df.select([Polars.col("foo") + 1, Polars.col("bar") + 1])
3621
+ # # =>
3622
+ # # shape: (3, 2)
3623
+ # # ┌─────┬─────┐
3624
+ # # │ foo ┆ bar │
3625
+ # # │ --- ┆ --- │
3626
+ # # │ i64 ┆ i64 │
3627
+ # # ╞═════╪═════╡
3628
+ # # │ 2 ┆ 7 │
3629
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3630
+ # # │ 3 ┆ 8 │
3631
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
3632
+ # # │ 4 ┆ 9 │
3633
+ # # └─────┴─────┘
3634
+ #
3635
+ # @example
3636
+ # df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0))
3637
+ # # =>
3638
+ # # shape: (3, 1)
3639
+ # # ┌─────────┐
3640
+ # # │ literal │
3641
+ # # │ --- │
3642
+ # # │ i64 │
3643
+ # # ╞═════════╡
3644
+ # # │ 0 │
3645
+ # # ├╌╌╌╌╌╌╌╌╌┤
3646
+ # # │ 0 │
3647
+ # # ├╌╌╌╌╌╌╌╌╌┤
3648
+ # # │ 10 │
3649
+ # # └─────────┘
3650
def select(exprs)
  # Run the selection through the lazy API, collect it, and re-wrap the
  # collected frame's underlying `_df`.
  collected = lazy.select(exprs).collect(no_optimization: true, string_cache: false)
  _from_rbdf(collected._df)
end
3658
+
3659
+ # Add or overwrite multiple columns in a DataFrame.
3660
+ #
3661
+ # @param exprs [Array]
3662
+ # Array of Expressions that evaluate to columns.
3663
+ #
3664
+ # @return [DataFrame]
3665
+ #
3666
+ # @example
3667
+ # df = Polars::DataFrame.new(
3668
+ # {
3669
+ # "a" => [1, 2, 3, 4],
3670
+ # "b" => [0.5, 4, 10, 13],
3671
+ # "c" => [true, true, false, true]
3672
+ # }
3673
+ # )
3674
+ # df.with_columns(
3675
+ # [
3676
+ # (Polars.col("a") ** 2).alias("a^2"),
3677
+ # (Polars.col("b") / 2).alias("b/2"),
3678
+ # (Polars.col("c").is_not).alias("not c")
3679
+ # ]
3680
+ # )
3681
+ # # =>
3682
+ # # shape: (4, 6)
3683
+ # # ┌─────┬──────┬───────┬──────┬──────┬───────┐
3684
+ # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
3685
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3686
+ # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
3687
+ # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
3688
+ # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
3689
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3690
+ # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
3691
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3692
+ # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
3693
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3694
+ # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
3695
+ # # └─────┴──────┴───────┴──────┴──────┴───────┘
3696
def with_columns(exprs)
  # A single expression is wrapped in an array; nil and arrays pass through.
  exprs = [exprs] unless exprs.nil? || exprs.is_a?(Array)

  plan = lazy.with_columns(exprs)
  plan.collect(no_optimization: true, string_cache: false)
end
3704
+
3705
+ # Get number of chunks used by the ChunkedArrays of this DataFrame.
3706
+ #
3707
+ # @param strategy ["first", "all"]
3708
+ # Return the number of chunks of the 'first' column,
3709
+ # or 'all' columns in this DataFrame.
3710
+ #
3711
+ # @return [Object]
3712
+ #
3713
+ # @example
3714
+ # df = Polars::DataFrame.new(
3715
+ # {
3716
+ # "a" => [1, 2, 3, 4],
3717
+ # "b" => [0.5, 4, 10, 13],
3718
+ # "c" => [true, true, false, true]
3719
+ # }
3720
+ # )
3721
+ # df.n_chunks
3722
+ # # => 1
3723
+ # df.n_chunks(strategy: "all")
3724
+ # # => [1, 1, 1]
3725
def n_chunks(strategy: "first")
  case strategy
  when "first"
    # Single chunk count reported by the underlying frame.
    _df.n_chunks
  when "all"
    # One chunk count per column.
    get_columns.map(&:n_chunks)
  else
    # Interpolate the rejected value so the caller can see what was passed
    # (the previous message printed the literal text "{strategy}").
    raise ArgumentError, "Strategy: '#{strategy}' not understood. Choose one of {'first', 'all'}"
  end
end
3734
+
3735
+ # Aggregate the columns of this DataFrame to their maximum value.
3736
+ #
3737
+ # @return [DataFrame]
3738
+ #
3739
+ # @example
3740
+ # df = Polars::DataFrame.new(
3741
+ # {
3742
+ # "foo" => [1, 2, 3],
3743
+ # "bar" => [6, 7, 8],
3744
+ # "ham" => ["a", "b", "c"]
3745
+ # }
3746
+ # )
3747
+ # df.max
3748
+ # # =>
3749
+ # # shape: (1, 3)
3750
+ # # ┌─────┬─────┬─────┐
3751
+ # # │ foo ┆ bar ┆ ham │
3752
+ # # │ --- ┆ --- ┆ --- │
3753
+ # # │ i64 ┆ i64 ┆ str │
3754
+ # # ╞═════╪═════╪═════╡
3755
+ # # │ 3 ┆ 8 ┆ c │
3756
+ # # └─────┴─────┴─────┘
3757
def max(axis: 0)
  # axis 0 yields a DataFrame; axis 1 delegates to `hmax` and wraps the result.
  return _from_rbdf(_df.max) if axis == 0
  return Utils.wrap_s(_df.hmax) if axis == 1

  raise ArgumentError, "Axis should be 0 or 1."
end
3766
+
3767
+ # Aggregate the columns of this DataFrame to their minimum value.
3768
+ #
3769
+ # @return [DataFrame]
3770
+ #
3771
+ # @example
3772
+ # df = Polars::DataFrame.new(
3773
+ # {
3774
+ # "foo" => [1, 2, 3],
3775
+ # "bar" => [6, 7, 8],
3776
+ # "ham" => ["a", "b", "c"]
3777
+ # }
3778
+ # )
3779
+ # df.min
3780
+ # # =>
3781
+ # # shape: (1, 3)
3782
+ # # ┌─────┬─────┬─────┐
3783
+ # # │ foo ┆ bar ┆ ham │
3784
+ # # │ --- ┆ --- ┆ --- │
3785
+ # # │ i64 ┆ i64 ┆ str │
3786
+ # # ╞═════╪═════╪═════╡
3787
+ # # │ 1 ┆ 6 ┆ a │
3788
+ # # └─────┴─────┴─────┘
3789
def min(axis: 0)
  # axis 0 yields a DataFrame; axis 1 delegates to `hmin` and wraps the result.
  return _from_rbdf(_df.min) if axis == 0
  return Utils.wrap_s(_df.hmin) if axis == 1

  raise ArgumentError, "Axis should be 0 or 1."
end
3798
+
3799
+ # Aggregate the columns of this DataFrame to their sum value.
3800
+ #
3801
+ # @param axis [Integer]
3802
+ # Either 0 or 1.
3803
+ # @param null_strategy ["ignore", "propagate"]
3804
+ # This argument is only used if axis == 1.
3805
+ #
3806
+ # @return [DataFrame, Series]
3807
+ #
3808
+ # @example
3809
+ # df = Polars::DataFrame.new(
3810
+ # {
3811
+ # "foo" => [1, 2, 3],
3812
+ # "bar" => [6, 7, 8],
3813
+ # "ham" => ["a", "b", "c"],
3814
+ # }
3815
+ # )
3816
+ # df.sum
3817
+ # # =>
3818
+ # # shape: (1, 3)
3819
+ # # ┌─────┬─────┬──────┐
3820
+ # # │ foo ┆ bar ┆ ham │
3821
+ # # │ --- ┆ --- ┆ --- │
3822
+ # # │ i64 ┆ i64 ┆ str │
3823
+ # # ╞═════╪═════╪══════╡
3824
+ # # │ 6 ┆ 21 ┆ null │
3825
+ # # └─────┴─────┴──────┘
3826
+ #
3827
+ # @example
3828
+ # df.sum(axis: 1)
3829
+ # # =>
3830
+ # # shape: (3,)
3831
+ # # Series: 'foo' [str]
3832
+ # # [
3833
+ # # "16a"
3834
+ # # "27b"
3835
+ # # "38c"
3836
+ # # ]
3837
def sum(axis: 0, null_strategy: "ignore")
  # `null_strategy` is only forwarded on the axis-1 path.
  return _from_rbdf(_df.sum) if axis == 0
  return Utils.wrap_s(_df.hsum(null_strategy)) if axis == 1

  raise ArgumentError, "Axis should be 0 or 1."
end
3847
+
3848
+ # Aggregate the columns of this DataFrame to their mean value.
3849
+ #
3850
+ # @param axis [Integer]
3851
+ # Either 0 or 1.
3852
+ # @param null_strategy ["ignore", "propagate"]
3853
+ # This argument is only used if axis == 1.
3854
+ #
3855
+ # @return [DataFrame, Series]
3856
+ #
3857
+ # @example
3858
+ # df = Polars::DataFrame.new(
3859
+ # {
3860
+ # "foo" => [1, 2, 3],
3861
+ # "bar" => [6, 7, 8],
3862
+ # "ham" => ["a", "b", "c"]
3863
+ # }
3864
+ # )
3865
+ # df.mean
3866
+ # # =>
3867
+ # # shape: (1, 3)
3868
+ # # ┌─────┬─────┬──────┐
3869
+ # # │ foo ┆ bar ┆ ham │
3870
+ # # │ --- ┆ --- ┆ --- │
3871
+ # # │ f64 ┆ f64 ┆ str │
3872
+ # # ╞═════╪═════╪══════╡
3873
+ # # │ 2.0 ┆ 7.0 ┆ null │
3874
+ # # └─────┴─────┴──────┘
3875
def mean(axis: 0, null_strategy: "ignore")
  # `null_strategy` is only forwarded on the axis-1 path.
  return _from_rbdf(_df.mean) if axis == 0
  return Utils.wrap_s(_df.hmean(null_strategy)) if axis == 1

  raise ArgumentError, "Axis should be 0 or 1."
end
3885
+
3886
+ # Aggregate the columns of this DataFrame to their standard deviation value.
3887
+ #
3888
+ # @param ddof [Integer]
3889
+ # Degrees of freedom
3890
+ #
3891
+ # @return [DataFrame]
3892
+ #
3893
+ # @example
3894
+ # df = Polars::DataFrame.new(
3895
+ # {
3896
+ # "foo" => [1, 2, 3],
3897
+ # "bar" => [6, 7, 8],
3898
+ # "ham" => ["a", "b", "c"]
3899
+ # }
3900
+ # )
3901
+ # df.std
3902
+ # # =>
3903
+ # # shape: (1, 3)
3904
+ # # ┌─────┬─────┬──────┐
3905
+ # # │ foo ┆ bar ┆ ham │
3906
+ # # │ --- ┆ --- ┆ --- │
3907
+ # # │ f64 ┆ f64 ┆ str │
3908
+ # # ╞═════╪═════╪══════╡
3909
+ # # │ 1.0 ┆ 1.0 ┆ null │
3910
+ # # └─────┴─────┴──────┘
3911
+ #
3912
+ # @example
3913
+ # df.std(ddof: 0)
3914
+ # # =>
3915
+ # # shape: (1, 3)
3916
+ # # ┌──────────┬──────────┬──────┐
3917
+ # # │ foo ┆ bar ┆ ham │
3918
+ # # │ --- ┆ --- ┆ --- │
3919
+ # # │ f64 ┆ f64 ┆ str │
3920
+ # # ╞══════════╪══════════╪══════╡
3921
+ # # │ 0.816497 ┆ 0.816497 ┆ null │
3922
+ # # └──────────┴──────────┴──────┘
3923
def std(ddof: 1)
  result = _df.std(ddof)
  _from_rbdf(result)
end
3926
+
3927
+ # Aggregate the columns of this DataFrame to their variance value.
3928
+ #
3929
+ # @param ddof [Integer]
3930
+ # Degrees of freedom
3931
+ #
3932
+ # @return [DataFrame]
3933
+ #
3934
+ # @example
3935
+ # df = Polars::DataFrame.new(
3936
+ # {
3937
+ # "foo" => [1, 2, 3],
3938
+ # "bar" => [6, 7, 8],
3939
+ # "ham" => ["a", "b", "c"]
3940
+ # }
3941
+ # )
3942
+ # df.var
3943
+ # # =>
3944
+ # # shape: (1, 3)
3945
+ # # ┌─────┬─────┬──────┐
3946
+ # # │ foo ┆ bar ┆ ham │
3947
+ # # │ --- ┆ --- ┆ --- │
3948
+ # # │ f64 ┆ f64 ┆ str │
3949
+ # # ╞═════╪═════╪══════╡
3950
+ # # │ 1.0 ┆ 1.0 ┆ null │
3951
+ # # └─────┴─────┴──────┘
3952
+ #
3953
+ # @example
3954
+ # df.var(ddof: 0)
3955
+ # # =>
3956
+ # # shape: (1, 3)
3957
+ # # ┌──────────┬──────────┬──────┐
3958
+ # # │ foo ┆ bar ┆ ham │
3959
+ # # │ --- ┆ --- ┆ --- │
3960
+ # # │ f64 ┆ f64 ┆ str │
3961
+ # # ╞══════════╪══════════╪══════╡
3962
+ # # │ 0.666667 ┆ 0.666667 ┆ null │
3963
+ # # └──────────┴──────────┴──────┘
3964
def var(ddof: 1)
  result = _df.var(ddof)
  _from_rbdf(result)
end
3967
+
3968
+ # Aggregate the columns of this DataFrame to their median value.
3969
+ #
3970
+ # @return [DataFrame]
3971
+ #
3972
+ # @example
3973
+ # df = Polars::DataFrame.new(
3974
+ # {
3975
+ # "foo" => [1, 2, 3],
3976
+ # "bar" => [6, 7, 8],
3977
+ # "ham" => ["a", "b", "c"]
3978
+ # }
3979
+ # )
3980
+ # df.median
3981
+ # # =>
3982
+ # # shape: (1, 3)
3983
+ # # ┌─────┬─────┬──────┐
3984
+ # # │ foo ┆ bar ┆ ham │
3985
+ # # │ --- ┆ --- ┆ --- │
3986
+ # # │ f64 ┆ f64 ┆ str │
3987
+ # # ╞═════╪═════╪══════╡
3988
+ # # │ 2.0 ┆ 7.0 ┆ null │
3989
+ # # └─────┴─────┴──────┘
3990
def median
  result = _df.median
  _from_rbdf(result)
end
3993
+
3994
+ # Aggregate the columns of this DataFrame to their product values.
3995
+ #
3996
+ # @return [DataFrame]
3997
+ #
3998
+ # @example
3999
+ # df = Polars::DataFrame.new(
4000
+ # {
4001
+ # "a" => [1, 2, 3],
4002
+ # "b" => [0.5, 4, 10],
4003
+ # "c" => [true, true, false]
4004
+ # }
4005
+ # )
4006
+ # df.product
4007
+ # # =>
4008
+ # # shape: (1, 3)
4009
+ # # ┌─────┬──────┬─────┐
4010
+ # # │ a ┆ b ┆ c │
4011
+ # # │ --- ┆ --- ┆ --- │
4012
+ # # │ i64 ┆ f64 ┆ i64 │
4013
+ # # ╞═════╪══════╪═════╡
4014
+ # # │ 6 ┆ 20.0 ┆ 0 │
4015
+ # # └─────┴──────┴─────┘
4016
def product
  # Expressed through the expression API: `product` applied to `Polars.all`.
  every_column = Polars.all
  select(every_column.product)
end
4019
+
4020
+ # Aggregate the columns of this DataFrame to their quantile value.
4021
+ #
4022
+ # @param quantile [Float]
4023
+ # Quantile between 0.0 and 1.0.
4024
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
4025
+ # Interpolation method.
4026
+ #
4027
+ # @return [DataFrame]
4028
+ #
4029
+ # @example
4030
+ # df = Polars::DataFrame.new(
4031
+ # {
4032
+ # "foo" => [1, 2, 3],
4033
+ # "bar" => [6, 7, 8],
4034
+ # "ham" => ["a", "b", "c"]
4035
+ # }
4036
+ # )
4037
+ # df.quantile(0.5, interpolation: "nearest")
4038
+ # # =>
4039
+ # # shape: (1, 3)
4040
+ # # ┌─────┬─────┬──────┐
4041
+ # # │ foo ┆ bar ┆ ham │
4042
+ # # │ --- ┆ --- ┆ --- │
4043
+ # # │ f64 ┆ f64 ┆ str │
4044
+ # # ╞═════╪═════╪══════╡
4045
+ # # │ 2.0 ┆ 7.0 ┆ null │
4046
+ # # └─────┴─────┴──────┘
4047
def quantile(quantile, interpolation: "nearest")
  result = _df.quantile(quantile, interpolation)
  _from_rbdf(result)
end
4050
+
4051
+ # Get one hot encoded dummy variables.
4052
+ #
4053
+ # @param columns
4054
+ # A subset of columns to convert to dummy variables. `nil` means
4055
+ # "all columns".
4056
+ #
4057
+ # @return [DataFrame]
4058
+ #
4059
+ # @example
4060
+ # df = Polars::DataFrame.new(
4061
+ # {
4062
+ # "foo" => [1, 2],
4063
+ # "bar" => [3, 4],
4064
+ # "ham" => ["a", "b"]
4065
+ # }
4066
+ # )
4067
+ # df.to_dummies
4068
+ # # =>
4069
+ # # shape: (2, 6)
4070
+ # # ┌───────┬───────┬───────┬───────┬───────┬───────┐
4071
+ # # │ foo_1 ┆ foo_2 ┆ bar_3 ┆ bar_4 ┆ ham_a ┆ ham_b │
4072
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
4073
+ # # │ u8 ┆ u8 ┆ u8 ┆ u8 ┆ u8 ┆ u8 │
4074
+ # # ╞═══════╪═══════╪═══════╪═══════╪═══════╪═══════╡
4075
+ # # │ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 │
4076
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4077
+ # # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
4078
+ # # └───────┴───────┴───────┴───────┴───────┴───────┘
4079
def to_dummies(columns: nil)
  # A single column name is wrapped in an array; anything else is passed as-is.
  columns = [columns] if columns.is_a?(String)

  encoded = _df.to_dummies(columns)
  _from_rbdf(encoded)
end
4085
+
4086
+ # Drop duplicate rows from this DataFrame.
4087
+ #
4088
+ # @param maintain_order [Boolean]
4089
+ # Keep the same order as the original DataFrame. This requires more work to
4090
+ # compute.
4091
+ # @param subset [Object]
4092
+ # Subset to use to compare rows.
4093
+ # @param keep ["first", "last"]
4094
+ # Which of the duplicate rows to keep (in conjunction with `subset`).
4095
+ #
4096
+ # @return [DataFrame]
4097
+ #
4098
+ # @note
4099
+ # Note that this fails if there is a column of type `List` in the DataFrame or
4100
+ # subset.
4101
+ #
4102
+ # @example
4103
+ # df = Polars::DataFrame.new(
4104
+ # {
4105
+ # "a" => [1, 1, 2, 3, 4, 5],
4106
+ # "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
4107
+ # "c" => [true, true, true, false, true, true]
4108
+ # }
4109
+ # )
4110
+ # df.unique
4111
+ # # =>
4112
+ # # shape: (5, 3)
4113
+ # # ┌─────┬─────┬───────┐
4114
+ # # │ a ┆ b ┆ c │
4115
+ # # │ --- ┆ --- ┆ --- │
4116
+ # # │ i64 ┆ f64 ┆ bool │
4117
+ # # ╞═════╪═════╪═══════╡
4118
+ # # │ 1 ┆ 0.5 ┆ true │
4119
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4120
+ # # │ 2 ┆ 1.0 ┆ true │
4121
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4122
+ # # │ 3 ┆ 2.0 ┆ false │
4123
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4124
+ # # │ 4 ┆ 3.0 ┆ true │
4125
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4126
+ # # │ 5 ┆ 3.0 ┆ true │
4127
+ # # └─────┴─────┴───────┘
4128
def unique(maintain_order: true, subset: nil, keep: "first")
  # nil and arrays pass through unchanged; a single name is wrapped and
  # any other value is converted with `to_a`.
  subset =
    case subset
    when nil, Array then subset
    when String then [subset]
    else subset.to_a
    end

  _from_rbdf(_df.unique(maintain_order, subset, keep))
end
4139
+
4140
+ # Return the number of unique rows, or the number of unique row-subsets.
4141
+ #
4142
+ # @param subset [Object]
4143
+ # One or more columns/expressions that define what to count;
4144
+ # omit to return the count of unique rows.
4145
+ #
4146
+ # @return [Integer]
4147
+ #
4148
+ # @example
4149
+ # df = Polars::DataFrame.new(
4150
+ # {
4151
+ # "a" => [1, 1, 2, 3, 4, 5],
4152
+ # "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
4153
+ # "c" => [true, true, true, false, true, true]
4154
+ # }
4155
+ # )
4156
+ # df.n_unique
4157
+ # # => 5
4158
+ #
4159
+ # @example Simple columns subset
4160
+ # df.n_unique(subset: ["b", "c"])
4161
+ # # => 4
4162
+ #
4163
+ # @example Expression subset
4164
+ # df.n_unique(
4165
+ # subset: [
4166
+ # (Polars.col("a").floordiv(2)),
4167
+ # (Polars.col("c") | (Polars.col("b") >= 2))
4168
+ # ]
4169
+ # )
4170
+ # # => 3
4171
def n_unique(subset: nil)
  # A bare column name or a single expression becomes a one-element list.
  # (The name check must be against String; it previously tested StringIO,
  # so a plain column name never took this branch.)
  if subset.is_a?(String)
    subset = [Polars.col(subset)]
  elsif subset.is_a?(Expr)
    subset = [subset]
  end

  expr =
    if subset.is_a?(Array) && subset.length == 1
      Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
    else
      # Several keys, or every column when no subset is given, are packed
      # into one struct expression before counting.
      Polars.struct(subset.nil? ? Polars.all : subset)
    end

  counted = lazy.select(expr.n_unique).collect
  # An empty result frame is reported as zero unique rows.
  counted.is_empty ? 0 : counted.row(0)[0]
end
4188
+
4189
+ # Rechunk the data in this DataFrame to a contiguous allocation.
4190
+ #
4191
+ # This will make sure all subsequent operations have optimal and predictable
4192
+ # performance.
4193
+ #
4194
+ # @return [DataFrame]
4195
def rechunk
  rechunked = _df.rechunk
  _from_rbdf(rechunked)
end
4198
+
4199
+ # Create a new DataFrame that shows the null counts per column.
4200
+ #
4201
+ # @return [DataFrame]
4202
+ #
4203
+ # @example
4204
+ # df = Polars::DataFrame.new(
4205
+ # {
4206
+ # "foo" => [1, nil, 3],
4207
+ # "bar" => [6, 7, nil],
4208
+ # "ham" => ["a", "b", "c"]
4209
+ # }
4210
+ # )
4211
+ # df.null_count
4212
+ # # =>
4213
+ # # shape: (1, 3)
4214
+ # # ┌─────┬─────┬─────┐
4215
+ # # │ foo ┆ bar ┆ ham │
4216
+ # # │ --- ┆ --- ┆ --- │
4217
+ # # │ u32 ┆ u32 ┆ u32 │
4218
+ # # ╞═════╪═════╪═════╡
4219
+ # # │ 1 ┆ 1 ┆ 0 │
4220
+ # # └─────┴─────┴─────┘
4221
def null_count
  counts = _df.null_count
  _from_rbdf(counts)
end
4224
+
4225
+ # Sample from this DataFrame.
4226
+ #
4227
+ # @param n [Integer]
4228
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
4229
+ # `frac` is nil.
4230
+ # @param frac [Float]
4231
+ # Fraction of items to return. Cannot be used with `n`.
4232
+ # @param with_replacement [Boolean]
4233
+ # Allow values to be sampled more than once.
4234
+ # @param shuffle [Boolean]
4235
+ # Shuffle the order of sampled data points.
4236
+ # @param seed [Integer]
4237
+ # Seed for the random number generator. If set to nil (default), a random
4238
+ # seed is used.
4239
+ #
4240
+ # @return [DataFrame]
4241
+ #
4242
+ # @example
4243
+ # df = Polars::DataFrame.new(
4244
+ # {
4245
+ # "foo" => [1, 2, 3],
4246
+ # "bar" => [6, 7, 8],
4247
+ # "ham" => ["a", "b", "c"]
4248
+ # }
4249
+ # )
4250
+ # df.sample(n: 2, seed: 0)
4251
+ # # =>
4252
+ # # shape: (2, 3)
4253
+ # # ┌─────┬─────┬─────┐
4254
+ # # │ foo ┆ bar ┆ ham │
4255
+ # # │ --- ┆ --- ┆ --- │
4256
+ # # │ i64 ┆ i64 ┆ str │
4257
+ # # ╞═════╪═════╪═════╡
4258
+ # # │ 3 ┆ 8 ┆ c │
4259
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
4260
+ # # │ 2 ┆ 7 ┆ b │
4261
+ # # └─────┴─────┴─────┘
4262
def sample(
  n: nil,
  frac: nil,
  with_replacement: false,
  shuffle: false,
  seed: nil
)
  if !n.nil? && !frac.nil?
    raise ArgumentError, "cannot specify both `n` and `frac`"
  end

  # Fraction-based sampling. Return here: without the explicit return the
  # result was discarded and the count-based path below ran with n = 1.
  unless frac.nil?
    return _from_rbdf(
      _df.sample_frac(frac, with_replacement, shuffle, seed)
    )
  end

  # Neither `n` nor `frac` given: draw a single row.
  n = 1 if n.nil?
  _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
end
4284
+
4285
+ # Apply a horizontal reduction on a DataFrame.
4286
+ #
4287
+ # This can be used to effectively determine aggregations on a row level, and can
4288
+ # be applied to any DataType that can be supercasted (casted to a similar parent
4289
+ # type).
4290
+ #
4291
+ # An example of the supercast rules when applying an arithmetic operation on two
4292
+ # DataTypes are for instance:
4293
+ #
4294
+ # i8 + str = str
4295
+ # f32 + i64 = f32
4296
+ # f32 + f64 = f64
4297
+ #
4298
+ # @return [Series]
4299
+ #
4300
+ # @example A horizontal sum operation:
4301
+ # df = Polars::DataFrame.new(
4302
+ # {
4303
+ # "a" => [2, 1, 3],
4304
+ # "b" => [1, 2, 3],
4305
+ # "c" => [1.0, 2.0, 3.0]
4306
+ # }
4307
+ # )
4308
+ # df.fold { |s1, s2| s1 + s2 }
4309
+ # # =>
4310
+ # # shape: (3,)
4311
+ # # Series: 'a' [f64]
4312
+ # # [
4313
+ # # 4.0
4314
+ # # 5.0
4315
+ # # 9.0
4316
+ # # ]
4317
+ #
4318
+ # @example A horizontal minimum operation:
4319
+ # df = Polars::DataFrame.new({"a" => [2, 1, 3], "b" => [1, 2, 3], "c" => [1.0, 2.0, 3.0]})
4320
+ # df.fold { |s1, s2| s1.zip_with(s1 < s2, s2) }
4321
+ # # =>
4322
+ # # shape: (3,)
4323
+ # # Series: 'a' [f64]
4324
+ # # [
4325
+ # # 1.0
4326
+ # # 1.0
4327
+ # # 3.0
4328
+ # # ]
4329
+ #
4330
+ # @example A horizontal string concatenation:
4331
+ # df = Polars::DataFrame.new(
4332
+ # {
4333
+ # "a" => ["foo", "bar", 2],
4334
+ # "b" => [1, 2, 3],
4335
+ # "c" => [1.0, 2.0, 3.0]
4336
+ # }
4337
+ # )
4338
+ # df.fold { |s1, s2| s1 + s2 }
4339
+ # # =>
4340
+ # # shape: (3,)
4341
+ # # Series: 'a' [str]
4342
+ # # [
4343
+ # # "foo11.0"
4344
+ # # "bar22.0"
4345
+ # # null
4346
+ # # ]
4347
+ #
4348
+ # @example A horizontal boolean or, similar to a row-wise .any():
4349
+ # df = Polars::DataFrame.new(
4350
+ # {
4351
+ # "a" => [false, false, true],
4352
+ # "b" => [false, true, false]
4353
+ # }
4354
+ # )
4355
+ # df.fold { |s1, s2| s1 | s2 }
4356
+ # # =>
4357
+ # # shape: (3,)
4358
+ # # Series: 'a' [bool]
4359
+ # # [
4360
+ # # false
4361
+ # # true
4362
+ # # true
4363
+ # # ]
4364
def fold(&operation)
  # Seed the accumulator with the first column, then combine the remaining
  # columns into it from left to right using the supplied block.
  first_column = to_series(0)
  (1...width).reduce(first_column) do |combined, col_idx|
    operation.call(combined, to_series(col_idx))
  end
end
4372
+
4373
+ # Get a row as tuple, either by index or by predicate.
4374
+ #
4375
+ # @param index [Object]
4376
+ # Row index.
4377
+ # @param by_predicate [Object]
4378
+ # Select the row according to a given expression/predicate.
4379
+ #
4380
+ # @return [Object]
4381
+ #
4382
+ # @note
4383
+ # The `index` and `by_predicate` params are mutually exclusive. Additionally,
4384
+ # to ensure clarity, the `by_predicate` parameter must be supplied by keyword.
4385
+ #
4386
+ # When using `by_predicate` it is an error condition if anything other than
4387
+ # one row is returned; more than one row raises `TooManyRowsReturned`, and
4388
+ # zero rows will raise `NoRowsReturned` (both inherit from `RowsException`).
4389
+ #
4390
+ # @example Return the row at the given index
4391
+ # df = Polars::DataFrame.new(
4392
+ # {
4393
+ # "foo" => [1, 2, 3],
4394
+ # "bar" => [6, 7, 8],
4395
+ # "ham" => ["a", "b", "c"]
4396
+ # }
4397
+ # )
4398
+ # df.row(2)
4399
+ # # => [3, 8, "c"]
4400
+ #
4401
+ # @example Return the row that matches the given predicate
4402
+ # df.row(by_predicate: Polars.col("ham") == "b")
4403
+ # # => [2, 7, "b"]
4404
def row(index = nil, by_predicate: nil)
  if !index.nil? && !by_predicate.nil?
    # The two lookup modes are mutually exclusive.
    raise ArgumentError, "Cannot set both 'index' and 'by_predicate'; mutually exclusive"
  elsif index.is_a?(Expr)
    # Predicates are keyword-only; reject an expression passed positionally.
    raise TypeError, "Expressions should be passed to the 'by_predicate' param"
  elsif index.is_a?(Integer)
    _df.row_tuple(index)
  elsif by_predicate.is_a?(Expr)
    # A predicate lookup must match exactly one row.
    matches = filter(by_predicate).rows
    case matches.length
    when 0
      # The message previously contained a literal, un-interpolated
      # Python f-string placeholder instead of the predicate.
      raise NoRowsReturned, "Predicate #{by_predicate} returned no rows"
    when 1
      matches[0]
    else
      raise TooManyRowsReturned, "Predicate #{by_predicate} returned #{matches.length} rows"
    end
  else
    raise ArgumentError, "One of 'index' or 'by_predicate' must be set"
  end
end
4424
+
4425
+ # Convert columnar data to rows as Ruby arrays.
4426
+ #
4427
+ # @return [Array]
4428
+ #
4429
+ # @example
4430
+ # df = Polars::DataFrame.new(
4431
+ # {
4432
+ # "a" => [1, 3, 5],
4433
+ # "b" => [2, 4, 6]
4434
+ # }
4435
+ # )
4436
+ # df.rows
4437
+ # # => [[1, 2], [3, 4], [5, 6]]
4438
def rows
  # Row materialisation is done entirely by the native frame.
  native_frame = _df
  native_frame.row_tuples
end
4441
+
4442
+ # Shrink DataFrame memory usage.
4443
+ #
4444
+ # Shrinks to fit the exact capacity needed to hold the data.
4445
+ #
4446
+ # @return [DataFrame]
4447
def shrink_to_fit(in_place: false)
  # Shrink this frame directly, or a copy when the receiver must be left
  # untouched; whichever frame was shrunk is returned.
  target = in_place ? self : clone
  target._df.shrink_to_fit
  target
end
4457
+
4458
+ # Take every nth row in the DataFrame and return as a new DataFrame.
4459
+ #
4460
+ # @return [DataFrame]
4461
+ #
4462
+ # @example
4463
+ # s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
4464
+ # s.take_every(2)
4465
+ # # =>
4466
+ # # shape: (2, 2)
4467
+ # # ┌─────┬─────┐
4468
+ # # │ a ┆ b │
4469
+ # # │ --- ┆ --- │
4470
+ # # │ i64 ┆ i64 │
4471
+ # # ╞═════╪═════╡
4472
+ # # │ 1 ┆ 5 │
4473
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
4474
+ # # │ 3 ┆ 7 │
4475
+ # # └─────┴─────┘
4476
def take_every(n)
  # Apply the expression-level `take_every` to all columns at once.
  every_nth = Utils.col("*").take_every(n)
  select(every_nth)
end
4479
+
4480
+ # Hash and combine the rows in this DataFrame.
4481
+ #
4482
+ # The hash value is of type `:u64`.
4483
+ #
4484
+ # @param seed [Integer]
4485
+ # Random seed parameter. Defaults to 0.
4486
+ # @param seed_1 [Integer]
4487
+ # Random seed parameter. Defaults to `seed` if not set.
4488
+ # @param seed_2 [Integer]
4489
+ # Random seed parameter. Defaults to `seed` if not set.
4490
+ # @param seed_3 [Integer]
4491
+ # Random seed parameter. Defaults to `seed` if not set.
4492
+ #
4493
+ # @return [Series]
4494
+ #
4495
+ # @example
4496
+ # df = Polars::DataFrame.new(
4497
+ # {
4498
+ # "foo" => [1, nil, 3, 4],
4499
+ # "ham" => ["a", "b", nil, "d"]
4500
+ # }
4501
+ # )
4502
+ # df.hash_rows(seed: 42)
4503
+ # # =>
4504
+ # # shape: (4,)
4505
+ # # Series: '' [u64]
4506
+ # # [
4507
+ # # 4238614331852490969
4508
+ # # 17976148875586754089
4509
+ # # 4702262519505526977
4510
+ # # 18144177983981041107
4511
+ # # ]
4512
def hash_rows(seed: 0, seed_1: nil, seed_2: nil, seed_3: nil)
  # Any secondary seed left as nil inherits the primary seed.
  k1, k2, k3 = [seed_1, seed_2, seed_3].map { |s| s.nil? ? seed : s }
  hashed = _df.hash_rows(seed, k1, k2, k3)
  Utils.wrap_s(hashed)
end
4519
+
4520
+ # Interpolate intermediate values. The interpolation method is linear.
4521
+ #
4522
+ # @return [DataFrame]
4523
+ #
4524
+ # @example
4525
+ # df = Polars::DataFrame.new(
4526
+ # {
4527
+ # "foo" => [1, nil, 9, 10],
4528
+ # "bar" => [6, 7, 9, nil],
4529
+ # "baz" => [1, nil, nil, 9]
4530
+ # }
4531
+ # )
4532
+ # df.interpolate
4533
+ # # =>
4534
+ # # shape: (4, 3)
4535
+ # # ┌─────┬──────┬─────┐
4536
+ # # │ foo ┆ bar ┆ baz │
4537
+ # # │ --- ┆ --- ┆ --- │
4538
+ # # │ i64 ┆ i64 ┆ i64 │
4539
+ # # ╞═════╪══════╪═════╡
4540
+ # # │ 1 ┆ 6 ┆ 1 │
4541
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
4542
+ # # │ 5 ┆ 7 ┆ 3 │
4543
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
4544
+ # # │ 9 ┆ 9 ┆ 6 │
4545
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
4546
+ # # │ 10 ┆ null ┆ 9 │
4547
+ # # └─────┴──────┴─────┘
4548
def interpolate
  # Interpolate every column via the wildcard column expression.
  all_columns = Utils.col("*")
  select(all_columns.interpolate)
end
4551
+
4552
+ # Check if the dataframe is empty.
4553
+ #
4554
+ # @return [Boolean]
4555
+ #
4556
+ # @example
4557
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
4558
+ # df.is_empty
4559
+ # # => false
4560
+ # df.filter(Polars.col("foo") > 99).is_empty
4561
+ # # => true
4562
def is_empty
  # A frame with no rows is empty, regardless of how many columns it has.
  height.zero?
end
alias empty? is_empty
4566
+
4567
+ # Convert a `DataFrame` to a `Series` of type `Struct`.
4568
+ #
4569
+ # @param name [String]
4570
+ # Name for the struct Series
4571
+ #
4572
+ # @return [Series]
4573
+ #
4574
+ # @example
4575
+ # df = Polars::DataFrame.new(
4576
+ # {
4577
+ # "a" => [1, 2, 3, 4, 5],
4578
+ # "b" => ["one", "two", "three", "four", "five"]
4579
+ # }
4580
+ # )
4581
+ # df.to_struct("nums")
4582
+ # # =>
4583
+ # # shape: (5,)
4584
+ # # Series: 'nums' [struct[2]]
4585
+ # # [
4586
+ # # {1,"one"}
4587
+ # # {2,"two"}
4588
+ # # {3,"three"}
4589
+ # # {4,"four"}
4590
+ # # {5,"five"}
4591
+ # # ]
4592
def to_struct(name)
  # The native frame builds the struct series; wrap it for Ruby callers.
  struct_series = _df.to_struct(name)
  Utils.wrap_s(struct_series)
end
4595
+
4596
+ # Decompose a struct into its fields.
4597
+ #
4598
+ # The fields will be inserted into the `DataFrame` on the location of the
4599
+ # `struct` type.
4600
+ #
4601
+ # @param names [Object]
4602
+ # Names of the struct columns that will be decomposed by its fields
4603
+ #
4604
+ # @return [DataFrame]
4605
+ #
4606
+ # @example
4607
+ # df = Polars::DataFrame.new(
4608
+ # {
4609
+ # "before" => ["foo", "bar"],
4610
+ # "t_a" => [1, 2],
4611
+ # "t_b" => ["a", "b"],
4612
+ # "t_c" => [true, nil],
4613
+ # "t_d" => [[1, 2], [3]],
4614
+ # "after" => ["baz", "womp"]
4615
+ # }
4616
+ # ).select(["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"])
4617
+ # df.unnest("t_struct")
4618
+ # # =>
4619
+ # # shape: (2, 6)
4620
+ # # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
4621
+ # # │ before ┆ t_a ┆ t_b ┆ t_c ┆ t_d ┆ after │
4622
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
4623
+ # # │ str ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str │
4624
+ # # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
4625
+ # # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
4626
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
4627
+ # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
4628
+ # # └────────┴─────┴─────┴──────┴───────────┴───────┘
4629
def unnest(names)
  # A single column name is accepted as shorthand for a one-element list.
  struct_columns = names.is_a?(String) ? [names] : names
  _from_rbdf(_df.unnest(struct_columns))
end
4635
+
4636
+ private
4637
+
4638
# Hook invoked by `clone`/`dup`: give the copy its own native frame so the
# copy and the original no longer share `_df`.
def initialize_copy(other)
  super(other)
  own_frame = _df._clone
  self._df = own_frame
end
4642
+
4643
# Translate a possibly negative index along dimension `dim` of `shape` into
# a non-negative one by counting back from the end.
def _pos_idx(idx, dim)
  return idx if idx >= 0

  shape[dim] + idx
end
4650
+
4651
+ # def _pos_idxs
4652
+ # end
4653
+
4654
+ # @private
4655
def self.hash_to_rbdf(data, columns: nil)
  # Without a column spec the native reader consumes the hash directly.
  return RbDataFrame.read_hash(data) if columns.nil?

  names, dtypes = _unpack_columns(columns, lookup_names: data.keys)

  native_series =
    if data.empty? && dtypes
      # No data at all: create one empty series per requested column.
      names.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
    else
      data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
    end

  RbDataFrame.new(_handle_columns_arg(native_series, columns: names))
end
4670
+
4671
+ # @private
4672
# Split a column specification into names and dtypes.
#
# `columns` may be nil, a Hash of name => dtype, or an Array whose entries
# are either a String name or a `[name, dtype]` pair.
#
# Returns `[column_names, dtypes]`, where `dtypes` is a Hash containing only
# the entries that carried a truthy dtype. Its keys are translated through
# `lookup_names` (matched by position against the column names) when given.
def self._unpack_columns(columns, lookup_names: nil, n_expected: nil)
  entries = columns.is_a?(Hash) ? columns.to_a : (columns || [])

  column_names =
    entries.map.with_index do |col, i|
      if col.is_a?(String)
        # A blank name falls back to a positional default. The previous
        # `col || "column_#{i}"` could never take the fallback because every
        # String, including "", is truthy in Ruby.
        col.empty? ? "column_#{i}" : col
      else
        col[0]
      end
    end

  # No names supplied at all: synthesise the expected number of defaults.
  if column_names.empty? && n_expected
    column_names = n_expected.times.map { |i| "column_#{i}" }
  end

  # TODO zip_longest
  lookup = column_names.zip(lookup_names || []).to_h

  dtypes =
    entries
      .select { |col| !col.is_a?(String) && col[1] }
      .to_h { |col| [lookup[col[0]] || col[0], col[1]] }

  [column_names, dtypes]
end
4697
+
4698
# Reconcile a list of native series with an optional list of column names.
def self._handle_columns_arg(data, columns: nil)
  return data if columns.nil?

  # No series yet: create one per requested column name.
  return columns.map { |c| Series.new(c, nil)._s } if data.empty?

  unless data.length == columns.length
    raise ArgumentError, "Dimensions of columns arg must match data dimensions."
  end

  # NOTE(review): the return value of `rename` is discarded, so this relies
  # on `rename` mutating the series in place. Confirm against the native API.
  data.zip(columns) { |series, new_name| series.rename(new_name) }
  data
end
4715
+
4716
+ # @private
4717
def self.sequence_to_rbdf(data, columns: nil, orient: nil)
  # Nothing to infer from an empty sequence; build from the column spec alone.
  if data.length == 0
    return hash_to_rbdf({}, columns: columns)
  end

  if data[0].is_a?(Series)
    # series_names = data.map(&:name)
    # columns, dtypes = _unpack_columns(columns || series_names, n_expected: data.length)
    data_series = data.map(&:_s)
  elsif data[0].is_a?(Array)
    # Infer the orientation from the column count when not given explicitly.
    if orient.nil? && !columns.nil?
      orient = columns.length == data.length ? "col" : "row"
    end

    if orient == "row"
      raise Todo
    elsif orient == "col" || orient.nil?
      raise Todo
    else
      # The message previously carried Python-escaped doubled braces.
      raise ArgumentError, "orient must be one of {'col', 'row', nil}, got #{orient} instead."
    end
  end

  # NOTE(review): when the first element is neither a Series nor an Array,
  # `data_series` is still nil at this point.
  data_series = _handle_columns_arg(data_series, columns: columns)
  RbDataFrame.new(data_series)
end
4746
+
4747
+ # @private
4748
def self.series_to_rbdf(data, columns: nil)
  # Supplying a column spec together with a single Series is not implemented.
  raise Todo if columns

  RbDataFrame.new([data._s])
end
4754
+
4755
# Wrap a native lazy frame in a LazyFrame.
def wrap_ldf(ldf)
  lazy_frame = LazyFrame._from_rbldf(ldf)
  lazy_frame
end
4758
+
4759
# Wrap a native frame using the class-level constructor of the receiver's
# own class.
def _from_rbdf(rb_df)
  frame_class = self.class
  frame_class._from_rbdf(rb_df)
end
4762
+
4763
# Dispatch a comparison to the frame-vs-frame or frame-vs-value
# implementation depending on what `other` is.
def _comp(other, op)
  return _compare_to_other_df(other, op) if other.is_a?(DataFrame)

  _compare_to_non_df(other, op)
end
4770
+
4771
# Compare this frame with another frame, column by column.
#
# `op` is one of "eq", "neq", "gt", "lt", "gt_eq", "lt_eq". Raises
# ArgumentError when the column names or shapes differ, or when `op` is not
# one of those names.
def _compare_to_other_df(other, op)
  # These checks previously referenced a misspelled constant
  # (`ArgmentError`), which raised NameError instead of ArgumentError.
  if columns != other.columns
    raise ArgumentError, "DataFrame columns do not match"
  end
  if shape != other.shape
    raise ArgumentError, "DataFrame dimensions do not match"
  end

  # Suffix the other frame's columns so both frames can sit side by side.
  suffix = "__POLARS_CMP_OTHER"
  other_renamed = other.select(Polars.all.suffix(suffix))
  combined = Polars.concat([self, other_renamed], how: "horizontal")

  operator = {
    "eq" => :==,
    "neq" => :!=,
    "gt" => :>,
    "lt" => :<,
    "gt_eq" => :>=,
    "lt_eq" => :<=
  }[op]
  if operator.nil?
    raise ArgumentError, "got unexpected comparison operator: #{op}"
  end

  # One comparison expression per column: own column vs its suffixed twin.
  expr = columns.map do |n|
    Polars.col(n).public_send(operator, Polars.col("#{n}#{suffix}"))
  end

  combined.select(expr)
end
4802
+
4803
# Compare every column of this frame against a non-frame value.
#
# `op` is one of "eq", "neq", "gt", "lt", "gt_eq", "lt_eq"; any other value
# raises ArgumentError.
def _compare_to_non_df(other, op)
  operator = {
    "eq" => :==,
    "neq" => :!=,
    "gt" => :>,
    "lt" => :<,
    "gt_eq" => :>=,
    "lt_eq" => :<=
  }[op]
  if operator.nil?
    raise ArgumentError, "got unexpected comparison operator: #{op}"
  end

  select(Polars.all.public_send(operator, other))
end
4821
+
4822
# Coerce the right-hand operand to a Series: a Series passes through
# unchanged, an Array is rejected, and any other value is wrapped in an
# unnamed single-element Series.
def _prepare_other_arg(other)
  return other if other.is_a?(Series)
  raise ArgumentError, "Operation not supported." if other.is_a?(Array)

  Series.new("", [other])
end
4832
+ end
4833
+ end