polars-df 0.21.0-x86_64-darwin → 0.22.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +55 -48
  4. data/Cargo.toml +3 -0
  5. data/LICENSE-THIRD-PARTY.txt +23 -49
  6. data/README.md +12 -0
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/3.4/polars.bundle +0 -0
  10. data/lib/polars/array_expr.rb +382 -3
  11. data/lib/polars/array_name_space.rb +281 -0
  12. data/lib/polars/binary_expr.rb +67 -0
  13. data/lib/polars/binary_name_space.rb +43 -0
  14. data/lib/polars/cat_expr.rb +224 -0
  15. data/lib/polars/cat_name_space.rb +138 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/convert.rb +6 -6
  18. data/lib/polars/data_frame.rb +794 -27
  19. data/lib/polars/data_type_expr.rb +52 -0
  20. data/lib/polars/data_types.rb +26 -5
  21. data/lib/polars/date_time_expr.rb +252 -1
  22. data/lib/polars/date_time_name_space.rb +299 -0
  23. data/lib/polars/expr.rb +1248 -206
  24. data/lib/polars/functions/business.rb +95 -0
  25. data/lib/polars/functions/datatype.rb +21 -0
  26. data/lib/polars/functions/lazy.rb +14 -1
  27. data/lib/polars/io/csv.rb +1 -1
  28. data/lib/polars/io/iceberg.rb +27 -0
  29. data/lib/polars/io/json.rb +4 -4
  30. data/lib/polars/io/ndjson.rb +4 -4
  31. data/lib/polars/io/parquet.rb +32 -7
  32. data/lib/polars/io/scan_options.rb +4 -1
  33. data/lib/polars/lazy_frame.rb +1028 -28
  34. data/lib/polars/list_expr.rb +217 -17
  35. data/lib/polars/list_name_space.rb +231 -22
  36. data/lib/polars/meta_expr.rb +89 -0
  37. data/lib/polars/name_expr.rb +36 -0
  38. data/lib/polars/query_opt_flags.rb +50 -0
  39. data/lib/polars/scan_cast_options.rb +20 -1
  40. data/lib/polars/schema.rb +79 -3
  41. data/lib/polars/selector.rb +72 -0
  42. data/lib/polars/selectors.rb +3 -3
  43. data/lib/polars/series.rb +1053 -54
  44. data/lib/polars/string_expr.rb +436 -32
  45. data/lib/polars/string_name_space.rb +736 -50
  46. data/lib/polars/struct_expr.rb +103 -0
  47. data/lib/polars/struct_name_space.rb +19 -1
  48. data/lib/polars/utils/serde.rb +17 -0
  49. data/lib/polars/utils/various.rb +22 -1
  50. data/lib/polars/utils.rb +5 -1
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +6 -0
  53. metadata +8 -2
@@ -0,0 +1,52 @@
1
+ module Polars
2
+ # A lazily instantiated `DataType` that can be used in an `Expr`.
3
+ class DataTypeExpr
4
+ # @private
5
+ attr_accessor :_rbdatatype_expr
6
+
7
+ # @private
8
+ def self._from_rbdatatype_expr(rbdatatype_expr)
9
+ slf = new
10
+ slf._rbdatatype_expr = rbdatatype_expr
11
+ slf
12
+ end
13
+
14
+ # Materialize the `DataTypeExpr` in a specific context.
15
+ #
16
+ # This is a useful function when debugging datatype expressions.
17
+ #
18
+ # @return [DataType]
19
+ #
20
+ # @example
21
+ # lf = Polars::LazyFrame.new(
22
+ # {
23
+ # "a" => [1, 2, 3]
24
+ # }
25
+ # )
26
+ # Polars.dtype_of("a").collect_dtype(lf)
27
+ # # => Polars::Int64
28
+ #
29
+ # @example
30
+ # Polars.dtype_of("a").collect_dtype({"a" => Polars::String})
31
+ # # => Polars::String
32
+ def collect_dtype(
33
+ context
34
+ )
35
+ schema = nil
36
+ if context.is_a?(Schema)
37
+ schema = context
38
+ elsif context.is_a?(Hash)
39
+ schema = Schema.new(context)
40
+ elsif context.is_a?(DataFrame)
41
+ schema = context.schema
42
+ elsif context.is_a?(LazyFrame)
43
+ schema = context.collect_schema
44
+ else
45
+ msg = "DataTypeExpr.collect_dtype did not expect #{context.inspect}"
46
+ raise TypeError, msg
47
+ end
48
+
49
+ _rbdatatype_expr.collect_dtype(schema.to_h)
50
+ end
51
+ end
52
+ end
@@ -99,12 +99,34 @@ module Polars
99
99
  self < NestedType
100
100
  end
101
101
 
102
+ # Return a `DataTypeExpr` with a static `DataType`.
103
+ #
104
+ # @return [DataTypeExpr]
105
+ #
106
+ # @example
107
+ # Polars::Int16.to_dtype_expr.collect_dtype({})
108
+ # # => Polars::Int16
109
+ def self.to_dtype_expr
110
+ DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
111
+ end
112
+
102
113
  [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
103
114
  define_method(v) do
104
115
  self.class.public_send(v)
105
116
  end
106
117
  end
107
118
 
119
+ # Return a `DataTypeExpr` with a static `DataType`.
120
+ #
121
+ # @return [DataTypeExpr]
122
+ #
123
+ # @example
124
+ # Polars::Int16.new.to_dtype_expr.collect_dtype({})
125
+ # # => Polars::Int16
126
+ def to_dtype_expr
127
+ DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
128
+ end
129
+
108
130
  # Returns a string representing the data type.
109
131
  #
110
132
  # @return [String]
@@ -306,17 +328,16 @@ module Polars
306
328
  class Categories
307
329
  attr_accessor :_categories
308
330
 
309
- def initialize
310
- # TODO fix
311
- name = nil
331
+ def initialize(name = nil)
312
332
  if name.nil? || name == ""
313
- @_categories = RbCategories.global_categories
333
+ self._categories = RbCategories.global_categories
314
334
  return
315
335
  end
316
336
 
317
337
  raise Todo
318
338
  end
319
339
 
340
+ # @private
320
341
  def self._from_rb_categories(rb_categories)
321
342
  slf = new
322
343
  slf._categories = rb_categories
@@ -401,7 +422,7 @@ module Polars
401
422
  class Object < DataType
402
423
  end
403
424
 
404
- # Type representing Null / None values.
425
+ # Type representing Null / nil values.
405
426
  class Null < DataType
406
427
  end
407
428
 
@@ -9,6 +9,57 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
+ # Offset by `n` business days.
13
+ #
14
+ # @note
15
+ # This functionality is considered **unstable**. It may be changed
16
+ # at any point without it being considered a breaking change.
17
+ #
18
+ # @param n
19
+ # Number of business days to offset by. Can be a single number or an
20
+ # expression.
21
+ # @param week_mask
22
+ # Which days of the week to count. The default is Monday to Friday.
23
+ # If you wanted to count only Monday to Thursday, you would pass
24
+ # `[true, true, true, true, false, false, false]`.
25
+ # @param roll
26
+ # What to do when the start date lands on a non-business day. Options are:
27
+ #
28
+ # - `'raise'`: raise an error
29
+ # - `'forward'`: move to the next business day
30
+ # - `'backward'`: move to the previous business day
31
+ #
32
+ # @return [Expr]
33
+ #
34
+ # @example
35
+ # df = Polars::DataFrame.new({"start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]})
36
+ # df.with_columns(result: Polars.col("start").dt.add_business_days(5))
37
+ # # =>
38
+ # # shape: (2, 2)
39
+ # # ┌────────────┬────────────┐
40
+ # # │ start ┆ result │
41
+ # # │ --- ┆ --- │
42
+ # # │ date ┆ date │
43
+ # # ╞════════════╪════════════╡
44
+ # # │ 2020-01-01 ┆ 2020-01-08 │
45
+ # # │ 2020-01-02 ┆ 2020-01-09 │
46
+ # # └────────────┴────────────┘
47
+ def add_business_days(
48
+ n,
49
+ week_mask: [true, true, true, true, true, false, false],
50
+ roll: "raise"
51
+ )
52
+ n_rbexpr = Utils.parse_into_expression(n)
53
+ Utils.wrap_expr(
54
+ _rbexpr.dt_add_business_days(
55
+ n_rbexpr,
56
+ week_mask,
57
+ [],
58
+ roll
59
+ )
60
+ )
61
+ end
62
+
12
63
  # Divide the date/datetime range into buckets.
13
64
  #
14
65
  # Each date/datetime is mapped to the start of its bucket.
@@ -203,6 +254,93 @@ module Polars
203
254
  Utils.wrap_expr(_rbexpr.dt_round(every))
204
255
  end
205
256
 
257
+ # Replace one or more date/time components (year, month, day, hour, minute, second, microsecond).
258
+ #
259
+ # @param year [Object]
260
+ # Column or literal.
261
+ # @param month [Object]
262
+ # Column or literal, ranging from 1-12.
263
+ # @param day [Object]
264
+ # Column or literal, ranging from 1-31.
265
+ # @param hour [Object]
266
+ # Column or literal, ranging from 0-23.
267
+ # @param minute [Object]
268
+ # Column or literal, ranging from 0-59.
269
+ # @param second [Object]
270
+ # Column or literal, ranging from 0-59.
271
+ # @param microsecond [Object]
272
+ # Column or literal, ranging from 0-999999.
273
+ # @param ambiguous [String]
274
+ # Determine how to deal with ambiguous datetimes:
275
+ #
276
+ # - `'raise'` (default): raise an error
277
+ # - `'earliest'`: use the earliest datetime
278
+ # - `'latest'`: use the latest datetime
279
+ # - `'null'`: set to null
280
+ #
281
+ # @return [Expr]
282
+ #
283
+ # @example
284
+ # df = Polars::DataFrame.new(
285
+ # {
286
+ # "date" => [Date.new(2024, 4, 1), Date.new(2025, 3, 16)],
287
+ # "new_day" => [10, 15]
288
+ # }
289
+ # )
290
+ # df.with_columns(Polars.col("date").dt.replace(day: "new_day").alias("replaced"))
291
+ # # =>
292
+ # # shape: (2, 3)
293
+ # # ┌────────────┬─────────┬────────────┐
294
+ # # │ date ┆ new_day ┆ replaced │
295
+ # # │ --- ┆ --- ┆ --- │
296
+ # # │ date ┆ i64 ┆ date │
297
+ # # ╞════════════╪═════════╪════════════╡
298
+ # # │ 2024-04-01 ┆ 10 ┆ 2024-04-10 │
299
+ # # │ 2025-03-16 ┆ 15 ┆ 2025-03-15 │
300
+ # # └────────────┴─────────┴────────────┘
301
+ #
302
+ # @example
303
+ # df.with_columns(Polars.col("date").dt.replace(year: 1800).alias("replaced"))
304
+ # # =>
305
+ # # shape: (2, 3)
306
+ # # ┌────────────┬─────────┬────────────┐
307
+ # # │ date ┆ new_day ┆ replaced │
308
+ # # │ --- ┆ --- ┆ --- │
309
+ # # │ date ┆ i64 ┆ date │
310
+ # # ╞════════════╪═════════╪════════════╡
311
+ # # │ 2024-04-01 ┆ 10 ┆ 1800-04-01 │
312
+ # # │ 2025-03-16 ┆ 15 ┆ 1800-03-16 │
313
+ # # └────────────┴─────────┴────────────┘
314
+ def replace(
315
+ year: nil,
316
+ month: nil,
317
+ day: nil,
318
+ hour: nil,
319
+ minute: nil,
320
+ second: nil,
321
+ microsecond: nil,
322
+ ambiguous: "raise"
323
+ )
324
+ day, month, year, hour, minute, second, microsecond = (
325
+ Utils.parse_into_list_of_expressions(
326
+ day, month, year, hour, minute, second, microsecond
327
+ )
328
+ )
329
+ ambiguous_expr = Utils.parse_into_expression(ambiguous, str_as_lit: true)
330
+ Utils.wrap_expr(
331
+ _rbexpr.dt_replace(
332
+ year,
333
+ month,
334
+ day,
335
+ hour,
336
+ minute,
337
+ second,
338
+ microsecond,
339
+ ambiguous_expr
340
+ )
341
+ )
342
+ end
343
+
206
344
  # Create a naive Datetime from an existing Date/Datetime expression and a Time.
207
345
  #
208
346
  # If the underlying expression is a Datetime then its time component is replaced,
@@ -317,6 +455,82 @@ module Polars
317
455
  Utils.wrap_expr(_rbexpr.strftime(fmt))
318
456
  end
319
457
 
458
+ # Extract the millennium from underlying representation.
459
+ #
460
+ # Applies to Date and Datetime columns.
461
+ #
462
+ # Returns the millennium number in the calendar date.
463
+ #
464
+ # @return [Expr]
465
+ #
466
+ # @example
467
+ # df = Polars::DataFrame.new(
468
+ # {
469
+ # "date" => [
470
+ # Date.new(999, 12, 31),
471
+ # Date.new(1897, 5, 7),
472
+ # Date.new(2000, 1, 1),
473
+ # Date.new(2001, 7, 5),
474
+ # Date.new(3002, 10, 20)
475
+ # ]
476
+ # }
477
+ # )
478
+ # df.with_columns(mlnm: Polars.col("date").dt.millennium)
479
+ # # =>
480
+ # # shape: (5, 2)
481
+ # # ┌────────────┬──────┐
482
+ # # │ date ┆ mlnm │
483
+ # # │ --- ┆ --- │
484
+ # # │ date ┆ i32 │
485
+ # # ╞════════════╪══════╡
486
+ # # │ 0999-12-31 ┆ 1 │
487
+ # # │ 1897-05-07 ┆ 2 │
488
+ # # │ 2000-01-01 ┆ 2 │
489
+ # # │ 2001-07-05 ┆ 3 │
490
+ # # │ 3002-10-20 ┆ 4 │
491
+ # # └────────────┴──────┘
492
+ def millennium
493
+ Utils.wrap_expr(_rbexpr.dt_millennium)
494
+ end
495
+
496
+ # Extract the century from underlying representation.
497
+ #
498
+ # Applies to Date and Datetime columns.
499
+ #
500
+ # Returns the century number in the calendar date.
501
+ #
502
+ # @return [Expr]
503
+ #
504
+ # @example
505
+ # df = Polars::DataFrame.new(
506
+ # {
507
+ # "date" => [
508
+ # Date.new(999, 12, 31),
509
+ # Date.new(1897, 5, 7),
510
+ # Date.new(2000, 1, 1),
511
+ # Date.new(2001, 7, 5),
512
+ # Date.new(3002, 10, 20)
513
+ # ]
514
+ # }
515
+ # )
516
+ # df.with_columns(cent: Polars.col("date").dt.century)
517
+ # # =>
518
+ # # shape: (5, 2)
519
+ # # ┌────────────┬──────┐
520
+ # # │ date ┆ cent │
521
+ # # │ --- ┆ --- │
522
+ # # │ date ┆ i32 │
523
+ # # ╞════════════╪══════╡
524
+ # # │ 0999-12-31 ┆ 10 │
525
+ # # │ 1897-05-07 ┆ 19 │
526
+ # # │ 2000-01-01 ┆ 20 │
527
+ # # │ 2001-07-05 ┆ 21 │
528
+ # # │ 3002-10-20 ┆ 31 │
529
+ # # └────────────┴──────┘
530
+ def century
531
+ Utils.wrap_expr(_rbexpr.dt_century)
532
+ end
533
+
320
534
  # Extract year from underlying Date representation.
321
535
  #
322
536
  # Applies to Date and Datetime columns.
@@ -348,6 +562,43 @@ module Polars
348
562
  Utils.wrap_expr(_rbexpr.dt_year)
349
563
  end
350
564
 
565
+ # Determine whether each day lands on a business day.
566
+ #
567
+ # @note
568
+ # This functionality is considered **unstable**. It may be changed
569
+ # at any point without it being considered a breaking change.
570
+ #
571
+ # @param week_mask [Array]
572
+ # Which days of the week to count. The default is Monday to Friday.
573
+ # If you wanted to count only Monday to Thursday, you would pass
574
+ # `[true, true, true, true, false, false, false]`.
575
+ #
576
+ # @return [Expr]
577
+ #
578
+ # @example
579
+ # df = Polars::DataFrame.new({"start" => [Date.new(2020, 1, 3), Date.new(2020, 1, 5)]})
580
+ # df.with_columns(is_business_day: Polars.col("start").dt.is_business_day)
581
+ # # =>
582
+ # # shape: (2, 2)
583
+ # # ┌────────────┬─────────────────┐
584
+ # # │ start ┆ is_business_day │
585
+ # # │ --- ┆ --- │
586
+ # # │ date ┆ bool │
587
+ # # ╞════════════╪═════════════════╡
588
+ # # │ 2020-01-03 ┆ true │
589
+ # # │ 2020-01-05 ┆ false │
590
+ # # └────────────┴─────────────────┘
591
+ def is_business_day(
592
+ week_mask: [true, true, true, true, true, false, false]
593
+ )
594
+ Utils.wrap_expr(
595
+ _rbexpr.dt_is_business_day(
596
+ week_mask,
597
+ []
598
+ )
599
+ )
600
+ end
601
+
351
602
  # Determine whether the year of the underlying date is a leap year.
352
603
  #
353
604
  # Applies to Date and Datetime columns.
@@ -937,7 +1188,7 @@ module Polars
937
1188
  if Utils::DTYPE_TEMPORAL_UNITS.include?(time_unit)
938
1189
  timestamp(time_unit)
939
1190
  elsif time_unit == "s"
940
- Utils.wrap_expr(_rbexpr.dt_epoch_seconds)
1191
+ timestamp("ms").floordiv(F.lit(1000, dtype: Int64))
941
1192
  elsif time_unit == "d"
942
1193
  Utils.wrap_expr(_rbexpr).cast(:date).cast(:i32)
943
1194
  else