polars-df 0.21.0-x86_64-linux-musl → 0.22.0-x86_64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +55 -48
- data/Cargo.toml +3 -0
- data/LICENSE-THIRD-PARTY.txt +23 -49
- data/README.md +12 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +794 -27
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +26 -5
- data/lib/polars/date_time_expr.rb +252 -1
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1248 -206
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +14 -1
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/iceberg.rb +27 -0
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +32 -7
- data/lib/polars/io/scan_options.rb +4 -1
- data/lib/polars/lazy_frame.rb +1028 -28
- data/lib/polars/list_expr.rb +217 -17
- data/lib/polars/list_name_space.rb +231 -22
- data/lib/polars/meta_expr.rb +89 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +20 -1
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1053 -54
- data/lib/polars/string_expr.rb +436 -32
- data/lib/polars/string_name_space.rb +736 -50
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +22 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +8 -2
@@ -0,0 +1,52 @@
|
|
1
|
+
module Polars
|
2
|
+
# A lazily instantiated `DataType` that can be used in an `Expr`.
|
3
|
+
class DataTypeExpr
|
4
|
+
# @private
|
5
|
+
attr_accessor :_rbdatatype_expr
|
6
|
+
|
7
|
+
# @private
|
8
|
+
def self._from_rbdatatype_expr(rbdatatype_expr)
|
9
|
+
slf = new
|
10
|
+
slf._rbdatatype_expr = rbdatatype_expr
|
11
|
+
slf
|
12
|
+
end
|
13
|
+
|
14
|
+
# Materialize the `DataTypeExpr` in a specific context.
|
15
|
+
#
|
16
|
+
# This is a useful function when debugging datatype expressions.
|
17
|
+
#
|
18
|
+
# @return [DataType]
|
19
|
+
#
|
20
|
+
# @example
|
21
|
+
# lf = Polars::LazyFrame.new(
|
22
|
+
# {
|
23
|
+
# "a" => [1, 2, 3]
|
24
|
+
# }
|
25
|
+
# )
|
26
|
+
# Polars.dtype_of("a").collect_dtype(lf)
|
27
|
+
# # => Polars::Int64
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# Polars.dtype_of("a").collect_dtype({"a" => Polars::String})
|
31
|
+
# # => Polars::String
|
32
|
+
def collect_dtype(
|
33
|
+
context
|
34
|
+
)
|
35
|
+
schema = nil
|
36
|
+
if context.is_a?(Schema)
|
37
|
+
schema = context
|
38
|
+
elsif context.is_a?(Hash)
|
39
|
+
schema = Schema.new(context)
|
40
|
+
elsif context.is_a?(DataFrame)
|
41
|
+
schema = context.schema
|
42
|
+
elsif context.is_a?(LazyFrame)
|
43
|
+
schema = context.collect_schema
|
44
|
+
else
|
45
|
+
msg = "DataTypeExpr.collect_dtype did not expect #{context.inspect}"
|
46
|
+
raise TypeError, msg
|
47
|
+
end
|
48
|
+
|
49
|
+
_rbdatatype_expr.collect_dtype(schema.to_h)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/polars/data_types.rb
CHANGED
@@ -99,12 +99,34 @@ module Polars
|
|
99
99
|
self < NestedType
|
100
100
|
end
|
101
101
|
|
102
|
+
# Return a `DataTypeExpr` with a static `DataType`.
|
103
|
+
#
|
104
|
+
# @return [DataTypeExpr]
|
105
|
+
#
|
106
|
+
# @example
|
107
|
+
# Polars::Int16.new.to_dtype_expr.collect_dtype({})
|
108
|
+
# # => Polars::Int16
|
109
|
+
def self.to_dtype_expr
|
110
|
+
DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
|
111
|
+
end
|
112
|
+
|
102
113
|
[:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
|
103
114
|
define_method(v) do
|
104
115
|
self.class.public_send(v)
|
105
116
|
end
|
106
117
|
end
|
107
118
|
|
119
|
+
# Return a `DataTypeExpr` with a static `DataType`.
|
120
|
+
#
|
121
|
+
# @return [DataTypeExpr]
|
122
|
+
#
|
123
|
+
# @example
|
124
|
+
# Polars::Int16.new.to_dtype_expr.collect_dtype({})
|
125
|
+
# # => Polars::Int16
|
126
|
+
def to_dtype_expr
|
127
|
+
DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
|
128
|
+
end
|
129
|
+
|
108
130
|
# Returns a string representing the data type.
|
109
131
|
#
|
110
132
|
# @return [String]
|
@@ -306,17 +328,16 @@ module Polars
|
|
306
328
|
class Categories
|
307
329
|
attr_accessor :_categories
|
308
330
|
|
309
|
-
def initialize
|
310
|
-
# TODO fix
|
311
|
-
name = nil
|
331
|
+
def initialize(name = nil)
|
312
332
|
if name.nil? || name == ""
|
313
|
-
|
333
|
+
self._categories = RbCategories.global_categories
|
314
334
|
return
|
315
335
|
end
|
316
336
|
|
317
337
|
raise Todo
|
318
338
|
end
|
319
339
|
|
340
|
+
# @private
|
320
341
|
def self._from_rb_categories(rb_categories)
|
321
342
|
slf = new
|
322
343
|
slf._categories = rb_categories
|
@@ -401,7 +422,7 @@ module Polars
|
|
401
422
|
class Object < DataType
|
402
423
|
end
|
403
424
|
|
404
|
-
# Type representing Null /
|
425
|
+
# Type representing Null / nil values.
|
405
426
|
class Null < DataType
|
406
427
|
end
|
407
428
|
|
@@ -9,6 +9,57 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Offset by `n` business days.
|
13
|
+
#
|
14
|
+
# @note
|
15
|
+
# This functionality is considered **unstable**. It may be changed
|
16
|
+
# at any point without it being considered a breaking change.
|
17
|
+
#
|
18
|
+
# @param n
|
19
|
+
# Number of business days to offset by. Can be a single number or an
|
20
|
+
# expression.
|
21
|
+
# @param week_mask
|
22
|
+
# Which days of the week to count. The default is Monday to Friday.
|
23
|
+
# If you wanted to count only Monday to Thursday, you would pass
|
24
|
+
# `[true, true, true, true, false, false, false]`.
|
25
|
+
# @param roll
|
26
|
+
# What to do when the start date lands on a non-business day. Options are:
|
27
|
+
#
|
28
|
+
# - `'raise'`: raise an error
|
29
|
+
# - `'forward'`: move to the next business day
|
30
|
+
# - `'backward'`: move to the previous business day
|
31
|
+
#
|
32
|
+
# @return [Expr]
|
33
|
+
#
|
34
|
+
# @example
|
35
|
+
# df = Polars::DataFrame.new({"start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]})
|
36
|
+
# df.with_columns(result: Polars.col("start").dt.add_business_days(5))
|
37
|
+
# # =>
|
38
|
+
# # shape: (2, 2)
|
39
|
+
# # ┌────────────┬────────────┐
|
40
|
+
# # │ start ┆ result │
|
41
|
+
# # │ --- ┆ --- │
|
42
|
+
# # │ date ┆ date │
|
43
|
+
# # ╞════════════╪════════════╡
|
44
|
+
# # │ 2020-01-01 ┆ 2020-01-08 │
|
45
|
+
# # │ 2020-01-02 ┆ 2020-01-09 │
|
46
|
+
# # └────────────┴────────────┘
|
47
|
+
def add_business_days(
|
48
|
+
n,
|
49
|
+
week_mask: [true, true, true, true, true, false, false],
|
50
|
+
roll: "raise"
|
51
|
+
)
|
52
|
+
n_rbexpr = Utils.parse_into_expression(n)
|
53
|
+
Utils.wrap_expr(
|
54
|
+
_rbexpr.dt_add_business_days(
|
55
|
+
n_rbexpr,
|
56
|
+
week_mask,
|
57
|
+
[],
|
58
|
+
roll
|
59
|
+
)
|
60
|
+
)
|
61
|
+
end
|
62
|
+
|
12
63
|
# Divide the date/datetime range into buckets.
|
13
64
|
#
|
14
65
|
# Each date/datetime is mapped to the start of its bucket.
|
@@ -203,6 +254,93 @@ module Polars
|
|
203
254
|
Utils.wrap_expr(_rbexpr.dt_round(every))
|
204
255
|
end
|
205
256
|
|
257
|
+
# Replace one or more time units (year, month, day, hour, minute, second, microsecond) of each date/datetime.
|
258
|
+
#
|
259
|
+
# @param year [Object]
|
260
|
+
# Column or literal.
|
261
|
+
# @param month [Object]
|
262
|
+
# Column or literal, ranging from 1-12.
|
263
|
+
# @param day [Object]
|
264
|
+
# Column or literal, ranging from 1-31.
|
265
|
+
# @param hour [Object]
|
266
|
+
# Column or literal, ranging from 0-23.
|
267
|
+
# @param minute [Object]
|
268
|
+
# Column or literal, ranging from 0-59.
|
269
|
+
# @param second [Object]
|
270
|
+
# Column or literal, ranging from 0-59.
|
271
|
+
# @param microsecond [Object]
|
272
|
+
# Column or literal, ranging from 0-999999.
|
273
|
+
# @param ambiguous [String]
|
274
|
+
# Determine how to deal with ambiguous datetimes:
|
275
|
+
#
|
276
|
+
# - `'raise'` (default): raise an error
|
277
|
+
# - `'earliest'`: use the earliest datetime
|
278
|
+
# - `'latest'`: use the latest datetime
|
279
|
+
# - `'null'`: set to null
|
280
|
+
#
|
281
|
+
# @return [Expr]
|
282
|
+
#
|
283
|
+
# @example
|
284
|
+
# df = Polars::DataFrame.new(
|
285
|
+
# {
|
286
|
+
# "date" => [Date.new(2024, 4, 1), Date.new(2025, 3, 16)],
|
287
|
+
# "new_day" => [10, 15]
|
288
|
+
# }
|
289
|
+
# )
|
290
|
+
# df.with_columns(Polars.col("date").dt.replace(day: "new_day").alias("replaced"))
|
291
|
+
# # =>
|
292
|
+
# # shape: (2, 3)
|
293
|
+
# # ┌────────────┬─────────┬────────────┐
|
294
|
+
# # │ date ┆ new_day ┆ replaced │
|
295
|
+
# # │ --- ┆ --- ┆ --- │
|
296
|
+
# # │ date ┆ i64 ┆ date │
|
297
|
+
# # ╞════════════╪═════════╪════════════╡
|
298
|
+
# # │ 2024-04-01 ┆ 10 ┆ 2024-04-10 │
|
299
|
+
# # │ 2025-03-16 ┆ 15 ┆ 2025-03-15 │
|
300
|
+
# # └────────────┴─────────┴────────────┘
|
301
|
+
#
|
302
|
+
# @example
|
303
|
+
# df.with_columns(Polars.col("date").dt.replace(year: 1800).alias("replaced"))
|
304
|
+
# # =>
|
305
|
+
# # shape: (2, 3)
|
306
|
+
# # ┌────────────┬─────────┬────────────┐
|
307
|
+
# # │ date ┆ new_day ┆ replaced │
|
308
|
+
# # │ --- ┆ --- ┆ --- │
|
309
|
+
# # │ date ┆ i64 ┆ date │
|
310
|
+
# # ╞════════════╪═════════╪════════════╡
|
311
|
+
# # │ 2024-04-01 ┆ 10 ┆ 1800-04-01 │
|
312
|
+
# # │ 2025-03-16 ┆ 15 ┆ 1800-03-16 │
|
313
|
+
# # └────────────┴─────────┴────────────┘
|
314
|
+
def replace(
|
315
|
+
year: nil,
|
316
|
+
month: nil,
|
317
|
+
day: nil,
|
318
|
+
hour: nil,
|
319
|
+
minute: nil,
|
320
|
+
second: nil,
|
321
|
+
microsecond: nil,
|
322
|
+
ambiguous: "raise"
|
323
|
+
)
|
324
|
+
day, month, year, hour, minute, second, microsecond = (
|
325
|
+
Utils.parse_into_list_of_expressions(
|
326
|
+
day, month, year, hour, minute, second, microsecond
|
327
|
+
)
|
328
|
+
)
|
329
|
+
ambiguous_expr = Utils.parse_into_expression(ambiguous, str_as_lit: true)
|
330
|
+
Utils.wrap_expr(
|
331
|
+
_rbexpr.dt_replace(
|
332
|
+
year,
|
333
|
+
month,
|
334
|
+
day,
|
335
|
+
hour,
|
336
|
+
minute,
|
337
|
+
second,
|
338
|
+
microsecond,
|
339
|
+
ambiguous_expr
|
340
|
+
)
|
341
|
+
)
|
342
|
+
end
|
343
|
+
|
206
344
|
# Create a naive Datetime from an existing Date/Datetime expression and a Time.
|
207
345
|
#
|
208
346
|
# If the underlying expression is a Datetime then its time component is replaced,
|
@@ -317,6 +455,82 @@ module Polars
|
|
317
455
|
Utils.wrap_expr(_rbexpr.strftime(fmt))
|
318
456
|
end
|
319
457
|
|
458
|
+
# Extract the millennium from the underlying representation.
|
459
|
+
#
|
460
|
+
# Applies to Date and Datetime columns.
|
461
|
+
#
|
462
|
+
# Returns the millennium number in the calendar date.
|
463
|
+
#
|
464
|
+
# @return [Expr]
|
465
|
+
#
|
466
|
+
# @example
|
467
|
+
# df = Polars::DataFrame.new(
|
468
|
+
# {
|
469
|
+
# "date" => [
|
470
|
+
# Date.new(999, 12, 31),
|
471
|
+
# Date.new(1897, 5, 7),
|
472
|
+
# Date.new(2000, 1, 1),
|
473
|
+
# Date.new(2001, 7, 5),
|
474
|
+
# Date.new(3002, 10, 20)
|
475
|
+
# ]
|
476
|
+
# }
|
477
|
+
# )
|
478
|
+
# df.with_columns(mlnm: Polars.col("date").dt.millennium)
|
479
|
+
# # =>
|
480
|
+
# # shape: (5, 2)
|
481
|
+
# # ┌────────────┬──────┐
|
482
|
+
# # │ date ┆ mlnm │
|
483
|
+
# # │ --- ┆ --- │
|
484
|
+
# # │ date ┆ i32 │
|
485
|
+
# # ╞════════════╪══════╡
|
486
|
+
# # │ 0999-12-31 ┆ 1 │
|
487
|
+
# # │ 1897-05-07 ┆ 2 │
|
488
|
+
# # │ 2000-01-01 ┆ 2 │
|
489
|
+
# # │ 2001-07-05 ┆ 3 │
|
490
|
+
# # │ 3002-10-20 ┆ 4 │
|
491
|
+
# # └────────────┴──────┘
|
492
|
+
def millennium
|
493
|
+
Utils.wrap_expr(_rbexpr.dt_millennium)
|
494
|
+
end
|
495
|
+
|
496
|
+
# Extract the century from the underlying representation.
|
497
|
+
#
|
498
|
+
# Applies to Date and Datetime columns.
|
499
|
+
#
|
500
|
+
# Returns the century number in the calendar date.
|
501
|
+
#
|
502
|
+
# @return [Expr]
|
503
|
+
#
|
504
|
+
# @example
|
505
|
+
# df = Polars::DataFrame.new(
|
506
|
+
# {
|
507
|
+
# "date" => [
|
508
|
+
# Date.new(999, 12, 31),
|
509
|
+
# Date.new(1897, 5, 7),
|
510
|
+
# Date.new(2000, 1, 1),
|
511
|
+
# Date.new(2001, 7, 5),
|
512
|
+
# Date.new(3002, 10, 20)
|
513
|
+
# ]
|
514
|
+
# }
|
515
|
+
# )
|
516
|
+
# df.with_columns(cent: Polars.col("date").dt.century)
|
517
|
+
# # =>
|
518
|
+
# # shape: (5, 2)
|
519
|
+
# # ┌────────────┬──────┐
|
520
|
+
# # │ date ┆ cent │
|
521
|
+
# # │ --- ┆ --- │
|
522
|
+
# # │ date ┆ i32 │
|
523
|
+
# # ╞════════════╪══════╡
|
524
|
+
# # │ 0999-12-31 ┆ 10 │
|
525
|
+
# # │ 1897-05-07 ┆ 19 │
|
526
|
+
# # │ 2000-01-01 ┆ 20 │
|
527
|
+
# # │ 2001-07-05 ┆ 21 │
|
528
|
+
# # │ 3002-10-20 ┆ 31 │
|
529
|
+
# # └────────────┴──────┘
|
530
|
+
def century
|
531
|
+
Utils.wrap_expr(_rbexpr.dt_century)
|
532
|
+
end
|
533
|
+
|
320
534
|
# Extract year from underlying Date representation.
|
321
535
|
#
|
322
536
|
# Applies to Date and Datetime columns.
|
@@ -348,6 +562,43 @@ module Polars
|
|
348
562
|
Utils.wrap_expr(_rbexpr.dt_year)
|
349
563
|
end
|
350
564
|
|
565
|
+
# Determine whether each day lands on a business day.
|
566
|
+
#
|
567
|
+
# @note
|
568
|
+
# This functionality is considered **unstable**. It may be changed
|
569
|
+
# at any point without it being considered a breaking change.
|
570
|
+
#
|
571
|
+
# @param week_mask [Array]
|
572
|
+
# Which days of the week to count. The default is Monday to Friday.
|
573
|
+
# If you wanted to count only Monday to Thursday, you would pass
|
574
|
+
# `[true, true, true, true, false, false, false]`.
|
575
|
+
#
|
576
|
+
# @return [Expr]
|
577
|
+
#
|
578
|
+
# @example
|
579
|
+
# df = Polars::DataFrame.new({"start" => [Date.new(2020, 1, 3), Date.new(2020, 1, 5)]})
|
580
|
+
# df.with_columns(is_business_day: Polars.col("start").dt.is_business_day)
|
581
|
+
# # =>
|
582
|
+
# # shape: (2, 2)
|
583
|
+
# # ┌────────────┬─────────────────┐
|
584
|
+
# # │ start ┆ is_business_day │
|
585
|
+
# # │ --- ┆ --- │
|
586
|
+
# # │ date ┆ bool │
|
587
|
+
# # ╞════════════╪═════════════════╡
|
588
|
+
# # │ 2020-01-03 ┆ true │
|
589
|
+
# # │ 2020-01-05 ┆ false │
|
590
|
+
# # └────────────┴─────────────────┘
|
591
|
+
def is_business_day(
|
592
|
+
week_mask: [true, true, true, true, true, false, false]
|
593
|
+
)
|
594
|
+
Utils.wrap_expr(
|
595
|
+
_rbexpr.dt_is_business_day(
|
596
|
+
week_mask,
|
597
|
+
[]
|
598
|
+
)
|
599
|
+
)
|
600
|
+
end
|
601
|
+
|
351
602
|
# Determine whether the year of the underlying date is a leap year.
|
352
603
|
#
|
353
604
|
# Applies to Date and Datetime columns.
|
@@ -937,7 +1188,7 @@ module Polars
|
|
937
1188
|
if Utils::DTYPE_TEMPORAL_UNITS.include?(time_unit)
|
938
1189
|
timestamp(time_unit)
|
939
1190
|
elsif time_unit == "s"
|
940
|
-
|
1191
|
+
timestamp("ms").floordiv(F.lit(1000, dtype: Int64))
|
941
1192
|
elsif time_unit == "d"
|
942
1193
|
Utils.wrap_expr(_rbexpr).cast(:date).cast(:i32)
|
943
1194
|
else
|