polars-df 0.20.0-x64-mingw-ucrt → 0.21.1-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE-THIRD-PARTY.txt +2153 -2532
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +12 -2
@@ -0,0 +1,52 @@
|
|
1
|
+
module Polars
|
2
|
+
# A lazily instantiated `DataType` that can be used in an `Expr`.
|
3
|
+
class DataTypeExpr
|
4
|
+
# @private
|
5
|
+
attr_accessor :_rbdatatype_expr
|
6
|
+
|
7
|
+
# @private
|
8
|
+
def self._from_rbdatatype_expr(rbdatatype_expr)
|
9
|
+
slf = new
|
10
|
+
slf._rbdatatype_expr = rbdatatype_expr
|
11
|
+
slf
|
12
|
+
end
|
13
|
+
|
14
|
+
# Materialize the `DataTypeExpr` in a specific context.
|
15
|
+
#
|
16
|
+
# This is a useful function when debugging datatype expressions.
|
17
|
+
#
|
18
|
+
# @return [DataType]
|
19
|
+
#
|
20
|
+
# @example
|
21
|
+
# lf = Polars::LazyFrame.new(
|
22
|
+
# {
|
23
|
+
# "a" => [1, 2, 3]
|
24
|
+
# }
|
25
|
+
# )
|
26
|
+
# Polars.dtype_of("a").collect_dtype(lf)
|
27
|
+
# # => Polars::Int64
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# Polars.dtype_of("a").collect_dtype({"a" => Polars::String})
|
31
|
+
# # => Polars::String
|
32
|
+
def collect_dtype(
|
33
|
+
context
|
34
|
+
)
|
35
|
+
schema = nil
|
36
|
+
if context.is_a?(Schema)
|
37
|
+
schema = context
|
38
|
+
elsif context.is_a?(Hash)
|
39
|
+
schema = Schema.new(context)
|
40
|
+
elsif context.is_a?(DataFrame)
|
41
|
+
schema = context.schema
|
42
|
+
elsif context.is_a?(LazyFrame)
|
43
|
+
schema = context.collect_schema
|
44
|
+
else
|
45
|
+
msg = "DataTypeExpr.collect_dtype did not expect #{context.inspect}"
|
46
|
+
raise TypeError, msg
|
47
|
+
end
|
48
|
+
|
49
|
+
_rbdatatype_expr.collect_dtype(schema.to_h)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/polars/data_types.rb
CHANGED
@@ -99,7 +99,18 @@ module Polars
|
|
99
99
|
self < NestedType
|
100
100
|
end
|
101
101
|
|
102
|
-
|
102
|
+
# Return a `DataTypeExpr` with a static `DataType`.
|
103
|
+
#
|
104
|
+
# @return [DataTypeExpr]
|
105
|
+
#
|
106
|
+
# @example
|
107
|
+
# Polars::Int16.new.to_dtype_expr.collect_dtype({})
|
108
|
+
# # => Polars::Int16
|
109
|
+
def self.to_dtype_expr
|
110
|
+
DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
|
111
|
+
end
|
112
|
+
|
113
|
+
[:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?, :to_dtype_expr].each do |v|
|
103
114
|
define_method(v) do
|
104
115
|
self.class.public_send(v)
|
105
116
|
end
|
@@ -294,12 +305,57 @@ module Polars
|
|
294
305
|
end
|
295
306
|
end
|
296
307
|
|
308
|
+
# A named collection of categories for `Categorical`.
|
309
|
+
#
|
310
|
+
# Two categories are considered equal (and will use the same physical mapping of
|
311
|
+
# categories to strings) if they have the same name, namespace and physical backing
|
312
|
+
# type, even if they are created in separate calls to `Categories`.
|
313
|
+
#
|
314
|
+
# @note
|
315
|
+
# This functionality is currently considered **unstable**. It may be
|
316
|
+
# changed at any point without it being considered a breaking change.
|
317
|
+
class Categories
|
318
|
+
attr_accessor :_categories
|
319
|
+
|
320
|
+
def initialize
|
321
|
+
# TODO: accept a `name` argument; for now `name` is always nil, so only the global categories are used
|
322
|
+
name = nil
|
323
|
+
if name.nil? || name == ""
|
324
|
+
@_categories = RbCategories.global_categories
|
325
|
+
return
|
326
|
+
end
|
327
|
+
|
328
|
+
raise Todo
|
329
|
+
end
|
330
|
+
|
331
|
+
# @private
|
332
|
+
def self._from_rb_categories(rb_categories)
|
333
|
+
slf = new
|
334
|
+
slf._categories = rb_categories
|
335
|
+
slf
|
336
|
+
end
|
337
|
+
end
|
338
|
+
|
297
339
|
# A categorical encoding of a set of strings.
|
298
340
|
class Categorical < DataType
|
299
|
-
attr_reader :ordering
|
341
|
+
attr_reader :ordering, :categories
|
300
342
|
|
301
|
-
def initialize(ordering = "physical")
|
302
|
-
|
343
|
+
def initialize(ordering = "physical", **kwargs)
|
344
|
+
if ordering.is_a?(Categories)
|
345
|
+
@ordering = "lexical"
|
346
|
+
@categories = ordering
|
347
|
+
# assert kwargs.length == 0
|
348
|
+
return
|
349
|
+
end
|
350
|
+
|
351
|
+
@ordering = "lexical"
|
352
|
+
if kwargs[:categories]
|
353
|
+
# assert kwargs.length == 1
|
354
|
+
@categories = kwargs[:categories]
|
355
|
+
else
|
356
|
+
# assert kwargs.length == 0
|
357
|
+
@categories = Categories.new
|
358
|
+
end
|
303
359
|
end
|
304
360
|
end
|
305
361
|
|
@@ -357,7 +413,7 @@ module Polars
|
|
357
413
|
class Object < DataType
|
358
414
|
end
|
359
415
|
|
360
|
-
# Type representing Null /
|
416
|
+
# Type representing Null / nil values.
|
361
417
|
class Null < DataType
|
362
418
|
end
|
363
419
|
|
@@ -9,6 +9,57 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Offset by `n` business days.
|
13
|
+
#
|
14
|
+
# @note
|
15
|
+
# This functionality is considered **unstable**. It may be changed
|
16
|
+
# at any point without it being considered a breaking change.
|
17
|
+
#
|
18
|
+
# @param n
|
19
|
+
# Number of business days to offset by. Can be a single number or an
|
20
|
+
# expression.
|
21
|
+
# @param week_mask
|
22
|
+
# Which days of the week to count. The default is Monday to Friday.
|
23
|
+
# If you wanted to count only Monday to Thursday, you would pass
|
24
|
+
# `[true, true, true, true, false, false, false]`.
|
25
|
+
# @param roll
|
26
|
+
# What to do when the start date lands on a non-business day. Options are:
|
27
|
+
#
|
28
|
+
# - `'raise'`: raise an error
|
29
|
+
# - `'forward'`: move to the next business day
|
30
|
+
# - `'backward'`: move to the previous business day
|
31
|
+
#
|
32
|
+
# @return [Expr]
|
33
|
+
#
|
34
|
+
# @example
|
35
|
+
# df = Polars::DataFrame.new({"start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]})
|
36
|
+
# df.with_columns(result: Polars.col("start").dt.add_business_days(5))
|
37
|
+
# # =>
|
38
|
+
# # shape: (2, 2)
|
39
|
+
# # ┌────────────┬────────────┐
|
40
|
+
# # │ start ┆ result │
|
41
|
+
# # │ --- ┆ --- │
|
42
|
+
# # │ date ┆ date │
|
43
|
+
# # ╞════════════╪════════════╡
|
44
|
+
# # │ 2020-01-01 ┆ 2020-01-08 │
|
45
|
+
# # │ 2020-01-02 ┆ 2020-01-09 │
|
46
|
+
# # └────────────┴────────────┘
|
47
|
+
def add_business_days(
|
48
|
+
n,
|
49
|
+
week_mask: [true, true, true, true, true, false, false],
|
50
|
+
roll: "raise"
|
51
|
+
)
|
52
|
+
n_rbexpr = Utils.parse_into_expression(n)
|
53
|
+
Utils.wrap_expr(
|
54
|
+
_rbexpr.dt_add_business_days(
|
55
|
+
n_rbexpr,
|
56
|
+
week_mask,
|
57
|
+
[],
|
58
|
+
roll
|
59
|
+
)
|
60
|
+
)
|
61
|
+
end
|
62
|
+
|
12
63
|
# Divide the date/datetime range into buckets.
|
13
64
|
#
|
14
65
|
# Each date/datetime is mapped to the start of its bucket.
|
@@ -203,6 +254,93 @@ module Polars
|
|
203
254
|
Utils.wrap_expr(_rbexpr.dt_round(every))
|
204
255
|
end
|
205
256
|
|
257
|
+
# Replace one or more time units (year, month, day, hour, minute, second, microsecond).
|
258
|
+
#
|
259
|
+
# @param year [Object]
|
260
|
+
# Column or literal.
|
261
|
+
# @param month [Object]
|
262
|
+
# Column or literal, ranging from 1-12.
|
263
|
+
# @param day [Object]
|
264
|
+
# Column or literal, ranging from 1-31.
|
265
|
+
# @param hour [Object]
|
266
|
+
# Column or literal, ranging from 0-23.
|
267
|
+
# @param minute [Object]
|
268
|
+
# Column or literal, ranging from 0-59.
|
269
|
+
# @param second [Object]
|
270
|
+
# Column or literal, ranging from 0-59.
|
271
|
+
# @param microsecond [Object]
|
272
|
+
# Column or literal, ranging from 0-999999.
|
273
|
+
# @param ambiguous [String]
|
274
|
+
# Determine how to deal with ambiguous datetimes:
|
275
|
+
#
|
276
|
+
# - `'raise'` (default): raise
|
277
|
+
# - `'earliest'`: use the earliest datetime
|
278
|
+
# - `'latest'`: use the latest datetime
|
279
|
+
# - `'null'`: set to null
|
280
|
+
#
|
281
|
+
# @return [Expr]
|
282
|
+
#
|
283
|
+
# @example
|
284
|
+
# df = Polars::DataFrame.new(
|
285
|
+
# {
|
286
|
+
# "date" => [Date.new(2024, 4, 1), Date.new(2025, 3, 16)],
|
287
|
+
# "new_day" => [10, 15]
|
288
|
+
# }
|
289
|
+
# )
|
290
|
+
# df.with_columns(Polars.col("date").dt.replace(day: "new_day").alias("replaced"))
|
291
|
+
# # =>
|
292
|
+
# # shape: (2, 3)
|
293
|
+
# # ┌────────────┬─────────┬────────────┐
|
294
|
+
# # │ date ┆ new_day ┆ replaced │
|
295
|
+
# # │ --- ┆ --- ┆ --- │
|
296
|
+
# # │ date ┆ i64 ┆ date │
|
297
|
+
# # ╞════════════╪═════════╪════════════╡
|
298
|
+
# # │ 2024-04-01 ┆ 10 ┆ 2024-04-10 │
|
299
|
+
# # │ 2025-03-16 ┆ 15 ┆ 2025-03-15 │
|
300
|
+
# # └────────────┴─────────┴────────────┘
|
301
|
+
#
|
302
|
+
# @example
|
303
|
+
# df.with_columns(Polars.col("date").dt.replace(year: 1800).alias("replaced"))
|
304
|
+
# # =>
|
305
|
+
# # shape: (2, 3)
|
306
|
+
# # ┌────────────┬─────────┬────────────┐
|
307
|
+
# # │ date ┆ new_day ┆ replaced │
|
308
|
+
# # │ --- ┆ --- ┆ --- │
|
309
|
+
# # │ date ┆ i64 ┆ date │
|
310
|
+
# # ╞════════════╪═════════╪════════════╡
|
311
|
+
# # │ 2024-04-01 ┆ 10 ┆ 1800-04-01 │
|
312
|
+
# # │ 2025-03-16 ┆ 15 ┆ 1800-03-16 │
|
313
|
+
# # └────────────┴─────────┴────────────┘
|
314
|
+
def replace(
|
315
|
+
year: nil,
|
316
|
+
month: nil,
|
317
|
+
day: nil,
|
318
|
+
hour: nil,
|
319
|
+
minute: nil,
|
320
|
+
second: nil,
|
321
|
+
microsecond: nil,
|
322
|
+
ambiguous: "raise"
|
323
|
+
)
|
324
|
+
day, month, year, hour, minute, second, microsecond = (
|
325
|
+
Utils.parse_into_list_of_expressions(
|
326
|
+
day, month, year, hour, minute, second, microsecond
|
327
|
+
)
|
328
|
+
)
|
329
|
+
ambiguous_expr = Utils.parse_into_expression(ambiguous, str_as_lit: true)
|
330
|
+
Utils.wrap_expr(
|
331
|
+
_rbexpr.dt_replace(
|
332
|
+
year,
|
333
|
+
month,
|
334
|
+
day,
|
335
|
+
hour,
|
336
|
+
minute,
|
337
|
+
second,
|
338
|
+
microsecond,
|
339
|
+
ambiguous_expr
|
340
|
+
)
|
341
|
+
)
|
342
|
+
end
|
343
|
+
|
206
344
|
# Create a naive Datetime from an existing Date/Datetime expression and a Time.
|
207
345
|
#
|
208
346
|
# If the underlying expression is a Datetime then its time component is replaced,
|
@@ -317,6 +455,82 @@ module Polars
|
|
317
455
|
Utils.wrap_expr(_rbexpr.strftime(fmt))
|
318
456
|
end
|
319
457
|
|
458
|
+
# Extract the millennium from underlying representation.
|
459
|
+
#
|
460
|
+
# Applies to Date and Datetime columns.
|
461
|
+
#
|
462
|
+
# Returns the millennium number in the calendar date.
|
463
|
+
#
|
464
|
+
# @return [Expr]
|
465
|
+
#
|
466
|
+
# @example
|
467
|
+
# df = Polars::DataFrame.new(
|
468
|
+
# {
|
469
|
+
# "date" => [
|
470
|
+
# Date.new(999, 12, 31),
|
471
|
+
# Date.new(1897, 5, 7),
|
472
|
+
# Date.new(2000, 1, 1),
|
473
|
+
# Date.new(2001, 7, 5),
|
474
|
+
# Date.new(3002, 10, 20)
|
475
|
+
# ]
|
476
|
+
# }
|
477
|
+
# )
|
478
|
+
# df.with_columns(mlnm: Polars.col("date").dt.millennium)
|
479
|
+
# # =>
|
480
|
+
# # shape: (5, 2)
|
481
|
+
# # ┌────────────┬──────┐
|
482
|
+
# # │ date ┆ mlnm │
|
483
|
+
# # │ --- ┆ --- │
|
484
|
+
# # │ date ┆ i32 │
|
485
|
+
# # ╞════════════╪══════╡
|
486
|
+
# # │ 0999-12-31 ┆ 1 │
|
487
|
+
# # │ 1897-05-07 ┆ 2 │
|
488
|
+
# # │ 2000-01-01 ┆ 2 │
|
489
|
+
# # │ 2001-07-05 ┆ 3 │
|
490
|
+
# # │ 3002-10-20 ┆ 4 │
|
491
|
+
# # └────────────┴──────┘
|
492
|
+
def millennium
|
493
|
+
Utils.wrap_expr(_rbexpr.dt_millennium)
|
494
|
+
end
|
495
|
+
|
496
|
+
# Extract the century from underlying representation.
|
497
|
+
#
|
498
|
+
# Applies to Date and Datetime columns.
|
499
|
+
#
|
500
|
+
# Returns the century number in the calendar date.
|
501
|
+
#
|
502
|
+
# @return [Expr]
|
503
|
+
#
|
504
|
+
# @example
|
505
|
+
# df = Polars::DataFrame.new(
|
506
|
+
# {
|
507
|
+
# "date" => [
|
508
|
+
# Date.new(999, 12, 31),
|
509
|
+
# Date.new(1897, 5, 7),
|
510
|
+
# Date.new(2000, 1, 1),
|
511
|
+
# Date.new(2001, 7, 5),
|
512
|
+
# Date.new(3002, 10, 20)
|
513
|
+
# ]
|
514
|
+
# }
|
515
|
+
# )
|
516
|
+
# df.with_columns(cent: Polars.col("date").dt.century)
|
517
|
+
# # =>
|
518
|
+
# # shape: (5, 2)
|
519
|
+
# # ┌────────────┬──────┐
|
520
|
+
# # │ date ┆ cent │
|
521
|
+
# # │ --- ┆ --- │
|
522
|
+
# # │ date ┆ i32 │
|
523
|
+
# # ╞════════════╪══════╡
|
524
|
+
# # │ 0999-12-31 ┆ 10 │
|
525
|
+
# # │ 1897-05-07 ┆ 19 │
|
526
|
+
# # │ 2000-01-01 ┆ 20 │
|
527
|
+
# # │ 2001-07-05 ┆ 21 │
|
528
|
+
# # │ 3002-10-20 ┆ 31 │
|
529
|
+
# # └────────────┴──────┘
|
530
|
+
def century
|
531
|
+
Utils.wrap_expr(_rbexpr.dt_century)
|
532
|
+
end
|
533
|
+
|
320
534
|
# Extract year from underlying Date representation.
|
321
535
|
#
|
322
536
|
# Applies to Date and Datetime columns.
|
@@ -348,6 +562,43 @@ module Polars
|
|
348
562
|
Utils.wrap_expr(_rbexpr.dt_year)
|
349
563
|
end
|
350
564
|
|
565
|
+
# Determine whether each day lands on a business day.
|
566
|
+
#
|
567
|
+
# @note
|
568
|
+
# This functionality is considered **unstable**. It may be changed
|
569
|
+
# at any point without it being considered a breaking change.
|
570
|
+
#
|
571
|
+
# @param week_mask [Array]
|
572
|
+
# Which days of the week to count. The default is Monday to Friday.
|
573
|
+
# If you wanted to count only Monday to Thursday, you would pass
|
574
|
+
# `[true, true, true, true, false, false, false]`.
|
575
|
+
#
|
576
|
+
# @return [Expr]
|
577
|
+
#
|
578
|
+
# @example
|
579
|
+
# df = Polars::DataFrame.new({"start" => [Date.new(2020, 1, 3), Date.new(2020, 1, 5)]})
|
580
|
+
# df.with_columns(is_business_day: Polars.col("start").dt.is_business_day)
|
581
|
+
# # =>
|
582
|
+
# # shape: (2, 2)
|
583
|
+
# # ┌────────────┬─────────────────┐
|
584
|
+
# # │ start ┆ is_business_day │
|
585
|
+
# # │ --- ┆ --- │
|
586
|
+
# # │ date ┆ bool │
|
587
|
+
# # ╞════════════╪═════════════════╡
|
588
|
+
# # │ 2020-01-03 ┆ true │
|
589
|
+
# # │ 2020-01-05 ┆ false │
|
590
|
+
# # └────────────┴─────────────────┘
|
591
|
+
def is_business_day(
|
592
|
+
week_mask: [true, true, true, true, true, false, false]
|
593
|
+
)
|
594
|
+
Utils.wrap_expr(
|
595
|
+
_rbexpr.dt_is_business_day(
|
596
|
+
week_mask,
|
597
|
+
[]
|
598
|
+
)
|
599
|
+
)
|
600
|
+
end
|
601
|
+
|
351
602
|
# Determine whether the year of the underlying date is a leap year.
|
352
603
|
#
|
353
604
|
# Applies to Date and Datetime columns.
|