polars-df 0.21.1-x86_64-darwin → 0.23.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +120 -90
- data/Cargo.toml +3 -0
- data/LICENSE-THIRD-PARTY.txt +806 -381
- data/README.md +20 -7
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/3.4/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +1 -1
- data/lib/polars/data_frame.rb +119 -15
- data/lib/polars/data_types.rb +23 -6
- data/lib/polars/date_time_expr.rb +36 -15
- data/lib/polars/expr.rb +41 -32
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/lazy.rb +1 -1
- data/lib/polars/iceberg_dataset.rb +113 -0
- data/lib/polars/io/iceberg.rb +34 -0
- data/lib/polars/io/ipc.rb +28 -49
- data/lib/polars/io/parquet.rb +7 -4
- data/lib/polars/io/scan_options.rb +12 -3
- data/lib/polars/io/utils.rb +17 -0
- data/lib/polars/lazy_frame.rb +97 -10
- data/lib/polars/list_expr.rb +21 -13
- data/lib/polars/list_name_space.rb +33 -21
- data/lib/polars/meta_expr.rb +25 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +23 -1
- data/lib/polars/schema.rb +1 -1
- data/lib/polars/selectors.rb +8 -8
- data/lib/polars/series.rb +26 -2
- data/lib/polars/string_expr.rb +27 -28
- data/lib/polars/string_name_space.rb +18 -5
- data/lib/polars/utils/convert.rb +2 -2
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +4 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +8 -2
data/README.md
CHANGED
|
@@ -88,6 +88,12 @@ From Avro
|
|
|
88
88
|
Polars.read_avro("file.avro")
|
|
89
89
|
```
|
|
90
90
|
|
|
91
|
+
From Iceberg (requires [iceberg](https://github.com/ankane/iceberg-ruby)) [experimental]
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
Polars.scan_iceberg(table)
|
|
95
|
+
```
|
|
96
|
+
|
|
91
97
|
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental]
|
|
92
98
|
|
|
93
99
|
```ruby
|
|
@@ -365,6 +371,12 @@ Avro
|
|
|
365
371
|
df.write_avro("file.avro")
|
|
366
372
|
```
|
|
367
373
|
|
|
374
|
+
Iceberg [experimental]
|
|
375
|
+
|
|
376
|
+
```ruby
|
|
377
|
+
df.write_iceberg(table, mode: "append")
|
|
378
|
+
```
|
|
379
|
+
|
|
368
380
|
Delta Lake [experimental]
|
|
369
381
|
|
|
370
382
|
```ruby
|
|
@@ -388,13 +400,14 @@ Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float3
|
|
|
388
400
|
Supported types are:
|
|
389
401
|
|
|
390
402
|
- boolean - `Boolean`
|
|
391
|
-
-
|
|
392
|
-
-
|
|
393
|
-
-
|
|
394
|
-
-
|
|
395
|
-
-
|
|
396
|
-
-
|
|
397
|
-
-
|
|
403
|
+
- decimal - `Decimal`
|
|
404
|
+
- float - `Float32`, `Float64`
|
|
405
|
+
- integer - `Int8`, `Int16`, `Int32`, `Int64`, `Int128`
|
|
406
|
+
- unsigned integer - `UInt8`, `UInt16`, `UInt32`, `UInt64`, `UInt128`
|
|
407
|
+
- string - `String`, `Categorical`, `Enum`
|
|
408
|
+
- temporal - `Date`, `Datetime`, `Duration`, `Time`
|
|
409
|
+
- nested - `Array`, `List`, `Struct`
|
|
410
|
+
- other - `Binary`, `Object`, `Null`, `Unknown`
|
|
398
411
|
|
|
399
412
|
Get column types
|
|
400
413
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/polars/array_expr.rb
CHANGED
|
@@ -38,7 +38,7 @@ module Polars
|
|
|
38
38
|
# @param offset [Integer]
|
|
39
39
|
# Start index. Negative indexing is supported.
|
|
40
40
|
# @param length [Integer]
|
|
41
|
-
# Length of the slice. If set to `
|
|
41
|
+
# Length of the slice. If set to `nil` (default), the slice is taken to the
|
|
42
42
|
# end of the list.
|
|
43
43
|
# @param as_array [Boolean]
|
|
44
44
|
# Return result as a fixed-length `Array`, otherwise as a `List`.
|
data/lib/polars/data_frame.rb
CHANGED
|
@@ -72,6 +72,43 @@ module Polars
|
|
|
72
72
|
end
|
|
73
73
|
end
|
|
74
74
|
|
|
75
|
+
# Read a serialized DataFrame from a file.
|
|
76
|
+
#
|
|
77
|
+
# @param source [Object]
|
|
78
|
+
# Path to a file or a file-like object (by file-like object, we refer to
|
|
79
|
+
# objects that have a `read` method, such as a file handler or `StringIO`).
|
|
80
|
+
#
|
|
81
|
+
# @return [DataFrame]
|
|
82
|
+
#
|
|
83
|
+
# @note
|
|
84
|
+
# Serialization is not stable across Polars versions: a DataFrame serialized
|
|
85
|
+
# in one Polars version may not be deserializable in another Polars version.
|
|
86
|
+
#
|
|
87
|
+
# @example
|
|
88
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4.0, 5.0, 6.0]})
|
|
89
|
+
# bytes = df.serialize
|
|
90
|
+
# Polars::DataFrame.deserialize(StringIO.new(bytes))
|
|
91
|
+
# # =>
|
|
92
|
+
# # shape: (3, 2)
|
|
93
|
+
# # ┌─────┬─────┐
|
|
94
|
+
# # │ a ┆ b │
|
|
95
|
+
# # │ --- ┆ --- │
|
|
96
|
+
# # │ i64 ┆ f64 │
|
|
97
|
+
# # ╞═════╪═════╡
|
|
98
|
+
# # │ 1 ┆ 4.0 │
|
|
99
|
+
# # │ 2 ┆ 5.0 │
|
|
100
|
+
# # │ 3 ┆ 6.0 │
|
|
101
|
+
# # └─────┴─────┘
|
|
102
|
+
def self.deserialize(source)
|
|
103
|
+
if Utils.pathlike?(source)
|
|
104
|
+
source = Utils.normalize_filepath(source)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
deserializer = RbDataFrame.method(:deserialize_binary)
|
|
108
|
+
|
|
109
|
+
_from_rbdf(deserializer.(source))
|
|
110
|
+
end
|
|
111
|
+
|
|
75
112
|
# @private
|
|
76
113
|
def self._from_rbdf(rb_df)
|
|
77
114
|
df = DataFrame.allocate
|
|
@@ -562,8 +599,6 @@ module Polars
|
|
|
562
599
|
|
|
563
600
|
# Convert every row to a hash.
|
|
564
601
|
#
|
|
565
|
-
# Note that this is slow.
|
|
566
|
-
#
|
|
567
602
|
# @return [Array]
|
|
568
603
|
#
|
|
569
604
|
# @example
|
|
@@ -572,12 +607,7 @@ module Polars
|
|
|
572
607
|
# # =>
|
|
573
608
|
# # [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
|
|
574
609
|
def to_hashes
|
|
575
|
-
|
|
576
|
-
names = columns
|
|
577
|
-
|
|
578
|
-
height.times.map do |i|
|
|
579
|
-
names.zip(rbdf.row_tuple(i)).to_h
|
|
580
|
-
end
|
|
610
|
+
rows(named: true)
|
|
581
611
|
end
|
|
582
612
|
|
|
583
613
|
# Convert DataFrame to a 2D Numo array.
|
|
@@ -634,6 +664,44 @@ module Polars
|
|
|
634
664
|
Utils.wrap_s(_df.select_at_idx(index))
|
|
635
665
|
end
|
|
636
666
|
|
|
667
|
+
# Serialize this DataFrame to a file or string.
|
|
668
|
+
#
|
|
669
|
+
# @param file [Object]
|
|
670
|
+
# File path or writable file-like object to which the result will be written.
|
|
671
|
+
# If set to `nil` (default), the output is returned as a string instead.
|
|
672
|
+
#
|
|
673
|
+
# @return [Object]
|
|
674
|
+
#
|
|
675
|
+
# @note
|
|
676
|
+
# Serialization is not stable across Polars versions: a DataFrame serialized
|
|
677
|
+
# in one Polars version may not be deserializable in another Polars version.
|
|
678
|
+
#
|
|
679
|
+
# @example
|
|
680
|
+
# df = Polars::DataFrame.new(
|
|
681
|
+
# {
|
|
682
|
+
# "foo" => [1, 2, 3],
|
|
683
|
+
# "bar" => [6, 7, 8]
|
|
684
|
+
# }
|
|
685
|
+
# )
|
|
686
|
+
# bytes = df.serialize
|
|
687
|
+
# Polars::DataFrame.deserialize(StringIO.new(bytes))
|
|
688
|
+
# # =>
|
|
689
|
+
# # shape: (3, 2)
|
|
690
|
+
# # ┌─────┬─────┐
|
|
691
|
+
# # │ foo ┆ bar │
|
|
692
|
+
# # │ --- ┆ --- │
|
|
693
|
+
# # │ i64 ┆ i64 │
|
|
694
|
+
# # ╞═════╪═════╡
|
|
695
|
+
# # │ 1 ┆ 6 │
|
|
696
|
+
# # │ 2 ┆ 7 │
|
|
697
|
+
# # │ 3 ┆ 8 │
|
|
698
|
+
# # └─────┴─────┘
|
|
699
|
+
def serialize(file = nil)
|
|
700
|
+
serializer = _df.method(:serialize_binary)
|
|
701
|
+
|
|
702
|
+
Utils.serialize_polars_object(serializer, file)
|
|
703
|
+
end
|
|
704
|
+
|
|
637
705
|
# Serialize to JSON representation.
|
|
638
706
|
#
|
|
639
707
|
# @param file [String]
|
|
@@ -1148,6 +1216,40 @@ module Polars
|
|
|
1148
1216
|
end
|
|
1149
1217
|
end
|
|
1150
1218
|
|
|
1219
|
+
# Write DataFrame to an Iceberg table.
|
|
1220
|
+
#
|
|
1221
|
+
# @note
|
|
1222
|
+
# This functionality is currently considered **unstable**. It may be
|
|
1223
|
+
# changed at any point without it being considered a breaking change.
|
|
1224
|
+
#
|
|
1225
|
+
# @param target [Object]
|
|
1226
|
+
# Name of the table or the Table object representing an Iceberg table (currently only `Iceberg::Table` objects are accepted; anything else raises `Todo`).
|
|
1227
|
+
# @param mode ['append', 'overwrite']
|
|
1228
|
+
# How to handle existing data.
|
|
1229
|
+
#
|
|
1230
|
+
# - If 'append', will add new data.
|
|
1231
|
+
# - If 'overwrite', will replace table with new data (not yet implemented; currently raises `Todo`).
|
|
1232
|
+
#
|
|
1233
|
+
# @return [nil]
|
|
1234
|
+
def write_iceberg(target, mode:)
|
|
1235
|
+
require "iceberg"
|
|
1236
|
+
|
|
1237
|
+
table =
|
|
1238
|
+
if target.is_a?(Iceberg::Table)
|
|
1239
|
+
target
|
|
1240
|
+
else
|
|
1241
|
+
raise Todo
|
|
1242
|
+
end
|
|
1243
|
+
|
|
1244
|
+
data = self
|
|
1245
|
+
|
|
1246
|
+
if mode == "append"
|
|
1247
|
+
table.append(data)
|
|
1248
|
+
else
|
|
1249
|
+
raise Todo
|
|
1250
|
+
end
|
|
1251
|
+
end
|
|
1252
|
+
|
|
1151
1253
|
# Write DataFrame as delta table.
|
|
1152
1254
|
#
|
|
1153
1255
|
# @param target [Object]
|
|
@@ -5957,8 +6059,13 @@ module Polars
|
|
|
5957
6059
|
# The fields will be inserted into the `DataFrame` on the location of the
|
|
5958
6060
|
# `struct` type.
|
|
5959
6061
|
#
|
|
5960
|
-
# @param
|
|
5961
|
-
#
|
|
6062
|
+
# @param columns [Object]
|
|
6063
|
+
# Name of the struct column(s) that should be unnested.
|
|
6064
|
+
# @param more_columns [Array]
|
|
6065
|
+
# Additional columns to unnest, specified as positional arguments.
|
|
6066
|
+
# @param separator [String]
|
|
6067
|
+
# Rename output column names as combination of the struct column name,
|
|
6068
|
+
# name separator and field name.
|
|
5962
6069
|
#
|
|
5963
6070
|
# @return [DataFrame]
|
|
5964
6071
|
#
|
|
@@ -5984,11 +6091,8 @@ module Polars
|
|
|
5984
6091
|
# # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
|
|
5985
6092
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
|
5986
6093
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
|
5987
|
-
def unnest(
|
|
5988
|
-
|
|
5989
|
-
names = [names]
|
|
5990
|
-
end
|
|
5991
|
-
_from_rbdf(_df.unnest(names))
|
|
6094
|
+
def unnest(columns, *more_columns, separator: nil)
|
|
6095
|
+
lazy.unnest(columns, *more_columns, separator: separator).collect(_eager: true)
|
|
5992
6096
|
end
|
|
5993
6097
|
|
|
5994
6098
|
# Requires NumPy
|
data/lib/polars/data_types.rb
CHANGED
|
@@ -110,12 +110,23 @@ module Polars
|
|
|
110
110
|
DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
|
|
111
111
|
end
|
|
112
112
|
|
|
113
|
-
[:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested
|
|
113
|
+
[:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
|
|
114
114
|
define_method(v) do
|
|
115
115
|
self.class.public_send(v)
|
|
116
116
|
end
|
|
117
117
|
end
|
|
118
118
|
|
|
119
|
+
# Return a `DataTypeExpr` with a static `DataType`.
|
|
120
|
+
#
|
|
121
|
+
# @return [DataTypeExpr]
|
|
122
|
+
#
|
|
123
|
+
# @example
|
|
124
|
+
# Polars::Int16.new.to_dtype_expr.collect_dtype({})
|
|
125
|
+
# # => Polars::Int16
|
|
126
|
+
def to_dtype_expr
|
|
127
|
+
DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
|
|
128
|
+
end
|
|
129
|
+
|
|
119
130
|
# Returns a string representing the data type.
|
|
120
131
|
#
|
|
121
132
|
# @return [String]
|
|
@@ -198,6 +209,10 @@ module Polars
|
|
|
198
209
|
class UInt64 < UnsignedIntegerType
|
|
199
210
|
end
|
|
200
211
|
|
|
212
|
+
# 128-bit unsigned integer type.
|
|
213
|
+
class UInt128 < UnsignedIntegerType
|
|
214
|
+
end
|
|
215
|
+
|
|
201
216
|
# 32-bit floating point type.
|
|
202
217
|
class Float32 < FloatType
|
|
203
218
|
end
|
|
@@ -212,7 +227,11 @@ module Polars
|
|
|
212
227
|
class Decimal < NumericType
|
|
213
228
|
attr_reader :precision, :scale
|
|
214
229
|
|
|
215
|
-
def initialize(precision, scale)
|
|
230
|
+
def initialize(precision = nil, scale = 0)
|
|
231
|
+
if precision.nil?
|
|
232
|
+
precision = 38
|
|
233
|
+
end
|
|
234
|
+
|
|
216
235
|
@precision = precision
|
|
217
236
|
@scale = scale
|
|
218
237
|
end
|
|
@@ -317,11 +336,9 @@ module Polars
|
|
|
317
336
|
class Categories
|
|
318
337
|
attr_accessor :_categories
|
|
319
338
|
|
|
320
|
-
def initialize
|
|
321
|
-
# TODO fix
|
|
322
|
-
name = nil
|
|
339
|
+
def initialize(name = nil)
|
|
323
340
|
if name.nil? || name == ""
|
|
324
|
-
|
|
341
|
+
self._categories = RbCategories.global_categories
|
|
325
342
|
return
|
|
326
343
|
end
|
|
327
344
|
|
|
@@ -1188,7 +1188,7 @@ module Polars
|
|
|
1188
1188
|
if Utils::DTYPE_TEMPORAL_UNITS.include?(time_unit)
|
|
1189
1189
|
timestamp(time_unit)
|
|
1190
1190
|
elsif time_unit == "s"
|
|
1191
|
-
|
|
1191
|
+
timestamp("ms").floordiv(F.lit(1000, dtype: Int64))
|
|
1192
1192
|
elsif time_unit == "d"
|
|
1193
1193
|
Utils.wrap_expr(_rbexpr).cast(:date).cast(:i32)
|
|
1194
1194
|
else
|
|
@@ -1435,6 +1435,9 @@ module Polars
|
|
|
1435
1435
|
|
|
1436
1436
|
# Extract the days from a Duration type.
|
|
1437
1437
|
#
|
|
1438
|
+
# @param fractional [Boolean]
|
|
1439
|
+
# Whether to include the fractional component of the day.
|
|
1440
|
+
#
|
|
1438
1441
|
# @return [Expr]
|
|
1439
1442
|
#
|
|
1440
1443
|
# @example
|
|
@@ -1462,13 +1465,16 @@ module Polars
|
|
|
1462
1465
|
# # │ 2020-04-01 00:00:00 ┆ 31 │
|
|
1463
1466
|
# # │ 2020-05-01 00:00:00 ┆ 30 │
|
|
1464
1467
|
# # └─────────────────────┴───────────┘
|
|
1465
|
-
def total_days
|
|
1466
|
-
Utils.wrap_expr(_rbexpr.dt_total_days)
|
|
1468
|
+
def total_days(fractional: false)
|
|
1469
|
+
Utils.wrap_expr(_rbexpr.dt_total_days(fractional))
|
|
1467
1470
|
end
|
|
1468
1471
|
alias_method :days, :total_days
|
|
1469
1472
|
|
|
1470
1473
|
# Extract the hours from a Duration type.
|
|
1471
1474
|
#
|
|
1475
|
+
# @param fractional [Boolean]
|
|
1476
|
+
# Whether to include the fractional component of the hour.
|
|
1477
|
+
#
|
|
1472
1478
|
# @return [Expr]
|
|
1473
1479
|
#
|
|
1474
1480
|
# @example
|
|
@@ -1497,13 +1503,16 @@ module Polars
|
|
|
1497
1503
|
# # │ 2020-01-03 00:00:00 ┆ 24 │
|
|
1498
1504
|
# # │ 2020-01-04 00:00:00 ┆ 24 │
|
|
1499
1505
|
# # └─────────────────────┴────────────┘
|
|
1500
|
-
def total_hours
|
|
1501
|
-
Utils.wrap_expr(_rbexpr.dt_total_hours)
|
|
1506
|
+
def total_hours(fractional: false)
|
|
1507
|
+
Utils.wrap_expr(_rbexpr.dt_total_hours(fractional))
|
|
1502
1508
|
end
|
|
1503
1509
|
alias_method :hours, :total_hours
|
|
1504
1510
|
|
|
1505
1511
|
# Extract the minutes from a Duration type.
|
|
1506
1512
|
#
|
|
1513
|
+
# @param fractional [Boolean]
|
|
1514
|
+
# Whether to include the fractional component of the minute.
|
|
1515
|
+
#
|
|
1507
1516
|
# @return [Expr]
|
|
1508
1517
|
#
|
|
1509
1518
|
# @example
|
|
@@ -1532,13 +1541,16 @@ module Polars
|
|
|
1532
1541
|
# # │ 2020-01-03 00:00:00 ┆ 1440 │
|
|
1533
1542
|
# # │ 2020-01-04 00:00:00 ┆ 1440 │
|
|
1534
1543
|
# # └─────────────────────┴──────────────┘
|
|
1535
|
-
def total_minutes
|
|
1536
|
-
Utils.wrap_expr(_rbexpr.dt_total_minutes)
|
|
1544
|
+
def total_minutes(fractional: false)
|
|
1545
|
+
Utils.wrap_expr(_rbexpr.dt_total_minutes(fractional))
|
|
1537
1546
|
end
|
|
1538
1547
|
alias_method :minutes, :total_minutes
|
|
1539
1548
|
|
|
1540
1549
|
# Extract the seconds from a Duration type.
|
|
1541
1550
|
#
|
|
1551
|
+
# @param fractional [Boolean]
|
|
1552
|
+
# Whether to include the fractional component of the second.
|
|
1553
|
+
#
|
|
1542
1554
|
# @return [Expr]
|
|
1543
1555
|
#
|
|
1544
1556
|
# @example
|
|
@@ -1568,13 +1580,16 @@ module Polars
|
|
|
1568
1580
|
# # │ 2020-01-01 00:03:00 ┆ 60 │
|
|
1569
1581
|
# # │ 2020-01-01 00:04:00 ┆ 60 │
|
|
1570
1582
|
# # └─────────────────────┴──────────────┘
|
|
1571
|
-
def total_seconds
|
|
1572
|
-
Utils.wrap_expr(_rbexpr.dt_total_seconds)
|
|
1583
|
+
def total_seconds(fractional: false)
|
|
1584
|
+
Utils.wrap_expr(_rbexpr.dt_total_seconds(fractional))
|
|
1573
1585
|
end
|
|
1574
1586
|
alias_method :seconds, :total_seconds
|
|
1575
1587
|
|
|
1576
1588
|
# Extract the milliseconds from a Duration type.
|
|
1577
1589
|
#
|
|
1590
|
+
# @param fractional [Boolean]
|
|
1591
|
+
# Whether to include the fractional component of the millisecond.
|
|
1592
|
+
#
|
|
1578
1593
|
# @return [Expr]
|
|
1579
1594
|
#
|
|
1580
1595
|
# @example
|
|
@@ -1610,13 +1625,16 @@ module Polars
|
|
|
1610
1625
|
# # │ 2020-01-01 00:00:00.999 ┆ 1 │
|
|
1611
1626
|
# # │ 2020-01-01 00:00:01 ┆ 1 │
|
|
1612
1627
|
# # └─────────────────────────┴───────────────────┘
|
|
1613
|
-
def total_milliseconds
|
|
1614
|
-
Utils.wrap_expr(_rbexpr.dt_total_milliseconds)
|
|
1628
|
+
def total_milliseconds(fractional: false)
|
|
1629
|
+
Utils.wrap_expr(_rbexpr.dt_total_milliseconds(fractional))
|
|
1615
1630
|
end
|
|
1616
1631
|
alias_method :milliseconds, :total_milliseconds
|
|
1617
1632
|
|
|
1618
1633
|
# Extract the microseconds from a Duration type.
|
|
1619
1634
|
#
|
|
1635
|
+
# @param fractional [Boolean]
|
|
1636
|
+
# Whether to include the fractional component of the microsecond.
|
|
1637
|
+
#
|
|
1620
1638
|
# @return [Expr]
|
|
1621
1639
|
#
|
|
1622
1640
|
# @example
|
|
@@ -1652,13 +1670,16 @@ module Polars
|
|
|
1652
1670
|
# # │ 2020-01-01 00:00:00.999 ┆ 1000 │
|
|
1653
1671
|
# # │ 2020-01-01 00:00:01 ┆ 1000 │
|
|
1654
1672
|
# # └─────────────────────────┴───────────────────┘
|
|
1655
|
-
def total_microseconds
|
|
1656
|
-
Utils.wrap_expr(_rbexpr.dt_total_microseconds)
|
|
1673
|
+
def total_microseconds(fractional: false)
|
|
1674
|
+
Utils.wrap_expr(_rbexpr.dt_total_microseconds(fractional))
|
|
1657
1675
|
end
|
|
1658
1676
|
alias_method :microseconds, :total_microseconds
|
|
1659
1677
|
|
|
1660
1678
|
# Extract the nanoseconds from a Duration type.
|
|
1661
1679
|
#
|
|
1680
|
+
# @param fractional [Boolean]
|
|
1681
|
+
# Whether to include the fractional component of the nanosecond.
|
|
1682
|
+
#
|
|
1662
1683
|
# @return [Expr]
|
|
1663
1684
|
#
|
|
1664
1685
|
# @example
|
|
@@ -1694,8 +1715,8 @@ module Polars
|
|
|
1694
1715
|
# # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
|
|
1695
1716
|
# # │ 2020-01-01 00:00:01 ┆ 1000000 │
|
|
1696
1717
|
# # └─────────────────────────┴──────────────────┘
|
|
1697
|
-
def total_nanoseconds
|
|
1698
|
-
Utils.wrap_expr(_rbexpr.dt_total_nanoseconds)
|
|
1718
|
+
def total_nanoseconds(fractional: false)
|
|
1719
|
+
Utils.wrap_expr(_rbexpr.dt_total_nanoseconds(fractional))
|
|
1699
1720
|
end
|
|
1700
1721
|
alias_method :nanoseconds, :total_nanoseconds
|
|
1701
1722
|
|
data/lib/polars/expr.rb
CHANGED
|
@@ -146,6 +146,40 @@ module Polars
|
|
|
146
146
|
wrap_expr(_rbexpr.neg)
|
|
147
147
|
end
|
|
148
148
|
|
|
149
|
+
# Read a serialized expression from a file.
|
|
150
|
+
#
|
|
151
|
+
# @param source [Object]
|
|
152
|
+
# Path to a file or a file-like object (by file-like object, we refer to
|
|
153
|
+
# objects that have a `read` method, such as a file handler or `StringIO`).
|
|
154
|
+
#
|
|
155
|
+
# @return [Expr]
|
|
156
|
+
#
|
|
157
|
+
# @note
|
|
158
|
+
# This function uses marshaling if the logical plan contains Ruby UDFs,
|
|
159
|
+
# and as such inherits the security implications. Deserializing can execute
|
|
160
|
+
# arbitrary code, so it should only be attempted on trusted data.
|
|
161
|
+
#
|
|
162
|
+
# @note
|
|
163
|
+
# Serialization is not stable across Polars versions: an expression serialized
|
|
164
|
+
# in one Polars version may not be deserializable in another Polars version.
|
|
165
|
+
#
|
|
166
|
+
# @example
|
|
167
|
+
# expr = Polars.col("foo").sum.over("bar")
|
|
168
|
+
# bytes = expr.meta.serialize
|
|
169
|
+
# Polars::Expr.deserialize(StringIO.new(bytes))
|
|
170
|
+
# # => col("foo").sum().over([col("bar")])
|
|
171
|
+
def self.deserialize(source)
|
|
172
|
+
raise Todo unless RbExpr.respond_to?(:deserialize_binary)
|
|
173
|
+
|
|
174
|
+
if Utils.pathlike?(source)
|
|
175
|
+
source = Utils.normalize_filepath(source)
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
deserializer = RbExpr.method(:deserialize_binary)
|
|
179
|
+
|
|
180
|
+
_from_rbexpr(deserializer.(source))
|
|
181
|
+
end
|
|
182
|
+
|
|
149
183
|
# Cast to physical representation of the logical dtype.
|
|
150
184
|
#
|
|
151
185
|
# - `:date` -> `:i32`
|
|
@@ -377,8 +411,6 @@ module Polars
|
|
|
377
411
|
wrap_expr(_rbexpr._alias(name))
|
|
378
412
|
end
|
|
379
413
|
|
|
380
|
-
# TODO support symbols for exclude
|
|
381
|
-
|
|
382
414
|
# Exclude certain columns from a wildcard/regex selection.
|
|
383
415
|
#
|
|
384
416
|
# You may also use regexes in the exclude list. They must start with `^` and end
|
|
@@ -1787,7 +1819,7 @@ module Polars
|
|
|
1787
1819
|
wrap_expr(_rbexpr.arg_min)
|
|
1788
1820
|
end
|
|
1789
1821
|
|
|
1790
|
-
# Get the index of the first occurrence of a value, or
|
|
1822
|
+
# Get the index of the first occurrence of a value, or `nil` if it's not found.
|
|
1791
1823
|
#
|
|
1792
1824
|
# @param element [Object]
|
|
1793
1825
|
# Value to find.
|
|
@@ -6580,8 +6612,8 @@ module Polars
|
|
|
6580
6612
|
# # │ 10 ┆ null │
|
|
6581
6613
|
# # │ 11 ┆ 0.1 │
|
|
6582
6614
|
# # │ 12 ┆ 0.090909 │
|
|
6583
|
-
# # │ null ┆
|
|
6584
|
-
# # │ 12 ┆
|
|
6615
|
+
# # │ null ┆ null │
|
|
6616
|
+
# # │ 12 ┆ null │
|
|
6585
6617
|
# # └──────┴────────────┘
|
|
6586
6618
|
def pct_change(n: 1)
|
|
6587
6619
|
n = Utils.parse_into_expression(n)
|
|
@@ -7571,7 +7603,8 @@ module Polars
|
|
|
7571
7603
|
# # │ 1.584963 │
|
|
7572
7604
|
# # └──────────┘
|
|
7573
7605
|
def log(base = Math::E)
|
|
7574
|
-
|
|
7606
|
+
base_rbexpr = Utils.parse_into_expression(base)
|
|
7607
|
+
wrap_expr(_rbexpr.log(base_rbexpr))
|
|
7575
7608
|
end
|
|
7576
7609
|
|
|
7577
7610
|
# Compute the natural logarithm of each element plus one.
|
|
@@ -7743,33 +7776,9 @@ module Polars
|
|
|
7743
7776
|
# This can be used to reduce memory pressure.
|
|
7744
7777
|
#
|
|
7745
7778
|
# @return [Expr]
|
|
7746
|
-
#
|
|
7747
|
-
# @example
|
|
7748
|
-
# Polars::DataFrame.new(
|
|
7749
|
-
# {
|
|
7750
|
-
# "a" => [1, 2, 3],
|
|
7751
|
-
# "b" => [1, 2, 2 << 32],
|
|
7752
|
-
# "c" => [-1, 2, 1 << 30],
|
|
7753
|
-
# "d" => [-112, 2, 112],
|
|
7754
|
-
# "e" => [-112, 2, 129],
|
|
7755
|
-
# "f" => ["a", "b", "c"],
|
|
7756
|
-
# "g" => [0.1, 1.32, 0.12],
|
|
7757
|
-
# "h" => [true, nil, false]
|
|
7758
|
-
# }
|
|
7759
|
-
# ).select(Polars.all.shrink_dtype)
|
|
7760
|
-
# # =>
|
|
7761
|
-
# # shape: (3, 8)
|
|
7762
|
-
# # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
|
|
7763
|
-
# # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
|
|
7764
|
-
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
|
7765
|
-
# # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
|
|
7766
|
-
# # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
|
|
7767
|
-
# # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
|
|
7768
|
-
# # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
|
|
7769
|
-
# # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
|
|
7770
|
-
# # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
|
|
7771
7779
|
def shrink_dtype
|
|
7772
|
-
|
|
7780
|
+
warn "`Expr.shrink_dtype` is deprecated and is a no-op; use `Series.shrink_dtype` instead."
|
|
7781
|
+
self
|
|
7773
7782
|
end
|
|
7774
7783
|
|
|
7775
7784
|
# Bin values into buckets and count their occurrences.
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
module Polars
|
|
2
|
+
module Functions
|
|
3
|
+
# Count the number of business days between `start` and `stop` (not including `stop`).
|
|
4
|
+
#
|
|
5
|
+
# @note
|
|
6
|
+
# This functionality is considered **unstable**. It may be changed
|
|
7
|
+
# at any point without it being considered a breaking change.
|
|
8
|
+
#
|
|
9
|
+
# @param start [Object]
|
|
10
|
+
# Start dates.
|
|
11
|
+
# @param stop [Object]
|
|
12
|
+
# End dates.
|
|
13
|
+
# @param week_mask [Array]
|
|
14
|
+
# Which days of the week to count. The default is Monday to Friday.
|
|
15
|
+
# If you wanted to count only Monday to Thursday, you would pass
|
|
16
|
+
# `[true, true, true, true, false, false, false]`.
|
|
17
|
+
# @param holidays [Array]
|
|
18
|
+
# Holidays to exclude from the count.
|
|
19
|
+
#
|
|
20
|
+
# @return [Expr]
|
|
21
|
+
#
|
|
22
|
+
# @example
|
|
23
|
+
# df = Polars::DataFrame.new(
|
|
24
|
+
# {
|
|
25
|
+
# "start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)],
|
|
26
|
+
# "end" => [Date.new(2020, 1, 2), Date.new(2020, 1, 10)]
|
|
27
|
+
# }
|
|
28
|
+
# )
|
|
29
|
+
# df.with_columns(
|
|
30
|
+
# business_day_count: Polars.business_day_count("start", "end")
|
|
31
|
+
# )
|
|
32
|
+
# # =>
|
|
33
|
+
# # shape: (2, 3)
|
|
34
|
+
# # ┌────────────┬────────────┬────────────────────┐
|
|
35
|
+
# # │ start ┆ end ┆ business_day_count │
|
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
|
37
|
+
# # │ date ┆ date ┆ i32 │
|
|
38
|
+
# # ╞════════════╪════════════╪════════════════════╡
|
|
39
|
+
# # │ 2020-01-01 ┆ 2020-01-02 ┆ 1 │
|
|
40
|
+
# # │ 2020-01-02 ┆ 2020-01-10 ┆ 6 │
|
|
41
|
+
# # └────────────┴────────────┴────────────────────┘
|
|
42
|
+
#
|
|
43
|
+
# @example You can pass a custom weekend - for example, if you only take Sunday off:
|
|
44
|
+
# week_mask = [true, true, true, true, true, true, false]
|
|
45
|
+
# df.with_columns(
|
|
46
|
+
# business_day_count: Polars.business_day_count(
|
|
47
|
+
# "start", "end", week_mask: week_mask
|
|
48
|
+
# )
|
|
49
|
+
# )
|
|
50
|
+
# # =>
|
|
51
|
+
# # shape: (2, 3)
|
|
52
|
+
# # ┌────────────┬────────────┬────────────────────┐
|
|
53
|
+
# # │ start ┆ end ┆ business_day_count │
|
|
54
|
+
# # │ --- ┆ --- ┆ --- │
|
|
55
|
+
# # │ date ┆ date ┆ i32 │
|
|
56
|
+
# # ╞════════════╪════════════╪════════════════════╡
|
|
57
|
+
# # │ 2020-01-01 ┆ 2020-01-02 ┆ 1 │
|
|
58
|
+
# # │ 2020-01-02 ┆ 2020-01-10 ┆ 7 │
|
|
59
|
+
# # └────────────┴────────────┴────────────────────┘
|
|
60
|
+
#
|
|
61
|
+
# @example You can also pass a list of holidays to exclude from the count:
|
|
62
|
+
# holidays = [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]
|
|
63
|
+
# df.with_columns(
|
|
64
|
+
# business_day_count: Polars.business_day_count("start", "end", holidays: holidays)
|
|
65
|
+
# )
|
|
66
|
+
# # =>
|
|
67
|
+
# # shape: (2, 3)
|
|
68
|
+
# # ┌────────────┬────────────┬────────────────────┐
|
|
69
|
+
# # │ start ┆ end ┆ business_day_count │
|
|
70
|
+
# # │ --- ┆ --- ┆ --- │
|
|
71
|
+
# # │ date ┆ date ┆ i32 │
|
|
72
|
+
# # ╞════════════╪════════════╪════════════════════╡
|
|
73
|
+
# # │ 2020-01-01 ┆ 2020-01-02 ┆ 0 │
|
|
74
|
+
# # │ 2020-01-02 ┆ 2020-01-10 ┆ 5 │
|
|
75
|
+
# # └────────────┴────────────┴────────────────────┘
|
|
76
|
+
def business_day_count(
|
|
77
|
+
start,
|
|
78
|
+
stop,
|
|
79
|
+
week_mask: [true, true, true, true, true, false, false],
|
|
80
|
+
holidays: []
|
|
81
|
+
)
|
|
82
|
+
start_rbexpr = Utils.parse_into_expression(start)
|
|
83
|
+
end_rbexpr = Utils.parse_into_expression(stop)
|
|
84
|
+
unix_epoch = ::Date.new(1970, 1, 1)
|
|
85
|
+
Utils.wrap_expr(
|
|
86
|
+
Plr.business_day_count(
|
|
87
|
+
start_rbexpr,
|
|
88
|
+
end_rbexpr,
|
|
89
|
+
week_mask,
|
|
90
|
+
holidays.map { |holiday| holiday - unix_epoch }
|
|
91
|
+
)
|
|
92
|
+
)
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|