polars-df 0.3.1-arm64-darwin → 0.5.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/LICENSE-THIRD-PARTY.txt +6761 -7881
- data/README.md +31 -2
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +4 -2
@@ -218,6 +218,25 @@ module Polars
|
|
218
218
|
)
|
219
219
|
end
|
220
220
|
|
221
|
+
# Create a naive Datetime from an existing Date/Datetime expression and a Time.
|
222
|
+
#
|
223
|
+
# If the underlying expression is a Datetime then its time component is replaced,
|
224
|
+
# and if it is a Date then a new Datetime is created by combining the two values.
|
225
|
+
#
|
226
|
+
# @param time [Object]
|
227
|
+
# A Ruby time literal or Polars expression/column that resolves to a time.
|
228
|
+
# @param time_unit ["ns", "us", "ms"]
|
229
|
+
# Unit of time.
|
230
|
+
#
|
231
|
+
# @return [Expr]
|
232
|
+
def combine(time, time_unit: "us")
|
233
|
+
unless time.is_a?(Time) || time.is_a?(Expr)
|
234
|
+
raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
|
235
|
+
end
|
236
|
+
time = Utils.expr_to_lit_or_expr(time)
|
237
|
+
Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
|
238
|
+
end
|
239
|
+
|
221
240
|
# Format Date/datetime with a formatting rule.
|
222
241
|
#
|
223
242
|
# See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
|
@@ -270,6 +289,34 @@ module Polars
|
|
270
289
|
Utils.wrap_expr(_rbexpr.year)
|
271
290
|
end
|
272
291
|
|
292
|
+
# Determine whether the year of the underlying date is a leap year.
|
293
|
+
#
|
294
|
+
# Applies to Date and Datetime columns.
|
295
|
+
#
|
296
|
+
# @return [Expr]
|
297
|
+
#
|
298
|
+
# @example
|
299
|
+
# start = DateTime.new(2000, 1, 1)
|
300
|
+
# stop = DateTime.new(2002, 1, 1)
|
301
|
+
# df = Polars::DataFrame.new(
|
302
|
+
# {"date" => Polars.date_range(start, stop, "1y")}
|
303
|
+
# )
|
304
|
+
# df.select(Polars.col("date").dt.is_leap_year)
|
305
|
+
# # =>
|
306
|
+
# # shape: (3, 1)
|
307
|
+
# # ┌───────┐
|
308
|
+
# # │ date │
|
309
|
+
# # │ --- │
|
310
|
+
# # │ bool │
|
311
|
+
# # ╞═══════╡
|
312
|
+
# # │ true │
|
313
|
+
# # │ false │
|
314
|
+
# # │ false │
|
315
|
+
# # └───────┘
|
316
|
+
def is_leap_year
|
317
|
+
Utils.wrap_expr(_rbexpr.dt_is_leap_year)
|
318
|
+
end
|
319
|
+
|
273
320
|
# Extract ISO year from underlying Date representation.
|
274
321
|
#
|
275
322
|
# Applies to Date and Datetime columns.
|
@@ -550,6 +597,27 @@ module Polars
|
|
550
597
|
Utils.wrap_expr(_rbexpr.ordinal_day)
|
551
598
|
end
|
552
599
|
|
600
|
+
# Extract the time part of the underlying expression (delegates to `dt_time`).
|
601
|
+
#
|
602
|
+
# @return [Expr]
|
603
|
+
def time
|
604
|
+
Utils.wrap_expr(_rbexpr.dt_time)
|
605
|
+
end
|
606
|
+
|
607
|
+
# Extract the date part of the underlying expression (delegates to `dt_date`).
|
608
|
+
#
|
609
|
+
# @return [Expr]
|
610
|
+
def date
|
611
|
+
Utils.wrap_expr(_rbexpr.dt_date)
|
612
|
+
end
|
613
|
+
|
614
|
+
# Return the underlying expression as a datetime (delegates to `dt_datetime`).
|
615
|
+
#
|
616
|
+
# @return [Expr]
|
617
|
+
def datetime
|
618
|
+
Utils.wrap_expr(_rbexpr.dt_datetime)
|
619
|
+
end
|
620
|
+
|
553
621
|
# Extract hour from underlying DateTime representation.
|
554
622
|
#
|
555
623
|
# Applies to Datetime columns.
|
@@ -958,8 +1026,8 @@ module Polars
|
|
958
1026
|
# Time zone for the `Datetime` Series.
|
959
1027
|
#
|
960
1028
|
# @return [Expr]
|
961
|
-
def replace_time_zone(tz)
|
962
|
-
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
|
1029
|
+
def replace_time_zone(tz, use_earliest: nil)
|
1030
|
+
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
|
963
1031
|
end
|
964
1032
|
|
965
1033
|
# Localize tz-naive Datetime Series to tz-aware Datetime Series.
|
@@ -1130,7 +1198,7 @@ module Polars
|
|
1130
1198
|
# ]
|
1131
1199
|
# )
|
1132
1200
|
# # =>
|
1133
|
-
# # shape: (
|
1201
|
+
# # shape: (1_001, 2)
|
1134
1202
|
# # ┌─────────────────────────┬───────────────────┐
|
1135
1203
|
# # │ date ┆ milliseconds_diff │
|
1136
1204
|
# # │ --- ┆ --- │
|
@@ -1140,7 +1208,7 @@ module Polars
|
|
1140
1208
|
# # │ 2020-01-01 00:00:00.001 ┆ 1 │
|
1141
1209
|
# # │ 2020-01-01 00:00:00.002 ┆ 1 │
|
1142
1210
|
# # │ 2020-01-01 00:00:00.003 ┆ 1 │
|
1143
|
-
# # │
|
1211
|
+
# # │ … ┆ … │
|
1144
1212
|
# # │ 2020-01-01 00:00:00.997 ┆ 1 │
|
1145
1213
|
# # │ 2020-01-01 00:00:00.998 ┆ 1 │
|
1146
1214
|
# # │ 2020-01-01 00:00:00.999 ┆ 1 │
|
@@ -1169,7 +1237,7 @@ module Polars
|
|
1169
1237
|
# ]
|
1170
1238
|
# )
|
1171
1239
|
# # =>
|
1172
|
-
# # shape: (
|
1240
|
+
# # shape: (1_001, 2)
|
1173
1241
|
# # ┌─────────────────────────┬───────────────────┐
|
1174
1242
|
# # │ date ┆ microseconds_diff │
|
1175
1243
|
# # │ --- ┆ --- │
|
@@ -1179,7 +1247,7 @@ module Polars
|
|
1179
1247
|
# # │ 2020-01-01 00:00:00.001 ┆ 1000 │
|
1180
1248
|
# # │ 2020-01-01 00:00:00.002 ┆ 1000 │
|
1181
1249
|
# # │ 2020-01-01 00:00:00.003 ┆ 1000 │
|
1182
|
-
# # │
|
1250
|
+
# # │ … ┆ … │
|
1183
1251
|
# # │ 2020-01-01 00:00:00.997 ┆ 1000 │
|
1184
1252
|
# # │ 2020-01-01 00:00:00.998 ┆ 1000 │
|
1185
1253
|
# # │ 2020-01-01 00:00:00.999 ┆ 1000 │
|
@@ -1208,7 +1276,7 @@ module Polars
|
|
1208
1276
|
# ]
|
1209
1277
|
# )
|
1210
1278
|
# # =>
|
1211
|
-
# # shape: (
|
1279
|
+
# # shape: (1_001, 2)
|
1212
1280
|
# # ┌─────────────────────────┬──────────────────┐
|
1213
1281
|
# # │ date ┆ nanoseconds_diff │
|
1214
1282
|
# # │ --- ┆ --- │
|
@@ -1218,7 +1286,7 @@ module Polars
|
|
1218
1286
|
# # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
|
1219
1287
|
# # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
|
1220
1288
|
# # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
|
1221
|
-
# # │
|
1289
|
+
# # │ … ┆ … │
|
1222
1290
|
# # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
|
1223
1291
|
# # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
|
1224
1292
|
# # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
|
@@ -1282,5 +1350,77 @@ module Polars
|
|
1282
1350
|
def offset_by(by)
|
1283
1351
|
Utils.wrap_expr(_rbexpr.dt_offset_by(by))
|
1284
1352
|
end
|
1353
|
+
|
1354
|
+
# Roll backward to the first day of the month.
|
1355
|
+
#
|
1356
|
+
# @return [Expr]
|
1357
|
+
#
|
1358
|
+
# @example
|
1359
|
+
# df = Polars::DataFrame.new(
|
1360
|
+
# {
|
1361
|
+
# "dates" => Polars.date_range(
|
1362
|
+
# DateTime.new(2000, 1, 15, 2),
|
1363
|
+
# DateTime.new(2000, 12, 15, 2),
|
1364
|
+
# "1mo"
|
1365
|
+
# )
|
1366
|
+
# }
|
1367
|
+
# )
|
1368
|
+
# df.select(Polars.col("dates").dt.month_start)
|
1369
|
+
# # =>
|
1370
|
+
# # shape: (12, 1)
|
1371
|
+
# # ┌─────────────────────┐
|
1372
|
+
# # │ dates │
|
1373
|
+
# # │ --- │
|
1374
|
+
# # │ datetime[μs] │
|
1375
|
+
# # ╞═════════════════════╡
|
1376
|
+
# # │ 2000-01-01 02:00:00 │
|
1377
|
+
# # │ 2000-02-01 02:00:00 │
|
1378
|
+
# # │ 2000-03-01 02:00:00 │
|
1379
|
+
# # │ 2000-04-01 02:00:00 │
|
1380
|
+
# # │ … │
|
1381
|
+
# # │ 2000-09-01 02:00:00 │
|
1382
|
+
# # │ 2000-10-01 02:00:00 │
|
1383
|
+
# # │ 2000-11-01 02:00:00 │
|
1384
|
+
# # │ 2000-12-01 02:00:00 │
|
1385
|
+
# # └─────────────────────┘
|
1386
|
+
def month_start
|
1387
|
+
Utils.wrap_expr(_rbexpr.dt_month_start)
|
1388
|
+
end
|
1389
|
+
|
1390
|
+
# Roll forward to the last day of the month.
|
1391
|
+
#
|
1392
|
+
# @return [Expr]
|
1393
|
+
#
|
1394
|
+
# @example
|
1395
|
+
# df = Polars::DataFrame.new(
|
1396
|
+
# {
|
1397
|
+
# "dates" => Polars.date_range(
|
1398
|
+
# DateTime.new(2000, 1, 15, 2),
|
1399
|
+
# DateTime.new(2000, 12, 15, 2),
|
1400
|
+
# "1mo"
|
1401
|
+
# )
|
1402
|
+
# }
|
1403
|
+
# )
|
1404
|
+
# df.select(Polars.col("dates").dt.month_end)
|
1405
|
+
# # =>
|
1406
|
+
# # shape: (12, 1)
|
1407
|
+
# # ┌─────────────────────┐
|
1408
|
+
# # │ dates │
|
1409
|
+
# # │ --- │
|
1410
|
+
# # │ datetime[μs] │
|
1411
|
+
# # ╞═════════════════════╡
|
1412
|
+
# # │ 2000-01-31 02:00:00 │
|
1413
|
+
# # │ 2000-02-29 02:00:00 │
|
1414
|
+
# # │ 2000-03-31 02:00:00 │
|
1415
|
+
# # │ 2000-04-30 02:00:00 │
|
1416
|
+
# # │ … │
|
1417
|
+
# # │ 2000-09-30 02:00:00 │
|
1418
|
+
# # │ 2000-10-31 02:00:00 │
|
1419
|
+
# # │ 2000-11-30 02:00:00 │
|
1420
|
+
# # │ 2000-12-31 02:00:00 │
|
1421
|
+
# # └─────────────────────┘
|
1422
|
+
def month_end
|
1423
|
+
Utils.wrap_expr(_rbexpr.dt_month_end)
|
1424
|
+
end
|
1285
1425
|
end
|
1286
1426
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -1308,8 +1308,6 @@ module Polars
|
|
1308
1308
|
#
|
1309
1309
|
# @param k [Integer]
|
1310
1310
|
# Number of elements to return.
|
1311
|
-
# @param reverse [Boolean]
|
1312
|
-
# Return the smallest elements.
|
1313
1311
|
#
|
1314
1312
|
# @return [Expr]
|
1315
1313
|
#
|
@@ -1322,7 +1320,45 @@ module Polars
|
|
1322
1320
|
# df.select(
|
1323
1321
|
# [
|
1324
1322
|
# Polars.col("value").top_k.alias("top_k"),
|
1325
|
-
# Polars.col("value").
|
1323
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1324
|
+
# ]
|
1325
|
+
# )
|
1326
|
+
# # =>
|
1327
|
+
# # shape: (5, 2)
|
1328
|
+
# # ┌───────┬──────────┐
|
1329
|
+
# # │ top_k ┆ bottom_k │
|
1330
|
+
# # │ --- ┆ --- │
|
1331
|
+
# # │ i64 ┆ i64 │
|
1332
|
+
# # ╞═══════╪══════════╡
|
1333
|
+
# # │ 99 ┆ 1 │
|
1334
|
+
# # │ 98 ┆ 2 │
|
1335
|
+
# # │ 4 ┆ 3 │
|
1336
|
+
# # │ 3 ┆ 4 │
|
1337
|
+
# # │ 2 ┆ 98 │
|
1338
|
+
# # └───────┴──────────┘
|
1339
|
+
def top_k(k: 5)
|
1340
|
+
wrap_expr(_rbexpr.top_k(k))
|
1341
|
+
end
|
1342
|
+
|
1343
|
+
# Return the `k` smallest elements.
|
1344
|
+
#
|
1345
|
+
# This is the counterpart of `top_k`; it does not take a `reverse` option.
|
1346
|
+
#
|
1347
|
+
# @param k [Integer]
|
1348
|
+
# Number of elements to return.
|
1349
|
+
#
|
1350
|
+
# @return [Expr]
|
1351
|
+
#
|
1352
|
+
# @example
|
1353
|
+
# df = Polars::DataFrame.new(
|
1354
|
+
# {
|
1355
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1356
|
+
# }
|
1357
|
+
# )
|
1358
|
+
# df.select(
|
1359
|
+
# [
|
1360
|
+
# Polars.col("value").top_k.alias("top_k"),
|
1361
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1326
1362
|
# ]
|
1327
1363
|
# )
|
1328
1364
|
# # =>
|
@@ -1338,8 +1374,8 @@ module Polars
|
|
1338
1374
|
# # │ 3 ┆ 4 │
|
1339
1375
|
# # │ 2 ┆ 98 │
|
1340
1376
|
# # └───────┴──────────┘
|
1341
|
-
def
|
1342
|
-
wrap_expr(_rbexpr.
|
1377
|
+
def bottom_k(k: 5)
|
1378
|
+
wrap_expr(_rbexpr.bottom_k(k))
|
1343
1379
|
end
|
1344
1380
|
|
1345
1381
|
# Get the index values that would sort this column.
|
@@ -2008,6 +2044,28 @@ module Polars
|
|
2008
2044
|
wrap_expr(_rbexpr.n_unique)
|
2009
2045
|
end
|
2010
2046
|
|
2047
|
+
# Approximate count of unique values.
|
2048
|
+
#
|
2049
|
+
# This is done using the HyperLogLog++ algorithm for cardinality estimation.
|
2050
|
+
#
|
2051
|
+
# @return [Expr]
|
2052
|
+
#
|
2053
|
+
# @example
|
2054
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2055
|
+
# df.select(Polars.col("a").approx_unique)
|
2056
|
+
# # =>
|
2057
|
+
# # shape: (1, 1)
|
2058
|
+
# # ┌─────┐
|
2059
|
+
# # │ a │
|
2060
|
+
# # │ --- │
|
2061
|
+
# # │ u32 │
|
2062
|
+
# # ╞═════╡
|
2063
|
+
# # │ 2 │
|
2064
|
+
# # └─────┘
|
2065
|
+
def approx_unique
|
2066
|
+
wrap_expr(_rbexpr.approx_unique)
|
2067
|
+
end
|
2068
|
+
|
2011
2069
|
# Count null values.
|
2012
2070
|
#
|
2013
2071
|
# @return [Expr]
|
@@ -2194,7 +2252,7 @@ module Polars
|
|
2194
2252
|
# # │ 4 │
|
2195
2253
|
# # │ 6 │
|
2196
2254
|
# # │ 6 │
|
2197
|
-
# # │
|
2255
|
+
# # │ 4 │
|
2198
2256
|
# # │ 6 │
|
2199
2257
|
# # │ 6 │
|
2200
2258
|
# # │ 6 │
|
@@ -2571,7 +2629,7 @@ module Polars
|
|
2571
2629
|
# # │ e │
|
2572
2630
|
# # │ l │
|
2573
2631
|
# # │ l │
|
2574
|
-
# # │
|
2632
|
+
# # │ … │
|
2575
2633
|
# # │ o │
|
2576
2634
|
# # │ r │
|
2577
2635
|
# # │ l │
|
@@ -2751,6 +2809,7 @@ module Polars
|
|
2751
2809
|
end
|
2752
2810
|
wrap_expr(_rbexpr.is_in(other._rbexpr))
|
2753
2811
|
end
|
2812
|
+
alias_method :in?, :is_in
|
2754
2813
|
|
2755
2814
|
# Repeat the elements in this Series as specified in the given expression.
|
2756
2815
|
#
|
@@ -3914,8 +3973,8 @@ module Polars
|
|
3914
3973
|
# # │ 2 │
|
3915
3974
|
# # │ 5 │
|
3916
3975
|
# # └─────┘
|
3917
|
-
def rank(method: "average", reverse: false)
|
3918
|
-
wrap_expr(_rbexpr.rank(method, reverse))
|
3976
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3977
|
+
wrap_expr(_rbexpr.rank(method, reverse, seed))
|
3919
3978
|
end
|
3920
3979
|
|
3921
3980
|
# Calculate the n-th discrete difference.
|
@@ -4916,9 +4975,10 @@ module Polars
|
|
4916
4975
|
# # ╞═══════════╪═══════════╡
|
4917
4976
|
# # │ [1, 2, 3] ┆ [4, 5, 6] │
|
4918
4977
|
# # └───────────┴───────────┘
|
4919
|
-
def
|
4920
|
-
wrap_expr(_rbexpr.
|
4978
|
+
def implode
|
4979
|
+
wrap_expr(_rbexpr.implode)
|
4921
4980
|
end
|
4981
|
+
alias_method :list, :implode
|
4922
4982
|
|
4923
4983
|
# Shrink numeric columns to the minimal required datatype.
|
4924
4984
|
#
|
@@ -4962,6 +5022,13 @@ module Polars
|
|
4962
5022
|
ListExpr.new(self)
|
4963
5023
|
end
|
4964
5024
|
|
5025
|
+
# Create an object namespace of all binary related methods.
|
5026
|
+
#
|
5027
|
+
# @return [BinaryExpr]
|
5028
|
+
def bin
|
5029
|
+
BinaryExpr.new(self)
|
5030
|
+
end
|
5031
|
+
|
4965
5032
|
# Create an object namespace of all categorical related methods.
|
4966
5033
|
#
|
4967
5034
|
# @return [CatExpr]
|