polars-df 0.3.1-x86_64-linux → 0.5.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/LICENSE-THIRD-PARTY.txt +7353 -8473
- data/README.md +31 -2
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +4 -2
@@ -218,6 +218,25 @@ module Polars
|
|
218
218
|
)
|
219
219
|
end
|
220
220
|
|
221
|
+
# Create a naive Datetime from an existing Date/Datetime expression and a Time.
|
222
|
+
#
|
223
|
+
# If the underlying expression is a Datetime then its time component is replaced,
|
224
|
+
# and if it is a Date then a new Datetime is created by combining the two values.
|
225
|
+
#
|
226
|
+
# @param time [Object]
|
227
|
+
# A Ruby time literal or Polars expression/column that resolves to a time.
|
228
|
+
# @param time_unit ["ns", "us", "ms"]
|
229
|
+
# Unit of time.
|
230
|
+
#
|
231
|
+
# @return [Expr]
|
232
|
+
def combine(time, time_unit: "us")
|
233
|
+
unless time.is_a?(Time) || time.is_a?(Expr)
|
234
|
+
raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
|
235
|
+
end
|
236
|
+
time = Utils.expr_to_lit_or_expr(time)
|
237
|
+
Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
|
238
|
+
end
|
239
|
+
|
221
240
|
# Format Date/datetime with a formatting rule.
|
222
241
|
#
|
223
242
|
# See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
|
@@ -270,6 +289,34 @@ module Polars
|
|
270
289
|
Utils.wrap_expr(_rbexpr.year)
|
271
290
|
end
|
272
291
|
|
292
|
+
# Determine whether the year of the underlying date is a leap year.
|
293
|
+
#
|
294
|
+
# Applies to Date and Datetime columns.
|
295
|
+
#
|
296
|
+
# @return [Expr]
|
297
|
+
#
|
298
|
+
# @example
|
299
|
+
# start = DateTime.new(2000, 1, 1)
|
300
|
+
# stop = DateTime.new(2002, 1, 1)
|
301
|
+
# df = Polars::DataFrame.new(
|
302
|
+
# {"date" => Polars.date_range(start, stop, "1y")}
|
303
|
+
# )
|
304
|
+
# df.select(Polars.col("date").dt.is_leap_year)
|
305
|
+
# # =>
|
306
|
+
# # shape: (3, 1)
|
307
|
+
# # ┌───────┐
|
308
|
+
# # │ date │
|
309
|
+
# # │ --- │
|
310
|
+
# # │ bool │
|
311
|
+
# # ╞═══════╡
|
312
|
+
# # │ true │
|
313
|
+
# # │ false │
|
314
|
+
# # │ false │
|
315
|
+
# # └───────┘
|
316
|
+
def is_leap_year
|
317
|
+
Utils.wrap_expr(_rbexpr.dt_is_leap_year)
|
318
|
+
end
|
319
|
+
|
273
320
|
# Extract ISO year from underlying Date representation.
|
274
321
|
#
|
275
322
|
# Applies to Date and Datetime columns.
|
@@ -550,6 +597,27 @@ module Polars
|
|
550
597
|
Utils.wrap_expr(_rbexpr.ordinal_day)
|
551
598
|
end
|
552
599
|
|
600
|
+
# Time
|
601
|
+
#
|
602
|
+
# @return [Expr]
|
603
|
+
def time
|
604
|
+
Utils.wrap_expr(_rbexpr.dt_time)
|
605
|
+
end
|
606
|
+
|
607
|
+
# Date
|
608
|
+
#
|
609
|
+
# @return [Expr]
|
610
|
+
def date
|
611
|
+
Utils.wrap_expr(_rbexpr.dt_date)
|
612
|
+
end
|
613
|
+
|
614
|
+
# Datetime
|
615
|
+
#
|
616
|
+
# @return [Expr]
|
617
|
+
def datetime
|
618
|
+
Utils.wrap_expr(_rbexpr.dt_datetime)
|
619
|
+
end
|
620
|
+
|
553
621
|
# Extract hour from underlying DateTime representation.
|
554
622
|
#
|
555
623
|
# Applies to Datetime columns.
|
@@ -958,8 +1026,8 @@ module Polars
|
|
958
1026
|
# Time zone for the `Datetime` Series.
|
959
1027
|
#
|
960
1028
|
# @return [Expr]
|
961
|
-
def replace_time_zone(tz)
|
962
|
-
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
|
1029
|
+
def replace_time_zone(tz, use_earliest: nil)
|
1030
|
+
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
|
963
1031
|
end
|
964
1032
|
|
965
1033
|
# Localize tz-naive Datetime Series to tz-aware Datetime Series.
|
@@ -1130,7 +1198,7 @@ module Polars
|
|
1130
1198
|
# ]
|
1131
1199
|
# )
|
1132
1200
|
# # =>
|
1133
|
-
# # shape: (
|
1201
|
+
# # shape: (1_001, 2)
|
1134
1202
|
# # ┌─────────────────────────┬───────────────────┐
|
1135
1203
|
# # │ date ┆ milliseconds_diff │
|
1136
1204
|
# # │ --- ┆ --- │
|
@@ -1140,7 +1208,7 @@ module Polars
|
|
1140
1208
|
# # │ 2020-01-01 00:00:00.001 ┆ 1 │
|
1141
1209
|
# # │ 2020-01-01 00:00:00.002 ┆ 1 │
|
1142
1210
|
# # │ 2020-01-01 00:00:00.003 ┆ 1 │
|
1143
|
-
# # │
|
1211
|
+
# # │ … ┆ … │
|
1144
1212
|
# # │ 2020-01-01 00:00:00.997 ┆ 1 │
|
1145
1213
|
# # │ 2020-01-01 00:00:00.998 ┆ 1 │
|
1146
1214
|
# # │ 2020-01-01 00:00:00.999 ┆ 1 │
|
@@ -1169,7 +1237,7 @@ module Polars
|
|
1169
1237
|
# ]
|
1170
1238
|
# )
|
1171
1239
|
# # =>
|
1172
|
-
# # shape: (
|
1240
|
+
# # shape: (1_001, 2)
|
1173
1241
|
# # ┌─────────────────────────┬───────────────────┐
|
1174
1242
|
# # │ date ┆ microseconds_diff │
|
1175
1243
|
# # │ --- ┆ --- │
|
@@ -1179,7 +1247,7 @@ module Polars
|
|
1179
1247
|
# # │ 2020-01-01 00:00:00.001 ┆ 1000 │
|
1180
1248
|
# # │ 2020-01-01 00:00:00.002 ┆ 1000 │
|
1181
1249
|
# # │ 2020-01-01 00:00:00.003 ┆ 1000 │
|
1182
|
-
# # │
|
1250
|
+
# # │ … ┆ … │
|
1183
1251
|
# # │ 2020-01-01 00:00:00.997 ┆ 1000 │
|
1184
1252
|
# # │ 2020-01-01 00:00:00.998 ┆ 1000 │
|
1185
1253
|
# # │ 2020-01-01 00:00:00.999 ┆ 1000 │
|
@@ -1208,7 +1276,7 @@ module Polars
|
|
1208
1276
|
# ]
|
1209
1277
|
# )
|
1210
1278
|
# # =>
|
1211
|
-
# # shape: (
|
1279
|
+
# # shape: (1_001, 2)
|
1212
1280
|
# # ┌─────────────────────────┬──────────────────┐
|
1213
1281
|
# # │ date ┆ nanoseconds_diff │
|
1214
1282
|
# # │ --- ┆ --- │
|
@@ -1218,7 +1286,7 @@ module Polars
|
|
1218
1286
|
# # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
|
1219
1287
|
# # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
|
1220
1288
|
# # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
|
1221
|
-
# # │
|
1289
|
+
# # │ … ┆ … │
|
1222
1290
|
# # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
|
1223
1291
|
# # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
|
1224
1292
|
# # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
|
@@ -1282,5 +1350,77 @@ module Polars
|
|
1282
1350
|
def offset_by(by)
|
1283
1351
|
Utils.wrap_expr(_rbexpr.dt_offset_by(by))
|
1284
1352
|
end
|
1353
|
+
|
1354
|
+
# Roll backward to the first day of the month.
|
1355
|
+
#
|
1356
|
+
# @return [Expr]
|
1357
|
+
#
|
1358
|
+
# @example
|
1359
|
+
# df = Polars::DataFrame.new(
|
1360
|
+
# {
|
1361
|
+
# "dates" => Polars.date_range(
|
1362
|
+
# DateTime.new(2000, 1, 15, 2),
|
1363
|
+
# DateTime.new(2000, 12, 15, 2),
|
1364
|
+
# "1mo"
|
1365
|
+
# )
|
1366
|
+
# }
|
1367
|
+
# )
|
1368
|
+
# df.select(Polars.col("dates").dt.month_start)
|
1369
|
+
# # =>
|
1370
|
+
# # shape: (12, 1)
|
1371
|
+
# # ┌─────────────────────┐
|
1372
|
+
# # │ dates │
|
1373
|
+
# # │ --- │
|
1374
|
+
# # │ datetime[μs] │
|
1375
|
+
# # ╞═════════════════════╡
|
1376
|
+
# # │ 2000-01-01 02:00:00 │
|
1377
|
+
# # │ 2000-02-01 02:00:00 │
|
1378
|
+
# # │ 2000-03-01 02:00:00 │
|
1379
|
+
# # │ 2000-04-01 02:00:00 │
|
1380
|
+
# # │ … │
|
1381
|
+
# # │ 2000-09-01 02:00:00 │
|
1382
|
+
# # │ 2000-10-01 02:00:00 │
|
1383
|
+
# # │ 2000-11-01 02:00:00 │
|
1384
|
+
# # │ 2000-12-01 02:00:00 │
|
1385
|
+
# # └─────────────────────┘
|
1386
|
+
def month_start
|
1387
|
+
Utils.wrap_expr(_rbexpr.dt_month_start)
|
1388
|
+
end
|
1389
|
+
|
1390
|
+
# Roll forward to the last day of the month.
|
1391
|
+
#
|
1392
|
+
# @return [Expr]
|
1393
|
+
#
|
1394
|
+
# @example
|
1395
|
+
# df = Polars::DataFrame.new(
|
1396
|
+
# {
|
1397
|
+
# "dates" => Polars.date_range(
|
1398
|
+
# DateTime.new(2000, 1, 15, 2),
|
1399
|
+
# DateTime.new(2000, 12, 15, 2),
|
1400
|
+
# "1mo"
|
1401
|
+
# )
|
1402
|
+
# }
|
1403
|
+
# )
|
1404
|
+
# df.select(Polars.col("dates").dt.month_end)
|
1405
|
+
# # =>
|
1406
|
+
# # shape: (12, 1)
|
1407
|
+
# # ┌─────────────────────┐
|
1408
|
+
# # │ dates │
|
1409
|
+
# # │ --- │
|
1410
|
+
# # │ datetime[μs] │
|
1411
|
+
# # ╞═════════════════════╡
|
1412
|
+
# # │ 2000-01-31 02:00:00 │
|
1413
|
+
# # │ 2000-02-29 02:00:00 │
|
1414
|
+
# # │ 2000-03-31 02:00:00 │
|
1415
|
+
# # │ 2000-04-30 02:00:00 │
|
1416
|
+
# # │ … │
|
1417
|
+
# # │ 2000-09-30 02:00:00 │
|
1418
|
+
# # │ 2000-10-31 02:00:00 │
|
1419
|
+
# # │ 2000-11-30 02:00:00 │
|
1420
|
+
# # │ 2000-12-31 02:00:00 │
|
1421
|
+
# # └─────────────────────┘
|
1422
|
+
def month_end
|
1423
|
+
Utils.wrap_expr(_rbexpr.dt_month_end)
|
1424
|
+
end
|
1285
1425
|
end
|
1286
1426
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -1308,8 +1308,6 @@ module Polars
|
|
1308
1308
|
#
|
1309
1309
|
# @param k [Integer]
|
1310
1310
|
# Number of elements to return.
|
1311
|
-
# @param reverse [Boolean]
|
1312
|
-
# Return the smallest elements.
|
1313
1311
|
#
|
1314
1312
|
# @return [Expr]
|
1315
1313
|
#
|
@@ -1322,7 +1320,45 @@ module Polars
|
|
1322
1320
|
# df.select(
|
1323
1321
|
# [
|
1324
1322
|
# Polars.col("value").top_k.alias("top_k"),
|
1325
|
-
# Polars.col("value").
|
1323
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1324
|
+
# ]
|
1325
|
+
# )
|
1326
|
+
# # =>
|
1327
|
+
# # shape: (5, 2)
|
1328
|
+
# # ┌───────┬──────────┐
|
1329
|
+
# # │ top_k ┆ bottom_k │
|
1330
|
+
# # │ --- ┆ --- │
|
1331
|
+
# # │ i64 ┆ i64 │
|
1332
|
+
# # ╞═══════╪══════════╡
|
1333
|
+
# # │ 99 ┆ 1 │
|
1334
|
+
# # │ 98 ┆ 2 │
|
1335
|
+
# # │ 4 ┆ 3 │
|
1336
|
+
# # │ 3 ┆ 4 │
|
1337
|
+
# # │ 2 ┆ 98 │
|
1338
|
+
# # └───────┴──────────┘
|
1339
|
+
def top_k(k: 5)
|
1340
|
+
wrap_expr(_rbexpr.top_k(k))
|
1341
|
+
end
|
1342
|
+
|
1343
|
+
# Return the `k` smallest elements.
|
1344
|
+
#
|
1345
|
+
# This is the counterpart of `top_k`, which returns the `k` largest elements.
|
1346
|
+
#
|
1347
|
+
# @param k [Integer]
|
1348
|
+
# Number of elements to return.
|
1349
|
+
#
|
1350
|
+
# @return [Expr]
|
1351
|
+
#
|
1352
|
+
# @example
|
1353
|
+
# df = Polars::DataFrame.new(
|
1354
|
+
# {
|
1355
|
+
# "value" => [1, 98, 2, 3, 99, 4]
|
1356
|
+
# }
|
1357
|
+
# )
|
1358
|
+
# df.select(
|
1359
|
+
# [
|
1360
|
+
# Polars.col("value").top_k.alias("top_k"),
|
1361
|
+
# Polars.col("value").bottom_k.alias("bottom_k")
|
1326
1362
|
# ]
|
1327
1363
|
# )
|
1328
1364
|
# # =>
|
@@ -1338,8 +1374,8 @@ module Polars
|
|
1338
1374
|
# # │ 3 ┆ 4 │
|
1339
1375
|
# # │ 2 ┆ 98 │
|
1340
1376
|
# # └───────┴──────────┘
|
1341
|
-
def
|
1342
|
-
wrap_expr(_rbexpr.
|
1377
|
+
def bottom_k(k: 5)
|
1378
|
+
wrap_expr(_rbexpr.bottom_k(k))
|
1343
1379
|
end
|
1344
1380
|
|
1345
1381
|
# Get the index values that would sort this column.
|
@@ -2008,6 +2044,28 @@ module Polars
|
|
2008
2044
|
wrap_expr(_rbexpr.n_unique)
|
2009
2045
|
end
|
2010
2046
|
|
2047
|
+
# Approximate count of unique values.
|
2048
|
+
#
|
2049
|
+
# This is done using the HyperLogLog++ algorithm for cardinality estimation.
|
2050
|
+
#
|
2051
|
+
# @return [Expr]
|
2052
|
+
#
|
2053
|
+
# @example
|
2054
|
+
# df = Polars::DataFrame.new({"a" => [1, 1, 2]})
|
2055
|
+
# df.select(Polars.col("a").approx_unique)
|
2056
|
+
# # =>
|
2057
|
+
# # shape: (1, 1)
|
2058
|
+
# # ┌─────┐
|
2059
|
+
# # │ a │
|
2060
|
+
# # │ --- │
|
2061
|
+
# # │ u32 │
|
2062
|
+
# # ╞═════╡
|
2063
|
+
# # │ 2 │
|
2064
|
+
# # └─────┘
|
2065
|
+
def approx_unique
|
2066
|
+
wrap_expr(_rbexpr.approx_unique)
|
2067
|
+
end
|
2068
|
+
|
2011
2069
|
# Count null values.
|
2012
2070
|
#
|
2013
2071
|
# @return [Expr]
|
@@ -2194,7 +2252,7 @@ module Polars
|
|
2194
2252
|
# # │ 4 │
|
2195
2253
|
# # │ 6 │
|
2196
2254
|
# # │ 6 │
|
2197
|
-
# # │
|
2255
|
+
# # │ 4 │
|
2198
2256
|
# # │ 6 │
|
2199
2257
|
# # │ 6 │
|
2200
2258
|
# # │ 6 │
|
@@ -2571,7 +2629,7 @@ module Polars
|
|
2571
2629
|
# # │ e │
|
2572
2630
|
# # │ l │
|
2573
2631
|
# # │ l │
|
2574
|
-
# # │
|
2632
|
+
# # │ … │
|
2575
2633
|
# # │ o │
|
2576
2634
|
# # │ r │
|
2577
2635
|
# # │ l │
|
@@ -2751,6 +2809,7 @@ module Polars
|
|
2751
2809
|
end
|
2752
2810
|
wrap_expr(_rbexpr.is_in(other._rbexpr))
|
2753
2811
|
end
|
2812
|
+
alias_method :in?, :is_in
|
2754
2813
|
|
2755
2814
|
# Repeat the elements in this Series as specified in the given expression.
|
2756
2815
|
#
|
@@ -3914,8 +3973,8 @@ module Polars
|
|
3914
3973
|
# # │ 2 │
|
3915
3974
|
# # │ 5 │
|
3916
3975
|
# # └─────┘
|
3917
|
-
def rank(method: "average", reverse: false)
|
3918
|
-
wrap_expr(_rbexpr.rank(method, reverse))
|
3976
|
+
def rank(method: "average", reverse: false, seed: nil)
|
3977
|
+
wrap_expr(_rbexpr.rank(method, reverse, seed))
|
3919
3978
|
end
|
3920
3979
|
|
3921
3980
|
# Calculate the n-th discrete difference.
|
@@ -4916,9 +4975,10 @@ module Polars
|
|
4916
4975
|
# # ╞═══════════╪═══════════╡
|
4917
4976
|
# # │ [1, 2, 3] ┆ [4, 5, 6] │
|
4918
4977
|
# # └───────────┴───────────┘
|
4919
|
-
def
|
4920
|
-
wrap_expr(_rbexpr.
|
4978
|
+
def implode
|
4979
|
+
wrap_expr(_rbexpr.implode)
|
4921
4980
|
end
|
4981
|
+
alias_method :list, :implode
|
4922
4982
|
|
4923
4983
|
# Shrink numeric columns to the minimal required datatype.
|
4924
4984
|
#
|
@@ -4962,6 +5022,13 @@ module Polars
|
|
4962
5022
|
ListExpr.new(self)
|
4963
5023
|
end
|
4964
5024
|
|
5025
|
+
# Create an object namespace of all binary related methods.
|
5026
|
+
#
|
5027
|
+
# @return [BinaryExpr]
|
5028
|
+
def bin
|
5029
|
+
BinaryExpr.new(self)
|
5030
|
+
end
|
5031
|
+
|
4965
5032
|
# Create an object namespace of all categorical related methods.
|
4966
5033
|
#
|
4967
5034
|
# @return [CatExpr]
|