polars-df 0.3.1-x86_64-linux → 0.5.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -218,6 +218,25 @@ module Polars
218
218
  )
219
219
  end
220
220
 
221
+ # Create a naive Datetime from an existing Date/Datetime expression and a Time.
222
+ #
223
+ # If the underlying expression is a Datetime then its time component is replaced,
224
+ # and if it is a Date then a new Datetime is created by combining the two values.
225
+ #
226
+ # @param time [Object]
227
+ # A Ruby time literal or Polars expression/column that resolves to a time.
228
+ # @param time_unit ["ns", "us", "ms"]
229
+ # Unit of time.
230
+ #
231
+ # @return [Expr]
232
+ def combine(time, time_unit: "us")
233
+ unless time.is_a?(Time) || time.is_a?(Expr)
234
+ raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
235
+ end
236
+ time = Utils.expr_to_lit_or_expr(time)
237
+ Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
238
+ end
239
+
221
240
  # Format Date/datetime with a formatting rule.
222
241
  #
223
242
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
@@ -270,6 +289,34 @@ module Polars
270
289
  Utils.wrap_expr(_rbexpr.year)
271
290
  end
272
291
 
292
+ # Determine whether the year of the underlying date is a leap year.
293
+ #
294
+ # Applies to Date and Datetime columns.
295
+ #
296
+ # @return [Expr]
297
+ #
298
+ # @example
299
+ # start = DateTime.new(2000, 1, 1)
300
+ # stop = DateTime.new(2002, 1, 1)
301
+ # df = Polars::DataFrame.new(
302
+ # {"date" => Polars.date_range(start, stop, "1y")}
303
+ # )
304
+ # df.select(Polars.col("date").dt.is_leap_year)
305
+ # # =>
306
+ # # shape: (3, 1)
307
+ # # ┌───────┐
308
+ # # │ date │
309
+ # # │ --- │
310
+ # # │ bool │
311
+ # # ╞═══════╡
312
+ # # │ true │
313
+ # # │ false │
314
+ # # │ false │
315
+ # # └───────┘
316
+ def is_leap_year
317
+ Utils.wrap_expr(_rbexpr.dt_is_leap_year)
318
+ end
319
+
273
320
  # Extract ISO year from underlying Date representation.
274
321
  #
275
322
  # Applies to Date and Datetime columns.
@@ -550,6 +597,27 @@ module Polars
550
597
  Utils.wrap_expr(_rbexpr.ordinal_day)
551
598
  end
552
599
 
600
+ # Extract the time component from the underlying Date/Datetime representation.
601
+ #
602
+ # @return [Expr]
603
+ def time
604
+ Utils.wrap_expr(_rbexpr.dt_time)
605
+ end
606
+
607
+ # Extract the date component from the underlying Date/Datetime representation.
608
+ #
609
+ # @return [Expr]
610
+ def date
611
+ Utils.wrap_expr(_rbexpr.dt_date)
612
+ end
613
+
614
+ # Return the underlying values as a datetime.
615
+ #
616
+ # @return [Expr]
617
+ def datetime
618
+ Utils.wrap_expr(_rbexpr.dt_datetime)
619
+ end
620
+
553
621
  # Extract hour from underlying DateTime representation.
554
622
  #
555
623
  # Applies to Datetime columns.
@@ -958,8 +1026,8 @@ module Polars
958
1026
  # Time zone for the `Datetime` Series.
959
1027
  #
960
1028
  # @return [Expr]
961
- def replace_time_zone(tz)
962
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
1029
+ def replace_time_zone(tz, use_earliest: nil)
1030
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
963
1031
  end
964
1032
 
965
1033
  # Localize tz-naive Datetime Series to tz-aware Datetime Series.
@@ -1130,7 +1198,7 @@ module Polars
1130
1198
  # ]
1131
1199
  # )
1132
1200
  # # =>
1133
- # # shape: (1001, 2)
1201
+ # # shape: (1_001, 2)
1134
1202
  # # ┌─────────────────────────┬───────────────────┐
1135
1203
  # # │ date ┆ milliseconds_diff │
1136
1204
  # # │ --- ┆ --- │
@@ -1140,7 +1208,7 @@ module Polars
1140
1208
  # # │ 2020-01-01 00:00:00.001 ┆ 1 │
1141
1209
  # # │ 2020-01-01 00:00:00.002 ┆ 1 │
1142
1210
  # # │ 2020-01-01 00:00:00.003 ┆ 1 │
1143
- # # │ ... ...
1211
+ # # │ … ┆ … │
1144
1212
  # # │ 2020-01-01 00:00:00.997 ┆ 1 │
1145
1213
  # # │ 2020-01-01 00:00:00.998 ┆ 1 │
1146
1214
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
@@ -1169,7 +1237,7 @@ module Polars
1169
1237
  # ]
1170
1238
  # )
1171
1239
  # # =>
1172
- # # shape: (1001, 2)
1240
+ # # shape: (1_001, 2)
1173
1241
  # # ┌─────────────────────────┬───────────────────┐
1174
1242
  # # │ date ┆ microseconds_diff │
1175
1243
  # # │ --- ┆ --- │
@@ -1179,7 +1247,7 @@ module Polars
1179
1247
  # # │ 2020-01-01 00:00:00.001 ┆ 1000 │
1180
1248
  # # │ 2020-01-01 00:00:00.002 ┆ 1000 │
1181
1249
  # # │ 2020-01-01 00:00:00.003 ┆ 1000 │
1182
- # # │ ... ...
1250
+ # # │ … ┆ … │
1183
1251
  # # │ 2020-01-01 00:00:00.997 ┆ 1000 │
1184
1252
  # # │ 2020-01-01 00:00:00.998 ┆ 1000 │
1185
1253
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
@@ -1208,7 +1276,7 @@ module Polars
1208
1276
  # ]
1209
1277
  # )
1210
1278
  # # =>
1211
- # # shape: (1001, 2)
1279
+ # # shape: (1_001, 2)
1212
1280
  # # ┌─────────────────────────┬──────────────────┐
1213
1281
  # # │ date ┆ nanoseconds_diff │
1214
1282
  # # │ --- ┆ --- │
@@ -1218,7 +1286,7 @@ module Polars
1218
1286
  # # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
1219
1287
  # # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
1220
1288
  # # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
1221
- # # │ ... ...
1289
+ # # │ … ┆ … │
1222
1290
  # # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
1223
1291
  # # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
1224
1292
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
@@ -1282,5 +1350,77 @@ module Polars
1282
1350
  def offset_by(by)
1283
1351
  Utils.wrap_expr(_rbexpr.dt_offset_by(by))
1284
1352
  end
1353
+
1354
+ # Roll backward to the first day of the month.
1355
+ #
1356
+ # @return [Expr]
1357
+ #
1358
+ # @example
1359
+ # df = Polars::DataFrame.new(
1360
+ # {
1361
+ # "dates" => Polars.date_range(
1362
+ # DateTime.new(2000, 1, 15, 2),
1363
+ # DateTime.new(2000, 12, 15, 2),
1364
+ # "1mo"
1365
+ # )
1366
+ # }
1367
+ # )
1368
+ # df.select(Polars.col("dates").dt.month_start)
1369
+ # # =>
1370
+ # # shape: (12, 1)
1371
+ # # ┌─────────────────────┐
1372
+ # # │ dates │
1373
+ # # │ --- │
1374
+ # # │ datetime[μs] │
1375
+ # # ╞═════════════════════╡
1376
+ # # │ 2000-01-01 02:00:00 │
1377
+ # # │ 2000-02-01 02:00:00 │
1378
+ # # │ 2000-03-01 02:00:00 │
1379
+ # # │ 2000-04-01 02:00:00 │
1380
+ # # │ … │
1381
+ # # │ 2000-09-01 02:00:00 │
1382
+ # # │ 2000-10-01 02:00:00 │
1383
+ # # │ 2000-11-01 02:00:00 │
1384
+ # # │ 2000-12-01 02:00:00 │
1385
+ # # └─────────────────────┘
1386
+ def month_start
1387
+ Utils.wrap_expr(_rbexpr.dt_month_start)
1388
+ end
1389
+
1390
+ # Roll forward to the last day of the month.
1391
+ #
1392
+ # @return [Expr]
1393
+ #
1394
+ # @example
1395
+ # df = Polars::DataFrame.new(
1396
+ # {
1397
+ # "dates" => Polars.date_range(
1398
+ # DateTime.new(2000, 1, 15, 2),
1399
+ # DateTime.new(2000, 12, 15, 2),
1400
+ # "1mo"
1401
+ # )
1402
+ # }
1403
+ # )
1404
+ # df.select(Polars.col("dates").dt.month_end)
1405
+ # # =>
1406
+ # # shape: (12, 1)
1407
+ # # ┌─────────────────────┐
1408
+ # # │ dates │
1409
+ # # │ --- │
1410
+ # # │ datetime[μs] │
1411
+ # # ╞═════════════════════╡
1412
+ # # │ 2000-01-31 02:00:00 │
1413
+ # # │ 2000-02-29 02:00:00 │
1414
+ # # │ 2000-03-31 02:00:00 │
1415
+ # # │ 2000-04-30 02:00:00 │
1416
+ # # │ … │
1417
+ # # │ 2000-09-30 02:00:00 │
1418
+ # # │ 2000-10-31 02:00:00 │
1419
+ # # │ 2000-11-30 02:00:00 │
1420
+ # # │ 2000-12-31 02:00:00 │
1421
+ # # └─────────────────────┘
1422
+ def month_end
1423
+ Utils.wrap_expr(_rbexpr.dt_month_end)
1424
+ end
1285
1425
  end
1286
1426
  end
data/lib/polars/expr.rb CHANGED
@@ -1308,8 +1308,6 @@ module Polars
1308
1308
  #
1309
1309
  # @param k [Integer]
1310
1310
  # Number of elements to return.
1311
- # @param reverse [Boolean]
1312
- # Return the smallest elements.
1313
1311
  #
1314
1312
  # @return [Expr]
1315
1313
  #
@@ -1322,7 +1320,45 @@ module Polars
1322
1320
  # df.select(
1323
1321
  # [
1324
1322
  # Polars.col("value").top_k.alias("top_k"),
1325
- # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1323
+ # Polars.col("value").bottom_k.alias("bottom_k")
1324
+ # ]
1325
+ # )
1326
+ # # =>
1327
+ # # shape: (5, 2)
1328
+ # # ┌───────┬──────────┐
1329
+ # # │ top_k ┆ bottom_k │
1330
+ # # │ --- ┆ --- │
1331
+ # # │ i64 ┆ i64 │
1332
+ # # ╞═══════╪══════════╡
1333
+ # # │ 99 ┆ 1 │
1334
+ # # │ 98 ┆ 2 │
1335
+ # # │ 4 ┆ 3 │
1336
+ # # │ 3 ┆ 4 │
1337
+ # # │ 2 ┆ 98 │
1338
+ # # └───────┴──────────┘
1339
+ def top_k(k: 5)
1340
+ wrap_expr(_rbexpr.top_k(k))
1341
+ end
1342
+
1343
+ # Return the `k` smallest elements.
1344
+ #
1345
+ # This is the counterpart of `top_k`, which returns the `k` largest elements.
1346
+ #
1347
+ # @param k [Integer]
1348
+ # Number of elements to return.
1349
+ #
1350
+ # @return [Expr]
1351
+ #
1352
+ # @example
1353
+ # df = Polars::DataFrame.new(
1354
+ # {
1355
+ # "value" => [1, 98, 2, 3, 99, 4]
1356
+ # }
1357
+ # )
1358
+ # df.select(
1359
+ # [
1360
+ # Polars.col("value").top_k.alias("top_k"),
1361
+ # Polars.col("value").bottom_k.alias("bottom_k")
1326
1362
  # ]
1327
1363
  # )
1328
1364
  # # =>
@@ -1338,8 +1374,8 @@ module Polars
1338
1374
  # # │ 3 ┆ 4 │
1339
1375
  # # │ 2 ┆ 98 │
1340
1376
  # # └───────┴──────────┘
1341
- def top_k(k: 5, reverse: false)
1342
- wrap_expr(_rbexpr.top_k(k, reverse))
1377
+ def bottom_k(k: 5)
1378
+ wrap_expr(_rbexpr.bottom_k(k))
1343
1379
  end
1344
1380
 
1345
1381
  # Get the index values that would sort this column.
@@ -2008,6 +2044,28 @@ module Polars
2008
2044
  wrap_expr(_rbexpr.n_unique)
2009
2045
  end
2010
2046
 
2047
+ # Approximate count of unique values.
2048
+ #
2049
+ # This is done using the HyperLogLog++ algorithm for cardinality estimation.
2050
+ #
2051
+ # @return [Expr]
2052
+ #
2053
+ # @example
2054
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2055
+ # df.select(Polars.col("a").approx_unique)
2056
+ # # =>
2057
+ # # shape: (1, 1)
2058
+ # # ┌─────┐
2059
+ # # │ a │
2060
+ # # │ --- │
2061
+ # # │ u32 │
2062
+ # # ╞═════╡
2063
+ # # │ 2 │
2064
+ # # └─────┘
2065
+ def approx_unique
2066
+ wrap_expr(_rbexpr.approx_unique)
2067
+ end
2068
+
2011
2069
  # Count null values.
2012
2070
  #
2013
2071
  # @return [Expr]
@@ -2194,7 +2252,7 @@ module Polars
2194
2252
  # # │ 4 │
2195
2253
  # # │ 6 │
2196
2254
  # # │ 6 │
2197
- # # │ ...
2255
+ # # │ 4
2198
2256
  # # │ 6 │
2199
2257
  # # │ 6 │
2200
2258
  # # │ 6 │
@@ -2571,7 +2629,7 @@ module Polars
2571
2629
  # # │ e │
2572
2630
  # # │ l │
2573
2631
  # # │ l │
2574
- # # │ ...
2632
+ # # │ … │
2575
2633
  # # │ o │
2576
2634
  # # │ r │
2577
2635
  # # │ l │
@@ -2751,6 +2809,7 @@ module Polars
2751
2809
  end
2752
2810
  wrap_expr(_rbexpr.is_in(other._rbexpr))
2753
2811
  end
2812
+ alias_method :in?, :is_in
2754
2813
 
2755
2814
  # Repeat the elements in this Series as specified in the given expression.
2756
2815
  #
@@ -3914,8 +3973,8 @@ module Polars
3914
3973
  # # │ 2 │
3915
3974
  # # │ 5 │
3916
3975
  # # └─────┘
3917
- def rank(method: "average", reverse: false)
3918
- wrap_expr(_rbexpr.rank(method, reverse))
3976
+ def rank(method: "average", reverse: false, seed: nil)
3977
+ wrap_expr(_rbexpr.rank(method, reverse, seed))
3919
3978
  end
3920
3979
 
3921
3980
  # Calculate the n-th discrete difference.
@@ -4916,9 +4975,10 @@ module Polars
4916
4975
  # # ╞═══════════╪═══════════╡
4917
4976
  # # │ [1, 2, 3] ┆ [4, 5, 6] │
4918
4977
  # # └───────────┴───────────┘
4919
- def list
4920
- wrap_expr(_rbexpr.list)
4978
+ def implode
4979
+ wrap_expr(_rbexpr.implode)
4921
4980
  end
4981
+ alias_method :list, :implode
4922
4982
 
4923
4983
  # Shrink numeric columns to the minimal required datatype.
4924
4984
  #
@@ -4962,6 +5022,13 @@ module Polars
4962
5022
  ListExpr.new(self)
4963
5023
  end
4964
5024
 
5025
+ # Create an object namespace of all binary related methods.
5026
+ #
5027
+ # @return [BinaryExpr]
5028
+ def bin
5029
+ BinaryExpr.new(self)
5030
+ end
5031
+
4965
5032
  # Create an object namespace of all categorical related methods.
4966
5033
  #
4967
5034
  # @return [CatExpr]