polars-df 0.3.1-x86_64-linux → 0.5.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
@@ -218,6 +218,25 @@ module Polars
218
218
  )
219
219
  end
220
220
 
221
+ # Create a naive Datetime from an existing Date/Datetime expression and a Time.
222
+ #
223
+ # If the underlying expression is a Datetime then its time component is replaced,
224
+ # and if it is a Date then a new Datetime is created by combining the two values.
225
+ #
226
+ # @param time [Object]
227
+ # A Ruby time literal or Polars expression/column that resolves to a time.
228
+ # @param time_unit ["ns", "us", "ms"]
229
+ # Unit of time.
230
+ #
231
+ # @return [Expr]
232
+ def combine(time, time_unit: "us")
233
+ unless time.is_a?(Time) || time.is_a?(Expr)
234
+ raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
235
+ end
236
+ time = Utils.expr_to_lit_or_expr(time)
237
+ Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
238
+ end
239
+
221
240
  # Format Date/datetime with a formatting rule.
222
241
  #
223
242
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
@@ -270,6 +289,34 @@ module Polars
270
289
  Utils.wrap_expr(_rbexpr.year)
271
290
  end
272
291
 
292
+ # Determine whether the year of the underlying date is a leap year.
293
+ #
294
+ # Applies to Date and Datetime columns.
295
+ #
296
+ # @return [Expr]
297
+ #
298
+ # @example
299
+ # start = DateTime.new(2000, 1, 1)
300
+ # stop = DateTime.new(2002, 1, 1)
301
+ # df = Polars::DataFrame.new(
302
+ # {"date" => Polars.date_range(start, stop, "1y")}
303
+ # )
304
+ # df.select(Polars.col("date").dt.is_leap_year)
305
+ # # =>
306
+ # # shape: (3, 1)
307
+ # # ┌───────┐
308
+ # # │ date │
309
+ # # │ --- │
310
+ # # │ bool │
311
+ # # ╞═══════╡
312
+ # # │ true │
313
+ # # │ false │
314
+ # # │ false │
315
+ # # └───────┘
316
+ def is_leap_year
317
+ Utils.wrap_expr(_rbexpr.dt_is_leap_year)
318
+ end
319
+
273
320
  # Extract ISO year from underlying Date representation.
274
321
  #
275
322
  # Applies to Date and Datetime columns.
@@ -550,6 +597,27 @@ module Polars
550
597
  Utils.wrap_expr(_rbexpr.ordinal_day)
551
598
  end
552
599
 
600
+ # Extract time from underlying DateTime representation.
601
+ #
602
+ # @return [Expr]
603
+ def time
604
+ Utils.wrap_expr(_rbexpr.dt_time)
605
+ end
606
+
607
+ # Extract date from underlying Date/DateTime representation.
608
+ #
609
+ # @return [Expr]
610
+ def date
611
+ Utils.wrap_expr(_rbexpr.dt_date)
612
+ end
613
+
614
+ # Return datetime from underlying DateTime representation.
615
+ #
616
+ # @return [Expr]
617
+ def datetime
618
+ Utils.wrap_expr(_rbexpr.dt_datetime)
619
+ end
620
+
553
621
  # Extract hour from underlying DateTime representation.
554
622
  #
555
623
  # Applies to Datetime columns.
@@ -958,8 +1026,8 @@ module Polars
958
1026
  # Time zone for the `Datetime` Series.
959
1027
  #
960
1028
  # @return [Expr]
961
- def replace_time_zone(tz)
962
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
1029
+ def replace_time_zone(tz, use_earliest: nil)
1030
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
963
1031
  end
964
1032
 
965
1033
  # Localize tz-naive Datetime Series to tz-aware Datetime Series.
@@ -1130,7 +1198,7 @@ module Polars
1130
1198
  # ]
1131
1199
  # )
1132
1200
  # # =>
1133
- # # shape: (1001, 2)
1201
+ # # shape: (1_001, 2)
1134
1202
  # # ┌─────────────────────────┬───────────────────┐
1135
1203
  # # │ date ┆ milliseconds_diff │
1136
1204
  # # │ --- ┆ --- │
@@ -1140,7 +1208,7 @@ module Polars
1140
1208
  # # │ 2020-01-01 00:00:00.001 ┆ 1 │
1141
1209
  # # │ 2020-01-01 00:00:00.002 ┆ 1 │
1142
1210
  # # │ 2020-01-01 00:00:00.003 ┆ 1 │
1143
- # # │ ... ...
1211
+ # # │ … ┆ … │
1144
1212
  # # │ 2020-01-01 00:00:00.997 ┆ 1 │
1145
1213
  # # │ 2020-01-01 00:00:00.998 ┆ 1 │
1146
1214
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
@@ -1169,7 +1237,7 @@ module Polars
1169
1237
  # ]
1170
1238
  # )
1171
1239
  # # =>
1172
- # # shape: (1001, 2)
1240
+ # # shape: (1_001, 2)
1173
1241
  # # ┌─────────────────────────┬───────────────────┐
1174
1242
  # # │ date ┆ microseconds_diff │
1175
1243
  # # │ --- ┆ --- │
@@ -1179,7 +1247,7 @@ module Polars
1179
1247
  # # │ 2020-01-01 00:00:00.001 ┆ 1000 │
1180
1248
  # # │ 2020-01-01 00:00:00.002 ┆ 1000 │
1181
1249
  # # │ 2020-01-01 00:00:00.003 ┆ 1000 │
1182
- # # │ ... ...
1250
+ # # │ … ┆ … │
1183
1251
  # # │ 2020-01-01 00:00:00.997 ┆ 1000 │
1184
1252
  # # │ 2020-01-01 00:00:00.998 ┆ 1000 │
1185
1253
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
@@ -1208,7 +1276,7 @@ module Polars
1208
1276
  # ]
1209
1277
  # )
1210
1278
  # # =>
1211
- # # shape: (1001, 2)
1279
+ # # shape: (1_001, 2)
1212
1280
  # # ┌─────────────────────────┬──────────────────┐
1213
1281
  # # │ date ┆ nanoseconds_diff │
1214
1282
  # # │ --- ┆ --- │
@@ -1218,7 +1286,7 @@ module Polars
1218
1286
  # # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
1219
1287
  # # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
1220
1288
  # # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
1221
- # # │ ... ...
1289
+ # # │ … ┆ … │
1222
1290
  # # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
1223
1291
  # # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
1224
1292
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
@@ -1282,5 +1350,77 @@ module Polars
1282
1350
  def offset_by(by)
1283
1351
  Utils.wrap_expr(_rbexpr.dt_offset_by(by))
1284
1352
  end
1353
+
1354
+ # Roll backward to the first day of the month.
1355
+ #
1356
+ # @return [Expr]
1357
+ #
1358
+ # @example
1359
+ # df = Polars::DataFrame.new(
1360
+ # {
1361
+ # "dates" => Polars.date_range(
1362
+ # DateTime.new(2000, 1, 15, 2),
1363
+ # DateTime.new(2000, 12, 15, 2),
1364
+ # "1mo"
1365
+ # )
1366
+ # }
1367
+ # )
1368
+ # df.select(Polars.col("dates").dt.month_start)
1369
+ # # =>
1370
+ # # shape: (12, 1)
1371
+ # # ┌─────────────────────┐
1372
+ # # │ dates │
1373
+ # # │ --- │
1374
+ # # │ datetime[μs] │
1375
+ # # ╞═════════════════════╡
1376
+ # # │ 2000-01-01 02:00:00 │
1377
+ # # │ 2000-02-01 02:00:00 │
1378
+ # # │ 2000-03-01 02:00:00 │
1379
+ # # │ 2000-04-01 02:00:00 │
1380
+ # # │ … │
1381
+ # # │ 2000-09-01 02:00:00 │
1382
+ # # │ 2000-10-01 02:00:00 │
1383
+ # # │ 2000-11-01 02:00:00 │
1384
+ # # │ 2000-12-01 02:00:00 │
1385
+ # # └─────────────────────┘
1386
+ def month_start
1387
+ Utils.wrap_expr(_rbexpr.dt_month_start)
1388
+ end
1389
+
1390
+ # Roll forward to the last day of the month.
1391
+ #
1392
+ # @return [Expr]
1393
+ #
1394
+ # @example
1395
+ # df = Polars::DataFrame.new(
1396
+ # {
1397
+ # "dates" => Polars.date_range(
1398
+ # DateTime.new(2000, 1, 15, 2),
1399
+ # DateTime.new(2000, 12, 15, 2),
1400
+ # "1mo"
1401
+ # )
1402
+ # }
1403
+ # )
1404
+ # df.select(Polars.col("dates").dt.month_end)
1405
+ # # =>
1406
+ # # shape: (12, 1)
1407
+ # # ┌─────────────────────┐
1408
+ # # │ dates │
1409
+ # # │ --- │
1410
+ # # │ datetime[μs] │
1411
+ # # ╞═════════════════════╡
1412
+ # # │ 2000-01-31 02:00:00 │
1413
+ # # │ 2000-02-29 02:00:00 │
1414
+ # # │ 2000-03-31 02:00:00 │
1415
+ # # │ 2000-04-30 02:00:00 │
1416
+ # # │ … │
1417
+ # # │ 2000-09-30 02:00:00 │
1418
+ # # │ 2000-10-31 02:00:00 │
1419
+ # # │ 2000-11-30 02:00:00 │
1420
+ # # │ 2000-12-31 02:00:00 │
1421
+ # # └─────────────────────┘
1422
+ def month_end
1423
+ Utils.wrap_expr(_rbexpr.dt_month_end)
1424
+ end
1285
1425
  end
1286
1426
  end
data/lib/polars/expr.rb CHANGED
@@ -1308,8 +1308,6 @@ module Polars
1308
1308
  #
1309
1309
  # @param k [Integer]
1310
1310
  # Number of elements to return.
1311
- # @param reverse [Boolean]
1312
- # Return the smallest elements.
1313
1311
  #
1314
1312
  # @return [Expr]
1315
1313
  #
@@ -1322,7 +1320,45 @@ module Polars
1322
1320
  # df.select(
1323
1321
  # [
1324
1322
  # Polars.col("value").top_k.alias("top_k"),
1325
- # Polars.col("value").top_k(reverse: true).alias("bottom_k")
1323
+ # Polars.col("value").bottom_k.alias("bottom_k")
1324
+ # ]
1325
+ # )
1326
+ # # =>
1327
+ # # shape: (5, 2)
1328
+ # # ┌───────┬──────────┐
1329
+ # # │ top_k ┆ bottom_k │
1330
+ # # │ --- ┆ --- │
1331
+ # # │ i64 ┆ i64 │
1332
+ # # ╞═══════╪══════════╡
1333
+ # # │ 99 ┆ 1 │
1334
+ # # │ 98 ┆ 2 │
1335
+ # # │ 4 ┆ 3 │
1336
+ # # │ 3 ┆ 4 │
1337
+ # # │ 2 ┆ 98 │
1338
+ # # └───────┴──────────┘
1339
+ def top_k(k: 5)
1340
+ wrap_expr(_rbexpr.top_k(k))
1341
+ end
1342
+
1343
+ # Return the `k` smallest elements.
1344
+ #
1345
+ # This is the counterpart of `top_k` and replaces the former `top_k(reverse: true)`.
1346
+ #
1347
+ # @param k [Integer]
1348
+ # Number of elements to return.
1349
+ #
1350
+ # @return [Expr]
1351
+ #
1352
+ # @example
1353
+ # df = Polars::DataFrame.new(
1354
+ # {
1355
+ # "value" => [1, 98, 2, 3, 99, 4]
1356
+ # }
1357
+ # )
1358
+ # df.select(
1359
+ # [
1360
+ # Polars.col("value").top_k.alias("top_k"),
1361
+ # Polars.col("value").bottom_k.alias("bottom_k")
1326
1362
  # ]
1327
1363
  # )
1328
1364
  # # =>
@@ -1338,8 +1374,8 @@ module Polars
1338
1374
  # # │ 3 ┆ 4 │
1339
1375
  # # │ 2 ┆ 98 │
1340
1376
  # # └───────┴──────────┘
1341
- def top_k(k: 5, reverse: false)
1342
- wrap_expr(_rbexpr.top_k(k, reverse))
1377
+ def bottom_k(k: 5)
1378
+ wrap_expr(_rbexpr.bottom_k(k))
1343
1379
  end
1344
1380
 
1345
1381
  # Get the index values that would sort this column.
@@ -2008,6 +2044,28 @@ module Polars
2008
2044
  wrap_expr(_rbexpr.n_unique)
2009
2045
  end
2010
2046
 
2047
+ # Approximate count of unique values.
2048
+ #
2049
+ # This is done using the HyperLogLog++ algorithm for cardinality estimation.
2050
+ #
2051
+ # @return [Expr]
2052
+ #
2053
+ # @example
2054
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
2055
+ # df.select(Polars.col("a").approx_unique)
2056
+ # # =>
2057
+ # # shape: (1, 1)
2058
+ # # ┌─────┐
2059
+ # # │ a │
2060
+ # # │ --- │
2061
+ # # │ u32 │
2062
+ # # ╞═════╡
2063
+ # # │ 2 │
2064
+ # # └─────┘
2065
+ def approx_unique
2066
+ wrap_expr(_rbexpr.approx_unique)
2067
+ end
2068
+
2011
2069
  # Count null values.
2012
2070
  #
2013
2071
  # @return [Expr]
@@ -2194,7 +2252,7 @@ module Polars
2194
2252
  # # │ 4 │
2195
2253
  # # │ 6 │
2196
2254
  # # │ 6 │
2197
- # # │ ...
2255
+ # # │ 4
2198
2256
  # # │ 6 │
2199
2257
  # # │ 6 │
2200
2258
  # # │ 6 │
@@ -2571,7 +2629,7 @@ module Polars
2571
2629
  # # │ e │
2572
2630
  # # │ l │
2573
2631
  # # │ l │
2574
- # # │ ...
2632
+ # # │ … │
2575
2633
  # # │ o │
2576
2634
  # # │ r │
2577
2635
  # # │ l │
@@ -2751,6 +2809,7 @@ module Polars
2751
2809
  end
2752
2810
  wrap_expr(_rbexpr.is_in(other._rbexpr))
2753
2811
  end
2812
+ alias_method :in?, :is_in
2754
2813
 
2755
2814
  # Repeat the elements in this Series as specified in the given expression.
2756
2815
  #
@@ -3914,8 +3973,8 @@ module Polars
3914
3973
  # # │ 2 │
3915
3974
  # # │ 5 │
3916
3975
  # # └─────┘
3917
- def rank(method: "average", reverse: false)
3918
- wrap_expr(_rbexpr.rank(method, reverse))
3976
+ def rank(method: "average", reverse: false, seed: nil)
3977
+ wrap_expr(_rbexpr.rank(method, reverse, seed))
3919
3978
  end
3920
3979
 
3921
3980
  # Calculate the n-th discrete difference.
@@ -4916,9 +4975,10 @@ module Polars
4916
4975
  # # ╞═══════════╪═══════════╡
4917
4976
  # # │ [1, 2, 3] ┆ [4, 5, 6] │
4918
4977
  # # └───────────┴───────────┘
4919
- def list
4920
- wrap_expr(_rbexpr.list)
4978
+ def implode
4979
+ wrap_expr(_rbexpr.implode)
4921
4980
  end
4981
+ alias_method :list, :implode
4922
4982
 
4923
4983
  # Shrink numeric columns to the minimal required datatype.
4924
4984
  #
@@ -4962,6 +5022,13 @@ module Polars
4962
5022
  ListExpr.new(self)
4963
5023
  end
4964
5024
 
5025
+ # Create an object namespace of all binary related methods.
5026
+ #
5027
+ # @return [BinaryExpr]
5028
+ def bin
5029
+ BinaryExpr.new(self)
5030
+ end
5031
+
4965
5032
  # Create an object namespace of all categorical related methods.
4966
5033
  #
4967
5034
  # @return [CatExpr]