polars-df 0.6.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -120,9 +120,12 @@ module Polars
120
120
  end
121
121
 
122
122
  # UTF-8 encoded string type.
123
- class Utf8 < DataType
123
+ class String < DataType
124
124
  end
125
125
 
126
+ # Allow Utf8 as an alias for String
127
+ Utf8 = String
128
+
126
129
  # Binary type.
127
130
  class Binary < DataType
128
131
  end
@@ -97,15 +97,20 @@ module Polars
97
97
  # # │ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
98
98
  # # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
99
99
  # # └─────────────────────┴─────────────────────┘
100
- def truncate(every, offset: nil)
100
+ def truncate(every, offset: nil, use_earliest: nil)
101
101
  if offset.nil?
102
102
  offset = "0ns"
103
103
  end
104
104
 
105
+ if !every.is_a?(Expr)
106
+ every = Utils._timedelta_to_pl_duration(every)
107
+ end
108
+ every = Utils.parse_as_expression(every, str_as_lit: true)
109
+
105
110
  Utils.wrap_expr(
106
111
  _rbexpr.dt_truncate(
107
- Utils._timedelta_to_pl_duration(every),
108
- Utils._timedelta_to_pl_duration(offset)
112
+ every,
113
+ Utils._timedelta_to_pl_duration(offset),
109
114
  )
110
115
  )
111
116
  end
@@ -360,7 +365,7 @@ module Polars
360
365
  # # ┌──────┐
361
366
  # # │ date │
362
367
  # # │ --- │
363
- # # │ u32
368
+ # # │ i8
364
369
  # # ╞══════╡
365
370
  # # │ 1 │
366
371
  # # │ 2 │
@@ -402,7 +407,7 @@ module Polars
402
407
  # # ┌──────┐
403
408
  # # │ date │
404
409
  # # │ --- │
405
- # # │ u32
410
+ # # │ i8
406
411
  # # ╞══════╡
407
412
  # # │ 1 │
408
413
  # # │ 2 │
@@ -444,7 +449,7 @@ module Polars
444
449
  # # ┌──────┐
445
450
  # # │ date │
446
451
  # # │ --- │
447
- # # │ u32
452
+ # # │ i8
448
453
  # # ╞══════╡
449
454
  # # │ 1 │
450
455
  # # │ 5 │
@@ -491,7 +496,7 @@ module Polars
491
496
  # # ┌─────────┬──────────────┬─────────────┐
492
497
  # # │ weekday ┆ day_of_month ┆ day_of_year │
493
498
  # # │ --- ┆ --- ┆ --- │
494
- # # │ u32 u32 u32
499
+ # # │ i8 i8 i16
495
500
  # # ╞═════════╪══════════════╪═════════════╡
496
501
  # # │ 1 ┆ 1 ┆ 1 │
497
502
  # # │ 4 ┆ 4 ┆ 4 │
@@ -539,7 +544,7 @@ module Polars
539
544
  # # ┌─────────┬──────────────┬─────────────┐
540
545
  # # │ weekday ┆ day_of_month ┆ day_of_year │
541
546
  # # │ --- ┆ --- ┆ --- │
542
- # # │ u32 u32 u32
547
+ # # │ i8 i8 i16
543
548
  # # ╞═════════╪══════════════╪═════════════╡
544
549
  # # │ 1 ┆ 1 ┆ 1 │
545
550
  # # │ 4 ┆ 4 ┆ 4 │
@@ -587,7 +592,7 @@ module Polars
587
592
  # # ┌─────────┬──────────────┬─────────────┐
588
593
  # # │ weekday ┆ day_of_month ┆ day_of_year │
589
594
  # # │ --- ┆ --- ┆ --- │
590
- # # │ u32 u32 u32
595
+ # # │ i8 i8 i16
591
596
  # # ╞═════════╪══════════════╪═════════════╡
592
597
  # # │ 1 ┆ 1 ┆ 1 │
593
598
  # # │ 4 ┆ 4 ┆ 4 │
@@ -649,7 +654,7 @@ module Polars
649
654
  # # ┌──────┐
650
655
  # # │ date │
651
656
  # # │ --- │
652
- # # │ u32
657
+ # # │ i8
653
658
  # # ╞══════╡
654
659
  # # │ 0 │
655
660
  # # │ 12 │
@@ -690,7 +695,7 @@ module Polars
690
695
  # # ┌──────┐
691
696
  # # │ date │
692
697
  # # │ --- │
693
- # # │ u32
698
+ # # │ i8
694
699
  # # ╞══════╡
695
700
  # # │ 0 │
696
701
  # # │ 2 │
@@ -739,7 +744,7 @@ module Polars
739
744
  # # ┌──────┐
740
745
  # # │ secs │
741
746
  # # │ --- │
742
- # # │ u32
747
+ # # │ i8
743
748
  # # ╞══════╡
744
749
  # # │ 0 │
745
750
  # # │ 3 │
@@ -784,7 +789,7 @@ module Polars
784
789
  # # ┌──────┐
785
790
  # # │ date │
786
791
  # # │ --- │
787
- # # │ u32
792
+ # # │ i8
788
793
  # # ╞══════╡
789
794
  # # │ 0 │
790
795
  # # │ 2 │
@@ -1026,21 +1031,10 @@ module Polars
1026
1031
  # Time zone for the `Datetime` Series.
1027
1032
  #
1028
1033
  # @return [Expr]
1029
- def replace_time_zone(tz, use_earliest: nil)
1030
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
1031
- end
1032
-
1033
- # Localize tz-naive Datetime Series to tz-aware Datetime Series.
1034
- #
1035
- # This method takes a naive Datetime Series and makes this time zone aware.
1036
- # It does not move the time to another time zone.
1037
- #
1038
- # @param tz [String]
1039
- # Time zone for the `Datetime` Series.
1040
- #
1041
- # @return [Expr]
1042
- def tz_localize(tz)
1043
- Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
1034
+ def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
1035
+ ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
1036
+ ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
1037
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
1044
1038
  end
1045
1039
 
1046
1040
  # Extract the days from a Duration type.
@@ -1348,6 +1342,7 @@ module Polars
1348
1342
  # # │ 2006-01-01 00:00:00 ┆ 2003-11-01 00:00:00 │
1349
1343
  # # └─────────────────────┴─────────────────────┘
1350
1344
  def offset_by(by)
1345
+ by = Utils.parse_as_expression(by, str_as_lit: true)
1351
1346
  Utils.wrap_expr(_rbexpr.dt_offset_by(by))
1352
1347
  end
1353
1348
 
@@ -23,18 +23,8 @@ module Polars
23
23
  # @return [Object]
24
24
  #
25
25
  # @example
26
- # date = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
27
- # # =>
28
- # # shape: (3,)
29
- # # Series: '' [datetime[μs]]
30
- # # [
31
- # # 2001-01-01 00:00:00
32
- # # 2001-01-02 00:00:00
33
- # # 2001-01-03 00:00:00
34
- # # ]
35
- #
36
- # @example
37
- # date.dt.min
26
+ # s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
27
+ # s.dt.min
38
28
  # # => 2001-01-01 00:00:00 UTC
39
29
  def min
40
30
  Utils.wrap_s(_s).min
@@ -45,18 +35,8 @@ module Polars
45
35
  # @return [Object]
46
36
  #
47
37
  # @example
48
- # date = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
49
- # # =>
50
- # # shape: (3,)
51
- # # Series: '' [datetime[μs]]
52
- # # [
53
- # # 2001-01-01 00:00:00
54
- # # 2001-01-02 00:00:00
55
- # # 2001-01-03 00:00:00
56
- # # ]
57
- #
58
- # @example
59
- # date.dt.max
38
+ # s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
39
+ # s.dt.max
60
40
  # # => 2001-01-03 00:00:00 UTC
61
41
  def max
62
42
  Utils.wrap_s(_s).max
@@ -240,7 +220,7 @@ module Polars
240
220
  # date.dt.quarter
241
221
  # # =>
242
222
  # # shape: (4,)
243
- # # Series: '' [u32]
223
+ # # Series: '' [i8]
244
224
  # # [
245
225
  # # 1
246
226
  # # 1
@@ -278,7 +258,7 @@ module Polars
278
258
  # date.dt.month
279
259
  # # =>
280
260
  # # shape: (4,)
281
- # # Series: '' [u32]
261
+ # # Series: '' [i8]
282
262
  # # [
283
263
  # # 1
284
264
  # # 2
@@ -316,7 +296,7 @@ module Polars
316
296
  # date.dt.week
317
297
  # # =>
318
298
  # # shape: (4,)
319
- # # Series: '' [u32]
299
+ # # Series: '' [i8]
320
300
  # # [
321
301
  # # 1
322
302
  # # 5
@@ -356,7 +336,7 @@ module Polars
356
336
  # date.dt.weekday
357
337
  # # =>
358
338
  # # shape: (7,)
359
- # # Series: '' [u32]
339
+ # # Series: '' [i8]
360
340
  # # [
361
341
  # # 1
362
342
  # # 2
@@ -398,7 +378,7 @@ module Polars
398
378
  # date.dt.day
399
379
  # # =>
400
380
  # # shape: (5,)
401
- # # Series: '' [u32]
381
+ # # Series: '' [i8]
402
382
  # # [
403
383
  # # 1
404
384
  # # 3
@@ -436,7 +416,7 @@ module Polars
436
416
  # date.dt.ordinal_day
437
417
  # # =>
438
418
  # # shape: (3,)
439
- # # Series: '' [u32]
419
+ # # Series: '' [i16]
440
420
  # # [
441
421
  # # 1
442
422
  # # 32
@@ -472,7 +452,7 @@ module Polars
472
452
  # date.dt.hour
473
453
  # # =>
474
454
  # # shape: (4,)
475
- # # Series: '' [u32]
455
+ # # Series: '' [i8]
476
456
  # # [
477
457
  # # 0
478
458
  # # 1
@@ -508,7 +488,7 @@ module Polars
508
488
  # date.dt.minute
509
489
  # # =>
510
490
  # # shape: (3,)
511
- # # Series: '' [u32]
491
+ # # Series: '' [i8]
512
492
  # # [
513
493
  # # 0
514
494
  # # 2
@@ -551,7 +531,7 @@ module Polars
551
531
  # date.dt.second
552
532
  # # =>
553
533
  # # shape: (9,)
554
- # # Series: '' [u32]
534
+ # # Series: '' [i8]
555
535
  # # [
556
536
  # # 0
557
537
  # # 0
@@ -613,7 +593,7 @@ module Polars
613
593
  # date.dt.millisecond
614
594
  # # =>
615
595
  # # shape: (9,)
616
- # # Series: '' [u32]
596
+ # # Series: '' [i32]
617
597
  # # [
618
598
  # # 0
619
599
  # # 500
@@ -658,7 +638,7 @@ module Polars
658
638
  # date.dt.microsecond
659
639
  # # =>
660
640
  # # shape: (9,)
661
- # # Series: '' [u32]
641
+ # # Series: '' [i32]
662
642
  # # [
663
643
  # # 0
664
644
  # # 500000
@@ -703,7 +683,7 @@ module Polars
703
683
  # date.dt.nanosecond
704
684
  # # =>
705
685
  # # shape: (9,)
706
- # # Series: '' [u32]
686
+ # # Series: '' [i32]
707
687
  # # [
708
688
  # # 0
709
689
  # # 500000000
@@ -1400,7 +1380,7 @@ module Polars
1400
1380
  # # 2001-01-01 00:30:00
1401
1381
  # # 2001-01-01 01:00:00
1402
1382
  # # ]
1403
- def truncate(every, offset: nil)
1383
+ def truncate(every, offset: nil, use_earliest: nil)
1404
1384
  super
1405
1385
  end
1406
1386
 
@@ -2,7 +2,7 @@ module Polars
2
2
  # A dynamic grouper.
3
3
  #
4
4
  # This has an `.agg` method which allows you to run all polars expressions in a
5
- # groupby context.
5
+ # group by context.
6
6
  class DynamicGroupBy
7
7
  def initialize(
8
8
  df,
@@ -34,7 +34,7 @@ module Polars
34
34
 
35
35
  def agg(aggs)
36
36
  @df.lazy
37
- .groupby_dynamic(
37
+ .group_by_dynamic(
38
38
  @time_column,
39
39
  every: @every,
40
40
  period: @period,