polars-df 0.1.3 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
@@ -9,8 +9,83 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
- # def strptime
13
- # end
12
+ # Parse a Utf8 expression to a Date/Datetime/Time type.
13
+ #
14
+ # @param datatype [Symbol]
15
+ # `:date`, `:datetime`, or `:time`.
16
+ # @param fmt [String]
17
+ # Format to use, refer to the
18
+ # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
19
+ # for specification. Example: `"%y-%m-%d"`.
20
+ # @param strict [Boolean]
21
+ # Raise an error if any conversion fails.
22
+ # @param exact [Boolean]
23
+ # - If true, require an exact format match.
24
+ # - If false, allow the format to match anywhere in the target string.
25
+ #
26
+ # @return [Expr]
27
+ #
28
+ # @note
29
+ # When parsing a Datetime the column precision will be inferred from
30
+ # the format string, if given, e.g. "%F %T%.3f" => Datetime("ms"). If
31
+ # no fractional second component is found then the default is "us".
32
+ #
33
+ # @example
34
+ # s = Polars::Series.new(
35
+ # "date",
36
+ # [
37
+ # "2021-04-22",
38
+ # "2022-01-04 00:00:00",
39
+ # "01/31/22",
40
+ # "Sun Jul 8 00:34:60 2001"
41
+ # ]
42
+ # )
43
+ # s.to_frame.with_column(
44
+ # Polars.col("date")
45
+ # .str.strptime(:date, "%F", strict: false)
46
+ # .fill_null(
47
+ # Polars.col("date").str.strptime(:date, "%F %T", strict: false)
48
+ # )
49
+ # .fill_null(Polars.col("date").str.strptime(:date, "%D", strict: false))
50
+ # .fill_null(Polars.col("date").str.strptime(:date, "%c", strict: false))
51
+ # )
52
+ # # =>
53
+ # # shape: (4, 1)
54
+ # # ┌────────────┐
55
+ # # │ date │
56
+ # # │ --- │
57
+ # # │ date │
58
+ # # ╞════════════╡
59
+ # # │ 2021-04-22 │
60
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
61
+ # # │ 2022-01-04 │
62
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
63
+ # # │ 2022-01-31 │
64
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
65
+ # # │ 2001-07-08 │
66
+ # # └────────────┘
67
+ def strptime(datatype, fmt = nil, strict: true, exact: true)
68
+ if !Utils.is_polars_dtype(datatype)
69
+ raise ArgumentError, "expected: {DataType} got: #{datatype}"
70
+ end
71
+
72
+ if datatype == :date
73
+ Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact))
74
+ elsif datatype == :datetime
75
+ # TODO fix
76
+ tu = nil # datatype.tu
77
+ dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact))
78
+ if tu.nil?
79
+ dtcol
80
+ else
81
+ dtcol.dt.cast_time_unit(tu)
82
+ end
83
+ elsif datatype == :time
84
+ Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact))
85
+ else
86
+ raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
87
+ end
88
+ end
14
89
 
15
90
  # Get length of the strings as `:u32` (as number of bytes).
16
91
  #
@@ -291,7 +366,7 @@ module Polars
291
366
 
292
367
  # Return the string left justified in a string of length `width`.
293
368
  #
294
- # Padding is done using the specified `fillcha``.
369
+ # Padding is done using the specified `fillchar`.
295
370
  # The original string is returned if `width` is less than or equal to
296
371
  # `s.length`.
297
372
  #
@@ -324,7 +399,7 @@ module Polars
324
399
  Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
325
400
  end
326
401
 
327
- # Return the string right justified in a string of length ``width``.
402
+ # Return the string right justified in a string of length `width`.
328
403
  #
329
404
  # Padding is done using the specified `fillchar`.
330
405
  # The original string is returned if `width` is less than or equal to
@@ -478,14 +553,115 @@ module Polars
478
553
  Utils.wrap_expr(_rbexpr.str_starts_with(sub))
479
554
  end
480
555
 
481
- # def json_path_match
482
- # end
556
+ # Extract the first match of a JSON string with the provided JSONPath expression.
557
+ #
558
+ # Raises an error if an invalid JSON string is encountered.
559
+ # All return values are cast to Utf8 regardless of the original type.
560
+ #
561
+ # Documentation on the JSONPath standard can be found
562
+ # [here](https://goessner.net/articles/JsonPath/).
563
+ #
564
+ # @param json_path [String]
565
+ # A valid JSON path query string.
566
+ #
567
+ # @return [Expr]
568
+ #
569
+ # @example
570
+ # df = Polars::DataFrame.new(
571
+ # {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
572
+ # )
573
+ # df.select(Polars.col("json_val").str.json_path_match("$.a"))
574
+ # # =>
575
+ # # shape: (5, 1)
576
+ # # ┌──────────┐
577
+ # # │ json_val │
578
+ # # │ --- │
579
+ # # │ str │
580
+ # # ╞══════════╡
581
+ # # │ 1 │
582
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
583
+ # # │ null │
584
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
585
+ # # │ 2 │
586
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
587
+ # # │ 2.1 │
588
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
589
+ # # │ true │
590
+ # # └──────────┘
591
+ def json_path_match(json_path)
592
+ Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
593
+ end
483
594
 
484
- # def decode
485
- # end
595
+ # Decode a value using the provided encoding.
596
+ #
597
+ # @param encoding ["hex", "base64"]
598
+ # The encoding to use.
599
+ # @param strict [Boolean]
600
+ # How to handle invalid inputs:
601
+ #
602
+ # - `true`: An error will be thrown if unable to decode a value.
603
+ # - `false`: Unhandled values will be replaced with `nil`.
604
+ #
605
+ # @return [Expr]
606
+ #
607
+ # @example
608
+ # df = Polars::DataFrame.new({"encoded" => ["666f6f", "626172", nil]})
609
+ # df.select(Polars.col("encoded").str.decode("hex"))
610
+ # # =>
611
+ # # shape: (3, 1)
612
+ # # ┌─────────┐
613
+ # # │ encoded │
614
+ # # │ --- │
615
+ # # │ str │
616
+ # # ╞═════════╡
617
+ # # │ foo │
618
+ # # ├╌╌╌╌╌╌╌╌╌┤
619
+ # # │ bar │
620
+ # # ├╌╌╌╌╌╌╌╌╌┤
621
+ # # │ null │
622
+ # # └─────────┘
623
+ def decode(encoding, strict: false)
624
+ if encoding == "hex"
625
+ Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
626
+ elsif encoding == "base64"
627
+ Utils.wrap_expr(_rbexpr.str_base64_decode(strict))
628
+ else
629
+ raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
630
+ end
631
+ end
486
632
 
487
- # def encode
488
- # end
633
+ # Encode a value using the provided encoding.
634
+ #
635
+ # @param encoding ["hex", "base64"]
636
+ # The encoding to use.
637
+ #
638
+ # @return [Expr]
639
+ #
640
+ # @example
641
+ # df = Polars::DataFrame.new({"strings" => ["foo", "bar", nil]})
642
+ # df.select(Polars.col("strings").str.encode("hex"))
643
+ # # =>
644
+ # # shape: (3, 1)
645
+ # # ┌─────────┐
646
+ # # │ strings │
647
+ # # │ --- │
648
+ # # │ str │
649
+ # # ╞═════════╡
650
+ # # │ 666f6f │
651
+ # # ├╌╌╌╌╌╌╌╌╌┤
652
+ # # │ 626172 │
653
+ # # ├╌╌╌╌╌╌╌╌╌┤
654
+ # # │ null │
655
+ # # └─────────┘
656
+ def encode(encoding)
657
+ if encoding == "hex"
658
+ Utils.wrap_expr(_rbexpr.str_hex_encode)
659
+ elsif encoding == "base64"
660
+ Utils.wrap_expr(_rbexpr.str_base64_encode)
661
+ else
662
+ raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
663
+ end
664
+ end
489
665
 
490
666
  # Extract the target capture group from provided patterns.
491
667
  #
@@ -659,10 +835,10 @@ module Polars
659
835
  end
660
836
  end
661
837
 
662
- # Split the string by a substring, restricted to returning at most ``n`` items.
838
+ # Split the string by a substring, restricted to returning at most `n` items.
663
839
  #
664
- # If the number of possible splits is less than ``n-1``, the remaining field
665
- # elements will be null. If the number of possible splits is ``n-1`` or greater,
840
+ # If the number of possible splits is less than `n-1`, the remaining field
841
+ # elements will be null. If the number of possible splits is `n-1` or greater,
666
842
  # the last (nth) substring will contain the remainder of the string.
667
843
  #
668
844
  # @param by [String]