polars-df 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,8 +9,83 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
- # def strptime
13
- # end
12
+ # Parse a Utf8 expression to a Date/Datetime/Time type.
13
+ #
14
+ # @param datatype [Symbol]
15
+ # `:date`, `:datetime`, or `:time`.
16
+ # @param fmt [String]
17
+ # Format to use, refer to the
18
+ # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
19
+ # for specification. Example: `"%y-%m-%d"`.
20
+ # @param strict [Boolean]
21
+ # Raise an error if any conversion fails.
22
+ # @param exact [Boolean]
23
+ # - If true, require an exact format match.
24
+ # - If false, allow the format to match anywhere in the target string.
25
+ #
26
+ # @return [Expr]
27
+ #
28
+ # @note
29
+ # When parsing a Datetime the column precision will be inferred from
30
+ # the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
31
+ # no fractional second component is found then the default is "us".
32
+ #
33
+ # @example
34
+ # s = Polars::Series.new(
35
+ # "date",
36
+ # [
37
+ # "2021-04-22",
38
+ # "2022-01-04 00:00:00",
39
+ # "01/31/22",
40
+ # "Sun Jul 8 00:34:60 2001"
41
+ # ]
42
+ # )
43
+ # s.to_frame.with_column(
44
+ # Polars.col("date")
45
+ # .str.strptime(:date, "%F", strict: false)
46
+ # .fill_null(
47
+ # Polars.col("date").str.strptime(:date, "%F %T", strict: false)
48
+ # )
49
+ # .fill_null(Polars.col("date").str.strptime(:date, "%D", strict: false))
50
+ # .fill_null(Polars.col("date").str.strptime(:date, "%c", strict: false))
51
+ # )
52
+ # # =>
53
+ # # shape: (4, 1)
54
+ # # ┌────────────┐
55
+ # # │ date │
56
+ # # │ --- │
57
+ # # │ date │
58
+ # # ╞════════════╡
59
+ # # │ 2021-04-22 │
60
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
61
+ # # │ 2022-01-04 │
62
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
63
+ # # │ 2022-01-31 │
64
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
65
+ # # │ 2001-07-08 │
66
+ # # └────────────┘
67
# Parse a Utf8 expression into a Date, Datetime, or Time expression.
#
# @param datatype [Symbol]
#   Target type: `:date`, `:datetime`, or `:time`.
# @param fmt [String, nil]
#   Format string forwarded unchanged to the underlying parser.
# @param strict [Boolean]
#   Forwarded unchanged to the underlying parser.
# @param exact [Boolean]
#   Forwarded unchanged to the underlying parser.
#
# @return [Expr]
#
# @raise [ArgumentError]
#   If `datatype` is rejected by `Utils.is_polars_dtype`, or is accepted
#   by it but is not one of `:date`, `:datetime`, `:time`.
def strptime(datatype, fmt = nil, strict: true, exact: true)
  unless Utils.is_polars_dtype(datatype)
    raise ArgumentError, "expected: {DataType} got: #{datatype}"
  end

  if datatype == :date
    return Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact))
  end

  if datatype == :datetime
    # TODO fix: the time unit should come from the datatype (datatype.tu).
    # Until then it is always nil, so the cast below is never taken.
    time_unit = nil
    parsed = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact))
    return time_unit.nil? ? parsed : parsed.dt.cast_time_unit(time_unit)
  end

  if datatype == :time
    return Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact))
  end

  raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
end
14
89
 
15
90
  # Get length of the strings as `:u32` (as number of bytes).
16
91
  #
@@ -291,7 +366,7 @@ module Polars
291
366
 
292
367
  # Return the string left justified in a string of length `width`.
293
368
  #
294
- # Padding is done using the specified `fillcha``.
369
+ # Padding is done using the specified `fillchar`.
295
370
  # The original string is returned if `width` is less than or equal to
296
371
  # `s.length`.
297
372
  #
@@ -324,7 +399,7 @@ module Polars
324
399
  Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
325
400
  end
326
401
 
327
- # Return the string right justified in a string of length ``width``.
402
+ # Return the string right justified in a string of length `width`.
328
403
  #
329
404
  # Padding is done using the specified `fillchar`.
330
405
  # The original string is returned if `width` is less than or equal to
@@ -478,14 +553,115 @@ module Polars
478
553
  Utils.wrap_expr(_rbexpr.str_starts_with(sub))
479
554
  end
480
555
 
481
- # def json_path_match
482
- # end
556
+ # Extract the first match of a JSON string with the provided JSONPath expression.
557
+ #
558
+ # Throws an error if an invalid JSON string is encountered.
559
+ # All return values will be cast to Utf8 regardless of the original value.
560
+ #
561
+ # Documentation on JSONPath standard can be found
562
+ # [here](https://goessner.net/articles/JsonPath/).
563
+ #
564
+ # @param json_path [String]
565
+ # A valid JSON path query string.
566
+ #
567
+ # @return [Expr]
568
+ #
569
+ # @example
570
+ # df = Polars::DataFrame.new(
571
+ # {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
572
+ # )
573
+ # df.select(Polars.col("json_val").str.json_path_match("$.a"))
574
+ # # =>
575
+ # # shape: (5, 1)
576
+ # # ┌──────────┐
577
+ # # │ json_val │
578
+ # # │ --- │
579
+ # # │ str │
580
+ # # ╞══════════╡
581
+ # # │ 1 │
582
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
583
+ # # │ null │
584
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
585
+ # # │ 2 │
586
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
587
+ # # │ 2.1 │
588
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
589
+ # # │ true │
590
+ # # └──────────┘
591
# Apply a JSONPath query to the underlying expression and wrap the result.
#
# @param json_path [String]
#   JSONPath query string, forwarded unchanged to `str_json_path_match`.
#
# @return [Expr]
def json_path_match(json_path)
  matched = _rbexpr.str_json_path_match(json_path)
  Utils.wrap_expr(matched)
end
483
594
 
484
- # def decode
485
- # end
595
+ # Decode a value using the provided encoding.
596
+ #
597
+ # @param encoding ["hex", "base64"]
598
+ # The encoding to use.
599
+ # @param strict [Boolean]
600
+ # How to handle invalid inputs:
601
+ #
602
+ # - `true`: An error will be thrown if unable to decode a value.
603
+ # - `false`: Unhandled values will be replaced with `nil`.
604
+ #
605
+ # @return [Expr]
606
+ #
607
+ # @example
608
+ # df = Polars::DataFrame.new({"encoded" => ["666f6f", "626172", nil]})
609
+ # df.select(Polars.col("encoded").str.decode("hex"))
610
+ # # =>
611
+ # # shape: (3, 1)
612
+ # # ┌─────────┐
613
+ # # │ encoded │
614
+ # # │ --- │
615
+ # # │ str │
616
+ # # ╞═════════╡
617
+ # # │ foo │
618
+ # # ├╌╌╌╌╌╌╌╌╌┤
619
+ # # │ bar │
620
+ # # ├╌╌╌╌╌╌╌╌╌┤
621
+ # # │ null │
622
+ # # └─────────┘
623
# Decode a value using the provided encoding.
#
# @param encoding ["hex", "base64"]
#   The encoding to use.
# @param strict [Boolean]
#   Forwarded to the underlying decoder: `true` raises an error when a
#   value cannot be decoded, `false` replaces such values with `nil`.
#
# @return [Expr]
#
# @raise [ArgumentError]
#   If `encoding` is neither `"hex"` nor `"base64"`.
def decode(encoding, strict: false)
  case encoding
  when "hex"
    Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
  when "base64"
    Utils.wrap_expr(_rbexpr.str_base64_decode(strict))
  else
    # Ruby string literals have no brace escaping, so the set of valid
    # values is written with single braces to read correctly in the message.
    raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
  end
end
486
632
 
487
- # def encode
488
- # end
633
+ # Encode a value using the provided encoding.
634
+ #
635
+ # @param encoding ["hex", "base64"]
636
+ # The encoding to use.
637
+ #
638
+ # @return [Expr]
639
+ #
640
+ # @example
641
+ # df = Polars::DataFrame.new({"strings" => ["foo", "bar", nil]})
642
+ # df.select(Polars.col("strings").str.encode("hex"))
643
+ # # =>
644
+ # # shape: (3, 1)
645
+ # # ┌─────────┐
646
+ # # │ strings │
647
+ # # │ --- │
648
+ # # │ str │
649
+ # # ╞═════════╡
650
+ # # │ 666f6f │
651
+ # # ├╌╌╌╌╌╌╌╌╌┤
652
+ # # │ 626172 │
653
+ # # ├╌╌╌╌╌╌╌╌╌┤
654
+ # # │ null │
655
+ # # └─────────┘
656
# Encode a value using the provided encoding.
#
# @param encoding ["hex", "base64"]
#   The encoding to use.
#
# @return [Expr]
#
# @raise [ArgumentError]
#   If `encoding` is neither `"hex"` nor `"base64"`.
def encode(encoding)
  case encoding
  when "hex"
    Utils.wrap_expr(_rbexpr.str_hex_encode)
  when "base64"
    Utils.wrap_expr(_rbexpr.str_base64_encode)
  else
    # Ruby string literals have no brace escaping, so the set of valid
    # values is written with single braces to read correctly in the message.
    raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
  end
end
489
665
 
490
666
  # Extract the target capture group from provided patterns.
491
667
  #
@@ -659,10 +835,10 @@ module Polars
659
835
  end
660
836
  end
661
837
 
662
- # Split the string by a substring, restricted to returning at most ``n`` items.
838
+ # Split the string by a substring, restricted to returning at most `n` items.
663
839
  #
664
- # If the number of possible splits is less than ``n-1``, the remaining field
665
- # elements will be null. If the number of possible splits is ``n-1`` or greater,
840
+ # If the number of possible splits is less than `n-1`, the remaining field
841
+ # elements will be null. If the number of possible splits is `n-1` or greater,
666
842
  # the last (nth) substring will contain the remainder of the string.
667
843
  #
668
844
  # @param by [String]