polars-df 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -9,8 +9,83 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
|
+
#
|
14
|
+
# @param datatype [Symbol]
|
15
|
+
# `:date`, `:datetime`, or `:time`.
|
16
|
+
# @param fmt [String]
|
17
|
+
# Format to use, refer to the
|
18
|
+
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
19
|
+
# for specification. Example: `"%y-%m-%d"`.
|
20
|
+
# @param strict [Boolean]
|
21
|
+
# Raise an error if any conversion fails.
|
22
|
+
# @param exact [Boolean]
|
23
|
+
# - If true, require an exact format match.
|
24
|
+
# - If false, allow the format to match anywhere in the target string.
|
25
|
+
#
|
26
|
+
# @return [Expr]
|
27
|
+
#
|
28
|
+
# @note
|
29
|
+
# When parsing a Datetime the column precision will be inferred from
|
30
|
+
# the format string, if given, e.g. "%F %T%.3f" => Datetime("ms"). If
|
31
|
+
# no fractional second component is found then the default is "us".
|
32
|
+
#
|
33
|
+
# @example
|
34
|
+
# s = Polars::Series.new(
|
35
|
+
# "date",
|
36
|
+
# [
|
37
|
+
# "2021-04-22",
|
38
|
+
# "2022-01-04 00:00:00",
|
39
|
+
# "01/31/22",
|
40
|
+
# "Sun Jul 8 00:34:60 2001"
|
41
|
+
# ]
|
42
|
+
# )
|
43
|
+
# s.to_frame.with_column(
|
44
|
+
# Polars.col("date")
|
45
|
+
# .str.strptime(:date, "%F", strict: false)
|
46
|
+
# .fill_null(
|
47
|
+
# Polars.col("date").str.strptime(:date, "%F %T", strict: false)
|
48
|
+
# )
|
49
|
+
# .fill_null(Polars.col("date").str.strptime(:date, "%D", strict: false))
|
50
|
+
# .fill_null(Polars.col("date").str.strptime(:date, "%c", strict: false))
|
51
|
+
# )
|
52
|
+
# # =>
|
53
|
+
# # shape: (4, 1)
|
54
|
+
# # ┌────────────┐
|
55
|
+
# # │ date │
|
56
|
+
# # │ --- │
|
57
|
+
# # │ date │
|
58
|
+
# # ╞════════════╡
|
59
|
+
# # │ 2021-04-22 │
|
60
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
61
|
+
# # │ 2022-01-04 │
|
62
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
63
|
+
# # │ 2022-01-31 │
|
64
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
65
|
+
# # │ 2001-07-08 │
|
66
|
+
# # └────────────┘
|
67
|
+
def strptime(datatype, fmt = nil, strict: true, exact: true)
|
68
|
+
if !Utils.is_polars_dtype(datatype)
|
69
|
+
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
|
+
end
|
71
|
+
|
72
|
+
if datatype == :date
|
73
|
+
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact))
|
74
|
+
elsif datatype == :datetime
|
75
|
+
# TODO fix
|
76
|
+
tu = nil # datatype.tu
|
77
|
+
dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact))
|
78
|
+
if tu.nil?
|
79
|
+
dtcol
|
80
|
+
else
|
81
|
+
dtcol.dt.cast_time_unit(tu)
|
82
|
+
end
|
83
|
+
elsif datatype == :time
|
84
|
+
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact))
|
85
|
+
else
|
86
|
+
raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
|
87
|
+
end
|
88
|
+
end
|
14
89
|
|
15
90
|
# Get length of the strings as `:u32` (as number of bytes).
|
16
91
|
#
|
@@ -291,7 +366,7 @@ module Polars
|
|
291
366
|
|
292
367
|
# Return the string left justified in a string of length `width`.
|
293
368
|
#
|
294
|
-
# Padding is done using the specified `
|
369
|
+
# Padding is done using the specified `fillchar`.
|
295
370
|
# The original string is returned if `width` is less than or equal to
|
296
371
|
# `s.length`.
|
297
372
|
#
|
@@ -324,7 +399,7 @@ module Polars
|
|
324
399
|
Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
|
325
400
|
end
|
326
401
|
|
327
|
-
# Return the string right justified in a string of length
|
402
|
+
# Return the string right justified in a string of length `width`.
|
328
403
|
#
|
329
404
|
# Padding is done using the specified `fillchar`.
|
330
405
|
# The original string is returned if `width` is less than or equal to
|
@@ -478,14 +553,115 @@ module Polars
|
|
478
553
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
479
554
|
end
|
480
555
|
|
481
|
-
#
|
482
|
-
#
|
556
|
+
# Extract the first match of a JSON string with the provided JSONPath expression.
|
557
|
+
#
|
558
|
+
# Throws an error if an invalid JSON string is encountered.
|
559
|
+
# All return values will be cast to Utf8 regardless of the original value.
|
560
|
+
#
|
561
|
+
# Documentation on the JSONPath standard can be found
|
562
|
+
# [here](https://goessner.net/articles/JsonPath/).
|
563
|
+
#
|
564
|
+
# @param json_path [String]
|
565
|
+
# A valid JSON path query string.
|
566
|
+
#
|
567
|
+
# @return [Expr]
|
568
|
+
#
|
569
|
+
# @example
|
570
|
+
# df = Polars::DataFrame.new(
|
571
|
+
# {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
|
572
|
+
# )
|
573
|
+
# df.select(Polars.col("json_val").str.json_path_match("$.a"))
|
574
|
+
# # =>
|
575
|
+
# # shape: (5, 1)
|
576
|
+
# # ┌──────────┐
|
577
|
+
# # │ json_val │
|
578
|
+
# # │ --- │
|
579
|
+
# # │ str │
|
580
|
+
# # ╞══════════╡
|
581
|
+
# # │ 1 │
|
582
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
583
|
+
# # │ null │
|
584
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
585
|
+
# # │ 2 │
|
586
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
587
|
+
# # │ 2.1 │
|
588
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
589
|
+
# # │ true │
|
590
|
+
# # └──────────┘
|
591
|
+
def json_path_match(json_path)
|
592
|
+
Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
|
593
|
+
end
|
483
594
|
|
484
|
-
#
|
485
|
-
#
|
595
|
+
# Decode a value using the provided encoding.
|
596
|
+
#
|
597
|
+
# @param encoding ["hex", "base64"]
|
598
|
+
# The encoding to use.
|
599
|
+
# @param strict [Boolean]
|
600
|
+
# How to handle invalid inputs:
|
601
|
+
#
|
602
|
+
# - `true`: An error will be thrown if unable to decode a value.
|
603
|
+
# - `false`: Unhandled values will be replaced with `nil`.
|
604
|
+
#
|
605
|
+
# @return [Expr]
|
606
|
+
#
|
607
|
+
# @example
|
608
|
+
# df = Polars::DataFrame.new({"encoded" => ["666f6f", "626172", nil]})
|
609
|
+
# df.select(Polars.col("encoded").str.decode("hex"))
|
610
|
+
# # =>
|
611
|
+
# # shape: (3, 1)
|
612
|
+
# # ┌─────────┐
|
613
|
+
# # │ encoded │
|
614
|
+
# # │ --- │
|
615
|
+
# # │ str │
|
616
|
+
# # ╞═════════╡
|
617
|
+
# # │ foo │
|
618
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
619
|
+
# # │ bar │
|
620
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
621
|
+
# # │ null │
|
622
|
+
# # └─────────┘
|
623
|
+
def decode(encoding, strict: false)
|
624
|
+
if encoding == "hex"
|
625
|
+
Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
|
626
|
+
elsif encoding == "base64"
|
627
|
+
Utils.wrap_expr(_rbexpr.str_base64_decode(strict))
|
628
|
+
else
|
629
|
+
raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
|
630
|
+
end
|
631
|
+
end
|
486
632
|
|
487
|
-
#
|
488
|
-
#
|
633
|
+
# Encode a value using the provided encoding.
|
634
|
+
#
|
635
|
+
# @param encoding ["hex", "base64"]
|
636
|
+
# The encoding to use.
|
637
|
+
#
|
638
|
+
# @return [Expr]
|
639
|
+
#
|
640
|
+
# @example
|
641
|
+
# df = Polars::DataFrame.new({"strings" => ["foo", "bar", nil]})
|
642
|
+
# df.select(Polars.col("strings").str.encode("hex"))
|
643
|
+
# # =>
|
644
|
+
# # shape: (3, 1)
|
645
|
+
# # ┌─────────┐
|
646
|
+
# # │ strings │
|
647
|
+
# # │ --- │
|
648
|
+
# # │ str │
|
649
|
+
# # ╞═════════╡
|
650
|
+
# # │ 666f6f │
|
651
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
652
|
+
# # │ 626172 │
|
653
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
654
|
+
# # │ null │
|
655
|
+
# # └─────────┘
|
656
|
+
def encode(encoding)
|
657
|
+
if encoding == "hex"
|
658
|
+
Utils.wrap_expr(_rbexpr.str_hex_encode)
|
659
|
+
elsif encoding == "base64"
|
660
|
+
Utils.wrap_expr(_rbexpr.str_base64_encode)
|
661
|
+
else
|
662
|
+
raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
|
663
|
+
end
|
664
|
+
end
|
489
665
|
|
490
666
|
# Extract the target capture group from provided patterns.
|
491
667
|
#
|
@@ -659,10 +835,10 @@ module Polars
|
|
659
835
|
end
|
660
836
|
end
|
661
837
|
|
662
|
-
# Split the string by a substring, restricted to returning at most
|
838
|
+
# Split the string by a substring, restricted to returning at most `n` items.
|
663
839
|
#
|
664
|
-
# If the number of possible splits is less than
|
665
|
-
# elements will be null. If the number of possible splits is
|
840
|
+
# If the number of possible splits is less than `n-1`, the remaining field
|
841
|
+
# elements will be null. If the number of possible splits is `n-1` or greater,
|
666
842
|
# the last (nth) substring will contain the remainder of the string.
|
667
843
|
#
|
668
844
|
# @param by [String]
|