polars-df 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -9,8 +9,83 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
|
+
#
|
14
|
+
# @param datatype [Symbol]
|
15
|
+
# `:date`, `:datetime`, or `:time`.
|
16
|
+
# @param fmt [String]
|
17
|
+
# Format to use, refer to the
|
18
|
+
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
19
|
+
# for specification. Example: `"%y-%m-%d"`.
|
20
|
+
# @param strict [Boolean]
|
21
|
+
# Raise an error if any conversion fails.
|
22
|
+
# @param exact [Boolean]
|
23
|
+
# - If true, require an exact format match.
|
24
|
+
# - If false, allow the format to match anywhere in the target string.
|
25
|
+
#
|
26
|
+
# @return [Expr]
|
27
|
+
#
|
28
|
+
# @note
|
29
|
+
# When parsing a Datetime the column precision will be inferred from
|
30
|
+
# the format string, if given, e.g. "%F %T%.3f" => Datetime("ms"). If
|
31
|
+
# no fractional second component is found then the default is "us".
|
32
|
+
#
|
33
|
+
# @example
|
34
|
+
# s = Polars::Series.new(
|
35
|
+
# "date",
|
36
|
+
# [
|
37
|
+
# "2021-04-22",
|
38
|
+
# "2022-01-04 00:00:00",
|
39
|
+
# "01/31/22",
|
40
|
+
# "Sun Jul 8 00:34:60 2001"
|
41
|
+
# ]
|
42
|
+
# )
|
43
|
+
# s.to_frame.with_column(
|
44
|
+
# Polars.col("date")
|
45
|
+
# .str.strptime(:date, "%F", strict: false)
|
46
|
+
# .fill_null(
|
47
|
+
# Polars.col("date").str.strptime(:date, "%F %T", strict: false)
|
48
|
+
# )
|
49
|
+
# .fill_null(Polars.col("date").str.strptime(:date, "%D", strict: false))
|
50
|
+
# .fill_null(Polars.col("date").str.strptime(:date, "%c", strict: false))
|
51
|
+
# )
|
52
|
+
# # =>
|
53
|
+
# # shape: (4, 1)
|
54
|
+
# # ┌────────────┐
|
55
|
+
# # │ date │
|
56
|
+
# # │ --- │
|
57
|
+
# # │ date │
|
58
|
+
# # ╞════════════╡
|
59
|
+
# # │ 2021-04-22 │
|
60
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
61
|
+
# # │ 2022-01-04 │
|
62
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
63
|
+
# # │ 2022-01-31 │
|
64
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
65
|
+
# # │ 2001-07-08 │
|
66
|
+
# # └────────────┘
|
67
|
+
def strptime(datatype, fmt = nil, strict: true, exact: true)
|
68
|
+
if !Utils.is_polars_dtype(datatype)
|
69
|
+
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
|
+
end
|
71
|
+
|
72
|
+
if datatype == :date
|
73
|
+
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact))
|
74
|
+
elsif datatype == :datetime
|
75
|
+
# TODO fix
|
76
|
+
tu = nil # datatype.tu
|
77
|
+
dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact))
|
78
|
+
if tu.nil?
|
79
|
+
dtcol
|
80
|
+
else
|
81
|
+
dtcol.dt.cast_time_unit(tu)
|
82
|
+
end
|
83
|
+
elsif datatype == :time
|
84
|
+
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact))
|
85
|
+
else
|
86
|
+
raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
|
87
|
+
end
|
88
|
+
end
|
14
89
|
|
15
90
|
# Get length of the strings as `:u32` (as number of bytes).
|
16
91
|
#
|
@@ -291,7 +366,7 @@ module Polars
|
|
291
366
|
|
292
367
|
# Return the string left justified in a string of length `width`.
|
293
368
|
#
|
294
|
-
# Padding is done using the specified `
|
369
|
+
# Padding is done using the specified `fillchar`.
|
295
370
|
# The original string is returned if `width` is less than or equal to
|
296
371
|
# `s.length`.
|
297
372
|
#
|
@@ -324,7 +399,7 @@ module Polars
|
|
324
399
|
Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
|
325
400
|
end
|
326
401
|
|
327
|
-
# Return the string right justified in a string of length
|
402
|
+
# Return the string right justified in a string of length `width`.
|
328
403
|
#
|
329
404
|
# Padding is done using the specified `fillchar`.
|
330
405
|
# The original string is returned if `width` is less than or equal to
|
@@ -478,14 +553,115 @@ module Polars
|
|
478
553
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
479
554
|
end
|
480
555
|
|
481
|
-
#
|
482
|
-
#
|
556
|
+
# Extract the first match of a JSON string with the provided JSONPath expression.
|
557
|
+
#
|
558
|
+
# Raises an error if an invalid JSON string is encountered.
|
559
|
+
# All return values will be cast to Utf8 regardless of the original value.
|
560
|
+
#
|
561
|
+
# Documentation on JSONPath standard can be found
|
562
|
+
# [here](https://goessner.net/articles/JsonPath/).
|
563
|
+
#
|
564
|
+
# @param json_path [String]
|
565
|
+
# A valid JSON path query string.
|
566
|
+
#
|
567
|
+
# @return [Expr]
|
568
|
+
#
|
569
|
+
# @example
|
570
|
+
# df = Polars::DataFrame.new(
|
571
|
+
# {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
|
572
|
+
# )
|
573
|
+
# df.select(Polars.col("json_val").str.json_path_match("$.a"))
|
574
|
+
# # =>
|
575
|
+
# # shape: (5, 1)
|
576
|
+
# # ┌──────────┐
|
577
|
+
# # │ json_val │
|
578
|
+
# # │ --- │
|
579
|
+
# # │ str │
|
580
|
+
# # ╞══════════╡
|
581
|
+
# # │ 1 │
|
582
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
583
|
+
# # │ null │
|
584
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
585
|
+
# # │ 2 │
|
586
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
587
|
+
# # │ 2.1 │
|
588
|
+
# # ├╌╌╌╌╌╌╌╌╌╌┤
|
589
|
+
# # │ true │
|
590
|
+
# # └──────────┘
|
591
|
+
def json_path_match(json_path)
|
592
|
+
Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
|
593
|
+
end
|
483
594
|
|
484
|
-
#
|
485
|
-
#
|
595
|
+
# Decode a value using the provided encoding.
|
596
|
+
#
|
597
|
+
# @param encoding ["hex", "base64"]
|
598
|
+
# The encoding to use.
|
599
|
+
# @param strict [Boolean]
|
600
|
+
# How to handle invalid inputs:
|
601
|
+
#
|
602
|
+
# - `true`: An error will be thrown if unable to decode a value.
|
603
|
+
# - `false`: Unhandled values will be replaced with `nil`.
|
604
|
+
#
|
605
|
+
# @return [Expr]
|
606
|
+
#
|
607
|
+
# @example
|
608
|
+
# df = Polars::DataFrame.new({"encoded" => ["666f6f", "626172", nil]})
|
609
|
+
# df.select(Polars.col("encoded").str.decode("hex"))
|
610
|
+
# # =>
|
611
|
+
# # shape: (3, 1)
|
612
|
+
# # ┌─────────┐
|
613
|
+
# # │ encoded │
|
614
|
+
# # │ --- │
|
615
|
+
# # │ str │
|
616
|
+
# # ╞═════════╡
|
617
|
+
# # │ foo │
|
618
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
619
|
+
# # │ bar │
|
620
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
621
|
+
# # │ null │
|
622
|
+
# # └─────────┘
|
623
|
+
def decode(encoding, strict: false)
|
624
|
+
if encoding == "hex"
|
625
|
+
Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
|
626
|
+
elsif encoding == "base64"
|
627
|
+
Utils.wrap_expr(_rbexpr.str_base64_decode(strict))
|
628
|
+
else
|
629
|
+
raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
|
630
|
+
end
|
631
|
+
end
|
486
632
|
|
487
|
-
#
|
488
|
-
#
|
633
|
+
# Encode a value using the provided encoding.
|
634
|
+
#
|
635
|
+
# @param encoding ["hex", "base64"]
|
636
|
+
# The encoding to use.
|
637
|
+
#
|
638
|
+
# @return [Expr]
|
639
|
+
#
|
640
|
+
# @example
|
641
|
+
# df = Polars::DataFrame.new({"strings" => ["foo", "bar", nil]})
|
642
|
+
# df.select(Polars.col("strings").str.encode("hex"))
|
643
|
+
# # =>
|
644
|
+
# # shape: (3, 1)
|
645
|
+
# # ┌─────────┐
|
646
|
+
# # │ strings │
|
647
|
+
# # │ --- │
|
648
|
+
# # │ str │
|
649
|
+
# # ╞═════════╡
|
650
|
+
# # │ 666f6f │
|
651
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
652
|
+
# # │ 626172 │
|
653
|
+
# # ├╌╌╌╌╌╌╌╌╌┤
|
654
|
+
# # │ null │
|
655
|
+
# # └─────────┘
|
656
|
+
def encode(encoding)
|
657
|
+
if encoding == "hex"
|
658
|
+
Utils.wrap_expr(_rbexpr.str_hex_encode)
|
659
|
+
elsif encoding == "base64"
|
660
|
+
Utils.wrap_expr(_rbexpr.str_base64_encode)
|
661
|
+
else
|
662
|
+
raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
|
663
|
+
end
|
664
|
+
end
|
489
665
|
|
490
666
|
# Extract the target capture group from provided patterns.
|
491
667
|
#
|
@@ -659,10 +835,10 @@ module Polars
|
|
659
835
|
end
|
660
836
|
end
|
661
837
|
|
662
|
-
# Split the string by a substring, restricted to returning at most
|
838
|
+
# Split the string by a substring, restricted to returning at most `n` items.
|
663
839
|
#
|
664
|
-
# If the number of possible splits is less than
|
665
|
-
# elements will be null. If the number of possible splits is
|
840
|
+
# If the number of possible splits is less than `n-1`, the remaining field
|
841
|
+
# elements will be null. If the number of possible splits is `n-1` or greater,
|
666
842
|
# the last (nth) substring will contain the remainder of the string.
|
667
843
|
#
|
668
844
|
# @param by [String]
|