polars-df 0.4.0-x86_64-darwin → 0.6.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2142 -972
- data/README.md +6 -5
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +4 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -9,11 +9,129 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Convert a Utf8 column into a Date column.
|
13
|
+
#
|
14
|
+
# @param format [String]
|
15
|
+
# Format to use for conversion. Refer to the
|
16
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
17
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
18
|
+
# If set to nil (default), the format is inferred from the data.
|
19
|
+
# @param strict [Boolean]
|
20
|
+
# Raise an error if any conversion fails.
|
21
|
+
# @param exact [Boolean]
|
22
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
23
|
+
# in the target string.
|
24
|
+
# @param cache [Boolean]
|
25
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
26
|
+
#
|
27
|
+
# @return [Expr]
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
31
|
+
# s.str.to_date
|
32
|
+
# # =>
|
33
|
+
# # shape: (3,)
|
34
|
+
# # Series: '' [date]
|
35
|
+
# # [
|
36
|
+
# # 2020-01-01
|
37
|
+
# # 2020-02-01
|
38
|
+
# # 2020-03-01
|
39
|
+
# # ]
|
40
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
|
41
|
+
_validate_format_argument(format)
|
42
|
+
Utils.wrap_expr(self._rbexpr.str_to_date(format, strict, exact, cache))
|
43
|
+
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Expr]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
|
80
|
+
format = nil,
|
81
|
+
time_unit: nil,
|
82
|
+
time_zone: nil,
|
83
|
+
strict: true,
|
84
|
+
exact: true,
|
85
|
+
cache: true
|
86
|
+
)
|
87
|
+
_validate_format_argument(format)
|
88
|
+
Utils.wrap_expr(
|
89
|
+
self._rbexpr.str_to_datetime(
|
90
|
+
format,
|
91
|
+
time_unit,
|
92
|
+
time_zone,
|
93
|
+
strict,
|
94
|
+
exact,
|
95
|
+
cache
|
96
|
+
)
|
97
|
+
)
|
98
|
+
end
|
99
|
+
|
100
|
+
# Convert a Utf8 column into a Time column.
|
101
|
+
#
|
102
|
+
# @param format [String]
|
103
|
+
# Format to use for conversion. Refer to the
|
104
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
105
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
106
|
+
# If set to nil (default), the format is inferred from the data.
|
107
|
+
# @param strict [Boolean]
|
108
|
+
# Raise an error if any conversion fails.
|
109
|
+
# @param cache [Boolean]
|
110
|
+
# Use a cache of unique, converted times to apply the conversion.
|
111
|
+
#
|
112
|
+
# @return [Expr]
|
113
|
+
#
|
114
|
+
# @example
|
115
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
116
|
+
# s.str.to_time("%H:%M")
|
117
|
+
# # =>
|
118
|
+
# # shape: (3,)
|
119
|
+
# # Series: '' [time]
|
120
|
+
# # [
|
121
|
+
# # 01:00:00
|
122
|
+
# # 02:00:00
|
123
|
+
# # 03:00:00
|
124
|
+
# # ]
|
125
|
+
def to_time(format = nil, strict: true, cache: true)
|
126
|
+
_validate_format_argument(format)
|
127
|
+
Utils.wrap_expr(_rbexpr.str_to_time(format, strict, cache))
|
128
|
+
end
|
129
|
+
|
12
130
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
131
|
#
|
14
|
-
# @param
|
15
|
-
#
|
16
|
-
# @param
|
132
|
+
# @param dtype [Object]
|
133
|
+
# The data type to convert into. Can be either Date, Datetime, or Time.
|
134
|
+
# @param format [String]
|
17
135
|
# Format to use, refer to the
|
18
136
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
19
137
|
# for specification. Example: `"%y-%m-%d"`.
|
@@ -33,57 +151,58 @@ module Polars
|
|
33
151
|
# the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
|
34
152
|
# no fractional second component is found then the default is "us".
|
35
153
|
#
|
36
|
-
# @example
|
154
|
+
# @example Dealing with a consistent format:
|
155
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
156
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
157
|
+
# # =>
|
158
|
+
# # shape: (2,)
|
159
|
+
# # Series: '' [datetime[μs, UTC]]
|
160
|
+
# # [
|
161
|
+
# # 2020-01-01 01:00:00 UTC
|
162
|
+
# # 2020-01-01 02:00:00 UTC
|
163
|
+
# # ]
|
164
|
+
#
|
165
|
+
# @example Dealing with different formats:
|
37
166
|
# s = Polars::Series.new(
|
38
167
|
# "date",
|
39
168
|
# [
|
40
169
|
# "2021-04-22",
|
41
170
|
# "2022-01-04 00:00:00",
|
42
171
|
# "01/31/22",
|
43
|
-
# "Sun Jul 8 00:34:60 2001"
|
172
|
+
# "Sun Jul 8 00:34:60 2001",
|
44
173
|
# ]
|
45
174
|
# )
|
46
|
-
# s.to_frame.
|
47
|
-
# Polars.
|
48
|
-
# .str.strptime(
|
49
|
-
# .
|
50
|
-
#
|
51
|
-
# )
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# )
|
175
|
+
# s.to_frame.select(
|
176
|
+
# Polars.coalesce(
|
177
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
178
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
179
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
180
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
181
|
+
# )
|
182
|
+
# ).to_series
|
55
183
|
# # =>
|
56
|
-
# # shape: (4,
|
57
|
-
# #
|
58
|
-
# #
|
59
|
-
# #
|
60
|
-
# #
|
61
|
-
# #
|
62
|
-
# #
|
63
|
-
# #
|
64
|
-
|
65
|
-
|
66
|
-
# # └────────────┘
|
67
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
68
|
-
if !Utils.is_polars_dtype(datatype)
|
69
|
-
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
|
-
end
|
184
|
+
# # shape: (4,)
|
185
|
+
# # Series: 'date' [date]
|
186
|
+
# # [
|
187
|
+
# # 2021-04-22
|
188
|
+
# # 2022-01-04
|
189
|
+
# # 2022-01-31
|
190
|
+
# # 2001-07-08
|
191
|
+
# # ]
|
192
|
+
def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
|
193
|
+
_validate_format_argument(format)
|
71
194
|
|
72
|
-
if
|
73
|
-
|
74
|
-
elsif
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
dtcol.dt.cast_time_unit(tu)
|
82
|
-
end
|
83
|
-
elsif datatype == :time
|
84
|
-
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
|
195
|
+
if dtype == Date
|
196
|
+
to_date(format, strict: strict, exact: exact, cache: cache)
|
197
|
+
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
198
|
+
dtype = Datetime.new if dtype == Datetime
|
199
|
+
time_unit = dtype.time_unit
|
200
|
+
time_zone = dtype.time_zone
|
201
|
+
to_datetime(format, time_unit: time_unit, time_zone: time_zone, strict: strict, exact: exact, cache: cache)
|
202
|
+
elsif dtype == Time
|
203
|
+
to_time(format, strict: strict, cache: cache)
|
85
204
|
else
|
86
|
-
raise ArgumentError, "dtype should be of type
|
205
|
+
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
87
206
|
end
|
88
207
|
end
|
89
208
|
|
@@ -521,6 +640,40 @@ module Polars
|
|
521
640
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
522
641
|
end
|
523
642
|
|
643
|
+
# Parse string values as JSON.
|
644
|
+
#
|
645
|
+
# Raises an error if an invalid JSON string is encountered.
|
646
|
+
#
|
647
|
+
# @param dtype [Object]
|
648
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
649
|
+
# inferred from the JSON value.
|
650
|
+
#
|
651
|
+
# @return [Expr]
|
652
|
+
#
|
653
|
+
# @example
|
654
|
+
# df = Polars::DataFrame.new(
|
655
|
+
# {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
|
656
|
+
# )
|
657
|
+
# dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
|
658
|
+
# df.select(Polars.col("json").str.json_extract(dtype))
|
659
|
+
# # =>
|
660
|
+
# # shape: (3, 1)
|
661
|
+
# # ┌─────────────┐
|
662
|
+
# # │ json │
|
663
|
+
# # │ --- │
|
664
|
+
# # │ struct[2] │
|
665
|
+
# # ╞═════════════╡
|
666
|
+
# # │ {1,true} │
|
667
|
+
# # │ {null,null} │
|
668
|
+
# # │ {2,false} │
|
669
|
+
# # └─────────────┘
|
670
|
+
def json_extract(dtype = nil, infer_schema_length: 100)
|
671
|
+
if !dtype.nil?
|
672
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
673
|
+
end
|
674
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
|
675
|
+
end
|
676
|
+
|
524
677
|
# Extract the first match of json string with provided JSONPath expression.
|
525
678
|
#
|
526
679
|
# Raises an error if an invalid JSON string is encountered.
|
@@ -846,10 +999,10 @@ module Polars
|
|
846
999
|
# # │ 1 ┆ 123ABC │
|
847
1000
|
# # │ 2 ┆ abc456 │
|
848
1001
|
# # └─────┴────────┘
|
849
|
-
def replace(pattern, value, literal: false)
|
1002
|
+
def replace(pattern, value, literal: false, n: 1)
|
850
1003
|
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
|
851
1004
|
value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
|
852
|
-
Utils.wrap_expr(_rbexpr.
|
1005
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
|
853
1006
|
end
|
854
1007
|
|
855
1008
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -912,5 +1065,84 @@ module Polars
|
|
912
1065
|
def slice(offset, length = nil)
|
913
1066
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
914
1067
|
end
|
1068
|
+
|
1069
|
+
# Returns a column with a separate row for every string character.
|
1070
|
+
#
|
1071
|
+
# @return [Expr]
|
1072
|
+
#
|
1073
|
+
# @example
|
1074
|
+
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
1075
|
+
# df.select(Polars.col("a").str.explode)
|
1076
|
+
# # =>
|
1077
|
+
# # shape: (6, 1)
|
1078
|
+
# # ┌─────┐
|
1079
|
+
# # │ a │
|
1080
|
+
# # │ --- │
|
1081
|
+
# # │ str │
|
1082
|
+
# # ╞═════╡
|
1083
|
+
# # │ f │
|
1084
|
+
# # │ o │
|
1085
|
+
# # │ o │
|
1086
|
+
# # │ b │
|
1087
|
+
# # │ a │
|
1088
|
+
# # │ r │
|
1089
|
+
# # └─────┘
|
1090
|
+
def explode
|
1091
|
+
Utils.wrap_expr(_rbexpr.str_explode)
|
1092
|
+
end
|
1093
|
+
|
1094
|
+
# Parse integers with base radix from strings.
|
1095
|
+
#
|
1096
|
+
# The default radix is 2. With `strict: false`, parse errors and overflows become null.
|
1097
|
+
#
|
1098
|
+
# @param radix [Integer]
|
1099
|
+
# Positive integer which is the base of the string we are parsing.
|
1100
|
+
# Default: 2.
|
1101
|
+
# @param strict [Boolean]
|
1102
|
+
# If true (the default), any parse error or overflow is raised as a ComputeError.
|
1103
|
+
# If false, values that fail to parse or that overflow are silently converted to null.
|
1104
|
+
#
|
1105
|
+
# @return [Expr]
|
1106
|
+
#
|
1107
|
+
# @example
|
1108
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
1109
|
+
# df.select(Polars.col("bin").str.parse_int(2, strict: false))
|
1110
|
+
# # =>
|
1111
|
+
# # shape: (4, 1)
|
1112
|
+
# # ┌──────┐
|
1113
|
+
# # │ bin │
|
1114
|
+
# # │ --- │
|
1115
|
+
# # │ i32 │
|
1116
|
+
# # ╞══════╡
|
1117
|
+
# # │ 6 │
|
1118
|
+
# # │ 5 │
|
1119
|
+
# # │ 2 │
|
1120
|
+
# # │ null │
|
1121
|
+
# # └──────┘
|
1122
|
+
#
|
1123
|
+
# @example
|
1124
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1125
|
+
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1126
|
+
# # =>
|
1127
|
+
# # shape: (4, 1)
|
1128
|
+
# # ┌───────┐
|
1129
|
+
# # │ hex │
|
1130
|
+
# # │ --- │
|
1131
|
+
# # │ i32 │
|
1132
|
+
# # ╞═══════╡
|
1133
|
+
# # │ 64030 │
|
1134
|
+
# # │ 65280 │
|
1135
|
+
# # │ 51966 │
|
1136
|
+
# # │ null │
|
1137
|
+
# # └───────┘
|
1138
|
+
def parse_int(radix = 2, strict: true)
|
1139
|
+
Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
|
1140
|
+
end
|
1141
|
+
|
1142
|
+
private
|
1143
|
+
|
1144
|
+
def _validate_format_argument(format)
|
1145
|
+
# TODO: validate the format argument; this method is currently a no-op.
|
1146
|
+
end
|
915
1147
|
end
|
916
1148
|
end
|
@@ -10,6 +10,112 @@ module Polars
|
|
10
10
|
self._s = series._s
|
11
11
|
end
|
12
12
|
|
13
|
+
# Convert a Utf8 column into a Date column.
|
14
|
+
#
|
15
|
+
# @param format [String]
|
16
|
+
# Format to use for conversion. Refer to the
|
17
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
18
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
19
|
+
# If set to nil (default), the format is inferred from the data.
|
20
|
+
# @param strict [Boolean]
|
21
|
+
# Raise an error if any conversion fails.
|
22
|
+
# @param exact [Boolean]
|
23
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
24
|
+
# in the target string.
|
25
|
+
# @param cache [Boolean]
|
26
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
27
|
+
#
|
28
|
+
# @return [Series]
|
29
|
+
#
|
30
|
+
# @example
|
31
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
32
|
+
# s.str.to_date
|
33
|
+
# # =>
|
34
|
+
# # shape: (3,)
|
35
|
+
# # Series: '' [date]
|
36
|
+
# # [
|
37
|
+
# # 2020-01-01
|
38
|
+
# # 2020-02-01
|
39
|
+
# # 2020-03-01
|
40
|
+
# # ]
|
41
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
|
42
|
+
super
|
43
|
+
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Series]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
|
80
|
+
format = nil,
|
81
|
+
time_unit: nil,
|
82
|
+
time_zone: nil,
|
83
|
+
strict: true,
|
84
|
+
exact: true,
|
85
|
+
cache: true
|
86
|
+
)
|
87
|
+
super
|
88
|
+
end
|
89
|
+
|
90
|
+
# Convert a Utf8 column into a Time column.
|
91
|
+
#
|
92
|
+
# @param format [String]
|
93
|
+
# Format to use for conversion. Refer to the
|
94
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
95
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
96
|
+
# If set to nil (default), the format is inferred from the data.
|
97
|
+
# @param strict [Boolean]
|
98
|
+
# Raise an error if any conversion fails.
|
99
|
+
# @param cache [Boolean]
|
100
|
+
# Use a cache of unique, converted times to apply the conversion.
|
101
|
+
#
|
102
|
+
# @return [Series]
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
106
|
+
# s.str.to_time("%H:%M")
|
107
|
+
# # =>
|
108
|
+
# # shape: (3,)
|
109
|
+
# # Series: '' [time]
|
110
|
+
# # [
|
111
|
+
# # 01:00:00
|
112
|
+
# # 02:00:00
|
113
|
+
# # 03:00:00
|
114
|
+
# # ]
|
115
|
+
def to_time(format = nil, strict: true, cache: true)
|
116
|
+
super
|
117
|
+
end
|
118
|
+
|
13
119
|
# Parse a Series of dtype Utf8 to a Date/Datetime Series.
|
14
120
|
#
|
15
121
|
# @param datatype [Object]
|
@@ -23,10 +129,23 @@ module Polars
|
|
23
129
|
# @param exact [Boolean]
|
24
130
|
# - If true, require an exact format match.
|
25
131
|
# - If false, allow the format to match anywhere in the target string.
|
132
|
+
# @param cache [Boolean]
|
133
|
+
# Use a cache of unique, converted dates to apply the datetime conversion.
|
26
134
|
#
|
27
135
|
# @return [Series]
|
28
136
|
#
|
29
|
-
# @example
|
137
|
+
# @example Dealing with a consistent format:
|
138
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
139
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
140
|
+
# # =>
|
141
|
+
# # shape: (2,)
|
142
|
+
# # Series: '' [datetime[μs, UTC]]
|
143
|
+
# # [
|
144
|
+
# # 2020-01-01 01:00:00 UTC
|
145
|
+
# # 2020-01-01 02:00:00 UTC
|
146
|
+
# # ]
|
147
|
+
#
|
148
|
+
# @example Dealing with different formats:
|
30
149
|
# s = Polars::Series.new(
|
31
150
|
# "date",
|
32
151
|
# [
|
@@ -36,28 +155,24 @@ module Polars
|
|
36
155
|
# "Sun Jul 8 00:34:60 2001"
|
37
156
|
# ]
|
38
157
|
# )
|
39
|
-
# s.to_frame.
|
40
|
-
# Polars.
|
41
|
-
# .str.strptime(
|
42
|
-
# .
|
43
|
-
#
|
44
|
-
# )
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# )
|
158
|
+
# s.to_frame.select(
|
159
|
+
# Polars.coalesce(
|
160
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
161
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
162
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
163
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
164
|
+
# )
|
165
|
+
# ).to_series
|
48
166
|
# # =>
|
49
|
-
# # shape: (4,
|
50
|
-
# #
|
51
|
-
# #
|
52
|
-
# #
|
53
|
-
# #
|
54
|
-
# #
|
55
|
-
# #
|
56
|
-
# #
|
57
|
-
|
58
|
-
# # │ 2001-07-08 │
|
59
|
-
# # └────────────┘
|
60
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
167
|
+
# # shape: (4,)
|
168
|
+
# # Series: 'date' [date]
|
169
|
+
# # [
|
170
|
+
# # 2021-04-22
|
171
|
+
# # 2022-01-04
|
172
|
+
# # 2022-01-31
|
173
|
+
# # 2001-07-08
|
174
|
+
# # ]
|
175
|
+
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
|
61
176
|
super
|
62
177
|
end
|
63
178
|
|
@@ -60,5 +60,37 @@ module Polars
|
|
60
60
|
def rename_fields(names)
|
61
61
|
super
|
62
62
|
end
|
63
|
+
|
64
|
+
# Get the struct definition as a name/dtype schema hash.
|
65
|
+
#
|
66
|
+
# @return [Object]
|
67
|
+
def schema
|
68
|
+
if _s.nil?
|
69
|
+
{}
|
70
|
+
else
|
71
|
+
_s.dtype.to_schema
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Convert this struct Series to a DataFrame with a separate column for each field.
|
76
|
+
#
|
77
|
+
# @return [DataFrame]
|
78
|
+
#
|
79
|
+
# @example
|
80
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
81
|
+
# s.struct.unnest
|
82
|
+
# # =>
|
83
|
+
# # shape: (2, 2)
|
84
|
+
# # ┌─────┬─────┐
|
85
|
+
# # │ a ┆ b │
|
86
|
+
# # │ --- ┆ --- │
|
87
|
+
# # │ i64 ┆ i64 │
|
88
|
+
# # ╞═════╪═════╡
|
89
|
+
# # │ 1 ┆ 2 │
|
90
|
+
# # │ 3 ┆ 4 │
|
91
|
+
# # └─────┴─────┘
|
92
|
+
def unnest
|
93
|
+
Utils.wrap_df(_s.struct_unnest)
|
94
|
+
end
|
63
95
|
end
|
64
96
|
end
|