polars-df 0.4.0-x86_64-darwin → 0.6.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2142 -972
- data/README.md +6 -5
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +4 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -9,11 +9,129 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Convert a Utf8 column into a Date column.
|
13
|
+
#
|
14
|
+
# @param format [String]
|
15
|
+
# Format to use for conversion. Refer to the
|
16
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
17
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
18
|
+
# If set to nil (default), the format is inferred from the data.
|
19
|
+
# @param strict [Boolean]
|
20
|
+
# Raise an error if any conversion fails.
|
21
|
+
# @param exact [Boolean]
|
22
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
23
|
+
# in the target string.
|
24
|
+
# @param cache [Boolean]
|
25
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
26
|
+
#
|
27
|
+
# @return [Expr]
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
31
|
+
# s.str.to_date
|
32
|
+
# # =>
|
33
|
+
# # shape: (3,)
|
34
|
+
# # Series: '' [date]
|
35
|
+
# # [
|
36
|
+
# # 2020-01-01
|
37
|
+
# # 2020-02-01
|
38
|
+
# # 2020-03-01
|
39
|
+
# # ]
|
40
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
|
41
|
+
_validate_format_argument(format)
|
42
|
+
Utils.wrap_expr(self._rbexpr.str_to_date(format, strict, exact, cache))
|
43
|
+
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Expr]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
|
80
|
+
format = nil,
|
81
|
+
time_unit: nil,
|
82
|
+
time_zone: nil,
|
83
|
+
strict: true,
|
84
|
+
exact: true,
|
85
|
+
cache: true
|
86
|
+
)
|
87
|
+
_validate_format_argument(format)
|
88
|
+
Utils.wrap_expr(
|
89
|
+
self._rbexpr.str_to_datetime(
|
90
|
+
format,
|
91
|
+
time_unit,
|
92
|
+
time_zone,
|
93
|
+
strict,
|
94
|
+
exact,
|
95
|
+
cache
|
96
|
+
)
|
97
|
+
)
|
98
|
+
end
|
99
|
+
|
100
|
+
# Convert a Utf8 column into a Time column.
|
101
|
+
#
|
102
|
+
# @param format [String]
|
103
|
+
# Format to use for conversion. Refer to the
|
104
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
105
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
106
|
+
# If set to nil (default), the format is inferred from the data.
|
107
|
+
# @param strict [Boolean]
|
108
|
+
# Raise an error if any conversion fails.
|
109
|
+
# @param cache [Boolean]
|
110
|
+
# Use a cache of unique, converted times to apply the conversion.
|
111
|
+
#
|
112
|
+
# @return [Expr]
|
113
|
+
#
|
114
|
+
# @example
|
115
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
116
|
+
# s.str.to_time("%H:%M")
|
117
|
+
# # =>
|
118
|
+
# # shape: (3,)
|
119
|
+
# # Series: '' [time]
|
120
|
+
# # [
|
121
|
+
# # 01:00:00
|
122
|
+
# # 02:00:00
|
123
|
+
# # 03:00:00
|
124
|
+
# # ]
|
125
|
+
def to_time(format = nil, strict: true, cache: true)
|
126
|
+
_validate_format_argument(format)
|
127
|
+
Utils.wrap_expr(_rbexpr.str_to_time(format, strict, cache))
|
128
|
+
end
|
129
|
+
|
12
130
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
131
|
#
|
14
|
-
# @param
|
15
|
-
#
|
16
|
-
# @param
|
132
|
+
# @param dtype [Object]
|
133
|
+
# The data type to convert into. Can be either Date, Datetime, or Time.
|
134
|
+
# @param format [String]
|
17
135
|
# Format to use, refer to the
|
18
136
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
19
137
|
# for specification. Example: `"%y-%m-%d"`.
|
@@ -33,57 +151,58 @@ module Polars
|
|
33
151
|
# the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
|
34
152
|
# no fractional second component is found then the default is "us".
|
35
153
|
#
|
36
|
-
# @example
|
154
|
+
# @example Dealing with a consistent format:
|
155
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
156
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
157
|
+
# # =>
|
158
|
+
# # shape: (2,)
|
159
|
+
# # Series: '' [datetime[μs, UTC]]
|
160
|
+
# # [
|
161
|
+
# # 2020-01-01 01:00:00 UTC
|
162
|
+
# # 2020-01-01 02:00:00 UTC
|
163
|
+
# # ]
|
164
|
+
#
|
165
|
+
# @example Dealing with different formats.
|
37
166
|
# s = Polars::Series.new(
|
38
167
|
# "date",
|
39
168
|
# [
|
40
169
|
# "2021-04-22",
|
41
170
|
# "2022-01-04 00:00:00",
|
42
171
|
# "01/31/22",
|
43
|
-
# "Sun Jul 8 00:34:60 2001"
|
172
|
+
# "Sun Jul 8 00:34:60 2001",
|
44
173
|
# ]
|
45
174
|
# )
|
46
|
-
# s.to_frame.
|
47
|
-
# Polars.
|
48
|
-
# .str.strptime(
|
49
|
-
# .
|
50
|
-
#
|
51
|
-
# )
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# )
|
175
|
+
# s.to_frame.select(
|
176
|
+
# Polars.coalesce(
|
177
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
178
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
179
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
180
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
181
|
+
# )
|
182
|
+
# ).to_series
|
55
183
|
# # =>
|
56
|
-
# # shape: (4,
|
57
|
-
# #
|
58
|
-
# #
|
59
|
-
# #
|
60
|
-
# #
|
61
|
-
# #
|
62
|
-
# #
|
63
|
-
# #
|
64
|
-
|
65
|
-
|
66
|
-
# # └────────────┘
|
67
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
68
|
-
if !Utils.is_polars_dtype(datatype)
|
69
|
-
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
|
-
end
|
184
|
+
# # shape: (4,)
|
185
|
+
# # Series: 'date' [date]
|
186
|
+
# # [
|
187
|
+
# # 2021-04-22
|
188
|
+
# # 2022-01-04
|
189
|
+
# # 2022-01-31
|
190
|
+
# # 2001-07-08
|
191
|
+
# # ]
|
192
|
+
def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
|
193
|
+
_validate_format_argument(format)
|
71
194
|
|
72
|
-
if
|
73
|
-
|
74
|
-
elsif
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
dtcol.dt.cast_time_unit(tu)
|
82
|
-
end
|
83
|
-
elsif datatype == :time
|
84
|
-
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
|
195
|
+
if dtype == Date
|
196
|
+
to_date(format, strict: strict, exact: exact, cache: cache)
|
197
|
+
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
198
|
+
dtype = Datetime.new if dtype == Datetime
|
199
|
+
time_unit = dtype.time_unit
|
200
|
+
time_zone = dtype.time_zone
|
201
|
+
to_datetime(format, time_unit: time_unit, time_zone: time_zone, strict: strict, exact: exact, cache: cache)
|
202
|
+
elsif dtype == Time
|
203
|
+
to_time(format, strict: strict, cache: cache)
|
85
204
|
else
|
86
|
-
raise ArgumentError, "dtype should be of type
|
205
|
+
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
87
206
|
end
|
88
207
|
end
|
89
208
|
|
@@ -521,6 +640,40 @@ module Polars
|
|
521
640
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
522
641
|
end
|
523
642
|
|
643
|
+
# Parse string values as JSON.
|
644
|
+
#
|
645
|
+
# Throws an error if an invalid JSON string is encountered.
|
646
|
+
#
|
647
|
+
# @param dtype [Object]
|
648
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
649
|
+
# inferred from the JSON value.
|
650
|
+
#
|
651
|
+
# @return [Expr]
|
652
|
+
#
|
653
|
+
# @example
|
654
|
+
# df = Polars::DataFrame.new(
|
655
|
+
# {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
|
656
|
+
# )
|
657
|
+
# dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
|
658
|
+
# df.select(Polars.col("json").str.json_extract(dtype))
|
659
|
+
# # =>
|
660
|
+
# # shape: (3, 1)
|
661
|
+
# # ┌─────────────┐
|
662
|
+
# # │ json │
|
663
|
+
# # │ --- │
|
664
|
+
# # │ struct[2] │
|
665
|
+
# # ╞═════════════╡
|
666
|
+
# # │ {1,true} │
|
667
|
+
# # │ {null,null} │
|
668
|
+
# # │ {2,false} │
|
669
|
+
# # └─────────────┘
|
670
|
+
def json_extract(dtype = nil, infer_schema_length: 100)
|
671
|
+
if !dtype.nil?
|
672
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
673
|
+
end
|
674
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
|
675
|
+
end
|
676
|
+
|
524
677
|
# Extract the first match of json string with provided JSONPath expression.
|
525
678
|
#
|
526
679
|
# Throws an error if an invalid JSON string is encountered.
|
@@ -846,10 +999,10 @@ module Polars
|
|
846
999
|
# # │ 1 ┆ 123ABC │
|
847
1000
|
# # │ 2 ┆ abc456 │
|
848
1001
|
# # └─────┴────────┘
|
849
|
-
def replace(pattern, value, literal: false)
|
1002
|
+
def replace(pattern, value, literal: false, n: 1)
|
850
1003
|
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
|
851
1004
|
value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
|
852
|
-
Utils.wrap_expr(_rbexpr.
|
1005
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
|
853
1006
|
end
|
854
1007
|
|
855
1008
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -912,5 +1065,84 @@ module Polars
|
|
912
1065
|
def slice(offset, length = nil)
|
913
1066
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
914
1067
|
end
|
1068
|
+
|
1069
|
+
# Returns a column with a separate row for every string character.
|
1070
|
+
#
|
1071
|
+
# @return [Expr]
|
1072
|
+
#
|
1073
|
+
# @example
|
1074
|
+
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
1075
|
+
# df.select(Polars.col("a").str.explode)
|
1076
|
+
# # =>
|
1077
|
+
# # shape: (6, 1)
|
1078
|
+
# # ┌─────┐
|
1079
|
+
# # │ a │
|
1080
|
+
# # │ --- │
|
1081
|
+
# # │ str │
|
1082
|
+
# # ╞═════╡
|
1083
|
+
# # │ f │
|
1084
|
+
# # │ o │
|
1085
|
+
# # │ o │
|
1086
|
+
# # │ b │
|
1087
|
+
# # │ a │
|
1088
|
+
# # │ r │
|
1089
|
+
# # └─────┘
|
1090
|
+
def explode
|
1091
|
+
Utils.wrap_expr(_rbexpr.str_explode)
|
1092
|
+
end
|
1093
|
+
|
1094
|
+
# Parse integers with base radix from strings.
|
1095
|
+
#
|
1096
|
+
# By default base 2. With `strict: false`, parse errors and overflows become nulls.
|
1097
|
+
#
|
1098
|
+
# @param radix [Integer]
|
1099
|
+
# Positive integer which is the base of the string we are parsing.
|
1100
|
+
# Default: 2.
|
1101
|
+
# @param strict [Boolean]
|
1102
|
+
# If true (default), raise any parse error or overflow as a ComputeError.
|
1103
|
+
# If false, silently convert failing values to null.
|
1104
|
+
#
|
1105
|
+
# @return [Expr]
|
1106
|
+
#
|
1107
|
+
# @example
|
1108
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
1109
|
+
# df.select(Polars.col("bin").str.parse_int(2, strict: false))
|
1110
|
+
# # =>
|
1111
|
+
# # shape: (4, 1)
|
1112
|
+
# # ┌──────┐
|
1113
|
+
# # │ bin │
|
1114
|
+
# # │ --- │
|
1115
|
+
# # │ i32 │
|
1116
|
+
# # ╞══════╡
|
1117
|
+
# # │ 6 │
|
1118
|
+
# # │ 5 │
|
1119
|
+
# # │ 2 │
|
1120
|
+
# # │ null │
|
1121
|
+
# # └──────┘
|
1122
|
+
#
|
1123
|
+
# @example
|
1124
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1125
|
+
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1126
|
+
# # =>
|
1127
|
+
# # shape: (4, 1)
|
1128
|
+
# # ┌───────┐
|
1129
|
+
# # │ hex │
|
1130
|
+
# # │ --- │
|
1131
|
+
# # │ i32 │
|
1132
|
+
# # ╞═══════╡
|
1133
|
+
# # │ 64030 │
|
1134
|
+
# # │ 65280 │
|
1135
|
+
# # │ 51966 │
|
1136
|
+
# # │ null │
|
1137
|
+
# # └───────┘
|
1138
|
+
def parse_int(radix = 2, strict: true)
|
1139
|
+
Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
|
1140
|
+
end
|
1141
|
+
|
1142
|
+
private
|
1143
|
+
|
1144
|
+
def _validate_format_argument(format)
|
1145
|
+
# TODO
|
1146
|
+
end
|
915
1147
|
end
|
916
1148
|
end
|
@@ -10,6 +10,112 @@ module Polars
|
|
10
10
|
self._s = series._s
|
11
11
|
end
|
12
12
|
|
13
|
+
# Convert a Utf8 column into a Date column.
|
14
|
+
#
|
15
|
+
# @param format [String]
|
16
|
+
# Format to use for conversion. Refer to the
|
17
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
18
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
19
|
+
# If set to nil (default), the format is inferred from the data.
|
20
|
+
# @param strict [Boolean]
|
21
|
+
# Raise an error if any conversion fails.
|
22
|
+
# @param exact [Boolean]
|
23
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
24
|
+
# in the target string.
|
25
|
+
# @param cache [Boolean]
|
26
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
27
|
+
#
|
28
|
+
# @return [Series]
|
29
|
+
#
|
30
|
+
# @example
|
31
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
32
|
+
# s.str.to_date
|
33
|
+
# # =>
|
34
|
+
# # shape: (3,)
|
35
|
+
# # Series: '' [date]
|
36
|
+
# # [
|
37
|
+
# # 2020-01-01
|
38
|
+
# # 2020-02-01
|
39
|
+
# # 2020-03-01
|
40
|
+
# # ]
|
41
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
|
42
|
+
super
|
43
|
+
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Series]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
|
80
|
+
format = nil,
|
81
|
+
time_unit: nil,
|
82
|
+
time_zone: nil,
|
83
|
+
strict: true,
|
84
|
+
exact: true,
|
85
|
+
cache: true
|
86
|
+
)
|
87
|
+
super
|
88
|
+
end
|
89
|
+
|
90
|
+
# Convert a Utf8 column into a Time column.
|
91
|
+
#
|
92
|
+
# @param format [String]
|
93
|
+
# Format to use for conversion. Refer to the
|
94
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
95
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
96
|
+
# If set to nil (default), the format is inferred from the data.
|
97
|
+
# @param strict [Boolean]
|
98
|
+
# Raise an error if any conversion fails.
|
99
|
+
# @param cache [Boolean]
|
100
|
+
# Use a cache of unique, converted times to apply the conversion.
|
101
|
+
#
|
102
|
+
# @return [Series]
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
106
|
+
# s.str.to_time("%H:%M")
|
107
|
+
# # =>
|
108
|
+
# # shape: (3,)
|
109
|
+
# # Series: '' [time]
|
110
|
+
# # [
|
111
|
+
# # 01:00:00
|
112
|
+
# # 02:00:00
|
113
|
+
# # 03:00:00
|
114
|
+
# # ]
|
115
|
+
def to_time(format = nil, strict: true, cache: true)
|
116
|
+
super
|
117
|
+
end
|
118
|
+
|
13
119
|
# Parse a Series of dtype Utf8 to a Date/Datetime Series.
|
14
120
|
#
|
15
121
|
# @param datatype [Object]
|
@@ -23,10 +129,23 @@ module Polars
|
|
23
129
|
# @param exact [Boolean]
|
24
130
|
# - If true, require an exact format match.
|
25
131
|
# - If false, allow the format to match anywhere in the target string.
|
132
|
+
# @param cache [Boolean]
|
133
|
+
# Use a cache of unique, converted dates to apply the datetime conversion.
|
26
134
|
#
|
27
135
|
# @return [Series]
|
28
136
|
#
|
29
|
-
# @example
|
137
|
+
# @example Dealing with a consistent format:
|
138
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
139
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
140
|
+
# # =>
|
141
|
+
# # shape: (2,)
|
142
|
+
# # Series: '' [datetime[μs, UTC]]
|
143
|
+
# # [
|
144
|
+
# # 2020-01-01 01:00:00 UTC
|
145
|
+
# # 2020-01-01 02:00:00 UTC
|
146
|
+
# # ]
|
147
|
+
#
|
148
|
+
# @example Dealing with different formats.
|
30
149
|
# s = Polars::Series.new(
|
31
150
|
# "date",
|
32
151
|
# [
|
@@ -36,28 +155,24 @@ module Polars
|
|
36
155
|
# "Sun Jul 8 00:34:60 2001"
|
37
156
|
# ]
|
38
157
|
# )
|
39
|
-
# s.to_frame.
|
40
|
-
# Polars.
|
41
|
-
# .str.strptime(
|
42
|
-
# .
|
43
|
-
#
|
44
|
-
# )
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# )
|
158
|
+
# s.to_frame.select(
|
159
|
+
# Polars.coalesce(
|
160
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
161
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
162
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
163
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
164
|
+
# )
|
165
|
+
# ).to_series
|
48
166
|
# # =>
|
49
|
-
# # shape: (4,
|
50
|
-
# #
|
51
|
-
# #
|
52
|
-
# #
|
53
|
-
# #
|
54
|
-
# #
|
55
|
-
# #
|
56
|
-
# #
|
57
|
-
|
58
|
-
# # │ 2001-07-08 │
|
59
|
-
# # └────────────┘
|
60
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
167
|
+
# # shape: (4,)
|
168
|
+
# # Series: 'date' [date]
|
169
|
+
# # [
|
170
|
+
# # 2021-04-22
|
171
|
+
# # 2022-01-04
|
172
|
+
# # 2022-01-31
|
173
|
+
# # 2001-07-08
|
174
|
+
# # ]
|
175
|
+
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
|
61
176
|
super
|
62
177
|
end
|
63
178
|
|
@@ -60,5 +60,37 @@ module Polars
|
|
60
60
|
def rename_fields(names)
|
61
61
|
super
|
62
62
|
end
|
63
|
+
|
64
|
+
# Get the struct definition as a name/dtype schema hash.
|
65
|
+
#
|
66
|
+
# @return [Object]
|
67
|
+
def schema
|
68
|
+
if _s.nil?
|
69
|
+
{}
|
70
|
+
else
|
71
|
+
_s.dtype.to_schema
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Convert this struct Series to a DataFrame with a separate column for each field.
|
76
|
+
#
|
77
|
+
# @return [DataFrame]
|
78
|
+
#
|
79
|
+
# @example
|
80
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
81
|
+
# s.struct.unnest
|
82
|
+
# # =>
|
83
|
+
# # shape: (2, 2)
|
84
|
+
# # ┌─────┬─────┐
|
85
|
+
# # │ a ┆ b │
|
86
|
+
# # │ --- ┆ --- │
|
87
|
+
# # │ i64 ┆ i64 │
|
88
|
+
# # ╞═════╪═════╡
|
89
|
+
# # │ 1 ┆ 2 │
|
90
|
+
# # │ 3 ┆ 4 │
|
91
|
+
# # └─────┴─────┘
|
92
|
+
def unnest
|
93
|
+
Utils.wrap_df(_s.struct_unnest)
|
94
|
+
end
|
63
95
|
end
|
64
96
|
end
|