polars-df 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/README.md +4 -3
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/mod.rs +7 -3
- data/ext/polars/src/conversion.rs +171 -63
- data/ext/polars/src/dataframe.rs +19 -23
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/general.rs +39 -9
- data/ext/polars/src/expr/list.rs +27 -22
- data/ext/polars/src/expr/string.rs +10 -9
- data/ext/polars/src/expr.rs +1 -0
- data/ext/polars/src/functions/lazy.rs +61 -21
- data/ext/polars/src/lazyframe.rs +14 -2
- data/ext/polars/src/lib.rs +25 -20
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +4 -0
- data/ext/polars/src/series/construction.rs +28 -2
- data/ext/polars/src/series.rs +57 -17
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +5 -2
data/lib/polars/series.rb
CHANGED
@@ -65,7 +65,7 @@ module Polars
|
|
65
65
|
)
|
66
66
|
.rename(name, in_place: true)
|
67
67
|
._s
|
68
|
-
elsif values.is_a?(Array)
|
68
|
+
elsif values.is_a?(::Array)
|
69
69
|
self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
|
70
70
|
elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
|
71
71
|
self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
|
@@ -317,6 +317,10 @@ module Polars
|
|
317
317
|
end
|
318
318
|
|
319
319
|
if item.is_a?(Integer)
|
320
|
+
if item < 0
|
321
|
+
item = len + item
|
322
|
+
end
|
323
|
+
|
320
324
|
return _s.get_idx(item)
|
321
325
|
end
|
322
326
|
|
@@ -335,7 +339,7 @@ module Polars
|
|
335
339
|
#
|
336
340
|
# @return [Object]
|
337
341
|
def []=(key, value)
|
338
|
-
if value.is_a?(Array)
|
342
|
+
if value.is_a?(::Array)
|
339
343
|
if is_numeric || is_datelike
|
340
344
|
set_at_idx(key, value)
|
341
345
|
return
|
@@ -353,7 +357,7 @@ module Polars
|
|
353
357
|
else
|
354
358
|
raise Todo
|
355
359
|
end
|
356
|
-
elsif key.is_a?(Array)
|
360
|
+
elsif key.is_a?(::Array)
|
357
361
|
s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
|
358
362
|
self[s] = value
|
359
363
|
elsif key.is_a?(Range)
|
@@ -715,8 +719,8 @@ module Polars
|
|
715
719
|
# # │ 0 ┆ 1 ┆ 0 │
|
716
720
|
# # │ 0 ┆ 0 ┆ 1 │
|
717
721
|
# # └─────┴─────┴─────┘
|
718
|
-
def to_dummies(separator: "_")
|
719
|
-
Utils.wrap_df(_s.to_dummies(separator))
|
722
|
+
def to_dummies(separator: "_", drop_first: false)
|
723
|
+
Utils.wrap_df(_s.to_dummies(separator, drop_first))
|
720
724
|
end
|
721
725
|
|
722
726
|
# Count the unique values in a Series.
|
@@ -1124,7 +1128,7 @@ module Polars
|
|
1124
1128
|
# # 3
|
1125
1129
|
# # ]
|
1126
1130
|
def filter(predicate)
|
1127
|
-
if predicate.is_a?(Array)
|
1131
|
+
if predicate.is_a?(::Array)
|
1128
1132
|
predicate = Series.new("", predicate)
|
1129
1133
|
end
|
1130
1134
|
Utils.wrap_s(_s.filter(predicate._s))
|
@@ -2813,7 +2817,8 @@ module Polars
|
|
2813
2817
|
window_size,
|
2814
2818
|
weights: nil,
|
2815
2819
|
min_periods: nil,
|
2816
|
-
center: false
|
2820
|
+
center: false,
|
2821
|
+
ddof: 1
|
2817
2822
|
)
|
2818
2823
|
to_frame
|
2819
2824
|
.select(
|
@@ -2821,7 +2826,8 @@ module Polars
|
|
2821
2826
|
window_size,
|
2822
2827
|
weights: weights,
|
2823
2828
|
min_periods: min_periods,
|
2824
|
-
center: center
|
2829
|
+
center: center,
|
2830
|
+
ddof: ddof
|
2825
2831
|
)
|
2826
2832
|
)
|
2827
2833
|
.to_series
|
@@ -2864,7 +2870,8 @@ module Polars
|
|
2864
2870
|
window_size,
|
2865
2871
|
weights: nil,
|
2866
2872
|
min_periods: nil,
|
2867
|
-
center: false
|
2873
|
+
center: false,
|
2874
|
+
ddof: 1
|
2868
2875
|
)
|
2869
2876
|
to_frame
|
2870
2877
|
.select(
|
@@ -2872,7 +2879,8 @@ module Polars
|
|
2872
2879
|
window_size,
|
2873
2880
|
weights: weights,
|
2874
2881
|
min_periods: min_periods,
|
2875
|
-
center: center
|
2882
|
+
center: center,
|
2883
|
+
ddof: ddof
|
2876
2884
|
)
|
2877
2885
|
)
|
2878
2886
|
.to_series
|
@@ -3581,10 +3589,17 @@ module Polars
|
|
3581
3589
|
# Create an object namespace of all list related methods.
|
3582
3590
|
#
|
3583
3591
|
# @return [ListNameSpace]
|
3584
|
-
def arr
|
3592
|
+
def list
|
3585
3593
|
ListNameSpace.new(self)
|
3586
3594
|
end
|
3587
3595
|
|
3596
|
+
# Create an object namespace of all array related methods.
|
3597
|
+
#
|
3598
|
+
# @return [ArrayNameSpace]
|
3599
|
+
def arr
|
3600
|
+
ArrayNameSpace.new(self)
|
3601
|
+
end
|
3602
|
+
|
3588
3603
|
# Create an object namespace of all binary related methods.
|
3589
3604
|
#
|
3590
3605
|
# @return [BinaryNameSpace]
|
@@ -3824,9 +3839,12 @@ module Polars
|
|
3824
3839
|
|
3825
3840
|
if (values.nil? || values.empty?) && dtype.nil?
|
3826
3841
|
dtype = dtype_if_empty || Float32
|
3842
|
+
elsif dtype == List
|
3843
|
+
ruby_dtype = ::Array
|
3827
3844
|
end
|
3828
3845
|
|
3829
3846
|
rb_temporal_types = [::Date, ::DateTime, ::Time]
|
3847
|
+
rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)
|
3830
3848
|
|
3831
3849
|
value = _get_first_non_none(values)
|
3832
3850
|
if !value.nil?
|
@@ -3835,9 +3853,20 @@ module Polars
|
|
3835
3853
|
end
|
3836
3854
|
end
|
3837
3855
|
|
3838
|
-
if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
3856
|
+
if !dtype.nil? && ![List, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
3857
|
+
if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
|
3858
|
+
dtype = Array.new(value.size)
|
3859
|
+
end
|
3860
|
+
|
3839
3861
|
constructor = polars_type_to_constructor(dtype)
|
3840
3862
|
rbseries = constructor.call(name, values, strict)
|
3863
|
+
|
3864
|
+
base_type = dtype.is_a?(DataType) ? dtype.class : dtype
|
3865
|
+
if [Date, Datetime, Duration, Time, Categorical, Boolean].include?(base_type)
|
3866
|
+
if rbseries.dtype != dtype
|
3867
|
+
rbseries = rbseries.cast(dtype, true)
|
3868
|
+
end
|
3869
|
+
end
|
3841
3870
|
return rbseries
|
3842
3871
|
else
|
3843
3872
|
if ruby_dtype.nil?
|
@@ -3868,7 +3897,17 @@ module Polars
|
|
3868
3897
|
return s._s
|
3869
3898
|
elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
|
3870
3899
|
raise Todo
|
3871
|
-
elsif ruby_dtype == Array
|
3900
|
+
elsif ruby_dtype == ::Array
|
3901
|
+
if dtype.is_a?(Object)
|
3902
|
+
return RbSeries.new_object(name, values, strict)
|
3903
|
+
end
|
3904
|
+
if dtype
|
3905
|
+
srs = sequence_from_anyvalue_or_object(name, values)
|
3906
|
+
if dtype != srs.dtype
|
3907
|
+
srs = srs.cast(dtype, strict: false)
|
3908
|
+
end
|
3909
|
+
return srs
|
3910
|
+
end
|
3872
3911
|
return sequence_from_anyvalue_or_object(name, values)
|
3873
3912
|
elsif ruby_dtype == Series
|
3874
3913
|
return RbSeries.new_series_list(name, values.map(&:_s), strict)
|
@@ -3910,9 +3949,17 @@ module Polars
|
|
3910
3949
|
UInt16 => RbSeries.method(:new_opt_u16),
|
3911
3950
|
UInt32 => RbSeries.method(:new_opt_u32),
|
3912
3951
|
UInt64 => RbSeries.method(:new_opt_u64),
|
3952
|
+
Decimal => RbSeries.method(:new_decimal),
|
3953
|
+
Date => RbSeries.method(:new_from_anyvalues),
|
3954
|
+
Datetime => RbSeries.method(:new_from_anyvalues),
|
3955
|
+
Duration => RbSeries.method(:new_from_anyvalues),
|
3956
|
+
Time => RbSeries.method(:new_from_anyvalues),
|
3913
3957
|
Boolean => RbSeries.method(:new_opt_bool),
|
3914
3958
|
Utf8 => RbSeries.method(:new_str),
|
3915
|
-
|
3959
|
+
Object => RbSeries.method(:new_object),
|
3960
|
+
Categorical => RbSeries.method(:new_str),
|
3961
|
+
Binary => RbSeries.method(:new_binary),
|
3962
|
+
Null => RbSeries.method(:new_null)
|
3916
3963
|
}
|
3917
3964
|
|
3918
3965
|
SYM_TYPE_TO_CONSTRUCTOR = {
|
@@ -3931,8 +3978,14 @@ module Polars
|
|
3931
3978
|
}
|
3932
3979
|
|
3933
3980
|
def polars_type_to_constructor(dtype)
|
3934
|
-
if dtype.is_a?(
|
3981
|
+
if dtype.is_a?(Array)
|
3982
|
+
lambda do |name, values, strict|
|
3983
|
+
RbSeries.new_array(dtype.width, dtype.inner, name, values, strict)
|
3984
|
+
end
|
3985
|
+
elsif dtype.is_a?(Class) && dtype < DataType
|
3935
3986
|
POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype)
|
3987
|
+
elsif dtype.is_a?(DataType)
|
3988
|
+
POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.class)
|
3936
3989
|
else
|
3937
3990
|
SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
|
3938
3991
|
end
|
@@ -3944,7 +3997,8 @@ module Polars
|
|
3944
3997
|
Float => RbSeries.method(:new_opt_f64),
|
3945
3998
|
Integer => RbSeries.method(:new_opt_i64),
|
3946
3999
|
TrueClass => RbSeries.method(:new_opt_bool),
|
3947
|
-
FalseClass => RbSeries.method(:new_opt_bool)
|
4000
|
+
FalseClass => RbSeries.method(:new_opt_bool),
|
4001
|
+
BigDecimal => RbSeries.method(:new_decimal)
|
3948
4002
|
}
|
3949
4003
|
|
3950
4004
|
def rb_type_to_constructor(dtype)
|
data/lib/polars/string_expr.rb
CHANGED
@@ -9,11 +9,129 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Convert a Utf8 column into a Date column.
|
13
|
+
#
|
14
|
+
# @param format [String]
|
15
|
+
# Format to use for conversion. Refer to the
|
16
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
17
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
18
|
+
# If set to nil (default), the format is inferred from the data.
|
19
|
+
# @param strict [Boolean]
|
20
|
+
# Raise an error if any conversion fails.
|
21
|
+
# @param exact [Boolean]
|
22
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
23
|
+
# in the target string.
|
24
|
+
# @param cache [Boolean]
|
25
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
26
|
+
#
|
27
|
+
# @return [Expr]
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
31
|
+
# s.str.to_date
|
32
|
+
# # =>
|
33
|
+
# # shape: (3,)
|
34
|
+
# # Series: '' [date]
|
35
|
+
# # [
|
36
|
+
# # 2020-01-01
|
37
|
+
# # 2020-02-01
|
38
|
+
# # 2020-03-01
|
39
|
+
# # ]
|
40
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
|
41
|
+
_validate_format_argument(format)
|
42
|
+
Utils.wrap_expr(self._rbexpr.str_to_date(format, strict, exact, cache))
|
43
|
+
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Expr]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
|
80
|
+
format = nil,
|
81
|
+
time_unit: nil,
|
82
|
+
time_zone: nil,
|
83
|
+
strict: true,
|
84
|
+
exact: true,
|
85
|
+
cache: true
|
86
|
+
)
|
87
|
+
_validate_format_argument(format)
|
88
|
+
Utils.wrap_expr(
|
89
|
+
self._rbexpr.str_to_datetime(
|
90
|
+
format,
|
91
|
+
time_unit,
|
92
|
+
time_zone,
|
93
|
+
strict,
|
94
|
+
exact,
|
95
|
+
cache
|
96
|
+
)
|
97
|
+
)
|
98
|
+
end
|
99
|
+
|
100
|
+
# Convert a Utf8 column into a Time column.
|
101
|
+
#
|
102
|
+
# @param format [String]
|
103
|
+
# Format to use for conversion. Refer to the
|
104
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
105
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
106
|
+
# If set to nil (default), the format is inferred from the data.
|
107
|
+
# @param strict [Boolean]
|
108
|
+
# Raise an error if any conversion fails.
|
109
|
+
# @param cache [Boolean]
|
110
|
+
# Use a cache of unique, converted times to apply the conversion.
|
111
|
+
#
|
112
|
+
# @return [Expr]
|
113
|
+
#
|
114
|
+
# @example
|
115
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
116
|
+
# s.str.to_time("%H:%M")
|
117
|
+
# # =>
|
118
|
+
# # shape: (3,)
|
119
|
+
# # Series: '' [time]
|
120
|
+
# # [
|
121
|
+
# # 01:00:00
|
122
|
+
# # 02:00:00
|
123
|
+
# # 03:00:00
|
124
|
+
# # ]
|
125
|
+
def to_time(format = nil, strict: true, cache: true)
|
126
|
+
_validate_format_argument(format)
|
127
|
+
Utils.wrap_expr(_rbexpr.str_to_time(format, strict, cache))
|
128
|
+
end
|
129
|
+
|
12
130
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
131
|
#
|
14
132
|
# @param dtype [Object]
|
15
133
|
# The data type to convert into. Can be either Date, Datetime, or Time.
|
16
|
-
# @param fmt [String]
|
134
|
+
# @param format [String]
|
17
135
|
# Format to use, refer to the
|
18
136
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
19
137
|
# for specification. Example: `"%y-%m-%d"`.
|
@@ -38,10 +156,10 @@ module Polars
|
|
38
156
|
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
39
157
|
# # =>
|
40
158
|
# # shape: (2,)
|
41
|
-
# # Series: '' [datetime[μs,
|
159
|
+
# # Series: '' [datetime[μs, UTC]]
|
42
160
|
# # [
|
43
|
-
# # 2020-01-01 01:00:00
|
44
|
-
# # 2020-01-01 02:00:00
|
161
|
+
# # 2020-01-01 01:00:00 UTC
|
162
|
+
# # 2020-01-01 02:00:00 UTC
|
45
163
|
# # ]
|
46
164
|
#
|
47
165
|
# @example Dealing with different formats.
|
@@ -71,16 +189,18 @@ module Polars
|
|
71
189
|
# # 2022-01-31
|
72
190
|
# # 2001-07-08
|
73
191
|
# # ]
|
74
|
-
def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
192
|
+
def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
|
193
|
+
_validate_format_argument(format)
|
194
|
+
|
75
195
|
if dtype == Date
|
76
|
-
|
196
|
+
to_date(format, strict: strict, exact: exact, cache: cache)
|
77
197
|
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
78
198
|
dtype = Datetime.new if dtype == Datetime
|
79
199
|
time_unit = dtype.time_unit
|
80
200
|
time_zone = dtype.time_zone
|
81
|
-
|
201
|
+
to_datetime(format, time_unit: time_unit, time_zone: time_zone, strict: strict, exact: exact, cache: cache)
|
82
202
|
elsif dtype == Time
|
83
|
-
|
203
|
+
to_time(format, strict: strict, cache: cache)
|
84
204
|
else
|
85
205
|
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
86
206
|
end
|
@@ -547,11 +667,11 @@ module Polars
|
|
547
667
|
# # │ {null,null} │
|
548
668
|
# # │ {2,false} │
|
549
669
|
# # └─────────────┘
|
550
|
-
def json_extract(dtype = nil)
|
670
|
+
def json_extract(dtype = nil, infer_schema_length: 100)
|
551
671
|
if !dtype.nil?
|
552
672
|
dtype = Utils.rb_type_to_dtype(dtype)
|
553
673
|
end
|
554
|
-
Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
|
674
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
|
555
675
|
end
|
556
676
|
|
557
677
|
# Extract the first match of json string with provided JSONPath expression.
|
@@ -968,7 +1088,7 @@ module Polars
|
|
968
1088
|
# # │ r │
|
969
1089
|
# # └─────┘
|
970
1090
|
def explode
|
971
|
-
Utils.wrap_expr(_rbexpr.
|
1091
|
+
Utils.wrap_expr(_rbexpr.str_explode)
|
972
1092
|
end
|
973
1093
|
|
974
1094
|
# Parse integers with base radix from strings.
|
@@ -1018,5 +1138,11 @@ module Polars
|
|
1018
1138
|
def parse_int(radix = 2, strict: true)
|
1019
1139
|
Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
|
1020
1140
|
end
|
1141
|
+
|
1142
|
+
private
|
1143
|
+
|
1144
|
+
def _validate_format_argument(format)
|
1145
|
+
# TODO
|
1146
|
+
end
|
1021
1147
|
end
|
1022
1148
|
end
|
data/lib/polars/string_name_space.rb
CHANGED
@@ -10,6 +10,112 @@ module Polars
|
|
10
10
|
self._s = series._s
|
11
11
|
end
|
12
12
|
|
13
|
+
# Convert a Utf8 column into a Date column.
|
14
|
+
#
|
15
|
+
# @param format [String]
|
16
|
+
# Format to use for conversion. Refer to the
|
17
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
18
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
19
|
+
# If set to nil (default), the format is inferred from the data.
|
20
|
+
# @param strict [Boolean]
|
21
|
+
# Raise an error if any conversion fails.
|
22
|
+
# @param exact [Boolean]
|
23
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
24
|
+
# in the target string.
|
25
|
+
# @param cache [Boolean]
|
26
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
27
|
+
#
|
28
|
+
# @return [Series]
|
29
|
+
#
|
30
|
+
# @example
|
31
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
32
|
+
# s.str.to_date
|
33
|
+
# # =>
|
34
|
+
# # shape: (3,)
|
35
|
+
# # Series: '' [date]
|
36
|
+
# # [
|
37
|
+
# # 2020-01-01
|
38
|
+
# # 2020-02-01
|
39
|
+
# # 2020-03-01
|
40
|
+
# # ]
|
41
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
|
42
|
+
super
|
43
|
+
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Series]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
|
80
|
+
format = nil,
|
81
|
+
time_unit: nil,
|
82
|
+
time_zone: nil,
|
83
|
+
strict: true,
|
84
|
+
exact: true,
|
85
|
+
cache: true
|
86
|
+
)
|
87
|
+
super
|
88
|
+
end
|
89
|
+
|
90
|
+
# Convert a Utf8 column into a Time column.
|
91
|
+
#
|
92
|
+
# @param format [String]
|
93
|
+
# Format to use for conversion. Refer to the
|
94
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
95
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
96
|
+
# If set to nil (default), the format is inferred from the data.
|
97
|
+
# @param strict [Boolean]
|
98
|
+
# Raise an error if any conversion fails.
|
99
|
+
# @param cache [Boolean]
|
100
|
+
# Use a cache of unique, converted times to apply the conversion.
|
101
|
+
#
|
102
|
+
# @return [Series]
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
106
|
+
# s.str.to_time("%H:%M")
|
107
|
+
# # =>
|
108
|
+
# # shape: (3,)
|
109
|
+
# # Series: '' [time]
|
110
|
+
# # [
|
111
|
+
# # 01:00:00
|
112
|
+
# # 02:00:00
|
113
|
+
# # 03:00:00
|
114
|
+
# # ]
|
115
|
+
def to_time(format = nil, strict: true, cache: true)
|
116
|
+
super
|
117
|
+
end
|
118
|
+
|
13
119
|
# Parse a Series of dtype Utf8 to a Date/Datetime Series.
|
14
120
|
#
|
15
121
|
# @param datatype [Symbol]
|
@@ -23,10 +129,23 @@ module Polars
|
|
23
129
|
# @param exact [Boolean]
|
24
130
|
# - If true, require an exact format match.
|
25
131
|
# - If false, allow the format to match anywhere in the target string.
|
132
|
+
# @param cache [Boolean]
|
133
|
+
# Use a cache of unique, converted dates to apply the datetime conversion.
|
26
134
|
#
|
27
135
|
# @return [Series]
|
28
136
|
#
|
29
|
-
# @example
|
137
|
+
# @example Dealing with a consistent format:
|
138
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
139
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
140
|
+
# # =>
|
141
|
+
# # shape: (2,)
|
142
|
+
# # Series: '' [datetime[μs, UTC]]
|
143
|
+
# # [
|
144
|
+
# # 2020-01-01 01:00:00 UTC
|
145
|
+
# # 2020-01-01 02:00:00 UTC
|
146
|
+
# # ]
|
147
|
+
#
|
148
|
+
# @example Dealing with different formats.
|
30
149
|
# s = Polars::Series.new(
|
31
150
|
# "date",
|
32
151
|
# [
|
@@ -36,28 +155,24 @@ module Polars
|
|
36
155
|
# "Sun Jul 8 00:34:60 2001"
|
37
156
|
# ]
|
38
157
|
# )
|
39
|
-
# s.to_frame.
|
40
|
-
# Polars.
|
41
|
-
# .str.strptime(Polars::Date, "%F", strict: false)
|
42
|
-
# .
|
43
|
-
#
|
44
|
-
# )
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# )
|
158
|
+
# s.to_frame.select(
|
159
|
+
# Polars.coalesce(
|
160
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
161
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
162
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
163
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
164
|
+
# )
|
165
|
+
# ).to_series
|
48
166
|
# # =>
|
49
|
-
# # shape: (4,
|
50
|
-
# #
|
51
|
-
# #
|
52
|
-
# #
|
53
|
-
# #
|
54
|
-
# #
|
55
|
-
# #
|
56
|
-
# #
|
57
|
-
|
58
|
-
# # │ 2001-07-08 │
|
59
|
-
# # └────────────┘
|
60
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
167
|
+
# # shape: (4,)
|
168
|
+
# # Series: 'date' [date]
|
169
|
+
# # [
|
170
|
+
# # 2021-04-22
|
171
|
+
# # 2022-01-04
|
172
|
+
# # 2022-01-31
|
173
|
+
# # 2001-07-08
|
174
|
+
# # ]
|
175
|
+
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
|
61
176
|
super
|
62
177
|
end
|
63
178
|
|