polars-df 0.4.0-x86_64-darwin → 0.5.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2043 -1202
- data/README.md +2 -2
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +2 -2
data/lib/polars/series.rb
CHANGED
@@ -67,6 +67,12 @@ module Polars
|
|
67
67
|
._s
|
68
68
|
elsif values.is_a?(Array)
|
69
69
|
self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
|
70
|
+
elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
|
71
|
+
self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
|
72
|
+
|
73
|
+
if !dtype.nil?
|
74
|
+
self._s = self.cast(dtype, strict: true)._s
|
75
|
+
end
|
70
76
|
else
|
71
77
|
raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
|
72
78
|
end
|
@@ -90,10 +96,14 @@ module Polars
|
|
90
96
|
#
|
91
97
|
# @return [Hash]
|
92
98
|
def flags
|
93
|
-
{
|
99
|
+
out = {
|
94
100
|
"SORTED_ASC" => _s.is_sorted_flag,
|
95
101
|
"SORTED_DESC" => _s.is_sorted_reverse_flag
|
96
102
|
}
|
103
|
+
if dtype.is_a?(List)
|
104
|
+
out["FAST_EXPLODE"] = _s.can_fast_explode_flag
|
105
|
+
end
|
106
|
+
out
|
97
107
|
end
|
98
108
|
|
99
109
|
# Get the inner dtype of a List typed Series.
|
@@ -222,14 +232,28 @@ module Polars
|
|
222
232
|
#
|
223
233
|
# @return [Series]
|
224
234
|
def *(other)
|
225
|
-
|
235
|
+
if is_temporal
|
236
|
+
raise ArgumentError, "first cast to integer before multiplying datelike dtypes"
|
237
|
+
elsif other.is_a?(DataFrame)
|
238
|
+
other * self
|
239
|
+
else
|
240
|
+
_arithmetic(other, :mul)
|
241
|
+
end
|
226
242
|
end
|
227
243
|
|
228
244
|
# Performs division.
|
229
245
|
#
|
230
246
|
# @return [Series]
|
231
247
|
def /(other)
|
232
|
-
|
248
|
+
if is_temporal
|
249
|
+
raise ArgumentError, "first cast to integer before dividing datelike dtypes"
|
250
|
+
end
|
251
|
+
|
252
|
+
if is_float
|
253
|
+
return _arithmetic(other, :div)
|
254
|
+
end
|
255
|
+
|
256
|
+
cast(Float64) / other
|
233
257
|
end
|
234
258
|
|
235
259
|
# Returns the modulo.
|
@@ -252,6 +276,16 @@ module Polars
|
|
252
276
|
to_frame.select(Polars.col(name).pow(power)).to_series
|
253
277
|
end
|
254
278
|
|
279
|
+
# Performs boolean not.
|
280
|
+
#
|
281
|
+
# @return [Series]
|
282
|
+
def !
|
283
|
+
if dtype == Boolean
|
284
|
+
return Utils.wrap_s(_s.not)
|
285
|
+
end
|
286
|
+
raise NotImplementedError
|
287
|
+
end
|
288
|
+
|
255
289
|
# Performs negation.
|
256
290
|
#
|
257
291
|
# @return [Series]
|
@@ -278,6 +312,10 @@ module Polars
|
|
278
312
|
return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
|
279
313
|
end
|
280
314
|
|
315
|
+
if item.is_a?(Series) && item.bool?
|
316
|
+
return filter(item)
|
317
|
+
end
|
318
|
+
|
281
319
|
if item.is_a?(Integer)
|
282
320
|
return _s.get_idx(item)
|
283
321
|
end
|
@@ -369,16 +407,26 @@ module Polars
|
|
369
407
|
# Check if any boolean value in the column is `true`.
|
370
408
|
#
|
371
409
|
# @return [Boolean]
|
372
|
-
def any
|
373
|
-
|
410
|
+
def any?(&block)
|
411
|
+
if block_given?
|
412
|
+
apply(&block).any?
|
413
|
+
else
|
414
|
+
to_frame.select(Polars.col(name).any).to_series[0]
|
415
|
+
end
|
374
416
|
end
|
417
|
+
alias_method :any, :any?
|
375
418
|
|
376
419
|
# Check if all boolean values in the column are `true`.
|
377
420
|
#
|
378
421
|
# @return [Boolean]
|
379
|
-
def all
|
380
|
-
|
422
|
+
def all?(&block)
|
423
|
+
if block_given?
|
424
|
+
apply(&block).all?
|
425
|
+
else
|
426
|
+
to_frame.select(Polars.col(name).all).to_series[0]
|
427
|
+
end
|
381
428
|
end
|
429
|
+
alias_method :all, :all?
|
382
430
|
|
383
431
|
# Compute the logarithm to a given base.
|
384
432
|
#
|
@@ -1314,6 +1362,7 @@ module Polars
|
|
1314
1362
|
def unique(maintain_order: false)
|
1315
1363
|
super
|
1316
1364
|
end
|
1365
|
+
alias_method :uniq, :unique
|
1317
1366
|
|
1318
1367
|
# Take values by index.
|
1319
1368
|
#
|
@@ -1535,6 +1584,7 @@ module Polars
|
|
1535
1584
|
def is_in(other)
|
1536
1585
|
super
|
1537
1586
|
end
|
1587
|
+
alias_method :in?, :is_in
|
1538
1588
|
|
1539
1589
|
# Get index values where Boolean Series evaluate `true`.
|
1540
1590
|
#
|
@@ -1660,6 +1710,7 @@ module Polars
|
|
1660
1710
|
end
|
1661
1711
|
alias_method :count, :len
|
1662
1712
|
alias_method :length, :len
|
1713
|
+
alias_method :size, :len
|
1663
1714
|
|
1664
1715
|
# Cast between data types.
|
1665
1716
|
#
|
@@ -1779,6 +1830,8 @@ module Polars
|
|
1779
1830
|
[Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
|
1780
1831
|
end
|
1781
1832
|
alias_method :datelike?, :is_datelike
|
1833
|
+
alias_method :is_temporal, :is_datelike
|
1834
|
+
alias_method :temporal?, :is_datelike
|
1782
1835
|
|
1783
1836
|
# Check if this Series has floating point numbers.
|
1784
1837
|
#
|
@@ -2432,6 +2485,7 @@ module Polars
|
|
2432
2485
|
end
|
2433
2486
|
Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
|
2434
2487
|
end
|
2488
|
+
alias_method :map, :apply
|
2435
2489
|
|
2436
2490
|
# Shift the values by a given period.
|
2437
2491
|
#
|
@@ -3483,7 +3537,7 @@ module Polars
|
|
3483
3537
|
# # 99
|
3484
3538
|
# # ]
|
3485
3539
|
def extend_constant(value, n)
|
3486
|
-
|
3540
|
+
Utils.wrap_s(_s.extend_constant(value, n))
|
3487
3541
|
end
|
3488
3542
|
|
3489
3543
|
# Flags the Series as sorted.
|
@@ -3637,14 +3691,39 @@ module Polars
|
|
3637
3691
|
end
|
3638
3692
|
|
3639
3693
|
def _comp(other, op)
|
3694
|
+
if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
|
3695
|
+
if (other == true && op == :eq) || (other == false && op == :neq)
|
3696
|
+
return clone
|
3697
|
+
elsif (other == false && op == :eq) || (other == true && op == :neq)
|
3698
|
+
return !self
|
3699
|
+
end
|
3700
|
+
end
|
3701
|
+
|
3702
|
+
if other.is_a?(::Time) && dtype.is_a?(Datetime)
|
3703
|
+
ts = Utils._datetime_to_pl_timestamp(other, time_unit)
|
3704
|
+
f = ffi_func("#{op}_<>", Int64, _s)
|
3705
|
+
fail if f.nil?
|
3706
|
+
return Utils.wrap_s(f.call(ts))
|
3707
|
+
elsif other.is_a?(::Date) && dtype == Date
|
3708
|
+
d = Utils._date_to_pl_date(other)
|
3709
|
+
f = ffi_func("#{op}_<>", Int32, _s)
|
3710
|
+
fail if f.nil?
|
3711
|
+
return Utils.wrap_s(f.call(d))
|
3712
|
+
end
|
3713
|
+
|
3640
3714
|
if other.is_a?(Series)
|
3641
3715
|
return Utils.wrap_s(_s.send(op, other._s))
|
3642
3716
|
end
|
3643
3717
|
|
3644
|
-
|
3645
|
-
|
3718
|
+
f = ffi_func("#{op}_<>", dtype, _s)
|
3719
|
+
if f.nil?
|
3720
|
+
raise NotImplementedError
|
3646
3721
|
end
|
3647
|
-
Utils.wrap_s(
|
3722
|
+
Utils.wrap_s(f.call(other))
|
3723
|
+
end
|
3724
|
+
|
3725
|
+
def ffi_func(name, dtype, _s)
|
3726
|
+
_s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype))) if DTYPE_TO_FFINAME.key?(dtype)
|
3648
3727
|
end
|
3649
3728
|
|
3650
3729
|
def _arithmetic(other, op)
|
@@ -3655,14 +3734,16 @@ module Polars
|
|
3655
3734
|
return Utils.wrap_s(_s.send(op, other._s))
|
3656
3735
|
end
|
3657
3736
|
|
3658
|
-
if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
|
3659
|
-
|
3660
|
-
|
3661
|
-
if other.is_a?(Float) && !is_float
|
3662
|
-
raise Todo
|
3737
|
+
if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
|
3738
|
+
_s2 = sequence_to_rbseries(name, [other])
|
3739
|
+
return Utils.wrap_s(_s.send(op, _s2))
|
3663
3740
|
end
|
3664
3741
|
|
3665
|
-
|
3742
|
+
f = ffi_func("#{op}_<>", dtype, _s)
|
3743
|
+
if f.nil?
|
3744
|
+
raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
|
3745
|
+
end
|
3746
|
+
Utils.wrap_s(f.call(other))
|
3666
3747
|
end
|
3667
3748
|
|
3668
3749
|
DTYPE_TO_FFINAME = {
|
@@ -3695,25 +3776,57 @@ module Polars
|
|
3695
3776
|
values._s
|
3696
3777
|
end
|
3697
3778
|
|
3779
|
+
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
|
3780
|
+
# not needed yet
|
3781
|
+
# if !values.contiguous?
|
3782
|
+
# end
|
3783
|
+
|
3784
|
+
if values.shape.length == 1
|
3785
|
+
values, dtype = numo_values_and_dtype(values)
|
3786
|
+
strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
|
3787
|
+
if dtype == Numo::RObject
|
3788
|
+
sequence_to_rbseries(name, values.to_a, strict: strict)
|
3789
|
+
else
|
3790
|
+
constructor = numo_type_to_constructor(dtype)
|
3791
|
+
# TODO improve performance
|
3792
|
+
constructor.call(name, values.to_a, strict)
|
3793
|
+
end
|
3794
|
+
elsif values.shape.length == 2
|
3795
|
+
raise Todo
|
3796
|
+
else
|
3797
|
+
raise Todo
|
3798
|
+
end
|
3799
|
+
end
|
3800
|
+
|
3801
|
+
def numo_values_and_dtype(values)
|
3802
|
+
[values, values.class]
|
3803
|
+
end
|
3804
|
+
|
3805
|
+
def numo_type_to_constructor(dtype)
|
3806
|
+
{
|
3807
|
+
Numo::Float32 => RbSeries.method(:new_opt_f32),
|
3808
|
+
Numo::Float64 => RbSeries.method(:new_opt_f64),
|
3809
|
+
Numo::Int8 => RbSeries.method(:new_opt_i8),
|
3810
|
+
Numo::Int16 => RbSeries.method(:new_opt_i16),
|
3811
|
+
Numo::Int32 => RbSeries.method(:new_opt_i32),
|
3812
|
+
Numo::Int64 => RbSeries.method(:new_opt_i64),
|
3813
|
+
Numo::UInt8 => RbSeries.method(:new_opt_u8),
|
3814
|
+
Numo::UInt16 => RbSeries.method(:new_opt_u16),
|
3815
|
+
Numo::UInt32 => RbSeries.method(:new_opt_u32),
|
3816
|
+
Numo::UInt64 => RbSeries.method(:new_opt_u64)
|
3817
|
+
}.fetch(dtype)
|
3818
|
+
rescue KeyError
|
3819
|
+
RbSeries.method(:new_object)
|
3820
|
+
end
|
3821
|
+
|
3698
3822
|
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
|
3699
3823
|
ruby_dtype = nil
|
3700
|
-
nested_dtype = nil
|
3701
3824
|
|
3702
3825
|
if (values.nil? || values.empty?) && dtype.nil?
|
3703
|
-
|
3704
|
-
# if dtype for empty sequence could be guessed
|
3705
|
-
# (e.g comparisons between self and other)
|
3706
|
-
dtype = dtype_if_empty
|
3707
|
-
else
|
3708
|
-
# default to Float32 type
|
3709
|
-
dtype = :f32
|
3710
|
-
end
|
3826
|
+
dtype = dtype_if_empty || Float32
|
3711
3827
|
end
|
3712
3828
|
|
3713
|
-
rb_temporal_types = []
|
3714
|
-
rb_temporal_types << ::Date if defined?(::Date)
|
3715
|
-
rb_temporal_types << ::DateTime if defined?(::DateTime)
|
3716
|
-
rb_temporal_types << ::Time if defined?(::Time)
|
3829
|
+
rb_temporal_types = [::Date, ::DateTime, ::Time]
|
3717
3830
|
|
3718
3831
|
value = _get_first_non_none(values)
|
3719
3832
|
if !value.nil?
|
@@ -3738,58 +3851,54 @@ module Polars
|
|
3738
3851
|
|
3739
3852
|
# temporal branch
|
3740
3853
|
if rb_temporal_types.include?(ruby_dtype)
|
3741
|
-
|
3742
|
-
|
3743
|
-
|
3744
|
-
|
3745
|
-
# end
|
3746
|
-
|
3747
|
-
if ruby_dtype == ::Date
|
3748
|
-
RbSeries.new_opt_date(name, values, strict)
|
3749
|
-
elsif ruby_dtype == ::Time
|
3750
|
-
RbSeries.new_opt_datetime(name, values, strict)
|
3751
|
-
elsif ruby_dtype == ::DateTime
|
3752
|
-
RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
|
3753
|
-
else
|
3754
|
-
raise Todo
|
3755
|
-
end
|
3756
|
-
elsif ruby_dtype == Array
|
3757
|
-
if nested_dtype.nil?
|
3758
|
-
nested_value = _get_first_non_none(value)
|
3759
|
-
nested_dtype = nested_value.nil? ? Float : nested_value.class
|
3854
|
+
if dtype.nil?
|
3855
|
+
dtype = Utils.rb_type_to_dtype(ruby_dtype)
|
3856
|
+
elsif rb_temporal_types.include?(dtype)
|
3857
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
3760
3858
|
end
|
3859
|
+
# TODO
|
3860
|
+
time_unit = nil
|
3761
3861
|
|
3762
|
-
|
3763
|
-
|
3862
|
+
rb_series = RbSeries.new_from_anyvalues(name, values, strict)
|
3863
|
+
if time_unit.nil?
|
3864
|
+
s = Utils.wrap_s(rb_series)
|
3865
|
+
else
|
3866
|
+
s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
|
3764
3867
|
end
|
3765
|
-
|
3766
|
-
|
3767
|
-
|
3768
|
-
|
3769
|
-
|
3770
|
-
|
3771
|
-
|
3772
|
-
|
3773
|
-
|
3774
|
-
|
3775
|
-
|
3868
|
+
return s._s
|
3869
|
+
elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
|
3870
|
+
raise Todo
|
3871
|
+
elsif ruby_dtype == Array
|
3872
|
+
return sequence_from_anyvalue_or_object(name, values)
|
3873
|
+
elsif ruby_dtype == Series
|
3874
|
+
return RbSeries.new_series_list(name, values.map(&:_s), strict)
|
3875
|
+
elsif ruby_dtype == RbSeries
|
3876
|
+
return RbSeries.new_series_list(name, values, strict)
|
3877
|
+
else
|
3878
|
+
constructor =
|
3879
|
+
if value.is_a?(String)
|
3880
|
+
if value.encoding == Encoding::UTF_8
|
3881
|
+
RbSeries.method(:new_str)
|
3882
|
+
else
|
3883
|
+
RbSeries.method(:new_binary)
|
3776
3884
|
end
|
3885
|
+
elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
|
3886
|
+
# TODO improve performance
|
3887
|
+
RbSeries.method(:new_opt_f64)
|
3888
|
+
else
|
3889
|
+
rb_type_to_constructor(value.class)
|
3777
3890
|
end
|
3778
|
-
if equal_to_inner
|
3779
|
-
dtype = Utils.rb_type_to_dtype(nested_dtype)
|
3780
|
-
# TODO rescue and fallback to new_object
|
3781
|
-
return RbSeries.new_list(name, values, dtype)
|
3782
|
-
end
|
3783
|
-
end
|
3784
|
-
|
3785
|
-
RbSeries.new_object(name, values, strict)
|
3786
|
-
else
|
3787
|
-
constructor = rb_type_to_constructor(value.class)
|
3788
3891
|
constructor.call(name, values, strict)
|
3789
3892
|
end
|
3790
3893
|
end
|
3791
3894
|
end
|
3792
3895
|
|
3896
|
+
def sequence_from_anyvalue_or_object(name, values)
|
3897
|
+
RbSeries.new_from_anyvalues(name, values, true)
|
3898
|
+
rescue
|
3899
|
+
RbSeries.new_object(name, values, false)
|
3900
|
+
end
|
3901
|
+
|
3793
3902
|
POLARS_TYPE_TO_CONSTRUCTOR = {
|
3794
3903
|
Float32 => RbSeries.method(:new_opt_f32),
|
3795
3904
|
Float64 => RbSeries.method(:new_opt_f64),
|
@@ -3834,7 +3943,6 @@ module Polars
|
|
3834
3943
|
RB_TYPE_TO_CONSTRUCTOR = {
|
3835
3944
|
Float => RbSeries.method(:new_opt_f64),
|
3836
3945
|
Integer => RbSeries.method(:new_opt_i64),
|
3837
|
-
String => RbSeries.method(:new_str),
|
3838
3946
|
TrueClass => RbSeries.method(:new_opt_bool),
|
3839
3947
|
FalseClass => RbSeries.method(:new_opt_bool)
|
3840
3948
|
}
|
data/lib/polars/string_expr.rb
CHANGED
@@ -11,8 +11,8 @@ module Polars
|
|
11
11
|
|
12
12
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
13
|
#
|
14
|
-
# @param
|
15
|
-
#
|
14
|
+
# @param dtype [Object]
|
15
|
+
# The data type to convert into. Can be either Date, Datetime, or Time.
|
16
16
|
# @param fmt [String]
|
17
17
|
# Format to use, refer to the
|
18
18
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
@@ -33,57 +33,56 @@ module Polars
|
|
33
33
|
# the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
|
34
34
|
# no fractional second component is found then the default is "us".
|
35
35
|
#
|
36
|
-
# @example
|
36
|
+
# @example Dealing with a consistent format:
|
37
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
38
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
39
|
+
# # =>
|
40
|
+
# # shape: (2,)
|
41
|
+
# # Series: '' [datetime[μs, +00:00]]
|
42
|
+
# # [
|
43
|
+
# # 2020-01-01 01:00:00 +00:00
|
44
|
+
# # 2020-01-01 02:00:00 +00:00
|
45
|
+
# # ]
|
46
|
+
#
|
47
|
+
# @example Dealing with different formats.
|
37
48
|
# s = Polars::Series.new(
|
38
49
|
# "date",
|
39
50
|
# [
|
40
51
|
# "2021-04-22",
|
41
52
|
# "2022-01-04 00:00:00",
|
42
53
|
# "01/31/22",
|
43
|
-
# "Sun Jul 8 00:34:60 2001"
|
54
|
+
# "Sun Jul 8 00:34:60 2001",
|
44
55
|
# ]
|
45
56
|
# )
|
46
|
-
# s.to_frame.
|
47
|
-
# Polars.
|
48
|
-
# .str.strptime(
|
49
|
-
# .
|
50
|
-
#
|
51
|
-
# )
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# )
|
57
|
+
# s.to_frame.select(
|
58
|
+
# Polars.coalesce(
|
59
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
60
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
61
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
62
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
63
|
+
# )
|
64
|
+
# ).to_series
|
55
65
|
# # =>
|
56
|
-
# # shape: (4,
|
57
|
-
# #
|
58
|
-
# #
|
59
|
-
# #
|
60
|
-
# #
|
61
|
-
# #
|
62
|
-
# #
|
63
|
-
# #
|
64
|
-
|
65
|
-
|
66
|
-
# # └────────────┘
|
67
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
68
|
-
if !Utils.is_polars_dtype(datatype)
|
69
|
-
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
|
-
end
|
71
|
-
|
72
|
-
if datatype == :date
|
66
|
+
# # shape: (4,)
|
67
|
+
# # Series: 'date' [date]
|
68
|
+
# # [
|
69
|
+
# # 2021-04-22
|
70
|
+
# # 2022-01-04
|
71
|
+
# # 2022-01-31
|
72
|
+
# # 2001-07-08
|
73
|
+
# # ]
|
74
|
+
def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
75
|
+
if dtype == Date
|
73
76
|
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
|
74
|
-
elsif
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
else
|
81
|
-
dtcol.dt.cast_time_unit(tu)
|
82
|
-
end
|
83
|
-
elsif datatype == :time
|
77
|
+
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
78
|
+
dtype = Datetime.new if dtype == Datetime
|
79
|
+
time_unit = dtype.time_unit
|
80
|
+
time_zone = dtype.time_zone
|
81
|
+
Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
|
82
|
+
elsif dtype == Time
|
84
83
|
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
|
85
84
|
else
|
86
|
-
raise ArgumentError, "dtype should be of type
|
85
|
+
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
87
86
|
end
|
88
87
|
end
|
89
88
|
|
@@ -521,6 +520,40 @@ module Polars
|
|
521
520
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
522
521
|
end
|
523
522
|
|
523
|
+
# Parse string values as JSON.
|
524
|
+
#
|
525
|
+
# Throws an error if invalid JSON strings are encountered.
|
526
|
+
#
|
527
|
+
# @param dtype [Object]
|
528
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
529
|
+
# inferred from the JSON value.
|
530
|
+
#
|
531
|
+
# @return [Expr]
|
532
|
+
#
|
533
|
+
# @example
|
534
|
+
# df = Polars::DataFrame.new(
|
535
|
+
# {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
|
536
|
+
# )
|
537
|
+
# dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
|
538
|
+
# df.select(Polars.col("json").str.json_extract(dtype))
|
539
|
+
# # =>
|
540
|
+
# # shape: (3, 1)
|
541
|
+
# # ┌─────────────┐
|
542
|
+
# # │ json │
|
543
|
+
# # │ --- │
|
544
|
+
# # │ struct[2] │
|
545
|
+
# # ╞═════════════╡
|
546
|
+
# # │ {1,true} │
|
547
|
+
# # │ {null,null} │
|
548
|
+
# # │ {2,false} │
|
549
|
+
# # └─────────────┘
|
550
|
+
def json_extract(dtype = nil)
|
551
|
+
if !dtype.nil?
|
552
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
553
|
+
end
|
554
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
|
555
|
+
end
|
556
|
+
|
524
557
|
# Extract the first match of json string with provided JSONPath expression.
|
525
558
|
#
|
526
559
|
# Throws an error if invalid JSON strings are encountered.
|
@@ -846,10 +879,10 @@ module Polars
|
|
846
879
|
# # │ 1 ┆ 123ABC │
|
847
880
|
# # │ 2 ┆ abc456 │
|
848
881
|
# # └─────┴────────┘
|
849
|
-
def replace(pattern, value, literal: false)
|
882
|
+
def replace(pattern, value, literal: false, n: 1)
|
850
883
|
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
|
851
884
|
value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
|
852
|
-
Utils.wrap_expr(_rbexpr.
|
885
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
|
853
886
|
end
|
854
887
|
|
855
888
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -912,5 +945,78 @@ module Polars
|
|
912
945
|
def slice(offset, length = nil)
|
913
946
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
914
947
|
end
|
948
|
+
|
949
|
+
# Returns a column with a separate row for every string character.
|
950
|
+
#
|
951
|
+
# @return [Expr]
|
952
|
+
#
|
953
|
+
# @example
|
954
|
+
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
955
|
+
# df.select(Polars.col("a").str.explode)
|
956
|
+
# # =>
|
957
|
+
# # shape: (6, 1)
|
958
|
+
# # ┌─────┐
|
959
|
+
# # │ a │
|
960
|
+
# # │ --- │
|
961
|
+
# # │ str │
|
962
|
+
# # ╞═════╡
|
963
|
+
# # │ f │
|
964
|
+
# # │ o │
|
965
|
+
# # │ o │
|
966
|
+
# # │ b │
|
967
|
+
# # │ a │
|
968
|
+
# # │ r │
|
969
|
+
# # └─────┘
|
970
|
+
def explode
|
971
|
+
Utils.wrap_expr(_rbexpr.explode)
|
972
|
+
end
|
973
|
+
|
974
|
+
# Parse integers with base radix from strings.
|
975
|
+
#
|
976
|
+
# By default base 2. When `strict` is false, parse errors and overflows become null.
|
977
|
+
#
|
978
|
+
# @param radix [Integer]
|
979
|
+
# Positive integer which is the base of the string we are parsing.
|
980
|
+
# Default: 2.
|
981
|
+
# @param strict [Boolean]
|
982
|
+
# If true (the default), any parse error or overflow is raised as a ComputeError.
|
983
|
+
# If false, values that fail to parse or overflow are silently converted to null.
|
984
|
+
#
|
985
|
+
# @return [Expr]
|
986
|
+
#
|
987
|
+
# @example
|
988
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
989
|
+
# df.select(Polars.col("bin").str.parse_int(2, strict: false))
|
990
|
+
# # =>
|
991
|
+
# # shape: (4, 1)
|
992
|
+
# # ┌──────┐
|
993
|
+
# # │ bin │
|
994
|
+
# # │ --- │
|
995
|
+
# # │ i32 │
|
996
|
+
# # ╞══════╡
|
997
|
+
# # │ 6 │
|
998
|
+
# # │ 5 │
|
999
|
+
# # │ 2 │
|
1000
|
+
# # │ null │
|
1001
|
+
# # └──────┘
|
1002
|
+
#
|
1003
|
+
# @example
|
1004
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1005
|
+
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1006
|
+
# # =>
|
1007
|
+
# # shape: (4, 1)
|
1008
|
+
# # ┌───────┐
|
1009
|
+
# # │ hex │
|
1010
|
+
# # │ --- │
|
1011
|
+
# # │ i32 │
|
1012
|
+
# # ╞═══════╡
|
1013
|
+
# # │ 64030 │
|
1014
|
+
# # │ 65280 │
|
1015
|
+
# # │ 51966 │
|
1016
|
+
# # │ null │
|
1017
|
+
# # └───────┘
|
1018
|
+
def parse_int(radix = 2, strict: true)
|
1019
|
+
Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
|
1020
|
+
end
|
915
1021
|
end
|
916
1022
|
end
|
@@ -38,12 +38,12 @@ module Polars
|
|
38
38
|
# )
|
39
39
|
# s.to_frame.with_column(
|
40
40
|
# Polars.col("date")
|
41
|
-
# .str.strptime(
|
41
|
+
# .str.strptime(Polars::Date, "%F", strict: false)
|
42
42
|
# .fill_null(
|
43
|
-
# Polars.col("date").str.strptime(
|
43
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false)
|
44
44
|
# )
|
45
|
-
# .fill_null(Polars.col("date").str.strptime(
|
46
|
-
# .fill_null(Polars.col("date").str.strptime(
|
45
|
+
# .fill_null(Polars.col("date").str.strptime(Polars::Date, "%D", strict: false))
|
46
|
+
# .fill_null(Polars.col("date").str.strptime(Polars::Date, "%c", strict: false))
|
47
47
|
# )
|
48
48
|
# # =>
|
49
49
|
# # shape: (4, 1)
|