polars-df 0.4.0-x86_64-linux → 0.5.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +1990 -1149
- data/README.md +2 -2
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +2 -2
data/lib/polars/series.rb
CHANGED
@@ -67,6 +67,12 @@ module Polars
|
|
67
67
|
._s
|
68
68
|
elsif values.is_a?(Array)
|
69
69
|
self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
|
70
|
+
elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
|
71
|
+
self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
|
72
|
+
|
73
|
+
if !dtype.nil?
|
74
|
+
self._s = self.cast(dtype, strict: true)._s
|
75
|
+
end
|
70
76
|
else
|
71
77
|
raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
|
72
78
|
end
|
@@ -90,10 +96,14 @@ module Polars
|
|
90
96
|
#
|
91
97
|
# @return [Hash]
|
92
98
|
def flags
|
93
|
-
{
|
99
|
+
out = {
|
94
100
|
"SORTED_ASC" => _s.is_sorted_flag,
|
95
101
|
"SORTED_DESC" => _s.is_sorted_reverse_flag
|
96
102
|
}
|
103
|
+
if dtype.is_a?(List)
|
104
|
+
out["FAST_EXPLODE"] = _s.can_fast_explode_flag
|
105
|
+
end
|
106
|
+
out
|
97
107
|
end
|
98
108
|
|
99
109
|
# Get the inner dtype of a List typed Series.
|
@@ -222,14 +232,28 @@ module Polars
|
|
222
232
|
#
|
223
233
|
# @return [Series]
|
224
234
|
def *(other)
|
225
|
-
|
235
|
+
if is_temporal
|
236
|
+
raise ArgumentError, "first cast to integer before multiplying datelike dtypes"
|
237
|
+
elsif other.is_a?(DataFrame)
|
238
|
+
other * self
|
239
|
+
else
|
240
|
+
_arithmetic(other, :mul)
|
241
|
+
end
|
226
242
|
end
|
227
243
|
|
228
244
|
# Performs division.
|
229
245
|
#
|
230
246
|
# @return [Series]
|
231
247
|
def /(other)
|
232
|
-
|
248
|
+
if is_temporal
|
249
|
+
raise ArgumentError, "first cast to integer before dividing datelike dtypes"
|
250
|
+
end
|
251
|
+
|
252
|
+
if is_float
|
253
|
+
return _arithmetic(other, :div)
|
254
|
+
end
|
255
|
+
|
256
|
+
cast(Float64) / other
|
233
257
|
end
|
234
258
|
|
235
259
|
# Returns the modulo.
|
@@ -252,6 +276,16 @@ module Polars
|
|
252
276
|
to_frame.select(Polars.col(name).pow(power)).to_series
|
253
277
|
end
|
254
278
|
|
279
|
+
# Performs boolean not.
|
280
|
+
#
|
281
|
+
# @return [Series]
|
282
|
+
def !
|
283
|
+
if dtype == Boolean
|
284
|
+
return Utils.wrap_s(_s.not)
|
285
|
+
end
|
286
|
+
raise NotImplementedError
|
287
|
+
end
|
288
|
+
|
255
289
|
# Performs negation.
|
256
290
|
#
|
257
291
|
# @return [Series]
|
@@ -278,6 +312,10 @@ module Polars
|
|
278
312
|
return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
|
279
313
|
end
|
280
314
|
|
315
|
+
if item.is_a?(Series) && item.bool?
|
316
|
+
return filter(item)
|
317
|
+
end
|
318
|
+
|
281
319
|
if item.is_a?(Integer)
|
282
320
|
return _s.get_idx(item)
|
283
321
|
end
|
@@ -369,16 +407,26 @@ module Polars
|
|
369
407
|
# Check if any boolean value in the column is `true`.
|
370
408
|
#
|
371
409
|
# @return [Boolean]
|
372
|
-
def any
|
373
|
-
|
410
|
+
def any?(&block)
|
411
|
+
if block_given?
|
412
|
+
apply(&block).any?
|
413
|
+
else
|
414
|
+
to_frame.select(Polars.col(name).any).to_series[0]
|
415
|
+
end
|
374
416
|
end
|
417
|
+
alias_method :any, :any?
|
375
418
|
|
376
419
|
# Check if all boolean values in the column are `true`.
|
377
420
|
#
|
378
421
|
# @return [Boolean]
|
379
|
-
def all
|
380
|
-
|
422
|
+
def all?(&block)
|
423
|
+
if block_given?
|
424
|
+
apply(&block).all?
|
425
|
+
else
|
426
|
+
to_frame.select(Polars.col(name).all).to_series[0]
|
427
|
+
end
|
381
428
|
end
|
429
|
+
alias_method :all, :all?
|
382
430
|
|
383
431
|
# Compute the logarithm to a given base.
|
384
432
|
#
|
@@ -1314,6 +1362,7 @@ module Polars
|
|
1314
1362
|
def unique(maintain_order: false)
|
1315
1363
|
super
|
1316
1364
|
end
|
1365
|
+
alias_method :uniq, :unique
|
1317
1366
|
|
1318
1367
|
# Take values by index.
|
1319
1368
|
#
|
@@ -1535,6 +1584,7 @@ module Polars
|
|
1535
1584
|
def is_in(other)
|
1536
1585
|
super
|
1537
1586
|
end
|
1587
|
+
alias_method :in?, :is_in
|
1538
1588
|
|
1539
1589
|
# Get index values where Boolean Series evaluate `true`.
|
1540
1590
|
#
|
@@ -1660,6 +1710,7 @@ module Polars
|
|
1660
1710
|
end
|
1661
1711
|
alias_method :count, :len
|
1662
1712
|
alias_method :length, :len
|
1713
|
+
alias_method :size, :len
|
1663
1714
|
|
1664
1715
|
# Cast between data types.
|
1665
1716
|
#
|
@@ -1779,6 +1830,8 @@ module Polars
|
|
1779
1830
|
[Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
|
1780
1831
|
end
|
1781
1832
|
alias_method :datelike?, :is_datelike
|
1833
|
+
alias_method :is_temporal, :is_datelike
|
1834
|
+
alias_method :temporal?, :is_datelike
|
1782
1835
|
|
1783
1836
|
# Check if this Series has floating point numbers.
|
1784
1837
|
#
|
@@ -2432,6 +2485,7 @@ module Polars
|
|
2432
2485
|
end
|
2433
2486
|
Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
|
2434
2487
|
end
|
2488
|
+
alias_method :map, :apply
|
2435
2489
|
|
2436
2490
|
# Shift the values by a given period.
|
2437
2491
|
#
|
@@ -3483,7 +3537,7 @@ module Polars
|
|
3483
3537
|
# # 99
|
3484
3538
|
# # ]
|
3485
3539
|
def extend_constant(value, n)
|
3486
|
-
|
3540
|
+
Utils.wrap_s(_s.extend_constant(value, n))
|
3487
3541
|
end
|
3488
3542
|
|
3489
3543
|
# Flags the Series as sorted.
|
@@ -3637,14 +3691,39 @@ module Polars
|
|
3637
3691
|
end
|
3638
3692
|
|
3639
3693
|
def _comp(other, op)
|
3694
|
+
if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
|
3695
|
+
if (other == true && op == :eq) || (other == false && op == :neq)
|
3696
|
+
return clone
|
3697
|
+
elsif (other == false && op == :eq) || (other == true && op == :neq)
|
3698
|
+
return !self
|
3699
|
+
end
|
3700
|
+
end
|
3701
|
+
|
3702
|
+
if other.is_a?(::Time) && dtype.is_a?(Datetime)
|
3703
|
+
ts = Utils._datetime_to_pl_timestamp(other, time_unit)
|
3704
|
+
f = ffi_func("#{op}_<>", Int64, _s)
|
3705
|
+
fail if f.nil?
|
3706
|
+
return Utils.wrap_s(f.call(ts))
|
3707
|
+
elsif other.is_a?(::Date) && dtype == Date
|
3708
|
+
d = Utils._date_to_pl_date(other)
|
3709
|
+
f = ffi_func("#{op}_<>", Int32, _s)
|
3710
|
+
fail if f.nil?
|
3711
|
+
return Utils.wrap_s(f.call(d))
|
3712
|
+
end
|
3713
|
+
|
3640
3714
|
if other.is_a?(Series)
|
3641
3715
|
return Utils.wrap_s(_s.send(op, other._s))
|
3642
3716
|
end
|
3643
3717
|
|
3644
|
-
|
3645
|
-
|
3718
|
+
f = ffi_func("#{op}_<>", dtype, _s)
|
3719
|
+
if f.nil?
|
3720
|
+
raise NotImplementedError
|
3646
3721
|
end
|
3647
|
-
Utils.wrap_s(
|
3722
|
+
Utils.wrap_s(f.call(other))
|
3723
|
+
end
|
3724
|
+
|
3725
|
+
def ffi_func(name, dtype, _s)
|
3726
|
+
_s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype))) if DTYPE_TO_FFINAME.key?(dtype)
|
3648
3727
|
end
|
3649
3728
|
|
3650
3729
|
def _arithmetic(other, op)
|
@@ -3655,14 +3734,16 @@ module Polars
|
|
3655
3734
|
return Utils.wrap_s(_s.send(op, other._s))
|
3656
3735
|
end
|
3657
3736
|
|
3658
|
-
if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
|
3659
|
-
|
3660
|
-
|
3661
|
-
if other.is_a?(Float) && !is_float
|
3662
|
-
raise Todo
|
3737
|
+
if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
|
3738
|
+
_s2 = sequence_to_rbseries(name, [other])
|
3739
|
+
return Utils.wrap_s(_s.send(op, _s2))
|
3663
3740
|
end
|
3664
3741
|
|
3665
|
-
|
3742
|
+
f = ffi_func("#{op}_<>", dtype, _s)
|
3743
|
+
if f.nil?
|
3744
|
+
raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
|
3745
|
+
end
|
3746
|
+
Utils.wrap_s(f.call(other))
|
3666
3747
|
end
|
3667
3748
|
|
3668
3749
|
DTYPE_TO_FFINAME = {
|
@@ -3695,25 +3776,57 @@ module Polars
|
|
3695
3776
|
values._s
|
3696
3777
|
end
|
3697
3778
|
|
3779
|
+
def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
|
3780
|
+
# not needed yet
|
3781
|
+
# if !values.contiguous?
|
3782
|
+
# end
|
3783
|
+
|
3784
|
+
if values.shape.length == 1
|
3785
|
+
values, dtype = numo_values_and_dtype(values)
|
3786
|
+
strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
|
3787
|
+
if dtype == Numo::RObject
|
3788
|
+
sequence_to_rbseries(name, values.to_a, strict: strict)
|
3789
|
+
else
|
3790
|
+
constructor = numo_type_to_constructor(dtype)
|
3791
|
+
# TODO improve performance
|
3792
|
+
constructor.call(name, values.to_a, strict)
|
3793
|
+
end
|
3794
|
+
elsif values.shape.length == 2
|
3795
|
+
raise Todo
|
3796
|
+
else
|
3797
|
+
raise Todo
|
3798
|
+
end
|
3799
|
+
end
|
3800
|
+
|
3801
|
+
def numo_values_and_dtype(values)
|
3802
|
+
[values, values.class]
|
3803
|
+
end
|
3804
|
+
|
3805
|
+
def numo_type_to_constructor(dtype)
|
3806
|
+
{
|
3807
|
+
Numo::Float32 => RbSeries.method(:new_opt_f32),
|
3808
|
+
Numo::Float64 => RbSeries.method(:new_opt_f64),
|
3809
|
+
Numo::Int8 => RbSeries.method(:new_opt_i8),
|
3810
|
+
Numo::Int16 => RbSeries.method(:new_opt_i16),
|
3811
|
+
Numo::Int32 => RbSeries.method(:new_opt_i32),
|
3812
|
+
Numo::Int64 => RbSeries.method(:new_opt_i64),
|
3813
|
+
Numo::UInt8 => RbSeries.method(:new_opt_u8),
|
3814
|
+
Numo::UInt16 => RbSeries.method(:new_opt_u16),
|
3815
|
+
Numo::UInt32 => RbSeries.method(:new_opt_u32),
|
3816
|
+
Numo::UInt64 => RbSeries.method(:new_opt_u64)
|
3817
|
+
}.fetch(dtype)
|
3818
|
+
rescue KeyError
|
3819
|
+
RbSeries.method(:new_object)
|
3820
|
+
end
|
3821
|
+
|
3698
3822
|
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
|
3699
3823
|
ruby_dtype = nil
|
3700
|
-
nested_dtype = nil
|
3701
3824
|
|
3702
3825
|
if (values.nil? || values.empty?) && dtype.nil?
|
3703
|
-
|
3704
|
-
# if dtype for empty sequence could be guessed
|
3705
|
-
# (e.g comparisons between self and other)
|
3706
|
-
dtype = dtype_if_empty
|
3707
|
-
else
|
3708
|
-
# default to Float32 type
|
3709
|
-
dtype = :f32
|
3710
|
-
end
|
3826
|
+
dtype = dtype_if_empty || Float32
|
3711
3827
|
end
|
3712
3828
|
|
3713
|
-
rb_temporal_types = []
|
3714
|
-
rb_temporal_types << ::Date if defined?(::Date)
|
3715
|
-
rb_temporal_types << ::DateTime if defined?(::DateTime)
|
3716
|
-
rb_temporal_types << ::Time if defined?(::Time)
|
3829
|
+
rb_temporal_types = [::Date, ::DateTime, ::Time]
|
3717
3830
|
|
3718
3831
|
value = _get_first_non_none(values)
|
3719
3832
|
if !value.nil?
|
@@ -3738,58 +3851,54 @@ module Polars
|
|
3738
3851
|
|
3739
3852
|
# temporal branch
|
3740
3853
|
if rb_temporal_types.include?(ruby_dtype)
|
3741
|
-
|
3742
|
-
|
3743
|
-
|
3744
|
-
|
3745
|
-
# end
|
3746
|
-
|
3747
|
-
if ruby_dtype == ::Date
|
3748
|
-
RbSeries.new_opt_date(name, values, strict)
|
3749
|
-
elsif ruby_dtype == ::Time
|
3750
|
-
RbSeries.new_opt_datetime(name, values, strict)
|
3751
|
-
elsif ruby_dtype == ::DateTime
|
3752
|
-
RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
|
3753
|
-
else
|
3754
|
-
raise Todo
|
3755
|
-
end
|
3756
|
-
elsif ruby_dtype == Array
|
3757
|
-
if nested_dtype.nil?
|
3758
|
-
nested_value = _get_first_non_none(value)
|
3759
|
-
nested_dtype = nested_value.nil? ? Float : nested_value.class
|
3854
|
+
if dtype.nil?
|
3855
|
+
dtype = Utils.rb_type_to_dtype(ruby_dtype)
|
3856
|
+
elsif rb_temporal_types.include?(dtype)
|
3857
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
3760
3858
|
end
|
3859
|
+
# TODO
|
3860
|
+
time_unit = nil
|
3761
3861
|
|
3762
|
-
|
3763
|
-
|
3862
|
+
rb_series = RbSeries.new_from_anyvalues(name, values, strict)
|
3863
|
+
if time_unit.nil?
|
3864
|
+
s = Utils.wrap_s(rb_series)
|
3865
|
+
else
|
3866
|
+
s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
|
3764
3867
|
end
|
3765
|
-
|
3766
|
-
|
3767
|
-
|
3768
|
-
|
3769
|
-
|
3770
|
-
|
3771
|
-
|
3772
|
-
|
3773
|
-
|
3774
|
-
|
3775
|
-
|
3868
|
+
return s._s
|
3869
|
+
elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
|
3870
|
+
raise Todo
|
3871
|
+
elsif ruby_dtype == Array
|
3872
|
+
return sequence_from_anyvalue_or_object(name, values)
|
3873
|
+
elsif ruby_dtype == Series
|
3874
|
+
return RbSeries.new_series_list(name, values.map(&:_s), strict)
|
3875
|
+
elsif ruby_dtype == RbSeries
|
3876
|
+
return RbSeries.new_series_list(name, values, strict)
|
3877
|
+
else
|
3878
|
+
constructor =
|
3879
|
+
if value.is_a?(String)
|
3880
|
+
if value.encoding == Encoding::UTF_8
|
3881
|
+
RbSeries.method(:new_str)
|
3882
|
+
else
|
3883
|
+
RbSeries.method(:new_binary)
|
3776
3884
|
end
|
3885
|
+
elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
|
3886
|
+
# TODO improve performance
|
3887
|
+
RbSeries.method(:new_opt_f64)
|
3888
|
+
else
|
3889
|
+
rb_type_to_constructor(value.class)
|
3777
3890
|
end
|
3778
|
-
if equal_to_inner
|
3779
|
-
dtype = Utils.rb_type_to_dtype(nested_dtype)
|
3780
|
-
# TODO rescue and fallback to new_object
|
3781
|
-
return RbSeries.new_list(name, values, dtype)
|
3782
|
-
end
|
3783
|
-
end
|
3784
|
-
|
3785
|
-
RbSeries.new_object(name, values, strict)
|
3786
|
-
else
|
3787
|
-
constructor = rb_type_to_constructor(value.class)
|
3788
3891
|
constructor.call(name, values, strict)
|
3789
3892
|
end
|
3790
3893
|
end
|
3791
3894
|
end
|
3792
3895
|
|
3896
|
+
def sequence_from_anyvalue_or_object(name, values)
|
3897
|
+
RbSeries.new_from_anyvalues(name, values, true)
|
3898
|
+
rescue
|
3899
|
+
RbSeries.new_object(name, values, false)
|
3900
|
+
end
|
3901
|
+
|
3793
3902
|
POLARS_TYPE_TO_CONSTRUCTOR = {
|
3794
3903
|
Float32 => RbSeries.method(:new_opt_f32),
|
3795
3904
|
Float64 => RbSeries.method(:new_opt_f64),
|
@@ -3834,7 +3943,6 @@ module Polars
|
|
3834
3943
|
RB_TYPE_TO_CONSTRUCTOR = {
|
3835
3944
|
Float => RbSeries.method(:new_opt_f64),
|
3836
3945
|
Integer => RbSeries.method(:new_opt_i64),
|
3837
|
-
String => RbSeries.method(:new_str),
|
3838
3946
|
TrueClass => RbSeries.method(:new_opt_bool),
|
3839
3947
|
FalseClass => RbSeries.method(:new_opt_bool)
|
3840
3948
|
}
|
data/lib/polars/string_expr.rb
CHANGED
@@ -11,8 +11,8 @@ module Polars
|
|
11
11
|
|
12
12
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
13
|
#
|
14
|
-
# @param
|
15
|
-
#
|
14
|
+
# @param dtype [Object]
|
15
|
+
# The data type to convert into. Can be either Date, Datetime, or Time.
|
16
16
|
# @param fmt [String]
|
17
17
|
# Format to use, refer to the
|
18
18
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
@@ -33,57 +33,56 @@ module Polars
|
|
33
33
|
# the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
|
34
34
|
# no fractional second component is found then the default is "us".
|
35
35
|
#
|
36
|
-
# @example
|
36
|
+
# @example Dealing with a consistent format:
|
37
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
38
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
39
|
+
# # =>
|
40
|
+
# # shape: (2,)
|
41
|
+
# # Series: '' [datetime[μs, +00:00]]
|
42
|
+
# # [
|
43
|
+
# # 2020-01-01 01:00:00 +00:00
|
44
|
+
# # 2020-01-01 02:00:00 +00:00
|
45
|
+
# # ]
|
46
|
+
#
|
47
|
+
# @example Dealing with different formats.
|
37
48
|
# s = Polars::Series.new(
|
38
49
|
# "date",
|
39
50
|
# [
|
40
51
|
# "2021-04-22",
|
41
52
|
# "2022-01-04 00:00:00",
|
42
53
|
# "01/31/22",
|
43
|
-
# "Sun Jul 8 00:34:60 2001"
|
54
|
+
# "Sun Jul 8 00:34:60 2001",
|
44
55
|
# ]
|
45
56
|
# )
|
46
|
-
# s.to_frame.
|
47
|
-
# Polars.
|
48
|
-
# .str.strptime(
|
49
|
-
# .
|
50
|
-
#
|
51
|
-
# )
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# )
|
57
|
+
# s.to_frame.select(
|
58
|
+
# Polars.coalesce(
|
59
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
60
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
61
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
62
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
63
|
+
# )
|
64
|
+
# ).to_series
|
55
65
|
# # =>
|
56
|
-
# # shape: (4,
|
57
|
-
# #
|
58
|
-
# #
|
59
|
-
# #
|
60
|
-
# #
|
61
|
-
# #
|
62
|
-
# #
|
63
|
-
# #
|
64
|
-
|
65
|
-
|
66
|
-
# # └────────────┘
|
67
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
68
|
-
if !Utils.is_polars_dtype(datatype)
|
69
|
-
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
|
-
end
|
71
|
-
|
72
|
-
if datatype == :date
|
66
|
+
# # shape: (4,)
|
67
|
+
# # Series: 'date' [date]
|
68
|
+
# # [
|
69
|
+
# # 2021-04-22
|
70
|
+
# # 2022-01-04
|
71
|
+
# # 2022-01-31
|
72
|
+
# # 2001-07-08
|
73
|
+
# # ]
|
74
|
+
def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
75
|
+
if dtype == Date
|
73
76
|
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
|
74
|
-
elsif
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
else
|
81
|
-
dtcol.dt.cast_time_unit(tu)
|
82
|
-
end
|
83
|
-
elsif datatype == :time
|
77
|
+
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
78
|
+
dtype = Datetime.new if dtype == Datetime
|
79
|
+
time_unit = dtype.time_unit
|
80
|
+
time_zone = dtype.time_zone
|
81
|
+
Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
|
82
|
+
elsif dtype == Time
|
84
83
|
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
|
85
84
|
else
|
86
|
-
raise ArgumentError, "dtype should be of type
|
85
|
+
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
87
86
|
end
|
88
87
|
end
|
89
88
|
|
@@ -521,6 +520,40 @@ module Polars
|
|
521
520
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
522
521
|
end
|
523
522
|
|
523
|
+
# Parse string values as JSON.
|
524
|
+
#
|
525
|
+
# Throws an error if an invalid JSON string is encountered.
|
526
|
+
#
|
527
|
+
# @param dtype [Object]
|
528
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
529
|
+
# inferred from the JSON value.
|
530
|
+
#
|
531
|
+
# @return [Expr]
|
532
|
+
#
|
533
|
+
# @example
|
534
|
+
# df = Polars::DataFrame.new(
|
535
|
+
# {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
|
536
|
+
# )
|
537
|
+
# dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
|
538
|
+
# df.select(Polars.col("json").str.json_extract(dtype))
|
539
|
+
# # =>
|
540
|
+
# # shape: (3, 1)
|
541
|
+
# # ┌─────────────┐
|
542
|
+
# # │ json │
|
543
|
+
# # │ --- │
|
544
|
+
# # │ struct[2] │
|
545
|
+
# # ╞═════════════╡
|
546
|
+
# # │ {1,true} │
|
547
|
+
# # │ {null,null} │
|
548
|
+
# # │ {2,false} │
|
549
|
+
# # └─────────────┘
|
550
|
+
def json_extract(dtype = nil)
|
551
|
+
if !dtype.nil?
|
552
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
553
|
+
end
|
554
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
|
555
|
+
end
|
556
|
+
|
524
557
|
# Extract the first match of json string with provided JSONPath expression.
|
525
558
|
#
|
526
559
|
# Throws an error if an invalid JSON string is encountered.
|
@@ -846,10 +879,10 @@ module Polars
|
|
846
879
|
# # │ 1 ┆ 123ABC │
|
847
880
|
# # │ 2 ┆ abc456 │
|
848
881
|
# # └─────┴────────┘
|
849
|
-
def replace(pattern, value, literal: false)
|
882
|
+
def replace(pattern, value, literal: false, n: 1)
|
850
883
|
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
|
851
884
|
value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
|
852
|
-
Utils.wrap_expr(_rbexpr.
|
885
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
|
853
886
|
end
|
854
887
|
|
855
888
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -912,5 +945,78 @@ module Polars
|
|
912
945
|
def slice(offset, length = nil)
|
913
946
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
914
947
|
end
|
948
|
+
|
949
|
+
# Returns a column with a separate row for every string character.
|
950
|
+
#
|
951
|
+
# @return [Expr]
|
952
|
+
#
|
953
|
+
# @example
|
954
|
+
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
955
|
+
# df.select(Polars.col("a").str.explode)
|
956
|
+
# # =>
|
957
|
+
# # shape: (6, 1)
|
958
|
+
# # ┌─────┐
|
959
|
+
# # │ a │
|
960
|
+
# # │ --- │
|
961
|
+
# # │ str │
|
962
|
+
# # ╞═════╡
|
963
|
+
# # │ f │
|
964
|
+
# # │ o │
|
965
|
+
# # │ o │
|
966
|
+
# # │ b │
|
967
|
+
# # │ a │
|
968
|
+
# # │ r │
|
969
|
+
# # └─────┘
|
970
|
+
def explode
|
971
|
+
Utils.wrap_expr(_rbexpr.explode)
|
972
|
+
end
|
973
|
+
|
974
|
+
# Parse integers with base radix from strings.
|
975
|
+
#
|
976
|
+
# By default base 2. With `strict: false`, parse errors and overflows become null.
|
977
|
+
#
|
978
|
+
# @param radix [Integer]
|
979
|
+
# Positive integer which is the base of the string we are parsing.
|
980
|
+
# Default: 2.
|
981
|
+
# @param strict [Boolean]
|
982
|
+
# If true (the default), any ParseError or overflow is raised as a ComputeError.
|
983
|
+
# If false, such values are silently converted to null.
|
984
|
+
#
|
985
|
+
# @return [Expr]
|
986
|
+
#
|
987
|
+
# @example
|
988
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
989
|
+
# df.select(Polars.col("bin").str.parse_int(2, strict: false))
|
990
|
+
# # =>
|
991
|
+
# # shape: (4, 1)
|
992
|
+
# # ┌──────┐
|
993
|
+
# # │ bin │
|
994
|
+
# # │ --- │
|
995
|
+
# # │ i32 │
|
996
|
+
# # ╞══════╡
|
997
|
+
# # │ 6 │
|
998
|
+
# # │ 5 │
|
999
|
+
# # │ 2 │
|
1000
|
+
# # │ null │
|
1001
|
+
# # └──────┘
|
1002
|
+
#
|
1003
|
+
# @example
|
1004
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1005
|
+
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1006
|
+
# # =>
|
1007
|
+
# # shape: (4, 1)
|
1008
|
+
# # ┌───────┐
|
1009
|
+
# # │ hex │
|
1010
|
+
# # │ --- │
|
1011
|
+
# # │ i32 │
|
1012
|
+
# # ╞═══════╡
|
1013
|
+
# # │ 64030 │
|
1014
|
+
# # │ 65280 │
|
1015
|
+
# # │ 51966 │
|
1016
|
+
# # │ null │
|
1017
|
+
# # └───────┘
|
1018
|
+
def parse_int(radix = 2, strict: true)
|
1019
|
+
Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
|
1020
|
+
end
|
915
1021
|
end
|
916
1022
|
end
|
@@ -38,12 +38,12 @@ module Polars
|
|
38
38
|
# )
|
39
39
|
# s.to_frame.with_column(
|
40
40
|
# Polars.col("date")
|
41
|
-
# .str.strptime(
|
41
|
+
# .str.strptime(Polars::Date, "%F", strict: false)
|
42
42
|
# .fill_null(
|
43
|
-
# Polars.col("date").str.strptime(
|
43
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false)
|
44
44
|
# )
|
45
|
-
# .fill_null(Polars.col("date").str.strptime(
|
46
|
-
# .fill_null(Polars.col("date").str.strptime(
|
45
|
+
# .fill_null(Polars.col("date").str.strptime(Polars::Date, "%D", strict: false))
|
46
|
+
# .fill_null(Polars.col("date").str.strptime(Polars::Date, "%c", strict: false))
|
47
47
|
# )
|
48
48
|
# # =>
|
49
49
|
# # shape: (4, 1)
|