polars-df 0.4.0-arm64-darwin → 0.5.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/series.rb CHANGED
@@ -67,6 +67,12 @@ module Polars
67
67
  ._s
68
68
  elsif values.is_a?(Array)
69
69
  self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
70
+ elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
71
+ self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
72
+
73
+ if !dtype.nil?
74
+ self._s = self.cast(dtype, strict: true)._s
75
+ end
70
76
  else
71
77
  raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
72
78
  end
@@ -90,10 +96,14 @@ module Polars
90
96
  #
91
97
  # @return [Hash]
92
98
  def flags
93
- {
99
+ out = {
94
100
  "SORTED_ASC" => _s.is_sorted_flag,
95
101
  "SORTED_DESC" => _s.is_sorted_reverse_flag
96
102
  }
103
+ if dtype.is_a?(List)
104
+ out["FAST_EXPLODE"] = _s.can_fast_explode_flag
105
+ end
106
+ out
97
107
  end
98
108
 
99
109
  # Get the inner dtype of a List typed Series.
@@ -222,14 +232,28 @@ module Polars
222
232
  #
223
233
  # @return [Series]
224
234
  def *(other)
225
- _arithmetic(other, :mul)
235
+ if is_temporal
236
+ raise ArgumentError, "first cast to integer before multiplying datelike dtypes"
237
+ elsif other.is_a?(DataFrame)
238
+ other * self
239
+ else
240
+ _arithmetic(other, :mul)
241
+ end
226
242
  end
227
243
 
228
244
  # Performs division.
229
245
  #
230
246
  # @return [Series]
231
247
  def /(other)
232
- _arithmetic(other, :div)
248
+ if is_temporal
249
+ raise ArgumentError, "first cast to integer before dividing datelike dtypes"
250
+ end
251
+
252
+ if is_float
253
+ return _arithmetic(other, :div)
254
+ end
255
+
256
+ cast(Float64) / other
233
257
  end
234
258
 
235
259
  # Returns the modulo.
@@ -252,6 +276,16 @@ module Polars
252
276
  to_frame.select(Polars.col(name).pow(power)).to_series
253
277
  end
254
278
 
279
+ # Performs boolean not.
280
+ #
281
+ # @return [Series]
282
+ def !
283
+ if dtype == Boolean
284
+ return Utils.wrap_s(_s.not)
285
+ end
286
+ raise NotImplementedError
287
+ end
288
+
255
289
  # Performs negation.
256
290
  #
257
291
  # @return [Series]
@@ -278,6 +312,10 @@ module Polars
278
312
  return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
279
313
  end
280
314
 
315
+ if item.is_a?(Series) && item.bool?
316
+ return filter(item)
317
+ end
318
+
281
319
  if item.is_a?(Integer)
282
320
  return _s.get_idx(item)
283
321
  end
@@ -369,16 +407,26 @@ module Polars
369
407
  # Check if any boolean value in the column is `true`.
370
408
  #
371
409
  # @return [Boolean]
372
- def any
373
- to_frame.select(Polars.col(name).any).to_series[0]
410
+ def any?(&block)
411
+ if block_given?
412
+ apply(&block).any?
413
+ else
414
+ to_frame.select(Polars.col(name).any).to_series[0]
415
+ end
374
416
  end
417
+ alias_method :any, :any?
375
418
 
376
419
  # Check if all boolean values in the column are `true`.
377
420
  #
378
421
  # @return [Boolean]
379
- def all
380
- to_frame.select(Polars.col(name).all).to_series[0]
422
+ def all?(&block)
423
+ if block_given?
424
+ apply(&block).all?
425
+ else
426
+ to_frame.select(Polars.col(name).all).to_series[0]
427
+ end
381
428
  end
429
+ alias_method :all, :all?
382
430
 
383
431
  # Compute the logarithm to a given base.
384
432
  #
@@ -1314,6 +1362,7 @@ module Polars
1314
1362
  def unique(maintain_order: false)
1315
1363
  super
1316
1364
  end
1365
+ alias_method :uniq, :unique
1317
1366
 
1318
1367
  # Take values by index.
1319
1368
  #
@@ -1535,6 +1584,7 @@ module Polars
1535
1584
  def is_in(other)
1536
1585
  super
1537
1586
  end
1587
+ alias_method :in?, :is_in
1538
1588
 
1539
1589
  # Get index values where Boolean Series evaluate `true`.
1540
1590
  #
@@ -1660,6 +1710,7 @@ module Polars
1660
1710
  end
1661
1711
  alias_method :count, :len
1662
1712
  alias_method :length, :len
1713
+ alias_method :size, :len
1663
1714
 
1664
1715
  # Cast between data types.
1665
1716
  #
@@ -1779,6 +1830,8 @@ module Polars
1779
1830
  [Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
1780
1831
  end
1781
1832
  alias_method :datelike?, :is_datelike
1833
+ alias_method :is_temporal, :is_datelike
1834
+ alias_method :temporal?, :is_datelike
1782
1835
 
1783
1836
  # Check if this Series has floating point numbers.
1784
1837
  #
@@ -2432,6 +2485,7 @@ module Polars
2432
2485
  end
2433
2486
  Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
2434
2487
  end
2488
+ alias_method :map, :apply
2435
2489
 
2436
2490
  # Shift the values by a given period.
2437
2491
  #
@@ -3483,7 +3537,7 @@ module Polars
3483
3537
  # # 99
3484
3538
  # # ]
3485
3539
  def extend_constant(value, n)
3486
- super
3540
+ Utils.wrap_s(_s.extend_constant(value, n))
3487
3541
  end
3488
3542
 
3489
3543
  # Flags the Series as sorted.
@@ -3637,14 +3691,39 @@ module Polars
3637
3691
  end
3638
3692
 
3639
3693
  def _comp(other, op)
3694
+ if dtype == Boolean && Utils.bool?(other) && [:eq, :neq].include?(op)
3695
+ if (other == true && op == :eq) || (other == false && op == :neq)
3696
+ return clone
3697
+ elsif (other == false && op == :eq) || (other == true && op == :neq)
3698
+ return !self
3699
+ end
3700
+ end
3701
+
3702
+ if other.is_a?(::Time) && dtype.is_a?(Datetime)
3703
+ ts = Utils._datetime_to_pl_timestamp(other, time_unit)
3704
+ f = ffi_func("#{op}_<>", Int64, _s)
3705
+ fail if f.nil?
3706
+ return Utils.wrap_s(f.call(ts))
3707
+ elsif other.is_a?(::Date) && dtype == Date
3708
+ d = Utils._date_to_pl_date(other)
3709
+ f = ffi_func("#{op}_<>", Int32, _s)
3710
+ fail if f.nil?
3711
+ return Utils.wrap_s(f.call(d))
3712
+ end
3713
+
3640
3714
  if other.is_a?(Series)
3641
3715
  return Utils.wrap_s(_s.send(op, other._s))
3642
3716
  end
3643
3717
 
3644
- if dtype == Utf8
3645
- raise Todo
3718
+ f = ffi_func("#{op}_<>", dtype, _s)
3719
+ if f.nil?
3720
+ raise NotImplementedError
3646
3721
  end
3647
- Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
3722
+ Utils.wrap_s(f.call(other))
3723
+ end
3724
+
3725
+ def ffi_func(name, dtype, _s)
3726
+ _s.method(name.sub("<>", DTYPE_TO_FFINAME.fetch(dtype))) if DTYPE_TO_FFINAME.key?(dtype)
3648
3727
  end
3649
3728
 
3650
3729
  def _arithmetic(other, op)
@@ -3655,14 +3734,16 @@ module Polars
3655
3734
  return Utils.wrap_s(_s.send(op, other._s))
3656
3735
  end
3657
3736
 
3658
- if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
3659
- raise Todo
3660
- end
3661
- if other.is_a?(Float) && !is_float
3662
- raise Todo
3737
+ if (other.is_a?(Float) || other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)) && !is_float
3738
+ _s2 = sequence_to_rbseries(name, [other])
3739
+ return Utils.wrap_s(_s.send(op, _s2))
3663
3740
  end
3664
3741
 
3665
- Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
3742
+ f = ffi_func("#{op}_<>", dtype, _s)
3743
+ if f.nil?
3744
+ raise ArgumentError, "cannot do arithmetic with series of dtype: #{dtype} and argument of type: #{other.class.name}"
3745
+ end
3746
+ Utils.wrap_s(f.call(other))
3666
3747
  end
3667
3748
 
3668
3749
  DTYPE_TO_FFINAME = {
@@ -3695,25 +3776,57 @@ module Polars
3695
3776
  values._s
3696
3777
  end
3697
3778
 
3779
+ def numo_to_rbseries(name, values, strict: true, nan_to_null: false)
3780
+ # not needed yet
3781
+ # if !values.contiguous?
3782
+ # end
3783
+
3784
+ if values.shape.length == 1
3785
+ values, dtype = numo_values_and_dtype(values)
3786
+ strict = nan_to_null if [Numo::SFloat, Numo::DFloat].include?(dtype)
3787
+ if dtype == Numo::RObject
3788
+ sequence_to_rbseries(name, values.to_a, strict: strict)
3789
+ else
3790
+ constructor = numo_type_to_constructor(dtype)
3791
+ # TODO improve performance
3792
+ constructor.call(name, values.to_a, strict)
3793
+ end
3794
+ elsif values.shape.length == 2
3795
+ raise Todo
3796
+ else
3797
+ raise Todo
3798
+ end
3799
+ end
3800
+
3801
+ def numo_values_and_dtype(values)
3802
+ [values, values.class]
3803
+ end
3804
+
3805
+ def numo_type_to_constructor(dtype)
3806
+ {
3807
+ Numo::Float32 => RbSeries.method(:new_opt_f32),
3808
+ Numo::Float64 => RbSeries.method(:new_opt_f64),
3809
+ Numo::Int8 => RbSeries.method(:new_opt_i8),
3810
+ Numo::Int16 => RbSeries.method(:new_opt_i16),
3811
+ Numo::Int32 => RbSeries.method(:new_opt_i32),
3812
+ Numo::Int64 => RbSeries.method(:new_opt_i64),
3813
+ Numo::UInt8 => RbSeries.method(:new_opt_u8),
3814
+ Numo::UInt16 => RbSeries.method(:new_opt_u16),
3815
+ Numo::UInt32 => RbSeries.method(:new_opt_u32),
3816
+ Numo::UInt64 => RbSeries.method(:new_opt_u64)
3817
+ }.fetch(dtype)
3818
+ rescue KeyError
3819
+ RbSeries.method(:new_object)
3820
+ end
3821
+
3698
3822
  def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
3699
3823
  ruby_dtype = nil
3700
- nested_dtype = nil
3701
3824
 
3702
3825
  if (values.nil? || values.empty?) && dtype.nil?
3703
- if dtype_if_empty
3704
- # if dtype for empty sequence could be guessed
3705
- # (e.g comparisons between self and other)
3706
- dtype = dtype_if_empty
3707
- else
3708
- # default to Float32 type
3709
- dtype = :f32
3710
- end
3826
+ dtype = dtype_if_empty || Float32
3711
3827
  end
3712
3828
 
3713
- rb_temporal_types = []
3714
- rb_temporal_types << ::Date if defined?(::Date)
3715
- rb_temporal_types << ::DateTime if defined?(::DateTime)
3716
- rb_temporal_types << ::Time if defined?(::Time)
3829
+ rb_temporal_types = [::Date, ::DateTime, ::Time]
3717
3830
 
3718
3831
  value = _get_first_non_none(values)
3719
3832
  if !value.nil?
@@ -3738,58 +3851,54 @@ module Polars
3738
3851
 
3739
3852
  # temporal branch
3740
3853
  if rb_temporal_types.include?(ruby_dtype)
3741
- # if dtype.nil?
3742
- # dtype = rb_type_to_dtype(ruby_dtype)
3743
- # elsif rb_temporal_types.include?(dtype)
3744
- # dtype = rb_type_to_dtype(dtype)
3745
- # end
3746
-
3747
- if ruby_dtype == ::Date
3748
- RbSeries.new_opt_date(name, values, strict)
3749
- elsif ruby_dtype == ::Time
3750
- RbSeries.new_opt_datetime(name, values, strict)
3751
- elsif ruby_dtype == ::DateTime
3752
- RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
3753
- else
3754
- raise Todo
3755
- end
3756
- elsif ruby_dtype == Array
3757
- if nested_dtype.nil?
3758
- nested_value = _get_first_non_none(value)
3759
- nested_dtype = nested_value.nil? ? Float : nested_value.class
3854
+ if dtype.nil?
3855
+ dtype = Utils.rb_type_to_dtype(ruby_dtype)
3856
+ elsif rb_temporal_types.include?(dtype)
3857
+ dtype = Utils.rb_type_to_dtype(dtype)
3760
3858
  end
3859
+ # TODO
3860
+ time_unit = nil
3761
3861
 
3762
- if nested_dtype == Array
3763
- raise Todo
3862
+ rb_series = RbSeries.new_from_anyvalues(name, values, strict)
3863
+ if time_unit.nil?
3864
+ s = Utils.wrap_s(rb_series)
3865
+ else
3866
+ s = Utils.wrap_s(rb_series).dt.cast_time_unit(time_unit)
3764
3867
  end
3765
-
3766
- if value.is_a?(Array)
3767
- count = 0
3768
- equal_to_inner = true
3769
- values.each do |lst|
3770
- lst.each do |vl|
3771
- equal_to_inner = vl.class == nested_dtype
3772
- if !equal_to_inner || count > 50
3773
- break
3774
- end
3775
- count += 1
3868
+ return s._s
3869
+ elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
3870
+ raise Todo
3871
+ elsif ruby_dtype == Array
3872
+ return sequence_from_anyvalue_or_object(name, values)
3873
+ elsif ruby_dtype == Series
3874
+ return RbSeries.new_series_list(name, values.map(&:_s), strict)
3875
+ elsif ruby_dtype == RbSeries
3876
+ return RbSeries.new_series_list(name, values, strict)
3877
+ else
3878
+ constructor =
3879
+ if value.is_a?(String)
3880
+ if value.encoding == Encoding::UTF_8
3881
+ RbSeries.method(:new_str)
3882
+ else
3883
+ RbSeries.method(:new_binary)
3776
3884
  end
3885
+ elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
3886
+ # TODO improve performance
3887
+ RbSeries.method(:new_opt_f64)
3888
+ else
3889
+ rb_type_to_constructor(value.class)
3777
3890
  end
3778
- if equal_to_inner
3779
- dtype = Utils.rb_type_to_dtype(nested_dtype)
3780
- # TODO rescue and fallback to new_object
3781
- return RbSeries.new_list(name, values, dtype)
3782
- end
3783
- end
3784
-
3785
- RbSeries.new_object(name, values, strict)
3786
- else
3787
- constructor = rb_type_to_constructor(value.class)
3788
3891
  constructor.call(name, values, strict)
3789
3892
  end
3790
3893
  end
3791
3894
  end
3792
3895
 
3896
+ def sequence_from_anyvalue_or_object(name, values)
3897
+ RbSeries.new_from_anyvalues(name, values, true)
3898
+ rescue
3899
+ RbSeries.new_object(name, values, false)
3900
+ end
3901
+
3793
3902
  POLARS_TYPE_TO_CONSTRUCTOR = {
3794
3903
  Float32 => RbSeries.method(:new_opt_f32),
3795
3904
  Float64 => RbSeries.method(:new_opt_f64),
@@ -3834,7 +3943,6 @@ module Polars
3834
3943
  RB_TYPE_TO_CONSTRUCTOR = {
3835
3944
  Float => RbSeries.method(:new_opt_f64),
3836
3945
  Integer => RbSeries.method(:new_opt_i64),
3837
- String => RbSeries.method(:new_str),
3838
3946
  TrueClass => RbSeries.method(:new_opt_bool),
3839
3947
  FalseClass => RbSeries.method(:new_opt_bool)
3840
3948
  }
@@ -11,8 +11,8 @@ module Polars
11
11
 
12
12
  # Parse a Utf8 expression to a Date/Datetime/Time type.
13
13
  #
14
- # @param datatype [Symbol]
15
- # `:date`, `:dateime`, or `:time`.
14
+ # @param dtype [Object]
15
+ # The data type to convert into. Can be either Date, Datetime, or Time.
16
16
  # @param fmt [String]
17
17
  # Format to use, refer to the
18
18
  # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
@@ -33,57 +33,56 @@ module Polars
33
33
  # the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
34
34
  # no fractional second component is found then the default is "us".
35
35
  #
36
- # @example
36
+ # @example Dealing with a consistent format:
37
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
38
+ # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
39
+ # # =>
40
+ # # shape: (2,)
41
+ # # Series: '' [datetime[μs, +00:00]]
42
+ # # [
43
+ # # 2020-01-01 01:00:00 +00:00
44
+ # # 2020-01-01 02:00:00 +00:00
45
+ # # ]
46
+ #
47
+ # @example Dealing with different formats.
37
48
  # s = Polars::Series.new(
38
49
  # "date",
39
50
  # [
40
51
  # "2021-04-22",
41
52
  # "2022-01-04 00:00:00",
42
53
  # "01/31/22",
43
- # "Sun Jul 8 00:34:60 2001"
54
+ # "Sun Jul 8 00:34:60 2001",
44
55
  # ]
45
56
  # )
46
- # s.to_frame.with_column(
47
- # Polars.col("date")
48
- # .str.strptime(:date, "%F", strict: false)
49
- # .fill_null(
50
- # Polars.col("date").str.strptime(:date, "%F %T", strict: false)
51
- # )
52
- # .fill_null(Polars.col("date").str.strptime(:date, "%D", strict: false))
53
- # .fill_null(Polars.col("date").str.strptime(:date, "%c", strict: false))
54
- # )
57
+ # s.to_frame.select(
58
+ # Polars.coalesce(
59
+ # Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
60
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
61
+ # Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
62
+ # Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
63
+ # )
64
+ # ).to_series
55
65
  # # =>
56
- # # shape: (4, 1)
57
- # # ┌────────────┐
58
- # # │ date │
59
- # # │ --- │
60
- # # │ date │
61
- # # ╞════════════╡
62
- # # │ 2021-04-22 │
63
- # # │ 2022-01-04 │
64
- # # 2022-01-31
65
- # # 2001-07-08
66
- # # └────────────┘
67
- def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
68
- if !Utils.is_polars_dtype(datatype)
69
- raise ArgumentError, "expected: {DataType} got: #{datatype}"
70
- end
71
-
72
- if datatype == :date
66
+ # # shape: (4,)
67
+ # # Series: 'date' [date]
68
+ # # [
69
+ # # 2021-04-22
70
+ # # 2022-01-04
71
+ # # 2022-01-31
72
+ # # 2001-07-08
73
+ # # ]
74
+ def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
75
+ if dtype == Date
73
76
  Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
74
- elsif datatype == :datetime
75
- # TODO fix
76
- tu = nil # datatype.tu
77
- dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact, cache, tz_aware, utc))
78
- if tu.nil?
79
- dtcol
80
- else
81
- dtcol.dt.cast_time_unit(tu)
82
- end
83
- elsif datatype == :time
77
+ elsif dtype == Datetime || dtype.is_a?(Datetime)
78
+ dtype = Datetime.new if dtype == Datetime
79
+ time_unit = dtype.time_unit
80
+ time_zone = dtype.time_zone
81
+ Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
82
+ elsif dtype == Time
84
83
  Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
85
84
  else
86
- raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
85
+ raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
87
86
  end
88
87
  end
89
88
 
@@ -521,6 +520,40 @@ module Polars
521
520
  Utils.wrap_expr(_rbexpr.str_starts_with(sub))
522
521
  end
523
522
 
523
+ # Parse string values as JSON.
524
+ #
525
+ # Throws an error if an invalid JSON string is encountered.
526
+ #
527
+ # @param dtype [Object]
528
+ # The dtype to cast the extracted value to. If nil, the dtype will be
529
+ # inferred from the JSON value.
530
+ #
531
+ # @return [Expr]
532
+ #
533
+ # @example
534
+ # df = Polars::DataFrame.new(
535
+ # {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
536
+ # )
537
+ # dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
538
+ # df.select(Polars.col("json").str.json_extract(dtype))
539
+ # # =>
540
+ # # shape: (3, 1)
541
+ # # ┌─────────────┐
542
+ # # │ json │
543
+ # # │ --- │
544
+ # # │ struct[2] │
545
+ # # ╞═════════════╡
546
+ # # │ {1,true} │
547
+ # # │ {null,null} │
548
+ # # │ {2,false} │
549
+ # # └─────────────┘
550
+ def json_extract(dtype = nil)
551
+ if !dtype.nil?
552
+ dtype = Utils.rb_type_to_dtype(dtype)
553
+ end
554
+ Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
555
+ end
556
+
524
557
  # Extract the first match of json string with provided JSONPath expression.
525
558
  #
526
559
  # Throws an error if an invalid JSON string is encountered.
@@ -846,10 +879,10 @@ module Polars
846
879
  # # │ 1 ┆ 123ABC │
847
880
  # # │ 2 ┆ abc456 │
848
881
  # # └─────┴────────┘
849
- def replace(pattern, value, literal: false)
882
+ def replace(pattern, value, literal: false, n: 1)
850
883
  pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
851
884
  value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
852
- Utils.wrap_expr(_rbexpr.str_replace(pattern._rbexpr, value._rbexpr, literal))
885
+ Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
853
886
  end
854
887
 
855
888
  # Replace all matching regex/literal substrings with a new string value.
@@ -912,5 +945,78 @@ module Polars
912
945
  def slice(offset, length = nil)
913
946
  Utils.wrap_expr(_rbexpr.str_slice(offset, length))
914
947
  end
948
+
949
+ # Returns a column with a separate row for every string character.
950
+ #
951
+ # @return [Expr]
952
+ #
953
+ # @example
954
+ # df = Polars::DataFrame.new({"a": ["foo", "bar"]})
955
+ # df.select(Polars.col("a").str.explode)
956
+ # # =>
957
+ # # shape: (6, 1)
958
+ # # ┌─────┐
959
+ # # │ a │
960
+ # # │ --- │
961
+ # # │ str │
962
+ # # ╞═════╡
963
+ # # │ f │
964
+ # # │ o │
965
+ # # │ o │
966
+ # # │ b │
967
+ # # │ a │
968
+ # # │ r │
969
+ # # └─────┘
970
+ def explode
971
+ Utils.wrap_expr(_rbexpr.explode)
972
+ end
973
+
974
+ # Parse integers with base radix from strings.
975
+ #
976
+ # Defaults to base 2. Parse errors and overflows raise unless `strict: false` is given, in which case they become null.
977
+ #
978
+ # @param radix [Integer]
979
+ # Positive integer which is the base of the string we are parsing.
980
+ # Default: 2.
981
+ # @param strict [Boolean]
982
+ # If true (the default), any parse error or overflow is raised as a ComputeError.
983
+ # If false, parse errors and overflows are silently converted to null.
984
+ #
985
+ # @return [Expr]
986
+ #
987
+ # @example
988
+ # df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
989
+ # df.select(Polars.col("bin").str.parse_int(2, strict: false))
990
+ # # =>
991
+ # # shape: (4, 1)
992
+ # # ┌──────┐
993
+ # # │ bin │
994
+ # # │ --- │
995
+ # # │ i32 │
996
+ # # ╞══════╡
997
+ # # │ 6 │
998
+ # # │ 5 │
999
+ # # │ 2 │
1000
+ # # │ null │
1001
+ # # └──────┘
1002
+ #
1003
+ # @example
1004
+ # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
1005
+ # df.select(Polars.col("hex").str.parse_int(16, strict: true))
1006
+ # # =>
1007
+ # # shape: (4, 1)
1008
+ # # ┌───────┐
1009
+ # # │ hex │
1010
+ # # │ --- │
1011
+ # # │ i32 │
1012
+ # # ╞═══════╡
1013
+ # # │ 64030 │
1014
+ # # │ 65280 │
1015
+ # # │ 51966 │
1016
+ # # │ null │
1017
+ # # └───────┘
1018
+ def parse_int(radix = 2, strict: true)
1019
+ Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
1020
+ end
915
1021
  end
916
1022
  end
@@ -38,12 +38,12 @@ module Polars
38
38
  # )
39
39
  # s.to_frame.with_column(
40
40
  # Polars.col("date")
41
- # .str.strptime(:date, "%F", strict: false)
41
+ # .str.strptime(Polars::Date, "%F", strict: false)
42
42
  # .fill_null(
43
- # Polars.col("date").str.strptime(:date, "%F %T", strict: false)
43
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false)
44
44
  # )
45
- # .fill_null(Polars.col("date").str.strptime(:date, "%D", strict: false))
46
- # .fill_null(Polars.col("date").str.strptime(:date, "%c", strict: false))
45
+ # .fill_null(Polars.col("date").str.strptime(Polars::Date, "%D", strict: false))
46
+ # .fill_null(Polars.col("date").str.strptime(Polars::Date, "%c", strict: false))
47
47
  # )
48
48
  # # =>
49
49
  # # shape: (4, 1)