polars-df 0.8.0-x86_64-linux → 0.9.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1726 -754
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +179 -43
  17. data/lib/polars/data_types.rb +191 -28
  18. data/lib/polars/date_time_expr.rb +31 -14
  19. data/lib/polars/exceptions.rb +12 -1
  20. data/lib/polars/expr.rb +866 -186
  21. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  22. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  23. data/lib/polars/functions/as_datatype.rb +248 -0
  24. data/lib/polars/functions/col.rb +47 -0
  25. data/lib/polars/functions/eager.rb +182 -0
  26. data/lib/polars/functions/lazy.rb +1280 -0
  27. data/lib/polars/functions/len.rb +49 -0
  28. data/lib/polars/functions/lit.rb +35 -0
  29. data/lib/polars/functions/random.rb +16 -0
  30. data/lib/polars/functions/range/date_range.rb +103 -0
  31. data/lib/polars/functions/range/int_range.rb +51 -0
  32. data/lib/polars/functions/repeat.rb +144 -0
  33. data/lib/polars/functions/whenthen.rb +27 -0
  34. data/lib/polars/functions.rb +29 -416
  35. data/lib/polars/group_by.rb +2 -2
  36. data/lib/polars/io.rb +18 -25
  37. data/lib/polars/lazy_frame.rb +367 -53
  38. data/lib/polars/list_expr.rb +152 -6
  39. data/lib/polars/list_name_space.rb +102 -0
  40. data/lib/polars/meta_expr.rb +175 -7
  41. data/lib/polars/series.rb +273 -34
  42. data/lib/polars/string_cache.rb +75 -0
  43. data/lib/polars/string_expr.rb +412 -96
  44. data/lib/polars/string_name_space.rb +4 -4
  45. data/lib/polars/testing.rb +507 -0
  46. data/lib/polars/utils.rb +52 -8
  47. data/lib/polars/version.rb +1 -1
  48. data/lib/polars.rb +15 -2
  49. metadata +33 -4
  50. data/lib/polars/lazy_functions.rb +0 -1181
@@ -211,6 +211,49 @@ module Polars
211
211
  end
212
212
  end
213
213
 
214
+ # Convert a String column into a Decimal column.
215
+ #
216
+ # This method infers the needed parameters `precision` and `scale`.
217
+ #
218
+ # @param inference_length [Integer]
219
+ # Number of elements to parse to determine the `precision` and `scale`.
220
+ #
221
+ # @return [Expr]
222
+ #
223
+ # @example
224
+ # df = Polars::DataFrame.new(
225
+ # {
226
+ # "numbers": [
227
+ # "40.12",
228
+ # "3420.13",
229
+ # "120134.19",
230
+ # "3212.98",
231
+ # "12.90",
232
+ # "143.09",
233
+ # "143.9"
234
+ # ]
235
+ # }
236
+ # )
237
+ # df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal)
238
+ # # =>
239
+ # # shape: (7, 2)
240
+ # # ┌───────────┬─────────────────┐
241
+ # # │ numbers ┆ numbers_decimal │
242
+ # # │ --- ┆ --- │
243
+ # # │ str ┆ decimal[*,2] │
244
+ # # ╞═══════════╪═════════════════╡
245
+ # # │ 40.12 ┆ 40.12 │
246
+ # # │ 3420.13 ┆ 3420.13 │
247
+ # # │ 120134.19 ┆ 120134.19 │
248
+ # # │ 3212.98 ┆ 3212.98 │
249
+ # # │ 12.90 ┆ 12.90 │
250
+ # # │ 143.09 ┆ 143.09 │
251
+ # # │ 143.9 ┆ 143.90 │
252
+ # # └───────────┴─────────────────┘
253
+ def to_decimal(inference_length = 100)
254
+ Utils.wrap_expr(_rbexpr.str_to_decimal(inference_length))
255
+ end
256
+
214
257
  # Get length of the strings as `:u32` (as number of bytes).
215
258
  #
216
259
  # @return [Expr]
@@ -222,8 +265,8 @@ module Polars
222
265
  # @example
223
266
  # df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
224
267
  # [
225
- # Polars.col("s").str.lengths.alias("length"),
226
- # Polars.col("s").str.n_chars.alias("nchars")
268
+ # Polars.col("s").str.len_bytes.alias("length"),
269
+ # Polars.col("s").str.len_chars.alias("nchars")
227
270
  # ]
228
271
  # )
229
272
  # df
@@ -239,9 +282,10 @@ module Polars
239
282
  # # │ 345 ┆ 3 ┆ 3 │
240
283
  # # │ 東京 ┆ 6 ┆ 2 │
241
284
  # # └──────┴────────┴────────┘
242
- def lengths
285
+ def len_bytes
243
286
  Utils.wrap_expr(_rbexpr.str_len_bytes)
244
287
  end
288
+ alias_method :lengths, :len_bytes
245
289
 
246
290
  # Get length of the strings as `:u32` (as number of chars).
247
291
  #
@@ -254,8 +298,8 @@ module Polars
254
298
  # @example
255
299
  # df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
256
300
  # [
257
- # Polars.col("s").str.lengths.alias("length"),
258
- # Polars.col("s").str.n_chars.alias("nchars")
301
+ # Polars.col("s").str.len_bytes.alias("length"),
302
+ # Polars.col("s").str.len_chars.alias("nchars")
259
303
  # ]
260
304
  # )
261
305
  # df
@@ -271,9 +315,10 @@ module Polars
271
315
  # # │ 345 ┆ 3 ┆ 3 │
272
316
  # # │ 東京 ┆ 6 ┆ 2 │
273
317
  # # └──────┴────────┴────────┘
274
- def n_chars
318
+ def len_chars
275
319
  Utils.wrap_expr(_rbexpr.str_len_chars)
276
320
  end
321
+ alias_method :n_chars, :len_chars
277
322
 
278
323
  # Vertically concat the values in the Series to a single string value.
279
324
  #
@@ -355,6 +400,30 @@ module Polars
355
400
  Utils.wrap_expr(_rbexpr.str_to_lowercase)
356
401
  end
357
402
 
403
+ # Transform to titlecase variant.
404
+ #
405
+ # @return [Expr]
406
+ #
407
+ # @example
408
+ # df = Polars::DataFrame.new(
409
+ # {"sing": ["welcome to my world", "THERE'S NO TURNING BACK"]}
410
+ # )
411
+ # df.with_columns(foo_title: Polars.col("sing").str.to_titlecase)
412
+ # # =>
413
+ # # shape: (2, 2)
414
+ # # ┌─────────────────────────┬─────────────────────────┐
415
+ # # │ sing ┆ foo_title │
416
+ # # │ --- ┆ --- │
417
+ # # │ str ┆ str │
418
+ # # ╞═════════════════════════╪═════════════════════════╡
419
+ # # │ welcome to my world ┆ Welcome To My World │
420
+ # # │ THERE'S NO TURNING BACK ┆ There's No Turning Back │
421
+ # # └─────────────────────────┴─────────────────────────┘
422
+ def to_titlecase
423
+ raise Todo
424
+ Utils.wrap_expr(_rbexpr.str_to_titlecase)
425
+ end
426
+
358
427
  # Remove leading and trailing whitespace.
359
428
  #
360
429
  # @param characters [String, nil]
@@ -436,113 +505,158 @@ module Polars
436
505
  end
437
506
  alias_method :rstrip, :strip_chars_end
438
507
 
439
- # Fills the string with zeroes.
508
+ # Remove prefix.
440
509
  #
441
- # Return a copy of the string left filled with ASCII '0' digits to make a string
442
- # of length width.
510
+ # The prefix will be removed from the string exactly once, if found.
443
511
  #
444
- # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
445
- # sign character rather than before. The original string is returned if width is
446
- # less than or equal to `s.length`.
512
+ # @param prefix [String]
513
+ # The prefix to be removed.
447
514
  #
448
- # @param alignment [Integer]
449
- # Fill the value up to this length
515
+ # @return [Expr]
516
+ #
517
+ # @example
518
+ # df = Polars::DataFrame.new({"a" => ["foobar", "foofoobar", "foo", "bar"]})
519
+ # df.with_columns(Polars.col("a").str.strip_prefix("foo").alias("stripped"))
520
+ # # =>
521
+ # # shape: (4, 2)
522
+ # # ┌───────────┬──────────┐
523
+ # # │ a ┆ stripped │
524
+ # # │ --- ┆ --- │
525
+ # # │ str ┆ str │
526
+ # # ╞═══════════╪══════════╡
527
+ # # │ foobar ┆ bar │
528
+ # # │ foofoobar ┆ foobar │
529
+ # # │ foo ┆ │
530
+ # # │ bar ┆ bar │
531
+ # # └───────────┴──────────┘
532
+ def strip_prefix(prefix)
533
+ prefix = Utils.parse_as_expression(prefix, str_as_lit: true)
534
+ Utils.wrap_expr(_rbexpr.str_strip_prefix(prefix))
535
+ end
536
+
537
+ # Remove suffix.
538
+ #
539
+ # The suffix will be removed from the string exactly once, if found.
540
+ #
541
+ #
542
+ # @param suffix [String]
543
+ # The suffix to be removed.
450
544
  #
451
545
  # @return [Expr]
452
546
  #
453
547
  # @example
454
- # df = Polars::DataFrame.new(
455
- # {
456
- # "num" => [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, nil]
457
- # }
458
- # )
459
- # df.with_column(Polars.col("num").cast(String).str.zfill(5))
548
+ # df = Polars::DataFrame.new({"a" => ["foobar", "foobarbar", "foo", "bar"]})
549
+ # df.with_columns(Polars.col("a").str.strip_suffix("bar").alias("stripped"))
460
550
  # # =>
461
- # # shape: (11, 1)
462
- # # ┌─────────┐
463
- # # │ num │
464
- # # │ --- │
465
- # # │ str │
466
- # # ╞═════════╡
467
- # # │ -0010 │
468
- # # │ -0001 │
469
- # # │ 00000 │
470
- # # │ 00001 │
471
- # # │ … │
472
- # # │ 10000 │
473
- # # │ 100000 │
474
- # # │ 1000000 │
475
- # # │ null │
476
- # # └─────────┘
477
- def zfill(alignment)
478
- Utils.wrap_expr(_rbexpr.str_zfill(alignment))
551
+ # # shape: (4, 2)
552
+ # # ┌───────────┬──────────┐
553
+ # # │ a ┆ stripped │
554
+ # # │ --- ┆ --- │
555
+ # # │ str ┆ str │
556
+ # # ╞═══════════╪══════════╡
557
+ # # │ foobar ┆ foo │
558
+ # # │ foobarbar ┆ foobar │
559
+ # # │ foo ┆ foo │
560
+ # # │ bar ┆ │
561
+ # # └───────────┴──────────┘
562
+ def strip_suffix(suffix)
563
+ suffix = Utils.parse_as_expression(suffix, str_as_lit: true)
564
+ Utils.wrap_expr(_rbexpr.str_strip_suffix(suffix))
479
565
  end
480
566
 
481
- # Return the string left justified in a string of length `length`.
567
+ # Pad the start of the string until it reaches the given length.
568
+ #
569
+ # @param length [Integer]
570
+ # Pad the string until it reaches this length. Strings with length equal to
571
+ # or greater than this value are returned as-is.
572
+ # @param fill_char [String]
573
+ # The character to pad the string with.
482
574
  #
483
- # Padding is done using the specified `fillchar`.
484
- # The original string is returned if `length` is less than or equal to
485
- # `s.length`.
575
+ # @return [Expr]
576
+ #
577
+ # @example
578
+ # df = Polars::DataFrame.new({"a": ["cow", "monkey", "hippopotamus", nil]})
579
+ # df.with_columns(padded: Polars.col("a").str.pad_start(8, "*"))
580
+ # # =>
581
+ # # shape: (4, 2)
582
+ # # ┌──────────────┬──────────────┐
583
+ # # │ a ┆ padded │
584
+ # # │ --- ┆ --- │
585
+ # # │ str ┆ str │
586
+ # # ╞══════════════╪══════════════╡
587
+ # # │ cow ┆ *****cow │
588
+ # # │ monkey ┆ **monkey │
589
+ # # │ hippopotamus ┆ hippopotamus │
590
+ # # │ null ┆ null │
591
+ # # └──────────────┴──────────────┘
592
+ def pad_start(length, fill_char = " ")
593
+ Utils.wrap_expr(_rbexpr.str_pad_start(length, fill_char))
594
+ end
595
+ alias_method :rjust, :pad_start
596
+
597
+ # Pad the end of the string until it reaches the given length.
486
598
  #
487
599
  # @param length [Integer]
488
- # Justify left to this length.
489
- # @param fillchar [String]
490
- # Fill with this ASCII character.
600
+ # Pad the string until it reaches this length. Strings with length equal to
601
+ # or greater than this value are returned as-is.
602
+ # @param fill_char [String]
603
+ # The character to pad the string with.
491
604
  #
492
605
  # @return [Expr]
493
606
  #
494
607
  # @example
495
- # df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
496
- # df.select(Polars.col("a").str.ljust(8, "*"))
608
+ # df = Polars::DataFrame.new({"a": ["cow", "monkey", "hippopotamus", nil]})
609
+ # df.with_columns(padded: Polars.col("a").str.pad_end(8, "*"))
497
610
  # # =>
498
- # # shape: (4, 1)
499
- # # ┌──────────────┐
500
- # # │ a │
501
- # # │ --- │
502
- # # │ str │
503
- # # ╞══════════════╡
504
- # # │ cow***** │
505
- # # │ monkey** │
506
- # # │ null │
507
- # # │ hippopotamus │
508
- # # └──────────────┘
509
- def ljust(length, fillchar = " ")
510
- Utils.wrap_expr(_rbexpr.str_pad_end(length, fillchar))
611
+ # # shape: (4, 2)
612
+ # # ┌──────────────┬──────────────┐
613
+ # # │ a ┆ padded │
614
+ # # │ --- ┆ --- │
615
+ # # │ str ┆ str │
616
+ # # ╞══════════════╪══════════════╡
617
+ # # │ cow ┆ cow***** │
618
+ # # │ monkey ┆ monkey** │
619
+ # # │ hippopotamus ┆ hippopotamus │
620
+ # # │ null ┆ null │
621
+ # # └──────────────┴──────────────┘
622
+ def pad_end(length, fill_char = " ")
623
+ Utils.wrap_expr(_rbexpr.str_pad_end(length, fill_char))
511
624
  end
512
- alias_method :pad_end, :ljust
625
+ alias_method :ljust, :pad_end
513
626
 
514
- # Return the string right justified in a string of length `length`.
627
+ # Fills the string with zeroes.
515
628
  #
516
- # Padding is done using the specified `fillchar`.
517
- # The original string is returned if `length` is less than or equal to
518
- # `s.length`.
629
+ # Return a copy of the string left filled with ASCII '0' digits to make a string
630
+ # of length width.
631
+ #
632
+ # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
633
+ # sign character rather than before. The original string is returned if width is
634
+ # less than or equal to `s.length`.
519
635
  #
520
636
  # @param length [Integer]
521
- # Justify right to this length.
522
- # @param fillchar [String]
523
- # Fill with this ASCII character.
637
+ # Fill the value up to this length
524
638
  #
525
639
  # @return [Expr]
526
640
  #
527
641
  # @example
528
- # df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
529
- # df.select(Polars.col("a").str.rjust(8, "*"))
642
+ # df = Polars::DataFrame.new({"a" => [-1, 123, 999999, nil]})
643
+ # df.with_columns(Polars.col("a").cast(Polars::String).str.zfill(4).alias("zfill"))
530
644
  # # =>
531
- # # shape: (4, 1)
532
- # # ┌──────────────┐
533
- # # │ a │
534
- # # │ --- │
535
- # # │ str │
536
- # # ╞══════════════╡
537
- # # │ *****cow │
538
- # # │ **monkey │
539
- # # │ null │
540
- # # │ hippopotamus │
541
- # # └──────────────┘
542
- def rjust(length, fillchar = " ")
543
- Utils.wrap_expr(_rbexpr.str_pad_start(length, fillchar))
645
+ # # shape: (4, 2)
646
+ # # ┌────────┬────────┐
647
+ # # │ a ┆ zfill │
648
+ # # │ --- ┆ --- │
649
+ # # │ i64 ┆ str │
650
+ # # ╞════════╪════════╡
651
+ # # │ -1 ┆ -001 │
652
+ # # │ 123 ┆ 0123 │
653
+ # # │ 999999 ┆ 999999 │
654
+ # # │ null ┆ null │
655
+ # # └────────┴────────┘
656
+ def zfill(length)
657
+ length = Utils.parse_as_expression(length)
658
+ Utils.wrap_expr(_rbexpr.str_zfill(length))
544
659
  end
545
- alias_method :pad_start, :rjust
546
660
 
547
661
  # Check if string contains a substring that matches a regex.
548
662
  #
@@ -674,7 +788,7 @@ module Polars
674
788
  # {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
675
789
  # )
676
790
  # dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
677
- # df.select(Polars.col("json").str.json_extract(dtype))
791
+ # df.select(Polars.col("json").str.json_decode(dtype))
678
792
  # # =>
679
793
  # # shape: (3, 1)
680
794
  # # ┌─────────────┐
@@ -686,12 +800,13 @@ module Polars
686
800
  # # │ {null,null} │
687
801
  # # │ {2,false} │
688
802
  # # └─────────────┘
689
- def json_extract(dtype = nil, infer_schema_length: 100)
803
+ def json_decode(dtype = nil, infer_schema_length: 100)
690
804
  if !dtype.nil?
691
805
  dtype = Utils.rb_type_to_dtype(dtype)
692
806
  end
693
- Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
807
+ Utils.wrap_expr(_rbexpr.str_json_decode(dtype, infer_schema_length))
694
808
  end
809
+ alias_method :json_extract, :json_decode
695
810
 
696
811
  # Extract the first match of json string with provided JSONPath expression.
697
812
  #
@@ -745,15 +860,15 @@ module Polars
745
860
  # df.select(Polars.col("encoded").str.decode("hex"))
746
861
  # # =>
747
862
  # # shape: (3, 1)
748
- # # ┌───────────────┐
749
- # # │ encoded │
750
- # # │ --- │
751
- # # │ binary │
752
- # # ╞═══════════════╡
753
- # # │ [binary data] │
754
- # # │ [binary data] │
755
- # # │ null │
755
- # # │ null
756
- # # └───────────────┘
863
+ # # ┌─────────┐
864
+ # # │ encoded │
865
+ # # │ --- │
866
+ # # │ binary │
867
+ # # ╞═════════╡
868
+ # # │ b"foo" │
869
+ # # │ b"bar" │
870
+ # # │ null │
871
+ # # └─────────┘
757
872
  def decode(encoding, strict: true)
758
873
  if encoding == "hex"
759
874
  Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
@@ -824,6 +939,7 @@ module Polars
824
939
  # # │ 678 │
825
940
  # # └─────┘
826
941
  def extract(pattern, group_index: 1)
942
+ pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
827
943
  Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
828
944
  end
829
945
 
@@ -859,6 +975,62 @@ module Polars
859
975
  Utils.wrap_expr(_rbexpr.str_extract_all(pattern._rbexpr))
860
976
  end
861
977
 
978
+ # Extract all capture groups for the given regex pattern.
979
+ #
980
+ # @param pattern [String]
981
+ # A valid regular expression pattern containing at least one capture group,
982
+ # compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
983
+ #
984
+ # @return [Expr]
985
+ #
986
+ # @example
987
+ # df = Polars::DataFrame.new(
988
+ # {
989
+ # "url": [
990
+ # "http://vote.com/ballon_dor?candidate=messi&ref=python",
991
+ # "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
992
+ # "http://vote.com/ballon_dor?error=404&ref=rust"
993
+ # ]
994
+ # }
995
+ # )
996
+ # pattern = /candidate=(?<candidate>\w+)&ref=(?<ref>\w+)/.to_s
997
+ # df.select(captures: Polars.col("url").str.extract_groups(pattern)).unnest(
998
+ # "captures"
999
+ # )
1000
+ # # =>
1001
+ # # shape: (3, 2)
1002
+ # # ┌───────────┬────────┐
1003
+ # # │ candidate ┆ ref │
1004
+ # # │ --- ┆ --- │
1005
+ # # │ str ┆ str │
1006
+ # # ╞═══════════╪════════╡
1007
+ # # │ messi ┆ python │
1008
+ # # │ weghorst ┆ polars │
1009
+ # # │ null ┆ null │
1010
+ # # └───────────┴────────┘
1011
+ #
1012
+ # @example Unnamed groups have their numerical position converted to a string:
1013
+ # pattern = /candidate=(\w+)&ref=(\w+)/.to_s
1014
+ # (
1015
+ # df.with_columns(
1016
+ # captures: Polars.col("url").str.extract_groups(pattern)
1017
+ # ).with_columns(name: Polars.col("captures").struct["1"].str.to_uppercase)
1018
+ # )
1019
+ # # =>
1020
+ # # shape: (3, 3)
1021
+ # # ┌───────────────────────────────────┬───────────────────────┬──────────┐
1022
+ # # │ url ┆ captures ┆ name │
1023
+ # # │ --- ┆ --- ┆ --- │
1024
+ # # │ str ┆ struct[2] ┆ str │
1025
+ # # ╞═══════════════════════════════════╪═══════════════════════╪══════════╡
1026
+ # # │ http://vote.com/ballon_dor?candi… ┆ {"messi","python"} ┆ MESSI │
1027
+ # # │ http://vote.com/ballon_dor?candi… ┆ {"weghorst","polars"} ┆ WEGHORST │
1028
+ # # │ http://vote.com/ballon_dor?error… ┆ {null,null} ┆ null │
1029
+ # # └───────────────────────────────────┴───────────────────────┴──────────┘
1030
+ def extract_groups(pattern)
1031
+ Utils.wrap_expr(_rbexpr.str_extract_groups(pattern))
1032
+ end
1033
+
862
1034
  # Count all successive non-overlapping regex matches.
863
1035
  #
864
1036
  # @param pattern [String]
@@ -1059,6 +1231,28 @@ module Polars
1059
1231
  Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
1060
1232
  end
1061
1233
 
1234
+ # Returns string values in reversed order.
1235
+ #
1236
+ # @return [Expr]
1237
+ #
1238
+ # @example
1239
+ # df = Polars::DataFrame.new({"text" => ["foo", "bar", "man\u0303ana"]})
1240
+ # df.with_columns(Polars.col("text").str.reverse.alias("reversed"))
1241
+ # # =>
1242
+ # # shape: (3, 2)
1243
+ # # ┌────────┬──────────┐
1244
+ # # │ text ┆ reversed │
1245
+ # # │ --- ┆ --- │
1246
+ # # │ str ┆ str │
1247
+ # # ╞════════╪══════════╡
1248
+ # # │ foo ┆ oof │
1249
+ # # │ bar ┆ rab │
1250
+ # # │ mañana ┆ anañam │
1251
+ # # └────────┴──────────┘
1252
+ def reverse
1253
+ Utils.wrap_expr(_rbexpr.str_reverse)
1254
+ end
1255
+
1062
1256
  # Create subslices of the string values of a Utf8 Series.
1063
1257
  #
1064
1258
  # @param offset [Integer]
@@ -1087,6 +1281,8 @@ module Polars
1087
1281
  # # │ dragonfruit ┆ uit │
1088
1282
  # # └─────────────┴──────────┘
1089
1283
  def slice(offset, length = nil)
1284
+ offset = Utils.parse_as_expression(offset)
1285
+ length = Utils.parse_as_expression(length)
1090
1286
  Utils.wrap_expr(_rbexpr.str_slice(offset, length))
1091
1287
  end
1092
1288
 
@@ -1193,6 +1389,126 @@ module Polars
1193
1389
  to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
1194
1390
  end
1195
1391
 
1392
+ # Use the aho-corasick algorithm to find matches.
1393
+ #
1394
+ # This version determines if any of the patterns find a match.
1395
+ #
1396
+ # @param patterns [String]
1397
+ # String patterns to search.
1398
+ # @param ascii_case_insensitive [Boolean]
1399
+ # Enable ASCII-aware case insensitive matching.
1400
+ # When this option is enabled, searching will be performed without respect
1401
+ # to case for ASCII letters (a-z and A-Z) only.
1402
+ #
1403
+ # @return [Expr]
1404
+ #
1405
+ # @example
1406
+ # df = Polars::DataFrame.new(
1407
+ # {
1408
+ # "lyrics": [
1409
+ # "Everybody wants to rule the world",
1410
+ # "Tell me what you want, what you really really want",
1411
+ # "Can you feel the love tonight"
1412
+ # ]
1413
+ # }
1414
+ # )
1415
+ # df.with_columns(
1416
+ # Polars.col("lyrics").str.contains_any(["you", "me"]).alias("contains_any")
1417
+ # )
1418
+ # # =>
1419
+ # # shape: (3, 2)
1420
+ # # ┌───────────────────────────────────┬──────────────┐
1421
+ # # │ lyrics ┆ contains_any │
1422
+ # # │ --- ┆ --- │
1423
+ # # │ str ┆ bool │
1424
+ # # ╞═══════════════════════════════════╪══════════════╡
1425
+ # # │ Everybody wants to rule the worl… ┆ false │
1426
+ # # │ Tell me what you want, what you … ┆ true │
1427
+ # # │ Can you feel the love tonight ┆ true │
1428
+ # # └───────────────────────────────────┴──────────────┘
1429
+ def contains_any(patterns, ascii_case_insensitive: false)
1430
+ patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
1431
+ Utils.wrap_expr(
1432
+ _rbexpr.str_contains_any(patterns, ascii_case_insensitive)
1433
+ )
1434
+ end
1435
+
1436
+ # Use the aho-corasick algorithm to replace many matches.
1437
+ #
1438
+ # @param patterns [String]
1439
+ # String patterns to search and replace.
1440
+ # @param replace_with [String]
1441
+ # Strings to replace where a pattern was a match.
1442
+ # This can be broadcasted. So it supports many:one and many:many.
1443
+ # @param ascii_case_insensitive [Boolean]
1444
+ # Enable ASCII-aware case insensitive matching.
1445
+ # When this option is enabled, searching will be performed without respect
1446
+ # to case for ASCII letters (a-z and A-Z) only.
1447
+ #
1448
+ # @return [Expr]
1449
+ #
1450
+ # @example
1451
+ # df = Polars::DataFrame.new(
1452
+ # {
1453
+ # "lyrics": [
1454
+ # "Everybody wants to rule the world",
1455
+ # "Tell me what you want, what you really really want",
1456
+ # "Can you feel the love tonight"
1457
+ # ]
1458
+ # }
1459
+ # )
1460
+ # df.with_columns(
1461
+ # Polars.col("lyrics")
1462
+ # .str.replace_many(
1463
+ # ["me", "you", "they"],
1464
+ # ""
1465
+ # )
1466
+ # .alias("removes_pronouns")
1467
+ # )
1468
+ # # =>
1469
+ # # shape: (3, 2)
1470
+ # # ┌───────────────────────────────────┬───────────────────────────────────┐
1471
+ # # │ lyrics ┆ removes_pronouns │
1472
+ # # │ --- ┆ --- │
1473
+ # # │ str ┆ str │
1474
+ # # ╞═══════════════════════════════════╪═══════════════════════════════════╡
1475
+ # # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
1476
+ # # │ Tell me what you want, what you … ┆ Tell what want, what really r… │
1477
+ # # │ Can you feel the love tonight ┆ Can feel the love tonight │
1478
+ # # └───────────────────────────────────┴───────────────────────────────────┘
1479
+ #
1480
+ # @example
1481
+ # df.with_columns(
1482
+ # Polars.col("lyrics")
1483
+ # .str.replace_many(
1484
+ # ["me", "you"],
1485
+ # ["you", "me"]
1486
+ # )
1487
+ # .alias("confusing")
1488
+ # )
1489
+ # # =>
1490
+ # # shape: (3, 2)
1491
+ # # ┌───────────────────────────────────┬───────────────────────────────────┐
1492
+ # # │ lyrics ┆ confusing │
1493
+ # # │ --- ┆ --- │
1494
+ # # │ str ┆ str │
1495
+ # # ╞═══════════════════════════════════╪═══════════════════════════════════╡
1496
+ # # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
1497
+ # # │ Tell me what you want, what you … ┆ Tell you what me want, what me r… │
1498
+ # # │ Can you feel the love tonight ┆ Can me feel the love tonight │
1499
+ # # └───────────────────────────────────┴───────────────────────────────────┘
1500
+ def replace_many(patterns, replace_with, ascii_case_insensitive: false)
1501
+ patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
1502
+ replace_with = Utils.parse_as_expression(
1503
+ replace_with, str_as_lit: true, list_as_lit: false
1504
+ )
1505
+ Utils.wrap_expr(
1506
+ _rbexpr.str_replace_many(
1507
+ patterns, replace_with, ascii_case_insensitive
1508
+ )
1509
+ )
1510
+ end
1511
+
1196
1512
  private
1197
1513
 
1198
1514
  def _validate_format_argument(format)
@@ -353,8 +353,8 @@ module Polars
353
353
  # # shape: (3,)
354
354
  # # Series: '' [binary]
355
355
  # # [
356
- # # [binary data]
357
- # # [binary data]
356
+ # # b"foo"
357
+ # # b"bar"
358
358
  # # null
359
359
  # # ]
360
360
  def decode(encoding, strict: false)
@@ -690,11 +690,11 @@ module Polars
690
690
  # sign character rather than before. The original string is returned if width is
691
691
  # less than or equal to `s.length`.
692
692
  #
693
- # @param alignment [Integer]
693
+ # @param length [Integer]
694
694
  # Fill the value up to this length.
695
695
  #
696
696
  # @return [Series]
697
- def zfill(alignment)
697
+ def zfill(length)
698
698
  super
699
699
  end
700
700