polars-df 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -2
  7. data/ext/polars/Cargo.toml +15 -7
  8. data/ext/polars/src/batched_csv.rs +4 -4
  9. data/ext/polars/src/conversion/anyvalue.rs +185 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
  12. data/ext/polars/src/dataframe.rs +69 -53
  13. data/ext/polars/src/expr/array.rs +74 -0
  14. data/ext/polars/src/expr/datetime.rs +22 -56
  15. data/ext/polars/src/expr/general.rs +61 -33
  16. data/ext/polars/src/expr/list.rs +52 -4
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +59 -8
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/functions/aggregation.rs +6 -0
  22. data/ext/polars/src/functions/lazy.rs +103 -48
  23. data/ext/polars/src/functions/meta.rs +45 -1
  24. data/ext/polars/src/functions/string_cache.rs +14 -0
  25. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
  26. data/ext/polars/src/lib.rs +226 -168
  27. data/ext/polars/src/series/aggregation.rs +20 -0
  28. data/ext/polars/src/series/mod.rs +25 -4
  29. data/lib/polars/array_expr.rb +449 -0
  30. data/lib/polars/array_name_space.rb +346 -0
  31. data/lib/polars/cat_expr.rb +24 -0
  32. data/lib/polars/cat_name_space.rb +75 -0
  33. data/lib/polars/config.rb +2 -2
  34. data/lib/polars/data_frame.rb +179 -43
  35. data/lib/polars/data_types.rb +191 -28
  36. data/lib/polars/date_time_expr.rb +31 -14
  37. data/lib/polars/exceptions.rb +12 -1
  38. data/lib/polars/expr.rb +866 -186
  39. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  40. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  41. data/lib/polars/functions/as_datatype.rb +248 -0
  42. data/lib/polars/functions/col.rb +47 -0
  43. data/lib/polars/functions/eager.rb +182 -0
  44. data/lib/polars/functions/lazy.rb +1280 -0
  45. data/lib/polars/functions/len.rb +49 -0
  46. data/lib/polars/functions/lit.rb +35 -0
  47. data/lib/polars/functions/random.rb +16 -0
  48. data/lib/polars/functions/range/date_range.rb +103 -0
  49. data/lib/polars/functions/range/int_range.rb +51 -0
  50. data/lib/polars/functions/repeat.rb +144 -0
  51. data/lib/polars/functions/whenthen.rb +27 -0
  52. data/lib/polars/functions.rb +29 -416
  53. data/lib/polars/group_by.rb +2 -2
  54. data/lib/polars/io.rb +18 -25
  55. data/lib/polars/lazy_frame.rb +367 -53
  56. data/lib/polars/list_expr.rb +152 -6
  57. data/lib/polars/list_name_space.rb +102 -0
  58. data/lib/polars/meta_expr.rb +175 -7
  59. data/lib/polars/series.rb +273 -34
  60. data/lib/polars/string_cache.rb +75 -0
  61. data/lib/polars/string_expr.rb +412 -96
  62. data/lib/polars/string_name_space.rb +4 -4
  63. data/lib/polars/testing.rb +507 -0
  64. data/lib/polars/utils.rb +52 -8
  65. data/lib/polars/version.rb +1 -1
  66. data/lib/polars.rb +15 -2
  67. metadata +35 -5
  68. data/lib/polars/lazy_functions.rb +0 -1181
@@ -211,6 +211,49 @@ module Polars
211
211
  end
212
212
  end
213
213
 
214
+ # Convert a String column into a Decimal column.
215
+ #
216
+ # This method infers the needed parameters `precision` and `scale`.
217
+ #
218
+ # @param inference_length [Integer]
219
+ # Number of elements to parse to determine the `precision` and `scale`.
220
+ #
221
+ # @return [Expr]
222
+ #
223
+ # @example
224
+ # df = Polars::DataFrame.new(
225
+ # {
226
+ # "numbers": [
227
+ # "40.12",
228
+ # "3420.13",
229
+ # "120134.19",
230
+ # "3212.98",
231
+ # "12.90",
232
+ # "143.09",
233
+ # "143.9"
234
+ # ]
235
+ # }
236
+ # )
237
+ # df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal)
238
+ # # =>
239
+ # # shape: (7, 2)
240
+ # # ┌───────────┬─────────────────┐
241
+ # # │ numbers ┆ numbers_decimal │
242
+ # # │ --- ┆ --- │
243
+ # # │ str ┆ decimal[*,2] │
244
+ # # ╞═══════════╪═════════════════╡
245
+ # # │ 40.12 ┆ 40.12 │
246
+ # # │ 3420.13 ┆ 3420.13 │
247
+ # # │ 120134.19 ┆ 120134.19 │
248
+ # # │ 3212.98 ┆ 3212.98 │
249
+ # # │ 12.90 ┆ 12.90 │
250
+ # # │ 143.09 ┆ 143.09 │
251
+ # # │ 143.9 ┆ 143.90 │
252
+ # # └───────────┴─────────────────┘
253
+ def to_decimal(inference_length = 100)
254
+ Utils.wrap_expr(_rbexpr.str_to_decimal(inference_length))
255
+ end
256
+
214
257
  # Get length of the strings as `:u32` (as number of bytes).
215
258
  #
216
259
  # @return [Expr]
@@ -222,8 +265,8 @@ module Polars
222
265
  # @example
223
266
  # df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
224
267
  # [
225
- # Polars.col("s").str.lengths.alias("length"),
226
- # Polars.col("s").str.n_chars.alias("nchars")
268
+ # Polars.col("s").str.len_bytes.alias("length"),
269
+ # Polars.col("s").str.len_chars.alias("nchars")
227
270
  # ]
228
271
  # )
229
272
  # df
@@ -239,9 +282,10 @@ module Polars
239
282
  # # │ 345 ┆ 3 ┆ 3 │
240
283
  # # │ 東京 ┆ 6 ┆ 2 │
241
284
  # # └──────┴────────┴────────┘
242
- def lengths
285
+ def len_bytes
243
286
  Utils.wrap_expr(_rbexpr.str_len_bytes)
244
287
  end
288
+ alias_method :lengths, :len_bytes
245
289
 
246
290
  # Get length of the strings as `:u32` (as number of chars).
247
291
  #
@@ -254,8 +298,8 @@ module Polars
254
298
  # @example
255
299
  # df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
256
300
  # [
257
- # Polars.col("s").str.lengths.alias("length"),
258
- # Polars.col("s").str.n_chars.alias("nchars")
301
+ # Polars.col("s").str.len_bytes.alias("length"),
302
+ # Polars.col("s").str.len_chars.alias("nchars")
259
303
  # ]
260
304
  # )
261
305
  # df
@@ -271,9 +315,10 @@ module Polars
271
315
  # # │ 345 ┆ 3 ┆ 3 │
272
316
  # # │ 東京 ┆ 6 ┆ 2 │
273
317
  # # └──────┴────────┴────────┘
274
- def n_chars
318
+ def len_chars
275
319
  Utils.wrap_expr(_rbexpr.str_len_chars)
276
320
  end
321
+ alias_method :n_chars, :len_chars
277
322
 
278
323
  # Vertically concat the values in the Series to a single string value.
279
324
  #
@@ -355,6 +400,30 @@ module Polars
355
400
  Utils.wrap_expr(_rbexpr.str_to_lowercase)
356
401
  end
357
402
 
403
+ # Transform to titlecase variant.
404
+ #
405
+ # @return [Expr]
406
+ #
407
+ # @example
408
+ # df = Polars::DataFrame.new(
409
+ # {"sing": ["welcome to my world", "THERE'S NO TURNING BACK"]}
410
+ # )
411
+ # df.with_columns(foo_title: Polars.col("sing").str.to_titlecase)
412
+ # # =>
413
+ # # shape: (2, 2)
414
+ # # ┌─────────────────────────┬─────────────────────────┐
415
+ # # │ sing ┆ foo_title │
416
+ # # │ --- ┆ --- │
417
+ # # │ str ┆ str │
418
+ # # ╞═════════════════════════╪═════════════════════════╡
419
+ # # │ welcome to my world ┆ Welcome To My World │
420
+ # # │ THERE'S NO TURNING BACK ┆ There's No Turning Back │
421
+ # # └─────────────────────────┴─────────────────────────┘
422
+ def to_titlecase
423
+ raise Todo
424
+ Utils.wrap_expr(_rbexpr.str_to_titlecase)
425
+ end
426
+
358
427
  # Remove leading and trailing whitespace.
359
428
  #
360
429
  # @param characters [String, nil]
@@ -436,113 +505,158 @@ module Polars
436
505
  end
437
506
  alias_method :rstrip, :strip_chars_end
438
507
 
439
- # Fills the string with zeroes.
508
+ # Remove prefix.
440
509
  #
441
- # Return a copy of the string left filled with ASCII '0' digits to make a string
442
- # of length width.
510
+ # The prefix will be removed from the string exactly once, if found.
443
511
  #
444
- # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
445
- # sign character rather than before. The original string is returned if width is
446
- # less than or equal to `s.length`.
512
+ # @param prefix [String]
513
+ # The prefix to be removed.
447
514
  #
448
- # @param alignment [Integer]
449
- # Fill the value up to this length
515
+ # @return [Expr]
516
+ #
517
+ # @example
518
+ # df = Polars::DataFrame.new({"a" => ["foobar", "foofoobar", "foo", "bar"]})
519
+ # df.with_columns(Polars.col("a").str.strip_prefix("foo").alias("stripped"))
520
+ # # =>
521
+ # # shape: (4, 2)
522
+ # # ┌───────────┬──────────┐
523
+ # # │ a ┆ stripped │
524
+ # # │ --- ┆ --- │
525
+ # # │ str ┆ str │
526
+ # # ╞═══════════╪══════════╡
527
+ # # │ foobar ┆ bar │
528
+ # # │ foofoobar ┆ foobar │
529
+ # # │ foo ┆ │
530
+ # # │ bar ┆ bar │
531
+ # # └───────────┴──────────┘
532
+ def strip_prefix(prefix)
533
+ prefix = Utils.parse_as_expression(prefix, str_as_lit: true)
534
+ Utils.wrap_expr(_rbexpr.str_strip_prefix(prefix))
535
+ end
536
+
537
+ # Remove suffix.
538
+ #
539
+ # The suffix will be removed from the string exactly once, if found.
540
+ #
541
+ #
542
+ # @param suffix [String]
543
+ # The suffix to be removed.
450
544
  #
451
545
  # @return [Expr]
452
546
  #
453
547
  # @example
454
- # df = Polars::DataFrame.new(
455
- # {
456
- # "num" => [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, nil]
457
- # }
458
- # )
459
- # df.with_column(Polars.col("num").cast(String).str.zfill(5))
548
+ # df = Polars::DataFrame.new({"a" => ["foobar", "foobarbar", "foo", "bar"]})
549
+ # df.with_columns(Polars.col("a").str.strip_suffix("bar").alias("stripped"))
460
550
  # # =>
461
- # # shape: (11, 1)
462
- # # ┌─────────┐
463
- # # │ num │
464
- # # │ --- │
465
- # # │ str │
466
- # # ╞═════════╡
467
- # # │ -0010 │
468
- # # │ -0001 │
469
- # # │ 00000 │
470
- # # │ 00001 │
471
- # # │ … │
472
- # # │ 10000 │
473
- # # │ 100000 │
474
- # # │ 1000000 │
475
- # # │ null │
476
- # # └─────────┘
477
- def zfill(alignment)
478
- Utils.wrap_expr(_rbexpr.str_zfill(alignment))
551
+ # # shape: (4, 2)
552
+ # # ┌───────────┬──────────┐
553
+ # # │ a ┆ stripped │
554
+ # # │ --- ┆ --- │
555
+ # # │ str ┆ str │
556
+ # # ╞═══════════╪══════════╡
557
+ # # │ foobar ┆ foo │
558
+ # # │ foobarbar ┆ foobar │
559
+ # # │ foo ┆ foo │
560
+ # # │ bar ┆ │
561
+ # # └───────────┴──────────┘
562
+ def strip_suffix(suffix)
563
+ suffix = Utils.parse_as_expression(suffix, str_as_lit: true)
564
+ Utils.wrap_expr(_rbexpr.str_strip_suffix(suffix))
479
565
  end
480
566
 
481
- # Return the string left justified in a string of length `length`.
567
+ # Pad the start of the string until it reaches the given length.
568
+ #
569
+ # @param length [Integer]
570
+ # Pad the string until it reaches this length. Strings with length equal to
571
+ # or greater than this value are returned as-is.
572
+ # @param fill_char [String]
573
+ # The character to pad the string with.
482
574
  #
483
- # Padding is done using the specified `fillchar`.
484
- # The original string is returned if `length` is less than or equal to
485
- # `s.length`.
575
+ # @return [Expr]
576
+ #
577
+ # @example
578
+ # df = Polars::DataFrame.new({"a": ["cow", "monkey", "hippopotamus", nil]})
579
+ # df.with_columns(padded: Polars.col("a").str.pad_start(8, "*"))
580
+ # # =>
581
+ # # shape: (4, 2)
582
+ # # ┌──────────────┬──────────────┐
583
+ # # │ a ┆ padded │
584
+ # # │ --- ┆ --- │
585
+ # # │ str ┆ str │
586
+ # # ╞══════════════╪══════════════╡
587
+ # # │ cow ┆ *****cow │
588
+ # # │ monkey ┆ **monkey │
589
+ # # │ hippopotamus ┆ hippopotamus │
590
+ # # │ null ┆ null │
591
+ # # └──────────────┴──────────────┘
592
+ def pad_start(length, fill_char = " ")
593
+ Utils.wrap_expr(_rbexpr.str_pad_start(length, fill_char))
594
+ end
595
+ alias_method :rjust, :pad_start
596
+
597
+ # Pad the end of the string until it reaches the given length.
486
598
  #
487
599
  # @param length [Integer]
488
- # Justify left to this length.
489
- # @param fillchar [String]
490
- # Fill with this ASCII character.
600
+ # Pad the string until it reaches this length. Strings with length equal to
601
+ # or greater than this value are returned as-is.
602
+ # @param fill_char [String]
603
+ # The character to pad the string with.
491
604
  #
492
605
  # @return [Expr]
493
606
  #
494
607
  # @example
495
- # df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
496
- # df.select(Polars.col("a").str.ljust(8, "*"))
608
+ # df = Polars::DataFrame.new({"a": ["cow", "monkey", "hippopotamus", nil]})
609
+ # df.with_columns(padded: Polars.col("a").str.pad_end(8, "*"))
497
610
  # # =>
498
- # # shape: (4, 1)
499
- # # ┌──────────────┐
500
- # # │ a │
501
- # # │ --- │
502
- # # │ str │
503
- # # ╞══════════════╡
504
- # # │ cow***** │
505
- # # │ monkey** │
506
- # # │ null │
507
- # # │ hippopotamus │
508
- # # └──────────────┘
509
- def ljust(length, fillchar = " ")
510
- Utils.wrap_expr(_rbexpr.str_pad_end(length, fillchar))
611
+ # # shape: (4, 2)
612
+ # # ┌──────────────┬──────────────┐
613
+ # # │ a ┆ padded │
614
+ # # │ --- ┆ --- │
615
+ # # │ str ┆ str │
616
+ # # ╞══════════════╪══════════════╡
617
+ # # │ cow ┆ cow***** │
618
+ # # │ monkey ┆ monkey** │
619
+ # # │ hippopotamus ┆ hippopotamus │
620
+ # # │ null ┆ null │
621
+ # # └──────────────┴──────────────┘
622
+ def pad_end(length, fill_char = " ")
623
+ Utils.wrap_expr(_rbexpr.str_pad_end(length, fill_char))
511
624
  end
512
- alias_method :pad_end, :ljust
625
+ alias_method :ljust, :pad_end
513
626
 
514
- # Return the string right justified in a string of length `length`.
627
+ # Fills the string with zeroes.
515
628
  #
516
- # Padding is done using the specified `fillchar`.
517
- # The original string is returned if `length` is less than or equal to
518
- # `s.length`.
629
+ # Return a copy of the string left filled with ASCII '0' digits to make a string
630
+ # of length width.
631
+ #
632
+ # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
633
+ # sign character rather than before. The original string is returned if width is
634
+ # less than or equal to `s.length`.
519
635
  #
520
636
  # @param length [Integer]
521
- # Justify right to this length.
522
- # @param fillchar [String]
523
- # Fill with this ASCII character.
637
+ # Fill the value up to this length
524
638
  #
525
639
  # @return [Expr]
526
640
  #
527
641
  # @example
528
- # df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
529
- # df.select(Polars.col("a").str.rjust(8, "*"))
642
+ # df = Polars::DataFrame.new({"a" => [-1, 123, 999999, nil]})
643
+ # df.with_columns(Polars.col("a").cast(Polars::String).str.zfill(4).alias("zfill"))
530
644
  # # =>
531
- # # shape: (4, 1)
532
- # # ┌──────────────┐
533
- # # │ a │
534
- # # │ --- │
535
- # # │ str │
536
- # # ╞══════════════╡
537
- # # │ *****cow │
538
- # # │ **monkey │
539
- # # │ null │
540
- # # │ hippopotamus │
541
- # # └──────────────┘
542
- def rjust(length, fillchar = " ")
543
- Utils.wrap_expr(_rbexpr.str_pad_start(length, fillchar))
645
+ # # shape: (4, 2)
646
+ # # ┌────────┬────────┐
647
+ # # │ a ┆ zfill │
648
+ # # │ --- ┆ --- │
649
+ # # │ i64 ┆ str │
650
+ # # ╞════════╪════════╡
651
+ # # │ -1 ┆ -001 │
652
+ # # │ 123 ┆ 0123 │
653
+ # # │ 999999 ┆ 999999 │
654
+ # # │ null ┆ null │
655
+ # # └────────┴────────┘
656
+ def zfill(length)
657
+ length = Utils.parse_as_expression(length)
658
+ Utils.wrap_expr(_rbexpr.str_zfill(length))
544
659
  end
545
- alias_method :pad_start, :rjust
546
660
 
547
661
  # Check if string contains a substring that matches a regex.
548
662
  #
@@ -674,7 +788,7 @@ module Polars
674
788
  # {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
675
789
  # )
676
790
  # dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
677
- # df.select(Polars.col("json").str.json_extract(dtype))
791
+ # df.select(Polars.col("json").str.json_decode(dtype))
678
792
  # # =>
679
793
  # # shape: (3, 1)
680
794
  # # ┌─────────────┐
@@ -686,12 +800,13 @@ module Polars
686
800
  # # │ {null,null} │
687
801
  # # │ {2,false} │
688
802
  # # └─────────────┘
689
- def json_extract(dtype = nil, infer_schema_length: 100)
803
+ def json_decode(dtype = nil, infer_schema_length: 100)
690
804
  if !dtype.nil?
691
805
  dtype = Utils.rb_type_to_dtype(dtype)
692
806
  end
693
- Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
807
+ Utils.wrap_expr(_rbexpr.str_json_decode(dtype, infer_schema_length))
694
808
  end
809
+ alias_method :json_extract, :json_decode
695
810
 
696
811
  # Extract the first match of json string with provided JSONPath expression.
697
812
  #
@@ -745,15 +860,15 @@ module Polars
745
860
  # df.select(Polars.col("encoded").str.decode("hex"))
746
861
  # # =>
747
862
  # # shape: (3, 1)
748
- # # ┌───────────────┐
749
- # # │ encoded │
750
- # # │ --- │
751
- # # │ binary │
752
- # # ╞═══════════════╡
753
- # # │ [binary data] │
754
- # # │ [binary data] │
755
- # # │ null │
756
- # # └───────────────┘
863
+ # # ┌─────────┐
864
+ # # │ encoded │
865
+ # # │ --- │
866
+ # # │ binary │
867
+ # # ╞═════════╡
868
+ # # │ b"foo" │
869
+ # # │ b"bar" │
870
+ # # │ null │
871
+ # # └─────────┘
757
872
  def decode(encoding, strict: true)
758
873
  if encoding == "hex"
759
874
  Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
@@ -824,6 +939,7 @@ module Polars
824
939
  # # │ 678 │
825
940
  # # └─────┘
826
941
  def extract(pattern, group_index: 1)
942
+ pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
827
943
  Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
828
944
  end
829
945
 
@@ -859,6 +975,62 @@ module Polars
859
975
  Utils.wrap_expr(_rbexpr.str_extract_all(pattern._rbexpr))
860
976
  end
861
977
 
978
+ # Extract all capture groups for the given regex pattern.
979
+ #
980
+ # @param pattern [String]
981
+ # A valid regular expression pattern containing at least one capture group,
982
+ # compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
983
+ #
984
+ # @return [Expr]
985
+ #
986
+ # @example
987
+ # df = Polars::DataFrame.new(
988
+ # {
989
+ # "url": [
990
+ # "http://vote.com/ballon_dor?candidate=messi&ref=python",
991
+ # "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
992
+ # "http://vote.com/ballon_dor?error=404&ref=rust"
993
+ # ]
994
+ # }
995
+ # )
996
+ # pattern = /candidate=(?<candidate>\w+)&ref=(?<ref>\w+)/.to_s
997
+ # df.select(captures: Polars.col("url").str.extract_groups(pattern)).unnest(
998
+ # "captures"
999
+ # )
1000
+ # # =>
1001
+ # # shape: (3, 2)
1002
+ # # ┌───────────┬────────┐
1003
+ # # │ candidate ┆ ref │
1004
+ # # │ --- ┆ --- │
1005
+ # # │ str ┆ str │
1006
+ # # ╞═══════════╪════════╡
1007
+ # # │ messi ┆ python │
1008
+ # # │ weghorst ┆ polars │
1009
+ # # │ null ┆ null │
1010
+ # # └───────────┴────────┘
1011
+ #
1012
+ # @example Unnamed groups have their numerical position converted to a string:
1013
+ # pattern = /candidate=(\w+)&ref=(\w+)/.to_s
1014
+ # (
1015
+ # df.with_columns(
1016
+ # captures: Polars.col("url").str.extract_groups(pattern)
1017
+ # ).with_columns(name: Polars.col("captures").struct["1"].str.to_uppercase)
1018
+ # )
1019
+ # # =>
1020
+ # # shape: (3, 3)
1021
+ # # ┌───────────────────────────────────┬───────────────────────┬──────────┐
1022
+ # # │ url ┆ captures ┆ name │
1023
+ # # │ --- ┆ --- ┆ --- │
1024
+ # # │ str ┆ struct[2] ┆ str │
1025
+ # # ╞═══════════════════════════════════╪═══════════════════════╪══════════╡
1026
+ # # │ http://vote.com/ballon_dor?candi… ┆ {"messi","python"} ┆ MESSI │
1027
+ # # │ http://vote.com/ballon_dor?candi… ┆ {"weghorst","polars"} ┆ WEGHORST │
1028
+ # # │ http://vote.com/ballon_dor?error… ┆ {null,null} ┆ null │
1029
+ # # └───────────────────────────────────┴───────────────────────┴──────────┘
1030
+ def extract_groups(pattern)
1031
+ Utils.wrap_expr(_rbexpr.str_extract_groups(pattern))
1032
+ end
1033
+
862
1034
  # Count all successive non-overlapping regex matches.
863
1035
  #
864
1036
  # @param pattern [String]
@@ -1059,6 +1231,28 @@ module Polars
1059
1231
  Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
1060
1232
  end
1061
1233
 
1234
+ # Returns string values in reversed order.
1235
+ #
1236
+ # @return [Expr]
1237
+ #
1238
+ # @example
1239
+ # df = Polars::DataFrame.new({"text" => ["foo", "bar", "man\u0303ana"]})
1240
+ # df.with_columns(Polars.col("text").str.reverse.alias("reversed"))
1241
+ # # =>
1242
+ # # shape: (3, 2)
1243
+ # # ┌────────┬──────────┐
1244
+ # # │ text ┆ reversed │
1245
+ # # │ --- ┆ --- │
1246
+ # # │ str ┆ str │
1247
+ # # ╞════════╪══════════╡
1248
+ # # │ foo ┆ oof │
1249
+ # # │ bar ┆ rab │
1250
+ # # │ mañana ┆ anañam │
1251
+ # # └────────┴──────────┘
1252
+ def reverse
1253
+ Utils.wrap_expr(_rbexpr.str_reverse)
1254
+ end
1255
+
1062
1256
  # Create subslices of the string values of a Utf8 Series.
1063
1257
  #
1064
1258
  # @param offset [Integer]
@@ -1087,6 +1281,8 @@ module Polars
1087
1281
  # # │ dragonfruit ┆ uit │
1088
1282
  # # └─────────────┴──────────┘
1089
1283
  def slice(offset, length = nil)
1284
+ offset = Utils.parse_as_expression(offset)
1285
+ length = Utils.parse_as_expression(length)
1090
1286
  Utils.wrap_expr(_rbexpr.str_slice(offset, length))
1091
1287
  end
1092
1288
 
@@ -1193,6 +1389,126 @@ module Polars
1193
1389
  to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
1194
1390
  end
1195
1391
 
1392
+ # Use the aho-corasick algorithm to find matches.
1393
+ #
1394
+ # This version determines if any of the patterns find a match.
1395
+ #
1396
+ # @param patterns [String]
1397
+ # String patterns to search.
1398
+ # @param ascii_case_insensitive [Boolean]
1399
+ # Enable ASCII-aware case insensitive matching.
1400
+ # When this option is enabled, searching will be performed without respect
1401
+ # to case for ASCII letters (a-z and A-Z) only.
1402
+ #
1403
+ # @return [Expr]
1404
+ #
1405
+ # @example
1406
+ # df = Polars::DataFrame.new(
1407
+ # {
1408
+ # "lyrics": [
1409
+ # "Everybody wants to rule the world",
1410
+ # "Tell me what you want, what you really really want",
1411
+ # "Can you feel the love tonight"
1412
+ # ]
1413
+ # }
1414
+ # )
1415
+ # df.with_columns(
1416
+ # Polars.col("lyrics").str.contains_any(["you", "me"]).alias("contains_any")
1417
+ # )
1418
+ # # =>
1419
+ # # shape: (3, 2)
1420
+ # # ┌───────────────────────────────────┬──────────────┐
1421
+ # # │ lyrics ┆ contains_any │
1422
+ # # │ --- ┆ --- │
1423
+ # # │ str ┆ bool │
1424
+ # # ╞═══════════════════════════════════╪══════════════╡
1425
+ # # │ Everybody wants to rule the worl… ┆ false │
1426
+ # # │ Tell me what you want, what you … ┆ true │
1427
+ # # │ Can you feel the love tonight ┆ true │
1428
+ # # └───────────────────────────────────┴──────────────┘
1429
+ def contains_any(patterns, ascii_case_insensitive: false)
1430
+ patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
1431
+ Utils.wrap_expr(
1432
+ _rbexpr.str_contains_any(patterns, ascii_case_insensitive)
1433
+ )
1434
+ end
1435
+
1436
+ # Use the aho-corasick algorithm to replace many matches.
1437
+ #
1438
+ # @param patterns [String]
1439
+ # String patterns to search and replace.
1440
+ # @param replace_with [String]
1441
+ # Strings to replace where a pattern was a match.
1442
+ # This can be broadcasted. So it supports many:one and many:many.
1443
+ # @param ascii_case_insensitive [Boolean]
1444
+ # Enable ASCII-aware case insensitive matching.
1445
+ # When this option is enabled, searching will be performed without respect
1446
+ # to case for ASCII letters (a-z and A-Z) only.
1447
+ #
1448
+ # @return [Expr]
1449
+ #
1450
+ # @example
1451
+ # df = Polars::DataFrame.new(
1452
+ # {
1453
+ # "lyrics": [
1454
+ # "Everybody wants to rule the world",
1455
+ # "Tell me what you want, what you really really want",
1456
+ # "Can you feel the love tonight"
1457
+ # ]
1458
+ # }
1459
+ # )
1460
+ # df.with_columns(
1461
+ # Polars.col("lyrics")
1462
+ # .str.replace_many(
1463
+ # ["me", "you", "they"],
1464
+ # ""
1465
+ # )
1466
+ # .alias("removes_pronouns")
1467
+ # )
1468
+ # # =>
1469
+ # # shape: (3, 2)
1470
+ # # ┌───────────────────────────────────┬───────────────────────────────────┐
1471
+ # # │ lyrics ┆ removes_pronouns │
1472
+ # # │ --- ┆ --- │
1473
+ # # │ str ┆ str │
1474
+ # # ╞═══════════════════════════════════╪═══════════════════════════════════╡
1475
+ # # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
1476
+ # # │ Tell me what you want, what you … ┆ Tell what want, what really r… │
1477
+ # # │ Can you feel the love tonight ┆ Can feel the love tonight │
1478
+ # # └───────────────────────────────────┴───────────────────────────────────┘
1479
+ #
1480
+ # @example
1481
+ # df.with_columns(
1482
+ # Polars.col("lyrics")
1483
+ # .str.replace_many(
1484
+ # ["me", "you"],
1485
+ # ["you", "me"]
1486
+ # )
1487
+ # .alias("confusing")
1488
+ # )
1489
+ # # =>
1490
+ # # shape: (3, 2)
1491
+ # # ┌───────────────────────────────────┬───────────────────────────────────┐
1492
+ # # │ lyrics ┆ confusing │
1493
+ # # │ --- ┆ --- │
1494
+ # # │ str ┆ str │
1495
+ # # ╞═══════════════════════════════════╪═══════════════════════════════════╡
1496
+ # # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
1497
+ # # │ Tell me what you want, what you … ┆ Tell you what me want, what me r… │
1498
+ # # │ Can you feel the love tonight ┆ Can me feel the love tonight │
1499
+ # # └───────────────────────────────────┴───────────────────────────────────┘
1500
+ def replace_many(patterns, replace_with, ascii_case_insensitive: false)
1501
+ patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
1502
+ replace_with = Utils.parse_as_expression(
1503
+ replace_with, str_as_lit: true, list_as_lit: false
1504
+ )
1505
+ Utils.wrap_expr(
1506
+ _rbexpr.str_replace_many(
1507
+ patterns, replace_with, ascii_case_insensitive
1508
+ )
1509
+ )
1510
+ end
1511
+
1196
1512
  private
1197
1513
 
1198
1514
  def _validate_format_argument(format)
@@ -353,8 +353,8 @@ module Polars
353
353
  # # shape: (3,)
354
354
  # # Series: '' [binary]
355
355
  # # [
356
- # # [binary data]
357
- # # [binary data]
356
+ # # b"foo"
357
+ # # b"bar"
358
358
  # # null
359
359
  # # ]
360
360
  def decode(encoding, strict: false)
@@ -690,11 +690,11 @@ module Polars
690
690
  # sign character rather than before. The original string is returned if width is
691
691
  # less than or equal to `s.length`.
692
692
  #
693
- # @param alignment [Integer]
693
+ # @param length [Integer]
694
694
  # Fill the value up to this length.
695
695
  #
696
696
  # @return [Series]
697
- def zfill(alignment)
697
+ def zfill(length)
698
698
  super
699
699
  end
700
700