polars-df 0.7.0-arm64-darwin → 0.9.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +4014 -3495
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +36 -7
- data/lib/polars/lazy_functions.rb +0 -1197
data/lib/polars/string_expr.rb
CHANGED
@@ -211,6 +211,49 @@ module Polars
|
|
211
211
|
end
|
212
212
|
end
|
213
213
|
|
214
|
+
# Convert a String column into a Decimal column.
|
215
|
+
#
|
216
|
+
# This method infers the needed parameters `precision` and `scale`.
|
217
|
+
#
|
218
|
+
# @param inference_length [Integer]
|
219
|
+
# Number of elements to parse to determine the `precision` and `scale`.
|
220
|
+
#
|
221
|
+
# @return [Expr]
|
222
|
+
#
|
223
|
+
# @example
|
224
|
+
# df = Polars::DataFrame.new(
|
225
|
+
# {
|
226
|
+
# "numbers": [
|
227
|
+
# "40.12",
|
228
|
+
# "3420.13",
|
229
|
+
# "120134.19",
|
230
|
+
# "3212.98",
|
231
|
+
# "12.90",
|
232
|
+
# "143.09",
|
233
|
+
# "143.9"
|
234
|
+
# ]
|
235
|
+
# }
|
236
|
+
# )
|
237
|
+
# df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal)
|
238
|
+
# # =>
|
239
|
+
# # shape: (7, 2)
|
240
|
+
# # ┌───────────┬─────────────────┐
|
241
|
+
# # │ numbers ┆ numbers_decimal │
|
242
|
+
# # │ --- ┆ --- │
|
243
|
+
# # │ str ┆ decimal[*,2] │
|
244
|
+
# # ╞═══════════╪═════════════════╡
|
245
|
+
# # │ 40.12 ┆ 40.12 │
|
246
|
+
# # │ 3420.13 ┆ 3420.13 │
|
247
|
+
# # │ 120134.19 ┆ 120134.19 │
|
248
|
+
# # │ 3212.98 ┆ 3212.98 │
|
249
|
+
# # │ 12.90 ┆ 12.90 │
|
250
|
+
# # │ 143.09 ┆ 143.09 │
|
251
|
+
# # │ 143.9 ┆ 143.90 │
|
252
|
+
# # └───────────┴─────────────────┘
|
253
|
+
def to_decimal(inference_length = 100)
|
254
|
+
Utils.wrap_expr(_rbexpr.str_to_decimal(inference_length))
|
255
|
+
end
|
256
|
+
|
214
257
|
# Get length of the strings as `:u32` (as number of bytes).
|
215
258
|
#
|
216
259
|
# @return [Expr]
|
@@ -222,8 +265,8 @@ module Polars
|
|
222
265
|
# @example
|
223
266
|
# df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
|
224
267
|
# [
|
225
|
-
# Polars.col("s").str.lengths.alias("length"),
|
226
|
-
# Polars.col("s").str.n_chars.alias("nchars")
|
268
|
+
# Polars.col("s").str.len_bytes.alias("length"),
|
269
|
+
# Polars.col("s").str.len_chars.alias("nchars")
|
227
270
|
# ]
|
228
271
|
# )
|
229
272
|
# df
|
@@ -239,9 +282,10 @@ module Polars
|
|
239
282
|
# # │ 345 ┆ 3 ┆ 3 │
|
240
283
|
# # │ 東京 ┆ 6 ┆ 2 │
|
241
284
|
# # └──────┴────────┴────────┘
|
242
|
-
def lengths
|
285
|
+
def len_bytes
|
243
286
|
Utils.wrap_expr(_rbexpr.str_len_bytes)
|
244
287
|
end
|
288
|
+
alias_method :lengths, :len_bytes
|
245
289
|
|
246
290
|
# Get length of the strings as `:u32` (as number of chars).
|
247
291
|
#
|
@@ -254,8 +298,8 @@ module Polars
|
|
254
298
|
# @example
|
255
299
|
# df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
|
256
300
|
# [
|
257
|
-
# Polars.col("s").str.lengths.alias("length"),
|
258
|
-
# Polars.col("s").str.n_chars.alias("nchars")
|
301
|
+
# Polars.col("s").str.len_bytes.alias("length"),
|
302
|
+
# Polars.col("s").str.len_chars.alias("nchars")
|
259
303
|
# ]
|
260
304
|
# )
|
261
305
|
# df
|
@@ -271,9 +315,10 @@ module Polars
|
|
271
315
|
# # │ 345 ┆ 3 ┆ 3 │
|
272
316
|
# # │ 東京 ┆ 6 ┆ 2 │
|
273
317
|
# # └──────┴────────┴────────┘
|
274
|
-
def n_chars
|
318
|
+
def len_chars
|
275
319
|
Utils.wrap_expr(_rbexpr.str_len_chars)
|
276
320
|
end
|
321
|
+
alias_method :n_chars, :len_chars
|
277
322
|
|
278
323
|
# Vertically concat the values in the Series to a single string value.
|
279
324
|
#
|
@@ -355,6 +400,30 @@ module Polars
|
|
355
400
|
Utils.wrap_expr(_rbexpr.str_to_lowercase)
|
356
401
|
end
|
357
402
|
|
403
|
+
# Transform to titlecase variant.
|
404
|
+
#
|
405
|
+
# @return [Expr]
|
406
|
+
#
|
407
|
+
# @example
|
408
|
+
# df = Polars::DataFrame.new(
|
409
|
+
# {"sing": ["welcome to my world", "THERE'S NO TURNING BACK"]}
|
410
|
+
# )
|
411
|
+
# df.with_columns(foo_title: Polars.col("sing").str.to_titlecase)
|
412
|
+
# # =>
|
413
|
+
# # shape: (2, 2)
|
414
|
+
# # ┌─────────────────────────┬─────────────────────────┐
|
415
|
+
# # │ sing ┆ foo_title │
|
416
|
+
# # │ --- ┆ --- │
|
417
|
+
# # │ str ┆ str │
|
418
|
+
# # ╞═════════════════════════╪═════════════════════════╡
|
419
|
+
# # │ welcome to my world ┆ Welcome To My World │
|
420
|
+
# # │ THERE'S NO TURNING BACK ┆ There's No Turning Back │
|
421
|
+
# # └─────────────────────────┴─────────────────────────┘
|
422
|
+
def to_titlecase
|
423
|
+
raise Todo
|
424
|
+
Utils.wrap_expr(_rbexpr.str_to_titlecase)
|
425
|
+
end
|
426
|
+
|
358
427
|
# Remove leading and trailing whitespace.
|
359
428
|
#
|
360
429
|
# @param characters [String, nil]
|
@@ -436,113 +505,158 @@ module Polars
|
|
436
505
|
end
|
437
506
|
alias_method :rstrip, :strip_chars_end
|
438
507
|
|
439
|
-
#
|
508
|
+
# Remove prefix.
|
440
509
|
#
|
441
|
-
#
|
442
|
-
# of length width.
|
510
|
+
# The prefix will be removed from the string exactly once, if found.
|
443
511
|
#
|
444
|
-
#
|
445
|
-
#
|
446
|
-
# less than or equal to `s.length`.
|
512
|
+
# @param prefix [String]
|
513
|
+
# The prefix to be removed.
|
447
514
|
#
|
448
|
-
# @
|
449
|
-
#
|
515
|
+
# @return [Expr]
|
516
|
+
#
|
517
|
+
# @example
|
518
|
+
# df = Polars::DataFrame.new({"a" => ["foobar", "foofoobar", "foo", "bar"]})
|
519
|
+
# df.with_columns(Polars.col("a").str.strip_prefix("foo").alias("stripped"))
|
520
|
+
# # =>
|
521
|
+
# # shape: (4, 2)
|
522
|
+
# # ┌───────────┬──────────┐
|
523
|
+
# # │ a ┆ stripped │
|
524
|
+
# # │ --- ┆ --- │
|
525
|
+
# # │ str ┆ str │
|
526
|
+
# # ╞═══════════╪══════════╡
|
527
|
+
# # │ foobar ┆ bar │
|
528
|
+
# # │ foofoobar ┆ foobar │
|
529
|
+
# # │ foo ┆ │
|
530
|
+
# # │ bar ┆ bar │
|
531
|
+
# # └───────────┴──────────┘
|
532
|
+
def strip_prefix(prefix)
|
533
|
+
prefix = Utils.parse_as_expression(prefix, str_as_lit: true)
|
534
|
+
Utils.wrap_expr(_rbexpr.str_strip_prefix(prefix))
|
535
|
+
end
|
536
|
+
|
537
|
+
# Remove suffix.
|
538
|
+
#
|
539
|
+
# The suffix will be removed from the string exactly once, if found.
|
540
|
+
#
|
541
|
+
#
|
542
|
+
# @param suffix [String]
|
543
|
+
# The suffix to be removed.
|
450
544
|
#
|
451
545
|
# @return [Expr]
|
452
546
|
#
|
453
547
|
# @example
|
454
|
-
# df = Polars::DataFrame.new(
|
455
|
-
#
|
456
|
-
# "num" => [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, nil]
|
457
|
-
# }
|
458
|
-
# )
|
459
|
-
# df.with_column(Polars.col("num").cast(String).str.zfill(5))
|
548
|
+
# df = Polars::DataFrame.new({"a" => ["foobar", "foobarbar", "foo", "bar"]})
|
549
|
+
# df.with_columns(Polars.col("a").str.strip_suffix("bar").alias("stripped"))
|
460
550
|
# # =>
|
461
|
-
# # shape: (
|
462
|
-
# #
|
463
|
-
# # │
|
464
|
-
# # │ ---
|
465
|
-
# # │ str
|
466
|
-
# #
|
467
|
-
# # │
|
468
|
-
# # │
|
469
|
-
# # │
|
470
|
-
# # │
|
471
|
-
# #
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
# # │ null │
|
476
|
-
# # └─────────┘
|
477
|
-
def zfill(alignment)
|
478
|
-
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
551
|
+
# # shape: (4, 2)
|
552
|
+
# # ┌───────────┬──────────┐
|
553
|
+
# # │ a ┆ stripped │
|
554
|
+
# # │ --- ┆ --- │
|
555
|
+
# # │ str ┆ str │
|
556
|
+
# # ╞═══════════╪══════════╡
|
557
|
+
# # │ foobar ┆ foo │
|
558
|
+
# # │ foobarbar ┆ foobar │
|
559
|
+
# # │ foo ┆ foo │
|
560
|
+
# # │ bar ┆ │
|
561
|
+
# # └───────────┴──────────┘
|
562
|
+
def strip_suffix(suffix)
|
563
|
+
suffix = Utils.parse_as_expression(suffix, str_as_lit: true)
|
564
|
+
Utils.wrap_expr(_rbexpr.str_strip_suffix(suffix))
|
479
565
|
end
|
480
566
|
|
481
|
-
#
|
567
|
+
# Pad the start of the string until it reaches the given length.
|
568
|
+
#
|
569
|
+
# @param length [Integer]
|
570
|
+
# Pad the string until it reaches this length. Strings with length equal to
|
571
|
+
# or greater than this value are returned as-is.
|
572
|
+
# @param fill_char [String]
|
573
|
+
# The character to pad the string with.
|
482
574
|
#
|
483
|
-
#
|
484
|
-
#
|
485
|
-
#
|
575
|
+
# @return [Expr]
|
576
|
+
#
|
577
|
+
# @example
|
578
|
+
# df = Polars::DataFrame.new({"a": ["cow", "monkey", "hippopotamus", nil]})
|
579
|
+
# df.with_columns(padded: Polars.col("a").str.pad_start(8, "*"))
|
580
|
+
# # =>
|
581
|
+
# # shape: (4, 2)
|
582
|
+
# # ┌──────────────┬──────────────┐
|
583
|
+
# # │ a ┆ padded │
|
584
|
+
# # │ --- ┆ --- │
|
585
|
+
# # │ str ┆ str │
|
586
|
+
# # ╞══════════════╪══════════════╡
|
587
|
+
# # │ cow ┆ *****cow │
|
588
|
+
# # │ monkey ┆ **monkey │
|
589
|
+
# # │ hippopotamus ┆ hippopotamus │
|
590
|
+
# # │ null ┆ null │
|
591
|
+
# # └──────────────┴──────────────┘
|
592
|
+
def pad_start(length, fill_char = " ")
|
593
|
+
Utils.wrap_expr(_rbexpr.str_pad_start(length, fill_char))
|
594
|
+
end
|
595
|
+
alias_method :rjust, :pad_start
|
596
|
+
|
597
|
+
# Pad the end of the string until it reaches the given length.
|
486
598
|
#
|
487
599
|
# @param length [Integer]
|
488
|
-
#
|
489
|
-
#
|
490
|
-
#
|
600
|
+
# Pad the string until it reaches this length. Strings with length equal to
|
601
|
+
# or greater than this value are returned as-is.
|
602
|
+
# @param fill_char [String]
|
603
|
+
# The character to pad the string with.
|
491
604
|
#
|
492
605
|
# @return [Expr]
|
493
606
|
#
|
494
607
|
# @example
|
495
|
-
# df = Polars::DataFrame.new({"a"
|
496
|
-
# df.
|
608
|
+
# df = Polars::DataFrame.new({"a": ["cow", "monkey", "hippopotamus", nil]})
|
609
|
+
# df.with_columns(padded: Polars.col("a").str.pad_end(8, "*"))
|
497
610
|
# # =>
|
498
|
-
# # shape: (4,
|
499
|
-
# #
|
500
|
-
# # │ a │
|
501
|
-
# # │ --- │
|
502
|
-
# # │ str │
|
503
|
-
# #
|
504
|
-
# # │ cow***** │
|
505
|
-
# # │ monkey** │
|
506
|
-
# # │
|
507
|
-
# # │
|
508
|
-
# #
|
509
|
-
def
|
510
|
-
Utils.wrap_expr(_rbexpr.str_pad_end(length,
|
611
|
+
# # shape: (4, 2)
|
612
|
+
# # ┌──────────────┬──────────────┐
|
613
|
+
# # │ a ┆ padded │
|
614
|
+
# # │ --- ┆ --- │
|
615
|
+
# # │ str ┆ str │
|
616
|
+
# # ╞══════════════╪══════════════╡
|
617
|
+
# # │ cow ┆ cow***** │
|
618
|
+
# # │ monkey ┆ monkey** │
|
619
|
+
# # │ hippopotamus ┆ hippopotamus │
|
620
|
+
# # │ null ┆ null │
|
621
|
+
# # └──────────────┴──────────────┘
|
622
|
+
def pad_end(length, fill_char = " ")
|
623
|
+
Utils.wrap_expr(_rbexpr.str_pad_end(length, fill_char))
|
511
624
|
end
|
512
|
-
alias_method :pad_end, :ljust
|
625
|
+
alias_method :ljust, :pad_end
|
513
626
|
|
514
|
-
#
|
627
|
+
# Fills the string with zeroes.
|
515
628
|
#
|
516
|
-
#
|
517
|
-
#
|
518
|
-
#
|
629
|
+
# Return a copy of the string left filled with ASCII '0' digits to make a string
|
630
|
+
# of length width.
|
631
|
+
#
|
632
|
+
# A leading sign prefix ('+'/'-') is handled by inserting the padding after the
|
633
|
+
# sign character rather than before. The original string is returned if width is
|
634
|
+
# less than or equal to `s.length`.
|
519
635
|
#
|
520
636
|
# @param length [Integer]
|
521
|
-
#
|
522
|
-
# @param fillchar [String]
|
523
|
-
# Fill with this ASCII character.
|
637
|
+
# Fill the value up to this length
|
524
638
|
#
|
525
639
|
# @return [Expr]
|
526
640
|
#
|
527
641
|
# @example
|
528
|
-
# df = Polars::DataFrame.new({"a" => [
|
529
|
-
# df.
|
642
|
+
# df = Polars::DataFrame.new({"a" => [-1, 123, 999999, nil]})
|
643
|
+
# df.with_columns(Polars.col("a").cast(Polars::String).str.zfill(4).alias("zfill"))
|
530
644
|
# # =>
|
531
|
-
# # shape: (4,
|
532
|
-
# #
|
533
|
-
# # │ a
|
534
|
-
# # │ ---
|
535
|
-
# # │ str
|
536
|
-
# #
|
537
|
-
# # │
|
538
|
-
# # │
|
539
|
-
# # │
|
540
|
-
# # │
|
541
|
-
# #
|
542
|
-
def
|
543
|
-
Utils.
|
645
|
+
# # shape: (4, 2)
|
646
|
+
# # ┌────────┬────────┐
|
647
|
+
# # │ a ┆ zfill │
|
648
|
+
# # │ --- ┆ --- │
|
649
|
+
# # │ i64 ┆ str │
|
650
|
+
# # ╞════════╪════════╡
|
651
|
+
# # │ -1 ┆ -001 │
|
652
|
+
# # │ 123 ┆ 0123 │
|
653
|
+
# # │ 999999 ┆ 999999 │
|
654
|
+
# # │ null ┆ null │
|
655
|
+
# # └────────┴────────┘
|
656
|
+
def zfill(length)
|
657
|
+
length = Utils.parse_as_expression(length)
|
658
|
+
Utils.wrap_expr(_rbexpr.str_zfill(length))
|
544
659
|
end
|
545
|
-
alias_method :pad_start, :rjust
|
546
660
|
|
547
661
|
# Check if string contains a substring that matches a regex.
|
548
662
|
#
|
@@ -674,7 +788,7 @@ module Polars
|
|
674
788
|
# {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
|
675
789
|
# )
|
676
790
|
# dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
|
677
|
-
# df.select(Polars.col("json").str.json_extract(dtype))
|
791
|
+
# df.select(Polars.col("json").str.json_decode(dtype))
|
678
792
|
# # =>
|
679
793
|
# # shape: (3, 1)
|
680
794
|
# # ┌─────────────┐
|
@@ -686,12 +800,13 @@ module Polars
|
|
686
800
|
# # │ {null,null} │
|
687
801
|
# # │ {2,false} │
|
688
802
|
# # └─────────────┘
|
689
|
-
def
|
803
|
+
def json_decode(dtype = nil, infer_schema_length: 100)
|
690
804
|
if !dtype.nil?
|
691
805
|
dtype = Utils.rb_type_to_dtype(dtype)
|
692
806
|
end
|
693
|
-
Utils.wrap_expr(_rbexpr.
|
807
|
+
Utils.wrap_expr(_rbexpr.str_json_decode(dtype, infer_schema_length))
|
694
808
|
end
|
809
|
+
alias_method :json_extract, :json_decode
|
695
810
|
|
696
811
|
# Extract the first match of json string with provided JSONPath expression.
|
697
812
|
#
|
@@ -745,15 +860,15 @@ module Polars
|
|
745
860
|
# df.select(Polars.col("encoded").str.decode("hex"))
|
746
861
|
# # =>
|
747
862
|
# # shape: (3, 1)
|
748
|
-
# #
|
749
|
-
# # │ encoded
|
750
|
-
# # │ ---
|
751
|
-
# # │ binary
|
752
|
-
# #
|
753
|
-
# # │
|
754
|
-
# # │
|
755
|
-
# # │ null
|
756
|
-
# #
|
863
|
+
# # ┌─────────┐
|
864
|
+
# # │ encoded │
|
865
|
+
# # │ --- │
|
866
|
+
# # │ binary │
|
867
|
+
# # ╞═════════╡
|
868
|
+
# # │ b"foo" │
|
869
|
+
# # │ b"bar" │
|
870
|
+
# # │ null │
|
871
|
+
# # └─────────┘
|
757
872
|
def decode(encoding, strict: true)
|
758
873
|
if encoding == "hex"
|
759
874
|
Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
|
@@ -824,6 +939,7 @@ module Polars
|
|
824
939
|
# # │ 678 │
|
825
940
|
# # └─────┘
|
826
941
|
def extract(pattern, group_index: 1)
|
942
|
+
pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
|
827
943
|
Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
|
828
944
|
end
|
829
945
|
|
@@ -859,6 +975,62 @@ module Polars
|
|
859
975
|
Utils.wrap_expr(_rbexpr.str_extract_all(pattern._rbexpr))
|
860
976
|
end
|
861
977
|
|
978
|
+
# Extract all capture groups for the given regex pattern.
|
979
|
+
#
|
980
|
+
# @param pattern [String]
|
981
|
+
# A valid regular expression pattern containing at least one capture group,
|
982
|
+
# compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
983
|
+
#
|
984
|
+
# @return [Expr]
|
985
|
+
#
|
986
|
+
# @example
|
987
|
+
# df = Polars::DataFrame.new(
|
988
|
+
# {
|
989
|
+
# "url": [
|
990
|
+
# "http://vote.com/ballon_dor?candidate=messi&ref=python",
|
991
|
+
# "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
|
992
|
+
# "http://vote.com/ballon_dor?error=404&ref=rust"
|
993
|
+
# ]
|
994
|
+
# }
|
995
|
+
# )
|
996
|
+
# pattern = /candidate=(?<candidate>\w+)&ref=(?<ref>\w+)/.to_s
|
997
|
+
# df.select(captures: Polars.col("url").str.extract_groups(pattern)).unnest(
|
998
|
+
# "captures"
|
999
|
+
# )
|
1000
|
+
# # =>
|
1001
|
+
# # shape: (3, 2)
|
1002
|
+
# # ┌───────────┬────────┐
|
1003
|
+
# # │ candidate ┆ ref │
|
1004
|
+
# # │ --- ┆ --- │
|
1005
|
+
# # │ str ┆ str │
|
1006
|
+
# # ╞═══════════╪════════╡
|
1007
|
+
# # │ messi ┆ python │
|
1008
|
+
# # │ weghorst ┆ polars │
|
1009
|
+
# # │ null ┆ null │
|
1010
|
+
# # └───────────┴────────┘
|
1011
|
+
#
|
1012
|
+
# @example Unnamed groups have their numerical position converted to a string:
|
1013
|
+
# pattern = /candidate=(\w+)&ref=(\w+)/.to_s
|
1014
|
+
# (
|
1015
|
+
# df.with_columns(
|
1016
|
+
# captures: Polars.col("url").str.extract_groups(pattern)
|
1017
|
+
# ).with_columns(name: Polars.col("captures").struct["1"].str.to_uppercase)
|
1018
|
+
# )
|
1019
|
+
# # =>
|
1020
|
+
# # shape: (3, 3)
|
1021
|
+
# # ┌───────────────────────────────────┬───────────────────────┬──────────┐
|
1022
|
+
# # │ url ┆ captures ┆ name │
|
1023
|
+
# # │ --- ┆ --- ┆ --- │
|
1024
|
+
# # │ str ┆ struct[2] ┆ str │
|
1025
|
+
# # ╞═══════════════════════════════════╪═══════════════════════╪══════════╡
|
1026
|
+
# # │ http://vote.com/ballon_dor?candi… ┆ {"messi","python"} ┆ MESSI │
|
1027
|
+
# # │ http://vote.com/ballon_dor?candi… ┆ {"weghorst","polars"} ┆ WEGHORST │
|
1028
|
+
# # │ http://vote.com/ballon_dor?error… ┆ {null,null} ┆ null │
|
1029
|
+
# # └───────────────────────────────────┴───────────────────────┴──────────┘
|
1030
|
+
def extract_groups(pattern)
|
1031
|
+
Utils.wrap_expr(_rbexpr.str_extract_groups(pattern))
|
1032
|
+
end
|
1033
|
+
|
862
1034
|
# Count all successive non-overlapping regex matches.
|
863
1035
|
#
|
864
1036
|
# @param pattern [String]
|
@@ -1059,6 +1231,28 @@ module Polars
|
|
1059
1231
|
Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
|
1060
1232
|
end
|
1061
1233
|
|
1234
|
+
# Returns string values in reversed order.
|
1235
|
+
#
|
1236
|
+
# @return [Expr]
|
1237
|
+
#
|
1238
|
+
# @example
|
1239
|
+
# df = Polars::DataFrame.new({"text" => ["foo", "bar", "man\u0303ana"]})
|
1240
|
+
# df.with_columns(Polars.col("text").str.reverse.alias("reversed"))
|
1241
|
+
# # =>
|
1242
|
+
# # shape: (3, 2)
|
1243
|
+
# # ┌────────┬──────────┐
|
1244
|
+
# # │ text ┆ reversed │
|
1245
|
+
# # │ --- ┆ --- │
|
1246
|
+
# # │ str ┆ str │
|
1247
|
+
# # ╞════════╪══════════╡
|
1248
|
+
# # │ foo ┆ oof │
|
1249
|
+
# # │ bar ┆ rab │
|
1250
|
+
# # │ mañana ┆ anañam │
|
1251
|
+
# # └────────┴──────────┘
|
1252
|
+
def reverse
|
1253
|
+
Utils.wrap_expr(_rbexpr.str_reverse)
|
1254
|
+
end
|
1255
|
+
|
1062
1256
|
# Create subslices of the string values of a Utf8 Series.
|
1063
1257
|
#
|
1064
1258
|
# @param offset [Integer]
|
@@ -1087,6 +1281,8 @@ module Polars
|
|
1087
1281
|
# # │ dragonfruit ┆ uit │
|
1088
1282
|
# # └─────────────┴──────────┘
|
1089
1283
|
def slice(offset, length = nil)
|
1284
|
+
offset = Utils.parse_as_expression(offset)
|
1285
|
+
length = Utils.parse_as_expression(length)
|
1090
1286
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
1091
1287
|
end
|
1092
1288
|
|
@@ -1193,6 +1389,126 @@ module Polars
|
|
1193
1389
|
to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
|
1194
1390
|
end
|
1195
1391
|
|
1392
|
+
# Use the aho-corasick algorithm to find matches.
|
1393
|
+
#
|
1394
|
+
# This version determines if any of the patterns find a match.
|
1395
|
+
#
|
1396
|
+
# @param patterns [String]
|
1397
|
+
# String patterns to search.
|
1398
|
+
# @param ascii_case_insensitive [Boolean]
|
1399
|
+
# Enable ASCII-aware case insensitive matching.
|
1400
|
+
# When this option is enabled, searching will be performed without respect
|
1401
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1402
|
+
#
|
1403
|
+
# @return [Expr]
|
1404
|
+
#
|
1405
|
+
# @example
|
1406
|
+
# df = Polars::DataFrame.new(
|
1407
|
+
# {
|
1408
|
+
# "lyrics": [
|
1409
|
+
# "Everybody wants to rule the world",
|
1410
|
+
# "Tell me what you want, what you really really want",
|
1411
|
+
# "Can you feel the love tonight"
|
1412
|
+
# ]
|
1413
|
+
# }
|
1414
|
+
# )
|
1415
|
+
# df.with_columns(
|
1416
|
+
# Polars.col("lyrics").str.contains_any(["you", "me"]).alias("contains_any")
|
1417
|
+
# )
|
1418
|
+
# # =>
|
1419
|
+
# # shape: (3, 2)
|
1420
|
+
# # ┌───────────────────────────────────┬──────────────┐
|
1421
|
+
# # │ lyrics ┆ contains_any │
|
1422
|
+
# # │ --- ┆ --- │
|
1423
|
+
# # │ str ┆ bool │
|
1424
|
+
# # ╞═══════════════════════════════════╪══════════════╡
|
1425
|
+
# # │ Everybody wants to rule the worl… ┆ false │
|
1426
|
+
# # │ Tell me what you want, what you … ┆ true │
|
1427
|
+
# # │ Can you feel the love tonight ┆ true │
|
1428
|
+
# # └───────────────────────────────────┴──────────────┘
|
1429
|
+
def contains_any(patterns, ascii_case_insensitive: false)
|
1430
|
+
patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
|
1431
|
+
Utils.wrap_expr(
|
1432
|
+
_rbexpr.str_contains_any(patterns, ascii_case_insensitive)
|
1433
|
+
)
|
1434
|
+
end
|
1435
|
+
|
1436
|
+
# Use the aho-corasick algorithm to replace many matches.
|
1437
|
+
#
|
1438
|
+
# @param patterns [String]
|
1439
|
+
# String patterns to search and replace.
|
1440
|
+
# @param replace_with [String]
|
1441
|
+
# Strings to replace where a pattern was a match.
|
1442
|
+
# This can be broadcasted. So it supports many:one and many:many.
|
1443
|
+
# @param ascii_case_insensitive [Boolean]
|
1444
|
+
# Enable ASCII-aware case insensitive matching.
|
1445
|
+
# When this option is enabled, searching will be performed without respect
|
1446
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1447
|
+
#
|
1448
|
+
# @return [Expr]
|
1449
|
+
#
|
1450
|
+
# @example
|
1451
|
+
# df = Polars::DataFrame.new(
|
1452
|
+
# {
|
1453
|
+
# "lyrics": [
|
1454
|
+
# "Everybody wants to rule the world",
|
1455
|
+
# "Tell me what you want, what you really really want",
|
1456
|
+
# "Can you feel the love tonight"
|
1457
|
+
# ]
|
1458
|
+
# }
|
1459
|
+
# )
|
1460
|
+
# df.with_columns(
|
1461
|
+
# Polars.col("lyrics")
|
1462
|
+
# .str.replace_many(
|
1463
|
+
# ["me", "you", "they"],
|
1464
|
+
# ""
|
1465
|
+
# )
|
1466
|
+
# .alias("removes_pronouns")
|
1467
|
+
# )
|
1468
|
+
# # =>
|
1469
|
+
# # shape: (3, 2)
|
1470
|
+
# # ┌───────────────────────────────────┬───────────────────────────────────┐
|
1471
|
+
# # │ lyrics ┆ removes_pronouns │
|
1472
|
+
# # │ --- ┆ --- │
|
1473
|
+
# # │ str ┆ str │
|
1474
|
+
# # ╞═══════════════════════════════════╪═══════════════════════════════════╡
|
1475
|
+
# # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
|
1476
|
+
# # │ Tell me what you want, what you … ┆ Tell what want, what really r… │
|
1477
|
+
# # │ Can you feel the love tonight ┆ Can feel the love tonight │
|
1478
|
+
# # └───────────────────────────────────┴───────────────────────────────────┘
|
1479
|
+
#
|
1480
|
+
# @example
|
1481
|
+
# df.with_columns(
|
1482
|
+
# Polars.col("lyrics")
|
1483
|
+
# .str.replace_many(
|
1484
|
+
# ["me", "you"],
|
1485
|
+
# ["you", "me"]
|
1486
|
+
# )
|
1487
|
+
# .alias("confusing")
|
1488
|
+
# )
|
1489
|
+
# # =>
|
1490
|
+
# # shape: (3, 2)
|
1491
|
+
# # ┌───────────────────────────────────┬───────────────────────────────────┐
|
1492
|
+
# # │ lyrics ┆ confusing │
|
1493
|
+
# # │ --- ┆ --- │
|
1494
|
+
# # │ str ┆ str │
|
1495
|
+
# # ╞═══════════════════════════════════╪═══════════════════════════════════╡
|
1496
|
+
# # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
|
1497
|
+
# # │ Tell me what you want, what you … ┆ Tell you what me want, what me r… │
|
1498
|
+
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
1499
|
+
# # └───────────────────────────────────┴───────────────────────────────────┘
|
1500
|
+
def replace_many(patterns, replace_with, ascii_case_insensitive: false)
|
1501
|
+
patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
|
1502
|
+
replace_with = Utils.parse_as_expression(
|
1503
|
+
replace_with, str_as_lit: true, list_as_lit: false
|
1504
|
+
)
|
1505
|
+
Utils.wrap_expr(
|
1506
|
+
_rbexpr.str_replace_many(
|
1507
|
+
patterns, replace_with, ascii_case_insensitive
|
1508
|
+
)
|
1509
|
+
)
|
1510
|
+
end
|
1511
|
+
|
1196
1512
|
private
|
1197
1513
|
|
1198
1514
|
def _validate_format_argument(format)
|
@@ -353,8 +353,8 @@ module Polars
|
|
353
353
|
# # shape: (3,)
|
354
354
|
# # Series: '' [binary]
|
355
355
|
# # [
|
356
|
-
# #
|
357
|
-
# #
|
356
|
+
# # b"foo"
|
357
|
+
# # b"bar"
|
358
358
|
# # null
|
359
359
|
# # ]
|
360
360
|
def decode(encoding, strict: false)
|
@@ -690,11 +690,11 @@ module Polars
|
|
690
690
|
# sign character rather than before. The original string is returned if width is
|
691
691
|
# less than or equal to `s.length`.
|
692
692
|
#
|
693
|
-
# @param alignment [Integer]
|
693
|
+
# @param length [Integer]
|
694
694
|
# Fill the value up to this length.
|
695
695
|
#
|
696
696
|
# @return [Series]
|
697
|
-
def zfill(alignment)
|
697
|
+
def zfill(length)
|
698
698
|
super
|
699
699
|
end
|
700
700
|
|