polars-df 0.8.0-arm64-darwin → 0.10.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -1
- data/Cargo.lock +159 -66
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +3112 -1613
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +453 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +306 -96
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +41 -18
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +898 -215
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +36 -31
- data/lib/polars/lazy_frame.rb +405 -88
- data/lib/polars/list_expr.rb +158 -8
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +282 -41
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +413 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +106 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +16 -4
- metadata +34 -6
- data/lib/polars/lazy_functions.rb +0 -1181
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/lib/polars/string_expr.rb
CHANGED
@@ -211,6 +211,49 @@ module Polars
|
|
211
211
|
end
|
212
212
|
end
|
213
213
|
|
214
|
+
# Convert a String column into a Decimal column.
|
215
|
+
#
|
216
|
+
# This method infers the needed parameters `precision` and `scale`.
|
217
|
+
#
|
218
|
+
# @param inference_length [Integer]
|
219
|
+
# Number of elements to parse to determine the `precision` and `scale`.
|
220
|
+
#
|
221
|
+
# @return [Expr]
|
222
|
+
#
|
223
|
+
# @example
|
224
|
+
# df = Polars::DataFrame.new(
|
225
|
+
# {
|
226
|
+
# "numbers": [
|
227
|
+
# "40.12",
|
228
|
+
# "3420.13",
|
229
|
+
# "120134.19",
|
230
|
+
# "3212.98",
|
231
|
+
# "12.90",
|
232
|
+
# "143.09",
|
233
|
+
# "143.9"
|
234
|
+
# ]
|
235
|
+
# }
|
236
|
+
# )
|
237
|
+
# df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal)
|
238
|
+
# # =>
|
239
|
+
# # shape: (7, 2)
|
240
|
+
# # ┌───────────┬─────────────────┐
|
241
|
+
# # │ numbers ┆ numbers_decimal │
|
242
|
+
# # │ --- ┆ --- │
|
243
|
+
# # │ str ┆ decimal[*,2] │
|
244
|
+
# # ╞═══════════╪═════════════════╡
|
245
|
+
# # │ 40.12 ┆ 40.12 │
|
246
|
+
# # │ 3420.13 ┆ 3420.13 │
|
247
|
+
# # │ 120134.19 ┆ 120134.19 │
|
248
|
+
# # │ 3212.98 ┆ 3212.98 │
|
249
|
+
# # │ 12.90 ┆ 12.90 │
|
250
|
+
# # │ 143.09 ┆ 143.09 │
|
251
|
+
# # │ 143.9 ┆ 143.90 │
|
252
|
+
# # └───────────┴─────────────────┘
|
253
|
+
def to_decimal(inference_length = 100)
|
254
|
+
Utils.wrap_expr(_rbexpr.str_to_decimal(inference_length))
|
255
|
+
end
|
256
|
+
|
214
257
|
# Get length of the strings as `:u32` (as number of bytes).
|
215
258
|
#
|
216
259
|
# @return [Expr]
|
@@ -222,8 +265,8 @@ module Polars
|
|
222
265
|
# @example
|
223
266
|
# df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
|
224
267
|
# [
|
225
|
-
# Polars.col("s").str.
|
226
|
-
# Polars.col("s").str.
|
268
|
+
# Polars.col("s").str.len_bytes.alias("length"),
|
269
|
+
# Polars.col("s").str.len_chars.alias("nchars")
|
227
270
|
# ]
|
228
271
|
# )
|
229
272
|
# df
|
@@ -239,9 +282,10 @@ module Polars
|
|
239
282
|
# # │ 345 ┆ 3 ┆ 3 │
|
240
283
|
# # │ 東京 ┆ 6 ┆ 2 │
|
241
284
|
# # └──────┴────────┴────────┘
|
242
|
-
def
|
285
|
+
def len_bytes
|
243
286
|
Utils.wrap_expr(_rbexpr.str_len_bytes)
|
244
287
|
end
|
288
|
+
alias_method :lengths, :len_bytes
|
245
289
|
|
246
290
|
# Get length of the strings as `:u32` (as number of chars).
|
247
291
|
#
|
@@ -254,8 +298,8 @@ module Polars
|
|
254
298
|
# @example
|
255
299
|
# df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
|
256
300
|
# [
|
257
|
-
# Polars.col("s").str.
|
258
|
-
# Polars.col("s").str.
|
301
|
+
# Polars.col("s").str.len_bytes.alias("length"),
|
302
|
+
# Polars.col("s").str.len_chars.alias("nchars")
|
259
303
|
# ]
|
260
304
|
# )
|
261
305
|
# df
|
@@ -271,9 +315,10 @@ module Polars
|
|
271
315
|
# # │ 345 ┆ 3 ┆ 3 │
|
272
316
|
# # │ 東京 ┆ 6 ┆ 2 │
|
273
317
|
# # └──────┴────────┴────────┘
|
274
|
-
def
|
318
|
+
def len_chars
|
275
319
|
Utils.wrap_expr(_rbexpr.str_len_chars)
|
276
320
|
end
|
321
|
+
alias_method :n_chars, :len_chars
|
277
322
|
|
278
323
|
# Vertically concat the values in the Series to a single string value.
|
279
324
|
#
|
@@ -355,6 +400,30 @@ module Polars
|
|
355
400
|
Utils.wrap_expr(_rbexpr.str_to_lowercase)
|
356
401
|
end
|
357
402
|
|
403
|
+
# Transform to titlecase variant.
|
404
|
+
#
|
405
|
+
# @return [Expr]
|
406
|
+
#
|
407
|
+
# @example
|
408
|
+
# df = Polars::DataFrame.new(
|
409
|
+
# {"sing": ["welcome to my world", "THERE'S NO TURNING BACK"]}
|
410
|
+
# )
|
411
|
+
# df.with_columns(foo_title: Polars.col("sing").str.to_titlecase)
|
412
|
+
# # =>
|
413
|
+
# # shape: (2, 2)
|
414
|
+
# # ┌─────────────────────────┬─────────────────────────┐
|
415
|
+
# # │ sing ┆ foo_title │
|
416
|
+
# # │ --- ┆ --- │
|
417
|
+
# # │ str ┆ str │
|
418
|
+
# # ╞═════════════════════════╪═════════════════════════╡
|
419
|
+
# # │ welcome to my world ┆ Welcome To My World │
|
420
|
+
# # │ THERE'S NO TURNING BACK ┆ There's No Turning Back │
|
421
|
+
# # └─────────────────────────┴─────────────────────────┘
|
422
|
+
def to_titlecase
|
423
|
+
raise Todo
|
424
|
+
Utils.wrap_expr(_rbexpr.str_to_titlecase)
|
425
|
+
end
|
426
|
+
|
358
427
|
# Remove leading and trailing whitespace.
|
359
428
|
#
|
360
429
|
# @param characters [String, nil]
|
@@ -436,113 +505,158 @@ module Polars
|
|
436
505
|
end
|
437
506
|
alias_method :rstrip, :strip_chars_end
|
438
507
|
|
439
|
-
#
|
508
|
+
# Remove prefix.
|
440
509
|
#
|
441
|
-
#
|
442
|
-
# of length width.
|
510
|
+
# The prefix will be removed from the string exactly once, if found.
|
443
511
|
#
|
444
|
-
#
|
445
|
-
#
|
446
|
-
# less than or equal to `s.length`.
|
512
|
+
# @param prefix [String]
|
513
|
+
# The prefix to be removed.
|
447
514
|
#
|
448
|
-
# @
|
449
|
-
#
|
515
|
+
# @return [Expr]
|
516
|
+
#
|
517
|
+
# @example
|
518
|
+
# df = Polars::DataFrame.new({"a" => ["foobar", "foofoobar", "foo", "bar"]})
|
519
|
+
# df.with_columns(Polars.col("a").str.strip_prefix("foo").alias("stripped"))
|
520
|
+
# # =>
|
521
|
+
# # shape: (4, 2)
|
522
|
+
# # ┌───────────┬──────────┐
|
523
|
+
# # │ a ┆ stripped │
|
524
|
+
# # │ --- ┆ --- │
|
525
|
+
# # │ str ┆ str │
|
526
|
+
# # ╞═══════════╪══════════╡
|
527
|
+
# # │ foobar ┆ bar │
|
528
|
+
# # │ foofoobar ┆ foobar │
|
529
|
+
# # │ foo ┆ │
|
530
|
+
# # │ bar ┆ bar │
|
531
|
+
# # └───────────┴──────────┘
|
532
|
+
def strip_prefix(prefix)
|
533
|
+
prefix = Utils.parse_as_expression(prefix, str_as_lit: true)
|
534
|
+
Utils.wrap_expr(_rbexpr.str_strip_prefix(prefix))
|
535
|
+
end
|
536
|
+
|
537
|
+
# Remove suffix.
|
538
|
+
#
|
539
|
+
# The suffix will be removed from the string exactly once, if found.
|
540
|
+
#
|
541
|
+
#
|
542
|
+
# @param suffix [String]
|
543
|
+
# The suffix to be removed.
|
450
544
|
#
|
451
545
|
# @return [Expr]
|
452
546
|
#
|
453
547
|
# @example
|
454
|
-
# df = Polars::DataFrame.new(
|
455
|
-
#
|
456
|
-
# "num" => [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, nil]
|
457
|
-
# }
|
458
|
-
# )
|
459
|
-
# df.with_column(Polars.col("num").cast(String).str.zfill(5))
|
548
|
+
# df = Polars::DataFrame.new({"a" => ["foobar", "foobarbar", "foo", "bar"]})
|
549
|
+
# df.with_columns(Polars.col("a").str.strip_suffix("bar").alias("stripped"))
|
460
550
|
# # =>
|
461
|
-
# # shape: (
|
462
|
-
# #
|
463
|
-
# # │
|
464
|
-
# # │ ---
|
465
|
-
# # │ str
|
466
|
-
# #
|
467
|
-
# # │
|
468
|
-
# # │
|
469
|
-
# # │
|
470
|
-
# # │
|
471
|
-
# #
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
# # │ null │
|
476
|
-
# # └─────────┘
|
477
|
-
def zfill(alignment)
|
478
|
-
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
551
|
+
# # shape: (4, 2)
|
552
|
+
# # ┌───────────┬──────────┐
|
553
|
+
# # │ a ┆ stripped │
|
554
|
+
# # │ --- ┆ --- │
|
555
|
+
# # │ str ┆ str │
|
556
|
+
# # ╞═══════════╪══════════╡
|
557
|
+
# # │ foobar ┆ foo │
|
558
|
+
# # │ foobarbar ┆ foobar │
|
559
|
+
# # │ foo ┆ foo │
|
560
|
+
# # │ bar ┆ │
|
561
|
+
# # └───────────┴──────────┘
|
562
|
+
def strip_suffix(suffix)
|
563
|
+
suffix = Utils.parse_as_expression(suffix, str_as_lit: true)
|
564
|
+
Utils.wrap_expr(_rbexpr.str_strip_suffix(suffix))
|
479
565
|
end
|
480
566
|
|
481
|
-
#
|
567
|
+
# Pad the start of the string until it reaches the given length.
|
568
|
+
#
|
569
|
+
# @param length [Integer]
|
570
|
+
# Pad the string until it reaches this length. Strings with length equal to
|
571
|
+
# or greater than this value are returned as-is.
|
572
|
+
# @param fill_char [String]
|
573
|
+
# The character to pad the string with.
|
482
574
|
#
|
483
|
-
#
|
484
|
-
#
|
485
|
-
#
|
575
|
+
# @return [Expr]
|
576
|
+
#
|
577
|
+
# @example
|
578
|
+
# df = Polars::DataFrame.new({"a": ["cow", "monkey", "hippopotamus", nil]})
|
579
|
+
# df.with_columns(padded: Polars.col("a").str.pad_start(8, "*"))
|
580
|
+
# # =>
|
581
|
+
# # shape: (4, 2)
|
582
|
+
# # ┌──────────────┬──────────────┐
|
583
|
+
# # │ a ┆ padded │
|
584
|
+
# # │ --- ┆ --- │
|
585
|
+
# # │ str ┆ str │
|
586
|
+
# # ╞══════════════╪══════════════╡
|
587
|
+
# # │ cow ┆ *****cow │
|
588
|
+
# # │ monkey ┆ **monkey │
|
589
|
+
# # │ hippopotamus ┆ hippopotamus │
|
590
|
+
# # │ null ┆ null │
|
591
|
+
# # └──────────────┴──────────────┘
|
592
|
+
def pad_start(length, fill_char = " ")
|
593
|
+
Utils.wrap_expr(_rbexpr.str_pad_start(length, fill_char))
|
594
|
+
end
|
595
|
+
alias_method :rjust, :pad_start
|
596
|
+
|
597
|
+
# Pad the end of the string until it reaches the given length.
|
486
598
|
#
|
487
599
|
# @param length [Integer]
|
488
|
-
#
|
489
|
-
#
|
490
|
-
#
|
600
|
+
# Pad the string until it reaches this length. Strings with length equal to
|
601
|
+
# or greater than this value are returned as-is.
|
602
|
+
# @param fill_char [String]
|
603
|
+
# The character to pad the string with.
|
491
604
|
#
|
492
605
|
# @return [Expr]
|
493
606
|
#
|
494
607
|
# @example
|
495
|
-
# df = Polars::DataFrame.new({"a"
|
496
|
-
# df.
|
608
|
+
# df = Polars::DataFrame.new({"a": ["cow", "monkey", "hippopotamus", nil]})
|
609
|
+
# df.with_columns(padded: Polars.col("a").str.pad_end(8, "*"))
|
497
610
|
# # =>
|
498
|
-
# # shape: (4,
|
499
|
-
# #
|
500
|
-
# # │ a │
|
501
|
-
# # │ --- │
|
502
|
-
# # │ str │
|
503
|
-
# #
|
504
|
-
# # │ cow***** │
|
505
|
-
# # │ monkey** │
|
506
|
-
# # │
|
507
|
-
# # │
|
508
|
-
# #
|
509
|
-
def
|
510
|
-
Utils.wrap_expr(_rbexpr.str_pad_end(length,
|
611
|
+
# # shape: (4, 2)
|
612
|
+
# # ┌──────────────┬──────────────┐
|
613
|
+
# # │ a ┆ padded │
|
614
|
+
# # │ --- ┆ --- │
|
615
|
+
# # │ str ┆ str │
|
616
|
+
# # ╞══════════════╪══════════════╡
|
617
|
+
# # │ cow ┆ cow***** │
|
618
|
+
# # │ monkey ┆ monkey** │
|
619
|
+
# # │ hippopotamus ┆ hippopotamus │
|
620
|
+
# # │ null ┆ null │
|
621
|
+
# # └──────────────┴──────────────┘
|
622
|
+
def pad_end(length, fill_char = " ")
|
623
|
+
Utils.wrap_expr(_rbexpr.str_pad_end(length, fill_char))
|
511
624
|
end
|
512
|
-
alias_method :
|
625
|
+
alias_method :ljust, :pad_end
|
513
626
|
|
514
|
-
#
|
627
|
+
# Fills the string with zeroes.
|
515
628
|
#
|
516
|
-
#
|
517
|
-
#
|
518
|
-
#
|
629
|
+
# Return a copy of the string left filled with ASCII '0' digits to make a string
|
630
|
+
# of length width.
|
631
|
+
#
|
632
|
+
# A leading sign prefix ('+'/'-') is handled by inserting the padding after the
|
633
|
+
# sign character rather than before. The original string is returned if width is
|
634
|
+
# less than or equal to `s.length`.
|
519
635
|
#
|
520
636
|
# @param length [Integer]
|
521
|
-
#
|
522
|
-
# @param fillchar [String]
|
523
|
-
# Fill with this ASCII character.
|
637
|
+
# Fill the value up to this length
|
524
638
|
#
|
525
639
|
# @return [Expr]
|
526
640
|
#
|
527
641
|
# @example
|
528
|
-
# df = Polars::DataFrame.new({"a" => [
|
529
|
-
# df.
|
642
|
+
# df = Polars::DataFrame.new({"a" => [-1, 123, 999999, nil]})
|
643
|
+
# df.with_columns(Polars.col("a").cast(Polars::String).str.zfill(4).alias("zfill"))
|
530
644
|
# # =>
|
531
|
-
# # shape: (4,
|
532
|
-
# #
|
533
|
-
# # │ a
|
534
|
-
# # │ ---
|
535
|
-
# # │ str
|
536
|
-
# #
|
537
|
-
# # │
|
538
|
-
# # │
|
539
|
-
# # │
|
540
|
-
# # │
|
541
|
-
# #
|
542
|
-
def
|
543
|
-
Utils.
|
645
|
+
# # shape: (4, 2)
|
646
|
+
# # ┌────────┬────────┐
|
647
|
+
# # │ a ┆ zfill │
|
648
|
+
# # │ --- ┆ --- │
|
649
|
+
# # │ i64 ┆ str │
|
650
|
+
# # ╞════════╪════════╡
|
651
|
+
# # │ -1 ┆ -001 │
|
652
|
+
# # │ 123 ┆ 0123 │
|
653
|
+
# # │ 999999 ┆ 999999 │
|
654
|
+
# # │ null ┆ null │
|
655
|
+
# # └────────┴────────┘
|
656
|
+
def zfill(length)
|
657
|
+
length = Utils.parse_as_expression(length)
|
658
|
+
Utils.wrap_expr(_rbexpr.str_zfill(length))
|
544
659
|
end
|
545
|
-
alias_method :pad_start, :rjust
|
546
660
|
|
547
661
|
# Check if string contains a substring that matches a regex.
|
548
662
|
#
|
@@ -674,7 +788,7 @@ module Polars
|
|
674
788
|
# {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
|
675
789
|
# )
|
676
790
|
# dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
|
677
|
-
# df.select(Polars.col("json").str.
|
791
|
+
# df.select(Polars.col("json").str.json_decode(dtype))
|
678
792
|
# # =>
|
679
793
|
# # shape: (3, 1)
|
680
794
|
# # ┌─────────────┐
|
@@ -686,12 +800,13 @@ module Polars
|
|
686
800
|
# # │ {null,null} │
|
687
801
|
# # │ {2,false} │
|
688
802
|
# # └─────────────┘
|
689
|
-
def
|
803
|
+
def json_decode(dtype = nil, infer_schema_length: 100)
|
690
804
|
if !dtype.nil?
|
691
805
|
dtype = Utils.rb_type_to_dtype(dtype)
|
692
806
|
end
|
693
|
-
Utils.wrap_expr(_rbexpr.
|
807
|
+
Utils.wrap_expr(_rbexpr.str_json_decode(dtype, infer_schema_length))
|
694
808
|
end
|
809
|
+
alias_method :json_extract, :json_decode
|
695
810
|
|
696
811
|
# Extract the first match of json string with provided JSONPath expression.
|
697
812
|
#
|
@@ -745,15 +860,15 @@ module Polars
|
|
745
860
|
# df.select(Polars.col("encoded").str.decode("hex"))
|
746
861
|
# # =>
|
747
862
|
# # shape: (3, 1)
|
748
|
-
# #
|
749
|
-
# # │ encoded
|
750
|
-
# # │ ---
|
751
|
-
# # │ binary
|
752
|
-
# #
|
753
|
-
# # │
|
754
|
-
# # │
|
755
|
-
# # │ null
|
756
|
-
# #
|
863
|
+
# # ┌─────────┐
|
864
|
+
# # │ encoded │
|
865
|
+
# # │ --- │
|
866
|
+
# # │ binary │
|
867
|
+
# # ╞═════════╡
|
868
|
+
# # │ b"foo" │
|
869
|
+
# # │ b"bar" │
|
870
|
+
# # │ null │
|
871
|
+
# # └─────────┘
|
757
872
|
def decode(encoding, strict: true)
|
758
873
|
if encoding == "hex"
|
759
874
|
Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
|
@@ -824,6 +939,7 @@ module Polars
|
|
824
939
|
# # │ 678 │
|
825
940
|
# # └─────┘
|
826
941
|
def extract(pattern, group_index: 1)
|
942
|
+
pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
|
827
943
|
Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
|
828
944
|
end
|
829
945
|
|
@@ -859,6 +975,62 @@ module Polars
|
|
859
975
|
Utils.wrap_expr(_rbexpr.str_extract_all(pattern._rbexpr))
|
860
976
|
end
|
861
977
|
|
978
|
+
# Extract all capture groups for the given regex pattern.
|
979
|
+
#
|
980
|
+
# @param pattern [String]
|
981
|
+
# A valid regular expression pattern containing at least one capture group,
|
982
|
+
# compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
983
|
+
#
|
984
|
+
# @return [Expr]
|
985
|
+
#
|
986
|
+
# @example
|
987
|
+
# df = Polars::DataFrame.new(
|
988
|
+
# {
|
989
|
+
# "url": [
|
990
|
+
# "http://vote.com/ballon_dor?candidate=messi&ref=python",
|
991
|
+
# "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
|
992
|
+
# "http://vote.com/ballon_dor?error=404&ref=rust"
|
993
|
+
# ]
|
994
|
+
# }
|
995
|
+
# )
|
996
|
+
# pattern = /candidate=(?<candidate>\w+)&ref=(?<ref>\w+)/.to_s
|
997
|
+
# df.select(captures: Polars.col("url").str.extract_groups(pattern)).unnest(
|
998
|
+
# "captures"
|
999
|
+
# )
|
1000
|
+
# # =>
|
1001
|
+
# # shape: (3, 2)
|
1002
|
+
# # ┌───────────┬────────┐
|
1003
|
+
# # │ candidate ┆ ref │
|
1004
|
+
# # │ --- ┆ --- │
|
1005
|
+
# # │ str ┆ str │
|
1006
|
+
# # ╞═══════════╪════════╡
|
1007
|
+
# # │ messi ┆ python │
|
1008
|
+
# # │ weghorst ┆ polars │
|
1009
|
+
# # │ null ┆ null │
|
1010
|
+
# # └───────────┴────────┘
|
1011
|
+
#
|
1012
|
+
# @example Unnamed groups have their numerical position converted to a string:
|
1013
|
+
# pattern = /candidate=(\w+)&ref=(\w+)/.to_s
|
1014
|
+
# (
|
1015
|
+
# df.with_columns(
|
1016
|
+
# captures: Polars.col("url").str.extract_groups(pattern)
|
1017
|
+
# ).with_columns(name: Polars.col("captures").struct["1"].str.to_uppercase)
|
1018
|
+
# )
|
1019
|
+
# # =>
|
1020
|
+
# # shape: (3, 3)
|
1021
|
+
# # ┌───────────────────────────────────┬───────────────────────┬──────────┐
|
1022
|
+
# # │ url ┆ captures ┆ name │
|
1023
|
+
# # │ --- ┆ --- ┆ --- │
|
1024
|
+
# # │ str ┆ struct[2] ┆ str │
|
1025
|
+
# # ╞═══════════════════════════════════╪═══════════════════════╪══════════╡
|
1026
|
+
# # │ http://vote.com/ballon_dor?candi… ┆ {"messi","python"} ┆ MESSI │
|
1027
|
+
# # │ http://vote.com/ballon_dor?candi… ┆ {"weghorst","polars"} ┆ WEGHORST │
|
1028
|
+
# # │ http://vote.com/ballon_dor?error… ┆ {null,null} ┆ null │
|
1029
|
+
# # └───────────────────────────────────┴───────────────────────┴──────────┘
|
1030
|
+
def extract_groups(pattern)
|
1031
|
+
Utils.wrap_expr(_rbexpr.str_extract_groups(pattern))
|
1032
|
+
end
|
1033
|
+
|
862
1034
|
# Count all successive non-overlapping regex matches.
|
863
1035
|
#
|
864
1036
|
# @param pattern [String]
|
@@ -1059,6 +1231,28 @@ module Polars
|
|
1059
1231
|
Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
|
1060
1232
|
end
|
1061
1233
|
|
1234
|
+
# Returns string values in reversed order.
|
1235
|
+
#
|
1236
|
+
# @return [Expr]
|
1237
|
+
#
|
1238
|
+
# @example
|
1239
|
+
# df = Polars::DataFrame.new({"text" => ["foo", "bar", "man\u0303ana"]})
|
1240
|
+
# df.with_columns(Polars.col("text").str.reverse.alias("reversed"))
|
1241
|
+
# # =>
|
1242
|
+
# # shape: (3, 2)
|
1243
|
+
# # ┌────────┬──────────┐
|
1244
|
+
# # │ text ┆ reversed │
|
1245
|
+
# # │ --- ┆ --- │
|
1246
|
+
# # │ str ┆ str │
|
1247
|
+
# # ╞════════╪══════════╡
|
1248
|
+
# # │ foo ┆ oof │
|
1249
|
+
# # │ bar ┆ rab │
|
1250
|
+
# # │ mañana ┆ anañam │
|
1251
|
+
# # └────────┴──────────┘
|
1252
|
+
def reverse
|
1253
|
+
Utils.wrap_expr(_rbexpr.str_reverse)
|
1254
|
+
end
|
1255
|
+
|
1062
1256
|
# Create subslices of the string values of a Utf8 Series.
|
1063
1257
|
#
|
1064
1258
|
# @param offset [Integer]
|
@@ -1087,6 +1281,8 @@ module Polars
|
|
1087
1281
|
# # │ dragonfruit ┆ uit │
|
1088
1282
|
# # └─────────────┴──────────┘
|
1089
1283
|
def slice(offset, length = nil)
|
1284
|
+
offset = Utils.parse_as_expression(offset)
|
1285
|
+
length = Utils.parse_as_expression(length)
|
1090
1286
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
1091
1287
|
end
|
1092
1288
|
|
@@ -1158,6 +1354,7 @@ module Polars
|
|
1158
1354
|
# # │ null ┆ null │
|
1159
1355
|
# # └──────┴────────┘
|
1160
1356
|
def to_integer(base: 10, strict: true)
|
1357
|
+
base = Utils.parse_as_expression(base, str_as_lit: false)
|
1161
1358
|
Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
|
1162
1359
|
end
|
1163
1360
|
|
@@ -1193,6 +1390,126 @@ module Polars
|
|
1193
1390
|
to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
|
1194
1391
|
end
|
1195
1392
|
|
1393
|
+
# Use the aho-corasick algorithm to find matches.
|
1394
|
+
#
|
1395
|
+
# This version determines if any of the patterns find a match.
|
1396
|
+
#
|
1397
|
+
# @param patterns [String]
|
1398
|
+
# String patterns to search.
|
1399
|
+
# @param ascii_case_insensitive [Boolean]
|
1400
|
+
# Enable ASCII-aware case insensitive matching.
|
1401
|
+
# When this option is enabled, searching will be performed without respect
|
1402
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1403
|
+
#
|
1404
|
+
# @return [Expr]
|
1405
|
+
#
|
1406
|
+
# @example
|
1407
|
+
# df = Polars::DataFrame.new(
|
1408
|
+
# {
|
1409
|
+
# "lyrics": [
|
1410
|
+
# "Everybody wants to rule the world",
|
1411
|
+
# "Tell me what you want, what you really really want",
|
1412
|
+
# "Can you feel the love tonight"
|
1413
|
+
# ]
|
1414
|
+
# }
|
1415
|
+
# )
|
1416
|
+
# df.with_columns(
|
1417
|
+
# Polars.col("lyrics").str.contains_any(["you", "me"]).alias("contains_any")
|
1418
|
+
# )
|
1419
|
+
# # =>
|
1420
|
+
# # shape: (3, 2)
|
1421
|
+
# # ┌───────────────────────────────────┬──────────────┐
|
1422
|
+
# # │ lyrics ┆ contains_any │
|
1423
|
+
# # │ --- ┆ --- │
|
1424
|
+
# # │ str ┆ bool │
|
1425
|
+
# # ╞═══════════════════════════════════╪══════════════╡
|
1426
|
+
# # │ Everybody wants to rule the worl… ┆ false │
|
1427
|
+
# # │ Tell me what you want, what you … ┆ true │
|
1428
|
+
# # │ Can you feel the love tonight ┆ true │
|
1429
|
+
# # └───────────────────────────────────┴──────────────┘
|
1430
|
+
def contains_any(patterns, ascii_case_insensitive: false)
|
1431
|
+
patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
|
1432
|
+
Utils.wrap_expr(
|
1433
|
+
_rbexpr.str_contains_any(patterns, ascii_case_insensitive)
|
1434
|
+
)
|
1435
|
+
end
|
1436
|
+
|
1437
|
+
# Use the aho-corasick algorithm to replace many matches.
|
1438
|
+
#
|
1439
|
+
# @param patterns [String]
|
1440
|
+
# String patterns to search and replace.
|
1441
|
+
# @param replace_with [String]
|
1442
|
+
# Strings to replace where a pattern was a match.
|
1443
|
+
# This can be broadcasted. So it supports many:one and many:many.
|
1444
|
+
# @param ascii_case_insensitive [Boolean]
|
1445
|
+
# Enable ASCII-aware case insensitive matching.
|
1446
|
+
# When this option is enabled, searching will be performed without respect
|
1447
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1448
|
+
#
|
1449
|
+
# @return [Expr]
|
1450
|
+
#
|
1451
|
+
# @example
|
1452
|
+
# df = Polars::DataFrame.new(
|
1453
|
+
# {
|
1454
|
+
# "lyrics": [
|
1455
|
+
# "Everybody wants to rule the world",
|
1456
|
+
# "Tell me what you want, what you really really want",
|
1457
|
+
# "Can you feel the love tonight"
|
1458
|
+
# ]
|
1459
|
+
# }
|
1460
|
+
# )
|
1461
|
+
# df.with_columns(
|
1462
|
+
# Polars.col("lyrics")
|
1463
|
+
# .str.replace_many(
|
1464
|
+
# ["me", "you", "they"],
|
1465
|
+
# ""
|
1466
|
+
# )
|
1467
|
+
# .alias("removes_pronouns")
|
1468
|
+
# )
|
1469
|
+
# # =>
|
1470
|
+
# # shape: (3, 2)
|
1471
|
+
# # ┌───────────────────────────────────┬───────────────────────────────────┐
|
1472
|
+
# # │ lyrics ┆ removes_pronouns │
|
1473
|
+
# # │ --- ┆ --- │
|
1474
|
+
# # │ str ┆ str │
|
1475
|
+
# # ╞═══════════════════════════════════╪═══════════════════════════════════╡
|
1476
|
+
# # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
|
1477
|
+
# # │ Tell me what you want, what you … ┆ Tell what want, what really r… │
|
1478
|
+
# # │ Can you feel the love tonight ┆ Can feel the love tonight │
|
1479
|
+
# # └───────────────────────────────────┴───────────────────────────────────┘
|
1480
|
+
#
|
1481
|
+
# @example
|
1482
|
+
# df.with_columns(
|
1483
|
+
# Polars.col("lyrics")
|
1484
|
+
# .str.replace_many(
|
1485
|
+
# ["me", "you"],
|
1486
|
+
# ["you", "me"]
|
1487
|
+
# )
|
1488
|
+
# .alias("confusing")
|
1489
|
+
# )
|
1490
|
+
# # =>
|
1491
|
+
# # shape: (3, 2)
|
1492
|
+
# # ┌───────────────────────────────────┬───────────────────────────────────┐
|
1493
|
+
# # │ lyrics ┆ confusing │
|
1494
|
+
# # │ --- ┆ --- │
|
1495
|
+
# # │ str ┆ str │
|
1496
|
+
# # ╞═══════════════════════════════════╪═══════════════════════════════════╡
|
1497
|
+
# # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
|
1498
|
+
# # │ Tell me what you want, what you … ┆ Tell you what me want, what me r… │
|
1499
|
+
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
1500
|
+
# # └───────────────────────────────────┴───────────────────────────────────┘
|
1501
|
+
def replace_many(patterns, replace_with, ascii_case_insensitive: false)
|
1502
|
+
patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
|
1503
|
+
replace_with = Utils.parse_as_expression(
|
1504
|
+
replace_with, str_as_lit: true, list_as_lit: false
|
1505
|
+
)
|
1506
|
+
Utils.wrap_expr(
|
1507
|
+
_rbexpr.str_replace_many(
|
1508
|
+
patterns, replace_with, ascii_case_insensitive
|
1509
|
+
)
|
1510
|
+
)
|
1511
|
+
end
|
1512
|
+
|
1196
1513
|
private
|
1197
1514
|
|
1198
1515
|
def _validate_format_argument(format)
|
@@ -353,8 +353,8 @@ module Polars
|
|
353
353
|
# # shape: (3,)
|
354
354
|
# # Series: '' [binary]
|
355
355
|
# # [
|
356
|
-
# #
|
357
|
-
# #
|
356
|
+
# # b"foo"
|
357
|
+
# # b"bar"
|
358
358
|
# # null
|
359
359
|
# # ]
|
360
360
|
def decode(encoding, strict: false)
|
@@ -690,11 +690,11 @@ module Polars
|
|
690
690
|
# sign character rather than before. The original string is returned if width is
|
691
691
|
# less than or equal to `s.length`.
|
692
692
|
#
|
693
|
-
# @param
|
693
|
+
# @param length [Integer]
|
694
694
|
# Fill the value up to this length.
|
695
695
|
#
|
696
696
|
# @return [Series]
|
697
|
-
def zfill(
|
697
|
+
def zfill(length)
|
698
698
|
super
|
699
699
|
end
|
700
700
|
|