polars-df 0.10.0-arm64-darwin → 0.12.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/LICENSE-THIRD-PARTY.txt +1127 -867
- data/README.md +6 -6
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +17 -4
data/lib/polars/string_expr.rb
CHANGED
@@ -39,7 +39,7 @@ module Polars
|
|
39
39
|
# # ]
|
40
40
|
def to_date(format = nil, strict: true, exact: true, cache: true)
|
41
41
|
_validate_format_argument(format)
|
42
|
-
Utils.wrap_expr(
|
42
|
+
Utils.wrap_expr(_rbexpr.str_to_date(format, strict, exact, cache))
|
43
43
|
end
|
44
44
|
|
45
45
|
# Convert a Utf8 column into a Datetime column.
|
@@ -83,14 +83,14 @@ module Polars
|
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
85
|
cache: true,
|
86
|
-
use_earliest: nil,
|
87
86
|
ambiguous: "raise"
|
88
87
|
)
|
89
88
|
_validate_format_argument(format)
|
90
|
-
ambiguous
|
91
|
-
|
89
|
+
unless ambiguous.is_a?(Expr)
|
90
|
+
ambiguous = Polars.lit(ambiguous)
|
91
|
+
end
|
92
92
|
Utils.wrap_expr(
|
93
|
-
|
93
|
+
_rbexpr.str_to_datetime(
|
94
94
|
format,
|
95
95
|
time_unit,
|
96
96
|
time_zone,
|
@@ -331,7 +331,7 @@ module Polars
|
|
331
331
|
#
|
332
332
|
# @example
|
333
333
|
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
334
|
-
# df.select(Polars.col("foo").str.
|
334
|
+
# df.select(Polars.col("foo").str.join("-"))
|
335
335
|
# # =>
|
336
336
|
# # shape: (1, 1)
|
337
337
|
# # ┌─────┐
|
@@ -344,7 +344,7 @@ module Polars
|
|
344
344
|
#
|
345
345
|
# @example
|
346
346
|
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
347
|
-
# df.select(Polars.col("foo").str.
|
347
|
+
# df.select(Polars.col("foo").str.join("-", ignore_nulls: false))
|
348
348
|
# # =>
|
349
349
|
# # shape: (1, 1)
|
350
350
|
# # ┌──────┐
|
@@ -354,9 +354,10 @@ module Polars
|
|
354
354
|
# # ╞══════╡
|
355
355
|
# # │ null │
|
356
356
|
# # └──────┘
|
357
|
-
def
|
358
|
-
Utils.wrap_expr(_rbexpr.
|
357
|
+
def join(delimiter = "-", ignore_nulls: true)
|
358
|
+
Utils.wrap_expr(_rbexpr.str_join(delimiter, ignore_nulls))
|
359
359
|
end
|
360
|
+
alias_method :concat, :join
|
360
361
|
|
361
362
|
# Transform to uppercase variant.
|
362
363
|
#
|
@@ -446,7 +447,7 @@ module Polars
|
|
446
447
|
# # │ both │
|
447
448
|
# # └───────┘
|
448
449
|
def strip_chars(characters = nil)
|
449
|
-
characters = Utils.
|
450
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
450
451
|
Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
|
451
452
|
end
|
452
453
|
alias_method :strip, :strip_chars
|
@@ -473,7 +474,7 @@ module Polars
|
|
473
474
|
# # │ both │
|
474
475
|
# # └────────┘
|
475
476
|
def strip_chars_start(characters = nil)
|
476
|
-
characters = Utils.
|
477
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
477
478
|
Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
|
478
479
|
end
|
479
480
|
alias_method :lstrip, :strip_chars_start
|
@@ -500,7 +501,7 @@ module Polars
|
|
500
501
|
# # │ both │
|
501
502
|
# # └───────┘
|
502
503
|
def strip_chars_end(characters = nil)
|
503
|
-
characters = Utils.
|
504
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
504
505
|
Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
|
505
506
|
end
|
506
507
|
alias_method :rstrip, :strip_chars_end
|
@@ -530,7 +531,7 @@ module Polars
|
|
530
531
|
# # │ bar ┆ bar │
|
531
532
|
# # └───────────┴──────────┘
|
532
533
|
def strip_prefix(prefix)
|
533
|
-
prefix = Utils.
|
534
|
+
prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
|
534
535
|
Utils.wrap_expr(_rbexpr.str_strip_prefix(prefix))
|
535
536
|
end
|
536
537
|
|
@@ -560,7 +561,7 @@ module Polars
|
|
560
561
|
# # │ bar ┆ │
|
561
562
|
# # └───────────┴──────────┘
|
562
563
|
def strip_suffix(suffix)
|
563
|
-
suffix = Utils.
|
564
|
+
suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
|
564
565
|
Utils.wrap_expr(_rbexpr.str_strip_suffix(suffix))
|
565
566
|
end
|
566
567
|
|
@@ -654,7 +655,7 @@ module Polars
|
|
654
655
|
# # │ null ┆ null │
|
655
656
|
# # └────────┴────────┘
|
656
657
|
def zfill(length)
|
657
|
-
length = Utils.
|
658
|
+
length = Utils.parse_into_expression(length)
|
658
659
|
Utils.wrap_expr(_rbexpr.str_zfill(length))
|
659
660
|
end
|
660
661
|
|
@@ -689,7 +690,7 @@ module Polars
|
|
689
690
|
# # │ null ┆ null ┆ null │
|
690
691
|
# # └─────────────┴───────┴─────────┘
|
691
692
|
def contains(pattern, literal: false, strict: true)
|
692
|
-
pattern = Utils.
|
693
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
693
694
|
Utils.wrap_expr(_rbexpr.str_contains(pattern, literal, strict))
|
694
695
|
end
|
695
696
|
|
@@ -729,7 +730,7 @@ module Polars
|
|
729
730
|
# # │ mango │
|
730
731
|
# # └────────┘
|
731
732
|
def ends_with(sub)
|
732
|
-
sub = Utils.
|
733
|
+
sub = Utils.parse_into_expression(sub, str_as_lit: true)
|
733
734
|
Utils.wrap_expr(_rbexpr.str_ends_with(sub))
|
734
735
|
end
|
735
736
|
|
@@ -769,7 +770,7 @@ module Polars
|
|
769
770
|
# # │ apple │
|
770
771
|
# # └────────┘
|
771
772
|
def starts_with(sub)
|
772
|
-
sub = Utils.
|
773
|
+
sub = Utils.parse_into_expression(sub, str_as_lit: true)
|
773
774
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
774
775
|
end
|
775
776
|
|
@@ -840,6 +841,7 @@ module Polars
|
|
840
841
|
# # │ true │
|
841
842
|
# # └──────────┘
|
842
843
|
def json_path_match(json_path)
|
844
|
+
json_path = Utils.parse_into_expression(json_path, str_as_lit: true)
|
843
845
|
Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
|
844
846
|
end
|
845
847
|
|
@@ -939,7 +941,7 @@ module Polars
|
|
939
941
|
# # │ 678 │
|
940
942
|
# # └─────┘
|
941
943
|
def extract(pattern, group_index: 1)
|
942
|
-
pattern = Utils.
|
944
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
943
945
|
Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
|
944
946
|
end
|
945
947
|
|
@@ -971,8 +973,8 @@ module Polars
|
|
971
973
|
# # │ ["678", "910"] │
|
972
974
|
# # └────────────────┘
|
973
975
|
def extract_all(pattern)
|
974
|
-
pattern = Utils.
|
975
|
-
Utils.wrap_expr(_rbexpr.str_extract_all(pattern
|
976
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
977
|
+
Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
|
976
978
|
end
|
977
979
|
|
978
980
|
# Extract all capture groups for the given regex pattern.
|
@@ -1018,15 +1020,15 @@ module Polars
|
|
1018
1020
|
# )
|
1019
1021
|
# # =>
|
1020
1022
|
# # shape: (3, 3)
|
1021
|
-
# #
|
1022
|
-
# # │ url
|
1023
|
-
# # │ ---
|
1024
|
-
# # │ str
|
1025
|
-
# #
|
1026
|
-
# # │ http://vote.com/ballon_dor?
|
1027
|
-
# # │ http://vote.com/ballon_dor?
|
1028
|
-
# # │ http://vote.com/ballon_dor?
|
1029
|
-
# #
|
1023
|
+
# # ┌─────────────────────────────────┬───────────────────────┬──────────┐
|
1024
|
+
# # │ url ┆ captures ┆ name │
|
1025
|
+
# # │ --- ┆ --- ┆ --- │
|
1026
|
+
# # │ str ┆ struct[2] ┆ str │
|
1027
|
+
# # ╞═════════════════════════════════╪═══════════════════════╪══════════╡
|
1028
|
+
# # │ http://vote.com/ballon_dor?can… ┆ {"messi","python"} ┆ MESSI │
|
1029
|
+
# # │ http://vote.com/ballon_dor?can… ┆ {"weghorst","polars"} ┆ WEGHORST │
|
1030
|
+
# # │ http://vote.com/ballon_dor?err… ┆ {null,null} ┆ null │
|
1031
|
+
# # └─────────────────────────────────┴───────────────────────┴──────────┘
|
1030
1032
|
def extract_groups(pattern)
|
1031
1033
|
Utils.wrap_expr(_rbexpr.str_extract_groups(pattern))
|
1032
1034
|
end
|
@@ -1056,7 +1058,7 @@ module Polars
|
|
1056
1058
|
# # │ 6 │
|
1057
1059
|
# # └──────────────┘
|
1058
1060
|
def count_matches(pattern, literal: false)
|
1059
|
-
pattern = Utils.
|
1061
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1060
1062
|
Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
|
1061
1063
|
end
|
1062
1064
|
alias_method :count_match, :count_matches
|
@@ -1085,12 +1087,11 @@ module Polars
|
|
1085
1087
|
# # │ ["foo", "bar", "baz"] │
|
1086
1088
|
# # └───────────────────────┘
|
1087
1089
|
def split(by, inclusive: false)
|
1088
|
-
by = Utils.
|
1090
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1089
1091
|
if inclusive
|
1090
|
-
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
1091
|
-
else
|
1092
|
-
Utils.wrap_expr(_rbexpr.str_split(by))
|
1092
|
+
return Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
1093
1093
|
end
|
1094
|
+
Utils.wrap_expr(_rbexpr.str_split(by))
|
1094
1095
|
end
|
1095
1096
|
|
1096
1097
|
# Split the string by a substring using `n` splits.
|
@@ -1128,7 +1129,7 @@ module Polars
|
|
1128
1129
|
# # │ {"d","4"} │
|
1129
1130
|
# # └─────────────┘
|
1130
1131
|
def split_exact(by, n, inclusive: false)
|
1131
|
-
by = Utils.
|
1132
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1132
1133
|
if inclusive
|
1133
1134
|
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
1134
1135
|
else
|
@@ -1165,7 +1166,7 @@ module Polars
|
|
1165
1166
|
# # │ {"foo","bar baz"} │
|
1166
1167
|
# # └───────────────────┘
|
1167
1168
|
def splitn(by, n)
|
1168
|
-
by = Utils.
|
1169
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1169
1170
|
Utils.wrap_expr(_rbexpr.str_splitn(by, n))
|
1170
1171
|
end
|
1171
1172
|
|
@@ -1196,9 +1197,9 @@ module Polars
|
|
1196
1197
|
# # │ 2 ┆ abc456 │
|
1197
1198
|
# # └─────┴────────┘
|
1198
1199
|
def replace(pattern, value, literal: false, n: 1)
|
1199
|
-
pattern = Utils.
|
1200
|
-
value = Utils.
|
1201
|
-
Utils.wrap_expr(_rbexpr.str_replace_n(pattern
|
1200
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1201
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
1202
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern, value, literal, n))
|
1202
1203
|
end
|
1203
1204
|
|
1204
1205
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -1226,9 +1227,9 @@ module Polars
|
|
1226
1227
|
# # │ 2 ┆ 123-123 │
|
1227
1228
|
# # └─────┴─────────┘
|
1228
1229
|
def replace_all(pattern, value, literal: false)
|
1229
|
-
pattern = Utils.
|
1230
|
-
value = Utils.
|
1231
|
-
Utils.wrap_expr(_rbexpr.str_replace_all(pattern
|
1230
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1231
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
1232
|
+
Utils.wrap_expr(_rbexpr.str_replace_all(pattern, value, literal))
|
1232
1233
|
end
|
1233
1234
|
|
1234
1235
|
# Returns string values in reversed order.
|
@@ -1281,36 +1282,11 @@ module Polars
|
|
1281
1282
|
# # │ dragonfruit ┆ uit │
|
1282
1283
|
# # └─────────────┴──────────┘
|
1283
1284
|
def slice(offset, length = nil)
|
1284
|
-
offset = Utils.
|
1285
|
-
length = Utils.
|
1285
|
+
offset = Utils.parse_into_expression(offset)
|
1286
|
+
length = Utils.parse_into_expression(length)
|
1286
1287
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
1287
1288
|
end
|
1288
1289
|
|
1289
|
-
# Returns a column with a separate row for every string character.
|
1290
|
-
#
|
1291
|
-
# @return [Expr]
|
1292
|
-
#
|
1293
|
-
# @example
|
1294
|
-
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
1295
|
-
# df.select(Polars.col("a").str.explode)
|
1296
|
-
# # =>
|
1297
|
-
# # shape: (6, 1)
|
1298
|
-
# # ┌─────┐
|
1299
|
-
# # │ a │
|
1300
|
-
# # │ --- │
|
1301
|
-
# # │ str │
|
1302
|
-
# # ╞═════╡
|
1303
|
-
# # │ f │
|
1304
|
-
# # │ o │
|
1305
|
-
# # │ o │
|
1306
|
-
# # │ b │
|
1307
|
-
# # │ a │
|
1308
|
-
# # │ r │
|
1309
|
-
# # └─────┘
|
1310
|
-
def explode
|
1311
|
-
Utils.wrap_expr(_rbexpr.str_explode)
|
1312
|
-
end
|
1313
|
-
|
1314
1290
|
# Convert an Utf8 column into an Int64 column with base radix.
|
1315
1291
|
#
|
1316
1292
|
# @param base [Integer]
|
@@ -1354,7 +1330,7 @@ module Polars
|
|
1354
1330
|
# # │ null ┆ null │
|
1355
1331
|
# # └──────┴────────┘
|
1356
1332
|
def to_integer(base: 10, strict: true)
|
1357
|
-
base = Utils.
|
1333
|
+
base = Utils.parse_into_expression(base, str_as_lit: false)
|
1358
1334
|
Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
|
1359
1335
|
end
|
1360
1336
|
|
@@ -1418,17 +1394,17 @@ module Polars
|
|
1418
1394
|
# )
|
1419
1395
|
# # =>
|
1420
1396
|
# # shape: (3, 2)
|
1421
|
-
# #
|
1422
|
-
# # │ lyrics
|
1423
|
-
# # │ ---
|
1424
|
-
# # │ str
|
1425
|
-
# #
|
1426
|
-
# # │ Everybody wants to rule the
|
1427
|
-
# # │ Tell me what you want, what
|
1428
|
-
# # │ Can you feel the love tonight
|
1429
|
-
# #
|
1397
|
+
# # ┌─────────────────────────────────┬──────────────┐
|
1398
|
+
# # │ lyrics ┆ contains_any │
|
1399
|
+
# # │ --- ┆ --- │
|
1400
|
+
# # │ str ┆ bool │
|
1401
|
+
# # ╞═════════════════════════════════╪══════════════╡
|
1402
|
+
# # │ Everybody wants to rule the wo… ┆ false │
|
1403
|
+
# # │ Tell me what you want, what yo… ┆ true │
|
1404
|
+
# # │ Can you feel the love tonight ┆ true │
|
1405
|
+
# # └─────────────────────────────────┴──────────────┘
|
1430
1406
|
def contains_any(patterns, ascii_case_insensitive: false)
|
1431
|
-
patterns = Utils.
|
1407
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
|
1432
1408
|
Utils.wrap_expr(
|
1433
1409
|
_rbexpr.str_contains_any(patterns, ascii_case_insensitive)
|
1434
1410
|
)
|
@@ -1468,15 +1444,15 @@ module Polars
|
|
1468
1444
|
# )
|
1469
1445
|
# # =>
|
1470
1446
|
# # shape: (3, 2)
|
1471
|
-
# #
|
1472
|
-
# # │ lyrics
|
1473
|
-
# # │ ---
|
1474
|
-
# # │ str
|
1475
|
-
# #
|
1476
|
-
# # │ Everybody wants to rule the
|
1477
|
-
# # │ Tell me what you want, what
|
1478
|
-
# # │ Can you feel the love tonight
|
1479
|
-
# #
|
1447
|
+
# # ┌─────────────────────────────────┬─────────────────────────────────┐
|
1448
|
+
# # │ lyrics ┆ removes_pronouns │
|
1449
|
+
# # │ --- ┆ --- │
|
1450
|
+
# # │ str ┆ str │
|
1451
|
+
# # ╞═════════════════════════════════╪═════════════════════════════════╡
|
1452
|
+
# # │ Everybody wants to rule the wo… ┆ Everybody wants to rule the wo… │
|
1453
|
+
# # │ Tell me what you want, what yo… ┆ Tell what want, what really… │
|
1454
|
+
# # │ Can you feel the love tonight ┆ Can feel the love tonight │
|
1455
|
+
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1480
1456
|
#
|
1481
1457
|
# @example
|
1482
1458
|
# df.with_columns(
|
@@ -1489,19 +1465,19 @@ module Polars
|
|
1489
1465
|
# )
|
1490
1466
|
# # =>
|
1491
1467
|
# # shape: (3, 2)
|
1492
|
-
# #
|
1493
|
-
# # │ lyrics
|
1494
|
-
# # │ ---
|
1495
|
-
# # │ str
|
1496
|
-
# #
|
1497
|
-
# # │ Everybody wants to rule the
|
1498
|
-
# # │ Tell me what you want, what
|
1499
|
-
# # │ Can you feel the love tonight
|
1500
|
-
# #
|
1468
|
+
# # ┌─────────────────────────────────┬─────────────────────────────────┐
|
1469
|
+
# # │ lyrics ┆ confusing │
|
1470
|
+
# # │ --- ┆ --- │
|
1471
|
+
# # │ str ┆ str │
|
1472
|
+
# # ╞═════════════════════════════════╪═════════════════════════════════╡
|
1473
|
+
# # │ Everybody wants to rule the wo… ┆ Everybody wants to rule the wo… │
|
1474
|
+
# # │ Tell me what you want, what yo… ┆ Tell you what me want, what me… │
|
1475
|
+
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
1476
|
+
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1501
1477
|
def replace_many(patterns, replace_with, ascii_case_insensitive: false)
|
1502
|
-
patterns = Utils.
|
1503
|
-
replace_with = Utils.
|
1504
|
-
replace_with, str_as_lit: true,
|
1478
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
|
1479
|
+
replace_with = Utils.parse_into_expression(
|
1480
|
+
replace_with, str_as_lit: true, list_as_series: true
|
1505
1481
|
)
|
1506
1482
|
Utils.wrap_expr(
|
1507
1483
|
_rbexpr.str_replace_many(
|
@@ -83,7 +83,7 @@ module Polars
|
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
85
|
cache: true,
|
86
|
-
|
86
|
+
ambiguous: "raise"
|
87
87
|
)
|
88
88
|
super
|
89
89
|
end
|
@@ -233,7 +233,7 @@ module Polars
|
|
233
233
|
# @return [Series]
|
234
234
|
#
|
235
235
|
# @example
|
236
|
-
# Polars::Series.new([1, nil, 2]).str.
|
236
|
+
# Polars::Series.new([1, nil, 2]).str.join("-")
|
237
237
|
# # =>
|
238
238
|
# # shape: (1,)
|
239
239
|
# # Series: '' [str]
|
@@ -242,16 +242,17 @@ module Polars
|
|
242
242
|
# # ]
|
243
243
|
#
|
244
244
|
# @example
|
245
|
-
# Polars::Series.new([1, nil, 2]).str.
|
245
|
+
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
246
246
|
# # =>
|
247
247
|
# # shape: (1,)
|
248
248
|
# # Series: '' [str]
|
249
249
|
# # [
|
250
250
|
# # null
|
251
251
|
# # ]
|
252
|
-
def
|
252
|
+
def join(delimiter = "-", ignore_nulls: true)
|
253
253
|
super
|
254
254
|
end
|
255
|
+
alias_method :concat, :join
|
255
256
|
|
256
257
|
# Check if strings in Series contain a substring that matches a regex.
|
257
258
|
#
|
data/lib/polars/testing.rb
CHANGED
@@ -271,9 +271,9 @@ module Polars
|
|
271
271
|
|
272
272
|
def _assert_correct_input_type(left, right)
|
273
273
|
if left.is_a?(DataFrame) && right.is_a?(DataFrame)
|
274
|
-
|
274
|
+
false
|
275
275
|
elsif left.is_a?(LazyFrame) && right.is_a?(DataFrame)
|
276
|
-
|
276
|
+
true
|
277
277
|
else
|
278
278
|
raise_assertion_error(
|
279
279
|
"inputs",
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module Polars
  # Helpers that convert between Ruby date/time/decimal values and their
  # integer or string representations.
  module Utils
    # Normalises a duration argument to a duration string.
    #
    # @param td [String, nil, Object] duration value
    # @return [String, nil] `td` itself when it is nil or a String; otherwise
    #   whatever `_timedelta_to_duration_string` returns
    #
    # NOTE(review): `_timedelta_to_duration_string` is not defined in this
    # file - confirm it is provided elsewhere in Utils.
    def self.parse_as_duration_string(td)
      return td if td.nil? || td.is_a?(::String)

      _timedelta_to_duration_string(td)
    end

    # Identity conversion: returns `td` unchanged.
    #
    # @param td [Object]
    # @return [Object] the same object
    def self._timedelta_to_pl_duration(td)
      td
    end

    # Flips the sign of a duration string by removing or adding a leading "-".
    #
    # @param duration [String] e.g. "1d" or "-1d"
    # @return [String] e.g. "-1d" or "1d"
    def self.negate_duration_string(duration)
      duration.start_with?("-") ? duration[1..] : "-#{duration}"
    end

    # Converts a date to a whole number of days since the Unix epoch.
    #
    # @param d [#to_datetime] date-like value
    # @return [Integer] epoch seconds integer-divided by SECONDS_PER_DAY
    def self.date_to_int(d)
      d.to_datetime.to_time.to_i / SECONDS_PER_DAY
    end

    # Converts a datetime to an integer count of `time_unit` ticks since the
    # Unix epoch.
    #
    # @param dt [#to_datetime] datetime-like value
    # @param time_unit [String, nil] "ns", "us" or "ms"; nil is treated as "ns"
    # @return [Integer]
    # @raise [ArgumentError] if `time_unit` is not one of the supported values
    def self.datetime_to_int(dt, time_unit)
      dt = dt.to_datetime.to_time
      case time_unit
      when "ns", nil
        # Ruby has ns precision, so nil falls back to nanoseconds.
        dt.to_i * NS_PER_SECOND + dt.nsec
      when "us"
        dt.to_i * US_PER_SECOND + dt.usec
      when "ms"
        dt.to_i * MS_PER_SECOND + dt.usec / 1000
      else
        # Interpolate the actual parameter; the previous `tu` was an undefined
        # name and turned this branch into a NameError.
        raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
      end
    end

    # Converts a day count since the Unix epoch to a Ruby Date.
    #
    # @param value [Integer] days since 1970-01-01
    # @return [Date]
    def self._to_ruby_date(value)
      # days to seconds
      # important to create from utc. Not doing this leads
      # to inconsistencies dependent on the timezone you are in.
      ::Time.at(value * 86400).utc.to_date
    end

    # Converts a nanosecond count to a UTC Time on 2000-01-01 carrying that
    # time of day.
    #
    # @param value [Integer] nanoseconds
    # @return [Time]
    def self._to_ruby_time(value)
      return ::Time.utc(2000, 1, 1) if value == 0

      seconds, nanoseconds = value.divmod(1_000_000_000)
      minutes, seconds = seconds.divmod(60)
      hours, minutes = minutes.divmod(60)
      # Time.utc takes microseconds as its last argument, hence the division.
      ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
    end

    # Converts an epoch offset expressed in `time_unit` ticks to a UTC Time.
    #
    # @param value [Integer] ticks since the Unix epoch
    # @param time_unit [String] "ns", "us" or "ms"
    # @param time_zone [String, nil] only nil, "" and "UTC" are handled
    # @return [Time]
    # @raise [Todo] for any other time zone
    # @raise [ArgumentError] for an unsupported `time_unit`
    def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
      raise Todo unless time_zone.nil? || time_zone == "" || time_zone == "UTC"

      case time_unit
      when "ns"
        ::Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc
      when "us"
        ::Time.at(value / 1_000_000, value % 1_000_000, :usec).utc
      when "ms"
        ::Time.at(value / 1000, value % 1000, :millisecond).utc
      else
        raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
      end
    end

    # Converts a duration expressed in `time_unit` ticks to float seconds.
    #
    # @param value [Numeric] tick count
    # @param time_unit [String] "ns", "us" or "ms"
    # @return [Float] seconds
    # @raise [ArgumentError] for an unsupported `time_unit`
    def self._to_ruby_duration(value, time_unit = "ns")
      case time_unit
      when "ns"
        value / 1e9
      when "us"
        value / 1e6
      when "ms"
        value / 1e3
      else
        raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
      end
    end

    # Builds a BigDecimal from the string "<digits>e<scale>".
    #
    # @param digits [Object] interpolated as the mantissa
    # @param scale [Object] interpolated as the exponent
    # @return [BigDecimal]
    def self._to_ruby_decimal(digits, scale)
      BigDecimal("#{digits}e#{scale}")
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module Polars
  # Helpers that turn user-supplied arguments into expression objects.
  module Utils
    # Converts a single input into the `_rbexpr` of an expression.
    #
    # - An Expr is used as-is (passed through `_structify_expression` when
    #   `structify` is set).
    # - A String or Symbol becomes `F.col(input)` unless `str_as_lit` is set.
    # - An Array becomes a literal Series when `list_as_series` is set.
    # - Anything else becomes `F.lit(input, dtype: dtype)`.
    #
    # NOTE(review): `_structify_expression` and `F` are not defined in this
    # file - confirm they are provided elsewhere.
    #
    # @param input [Object]
    # @param str_as_lit [Boolean]
    # @param list_as_series [Boolean]
    # @param structify [Boolean]
    # @param dtype [Object, nil] forwarded to `F.lit`
    # @return [Object] the `_rbexpr` of the resulting expression
    def self.parse_into_expression(
      input,
      str_as_lit: false,
      list_as_series: false,
      structify: false,
      dtype: nil
    )
      parsed =
        if input.is_a?(Expr)
          structify ? _structify_expression(input) : input
        elsif (input.is_a?(::String) || input.is_a?(Symbol)) && !str_as_lit
          F.col(input)
        else
          literal = input
          literal = Series.new(input) if input.is_a?(::Array) && list_as_series
          F.lit(literal, dtype: dtype)
        end

      parsed._rbexpr
    end

    # Parses positional inputs followed by named inputs into one flat list.
    #
    # @param inputs [Array<Object>] positional inputs
    # @param __structify [Boolean] forwarded as `structify`
    # @param named_inputs [Hash] name => input; each result is aliased to the name
    # @return [Array<Object>]
    def self.parse_into_list_of_expressions(*inputs, __structify: false, **named_inputs)
      parsed = _parse_positional_inputs(inputs, structify: __structify)
      parsed.concat(_parse_named_inputs(named_inputs, structify: __structify)) if named_inputs.any?
      parsed
    end

    # Parses every positional input with `parse_into_expression`.
    #
    # @param inputs [Array<Object>]
    # @param structify [Boolean]
    # @return [Array<Object>]
    def self._parse_positional_inputs(inputs, structify: false)
      _parse_inputs_as_iterable(inputs).map do |item|
        parse_into_expression(item, structify: structify)
      end
    end

    # Unwraps the inputs: a single Array argument is treated as the list of
    # inputs itself; otherwise the inputs are returned as given.
    #
    # @param inputs [Array<Object>]
    # @return [Array<Object>]
    def self._parse_inputs_as_iterable(inputs)
      return [] if inputs.empty?
      return inputs[0] if inputs.length == 1 && inputs[0].is_a?(::Array)

      inputs
    end

    # Parses each named input and aliases the result to the stringified name.
    #
    # @param named_inputs [Hash] name => input
    # @param structify [Boolean]
    # @return [Array<Object>]
    def self._parse_named_inputs(named_inputs, structify: false)
      named_inputs.map do |label, value|
        parsed = parse_into_expression(value, structify: structify)
        parsed._alias(label.to_s)
      end
    end

    # Combines positional predicates and keyword constraints into a single
    # predicate.
    #
    # @param predicates [Array<Object>]
    # @param constraints [Hash] column name => value compared with `eq`
    # @return [Object]
    # @raise [TypeError] when neither predicates nor constraints are given
    def self.parse_predicates_constraints_into_expression(*predicates, **constraints)
      combined = _parse_positional_inputs(predicates)
      combined.concat(_parse_constraints(constraints)) if constraints.any?
      _combine_predicates(combined)
    end

    # Builds one `col(name).eq(value)` predicate per constraint.
    #
    # @param constraints [Hash] column name => value
    # @return [Array<Object>]
    def self._parse_constraints(constraints)
      constraints.map { |column, expected| Polars.col(column).eq(expected)._rbexpr }
    end

    # Reduces a list of predicates to one: a lone predicate is returned
    # directly, several are passed to `Plr.all_horizontal`.
    #
    # @param predicates [Array<Object>]
    # @return [Object]
    # @raise [TypeError] when no predicate is present
    def self._combine_predicates(predicates)
      raise TypeError, "at least one predicate or constraint must be provided" if predicates.none?

      predicates.length == 1 ? predicates[0] : Plr.all_horizontal(predicates)
    end
  end
end
|