polars-df 0.10.0-x86_64-linux → 0.12.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/LICENSE-THIRD-PARTY.txt +1125 -865
- data/README.md +6 -6
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +17 -4
data/lib/polars/string_expr.rb
CHANGED
@@ -39,7 +39,7 @@ module Polars
|
|
39
39
|
# # ]
|
40
40
|
def to_date(format = nil, strict: true, exact: true, cache: true)
|
41
41
|
_validate_format_argument(format)
|
42
|
-
Utils.wrap_expr(
|
42
|
+
Utils.wrap_expr(_rbexpr.str_to_date(format, strict, exact, cache))
|
43
43
|
end
|
44
44
|
|
45
45
|
# Convert a Utf8 column into a Datetime column.
|
@@ -83,14 +83,14 @@ module Polars
|
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
85
|
cache: true,
|
86
|
-
use_earliest: nil,
|
87
86
|
ambiguous: "raise"
|
88
87
|
)
|
89
88
|
_validate_format_argument(format)
|
90
|
-
ambiguous
|
91
|
-
|
89
|
+
unless ambiguous.is_a?(Expr)
|
90
|
+
ambiguous = Polars.lit(ambiguous)
|
91
|
+
end
|
92
92
|
Utils.wrap_expr(
|
93
|
-
|
93
|
+
_rbexpr.str_to_datetime(
|
94
94
|
format,
|
95
95
|
time_unit,
|
96
96
|
time_zone,
|
@@ -331,7 +331,7 @@ module Polars
|
|
331
331
|
#
|
332
332
|
# @example
|
333
333
|
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
334
|
-
# df.select(Polars.col("foo").str.
|
334
|
+
# df.select(Polars.col("foo").str.join("-"))
|
335
335
|
# # =>
|
336
336
|
# # shape: (1, 1)
|
337
337
|
# # ┌─────┐
|
@@ -344,7 +344,7 @@ module Polars
|
|
344
344
|
#
|
345
345
|
# @example
|
346
346
|
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
347
|
-
# df.select(Polars.col("foo").str.
|
347
|
+
# df.select(Polars.col("foo").str.join("-", ignore_nulls: false))
|
348
348
|
# # =>
|
349
349
|
# # shape: (1, 1)
|
350
350
|
# # ┌──────┐
|
@@ -354,9 +354,10 @@ module Polars
|
|
354
354
|
# # ╞══════╡
|
355
355
|
# # │ null │
|
356
356
|
# # └──────┘
|
357
|
-
def
|
358
|
-
Utils.wrap_expr(_rbexpr.
|
357
|
+
def join(delimiter = "-", ignore_nulls: true)
|
358
|
+
Utils.wrap_expr(_rbexpr.str_join(delimiter, ignore_nulls))
|
359
359
|
end
|
360
|
+
alias_method :concat, :join
|
360
361
|
|
361
362
|
# Transform to uppercase variant.
|
362
363
|
#
|
@@ -446,7 +447,7 @@ module Polars
|
|
446
447
|
# # │ both │
|
447
448
|
# # └───────┘
|
448
449
|
def strip_chars(characters = nil)
|
449
|
-
characters = Utils.
|
450
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
450
451
|
Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
|
451
452
|
end
|
452
453
|
alias_method :strip, :strip_chars
|
@@ -473,7 +474,7 @@ module Polars
|
|
473
474
|
# # │ both │
|
474
475
|
# # └────────┘
|
475
476
|
def strip_chars_start(characters = nil)
|
476
|
-
characters = Utils.
|
477
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
477
478
|
Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
|
478
479
|
end
|
479
480
|
alias_method :lstrip, :strip_chars_start
|
@@ -500,7 +501,7 @@ module Polars
|
|
500
501
|
# # │ both │
|
501
502
|
# # └───────┘
|
502
503
|
def strip_chars_end(characters = nil)
|
503
|
-
characters = Utils.
|
504
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
504
505
|
Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
|
505
506
|
end
|
506
507
|
alias_method :rstrip, :strip_chars_end
|
@@ -530,7 +531,7 @@ module Polars
|
|
530
531
|
# # │ bar ┆ bar │
|
531
532
|
# # └───────────┴──────────┘
|
532
533
|
def strip_prefix(prefix)
|
533
|
-
prefix = Utils.
|
534
|
+
prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
|
534
535
|
Utils.wrap_expr(_rbexpr.str_strip_prefix(prefix))
|
535
536
|
end
|
536
537
|
|
@@ -560,7 +561,7 @@ module Polars
|
|
560
561
|
# # │ bar ┆ │
|
561
562
|
# # └───────────┴──────────┘
|
562
563
|
def strip_suffix(suffix)
|
563
|
-
suffix = Utils.
|
564
|
+
suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
|
564
565
|
Utils.wrap_expr(_rbexpr.str_strip_suffix(suffix))
|
565
566
|
end
|
566
567
|
|
@@ -654,7 +655,7 @@ module Polars
|
|
654
655
|
# # │ null ┆ null │
|
655
656
|
# # └────────┴────────┘
|
656
657
|
def zfill(length)
|
657
|
-
length = Utils.
|
658
|
+
length = Utils.parse_into_expression(length)
|
658
659
|
Utils.wrap_expr(_rbexpr.str_zfill(length))
|
659
660
|
end
|
660
661
|
|
@@ -689,7 +690,7 @@ module Polars
|
|
689
690
|
# # │ null ┆ null ┆ null │
|
690
691
|
# # └─────────────┴───────┴─────────┘
|
691
692
|
def contains(pattern, literal: false, strict: true)
|
692
|
-
pattern = Utils.
|
693
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
693
694
|
Utils.wrap_expr(_rbexpr.str_contains(pattern, literal, strict))
|
694
695
|
end
|
695
696
|
|
@@ -729,7 +730,7 @@ module Polars
|
|
729
730
|
# # │ mango │
|
730
731
|
# # └────────┘
|
731
732
|
def ends_with(sub)
|
732
|
-
sub = Utils.
|
733
|
+
sub = Utils.parse_into_expression(sub, str_as_lit: true)
|
733
734
|
Utils.wrap_expr(_rbexpr.str_ends_with(sub))
|
734
735
|
end
|
735
736
|
|
@@ -769,7 +770,7 @@ module Polars
|
|
769
770
|
# # │ apple │
|
770
771
|
# # └────────┘
|
771
772
|
def starts_with(sub)
|
772
|
-
sub = Utils.
|
773
|
+
sub = Utils.parse_into_expression(sub, str_as_lit: true)
|
773
774
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
774
775
|
end
|
775
776
|
|
@@ -840,6 +841,7 @@ module Polars
|
|
840
841
|
# # │ true │
|
841
842
|
# # └──────────┘
|
842
843
|
def json_path_match(json_path)
|
844
|
+
json_path = Utils.parse_into_expression(json_path, str_as_lit: true)
|
843
845
|
Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
|
844
846
|
end
|
845
847
|
|
@@ -939,7 +941,7 @@ module Polars
|
|
939
941
|
# # │ 678 │
|
940
942
|
# # └─────┘
|
941
943
|
def extract(pattern, group_index: 1)
|
942
|
-
pattern = Utils.
|
944
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
943
945
|
Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
|
944
946
|
end
|
945
947
|
|
@@ -971,8 +973,8 @@ module Polars
|
|
971
973
|
# # │ ["678", "910"] │
|
972
974
|
# # └────────────────┘
|
973
975
|
def extract_all(pattern)
|
974
|
-
pattern = Utils.
|
975
|
-
Utils.wrap_expr(_rbexpr.str_extract_all(pattern
|
976
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
977
|
+
Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
|
976
978
|
end
|
977
979
|
|
978
980
|
# Extract all capture groups for the given regex pattern.
|
@@ -1018,15 +1020,15 @@ module Polars
|
|
1018
1020
|
# )
|
1019
1021
|
# # =>
|
1020
1022
|
# # shape: (3, 3)
|
1021
|
-
# #
|
1022
|
-
# # │ url
|
1023
|
-
# # │ ---
|
1024
|
-
# # │ str
|
1025
|
-
# #
|
1026
|
-
# # │ http://vote.com/ballon_dor?
|
1027
|
-
# # │ http://vote.com/ballon_dor?
|
1028
|
-
# # │ http://vote.com/ballon_dor?
|
1029
|
-
# #
|
1023
|
+
# # ┌─────────────────────────────────┬───────────────────────┬──────────┐
|
1024
|
+
# # │ url ┆ captures ┆ name │
|
1025
|
+
# # │ --- ┆ --- ┆ --- │
|
1026
|
+
# # │ str ┆ struct[2] ┆ str │
|
1027
|
+
# # ╞═════════════════════════════════╪═══════════════════════╪══════════╡
|
1028
|
+
# # │ http://vote.com/ballon_dor?can… ┆ {"messi","python"} ┆ MESSI │
|
1029
|
+
# # │ http://vote.com/ballon_dor?can… ┆ {"weghorst","polars"} ┆ WEGHORST │
|
1030
|
+
# # │ http://vote.com/ballon_dor?err… ┆ {null,null} ┆ null │
|
1031
|
+
# # └─────────────────────────────────┴───────────────────────┴──────────┘
|
1030
1032
|
def extract_groups(pattern)
|
1031
1033
|
Utils.wrap_expr(_rbexpr.str_extract_groups(pattern))
|
1032
1034
|
end
|
@@ -1056,7 +1058,7 @@ module Polars
|
|
1056
1058
|
# # │ 6 │
|
1057
1059
|
# # └──────────────┘
|
1058
1060
|
def count_matches(pattern, literal: false)
|
1059
|
-
pattern = Utils.
|
1061
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1060
1062
|
Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
|
1061
1063
|
end
|
1062
1064
|
alias_method :count_match, :count_matches
|
@@ -1085,12 +1087,11 @@ module Polars
|
|
1085
1087
|
# # │ ["foo", "bar", "baz"] │
|
1086
1088
|
# # └───────────────────────┘
|
1087
1089
|
def split(by, inclusive: false)
|
1088
|
-
by = Utils.
|
1090
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1089
1091
|
if inclusive
|
1090
|
-
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
1091
|
-
else
|
1092
|
-
Utils.wrap_expr(_rbexpr.str_split(by))
|
1092
|
+
return Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
1093
1093
|
end
|
1094
|
+
Utils.wrap_expr(_rbexpr.str_split(by))
|
1094
1095
|
end
|
1095
1096
|
|
1096
1097
|
# Split the string by a substring using `n` splits.
|
@@ -1128,7 +1129,7 @@ module Polars
|
|
1128
1129
|
# # │ {"d","4"} │
|
1129
1130
|
# # └─────────────┘
|
1130
1131
|
def split_exact(by, n, inclusive: false)
|
1131
|
-
by = Utils.
|
1132
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1132
1133
|
if inclusive
|
1133
1134
|
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
1134
1135
|
else
|
@@ -1165,7 +1166,7 @@ module Polars
|
|
1165
1166
|
# # │ {"foo","bar baz"} │
|
1166
1167
|
# # └───────────────────┘
|
1167
1168
|
def splitn(by, n)
|
1168
|
-
by = Utils.
|
1169
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1169
1170
|
Utils.wrap_expr(_rbexpr.str_splitn(by, n))
|
1170
1171
|
end
|
1171
1172
|
|
@@ -1196,9 +1197,9 @@ module Polars
|
|
1196
1197
|
# # │ 2 ┆ abc456 │
|
1197
1198
|
# # └─────┴────────┘
|
1198
1199
|
def replace(pattern, value, literal: false, n: 1)
|
1199
|
-
pattern = Utils.
|
1200
|
-
value = Utils.
|
1201
|
-
Utils.wrap_expr(_rbexpr.str_replace_n(pattern
|
1200
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1201
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
1202
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern, value, literal, n))
|
1202
1203
|
end
|
1203
1204
|
|
1204
1205
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -1226,9 +1227,9 @@ module Polars
|
|
1226
1227
|
# # │ 2 ┆ 123-123 │
|
1227
1228
|
# # └─────┴─────────┘
|
1228
1229
|
def replace_all(pattern, value, literal: false)
|
1229
|
-
pattern = Utils.
|
1230
|
-
value = Utils.
|
1231
|
-
Utils.wrap_expr(_rbexpr.str_replace_all(pattern
|
1230
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1231
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
1232
|
+
Utils.wrap_expr(_rbexpr.str_replace_all(pattern, value, literal))
|
1232
1233
|
end
|
1233
1234
|
|
1234
1235
|
# Returns string values in reversed order.
|
@@ -1281,36 +1282,11 @@ module Polars
|
|
1281
1282
|
# # │ dragonfruit ┆ uit │
|
1282
1283
|
# # └─────────────┴──────────┘
|
1283
1284
|
def slice(offset, length = nil)
|
1284
|
-
offset = Utils.
|
1285
|
-
length = Utils.
|
1285
|
+
offset = Utils.parse_into_expression(offset)
|
1286
|
+
length = Utils.parse_into_expression(length)
|
1286
1287
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
1287
1288
|
end
|
1288
1289
|
|
1289
|
-
# Returns a column with a separate row for every string character.
|
1290
|
-
#
|
1291
|
-
# @return [Expr]
|
1292
|
-
#
|
1293
|
-
# @example
|
1294
|
-
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
1295
|
-
# df.select(Polars.col("a").str.explode)
|
1296
|
-
# # =>
|
1297
|
-
# # shape: (6, 1)
|
1298
|
-
# # ┌─────┐
|
1299
|
-
# # │ a │
|
1300
|
-
# # │ --- │
|
1301
|
-
# # │ str │
|
1302
|
-
# # ╞═════╡
|
1303
|
-
# # │ f │
|
1304
|
-
# # │ o │
|
1305
|
-
# # │ o │
|
1306
|
-
# # │ b │
|
1307
|
-
# # │ a │
|
1308
|
-
# # │ r │
|
1309
|
-
# # └─────┘
|
1310
|
-
def explode
|
1311
|
-
Utils.wrap_expr(_rbexpr.str_explode)
|
1312
|
-
end
|
1313
|
-
|
1314
1290
|
# Convert an Utf8 column into an Int64 column with base radix.
|
1315
1291
|
#
|
1316
1292
|
# @param base [Integer]
|
@@ -1354,7 +1330,7 @@ module Polars
|
|
1354
1330
|
# # │ null ┆ null │
|
1355
1331
|
# # └──────┴────────┘
|
1356
1332
|
def to_integer(base: 10, strict: true)
|
1357
|
-
base = Utils.
|
1333
|
+
base = Utils.parse_into_expression(base, str_as_lit: false)
|
1358
1334
|
Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
|
1359
1335
|
end
|
1360
1336
|
|
@@ -1418,17 +1394,17 @@ module Polars
|
|
1418
1394
|
# )
|
1419
1395
|
# # =>
|
1420
1396
|
# # shape: (3, 2)
|
1421
|
-
# #
|
1422
|
-
# # │ lyrics
|
1423
|
-
# # │ ---
|
1424
|
-
# # │ str
|
1425
|
-
# #
|
1426
|
-
# # │ Everybody wants to rule the
|
1427
|
-
# # │ Tell me what you want, what
|
1428
|
-
# # │ Can you feel the love tonight
|
1429
|
-
# #
|
1397
|
+
# # ┌─────────────────────────────────┬──────────────┐
|
1398
|
+
# # │ lyrics ┆ contains_any │
|
1399
|
+
# # │ --- ┆ --- │
|
1400
|
+
# # │ str ┆ bool │
|
1401
|
+
# # ╞═════════════════════════════════╪══════════════╡
|
1402
|
+
# # │ Everybody wants to rule the wo… ┆ false │
|
1403
|
+
# # │ Tell me what you want, what yo… ┆ true │
|
1404
|
+
# # │ Can you feel the love tonight ┆ true │
|
1405
|
+
# # └─────────────────────────────────┴──────────────┘
|
1430
1406
|
def contains_any(patterns, ascii_case_insensitive: false)
|
1431
|
-
patterns = Utils.
|
1407
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
|
1432
1408
|
Utils.wrap_expr(
|
1433
1409
|
_rbexpr.str_contains_any(patterns, ascii_case_insensitive)
|
1434
1410
|
)
|
@@ -1468,15 +1444,15 @@ module Polars
|
|
1468
1444
|
# )
|
1469
1445
|
# # =>
|
1470
1446
|
# # shape: (3, 2)
|
1471
|
-
# #
|
1472
|
-
# # │ lyrics
|
1473
|
-
# # │ ---
|
1474
|
-
# # │ str
|
1475
|
-
# #
|
1476
|
-
# # │ Everybody wants to rule the
|
1477
|
-
# # │ Tell me what you want, what
|
1478
|
-
# # │ Can you feel the love tonight
|
1479
|
-
# #
|
1447
|
+
# # ┌─────────────────────────────────┬─────────────────────────────────┐
|
1448
|
+
# # │ lyrics ┆ removes_pronouns │
|
1449
|
+
# # │ --- ┆ --- │
|
1450
|
+
# # │ str ┆ str │
|
1451
|
+
# # ╞═════════════════════════════════╪═════════════════════════════════╡
|
1452
|
+
# # │ Everybody wants to rule the wo… ┆ Everybody wants to rule the wo… │
|
1453
|
+
# # │ Tell me what you want, what yo… ┆ Tell what want, what really… │
|
1454
|
+
# # │ Can you feel the love tonight ┆ Can feel the love tonight │
|
1455
|
+
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1480
1456
|
#
|
1481
1457
|
# @example
|
1482
1458
|
# df.with_columns(
|
@@ -1489,19 +1465,19 @@ module Polars
|
|
1489
1465
|
# )
|
1490
1466
|
# # =>
|
1491
1467
|
# # shape: (3, 2)
|
1492
|
-
# #
|
1493
|
-
# # │ lyrics
|
1494
|
-
# # │ ---
|
1495
|
-
# # │ str
|
1496
|
-
# #
|
1497
|
-
# # │ Everybody wants to rule the
|
1498
|
-
# # │ Tell me what you want, what
|
1499
|
-
# # │ Can you feel the love tonight
|
1500
|
-
# #
|
1468
|
+
# # ┌─────────────────────────────────┬─────────────────────────────────┐
|
1469
|
+
# # │ lyrics ┆ confusing │
|
1470
|
+
# # │ --- ┆ --- │
|
1471
|
+
# # │ str ┆ str │
|
1472
|
+
# # ╞═════════════════════════════════╪═════════════════════════════════╡
|
1473
|
+
# # │ Everybody wants to rule the wo… ┆ Everybody wants to rule the wo… │
|
1474
|
+
# # │ Tell me what you want, what yo… ┆ Tell you what me want, what me… │
|
1475
|
+
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
1476
|
+
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1501
1477
|
def replace_many(patterns, replace_with, ascii_case_insensitive: false)
|
1502
|
-
patterns = Utils.
|
1503
|
-
replace_with = Utils.
|
1504
|
-
replace_with, str_as_lit: true,
|
1478
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
|
1479
|
+
replace_with = Utils.parse_into_expression(
|
1480
|
+
replace_with, str_as_lit: true, list_as_series: true
|
1505
1481
|
)
|
1506
1482
|
Utils.wrap_expr(
|
1507
1483
|
_rbexpr.str_replace_many(
|
@@ -83,7 +83,7 @@ module Polars
|
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
85
|
cache: true,
|
86
|
-
|
86
|
+
ambiguous: "raise"
|
87
87
|
)
|
88
88
|
super
|
89
89
|
end
|
@@ -233,7 +233,7 @@ module Polars
|
|
233
233
|
# @return [Series]
|
234
234
|
#
|
235
235
|
# @example
|
236
|
-
# Polars::Series.new([1, nil, 2]).str.
|
236
|
+
# Polars::Series.new([1, nil, 2]).str.join("-")
|
237
237
|
# # =>
|
238
238
|
# # shape: (1,)
|
239
239
|
# # Series: '' [str]
|
@@ -242,16 +242,17 @@ module Polars
|
|
242
242
|
# # ]
|
243
243
|
#
|
244
244
|
# @example
|
245
|
-
# Polars::Series.new([1, nil, 2]).str.
|
245
|
+
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
246
246
|
# # =>
|
247
247
|
# # shape: (1,)
|
248
248
|
# # Series: '' [str]
|
249
249
|
# # [
|
250
250
|
# # null
|
251
251
|
# # ]
|
252
|
-
def
|
252
|
+
def join(delimiter = "-", ignore_nulls: true)
|
253
253
|
super
|
254
254
|
end
|
255
|
+
alias_method :concat, :join
|
255
256
|
|
256
257
|
# Check if strings in Series contain a substring that matches a regex.
|
257
258
|
#
|
data/lib/polars/testing.rb
CHANGED
@@ -271,9 +271,9 @@ module Polars
|
|
271
271
|
|
272
272
|
def _assert_correct_input_type(left, right)
|
273
273
|
if left.is_a?(DataFrame) && right.is_a?(DataFrame)
|
274
|
-
|
274
|
+
false
|
275
275
|
elsif left.is_a?(LazyFrame) && right.is_a?(DataFrame)
|
276
|
-
|
276
|
+
true
|
277
277
|
else
|
278
278
|
raise_assertion_error(
|
279
279
|
"inputs",
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module Polars
|
2
|
+
module Utils
|
3
|
+
# Normalise a duration argument to its string form.
#
# `nil` and String inputs are returned untouched; anything else is handed
# to `_timedelta_to_duration_string`.
#
# NOTE(review): `_timedelta_to_duration_string` is not defined in the
# visible part of this file (only `_timedelta_to_pl_duration` is) —
# confirm it exists elsewhere in `Polars::Utils`.
#
# @param td [String, nil, Object] duration value
# @return [String, nil, Object] `td` itself for nil/String input
def self.parse_as_duration_string(td)
  passthrough = td.nil? || td.is_a?(::String)
  return td if passthrough

  _timedelta_to_duration_string(td)
end
|
9
|
+
|
10
|
+
# Identity conversion: hands the given duration value back unchanged.
#
# @param td [Object] duration value
# @return [Object] the same `td`
def self._timedelta_to_pl_duration(td) = td
|
13
|
+
|
14
|
+
# Flip the sign of a duration string.
#
# A leading "-" is dropped; otherwise one is prepended.
#
# @param duration [String] e.g. "1d" or "-1d"
# @return [String] the negated duration string
def self.negate_duration_string(duration)
  duration.start_with?("-") ? duration[1..] : "-#{duration}"
end
|
21
|
+
|
22
|
+
# Convert a date-like value into a day count.
#
# The value is converted with `to_datetime.to_time`, and its epoch seconds
# are divided by `SECONDS_PER_DAY`.
#
# @param d [#to_datetime] date-like value
# @return [Integer] presumably whole days since the Unix epoch
#   (`SECONDS_PER_DAY` is defined outside this file — confirm its value)
def self.date_to_int(d)
  epoch_seconds = d.to_datetime.to_time.to_i
  epoch_seconds / SECONDS_PER_DAY
end
|
26
|
+
|
27
|
+
# Convert a datetime-like value into an integer count of `time_unit` ticks.
#
# The value is converted with `to_datetime.to_time`; its epoch seconds are
# scaled by the per-second constant for the unit and the sub-second part
# is added.
#
# @param dt [#to_datetime] datetime-like value
# @param time_unit [String, nil] "ns", "us" or "ms"; nil is handled like "ns"
# @return [Integer] presumably an Integer (the `*_PER_SECOND` constants are
#   defined outside this file — confirm)
# @raise [ArgumentError] if `time_unit` is none of the accepted values
def self.datetime_to_int(dt, time_unit)
  time = dt.to_datetime.to_time
  case time_unit
  when "ns", nil
    # Ruby has ns precision, so nil falls back to nanoseconds
    time.to_i * NS_PER_SECOND + time.nsec
  when "us"
    time.to_i * US_PER_SECOND + time.usec
  when "ms"
    time.to_i * MS_PER_SECOND + time.usec / 1000
  else
    # Interpolate the actual argument: the previous `tu` was an undefined
    # name, so this branch raised NameError instead of ArgumentError.
    raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
  end
end
|
46
|
+
|
47
|
+
# Turn a day count into a Ruby `Date`.
#
# @param value [Numeric] number of days (multiplied by 86400 to get seconds)
# @return [Date]
def self._to_ruby_date(value)
  # Days -> seconds. The Time is switched to UTC before taking the date so
  # the result does not vary with the local timezone of the machine.
  epoch_seconds = value * 86400
  ::Time.at(epoch_seconds).utc.to_date
end
|
53
|
+
|
54
|
+
# Turn a nanosecond count into a UTC `Time` on the placeholder date
# 2000-01-01.
#
# @param value [Integer] nanoseconds, split into hours/minutes/seconds via divmod
# @return [Time] UTC time on 2000-01-01
def self._to_ruby_time(value)
  return ::Time.utc(2000, 1, 1) if value == 0

  total_seconds, nanos = value.divmod(1_000_000_000)
  total_minutes, secs = total_seconds.divmod(60)
  hrs, mins = total_minutes.divmod(60)
  # Time.utc takes microseconds as its last argument, hence the / 1000.0
  ::Time.utc(2000, 1, 1, hrs, mins, secs, nanos / 1000.0)
end
|
64
|
+
|
65
|
+
# Turn an integer tick count into a UTC `Time`.
#
# Only a nil, empty or "UTC" time zone is handled; any other zone raises
# `Todo`.
#
# @param value [Integer] tick count in `time_unit`
# @param time_unit [String] "ns", "us" or "ms"
# @param time_zone [String, nil] nil, "" or "UTC"
# @return [Time] UTC time
# @raise [ArgumentError] if `time_unit` is not one of the accepted values
def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
  utc_like = time_zone.nil? || time_zone == "" || time_zone == "UTC"
  raise Todo unless utc_like

  ticks_per_second, subsec_unit =
    case time_unit
    when "ns" then [1000000000, :nsec]
    when "us" then [1000000, :usec]
    when "ms" then [1000, :millisecond]
    else
      raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
    end

  ::Time.at(value / ticks_per_second, value % ticks_per_second, subsec_unit).utc
end
|
80
|
+
|
81
|
+
# Convert a duration tick count into seconds as a Float.
#
# @param value [Numeric] tick count in `time_unit`
# @param time_unit [String] "ns", "us" or "ms"
# @return [Float] the duration in seconds
# @raise [ArgumentError] if `time_unit` is not one of the accepted values
def self._to_ruby_duration(value, time_unit = "ns")
  case time_unit
  when "ns" then value / 1e9
  when "us" then value / 1e6
  when "ms" then value / 1e3
  else
    raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
  end
end
|
92
|
+
|
93
|
+
# Build a `BigDecimal` from a digit value and an exponent.
#
# The two arguments are joined as "<digits>e<scale>" and parsed by
# `BigDecimal`, so `scale` acts as a power-of-ten exponent.
#
# @param digits [Integer, String] the unscaled digits
# @param scale [Integer] exponent applied to `digits`
# @return [BigDecimal]
def self._to_ruby_decimal(digits, scale)
  scientific = "#{digits}e#{scale}"
  BigDecimal(scientific)
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module Polars
|
2
|
+
module Utils
|
3
|
+
# Coerce an arbitrary input into an expression and return its `_rbexpr`.
#
# Resolution order:
# 1. an `Expr` is used as-is (passed through `_structify_expression` when
#    `structify` is set);
# 2. a String or Symbol becomes `F.col(input)` unless `str_as_lit` is set;
# 3. an Array becomes a literal `Series` when `list_as_series` is set;
# 4. everything else becomes `F.lit(input, dtype: dtype)`.
#
# @param input [Object] value to parse
# @param str_as_lit [Boolean] treat strings/symbols as literals, not column names
# @param list_as_series [Boolean] wrap arrays in a Series literal
# @param structify [Boolean] apply `_structify_expression` to Expr input
# @param dtype [Object, nil] forwarded to `F.lit`
# @return [Object] the `_rbexpr` of the resulting expression
def self.parse_into_expression(
  input,
  str_as_lit: false,
  list_as_series: false,
  structify: false,
  dtype: nil
)
  parsed =
    if input.is_a?(Expr)
      structify ? _structify_expression(input) : input
    elsif (input.is_a?(::String) || input.is_a?(Symbol)) && !str_as_lit
      F.col(input)
    elsif input.is_a?(::Array) && list_as_series
      F.lit(Series.new(input), dtype: dtype)
    else
      F.lit(input, dtype: dtype)
    end

  parsed._rbexpr
end
|
25
|
+
|
26
|
+
# Parse positional and keyword inputs into one flat list of expressions.
#
# Positional inputs come first; keyword inputs (if any) are parsed by
# `_parse_named_inputs` and appended.
#
# @param inputs [Array<Object>] positional inputs
# @param __structify [Boolean] forwarded as `structify:` to the helpers
# @param named_inputs [Hash] keyword inputs, name => value
# @return [Array] parsed expressions
def self.parse_into_list_of_expressions(*inputs, __structify: false, **named_inputs)
  parsed = _parse_positional_inputs(inputs, structify: __structify)
  return parsed unless named_inputs.any?

  parsed.concat(_parse_named_inputs(named_inputs, structify: __structify))
end
|
35
|
+
|
36
|
+
# Parse each positional input with `parse_into_expression`.
#
# The inputs are first normalised by `_parse_inputs_as_iterable`.
#
# @param inputs [Array<Object>] positional inputs
# @param structify [Boolean] forwarded to `parse_into_expression`
# @return [Array] one parsed expression per input
def self._parse_positional_inputs(inputs, structify: false)
  _parse_inputs_as_iterable(inputs).map do |item|
    parse_into_expression(item, structify: structify)
  end
end
|
40
|
+
|
41
|
+
# Normalise a splatted argument list into the collection to iterate.
#
# An empty list yields a fresh empty array; a list holding exactly one
# Array yields that inner array; otherwise the list itself is returned.
#
# @param inputs [Array<Object>] splatted positional arguments
# @return [Array]
def self._parse_inputs_as_iterable(inputs)
  return [] if inputs.empty?

  lone_array = inputs.length == 1 && inputs[0].is_a?(::Array)
  lone_array ? inputs[0] : inputs
end
|
52
|
+
|
53
|
+
# Parse keyword inputs, aliasing each parsed expression to its key.
#
# @param named_inputs [Hash] name => value pairs
# @param structify [Boolean] forwarded to `parse_into_expression`
# @return [Array] parsed expressions, each aliased via `_alias(name.to_s)`
def self._parse_named_inputs(named_inputs, structify: false)
  named_inputs.map do |name, input|
    parsed = parse_into_expression(input, structify: structify)
    parsed._alias(name.to_s)
  end
end
|
58
|
+
|
59
|
+
# Combine positional predicates and keyword constraints into one predicate.
#
# Predicates are parsed by `_parse_positional_inputs`, constraints (if any)
# by `_parse_constraints`; the merged list goes to `_combine_predicates`.
#
# @param predicates [Array<Object>] positional predicates
# @param constraints [Hash] column name => value constraints
# @return [Object] the result of `_combine_predicates`
def self.parse_predicates_constraints_into_expression(*predicates, **constraints)
  combined = _parse_positional_inputs(predicates)
  combined.concat(_parse_constraints(constraints)) if constraints.any?

  _combine_predicates(combined)
end
|
69
|
+
|
70
|
+
# Turn keyword constraints into column-equality expressions.
#
# @param constraints [Hash] column name => value to compare against
# @return [Array] the `_rbexpr` of `Polars.col(name).eq(value)` per pair
def self._parse_constraints(constraints)
  constraints.map { |name, value| Polars.col(name).eq(value)._rbexpr }
end
|
75
|
+
|
76
|
+
# Collapse a list of predicates into a single predicate.
#
# A single predicate is returned as-is; several are combined with
# `Plr.all_horizontal`.
#
# @param predicates [Array] parsed predicate expressions
# @return [Object] the sole predicate or the `Plr.all_horizontal` result
# @raise [TypeError] if `predicates.any?` is false
def self._combine_predicates(predicates)
  unless predicates.any?
    raise TypeError, "at least one predicate or constraint must be provided"
  end

  return predicates[0] if predicates.length == 1

  Plr.all_horizontal(predicates)
end
|
88
|
+
end
|
89
|
+
end
|