polars-df 0.11.0-x86_64-linux → 0.12.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/LICENSE-THIRD-PARTY.txt +1065 -878
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +7 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -39,7 +39,7 @@ module Polars
|
|
39
39
|
# # ]
|
40
40
|
def to_date(format = nil, strict: true, exact: true, cache: true)
|
41
41
|
_validate_format_argument(format)
|
42
|
-
Utils.wrap_expr(
|
42
|
+
Utils.wrap_expr(_rbexpr.str_to_date(format, strict, exact, cache))
|
43
43
|
end
|
44
44
|
|
45
45
|
# Convert a Utf8 column into a Datetime column.
|
@@ -83,14 +83,14 @@ module Polars
|
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
85
|
cache: true,
|
86
|
-
use_earliest: nil,
|
87
86
|
ambiguous: "raise"
|
88
87
|
)
|
89
88
|
_validate_format_argument(format)
|
90
|
-
ambiguous
|
91
|
-
|
89
|
+
unless ambiguous.is_a?(Expr)
|
90
|
+
ambiguous = Polars.lit(ambiguous)
|
91
|
+
end
|
92
92
|
Utils.wrap_expr(
|
93
|
-
|
93
|
+
_rbexpr.str_to_datetime(
|
94
94
|
format,
|
95
95
|
time_unit,
|
96
96
|
time_zone,
|
@@ -331,7 +331,7 @@ module Polars
|
|
331
331
|
#
|
332
332
|
# @example
|
333
333
|
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
334
|
-
# df.select(Polars.col("foo").str.
|
334
|
+
# df.select(Polars.col("foo").str.join("-"))
|
335
335
|
# # =>
|
336
336
|
# # shape: (1, 1)
|
337
337
|
# # ┌─────┐
|
@@ -344,7 +344,7 @@ module Polars
|
|
344
344
|
#
|
345
345
|
# @example
|
346
346
|
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
347
|
-
# df.select(Polars.col("foo").str.
|
347
|
+
# df.select(Polars.col("foo").str.join("-", ignore_nulls: false))
|
348
348
|
# # =>
|
349
349
|
# # shape: (1, 1)
|
350
350
|
# # ┌──────┐
|
@@ -354,9 +354,10 @@ module Polars
|
|
354
354
|
# # ╞══════╡
|
355
355
|
# # │ null │
|
356
356
|
# # └──────┘
|
357
|
-
def
|
358
|
-
Utils.wrap_expr(_rbexpr.
|
357
|
+
def join(delimiter = "-", ignore_nulls: true)
|
358
|
+
Utils.wrap_expr(_rbexpr.str_join(delimiter, ignore_nulls))
|
359
359
|
end
|
360
|
+
alias_method :concat, :join
|
360
361
|
|
361
362
|
# Transform to uppercase variant.
|
362
363
|
#
|
@@ -446,7 +447,7 @@ module Polars
|
|
446
447
|
# # │ both │
|
447
448
|
# # └───────┘
|
448
449
|
def strip_chars(characters = nil)
|
449
|
-
characters = Utils.
|
450
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
450
451
|
Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
|
451
452
|
end
|
452
453
|
alias_method :strip, :strip_chars
|
@@ -473,7 +474,7 @@ module Polars
|
|
473
474
|
# # │ both │
|
474
475
|
# # └────────┘
|
475
476
|
def strip_chars_start(characters = nil)
|
476
|
-
characters = Utils.
|
477
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
477
478
|
Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
|
478
479
|
end
|
479
480
|
alias_method :lstrip, :strip_chars_start
|
@@ -500,7 +501,7 @@ module Polars
|
|
500
501
|
# # │ both │
|
501
502
|
# # └───────┘
|
502
503
|
def strip_chars_end(characters = nil)
|
503
|
-
characters = Utils.
|
504
|
+
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
504
505
|
Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
|
505
506
|
end
|
506
507
|
alias_method :rstrip, :strip_chars_end
|
@@ -530,7 +531,7 @@ module Polars
|
|
530
531
|
# # │ bar ┆ bar │
|
531
532
|
# # └───────────┴──────────┘
|
532
533
|
def strip_prefix(prefix)
|
533
|
-
prefix = Utils.
|
534
|
+
prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
|
534
535
|
Utils.wrap_expr(_rbexpr.str_strip_prefix(prefix))
|
535
536
|
end
|
536
537
|
|
@@ -560,7 +561,7 @@ module Polars
|
|
560
561
|
# # │ bar ┆ │
|
561
562
|
# # └───────────┴──────────┘
|
562
563
|
def strip_suffix(suffix)
|
563
|
-
suffix = Utils.
|
564
|
+
suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
|
564
565
|
Utils.wrap_expr(_rbexpr.str_strip_suffix(suffix))
|
565
566
|
end
|
566
567
|
|
@@ -654,7 +655,7 @@ module Polars
|
|
654
655
|
# # │ null ┆ null │
|
655
656
|
# # └────────┴────────┘
|
656
657
|
def zfill(length)
|
657
|
-
length = Utils.
|
658
|
+
length = Utils.parse_into_expression(length)
|
658
659
|
Utils.wrap_expr(_rbexpr.str_zfill(length))
|
659
660
|
end
|
660
661
|
|
@@ -689,7 +690,7 @@ module Polars
|
|
689
690
|
# # │ null ┆ null ┆ null │
|
690
691
|
# # └─────────────┴───────┴─────────┘
|
691
692
|
def contains(pattern, literal: false, strict: true)
|
692
|
-
pattern = Utils.
|
693
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
693
694
|
Utils.wrap_expr(_rbexpr.str_contains(pattern, literal, strict))
|
694
695
|
end
|
695
696
|
|
@@ -729,7 +730,7 @@ module Polars
|
|
729
730
|
# # │ mango │
|
730
731
|
# # └────────┘
|
731
732
|
def ends_with(sub)
|
732
|
-
sub = Utils.
|
733
|
+
sub = Utils.parse_into_expression(sub, str_as_lit: true)
|
733
734
|
Utils.wrap_expr(_rbexpr.str_ends_with(sub))
|
734
735
|
end
|
735
736
|
|
@@ -769,7 +770,7 @@ module Polars
|
|
769
770
|
# # │ apple │
|
770
771
|
# # └────────┘
|
771
772
|
def starts_with(sub)
|
772
|
-
sub = Utils.
|
773
|
+
sub = Utils.parse_into_expression(sub, str_as_lit: true)
|
773
774
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
774
775
|
end
|
775
776
|
|
@@ -840,7 +841,7 @@ module Polars
|
|
840
841
|
# # │ true │
|
841
842
|
# # └──────────┘
|
842
843
|
def json_path_match(json_path)
|
843
|
-
json_path = Utils.
|
844
|
+
json_path = Utils.parse_into_expression(json_path, str_as_lit: true)
|
844
845
|
Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
|
845
846
|
end
|
846
847
|
|
@@ -940,7 +941,7 @@ module Polars
|
|
940
941
|
# # │ 678 │
|
941
942
|
# # └─────┘
|
942
943
|
def extract(pattern, group_index: 1)
|
943
|
-
pattern = Utils.
|
944
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
944
945
|
Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
|
945
946
|
end
|
946
947
|
|
@@ -972,8 +973,8 @@ module Polars
|
|
972
973
|
# # │ ["678", "910"] │
|
973
974
|
# # └────────────────┘
|
974
975
|
def extract_all(pattern)
|
975
|
-
pattern = Utils.
|
976
|
-
Utils.wrap_expr(_rbexpr.str_extract_all(pattern
|
976
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
977
|
+
Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
|
977
978
|
end
|
978
979
|
|
979
980
|
# Extract all capture groups for the given regex pattern.
|
@@ -1057,7 +1058,7 @@ module Polars
|
|
1057
1058
|
# # │ 6 │
|
1058
1059
|
# # └──────────────┘
|
1059
1060
|
def count_matches(pattern, literal: false)
|
1060
|
-
pattern = Utils.
|
1061
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1061
1062
|
Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
|
1062
1063
|
end
|
1063
1064
|
alias_method :count_match, :count_matches
|
@@ -1086,12 +1087,11 @@ module Polars
|
|
1086
1087
|
# # │ ["foo", "bar", "baz"] │
|
1087
1088
|
# # └───────────────────────┘
|
1088
1089
|
def split(by, inclusive: false)
|
1089
|
-
by = Utils.
|
1090
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1090
1091
|
if inclusive
|
1091
|
-
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
1092
|
-
else
|
1093
|
-
Utils.wrap_expr(_rbexpr.str_split(by))
|
1092
|
+
return Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
1094
1093
|
end
|
1094
|
+
Utils.wrap_expr(_rbexpr.str_split(by))
|
1095
1095
|
end
|
1096
1096
|
|
1097
1097
|
# Split the string by a substring using `n` splits.
|
@@ -1129,7 +1129,7 @@ module Polars
|
|
1129
1129
|
# # │ {"d","4"} │
|
1130
1130
|
# # └─────────────┘
|
1131
1131
|
def split_exact(by, n, inclusive: false)
|
1132
|
-
by = Utils.
|
1132
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1133
1133
|
if inclusive
|
1134
1134
|
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
1135
1135
|
else
|
@@ -1166,7 +1166,7 @@ module Polars
|
|
1166
1166
|
# # │ {"foo","bar baz"} │
|
1167
1167
|
# # └───────────────────┘
|
1168
1168
|
def splitn(by, n)
|
1169
|
-
by = Utils.
|
1169
|
+
by = Utils.parse_into_expression(by, str_as_lit: true)
|
1170
1170
|
Utils.wrap_expr(_rbexpr.str_splitn(by, n))
|
1171
1171
|
end
|
1172
1172
|
|
@@ -1197,9 +1197,9 @@ module Polars
|
|
1197
1197
|
# # │ 2 ┆ abc456 │
|
1198
1198
|
# # └─────┴────────┘
|
1199
1199
|
def replace(pattern, value, literal: false, n: 1)
|
1200
|
-
pattern = Utils.
|
1201
|
-
value = Utils.
|
1202
|
-
Utils.wrap_expr(_rbexpr.str_replace_n(pattern
|
1200
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1201
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
1202
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern, value, literal, n))
|
1203
1203
|
end
|
1204
1204
|
|
1205
1205
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -1227,9 +1227,9 @@ module Polars
|
|
1227
1227
|
# # │ 2 ┆ 123-123 │
|
1228
1228
|
# # └─────┴─────────┘
|
1229
1229
|
def replace_all(pattern, value, literal: false)
|
1230
|
-
pattern = Utils.
|
1231
|
-
value = Utils.
|
1232
|
-
Utils.wrap_expr(_rbexpr.str_replace_all(pattern
|
1230
|
+
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
1231
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
1232
|
+
Utils.wrap_expr(_rbexpr.str_replace_all(pattern, value, literal))
|
1233
1233
|
end
|
1234
1234
|
|
1235
1235
|
# Returns string values in reversed order.
|
@@ -1282,36 +1282,11 @@ module Polars
|
|
1282
1282
|
# # │ dragonfruit ┆ uit │
|
1283
1283
|
# # └─────────────┴──────────┘
|
1284
1284
|
def slice(offset, length = nil)
|
1285
|
-
offset = Utils.
|
1286
|
-
length = Utils.
|
1285
|
+
offset = Utils.parse_into_expression(offset)
|
1286
|
+
length = Utils.parse_into_expression(length)
|
1287
1287
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
1288
1288
|
end
|
1289
1289
|
|
1290
|
-
# Returns a column with a separate row for every string character.
|
1291
|
-
#
|
1292
|
-
# @return [Expr]
|
1293
|
-
#
|
1294
|
-
# @example
|
1295
|
-
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
1296
|
-
# df.select(Polars.col("a").str.explode)
|
1297
|
-
# # =>
|
1298
|
-
# # shape: (6, 1)
|
1299
|
-
# # ┌─────┐
|
1300
|
-
# # │ a │
|
1301
|
-
# # │ --- │
|
1302
|
-
# # │ str │
|
1303
|
-
# # ╞═════╡
|
1304
|
-
# # │ f │
|
1305
|
-
# # │ o │
|
1306
|
-
# # │ o │
|
1307
|
-
# # │ b │
|
1308
|
-
# # │ a │
|
1309
|
-
# # │ r │
|
1310
|
-
# # └─────┘
|
1311
|
-
def explode
|
1312
|
-
Utils.wrap_expr(_rbexpr.str_explode)
|
1313
|
-
end
|
1314
|
-
|
1315
1290
|
# Convert an Utf8 column into an Int64 column with base radix.
|
1316
1291
|
#
|
1317
1292
|
# @param base [Integer]
|
@@ -1355,7 +1330,7 @@ module Polars
|
|
1355
1330
|
# # │ null ┆ null │
|
1356
1331
|
# # └──────┴────────┘
|
1357
1332
|
def to_integer(base: 10, strict: true)
|
1358
|
-
base = Utils.
|
1333
|
+
base = Utils.parse_into_expression(base, str_as_lit: false)
|
1359
1334
|
Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
|
1360
1335
|
end
|
1361
1336
|
|
@@ -1429,7 +1404,7 @@ module Polars
|
|
1429
1404
|
# # │ Can you feel the love tonight ┆ true │
|
1430
1405
|
# # └─────────────────────────────────┴──────────────┘
|
1431
1406
|
def contains_any(patterns, ascii_case_insensitive: false)
|
1432
|
-
patterns = Utils.
|
1407
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
|
1433
1408
|
Utils.wrap_expr(
|
1434
1409
|
_rbexpr.str_contains_any(patterns, ascii_case_insensitive)
|
1435
1410
|
)
|
@@ -1500,9 +1475,9 @@ module Polars
|
|
1500
1475
|
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
1501
1476
|
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1502
1477
|
def replace_many(patterns, replace_with, ascii_case_insensitive: false)
|
1503
|
-
patterns = Utils.
|
1504
|
-
replace_with = Utils.
|
1505
|
-
replace_with, str_as_lit: true,
|
1478
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
|
1479
|
+
replace_with = Utils.parse_into_expression(
|
1480
|
+
replace_with, str_as_lit: true, list_as_series: true
|
1506
1481
|
)
|
1507
1482
|
Utils.wrap_expr(
|
1508
1483
|
_rbexpr.str_replace_many(
|
@@ -83,7 +83,7 @@ module Polars
|
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
85
|
cache: true,
|
86
|
-
|
86
|
+
ambiguous: "raise"
|
87
87
|
)
|
88
88
|
super
|
89
89
|
end
|
@@ -233,7 +233,7 @@ module Polars
|
|
233
233
|
# @return [Series]
|
234
234
|
#
|
235
235
|
# @example
|
236
|
-
# Polars::Series.new([1, nil, 2]).str.
|
236
|
+
# Polars::Series.new([1, nil, 2]).str.join("-")
|
237
237
|
# # =>
|
238
238
|
# # shape: (1,)
|
239
239
|
# # Series: '' [str]
|
@@ -242,16 +242,17 @@ module Polars
|
|
242
242
|
# # ]
|
243
243
|
#
|
244
244
|
# @example
|
245
|
-
# Polars::Series.new([1, nil, 2]).str.
|
245
|
+
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
246
246
|
# # =>
|
247
247
|
# # shape: (1,)
|
248
248
|
# # Series: '' [str]
|
249
249
|
# # [
|
250
250
|
# # null
|
251
251
|
# # ]
|
252
|
-
def
|
252
|
+
def join(delimiter = "-", ignore_nulls: true)
|
253
253
|
super
|
254
254
|
end
|
255
|
+
alias_method :concat, :join
|
255
256
|
|
256
257
|
# Check if strings in Series contain a substring that matches a regex.
|
257
258
|
#
|
data/lib/polars/testing.rb
CHANGED
@@ -271,9 +271,9 @@ module Polars
|
|
271
271
|
|
272
272
|
def _assert_correct_input_type(left, right)
|
273
273
|
if left.is_a?(DataFrame) && right.is_a?(DataFrame)
|
274
|
-
|
274
|
+
false
|
275
275
|
elsif left.is_a?(LazyFrame) && right.is_a?(DataFrame)
|
276
|
-
|
276
|
+
true
|
277
277
|
else
|
278
278
|
raise_assertion_error(
|
279
279
|
"inputs",
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module Polars
|
2
|
+
module Utils
|
3
|
+
def self.parse_as_duration_string(td)
|
4
|
+
if td.nil? || td.is_a?(::String)
|
5
|
+
return td
|
6
|
+
end
|
7
|
+
_timedelta_to_duration_string(td)
|
8
|
+
end
|
9
|
+
|
10
|
+
def self._timedelta_to_pl_duration(td)
|
11
|
+
td
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.negate_duration_string(duration)
|
15
|
+
if duration.start_with?("-")
|
16
|
+
duration[1..]
|
17
|
+
else
|
18
|
+
"-#{duration}"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.date_to_int(d)
|
23
|
+
dt = d.to_datetime.to_time
|
24
|
+
dt.to_i / SECONDS_PER_DAY
|
25
|
+
end
|
26
|
+
|
27
|
+
# Convert a date/time value into an integer number of `time_unit` ticks
# since the Unix epoch.
#
# @param dt [Object] value responding to `to_datetime` (e.g. Time, DateTime)
# @param time_unit [String, nil] "ns", "us", "ms", or nil (nil is handled like "ns")
# @return [Integer] epoch offset expressed in the requested unit
# @raise [ArgumentError] if `time_unit` is not one of the supported values
def self.datetime_to_int(dt, time_unit)
  dt = dt.to_datetime.to_time
  case time_unit
  when "ns", nil
    # Ruby has ns precision, so a missing unit falls back to nanoseconds
    dt.to_i * NS_PER_SECOND + dt.nsec
  when "us"
    dt.to_i * US_PER_SECOND + dt.usec
  when "ms"
    # usec is the sub-second part in microseconds; truncate it to milliseconds
    dt.to_i * MS_PER_SECOND + dt.usec / 1000
  else
    # Interpolate the actual argument; the previous `tu` was an undefined
    # local and turned this branch into a NameError.
    raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
  end
end
|
46
|
+
|
47
|
+
def self._to_ruby_date(value)
|
48
|
+
# days to seconds
|
49
|
+
# important to create from utc. Not doing this leads
|
50
|
+
# to inconsistencies dependent on the timezone you are in.
|
51
|
+
::Time.at(value * 86400).utc.to_date
|
52
|
+
end
|
53
|
+
|
54
|
+
def self._to_ruby_time(value)
|
55
|
+
if value == 0
|
56
|
+
::Time.utc(2000, 1, 1)
|
57
|
+
else
|
58
|
+
seconds, nanoseconds = value.divmod(1_000_000_000)
|
59
|
+
minutes, seconds = seconds.divmod(60)
|
60
|
+
hours, minutes = minutes.divmod(60)
|
61
|
+
::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
|
66
|
+
if time_zone.nil? || time_zone == "" || time_zone == "UTC"
|
67
|
+
if time_unit == "ns"
|
68
|
+
::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
|
69
|
+
elsif time_unit == "us"
|
70
|
+
::Time.at(value / 1000000, value % 1000000, :usec).utc
|
71
|
+
elsif time_unit == "ms"
|
72
|
+
::Time.at(value / 1000, value % 1000, :millisecond).utc
|
73
|
+
else
|
74
|
+
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
|
75
|
+
end
|
76
|
+
else
|
77
|
+
raise Todo
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def self._to_ruby_duration(value, time_unit = "ns")
|
82
|
+
if time_unit == "ns"
|
83
|
+
value / 1e9
|
84
|
+
elsif time_unit == "us"
|
85
|
+
value / 1e6
|
86
|
+
elsif time_unit == "ms"
|
87
|
+
value / 1e3
|
88
|
+
else
|
89
|
+
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def self._to_ruby_decimal(digits, scale)
|
94
|
+
BigDecimal("#{digits}e#{scale}")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module Polars
|
2
|
+
module Utils
|
3
|
+
def self.parse_into_expression(
|
4
|
+
input,
|
5
|
+
str_as_lit: false,
|
6
|
+
list_as_series: false,
|
7
|
+
structify: false,
|
8
|
+
dtype: nil
|
9
|
+
)
|
10
|
+
if input.is_a?(Expr)
|
11
|
+
expr = input
|
12
|
+
if structify
|
13
|
+
expr = _structify_expression(expr)
|
14
|
+
end
|
15
|
+
elsif (input.is_a?(::String) || input.is_a?(Symbol)) && !str_as_lit
|
16
|
+
expr = F.col(input)
|
17
|
+
elsif input.is_a?(::Array) && list_as_series
|
18
|
+
expr = F.lit(Series.new(input), dtype: dtype)
|
19
|
+
else
|
20
|
+
expr = F.lit(input, dtype: dtype)
|
21
|
+
end
|
22
|
+
|
23
|
+
expr._rbexpr
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.parse_into_list_of_expressions(*inputs, __structify: false, **named_inputs)
|
27
|
+
exprs = _parse_positional_inputs(inputs, structify: __structify)
|
28
|
+
if named_inputs.any?
|
29
|
+
named_exprs = _parse_named_inputs(named_inputs, structify: __structify)
|
30
|
+
exprs.concat(named_exprs)
|
31
|
+
end
|
32
|
+
|
33
|
+
exprs
|
34
|
+
end
|
35
|
+
|
36
|
+
def self._parse_positional_inputs(inputs, structify: false)
|
37
|
+
inputs_iter = _parse_inputs_as_iterable(inputs)
|
38
|
+
inputs_iter.map { |e| parse_into_expression(e, structify: structify) }
|
39
|
+
end
|
40
|
+
|
41
|
+
def self._parse_inputs_as_iterable(inputs)
|
42
|
+
if inputs.empty?
|
43
|
+
return []
|
44
|
+
end
|
45
|
+
|
46
|
+
if inputs.length == 1 && inputs[0].is_a?(::Array)
|
47
|
+
return inputs[0]
|
48
|
+
end
|
49
|
+
|
50
|
+
inputs
|
51
|
+
end
|
52
|
+
|
53
|
+
def self._parse_named_inputs(named_inputs, structify: false)
|
54
|
+
named_inputs.map do |name, input|
|
55
|
+
parse_into_expression(input, structify: structify)._alias(name.to_s)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.parse_predicates_constraints_into_expression(*predicates, **constraints)
|
60
|
+
all_predicates = _parse_positional_inputs(predicates)
|
61
|
+
|
62
|
+
if constraints.any?
|
63
|
+
constraint_predicates = _parse_constraints(constraints)
|
64
|
+
all_predicates.concat(constraint_predicates)
|
65
|
+
end
|
66
|
+
|
67
|
+
_combine_predicates(all_predicates)
|
68
|
+
end
|
69
|
+
|
70
|
+
def self._parse_constraints(constraints)
|
71
|
+
constraints.map do |name, value|
|
72
|
+
Polars.col(name).eq(value)._rbexpr
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def self._combine_predicates(predicates)
|
77
|
+
if !predicates.any?
|
78
|
+
msg = "at least one predicate or constraint must be provided"
|
79
|
+
raise TypeError, msg
|
80
|
+
end
|
81
|
+
|
82
|
+
if predicates.length == 1
|
83
|
+
return predicates[0]
|
84
|
+
end
|
85
|
+
|
86
|
+
Plr.all_horizontal(predicates)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Polars
|
2
|
+
module Utils
|
3
|
+
def self._process_null_values(null_values)
|
4
|
+
if null_values.is_a?(Hash)
|
5
|
+
null_values.to_a
|
6
|
+
else
|
7
|
+
null_values
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def self._is_iterable_of(val, eltype)
|
12
|
+
val.all? { |x| x.is_a?(eltype) }
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.is_bool_sequence(val)
|
16
|
+
val.is_a?(::Array) && val.all? { |x| x == true || x == false }
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.is_int_sequence(val)
|
20
|
+
val.is_a?(::Array) && _is_iterable_of(val, Integer)
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.is_str_sequence(val, allow_str: false)
|
24
|
+
if allow_str == false && val.is_a?(::String)
|
25
|
+
false
|
26
|
+
else
|
27
|
+
val.is_a?(::Array) && _is_iterable_of(val, ::String)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.arrlen(obj)
|
32
|
+
if obj.is_a?(Range)
|
33
|
+
# size only works for numeric ranges
|
34
|
+
obj.to_a.length
|
35
|
+
elsif obj.is_a?(::String)
|
36
|
+
nil
|
37
|
+
else
|
38
|
+
obj.length
|
39
|
+
end
|
40
|
+
rescue
|
41
|
+
nil
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.normalize_filepath(path, check_not_directory: true)
|
45
|
+
path = File.expand_path(path)
|
46
|
+
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
47
|
+
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
48
|
+
end
|
49
|
+
path
|
50
|
+
end
|
51
|
+
|
52
|
+
# Scale a size given in bytes to the requested unit.
#
# Only the first character of `to` is used to pick the unit, so "k" and
# "kb" select the same factor.
#
# @param sz [Numeric] size in bytes
# @param to [String] target unit, starting with "b", "k", "m", "g" or "t"
# @return [Numeric] `sz` unchanged for bytes, otherwise `sz` divided by the
#   unit's factor as a Float
# @raise [ArgumentError] if the unit is not recognized
def self.scale_bytes(sz, to:)
  factors = {
    "b" => 1,
    "k" => 1024,
    "m" => 1024 ** 2,
    "g" => 1024 ** 3,
    "t" => 1024 ** 4
  }
  # fetch with a block so an unknown unit fails with a clear message instead
  # of a NoMethodError from comparing nil below
  scaling_factor = factors.fetch(to[0]) do
    raise ArgumentError, "unsupported unit #{to.inspect}; expected one starting with #{factors.keys.join(', ')}"
  end
  if scaling_factor > 1
    sz / scaling_factor.to_f
  else
    sz
  end
end
|
66
|
+
|
67
|
+
def self.extend_bool(value, n_match, value_name, match_name)
|
68
|
+
values = bool?(value) ? [value] * n_match : value
|
69
|
+
if n_match != values.length
|
70
|
+
msg = "the length of `#{value_name}` (#{values.length}) does not match the length of `#{match_name}` (#{n_match})"
|
71
|
+
raise ValueError, msg
|
72
|
+
end
|
73
|
+
values
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Polars
|
2
|
+
module Utils
|
3
|
+
def self.wrap_df(df)
|
4
|
+
DataFrame._from_rbdf(df)
|
5
|
+
end
|
6
|
+
|
7
|
+
def self.wrap_ldf(ldf)
|
8
|
+
LazyFrame._from_rbldf(ldf)
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.wrap_s(s)
|
12
|
+
Series._from_rbseries(s)
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.wrap_expr(rbexpr)
|
16
|
+
Expr._from_rbexpr(rbexpr)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|