polars-df 0.6.0-x86_64-linux → 0.7.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +3223 -4194
- data/README.md +8 -7
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +5 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -82,9 +82,13 @@ module Polars
|
|
82
82
|
time_zone: nil,
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
|
-
cache: true
|
85
|
+
cache: true,
|
86
|
+
use_earliest: nil,
|
87
|
+
ambiguous: "raise"
|
86
88
|
)
|
87
89
|
_validate_format_argument(format)
|
90
|
+
ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
91
|
+
ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
|
88
92
|
Utils.wrap_expr(
|
89
93
|
self._rbexpr.str_to_datetime(
|
90
94
|
format,
|
@@ -92,7 +96,8 @@ module Polars
|
|
92
96
|
time_zone,
|
93
97
|
strict,
|
94
98
|
exact,
|
95
|
-
cache
|
99
|
+
cache,
|
100
|
+
ambiguous._rbexpr
|
96
101
|
)
|
97
102
|
)
|
98
103
|
end
|
@@ -235,7 +240,7 @@ module Polars
|
|
235
240
|
# # │ 東京 ┆ 6 ┆ 2 │
|
236
241
|
# # └──────┴────────┴────────┘
|
237
242
|
def lengths
|
238
|
-
Utils.wrap_expr(_rbexpr.
|
243
|
+
Utils.wrap_expr(_rbexpr.str_len_bytes)
|
239
244
|
end
|
240
245
|
|
241
246
|
# Get length of the strings as `:u32` (as number of chars).
|
@@ -267,13 +272,15 @@ module Polars
|
|
267
272
|
# # │ 東京 ┆ 6 ┆ 2 │
|
268
273
|
# # └──────┴────────┴────────┘
|
269
274
|
def n_chars
|
270
|
-
Utils.wrap_expr(_rbexpr.
|
275
|
+
Utils.wrap_expr(_rbexpr.str_len_chars)
|
271
276
|
end
|
272
277
|
|
273
278
|
# Vertically concat the values in the Series to a single string value.
|
274
279
|
#
|
275
280
|
# @param delimiter [String]
|
276
281
|
# The delimiter to insert between consecutive string values.
|
282
|
+
# @param ignore_nulls [Boolean]
|
283
|
+
# Ignore null values (default).
|
277
284
|
#
|
278
285
|
# @return [Expr]
|
279
286
|
#
|
@@ -282,15 +289,28 @@ module Polars
|
|
282
289
|
# df.select(Polars.col("foo").str.concat("-"))
|
283
290
|
# # =>
|
284
291
|
# # shape: (1, 1)
|
285
|
-
# #
|
286
|
-
# # │ foo
|
287
|
-
# # │ ---
|
288
|
-
# # │ str
|
289
|
-
# #
|
290
|
-
# # │ 1-
|
291
|
-
# #
|
292
|
-
|
293
|
-
|
292
|
+
# # ┌─────┐
|
293
|
+
# # │ foo │
|
294
|
+
# # │ --- │
|
295
|
+
# # │ str │
|
296
|
+
# # ╞═════╡
|
297
|
+
# # │ 1-2 │
|
298
|
+
# # └─────┘
|
299
|
+
#
|
300
|
+
# @example
|
301
|
+
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
302
|
+
# df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
|
303
|
+
# # =>
|
304
|
+
# # shape: (1, 1)
|
305
|
+
# # ┌──────┐
|
306
|
+
# # │ foo │
|
307
|
+
# # │ --- │
|
308
|
+
# # │ str │
|
309
|
+
# # ╞══════╡
|
310
|
+
# # │ null │
|
311
|
+
# # └──────┘
|
312
|
+
def concat(delimiter = "-", ignore_nulls: true)
|
313
|
+
Utils.wrap_expr(_rbexpr.str_concat(delimiter, ignore_nulls))
|
294
314
|
end
|
295
315
|
|
296
316
|
# Transform to uppercase variant.
|
@@ -337,7 +357,7 @@ module Polars
|
|
337
357
|
|
338
358
|
# Remove leading and trailing whitespace.
|
339
359
|
#
|
340
|
-
# @param
|
360
|
+
# @param characters [String, nil]
|
341
361
|
# An optional set of characters to trim; if nil, whitespace is removed.
|
342
362
|
#
|
343
363
|
# @return [Expr]
|
@@ -356,16 +376,15 @@ module Polars
|
|
356
376
|
# # │ trail │
|
357
377
|
# # │ both │
|
358
378
|
# # └───────┘
|
359
|
-
def
|
360
|
-
|
361
|
-
|
362
|
-
end
|
363
|
-
Utils.wrap_expr(_rbexpr.str_strip(matches))
|
379
|
+
def strip_chars(characters = nil)
|
380
|
+
characters = Utils.parse_as_expression(characters, str_as_lit: true)
|
381
|
+
Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
|
364
382
|
end
|
383
|
+
alias_method :strip, :strip_chars
|
365
384
|
|
366
385
|
# Remove leading whitespace.
|
367
386
|
#
|
368
|
-
# @param
|
387
|
+
# @param characters [String, nil]
|
369
388
|
# An optional set of characters to trim; if nil, whitespace is removed.
|
370
389
|
#
|
371
390
|
# @return [Expr]
|
@@ -384,16 +403,15 @@ module Polars
|
|
384
403
|
# # │ trail │
|
385
404
|
# # │ both │
|
386
405
|
# # └────────┘
|
387
|
-
def
|
388
|
-
|
389
|
-
|
390
|
-
end
|
391
|
-
Utils.wrap_expr(_rbexpr.str_lstrip(matches))
|
406
|
+
def strip_chars_start(characters = nil)
|
407
|
+
characters = Utils.parse_as_expression(characters, str_as_lit: true)
|
408
|
+
Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
|
392
409
|
end
|
410
|
+
alias_method :lstrip, :strip_chars_start
|
393
411
|
|
394
412
|
# Remove trailing whitespace.
|
395
413
|
#
|
396
|
-
# @param
|
414
|
+
# @param characters [String, nil]
|
397
415
|
# An optional set of characters to trim; if nil, whitespace is removed.
|
398
416
|
#
|
399
417
|
# @return [Expr]
|
@@ -412,12 +430,11 @@ module Polars
|
|
412
430
|
# # │ trail │
|
413
431
|
# # │ both │
|
414
432
|
# # └───────┘
|
415
|
-
def
|
416
|
-
|
417
|
-
|
418
|
-
end
|
419
|
-
Utils.wrap_expr(_rbexpr.str_rstrip(matches))
|
433
|
+
def strip_chars_end(characters = nil)
|
434
|
+
characters = Utils.parse_as_expression(characters, str_as_lit: true)
|
435
|
+
Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
|
420
436
|
end
|
437
|
+
alias_method :rstrip, :strip_chars_end
|
421
438
|
|
422
439
|
# Fills the string with zeroes.
|
423
440
|
#
|
@@ -461,13 +478,13 @@ module Polars
|
|
461
478
|
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
462
479
|
end
|
463
480
|
|
464
|
-
# Return the string left justified in a string of length `
|
481
|
+
# Return the string left justified in a string of length `length`.
|
465
482
|
#
|
466
483
|
# Padding is done using the specified `fillchar`.
|
467
|
-
# The original string is returned if `
|
484
|
+
# The original string is returned if `length` is less than or equal to
|
468
485
|
# `s.length`.
|
469
486
|
#
|
470
|
-
# @param
|
487
|
+
# @param length [Integer]
|
471
488
|
# Justify left to this length.
|
472
489
|
# @param fillchar [String]
|
473
490
|
# Fill with this ASCII character.
|
@@ -489,17 +506,18 @@ module Polars
|
|
489
506
|
# # │ null │
|
490
507
|
# # │ hippopotamus │
|
491
508
|
# # └──────────────┘
|
492
|
-
def ljust(
|
493
|
-
Utils.wrap_expr(_rbexpr.
|
509
|
+
def ljust(length, fillchar = " ")
|
510
|
+
Utils.wrap_expr(_rbexpr.str_pad_end(length, fillchar))
|
494
511
|
end
|
512
|
+
alias_method :pad_end, :ljust
|
495
513
|
|
496
|
-
# Return the string right justified in a string of length `
|
514
|
+
# Return the string right justified in a string of length `length`.
|
497
515
|
#
|
498
516
|
# Padding is done using the specified `fillchar`.
|
499
|
-
# The original string is returned if `
|
517
|
+
# The original string is returned if `length` is less than or equal to
|
500
518
|
# `s.length`.
|
501
519
|
#
|
502
|
-
# @param
|
520
|
+
# @param length [Integer]
|
503
521
|
# Justify right to this length.
|
504
522
|
# @param fillchar [String]
|
505
523
|
# Fill with this ASCII character.
|
@@ -521,9 +539,10 @@ module Polars
|
|
521
539
|
# # │ null │
|
522
540
|
# # │ hippopotamus │
|
523
541
|
# # └──────────────┘
|
524
|
-
def rjust(
|
525
|
-
Utils.wrap_expr(_rbexpr.
|
542
|
+
def rjust(length, fillchar = " ")
|
543
|
+
Utils.wrap_expr(_rbexpr.str_pad_start(length, fillchar))
|
526
544
|
end
|
545
|
+
alias_method :pad_start, :rjust
|
527
546
|
|
528
547
|
# Check if string contains a substring that matches a regex.
|
529
548
|
#
|
@@ -864,9 +883,11 @@ module Polars
|
|
864
883
|
# # │ 5 │
|
865
884
|
# # │ 6 │
|
866
885
|
# # └──────────────┘
|
867
|
-
def
|
868
|
-
Utils.
|
886
|
+
def count_matches(pattern, literal: false)
|
887
|
+
pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
|
888
|
+
Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
|
869
889
|
end
|
890
|
+
alias_method :count_match, :count_matches
|
870
891
|
|
871
892
|
# Split the string by a substring.
|
872
893
|
#
|
@@ -892,6 +913,7 @@ module Polars
|
|
892
913
|
# # │ ["foo", "bar", "baz"] │
|
893
914
|
# # └───────────────────────┘
|
894
915
|
def split(by, inclusive: false)
|
916
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
895
917
|
if inclusive
|
896
918
|
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
897
919
|
else
|
@@ -934,6 +956,7 @@ module Polars
|
|
934
956
|
# # │ {"d","4"} │
|
935
957
|
# # └─────────────┘
|
936
958
|
def split_exact(by, n, inclusive: false)
|
959
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
937
960
|
if inclusive
|
938
961
|
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
939
962
|
else
|
@@ -970,6 +993,7 @@ module Polars
|
|
970
993
|
# # │ {"foo","bar baz"} │
|
971
994
|
# # └───────────────────┘
|
972
995
|
def splitn(by, n)
|
996
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
973
997
|
Utils.wrap_expr(_rbexpr.str_splitn(by, n))
|
974
998
|
end
|
975
999
|
|
@@ -1091,6 +1115,52 @@ module Polars
|
|
1091
1115
|
Utils.wrap_expr(_rbexpr.str_explode)
|
1092
1116
|
end
|
1093
1117
|
|
1118
|
+
# Convert a Utf8 column into an Int64 column, parsing each string in the given base (radix).
|
1119
|
+
#
|
1120
|
+
# @param base [Integer]
|
1121
|
+
# Positive integer which is the base of the string we are parsing.
|
1122
|
+
# Default: 10.
|
1123
|
+
# @param strict [Boolean]
|
1124
|
+
# If true (the default), raise any ParseError or overflow as a ComputeError.
|
1125
|
+
# If false, silently convert unparsable or overflowing values to null.
|
1126
|
+
#
|
1127
|
+
# @return [Expr]
|
1128
|
+
#
|
1129
|
+
# @example
|
1130
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
1131
|
+
# df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
|
1132
|
+
# # =>
|
1133
|
+
# # shape: (4, 2)
|
1134
|
+
# # ┌─────────┬────────┐
|
1135
|
+
# # │ bin ┆ parsed │
|
1136
|
+
# # │ --- ┆ --- │
|
1137
|
+
# # │ str ┆ i64 │
|
1138
|
+
# # ╞═════════╪════════╡
|
1139
|
+
# # │ 110 ┆ 6 │
|
1140
|
+
# # │ 101 ┆ 5 │
|
1141
|
+
# # │ 010 ┆ 2 │
|
1142
|
+
# # │ invalid ┆ null │
|
1143
|
+
# # └─────────┴────────┘
|
1144
|
+
#
|
1145
|
+
# @example
|
1146
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1147
|
+
# df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
|
1148
|
+
# # =>
|
1149
|
+
# # shape: (4, 2)
|
1150
|
+
# # ┌──────┬────────┐
|
1151
|
+
# # │ hex ┆ parsed │
|
1152
|
+
# # │ --- ┆ --- │
|
1153
|
+
# # │ str ┆ i64 │
|
1154
|
+
# # ╞══════╪════════╡
|
1155
|
+
# # │ fa1e ┆ 64030 │
|
1156
|
+
# # │ ff00 ┆ 65280 │
|
1157
|
+
# # │ cafe ┆ 51966 │
|
1158
|
+
# # │ null ┆ null │
|
1159
|
+
# # └──────┴────────┘
|
1160
|
+
def to_integer(base: 10, strict: true)
|
1161
|
+
Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
|
1162
|
+
end
|
1163
|
+
|
1094
1164
|
# Parse integers with base radix from strings.
|
1095
1165
|
#
|
1096
1166
|
# By default base 2. ParseError/Overflows become Nulls.
|
@@ -1119,24 +1189,8 @@ module Polars
|
|
1119
1189
|
# # │ 2 │
|
1120
1190
|
# # │ null │
|
1121
1191
|
# # └──────┘
|
1122
|
-
#
|
1123
|
-
# @example
|
1124
|
-
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1125
|
-
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1126
|
-
# # =>
|
1127
|
-
# # shape: (4, 1)
|
1128
|
-
# # ┌───────┐
|
1129
|
-
# # │ hex │
|
1130
|
-
# # │ --- │
|
1131
|
-
# # │ i32 │
|
1132
|
-
# # ╞═══════╡
|
1133
|
-
# # │ 64030 │
|
1134
|
-
# # │ 65280 │
|
1135
|
-
# # │ 51966 │
|
1136
|
-
# # │ null │
|
1137
|
-
# # └───────┘
|
1138
1192
|
# Parse integers from strings in the given radix and cast the result to Int32.
#
# @param radix [Integer]
#   Base of the strings being parsed. Defaults to 2.
# @param strict [Boolean]
#   Forwarded to both `to_integer` and `cast`.
#
# @return [Expr]
def parse_int(radix = 2, strict: true)
  # Forward the caller's radix; it was previously hard-coded to 2, so an
  # explicit radix such as 16 was silently ignored.
  to_integer(base: radix, strict: strict).cast(Int32, strict: strict)
end
|
1141
1195
|
|
1142
1196
|
private
|
@@ -82,7 +82,8 @@ module Polars
|
|
82
82
|
time_zone: nil,
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
|
-
cache: true
|
85
|
+
cache: true,
|
86
|
+
use_earliest: nil
|
86
87
|
)
|
87
88
|
super
|
88
89
|
end
|
@@ -232,9 +233,23 @@ module Polars
|
|
232
233
|
# @return [Series]
|
233
234
|
#
|
234
235
|
# @example
|
235
|
-
# Polars::Series.new([1, nil, 2]).str.concat("-")
|
236
|
-
# # =>
|
237
|
-
|
236
|
+
# Polars::Series.new([1, nil, 2]).str.concat("-")
|
237
|
+
# # =>
|
238
|
+
# # shape: (1,)
|
239
|
+
# # Series: '' [str]
|
240
|
+
# # [
|
241
|
+
# # "1-2"
|
242
|
+
# # ]
|
243
|
+
#
|
244
|
+
# @example
|
245
|
+
# Polars::Series.new([1, nil, 2]).str.concat("-", ignore_nulls: false)
|
246
|
+
# # =>
|
247
|
+
# # shape: (1,)
|
248
|
+
# # Series: '' [str]
|
249
|
+
# # [
|
250
|
+
# # null
|
251
|
+
# # ]
|
252
|
+
def concat(delimiter = "-", ignore_nulls: true)
|
238
253
|
super
|
239
254
|
end
|
240
255
|
|
data/lib/polars/utils.rb
CHANGED
@@ -308,5 +308,17 @@ module Polars
|
|
308
308
|
|
309
309
|
expr._rbexpr
|
310
310
|
end
|
311
|
+
|
312
|
+
USE_EARLIEST_TO_AMBIGUOUS = {
|
313
|
+
true => "earliest",
|
314
|
+
false => "latest"
|
315
|
+
}
|
316
|
+
|
317
|
+
def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
318
|
+
unless use_earliest.nil?
|
319
|
+
ambiguous = USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
|
320
|
+
end
|
321
|
+
ambiguous
|
322
|
+
end
|
311
323
|
end
|
312
324
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -19,6 +19,7 @@ require_relative "polars/binary_expr"
|
|
19
19
|
require_relative "polars/binary_name_space"
|
20
20
|
require_relative "polars/cat_expr"
|
21
21
|
require_relative "polars/cat_name_space"
|
22
|
+
require_relative "polars/config"
|
22
23
|
require_relative "polars/convert"
|
23
24
|
require_relative "polars/plot"
|
24
25
|
require_relative "polars/data_frame"
|
@@ -37,9 +38,11 @@ require_relative "polars/lazy_group_by"
|
|
37
38
|
require_relative "polars/list_expr"
|
38
39
|
require_relative "polars/list_name_space"
|
39
40
|
require_relative "polars/meta_expr"
|
41
|
+
require_relative "polars/name_expr"
|
40
42
|
require_relative "polars/rolling_group_by"
|
41
43
|
require_relative "polars/series"
|
42
44
|
require_relative "polars/slice"
|
45
|
+
require_relative "polars/sql_context"
|
43
46
|
require_relative "polars/string_expr"
|
44
47
|
require_relative "polars/string_name_space"
|
45
48
|
require_relative "polars/struct_expr"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -35,6 +35,7 @@ files:
|
|
35
35
|
- lib/polars/binary_name_space.rb
|
36
36
|
- lib/polars/cat_expr.rb
|
37
37
|
- lib/polars/cat_name_space.rb
|
38
|
+
- lib/polars/config.rb
|
38
39
|
- lib/polars/convert.rb
|
39
40
|
- lib/polars/data_frame.rb
|
40
41
|
- lib/polars/data_types.rb
|
@@ -53,10 +54,12 @@ files:
|
|
53
54
|
- lib/polars/list_expr.rb
|
54
55
|
- lib/polars/list_name_space.rb
|
55
56
|
- lib/polars/meta_expr.rb
|
57
|
+
- lib/polars/name_expr.rb
|
56
58
|
- lib/polars/plot.rb
|
57
59
|
- lib/polars/rolling_group_by.rb
|
58
60
|
- lib/polars/series.rb
|
59
61
|
- lib/polars/slice.rb
|
62
|
+
- lib/polars/sql_context.rb
|
60
63
|
- lib/polars/string_expr.rb
|
61
64
|
- lib/polars/string_name_space.rb
|
62
65
|
- lib/polars/struct_expr.rb
|