polars-df 0.13.0-arm64-darwin → 0.15.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE-THIRD-PARTY.txt +24439 -12853
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -3
- data/lib/polars/functions.rb +0 -57
data/LICENSE.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
Copyright (c) 2020 Ritchie Vink
|
2
2
|
Copyright (c) 2022-2024 Andrew Kane
|
3
|
+
Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
3
4
|
|
4
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -17,8 +17,7 @@ gem "polars-df"
|
|
17
17
|
This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
Polars.
|
21
|
-
.lazy
|
20
|
+
Polars.scan_csv("iris.csv")
|
22
21
|
.filter(Polars.col("sepal_length") > 5)
|
23
22
|
.group_by("species")
|
24
23
|
.agg(Polars.all.sum)
|
Binary file
|
Binary file
|
Binary file
|
@@ -26,7 +26,6 @@ module Polars
|
|
26
26
|
skip_rows_after_header: 0,
|
27
27
|
row_count_name: nil,
|
28
28
|
row_count_offset: 0,
|
29
|
-
sample_size: 1024,
|
30
29
|
eol_char: "\n",
|
31
30
|
new_columns: nil,
|
32
31
|
raise_if_empty: true,
|
@@ -79,7 +78,6 @@ module Polars
|
|
79
78
|
parse_dates,
|
80
79
|
skip_rows_after_header,
|
81
80
|
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
82
|
-
sample_size,
|
83
81
|
eol_char,
|
84
82
|
raise_if_empty,
|
85
83
|
truncate_ragged_lines,
|
data/lib/polars/binary_expr.rb
CHANGED
@@ -11,32 +11,112 @@ module Polars
|
|
11
11
|
|
12
12
|
# Check if binaries in Series contain a binary substring.
|
13
13
|
#
|
14
|
-
# @param
|
14
|
+
# @param literal [String]
|
15
15
|
# The binary substring to look for
|
16
16
|
#
|
17
17
|
# @return [Expr]
|
18
|
-
|
19
|
-
|
18
|
+
#
|
19
|
+
# @example
|
20
|
+
# colors = Polars::DataFrame.new(
|
21
|
+
# {
|
22
|
+
# "name" => ["black", "yellow", "blue"],
|
23
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
24
|
+
# "lit" => ["\x00".b, "\xff\x00".b, "\xff\xff".b]
|
25
|
+
# }
|
26
|
+
# )
|
27
|
+
# colors.select(
|
28
|
+
# "name",
|
29
|
+
# Polars.col("code").bin.contains("\xff".b).alias("contains_with_lit"),
|
30
|
+
# Polars.col("code").bin.contains(Polars.col("lit")).alias("contains_with_expr"),
|
31
|
+
# )
|
32
|
+
# # =>
|
33
|
+
# # shape: (3, 3)
|
34
|
+
# # ┌────────┬───────────────────┬────────────────────┐
|
35
|
+
# # │ name ┆ contains_with_lit ┆ contains_with_expr │
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
37
|
+
# # │ str ┆ bool ┆ bool │
|
38
|
+
# # ╞════════╪═══════════════════╪════════════════════╡
|
39
|
+
# # │ black ┆ false ┆ true │
|
40
|
+
# # │ yellow ┆ true ┆ true │
|
41
|
+
# # │ blue ┆ true ┆ false │
|
42
|
+
# # └────────┴───────────────────┴────────────────────┘
|
43
|
+
def contains(literal)
|
44
|
+
literal = Utils.parse_into_expression(literal, str_as_lit: true)
|
45
|
+
Utils.wrap_expr(_rbexpr.binary_contains(literal))
|
20
46
|
end
|
21
47
|
|
22
48
|
# Check if values end with a binary substring.
|
23
49
|
#
|
24
|
-
# @param
|
50
|
+
# @param suffix [String]
|
25
51
|
# Suffix substring.
|
26
52
|
#
|
27
53
|
# @return [Expr]
|
28
|
-
|
29
|
-
|
54
|
+
#
|
55
|
+
# @example
|
56
|
+
# colors = Polars::DataFrame.new(
|
57
|
+
# {
|
58
|
+
# "name" => ["black", "yellow", "blue"],
|
59
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
60
|
+
# "suffix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
|
61
|
+
# }
|
62
|
+
# )
|
63
|
+
# colors.select(
|
64
|
+
# "name",
|
65
|
+
# Polars.col("code").bin.ends_with("\xff".b).alias("ends_with_lit"),
|
66
|
+
# Polars.col("code").bin.ends_with(Polars.col("suffix")).alias("ends_with_expr")
|
67
|
+
# )
|
68
|
+
# # =>
|
69
|
+
# # shape: (3, 3)
|
70
|
+
# # ┌────────┬───────────────┬────────────────┐
|
71
|
+
# # │ name ┆ ends_with_lit ┆ ends_with_expr │
|
72
|
+
# # │ --- ┆ --- ┆ --- │
|
73
|
+
# # │ str ┆ bool ┆ bool │
|
74
|
+
# # ╞════════╪═══════════════╪════════════════╡
|
75
|
+
# # │ black ┆ false ┆ true │
|
76
|
+
# # │ yellow ┆ false ┆ true │
|
77
|
+
# # │ blue ┆ true ┆ false │
|
78
|
+
# # └────────┴───────────────┴────────────────┘
|
79
|
+
def ends_with(suffix)
|
80
|
+
suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
|
81
|
+
Utils.wrap_expr(_rbexpr.binary_ends_with(suffix))
|
30
82
|
end
|
31
83
|
|
32
84
|
# Check if values start with a binary substring.
|
33
85
|
#
|
34
|
-
# @param
|
86
|
+
# @param prefix [String]
|
35
87
|
# Prefix substring.
|
36
88
|
#
|
37
89
|
# @return [Expr]
|
38
|
-
|
39
|
-
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# colors = Polars::DataFrame.new(
|
93
|
+
# {
|
94
|
+
# "name": ["black", "yellow", "blue"],
|
95
|
+
# "code": ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
96
|
+
# "prefix": ["\x00".b, "\xff\x00".b, "\x00\x00".b]
|
97
|
+
# }
|
98
|
+
# )
|
99
|
+
# colors.select(
|
100
|
+
# "name",
|
101
|
+
# Polars.col("code").bin.starts_with("\xff".b).alias("starts_with_lit"),
|
102
|
+
# Polars.col("code")
|
103
|
+
# .bin.starts_with(Polars.col("prefix"))
|
104
|
+
# .alias("starts_with_expr")
|
105
|
+
# )
|
106
|
+
# # =>
|
107
|
+
# # shape: (3, 3)
|
108
|
+
# # ┌────────┬─────────────────┬──────────────────┐
|
109
|
+
# # │ name ┆ starts_with_lit ┆ starts_with_expr │
|
110
|
+
# # │ --- ┆ --- ┆ --- │
|
111
|
+
# # │ str ┆ bool ┆ bool │
|
112
|
+
# # ╞════════╪═════════════════╪══════════════════╡
|
113
|
+
# # │ black ┆ false ┆ true │
|
114
|
+
# # │ yellow ┆ true ┆ false │
|
115
|
+
# # │ blue ┆ false ┆ true │
|
116
|
+
# # └────────┴─────────────────┴──────────────────┘
|
117
|
+
def starts_with(prefix)
|
118
|
+
prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
|
119
|
+
Utils.wrap_expr(_rbexpr.binary_starts_with(prefix))
|
40
120
|
end
|
41
121
|
|
42
122
|
# Decode a value using the provided encoding.
|
@@ -48,6 +128,28 @@ module Polars
|
|
48
128
|
# otherwise mask out with a null value.
|
49
129
|
#
|
50
130
|
# @return [Expr]
|
131
|
+
#
|
132
|
+
# @example
|
133
|
+
# colors = Polars::DataFrame.new(
|
134
|
+
# {
|
135
|
+
# "name" => ["black", "yellow", "blue"],
|
136
|
+
# "encoded" => ["000000".b, "ffff00".b, "0000ff".b]
|
137
|
+
# }
|
138
|
+
# )
|
139
|
+
# colors.with_columns(
|
140
|
+
# Polars.col("encoded").bin.decode("hex").alias("code")
|
141
|
+
# )
|
142
|
+
# # =>
|
143
|
+
# # shape: (3, 3)
|
144
|
+
# # ┌────────┬───────────┬─────────────────┐
|
145
|
+
# # │ name ┆ encoded ┆ code │
|
146
|
+
# # │ --- ┆ --- ┆ --- │
|
147
|
+
# # │ str ┆ binary ┆ binary │
|
148
|
+
# # ╞════════╪═══════════╪═════════════════╡
|
149
|
+
# # │ black ┆ b"000000" ┆ b"\x00\x00\x00" │
|
150
|
+
# # │ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
|
151
|
+
# # │ blue ┆ b"0000ff" ┆ b"\x00\x00\xff" │
|
152
|
+
# # └────────┴───────────┴─────────────────┘
|
51
153
|
def decode(encoding, strict: true)
|
52
154
|
if encoding == "hex"
|
53
155
|
Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
|
@@ -64,6 +166,28 @@ module Polars
|
|
64
166
|
# The encoding to use.
|
65
167
|
#
|
66
168
|
# @return [Expr]
|
169
|
+
#
|
170
|
+
# @example
|
171
|
+
# colors = Polars::DataFrame.new(
|
172
|
+
# {
|
173
|
+
# "color" => ["black", "yellow", "blue"],
|
174
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]
|
175
|
+
# }
|
176
|
+
# )
|
177
|
+
# colors.with_columns(
|
178
|
+
# Polars.col("code").bin.encode("hex").alias("encoded")
|
179
|
+
# )
|
180
|
+
# # =>
|
181
|
+
# # shape: (3, 3)
|
182
|
+
# # ┌────────┬─────────────────┬─────────┐
|
183
|
+
# # │ color ┆ code ┆ encoded │
|
184
|
+
# # │ --- ┆ --- ┆ --- │
|
185
|
+
# # │ str ┆ binary ┆ str │
|
186
|
+
# # ╞════════╪═════════════════╪═════════╡
|
187
|
+
# # │ black ┆ b"\x00\x00\x00" ┆ 000000 │
|
188
|
+
# # │ yellow ┆ b"\xff\xff\x00" ┆ ffff00 │
|
189
|
+
# # │ blue ┆ b"\x00\x00\xff" ┆ 0000ff │
|
190
|
+
# # └────────┴─────────────────┴─────────┘
|
67
191
|
def encode(encoding)
|
68
192
|
if encoding == "hex"
|
69
193
|
Utils.wrap_expr(_rbexpr.binary_hex_encode)
|
@@ -12,31 +12,67 @@ module Polars
|
|
12
12
|
|
13
13
|
# Check if binaries in Series contain a binary substring.
|
14
14
|
#
|
15
|
-
# @param
|
15
|
+
# @param literal [String]
|
16
16
|
# The binary substring to look for
|
17
17
|
#
|
18
18
|
# @return [Series]
|
19
|
-
|
19
|
+
#
|
20
|
+
# @example
|
21
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
22
|
+
# s.bin.contains("\xff".b)
|
23
|
+
# # =>
|
24
|
+
# # shape: (3,)
|
25
|
+
# # Series: 'colors' [bool]
|
26
|
+
# # [
|
27
|
+
# # false
|
28
|
+
# # true
|
29
|
+
# # true
|
30
|
+
# # ]
|
31
|
+
def contains(literal)
|
20
32
|
super
|
21
33
|
end
|
22
34
|
|
23
35
|
# Check if values end with a binary substring.
|
24
36
|
#
|
25
|
-
# @param
|
37
|
+
# @param suffix [String]
|
26
38
|
# Suffix substring.
|
27
39
|
#
|
28
40
|
# @return [Series]
|
29
|
-
|
41
|
+
#
|
42
|
+
# @example
|
43
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
44
|
+
# s.bin.ends_with("\x00".b)
|
45
|
+
# # =>
|
46
|
+
# # shape: (3,)
|
47
|
+
# # Series: 'colors' [bool]
|
48
|
+
# # [
|
49
|
+
# # true
|
50
|
+
# # true
|
51
|
+
# # false
|
52
|
+
# # ]
|
53
|
+
def ends_with(suffix)
|
30
54
|
super
|
31
55
|
end
|
32
56
|
|
33
57
|
# Check if values start with a binary substring.
|
34
58
|
#
|
35
|
-
# @param
|
59
|
+
# @param prefix [String]
|
36
60
|
# Prefix substring.
|
37
61
|
#
|
38
62
|
# @return [Series]
|
39
|
-
|
63
|
+
#
|
64
|
+
# @example
|
65
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
66
|
+
# s.bin.starts_with("\x00".b)
|
67
|
+
# # =>
|
68
|
+
# # shape: (3,)
|
69
|
+
# # Series: 'colors' [bool]
|
70
|
+
# # [
|
71
|
+
# # true
|
72
|
+
# # false
|
73
|
+
# # true
|
74
|
+
# # ]
|
75
|
+
def starts_with(prefix)
|
40
76
|
super
|
41
77
|
end
|
42
78
|
|
@@ -49,6 +85,42 @@ module Polars
|
|
49
85
|
# otherwise mask out with a null value.
|
50
86
|
#
|
51
87
|
# @return [Series]
|
88
|
+
#
|
89
|
+
# @example Decode values using hexadecimal encoding.
|
90
|
+
# s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "0000ff".b])
|
91
|
+
# s.bin.decode("hex")
|
92
|
+
# # =>
|
93
|
+
# # shape: (3,)
|
94
|
+
# # Series: 'colors' [binary]
|
95
|
+
# # [
|
96
|
+
# # b"\x00\x00\x00"
|
97
|
+
# # b"\xff\xff\x00"
|
98
|
+
# # b"\x00\x00\xff"
|
99
|
+
# # ]
|
100
|
+
#
|
101
|
+
# @example Decode values using Base64 encoding.
|
102
|
+
# s = Polars::Series.new("colors", ["AAAA".b, "//8A".b, "AAD/".b])
|
103
|
+
# s.bin.decode("base64")
|
104
|
+
# # =>
|
105
|
+
# # shape: (3,)
|
106
|
+
# # Series: 'colors' [binary]
|
107
|
+
# # [
|
108
|
+
# # b"\x00\x00\x00"
|
109
|
+
# # b"\xff\xff\x00"
|
110
|
+
# # b"\x00\x00\xff"
|
111
|
+
# # ]
|
112
|
+
#
|
113
|
+
# @example Set `strict: false` to set invalid values to null instead of raising an error.
|
114
|
+
# s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "invalid_value".b])
|
115
|
+
# s.bin.decode("hex", strict: false)
|
116
|
+
# # =>
|
117
|
+
# # shape: (3,)
|
118
|
+
# # Series: 'colors' [binary]
|
119
|
+
# # [
|
120
|
+
# # b"\x00\x00\x00"
|
121
|
+
# # b"\xff\xff\x00"
|
122
|
+
# # null
|
123
|
+
# # ]
|
52
124
|
def decode(encoding, strict: true)
|
53
125
|
super
|
54
126
|
end
|
@@ -59,6 +131,29 @@ module Polars
|
|
59
131
|
# The encoding to use.
|
60
132
|
#
|
61
133
|
# @return [Series]
|
134
|
+
#
|
135
|
+
# @example Encode values using hexadecimal encoding.
|
136
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
137
|
+
# s.bin.encode("hex")
|
138
|
+
# # =>
|
139
|
+
# # shape: (3,)
|
140
|
+
# # Series: 'colors' [str]
|
141
|
+
# # [
|
142
|
+
# # "000000"
|
143
|
+
# # "ffff00"
|
144
|
+
# # "0000ff"
|
145
|
+
# # ]
|
146
|
+
#
|
147
|
+
# @example Encode values using Base64 encoding.
|
148
|
+
# s.bin.encode("base64")
|
149
|
+
# # =>
|
150
|
+
# # shape: (3,)
|
151
|
+
# # Series: 'colors' [str]
|
152
|
+
# # [
|
153
|
+
# # "AAAA"
|
154
|
+
# # "//8A"
|
155
|
+
# # "AAD/"
|
156
|
+
# # ]
|
62
157
|
def encode(encoding)
|
63
158
|
super
|
64
159
|
end
|