polars-df 0.14.0-x86_64-darwin → 0.16.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE-THIRD-PARTY.txt +24956 -14152
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +9 -8
- data/lib/polars/functions.rb +0 -57
data/LICENSE.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
Copyright (c) 2020 Ritchie Vink
|
2
2
|
Copyright (c) 2022-2024 Andrew Kane
|
3
|
+
Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
3
4
|
|
4
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -14,18 +14,17 @@ gem "polars-df"
|
|
14
14
|
|
15
15
|
## Getting Started
|
16
16
|
|
17
|
-
This library follows the [Polars Python API](https://pola
|
17
|
+
This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
Polars.
|
21
|
-
.lazy
|
20
|
+
Polars.scan_csv("iris.csv")
|
22
21
|
.filter(Polars.col("sepal_length") > 5)
|
23
22
|
.group_by("species")
|
24
23
|
.agg(Polars.all.sum)
|
25
24
|
.collect
|
26
25
|
```
|
27
26
|
|
28
|
-
You can follow [Polars tutorials](https://pola
|
27
|
+
You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
28
|
|
30
29
|
## Reference
|
31
30
|
|
@@ -89,6 +88,15 @@ From Avro
|
|
89
88
|
Polars.read_avro("file.avro")
|
90
89
|
```
|
91
90
|
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental, unreleased]
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
Polars.read_delta("./table")
|
95
|
+
|
96
|
+
# or lazily with
|
97
|
+
Polars.scan_delta("./table")
|
98
|
+
```
|
99
|
+
|
92
100
|
From a hash
|
93
101
|
|
94
102
|
```ruby
|
@@ -337,6 +345,32 @@ Parquet
|
|
337
345
|
df.write_parquet("file.parquet")
|
338
346
|
```
|
339
347
|
|
348
|
+
JSON
|
349
|
+
|
350
|
+
```ruby
|
351
|
+
df.write_json("file.json")
|
352
|
+
# or
|
353
|
+
df.write_ndjson("file.ndjson")
|
354
|
+
```
|
355
|
+
|
356
|
+
Feather / Arrow IPC
|
357
|
+
|
358
|
+
```ruby
|
359
|
+
df.write_ipc("file.arrow")
|
360
|
+
```
|
361
|
+
|
362
|
+
Avro
|
363
|
+
|
364
|
+
```ruby
|
365
|
+
df.write_avro("file.avro")
|
366
|
+
```
|
367
|
+
|
368
|
+
Delta Lake [experimental, unreleased]
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
df.write_delta("./table")
|
372
|
+
```
|
373
|
+
|
340
374
|
Numo array
|
341
375
|
|
342
376
|
```ruby
|
Binary file
|
Binary file
|
Binary file
|
@@ -26,7 +26,6 @@ module Polars
|
|
26
26
|
skip_rows_after_header: 0,
|
27
27
|
row_count_name: nil,
|
28
28
|
row_count_offset: 0,
|
29
|
-
sample_size: 1024,
|
30
29
|
eol_char: "\n",
|
31
30
|
new_columns: nil,
|
32
31
|
raise_if_empty: true,
|
@@ -79,7 +78,6 @@ module Polars
|
|
79
78
|
parse_dates,
|
80
79
|
skip_rows_after_header,
|
81
80
|
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
82
|
-
sample_size,
|
83
81
|
eol_char,
|
84
82
|
raise_if_empty,
|
85
83
|
truncate_ragged_lines,
|
data/lib/polars/binary_expr.rb
CHANGED
@@ -11,32 +11,112 @@ module Polars
|
|
11
11
|
|
12
12
|
# Check if binaries in Series contain a binary substring.
|
13
13
|
#
|
14
|
-
# @param
|
14
|
+
# @param literal [String]
|
15
15
|
# The binary substring to look for
|
16
16
|
#
|
17
17
|
# @return [Expr]
|
18
|
-
|
19
|
-
|
18
|
+
#
|
19
|
+
# @example
|
20
|
+
# colors = Polars::DataFrame.new(
|
21
|
+
# {
|
22
|
+
# "name" => ["black", "yellow", "blue"],
|
23
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
24
|
+
# "lit" => ["\x00".b, "\xff\x00".b, "\xff\xff".b]
|
25
|
+
# }
|
26
|
+
# )
|
27
|
+
# colors.select(
|
28
|
+
# "name",
|
29
|
+
# Polars.col("code").bin.contains("\xff".b).alias("contains_with_lit"),
|
30
|
+
# Polars.col("code").bin.contains(Polars.col("lit")).alias("contains_with_expr"),
|
31
|
+
# )
|
32
|
+
# # =>
|
33
|
+
# # shape: (3, 3)
|
34
|
+
# # ┌────────┬───────────────────┬────────────────────┐
|
35
|
+
# # │ name ┆ contains_with_lit ┆ contains_with_expr │
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
37
|
+
# # │ str ┆ bool ┆ bool │
|
38
|
+
# # ╞════════╪═══════════════════╪════════════════════╡
|
39
|
+
# # │ black ┆ false ┆ true │
|
40
|
+
# # │ yellow ┆ true ┆ true │
|
41
|
+
# # │ blue ┆ true ┆ false │
|
42
|
+
# # └────────┴───────────────────┴────────────────────┘
|
43
|
+
def contains(literal)
|
44
|
+
literal = Utils.parse_into_expression(literal, str_as_lit: true)
|
45
|
+
Utils.wrap_expr(_rbexpr.binary_contains(literal))
|
20
46
|
end
|
21
47
|
|
22
48
|
# Check if values end with a binary substring.
|
23
49
|
#
|
24
|
-
# @param
|
50
|
+
# @param suffix [String]
|
25
51
|
# Suffix substring.
|
26
52
|
#
|
27
53
|
# @return [Expr]
|
28
|
-
|
29
|
-
|
54
|
+
#
|
55
|
+
# @example
|
56
|
+
# colors = Polars::DataFrame.new(
|
57
|
+
# {
|
58
|
+
# "name" => ["black", "yellow", "blue"],
|
59
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
60
|
+
# "suffix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
|
61
|
+
# }
|
62
|
+
# )
|
63
|
+
# colors.select(
|
64
|
+
# "name",
|
65
|
+
# Polars.col("code").bin.ends_with("\xff".b).alias("ends_with_lit"),
|
66
|
+
# Polars.col("code").bin.ends_with(Polars.col("suffix")).alias("ends_with_expr")
|
67
|
+
# )
|
68
|
+
# # =>
|
69
|
+
# # shape: (3, 3)
|
70
|
+
# # ┌────────┬───────────────┬────────────────┐
|
71
|
+
# # │ name ┆ ends_with_lit ┆ ends_with_expr │
|
72
|
+
# # │ --- ┆ --- ┆ --- │
|
73
|
+
# # │ str ┆ bool ┆ bool │
|
74
|
+
# # ╞════════╪═══════════════╪════════════════╡
|
75
|
+
# # │ black ┆ false ┆ true │
|
76
|
+
# # │ yellow ┆ false ┆ true │
|
77
|
+
# # │ blue ┆ true ┆ false │
|
78
|
+
# # └────────┴───────────────┴────────────────┘
|
79
|
+
def ends_with(suffix)
|
80
|
+
suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
|
81
|
+
Utils.wrap_expr(_rbexpr.binary_ends_with(suffix))
|
30
82
|
end
|
31
83
|
|
32
84
|
# Check if values start with a binary substring.
|
33
85
|
#
|
34
|
-
# @param
|
86
|
+
# @param prefix [String]
|
35
87
|
# Prefix substring.
|
36
88
|
#
|
37
89
|
# @return [Expr]
|
38
|
-
|
39
|
-
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# colors = Polars::DataFrame.new(
|
93
|
+
# {
|
94
|
+
# "name" => ["black", "yellow", "blue"],
|
95
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
96
|
+
# "prefix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
|
97
|
+
# }
|
98
|
+
# )
|
99
|
+
# colors.select(
|
100
|
+
# "name",
|
101
|
+
# Polars.col("code").bin.starts_with("\xff".b).alias("starts_with_lit"),
|
102
|
+
# Polars.col("code")
|
103
|
+
# .bin.starts_with(Polars.col("prefix"))
|
104
|
+
# .alias("starts_with_expr")
|
105
|
+
# )
|
106
|
+
# # =>
|
107
|
+
# # shape: (3, 3)
|
108
|
+
# # ┌────────┬─────────────────┬──────────────────┐
|
109
|
+
# # │ name ┆ starts_with_lit ┆ starts_with_expr │
|
110
|
+
# # │ --- ┆ --- ┆ --- │
|
111
|
+
# # │ str ┆ bool ┆ bool │
|
112
|
+
# # ╞════════╪═════════════════╪══════════════════╡
|
113
|
+
# # │ black ┆ false ┆ true │
|
114
|
+
# # │ yellow ┆ true ┆ false │
|
115
|
+
# # │ blue ┆ false ┆ true │
|
116
|
+
# # └────────┴─────────────────┴──────────────────┘
|
117
|
+
def starts_with(prefix)
|
118
|
+
prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
|
119
|
+
Utils.wrap_expr(_rbexpr.binary_starts_with(prefix))
|
40
120
|
end
|
41
121
|
|
42
122
|
# Decode a value using the provided encoding.
|
@@ -48,6 +128,28 @@ module Polars
|
|
48
128
|
# otherwise mask out with a null value.
|
49
129
|
#
|
50
130
|
# @return [Expr]
|
131
|
+
#
|
132
|
+
# @example
|
133
|
+
# colors = Polars::DataFrame.new(
|
134
|
+
# {
|
135
|
+
# "name" => ["black", "yellow", "blue"],
|
136
|
+
# "encoded" => ["000000".b, "ffff00".b, "0000ff".b]
|
137
|
+
# }
|
138
|
+
# )
|
139
|
+
# colors.with_columns(
|
140
|
+
# Polars.col("encoded").bin.decode("hex").alias("code")
|
141
|
+
# )
|
142
|
+
# # =>
|
143
|
+
# # shape: (3, 3)
|
144
|
+
# # ┌────────┬───────────┬─────────────────┐
|
145
|
+
# # │ name ┆ encoded ┆ code │
|
146
|
+
# # │ --- ┆ --- ┆ --- │
|
147
|
+
# # │ str ┆ binary ┆ binary │
|
148
|
+
# # ╞════════╪═══════════╪═════════════════╡
|
149
|
+
# # │ black ┆ b"000000" ┆ b"\x00\x00\x00" │
|
150
|
+
# # │ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
|
151
|
+
# # │ blue ┆ b"0000ff" ┆ b"\x00\x00\xff" │
|
152
|
+
# # └────────┴───────────┴─────────────────┘
|
51
153
|
def decode(encoding, strict: true)
|
52
154
|
if encoding == "hex"
|
53
155
|
Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
|
@@ -64,6 +166,28 @@ module Polars
|
|
64
166
|
# The encoding to use.
|
65
167
|
#
|
66
168
|
# @return [Expr]
|
169
|
+
#
|
170
|
+
# @example
|
171
|
+
# colors = Polars::DataFrame.new(
|
172
|
+
# {
|
173
|
+
# "color" => ["black", "yellow", "blue"],
|
174
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]
|
175
|
+
# }
|
176
|
+
# )
|
177
|
+
# colors.with_columns(
|
178
|
+
# Polars.col("code").bin.encode("hex").alias("encoded")
|
179
|
+
# )
|
180
|
+
# # =>
|
181
|
+
# # shape: (3, 3)
|
182
|
+
# # ┌────────┬─────────────────┬─────────┐
|
183
|
+
# # │ color ┆ code ┆ encoded │
|
184
|
+
# # │ --- ┆ --- ┆ --- │
|
185
|
+
# # │ str ┆ binary ┆ str │
|
186
|
+
# # ╞════════╪═════════════════╪═════════╡
|
187
|
+
# # │ black ┆ b"\x00\x00\x00" ┆ 000000 │
|
188
|
+
# # │ yellow ┆ b"\xff\xff\x00" ┆ ffff00 │
|
189
|
+
# # │ blue ┆ b"\x00\x00\xff" ┆ 0000ff │
|
190
|
+
# # └────────┴─────────────────┴─────────┘
|
67
191
|
def encode(encoding)
|
68
192
|
if encoding == "hex"
|
69
193
|
Utils.wrap_expr(_rbexpr.binary_hex_encode)
|
@@ -12,31 +12,67 @@ module Polars
|
|
12
12
|
|
13
13
|
# Check if binaries in Series contain a binary substring.
|
14
14
|
#
|
15
|
-
# @param
|
15
|
+
# @param literal [String]
|
16
16
|
# The binary substring to look for
|
17
17
|
#
|
18
18
|
# @return [Series]
|
19
|
-
|
19
|
+
#
|
20
|
+
# @example
|
21
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
22
|
+
# s.bin.contains("\xff".b)
|
23
|
+
# # =>
|
24
|
+
# # shape: (3,)
|
25
|
+
# # Series: 'colors' [bool]
|
26
|
+
# # [
|
27
|
+
# # false
|
28
|
+
# # true
|
29
|
+
# # true
|
30
|
+
# # ]
|
31
|
+
def contains(literal)
|
20
32
|
super
|
21
33
|
end
|
22
34
|
|
23
35
|
# Check if values end with a binary substring.
|
24
36
|
#
|
25
|
-
# @param
|
37
|
+
# @param suffix [String]
|
26
38
|
# Suffix substring.
|
27
39
|
#
|
28
40
|
# @return [Series]
|
29
|
-
|
41
|
+
#
|
42
|
+
# @example
|
43
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
44
|
+
# s.bin.ends_with("\x00".b)
|
45
|
+
# # =>
|
46
|
+
# # shape: (3,)
|
47
|
+
# # Series: 'colors' [bool]
|
48
|
+
# # [
|
49
|
+
# # true
|
50
|
+
# # true
|
51
|
+
# # false
|
52
|
+
# # ]
|
53
|
+
def ends_with(suffix)
|
30
54
|
super
|
31
55
|
end
|
32
56
|
|
33
57
|
# Check if values start with a binary substring.
|
34
58
|
#
|
35
|
-
# @param
|
59
|
+
# @param prefix [String]
|
36
60
|
# Prefix substring.
|
37
61
|
#
|
38
62
|
# @return [Series]
|
39
|
-
|
63
|
+
#
|
64
|
+
# @example
|
65
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
66
|
+
# s.bin.starts_with("\x00".b)
|
67
|
+
# # =>
|
68
|
+
# # shape: (3,)
|
69
|
+
# # Series: 'colors' [bool]
|
70
|
+
# # [
|
71
|
+
# # true
|
72
|
+
# # false
|
73
|
+
# # true
|
74
|
+
# # ]
|
75
|
+
def starts_with(prefix)
|
40
76
|
super
|
41
77
|
end
|
42
78
|
|
@@ -49,6 +85,42 @@ module Polars
|
|
49
85
|
# otherwise mask out with a null value.
|
50
86
|
#
|
51
87
|
# @return [Series]
|
88
|
+
#
|
89
|
+
# @example Decode values using hexadecimal encoding.
|
90
|
+
# s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "0000ff".b])
|
91
|
+
# s.bin.decode("hex")
|
92
|
+
# # =>
|
93
|
+
# # shape: (3,)
|
94
|
+
# # Series: 'colors' [binary]
|
95
|
+
# # [
|
96
|
+
# # b"\x00\x00\x00"
|
97
|
+
# # b"\xff\xff\x00"
|
98
|
+
# # b"\x00\x00\xff"
|
99
|
+
# # ]
|
100
|
+
#
|
101
|
+
# @example Decode values using Base64 encoding.
|
102
|
+
# s = Polars::Series.new("colors", ["AAAA".b, "//8A".b, "AAD/".b])
|
103
|
+
# s.bin.decode("base64")
|
104
|
+
# # =>
|
105
|
+
# # shape: (3,)
|
106
|
+
# # Series: 'colors' [binary]
|
107
|
+
# # [
|
108
|
+
# # b"\x00\x00\x00"
|
109
|
+
# # b"\xff\xff\x00"
|
110
|
+
# # b"\x00\x00\xff"
|
111
|
+
# # ]
|
112
|
+
#
|
113
|
+
# @example Set `strict: false` to set invalid values to null instead of raising an error.
|
114
|
+
# s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "invalid_value".b])
|
115
|
+
# s.bin.decode("hex", strict: false)
|
116
|
+
# # =>
|
117
|
+
# # shape: (3,)
|
118
|
+
# # Series: 'colors' [binary]
|
119
|
+
# # [
|
120
|
+
# # b"\x00\x00\x00"
|
121
|
+
# # b"\xff\xff\x00"
|
122
|
+
# # null
|
123
|
+
# # ]
|
52
124
|
def decode(encoding, strict: true)
|
53
125
|
super
|
54
126
|
end
|
@@ -59,6 +131,29 @@ module Polars
|
|
59
131
|
# The encoding to use.
|
60
132
|
#
|
61
133
|
# @return [Series]
|
134
|
+
#
|
135
|
+
# @example Encode values using hexadecimal encoding.
|
136
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
137
|
+
# s.bin.encode("hex")
|
138
|
+
# # =>
|
139
|
+
# # shape: (3,)
|
140
|
+
# # Series: 'colors' [str]
|
141
|
+
# # [
|
142
|
+
# # "000000"
|
143
|
+
# # "ffff00"
|
144
|
+
# # "0000ff"
|
145
|
+
# # ]
|
146
|
+
#
|
147
|
+
# @example Encode values using Base64 encoding.
|
148
|
+
# s.bin.encode("base64")
|
149
|
+
# # =>
|
150
|
+
# # shape: (3,)
|
151
|
+
# # Series: 'colors' [str]
|
152
|
+
# # [
|
153
|
+
# # "AAAA"
|
154
|
+
# # "//8A"
|
155
|
+
# # "AAD/"
|
156
|
+
# # ]
|
62
157
|
def encode(encoding)
|
63
158
|
super
|
64
159
|
end
|