polars-df 0.14.0-x64-mingw-ucrt → 0.16.0-x64-mingw-ucrt
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE-THIRD-PARTY.txt +24369 -14580
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +9 -8
- data/lib/polars/functions.rb +0 -57
data/LICENSE.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
Copyright (c) 2020 Ritchie Vink
|
2
2
|
Copyright (c) 2022-2024 Andrew Kane
|
3
|
+
Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
3
4
|
|
4
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -14,18 +14,17 @@ gem "polars-df"
|
|
14
14
|
|
15
15
|
## Getting Started
|
16
16
|
|
17
|
-
This library follows the [Polars Python API](https://pola
|
17
|
+
This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
Polars.
|
21
|
-
.lazy
|
20
|
+
Polars.scan_csv("iris.csv")
|
22
21
|
.filter(Polars.col("sepal_length") > 5)
|
23
22
|
.group_by("species")
|
24
23
|
.agg(Polars.all.sum)
|
25
24
|
.collect
|
26
25
|
```
|
27
26
|
|
28
|
-
You can follow [Polars tutorials](https://pola
|
27
|
+
You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
28
|
|
30
29
|
## Reference
|
31
30
|
|
@@ -89,6 +88,15 @@ From Avro
|
|
89
88
|
Polars.read_avro("file.avro")
|
90
89
|
```
|
91
90
|
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental, unreleased]
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
Polars.read_delta("./table")
|
95
|
+
|
96
|
+
# or lazily with
|
97
|
+
Polars.scan_delta("./table")
|
98
|
+
```
|
99
|
+
|
92
100
|
From a hash
|
93
101
|
|
94
102
|
```ruby
|
@@ -337,6 +345,32 @@ Parquet
|
|
337
345
|
df.write_parquet("file.parquet")
|
338
346
|
```
|
339
347
|
|
348
|
+
JSON
|
349
|
+
|
350
|
+
```ruby
|
351
|
+
df.write_json("file.json")
|
352
|
+
# or
|
353
|
+
df.write_ndjson("file.ndjson")
|
354
|
+
```
|
355
|
+
|
356
|
+
Feather / Arrow IPC
|
357
|
+
|
358
|
+
```ruby
|
359
|
+
df.write_ipc("file.arrow")
|
360
|
+
```
|
361
|
+
|
362
|
+
Avro
|
363
|
+
|
364
|
+
```ruby
|
365
|
+
df.write_avro("file.avro")
|
366
|
+
```
|
367
|
+
|
368
|
+
Delta Lake [experimental, unreleased]
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
df.write_delta("./table")
|
372
|
+
```
|
373
|
+
|
340
374
|
Numo array
|
341
375
|
|
342
376
|
```ruby
|
data/lib/polars/3.2/polars.so
CHANGED
Binary file
|
data/lib/polars/3.3/polars.so
CHANGED
Binary file
|
Binary file
|
@@ -26,7 +26,6 @@ module Polars
|
|
26
26
|
skip_rows_after_header: 0,
|
27
27
|
row_count_name: nil,
|
28
28
|
row_count_offset: 0,
|
29
|
-
sample_size: 1024,
|
30
29
|
eol_char: "\n",
|
31
30
|
new_columns: nil,
|
32
31
|
raise_if_empty: true,
|
@@ -79,7 +78,6 @@ module Polars
|
|
79
78
|
parse_dates,
|
80
79
|
skip_rows_after_header,
|
81
80
|
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
82
|
-
sample_size,
|
83
81
|
eol_char,
|
84
82
|
raise_if_empty,
|
85
83
|
truncate_ragged_lines,
|
data/lib/polars/binary_expr.rb
CHANGED
@@ -11,32 +11,112 @@ module Polars
|
|
11
11
|
|
12
12
|
# Check if binaries in Series contain a binary substring.
|
13
13
|
#
|
14
|
-
# @param
|
14
|
+
# @param literal [String]
|
15
15
|
# The binary substring to look for
|
16
16
|
#
|
17
17
|
# @return [Expr]
|
18
|
-
|
19
|
-
|
18
|
+
#
|
19
|
+
# @example
|
20
|
+
# colors = Polars::DataFrame.new(
|
21
|
+
# {
|
22
|
+
# "name" => ["black", "yellow", "blue"],
|
23
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
24
|
+
# "lit" => ["\x00".b, "\xff\x00".b, "\xff\xff".b]
|
25
|
+
# }
|
26
|
+
# )
|
27
|
+
# colors.select(
|
28
|
+
# "name",
|
29
|
+
# Polars.col("code").bin.contains("\xff".b).alias("contains_with_lit"),
|
30
|
+
# Polars.col("code").bin.contains(Polars.col("lit")).alias("contains_with_expr"),
|
31
|
+
# )
|
32
|
+
# # =>
|
33
|
+
# # shape: (3, 3)
|
34
|
+
# # ┌────────┬───────────────────┬────────────────────┐
|
35
|
+
# # │ name ┆ contains_with_lit ┆ contains_with_expr │
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
37
|
+
# # │ str ┆ bool ┆ bool │
|
38
|
+
# # ╞════════╪═══════════════════╪════════════════════╡
|
39
|
+
# # │ black ┆ false ┆ true │
|
40
|
+
# # │ yellow ┆ true ┆ true │
|
41
|
+
# # │ blue ┆ true ┆ false │
|
42
|
+
# # └────────┴───────────────────┴────────────────────┘
|
43
|
+
def contains(literal)
|
44
|
+
literal = Utils.parse_into_expression(literal, str_as_lit: true)
|
45
|
+
Utils.wrap_expr(_rbexpr.binary_contains(literal))
|
20
46
|
end
|
21
47
|
|
22
48
|
# Check if values end with a binary substring.
|
23
49
|
#
|
24
|
-
# @param
|
50
|
+
# @param suffix [String]
|
25
51
|
# Suffix substring.
|
26
52
|
#
|
27
53
|
# @return [Expr]
|
28
|
-
|
29
|
-
|
54
|
+
#
|
55
|
+
# @example
|
56
|
+
# colors = Polars::DataFrame.new(
|
57
|
+
# {
|
58
|
+
# "name" => ["black", "yellow", "blue"],
|
59
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
60
|
+
# "suffix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
|
61
|
+
# }
|
62
|
+
# )
|
63
|
+
# colors.select(
|
64
|
+
# "name",
|
65
|
+
# Polars.col("code").bin.ends_with("\xff".b).alias("ends_with_lit"),
|
66
|
+
# Polars.col("code").bin.ends_with(Polars.col("suffix")).alias("ends_with_expr")
|
67
|
+
# )
|
68
|
+
# # =>
|
69
|
+
# # shape: (3, 3)
|
70
|
+
# # ┌────────┬───────────────┬────────────────┐
|
71
|
+
# # │ name ┆ ends_with_lit ┆ ends_with_expr │
|
72
|
+
# # │ --- ┆ --- ┆ --- │
|
73
|
+
# # │ str ┆ bool ┆ bool │
|
74
|
+
# # ╞════════╪═══════════════╪════════════════╡
|
75
|
+
# # │ black ┆ false ┆ true │
|
76
|
+
# # │ yellow ┆ false ┆ true │
|
77
|
+
# # │ blue ┆ true ┆ false │
|
78
|
+
# # └────────┴───────────────┴────────────────┘
|
79
|
+
def ends_with(suffix)
|
80
|
+
suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
|
81
|
+
Utils.wrap_expr(_rbexpr.binary_ends_with(suffix))
|
30
82
|
end
|
31
83
|
|
32
84
|
# Check if values start with a binary substring.
|
33
85
|
#
|
34
|
-
# @param
|
86
|
+
# @param prefix [String]
|
35
87
|
# Prefix substring.
|
36
88
|
#
|
37
89
|
# @return [Expr]
|
38
|
-
|
39
|
-
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# colors = Polars::DataFrame.new(
|
93
|
+
# {
|
94
|
+
# "name" => ["black", "yellow", "blue"],
|
95
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
|
96
|
+
# "prefix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
|
97
|
+
# }
|
98
|
+
# )
|
99
|
+
# colors.select(
|
100
|
+
# "name",
|
101
|
+
# Polars.col("code").bin.starts_with("\xff".b).alias("starts_with_lit"),
|
102
|
+
# Polars.col("code")
|
103
|
+
# .bin.starts_with(Polars.col("prefix"))
|
104
|
+
# .alias("starts_with_expr")
|
105
|
+
# )
|
106
|
+
# # =>
|
107
|
+
# # shape: (3, 3)
|
108
|
+
# # ┌────────┬─────────────────┬──────────────────┐
|
109
|
+
# # │ name ┆ starts_with_lit ┆ starts_with_expr │
|
110
|
+
# # │ --- ┆ --- ┆ --- │
|
111
|
+
# # │ str ┆ bool ┆ bool │
|
112
|
+
# # ╞════════╪═════════════════╪══════════════════╡
|
113
|
+
# # │ black ┆ false ┆ true │
|
114
|
+
# # │ yellow ┆ true ┆ false │
|
115
|
+
# # │ blue ┆ false ┆ true │
|
116
|
+
# # └────────┴─────────────────┴──────────────────┘
|
117
|
+
def starts_with(prefix)
|
118
|
+
prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
|
119
|
+
Utils.wrap_expr(_rbexpr.binary_starts_with(prefix))
|
40
120
|
end
|
41
121
|
|
42
122
|
# Decode a value using the provided encoding.
|
@@ -48,6 +128,28 @@ module Polars
|
|
48
128
|
# otherwise mask out with a null value.
|
49
129
|
#
|
50
130
|
# @return [Expr]
|
131
|
+
#
|
132
|
+
# @example
|
133
|
+
# colors = Polars::DataFrame.new(
|
134
|
+
# {
|
135
|
+
# "name" => ["black", "yellow", "blue"],
|
136
|
+
# "encoded" => ["000000".b, "ffff00".b, "0000ff".b]
|
137
|
+
# }
|
138
|
+
# )
|
139
|
+
# colors.with_columns(
|
140
|
+
# Polars.col("encoded").bin.decode("hex").alias("code")
|
141
|
+
# )
|
142
|
+
# # =>
|
143
|
+
# # shape: (3, 3)
|
144
|
+
# # ┌────────┬───────────┬─────────────────┐
|
145
|
+
# # │ name ┆ encoded ┆ code │
|
146
|
+
# # │ --- ┆ --- ┆ --- │
|
147
|
+
# # │ str ┆ binary ┆ binary │
|
148
|
+
# # ╞════════╪═══════════╪═════════════════╡
|
149
|
+
# # │ black ┆ b"000000" ┆ b"\x00\x00\x00" │
|
150
|
+
# # │ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
|
151
|
+
# # │ blue ┆ b"0000ff" ┆ b"\x00\x00\xff" │
|
152
|
+
# # └────────┴───────────┴─────────────────┘
|
51
153
|
def decode(encoding, strict: true)
|
52
154
|
if encoding == "hex"
|
53
155
|
Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
|
@@ -64,6 +166,28 @@ module Polars
|
|
64
166
|
# The encoding to use.
|
65
167
|
#
|
66
168
|
# @return [Expr]
|
169
|
+
#
|
170
|
+
# @example
|
171
|
+
# colors = Polars::DataFrame.new(
|
172
|
+
# {
|
173
|
+
# "color" => ["black", "yellow", "blue"],
|
174
|
+
# "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]
|
175
|
+
# }
|
176
|
+
# )
|
177
|
+
# colors.with_columns(
|
178
|
+
# Polars.col("code").bin.encode("hex").alias("encoded")
|
179
|
+
# )
|
180
|
+
# # =>
|
181
|
+
# # shape: (3, 3)
|
182
|
+
# # ┌────────┬─────────────────┬─────────┐
|
183
|
+
# # │ color ┆ code ┆ encoded │
|
184
|
+
# # │ --- ┆ --- ┆ --- │
|
185
|
+
# # │ str ┆ binary ┆ str │
|
186
|
+
# # ╞════════╪═════════════════╪═════════╡
|
187
|
+
# # │ black ┆ b"\x00\x00\x00" ┆ 000000 │
|
188
|
+
# # │ yellow ┆ b"\xff\xff\x00" ┆ ffff00 │
|
189
|
+
# # │ blue ┆ b"\x00\x00\xff" ┆ 0000ff │
|
190
|
+
# # └────────┴─────────────────┴─────────┘
|
67
191
|
def encode(encoding)
|
68
192
|
if encoding == "hex"
|
69
193
|
Utils.wrap_expr(_rbexpr.binary_hex_encode)
|
@@ -12,31 +12,67 @@ module Polars
|
|
12
12
|
|
13
13
|
# Check if binaries in Series contain a binary substring.
|
14
14
|
#
|
15
|
-
# @param
|
15
|
+
# @param literal [String]
|
16
16
|
# The binary substring to look for
|
17
17
|
#
|
18
18
|
# @return [Series]
|
19
|
-
|
19
|
+
#
|
20
|
+
# @example
|
21
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
22
|
+
# s.bin.contains("\xff".b)
|
23
|
+
# # =>
|
24
|
+
# # shape: (3,)
|
25
|
+
# # Series: 'colors' [bool]
|
26
|
+
# # [
|
27
|
+
# # false
|
28
|
+
# # true
|
29
|
+
# # true
|
30
|
+
# # ]
|
31
|
+
def contains(literal)
|
20
32
|
super
|
21
33
|
end
|
22
34
|
|
23
35
|
# Check if values end with a binary substring.
|
24
36
|
#
|
25
|
-
# @param
|
37
|
+
# @param suffix [String]
|
26
38
|
# Suffix substring.
|
27
39
|
#
|
28
40
|
# @return [Series]
|
29
|
-
|
41
|
+
#
|
42
|
+
# @example
|
43
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
44
|
+
# s.bin.ends_with("\x00".b)
|
45
|
+
# # =>
|
46
|
+
# # shape: (3,)
|
47
|
+
# # Series: 'colors' [bool]
|
48
|
+
# # [
|
49
|
+
# # true
|
50
|
+
# # true
|
51
|
+
# # false
|
52
|
+
# # ]
|
53
|
+
def ends_with(suffix)
|
30
54
|
super
|
31
55
|
end
|
32
56
|
|
33
57
|
# Check if values start with a binary substring.
|
34
58
|
#
|
35
|
-
# @param
|
59
|
+
# @param prefix [String]
|
36
60
|
# Prefix substring.
|
37
61
|
#
|
38
62
|
# @return [Series]
|
39
|
-
|
63
|
+
#
|
64
|
+
# @example
|
65
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
66
|
+
# s.bin.starts_with("\x00".b)
|
67
|
+
# # =>
|
68
|
+
# # shape: (3,)
|
69
|
+
# # Series: 'colors' [bool]
|
70
|
+
# # [
|
71
|
+
# # true
|
72
|
+
# # false
|
73
|
+
# # true
|
74
|
+
# # ]
|
75
|
+
def starts_with(prefix)
|
40
76
|
super
|
41
77
|
end
|
42
78
|
|
@@ -49,6 +85,42 @@ module Polars
|
|
49
85
|
# otherwise mask out with a null value.
|
50
86
|
#
|
51
87
|
# @return [Series]
|
88
|
+
#
|
89
|
+
# @example Decode values using hexadecimal encoding.
|
90
|
+
# s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "0000ff".b])
|
91
|
+
# s.bin.decode("hex")
|
92
|
+
# # =>
|
93
|
+
# # shape: (3,)
|
94
|
+
# # Series: 'colors' [binary]
|
95
|
+
# # [
|
96
|
+
# # b"\x00\x00\x00"
|
97
|
+
# # b"\xff\xff\x00"
|
98
|
+
# # b"\x00\x00\xff"
|
99
|
+
# # ]
|
100
|
+
#
|
101
|
+
# @example Decode values using Base64 encoding.
|
102
|
+
# s = Polars::Series.new("colors", ["AAAA".b, "//8A".b, "AAD/".b])
|
103
|
+
# s.bin.decode("base64")
|
104
|
+
# # =>
|
105
|
+
# # shape: (3,)
|
106
|
+
# # Series: 'colors' [binary]
|
107
|
+
# # [
|
108
|
+
# # b"\x00\x00\x00"
|
109
|
+
# # b"\xff\xff\x00"
|
110
|
+
# # b"\x00\x00\xff"
|
111
|
+
# # ]
|
112
|
+
#
|
113
|
+
# @example Set `strict: false` to set invalid values to null instead of raising an error.
|
114
|
+
# s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "invalid_value".b])
|
115
|
+
# s.bin.decode("hex", strict: false)
|
116
|
+
# # =>
|
117
|
+
# # shape: (3,)
|
118
|
+
# # Series: 'colors' [binary]
|
119
|
+
# # [
|
120
|
+
# # b"\x00\x00\x00"
|
121
|
+
# # b"\xff\xff\x00"
|
122
|
+
# # null
|
123
|
+
# # ]
|
52
124
|
def decode(encoding, strict: true)
|
53
125
|
super
|
54
126
|
end
|
@@ -59,6 +131,29 @@ module Polars
|
|
59
131
|
# The encoding to use.
|
60
132
|
#
|
61
133
|
# @return [Series]
|
134
|
+
#
|
135
|
+
# @example Encode values using hexadecimal encoding.
|
136
|
+
# s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
|
137
|
+
# s.bin.encode("hex")
|
138
|
+
# # =>
|
139
|
+
# # shape: (3,)
|
140
|
+
# # Series: 'colors' [str]
|
141
|
+
# # [
|
142
|
+
# # "000000"
|
143
|
+
# # "ffff00"
|
144
|
+
# # "0000ff"
|
145
|
+
# # ]
|
146
|
+
#
|
147
|
+
# @example Encode values using Base64 encoding.
|
148
|
+
# s.bin.encode("base64")
|
149
|
+
# # =>
|
150
|
+
# # shape: (3,)
|
151
|
+
# # Series: 'colors' [str]
|
152
|
+
# # [
|
153
|
+
# # "AAAA"
|
154
|
+
# # "//8A"
|
155
|
+
# # "AAD/"
|
156
|
+
# # ]
|
62
157
|
def encode(encoding)
|
63
158
|
super
|
64
159
|
end
|