polars-df 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +20 -10
- data/ext/polars/src/batched_csv.rs +27 -28
- data/ext/polars/src/conversion.rs +135 -106
- data/ext/polars/src/dataframe.rs +140 -131
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +129 -286
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +201 -0
- data/ext/polars/src/expr/string.rs +94 -67
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +41 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +74 -60
- data/ext/polars/src/lib.rs +175 -91
- data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
- data/ext/polars/src/{apply → map}/mod.rs +5 -5
- data/ext/polars/src/{apply → map}/series.rs +18 -22
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +5 -5
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
- data/ext/polars/src/sql.rs +46 -0
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +23 -11
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/Cargo.toml
CHANGED
data/README.md
CHANGED
@@ -20,7 +20,7 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
|
|
20
20
|
Polars.read_csv("iris.csv")
|
21
21
|
.lazy
|
22
22
|
.filter(Polars.col("sepal_length") > 5)
|
23
|
-
.groupby("species")
|
23
|
+
.group_by("species")
|
24
24
|
.agg(Polars.all.sum)
|
25
25
|
.collect
|
26
26
|
```
|
@@ -260,19 +260,19 @@ df["a"].var
|
|
260
260
|
Group
|
261
261
|
|
262
262
|
```ruby
|
263
|
-
df.groupby("a").count
|
263
|
+
df.group_by("a").count
|
264
264
|
```
|
265
265
|
|
266
266
|
Works with all summary statistics
|
267
267
|
|
268
268
|
```ruby
|
269
|
-
df.groupby("a").max
|
269
|
+
df.group_by("a").max
|
270
270
|
```
|
271
271
|
|
272
272
|
Multiple groups
|
273
273
|
|
274
274
|
```ruby
|
275
|
-
df.groupby(["a", "b"]).count
|
275
|
+
df.group_by(["a", "b"]).count
|
276
276
|
```
|
277
277
|
|
278
278
|
## Combining Data Frames
|
@@ -359,7 +359,8 @@ Supported types are:
|
|
359
359
|
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
360
360
|
- string - `Utf8`, `Binary`, `Categorical`
|
361
361
|
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
362
|
-
-
|
362
|
+
- nested - `List`, `Struct`, `Array`
|
363
|
+
- other - `Object`, `Null`
|
363
364
|
|
364
365
|
Get column types
|
365
366
|
|
@@ -402,13 +403,13 @@ df.plot("a", "b", type: "pie")
|
|
402
403
|
Group data
|
403
404
|
|
404
405
|
```ruby
|
405
|
-
df.groupby("c").plot("a", "b")
|
406
|
+
df.group_by("c").plot("a", "b")
|
406
407
|
```
|
407
408
|
|
408
409
|
Stacked columns or bars
|
409
410
|
|
410
411
|
```ruby
|
411
|
-
df.groupby("c").plot("a", "b", stacked: true)
|
412
|
+
df.group_by("c").plot("a", "b", stacked: true)
|
412
413
|
```
|
413
414
|
|
414
415
|
## History
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.6.0"
|
3
|
+
version = "0.8.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
+
rust-version = "1.73.0"
|
7
8
|
publish = false
|
8
9
|
|
9
10
|
[lib]
|
@@ -11,14 +12,17 @@ crate-type = ["cdylib"]
|
|
11
12
|
|
12
13
|
[dependencies]
|
13
14
|
ahash = "0.8"
|
14
|
-
chrono = "
|
15
|
-
|
16
|
-
|
15
|
+
chrono = "0.4"
|
16
|
+
either = "1.8"
|
17
|
+
magnus = "0.6"
|
18
|
+
polars-core = "=0.36.2"
|
19
|
+
polars-parquet = "=0.36.2"
|
20
|
+
polars-utils = "=0.36.2"
|
17
21
|
serde_json = "1"
|
18
22
|
smartstring = "1"
|
19
23
|
|
20
24
|
[dependencies.polars]
|
21
|
-
version = "0.
|
25
|
+
version = "=0.36.2"
|
22
26
|
features = [
|
23
27
|
"abs",
|
24
28
|
"approx_unique",
|
@@ -27,32 +31,35 @@ features = [
|
|
27
31
|
"avro",
|
28
32
|
"binary_encoding",
|
29
33
|
"concat_str",
|
34
|
+
"cov",
|
30
35
|
"cse",
|
31
36
|
"csv",
|
32
37
|
"cum_agg",
|
33
38
|
"cumulative_eval",
|
39
|
+
"cutqcut",
|
34
40
|
"dataframe_arithmetic",
|
35
41
|
"date_offset",
|
36
42
|
"diagonal_concat",
|
37
43
|
"diff",
|
38
44
|
"dot_product",
|
39
45
|
"dtype-full",
|
40
|
-
"
|
46
|
+
"dynamic_group_by",
|
41
47
|
"ewma",
|
42
48
|
"extract_jsonpath",
|
43
49
|
"fmt",
|
44
50
|
"horizontal_concat",
|
45
51
|
"interpolate",
|
46
52
|
"ipc",
|
47
|
-
"
|
53
|
+
"is_first_distinct",
|
48
54
|
"is_in",
|
55
|
+
"is_last_distinct",
|
49
56
|
"is_unique",
|
50
57
|
"json",
|
51
58
|
"lazy",
|
52
59
|
"lazy_regex",
|
53
60
|
"list_count",
|
54
61
|
"list_eval",
|
55
|
-
"
|
62
|
+
"list_gather",
|
56
63
|
"list_to_struct",
|
57
64
|
"log",
|
58
65
|
"meta",
|
@@ -62,6 +69,7 @@ features = [
|
|
62
69
|
"parquet",
|
63
70
|
"partition_by",
|
64
71
|
"pct_change",
|
72
|
+
"peaks",
|
65
73
|
"performant",
|
66
74
|
"pivot",
|
67
75
|
"product",
|
@@ -71,6 +79,7 @@ features = [
|
|
71
79
|
"range",
|
72
80
|
"reinterpret",
|
73
81
|
"repeat_by",
|
82
|
+
"rle",
|
74
83
|
"rolling_window",
|
75
84
|
"round_series",
|
76
85
|
"row_hash",
|
@@ -78,9 +87,10 @@ features = [
|
|
78
87
|
"semi_anti_join",
|
79
88
|
"serde-lazy",
|
80
89
|
"sign",
|
90
|
+
"sql",
|
81
91
|
"string_encoding",
|
82
|
-
"
|
83
|
-
"
|
92
|
+
"string_pad",
|
93
|
+
"string_to_integer",
|
84
94
|
"strings",
|
85
95
|
"timezones",
|
86
96
|
"to_dummies",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, Value};
|
1
|
+
use magnus::{prelude::*, RArray, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
@@ -24,35 +24,34 @@ impl RbBatchedCsv {
|
|
24
24
|
pub fn new(arguments: &[Value]) -> RbResult<Self> {
|
25
25
|
// start arguments
|
26
26
|
// this pattern is needed for more than 16
|
27
|
-
let infer_schema_length
|
28
|
-
let chunk_size
|
29
|
-
let has_header
|
30
|
-
let ignore_errors
|
31
|
-
let n_rows
|
32
|
-
let skip_rows
|
33
|
-
let projection
|
34
|
-
let
|
35
|
-
let rechunk
|
36
|
-
let columns
|
37
|
-
let encoding
|
38
|
-
let n_threads
|
39
|
-
let path
|
40
|
-
let overwrite_dtype
|
27
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[0])?;
|
28
|
+
let chunk_size = usize::try_convert(arguments[1])?;
|
29
|
+
let has_header = bool::try_convert(arguments[2])?;
|
30
|
+
let ignore_errors = bool::try_convert(arguments[3])?;
|
31
|
+
let n_rows = Option::<usize>::try_convert(arguments[4])?;
|
32
|
+
let skip_rows = usize::try_convert(arguments[5])?;
|
33
|
+
let projection = Option::<Vec<usize>>::try_convert(arguments[6])?;
|
34
|
+
let separator = String::try_convert(arguments[7])?;
|
35
|
+
let rechunk = bool::try_convert(arguments[8])?;
|
36
|
+
let columns = Option::<Vec<String>>::try_convert(arguments[9])?;
|
37
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[10])?;
|
38
|
+
let n_threads = Option::<usize>::try_convert(arguments[11])?;
|
39
|
+
let path = PathBuf::try_convert(arguments[12])?;
|
40
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
|
41
41
|
// TODO fix
|
42
|
-
let overwrite_dtype_slice
|
43
|
-
let low_memory
|
44
|
-
let
|
45
|
-
let quote_char
|
46
|
-
let null_values
|
47
|
-
let try_parse_dates
|
48
|
-
let skip_rows_after_header
|
49
|
-
let row_count
|
50
|
-
let sample_size
|
51
|
-
let eol_char
|
42
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
|
43
|
+
let low_memory = bool::try_convert(arguments[15])?;
|
44
|
+
let comment_prefix = Option::<String>::try_convert(arguments[16])?;
|
45
|
+
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
|
+
let try_parse_dates = bool::try_convert(arguments[19])?;
|
48
|
+
let skip_rows_after_header = usize::try_convert(arguments[20])?;
|
49
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
|
50
|
+
let sample_size = usize::try_convert(arguments[22])?;
|
51
|
+
let eol_char = String::try_convert(arguments[23])?;
|
52
52
|
// end arguments
|
53
53
|
|
54
54
|
let null_values = null_values.map(|w| w.0);
|
55
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
56
55
|
let eol_char = eol_char.as_bytes()[0];
|
57
56
|
|
58
57
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -90,7 +89,7 @@ impl RbBatchedCsv {
|
|
90
89
|
.infer_schema(infer_schema_length)
|
91
90
|
.has_header(has_header)
|
92
91
|
.with_n_rows(n_rows)
|
93
|
-
.
|
92
|
+
.with_separator(separator.as_bytes()[0])
|
94
93
|
.with_skip_rows(skip_rows)
|
95
94
|
.with_ignore_errors(ignore_errors)
|
96
95
|
.with_projection(projection)
|
@@ -101,7 +100,7 @@ impl RbBatchedCsv {
|
|
101
100
|
.with_n_threads(n_threads)
|
102
101
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
103
102
|
.low_memory(low_memory)
|
104
|
-
.
|
103
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
105
104
|
.with_null_values(null_values)
|
106
105
|
.with_try_parse_dates(try_parse_dates)
|
107
106
|
.with_quote_char(quote_char)
|