polars-df 0.6.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +20 -10
- data/ext/polars/src/batched_csv.rs +27 -28
- data/ext/polars/src/conversion.rs +135 -106
- data/ext/polars/src/dataframe.rs +140 -131
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +129 -286
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +201 -0
- data/ext/polars/src/expr/string.rs +94 -67
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +41 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +74 -60
- data/ext/polars/src/lib.rs +175 -91
- data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
- data/ext/polars/src/{apply → map}/mod.rs +5 -5
- data/ext/polars/src/{apply → map}/series.rs +18 -22
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +5 -5
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
- data/ext/polars/src/sql.rs +46 -0
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +23 -11
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/Cargo.toml
CHANGED
data/README.md
CHANGED
@@ -20,7 +20,7 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
|
|
20
20
|
Polars.read_csv("iris.csv")
|
21
21
|
.lazy
|
22
22
|
.filter(Polars.col("sepal_length") > 5)
|
23
|
-
.
|
23
|
+
.group_by("species")
|
24
24
|
.agg(Polars.all.sum)
|
25
25
|
.collect
|
26
26
|
```
|
@@ -260,19 +260,19 @@ df["a"].var
|
|
260
260
|
Group
|
261
261
|
|
262
262
|
```ruby
|
263
|
-
df.
|
263
|
+
df.group_by("a").count
|
264
264
|
```
|
265
265
|
|
266
266
|
Works with all summary statistics
|
267
267
|
|
268
268
|
```ruby
|
269
|
-
df.
|
269
|
+
df.group_by("a").max
|
270
270
|
```
|
271
271
|
|
272
272
|
Multiple groups
|
273
273
|
|
274
274
|
```ruby
|
275
|
-
df.
|
275
|
+
df.group_by(["a", "b"]).count
|
276
276
|
```
|
277
277
|
|
278
278
|
## Combining Data Frames
|
@@ -359,7 +359,8 @@ Supported types are:
|
|
359
359
|
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
360
360
|
- string - `Utf8`, `Binary`, `Categorical`
|
361
361
|
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
362
|
-
-
|
362
|
+
- nested - `List`, `Struct`, `Array`
|
363
|
+
- other - `Object`, `Null`
|
363
364
|
|
364
365
|
Get column types
|
365
366
|
|
@@ -402,13 +403,13 @@ df.plot("a", "b", type: "pie")
|
|
402
403
|
Group data
|
403
404
|
|
404
405
|
```ruby
|
405
|
-
df.
|
406
|
+
df.group_by("c").plot("a", "b")
|
406
407
|
```
|
407
408
|
|
408
409
|
Stacked columns or bars
|
409
410
|
|
410
411
|
```ruby
|
411
|
-
df.
|
412
|
+
df.group_by("c").plot("a", "b", stacked: true)
|
412
413
|
```
|
413
414
|
|
414
415
|
## History
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.8.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
+
rust-version = "1.73.0"
|
7
8
|
publish = false
|
8
9
|
|
9
10
|
[lib]
|
@@ -11,14 +12,17 @@ crate-type = ["cdylib"]
|
|
11
12
|
|
12
13
|
[dependencies]
|
13
14
|
ahash = "0.8"
|
14
|
-
chrono = "
|
15
|
-
|
16
|
-
|
15
|
+
chrono = "0.4"
|
16
|
+
either = "1.8"
|
17
|
+
magnus = "0.6"
|
18
|
+
polars-core = "=0.36.2"
|
19
|
+
polars-parquet = "=0.36.2"
|
20
|
+
polars-utils = "=0.36.2"
|
17
21
|
serde_json = "1"
|
18
22
|
smartstring = "1"
|
19
23
|
|
20
24
|
[dependencies.polars]
|
21
|
-
version = "0.
|
25
|
+
version = "=0.36.2"
|
22
26
|
features = [
|
23
27
|
"abs",
|
24
28
|
"approx_unique",
|
@@ -27,32 +31,35 @@ features = [
|
|
27
31
|
"avro",
|
28
32
|
"binary_encoding",
|
29
33
|
"concat_str",
|
34
|
+
"cov",
|
30
35
|
"cse",
|
31
36
|
"csv",
|
32
37
|
"cum_agg",
|
33
38
|
"cumulative_eval",
|
39
|
+
"cutqcut",
|
34
40
|
"dataframe_arithmetic",
|
35
41
|
"date_offset",
|
36
42
|
"diagonal_concat",
|
37
43
|
"diff",
|
38
44
|
"dot_product",
|
39
45
|
"dtype-full",
|
40
|
-
"
|
46
|
+
"dynamic_group_by",
|
41
47
|
"ewma",
|
42
48
|
"extract_jsonpath",
|
43
49
|
"fmt",
|
44
50
|
"horizontal_concat",
|
45
51
|
"interpolate",
|
46
52
|
"ipc",
|
47
|
-
"
|
53
|
+
"is_first_distinct",
|
48
54
|
"is_in",
|
55
|
+
"is_last_distinct",
|
49
56
|
"is_unique",
|
50
57
|
"json",
|
51
58
|
"lazy",
|
52
59
|
"lazy_regex",
|
53
60
|
"list_count",
|
54
61
|
"list_eval",
|
55
|
-
"
|
62
|
+
"list_gather",
|
56
63
|
"list_to_struct",
|
57
64
|
"log",
|
58
65
|
"meta",
|
@@ -62,6 +69,7 @@ features = [
|
|
62
69
|
"parquet",
|
63
70
|
"partition_by",
|
64
71
|
"pct_change",
|
72
|
+
"peaks",
|
65
73
|
"performant",
|
66
74
|
"pivot",
|
67
75
|
"product",
|
@@ -71,6 +79,7 @@ features = [
|
|
71
79
|
"range",
|
72
80
|
"reinterpret",
|
73
81
|
"repeat_by",
|
82
|
+
"rle",
|
74
83
|
"rolling_window",
|
75
84
|
"round_series",
|
76
85
|
"row_hash",
|
@@ -78,9 +87,10 @@ features = [
|
|
78
87
|
"semi_anti_join",
|
79
88
|
"serde-lazy",
|
80
89
|
"sign",
|
90
|
+
"sql",
|
81
91
|
"string_encoding",
|
82
|
-
"
|
83
|
-
"
|
92
|
+
"string_pad",
|
93
|
+
"string_to_integer",
|
84
94
|
"strings",
|
85
95
|
"timezones",
|
86
96
|
"to_dummies",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, Value};
|
1
|
+
use magnus::{prelude::*, RArray, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
@@ -24,35 +24,34 @@ impl RbBatchedCsv {
|
|
24
24
|
pub fn new(arguments: &[Value]) -> RbResult<Self> {
|
25
25
|
// start arguments
|
26
26
|
// this pattern is needed for more than 16
|
27
|
-
let infer_schema_length
|
28
|
-
let chunk_size
|
29
|
-
let has_header
|
30
|
-
let ignore_errors
|
31
|
-
let n_rows
|
32
|
-
let skip_rows
|
33
|
-
let projection
|
34
|
-
let
|
35
|
-
let rechunk
|
36
|
-
let columns
|
37
|
-
let encoding
|
38
|
-
let n_threads
|
39
|
-
let path
|
40
|
-
let overwrite_dtype
|
27
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[0])?;
|
28
|
+
let chunk_size = usize::try_convert(arguments[1])?;
|
29
|
+
let has_header = bool::try_convert(arguments[2])?;
|
30
|
+
let ignore_errors = bool::try_convert(arguments[3])?;
|
31
|
+
let n_rows = Option::<usize>::try_convert(arguments[4])?;
|
32
|
+
let skip_rows = usize::try_convert(arguments[5])?;
|
33
|
+
let projection = Option::<Vec<usize>>::try_convert(arguments[6])?;
|
34
|
+
let separator = String::try_convert(arguments[7])?;
|
35
|
+
let rechunk = bool::try_convert(arguments[8])?;
|
36
|
+
let columns = Option::<Vec<String>>::try_convert(arguments[9])?;
|
37
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[10])?;
|
38
|
+
let n_threads = Option::<usize>::try_convert(arguments[11])?;
|
39
|
+
let path = PathBuf::try_convert(arguments[12])?;
|
40
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
|
41
41
|
// TODO fix
|
42
|
-
let overwrite_dtype_slice
|
43
|
-
let low_memory
|
44
|
-
let
|
45
|
-
let quote_char
|
46
|
-
let null_values
|
47
|
-
let try_parse_dates
|
48
|
-
let skip_rows_after_header
|
49
|
-
let row_count
|
50
|
-
let sample_size
|
51
|
-
let eol_char
|
42
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
|
43
|
+
let low_memory = bool::try_convert(arguments[15])?;
|
44
|
+
let comment_prefix = Option::<String>::try_convert(arguments[16])?;
|
45
|
+
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
|
+
let try_parse_dates = bool::try_convert(arguments[19])?;
|
48
|
+
let skip_rows_after_header = usize::try_convert(arguments[20])?;
|
49
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
|
50
|
+
let sample_size = usize::try_convert(arguments[22])?;
|
51
|
+
let eol_char = String::try_convert(arguments[23])?;
|
52
52
|
// end arguments
|
53
53
|
|
54
54
|
let null_values = null_values.map(|w| w.0);
|
55
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
56
55
|
let eol_char = eol_char.as_bytes()[0];
|
57
56
|
|
58
57
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -90,7 +89,7 @@ impl RbBatchedCsv {
|
|
90
89
|
.infer_schema(infer_schema_length)
|
91
90
|
.has_header(has_header)
|
92
91
|
.with_n_rows(n_rows)
|
93
|
-
.
|
92
|
+
.with_separator(separator.as_bytes()[0])
|
94
93
|
.with_skip_rows(skip_rows)
|
95
94
|
.with_ignore_errors(ignore_errors)
|
96
95
|
.with_projection(projection)
|
@@ -101,7 +100,7 @@ impl RbBatchedCsv {
|
|
101
100
|
.with_n_threads(n_threads)
|
102
101
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
103
102
|
.low_memory(low_memory)
|
104
|
-
.
|
103
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
105
104
|
.with_null_values(null_values)
|
106
105
|
.with_try_parse_dates(try_parse_dates)
|
107
106
|
.with_quote_char(quote_char)
|