polars-df 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +17 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +121 -93
- data/ext/polars/src/dataframe.rs +116 -71
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +68 -284
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +85 -58
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +47 -42
- data/ext/polars/src/lib.rs +156 -72
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +3 -3
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/construction.rs +4 -4
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +7 -27
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +18 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/Cargo.toml
CHANGED
data/README.md
CHANGED
@@ -20,7 +20,7 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
|
|
20
20
|
Polars.read_csv("iris.csv")
|
21
21
|
.lazy
|
22
22
|
.filter(Polars.col("sepal_length") > 5)
|
23
|
-
.
|
23
|
+
.group_by("species")
|
24
24
|
.agg(Polars.all.sum)
|
25
25
|
.collect
|
26
26
|
```
|
@@ -260,19 +260,19 @@ df["a"].var
|
|
260
260
|
Group
|
261
261
|
|
262
262
|
```ruby
|
263
|
-
df.
|
263
|
+
df.group_by("a").count
|
264
264
|
```
|
265
265
|
|
266
266
|
Works with all summary statistics
|
267
267
|
|
268
268
|
```ruby
|
269
|
-
df.
|
269
|
+
df.group_by("a").max
|
270
270
|
```
|
271
271
|
|
272
272
|
Multiple groups
|
273
273
|
|
274
274
|
```ruby
|
275
|
-
df.
|
275
|
+
df.group_by(["a", "b"]).count
|
276
276
|
```
|
277
277
|
|
278
278
|
## Combining Data Frames
|
@@ -359,7 +359,8 @@ Supported types are:
|
|
359
359
|
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
360
360
|
- string - `Utf8`, `Binary`, `Categorical`
|
361
361
|
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
362
|
-
-
|
362
|
+
- nested - `List`, `Struct`, `Array`
|
363
|
+
- other - `Object`, `Null`
|
363
364
|
|
364
365
|
Get column types
|
365
366
|
|
@@ -402,13 +403,13 @@ df.plot("a", "b", type: "pie")
|
|
402
403
|
Group data
|
403
404
|
|
404
405
|
```ruby
|
405
|
-
df.
|
406
|
+
df.group_by("c").plot("a", "b")
|
406
407
|
```
|
407
408
|
|
408
409
|
Stacked columns or bars
|
409
410
|
|
410
411
|
```ruby
|
411
|
-
df.
|
412
|
+
df.group_by("c").plot("a", "b", stacked: true)
|
412
413
|
```
|
413
414
|
|
414
415
|
## History
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.7.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
+
rust-version = "1.73.0"
|
7
8
|
publish = false
|
8
9
|
|
9
10
|
[lib]
|
@@ -11,14 +12,16 @@ crate-type = ["cdylib"]
|
|
11
12
|
|
12
13
|
[dependencies]
|
13
14
|
ahash = "0.8"
|
14
|
-
chrono = "
|
15
|
-
|
16
|
-
|
15
|
+
chrono = "0.4"
|
16
|
+
either = "1.8"
|
17
|
+
magnus = "0.6"
|
18
|
+
polars-core = "=0.35.2"
|
19
|
+
polars-parquet = "=0.35.2"
|
17
20
|
serde_json = "1"
|
18
21
|
smartstring = "1"
|
19
22
|
|
20
23
|
[dependencies.polars]
|
21
|
-
version = "0.
|
24
|
+
version = "=0.35.2"
|
22
25
|
features = [
|
23
26
|
"abs",
|
24
27
|
"approx_unique",
|
@@ -27,6 +30,7 @@ features = [
|
|
27
30
|
"avro",
|
28
31
|
"binary_encoding",
|
29
32
|
"concat_str",
|
33
|
+
"cov",
|
30
34
|
"cse",
|
31
35
|
"csv",
|
32
36
|
"cum_agg",
|
@@ -37,22 +41,23 @@ features = [
|
|
37
41
|
"diff",
|
38
42
|
"dot_product",
|
39
43
|
"dtype-full",
|
40
|
-
"
|
44
|
+
"dynamic_group_by",
|
41
45
|
"ewma",
|
42
46
|
"extract_jsonpath",
|
43
47
|
"fmt",
|
44
48
|
"horizontal_concat",
|
45
49
|
"interpolate",
|
46
50
|
"ipc",
|
47
|
-
"
|
51
|
+
"is_first_distinct",
|
48
52
|
"is_in",
|
53
|
+
"is_last_distinct",
|
49
54
|
"is_unique",
|
50
55
|
"json",
|
51
56
|
"lazy",
|
52
57
|
"lazy_regex",
|
53
58
|
"list_count",
|
54
59
|
"list_eval",
|
55
|
-
"
|
60
|
+
"list_gather",
|
56
61
|
"list_to_struct",
|
57
62
|
"log",
|
58
63
|
"meta",
|
@@ -62,6 +67,7 @@ features = [
|
|
62
67
|
"parquet",
|
63
68
|
"partition_by",
|
64
69
|
"pct_change",
|
70
|
+
"peaks",
|
65
71
|
"performant",
|
66
72
|
"pivot",
|
67
73
|
"product",
|
@@ -78,9 +84,10 @@ features = [
|
|
78
84
|
"semi_anti_join",
|
79
85
|
"serde-lazy",
|
80
86
|
"sign",
|
87
|
+
"sql",
|
81
88
|
"string_encoding",
|
82
|
-
"
|
83
|
-
"
|
89
|
+
"string_pad",
|
90
|
+
"string_to_integer",
|
84
91
|
"strings",
|
85
92
|
"timezones",
|
86
93
|
"to_dummies",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, Value};
|
1
|
+
use magnus::{prelude::*, RArray, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
@@ -24,31 +24,31 @@ impl RbBatchedCsv {
|
|
24
24
|
pub fn new(arguments: &[Value]) -> RbResult<Self> {
|
25
25
|
// start arguments
|
26
26
|
// this pattern is needed for more than 16
|
27
|
-
let infer_schema_length
|
28
|
-
let chunk_size
|
29
|
-
let has_header
|
30
|
-
let ignore_errors
|
31
|
-
let n_rows
|
32
|
-
let skip_rows
|
33
|
-
let projection
|
34
|
-
let
|
35
|
-
let rechunk
|
36
|
-
let columns
|
37
|
-
let encoding
|
38
|
-
let n_threads
|
39
|
-
let path
|
40
|
-
let overwrite_dtype
|
27
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[0])?;
|
28
|
+
let chunk_size = usize::try_convert(arguments[1])?;
|
29
|
+
let has_header = bool::try_convert(arguments[2])?;
|
30
|
+
let ignore_errors = bool::try_convert(arguments[3])?;
|
31
|
+
let n_rows = Option::<usize>::try_convert(arguments[4])?;
|
32
|
+
let skip_rows = usize::try_convert(arguments[5])?;
|
33
|
+
let projection = Option::<Vec<usize>>::try_convert(arguments[6])?;
|
34
|
+
let separator = String::try_convert(arguments[7])?;
|
35
|
+
let rechunk = bool::try_convert(arguments[8])?;
|
36
|
+
let columns = Option::<Vec<String>>::try_convert(arguments[9])?;
|
37
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[10])?;
|
38
|
+
let n_threads = Option::<usize>::try_convert(arguments[11])?;
|
39
|
+
let path = PathBuf::try_convert(arguments[12])?;
|
40
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
|
41
41
|
// TODO fix
|
42
|
-
let overwrite_dtype_slice
|
43
|
-
let low_memory
|
44
|
-
let comment_char
|
45
|
-
let quote_char
|
46
|
-
let null_values
|
47
|
-
let try_parse_dates
|
48
|
-
let skip_rows_after_header
|
49
|
-
let row_count
|
50
|
-
let sample_size
|
51
|
-
let eol_char
|
42
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
|
43
|
+
let low_memory = bool::try_convert(arguments[15])?;
|
44
|
+
let comment_char = Option::<String>::try_convert(arguments[16])?;
|
45
|
+
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
|
+
let try_parse_dates = bool::try_convert(arguments[19])?;
|
48
|
+
let skip_rows_after_header = usize::try_convert(arguments[20])?;
|
49
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
|
50
|
+
let sample_size = usize::try_convert(arguments[22])?;
|
51
|
+
let eol_char = String::try_convert(arguments[23])?;
|
52
52
|
// end arguments
|
53
53
|
|
54
54
|
let null_values = null_values.map(|w| w.0);
|
@@ -90,7 +90,7 @@ impl RbBatchedCsv {
|
|
90
90
|
.infer_schema(infer_schema_length)
|
91
91
|
.has_header(has_header)
|
92
92
|
.with_n_rows(n_rows)
|
93
|
-
.
|
93
|
+
.with_separator(separator.as_bytes()[0])
|
94
94
|
.with_skip_rows(skip_rows)
|
95
95
|
.with_ignore_errors(ignore_errors)
|
96
96
|
.with_projection(projection)
|