polars-df 0.5.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/Cargo.toml
CHANGED
data/README.md
CHANGED
@@ -20,12 +20,12 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
|
|
20
20
|
Polars.read_csv("iris.csv")
|
21
21
|
.lazy
|
22
22
|
.filter(Polars.col("sepal_length") > 5)
|
23
|
-
.groupby("species")
|
23
|
+
.group_by("species")
|
24
24
|
.agg(Polars.all.sum)
|
25
25
|
.collect
|
26
26
|
```
|
27
27
|
|
28
|
-
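You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.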
|
28
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
29
|
|
30
30
|
## Reference
|
31
31
|
|
@@ -260,19 +260,19 @@ df["a"].var
|
|
260
260
|
Group
|
261
261
|
|
262
262
|
```ruby
|
263
|
-
df.groupby("a").count
|
263
|
+
df.group_by("a").count
|
264
264
|
```
|
265
265
|
|
266
266
|
Works with all summary statistics
|
267
267
|
|
268
268
|
```ruby
|
269
|
-
df.groupby("a").max
|
269
|
+
df.group_by("a").max
|
270
270
|
```
|
271
271
|
|
272
272
|
Multiple groups
|
273
273
|
|
274
274
|
```ruby
|
275
|
-
df.groupby(["a", "b"]).count
|
275
|
+
df.group_by(["a", "b"]).count
|
276
276
|
```
|
277
277
|
|
278
278
|
## Combining Data Frames
|
@@ -348,7 +348,7 @@ df.to_numo
|
|
348
348
|
You can specify column types when creating a data frame
|
349
349
|
|
350
350
|
```ruby
|
351
|
-
Polars::DataFrame.new(data,
|
351
|
+
Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
|
352
352
|
```
|
353
353
|
|
354
354
|
Supported types are:
|
@@ -357,8 +357,10 @@ Supported types are:
|
|
357
357
|
- float - `Float64`, `Float32`
|
358
358
|
- integer - `Int64`, `Int32`, `Int16`, `Int8`
|
359
359
|
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
360
|
-
- string - `Utf8`, `Categorical`
|
360
|
+
- string - `Utf8`, `Binary`, `Categorical`
|
361
361
|
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
362
|
+
- nested - `List`, `Struct`, `Array`
|
363
|
+
- other - `Object`, `Null`
|
362
364
|
|
363
365
|
Get column types
|
364
366
|
|
@@ -401,13 +403,13 @@ df.plot("a", "b", type: "pie")
|
|
401
403
|
Group data
|
402
404
|
|
403
405
|
```ruby
|
404
|
-
df.groupby("c").plot("a", "b")
|
406
|
+
df.group_by("c").plot("a", "b")
|
405
407
|
```
|
406
408
|
|
407
409
|
Stacked columns or bars
|
408
410
|
|
409
411
|
```ruby
|
410
|
-
df.groupby("c").plot("a", "b", stacked: true)
|
412
|
+
df.group_by("c").plot("a", "b", stacked: true)
|
411
413
|
```
|
412
414
|
|
413
415
|
## History
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.5.0"
|
3
|
+
version = "0.7.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
+
rust-version = "1.73.0"
|
7
8
|
publish = false
|
8
9
|
|
9
10
|
[lib]
|
@@ -11,22 +12,25 @@ crate-type = ["cdylib"]
|
|
11
12
|
|
12
13
|
[dependencies]
|
13
14
|
ahash = "0.8"
|
14
|
-
|
15
|
-
|
15
|
+
chrono = "0.4"
|
16
|
+
either = "1.8"
|
17
|
+
magnus = "0.6"
|
18
|
+
polars-core = "=0.35.2"
|
19
|
+
polars-parquet = "=0.35.2"
|
16
20
|
serde_json = "1"
|
17
21
|
smartstring = "1"
|
18
22
|
|
19
23
|
[dependencies.polars]
|
20
|
-
version = "0.
|
24
|
+
version = "=0.35.2"
|
21
25
|
features = [
|
22
26
|
"abs",
|
23
27
|
"approx_unique",
|
24
|
-
"arange",
|
25
28
|
"arg_where",
|
26
29
|
"asof_join",
|
27
30
|
"avro",
|
28
31
|
"binary_encoding",
|
29
32
|
"concat_str",
|
33
|
+
"cov",
|
30
34
|
"cse",
|
31
35
|
"csv",
|
32
36
|
"cum_agg",
|
@@ -37,22 +41,23 @@ features = [
|
|
37
41
|
"diff",
|
38
42
|
"dot_product",
|
39
43
|
"dtype-full",
|
40
|
-
"
|
44
|
+
"dynamic_group_by",
|
41
45
|
"ewma",
|
42
46
|
"extract_jsonpath",
|
43
47
|
"fmt",
|
44
48
|
"horizontal_concat",
|
45
49
|
"interpolate",
|
46
50
|
"ipc",
|
47
|
-
"
|
51
|
+
"is_first_distinct",
|
48
52
|
"is_in",
|
53
|
+
"is_last_distinct",
|
49
54
|
"is_unique",
|
50
55
|
"json",
|
51
56
|
"lazy",
|
52
57
|
"lazy_regex",
|
53
58
|
"list_count",
|
54
59
|
"list_eval",
|
55
|
-
"
|
60
|
+
"list_gather",
|
56
61
|
"list_to_struct",
|
57
62
|
"log",
|
58
63
|
"meta",
|
@@ -62,12 +67,14 @@ features = [
|
|
62
67
|
"parquet",
|
63
68
|
"partition_by",
|
64
69
|
"pct_change",
|
70
|
+
"peaks",
|
65
71
|
"performant",
|
66
72
|
"pivot",
|
67
73
|
"product",
|
68
74
|
"propagate_nans",
|
69
75
|
"random",
|
70
76
|
"rank",
|
77
|
+
"range",
|
71
78
|
"reinterpret",
|
72
79
|
"repeat_by",
|
73
80
|
"rolling_window",
|
@@ -77,9 +84,10 @@ features = [
|
|
77
84
|
"semi_anti_join",
|
78
85
|
"serde-lazy",
|
79
86
|
"sign",
|
87
|
+
"sql",
|
80
88
|
"string_encoding",
|
81
|
-
"
|
82
|
-
"
|
89
|
+
"string_pad",
|
90
|
+
"string_to_integer",
|
83
91
|
"strings",
|
84
92
|
"timezones",
|
85
93
|
"to_dummies",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, Value};
|
1
|
+
use magnus::{prelude::*, RArray, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
@@ -24,31 +24,31 @@ impl RbBatchedCsv {
|
|
24
24
|
pub fn new(arguments: &[Value]) -> RbResult<Self> {
|
25
25
|
// start arguments
|
26
26
|
// this pattern is needed for more than 16
|
27
|
-
let infer_schema_length
|
28
|
-
let chunk_size
|
29
|
-
let has_header
|
30
|
-
let ignore_errors
|
31
|
-
let n_rows
|
32
|
-
let skip_rows
|
33
|
-
let projection
|
34
|
-
let
|
35
|
-
let rechunk
|
36
|
-
let columns
|
37
|
-
let encoding
|
38
|
-
let n_threads
|
39
|
-
let path
|
40
|
-
let overwrite_dtype
|
27
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[0])?;
|
28
|
+
let chunk_size = usize::try_convert(arguments[1])?;
|
29
|
+
let has_header = bool::try_convert(arguments[2])?;
|
30
|
+
let ignore_errors = bool::try_convert(arguments[3])?;
|
31
|
+
let n_rows = Option::<usize>::try_convert(arguments[4])?;
|
32
|
+
let skip_rows = usize::try_convert(arguments[5])?;
|
33
|
+
let projection = Option::<Vec<usize>>::try_convert(arguments[6])?;
|
34
|
+
let separator = String::try_convert(arguments[7])?;
|
35
|
+
let rechunk = bool::try_convert(arguments[8])?;
|
36
|
+
let columns = Option::<Vec<String>>::try_convert(arguments[9])?;
|
37
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[10])?;
|
38
|
+
let n_threads = Option::<usize>::try_convert(arguments[11])?;
|
39
|
+
let path = PathBuf::try_convert(arguments[12])?;
|
40
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
|
41
41
|
// TODO fix
|
42
|
-
let overwrite_dtype_slice
|
43
|
-
let low_memory
|
44
|
-
let comment_char
|
45
|
-
let quote_char
|
46
|
-
let null_values
|
47
|
-
let try_parse_dates
|
48
|
-
let skip_rows_after_header
|
49
|
-
let row_count
|
50
|
-
let sample_size
|
51
|
-
let eol_char
|
42
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
|
43
|
+
let low_memory = bool::try_convert(arguments[15])?;
|
44
|
+
let comment_char = Option::<String>::try_convert(arguments[16])?;
|
45
|
+
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
|
+
let try_parse_dates = bool::try_convert(arguments[19])?;
|
48
|
+
let skip_rows_after_header = usize::try_convert(arguments[20])?;
|
49
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
|
50
|
+
let sample_size = usize::try_convert(arguments[22])?;
|
51
|
+
let eol_char = String::try_convert(arguments[23])?;
|
52
52
|
// end arguments
|
53
53
|
|
54
54
|
let null_values = null_values.map(|w| w.0);
|
@@ -90,7 +90,7 @@ impl RbBatchedCsv {
|
|
90
90
|
.infer_schema(infer_schema_length)
|
91
91
|
.has_header(has_header)
|
92
92
|
.with_n_rows(n_rows)
|
93
|
-
.
|
93
|
+
.with_separator(separator.as_bytes()[0])
|
94
94
|
.with_skip_rows(skip_rows)
|
95
95
|
.with_ignore_errors(ignore_errors)
|
96
96
|
.with_projection(projection)
|