polars-df 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +7 -7
- data/README.md +10 -0
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/conversion.rs +51 -12
- data/ext/polars/src/lib.rs +2 -3
- data/ext/polars/src/rb_modules.rs +7 -3
- data/ext/polars/src/series.rs +6 -4
- data/lib/polars/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5afa804963fc59154e8adde6034e07d1a2a90c077b8e0b72d2ad3ca49af34e8e
|
4
|
+
data.tar.gz: b2291436080973ad43595ef25559cdc81669b07986a6779c85c4c372e220e39a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d7aa73d12e9a322de83db5f69b4ab0f5e9605b737776a83476f917cf32ad1ef6770c79e80a810be68a9144206fef6c3931af3285763741232b8ba87871d0a06
|
7
|
+
data.tar.gz: 2f9daceaba3edd0671a650ab1a2da6ddc6591f768e52a4c8a8f01d7e3fd573320c4464d0081762daeb75d41475bb4e87e298b426b507ef6c7bb9d3164149e7e6
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
|
|
1367
1367
|
|
1368
1368
|
[[package]]
|
1369
1369
|
name = "polars"
|
1370
|
-
version = "0.2.3"
|
1370
|
+
version = "0.2.4"
|
1371
1371
|
dependencies = [
|
1372
1372
|
"ahash",
|
1373
1373
|
"jemallocator",
|
@@ -1653,18 +1653,18 @@ dependencies = [
|
|
1653
1653
|
|
1654
1654
|
[[package]]
|
1655
1655
|
name = "rb-sys"
|
1656
|
-
version = "0.9.
|
1656
|
+
version = "0.9.58"
|
1657
1657
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1658
|
-
checksum = "
|
1658
|
+
checksum = "0158f5115e1ad04a2ee231f597e86306af96f36a8b93ac0c01f8852d0ba89278"
|
1659
1659
|
dependencies = [
|
1660
1660
|
"rb-sys-build",
|
1661
1661
|
]
|
1662
1662
|
|
1663
1663
|
[[package]]
|
1664
1664
|
name = "rb-sys-build"
|
1665
|
-
version = "0.9.
|
1665
|
+
version = "0.9.58"
|
1666
1666
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1667
|
-
checksum = "
|
1667
|
+
checksum = "6c27b779db4a2863db74ddad0011f0d0c55c528e9601126d4613ad688063bc05"
|
1668
1668
|
dependencies = [
|
1669
1669
|
"bindgen",
|
1670
1670
|
"regex",
|
@@ -1673,9 +1673,9 @@ dependencies = [
|
|
1673
1673
|
|
1674
1674
|
[[package]]
|
1675
1675
|
name = "rb-sys-env"
|
1676
|
-
version = "0.1.
|
1676
|
+
version = "0.1.2"
|
1677
1677
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1678
|
-
checksum = "
|
1678
|
+
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
1679
1679
|
|
1680
1680
|
[[package]]
|
1681
1681
|
name = "redox_syscall"
|
data/README.md
CHANGED
@@ -69,6 +69,16 @@ Polars::DataFrame.new({
|
|
69
69
|
})
|
70
70
|
```
|
71
71
|
|
72
|
+
From an array of hashes
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
Polars::DataFrame.new([
|
76
|
+
{"a" => 1, "b" => "one"},
|
77
|
+
{"a" => 2, "b" => "two"},
|
78
|
+
{"a" => 3, "b" => "three"}
|
79
|
+
])
|
80
|
+
```
|
81
|
+
|
72
82
|
From an array of series
|
73
83
|
|
74
84
|
```ruby
|
data/ext/polars/Cargo.toml
CHANGED
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{
|
2
|
+
class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
|
3
|
+
Value, QNIL,
|
4
|
+
};
|
2
5
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
6
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
7
|
use polars::datatypes::AnyValue;
|
5
|
-
use polars::frame::row::Row;
|
8
|
+
use polars::frame::row::{any_values_to_dtype, Row};
|
6
9
|
use polars::frame::NullStrategy;
|
7
10
|
use polars::io::avro::AvroCompression;
|
8
11
|
use polars::prelude::*;
|
@@ -273,15 +276,24 @@ impl TryConvert for Wrap<DataType> {
|
|
273
276
|
|
274
277
|
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
275
278
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
279
|
+
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
280
|
+
Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
|
281
|
+
} else if let Some(v) = Integer::from_value(ob) {
|
282
|
+
Ok(AnyValue::Int64(v.to_i64()?).into())
|
283
|
+
} else if let Some(v) = RFloat::from_value(ob) {
|
284
|
+
Ok(AnyValue::Float64(v.to_f64()).into())
|
285
|
+
} else if let Some(v) = RString::from_value(ob) {
|
286
|
+
Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
|
287
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
288
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
289
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
290
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
291
|
+
let v = sec * 1_000_000_000 + nsec;
|
292
|
+
// TODO support time zone
|
293
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
281
294
|
} else if ob.is_nil() {
|
282
295
|
Ok(AnyValue::Null.into())
|
283
|
-
} else if
|
284
|
-
let dict = ob.try_convert::<RHash>().unwrap();
|
296
|
+
} else if let Some(dict) = RHash::from_value(ob) {
|
285
297
|
let len = dict.len();
|
286
298
|
let mut keys = Vec::with_capacity(len);
|
287
299
|
let mut vals = Vec::with_capacity(len);
|
@@ -294,6 +306,28 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
294
306
|
Ok(ForEach::Continue)
|
295
307
|
})?;
|
296
308
|
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
309
|
+
} else if let Some(v) = RArray::from_value(ob) {
|
310
|
+
if v.is_empty() {
|
311
|
+
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
312
|
+
} else {
|
313
|
+
let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
|
314
|
+
// use first `n` values to infer datatype
|
315
|
+
// this value is not too large as this will be done with every
|
316
|
+
// anyvalue that has to be converted, which can be many
|
317
|
+
let n = 25;
|
318
|
+
let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
|
319
|
+
.map_err(RbPolarsErr::from)?;
|
320
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype)
|
321
|
+
.map_err(RbPolarsErr::from)?;
|
322
|
+
Ok(Wrap(AnyValue::List(s)))
|
323
|
+
}
|
324
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
325
|
+
// convert to DateTime for UTC
|
326
|
+
let v = ob
|
327
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
328
|
+
.funcall::<_, _, Value>("to_time", ())?
|
329
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
330
|
+
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
297
331
|
} else {
|
298
332
|
Err(RbPolarsErr::other(format!(
|
299
333
|
"object type not supported {:?}",
|
@@ -412,8 +446,12 @@ pub(crate) fn dicts_to_rows(
|
|
412
446
|
let d = d?;
|
413
447
|
let d = d.try_convert::<RHash>()?;
|
414
448
|
|
415
|
-
d.foreach(|name:
|
416
|
-
|
449
|
+
d.foreach(|name: Value, _value: Value| {
|
450
|
+
if let Some(v) = Symbol::from_value(name) {
|
451
|
+
key_names.insert(v.name()?.into());
|
452
|
+
} else {
|
453
|
+
key_names.insert(name.try_convert::<String>()?);
|
454
|
+
};
|
417
455
|
Ok(ForEach::Continue)
|
418
456
|
})?;
|
419
457
|
}
|
@@ -427,7 +465,8 @@ pub(crate) fn dicts_to_rows(
|
|
427
465
|
let mut row = Vec::with_capacity(key_names.len());
|
428
466
|
|
429
467
|
for k in key_names.iter() {
|
430
|
-
|
468
|
+
// TODO improve performance
|
469
|
+
let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
|
431
470
|
None => AnyValue::Null,
|
432
471
|
Some(val) => val.try_convert::<Wrap<AnyValue>>()?.0,
|
433
472
|
};
|
data/ext/polars/src/lib.rs
CHANGED
@@ -21,13 +21,12 @@ use file::get_file_like;
|
|
21
21
|
use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
|
22
22
|
use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
|
23
23
|
use lazy::utils::rb_exprs_to_exprs;
|
24
|
-
use magnus::{function, method, prelude::*, Error, RArray, RHash, Value};
|
24
|
+
use magnus::{define_module, function, method, prelude::*, Error, RArray, RHash, Value};
|
25
25
|
use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
|
26
26
|
use polars::error::PolarsResult;
|
27
27
|
use polars::frame::DataFrame;
|
28
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
29
29
|
use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
|
30
|
-
use rb_modules::polars;
|
31
30
|
use series::RbSeries;
|
32
31
|
|
33
32
|
#[cfg(target_os = "linux")]
|
@@ -48,7 +47,7 @@ type RbResult<T> = Result<T, Error>;
|
|
48
47
|
|
49
48
|
#[magnus::init]
|
50
49
|
fn init() -> RbResult<()> {
|
51
|
-
let module =
|
50
|
+
let module = define_module("Polars")?;
|
52
51
|
module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
|
53
52
|
module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
|
54
53
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
data/ext/polars/src/rb_modules.rs
CHANGED
@@ -1,9 +1,13 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{class, memoize, Module, RClass, RModule};
|
2
2
|
|
3
3
|
pub(crate) fn polars() -> RModule {
|
4
|
-
*memoize!(RModule:
|
4
|
+
*memoize!(RModule: class::object().const_get("Polars").unwrap())
|
5
5
|
}
|
6
6
|
|
7
7
|
pub(crate) fn series() -> RClass {
|
8
|
-
*memoize!(RClass: polars().
|
8
|
+
*memoize!(RClass: polars().const_get("Series").unwrap())
|
9
|
+
}
|
10
|
+
|
11
|
+
pub(crate) fn date() -> RClass {
|
12
|
+
*memoize!(RClass: class::object().const_get("Date").unwrap())
|
9
13
|
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -1109,11 +1109,13 @@ impl RbSeries {
|
|
1109
1109
|
builder.append_null();
|
1110
1110
|
} else {
|
1111
1111
|
// convert to DateTime for UTC
|
1112
|
-
let v
|
1113
|
-
|
1114
|
-
|
1112
|
+
let v = v
|
1113
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
1114
|
+
.funcall::<_, _, Value>("to_time", ())?
|
1115
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
1116
|
+
|
1115
1117
|
// TODO use strict
|
1116
|
-
builder.append_value(v
|
1118
|
+
builder.append_value((v / 86400) as i32);
|
1117
1119
|
}
|
1118
1120
|
}
|
1119
1121
|
let ca: ChunkedArray<Int32Type> = builder.finish();
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|