polars-df 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +7 -7
- data/README.md +10 -0
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/conversion.rs +51 -12
- data/ext/polars/src/lib.rs +2 -3
- data/ext/polars/src/rb_modules.rs +7 -3
- data/ext/polars/src/series.rs +6 -4
- data/lib/polars/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5afa804963fc59154e8adde6034e07d1a2a90c077b8e0b72d2ad3ca49af34e8e
|
4
|
+
data.tar.gz: b2291436080973ad43595ef25559cdc81669b07986a6779c85c4c372e220e39a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d7aa73d12e9a322de83db5f69b4ab0f5e9605b737776a83476f917cf32ad1ef6770c79e80a810be68a9144206fef6c3931af3285763741232b8ba87871d0a06
|
7
|
+
data.tar.gz: 2f9daceaba3edd0671a650ab1a2da6ddc6591f768e52a4c8a8f01d7e3fd573320c4464d0081762daeb75d41475bb4e87e298b426b507ef6c7bb9d3164149e7e6
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
|
|
1367
1367
|
|
1368
1368
|
[[package]]
|
1369
1369
|
name = "polars"
|
1370
|
-
version = "0.2.3"
|
1370
|
+
version = "0.2.4"
|
1371
1371
|
dependencies = [
|
1372
1372
|
"ahash",
|
1373
1373
|
"jemallocator",
|
@@ -1653,18 +1653,18 @@ dependencies = [
|
|
1653
1653
|
|
1654
1654
|
[[package]]
|
1655
1655
|
name = "rb-sys"
|
1656
|
-
version = "0.9.
|
1656
|
+
version = "0.9.58"
|
1657
1657
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1658
|
-
checksum = "
|
1658
|
+
checksum = "0158f5115e1ad04a2ee231f597e86306af96f36a8b93ac0c01f8852d0ba89278"
|
1659
1659
|
dependencies = [
|
1660
1660
|
"rb-sys-build",
|
1661
1661
|
]
|
1662
1662
|
|
1663
1663
|
[[package]]
|
1664
1664
|
name = "rb-sys-build"
|
1665
|
-
version = "0.9.
|
1665
|
+
version = "0.9.58"
|
1666
1666
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1667
|
-
checksum = "
|
1667
|
+
checksum = "6c27b779db4a2863db74ddad0011f0d0c55c528e9601126d4613ad688063bc05"
|
1668
1668
|
dependencies = [
|
1669
1669
|
"bindgen",
|
1670
1670
|
"regex",
|
@@ -1673,9 +1673,9 @@ dependencies = [
|
|
1673
1673
|
|
1674
1674
|
[[package]]
|
1675
1675
|
name = "rb-sys-env"
|
1676
|
-
version = "0.1.
|
1676
|
+
version = "0.1.2"
|
1677
1677
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1678
|
-
checksum = "
|
1678
|
+
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
1679
1679
|
|
1680
1680
|
[[package]]
|
1681
1681
|
name = "redox_syscall"
|
data/README.md
CHANGED
@@ -69,6 +69,16 @@ Polars::DataFrame.new({
|
|
69
69
|
})
|
70
70
|
```
|
71
71
|
|
72
|
+
From an array of hashes
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
Polars::DataFrame.new([
|
76
|
+
{"a" => 1, "b" => "one"},
|
77
|
+
{"a" => 2, "b" => "two"},
|
78
|
+
{"a" => 3, "b" => "three"}
|
79
|
+
])
|
80
|
+
```
|
81
|
+
|
72
82
|
From an array of series
|
73
83
|
|
74
84
|
```ruby
|
data/ext/polars/Cargo.toml
CHANGED
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{
|
2
|
+
class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
|
3
|
+
Value, QNIL,
|
4
|
+
};
|
2
5
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
6
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
7
|
use polars::datatypes::AnyValue;
|
5
|
-
use polars::frame::row::Row;
|
8
|
+
use polars::frame::row::{any_values_to_dtype, Row};
|
6
9
|
use polars::frame::NullStrategy;
|
7
10
|
use polars::io::avro::AvroCompression;
|
8
11
|
use polars::prelude::*;
|
@@ -273,15 +276,24 @@ impl TryConvert for Wrap<DataType> {
|
|
273
276
|
|
274
277
|
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
275
278
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
279
|
+
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
280
|
+
Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
|
281
|
+
} else if let Some(v) = Integer::from_value(ob) {
|
282
|
+
Ok(AnyValue::Int64(v.to_i64()?).into())
|
283
|
+
} else if let Some(v) = RFloat::from_value(ob) {
|
284
|
+
Ok(AnyValue::Float64(v.to_f64()).into())
|
285
|
+
} else if let Some(v) = RString::from_value(ob) {
|
286
|
+
Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
|
287
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
288
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
289
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
290
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
291
|
+
let v = sec * 1_000_000_000 + nsec;
|
292
|
+
// TODO support time zone
|
293
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
281
294
|
} else if ob.is_nil() {
|
282
295
|
Ok(AnyValue::Null.into())
|
283
|
-
} else if
|
284
|
-
let dict = ob.try_convert::<RHash>().unwrap();
|
296
|
+
} else if let Some(dict) = RHash::from_value(ob) {
|
285
297
|
let len = dict.len();
|
286
298
|
let mut keys = Vec::with_capacity(len);
|
287
299
|
let mut vals = Vec::with_capacity(len);
|
@@ -294,6 +306,28 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
294
306
|
Ok(ForEach::Continue)
|
295
307
|
})?;
|
296
308
|
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
309
|
+
} else if let Some(v) = RArray::from_value(ob) {
|
310
|
+
if v.is_empty() {
|
311
|
+
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
312
|
+
} else {
|
313
|
+
let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
|
314
|
+
// use first `n` values to infer datatype
|
315
|
+
// this value is not too large as this will be done with every
|
316
|
+
// anyvalue that has to be converted, which can be many
|
317
|
+
let n = 25;
|
318
|
+
let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
|
319
|
+
.map_err(RbPolarsErr::from)?;
|
320
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype)
|
321
|
+
.map_err(RbPolarsErr::from)?;
|
322
|
+
Ok(Wrap(AnyValue::List(s)))
|
323
|
+
}
|
324
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
325
|
+
// convert to DateTime for UTC
|
326
|
+
let v = ob
|
327
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
328
|
+
.funcall::<_, _, Value>("to_time", ())?
|
329
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
330
|
+
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
297
331
|
} else {
|
298
332
|
Err(RbPolarsErr::other(format!(
|
299
333
|
"object type not supported {:?}",
|
@@ -412,8 +446,12 @@ pub(crate) fn dicts_to_rows(
|
|
412
446
|
let d = d?;
|
413
447
|
let d = d.try_convert::<RHash>()?;
|
414
448
|
|
415
|
-
d.foreach(|name:
|
416
|
-
|
449
|
+
d.foreach(|name: Value, _value: Value| {
|
450
|
+
if let Some(v) = Symbol::from_value(name) {
|
451
|
+
key_names.insert(v.name()?.into());
|
452
|
+
} else {
|
453
|
+
key_names.insert(name.try_convert::<String>()?);
|
454
|
+
};
|
417
455
|
Ok(ForEach::Continue)
|
418
456
|
})?;
|
419
457
|
}
|
@@ -427,7 +465,8 @@ pub(crate) fn dicts_to_rows(
|
|
427
465
|
let mut row = Vec::with_capacity(key_names.len());
|
428
466
|
|
429
467
|
for k in key_names.iter() {
|
430
|
-
|
468
|
+
// TODO improve performance
|
469
|
+
let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
|
431
470
|
None => AnyValue::Null,
|
432
471
|
Some(val) => val.try_convert::<Wrap<AnyValue>>()?.0,
|
433
472
|
};
|
data/ext/polars/src/lib.rs
CHANGED
@@ -21,13 +21,12 @@ use file::get_file_like;
|
|
21
21
|
use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
|
22
22
|
use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
|
23
23
|
use lazy::utils::rb_exprs_to_exprs;
|
24
|
-
use magnus::{function, method, prelude::*, Error, RArray, RHash, Value};
|
24
|
+
use magnus::{define_module, function, method, prelude::*, Error, RArray, RHash, Value};
|
25
25
|
use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
|
26
26
|
use polars::error::PolarsResult;
|
27
27
|
use polars::frame::DataFrame;
|
28
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
29
29
|
use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
|
30
|
-
use rb_modules::polars;
|
31
30
|
use series::RbSeries;
|
32
31
|
|
33
32
|
#[cfg(target_os = "linux")]
|
@@ -48,7 +47,7 @@ type RbResult<T> = Result<T, Error>;
|
|
48
47
|
|
49
48
|
#[magnus::init]
|
50
49
|
fn init() -> RbResult<()> {
|
51
|
-
let module =
|
50
|
+
let module = define_module("Polars")?;
|
52
51
|
module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
|
53
52
|
module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
|
54
53
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
data/ext/polars/src/rb_modules.rs
CHANGED
@@ -1,9 +1,13 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{class, memoize, Module, RClass, RModule};
|
2
2
|
|
3
3
|
pub(crate) fn polars() -> RModule {
|
4
|
-
*memoize!(RModule:
|
4
|
+
*memoize!(RModule: class::object().const_get("Polars").unwrap())
|
5
5
|
}
|
6
6
|
|
7
7
|
pub(crate) fn series() -> RClass {
|
8
|
-
*memoize!(RClass: polars().
|
8
|
+
*memoize!(RClass: polars().const_get("Series").unwrap())
|
9
|
+
}
|
10
|
+
|
11
|
+
pub(crate) fn date() -> RClass {
|
12
|
+
*memoize!(RClass: class::object().const_get("Date").unwrap())
|
9
13
|
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -1109,11 +1109,13 @@ impl RbSeries {
|
|
1109
1109
|
builder.append_null();
|
1110
1110
|
} else {
|
1111
1111
|
// convert to DateTime for UTC
|
1112
|
-
let v
|
1113
|
-
|
1114
|
-
|
1112
|
+
let v = v
|
1113
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
1114
|
+
.funcall::<_, _, Value>("to_time", ())?
|
1115
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
1116
|
+
|
1115
1117
|
// TODO use strict
|
1116
|
-
builder.append_value(v
|
1118
|
+
builder.append_value((v / 86400) as i32);
|
1117
1119
|
}
|
1118
1120
|
}
|
1119
1121
|
let ca: ChunkedArray<Int32Type> = builder.finish();
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|