polars-df 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a07e6dd4bee3bef4020d7818a060c6f28aaedb7264b206e35a485e575cd8a695
4
- data.tar.gz: c586e0ec898aab7642f49d49b54c614121b9cb1f748eb7c98d76611ae2ad56a2
3
+ metadata.gz: 5afa804963fc59154e8adde6034e07d1a2a90c077b8e0b72d2ad3ca49af34e8e
4
+ data.tar.gz: b2291436080973ad43595ef25559cdc81669b07986a6779c85c4c372e220e39a
5
5
  SHA512:
6
- metadata.gz: ff035a9b60966342ca16dc5eea3b0abd0c4a08f5db8a0fc3c4d6ef206dd20ad56becbcd3a7ecdb5c328de6f9a52d53531eeb44c75078170d451bc39197553570
7
- data.tar.gz: 48c7334a56339fb0feda046c415839ece39e2c92cf807ed422026d00787d687bbda2ba1198d612434379a2de92830841f0798109920f76d730205e612ab72cb1
6
+ metadata.gz: 7d7aa73d12e9a322de83db5f69b4ab0f5e9605b737776a83476f917cf32ad1ef6770c79e80a810be68a9144206fef6c3931af3285763741232b8ba87871d0a06
7
+ data.tar.gz: 2f9daceaba3edd0671a650ab1a2da6ddc6591f768e52a4c8a8f01d7e3fd573320c4464d0081762daeb75d41475bb4e87e298b426b507ef6c7bb9d3164149e7e6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.4 (2023-01-29)
2
+
3
+ - Added support for more types when creating a data frame from an array of hashes
4
+
1
5
  ## 0.2.3 (2023-01-22)
2
6
 
3
7
  - Fixed error with precompiled gem on Mac ARM
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.3"
1370
+ version = "0.2.4"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
@@ -1653,18 +1653,18 @@ dependencies = [
1653
1653
 
1654
1654
  [[package]]
1655
1655
  name = "rb-sys"
1656
- version = "0.9.56"
1656
+ version = "0.9.58"
1657
1657
  source = "registry+https://github.com/rust-lang/crates.io-index"
1658
- checksum = "ef82428221475c6f9e7893fe30b88d45ac86bdb12e58e7c92055ba4bceb78a69"
1658
+ checksum = "0158f5115e1ad04a2ee231f597e86306af96f36a8b93ac0c01f8852d0ba89278"
1659
1659
  dependencies = [
1660
1660
  "rb-sys-build",
1661
1661
  ]
1662
1662
 
1663
1663
  [[package]]
1664
1664
  name = "rb-sys-build"
1665
- version = "0.9.56"
1665
+ version = "0.9.58"
1666
1666
  source = "registry+https://github.com/rust-lang/crates.io-index"
1667
- checksum = "950bfc239d2e7704576abe4d37b008876bbfd70a99196a188c5caeae2ba7344a"
1667
+ checksum = "6c27b779db4a2863db74ddad0011f0d0c55c528e9601126d4613ad688063bc05"
1668
1668
  dependencies = [
1669
1669
  "bindgen",
1670
1670
  "regex",
@@ -1673,9 +1673,9 @@ dependencies = [
1673
1673
 
1674
1674
  [[package]]
1675
1675
  name = "rb-sys-env"
1676
- version = "0.1.1"
1676
+ version = "0.1.2"
1677
1677
  source = "registry+https://github.com/rust-lang/crates.io-index"
1678
- checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
1678
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
1679
1679
 
1680
1680
  [[package]]
1681
1681
  name = "redox_syscall"
data/README.md CHANGED
@@ -69,6 +69,16 @@ Polars::DataFrame.new({
69
69
  })
70
70
  ```
71
71
 
72
+ From an array of hashes
73
+
74
+ ```ruby
75
+ Polars::DataFrame.new([
76
+ {"a" => 1, "b" => "one"},
77
+ {"a" => 2, "b" => "two"},
78
+ {"a" => 3, "b" => "three"}
79
+ ])
80
+ ```
81
+
72
82
  From an array of series
73
83
 
74
84
  ```ruby
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.3"
3
+ version = "0.2.4"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -1,8 +1,11 @@
1
- use magnus::{class, r_hash::ForEach, Module, RArray, RHash, Symbol, TryConvert, Value, QNIL};
1
+ use magnus::{
2
+ class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
3
+ Value, QNIL,
4
+ };
2
5
  use polars::chunked_array::object::PolarsObjectSafe;
3
6
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
4
7
  use polars::datatypes::AnyValue;
5
- use polars::frame::row::Row;
8
+ use polars::frame::row::{any_values_to_dtype, Row};
6
9
  use polars::frame::NullStrategy;
7
10
  use polars::io::avro::AvroCompression;
8
11
  use polars::prelude::*;
@@ -273,15 +276,24 @@ impl TryConvert for Wrap<DataType> {
273
276
 
274
277
  impl<'s> TryConvert for Wrap<AnyValue<'s>> {
275
278
  fn try_convert(ob: Value) -> RbResult<Self> {
276
- // TODO improve
277
- if let Ok(v) = ob.try_convert::<i64>() {
278
- Ok(AnyValue::Int64(v).into())
279
- } else if let Ok(v) = ob.try_convert::<f64>() {
280
- Ok(AnyValue::Float64(v).into())
279
+ if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
280
+ Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
281
+ } else if let Some(v) = Integer::from_value(ob) {
282
+ Ok(AnyValue::Int64(v.to_i64()?).into())
283
+ } else if let Some(v) = RFloat::from_value(ob) {
284
+ Ok(AnyValue::Float64(v.to_f64()).into())
285
+ } else if let Some(v) = RString::from_value(ob) {
286
+ Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
287
+ // call is_a? for ActiveSupport::TimeWithZone
288
+ } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
289
+ let sec = ob.funcall::<_, _, i64>("to_i", ())?;
290
+ let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
291
+ let v = sec * 1_000_000_000 + nsec;
292
+ // TODO support time zone
293
+ Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
281
294
  } else if ob.is_nil() {
282
295
  Ok(AnyValue::Null.into())
283
- } else if ob.is_kind_of(class::hash()) {
284
- let dict = ob.try_convert::<RHash>().unwrap();
296
+ } else if let Some(dict) = RHash::from_value(ob) {
285
297
  let len = dict.len();
286
298
  let mut keys = Vec::with_capacity(len);
287
299
  let mut vals = Vec::with_capacity(len);
@@ -294,6 +306,28 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
294
306
  Ok(ForEach::Continue)
295
307
  })?;
296
308
  Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
309
+ } else if let Some(v) = RArray::from_value(ob) {
310
+ if v.is_empty() {
311
+ Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
312
+ } else {
313
+ let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
314
+ // use first `n` values to infer datatype
315
+ // this value is not too large as this will be done with every
316
+ // anyvalue that has to be converted, which can be many
317
+ let n = 25;
318
+ let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
319
+ .map_err(RbPolarsErr::from)?;
320
+ let s = Series::from_any_values_and_dtype("", &avs, &dtype)
321
+ .map_err(RbPolarsErr::from)?;
322
+ Ok(Wrap(AnyValue::List(s)))
323
+ }
324
+ } else if ob.is_kind_of(crate::rb_modules::date()) {
325
+ // convert to DateTime for UTC
326
+ let v = ob
327
+ .funcall::<_, _, Value>("to_datetime", ())?
328
+ .funcall::<_, _, Value>("to_time", ())?
329
+ .funcall::<_, _, i64>("to_i", ())?;
330
+ Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
297
331
  } else {
298
332
  Err(RbPolarsErr::other(format!(
299
333
  "object type not supported {:?}",
@@ -412,8 +446,12 @@ pub(crate) fn dicts_to_rows(
412
446
  let d = d?;
413
447
  let d = d.try_convert::<RHash>()?;
414
448
 
415
- d.foreach(|name: String, _value: Value| {
416
- key_names.insert(name);
449
+ d.foreach(|name: Value, _value: Value| {
450
+ if let Some(v) = Symbol::from_value(name) {
451
+ key_names.insert(v.name()?.into());
452
+ } else {
453
+ key_names.insert(name.try_convert::<String>()?);
454
+ };
417
455
  Ok(ForEach::Continue)
418
456
  })?;
419
457
  }
@@ -427,7 +465,8 @@ pub(crate) fn dicts_to_rows(
427
465
  let mut row = Vec::with_capacity(key_names.len());
428
466
 
429
467
  for k in key_names.iter() {
430
- let val = match d.get(k.clone()) {
468
+ // TODO improve performance
469
+ let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
431
470
  None => AnyValue::Null,
432
471
  Some(val) => val.try_convert::<Wrap<AnyValue>>()?.0,
433
472
  };
@@ -21,13 +21,12 @@ use file::get_file_like;
21
21
  use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
22
22
  use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
23
23
  use lazy::utils::rb_exprs_to_exprs;
24
- use magnus::{function, method, prelude::*, Error, RArray, RHash, Value};
24
+ use magnus::{define_module, function, method, prelude::*, Error, RArray, RHash, Value};
25
25
  use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
26
26
  use polars::error::PolarsResult;
27
27
  use polars::frame::DataFrame;
28
28
  use polars::functions::{diag_concat_df, hor_concat_df};
29
29
  use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
30
- use rb_modules::polars;
31
30
  use series::RbSeries;
32
31
 
33
32
  #[cfg(target_os = "linux")]
@@ -48,7 +47,7 @@ type RbResult<T> = Result<T, Error>;
48
47
 
49
48
  #[magnus::init]
50
49
  fn init() -> RbResult<()> {
51
- let module = polars();
50
+ let module = define_module("Polars")?;
52
51
  module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
53
52
  module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
54
53
  module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
@@ -1,9 +1,13 @@
1
- use magnus::{define_module, memoize, Module, RClass, RModule};
1
+ use magnus::{class, memoize, Module, RClass, RModule};
2
2
 
3
3
  pub(crate) fn polars() -> RModule {
4
- *memoize!(RModule: define_module("Polars").unwrap())
4
+ *memoize!(RModule: class::object().const_get("Polars").unwrap())
5
5
  }
6
6
 
7
7
  pub(crate) fn series() -> RClass {
8
- *memoize!(RClass: polars().define_class("Series", Default::default()).unwrap())
8
+ *memoize!(RClass: polars().const_get("Series").unwrap())
9
+ }
10
+
11
+ pub(crate) fn date() -> RClass {
12
+ *memoize!(RClass: class::object().const_get("Date").unwrap())
9
13
  }
@@ -1109,11 +1109,13 @@ impl RbSeries {
1109
1109
  builder.append_null();
1110
1110
  } else {
1111
1111
  // convert to DateTime for UTC
1112
- let v: Value = v.funcall("to_datetime", ())?;
1113
- let v: Value = v.funcall("to_time", ())?;
1114
- let v: Value = v.funcall("to_i", ())?;
1112
+ let v = v
1113
+ .funcall::<_, _, Value>("to_datetime", ())?
1114
+ .funcall::<_, _, Value>("to_time", ())?
1115
+ .funcall::<_, _, i64>("to_i", ())?;
1116
+
1115
1117
  // TODO use strict
1116
- builder.append_value(v.try_convert::<i32>()? / 86400);
1118
+ builder.append_value((v / 86400) as i32);
1117
1119
  }
1118
1120
  }
1119
1121
  let ca: ChunkedArray<Int32Type> = builder.finish();
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.3"
3
+ VERSION = "0.2.4"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-22 00:00:00.000000000 Z
11
+ date: 2023-01-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys