polars-df 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a07e6dd4bee3bef4020d7818a060c6f28aaedb7264b206e35a485e575cd8a695
4
- data.tar.gz: c586e0ec898aab7642f49d49b54c614121b9cb1f748eb7c98d76611ae2ad56a2
3
+ metadata.gz: 5afa804963fc59154e8adde6034e07d1a2a90c077b8e0b72d2ad3ca49af34e8e
4
+ data.tar.gz: b2291436080973ad43595ef25559cdc81669b07986a6779c85c4c372e220e39a
5
5
  SHA512:
6
- metadata.gz: ff035a9b60966342ca16dc5eea3b0abd0c4a08f5db8a0fc3c4d6ef206dd20ad56becbcd3a7ecdb5c328de6f9a52d53531eeb44c75078170d451bc39197553570
7
- data.tar.gz: 48c7334a56339fb0feda046c415839ece39e2c92cf807ed422026d00787d687bbda2ba1198d612434379a2de92830841f0798109920f76d730205e612ab72cb1
6
+ metadata.gz: 7d7aa73d12e9a322de83db5f69b4ab0f5e9605b737776a83476f917cf32ad1ef6770c79e80a810be68a9144206fef6c3931af3285763741232b8ba87871d0a06
7
+ data.tar.gz: 2f9daceaba3edd0671a650ab1a2da6ddc6591f768e52a4c8a8f01d7e3fd573320c4464d0081762daeb75d41475bb4e87e298b426b507ef6c7bb9d3164149e7e6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.4 (2023-01-29)
2
+
3
+ - Added support for more types when creating a data frame from an array of hashes
4
+
1
5
  ## 0.2.3 (2023-01-22)
2
6
 
3
7
  - Fixed error with precompiled gem on Mac ARM
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.3"
1370
+ version = "0.2.4"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
@@ -1653,18 +1653,18 @@ dependencies = [
1653
1653
 
1654
1654
  [[package]]
1655
1655
  name = "rb-sys"
1656
- version = "0.9.56"
1656
+ version = "0.9.58"
1657
1657
  source = "registry+https://github.com/rust-lang/crates.io-index"
1658
- checksum = "ef82428221475c6f9e7893fe30b88d45ac86bdb12e58e7c92055ba4bceb78a69"
1658
+ checksum = "0158f5115e1ad04a2ee231f597e86306af96f36a8b93ac0c01f8852d0ba89278"
1659
1659
  dependencies = [
1660
1660
  "rb-sys-build",
1661
1661
  ]
1662
1662
 
1663
1663
  [[package]]
1664
1664
  name = "rb-sys-build"
1665
- version = "0.9.56"
1665
+ version = "0.9.58"
1666
1666
  source = "registry+https://github.com/rust-lang/crates.io-index"
1667
- checksum = "950bfc239d2e7704576abe4d37b008876bbfd70a99196a188c5caeae2ba7344a"
1667
+ checksum = "6c27b779db4a2863db74ddad0011f0d0c55c528e9601126d4613ad688063bc05"
1668
1668
  dependencies = [
1669
1669
  "bindgen",
1670
1670
  "regex",
@@ -1673,9 +1673,9 @@ dependencies = [
1673
1673
 
1674
1674
  [[package]]
1675
1675
  name = "rb-sys-env"
1676
- version = "0.1.1"
1676
+ version = "0.1.2"
1677
1677
  source = "registry+https://github.com/rust-lang/crates.io-index"
1678
- checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
1678
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
1679
1679
 
1680
1680
  [[package]]
1681
1681
  name = "redox_syscall"
data/README.md CHANGED
@@ -69,6 +69,16 @@ Polars::DataFrame.new({
69
69
  })
70
70
  ```
71
71
 
72
+ From an array of hashes
73
+
74
+ ```ruby
75
+ Polars::DataFrame.new([
76
+ {"a" => 1, "b" => "one"},
77
+ {"a" => 2, "b" => "two"},
78
+ {"a" => 3, "b" => "three"}
79
+ ])
80
+ ```
81
+
72
82
  From an array of series
73
83
 
74
84
  ```ruby
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.3"
3
+ version = "0.2.4"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -1,8 +1,11 @@
1
- use magnus::{class, r_hash::ForEach, Module, RArray, RHash, Symbol, TryConvert, Value, QNIL};
1
+ use magnus::{
2
+ class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
3
+ Value, QNIL,
4
+ };
2
5
  use polars::chunked_array::object::PolarsObjectSafe;
3
6
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
4
7
  use polars::datatypes::AnyValue;
5
- use polars::frame::row::Row;
8
+ use polars::frame::row::{any_values_to_dtype, Row};
6
9
  use polars::frame::NullStrategy;
7
10
  use polars::io::avro::AvroCompression;
8
11
  use polars::prelude::*;
@@ -273,15 +276,24 @@ impl TryConvert for Wrap<DataType> {
273
276
 
274
277
  impl<'s> TryConvert for Wrap<AnyValue<'s>> {
275
278
  fn try_convert(ob: Value) -> RbResult<Self> {
276
- // TODO improve
277
- if let Ok(v) = ob.try_convert::<i64>() {
278
- Ok(AnyValue::Int64(v).into())
279
- } else if let Ok(v) = ob.try_convert::<f64>() {
280
- Ok(AnyValue::Float64(v).into())
279
+ if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
280
+ Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
281
+ } else if let Some(v) = Integer::from_value(ob) {
282
+ Ok(AnyValue::Int64(v.to_i64()?).into())
283
+ } else if let Some(v) = RFloat::from_value(ob) {
284
+ Ok(AnyValue::Float64(v.to_f64()).into())
285
+ } else if let Some(v) = RString::from_value(ob) {
286
+ Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
287
+ // call is_a? for ActiveSupport::TimeWithZone
288
+ } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
289
+ let sec = ob.funcall::<_, _, i64>("to_i", ())?;
290
+ let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
291
+ let v = sec * 1_000_000_000 + nsec;
292
+ // TODO support time zone
293
+ Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
281
294
  } else if ob.is_nil() {
282
295
  Ok(AnyValue::Null.into())
283
- } else if ob.is_kind_of(class::hash()) {
284
- let dict = ob.try_convert::<RHash>().unwrap();
296
+ } else if let Some(dict) = RHash::from_value(ob) {
285
297
  let len = dict.len();
286
298
  let mut keys = Vec::with_capacity(len);
287
299
  let mut vals = Vec::with_capacity(len);
@@ -294,6 +306,28 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
294
306
  Ok(ForEach::Continue)
295
307
  })?;
296
308
  Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
309
+ } else if let Some(v) = RArray::from_value(ob) {
310
+ if v.is_empty() {
311
+ Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
312
+ } else {
313
+ let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
314
+ // use first `n` values to infer datatype
315
+ // this value is not too large as this will be done with every
316
+ // anyvalue that has to be converted, which can be many
317
+ let n = 25;
318
+ let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
319
+ .map_err(RbPolarsErr::from)?;
320
+ let s = Series::from_any_values_and_dtype("", &avs, &dtype)
321
+ .map_err(RbPolarsErr::from)?;
322
+ Ok(Wrap(AnyValue::List(s)))
323
+ }
324
+ } else if ob.is_kind_of(crate::rb_modules::date()) {
325
+ // convert to DateTime for UTC
326
+ let v = ob
327
+ .funcall::<_, _, Value>("to_datetime", ())?
328
+ .funcall::<_, _, Value>("to_time", ())?
329
+ .funcall::<_, _, i64>("to_i", ())?;
330
+ Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
297
331
  } else {
298
332
  Err(RbPolarsErr::other(format!(
299
333
  "object type not supported {:?}",
@@ -412,8 +446,12 @@ pub(crate) fn dicts_to_rows(
412
446
  let d = d?;
413
447
  let d = d.try_convert::<RHash>()?;
414
448
 
415
- d.foreach(|name: String, _value: Value| {
416
- key_names.insert(name);
449
+ d.foreach(|name: Value, _value: Value| {
450
+ if let Some(v) = Symbol::from_value(name) {
451
+ key_names.insert(v.name()?.into());
452
+ } else {
453
+ key_names.insert(name.try_convert::<String>()?);
454
+ };
417
455
  Ok(ForEach::Continue)
418
456
  })?;
419
457
  }
@@ -427,7 +465,8 @@ pub(crate) fn dicts_to_rows(
427
465
  let mut row = Vec::with_capacity(key_names.len());
428
466
 
429
467
  for k in key_names.iter() {
430
- let val = match d.get(k.clone()) {
468
+ // TODO improve performance
469
+ let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
431
470
  None => AnyValue::Null,
432
471
  Some(val) => val.try_convert::<Wrap<AnyValue>>()?.0,
433
472
  };
@@ -21,13 +21,12 @@ use file::get_file_like;
21
21
  use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
22
22
  use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
23
23
  use lazy::utils::rb_exprs_to_exprs;
24
- use magnus::{function, method, prelude::*, Error, RArray, RHash, Value};
24
+ use magnus::{define_module, function, method, prelude::*, Error, RArray, RHash, Value};
25
25
  use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
26
26
  use polars::error::PolarsResult;
27
27
  use polars::frame::DataFrame;
28
28
  use polars::functions::{diag_concat_df, hor_concat_df};
29
29
  use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
30
- use rb_modules::polars;
31
30
  use series::RbSeries;
32
31
 
33
32
  #[cfg(target_os = "linux")]
@@ -48,7 +47,7 @@ type RbResult<T> = Result<T, Error>;
48
47
 
49
48
  #[magnus::init]
50
49
  fn init() -> RbResult<()> {
51
- let module = polars();
50
+ let module = define_module("Polars")?;
52
51
  module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
53
52
  module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
54
53
  module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
@@ -1,9 +1,13 @@
1
- use magnus::{define_module, memoize, Module, RClass, RModule};
1
+ use magnus::{class, memoize, Module, RClass, RModule};
2
2
 
3
3
  pub(crate) fn polars() -> RModule {
4
- *memoize!(RModule: define_module("Polars").unwrap())
4
+ *memoize!(RModule: class::object().const_get("Polars").unwrap())
5
5
  }
6
6
 
7
7
  pub(crate) fn series() -> RClass {
8
- *memoize!(RClass: polars().define_class("Series", Default::default()).unwrap())
8
+ *memoize!(RClass: polars().const_get("Series").unwrap())
9
+ }
10
+
11
+ pub(crate) fn date() -> RClass {
12
+ *memoize!(RClass: class::object().const_get("Date").unwrap())
9
13
  }
@@ -1109,11 +1109,13 @@ impl RbSeries {
1109
1109
  builder.append_null();
1110
1110
  } else {
1111
1111
  // convert to DateTime for UTC
1112
- let v: Value = v.funcall("to_datetime", ())?;
1113
- let v: Value = v.funcall("to_time", ())?;
1114
- let v: Value = v.funcall("to_i", ())?;
1112
+ let v = v
1113
+ .funcall::<_, _, Value>("to_datetime", ())?
1114
+ .funcall::<_, _, Value>("to_time", ())?
1115
+ .funcall::<_, _, i64>("to_i", ())?;
1116
+
1115
1117
  // TODO use strict
1116
- builder.append_value(v.try_convert::<i32>()? / 86400);
1118
+ builder.append_value((v / 86400) as i32);
1117
1119
  }
1118
1120
  }
1119
1121
  let ca: ChunkedArray<Int32Type> = builder.finish();
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.3"
3
+ VERSION = "0.2.4"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-22 00:00:00.000000000 Z
11
+ date: 2023-01-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys