polars-df 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df03134e7edf09e86b5a4f4f9ae9a926bac4c9c0804a29c3422c32675f478825
4
- data.tar.gz: e0338be1aa96d0ad082ebf8fe27e608b2906b243dd49fa837aceb7f8186947d8
3
+ metadata.gz: a07e6dd4bee3bef4020d7818a060c6f28aaedb7264b206e35a485e575cd8a695
4
+ data.tar.gz: c586e0ec898aab7642f49d49b54c614121b9cb1f748eb7c98d76611ae2ad56a2
5
5
  SHA512:
6
- metadata.gz: 75a139d30f9fdebaa84a21fa45cec8a199da76eb295e7099ceb849646a93fbc7ed80ffed18aaa8eb7bbfc53a32792b2e47101485ad31d727a47ed67d8d7e8110
7
- data.tar.gz: 589f7fbc1300aadc05568308700f6a94b934e63c40bd1be0a3e7b6f564c0d55f256e2e45e926c128d80453d0e7d200b057f640b02cd6fb9aaddf5bf55dd89754
6
+ metadata.gz: ff035a9b60966342ca16dc5eea3b0abd0c4a08f5db8a0fc3c4d6ef206dd20ad56becbcd3a7ecdb5c328de6f9a52d53531eeb44c75078170d451bc39197553570
7
+ data.tar.gz: 48c7334a56339fb0feda046c415839ece39e2c92cf807ed422026d00787d687bbda2ba1198d612434379a2de92830841f0798109920f76d730205e612ab72cb1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.3 (2023-01-22)
2
+
3
+ - Fixed error with precompiled gem on Mac ARM
4
+ - Fixed issue with structs
5
+
1
6
  ## 0.2.2 (2023-01-20)
2
7
 
3
8
  - Added support for strings to `read_sql` method
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.2"
1370
+ version = "0.2.3"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
data/README.md CHANGED
@@ -56,6 +56,8 @@ From Active Record
56
56
 
57
57
  ```ruby
58
58
  Polars.read_sql(User.all)
59
+ # or
60
+ Polars.read_sql("SELECT * FROM users")
59
61
  ```
60
62
 
61
63
  From a hash
@@ -287,13 +289,13 @@ CSV
287
289
  ```ruby
288
290
  df.to_csv
289
291
  # or
290
- df.write_csv("data.csv")
292
+ df.write_csv("file.csv")
291
293
  ```
292
294
 
293
295
  Parquet
294
296
 
295
297
  ```ruby
296
- df.write_parquet("data.parquet")
298
+ df.write_parquet("file.parquet")
297
299
  ```
298
300
 
299
301
  ## Types
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.2"
3
+ version = "0.2.3"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -144,7 +144,7 @@ impl From<Wrap<AnyValue<'_>>> for Value {
144
144
 
145
145
  impl From<Wrap<DataType>> for Value {
146
146
  fn from(w: Wrap<DataType>) -> Self {
147
- let pl = crate::module();
147
+ let pl = crate::rb_modules::polars();
148
148
 
149
149
  match &w.0 {
150
150
  DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
@@ -278,6 +278,22 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
278
278
  Ok(AnyValue::Int64(v).into())
279
279
  } else if let Ok(v) = ob.try_convert::<f64>() {
280
280
  Ok(AnyValue::Float64(v).into())
281
+ } else if ob.is_nil() {
282
+ Ok(AnyValue::Null.into())
283
+ } else if ob.is_kind_of(class::hash()) {
284
+ let dict = ob.try_convert::<RHash>().unwrap();
285
+ let len = dict.len();
286
+ let mut keys = Vec::with_capacity(len);
287
+ let mut vals = Vec::with_capacity(len);
288
+ dict.foreach(|k: Value, v: Value| {
289
+ let key = k.try_convert::<String>()?;
290
+ let val = v.try_convert::<Wrap<AnyValue>>()?.0;
291
+ let dtype = DataType::from(&val);
292
+ keys.push(Field::new(&key, dtype));
293
+ vals.push(val);
294
+ Ok(ForEach::Continue)
295
+ })?;
296
+ Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
281
297
  } else {
282
298
  Err(RbPolarsErr::other(format!(
283
299
  "object type not supported {:?}",
@@ -287,6 +303,141 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
287
303
  }
288
304
  }
289
305
 
306
+ impl<'s> TryConvert for Wrap<Row<'s>> {
307
+ fn try_convert(ob: Value) -> RbResult<Self> {
308
+ let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
309
+ for item in ob.try_convert::<RArray>()?.each() {
310
+ vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
311
+ }
312
+ let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
313
+ Ok(Wrap(Row(vals)))
314
+ }
315
+ }
316
+
317
+ impl TryConvert for Wrap<Schema> {
318
+ fn try_convert(ob: Value) -> RbResult<Self> {
319
+ let dict = ob.try_convert::<RHash>()?;
320
+
321
+ let mut schema = Vec::new();
322
+ dict.foreach(|key: String, val: Wrap<DataType>| {
323
+ schema.push(Field::new(&key, val.0));
324
+ Ok(ForEach::Continue)
325
+ })
326
+ .unwrap();
327
+
328
+ Ok(Wrap(schema.into_iter().into()))
329
+ }
330
+ }
331
+
332
+ #[derive(Clone, Debug)]
333
+ pub struct ObjectValue {
334
+ pub inner: Value,
335
+ }
336
+
337
+ impl Hash for ObjectValue {
338
+ fn hash<H: Hasher>(&self, state: &mut H) {
339
+ let h = self
340
+ .inner
341
+ .funcall::<_, _, isize>("hash", ())
342
+ .expect("should be hashable");
343
+ state.write_isize(h)
344
+ }
345
+ }
346
+
347
+ impl Eq for ObjectValue {}
348
+
349
+ impl PartialEq for ObjectValue {
350
+ fn eq(&self, other: &Self) -> bool {
351
+ self.inner.eql(&other.inner).unwrap_or(false)
352
+ }
353
+ }
354
+
355
+ impl Display for ObjectValue {
356
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
357
+ write!(f, "{}", self.inner)
358
+ }
359
+ }
360
+
361
+ impl PolarsObject for ObjectValue {
362
+ fn type_name() -> &'static str {
363
+ "object"
364
+ }
365
+ }
366
+
367
+ impl From<Value> for ObjectValue {
368
+ fn from(v: Value) -> Self {
369
+ Self { inner: v }
370
+ }
371
+ }
372
+
373
+ impl TryConvert for ObjectValue {
374
+ fn try_convert(ob: Value) -> RbResult<Self> {
375
+ Ok(ObjectValue { inner: ob })
376
+ }
377
+ }
378
+
379
+ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
380
+ fn from(val: &dyn PolarsObjectSafe) -> Self {
381
+ unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
382
+ }
383
+ }
384
+
385
+ // TODO remove
386
+ impl ObjectValue {
387
+ pub fn to_object(&self) -> Value {
388
+ self.inner
389
+ }
390
+ }
391
+
392
+ impl From<ObjectValue> for Value {
393
+ fn from(val: ObjectValue) -> Self {
394
+ val.inner
395
+ }
396
+ }
397
+
398
+ impl Default for ObjectValue {
399
+ fn default() -> Self {
400
+ ObjectValue { inner: *QNIL }
401
+ }
402
+ }
403
+
404
+ pub(crate) fn dicts_to_rows(
405
+ records: &Value,
406
+ infer_schema_len: usize,
407
+ ) -> RbResult<(Vec<Row>, Vec<String>)> {
408
+ let (dicts, len) = get_rbseq(*records)?;
409
+
410
+ let mut key_names = PlIndexSet::new();
411
+ for d in dicts.each().take(infer_schema_len) {
412
+ let d = d?;
413
+ let d = d.try_convert::<RHash>()?;
414
+
415
+ d.foreach(|name: String, _value: Value| {
416
+ key_names.insert(name);
417
+ Ok(ForEach::Continue)
418
+ })?;
419
+ }
420
+
421
+ let mut rows = Vec::with_capacity(len);
422
+
423
+ for d in dicts.each() {
424
+ let d = d?;
425
+ let d = d.try_convert::<RHash>()?;
426
+
427
+ let mut row = Vec::with_capacity(key_names.len());
428
+
429
+ for k in key_names.iter() {
430
+ let val = match d.get(k.clone()) {
431
+ None => AnyValue::Null,
432
+ Some(val) => val.try_convert::<Wrap<AnyValue>>()?.0,
433
+ };
434
+ row.push(val)
435
+ }
436
+ rows.push(Row(row))
437
+ }
438
+ Ok((rows, key_names.into_iter().collect()))
439
+ }
440
+
290
441
  impl TryConvert for Wrap<AsofStrategy> {
291
442
  fn try_convert(ob: Value) -> RbResult<Self> {
292
443
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -641,101 +792,3 @@ pub fn parse_parquet_compression(
641
792
  };
642
793
  Ok(parsed)
643
794
  }
644
-
645
- impl<'s> TryConvert for Wrap<Row<'s>> {
646
- fn try_convert(ob: Value) -> RbResult<Self> {
647
- let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
648
- for item in ob.try_convert::<RArray>()?.each() {
649
- vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
650
- }
651
- let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
652
- Ok(Wrap(Row(vals)))
653
- }
654
- }
655
-
656
- impl TryConvert for Wrap<Schema> {
657
- fn try_convert(ob: Value) -> RbResult<Self> {
658
- let dict = ob.try_convert::<RHash>()?;
659
-
660
- let mut schema = Vec::new();
661
- dict.foreach(|key: String, val: Wrap<DataType>| {
662
- schema.push(Field::new(&key, val.0));
663
- Ok(ForEach::Continue)
664
- })
665
- .unwrap();
666
-
667
- Ok(Wrap(schema.into_iter().into()))
668
- }
669
- }
670
-
671
- #[derive(Clone, Debug)]
672
- pub struct ObjectValue {
673
- pub inner: Value,
674
- }
675
-
676
- impl Hash for ObjectValue {
677
- fn hash<H: Hasher>(&self, state: &mut H) {
678
- let h = self
679
- .inner
680
- .funcall::<_, _, isize>("hash", ())
681
- .expect("should be hashable");
682
- state.write_isize(h)
683
- }
684
- }
685
-
686
- impl Eq for ObjectValue {}
687
-
688
- impl PartialEq for ObjectValue {
689
- fn eq(&self, other: &Self) -> bool {
690
- self.inner.eql(&other.inner).unwrap_or(false)
691
- }
692
- }
693
-
694
- impl Display for ObjectValue {
695
- fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
696
- write!(f, "{}", self.inner)
697
- }
698
- }
699
-
700
- impl PolarsObject for ObjectValue {
701
- fn type_name() -> &'static str {
702
- "object"
703
- }
704
- }
705
-
706
- impl From<Value> for ObjectValue {
707
- fn from(v: Value) -> Self {
708
- Self { inner: v }
709
- }
710
- }
711
-
712
- impl TryConvert for ObjectValue {
713
- fn try_convert(ob: Value) -> RbResult<Self> {
714
- Ok(ObjectValue { inner: ob })
715
- }
716
- }
717
-
718
- impl From<&dyn PolarsObjectSafe> for &ObjectValue {
719
- fn from(val: &dyn PolarsObjectSafe) -> Self {
720
- unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
721
- }
722
- }
723
-
724
- // TODO remove
725
- impl ObjectValue {
726
- pub fn to_object(&self) -> Value {
727
- self.inner
728
- }
729
- }
730
-
731
- impl From<ObjectValue> for Value {
732
- fn from(val: ObjectValue) -> Self {
733
- val.inner
734
- }
735
- }
736
-
737
- impl Default for ObjectValue {
738
- fn default() -> Self {
739
- ObjectValue { inner: *QNIL }
740
- }
741
- }
@@ -1,4 +1,5 @@
1
1
  use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
2
+ use polars::frame::row::{rows_to_schema_supertypes, Row};
2
3
  use polars::frame::NullStrategy;
3
4
  use polars::io::avro::AvroCompression;
4
5
  use polars::io::mmap::ReaderBytes;
@@ -15,8 +16,9 @@ use crate::apply::dataframe::{
15
16
  };
16
17
  use crate::conversion::*;
17
18
  use crate::file::{get_file_like, get_mmap_bytes_reader};
19
+ use crate::rb_modules;
18
20
  use crate::series::{to_rbseries_collection, to_series_collection};
19
- use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
21
+ use crate::{RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
20
22
 
21
23
  #[magnus::wrap(class = "Polars::RbDataFrame")]
22
24
  pub struct RbDataFrame {
@@ -36,6 +38,45 @@ impl RbDataFrame {
36
38
  }
37
39
  }
38
40
 
41
+ fn finish_from_rows(
42
+ rows: Vec<Row>,
43
+ infer_schema_length: Option<usize>,
44
+ schema_overwrite: Option<Schema>,
45
+ ) -> RbResult<Self> {
46
+ // object builder must be registered.
47
+ crate::object::register_object_builder();
48
+
49
+ let schema =
50
+ rows_to_schema_supertypes(&rows, infer_schema_length).map_err(RbPolarsErr::from)?;
51
+ // replace inferred nulls with boolean
52
+ let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
53
+ DataType::Null => {
54
+ fld.coerce(DataType::Boolean);
55
+ fld
56
+ }
57
+ _ => fld,
58
+ });
59
+ let mut schema = Schema::from(fields);
60
+
61
+ if let Some(schema_overwrite) = schema_overwrite {
62
+ for (i, (name, dtype)) in schema_overwrite.into_iter().enumerate() {
63
+ if let Some((name_, dtype_)) = schema.get_index_mut(i) {
64
+ *name_ = name;
65
+
66
+ // if user sets dtype unknown, we use the inferred datatype
67
+ if !matches!(dtype, DataType::Unknown) {
68
+ *dtype_ = dtype;
69
+ }
70
+ } else {
71
+ schema.with_column(name, dtype)
72
+ }
73
+ }
74
+ }
75
+
76
+ let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
77
+ Ok(df.into())
78
+ }
79
+
39
80
  pub fn init(columns: RArray) -> RbResult<Self> {
40
81
  let mut cols = Vec::new();
41
82
  for i in columns.each() {
@@ -288,17 +329,45 @@ impl RbDataFrame {
288
329
  }
289
330
 
290
331
  pub fn read_hashes(
291
- _dicts: Value,
292
- _infer_schema_length: Option<usize>,
293
- _schema_overwrite: Option<Wrap<Schema>>,
332
+ dicts: Value,
333
+ infer_schema_length: Option<usize>,
334
+ schema_overwrite: Option<Wrap<Schema>>,
294
335
  ) -> RbResult<Self> {
295
- Err(RbPolarsErr::todo())
336
+ let (rows, mut names) = dicts_to_rows(&dicts, infer_schema_length.unwrap_or(50))?;
337
+
338
+ // ensure the new names are used
339
+ if let Some(schema) = &schema_overwrite {
340
+ for (new_name, name) in schema.0.iter_names().zip(names.iter_mut()) {
341
+ *name = new_name.clone();
342
+ }
343
+ }
344
+ let rbdf = Self::finish_from_rows(
345
+ rows,
346
+ infer_schema_length,
347
+ schema_overwrite.map(|wrap| wrap.0),
348
+ )?;
349
+
350
+ rbdf.df
351
+ .borrow_mut()
352
+ .get_columns_mut()
353
+ .iter_mut()
354
+ .zip(&names)
355
+ .for_each(|(s, name)| {
356
+ s.rename(name);
357
+ });
358
+ let length = names.len();
359
+ if names.into_iter().collect::<PlHashSet<_>>().len() != length {
360
+ let err = PolarsError::SchemaMisMatch("duplicate column names found".into());
361
+ Err(RbPolarsErr::from(err))?;
362
+ }
363
+
364
+ Ok(rbdf)
296
365
  }
297
366
 
298
367
  pub fn read_hash(data: RHash) -> RbResult<Self> {
299
368
  let mut cols: Vec<Series> = Vec::new();
300
369
  data.foreach(|name: String, values: Value| {
301
- let obj: Value = series().funcall("new", (name, values))?;
370
+ let obj: Value = rb_modules::series().funcall("new", (name, values))?;
302
371
  let rbseries = obj.funcall::<_, _, &RbSeries>("_s", ())?;
303
372
  cols.push(rbseries.series.borrow().clone());
304
373
  Ok(ForEach::Continue)
@@ -6,6 +6,9 @@ mod error;
6
6
  mod file;
7
7
  mod lazy;
8
8
  mod list_construction;
9
+ mod object;
10
+ mod prelude;
11
+ pub(crate) mod rb_modules;
9
12
  mod series;
10
13
  mod set;
11
14
  mod utils;
@@ -18,15 +21,13 @@ use file::get_file_like;
18
21
  use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
19
22
  use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
20
23
  use lazy::utils::rb_exprs_to_exprs;
21
- use magnus::{
22
- define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
23
- Value,
24
- };
24
+ use magnus::{function, method, prelude::*, Error, RArray, RHash, Value};
25
25
  use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
26
26
  use polars::error::PolarsResult;
27
27
  use polars::frame::DataFrame;
28
28
  use polars::functions::{diag_concat_df, hor_concat_df};
29
29
  use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
30
+ use rb_modules::polars;
30
31
  use series::RbSeries;
31
32
 
32
33
  #[cfg(target_os = "linux")]
@@ -45,17 +46,9 @@ static GLOBAL: MiMalloc = MiMalloc;
45
46
 
46
47
  type RbResult<T> = Result<T, Error>;
47
48
 
48
- fn module() -> RModule {
49
- *memoize!(RModule: define_module("Polars").unwrap())
50
- }
51
-
52
- fn series() -> RClass {
53
- *memoize!(RClass: module().define_class("Series", Default::default()).unwrap())
54
- }
55
-
56
49
  #[magnus::init]
57
50
  fn init() -> RbResult<()> {
58
- let module = module();
51
+ let module = polars();
59
52
  module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
60
53
  module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
61
54
  module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
@@ -0,0 +1,30 @@
1
+ use std::any::Any;
2
+ use std::sync::Arc;
3
+
4
+ use polars_core::chunked_array::object::builder::ObjectChunkedBuilder;
5
+ use polars_core::chunked_array::object::registry;
6
+ use polars_core::chunked_array::object::registry::AnonymousObjectBuilder;
7
+ use polars_core::prelude::AnyValue;
8
+
9
+ use crate::prelude::ObjectValue;
10
+ use crate::Wrap;
11
+
12
+ // pub(crate) const OBJECT_NAME: &str = "object";
13
+
14
+ pub(crate) fn register_object_builder() {
15
+ if !registry::is_object_builder_registered() {
16
+ let object_builder = Box::new(|name: &str, capacity: usize| {
17
+ Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
18
+ as Box<dyn AnonymousObjectBuilder>
19
+ });
20
+
21
+ let object_converter = Arc::new(|av: AnyValue| {
22
+ let object = ObjectValue {
23
+ inner: Wrap(av).into(),
24
+ };
25
+ Box::new(object) as Box<dyn Any>
26
+ });
27
+
28
+ registry::register_object_builder(object_builder, object_converter)
29
+ }
30
+ }
@@ -0,0 +1,3 @@
1
+ pub use polars::prelude::*;
2
+
3
+ pub use crate::conversion::*;
@@ -0,0 +1,9 @@
1
+ use magnus::{define_module, memoize, Module, RClass, RModule};
2
+
3
+ pub(crate) fn polars() -> RModule {
4
+ *memoize!(RModule: define_module("Polars").unwrap())
5
+ }
6
+
7
+ pub(crate) fn series() -> RClass {
8
+ *memoize!(RClass: polars().define_class("Series", Default::default()).unwrap())
9
+ }
@@ -4746,7 +4746,14 @@ module Polars
4746
4746
  end
4747
4747
 
4748
4748
  # @private
4749
- def self._unpack_columns(columns, lookup_names: nil, n_expected: nil)
4749
+ def self.include_unknowns(schema, cols)
4750
+ cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
4751
+ end
4752
+
4753
+ # @private
4754
+ def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
4755
+ raise Todo if schema_overrides
4756
+
4750
4757
  if columns.is_a?(Hash)
4751
4758
  columns = columns.to_a
4752
4759
  end
@@ -4790,8 +4797,48 @@ module Polars
4790
4797
  end
4791
4798
  end
4792
4799
 
4800
+ def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
4801
+ rbdf_columns = rbdf.columns
4802
+ rbdf_dtypes = rbdf.dtypes
4803
+ columns, dtypes = _unpack_columns(
4804
+ (columns || rbdf_columns), schema_overrides: schema_overrides
4805
+ )
4806
+ column_subset = []
4807
+ if columns != rbdf_columns
4808
+ if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
4809
+ column_subset = columns
4810
+ else
4811
+ rbdf.set_column_names(columns)
4812
+ end
4813
+ end
4814
+
4815
+ column_casts = []
4816
+ columns.each do |col, i|
4817
+ if dtypes[col] == Categorical # != rbdf_dtypes[i]
4818
+ column_casts << Polars.col(col).cast(Categorical)._rbexpr
4819
+ elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
4820
+ column_casts << Polars.col(col).cast(structs[col])._rbexpr
4821
+ elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
4822
+ column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
4823
+ end
4824
+ end
4825
+
4826
+ if column_casts.any? || column_subset.any?
4827
+ rbdf = rbdf.lazy
4828
+ if column_casts.any?
4829
+ rbdf = rbdf.with_columns(column_casts)
4830
+ end
4831
+ if column_subset.any?
4832
+ rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
4833
+ end
4834
+ rbdf = rbdf.collect
4835
+ end
4836
+
4837
+ rbdf
4838
+ end
4839
+
4793
4840
  # @private
4794
- def self.sequence_to_rbdf(data, columns: nil, orient: nil)
4841
+ def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
4795
4842
  if data.length == 0
4796
4843
  return hash_to_rbdf({}, columns: columns)
4797
4844
  end
@@ -4803,6 +4850,14 @@ module Polars
4803
4850
  data.each do |s|
4804
4851
  data_series << s._s
4805
4852
  end
4853
+ elsif data[0].is_a?(Hash)
4854
+ column_names, dtypes = _unpack_columns(columns)
4855
+ schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
4856
+ rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
4857
+ if column_names
4858
+ rbdf = _post_apply_columns(rbdf, column_names)
4859
+ end
4860
+ return rbdf
4806
4861
  elsif data[0].is_a?(Array)
4807
4862
  if orient.nil? && !columns.nil?
4808
4863
  orient = columns.length == data.length ? "col" : "row"
@@ -3,44 +3,64 @@ module Polars
3
3
  class DataType
4
4
  end
5
5
 
6
+ # Base class for numeric data types.
7
+ class NumericType < DataType
8
+ end
9
+
10
+ # Base class for integral data types.
11
+ class IntegralType < NumericType
12
+ end
13
+
14
+ # Base class for fractional data types.
15
+ class FractionalType < NumericType
16
+ end
17
+
18
+ # Base class for temporal data types.
19
+ class TemporalType < DataType
20
+ end
21
+
22
+ # Base class for nested data types.
23
+ class NestedType < DataType
24
+ end
25
+
6
26
  # 8-bit signed integer type.
7
- class Int8 < DataType
27
+ class Int8 < IntegralType
8
28
  end
9
29
 
10
30
  # 16-bit signed integer type.
11
- class Int16 < DataType
31
+ class Int16 < IntegralType
12
32
  end
13
33
 
14
34
  # 32-bit signed integer type.
15
- class Int32 < DataType
35
+ class Int32 < IntegralType
16
36
  end
17
37
 
18
38
  # 64-bit signed integer type.
19
- class Int64 < DataType
39
+ class Int64 < IntegralType
20
40
  end
21
41
 
22
42
  # 8-bit unsigned integer type.
23
- class UInt8 < DataType
43
+ class UInt8 < IntegralType
24
44
  end
25
45
 
26
46
  # 16-bit unsigned integer type.
27
- class UInt16 < DataType
47
+ class UInt16 < IntegralType
28
48
  end
29
49
 
30
50
  # 32-bit unsigned integer type.
31
- class UInt32 < DataType
51
+ class UInt32 < IntegralType
32
52
  end
33
53
 
34
54
  # 64-bit unsigned integer type.
35
- class UInt64 < DataType
55
+ class UInt64 < IntegralType
36
56
  end
37
57
 
38
58
  # 32-bit floating point type.
39
- class Float32 < DataType
59
+ class Float32 < FractionalType
40
60
  end
41
61
 
42
62
  # 64-bit floating point type.
43
- class Float64 < DataType
63
+ class Float64 < FractionalType
44
64
  end
45
65
 
46
66
  # Boolean type.
@@ -51,31 +71,19 @@ module Polars
51
71
  class Utf8 < DataType
52
72
  end
53
73
 
54
- # Binary type.
55
- class Binary < DataType
56
- end
57
-
58
- # Type representing Null / None values.
59
- class Null < DataType
60
- end
61
-
62
- # Type representing Datatype values that could not be determined statically.
63
- class Unknown < DataType
64
- end
65
-
66
74
  # Nested list/array type.
67
- class List < DataType
75
+ class List < NestedType
68
76
  def initialize(inner)
69
77
  @inner = Utils.rb_type_to_dtype(inner)
70
78
  end
71
79
  end
72
80
 
73
81
  # Calendar date type.
74
- class Date < DataType
82
+ class Date < TemporalType
75
83
  end
76
84
 
77
85
  # Calendar date and time type.
78
- class Datetime < DataType
86
+ class Datetime < TemporalType
79
87
  def initialize(time_unit = "us", time_zone = nil)
80
88
  @tu = time_unit || "us"
81
89
  @time_zone = time_zone
@@ -83,14 +91,14 @@ module Polars
83
91
  end
84
92
 
85
93
  # Time duration/delta type.
86
- class Duration < DataType
94
+ class Duration < TemporalType
87
95
  def initialize(time_unit = "us")
88
96
  @tu = time_unit
89
97
  end
90
98
  end
91
99
 
92
100
  # Time of day type.
93
- class Time < DataType
101
+ class Time < TemporalType
94
102
  end
95
103
 
96
104
  # Type for wrapping arbitrary Ruby objects.
@@ -102,15 +110,24 @@ module Polars
102
110
  end
103
111
 
104
112
  # Definition of a single field within a `Struct` DataType.
105
- class Field < DataType
113
+ class Field
114
+ attr_reader :name, :dtype
115
+
106
116
  def initialize(name, dtype)
107
117
  @name = name
108
118
  @dtype = Utils.rb_type_to_dtype(dtype)
109
119
  end
120
+
121
+ def inspect
122
+ class_name = self.class.name
123
+ "#{class_name}(#{@name}: #{@dtype})"
124
+ end
110
125
  end
111
126
 
112
127
  # Struct composite type.
113
- class Struct < DataType
128
+ class Struct < NestedType
129
+ attr_reader :fields
130
+
114
131
  def initialize(fields)
115
132
  if fields.is_a?(Hash)
116
133
  @fields = fields.map { |n, d| Field.new(n, d) }
@@ -118,5 +135,26 @@ module Polars
118
135
  @fields = fields
119
136
  end
120
137
  end
138
+
139
+ def inspect
140
+ class_name = self.class.name
141
+ "#{class_name}(#{@fields})"
142
+ end
143
+
144
+ def to_schema
145
+ @fields.to_h { |f| [f.name, f.dtype] }
146
+ end
147
+ end
148
+
149
+ # Binary type.
150
+ class Binary < DataType
151
+ end
152
+
153
+ # Type representing Null / None values.
154
+ class Null < DataType
155
+ end
156
+
157
+ # Type representing Datatype values that could not be determined statically.
158
+ class Unknown < DataType
121
159
  end
122
160
  end
data/lib/polars/series.rb CHANGED
@@ -3667,6 +3667,11 @@ module Polars
3667
3667
  rb_temporal_types << ::Time if defined?(::Time)
3668
3668
 
3669
3669
  value = _get_first_non_none(values)
3670
+ if !value.nil?
3671
+ if value.is_a?(Hash)
3672
+ return DataFrame.new(values).to_struct(name)._s
3673
+ end
3674
+ end
3670
3675
 
3671
3676
  if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3672
3677
  constructor = polars_type_to_constructor(dtype)
data/lib/polars/utils.rb CHANGED
@@ -160,11 +160,11 @@ module Polars
160
160
 
161
161
  def self.scale_bytes(sz, to:)
162
162
  scaling_factor = {
163
- "b" => 1,
164
- "k" => 1024,
165
- "m" => 1024 ** 2,
166
- "g" => 1024 ** 3,
167
- "t" => 1024 ** 4,
163
+ "b" => 1,
164
+ "k" => 1024,
165
+ "m" => 1024 ** 2,
166
+ "g" => 1024 ** 3,
167
+ "t" => 1024 ** 4
168
168
  }[to[0]]
169
169
  if scaling_factor > 1
170
170
  sz / scaling_factor.to_f
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.2"
3
+ VERSION = "0.2.3"
4
4
  end
data/lib/polars-df.rb CHANGED
@@ -1 +1 @@
1
- require "polars"
1
+ require_relative "polars"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-21 00:00:00.000000000 Z
11
+ date: 2023-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -55,6 +55,9 @@ files:
55
55
  - ext/polars/src/lazy/utils.rs
56
56
  - ext/polars/src/lib.rs
57
57
  - ext/polars/src/list_construction.rs
58
+ - ext/polars/src/object.rs
59
+ - ext/polars/src/prelude.rs
60
+ - ext/polars/src/rb_modules.rs
58
61
  - ext/polars/src/series.rs
59
62
  - ext/polars/src/set.rs
60
63
  - ext/polars/src/utils.rs