polars-df 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +10 -13
- data/ext/polars/src/conversion/any_value.rs +37 -21
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +159 -46
- data/ext/polars/src/dataframe/construction.rs +4 -7
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +22 -16
- data/ext/polars/src/dataframe/io.rs +78 -174
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +16 -7
- data/ext/polars/src/expr/general.rs +14 -23
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +17 -37
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -825
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +44 -13
- data/ext/polars/src/map/dataframe.rs +46 -14
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +17 -16
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +52 -25
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
@@ -2,12 +2,14 @@ pub(crate) mod any_value;
|
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
5
|
+
use std::fs::File;
|
5
6
|
use std::hash::{Hash, Hasher};
|
6
7
|
use std::num::NonZeroUsize;
|
8
|
+
use std::path::PathBuf;
|
7
9
|
|
8
10
|
use magnus::{
|
9
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
10
|
-
Ruby, Symbol, TryConvert, Value,
|
11
|
+
class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
|
12
|
+
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
|
11
13
|
};
|
12
14
|
use polars::chunked_array::object::PolarsObjectSafe;
|
13
15
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -15,13 +17,15 @@ use polars::datatypes::AnyValue;
|
|
15
17
|
use polars::frame::row::Row;
|
16
18
|
use polars::frame::NullStrategy;
|
17
19
|
use polars::io::avro::AvroCompression;
|
20
|
+
use polars::io::cloud::CloudOptions;
|
18
21
|
use polars::prelude::*;
|
19
22
|
use polars::series::ops::NullBehavior;
|
20
23
|
use polars_core::utils::arrow::array::Array;
|
21
24
|
use polars_core::utils::materialize_dyn_int;
|
25
|
+
use polars_plan::plans::ScanSources;
|
22
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
23
|
-
use smartstring::alias::String as SmartString;
|
24
27
|
|
28
|
+
use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
|
25
29
|
use crate::object::OBJECT_NAME;
|
26
30
|
use crate::rb_modules::series;
|
27
31
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
@@ -84,18 +88,31 @@ pub(crate) fn to_series(s: RbSeries) -> Value {
|
|
84
88
|
.unwrap()
|
85
89
|
}
|
86
90
|
|
91
|
+
impl TryConvert for Wrap<PlSmallStr> {
|
92
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
93
|
+
Ok(Wrap((&*String::try_convert(ob)?).into()))
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
87
97
|
impl TryConvert for Wrap<NullValues> {
|
88
98
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
89
99
|
if let Ok(s) = String::try_convert(ob) {
|
90
|
-
Ok(Wrap(NullValues::AllColumnsSingle(s)))
|
100
|
+
Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))
|
91
101
|
} else if let Ok(s) = Vec::<String>::try_convert(ob) {
|
92
|
-
Ok(Wrap(NullValues::AllColumns(
|
102
|
+
Ok(Wrap(NullValues::AllColumns(
|
103
|
+
s.into_iter().map(|x| (&*x).into()).collect(),
|
104
|
+
)))
|
93
105
|
} else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
|
94
|
-
Ok(Wrap(NullValues::Named(
|
106
|
+
Ok(Wrap(NullValues::Named(
|
107
|
+
s.into_iter()
|
108
|
+
.map(|(a, b)| ((&*a).into(), (&*b).into()))
|
109
|
+
.collect(),
|
110
|
+
)))
|
95
111
|
} else {
|
96
|
-
Err(
|
97
|
-
"could not extract value from null_values argument".into()
|
98
|
-
|
112
|
+
Err(
|
113
|
+
RbPolarsErr::Other("could not extract value from null_values argument".into())
|
114
|
+
.into(),
|
115
|
+
)
|
99
116
|
}
|
100
117
|
}
|
101
118
|
}
|
@@ -189,7 +206,7 @@ impl IntoValue for Wrap<DataType> {
|
|
189
206
|
DataType::Datetime(tu, tz) => {
|
190
207
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
191
208
|
datetime_class
|
192
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
|
209
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
|
193
210
|
.unwrap()
|
194
211
|
}
|
195
212
|
DataType::Duration(tu) => {
|
@@ -210,7 +227,9 @@ impl IntoValue for Wrap<DataType> {
|
|
210
227
|
// we should always have an initialized rev_map coming from rust
|
211
228
|
let categories = rev_map.as_ref().unwrap().get_categories();
|
212
229
|
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
213
|
-
let s =
|
230
|
+
let s =
|
231
|
+
Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
|
232
|
+
.unwrap();
|
214
233
|
let series = to_series(s.into());
|
215
234
|
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
216
235
|
}
|
@@ -222,7 +241,7 @@ impl IntoValue for Wrap<DataType> {
|
|
222
241
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
223
242
|
let iter = fields.iter().map(|fld| {
|
224
243
|
let name = fld.name().as_str();
|
225
|
-
let dtype = Wrap(fld.
|
244
|
+
let dtype = Wrap(fld.dtype().clone());
|
226
245
|
field_class
|
227
246
|
.funcall::<_, _, Value>("new", (name, dtype))
|
228
247
|
.unwrap()
|
@@ -276,7 +295,7 @@ impl TryConvert for Wrap<Field> {
|
|
276
295
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
277
296
|
let name: String = ob.funcall("name", ())?;
|
278
297
|
let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
|
279
|
-
Ok(Wrap(Field::new(
|
298
|
+
Ok(Wrap(Field::new((&*name).into(), dtype.0)))
|
280
299
|
}
|
281
300
|
}
|
282
301
|
|
@@ -315,7 +334,6 @@ impl TryConvert for Wrap<DataType> {
|
|
315
334
|
)))
|
316
335
|
}
|
317
336
|
}
|
318
|
-
// TODO improve
|
319
337
|
} else if String::try_convert(ob).is_err() {
|
320
338
|
let name = unsafe { ob.class().name() }.into_owned();
|
321
339
|
match name.as_str() {
|
@@ -341,7 +359,7 @@ impl TryConvert for Wrap<DataType> {
|
|
341
359
|
let s = get_series(categories)?;
|
342
360
|
let ca = s.str().map_err(RbPolarsErr::from)?;
|
343
361
|
let categories = ca.downcast_iter().next().unwrap().clone();
|
344
|
-
|
362
|
+
create_enum_dtype(categories)
|
345
363
|
}
|
346
364
|
"Polars::Date" => DataType::Date,
|
347
365
|
"Polars::Time" => DataType::Time,
|
@@ -357,8 +375,8 @@ impl TryConvert for Wrap<DataType> {
|
|
357
375
|
"Polars::Datetime" => {
|
358
376
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
359
377
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
360
|
-
let time_zone = ob.funcall("time_zone", ())?;
|
361
|
-
DataType::Datetime(time_unit, time_zone)
|
378
|
+
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
379
|
+
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
362
380
|
}
|
363
381
|
"Polars::Decimal" => {
|
364
382
|
let precision = ob.funcall("precision", ())?;
|
@@ -421,6 +439,8 @@ impl TryConvert for Wrap<DataType> {
|
|
421
439
|
}
|
422
440
|
}
|
423
441
|
|
442
|
+
unsafe impl TryConvertOwned for Wrap<DataType> {}
|
443
|
+
|
424
444
|
impl TryConvert for Wrap<StatisticsOptions> {
|
425
445
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
426
446
|
let mut statistics = StatisticsOptions::empty();
|
@@ -439,8 +459,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
|
|
439
459
|
}
|
440
460
|
}
|
441
461
|
Ok(ForEach::Continue)
|
442
|
-
})
|
443
|
-
.unwrap();
|
462
|
+
})?;
|
444
463
|
|
445
464
|
Ok(Wrap(statistics))
|
446
465
|
}
|
@@ -463,15 +482,77 @@ impl TryConvert for Wrap<Schema> {
|
|
463
482
|
|
464
483
|
let mut schema = Vec::new();
|
465
484
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
466
|
-
schema.push(Ok(Field::new(
|
485
|
+
schema.push(Ok(Field::new((&*key).into(), val.0)));
|
467
486
|
Ok(ForEach::Continue)
|
468
|
-
})
|
469
|
-
.unwrap();
|
487
|
+
})?;
|
470
488
|
|
471
489
|
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
472
490
|
}
|
473
491
|
}
|
474
492
|
|
493
|
+
impl TryConvert for Wrap<ScanSources> {
|
494
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
495
|
+
let list = RArray::try_convert(ob)?;
|
496
|
+
|
497
|
+
if list.is_empty() {
|
498
|
+
return Ok(Wrap(ScanSources::default()));
|
499
|
+
}
|
500
|
+
|
501
|
+
enum MutableSources {
|
502
|
+
Paths(Vec<PathBuf>),
|
503
|
+
Files(Vec<File>),
|
504
|
+
Buffers(Vec<bytes::Bytes>),
|
505
|
+
}
|
506
|
+
|
507
|
+
let num_items = list.len();
|
508
|
+
let mut iter = list
|
509
|
+
.into_iter()
|
510
|
+
.map(|val| get_ruby_scan_source_input(val, false));
|
511
|
+
|
512
|
+
let Some(first) = iter.next() else {
|
513
|
+
return Ok(Wrap(ScanSources::default()));
|
514
|
+
};
|
515
|
+
|
516
|
+
let mut sources = match first? {
|
517
|
+
RubyScanSourceInput::Path(path) => {
|
518
|
+
let mut sources = Vec::with_capacity(num_items);
|
519
|
+
sources.push(path);
|
520
|
+
MutableSources::Paths(sources)
|
521
|
+
}
|
522
|
+
RubyScanSourceInput::File(file) => {
|
523
|
+
let mut sources = Vec::with_capacity(num_items);
|
524
|
+
sources.push(file);
|
525
|
+
MutableSources::Files(sources)
|
526
|
+
}
|
527
|
+
RubyScanSourceInput::Buffer(buffer) => {
|
528
|
+
let mut sources = Vec::with_capacity(num_items);
|
529
|
+
sources.push(buffer);
|
530
|
+
MutableSources::Buffers(sources)
|
531
|
+
}
|
532
|
+
};
|
533
|
+
|
534
|
+
for source in iter {
|
535
|
+
match (&mut sources, source?) {
|
536
|
+
(MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
|
537
|
+
(MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
|
538
|
+
(MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
|
539
|
+
_ => {
|
540
|
+
return Err(RbTypeError::new_err(
|
541
|
+
"Cannot combine in-memory bytes, paths and files for scan sources"
|
542
|
+
.to_string(),
|
543
|
+
))
|
544
|
+
}
|
545
|
+
}
|
546
|
+
}
|
547
|
+
|
548
|
+
Ok(Wrap(match sources {
|
549
|
+
MutableSources::Paths(i) => ScanSources::Paths(i.into()),
|
550
|
+
MutableSources::Files(i) => ScanSources::Files(i.into()),
|
551
|
+
MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
|
552
|
+
}))
|
553
|
+
}
|
554
|
+
}
|
555
|
+
|
475
556
|
#[derive(Clone)]
|
476
557
|
pub struct ObjectValue {
|
477
558
|
pub inner: Opaque<Value>,
|
@@ -480,7 +561,7 @@ pub struct ObjectValue {
|
|
480
561
|
impl Debug for ObjectValue {
|
481
562
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
482
563
|
f.debug_struct("ObjectValue")
|
483
|
-
.field("inner", &self.
|
564
|
+
.field("inner", &self.to_value())
|
484
565
|
.finish()
|
485
566
|
}
|
486
567
|
}
|
@@ -488,7 +569,7 @@ impl Debug for ObjectValue {
|
|
488
569
|
impl Hash for ObjectValue {
|
489
570
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
490
571
|
let h = self
|
491
|
-
.
|
572
|
+
.to_value()
|
492
573
|
.funcall::<_, _, isize>("hash", ())
|
493
574
|
.expect("should be hashable");
|
494
575
|
state.write_isize(h)
|
@@ -499,7 +580,7 @@ impl Eq for ObjectValue {}
|
|
499
580
|
|
500
581
|
impl PartialEq for ObjectValue {
|
501
582
|
fn eq(&self, other: &Self) -> bool {
|
502
|
-
self.
|
583
|
+
self.to_value().eql(other.to_value()).unwrap_or(false)
|
503
584
|
}
|
504
585
|
}
|
505
586
|
|
@@ -520,7 +601,7 @@ impl TotalHash for ObjectValue {
|
|
520
601
|
|
521
602
|
impl Display for ObjectValue {
|
522
603
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
523
|
-
write!(f, "{}", self.
|
604
|
+
write!(f, "{}", self.to_value())
|
524
605
|
}
|
525
606
|
}
|
526
607
|
|
@@ -548,16 +629,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
|
548
629
|
}
|
549
630
|
}
|
550
631
|
|
551
|
-
// TODO remove
|
552
632
|
impl ObjectValue {
|
553
|
-
pub fn
|
554
|
-
|
633
|
+
pub fn to_value(&self) -> Value {
|
634
|
+
self.clone().into_value()
|
555
635
|
}
|
556
636
|
}
|
557
637
|
|
558
638
|
impl IntoValue for ObjectValue {
|
559
|
-
fn into_value_with(self,
|
560
|
-
self.
|
639
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
640
|
+
ruby.get_inner(self.inner)
|
561
641
|
}
|
562
642
|
}
|
563
643
|
|
@@ -574,10 +654,10 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
574
654
|
let parsed = match String::try_convert(ob)?.as_str() {
|
575
655
|
"backward" => AsofStrategy::Backward,
|
576
656
|
"forward" => AsofStrategy::Forward,
|
657
|
+
"nearest" => AsofStrategy::Nearest,
|
577
658
|
v => {
|
578
659
|
return Err(RbValueError::new_err(format!(
|
579
|
-
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
580
|
-
v
|
660
|
+
"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
|
581
661
|
)))
|
582
662
|
}
|
583
663
|
};
|
@@ -817,14 +897,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
|
|
817
897
|
}
|
818
898
|
}
|
819
899
|
|
820
|
-
impl TryConvert for Wrap<
|
900
|
+
impl TryConvert for Wrap<QuantileMethod> {
|
821
901
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
822
902
|
let parsed = match String::try_convert(ob)?.as_str() {
|
823
|
-
"lower" =>
|
824
|
-
"higher" =>
|
825
|
-
"nearest" =>
|
826
|
-
"linear" =>
|
827
|
-
"midpoint" =>
|
903
|
+
"lower" => QuantileMethod::Lower,
|
904
|
+
"higher" => QuantileMethod::Higher,
|
905
|
+
"nearest" => QuantileMethod::Nearest,
|
906
|
+
"linear" => QuantileMethod::Linear,
|
907
|
+
"midpoint" => QuantileMethod::Midpoint,
|
828
908
|
v => {
|
829
909
|
return Err(RbValueError::new_err(format!(
|
830
910
|
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
|
@@ -988,6 +1068,11 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
988
1068
|
}
|
989
1069
|
}
|
990
1070
|
|
1071
|
+
pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
|
1072
|
+
let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
|
1073
|
+
Ok(out)
|
1074
|
+
}
|
1075
|
+
|
991
1076
|
pub fn parse_fill_null_strategy(
|
992
1077
|
strategy: &str,
|
993
1078
|
limit: FillNullLimit,
|
@@ -1053,19 +1138,47 @@ pub fn parse_parquet_compression(
|
|
1053
1138
|
Ok(parsed)
|
1054
1139
|
}
|
1055
1140
|
|
1056
|
-
|
1141
|
+
impl TryConvert for Wrap<NonZeroUsize> {
|
1142
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1143
|
+
let v = usize::try_convert(ob)?;
|
1144
|
+
NonZeroUsize::new(v)
|
1145
|
+
.map(Wrap)
|
1146
|
+
.ok_or(RbValueError::new_err("must be non-zero"))
|
1147
|
+
}
|
1148
|
+
}
|
1149
|
+
|
1150
|
+
pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
|
1057
1151
|
where
|
1058
1152
|
I: IntoIterator<Item = S>,
|
1059
1153
|
S: AsRef<str>,
|
1060
1154
|
{
|
1061
|
-
container
|
1155
|
+
container
|
1156
|
+
.into_iter()
|
1157
|
+
.map(|s| PlSmallStr::from_str(s.as_ref()))
|
1158
|
+
.collect()
|
1062
1159
|
}
|
1063
1160
|
|
1064
|
-
|
1161
|
+
#[derive(Debug, Copy, Clone)]
|
1162
|
+
pub struct RbCompatLevel(pub CompatLevel);
|
1163
|
+
|
1164
|
+
impl TryConvert for RbCompatLevel {
|
1065
1165
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1066
|
-
let
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1166
|
+
Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
|
1167
|
+
if let Ok(compat_level) = CompatLevel::with_level(level) {
|
1168
|
+
compat_level
|
1169
|
+
} else {
|
1170
|
+
return Err(RbValueError::new_err("invalid compat level".to_string()));
|
1171
|
+
}
|
1172
|
+
} else if let Ok(future) = bool::try_convert(ob) {
|
1173
|
+
if future {
|
1174
|
+
CompatLevel::newest()
|
1175
|
+
} else {
|
1176
|
+
CompatLevel::oldest()
|
1177
|
+
}
|
1178
|
+
} else {
|
1179
|
+
return Err(RbTypeError::new_err(
|
1180
|
+
"'compat_level' argument accepts int or bool".to_string(),
|
1181
|
+
));
|
1182
|
+
}))
|
1070
1183
|
}
|
1071
1184
|
}
|
@@ -54,9 +54,6 @@ fn finish_from_rows(
|
|
54
54
|
schema_overrides: Option<Schema>,
|
55
55
|
infer_schema_length: Option<usize>,
|
56
56
|
) -> RbResult<RbDataFrame> {
|
57
|
-
// Object builder must be registered
|
58
|
-
crate::on_startup::register_object_builder();
|
59
|
-
|
60
57
|
let mut schema = if let Some(mut schema) = schema {
|
61
58
|
resolve_schema_overrides(&mut schema, schema_overrides);
|
62
59
|
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
@@ -79,7 +76,7 @@ fn update_schema_from_rows(
|
|
79
76
|
rows: &[Row],
|
80
77
|
infer_schema_length: Option<usize>,
|
81
78
|
) -> RbResult<()> {
|
82
|
-
let schema_is_complete = schema.
|
79
|
+
let schema_is_complete = schema.iter_values().all(|dtype| dtype.is_known());
|
83
80
|
if schema_is_complete {
|
84
81
|
return Ok(());
|
85
82
|
}
|
@@ -89,7 +86,7 @@ fn update_schema_from_rows(
|
|
89
86
|
rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
|
90
87
|
let inferred_dtypes_slice = inferred_dtypes.as_slice();
|
91
88
|
|
92
|
-
for (i, dtype) in schema.
|
89
|
+
for (i, dtype) in schema.iter_values_mut().enumerate() {
|
93
90
|
if !dtype.is_known() {
|
94
91
|
*dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
|
95
92
|
polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
|
@@ -110,7 +107,7 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
|
|
110
107
|
}
|
111
108
|
|
112
109
|
fn erase_decimal_precision_scale(schema: &mut Schema) {
|
113
|
-
for dtype in schema.
|
110
|
+
for dtype in schema.iter_values_mut() {
|
114
111
|
if let DataType::Decimal(_, _) = dtype {
|
115
112
|
*dtype = DataType::Decimal(None, None)
|
116
113
|
}
|
@@ -123,7 +120,7 @@ where
|
|
123
120
|
{
|
124
121
|
let fields = column_names
|
125
122
|
.into_iter()
|
126
|
-
.map(|c| Field::new(c, DataType::Unknown(Default::default())));
|
123
|
+
.map(|c| Field::new(c.into(), DataType::Unknown(Default::default())));
|
127
124
|
Schema::from_iter(fields)
|
128
125
|
}
|
129
126
|
|
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
|
|
2
2
|
|
3
3
|
use super::*;
|
4
4
|
use crate::conversion::{ObjectValue, Wrap};
|
5
|
+
use crate::interop::arrow::to_ruby::dataframe_to_stream;
|
6
|
+
use crate::RbResult;
|
5
7
|
|
6
8
|
impl RbDataFrame {
|
7
9
|
pub fn row_tuple(&self, idx: i64) -> Value {
|
@@ -18,7 +20,7 @@ impl RbDataFrame {
|
|
18
20
|
.map(|s| match s.dtype() {
|
19
21
|
DataType::Object(_, _) => {
|
20
22
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
21
|
-
obj.unwrap().
|
23
|
+
obj.unwrap().to_value()
|
22
24
|
}
|
23
25
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
24
26
|
}),
|
@@ -37,7 +39,7 @@ impl RbDataFrame {
|
|
37
39
|
.map(|s| match s.dtype() {
|
38
40
|
DataType::Object(_, _) => {
|
39
41
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
40
|
-
obj.unwrap().
|
42
|
+
obj.unwrap().to_value()
|
41
43
|
}
|
42
44
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
43
45
|
}),
|
@@ -45,4 +47,9 @@ impl RbDataFrame {
|
|
45
47
|
}))
|
46
48
|
.as_value()
|
47
49
|
}
|
50
|
+
|
51
|
+
pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
|
52
|
+
self.df.borrow_mut().align_chunks();
|
53
|
+
dataframe_to_stream(&self.df.borrow())
|
54
|
+
}
|
48
55
|
}
|
@@ -9,14 +9,15 @@ use crate::map::dataframe::{
|
|
9
9
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
10
10
|
apply_lambda_with_utf8_out_type,
|
11
11
|
};
|
12
|
-
use crate::
|
12
|
+
use crate::prelude::strings_to_pl_smallstr;
|
13
|
+
use crate::series::{to_rbseries, to_series};
|
13
14
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
14
15
|
|
15
16
|
impl RbDataFrame {
|
16
17
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
17
18
|
let mut cols = Vec::new();
|
18
19
|
for i in columns.into_iter() {
|
19
|
-
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
|
20
|
+
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
|
20
21
|
}
|
21
22
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
22
23
|
Ok(RbDataFrame::new(df))
|
@@ -127,7 +128,7 @@ impl RbDataFrame {
|
|
127
128
|
|
128
129
|
pub fn get_columns(&self) -> RArray {
|
129
130
|
let cols = self.df.borrow().get_columns().to_vec();
|
130
|
-
|
131
|
+
to_rbseries(cols)
|
131
132
|
}
|
132
133
|
|
133
134
|
pub fn columns(&self) -> Vec<String> {
|
@@ -173,7 +174,8 @@ impl RbDataFrame {
|
|
173
174
|
}
|
174
175
|
|
175
176
|
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
176
|
-
let columns =
|
177
|
+
let columns = to_series(columns)?;
|
178
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
177
179
|
let df = self
|
178
180
|
.df
|
179
181
|
.borrow()
|
@@ -183,7 +185,8 @@ impl RbDataFrame {
|
|
183
185
|
}
|
184
186
|
|
185
187
|
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
186
|
-
let columns =
|
188
|
+
let columns = to_series(columns)?;
|
189
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
187
190
|
self.df
|
188
191
|
.borrow_mut()
|
189
192
|
.hstack_mut(&columns)
|
@@ -222,6 +225,7 @@ impl RbDataFrame {
|
|
222
225
|
.borrow_mut()
|
223
226
|
.drop_in_place(&name)
|
224
227
|
.map_err(RbPolarsErr::from)?;
|
228
|
+
let s = s.take_materialized_series();
|
225
229
|
Ok(RbSeries::new(s))
|
226
230
|
}
|
227
231
|
|
@@ -229,7 +233,7 @@ impl RbDataFrame {
|
|
229
233
|
self.df
|
230
234
|
.borrow()
|
231
235
|
.select_at_idx(idx)
|
232
|
-
.map(|s| RbSeries::new(s.clone()))
|
236
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
233
237
|
}
|
234
238
|
|
235
239
|
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
@@ -237,11 +241,13 @@ impl RbDataFrame {
|
|
237
241
|
}
|
238
242
|
|
239
243
|
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
240
|
-
self
|
244
|
+
let series = self
|
245
|
+
.df
|
241
246
|
.borrow()
|
242
247
|
.column(&name)
|
243
|
-
.map(|s| RbSeries::new(s.clone()))
|
244
|
-
.map_err(RbPolarsErr::from)
|
248
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
249
|
+
.map_err(RbPolarsErr::from)?;
|
250
|
+
Ok(series)
|
245
251
|
}
|
246
252
|
|
247
253
|
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
@@ -254,7 +260,7 @@ impl RbDataFrame {
|
|
254
260
|
}
|
255
261
|
|
256
262
|
pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
257
|
-
let indices = IdxCa::from_vec("", indices);
|
263
|
+
let indices = IdxCa::from_vec("".into(), indices);
|
258
264
|
let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
|
259
265
|
Ok(RbDataFrame::new(df))
|
260
266
|
}
|
@@ -332,7 +338,7 @@ impl RbDataFrame {
|
|
332
338
|
let df = self
|
333
339
|
.df
|
334
340
|
.borrow()
|
335
|
-
.with_row_index(
|
341
|
+
.with_row_index(name.into(), offset)
|
336
342
|
.map_err(RbPolarsErr::from)?;
|
337
343
|
Ok(df.into())
|
338
344
|
}
|
@@ -349,8 +355,8 @@ impl RbDataFrame {
|
|
349
355
|
variable_name: Option<String>,
|
350
356
|
) -> RbResult<Self> {
|
351
357
|
let args = UnpivotArgsIR {
|
352
|
-
on:
|
353
|
-
index:
|
358
|
+
on: strings_to_pl_smallstr(on),
|
359
|
+
index: strings_to_pl_smallstr(index),
|
354
360
|
value_name: value_name.map(|s| s.into()),
|
355
361
|
variable_name: variable_name.map(|s| s.into()),
|
356
362
|
};
|
@@ -410,7 +416,7 @@ impl RbDataFrame {
|
|
410
416
|
.borrow()
|
411
417
|
.max_horizontal()
|
412
418
|
.map_err(RbPolarsErr::from)?;
|
413
|
-
Ok(s.map(|s| s.into()))
|
419
|
+
Ok(s.map(|s| s.take_materialized_series().into()))
|
414
420
|
}
|
415
421
|
|
416
422
|
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
@@ -419,7 +425,7 @@ impl RbDataFrame {
|
|
419
425
|
.borrow()
|
420
426
|
.min_horizontal()
|
421
427
|
.map_err(RbPolarsErr::from)?;
|
422
|
-
Ok(s.map(|s| s.into()))
|
428
|
+
Ok(s.map(|s| s.take_materialized_series().into()))
|
423
429
|
}
|
424
430
|
|
425
431
|
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
@@ -581,7 +587,7 @@ impl RbDataFrame {
|
|
581
587
|
}
|
582
588
|
|
583
589
|
pub fn to_struct(&self, name: String) -> RbSeries {
|
584
|
-
let s = self.df.borrow().clone().into_struct(
|
590
|
+
let s = self.df.borrow().clone().into_struct(name.into());
|
585
591
|
s.into_series().into()
|
586
592
|
}
|
587
593
|
|