polars-df 0.13.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +10 -13
- data/ext/polars/src/conversion/any_value.rs +37 -21
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +159 -46
- data/ext/polars/src/dataframe/construction.rs +4 -7
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +22 -16
- data/ext/polars/src/dataframe/io.rs +78 -174
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +16 -7
- data/ext/polars/src/expr/general.rs +14 -23
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +17 -37
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -825
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +44 -13
- data/ext/polars/src/map/dataframe.rs +46 -14
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +17 -16
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +52 -25
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
@@ -2,12 +2,14 @@ pub(crate) mod any_value;
|
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
5
|
+
use std::fs::File;
|
5
6
|
use std::hash::{Hash, Hasher};
|
6
7
|
use std::num::NonZeroUsize;
|
8
|
+
use std::path::PathBuf;
|
7
9
|
|
8
10
|
use magnus::{
|
9
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
10
|
-
Ruby, Symbol, TryConvert, Value,
|
11
|
+
class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
|
12
|
+
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
|
11
13
|
};
|
12
14
|
use polars::chunked_array::object::PolarsObjectSafe;
|
13
15
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -15,13 +17,15 @@ use polars::datatypes::AnyValue;
|
|
15
17
|
use polars::frame::row::Row;
|
16
18
|
use polars::frame::NullStrategy;
|
17
19
|
use polars::io::avro::AvroCompression;
|
20
|
+
use polars::io::cloud::CloudOptions;
|
18
21
|
use polars::prelude::*;
|
19
22
|
use polars::series::ops::NullBehavior;
|
20
23
|
use polars_core::utils::arrow::array::Array;
|
21
24
|
use polars_core::utils::materialize_dyn_int;
|
25
|
+
use polars_plan::plans::ScanSources;
|
22
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
23
|
-
use smartstring::alias::String as SmartString;
|
24
27
|
|
28
|
+
use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
|
25
29
|
use crate::object::OBJECT_NAME;
|
26
30
|
use crate::rb_modules::series;
|
27
31
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
@@ -84,18 +88,31 @@ pub(crate) fn to_series(s: RbSeries) -> Value {
|
|
84
88
|
.unwrap()
|
85
89
|
}
|
86
90
|
|
91
|
+
impl TryConvert for Wrap<PlSmallStr> {
|
92
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
93
|
+
Ok(Wrap((&*String::try_convert(ob)?).into()))
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
87
97
|
impl TryConvert for Wrap<NullValues> {
|
88
98
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
89
99
|
if let Ok(s) = String::try_convert(ob) {
|
90
|
-
Ok(Wrap(NullValues::AllColumnsSingle(s)))
|
100
|
+
Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))
|
91
101
|
} else if let Ok(s) = Vec::<String>::try_convert(ob) {
|
92
|
-
Ok(Wrap(NullValues::AllColumns(
|
102
|
+
Ok(Wrap(NullValues::AllColumns(
|
103
|
+
s.into_iter().map(|x| (&*x).into()).collect(),
|
104
|
+
)))
|
93
105
|
} else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
|
94
|
-
Ok(Wrap(NullValues::Named(
|
106
|
+
Ok(Wrap(NullValues::Named(
|
107
|
+
s.into_iter()
|
108
|
+
.map(|(a, b)| ((&*a).into(), (&*b).into()))
|
109
|
+
.collect(),
|
110
|
+
)))
|
95
111
|
} else {
|
96
|
-
Err(
|
97
|
-
"could not extract value from null_values argument".into()
|
98
|
-
|
112
|
+
Err(
|
113
|
+
RbPolarsErr::Other("could not extract value from null_values argument".into())
|
114
|
+
.into(),
|
115
|
+
)
|
99
116
|
}
|
100
117
|
}
|
101
118
|
}
|
@@ -189,7 +206,7 @@ impl IntoValue for Wrap<DataType> {
|
|
189
206
|
DataType::Datetime(tu, tz) => {
|
190
207
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
191
208
|
datetime_class
|
192
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
|
209
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
|
193
210
|
.unwrap()
|
194
211
|
}
|
195
212
|
DataType::Duration(tu) => {
|
@@ -210,7 +227,9 @@ impl IntoValue for Wrap<DataType> {
|
|
210
227
|
// we should always have an initialized rev_map coming from rust
|
211
228
|
let categories = rev_map.as_ref().unwrap().get_categories();
|
212
229
|
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
213
|
-
let s =
|
230
|
+
let s =
|
231
|
+
Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
|
232
|
+
.unwrap();
|
214
233
|
let series = to_series(s.into());
|
215
234
|
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
216
235
|
}
|
@@ -222,7 +241,7 @@ impl IntoValue for Wrap<DataType> {
|
|
222
241
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
223
242
|
let iter = fields.iter().map(|fld| {
|
224
243
|
let name = fld.name().as_str();
|
225
|
-
let dtype = Wrap(fld.
|
244
|
+
let dtype = Wrap(fld.dtype().clone());
|
226
245
|
field_class
|
227
246
|
.funcall::<_, _, Value>("new", (name, dtype))
|
228
247
|
.unwrap()
|
@@ -276,7 +295,7 @@ impl TryConvert for Wrap<Field> {
|
|
276
295
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
277
296
|
let name: String = ob.funcall("name", ())?;
|
278
297
|
let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
|
279
|
-
Ok(Wrap(Field::new(
|
298
|
+
Ok(Wrap(Field::new((&*name).into(), dtype.0)))
|
280
299
|
}
|
281
300
|
}
|
282
301
|
|
@@ -315,7 +334,6 @@ impl TryConvert for Wrap<DataType> {
|
|
315
334
|
)))
|
316
335
|
}
|
317
336
|
}
|
318
|
-
// TODO improve
|
319
337
|
} else if String::try_convert(ob).is_err() {
|
320
338
|
let name = unsafe { ob.class().name() }.into_owned();
|
321
339
|
match name.as_str() {
|
@@ -341,7 +359,7 @@ impl TryConvert for Wrap<DataType> {
|
|
341
359
|
let s = get_series(categories)?;
|
342
360
|
let ca = s.str().map_err(RbPolarsErr::from)?;
|
343
361
|
let categories = ca.downcast_iter().next().unwrap().clone();
|
344
|
-
|
362
|
+
create_enum_dtype(categories)
|
345
363
|
}
|
346
364
|
"Polars::Date" => DataType::Date,
|
347
365
|
"Polars::Time" => DataType::Time,
|
@@ -357,8 +375,8 @@ impl TryConvert for Wrap<DataType> {
|
|
357
375
|
"Polars::Datetime" => {
|
358
376
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
359
377
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
360
|
-
let time_zone = ob.funcall("time_zone", ())?;
|
361
|
-
DataType::Datetime(time_unit, time_zone)
|
378
|
+
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
379
|
+
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
362
380
|
}
|
363
381
|
"Polars::Decimal" => {
|
364
382
|
let precision = ob.funcall("precision", ())?;
|
@@ -421,6 +439,8 @@ impl TryConvert for Wrap<DataType> {
|
|
421
439
|
}
|
422
440
|
}
|
423
441
|
|
442
|
+
unsafe impl TryConvertOwned for Wrap<DataType> {}
|
443
|
+
|
424
444
|
impl TryConvert for Wrap<StatisticsOptions> {
|
425
445
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
426
446
|
let mut statistics = StatisticsOptions::empty();
|
@@ -439,8 +459,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
|
|
439
459
|
}
|
440
460
|
}
|
441
461
|
Ok(ForEach::Continue)
|
442
|
-
})
|
443
|
-
.unwrap();
|
462
|
+
})?;
|
444
463
|
|
445
464
|
Ok(Wrap(statistics))
|
446
465
|
}
|
@@ -463,15 +482,77 @@ impl TryConvert for Wrap<Schema> {
|
|
463
482
|
|
464
483
|
let mut schema = Vec::new();
|
465
484
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
466
|
-
schema.push(Ok(Field::new(
|
485
|
+
schema.push(Ok(Field::new((&*key).into(), val.0)));
|
467
486
|
Ok(ForEach::Continue)
|
468
|
-
})
|
469
|
-
.unwrap();
|
487
|
+
})?;
|
470
488
|
|
471
489
|
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
472
490
|
}
|
473
491
|
}
|
474
492
|
|
493
|
+
impl TryConvert for Wrap<ScanSources> {
|
494
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
495
|
+
let list = RArray::try_convert(ob)?;
|
496
|
+
|
497
|
+
if list.is_empty() {
|
498
|
+
return Ok(Wrap(ScanSources::default()));
|
499
|
+
}
|
500
|
+
|
501
|
+
enum MutableSources {
|
502
|
+
Paths(Vec<PathBuf>),
|
503
|
+
Files(Vec<File>),
|
504
|
+
Buffers(Vec<bytes::Bytes>),
|
505
|
+
}
|
506
|
+
|
507
|
+
let num_items = list.len();
|
508
|
+
let mut iter = list
|
509
|
+
.into_iter()
|
510
|
+
.map(|val| get_ruby_scan_source_input(val, false));
|
511
|
+
|
512
|
+
let Some(first) = iter.next() else {
|
513
|
+
return Ok(Wrap(ScanSources::default()));
|
514
|
+
};
|
515
|
+
|
516
|
+
let mut sources = match first? {
|
517
|
+
RubyScanSourceInput::Path(path) => {
|
518
|
+
let mut sources = Vec::with_capacity(num_items);
|
519
|
+
sources.push(path);
|
520
|
+
MutableSources::Paths(sources)
|
521
|
+
}
|
522
|
+
RubyScanSourceInput::File(file) => {
|
523
|
+
let mut sources = Vec::with_capacity(num_items);
|
524
|
+
sources.push(file);
|
525
|
+
MutableSources::Files(sources)
|
526
|
+
}
|
527
|
+
RubyScanSourceInput::Buffer(buffer) => {
|
528
|
+
let mut sources = Vec::with_capacity(num_items);
|
529
|
+
sources.push(buffer);
|
530
|
+
MutableSources::Buffers(sources)
|
531
|
+
}
|
532
|
+
};
|
533
|
+
|
534
|
+
for source in iter {
|
535
|
+
match (&mut sources, source?) {
|
536
|
+
(MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
|
537
|
+
(MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
|
538
|
+
(MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
|
539
|
+
_ => {
|
540
|
+
return Err(RbTypeError::new_err(
|
541
|
+
"Cannot combine in-memory bytes, paths and files for scan sources"
|
542
|
+
.to_string(),
|
543
|
+
))
|
544
|
+
}
|
545
|
+
}
|
546
|
+
}
|
547
|
+
|
548
|
+
Ok(Wrap(match sources {
|
549
|
+
MutableSources::Paths(i) => ScanSources::Paths(i.into()),
|
550
|
+
MutableSources::Files(i) => ScanSources::Files(i.into()),
|
551
|
+
MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
|
552
|
+
}))
|
553
|
+
}
|
554
|
+
}
|
555
|
+
|
475
556
|
#[derive(Clone)]
|
476
557
|
pub struct ObjectValue {
|
477
558
|
pub inner: Opaque<Value>,
|
@@ -480,7 +561,7 @@ pub struct ObjectValue {
|
|
480
561
|
impl Debug for ObjectValue {
|
481
562
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
482
563
|
f.debug_struct("ObjectValue")
|
483
|
-
.field("inner", &self.
|
564
|
+
.field("inner", &self.to_value())
|
484
565
|
.finish()
|
485
566
|
}
|
486
567
|
}
|
@@ -488,7 +569,7 @@ impl Debug for ObjectValue {
|
|
488
569
|
impl Hash for ObjectValue {
|
489
570
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
490
571
|
let h = self
|
491
|
-
.
|
572
|
+
.to_value()
|
492
573
|
.funcall::<_, _, isize>("hash", ())
|
493
574
|
.expect("should be hashable");
|
494
575
|
state.write_isize(h)
|
@@ -499,7 +580,7 @@ impl Eq for ObjectValue {}
|
|
499
580
|
|
500
581
|
impl PartialEq for ObjectValue {
|
501
582
|
fn eq(&self, other: &Self) -> bool {
|
502
|
-
self.
|
583
|
+
self.to_value().eql(other.to_value()).unwrap_or(false)
|
503
584
|
}
|
504
585
|
}
|
505
586
|
|
@@ -520,7 +601,7 @@ impl TotalHash for ObjectValue {
|
|
520
601
|
|
521
602
|
impl Display for ObjectValue {
|
522
603
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
523
|
-
write!(f, "{}", self.
|
604
|
+
write!(f, "{}", self.to_value())
|
524
605
|
}
|
525
606
|
}
|
526
607
|
|
@@ -548,16 +629,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
|
548
629
|
}
|
549
630
|
}
|
550
631
|
|
551
|
-
// TODO remove
|
552
632
|
impl ObjectValue {
|
553
|
-
pub fn
|
554
|
-
|
633
|
+
pub fn to_value(&self) -> Value {
|
634
|
+
self.clone().into_value()
|
555
635
|
}
|
556
636
|
}
|
557
637
|
|
558
638
|
impl IntoValue for ObjectValue {
|
559
|
-
fn into_value_with(self,
|
560
|
-
self.
|
639
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
640
|
+
ruby.get_inner(self.inner)
|
561
641
|
}
|
562
642
|
}
|
563
643
|
|
@@ -574,10 +654,10 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
574
654
|
let parsed = match String::try_convert(ob)?.as_str() {
|
575
655
|
"backward" => AsofStrategy::Backward,
|
576
656
|
"forward" => AsofStrategy::Forward,
|
657
|
+
"nearest" => AsofStrategy::Nearest,
|
577
658
|
v => {
|
578
659
|
return Err(RbValueError::new_err(format!(
|
579
|
-
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
580
|
-
v
|
660
|
+
"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
|
581
661
|
)))
|
582
662
|
}
|
583
663
|
};
|
@@ -817,14 +897,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
|
|
817
897
|
}
|
818
898
|
}
|
819
899
|
|
820
|
-
impl TryConvert for Wrap<
|
900
|
+
impl TryConvert for Wrap<QuantileMethod> {
|
821
901
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
822
902
|
let parsed = match String::try_convert(ob)?.as_str() {
|
823
|
-
"lower" =>
|
824
|
-
"higher" =>
|
825
|
-
"nearest" =>
|
826
|
-
"linear" =>
|
827
|
-
"midpoint" =>
|
903
|
+
"lower" => QuantileMethod::Lower,
|
904
|
+
"higher" => QuantileMethod::Higher,
|
905
|
+
"nearest" => QuantileMethod::Nearest,
|
906
|
+
"linear" => QuantileMethod::Linear,
|
907
|
+
"midpoint" => QuantileMethod::Midpoint,
|
828
908
|
v => {
|
829
909
|
return Err(RbValueError::new_err(format!(
|
830
910
|
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
|
@@ -988,6 +1068,11 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
988
1068
|
}
|
989
1069
|
}
|
990
1070
|
|
1071
|
+
pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
|
1072
|
+
let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
|
1073
|
+
Ok(out)
|
1074
|
+
}
|
1075
|
+
|
991
1076
|
pub fn parse_fill_null_strategy(
|
992
1077
|
strategy: &str,
|
993
1078
|
limit: FillNullLimit,
|
@@ -1053,19 +1138,47 @@ pub fn parse_parquet_compression(
|
|
1053
1138
|
Ok(parsed)
|
1054
1139
|
}
|
1055
1140
|
|
1056
|
-
|
1141
|
+
impl TryConvert for Wrap<NonZeroUsize> {
|
1142
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1143
|
+
let v = usize::try_convert(ob)?;
|
1144
|
+
NonZeroUsize::new(v)
|
1145
|
+
.map(Wrap)
|
1146
|
+
.ok_or(RbValueError::new_err("must be non-zero"))
|
1147
|
+
}
|
1148
|
+
}
|
1149
|
+
|
1150
|
+
pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
|
1057
1151
|
where
|
1058
1152
|
I: IntoIterator<Item = S>,
|
1059
1153
|
S: AsRef<str>,
|
1060
1154
|
{
|
1061
|
-
container
|
1155
|
+
container
|
1156
|
+
.into_iter()
|
1157
|
+
.map(|s| PlSmallStr::from_str(s.as_ref()))
|
1158
|
+
.collect()
|
1062
1159
|
}
|
1063
1160
|
|
1064
|
-
|
1161
|
+
#[derive(Debug, Copy, Clone)]
|
1162
|
+
pub struct RbCompatLevel(pub CompatLevel);
|
1163
|
+
|
1164
|
+
impl TryConvert for RbCompatLevel {
|
1065
1165
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1066
|
-
let
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1166
|
+
Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
|
1167
|
+
if let Ok(compat_level) = CompatLevel::with_level(level) {
|
1168
|
+
compat_level
|
1169
|
+
} else {
|
1170
|
+
return Err(RbValueError::new_err("invalid compat level".to_string()));
|
1171
|
+
}
|
1172
|
+
} else if let Ok(future) = bool::try_convert(ob) {
|
1173
|
+
if future {
|
1174
|
+
CompatLevel::newest()
|
1175
|
+
} else {
|
1176
|
+
CompatLevel::oldest()
|
1177
|
+
}
|
1178
|
+
} else {
|
1179
|
+
return Err(RbTypeError::new_err(
|
1180
|
+
"'compat_level' argument accepts int or bool".to_string(),
|
1181
|
+
));
|
1182
|
+
}))
|
1070
1183
|
}
|
1071
1184
|
}
|
@@ -54,9 +54,6 @@ fn finish_from_rows(
|
|
54
54
|
schema_overrides: Option<Schema>,
|
55
55
|
infer_schema_length: Option<usize>,
|
56
56
|
) -> RbResult<RbDataFrame> {
|
57
|
-
// Object builder must be registered
|
58
|
-
crate::on_startup::register_object_builder();
|
59
|
-
|
60
57
|
let mut schema = if let Some(mut schema) = schema {
|
61
58
|
resolve_schema_overrides(&mut schema, schema_overrides);
|
62
59
|
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
@@ -79,7 +76,7 @@ fn update_schema_from_rows(
|
|
79
76
|
rows: &[Row],
|
80
77
|
infer_schema_length: Option<usize>,
|
81
78
|
) -> RbResult<()> {
|
82
|
-
let schema_is_complete = schema.
|
79
|
+
let schema_is_complete = schema.iter_values().all(|dtype| dtype.is_known());
|
83
80
|
if schema_is_complete {
|
84
81
|
return Ok(());
|
85
82
|
}
|
@@ -89,7 +86,7 @@ fn update_schema_from_rows(
|
|
89
86
|
rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
|
90
87
|
let inferred_dtypes_slice = inferred_dtypes.as_slice();
|
91
88
|
|
92
|
-
for (i, dtype) in schema.
|
89
|
+
for (i, dtype) in schema.iter_values_mut().enumerate() {
|
93
90
|
if !dtype.is_known() {
|
94
91
|
*dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
|
95
92
|
polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
|
@@ -110,7 +107,7 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
|
|
110
107
|
}
|
111
108
|
|
112
109
|
fn erase_decimal_precision_scale(schema: &mut Schema) {
|
113
|
-
for dtype in schema.
|
110
|
+
for dtype in schema.iter_values_mut() {
|
114
111
|
if let DataType::Decimal(_, _) = dtype {
|
115
112
|
*dtype = DataType::Decimal(None, None)
|
116
113
|
}
|
@@ -123,7 +120,7 @@ where
|
|
123
120
|
{
|
124
121
|
let fields = column_names
|
125
122
|
.into_iter()
|
126
|
-
.map(|c| Field::new(c, DataType::Unknown(Default::default())));
|
123
|
+
.map(|c| Field::new(c.into(), DataType::Unknown(Default::default())));
|
127
124
|
Schema::from_iter(fields)
|
128
125
|
}
|
129
126
|
|
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
|
|
2
2
|
|
3
3
|
use super::*;
|
4
4
|
use crate::conversion::{ObjectValue, Wrap};
|
5
|
+
use crate::interop::arrow::to_ruby::dataframe_to_stream;
|
6
|
+
use crate::RbResult;
|
5
7
|
|
6
8
|
impl RbDataFrame {
|
7
9
|
pub fn row_tuple(&self, idx: i64) -> Value {
|
@@ -18,7 +20,7 @@ impl RbDataFrame {
|
|
18
20
|
.map(|s| match s.dtype() {
|
19
21
|
DataType::Object(_, _) => {
|
20
22
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
21
|
-
obj.unwrap().
|
23
|
+
obj.unwrap().to_value()
|
22
24
|
}
|
23
25
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
24
26
|
}),
|
@@ -37,7 +39,7 @@ impl RbDataFrame {
|
|
37
39
|
.map(|s| match s.dtype() {
|
38
40
|
DataType::Object(_, _) => {
|
39
41
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
40
|
-
obj.unwrap().
|
42
|
+
obj.unwrap().to_value()
|
41
43
|
}
|
42
44
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
43
45
|
}),
|
@@ -45,4 +47,9 @@ impl RbDataFrame {
|
|
45
47
|
}))
|
46
48
|
.as_value()
|
47
49
|
}
|
50
|
+
|
51
|
+
pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
|
52
|
+
self.df.borrow_mut().align_chunks();
|
53
|
+
dataframe_to_stream(&self.df.borrow())
|
54
|
+
}
|
48
55
|
}
|
@@ -9,14 +9,15 @@ use crate::map::dataframe::{
|
|
9
9
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
10
10
|
apply_lambda_with_utf8_out_type,
|
11
11
|
};
|
12
|
-
use crate::
|
12
|
+
use crate::prelude::strings_to_pl_smallstr;
|
13
|
+
use crate::series::{to_rbseries, to_series};
|
13
14
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
14
15
|
|
15
16
|
impl RbDataFrame {
|
16
17
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
17
18
|
let mut cols = Vec::new();
|
18
19
|
for i in columns.into_iter() {
|
19
|
-
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
|
20
|
+
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
|
20
21
|
}
|
21
22
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
22
23
|
Ok(RbDataFrame::new(df))
|
@@ -127,7 +128,7 @@ impl RbDataFrame {
|
|
127
128
|
|
128
129
|
pub fn get_columns(&self) -> RArray {
|
129
130
|
let cols = self.df.borrow().get_columns().to_vec();
|
130
|
-
|
131
|
+
to_rbseries(cols)
|
131
132
|
}
|
132
133
|
|
133
134
|
pub fn columns(&self) -> Vec<String> {
|
@@ -173,7 +174,8 @@ impl RbDataFrame {
|
|
173
174
|
}
|
174
175
|
|
175
176
|
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
176
|
-
let columns =
|
177
|
+
let columns = to_series(columns)?;
|
178
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
177
179
|
let df = self
|
178
180
|
.df
|
179
181
|
.borrow()
|
@@ -183,7 +185,8 @@ impl RbDataFrame {
|
|
183
185
|
}
|
184
186
|
|
185
187
|
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
186
|
-
let columns =
|
188
|
+
let columns = to_series(columns)?;
|
189
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
187
190
|
self.df
|
188
191
|
.borrow_mut()
|
189
192
|
.hstack_mut(&columns)
|
@@ -222,6 +225,7 @@ impl RbDataFrame {
|
|
222
225
|
.borrow_mut()
|
223
226
|
.drop_in_place(&name)
|
224
227
|
.map_err(RbPolarsErr::from)?;
|
228
|
+
let s = s.take_materialized_series();
|
225
229
|
Ok(RbSeries::new(s))
|
226
230
|
}
|
227
231
|
|
@@ -229,7 +233,7 @@ impl RbDataFrame {
|
|
229
233
|
self.df
|
230
234
|
.borrow()
|
231
235
|
.select_at_idx(idx)
|
232
|
-
.map(|s| RbSeries::new(s.clone()))
|
236
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
233
237
|
}
|
234
238
|
|
235
239
|
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
@@ -237,11 +241,13 @@ impl RbDataFrame {
|
|
237
241
|
}
|
238
242
|
|
239
243
|
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
240
|
-
self
|
244
|
+
let series = self
|
245
|
+
.df
|
241
246
|
.borrow()
|
242
247
|
.column(&name)
|
243
|
-
.map(|s| RbSeries::new(s.clone()))
|
244
|
-
.map_err(RbPolarsErr::from)
|
248
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
249
|
+
.map_err(RbPolarsErr::from)?;
|
250
|
+
Ok(series)
|
245
251
|
}
|
246
252
|
|
247
253
|
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
@@ -254,7 +260,7 @@ impl RbDataFrame {
|
|
254
260
|
}
|
255
261
|
|
256
262
|
pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
257
|
-
let indices = IdxCa::from_vec("", indices);
|
263
|
+
let indices = IdxCa::from_vec("".into(), indices);
|
258
264
|
let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
|
259
265
|
Ok(RbDataFrame::new(df))
|
260
266
|
}
|
@@ -332,7 +338,7 @@ impl RbDataFrame {
|
|
332
338
|
let df = self
|
333
339
|
.df
|
334
340
|
.borrow()
|
335
|
-
.with_row_index(
|
341
|
+
.with_row_index(name.into(), offset)
|
336
342
|
.map_err(RbPolarsErr::from)?;
|
337
343
|
Ok(df.into())
|
338
344
|
}
|
@@ -349,8 +355,8 @@ impl RbDataFrame {
|
|
349
355
|
variable_name: Option<String>,
|
350
356
|
) -> RbResult<Self> {
|
351
357
|
let args = UnpivotArgsIR {
|
352
|
-
on:
|
353
|
-
index:
|
358
|
+
on: strings_to_pl_smallstr(on),
|
359
|
+
index: strings_to_pl_smallstr(index),
|
354
360
|
value_name: value_name.map(|s| s.into()),
|
355
361
|
variable_name: variable_name.map(|s| s.into()),
|
356
362
|
};
|
@@ -410,7 +416,7 @@ impl RbDataFrame {
|
|
410
416
|
.borrow()
|
411
417
|
.max_horizontal()
|
412
418
|
.map_err(RbPolarsErr::from)?;
|
413
|
-
Ok(s.map(|s| s.into()))
|
419
|
+
Ok(s.map(|s| s.take_materialized_series().into()))
|
414
420
|
}
|
415
421
|
|
416
422
|
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
@@ -419,7 +425,7 @@ impl RbDataFrame {
|
|
419
425
|
.borrow()
|
420
426
|
.min_horizontal()
|
421
427
|
.map_err(RbPolarsErr::from)?;
|
422
|
-
Ok(s.map(|s| s.into()))
|
428
|
+
Ok(s.map(|s| s.take_materialized_series().into()))
|
423
429
|
}
|
424
430
|
|
425
431
|
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
@@ -581,7 +587,7 @@ impl RbDataFrame {
|
|
581
587
|
}
|
582
588
|
|
583
589
|
pub fn to_struct(&self, name: String) -> RbSeries {
|
584
|
-
let s = self.df.borrow().clone().into_struct(
|
590
|
+
let s = self.df.borrow().clone().into_struct(name.into());
|
585
591
|
s.into_series().into()
|
586
592
|
}
|
587
593
|
|