polars-df 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -2,25 +2,30 @@ pub(crate) mod any_value;
|
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
5
|
+
use std::fs::File;
|
5
6
|
use std::hash::{Hash, Hasher};
|
6
7
|
use std::num::NonZeroUsize;
|
8
|
+
use std::path::PathBuf;
|
7
9
|
|
8
10
|
use magnus::{
|
9
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
10
|
-
Ruby, Symbol, TryConvert, Value,
|
11
|
+
class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
|
12
|
+
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
|
11
13
|
};
|
12
14
|
use polars::chunked_array::object::PolarsObjectSafe;
|
13
15
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
14
16
|
use polars::datatypes::AnyValue;
|
15
17
|
use polars::frame::row::Row;
|
16
|
-
use polars::frame::NullStrategy;
|
17
18
|
use polars::io::avro::AvroCompression;
|
19
|
+
use polars::io::cloud::CloudOptions;
|
18
20
|
use polars::prelude::*;
|
19
21
|
use polars::series::ops::NullBehavior;
|
20
22
|
use polars_core::utils::arrow::array::Array;
|
21
23
|
use polars_core::utils::materialize_dyn_int;
|
24
|
+
use polars_plan::plans::ScanSources;
|
25
|
+
use polars_utils::mmap::MemSlice;
|
22
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
23
27
|
|
28
|
+
use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
|
24
29
|
use crate::object::OBJECT_NAME;
|
25
30
|
use crate::rb_modules::series;
|
26
31
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
@@ -104,9 +109,10 @@ impl TryConvert for Wrap<NullValues> {
|
|
104
109
|
.collect(),
|
105
110
|
)))
|
106
111
|
} else {
|
107
|
-
Err(
|
108
|
-
"could not extract value from null_values argument".into()
|
109
|
-
|
112
|
+
Err(
|
113
|
+
RbPolarsErr::Other("could not extract value from null_values argument".into())
|
114
|
+
.into(),
|
115
|
+
)
|
110
116
|
}
|
111
117
|
}
|
112
118
|
}
|
@@ -298,29 +304,31 @@ impl TryConvert for Wrap<DataType> {
|
|
298
304
|
let dtype = if ob.is_kind_of(class::class()) {
|
299
305
|
let name = ob.funcall::<_, _, String>("name", ())?;
|
300
306
|
match name.as_str() {
|
301
|
-
"Polars::UInt8" => DataType::UInt8,
|
302
|
-
"Polars::UInt16" => DataType::UInt16,
|
303
|
-
"Polars::UInt32" => DataType::UInt32,
|
304
|
-
"Polars::UInt64" => DataType::UInt64,
|
305
307
|
"Polars::Int8" => DataType::Int8,
|
306
308
|
"Polars::Int16" => DataType::Int16,
|
307
309
|
"Polars::Int32" => DataType::Int32,
|
308
310
|
"Polars::Int64" => DataType::Int64,
|
311
|
+
"Polars::UInt8" => DataType::UInt8,
|
312
|
+
"Polars::UInt16" => DataType::UInt16,
|
313
|
+
"Polars::UInt32" => DataType::UInt32,
|
314
|
+
"Polars::UInt64" => DataType::UInt64,
|
315
|
+
"Polars::Float32" => DataType::Float32,
|
316
|
+
"Polars::Float64" => DataType::Float64,
|
317
|
+
"Polars::Boolean" => DataType::Boolean,
|
309
318
|
"Polars::String" => DataType::String,
|
310
319
|
"Polars::Binary" => DataType::Binary,
|
311
|
-
"Polars::Boolean" => DataType::Boolean,
|
312
320
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
313
321
|
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
314
322
|
"Polars::Date" => DataType::Date,
|
315
|
-
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
316
323
|
"Polars::Time" => DataType::Time,
|
324
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
317
325
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
318
326
|
"Polars::Decimal" => DataType::Decimal(None, None),
|
319
|
-
"Polars::Float32" => DataType::Float32,
|
320
|
-
"Polars::Float64" => DataType::Float64,
|
321
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
322
327
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
328
|
+
"Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
|
329
|
+
"Polars::Struct" => DataType::Struct(vec![]),
|
323
330
|
"Polars::Null" => DataType::Null,
|
331
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
324
332
|
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
325
333
|
dt => {
|
326
334
|
return Err(RbValueError::new_err(format!(
|
@@ -328,7 +336,6 @@ impl TryConvert for Wrap<DataType> {
|
|
328
336
|
)))
|
329
337
|
}
|
330
338
|
}
|
331
|
-
// TODO improve
|
332
339
|
} else if String::try_convert(ob).is_err() {
|
333
340
|
let name = unsafe { ob.class().name() }.into_owned();
|
334
341
|
match name.as_str() {
|
@@ -340,9 +347,11 @@ impl TryConvert for Wrap<DataType> {
|
|
340
347
|
"Polars::UInt16" => DataType::UInt16,
|
341
348
|
"Polars::UInt32" => DataType::UInt32,
|
342
349
|
"Polars::UInt64" => DataType::UInt64,
|
350
|
+
"Polars::Float32" => DataType::Float32,
|
351
|
+
"Polars::Float64" => DataType::Float64,
|
352
|
+
"Polars::Boolean" => DataType::Boolean,
|
343
353
|
"Polars::String" => DataType::String,
|
344
354
|
"Polars::Binary" => DataType::Binary,
|
345
|
-
"Polars::Boolean" => DataType::Boolean,
|
346
355
|
"Polars::Categorical" => {
|
347
356
|
let ordering = ob
|
348
357
|
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
@@ -358,21 +367,17 @@ impl TryConvert for Wrap<DataType> {
|
|
358
367
|
}
|
359
368
|
"Polars::Date" => DataType::Date,
|
360
369
|
"Polars::Time" => DataType::Time,
|
361
|
-
"Polars::Float32" => DataType::Float32,
|
362
|
-
"Polars::Float64" => DataType::Float64,
|
363
|
-
"Polars::Null" => DataType::Null,
|
364
|
-
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
365
|
-
"Polars::Duration" => {
|
366
|
-
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
367
|
-
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
368
|
-
DataType::Duration(time_unit)
|
369
|
-
}
|
370
370
|
"Polars::Datetime" => {
|
371
371
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
372
372
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
373
373
|
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
374
374
|
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
375
375
|
}
|
376
|
+
"Polars::Duration" => {
|
377
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
378
|
+
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
379
|
+
DataType::Duration(time_unit)
|
380
|
+
}
|
376
381
|
"Polars::Decimal" => {
|
377
382
|
let precision = ob.funcall("precision", ())?;
|
378
383
|
let scale = ob.funcall("scale", ())?;
|
@@ -383,6 +388,13 @@ impl TryConvert for Wrap<DataType> {
|
|
383
388
|
let inner = Wrap::<DataType>::try_convert(inner)?;
|
384
389
|
DataType::List(Box::new(inner.0))
|
385
390
|
}
|
391
|
+
"Polars::Array" => {
|
392
|
+
let inner: Value = ob.funcall("inner", ()).unwrap();
|
393
|
+
let size: Value = ob.funcall("size", ()).unwrap();
|
394
|
+
let inner = Wrap::<DataType>::try_convert(inner)?;
|
395
|
+
let size = usize::try_convert(size)?;
|
396
|
+
DataType::Array(Box::new(inner.0), size)
|
397
|
+
}
|
386
398
|
"Polars::Struct" => {
|
387
399
|
let arr: RArray = ob.funcall("fields", ())?;
|
388
400
|
let mut fields = Vec::with_capacity(arr.len());
|
@@ -391,6 +403,9 @@ impl TryConvert for Wrap<DataType> {
|
|
391
403
|
}
|
392
404
|
DataType::Struct(fields)
|
393
405
|
}
|
406
|
+
"Polars::Null" => DataType::Null,
|
407
|
+
"Object" => DataType::Object(OBJECT_NAME, None),
|
408
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
394
409
|
dt => {
|
395
410
|
return Err(RbTypeError::new_err(format!(
|
396
411
|
"A {dt} object is not a correct polars DataType. \
|
@@ -434,6 +449,8 @@ impl TryConvert for Wrap<DataType> {
|
|
434
449
|
}
|
435
450
|
}
|
436
451
|
|
452
|
+
unsafe impl TryConvertOwned for Wrap<DataType> {}
|
453
|
+
|
437
454
|
impl TryConvert for Wrap<StatisticsOptions> {
|
438
455
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
439
456
|
let mut statistics = StatisticsOptions::empty();
|
@@ -452,8 +469,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
|
|
452
469
|
}
|
453
470
|
}
|
454
471
|
Ok(ForEach::Continue)
|
455
|
-
})
|
456
|
-
.unwrap();
|
472
|
+
})?;
|
457
473
|
|
458
474
|
Ok(Wrap(statistics))
|
459
475
|
}
|
@@ -478,13 +494,75 @@ impl TryConvert for Wrap<Schema> {
|
|
478
494
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
479
495
|
schema.push(Ok(Field::new((&*key).into(), val.0)));
|
480
496
|
Ok(ForEach::Continue)
|
481
|
-
})
|
482
|
-
.unwrap();
|
497
|
+
})?;
|
483
498
|
|
484
499
|
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
485
500
|
}
|
486
501
|
}
|
487
502
|
|
503
|
+
impl TryConvert for Wrap<ScanSources> {
|
504
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
505
|
+
let list = RArray::try_convert(ob)?;
|
506
|
+
|
507
|
+
if list.is_empty() {
|
508
|
+
return Ok(Wrap(ScanSources::default()));
|
509
|
+
}
|
510
|
+
|
511
|
+
enum MutableSources {
|
512
|
+
Paths(Vec<PathBuf>),
|
513
|
+
Files(Vec<File>),
|
514
|
+
Buffers(Vec<MemSlice>),
|
515
|
+
}
|
516
|
+
|
517
|
+
let num_items = list.len();
|
518
|
+
let mut iter = list
|
519
|
+
.into_iter()
|
520
|
+
.map(|val| get_ruby_scan_source_input(val, false));
|
521
|
+
|
522
|
+
let Some(first) = iter.next() else {
|
523
|
+
return Ok(Wrap(ScanSources::default()));
|
524
|
+
};
|
525
|
+
|
526
|
+
let mut sources = match first? {
|
527
|
+
RubyScanSourceInput::Path(path) => {
|
528
|
+
let mut sources = Vec::with_capacity(num_items);
|
529
|
+
sources.push(path);
|
530
|
+
MutableSources::Paths(sources)
|
531
|
+
}
|
532
|
+
RubyScanSourceInput::File(file) => {
|
533
|
+
let mut sources = Vec::with_capacity(num_items);
|
534
|
+
sources.push(file);
|
535
|
+
MutableSources::Files(sources)
|
536
|
+
}
|
537
|
+
RubyScanSourceInput::Buffer(buffer) => {
|
538
|
+
let mut sources = Vec::with_capacity(num_items);
|
539
|
+
sources.push(buffer);
|
540
|
+
MutableSources::Buffers(sources)
|
541
|
+
}
|
542
|
+
};
|
543
|
+
|
544
|
+
for source in iter {
|
545
|
+
match (&mut sources, source?) {
|
546
|
+
(MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
|
547
|
+
(MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
|
548
|
+
(MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
|
549
|
+
_ => {
|
550
|
+
return Err(RbTypeError::new_err(
|
551
|
+
"Cannot combine in-memory bytes, paths and files for scan sources"
|
552
|
+
.to_string(),
|
553
|
+
))
|
554
|
+
}
|
555
|
+
}
|
556
|
+
}
|
557
|
+
|
558
|
+
Ok(Wrap(match sources {
|
559
|
+
MutableSources::Paths(i) => ScanSources::Paths(i.into()),
|
560
|
+
MutableSources::Files(i) => ScanSources::Files(i.into()),
|
561
|
+
MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
|
562
|
+
}))
|
563
|
+
}
|
564
|
+
}
|
565
|
+
|
488
566
|
#[derive(Clone)]
|
489
567
|
pub struct ObjectValue {
|
490
568
|
pub inner: Opaque<Value>,
|
@@ -493,7 +571,7 @@ pub struct ObjectValue {
|
|
493
571
|
impl Debug for ObjectValue {
|
494
572
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
495
573
|
f.debug_struct("ObjectValue")
|
496
|
-
.field("inner", &self.
|
574
|
+
.field("inner", &self.to_value())
|
497
575
|
.finish()
|
498
576
|
}
|
499
577
|
}
|
@@ -501,7 +579,7 @@ impl Debug for ObjectValue {
|
|
501
579
|
impl Hash for ObjectValue {
|
502
580
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
503
581
|
let h = self
|
504
|
-
.
|
582
|
+
.to_value()
|
505
583
|
.funcall::<_, _, isize>("hash", ())
|
506
584
|
.expect("should be hashable");
|
507
585
|
state.write_isize(h)
|
@@ -512,7 +590,7 @@ impl Eq for ObjectValue {}
|
|
512
590
|
|
513
591
|
impl PartialEq for ObjectValue {
|
514
592
|
fn eq(&self, other: &Self) -> bool {
|
515
|
-
self.
|
593
|
+
self.to_value().eql(other.to_value()).unwrap_or(false)
|
516
594
|
}
|
517
595
|
}
|
518
596
|
|
@@ -533,7 +611,7 @@ impl TotalHash for ObjectValue {
|
|
533
611
|
|
534
612
|
impl Display for ObjectValue {
|
535
613
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
536
|
-
write!(f, "{}", self.
|
614
|
+
write!(f, "{}", self.to_value())
|
537
615
|
}
|
538
616
|
}
|
539
617
|
|
@@ -561,16 +639,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
|
561
639
|
}
|
562
640
|
}
|
563
641
|
|
564
|
-
// TODO remove
|
565
642
|
impl ObjectValue {
|
566
|
-
pub fn
|
567
|
-
|
643
|
+
pub fn to_value(&self) -> Value {
|
644
|
+
self.clone().into_value()
|
568
645
|
}
|
569
646
|
}
|
570
647
|
|
571
648
|
impl IntoValue for ObjectValue {
|
572
|
-
fn into_value_with(self,
|
573
|
-
self.
|
649
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
650
|
+
ruby.get_inner(self.inner)
|
574
651
|
}
|
575
652
|
}
|
576
653
|
|
@@ -587,10 +664,10 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
587
664
|
let parsed = match String::try_convert(ob)?.as_str() {
|
588
665
|
"backward" => AsofStrategy::Backward,
|
589
666
|
"forward" => AsofStrategy::Forward,
|
667
|
+
"nearest" => AsofStrategy::Nearest,
|
590
668
|
v => {
|
591
669
|
return Err(RbValueError::new_err(format!(
|
592
|
-
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
593
|
-
v
|
670
|
+
"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
|
594
671
|
)))
|
595
672
|
}
|
596
673
|
};
|
@@ -830,14 +907,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
|
|
830
907
|
}
|
831
908
|
}
|
832
909
|
|
833
|
-
impl TryConvert for Wrap<
|
910
|
+
impl TryConvert for Wrap<QuantileMethod> {
|
834
911
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
835
912
|
let parsed = match String::try_convert(ob)?.as_str() {
|
836
|
-
"lower" =>
|
837
|
-
"higher" =>
|
838
|
-
"nearest" =>
|
839
|
-
"linear" =>
|
840
|
-
"midpoint" =>
|
913
|
+
"lower" => QuantileMethod::Lower,
|
914
|
+
"higher" => QuantileMethod::Higher,
|
915
|
+
"nearest" => QuantileMethod::Nearest,
|
916
|
+
"linear" => QuantileMethod::Linear,
|
917
|
+
"midpoint" => QuantileMethod::Midpoint,
|
841
918
|
v => {
|
842
919
|
return Err(RbValueError::new_err(format!(
|
843
920
|
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
|
@@ -1001,6 +1078,11 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
1001
1078
|
}
|
1002
1079
|
}
|
1003
1080
|
|
1081
|
+
pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
|
1082
|
+
let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
|
1083
|
+
Ok(out)
|
1084
|
+
}
|
1085
|
+
|
1004
1086
|
pub fn parse_fill_null_strategy(
|
1005
1087
|
strategy: &str,
|
1006
1088
|
limit: FillNullLimit,
|
@@ -1071,7 +1153,7 @@ impl TryConvert for Wrap<NonZeroUsize> {
|
|
1071
1153
|
let v = usize::try_convert(ob)?;
|
1072
1154
|
NonZeroUsize::new(v)
|
1073
1155
|
.map(Wrap)
|
1074
|
-
.ok_or(RbValueError::new_err("must be non-zero"
|
1156
|
+
.ok_or(RbValueError::new_err("must be non-zero"))
|
1075
1157
|
}
|
1076
1158
|
}
|
1077
1159
|
|
@@ -1085,3 +1167,28 @@ where
|
|
1085
1167
|
.map(|s| PlSmallStr::from_str(s.as_ref()))
|
1086
1168
|
.collect()
|
1087
1169
|
}
|
1170
|
+
|
1171
|
+
#[derive(Debug, Copy, Clone)]
|
1172
|
+
pub struct RbCompatLevel(pub CompatLevel);
|
1173
|
+
|
1174
|
+
impl TryConvert for RbCompatLevel {
|
1175
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1176
|
+
Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
|
1177
|
+
if let Ok(compat_level) = CompatLevel::with_level(level) {
|
1178
|
+
compat_level
|
1179
|
+
} else {
|
1180
|
+
return Err(RbValueError::new_err("invalid compat level".to_string()));
|
1181
|
+
}
|
1182
|
+
} else if let Ok(future) = bool::try_convert(ob) {
|
1183
|
+
if future {
|
1184
|
+
CompatLevel::newest()
|
1185
|
+
} else {
|
1186
|
+
CompatLevel::oldest()
|
1187
|
+
}
|
1188
|
+
} else {
|
1189
|
+
return Err(RbTypeError::new_err(
|
1190
|
+
"'compat_level' argument accepts int or bool".to_string(),
|
1191
|
+
));
|
1192
|
+
}))
|
1193
|
+
}
|
1194
|
+
}
|
@@ -54,9 +54,6 @@ fn finish_from_rows(
|
|
54
54
|
schema_overrides: Option<Schema>,
|
55
55
|
infer_schema_length: Option<usize>,
|
56
56
|
) -> RbResult<RbDataFrame> {
|
57
|
-
// Object builder must be registered
|
58
|
-
crate::on_startup::register_object_builder();
|
59
|
-
|
60
57
|
let mut schema = if let Some(mut schema) = schema {
|
61
58
|
resolve_schema_overrides(&mut schema, schema_overrides);
|
62
59
|
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
|
|
2
2
|
|
3
3
|
use super::*;
|
4
4
|
use crate::conversion::{ObjectValue, Wrap};
|
5
|
+
use crate::interop::arrow::to_ruby::dataframe_to_stream;
|
6
|
+
use crate::RbResult;
|
5
7
|
|
6
8
|
impl RbDataFrame {
|
7
9
|
pub fn row_tuple(&self, idx: i64) -> Value {
|
@@ -18,7 +20,7 @@ impl RbDataFrame {
|
|
18
20
|
.map(|s| match s.dtype() {
|
19
21
|
DataType::Object(_, _) => {
|
20
22
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
21
|
-
obj.unwrap().
|
23
|
+
obj.unwrap().to_value()
|
22
24
|
}
|
23
25
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
24
26
|
}),
|
@@ -37,7 +39,7 @@ impl RbDataFrame {
|
|
37
39
|
.map(|s| match s.dtype() {
|
38
40
|
DataType::Object(_, _) => {
|
39
41
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
40
|
-
obj.unwrap().
|
42
|
+
obj.unwrap().to_value()
|
41
43
|
}
|
42
44
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
43
45
|
}),
|
@@ -45,4 +47,9 @@ impl RbDataFrame {
|
|
45
47
|
}))
|
46
48
|
.as_value()
|
47
49
|
}
|
50
|
+
|
51
|
+
pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
|
52
|
+
self.df.borrow_mut().align_chunks();
|
53
|
+
dataframe_to_stream(&self.df.borrow())
|
54
|
+
}
|
48
55
|
}
|
@@ -1,6 +1,5 @@
|
|
1
1
|
use either::Either;
|
2
2
|
use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
|
3
|
-
use polars::frame::NullStrategy;
|
4
3
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
5
4
|
use polars::prelude::*;
|
6
5
|
|
@@ -10,14 +9,14 @@ use crate::map::dataframe::{
|
|
10
9
|
apply_lambda_with_utf8_out_type,
|
11
10
|
};
|
12
11
|
use crate::prelude::strings_to_pl_smallstr;
|
13
|
-
use crate::series::{
|
12
|
+
use crate::series::{to_rbseries, to_series};
|
14
13
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
15
14
|
|
16
15
|
impl RbDataFrame {
|
17
16
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
18
17
|
let mut cols = Vec::new();
|
19
18
|
for i in columns.into_iter() {
|
20
|
-
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
|
19
|
+
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
|
21
20
|
}
|
22
21
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
23
22
|
Ok(RbDataFrame::new(df))
|
@@ -128,7 +127,7 @@ impl RbDataFrame {
|
|
128
127
|
|
129
128
|
pub fn get_columns(&self) -> RArray {
|
130
129
|
let cols = self.df.borrow().get_columns().to_vec();
|
131
|
-
|
130
|
+
to_rbseries(cols)
|
132
131
|
}
|
133
132
|
|
134
133
|
pub fn columns(&self) -> Vec<String> {
|
@@ -158,7 +157,7 @@ impl RbDataFrame {
|
|
158
157
|
}
|
159
158
|
|
160
159
|
pub fn n_chunks(&self) -> usize {
|
161
|
-
self.df.borrow().
|
160
|
+
self.df.borrow().first_col_n_chunks()
|
162
161
|
}
|
163
162
|
|
164
163
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -174,7 +173,8 @@ impl RbDataFrame {
|
|
174
173
|
}
|
175
174
|
|
176
175
|
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
177
|
-
let columns =
|
176
|
+
let columns = to_series(columns)?;
|
177
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
178
178
|
let df = self
|
179
179
|
.df
|
180
180
|
.borrow()
|
@@ -184,7 +184,8 @@ impl RbDataFrame {
|
|
184
184
|
}
|
185
185
|
|
186
186
|
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
187
|
-
let columns =
|
187
|
+
let columns = to_series(columns)?;
|
188
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
188
189
|
self.df
|
189
190
|
.borrow_mut()
|
190
191
|
.hstack_mut(&columns)
|
@@ -223,6 +224,7 @@ impl RbDataFrame {
|
|
223
224
|
.borrow_mut()
|
224
225
|
.drop_in_place(&name)
|
225
226
|
.map_err(RbPolarsErr::from)?;
|
227
|
+
let s = s.take_materialized_series();
|
226
228
|
Ok(RbSeries::new(s))
|
227
229
|
}
|
228
230
|
|
@@ -230,7 +232,7 @@ impl RbDataFrame {
|
|
230
232
|
self.df
|
231
233
|
.borrow()
|
232
234
|
.select_at_idx(idx)
|
233
|
-
.map(|s| RbSeries::new(s.clone()))
|
235
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
234
236
|
}
|
235
237
|
|
236
238
|
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
@@ -238,11 +240,13 @@ impl RbDataFrame {
|
|
238
240
|
}
|
239
241
|
|
240
242
|
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
241
|
-
self
|
243
|
+
let series = self
|
244
|
+
.df
|
242
245
|
.borrow()
|
243
246
|
.column(&name)
|
244
|
-
.map(|s| RbSeries::new(s.clone()))
|
245
|
-
.map_err(RbPolarsErr::from)
|
247
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
248
|
+
.map_err(RbPolarsErr::from)?;
|
249
|
+
Ok(series)
|
246
250
|
}
|
247
251
|
|
248
252
|
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
@@ -405,52 +409,6 @@ impl RbDataFrame {
|
|
405
409
|
self.df.borrow().clone().lazy().into()
|
406
410
|
}
|
407
411
|
|
408
|
-
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
409
|
-
let s = self
|
410
|
-
.df
|
411
|
-
.borrow()
|
412
|
-
.max_horizontal()
|
413
|
-
.map_err(RbPolarsErr::from)?;
|
414
|
-
Ok(s.map(|s| s.into()))
|
415
|
-
}
|
416
|
-
|
417
|
-
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
418
|
-
let s = self
|
419
|
-
.df
|
420
|
-
.borrow()
|
421
|
-
.min_horizontal()
|
422
|
-
.map_err(RbPolarsErr::from)?;
|
423
|
-
Ok(s.map(|s| s.into()))
|
424
|
-
}
|
425
|
-
|
426
|
-
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
427
|
-
let null_strategy = if ignore_nulls {
|
428
|
-
NullStrategy::Ignore
|
429
|
-
} else {
|
430
|
-
NullStrategy::Propagate
|
431
|
-
};
|
432
|
-
let s = self
|
433
|
-
.df
|
434
|
-
.borrow()
|
435
|
-
.sum_horizontal(null_strategy)
|
436
|
-
.map_err(RbPolarsErr::from)?;
|
437
|
-
Ok(s.map(|s| s.into()))
|
438
|
-
}
|
439
|
-
|
440
|
-
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
441
|
-
let null_strategy = if ignore_nulls {
|
442
|
-
NullStrategy::Ignore
|
443
|
-
} else {
|
444
|
-
NullStrategy::Propagate
|
445
|
-
};
|
446
|
-
let s = self
|
447
|
-
.df
|
448
|
-
.borrow()
|
449
|
-
.mean_horizontal(null_strategy)
|
450
|
-
.map_err(RbPolarsErr::from)?;
|
451
|
-
Ok(s.map(|s| s.into()))
|
452
|
-
}
|
453
|
-
|
454
412
|
pub fn to_dummies(
|
455
413
|
&self,
|
456
414
|
columns: Option<Vec<String>>,
|