polars-df 0.14.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -2,25 +2,30 @@ pub(crate) mod any_value;
|
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
5
|
+
use std::fs::File;
|
5
6
|
use std::hash::{Hash, Hasher};
|
6
7
|
use std::num::NonZeroUsize;
|
8
|
+
use std::path::PathBuf;
|
7
9
|
|
8
10
|
use magnus::{
|
9
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
10
|
-
Ruby, Symbol, TryConvert, Value,
|
11
|
+
class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
|
12
|
+
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
|
11
13
|
};
|
12
14
|
use polars::chunked_array::object::PolarsObjectSafe;
|
13
15
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
14
16
|
use polars::datatypes::AnyValue;
|
15
17
|
use polars::frame::row::Row;
|
16
|
-
use polars::frame::NullStrategy;
|
17
18
|
use polars::io::avro::AvroCompression;
|
19
|
+
use polars::io::cloud::CloudOptions;
|
18
20
|
use polars::prelude::*;
|
19
21
|
use polars::series::ops::NullBehavior;
|
20
22
|
use polars_core::utils::arrow::array::Array;
|
21
23
|
use polars_core::utils::materialize_dyn_int;
|
24
|
+
use polars_plan::plans::ScanSources;
|
25
|
+
use polars_utils::mmap::MemSlice;
|
22
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
23
27
|
|
28
|
+
use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
|
24
29
|
use crate::object::OBJECT_NAME;
|
25
30
|
use crate::rb_modules::series;
|
26
31
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
@@ -104,9 +109,10 @@ impl TryConvert for Wrap<NullValues> {
|
|
104
109
|
.collect(),
|
105
110
|
)))
|
106
111
|
} else {
|
107
|
-
Err(
|
108
|
-
"could not extract value from null_values argument".into()
|
109
|
-
|
112
|
+
Err(
|
113
|
+
RbPolarsErr::Other("could not extract value from null_values argument".into())
|
114
|
+
.into(),
|
115
|
+
)
|
110
116
|
}
|
111
117
|
}
|
112
118
|
}
|
@@ -298,29 +304,31 @@ impl TryConvert for Wrap<DataType> {
|
|
298
304
|
let dtype = if ob.is_kind_of(class::class()) {
|
299
305
|
let name = ob.funcall::<_, _, String>("name", ())?;
|
300
306
|
match name.as_str() {
|
301
|
-
"Polars::UInt8" => DataType::UInt8,
|
302
|
-
"Polars::UInt16" => DataType::UInt16,
|
303
|
-
"Polars::UInt32" => DataType::UInt32,
|
304
|
-
"Polars::UInt64" => DataType::UInt64,
|
305
307
|
"Polars::Int8" => DataType::Int8,
|
306
308
|
"Polars::Int16" => DataType::Int16,
|
307
309
|
"Polars::Int32" => DataType::Int32,
|
308
310
|
"Polars::Int64" => DataType::Int64,
|
311
|
+
"Polars::UInt8" => DataType::UInt8,
|
312
|
+
"Polars::UInt16" => DataType::UInt16,
|
313
|
+
"Polars::UInt32" => DataType::UInt32,
|
314
|
+
"Polars::UInt64" => DataType::UInt64,
|
315
|
+
"Polars::Float32" => DataType::Float32,
|
316
|
+
"Polars::Float64" => DataType::Float64,
|
317
|
+
"Polars::Boolean" => DataType::Boolean,
|
309
318
|
"Polars::String" => DataType::String,
|
310
319
|
"Polars::Binary" => DataType::Binary,
|
311
|
-
"Polars::Boolean" => DataType::Boolean,
|
312
320
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
313
321
|
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
314
322
|
"Polars::Date" => DataType::Date,
|
315
|
-
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
316
323
|
"Polars::Time" => DataType::Time,
|
324
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
317
325
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
318
326
|
"Polars::Decimal" => DataType::Decimal(None, None),
|
319
|
-
"Polars::Float32" => DataType::Float32,
|
320
|
-
"Polars::Float64" => DataType::Float64,
|
321
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
322
327
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
328
|
+
"Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
|
329
|
+
"Polars::Struct" => DataType::Struct(vec![]),
|
323
330
|
"Polars::Null" => DataType::Null,
|
331
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
324
332
|
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
325
333
|
dt => {
|
326
334
|
return Err(RbValueError::new_err(format!(
|
@@ -328,7 +336,6 @@ impl TryConvert for Wrap<DataType> {
|
|
328
336
|
)))
|
329
337
|
}
|
330
338
|
}
|
331
|
-
// TODO improve
|
332
339
|
} else if String::try_convert(ob).is_err() {
|
333
340
|
let name = unsafe { ob.class().name() }.into_owned();
|
334
341
|
match name.as_str() {
|
@@ -340,9 +347,11 @@ impl TryConvert for Wrap<DataType> {
|
|
340
347
|
"Polars::UInt16" => DataType::UInt16,
|
341
348
|
"Polars::UInt32" => DataType::UInt32,
|
342
349
|
"Polars::UInt64" => DataType::UInt64,
|
350
|
+
"Polars::Float32" => DataType::Float32,
|
351
|
+
"Polars::Float64" => DataType::Float64,
|
352
|
+
"Polars::Boolean" => DataType::Boolean,
|
343
353
|
"Polars::String" => DataType::String,
|
344
354
|
"Polars::Binary" => DataType::Binary,
|
345
|
-
"Polars::Boolean" => DataType::Boolean,
|
346
355
|
"Polars::Categorical" => {
|
347
356
|
let ordering = ob
|
348
357
|
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
@@ -358,21 +367,17 @@ impl TryConvert for Wrap<DataType> {
|
|
358
367
|
}
|
359
368
|
"Polars::Date" => DataType::Date,
|
360
369
|
"Polars::Time" => DataType::Time,
|
361
|
-
"Polars::Float32" => DataType::Float32,
|
362
|
-
"Polars::Float64" => DataType::Float64,
|
363
|
-
"Polars::Null" => DataType::Null,
|
364
|
-
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
365
|
-
"Polars::Duration" => {
|
366
|
-
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
367
|
-
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
368
|
-
DataType::Duration(time_unit)
|
369
|
-
}
|
370
370
|
"Polars::Datetime" => {
|
371
371
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
372
372
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
373
373
|
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
374
374
|
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
375
375
|
}
|
376
|
+
"Polars::Duration" => {
|
377
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
378
|
+
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
379
|
+
DataType::Duration(time_unit)
|
380
|
+
}
|
376
381
|
"Polars::Decimal" => {
|
377
382
|
let precision = ob.funcall("precision", ())?;
|
378
383
|
let scale = ob.funcall("scale", ())?;
|
@@ -383,6 +388,13 @@ impl TryConvert for Wrap<DataType> {
|
|
383
388
|
let inner = Wrap::<DataType>::try_convert(inner)?;
|
384
389
|
DataType::List(Box::new(inner.0))
|
385
390
|
}
|
391
|
+
"Polars::Array" => {
|
392
|
+
let inner: Value = ob.funcall("inner", ()).unwrap();
|
393
|
+
let size: Value = ob.funcall("size", ()).unwrap();
|
394
|
+
let inner = Wrap::<DataType>::try_convert(inner)?;
|
395
|
+
let size = usize::try_convert(size)?;
|
396
|
+
DataType::Array(Box::new(inner.0), size)
|
397
|
+
}
|
386
398
|
"Polars::Struct" => {
|
387
399
|
let arr: RArray = ob.funcall("fields", ())?;
|
388
400
|
let mut fields = Vec::with_capacity(arr.len());
|
@@ -391,6 +403,9 @@ impl TryConvert for Wrap<DataType> {
|
|
391
403
|
}
|
392
404
|
DataType::Struct(fields)
|
393
405
|
}
|
406
|
+
"Polars::Null" => DataType::Null,
|
407
|
+
"Object" => DataType::Object(OBJECT_NAME, None),
|
408
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
394
409
|
dt => {
|
395
410
|
return Err(RbTypeError::new_err(format!(
|
396
411
|
"A {dt} object is not a correct polars DataType. \
|
@@ -434,6 +449,8 @@ impl TryConvert for Wrap<DataType> {
|
|
434
449
|
}
|
435
450
|
}
|
436
451
|
|
452
|
+
unsafe impl TryConvertOwned for Wrap<DataType> {}
|
453
|
+
|
437
454
|
impl TryConvert for Wrap<StatisticsOptions> {
|
438
455
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
439
456
|
let mut statistics = StatisticsOptions::empty();
|
@@ -452,8 +469,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
|
|
452
469
|
}
|
453
470
|
}
|
454
471
|
Ok(ForEach::Continue)
|
455
|
-
})
|
456
|
-
.unwrap();
|
472
|
+
})?;
|
457
473
|
|
458
474
|
Ok(Wrap(statistics))
|
459
475
|
}
|
@@ -478,13 +494,75 @@ impl TryConvert for Wrap<Schema> {
|
|
478
494
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
479
495
|
schema.push(Ok(Field::new((&*key).into(), val.0)));
|
480
496
|
Ok(ForEach::Continue)
|
481
|
-
})
|
482
|
-
.unwrap();
|
497
|
+
})?;
|
483
498
|
|
484
499
|
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
485
500
|
}
|
486
501
|
}
|
487
502
|
|
503
|
+
impl TryConvert for Wrap<ScanSources> {
|
504
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
505
|
+
let list = RArray::try_convert(ob)?;
|
506
|
+
|
507
|
+
if list.is_empty() {
|
508
|
+
return Ok(Wrap(ScanSources::default()));
|
509
|
+
}
|
510
|
+
|
511
|
+
enum MutableSources {
|
512
|
+
Paths(Vec<PathBuf>),
|
513
|
+
Files(Vec<File>),
|
514
|
+
Buffers(Vec<MemSlice>),
|
515
|
+
}
|
516
|
+
|
517
|
+
let num_items = list.len();
|
518
|
+
let mut iter = list
|
519
|
+
.into_iter()
|
520
|
+
.map(|val| get_ruby_scan_source_input(val, false));
|
521
|
+
|
522
|
+
let Some(first) = iter.next() else {
|
523
|
+
return Ok(Wrap(ScanSources::default()));
|
524
|
+
};
|
525
|
+
|
526
|
+
let mut sources = match first? {
|
527
|
+
RubyScanSourceInput::Path(path) => {
|
528
|
+
let mut sources = Vec::with_capacity(num_items);
|
529
|
+
sources.push(path);
|
530
|
+
MutableSources::Paths(sources)
|
531
|
+
}
|
532
|
+
RubyScanSourceInput::File(file) => {
|
533
|
+
let mut sources = Vec::with_capacity(num_items);
|
534
|
+
sources.push(file);
|
535
|
+
MutableSources::Files(sources)
|
536
|
+
}
|
537
|
+
RubyScanSourceInput::Buffer(buffer) => {
|
538
|
+
let mut sources = Vec::with_capacity(num_items);
|
539
|
+
sources.push(buffer);
|
540
|
+
MutableSources::Buffers(sources)
|
541
|
+
}
|
542
|
+
};
|
543
|
+
|
544
|
+
for source in iter {
|
545
|
+
match (&mut sources, source?) {
|
546
|
+
(MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
|
547
|
+
(MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
|
548
|
+
(MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
|
549
|
+
_ => {
|
550
|
+
return Err(RbTypeError::new_err(
|
551
|
+
"Cannot combine in-memory bytes, paths and files for scan sources"
|
552
|
+
.to_string(),
|
553
|
+
))
|
554
|
+
}
|
555
|
+
}
|
556
|
+
}
|
557
|
+
|
558
|
+
Ok(Wrap(match sources {
|
559
|
+
MutableSources::Paths(i) => ScanSources::Paths(i.into()),
|
560
|
+
MutableSources::Files(i) => ScanSources::Files(i.into()),
|
561
|
+
MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
|
562
|
+
}))
|
563
|
+
}
|
564
|
+
}
|
565
|
+
|
488
566
|
#[derive(Clone)]
|
489
567
|
pub struct ObjectValue {
|
490
568
|
pub inner: Opaque<Value>,
|
@@ -493,7 +571,7 @@ pub struct ObjectValue {
|
|
493
571
|
impl Debug for ObjectValue {
|
494
572
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
495
573
|
f.debug_struct("ObjectValue")
|
496
|
-
.field("inner", &self.
|
574
|
+
.field("inner", &self.to_value())
|
497
575
|
.finish()
|
498
576
|
}
|
499
577
|
}
|
@@ -501,7 +579,7 @@ impl Debug for ObjectValue {
|
|
501
579
|
impl Hash for ObjectValue {
|
502
580
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
503
581
|
let h = self
|
504
|
-
.
|
582
|
+
.to_value()
|
505
583
|
.funcall::<_, _, isize>("hash", ())
|
506
584
|
.expect("should be hashable");
|
507
585
|
state.write_isize(h)
|
@@ -512,7 +590,7 @@ impl Eq for ObjectValue {}
|
|
512
590
|
|
513
591
|
impl PartialEq for ObjectValue {
|
514
592
|
fn eq(&self, other: &Self) -> bool {
|
515
|
-
self.
|
593
|
+
self.to_value().eql(other.to_value()).unwrap_or(false)
|
516
594
|
}
|
517
595
|
}
|
518
596
|
|
@@ -533,7 +611,7 @@ impl TotalHash for ObjectValue {
|
|
533
611
|
|
534
612
|
impl Display for ObjectValue {
|
535
613
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
536
|
-
write!(f, "{}", self.
|
614
|
+
write!(f, "{}", self.to_value())
|
537
615
|
}
|
538
616
|
}
|
539
617
|
|
@@ -561,16 +639,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
|
561
639
|
}
|
562
640
|
}
|
563
641
|
|
564
|
-
// TODO remove
|
565
642
|
impl ObjectValue {
|
566
|
-
pub fn
|
567
|
-
|
643
|
+
pub fn to_value(&self) -> Value {
|
644
|
+
self.clone().into_value()
|
568
645
|
}
|
569
646
|
}
|
570
647
|
|
571
648
|
impl IntoValue for ObjectValue {
|
572
|
-
fn into_value_with(self,
|
573
|
-
self.
|
649
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
650
|
+
ruby.get_inner(self.inner)
|
574
651
|
}
|
575
652
|
}
|
576
653
|
|
@@ -587,10 +664,10 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
587
664
|
let parsed = match String::try_convert(ob)?.as_str() {
|
588
665
|
"backward" => AsofStrategy::Backward,
|
589
666
|
"forward" => AsofStrategy::Forward,
|
667
|
+
"nearest" => AsofStrategy::Nearest,
|
590
668
|
v => {
|
591
669
|
return Err(RbValueError::new_err(format!(
|
592
|
-
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
593
|
-
v
|
670
|
+
"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
|
594
671
|
)))
|
595
672
|
}
|
596
673
|
};
|
@@ -830,14 +907,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
|
|
830
907
|
}
|
831
908
|
}
|
832
909
|
|
833
|
-
impl TryConvert for Wrap<
|
910
|
+
impl TryConvert for Wrap<QuantileMethod> {
|
834
911
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
835
912
|
let parsed = match String::try_convert(ob)?.as_str() {
|
836
|
-
"lower" =>
|
837
|
-
"higher" =>
|
838
|
-
"nearest" =>
|
839
|
-
"linear" =>
|
840
|
-
"midpoint" =>
|
913
|
+
"lower" => QuantileMethod::Lower,
|
914
|
+
"higher" => QuantileMethod::Higher,
|
915
|
+
"nearest" => QuantileMethod::Nearest,
|
916
|
+
"linear" => QuantileMethod::Linear,
|
917
|
+
"midpoint" => QuantileMethod::Midpoint,
|
841
918
|
v => {
|
842
919
|
return Err(RbValueError::new_err(format!(
|
843
920
|
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
|
@@ -1001,6 +1078,11 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
1001
1078
|
}
|
1002
1079
|
}
|
1003
1080
|
|
1081
|
+
pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
|
1082
|
+
let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
|
1083
|
+
Ok(out)
|
1084
|
+
}
|
1085
|
+
|
1004
1086
|
pub fn parse_fill_null_strategy(
|
1005
1087
|
strategy: &str,
|
1006
1088
|
limit: FillNullLimit,
|
@@ -1071,7 +1153,7 @@ impl TryConvert for Wrap<NonZeroUsize> {
|
|
1071
1153
|
let v = usize::try_convert(ob)?;
|
1072
1154
|
NonZeroUsize::new(v)
|
1073
1155
|
.map(Wrap)
|
1074
|
-
.ok_or(RbValueError::new_err("must be non-zero"
|
1156
|
+
.ok_or(RbValueError::new_err("must be non-zero"))
|
1075
1157
|
}
|
1076
1158
|
}
|
1077
1159
|
|
@@ -1085,3 +1167,28 @@ where
|
|
1085
1167
|
.map(|s| PlSmallStr::from_str(s.as_ref()))
|
1086
1168
|
.collect()
|
1087
1169
|
}
|
1170
|
+
|
1171
|
+
#[derive(Debug, Copy, Clone)]
|
1172
|
+
pub struct RbCompatLevel(pub CompatLevel);
|
1173
|
+
|
1174
|
+
impl TryConvert for RbCompatLevel {
|
1175
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1176
|
+
Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
|
1177
|
+
if let Ok(compat_level) = CompatLevel::with_level(level) {
|
1178
|
+
compat_level
|
1179
|
+
} else {
|
1180
|
+
return Err(RbValueError::new_err("invalid compat level".to_string()));
|
1181
|
+
}
|
1182
|
+
} else if let Ok(future) = bool::try_convert(ob) {
|
1183
|
+
if future {
|
1184
|
+
CompatLevel::newest()
|
1185
|
+
} else {
|
1186
|
+
CompatLevel::oldest()
|
1187
|
+
}
|
1188
|
+
} else {
|
1189
|
+
return Err(RbTypeError::new_err(
|
1190
|
+
"'compat_level' argument accepts int or bool".to_string(),
|
1191
|
+
));
|
1192
|
+
}))
|
1193
|
+
}
|
1194
|
+
}
|
@@ -54,9 +54,6 @@ fn finish_from_rows(
|
|
54
54
|
schema_overrides: Option<Schema>,
|
55
55
|
infer_schema_length: Option<usize>,
|
56
56
|
) -> RbResult<RbDataFrame> {
|
57
|
-
// Object builder must be registered
|
58
|
-
crate::on_startup::register_object_builder();
|
59
|
-
|
60
57
|
let mut schema = if let Some(mut schema) = schema {
|
61
58
|
resolve_schema_overrides(&mut schema, schema_overrides);
|
62
59
|
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
|
|
2
2
|
|
3
3
|
use super::*;
|
4
4
|
use crate::conversion::{ObjectValue, Wrap};
|
5
|
+
use crate::interop::arrow::to_ruby::dataframe_to_stream;
|
6
|
+
use crate::RbResult;
|
5
7
|
|
6
8
|
impl RbDataFrame {
|
7
9
|
pub fn row_tuple(&self, idx: i64) -> Value {
|
@@ -18,7 +20,7 @@ impl RbDataFrame {
|
|
18
20
|
.map(|s| match s.dtype() {
|
19
21
|
DataType::Object(_, _) => {
|
20
22
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
21
|
-
obj.unwrap().
|
23
|
+
obj.unwrap().to_value()
|
22
24
|
}
|
23
25
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
24
26
|
}),
|
@@ -37,7 +39,7 @@ impl RbDataFrame {
|
|
37
39
|
.map(|s| match s.dtype() {
|
38
40
|
DataType::Object(_, _) => {
|
39
41
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
40
|
-
obj.unwrap().
|
42
|
+
obj.unwrap().to_value()
|
41
43
|
}
|
42
44
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
43
45
|
}),
|
@@ -45,4 +47,9 @@ impl RbDataFrame {
|
|
45
47
|
}))
|
46
48
|
.as_value()
|
47
49
|
}
|
50
|
+
|
51
|
+
pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
|
52
|
+
self.df.borrow_mut().align_chunks();
|
53
|
+
dataframe_to_stream(&self.df.borrow())
|
54
|
+
}
|
48
55
|
}
|
@@ -1,6 +1,5 @@
|
|
1
1
|
use either::Either;
|
2
2
|
use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
|
3
|
-
use polars::frame::NullStrategy;
|
4
3
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
5
4
|
use polars::prelude::*;
|
6
5
|
|
@@ -10,14 +9,14 @@ use crate::map::dataframe::{
|
|
10
9
|
apply_lambda_with_utf8_out_type,
|
11
10
|
};
|
12
11
|
use crate::prelude::strings_to_pl_smallstr;
|
13
|
-
use crate::series::{
|
12
|
+
use crate::series::{to_rbseries, to_series};
|
14
13
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
15
14
|
|
16
15
|
impl RbDataFrame {
|
17
16
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
18
17
|
let mut cols = Vec::new();
|
19
18
|
for i in columns.into_iter() {
|
20
|
-
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
|
19
|
+
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
|
21
20
|
}
|
22
21
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
23
22
|
Ok(RbDataFrame::new(df))
|
@@ -128,7 +127,7 @@ impl RbDataFrame {
|
|
128
127
|
|
129
128
|
pub fn get_columns(&self) -> RArray {
|
130
129
|
let cols = self.df.borrow().get_columns().to_vec();
|
131
|
-
|
130
|
+
to_rbseries(cols)
|
132
131
|
}
|
133
132
|
|
134
133
|
pub fn columns(&self) -> Vec<String> {
|
@@ -158,7 +157,7 @@ impl RbDataFrame {
|
|
158
157
|
}
|
159
158
|
|
160
159
|
pub fn n_chunks(&self) -> usize {
|
161
|
-
self.df.borrow().
|
160
|
+
self.df.borrow().first_col_n_chunks()
|
162
161
|
}
|
163
162
|
|
164
163
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -174,7 +173,8 @@ impl RbDataFrame {
|
|
174
173
|
}
|
175
174
|
|
176
175
|
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
177
|
-
let columns =
|
176
|
+
let columns = to_series(columns)?;
|
177
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
178
178
|
let df = self
|
179
179
|
.df
|
180
180
|
.borrow()
|
@@ -184,7 +184,8 @@ impl RbDataFrame {
|
|
184
184
|
}
|
185
185
|
|
186
186
|
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
187
|
-
let columns =
|
187
|
+
let columns = to_series(columns)?;
|
188
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
188
189
|
self.df
|
189
190
|
.borrow_mut()
|
190
191
|
.hstack_mut(&columns)
|
@@ -223,6 +224,7 @@ impl RbDataFrame {
|
|
223
224
|
.borrow_mut()
|
224
225
|
.drop_in_place(&name)
|
225
226
|
.map_err(RbPolarsErr::from)?;
|
227
|
+
let s = s.take_materialized_series();
|
226
228
|
Ok(RbSeries::new(s))
|
227
229
|
}
|
228
230
|
|
@@ -230,7 +232,7 @@ impl RbDataFrame {
|
|
230
232
|
self.df
|
231
233
|
.borrow()
|
232
234
|
.select_at_idx(idx)
|
233
|
-
.map(|s| RbSeries::new(s.clone()))
|
235
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
234
236
|
}
|
235
237
|
|
236
238
|
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
@@ -238,11 +240,13 @@ impl RbDataFrame {
|
|
238
240
|
}
|
239
241
|
|
240
242
|
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
241
|
-
self
|
243
|
+
let series = self
|
244
|
+
.df
|
242
245
|
.borrow()
|
243
246
|
.column(&name)
|
244
|
-
.map(|s| RbSeries::new(s.clone()))
|
245
|
-
.map_err(RbPolarsErr::from)
|
247
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
248
|
+
.map_err(RbPolarsErr::from)?;
|
249
|
+
Ok(series)
|
246
250
|
}
|
247
251
|
|
248
252
|
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
@@ -405,52 +409,6 @@ impl RbDataFrame {
|
|
405
409
|
self.df.borrow().clone().lazy().into()
|
406
410
|
}
|
407
411
|
|
408
|
-
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
409
|
-
let s = self
|
410
|
-
.df
|
411
|
-
.borrow()
|
412
|
-
.max_horizontal()
|
413
|
-
.map_err(RbPolarsErr::from)?;
|
414
|
-
Ok(s.map(|s| s.into()))
|
415
|
-
}
|
416
|
-
|
417
|
-
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
418
|
-
let s = self
|
419
|
-
.df
|
420
|
-
.borrow()
|
421
|
-
.min_horizontal()
|
422
|
-
.map_err(RbPolarsErr::from)?;
|
423
|
-
Ok(s.map(|s| s.into()))
|
424
|
-
}
|
425
|
-
|
426
|
-
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
427
|
-
let null_strategy = if ignore_nulls {
|
428
|
-
NullStrategy::Ignore
|
429
|
-
} else {
|
430
|
-
NullStrategy::Propagate
|
431
|
-
};
|
432
|
-
let s = self
|
433
|
-
.df
|
434
|
-
.borrow()
|
435
|
-
.sum_horizontal(null_strategy)
|
436
|
-
.map_err(RbPolarsErr::from)?;
|
437
|
-
Ok(s.map(|s| s.into()))
|
438
|
-
}
|
439
|
-
|
440
|
-
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
441
|
-
let null_strategy = if ignore_nulls {
|
442
|
-
NullStrategy::Ignore
|
443
|
-
} else {
|
444
|
-
NullStrategy::Propagate
|
445
|
-
};
|
446
|
-
let s = self
|
447
|
-
.df
|
448
|
-
.borrow()
|
449
|
-
.mean_horizontal(null_strategy)
|
450
|
-
.map_err(RbPolarsErr::from)?;
|
451
|
-
Ok(s.map(|s| s.into()))
|
452
|
-
}
|
453
|
-
|
454
412
|
pub fn to_dummies(
|
455
413
|
&self,
|
456
414
|
columns: Option<Vec<String>>,
|