polars-df 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,16 +1,14 @@
|
|
1
|
-
use magnus::{r_hash::ForEach,
|
1
|
+
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
2
|
use polars::io::mmap::ReaderBytes;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
|
-
use std::
|
7
|
-
use std::io::{BufReader, BufWriter, Cursor};
|
6
|
+
use std::io::{BufWriter, Cursor};
|
8
7
|
use std::ops::Deref;
|
9
|
-
use std::path::PathBuf;
|
10
8
|
|
11
9
|
use crate::conversion::*;
|
12
10
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
13
|
-
use crate::series::to_rbseries_collection;
|
11
|
+
use crate::series::{to_rbseries_collection, to_series_collection};
|
14
12
|
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
15
13
|
|
16
14
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
@@ -137,13 +135,27 @@ impl RbDataFrame {
|
|
137
135
|
Ok(df.into())
|
138
136
|
}
|
139
137
|
|
140
|
-
pub fn read_parquet(
|
141
|
-
|
142
|
-
|
143
|
-
|
138
|
+
pub fn read_parquet(
|
139
|
+
rb_f: Value,
|
140
|
+
columns: Option<Vec<String>>,
|
141
|
+
projection: Option<Vec<usize>>,
|
142
|
+
n_rows: Option<usize>,
|
143
|
+
parallel: Wrap<ParallelStrategy>,
|
144
|
+
row_count: Option<(String, IdxSize)>,
|
145
|
+
low_memory: bool,
|
146
|
+
) -> RbResult<Self> {
|
147
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
148
|
+
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
149
|
+
let df = ParquetReader::new(mmap_bytes_r)
|
150
|
+
.with_projection(projection)
|
151
|
+
.with_columns(columns)
|
152
|
+
.read_parallel(parallel.0)
|
153
|
+
.with_n_rows(n_rows)
|
154
|
+
.with_row_count(row_count)
|
155
|
+
.set_low_memory(low_memory)
|
144
156
|
.finish()
|
145
|
-
.map_err(RbPolarsErr::from)
|
146
|
-
|
157
|
+
.map_err(RbPolarsErr::from)?;
|
158
|
+
Ok(RbDataFrame::new(df))
|
147
159
|
}
|
148
160
|
|
149
161
|
pub fn read_ipc(
|
@@ -313,6 +325,55 @@ impl RbDataFrame {
|
|
313
325
|
Ok(())
|
314
326
|
}
|
315
327
|
|
328
|
+
pub fn row_tuple(&self, idx: i64) -> Value {
|
329
|
+
let idx = if idx < 0 {
|
330
|
+
(self.df.borrow().height() as i64 + idx) as usize
|
331
|
+
} else {
|
332
|
+
idx as usize
|
333
|
+
};
|
334
|
+
RArray::from_vec(
|
335
|
+
self.df
|
336
|
+
.borrow()
|
337
|
+
.get_columns()
|
338
|
+
.iter()
|
339
|
+
.map(|s| match s.dtype() {
|
340
|
+
DataType::Object(_) => {
|
341
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
342
|
+
obj.unwrap().to_object()
|
343
|
+
}
|
344
|
+
_ => Wrap(s.get(idx)).into(),
|
345
|
+
})
|
346
|
+
.collect(),
|
347
|
+
)
|
348
|
+
.into()
|
349
|
+
}
|
350
|
+
|
351
|
+
pub fn row_tuples(&self) -> Value {
|
352
|
+
let df = &self.df;
|
353
|
+
RArray::from_vec(
|
354
|
+
(0..df.borrow().height())
|
355
|
+
.map(|idx| {
|
356
|
+
RArray::from_vec(
|
357
|
+
self.df
|
358
|
+
.borrow()
|
359
|
+
.get_columns()
|
360
|
+
.iter()
|
361
|
+
.map(|s| match s.dtype() {
|
362
|
+
DataType::Object(_) => {
|
363
|
+
let obj: Option<&ObjectValue> =
|
364
|
+
s.get_object(idx).map(|any| any.into());
|
365
|
+
obj.unwrap().to_object()
|
366
|
+
}
|
367
|
+
_ => Wrap(s.get(idx)).into(),
|
368
|
+
})
|
369
|
+
.collect(),
|
370
|
+
)
|
371
|
+
})
|
372
|
+
.collect(),
|
373
|
+
)
|
374
|
+
.into()
|
375
|
+
}
|
376
|
+
|
316
377
|
pub fn write_parquet(
|
317
378
|
&self,
|
318
379
|
rb_f: Value,
|
@@ -338,6 +399,86 @@ impl RbDataFrame {
|
|
338
399
|
Ok(())
|
339
400
|
}
|
340
401
|
|
402
|
+
pub fn add(&self, s: &RbSeries) -> RbResult<Self> {
|
403
|
+
let df = (&*self.df.borrow() + &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
404
|
+
Ok(df.into())
|
405
|
+
}
|
406
|
+
|
407
|
+
pub fn sub(&self, s: &RbSeries) -> RbResult<Self> {
|
408
|
+
let df = (&*self.df.borrow() - &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
409
|
+
Ok(df.into())
|
410
|
+
}
|
411
|
+
|
412
|
+
pub fn div(&self, s: &RbSeries) -> RbResult<Self> {
|
413
|
+
let df = (&*self.df.borrow() / &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
414
|
+
Ok(df.into())
|
415
|
+
}
|
416
|
+
|
417
|
+
pub fn mul(&self, s: &RbSeries) -> RbResult<Self> {
|
418
|
+
let df = (&*self.df.borrow() * &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
419
|
+
Ok(df.into())
|
420
|
+
}
|
421
|
+
|
422
|
+
pub fn rem(&self, s: &RbSeries) -> RbResult<Self> {
|
423
|
+
let df = (&*self.df.borrow() % &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
424
|
+
Ok(df.into())
|
425
|
+
}
|
426
|
+
|
427
|
+
pub fn add_df(&self, s: &Self) -> RbResult<Self> {
|
428
|
+
let df = (&*self.df.borrow() + &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
429
|
+
Ok(df.into())
|
430
|
+
}
|
431
|
+
|
432
|
+
pub fn sub_df(&self, s: &Self) -> RbResult<Self> {
|
433
|
+
let df = (&*self.df.borrow() - &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
434
|
+
Ok(df.into())
|
435
|
+
}
|
436
|
+
|
437
|
+
pub fn div_df(&self, s: &Self) -> RbResult<Self> {
|
438
|
+
let df = (&*self.df.borrow() / &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
439
|
+
Ok(df.into())
|
440
|
+
}
|
441
|
+
|
442
|
+
pub fn mul_df(&self, s: &Self) -> RbResult<Self> {
|
443
|
+
let df = (&*self.df.borrow() * &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
444
|
+
Ok(df.into())
|
445
|
+
}
|
446
|
+
|
447
|
+
pub fn rem_df(&self, s: &Self) -> RbResult<Self> {
|
448
|
+
let df = (&*self.df.borrow() % &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
449
|
+
Ok(df.into())
|
450
|
+
}
|
451
|
+
|
452
|
+
pub fn sample_n(
|
453
|
+
&self,
|
454
|
+
n: usize,
|
455
|
+
with_replacement: bool,
|
456
|
+
shuffle: bool,
|
457
|
+
seed: Option<u64>,
|
458
|
+
) -> RbResult<Self> {
|
459
|
+
let df = self
|
460
|
+
.df
|
461
|
+
.borrow()
|
462
|
+
.sample_n(n, with_replacement, shuffle, seed)
|
463
|
+
.map_err(RbPolarsErr::from)?;
|
464
|
+
Ok(df.into())
|
465
|
+
}
|
466
|
+
|
467
|
+
pub fn sample_frac(
|
468
|
+
&self,
|
469
|
+
frac: f64,
|
470
|
+
with_replacement: bool,
|
471
|
+
shuffle: bool,
|
472
|
+
seed: Option<u64>,
|
473
|
+
) -> RbResult<Self> {
|
474
|
+
let df = self
|
475
|
+
.df
|
476
|
+
.borrow()
|
477
|
+
.sample_frac(frac, with_replacement, shuffle, seed)
|
478
|
+
.map_err(RbPolarsErr::from)?;
|
479
|
+
Ok(df.into())
|
480
|
+
}
|
481
|
+
|
341
482
|
pub fn rechunk(&self) -> Self {
|
342
483
|
self.df.borrow().agg_chunks().into()
|
343
484
|
}
|
@@ -393,6 +534,73 @@ impl RbDataFrame {
|
|
393
534
|
self.df.borrow().width()
|
394
535
|
}
|
395
536
|
|
537
|
+
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
538
|
+
let columns = to_series_collection(columns)?;
|
539
|
+
self.df
|
540
|
+
.borrow_mut()
|
541
|
+
.hstack_mut(&columns)
|
542
|
+
.map_err(RbPolarsErr::from)?;
|
543
|
+
Ok(())
|
544
|
+
}
|
545
|
+
|
546
|
+
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
547
|
+
let columns = to_series_collection(columns)?;
|
548
|
+
let df = self
|
549
|
+
.df
|
550
|
+
.borrow()
|
551
|
+
.hstack(&columns)
|
552
|
+
.map_err(RbPolarsErr::from)?;
|
553
|
+
Ok(df.into())
|
554
|
+
}
|
555
|
+
|
556
|
+
pub fn extend(&self, df: &RbDataFrame) -> RbResult<()> {
|
557
|
+
self.df
|
558
|
+
.borrow_mut()
|
559
|
+
.extend(&df.df.borrow())
|
560
|
+
.map_err(RbPolarsErr::from)?;
|
561
|
+
Ok(())
|
562
|
+
}
|
563
|
+
|
564
|
+
pub fn vstack_mut(&self, df: &RbDataFrame) -> RbResult<()> {
|
565
|
+
self.df
|
566
|
+
.borrow_mut()
|
567
|
+
.vstack_mut(&df.df.borrow())
|
568
|
+
.map_err(RbPolarsErr::from)?;
|
569
|
+
Ok(())
|
570
|
+
}
|
571
|
+
|
572
|
+
pub fn vstack(&self, df: &RbDataFrame) -> RbResult<Self> {
|
573
|
+
let df = self
|
574
|
+
.df
|
575
|
+
.borrow()
|
576
|
+
.vstack(&df.df.borrow())
|
577
|
+
.map_err(RbPolarsErr::from)?;
|
578
|
+
Ok(df.into())
|
579
|
+
}
|
580
|
+
|
581
|
+
pub fn drop_in_place(&self, name: String) -> RbResult<RbSeries> {
|
582
|
+
let s = self
|
583
|
+
.df
|
584
|
+
.borrow_mut()
|
585
|
+
.drop_in_place(&name)
|
586
|
+
.map_err(RbPolarsErr::from)?;
|
587
|
+
Ok(RbSeries::new(s))
|
588
|
+
}
|
589
|
+
|
590
|
+
pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> RbResult<Self> {
|
591
|
+
let df = self
|
592
|
+
.df
|
593
|
+
.borrow()
|
594
|
+
.drop_nulls(subset.as_ref().map(|s| s.as_ref()))
|
595
|
+
.map_err(RbPolarsErr::from)?;
|
596
|
+
Ok(df.into())
|
597
|
+
}
|
598
|
+
|
599
|
+
pub fn drop(&self, name: String) -> RbResult<Self> {
|
600
|
+
let df = self.df.borrow().drop(&name).map_err(RbPolarsErr::from)?;
|
601
|
+
Ok(RbDataFrame::new(df))
|
602
|
+
}
|
603
|
+
|
396
604
|
pub fn select_at_idx(&self, idx: usize) -> Option<RbSeries> {
|
397
605
|
self.df
|
398
606
|
.borrow()
|
@@ -400,6 +608,10 @@ impl RbDataFrame {
|
|
400
608
|
.map(|s| RbSeries::new(s.clone()))
|
401
609
|
}
|
402
610
|
|
611
|
+
pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
|
612
|
+
self.df.borrow().find_idx_by_name(&name)
|
613
|
+
}
|
614
|
+
|
403
615
|
// TODO remove clone
|
404
616
|
pub fn column(&self, name: String) -> RbResult<RbSeries> {
|
405
617
|
self.df
|
@@ -702,6 +914,11 @@ impl RbDataFrame {
|
|
702
914
|
Ok(out.into())
|
703
915
|
}
|
704
916
|
|
917
|
+
pub fn to_struct(&self, name: String) -> RbSeries {
|
918
|
+
let s = self.df.borrow().clone().into_struct(&name);
|
919
|
+
s.into_series().into()
|
920
|
+
}
|
921
|
+
|
705
922
|
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
|
706
923
|
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
707
924
|
Ok(df.into())
|
data/ext/polars/src/error.rs
CHANGED
data/ext/polars/src/lazy/dataframe.rs
CHANGED
@@ -211,7 +211,7 @@ impl RbLazyFrame {
|
|
211
211
|
projection_pushdown: bool,
|
212
212
|
simplify_expr: bool,
|
213
213
|
slice_pushdown: bool,
|
214
|
-
|
214
|
+
cse: bool,
|
215
215
|
allow_streaming: bool,
|
216
216
|
) -> RbLazyFrame {
|
217
217
|
let ldf = self.ldf.clone();
|
@@ -220,7 +220,7 @@ impl RbLazyFrame {
|
|
220
220
|
.with_predicate_pushdown(predicate_pushdown)
|
221
221
|
.with_simplify_expr(simplify_expr)
|
222
222
|
.with_slice_pushdown(slice_pushdown)
|
223
|
-
|
223
|
+
.with_common_subplan_elimination(cse)
|
224
224
|
.with_streaming(allow_streaming)
|
225
225
|
.with_projection_pushdown(projection_pushdown);
|
226
226
|
ldf.into()
|
@@ -520,9 +520,9 @@ impl RbLazyFrame {
|
|
520
520
|
Ok(self.get_schema()?.iter_names().cloned().collect())
|
521
521
|
}
|
522
522
|
|
523
|
-
pub fn dtypes(&self) -> RbResult<Vec<
|
523
|
+
pub fn dtypes(&self) -> RbResult<Vec<Value>> {
|
524
524
|
let schema = self.get_schema()?;
|
525
|
-
let iter = schema.iter_dtypes().map(|dt| dt.
|
525
|
+
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into());
|
526
526
|
Ok(iter.collect())
|
527
527
|
}
|
528
528
|
|
@@ -533,7 +533,7 @@ impl RbLazyFrame {
|
|
533
533
|
schema.iter_fields().for_each(|fld| {
|
534
534
|
// TODO remove unwrap
|
535
535
|
schema_dict
|
536
|
-
.aset(fld.name().clone(), fld.data_type().
|
536
|
+
.aset::<String, Value>(fld.name().clone(), Wrap(fld.data_type().clone()).into())
|
537
537
|
.unwrap();
|
538
538
|
});
|
539
539
|
Ok(schema_dict)
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, RString, Value};
|
1
|
+
use magnus::{class, RArray, RString, Value};
|
2
2
|
use polars::chunked_array::ops::SortOptions;
|
3
3
|
use polars::lazy::dsl;
|
4
4
|
use polars::lazy::dsl::Operator;
|
@@ -8,7 +8,8 @@ use polars::series::ops::NullBehavior;
|
|
8
8
|
use crate::conversion::*;
|
9
9
|
use crate::lazy::apply::*;
|
10
10
|
use crate::lazy::utils::rb_exprs_to_exprs;
|
11
|
-
use crate::
|
11
|
+
use crate::utils::reinterpret;
|
12
|
+
use crate::{RbResult, RbSeries};
|
12
13
|
|
13
14
|
#[magnus::wrap(class = "Polars::RbExpr")]
|
14
15
|
#[derive(Clone)]
|
@@ -658,6 +659,65 @@ impl RbExpr {
|
|
658
659
|
self.inner.clone().str().starts_with(sub).into()
|
659
660
|
}
|
660
661
|
|
662
|
+
pub fn str_hex_encode(&self) -> Self {
|
663
|
+
self.clone()
|
664
|
+
.inner
|
665
|
+
.map(
|
666
|
+
move |s| s.utf8().map(|s| s.hex_encode().into_series()),
|
667
|
+
GetOutput::same_type(),
|
668
|
+
)
|
669
|
+
.with_fmt("str.hex_encode")
|
670
|
+
.into()
|
671
|
+
}
|
672
|
+
|
673
|
+
pub fn str_hex_decode(&self, strict: Option<bool>) -> Self {
|
674
|
+
self.clone()
|
675
|
+
.inner
|
676
|
+
.map(
|
677
|
+
move |s| s.utf8()?.hex_decode(strict).map(|s| s.into_series()),
|
678
|
+
GetOutput::same_type(),
|
679
|
+
)
|
680
|
+
.with_fmt("str.hex_decode")
|
681
|
+
.into()
|
682
|
+
}
|
683
|
+
|
684
|
+
pub fn str_base64_encode(&self) -> Self {
|
685
|
+
self.clone()
|
686
|
+
.inner
|
687
|
+
.map(
|
688
|
+
move |s| s.utf8().map(|s| s.base64_encode().into_series()),
|
689
|
+
GetOutput::same_type(),
|
690
|
+
)
|
691
|
+
.with_fmt("str.base64_encode")
|
692
|
+
.into()
|
693
|
+
}
|
694
|
+
|
695
|
+
pub fn str_base64_decode(&self, strict: Option<bool>) -> Self {
|
696
|
+
self.clone()
|
697
|
+
.inner
|
698
|
+
.map(
|
699
|
+
move |s| s.utf8()?.base64_decode(strict).map(|s| s.into_series()),
|
700
|
+
GetOutput::same_type(),
|
701
|
+
)
|
702
|
+
.with_fmt("str.base64_decode")
|
703
|
+
.into()
|
704
|
+
}
|
705
|
+
|
706
|
+
pub fn str_json_path_match(&self, pat: String) -> Self {
|
707
|
+
let function = move |s: Series| {
|
708
|
+
let ca = s.utf8()?;
|
709
|
+
match ca.json_path_match(&pat) {
|
710
|
+
Ok(ca) => Ok(ca.into_series()),
|
711
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
712
|
+
}
|
713
|
+
};
|
714
|
+
self.clone()
|
715
|
+
.inner
|
716
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
717
|
+
.with_fmt("str.json_path_match")
|
718
|
+
.into()
|
719
|
+
}
|
720
|
+
|
661
721
|
pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
|
662
722
|
self.inner.clone().str().extract(&pat, group_index).into()
|
663
723
|
}
|
@@ -886,6 +946,23 @@ impl RbExpr {
|
|
886
946
|
self.inner.clone().dt().round(&every, &offset).into()
|
887
947
|
}
|
888
948
|
|
949
|
+
pub fn dot(&self, other: &RbExpr) -> Self {
|
950
|
+
self.inner.clone().dot(other.inner.clone()).into()
|
951
|
+
}
|
952
|
+
|
953
|
+
pub fn reinterpret(&self, signed: bool) -> Self {
|
954
|
+
let function = move |s: Series| reinterpret(&s, signed);
|
955
|
+
let dt = if signed {
|
956
|
+
DataType::Int64
|
957
|
+
} else {
|
958
|
+
DataType::UInt64
|
959
|
+
};
|
960
|
+
self.clone()
|
961
|
+
.inner
|
962
|
+
.map(function, GetOutput::from_type(dt))
|
963
|
+
.into()
|
964
|
+
}
|
965
|
+
|
889
966
|
pub fn mode(&self) -> Self {
|
890
967
|
self.inner.clone().mode().into()
|
891
968
|
}
|
@@ -1194,6 +1271,28 @@ impl RbExpr {
|
|
1194
1271
|
.into()
|
1195
1272
|
}
|
1196
1273
|
|
1274
|
+
pub fn lst_to_struct(
|
1275
|
+
&self,
|
1276
|
+
width_strat: Wrap<ListToStructWidthStrategy>,
|
1277
|
+
_name_gen: Option<Value>,
|
1278
|
+
) -> RbResult<Self> {
|
1279
|
+
// TODO fix
|
1280
|
+
let name_gen = None;
|
1281
|
+
// let name_gen = name_gen.map(|lambda| {
|
1282
|
+
// Arc::new(move |idx: usize| {
|
1283
|
+
// let out: Value = lambda.funcall("call", (idx,)).unwrap();
|
1284
|
+
// out.try_convert::<String>().unwrap()
|
1285
|
+
// }) as NameGenerator
|
1286
|
+
// });
|
1287
|
+
|
1288
|
+
Ok(self
|
1289
|
+
.inner
|
1290
|
+
.clone()
|
1291
|
+
.arr()
|
1292
|
+
.to_struct(width_strat.0, name_gen)
|
1293
|
+
.into())
|
1294
|
+
}
|
1295
|
+
|
1197
1296
|
pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool) -> Self {
|
1198
1297
|
let options = RankOptions {
|
1199
1298
|
method: method.0,
|
@@ -1305,6 +1404,21 @@ impl RbExpr {
|
|
1305
1404
|
self.inner.clone().ewm_var(options).into()
|
1306
1405
|
}
|
1307
1406
|
|
1407
|
+
pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> Self {
|
1408
|
+
let value = Value::from(value);
|
1409
|
+
self.inner
|
1410
|
+
.clone()
|
1411
|
+
.apply(
|
1412
|
+
move |s| {
|
1413
|
+
let value = value.try_convert::<Wrap<AnyValue>>().unwrap().0;
|
1414
|
+
s.extend_constant(value, n)
|
1415
|
+
},
|
1416
|
+
GetOutput::same_type(),
|
1417
|
+
)
|
1418
|
+
.with_fmt("extend")
|
1419
|
+
.into()
|
1420
|
+
}
|
1421
|
+
|
1308
1422
|
pub fn any(&self) -> Self {
|
1309
1423
|
self.inner.clone().any().into()
|
1310
1424
|
}
|
@@ -1332,6 +1446,10 @@ impl RbExpr {
|
|
1332
1446
|
pub fn exp(&self) -> Self {
|
1333
1447
|
self.inner.clone().exp().into()
|
1334
1448
|
}
|
1449
|
+
|
1450
|
+
pub fn entropy(&self, base: f64, normalize: bool) -> Self {
|
1451
|
+
self.inner.clone().entropy(base, normalize).into()
|
1452
|
+
}
|
1335
1453
|
}
|
1336
1454
|
|
1337
1455
|
pub fn col(name: String) -> RbExpr {
|
@@ -1365,8 +1483,24 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
1365
1483
|
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
1366
1484
|
if value.is_nil() {
|
1367
1485
|
Ok(dsl::lit(Null {}).into())
|
1486
|
+
} else if let Ok(series) = value.try_convert::<&RbSeries>() {
|
1487
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
1368
1488
|
} else if let Some(v) = RString::from_value(value) {
|
1369
1489
|
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
1490
|
+
} else if value.is_kind_of(class::integer()) {
|
1491
|
+
match value.try_convert::<i64>() {
|
1492
|
+
Ok(val) => {
|
1493
|
+
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
1494
|
+
Ok(dsl::lit(val as i32).into())
|
1495
|
+
} else {
|
1496
|
+
Ok(dsl::lit(val).into())
|
1497
|
+
}
|
1498
|
+
}
|
1499
|
+
_ => {
|
1500
|
+
let val = value.try_convert::<u64>()?;
|
1501
|
+
Ok(dsl::lit(val).into())
|
1502
|
+
}
|
1503
|
+
}
|
1370
1504
|
} else {
|
1371
1505
|
Ok(dsl::lit(value.try_convert::<f64>()?).into())
|
1372
1506
|
}
|
@@ -1376,6 +1510,27 @@ pub fn arange(low: &RbExpr, high: &RbExpr, step: usize) -> RbExpr {
|
|
1376
1510
|
polars::lazy::dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
1377
1511
|
}
|
1378
1512
|
|
1513
|
+
pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
1514
|
+
if value.is_nil() {
|
1515
|
+
Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
|
1516
|
+
} else {
|
1517
|
+
todo!();
|
1518
|
+
}
|
1519
|
+
}
|
1520
|
+
|
1521
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
1522
|
+
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
1523
|
+
}
|
1524
|
+
|
1525
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
1526
|
+
polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
|
1527
|
+
.into()
|
1528
|
+
}
|
1529
|
+
|
1530
|
+
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
1531
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
1532
|
+
}
|
1533
|
+
|
1379
1534
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
1380
1535
|
#[derive(Clone)]
|
1381
1536
|
pub struct RbWhen {
|