polars-df 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion.rs +35 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/lazy/dataframe.rs +3 -3
- data/ext/polars/src/lazy/dsl.rs +59 -2
- data/ext/polars/src/lib.rs +151 -10
- data/ext/polars/src/series.rs +182 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2284 -137
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +612 -7
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1261 -67
- data/lib/polars/lazy_functions.rb +288 -10
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +1476 -212
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +43 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +7 -10
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e6fb732e5dafe2fde285322554bd9159483cbbdf17d6e2bba9cba9a83563b47
|
4
|
+
data.tar.gz: 1b4249d0c0100f136973c601b8404cb6d92abc632d5ed0476bd93bc5360a11dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9414d6f60c489e2b3b72885288822083ba8c04bac4053f4e34c1d53ee805d164f17fe4b8b3a8f4ff562550bcc657f374bea6e250b52985367f601ea50e3037f
|
7
|
+
data.tar.gz: 9e3a7cfe105f03ec20e9c26aa38c1475074ccc1ea057a170a97b7068b41943d561d50af49bb1d1f74b7705809dc1375900f542ab93683ba627dea080274f6d91
|
data/.yardopts
ADDED
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -1160,7 +1160,7 @@ dependencies = [
|
|
1160
1160
|
|
1161
1161
|
[[package]]
|
1162
1162
|
name = "polars"
|
1163
|
-
version = "0.1.2"
|
1163
|
+
version = "0.1.3"
|
1164
1164
|
dependencies = [
|
1165
1165
|
"magnus",
|
1166
1166
|
"polars 0.25.1",
|
@@ -1217,6 +1217,7 @@ dependencies = [
|
|
1217
1217
|
"rayon",
|
1218
1218
|
"regex",
|
1219
1219
|
"serde",
|
1220
|
+
"serde_json",
|
1220
1221
|
"smartstring",
|
1221
1222
|
"thiserror",
|
1222
1223
|
]
|
data/README.md
CHANGED
@@ -27,7 +27,7 @@ Polars.read_csv("iris.csv")
|
|
27
27
|
.collect
|
28
28
|
```
|
29
29
|
|
30
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
30
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
|
31
31
|
|
32
32
|
## Examples
|
33
33
|
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.2"
|
3
|
+
version = "0.1.3"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -17,10 +17,12 @@ version = "0.25.1"
|
|
17
17
|
features = [
|
18
18
|
"abs",
|
19
19
|
"arange",
|
20
|
+
"arg_where",
|
20
21
|
"concat_str",
|
21
22
|
"csv-file",
|
22
23
|
"cum_agg",
|
23
24
|
"cumulative_eval",
|
25
|
+
"dataframe_arithmetic",
|
24
26
|
"date_offset",
|
25
27
|
"diagonal_concat",
|
26
28
|
"diff",
|
@@ -38,16 +40,20 @@ features = [
|
|
38
40
|
"lazy",
|
39
41
|
"lazy_regex",
|
40
42
|
"list_eval",
|
43
|
+
"list_to_struct",
|
41
44
|
"log",
|
42
45
|
"meta",
|
43
46
|
"mode",
|
44
47
|
"moment",
|
48
|
+
"object",
|
45
49
|
"parquet",
|
46
50
|
"partition_by",
|
47
51
|
"pct_change",
|
48
52
|
"product",
|
53
|
+
"propagate_nans",
|
49
54
|
"random",
|
50
55
|
"rank",
|
56
|
+
"reinterpret",
|
51
57
|
"repeat_by",
|
52
58
|
"rolling_window",
|
53
59
|
"round_series",
|
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
use magnus::{RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
2
|
+
use polars::chunked_array::object::PolarsObjectSafe;
|
2
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
3
4
|
use polars::datatypes::AnyValue;
|
4
5
|
use polars::frame::DataFrame;
|
@@ -79,6 +80,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
79
80
|
AnyValue::Null => *QNIL,
|
80
81
|
AnyValue::Boolean(v) => Value::from(v),
|
81
82
|
AnyValue::Utf8(v) => Value::from(v),
|
83
|
+
AnyValue::Date(v) => class::time()
|
84
|
+
.funcall::<_, _, Value>("at", (v * 86400,))
|
85
|
+
.unwrap()
|
86
|
+
.funcall::<_, _, Value>("utc", ())
|
87
|
+
.unwrap()
|
88
|
+
.funcall::<_, _, Value>("to_date", ())
|
89
|
+
.unwrap(),
|
82
90
|
_ => todo!(),
|
83
91
|
}
|
84
92
|
}
|
@@ -102,10 +110,19 @@ impl TryConvert for Wrap<DataType> {
|
|
102
110
|
"i32" => DataType::Int32,
|
103
111
|
"i64" => DataType::Int64,
|
104
112
|
"str" => DataType::Utf8,
|
113
|
+
"bin" => DataType::Binary,
|
105
114
|
"bool" => DataType::Boolean,
|
115
|
+
"cat" => DataType::Categorical(None),
|
116
|
+
"date" => DataType::Date,
|
117
|
+
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
106
118
|
"f32" => DataType::Float32,
|
119
|
+
"time" => DataType::Time,
|
120
|
+
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
107
121
|
"f64" => DataType::Float64,
|
108
|
-
"
|
122
|
+
// "obj" => DataType::Object(OBJECT_NAME),
|
123
|
+
"list" => DataType::List(Box::new(DataType::Boolean)),
|
124
|
+
"null" => DataType::Null,
|
125
|
+
"unk" => DataType::Unknown,
|
109
126
|
_ => {
|
110
127
|
return Err(RbValueError::new_err(format!(
|
111
128
|
"{} is not a supported DataType.",
|
@@ -407,3 +424,19 @@ pub fn parse_parquet_compression(
|
|
407
424
|
};
|
408
425
|
Ok(parsed)
|
409
426
|
}
|
427
|
+
|
428
|
+
pub struct ObjectValue {
|
429
|
+
pub inner: Value,
|
430
|
+
}
|
431
|
+
|
432
|
+
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
433
|
+
fn from(val: &dyn PolarsObjectSafe) -> Self {
|
434
|
+
unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
|
435
|
+
}
|
436
|
+
}
|
437
|
+
|
438
|
+
impl ObjectValue {
|
439
|
+
pub fn to_object(&self) -> Value {
|
440
|
+
self.inner
|
441
|
+
}
|
442
|
+
}
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,16 +1,14 @@
|
|
1
|
-
use magnus::{r_hash::ForEach,
|
1
|
+
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
2
|
use polars::io::mmap::ReaderBytes;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
|
-
use std::
|
7
|
-
use std::io::{BufReader, BufWriter, Cursor};
|
6
|
+
use std::io::{BufWriter, Cursor};
|
8
7
|
use std::ops::Deref;
|
9
|
-
use std::path::PathBuf;
|
10
8
|
|
11
9
|
use crate::conversion::*;
|
12
10
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
13
|
-
use crate::series::to_rbseries_collection;
|
11
|
+
use crate::series::{to_rbseries_collection, to_series_collection};
|
14
12
|
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
15
13
|
|
16
14
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
@@ -137,13 +135,27 @@ impl RbDataFrame {
|
|
137
135
|
Ok(df.into())
|
138
136
|
}
|
139
137
|
|
140
|
-
pub fn read_parquet(
|
141
|
-
|
142
|
-
|
143
|
-
|
138
|
+
pub fn read_parquet(
|
139
|
+
rb_f: Value,
|
140
|
+
columns: Option<Vec<String>>,
|
141
|
+
projection: Option<Vec<usize>>,
|
142
|
+
n_rows: Option<usize>,
|
143
|
+
parallel: Wrap<ParallelStrategy>,
|
144
|
+
row_count: Option<(String, IdxSize)>,
|
145
|
+
low_memory: bool,
|
146
|
+
) -> RbResult<Self> {
|
147
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
148
|
+
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
149
|
+
let df = ParquetReader::new(mmap_bytes_r)
|
150
|
+
.with_projection(projection)
|
151
|
+
.with_columns(columns)
|
152
|
+
.read_parallel(parallel.0)
|
153
|
+
.with_n_rows(n_rows)
|
154
|
+
.with_row_count(row_count)
|
155
|
+
.set_low_memory(low_memory)
|
144
156
|
.finish()
|
145
|
-
.map_err(RbPolarsErr::from)
|
146
|
-
|
157
|
+
.map_err(RbPolarsErr::from)?;
|
158
|
+
Ok(RbDataFrame::new(df))
|
147
159
|
}
|
148
160
|
|
149
161
|
pub fn read_ipc(
|
@@ -313,6 +325,55 @@ impl RbDataFrame {
|
|
313
325
|
Ok(())
|
314
326
|
}
|
315
327
|
|
328
|
+
pub fn row_tuple(&self, idx: i64) -> Value {
|
329
|
+
let idx = if idx < 0 {
|
330
|
+
(self.df.borrow().height() as i64 + idx) as usize
|
331
|
+
} else {
|
332
|
+
idx as usize
|
333
|
+
};
|
334
|
+
RArray::from_vec(
|
335
|
+
self.df
|
336
|
+
.borrow()
|
337
|
+
.get_columns()
|
338
|
+
.iter()
|
339
|
+
.map(|s| match s.dtype() {
|
340
|
+
DataType::Object(_) => {
|
341
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
342
|
+
obj.unwrap().to_object()
|
343
|
+
}
|
344
|
+
_ => Wrap(s.get(idx)).into(),
|
345
|
+
})
|
346
|
+
.collect(),
|
347
|
+
)
|
348
|
+
.into()
|
349
|
+
}
|
350
|
+
|
351
|
+
pub fn row_tuples(&self) -> Value {
|
352
|
+
let df = &self.df;
|
353
|
+
RArray::from_vec(
|
354
|
+
(0..df.borrow().height())
|
355
|
+
.map(|idx| {
|
356
|
+
RArray::from_vec(
|
357
|
+
self.df
|
358
|
+
.borrow()
|
359
|
+
.get_columns()
|
360
|
+
.iter()
|
361
|
+
.map(|s| match s.dtype() {
|
362
|
+
DataType::Object(_) => {
|
363
|
+
let obj: Option<&ObjectValue> =
|
364
|
+
s.get_object(idx).map(|any| any.into());
|
365
|
+
obj.unwrap().to_object()
|
366
|
+
}
|
367
|
+
_ => Wrap(s.get(idx)).into(),
|
368
|
+
})
|
369
|
+
.collect(),
|
370
|
+
)
|
371
|
+
})
|
372
|
+
.collect(),
|
373
|
+
)
|
374
|
+
.into()
|
375
|
+
}
|
376
|
+
|
316
377
|
pub fn write_parquet(
|
317
378
|
&self,
|
318
379
|
rb_f: Value,
|
@@ -338,6 +399,86 @@ impl RbDataFrame {
|
|
338
399
|
Ok(())
|
339
400
|
}
|
340
401
|
|
402
|
+
pub fn add(&self, s: &RbSeries) -> RbResult<Self> {
|
403
|
+
let df = (&*self.df.borrow() + &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
404
|
+
Ok(df.into())
|
405
|
+
}
|
406
|
+
|
407
|
+
pub fn sub(&self, s: &RbSeries) -> RbResult<Self> {
|
408
|
+
let df = (&*self.df.borrow() - &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
409
|
+
Ok(df.into())
|
410
|
+
}
|
411
|
+
|
412
|
+
pub fn div(&self, s: &RbSeries) -> RbResult<Self> {
|
413
|
+
let df = (&*self.df.borrow() / &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
414
|
+
Ok(df.into())
|
415
|
+
}
|
416
|
+
|
417
|
+
pub fn mul(&self, s: &RbSeries) -> RbResult<Self> {
|
418
|
+
let df = (&*self.df.borrow() * &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
419
|
+
Ok(df.into())
|
420
|
+
}
|
421
|
+
|
422
|
+
pub fn rem(&self, s: &RbSeries) -> RbResult<Self> {
|
423
|
+
let df = (&*self.df.borrow() % &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
424
|
+
Ok(df.into())
|
425
|
+
}
|
426
|
+
|
427
|
+
pub fn add_df(&self, s: &Self) -> RbResult<Self> {
|
428
|
+
let df = (&*self.df.borrow() + &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
429
|
+
Ok(df.into())
|
430
|
+
}
|
431
|
+
|
432
|
+
pub fn sub_df(&self, s: &Self) -> RbResult<Self> {
|
433
|
+
let df = (&*self.df.borrow() - &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
434
|
+
Ok(df.into())
|
435
|
+
}
|
436
|
+
|
437
|
+
pub fn div_df(&self, s: &Self) -> RbResult<Self> {
|
438
|
+
let df = (&*self.df.borrow() / &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
439
|
+
Ok(df.into())
|
440
|
+
}
|
441
|
+
|
442
|
+
pub fn mul_df(&self, s: &Self) -> RbResult<Self> {
|
443
|
+
let df = (&*self.df.borrow() * &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
444
|
+
Ok(df.into())
|
445
|
+
}
|
446
|
+
|
447
|
+
pub fn rem_df(&self, s: &Self) -> RbResult<Self> {
|
448
|
+
let df = (&*self.df.borrow() % &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
449
|
+
Ok(df.into())
|
450
|
+
}
|
451
|
+
|
452
|
+
pub fn sample_n(
|
453
|
+
&self,
|
454
|
+
n: usize,
|
455
|
+
with_replacement: bool,
|
456
|
+
shuffle: bool,
|
457
|
+
seed: Option<u64>,
|
458
|
+
) -> RbResult<Self> {
|
459
|
+
let df = self
|
460
|
+
.df
|
461
|
+
.borrow()
|
462
|
+
.sample_n(n, with_replacement, shuffle, seed)
|
463
|
+
.map_err(RbPolarsErr::from)?;
|
464
|
+
Ok(df.into())
|
465
|
+
}
|
466
|
+
|
467
|
+
pub fn sample_frac(
|
468
|
+
&self,
|
469
|
+
frac: f64,
|
470
|
+
with_replacement: bool,
|
471
|
+
shuffle: bool,
|
472
|
+
seed: Option<u64>,
|
473
|
+
) -> RbResult<Self> {
|
474
|
+
let df = self
|
475
|
+
.df
|
476
|
+
.borrow()
|
477
|
+
.sample_frac(frac, with_replacement, shuffle, seed)
|
478
|
+
.map_err(RbPolarsErr::from)?;
|
479
|
+
Ok(df.into())
|
480
|
+
}
|
481
|
+
|
341
482
|
pub fn rechunk(&self) -> Self {
|
342
483
|
self.df.borrow().agg_chunks().into()
|
343
484
|
}
|
@@ -393,6 +534,73 @@ impl RbDataFrame {
|
|
393
534
|
self.df.borrow().width()
|
394
535
|
}
|
395
536
|
|
537
|
+
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
538
|
+
let columns = to_series_collection(columns)?;
|
539
|
+
self.df
|
540
|
+
.borrow_mut()
|
541
|
+
.hstack_mut(&columns)
|
542
|
+
.map_err(RbPolarsErr::from)?;
|
543
|
+
Ok(())
|
544
|
+
}
|
545
|
+
|
546
|
+
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
547
|
+
let columns = to_series_collection(columns)?;
|
548
|
+
let df = self
|
549
|
+
.df
|
550
|
+
.borrow()
|
551
|
+
.hstack(&columns)
|
552
|
+
.map_err(RbPolarsErr::from)?;
|
553
|
+
Ok(df.into())
|
554
|
+
}
|
555
|
+
|
556
|
+
pub fn extend(&self, df: &RbDataFrame) -> RbResult<()> {
|
557
|
+
self.df
|
558
|
+
.borrow_mut()
|
559
|
+
.extend(&df.df.borrow())
|
560
|
+
.map_err(RbPolarsErr::from)?;
|
561
|
+
Ok(())
|
562
|
+
}
|
563
|
+
|
564
|
+
pub fn vstack_mut(&self, df: &RbDataFrame) -> RbResult<()> {
|
565
|
+
self.df
|
566
|
+
.borrow_mut()
|
567
|
+
.vstack_mut(&df.df.borrow())
|
568
|
+
.map_err(RbPolarsErr::from)?;
|
569
|
+
Ok(())
|
570
|
+
}
|
571
|
+
|
572
|
+
pub fn vstack(&self, df: &RbDataFrame) -> RbResult<Self> {
|
573
|
+
let df = self
|
574
|
+
.df
|
575
|
+
.borrow()
|
576
|
+
.vstack(&df.df.borrow())
|
577
|
+
.map_err(RbPolarsErr::from)?;
|
578
|
+
Ok(df.into())
|
579
|
+
}
|
580
|
+
|
581
|
+
pub fn drop_in_place(&self, name: String) -> RbResult<RbSeries> {
|
582
|
+
let s = self
|
583
|
+
.df
|
584
|
+
.borrow_mut()
|
585
|
+
.drop_in_place(&name)
|
586
|
+
.map_err(RbPolarsErr::from)?;
|
587
|
+
Ok(RbSeries::new(s))
|
588
|
+
}
|
589
|
+
|
590
|
+
pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> RbResult<Self> {
|
591
|
+
let df = self
|
592
|
+
.df
|
593
|
+
.borrow()
|
594
|
+
.drop_nulls(subset.as_ref().map(|s| s.as_ref()))
|
595
|
+
.map_err(RbPolarsErr::from)?;
|
596
|
+
Ok(df.into())
|
597
|
+
}
|
598
|
+
|
599
|
+
pub fn drop(&self, name: String) -> RbResult<Self> {
|
600
|
+
let df = self.df.borrow().drop(&name).map_err(RbPolarsErr::from)?;
|
601
|
+
Ok(RbDataFrame::new(df))
|
602
|
+
}
|
603
|
+
|
396
604
|
pub fn select_at_idx(&self, idx: usize) -> Option<RbSeries> {
|
397
605
|
self.df
|
398
606
|
.borrow()
|
@@ -400,6 +608,10 @@ impl RbDataFrame {
|
|
400
608
|
.map(|s| RbSeries::new(s.clone()))
|
401
609
|
}
|
402
610
|
|
611
|
+
pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
|
612
|
+
self.df.borrow().find_idx_by_name(&name)
|
613
|
+
}
|
614
|
+
|
403
615
|
// TODO remove clone
|
404
616
|
pub fn column(&self, name: String) -> RbResult<RbSeries> {
|
405
617
|
self.df
|
@@ -702,6 +914,11 @@ impl RbDataFrame {
|
|
702
914
|
Ok(out.into())
|
703
915
|
}
|
704
916
|
|
917
|
+
pub fn to_struct(&self, name: String) -> RbSeries {
|
918
|
+
let s = self.df.borrow().clone().into_struct(&name);
|
919
|
+
s.into_series().into()
|
920
|
+
}
|
921
|
+
|
705
922
|
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
|
706
923
|
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
707
924
|
Ok(df.into())
|
data/ext/polars/src/lazy/dataframe.rs
CHANGED
@@ -520,9 +520,9 @@ impl RbLazyFrame {
|
|
520
520
|
Ok(self.get_schema()?.iter_names().cloned().collect())
|
521
521
|
}
|
522
522
|
|
523
|
-
pub fn dtypes(&self) -> RbResult<Vec<
|
523
|
+
pub fn dtypes(&self) -> RbResult<Vec<Value>> {
|
524
524
|
let schema = self.get_schema()?;
|
525
|
-
let iter = schema.iter_dtypes().map(|dt| dt.
|
525
|
+
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into());
|
526
526
|
Ok(iter.collect())
|
527
527
|
}
|
528
528
|
|
@@ -533,7 +533,7 @@ impl RbLazyFrame {
|
|
533
533
|
schema.iter_fields().for_each(|fld| {
|
534
534
|
// TODO remove unwrap
|
535
535
|
schema_dict
|
536
|
-
.aset(fld.name().clone(), fld.data_type().
|
536
|
+
.aset::<String, Value>(fld.name().clone(), Wrap(fld.data_type().clone()).into())
|
537
537
|
.unwrap();
|
538
538
|
});
|
539
539
|
Ok(schema_dict)
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, RString, Value};
|
1
|
+
use magnus::{class, RArray, RString, Value};
|
2
2
|
use polars::chunked_array::ops::SortOptions;
|
3
3
|
use polars::lazy::dsl;
|
4
4
|
use polars::lazy::dsl::Operator;
|
@@ -8,7 +8,8 @@ use polars::series::ops::NullBehavior;
|
|
8
8
|
use crate::conversion::*;
|
9
9
|
use crate::lazy::apply::*;
|
10
10
|
use crate::lazy::utils::rb_exprs_to_exprs;
|
11
|
-
use crate::
|
11
|
+
use crate::utils::reinterpret;
|
12
|
+
use crate::{RbResult, RbSeries};
|
12
13
|
|
13
14
|
#[magnus::wrap(class = "Polars::RbExpr")]
|
14
15
|
#[derive(Clone)]
|
@@ -886,6 +887,19 @@ impl RbExpr {
|
|
886
887
|
self.inner.clone().dt().round(&every, &offset).into()
|
887
888
|
}
|
888
889
|
|
890
|
+
pub fn reinterpret(&self, signed: bool) -> Self {
|
891
|
+
let function = move |s: Series| reinterpret(&s, signed);
|
892
|
+
let dt = if signed {
|
893
|
+
DataType::Int64
|
894
|
+
} else {
|
895
|
+
DataType::UInt64
|
896
|
+
};
|
897
|
+
self.clone()
|
898
|
+
.inner
|
899
|
+
.map(function, GetOutput::from_type(dt))
|
900
|
+
.into()
|
901
|
+
}
|
902
|
+
|
889
903
|
pub fn mode(&self) -> Self {
|
890
904
|
self.inner.clone().mode().into()
|
891
905
|
}
|
@@ -1305,6 +1319,21 @@ impl RbExpr {
|
|
1305
1319
|
self.inner.clone().ewm_var(options).into()
|
1306
1320
|
}
|
1307
1321
|
|
1322
|
+
pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> Self {
|
1323
|
+
let value = Value::from(value);
|
1324
|
+
self.inner
|
1325
|
+
.clone()
|
1326
|
+
.apply(
|
1327
|
+
move |s| {
|
1328
|
+
let value = value.try_convert::<Wrap<AnyValue>>().unwrap().0;
|
1329
|
+
s.extend_constant(value, n)
|
1330
|
+
},
|
1331
|
+
GetOutput::same_type(),
|
1332
|
+
)
|
1333
|
+
.with_fmt("extend")
|
1334
|
+
.into()
|
1335
|
+
}
|
1336
|
+
|
1308
1337
|
pub fn any(&self) -> Self {
|
1309
1338
|
self.inner.clone().any().into()
|
1310
1339
|
}
|
@@ -1332,6 +1361,10 @@ impl RbExpr {
|
|
1332
1361
|
pub fn exp(&self) -> Self {
|
1333
1362
|
self.inner.clone().exp().into()
|
1334
1363
|
}
|
1364
|
+
|
1365
|
+
pub fn entropy(&self, base: f64, normalize: bool) -> Self {
|
1366
|
+
self.inner.clone().entropy(base, normalize).into()
|
1367
|
+
}
|
1335
1368
|
}
|
1336
1369
|
|
1337
1370
|
pub fn col(name: String) -> RbExpr {
|
@@ -1365,8 +1398,24 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
1365
1398
|
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
1366
1399
|
if value.is_nil() {
|
1367
1400
|
Ok(dsl::lit(Null {}).into())
|
1401
|
+
} else if let Ok(series) = value.try_convert::<&RbSeries>() {
|
1402
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
1368
1403
|
} else if let Some(v) = RString::from_value(value) {
|
1369
1404
|
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
1405
|
+
} else if value.is_kind_of(class::integer()) {
|
1406
|
+
match value.try_convert::<i64>() {
|
1407
|
+
Ok(val) => {
|
1408
|
+
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
1409
|
+
Ok(dsl::lit(val as i32).into())
|
1410
|
+
} else {
|
1411
|
+
Ok(dsl::lit(val).into())
|
1412
|
+
}
|
1413
|
+
}
|
1414
|
+
_ => {
|
1415
|
+
let val = value.try_convert::<u64>()?;
|
1416
|
+
Ok(dsl::lit(val).into())
|
1417
|
+
}
|
1418
|
+
}
|
1370
1419
|
} else {
|
1371
1420
|
Ok(dsl::lit(value.try_convert::<f64>()?).into())
|
1372
1421
|
}
|
@@ -1376,6 +1425,14 @@ pub fn arange(low: &RbExpr, high: &RbExpr, step: usize) -> RbExpr {
|
|
1376
1425
|
polars::lazy::dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
1377
1426
|
}
|
1378
1427
|
|
1428
|
+
pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
1429
|
+
if value.is_nil() {
|
1430
|
+
Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
|
1431
|
+
} else {
|
1432
|
+
todo!();
|
1433
|
+
}
|
1434
|
+
}
|
1435
|
+
|
1379
1436
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
1380
1437
|
#[derive(Clone)]
|
1381
1438
|
pub struct RbWhen {
|