polars-df 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion.rs +35 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/lazy/dataframe.rs +3 -3
- data/ext/polars/src/lazy/dsl.rs +59 -2
- data/ext/polars/src/lib.rs +151 -10
- data/ext/polars/src/series.rs +182 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2284 -137
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +612 -7
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1261 -67
- data/lib/polars/lazy_functions.rb +288 -10
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +1476 -212
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +43 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +7 -10
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e6fb732e5dafe2fde285322554bd9159483cbbdf17d6e2bba9cba9a83563b47
|
4
|
+
data.tar.gz: 1b4249d0c0100f136973c601b8404cb6d92abc632d5ed0476bd93bc5360a11dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9414d6f60c489e2b3b72885288822083ba8c04bac4053f4e34c1d53ee805d164f17fe4b8b3a8f4ff562550bcc657f374bea6e250b52985367f601ea50e3037f
|
7
|
+
data.tar.gz: 9e3a7cfe105f03ec20e9c26aa38c1475074ccc1ea057a170a97b7068b41943d561d50af49bb1d1f74b7705809dc1375900f542ab93683ba627dea080274f6d91
|
data/.yardopts
ADDED
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -1160,7 +1160,7 @@ dependencies = [
|
|
1160
1160
|
|
1161
1161
|
[[package]]
|
1162
1162
|
name = "polars"
|
1163
|
-
version = "0.1.2"
|
1163
|
+
version = "0.1.3"
|
1164
1164
|
dependencies = [
|
1165
1165
|
"magnus",
|
1166
1166
|
"polars 0.25.1",
|
@@ -1217,6 +1217,7 @@ dependencies = [
|
|
1217
1217
|
"rayon",
|
1218
1218
|
"regex",
|
1219
1219
|
"serde",
|
1220
|
+
"serde_json",
|
1220
1221
|
"smartstring",
|
1221
1222
|
"thiserror",
|
1222
1223
|
]
|
data/README.md
CHANGED
@@ -27,7 +27,7 @@ Polars.read_csv("iris.csv")
|
|
27
27
|
.collect
|
28
28
|
```
|
29
29
|
|
30
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
30
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
|
31
31
|
|
32
32
|
## Examples
|
33
33
|
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.2"
|
3
|
+
version = "0.1.3"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -17,10 +17,12 @@ version = "0.25.1"
|
|
17
17
|
features = [
|
18
18
|
"abs",
|
19
19
|
"arange",
|
20
|
+
"arg_where",
|
20
21
|
"concat_str",
|
21
22
|
"csv-file",
|
22
23
|
"cum_agg",
|
23
24
|
"cumulative_eval",
|
25
|
+
"dataframe_arithmetic",
|
24
26
|
"date_offset",
|
25
27
|
"diagonal_concat",
|
26
28
|
"diff",
|
@@ -38,16 +40,20 @@ features = [
|
|
38
40
|
"lazy",
|
39
41
|
"lazy_regex",
|
40
42
|
"list_eval",
|
43
|
+
"list_to_struct",
|
41
44
|
"log",
|
42
45
|
"meta",
|
43
46
|
"mode",
|
44
47
|
"moment",
|
48
|
+
"object",
|
45
49
|
"parquet",
|
46
50
|
"partition_by",
|
47
51
|
"pct_change",
|
48
52
|
"product",
|
53
|
+
"propagate_nans",
|
49
54
|
"random",
|
50
55
|
"rank",
|
56
|
+
"reinterpret",
|
51
57
|
"repeat_by",
|
52
58
|
"rolling_window",
|
53
59
|
"round_series",
|
@@ -1,4 +1,5 @@
|
|
1
|
-
use magnus::{RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
2
|
+
use polars::chunked_array::object::PolarsObjectSafe;
|
2
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
3
4
|
use polars::datatypes::AnyValue;
|
4
5
|
use polars::frame::DataFrame;
|
@@ -79,6 +80,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
79
80
|
AnyValue::Null => *QNIL,
|
80
81
|
AnyValue::Boolean(v) => Value::from(v),
|
81
82
|
AnyValue::Utf8(v) => Value::from(v),
|
83
|
+
AnyValue::Date(v) => class::time()
|
84
|
+
.funcall::<_, _, Value>("at", (v * 86400,))
|
85
|
+
.unwrap()
|
86
|
+
.funcall::<_, _, Value>("utc", ())
|
87
|
+
.unwrap()
|
88
|
+
.funcall::<_, _, Value>("to_date", ())
|
89
|
+
.unwrap(),
|
82
90
|
_ => todo!(),
|
83
91
|
}
|
84
92
|
}
|
@@ -102,10 +110,19 @@ impl TryConvert for Wrap<DataType> {
|
|
102
110
|
"i32" => DataType::Int32,
|
103
111
|
"i64" => DataType::Int64,
|
104
112
|
"str" => DataType::Utf8,
|
113
|
+
"bin" => DataType::Binary,
|
105
114
|
"bool" => DataType::Boolean,
|
115
|
+
"cat" => DataType::Categorical(None),
|
116
|
+
"date" => DataType::Date,
|
117
|
+
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
106
118
|
"f32" => DataType::Float32,
|
119
|
+
"time" => DataType::Time,
|
120
|
+
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
107
121
|
"f64" => DataType::Float64,
|
108
|
-
"
|
122
|
+
// "obj" => DataType::Object(OBJECT_NAME),
|
123
|
+
"list" => DataType::List(Box::new(DataType::Boolean)),
|
124
|
+
"null" => DataType::Null,
|
125
|
+
"unk" => DataType::Unknown,
|
109
126
|
_ => {
|
110
127
|
return Err(RbValueError::new_err(format!(
|
111
128
|
"{} is not a supported DataType.",
|
@@ -407,3 +424,19 @@ pub fn parse_parquet_compression(
|
|
407
424
|
};
|
408
425
|
Ok(parsed)
|
409
426
|
}
|
427
|
+
|
428
|
+
pub struct ObjectValue {
|
429
|
+
pub inner: Value,
|
430
|
+
}
|
431
|
+
|
432
|
+
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
433
|
+
fn from(val: &dyn PolarsObjectSafe) -> Self {
|
434
|
+
unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
|
435
|
+
}
|
436
|
+
}
|
437
|
+
|
438
|
+
impl ObjectValue {
|
439
|
+
pub fn to_object(&self) -> Value {
|
440
|
+
self.inner
|
441
|
+
}
|
442
|
+
}
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,16 +1,14 @@
|
|
1
|
-
use magnus::{r_hash::ForEach,
|
1
|
+
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
2
|
use polars::io::mmap::ReaderBytes;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
|
-
use std::
|
7
|
-
use std::io::{BufReader, BufWriter, Cursor};
|
6
|
+
use std::io::{BufWriter, Cursor};
|
8
7
|
use std::ops::Deref;
|
9
|
-
use std::path::PathBuf;
|
10
8
|
|
11
9
|
use crate::conversion::*;
|
12
10
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
13
|
-
use crate::series::to_rbseries_collection;
|
11
|
+
use crate::series::{to_rbseries_collection, to_series_collection};
|
14
12
|
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
15
13
|
|
16
14
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
@@ -137,13 +135,27 @@ impl RbDataFrame {
|
|
137
135
|
Ok(df.into())
|
138
136
|
}
|
139
137
|
|
140
|
-
pub fn read_parquet(
|
141
|
-
|
142
|
-
|
143
|
-
|
138
|
+
pub fn read_parquet(
|
139
|
+
rb_f: Value,
|
140
|
+
columns: Option<Vec<String>>,
|
141
|
+
projection: Option<Vec<usize>>,
|
142
|
+
n_rows: Option<usize>,
|
143
|
+
parallel: Wrap<ParallelStrategy>,
|
144
|
+
row_count: Option<(String, IdxSize)>,
|
145
|
+
low_memory: bool,
|
146
|
+
) -> RbResult<Self> {
|
147
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
148
|
+
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
149
|
+
let df = ParquetReader::new(mmap_bytes_r)
|
150
|
+
.with_projection(projection)
|
151
|
+
.with_columns(columns)
|
152
|
+
.read_parallel(parallel.0)
|
153
|
+
.with_n_rows(n_rows)
|
154
|
+
.with_row_count(row_count)
|
155
|
+
.set_low_memory(low_memory)
|
144
156
|
.finish()
|
145
|
-
.map_err(RbPolarsErr::from)
|
146
|
-
|
157
|
+
.map_err(RbPolarsErr::from)?;
|
158
|
+
Ok(RbDataFrame::new(df))
|
147
159
|
}
|
148
160
|
|
149
161
|
pub fn read_ipc(
|
@@ -313,6 +325,55 @@ impl RbDataFrame {
|
|
313
325
|
Ok(())
|
314
326
|
}
|
315
327
|
|
328
|
+
pub fn row_tuple(&self, idx: i64) -> Value {
|
329
|
+
let idx = if idx < 0 {
|
330
|
+
(self.df.borrow().height() as i64 + idx) as usize
|
331
|
+
} else {
|
332
|
+
idx as usize
|
333
|
+
};
|
334
|
+
RArray::from_vec(
|
335
|
+
self.df
|
336
|
+
.borrow()
|
337
|
+
.get_columns()
|
338
|
+
.iter()
|
339
|
+
.map(|s| match s.dtype() {
|
340
|
+
DataType::Object(_) => {
|
341
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
342
|
+
obj.unwrap().to_object()
|
343
|
+
}
|
344
|
+
_ => Wrap(s.get(idx)).into(),
|
345
|
+
})
|
346
|
+
.collect(),
|
347
|
+
)
|
348
|
+
.into()
|
349
|
+
}
|
350
|
+
|
351
|
+
pub fn row_tuples(&self) -> Value {
|
352
|
+
let df = &self.df;
|
353
|
+
RArray::from_vec(
|
354
|
+
(0..df.borrow().height())
|
355
|
+
.map(|idx| {
|
356
|
+
RArray::from_vec(
|
357
|
+
self.df
|
358
|
+
.borrow()
|
359
|
+
.get_columns()
|
360
|
+
.iter()
|
361
|
+
.map(|s| match s.dtype() {
|
362
|
+
DataType::Object(_) => {
|
363
|
+
let obj: Option<&ObjectValue> =
|
364
|
+
s.get_object(idx).map(|any| any.into());
|
365
|
+
obj.unwrap().to_object()
|
366
|
+
}
|
367
|
+
_ => Wrap(s.get(idx)).into(),
|
368
|
+
})
|
369
|
+
.collect(),
|
370
|
+
)
|
371
|
+
})
|
372
|
+
.collect(),
|
373
|
+
)
|
374
|
+
.into()
|
375
|
+
}
|
376
|
+
|
316
377
|
pub fn write_parquet(
|
317
378
|
&self,
|
318
379
|
rb_f: Value,
|
@@ -338,6 +399,86 @@ impl RbDataFrame {
|
|
338
399
|
Ok(())
|
339
400
|
}
|
340
401
|
|
402
|
+
pub fn add(&self, s: &RbSeries) -> RbResult<Self> {
|
403
|
+
let df = (&*self.df.borrow() + &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
404
|
+
Ok(df.into())
|
405
|
+
}
|
406
|
+
|
407
|
+
pub fn sub(&self, s: &RbSeries) -> RbResult<Self> {
|
408
|
+
let df = (&*self.df.borrow() - &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
409
|
+
Ok(df.into())
|
410
|
+
}
|
411
|
+
|
412
|
+
pub fn div(&self, s: &RbSeries) -> RbResult<Self> {
|
413
|
+
let df = (&*self.df.borrow() / &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
414
|
+
Ok(df.into())
|
415
|
+
}
|
416
|
+
|
417
|
+
pub fn mul(&self, s: &RbSeries) -> RbResult<Self> {
|
418
|
+
let df = (&*self.df.borrow() * &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
419
|
+
Ok(df.into())
|
420
|
+
}
|
421
|
+
|
422
|
+
pub fn rem(&self, s: &RbSeries) -> RbResult<Self> {
|
423
|
+
let df = (&*self.df.borrow() % &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
424
|
+
Ok(df.into())
|
425
|
+
}
|
426
|
+
|
427
|
+
pub fn add_df(&self, s: &Self) -> RbResult<Self> {
|
428
|
+
let df = (&*self.df.borrow() + &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
429
|
+
Ok(df.into())
|
430
|
+
}
|
431
|
+
|
432
|
+
pub fn sub_df(&self, s: &Self) -> RbResult<Self> {
|
433
|
+
let df = (&*self.df.borrow() - &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
434
|
+
Ok(df.into())
|
435
|
+
}
|
436
|
+
|
437
|
+
pub fn div_df(&self, s: &Self) -> RbResult<Self> {
|
438
|
+
let df = (&*self.df.borrow() / &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
439
|
+
Ok(df.into())
|
440
|
+
}
|
441
|
+
|
442
|
+
pub fn mul_df(&self, s: &Self) -> RbResult<Self> {
|
443
|
+
let df = (&*self.df.borrow() * &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
444
|
+
Ok(df.into())
|
445
|
+
}
|
446
|
+
|
447
|
+
pub fn rem_df(&self, s: &Self) -> RbResult<Self> {
|
448
|
+
let df = (&*self.df.borrow() % &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
449
|
+
Ok(df.into())
|
450
|
+
}
|
451
|
+
|
452
|
+
pub fn sample_n(
|
453
|
+
&self,
|
454
|
+
n: usize,
|
455
|
+
with_replacement: bool,
|
456
|
+
shuffle: bool,
|
457
|
+
seed: Option<u64>,
|
458
|
+
) -> RbResult<Self> {
|
459
|
+
let df = self
|
460
|
+
.df
|
461
|
+
.borrow()
|
462
|
+
.sample_n(n, with_replacement, shuffle, seed)
|
463
|
+
.map_err(RbPolarsErr::from)?;
|
464
|
+
Ok(df.into())
|
465
|
+
}
|
466
|
+
|
467
|
+
pub fn sample_frac(
|
468
|
+
&self,
|
469
|
+
frac: f64,
|
470
|
+
with_replacement: bool,
|
471
|
+
shuffle: bool,
|
472
|
+
seed: Option<u64>,
|
473
|
+
) -> RbResult<Self> {
|
474
|
+
let df = self
|
475
|
+
.df
|
476
|
+
.borrow()
|
477
|
+
.sample_frac(frac, with_replacement, shuffle, seed)
|
478
|
+
.map_err(RbPolarsErr::from)?;
|
479
|
+
Ok(df.into())
|
480
|
+
}
|
481
|
+
|
341
482
|
pub fn rechunk(&self) -> Self {
|
342
483
|
self.df.borrow().agg_chunks().into()
|
343
484
|
}
|
@@ -393,6 +534,73 @@ impl RbDataFrame {
|
|
393
534
|
self.df.borrow().width()
|
394
535
|
}
|
395
536
|
|
537
|
+
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
538
|
+
let columns = to_series_collection(columns)?;
|
539
|
+
self.df
|
540
|
+
.borrow_mut()
|
541
|
+
.hstack_mut(&columns)
|
542
|
+
.map_err(RbPolarsErr::from)?;
|
543
|
+
Ok(())
|
544
|
+
}
|
545
|
+
|
546
|
+
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
547
|
+
let columns = to_series_collection(columns)?;
|
548
|
+
let df = self
|
549
|
+
.df
|
550
|
+
.borrow()
|
551
|
+
.hstack(&columns)
|
552
|
+
.map_err(RbPolarsErr::from)?;
|
553
|
+
Ok(df.into())
|
554
|
+
}
|
555
|
+
|
556
|
+
pub fn extend(&self, df: &RbDataFrame) -> RbResult<()> {
|
557
|
+
self.df
|
558
|
+
.borrow_mut()
|
559
|
+
.extend(&df.df.borrow())
|
560
|
+
.map_err(RbPolarsErr::from)?;
|
561
|
+
Ok(())
|
562
|
+
}
|
563
|
+
|
564
|
+
pub fn vstack_mut(&self, df: &RbDataFrame) -> RbResult<()> {
|
565
|
+
self.df
|
566
|
+
.borrow_mut()
|
567
|
+
.vstack_mut(&df.df.borrow())
|
568
|
+
.map_err(RbPolarsErr::from)?;
|
569
|
+
Ok(())
|
570
|
+
}
|
571
|
+
|
572
|
+
pub fn vstack(&self, df: &RbDataFrame) -> RbResult<Self> {
|
573
|
+
let df = self
|
574
|
+
.df
|
575
|
+
.borrow()
|
576
|
+
.vstack(&df.df.borrow())
|
577
|
+
.map_err(RbPolarsErr::from)?;
|
578
|
+
Ok(df.into())
|
579
|
+
}
|
580
|
+
|
581
|
+
pub fn drop_in_place(&self, name: String) -> RbResult<RbSeries> {
|
582
|
+
let s = self
|
583
|
+
.df
|
584
|
+
.borrow_mut()
|
585
|
+
.drop_in_place(&name)
|
586
|
+
.map_err(RbPolarsErr::from)?;
|
587
|
+
Ok(RbSeries::new(s))
|
588
|
+
}
|
589
|
+
|
590
|
+
pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> RbResult<Self> {
|
591
|
+
let df = self
|
592
|
+
.df
|
593
|
+
.borrow()
|
594
|
+
.drop_nulls(subset.as_ref().map(|s| s.as_ref()))
|
595
|
+
.map_err(RbPolarsErr::from)?;
|
596
|
+
Ok(df.into())
|
597
|
+
}
|
598
|
+
|
599
|
+
pub fn drop(&self, name: String) -> RbResult<Self> {
|
600
|
+
let df = self.df.borrow().drop(&name).map_err(RbPolarsErr::from)?;
|
601
|
+
Ok(RbDataFrame::new(df))
|
602
|
+
}
|
603
|
+
|
396
604
|
pub fn select_at_idx(&self, idx: usize) -> Option<RbSeries> {
|
397
605
|
self.df
|
398
606
|
.borrow()
|
@@ -400,6 +608,10 @@ impl RbDataFrame {
|
|
400
608
|
.map(|s| RbSeries::new(s.clone()))
|
401
609
|
}
|
402
610
|
|
611
|
+
pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
|
612
|
+
self.df.borrow().find_idx_by_name(&name)
|
613
|
+
}
|
614
|
+
|
403
615
|
// TODO remove clone
|
404
616
|
pub fn column(&self, name: String) -> RbResult<RbSeries> {
|
405
617
|
self.df
|
@@ -702,6 +914,11 @@ impl RbDataFrame {
|
|
702
914
|
Ok(out.into())
|
703
915
|
}
|
704
916
|
|
917
|
+
pub fn to_struct(&self, name: String) -> RbSeries {
|
918
|
+
let s = self.df.borrow().clone().into_struct(&name);
|
919
|
+
s.into_series().into()
|
920
|
+
}
|
921
|
+
|
705
922
|
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
|
706
923
|
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
707
924
|
Ok(df.into())
|
@@ -520,9 +520,9 @@ impl RbLazyFrame {
|
|
520
520
|
Ok(self.get_schema()?.iter_names().cloned().collect())
|
521
521
|
}
|
522
522
|
|
523
|
-
pub fn dtypes(&self) -> RbResult<Vec<
|
523
|
+
pub fn dtypes(&self) -> RbResult<Vec<Value>> {
|
524
524
|
let schema = self.get_schema()?;
|
525
|
-
let iter = schema.iter_dtypes().map(|dt| dt.
|
525
|
+
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into());
|
526
526
|
Ok(iter.collect())
|
527
527
|
}
|
528
528
|
|
@@ -533,7 +533,7 @@ impl RbLazyFrame {
|
|
533
533
|
schema.iter_fields().for_each(|fld| {
|
534
534
|
// TODO remove unwrap
|
535
535
|
schema_dict
|
536
|
-
.aset(fld.name().clone(), fld.data_type().
|
536
|
+
.aset::<String, Value>(fld.name().clone(), Wrap(fld.data_type().clone()).into())
|
537
537
|
.unwrap();
|
538
538
|
});
|
539
539
|
Ok(schema_dict)
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, RString, Value};
|
1
|
+
use magnus::{class, RArray, RString, Value};
|
2
2
|
use polars::chunked_array::ops::SortOptions;
|
3
3
|
use polars::lazy::dsl;
|
4
4
|
use polars::lazy::dsl::Operator;
|
@@ -8,7 +8,8 @@ use polars::series::ops::NullBehavior;
|
|
8
8
|
use crate::conversion::*;
|
9
9
|
use crate::lazy::apply::*;
|
10
10
|
use crate::lazy::utils::rb_exprs_to_exprs;
|
11
|
-
use crate::
|
11
|
+
use crate::utils::reinterpret;
|
12
|
+
use crate::{RbResult, RbSeries};
|
12
13
|
|
13
14
|
#[magnus::wrap(class = "Polars::RbExpr")]
|
14
15
|
#[derive(Clone)]
|
@@ -886,6 +887,19 @@ impl RbExpr {
|
|
886
887
|
self.inner.clone().dt().round(&every, &offset).into()
|
887
888
|
}
|
888
889
|
|
890
|
+
pub fn reinterpret(&self, signed: bool) -> Self {
|
891
|
+
let function = move |s: Series| reinterpret(&s, signed);
|
892
|
+
let dt = if signed {
|
893
|
+
DataType::Int64
|
894
|
+
} else {
|
895
|
+
DataType::UInt64
|
896
|
+
};
|
897
|
+
self.clone()
|
898
|
+
.inner
|
899
|
+
.map(function, GetOutput::from_type(dt))
|
900
|
+
.into()
|
901
|
+
}
|
902
|
+
|
889
903
|
pub fn mode(&self) -> Self {
|
890
904
|
self.inner.clone().mode().into()
|
891
905
|
}
|
@@ -1305,6 +1319,21 @@ impl RbExpr {
|
|
1305
1319
|
self.inner.clone().ewm_var(options).into()
|
1306
1320
|
}
|
1307
1321
|
|
1322
|
+
pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> Self {
|
1323
|
+
let value = Value::from(value);
|
1324
|
+
self.inner
|
1325
|
+
.clone()
|
1326
|
+
.apply(
|
1327
|
+
move |s| {
|
1328
|
+
let value = value.try_convert::<Wrap<AnyValue>>().unwrap().0;
|
1329
|
+
s.extend_constant(value, n)
|
1330
|
+
},
|
1331
|
+
GetOutput::same_type(),
|
1332
|
+
)
|
1333
|
+
.with_fmt("extend")
|
1334
|
+
.into()
|
1335
|
+
}
|
1336
|
+
|
1308
1337
|
pub fn any(&self) -> Self {
|
1309
1338
|
self.inner.clone().any().into()
|
1310
1339
|
}
|
@@ -1332,6 +1361,10 @@ impl RbExpr {
|
|
1332
1361
|
pub fn exp(&self) -> Self {
|
1333
1362
|
self.inner.clone().exp().into()
|
1334
1363
|
}
|
1364
|
+
|
1365
|
+
pub fn entropy(&self, base: f64, normalize: bool) -> Self {
|
1366
|
+
self.inner.clone().entropy(base, normalize).into()
|
1367
|
+
}
|
1335
1368
|
}
|
1336
1369
|
|
1337
1370
|
pub fn col(name: String) -> RbExpr {
|
@@ -1365,8 +1398,24 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
1365
1398
|
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
1366
1399
|
if value.is_nil() {
|
1367
1400
|
Ok(dsl::lit(Null {}).into())
|
1401
|
+
} else if let Ok(series) = value.try_convert::<&RbSeries>() {
|
1402
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
1368
1403
|
} else if let Some(v) = RString::from_value(value) {
|
1369
1404
|
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
1405
|
+
} else if value.is_kind_of(class::integer()) {
|
1406
|
+
match value.try_convert::<i64>() {
|
1407
|
+
Ok(val) => {
|
1408
|
+
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
1409
|
+
Ok(dsl::lit(val as i32).into())
|
1410
|
+
} else {
|
1411
|
+
Ok(dsl::lit(val).into())
|
1412
|
+
}
|
1413
|
+
}
|
1414
|
+
_ => {
|
1415
|
+
let val = value.try_convert::<u64>()?;
|
1416
|
+
Ok(dsl::lit(val).into())
|
1417
|
+
}
|
1418
|
+
}
|
1370
1419
|
} else {
|
1371
1420
|
Ok(dsl::lit(value.try_convert::<f64>()?).into())
|
1372
1421
|
}
|
@@ -1376,6 +1425,14 @@ pub fn arange(low: &RbExpr, high: &RbExpr, step: usize) -> RbExpr {
|
|
1376
1425
|
polars::lazy::dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
1377
1426
|
}
|
1378
1427
|
|
1428
|
+
pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
1429
|
+
if value.is_nil() {
|
1430
|
+
Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
|
1431
|
+
} else {
|
1432
|
+
todo!();
|
1433
|
+
}
|
1434
|
+
}
|
1435
|
+
|
1379
1436
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
1380
1437
|
#[derive(Clone)]
|
1381
1438
|
pub struct RbWhen {
|