polars-df 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/Cargo.lock +335 -310
- data/Cargo.toml +0 -1
- data/README.md +69 -2
- data/ext/polars/Cargo.toml +5 -3
- data/ext/polars/src/batched_csv.rs +29 -14
- data/ext/polars/src/conversion.rs +69 -16
- data/ext/polars/src/dataframe.rs +56 -39
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/dataframe.rs +48 -14
- data/ext/polars/src/lazy/dsl.rs +69 -4
- data/ext/polars/src/lib.rs +24 -5
- data/ext/polars/src/numo.rs +57 -0
- data/ext/polars/src/series.rs +57 -33
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/data_frame.rb +89 -43
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/date_time_expr.rb +6 -6
- data/lib/polars/expr.rb +9 -2
- data/lib/polars/group_by.rb +11 -0
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +103 -7
- data/lib/polars/lazy_functions.rb +3 -2
- data/lib/polars/list_expr.rb +2 -2
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/series.rb +50 -4
- data/lib/polars/string_expr.rb +1 -1
- data/lib/polars/utils.rb +10 -2
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +7 -3
@@ -4,6 +4,7 @@ use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
use std::io::{BufWriter, Read};
|
7
|
+
use std::path::PathBuf;
|
7
8
|
|
8
9
|
use crate::conversion::*;
|
9
10
|
use crate::file::get_file_like;
|
@@ -118,7 +119,7 @@ impl RbLazyFrame {
|
|
118
119
|
let skip_rows_after_header: usize = arguments[15].try_convert()?;
|
119
120
|
let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
|
120
121
|
let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
|
121
|
-
let
|
122
|
+
let try_parse_dates: bool = arguments[18].try_convert()?;
|
122
123
|
let eol_char: String = arguments[19].try_convert()?;
|
123
124
|
// end arguments
|
124
125
|
|
@@ -153,7 +154,7 @@ impl RbLazyFrame {
|
|
153
154
|
.with_skip_rows_after_header(skip_rows_after_header)
|
154
155
|
.with_encoding(encoding.0)
|
155
156
|
.with_row_count(row_count)
|
156
|
-
.
|
157
|
+
.with_try_parse_dates(try_parse_dates)
|
157
158
|
.with_null_values(null_values);
|
158
159
|
|
159
160
|
if let Some(_lambda) = with_schema_modify {
|
@@ -163,6 +164,7 @@ impl RbLazyFrame {
|
|
163
164
|
Ok(r.finish().map_err(RbPolarsErr::from)?.into())
|
164
165
|
}
|
165
166
|
|
167
|
+
#[allow(clippy::too_many_arguments)]
|
166
168
|
pub fn new_from_parquet(
|
167
169
|
path: String,
|
168
170
|
n_rows: Option<usize>,
|
@@ -171,6 +173,7 @@ impl RbLazyFrame {
|
|
171
173
|
rechunk: bool,
|
172
174
|
row_count: Option<(String, IdxSize)>,
|
173
175
|
low_memory: bool,
|
176
|
+
use_statistics: bool,
|
174
177
|
) -> RbResult<Self> {
|
175
178
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
176
179
|
let args = ScanArgsParquet {
|
@@ -182,6 +185,7 @@ impl RbLazyFrame {
|
|
182
185
|
low_memory,
|
183
186
|
// TODO support cloud options
|
184
187
|
cloud_options: None,
|
188
|
+
use_statistics,
|
185
189
|
};
|
186
190
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
187
191
|
Ok(lf.into())
|
@@ -284,6 +288,32 @@ impl RbLazyFrame {
|
|
284
288
|
Ok(df.into())
|
285
289
|
}
|
286
290
|
|
291
|
+
#[allow(clippy::too_many_arguments)]
|
292
|
+
pub fn sink_parquet(
|
293
|
+
&self,
|
294
|
+
path: PathBuf,
|
295
|
+
compression: String,
|
296
|
+
compression_level: Option<i32>,
|
297
|
+
statistics: bool,
|
298
|
+
row_group_size: Option<usize>,
|
299
|
+
data_pagesize_limit: Option<usize>,
|
300
|
+
maintain_order: bool,
|
301
|
+
) -> RbResult<()> {
|
302
|
+
let compression = parse_parquet_compression(&compression, compression_level)?;
|
303
|
+
|
304
|
+
let options = ParquetWriteOptions {
|
305
|
+
compression,
|
306
|
+
statistics,
|
307
|
+
row_group_size,
|
308
|
+
data_pagesize_limit,
|
309
|
+
maintain_order,
|
310
|
+
};
|
311
|
+
|
312
|
+
let ldf = self.ldf.clone();
|
313
|
+
ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
|
314
|
+
Ok(())
|
315
|
+
}
|
316
|
+
|
287
317
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
288
318
|
let ldf = self.ldf.clone();
|
289
319
|
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
@@ -328,7 +358,7 @@ impl RbLazyFrame {
|
|
328
358
|
let lazy_gb = ldf.groupby_rolling(
|
329
359
|
by,
|
330
360
|
RollingGroupOptions {
|
331
|
-
index_column,
|
361
|
+
index_column: index_column.into(),
|
332
362
|
period: Duration::parse(&period),
|
333
363
|
offset: Duration::parse(&offset),
|
334
364
|
closed_window,
|
@@ -359,7 +389,7 @@ impl RbLazyFrame {
|
|
359
389
|
let lazy_gb = ldf.groupby_dynamic(
|
360
390
|
by,
|
361
391
|
DynamicGroupOptions {
|
362
|
-
index_column,
|
392
|
+
index_column: index_column.into(),
|
363
393
|
every: Duration::parse(&every),
|
364
394
|
period: Duration::parse(&period),
|
365
395
|
offset: Duration::parse(&offset),
|
@@ -415,10 +445,10 @@ impl RbLazyFrame {
|
|
415
445
|
.force_parallel(force_parallel)
|
416
446
|
.how(JoinType::AsOf(AsOfOptions {
|
417
447
|
strategy: strategy.0,
|
418
|
-
left_by,
|
419
|
-
right_by,
|
448
|
+
left_by: left_by.map(strings_to_smartstrings),
|
449
|
+
right_by: right_by.map(strings_to_smartstrings),
|
420
450
|
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
421
|
-
tolerance_str,
|
451
|
+
tolerance_str: tolerance_str.map(|s| s.into()),
|
422
452
|
}))
|
423
453
|
.suffix(suffix)
|
424
454
|
.finish()
|
@@ -570,12 +600,14 @@ impl RbLazyFrame {
|
|
570
600
|
value_vars: Vec<String>,
|
571
601
|
value_name: Option<String>,
|
572
602
|
variable_name: Option<String>,
|
603
|
+
streamable: bool,
|
573
604
|
) -> Self {
|
574
605
|
let args = MeltArgs {
|
575
|
-
id_vars,
|
576
|
-
value_vars,
|
577
|
-
value_name,
|
578
|
-
variable_name,
|
606
|
+
id_vars: strings_to_smartstrings(id_vars),
|
607
|
+
value_vars: strings_to_smartstrings(value_vars),
|
608
|
+
value_name: value_name.map(|s| s.into()),
|
609
|
+
variable_name: variable_name.map(|s| s.into()),
|
610
|
+
streamable,
|
579
611
|
};
|
580
612
|
|
581
613
|
let ldf = self.ldf.clone();
|
@@ -596,8 +628,10 @@ impl RbLazyFrame {
|
|
596
628
|
self.ldf.clone().into()
|
597
629
|
}
|
598
630
|
|
599
|
-
pub fn columns(&self) -> RbResult<
|
600
|
-
|
631
|
+
pub fn columns(&self) -> RbResult<RArray> {
|
632
|
+
let schema = self.get_schema()?;
|
633
|
+
let iter = schema.iter_names().map(|s| s.as_str());
|
634
|
+
Ok(RArray::from_iter(iter))
|
601
635
|
}
|
602
636
|
|
603
637
|
pub fn dtypes(&self) -> RbResult<RArray> {
|
@@ -614,7 +648,7 @@ impl RbLazyFrame {
|
|
614
648
|
// TODO remove unwrap
|
615
649
|
schema_dict
|
616
650
|
.aset::<String, Value>(
|
617
|
-
fld.name().
|
651
|
+
fld.name().to_string(),
|
618
652
|
Wrap(fld.data_type().clone()).into_value(),
|
619
653
|
)
|
620
654
|
.unwrap();
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -10,7 +10,7 @@ use crate::conversion::*;
|
|
10
10
|
use crate::lazy::apply::*;
|
11
11
|
use crate::lazy::utils::rb_exprs_to_exprs;
|
12
12
|
use crate::utils::reinterpret;
|
13
|
-
use crate::{RbResult, RbSeries};
|
13
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
14
14
|
|
15
15
|
#[magnus::wrap(class = "Polars::RbExpr")]
|
16
16
|
#[derive(Clone)]
|
@@ -715,6 +715,18 @@ impl RbExpr {
|
|
715
715
|
.into()
|
716
716
|
}
|
717
717
|
|
718
|
+
/// Builds an expression that checks whether each binary value contains the
/// literal byte sequence `lit`.
pub fn binary_contains(&self, lit: Vec<u8>) -> Self {
    let expr = self.inner.clone();
    expr.binary().contains_literal(lit).into()
}
|
721
|
+
|
722
|
+
/// Builds an expression that checks whether each binary value ends with the
/// byte sequence `sub`.
pub fn binary_ends_with(&self, sub: Vec<u8>) -> Self {
    let expr = self.inner.clone();
    expr.binary().ends_with(sub).into()
}
|
725
|
+
|
726
|
+
/// Builds an expression that checks whether each binary value starts with
/// the byte sequence `sub`.
pub fn binary_starts_with(&self, sub: Vec<u8>) -> Self {
    let expr = self.inner.clone();
    expr.binary().starts_with(sub).into()
}
|
729
|
+
|
718
730
|
pub fn str_hex_encode(&self) -> Self {
|
719
731
|
self.clone()
|
720
732
|
.inner
|
@@ -763,6 +775,58 @@ impl RbExpr {
|
|
763
775
|
.into()
|
764
776
|
}
|
765
777
|
|
778
|
+
pub fn binary_hex_encode(&self) -> Self {
|
779
|
+
self.clone()
|
780
|
+
.inner
|
781
|
+
.map(
|
782
|
+
move |s| s.binary().map(|s| Some(s.hex_encode().into_series())),
|
783
|
+
GetOutput::same_type(),
|
784
|
+
)
|
785
|
+
.with_fmt("binary.hex_encode")
|
786
|
+
.into()
|
787
|
+
}
|
788
|
+
|
789
|
+
pub fn binary_hex_decode(&self, strict: bool) -> Self {
|
790
|
+
self.clone()
|
791
|
+
.inner
|
792
|
+
.map(
|
793
|
+
move |s| {
|
794
|
+
s.binary()?
|
795
|
+
.hex_decode(strict)
|
796
|
+
.map(|s| Some(s.into_series()))
|
797
|
+
},
|
798
|
+
GetOutput::same_type(),
|
799
|
+
)
|
800
|
+
.with_fmt("binary.hex_decode")
|
801
|
+
.into()
|
802
|
+
}
|
803
|
+
|
804
|
+
pub fn binary_base64_encode(&self) -> Self {
|
805
|
+
self.clone()
|
806
|
+
.inner
|
807
|
+
.map(
|
808
|
+
move |s| s.binary().map(|s| Some(s.base64_encode().into_series())),
|
809
|
+
GetOutput::same_type(),
|
810
|
+
)
|
811
|
+
.with_fmt("binary.base64_encode")
|
812
|
+
.into()
|
813
|
+
}
|
814
|
+
|
815
|
+
pub fn binary_base64_decode(&self, strict: bool) -> Self {
|
816
|
+
self.clone()
|
817
|
+
.inner
|
818
|
+
.map(
|
819
|
+
move |s| {
|
820
|
+
s.binary()?
|
821
|
+
.base64_decode(strict)
|
822
|
+
.map(|s| Some(s.into_series()))
|
823
|
+
},
|
824
|
+
GetOutput::same_type(),
|
825
|
+
)
|
826
|
+
.with_fmt("binary.base64_decode")
|
827
|
+
.into()
|
828
|
+
}
|
829
|
+
|
766
830
|
pub fn str_json_path_match(&self, pat: String) -> Self {
|
767
831
|
let function = move |s: Series| {
|
768
832
|
let ca = s.utf8()?;
|
@@ -1654,9 +1718,9 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
|
1654
1718
|
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
1655
1719
|
}
|
1656
1720
|
|
1657
|
-
pub fn
|
1721
|
+
pub fn arg_sort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
|
1658
1722
|
let by = rb_exprs_to_exprs(by)?;
|
1659
|
-
Ok(polars::lazy::dsl::
|
1723
|
+
Ok(polars::lazy::dsl::arg_sort_by(by, &reverse).into())
|
1660
1724
|
}
|
1661
1725
|
|
1662
1726
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
@@ -1706,5 +1770,6 @@ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
|
1706
1770
|
|
1707
1771
|
pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
1708
1772
|
let s = rb_exprs_to_exprs(s)?;
|
1709
|
-
|
1773
|
+
let expr = dsl::concat_lst(s).map_err(RbPolarsErr::from)?;
|
1774
|
+
Ok(expr.into())
|
1710
1775
|
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -6,6 +6,7 @@ mod error;
|
|
6
6
|
mod file;
|
7
7
|
mod lazy;
|
8
8
|
mod list_construction;
|
9
|
+
mod numo;
|
9
10
|
mod object;
|
10
11
|
mod prelude;
|
11
12
|
pub(crate) mod rb_modules;
|
@@ -72,7 +73,7 @@ fn init() -> RbResult<()> {
|
|
72
73
|
let class = module.define_class("RbDataFrame", Default::default())?;
|
73
74
|
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
74
75
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
75
|
-
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet,
|
76
|
+
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
76
77
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
77
78
|
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
78
79
|
class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
|
@@ -87,6 +88,7 @@ fn init() -> RbResult<()> {
|
|
87
88
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
|
88
89
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
89
90
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
91
|
+
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
90
92
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
|
91
93
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
92
94
|
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
|
@@ -149,7 +151,6 @@ fn init() -> RbResult<()> {
|
|
149
151
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
150
152
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
|
151
153
|
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
152
|
-
class.define_method("unique", method!(RbDataFrame::unique, 3))?;
|
153
154
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
154
155
|
class.define_method("max", method!(RbDataFrame::max, 0))?;
|
155
156
|
class.define_method("min", method!(RbDataFrame::min, 0))?;
|
@@ -302,10 +303,23 @@ fn init() -> RbResult<()> {
|
|
302
303
|
class.define_method("str_contains", method!(RbExpr::str_contains, 3))?;
|
303
304
|
class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
|
304
305
|
class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
|
306
|
+
class.define_method("binary_contains", method!(RbExpr::binary_contains, 1))?;
|
307
|
+
class.define_method("binary_ends_with", method!(RbExpr::binary_ends_with, 1))?;
|
308
|
+
class.define_method("binary_starts_with", method!(RbExpr::binary_starts_with, 1))?;
|
305
309
|
class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
|
306
310
|
class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
|
307
311
|
class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
|
308
312
|
class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
|
313
|
+
class.define_method("binary_hex_encode", method!(RbExpr::binary_hex_encode, 0))?;
|
314
|
+
class.define_method("binary_hex_decode", method!(RbExpr::binary_hex_decode, 1))?;
|
315
|
+
class.define_method(
|
316
|
+
"binary_base64_encode",
|
317
|
+
method!(RbExpr::binary_base64_encode, 0),
|
318
|
+
)?;
|
319
|
+
class.define_method(
|
320
|
+
"binary_base64_decode",
|
321
|
+
method!(RbExpr::binary_base64_decode, 1),
|
322
|
+
)?;
|
309
323
|
class.define_method(
|
310
324
|
"str_json_path_match",
|
311
325
|
method!(RbExpr::str_json_path_match, 1),
|
@@ -471,7 +485,7 @@ fn init() -> RbResult<()> {
|
|
471
485
|
function!(crate::lazy::dsl::spearman_rank_corr, 4),
|
472
486
|
)?;
|
473
487
|
class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
|
474
|
-
class.define_singleton_method("
|
488
|
+
class.define_singleton_method("arg_sort_by", function!(crate::lazy::dsl::arg_sort_by, 2))?;
|
475
489
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
476
490
|
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
477
491
|
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
@@ -485,7 +499,7 @@ fn init() -> RbResult<()> {
|
|
485
499
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
486
500
|
class.define_singleton_method(
|
487
501
|
"new_from_parquet",
|
488
|
-
function!(RbLazyFrame::new_from_parquet,
|
502
|
+
function!(RbLazyFrame::new_from_parquet, 8),
|
489
503
|
)?;
|
490
504
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
491
505
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -502,6 +516,7 @@ fn init() -> RbResult<()> {
|
|
502
516
|
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 3))?;
|
503
517
|
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
504
518
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
519
|
+
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
|
505
520
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
506
521
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
507
522
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
@@ -530,7 +545,7 @@ fn init() -> RbResult<()> {
|
|
530
545
|
class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
|
531
546
|
class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
|
532
547
|
class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
|
533
|
-
class.define_method("melt", method!(RbLazyFrame::melt,
|
548
|
+
class.define_method("melt", method!(RbLazyFrame::melt, 5))?;
|
534
549
|
class.define_method("with_row_count", method!(RbLazyFrame::with_row_count, 2))?;
|
535
550
|
class.define_method("drop_columns", method!(RbLazyFrame::drop_columns, 1))?;
|
536
551
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
@@ -558,6 +573,7 @@ fn init() -> RbResult<()> {
|
|
558
573
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
559
574
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
560
575
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
576
|
+
class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
|
561
577
|
class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
|
562
578
|
class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
|
563
579
|
class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
|
@@ -783,6 +799,9 @@ fn init() -> RbResult<()> {
|
|
783
799
|
class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
|
784
800
|
// class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
|
785
801
|
|
802
|
+
// npy
|
803
|
+
class.define_method("to_numo", method!(RbSeries::to_numo, 0))?;
|
804
|
+
|
786
805
|
let class = module.define_class("RbWhen", Default::default())?;
|
787
806
|
class.define_method("_then", method!(RbWhen::then, 1))?;
|
788
807
|
|
@@ -0,0 +1,57 @@
|
|
1
|
+
use magnus::{class, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars_core::prelude::*;
|
3
|
+
|
4
|
+
use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
|
5
|
+
|
6
|
+
impl RbSeries {
|
7
|
+
/// For numeric types, this should only be called for Series with null types.
|
8
|
+
/// This will cast to floats so that `nil = NAN`
|
9
|
+
pub fn to_numo(&self) -> RbResult<Value> {
|
10
|
+
let s = &self.series.borrow();
|
11
|
+
match s.dtype() {
|
12
|
+
DataType::Utf8 => {
|
13
|
+
let ca = s.utf8().unwrap();
|
14
|
+
|
15
|
+
// TODO make more efficient
|
16
|
+
let np_arr = RArray::from_iter(ca.into_iter());
|
17
|
+
class::object()
|
18
|
+
.const_get::<_, RModule>("Numo")?
|
19
|
+
.const_get::<_, RClass>("RObject")?
|
20
|
+
.funcall("cast", (np_arr,))
|
21
|
+
}
|
22
|
+
dt if dt.is_numeric() => {
|
23
|
+
if s.bit_repr_is_large() {
|
24
|
+
let s = s.cast(&DataType::Float64).unwrap();
|
25
|
+
let ca = s.f64().unwrap();
|
26
|
+
// TODO make more efficient
|
27
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
28
|
+
Some(v) => v,
|
29
|
+
None => f64::NAN,
|
30
|
+
}));
|
31
|
+
class::object()
|
32
|
+
.const_get::<_, RModule>("Numo")?
|
33
|
+
.const_get::<_, RClass>("DFloat")?
|
34
|
+
.funcall("cast", (np_arr,))
|
35
|
+
} else {
|
36
|
+
let s = s.cast(&DataType::Float32).unwrap();
|
37
|
+
let ca = s.f32().unwrap();
|
38
|
+
// TODO make more efficient
|
39
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
40
|
+
Some(v) => v,
|
41
|
+
None => f32::NAN,
|
42
|
+
}));
|
43
|
+
class::object()
|
44
|
+
.const_get::<_, RModule>("Numo")?
|
45
|
+
.const_get::<_, RClass>("SFloat")?
|
46
|
+
.funcall("cast", (np_arr,))
|
47
|
+
}
|
48
|
+
}
|
49
|
+
dt => {
|
50
|
+
raise_err!(
|
51
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
52
|
+
ComputeError
|
53
|
+
);
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -125,6 +125,12 @@ impl RbSeries {
|
|
125
125
|
RbSeries::new(s)
|
126
126
|
}
|
127
127
|
|
128
|
+
pub fn new_binary(name: String, val: Wrap<BinaryChunked>, _strict: bool) -> Self {
|
129
|
+
let mut s = val.0.into_series();
|
130
|
+
s.rename(&name);
|
131
|
+
RbSeries::new(s)
|
132
|
+
}
|
133
|
+
|
128
134
|
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
129
135
|
let val = val
|
130
136
|
.each()
|
@@ -489,40 +495,58 @@ impl RbSeries {
|
|
489
495
|
}
|
490
496
|
|
491
497
|
pub fn to_a(&self) -> RArray {
|
492
|
-
let series = self.series.borrow();
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
498
|
+
let series = &self.series.borrow();
|
499
|
+
|
500
|
+
fn to_list_recursive(series: &Series) -> RArray {
|
501
|
+
let rblist = match series.dtype() {
|
502
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()),
|
503
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()),
|
504
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()),
|
505
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()),
|
506
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()),
|
507
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()),
|
508
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()),
|
509
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()),
|
510
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()),
|
511
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()),
|
512
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()),
|
513
|
+
DataType::Categorical(_) => {
|
514
|
+
RArray::from_iter(series.categorical().unwrap().iter_str())
|
515
|
+
}
|
516
|
+
DataType::Date => {
|
517
|
+
let a = RArray::with_capacity(series.len());
|
518
|
+
for v in series.iter() {
|
519
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
520
|
+
}
|
521
|
+
return a;
|
522
|
+
}
|
523
|
+
DataType::Datetime(_, _) => {
|
524
|
+
let a = RArray::with_capacity(series.len());
|
525
|
+
for v in series.iter() {
|
526
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
527
|
+
}
|
528
|
+
return a;
|
529
|
+
}
|
530
|
+
DataType::Utf8 => {
|
531
|
+
let ca = series.utf8().unwrap();
|
532
|
+
return RArray::from_iter(ca);
|
533
|
+
}
|
534
|
+
DataType::Binary => {
|
535
|
+
let a = RArray::with_capacity(series.len());
|
536
|
+
for v in series.iter() {
|
537
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
538
|
+
}
|
539
|
+
return a;
|
540
|
+
}
|
541
|
+
DataType::Null | DataType::Unknown => {
|
542
|
+
panic!("to_a not implemented for null/unknown")
|
543
|
+
}
|
544
|
+
_ => todo!(),
|
545
|
+
};
|
546
|
+
rblist
|
525
547
|
}
|
548
|
+
|
549
|
+
to_list_recursive(series)
|
526
550
|
}
|
527
551
|
|
528
552
|
pub fn median(&self) -> Option<f64> {
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Polars
  # Namespace for binary related expressions.
  class BinaryExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Check if binary values contain a binary substring.
    #
    # @param lit [String]
    #   The binary substring to look for
    #
    # @return [Expr]
    def contains(lit)
      Utils.wrap_expr(_rbexpr.binary_contains(lit))
    end

    # Check if binary values end with a binary substring.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Expr]
    def ends_with(sub)
      Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
    end

    # Check if binary values start with a binary substring.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Expr]
    def starts_with(sub)
      Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
    end

    # Decode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   Raise an error if the underlying value cannot be decoded,
    #   otherwise mask out with a null value.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `encoding` is neither "hex" nor "base64".
    def decode(encoding, strict: true)
      if encoding == "hex"
        Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
      elsif encoding == "base64"
        Utils.wrap_expr(_rbexpr.binary_base64_decode(strict))
      else
        # Single braces: the doubled form was a Python f-string escape that
        # Ruby prints literally.
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end

    # Encode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `encoding` is neither "hex" nor "base64".
    def encode(encoding)
      if encoding == "hex"
        Utils.wrap_expr(_rbexpr.binary_hex_encode)
      elsif encoding == "base64"
        Utils.wrap_expr(_rbexpr.binary_base64_encode)
      else
        # Single braces: the doubled form was a Python f-string escape that
        # Ruby prints literally.
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Polars
  # Series.bin namespace.
  class BinaryNameSpace
    include ExprDispatch

    self._accessor = "bin"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Check if binary values in the Series contain a binary substring.
    #
    # @param lit [String]
    #   The binary substring to look for
    #
    # @return [Series]
    def contains(lit)
      super(lit)
    end

    # Check if binary values end with a binary substring.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Series]
    def ends_with(sub)
      super(sub)
    end

    # Check if binary values start with a binary substring.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Series]
    def starts_with(sub)
      super(sub)
    end

    # Decode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   Raise an error if the underlying value cannot be decoded,
    #   otherwise mask out with a null value.
    #
    # @return [Series]
    def decode(encoding, strict: true)
      super(encoding, strict: strict)
    end

    # Encode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Series]
    def encode(encoding)
      super(encoding)
    end
  end
end
|