polars-df 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/Cargo.lock +335 -310
- data/Cargo.toml +0 -1
- data/README.md +69 -2
- data/ext/polars/Cargo.toml +5 -3
- data/ext/polars/src/batched_csv.rs +29 -14
- data/ext/polars/src/conversion.rs +69 -16
- data/ext/polars/src/dataframe.rs +56 -39
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/dataframe.rs +48 -14
- data/ext/polars/src/lazy/dsl.rs +69 -4
- data/ext/polars/src/lib.rs +24 -5
- data/ext/polars/src/numo.rs +57 -0
- data/ext/polars/src/series.rs +57 -33
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/data_frame.rb +89 -43
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/date_time_expr.rb +6 -6
- data/lib/polars/expr.rb +9 -2
- data/lib/polars/group_by.rb +11 -0
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +103 -7
- data/lib/polars/lazy_functions.rb +3 -2
- data/lib/polars/list_expr.rb +2 -2
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/series.rb +50 -4
- data/lib/polars/string_expr.rb +1 -1
- data/lib/polars/utils.rb +10 -2
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +7 -3
@@ -4,6 +4,7 @@ use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
6
|
use std::io::{BufWriter, Read};
|
7
|
+
use std::path::PathBuf;
|
7
8
|
|
8
9
|
use crate::conversion::*;
|
9
10
|
use crate::file::get_file_like;
|
@@ -118,7 +119,7 @@ impl RbLazyFrame {
|
|
118
119
|
let skip_rows_after_header: usize = arguments[15].try_convert()?;
|
119
120
|
let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
|
120
121
|
let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
|
121
|
-
let
|
122
|
+
let try_parse_dates: bool = arguments[18].try_convert()?;
|
122
123
|
let eol_char: String = arguments[19].try_convert()?;
|
123
124
|
// end arguments
|
124
125
|
|
@@ -153,7 +154,7 @@ impl RbLazyFrame {
|
|
153
154
|
.with_skip_rows_after_header(skip_rows_after_header)
|
154
155
|
.with_encoding(encoding.0)
|
155
156
|
.with_row_count(row_count)
|
156
|
-
.
|
157
|
+
.with_try_parse_dates(try_parse_dates)
|
157
158
|
.with_null_values(null_values);
|
158
159
|
|
159
160
|
if let Some(_lambda) = with_schema_modify {
|
@@ -163,6 +164,7 @@ impl RbLazyFrame {
|
|
163
164
|
Ok(r.finish().map_err(RbPolarsErr::from)?.into())
|
164
165
|
}
|
165
166
|
|
167
|
+
#[allow(clippy::too_many_arguments)]
|
166
168
|
pub fn new_from_parquet(
|
167
169
|
path: String,
|
168
170
|
n_rows: Option<usize>,
|
@@ -171,6 +173,7 @@ impl RbLazyFrame {
|
|
171
173
|
rechunk: bool,
|
172
174
|
row_count: Option<(String, IdxSize)>,
|
173
175
|
low_memory: bool,
|
176
|
+
use_statistics: bool,
|
174
177
|
) -> RbResult<Self> {
|
175
178
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
176
179
|
let args = ScanArgsParquet {
|
@@ -182,6 +185,7 @@ impl RbLazyFrame {
|
|
182
185
|
low_memory,
|
183
186
|
// TODO support cloud options
|
184
187
|
cloud_options: None,
|
188
|
+
use_statistics,
|
185
189
|
};
|
186
190
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
187
191
|
Ok(lf.into())
|
@@ -284,6 +288,32 @@ impl RbLazyFrame {
|
|
284
288
|
Ok(df.into())
|
285
289
|
}
|
286
290
|
|
291
|
+
#[allow(clippy::too_many_arguments)]
|
292
|
+
pub fn sink_parquet(
|
293
|
+
&self,
|
294
|
+
path: PathBuf,
|
295
|
+
compression: String,
|
296
|
+
compression_level: Option<i32>,
|
297
|
+
statistics: bool,
|
298
|
+
row_group_size: Option<usize>,
|
299
|
+
data_pagesize_limit: Option<usize>,
|
300
|
+
maintain_order: bool,
|
301
|
+
) -> RbResult<()> {
|
302
|
+
let compression = parse_parquet_compression(&compression, compression_level)?;
|
303
|
+
|
304
|
+
let options = ParquetWriteOptions {
|
305
|
+
compression,
|
306
|
+
statistics,
|
307
|
+
row_group_size,
|
308
|
+
data_pagesize_limit,
|
309
|
+
maintain_order,
|
310
|
+
};
|
311
|
+
|
312
|
+
let ldf = self.ldf.clone();
|
313
|
+
ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
|
314
|
+
Ok(())
|
315
|
+
}
|
316
|
+
|
287
317
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
288
318
|
let ldf = self.ldf.clone();
|
289
319
|
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
@@ -328,7 +358,7 @@ impl RbLazyFrame {
|
|
328
358
|
let lazy_gb = ldf.groupby_rolling(
|
329
359
|
by,
|
330
360
|
RollingGroupOptions {
|
331
|
-
index_column,
|
361
|
+
index_column: index_column.into(),
|
332
362
|
period: Duration::parse(&period),
|
333
363
|
offset: Duration::parse(&offset),
|
334
364
|
closed_window,
|
@@ -359,7 +389,7 @@ impl RbLazyFrame {
|
|
359
389
|
let lazy_gb = ldf.groupby_dynamic(
|
360
390
|
by,
|
361
391
|
DynamicGroupOptions {
|
362
|
-
index_column,
|
392
|
+
index_column: index_column.into(),
|
363
393
|
every: Duration::parse(&every),
|
364
394
|
period: Duration::parse(&period),
|
365
395
|
offset: Duration::parse(&offset),
|
@@ -415,10 +445,10 @@ impl RbLazyFrame {
|
|
415
445
|
.force_parallel(force_parallel)
|
416
446
|
.how(JoinType::AsOf(AsOfOptions {
|
417
447
|
strategy: strategy.0,
|
418
|
-
left_by,
|
419
|
-
right_by,
|
448
|
+
left_by: left_by.map(strings_to_smartstrings),
|
449
|
+
right_by: right_by.map(strings_to_smartstrings),
|
420
450
|
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
421
|
-
tolerance_str,
|
451
|
+
tolerance_str: tolerance_str.map(|s| s.into()),
|
422
452
|
}))
|
423
453
|
.suffix(suffix)
|
424
454
|
.finish()
|
@@ -570,12 +600,14 @@ impl RbLazyFrame {
|
|
570
600
|
value_vars: Vec<String>,
|
571
601
|
value_name: Option<String>,
|
572
602
|
variable_name: Option<String>,
|
603
|
+
streamable: bool,
|
573
604
|
) -> Self {
|
574
605
|
let args = MeltArgs {
|
575
|
-
id_vars,
|
576
|
-
value_vars,
|
577
|
-
value_name,
|
578
|
-
variable_name,
|
606
|
+
id_vars: strings_to_smartstrings(id_vars),
|
607
|
+
value_vars: strings_to_smartstrings(value_vars),
|
608
|
+
value_name: value_name.map(|s| s.into()),
|
609
|
+
variable_name: variable_name.map(|s| s.into()),
|
610
|
+
streamable,
|
579
611
|
};
|
580
612
|
|
581
613
|
let ldf = self.ldf.clone();
|
@@ -596,8 +628,10 @@ impl RbLazyFrame {
|
|
596
628
|
self.ldf.clone().into()
|
597
629
|
}
|
598
630
|
|
599
|
-
pub fn columns(&self) -> RbResult<
|
600
|
-
|
631
|
+
/// Returns the column names of the lazy frame's schema as a Ruby array of strings.
///
/// # Errors
/// Propagates any error from `get_schema`.
pub fn columns(&self) -> RbResult<RArray> {
    let schema = self.get_schema()?;
    Ok(schema.iter_names().map(|name| name.as_str()).collect())
}
|
602
636
|
|
603
637
|
pub fn dtypes(&self) -> RbResult<RArray> {
|
@@ -614,7 +648,7 @@ impl RbLazyFrame {
|
|
614
648
|
// TODO remove unwrap
|
615
649
|
schema_dict
|
616
650
|
.aset::<String, Value>(
|
617
|
-
fld.name().
|
651
|
+
fld.name().to_string(),
|
618
652
|
Wrap(fld.data_type().clone()).into_value(),
|
619
653
|
)
|
620
654
|
.unwrap();
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -10,7 +10,7 @@ use crate::conversion::*;
|
|
10
10
|
use crate::lazy::apply::*;
|
11
11
|
use crate::lazy::utils::rb_exprs_to_exprs;
|
12
12
|
use crate::utils::reinterpret;
|
13
|
-
use crate::{RbResult, RbSeries};
|
13
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
14
14
|
|
15
15
|
#[magnus::wrap(class = "Polars::RbExpr")]
|
16
16
|
#[derive(Clone)]
|
@@ -715,6 +715,18 @@ impl RbExpr {
|
|
715
715
|
.into()
|
716
716
|
}
|
717
717
|
|
718
|
+
/// Builds an expression that checks binary values for the literal byte sequence `lit`.
pub fn binary_contains(&self, lit: Vec<u8>) -> Self {
    let expr = self.inner.clone();
    expr.binary().contains_literal(lit).into()
}
|
721
|
+
|
722
|
+
/// Builds an expression that checks whether binary values end with the bytes in `sub`.
pub fn binary_ends_with(&self, sub: Vec<u8>) -> Self {
    let expr = self.inner.clone();
    expr.binary().ends_with(sub).into()
}
|
725
|
+
|
726
|
+
/// Builds an expression that checks whether binary values start with the bytes in `sub`.
pub fn binary_starts_with(&self, sub: Vec<u8>) -> Self {
    let expr = self.inner.clone();
    expr.binary().starts_with(sub).into()
}
|
729
|
+
|
718
730
|
pub fn str_hex_encode(&self) -> Self {
|
719
731
|
self.clone()
|
720
732
|
.inner
|
@@ -763,6 +775,58 @@ impl RbExpr {
|
|
763
775
|
.into()
|
764
776
|
}
|
765
777
|
|
778
|
+
pub fn binary_hex_encode(&self) -> Self {
|
779
|
+
self.clone()
|
780
|
+
.inner
|
781
|
+
.map(
|
782
|
+
move |s| s.binary().map(|s| Some(s.hex_encode().into_series())),
|
783
|
+
GetOutput::same_type(),
|
784
|
+
)
|
785
|
+
.with_fmt("binary.hex_encode")
|
786
|
+
.into()
|
787
|
+
}
|
788
|
+
|
789
|
+
pub fn binary_hex_decode(&self, strict: bool) -> Self {
|
790
|
+
self.clone()
|
791
|
+
.inner
|
792
|
+
.map(
|
793
|
+
move |s| {
|
794
|
+
s.binary()?
|
795
|
+
.hex_decode(strict)
|
796
|
+
.map(|s| Some(s.into_series()))
|
797
|
+
},
|
798
|
+
GetOutput::same_type(),
|
799
|
+
)
|
800
|
+
.with_fmt("binary.hex_decode")
|
801
|
+
.into()
|
802
|
+
}
|
803
|
+
|
804
|
+
pub fn binary_base64_encode(&self) -> Self {
|
805
|
+
self.clone()
|
806
|
+
.inner
|
807
|
+
.map(
|
808
|
+
move |s| s.binary().map(|s| Some(s.base64_encode().into_series())),
|
809
|
+
GetOutput::same_type(),
|
810
|
+
)
|
811
|
+
.with_fmt("binary.base64_encode")
|
812
|
+
.into()
|
813
|
+
}
|
814
|
+
|
815
|
+
pub fn binary_base64_decode(&self, strict: bool) -> Self {
|
816
|
+
self.clone()
|
817
|
+
.inner
|
818
|
+
.map(
|
819
|
+
move |s| {
|
820
|
+
s.binary()?
|
821
|
+
.base64_decode(strict)
|
822
|
+
.map(|s| Some(s.into_series()))
|
823
|
+
},
|
824
|
+
GetOutput::same_type(),
|
825
|
+
)
|
826
|
+
.with_fmt("binary.base64_decode")
|
827
|
+
.into()
|
828
|
+
}
|
829
|
+
|
766
830
|
pub fn str_json_path_match(&self, pat: String) -> Self {
|
767
831
|
let function = move |s: Series| {
|
768
832
|
let ca = s.utf8()?;
|
@@ -1654,9 +1718,9 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
|
1654
1718
|
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
1655
1719
|
}
|
1656
1720
|
|
1657
|
-
pub fn
|
1721
|
+
pub fn arg_sort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
|
1658
1722
|
let by = rb_exprs_to_exprs(by)?;
|
1659
|
-
Ok(polars::lazy::dsl::
|
1723
|
+
Ok(polars::lazy::dsl::arg_sort_by(by, &reverse).into())
|
1660
1724
|
}
|
1661
1725
|
|
1662
1726
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
@@ -1706,5 +1770,6 @@ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
|
1706
1770
|
|
1707
1771
|
pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
1708
1772
|
let s = rb_exprs_to_exprs(s)?;
|
1709
|
-
|
1773
|
+
let expr = dsl::concat_lst(s).map_err(RbPolarsErr::from)?;
|
1774
|
+
Ok(expr.into())
|
1710
1775
|
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -6,6 +6,7 @@ mod error;
|
|
6
6
|
mod file;
|
7
7
|
mod lazy;
|
8
8
|
mod list_construction;
|
9
|
+
mod numo;
|
9
10
|
mod object;
|
10
11
|
mod prelude;
|
11
12
|
pub(crate) mod rb_modules;
|
@@ -72,7 +73,7 @@ fn init() -> RbResult<()> {
|
|
72
73
|
let class = module.define_class("RbDataFrame", Default::default())?;
|
73
74
|
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
74
75
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
75
|
-
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet,
|
76
|
+
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
76
77
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
77
78
|
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
78
79
|
class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
|
@@ -87,6 +88,7 @@ fn init() -> RbResult<()> {
|
|
87
88
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
|
88
89
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
89
90
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
91
|
+
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
90
92
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
|
91
93
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
92
94
|
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
|
@@ -149,7 +151,6 @@ fn init() -> RbResult<()> {
|
|
149
151
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
150
152
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
|
151
153
|
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
152
|
-
class.define_method("unique", method!(RbDataFrame::unique, 3))?;
|
153
154
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
154
155
|
class.define_method("max", method!(RbDataFrame::max, 0))?;
|
155
156
|
class.define_method("min", method!(RbDataFrame::min, 0))?;
|
@@ -302,10 +303,23 @@ fn init() -> RbResult<()> {
|
|
302
303
|
class.define_method("str_contains", method!(RbExpr::str_contains, 3))?;
|
303
304
|
class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
|
304
305
|
class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
|
306
|
+
class.define_method("binary_contains", method!(RbExpr::binary_contains, 1))?;
|
307
|
+
class.define_method("binary_ends_with", method!(RbExpr::binary_ends_with, 1))?;
|
308
|
+
class.define_method("binary_starts_with", method!(RbExpr::binary_starts_with, 1))?;
|
305
309
|
class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
|
306
310
|
class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
|
307
311
|
class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
|
308
312
|
class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
|
313
|
+
class.define_method("binary_hex_encode", method!(RbExpr::binary_hex_encode, 0))?;
|
314
|
+
class.define_method("binary_hex_decode", method!(RbExpr::binary_hex_decode, 1))?;
|
315
|
+
class.define_method(
|
316
|
+
"binary_base64_encode",
|
317
|
+
method!(RbExpr::binary_base64_encode, 0),
|
318
|
+
)?;
|
319
|
+
class.define_method(
|
320
|
+
"binary_base64_decode",
|
321
|
+
method!(RbExpr::binary_base64_decode, 1),
|
322
|
+
)?;
|
309
323
|
class.define_method(
|
310
324
|
"str_json_path_match",
|
311
325
|
method!(RbExpr::str_json_path_match, 1),
|
@@ -471,7 +485,7 @@ fn init() -> RbResult<()> {
|
|
471
485
|
function!(crate::lazy::dsl::spearman_rank_corr, 4),
|
472
486
|
)?;
|
473
487
|
class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
|
474
|
-
class.define_singleton_method("
|
488
|
+
class.define_singleton_method("arg_sort_by", function!(crate::lazy::dsl::arg_sort_by, 2))?;
|
475
489
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
476
490
|
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
477
491
|
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
@@ -485,7 +499,7 @@ fn init() -> RbResult<()> {
|
|
485
499
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
486
500
|
class.define_singleton_method(
|
487
501
|
"new_from_parquet",
|
488
|
-
function!(RbLazyFrame::new_from_parquet,
|
502
|
+
function!(RbLazyFrame::new_from_parquet, 8),
|
489
503
|
)?;
|
490
504
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
491
505
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -502,6 +516,7 @@ fn init() -> RbResult<()> {
|
|
502
516
|
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 3))?;
|
503
517
|
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
504
518
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
519
|
+
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
|
505
520
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
506
521
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
507
522
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
@@ -530,7 +545,7 @@ fn init() -> RbResult<()> {
|
|
530
545
|
class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
|
531
546
|
class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
|
532
547
|
class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
|
533
|
-
class.define_method("melt", method!(RbLazyFrame::melt,
|
548
|
+
class.define_method("melt", method!(RbLazyFrame::melt, 5))?;
|
534
549
|
class.define_method("with_row_count", method!(RbLazyFrame::with_row_count, 2))?;
|
535
550
|
class.define_method("drop_columns", method!(RbLazyFrame::drop_columns, 1))?;
|
536
551
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
@@ -558,6 +573,7 @@ fn init() -> RbResult<()> {
|
|
558
573
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
559
574
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
560
575
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
576
|
+
class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
|
561
577
|
class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
|
562
578
|
class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
|
563
579
|
class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
|
@@ -783,6 +799,9 @@ fn init() -> RbResult<()> {
|
|
783
799
|
class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
|
784
800
|
// class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
|
785
801
|
|
802
|
+
// npy
|
803
|
+
class.define_method("to_numo", method!(RbSeries::to_numo, 0))?;
|
804
|
+
|
786
805
|
let class = module.define_class("RbWhen", Default::default())?;
|
787
806
|
class.define_method("_then", method!(RbWhen::then, 1))?;
|
788
807
|
|
@@ -0,0 +1,57 @@
|
|
1
|
+
use magnus::{class, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars_core::prelude::*;
|
3
|
+
|
4
|
+
use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
|
5
|
+
|
6
|
+
impl RbSeries {
|
7
|
+
/// For numeric types, this should only be called for Series with null types.
|
8
|
+
/// This will cast to floats so that `nil = NAN`
|
9
|
+
pub fn to_numo(&self) -> RbResult<Value> {
|
10
|
+
let s = &self.series.borrow();
|
11
|
+
match s.dtype() {
|
12
|
+
DataType::Utf8 => {
|
13
|
+
let ca = s.utf8().unwrap();
|
14
|
+
|
15
|
+
// TODO make more efficient
|
16
|
+
let np_arr = RArray::from_iter(ca.into_iter());
|
17
|
+
class::object()
|
18
|
+
.const_get::<_, RModule>("Numo")?
|
19
|
+
.const_get::<_, RClass>("RObject")?
|
20
|
+
.funcall("cast", (np_arr,))
|
21
|
+
}
|
22
|
+
dt if dt.is_numeric() => {
|
23
|
+
if s.bit_repr_is_large() {
|
24
|
+
let s = s.cast(&DataType::Float64).unwrap();
|
25
|
+
let ca = s.f64().unwrap();
|
26
|
+
// TODO make more efficient
|
27
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
28
|
+
Some(v) => v,
|
29
|
+
None => f64::NAN,
|
30
|
+
}));
|
31
|
+
class::object()
|
32
|
+
.const_get::<_, RModule>("Numo")?
|
33
|
+
.const_get::<_, RClass>("DFloat")?
|
34
|
+
.funcall("cast", (np_arr,))
|
35
|
+
} else {
|
36
|
+
let s = s.cast(&DataType::Float32).unwrap();
|
37
|
+
let ca = s.f32().unwrap();
|
38
|
+
// TODO make more efficient
|
39
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
40
|
+
Some(v) => v,
|
41
|
+
None => f32::NAN,
|
42
|
+
}));
|
43
|
+
class::object()
|
44
|
+
.const_get::<_, RModule>("Numo")?
|
45
|
+
.const_get::<_, RClass>("SFloat")?
|
46
|
+
.funcall("cast", (np_arr,))
|
47
|
+
}
|
48
|
+
}
|
49
|
+
dt => {
|
50
|
+
raise_err!(
|
51
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
52
|
+
ComputeError
|
53
|
+
);
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -125,6 +125,12 @@ impl RbSeries {
|
|
125
125
|
RbSeries::new(s)
|
126
126
|
}
|
127
127
|
|
128
|
+
pub fn new_binary(name: String, val: Wrap<BinaryChunked>, _strict: bool) -> Self {
|
129
|
+
let mut s = val.0.into_series();
|
130
|
+
s.rename(&name);
|
131
|
+
RbSeries::new(s)
|
132
|
+
}
|
133
|
+
|
128
134
|
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
129
135
|
let val = val
|
130
136
|
.each()
|
@@ -489,40 +495,58 @@ impl RbSeries {
|
|
489
495
|
}
|
490
496
|
|
491
497
|
pub fn to_a(&self) -> RArray {
|
492
|
-
let series = self.series.borrow();
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
498
|
+
let series = &self.series.borrow();
|
499
|
+
|
500
|
+
fn to_list_recursive(series: &Series) -> RArray {
|
501
|
+
let rblist = match series.dtype() {
|
502
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()),
|
503
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()),
|
504
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()),
|
505
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()),
|
506
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()),
|
507
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()),
|
508
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()),
|
509
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()),
|
510
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()),
|
511
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()),
|
512
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()),
|
513
|
+
DataType::Categorical(_) => {
|
514
|
+
RArray::from_iter(series.categorical().unwrap().iter_str())
|
515
|
+
}
|
516
|
+
DataType::Date => {
|
517
|
+
let a = RArray::with_capacity(series.len());
|
518
|
+
for v in series.iter() {
|
519
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
520
|
+
}
|
521
|
+
return a;
|
522
|
+
}
|
523
|
+
DataType::Datetime(_, _) => {
|
524
|
+
let a = RArray::with_capacity(series.len());
|
525
|
+
for v in series.iter() {
|
526
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
527
|
+
}
|
528
|
+
return a;
|
529
|
+
}
|
530
|
+
DataType::Utf8 => {
|
531
|
+
let ca = series.utf8().unwrap();
|
532
|
+
return RArray::from_iter(ca);
|
533
|
+
}
|
534
|
+
DataType::Binary => {
|
535
|
+
let a = RArray::with_capacity(series.len());
|
536
|
+
for v in series.iter() {
|
537
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
538
|
+
}
|
539
|
+
return a;
|
540
|
+
}
|
541
|
+
DataType::Null | DataType::Unknown => {
|
542
|
+
panic!("to_a not implemented for null/unknown")
|
543
|
+
}
|
544
|
+
_ => todo!(),
|
545
|
+
};
|
546
|
+
rblist
|
525
547
|
}
|
548
|
+
|
549
|
+
to_list_recursive(series)
|
526
550
|
}
|
527
551
|
|
528
552
|
pub fn median(&self) -> Option<f64> {
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Polars
  # Namespace for binary related expressions.
  class BinaryExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Check if binary values contain a binary substring.
    #
    # @param lit [String]
    #   The binary substring to look for
    #
    # @return [Expr]
    def contains(lit)
      Utils.wrap_expr(_rbexpr.binary_contains(lit))
    end

    # Check if binary values end with a binary substring.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Expr]
    def ends_with(sub)
      Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
    end

    # Check if binary values start with a binary substring.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Expr]
    def starts_with(sub)
      Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
    end

    # Decode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   Raise an error if the underlying value cannot be decoded,
    #   otherwise mask out with a null value.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `encoding` is neither "hex" nor "base64".
    def decode(encoding, strict: true)
      if encoding == "hex"
        Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
      elsif encoding == "base64"
        Utils.wrap_expr(_rbexpr.binary_base64_decode(strict))
      else
        # Ruby strings need no brace escaping; doubled braces would be printed literally.
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end

    # Encode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `encoding` is neither "hex" nor "base64".
    def encode(encoding)
      if encoding == "hex"
        Utils.wrap_expr(_rbexpr.binary_hex_encode)
      elsif encoding == "base64"
        Utils.wrap_expr(_rbexpr.binary_base64_encode)
      else
        # Ruby strings need no brace escaping; doubled braces would be printed literally.
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Polars
  # Series.bin namespace.
  #
  # Every method here only forwards its arguments with `super`; the actual
  # work is expected to come from the included ExprDispatch module.
  class BinaryNameSpace
    include ExprDispatch

    self._accessor = "bin"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Check whether each binary value contains the given bytes.
    #
    # @param lit [String]
    #   The binary substring to look for
    #
    # @return [Series]
    def contains(lit)
      super(lit)
    end

    # Check whether each binary value ends with the given bytes.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Series]
    def ends_with(sub)
      super(sub)
    end

    # Check whether each binary value starts with the given bytes.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Series]
    def starts_with(sub)
      super(sub)
    end

    # Decode each value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   Raise an error if the underlying value cannot be decoded,
    #   otherwise mask out with a null value.
    #
    # @return [Series]
    def decode(encoding, strict: true)
      super(encoding, strict: strict)
    end

    # Encode each value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Series]
    def encode(encoding)
      super(encoding)
    end
  end
end
|