polars-df 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,7 @@ use polars::lazy::frame::{LazyFrame, LazyGroupBy};
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
6
  use std::io::{BufWriter, Read};
7
+ use std::path::PathBuf;
7
8
 
8
9
  use crate::conversion::*;
9
10
  use crate::file::get_file_like;
@@ -118,7 +119,7 @@ impl RbLazyFrame {
118
119
  let skip_rows_after_header: usize = arguments[15].try_convert()?;
119
120
  let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
120
121
  let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
121
- let parse_dates: bool = arguments[18].try_convert()?;
122
+ let try_parse_dates: bool = arguments[18].try_convert()?;
122
123
  let eol_char: String = arguments[19].try_convert()?;
123
124
  // end arguments
124
125
 
@@ -153,7 +154,7 @@ impl RbLazyFrame {
153
154
  .with_skip_rows_after_header(skip_rows_after_header)
154
155
  .with_encoding(encoding.0)
155
156
  .with_row_count(row_count)
156
- .with_parse_dates(parse_dates)
157
+ .with_try_parse_dates(try_parse_dates)
157
158
  .with_null_values(null_values);
158
159
 
159
160
  if let Some(_lambda) = with_schema_modify {
@@ -163,6 +164,7 @@ impl RbLazyFrame {
163
164
  Ok(r.finish().map_err(RbPolarsErr::from)?.into())
164
165
  }
165
166
 
167
+ #[allow(clippy::too_many_arguments)]
166
168
  pub fn new_from_parquet(
167
169
  path: String,
168
170
  n_rows: Option<usize>,
@@ -171,6 +173,7 @@ impl RbLazyFrame {
171
173
  rechunk: bool,
172
174
  row_count: Option<(String, IdxSize)>,
173
175
  low_memory: bool,
176
+ use_statistics: bool,
174
177
  ) -> RbResult<Self> {
175
178
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
176
179
  let args = ScanArgsParquet {
@@ -182,6 +185,7 @@ impl RbLazyFrame {
182
185
  low_memory,
183
186
  // TODO support cloud options
184
187
  cloud_options: None,
188
+ use_statistics,
185
189
  };
186
190
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
187
191
  Ok(lf.into())
@@ -284,6 +288,32 @@ impl RbLazyFrame {
284
288
  Ok(df.into())
285
289
  }
286
290
 
291
/// Builds a `ParquetWriteOptions` from the arguments and hands it, together with
/// `path`, to `sink_parquet` on a clone of the wrapped lazy frame.
///
/// `compression` and `compression_level` are resolved by `parse_parquet_compression`;
/// an error from that helper is returned to the caller unchanged.
/// An error from the sink itself is converted with `RbPolarsErr::from`.
/// Returns `Ok(())` when the sink call succeeds.
// The lint is silenced because the parameter list carries one argument per
// option that is forwarded into `ParquetWriteOptions` below.
+ #[allow(clippy::too_many_arguments)]
292
+ pub fn sink_parquet(
293
+ &self,
294
+ path: PathBuf,
295
+ compression: String,
296
+ compression_level: Option<i32>,
297
+ statistics: bool,
298
+ row_group_size: Option<usize>,
299
+ data_pagesize_limit: Option<usize>,
300
+ maintain_order: bool,
301
+ ) -> RbResult<()> {
// Shadows the string argument with the parsed compression value.
302
+ let compression = parse_parquet_compression(&compression, compression_level)?;
303
+
304
+ let options = ParquetWriteOptions {
305
+ compression,
306
+ statistics,
307
+ row_group_size,
308
+ data_pagesize_limit,
309
+ maintain_order,
310
+ };
311
+
// `sink_parquet` is called on an owned clone, leaving `self.ldf` untouched.
312
+ let ldf = self.ldf.clone();
313
+ ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
314
+ Ok(())
315
+ }
316
+
287
317
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
288
318
  let ldf = self.ldf.clone();
289
319
  let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
@@ -328,7 +358,7 @@ impl RbLazyFrame {
328
358
  let lazy_gb = ldf.groupby_rolling(
329
359
  by,
330
360
  RollingGroupOptions {
331
- index_column,
361
+ index_column: index_column.into(),
332
362
  period: Duration::parse(&period),
333
363
  offset: Duration::parse(&offset),
334
364
  closed_window,
@@ -359,7 +389,7 @@ impl RbLazyFrame {
359
389
  let lazy_gb = ldf.groupby_dynamic(
360
390
  by,
361
391
  DynamicGroupOptions {
362
- index_column,
392
+ index_column: index_column.into(),
363
393
  every: Duration::parse(&every),
364
394
  period: Duration::parse(&period),
365
395
  offset: Duration::parse(&offset),
@@ -415,10 +445,10 @@ impl RbLazyFrame {
415
445
  .force_parallel(force_parallel)
416
446
  .how(JoinType::AsOf(AsOfOptions {
417
447
  strategy: strategy.0,
418
- left_by,
419
- right_by,
448
+ left_by: left_by.map(strings_to_smartstrings),
449
+ right_by: right_by.map(strings_to_smartstrings),
420
450
  tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
421
- tolerance_str,
451
+ tolerance_str: tolerance_str.map(|s| s.into()),
422
452
  }))
423
453
  .suffix(suffix)
424
454
  .finish()
@@ -570,12 +600,14 @@ impl RbLazyFrame {
570
600
  value_vars: Vec<String>,
571
601
  value_name: Option<String>,
572
602
  variable_name: Option<String>,
603
+ streamable: bool,
573
604
  ) -> Self {
574
605
  let args = MeltArgs {
575
- id_vars,
576
- value_vars,
577
- value_name,
578
- variable_name,
606
+ id_vars: strings_to_smartstrings(id_vars),
607
+ value_vars: strings_to_smartstrings(value_vars),
608
+ value_name: value_name.map(|s| s.into()),
609
+ variable_name: variable_name.map(|s| s.into()),
610
+ streamable,
579
611
  };
580
612
 
581
613
  let ldf = self.ldf.clone();
@@ -596,8 +628,10 @@ impl RbLazyFrame {
596
628
  self.ldf.clone().into()
597
629
  }
598
630
 
599
- pub fn columns(&self) -> RbResult<Vec<String>> {
600
- Ok(self.get_schema()?.iter_names().cloned().collect())
631
+ pub fn columns(&self) -> RbResult<RArray> {
632
+ let schema = self.get_schema()?;
633
+ let iter = schema.iter_names().map(|s| s.as_str());
634
+ Ok(RArray::from_iter(iter))
601
635
  }
602
636
 
603
637
  pub fn dtypes(&self) -> RbResult<RArray> {
@@ -614,7 +648,7 @@ impl RbLazyFrame {
614
648
  // TODO remove unwrap
615
649
  schema_dict
616
650
  .aset::<String, Value>(
617
- fld.name().clone(),
651
+ fld.name().to_string(),
618
652
  Wrap(fld.data_type().clone()).into_value(),
619
653
  )
620
654
  .unwrap();
@@ -10,7 +10,7 @@ use crate::conversion::*;
10
10
  use crate::lazy::apply::*;
11
11
  use crate::lazy::utils::rb_exprs_to_exprs;
12
12
  use crate::utils::reinterpret;
13
- use crate::{RbResult, RbSeries};
13
+ use crate::{RbPolarsErr, RbResult, RbSeries};
14
14
 
15
15
  #[magnus::wrap(class = "Polars::RbExpr")]
16
16
  #[derive(Clone)]
@@ -715,6 +715,18 @@ impl RbExpr {
715
715
  .into()
716
716
  }
717
717
 
718
/// Returns a new expression obtained by calling `binary().contains_literal(lit)`
/// on a clone of the wrapped expression; `self` is left unchanged.
+ pub fn binary_contains(&self, lit: Vec<u8>) -> Self {
719
+ self.inner.clone().binary().contains_literal(lit).into()
720
+ }
721
+
722
/// Returns a new expression obtained by calling `binary().ends_with(sub)`
/// on a clone of the wrapped expression; `self` is left unchanged.
+ pub fn binary_ends_with(&self, sub: Vec<u8>) -> Self {
723
+ self.inner.clone().binary().ends_with(sub).into()
724
+ }
725
+
726
/// Returns a new expression obtained by calling `binary().starts_with(sub)`
/// on a clone of the wrapped expression; `self` is left unchanged.
+ pub fn binary_starts_with(&self, sub: Vec<u8>) -> Self {
727
+ self.inner.clone().binary().starts_with(sub).into()
728
+ }
729
+
718
730
  pub fn str_hex_encode(&self) -> Self {
719
731
  self.clone()
720
732
  .inner
@@ -763,6 +775,58 @@ impl RbExpr {
763
775
  .into()
764
776
  }
765
777
 
778
/// Returns an expression that maps each input series through `binary()` and
/// then `hex_encode()`.
///
/// An error from the `binary()` downcast is propagated out of the mapped closure.
/// The output type is declared with `GetOutput::same_type()` and the expression
/// is labelled `"binary.hex_encode"` through `with_fmt`.
+ pub fn binary_hex_encode(&self) -> Self {
779
+ self.clone()
780
+ .inner
781
+ .map(
782
+ move |s| s.binary().map(|s| Some(s.hex_encode().into_series())),
783
+ GetOutput::same_type(),
784
+ )
785
+ .with_fmt("binary.hex_encode")
786
+ .into()
787
+ }
788
+
789
/// Returns an expression that maps each input series through `binary()` and
/// then `hex_decode(strict)`.
///
/// `strict` is captured by the closure and forwarded to `hex_decode` as-is.
/// Errors from the `binary()` downcast or from `hex_decode` are propagated out
/// of the mapped closure. The output type is declared with
/// `GetOutput::same_type()` and the expression is labelled `"binary.hex_decode"`.
+ pub fn binary_hex_decode(&self, strict: bool) -> Self {
790
+ self.clone()
791
+ .inner
792
+ .map(
793
+ move |s| {
794
+ s.binary()?
795
+ .hex_decode(strict)
796
+ .map(|s| Some(s.into_series()))
797
+ },
798
+ GetOutput::same_type(),
799
+ )
800
+ .with_fmt("binary.hex_decode")
801
+ .into()
802
+ }
803
+
804
/// Returns an expression that maps each input series through `binary()` and
/// then `base64_encode()`.
///
/// An error from the `binary()` downcast is propagated out of the mapped closure.
/// The output type is declared with `GetOutput::same_type()` and the expression
/// is labelled `"binary.base64_encode"` through `with_fmt`.
+ pub fn binary_base64_encode(&self) -> Self {
805
+ self.clone()
806
+ .inner
807
+ .map(
808
+ move |s| s.binary().map(|s| Some(s.base64_encode().into_series())),
809
+ GetOutput::same_type(),
810
+ )
811
+ .with_fmt("binary.base64_encode")
812
+ .into()
813
+ }
814
+
815
/// Returns an expression that maps each input series through `binary()` and
/// then `base64_decode(strict)`.
///
/// `strict` is captured by the closure and forwarded to `base64_decode` as-is.
/// Errors from the `binary()` downcast or from `base64_decode` are propagated
/// out of the mapped closure. The output type is declared with
/// `GetOutput::same_type()` and the expression is labelled `"binary.base64_decode"`.
+ pub fn binary_base64_decode(&self, strict: bool) -> Self {
816
+ self.clone()
817
+ .inner
818
+ .map(
819
+ move |s| {
820
+ s.binary()?
821
+ .base64_decode(strict)
822
+ .map(|s| Some(s.into_series()))
823
+ },
824
+ GetOutput::same_type(),
825
+ )
826
+ .with_fmt("binary.base64_decode")
827
+ .into()
828
+ }
829
+
766
830
  pub fn str_json_path_match(&self, pat: String) -> Self {
767
831
  let function = move |s: Series| {
768
832
  let ca = s.utf8()?;
@@ -1654,9 +1718,9 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1654
1718
  polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1655
1719
  }
1656
1720
 
1657
- pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1721
+ pub fn arg_sort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1658
1722
  let by = rb_exprs_to_exprs(by)?;
1659
- Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1723
+ Ok(polars::lazy::dsl::arg_sort_by(by, &reverse).into())
1660
1724
  }
1661
1725
 
1662
1726
  #[magnus::wrap(class = "Polars::RbWhen")]
@@ -1706,5 +1770,6 @@ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
1706
1770
 
1707
1771
  pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
1708
1772
  let s = rb_exprs_to_exprs(s)?;
1709
- Ok(dsl::concat_lst(s).into())
1773
+ let expr = dsl::concat_lst(s).map_err(RbPolarsErr::from)?;
1774
+ Ok(expr.into())
1710
1775
  }
@@ -6,6 +6,7 @@ mod error;
6
6
  mod file;
7
7
  mod lazy;
8
8
  mod list_construction;
9
+ mod numo;
9
10
  mod object;
10
11
  mod prelude;
11
12
  pub(crate) mod rb_modules;
@@ -72,7 +73,7 @@ fn init() -> RbResult<()> {
72
73
  let class = module.define_class("RbDataFrame", Default::default())?;
73
74
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
74
75
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
75
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
76
+ class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
76
77
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
77
78
  class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
78
79
  class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
@@ -87,6 +88,7 @@ fn init() -> RbResult<()> {
87
88
  class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
88
89
  class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
89
90
  class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
91
+ class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
90
92
  class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
91
93
  class.define_method("add", method!(RbDataFrame::add, 1))?;
92
94
  class.define_method("sub", method!(RbDataFrame::sub, 1))?;
@@ -149,7 +151,6 @@ fn init() -> RbResult<()> {
149
151
  class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
150
152
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
151
153
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
152
- class.define_method("unique", method!(RbDataFrame::unique, 3))?;
153
154
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
154
155
  class.define_method("max", method!(RbDataFrame::max, 0))?;
155
156
  class.define_method("min", method!(RbDataFrame::min, 0))?;
@@ -302,10 +303,23 @@ fn init() -> RbResult<()> {
302
303
  class.define_method("str_contains", method!(RbExpr::str_contains, 3))?;
303
304
  class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
304
305
  class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
306
+ class.define_method("binary_contains", method!(RbExpr::binary_contains, 1))?;
307
+ class.define_method("binary_ends_with", method!(RbExpr::binary_ends_with, 1))?;
308
+ class.define_method("binary_starts_with", method!(RbExpr::binary_starts_with, 1))?;
305
309
  class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
306
310
  class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
307
311
  class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
308
312
  class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
313
+ class.define_method("binary_hex_encode", method!(RbExpr::binary_hex_encode, 0))?;
314
+ class.define_method("binary_hex_decode", method!(RbExpr::binary_hex_decode, 1))?;
315
+ class.define_method(
316
+ "binary_base64_encode",
317
+ method!(RbExpr::binary_base64_encode, 0),
318
+ )?;
319
+ class.define_method(
320
+ "binary_base64_decode",
321
+ method!(RbExpr::binary_base64_decode, 1),
322
+ )?;
309
323
  class.define_method(
310
324
  "str_json_path_match",
311
325
  method!(RbExpr::str_json_path_match, 1),
@@ -471,7 +485,7 @@ fn init() -> RbResult<()> {
471
485
  function!(crate::lazy::dsl::spearman_rank_corr, 4),
472
486
  )?;
473
487
  class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
474
- class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
488
+ class.define_singleton_method("arg_sort_by", function!(crate::lazy::dsl::arg_sort_by, 2))?;
475
489
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
476
490
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
477
491
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
@@ -485,7 +499,7 @@ fn init() -> RbResult<()> {
485
499
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
486
500
  class.define_singleton_method(
487
501
  "new_from_parquet",
488
- function!(RbLazyFrame::new_from_parquet, 7),
502
+ function!(RbLazyFrame::new_from_parquet, 8),
489
503
  )?;
490
504
  class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
491
505
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
@@ -502,6 +516,7 @@ fn init() -> RbResult<()> {
502
516
  class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 3))?;
503
517
  class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
504
518
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
519
+ class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
505
520
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
506
521
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
507
522
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
@@ -530,7 +545,7 @@ fn init() -> RbResult<()> {
530
545
  class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
531
546
  class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
532
547
  class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
533
- class.define_method("melt", method!(RbLazyFrame::melt, 4))?;
548
+ class.define_method("melt", method!(RbLazyFrame::melt, 5))?;
534
549
  class.define_method("with_row_count", method!(RbLazyFrame::with_row_count, 2))?;
535
550
  class.define_method("drop_columns", method!(RbLazyFrame::drop_columns, 1))?;
536
551
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
@@ -558,6 +573,7 @@ fn init() -> RbResult<()> {
558
573
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
559
574
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
560
575
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
576
+ class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
561
577
  class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
562
578
  class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
563
579
  class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
@@ -783,6 +799,9 @@ fn init() -> RbResult<()> {
783
799
  class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
784
800
  // class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
785
801
 
802
+ // numo
803
+ class.define_method("to_numo", method!(RbSeries::to_numo, 0))?;
804
+
786
805
  let class = module.define_class("RbWhen", Default::default())?;
787
806
  class.define_method("_then", method!(RbWhen::then, 1))?;
788
807
 
@@ -0,0 +1,57 @@
1
+ use magnus::{class, Module, RArray, RClass, RModule, Value};
2
+ use polars_core::prelude::*;
3
+
4
+ use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
5
+
6
+ impl RbSeries {
7
/// Converts the series into a Numo array by collecting its values into a Ruby
/// array and passing that array to `cast` on a class looked up under `Numo`:
/// `RObject` for `Utf8`, `DFloat` or `SFloat` for numeric dtypes.
/// Any other dtype raises a `ComputeError` naming the dtype.
///
+ /// For numeric types, this should only be called for Series with null types.
8
+ /// This will cast to floats so that `nil = NAN`
9
+ pub fn to_numo(&self) -> RbResult<Value> {
10
+ let s = &self.series.borrow();
11
+ match s.dtype() {
12
+ DataType::Utf8 => {
// The downcast cannot fail here: this arm only matches `DataType::Utf8`.
13
+ let ca = s.utf8().unwrap();
14
+
15
+ // TODO make more efficient
16
+ let np_arr = RArray::from_iter(ca.into_iter());
// Resolves the constant `Numo::RObject` at call time; a failed lookup is
// returned as an error through `?`.
17
+ class::object()
18
+ .const_get::<_, RModule>("Numo")?
19
+ .const_get::<_, RClass>("RObject")?
20
+ .funcall("cast", (np_arr,))
21
+ }
22
+ dt if dt.is_numeric() => {
// `bit_repr_is_large()` picks the float width: Float64 -> `Numo::DFloat`,
// otherwise Float32 -> `Numo::SFloat`.
23
+ if s.bit_repr_is_large() {
// NOTE(review): the `unwrap()` on the cast panics instead of returning an
// error — presumably unreachable for numeric dtypes; confirm.
24
+ let s = s.cast(&DataType::Float64).unwrap();
25
+ let ca = s.f64().unwrap();
26
+ // TODO make more efficient
// Missing values are replaced by NaN so every element is a plain float.
27
+ let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
28
+ Some(v) => v,
29
+ None => f64::NAN,
30
+ }));
31
+ class::object()
32
+ .const_get::<_, RModule>("Numo")?
33
+ .const_get::<_, RClass>("DFloat")?
34
+ .funcall("cast", (np_arr,))
35
+ } else {
// Same steps as the branch above, using Float32 and `Numo::SFloat`.
36
+ let s = s.cast(&DataType::Float32).unwrap();
37
+ let ca = s.f32().unwrap();
38
+ // TODO make more efficient
39
+ let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
40
+ Some(v) => v,
41
+ None => f32::NAN,
42
+ }));
43
+ class::object()
44
+ .const_get::<_, RModule>("Numo")?
45
+ .const_get::<_, RClass>("SFloat")?
46
+ .funcall("cast", (np_arr,))
47
+ }
48
+ }
// Every remaining dtype is rejected with an error rather than converted.
49
+ dt => {
50
+ raise_err!(
51
+ format!("'to_numo' not supported for dtype: {dt:?}"),
52
+ ComputeError
53
+ );
54
+ }
55
+ }
56
+ }
57
+ }
@@ -125,6 +125,12 @@ impl RbSeries {
125
125
  RbSeries::new(s)
126
126
  }
127
127
 
128
/// Builds an `RbSeries` from an already converted `BinaryChunked`: the chunked
/// array is turned into a series, renamed to `name`, and wrapped.
///
/// `_strict` is accepted but not read by this function.
+ pub fn new_binary(name: String, val: Wrap<BinaryChunked>, _strict: bool) -> Self {
129
+ let mut s = val.0.into_series();
130
+ s.rename(&name);
131
+ RbSeries::new(s)
132
+ }
133
+
128
134
  pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
129
135
  let val = val
130
136
  .each()
@@ -489,40 +495,58 @@ impl RbSeries {
489
495
  }
490
496
 
491
497
  pub fn to_a(&self) -> RArray {
492
- let series = self.series.borrow();
493
- if let Ok(s) = series.f32() {
494
- s.into_iter().collect()
495
- } else if let Ok(s) = series.f64() {
496
- s.into_iter().collect()
497
- } else if let Ok(s) = series.i8() {
498
- s.into_iter().collect()
499
- } else if let Ok(s) = series.i16() {
500
- s.into_iter().collect()
501
- } else if let Ok(s) = series.i32() {
502
- s.into_iter().collect()
503
- } else if let Ok(s) = series.i64() {
504
- s.into_iter().collect()
505
- } else if let Ok(s) = series.u8() {
506
- s.into_iter().collect()
507
- } else if let Ok(s) = series.u16() {
508
- s.into_iter().collect()
509
- } else if let Ok(s) = series.u32() {
510
- s.into_iter().collect()
511
- } else if let Ok(s) = series.u64() {
512
- s.into_iter().collect()
513
- } else if let Ok(s) = series.bool() {
514
- s.into_iter().collect()
515
- } else if let Ok(s) = series.utf8() {
516
- s.into_iter().collect()
517
- } else if let Ok(_s) = series.date() {
518
- let a = RArray::with_capacity(series.len());
519
- for v in series.iter() {
520
- a.push::<Value>(Wrap(v).into_value()).unwrap();
521
- }
522
- a
523
- } else {
524
- unimplemented!();
498
+ let series = &self.series.borrow();
499
+
500
+ fn to_list_recursive(series: &Series) -> RArray {
501
+ let rblist = match series.dtype() {
502
+ DataType::Boolean => RArray::from_iter(series.bool().unwrap()),
503
+ DataType::UInt8 => RArray::from_iter(series.u8().unwrap()),
504
+ DataType::UInt16 => RArray::from_iter(series.u16().unwrap()),
505
+ DataType::UInt32 => RArray::from_iter(series.u32().unwrap()),
506
+ DataType::UInt64 => RArray::from_iter(series.u64().unwrap()),
507
+ DataType::Int8 => RArray::from_iter(series.i8().unwrap()),
508
+ DataType::Int16 => RArray::from_iter(series.i16().unwrap()),
509
+ DataType::Int32 => RArray::from_iter(series.i32().unwrap()),
510
+ DataType::Int64 => RArray::from_iter(series.i64().unwrap()),
511
+ DataType::Float32 => RArray::from_iter(series.f32().unwrap()),
512
+ DataType::Float64 => RArray::from_iter(series.f64().unwrap()),
513
+ DataType::Categorical(_) => {
514
+ RArray::from_iter(series.categorical().unwrap().iter_str())
515
+ }
516
+ DataType::Date => {
517
+ let a = RArray::with_capacity(series.len());
518
+ for v in series.iter() {
519
+ a.push::<Value>(Wrap(v).into_value()).unwrap();
520
+ }
521
+ return a;
522
+ }
523
+ DataType::Datetime(_, _) => {
524
+ let a = RArray::with_capacity(series.len());
525
+ for v in series.iter() {
526
+ a.push::<Value>(Wrap(v).into_value()).unwrap();
527
+ }
528
+ return a;
529
+ }
530
+ DataType::Utf8 => {
531
+ let ca = series.utf8().unwrap();
532
+ return RArray::from_iter(ca);
533
+ }
534
+ DataType::Binary => {
535
+ let a = RArray::with_capacity(series.len());
536
+ for v in series.iter() {
537
+ a.push::<Value>(Wrap(v).into_value()).unwrap();
538
+ }
539
+ return a;
540
+ }
541
+ DataType::Null | DataType::Unknown => {
542
+ panic!("to_a not implemented for null/unknown")
543
+ }
544
+ _ => todo!(),
545
+ };
546
+ rblist
525
547
  }
548
+
549
+ to_list_recursive(series)
526
550
  }
527
551
 
528
552
  pub fn median(&self) -> Option<f64> {
@@ -30,7 +30,7 @@ module Polars
30
30
  new_columns: nil
31
31
  )
32
32
  if Utils.pathlike?(file)
33
- path = Utils.format_path(file)
33
+ path = Utils.normalise_filepath(file)
34
34
  end
35
35
 
36
36
  dtype_list = nil
@@ -0,0 +1,77 @@
1
+ module Polars
2
+ # Namespace for binary related expressions.
3
+ class BinaryExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Check if binaries in Series contain a binary substring.
13
+ #
14
+ # @param lit [String]
15
+ # The binary substring to look for
16
+ #
17
+ # @return [Expr]
18
+ def contains(lit)
19
+ Utils.wrap_expr(_rbexpr.binary_contains(lit))
20
+ end
21
+
22
+ # Check if string values end with a binary substring.
23
+ #
24
+ # @param sub [String]
25
+ # Suffix substring.
26
+ #
27
+ # @return [Expr]
28
+ def ends_with(sub)
29
+ Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
30
+ end
31
+
32
+ # Check if values start with a binary substring.
33
+ #
34
+ # @param sub [String]
35
+ # Prefix substring.
36
+ #
37
+ # @return [Expr]
38
+ def starts_with(sub)
39
+ Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
40
+ end
41
+
42
+ # Decode a value using the provided encoding.
43
+ #
44
+ # @param encoding ["hex", "base64"]
45
+ # The encoding to use.
46
+ # @param strict [Boolean]
47
+ # Raise an error if the underlying value cannot be decoded,
48
+ # otherwise mask out with a null value.
49
+ #
50
+ # @return [Expr]
51
+ def decode(encoding, strict: true)
52
+ if encoding == "hex"
53
+ Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
54
+ elsif encoding == "base64"
55
+ Utils.wrap_expr(_rbexpr.binary_base64_decode(strict))
56
+ else
57
+ raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
58
+ end
59
+ end
60
+
61
+ # Encode a value using the provided encoding.
62
+ #
63
+ # @param encoding ["hex", "base64"]
64
+ # The encoding to use.
65
+ #
66
+ # @return [Expr]
67
+ def encode(encoding)
68
+ if encoding == "hex"
69
+ Utils.wrap_expr(_rbexpr.binary_hex_encode)
70
+ elsif encoding == "base64"
71
+ Utils.wrap_expr(_rbexpr.binary_base64_encode)
72
+ else
73
+ raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
74
+ end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,66 @@
1
+ module Polars
2
+ # Series.bin namespace.
# Every public method below only calls `super`, so the behaviour comes from
# the included ExprDispatch module.
# NOTE(review): presumably ExprDispatch forwards each call to the expression
# namespace named by `_accessor` — confirm against ExprDispatch.
3
+ class BinaryNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "bin"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Check if binaries in Series contain a binary substring.
14
+ #
15
+ # @param lit [String]
16
+ # The binary substring to look for
17
+ #
18
+ # @return [Series]
19
+ def contains(lit)
20
+ super
21
+ end
22
+
23
+ # Check if values end with a binary substring.
24
+ #
25
+ # @param sub [String]
26
+ # Suffix substring.
27
+ #
28
+ # @return [Series]
29
+ def ends_with(sub)
30
+ super
31
+ end
32
+
33
+ # Check if values start with a binary substring.
34
+ #
35
+ # @param sub [String]
36
+ # Prefix substring.
37
+ #
38
+ # @return [Series]
39
+ def starts_with(sub)
40
+ super
41
+ end
42
+
43
+ # Decode a value using the provided encoding.
44
+ #
45
+ # @param encoding ["hex", "base64"]
46
+ # The encoding to use.
47
+ # @param strict [Boolean]
48
+ # Raise an error if the underlying value cannot be decoded,
49
+ # otherwise mask out with a null value.
50
+ #
51
+ # @return [Series]
52
+ def decode(encoding, strict: true)
53
+ super
54
+ end
55
+
56
+ # Encode a value using the provided encoding.
57
+ #
58
+ # @param encoding ["hex", "base64"]
59
+ # The encoding to use.
60
+ #
61
+ # @return [Series]
62
+ def encode(encoding)
63
+ super
64
+ end
65
+ end
66
+ end