polars-df 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ use polars::lazy::frame::{LazyFrame, LazyGroupBy};
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
6
  use std::io::{BufWriter, Read};
7
+ use std::path::PathBuf;
7
8
 
8
9
  use crate::conversion::*;
9
10
  use crate::file::get_file_like;
@@ -118,7 +119,7 @@ impl RbLazyFrame {
118
119
  let skip_rows_after_header: usize = arguments[15].try_convert()?;
119
120
  let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
120
121
  let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
121
- let parse_dates: bool = arguments[18].try_convert()?;
122
+ let try_parse_dates: bool = arguments[18].try_convert()?;
122
123
  let eol_char: String = arguments[19].try_convert()?;
123
124
  // end arguments
124
125
 
@@ -153,7 +154,7 @@ impl RbLazyFrame {
153
154
  .with_skip_rows_after_header(skip_rows_after_header)
154
155
  .with_encoding(encoding.0)
155
156
  .with_row_count(row_count)
156
- .with_parse_dates(parse_dates)
157
+ .with_try_parse_dates(try_parse_dates)
157
158
  .with_null_values(null_values);
158
159
 
159
160
  if let Some(_lambda) = with_schema_modify {
@@ -163,6 +164,7 @@ impl RbLazyFrame {
163
164
  Ok(r.finish().map_err(RbPolarsErr::from)?.into())
164
165
  }
165
166
 
167
+ #[allow(clippy::too_many_arguments)]
166
168
  pub fn new_from_parquet(
167
169
  path: String,
168
170
  n_rows: Option<usize>,
@@ -171,6 +173,7 @@ impl RbLazyFrame {
171
173
  rechunk: bool,
172
174
  row_count: Option<(String, IdxSize)>,
173
175
  low_memory: bool,
176
+ use_statistics: bool,
174
177
  ) -> RbResult<Self> {
175
178
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
176
179
  let args = ScanArgsParquet {
@@ -182,6 +185,7 @@ impl RbLazyFrame {
182
185
  low_memory,
183
186
  // TODO support cloud options
184
187
  cloud_options: None,
188
+ use_statistics,
185
189
  };
186
190
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
187
191
  Ok(lf.into())
@@ -284,6 +288,32 @@ impl RbLazyFrame {
284
288
  Ok(df.into())
285
289
  }
286
290
 
291
+ #[allow(clippy::too_many_arguments)]
292
+ pub fn sink_parquet(
293
+ &self,
294
+ path: PathBuf,
295
+ compression: String,
296
+ compression_level: Option<i32>,
297
+ statistics: bool,
298
+ row_group_size: Option<usize>,
299
+ data_pagesize_limit: Option<usize>,
300
+ maintain_order: bool,
301
+ ) -> RbResult<()> {
302
+ let compression = parse_parquet_compression(&compression, compression_level)?;
303
+
304
+ let options = ParquetWriteOptions {
305
+ compression,
306
+ statistics,
307
+ row_group_size,
308
+ data_pagesize_limit,
309
+ maintain_order,
310
+ };
311
+
312
+ let ldf = self.ldf.clone();
313
+ ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
314
+ Ok(())
315
+ }
316
+
287
317
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
288
318
  let ldf = self.ldf.clone();
289
319
  let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
@@ -328,7 +358,7 @@ impl RbLazyFrame {
328
358
  let lazy_gb = ldf.groupby_rolling(
329
359
  by,
330
360
  RollingGroupOptions {
331
- index_column,
361
+ index_column: index_column.into(),
332
362
  period: Duration::parse(&period),
333
363
  offset: Duration::parse(&offset),
334
364
  closed_window,
@@ -359,7 +389,7 @@ impl RbLazyFrame {
359
389
  let lazy_gb = ldf.groupby_dynamic(
360
390
  by,
361
391
  DynamicGroupOptions {
362
- index_column,
392
+ index_column: index_column.into(),
363
393
  every: Duration::parse(&every),
364
394
  period: Duration::parse(&period),
365
395
  offset: Duration::parse(&offset),
@@ -415,10 +445,10 @@ impl RbLazyFrame {
415
445
  .force_parallel(force_parallel)
416
446
  .how(JoinType::AsOf(AsOfOptions {
417
447
  strategy: strategy.0,
418
- left_by,
419
- right_by,
448
+ left_by: left_by.map(strings_to_smartstrings),
449
+ right_by: right_by.map(strings_to_smartstrings),
420
450
  tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
421
- tolerance_str,
451
+ tolerance_str: tolerance_str.map(|s| s.into()),
422
452
  }))
423
453
  .suffix(suffix)
424
454
  .finish()
@@ -570,12 +600,14 @@ impl RbLazyFrame {
570
600
  value_vars: Vec<String>,
571
601
  value_name: Option<String>,
572
602
  variable_name: Option<String>,
603
+ streamable: bool,
573
604
  ) -> Self {
574
605
  let args = MeltArgs {
575
- id_vars,
576
- value_vars,
577
- value_name,
578
- variable_name,
606
+ id_vars: strings_to_smartstrings(id_vars),
607
+ value_vars: strings_to_smartstrings(value_vars),
608
+ value_name: value_name.map(|s| s.into()),
609
+ variable_name: variable_name.map(|s| s.into()),
610
+ streamable,
579
611
  };
580
612
 
581
613
  let ldf = self.ldf.clone();
@@ -596,8 +628,10 @@ impl RbLazyFrame {
596
628
  self.ldf.clone().into()
597
629
  }
598
630
 
599
- pub fn columns(&self) -> RbResult<Vec<String>> {
600
- Ok(self.get_schema()?.iter_names().cloned().collect())
631
+ pub fn columns(&self) -> RbResult<RArray> {
632
+ let schema = self.get_schema()?;
633
+ let iter = schema.iter_names().map(|s| s.as_str());
634
+ Ok(RArray::from_iter(iter))
601
635
  }
602
636
 
603
637
  pub fn dtypes(&self) -> RbResult<RArray> {
@@ -614,7 +648,7 @@ impl RbLazyFrame {
614
648
  // TODO remove unwrap
615
649
  schema_dict
616
650
  .aset::<String, Value>(
617
- fld.name().clone(),
651
+ fld.name().to_string(),
618
652
  Wrap(fld.data_type().clone()).into_value(),
619
653
  )
620
654
  .unwrap();
@@ -10,7 +10,7 @@ use crate::conversion::*;
10
10
  use crate::lazy::apply::*;
11
11
  use crate::lazy::utils::rb_exprs_to_exprs;
12
12
  use crate::utils::reinterpret;
13
- use crate::{RbResult, RbSeries};
13
+ use crate::{RbPolarsErr, RbResult, RbSeries};
14
14
 
15
15
  #[magnus::wrap(class = "Polars::RbExpr")]
16
16
  #[derive(Clone)]
@@ -715,6 +715,18 @@ impl RbExpr {
715
715
  .into()
716
716
  }
717
717
 
718
+ pub fn binary_contains(&self, lit: Vec<u8>) -> Self {
719
+ self.inner.clone().binary().contains_literal(lit).into()
720
+ }
721
+
722
+ pub fn binary_ends_with(&self, sub: Vec<u8>) -> Self {
723
+ self.inner.clone().binary().ends_with(sub).into()
724
+ }
725
+
726
+ pub fn binary_starts_with(&self, sub: Vec<u8>) -> Self {
727
+ self.inner.clone().binary().starts_with(sub).into()
728
+ }
729
+
718
730
  pub fn str_hex_encode(&self) -> Self {
719
731
  self.clone()
720
732
  .inner
@@ -763,6 +775,58 @@ impl RbExpr {
763
775
  .into()
764
776
  }
765
777
 
778
+ pub fn binary_hex_encode(&self) -> Self {
779
+ self.clone()
780
+ .inner
781
+ .map(
782
+ move |s| s.binary().map(|s| Some(s.hex_encode().into_series())),
783
+ GetOutput::same_type(),
784
+ )
785
+ .with_fmt("binary.hex_encode")
786
+ .into()
787
+ }
788
+
789
+ pub fn binary_hex_decode(&self, strict: bool) -> Self {
790
+ self.clone()
791
+ .inner
792
+ .map(
793
+ move |s| {
794
+ s.binary()?
795
+ .hex_decode(strict)
796
+ .map(|s| Some(s.into_series()))
797
+ },
798
+ GetOutput::same_type(),
799
+ )
800
+ .with_fmt("binary.hex_decode")
801
+ .into()
802
+ }
803
+
804
+ pub fn binary_base64_encode(&self) -> Self {
805
+ self.clone()
806
+ .inner
807
+ .map(
808
+ move |s| s.binary().map(|s| Some(s.base64_encode().into_series())),
809
+ GetOutput::same_type(),
810
+ )
811
+ .with_fmt("binary.base64_encode")
812
+ .into()
813
+ }
814
+
815
+ pub fn binary_base64_decode(&self, strict: bool) -> Self {
816
+ self.clone()
817
+ .inner
818
+ .map(
819
+ move |s| {
820
+ s.binary()?
821
+ .base64_decode(strict)
822
+ .map(|s| Some(s.into_series()))
823
+ },
824
+ GetOutput::same_type(),
825
+ )
826
+ .with_fmt("binary.base64_decode")
827
+ .into()
828
+ }
829
+
766
830
  pub fn str_json_path_match(&self, pat: String) -> Self {
767
831
  let function = move |s: Series| {
768
832
  let ca = s.utf8()?;
@@ -1654,9 +1718,9 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1654
1718
  polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1655
1719
  }
1656
1720
 
1657
- pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1721
+ pub fn arg_sort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1658
1722
  let by = rb_exprs_to_exprs(by)?;
1659
- Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1723
+ Ok(polars::lazy::dsl::arg_sort_by(by, &reverse).into())
1660
1724
  }
1661
1725
 
1662
1726
  #[magnus::wrap(class = "Polars::RbWhen")]
@@ -1706,5 +1770,6 @@ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
1706
1770
 
1707
1771
  pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
1708
1772
  let s = rb_exprs_to_exprs(s)?;
1709
- Ok(dsl::concat_lst(s).into())
1773
+ let expr = dsl::concat_lst(s).map_err(RbPolarsErr::from)?;
1774
+ Ok(expr.into())
1710
1775
  }
@@ -6,6 +6,7 @@ mod error;
6
6
  mod file;
7
7
  mod lazy;
8
8
  mod list_construction;
9
+ mod numo;
9
10
  mod object;
10
11
  mod prelude;
11
12
  pub(crate) mod rb_modules;
@@ -72,7 +73,7 @@ fn init() -> RbResult<()> {
72
73
  let class = module.define_class("RbDataFrame", Default::default())?;
73
74
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
74
75
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
75
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
76
+ class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
76
77
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
77
78
  class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
78
79
  class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
@@ -87,6 +88,7 @@ fn init() -> RbResult<()> {
87
88
  class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
88
89
  class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
89
90
  class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
91
+ class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
90
92
  class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
91
93
  class.define_method("add", method!(RbDataFrame::add, 1))?;
92
94
  class.define_method("sub", method!(RbDataFrame::sub, 1))?;
@@ -149,7 +151,6 @@ fn init() -> RbResult<()> {
149
151
  class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
150
152
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
151
153
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
152
- class.define_method("unique", method!(RbDataFrame::unique, 3))?;
153
154
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
154
155
  class.define_method("max", method!(RbDataFrame::max, 0))?;
155
156
  class.define_method("min", method!(RbDataFrame::min, 0))?;
@@ -302,10 +303,23 @@ fn init() -> RbResult<()> {
302
303
  class.define_method("str_contains", method!(RbExpr::str_contains, 3))?;
303
304
  class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
304
305
  class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
306
+ class.define_method("binary_contains", method!(RbExpr::binary_contains, 1))?;
307
+ class.define_method("binary_ends_with", method!(RbExpr::binary_ends_with, 1))?;
308
+ class.define_method("binary_starts_with", method!(RbExpr::binary_starts_with, 1))?;
305
309
  class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
306
310
  class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
307
311
  class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
308
312
  class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
313
+ class.define_method("binary_hex_encode", method!(RbExpr::binary_hex_encode, 0))?;
314
+ class.define_method("binary_hex_decode", method!(RbExpr::binary_hex_decode, 1))?;
315
+ class.define_method(
316
+ "binary_base64_encode",
317
+ method!(RbExpr::binary_base64_encode, 0),
318
+ )?;
319
+ class.define_method(
320
+ "binary_base64_decode",
321
+ method!(RbExpr::binary_base64_decode, 1),
322
+ )?;
309
323
  class.define_method(
310
324
  "str_json_path_match",
311
325
  method!(RbExpr::str_json_path_match, 1),
@@ -471,7 +485,7 @@ fn init() -> RbResult<()> {
471
485
  function!(crate::lazy::dsl::spearman_rank_corr, 4),
472
486
  )?;
473
487
  class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
474
- class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
488
+ class.define_singleton_method("arg_sort_by", function!(crate::lazy::dsl::arg_sort_by, 2))?;
475
489
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
476
490
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
477
491
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
@@ -485,7 +499,7 @@ fn init() -> RbResult<()> {
485
499
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
486
500
  class.define_singleton_method(
487
501
  "new_from_parquet",
488
- function!(RbLazyFrame::new_from_parquet, 7),
502
+ function!(RbLazyFrame::new_from_parquet, 8),
489
503
  )?;
490
504
  class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
491
505
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
@@ -502,6 +516,7 @@ fn init() -> RbResult<()> {
502
516
  class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 3))?;
503
517
  class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
504
518
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
519
+ class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
505
520
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
506
521
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
507
522
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
@@ -530,7 +545,7 @@ fn init() -> RbResult<()> {
530
545
  class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
531
546
  class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
532
547
  class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
533
- class.define_method("melt", method!(RbLazyFrame::melt, 4))?;
548
+ class.define_method("melt", method!(RbLazyFrame::melt, 5))?;
534
549
  class.define_method("with_row_count", method!(RbLazyFrame::with_row_count, 2))?;
535
550
  class.define_method("drop_columns", method!(RbLazyFrame::drop_columns, 1))?;
536
551
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
@@ -558,6 +573,7 @@ fn init() -> RbResult<()> {
558
573
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
559
574
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
560
575
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
576
+ class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
561
577
  class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
562
578
  class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
563
579
  class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
@@ -783,6 +799,9 @@ fn init() -> RbResult<()> {
783
799
  class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
784
800
  // class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
785
801
 
802
+ // numo
803
+ class.define_method("to_numo", method!(RbSeries::to_numo, 0))?;
804
+
786
805
  let class = module.define_class("RbWhen", Default::default())?;
787
806
  class.define_method("_then", method!(RbWhen::then, 1))?;
788
807
 
@@ -0,0 +1,57 @@
1
+ use magnus::{class, Module, RArray, RClass, RModule, Value};
2
+ use polars_core::prelude::*;
3
+
4
+ use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
5
+
6
+ impl RbSeries {
7
+ /// For numeric types, this should only be called for Series that contain null values.
8
+ /// Values are cast to floats so that each `nil` becomes `NaN`.
9
+ pub fn to_numo(&self) -> RbResult<Value> {
10
+ let s = &self.series.borrow();
11
+ match s.dtype() {
12
+ DataType::Utf8 => {
13
+ let ca = s.utf8().unwrap();
14
+
15
+ // TODO make more efficient
16
+ let np_arr = RArray::from_iter(ca.into_iter());
17
+ class::object()
18
+ .const_get::<_, RModule>("Numo")?
19
+ .const_get::<_, RClass>("RObject")?
20
+ .funcall("cast", (np_arr,))
21
+ }
22
+ dt if dt.is_numeric() => {
23
+ if s.bit_repr_is_large() {
24
+ let s = s.cast(&DataType::Float64).unwrap();
25
+ let ca = s.f64().unwrap();
26
+ // TODO make more efficient
27
+ let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
28
+ Some(v) => v,
29
+ None => f64::NAN,
30
+ }));
31
+ class::object()
32
+ .const_get::<_, RModule>("Numo")?
33
+ .const_get::<_, RClass>("DFloat")?
34
+ .funcall("cast", (np_arr,))
35
+ } else {
36
+ let s = s.cast(&DataType::Float32).unwrap();
37
+ let ca = s.f32().unwrap();
38
+ // TODO make more efficient
39
+ let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
40
+ Some(v) => v,
41
+ None => f32::NAN,
42
+ }));
43
+ class::object()
44
+ .const_get::<_, RModule>("Numo")?
45
+ .const_get::<_, RClass>("SFloat")?
46
+ .funcall("cast", (np_arr,))
47
+ }
48
+ }
49
+ dt => {
50
+ raise_err!(
51
+ format!("'to_numo' not supported for dtype: {dt:?}"),
52
+ ComputeError
53
+ );
54
+ }
55
+ }
56
+ }
57
+ }
@@ -125,6 +125,12 @@ impl RbSeries {
125
125
  RbSeries::new(s)
126
126
  }
127
127
 
128
+ pub fn new_binary(name: String, val: Wrap<BinaryChunked>, _strict: bool) -> Self {
129
+ let mut s = val.0.into_series();
130
+ s.rename(&name);
131
+ RbSeries::new(s)
132
+ }
133
+
128
134
  pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
129
135
  let val = val
130
136
  .each()
@@ -489,40 +495,58 @@ impl RbSeries {
489
495
  }
490
496
 
491
497
  pub fn to_a(&self) -> RArray {
492
- let series = self.series.borrow();
493
- if let Ok(s) = series.f32() {
494
- s.into_iter().collect()
495
- } else if let Ok(s) = series.f64() {
496
- s.into_iter().collect()
497
- } else if let Ok(s) = series.i8() {
498
- s.into_iter().collect()
499
- } else if let Ok(s) = series.i16() {
500
- s.into_iter().collect()
501
- } else if let Ok(s) = series.i32() {
502
- s.into_iter().collect()
503
- } else if let Ok(s) = series.i64() {
504
- s.into_iter().collect()
505
- } else if let Ok(s) = series.u8() {
506
- s.into_iter().collect()
507
- } else if let Ok(s) = series.u16() {
508
- s.into_iter().collect()
509
- } else if let Ok(s) = series.u32() {
510
- s.into_iter().collect()
511
- } else if let Ok(s) = series.u64() {
512
- s.into_iter().collect()
513
- } else if let Ok(s) = series.bool() {
514
- s.into_iter().collect()
515
- } else if let Ok(s) = series.utf8() {
516
- s.into_iter().collect()
517
- } else if let Ok(_s) = series.date() {
518
- let a = RArray::with_capacity(series.len());
519
- for v in series.iter() {
520
- a.push::<Value>(Wrap(v).into_value()).unwrap();
521
- }
522
- a
523
- } else {
524
- unimplemented!();
498
+ let series = &self.series.borrow();
499
+
500
+ fn to_list_recursive(series: &Series) -> RArray {
501
+ let rblist = match series.dtype() {
502
+ DataType::Boolean => RArray::from_iter(series.bool().unwrap()),
503
+ DataType::UInt8 => RArray::from_iter(series.u8().unwrap()),
504
+ DataType::UInt16 => RArray::from_iter(series.u16().unwrap()),
505
+ DataType::UInt32 => RArray::from_iter(series.u32().unwrap()),
506
+ DataType::UInt64 => RArray::from_iter(series.u64().unwrap()),
507
+ DataType::Int8 => RArray::from_iter(series.i8().unwrap()),
508
+ DataType::Int16 => RArray::from_iter(series.i16().unwrap()),
509
+ DataType::Int32 => RArray::from_iter(series.i32().unwrap()),
510
+ DataType::Int64 => RArray::from_iter(series.i64().unwrap()),
511
+ DataType::Float32 => RArray::from_iter(series.f32().unwrap()),
512
+ DataType::Float64 => RArray::from_iter(series.f64().unwrap()),
513
+ DataType::Categorical(_) => {
514
+ RArray::from_iter(series.categorical().unwrap().iter_str())
515
+ }
516
+ DataType::Date => {
517
+ let a = RArray::with_capacity(series.len());
518
+ for v in series.iter() {
519
+ a.push::<Value>(Wrap(v).into_value()).unwrap();
520
+ }
521
+ return a;
522
+ }
523
+ DataType::Datetime(_, _) => {
524
+ let a = RArray::with_capacity(series.len());
525
+ for v in series.iter() {
526
+ a.push::<Value>(Wrap(v).into_value()).unwrap();
527
+ }
528
+ return a;
529
+ }
530
+ DataType::Utf8 => {
531
+ let ca = series.utf8().unwrap();
532
+ return RArray::from_iter(ca);
533
+ }
534
+ DataType::Binary => {
535
+ let a = RArray::with_capacity(series.len());
536
+ for v in series.iter() {
537
+ a.push::<Value>(Wrap(v).into_value()).unwrap();
538
+ }
539
+ return a;
540
+ }
541
+ DataType::Null | DataType::Unknown => {
542
+ panic!("to_a not implemented for null/unknown")
543
+ }
544
+ _ => todo!(),
545
+ };
546
+ rblist
525
547
  }
548
+
549
+ to_list_recursive(series)
526
550
  }
527
551
 
528
552
  pub fn median(&self) -> Option<f64> {
@@ -30,7 +30,7 @@ module Polars
30
30
  new_columns: nil
31
31
  )
32
32
  if Utils.pathlike?(file)
33
- path = Utils.format_path(file)
33
+ path = Utils.normalise_filepath(file)
34
34
  end
35
35
 
36
36
  dtype_list = nil
@@ -0,0 +1,77 @@
1
+ module Polars
2
+ # Namespace for binary related expressions.
3
+ class BinaryExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Check if binaries in Series contain a binary substring.
13
+ #
14
+ # @param lit [String]
15
+ # The binary substring to look for
16
+ #
17
+ # @return [Expr]
18
+ def contains(lit)
19
+ Utils.wrap_expr(_rbexpr.binary_contains(lit))
20
+ end
21
+
22
+ # Check if values end with a binary substring.
23
+ #
24
+ # @param sub [String]
25
+ # Suffix substring.
26
+ #
27
+ # @return [Expr]
28
+ def ends_with(sub)
29
+ Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
30
+ end
31
+
32
+ # Check if values start with a binary substring.
33
+ #
34
+ # @param sub [String]
35
+ # Prefix substring.
36
+ #
37
+ # @return [Expr]
38
+ def starts_with(sub)
39
+ Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
40
+ end
41
+
42
+ # Decode a value using the provided encoding.
43
+ #
44
+ # @param encoding ["hex", "base64"]
45
+ # The encoding to use.
46
+ # @param strict [Boolean]
47
+ # Raise an error if the underlying value cannot be decoded,
48
+ # otherwise mask out with a null value.
49
+ #
50
+ # @return [Expr]
51
+ def decode(encoding, strict: true)
52
+ if encoding == "hex"
53
+ Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
54
+ elsif encoding == "base64"
55
+ Utils.wrap_expr(_rbexpr.binary_base64_decode(strict))
56
+ else
57
+ raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
58
+ end
59
+ end
60
+
61
+ # Encode a value using the provided encoding.
62
+ #
63
+ # @param encoding ["hex", "base64"]
64
+ # The encoding to use.
65
+ #
66
+ # @return [Expr]
67
+ def encode(encoding)
68
+ if encoding == "hex"
69
+ Utils.wrap_expr(_rbexpr.binary_hex_encode)
70
+ elsif encoding == "base64"
71
+ Utils.wrap_expr(_rbexpr.binary_base64_encode)
72
+ else
73
+ raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
74
+ end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,66 @@
1
+ module Polars
2
+ # Series.bin namespace.
3
+ class BinaryNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "bin"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Check if binaries in Series contain a binary substring.
14
+ #
15
+ # @param lit [String]
16
+ # The binary substring to look for
17
+ #
18
+ # @return [Series]
19
+ def contains(lit)
20
+ super
21
+ end
22
+
23
+ # Check if values end with a binary substring.
24
+ #
25
+ # @param sub [String]
26
+ # Suffix substring.
27
+ #
28
+ # @return [Series]
29
+ def ends_with(sub)
30
+ super
31
+ end
32
+
33
+ # Check if values start with a binary substring.
34
+ #
35
+ # @param sub [String]
36
+ # Prefix substring.
37
+ #
38
+ # @return [Series]
39
+ def starts_with(sub)
40
+ super
41
+ end
42
+
43
+ # Decode a value using the provided encoding.
44
+ #
45
+ # @param encoding ["hex", "base64"]
46
+ # The encoding to use.
47
+ # @param strict [Boolean]
48
+ # Raise an error if the underlying value cannot be decoded,
49
+ # otherwise mask out with a null value.
50
+ #
51
+ # @return [Series]
52
+ def decode(encoding, strict: true)
53
+ super
54
+ end
55
+
56
+ # Encode a value using the provided encoding.
57
+ #
58
+ # @param encoding ["hex", "base64"]
59
+ # The encoding to use.
60
+ #
61
+ # @return [Series]
62
+ def encode(encoding)
63
+ super
64
+ end
65
+ end
66
+ end