polars-df 0.12.0 → 0.13.0

This diff reflects the changes between publicly released versions of the package as they appear in its public registry, and is provided for informational purposes only.
@@ -1,10 +1,10 @@
  [package]
  name = "polars"
- version = "0.12.0"
+ version = "0.13.0"
  license = "MIT"
  authors = ["Andrew Kane <andrew@ankane.org>"]
  edition = "2021"
- rust-version = "1.76.0"
+ rust-version = "1.80.0"
  publish = false

  [lib]
@@ -15,14 +15,14 @@ ahash = "0.8"
  chrono = "0.4"
  either = "1.8"
  magnus = "0.7"
- polars-core = "=0.41.3"
- polars-parquet = "=0.41.3"
- polars-utils = "=0.41.3"
+ polars-core = "=0.42.0"
+ polars-parquet = "=0.42.0"
+ polars-utils = "=0.42.0"
  serde_json = "1"
  smartstring = "1"

  [dependencies.polars]
- version = "=0.41.3"
+ version = "=0.42.0"
  features = [
  "abs",
  "approx_unique",
@@ -117,5 +117,5 @@ features = [
  [target.'cfg(target_os = "linux")'.dependencies]
  jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }

- [target.'cfg(not(target_os = "linux"))'.dependencies]
+ [target.'cfg(not(any(target_os = "linux", target_os = "windows")))'.dependencies]
  mimalloc = { version = "0.1", default-features = false }
@@ -0,0 +1,13 @@
+ #[cfg(target_os = "linux")]
+ use jemallocator::Jemalloc;
+
+ #[cfg(not(any(target_os = "linux", target_os = "windows")))]
+ use mimalloc::MiMalloc;
+
+ #[global_allocator]
+ #[cfg(target_os = "linux")]
+ static ALLOC: Jemalloc = Jemalloc;
+
+ #[global_allocator]
+ #[cfg(not(any(target_os = "linux", target_os = "windows")))]
+ static ALLOC: MiMalloc = MiMalloc;
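The allocator setup now lives in a dedicated module (see the mod allocator; hunk below) and no longer selects MiMalloc on Windows, which therefore falls back to Rust's default system allocator. A minimal standalone sketch of the same cfg pattern, using std's System as a stand-in so it builds without the jemallocator and mimalloc crates:

    // Per-target global allocator selection; System stands in for the
    // jemallocator::Jemalloc (Linux) and mimalloc::MiMalloc (other
    // non-Windows targets) used by the real module.
    use std::alloc::System;

    #[global_allocator]
    #[cfg(target_os = "linux")]
    static ALLOC: System = System;

    #[global_allocator]
    #[cfg(not(any(target_os = "linux", target_os = "windows")))]
    static ALLOC: System = System;

    fn main() {
        // On Windows neither cfg matches, so no #[global_allocator] is
        // declared and the standard system allocator is used.
        println!("{:?}", vec![1, 2, 3]);
    }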
@@ -56,17 +56,15 @@ impl IntoValue for Wrap<&BinaryChunked> {
  }

  impl IntoValue for Wrap<&StructChunked> {
- fn into_value_with(self, _: &Ruby) -> Value {
+ fn into_value_with(self, ruby: &Ruby) -> Value {
  let s = self.0.clone().into_series();
  // todo! iterate its chunks and flatten.
  // make series::iter() accept a chunk index.
  let s = s.rechunk();
- let iter = s.iter().map(|av| {
- if let AnyValue::Struct(_, _, flds) = av {
- struct_dict(av._iter_struct_av(), flds)
- } else {
- unreachable!()
- }
+ let iter = s.iter().map(|av| match av {
+ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
+ AnyValue::Null => ruby.qnil().as_value(),
+ _ => unreachable!(),
  });

  RArray::from_iter(iter).into_value()
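Iterating a struct Series can yield AnyValue::Null for null rows; the rewritten match converts those rows to Ruby's nil instead of panicking in unreachable!(). A simplified, self-contained sketch of that control-flow change, using a stand-in enum rather than polars' real AnyValue:

    // Cell is a stand-in for AnyValue, trimmed to the two cases that matter here.
    enum Cell {
        Struct(Vec<i32>),
        Null,
    }

    fn convert(cells: Vec<Cell>) -> Vec<Option<Vec<i32>>> {
        cells
            .into_iter()
            .map(|c| match c {
                Cell::Struct(fields) => Some(fields),
                // The old `if let ... else { unreachable!() }` panicked here
                // whenever a struct column contained a null row.
                Cell::Null => None,
            })
            .collect()
    }

    fn main() {
        let out = convert(vec![Cell::Struct(vec![1, 2]), Cell::Null]);
        assert_eq!(out, vec![Some(vec![1, 2]), None]);
    }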
@@ -26,12 +26,6 @@ use crate::object::OBJECT_NAME;
  use crate::rb_modules::series;
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};

- pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
- // Safety:
- // Wrap is transparent.
- unsafe { std::mem::transmute(slice) }
- }
-
  pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
  // Safety:
  // Wrap is transparent.
@@ -348,12 +348,11 @@ impl RbDataFrame {
  value_name: Option<String>,
  variable_name: Option<String>,
  ) -> RbResult<Self> {
- let args = UnpivotArgs {
+ let args = UnpivotArgsIR {
  on: strings_to_smartstrings(on),
  index: strings_to_smartstrings(index),
  value_name: value_name.map(|s| s.into()),
  variable_name: variable_name.map(|s| s.into()),
- streamable: false,
  };

  let df = self.df.borrow().unpivot2(args).map_err(RbPolarsErr::from)?;
@@ -145,7 +145,7 @@ impl RbDataFrame {
  .with_projection(projection)
  .with_columns(columns)
  .read_parallel(parallel.0)
- .with_n_rows(n_rows)
+ .with_slice(n_rows.map(|x| (0, x)))
  .with_row_index(row_index)
  .set_low_memory(low_memory)
  .use_statistics(use_statistics)
@@ -156,7 +156,7 @@ impl RbDataFrame {
  .with_projection(projection)
  .with_columns(columns)
  .read_parallel(parallel.0)
- .with_n_rows(n_rows)
+ .with_slice(n_rows.map(|x| (0, x)))
  .with_row_index(row_index)
  .use_statistics(use_statistics)
  .set_rechunk(rechunk)
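Both parquet read paths move from with_n_rows to the slice-based API in polars 0.42, which takes an (offset, length) pair. A hedged sketch of the equivalent upstream call (assumes the polars crate with the parquet feature; read_head is a hypothetical helper):

    use std::fs::File;

    use polars::prelude::*;

    // Read only the first n_rows rows of a parquet file.
    fn read_head(path: &str, n_rows: usize) -> PolarsResult<DataFrame> {
        let file = File::open(path)?;
        ParquetReader::new(file)
            .with_slice(Some((0, n_rows))) // was .with_n_rows(Some(n_rows)) in 0.41
            .finish()
    }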
@@ -7,15 +7,11 @@ impl RbExpr {
  self.inner == other.inner
  }

- pub fn meta_pop(&self) -> RArray {
- RArray::from_iter(
- self.inner
- .clone()
- .meta()
- .pop()
- .into_iter()
- .map(RbExpr::from),
- )
+ pub fn meta_pop(&self) -> RbResult<RArray> {
+ let exprs = self.inner.clone().meta().pop().map_err(RbPolarsErr::from)?;
+ Ok(RArray::from_iter(
+ exprs.iter().map(|e| RbExpr::from(e.clone())),
+ ))
  }

  pub fn meta_root_names(&self) -> Vec<String> {
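meta_pop now returns RbResult because the underlying meta().pop() became fallible in polars 0.42, so the binding propagates the error instead of assuming it always succeeds. A hedged sketch of the upstream call (assumes the polars crate with the lazy and meta features):

    use polars::prelude::*;

    fn main() -> PolarsResult<()> {
        // pop() now returns PolarsResult<Vec<Expr>>, so the error is
        // propagated with `?` rather than silently assumed away.
        let inputs = col("a").sum().meta().pop()?;
        println!("popped {} input expression(s)", inputs.len());
        Ok(())
    }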
@@ -64,6 +64,6 @@ pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
  for item in seq.into_iter() {
  dfs.push(get_df(item)?);
  }
- let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
+ let df = functions::concat_df_horizontal(&dfs, true).map_err(RbPolarsErr::from)?;
  Ok(df.into())
  }
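concat_df_horizontal takes a second boolean in polars 0.42; the binding passes true, which is assumed here to enable the duplicate column-name check. A hedged usage sketch against the upstream function (assumes it is exposed as polars::functions::concat_df_horizontal with any required features enabled):

    use polars::functions::concat_df_horizontal;
    use polars::prelude::*;

    fn main() -> PolarsResult<()> {
        let left = df!("x" => &[1, 2, 3])?;
        let right = df!("y" => &["a", "b", "c"])?;
        // Second argument assumed to toggle duplicate-name checking.
        let wide = concat_df_horizontal(&[left, right], true)?;
        println!("{wide}");
        Ok(())
    }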
@@ -162,6 +162,7 @@ impl RbLazyFrame {
  hive_schema: Option<Wrap<Schema>>,
  try_parse_hive_dates: bool,
  glob: bool,
+ include_file_paths: Option<String>,
  ) -> RbResult<Self> {
  let parallel = parallel.0;
  let hive_schema = hive_schema.map(|s| Arc::new(s.0));
@@ -196,6 +197,7 @@ impl RbLazyFrame {
  use_statistics,
  hive_options,
  glob,
+ include_file_paths: include_file_paths.map(Arc::from),
  };

  let lf = if path.is_some() {
@@ -207,6 +209,7 @@ impl RbLazyFrame {
  Ok(lf.into())
  }

+ #[allow(clippy::too_many_arguments)]
  pub fn new_from_ipc(
  path: String,
  n_rows: Option<usize>,
@@ -214,12 +217,23 @@ impl RbLazyFrame {
  rechunk: bool,
  row_index: Option<(String, IdxSize)>,
  memory_map: bool,
+ hive_partitioning: Option<bool>,
+ hive_schema: Option<Wrap<Schema>>,
+ try_parse_hive_dates: bool,
+ include_file_paths: Option<String>,
  ) -> RbResult<Self> {
  let row_index = row_index.map(|(name, offset)| RowIndex {
  name: Arc::from(name.as_str()),
  offset,
  });

+ let hive_options = HiveOptions {
+ enabled: hive_partitioning,
+ hive_start_idx: 0,
+ schema: hive_schema.map(|x| Arc::new(x.0)),
+ try_parse_dates: try_parse_hive_dates,
+ };
+
  let args = ScanArgsIpc {
  n_rows,
  cache,
@@ -227,6 +241,8 @@ impl RbLazyFrame {
  row_index,
  memory_map,
  cloud_options: None,
+ hive_options,
+ include_file_paths: include_file_paths.map(Arc::from),
  };
  let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
  Ok(lf.into())
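new_from_ipc now forwards the same hive-partitioning options and include_file_paths column that the parquet scan already supported. A hedged sketch against the upstream API (assumes polars 0.42 with the ipc and lazy features and that ScanArgsIpc implements Default; the column name is illustrative):

    use polars::prelude::*;

    fn scan_arrow(path: &str) -> PolarsResult<LazyFrame> {
        let args = ScanArgsIpc {
            n_rows: Some(1_000),
            // Adds a column recording which file each row came from.
            include_file_paths: Some("source_file".into()),
            ..Default::default()
        };
        LazyFrame::scan_ipc(path, args)
    }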
@@ -348,7 +364,7 @@ impl RbLazyFrame {
  compression_level: Option<i32>,
  statistics: Wrap<StatisticsOptions>,
  row_group_size: Option<usize>,
- data_pagesize_limit: Option<usize>,
+ data_page_size: Option<usize>,
  maintain_order: bool,
  ) -> RbResult<()> {
  let compression = parse_parquet_compression(&compression, compression_level)?;
@@ -357,7 +373,7 @@ impl RbLazyFrame {
  compression,
  statistics: statistics.0,
  row_group_size,
- data_pagesize_limit,
+ data_page_size,
  maintain_order,
  };

@@ -752,22 +768,22 @@ impl RbLazyFrame {

  pub fn unpivot(
  &self,
- on: Vec<String>,
- index: Vec<String>,
+ on: RArray,
+ index: RArray,
  value_name: Option<String>,
  variable_name: Option<String>,
- streamable: bool,
- ) -> Self {
- let args = UnpivotArgs {
- on: strings_to_smartstrings(on),
- index: strings_to_smartstrings(index),
+ ) -> RbResult<Self> {
+ let on = rb_exprs_to_exprs(on)?;
+ let index = rb_exprs_to_exprs(index)?;
+ let args = UnpivotArgsDSL {
+ on: on.into_iter().map(|e| e.into()).collect(),
+ index: index.into_iter().map(|e| e.into()).collect(),
  value_name: value_name.map(|s| s.into()),
  variable_name: variable_name.map(|s| s.into()),
- streamable,
  };

  let ldf = self.ldf.borrow().clone();
- ldf.unpivot(args).into()
+ Ok(ldf.unpivot(args).into())
  }

  pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
@@ -1,3 +1,4 @@
+ mod allocator;
  mod batched_csv;
  mod conversion;
  mod dataframe;
@@ -31,20 +32,6 @@ use magnus::{define_module, function, method, prelude::*, Error, Ruby};
  use series::RbSeries;
  use sql::RbSQLContext;

- #[cfg(target_os = "linux")]
- use jemallocator::Jemalloc;
-
- #[cfg(not(target_os = "linux"))]
- use mimalloc::MiMalloc;
-
- #[global_allocator]
- #[cfg(target_os = "linux")]
- static GLOBAL: Jemalloc = Jemalloc;
-
- #[global_allocator]
- #[cfg(not(target_os = "linux"))]
- static GLOBAL: MiMalloc = MiMalloc;
-
  type RbResult<T> = Result<T, Error>;

  #[magnus::init]
@@ -722,9 +709,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
  class.define_singleton_method(
  "new_from_parquet",
- function!(RbLazyFrame::new_from_parquet, 13),
+ function!(RbLazyFrame::new_from_parquet, 14),
  )?;
- class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
+ class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
  class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
  class.define_method(
@@ -780,7 +767,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
  class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
  class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
  class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
- class.define_method("unpivot", method!(RbLazyFrame::unpivot, 5))?;
+ class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
  class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
  class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
  class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
@@ -878,7 +865,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
  class.define_method("arg_max", method!(RbSeries::arg_max, 0))?;
  class.define_method("take_with_series", method!(RbSeries::take_with_series, 1))?;
  class.define_method("null_count", method!(RbSeries::null_count, 0))?;
- class.define_method("has_validity", method!(RbSeries::has_validity, 0))?;
+ class.define_method("has_nulls", method!(RbSeries::has_nulls, 0))?;
  class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
  class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
  class.define_method("equals", method!(RbSeries::equals, 4))?;
@@ -93,7 +93,7 @@ fn iterator_to_struct(
  .collect::<Vec<_>>()
  });

- Ok(StructChunked::new(name, &fields)
+ Ok(StructChunked::from_series(name, &fields)
  .unwrap()
  .into_series()
  .into())