polars-df 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Cargo.toml CHANGED
@@ -4,7 +4,6 @@ members = ["ext/polars"]
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
6
  halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
- arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
8
7
 
9
8
  [profile.release]
10
9
  strip = true
data/README.md CHANGED
@@ -50,6 +50,9 @@ From Parquet
50
50
 
51
51
  ```ruby
52
52
  Polars.read_parquet("file.parquet")
53
+
54
+ # or lazily with
55
+ Polars.scan_parquet("file.parquet")
53
56
  ```
54
57
 
55
58
  From Active Record
@@ -60,6 +63,32 @@ Polars.read_sql(User.all)
60
63
  Polars.read_sql("SELECT * FROM users")
61
64
  ```
62
65
 
66
+ From JSON
67
+
68
+ ```ruby
69
+ Polars.read_json("file.json")
70
+ # or
71
+ Polars.read_ndjson("file.ndjson")
72
+
73
+ # or lazily with
74
+ Polars.scan_ndjson("file.ndjson")
75
+ ```
76
+
77
+ From Feather / Arrow IPC
78
+
79
+ ```ruby
80
+ Polars.read_ipc("file.arrow")
81
+
82
+ # or lazily with
83
+ Polars.scan_ipc("file.arrow")
84
+ ```
85
+
86
+ From Avro
87
+
88
+ ```ruby
89
+ Polars.read_avro("file.avro")
90
+ ```
91
+
63
92
  From a hash
64
93
 
65
94
  ```ruby
@@ -282,10 +311,10 @@ df.to_dummies
282
311
 
283
312
  ## Conversion
284
313
 
285
- Array of rows
314
+ Array of hashes
286
315
 
287
316
  ```ruby
288
- df.rows
317
+ df.rows(named: true)
289
318
  ```
290
319
 
291
320
  Hash of series
@@ -308,6 +337,12 @@ Parquet
308
337
  df.write_parquet("file.parquet")
309
338
  ```
310
339
 
340
+ Numo array
341
+
342
+ ```ruby
343
+ df.to_numo
344
+ ```
345
+
311
346
  ## Types
312
347
 
313
348
  You can specify column types when creating a data frame
@@ -343,6 +378,38 @@ Cast a column
343
378
  df["a"].cast(Polars::Int32)
344
379
  ```
345
380
 
381
+ ## Visualization
382
+
383
+ Add [Vega](https://github.com/ankane/vega-ruby) to your application’s Gemfile:
384
+
385
+ ```ruby
386
+ gem "vega"
387
+ ```
388
+
389
+ And use:
390
+
391
+ ```ruby
392
+ df.plot("a", "b")
393
+ ```
394
+
395
+ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
396
+
397
+ ```ruby
398
+ df.plot("a", "b", type: "pie")
399
+ ```
400
+
401
+ Group data
402
+
403
+ ```ruby
404
+ df.groupby("c").plot("a", "b")
405
+ ```
406
+
407
+ Stacked columns or bars
408
+
409
+ ```ruby
410
+ df.groupby("c").plot("a", "b", stacked: true)
411
+ ```
412
+
346
413
  ## History
347
414
 
348
415
  View the [changelog](CHANGELOG.md)
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.3.0"
3
+ version = "0.4.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -12,11 +12,12 @@ crate-type = ["cdylib"]
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
14
  magnus = "0.5"
15
- polars-core = "0.27.0"
15
+ polars-core = "0.28.0"
16
16
  serde_json = "1"
17
+ smartstring = "1"
17
18
 
18
19
  [dependencies.polars]
19
- version = "0.27.0"
20
+ version = "0.28.0"
20
21
  features = [
21
22
  "abs",
22
23
  "arange",
@@ -44,6 +45,7 @@ features = [
44
45
  "ipc",
45
46
  "is_first",
46
47
  "is_in",
48
+ "is_unique",
47
49
  "json",
48
50
  "lazy",
49
51
  "lazy_regex",
@@ -7,11 +7,17 @@ use std::cell::RefCell;
7
7
  use std::path::PathBuf;
8
8
 
9
9
  use crate::conversion::*;
10
+ use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
10
11
  use crate::{RbDataFrame, RbPolarsErr, RbResult};
11
12
 
13
+ pub enum BatchedReader {
14
+ MMap(OwnedBatchedCsvReaderMmap),
15
+ Read(OwnedBatchedCsvReader),
16
+ }
17
+
12
18
  #[magnus::wrap(class = "Polars::RbBatchedCsv")]
13
19
  pub struct RbBatchedCsv {
14
- pub reader: RefCell<OwnedBatchedCsvReader>,
20
+ pub reader: RefCell<BatchedReader>,
15
21
  }
16
22
 
17
23
  impl RbBatchedCsv {
@@ -38,7 +44,7 @@ impl RbBatchedCsv {
38
44
  let comment_char: Option<String> = arguments[16].try_convert()?;
39
45
  let quote_char: Option<String> = arguments[17].try_convert()?;
40
46
  let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
41
- let parse_dates: bool = arguments[19].try_convert()?;
47
+ let try_parse_dates: bool = arguments[19].try_convert()?;
42
48
  let skip_rows_after_header: usize = arguments[20].try_convert()?;
43
49
  let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
44
50
  let sample_size: usize = arguments[22].try_convert()?;
@@ -95,14 +101,24 @@ impl RbBatchedCsv {
95
101
  .low_memory(low_memory)
96
102
  .with_comment_char(comment_char)
97
103
  .with_null_values(null_values)
98
- .with_parse_dates(parse_dates)
104
+ .with_try_parse_dates(try_parse_dates)
99
105
  .with_quote_char(quote_char)
100
106
  .with_end_of_line_char(eol_char)
101
107
  .with_skip_rows_after_header(skip_rows_after_header)
102
108
  .with_row_count(row_count)
103
- .sample_size(sample_size)
104
- .batched(overwrite_dtype.map(Arc::new))
105
- .map_err(RbPolarsErr::from)?;
109
+ .sample_size(sample_size);
110
+
111
+ let reader = if low_memory {
112
+ let reader = reader
113
+ .batched_read(overwrite_dtype.map(Arc::new))
114
+ .map_err(RbPolarsErr::from)?;
115
+ BatchedReader::Read(reader)
116
+ } else {
117
+ let reader = reader
118
+ .batched_mmap(overwrite_dtype.map(Arc::new))
119
+ .map_err(RbPolarsErr::from)?;
120
+ BatchedReader::MMap(reader)
121
+ };
106
122
 
107
123
  Ok(RbBatchedCsv {
108
124
  reader: RefCell::new(reader),
@@ -110,13 +126,12 @@ impl RbBatchedCsv {
110
126
  }
111
127
 
112
128
  pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
113
- let batches = self
114
- .reader
115
- .borrow_mut()
116
- .next_batches(n)
117
- .map_err(RbPolarsErr::from)?;
118
- Ok(batches.map(|batches| {
119
- RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
120
- }))
129
+ let batches = match &mut *self.reader.borrow_mut() {
130
+ BatchedReader::MMap(reader) => reader.next_batches(n),
131
+ BatchedReader::Read(reader) => reader.next_batches(n),
132
+ }
133
+ .map_err(RbPolarsErr::from)?;
134
+
135
+ Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
121
136
  }
122
137
  }
@@ -1,3 +1,6 @@
1
+ use std::fmt::{Display, Formatter};
2
+ use std::hash::{Hash, Hasher};
3
+
1
4
  use magnus::{
2
5
  class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
3
6
  RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
@@ -10,8 +13,7 @@ use polars::frame::NullStrategy;
10
13
  use polars::io::avro::AvroCompression;
11
14
  use polars::prelude::*;
12
15
  use polars::series::ops::NullBehavior;
13
- use std::fmt::{Display, Formatter};
14
- use std::hash::{Hash, Hasher};
16
+ use smartstring::alias::String as SmartString;
15
17
 
16
18
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
17
19
 
@@ -82,6 +84,22 @@ impl TryConvert for Wrap<Utf8Chunked> {
82
84
  }
83
85
  }
84
86
 
87
+ impl TryConvert for Wrap<BinaryChunked> {
88
+ fn try_convert(obj: Value) -> RbResult<Self> {
89
+ let (seq, len) = get_rbseq(obj)?;
90
+ let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
91
+
92
+ for res in seq.each() {
93
+ let item = res?;
94
+ match item.try_convert::<RString>() {
95
+ Ok(val) => builder.append_value(unsafe { val.as_slice() }),
96
+ Err(_) => builder.append_null(),
97
+ }
98
+ }
99
+ Ok(Wrap(builder.finish()))
100
+ }
101
+ }
102
+
85
103
  impl TryConvert for Wrap<NullValues> {
86
104
  fn try_convert(ob: Value) -> RbResult<Self> {
87
105
  if let Ok(s) = ob.try_convert::<String>() {
@@ -98,6 +116,14 @@ impl TryConvert for Wrap<NullValues> {
98
116
  }
99
117
  }
100
118
 
119
+ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
120
+ let dict = RHash::new();
121
+ for (fld, val) in flds.iter().zip(vals) {
122
+ dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
123
+ }
124
+ dict.into_value()
125
+ }
126
+
101
127
  impl IntoValue for Wrap<AnyValue<'_>> {
102
128
  fn into_value_with(self, _: &RubyHandle) -> Value {
103
129
  match self.0 {
@@ -114,7 +140,7 @@ impl IntoValue for Wrap<AnyValue<'_>> {
114
140
  AnyValue::Null => *QNIL,
115
141
  AnyValue::Boolean(v) => Value::from(v),
116
142
  AnyValue::Utf8(v) => Value::from(v),
117
- AnyValue::Utf8Owned(_v) => todo!(),
143
+ AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
118
144
  AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
119
145
  AnyValue::Date(v) => class::time()
120
146
  .funcall::<_, _, Value>("at", (v * 86400,))
@@ -125,7 +151,13 @@ impl IntoValue for Wrap<AnyValue<'_>> {
125
151
  .unwrap(),
126
152
  AnyValue::Datetime(v, tu, tz) => {
127
153
  let t = match tu {
128
- TimeUnit::Nanoseconds => todo!(),
154
+ TimeUnit::Nanoseconds => {
155
+ let sec = v / 1000000000;
156
+ let subsec = v % 1000000000;
157
+ class::time()
158
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
159
+ .unwrap()
160
+ }
129
161
  TimeUnit::Microseconds => {
130
162
  let sec = v / 1000000;
131
163
  let subsec = v % 1000000;
@@ -133,7 +165,13 @@ impl IntoValue for Wrap<AnyValue<'_>> {
133
165
  .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
134
166
  .unwrap()
135
167
  }
136
- TimeUnit::Milliseconds => todo!(),
168
+ TimeUnit::Milliseconds => {
169
+ let sec = v / 1000;
170
+ let subsec = v % 1000;
171
+ class::time()
172
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
173
+ .unwrap()
174
+ }
137
175
  };
138
176
 
139
177
  if tz.is_some() {
@@ -145,12 +183,19 @@ impl IntoValue for Wrap<AnyValue<'_>> {
145
183
  AnyValue::Duration(_v, _tu) => todo!(),
146
184
  AnyValue::Time(_v) => todo!(),
147
185
  AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
148
- ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
149
- AnyValue::StructOwned(_payload) => todo!(),
150
- AnyValue::Object(_v) => todo!(),
151
- AnyValue::ObjectOwned(_v) => todo!(),
152
- AnyValue::Binary(_v) => todo!(),
153
- AnyValue::BinaryOwned(_v) => todo!(),
186
+ ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
187
+ AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
188
+ AnyValue::Object(v) => {
189
+ let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
190
+ object.inner
191
+ }
192
+ AnyValue::ObjectOwned(v) => {
193
+ let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
194
+ object.inner
195
+ }
196
+ AnyValue::Binary(v) => RString::from_slice(v).into_value(),
197
+ AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
198
+ AnyValue::Decimal(_v, _scale) => todo!(),
154
199
  }
155
200
  }
156
201
  }
@@ -170,12 +215,12 @@ impl IntoValue for Wrap<DataType> {
170
215
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
171
216
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
172
217
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
173
- DataType::Decimal128(_) => todo!(),
218
+ DataType::Decimal(_precision, _scale) => todo!(),
174
219
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
175
220
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
176
221
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
177
222
  DataType::List(inner) => {
178
- let inner = Wrap(*inner.clone());
223
+ let inner = Wrap(*inner);
179
224
  let list_class = pl.const_get::<_, Value>("List").unwrap();
180
225
  list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
181
226
  }
@@ -183,7 +228,7 @@ impl IntoValue for Wrap<DataType> {
183
228
  DataType::Datetime(tu, tz) => {
184
229
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
185
230
  datetime_class
186
- .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
231
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
187
232
  .unwrap()
188
233
  }
189
234
  DataType::Duration(tu) => {
@@ -198,7 +243,7 @@ impl IntoValue for Wrap<DataType> {
198
243
  DataType::Struct(fields) => {
199
244
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
200
245
  let iter = fields.iter().map(|fld| {
201
- let name = fld.name().clone();
246
+ let name = fld.name().as_str();
202
247
  let dtype = Wrap(fld.data_type().clone());
203
248
  field_class
204
249
  .funcall::<_, _, Value>("new", (name, dtype))
@@ -328,7 +373,7 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
328
373
  let n = 25;
329
374
  let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
330
375
  .map_err(RbPolarsErr::from)?;
331
- let s = Series::from_any_values_and_dtype("", &avs, &dtype)
376
+ let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
332
377
  .map_err(RbPolarsErr::from)?;
333
378
  Ok(Wrap(AnyValue::List(s)))
334
379
  }
@@ -858,3 +903,11 @@ pub fn parse_parquet_compression(
858
903
  };
859
904
  Ok(parsed)
860
905
  }
906
+
907
+ pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
908
+ where
909
+ I: IntoIterator<Item = S>,
910
+ S: AsRef<str>,
911
+ {
912
+ container.into_iter().map(|s| s.as_ref().into()).collect()
913
+ }
@@ -6,6 +6,7 @@ use polars::io::mmap::ReaderBytes;
6
6
  use polars::io::RowCount;
7
7
  use polars::prelude::pivot::{pivot, pivot_stable};
8
8
  use polars::prelude::*;
9
+ use polars_core::utils::try_get_supertype;
9
10
  use std::cell::RefCell;
10
11
  use std::io::{BufWriter, Cursor};
11
12
  use std::ops::Deref;
@@ -114,7 +115,7 @@ impl RbDataFrame {
114
115
  let comment_char: Option<String> = arguments[17].try_convert()?;
115
116
  let quote_char: Option<String> = arguments[18].try_convert()?;
116
117
  let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
117
- let parse_dates: bool = arguments[20].try_convert()?;
118
+ let try_parse_dates: bool = arguments[20].try_convert()?;
118
119
  let skip_rows_after_header: usize = arguments[21].try_convert()?;
119
120
  let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
120
121
  let sample_size: usize = arguments[23].try_convert()?;
@@ -167,12 +168,12 @@ impl RbDataFrame {
167
168
  .with_columns(columns)
168
169
  .with_n_threads(n_threads)
169
170
  .with_path(path)
170
- .with_dtypes(overwrite_dtype.as_ref())
171
+ .with_dtypes(overwrite_dtype.map(Arc::new))
171
172
  .with_dtypes_slice(overwrite_dtype_slice.as_deref())
172
173
  .low_memory(low_memory)
173
174
  .with_comment_char(comment_char)
174
175
  .with_null_values(null_values)
175
- .with_parse_dates(parse_dates)
176
+ .with_try_parse_dates(try_parse_dates)
176
177
  .with_quote_char(quote_char)
177
178
  .with_end_of_line_char(eol_char)
178
179
  .with_skip_rows_after_header(skip_rows_after_header)
@@ -183,6 +184,7 @@ impl RbDataFrame {
183
184
  Ok(df.into())
184
185
  }
185
186
 
187
+ #[allow(clippy::too_many_arguments)]
186
188
  pub fn read_parquet(
187
189
  rb_f: Value,
188
190
  columns: Option<Vec<String>>,
@@ -191,6 +193,8 @@ impl RbDataFrame {
191
193
  parallel: Wrap<ParallelStrategy>,
192
194
  row_count: Option<(String, IdxSize)>,
193
195
  low_memory: bool,
196
+ use_statistics: bool,
197
+ rechunk: bool,
194
198
  ) -> RbResult<Self> {
195
199
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
196
200
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
@@ -201,6 +205,8 @@ impl RbDataFrame {
201
205
  .with_n_rows(n_rows)
202
206
  .with_row_count(row_count)
203
207
  .set_low_memory(low_memory)
208
+ .use_statistics(use_statistics)
209
+ .set_rechunk(rechunk)
204
210
  .finish()
205
211
  .map_err(RbPolarsErr::from)?;
206
212
  Ok(RbDataFrame::new(df))
@@ -253,7 +259,7 @@ impl RbDataFrame {
253
259
  use polars::io::avro::AvroWriter;
254
260
 
255
261
  if let Ok(s) = rb_f.try_convert::<String>() {
256
- let f = std::fs::File::create(&s).unwrap();
262
+ let f = std::fs::File::create(s).unwrap();
257
263
  AvroWriter::new(f)
258
264
  .with_compression(compression.0)
259
265
  .finish(&mut self.df.borrow_mut())
@@ -338,7 +344,7 @@ impl RbDataFrame {
338
344
  // ensure the new names are used
339
345
  if let Some(schema) = &schema_overwrite {
340
346
  for (new_name, name) in schema.0.iter_names().zip(names.iter_mut()) {
341
- *name = new_name.clone();
347
+ *name = new_name.to_string();
342
348
  }
343
349
  }
344
350
  let rbdf = Self::finish_from_rows(
@@ -347,17 +353,19 @@ impl RbDataFrame {
347
353
  schema_overwrite.map(|wrap| wrap.0),
348
354
  )?;
349
355
 
350
- rbdf.df
351
- .borrow_mut()
352
- .get_columns_mut()
353
- .iter_mut()
354
- .zip(&names)
355
- .for_each(|(s, name)| {
356
- s.rename(name);
357
- });
356
+ unsafe {
357
+ rbdf.df
358
+ .borrow_mut()
359
+ .get_columns_mut()
360
+ .iter_mut()
361
+ .zip(&names)
362
+ .for_each(|(s, name)| {
363
+ s.rename(name);
364
+ });
365
+ }
358
366
  let length = names.len();
359
367
  if names.into_iter().collect::<PlHashSet<_>>().len() != length {
360
- let err = PolarsError::SchemaMisMatch("duplicate column names found".into());
368
+ let err = PolarsError::SchemaMismatch("duplicate column names found".into());
361
369
  Err(RbPolarsErr::from(err))?;
362
370
  }
363
371
 
@@ -393,7 +401,7 @@ impl RbDataFrame {
393
401
  let null = null_value.unwrap_or_default();
394
402
 
395
403
  if let Ok(s) = rb_f.try_convert::<String>() {
396
- let f = std::fs::File::create(&s).unwrap();
404
+ let f = std::fs::File::create(s).unwrap();
397
405
  // no need for a buffered writer, because the csv writer does internal buffering
398
406
  CsvWriter::new(f)
399
407
  .has_header(has_header)
@@ -435,7 +443,7 @@ impl RbDataFrame {
435
443
  compression: Wrap<Option<IpcCompression>>,
436
444
  ) -> RbResult<()> {
437
445
  if let Ok(s) = rb_f.try_convert::<String>() {
438
- let f = std::fs::File::create(&s).unwrap();
446
+ let f = std::fs::File::create(s).unwrap();
439
447
  IpcWriter::new(f)
440
448
  .with_compression(compression.0)
441
449
  .finish(&mut self.df.borrow_mut())
@@ -493,6 +501,25 @@ impl RbDataFrame {
493
501
  .into()
494
502
  }
495
503
 
504
+ pub fn to_numo(&self) -> Option<Value> {
505
+ let mut st = None;
506
+ for s in self.df.borrow().iter() {
507
+ let dt_i = s.dtype();
508
+ match st {
509
+ None => st = Some(dt_i.clone()),
510
+ Some(ref mut st) => {
511
+ *st = try_get_supertype(st, dt_i).ok()?;
512
+ }
513
+ }
514
+ }
515
+ let st = st?;
516
+
517
+ match st {
518
+ // TODO
519
+ _ => None,
520
+ }
521
+ }
522
+
496
523
  pub fn write_parquet(
497
524
  &self,
498
525
  rb_f: Value,
@@ -504,7 +531,7 @@ impl RbDataFrame {
504
531
  let compression = parse_parquet_compression(&compression, compression_level)?;
505
532
 
506
533
  if let Ok(s) = rb_f.try_convert::<String>() {
507
- let f = std::fs::File::create(&s).unwrap();
534
+ let f = std::fs::File::create(s).unwrap();
508
535
  ParquetWriter::new(f)
509
536
  .with_compression(compression)
510
537
  .with_statistics(statistics)
@@ -607,7 +634,7 @@ impl RbDataFrame {
607
634
  }
608
635
 
609
636
  pub fn get_columns(&self) -> RArray {
610
- let cols = self.df.borrow().get_columns().clone();
637
+ let cols = self.df.borrow().get_columns().to_vec();
611
638
  to_rbseries_collection(cols)
612
639
  }
613
640
 
@@ -861,10 +888,11 @@ impl RbDataFrame {
861
888
  variable_name: Option<String>,
862
889
  ) -> RbResult<Self> {
863
890
  let args = MeltArgs {
864
- id_vars,
865
- value_vars,
866
- value_name,
867
- variable_name,
891
+ id_vars: strings_to_smartstrings(id_vars),
892
+ value_vars: strings_to_smartstrings(value_vars),
893
+ value_name: value_name.map(|s| s.into()),
894
+ variable_name: variable_name.map(|s| s.into()),
895
+ streamable: false,
868
896
  };
869
897
 
870
898
  let df = self.df.borrow().melt2(args).map_err(RbPolarsErr::from)?;
@@ -877,22 +905,26 @@ impl RbDataFrame {
877
905
  values: Vec<String>,
878
906
  index: Vec<String>,
879
907
  columns: Vec<String>,
880
- aggregate_expr: &RbExpr,
881
908
  maintain_order: bool,
882
909
  sort_columns: bool,
910
+ aggregate_expr: Option<&RbExpr>,
883
911
  separator: Option<String>,
884
912
  ) -> RbResult<Self> {
885
913
  let fun = match maintain_order {
886
914
  true => pivot_stable,
887
915
  false => pivot,
888
916
  };
917
+ let agg_expr = match aggregate_expr {
918
+ Some(aggregate_expr) => Some(aggregate_expr.inner.clone()),
919
+ None => None,
920
+ };
889
921
  let df = fun(
890
922
  &self.df.borrow(),
891
923
  values,
892
924
  index,
893
925
  columns,
894
- aggregate_expr.inner.clone(),
895
926
  sort_columns,
927
+ agg_expr,
896
928
  separator.as_deref(),
897
929
  )
898
930
  .map_err(RbPolarsErr::from)?;
@@ -913,21 +945,6 @@ impl RbDataFrame {
913
945
  self.df.borrow().shift(periods).into()
914
946
  }
915
947
 
916
- pub fn unique(
917
- &self,
918
- maintain_order: bool,
919
- subset: Option<Vec<String>>,
920
- keep: Wrap<UniqueKeepStrategy>,
921
- ) -> RbResult<Self> {
922
- let subset = subset.as_ref().map(|v| v.as_ref());
923
- let df = match maintain_order {
924
- true => self.df.borrow().unique_stable(subset, keep.0),
925
- false => self.df.borrow().unique(subset, keep.0),
926
- }
927
- .map_err(RbPolarsErr::from)?;
928
- Ok(df.into())
929
- }
930
-
931
948
  pub fn lazy(&self) -> RbLazyFrame {
932
949
  self.df.borrow().clone().lazy().into()
933
950
  }
@@ -43,3 +43,11 @@ impl ComputeError {
43
43
  Error::new(exception::runtime_error(), message)
44
44
  }
45
45
  }
46
+
47
+ #[macro_export]
48
+ macro_rules! raise_err(
49
+ ($msg:expr, $err:ident) => {{
50
+ Err(PolarsError::$err($msg.into())).map_err(RbPolarsErr::from)?;
51
+ unreachable!()
52
+ }}
53
+ );