polars-df 0.3.0 → 0.4.0

data/Cargo.toml CHANGED
@@ -4,7 +4,6 @@ members = ["ext/polars"]
  [patch.crates-io]
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
  halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
- arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }

  [profile.release]
  strip = true
data/README.md CHANGED
@@ -50,6 +50,9 @@ From Parquet
  ```ruby
  Polars.read_parquet("file.parquet")
+
+ # or lazily with
+ Polars.scan_parquet("file.parquet")
  ```

  From Active Record
@@ -60,6 +63,32 @@ Polars.read_sql(User.all)
  Polars.read_sql("SELECT * FROM users")
  ```

+ From JSON
+
+ ```ruby
+ Polars.read_json("file.json")
+ # or
+ Polars.read_ndjson("file.ndjson")
+
+ # or lazily with
+ Polars.scan_ndjson("file.ndjson")
+ ```
+
+ From Feather / Arrow IPC
+
+ ```ruby
+ Polars.read_ipc("file.arrow")
+
+ # or lazily with
+ Polars.scan_ipc("file.arrow")
+ ```
+
+ From Avro
+
+ ```ruby
+ Polars.read_avro("file.avro")
+ ```
+
  From a hash

  ```ruby
@@ -282,10 +311,10 @@ df.to_dummies
  ## Conversion

- Array of rows
+ Array of hashes

  ```ruby
- df.rows
+ df.rows(named: true)
  ```

  Hash of series
@@ -308,6 +337,12 @@ Parquet
  df.write_parquet("file.parquet")
  ```

+ Numo array
+
+ ```ruby
+ df.to_numo
+ ```
+
  ## Types

  You can specify column types when creating a data frame
@@ -343,6 +378,38 @@ Cast a column
  df["a"].cast(Polars::Int32)
  ```

+ ## Visualization
+
+ Add [Vega](https://github.com/ankane/vega-ruby) to your application’s Gemfile:
+
+ ```ruby
+ gem "vega"
+ ```
+
+ And use:
+
+ ```ruby
+ df.plot("a", "b")
+ ```
+
+ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
+
+ ```ruby
+ df.plot("a", "b", type: "pie")
+ ```
+
+ Group data
+
+ ```ruby
+ df.groupby("c").plot("a", "b")
+ ```
+
+ Stacked columns or bars
+
+ ```ruby
+ df.groupby("c").plot("a", "b", stacked: true)
+ ```
+
  ## History

  View the [changelog](CHANGELOG.md)
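The new `scan_*` readers in the README return lazy frames rather than materialized data, so filters and column selections can be pushed down into the scan. A minimal sketch of how the pieces compose, assuming the usual `LazyFrame` API (`filter`, `select`, `collect`) and `Polars.col`:

```ruby
# scan lazily, push the filter and projection into the read,
# and only materialize the matching rows
lf = Polars.scan_parquet("file.parquet")
df = lf.filter(Polars.col("a") > 1).select(["a", "b"]).collect
```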
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
  [package]
  name = "polars"
- version = "0.3.0"
+ version = "0.4.0"
  license = "MIT"
  authors = ["Andrew Kane <andrew@ankane.org>"]
  edition = "2021"
@@ -12,11 +12,12 @@ crate-type = ["cdylib"]
  [dependencies]
  ahash = "0.8"
  magnus = "0.5"
- polars-core = "0.27.0"
+ polars-core = "0.28.0"
  serde_json = "1"
+ smartstring = "1"

  [dependencies.polars]
- version = "0.27.0"
+ version = "0.28.0"
  features = [
      "abs",
      "arange",
@@ -44,6 +45,7 @@ features = [
      "ipc",
      "is_first",
      "is_in",
+     "is_unique",
      "json",
      "lazy",
      "lazy_regex",
data/ext/polars/src/batched_csv.rs CHANGED
@@ -7,11 +7,17 @@ use std::cell::RefCell;
  use std::path::PathBuf;

  use crate::conversion::*;
+ use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
  use crate::{RbDataFrame, RbPolarsErr, RbResult};

+ pub enum BatchedReader {
+     MMap(OwnedBatchedCsvReaderMmap),
+     Read(OwnedBatchedCsvReader),
+ }
+
  #[magnus::wrap(class = "Polars::RbBatchedCsv")]
  pub struct RbBatchedCsv {
-     pub reader: RefCell<OwnedBatchedCsvReader>,
+     pub reader: RefCell<BatchedReader>,
  }

  impl RbBatchedCsv {
@@ -38,7 +44,7 @@ impl RbBatchedCsv {
          let comment_char: Option<String> = arguments[16].try_convert()?;
          let quote_char: Option<String> = arguments[17].try_convert()?;
          let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
-         let parse_dates: bool = arguments[19].try_convert()?;
+         let try_parse_dates: bool = arguments[19].try_convert()?;
          let skip_rows_after_header: usize = arguments[20].try_convert()?;
          let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
          let sample_size: usize = arguments[22].try_convert()?;
@@ -95,14 +101,24 @@ impl RbBatchedCsv {
              .low_memory(low_memory)
              .with_comment_char(comment_char)
              .with_null_values(null_values)
-             .with_parse_dates(parse_dates)
+             .with_try_parse_dates(try_parse_dates)
              .with_quote_char(quote_char)
              .with_end_of_line_char(eol_char)
              .with_skip_rows_after_header(skip_rows_after_header)
              .with_row_count(row_count)
-             .sample_size(sample_size)
-             .batched(overwrite_dtype.map(Arc::new))
-             .map_err(RbPolarsErr::from)?;
+             .sample_size(sample_size);
+
+         let reader = if low_memory {
+             let reader = reader
+                 .batched_read(overwrite_dtype.map(Arc::new))
+                 .map_err(RbPolarsErr::from)?;
+             BatchedReader::Read(reader)
+         } else {
+             let reader = reader
+                 .batched_mmap(overwrite_dtype.map(Arc::new))
+                 .map_err(RbPolarsErr::from)?;
+             BatchedReader::MMap(reader)
+         };

          Ok(RbBatchedCsv {
              reader: RefCell::new(reader),
@@ -110,13 +126,12 @@ impl RbBatchedCsv {
      }

      pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
-         let batches = self
-             .reader
-             .borrow_mut()
-             .next_batches(n)
-             .map_err(RbPolarsErr::from)?;
-         Ok(batches.map(|batches| {
-             RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
-         }))
+         let batches = match &mut *self.reader.borrow_mut() {
+             BatchedReader::MMap(reader) => reader.next_batches(n),
+             BatchedReader::Read(reader) => reader.next_batches(n),
+         }
+         .map_err(RbPolarsErr::from)?;
+
+         Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
      }
  }
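The batched CSV reader now chooses a plain read-based reader when `low_memory` is set and a memory-mapped one otherwise, and `next_batches` dispatches through the `BatchedReader` enum. A hedged sketch of the Ruby-side usage this backs, assuming the gem exposes `Polars.read_csv_batched` and `next_batches` like the Polars Python API (hypothetical method names):

```ruby
# hypothetical usage: stream a large CSV in chunks instead of loading it at once
reader = Polars.read_csv_batched("large.csv")
while (batches = reader.next_batches(5))
  batches.each { |df| puts df.shape.inspect }
end
```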
data/ext/polars/src/conversion.rs CHANGED
@@ -1,3 +1,6 @@
+ use std::fmt::{Display, Formatter};
+ use std::hash::{Hash, Hasher};
+
  use magnus::{
      class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
      RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
@@ -10,8 +13,7 @@ use polars::frame::NullStrategy;
  use polars::io::avro::AvroCompression;
  use polars::prelude::*;
  use polars::series::ops::NullBehavior;
- use std::fmt::{Display, Formatter};
- use std::hash::{Hash, Hasher};
+ use smartstring::alias::String as SmartString;

  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};

@@ -82,6 +84,22 @@ impl TryConvert for Wrap<Utf8Chunked> {
      }
  }

+ impl TryConvert for Wrap<BinaryChunked> {
+     fn try_convert(obj: Value) -> RbResult<Self> {
+         let (seq, len) = get_rbseq(obj)?;
+         let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
+
+         for res in seq.each() {
+             let item = res?;
+             match item.try_convert::<RString>() {
+                 Ok(val) => builder.append_value(unsafe { val.as_slice() }),
+                 Err(_) => builder.append_null(),
+             }
+         }
+         Ok(Wrap(builder.finish()))
+     }
+ }
+
  impl TryConvert for Wrap<NullValues> {
      fn try_convert(ob: Value) -> RbResult<Self> {
          if let Ok(s) = ob.try_convert::<String>() {
@@ -98,6 +116,14 @@ impl TryConvert for Wrap<NullValues> {
      }
  }

+ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
+     let dict = RHash::new();
+     for (fld, val) in flds.iter().zip(vals) {
+         dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
+     }
+     dict.into_value()
+ }
+
  impl IntoValue for Wrap<AnyValue<'_>> {
      fn into_value_with(self, _: &RubyHandle) -> Value {
          match self.0 {
@@ -114,7 +140,7 @@ impl IntoValue for Wrap<AnyValue<'_>> {
              AnyValue::Null => *QNIL,
              AnyValue::Boolean(v) => Value::from(v),
              AnyValue::Utf8(v) => Value::from(v),
-             AnyValue::Utf8Owned(_v) => todo!(),
+             AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
              AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
              AnyValue::Date(v) => class::time()
                  .funcall::<_, _, Value>("at", (v * 86400,))
@@ -125,7 +151,13 @@ impl IntoValue for Wrap<AnyValue<'_>> {
                  .unwrap(),
              AnyValue::Datetime(v, tu, tz) => {
                  let t = match tu {
-                     TimeUnit::Nanoseconds => todo!(),
+                     TimeUnit::Nanoseconds => {
+                         let sec = v / 1000000000;
+                         let subsec = v % 1000000000;
+                         class::time()
+                             .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
+                             .unwrap()
+                     }
                      TimeUnit::Microseconds => {
                          let sec = v / 1000000;
                          let subsec = v % 1000000;
@@ -133,7 +165,13 @@ impl IntoValue for Wrap<AnyValue<'_>> {
                              .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
                              .unwrap()
                      }
-                     TimeUnit::Milliseconds => todo!(),
+                     TimeUnit::Milliseconds => {
+                         let sec = v / 1000;
+                         let subsec = v % 1000;
+                         class::time()
+                             .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
+                             .unwrap()
+                     }
                  };

                  if tz.is_some() {
@@ -145,12 +183,19 @@ impl IntoValue for Wrap<AnyValue<'_>> {
              AnyValue::Duration(_v, _tu) => todo!(),
              AnyValue::Time(_v) => todo!(),
              AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
-             ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
-             AnyValue::StructOwned(_payload) => todo!(),
-             AnyValue::Object(_v) => todo!(),
-             AnyValue::ObjectOwned(_v) => todo!(),
-             AnyValue::Binary(_v) => todo!(),
-             AnyValue::BinaryOwned(_v) => todo!(),
+             ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
+             AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
+             AnyValue::Object(v) => {
+                 let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
+                 object.inner
+             }
+             AnyValue::ObjectOwned(v) => {
+                 let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
+                 object.inner
+             }
+             AnyValue::Binary(v) => RString::from_slice(v).into_value(),
+             AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
+             AnyValue::Decimal(_v, _scale) => todo!(),
          }
      }
  }
@@ -170,12 +215,12 @@ impl IntoValue for Wrap<DataType> {
              DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
              DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
              DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
-             DataType::Decimal128(_) => todo!(),
+             DataType::Decimal(_precision, _scale) => todo!(),
              DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
              DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
              DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
              DataType::List(inner) => {
-                 let inner = Wrap(*inner.clone());
+                 let inner = Wrap(*inner);
                  let list_class = pl.const_get::<_, Value>("List").unwrap();
                  list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
              }
@@ -183,7 +228,7 @@ impl IntoValue for Wrap<DataType> {
              DataType::Datetime(tu, tz) => {
                  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
                  datetime_class
-                     .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
+                     .funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
                      .unwrap()
              }
              DataType::Duration(tu) => {
@@ -198,7 +243,7 @@ impl IntoValue for Wrap<DataType> {
              DataType::Struct(fields) => {
                  let field_class = pl.const_get::<_, Value>("Field").unwrap();
                  let iter = fields.iter().map(|fld| {
-                     let name = fld.name().clone();
+                     let name = fld.name().as_str();
                      let dtype = Wrap(fld.data_type().clone());
                      field_class
                          .funcall::<_, _, Value>("new", (name, dtype))
@@ -328,7 +373,7 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
              let n = 25;
              let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
                  .map_err(RbPolarsErr::from)?;
-             let s = Series::from_any_values_and_dtype("", &avs, &dtype)
+             let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
                  .map_err(RbPolarsErr::from)?;
              Ok(Wrap(AnyValue::List(s)))
          }
@@ -858,3 +903,11 @@ pub fn parse_parquet_compression(
      };
      Ok(parsed)
  }
+
+ pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
+ where
+     I: IntoIterator<Item = S>,
+     S: AsRef<str>,
+ {
+     container.into_iter().map(|s| s.as_ref().into()).collect()
+ }
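The `Datetime` branches above now convert nanosecond- and millisecond-precision values to Ruby `Time` by splitting the integer into whole seconds and a sub-second remainder (previously `todo!()`), and struct and binary values map to Ruby hashes and strings. A hedged Ruby-side sketch, assuming `Polars::Datetime.new` takes a time unit string and `Series#cast`/`#to_a` behave as shown elsewhere in the README:

```ruby
s = Polars::Series.new("t", [Time.utc(2023, 1, 1, 12, 0, 0)])
s.cast(Polars::Datetime.new("ms")).to_a
# => [2023-01-01 12:00:00 UTC] (millisecond-precision values now come back as Time)
```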
data/ext/polars/src/dataframe.rs CHANGED
@@ -6,6 +6,7 @@ use polars::io::mmap::ReaderBytes;
  use polars::io::RowCount;
  use polars::prelude::pivot::{pivot, pivot_stable};
  use polars::prelude::*;
+ use polars_core::utils::try_get_supertype;
  use std::cell::RefCell;
  use std::io::{BufWriter, Cursor};
  use std::ops::Deref;
@@ -114,7 +115,7 @@ impl RbDataFrame {
          let comment_char: Option<String> = arguments[17].try_convert()?;
          let quote_char: Option<String> = arguments[18].try_convert()?;
          let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
-         let parse_dates: bool = arguments[20].try_convert()?;
+         let try_parse_dates: bool = arguments[20].try_convert()?;
          let skip_rows_after_header: usize = arguments[21].try_convert()?;
          let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
          let sample_size: usize = arguments[23].try_convert()?;
@@ -167,12 +168,12 @@ impl RbDataFrame {
              .with_columns(columns)
              .with_n_threads(n_threads)
              .with_path(path)
-             .with_dtypes(overwrite_dtype.as_ref())
+             .with_dtypes(overwrite_dtype.map(Arc::new))
              .with_dtypes_slice(overwrite_dtype_slice.as_deref())
              .low_memory(low_memory)
              .with_comment_char(comment_char)
              .with_null_values(null_values)
-             .with_parse_dates(parse_dates)
+             .with_try_parse_dates(try_parse_dates)
              .with_quote_char(quote_char)
              .with_end_of_line_char(eol_char)
              .with_skip_rows_after_header(skip_rows_after_header)
@@ -183,6 +184,7 @@ impl RbDataFrame {
          Ok(df.into())
      }

+     #[allow(clippy::too_many_arguments)]
      pub fn read_parquet(
          rb_f: Value,
          columns: Option<Vec<String>>,
@@ -191,6 +193,8 @@ impl RbDataFrame {
          parallel: Wrap<ParallelStrategy>,
          row_count: Option<(String, IdxSize)>,
          low_memory: bool,
+         use_statistics: bool,
+         rechunk: bool,
      ) -> RbResult<Self> {
          let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
          let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
@@ -201,6 +205,8 @@ impl RbDataFrame {
              .with_n_rows(n_rows)
              .with_row_count(row_count)
              .set_low_memory(low_memory)
+             .use_statistics(use_statistics)
+             .set_rechunk(rechunk)
              .finish()
              .map_err(RbPolarsErr::from)?;
          Ok(RbDataFrame::new(df))
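`read_parquet` gains `use_statistics` and `rechunk` options that are forwarded to the underlying `ParquetReader`. A hedged sketch of the Ruby call, assuming the wrapper exposes them as keyword arguments with the same names (hypothetical keywords):

```ruby
# let the reader use Parquet column statistics and rechunk the result
# into contiguous memory after reading
Polars.read_parquet("file.parquet", use_statistics: true, rechunk: true)
```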
@@ -253,7 +259,7 @@ impl RbDataFrame {
          use polars::io::avro::AvroWriter;

          if let Ok(s) = rb_f.try_convert::<String>() {
-             let f = std::fs::File::create(&s).unwrap();
+             let f = std::fs::File::create(s).unwrap();
              AvroWriter::new(f)
                  .with_compression(compression.0)
                  .finish(&mut self.df.borrow_mut())
@@ -338,7 +344,7 @@ impl RbDataFrame {
          // ensure the new names are used
          if let Some(schema) = &schema_overwrite {
              for (new_name, name) in schema.0.iter_names().zip(names.iter_mut()) {
-                 *name = new_name.clone();
+                 *name = new_name.to_string();
              }
          }
          let rbdf = Self::finish_from_rows(
@@ -347,17 +353,19 @@ impl RbDataFrame {
              schema_overwrite.map(|wrap| wrap.0),
          )?;

-         rbdf.df
-             .borrow_mut()
-             .get_columns_mut()
-             .iter_mut()
-             .zip(&names)
-             .for_each(|(s, name)| {
-                 s.rename(name);
-             });
+         unsafe {
+             rbdf.df
+                 .borrow_mut()
+                 .get_columns_mut()
+                 .iter_mut()
+                 .zip(&names)
+                 .for_each(|(s, name)| {
+                     s.rename(name);
+                 });
+         }
          let length = names.len();
          if names.into_iter().collect::<PlHashSet<_>>().len() != length {
-             let err = PolarsError::SchemaMisMatch("duplicate column names found".into());
+             let err = PolarsError::SchemaMismatch("duplicate column names found".into());
              Err(RbPolarsErr::from(err))?;
          }

@@ -393,7 +401,7 @@ impl RbDataFrame {
          let null = null_value.unwrap_or_default();

          if let Ok(s) = rb_f.try_convert::<String>() {
-             let f = std::fs::File::create(&s).unwrap();
+             let f = std::fs::File::create(s).unwrap();
              // no need for a buffered writer, because the csv writer does internal buffering
              CsvWriter::new(f)
                  .has_header(has_header)
@@ -435,7 +443,7 @@ impl RbDataFrame {
          compression: Wrap<Option<IpcCompression>>,
      ) -> RbResult<()> {
          if let Ok(s) = rb_f.try_convert::<String>() {
-             let f = std::fs::File::create(&s).unwrap();
+             let f = std::fs::File::create(s).unwrap();
              IpcWriter::new(f)
                  .with_compression(compression.0)
                  .finish(&mut self.df.borrow_mut())
@@ -493,6 +501,25 @@ impl RbDataFrame {
              .into()
      }

+     pub fn to_numo(&self) -> Option<Value> {
+         let mut st = None;
+         for s in self.df.borrow().iter() {
+             let dt_i = s.dtype();
+             match st {
+                 None => st = Some(dt_i.clone()),
+                 Some(ref mut st) => {
+                     *st = try_get_supertype(st, dt_i).ok()?;
+                 }
+             }
+         }
+         let st = st?;
+
+         match st {
+             // TODO
+             _ => None,
+         }
+     }
+
      pub fn write_parquet(
          &self,
          rb_f: Value,
@@ -504,7 +531,7 @@ impl RbDataFrame {
          let compression = parse_parquet_compression(&compression, compression_level)?;

          if let Ok(s) = rb_f.try_convert::<String>() {
-             let f = std::fs::File::create(&s).unwrap();
+             let f = std::fs::File::create(s).unwrap();
              ParquetWriter::new(f)
                  .with_compression(compression)
                  .with_statistics(statistics)
@@ -607,7 +634,7 @@ impl RbDataFrame {
      }

      pub fn get_columns(&self) -> RArray {
-         let cols = self.df.borrow().get_columns().clone();
+         let cols = self.df.borrow().get_columns().to_vec();
          to_rbseries_collection(cols)
      }

@@ -861,10 +888,11 @@ impl RbDataFrame {
          variable_name: Option<String>,
      ) -> RbResult<Self> {
          let args = MeltArgs {
-             id_vars,
-             value_vars,
-             value_name,
-             variable_name,
+             id_vars: strings_to_smartstrings(id_vars),
+             value_vars: strings_to_smartstrings(value_vars),
+             value_name: value_name.map(|s| s.into()),
+             variable_name: variable_name.map(|s| s.into()),
+             streamable: false,
          };

          let df = self.df.borrow().melt2(args).map_err(RbPolarsErr::from)?;
@@ -877,22 +905,26 @@ impl RbDataFrame {
          values: Vec<String>,
          index: Vec<String>,
          columns: Vec<String>,
-         aggregate_expr: &RbExpr,
          maintain_order: bool,
          sort_columns: bool,
+         aggregate_expr: Option<&RbExpr>,
          separator: Option<String>,
      ) -> RbResult<Self> {
          let fun = match maintain_order {
              true => pivot_stable,
              false => pivot,
          };
+         let agg_expr = match aggregate_expr {
+             Some(aggregate_expr) => Some(aggregate_expr.inner.clone()),
+             None => None,
+         };
          let df = fun(
              &self.df.borrow(),
              values,
              index,
              columns,
-             aggregate_expr.inner.clone(),
              sort_columns,
+             agg_expr,
              separator.as_deref(),
          )
          .map_err(RbPolarsErr::from)?;
@@ -913,21 +945,6 @@ impl RbDataFrame {
          self.df.borrow().shift(periods).into()
      }

-     pub fn unique(
-         &self,
-         maintain_order: bool,
-         subset: Option<Vec<String>>,
-         keep: Wrap<UniqueKeepStrategy>,
-     ) -> RbResult<Self> {
-         let subset = subset.as_ref().map(|v| v.as_ref());
-         let df = match maintain_order {
-             true => self.df.borrow().unique_stable(subset, keep.0),
-             false => self.df.borrow().unique(subset, keep.0),
-         }
-         .map_err(RbPolarsErr::from)?;
-         Ok(df.into())
-     }
-
      pub fn lazy(&self) -> RbLazyFrame {
          self.df.borrow().clone().lazy().into()
      }
data/ext/polars/src/error.rs CHANGED
@@ -43,3 +43,11 @@ impl ComputeError {
          Error::new(exception::runtime_error(), message)
      }
  }
+
+ #[macro_export]
+ macro_rules! raise_err(
+     ($msg:expr, $err:ident) => {{
+         Err(PolarsError::$err($msg.into())).map_err(RbPolarsErr::from)?;
+         unreachable!()
+     }}
+ );