polars-df 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/Cargo.lock +337 -381
  4. data/README.md +4 -3
  5. data/ext/polars/Cargo.toml +5 -4
  6. data/ext/polars/src/apply/mod.rs +7 -3
  7. data/ext/polars/src/conversion.rs +171 -63
  8. data/ext/polars/src/dataframe.rs +19 -23
  9. data/ext/polars/src/error.rs +8 -0
  10. data/ext/polars/src/expr/array.rs +15 -0
  11. data/ext/polars/src/expr/general.rs +39 -9
  12. data/ext/polars/src/expr/list.rs +27 -22
  13. data/ext/polars/src/expr/string.rs +10 -9
  14. data/ext/polars/src/expr.rs +1 -0
  15. data/ext/polars/src/functions/lazy.rs +61 -21
  16. data/ext/polars/src/lazyframe.rs +14 -2
  17. data/ext/polars/src/lib.rs +25 -20
  18. data/ext/polars/src/object.rs +1 -1
  19. data/ext/polars/src/rb_modules.rs +4 -0
  20. data/ext/polars/src/series/construction.rs +28 -2
  21. data/ext/polars/src/series.rs +57 -17
  22. data/lib/polars/array_expr.rb +84 -0
  23. data/lib/polars/array_name_space.rb +77 -0
  24. data/lib/polars/batched_csv_reader.rb +1 -1
  25. data/lib/polars/data_frame.rb +91 -49
  26. data/lib/polars/data_types.rb +163 -29
  27. data/lib/polars/date_time_name_space.rb +17 -3
  28. data/lib/polars/expr.rb +76 -69
  29. data/lib/polars/functions.rb +0 -1
  30. data/lib/polars/group_by.rb +1 -22
  31. data/lib/polars/lazy_frame.rb +82 -30
  32. data/lib/polars/lazy_functions.rb +67 -31
  33. data/lib/polars/list_expr.rb +28 -28
  34. data/lib/polars/list_name_space.rb +13 -13
  35. data/lib/polars/rolling_group_by.rb +4 -2
  36. data/lib/polars/series.rb +70 -16
  37. data/lib/polars/string_expr.rb +137 -11
  38. data/lib/polars/string_name_space.rb +137 -22
  39. data/lib/polars/utils.rb +107 -57
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +3 -0
  42. metadata +5 -2
data/README.md CHANGED
@@ -25,7 +25,7 @@ Polars.read_csv("iris.csv")
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
29
 
30
30
  ## Reference
31
31
 
@@ -348,7 +348,7 @@ df.to_numo
348
348
  You can specify column types when creating a data frame
349
349
 
350
350
  ```ruby
351
- Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
351
+ Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
352
352
  ```
353
353
 
354
354
  Supported types are:
@@ -357,8 +357,9 @@ Supported types are:
357
357
  - float - `Float64`, `Float32`
358
358
  - integer - `Int64`, `Int32`, `Int16`, `Int8`
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
- - string - `Utf8`, `Categorical`
360
+ - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
+ - other - `Object`, `List`, `Struct`, `Array` [unreleased]
362
363
 
363
364
  Get column types
364
365
 
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.5.0"
3
+ version = "0.6.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,17 +11,17 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
+ chrono = "=0.4.24"
14
15
  magnus = "0.5"
15
- polars-core = "0.29.0"
16
+ polars-core = "0.31.1"
16
17
  serde_json = "1"
17
18
  smartstring = "1"
18
19
 
19
20
  [dependencies.polars]
20
- version = "0.29.0"
21
+ version = "0.31.1"
21
22
  features = [
22
23
  "abs",
23
24
  "approx_unique",
24
- "arange",
25
25
  "arg_where",
26
26
  "asof_join",
27
27
  "avro",
@@ -68,6 +68,7 @@ features = [
68
68
  "propagate_nans",
69
69
  "random",
70
70
  "rank",
71
+ "range",
71
72
  "reinterpret",
72
73
  "repeat_by",
73
74
  "rolling_window",
data/ext/polars/src/apply/mod.rs CHANGED
@@ -237,15 +237,19 @@ fn iterator_to_list(
237
237
  for _ in 0..init_null_count {
238
238
  builder.append_null()
239
239
  }
240
- builder.append_opt_series(first_value);
240
+ builder
241
+ .append_opt_series(first_value)
242
+ .map_err(RbPolarsErr::from)?;
241
243
  for opt_val in it {
242
244
  match opt_val {
243
245
  None => builder.append_null(),
244
246
  Some(s) => {
245
247
  if s.len() == 0 && s.dtype() != dt {
246
- builder.append_series(&Series::full_null("", 0, dt))
248
+ builder
249
+ .append_series(&Series::full_null("", 0, dt))
250
+ .unwrap()
247
251
  } else {
248
- builder.append_series(&s)
252
+ builder.append_series(&s).map_err(RbPolarsErr::from)?
249
253
  }
250
254
  }
251
255
  }
data/ext/polars/src/conversion.rs CHANGED
@@ -1,11 +1,11 @@
1
1
  use std::fmt::{Display, Formatter};
2
2
  use std::hash::{Hash, Hasher};
3
3
 
4
+ use magnus::encoding::{EncodingCapable, Index};
4
5
  use magnus::{
5
- class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
6
- RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
6
+ class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Float, Integer, IntoValue, Module,
7
+ RArray, RHash, RString, Symbol, TryConvert, Value, QNIL,
7
8
  };
8
- use magnus::encoding::{EncodingCapable, Index};
9
9
  use polars::chunked_array::object::PolarsObjectSafe;
10
10
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
11
11
  use polars::datatypes::AnyValue;
@@ -16,8 +16,9 @@ use polars::prelude::*;
16
16
  use polars::series::ops::NullBehavior;
17
17
  use smartstring::alias::String as SmartString;
18
18
 
19
+ use crate::object::OBJECT_NAME;
19
20
  use crate::rb_modules::utils;
20
- use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
21
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
21
22
 
22
23
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
23
24
  // Safety:
@@ -149,51 +150,29 @@ impl IntoValue for Wrap<AnyValue<'_>> {
149
150
  AnyValue::Boolean(v) => Value::from(v),
150
151
  AnyValue::Utf8(v) => Value::from(v),
151
152
  AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
152
- AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
153
- AnyValue::Date(v) => class::time()
154
- .funcall::<_, _, Value>("at", (v * 86400,))
155
- .unwrap()
156
- .funcall::<_, _, Value>("utc", ())
157
- .unwrap()
158
- .funcall::<_, _, Value>("to_date", ())
159
- .unwrap(),
160
- AnyValue::Datetime(v, tu, tz) => {
161
- let t = match tu {
162
- TimeUnit::Nanoseconds => {
163
- let sec = v / 1000000000;
164
- let subsec = v % 1000000000;
165
- class::time()
166
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
167
- .unwrap()
168
- }
169
- TimeUnit::Microseconds => {
170
- let sec = v / 1000000;
171
- let subsec = v % 1000000;
172
- class::time()
173
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
174
- .unwrap()
175
- }
176
- TimeUnit::Milliseconds => {
177
- let sec = v / 1000;
178
- let subsec = v % 1000;
179
- class::time()
180
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
181
- .unwrap()
182
- }
183
- };
184
-
185
- if tz.is_some() {
186
- todo!();
153
+ AnyValue::Categorical(idx, rev, arr) => {
154
+ let s = if arr.is_null() {
155
+ rev.get(idx)
187
156
  } else {
188
- t.funcall::<_, _, Value>("utc", ()).unwrap()
189
- }
157
+ unsafe { arr.deref_unchecked().value(idx as usize) }
158
+ };
159
+ s.into_value()
190
160
  }
191
- AnyValue::Duration(v, tu) => {
192
- let tu = tu.to_ascii();
193
- utils().funcall("_to_ruby_duration", (v, tu)).unwrap()
161
+ AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
162
+ AnyValue::Datetime(v, time_unit, time_zone) => {
163
+ let time_unit = time_unit.to_ascii();
164
+ utils()
165
+ .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
166
+ .unwrap()
194
167
  }
195
- AnyValue::Time(_v) => todo!(),
196
- AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
168
+ AnyValue::Duration(v, time_unit) => {
169
+ let time_unit = time_unit.to_ascii();
170
+ utils()
171
+ .funcall("_to_ruby_duration", (v, time_unit))
172
+ .unwrap()
173
+ }
174
+ AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
175
+ AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
197
176
  ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
198
177
  AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
199
178
  AnyValue::Object(v) => {
@@ -206,7 +185,9 @@ impl IntoValue for Wrap<AnyValue<'_>> {
206
185
  }
207
186
  AnyValue::Binary(v) => RString::from_slice(v).into_value(),
208
187
  AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
209
- AnyValue::Decimal(_v, _scale) => todo!(),
188
+ AnyValue::Decimal(v, scale) => utils()
189
+ .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
190
+ .unwrap(),
210
191
  }
211
192
  }
212
193
  }
@@ -226,10 +207,22 @@ impl IntoValue for Wrap<DataType> {
226
207
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
227
208
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
228
209
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
229
- DataType::Decimal(_precision, _scale) => todo!(),
210
+ DataType::Decimal(precision, scale) => {
211
+ let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
212
+ decimal_class
213
+ .funcall::<_, _, Value>("new", (precision, scale))
214
+ .unwrap()
215
+ }
230
216
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
231
217
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
232
218
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
219
+ DataType::Array(inner, size) => {
220
+ let inner = Wrap(*inner);
221
+ let list_class = pl.const_get::<_, Value>("Array").unwrap();
222
+ list_class
223
+ .funcall::<_, _, Value>("new", (size, inner))
224
+ .unwrap()
225
+ }
233
226
  DataType::List(inner) => {
234
227
  let inner = Wrap(*inner);
235
228
  let list_class = pl.const_get::<_, Value>("List").unwrap();
@@ -321,11 +314,11 @@ impl IntoValue for Wrap<&StructChunked> {
321
314
  impl IntoValue for Wrap<&DurationChunked> {
322
315
  fn into_value_with(self, _: &RubyHandle) -> Value {
323
316
  let utils = utils();
324
- let tu = Wrap(self.0.time_unit()).into_value();
317
+ let time_unit = Wrap(self.0.time_unit()).into_value();
325
318
  let iter = self.0.into_iter().map(|opt_v| {
326
319
  opt_v.map(|v| {
327
320
  utils
328
- .funcall::<_, _, Value>("_to_ruby_duration", (v, tu))
321
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
329
322
  .unwrap()
330
323
  })
331
324
  });
@@ -333,6 +326,64 @@ impl IntoValue for Wrap<&DurationChunked> {
333
326
  }
334
327
  }
335
328
 
329
+ impl IntoValue for Wrap<&DatetimeChunked> {
330
+ fn into_value_with(self, _: &RubyHandle) -> Value {
331
+ let utils = utils();
332
+ let time_unit = Wrap(self.0.time_unit()).into_value();
333
+ let time_zone = self.0.time_zone().clone().into_value();
334
+ let iter = self.0.into_iter().map(|opt_v| {
335
+ opt_v.map(|v| {
336
+ utils
337
+ .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
338
+ .unwrap()
339
+ })
340
+ });
341
+ RArray::from_iter(iter).into_value()
342
+ }
343
+ }
344
+
345
+ impl IntoValue for Wrap<&TimeChunked> {
346
+ fn into_value_with(self, _: &RubyHandle) -> Value {
347
+ let utils = utils();
348
+ let iter = self.0.into_iter().map(|opt_v| {
349
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
350
+ });
351
+ RArray::from_iter(iter).into_value()
352
+ }
353
+ }
354
+
355
+ impl IntoValue for Wrap<&DateChunked> {
356
+ fn into_value_with(self, _: &RubyHandle) -> Value {
357
+ let utils = utils();
358
+ let iter = self.0.into_iter().map(|opt_v| {
359
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
360
+ });
361
+ RArray::from_iter(iter).into_value()
362
+ }
363
+ }
364
+
365
+ impl IntoValue for Wrap<&DecimalChunked> {
366
+ fn into_value_with(self, _: &RubyHandle) -> Value {
367
+ let utils = utils();
368
+ let rb_scale = (-(self.0.scale() as i32)).into_value();
369
+ let iter = self.0.into_iter().map(|opt_v| {
370
+ opt_v.map(|v| {
371
+ utils
372
+ .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
373
+ .unwrap()
374
+ })
375
+ });
376
+ RArray::from_iter(iter).into_value()
377
+ }
378
+ }
379
+
380
+ fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
381
+ match digits.parse::<i128>() {
382
+ Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
383
+ Err(_) => None,
384
+ }
385
+ }
386
+
336
387
  impl TryConvert for Wrap<Field> {
337
388
  fn try_convert(ob: Value) -> RbResult<Self> {
338
389
  let name: String = ob.funcall("name", ())?;
@@ -362,10 +413,11 @@ impl TryConvert for Wrap<DataType> {
362
413
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
363
414
  "Polars::Time" => DataType::Time,
364
415
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
416
+ "Polars::Decimal" => DataType::Decimal(None, None),
365
417
  "Polars::Float32" => DataType::Float32,
366
418
  "Polars::Float64" => DataType::Float64,
367
- // "Polars::Object" => DataType::Object(OBJECT_NAME),
368
- "Polars::List" => DataType::List(Box::new(DataType::Boolean)),
419
+ "Polars::Object" => DataType::Object(OBJECT_NAME),
420
+ "Polars::List" => DataType::List(Box::new(DataType::Null)),
369
421
  "Polars::Null" => DataType::Null,
370
422
  "Polars::Unknown" => DataType::Unknown,
371
423
  dt => {
@@ -378,6 +430,28 @@ impl TryConvert for Wrap<DataType> {
378
430
  } else if ob.try_convert::<String>().is_err() {
379
431
  let name = unsafe { ob.class().name() }.into_owned();
380
432
  match name.as_str() {
433
+ "Polars::Duration" => {
434
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
435
+ let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
436
+ DataType::Duration(time_unit)
437
+ }
438
+ "Polars::Datetime" => {
439
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
440
+ let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
441
+ let time_zone: Value = ob.funcall("time_zone", ()).unwrap();
442
+ let time_zone = time_zone.try_convert()?;
443
+ DataType::Datetime(time_unit, time_zone)
444
+ }
445
+ "Polars::Decimal" => {
446
+ let precision = ob.funcall::<_, _, Value>("precision", ())?.try_convert()?;
447
+ let scale = ob.funcall::<_, _, Value>("scale", ())?.try_convert()?;
448
+ DataType::Decimal(precision, Some(scale))
449
+ }
450
+ "Polars::List" => {
451
+ let inner: Value = ob.funcall("inner", ()).unwrap();
452
+ let inner = inner.try_convert::<Wrap<DataType>>()?;
453
+ DataType::List(Box::new(inner.0))
454
+ }
381
455
  "Polars::Struct" => {
382
456
  let arr: RArray = ob.funcall("fields", ())?;
383
457
  let mut fields = Vec::with_capacity(arr.len());
@@ -386,7 +460,12 @@ impl TryConvert for Wrap<DataType> {
386
460
  }
387
461
  DataType::Struct(fields)
388
462
  }
389
- _ => todo!(),
463
+ dt => {
464
+ return Err(RbTypeError::new_err(format!(
465
+ "A {dt} object is not a correct polars DataType. \
466
+ Hint: use the class without instantiating it.",
467
+ )))
468
+ }
390
469
  }
391
470
  } else {
392
471
  match ob.try_convert::<String>()?.as_str() {
@@ -430,7 +509,7 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
430
509
  Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
431
510
  } else if let Some(v) = Integer::from_value(ob) {
432
511
  Ok(AnyValue::Int64(v.to_i64()?).into())
433
- } else if let Some(v) = RFloat::from_value(ob) {
512
+ } else if let Some(v) = Float::from_value(ob) {
434
513
  Ok(AnyValue::Float64(v.to_f64()).into())
435
514
  } else if let Some(v) = RString::from_value(ob) {
436
515
  if v.enc_get() == Index::utf8() {
@@ -443,7 +522,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
443
522
  let sec = ob.funcall::<_, _, i64>("to_i", ())?;
444
523
  let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
445
524
  let v = sec * 1_000_000_000 + nsec;
446
- // TODO support time zone
525
+ // TODO support time zone when possible
526
+ // https://github.com/pola-rs/polars/issues/9103
447
527
  Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
448
528
  } else if ob.is_nil() {
449
529
  Ok(AnyValue::Null.into())
@@ -464,13 +544,23 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
464
544
  if v.is_empty() {
465
545
  Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
466
546
  } else {
467
- let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
468
- // use first `n` values to infer datatype
469
- // this value is not too large as this will be done with every
470
- // anyvalue that has to be converted, which can be many
471
- let n = 25;
472
- let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
473
- .map_err(RbPolarsErr::from)?;
547
+ let list = v;
548
+
549
+ let mut avs = Vec::with_capacity(25);
550
+ let mut iter = list.each();
551
+
552
+ for item in (&mut iter).take(25) {
553
+ avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
554
+ }
555
+
556
+ let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
+
558
+ // push the rest
559
+ avs.reserve(list.len());
560
+ for item in iter {
561
+ avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
562
+ }
563
+
474
564
  let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
475
565
  .map_err(RbPolarsErr::from)?;
476
566
  Ok(Wrap(AnyValue::List(s)))
@@ -478,7 +568,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
478
568
  } else if ob.is_kind_of(crate::rb_modules::datetime()) {
479
569
  let sec: i64 = ob.funcall("to_i", ())?;
480
570
  let nsec: i64 = ob.funcall("nsec", ())?;
481
- Ok(Wrap(AnyValue::Datetime(sec * 1_000_000_000 + nsec, TimeUnit::Nanoseconds, &None)))
571
+ Ok(Wrap(AnyValue::Datetime(
572
+ sec * 1_000_000_000 + nsec,
573
+ TimeUnit::Nanoseconds,
574
+ &None,
575
+ )))
482
576
  } else if ob.is_kind_of(crate::rb_modules::date()) {
483
577
  // convert to DateTime for UTC
484
578
  let v = ob
@@ -486,6 +580,20 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
486
580
  .funcall::<_, _, Value>("to_time", ())?
487
581
  .funcall::<_, _, i64>("to_i", ())?;
488
582
  Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
+ } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
+ let (sign, digits, _, exp): (i8, String, i32, i32) = ob
585
+ .funcall::<_, _, Value>("split", ())
586
+ .unwrap()
587
+ .try_convert()
588
+ .unwrap();
589
+ let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
590
+ RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
591
+ })?;
592
+ if sign < 0 {
593
+ // TODO better error
594
+ v = v.checked_neg().unwrap();
595
+ }
596
+ Ok(Wrap(AnyValue::Decimal(v, scale)))
489
597
  } else {
490
598
  Err(RbPolarsErr::other(format!(
491
599
  "object type not supported {:?}",
data/ext/polars/src/dataframe.rs CHANGED
@@ -456,12 +456,14 @@ impl RbDataFrame {
456
456
  .finish(&mut self.df.borrow_mut())
457
457
  .map_err(RbPolarsErr::from)?;
458
458
  } else {
459
- let mut buf = get_file_like(rb_f, true)?;
460
-
459
+ let mut buf = Cursor::new(Vec::new());
461
460
  IpcWriter::new(&mut buf)
462
461
  .with_compression(compression.0)
463
462
  .finish(&mut self.df.borrow_mut())
464
463
  .map_err(RbPolarsErr::from)?;
464
+ // TODO less copying
465
+ let rb_str = RString::from_slice(&buf.into_inner());
466
+ rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
465
467
  }
466
468
  Ok(())
467
469
  }
@@ -794,22 +796,6 @@ impl RbDataFrame {
794
796
  Ok(RbDataFrame::new(df))
795
797
  }
796
798
 
797
- pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
798
- let df = self
799
- .df
800
- .borrow()
801
- .sort_with_options(
802
- &by_column,
803
- SortOptions {
804
- descending: reverse,
805
- nulls_last,
806
- multithreaded: true,
807
- },
808
- )
809
- .map_err(RbPolarsErr::from)?;
810
- Ok(RbDataFrame::new(df))
811
- }
812
-
813
799
  pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
814
800
  self.df
815
801
  .borrow_mut()
@@ -933,11 +919,16 @@ impl RbDataFrame {
933
919
  Ok(RbDataFrame::new(df))
934
920
  }
935
921
 
936
- pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
937
- let out = if stable {
938
- self.df.borrow().partition_by_stable(groups)
922
+ pub fn partition_by(
923
+ &self,
924
+ by: Vec<String>,
925
+ maintain_order: bool,
926
+ include_key: bool,
927
+ ) -> RbResult<RArray> {
928
+ let out = if maintain_order {
929
+ self.df.borrow().partition_by_stable(by, include_key)
939
930
  } else {
940
- self.df.borrow().partition_by(groups)
931
+ self.df.borrow().partition_by(by, include_key)
941
932
  }
942
933
  .map_err(RbPolarsErr::from)?;
943
934
  Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
@@ -1024,13 +1015,18 @@ impl RbDataFrame {
1024
1015
  &self,
1025
1016
  columns: Option<Vec<String>>,
1026
1017
  separator: Option<String>,
1018
+ drop_first: bool,
1027
1019
  ) -> RbResult<Self> {
1028
1020
  let df = match columns {
1029
1021
  Some(cols) => self.df.borrow().columns_to_dummies(
1030
1022
  cols.iter().map(|x| x as &str).collect(),
1031
1023
  separator.as_deref(),
1024
+ drop_first,
1032
1025
  ),
1033
- None => self.df.borrow().to_dummies(separator.as_deref()),
1026
+ None => self
1027
+ .df
1028
+ .borrow()
1029
+ .to_dummies(separator.as_deref(), drop_first),
1034
1030
  }
1035
1031
  .map_err(RbPolarsErr::from)?;
1036
1032
  Ok(df.into())
data/ext/polars/src/error.rs CHANGED
@@ -24,6 +24,14 @@ impl RbPolarsErr {
24
24
  }
25
25
  }
26
26
 
27
+ pub struct RbTypeError {}
28
+
29
+ impl RbTypeError {
30
+ pub fn new_err(message: String) -> Error {
31
+ Error::new(exception::type_error(), message)
32
+ }
33
+ }
34
+
27
35
  pub struct RbValueError {}
28
36
 
29
37
  impl RbValueError {
data/ext/polars/src/expr/array.rs ADDED
@@ -0,0 +1,15 @@
1
+ use crate::RbExpr;
2
+
3
+ impl RbExpr {
4
+ pub fn array_max(&self) -> Self {
5
+ self.inner.clone().arr().max().into()
6
+ }
7
+
8
+ pub fn array_min(&self) -> Self {
9
+ self.inner.clone().arr().min().into()
10
+ }
11
+
12
+ pub fn array_sum(&self) -> Self {
13
+ self.inner.clone().arr().sum().into()
14
+ }
15
+ }