polars-df 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/Cargo.lock +337 -381
  4. data/README.md +4 -3
  5. data/ext/polars/Cargo.toml +5 -4
  6. data/ext/polars/src/apply/mod.rs +7 -3
  7. data/ext/polars/src/conversion.rs +171 -63
  8. data/ext/polars/src/dataframe.rs +19 -23
  9. data/ext/polars/src/error.rs +8 -0
  10. data/ext/polars/src/expr/array.rs +15 -0
  11. data/ext/polars/src/expr/general.rs +39 -9
  12. data/ext/polars/src/expr/list.rs +27 -22
  13. data/ext/polars/src/expr/string.rs +10 -9
  14. data/ext/polars/src/expr.rs +1 -0
  15. data/ext/polars/src/functions/lazy.rs +61 -21
  16. data/ext/polars/src/lazyframe.rs +14 -2
  17. data/ext/polars/src/lib.rs +25 -20
  18. data/ext/polars/src/object.rs +1 -1
  19. data/ext/polars/src/rb_modules.rs +4 -0
  20. data/ext/polars/src/series/construction.rs +28 -2
  21. data/ext/polars/src/series.rs +57 -17
  22. data/lib/polars/array_expr.rb +84 -0
  23. data/lib/polars/array_name_space.rb +77 -0
  24. data/lib/polars/batched_csv_reader.rb +1 -1
  25. data/lib/polars/data_frame.rb +91 -49
  26. data/lib/polars/data_types.rb +163 -29
  27. data/lib/polars/date_time_name_space.rb +17 -3
  28. data/lib/polars/expr.rb +76 -69
  29. data/lib/polars/functions.rb +0 -1
  30. data/lib/polars/group_by.rb +1 -22
  31. data/lib/polars/lazy_frame.rb +82 -30
  32. data/lib/polars/lazy_functions.rb +67 -31
  33. data/lib/polars/list_expr.rb +28 -28
  34. data/lib/polars/list_name_space.rb +13 -13
  35. data/lib/polars/rolling_group_by.rb +4 -2
  36. data/lib/polars/series.rb +70 -16
  37. data/lib/polars/string_expr.rb +137 -11
  38. data/lib/polars/string_name_space.rb +137 -22
  39. data/lib/polars/utils.rb +107 -57
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +3 -0
  42. metadata +5 -2
data/README.md CHANGED
@@ -25,7 +25,7 @@ Polars.read_csv("iris.csv")
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
29
 
30
30
  ## Reference
31
31
 
@@ -348,7 +348,7 @@ df.to_numo
348
348
  You can specify column types when creating a data frame
349
349
 
350
350
  ```ruby
351
- Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
351
+ Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
352
352
  ```
353
353
 
354
354
  Supported types are:
@@ -357,8 +357,9 @@ Supported types are:
357
357
  - float - `Float64`, `Float32`
358
358
  - integer - `Int64`, `Int32`, `Int16`, `Int8`
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
- - string - `Utf8`, `Categorical`
360
+ - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
+ - other - `Object`, `List`, `Struct`, `Array` [unreleased]
362
363
 
363
364
  Get column types
364
365
 
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.5.0"
3
+ version = "0.6.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,17 +11,17 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
+ chrono = "=0.4.24"
14
15
  magnus = "0.5"
15
- polars-core = "0.29.0"
16
+ polars-core = "0.31.1"
16
17
  serde_json = "1"
17
18
  smartstring = "1"
18
19
 
19
20
  [dependencies.polars]
20
- version = "0.29.0"
21
+ version = "0.31.1"
21
22
  features = [
22
23
  "abs",
23
24
  "approx_unique",
24
- "arange",
25
25
  "arg_where",
26
26
  "asof_join",
27
27
  "avro",
@@ -68,6 +68,7 @@ features = [
68
68
  "propagate_nans",
69
69
  "random",
70
70
  "rank",
71
+ "range",
71
72
  "reinterpret",
72
73
  "repeat_by",
73
74
  "rolling_window",
data/ext/polars/src/apply/mod.rs CHANGED
@@ -237,15 +237,19 @@ fn iterator_to_list(
237
237
  for _ in 0..init_null_count {
238
238
  builder.append_null()
239
239
  }
240
- builder.append_opt_series(first_value);
240
+ builder
241
+ .append_opt_series(first_value)
242
+ .map_err(RbPolarsErr::from)?;
241
243
  for opt_val in it {
242
244
  match opt_val {
243
245
  None => builder.append_null(),
244
246
  Some(s) => {
245
247
  if s.len() == 0 && s.dtype() != dt {
246
- builder.append_series(&Series::full_null("", 0, dt))
248
+ builder
249
+ .append_series(&Series::full_null("", 0, dt))
250
+ .unwrap()
247
251
  } else {
248
- builder.append_series(&s)
252
+ builder.append_series(&s).map_err(RbPolarsErr::from)?
249
253
  }
250
254
  }
251
255
  }
data/ext/polars/src/conversion.rs CHANGED
@@ -1,11 +1,11 @@
1
1
  use std::fmt::{Display, Formatter};
2
2
  use std::hash::{Hash, Hasher};
3
3
 
4
+ use magnus::encoding::{EncodingCapable, Index};
4
5
  use magnus::{
5
- class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
6
- RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
6
+ class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Float, Integer, IntoValue, Module,
7
+ RArray, RHash, RString, Symbol, TryConvert, Value, QNIL,
7
8
  };
8
- use magnus::encoding::{EncodingCapable, Index};
9
9
  use polars::chunked_array::object::PolarsObjectSafe;
10
10
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
11
11
  use polars::datatypes::AnyValue;
@@ -16,8 +16,9 @@ use polars::prelude::*;
16
16
  use polars::series::ops::NullBehavior;
17
17
  use smartstring::alias::String as SmartString;
18
18
 
19
+ use crate::object::OBJECT_NAME;
19
20
  use crate::rb_modules::utils;
20
- use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
21
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
21
22
 
22
23
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
23
24
  // Safety:
@@ -149,51 +150,29 @@ impl IntoValue for Wrap<AnyValue<'_>> {
149
150
  AnyValue::Boolean(v) => Value::from(v),
150
151
  AnyValue::Utf8(v) => Value::from(v),
151
152
  AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
152
- AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
153
- AnyValue::Date(v) => class::time()
154
- .funcall::<_, _, Value>("at", (v * 86400,))
155
- .unwrap()
156
- .funcall::<_, _, Value>("utc", ())
157
- .unwrap()
158
- .funcall::<_, _, Value>("to_date", ())
159
- .unwrap(),
160
- AnyValue::Datetime(v, tu, tz) => {
161
- let t = match tu {
162
- TimeUnit::Nanoseconds => {
163
- let sec = v / 1000000000;
164
- let subsec = v % 1000000000;
165
- class::time()
166
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
167
- .unwrap()
168
- }
169
- TimeUnit::Microseconds => {
170
- let sec = v / 1000000;
171
- let subsec = v % 1000000;
172
- class::time()
173
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
174
- .unwrap()
175
- }
176
- TimeUnit::Milliseconds => {
177
- let sec = v / 1000;
178
- let subsec = v % 1000;
179
- class::time()
180
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
181
- .unwrap()
182
- }
183
- };
184
-
185
- if tz.is_some() {
186
- todo!();
153
+ AnyValue::Categorical(idx, rev, arr) => {
154
+ let s = if arr.is_null() {
155
+ rev.get(idx)
187
156
  } else {
188
- t.funcall::<_, _, Value>("utc", ()).unwrap()
189
- }
157
+ unsafe { arr.deref_unchecked().value(idx as usize) }
158
+ };
159
+ s.into_value()
190
160
  }
191
- AnyValue::Duration(v, tu) => {
192
- let tu = tu.to_ascii();
193
- utils().funcall("_to_ruby_duration", (v, tu)).unwrap()
161
+ AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
162
+ AnyValue::Datetime(v, time_unit, time_zone) => {
163
+ let time_unit = time_unit.to_ascii();
164
+ utils()
165
+ .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
166
+ .unwrap()
194
167
  }
195
- AnyValue::Time(_v) => todo!(),
196
- AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
168
+ AnyValue::Duration(v, time_unit) => {
169
+ let time_unit = time_unit.to_ascii();
170
+ utils()
171
+ .funcall("_to_ruby_duration", (v, time_unit))
172
+ .unwrap()
173
+ }
174
+ AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
175
+ AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
197
176
  ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
198
177
  AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
199
178
  AnyValue::Object(v) => {
@@ -206,7 +185,9 @@ impl IntoValue for Wrap<AnyValue<'_>> {
206
185
  }
207
186
  AnyValue::Binary(v) => RString::from_slice(v).into_value(),
208
187
  AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
209
- AnyValue::Decimal(_v, _scale) => todo!(),
188
+ AnyValue::Decimal(v, scale) => utils()
189
+ .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
190
+ .unwrap(),
210
191
  }
211
192
  }
212
193
  }
@@ -226,10 +207,22 @@ impl IntoValue for Wrap<DataType> {
226
207
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
227
208
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
228
209
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
229
- DataType::Decimal(_precision, _scale) => todo!(),
210
+ DataType::Decimal(precision, scale) => {
211
+ let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
212
+ decimal_class
213
+ .funcall::<_, _, Value>("new", (precision, scale))
214
+ .unwrap()
215
+ }
230
216
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
231
217
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
232
218
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
219
+ DataType::Array(inner, size) => {
220
+ let inner = Wrap(*inner);
221
+ let list_class = pl.const_get::<_, Value>("Array").unwrap();
222
+ list_class
223
+ .funcall::<_, _, Value>("new", (size, inner))
224
+ .unwrap()
225
+ }
233
226
  DataType::List(inner) => {
234
227
  let inner = Wrap(*inner);
235
228
  let list_class = pl.const_get::<_, Value>("List").unwrap();
@@ -321,11 +314,11 @@ impl IntoValue for Wrap<&StructChunked> {
321
314
  impl IntoValue for Wrap<&DurationChunked> {
322
315
  fn into_value_with(self, _: &RubyHandle) -> Value {
323
316
  let utils = utils();
324
- let tu = Wrap(self.0.time_unit()).into_value();
317
+ let time_unit = Wrap(self.0.time_unit()).into_value();
325
318
  let iter = self.0.into_iter().map(|opt_v| {
326
319
  opt_v.map(|v| {
327
320
  utils
328
- .funcall::<_, _, Value>("_to_ruby_duration", (v, tu))
321
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
329
322
  .unwrap()
330
323
  })
331
324
  });
@@ -333,6 +326,64 @@ impl IntoValue for Wrap<&DurationChunked> {
333
326
  }
334
327
  }
335
328
 
329
+ impl IntoValue for Wrap<&DatetimeChunked> {
330
+ fn into_value_with(self, _: &RubyHandle) -> Value {
331
+ let utils = utils();
332
+ let time_unit = Wrap(self.0.time_unit()).into_value();
333
+ let time_zone = self.0.time_zone().clone().into_value();
334
+ let iter = self.0.into_iter().map(|opt_v| {
335
+ opt_v.map(|v| {
336
+ utils
337
+ .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
338
+ .unwrap()
339
+ })
340
+ });
341
+ RArray::from_iter(iter).into_value()
342
+ }
343
+ }
344
+
345
+ impl IntoValue for Wrap<&TimeChunked> {
346
+ fn into_value_with(self, _: &RubyHandle) -> Value {
347
+ let utils = utils();
348
+ let iter = self.0.into_iter().map(|opt_v| {
349
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
350
+ });
351
+ RArray::from_iter(iter).into_value()
352
+ }
353
+ }
354
+
355
+ impl IntoValue for Wrap<&DateChunked> {
356
+ fn into_value_with(self, _: &RubyHandle) -> Value {
357
+ let utils = utils();
358
+ let iter = self.0.into_iter().map(|opt_v| {
359
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
360
+ });
361
+ RArray::from_iter(iter).into_value()
362
+ }
363
+ }
364
+
365
+ impl IntoValue for Wrap<&DecimalChunked> {
366
+ fn into_value_with(self, _: &RubyHandle) -> Value {
367
+ let utils = utils();
368
+ let rb_scale = (-(self.0.scale() as i32)).into_value();
369
+ let iter = self.0.into_iter().map(|opt_v| {
370
+ opt_v.map(|v| {
371
+ utils
372
+ .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
373
+ .unwrap()
374
+ })
375
+ });
376
+ RArray::from_iter(iter).into_value()
377
+ }
378
+ }
379
+
380
+ fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
381
+ match digits.parse::<i128>() {
382
+ Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
383
+ Err(_) => None,
384
+ }
385
+ }
386
+
336
387
  impl TryConvert for Wrap<Field> {
337
388
  fn try_convert(ob: Value) -> RbResult<Self> {
338
389
  let name: String = ob.funcall("name", ())?;
@@ -362,10 +413,11 @@ impl TryConvert for Wrap<DataType> {
362
413
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
363
414
  "Polars::Time" => DataType::Time,
364
415
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
416
+ "Polars::Decimal" => DataType::Decimal(None, None),
365
417
  "Polars::Float32" => DataType::Float32,
366
418
  "Polars::Float64" => DataType::Float64,
367
- // "Polars::Object" => DataType::Object(OBJECT_NAME),
368
- "Polars::List" => DataType::List(Box::new(DataType::Boolean)),
419
+ "Polars::Object" => DataType::Object(OBJECT_NAME),
420
+ "Polars::List" => DataType::List(Box::new(DataType::Null)),
369
421
  "Polars::Null" => DataType::Null,
370
422
  "Polars::Unknown" => DataType::Unknown,
371
423
  dt => {
@@ -378,6 +430,28 @@ impl TryConvert for Wrap<DataType> {
378
430
  } else if ob.try_convert::<String>().is_err() {
379
431
  let name = unsafe { ob.class().name() }.into_owned();
380
432
  match name.as_str() {
433
+ "Polars::Duration" => {
434
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
435
+ let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
436
+ DataType::Duration(time_unit)
437
+ }
438
+ "Polars::Datetime" => {
439
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
440
+ let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
441
+ let time_zone: Value = ob.funcall("time_zone", ()).unwrap();
442
+ let time_zone = time_zone.try_convert()?;
443
+ DataType::Datetime(time_unit, time_zone)
444
+ }
445
+ "Polars::Decimal" => {
446
+ let precision = ob.funcall::<_, _, Value>("precision", ())?.try_convert()?;
447
+ let scale = ob.funcall::<_, _, Value>("scale", ())?.try_convert()?;
448
+ DataType::Decimal(precision, Some(scale))
449
+ }
450
+ "Polars::List" => {
451
+ let inner: Value = ob.funcall("inner", ()).unwrap();
452
+ let inner = inner.try_convert::<Wrap<DataType>>()?;
453
+ DataType::List(Box::new(inner.0))
454
+ }
381
455
  "Polars::Struct" => {
382
456
  let arr: RArray = ob.funcall("fields", ())?;
383
457
  let mut fields = Vec::with_capacity(arr.len());
@@ -386,7 +460,12 @@ impl TryConvert for Wrap<DataType> {
386
460
  }
387
461
  DataType::Struct(fields)
388
462
  }
389
- _ => todo!(),
463
+ dt => {
464
+ return Err(RbTypeError::new_err(format!(
465
+ "A {dt} object is not a correct polars DataType. \
466
+ Hint: use the class without instantiating it.",
467
+ )))
468
+ }
390
469
  }
391
470
  } else {
392
471
  match ob.try_convert::<String>()?.as_str() {
@@ -430,7 +509,7 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
430
509
  Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
431
510
  } else if let Some(v) = Integer::from_value(ob) {
432
511
  Ok(AnyValue::Int64(v.to_i64()?).into())
433
- } else if let Some(v) = RFloat::from_value(ob) {
512
+ } else if let Some(v) = Float::from_value(ob) {
434
513
  Ok(AnyValue::Float64(v.to_f64()).into())
435
514
  } else if let Some(v) = RString::from_value(ob) {
436
515
  if v.enc_get() == Index::utf8() {
@@ -443,7 +522,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
443
522
  let sec = ob.funcall::<_, _, i64>("to_i", ())?;
444
523
  let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
445
524
  let v = sec * 1_000_000_000 + nsec;
446
- // TODO support time zone
525
+ // TODO support time zone when possible
526
+ // https://github.com/pola-rs/polars/issues/9103
447
527
  Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
448
528
  } else if ob.is_nil() {
449
529
  Ok(AnyValue::Null.into())
@@ -464,13 +544,23 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
464
544
  if v.is_empty() {
465
545
  Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
466
546
  } else {
467
- let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
468
- // use first `n` values to infer datatype
469
- // this value is not too large as this will be done with every
470
- // anyvalue that has to be converted, which can be many
471
- let n = 25;
472
- let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
473
- .map_err(RbPolarsErr::from)?;
547
+ let list = v;
548
+
549
+ let mut avs = Vec::with_capacity(25);
550
+ let mut iter = list.each();
551
+
552
+ for item in (&mut iter).take(25) {
553
+ avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
554
+ }
555
+
556
+ let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
+
558
+ // push the rest
559
+ avs.reserve(list.len());
560
+ for item in iter {
561
+ avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
562
+ }
563
+
474
564
  let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
475
565
  .map_err(RbPolarsErr::from)?;
476
566
  Ok(Wrap(AnyValue::List(s)))
@@ -478,7 +568,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
478
568
  } else if ob.is_kind_of(crate::rb_modules::datetime()) {
479
569
  let sec: i64 = ob.funcall("to_i", ())?;
480
570
  let nsec: i64 = ob.funcall("nsec", ())?;
481
- Ok(Wrap(AnyValue::Datetime(sec * 1_000_000_000 + nsec, TimeUnit::Nanoseconds, &None)))
571
+ Ok(Wrap(AnyValue::Datetime(
572
+ sec * 1_000_000_000 + nsec,
573
+ TimeUnit::Nanoseconds,
574
+ &None,
575
+ )))
482
576
  } else if ob.is_kind_of(crate::rb_modules::date()) {
483
577
  // convert to DateTime for UTC
484
578
  let v = ob
@@ -486,6 +580,20 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
486
580
  .funcall::<_, _, Value>("to_time", ())?
487
581
  .funcall::<_, _, i64>("to_i", ())?;
488
582
  Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
+ } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
+ let (sign, digits, _, exp): (i8, String, i32, i32) = ob
585
+ .funcall::<_, _, Value>("split", ())
586
+ .unwrap()
587
+ .try_convert()
588
+ .unwrap();
589
+ let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
590
+ RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
591
+ })?;
592
+ if sign < 0 {
593
+ // TODO better error
594
+ v = v.checked_neg().unwrap();
595
+ }
596
+ Ok(Wrap(AnyValue::Decimal(v, scale)))
489
597
  } else {
490
598
  Err(RbPolarsErr::other(format!(
491
599
  "object type not supported {:?}",
data/ext/polars/src/dataframe.rs CHANGED
@@ -456,12 +456,14 @@ impl RbDataFrame {
456
456
  .finish(&mut self.df.borrow_mut())
457
457
  .map_err(RbPolarsErr::from)?;
458
458
  } else {
459
- let mut buf = get_file_like(rb_f, true)?;
460
-
459
+ let mut buf = Cursor::new(Vec::new());
461
460
  IpcWriter::new(&mut buf)
462
461
  .with_compression(compression.0)
463
462
  .finish(&mut self.df.borrow_mut())
464
463
  .map_err(RbPolarsErr::from)?;
464
+ // TODO less copying
465
+ let rb_str = RString::from_slice(&buf.into_inner());
466
+ rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
465
467
  }
466
468
  Ok(())
467
469
  }
@@ -794,22 +796,6 @@ impl RbDataFrame {
794
796
  Ok(RbDataFrame::new(df))
795
797
  }
796
798
 
797
- pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
798
- let df = self
799
- .df
800
- .borrow()
801
- .sort_with_options(
802
- &by_column,
803
- SortOptions {
804
- descending: reverse,
805
- nulls_last,
806
- multithreaded: true,
807
- },
808
- )
809
- .map_err(RbPolarsErr::from)?;
810
- Ok(RbDataFrame::new(df))
811
- }
812
-
813
799
  pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
814
800
  self.df
815
801
  .borrow_mut()
@@ -933,11 +919,16 @@ impl RbDataFrame {
933
919
  Ok(RbDataFrame::new(df))
934
920
  }
935
921
 
936
- pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
937
- let out = if stable {
938
- self.df.borrow().partition_by_stable(groups)
922
+ pub fn partition_by(
923
+ &self,
924
+ by: Vec<String>,
925
+ maintain_order: bool,
926
+ include_key: bool,
927
+ ) -> RbResult<RArray> {
928
+ let out = if maintain_order {
929
+ self.df.borrow().partition_by_stable(by, include_key)
939
930
  } else {
940
- self.df.borrow().partition_by(groups)
931
+ self.df.borrow().partition_by(by, include_key)
941
932
  }
942
933
  .map_err(RbPolarsErr::from)?;
943
934
  Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
@@ -1024,13 +1015,18 @@ impl RbDataFrame {
1024
1015
  &self,
1025
1016
  columns: Option<Vec<String>>,
1026
1017
  separator: Option<String>,
1018
+ drop_first: bool,
1027
1019
  ) -> RbResult<Self> {
1028
1020
  let df = match columns {
1029
1021
  Some(cols) => self.df.borrow().columns_to_dummies(
1030
1022
  cols.iter().map(|x| x as &str).collect(),
1031
1023
  separator.as_deref(),
1024
+ drop_first,
1032
1025
  ),
1033
- None => self.df.borrow().to_dummies(separator.as_deref()),
1026
+ None => self
1027
+ .df
1028
+ .borrow()
1029
+ .to_dummies(separator.as_deref(), drop_first),
1034
1030
  }
1035
1031
  .map_err(RbPolarsErr::from)?;
1036
1032
  Ok(df.into())
data/ext/polars/src/error.rs CHANGED
@@ -24,6 +24,14 @@ impl RbPolarsErr {
24
24
  }
25
25
  }
26
26
 
27
+ pub struct RbTypeError {}
28
+
29
+ impl RbTypeError {
30
+ pub fn new_err(message: String) -> Error {
31
+ Error::new(exception::type_error(), message)
32
+ }
33
+ }
34
+
27
35
  pub struct RbValueError {}
28
36
 
29
37
  impl RbValueError {
data/ext/polars/src/expr/array.rs ADDED
@@ -0,0 +1,15 @@
1
+ use crate::RbExpr;
2
+
3
+ impl RbExpr {
4
+ pub fn array_max(&self) -> Self {
5
+ self.inner.clone().arr().max().into()
6
+ }
7
+
8
+ pub fn array_min(&self) -> Self {
9
+ self.inner.clone().arr().min().into()
10
+ }
11
+
12
+ pub fn array_sum(&self) -> Self {
13
+ self.inner.clone().arr().sum().into()
14
+ }
15
+ }