polars-df 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/README.md +4 -3
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/mod.rs +7 -3
- data/ext/polars/src/conversion.rs +171 -63
- data/ext/polars/src/dataframe.rs +19 -23
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/general.rs +39 -9
- data/ext/polars/src/expr/list.rs +27 -22
- data/ext/polars/src/expr/string.rs +10 -9
- data/ext/polars/src/expr.rs +1 -0
- data/ext/polars/src/functions/lazy.rs +61 -21
- data/ext/polars/src/lazyframe.rs +14 -2
- data/ext/polars/src/lib.rs +25 -20
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +4 -0
- data/ext/polars/src/series/construction.rs +28 -2
- data/ext/polars/src/series.rs +57 -17
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +5 -2
data/README.md
CHANGED
@@ -25,7 +25,7 @@ Polars.read_csv("iris.csv")
|
|
25
25
|
.collect
|
26
26
|
```
|
27
27
|
|
28
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/
|
28
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
29
|
|
30
30
|
## Reference
|
31
31
|
|
@@ -348,7 +348,7 @@ df.to_numo
|
|
348
348
|
You can specify column types when creating a data frame
|
349
349
|
|
350
350
|
```ruby
|
351
|
-
Polars::DataFrame.new(data,
|
351
|
+
Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
|
352
352
|
```
|
353
353
|
|
354
354
|
Supported types are:
|
@@ -357,8 +357,9 @@ Supported types are:
|
|
357
357
|
- float - `Float64`, `Float32`
|
358
358
|
- integer - `Int64`, `Int32`, `Int16`, `Int8`
|
359
359
|
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
360
|
-
- string - `Utf8`, `Categorical`
|
360
|
+
- string - `Utf8`, `Binary`, `Categorical`
|
361
361
|
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
362
|
+
- other - `Object`, `List`, `Struct`, `Array` [unreleased]
|
362
363
|
|
363
364
|
Get column types
|
364
365
|
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.6.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -11,17 +11,17 @@ crate-type = ["cdylib"]
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
|
+
chrono = "=0.4.24"
|
14
15
|
magnus = "0.5"
|
15
|
-
polars-core = "0.
|
16
|
+
polars-core = "0.31.1"
|
16
17
|
serde_json = "1"
|
17
18
|
smartstring = "1"
|
18
19
|
|
19
20
|
[dependencies.polars]
|
20
|
-
version = "0.
|
21
|
+
version = "0.31.1"
|
21
22
|
features = [
|
22
23
|
"abs",
|
23
24
|
"approx_unique",
|
24
|
-
"arange",
|
25
25
|
"arg_where",
|
26
26
|
"asof_join",
|
27
27
|
"avro",
|
@@ -68,6 +68,7 @@ features = [
|
|
68
68
|
"propagate_nans",
|
69
69
|
"random",
|
70
70
|
"rank",
|
71
|
+
"range",
|
71
72
|
"reinterpret",
|
72
73
|
"repeat_by",
|
73
74
|
"rolling_window",
|
data/ext/polars/src/apply/mod.rs
CHANGED
@@ -237,15 +237,19 @@ fn iterator_to_list(
|
|
237
237
|
for _ in 0..init_null_count {
|
238
238
|
builder.append_null()
|
239
239
|
}
|
240
|
-
builder
|
240
|
+
builder
|
241
|
+
.append_opt_series(first_value)
|
242
|
+
.map_err(RbPolarsErr::from)?;
|
241
243
|
for opt_val in it {
|
242
244
|
match opt_val {
|
243
245
|
None => builder.append_null(),
|
244
246
|
Some(s) => {
|
245
247
|
if s.len() == 0 && s.dtype() != dt {
|
246
|
-
builder
|
248
|
+
builder
|
249
|
+
.append_series(&Series::full_null("", 0, dt))
|
250
|
+
.unwrap()
|
247
251
|
} else {
|
248
|
-
builder.append_series(&s)
|
252
|
+
builder.append_series(&s).map_err(RbPolarsErr::from)?
|
249
253
|
}
|
250
254
|
}
|
251
255
|
}
|
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
use std::fmt::{Display, Formatter};
|
2
2
|
use std::hash::{Hash, Hasher};
|
3
3
|
|
4
|
+
use magnus::encoding::{EncodingCapable, Index};
|
4
5
|
use magnus::{
|
5
|
-
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module,
|
6
|
-
|
6
|
+
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Float, Integer, IntoValue, Module,
|
7
|
+
RArray, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
7
8
|
};
|
8
|
-
use magnus::encoding::{EncodingCapable, Index};
|
9
9
|
use polars::chunked_array::object::PolarsObjectSafe;
|
10
10
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
11
11
|
use polars::datatypes::AnyValue;
|
@@ -16,8 +16,9 @@ use polars::prelude::*;
|
|
16
16
|
use polars::series::ops::NullBehavior;
|
17
17
|
use smartstring::alias::String as SmartString;
|
18
18
|
|
19
|
+
use crate::object::OBJECT_NAME;
|
19
20
|
use crate::rb_modules::utils;
|
20
|
-
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
21
|
+
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
21
22
|
|
22
23
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
23
24
|
// Safety:
|
@@ -149,51 +150,29 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
149
150
|
AnyValue::Boolean(v) => Value::from(v),
|
150
151
|
AnyValue::Utf8(v) => Value::from(v),
|
151
152
|
AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
|
152
|
-
AnyValue::Categorical(
|
153
|
-
|
154
|
-
|
155
|
-
.unwrap()
|
156
|
-
.funcall::<_, _, Value>("utc", ())
|
157
|
-
.unwrap()
|
158
|
-
.funcall::<_, _, Value>("to_date", ())
|
159
|
-
.unwrap(),
|
160
|
-
AnyValue::Datetime(v, tu, tz) => {
|
161
|
-
let t = match tu {
|
162
|
-
TimeUnit::Nanoseconds => {
|
163
|
-
let sec = v / 1000000000;
|
164
|
-
let subsec = v % 1000000000;
|
165
|
-
class::time()
|
166
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
|
167
|
-
.unwrap()
|
168
|
-
}
|
169
|
-
TimeUnit::Microseconds => {
|
170
|
-
let sec = v / 1000000;
|
171
|
-
let subsec = v % 1000000;
|
172
|
-
class::time()
|
173
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
174
|
-
.unwrap()
|
175
|
-
}
|
176
|
-
TimeUnit::Milliseconds => {
|
177
|
-
let sec = v / 1000;
|
178
|
-
let subsec = v % 1000;
|
179
|
-
class::time()
|
180
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
|
181
|
-
.unwrap()
|
182
|
-
}
|
183
|
-
};
|
184
|
-
|
185
|
-
if tz.is_some() {
|
186
|
-
todo!();
|
153
|
+
AnyValue::Categorical(idx, rev, arr) => {
|
154
|
+
let s = if arr.is_null() {
|
155
|
+
rev.get(idx)
|
187
156
|
} else {
|
188
|
-
|
189
|
-
}
|
157
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
158
|
+
};
|
159
|
+
s.into_value()
|
190
160
|
}
|
191
|
-
AnyValue::
|
192
|
-
|
193
|
-
|
161
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
162
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
163
|
+
let time_unit = time_unit.to_ascii();
|
164
|
+
utils()
|
165
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
166
|
+
.unwrap()
|
194
167
|
}
|
195
|
-
AnyValue::
|
196
|
-
|
168
|
+
AnyValue::Duration(v, time_unit) => {
|
169
|
+
let time_unit = time_unit.to_ascii();
|
170
|
+
utils()
|
171
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
172
|
+
.unwrap()
|
173
|
+
}
|
174
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
175
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
197
176
|
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
198
177
|
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
199
178
|
AnyValue::Object(v) => {
|
@@ -206,7 +185,9 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
206
185
|
}
|
207
186
|
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
208
187
|
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
209
|
-
AnyValue::Decimal(
|
188
|
+
AnyValue::Decimal(v, scale) => utils()
|
189
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
190
|
+
.unwrap(),
|
210
191
|
}
|
211
192
|
}
|
212
193
|
}
|
@@ -226,10 +207,22 @@ impl IntoValue for Wrap<DataType> {
|
|
226
207
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
227
208
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
228
209
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
229
|
-
DataType::Decimal(
|
210
|
+
DataType::Decimal(precision, scale) => {
|
211
|
+
let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
|
212
|
+
decimal_class
|
213
|
+
.funcall::<_, _, Value>("new", (precision, scale))
|
214
|
+
.unwrap()
|
215
|
+
}
|
230
216
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
231
217
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
232
218
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
219
|
+
DataType::Array(inner, size) => {
|
220
|
+
let inner = Wrap(*inner);
|
221
|
+
let list_class = pl.const_get::<_, Value>("Array").unwrap();
|
222
|
+
list_class
|
223
|
+
.funcall::<_, _, Value>("new", (size, inner))
|
224
|
+
.unwrap()
|
225
|
+
}
|
233
226
|
DataType::List(inner) => {
|
234
227
|
let inner = Wrap(*inner);
|
235
228
|
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
@@ -321,11 +314,11 @@ impl IntoValue for Wrap<&StructChunked> {
|
|
321
314
|
impl IntoValue for Wrap<&DurationChunked> {
|
322
315
|
fn into_value_with(self, _: &RubyHandle) -> Value {
|
323
316
|
let utils = utils();
|
324
|
-
let
|
317
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
325
318
|
let iter = self.0.into_iter().map(|opt_v| {
|
326
319
|
opt_v.map(|v| {
|
327
320
|
utils
|
328
|
-
.funcall::<_, _, Value>("_to_ruby_duration", (v,
|
321
|
+
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
329
322
|
.unwrap()
|
330
323
|
})
|
331
324
|
});
|
@@ -333,6 +326,64 @@ impl IntoValue for Wrap<&DurationChunked> {
|
|
333
326
|
}
|
334
327
|
}
|
335
328
|
|
329
|
+
impl IntoValue for Wrap<&DatetimeChunked> {
|
330
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
331
|
+
let utils = utils();
|
332
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
333
|
+
let time_zone = self.0.time_zone().clone().into_value();
|
334
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
335
|
+
opt_v.map(|v| {
|
336
|
+
utils
|
337
|
+
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
338
|
+
.unwrap()
|
339
|
+
})
|
340
|
+
});
|
341
|
+
RArray::from_iter(iter).into_value()
|
342
|
+
}
|
343
|
+
}
|
344
|
+
|
345
|
+
impl IntoValue for Wrap<&TimeChunked> {
|
346
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
347
|
+
let utils = utils();
|
348
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
349
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
350
|
+
});
|
351
|
+
RArray::from_iter(iter).into_value()
|
352
|
+
}
|
353
|
+
}
|
354
|
+
|
355
|
+
impl IntoValue for Wrap<&DateChunked> {
|
356
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
357
|
+
let utils = utils();
|
358
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
359
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
360
|
+
});
|
361
|
+
RArray::from_iter(iter).into_value()
|
362
|
+
}
|
363
|
+
}
|
364
|
+
|
365
|
+
impl IntoValue for Wrap<&DecimalChunked> {
|
366
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
367
|
+
let utils = utils();
|
368
|
+
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
369
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
370
|
+
opt_v.map(|v| {
|
371
|
+
utils
|
372
|
+
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
373
|
+
.unwrap()
|
374
|
+
})
|
375
|
+
});
|
376
|
+
RArray::from_iter(iter).into_value()
|
377
|
+
}
|
378
|
+
}
|
379
|
+
|
380
|
+
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
381
|
+
match digits.parse::<i128>() {
|
382
|
+
Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
|
383
|
+
Err(_) => None,
|
384
|
+
}
|
385
|
+
}
|
386
|
+
|
336
387
|
impl TryConvert for Wrap<Field> {
|
337
388
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
338
389
|
let name: String = ob.funcall("name", ())?;
|
@@ -362,10 +413,11 @@ impl TryConvert for Wrap<DataType> {
|
|
362
413
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
363
414
|
"Polars::Time" => DataType::Time,
|
364
415
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
416
|
+
"Polars::Decimal" => DataType::Decimal(None, None),
|
365
417
|
"Polars::Float32" => DataType::Float32,
|
366
418
|
"Polars::Float64" => DataType::Float64,
|
367
|
-
|
368
|
-
"Polars::List" => DataType::List(Box::new(DataType::
|
419
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME),
|
420
|
+
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
369
421
|
"Polars::Null" => DataType::Null,
|
370
422
|
"Polars::Unknown" => DataType::Unknown,
|
371
423
|
dt => {
|
@@ -378,6 +430,28 @@ impl TryConvert for Wrap<DataType> {
|
|
378
430
|
} else if ob.try_convert::<String>().is_err() {
|
379
431
|
let name = unsafe { ob.class().name() }.into_owned();
|
380
432
|
match name.as_str() {
|
433
|
+
"Polars::Duration" => {
|
434
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
435
|
+
let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
|
436
|
+
DataType::Duration(time_unit)
|
437
|
+
}
|
438
|
+
"Polars::Datetime" => {
|
439
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
440
|
+
let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
|
441
|
+
let time_zone: Value = ob.funcall("time_zone", ()).unwrap();
|
442
|
+
let time_zone = time_zone.try_convert()?;
|
443
|
+
DataType::Datetime(time_unit, time_zone)
|
444
|
+
}
|
445
|
+
"Polars::Decimal" => {
|
446
|
+
let precision = ob.funcall::<_, _, Value>("precision", ())?.try_convert()?;
|
447
|
+
let scale = ob.funcall::<_, _, Value>("scale", ())?.try_convert()?;
|
448
|
+
DataType::Decimal(precision, Some(scale))
|
449
|
+
}
|
450
|
+
"Polars::List" => {
|
451
|
+
let inner: Value = ob.funcall("inner", ()).unwrap();
|
452
|
+
let inner = inner.try_convert::<Wrap<DataType>>()?;
|
453
|
+
DataType::List(Box::new(inner.0))
|
454
|
+
}
|
381
455
|
"Polars::Struct" => {
|
382
456
|
let arr: RArray = ob.funcall("fields", ())?;
|
383
457
|
let mut fields = Vec::with_capacity(arr.len());
|
@@ -386,7 +460,12 @@ impl TryConvert for Wrap<DataType> {
|
|
386
460
|
}
|
387
461
|
DataType::Struct(fields)
|
388
462
|
}
|
389
|
-
|
463
|
+
dt => {
|
464
|
+
return Err(RbTypeError::new_err(format!(
|
465
|
+
"A {dt} object is not a correct polars DataType. \
|
466
|
+
Hint: use the class without instantiating it.",
|
467
|
+
)))
|
468
|
+
}
|
390
469
|
}
|
391
470
|
} else {
|
392
471
|
match ob.try_convert::<String>()?.as_str() {
|
@@ -430,7 +509,7 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
430
509
|
Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
|
431
510
|
} else if let Some(v) = Integer::from_value(ob) {
|
432
511
|
Ok(AnyValue::Int64(v.to_i64()?).into())
|
433
|
-
} else if let Some(v) =
|
512
|
+
} else if let Some(v) = Float::from_value(ob) {
|
434
513
|
Ok(AnyValue::Float64(v.to_f64()).into())
|
435
514
|
} else if let Some(v) = RString::from_value(ob) {
|
436
515
|
if v.enc_get() == Index::utf8() {
|
@@ -443,7 +522,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
443
522
|
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
444
523
|
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
445
524
|
let v = sec * 1_000_000_000 + nsec;
|
446
|
-
// TODO support time zone
|
525
|
+
// TODO support time zone when possible
|
526
|
+
// https://github.com/pola-rs/polars/issues/9103
|
447
527
|
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
448
528
|
} else if ob.is_nil() {
|
449
529
|
Ok(AnyValue::Null.into())
|
@@ -464,13 +544,23 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
464
544
|
if v.is_empty() {
|
465
545
|
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
466
546
|
} else {
|
467
|
-
let
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
.
|
547
|
+
let list = v;
|
548
|
+
|
549
|
+
let mut avs = Vec::with_capacity(25);
|
550
|
+
let mut iter = list.each();
|
551
|
+
|
552
|
+
for item in (&mut iter).take(25) {
|
553
|
+
avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
|
554
|
+
}
|
555
|
+
|
556
|
+
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
557
|
+
|
558
|
+
// push the rest
|
559
|
+
avs.reserve(list.len());
|
560
|
+
for item in iter {
|
561
|
+
avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
|
562
|
+
}
|
563
|
+
|
474
564
|
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
475
565
|
.map_err(RbPolarsErr::from)?;
|
476
566
|
Ok(Wrap(AnyValue::List(s)))
|
@@ -478,7 +568,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
478
568
|
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
479
569
|
let sec: i64 = ob.funcall("to_i", ())?;
|
480
570
|
let nsec: i64 = ob.funcall("nsec", ())?;
|
481
|
-
Ok(Wrap(AnyValue::Datetime(
|
571
|
+
Ok(Wrap(AnyValue::Datetime(
|
572
|
+
sec * 1_000_000_000 + nsec,
|
573
|
+
TimeUnit::Nanoseconds,
|
574
|
+
&None,
|
575
|
+
)))
|
482
576
|
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
483
577
|
// convert to DateTime for UTC
|
484
578
|
let v = ob
|
@@ -486,6 +580,20 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
486
580
|
.funcall::<_, _, Value>("to_time", ())?
|
487
581
|
.funcall::<_, _, i64>("to_i", ())?;
|
488
582
|
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
583
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
584
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob
|
585
|
+
.funcall::<_, _, Value>("split", ())
|
586
|
+
.unwrap()
|
587
|
+
.try_convert()
|
588
|
+
.unwrap();
|
589
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
590
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
591
|
+
})?;
|
592
|
+
if sign < 0 {
|
593
|
+
// TODO better error
|
594
|
+
v = v.checked_neg().unwrap();
|
595
|
+
}
|
596
|
+
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
489
597
|
} else {
|
490
598
|
Err(RbPolarsErr::other(format!(
|
491
599
|
"object type not supported {:?}",
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -456,12 +456,14 @@ impl RbDataFrame {
|
|
456
456
|
.finish(&mut self.df.borrow_mut())
|
457
457
|
.map_err(RbPolarsErr::from)?;
|
458
458
|
} else {
|
459
|
-
let mut buf =
|
460
|
-
|
459
|
+
let mut buf = Cursor::new(Vec::new());
|
461
460
|
IpcWriter::new(&mut buf)
|
462
461
|
.with_compression(compression.0)
|
463
462
|
.finish(&mut self.df.borrow_mut())
|
464
463
|
.map_err(RbPolarsErr::from)?;
|
464
|
+
// TODO less copying
|
465
|
+
let rb_str = RString::from_slice(&buf.into_inner());
|
466
|
+
rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
|
465
467
|
}
|
466
468
|
Ok(())
|
467
469
|
}
|
@@ -794,22 +796,6 @@ impl RbDataFrame {
|
|
794
796
|
Ok(RbDataFrame::new(df))
|
795
797
|
}
|
796
798
|
|
797
|
-
pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
|
798
|
-
let df = self
|
799
|
-
.df
|
800
|
-
.borrow()
|
801
|
-
.sort_with_options(
|
802
|
-
&by_column,
|
803
|
-
SortOptions {
|
804
|
-
descending: reverse,
|
805
|
-
nulls_last,
|
806
|
-
multithreaded: true,
|
807
|
-
},
|
808
|
-
)
|
809
|
-
.map_err(RbPolarsErr::from)?;
|
810
|
-
Ok(RbDataFrame::new(df))
|
811
|
-
}
|
812
|
-
|
813
799
|
pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
|
814
800
|
self.df
|
815
801
|
.borrow_mut()
|
@@ -933,11 +919,16 @@ impl RbDataFrame {
|
|
933
919
|
Ok(RbDataFrame::new(df))
|
934
920
|
}
|
935
921
|
|
936
|
-
pub fn partition_by(
|
937
|
-
|
938
|
-
|
922
|
+
pub fn partition_by(
|
923
|
+
&self,
|
924
|
+
by: Vec<String>,
|
925
|
+
maintain_order: bool,
|
926
|
+
include_key: bool,
|
927
|
+
) -> RbResult<RArray> {
|
928
|
+
let out = if maintain_order {
|
929
|
+
self.df.borrow().partition_by_stable(by, include_key)
|
939
930
|
} else {
|
940
|
-
self.df.borrow().partition_by(
|
931
|
+
self.df.borrow().partition_by(by, include_key)
|
941
932
|
}
|
942
933
|
.map_err(RbPolarsErr::from)?;
|
943
934
|
Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
|
@@ -1024,13 +1015,18 @@ impl RbDataFrame {
|
|
1024
1015
|
&self,
|
1025
1016
|
columns: Option<Vec<String>>,
|
1026
1017
|
separator: Option<String>,
|
1018
|
+
drop_first: bool,
|
1027
1019
|
) -> RbResult<Self> {
|
1028
1020
|
let df = match columns {
|
1029
1021
|
Some(cols) => self.df.borrow().columns_to_dummies(
|
1030
1022
|
cols.iter().map(|x| x as &str).collect(),
|
1031
1023
|
separator.as_deref(),
|
1024
|
+
drop_first,
|
1032
1025
|
),
|
1033
|
-
None => self
|
1026
|
+
None => self
|
1027
|
+
.df
|
1028
|
+
.borrow()
|
1029
|
+
.to_dummies(separator.as_deref(), drop_first),
|
1034
1030
|
}
|
1035
1031
|
.map_err(RbPolarsErr::from)?;
|
1036
1032
|
Ok(df.into())
|
data/ext/polars/src/error.rs
CHANGED
@@ -24,6 +24,14 @@ impl RbPolarsErr {
|
|
24
24
|
}
|
25
25
|
}
|
26
26
|
|
27
|
+
pub struct RbTypeError {}
|
28
|
+
|
29
|
+
impl RbTypeError {
|
30
|
+
pub fn new_err(message: String) -> Error {
|
31
|
+
Error::new(exception::type_error(), message)
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
27
35
|
pub struct RbValueError {}
|
28
36
|
|
29
37
|
impl RbValueError {
|
data/ext/polars/src/expr/array.rs
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
use crate::RbExpr;
|
2
|
+
|
3
|
+
impl RbExpr {
|
4
|
+
pub fn array_max(&self) -> Self {
|
5
|
+
self.inner.clone().arr().max().into()
|
6
|
+
}
|
7
|
+
|
8
|
+
pub fn array_min(&self) -> Self {
|
9
|
+
self.inner.clone().arr().min().into()
|
10
|
+
}
|
11
|
+
|
12
|
+
pub fn array_sum(&self) -> Self {
|
13
|
+
self.inner.clone().arr().sum().into()
|
14
|
+
}
|
15
|
+
}
|