polars-df 0.10.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,4 +1,4 @@
|
|
1
|
-
pub(crate) mod anyvalue;
|
1
|
+
pub(crate) mod any_value;
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
@@ -18,6 +18,7 @@ use polars::io::avro::AvroCompression;
|
|
18
18
|
use polars::prelude::*;
|
19
19
|
use polars::series::ops::NullBehavior;
|
20
20
|
use polars_core::utils::arrow::array::Array;
|
21
|
+
use polars_core::utils::materialize_dyn_int;
|
21
22
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
22
23
|
use smartstring::alias::String as SmartString;
|
23
24
|
|
@@ -74,7 +75,7 @@ pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
|
74
75
|
|
75
76
|
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
76
77
|
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
77
|
-
Ok(rbdf.ldf.clone())
|
78
|
+
Ok(rbdf.ldf.borrow().clone())
|
78
79
|
}
|
79
80
|
|
80
81
|
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
@@ -154,7 +155,7 @@ impl IntoValue for Wrap<DataType> {
|
|
154
155
|
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
155
156
|
class.funcall("new", ()).unwrap()
|
156
157
|
}
|
157
|
-
DataType::Float64 => {
|
158
|
+
DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
|
158
159
|
let class = pl.const_get::<_, Value>("Float64").unwrap();
|
159
160
|
class.funcall("new", ()).unwrap()
|
160
161
|
}
|
@@ -168,7 +169,7 @@ impl IntoValue for Wrap<DataType> {
|
|
168
169
|
let class = pl.const_get::<_, Value>("Boolean").unwrap();
|
169
170
|
class.funcall("new", ()).unwrap()
|
170
171
|
}
|
171
|
-
DataType::String => {
|
172
|
+
DataType::String | DataType::Unknown(UnknownKind::Str) => {
|
172
173
|
let class = pl.const_get::<_, Value>("String").unwrap();
|
173
174
|
class.funcall("new", ()).unwrap()
|
174
175
|
}
|
@@ -242,7 +243,10 @@ impl IntoValue for Wrap<DataType> {
|
|
242
243
|
let class = pl.const_get::<_, Value>("Null").unwrap();
|
243
244
|
class.funcall("new", ()).unwrap()
|
244
245
|
}
|
245
|
-
DataType::Unknown => {
|
246
|
+
DataType::Unknown(UnknownKind::Int(v)) => {
|
247
|
+
Wrap(materialize_dyn_int(v).dtype()).into_value()
|
248
|
+
}
|
249
|
+
DataType::Unknown(_) => {
|
246
250
|
let class = pl.const_get::<_, Value>("Unknown").unwrap();
|
247
251
|
class.funcall("new", ()).unwrap()
|
248
252
|
}
|
@@ -310,7 +314,7 @@ impl TryConvert for Wrap<DataType> {
|
|
310
314
|
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
311
315
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
312
316
|
"Polars::Null" => DataType::Null,
|
313
|
-
"Polars::Unknown" => DataType::Unknown,
|
317
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
314
318
|
dt => {
|
315
319
|
return Err(RbValueError::new_err(format!(
|
316
320
|
"{dt} is not a correct polars DataType.",
|
@@ -350,7 +354,7 @@ impl TryConvert for Wrap<DataType> {
|
|
350
354
|
"Polars::Float32" => DataType::Float32,
|
351
355
|
"Polars::Float64" => DataType::Float64,
|
352
356
|
"Polars::Null" => DataType::Null,
|
353
|
-
"Polars::Unknown" => DataType::Unknown,
|
357
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
354
358
|
"Polars::Duration" => {
|
355
359
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
356
360
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
@@ -375,8 +379,8 @@ impl TryConvert for Wrap<DataType> {
|
|
375
379
|
"Polars::Struct" => {
|
376
380
|
let arr: RArray = ob.funcall("fields", ())?;
|
377
381
|
let mut fields = Vec::with_capacity(arr.len());
|
378
|
-
for v in arr.each() {
|
379
|
-
fields.push(Wrap::<Field>::try_convert(v?)?.0);
|
382
|
+
for v in arr.into_iter() {
|
383
|
+
fields.push(Wrap::<Field>::try_convert(v)?.0);
|
380
384
|
}
|
381
385
|
DataType::Struct(fields)
|
382
386
|
}
|
@@ -410,7 +414,7 @@ impl TryConvert for Wrap<DataType> {
|
|
410
414
|
"obj" => DataType::Object(OBJECT_NAME, None),
|
411
415
|
"list" => DataType::List(Box::new(DataType::Boolean)),
|
412
416
|
"null" => DataType::Null,
|
413
|
-
"unk" => DataType::Unknown,
|
417
|
+
"unk" => DataType::Unknown(Default::default()),
|
414
418
|
_ => {
|
415
419
|
return Err(RbValueError::new_err(format!(
|
416
420
|
"{} is not a supported DataType.",
|
@@ -423,11 +427,36 @@ impl TryConvert for Wrap<DataType> {
|
|
423
427
|
}
|
424
428
|
}
|
425
429
|
|
430
|
+
impl TryConvert for Wrap<StatisticsOptions> {
|
431
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
432
|
+
let mut statistics = StatisticsOptions::empty();
|
433
|
+
|
434
|
+
let dict = RHash::try_convert(ob)?;
|
435
|
+
dict.foreach(|key: Symbol, val: bool| {
|
436
|
+
match key.name()?.as_ref() {
|
437
|
+
"min" => statistics.min_value = val,
|
438
|
+
"max" => statistics.max_value = val,
|
439
|
+
"distinct_count" => statistics.distinct_count = val,
|
440
|
+
"null_count" => statistics.null_count = val,
|
441
|
+
_ => {
|
442
|
+
return Err(RbTypeError::new_err(format!(
|
443
|
+
"'{key}' is not a valid statistic option",
|
444
|
+
)))
|
445
|
+
}
|
446
|
+
}
|
447
|
+
Ok(ForEach::Continue)
|
448
|
+
})
|
449
|
+
.unwrap();
|
450
|
+
|
451
|
+
Ok(Wrap(statistics))
|
452
|
+
}
|
453
|
+
}
|
454
|
+
|
426
455
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
427
456
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
428
457
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
429
|
-
for item in RArray::try_convert(ob)?.each() {
|
430
|
-
vals.push(Wrap::<AnyValue<'s>>::try_convert(item?)?);
|
458
|
+
for item in RArray::try_convert(ob)?.into_iter() {
|
459
|
+
vals.push(Wrap::<AnyValue<'s>>::try_convert(item)?);
|
431
460
|
}
|
432
461
|
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
433
462
|
Ok(Wrap(Row(vals)))
|
@@ -546,57 +575,6 @@ impl Default for ObjectValue {
|
|
546
575
|
}
|
547
576
|
}
|
548
577
|
|
549
|
-
pub(crate) fn dicts_to_rows(
|
550
|
-
records: &Value,
|
551
|
-
infer_schema_len: Option<usize>,
|
552
|
-
schema_columns: PlIndexSet<String>,
|
553
|
-
) -> RbResult<(Vec<Row>, Vec<String>)> {
|
554
|
-
let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
|
555
|
-
let (dicts, len) = get_rbseq(*records)?;
|
556
|
-
|
557
|
-
let key_names = {
|
558
|
-
if !schema_columns.is_empty() {
|
559
|
-
schema_columns
|
560
|
-
} else {
|
561
|
-
let mut inferred_keys = PlIndexSet::new();
|
562
|
-
for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
|
563
|
-
let d = d?;
|
564
|
-
let d = RHash::try_convert(d)?;
|
565
|
-
|
566
|
-
d.foreach(|name: Value, _value: Value| {
|
567
|
-
if let Some(v) = Symbol::from_value(name) {
|
568
|
-
inferred_keys.insert(v.name()?.into());
|
569
|
-
} else {
|
570
|
-
inferred_keys.insert(String::try_convert(name)?);
|
571
|
-
};
|
572
|
-
Ok(ForEach::Continue)
|
573
|
-
})?;
|
574
|
-
}
|
575
|
-
inferred_keys
|
576
|
-
}
|
577
|
-
};
|
578
|
-
|
579
|
-
let mut rows = Vec::with_capacity(len);
|
580
|
-
|
581
|
-
for d in dicts.each() {
|
582
|
-
let d = d?;
|
583
|
-
let d = RHash::try_convert(d)?;
|
584
|
-
|
585
|
-
let mut row = Vec::with_capacity(key_names.len());
|
586
|
-
|
587
|
-
for k in key_names.iter() {
|
588
|
-
// TODO improve performance
|
589
|
-
let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
|
590
|
-
None => AnyValue::Null,
|
591
|
-
Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
|
592
|
-
};
|
593
|
-
row.push(val)
|
594
|
-
}
|
595
|
-
rows.push(Row(row))
|
596
|
-
}
|
597
|
-
Ok((rows, key_names.into_iter().collect()))
|
598
|
-
}
|
599
|
-
|
600
578
|
impl TryConvert for Wrap<AsofStrategy> {
|
601
579
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
602
580
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -733,14 +711,13 @@ impl TryConvert for Wrap<JoinType> {
|
|
733
711
|
let parsed = match String::try_convert(ob)?.as_str() {
|
734
712
|
"inner" => JoinType::Inner,
|
735
713
|
"left" => JoinType::Left,
|
736
|
-
"outer" => JoinType::Outer { coalesce: false },
|
737
|
-
"outer_coalesce" => JoinType::Outer { coalesce: true },
|
714
|
+
"full" => JoinType::Full,
|
738
715
|
"semi" => JoinType::Semi,
|
739
716
|
"anti" => JoinType::Anti,
|
740
717
|
"cross" => JoinType::Cross,
|
741
718
|
v => {
|
742
719
|
return Err(RbValueError::new_err(format!(
|
743
|
-
"how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
|
720
|
+
"how must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {}",
|
744
721
|
v
|
745
722
|
)))
|
746
723
|
}
|
@@ -950,6 +927,23 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
950
927
|
}
|
951
928
|
}
|
952
929
|
|
930
|
+
impl TryConvert for Wrap<ClosedInterval> {
|
931
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
932
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
933
|
+
"both" => ClosedInterval::Both,
|
934
|
+
"left" => ClosedInterval::Left,
|
935
|
+
"right" => ClosedInterval::Right,
|
936
|
+
"none" => ClosedInterval::None,
|
937
|
+
v => {
|
938
|
+
return Err(RbValueError::new_err(format!(
|
939
|
+
"`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
|
940
|
+
)))
|
941
|
+
}
|
942
|
+
};
|
943
|
+
Ok(Wrap(parsed))
|
944
|
+
}
|
945
|
+
}
|
946
|
+
|
953
947
|
impl TryConvert for Wrap<WindowMapping> {
|
954
948
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
955
949
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -0,0 +1,184 @@
|
|
1
|
+
use magnus::{prelude::*, r_hash::ForEach, RArray, RHash, Symbol, Value};
|
2
|
+
use polars::frame::row::{rows_to_schema_supertypes, rows_to_supertypes, Row};
|
3
|
+
use polars::prelude::*;
|
4
|
+
|
5
|
+
use super::*;
|
6
|
+
use crate::conversion::*;
|
7
|
+
use crate::{RbPolarsErr, RbResult};
|
8
|
+
|
9
|
+
impl RbDataFrame {
|
10
|
+
pub fn from_rows(
|
11
|
+
rb_rows: RArray,
|
12
|
+
infer_schema_length: Option<usize>,
|
13
|
+
schema: Option<Wrap<Schema>>,
|
14
|
+
) -> RbResult<Self> {
|
15
|
+
let mut data = Vec::with_capacity(rb_rows.len());
|
16
|
+
for v in rb_rows.into_iter() {
|
17
|
+
let rb_row = RArray::try_convert(v)?;
|
18
|
+
let mut row = Vec::with_capacity(rb_row.len());
|
19
|
+
for val in rb_row.into_iter() {
|
20
|
+
row.push(Wrap::<AnyValue>::try_convert(val)?.0);
|
21
|
+
}
|
22
|
+
data.push(Row(row));
|
23
|
+
}
|
24
|
+
let schema = schema.map(|wrap| wrap.0);
|
25
|
+
finish_from_rows(data, schema, None, infer_schema_length)
|
26
|
+
}
|
27
|
+
|
28
|
+
pub fn from_hashes(
|
29
|
+
data: Value,
|
30
|
+
schema: Option<Wrap<Schema>>,
|
31
|
+
schema_overrides: Option<Wrap<Schema>>,
|
32
|
+
strict: bool,
|
33
|
+
infer_schema_length: Option<usize>,
|
34
|
+
) -> RbResult<Self> {
|
35
|
+
let schema = schema.map(|wrap| wrap.0);
|
36
|
+
let schema_overrides = schema_overrides.map(|wrap| wrap.0);
|
37
|
+
|
38
|
+
let names = get_schema_names(&data, schema.as_ref(), infer_schema_length)?;
|
39
|
+
let rows = dicts_to_rows(&data, &names, strict)?;
|
40
|
+
|
41
|
+
let schema = schema.or_else(|| {
|
42
|
+
Some(columns_names_to_empty_schema(
|
43
|
+
names.iter().map(String::as_str),
|
44
|
+
))
|
45
|
+
});
|
46
|
+
|
47
|
+
finish_from_rows(rows, schema, schema_overrides, infer_schema_length)
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
fn finish_from_rows(
|
52
|
+
rows: Vec<Row>,
|
53
|
+
schema: Option<Schema>,
|
54
|
+
schema_overrides: Option<Schema>,
|
55
|
+
infer_schema_length: Option<usize>,
|
56
|
+
) -> RbResult<RbDataFrame> {
|
57
|
+
// Object builder must be registered
|
58
|
+
crate::on_startup::register_object_builder();
|
59
|
+
|
60
|
+
let mut schema = if let Some(mut schema) = schema {
|
61
|
+
resolve_schema_overrides(&mut schema, schema_overrides);
|
62
|
+
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
63
|
+
schema
|
64
|
+
} else {
|
65
|
+
rows_to_schema_supertypes(&rows, infer_schema_length).map_err(RbPolarsErr::from)?
|
66
|
+
};
|
67
|
+
|
68
|
+
// TODO: Remove this step when Decimals are supported properly.
|
69
|
+
// Erasing the decimal precision/scale here will just require us to infer it again later.
|
70
|
+
// https://github.com/pola-rs/polars/issues/14427
|
71
|
+
erase_decimal_precision_scale(&mut schema);
|
72
|
+
|
73
|
+
let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
|
74
|
+
Ok(df.into())
|
75
|
+
}
|
76
|
+
|
77
|
+
fn update_schema_from_rows(
|
78
|
+
schema: &mut Schema,
|
79
|
+
rows: &[Row],
|
80
|
+
infer_schema_length: Option<usize>,
|
81
|
+
) -> RbResult<()> {
|
82
|
+
let schema_is_complete = schema.iter_dtypes().all(|dtype| dtype.is_known());
|
83
|
+
if schema_is_complete {
|
84
|
+
return Ok(());
|
85
|
+
}
|
86
|
+
|
87
|
+
// TODO: Only infer dtypes for columns with an unknown dtype
|
88
|
+
let inferred_dtypes =
|
89
|
+
rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
|
90
|
+
let inferred_dtypes_slice = inferred_dtypes.as_slice();
|
91
|
+
|
92
|
+
for (i, dtype) in schema.iter_dtypes_mut().enumerate() {
|
93
|
+
if !dtype.is_known() {
|
94
|
+
*dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
|
95
|
+
polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
|
96
|
+
})
|
97
|
+
.map_err(RbPolarsErr::from)?
|
98
|
+
.clone();
|
99
|
+
}
|
100
|
+
}
|
101
|
+
Ok(())
|
102
|
+
}
|
103
|
+
|
104
|
+
fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema>) {
|
105
|
+
if let Some(overrides) = schema_overrides {
|
106
|
+
for (name, dtype) in overrides.into_iter() {
|
107
|
+
schema.set_dtype(name.as_str(), dtype);
|
108
|
+
}
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
fn erase_decimal_precision_scale(schema: &mut Schema) {
|
113
|
+
for dtype in schema.iter_dtypes_mut() {
|
114
|
+
if let DataType::Decimal(_, _) = dtype {
|
115
|
+
*dtype = DataType::Decimal(None, None)
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
fn columns_names_to_empty_schema<'a, I>(column_names: I) -> Schema
|
121
|
+
where
|
122
|
+
I: IntoIterator<Item = &'a str>,
|
123
|
+
{
|
124
|
+
let fields = column_names
|
125
|
+
.into_iter()
|
126
|
+
.map(|c| Field::new(c, DataType::Unknown(Default::default())));
|
127
|
+
Schema::from_iter(fields)
|
128
|
+
}
|
129
|
+
|
130
|
+
fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResult<Vec<Row<'a>>> {
|
131
|
+
let (data, len) = get_rbseq(*data)?;
|
132
|
+
let mut rows = Vec::with_capacity(len);
|
133
|
+
for d in data.into_iter() {
|
134
|
+
let d = RHash::try_convert(d)?;
|
135
|
+
|
136
|
+
let mut row = Vec::with_capacity(names.len());
|
137
|
+
for k in names.iter() {
|
138
|
+
// TODO improve performance
|
139
|
+
let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
|
140
|
+
None => AnyValue::Null,
|
141
|
+
Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
|
142
|
+
};
|
143
|
+
row.push(val)
|
144
|
+
}
|
145
|
+
rows.push(Row(row))
|
146
|
+
}
|
147
|
+
Ok(rows)
|
148
|
+
}
|
149
|
+
|
150
|
+
fn get_schema_names(
|
151
|
+
data: &Value,
|
152
|
+
schema: Option<&Schema>,
|
153
|
+
infer_schema_length: Option<usize>,
|
154
|
+
) -> RbResult<Vec<String>> {
|
155
|
+
if let Some(schema) = schema {
|
156
|
+
Ok(schema.iter_names().map(|n| n.to_string()).collect())
|
157
|
+
} else {
|
158
|
+
infer_schema_names_from_data(data, infer_schema_length)
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
fn infer_schema_names_from_data(
|
163
|
+
data: &Value,
|
164
|
+
infer_schema_length: Option<usize>,
|
165
|
+
) -> RbResult<Vec<String>> {
|
166
|
+
let (data, data_len) = get_rbseq(*data)?;
|
167
|
+
let infer_schema_length = infer_schema_length
|
168
|
+
.map(|n| std::cmp::max(1, n))
|
169
|
+
.unwrap_or(data_len);
|
170
|
+
|
171
|
+
let mut names = PlIndexSet::new();
|
172
|
+
for d in data.into_iter().take(infer_schema_length) {
|
173
|
+
let d = RHash::try_convert(d)?;
|
174
|
+
d.foreach(|name: Value, _value: Value| {
|
175
|
+
if let Some(v) = Symbol::from_value(name) {
|
176
|
+
names.insert(v.name()?.into());
|
177
|
+
} else {
|
178
|
+
names.insert(String::try_convert(name)?);
|
179
|
+
};
|
180
|
+
Ok(ForEach::Continue)
|
181
|
+
})?;
|
182
|
+
}
|
183
|
+
Ok(names.into_iter().collect())
|
184
|
+
}
|
@@ -0,0 +1,48 @@
|
|
1
|
+
use magnus::{prelude::*, IntoValue, RArray, Value};
|
2
|
+
|
3
|
+
use super::*;
|
4
|
+
use crate::conversion::{ObjectValue, Wrap};
|
5
|
+
|
6
|
+
impl RbDataFrame {
|
7
|
+
pub fn row_tuple(&self, idx: i64) -> Value {
|
8
|
+
let idx = if idx < 0 {
|
9
|
+
(self.df.borrow().height() as i64 + idx) as usize
|
10
|
+
} else {
|
11
|
+
idx as usize
|
12
|
+
};
|
13
|
+
RArray::from_iter(
|
14
|
+
self.df
|
15
|
+
.borrow()
|
16
|
+
.get_columns()
|
17
|
+
.iter()
|
18
|
+
.map(|s| match s.dtype() {
|
19
|
+
DataType::Object(_, _) => {
|
20
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
21
|
+
obj.unwrap().to_object()
|
22
|
+
}
|
23
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
24
|
+
}),
|
25
|
+
)
|
26
|
+
.as_value()
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn row_tuples(&self) -> Value {
|
30
|
+
let df = &self.df;
|
31
|
+
RArray::from_iter((0..df.borrow().height()).map(|idx| {
|
32
|
+
RArray::from_iter(
|
33
|
+
self.df
|
34
|
+
.borrow()
|
35
|
+
.get_columns()
|
36
|
+
.iter()
|
37
|
+
.map(|s| match s.dtype() {
|
38
|
+
DataType::Object(_, _) => {
|
39
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
40
|
+
obj.unwrap().to_object()
|
41
|
+
}
|
42
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
43
|
+
}),
|
44
|
+
)
|
45
|
+
}))
|
46
|
+
.as_value()
|
47
|
+
}
|
48
|
+
}
|