polars-df 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,4 +1,4 @@
|
|
1
|
-
pub(crate) mod anyvalue;
|
1
|
+
pub(crate) mod any_value;
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
@@ -18,6 +18,7 @@ use polars::io::avro::AvroCompression;
|
|
18
18
|
use polars::prelude::*;
|
19
19
|
use polars::series::ops::NullBehavior;
|
20
20
|
use polars_core::utils::arrow::array::Array;
|
21
|
+
use polars_core::utils::materialize_dyn_int;
|
21
22
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
22
23
|
use smartstring::alias::String as SmartString;
|
23
24
|
|
@@ -74,7 +75,7 @@ pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
|
74
75
|
|
75
76
|
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
76
77
|
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
77
|
-
Ok(rbdf.ldf.clone())
|
78
|
+
Ok(rbdf.ldf.borrow().clone())
|
78
79
|
}
|
79
80
|
|
80
81
|
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
@@ -154,7 +155,7 @@ impl IntoValue for Wrap<DataType> {
|
|
154
155
|
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
155
156
|
class.funcall("new", ()).unwrap()
|
156
157
|
}
|
157
|
-
DataType::Float64 => {
|
158
|
+
DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
|
158
159
|
let class = pl.const_get::<_, Value>("Float64").unwrap();
|
159
160
|
class.funcall("new", ()).unwrap()
|
160
161
|
}
|
@@ -168,7 +169,7 @@ impl IntoValue for Wrap<DataType> {
|
|
168
169
|
let class = pl.const_get::<_, Value>("Boolean").unwrap();
|
169
170
|
class.funcall("new", ()).unwrap()
|
170
171
|
}
|
171
|
-
DataType::String => {
|
172
|
+
DataType::String | DataType::Unknown(UnknownKind::Str) => {
|
172
173
|
let class = pl.const_get::<_, Value>("String").unwrap();
|
173
174
|
class.funcall("new", ()).unwrap()
|
174
175
|
}
|
@@ -242,7 +243,10 @@ impl IntoValue for Wrap<DataType> {
|
|
242
243
|
let class = pl.const_get::<_, Value>("Null").unwrap();
|
243
244
|
class.funcall("new", ()).unwrap()
|
244
245
|
}
|
245
|
-
DataType::Unknown => {
|
246
|
+
DataType::Unknown(UnknownKind::Int(v)) => {
|
247
|
+
Wrap(materialize_dyn_int(v).dtype()).into_value()
|
248
|
+
}
|
249
|
+
DataType::Unknown(_) => {
|
246
250
|
let class = pl.const_get::<_, Value>("Unknown").unwrap();
|
247
251
|
class.funcall("new", ()).unwrap()
|
248
252
|
}
|
@@ -310,7 +314,7 @@ impl TryConvert for Wrap<DataType> {
|
|
310
314
|
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
311
315
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
312
316
|
"Polars::Null" => DataType::Null,
|
313
|
-
"Polars::Unknown" => DataType::Unknown,
|
317
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
314
318
|
dt => {
|
315
319
|
return Err(RbValueError::new_err(format!(
|
316
320
|
"{dt} is not a correct polars DataType.",
|
@@ -350,7 +354,7 @@ impl TryConvert for Wrap<DataType> {
|
|
350
354
|
"Polars::Float32" => DataType::Float32,
|
351
355
|
"Polars::Float64" => DataType::Float64,
|
352
356
|
"Polars::Null" => DataType::Null,
|
353
|
-
"Polars::Unknown" => DataType::Unknown,
|
357
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
354
358
|
"Polars::Duration" => {
|
355
359
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
356
360
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
@@ -375,8 +379,8 @@ impl TryConvert for Wrap<DataType> {
|
|
375
379
|
"Polars::Struct" => {
|
376
380
|
let arr: RArray = ob.funcall("fields", ())?;
|
377
381
|
let mut fields = Vec::with_capacity(arr.len());
|
378
|
-
for v in arr.each() {
|
379
|
-
fields.push(Wrap::<Field>::try_convert(v?)?.0);
|
382
|
+
for v in arr.into_iter() {
|
383
|
+
fields.push(Wrap::<Field>::try_convert(v)?.0);
|
380
384
|
}
|
381
385
|
DataType::Struct(fields)
|
382
386
|
}
|
@@ -410,7 +414,7 @@ impl TryConvert for Wrap<DataType> {
|
|
410
414
|
"obj" => DataType::Object(OBJECT_NAME, None),
|
411
415
|
"list" => DataType::List(Box::new(DataType::Boolean)),
|
412
416
|
"null" => DataType::Null,
|
413
|
-
"unk" => DataType::Unknown,
|
417
|
+
"unk" => DataType::Unknown(Default::default()),
|
414
418
|
_ => {
|
415
419
|
return Err(RbValueError::new_err(format!(
|
416
420
|
"{} is not a supported DataType.",
|
@@ -423,11 +427,36 @@ impl TryConvert for Wrap<DataType> {
|
|
423
427
|
}
|
424
428
|
}
|
425
429
|
|
430
|
+
impl TryConvert for Wrap<StatisticsOptions> {
|
431
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
432
|
+
let mut statistics = StatisticsOptions::empty();
|
433
|
+
|
434
|
+
let dict = RHash::try_convert(ob)?;
|
435
|
+
dict.foreach(|key: Symbol, val: bool| {
|
436
|
+
match key.name()?.as_ref() {
|
437
|
+
"min" => statistics.min_value = val,
|
438
|
+
"max" => statistics.max_value = val,
|
439
|
+
"distinct_count" => statistics.distinct_count = val,
|
440
|
+
"null_count" => statistics.null_count = val,
|
441
|
+
_ => {
|
442
|
+
return Err(RbTypeError::new_err(format!(
|
443
|
+
"'{key}' is not a valid statistic option",
|
444
|
+
)))
|
445
|
+
}
|
446
|
+
}
|
447
|
+
Ok(ForEach::Continue)
|
448
|
+
})
|
449
|
+
.unwrap();
|
450
|
+
|
451
|
+
Ok(Wrap(statistics))
|
452
|
+
}
|
453
|
+
}
|
454
|
+
|
426
455
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
427
456
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
428
457
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
429
|
-
for item in RArray::try_convert(ob)?.each() {
|
430
|
-
vals.push(Wrap::<AnyValue<'s>>::try_convert(item?)?);
|
458
|
+
for item in RArray::try_convert(ob)?.into_iter() {
|
459
|
+
vals.push(Wrap::<AnyValue<'s>>::try_convert(item)?);
|
431
460
|
}
|
432
461
|
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
433
462
|
Ok(Wrap(Row(vals)))
|
@@ -546,57 +575,6 @@ impl Default for ObjectValue {
|
|
546
575
|
}
|
547
576
|
}
|
548
577
|
|
549
|
-
pub(crate) fn dicts_to_rows(
|
550
|
-
records: &Value,
|
551
|
-
infer_schema_len: Option<usize>,
|
552
|
-
schema_columns: PlIndexSet<String>,
|
553
|
-
) -> RbResult<(Vec<Row>, Vec<String>)> {
|
554
|
-
let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
|
555
|
-
let (dicts, len) = get_rbseq(*records)?;
|
556
|
-
|
557
|
-
let key_names = {
|
558
|
-
if !schema_columns.is_empty() {
|
559
|
-
schema_columns
|
560
|
-
} else {
|
561
|
-
let mut inferred_keys = PlIndexSet::new();
|
562
|
-
for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
|
563
|
-
let d = d?;
|
564
|
-
let d = RHash::try_convert(d)?;
|
565
|
-
|
566
|
-
d.foreach(|name: Value, _value: Value| {
|
567
|
-
if let Some(v) = Symbol::from_value(name) {
|
568
|
-
inferred_keys.insert(v.name()?.into());
|
569
|
-
} else {
|
570
|
-
inferred_keys.insert(String::try_convert(name)?);
|
571
|
-
};
|
572
|
-
Ok(ForEach::Continue)
|
573
|
-
})?;
|
574
|
-
}
|
575
|
-
inferred_keys
|
576
|
-
}
|
577
|
-
};
|
578
|
-
|
579
|
-
let mut rows = Vec::with_capacity(len);
|
580
|
-
|
581
|
-
for d in dicts.each() {
|
582
|
-
let d = d?;
|
583
|
-
let d = RHash::try_convert(d)?;
|
584
|
-
|
585
|
-
let mut row = Vec::with_capacity(key_names.len());
|
586
|
-
|
587
|
-
for k in key_names.iter() {
|
588
|
-
// TODO improve performance
|
589
|
-
let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
|
590
|
-
None => AnyValue::Null,
|
591
|
-
Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
|
592
|
-
};
|
593
|
-
row.push(val)
|
594
|
-
}
|
595
|
-
rows.push(Row(row))
|
596
|
-
}
|
597
|
-
Ok((rows, key_names.into_iter().collect()))
|
598
|
-
}
|
599
|
-
|
600
578
|
impl TryConvert for Wrap<AsofStrategy> {
|
601
579
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
602
580
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -733,14 +711,13 @@ impl TryConvert for Wrap<JoinType> {
|
|
733
711
|
let parsed = match String::try_convert(ob)?.as_str() {
|
734
712
|
"inner" => JoinType::Inner,
|
735
713
|
"left" => JoinType::Left,
|
736
|
-
"outer" => JoinType::Outer { coalesce: false },
|
737
|
-
"outer_coalesce" => JoinType::Outer { coalesce: true },
|
714
|
+
"full" => JoinType::Full,
|
738
715
|
"semi" => JoinType::Semi,
|
739
716
|
"anti" => JoinType::Anti,
|
740
717
|
"cross" => JoinType::Cross,
|
741
718
|
v => {
|
742
719
|
return Err(RbValueError::new_err(format!(
|
743
|
-
"how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
|
720
|
+
"how must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {}",
|
744
721
|
v
|
745
722
|
)))
|
746
723
|
}
|
@@ -950,6 +927,23 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
950
927
|
}
|
951
928
|
}
|
952
929
|
|
930
|
+
impl TryConvert for Wrap<ClosedInterval> {
|
931
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
932
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
933
|
+
"both" => ClosedInterval::Both,
|
934
|
+
"left" => ClosedInterval::Left,
|
935
|
+
"right" => ClosedInterval::Right,
|
936
|
+
"none" => ClosedInterval::None,
|
937
|
+
v => {
|
938
|
+
return Err(RbValueError::new_err(format!(
|
939
|
+
"`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
|
940
|
+
)))
|
941
|
+
}
|
942
|
+
};
|
943
|
+
Ok(Wrap(parsed))
|
944
|
+
}
|
945
|
+
}
|
946
|
+
|
953
947
|
impl TryConvert for Wrap<WindowMapping> {
|
954
948
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
955
949
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -0,0 +1,184 @@
|
|
1
|
+
use magnus::{prelude::*, r_hash::ForEach, RArray, RHash, Symbol, Value};
|
2
|
+
use polars::frame::row::{rows_to_schema_supertypes, rows_to_supertypes, Row};
|
3
|
+
use polars::prelude::*;
|
4
|
+
|
5
|
+
use super::*;
|
6
|
+
use crate::conversion::*;
|
7
|
+
use crate::{RbPolarsErr, RbResult};
|
8
|
+
|
9
|
+
impl RbDataFrame {
|
10
|
+
pub fn from_rows(
|
11
|
+
rb_rows: RArray,
|
12
|
+
infer_schema_length: Option<usize>,
|
13
|
+
schema: Option<Wrap<Schema>>,
|
14
|
+
) -> RbResult<Self> {
|
15
|
+
let mut data = Vec::with_capacity(rb_rows.len());
|
16
|
+
for v in rb_rows.into_iter() {
|
17
|
+
let rb_row = RArray::try_convert(v)?;
|
18
|
+
let mut row = Vec::with_capacity(rb_row.len());
|
19
|
+
for val in rb_row.into_iter() {
|
20
|
+
row.push(Wrap::<AnyValue>::try_convert(val)?.0);
|
21
|
+
}
|
22
|
+
data.push(Row(row));
|
23
|
+
}
|
24
|
+
let schema = schema.map(|wrap| wrap.0);
|
25
|
+
finish_from_rows(data, schema, None, infer_schema_length)
|
26
|
+
}
|
27
|
+
|
28
|
+
pub fn from_hashes(
|
29
|
+
data: Value,
|
30
|
+
schema: Option<Wrap<Schema>>,
|
31
|
+
schema_overrides: Option<Wrap<Schema>>,
|
32
|
+
strict: bool,
|
33
|
+
infer_schema_length: Option<usize>,
|
34
|
+
) -> RbResult<Self> {
|
35
|
+
let schema = schema.map(|wrap| wrap.0);
|
36
|
+
let schema_overrides = schema_overrides.map(|wrap| wrap.0);
|
37
|
+
|
38
|
+
let names = get_schema_names(&data, schema.as_ref(), infer_schema_length)?;
|
39
|
+
let rows = dicts_to_rows(&data, &names, strict)?;
|
40
|
+
|
41
|
+
let schema = schema.or_else(|| {
|
42
|
+
Some(columns_names_to_empty_schema(
|
43
|
+
names.iter().map(String::as_str),
|
44
|
+
))
|
45
|
+
});
|
46
|
+
|
47
|
+
finish_from_rows(rows, schema, schema_overrides, infer_schema_length)
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
fn finish_from_rows(
|
52
|
+
rows: Vec<Row>,
|
53
|
+
schema: Option<Schema>,
|
54
|
+
schema_overrides: Option<Schema>,
|
55
|
+
infer_schema_length: Option<usize>,
|
56
|
+
) -> RbResult<RbDataFrame> {
|
57
|
+
// Object builder must be registered
|
58
|
+
crate::on_startup::register_object_builder();
|
59
|
+
|
60
|
+
let mut schema = if let Some(mut schema) = schema {
|
61
|
+
resolve_schema_overrides(&mut schema, schema_overrides);
|
62
|
+
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
63
|
+
schema
|
64
|
+
} else {
|
65
|
+
rows_to_schema_supertypes(&rows, infer_schema_length).map_err(RbPolarsErr::from)?
|
66
|
+
};
|
67
|
+
|
68
|
+
// TODO: Remove this step when Decimals are supported properly.
|
69
|
+
// Erasing the decimal precision/scale here will just require us to infer it again later.
|
70
|
+
// https://github.com/pola-rs/polars/issues/14427
|
71
|
+
erase_decimal_precision_scale(&mut schema);
|
72
|
+
|
73
|
+
let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
|
74
|
+
Ok(df.into())
|
75
|
+
}
|
76
|
+
|
77
|
+
fn update_schema_from_rows(
|
78
|
+
schema: &mut Schema,
|
79
|
+
rows: &[Row],
|
80
|
+
infer_schema_length: Option<usize>,
|
81
|
+
) -> RbResult<()> {
|
82
|
+
let schema_is_complete = schema.iter_dtypes().all(|dtype| dtype.is_known());
|
83
|
+
if schema_is_complete {
|
84
|
+
return Ok(());
|
85
|
+
}
|
86
|
+
|
87
|
+
// TODO: Only infer dtypes for columns with an unknown dtype
|
88
|
+
let inferred_dtypes =
|
89
|
+
rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
|
90
|
+
let inferred_dtypes_slice = inferred_dtypes.as_slice();
|
91
|
+
|
92
|
+
for (i, dtype) in schema.iter_dtypes_mut().enumerate() {
|
93
|
+
if !dtype.is_known() {
|
94
|
+
*dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
|
95
|
+
polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
|
96
|
+
})
|
97
|
+
.map_err(RbPolarsErr::from)?
|
98
|
+
.clone();
|
99
|
+
}
|
100
|
+
}
|
101
|
+
Ok(())
|
102
|
+
}
|
103
|
+
|
104
|
+
fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema>) {
|
105
|
+
if let Some(overrides) = schema_overrides {
|
106
|
+
for (name, dtype) in overrides.into_iter() {
|
107
|
+
schema.set_dtype(name.as_str(), dtype);
|
108
|
+
}
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
fn erase_decimal_precision_scale(schema: &mut Schema) {
|
113
|
+
for dtype in schema.iter_dtypes_mut() {
|
114
|
+
if let DataType::Decimal(_, _) = dtype {
|
115
|
+
*dtype = DataType::Decimal(None, None)
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
fn columns_names_to_empty_schema<'a, I>(column_names: I) -> Schema
|
121
|
+
where
|
122
|
+
I: IntoIterator<Item = &'a str>,
|
123
|
+
{
|
124
|
+
let fields = column_names
|
125
|
+
.into_iter()
|
126
|
+
.map(|c| Field::new(c, DataType::Unknown(Default::default())));
|
127
|
+
Schema::from_iter(fields)
|
128
|
+
}
|
129
|
+
|
130
|
+
fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResult<Vec<Row<'a>>> {
|
131
|
+
let (data, len) = get_rbseq(*data)?;
|
132
|
+
let mut rows = Vec::with_capacity(len);
|
133
|
+
for d in data.into_iter() {
|
134
|
+
let d = RHash::try_convert(d)?;
|
135
|
+
|
136
|
+
let mut row = Vec::with_capacity(names.len());
|
137
|
+
for k in names.iter() {
|
138
|
+
// TODO improve performance
|
139
|
+
let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
|
140
|
+
None => AnyValue::Null,
|
141
|
+
Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
|
142
|
+
};
|
143
|
+
row.push(val)
|
144
|
+
}
|
145
|
+
rows.push(Row(row))
|
146
|
+
}
|
147
|
+
Ok(rows)
|
148
|
+
}
|
149
|
+
|
150
|
+
fn get_schema_names(
|
151
|
+
data: &Value,
|
152
|
+
schema: Option<&Schema>,
|
153
|
+
infer_schema_length: Option<usize>,
|
154
|
+
) -> RbResult<Vec<String>> {
|
155
|
+
if let Some(schema) = schema {
|
156
|
+
Ok(schema.iter_names().map(|n| n.to_string()).collect())
|
157
|
+
} else {
|
158
|
+
infer_schema_names_from_data(data, infer_schema_length)
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
fn infer_schema_names_from_data(
|
163
|
+
data: &Value,
|
164
|
+
infer_schema_length: Option<usize>,
|
165
|
+
) -> RbResult<Vec<String>> {
|
166
|
+
let (data, data_len) = get_rbseq(*data)?;
|
167
|
+
let infer_schema_length = infer_schema_length
|
168
|
+
.map(|n| std::cmp::max(1, n))
|
169
|
+
.unwrap_or(data_len);
|
170
|
+
|
171
|
+
let mut names = PlIndexSet::new();
|
172
|
+
for d in data.into_iter().take(infer_schema_length) {
|
173
|
+
let d = RHash::try_convert(d)?;
|
174
|
+
d.foreach(|name: Value, _value: Value| {
|
175
|
+
if let Some(v) = Symbol::from_value(name) {
|
176
|
+
names.insert(v.name()?.into());
|
177
|
+
} else {
|
178
|
+
names.insert(String::try_convert(name)?);
|
179
|
+
};
|
180
|
+
Ok(ForEach::Continue)
|
181
|
+
})?;
|
182
|
+
}
|
183
|
+
Ok(names.into_iter().collect())
|
184
|
+
}
|
@@ -0,0 +1,48 @@
|
|
1
|
+
use magnus::{prelude::*, IntoValue, RArray, Value};
|
2
|
+
|
3
|
+
use super::*;
|
4
|
+
use crate::conversion::{ObjectValue, Wrap};
|
5
|
+
|
6
|
+
impl RbDataFrame {
|
7
|
+
pub fn row_tuple(&self, idx: i64) -> Value {
|
8
|
+
let idx = if idx < 0 {
|
9
|
+
(self.df.borrow().height() as i64 + idx) as usize
|
10
|
+
} else {
|
11
|
+
idx as usize
|
12
|
+
};
|
13
|
+
RArray::from_iter(
|
14
|
+
self.df
|
15
|
+
.borrow()
|
16
|
+
.get_columns()
|
17
|
+
.iter()
|
18
|
+
.map(|s| match s.dtype() {
|
19
|
+
DataType::Object(_, _) => {
|
20
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
21
|
+
obj.unwrap().to_object()
|
22
|
+
}
|
23
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
24
|
+
}),
|
25
|
+
)
|
26
|
+
.as_value()
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn row_tuples(&self) -> Value {
|
30
|
+
let df = &self.df;
|
31
|
+
RArray::from_iter((0..df.borrow().height()).map(|idx| {
|
32
|
+
RArray::from_iter(
|
33
|
+
self.df
|
34
|
+
.borrow()
|
35
|
+
.get_columns()
|
36
|
+
.iter()
|
37
|
+
.map(|s| match s.dtype() {
|
38
|
+
DataType::Object(_, _) => {
|
39
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
40
|
+
obj.unwrap().to_object()
|
41
|
+
}
|
42
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
43
|
+
}),
|
44
|
+
)
|
45
|
+
}))
|
46
|
+
.as_value()
|
47
|
+
}
|
48
|
+
}
|