polars-df 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,10 +1,11 @@
|
|
1
1
|
use magnus::{prelude::*, RArray};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
+
use crate::any_value::rb_object_to_any_value;
|
4
5
|
use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
|
5
6
|
use crate::prelude::ObjectValue;
|
6
7
|
use crate::series::to_series_collection;
|
7
|
-
use crate::{RbPolarsErr, RbResult, RbSeries, RbValueError};
|
8
|
+
use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
8
9
|
|
9
10
|
impl RbSeries {
|
10
11
|
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
|
@@ -35,36 +36,28 @@ impl RbSeries {
|
|
35
36
|
}
|
36
37
|
}
|
37
38
|
|
38
|
-
fn new_primitive<T>(name: &str,
|
39
|
+
fn new_primitive<T>(name: &str, values: RArray, _strict: bool) -> RbResult<RbSeries>
|
39
40
|
where
|
40
41
|
T: PolarsNumericType,
|
41
42
|
ChunkedArray<T>: IntoSeries,
|
42
43
|
T::Native: magnus::TryConvert,
|
43
44
|
{
|
44
|
-
let len =
|
45
|
+
let len = values.len();
|
45
46
|
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
Err(e) => {
|
55
|
-
if strict {
|
56
|
-
return Err(e);
|
57
|
-
}
|
58
|
-
builder.append_null()
|
59
|
-
}
|
60
|
-
}
|
61
|
-
}
|
48
|
+
for res in values.into_iter() {
|
49
|
+
let value = res;
|
50
|
+
if value.is_nil() {
|
51
|
+
builder.append_null()
|
52
|
+
} else {
|
53
|
+
let v = <T::Native>::try_convert(value)?;
|
54
|
+
builder.append_value(v)
|
62
55
|
}
|
63
56
|
}
|
64
|
-
let ca = builder.finish();
|
65
57
|
|
58
|
+
let ca = builder.finish();
|
66
59
|
let s = ca.into_series();
|
67
|
-
Ok(
|
60
|
+
Ok(s.into())
|
68
61
|
}
|
69
62
|
|
70
63
|
// Init with lists that can contain Nones
|
@@ -91,18 +84,50 @@ init_method_opt!(new_opt_f64, Float64Type, f64);
|
|
91
84
|
|
92
85
|
fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
|
93
86
|
let mut val = Vec::with_capacity(arr.len());
|
94
|
-
for v in arr.
|
95
|
-
val.push(Wrap::<AnyValue<'s>>::try_convert(v
|
87
|
+
for v in arr.into_iter() {
|
88
|
+
val.push(Wrap::<AnyValue<'s>>::try_convert(v)?);
|
96
89
|
}
|
97
90
|
Ok(val)
|
98
91
|
}
|
99
92
|
|
100
93
|
impl RbSeries {
|
101
|
-
pub fn
|
102
|
-
let
|
103
|
-
let avs = slice_extract_wrapped(&val);
|
94
|
+
pub fn new_from_any_values(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
95
|
+
let any_values_result = vec_wrap_any_value(values);
|
104
96
|
// from anyvalues is fallible
|
105
|
-
let
|
97
|
+
let result = any_values_result.and_then(|avs| {
|
98
|
+
let avs = slice_extract_wrapped(&avs);
|
99
|
+
let s = Series::from_any_values(&name, avs, strict).map_err(|e| {
|
100
|
+
RbTypeError::new_err(format!(
|
101
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
102
|
+
))
|
103
|
+
})?;
|
104
|
+
Ok(s.into())
|
105
|
+
});
|
106
|
+
|
107
|
+
// Fall back to Object type for non-strict construction.
|
108
|
+
if !strict && result.is_err() {
|
109
|
+
return Self::new_object(name, values, strict);
|
110
|
+
}
|
111
|
+
|
112
|
+
result
|
113
|
+
}
|
114
|
+
|
115
|
+
pub fn new_from_any_values_and_dtype(
|
116
|
+
name: String,
|
117
|
+
values: RArray,
|
118
|
+
dtype: Wrap<DataType>,
|
119
|
+
strict: bool,
|
120
|
+
) -> RbResult<Self> {
|
121
|
+
let any_values = values
|
122
|
+
.into_iter()
|
123
|
+
.map(|v| rb_object_to_any_value(v, strict))
|
124
|
+
.collect::<RbResult<Vec<AnyValue>>>()?;
|
125
|
+
let s = Series::from_any_values_and_dtype(&name, any_values.as_slice(), &dtype.0, strict)
|
126
|
+
.map_err(|e| {
|
127
|
+
RbTypeError::new_err(format!(
|
128
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
129
|
+
))
|
130
|
+
})?;
|
106
131
|
Ok(s.into())
|
107
132
|
}
|
108
133
|
|
@@ -125,9 +150,9 @@ impl RbSeries {
|
|
125
150
|
|
126
151
|
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
127
152
|
let val = val
|
128
|
-
.
|
129
|
-
.map(
|
130
|
-
.collect::<
|
153
|
+
.into_iter()
|
154
|
+
.map(ObjectValue::from)
|
155
|
+
.collect::<Vec<ObjectValue>>();
|
131
156
|
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
132
157
|
Ok(s.into())
|
133
158
|
}
|
@@ -163,15 +188,8 @@ impl RbSeries {
|
|
163
188
|
}
|
164
189
|
}
|
165
190
|
|
166
|
-
pub fn new_decimal(name: String,
|
167
|
-
|
168
|
-
// TODO: do we have to respect 'strict' here? it's possible if we want to
|
169
|
-
let avs = slice_extract_wrapped(&val);
|
170
|
-
// create a fake dtype with a placeholder "none" scale, to be inferred later
|
171
|
-
let dtype = DataType::Decimal(None, None);
|
172
|
-
let s = Series::from_any_values_and_dtype(&name, avs, &dtype, strict)
|
173
|
-
.map_err(RbPolarsErr::from)?;
|
174
|
-
Ok(s.into())
|
191
|
+
pub fn new_decimal(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
192
|
+
Self::new_from_any_values(name, values, strict)
|
175
193
|
}
|
176
194
|
|
177
195
|
pub fn repeat(
|
@@ -1,57 +1,139 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{value::qnil, IntoValue, RArray, Value};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::prelude::*;
|
5
|
+
use crate::RbSeries;
|
5
6
|
|
6
7
|
impl RbSeries {
|
7
|
-
///
|
8
|
-
/// This
|
9
|
-
pub fn
|
10
|
-
let
|
11
|
-
match s.dtype() {
|
12
|
-
DataType::String => {
|
13
|
-
let ca = s.str().unwrap();
|
8
|
+
/// Convert this Series to a Ruby array.
|
9
|
+
/// This operation copies data.
|
10
|
+
pub fn to_a(&self) -> Value {
|
11
|
+
let series = &self.series.borrow();
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
.
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
13
|
+
fn to_a_recursive(series: &Series) -> Value {
|
14
|
+
let rblist = match series.dtype() {
|
15
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
16
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
17
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
18
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
19
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
20
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
21
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
22
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
23
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
24
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
25
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
26
|
+
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
27
|
+
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
28
|
+
}
|
29
|
+
DataType::Object(_, _) => {
|
30
|
+
let v = RArray::with_capacity(series.len());
|
31
|
+
for i in 0..series.len() {
|
32
|
+
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
33
|
+
match obj {
|
34
|
+
Some(val) => v.push(val.to_object()).unwrap(),
|
35
|
+
None => v.push(qnil()).unwrap(),
|
36
|
+
};
|
37
|
+
}
|
38
|
+
v.into_value()
|
39
|
+
}
|
40
|
+
DataType::List(_) => {
|
41
|
+
let v = RArray::new();
|
42
|
+
let ca = series.list().unwrap();
|
43
|
+
for opt_s in ca.amortized_iter() {
|
44
|
+
match opt_s {
|
45
|
+
None => {
|
46
|
+
v.push(qnil()).unwrap();
|
47
|
+
}
|
48
|
+
Some(s) => {
|
49
|
+
let rblst = to_a_recursive(s.as_ref());
|
50
|
+
v.push(rblst).unwrap();
|
51
|
+
}
|
52
|
+
}
|
53
|
+
}
|
54
|
+
v.into_value()
|
55
|
+
}
|
56
|
+
DataType::Array(_, _) => {
|
57
|
+
let v = RArray::new();
|
58
|
+
let ca = series.array().unwrap();
|
59
|
+
for opt_s in ca.amortized_iter() {
|
60
|
+
match opt_s {
|
61
|
+
None => {
|
62
|
+
v.push(qnil()).unwrap();
|
63
|
+
}
|
64
|
+
Some(s) => {
|
65
|
+
let rblst = to_a_recursive(s.as_ref());
|
66
|
+
v.push(rblst).unwrap();
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|
70
|
+
v.into_value()
|
71
|
+
}
|
72
|
+
DataType::Date => {
|
73
|
+
let ca = series.date().unwrap();
|
74
|
+
return Wrap(ca).into_value();
|
75
|
+
}
|
76
|
+
DataType::Time => {
|
77
|
+
let ca = series.time().unwrap();
|
78
|
+
return Wrap(ca).into_value();
|
79
|
+
}
|
80
|
+
DataType::Datetime(_, _) => {
|
81
|
+
let ca = series.datetime().unwrap();
|
82
|
+
return Wrap(ca).into_value();
|
83
|
+
}
|
84
|
+
DataType::Decimal(_, _) => {
|
85
|
+
let ca = series.decimal().unwrap();
|
86
|
+
return Wrap(ca).into_value();
|
87
|
+
}
|
88
|
+
DataType::String => {
|
89
|
+
let ca = series.str().unwrap();
|
90
|
+
return Wrap(ca).into_value();
|
91
|
+
}
|
92
|
+
DataType::Struct(_) => {
|
93
|
+
let ca = series.struct_().unwrap();
|
94
|
+
return Wrap(ca).into_value();
|
95
|
+
}
|
96
|
+
DataType::Duration(_) => {
|
97
|
+
let ca = series.duration().unwrap();
|
98
|
+
return Wrap(ca).into_value();
|
99
|
+
}
|
100
|
+
DataType::Binary => {
|
101
|
+
let ca = series.binary().unwrap();
|
102
|
+
return Wrap(ca).into_value();
|
103
|
+
}
|
104
|
+
DataType::Null => {
|
105
|
+
let null: Option<u8> = None;
|
106
|
+
let n = series.len();
|
107
|
+
let iter = std::iter::repeat(null).take(n);
|
108
|
+
use std::iter::{Repeat, Take};
|
109
|
+
struct NullIter {
|
110
|
+
iter: Take<Repeat<Option<u8>>>,
|
111
|
+
n: usize,
|
112
|
+
}
|
113
|
+
impl Iterator for NullIter {
|
114
|
+
type Item = Option<u8>;
|
115
|
+
|
116
|
+
fn next(&mut self) -> Option<Self::Item> {
|
117
|
+
self.iter.next()
|
118
|
+
}
|
119
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
120
|
+
(self.n, Some(self.n))
|
121
|
+
}
|
122
|
+
}
|
123
|
+
impl ExactSizeIterator for NullIter {}
|
124
|
+
|
125
|
+
RArray::from_iter(NullIter { iter, n }).into_value()
|
126
|
+
}
|
127
|
+
DataType::Unknown(_) => {
|
128
|
+
panic!("to_a not implemented for unknown")
|
129
|
+
}
|
130
|
+
DataType::BinaryOffset => {
|
131
|
+
unreachable!()
|
132
|
+
}
|
133
|
+
};
|
134
|
+
rblist
|
55
135
|
}
|
136
|
+
|
137
|
+
to_a_recursive(series)
|
56
138
|
}
|
57
139
|
}
|
@@ -5,7 +5,7 @@ mod construction;
|
|
5
5
|
mod export;
|
6
6
|
mod scatter;
|
7
7
|
|
8
|
-
use magnus::{exception, prelude::*,
|
8
|
+
use magnus::{exception, prelude::*, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
@@ -36,8 +36,8 @@ impl RbSeries {
|
|
36
36
|
|
37
37
|
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
38
38
|
let mut series = Vec::new();
|
39
|
-
for item in rs.
|
40
|
-
series.push(<&RbSeries>::try_convert(item
|
39
|
+
for item in rs.into_iter() {
|
40
|
+
series.push(<&RbSeries>::try_convert(item)?.series.borrow().clone());
|
41
41
|
}
|
42
42
|
Ok(series)
|
43
43
|
}
|
@@ -247,13 +247,24 @@ impl RbSeries {
|
|
247
247
|
.into())
|
248
248
|
}
|
249
249
|
|
250
|
-
pub fn value_counts(
|
251
|
-
|
250
|
+
pub fn value_counts(
|
251
|
+
&self,
|
252
|
+
sort: bool,
|
253
|
+
parallel: bool,
|
254
|
+
name: String,
|
255
|
+
normalize: bool,
|
256
|
+
) -> RbResult<RbDataFrame> {
|
257
|
+
let out = self
|
252
258
|
.series
|
253
259
|
.borrow()
|
254
|
-
.value_counts(
|
260
|
+
.value_counts(sort, parallel, name, normalize)
|
255
261
|
.map_err(RbPolarsErr::from)?;
|
256
|
-
Ok(
|
262
|
+
Ok(out.into())
|
263
|
+
}
|
264
|
+
|
265
|
+
pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
|
266
|
+
let length = length.unwrap_or_else(|| self.series.borrow().len());
|
267
|
+
self.series.borrow().slice(offset, length).into()
|
257
268
|
}
|
258
269
|
|
259
270
|
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
@@ -301,10 +312,20 @@ impl RbSeries {
|
|
301
312
|
Ok(s.into())
|
302
313
|
}
|
303
314
|
|
304
|
-
pub fn equals(
|
305
|
-
|
306
|
-
|
307
|
-
|
315
|
+
pub fn equals(
|
316
|
+
&self,
|
317
|
+
other: &RbSeries,
|
318
|
+
check_dtypes: bool,
|
319
|
+
check_names: bool,
|
320
|
+
null_equal: bool,
|
321
|
+
) -> bool {
|
322
|
+
if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
|
323
|
+
return false;
|
324
|
+
}
|
325
|
+
if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
|
326
|
+
return false;
|
327
|
+
}
|
328
|
+
if null_equal {
|
308
329
|
self.series.borrow().equals_missing(&other.series.borrow())
|
309
330
|
} else {
|
310
331
|
self.series.borrow().equals(&other.series.borrow())
|
@@ -325,136 +346,6 @@ impl RbSeries {
|
|
325
346
|
self.series.borrow().len()
|
326
347
|
}
|
327
348
|
|
328
|
-
pub fn to_a(&self) -> Value {
|
329
|
-
let series = &self.series.borrow();
|
330
|
-
|
331
|
-
fn to_a_recursive(series: &Series) -> Value {
|
332
|
-
let rblist = match series.dtype() {
|
333
|
-
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
334
|
-
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
335
|
-
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
336
|
-
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
337
|
-
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
338
|
-
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
339
|
-
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
340
|
-
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
341
|
-
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
342
|
-
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
343
|
-
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
344
|
-
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
345
|
-
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
346
|
-
}
|
347
|
-
DataType::Object(_, _) => {
|
348
|
-
let v = RArray::with_capacity(series.len());
|
349
|
-
for i in 0..series.len() {
|
350
|
-
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
351
|
-
match obj {
|
352
|
-
Some(val) => v.push(val.to_object()).unwrap(),
|
353
|
-
None => v.push(qnil()).unwrap(),
|
354
|
-
};
|
355
|
-
}
|
356
|
-
v.into_value()
|
357
|
-
}
|
358
|
-
DataType::List(_) => {
|
359
|
-
let v = RArray::new();
|
360
|
-
let ca = series.list().unwrap();
|
361
|
-
for opt_s in unsafe { ca.amortized_iter() } {
|
362
|
-
match opt_s {
|
363
|
-
None => {
|
364
|
-
v.push(qnil()).unwrap();
|
365
|
-
}
|
366
|
-
Some(s) => {
|
367
|
-
let rblst = to_a_recursive(s.as_ref());
|
368
|
-
v.push(rblst).unwrap();
|
369
|
-
}
|
370
|
-
}
|
371
|
-
}
|
372
|
-
v.into_value()
|
373
|
-
}
|
374
|
-
DataType::Array(_, _) => {
|
375
|
-
let v = RArray::new();
|
376
|
-
let ca = series.array().unwrap();
|
377
|
-
for opt_s in ca.amortized_iter() {
|
378
|
-
match opt_s {
|
379
|
-
None => {
|
380
|
-
v.push(qnil()).unwrap();
|
381
|
-
}
|
382
|
-
Some(s) => {
|
383
|
-
let rblst = to_a_recursive(s.as_ref());
|
384
|
-
v.push(rblst).unwrap();
|
385
|
-
}
|
386
|
-
}
|
387
|
-
}
|
388
|
-
v.into_value()
|
389
|
-
}
|
390
|
-
DataType::Date => {
|
391
|
-
let ca = series.date().unwrap();
|
392
|
-
return Wrap(ca).into_value();
|
393
|
-
}
|
394
|
-
DataType::Time => {
|
395
|
-
let ca = series.time().unwrap();
|
396
|
-
return Wrap(ca).into_value();
|
397
|
-
}
|
398
|
-
DataType::Datetime(_, _) => {
|
399
|
-
let ca = series.datetime().unwrap();
|
400
|
-
return Wrap(ca).into_value();
|
401
|
-
}
|
402
|
-
DataType::Decimal(_, _) => {
|
403
|
-
let ca = series.decimal().unwrap();
|
404
|
-
return Wrap(ca).into_value();
|
405
|
-
}
|
406
|
-
DataType::String => {
|
407
|
-
let ca = series.str().unwrap();
|
408
|
-
return Wrap(ca).into_value();
|
409
|
-
}
|
410
|
-
DataType::Struct(_) => {
|
411
|
-
let ca = series.struct_().unwrap();
|
412
|
-
return Wrap(ca).into_value();
|
413
|
-
}
|
414
|
-
DataType::Duration(_) => {
|
415
|
-
let ca = series.duration().unwrap();
|
416
|
-
return Wrap(ca).into_value();
|
417
|
-
}
|
418
|
-
DataType::Binary => {
|
419
|
-
let ca = series.binary().unwrap();
|
420
|
-
return Wrap(ca).into_value();
|
421
|
-
}
|
422
|
-
DataType::Null => {
|
423
|
-
let null: Option<u8> = None;
|
424
|
-
let n = series.len();
|
425
|
-
let iter = std::iter::repeat(null).take(n);
|
426
|
-
use std::iter::{Repeat, Take};
|
427
|
-
struct NullIter {
|
428
|
-
iter: Take<Repeat<Option<u8>>>,
|
429
|
-
n: usize,
|
430
|
-
}
|
431
|
-
impl Iterator for NullIter {
|
432
|
-
type Item = Option<u8>;
|
433
|
-
|
434
|
-
fn next(&mut self) -> Option<Self::Item> {
|
435
|
-
self.iter.next()
|
436
|
-
}
|
437
|
-
fn size_hint(&self) -> (usize, Option<usize>) {
|
438
|
-
(self.n, Some(self.n))
|
439
|
-
}
|
440
|
-
}
|
441
|
-
impl ExactSizeIterator for NullIter {}
|
442
|
-
|
443
|
-
RArray::from_iter(NullIter { iter, n }).into_value()
|
444
|
-
}
|
445
|
-
DataType::Unknown => {
|
446
|
-
panic!("to_a not implemented for unknown")
|
447
|
-
}
|
448
|
-
DataType::BinaryOffset => {
|
449
|
-
unreachable!()
|
450
|
-
}
|
451
|
-
};
|
452
|
-
rblist
|
453
|
-
}
|
454
|
-
|
455
|
-
to_a_recursive(series)
|
456
|
-
}
|
457
|
-
|
458
349
|
pub fn clone(&self) -> Self {
|
459
350
|
RbSeries::new(self.series.borrow().clone())
|
460
351
|
}
|
data/ext/polars/src/sql.rs
CHANGED
@@ -37,7 +37,9 @@ impl RbSQLContext {
|
|
37
37
|
}
|
38
38
|
|
39
39
|
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
40
|
-
self.context
|
40
|
+
self.context
|
41
|
+
.borrow_mut()
|
42
|
+
.register(&name, lf.ldf.borrow().clone())
|
41
43
|
}
|
42
44
|
|
43
45
|
pub fn unregister(&self, name: String) {
|
data/lib/polars/array_expr.rb
CHANGED
@@ -358,7 +358,7 @@ module Polars
|
|
358
358
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
359
|
# # └───────────────┴─────┴──────┘
|
360
360
|
def get(index, null_on_oob: true)
|
361
|
-
index = Utils.
|
361
|
+
index = Utils.parse_into_expression(index)
|
362
362
|
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
363
363
|
end
|
364
364
|
|
@@ -446,7 +446,7 @@ module Polars
|
|
446
446
|
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
447
|
# # └───────────────┴───────────┴──────┘
|
448
448
|
def join(separator, ignore_nulls: true)
|
449
|
-
separator = Utils.
|
449
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
450
450
|
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
451
451
|
end
|
452
452
|
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ ["a", "c"] ┆ true │
|
503
503
|
# # └───────────────┴──────────┘
|
504
504
|
def contains(item)
|
505
|
-
item = Utils.
|
505
|
+
item = Utils.parse_into_expression(item, str_as_lit: true)
|
506
506
|
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
507
507
|
end
|
508
508
|
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ [2, 2] ┆ 2 │
|
531
531
|
# # └───────────────┴────────────────┘
|
532
532
|
def count_matches(element)
|
533
|
-
element = Utils.
|
533
|
+
element = Utils.parse_into_expression(element, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
535
535
|
end
|
536
536
|
end
|
@@ -13,6 +13,7 @@ module Polars
|
|
13
13
|
skip_rows: 0,
|
14
14
|
dtypes: nil,
|
15
15
|
null_values: nil,
|
16
|
+
missing_utf8_is_empty_string: false,
|
16
17
|
ignore_errors: false,
|
17
18
|
parse_dates: false,
|
18
19
|
n_threads: nil,
|
@@ -28,10 +29,12 @@ module Polars
|
|
28
29
|
sample_size: 1024,
|
29
30
|
eol_char: "\n",
|
30
31
|
new_columns: nil,
|
31
|
-
|
32
|
+
raise_if_empty: true,
|
33
|
+
truncate_ragged_lines: false,
|
34
|
+
decimal_comma: false
|
32
35
|
)
|
33
36
|
if Utils.pathlike?(file)
|
34
|
-
path = Utils.
|
37
|
+
path = Utils.normalize_filepath(file)
|
35
38
|
end
|
36
39
|
|
37
40
|
dtype_list = nil
|
@@ -39,7 +42,7 @@ module Polars
|
|
39
42
|
if !dtypes.nil?
|
40
43
|
if dtypes.is_a?(Hash)
|
41
44
|
dtype_list = []
|
42
|
-
dtypes.each do|k, v|
|
45
|
+
dtypes.each do |k, v|
|
43
46
|
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
44
47
|
end
|
45
48
|
elsif dtypes.is_a?(::Array)
|
@@ -72,12 +75,15 @@ module Polars
|
|
72
75
|
comment_char,
|
73
76
|
quote_char,
|
74
77
|
processed_null_values,
|
78
|
+
missing_utf8_is_empty_string,
|
75
79
|
parse_dates,
|
76
80
|
skip_rows_after_header,
|
77
|
-
Utils.
|
81
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
78
82
|
sample_size,
|
79
83
|
eol_char,
|
80
|
-
|
84
|
+
raise_if_empty,
|
85
|
+
truncate_ragged_lines,
|
86
|
+
decimal_comma
|
81
87
|
)
|
82
88
|
self.new_columns = new_columns
|
83
89
|
end
|