polars-df 0.10.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,10 +1,11 @@
|
|
1
1
|
use magnus::{prelude::*, RArray};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
+
use crate::any_value::rb_object_to_any_value;
|
4
5
|
use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
|
5
6
|
use crate::prelude::ObjectValue;
|
6
7
|
use crate::series::to_series_collection;
|
7
|
-
use crate::{RbPolarsErr, RbResult, RbSeries, RbValueError};
|
8
|
+
use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
8
9
|
|
9
10
|
impl RbSeries {
|
10
11
|
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
|
@@ -35,36 +36,28 @@ impl RbSeries {
|
|
35
36
|
}
|
36
37
|
}
|
37
38
|
|
38
|
-
fn new_primitive<T>(name: &str,
|
39
|
+
fn new_primitive<T>(name: &str, values: RArray, _strict: bool) -> RbResult<RbSeries>
|
39
40
|
where
|
40
41
|
T: PolarsNumericType,
|
41
42
|
ChunkedArray<T>: IntoSeries,
|
42
43
|
T::Native: magnus::TryConvert,
|
43
44
|
{
|
44
|
-
let len =
|
45
|
+
let len = values.len();
|
45
46
|
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
Err(e) => {
|
55
|
-
if strict {
|
56
|
-
return Err(e);
|
57
|
-
}
|
58
|
-
builder.append_null()
|
59
|
-
}
|
60
|
-
}
|
61
|
-
}
|
48
|
+
for res in values.into_iter() {
|
49
|
+
let value = res;
|
50
|
+
if value.is_nil() {
|
51
|
+
builder.append_null()
|
52
|
+
} else {
|
53
|
+
let v = <T::Native>::try_convert(value)?;
|
54
|
+
builder.append_value(v)
|
62
55
|
}
|
63
56
|
}
|
64
|
-
let ca = builder.finish();
|
65
57
|
|
58
|
+
let ca = builder.finish();
|
66
59
|
let s = ca.into_series();
|
67
|
-
Ok(
|
60
|
+
Ok(s.into())
|
68
61
|
}
|
69
62
|
|
70
63
|
// Init with lists that can contain Nones
|
@@ -91,18 +84,50 @@ init_method_opt!(new_opt_f64, Float64Type, f64);
|
|
91
84
|
|
92
85
|
fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
|
93
86
|
let mut val = Vec::with_capacity(arr.len());
|
94
|
-
for v in arr.
|
95
|
-
val.push(Wrap::<AnyValue<'s>>::try_convert(v
|
87
|
+
for v in arr.into_iter() {
|
88
|
+
val.push(Wrap::<AnyValue<'s>>::try_convert(v)?);
|
96
89
|
}
|
97
90
|
Ok(val)
|
98
91
|
}
|
99
92
|
|
100
93
|
impl RbSeries {
|
101
|
-
pub fn
|
102
|
-
let
|
103
|
-
let avs = slice_extract_wrapped(&val);
|
94
|
+
pub fn new_from_any_values(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
95
|
+
let any_values_result = vec_wrap_any_value(values);
|
104
96
|
// from anyvalues is fallible
|
105
|
-
let
|
97
|
+
let result = any_values_result.and_then(|avs| {
|
98
|
+
let avs = slice_extract_wrapped(&avs);
|
99
|
+
let s = Series::from_any_values(&name, avs, strict).map_err(|e| {
|
100
|
+
RbTypeError::new_err(format!(
|
101
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
102
|
+
))
|
103
|
+
})?;
|
104
|
+
Ok(s.into())
|
105
|
+
});
|
106
|
+
|
107
|
+
// Fall back to Object type for non-strict construction.
|
108
|
+
if !strict && result.is_err() {
|
109
|
+
return Self::new_object(name, values, strict);
|
110
|
+
}
|
111
|
+
|
112
|
+
result
|
113
|
+
}
|
114
|
+
|
115
|
+
pub fn new_from_any_values_and_dtype(
|
116
|
+
name: String,
|
117
|
+
values: RArray,
|
118
|
+
dtype: Wrap<DataType>,
|
119
|
+
strict: bool,
|
120
|
+
) -> RbResult<Self> {
|
121
|
+
let any_values = values
|
122
|
+
.into_iter()
|
123
|
+
.map(|v| rb_object_to_any_value(v, strict))
|
124
|
+
.collect::<RbResult<Vec<AnyValue>>>()?;
|
125
|
+
let s = Series::from_any_values_and_dtype(&name, any_values.as_slice(), &dtype.0, strict)
|
126
|
+
.map_err(|e| {
|
127
|
+
RbTypeError::new_err(format!(
|
128
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
129
|
+
))
|
130
|
+
})?;
|
106
131
|
Ok(s.into())
|
107
132
|
}
|
108
133
|
|
@@ -125,9 +150,9 @@ impl RbSeries {
|
|
125
150
|
|
126
151
|
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
127
152
|
let val = val
|
128
|
-
.
|
129
|
-
.map(
|
130
|
-
.collect::<
|
153
|
+
.into_iter()
|
154
|
+
.map(ObjectValue::from)
|
155
|
+
.collect::<Vec<ObjectValue>>();
|
131
156
|
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
132
157
|
Ok(s.into())
|
133
158
|
}
|
@@ -163,15 +188,8 @@ impl RbSeries {
|
|
163
188
|
}
|
164
189
|
}
|
165
190
|
|
166
|
-
pub fn new_decimal(name: String,
|
167
|
-
|
168
|
-
// TODO: do we have to respect 'strict' here? it's possible if we want to
|
169
|
-
let avs = slice_extract_wrapped(&val);
|
170
|
-
// create a fake dtype with a placeholder "none" scale, to be inferred later
|
171
|
-
let dtype = DataType::Decimal(None, None);
|
172
|
-
let s = Series::from_any_values_and_dtype(&name, avs, &dtype, strict)
|
173
|
-
.map_err(RbPolarsErr::from)?;
|
174
|
-
Ok(s.into())
|
191
|
+
pub fn new_decimal(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
192
|
+
Self::new_from_any_values(name, values, strict)
|
175
193
|
}
|
176
194
|
|
177
195
|
pub fn repeat(
|
@@ -1,57 +1,139 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{value::qnil, IntoValue, RArray, Value};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::prelude::*;
|
5
|
+
use crate::RbSeries;
|
5
6
|
|
6
7
|
impl RbSeries {
|
7
|
-
///
|
8
|
-
/// This
|
9
|
-
pub fn
|
10
|
-
let
|
11
|
-
match s.dtype() {
|
12
|
-
DataType::String => {
|
13
|
-
let ca = s.str().unwrap();
|
8
|
+
/// Convert this Series to a Ruby array.
|
9
|
+
/// This operation copies data.
|
10
|
+
pub fn to_a(&self) -> Value {
|
11
|
+
let series = &self.series.borrow();
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
.
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
13
|
+
fn to_a_recursive(series: &Series) -> Value {
|
14
|
+
let rblist = match series.dtype() {
|
15
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
16
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
17
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
18
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
19
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
20
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
21
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
22
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
23
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
24
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
25
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
26
|
+
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
27
|
+
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
28
|
+
}
|
29
|
+
DataType::Object(_, _) => {
|
30
|
+
let v = RArray::with_capacity(series.len());
|
31
|
+
for i in 0..series.len() {
|
32
|
+
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
33
|
+
match obj {
|
34
|
+
Some(val) => v.push(val.to_object()).unwrap(),
|
35
|
+
None => v.push(qnil()).unwrap(),
|
36
|
+
};
|
37
|
+
}
|
38
|
+
v.into_value()
|
39
|
+
}
|
40
|
+
DataType::List(_) => {
|
41
|
+
let v = RArray::new();
|
42
|
+
let ca = series.list().unwrap();
|
43
|
+
for opt_s in ca.amortized_iter() {
|
44
|
+
match opt_s {
|
45
|
+
None => {
|
46
|
+
v.push(qnil()).unwrap();
|
47
|
+
}
|
48
|
+
Some(s) => {
|
49
|
+
let rblst = to_a_recursive(s.as_ref());
|
50
|
+
v.push(rblst).unwrap();
|
51
|
+
}
|
52
|
+
}
|
53
|
+
}
|
54
|
+
v.into_value()
|
55
|
+
}
|
56
|
+
DataType::Array(_, _) => {
|
57
|
+
let v = RArray::new();
|
58
|
+
let ca = series.array().unwrap();
|
59
|
+
for opt_s in ca.amortized_iter() {
|
60
|
+
match opt_s {
|
61
|
+
None => {
|
62
|
+
v.push(qnil()).unwrap();
|
63
|
+
}
|
64
|
+
Some(s) => {
|
65
|
+
let rblst = to_a_recursive(s.as_ref());
|
66
|
+
v.push(rblst).unwrap();
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|
70
|
+
v.into_value()
|
71
|
+
}
|
72
|
+
DataType::Date => {
|
73
|
+
let ca = series.date().unwrap();
|
74
|
+
return Wrap(ca).into_value();
|
75
|
+
}
|
76
|
+
DataType::Time => {
|
77
|
+
let ca = series.time().unwrap();
|
78
|
+
return Wrap(ca).into_value();
|
79
|
+
}
|
80
|
+
DataType::Datetime(_, _) => {
|
81
|
+
let ca = series.datetime().unwrap();
|
82
|
+
return Wrap(ca).into_value();
|
83
|
+
}
|
84
|
+
DataType::Decimal(_, _) => {
|
85
|
+
let ca = series.decimal().unwrap();
|
86
|
+
return Wrap(ca).into_value();
|
87
|
+
}
|
88
|
+
DataType::String => {
|
89
|
+
let ca = series.str().unwrap();
|
90
|
+
return Wrap(ca).into_value();
|
91
|
+
}
|
92
|
+
DataType::Struct(_) => {
|
93
|
+
let ca = series.struct_().unwrap();
|
94
|
+
return Wrap(ca).into_value();
|
95
|
+
}
|
96
|
+
DataType::Duration(_) => {
|
97
|
+
let ca = series.duration().unwrap();
|
98
|
+
return Wrap(ca).into_value();
|
99
|
+
}
|
100
|
+
DataType::Binary => {
|
101
|
+
let ca = series.binary().unwrap();
|
102
|
+
return Wrap(ca).into_value();
|
103
|
+
}
|
104
|
+
DataType::Null => {
|
105
|
+
let null: Option<u8> = None;
|
106
|
+
let n = series.len();
|
107
|
+
let iter = std::iter::repeat(null).take(n);
|
108
|
+
use std::iter::{Repeat, Take};
|
109
|
+
struct NullIter {
|
110
|
+
iter: Take<Repeat<Option<u8>>>,
|
111
|
+
n: usize,
|
112
|
+
}
|
113
|
+
impl Iterator for NullIter {
|
114
|
+
type Item = Option<u8>;
|
115
|
+
|
116
|
+
fn next(&mut self) -> Option<Self::Item> {
|
117
|
+
self.iter.next()
|
118
|
+
}
|
119
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
120
|
+
(self.n, Some(self.n))
|
121
|
+
}
|
122
|
+
}
|
123
|
+
impl ExactSizeIterator for NullIter {}
|
124
|
+
|
125
|
+
RArray::from_iter(NullIter { iter, n }).into_value()
|
126
|
+
}
|
127
|
+
DataType::Unknown(_) => {
|
128
|
+
panic!("to_a not implemented for unknown")
|
129
|
+
}
|
130
|
+
DataType::BinaryOffset => {
|
131
|
+
unreachable!()
|
132
|
+
}
|
133
|
+
};
|
134
|
+
rblist
|
55
135
|
}
|
136
|
+
|
137
|
+
to_a_recursive(series)
|
56
138
|
}
|
57
139
|
}
|
@@ -5,7 +5,7 @@ mod construction;
|
|
5
5
|
mod export;
|
6
6
|
mod scatter;
|
7
7
|
|
8
|
-
use magnus::{exception, prelude::*,
|
8
|
+
use magnus::{exception, prelude::*, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
@@ -36,8 +36,8 @@ impl RbSeries {
|
|
36
36
|
|
37
37
|
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
38
38
|
let mut series = Vec::new();
|
39
|
-
for item in rs.
|
40
|
-
series.push(<&RbSeries>::try_convert(item
|
39
|
+
for item in rs.into_iter() {
|
40
|
+
series.push(<&RbSeries>::try_convert(item)?.series.borrow().clone());
|
41
41
|
}
|
42
42
|
Ok(series)
|
43
43
|
}
|
@@ -247,13 +247,24 @@ impl RbSeries {
|
|
247
247
|
.into())
|
248
248
|
}
|
249
249
|
|
250
|
-
pub fn value_counts(
|
251
|
-
|
250
|
+
pub fn value_counts(
|
251
|
+
&self,
|
252
|
+
sort: bool,
|
253
|
+
parallel: bool,
|
254
|
+
name: String,
|
255
|
+
normalize: bool,
|
256
|
+
) -> RbResult<RbDataFrame> {
|
257
|
+
let out = self
|
252
258
|
.series
|
253
259
|
.borrow()
|
254
|
-
.value_counts(
|
260
|
+
.value_counts(sort, parallel, name, normalize)
|
255
261
|
.map_err(RbPolarsErr::from)?;
|
256
|
-
Ok(
|
262
|
+
Ok(out.into())
|
263
|
+
}
|
264
|
+
|
265
|
+
pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
|
266
|
+
let length = length.unwrap_or_else(|| self.series.borrow().len());
|
267
|
+
self.series.borrow().slice(offset, length).into()
|
257
268
|
}
|
258
269
|
|
259
270
|
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
@@ -301,10 +312,20 @@ impl RbSeries {
|
|
301
312
|
Ok(s.into())
|
302
313
|
}
|
303
314
|
|
304
|
-
pub fn equals(
|
305
|
-
|
306
|
-
|
307
|
-
|
315
|
+
pub fn equals(
|
316
|
+
&self,
|
317
|
+
other: &RbSeries,
|
318
|
+
check_dtypes: bool,
|
319
|
+
check_names: bool,
|
320
|
+
null_equal: bool,
|
321
|
+
) -> bool {
|
322
|
+
if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
|
323
|
+
return false;
|
324
|
+
}
|
325
|
+
if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
|
326
|
+
return false;
|
327
|
+
}
|
328
|
+
if null_equal {
|
308
329
|
self.series.borrow().equals_missing(&other.series.borrow())
|
309
330
|
} else {
|
310
331
|
self.series.borrow().equals(&other.series.borrow())
|
@@ -325,136 +346,6 @@ impl RbSeries {
|
|
325
346
|
self.series.borrow().len()
|
326
347
|
}
|
327
348
|
|
328
|
-
pub fn to_a(&self) -> Value {
|
329
|
-
let series = &self.series.borrow();
|
330
|
-
|
331
|
-
fn to_a_recursive(series: &Series) -> Value {
|
332
|
-
let rblist = match series.dtype() {
|
333
|
-
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
334
|
-
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
335
|
-
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
336
|
-
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
337
|
-
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
338
|
-
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
339
|
-
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
340
|
-
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
341
|
-
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
342
|
-
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
343
|
-
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
344
|
-
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
345
|
-
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
346
|
-
}
|
347
|
-
DataType::Object(_, _) => {
|
348
|
-
let v = RArray::with_capacity(series.len());
|
349
|
-
for i in 0..series.len() {
|
350
|
-
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
351
|
-
match obj {
|
352
|
-
Some(val) => v.push(val.to_object()).unwrap(),
|
353
|
-
None => v.push(qnil()).unwrap(),
|
354
|
-
};
|
355
|
-
}
|
356
|
-
v.into_value()
|
357
|
-
}
|
358
|
-
DataType::List(_) => {
|
359
|
-
let v = RArray::new();
|
360
|
-
let ca = series.list().unwrap();
|
361
|
-
for opt_s in unsafe { ca.amortized_iter() } {
|
362
|
-
match opt_s {
|
363
|
-
None => {
|
364
|
-
v.push(qnil()).unwrap();
|
365
|
-
}
|
366
|
-
Some(s) => {
|
367
|
-
let rblst = to_a_recursive(s.as_ref());
|
368
|
-
v.push(rblst).unwrap();
|
369
|
-
}
|
370
|
-
}
|
371
|
-
}
|
372
|
-
v.into_value()
|
373
|
-
}
|
374
|
-
DataType::Array(_, _) => {
|
375
|
-
let v = RArray::new();
|
376
|
-
let ca = series.array().unwrap();
|
377
|
-
for opt_s in ca.amortized_iter() {
|
378
|
-
match opt_s {
|
379
|
-
None => {
|
380
|
-
v.push(qnil()).unwrap();
|
381
|
-
}
|
382
|
-
Some(s) => {
|
383
|
-
let rblst = to_a_recursive(s.as_ref());
|
384
|
-
v.push(rblst).unwrap();
|
385
|
-
}
|
386
|
-
}
|
387
|
-
}
|
388
|
-
v.into_value()
|
389
|
-
}
|
390
|
-
DataType::Date => {
|
391
|
-
let ca = series.date().unwrap();
|
392
|
-
return Wrap(ca).into_value();
|
393
|
-
}
|
394
|
-
DataType::Time => {
|
395
|
-
let ca = series.time().unwrap();
|
396
|
-
return Wrap(ca).into_value();
|
397
|
-
}
|
398
|
-
DataType::Datetime(_, _) => {
|
399
|
-
let ca = series.datetime().unwrap();
|
400
|
-
return Wrap(ca).into_value();
|
401
|
-
}
|
402
|
-
DataType::Decimal(_, _) => {
|
403
|
-
let ca = series.decimal().unwrap();
|
404
|
-
return Wrap(ca).into_value();
|
405
|
-
}
|
406
|
-
DataType::String => {
|
407
|
-
let ca = series.str().unwrap();
|
408
|
-
return Wrap(ca).into_value();
|
409
|
-
}
|
410
|
-
DataType::Struct(_) => {
|
411
|
-
let ca = series.struct_().unwrap();
|
412
|
-
return Wrap(ca).into_value();
|
413
|
-
}
|
414
|
-
DataType::Duration(_) => {
|
415
|
-
let ca = series.duration().unwrap();
|
416
|
-
return Wrap(ca).into_value();
|
417
|
-
}
|
418
|
-
DataType::Binary => {
|
419
|
-
let ca = series.binary().unwrap();
|
420
|
-
return Wrap(ca).into_value();
|
421
|
-
}
|
422
|
-
DataType::Null => {
|
423
|
-
let null: Option<u8> = None;
|
424
|
-
let n = series.len();
|
425
|
-
let iter = std::iter::repeat(null).take(n);
|
426
|
-
use std::iter::{Repeat, Take};
|
427
|
-
struct NullIter {
|
428
|
-
iter: Take<Repeat<Option<u8>>>,
|
429
|
-
n: usize,
|
430
|
-
}
|
431
|
-
impl Iterator for NullIter {
|
432
|
-
type Item = Option<u8>;
|
433
|
-
|
434
|
-
fn next(&mut self) -> Option<Self::Item> {
|
435
|
-
self.iter.next()
|
436
|
-
}
|
437
|
-
fn size_hint(&self) -> (usize, Option<usize>) {
|
438
|
-
(self.n, Some(self.n))
|
439
|
-
}
|
440
|
-
}
|
441
|
-
impl ExactSizeIterator for NullIter {}
|
442
|
-
|
443
|
-
RArray::from_iter(NullIter { iter, n }).into_value()
|
444
|
-
}
|
445
|
-
DataType::Unknown => {
|
446
|
-
panic!("to_a not implemented for unknown")
|
447
|
-
}
|
448
|
-
DataType::BinaryOffset => {
|
449
|
-
unreachable!()
|
450
|
-
}
|
451
|
-
};
|
452
|
-
rblist
|
453
|
-
}
|
454
|
-
|
455
|
-
to_a_recursive(series)
|
456
|
-
}
|
457
|
-
|
458
349
|
pub fn clone(&self) -> Self {
|
459
350
|
RbSeries::new(self.series.borrow().clone())
|
460
351
|
}
|
data/ext/polars/src/sql.rs
CHANGED
@@ -37,7 +37,9 @@ impl RbSQLContext {
|
|
37
37
|
}
|
38
38
|
|
39
39
|
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
40
|
-
self.context
|
40
|
+
self.context
|
41
|
+
.borrow_mut()
|
42
|
+
.register(&name, lf.ldf.borrow().clone())
|
41
43
|
}
|
42
44
|
|
43
45
|
pub fn unregister(&self, name: String) {
|
data/lib/polars/array_expr.rb
CHANGED
@@ -358,7 +358,7 @@ module Polars
|
|
358
358
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
359
|
# # └───────────────┴─────┴──────┘
|
360
360
|
def get(index, null_on_oob: true)
|
361
|
-
index = Utils.
|
361
|
+
index = Utils.parse_into_expression(index)
|
362
362
|
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
363
363
|
end
|
364
364
|
|
@@ -446,7 +446,7 @@ module Polars
|
|
446
446
|
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
447
|
# # └───────────────┴───────────┴──────┘
|
448
448
|
def join(separator, ignore_nulls: true)
|
449
|
-
separator = Utils.
|
449
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
450
450
|
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
451
451
|
end
|
452
452
|
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ ["a", "c"] ┆ true │
|
503
503
|
# # └───────────────┴──────────┘
|
504
504
|
def contains(item)
|
505
|
-
item = Utils.
|
505
|
+
item = Utils.parse_into_expression(item, str_as_lit: true)
|
506
506
|
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
507
507
|
end
|
508
508
|
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ [2, 2] ┆ 2 │
|
531
531
|
# # └───────────────┴────────────────┘
|
532
532
|
def count_matches(element)
|
533
|
-
element = Utils.
|
533
|
+
element = Utils.parse_into_expression(element, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
535
535
|
end
|
536
536
|
end
|
@@ -13,6 +13,7 @@ module Polars
|
|
13
13
|
skip_rows: 0,
|
14
14
|
dtypes: nil,
|
15
15
|
null_values: nil,
|
16
|
+
missing_utf8_is_empty_string: false,
|
16
17
|
ignore_errors: false,
|
17
18
|
parse_dates: false,
|
18
19
|
n_threads: nil,
|
@@ -28,10 +29,12 @@ module Polars
|
|
28
29
|
sample_size: 1024,
|
29
30
|
eol_char: "\n",
|
30
31
|
new_columns: nil,
|
31
|
-
|
32
|
+
raise_if_empty: true,
|
33
|
+
truncate_ragged_lines: false,
|
34
|
+
decimal_comma: false
|
32
35
|
)
|
33
36
|
if Utils.pathlike?(file)
|
34
|
-
path = Utils.
|
37
|
+
path = Utils.normalize_filepath(file)
|
35
38
|
end
|
36
39
|
|
37
40
|
dtype_list = nil
|
@@ -39,7 +42,7 @@ module Polars
|
|
39
42
|
if !dtypes.nil?
|
40
43
|
if dtypes.is_a?(Hash)
|
41
44
|
dtype_list = []
|
42
|
-
dtypes.each do|k, v|
|
45
|
+
dtypes.each do |k, v|
|
43
46
|
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
44
47
|
end
|
45
48
|
elsif dtypes.is_a?(::Array)
|
@@ -72,12 +75,15 @@ module Polars
|
|
72
75
|
comment_char,
|
73
76
|
quote_char,
|
74
77
|
processed_null_values,
|
78
|
+
missing_utf8_is_empty_string,
|
75
79
|
parse_dates,
|
76
80
|
skip_rows_after_header,
|
77
|
-
Utils.
|
81
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
78
82
|
sample_size,
|
79
83
|
eol_char,
|
80
|
-
|
84
|
+
raise_if_empty,
|
85
|
+
truncate_ragged_lines,
|
86
|
+
decimal_comma
|
81
87
|
)
|
82
88
|
self.new_columns = new_columns
|
83
89
|
end
|