polars-df 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +284 -216
- data/ext/polars/Cargo.toml +7 -4
- data/ext/polars/src/batched_csv.rs +2 -3
- data/ext/polars/src/conversion.rs +18 -17
- data/ext/polars/src/dataframe.rs +27 -63
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/general.rs +63 -4
- data/ext/polars/src/expr/rolling.rs +15 -10
- data/ext/polars/src/expr/string.rs +9 -9
- data/ext/polars/src/functions/range.rs +5 -10
- data/ext/polars/src/lazyframe.rs +28 -19
- data/ext/polars/src/lib.rs +20 -20
- data/ext/polars/src/map/dataframe.rs +1 -1
- data/ext/polars/src/map/mod.rs +2 -2
- data/ext/polars/src/map/series.rs +6 -6
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +1 -1
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/{series.rs → series/mod.rs} +21 -18
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/data_frame.rb +69 -65
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/expr.rb +223 -18
- data/lib/polars/group_by.rb +1 -1
- data/lib/polars/io.rb +4 -4
- data/lib/polars/lazy_frame.rb +23 -23
- data/lib/polars/lazy_functions.rb +4 -20
- data/lib/polars/series.rb +289 -30
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +13 -13
- data/lib/polars/version.rb +1 -1
- metadata +7 -6
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.7.0"
|
3
|
+
version = "0.8.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -15,13 +15,14 @@ ahash = "0.8"
|
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
17
|
magnus = "0.6"
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
18
|
+
polars-core = "=0.36.2"
|
19
|
+
polars-parquet = "=0.36.2"
|
20
|
+
polars-utils = "=0.36.2"
|
20
21
|
serde_json = "1"
|
21
22
|
smartstring = "1"
|
22
23
|
|
23
24
|
[dependencies.polars]
|
24
|
-
version = "=0.
|
25
|
+
version = "=0.36.2"
|
25
26
|
features = [
|
26
27
|
"abs",
|
27
28
|
"approx_unique",
|
@@ -35,6 +36,7 @@ features = [
|
|
35
36
|
"csv",
|
36
37
|
"cum_agg",
|
37
38
|
"cumulative_eval",
|
39
|
+
"cutqcut",
|
38
40
|
"dataframe_arithmetic",
|
39
41
|
"date_offset",
|
40
42
|
"diagonal_concat",
|
@@ -77,6 +79,7 @@ features = [
|
|
77
79
|
"range",
|
78
80
|
"reinterpret",
|
79
81
|
"repeat_by",
|
82
|
+
"rle",
|
80
83
|
"rolling_window",
|
81
84
|
"round_series",
|
82
85
|
"row_hash",
|
data/ext/polars/src/batched_csv.rs
CHANGED
@@ -41,7 +41,7 @@ impl RbBatchedCsv {
|
|
41
41
|
// TODO fix
|
42
42
|
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
|
43
43
|
let low_memory = bool::try_convert(arguments[15])?;
|
44
|
-
let
|
44
|
+
let comment_prefix = Option::<String>::try_convert(arguments[16])?;
|
45
45
|
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
46
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
47
|
let try_parse_dates = bool::try_convert(arguments[19])?;
|
@@ -52,7 +52,6 @@ impl RbBatchedCsv {
|
|
52
52
|
// end arguments
|
53
53
|
|
54
54
|
let null_values = null_values.map(|w| w.0);
|
55
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
56
55
|
let eol_char = eol_char.as_bytes()[0];
|
57
56
|
|
58
57
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -101,7 +100,7 @@ impl RbBatchedCsv {
|
|
101
100
|
.with_n_threads(n_threads)
|
102
101
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
103
102
|
.low_memory(low_memory)
|
104
|
-
.
|
103
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
105
104
|
.with_null_values(null_values)
|
106
105
|
.with_try_parse_dates(try_parse_dates)
|
107
106
|
.with_quote_char(quote_char)
|
data/ext/polars/src/conversion.rs
CHANGED
@@ -14,7 +14,7 @@ use polars::frame::NullStrategy;
|
|
14
14
|
use polars::io::avro::AvroCompression;
|
15
15
|
use polars::prelude::*;
|
16
16
|
use polars::series::ops::NullBehavior;
|
17
|
-
use
|
17
|
+
use polars_utils::total_ord::TotalEq;
|
18
18
|
use smartstring::alias::String as SmartString;
|
19
19
|
|
20
20
|
use crate::object::OBJECT_NAME;
|
@@ -78,10 +78,10 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
78
78
|
Ok(rbs.series.borrow().clone())
|
79
79
|
}
|
80
80
|
|
81
|
-
impl TryConvert for Wrap<
|
81
|
+
impl TryConvert for Wrap<StringChunked> {
|
82
82
|
fn try_convert(obj: Value) -> RbResult<Self> {
|
83
83
|
let (seq, len) = get_rbseq(obj)?;
|
84
|
-
let mut builder =
|
84
|
+
let mut builder = StringChunkedBuilder::new("", len, len * 25);
|
85
85
|
|
86
86
|
for res in seq.each() {
|
87
87
|
let item = res?;
|
@@ -149,8 +149,8 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
149
149
|
AnyValue::Float64(v) => ruby.into_value(v),
|
150
150
|
AnyValue::Null => ruby.qnil().as_value(),
|
151
151
|
AnyValue::Boolean(v) => ruby.into_value(v),
|
152
|
-
AnyValue::
|
153
|
-
AnyValue::
|
152
|
+
AnyValue::String(v) => ruby.into_value(v),
|
153
|
+
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
154
154
|
AnyValue::Categorical(idx, rev, arr) => {
|
155
155
|
let s = if arr.is_null() {
|
156
156
|
rev.get(idx)
|
@@ -215,7 +215,7 @@ impl IntoValue for Wrap<DataType> {
|
|
215
215
|
.unwrap()
|
216
216
|
}
|
217
217
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
218
|
-
DataType::
|
218
|
+
DataType::String => pl.const_get::<_, Value>("String").unwrap(),
|
219
219
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
220
220
|
DataType::Array(inner, size) => {
|
221
221
|
let inner = Wrap(*inner);
|
@@ -242,8 +242,8 @@ impl IntoValue for Wrap<DataType> {
|
|
242
242
|
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
243
243
|
.unwrap()
|
244
244
|
}
|
245
|
-
DataType::Object(_) => pl.const_get::<_, Value>("Object").unwrap(),
|
246
|
-
DataType::Categorical(_) => pl.const_get::<_, Value>("Categorical").unwrap(),
|
245
|
+
DataType::Object(_, _) => pl.const_get::<_, Value>("Object").unwrap(),
|
246
|
+
DataType::Categorical(_, _) => pl.const_get::<_, Value>("Categorical").unwrap(),
|
247
247
|
DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
|
248
248
|
DataType::Struct(fields) => {
|
249
249
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
@@ -277,7 +277,7 @@ impl IntoValue for Wrap<TimeUnit> {
|
|
277
277
|
}
|
278
278
|
}
|
279
279
|
|
280
|
-
impl IntoValue for Wrap<&
|
280
|
+
impl IntoValue for Wrap<&StringChunked> {
|
281
281
|
fn into_value_with(self, _: &Ruby) -> Value {
|
282
282
|
let iter = self.0.into_iter();
|
283
283
|
RArray::from_iter(iter).into_value()
|
@@ -406,10 +406,10 @@ impl TryConvert for Wrap<DataType> {
|
|
406
406
|
"Polars::Int16" => DataType::Int16,
|
407
407
|
"Polars::Int32" => DataType::Int32,
|
408
408
|
"Polars::Int64" => DataType::Int64,
|
409
|
-
"Polars::
|
409
|
+
"Polars::String" => DataType::String,
|
410
410
|
"Polars::Binary" => DataType::Binary,
|
411
411
|
"Polars::Boolean" => DataType::Boolean,
|
412
|
-
"Polars::Categorical" => DataType::Categorical(None),
|
412
|
+
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
413
413
|
"Polars::Date" => DataType::Date,
|
414
414
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
415
415
|
"Polars::Time" => DataType::Time,
|
@@ -417,7 +417,7 @@ impl TryConvert for Wrap<DataType> {
|
|
417
417
|
"Polars::Decimal" => DataType::Decimal(None, None),
|
418
418
|
"Polars::Float32" => DataType::Float32,
|
419
419
|
"Polars::Float64" => DataType::Float64,
|
420
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME),
|
420
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
421
421
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
422
422
|
"Polars::Null" => DataType::Null,
|
423
423
|
"Polars::Unknown" => DataType::Unknown,
|
@@ -477,17 +477,17 @@ impl TryConvert for Wrap<DataType> {
|
|
477
477
|
"i16" => DataType::Int16,
|
478
478
|
"i32" => DataType::Int32,
|
479
479
|
"i64" => DataType::Int64,
|
480
|
-
"str" => DataType::
|
480
|
+
"str" => DataType::String,
|
481
481
|
"bin" => DataType::Binary,
|
482
482
|
"bool" => DataType::Boolean,
|
483
|
-
"cat" => DataType::Categorical(None),
|
483
|
+
"cat" => DataType::Categorical(None, Default::default()),
|
484
484
|
"date" => DataType::Date,
|
485
485
|
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
486
486
|
"f32" => DataType::Float32,
|
487
487
|
"time" => DataType::Time,
|
488
488
|
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
489
489
|
"f64" => DataType::Float64,
|
490
|
-
"obj" => DataType::Object(OBJECT_NAME),
|
490
|
+
"obj" => DataType::Object(OBJECT_NAME, None),
|
491
491
|
"list" => DataType::List(Box::new(DataType::Boolean)),
|
492
492
|
"null" => DataType::Null,
|
493
493
|
"unk" => DataType::Unknown,
|
@@ -513,7 +513,7 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
513
513
|
Ok(AnyValue::Float64(v.to_f64()).into())
|
514
514
|
} else if let Some(v) = RString::from_value(ob) {
|
515
515
|
if v.enc_get() == Index::utf8() {
|
516
|
-
Ok(AnyValue::
|
516
|
+
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
517
517
|
} else {
|
518
518
|
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
519
|
}
|
@@ -891,7 +891,8 @@ impl TryConvert for Wrap<JoinType> {
|
|
891
891
|
let parsed = match String::try_convert(ob)?.as_str() {
|
892
892
|
"inner" => JoinType::Inner,
|
893
893
|
"left" => JoinType::Left,
|
894
|
-
"outer" => JoinType::Outer,
|
894
|
+
"outer" => JoinType::Outer { coalesce: false },
|
895
|
+
"outer_coalesce" => JoinType::Outer { coalesce: true },
|
895
896
|
"semi" => JoinType::Semi,
|
896
897
|
"anti" => JoinType::Anti,
|
897
898
|
// #[cfg(feature = "cross_join")]
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -48,7 +48,7 @@ impl RbDataFrame {
|
|
48
48
|
schema_overwrite: Option<Schema>,
|
49
49
|
) -> RbResult<Self> {
|
50
50
|
// object builder must be registered.
|
51
|
-
crate::
|
51
|
+
crate::on_startup::register_object_builder();
|
52
52
|
|
53
53
|
let schema =
|
54
54
|
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
|
@@ -120,7 +120,7 @@ impl RbDataFrame {
|
|
120
120
|
// TODO fix
|
121
121
|
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
122
122
|
let low_memory = bool::try_convert(arguments[16])?;
|
123
|
-
let
|
123
|
+
let comment_prefix = Option::<String>::try_convert(arguments[17])?;
|
124
124
|
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
125
125
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
|
126
126
|
let try_parse_dates = bool::try_convert(arguments[20])?;
|
@@ -131,7 +131,6 @@ impl RbDataFrame {
|
|
131
131
|
// end arguments
|
132
132
|
|
133
133
|
let null_values = null_values.map(|w| w.0);
|
134
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
135
134
|
let eol_char = eol_char.as_bytes()[0];
|
136
135
|
|
137
136
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -181,7 +180,7 @@ impl RbDataFrame {
|
|
181
180
|
.with_dtypes(overwrite_dtype.map(Arc::new))
|
182
181
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
183
182
|
.low_memory(low_memory)
|
184
|
-
.
|
183
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
185
184
|
.with_null_values(null_values)
|
186
185
|
.with_try_parse_dates(try_parse_dates)
|
187
186
|
.with_quote_char(quote_char)
|
@@ -297,12 +296,18 @@ impl RbDataFrame {
|
|
297
296
|
Ok(df) => Ok(df.into()),
|
298
297
|
// try arrow json reader instead
|
299
298
|
// this is row oriented
|
300
|
-
Err(
|
301
|
-
let
|
302
|
-
|
303
|
-
.
|
304
|
-
|
305
|
-
|
299
|
+
Err(e) => {
|
300
|
+
let msg = format!("{e}");
|
301
|
+
if msg.contains("successful parse invalid data") {
|
302
|
+
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
303
|
+
Err(e)
|
304
|
+
} else {
|
305
|
+
let out = JsonReader::new(mmap_bytes_r)
|
306
|
+
.with_json_format(JsonFormat::Json)
|
307
|
+
.finish()
|
308
|
+
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
309
|
+
Ok(out.into())
|
310
|
+
}
|
306
311
|
}
|
307
312
|
}
|
308
313
|
}
|
@@ -504,7 +509,7 @@ impl RbDataFrame {
|
|
504
509
|
.get_columns()
|
505
510
|
.iter()
|
506
511
|
.map(|s| match s.dtype() {
|
507
|
-
DataType::Object(_) => {
|
512
|
+
DataType::Object(_, _) => {
|
508
513
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
509
514
|
obj.unwrap().to_object()
|
510
515
|
}
|
@@ -523,7 +528,7 @@ impl RbDataFrame {
|
|
523
528
|
.get_columns()
|
524
529
|
.iter()
|
525
530
|
.map(|s| match s.dtype() {
|
526
|
-
DataType::Object(_) => {
|
531
|
+
DataType::Object(_, _) => {
|
527
532
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
528
533
|
obj.unwrap().to_object()
|
529
534
|
}
|
@@ -785,8 +790,8 @@ impl RbDataFrame {
|
|
785
790
|
.map(|s| RbSeries::new(s.clone()))
|
786
791
|
}
|
787
792
|
|
788
|
-
pub fn
|
789
|
-
self.df.borrow().
|
793
|
+
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
794
|
+
self.df.borrow().get_column_index(&name)
|
790
795
|
}
|
791
796
|
|
792
797
|
// TODO remove clone
|
@@ -828,18 +833,18 @@ impl RbDataFrame {
|
|
828
833
|
Ok(())
|
829
834
|
}
|
830
835
|
|
831
|
-
pub fn
|
836
|
+
pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
832
837
|
self.df
|
833
838
|
.borrow_mut()
|
834
|
-
.
|
839
|
+
.replace_column(index, new_col.series.borrow().clone())
|
835
840
|
.map_err(RbPolarsErr::from)?;
|
836
841
|
Ok(())
|
837
842
|
}
|
838
843
|
|
839
|
-
pub fn
|
844
|
+
pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
840
845
|
self.df
|
841
846
|
.borrow_mut()
|
842
|
-
.
|
847
|
+
.insert_column(index, new_col.series.borrow().clone())
|
843
848
|
.map_err(RbPolarsErr::from)?;
|
844
849
|
Ok(())
|
845
850
|
}
|
@@ -874,11 +879,11 @@ impl RbDataFrame {
|
|
874
879
|
Ok(mask.into_series().into())
|
875
880
|
}
|
876
881
|
|
877
|
-
pub fn
|
882
|
+
pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
878
883
|
if null_equal {
|
879
|
-
self.df.borrow().
|
884
|
+
self.df.borrow().equals_missing(&other.df.borrow())
|
880
885
|
} else {
|
881
|
-
self.df.borrow().
|
886
|
+
self.df.borrow().equals(&other.df.borrow())
|
882
887
|
}
|
883
888
|
}
|
884
889
|
|
@@ -966,34 +971,6 @@ impl RbDataFrame {
|
|
966
971
|
self.df.borrow().clone().lazy().into()
|
967
972
|
}
|
968
973
|
|
969
|
-
pub fn max(&self) -> Self {
|
970
|
-
self.df.borrow().max().into()
|
971
|
-
}
|
972
|
-
|
973
|
-
pub fn min(&self) -> Self {
|
974
|
-
self.df.borrow().min().into()
|
975
|
-
}
|
976
|
-
|
977
|
-
pub fn sum(&self) -> Self {
|
978
|
-
self.df.borrow().sum().into()
|
979
|
-
}
|
980
|
-
|
981
|
-
pub fn mean(&self) -> Self {
|
982
|
-
self.df.borrow().mean().into()
|
983
|
-
}
|
984
|
-
|
985
|
-
pub fn std(&self, ddof: u8) -> Self {
|
986
|
-
self.df.borrow().std(ddof).into()
|
987
|
-
}
|
988
|
-
|
989
|
-
pub fn var(&self, ddof: u8) -> Self {
|
990
|
-
self.df.borrow().var(ddof).into()
|
991
|
-
}
|
992
|
-
|
993
|
-
pub fn median(&self) -> Self {
|
994
|
-
self.df.borrow().median().into()
|
995
|
-
}
|
996
|
-
|
997
974
|
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
998
975
|
let s = self
|
999
976
|
.df
|
@@ -1040,19 +1017,6 @@ impl RbDataFrame {
|
|
1040
1017
|
Ok(s.map(|s| s.into()))
|
1041
1018
|
}
|
1042
1019
|
|
1043
|
-
pub fn quantile(
|
1044
|
-
&self,
|
1045
|
-
quantile: f64,
|
1046
|
-
interpolation: Wrap<QuantileInterpolOptions>,
|
1047
|
-
) -> RbResult<Self> {
|
1048
|
-
let df = self
|
1049
|
-
.df
|
1050
|
-
.borrow()
|
1051
|
-
.quantile(quantile, interpolation.0)
|
1052
|
-
.map_err(RbPolarsErr::from)?;
|
1053
|
-
Ok(df.into())
|
1054
|
-
}
|
1055
|
-
|
1056
1020
|
pub fn to_dummies(
|
1057
1021
|
&self,
|
1058
1022
|
columns: Option<Vec<String>>,
|
@@ -1124,7 +1088,7 @@ impl RbDataFrame {
|
|
1124
1088
|
.into_datetime(tu, tz)
|
1125
1089
|
.into_series()
|
1126
1090
|
}
|
1127
|
-
Some(DataType::
|
1091
|
+
Some(DataType::String) => {
|
1128
1092
|
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1129
1093
|
}
|
1130
1094
|
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
data/ext/polars/src/expr/categorical.rs
CHANGED
@@ -5,6 +5,13 @@ use crate::RbExpr;
|
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
7
|
pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
|
8
|
-
self.inner
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.cast(DataType::Categorical(None, ordering.0))
|
11
|
+
.into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn cat_get_categories(&self) -> Self {
|
15
|
+
self.inner.clone().cat().get_categories().into()
|
9
16
|
}
|
10
17
|
}
|
data/ext/polars/src/expr/general.rs
CHANGED
@@ -162,12 +162,71 @@ impl RbExpr {
|
|
162
162
|
.into()
|
163
163
|
}
|
164
164
|
|
165
|
+
pub fn cut(
|
166
|
+
&self,
|
167
|
+
breaks: Vec<f64>,
|
168
|
+
labels: Option<Vec<String>>,
|
169
|
+
left_closed: bool,
|
170
|
+
include_breaks: bool,
|
171
|
+
) -> Self {
|
172
|
+
self.inner
|
173
|
+
.clone()
|
174
|
+
.cut(breaks, labels, left_closed, include_breaks)
|
175
|
+
.into()
|
176
|
+
}
|
177
|
+
|
178
|
+
pub fn qcut(
|
179
|
+
&self,
|
180
|
+
probs: Vec<f64>,
|
181
|
+
labels: Option<Vec<String>>,
|
182
|
+
left_closed: bool,
|
183
|
+
allow_duplicates: bool,
|
184
|
+
include_breaks: bool,
|
185
|
+
) -> Self {
|
186
|
+
self.inner
|
187
|
+
.clone()
|
188
|
+
.qcut(probs, labels, left_closed, allow_duplicates, include_breaks)
|
189
|
+
.into()
|
190
|
+
}
|
191
|
+
|
192
|
+
pub fn qcut_uniform(
|
193
|
+
&self,
|
194
|
+
n_bins: usize,
|
195
|
+
labels: Option<Vec<String>>,
|
196
|
+
left_closed: bool,
|
197
|
+
allow_duplicates: bool,
|
198
|
+
include_breaks: bool,
|
199
|
+
) -> Self {
|
200
|
+
self.inner
|
201
|
+
.clone()
|
202
|
+
.qcut_uniform(
|
203
|
+
n_bins,
|
204
|
+
labels,
|
205
|
+
left_closed,
|
206
|
+
allow_duplicates,
|
207
|
+
include_breaks,
|
208
|
+
)
|
209
|
+
.into()
|
210
|
+
}
|
211
|
+
|
212
|
+
pub fn rle(&self) -> Self {
|
213
|
+
self.inner.clone().rle().into()
|
214
|
+
}
|
215
|
+
|
216
|
+
pub fn rle_id(&self) -> Self {
|
217
|
+
self.inner.clone().rle_id().into()
|
218
|
+
}
|
219
|
+
|
165
220
|
pub fn agg_groups(&self) -> Self {
|
166
|
-
self.clone().
|
221
|
+
self.inner.clone().agg_groups().into()
|
167
222
|
}
|
168
223
|
|
169
224
|
pub fn count(&self) -> Self {
|
170
|
-
self.clone().
|
225
|
+
self.inner.clone().count().into()
|
226
|
+
}
|
227
|
+
|
228
|
+
pub fn len(&self) -> Self {
|
229
|
+
self.inner.clone().len().into()
|
171
230
|
}
|
172
231
|
|
173
232
|
pub fn value_counts(&self, multithreaded: bool, sorted: bool) -> Self {
|
@@ -345,11 +404,11 @@ impl RbExpr {
|
|
345
404
|
self.clone().inner.explode().into()
|
346
405
|
}
|
347
406
|
|
348
|
-
pub fn gather_every(&self, n: usize) -> Self {
|
407
|
+
pub fn gather_every(&self, n: usize, offset: usize) -> Self {
|
349
408
|
self.clone()
|
350
409
|
.inner
|
351
410
|
.map(
|
352
|
-
move |s: Series| Ok(Some(s.gather_every(n))),
|
411
|
+
move |s: Series| Ok(Some(s.gather_every(n, offset))),
|
353
412
|
GetOutput::same_type(),
|
354
413
|
)
|
355
414
|
.with_fmt("gather_every")
|
data/ext/polars/src/expr/rolling.rs
CHANGED
@@ -100,6 +100,7 @@ impl RbExpr {
|
|
100
100
|
by: Option<String>,
|
101
101
|
closed: Option<Wrap<ClosedWindow>>,
|
102
102
|
ddof: u8,
|
103
|
+
warn_if_unsorted: bool,
|
103
104
|
) -> Self {
|
104
105
|
let options = RollingOptions {
|
105
106
|
window_size: Duration::parse(&window_size),
|
@@ -109,6 +110,7 @@ impl RbExpr {
|
|
109
110
|
by,
|
110
111
|
closed_window: closed.map(|c| c.0),
|
111
112
|
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
113
|
+
warn_if_unsorted,
|
112
114
|
};
|
113
115
|
|
114
116
|
self.inner.clone().rolling_std(options).into()
|
@@ -124,6 +126,7 @@ impl RbExpr {
|
|
124
126
|
by: Option<String>,
|
125
127
|
closed: Option<Wrap<ClosedWindow>>,
|
126
128
|
ddof: u8,
|
129
|
+
warn_if_unsorted: bool,
|
127
130
|
) -> Self {
|
128
131
|
let options = RollingOptions {
|
129
132
|
window_size: Duration::parse(&window_size),
|
@@ -133,6 +136,7 @@ impl RbExpr {
|
|
133
136
|
by,
|
134
137
|
closed_window: closed.map(|c| c.0),
|
135
138
|
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
139
|
+
warn_if_unsorted,
|
136
140
|
};
|
137
141
|
|
138
142
|
self.inner.clone().rolling_var(options).into()
|
@@ -146,6 +150,7 @@ impl RbExpr {
|
|
146
150
|
center: bool,
|
147
151
|
by: Option<String>,
|
148
152
|
closed: Option<Wrap<ClosedWindow>>,
|
153
|
+
warn_if_unsorted: bool,
|
149
154
|
) -> Self {
|
150
155
|
let options = RollingOptions {
|
151
156
|
window_size: Duration::parse(&window_size),
|
@@ -154,12 +159,10 @@ impl RbExpr {
|
|
154
159
|
center,
|
155
160
|
by,
|
156
161
|
closed_window: closed.map(|c| c.0),
|
157
|
-
fn_params:
|
158
|
-
|
159
|
-
interpol: QuantileInterpolOptions::Linear,
|
160
|
-
}) as Arc<dyn Any + Send + Sync>),
|
162
|
+
fn_params: None,
|
163
|
+
warn_if_unsorted,
|
161
164
|
};
|
162
|
-
self.inner.clone().
|
165
|
+
self.inner.clone().rolling_median(options).into()
|
163
166
|
}
|
164
167
|
|
165
168
|
#[allow(clippy::too_many_arguments)]
|
@@ -173,6 +176,7 @@ impl RbExpr {
|
|
173
176
|
center: bool,
|
174
177
|
by: Option<String>,
|
175
178
|
closed: Option<Wrap<ClosedWindow>>,
|
179
|
+
warn_if_unsorted: bool,
|
176
180
|
) -> Self {
|
177
181
|
let options = RollingOptions {
|
178
182
|
window_size: Duration::parse(&window_size),
|
@@ -181,13 +185,14 @@ impl RbExpr {
|
|
181
185
|
center,
|
182
186
|
by,
|
183
187
|
closed_window: closed.map(|c| c.0),
|
184
|
-
fn_params:
|
185
|
-
|
186
|
-
interpol: interpolation.0,
|
187
|
-
}) as Arc<dyn Any + Send + Sync>),
|
188
|
+
fn_params: None,
|
189
|
+
warn_if_unsorted,
|
188
190
|
};
|
189
191
|
|
190
|
-
self.inner
|
192
|
+
self.inner
|
193
|
+
.clone()
|
194
|
+
.rolling_quantile(interpolation.0, quantile, options)
|
195
|
+
.into()
|
191
196
|
}
|
192
197
|
|
193
198
|
pub fn rolling_skew(&self, window_size: usize, bias: bool) -> Self {
|
data/ext/polars/src/expr/string.rs
CHANGED
@@ -192,7 +192,7 @@ impl RbExpr {
|
|
192
192
|
self.clone()
|
193
193
|
.inner
|
194
194
|
.map(
|
195
|
-
move |s| s.
|
195
|
+
move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
|
196
196
|
GetOutput::same_type(),
|
197
197
|
)
|
198
198
|
.with_fmt("str.hex_encode")
|
@@ -203,7 +203,7 @@ impl RbExpr {
|
|
203
203
|
self.clone()
|
204
204
|
.inner
|
205
205
|
.map(
|
206
|
-
move |s| s.
|
206
|
+
move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
207
207
|
GetOutput::same_type(),
|
208
208
|
)
|
209
209
|
.with_fmt("str.hex_decode")
|
@@ -214,7 +214,7 @@ impl RbExpr {
|
|
214
214
|
self.clone()
|
215
215
|
.inner
|
216
216
|
.map(
|
217
|
-
move |s| s.
|
217
|
+
move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
|
218
218
|
GetOutput::same_type(),
|
219
219
|
)
|
220
220
|
.with_fmt("str.base64_encode")
|
@@ -226,7 +226,7 @@ impl RbExpr {
|
|
226
226
|
.inner
|
227
227
|
.map(
|
228
228
|
move |s| {
|
229
|
-
s.
|
229
|
+
s.str()?
|
230
230
|
.base64_decode(strict)
|
231
231
|
.map(|s| Some(s.into_series()))
|
232
232
|
},
|
@@ -258,8 +258,8 @@ impl RbExpr {
|
|
258
258
|
};
|
259
259
|
|
260
260
|
let function = move |s: Series| {
|
261
|
-
let ca = s.
|
262
|
-
match ca.
|
261
|
+
let ca = s.str()?;
|
262
|
+
match ca.json_decode(dtype.clone(), infer_schema_len) {
|
263
263
|
Ok(ca) => Ok(Some(ca.into_series())),
|
264
264
|
Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())),
|
265
265
|
}
|
@@ -268,13 +268,13 @@ impl RbExpr {
|
|
268
268
|
self.clone()
|
269
269
|
.inner
|
270
270
|
.map(function, output_type)
|
271
|
-
.with_fmt("str.
|
271
|
+
.with_fmt("str.json_decode")
|
272
272
|
.into()
|
273
273
|
}
|
274
274
|
|
275
275
|
pub fn str_json_path_match(&self, pat: String) -> Self {
|
276
276
|
let function = move |s: Series| {
|
277
|
-
let ca = s.
|
277
|
+
let ca = s.str()?;
|
278
278
|
match ca.json_path_match(&pat) {
|
279
279
|
Ok(ca) => Ok(Some(ca.into_series())),
|
280
280
|
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
@@ -282,7 +282,7 @@ impl RbExpr {
|
|
282
282
|
};
|
283
283
|
self.clone()
|
284
284
|
.inner
|
285
|
-
.map(function, GetOutput::from_type(DataType::
|
285
|
+
.map(function, GetOutput::from_type(DataType::String))
|
286
286
|
.with_fmt("str.json_path_match")
|
287
287
|
.into()
|
288
288
|
}
|
data/ext/polars/src/functions/range.rs
CHANGED
@@ -6,21 +6,16 @@ use crate::prelude::*;
|
|
6
6
|
use crate::RbExpr;
|
7
7
|
|
8
8
|
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
9
|
+
let start = start.inner.clone();
|
10
|
+
let end = end.inner.clone();
|
9
11
|
let dtype = dtype.0;
|
10
|
-
|
11
|
-
let mut result = dsl::int_range(start.inner.clone(), end.inner.clone(), step);
|
12
|
-
|
13
|
-
if dtype != DataType::Int64 {
|
14
|
-
result = result.cast(dtype)
|
15
|
-
}
|
16
|
-
|
17
|
-
result.into()
|
12
|
+
dsl::int_range(start, end, step, dtype).into()
|
18
13
|
}
|
19
14
|
|
20
|
-
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step:
|
15
|
+
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataType>) -> RbExpr {
|
21
16
|
let dtype = dtype.0;
|
22
17
|
|
23
|
-
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step);
|
18
|
+
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step.inner.clone());
|
24
19
|
|
25
20
|
if dtype != DataType::Int64 {
|
26
21
|
result = result.cast(DataType::List(Box::new(dtype)))
|