polars-df 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +284 -216
- data/ext/polars/Cargo.toml +7 -4
- data/ext/polars/src/batched_csv.rs +2 -3
- data/ext/polars/src/conversion.rs +18 -17
- data/ext/polars/src/dataframe.rs +27 -63
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/general.rs +63 -4
- data/ext/polars/src/expr/rolling.rs +15 -10
- data/ext/polars/src/expr/string.rs +9 -9
- data/ext/polars/src/functions/range.rs +5 -10
- data/ext/polars/src/lazyframe.rs +28 -19
- data/ext/polars/src/lib.rs +20 -20
- data/ext/polars/src/map/dataframe.rs +1 -1
- data/ext/polars/src/map/mod.rs +2 -2
- data/ext/polars/src/map/series.rs +6 -6
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +1 -1
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/{series.rs → series/mod.rs} +21 -18
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/data_frame.rb +69 -65
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/expr.rb +223 -18
- data/lib/polars/group_by.rb +1 -1
- data/lib/polars/io.rb +4 -4
- data/lib/polars/lazy_frame.rb +23 -23
- data/lib/polars/lazy_functions.rb +4 -20
- data/lib/polars/series.rb +289 -30
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +13 -13
- data/lib/polars/version.rb +1 -1
- metadata +7 -6
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.8.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -15,13 +15,14 @@ ahash = "0.8"
|
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
17
|
magnus = "0.6"
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
18
|
+
polars-core = "=0.36.2"
|
19
|
+
polars-parquet = "=0.36.2"
|
20
|
+
polars-utils = "=0.36.2"
|
20
21
|
serde_json = "1"
|
21
22
|
smartstring = "1"
|
22
23
|
|
23
24
|
[dependencies.polars]
|
24
|
-
version = "=0.
|
25
|
+
version = "=0.36.2"
|
25
26
|
features = [
|
26
27
|
"abs",
|
27
28
|
"approx_unique",
|
@@ -35,6 +36,7 @@ features = [
|
|
35
36
|
"csv",
|
36
37
|
"cum_agg",
|
37
38
|
"cumulative_eval",
|
39
|
+
"cutqcut",
|
38
40
|
"dataframe_arithmetic",
|
39
41
|
"date_offset",
|
40
42
|
"diagonal_concat",
|
@@ -77,6 +79,7 @@ features = [
|
|
77
79
|
"range",
|
78
80
|
"reinterpret",
|
79
81
|
"repeat_by",
|
82
|
+
"rle",
|
80
83
|
"rolling_window",
|
81
84
|
"round_series",
|
82
85
|
"row_hash",
|
@@ -41,7 +41,7 @@ impl RbBatchedCsv {
|
|
41
41
|
// TODO fix
|
42
42
|
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
|
43
43
|
let low_memory = bool::try_convert(arguments[15])?;
|
44
|
-
let
|
44
|
+
let comment_prefix = Option::<String>::try_convert(arguments[16])?;
|
45
45
|
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
46
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
47
|
let try_parse_dates = bool::try_convert(arguments[19])?;
|
@@ -52,7 +52,6 @@ impl RbBatchedCsv {
|
|
52
52
|
// end arguments
|
53
53
|
|
54
54
|
let null_values = null_values.map(|w| w.0);
|
55
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
56
55
|
let eol_char = eol_char.as_bytes()[0];
|
57
56
|
|
58
57
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -101,7 +100,7 @@ impl RbBatchedCsv {
|
|
101
100
|
.with_n_threads(n_threads)
|
102
101
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
103
102
|
.low_memory(low_memory)
|
104
|
-
.
|
103
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
105
104
|
.with_null_values(null_values)
|
106
105
|
.with_try_parse_dates(try_parse_dates)
|
107
106
|
.with_quote_char(quote_char)
|
@@ -14,7 +14,7 @@ use polars::frame::NullStrategy;
|
|
14
14
|
use polars::io::avro::AvroCompression;
|
15
15
|
use polars::prelude::*;
|
16
16
|
use polars::series::ops::NullBehavior;
|
17
|
-
use
|
17
|
+
use polars_utils::total_ord::TotalEq;
|
18
18
|
use smartstring::alias::String as SmartString;
|
19
19
|
|
20
20
|
use crate::object::OBJECT_NAME;
|
@@ -78,10 +78,10 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
78
78
|
Ok(rbs.series.borrow().clone())
|
79
79
|
}
|
80
80
|
|
81
|
-
impl TryConvert for Wrap<
|
81
|
+
impl TryConvert for Wrap<StringChunked> {
|
82
82
|
fn try_convert(obj: Value) -> RbResult<Self> {
|
83
83
|
let (seq, len) = get_rbseq(obj)?;
|
84
|
-
let mut builder =
|
84
|
+
let mut builder = StringChunkedBuilder::new("", len, len * 25);
|
85
85
|
|
86
86
|
for res in seq.each() {
|
87
87
|
let item = res?;
|
@@ -149,8 +149,8 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
149
149
|
AnyValue::Float64(v) => ruby.into_value(v),
|
150
150
|
AnyValue::Null => ruby.qnil().as_value(),
|
151
151
|
AnyValue::Boolean(v) => ruby.into_value(v),
|
152
|
-
AnyValue::
|
153
|
-
AnyValue::
|
152
|
+
AnyValue::String(v) => ruby.into_value(v),
|
153
|
+
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
154
154
|
AnyValue::Categorical(idx, rev, arr) => {
|
155
155
|
let s = if arr.is_null() {
|
156
156
|
rev.get(idx)
|
@@ -215,7 +215,7 @@ impl IntoValue for Wrap<DataType> {
|
|
215
215
|
.unwrap()
|
216
216
|
}
|
217
217
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
218
|
-
DataType::
|
218
|
+
DataType::String => pl.const_get::<_, Value>("String").unwrap(),
|
219
219
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
220
220
|
DataType::Array(inner, size) => {
|
221
221
|
let inner = Wrap(*inner);
|
@@ -242,8 +242,8 @@ impl IntoValue for Wrap<DataType> {
|
|
242
242
|
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
243
243
|
.unwrap()
|
244
244
|
}
|
245
|
-
DataType::Object(_) => pl.const_get::<_, Value>("Object").unwrap(),
|
246
|
-
DataType::Categorical(_) => pl.const_get::<_, Value>("Categorical").unwrap(),
|
245
|
+
DataType::Object(_, _) => pl.const_get::<_, Value>("Object").unwrap(),
|
246
|
+
DataType::Categorical(_, _) => pl.const_get::<_, Value>("Categorical").unwrap(),
|
247
247
|
DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
|
248
248
|
DataType::Struct(fields) => {
|
249
249
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
@@ -277,7 +277,7 @@ impl IntoValue for Wrap<TimeUnit> {
|
|
277
277
|
}
|
278
278
|
}
|
279
279
|
|
280
|
-
impl IntoValue for Wrap<&
|
280
|
+
impl IntoValue for Wrap<&StringChunked> {
|
281
281
|
fn into_value_with(self, _: &Ruby) -> Value {
|
282
282
|
let iter = self.0.into_iter();
|
283
283
|
RArray::from_iter(iter).into_value()
|
@@ -406,10 +406,10 @@ impl TryConvert for Wrap<DataType> {
|
|
406
406
|
"Polars::Int16" => DataType::Int16,
|
407
407
|
"Polars::Int32" => DataType::Int32,
|
408
408
|
"Polars::Int64" => DataType::Int64,
|
409
|
-
"Polars::
|
409
|
+
"Polars::String" => DataType::String,
|
410
410
|
"Polars::Binary" => DataType::Binary,
|
411
411
|
"Polars::Boolean" => DataType::Boolean,
|
412
|
-
"Polars::Categorical" => DataType::Categorical(None),
|
412
|
+
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
413
413
|
"Polars::Date" => DataType::Date,
|
414
414
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
415
415
|
"Polars::Time" => DataType::Time,
|
@@ -417,7 +417,7 @@ impl TryConvert for Wrap<DataType> {
|
|
417
417
|
"Polars::Decimal" => DataType::Decimal(None, None),
|
418
418
|
"Polars::Float32" => DataType::Float32,
|
419
419
|
"Polars::Float64" => DataType::Float64,
|
420
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME),
|
420
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
421
421
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
422
422
|
"Polars::Null" => DataType::Null,
|
423
423
|
"Polars::Unknown" => DataType::Unknown,
|
@@ -477,17 +477,17 @@ impl TryConvert for Wrap<DataType> {
|
|
477
477
|
"i16" => DataType::Int16,
|
478
478
|
"i32" => DataType::Int32,
|
479
479
|
"i64" => DataType::Int64,
|
480
|
-
"str" => DataType::
|
480
|
+
"str" => DataType::String,
|
481
481
|
"bin" => DataType::Binary,
|
482
482
|
"bool" => DataType::Boolean,
|
483
|
-
"cat" => DataType::Categorical(None),
|
483
|
+
"cat" => DataType::Categorical(None, Default::default()),
|
484
484
|
"date" => DataType::Date,
|
485
485
|
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
486
486
|
"f32" => DataType::Float32,
|
487
487
|
"time" => DataType::Time,
|
488
488
|
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
489
489
|
"f64" => DataType::Float64,
|
490
|
-
"obj" => DataType::Object(OBJECT_NAME),
|
490
|
+
"obj" => DataType::Object(OBJECT_NAME, None),
|
491
491
|
"list" => DataType::List(Box::new(DataType::Boolean)),
|
492
492
|
"null" => DataType::Null,
|
493
493
|
"unk" => DataType::Unknown,
|
@@ -513,7 +513,7 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
513
513
|
Ok(AnyValue::Float64(v.to_f64()).into())
|
514
514
|
} else if let Some(v) = RString::from_value(ob) {
|
515
515
|
if v.enc_get() == Index::utf8() {
|
516
|
-
Ok(AnyValue::
|
516
|
+
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
517
517
|
} else {
|
518
518
|
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
519
|
}
|
@@ -891,7 +891,8 @@ impl TryConvert for Wrap<JoinType> {
|
|
891
891
|
let parsed = match String::try_convert(ob)?.as_str() {
|
892
892
|
"inner" => JoinType::Inner,
|
893
893
|
"left" => JoinType::Left,
|
894
|
-
"outer" => JoinType::Outer,
|
894
|
+
"outer" => JoinType::Outer { coalesce: false },
|
895
|
+
"outer_coalesce" => JoinType::Outer { coalesce: true },
|
895
896
|
"semi" => JoinType::Semi,
|
896
897
|
"anti" => JoinType::Anti,
|
897
898
|
// #[cfg(feature = "cross_join")]
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -48,7 +48,7 @@ impl RbDataFrame {
|
|
48
48
|
schema_overwrite: Option<Schema>,
|
49
49
|
) -> RbResult<Self> {
|
50
50
|
// object builder must be registered.
|
51
|
-
crate::
|
51
|
+
crate::on_startup::register_object_builder();
|
52
52
|
|
53
53
|
let schema =
|
54
54
|
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
|
@@ -120,7 +120,7 @@ impl RbDataFrame {
|
|
120
120
|
// TODO fix
|
121
121
|
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
122
122
|
let low_memory = bool::try_convert(arguments[16])?;
|
123
|
-
let
|
123
|
+
let comment_prefix = Option::<String>::try_convert(arguments[17])?;
|
124
124
|
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
125
125
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
|
126
126
|
let try_parse_dates = bool::try_convert(arguments[20])?;
|
@@ -131,7 +131,6 @@ impl RbDataFrame {
|
|
131
131
|
// end arguments
|
132
132
|
|
133
133
|
let null_values = null_values.map(|w| w.0);
|
134
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
135
134
|
let eol_char = eol_char.as_bytes()[0];
|
136
135
|
|
137
136
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -181,7 +180,7 @@ impl RbDataFrame {
|
|
181
180
|
.with_dtypes(overwrite_dtype.map(Arc::new))
|
182
181
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
183
182
|
.low_memory(low_memory)
|
184
|
-
.
|
183
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
185
184
|
.with_null_values(null_values)
|
186
185
|
.with_try_parse_dates(try_parse_dates)
|
187
186
|
.with_quote_char(quote_char)
|
@@ -297,12 +296,18 @@ impl RbDataFrame {
|
|
297
296
|
Ok(df) => Ok(df.into()),
|
298
297
|
// try arrow json reader instead
|
299
298
|
// this is row oriented
|
300
|
-
Err(
|
301
|
-
let
|
302
|
-
|
303
|
-
.
|
304
|
-
|
305
|
-
|
299
|
+
Err(e) => {
|
300
|
+
let msg = format!("{e}");
|
301
|
+
if msg.contains("successful parse invalid data") {
|
302
|
+
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
303
|
+
Err(e)
|
304
|
+
} else {
|
305
|
+
let out = JsonReader::new(mmap_bytes_r)
|
306
|
+
.with_json_format(JsonFormat::Json)
|
307
|
+
.finish()
|
308
|
+
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
309
|
+
Ok(out.into())
|
310
|
+
}
|
306
311
|
}
|
307
312
|
}
|
308
313
|
}
|
@@ -504,7 +509,7 @@ impl RbDataFrame {
|
|
504
509
|
.get_columns()
|
505
510
|
.iter()
|
506
511
|
.map(|s| match s.dtype() {
|
507
|
-
DataType::Object(_) => {
|
512
|
+
DataType::Object(_, _) => {
|
508
513
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
509
514
|
obj.unwrap().to_object()
|
510
515
|
}
|
@@ -523,7 +528,7 @@ impl RbDataFrame {
|
|
523
528
|
.get_columns()
|
524
529
|
.iter()
|
525
530
|
.map(|s| match s.dtype() {
|
526
|
-
DataType::Object(_) => {
|
531
|
+
DataType::Object(_, _) => {
|
527
532
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
528
533
|
obj.unwrap().to_object()
|
529
534
|
}
|
@@ -785,8 +790,8 @@ impl RbDataFrame {
|
|
785
790
|
.map(|s| RbSeries::new(s.clone()))
|
786
791
|
}
|
787
792
|
|
788
|
-
pub fn
|
789
|
-
self.df.borrow().
|
793
|
+
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
794
|
+
self.df.borrow().get_column_index(&name)
|
790
795
|
}
|
791
796
|
|
792
797
|
// TODO remove clone
|
@@ -828,18 +833,18 @@ impl RbDataFrame {
|
|
828
833
|
Ok(())
|
829
834
|
}
|
830
835
|
|
831
|
-
pub fn
|
836
|
+
pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
832
837
|
self.df
|
833
838
|
.borrow_mut()
|
834
|
-
.
|
839
|
+
.replace_column(index, new_col.series.borrow().clone())
|
835
840
|
.map_err(RbPolarsErr::from)?;
|
836
841
|
Ok(())
|
837
842
|
}
|
838
843
|
|
839
|
-
pub fn
|
844
|
+
pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
840
845
|
self.df
|
841
846
|
.borrow_mut()
|
842
|
-
.
|
847
|
+
.insert_column(index, new_col.series.borrow().clone())
|
843
848
|
.map_err(RbPolarsErr::from)?;
|
844
849
|
Ok(())
|
845
850
|
}
|
@@ -874,11 +879,11 @@ impl RbDataFrame {
|
|
874
879
|
Ok(mask.into_series().into())
|
875
880
|
}
|
876
881
|
|
877
|
-
pub fn
|
882
|
+
pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
878
883
|
if null_equal {
|
879
|
-
self.df.borrow().
|
884
|
+
self.df.borrow().equals_missing(&other.df.borrow())
|
880
885
|
} else {
|
881
|
-
self.df.borrow().
|
886
|
+
self.df.borrow().equals(&other.df.borrow())
|
882
887
|
}
|
883
888
|
}
|
884
889
|
|
@@ -966,34 +971,6 @@ impl RbDataFrame {
|
|
966
971
|
self.df.borrow().clone().lazy().into()
|
967
972
|
}
|
968
973
|
|
969
|
-
pub fn max(&self) -> Self {
|
970
|
-
self.df.borrow().max().into()
|
971
|
-
}
|
972
|
-
|
973
|
-
pub fn min(&self) -> Self {
|
974
|
-
self.df.borrow().min().into()
|
975
|
-
}
|
976
|
-
|
977
|
-
pub fn sum(&self) -> Self {
|
978
|
-
self.df.borrow().sum().into()
|
979
|
-
}
|
980
|
-
|
981
|
-
pub fn mean(&self) -> Self {
|
982
|
-
self.df.borrow().mean().into()
|
983
|
-
}
|
984
|
-
|
985
|
-
pub fn std(&self, ddof: u8) -> Self {
|
986
|
-
self.df.borrow().std(ddof).into()
|
987
|
-
}
|
988
|
-
|
989
|
-
pub fn var(&self, ddof: u8) -> Self {
|
990
|
-
self.df.borrow().var(ddof).into()
|
991
|
-
}
|
992
|
-
|
993
|
-
pub fn median(&self) -> Self {
|
994
|
-
self.df.borrow().median().into()
|
995
|
-
}
|
996
|
-
|
997
974
|
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
998
975
|
let s = self
|
999
976
|
.df
|
@@ -1040,19 +1017,6 @@ impl RbDataFrame {
|
|
1040
1017
|
Ok(s.map(|s| s.into()))
|
1041
1018
|
}
|
1042
1019
|
|
1043
|
-
pub fn quantile(
|
1044
|
-
&self,
|
1045
|
-
quantile: f64,
|
1046
|
-
interpolation: Wrap<QuantileInterpolOptions>,
|
1047
|
-
) -> RbResult<Self> {
|
1048
|
-
let df = self
|
1049
|
-
.df
|
1050
|
-
.borrow()
|
1051
|
-
.quantile(quantile, interpolation.0)
|
1052
|
-
.map_err(RbPolarsErr::from)?;
|
1053
|
-
Ok(df.into())
|
1054
|
-
}
|
1055
|
-
|
1056
1020
|
pub fn to_dummies(
|
1057
1021
|
&self,
|
1058
1022
|
columns: Option<Vec<String>>,
|
@@ -1124,7 +1088,7 @@ impl RbDataFrame {
|
|
1124
1088
|
.into_datetime(tu, tz)
|
1125
1089
|
.into_series()
|
1126
1090
|
}
|
1127
|
-
Some(DataType::
|
1091
|
+
Some(DataType::String) => {
|
1128
1092
|
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1129
1093
|
}
|
1130
1094
|
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
@@ -5,6 +5,13 @@ use crate::RbExpr;
|
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
7
|
pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
|
8
|
-
self.inner
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.cast(DataType::Categorical(None, ordering.0))
|
11
|
+
.into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn cat_get_categories(&self) -> Self {
|
15
|
+
self.inner.clone().cat().get_categories().into()
|
9
16
|
}
|
10
17
|
}
|
@@ -162,12 +162,71 @@ impl RbExpr {
|
|
162
162
|
.into()
|
163
163
|
}
|
164
164
|
|
165
|
+
pub fn cut(
|
166
|
+
&self,
|
167
|
+
breaks: Vec<f64>,
|
168
|
+
labels: Option<Vec<String>>,
|
169
|
+
left_closed: bool,
|
170
|
+
include_breaks: bool,
|
171
|
+
) -> Self {
|
172
|
+
self.inner
|
173
|
+
.clone()
|
174
|
+
.cut(breaks, labels, left_closed, include_breaks)
|
175
|
+
.into()
|
176
|
+
}
|
177
|
+
|
178
|
+
pub fn qcut(
|
179
|
+
&self,
|
180
|
+
probs: Vec<f64>,
|
181
|
+
labels: Option<Vec<String>>,
|
182
|
+
left_closed: bool,
|
183
|
+
allow_duplicates: bool,
|
184
|
+
include_breaks: bool,
|
185
|
+
) -> Self {
|
186
|
+
self.inner
|
187
|
+
.clone()
|
188
|
+
.qcut(probs, labels, left_closed, allow_duplicates, include_breaks)
|
189
|
+
.into()
|
190
|
+
}
|
191
|
+
|
192
|
+
pub fn qcut_uniform(
|
193
|
+
&self,
|
194
|
+
n_bins: usize,
|
195
|
+
labels: Option<Vec<String>>,
|
196
|
+
left_closed: bool,
|
197
|
+
allow_duplicates: bool,
|
198
|
+
include_breaks: bool,
|
199
|
+
) -> Self {
|
200
|
+
self.inner
|
201
|
+
.clone()
|
202
|
+
.qcut_uniform(
|
203
|
+
n_bins,
|
204
|
+
labels,
|
205
|
+
left_closed,
|
206
|
+
allow_duplicates,
|
207
|
+
include_breaks,
|
208
|
+
)
|
209
|
+
.into()
|
210
|
+
}
|
211
|
+
|
212
|
+
pub fn rle(&self) -> Self {
|
213
|
+
self.inner.clone().rle().into()
|
214
|
+
}
|
215
|
+
|
216
|
+
pub fn rle_id(&self) -> Self {
|
217
|
+
self.inner.clone().rle_id().into()
|
218
|
+
}
|
219
|
+
|
165
220
|
pub fn agg_groups(&self) -> Self {
|
166
|
-
self.clone().
|
221
|
+
self.inner.clone().agg_groups().into()
|
167
222
|
}
|
168
223
|
|
169
224
|
pub fn count(&self) -> Self {
|
170
|
-
self.clone().
|
225
|
+
self.inner.clone().count().into()
|
226
|
+
}
|
227
|
+
|
228
|
+
pub fn len(&self) -> Self {
|
229
|
+
self.inner.clone().len().into()
|
171
230
|
}
|
172
231
|
|
173
232
|
pub fn value_counts(&self, multithreaded: bool, sorted: bool) -> Self {
|
@@ -345,11 +404,11 @@ impl RbExpr {
|
|
345
404
|
self.clone().inner.explode().into()
|
346
405
|
}
|
347
406
|
|
348
|
-
pub fn gather_every(&self, n: usize) -> Self {
|
407
|
+
pub fn gather_every(&self, n: usize, offset: usize) -> Self {
|
349
408
|
self.clone()
|
350
409
|
.inner
|
351
410
|
.map(
|
352
|
-
move |s: Series| Ok(Some(s.gather_every(n))),
|
411
|
+
move |s: Series| Ok(Some(s.gather_every(n, offset))),
|
353
412
|
GetOutput::same_type(),
|
354
413
|
)
|
355
414
|
.with_fmt("gather_every")
|
@@ -100,6 +100,7 @@ impl RbExpr {
|
|
100
100
|
by: Option<String>,
|
101
101
|
closed: Option<Wrap<ClosedWindow>>,
|
102
102
|
ddof: u8,
|
103
|
+
warn_if_unsorted: bool,
|
103
104
|
) -> Self {
|
104
105
|
let options = RollingOptions {
|
105
106
|
window_size: Duration::parse(&window_size),
|
@@ -109,6 +110,7 @@ impl RbExpr {
|
|
109
110
|
by,
|
110
111
|
closed_window: closed.map(|c| c.0),
|
111
112
|
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
113
|
+
warn_if_unsorted,
|
112
114
|
};
|
113
115
|
|
114
116
|
self.inner.clone().rolling_std(options).into()
|
@@ -124,6 +126,7 @@ impl RbExpr {
|
|
124
126
|
by: Option<String>,
|
125
127
|
closed: Option<Wrap<ClosedWindow>>,
|
126
128
|
ddof: u8,
|
129
|
+
warn_if_unsorted: bool,
|
127
130
|
) -> Self {
|
128
131
|
let options = RollingOptions {
|
129
132
|
window_size: Duration::parse(&window_size),
|
@@ -133,6 +136,7 @@ impl RbExpr {
|
|
133
136
|
by,
|
134
137
|
closed_window: closed.map(|c| c.0),
|
135
138
|
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
139
|
+
warn_if_unsorted,
|
136
140
|
};
|
137
141
|
|
138
142
|
self.inner.clone().rolling_var(options).into()
|
@@ -146,6 +150,7 @@ impl RbExpr {
|
|
146
150
|
center: bool,
|
147
151
|
by: Option<String>,
|
148
152
|
closed: Option<Wrap<ClosedWindow>>,
|
153
|
+
warn_if_unsorted: bool,
|
149
154
|
) -> Self {
|
150
155
|
let options = RollingOptions {
|
151
156
|
window_size: Duration::parse(&window_size),
|
@@ -154,12 +159,10 @@ impl RbExpr {
|
|
154
159
|
center,
|
155
160
|
by,
|
156
161
|
closed_window: closed.map(|c| c.0),
|
157
|
-
fn_params:
|
158
|
-
|
159
|
-
interpol: QuantileInterpolOptions::Linear,
|
160
|
-
}) as Arc<dyn Any + Send + Sync>),
|
162
|
+
fn_params: None,
|
163
|
+
warn_if_unsorted,
|
161
164
|
};
|
162
|
-
self.inner.clone().
|
165
|
+
self.inner.clone().rolling_median(options).into()
|
163
166
|
}
|
164
167
|
|
165
168
|
#[allow(clippy::too_many_arguments)]
|
@@ -173,6 +176,7 @@ impl RbExpr {
|
|
173
176
|
center: bool,
|
174
177
|
by: Option<String>,
|
175
178
|
closed: Option<Wrap<ClosedWindow>>,
|
179
|
+
warn_if_unsorted: bool,
|
176
180
|
) -> Self {
|
177
181
|
let options = RollingOptions {
|
178
182
|
window_size: Duration::parse(&window_size),
|
@@ -181,13 +185,14 @@ impl RbExpr {
|
|
181
185
|
center,
|
182
186
|
by,
|
183
187
|
closed_window: closed.map(|c| c.0),
|
184
|
-
fn_params:
|
185
|
-
|
186
|
-
interpol: interpolation.0,
|
187
|
-
}) as Arc<dyn Any + Send + Sync>),
|
188
|
+
fn_params: None,
|
189
|
+
warn_if_unsorted,
|
188
190
|
};
|
189
191
|
|
190
|
-
self.inner
|
192
|
+
self.inner
|
193
|
+
.clone()
|
194
|
+
.rolling_quantile(interpolation.0, quantile, options)
|
195
|
+
.into()
|
191
196
|
}
|
192
197
|
|
193
198
|
pub fn rolling_skew(&self, window_size: usize, bias: bool) -> Self {
|
@@ -192,7 +192,7 @@ impl RbExpr {
|
|
192
192
|
self.clone()
|
193
193
|
.inner
|
194
194
|
.map(
|
195
|
-
move |s| s.
|
195
|
+
move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
|
196
196
|
GetOutput::same_type(),
|
197
197
|
)
|
198
198
|
.with_fmt("str.hex_encode")
|
@@ -203,7 +203,7 @@ impl RbExpr {
|
|
203
203
|
self.clone()
|
204
204
|
.inner
|
205
205
|
.map(
|
206
|
-
move |s| s.
|
206
|
+
move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
207
207
|
GetOutput::same_type(),
|
208
208
|
)
|
209
209
|
.with_fmt("str.hex_decode")
|
@@ -214,7 +214,7 @@ impl RbExpr {
|
|
214
214
|
self.clone()
|
215
215
|
.inner
|
216
216
|
.map(
|
217
|
-
move |s| s.
|
217
|
+
move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
|
218
218
|
GetOutput::same_type(),
|
219
219
|
)
|
220
220
|
.with_fmt("str.base64_encode")
|
@@ -226,7 +226,7 @@ impl RbExpr {
|
|
226
226
|
.inner
|
227
227
|
.map(
|
228
228
|
move |s| {
|
229
|
-
s.
|
229
|
+
s.str()?
|
230
230
|
.base64_decode(strict)
|
231
231
|
.map(|s| Some(s.into_series()))
|
232
232
|
},
|
@@ -258,8 +258,8 @@ impl RbExpr {
|
|
258
258
|
};
|
259
259
|
|
260
260
|
let function = move |s: Series| {
|
261
|
-
let ca = s.
|
262
|
-
match ca.
|
261
|
+
let ca = s.str()?;
|
262
|
+
match ca.json_decode(dtype.clone(), infer_schema_len) {
|
263
263
|
Ok(ca) => Ok(Some(ca.into_series())),
|
264
264
|
Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())),
|
265
265
|
}
|
@@ -268,13 +268,13 @@ impl RbExpr {
|
|
268
268
|
self.clone()
|
269
269
|
.inner
|
270
270
|
.map(function, output_type)
|
271
|
-
.with_fmt("str.
|
271
|
+
.with_fmt("str.json_decode")
|
272
272
|
.into()
|
273
273
|
}
|
274
274
|
|
275
275
|
pub fn str_json_path_match(&self, pat: String) -> Self {
|
276
276
|
let function = move |s: Series| {
|
277
|
-
let ca = s.
|
277
|
+
let ca = s.str()?;
|
278
278
|
match ca.json_path_match(&pat) {
|
279
279
|
Ok(ca) => Ok(Some(ca.into_series())),
|
280
280
|
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
@@ -282,7 +282,7 @@ impl RbExpr {
|
|
282
282
|
};
|
283
283
|
self.clone()
|
284
284
|
.inner
|
285
|
-
.map(function, GetOutput::from_type(DataType::
|
285
|
+
.map(function, GetOutput::from_type(DataType::String))
|
286
286
|
.with_fmt("str.json_path_match")
|
287
287
|
.into()
|
288
288
|
}
|
@@ -6,21 +6,16 @@ use crate::prelude::*;
|
|
6
6
|
use crate::RbExpr;
|
7
7
|
|
8
8
|
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
9
|
+
let start = start.inner.clone();
|
10
|
+
let end = end.inner.clone();
|
9
11
|
let dtype = dtype.0;
|
10
|
-
|
11
|
-
let mut result = dsl::int_range(start.inner.clone(), end.inner.clone(), step);
|
12
|
-
|
13
|
-
if dtype != DataType::Int64 {
|
14
|
-
result = result.cast(dtype)
|
15
|
-
}
|
16
|
-
|
17
|
-
result.into()
|
12
|
+
dsl::int_range(start, end, step, dtype).into()
|
18
13
|
}
|
19
14
|
|
20
|
-
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step:
|
15
|
+
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataType>) -> RbExpr {
|
21
16
|
let dtype = dtype.0;
|
22
17
|
|
23
|
-
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step);
|
18
|
+
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step.inner.clone());
|
24
19
|
|
25
20
|
if dtype != DataType::Int64 {
|
26
21
|
result = result.cast(DataType::List(Box::new(dtype)))
|