polars-df 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +248 -269
- data/ext/polars/Cargo.toml +7 -7
- data/ext/polars/src/allocator.rs +13 -0
- data/ext/polars/src/conversion/chunked_array.rs +5 -7
- data/ext/polars/src/conversion/mod.rs +0 -6
- data/ext/polars/src/dataframe/general.rs +1 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/expr/meta.rs +5 -9
- data/ext/polars/src/functions/eager.rs +1 -1
- data/ext/polars/src/lazyframe/mod.rs +27 -11
- data/ext/polars/src/lib.rs +5 -18
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +49 -99
- data/ext/polars/src/series/mod.rs +3 -3
- data/lib/polars/io/ipc.rb +32 -4
- data/lib/polars/io/parquet.rb +10 -4
- data/lib/polars/lazy_frame.rb +5 -1
- data/lib/polars/series.rb +3 -2
- data/lib/polars/string_expr.rb +9 -9
- data/lib/polars/version.rb +1 -1
- metadata +3 -2
@@ -1,8 +1,7 @@
|
|
1
|
-
use magnus::{class, prelude::*, typed_data::Obj, IntoValue,
|
1
|
+
use magnus::{class, prelude::*, typed_data::Obj, IntoValue, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
4
|
use super::*;
|
5
|
-
use crate::conversion::slice_to_wrapped;
|
6
5
|
use crate::series::RbSeries;
|
7
6
|
use crate::{ObjectValue, RbResult};
|
8
7
|
|
@@ -43,7 +42,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
|
|
43
42
|
todo!()
|
44
43
|
} else if out.is_kind_of(class::hash()) {
|
45
44
|
let first = Wrap::<AnyValue<'_>>::try_convert(out)?;
|
46
|
-
applyer.
|
45
|
+
applyer.apply_into_struct(lambda, null_count, first.0)
|
47
46
|
}
|
48
47
|
// this succeeds for numpy ints as well, where checking if it is pyint fails
|
49
48
|
// we do this later in the chain so that we don't extract integers from string chars.
|
@@ -70,12 +69,8 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
|
|
70
69
|
pub trait ApplyLambda<'a> {
|
71
70
|
fn apply_lambda_unknown(&'a self, _lambda: Value) -> RbResult<RbSeries>;
|
72
71
|
|
73
|
-
/// Apply a lambda that doesn't change output types
|
74
|
-
#[allow(dead_code)]
|
75
|
-
fn apply_lambda(&'a self, _lambda: Value) -> RbResult<RbSeries>;
|
76
|
-
|
77
72
|
// Used to store a struct type
|
78
|
-
fn
|
73
|
+
fn apply_into_struct(
|
79
74
|
&'a self,
|
80
75
|
lambda: Value,
|
81
76
|
init_null_count: usize,
|
@@ -183,19 +178,14 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
183
178
|
.into())
|
184
179
|
}
|
185
180
|
|
186
|
-
fn
|
187
|
-
self.apply_lambda_with_bool_out_type(lambda, 0, None)
|
188
|
-
.map(|ca| RbSeries::new(ca.into_series()))
|
189
|
-
}
|
190
|
-
|
191
|
-
fn apply_to_struct(
|
181
|
+
fn apply_into_struct(
|
192
182
|
&'a self,
|
193
183
|
lambda: Value,
|
194
184
|
init_null_count: usize,
|
195
185
|
first_value: AnyValue<'a>,
|
196
186
|
) -> RbResult<RbSeries> {
|
197
187
|
let skip = 1;
|
198
|
-
if !self.
|
188
|
+
if !self.has_nulls() {
|
199
189
|
let it = self
|
200
190
|
.into_no_null_iter()
|
201
191
|
.skip(init_null_count + skip)
|
@@ -223,7 +213,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
223
213
|
let skip = usize::from(first_value.is_some());
|
224
214
|
if init_null_count == self.len() {
|
225
215
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
226
|
-
} else if !self.
|
216
|
+
} else if !self.has_nulls() {
|
227
217
|
let it = self
|
228
218
|
.into_no_null_iter()
|
229
219
|
.skip(init_null_count + skip)
|
@@ -259,7 +249,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
259
249
|
let skip = usize::from(first_value.is_some());
|
260
250
|
if init_null_count == self.len() {
|
261
251
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
262
|
-
} else if !self.
|
252
|
+
} else if !self.has_nulls() {
|
263
253
|
let it = self
|
264
254
|
.into_no_null_iter()
|
265
255
|
.skip(init_null_count + skip)
|
@@ -295,7 +285,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
295
285
|
let skip = usize::from(first_value.is_some());
|
296
286
|
if init_null_count == self.len() {
|
297
287
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
298
|
-
} else if !self.
|
288
|
+
} else if !self.has_nulls() {
|
299
289
|
let it = self
|
300
290
|
.into_no_null_iter()
|
301
291
|
.skip(init_null_count + skip)
|
@@ -333,7 +323,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
333
323
|
let skip = 1;
|
334
324
|
if init_null_count == self.len() {
|
335
325
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
336
|
-
} else if !self.
|
326
|
+
} else if !self.has_nulls() {
|
337
327
|
let it = self
|
338
328
|
.into_no_null_iter()
|
339
329
|
.skip(init_null_count + skip)
|
@@ -405,7 +395,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
405
395
|
let skip = usize::from(first_value.is_some());
|
406
396
|
if init_null_count == self.len() {
|
407
397
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
408
|
-
} else if !self.
|
398
|
+
} else if !self.has_nulls() {
|
409
399
|
let it = self
|
410
400
|
.into_no_null_iter()
|
411
401
|
.skip(init_null_count + skip)
|
@@ -460,19 +450,14 @@ where
|
|
460
450
|
.into())
|
461
451
|
}
|
462
452
|
|
463
|
-
fn
|
464
|
-
self.apply_lambda_with_primitive_out_type::<T>(lambda, 0, None)
|
465
|
-
.map(|ca| RbSeries::new(ca.into_series()))
|
466
|
-
}
|
467
|
-
|
468
|
-
fn apply_to_struct(
|
453
|
+
fn apply_into_struct(
|
469
454
|
&'a self,
|
470
455
|
lambda: Value,
|
471
456
|
init_null_count: usize,
|
472
457
|
first_value: AnyValue<'a>,
|
473
458
|
) -> RbResult<RbSeries> {
|
474
459
|
let skip = 1;
|
475
|
-
if !self.
|
460
|
+
if !self.has_nulls() {
|
476
461
|
let it = self
|
477
462
|
.into_no_null_iter()
|
478
463
|
.skip(init_null_count + skip)
|
@@ -500,7 +485,7 @@ where
|
|
500
485
|
let skip = usize::from(first_value.is_some());
|
501
486
|
if init_null_count == self.len() {
|
502
487
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
503
|
-
} else if !self.
|
488
|
+
} else if !self.has_nulls() {
|
504
489
|
let it = self
|
505
490
|
.into_no_null_iter()
|
506
491
|
.skip(init_null_count + skip)
|
@@ -536,7 +521,7 @@ where
|
|
536
521
|
let skip = usize::from(first_value.is_some());
|
537
522
|
if init_null_count == self.len() {
|
538
523
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
539
|
-
} else if !self.
|
524
|
+
} else if !self.has_nulls() {
|
540
525
|
let it = self
|
541
526
|
.into_no_null_iter()
|
542
527
|
.skip(init_null_count + skip)
|
@@ -572,7 +557,7 @@ where
|
|
572
557
|
let skip = usize::from(first_value.is_some());
|
573
558
|
if init_null_count == self.len() {
|
574
559
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
575
|
-
} else if !self.
|
560
|
+
} else if !self.has_nulls() {
|
576
561
|
let it = self
|
577
562
|
.into_no_null_iter()
|
578
563
|
.skip(init_null_count + skip)
|
@@ -610,7 +595,7 @@ where
|
|
610
595
|
let skip = 1;
|
611
596
|
if init_null_count == self.len() {
|
612
597
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
613
|
-
} else if !self.
|
598
|
+
} else if !self.has_nulls() {
|
614
599
|
let it = self
|
615
600
|
.into_no_null_iter()
|
616
601
|
.skip(init_null_count + skip)
|
@@ -682,7 +667,7 @@ where
|
|
682
667
|
let skip = usize::from(first_value.is_some());
|
683
668
|
if init_null_count == self.len() {
|
684
669
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
685
|
-
} else if !self.
|
670
|
+
} else if !self.has_nulls() {
|
686
671
|
let it = self
|
687
672
|
.into_no_null_iter()
|
688
673
|
.skip(init_null_count + skip)
|
@@ -732,19 +717,14 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
732
717
|
.into())
|
733
718
|
}
|
734
719
|
|
735
|
-
fn
|
736
|
-
let ca = self.apply_lambda_with_utf8_out_type(lambda, 0, None)?;
|
737
|
-
Ok(ca.into_series().into())
|
738
|
-
}
|
739
|
-
|
740
|
-
fn apply_to_struct(
|
720
|
+
fn apply_into_struct(
|
741
721
|
&'a self,
|
742
722
|
lambda: Value,
|
743
723
|
init_null_count: usize,
|
744
724
|
first_value: AnyValue<'a>,
|
745
725
|
) -> RbResult<RbSeries> {
|
746
726
|
let skip = 1;
|
747
|
-
if !self.
|
727
|
+
if !self.has_nulls() {
|
748
728
|
let it = self
|
749
729
|
.into_no_null_iter()
|
750
730
|
.skip(init_null_count + skip)
|
@@ -772,7 +752,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
772
752
|
let skip = usize::from(first_value.is_some());
|
773
753
|
if init_null_count == self.len() {
|
774
754
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
775
|
-
} else if !self.
|
755
|
+
} else if !self.has_nulls() {
|
776
756
|
let it = self
|
777
757
|
.into_no_null_iter()
|
778
758
|
.skip(init_null_count + skip)
|
@@ -808,7 +788,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
808
788
|
let skip = usize::from(first_value.is_some());
|
809
789
|
if init_null_count == self.len() {
|
810
790
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
811
|
-
} else if !self.
|
791
|
+
} else if !self.has_nulls() {
|
812
792
|
let it = self
|
813
793
|
.into_no_null_iter()
|
814
794
|
.skip(init_null_count + skip)
|
@@ -844,7 +824,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
844
824
|
let skip = usize::from(first_value.is_some());
|
845
825
|
if init_null_count == self.len() {
|
846
826
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
847
|
-
} else if !self.
|
827
|
+
} else if !self.has_nulls() {
|
848
828
|
let it = self
|
849
829
|
.into_no_null_iter()
|
850
830
|
.skip(init_null_count + skip)
|
@@ -882,7 +862,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
882
862
|
let skip = 1;
|
883
863
|
if init_null_count == self.len() {
|
884
864
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
885
|
-
} else if !self.
|
865
|
+
} else if !self.has_nulls() {
|
886
866
|
let it = self
|
887
867
|
.into_no_null_iter()
|
888
868
|
.skip(init_null_count + skip)
|
@@ -954,7 +934,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
954
934
|
let skip = usize::from(first_value.is_some());
|
955
935
|
if init_null_count == self.len() {
|
956
936
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
957
|
-
} else if !self.
|
937
|
+
} else if !self.has_nulls() {
|
958
938
|
let it = self
|
959
939
|
.into_no_null_iter()
|
960
940
|
.skip(init_null_count + skip)
|
@@ -983,21 +963,16 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
983
963
|
}
|
984
964
|
}
|
985
965
|
|
986
|
-
fn
|
987
|
-
|
988
|
-
for (name, val) in names.iter().zip(slice_to_wrapped(vals)) {
|
989
|
-
dict.aset(name.to_string(), (*val).clone()).unwrap()
|
990
|
-
}
|
991
|
-
dict
|
966
|
+
fn iter_struct(ca: &StructChunked) -> impl Iterator<Item = AnyValue> {
|
967
|
+
(0..ca.len()).map(|i| unsafe { ca.get_any_value_unchecked(i) })
|
992
968
|
}
|
993
969
|
|
994
970
|
impl<'a> ApplyLambda<'a> for StructChunked {
|
995
971
|
fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
996
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
997
972
|
let mut null_count = 0;
|
998
|
-
|
999
|
-
|
1000
|
-
let out: Value = lambda.funcall("call", (
|
973
|
+
|
974
|
+
for val in iter_struct(self) {
|
975
|
+
let out: Value = lambda.funcall("call", (Wrap(val),))?;
|
1001
976
|
if out.is_nil() {
|
1002
977
|
null_count += 1;
|
1003
978
|
continue;
|
@@ -1009,22 +984,15 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1009
984
|
Ok(self.clone().into_series().into())
|
1010
985
|
}
|
1011
986
|
|
1012
|
-
fn
|
1013
|
-
self.apply_lambda_unknown(lambda)
|
1014
|
-
}
|
1015
|
-
|
1016
|
-
fn apply_to_struct(
|
987
|
+
fn apply_into_struct(
|
1017
988
|
&'a self,
|
1018
989
|
lambda: Value,
|
1019
990
|
init_null_count: usize,
|
1020
991
|
first_value: AnyValue<'a>,
|
1021
992
|
) -> RbResult<RbSeries> {
|
1022
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1023
|
-
|
1024
993
|
let skip = 1;
|
1025
|
-
let it = self
|
1026
|
-
let
|
1027
|
-
let out = lambda.funcall("call", (arg,)).unwrap();
|
994
|
+
let it = iter_struct(self).skip(init_null_count + skip).map(|val| {
|
995
|
+
let out = lambda.funcall("call", (Wrap(val),)).unwrap();
|
1028
996
|
Some(out)
|
1029
997
|
});
|
1030
998
|
iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
|
@@ -1040,13 +1008,10 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1040
1008
|
D: RbArrowPrimitiveType,
|
1041
1009
|
D::Native: IntoValue + TryConvert,
|
1042
1010
|
{
|
1043
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1044
|
-
|
1045
1011
|
let skip = usize::from(first_value.is_some());
|
1046
|
-
let it = self
|
1047
|
-
|
1048
|
-
call_lambda_and_extract(lambda,
|
1049
|
-
});
|
1012
|
+
let it = iter_struct(self)
|
1013
|
+
.skip(init_null_count + skip)
|
1014
|
+
.map(|val| call_lambda_and_extract(lambda, Wrap(val)).ok());
|
1050
1015
|
|
1051
1016
|
Ok(iterator_to_primitive(
|
1052
1017
|
it,
|
@@ -1063,13 +1028,10 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1063
1028
|
init_null_count: usize,
|
1064
1029
|
first_value: Option<bool>,
|
1065
1030
|
) -> RbResult<BooleanChunked> {
|
1066
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1067
|
-
|
1068
1031
|
let skip = usize::from(first_value.is_some());
|
1069
|
-
let it = self
|
1070
|
-
|
1071
|
-
call_lambda_and_extract(lambda,
|
1072
|
-
});
|
1032
|
+
let it = iter_struct(self)
|
1033
|
+
.skip(init_null_count + skip)
|
1034
|
+
.map(|val| call_lambda_and_extract(lambda, Wrap(val)).ok());
|
1073
1035
|
|
1074
1036
|
Ok(iterator_to_bool(
|
1075
1037
|
it,
|
@@ -1086,13 +1048,10 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1086
1048
|
init_null_count: usize,
|
1087
1049
|
first_value: Option<&str>,
|
1088
1050
|
) -> RbResult<StringChunked> {
|
1089
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1090
|
-
|
1091
1051
|
let skip = usize::from(first_value.is_some());
|
1092
|
-
let it = self
|
1093
|
-
|
1094
|
-
call_lambda_and_extract(lambda,
|
1095
|
-
});
|
1052
|
+
let it = iter_struct(self)
|
1053
|
+
.skip(init_null_count + skip)
|
1054
|
+
.map(|val| call_lambda_and_extract(lambda, Wrap(val)).ok());
|
1096
1055
|
|
1097
1056
|
Ok(iterator_to_utf8(
|
1098
1057
|
it,
|
@@ -1111,13 +1070,9 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1111
1070
|
dt: &DataType,
|
1112
1071
|
) -> RbResult<ListChunked> {
|
1113
1072
|
let skip = 1;
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
let it = self.into_iter().skip(init_null_count + skip).map(|val| {
|
1118
|
-
let arg = make_dict_arg(&names, val);
|
1119
|
-
call_lambda_series_out(lambda, arg).ok()
|
1120
|
-
});
|
1073
|
+
let it = iter_struct(self)
|
1074
|
+
.skip(init_null_count + skip)
|
1075
|
+
.map(|val| call_lambda_series_out(lambda, Wrap(val)).ok());
|
1121
1076
|
iterator_to_list(
|
1122
1077
|
dt,
|
1123
1078
|
it,
|
@@ -1134,14 +1089,12 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1134
1089
|
init_null_count: usize,
|
1135
1090
|
first_value: AnyValue<'a>,
|
1136
1091
|
) -> RbResult<Series> {
|
1137
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1138
1092
|
let mut avs = Vec::with_capacity(self.len());
|
1139
1093
|
avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
|
1140
1094
|
avs.push(first_value);
|
1141
1095
|
|
1142
|
-
let iter = self
|
1143
|
-
|
1144
|
-
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, arg)
|
1096
|
+
let iter = iter_struct(self).skip(init_null_count + 1).map(|val| {
|
1097
|
+
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, Wrap(val))
|
1145
1098
|
.unwrap()
|
1146
1099
|
.0
|
1147
1100
|
});
|
@@ -1156,13 +1109,10 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1156
1109
|
init_null_count: usize,
|
1157
1110
|
first_value: Option<ObjectValue>,
|
1158
1111
|
) -> RbResult<ObjectChunked<ObjectValue>> {
|
1159
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1160
|
-
|
1161
1112
|
let skip = usize::from(first_value.is_some());
|
1162
|
-
let it = self
|
1163
|
-
|
1164
|
-
call_lambda_and_extract(lambda,
|
1165
|
-
});
|
1113
|
+
let it = iter_struct(self)
|
1114
|
+
.skip(init_null_count + skip)
|
1115
|
+
.map(|val| call_lambda_and_extract(lambda, Wrap(val)).ok());
|
1166
1116
|
|
1167
1117
|
Ok(iterator_to_object(
|
1168
1118
|
it,
|
@@ -50,7 +50,7 @@ impl RbSeries {
|
|
50
50
|
pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
|
51
51
|
let binding = self.series.borrow();
|
52
52
|
let ca = binding.struct_().map_err(RbPolarsErr::from)?;
|
53
|
-
let df: DataFrame = ca.clone().
|
53
|
+
let df: DataFrame = ca.clone().unnest();
|
54
54
|
Ok(df.into())
|
55
55
|
}
|
56
56
|
|
@@ -278,8 +278,8 @@ impl RbSeries {
|
|
278
278
|
Ok(self.series.borrow().null_count())
|
279
279
|
}
|
280
280
|
|
281
|
-
pub fn
|
282
|
-
self.series.borrow().
|
281
|
+
pub fn has_nulls(&self) -> bool {
|
282
|
+
self.series.borrow().has_nulls()
|
283
283
|
}
|
284
284
|
|
285
285
|
pub fn sample_n(
|
data/lib/polars/io/ipc.rb
CHANGED
@@ -193,6 +193,18 @@ module Polars
|
|
193
193
|
# Try to memory map the file. This can greatly improve performance on repeated
|
194
194
|
# queries as the OS may cache pages.
|
195
195
|
# Only uncompressed IPC files can be memory mapped.
|
196
|
+
# @param hive_partitioning [Boolean]
|
197
|
+
# Infer statistics and schema from Hive partitioned URL and use them
|
198
|
+
# to prune reads. This is unset by default (i.e. `nil`), meaning it is
|
199
|
+
# automatically enabled when a single directory is passed, and otherwise
|
200
|
+
# disabled.
|
201
|
+
# @param hive_schema [Hash]
|
202
|
+
# The column names and data types of the columns by which the data is partitioned.
|
203
|
+
# If set to `nil` (default), the schema of the Hive partitions is inferred.
|
204
|
+
# @param try_parse_hive_dates [Boolean]
|
205
|
+
# Whether to try parsing hive values as date/datetime types.
|
206
|
+
# @param include_file_paths [String]
|
207
|
+
# Include the path of the source file(s) as a column with this name.
|
196
208
|
#
|
197
209
|
# @return [LazyFrame]
|
198
210
|
def scan_ipc(
|
@@ -203,7 +215,11 @@ module Polars
|
|
203
215
|
row_count_name: nil,
|
204
216
|
row_count_offset: 0,
|
205
217
|
storage_options: nil,
|
206
|
-
memory_map: true
|
218
|
+
memory_map: true,
|
219
|
+
hive_partitioning: nil,
|
220
|
+
hive_schema: nil,
|
221
|
+
try_parse_hive_dates: true,
|
222
|
+
include_file_paths: nil
|
207
223
|
)
|
208
224
|
_scan_ipc_impl(
|
209
225
|
source,
|
@@ -213,7 +229,11 @@ module Polars
|
|
213
229
|
row_count_name: row_count_name,
|
214
230
|
row_count_offset: row_count_offset,
|
215
231
|
storage_options: storage_options,
|
216
|
-
memory_map: memory_map
|
232
|
+
memory_map: memory_map,
|
233
|
+
hive_partitioning: hive_partitioning,
|
234
|
+
hive_schema: hive_schema,
|
235
|
+
try_parse_hive_dates: try_parse_hive_dates,
|
236
|
+
include_file_paths: include_file_paths
|
217
237
|
)
|
218
238
|
end
|
219
239
|
|
@@ -226,7 +246,11 @@ module Polars
|
|
226
246
|
row_count_name: nil,
|
227
247
|
row_count_offset: 0,
|
228
248
|
storage_options: nil,
|
229
|
-
memory_map: true
|
249
|
+
memory_map: true,
|
250
|
+
hive_partitioning: nil,
|
251
|
+
hive_schema: nil,
|
252
|
+
try_parse_hive_dates: true,
|
253
|
+
include_file_paths: nil
|
230
254
|
)
|
231
255
|
if Utils.pathlike?(file)
|
232
256
|
file = Utils.normalize_filepath(file)
|
@@ -239,7 +263,11 @@ module Polars
|
|
239
263
|
cache,
|
240
264
|
rechunk,
|
241
265
|
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
242
|
-
memory_map
|
266
|
+
memory_map,
|
267
|
+
hive_partitioning,
|
268
|
+
hive_schema,
|
269
|
+
try_parse_hive_dates,
|
270
|
+
include_file_paths
|
243
271
|
)
|
244
272
|
Utils.wrap_ldf(rblf)
|
245
273
|
end
|
data/lib/polars/io/parquet.rb
CHANGED
@@ -158,6 +158,8 @@ module Polars
|
|
158
158
|
# Extra options that make sense for a particular storage connection.
|
159
159
|
# @param low_memory [Boolean]
|
160
160
|
# Reduce memory pressure at the expense of performance.
|
161
|
+
# @param include_file_paths [String]
|
162
|
+
# Include the path of the source file(s) as a column with this name.
|
161
163
|
#
|
162
164
|
# @return [LazyFrame]
|
163
165
|
def scan_parquet(
|
@@ -170,7 +172,8 @@ module Polars
|
|
170
172
|
row_count_name: nil,
|
171
173
|
row_count_offset: 0,
|
172
174
|
storage_options: nil,
|
173
|
-
low_memory: false
|
175
|
+
low_memory: false,
|
176
|
+
include_file_paths: nil
|
174
177
|
)
|
175
178
|
if Utils.pathlike?(source)
|
176
179
|
source = Utils.normalize_filepath(source)
|
@@ -186,7 +189,8 @@ module Polars
|
|
186
189
|
row_count_offset: row_count_offset,
|
187
190
|
storage_options: storage_options,
|
188
191
|
low_memory: low_memory,
|
189
|
-
glob: glob
|
192
|
+
glob: glob,
|
193
|
+
include_file_paths: include_file_paths
|
190
194
|
)
|
191
195
|
end
|
192
196
|
|
@@ -203,7 +207,8 @@ module Polars
|
|
203
207
|
low_memory: false,
|
204
208
|
use_statistics: true,
|
205
209
|
hive_partitioning: nil,
|
206
|
-
glob: true
|
210
|
+
glob: true,
|
211
|
+
include_file_paths: nil
|
207
212
|
)
|
208
213
|
rblf =
|
209
214
|
RbLazyFrame.new_from_parquet(
|
@@ -219,7 +224,8 @@ module Polars
|
|
219
224
|
hive_partitioning,
|
220
225
|
nil,
|
221
226
|
true,
|
222
|
-
glob
|
227
|
+
glob,
|
228
|
+
include_file_paths
|
223
229
|
)
|
224
230
|
Utils.wrap_ldf(rblf)
|
225
231
|
end
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -2526,11 +2526,15 @@ module Polars
|
|
2526
2526
|
value_name: nil,
|
2527
2527
|
streamable: true
|
2528
2528
|
)
|
2529
|
+
if !streamable
|
2530
|
+
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
2531
|
+
end
|
2532
|
+
|
2529
2533
|
on = on.nil? ? [] : Utils._expand_selectors(self, on)
|
2530
2534
|
index = index.nil? ? [] : Utils._expand_selectors(self, index)
|
2531
2535
|
|
2532
2536
|
_from_rbldf(
|
2533
|
-
_ldf.unpivot(on, index, value_name, variable_name
|
2537
|
+
_ldf.unpivot(on, index, value_name, variable_name)
|
2534
2538
|
)
|
2535
2539
|
end
|
2536
2540
|
alias_method :melt, :unpivot
|
data/lib/polars/series.rb
CHANGED
@@ -1790,9 +1790,10 @@ module Polars
|
|
1790
1790
|
# Use this to swiftly assert a Series does not have null values.
|
1791
1791
|
#
|
1792
1792
|
# @return [Boolean]
|
1793
|
-
def
|
1794
|
-
_s.
|
1793
|
+
def has_nulls
|
1794
|
+
_s.has_nulls
|
1795
1795
|
end
|
1796
|
+
alias_method :has_validity, :has_nulls
|
1796
1797
|
|
1797
1798
|
# Check if the Series is empty.
|
1798
1799
|
#
|
data/lib/polars/string_expr.rb
CHANGED
@@ -792,15 +792,15 @@ module Polars
|
|
792
792
|
# df.select(Polars.col("json").str.json_decode(dtype))
|
793
793
|
# # =>
|
794
794
|
# # shape: (3, 1)
|
795
|
-
# #
|
796
|
-
# # │ json
|
797
|
-
# # │ ---
|
798
|
-
# # │ struct[2]
|
799
|
-
# #
|
800
|
-
# # │ {1,true}
|
801
|
-
# # │
|
802
|
-
# # │ {2,false}
|
803
|
-
# #
|
795
|
+
# # ┌───────────┐
|
796
|
+
# # │ json │
|
797
|
+
# # │ --- │
|
798
|
+
# # │ struct[2] │
|
799
|
+
# # ╞═══════════╡
|
800
|
+
# # │ {1,true} │
|
801
|
+
# # │ null │
|
802
|
+
# # │ {2,false} │
|
803
|
+
# # └───────────┘
|
804
804
|
def json_decode(dtype = nil, infer_schema_length: 100)
|
805
805
|
if !dtype.nil?
|
806
806
|
dtype = Utils.rb_type_to_dtype(dtype)
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -53,6 +53,7 @@ files:
|
|
53
53
|
- README.md
|
54
54
|
- ext/polars/Cargo.toml
|
55
55
|
- ext/polars/extconf.rb
|
56
|
+
- ext/polars/src/allocator.rs
|
56
57
|
- ext/polars/src/batched_csv.rs
|
57
58
|
- ext/polars/src/conversion/any_value.rs
|
58
59
|
- ext/polars/src/conversion/chunked_array.rs
|