polars-df 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +248 -269
- data/ext/polars/Cargo.toml +7 -7
- data/ext/polars/src/allocator.rs +13 -0
- data/ext/polars/src/conversion/chunked_array.rs +5 -7
- data/ext/polars/src/conversion/mod.rs +0 -6
- data/ext/polars/src/dataframe/general.rs +1 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/expr/meta.rs +5 -9
- data/ext/polars/src/functions/eager.rs +1 -1
- data/ext/polars/src/lazyframe/mod.rs +27 -11
- data/ext/polars/src/lib.rs +5 -18
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +49 -99
- data/ext/polars/src/series/mod.rs +3 -3
- data/lib/polars/io/ipc.rb +32 -4
- data/lib/polars/io/parquet.rb +10 -4
- data/lib/polars/lazy_frame.rb +5 -1
- data/lib/polars/series.rb +3 -2
- data/lib/polars/string_expr.rb +9 -9
- data/lib/polars/version.rb +1 -1
- metadata +3 -2
@@ -1,8 +1,7 @@
|
|
1
|
-
use magnus::{class, prelude::*, typed_data::Obj, IntoValue, RHash, TryConvert, Value};
|
1
|
+
use magnus::{class, prelude::*, typed_data::Obj, IntoValue, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
4
|
use super::*;
|
5
|
-
use crate::conversion::slice_to_wrapped;
|
6
5
|
use crate::series::RbSeries;
|
7
6
|
use crate::{ObjectValue, RbResult};
|
8
7
|
|
@@ -43,7 +42,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
|
|
43
42
|
todo!()
|
44
43
|
} else if out.is_kind_of(class::hash()) {
|
45
44
|
let first = Wrap::<AnyValue<'_>>::try_convert(out)?;
|
46
|
-
applyer.apply_to_struct(lambda, null_count, first.0)
|
45
|
+
applyer.apply_into_struct(lambda, null_count, first.0)
|
47
46
|
}
|
48
47
|
// this succeeds for numpy ints as well, where checking if it is pyint fails
|
49
48
|
// we do this later in the chain so that we don't extract integers from string chars.
|
@@ -70,12 +69,8 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
|
|
70
69
|
pub trait ApplyLambda<'a> {
|
71
70
|
fn apply_lambda_unknown(&'a self, _lambda: Value) -> RbResult<RbSeries>;
|
72
71
|
|
73
|
-
/// Apply a lambda that doesn't change output types
|
74
|
-
#[allow(dead_code)]
|
75
|
-
fn apply_lambda(&'a self, _lambda: Value) -> RbResult<RbSeries>;
|
76
|
-
|
77
72
|
// Used to store a struct type
|
78
|
-
fn apply_to_struct(
|
73
|
+
fn apply_into_struct(
|
79
74
|
&'a self,
|
80
75
|
lambda: Value,
|
81
76
|
init_null_count: usize,
|
@@ -183,19 +178,14 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
183
178
|
.into())
|
184
179
|
}
|
185
180
|
|
186
|
-
fn apply_lambda(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
187
|
-
self.apply_lambda_with_bool_out_type(lambda, 0, None)
|
188
|
-
.map(|ca| RbSeries::new(ca.into_series()))
|
189
|
-
}
|
190
|
-
|
191
|
-
fn apply_to_struct(
|
181
|
+
fn apply_into_struct(
|
192
182
|
&'a self,
|
193
183
|
lambda: Value,
|
194
184
|
init_null_count: usize,
|
195
185
|
first_value: AnyValue<'a>,
|
196
186
|
) -> RbResult<RbSeries> {
|
197
187
|
let skip = 1;
|
198
|
-
if !self.has_validity() {
|
188
|
+
if !self.has_nulls() {
|
199
189
|
let it = self
|
200
190
|
.into_no_null_iter()
|
201
191
|
.skip(init_null_count + skip)
|
@@ -223,7 +213,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
223
213
|
let skip = usize::from(first_value.is_some());
|
224
214
|
if init_null_count == self.len() {
|
225
215
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
226
|
-
} else if !self.has_validity() {
|
216
|
+
} else if !self.has_nulls() {
|
227
217
|
let it = self
|
228
218
|
.into_no_null_iter()
|
229
219
|
.skip(init_null_count + skip)
|
@@ -259,7 +249,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
259
249
|
let skip = usize::from(first_value.is_some());
|
260
250
|
if init_null_count == self.len() {
|
261
251
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
262
|
-
} else if !self.has_validity() {
|
252
|
+
} else if !self.has_nulls() {
|
263
253
|
let it = self
|
264
254
|
.into_no_null_iter()
|
265
255
|
.skip(init_null_count + skip)
|
@@ -295,7 +285,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
295
285
|
let skip = usize::from(first_value.is_some());
|
296
286
|
if init_null_count == self.len() {
|
297
287
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
298
|
-
} else if !self.has_validity() {
|
288
|
+
} else if !self.has_nulls() {
|
299
289
|
let it = self
|
300
290
|
.into_no_null_iter()
|
301
291
|
.skip(init_null_count + skip)
|
@@ -333,7 +323,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
333
323
|
let skip = 1;
|
334
324
|
if init_null_count == self.len() {
|
335
325
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
336
|
-
} else if !self.has_validity() {
|
326
|
+
} else if !self.has_nulls() {
|
337
327
|
let it = self
|
338
328
|
.into_no_null_iter()
|
339
329
|
.skip(init_null_count + skip)
|
@@ -405,7 +395,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
405
395
|
let skip = usize::from(first_value.is_some());
|
406
396
|
if init_null_count == self.len() {
|
407
397
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
408
|
-
} else if !self.has_validity() {
|
398
|
+
} else if !self.has_nulls() {
|
409
399
|
let it = self
|
410
400
|
.into_no_null_iter()
|
411
401
|
.skip(init_null_count + skip)
|
@@ -460,19 +450,14 @@ where
|
|
460
450
|
.into())
|
461
451
|
}
|
462
452
|
|
463
|
-
fn apply_lambda(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
464
|
-
self.apply_lambda_with_primitive_out_type::<T>(lambda, 0, None)
|
465
|
-
.map(|ca| RbSeries::new(ca.into_series()))
|
466
|
-
}
|
467
|
-
|
468
|
-
fn apply_to_struct(
|
453
|
+
fn apply_into_struct(
|
469
454
|
&'a self,
|
470
455
|
lambda: Value,
|
471
456
|
init_null_count: usize,
|
472
457
|
first_value: AnyValue<'a>,
|
473
458
|
) -> RbResult<RbSeries> {
|
474
459
|
let skip = 1;
|
475
|
-
if !self.has_validity() {
|
460
|
+
if !self.has_nulls() {
|
476
461
|
let it = self
|
477
462
|
.into_no_null_iter()
|
478
463
|
.skip(init_null_count + skip)
|
@@ -500,7 +485,7 @@ where
|
|
500
485
|
let skip = usize::from(first_value.is_some());
|
501
486
|
if init_null_count == self.len() {
|
502
487
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
503
|
-
} else if !self.has_validity() {
|
488
|
+
} else if !self.has_nulls() {
|
504
489
|
let it = self
|
505
490
|
.into_no_null_iter()
|
506
491
|
.skip(init_null_count + skip)
|
@@ -536,7 +521,7 @@ where
|
|
536
521
|
let skip = usize::from(first_value.is_some());
|
537
522
|
if init_null_count == self.len() {
|
538
523
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
539
|
-
} else if !self.has_validity() {
|
524
|
+
} else if !self.has_nulls() {
|
540
525
|
let it = self
|
541
526
|
.into_no_null_iter()
|
542
527
|
.skip(init_null_count + skip)
|
@@ -572,7 +557,7 @@ where
|
|
572
557
|
let skip = usize::from(first_value.is_some());
|
573
558
|
if init_null_count == self.len() {
|
574
559
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
575
|
-
} else if !self.has_validity() {
|
560
|
+
} else if !self.has_nulls() {
|
576
561
|
let it = self
|
577
562
|
.into_no_null_iter()
|
578
563
|
.skip(init_null_count + skip)
|
@@ -610,7 +595,7 @@ where
|
|
610
595
|
let skip = 1;
|
611
596
|
if init_null_count == self.len() {
|
612
597
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
613
|
-
} else if !self.has_validity() {
|
598
|
+
} else if !self.has_nulls() {
|
614
599
|
let it = self
|
615
600
|
.into_no_null_iter()
|
616
601
|
.skip(init_null_count + skip)
|
@@ -682,7 +667,7 @@ where
|
|
682
667
|
let skip = usize::from(first_value.is_some());
|
683
668
|
if init_null_count == self.len() {
|
684
669
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
685
|
-
} else if !self.has_validity() {
|
670
|
+
} else if !self.has_nulls() {
|
686
671
|
let it = self
|
687
672
|
.into_no_null_iter()
|
688
673
|
.skip(init_null_count + skip)
|
@@ -732,19 +717,14 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
732
717
|
.into())
|
733
718
|
}
|
734
719
|
|
735
|
-
fn apply_lambda(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
736
|
-
let ca = self.apply_lambda_with_utf8_out_type(lambda, 0, None)?;
|
737
|
-
Ok(ca.into_series().into())
|
738
|
-
}
|
739
|
-
|
740
|
-
fn apply_to_struct(
|
720
|
+
fn apply_into_struct(
|
741
721
|
&'a self,
|
742
722
|
lambda: Value,
|
743
723
|
init_null_count: usize,
|
744
724
|
first_value: AnyValue<'a>,
|
745
725
|
) -> RbResult<RbSeries> {
|
746
726
|
let skip = 1;
|
747
|
-
if !self.has_validity() {
|
727
|
+
if !self.has_nulls() {
|
748
728
|
let it = self
|
749
729
|
.into_no_null_iter()
|
750
730
|
.skip(init_null_count + skip)
|
@@ -772,7 +752,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
772
752
|
let skip = usize::from(first_value.is_some());
|
773
753
|
if init_null_count == self.len() {
|
774
754
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
775
|
-
} else if !self.has_validity() {
|
755
|
+
} else if !self.has_nulls() {
|
776
756
|
let it = self
|
777
757
|
.into_no_null_iter()
|
778
758
|
.skip(init_null_count + skip)
|
@@ -808,7 +788,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
808
788
|
let skip = usize::from(first_value.is_some());
|
809
789
|
if init_null_count == self.len() {
|
810
790
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
811
|
-
} else if !self.has_validity() {
|
791
|
+
} else if !self.has_nulls() {
|
812
792
|
let it = self
|
813
793
|
.into_no_null_iter()
|
814
794
|
.skip(init_null_count + skip)
|
@@ -844,7 +824,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
844
824
|
let skip = usize::from(first_value.is_some());
|
845
825
|
if init_null_count == self.len() {
|
846
826
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
847
|
-
} else if !self.has_validity() {
|
827
|
+
} else if !self.has_nulls() {
|
848
828
|
let it = self
|
849
829
|
.into_no_null_iter()
|
850
830
|
.skip(init_null_count + skip)
|
@@ -882,7 +862,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
882
862
|
let skip = 1;
|
883
863
|
if init_null_count == self.len() {
|
884
864
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
885
|
-
} else if !self.has_validity() {
|
865
|
+
} else if !self.has_nulls() {
|
886
866
|
let it = self
|
887
867
|
.into_no_null_iter()
|
888
868
|
.skip(init_null_count + skip)
|
@@ -954,7 +934,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
954
934
|
let skip = usize::from(first_value.is_some());
|
955
935
|
if init_null_count == self.len() {
|
956
936
|
Ok(ChunkedArray::full_null(self.name(), self.len()))
|
957
|
-
} else if !self.has_validity() {
|
937
|
+
} else if !self.has_nulls() {
|
958
938
|
let it = self
|
959
939
|
.into_no_null_iter()
|
960
940
|
.skip(init_null_count + skip)
|
@@ -983,21 +963,16 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
983
963
|
}
|
984
964
|
}
|
985
965
|
|
986
|
-
fn
|
987
|
-
|
988
|
-
for (name, val) in names.iter().zip(slice_to_wrapped(vals)) {
|
989
|
-
dict.aset(name.to_string(), (*val).clone()).unwrap()
|
990
|
-
}
|
991
|
-
dict
|
966
|
+
fn iter_struct(ca: &StructChunked) -> impl Iterator<Item = AnyValue> {
|
967
|
+
(0..ca.len()).map(|i| unsafe { ca.get_any_value_unchecked(i) })
|
992
968
|
}
|
993
969
|
|
994
970
|
impl<'a> ApplyLambda<'a> for StructChunked {
|
995
971
|
fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
996
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
997
972
|
let mut null_count = 0;
|
998
|
-
|
999
|
-
|
1000
|
-
let out: Value = lambda.funcall("call", (arg,))?;
|
973
|
+
|
974
|
+
for val in iter_struct(self) {
|
975
|
+
let out: Value = lambda.funcall("call", (Wrap(val),))?;
|
1001
976
|
if out.is_nil() {
|
1002
977
|
null_count += 1;
|
1003
978
|
continue;
|
@@ -1009,22 +984,15 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1009
984
|
Ok(self.clone().into_series().into())
|
1010
985
|
}
|
1011
986
|
|
1012
|
-
fn apply_lambda(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
1013
|
-
self.apply_lambda_unknown(lambda)
|
1014
|
-
}
|
1015
|
-
|
1016
|
-
fn apply_to_struct(
|
987
|
+
fn apply_into_struct(
|
1017
988
|
&'a self,
|
1018
989
|
lambda: Value,
|
1019
990
|
init_null_count: usize,
|
1020
991
|
first_value: AnyValue<'a>,
|
1021
992
|
) -> RbResult<RbSeries> {
|
1022
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1023
|
-
|
1024
993
|
let skip = 1;
|
1025
|
-
let it = self.into_iter().skip(init_null_count + skip).map(|val| {
|
1026
|
-
let arg = make_dict_arg(&names, val);
|
1027
|
-
let out = lambda.funcall("call", (arg,)).unwrap();
|
994
|
+
let it = iter_struct(self).skip(init_null_count + skip).map(|val| {
|
995
|
+
let out = lambda.funcall("call", (Wrap(val),)).unwrap();
|
1028
996
|
Some(out)
|
1029
997
|
});
|
1030
998
|
iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
|
@@ -1040,13 +1008,10 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1040
1008
|
D: RbArrowPrimitiveType,
|
1041
1009
|
D::Native: IntoValue + TryConvert,
|
1042
1010
|
{
|
1043
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1044
|
-
|
1045
1011
|
let skip = usize::from(first_value.is_some());
|
1046
|
-
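let it = self.into_iter().skip(init_null_count + skip).map(|val| {
|
1047
|
-
let arg = make_dict_arg(&names, val);
|
1048
|
-
call_lambda_and_extract(lambda, arg).ok()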
|
1049
|
-
});
|
1012
|
+
let it = iter_struct(self)
|
1013
|
+
.skip(init_null_count + skip)
|
1014
|
+
.map(|val| call_lambda_and_extract(lambda, Wrap(val)).ok());
|
1050
1015
|
|
1051
1016
|
Ok(iterator_to_primitive(
|
1052
1017
|
it,
|
@@ -1063,13 +1028,10 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1063
1028
|
init_null_count: usize,
|
1064
1029
|
first_value: Option<bool>,
|
1065
1030
|
) -> RbResult<BooleanChunked> {
|
1066
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1067
|
-
|
1068
1031
|
let skip = usize::from(first_value.is_some());
|
1069
|
-
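let it = self.into_iter().skip(init_null_count + skip).map(|val| {
|
1070
|
-
let arg = make_dict_arg(&names, val);
|
1071
|
-
call_lambda_and_extract(lambda, arg).ok()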
|
1072
|
-
});
|
1032
|
+
let it = iter_struct(self)
|
1033
|
+
.skip(init_null_count + skip)
|
1034
|
+
.map(|val| call_lambda_and_extract(lambda, Wrap(val)).ok());
|
1073
1035
|
|
1074
1036
|
Ok(iterator_to_bool(
|
1075
1037
|
it,
|
@@ -1086,13 +1048,10 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1086
1048
|
init_null_count: usize,
|
1087
1049
|
first_value: Option<&str>,
|
1088
1050
|
) -> RbResult<StringChunked> {
|
1089
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1090
|
-
|
1091
1051
|
let skip = usize::from(first_value.is_some());
|
1092
|
-
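let it = self.into_iter().skip(init_null_count + skip).map(|val| {
|
1093
|
-
let arg = make_dict_arg(&names, val);
|
1094
|
-
call_lambda_and_extract(lambda, arg).ok()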
|
1095
|
-
});
|
1052
|
+
let it = iter_struct(self)
|
1053
|
+
.skip(init_null_count + skip)
|
1054
|
+
.map(|val| call_lambda_and_extract(lambda, Wrap(val)).ok());
|
1096
1055
|
|
1097
1056
|
Ok(iterator_to_utf8(
|
1098
1057
|
it,
|
@@ -1111,13 +1070,9 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1111
1070
|
dt: &DataType,
|
1112
1071
|
) -> RbResult<ListChunked> {
|
1113
1072
|
let skip = 1;
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
let it = self.into_iter().skip(init_null_count + skip).map(|val| {
|
1118
|
-
let arg = make_dict_arg(&names, val);
|
1119
|
-
call_lambda_series_out(lambda, arg).ok()
|
1120
|
-
});
|
1073
|
+
let it = iter_struct(self)
|
1074
|
+
.skip(init_null_count + skip)
|
1075
|
+
.map(|val| call_lambda_series_out(lambda, Wrap(val)).ok());
|
1121
1076
|
iterator_to_list(
|
1122
1077
|
dt,
|
1123
1078
|
it,
|
@@ -1134,14 +1089,12 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1134
1089
|
init_null_count: usize,
|
1135
1090
|
first_value: AnyValue<'a>,
|
1136
1091
|
) -> RbResult<Series> {
|
1137
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1138
1092
|
let mut avs = Vec::with_capacity(self.len());
|
1139
1093
|
avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
|
1140
1094
|
avs.push(first_value);
|
1141
1095
|
|
1142
|
-
let iter = self.into_iter().skip(init_null_count + 1).map(|val| {
|
1143
|
-
let arg = make_dict_arg(&names, val);
|
1144
|
-
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, arg)
|
1096
|
+
let iter = iter_struct(self).skip(init_null_count + 1).map(|val| {
|
1097
|
+
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, Wrap(val))
|
1145
1098
|
.unwrap()
|
1146
1099
|
.0
|
1147
1100
|
});
|
@@ -1156,13 +1109,10 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1156
1109
|
init_null_count: usize,
|
1157
1110
|
first_value: Option<ObjectValue>,
|
1158
1111
|
) -> RbResult<ObjectChunked<ObjectValue>> {
|
1159
|
-
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1160
|
-
|
1161
1112
|
let skip = usize::from(first_value.is_some());
|
1162
|
-
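let it = self.into_iter().skip(init_null_count + skip).map(|val| {
|
1163
|
-
let arg = make_dict_arg(&names, val);
|
1164
|
-
call_lambda_and_extract(lambda, arg).ok()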
|
1165
|
-
});
|
1113
|
+
let it = iter_struct(self)
|
1114
|
+
.skip(init_null_count + skip)
|
1115
|
+
.map(|val| call_lambda_and_extract(lambda, Wrap(val)).ok());
|
1166
1116
|
|
1167
1117
|
Ok(iterator_to_object(
|
1168
1118
|
it,
|
@@ -50,7 +50,7 @@ impl RbSeries {
|
|
50
50
|
pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
|
51
51
|
let binding = self.series.borrow();
|
52
52
|
let ca = binding.struct_().map_err(RbPolarsErr::from)?;
|
53
|
-
let df: DataFrame = ca.clone().
|
53
|
+
let df: DataFrame = ca.clone().unnest();
|
54
54
|
Ok(df.into())
|
55
55
|
}
|
56
56
|
|
@@ -278,8 +278,8 @@ impl RbSeries {
|
|
278
278
|
Ok(self.series.borrow().null_count())
|
279
279
|
}
|
280
280
|
|
281
|
-
pub fn has_validity(&self) -> bool {
|
282
|
-
self.series.borrow().has_validity()
|
281
|
+
pub fn has_nulls(&self) -> bool {
|
282
|
+
self.series.borrow().has_nulls()
|
283
283
|
}
|
284
284
|
|
285
285
|
pub fn sample_n(
|
data/lib/polars/io/ipc.rb
CHANGED
@@ -193,6 +193,18 @@ module Polars
|
|
193
193
|
# Try to memory map the file. This can greatly improve performance on repeated
|
194
194
|
# queries as the OS may cache pages.
|
195
195
|
# Only uncompressed IPC files can be memory mapped.
|
196
|
+
# @param hive_partitioning [Boolean]
|
197
|
+
# Infer statistics and schema from Hive partitioned URL and use them
|
198
|
+
# to prune reads. This is unset by default (i.e. `nil`), meaning it is
|
199
|
+
# automatically enabled when a single directory is passed, and otherwise
|
200
|
+
# disabled.
|
201
|
+
# @param hive_schema [Hash]
|
202
|
+
# The column names and data types of the columns by which the data is partitioned.
|
203
|
+
# If set to `nil` (default), the schema of the Hive partitions is inferred.
|
204
|
+
# @param try_parse_hive_dates [Boolean]
|
205
|
+
# Whether to try parsing hive values as date/datetime types.
|
206
|
+
# @param include_file_paths [String]
|
207
|
+
# Include the path of the source file(s) as a column with this name.
|
196
208
|
#
|
197
209
|
# @return [LazyFrame]
|
198
210
|
def scan_ipc(
|
@@ -203,7 +215,11 @@ module Polars
|
|
203
215
|
row_count_name: nil,
|
204
216
|
row_count_offset: 0,
|
205
217
|
storage_options: nil,
|
206
|
-
memory_map: true
|
218
|
+
memory_map: true,
|
219
|
+
hive_partitioning: nil,
|
220
|
+
hive_schema: nil,
|
221
|
+
try_parse_hive_dates: true,
|
222
|
+
include_file_paths: nil
|
207
223
|
)
|
208
224
|
_scan_ipc_impl(
|
209
225
|
source,
|
@@ -213,7 +229,11 @@ module Polars
|
|
213
229
|
row_count_name: row_count_name,
|
214
230
|
row_count_offset: row_count_offset,
|
215
231
|
storage_options: storage_options,
|
216
|
-
memory_map: memory_map
|
232
|
+
memory_map: memory_map,
|
233
|
+
hive_partitioning: hive_partitioning,
|
234
|
+
hive_schema: hive_schema,
|
235
|
+
try_parse_hive_dates: try_parse_hive_dates,
|
236
|
+
include_file_paths: include_file_paths
|
217
237
|
)
|
218
238
|
end
|
219
239
|
|
@@ -226,7 +246,11 @@ module Polars
|
|
226
246
|
row_count_name: nil,
|
227
247
|
row_count_offset: 0,
|
228
248
|
storage_options: nil,
|
229
|
-
memory_map: true
|
249
|
+
memory_map: true,
|
250
|
+
hive_partitioning: nil,
|
251
|
+
hive_schema: nil,
|
252
|
+
try_parse_hive_dates: true,
|
253
|
+
include_file_paths: nil
|
230
254
|
)
|
231
255
|
if Utils.pathlike?(file)
|
232
256
|
file = Utils.normalize_filepath(file)
|
@@ -239,7 +263,11 @@ module Polars
|
|
239
263
|
cache,
|
240
264
|
rechunk,
|
241
265
|
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
242
|
-
memory_map
|
266
|
+
memory_map,
|
267
|
+
hive_partitioning,
|
268
|
+
hive_schema,
|
269
|
+
try_parse_hive_dates,
|
270
|
+
include_file_paths
|
243
271
|
)
|
244
272
|
Utils.wrap_ldf(rblf)
|
245
273
|
end
|
data/lib/polars/io/parquet.rb
CHANGED
@@ -158,6 +158,8 @@ module Polars
|
|
158
158
|
# Extra options that make sense for a particular storage connection.
|
159
159
|
# @param low_memory [Boolean]
|
160
160
|
# Reduce memory pressure at the expense of performance.
|
161
|
+
# @param include_file_paths [String]
|
162
|
+
# Include the path of the source file(s) as a column with this name.
|
161
163
|
#
|
162
164
|
# @return [LazyFrame]
|
163
165
|
def scan_parquet(
|
@@ -170,7 +172,8 @@ module Polars
|
|
170
172
|
row_count_name: nil,
|
171
173
|
row_count_offset: 0,
|
172
174
|
storage_options: nil,
|
173
|
-
low_memory: false
|
175
|
+
low_memory: false,
|
176
|
+
include_file_paths: nil
|
174
177
|
)
|
175
178
|
if Utils.pathlike?(source)
|
176
179
|
source = Utils.normalize_filepath(source)
|
@@ -186,7 +189,8 @@ module Polars
|
|
186
189
|
row_count_offset: row_count_offset,
|
187
190
|
storage_options: storage_options,
|
188
191
|
low_memory: low_memory,
|
189
|
-
glob: glob
|
192
|
+
glob: glob,
|
193
|
+
include_file_paths: include_file_paths
|
190
194
|
)
|
191
195
|
end
|
192
196
|
|
@@ -203,7 +207,8 @@ module Polars
|
|
203
207
|
low_memory: false,
|
204
208
|
use_statistics: true,
|
205
209
|
hive_partitioning: nil,
|
206
|
-
glob: true
|
210
|
+
glob: true,
|
211
|
+
include_file_paths: nil
|
207
212
|
)
|
208
213
|
rblf =
|
209
214
|
RbLazyFrame.new_from_parquet(
|
@@ -219,7 +224,8 @@ module Polars
|
|
219
224
|
hive_partitioning,
|
220
225
|
nil,
|
221
226
|
true,
|
222
|
-
glob
|
227
|
+
glob,
|
228
|
+
include_file_paths
|
223
229
|
)
|
224
230
|
Utils.wrap_ldf(rblf)
|
225
231
|
end
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -2526,11 +2526,15 @@ module Polars
|
|
2526
2526
|
value_name: nil,
|
2527
2527
|
streamable: true
|
2528
2528
|
)
|
2529
|
+
if !streamable
|
2530
|
+
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
2531
|
+
end
|
2532
|
+
|
2529
2533
|
on = on.nil? ? [] : Utils._expand_selectors(self, on)
|
2530
2534
|
index = index.nil? ? [] : Utils._expand_selectors(self, index)
|
2531
2535
|
|
2532
2536
|
_from_rbldf(
|
2533
|
-
_ldf.unpivot(on, index, value_name, variable_name, streamable)
|
2537
|
+
_ldf.unpivot(on, index, value_name, variable_name)
|
2534
2538
|
)
|
2535
2539
|
end
|
2536
2540
|
alias_method :melt, :unpivot
|
data/lib/polars/series.rb
CHANGED
@@ -1790,9 +1790,10 @@ module Polars
|
|
1790
1790
|
# Use this to swiftly assert a Series does not have null values.
|
1791
1791
|
#
|
1792
1792
|
# @return [Boolean]
|
1793
|
-
def has_validity
|
1794
|
-
_s.has_validity
|
1793
|
+
def has_nulls
|
1794
|
+
_s.has_nulls
|
1795
1795
|
end
|
1796
|
+
alias_method :has_validity, :has_nulls
|
1796
1797
|
|
1797
1798
|
# Check if the Series is empty.
|
1798
1799
|
#
|
data/lib/polars/string_expr.rb
CHANGED
@@ -792,15 +792,15 @@ module Polars
|
|
792
792
|
# df.select(Polars.col("json").str.json_decode(dtype))
|
793
793
|
# # =>
|
794
794
|
# # shape: (3, 1)
|
795
|
-
# #
|
796
|
-
# # │ json
|
797
|
-
# # │ ---
|
798
|
-
# # │ struct[2]
|
799
|
-
# #
|
800
|
-
# # │ {1,true}
|
801
|
-
# # │
|
802
|
-
# # │ {2,false}
|
803
|
-
# #
|
795
|
+
# # ┌───────────┐
|
796
|
+
# # │ json │
|
797
|
+
# # │ --- │
|
798
|
+
# # │ struct[2] │
|
799
|
+
# # ╞═══════════╡
|
800
|
+
# # │ {1,true} │
|
801
|
+
# # │ null │
|
802
|
+
# # │ {2,false} │
|
803
|
+
# # └───────────┘
|
804
804
|
def json_decode(dtype = nil, infer_schema_length: 100)
|
805
805
|
if !dtype.nil?
|
806
806
|
dtype = Utils.rb_type_to_dtype(dtype)
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.0
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -53,6 +53,7 @@ files:
|
|
53
53
|
- README.md
|
54
54
|
- ext/polars/Cargo.toml
|
55
55
|
- ext/polars/extconf.rb
|
56
|
+
- ext/polars/src/allocator.rs
|
56
57
|
- ext/polars/src/batched_csv.rs
|
57
58
|
- ext/polars/src/conversion/any_value.rs
|
58
59
|
- ext/polars/src/conversion/chunked_array.rs
|