polars-df 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +248 -269
- data/ext/polars/Cargo.toml +7 -7
- data/ext/polars/src/allocator.rs +13 -0
- data/ext/polars/src/conversion/chunked_array.rs +5 -7
- data/ext/polars/src/conversion/mod.rs +0 -6
- data/ext/polars/src/dataframe/general.rs +1 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/expr/meta.rs +5 -9
- data/ext/polars/src/functions/eager.rs +1 -1
- data/ext/polars/src/lazyframe/mod.rs +27 -11
- data/ext/polars/src/lib.rs +5 -18
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +49 -99
- data/ext/polars/src/series/mod.rs +3 -3
- data/lib/polars/io/ipc.rb +32 -4
- data/lib/polars/io/parquet.rb +10 -4
- data/lib/polars/lazy_frame.rb +5 -1
- data/lib/polars/series.rb +3 -2
- data/lib/polars/string_expr.rb +9 -9
- data/lib/polars/version.rb +1 -1
- metadata +3 -2
data/ext/polars/Cargo.toml
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.12.0"
|
3
|
+
version = "0.13.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
-
rust-version = "1.
|
7
|
+
rust-version = "1.80.0"
|
8
8
|
publish = false
|
9
9
|
|
10
10
|
[lib]
|
@@ -15,14 +15,14 @@ ahash = "0.8"
|
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
17
|
magnus = "0.7"
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
18
|
+
polars-core = "=0.42.0"
|
19
|
+
polars-parquet = "=0.42.0"
|
20
|
+
polars-utils = "=0.42.0"
|
21
21
|
serde_json = "1"
|
22
22
|
smartstring = "1"
|
23
23
|
|
24
24
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
25
|
+
version = "=0.42.0"
|
26
26
|
features = [
|
27
27
|
"abs",
|
28
28
|
"approx_unique",
|
@@ -117,5 +117,5 @@ features = [
|
|
117
117
|
[target.'cfg(target_os = "linux")'.dependencies]
|
118
118
|
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
119
119
|
|
120
|
-
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
120
|
+
[target.'cfg(not(any(target_os = "linux", target_os = "windows")))'.dependencies]
|
121
121
|
mimalloc = { version = "0.1", default-features = false }
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#[cfg(target_os = "linux")]
|
2
|
+
use jemallocator::Jemalloc;
|
3
|
+
|
4
|
+
#[cfg(not(any(target_os = "linux", target_os = "windows")))]
|
5
|
+
use mimalloc::MiMalloc;
|
6
|
+
|
7
|
+
#[global_allocator]
|
8
|
+
#[cfg(target_os = "linux")]
|
9
|
+
static ALLOC: Jemalloc = Jemalloc;
|
10
|
+
|
11
|
+
#[global_allocator]
|
12
|
+
#[cfg(not(any(target_os = "linux", target_os = "windows")))]
|
13
|
+
static ALLOC: MiMalloc = MiMalloc;
|
@@ -56,17 +56,15 @@ impl IntoValue for Wrap<&BinaryChunked> {
|
|
56
56
|
}
|
57
57
|
|
58
58
|
impl IntoValue for Wrap<&StructChunked> {
|
59
|
-
fn into_value_with(self,
|
59
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
60
60
|
let s = self.0.clone().into_series();
|
61
61
|
// todo! iterate its chunks and flatten.
|
62
62
|
// make series::iter() accept a chunk index.
|
63
63
|
let s = s.rechunk();
|
64
|
-
let iter = s.iter().map(|av| {
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
unreachable!()
|
69
|
-
}
|
64
|
+
let iter = s.iter().map(|av| match av {
|
65
|
+
AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
66
|
+
AnyValue::Null => ruby.qnil().as_value(),
|
67
|
+
_ => unreachable!(),
|
70
68
|
});
|
71
69
|
|
72
70
|
RArray::from_iter(iter).into_value()
|
@@ -26,12 +26,6 @@ use crate::object::OBJECT_NAME;
|
|
26
26
|
use crate::rb_modules::series;
|
27
27
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
28
28
|
|
29
|
-
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
30
|
-
// Safety:
|
31
|
-
// Wrap is transparent.
|
32
|
-
unsafe { std::mem::transmute(slice) }
|
33
|
-
}
|
34
|
-
|
35
29
|
pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
|
36
30
|
// Safety:
|
37
31
|
// Wrap is transparent.
|
@@ -348,12 +348,11 @@ impl RbDataFrame {
|
|
348
348
|
value_name: Option<String>,
|
349
349
|
variable_name: Option<String>,
|
350
350
|
) -> RbResult<Self> {
|
351
|
-
let args =
|
351
|
+
let args = UnpivotArgsIR {
|
352
352
|
on: strings_to_smartstrings(on),
|
353
353
|
index: strings_to_smartstrings(index),
|
354
354
|
value_name: value_name.map(|s| s.into()),
|
355
355
|
variable_name: variable_name.map(|s| s.into()),
|
356
|
-
streamable: false,
|
357
356
|
};
|
358
357
|
|
359
358
|
let df = self.df.borrow().unpivot2(args).map_err(RbPolarsErr::from)?;
|
@@ -145,7 +145,7 @@ impl RbDataFrame {
|
|
145
145
|
.with_projection(projection)
|
146
146
|
.with_columns(columns)
|
147
147
|
.read_parallel(parallel.0)
|
148
|
-
.
|
148
|
+
.with_slice(n_rows.map(|x| (0, x)))
|
149
149
|
.with_row_index(row_index)
|
150
150
|
.set_low_memory(low_memory)
|
151
151
|
.use_statistics(use_statistics)
|
@@ -156,7 +156,7 @@ impl RbDataFrame {
|
|
156
156
|
.with_projection(projection)
|
157
157
|
.with_columns(columns)
|
158
158
|
.read_parallel(parallel.0)
|
159
|
-
.
|
159
|
+
.with_slice(n_rows.map(|x| (0, x)))
|
160
160
|
.with_row_index(row_index)
|
161
161
|
.use_statistics(use_statistics)
|
162
162
|
.set_rechunk(rechunk)
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -7,15 +7,11 @@ impl RbExpr {
|
|
7
7
|
self.inner == other.inner
|
8
8
|
}
|
9
9
|
|
10
|
-
pub fn meta_pop(&self) -> RArray {
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
.pop()
|
16
|
-
.into_iter()
|
17
|
-
.map(RbExpr::from),
|
18
|
-
)
|
10
|
+
pub fn meta_pop(&self) -> RbResult<RArray> {
|
11
|
+
let exprs = self.inner.clone().meta().pop().map_err(RbPolarsErr::from)?;
|
12
|
+
Ok(RArray::from_iter(
|
13
|
+
exprs.iter().map(|e| RbExpr::from(e.clone())),
|
14
|
+
))
|
19
15
|
}
|
20
16
|
|
21
17
|
pub fn meta_root_names(&self) -> Vec<String> {
|
@@ -64,6 +64,6 @@ pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
|
|
64
64
|
for item in seq.into_iter() {
|
65
65
|
dfs.push(get_df(item)?);
|
66
66
|
}
|
67
|
-
let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
|
67
|
+
let df = functions::concat_df_horizontal(&dfs, true).map_err(RbPolarsErr::from)?;
|
68
68
|
Ok(df.into())
|
69
69
|
}
|
@@ -162,6 +162,7 @@ impl RbLazyFrame {
|
|
162
162
|
hive_schema: Option<Wrap<Schema>>,
|
163
163
|
try_parse_hive_dates: bool,
|
164
164
|
glob: bool,
|
165
|
+
include_file_paths: Option<String>,
|
165
166
|
) -> RbResult<Self> {
|
166
167
|
let parallel = parallel.0;
|
167
168
|
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
@@ -196,6 +197,7 @@ impl RbLazyFrame {
|
|
196
197
|
use_statistics,
|
197
198
|
hive_options,
|
198
199
|
glob,
|
200
|
+
include_file_paths: include_file_paths.map(Arc::from),
|
199
201
|
};
|
200
202
|
|
201
203
|
let lf = if path.is_some() {
|
@@ -207,6 +209,7 @@ impl RbLazyFrame {
|
|
207
209
|
Ok(lf.into())
|
208
210
|
}
|
209
211
|
|
212
|
+
#[allow(clippy::too_many_arguments)]
|
210
213
|
pub fn new_from_ipc(
|
211
214
|
path: String,
|
212
215
|
n_rows: Option<usize>,
|
@@ -214,12 +217,23 @@ impl RbLazyFrame {
|
|
214
217
|
rechunk: bool,
|
215
218
|
row_index: Option<(String, IdxSize)>,
|
216
219
|
memory_map: bool,
|
220
|
+
hive_partitioning: Option<bool>,
|
221
|
+
hive_schema: Option<Wrap<Schema>>,
|
222
|
+
try_parse_hive_dates: bool,
|
223
|
+
include_file_paths: Option<String>,
|
217
224
|
) -> RbResult<Self> {
|
218
225
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
219
226
|
name: Arc::from(name.as_str()),
|
220
227
|
offset,
|
221
228
|
});
|
222
229
|
|
230
|
+
let hive_options = HiveOptions {
|
231
|
+
enabled: hive_partitioning,
|
232
|
+
hive_start_idx: 0,
|
233
|
+
schema: hive_schema.map(|x| Arc::new(x.0)),
|
234
|
+
try_parse_dates: try_parse_hive_dates,
|
235
|
+
};
|
236
|
+
|
223
237
|
let args = ScanArgsIpc {
|
224
238
|
n_rows,
|
225
239
|
cache,
|
@@ -227,6 +241,8 @@ impl RbLazyFrame {
|
|
227
241
|
row_index,
|
228
242
|
memory_map,
|
229
243
|
cloud_options: None,
|
244
|
+
hive_options,
|
245
|
+
include_file_paths: include_file_paths.map(Arc::from),
|
230
246
|
};
|
231
247
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
232
248
|
Ok(lf.into())
|
@@ -348,7 +364,7 @@ impl RbLazyFrame {
|
|
348
364
|
compression_level: Option<i32>,
|
349
365
|
statistics: Wrap<StatisticsOptions>,
|
350
366
|
row_group_size: Option<usize>,
|
351
|
-
|
367
|
+
data_page_size: Option<usize>,
|
352
368
|
maintain_order: bool,
|
353
369
|
) -> RbResult<()> {
|
354
370
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
@@ -357,7 +373,7 @@ impl RbLazyFrame {
|
|
357
373
|
compression,
|
358
374
|
statistics: statistics.0,
|
359
375
|
row_group_size,
|
360
|
-
|
376
|
+
data_page_size,
|
361
377
|
maintain_order,
|
362
378
|
};
|
363
379
|
|
@@ -752,22 +768,22 @@ impl RbLazyFrame {
|
|
752
768
|
|
753
769
|
pub fn unpivot(
|
754
770
|
&self,
|
755
|
-
on:
|
756
|
-
index:
|
771
|
+
on: RArray,
|
772
|
+
index: RArray,
|
757
773
|
value_name: Option<String>,
|
758
774
|
variable_name: Option<String>,
|
759
|
-
|
760
|
-
|
761
|
-
let
|
762
|
-
|
763
|
-
|
775
|
+
) -> RbResult<Self> {
|
776
|
+
let on = rb_exprs_to_exprs(on)?;
|
777
|
+
let index = rb_exprs_to_exprs(index)?;
|
778
|
+
let args = UnpivotArgsDSL {
|
779
|
+
on: on.into_iter().map(|e| e.into()).collect(),
|
780
|
+
index: index.into_iter().map(|e| e.into()).collect(),
|
764
781
|
value_name: value_name.map(|s| s.into()),
|
765
782
|
variable_name: variable_name.map(|s| s.into()),
|
766
|
-
streamable,
|
767
783
|
};
|
768
784
|
|
769
785
|
let ldf = self.ldf.borrow().clone();
|
770
|
-
ldf.unpivot(args).into()
|
786
|
+
Ok(ldf.unpivot(args).into())
|
771
787
|
}
|
772
788
|
|
773
789
|
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
mod allocator;
|
1
2
|
mod batched_csv;
|
2
3
|
mod conversion;
|
3
4
|
mod dataframe;
|
@@ -31,20 +32,6 @@ use magnus::{define_module, function, method, prelude::*, Error, Ruby};
|
|
31
32
|
use series::RbSeries;
|
32
33
|
use sql::RbSQLContext;
|
33
34
|
|
34
|
-
#[cfg(target_os = "linux")]
|
35
|
-
use jemallocator::Jemalloc;
|
36
|
-
|
37
|
-
#[cfg(not(target_os = "linux"))]
|
38
|
-
use mimalloc::MiMalloc;
|
39
|
-
|
40
|
-
#[global_allocator]
|
41
|
-
#[cfg(target_os = "linux")]
|
42
|
-
static GLOBAL: Jemalloc = Jemalloc;
|
43
|
-
|
44
|
-
#[global_allocator]
|
45
|
-
#[cfg(not(target_os = "linux"))]
|
46
|
-
static GLOBAL: MiMalloc = MiMalloc;
|
47
|
-
|
48
35
|
type RbResult<T> = Result<T, Error>;
|
49
36
|
|
50
37
|
#[magnus::init]
|
@@ -722,9 +709,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
722
709
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
723
710
|
class.define_singleton_method(
|
724
711
|
"new_from_parquet",
|
725
|
-
function!(RbLazyFrame::new_from_parquet,
|
712
|
+
function!(RbLazyFrame::new_from_parquet, 14),
|
726
713
|
)?;
|
727
|
-
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc,
|
714
|
+
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
728
715
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
729
716
|
class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
|
730
717
|
class.define_method(
|
@@ -780,7 +767,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
780
767
|
class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
|
781
768
|
class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
|
782
769
|
class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
|
783
|
-
class.define_method("unpivot", method!(RbLazyFrame::unpivot,
|
770
|
+
class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
|
784
771
|
class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
|
785
772
|
class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
|
786
773
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
@@ -878,7 +865,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
878
865
|
class.define_method("arg_max", method!(RbSeries::arg_max, 0))?;
|
879
866
|
class.define_method("take_with_series", method!(RbSeries::take_with_series, 1))?;
|
880
867
|
class.define_method("null_count", method!(RbSeries::null_count, 0))?;
|
881
|
-
class.define_method("
|
868
|
+
class.define_method("has_nulls", method!(RbSeries::has_nulls, 0))?;
|
882
869
|
class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
|
883
870
|
class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
|
884
871
|
class.define_method("equals", method!(RbSeries::equals, 4))?;
|