polars-df 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +248 -269
- data/ext/polars/Cargo.toml +7 -7
- data/ext/polars/src/allocator.rs +13 -0
- data/ext/polars/src/conversion/chunked_array.rs +5 -7
- data/ext/polars/src/conversion/mod.rs +0 -6
- data/ext/polars/src/dataframe/general.rs +1 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/expr/meta.rs +5 -9
- data/ext/polars/src/functions/eager.rs +1 -1
- data/ext/polars/src/lazyframe/mod.rs +27 -11
- data/ext/polars/src/lib.rs +5 -18
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +49 -99
- data/ext/polars/src/series/mod.rs +3 -3
- data/lib/polars/io/ipc.rb +32 -4
- data/lib/polars/io/parquet.rb +10 -4
- data/lib/polars/lazy_frame.rb +5 -1
- data/lib/polars/series.rb +3 -2
- data/lib/polars/string_expr.rb +9 -9
- data/lib/polars/version.rb +1 -1
- metadata +3 -2
data/ext/polars/Cargo.toml
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.13.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
-
rust-version = "1.
|
7
|
+
rust-version = "1.80.0"
|
8
8
|
publish = false
|
9
9
|
|
10
10
|
[lib]
|
@@ -15,14 +15,14 @@ ahash = "0.8"
|
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
17
|
magnus = "0.7"
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
18
|
+
polars-core = "=0.42.0"
|
19
|
+
polars-parquet = "=0.42.0"
|
20
|
+
polars-utils = "=0.42.0"
|
21
21
|
serde_json = "1"
|
22
22
|
smartstring = "1"
|
23
23
|
|
24
24
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
25
|
+
version = "=0.42.0"
|
26
26
|
features = [
|
27
27
|
"abs",
|
28
28
|
"approx_unique",
|
@@ -117,5 +117,5 @@ features = [
|
|
117
117
|
[target.'cfg(target_os = "linux")'.dependencies]
|
118
118
|
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
119
119
|
|
120
|
-
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
120
|
+
[target.'cfg(not(any(target_os = "linux", target_os = "windows")))'.dependencies]
|
121
121
|
mimalloc = { version = "0.1", default-features = false }
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#[cfg(target_os = "linux")]
|
2
|
+
use jemallocator::Jemalloc;
|
3
|
+
|
4
|
+
#[cfg(not(any(target_os = "linux", target_os = "windows")))]
|
5
|
+
use mimalloc::MiMalloc;
|
6
|
+
|
7
|
+
#[global_allocator]
|
8
|
+
#[cfg(target_os = "linux")]
|
9
|
+
static ALLOC: Jemalloc = Jemalloc;
|
10
|
+
|
11
|
+
#[global_allocator]
|
12
|
+
#[cfg(not(any(target_os = "linux", target_os = "windows")))]
|
13
|
+
static ALLOC: MiMalloc = MiMalloc;
|
@@ -56,17 +56,15 @@ impl IntoValue for Wrap<&BinaryChunked> {
|
|
56
56
|
}
|
57
57
|
|
58
58
|
impl IntoValue for Wrap<&StructChunked> {
|
59
|
-
fn into_value_with(self,
|
59
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
60
60
|
let s = self.0.clone().into_series();
|
61
61
|
// todo! iterate its chunks and flatten.
|
62
62
|
// make series::iter() accept a chunk index.
|
63
63
|
let s = s.rechunk();
|
64
|
-
let iter = s.iter().map(|av| {
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
unreachable!()
|
69
|
-
}
|
64
|
+
let iter = s.iter().map(|av| match av {
|
65
|
+
AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
66
|
+
AnyValue::Null => ruby.qnil().as_value(),
|
67
|
+
_ => unreachable!(),
|
70
68
|
});
|
71
69
|
|
72
70
|
RArray::from_iter(iter).into_value()
|
@@ -26,12 +26,6 @@ use crate::object::OBJECT_NAME;
|
|
26
26
|
use crate::rb_modules::series;
|
27
27
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
28
28
|
|
29
|
-
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
30
|
-
// Safety:
|
31
|
-
// Wrap is transparent.
|
32
|
-
unsafe { std::mem::transmute(slice) }
|
33
|
-
}
|
34
|
-
|
35
29
|
pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
|
36
30
|
// Safety:
|
37
31
|
// Wrap is transparent.
|
@@ -348,12 +348,11 @@ impl RbDataFrame {
|
|
348
348
|
value_name: Option<String>,
|
349
349
|
variable_name: Option<String>,
|
350
350
|
) -> RbResult<Self> {
|
351
|
-
let args =
|
351
|
+
let args = UnpivotArgsIR {
|
352
352
|
on: strings_to_smartstrings(on),
|
353
353
|
index: strings_to_smartstrings(index),
|
354
354
|
value_name: value_name.map(|s| s.into()),
|
355
355
|
variable_name: variable_name.map(|s| s.into()),
|
356
|
-
streamable: false,
|
357
356
|
};
|
358
357
|
|
359
358
|
let df = self.df.borrow().unpivot2(args).map_err(RbPolarsErr::from)?;
|
@@ -145,7 +145,7 @@ impl RbDataFrame {
|
|
145
145
|
.with_projection(projection)
|
146
146
|
.with_columns(columns)
|
147
147
|
.read_parallel(parallel.0)
|
148
|
-
.
|
148
|
+
.with_slice(n_rows.map(|x| (0, x)))
|
149
149
|
.with_row_index(row_index)
|
150
150
|
.set_low_memory(low_memory)
|
151
151
|
.use_statistics(use_statistics)
|
@@ -156,7 +156,7 @@ impl RbDataFrame {
|
|
156
156
|
.with_projection(projection)
|
157
157
|
.with_columns(columns)
|
158
158
|
.read_parallel(parallel.0)
|
159
|
-
.
|
159
|
+
.with_slice(n_rows.map(|x| (0, x)))
|
160
160
|
.with_row_index(row_index)
|
161
161
|
.use_statistics(use_statistics)
|
162
162
|
.set_rechunk(rechunk)
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -7,15 +7,11 @@ impl RbExpr {
|
|
7
7
|
self.inner == other.inner
|
8
8
|
}
|
9
9
|
|
10
|
-
pub fn meta_pop(&self) -> RArray {
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
.pop()
|
16
|
-
.into_iter()
|
17
|
-
.map(RbExpr::from),
|
18
|
-
)
|
10
|
+
pub fn meta_pop(&self) -> RbResult<RArray> {
|
11
|
+
let exprs = self.inner.clone().meta().pop().map_err(RbPolarsErr::from)?;
|
12
|
+
Ok(RArray::from_iter(
|
13
|
+
exprs.iter().map(|e| RbExpr::from(e.clone())),
|
14
|
+
))
|
19
15
|
}
|
20
16
|
|
21
17
|
pub fn meta_root_names(&self) -> Vec<String> {
|
@@ -64,6 +64,6 @@ pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
|
|
64
64
|
for item in seq.into_iter() {
|
65
65
|
dfs.push(get_df(item)?);
|
66
66
|
}
|
67
|
-
let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
|
67
|
+
let df = functions::concat_df_horizontal(&dfs, true).map_err(RbPolarsErr::from)?;
|
68
68
|
Ok(df.into())
|
69
69
|
}
|
@@ -162,6 +162,7 @@ impl RbLazyFrame {
|
|
162
162
|
hive_schema: Option<Wrap<Schema>>,
|
163
163
|
try_parse_hive_dates: bool,
|
164
164
|
glob: bool,
|
165
|
+
include_file_paths: Option<String>,
|
165
166
|
) -> RbResult<Self> {
|
166
167
|
let parallel = parallel.0;
|
167
168
|
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
@@ -196,6 +197,7 @@ impl RbLazyFrame {
|
|
196
197
|
use_statistics,
|
197
198
|
hive_options,
|
198
199
|
glob,
|
200
|
+
include_file_paths: include_file_paths.map(Arc::from),
|
199
201
|
};
|
200
202
|
|
201
203
|
let lf = if path.is_some() {
|
@@ -207,6 +209,7 @@ impl RbLazyFrame {
|
|
207
209
|
Ok(lf.into())
|
208
210
|
}
|
209
211
|
|
212
|
+
#[allow(clippy::too_many_arguments)]
|
210
213
|
pub fn new_from_ipc(
|
211
214
|
path: String,
|
212
215
|
n_rows: Option<usize>,
|
@@ -214,12 +217,23 @@ impl RbLazyFrame {
|
|
214
217
|
rechunk: bool,
|
215
218
|
row_index: Option<(String, IdxSize)>,
|
216
219
|
memory_map: bool,
|
220
|
+
hive_partitioning: Option<bool>,
|
221
|
+
hive_schema: Option<Wrap<Schema>>,
|
222
|
+
try_parse_hive_dates: bool,
|
223
|
+
include_file_paths: Option<String>,
|
217
224
|
) -> RbResult<Self> {
|
218
225
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
219
226
|
name: Arc::from(name.as_str()),
|
220
227
|
offset,
|
221
228
|
});
|
222
229
|
|
230
|
+
let hive_options = HiveOptions {
|
231
|
+
enabled: hive_partitioning,
|
232
|
+
hive_start_idx: 0,
|
233
|
+
schema: hive_schema.map(|x| Arc::new(x.0)),
|
234
|
+
try_parse_dates: try_parse_hive_dates,
|
235
|
+
};
|
236
|
+
|
223
237
|
let args = ScanArgsIpc {
|
224
238
|
n_rows,
|
225
239
|
cache,
|
@@ -227,6 +241,8 @@ impl RbLazyFrame {
|
|
227
241
|
row_index,
|
228
242
|
memory_map,
|
229
243
|
cloud_options: None,
|
244
|
+
hive_options,
|
245
|
+
include_file_paths: include_file_paths.map(Arc::from),
|
230
246
|
};
|
231
247
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
232
248
|
Ok(lf.into())
|
@@ -348,7 +364,7 @@ impl RbLazyFrame {
|
|
348
364
|
compression_level: Option<i32>,
|
349
365
|
statistics: Wrap<StatisticsOptions>,
|
350
366
|
row_group_size: Option<usize>,
|
351
|
-
|
367
|
+
data_page_size: Option<usize>,
|
352
368
|
maintain_order: bool,
|
353
369
|
) -> RbResult<()> {
|
354
370
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
@@ -357,7 +373,7 @@ impl RbLazyFrame {
|
|
357
373
|
compression,
|
358
374
|
statistics: statistics.0,
|
359
375
|
row_group_size,
|
360
|
-
|
376
|
+
data_page_size,
|
361
377
|
maintain_order,
|
362
378
|
};
|
363
379
|
|
@@ -752,22 +768,22 @@ impl RbLazyFrame {
|
|
752
768
|
|
753
769
|
pub fn unpivot(
|
754
770
|
&self,
|
755
|
-
on:
|
756
|
-
index:
|
771
|
+
on: RArray,
|
772
|
+
index: RArray,
|
757
773
|
value_name: Option<String>,
|
758
774
|
variable_name: Option<String>,
|
759
|
-
|
760
|
-
|
761
|
-
let
|
762
|
-
|
763
|
-
|
775
|
+
) -> RbResult<Self> {
|
776
|
+
let on = rb_exprs_to_exprs(on)?;
|
777
|
+
let index = rb_exprs_to_exprs(index)?;
|
778
|
+
let args = UnpivotArgsDSL {
|
779
|
+
on: on.into_iter().map(|e| e.into()).collect(),
|
780
|
+
index: index.into_iter().map(|e| e.into()).collect(),
|
764
781
|
value_name: value_name.map(|s| s.into()),
|
765
782
|
variable_name: variable_name.map(|s| s.into()),
|
766
|
-
streamable,
|
767
783
|
};
|
768
784
|
|
769
785
|
let ldf = self.ldf.borrow().clone();
|
770
|
-
ldf.unpivot(args).into()
|
786
|
+
Ok(ldf.unpivot(args).into())
|
771
787
|
}
|
772
788
|
|
773
789
|
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
mod allocator;
|
1
2
|
mod batched_csv;
|
2
3
|
mod conversion;
|
3
4
|
mod dataframe;
|
@@ -31,20 +32,6 @@ use magnus::{define_module, function, method, prelude::*, Error, Ruby};
|
|
31
32
|
use series::RbSeries;
|
32
33
|
use sql::RbSQLContext;
|
33
34
|
|
34
|
-
#[cfg(target_os = "linux")]
|
35
|
-
use jemallocator::Jemalloc;
|
36
|
-
|
37
|
-
#[cfg(not(target_os = "linux"))]
|
38
|
-
use mimalloc::MiMalloc;
|
39
|
-
|
40
|
-
#[global_allocator]
|
41
|
-
#[cfg(target_os = "linux")]
|
42
|
-
static GLOBAL: Jemalloc = Jemalloc;
|
43
|
-
|
44
|
-
#[global_allocator]
|
45
|
-
#[cfg(not(target_os = "linux"))]
|
46
|
-
static GLOBAL: MiMalloc = MiMalloc;
|
47
|
-
|
48
35
|
type RbResult<T> = Result<T, Error>;
|
49
36
|
|
50
37
|
#[magnus::init]
|
@@ -722,9 +709,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
722
709
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
723
710
|
class.define_singleton_method(
|
724
711
|
"new_from_parquet",
|
725
|
-
function!(RbLazyFrame::new_from_parquet,
|
712
|
+
function!(RbLazyFrame::new_from_parquet, 14),
|
726
713
|
)?;
|
727
|
-
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc,
|
714
|
+
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
728
715
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
729
716
|
class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
|
730
717
|
class.define_method(
|
@@ -780,7 +767,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
780
767
|
class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
|
781
768
|
class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
|
782
769
|
class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
|
783
|
-
class.define_method("unpivot", method!(RbLazyFrame::unpivot,
|
770
|
+
class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
|
784
771
|
class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
|
785
772
|
class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
|
786
773
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
@@ -878,7 +865,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
878
865
|
class.define_method("arg_max", method!(RbSeries::arg_max, 0))?;
|
879
866
|
class.define_method("take_with_series", method!(RbSeries::take_with_series, 1))?;
|
880
867
|
class.define_method("null_count", method!(RbSeries::null_count, 0))?;
|
881
|
-
class.define_method("
|
868
|
+
class.define_method("has_nulls", method!(RbSeries::has_nulls, 0))?;
|
882
869
|
class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
|
883
870
|
class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
|
884
871
|
class.define_method("equals", method!(RbSeries::equals, 4))?;
|