polars-df 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +192 -186
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +13 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +187 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +1 -1
- data/ext/polars/src/expr/datatype.rs +14 -0
- data/ext/polars/src/expr/general.rs +22 -17
- data/ext/polars/src/expr/list.rs +21 -2
- data/ext/polars/src/expr/meta.rs +0 -34
- data/ext/polars/src/expr/mod.rs +3 -1
- data/ext/polars/src/expr/name.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +1 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +14 -6
- data/ext/polars/src/file.rs +11 -5
- data/ext/polars/src/functions/io.rs +2 -11
- data/ext/polars/src/functions/lazy.rs +22 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +74 -112
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +98 -20
- data/ext/polars/src/map/dataframe.rs +7 -7
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +8 -8
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +12 -207
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +34 -31
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +204 -7
- data/lib/polars/list_name_space.rb +120 -1
- data/lib/polars/meta_expr.rb +7 -22
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +34 -11
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +11 -0
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +17 -2
@@ -1,10 +1,8 @@
|
|
1
|
-
use magnus::{exception, Error, IntoValue, Value};
|
1
|
+
use magnus::{Error, IntoValue, Value, exception};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars::series::IsSorted;
|
4
4
|
|
5
|
-
use crate::apply_method_all_arrow_series2;
|
6
5
|
use crate::conversion::*;
|
7
|
-
use crate::map::series::{call_lambda_and_extract, ApplyLambda};
|
8
6
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
|
9
7
|
|
10
8
|
impl RbSeries {
|
@@ -42,21 +40,15 @@ impl RbSeries {
|
|
42
40
|
}
|
43
41
|
|
44
42
|
pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
|
45
|
-
let binding = self.series.borrow();
|
46
|
-
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
47
|
-
Ok(ca.uses_lexical_ordering())
|
43
|
+
Ok(true)
|
48
44
|
}
|
49
45
|
|
50
46
|
pub fn cat_is_local(&self) -> RbResult<bool> {
|
51
|
-
let binding = self.series.borrow();
|
52
|
-
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
53
|
-
Ok(ca.get_rev_map().is_local())
|
47
|
+
Ok(false)
|
54
48
|
}
|
55
49
|
|
56
50
|
pub fn cat_to_local(&self) -> RbResult<Self> {
|
57
|
-
let binding = self.series.borrow();
|
58
|
-
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
59
|
-
Ok(ca.to_local().into_series().into())
|
51
|
+
Ok(self.clone())
|
60
52
|
}
|
61
53
|
|
62
54
|
pub fn estimated_size(&self) -> usize {
|
@@ -75,7 +67,7 @@ impl RbSeries {
|
|
75
67
|
if val == v_trunc {
|
76
68
|
val
|
77
69
|
} else {
|
78
|
-
format!("{}…", v_trunc)
|
70
|
+
format!("{v_trunc}…")
|
79
71
|
}
|
80
72
|
} else {
|
81
73
|
val
|
@@ -305,198 +297,6 @@ impl RbSeries {
|
|
305
297
|
RbSeries::new(self.series.borrow().clone())
|
306
298
|
}
|
307
299
|
|
308
|
-
pub fn apply_lambda(
|
309
|
-
&self,
|
310
|
-
lambda: Value,
|
311
|
-
output_type: Option<Wrap<DataType>>,
|
312
|
-
skip_nulls: bool,
|
313
|
-
) -> RbResult<Self> {
|
314
|
-
let series = &self.series.borrow();
|
315
|
-
|
316
|
-
let output_type = output_type.map(|dt| dt.0);
|
317
|
-
|
318
|
-
macro_rules! dispatch_apply {
|
319
|
-
($self:expr, $method:ident, $($args:expr),*) => {
|
320
|
-
if matches!($self.dtype(), DataType::Object(_)) {
|
321
|
-
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
322
|
-
// ca.$method($($args),*)
|
323
|
-
todo!()
|
324
|
-
} else {
|
325
|
-
apply_method_all_arrow_series2!(
|
326
|
-
$self,
|
327
|
-
$method,
|
328
|
-
$($args),*
|
329
|
-
)
|
330
|
-
}
|
331
|
-
|
332
|
-
}
|
333
|
-
|
334
|
-
}
|
335
|
-
|
336
|
-
if matches!(
|
337
|
-
series.dtype(),
|
338
|
-
DataType::Datetime(_, _)
|
339
|
-
| DataType::Date
|
340
|
-
| DataType::Duration(_)
|
341
|
-
| DataType::Categorical(_, _)
|
342
|
-
| DataType::Time
|
343
|
-
) || !skip_nulls
|
344
|
-
{
|
345
|
-
let mut avs = Vec::with_capacity(series.len());
|
346
|
-
let iter = series.iter().map(|av| {
|
347
|
-
let input = Wrap(av);
|
348
|
-
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
|
349
|
-
.unwrap()
|
350
|
-
.0
|
351
|
-
});
|
352
|
-
avs.extend(iter);
|
353
|
-
return Ok(Series::new(self.name().into(), &avs).into());
|
354
|
-
}
|
355
|
-
|
356
|
-
let out = match output_type {
|
357
|
-
Some(DataType::Int8) => {
|
358
|
-
let ca: Int8Chunked = dispatch_apply!(
|
359
|
-
series,
|
360
|
-
apply_lambda_with_primitive_out_type,
|
361
|
-
lambda,
|
362
|
-
0,
|
363
|
-
None
|
364
|
-
)?;
|
365
|
-
ca.into_series()
|
366
|
-
}
|
367
|
-
Some(DataType::Int16) => {
|
368
|
-
let ca: Int16Chunked = dispatch_apply!(
|
369
|
-
series,
|
370
|
-
apply_lambda_with_primitive_out_type,
|
371
|
-
lambda,
|
372
|
-
0,
|
373
|
-
None
|
374
|
-
)?;
|
375
|
-
ca.into_series()
|
376
|
-
}
|
377
|
-
Some(DataType::Int32) => {
|
378
|
-
let ca: Int32Chunked = dispatch_apply!(
|
379
|
-
series,
|
380
|
-
apply_lambda_with_primitive_out_type,
|
381
|
-
lambda,
|
382
|
-
0,
|
383
|
-
None
|
384
|
-
)?;
|
385
|
-
ca.into_series()
|
386
|
-
}
|
387
|
-
Some(DataType::Int64) => {
|
388
|
-
let ca: Int64Chunked = dispatch_apply!(
|
389
|
-
series,
|
390
|
-
apply_lambda_with_primitive_out_type,
|
391
|
-
lambda,
|
392
|
-
0,
|
393
|
-
None
|
394
|
-
)?;
|
395
|
-
ca.into_series()
|
396
|
-
}
|
397
|
-
Some(DataType::UInt8) => {
|
398
|
-
let ca: UInt8Chunked = dispatch_apply!(
|
399
|
-
series,
|
400
|
-
apply_lambda_with_primitive_out_type,
|
401
|
-
lambda,
|
402
|
-
0,
|
403
|
-
None
|
404
|
-
)?;
|
405
|
-
ca.into_series()
|
406
|
-
}
|
407
|
-
Some(DataType::UInt16) => {
|
408
|
-
let ca: UInt16Chunked = dispatch_apply!(
|
409
|
-
series,
|
410
|
-
apply_lambda_with_primitive_out_type,
|
411
|
-
lambda,
|
412
|
-
0,
|
413
|
-
None
|
414
|
-
)?;
|
415
|
-
ca.into_series()
|
416
|
-
}
|
417
|
-
Some(DataType::UInt32) => {
|
418
|
-
let ca: UInt32Chunked = dispatch_apply!(
|
419
|
-
series,
|
420
|
-
apply_lambda_with_primitive_out_type,
|
421
|
-
lambda,
|
422
|
-
0,
|
423
|
-
None
|
424
|
-
)?;
|
425
|
-
ca.into_series()
|
426
|
-
}
|
427
|
-
Some(DataType::UInt64) => {
|
428
|
-
let ca: UInt64Chunked = dispatch_apply!(
|
429
|
-
series,
|
430
|
-
apply_lambda_with_primitive_out_type,
|
431
|
-
lambda,
|
432
|
-
0,
|
433
|
-
None
|
434
|
-
)?;
|
435
|
-
ca.into_series()
|
436
|
-
}
|
437
|
-
Some(DataType::Float32) => {
|
438
|
-
let ca: Float32Chunked = dispatch_apply!(
|
439
|
-
series,
|
440
|
-
apply_lambda_with_primitive_out_type,
|
441
|
-
lambda,
|
442
|
-
0,
|
443
|
-
None
|
444
|
-
)?;
|
445
|
-
ca.into_series()
|
446
|
-
}
|
447
|
-
Some(DataType::Float64) => {
|
448
|
-
let ca: Float64Chunked = dispatch_apply!(
|
449
|
-
series,
|
450
|
-
apply_lambda_with_primitive_out_type,
|
451
|
-
lambda,
|
452
|
-
0,
|
453
|
-
None
|
454
|
-
)?;
|
455
|
-
ca.into_series()
|
456
|
-
}
|
457
|
-
Some(DataType::Boolean) => {
|
458
|
-
let ca: BooleanChunked =
|
459
|
-
dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
|
460
|
-
ca.into_series()
|
461
|
-
}
|
462
|
-
Some(DataType::Date) => {
|
463
|
-
let ca: Int32Chunked = dispatch_apply!(
|
464
|
-
series,
|
465
|
-
apply_lambda_with_primitive_out_type,
|
466
|
-
lambda,
|
467
|
-
0,
|
468
|
-
None
|
469
|
-
)?;
|
470
|
-
ca.into_date().into_series()
|
471
|
-
}
|
472
|
-
Some(DataType::Datetime(tu, tz)) => {
|
473
|
-
let ca: Int64Chunked = dispatch_apply!(
|
474
|
-
series,
|
475
|
-
apply_lambda_with_primitive_out_type,
|
476
|
-
lambda,
|
477
|
-
0,
|
478
|
-
None
|
479
|
-
)?;
|
480
|
-
ca.into_datetime(tu, tz).into_series()
|
481
|
-
}
|
482
|
-
Some(DataType::String) => {
|
483
|
-
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
484
|
-
|
485
|
-
ca.into_series()
|
486
|
-
}
|
487
|
-
Some(DataType::Object(_)) => {
|
488
|
-
let ca =
|
489
|
-
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
490
|
-
ca.into_series()
|
491
|
-
}
|
492
|
-
None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
493
|
-
|
494
|
-
_ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
495
|
-
};
|
496
|
-
|
497
|
-
Ok(RbSeries::new(out))
|
498
|
-
}
|
499
|
-
|
500
300
|
pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
|
501
301
|
let binding = mask.series.borrow();
|
502
302
|
let mask = binding.bool().map_err(RbPolarsErr::from)?;
|
@@ -508,11 +308,16 @@ impl RbSeries {
|
|
508
308
|
Ok(RbSeries::new(s))
|
509
309
|
}
|
510
310
|
|
511
|
-
pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
|
311
|
+
pub fn to_dummies(
|
312
|
+
&self,
|
313
|
+
sep: Option<String>,
|
314
|
+
drop_first: bool,
|
315
|
+
drop_nulls: bool,
|
316
|
+
) -> RbResult<RbDataFrame> {
|
512
317
|
let df = self
|
513
318
|
.series
|
514
319
|
.borrow()
|
515
|
-
.to_dummies(sep.as_deref(), drop_first)
|
320
|
+
.to_dummies(sep.as_deref(), drop_first, drop_nulls)
|
516
321
|
.map_err(RbPolarsErr::from)?;
|
517
322
|
Ok(df.into())
|
518
323
|
}
|
@@ -1,13 +1,13 @@
|
|
1
1
|
use arrow::array::Array;
|
2
2
|
use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
3
|
-
use magnus::prelude::*;
|
4
3
|
use magnus::Value;
|
4
|
+
use magnus::prelude::*;
|
5
5
|
use polars::prelude::*;
|
6
6
|
|
7
7
|
use super::RbSeries;
|
8
8
|
|
9
|
-
use crate::exceptions::RbValueError;
|
10
9
|
use crate::RbResult;
|
10
|
+
use crate::exceptions::RbValueError;
|
11
11
|
|
12
12
|
/// Import `arrow_c_stream` across Ruby boundary.
|
13
13
|
fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
|
@@ -0,0 +1,227 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
|
3
|
+
use super::RbSeries;
|
4
|
+
use crate::map::check_nested_object;
|
5
|
+
use crate::map::series::{ApplyLambda, call_lambda_and_extract};
|
6
|
+
use crate::prelude::*;
|
7
|
+
use crate::{RbPolarsErr, RbResult};
|
8
|
+
use crate::{apply_method_all_arrow_series2, raise_err};
|
9
|
+
|
10
|
+
impl RbSeries {
|
11
|
+
pub fn map_elements(
|
12
|
+
&self,
|
13
|
+
function: Value,
|
14
|
+
return_dtype: Option<Wrap<DataType>>,
|
15
|
+
skip_nulls: bool,
|
16
|
+
) -> RbResult<Self> {
|
17
|
+
let series = &self.series.borrow();
|
18
|
+
|
19
|
+
if return_dtype.is_none() {
|
20
|
+
polars_warn!(
|
21
|
+
MapWithoutReturnDtypeWarning,
|
22
|
+
"Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. \
|
23
|
+
Specify `return_dtype` to silence this warning."
|
24
|
+
)
|
25
|
+
}
|
26
|
+
|
27
|
+
if skip_nulls && (series.null_count() == series.len()) {
|
28
|
+
if let Some(return_dtype) = return_dtype {
|
29
|
+
return Ok(
|
30
|
+
Series::full_null(series.name().clone(), series.len(), &return_dtype.0).into(),
|
31
|
+
);
|
32
|
+
}
|
33
|
+
let msg = "The output type of the 'map_elements' function cannot be determined.\n\
|
34
|
+
The function was never called because 'skip_nulls: true' and all values are null.\n\
|
35
|
+
Consider setting 'skip_nulls: false' or setting the 'return_dtype'.";
|
36
|
+
raise_err!(msg, ComputeError)
|
37
|
+
}
|
38
|
+
|
39
|
+
let return_dtype = return_dtype.map(|dt| dt.0);
|
40
|
+
|
41
|
+
macro_rules! dispatch_apply {
|
42
|
+
($self:expr, $method:ident, $($args:expr),*) => {
|
43
|
+
match $self.dtype() {
|
44
|
+
DataType::Object(_) => {
|
45
|
+
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
46
|
+
// ca.$method($($args),*)
|
47
|
+
todo!()
|
48
|
+
}
|
49
|
+
_ => {
|
50
|
+
apply_method_all_arrow_series2!(
|
51
|
+
$self,
|
52
|
+
$method,
|
53
|
+
$($args),*
|
54
|
+
)
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
}
|
60
|
+
|
61
|
+
if matches!(
|
62
|
+
series.dtype(),
|
63
|
+
DataType::Datetime(_, _)
|
64
|
+
| DataType::Date
|
65
|
+
| DataType::Duration(_)
|
66
|
+
| DataType::Categorical(_, _)
|
67
|
+
| DataType::Enum(_, _)
|
68
|
+
| DataType::Binary
|
69
|
+
| DataType::Array(_, _)
|
70
|
+
| DataType::Time
|
71
|
+
| DataType::Decimal(_, _)
|
72
|
+
) || !skip_nulls
|
73
|
+
{
|
74
|
+
let mut avs = Vec::with_capacity(series.len());
|
75
|
+
let s = series.rechunk();
|
76
|
+
|
77
|
+
for av in s.iter() {
|
78
|
+
let out = match (skip_nulls, av) {
|
79
|
+
(true, AnyValue::Null) => AnyValue::Null,
|
80
|
+
(_, av) => {
|
81
|
+
let av: Option<Wrap<AnyValue>> =
|
82
|
+
call_lambda_and_extract(function, Wrap(av))?;
|
83
|
+
match av {
|
84
|
+
None => AnyValue::Null,
|
85
|
+
Some(av) => av.0,
|
86
|
+
}
|
87
|
+
}
|
88
|
+
};
|
89
|
+
avs.push(out)
|
90
|
+
}
|
91
|
+
let out = Series::new(series.name().clone(), &avs);
|
92
|
+
let dtype = out.dtype();
|
93
|
+
if dtype.is_nested() {
|
94
|
+
check_nested_object(dtype)?;
|
95
|
+
}
|
96
|
+
|
97
|
+
return Ok(out.into());
|
98
|
+
}
|
99
|
+
|
100
|
+
let out = match return_dtype {
|
101
|
+
Some(DataType::Int8) => {
|
102
|
+
let ca: Int8Chunked = dispatch_apply!(
|
103
|
+
series,
|
104
|
+
apply_lambda_with_primitive_out_type,
|
105
|
+
function,
|
106
|
+
0,
|
107
|
+
None
|
108
|
+
)?;
|
109
|
+
ca.into_series()
|
110
|
+
}
|
111
|
+
Some(DataType::Int16) => {
|
112
|
+
let ca: Int16Chunked = dispatch_apply!(
|
113
|
+
series,
|
114
|
+
apply_lambda_with_primitive_out_type,
|
115
|
+
function,
|
116
|
+
0,
|
117
|
+
None
|
118
|
+
)?;
|
119
|
+
ca.into_series()
|
120
|
+
}
|
121
|
+
Some(DataType::Int32) => {
|
122
|
+
let ca: Int32Chunked = dispatch_apply!(
|
123
|
+
series,
|
124
|
+
apply_lambda_with_primitive_out_type,
|
125
|
+
function,
|
126
|
+
0,
|
127
|
+
None
|
128
|
+
)?;
|
129
|
+
ca.into_series()
|
130
|
+
}
|
131
|
+
Some(DataType::Int64) => {
|
132
|
+
let ca: Int64Chunked = dispatch_apply!(
|
133
|
+
series,
|
134
|
+
apply_lambda_with_primitive_out_type,
|
135
|
+
function,
|
136
|
+
0,
|
137
|
+
None
|
138
|
+
)?;
|
139
|
+
ca.into_series()
|
140
|
+
}
|
141
|
+
Some(DataType::UInt8) => {
|
142
|
+
let ca: UInt8Chunked = dispatch_apply!(
|
143
|
+
series,
|
144
|
+
apply_lambda_with_primitive_out_type,
|
145
|
+
function,
|
146
|
+
0,
|
147
|
+
None
|
148
|
+
)?;
|
149
|
+
ca.into_series()
|
150
|
+
}
|
151
|
+
Some(DataType::UInt16) => {
|
152
|
+
let ca: UInt16Chunked = dispatch_apply!(
|
153
|
+
series,
|
154
|
+
apply_lambda_with_primitive_out_type,
|
155
|
+
function,
|
156
|
+
0,
|
157
|
+
None
|
158
|
+
)?;
|
159
|
+
ca.into_series()
|
160
|
+
}
|
161
|
+
Some(DataType::UInt32) => {
|
162
|
+
let ca: UInt32Chunked = dispatch_apply!(
|
163
|
+
series,
|
164
|
+
apply_lambda_with_primitive_out_type,
|
165
|
+
function,
|
166
|
+
0,
|
167
|
+
None
|
168
|
+
)?;
|
169
|
+
ca.into_series()
|
170
|
+
}
|
171
|
+
Some(DataType::UInt64) => {
|
172
|
+
let ca: UInt64Chunked = dispatch_apply!(
|
173
|
+
series,
|
174
|
+
apply_lambda_with_primitive_out_type,
|
175
|
+
function,
|
176
|
+
0,
|
177
|
+
None
|
178
|
+
)?;
|
179
|
+
ca.into_series()
|
180
|
+
}
|
181
|
+
Some(DataType::Float32) => {
|
182
|
+
let ca: Float32Chunked = dispatch_apply!(
|
183
|
+
series,
|
184
|
+
apply_lambda_with_primitive_out_type,
|
185
|
+
function,
|
186
|
+
0,
|
187
|
+
None
|
188
|
+
)?;
|
189
|
+
ca.into_series()
|
190
|
+
}
|
191
|
+
Some(DataType::Float64) => {
|
192
|
+
let ca: Float64Chunked = dispatch_apply!(
|
193
|
+
series,
|
194
|
+
apply_lambda_with_primitive_out_type,
|
195
|
+
function,
|
196
|
+
0,
|
197
|
+
None
|
198
|
+
)?;
|
199
|
+
ca.into_series()
|
200
|
+
}
|
201
|
+
Some(DataType::Boolean) => {
|
202
|
+
let ca: BooleanChunked =
|
203
|
+
dispatch_apply!(series, apply_lambda_with_bool_out_type, function, 0, None)?;
|
204
|
+
ca.into_series()
|
205
|
+
}
|
206
|
+
Some(DataType::String) => {
|
207
|
+
let ca =
|
208
|
+
dispatch_apply!(series, apply_lambda_with_utf8_out_type, function, 0, None)?;
|
209
|
+
|
210
|
+
ca.into_series()
|
211
|
+
}
|
212
|
+
Some(DataType::List(_inner)) => {
|
213
|
+
todo!()
|
214
|
+
}
|
215
|
+
Some(DataType::Object(_)) => {
|
216
|
+
let ca =
|
217
|
+
dispatch_apply!(series, apply_lambda_with_object_out_type, function, 0, None)?;
|
218
|
+
ca.into_series()
|
219
|
+
}
|
220
|
+
None => return dispatch_apply!(series, apply_lambda_unknown, function),
|
221
|
+
|
222
|
+
_ => return dispatch_apply!(series, apply_lambda_unknown, function),
|
223
|
+
};
|
224
|
+
|
225
|
+
Ok(RbSeries::new(out))
|
226
|
+
}
|
227
|
+
}
|
@@ -100,7 +100,7 @@ fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series>
|
|
100
100
|
let values = values.str()?;
|
101
101
|
ca.scatter(idx, values)
|
102
102
|
}
|
103
|
-
_ => panic!("not yet implemented for dtype: {}", logical_dtype),
|
103
|
+
_ => panic!("not yet implemented for dtype: {logical_dtype}"),
|
104
104
|
};
|
105
105
|
|
106
106
|
s.and_then(|s| s.cast(&logical_dtype))
|
data/ext/polars/src/utils.rs
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
use crate::{RbErr, RbPolarsErr};
|
2
|
+
|
1
3
|
#[macro_export]
|
2
4
|
macro_rules! apply_method_all_arrow_series2 {
|
3
5
|
($self:expr, $method:ident, $($args:expr),*) => {
|
@@ -14,11 +16,17 @@ macro_rules! apply_method_all_arrow_series2 {
|
|
14
16
|
DataType::Int64 => $self.i64().unwrap().$method($($args),*),
|
15
17
|
DataType::Float32 => $self.f32().unwrap().$method($($args),*),
|
16
18
|
DataType::Float64 => $self.f64().unwrap().$method($($args),*),
|
17
|
-
DataType::Date => $self.date().unwrap().$method($($args),*),
|
18
|
-
DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
|
19
|
+
DataType::Date => $self.date().unwrap().physical().$method($($args),*),
|
20
|
+
DataType::Datetime(_, _) => $self.datetime().unwrap().physical().$method($($args),*),
|
19
21
|
// DataType::List(_) => $self.list().unwrap().$method($($args),*),
|
20
22
|
DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
|
21
23
|
dt => panic!("dtype {:?} not supported", dt)
|
22
24
|
}
|
23
25
|
}
|
24
26
|
}
|
27
|
+
|
28
|
+
/// Boilerplate for `|e| RbPolarsErr::from(e).into()`
|
29
|
+
#[allow(unused)]
|
30
|
+
pub(crate) fn to_rb_err<E: Into<RbPolarsErr>>(e: E) -> RbErr {
|
31
|
+
e.into().into()
|
32
|
+
}
|
@@ -31,56 +31,16 @@ module Polars
|
|
31
31
|
|
32
32
|
# Return whether or not the column is a local categorical.
|
33
33
|
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
# @example Categoricals constructed without a string cache are considered local.
|
37
|
-
# s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
|
38
|
-
# s.cat.is_local
|
39
|
-
# # => true
|
34
|
+
# Always returns false.
|
40
35
|
#
|
41
|
-
# @example
|
42
|
-
# s = nil
|
43
|
-
# Polars::StringCache.new do
|
44
|
-
# s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
|
45
|
-
# end
|
46
|
-
# s.cat.is_local
|
47
|
-
# # => false
|
36
|
+
# @return [Boolean]
|
48
37
|
def is_local
|
49
38
|
_s.cat_is_local
|
50
39
|
end
|
51
40
|
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# This may change the underlying physical representation of the column.
|
41
|
+
# Simply returns the column as-is, local representations are deprecated.
|
55
42
|
#
|
56
43
|
# @return [Series]
|
57
|
-
#
|
58
|
-
# @example Compare the global and local representations of a categorical.
|
59
|
-
# s = nil
|
60
|
-
# Polars::StringCache.new do
|
61
|
-
# _ = Polars::Series.new("x", ["a", "b", "a"], dtype: Polars::Categorical)
|
62
|
-
# s = Polars::Series.new("y", ["c", "b", "d"], dtype: Polars::Categorical)
|
63
|
-
# end
|
64
|
-
# s.to_physical
|
65
|
-
# # =>
|
66
|
-
# # shape: (3,)
|
67
|
-
# # Series: 'y' [u32]
|
68
|
-
# # [
|
69
|
-
# # 2
|
70
|
-
# # 1
|
71
|
-
# # 3
|
72
|
-
# # ]
|
73
|
-
#
|
74
|
-
# @example
|
75
|
-
# s.cat.to_local.to_physical
|
76
|
-
# # =>
|
77
|
-
# # shape: (3,)
|
78
|
-
# # Series: 'y' [u32]
|
79
|
-
# # [
|
80
|
-
# # 0
|
81
|
-
# # 1
|
82
|
-
# # 2
|
83
|
-
# # ]
|
84
44
|
def to_local
|
85
45
|
Utils.wrap_s(_s.cat_to_local)
|
86
46
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Polars
|
2
|
+
class Catalog
|
3
|
+
module Unity
|
4
|
+
# Information for a catalog within a metastore.
|
5
|
+
CatalogInfo =
|
6
|
+
::Struct.new(
|
7
|
+
:name,
|
8
|
+
:comment,
|
9
|
+
:properties,
|
10
|
+
:options,
|
11
|
+
:storage_location,
|
12
|
+
:created_at,
|
13
|
+
:created_by,
|
14
|
+
:updated_at,
|
15
|
+
:updated_by,
|
16
|
+
keyword_init: true
|
17
|
+
)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Polars
|
2
|
+
class Catalog
|
3
|
+
module Unity
|
4
|
+
# Information for a column within a catalog table.
|
5
|
+
ColumnInfo =
|
6
|
+
::Struct.new(
|
7
|
+
:name,
|
8
|
+
:type_name,
|
9
|
+
:type_text,
|
10
|
+
:type_json,
|
11
|
+
:position,
|
12
|
+
:comment,
|
13
|
+
:partition_index,
|
14
|
+
keyword_init: true
|
15
|
+
)
|
16
|
+
|
17
|
+
class ColumnInfo
|
18
|
+
# Get the native polars datatype of this column.
|
19
|
+
#
|
20
|
+
# @note
|
21
|
+
# This functionality is considered **unstable**. It may be changed
|
22
|
+
# at any point without it being considered a breaking change.
|
23
|
+
#
|
24
|
+
# @return [Object]
|
25
|
+
def get_polars_dtype
|
26
|
+
RbCatalogClient.type_json_to_polars_type(type_json)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Polars
|
2
|
+
class Catalog
|
3
|
+
module Unity
|
4
|
+
# Information for a namespace within a catalog.
|
5
|
+
#
|
6
|
+
# This is also known by the name "schema" in unity catalog terminology.
|
7
|
+
NamespaceInfo =
|
8
|
+
::Struct.new(
|
9
|
+
:name,
|
10
|
+
:comment,
|
11
|
+
:properties,
|
12
|
+
:storage_location,
|
13
|
+
:created_at,
|
14
|
+
:created_by,
|
15
|
+
:updated_at,
|
16
|
+
:updated_by,
|
17
|
+
keyword_init: true
|
18
|
+
)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|