polars-df 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +20 -10
- data/ext/polars/src/batched_csv.rs +27 -28
- data/ext/polars/src/conversion.rs +135 -106
- data/ext/polars/src/dataframe.rs +140 -131
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +129 -286
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +201 -0
- data/ext/polars/src/expr/string.rs +94 -67
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +41 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +74 -60
- data/ext/polars/src/lib.rs +175 -91
- data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
- data/ext/polars/src/{apply → map}/mod.rs +5 -5
- data/ext/polars/src/{apply → map}/series.rs +18 -22
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +5 -5
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
- data/ext/polars/src/sql.rs +46 -0
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +23 -11
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
use
|
1
|
+
use either::Either;
|
2
|
+
use magnus::{
|
3
|
+
prelude::*, r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, RString, Value,
|
4
|
+
};
|
2
5
|
use polars::frame::row::{rows_to_schema_supertypes, Row};
|
3
6
|
use polars::frame::NullStrategy;
|
4
7
|
use polars::io::avro::AvroCompression;
|
@@ -11,12 +14,12 @@ use std::cell::RefCell;
|
|
11
14
|
use std::io::{BufWriter, Cursor};
|
12
15
|
use std::ops::Deref;
|
13
16
|
|
14
|
-
use crate::
|
17
|
+
use crate::conversion::*;
|
18
|
+
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
19
|
+
use crate::map::dataframe::{
|
15
20
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
16
21
|
apply_lambda_with_utf8_out_type,
|
17
22
|
};
|
18
|
-
use crate::conversion::*;
|
19
|
-
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
20
23
|
use crate::rb_modules;
|
21
24
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
22
25
|
use crate::{RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
@@ -45,7 +48,7 @@ impl RbDataFrame {
|
|
45
48
|
schema_overwrite: Option<Schema>,
|
46
49
|
) -> RbResult<Self> {
|
47
50
|
// object builder must be registered.
|
48
|
-
crate::
|
51
|
+
crate::on_startup::register_object_builder();
|
49
52
|
|
50
53
|
let schema =
|
51
54
|
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
|
@@ -53,7 +56,7 @@ impl RbDataFrame {
|
|
53
56
|
// replace inferred nulls with boolean
|
54
57
|
let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
|
55
58
|
DataType::Null => {
|
56
|
-
fld.coerce(DataType::Boolean);
|
59
|
+
// fld.coerce(DataType::Boolean);
|
57
60
|
fld
|
58
61
|
}
|
59
62
|
DataType::Decimal(_, _) => {
|
@@ -86,7 +89,7 @@ impl RbDataFrame {
|
|
86
89
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
87
90
|
let mut cols = Vec::new();
|
88
91
|
for i in columns.each() {
|
89
|
-
cols.push(
|
92
|
+
cols.push(<&RbSeries>::try_convert(i?)?.series.borrow().clone());
|
90
93
|
}
|
91
94
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
92
95
|
Ok(RbDataFrame::new(df))
|
@@ -99,36 +102,35 @@ impl RbDataFrame {
|
|
99
102
|
pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
|
100
103
|
// start arguments
|
101
104
|
// this pattern is needed for more than 16
|
102
|
-
let rb_f
|
103
|
-
let infer_schema_length
|
104
|
-
let chunk_size
|
105
|
-
let has_header
|
106
|
-
let ignore_errors
|
107
|
-
let n_rows
|
108
|
-
let skip_rows
|
109
|
-
let projection
|
110
|
-
let
|
111
|
-
let rechunk
|
112
|
-
let columns
|
113
|
-
let encoding
|
114
|
-
let n_threads
|
115
|
-
let path
|
116
|
-
let overwrite_dtype
|
105
|
+
let rb_f = arguments[0];
|
106
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
|
107
|
+
let chunk_size = usize::try_convert(arguments[2])?;
|
108
|
+
let has_header = bool::try_convert(arguments[3])?;
|
109
|
+
let ignore_errors = bool::try_convert(arguments[4])?;
|
110
|
+
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
111
|
+
let skip_rows = usize::try_convert(arguments[6])?;
|
112
|
+
let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
|
113
|
+
let separator = String::try_convert(arguments[8])?;
|
114
|
+
let rechunk = bool::try_convert(arguments[9])?;
|
115
|
+
let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
|
116
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
|
117
|
+
let n_threads = Option::<usize>::try_convert(arguments[12])?;
|
118
|
+
let path = Option::<String>::try_convert(arguments[13])?;
|
119
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
|
117
120
|
// TODO fix
|
118
|
-
let overwrite_dtype_slice
|
119
|
-
let low_memory
|
120
|
-
let
|
121
|
-
let quote_char
|
122
|
-
let null_values
|
123
|
-
let try_parse_dates
|
124
|
-
let skip_rows_after_header
|
125
|
-
let row_count
|
126
|
-
let sample_size
|
127
|
-
let eol_char
|
121
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
122
|
+
let low_memory = bool::try_convert(arguments[16])?;
|
123
|
+
let comment_prefix = Option::<String>::try_convert(arguments[17])?;
|
124
|
+
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
125
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
|
126
|
+
let try_parse_dates = bool::try_convert(arguments[20])?;
|
127
|
+
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
128
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
129
|
+
let sample_size = usize::try_convert(arguments[23])?;
|
130
|
+
let eol_char = String::try_convert(arguments[24])?;
|
128
131
|
// end arguments
|
129
132
|
|
130
133
|
let null_values = null_values.map(|w| w.0);
|
131
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
132
134
|
let eol_char = eol_char.as_bytes()[0];
|
133
135
|
|
134
136
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -165,7 +167,7 @@ impl RbDataFrame {
|
|
165
167
|
.infer_schema(infer_schema_length)
|
166
168
|
.has_header(has_header)
|
167
169
|
.with_n_rows(n_rows)
|
168
|
-
.
|
170
|
+
.with_separator(separator.as_bytes()[0])
|
169
171
|
.with_skip_rows(skip_rows)
|
170
172
|
.with_ignore_errors(ignore_errors)
|
171
173
|
.with_projection(projection)
|
@@ -178,7 +180,7 @@ impl RbDataFrame {
|
|
178
180
|
.with_dtypes(overwrite_dtype.map(Arc::new))
|
179
181
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
180
182
|
.low_memory(low_memory)
|
181
|
-
.
|
183
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
182
184
|
.with_null_values(null_values)
|
183
185
|
.with_try_parse_dates(try_parse_dates)
|
184
186
|
.with_quote_char(quote_char)
|
@@ -265,7 +267,7 @@ impl RbDataFrame {
|
|
265
267
|
) -> RbResult<()> {
|
266
268
|
use polars::io::avro::AvroWriter;
|
267
269
|
|
268
|
-
if let Ok(s) =
|
270
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
269
271
|
let f = std::fs::File::create(s).unwrap();
|
270
272
|
AvroWriter::new(f)
|
271
273
|
.with_compression(compression.0)
|
@@ -294,12 +296,18 @@ impl RbDataFrame {
|
|
294
296
|
Ok(df) => Ok(df.into()),
|
295
297
|
// try arrow json reader instead
|
296
298
|
// this is row oriented
|
297
|
-
Err(
|
298
|
-
let
|
299
|
-
|
300
|
-
.
|
301
|
-
|
302
|
-
|
299
|
+
Err(e) => {
|
300
|
+
let msg = format!("{e}");
|
301
|
+
if msg.contains("successful parse invalid data") {
|
302
|
+
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
303
|
+
Err(e)
|
304
|
+
} else {
|
305
|
+
let out = JsonReader::new(mmap_bytes_r)
|
306
|
+
.with_json_format(JsonFormat::Json)
|
307
|
+
.finish()
|
308
|
+
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
309
|
+
Ok(out.into())
|
310
|
+
}
|
303
311
|
}
|
304
312
|
}
|
305
313
|
}
|
@@ -341,6 +349,27 @@ impl RbDataFrame {
|
|
341
349
|
Ok(())
|
342
350
|
}
|
343
351
|
|
352
|
+
pub fn read_rows(
|
353
|
+
rb_rows: RArray,
|
354
|
+
infer_schema_length: Option<usize>,
|
355
|
+
schema_overwrite: Option<Wrap<Schema>>,
|
356
|
+
) -> RbResult<Self> {
|
357
|
+
let mut rows = Vec::with_capacity(rb_rows.len());
|
358
|
+
for v in rb_rows.each() {
|
359
|
+
let rb_row = RArray::try_convert(v?)?;
|
360
|
+
let mut row = Vec::with_capacity(rb_row.len());
|
361
|
+
for val in rb_row.each() {
|
362
|
+
row.push(Wrap::<AnyValue>::try_convert(val?)?.0);
|
363
|
+
}
|
364
|
+
rows.push(Row(row));
|
365
|
+
}
|
366
|
+
Self::finish_from_rows(
|
367
|
+
rows,
|
368
|
+
infer_schema_length,
|
369
|
+
schema_overwrite.map(|wrap| wrap.0),
|
370
|
+
)
|
371
|
+
}
|
372
|
+
|
344
373
|
pub fn read_hashes(
|
345
374
|
dicts: Value,
|
346
375
|
infer_schema_length: Option<usize>,
|
@@ -395,9 +424,9 @@ impl RbDataFrame {
|
|
395
424
|
pub fn write_csv(
|
396
425
|
&self,
|
397
426
|
rb_f: Value,
|
398
|
-
|
399
|
-
|
400
|
-
|
427
|
+
include_header: bool,
|
428
|
+
separator: u8,
|
429
|
+
quote_char: u8,
|
401
430
|
batch_size: usize,
|
402
431
|
datetime_format: Option<String>,
|
403
432
|
date_format: Option<String>,
|
@@ -407,13 +436,13 @@ impl RbDataFrame {
|
|
407
436
|
) -> RbResult<()> {
|
408
437
|
let null = null_value.unwrap_or_default();
|
409
438
|
|
410
|
-
if let Ok(s) =
|
439
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
411
440
|
let f = std::fs::File::create(s).unwrap();
|
412
441
|
// no need for a buffered writer, because the csv writer does internal buffering
|
413
442
|
CsvWriter::new(f)
|
414
|
-
.
|
415
|
-
.
|
416
|
-
.
|
443
|
+
.include_header(include_header)
|
444
|
+
.with_separator(separator)
|
445
|
+
.with_quote_char(quote_char)
|
417
446
|
.with_batch_size(batch_size)
|
418
447
|
.with_datetime_format(datetime_format)
|
419
448
|
.with_date_format(date_format)
|
@@ -425,9 +454,9 @@ impl RbDataFrame {
|
|
425
454
|
} else {
|
426
455
|
let mut buf = Cursor::new(Vec::new());
|
427
456
|
CsvWriter::new(&mut buf)
|
428
|
-
.
|
429
|
-
.
|
430
|
-
.
|
457
|
+
.include_header(include_header)
|
458
|
+
.with_separator(separator)
|
459
|
+
.with_quote_char(quote_char)
|
431
460
|
.with_batch_size(batch_size)
|
432
461
|
.with_datetime_format(datetime_format)
|
433
462
|
.with_date_format(date_format)
|
@@ -449,7 +478,7 @@ impl RbDataFrame {
|
|
449
478
|
rb_f: Value,
|
450
479
|
compression: Wrap<Option<IpcCompression>>,
|
451
480
|
) -> RbResult<()> {
|
452
|
-
if let Ok(s) =
|
481
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
453
482
|
let f = std::fs::File::create(s).unwrap();
|
454
483
|
IpcWriter::new(f)
|
455
484
|
.with_compression(compression.0)
|
@@ -480,14 +509,14 @@ impl RbDataFrame {
|
|
480
509
|
.get_columns()
|
481
510
|
.iter()
|
482
511
|
.map(|s| match s.dtype() {
|
483
|
-
DataType::Object(_) => {
|
512
|
+
DataType::Object(_, _) => {
|
484
513
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
485
514
|
obj.unwrap().to_object()
|
486
515
|
}
|
487
516
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
488
517
|
}),
|
489
518
|
)
|
490
|
-
.
|
519
|
+
.as_value()
|
491
520
|
}
|
492
521
|
|
493
522
|
pub fn row_tuples(&self) -> Value {
|
@@ -499,7 +528,7 @@ impl RbDataFrame {
|
|
499
528
|
.get_columns()
|
500
529
|
.iter()
|
501
530
|
.map(|s| match s.dtype() {
|
502
|
-
DataType::Object(_) => {
|
531
|
+
DataType::Object(_, _) => {
|
503
532
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
504
533
|
obj.unwrap().to_object()
|
505
534
|
}
|
@@ -507,7 +536,7 @@ impl RbDataFrame {
|
|
507
536
|
}),
|
508
537
|
)
|
509
538
|
}))
|
510
|
-
.
|
539
|
+
.as_value()
|
511
540
|
}
|
512
541
|
|
513
542
|
pub fn to_numo(&self) -> Option<Value> {
|
@@ -537,7 +566,7 @@ impl RbDataFrame {
|
|
537
566
|
) -> RbResult<()> {
|
538
567
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
539
568
|
|
540
|
-
if let Ok(s) =
|
569
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
541
570
|
let f = std::fs::File::create(s).unwrap();
|
542
571
|
ParquetWriter::new(f)
|
543
572
|
.with_compression(compression)
|
@@ -604,7 +633,7 @@ impl RbDataFrame {
|
|
604
633
|
|
605
634
|
pub fn sample_n(
|
606
635
|
&self,
|
607
|
-
n:
|
636
|
+
n: &RbSeries,
|
608
637
|
with_replacement: bool,
|
609
638
|
shuffle: bool,
|
610
639
|
seed: Option<u64>,
|
@@ -612,14 +641,14 @@ impl RbDataFrame {
|
|
612
641
|
let df = self
|
613
642
|
.df
|
614
643
|
.borrow()
|
615
|
-
.sample_n(n, with_replacement, shuffle, seed)
|
644
|
+
.sample_n(&n.series.borrow(), with_replacement, shuffle, seed)
|
616
645
|
.map_err(RbPolarsErr::from)?;
|
617
646
|
Ok(df.into())
|
618
647
|
}
|
619
648
|
|
620
649
|
pub fn sample_frac(
|
621
650
|
&self,
|
622
|
-
frac:
|
651
|
+
frac: &RbSeries,
|
623
652
|
with_replacement: bool,
|
624
653
|
shuffle: bool,
|
625
654
|
seed: Option<u64>,
|
@@ -627,7 +656,7 @@ impl RbDataFrame {
|
|
627
656
|
let df = self
|
628
657
|
.df
|
629
658
|
.borrow()
|
630
|
-
.sample_frac(frac, with_replacement, shuffle, seed)
|
659
|
+
.sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed)
|
631
660
|
.map_err(RbPolarsErr::from)?;
|
632
661
|
Ok(df.into())
|
633
662
|
}
|
@@ -761,8 +790,8 @@ impl RbDataFrame {
|
|
761
790
|
.map(|s| RbSeries::new(s.clone()))
|
762
791
|
}
|
763
792
|
|
764
|
-
pub fn
|
765
|
-
self.df.borrow().
|
793
|
+
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
794
|
+
self.df.borrow().get_column_index(&name)
|
766
795
|
}
|
767
796
|
|
768
797
|
// TODO remove clone
|
@@ -804,18 +833,18 @@ impl RbDataFrame {
|
|
804
833
|
Ok(())
|
805
834
|
}
|
806
835
|
|
807
|
-
pub fn
|
836
|
+
pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
808
837
|
self.df
|
809
838
|
.borrow_mut()
|
810
|
-
.
|
839
|
+
.replace_column(index, new_col.series.borrow().clone())
|
811
840
|
.map_err(RbPolarsErr::from)?;
|
812
841
|
Ok(())
|
813
842
|
}
|
814
843
|
|
815
|
-
pub fn
|
844
|
+
pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
816
845
|
self.df
|
817
846
|
.borrow_mut()
|
818
|
-
.
|
847
|
+
.insert_column(index, new_col.series.borrow().clone())
|
819
848
|
.map_err(RbPolarsErr::from)?;
|
820
849
|
Ok(())
|
821
850
|
}
|
@@ -850,11 +879,11 @@ impl RbDataFrame {
|
|
850
879
|
Ok(mask.into_series().into())
|
851
880
|
}
|
852
881
|
|
853
|
-
pub fn
|
882
|
+
pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
854
883
|
if null_equal {
|
855
|
-
self.df.borrow().
|
884
|
+
self.df.borrow().equals_missing(&other.df.borrow())
|
856
885
|
} else {
|
857
|
-
self.df.borrow().
|
886
|
+
self.df.borrow().equals(&other.df.borrow())
|
858
887
|
}
|
859
888
|
}
|
860
889
|
|
@@ -942,73 +971,50 @@ impl RbDataFrame {
|
|
942
971
|
self.df.borrow().clone().lazy().into()
|
943
972
|
}
|
944
973
|
|
945
|
-
pub fn
|
946
|
-
self.df.borrow().max().into()
|
947
|
-
}
|
948
|
-
|
949
|
-
pub fn min(&self) -> Self {
|
950
|
-
self.df.borrow().min().into()
|
951
|
-
}
|
952
|
-
|
953
|
-
pub fn sum(&self) -> Self {
|
954
|
-
self.df.borrow().sum().into()
|
955
|
-
}
|
956
|
-
|
957
|
-
pub fn mean(&self) -> Self {
|
958
|
-
self.df.borrow().mean().into()
|
959
|
-
}
|
960
|
-
|
961
|
-
pub fn std(&self, ddof: u8) -> Self {
|
962
|
-
self.df.borrow().std(ddof).into()
|
963
|
-
}
|
964
|
-
|
965
|
-
pub fn var(&self, ddof: u8) -> Self {
|
966
|
-
self.df.borrow().var(ddof).into()
|
967
|
-
}
|
968
|
-
|
969
|
-
pub fn median(&self) -> Self {
|
970
|
-
self.df.borrow().median().into()
|
971
|
-
}
|
972
|
-
|
973
|
-
pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
|
974
|
+
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
974
975
|
let s = self
|
975
976
|
.df
|
976
977
|
.borrow()
|
977
|
-
.
|
978
|
+
.max_horizontal()
|
978
979
|
.map_err(RbPolarsErr::from)?;
|
979
980
|
Ok(s.map(|s| s.into()))
|
980
981
|
}
|
981
982
|
|
982
|
-
pub fn
|
983
|
-
let s = self
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?;
|
983
|
+
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
984
|
+
let s = self
|
985
|
+
.df
|
986
|
+
.borrow()
|
987
|
+
.min_horizontal()
|
988
|
+
.map_err(RbPolarsErr::from)?;
|
989
989
|
Ok(s.map(|s| s.into()))
|
990
990
|
}
|
991
991
|
|
992
|
-
pub fn
|
992
|
+
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
993
|
+
let null_strategy = if ignore_nulls {
|
994
|
+
NullStrategy::Ignore
|
995
|
+
} else {
|
996
|
+
NullStrategy::Propagate
|
997
|
+
};
|
993
998
|
let s = self
|
994
999
|
.df
|
995
1000
|
.borrow()
|
996
|
-
.
|
1001
|
+
.sum_horizontal(null_strategy)
|
997
1002
|
.map_err(RbPolarsErr::from)?;
|
998
1003
|
Ok(s.map(|s| s.into()))
|
999
1004
|
}
|
1000
1005
|
|
1001
|
-
pub fn
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1006
|
+
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
1007
|
+
let null_strategy = if ignore_nulls {
|
1008
|
+
NullStrategy::Ignore
|
1009
|
+
} else {
|
1010
|
+
NullStrategy::Propagate
|
1011
|
+
};
|
1012
|
+
let s = self
|
1007
1013
|
.df
|
1008
1014
|
.borrow()
|
1009
|
-
.
|
1015
|
+
.mean_horizontal(null_strategy)
|
1010
1016
|
.map_err(RbPolarsErr::from)?;
|
1011
|
-
Ok(
|
1017
|
+
Ok(s.map(|s| s.into()))
|
1012
1018
|
}
|
1013
1019
|
|
1014
1020
|
pub fn to_dummies(
|
@@ -1082,13 +1088,13 @@ impl RbDataFrame {
|
|
1082
1088
|
.into_datetime(tu, tz)
|
1083
1089
|
.into_series()
|
1084
1090
|
}
|
1085
|
-
Some(DataType::
|
1091
|
+
Some(DataType::String) => {
|
1086
1092
|
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1087
1093
|
}
|
1088
1094
|
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1089
1095
|
};
|
1090
1096
|
|
1091
|
-
Ok((RbSeries::from(out).
|
1097
|
+
Ok((Obj::wrap(RbSeries::from(out)).as_value(), false))
|
1092
1098
|
}
|
1093
1099
|
|
1094
1100
|
pub fn shrink_to_fit(&self) {
|
@@ -1105,17 +1111,20 @@ impl RbDataFrame {
|
|
1105
1111
|
Ok(hash.into_series().into())
|
1106
1112
|
}
|
1107
1113
|
|
1108
|
-
pub fn transpose(&self,
|
1109
|
-
let
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1114
|
+
pub fn transpose(&self, keep_names_as: Option<String>, column_names: Value) -> RbResult<Self> {
|
1115
|
+
let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
|
1116
|
+
Some(Either::Right(name))
|
1117
|
+
} else if let Ok(name) = String::try_convert(column_names) {
|
1118
|
+
Some(Either::Left(name))
|
1119
|
+
} else {
|
1120
|
+
None
|
1121
|
+
};
|
1122
|
+
Ok(self
|
1123
|
+
.df
|
1124
|
+
.borrow()
|
1125
|
+
.transpose(keep_names_as.as_deref(), new_col_names)
|
1126
|
+
.map_err(RbPolarsErr::from)?
|
1127
|
+
.into())
|
1119
1128
|
}
|
1120
1129
|
|
1121
1130
|
pub fn upsample(
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
use magnus::exception;
|
2
2
|
use magnus::Error;
|
3
|
-
use polars::error::ArrowError;
|
4
3
|
use polars::prelude::PolarsError;
|
5
4
|
|
6
5
|
pub struct RbPolarsErr {}
|
@@ -11,10 +10,6 @@ impl RbPolarsErr {
|
|
11
10
|
Error::new(exception::runtime_error(), e.to_string())
|
12
11
|
}
|
13
12
|
|
14
|
-
pub fn arrow(e: ArrowError) -> Error {
|
15
|
-
Error::new(exception::runtime_error(), e.to_string())
|
16
|
-
}
|
17
|
-
|
18
13
|
pub fn io(e: std::io::Error) -> Error {
|
19
14
|
Error::new(exception::runtime_error(), e.to_string())
|
20
15
|
}
|
data/ext/polars/src/expr/binary.rs
CHANGED
@@ -3,16 +3,28 @@ use polars::prelude::*;
|
|
3
3
|
use crate::RbExpr;
|
4
4
|
|
5
5
|
impl RbExpr {
|
6
|
-
pub fn bin_contains(&self, lit:
|
7
|
-
self.inner
|
6
|
+
pub fn bin_contains(&self, lit: &RbExpr) -> Self {
|
7
|
+
self.inner
|
8
|
+
.clone()
|
9
|
+
.binary()
|
10
|
+
.contains_literal(lit.inner.clone())
|
11
|
+
.into()
|
8
12
|
}
|
9
13
|
|
10
|
-
pub fn bin_ends_with(&self, sub:
|
11
|
-
self.inner
|
14
|
+
pub fn bin_ends_with(&self, sub: &RbExpr) -> Self {
|
15
|
+
self.inner
|
16
|
+
.clone()
|
17
|
+
.binary()
|
18
|
+
.ends_with(sub.inner.clone())
|
19
|
+
.into()
|
12
20
|
}
|
13
21
|
|
14
|
-
pub fn bin_starts_with(&self, sub:
|
15
|
-
self.inner
|
22
|
+
pub fn bin_starts_with(&self, sub: &RbExpr) -> Self {
|
23
|
+
self.inner
|
24
|
+
.clone()
|
25
|
+
.binary()
|
26
|
+
.starts_with(sub.inner.clone())
|
27
|
+
.into()
|
16
28
|
}
|
17
29
|
|
18
30
|
pub fn bin_hex_decode(&self, strict: bool) -> Self {
|
data/ext/polars/src/expr/categorical.rs
CHANGED
@@ -5,6 +5,13 @@ use crate::RbExpr;
|
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
7
|
pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
|
8
|
-
self.inner
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.cast(DataType::Categorical(None, ordering.0))
|
11
|
+
.into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn cat_get_categories(&self) -> Self {
|
15
|
+
self.inner.clone().cat().get_categories().into()
|
9
16
|
}
|
10
17
|
}
|
data/ext/polars/src/expr/datetime.rs
CHANGED
@@ -8,9 +8,8 @@ impl RbExpr {
|
|
8
8
|
self.inner.clone().dt().to_string(&format).into()
|
9
9
|
}
|
10
10
|
|
11
|
-
pub fn dt_offset_by(&self, by:
|
12
|
-
|
13
|
-
self.inner.clone().dt().offset_by(by).into()
|
11
|
+
pub fn dt_offset_by(&self, by: &RbExpr) -> Self {
|
12
|
+
self.inner.clone().dt().offset_by(by.inner.clone()).into()
|
14
13
|
}
|
15
14
|
|
16
15
|
pub fn dt_epoch_seconds(&self) -> Self {
|
@@ -38,21 +37,20 @@ impl RbExpr {
|
|
38
37
|
self.inner.clone().dt().cast_time_unit(tu.0).into()
|
39
38
|
}
|
40
39
|
|
41
|
-
pub fn dt_replace_time_zone(&self,
|
40
|
+
pub fn dt_replace_time_zone(&self, time_zone: Option<String>, ambiguous: &Self) -> Self {
|
42
41
|
self.inner
|
43
42
|
.clone()
|
44
43
|
.dt()
|
45
|
-
.replace_time_zone(
|
44
|
+
.replace_time_zone(time_zone, ambiguous.inner.clone())
|
46
45
|
.into()
|
47
46
|
}
|
48
47
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
self.inner.clone().dt().truncate(&every, &offset).into()
|
48
|
+
pub fn dt_truncate(&self, every: &Self, offset: String) -> Self {
|
49
|
+
self.inner
|
50
|
+
.clone()
|
51
|
+
.dt()
|
52
|
+
.truncate(every.inner.clone(), offset)
|
53
|
+
.into()
|
56
54
|
}
|
57
55
|
|
58
56
|
pub fn dt_month_start(&self) -> Self {
|