polars-df 0.6.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +20 -10
- data/ext/polars/src/batched_csv.rs +27 -28
- data/ext/polars/src/conversion.rs +135 -106
- data/ext/polars/src/dataframe.rs +140 -131
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +129 -286
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +201 -0
- data/ext/polars/src/expr/string.rs +94 -67
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +41 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +74 -60
- data/ext/polars/src/lib.rs +175 -91
- data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
- data/ext/polars/src/{apply → map}/mod.rs +5 -5
- data/ext/polars/src/{apply → map}/series.rs +18 -22
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +5 -5
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
- data/ext/polars/src/sql.rs +46 -0
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +23 -11
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
use
|
1
|
+
use either::Either;
|
2
|
+
use magnus::{
|
3
|
+
prelude::*, r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, RString, Value,
|
4
|
+
};
|
2
5
|
use polars::frame::row::{rows_to_schema_supertypes, Row};
|
3
6
|
use polars::frame::NullStrategy;
|
4
7
|
use polars::io::avro::AvroCompression;
|
@@ -11,12 +14,12 @@ use std::cell::RefCell;
|
|
11
14
|
use std::io::{BufWriter, Cursor};
|
12
15
|
use std::ops::Deref;
|
13
16
|
|
14
|
-
use crate::
|
17
|
+
use crate::conversion::*;
|
18
|
+
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
19
|
+
use crate::map::dataframe::{
|
15
20
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
16
21
|
apply_lambda_with_utf8_out_type,
|
17
22
|
};
|
18
|
-
use crate::conversion::*;
|
19
|
-
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
20
23
|
use crate::rb_modules;
|
21
24
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
22
25
|
use crate::{RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
@@ -45,7 +48,7 @@ impl RbDataFrame {
|
|
45
48
|
schema_overwrite: Option<Schema>,
|
46
49
|
) -> RbResult<Self> {
|
47
50
|
// object builder must be registered.
|
48
|
-
crate::
|
51
|
+
crate::on_startup::register_object_builder();
|
49
52
|
|
50
53
|
let schema =
|
51
54
|
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
|
@@ -53,7 +56,7 @@ impl RbDataFrame {
|
|
53
56
|
// replace inferred nulls with boolean
|
54
57
|
let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
|
55
58
|
DataType::Null => {
|
56
|
-
fld.coerce(DataType::Boolean);
|
59
|
+
// fld.coerce(DataType::Boolean);
|
57
60
|
fld
|
58
61
|
}
|
59
62
|
DataType::Decimal(_, _) => {
|
@@ -86,7 +89,7 @@ impl RbDataFrame {
|
|
86
89
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
87
90
|
let mut cols = Vec::new();
|
88
91
|
for i in columns.each() {
|
89
|
-
cols.push(
|
92
|
+
cols.push(<&RbSeries>::try_convert(i?)?.series.borrow().clone());
|
90
93
|
}
|
91
94
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
92
95
|
Ok(RbDataFrame::new(df))
|
@@ -99,36 +102,35 @@ impl RbDataFrame {
|
|
99
102
|
pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
|
100
103
|
// start arguments
|
101
104
|
// this pattern is needed for more than 16
|
102
|
-
let rb_f
|
103
|
-
let infer_schema_length
|
104
|
-
let chunk_size
|
105
|
-
let has_header
|
106
|
-
let ignore_errors
|
107
|
-
let n_rows
|
108
|
-
let skip_rows
|
109
|
-
let projection
|
110
|
-
let
|
111
|
-
let rechunk
|
112
|
-
let columns
|
113
|
-
let encoding
|
114
|
-
let n_threads
|
115
|
-
let path
|
116
|
-
let overwrite_dtype
|
105
|
+
let rb_f = arguments[0];
|
106
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
|
107
|
+
let chunk_size = usize::try_convert(arguments[2])?;
|
108
|
+
let has_header = bool::try_convert(arguments[3])?;
|
109
|
+
let ignore_errors = bool::try_convert(arguments[4])?;
|
110
|
+
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
111
|
+
let skip_rows = usize::try_convert(arguments[6])?;
|
112
|
+
let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
|
113
|
+
let separator = String::try_convert(arguments[8])?;
|
114
|
+
let rechunk = bool::try_convert(arguments[9])?;
|
115
|
+
let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
|
116
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
|
117
|
+
let n_threads = Option::<usize>::try_convert(arguments[12])?;
|
118
|
+
let path = Option::<String>::try_convert(arguments[13])?;
|
119
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
|
117
120
|
// TODO fix
|
118
|
-
let overwrite_dtype_slice
|
119
|
-
let low_memory
|
120
|
-
let
|
121
|
-
let quote_char
|
122
|
-
let null_values
|
123
|
-
let try_parse_dates
|
124
|
-
let skip_rows_after_header
|
125
|
-
let row_count
|
126
|
-
let sample_size
|
127
|
-
let eol_char
|
121
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
122
|
+
let low_memory = bool::try_convert(arguments[16])?;
|
123
|
+
let comment_prefix = Option::<String>::try_convert(arguments[17])?;
|
124
|
+
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
125
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
|
126
|
+
let try_parse_dates = bool::try_convert(arguments[20])?;
|
127
|
+
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
128
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
129
|
+
let sample_size = usize::try_convert(arguments[23])?;
|
130
|
+
let eol_char = String::try_convert(arguments[24])?;
|
128
131
|
// end arguments
|
129
132
|
|
130
133
|
let null_values = null_values.map(|w| w.0);
|
131
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
132
134
|
let eol_char = eol_char.as_bytes()[0];
|
133
135
|
|
134
136
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -165,7 +167,7 @@ impl RbDataFrame {
|
|
165
167
|
.infer_schema(infer_schema_length)
|
166
168
|
.has_header(has_header)
|
167
169
|
.with_n_rows(n_rows)
|
168
|
-
.
|
170
|
+
.with_separator(separator.as_bytes()[0])
|
169
171
|
.with_skip_rows(skip_rows)
|
170
172
|
.with_ignore_errors(ignore_errors)
|
171
173
|
.with_projection(projection)
|
@@ -178,7 +180,7 @@ impl RbDataFrame {
|
|
178
180
|
.with_dtypes(overwrite_dtype.map(Arc::new))
|
179
181
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
180
182
|
.low_memory(low_memory)
|
181
|
-
.
|
183
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
182
184
|
.with_null_values(null_values)
|
183
185
|
.with_try_parse_dates(try_parse_dates)
|
184
186
|
.with_quote_char(quote_char)
|
@@ -265,7 +267,7 @@ impl RbDataFrame {
|
|
265
267
|
) -> RbResult<()> {
|
266
268
|
use polars::io::avro::AvroWriter;
|
267
269
|
|
268
|
-
if let Ok(s) =
|
270
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
269
271
|
let f = std::fs::File::create(s).unwrap();
|
270
272
|
AvroWriter::new(f)
|
271
273
|
.with_compression(compression.0)
|
@@ -294,12 +296,18 @@ impl RbDataFrame {
|
|
294
296
|
Ok(df) => Ok(df.into()),
|
295
297
|
// try arrow json reader instead
|
296
298
|
// this is row oriented
|
297
|
-
Err(
|
298
|
-
let
|
299
|
-
|
300
|
-
.
|
301
|
-
|
302
|
-
|
299
|
+
Err(e) => {
|
300
|
+
let msg = format!("{e}");
|
301
|
+
if msg.contains("successful parse invalid data") {
|
302
|
+
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
303
|
+
Err(e)
|
304
|
+
} else {
|
305
|
+
let out = JsonReader::new(mmap_bytes_r)
|
306
|
+
.with_json_format(JsonFormat::Json)
|
307
|
+
.finish()
|
308
|
+
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
309
|
+
Ok(out.into())
|
310
|
+
}
|
303
311
|
}
|
304
312
|
}
|
305
313
|
}
|
@@ -341,6 +349,27 @@ impl RbDataFrame {
|
|
341
349
|
Ok(())
|
342
350
|
}
|
343
351
|
|
352
|
+
pub fn read_rows(
|
353
|
+
rb_rows: RArray,
|
354
|
+
infer_schema_length: Option<usize>,
|
355
|
+
schema_overwrite: Option<Wrap<Schema>>,
|
356
|
+
) -> RbResult<Self> {
|
357
|
+
let mut rows = Vec::with_capacity(rb_rows.len());
|
358
|
+
for v in rb_rows.each() {
|
359
|
+
let rb_row = RArray::try_convert(v?)?;
|
360
|
+
let mut row = Vec::with_capacity(rb_row.len());
|
361
|
+
for val in rb_row.each() {
|
362
|
+
row.push(Wrap::<AnyValue>::try_convert(val?)?.0);
|
363
|
+
}
|
364
|
+
rows.push(Row(row));
|
365
|
+
}
|
366
|
+
Self::finish_from_rows(
|
367
|
+
rows,
|
368
|
+
infer_schema_length,
|
369
|
+
schema_overwrite.map(|wrap| wrap.0),
|
370
|
+
)
|
371
|
+
}
|
372
|
+
|
344
373
|
pub fn read_hashes(
|
345
374
|
dicts: Value,
|
346
375
|
infer_schema_length: Option<usize>,
|
@@ -395,9 +424,9 @@ impl RbDataFrame {
|
|
395
424
|
pub fn write_csv(
|
396
425
|
&self,
|
397
426
|
rb_f: Value,
|
398
|
-
|
399
|
-
|
400
|
-
|
427
|
+
include_header: bool,
|
428
|
+
separator: u8,
|
429
|
+
quote_char: u8,
|
401
430
|
batch_size: usize,
|
402
431
|
datetime_format: Option<String>,
|
403
432
|
date_format: Option<String>,
|
@@ -407,13 +436,13 @@ impl RbDataFrame {
|
|
407
436
|
) -> RbResult<()> {
|
408
437
|
let null = null_value.unwrap_or_default();
|
409
438
|
|
410
|
-
if let Ok(s) =
|
439
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
411
440
|
let f = std::fs::File::create(s).unwrap();
|
412
441
|
// no need for a buffered writer, because the csv writer does internal buffering
|
413
442
|
CsvWriter::new(f)
|
414
|
-
.
|
415
|
-
.
|
416
|
-
.
|
443
|
+
.include_header(include_header)
|
444
|
+
.with_separator(separator)
|
445
|
+
.with_quote_char(quote_char)
|
417
446
|
.with_batch_size(batch_size)
|
418
447
|
.with_datetime_format(datetime_format)
|
419
448
|
.with_date_format(date_format)
|
@@ -425,9 +454,9 @@ impl RbDataFrame {
|
|
425
454
|
} else {
|
426
455
|
let mut buf = Cursor::new(Vec::new());
|
427
456
|
CsvWriter::new(&mut buf)
|
428
|
-
.
|
429
|
-
.
|
430
|
-
.
|
457
|
+
.include_header(include_header)
|
458
|
+
.with_separator(separator)
|
459
|
+
.with_quote_char(quote_char)
|
431
460
|
.with_batch_size(batch_size)
|
432
461
|
.with_datetime_format(datetime_format)
|
433
462
|
.with_date_format(date_format)
|
@@ -449,7 +478,7 @@ impl RbDataFrame {
|
|
449
478
|
rb_f: Value,
|
450
479
|
compression: Wrap<Option<IpcCompression>>,
|
451
480
|
) -> RbResult<()> {
|
452
|
-
if let Ok(s) =
|
481
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
453
482
|
let f = std::fs::File::create(s).unwrap();
|
454
483
|
IpcWriter::new(f)
|
455
484
|
.with_compression(compression.0)
|
@@ -480,14 +509,14 @@ impl RbDataFrame {
|
|
480
509
|
.get_columns()
|
481
510
|
.iter()
|
482
511
|
.map(|s| match s.dtype() {
|
483
|
-
DataType::Object(_) => {
|
512
|
+
DataType::Object(_, _) => {
|
484
513
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
485
514
|
obj.unwrap().to_object()
|
486
515
|
}
|
487
516
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
488
517
|
}),
|
489
518
|
)
|
490
|
-
.
|
519
|
+
.as_value()
|
491
520
|
}
|
492
521
|
|
493
522
|
pub fn row_tuples(&self) -> Value {
|
@@ -499,7 +528,7 @@ impl RbDataFrame {
|
|
499
528
|
.get_columns()
|
500
529
|
.iter()
|
501
530
|
.map(|s| match s.dtype() {
|
502
|
-
DataType::Object(_) => {
|
531
|
+
DataType::Object(_, _) => {
|
503
532
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
504
533
|
obj.unwrap().to_object()
|
505
534
|
}
|
@@ -507,7 +536,7 @@ impl RbDataFrame {
|
|
507
536
|
}),
|
508
537
|
)
|
509
538
|
}))
|
510
|
-
.
|
539
|
+
.as_value()
|
511
540
|
}
|
512
541
|
|
513
542
|
pub fn to_numo(&self) -> Option<Value> {
|
@@ -537,7 +566,7 @@ impl RbDataFrame {
|
|
537
566
|
) -> RbResult<()> {
|
538
567
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
539
568
|
|
540
|
-
if let Ok(s) =
|
569
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
541
570
|
let f = std::fs::File::create(s).unwrap();
|
542
571
|
ParquetWriter::new(f)
|
543
572
|
.with_compression(compression)
|
@@ -604,7 +633,7 @@ impl RbDataFrame {
|
|
604
633
|
|
605
634
|
pub fn sample_n(
|
606
635
|
&self,
|
607
|
-
n:
|
636
|
+
n: &RbSeries,
|
608
637
|
with_replacement: bool,
|
609
638
|
shuffle: bool,
|
610
639
|
seed: Option<u64>,
|
@@ -612,14 +641,14 @@ impl RbDataFrame {
|
|
612
641
|
let df = self
|
613
642
|
.df
|
614
643
|
.borrow()
|
615
|
-
.sample_n(n, with_replacement, shuffle, seed)
|
644
|
+
.sample_n(&n.series.borrow(), with_replacement, shuffle, seed)
|
616
645
|
.map_err(RbPolarsErr::from)?;
|
617
646
|
Ok(df.into())
|
618
647
|
}
|
619
648
|
|
620
649
|
pub fn sample_frac(
|
621
650
|
&self,
|
622
|
-
frac:
|
651
|
+
frac: &RbSeries,
|
623
652
|
with_replacement: bool,
|
624
653
|
shuffle: bool,
|
625
654
|
seed: Option<u64>,
|
@@ -627,7 +656,7 @@ impl RbDataFrame {
|
|
627
656
|
let df = self
|
628
657
|
.df
|
629
658
|
.borrow()
|
630
|
-
.sample_frac(frac, with_replacement, shuffle, seed)
|
659
|
+
.sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed)
|
631
660
|
.map_err(RbPolarsErr::from)?;
|
632
661
|
Ok(df.into())
|
633
662
|
}
|
@@ -761,8 +790,8 @@ impl RbDataFrame {
|
|
761
790
|
.map(|s| RbSeries::new(s.clone()))
|
762
791
|
}
|
763
792
|
|
764
|
-
pub fn
|
765
|
-
self.df.borrow().
|
793
|
+
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
794
|
+
self.df.borrow().get_column_index(&name)
|
766
795
|
}
|
767
796
|
|
768
797
|
// TODO remove clone
|
@@ -804,18 +833,18 @@ impl RbDataFrame {
|
|
804
833
|
Ok(())
|
805
834
|
}
|
806
835
|
|
807
|
-
pub fn
|
836
|
+
pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
808
837
|
self.df
|
809
838
|
.borrow_mut()
|
810
|
-
.
|
839
|
+
.replace_column(index, new_col.series.borrow().clone())
|
811
840
|
.map_err(RbPolarsErr::from)?;
|
812
841
|
Ok(())
|
813
842
|
}
|
814
843
|
|
815
|
-
pub fn
|
844
|
+
pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
816
845
|
self.df
|
817
846
|
.borrow_mut()
|
818
|
-
.
|
847
|
+
.insert_column(index, new_col.series.borrow().clone())
|
819
848
|
.map_err(RbPolarsErr::from)?;
|
820
849
|
Ok(())
|
821
850
|
}
|
@@ -850,11 +879,11 @@ impl RbDataFrame {
|
|
850
879
|
Ok(mask.into_series().into())
|
851
880
|
}
|
852
881
|
|
853
|
-
pub fn
|
882
|
+
pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
854
883
|
if null_equal {
|
855
|
-
self.df.borrow().
|
884
|
+
self.df.borrow().equals_missing(&other.df.borrow())
|
856
885
|
} else {
|
857
|
-
self.df.borrow().
|
886
|
+
self.df.borrow().equals(&other.df.borrow())
|
858
887
|
}
|
859
888
|
}
|
860
889
|
|
@@ -942,73 +971,50 @@ impl RbDataFrame {
|
|
942
971
|
self.df.borrow().clone().lazy().into()
|
943
972
|
}
|
944
973
|
|
945
|
-
pub fn
|
946
|
-
self.df.borrow().max().into()
|
947
|
-
}
|
948
|
-
|
949
|
-
pub fn min(&self) -> Self {
|
950
|
-
self.df.borrow().min().into()
|
951
|
-
}
|
952
|
-
|
953
|
-
pub fn sum(&self) -> Self {
|
954
|
-
self.df.borrow().sum().into()
|
955
|
-
}
|
956
|
-
|
957
|
-
pub fn mean(&self) -> Self {
|
958
|
-
self.df.borrow().mean().into()
|
959
|
-
}
|
960
|
-
|
961
|
-
pub fn std(&self, ddof: u8) -> Self {
|
962
|
-
self.df.borrow().std(ddof).into()
|
963
|
-
}
|
964
|
-
|
965
|
-
pub fn var(&self, ddof: u8) -> Self {
|
966
|
-
self.df.borrow().var(ddof).into()
|
967
|
-
}
|
968
|
-
|
969
|
-
pub fn median(&self) -> Self {
|
970
|
-
self.df.borrow().median().into()
|
971
|
-
}
|
972
|
-
|
973
|
-
pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
|
974
|
+
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
974
975
|
let s = self
|
975
976
|
.df
|
976
977
|
.borrow()
|
977
|
-
.
|
978
|
+
.max_horizontal()
|
978
979
|
.map_err(RbPolarsErr::from)?;
|
979
980
|
Ok(s.map(|s| s.into()))
|
980
981
|
}
|
981
982
|
|
982
|
-
pub fn
|
983
|
-
let s = self
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?;
|
983
|
+
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
984
|
+
let s = self
|
985
|
+
.df
|
986
|
+
.borrow()
|
987
|
+
.min_horizontal()
|
988
|
+
.map_err(RbPolarsErr::from)?;
|
989
989
|
Ok(s.map(|s| s.into()))
|
990
990
|
}
|
991
991
|
|
992
|
-
pub fn
|
992
|
+
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
993
|
+
let null_strategy = if ignore_nulls {
|
994
|
+
NullStrategy::Ignore
|
995
|
+
} else {
|
996
|
+
NullStrategy::Propagate
|
997
|
+
};
|
993
998
|
let s = self
|
994
999
|
.df
|
995
1000
|
.borrow()
|
996
|
-
.
|
1001
|
+
.sum_horizontal(null_strategy)
|
997
1002
|
.map_err(RbPolarsErr::from)?;
|
998
1003
|
Ok(s.map(|s| s.into()))
|
999
1004
|
}
|
1000
1005
|
|
1001
|
-
pub fn
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1006
|
+
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
1007
|
+
let null_strategy = if ignore_nulls {
|
1008
|
+
NullStrategy::Ignore
|
1009
|
+
} else {
|
1010
|
+
NullStrategy::Propagate
|
1011
|
+
};
|
1012
|
+
let s = self
|
1007
1013
|
.df
|
1008
1014
|
.borrow()
|
1009
|
-
.
|
1015
|
+
.mean_horizontal(null_strategy)
|
1010
1016
|
.map_err(RbPolarsErr::from)?;
|
1011
|
-
Ok(
|
1017
|
+
Ok(s.map(|s| s.into()))
|
1012
1018
|
}
|
1013
1019
|
|
1014
1020
|
pub fn to_dummies(
|
@@ -1082,13 +1088,13 @@ impl RbDataFrame {
|
|
1082
1088
|
.into_datetime(tu, tz)
|
1083
1089
|
.into_series()
|
1084
1090
|
}
|
1085
|
-
Some(DataType::
|
1091
|
+
Some(DataType::String) => {
|
1086
1092
|
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1087
1093
|
}
|
1088
1094
|
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1089
1095
|
};
|
1090
1096
|
|
1091
|
-
Ok((RbSeries::from(out).
|
1097
|
+
Ok((Obj::wrap(RbSeries::from(out)).as_value(), false))
|
1092
1098
|
}
|
1093
1099
|
|
1094
1100
|
pub fn shrink_to_fit(&self) {
|
@@ -1105,17 +1111,20 @@ impl RbDataFrame {
|
|
1105
1111
|
Ok(hash.into_series().into())
|
1106
1112
|
}
|
1107
1113
|
|
1108
|
-
pub fn transpose(&self,
|
1109
|
-
let
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1114
|
+
pub fn transpose(&self, keep_names_as: Option<String>, column_names: Value) -> RbResult<Self> {
|
1115
|
+
let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
|
1116
|
+
Some(Either::Right(name))
|
1117
|
+
} else if let Ok(name) = String::try_convert(column_names) {
|
1118
|
+
Some(Either::Left(name))
|
1119
|
+
} else {
|
1120
|
+
None
|
1121
|
+
};
|
1122
|
+
Ok(self
|
1123
|
+
.df
|
1124
|
+
.borrow()
|
1125
|
+
.transpose(keep_names_as.as_deref(), new_col_names)
|
1126
|
+
.map_err(RbPolarsErr::from)?
|
1127
|
+
.into())
|
1119
1128
|
}
|
1120
1129
|
|
1121
1130
|
pub fn upsample(
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
use magnus::exception;
|
2
2
|
use magnus::Error;
|
3
|
-
use polars::error::ArrowError;
|
4
3
|
use polars::prelude::PolarsError;
|
5
4
|
|
6
5
|
pub struct RbPolarsErr {}
|
@@ -11,10 +10,6 @@ impl RbPolarsErr {
|
|
11
10
|
Error::new(exception::runtime_error(), e.to_string())
|
12
11
|
}
|
13
12
|
|
14
|
-
pub fn arrow(e: ArrowError) -> Error {
|
15
|
-
Error::new(exception::runtime_error(), e.to_string())
|
16
|
-
}
|
17
|
-
|
18
13
|
pub fn io(e: std::io::Error) -> Error {
|
19
14
|
Error::new(exception::runtime_error(), e.to_string())
|
20
15
|
}
|
data/ext/polars/src/expr/binary.rs
CHANGED
@@ -3,16 +3,28 @@ use polars::prelude::*;
|
|
3
3
|
use crate::RbExpr;
|
4
4
|
|
5
5
|
impl RbExpr {
|
6
|
-
pub fn bin_contains(&self, lit:
|
7
|
-
self.inner
|
6
|
+
pub fn bin_contains(&self, lit: &RbExpr) -> Self {
|
7
|
+
self.inner
|
8
|
+
.clone()
|
9
|
+
.binary()
|
10
|
+
.contains_literal(lit.inner.clone())
|
11
|
+
.into()
|
8
12
|
}
|
9
13
|
|
10
|
-
pub fn bin_ends_with(&self, sub:
|
11
|
-
self.inner
|
14
|
+
pub fn bin_ends_with(&self, sub: &RbExpr) -> Self {
|
15
|
+
self.inner
|
16
|
+
.clone()
|
17
|
+
.binary()
|
18
|
+
.ends_with(sub.inner.clone())
|
19
|
+
.into()
|
12
20
|
}
|
13
21
|
|
14
|
-
pub fn bin_starts_with(&self, sub:
|
15
|
-
self.inner
|
22
|
+
pub fn bin_starts_with(&self, sub: &RbExpr) -> Self {
|
23
|
+
self.inner
|
24
|
+
.clone()
|
25
|
+
.binary()
|
26
|
+
.starts_with(sub.inner.clone())
|
27
|
+
.into()
|
16
28
|
}
|
17
29
|
|
18
30
|
pub fn bin_hex_decode(&self, strict: bool) -> Self {
|
data/ext/polars/src/expr/categorical.rs
CHANGED
@@ -5,6 +5,13 @@ use crate::RbExpr;
|
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
7
|
pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
|
8
|
-
self.inner
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.cast(DataType::Categorical(None, ordering.0))
|
11
|
+
.into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn cat_get_categories(&self) -> Self {
|
15
|
+
self.inner.clone().cat().get_categories().into()
|
9
16
|
}
|
10
17
|
}
|
data/ext/polars/src/expr/datetime.rs
CHANGED
@@ -8,9 +8,8 @@ impl RbExpr {
|
|
8
8
|
self.inner.clone().dt().to_string(&format).into()
|
9
9
|
}
|
10
10
|
|
11
|
-
pub fn dt_offset_by(&self, by:
|
12
|
-
|
13
|
-
self.inner.clone().dt().offset_by(by).into()
|
11
|
+
pub fn dt_offset_by(&self, by: &RbExpr) -> Self {
|
12
|
+
self.inner.clone().dt().offset_by(by.inner.clone()).into()
|
14
13
|
}
|
15
14
|
|
16
15
|
pub fn dt_epoch_seconds(&self) -> Self {
|
@@ -38,21 +37,20 @@ impl RbExpr {
|
|
38
37
|
self.inner.clone().dt().cast_time_unit(tu.0).into()
|
39
38
|
}
|
40
39
|
|
41
|
-
pub fn dt_replace_time_zone(&self,
|
40
|
+
pub fn dt_replace_time_zone(&self, time_zone: Option<String>, ambiguous: &Self) -> Self {
|
42
41
|
self.inner
|
43
42
|
.clone()
|
44
43
|
.dt()
|
45
|
-
.replace_time_zone(
|
44
|
+
.replace_time_zone(time_zone, ambiguous.inner.clone())
|
46
45
|
.into()
|
47
46
|
}
|
48
47
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
self.inner.clone().dt().truncate(&every, &offset).into()
|
48
|
+
pub fn dt_truncate(&self, every: &Self, offset: String) -> Self {
|
49
|
+
self.inner
|
50
|
+
.clone()
|
51
|
+
.dt()
|
52
|
+
.truncate(every.inner.clone(), offset)
|
53
|
+
.into()
|
56
54
|
}
|
57
55
|
|
58
56
|
pub fn dt_month_start(&self) -> Self {
|