polars-df 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +664 -539
- data/LICENSE.txt +1 -1
- data/README.md +37 -2
- data/ext/polars/Cargo.toml +8 -7
- data/ext/polars/src/conversion/any_value.rs +1 -0
- data/ext/polars/src/conversion/mod.rs +35 -21
- data/ext/polars/src/dataframe/general.rs +1 -48
- data/ext/polars/src/dataframe/io.rs +21 -23
- data/ext/polars/src/expr/general.rs +3 -0
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/file.rs +21 -3
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/io.rs +35 -14
- data/ext/polars/src/functions/lazy.rs +5 -4
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/interop/arrow/to_ruby.rs +2 -2
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/general.rs +52 -5
- data/ext/polars/src/lib.rs +13 -17
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/series/export.rs +1 -0
- data/ext/polars/src/series/general.rs +3 -15
- data/ext/polars/src/series/import.rs +3 -3
- data/ext/polars/src/series/scatter.rs +1 -1
- data/lib/polars/data_frame.rb +196 -68
- data/lib/polars/data_types.rb +5 -1
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/lazy.rb +7 -3
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/lazy_frame.rb +49 -7
- data/lib/polars/selectors.rb +85 -3
- data/lib/polars/series.rb +6 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -0
- metadata +5 -8
@@ -330,6 +330,7 @@ impl RbLazyFrame {
|
|
330
330
|
nulls_last: vec![nulls_last],
|
331
331
|
multithreaded,
|
332
332
|
maintain_order,
|
333
|
+
limit: None,
|
333
334
|
},
|
334
335
|
)
|
335
336
|
.into()
|
@@ -353,6 +354,7 @@ impl RbLazyFrame {
|
|
353
354
|
nulls_last,
|
354
355
|
maintain_order,
|
355
356
|
multithreaded,
|
357
|
+
limit: None,
|
356
358
|
},
|
357
359
|
)
|
358
360
|
.into())
|
@@ -379,6 +381,8 @@ impl RbLazyFrame {
|
|
379
381
|
row_group_size: Option<usize>,
|
380
382
|
data_page_size: Option<usize>,
|
381
383
|
maintain_order: bool,
|
384
|
+
cloud_options: Option<Vec<(String, String)>>,
|
385
|
+
retries: usize,
|
382
386
|
) -> RbResult<()> {
|
383
387
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
384
388
|
|
@@ -390,8 +394,15 @@ impl RbLazyFrame {
|
|
390
394
|
maintain_order,
|
391
395
|
};
|
392
396
|
|
397
|
+
let cloud_options = {
|
398
|
+
let cloud_options =
|
399
|
+
parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
|
400
|
+
Some(cloud_options.with_max_retries(retries))
|
401
|
+
};
|
402
|
+
|
393
403
|
let ldf = self.ldf.borrow().clone();
|
394
|
-
ldf.sink_parquet(path, options)
|
404
|
+
ldf.sink_parquet(&path, options, cloud_options)
|
405
|
+
.map_err(RbPolarsErr::from)?;
|
395
406
|
Ok(())
|
396
407
|
}
|
397
408
|
|
@@ -400,14 +411,23 @@ impl RbLazyFrame {
|
|
400
411
|
path: PathBuf,
|
401
412
|
compression: Option<Wrap<IpcCompression>>,
|
402
413
|
maintain_order: bool,
|
414
|
+
cloud_options: Option<Vec<(String, String)>>,
|
415
|
+
retries: usize,
|
403
416
|
) -> RbResult<()> {
|
404
417
|
let options = IpcWriterOptions {
|
405
418
|
compression: compression.map(|c| c.0),
|
406
419
|
maintain_order,
|
407
420
|
};
|
408
421
|
|
422
|
+
let cloud_options = {
|
423
|
+
let cloud_options =
|
424
|
+
parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
|
425
|
+
Some(cloud_options.with_max_retries(retries))
|
426
|
+
};
|
427
|
+
|
409
428
|
let ldf = self.ldf.borrow().clone();
|
410
|
-
ldf.sink_ipc(path, options)
|
429
|
+
ldf.sink_ipc(&path, options, cloud_options)
|
430
|
+
.map_err(RbPolarsErr::from)?;
|
411
431
|
Ok(())
|
412
432
|
}
|
413
433
|
|
@@ -430,6 +450,9 @@ impl RbLazyFrame {
|
|
430
450
|
quote_style: Option<Wrap<QuoteStyle>>,
|
431
451
|
maintain_order: bool,
|
432
452
|
) -> RbResult<()> {
|
453
|
+
// TODO
|
454
|
+
let cloud_options = None;
|
455
|
+
|
433
456
|
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
434
457
|
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
435
458
|
|
@@ -454,16 +477,36 @@ impl RbLazyFrame {
|
|
454
477
|
serialize_options,
|
455
478
|
};
|
456
479
|
|
480
|
+
let cloud_options = {
|
481
|
+
let cloud_options =
|
482
|
+
parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
|
483
|
+
Some(cloud_options)
|
484
|
+
};
|
485
|
+
|
457
486
|
let ldf = self.ldf.borrow().clone();
|
458
|
-
ldf.sink_csv(path, options)
|
487
|
+
ldf.sink_csv(&path, options, cloud_options)
|
488
|
+
.map_err(RbPolarsErr::from)?;
|
459
489
|
Ok(())
|
460
490
|
}
|
461
491
|
|
462
|
-
pub fn sink_json(
|
492
|
+
pub fn sink_json(
|
493
|
+
&self,
|
494
|
+
path: PathBuf,
|
495
|
+
maintain_order: bool,
|
496
|
+
cloud_options: Option<Vec<(String, String)>>,
|
497
|
+
retries: usize,
|
498
|
+
) -> RbResult<()> {
|
463
499
|
let options = JsonWriterOptions { maintain_order };
|
464
500
|
|
501
|
+
let cloud_options = {
|
502
|
+
let cloud_options =
|
503
|
+
parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
|
504
|
+
Some(cloud_options.with_max_retries(retries))
|
505
|
+
};
|
506
|
+
|
465
507
|
let ldf = self.ldf.borrow().clone();
|
466
|
-
ldf.sink_json(path, options)
|
508
|
+
ldf.sink_json(&path, options, cloud_options)
|
509
|
+
.map_err(RbPolarsErr::from)?;
|
467
510
|
Ok(())
|
468
511
|
}
|
469
512
|
|
@@ -590,6 +633,8 @@ impl RbLazyFrame {
|
|
590
633
|
tolerance: Option<Wrap<AnyValue<'_>>>,
|
591
634
|
tolerance_str: Option<String>,
|
592
635
|
coalesce: bool,
|
636
|
+
allow_eq: bool,
|
637
|
+
check_sortedness: bool,
|
593
638
|
) -> RbResult<Self> {
|
594
639
|
let coalesce = if coalesce {
|
595
640
|
JoinCoalesce::CoalesceColumns
|
@@ -614,6 +659,8 @@ impl RbLazyFrame {
|
|
614
659
|
right_by: right_by.map(strings_to_pl_smallstr),
|
615
660
|
tolerance: tolerance.map(|t| t.0.into_static()),
|
616
661
|
tolerance_str: tolerance_str.map(|s| s.into()),
|
662
|
+
allow_eq,
|
663
|
+
check_sortedness,
|
617
664
|
}))
|
618
665
|
.suffix(suffix)
|
619
666
|
.finish()
|
data/ext/polars/src/lib.rs
CHANGED
@@ -69,10 +69,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
69
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
70
70
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
71
71
|
class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
|
72
|
-
class.define_method("write_json", method!(RbDataFrame::write_json,
|
72
|
+
class.define_method("write_json", method!(RbDataFrame::write_json, 1))?;
|
73
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
74
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
75
|
-
class.define_method("write_ipc", method!(RbDataFrame::write_ipc,
|
75
|
+
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
|
76
76
|
class.define_method(
|
77
77
|
"write_ipc_stream",
|
78
78
|
method!(RbDataFrame::write_ipc_stream, 3),
|
@@ -143,10 +143,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
143
143
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
144
144
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
145
145
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
146
|
-
class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
|
147
|
-
class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
|
148
|
-
class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
|
149
|
-
class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
|
150
146
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
|
151
147
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
152
148
|
class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
|
@@ -568,10 +564,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
568
564
|
class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
|
569
565
|
class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
|
570
566
|
class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
|
571
|
-
class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr,
|
567
|
+
class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 2))?;
|
572
568
|
class.define_singleton_method(
|
573
569
|
"spearman_rank_corr",
|
574
|
-
function!(functions::lazy::spearman_rank_corr,
|
570
|
+
function!(functions::lazy::spearman_rank_corr, 3),
|
575
571
|
)?;
|
576
572
|
class.define_singleton_method("sql_expr", function!(functions::lazy::sql_expr, 1))?;
|
577
573
|
class.define_singleton_method("cov", function!(functions::lazy::cov, 3))?;
|
@@ -605,11 +601,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
605
601
|
)?;
|
606
602
|
class.define_singleton_method(
|
607
603
|
"sum_horizontal",
|
608
|
-
function!(functions::aggregation::sum_horizontal,
|
604
|
+
function!(functions::aggregation::sum_horizontal, 2),
|
609
605
|
)?;
|
610
606
|
class.define_singleton_method(
|
611
607
|
"mean_horizontal",
|
612
|
-
function!(functions::aggregation::mean_horizontal,
|
608
|
+
function!(functions::aggregation::mean_horizontal, 2),
|
613
609
|
)?;
|
614
610
|
class.define_singleton_method("as_struct", function!(functions::lazy::as_struct, 1))?;
|
615
611
|
class.define_singleton_method("coalesce", function!(functions::lazy::coalesce, 1))?;
|
@@ -660,8 +656,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
660
656
|
function!(functions::meta::get_index_type, 0),
|
661
657
|
)?;
|
662
658
|
class.define_singleton_method(
|
663
|
-
"
|
664
|
-
function!(functions::meta::
|
659
|
+
"thread_pool_size",
|
660
|
+
function!(functions::meta::thread_pool_size, 0),
|
665
661
|
)?;
|
666
662
|
class.define_singleton_method(
|
667
663
|
"enable_string_cache",
|
@@ -747,10 +743,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
747
743
|
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
|
748
744
|
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
749
745
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
750
|
-
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet,
|
751
|
-
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc,
|
746
|
+
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
|
747
|
+
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
|
752
748
|
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
|
753
|
-
class.define_method("sink_json", method!(RbLazyFrame::sink_json,
|
749
|
+
class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
|
754
750
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
755
751
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
756
752
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
@@ -762,7 +758,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
762
758
|
method!(RbLazyFrame::group_by_dynamic, 9),
|
763
759
|
)?;
|
764
760
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
765
|
-
class.define_method("join_asof", method!(RbLazyFrame::join_asof,
|
761
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 14))?;
|
766
762
|
class.define_method("join", method!(RbLazyFrame::join, 10))?;
|
767
763
|
class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
|
768
764
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
@@ -1109,7 +1105,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1109
1105
|
class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
|
1110
1106
|
|
1111
1107
|
// arrow array stream
|
1112
|
-
let class = module.define_class("
|
1108
|
+
let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
|
1113
1109
|
class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
|
1114
1110
|
|
1115
1111
|
Ok(())
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -5,9 +5,9 @@ pub mod series;
|
|
5
5
|
use magnus::{prelude::*, RHash, Value};
|
6
6
|
use polars::chunked_array::builder::get_list_builder;
|
7
7
|
use polars::prelude::*;
|
8
|
-
use polars_core::export::rayon::prelude::*;
|
9
8
|
use polars_core::utils::CustomIterTools;
|
10
9
|
use polars_core::POOL;
|
10
|
+
use rayon::prelude::*;
|
11
11
|
|
12
12
|
use crate::{ObjectValue, RbPolarsErr, RbResult, RbSeries, Wrap};
|
13
13
|
|
@@ -21,6 +21,7 @@ impl RbSeries {
|
|
21
21
|
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
22
22
|
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
23
23
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
24
|
+
DataType::Int128 => todo!(),
|
24
25
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
25
26
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
26
27
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
@@ -97,29 +97,17 @@ impl RbSeries {
|
|
97
97
|
}
|
98
98
|
|
99
99
|
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
100
|
-
let out = self
|
101
|
-
.series
|
102
|
-
.borrow()
|
103
|
-
.bitand(&other.series.borrow())
|
104
|
-
.map_err(RbPolarsErr::from)?;
|
100
|
+
let out = (&*self.series.borrow() & &*other.series.borrow()).map_err(RbPolarsErr::from)?;
|
105
101
|
Ok(out.into())
|
106
102
|
}
|
107
103
|
|
108
104
|
pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
|
109
|
-
let out = self
|
110
|
-
.series
|
111
|
-
.borrow()
|
112
|
-
.bitor(&other.series.borrow())
|
113
|
-
.map_err(RbPolarsErr::from)?;
|
105
|
+
let out = (&*self.series.borrow() | &*other.series.borrow()).map_err(RbPolarsErr::from)?;
|
114
106
|
Ok(out.into())
|
115
107
|
}
|
116
108
|
|
117
109
|
pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
|
118
|
-
let out = self
|
119
|
-
.series
|
120
|
-
.borrow()
|
121
|
-
.bitxor(&other.series.borrow())
|
122
|
-
.map_err(RbPolarsErr::from)?;
|
110
|
+
let out = (&*self.series.borrow() ^ &*other.series.borrow()).map_err(RbPolarsErr::from)?;
|
123
111
|
Ok(out.into())
|
124
112
|
}
|
125
113
|
|
@@ -1,7 +1,7 @@
|
|
1
|
+
use arrow::array::Array;
|
2
|
+
use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
1
3
|
use magnus::prelude::*;
|
2
4
|
use magnus::Value;
|
3
|
-
use polars::export::arrow::array::Array;
|
4
|
-
use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
5
5
|
use polars::prelude::*;
|
6
6
|
|
7
7
|
use super::RbSeries;
|
@@ -39,7 +39,7 @@ pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
|
|
39
39
|
|
40
40
|
// Series::try_from fails for an empty vec of chunks
|
41
41
|
let s = if produced_arrays.is_empty() {
|
42
|
-
let polars_dt = DataType::
|
42
|
+
let polars_dt = DataType::from_arrow_field(stream.field());
|
43
43
|
Series::new_empty(stream.field().name.clone(), &polars_dt)
|
44
44
|
} else {
|
45
45
|
Series::try_from((stream.field(), produced_arrays)).unwrap()
|
data/lib/polars/data_frame.rb
CHANGED
@@ -604,10 +604,6 @@ module Polars
|
|
604
604
|
#
|
605
605
|
# @param file [String]
|
606
606
|
# File path to which the result should be written.
|
607
|
-
# @param pretty [Boolean]
|
608
|
-
# Pretty serialize json.
|
609
|
-
# @param row_oriented [Boolean]
|
610
|
-
# Write to row oriented json. This is slower, but more common.
|
611
607
|
#
|
612
608
|
# @return [nil]
|
613
609
|
#
|
@@ -619,16 +615,8 @@ module Polars
|
|
619
615
|
# }
|
620
616
|
# )
|
621
617
|
# df.write_json
|
622
|
-
# # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
|
623
|
-
#
|
624
|
-
# @example
|
625
|
-
# df.write_json(row_oriented: true)
|
626
618
|
# # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
|
627
|
-
def write_json(
|
628
|
-
file = nil,
|
629
|
-
pretty: false,
|
630
|
-
row_oriented: false
|
631
|
-
)
|
619
|
+
def write_json(file = nil)
|
632
620
|
if Utils.pathlike?(file)
|
633
621
|
file = Utils.normalize_filepath(file)
|
634
622
|
end
|
@@ -636,7 +624,7 @@ module Polars
|
|
636
624
|
if file.nil? || to_string_io
|
637
625
|
buf = StringIO.new
|
638
626
|
buf.set_encoding(Encoding::BINARY)
|
639
|
-
_df.write_json(buf
|
627
|
+
_df.write_json(buf)
|
640
628
|
json_bytes = buf.string
|
641
629
|
|
642
630
|
json_str = json_bytes.force_encoding(Encoding::UTF_8)
|
@@ -646,7 +634,7 @@ module Polars
|
|
646
634
|
return json_str
|
647
635
|
end
|
648
636
|
else
|
649
|
-
_df.write_json(file
|
637
|
+
_df.write_json(file)
|
650
638
|
end
|
651
639
|
nil
|
652
640
|
end
|
@@ -831,7 +819,13 @@ module Polars
|
|
831
819
|
# Compression method. Defaults to "uncompressed".
|
832
820
|
#
|
833
821
|
# @return [nil]
|
834
|
-
def write_ipc(
|
822
|
+
def write_ipc(
|
823
|
+
file,
|
824
|
+
compression: "uncompressed",
|
825
|
+
compat_level: nil,
|
826
|
+
storage_options: nil,
|
827
|
+
retries: 2
|
828
|
+
)
|
835
829
|
return_bytes = file.nil?
|
836
830
|
if return_bytes
|
837
831
|
file = StringIO.new
|
@@ -849,7 +843,13 @@ module Polars
|
|
849
843
|
compression = "uncompressed"
|
850
844
|
end
|
851
845
|
|
852
|
-
|
846
|
+
if storage_options&.any?
|
847
|
+
storage_options = storage_options.to_a
|
848
|
+
else
|
849
|
+
storage_options = nil
|
850
|
+
end
|
851
|
+
|
852
|
+
_df.write_ipc(file, compression, compat_level, storage_options, retries)
|
853
853
|
return_bytes ? file.string : nil
|
854
854
|
end
|
855
855
|
|
@@ -961,6 +961,61 @@ module Polars
|
|
961
961
|
)
|
962
962
|
end
|
963
963
|
|
964
|
+
# Write DataFrame as delta table.
|
965
|
+
#
|
966
|
+
# @param target [Object]
|
967
|
+
# URI of a table or a DeltaTable object.
|
968
|
+
# @param mode ["error", "append", "overwrite", "ignore", "merge"]
|
969
|
+
# How to handle existing data.
|
970
|
+
# @param storage_options [Hash]
|
971
|
+
# Extra options for the storage backends supported by `deltalake-rb`.
|
972
|
+
# @param delta_write_options [Hash]
|
973
|
+
# Additional keyword arguments while writing a Delta lake Table.
|
974
|
+
# @param delta_merge_options [Hash]
|
975
|
+
# Keyword arguments which are required to `MERGE` a Delta lake Table.
|
976
|
+
#
|
977
|
+
# @return [nil]
|
978
|
+
def write_delta(
|
979
|
+
target,
|
980
|
+
mode: "error",
|
981
|
+
storage_options: nil,
|
982
|
+
delta_write_options: nil,
|
983
|
+
delta_merge_options: nil
|
984
|
+
)
|
985
|
+
Polars.send(:_check_if_delta_available)
|
986
|
+
|
987
|
+
if Utils.pathlike?(target)
|
988
|
+
target = Polars.send(:_resolve_delta_lake_uri, target.to_s, strict: false)
|
989
|
+
end
|
990
|
+
|
991
|
+
data = self
|
992
|
+
|
993
|
+
if mode == "merge"
|
994
|
+
if delta_merge_options.nil?
|
995
|
+
msg = "You need to pass delta_merge_options with at least a given predicate for `MERGE` to work."
|
996
|
+
raise ArgumentError, msg
|
997
|
+
end
|
998
|
+
if target.is_a?(::String)
|
999
|
+
dt = DeltaLake::Table.new(target, storage_options: storage_options)
|
1000
|
+
else
|
1001
|
+
dt = target
|
1002
|
+
end
|
1003
|
+
|
1004
|
+
predicate = delta_merge_options.delete(:predicate)
|
1005
|
+
dt.merge(data, predicate, **delta_merge_options)
|
1006
|
+
else
|
1007
|
+
delta_write_options ||= {}
|
1008
|
+
|
1009
|
+
DeltaLake.write(
|
1010
|
+
target,
|
1011
|
+
data,
|
1012
|
+
mode: mode,
|
1013
|
+
storage_options: storage_options,
|
1014
|
+
**delta_write_options
|
1015
|
+
)
|
1016
|
+
end
|
1017
|
+
end
|
1018
|
+
|
964
1019
|
# Return an estimation of the total (heap) allocated size of the DataFrame.
|
965
1020
|
#
|
966
1021
|
# Estimated size is given in the specified unit (bytes by default).
|
@@ -2227,6 +2282,14 @@ module Polars
|
|
2227
2282
|
# keys are within this distance. If an asof join is done on columns of dtype
|
2228
2283
|
# "Date", "Datetime", "Duration" or "Time" you use the following string
|
2229
2284
|
# language:
|
2285
|
+
# @param allow_exact_matches [Boolean]
|
2286
|
+
# Whether exact matches are valid join predicates.
|
2287
|
+
# - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
|
2288
|
+
# - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
|
2289
|
+
# @param check_sortedness [Boolean]
|
2290
|
+
# Check the sortedness of the asof keys. If the keys are not sorted Polars
|
2291
|
+
# will error, or in case of 'by' argument raise a warning. This might become
|
2292
|
+
# a hard error in the future.
|
2230
2293
|
#
|
2231
2294
|
# - 1ns (1 nanosecond)
|
2232
2295
|
# - 1us (1 microsecond)
|
@@ -2308,7 +2371,9 @@ module Polars
|
|
2308
2371
|
tolerance: nil,
|
2309
2372
|
allow_parallel: true,
|
2310
2373
|
force_parallel: false,
|
2311
|
-
coalesce: true
|
2374
|
+
coalesce: true,
|
2375
|
+
allow_exact_matches: true,
|
2376
|
+
check_sortedness: true
|
2312
2377
|
)
|
2313
2378
|
lazy
|
2314
2379
|
.join_asof(
|
@@ -2324,7 +2389,9 @@ module Polars
|
|
2324
2389
|
tolerance: tolerance,
|
2325
2390
|
allow_parallel: allow_parallel,
|
2326
2391
|
force_parallel: force_parallel,
|
2327
|
-
coalesce: coalesce
|
2392
|
+
coalesce: coalesce,
|
2393
|
+
allow_exact_matches: allow_exact_matches,
|
2394
|
+
check_sortedness: check_sortedness
|
2328
2395
|
)
|
2329
2396
|
.collect(no_optimization: true)
|
2330
2397
|
end
|
@@ -3939,14 +4006,32 @@ module Polars
|
|
3939
4006
|
# # ╞═════╪═════╪═════╡
|
3940
4007
|
# # │ 3 ┆ 8 ┆ c │
|
3941
4008
|
# # └─────┴─────┴─────┘
|
3942
|
-
def max
|
3943
|
-
|
3944
|
-
|
3945
|
-
|
3946
|
-
|
3947
|
-
|
3948
|
-
|
3949
|
-
|
4009
|
+
def max
|
4010
|
+
lazy.max.collect(_eager: true)
|
4011
|
+
end
|
4012
|
+
|
4013
|
+
# Get the maximum value horizontally across columns.
|
4014
|
+
#
|
4015
|
+
# @return [Series]
|
4016
|
+
#
|
4017
|
+
# @example
|
4018
|
+
# df = Polars::DataFrame.new(
|
4019
|
+
# {
|
4020
|
+
# "foo" => [1, 2, 3],
|
4021
|
+
# "bar" => [4.0, 5.0, 6.0]
|
4022
|
+
# }
|
4023
|
+
# )
|
4024
|
+
# df.max_horizontal
|
4025
|
+
# # =>
|
4026
|
+
# # shape: (3,)
|
4027
|
+
# # Series: 'max' [f64]
|
4028
|
+
# # [
|
4029
|
+
# # 4.0
|
4030
|
+
# # 5.0
|
4031
|
+
# # 6.0
|
4032
|
+
# # ]
|
4033
|
+
def max_horizontal
|
4034
|
+
select(max: F.max_horizontal(F.all)).to_series
|
3950
4035
|
end
|
3951
4036
|
|
3952
4037
|
# Aggregate the columns of this DataFrame to their minimum value.
|
@@ -3971,22 +4056,35 @@ module Polars
|
|
3971
4056
|
# # ╞═════╪═════╪═════╡
|
3972
4057
|
# # │ 1 ┆ 6 ┆ a │
|
3973
4058
|
# # └─────┴─────┴─────┘
|
3974
|
-
def min
|
3975
|
-
|
3976
|
-
lazy.min.collect(_eager: true)
|
3977
|
-
elsif axis == 1
|
3978
|
-
Utils.wrap_s(_df.min_horizontal)
|
3979
|
-
else
|
3980
|
-
raise ArgumentError, "Axis should be 0 or 1."
|
3981
|
-
end
|
4059
|
+
def min
|
4060
|
+
lazy.min.collect(_eager: true)
|
3982
4061
|
end
|
3983
4062
|
|
3984
|
-
#
|
4063
|
+
# Get the minimum value horizontally across columns.
|
3985
4064
|
#
|
3986
|
-
# @
|
3987
|
-
#
|
3988
|
-
# @
|
3989
|
-
#
|
4065
|
+
# @return [Series]
|
4066
|
+
#
|
4067
|
+
# @example
|
4068
|
+
# df = Polars::DataFrame.new(
|
4069
|
+
# {
|
4070
|
+
# "foo" => [1, 2, 3],
|
4071
|
+
# "bar" => [4.0, 5.0, 6.0]
|
4072
|
+
# }
|
4073
|
+
# )
|
4074
|
+
# df.min_horizontal
|
4075
|
+
# # =>
|
4076
|
+
# # shape: (3,)
|
4077
|
+
# # Series: 'min' [f64]
|
4078
|
+
# # [
|
4079
|
+
# # 1.0
|
4080
|
+
# # 2.0
|
4081
|
+
# # 3.0
|
4082
|
+
# # ]
|
4083
|
+
def min_horizontal
|
4084
|
+
select(min: F.min_horizontal(F.all)).to_series
|
4085
|
+
end
|
4086
|
+
|
4087
|
+
# Aggregate the columns of this DataFrame to their sum value.
|
3990
4088
|
#
|
3991
4089
|
# @return [DataFrame]
|
3992
4090
|
#
|
@@ -4008,35 +4106,42 @@ module Polars
|
|
4008
4106
|
# # ╞═════╪═════╪══════╡
|
4009
4107
|
# # │ 6 ┆ 21 ┆ null │
|
4010
4108
|
# # └─────┴─────┴──────┘
|
4109
|
+
def sum
|
4110
|
+
lazy.sum.collect(_eager: true)
|
4111
|
+
end
|
4112
|
+
|
4113
|
+
# Sum all values horizontally across columns.
|
4114
|
+
#
|
4115
|
+
# @param ignore_nulls [Boolean]
|
4116
|
+
# Ignore null values (default).
|
4117
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
4118
|
+
#
|
4119
|
+
# @return [Series]
|
4011
4120
|
#
|
4012
4121
|
# @example
|
4013
|
-
# df.
|
4122
|
+
# df = Polars::DataFrame.new(
|
4123
|
+
# {
|
4124
|
+
# "foo" => [1, 2, 3],
|
4125
|
+
# "bar" => [4.0, 5.0, 6.0]
|
4126
|
+
# }
|
4127
|
+
# )
|
4128
|
+
# df.sum_horizontal
|
4014
4129
|
# # =>
|
4015
4130
|
# # shape: (3,)
|
4016
|
-
# # Series: '
|
4131
|
+
# # Series: 'sum' [f64]
|
4017
4132
|
# # [
|
4018
|
-
# #
|
4019
|
-
# #
|
4020
|
-
# #
|
4133
|
+
# # 5.0
|
4134
|
+
# # 7.0
|
4135
|
+
# # 9.0
|
4021
4136
|
# # ]
|
4022
|
-
def
|
4023
|
-
|
4024
|
-
|
4025
|
-
|
4026
|
-
when 1
|
4027
|
-
Utils.wrap_s(_df.sum_horizontal(null_strategy))
|
4028
|
-
else
|
4029
|
-
raise ArgumentError, "Axis should be 0 or 1."
|
4030
|
-
end
|
4137
|
+
def sum_horizontal(ignore_nulls: true)
|
4138
|
+
select(
|
4139
|
+
sum: F.sum_horizontal(F.all, ignore_nulls: ignore_nulls)
|
4140
|
+
).to_series
|
4031
4141
|
end
|
4032
4142
|
|
4033
4143
|
# Aggregate the columns of this DataFrame to their mean value.
|
4034
4144
|
#
|
4035
|
-
# @param axis [Integer]
|
4036
|
-
# Either 0 or 1.
|
4037
|
-
# @param null_strategy ["ignore", "propagate"]
|
4038
|
-
# This argument is only used if axis == 1.
|
4039
|
-
#
|
4040
4145
|
# @return [DataFrame]
|
4041
4146
|
#
|
4042
4147
|
# @example
|
@@ -4057,15 +4162,38 @@ module Polars
|
|
4057
4162
|
# # ╞═════╪═════╪══════╡
|
4058
4163
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4059
4164
|
# # └─────┴─────┴──────┘
|
4060
|
-
def mean
|
4061
|
-
|
4062
|
-
|
4063
|
-
|
4064
|
-
|
4065
|
-
|
4066
|
-
|
4067
|
-
|
4068
|
-
|
4165
|
+
def mean
|
4166
|
+
lazy.mean.collect(_eager: true)
|
4167
|
+
end
|
4168
|
+
|
4169
|
+
# Take the mean of all values horizontally across columns.
|
4170
|
+
#
|
4171
|
+
# @param ignore_nulls [Boolean]
|
4172
|
+
# Ignore null values (default).
|
4173
|
+
# If set to `false`, any null value in the input will lead to a null output.
|
4174
|
+
#
|
4175
|
+
# @return [Series]
|
4176
|
+
#
|
4177
|
+
# @example
|
4178
|
+
# df = Polars::DataFrame.new(
|
4179
|
+
# {
|
4180
|
+
# "foo" => [1, 2, 3],
|
4181
|
+
# "bar" => [4.0, 5.0, 6.0]
|
4182
|
+
# }
|
4183
|
+
# )
|
4184
|
+
# df.mean_horizontal
|
4185
|
+
# # =>
|
4186
|
+
# # shape: (3,)
|
4187
|
+
# # Series: 'mean' [f64]
|
4188
|
+
# # [
|
4189
|
+
# # 2.5
|
4190
|
+
# # 3.5
|
4191
|
+
# # 4.5
|
4192
|
+
# # ]
|
4193
|
+
def mean_horizontal(ignore_nulls: true)
|
4194
|
+
select(
|
4195
|
+
mean: F.mean_horizontal(F.all, ignore_nulls: ignore_nulls)
|
4196
|
+
).to_series
|
4069
4197
|
end
|
4070
4198
|
|
4071
4199
|
# Aggregate the columns of this DataFrame to their standard deviation value.
|