polars-df 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +1946 -0
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +31 -1
- data/ext/polars/src/conversion.rs +237 -43
- data/ext/polars/src/dataframe.rs +278 -1
- data/ext/polars/src/lazy/dataframe.rs +304 -10
- data/ext/polars/src/lazy/dsl.rs +1096 -5
- data/ext/polars/src/lazy/meta.rs +41 -0
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +313 -0
- data/ext/polars/src/series.rs +168 -5
- data/lib/polars/cat_expr.rb +13 -0
- data/lib/polars/data_frame.rb +312 -7
- data/lib/polars/date_time_expr.rb +143 -0
- data/lib/polars/expr.rb +488 -0
- data/lib/polars/lazy_frame.rb +184 -6
- data/lib/polars/lazy_functions.rb +4 -0
- data/lib/polars/list_expr.rb +108 -0
- data/lib/polars/meta_expr.rb +33 -0
- data/lib/polars/series.rb +513 -11
- data/lib/polars/string_expr.rb +117 -0
- data/lib/polars/struct_expr.rb +27 -0
- data/lib/polars/utils.rb +27 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -1
- metadata +10 -2
    
        data/ext/polars/src/series.rs
    CHANGED
    
    | @@ -1,11 +1,12 @@ | |
| 1 | 
            -
            use crate::conversion::wrap;
         | 
| 2 | 
            -
            use crate::{RbDataFrame, RbPolarsErr, RbResult};
         | 
| 3 1 | 
             
            use magnus::exception::arg_error;
         | 
| 4 2 | 
             
            use magnus::{Error, RArray, Value};
         | 
| 5 3 | 
             
            use polars::prelude::*;
         | 
| 6 4 | 
             
            use polars::series::IsSorted;
         | 
| 7 5 | 
             
            use std::cell::RefCell;
         | 
| 8 6 |  | 
| 7 | 
            +
            use crate::conversion::*;
         | 
| 8 | 
            +
            use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
         | 
| 9 | 
            +
             | 
| 9 10 | 
             
            #[magnus::wrap(class = "Polars::RbSeries")]
         | 
| 10 11 | 
             
            pub struct RbSeries {
         | 
| 11 12 | 
             
                pub series: RefCell<Series>,
         | 
| @@ -24,6 +25,14 @@ impl RbSeries { | |
| 24 25 | 
             
                    }
         | 
| 25 26 | 
             
                }
         | 
| 26 27 |  | 
| 28 | 
            +
                pub fn is_sorted_flag(&self) -> bool {
         | 
| 29 | 
            +
                    matches!(self.series.borrow().is_sorted(), IsSorted::Ascending)
         | 
| 30 | 
            +
                }
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                pub fn is_sorted_reverse_flag(&self) -> bool {
         | 
| 33 | 
            +
                    matches!(self.series.borrow().is_sorted(), IsSorted::Descending)
         | 
| 34 | 
            +
                }
         | 
| 35 | 
            +
             | 
| 27 36 | 
             
                pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
         | 
| 28 37 | 
             
                    let len = obj.len();
         | 
| 29 38 | 
             
                    let mut builder = BooleanChunkedBuilder::new(&name, len);
         | 
| @@ -114,6 +123,29 @@ impl RbSeries { | |
| 114 123 | 
             
                    Ok(RbSeries::new(s))
         | 
| 115 124 | 
             
                }
         | 
| 116 125 |  | 
| 126 | 
            +
                pub fn estimated_size(&self) -> usize {
         | 
| 127 | 
            +
                    self.series.borrow().estimated_size()
         | 
| 128 | 
            +
                }
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
         | 
| 131 | 
            +
                    let val = format!("{}", self.series.borrow().get(index));
         | 
| 132 | 
            +
                    if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
         | 
| 133 | 
            +
                        let v_trunc = &val[..val
         | 
| 134 | 
            +
                            .char_indices()
         | 
| 135 | 
            +
                            .take(str_lengths)
         | 
| 136 | 
            +
                            .last()
         | 
| 137 | 
            +
                            .map(|(i, c)| i + c.len_utf8())
         | 
| 138 | 
            +
                            .unwrap_or(0)];
         | 
| 139 | 
            +
                        if val == v_trunc {
         | 
| 140 | 
            +
                            val
         | 
| 141 | 
            +
                        } else {
         | 
| 142 | 
            +
                            format!("{}...", v_trunc)
         | 
| 143 | 
            +
                        }
         | 
| 144 | 
            +
                    } else {
         | 
| 145 | 
            +
                        val
         | 
| 146 | 
            +
                    }
         | 
| 147 | 
            +
                }
         | 
| 148 | 
            +
             | 
| 117 149 | 
             
                pub fn rechunk(&self, in_place: bool) -> Option<Self> {
         | 
| 118 150 | 
             
                    let series = self.series.borrow_mut().rechunk();
         | 
| 119 151 | 
             
                    if in_place {
         | 
| @@ -124,6 +156,10 @@ impl RbSeries { | |
| 124 156 | 
             
                    }
         | 
| 125 157 | 
             
                }
         | 
| 126 158 |  | 
| 159 | 
            +
                pub fn get_idx(&self, idx: usize) -> Value {
         | 
| 160 | 
            +
                    Wrap(self.series.borrow().get(idx)).into()
         | 
| 161 | 
            +
                }
         | 
| 162 | 
            +
             | 
| 127 163 | 
             
                pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
         | 
| 128 164 | 
             
                    let out = self
         | 
| 129 165 | 
             
                        .series
         | 
| @@ -196,15 +232,15 @@ impl RbSeries { | |
| 196 232 | 
             
                }
         | 
| 197 233 |  | 
| 198 234 | 
             
                pub fn max(&self) -> Value {
         | 
| 199 | 
            -
                     | 
| 235 | 
            +
                    Wrap(self.series.borrow().max_as_series().get(0)).into()
         | 
| 200 236 | 
             
                }
         | 
| 201 237 |  | 
| 202 238 | 
             
                pub fn min(&self) -> Value {
         | 
| 203 | 
            -
                     | 
| 239 | 
            +
                    Wrap(self.series.borrow().min_as_series().get(0)).into()
         | 
| 204 240 | 
             
                }
         | 
| 205 241 |  | 
| 206 242 | 
             
                pub fn sum(&self) -> Value {
         | 
| 207 | 
            -
                     | 
| 243 | 
            +
                    Wrap(self.series.borrow().sum_as_series().get(0)).into()
         | 
| 208 244 | 
             
                }
         | 
| 209 245 |  | 
| 210 246 | 
             
                pub fn n_chunks(&self) -> usize {
         | 
| @@ -454,6 +490,111 @@ impl RbSeries { | |
| 454 490 | 
             
                    }
         | 
| 455 491 | 
             
                }
         | 
| 456 492 |  | 
| 493 | 
            +
                pub fn quantile(
         | 
| 494 | 
            +
                    &self,
         | 
| 495 | 
            +
                    quantile: f64,
         | 
| 496 | 
            +
                    interpolation: Wrap<QuantileInterpolOptions>,
         | 
| 497 | 
            +
                ) -> RbResult<Value> {
         | 
| 498 | 
            +
                    Ok(Wrap(
         | 
| 499 | 
            +
                        self.series
         | 
| 500 | 
            +
                            .borrow()
         | 
| 501 | 
            +
                            .quantile_as_series(quantile, interpolation.0)
         | 
| 502 | 
            +
                            .map_err(|_| RbValueError::new_err("invalid quantile".into()))?
         | 
| 503 | 
            +
                            .get(0),
         | 
| 504 | 
            +
                    )
         | 
| 505 | 
            +
                    .into())
         | 
| 506 | 
            +
                }
         | 
| 507 | 
            +
             | 
| 508 | 
            +
                pub fn clone(&self) -> Self {
         | 
| 509 | 
            +
                    RbSeries::new(self.series.borrow().clone())
         | 
| 510 | 
            +
                }
         | 
| 511 | 
            +
             | 
| 512 | 
            +
                pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
         | 
| 513 | 
            +
                    let binding = mask.series.borrow();
         | 
| 514 | 
            +
                    let mask = binding.bool().map_err(RbPolarsErr::from)?;
         | 
| 515 | 
            +
                    let s = self
         | 
| 516 | 
            +
                        .series
         | 
| 517 | 
            +
                        .borrow()
         | 
| 518 | 
            +
                        .zip_with(mask, &other.series.borrow())
         | 
| 519 | 
            +
                        .map_err(RbPolarsErr::from)?;
         | 
| 520 | 
            +
                    Ok(RbSeries::new(s))
         | 
| 521 | 
            +
                }
         | 
| 522 | 
            +
             | 
| 523 | 
            +
                pub fn to_dummies(&self) -> RbResult<RbDataFrame> {
         | 
| 524 | 
            +
                    let df = self
         | 
| 525 | 
            +
                        .series
         | 
| 526 | 
            +
                        .borrow()
         | 
| 527 | 
            +
                        .to_dummies()
         | 
| 528 | 
            +
                        .map_err(RbPolarsErr::from)?;
         | 
| 529 | 
            +
                    Ok(df.into())
         | 
| 530 | 
            +
                }
         | 
| 531 | 
            +
             | 
| 532 | 
            +
                pub fn peak_max(&self) -> Self {
         | 
| 533 | 
            +
                    self.series.borrow().peak_max().into_series().into()
         | 
| 534 | 
            +
                }
         | 
| 535 | 
            +
             | 
| 536 | 
            +
                pub fn peak_min(&self) -> Self {
         | 
| 537 | 
            +
                    self.series.borrow().peak_min().into_series().into()
         | 
| 538 | 
            +
                }
         | 
| 539 | 
            +
             | 
| 540 | 
            +
                pub fn n_unique(&self) -> RbResult<usize> {
         | 
| 541 | 
            +
                    let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
         | 
| 542 | 
            +
                    Ok(n)
         | 
| 543 | 
            +
                }
         | 
| 544 | 
            +
             | 
| 545 | 
            +
                pub fn floor(&self) -> RbResult<Self> {
         | 
| 546 | 
            +
                    let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?;
         | 
| 547 | 
            +
                    Ok(s.into())
         | 
| 548 | 
            +
                }
         | 
| 549 | 
            +
             | 
| 550 | 
            +
                pub fn shrink_to_fit(&self) {
         | 
| 551 | 
            +
                    self.series.borrow_mut().shrink_to_fit();
         | 
| 552 | 
            +
                }
         | 
| 553 | 
            +
             | 
| 554 | 
            +
                pub fn dot(&self, other: &RbSeries) -> Option<f64> {
         | 
| 555 | 
            +
                    self.series.borrow().dot(&other.series.borrow())
         | 
| 556 | 
            +
                }
         | 
| 557 | 
            +
             | 
| 558 | 
            +
                pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
         | 
| 559 | 
            +
                    let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?;
         | 
| 560 | 
            +
                    Ok(out)
         | 
| 561 | 
            +
                }
         | 
| 562 | 
            +
             | 
| 563 | 
            +
                pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> {
         | 
| 564 | 
            +
                    let out = self
         | 
| 565 | 
            +
                        .series
         | 
| 566 | 
            +
                        .borrow()
         | 
| 567 | 
            +
                        .kurtosis(fisher, bias)
         | 
| 568 | 
            +
                        .map_err(RbPolarsErr::from)?;
         | 
| 569 | 
            +
                    Ok(out)
         | 
| 570 | 
            +
                }
         | 
| 571 | 
            +
             | 
| 572 | 
            +
                pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> {
         | 
| 573 | 
            +
                    let dtype = dtype.0;
         | 
| 574 | 
            +
                    let out = if strict {
         | 
| 575 | 
            +
                        self.series.borrow().strict_cast(&dtype)
         | 
| 576 | 
            +
                    } else {
         | 
| 577 | 
            +
                        self.series.borrow().cast(&dtype)
         | 
| 578 | 
            +
                    };
         | 
| 579 | 
            +
                    let out = out.map_err(RbPolarsErr::from)?;
         | 
| 580 | 
            +
                    Ok(out.into())
         | 
| 581 | 
            +
                }
         | 
| 582 | 
            +
             | 
| 583 | 
            +
                pub fn time_unit(&self) -> Option<String> {
         | 
| 584 | 
            +
                    if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
         | 
| 585 | 
            +
                        Some(
         | 
| 586 | 
            +
                            match tu {
         | 
| 587 | 
            +
                                TimeUnit::Nanoseconds => "ns",
         | 
| 588 | 
            +
                                TimeUnit::Microseconds => "us",
         | 
| 589 | 
            +
                                TimeUnit::Milliseconds => "ms",
         | 
| 590 | 
            +
                            }
         | 
| 591 | 
            +
                            .to_string(),
         | 
| 592 | 
            +
                        )
         | 
| 593 | 
            +
                    } else {
         | 
| 594 | 
            +
                        None
         | 
| 595 | 
            +
                    }
         | 
| 596 | 
            +
                }
         | 
| 597 | 
            +
             | 
| 457 598 | 
             
                // dispatch dynamically in future?
         | 
| 458 599 |  | 
| 459 600 | 
             
                pub fn cumsum(&self, reverse: bool) -> Self {
         | 
| @@ -468,8 +609,30 @@ impl RbSeries { | |
| 468 609 | 
             
                    self.series.borrow().cummin(reverse).into()
         | 
| 469 610 | 
             
                }
         | 
| 470 611 |  | 
| 612 | 
            +
                pub fn cumprod(&self, reverse: bool) -> Self {
         | 
| 613 | 
            +
                    self.series.borrow().cumprod(reverse).into()
         | 
| 614 | 
            +
                }
         | 
| 615 | 
            +
             | 
| 471 616 | 
             
                pub fn slice(&self, offset: i64, length: usize) -> Self {
         | 
| 472 617 | 
             
                    let series = self.series.borrow().slice(offset, length);
         | 
| 473 618 | 
             
                    series.into()
         | 
| 474 619 | 
             
                }
         | 
| 620 | 
            +
             | 
| 621 | 
            +
                pub fn ceil(&self) -> RbResult<Self> {
         | 
| 622 | 
            +
                    let s = self.series.borrow().ceil().map_err(RbPolarsErr::from)?;
         | 
| 623 | 
            +
                    Ok(s.into())
         | 
| 624 | 
            +
                }
         | 
| 625 | 
            +
             | 
| 626 | 
            +
                pub fn round(&self, decimals: u32) -> RbResult<Self> {
         | 
| 627 | 
            +
                    let s = self
         | 
| 628 | 
            +
                        .series
         | 
| 629 | 
            +
                        .borrow()
         | 
| 630 | 
            +
                        .round(decimals)
         | 
| 631 | 
            +
                        .map_err(RbPolarsErr::from)?;
         | 
| 632 | 
            +
                    Ok(s.into())
         | 
| 633 | 
            +
                }
         | 
| 634 | 
            +
            }
         | 
| 635 | 
            +
             | 
| 636 | 
            +
            pub fn to_rbseries_collection(s: Vec<Series>) -> Vec<RbSeries> {
         | 
| 637 | 
            +
                s.into_iter().map(|v| RbSeries::new(v)).collect()
         | 
| 475 638 | 
             
            }
         | 
    
        data/lib/polars/data_frame.rb
    CHANGED
    
    | @@ -79,10 +79,51 @@ module Polars | |
| 79 79 | 
             
                  _df.columns
         | 
| 80 80 | 
             
                end
         | 
| 81 81 |  | 
| 82 | 
            +
                def columns=(columns)
         | 
| 83 | 
            +
                  _df.set_column_names(columns)
         | 
| 84 | 
            +
                end
         | 
| 85 | 
            +
             | 
| 82 86 | 
             
                def dtypes
         | 
| 83 87 | 
             
                  _df.dtypes.map(&:to_sym)
         | 
| 84 88 | 
             
                end
         | 
| 85 89 |  | 
| 90 | 
            +
                def schema
         | 
| 91 | 
            +
                  columns.zip(dtypes).to_h
         | 
| 92 | 
            +
                end
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                # def ==(other)
         | 
| 95 | 
            +
                # end
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                # def !=(other)
         | 
| 98 | 
            +
                # end
         | 
| 99 | 
            +
             | 
| 100 | 
            +
                # def >(other)
         | 
| 101 | 
            +
                # end
         | 
| 102 | 
            +
             | 
| 103 | 
            +
                # def <(other)
         | 
| 104 | 
            +
                # end
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                # def >=(other)
         | 
| 107 | 
            +
                # end
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                # def <=(other)
         | 
| 110 | 
            +
                # end
         | 
| 111 | 
            +
             | 
| 112 | 
            +
                # def *(other)
         | 
| 113 | 
            +
                # end
         | 
| 114 | 
            +
             | 
| 115 | 
            +
                # def /(other)
         | 
| 116 | 
            +
                # end
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                # def +(other)
         | 
| 119 | 
            +
                # end
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                # def -(other)
         | 
| 122 | 
            +
                # end
         | 
| 123 | 
            +
             | 
| 124 | 
            +
                # def %(other)
         | 
| 125 | 
            +
                # end
         | 
| 126 | 
            +
             | 
| 86 127 | 
             
                def to_s
         | 
| 87 128 | 
             
                  _df.to_s
         | 
| 88 129 | 
             
                end
         | 
| @@ -96,6 +137,25 @@ module Polars | |
| 96 137 | 
             
                  Utils.wrap_s(_df.column(name))
         | 
| 97 138 | 
             
                end
         | 
| 98 139 |  | 
| 140 | 
            +
                # def []=(key, value)
         | 
| 141 | 
            +
                # end
         | 
| 142 | 
            +
             | 
| 143 | 
            +
                def to_h(as_series: true)
         | 
| 144 | 
            +
                  if as_series
         | 
| 145 | 
            +
                    get_columns.to_h { |s| [s.name, s] }
         | 
| 146 | 
            +
                  else
         | 
| 147 | 
            +
                    get_columns.to_h { |s| [s.name, s.to_a] }
         | 
| 148 | 
            +
                  end
         | 
| 149 | 
            +
                end
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                # def to_hs / to_a
         | 
| 152 | 
            +
                # end
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                # def to_numo
         | 
| 155 | 
            +
                # end
         | 
| 156 | 
            +
             | 
| 157 | 
            +
                # no to_pandas
         | 
| 158 | 
            +
             | 
| 99 159 | 
             
                def to_series(index = 0)
         | 
| 100 160 | 
             
                  if index < 0
         | 
| 101 161 | 
             
                    index = columns.length + index
         | 
| @@ -183,6 +243,12 @@ module Polars | |
| 183 243 | 
             
                  nil
         | 
| 184 244 | 
             
                end
         | 
| 185 245 |  | 
| 246 | 
            +
                # def write_avro
         | 
| 247 | 
            +
                # end
         | 
| 248 | 
            +
             | 
| 249 | 
            +
                # def write_ipc
         | 
| 250 | 
            +
                # end
         | 
| 251 | 
            +
             | 
| 186 252 | 
             
                def write_parquet(
         | 
| 187 253 | 
             
                  file,
         | 
| 188 254 | 
             
                  compression: "zstd",
         | 
| @@ -202,10 +268,43 @@ module Polars | |
| 202 268 | 
             
                  )
         | 
| 203 269 | 
             
                end
         | 
| 204 270 |  | 
| 271 | 
            +
                def estimated_size(unit = "b")
         | 
| 272 | 
            +
                  sz = _df.estimated_size
         | 
| 273 | 
            +
                  Utils.scale_bytes(sz, to: unit)
         | 
| 274 | 
            +
                end
         | 
| 275 | 
            +
             | 
| 276 | 
            +
                # def transpose
         | 
| 277 | 
            +
                # end
         | 
| 278 | 
            +
             | 
| 279 | 
            +
                def reverse
         | 
| 280 | 
            +
                  select(Polars.col("*").reverse)
         | 
| 281 | 
            +
                end
         | 
| 282 | 
            +
             | 
| 283 | 
            +
                def rename(mapping)
         | 
| 284 | 
            +
                  lazy.rename(mapping).collect(no_optimization: true)
         | 
| 285 | 
            +
                end
         | 
| 286 | 
            +
             | 
| 287 | 
            +
                def insert_at_idx(index, series)
         | 
| 288 | 
            +
                  if index < 0
         | 
| 289 | 
            +
                    index = columns.length + index
         | 
| 290 | 
            +
                  end
         | 
| 291 | 
            +
                  _df.insert_at_idx(index, series._s)
         | 
| 292 | 
            +
                  self
         | 
| 293 | 
            +
                end
         | 
| 294 | 
            +
             | 
| 205 295 | 
             
                def filter(predicate)
         | 
| 206 296 | 
             
                  lazy.filter(predicate).collect
         | 
| 207 297 | 
             
                end
         | 
| 208 298 |  | 
| 299 | 
            +
                # def describe
         | 
| 300 | 
            +
                # end
         | 
| 301 | 
            +
             | 
| 302 | 
            +
                # def find_idx_by_name
         | 
| 303 | 
            +
                # end
         | 
| 304 | 
            +
             | 
| 305 | 
            +
                # def replace_at_idx
         | 
| 306 | 
            +
                # end
         | 
| 307 | 
            +
             | 
| 209 308 | 
             
                def sort(by, reverse: false, nulls_last: false)
         | 
| 210 309 | 
             
                  _from_rbdf(_df.sort(by, reverse, nulls_last))
         | 
| 211 310 | 
             
                end
         | 
| @@ -214,6 +313,16 @@ module Polars | |
| 214 313 | 
             
                  _df.frame_equal(other._df, null_equal)
         | 
| 215 314 | 
             
                end
         | 
| 216 315 |  | 
| 316 | 
            +
                # def replace
         | 
| 317 | 
            +
                # end
         | 
| 318 | 
            +
             | 
| 319 | 
            +
                def slice(offset, length = nil)
         | 
| 320 | 
            +
                  if !length.nil? && length < 0
         | 
| 321 | 
            +
                    length = height - offset + length
         | 
| 322 | 
            +
                  end
         | 
| 323 | 
            +
                  _from_rbdf(_df.slice(offset, length))
         | 
| 324 | 
            +
                end
         | 
| 325 | 
            +
             | 
| 217 326 | 
             
                def limit(n = 5)
         | 
| 218 327 | 
             
                  head(n)
         | 
| 219 328 | 
             
                end
         | 
| @@ -226,10 +335,31 @@ module Polars | |
| 226 335 | 
             
                  _from_rbdf(_df.tail(n))
         | 
| 227 336 | 
             
                end
         | 
| 228 337 |  | 
| 338 | 
            +
                # def drop_nulls
         | 
| 339 | 
            +
                # end
         | 
| 340 | 
            +
             | 
| 341 | 
            +
                # def pipe
         | 
| 342 | 
            +
                # end
         | 
| 343 | 
            +
             | 
| 344 | 
            +
                # def with_row_count
         | 
| 345 | 
            +
                # end
         | 
| 346 | 
            +
             | 
| 229 347 | 
             
                def groupby(by, maintain_order: false)
         | 
| 230 348 | 
             
                  lazy.groupby(by, maintain_order: maintain_order)
         | 
| 231 349 | 
             
                end
         | 
| 232 350 |  | 
| 351 | 
            +
                # def groupby_rolling
         | 
| 352 | 
            +
                # end
         | 
| 353 | 
            +
             | 
| 354 | 
            +
                # def groupby_dynamic
         | 
| 355 | 
            +
                # end
         | 
| 356 | 
            +
             | 
| 357 | 
            +
                # def upsample
         | 
| 358 | 
            +
                # end
         | 
| 359 | 
            +
             | 
| 360 | 
            +
                # def join_asof
         | 
| 361 | 
            +
                # end
         | 
| 362 | 
            +
             | 
| 233 363 | 
             
                def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
         | 
| 234 364 | 
             
                  lazy
         | 
| 235 365 | 
             
                    .join(
         | 
| @@ -243,12 +373,79 @@ module Polars | |
| 243 373 | 
             
                    .collect(no_optimization: true)
         | 
| 244 374 | 
             
                end
         | 
| 245 375 |  | 
| 376 | 
            +
                # def apply
         | 
| 377 | 
            +
                # end
         | 
| 378 | 
            +
             | 
| 246 379 | 
             
                def with_column(column)
         | 
| 247 380 | 
             
                  lazy
         | 
| 248 381 | 
             
                    .with_column(column)
         | 
| 249 382 | 
             
                    .collect(no_optimization: true, string_cache: false)
         | 
| 250 383 | 
             
                end
         | 
| 251 384 |  | 
| 385 | 
            +
                # def hstack
         | 
| 386 | 
            +
                # end
         | 
| 387 | 
            +
             | 
| 388 | 
            +
                # def vstack
         | 
| 389 | 
            +
                # end
         | 
| 390 | 
            +
             | 
| 391 | 
            +
                # def extend
         | 
| 392 | 
            +
                # end
         | 
| 393 | 
            +
             | 
| 394 | 
            +
                # def drop
         | 
| 395 | 
            +
                # end
         | 
| 396 | 
            +
             | 
| 397 | 
            +
                # def drop_in_place
         | 
| 398 | 
            +
                # end
         | 
| 399 | 
            +
             | 
| 400 | 
            +
                # def cleared
         | 
| 401 | 
            +
                # end
         | 
| 402 | 
            +
             | 
| 403 | 
            +
                # clone handled by initialize_copy
         | 
| 404 | 
            +
             | 
| 405 | 
            +
                def get_columns
         | 
| 406 | 
            +
                  _df.get_columns.map { |s| Utils.wrap_s(s) }
         | 
| 407 | 
            +
                end
         | 
| 408 | 
            +
             | 
| 409 | 
            +
                def get_column(name)
         | 
| 410 | 
            +
                  self[name]
         | 
| 411 | 
            +
                end
         | 
| 412 | 
            +
             | 
| 413 | 
            +
                # def fill_null
         | 
| 414 | 
            +
                # end
         | 
| 415 | 
            +
             | 
| 416 | 
            +
                def fill_nan(fill_value)
         | 
| 417 | 
            +
                  lazy.fill_nan(fill_value).collect(no_optimization: true)
         | 
| 418 | 
            +
                end
         | 
| 419 | 
            +
             | 
| 420 | 
            +
                # def explode
         | 
| 421 | 
            +
                # end
         | 
| 422 | 
            +
             | 
| 423 | 
            +
                # def pivot
         | 
| 424 | 
            +
                # end
         | 
| 425 | 
            +
             | 
| 426 | 
            +
                # def melt
         | 
| 427 | 
            +
                # end
         | 
| 428 | 
            +
             | 
| 429 | 
            +
                # def unstack
         | 
| 430 | 
            +
                # end
         | 
| 431 | 
            +
             | 
| 432 | 
            +
                # def partition_by
         | 
| 433 | 
            +
                # end
         | 
| 434 | 
            +
             | 
| 435 | 
            +
                # def shift
         | 
| 436 | 
            +
                # end
         | 
| 437 | 
            +
             | 
| 438 | 
            +
                # def shift_and_fill
         | 
| 439 | 
            +
                # end
         | 
| 440 | 
            +
             | 
| 441 | 
            +
                # Wraps the native frame's is_duplicated result via Utils.wrap_s.
                def is_duplicated
                  mask = _df.is_duplicated
                  Utils.wrap_s(mask)
                end
         | 
| 444 | 
            +
             | 
| 445 | 
            +
                # Wraps the native frame's is_unique result via Utils.wrap_s.
                def is_unique
                  mask = _df.is_unique
                  Utils.wrap_s(mask)
                end
         | 
| 448 | 
            +
             | 
| 252 449 | 
             
                # Asks the native frame for its lazy counterpart and wraps it
                # with wrap_ldf.
                def lazy
                  native_lazy = _df.lazy
                  wrap_ldf(native_lazy)
                end
         | 
| @@ -262,6 +459,56 @@ module Polars | |
| 262 459 | 
             
                  )
         | 
| 263 460 | 
             
                end
         | 
| 264 461 |  | 
| 462 | 
            +
                # Applies +exprs+ through the lazy frame and collects the result
                # with optimizations and the string cache disabled.
                #
                # A single non-nil, non-Array argument is wrapped in an Array
                # first; nil and Arrays are forwarded unchanged.
                def with_columns(exprs)
                  exprs = [exprs] unless exprs.nil? || exprs.is_a?(Array)
                  plan = lazy.with_columns(exprs)
                  plan.collect(no_optimization: true, string_cache: false)
                end
         | 
| 470 | 
            +
             | 
| 471 | 
            +
                # Reports chunk counts for this frame.
                #
                # strategy - "first" returns the native frame's n_chunks value;
                #            "all" returns an Array with n_chunks of every column
                #            from get_columns.
                #
                # Raises ArgumentError for any other strategy.
                def n_chunks(strategy: "first")
                  if strategy == "first"
                    _df.n_chunks
                  elsif strategy == "all"
                    get_columns.map(&:n_chunks)
                  else
                    # Ruby interpolates with #{...}; the previous Python-style
                    # '{strategy}' and '{{...}}' were emitted verbatim.
                    raise ArgumentError, "Strategy: '#{strategy}' not understood. Choose one of {'first', 'all'}"
                  end
                end
         | 
| 480 | 
            +
             | 
| 481 | 
            +
                # Dispatches on +axis+: 0 builds a frame from the native max
                # result via _from_rbdf, 1 wraps the native hmax result via
                # Utils.wrap_s.
                #
                # Raises ArgumentError for any other axis.
                def max(axis: 0)
                  case axis
                  when 0
                    _from_rbdf(_df.max)
                  when 1
                    Utils.wrap_s(_df.hmax)
                  else
                    raise ArgumentError, "Axis should be 0 or 1."
                  end
                end
         | 
| 490 | 
            +
             | 
| 491 | 
            +
                # Dispatches on +axis+: 0 builds a frame from the native min
                # result via _from_rbdf, 1 wraps the native hmin result via
                # Utils.wrap_s.
                #
                # Raises ArgumentError for any other axis.
                def min(axis: 0)
                  case axis
                  when 0
                    _from_rbdf(_df.min)
                  when 1
                    Utils.wrap_s(_df.hmin)
                  else
                    raise ArgumentError, "Axis should be 0 or 1."
                  end
                end
         | 
| 500 | 
            +
             | 
| 501 | 
            +
                # Dispatches on +axis+: 0 builds a frame from the native sum
                # result via _from_rbdf, 1 wraps the native hsum result (called
                # with +null_strategy+) via Utils.wrap_s.
                #
                # Raises ArgumentError for any other axis.
                def sum(axis: 0, null_strategy: "ignore")
                  if 0 === axis
                    _from_rbdf(_df.sum)
                  elsif 1 === axis
                    Utils.wrap_s(_df.hsum(null_strategy))
                  else
                    raise ArgumentError, "Axis should be 0 or 1."
                  end
                end
         | 
| 511 | 
            +
             | 
| 265 512 | 
             
                def mean(axis: 0, null_strategy: "ignore")
         | 
| 266 513 | 
             
                  case axis
         | 
| 267 514 | 
             
                  when 0
         | 
| @@ -273,15 +520,33 @@ module Polars | |
| 273 520 | 
             
                  end
         | 
| 274 521 | 
             
                end
         | 
| 275 522 |  | 
| 276 | 
            -
                def  | 
| 277 | 
            -
                   | 
| 278 | 
            -
             | 
| 279 | 
            -
             | 
| 280 | 
            -
             | 
| 281 | 
            -
             | 
| 282 | 
            -
                    .collect(no_optimization: true, string_cache: false)
         | 
| 523 | 
            +
                # Calls the native std with +ddof+ and builds a frame from the
                # result via _from_rbdf.
                def std(ddof: 1)
                  native_result = _df.std(ddof)
                  _from_rbdf(native_result)
                end
         | 
| 526 | 
            +
             | 
| 527 | 
            +
                # Calls the native var with +ddof+ and builds a frame from the
                # result via _from_rbdf.
                def var(ddof: 1)
                  native_result = _df.var(ddof)
                  _from_rbdf(native_result)
                end
         | 
| 284 530 |  | 
| 531 | 
            +
                # Builds a frame from the native median result via _from_rbdf.
                def median
                  native_result = _df.median
                  _from_rbdf(native_result)
                end
         | 
| 534 | 
            +
             | 
| 535 | 
            +
                # def product
         | 
| 536 | 
            +
                # end
         | 
| 537 | 
            +
             | 
| 538 | 
            +
                # def quantile(quantile, interpolation: "nearest")
         | 
| 539 | 
            +
                # end
         | 
| 540 | 
            +
             | 
| 541 | 
            +
                # def to_dummies
         | 
| 542 | 
            +
                # end
         | 
| 543 | 
            +
             | 
| 544 | 
            +
                # def unique
         | 
| 545 | 
            +
                # end
         | 
| 546 | 
            +
             | 
| 547 | 
            +
                # def n_unique
         | 
| 548 | 
            +
                # end
         | 
| 549 | 
            +
             | 
| 285 550 | 
             
                # Builds a frame from the native rechunk result via _from_rbdf.
                def rechunk
                  native_result = _df.rechunk
                  _from_rbdf(native_result)
                end
         | 
| @@ -290,8 +555,48 @@ module Polars | |
| 290 555 | 
             
                  _from_rbdf(_df.null_count)
         | 
| 291 556 | 
             
                end
         | 
| 292 557 |  | 
| 558 | 
            +
                # def sample
         | 
| 559 | 
            +
                # end
         | 
| 560 | 
            +
             | 
| 561 | 
            +
                # def fold
         | 
| 562 | 
            +
                # end
         | 
| 563 | 
            +
             | 
| 564 | 
            +
                # def row
         | 
| 565 | 
            +
                # end
         | 
| 566 | 
            +
             | 
| 567 | 
            +
                # def rows
         | 
| 568 | 
            +
                # end
         | 
| 569 | 
            +
             | 
| 570 | 
            +
                # def shrink_to_fit
         | 
| 571 | 
            +
                # end
         | 
| 572 | 
            +
             | 
| 573 | 
            +
                # def take_every
         | 
| 574 | 
            +
                # end
         | 
| 575 | 
            +
             | 
| 576 | 
            +
                # def hash_rows
         | 
| 577 | 
            +
                # end
         | 
| 578 | 
            +
             | 
| 579 | 
            +
                # def interpolate
         | 
| 580 | 
            +
                # end
         | 
| 581 | 
            +
             | 
| 582 | 
            +
                # True when +height+ equals zero. Also exposed as #empty?.
                def is_empty
                  row_count = height
                  row_count == 0
                end
                alias_method :empty?, :is_empty
         | 
| 586 | 
            +
             | 
| 587 | 
            +
                # def to_struct(name)
         | 
| 588 | 
            +
                # end
         | 
| 589 | 
            +
             | 
| 590 | 
            +
                # def unnest
         | 
| 591 | 
            +
                # end
         | 
| 592 | 
            +
             | 
| 293 593 | 
             
                private
         | 
| 294 594 |  | 
| 595 | 
            +
                # Copy hook: after the default copy, replaces _df with the
                # result of its own _clone call.
                def initialize_copy(other)
                  super
                  cloned_df = _df._clone
                  self._df = cloned_df
                end
         | 
| 599 | 
            +
             | 
| 295 600 | 
             
                # Hands +data+ to RbDataFrame.read_hash and returns its result.
                def hash_to_rbdf(data)
                  rbdf = RbDataFrame.read_hash(data)
                  rbdf
                end
         |