polars-df 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
+ use magnus::Value;
2
+ use polars::prelude::*;
3
+ use polars_core::utils::CustomIterTools;
4
+
5
+ use crate::conversion::get_rbseq;
6
+ use crate::{RbPolarsErr, RbResult};
7
+
8
+ pub fn rb_seq_to_list(name: &str, seq: Value, dtype: &DataType) -> RbResult<Series> {
9
+ let (seq, len) = get_rbseq(seq)?;
10
+
11
+ let s = match dtype {
12
+ DataType::Int64 => {
13
+ let mut builder =
14
+ ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
15
+ for sub_seq in seq.each() {
16
+ let sub_seq = sub_seq?;
17
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
18
+
19
+ // SAFETY: the iterator's length is known (`len` comes from `get_rbseq` above)
20
+ let iter = unsafe {
21
+ sub_seq
22
+ .each()
23
+ .map(|v| {
24
+ let v = v.unwrap();
25
+ if v.is_nil() {
26
+ None
27
+ } else {
28
+ Some(v.try_convert::<i64>().unwrap())
29
+ }
30
+ })
31
+ .trust_my_length(len)
32
+ };
33
+ builder.append_iter(iter)
34
+ }
35
+ builder.finish().into_series()
36
+ }
37
+ DataType::Float64 => {
38
+ let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
39
+ name,
40
+ len,
41
+ len * 5,
42
+ DataType::Float64,
43
+ );
44
+ for sub_seq in seq.each() {
45
+ let sub_seq = sub_seq?;
46
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
47
+ // SAFETY: the iterator's length is known (`len` comes from `get_rbseq` above)
48
+ let iter = unsafe {
49
+ sub_seq
50
+ .each()
51
+ .map(|v| {
52
+ let v = v.unwrap();
53
+ if v.is_nil() {
54
+ None
55
+ } else {
56
+ Some(v.try_convert::<f64>().unwrap())
57
+ }
58
+ })
59
+ .trust_my_length(len)
60
+ };
61
+ builder.append_iter(iter)
62
+ }
63
+ builder.finish().into_series()
64
+ }
65
+ DataType::Boolean => {
66
+ let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
67
+ for sub_seq in seq.each() {
68
+ let sub_seq = sub_seq?;
69
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
70
+ // SAFETY: the iterator's length is known (`len` comes from `get_rbseq` above)
71
+ let iter = unsafe {
72
+ sub_seq
73
+ .each()
74
+ .map(|v| {
75
+ let v = v.unwrap();
76
+ if v.is_nil() {
77
+ None
78
+ } else {
79
+ Some(v.try_convert::<bool>().unwrap())
80
+ }
81
+ })
82
+ .trust_my_length(len)
83
+ };
84
+ builder.append_iter(iter)
85
+ }
86
+ builder.finish().into_series()
87
+ }
88
+ DataType::Utf8 => {
89
+ return Err(RbPolarsErr::todo());
90
+ }
91
+ dt => {
92
+ return Err(RbPolarsErr::other(format!(
93
+ "cannot create list array from {:?}",
94
+ dt
95
+ )));
96
+ }
97
+ };
98
+
99
+ Ok(s)
100
+ }
@@ -5,6 +5,7 @@ use polars::series::IsSorted;
5
5
  use std::cell::RefCell;
6
6
 
7
7
  use crate::conversion::*;
8
+ use crate::list_construction::rb_seq_to_list;
8
9
  use crate::set::set_at_idx;
9
10
  use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
10
11
 
@@ -123,6 +124,19 @@ impl RbSeries {
123
124
  RbSeries::new(s)
124
125
  }
125
126
 
127
+ pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
128
+ let val = val
129
+ .each()
130
+ .map(|v| v.map(ObjectValue::from))
131
+ .collect::<RbResult<Vec<ObjectValue>>>()?;
132
+ let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
133
+ Ok(s.into())
134
+ }
135
+
136
+ pub fn new_list(name: String, seq: Value, dtype: Wrap<DataType>) -> RbResult<Self> {
137
+ rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into())
138
+ }
139
+
126
140
  pub fn estimated_size(&self) -> usize {
127
141
  self.series.borrow().estimated_size()
128
142
  }
@@ -787,4 +801,25 @@ impl RbSeries {
787
801
  let ca: ChunkedArray<Int32Type> = builder.finish();
788
802
  Ok(ca.into_date().into_series().into())
789
803
  }
804
+
805
+ pub fn new_opt_datetime(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
806
+ let len = values.len();
807
+ let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(&name, len);
808
+ for item in values.each() {
809
+ let v = item?;
810
+ if v.is_nil() {
811
+ builder.append_null();
812
+ } else {
813
+ let sec: i64 = v.funcall("to_i", ())?;
814
+ let nsec: i64 = v.funcall("nsec", ())?;
815
+ // TODO: honor the `_strict` argument (currently ignored)
816
+ builder.append_value(sec * 1_000_000_000 + nsec);
817
+ }
818
+ }
819
+ let ca: ChunkedArray<Int64Type> = builder.finish();
820
+ Ok(ca
821
+ .into_datetime(TimeUnit::Nanoseconds, None)
822
+ .into_series()
823
+ .into())
824
+ }
790
825
  }
@@ -0,0 +1,54 @@
1
+ module Polars
2
+ # Series.cat namespace.
3
+ class CatNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "cat"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Determine how this categorical series should be sorted.
14
+ #
15
+ # @param ordering ["physical", "lexical"]
16
+ # Ordering type:
17
+ #
18
+ # - 'physical' -> Use the physical representation of the categories to
19
+ # determine the order (default).
20
+ # - 'lexical' -> Use the string values to determine the ordering.
21
+ #
22
+ # @return [Series]
23
+ #
24
+ # @example
25
+ # df = Polars::DataFrame.new(
26
+ # {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
27
+ # ).with_columns(
28
+ # [
29
+ # Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
30
+ # ]
31
+ # )
32
+ # df.sort(["cats", "vals"])
33
+ # # =>
34
+ # # shape: (5, 2)
35
+ # # ┌──────┬──────┐
36
+ # # │ cats ┆ vals │
37
+ # # │ --- ┆ --- │
38
+ # # │ cat ┆ i64 │
39
+ # # ╞══════╪══════╡
40
+ # # │ a ┆ 2 │
41
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
42
+ # # │ b ┆ 3 │
43
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
44
+ # # │ k ┆ 2 │
45
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
46
+ # # │ z ┆ 1 │
47
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
48
+ # # │ z ┆ 3 │
49
+ # # └──────┴──────┘
50
+ def set_ordering(ordering)
51
+ super
52
+ end
53
+ end
54
+ end
@@ -880,7 +880,7 @@ module Polars
880
880
  # "val" => [1, 2, 3]
881
881
  # }
882
882
  # )
883
- # df.reverse()
883
+ # df.reverse
884
884
  # # =>
885
885
  # # shape: (3, 2)
886
886
  # # ┌─────┬─────┐
@@ -1998,8 +1998,105 @@ module Polars
1998
1998
  self[name]
1999
1999
  end
2000
2000
 
2001
- # def fill_null
2002
- # end
2001
+ # Fill null values using the specified value or strategy.
2002
+ #
2003
+ # @param value [Numeric]
2004
+ # Value used to fill null values.
2005
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2006
+ # Strategy used to fill null values.
2007
+ # @param limit [Integer]
2008
+ # Number of consecutive null values to fill when using the 'forward' or
2009
+ # 'backward' strategy.
2010
+ # @param matches_supertype [Boolean]
2011
+ # Fill all matching supertypes of the fill `value`.
2012
+ #
2013
+ # @return [DataFrame]
2014
+ #
2015
+ # @example
2016
+ # df = Polars::DataFrame.new(
2017
+ # {
2018
+ # "a" => [1, 2, nil, 4],
2019
+ # "b" => [0.5, 4, nil, 13]
2020
+ # }
2021
+ # )
2022
+ # df.fill_null(99)
2023
+ # # =>
2024
+ # # shape: (4, 2)
2025
+ # # ┌─────┬──────┐
2026
+ # # │ a ┆ b │
2027
+ # # │ --- ┆ --- │
2028
+ # # │ i64 ┆ f64 │
2029
+ # # ╞═════╪══════╡
2030
+ # # │ 1 ┆ 0.5 │
2031
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2032
+ # # │ 2 ┆ 4.0 │
2033
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2034
+ # # │ 99 ┆ 99.0 │
2035
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2036
+ # # │ 4 ┆ 13.0 │
2037
+ # # └─────┴──────┘
2038
+ #
2039
+ # @example
2040
+ # df.fill_null(strategy: "forward")
2041
+ # # =>
2042
+ # # shape: (4, 2)
2043
+ # # ┌─────┬──────┐
2044
+ # # │ a ┆ b │
2045
+ # # │ --- ┆ --- │
2046
+ # # │ i64 ┆ f64 │
2047
+ # # ╞═════╪══════╡
2048
+ # # │ 1 ┆ 0.5 │
2049
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2050
+ # # │ 2 ┆ 4.0 │
2051
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2052
+ # # │ 2 ┆ 4.0 │
2053
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2054
+ # # │ 4 ┆ 13.0 │
2055
+ # # └─────┴──────┘
2056
+ #
2057
+ # @example
2058
+ # df.fill_null(strategy: "max")
2059
+ # # =>
2060
+ # # shape: (4, 2)
2061
+ # # ┌─────┬──────┐
2062
+ # # │ a ┆ b │
2063
+ # # │ --- ┆ --- │
2064
+ # # │ i64 ┆ f64 │
2065
+ # # ╞═════╪══════╡
2066
+ # # │ 1 ┆ 0.5 │
2067
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2068
+ # # │ 2 ┆ 4.0 │
2069
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2070
+ # # │ 4 ┆ 13.0 │
2071
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2072
+ # # │ 4 ┆ 13.0 │
2073
+ # # └─────┴──────┘
2074
+ #
2075
+ # @example
2076
+ # df.fill_null(strategy: "zero")
2077
+ # # =>
2078
+ # # shape: (4, 2)
2079
+ # # ┌─────┬──────┐
2080
+ # # │ a ┆ b │
2081
+ # # │ --- ┆ --- │
2082
+ # # │ i64 ┆ f64 │
2083
+ # # ╞═════╪══════╡
2084
+ # # │ 1 ┆ 0.5 │
2085
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2086
+ # # │ 2 ┆ 4.0 │
2087
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2088
+ # # │ 0 ┆ 0.0 │
2089
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2090
+ # # │ 4 ┆ 13.0 │
2091
+ # # └─────┴──────┘
2092
+ def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
2093
+ _from_rbdf(
2094
+ lazy
2095
+ .fill_null(value, strategy: strategy, limit: limit, matches_supertype: matches_supertype)
2096
+ .collect(no_optimization: true)
2097
+ ._df
2098
+ )
2099
+ end
2003
2100
 
2004
2101
  # Fill floating point NaN values by an Expression evaluation.
2005
2102
  #
@@ -2357,7 +2454,7 @@ module Polars
2357
2454
  # [
2358
2455
  # (Polars.col("a") ** 2).alias("a^2"),
2359
2456
  # (Polars.col("b") / 2).alias("b/2"),
2360
- # (Polars.col("c").is_not()).alias("not c")
2457
+ # (Polars.col("c").is_not).alias("not c")
2361
2458
  # ]
2362
2459
  # )
2363
2460
  # # =>
@@ -1357,9 +1357,9 @@ module Polars
1357
1357
 
1358
1358
  # Offset this date by a relative time offset.
1359
1359
  #
1360
- # This differs from ``pl.col("foo") + timedelta`` in that it can
1360
+ # This differs from `Polars.col("foo") + timedelta` in that it can
1361
1361
  # take months and leap years into account. Note that only a single minus
1362
- # sign is allowed in the ``by`` string, as the first character.
1362
+ # sign is allowed in the `by` string, as the first character.
1363
1363
  #
1364
1364
  # @param by [String]
1365
1365
  # The offset is dictated by the following string language: