polars-df 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,100 @@
1
+ use magnus::Value;
2
+ use polars::prelude::*;
3
+ use polars_core::utils::CustomIterTools;
4
+
5
+ use crate::conversion::get_rbseq;
6
+ use crate::{RbPolarsErr, RbResult};
7
+
8
+ pub fn rb_seq_to_list(name: &str, seq: Value, dtype: &DataType) -> RbResult<Series> {
9
+ let (seq, len) = get_rbseq(seq)?;
10
+
11
+ let s = match dtype {
12
+ DataType::Int64 => {
13
+ let mut builder =
14
+ ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
15
+ for sub_seq in seq.each() {
16
+ let sub_seq = sub_seq?;
17
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
18
+
19
+ // safety: we know the iterators len
20
+ let iter = unsafe {
21
+ sub_seq
22
+ .each()
23
+ .map(|v| {
24
+ let v = v.unwrap();
25
+ if v.is_nil() {
26
+ None
27
+ } else {
28
+ Some(v.try_convert::<i64>().unwrap())
29
+ }
30
+ })
31
+ .trust_my_length(len)
32
+ };
33
+ builder.append_iter(iter)
34
+ }
35
+ builder.finish().into_series()
36
+ }
37
+ DataType::Float64 => {
38
+ let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
39
+ name,
40
+ len,
41
+ len * 5,
42
+ DataType::Float64,
43
+ );
44
+ for sub_seq in seq.each() {
45
+ let sub_seq = sub_seq?;
46
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
47
+ // safety: we know the iterators len
48
+ let iter = unsafe {
49
+ sub_seq
50
+ .each()
51
+ .map(|v| {
52
+ let v = v.unwrap();
53
+ if v.is_nil() {
54
+ None
55
+ } else {
56
+ Some(v.try_convert::<f64>().unwrap())
57
+ }
58
+ })
59
+ .trust_my_length(len)
60
+ };
61
+ builder.append_iter(iter)
62
+ }
63
+ builder.finish().into_series()
64
+ }
65
+ DataType::Boolean => {
66
+ let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
67
+ for sub_seq in seq.each() {
68
+ let sub_seq = sub_seq?;
69
+ let (sub_seq, len) = get_rbseq(sub_seq)?;
70
+ // safety: we know the iterators len
71
+ let iter = unsafe {
72
+ sub_seq
73
+ .each()
74
+ .map(|v| {
75
+ let v = v.unwrap();
76
+ if v.is_nil() {
77
+ None
78
+ } else {
79
+ Some(v.try_convert::<bool>().unwrap())
80
+ }
81
+ })
82
+ .trust_my_length(len)
83
+ };
84
+ builder.append_iter(iter)
85
+ }
86
+ builder.finish().into_series()
87
+ }
88
+ DataType::Utf8 => {
89
+ return Err(RbPolarsErr::todo());
90
+ }
91
+ dt => {
92
+ return Err(RbPolarsErr::other(format!(
93
+ "cannot create list array from {:?}",
94
+ dt
95
+ )));
96
+ }
97
+ };
98
+
99
+ Ok(s)
100
+ }
@@ -5,6 +5,7 @@ use polars::series::IsSorted;
5
5
  use std::cell::RefCell;
6
6
 
7
7
  use crate::conversion::*;
8
+ use crate::list_construction::rb_seq_to_list;
8
9
  use crate::set::set_at_idx;
9
10
  use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
10
11
 
@@ -123,6 +124,19 @@ impl RbSeries {
123
124
  RbSeries::new(s)
124
125
  }
125
126
 
127
+ pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
128
+ let val = val
129
+ .each()
130
+ .map(|v| v.map(ObjectValue::from))
131
+ .collect::<RbResult<Vec<ObjectValue>>>()?;
132
+ let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
133
+ Ok(s.into())
134
+ }
135
+
136
+ pub fn new_list(name: String, seq: Value, dtype: Wrap<DataType>) -> RbResult<Self> {
137
+ rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into())
138
+ }
139
+
126
140
  pub fn estimated_size(&self) -> usize {
127
141
  self.series.borrow().estimated_size()
128
142
  }
@@ -787,4 +801,25 @@ impl RbSeries {
787
801
  let ca: ChunkedArray<Int32Type> = builder.finish();
788
802
  Ok(ca.into_date().into_series().into())
789
803
  }
804
+
805
+ pub fn new_opt_datetime(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
806
+ let len = values.len();
807
+ let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(&name, len);
808
+ for item in values.each() {
809
+ let v = item?;
810
+ if v.is_nil() {
811
+ builder.append_null();
812
+ } else {
813
+ let sec: i64 = v.funcall("to_i", ())?;
814
+ let nsec: i64 = v.funcall("nsec", ())?;
815
+ // TODO use strict
816
+ builder.append_value(sec * 1_000_000_000 + nsec);
817
+ }
818
+ }
819
+ let ca: ChunkedArray<Int64Type> = builder.finish();
820
+ Ok(ca
821
+ .into_datetime(TimeUnit::Nanoseconds, None)
822
+ .into_series()
823
+ .into())
824
+ }
790
825
  }
@@ -0,0 +1,54 @@
1
module Polars
  # Namespace holding the categorical operations exposed under the "cat" accessor.
  class CatNameSpace
    include ExprDispatch

    self._accessor = "cat"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Choose how values of this categorical series are ordered when sorting.
    #
    # @param ordering ["physical", "lexical"]
    #   Ordering type:
    #
    #   - 'physical' -> Use the physical representation of the categories to
    #     determine the order (default).
    #   - 'lexical' -> Use the string values to determine the ordering.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
    #   ).with_columns(
    #     [
    #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
    #     ]
    #   )
    #   df.sort(["cats", "vals"])
    #   # =>
    #   # shape: (5, 2)
    #   # ┌──────┬──────┐
    #   # │ cats ┆ vals │
    #   # │ ---  ┆ ---  │
    #   # │ cat  ┆ i64  │
    #   # ╞══════╪══════╡
    #   # │ a    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ b    ┆ 3    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ k    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 1    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 3    │
    #   # └──────┴──────┘
    def set_ordering(ordering)
      # Hand the call, with its argument, to the next implementation in the lookup chain.
      super(ordering)
    end
  end
end
@@ -880,7 +880,7 @@ module Polars
880
880
  # "val" => [1, 2, 3]
881
881
  # }
882
882
  # )
883
- # df.reverse()
883
+ # df.reverse
884
884
  # # =>
885
885
  # # shape: (3, 2)
886
886
  # # ┌─────┬─────┐
@@ -1998,8 +1998,105 @@ module Polars
1998
1998
  self[name]
1999
1999
  end
2000
2000
 
2001
- # def fill_null
2002
- # end
2001
+ # Fill null values using the specified value or strategy.
2002
+ #
2003
+ # @param value [Numeric]
2004
+ # Value used to fill null values.
2005
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
2006
+ # Strategy used to fill null values.
2007
+ # @param limit [Integer]
2008
+ # Number of consecutive null values to fill when using the 'forward' or
2009
+ # 'backward' strategy.
2010
+ # @param matches_supertype [Boolean]
2011
+ # Fill all columns whose dtype matches the supertype of the fill `value`.
2012
+ #
2013
+ # @return [DataFrame]
2014
+ #
2015
+ # @example
2016
+ # df = Polars::DataFrame.new(
2017
+ # {
2018
+ # "a" => [1, 2, nil, 4],
2019
+ # "b" => [0.5, 4, nil, 13]
2020
+ # }
2021
+ # )
2022
+ # df.fill_null(99)
2023
+ # # =>
2024
+ # # shape: (4, 2)
2025
+ # # ┌─────┬──────┐
2026
+ # # │ a ┆ b │
2027
+ # # │ --- ┆ --- │
2028
+ # # │ i64 ┆ f64 │
2029
+ # # ╞═════╪══════╡
2030
+ # # │ 1 ┆ 0.5 │
2031
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2032
+ # # │ 2 ┆ 4.0 │
2033
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2034
+ # # │ 99 ┆ 99.0 │
2035
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2036
+ # # │ 4 ┆ 13.0 │
2037
+ # # └─────┴──────┘
2038
+ #
2039
+ # @example
2040
+ # df.fill_null(strategy: "forward")
2041
+ # # =>
2042
+ # # shape: (4, 2)
2043
+ # # ┌─────┬──────┐
2044
+ # # │ a ┆ b │
2045
+ # # │ --- ┆ --- │
2046
+ # # │ i64 ┆ f64 │
2047
+ # # ╞═════╪══════╡
2048
+ # # │ 1 ┆ 0.5 │
2049
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2050
+ # # │ 2 ┆ 4.0 │
2051
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2052
+ # # │ 2 ┆ 4.0 │
2053
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2054
+ # # │ 4 ┆ 13.0 │
2055
+ # # └─────┴──────┘
2056
+ #
2057
+ # @example
2058
+ # df.fill_null(strategy: "max")
2059
+ # # =>
2060
+ # # shape: (4, 2)
2061
+ # # ┌─────┬──────┐
2062
+ # # │ a ┆ b │
2063
+ # # │ --- ┆ --- │
2064
+ # # │ i64 ┆ f64 │
2065
+ # # ╞═════╪══════╡
2066
+ # # │ 1 ┆ 0.5 │
2067
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2068
+ # # │ 2 ┆ 4.0 │
2069
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2070
+ # # │ 4 ┆ 13.0 │
2071
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2072
+ # # │ 4 ┆ 13.0 │
2073
+ # # └─────┴──────┘
2074
+ #
2075
+ # @example
2076
+ # df.fill_null(strategy: "zero")
2077
+ # # =>
2078
+ # # shape: (4, 2)
2079
+ # # ┌─────┬──────┐
2080
+ # # │ a ┆ b │
2081
+ # # │ --- ┆ --- │
2082
+ # # │ i64 ┆ f64 │
2083
+ # # ╞═════╪══════╡
2084
+ # # │ 1 ┆ 0.5 │
2085
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2086
+ # # │ 2 ┆ 4.0 │
2087
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2088
+ # # │ 0 ┆ 0.0 │
2089
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
2090
+ # # │ 4 ┆ 13.0 │
2091
+ # # └─────┴──────┘
2092
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
  # Build the fill on the lazy frame, then collect it with optimizations disabled.
  filled = lazy.fill_null(
    value,
    strategy: strategy,
    limit: limit,
    matches_supertype: matches_supertype
  )
  _from_rbdf(filled.collect(no_optimization: true)._df)
end
2003
2100
 
2004
2101
  # Fill floating point NaN values by an Expression evaluation.
2005
2102
  #
@@ -2357,7 +2454,7 @@ module Polars
2357
2454
  # [
2358
2455
  # (Polars.col("a") ** 2).alias("a^2"),
2359
2456
  # (Polars.col("b") / 2).alias("b/2"),
2360
- # (Polars.col("c").is_not()).alias("not c")
2457
+ # (Polars.col("c").is_not).alias("not c")
2361
2458
  # ]
2362
2459
  # )
2363
2460
  # # =>
@@ -1357,9 +1357,9 @@ module Polars
1357
1357
 
1358
1358
  # Offset this date by a relative time offset.
1359
1359
  #
1360
- # This differs from ``pl.col("foo") + timedelta`` in that it can
1360
+ # This differs from `Polars.col("foo") + timedelta` in that it can
1361
1361
  # take months and leap years into account. Note that only a single minus
1362
- # sign is allowed in the ``by`` string, as the first character.
1362
+ # sign is allowed in the `by` string, as the first character.
1363
1363
  #
1364
1364
  # @param by [String]
1365
1365
  # The offset is dictated by the following string language: