polars-df 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -2
  7. data/ext/polars/Cargo.toml +15 -7
  8. data/ext/polars/src/batched_csv.rs +4 -4
  9. data/ext/polars/src/conversion/anyvalue.rs +185 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
  12. data/ext/polars/src/dataframe.rs +69 -53
  13. data/ext/polars/src/expr/array.rs +74 -0
  14. data/ext/polars/src/expr/datetime.rs +22 -56
  15. data/ext/polars/src/expr/general.rs +61 -33
  16. data/ext/polars/src/expr/list.rs +52 -4
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +59 -8
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/functions/aggregation.rs +6 -0
  22. data/ext/polars/src/functions/lazy.rs +103 -48
  23. data/ext/polars/src/functions/meta.rs +45 -1
  24. data/ext/polars/src/functions/string_cache.rs +14 -0
  25. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
  26. data/ext/polars/src/lib.rs +226 -168
  27. data/ext/polars/src/series/aggregation.rs +20 -0
  28. data/ext/polars/src/series/mod.rs +25 -4
  29. data/lib/polars/array_expr.rb +449 -0
  30. data/lib/polars/array_name_space.rb +346 -0
  31. data/lib/polars/cat_expr.rb +24 -0
  32. data/lib/polars/cat_name_space.rb +75 -0
  33. data/lib/polars/config.rb +2 -2
  34. data/lib/polars/data_frame.rb +179 -43
  35. data/lib/polars/data_types.rb +191 -28
  36. data/lib/polars/date_time_expr.rb +31 -14
  37. data/lib/polars/exceptions.rb +12 -1
  38. data/lib/polars/expr.rb +866 -186
  39. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  40. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  41. data/lib/polars/functions/as_datatype.rb +248 -0
  42. data/lib/polars/functions/col.rb +47 -0
  43. data/lib/polars/functions/eager.rb +182 -0
  44. data/lib/polars/functions/lazy.rb +1280 -0
  45. data/lib/polars/functions/len.rb +49 -0
  46. data/lib/polars/functions/lit.rb +35 -0
  47. data/lib/polars/functions/random.rb +16 -0
  48. data/lib/polars/functions/range/date_range.rb +103 -0
  49. data/lib/polars/functions/range/int_range.rb +51 -0
  50. data/lib/polars/functions/repeat.rb +144 -0
  51. data/lib/polars/functions/whenthen.rb +27 -0
  52. data/lib/polars/functions.rb +29 -416
  53. data/lib/polars/group_by.rb +2 -2
  54. data/lib/polars/io.rb +18 -25
  55. data/lib/polars/lazy_frame.rb +367 -53
  56. data/lib/polars/list_expr.rb +152 -6
  57. data/lib/polars/list_name_space.rb +102 -0
  58. data/lib/polars/meta_expr.rb +175 -7
  59. data/lib/polars/series.rb +273 -34
  60. data/lib/polars/string_cache.rb +75 -0
  61. data/lib/polars/string_expr.rb +412 -96
  62. data/lib/polars/string_name_space.rb +4 -4
  63. data/lib/polars/testing.rb +507 -0
  64. data/lib/polars/utils.rb +52 -8
  65. data/lib/polars/version.rb +1 -1
  66. data/lib/polars.rb +15 -2
  67. metadata +35 -5
  68. data/lib/polars/lazy_functions.rb +0 -1181
@@ -0,0 +1,185 @@
1
+ use magnus::encoding::{EncodingCapable, Index};
2
+ use magnus::{
3
+ class, prelude::*, r_hash::ForEach, Float, Integer, IntoValue, RArray, RHash, RString, Ruby,
4
+ TryConvert, Value,
5
+ };
6
+ use polars::frame::row::any_values_to_dtype;
7
+ use polars::prelude::*;
8
+
9
+ use super::{struct_dict, ObjectValue, Wrap};
10
+
11
+ use crate::rb_modules::utils;
12
+ use crate::{RbPolarsErr, RbResult, RbSeries};
13
+
14
+ impl IntoValue for Wrap<AnyValue<'_>> {
15
+ fn into_value_with(self, ruby: &Ruby) -> Value {
16
+ match self.0 {
17
+ AnyValue::UInt8(v) => ruby.into_value(v),
18
+ AnyValue::UInt16(v) => ruby.into_value(v),
19
+ AnyValue::UInt32(v) => ruby.into_value(v),
20
+ AnyValue::UInt64(v) => ruby.into_value(v),
21
+ AnyValue::Int8(v) => ruby.into_value(v),
22
+ AnyValue::Int16(v) => ruby.into_value(v),
23
+ AnyValue::Int32(v) => ruby.into_value(v),
24
+ AnyValue::Int64(v) => ruby.into_value(v),
25
+ AnyValue::Float32(v) => ruby.into_value(v),
26
+ AnyValue::Float64(v) => ruby.into_value(v),
27
+ AnyValue::Null => ruby.qnil().as_value(),
28
+ AnyValue::Boolean(v) => ruby.into_value(v),
29
+ AnyValue::String(v) => ruby.into_value(v),
30
+ AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
31
+ AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
32
+ let s = if arr.is_null() {
33
+ rev.get(idx)
34
+ } else {
35
+ unsafe { arr.deref_unchecked().value(idx as usize) }
36
+ };
37
+ s.into_value()
38
+ }
39
+ AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
40
+ AnyValue::Datetime(v, time_unit, time_zone) => {
41
+ let time_unit = time_unit.to_ascii();
42
+ utils()
43
+ .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
44
+ .unwrap()
45
+ }
46
+ AnyValue::Duration(v, time_unit) => {
47
+ let time_unit = time_unit.to_ascii();
48
+ utils()
49
+ .funcall("_to_ruby_duration", (v, time_unit))
50
+ .unwrap()
51
+ }
52
+ AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
53
+ AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
54
+ ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
55
+ AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
56
+ AnyValue::Object(v) => {
57
+ let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
58
+ object.to_object()
59
+ }
60
+ AnyValue::ObjectOwned(v) => {
61
+ let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
62
+ object.to_object()
63
+ }
64
+ AnyValue::Binary(v) => RString::from_slice(v).into_value(),
65
+ AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
66
+ AnyValue::Decimal(v, scale) => utils()
67
+ .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
68
+ .unwrap(),
69
+ }
70
+ }
71
+ }
72
+
73
+ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
74
+ fn try_convert(ob: Value) -> RbResult<Self> {
75
+ if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
76
+ Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
77
+ } else if let Some(v) = Integer::from_value(ob) {
78
+ Ok(AnyValue::Int64(v.to_i64()?).into())
79
+ } else if let Some(v) = Float::from_value(ob) {
80
+ Ok(AnyValue::Float64(v.to_f64()).into())
81
+ } else if let Some(v) = RString::from_value(ob) {
82
+ if v.enc_get() == Index::utf8() {
83
+ Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
84
+ } else {
85
+ Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
86
+ }
87
+ // call is_a? for ActiveSupport::TimeWithZone
88
+ } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
89
+ let sec = ob.funcall::<_, _, i64>("to_i", ())?;
90
+ let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
91
+ let v = sec * 1_000_000_000 + nsec;
92
+ // TODO support time zone when possible
93
+ // https://github.com/pola-rs/polars/issues/9103
94
+ Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
95
+ } else if ob.is_nil() {
96
+ Ok(AnyValue::Null.into())
97
+ } else if let Some(dict) = RHash::from_value(ob) {
98
+ let len = dict.len();
99
+ let mut keys = Vec::with_capacity(len);
100
+ let mut vals = Vec::with_capacity(len);
101
+ dict.foreach(|k: Value, v: Value| {
102
+ let key = String::try_convert(k)?;
103
+ let val = Wrap::<AnyValue>::try_convert(v)?.0;
104
+ let dtype = DataType::from(&val);
105
+ keys.push(Field::new(&key, dtype));
106
+ vals.push(val);
107
+ Ok(ForEach::Continue)
108
+ })?;
109
+ Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
110
+ } else if let Some(v) = RArray::from_value(ob) {
111
+ if v.is_empty() {
112
+ Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
113
+ } else {
114
+ let list = v;
115
+
116
+ let mut avs = Vec::with_capacity(25);
117
+ let mut iter = list.each();
118
+
119
+ for item in (&mut iter).take(25) {
120
+ avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
121
+ }
122
+
123
+ let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
124
+
125
+ // push the rest
126
+ avs.reserve(list.len());
127
+ for item in iter {
128
+ avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
129
+ }
130
+
131
+ let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
132
+ .map_err(RbPolarsErr::from)?;
133
+ Ok(Wrap(AnyValue::List(s)))
134
+ }
135
+ } else if ob.is_kind_of(crate::rb_modules::datetime()) {
136
+ let sec: i64 = ob.funcall("to_i", ())?;
137
+ let nsec: i64 = ob.funcall("nsec", ())?;
138
+ Ok(Wrap(AnyValue::Datetime(
139
+ sec * 1_000_000_000 + nsec,
140
+ TimeUnit::Nanoseconds,
141
+ &None,
142
+ )))
143
+ } else if ob.is_kind_of(crate::rb_modules::date()) {
144
+ // convert to DateTime for UTC
145
+ let v = ob
146
+ .funcall::<_, _, Value>("to_datetime", ())?
147
+ .funcall::<_, _, Value>("to_time", ())?
148
+ .funcall::<_, _, i64>("to_i", ())?;
149
+ Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
150
+ } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
151
+ let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
152
+ let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
153
+ RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
154
+ })?;
155
+ if sign < 0 {
156
+ // TODO better error
157
+ v = v.checked_neg().unwrap();
158
+ }
159
+ Ok(Wrap(AnyValue::Decimal(v, scale)))
160
+ } else {
161
+ Err(RbPolarsErr::other(format!(
162
+ "object type not supported {:?}",
163
+ ob
164
+ )))
165
+ }
166
+ }
167
+ }
168
+
169
/// Turns the digit string and exponent of a decimal number into an unscaled
/// `i128` magnitude and a non-negative scale.
///
/// The input is interpreted as `0.<digits> * 10^exp` (the caller passes the
/// digits and exponent obtained from Ruby's `BigDecimal#split`). The result
/// `(value, scale)` satisfies `value * 10^(-scale)` == that number.
///
/// Returns `None` when `digits` is not a valid integer, when the magnitude
/// does not fit in an `i128`, or when the scale does not fit in a `usize`.
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
    let magnitude = digits.parse::<i128>().ok()?;

    // Do the exponent arithmetic in i64 so that extreme `exp` values cannot
    // overflow (the i32 subtraction and negation could).
    let digit_count = i64::try_from(digits.len()).ok()?;
    let shift = i64::from(exp).checked_sub(digit_count)?;

    if shift > 0 {
        // Only non-negative scales are produced, so a positive exponent is
        // folded into the magnitude.
        let factor = 10_i128.checked_pow(u32::try_from(shift).ok()?)?;
        Some((magnitude.checked_mul(factor)?, 0))
    } else {
        let scale = usize::try_from(shift.unsigned_abs()).ok()?;
        Some((magnitude, scale))
    }
}
@@ -0,0 +1,140 @@
1
+ use magnus::{prelude::*, IntoValue, RArray, RString, Ruby, TryConvert, Value};
2
+ use polars::prelude::*;
3
+
4
+ use super::{get_rbseq, struct_dict, Wrap};
5
+
6
+ use crate::rb_modules::utils;
7
+ use crate::RbResult;
8
+
9
+ impl TryConvert for Wrap<StringChunked> {
10
+ fn try_convert(obj: Value) -> RbResult<Self> {
11
+ let (seq, len) = get_rbseq(obj)?;
12
+ let mut builder = StringChunkedBuilder::new("", len);
13
+
14
+ for res in seq.each() {
15
+ let item = res?;
16
+ match String::try_convert(item) {
17
+ Ok(val) => builder.append_value(&val),
18
+ Err(_) => builder.append_null(),
19
+ }
20
+ }
21
+ Ok(Wrap(builder.finish()))
22
+ }
23
+ }
24
+
25
+ impl TryConvert for Wrap<BinaryChunked> {
26
+ fn try_convert(obj: Value) -> RbResult<Self> {
27
+ let (seq, len) = get_rbseq(obj)?;
28
+ let mut builder = BinaryChunkedBuilder::new("", len);
29
+
30
+ for res in seq.each() {
31
+ let item = res?;
32
+ match RString::try_convert(item) {
33
+ Ok(val) => builder.append_value(unsafe { val.as_slice() }),
34
+ Err(_) => builder.append_null(),
35
+ }
36
+ }
37
+ Ok(Wrap(builder.finish()))
38
+ }
39
+ }
40
+
41
+ impl IntoValue for Wrap<&StringChunked> {
42
+ fn into_value_with(self, _: &Ruby) -> Value {
43
+ let iter = self.0.into_iter();
44
+ RArray::from_iter(iter).into_value()
45
+ }
46
+ }
47
+
48
+ impl IntoValue for Wrap<&BinaryChunked> {
49
+ fn into_value_with(self, _: &Ruby) -> Value {
50
+ let iter = self
51
+ .0
52
+ .into_iter()
53
+ .map(|opt_bytes| opt_bytes.map(RString::from_slice));
54
+ RArray::from_iter(iter).into_value()
55
+ }
56
+ }
57
+
58
+ impl IntoValue for Wrap<&StructChunked> {
59
+ fn into_value_with(self, _: &Ruby) -> Value {
60
+ let s = self.0.clone().into_series();
61
+ // todo! iterate its chunks and flatten.
62
+ // make series::iter() accept a chunk index.
63
+ let s = s.rechunk();
64
+ let iter = s.iter().map(|av| {
65
+ if let AnyValue::Struct(_, _, flds) = av {
66
+ struct_dict(av._iter_struct_av(), flds)
67
+ } else {
68
+ unreachable!()
69
+ }
70
+ });
71
+
72
+ RArray::from_iter(iter).into_value()
73
+ }
74
+ }
75
+
76
+ impl IntoValue for Wrap<&DurationChunked> {
77
+ fn into_value_with(self, _: &Ruby) -> Value {
78
+ let utils = utils();
79
+ let time_unit = Wrap(self.0.time_unit()).into_value();
80
+ let iter = self.0.into_iter().map(|opt_v| {
81
+ opt_v.map(|v| {
82
+ utils
83
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
84
+ .unwrap()
85
+ })
86
+ });
87
+ RArray::from_iter(iter).into_value()
88
+ }
89
+ }
90
+
91
+ impl IntoValue for Wrap<&DatetimeChunked> {
92
+ fn into_value_with(self, _: &Ruby) -> Value {
93
+ let utils = utils();
94
+ let time_unit = Wrap(self.0.time_unit()).into_value();
95
+ let time_zone = self.0.time_zone().clone().into_value();
96
+ let iter = self.0.into_iter().map(|opt_v| {
97
+ opt_v.map(|v| {
98
+ utils
99
+ .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
100
+ .unwrap()
101
+ })
102
+ });
103
+ RArray::from_iter(iter).into_value()
104
+ }
105
+ }
106
+
107
+ impl IntoValue for Wrap<&TimeChunked> {
108
+ fn into_value_with(self, _: &Ruby) -> Value {
109
+ let utils = utils();
110
+ let iter = self.0.into_iter().map(|opt_v| {
111
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
112
+ });
113
+ RArray::from_iter(iter).into_value()
114
+ }
115
+ }
116
+
117
+ impl IntoValue for Wrap<&DateChunked> {
118
+ fn into_value_with(self, _: &Ruby) -> Value {
119
+ let utils = utils();
120
+ let iter = self.0.into_iter().map(|opt_v| {
121
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
122
+ });
123
+ RArray::from_iter(iter).into_value()
124
+ }
125
+ }
126
+
127
+ impl IntoValue for Wrap<&DecimalChunked> {
128
+ fn into_value_with(self, _: &Ruby) -> Value {
129
+ let utils = utils();
130
+ let rb_scale = (-(self.0.scale() as i32)).into_value();
131
+ let iter = self.0.into_iter().map(|opt_v| {
132
+ opt_v.map(|v| {
133
+ utils
134
+ .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
135
+ .unwrap()
136
+ })
137
+ });
138
+ RArray::from_iter(iter).into_value()
139
+ }
140
+ }