polars-df 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,35 +1,64 @@
1
- use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
1
+ use magnus::{class, r_hash::ForEach, Module, RArray, RHash, Symbol, TryConvert, Value, QNIL};
2
2
  use polars::chunked_array::object::PolarsObjectSafe;
3
3
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
4
4
  use polars::datatypes::AnyValue;
5
- use polars::frame::DataFrame;
5
+ use polars::frame::row::Row;
6
+ use polars::frame::NullStrategy;
7
+ use polars::io::avro::AvroCompression;
6
8
  use polars::prelude::*;
7
9
  use polars::series::ops::NullBehavior;
8
10
  use std::fmt::{Display, Formatter};
9
11
  use std::hash::{Hash, Hasher};
10
12
 
11
- use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
13
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
12
14
 
15
+ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
16
+ // Safety:
17
+ // Wrap is transparent.
18
+ unsafe { std::mem::transmute(slice) }
19
+ }
20
+
21
+ pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
22
+ // Safety:
23
+ // Wrap is transparent.
24
+ unsafe { std::mem::transmute(buf) }
25
+ }
26
+
27
+ #[repr(transparent)]
13
28
  pub struct Wrap<T>(pub T);
14
29
 
30
+ impl<T> Clone for Wrap<T>
31
+ where
32
+ T: Clone,
33
+ {
34
+ fn clone(&self) -> Self {
35
+ Wrap(self.0.clone())
36
+ }
37
+ }
38
+
15
39
  impl<T> From<T> for Wrap<T> {
16
40
  fn from(t: T) -> Self {
17
41
  Wrap(t)
18
42
  }
19
43
  }
20
44
 
21
- pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
45
+ pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
22
46
  let seq: RArray = obj.try_convert()?;
23
47
  let len = seq.len();
24
48
  Ok((seq, len))
25
49
  }
26
50
 
27
- pub fn get_df(obj: Value) -> RbResult<DataFrame> {
51
+ pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
28
52
  let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
29
53
  Ok(rbdf.df.borrow().clone())
30
54
  }
31
55
 
32
- pub fn get_series(obj: Value) -> RbResult<Series> {
56
+ pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
57
+ let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
58
+ Ok(rbdf.ldf.clone())
59
+ }
60
+
61
+ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
33
62
  let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
34
63
  Ok(rbs.series.borrow().clone())
35
64
  }
@@ -115,40 +144,127 @@ impl From<Wrap<AnyValue<'_>>> for Value {
115
144
 
116
145
  impl From<Wrap<DataType>> for Value {
117
146
  fn from(w: Wrap<DataType>) -> Self {
118
- Symbol::from(w.0.to_string()).into()
147
+ let pl = crate::module();
148
+
149
+ match &w.0 {
150
+ DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
151
+ DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
152
+ DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
153
+ DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
154
+ DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
155
+ DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
156
+ DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
157
+ DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
158
+ DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
159
+ DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
160
+ DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
161
+ DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
162
+ DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
163
+ DataType::List(inner) => {
164
+ let inner = Wrap(*inner.clone());
165
+ let list_class = pl.const_get::<_, Value>("List").unwrap();
166
+ list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
167
+ }
168
+ DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
169
+ DataType::Datetime(tu, tz) => {
170
+ let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
171
+ datetime_class
172
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
173
+ .unwrap()
174
+ }
175
+ DataType::Duration(tu) => {
176
+ let duration_class = pl.const_get::<_, Value>("Duration").unwrap();
177
+ duration_class
178
+ .funcall::<_, _, Value>("new", (tu.to_ascii(),))
179
+ .unwrap()
180
+ }
181
+ DataType::Object(_) => pl.const_get::<_, Value>("Object").unwrap(),
182
+ DataType::Categorical(_) => pl.const_get::<_, Value>("Categorical").unwrap(),
183
+ DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
184
+ DataType::Struct(fields) => {
185
+ let field_class = pl.const_get::<_, Value>("Field").unwrap();
186
+ let iter = fields.iter().map(|fld| {
187
+ let name = fld.name().clone();
188
+ let dtype = Wrap(fld.data_type().clone());
189
+ field_class
190
+ .funcall::<_, _, Value>("new", (name, dtype))
191
+ .unwrap()
192
+ });
193
+ let fields = RArray::from_iter(iter);
194
+ let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
195
+ struct_class
196
+ .funcall::<_, _, Value>("new", (fields,))
197
+ .unwrap()
198
+ }
199
+ DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
200
+ DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
201
+ }
119
202
  }
120
203
  }
121
204
 
122
205
  impl TryConvert for Wrap<DataType> {
123
206
  fn try_convert(ob: Value) -> RbResult<Self> {
124
- let dtype = match ob.try_convert::<String>()?.as_str() {
125
- "u8" => DataType::UInt8,
126
- "u16" => DataType::UInt16,
127
- "u32" => DataType::UInt32,
128
- "u64" => DataType::UInt64,
129
- "i8" => DataType::Int8,
130
- "i16" => DataType::Int16,
131
- "i32" => DataType::Int32,
132
- "i64" => DataType::Int64,
133
- "str" => DataType::Utf8,
134
- "bin" => DataType::Binary,
135
- "bool" => DataType::Boolean,
136
- "cat" => DataType::Categorical(None),
137
- "date" => DataType::Date,
138
- "datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
139
- "f32" => DataType::Float32,
140
- "time" => DataType::Time,
141
- "dur" => DataType::Duration(TimeUnit::Microseconds),
142
- "f64" => DataType::Float64,
143
- // "obj" => DataType::Object(OBJECT_NAME),
144
- "list" => DataType::List(Box::new(DataType::Boolean)),
145
- "null" => DataType::Null,
146
- "unk" => DataType::Unknown,
147
- _ => {
148
- return Err(RbValueError::new_err(format!(
149
- "{} is not a supported DataType.",
150
- ob
151
- )))
207
+ let dtype = if ob.is_kind_of(class::class()) {
208
+ let name = ob.funcall::<_, _, String>("name", ())?;
209
+ match name.as_str() {
210
+ "Polars::UInt8" => DataType::UInt8,
211
+ "Polars::UInt16" => DataType::UInt16,
212
+ "Polars::UInt32" => DataType::UInt32,
213
+ "Polars::UInt64" => DataType::UInt64,
214
+ "Polars::Int8" => DataType::Int8,
215
+ "Polars::Int16" => DataType::Int16,
216
+ "Polars::Int32" => DataType::Int32,
217
+ "Polars::Int64" => DataType::Int64,
218
+ "Polars::Utf8" => DataType::Utf8,
219
+ "Polars::Binary" => DataType::Binary,
220
+ "Polars::Boolean" => DataType::Boolean,
221
+ "Polars::Categorical" => DataType::Categorical(None),
222
+ "Polars::Date" => DataType::Date,
223
+ "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
224
+ "Polars::Time" => DataType::Time,
225
+ "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
226
+ "Polars::Float32" => DataType::Float32,
227
+ "Polars::Float64" => DataType::Float64,
228
+ // "Polars::Object" => DataType::Object(OBJECT_NAME),
229
+ "Polars::List" => DataType::List(Box::new(DataType::Boolean)),
230
+ "Polars::Null" => DataType::Null,
231
+ "Polars::Unknown" => DataType::Unknown,
232
+ dt => {
233
+ return Err(RbValueError::new_err(format!(
234
+ "{dt} is not a correct polars DataType.",
235
+ )))
236
+ }
237
+ }
238
+ } else {
239
+ match ob.try_convert::<String>()?.as_str() {
240
+ "u8" => DataType::UInt8,
241
+ "u16" => DataType::UInt16,
242
+ "u32" => DataType::UInt32,
243
+ "u64" => DataType::UInt64,
244
+ "i8" => DataType::Int8,
245
+ "i16" => DataType::Int16,
246
+ "i32" => DataType::Int32,
247
+ "i64" => DataType::Int64,
248
+ "str" => DataType::Utf8,
249
+ "bin" => DataType::Binary,
250
+ "bool" => DataType::Boolean,
251
+ "cat" => DataType::Categorical(None),
252
+ "date" => DataType::Date,
253
+ "datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
254
+ "f32" => DataType::Float32,
255
+ "time" => DataType::Time,
256
+ "dur" => DataType::Duration(TimeUnit::Microseconds),
257
+ "f64" => DataType::Float64,
258
+ // "obj" => DataType::Object(OBJECT_NAME),
259
+ "list" => DataType::List(Box::new(DataType::Boolean)),
260
+ "null" => DataType::Null,
261
+ "unk" => DataType::Unknown,
262
+ _ => {
263
+ return Err(RbValueError::new_err(format!(
264
+ "{} is not a supported DataType.",
265
+ ob
266
+ )))
267
+ }
152
268
  }
153
269
  };
154
270
  Ok(Wrap(dtype))
@@ -171,6 +287,54 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
171
287
  }
172
288
  }
173
289
 
290
+ impl TryConvert for Wrap<AsofStrategy> {
291
+ fn try_convert(ob: Value) -> RbResult<Self> {
292
+ let parsed = match ob.try_convert::<String>()?.as_str() {
293
+ "backward" => AsofStrategy::Backward,
294
+ "forward" => AsofStrategy::Forward,
295
+ v => {
296
+ return Err(RbValueError::new_err(format!(
297
+ "strategy must be one of {{'backward', 'forward'}}, got {}",
298
+ v
299
+ )))
300
+ }
301
+ };
302
+ Ok(Wrap(parsed))
303
+ }
304
+ }
305
+
306
+ impl TryConvert for Wrap<InterpolationMethod> {
307
+ fn try_convert(ob: Value) -> RbResult<Self> {
308
+ let parsed = match ob.try_convert::<String>()?.as_str() {
309
+ "linear" => InterpolationMethod::Linear,
310
+ "nearest" => InterpolationMethod::Nearest,
311
+ v => {
312
+ return Err(RbValueError::new_err(format!(
313
+ "method must be one of {{'linear', 'nearest'}}, got {v}",
314
+ )))
315
+ }
316
+ };
317
+ Ok(Wrap(parsed))
318
+ }
319
+ }
320
+
321
+ impl TryConvert for Wrap<Option<AvroCompression>> {
322
+ fn try_convert(ob: Value) -> RbResult<Self> {
323
+ let parsed = match ob.try_convert::<String>()?.as_str() {
324
+ "uncompressed" => None,
325
+ "snappy" => Some(AvroCompression::Snappy),
326
+ "deflate" => Some(AvroCompression::Deflate),
327
+ v => {
328
+ return Err(RbValueError::new_err(format!(
329
+ "compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
330
+ v
331
+ )))
332
+ }
333
+ };
334
+ Ok(Wrap(parsed))
335
+ }
336
+ }
337
+
174
338
  impl TryConvert for Wrap<CategoricalOrdering> {
175
339
  fn try_convert(ob: Value) -> RbResult<Self> {
176
340
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -187,6 +351,22 @@ impl TryConvert for Wrap<CategoricalOrdering> {
187
351
  }
188
352
  }
189
353
 
354
+ impl TryConvert for Wrap<StartBy> {
355
+ fn try_convert(ob: Value) -> RbResult<Self> {
356
+ let parsed = match ob.try_convert::<String>()?.as_str() {
357
+ "window" => StartBy::WindowBound,
358
+ "datapoint" => StartBy::DataPoint,
359
+ "monday" => StartBy::Monday,
360
+ v => {
361
+ return Err(RbValueError::new_err(format!(
362
+ "closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
363
+ )))
364
+ }
365
+ };
366
+ Ok(Wrap(parsed))
367
+ }
368
+ }
369
+
190
370
  impl TryConvert for Wrap<ClosedWindow> {
191
371
  fn try_convert(ob: Value) -> RbResult<Self> {
192
372
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -462,6 +642,32 @@ pub fn parse_parquet_compression(
462
642
  Ok(parsed)
463
643
  }
464
644
 
645
+ impl<'s> TryConvert for Wrap<Row<'s>> {
646
+ fn try_convert(ob: Value) -> RbResult<Self> {
647
+ let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
648
+ for item in ob.try_convert::<RArray>()?.each() {
649
+ vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
650
+ }
651
+ let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
652
+ Ok(Wrap(Row(vals)))
653
+ }
654
+ }
655
+
656
+ impl TryConvert for Wrap<Schema> {
657
+ fn try_convert(ob: Value) -> RbResult<Self> {
658
+ let dict = ob.try_convert::<RHash>()?;
659
+
660
+ let mut schema = Vec::new();
661
+ dict.foreach(|key: String, val: Wrap<DataType>| {
662
+ schema.push(Field::new(&key, val.0));
663
+ Ok(ForEach::Continue)
664
+ })
665
+ .unwrap();
666
+
667
+ Ok(Wrap(schema.into_iter().into()))
668
+ }
669
+ }
670
+
465
671
  #[derive(Clone, Debug)]
466
672
  pub struct ObjectValue {
467
673
  pub inner: Value,
@@ -503,18 +709,31 @@ impl From<Value> for ObjectValue {
503
709
  }
504
710
  }
505
711
 
712
+ impl TryConvert for ObjectValue {
713
+ fn try_convert(ob: Value) -> RbResult<Self> {
714
+ Ok(ObjectValue { inner: ob })
715
+ }
716
+ }
717
+
506
718
  impl From<&dyn PolarsObjectSafe> for &ObjectValue {
507
719
  fn from(val: &dyn PolarsObjectSafe) -> Self {
508
720
  unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
509
721
  }
510
722
  }
511
723
 
724
+ // TODO remove
512
725
  impl ObjectValue {
513
726
  pub fn to_object(&self) -> Value {
514
727
  self.inner
515
728
  }
516
729
  }
517
730
 
731
+ impl From<ObjectValue> for Value {
732
+ fn from(val: ObjectValue) -> Self {
733
+ val.inner
734
+ }
735
+ }
736
+
518
737
  impl Default for ObjectValue {
519
738
  fn default() -> Self {
520
739
  ObjectValue { inner: *QNIL }
@@ -1,15 +1,22 @@
1
1
  use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
2
+ use polars::frame::NullStrategy;
3
+ use polars::io::avro::AvroCompression;
2
4
  use polars::io::mmap::ReaderBytes;
3
5
  use polars::io::RowCount;
6
+ use polars::prelude::pivot::{pivot, pivot_stable};
4
7
  use polars::prelude::*;
5
8
  use std::cell::RefCell;
6
9
  use std::io::{BufWriter, Cursor};
7
10
  use std::ops::Deref;
8
11
 
12
+ use crate::apply::dataframe::{
13
+ apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
14
+ apply_lambda_with_utf8_out_type,
15
+ };
9
16
  use crate::conversion::*;
10
17
  use crate::file::{get_file_like, get_mmap_bytes_reader};
11
18
  use crate::series::{to_rbseries_collection, to_series_collection};
12
- use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
19
+ use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
13
20
 
14
21
  #[magnus::wrap(class = "Polars::RbDataFrame")]
15
22
  pub struct RbDataFrame {
@@ -179,6 +186,48 @@ impl RbDataFrame {
179
186
  Ok(RbDataFrame::new(df))
180
187
  }
181
188
 
189
+ pub fn read_avro(
190
+ rb_f: Value,
191
+ columns: Option<Vec<String>>,
192
+ projection: Option<Vec<usize>>,
193
+ n_rows: Option<usize>,
194
+ ) -> RbResult<Self> {
195
+ use polars::io::avro::AvroReader;
196
+
197
+ let file = get_file_like(rb_f, false)?;
198
+ let df = AvroReader::new(file)
199
+ .with_projection(projection)
200
+ .with_columns(columns)
201
+ .with_n_rows(n_rows)
202
+ .finish()
203
+ .map_err(RbPolarsErr::from)?;
204
+ Ok(RbDataFrame::new(df))
205
+ }
206
+
207
+ pub fn write_avro(
208
+ &self,
209
+ rb_f: Value,
210
+ compression: Wrap<Option<AvroCompression>>,
211
+ ) -> RbResult<()> {
212
+ use polars::io::avro::AvroWriter;
213
+
214
+ if let Ok(s) = rb_f.try_convert::<String>() {
215
+ let f = std::fs::File::create(&s).unwrap();
216
+ AvroWriter::new(f)
217
+ .with_compression(compression.0)
218
+ .finish(&mut self.df.borrow_mut())
219
+ .map_err(RbPolarsErr::from)?;
220
+ } else {
221
+ let mut buf = get_file_like(rb_f, true)?;
222
+ AvroWriter::new(&mut buf)
223
+ .with_compression(compression.0)
224
+ .finish(&mut self.df.borrow_mut())
225
+ .map_err(RbPolarsErr::from)?;
226
+ }
227
+
228
+ Ok(())
229
+ }
230
+
182
231
  pub fn read_json(rb_f: Value) -> RbResult<Self> {
183
232
  // memmap the file first
184
233
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
@@ -238,6 +287,14 @@ impl RbDataFrame {
238
287
  Ok(())
239
288
  }
240
289
 
290
+ pub fn read_hashes(
291
+ _dicts: Value,
292
+ _infer_schema_length: Option<usize>,
293
+ _schema_overwrite: Option<Wrap<Schema>>,
294
+ ) -> RbResult<Self> {
295
+ Err(RbPolarsErr::todo())
296
+ }
297
+
241
298
  pub fn read_hash(data: RHash) -> RbResult<Self> {
242
299
  let mut cols: Vec<Series> = Vec::new();
243
300
  data.foreach(|name: String, values: Value| {
@@ -341,7 +398,7 @@ impl RbDataFrame {
341
398
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
342
399
  obj.unwrap().to_object()
343
400
  }
344
- _ => Wrap(s.get(idx)).into(),
401
+ _ => Wrap(s.get(idx).unwrap()).into(),
345
402
  })
346
403
  .collect(),
347
404
  )
@@ -364,7 +421,7 @@ impl RbDataFrame {
364
421
  s.get_object(idx).map(|any| any.into());
365
422
  obj.unwrap().to_object()
366
423
  }
367
- _ => Wrap(s.get(idx)).into(),
424
+ _ => Wrap(s.get(idx).unwrap()).into(),
368
425
  })
369
426
  .collect(),
370
427
  )
@@ -517,9 +574,8 @@ impl RbDataFrame {
517
574
  .collect()
518
575
  }
519
576
 
520
- pub fn n_chunks(&self) -> RbResult<usize> {
521
- let n = self.df.borrow().n_chunks().map_err(RbPolarsErr::from)?;
522
- Ok(n)
577
+ pub fn n_chunks(&self) -> usize {
578
+ self.df.borrow().n_chunks()
523
579
  }
524
580
 
525
581
  pub fn shape(&self) -> (usize, usize) {
@@ -751,6 +807,31 @@ impl RbDataFrame {
751
807
  Ok(RbDataFrame::new(df))
752
808
  }
753
809
 
810
+ pub fn pivot_expr(
811
+ &self,
812
+ values: Vec<String>,
813
+ index: Vec<String>,
814
+ columns: Vec<String>,
815
+ aggregate_expr: &RbExpr,
816
+ maintain_order: bool,
817
+ sort_columns: bool,
818
+ ) -> RbResult<Self> {
819
+ let fun = match maintain_order {
820
+ true => pivot_stable,
821
+ false => pivot,
822
+ };
823
+ let df = fun(
824
+ &self.df.borrow(),
825
+ values,
826
+ index,
827
+ columns,
828
+ aggregate_expr.inner.clone(),
829
+ sort_columns,
830
+ )
831
+ .map_err(RbPolarsErr::from)?;
832
+ Ok(RbDataFrame::new(df))
833
+ }
834
+
754
835
  pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
755
836
  let out = if stable {
756
837
  self.df.borrow().partition_by_stable(groups)
@@ -870,10 +951,74 @@ impl RbDataFrame {
870
951
  df.into()
871
952
  }
872
953
 
954
+ pub fn apply(
955
+ &self,
956
+ lambda: Value,
957
+ output_type: Option<Wrap<DataType>>,
958
+ inference_size: usize,
959
+ ) -> RbResult<(Value, bool)> {
960
+ let df = &self.df.borrow();
961
+
962
+ let output_type = output_type.map(|dt| dt.0);
963
+ let out = match output_type {
964
+ Some(DataType::Int32) => {
965
+ apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
966
+ }
967
+ Some(DataType::Int64) => {
968
+ apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
969
+ }
970
+ Some(DataType::UInt32) => {
971
+ apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
972
+ .into_series()
973
+ }
974
+ Some(DataType::UInt64) => {
975
+ apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
976
+ .into_series()
977
+ }
978
+ Some(DataType::Float32) => {
979
+ apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
980
+ .into_series()
981
+ }
982
+ Some(DataType::Float64) => {
983
+ apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
984
+ .into_series()
985
+ }
986
+ Some(DataType::Boolean) => {
987
+ apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
988
+ }
989
+ Some(DataType::Date) => {
990
+ apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
991
+ .into_date()
992
+ .into_series()
993
+ }
994
+ Some(DataType::Datetime(tu, tz)) => {
995
+ apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
996
+ .into_datetime(tu, tz)
997
+ .into_series()
998
+ }
999
+ Some(DataType::Utf8) => {
1000
+ apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1001
+ }
1002
+ _ => return apply_lambda_unknown(df, lambda, inference_size),
1003
+ };
1004
+
1005
+ Ok((RbSeries::from(out).into(), false))
1006
+ }
1007
+
873
1008
  pub fn shrink_to_fit(&self) {
874
1009
  self.df.borrow_mut().shrink_to_fit();
875
1010
  }
876
1011
 
1012
+ pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
1013
+ let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
1014
+ let hash = self
1015
+ .df
1016
+ .borrow_mut()
1017
+ .hash_rows(Some(hb))
1018
+ .map_err(RbPolarsErr::from)?;
1019
+ Ok(hash.into_series().into())
1020
+ }
1021
+
877
1022
  pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
878
1023
  let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
879
1024
  if include_header {
@@ -35,3 +35,11 @@ impl RbValueError {
35
35
  Error::new(arg_error(), message)
36
36
  }
37
37
  }
38
+
39
+ pub struct ComputeError {}
40
+
41
+ impl ComputeError {
42
+ pub fn new_err(message: String) -> Error {
43
+ Error::runtime_error(message)
44
+ }
45
+ }
@@ -1,7 +1,39 @@
1
1
  use magnus::Value;
2
- use polars::error::PolarsResult;
3
- use polars::series::Series;
2
+ use polars::prelude::*;
3
+
4
+ use crate::lazy::dsl::RbExpr;
5
+ use crate::Wrap;
4
6
 
5
7
  pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
6
8
  todo!();
7
9
  }
10
+
11
+ pub fn map_single(
12
+ rbexpr: &RbExpr,
13
+ _lambda: Value,
14
+ output_type: Option<Wrap<DataType>>,
15
+ agg_list: bool,
16
+ ) -> RbExpr {
17
+ let output_type = output_type.map(|wrap| wrap.0);
18
+
19
+ let output_type2 = output_type.clone();
20
+ let function = move |_s: Series| {
21
+ let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
22
+
23
+ todo!();
24
+ };
25
+
26
+ let output_map = GetOutput::map_field(move |fld| match output_type {
27
+ Some(ref dt) => Field::new(fld.name(), dt.clone()),
28
+ None => {
29
+ let mut fld = fld.clone();
30
+ fld.coerce(DataType::Unknown);
31
+ fld
32
+ }
33
+ });
34
+ if agg_list {
35
+ rbexpr.clone().inner.map_list(function, output_map).into()
36
+ } else {
37
+ rbexpr.clone().inner.map(function, output_map).into()
38
+ }
39
+ }