polars-df 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,35 +1,64 @@
1
- use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
1
+ use magnus::{class, r_hash::ForEach, Module, RArray, RHash, Symbol, TryConvert, Value, QNIL};
2
2
  use polars::chunked_array::object::PolarsObjectSafe;
3
3
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
4
4
  use polars::datatypes::AnyValue;
5
- use polars::frame::DataFrame;
5
+ use polars::frame::row::Row;
6
+ use polars::frame::NullStrategy;
7
+ use polars::io::avro::AvroCompression;
6
8
  use polars::prelude::*;
7
9
  use polars::series::ops::NullBehavior;
8
10
  use std::fmt::{Display, Formatter};
9
11
  use std::hash::{Hash, Hasher};
10
12
 
11
- use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
13
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
12
14
 
15
+ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
16
+ // Safety:
17
+ // Wrap is transparent.
18
+ unsafe { std::mem::transmute(slice) }
19
+ }
20
+
21
+ pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
22
+ // Safety:
23
+ // Wrap is transparent.
24
+ unsafe { std::mem::transmute(buf) }
25
+ }
26
+
27
/// Transparent newtype around `T`.
///
/// This file implements conversion traits (`From`, `TryConvert`) on
/// `Wrap<...>` for polars types. `#[repr(transparent)]` guarantees the
/// wrapper has the same layout as `T`, which the unsafe slice/vec helpers
/// in this file depend on.
///
/// `Clone` is derived; the derive produces the same `T: Clone` bound as a
/// hand-written impl would.
#[derive(Clone)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);
38
+
15
39
  impl<T> From<T> for Wrap<T> {
16
40
  fn from(t: T) -> Self {
17
41
  Wrap(t)
18
42
  }
19
43
  }
20
44
 
21
- pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
45
+ pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
22
46
  let seq: RArray = obj.try_convert()?;
23
47
  let len = seq.len();
24
48
  Ok((seq, len))
25
49
  }
26
50
 
27
- pub fn get_df(obj: Value) -> RbResult<DataFrame> {
51
+ pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
28
52
  let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
29
53
  Ok(rbdf.df.borrow().clone())
30
54
  }
31
55
 
32
- pub fn get_series(obj: Value) -> RbResult<Series> {
56
+ pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
57
+ let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
58
+ Ok(rbdf.ldf.clone())
59
+ }
60
+
61
+ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
33
62
  let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
34
63
  Ok(rbs.series.borrow().clone())
35
64
  }
@@ -115,40 +144,127 @@ impl From<Wrap<AnyValue<'_>>> for Value {
115
144
 
116
145
  impl From<Wrap<DataType>> for Value {
117
146
  fn from(w: Wrap<DataType>) -> Self {
118
- Symbol::from(w.0.to_string()).into()
147
+ let pl = crate::module();
148
+
149
+ match &w.0 {
150
+ DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
151
+ DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
152
+ DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
153
+ DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
154
+ DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
155
+ DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
156
+ DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
157
+ DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
158
+ DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
159
+ DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
160
+ DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
161
+ DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
162
+ DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
163
+ DataType::List(inner) => {
164
+ let inner = Wrap(*inner.clone());
165
+ let list_class = pl.const_get::<_, Value>("List").unwrap();
166
+ list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
167
+ }
168
+ DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
169
+ DataType::Datetime(tu, tz) => {
170
+ let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
171
+ datetime_class
172
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
173
+ .unwrap()
174
+ }
175
+ DataType::Duration(tu) => {
176
+ let duration_class = pl.const_get::<_, Value>("Duration").unwrap();
177
+ duration_class
178
+ .funcall::<_, _, Value>("new", (tu.to_ascii(),))
179
+ .unwrap()
180
+ }
181
+ DataType::Object(_) => pl.const_get::<_, Value>("Object").unwrap(),
182
+ DataType::Categorical(_) => pl.const_get::<_, Value>("Categorical").unwrap(),
183
+ DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
184
+ DataType::Struct(fields) => {
185
+ let field_class = pl.const_get::<_, Value>("Field").unwrap();
186
+ let iter = fields.iter().map(|fld| {
187
+ let name = fld.name().clone();
188
+ let dtype = Wrap(fld.data_type().clone());
189
+ field_class
190
+ .funcall::<_, _, Value>("new", (name, dtype))
191
+ .unwrap()
192
+ });
193
+ let fields = RArray::from_iter(iter);
194
+ let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
195
+ struct_class
196
+ .funcall::<_, _, Value>("new", (fields,))
197
+ .unwrap()
198
+ }
199
+ DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
200
+ DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
201
+ }
119
202
  }
120
203
  }
121
204
 
122
205
  impl TryConvert for Wrap<DataType> {
123
206
  fn try_convert(ob: Value) -> RbResult<Self> {
124
- let dtype = match ob.try_convert::<String>()?.as_str() {
125
- "u8" => DataType::UInt8,
126
- "u16" => DataType::UInt16,
127
- "u32" => DataType::UInt32,
128
- "u64" => DataType::UInt64,
129
- "i8" => DataType::Int8,
130
- "i16" => DataType::Int16,
131
- "i32" => DataType::Int32,
132
- "i64" => DataType::Int64,
133
- "str" => DataType::Utf8,
134
- "bin" => DataType::Binary,
135
- "bool" => DataType::Boolean,
136
- "cat" => DataType::Categorical(None),
137
- "date" => DataType::Date,
138
- "datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
139
- "f32" => DataType::Float32,
140
- "time" => DataType::Time,
141
- "dur" => DataType::Duration(TimeUnit::Microseconds),
142
- "f64" => DataType::Float64,
143
- // "obj" => DataType::Object(OBJECT_NAME),
144
- "list" => DataType::List(Box::new(DataType::Boolean)),
145
- "null" => DataType::Null,
146
- "unk" => DataType::Unknown,
147
- _ => {
148
- return Err(RbValueError::new_err(format!(
149
- "{} is not a supported DataType.",
150
- ob
151
- )))
207
+ let dtype = if ob.is_kind_of(class::class()) {
208
+ let name = ob.funcall::<_, _, String>("name", ())?;
209
+ match name.as_str() {
210
+ "Polars::UInt8" => DataType::UInt8,
211
+ "Polars::UInt16" => DataType::UInt16,
212
+ "Polars::UInt32" => DataType::UInt32,
213
+ "Polars::UInt64" => DataType::UInt64,
214
+ "Polars::Int8" => DataType::Int8,
215
+ "Polars::Int16" => DataType::Int16,
216
+ "Polars::Int32" => DataType::Int32,
217
+ "Polars::Int64" => DataType::Int64,
218
+ "Polars::Utf8" => DataType::Utf8,
219
+ "Polars::Binary" => DataType::Binary,
220
+ "Polars::Boolean" => DataType::Boolean,
221
+ "Polars::Categorical" => DataType::Categorical(None),
222
+ "Polars::Date" => DataType::Date,
223
+ "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
224
+ "Polars::Time" => DataType::Time,
225
+ "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
226
+ "Polars::Float32" => DataType::Float32,
227
+ "Polars::Float64" => DataType::Float64,
228
+ // "Polars::Object" => DataType::Object(OBJECT_NAME),
229
+ "Polars::List" => DataType::List(Box::new(DataType::Boolean)),
230
+ "Polars::Null" => DataType::Null,
231
+ "Polars::Unknown" => DataType::Unknown,
232
+ dt => {
233
+ return Err(RbValueError::new_err(format!(
234
+ "{dt} is not a correct polars DataType.",
235
+ )))
236
+ }
237
+ }
238
+ } else {
239
+ match ob.try_convert::<String>()?.as_str() {
240
+ "u8" => DataType::UInt8,
241
+ "u16" => DataType::UInt16,
242
+ "u32" => DataType::UInt32,
243
+ "u64" => DataType::UInt64,
244
+ "i8" => DataType::Int8,
245
+ "i16" => DataType::Int16,
246
+ "i32" => DataType::Int32,
247
+ "i64" => DataType::Int64,
248
+ "str" => DataType::Utf8,
249
+ "bin" => DataType::Binary,
250
+ "bool" => DataType::Boolean,
251
+ "cat" => DataType::Categorical(None),
252
+ "date" => DataType::Date,
253
+ "datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
254
+ "f32" => DataType::Float32,
255
+ "time" => DataType::Time,
256
+ "dur" => DataType::Duration(TimeUnit::Microseconds),
257
+ "f64" => DataType::Float64,
258
+ // "obj" => DataType::Object(OBJECT_NAME),
259
+ "list" => DataType::List(Box::new(DataType::Boolean)),
260
+ "null" => DataType::Null,
261
+ "unk" => DataType::Unknown,
262
+ _ => {
263
+ return Err(RbValueError::new_err(format!(
264
+ "{} is not a supported DataType.",
265
+ ob
266
+ )))
267
+ }
152
268
  }
153
269
  };
154
270
  Ok(Wrap(dtype))
@@ -171,6 +287,54 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
171
287
  }
172
288
  }
173
289
 
290
+ impl TryConvert for Wrap<AsofStrategy> {
291
+ fn try_convert(ob: Value) -> RbResult<Self> {
292
+ let parsed = match ob.try_convert::<String>()?.as_str() {
293
+ "backward" => AsofStrategy::Backward,
294
+ "forward" => AsofStrategy::Forward,
295
+ v => {
296
+ return Err(RbValueError::new_err(format!(
297
+ "strategy must be one of {{'backward', 'forward'}}, got {}",
298
+ v
299
+ )))
300
+ }
301
+ };
302
+ Ok(Wrap(parsed))
303
+ }
304
+ }
305
+
306
+ impl TryConvert for Wrap<InterpolationMethod> {
307
+ fn try_convert(ob: Value) -> RbResult<Self> {
308
+ let parsed = match ob.try_convert::<String>()?.as_str() {
309
+ "linear" => InterpolationMethod::Linear,
310
+ "nearest" => InterpolationMethod::Nearest,
311
+ v => {
312
+ return Err(RbValueError::new_err(format!(
313
+ "method must be one of {{'linear', 'nearest'}}, got {v}",
314
+ )))
315
+ }
316
+ };
317
+ Ok(Wrap(parsed))
318
+ }
319
+ }
320
+
321
+ impl TryConvert for Wrap<Option<AvroCompression>> {
322
+ fn try_convert(ob: Value) -> RbResult<Self> {
323
+ let parsed = match ob.try_convert::<String>()?.as_str() {
324
+ "uncompressed" => None,
325
+ "snappy" => Some(AvroCompression::Snappy),
326
+ "deflate" => Some(AvroCompression::Deflate),
327
+ v => {
328
+ return Err(RbValueError::new_err(format!(
329
+ "compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
330
+ v
331
+ )))
332
+ }
333
+ };
334
+ Ok(Wrap(parsed))
335
+ }
336
+ }
337
+
174
338
  impl TryConvert for Wrap<CategoricalOrdering> {
175
339
  fn try_convert(ob: Value) -> RbResult<Self> {
176
340
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -187,6 +351,22 @@ impl TryConvert for Wrap<CategoricalOrdering> {
187
351
  }
188
352
  }
189
353
 
354
+ impl TryConvert for Wrap<StartBy> {
355
+ fn try_convert(ob: Value) -> RbResult<Self> {
356
+ let parsed = match ob.try_convert::<String>()?.as_str() {
357
+ "window" => StartBy::WindowBound,
358
+ "datapoint" => StartBy::DataPoint,
359
+ "monday" => StartBy::Monday,
360
+ v => {
361
+ return Err(RbValueError::new_err(format!(
362
+ "closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
363
+ )))
364
+ }
365
+ };
366
+ Ok(Wrap(parsed))
367
+ }
368
+ }
369
+
190
370
  impl TryConvert for Wrap<ClosedWindow> {
191
371
  fn try_convert(ob: Value) -> RbResult<Self> {
192
372
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -462,6 +642,32 @@ pub fn parse_parquet_compression(
462
642
  Ok(parsed)
463
643
  }
464
644
 
645
+ impl<'s> TryConvert for Wrap<Row<'s>> {
646
+ fn try_convert(ob: Value) -> RbResult<Self> {
647
+ let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
648
+ for item in ob.try_convert::<RArray>()?.each() {
649
+ vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
650
+ }
651
+ let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
652
+ Ok(Wrap(Row(vals)))
653
+ }
654
+ }
655
+
656
+ impl TryConvert for Wrap<Schema> {
657
+ fn try_convert(ob: Value) -> RbResult<Self> {
658
+ let dict = ob.try_convert::<RHash>()?;
659
+
660
+ let mut schema = Vec::new();
661
+ dict.foreach(|key: String, val: Wrap<DataType>| {
662
+ schema.push(Field::new(&key, val.0));
663
+ Ok(ForEach::Continue)
664
+ })
665
+ .unwrap();
666
+
667
+ Ok(Wrap(schema.into_iter().into()))
668
+ }
669
+ }
670
+
465
671
  #[derive(Clone, Debug)]
466
672
  pub struct ObjectValue {
467
673
  pub inner: Value,
@@ -503,18 +709,31 @@ impl From<Value> for ObjectValue {
503
709
  }
504
710
  }
505
711
 
712
+ impl TryConvert for ObjectValue {
713
+ fn try_convert(ob: Value) -> RbResult<Self> {
714
+ Ok(ObjectValue { inner: ob })
715
+ }
716
+ }
717
+
506
718
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    /// Reinterprets a polars object trait reference as an `&ObjectValue`.
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        let raw = val as *const dyn PolarsObjectSafe as *const ObjectValue;
        // SAFETY: NOTE(review) this assumes every `PolarsObjectSafe` reaching
        // this conversion is actually an `ObjectValue`; nothing here checks it.
        // Confirm against the callers.
        unsafe { &*raw }
    }
}
511
723
 
724
+ // TODO remove
512
725
  impl ObjectValue {
513
726
  pub fn to_object(&self) -> Value {
514
727
  self.inner
515
728
  }
516
729
  }
517
730
 
731
+ impl From<ObjectValue> for Value {
732
+ fn from(val: ObjectValue) -> Self {
733
+ val.inner
734
+ }
735
+ }
736
+
518
737
  impl Default for ObjectValue {
519
738
  fn default() -> Self {
520
739
  ObjectValue { inner: *QNIL }
@@ -1,15 +1,22 @@
1
1
  use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
2
+ use polars::frame::NullStrategy;
3
+ use polars::io::avro::AvroCompression;
2
4
  use polars::io::mmap::ReaderBytes;
3
5
  use polars::io::RowCount;
6
+ use polars::prelude::pivot::{pivot, pivot_stable};
4
7
  use polars::prelude::*;
5
8
  use std::cell::RefCell;
6
9
  use std::io::{BufWriter, Cursor};
7
10
  use std::ops::Deref;
8
11
 
12
+ use crate::apply::dataframe::{
13
+ apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
14
+ apply_lambda_with_utf8_out_type,
15
+ };
9
16
  use crate::conversion::*;
10
17
  use crate::file::{get_file_like, get_mmap_bytes_reader};
11
18
  use crate::series::{to_rbseries_collection, to_series_collection};
12
- use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
19
+ use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
13
20
 
14
21
  #[magnus::wrap(class = "Polars::RbDataFrame")]
15
22
  pub struct RbDataFrame {
@@ -179,6 +186,48 @@ impl RbDataFrame {
179
186
  Ok(RbDataFrame::new(df))
180
187
  }
181
188
 
189
+ pub fn read_avro(
190
+ rb_f: Value,
191
+ columns: Option<Vec<String>>,
192
+ projection: Option<Vec<usize>>,
193
+ n_rows: Option<usize>,
194
+ ) -> RbResult<Self> {
195
+ use polars::io::avro::AvroReader;
196
+
197
+ let file = get_file_like(rb_f, false)?;
198
+ let df = AvroReader::new(file)
199
+ .with_projection(projection)
200
+ .with_columns(columns)
201
+ .with_n_rows(n_rows)
202
+ .finish()
203
+ .map_err(RbPolarsErr::from)?;
204
+ Ok(RbDataFrame::new(df))
205
+ }
206
+
207
+ pub fn write_avro(
208
+ &self,
209
+ rb_f: Value,
210
+ compression: Wrap<Option<AvroCompression>>,
211
+ ) -> RbResult<()> {
212
+ use polars::io::avro::AvroWriter;
213
+
214
+ if let Ok(s) = rb_f.try_convert::<String>() {
215
+ let f = std::fs::File::create(&s).unwrap();
216
+ AvroWriter::new(f)
217
+ .with_compression(compression.0)
218
+ .finish(&mut self.df.borrow_mut())
219
+ .map_err(RbPolarsErr::from)?;
220
+ } else {
221
+ let mut buf = get_file_like(rb_f, true)?;
222
+ AvroWriter::new(&mut buf)
223
+ .with_compression(compression.0)
224
+ .finish(&mut self.df.borrow_mut())
225
+ .map_err(RbPolarsErr::from)?;
226
+ }
227
+
228
+ Ok(())
229
+ }
230
+
182
231
  pub fn read_json(rb_f: Value) -> RbResult<Self> {
183
232
  // memmap the file first
184
233
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
@@ -238,6 +287,14 @@ impl RbDataFrame {
238
287
  Ok(())
239
288
  }
240
289
 
290
+ pub fn read_hashes(
291
+ _dicts: Value,
292
+ _infer_schema_length: Option<usize>,
293
+ _schema_overwrite: Option<Wrap<Schema>>,
294
+ ) -> RbResult<Self> {
295
+ Err(RbPolarsErr::todo())
296
+ }
297
+
241
298
  pub fn read_hash(data: RHash) -> RbResult<Self> {
242
299
  let mut cols: Vec<Series> = Vec::new();
243
300
  data.foreach(|name: String, values: Value| {
@@ -341,7 +398,7 @@ impl RbDataFrame {
341
398
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
342
399
  obj.unwrap().to_object()
343
400
  }
344
- _ => Wrap(s.get(idx)).into(),
401
+ _ => Wrap(s.get(idx).unwrap()).into(),
345
402
  })
346
403
  .collect(),
347
404
  )
@@ -364,7 +421,7 @@ impl RbDataFrame {
364
421
  s.get_object(idx).map(|any| any.into());
365
422
  obj.unwrap().to_object()
366
423
  }
367
- _ => Wrap(s.get(idx)).into(),
424
+ _ => Wrap(s.get(idx).unwrap()).into(),
368
425
  })
369
426
  .collect(),
370
427
  )
@@ -517,9 +574,8 @@ impl RbDataFrame {
517
574
  .collect()
518
575
  }
519
576
 
520
- pub fn n_chunks(&self) -> RbResult<usize> {
521
- let n = self.df.borrow().n_chunks().map_err(RbPolarsErr::from)?;
522
- Ok(n)
577
+ pub fn n_chunks(&self) -> usize {
578
+ self.df.borrow().n_chunks()
523
579
  }
524
580
 
525
581
  pub fn shape(&self) -> (usize, usize) {
@@ -751,6 +807,31 @@ impl RbDataFrame {
751
807
  Ok(RbDataFrame::new(df))
752
808
  }
753
809
 
810
+ pub fn pivot_expr(
811
+ &self,
812
+ values: Vec<String>,
813
+ index: Vec<String>,
814
+ columns: Vec<String>,
815
+ aggregate_expr: &RbExpr,
816
+ maintain_order: bool,
817
+ sort_columns: bool,
818
+ ) -> RbResult<Self> {
819
+ let fun = match maintain_order {
820
+ true => pivot_stable,
821
+ false => pivot,
822
+ };
823
+ let df = fun(
824
+ &self.df.borrow(),
825
+ values,
826
+ index,
827
+ columns,
828
+ aggregate_expr.inner.clone(),
829
+ sort_columns,
830
+ )
831
+ .map_err(RbPolarsErr::from)?;
832
+ Ok(RbDataFrame::new(df))
833
+ }
834
+
754
835
  pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
755
836
  let out = if stable {
756
837
  self.df.borrow().partition_by_stable(groups)
@@ -870,10 +951,74 @@ impl RbDataFrame {
870
951
  df.into()
871
952
  }
872
953
 
954
+ pub fn apply(
955
+ &self,
956
+ lambda: Value,
957
+ output_type: Option<Wrap<DataType>>,
958
+ inference_size: usize,
959
+ ) -> RbResult<(Value, bool)> {
960
+ let df = &self.df.borrow();
961
+
962
+ let output_type = output_type.map(|dt| dt.0);
963
+ let out = match output_type {
964
+ Some(DataType::Int32) => {
965
+ apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
966
+ }
967
+ Some(DataType::Int64) => {
968
+ apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
969
+ }
970
+ Some(DataType::UInt32) => {
971
+ apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
972
+ .into_series()
973
+ }
974
+ Some(DataType::UInt64) => {
975
+ apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
976
+ .into_series()
977
+ }
978
+ Some(DataType::Float32) => {
979
+ apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
980
+ .into_series()
981
+ }
982
+ Some(DataType::Float64) => {
983
+ apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
984
+ .into_series()
985
+ }
986
+ Some(DataType::Boolean) => {
987
+ apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
988
+ }
989
+ Some(DataType::Date) => {
990
+ apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
991
+ .into_date()
992
+ .into_series()
993
+ }
994
+ Some(DataType::Datetime(tu, tz)) => {
995
+ apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
996
+ .into_datetime(tu, tz)
997
+ .into_series()
998
+ }
999
+ Some(DataType::Utf8) => {
1000
+ apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1001
+ }
1002
+ _ => return apply_lambda_unknown(df, lambda, inference_size),
1003
+ };
1004
+
1005
+ Ok((RbSeries::from(out).into(), false))
1006
+ }
1007
+
873
1008
  pub fn shrink_to_fit(&self) {
874
1009
  self.df.borrow_mut().shrink_to_fit();
875
1010
  }
876
1011
 
1012
+ pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
1013
+ let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
1014
+ let hash = self
1015
+ .df
1016
+ .borrow_mut()
1017
+ .hash_rows(Some(hb))
1018
+ .map_err(RbPolarsErr::from)?;
1019
+ Ok(hash.into_series().into())
1020
+ }
1021
+
877
1022
  pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
878
1023
  let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
879
1024
  if include_header {
@@ -35,3 +35,11 @@ impl RbValueError {
35
35
  Error::new(arg_error(), message)
36
36
  }
37
37
  }
38
+
39
+ pub struct ComputeError {}
40
+
41
+ impl ComputeError {
42
+ pub fn new_err(message: String) -> Error {
43
+ Error::runtime_error(message)
44
+ }
45
+ }
@@ -1,7 +1,39 @@
1
1
  use magnus::Value;
2
- use polars::error::PolarsResult;
3
- use polars::series::Series;
2
+ use polars::prelude::*;
3
+
4
+ use crate::lazy::dsl::RbExpr;
5
+ use crate::Wrap;
4
6
 
5
7
  pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
6
8
  todo!();
7
9
  }
10
+
11
+ pub fn map_single(
12
+ rbexpr: &RbExpr,
13
+ _lambda: Value,
14
+ output_type: Option<Wrap<DataType>>,
15
+ agg_list: bool,
16
+ ) -> RbExpr {
17
+ let output_type = output_type.map(|wrap| wrap.0);
18
+
19
+ let output_type2 = output_type.clone();
20
+ let function = move |_s: Series| {
21
+ let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
22
+
23
+ todo!();
24
+ };
25
+
26
+ let output_map = GetOutput::map_field(move |fld| match output_type {
27
+ Some(ref dt) => Field::new(fld.name(), dt.clone()),
28
+ None => {
29
+ let mut fld = fld.clone();
30
+ fld.coerce(DataType::Unknown);
31
+ fld
32
+ }
33
+ });
34
+ if agg_list {
35
+ rbexpr.clone().inner.map_list(function, output_map).into()
36
+ } else {
37
+ rbexpr.clone().inner.map(function, output_map).into()
38
+ }
39
+ }