polars-df 0.10.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,1208 +0,0 @@
1
- use either::Either;
2
- use magnus::{
3
- prelude::*, r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, RString, Value,
4
- };
5
- use polars::frame::row::{rows_to_schema_supertypes, Row};
6
- use polars::frame::NullStrategy;
7
- use polars::io::avro::AvroCompression;
8
- use polars::io::mmap::ReaderBytes;
9
- use polars::io::RowIndex;
10
- use polars::prelude::pivot::{pivot, pivot_stable};
11
- use polars::prelude::*;
12
- use polars_core::utils::try_get_supertype;
13
- use std::cell::RefCell;
14
- use std::io::{BufWriter, Cursor};
15
- use std::num::NonZeroUsize;
16
- use std::ops::Deref;
17
-
18
- use crate::conversion::*;
19
- use crate::file::{get_either_file, get_file_like, get_mmap_bytes_reader, EitherRustRubyFile};
20
- use crate::map::dataframe::{
21
- apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
22
- apply_lambda_with_utf8_out_type,
23
- };
24
- use crate::rb_modules;
25
- use crate::series::{to_rbseries_collection, to_series_collection};
26
- use crate::{RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
27
-
28
- #[magnus::wrap(class = "Polars::RbDataFrame")]
29
- pub struct RbDataFrame {
30
- pub df: RefCell<DataFrame>,
31
- }
32
-
33
- impl From<DataFrame> for RbDataFrame {
34
- fn from(df: DataFrame) -> Self {
35
- RbDataFrame::new(df)
36
- }
37
- }
38
-
39
- impl RbDataFrame {
40
- pub fn new(df: DataFrame) -> Self {
41
- RbDataFrame {
42
- df: RefCell::new(df),
43
- }
44
- }
45
-
46
- fn finish_from_rows(
47
- rows: Vec<Row>,
48
- infer_schema_length: Option<usize>,
49
- schema: Option<Schema>,
50
- schema_overrides_by_idx: Option<Vec<(usize, DataType)>>,
51
- ) -> RbResult<Self> {
52
- // Object builder must be registered
53
- crate::on_startup::register_object_builder();
54
-
55
- let mut final_schema =
56
- rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
57
- .map_err(RbPolarsErr::from)?;
58
-
59
- // Erase scale from inferred decimals.
60
- for dtype in final_schema.iter_dtypes_mut() {
61
- if let DataType::Decimal(_, _) = dtype {
62
- *dtype = DataType::Decimal(None, None)
63
- }
64
- }
65
-
66
- // Integrate explicit/inferred schema.
67
- if let Some(schema) = schema {
68
- for (i, (name, dtype)) in schema.into_iter().enumerate() {
69
- if let Some((name_, dtype_)) = final_schema.get_at_index_mut(i) {
70
- *name_ = name;
71
-
72
- // If schema dtype is Unknown, overwrite with inferred datatype.
73
- if !matches!(dtype, DataType::Unknown) {
74
- *dtype_ = dtype;
75
- }
76
- } else {
77
- final_schema.with_column(name, dtype);
78
- }
79
- }
80
- }
81
-
82
- // Optional per-field overrides; these supersede default/inferred dtypes.
83
- if let Some(overrides) = schema_overrides_by_idx {
84
- for (i, dtype) in overrides {
85
- if let Some((_, dtype_)) = final_schema.get_at_index_mut(i) {
86
- if !matches!(dtype, DataType::Unknown) {
87
- *dtype_ = dtype;
88
- }
89
- }
90
- }
91
- }
92
- let df =
93
- DataFrame::from_rows_and_schema(&rows, &final_schema).map_err(RbPolarsErr::from)?;
94
- Ok(df.into())
95
- }
96
-
97
- pub fn init(columns: RArray) -> RbResult<Self> {
98
- let mut cols = Vec::new();
99
- for i in columns.each() {
100
- cols.push(<&RbSeries>::try_convert(i?)?.series.borrow().clone());
101
- }
102
- let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
103
- Ok(RbDataFrame::new(df))
104
- }
105
-
106
- pub fn estimated_size(&self) -> usize {
107
- self.df.borrow().estimated_size()
108
- }
109
-
110
- pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
111
- // start arguments
112
- // this pattern is needed for more than 16
113
- let rb_f = arguments[0];
114
- let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
115
- let chunk_size = usize::try_convert(arguments[2])?;
116
- let has_header = bool::try_convert(arguments[3])?;
117
- let ignore_errors = bool::try_convert(arguments[4])?;
118
- let n_rows = Option::<usize>::try_convert(arguments[5])?;
119
- let skip_rows = usize::try_convert(arguments[6])?;
120
- let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
121
- let separator = String::try_convert(arguments[8])?;
122
- let rechunk = bool::try_convert(arguments[9])?;
123
- let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
124
- let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
125
- let n_threads = Option::<usize>::try_convert(arguments[12])?;
126
- let path = Option::<String>::try_convert(arguments[13])?;
127
- let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
128
- // TODO fix
129
- let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
130
- let low_memory = bool::try_convert(arguments[16])?;
131
- let comment_prefix = Option::<String>::try_convert(arguments[17])?;
132
- let quote_char = Option::<String>::try_convert(arguments[18])?;
133
- let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
134
- let try_parse_dates = bool::try_convert(arguments[20])?;
135
- let skip_rows_after_header = usize::try_convert(arguments[21])?;
136
- let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
137
- let sample_size = usize::try_convert(arguments[23])?;
138
- let eol_char = String::try_convert(arguments[24])?;
139
- let truncate_ragged_lines = bool::try_convert(arguments[25])?;
140
- // end arguments
141
-
142
- let null_values = null_values.map(|w| w.0);
143
- let eol_char = eol_char.as_bytes()[0];
144
-
145
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
146
-
147
- let quote_char = if let Some(s) = quote_char {
148
- if s.is_empty() {
149
- None
150
- } else {
151
- Some(s.as_bytes()[0])
152
- }
153
- } else {
154
- None
155
- };
156
-
157
- let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
158
- overwrite_dtype
159
- .iter()
160
- .map(|(name, dtype)| {
161
- let dtype = dtype.0.clone();
162
- Field::new(name, dtype)
163
- })
164
- .collect::<Schema>()
165
- });
166
-
167
- let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
168
- overwrite_dtype
169
- .iter()
170
- .map(|dt| dt.0.clone())
171
- .collect::<Vec<_>>()
172
- });
173
-
174
- let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
175
- let df = CsvReader::new(mmap_bytes_r)
176
- .infer_schema(infer_schema_length)
177
- .has_header(has_header)
178
- .with_n_rows(n_rows)
179
- .with_separator(separator.as_bytes()[0])
180
- .with_skip_rows(skip_rows)
181
- .with_ignore_errors(ignore_errors)
182
- .with_projection(projection)
183
- .with_rechunk(rechunk)
184
- .with_chunk_size(chunk_size)
185
- .with_encoding(encoding.0)
186
- .with_columns(columns)
187
- .with_n_threads(n_threads)
188
- .with_path(path)
189
- .with_dtypes(overwrite_dtype.map(Arc::new))
190
- .with_dtypes_slice(overwrite_dtype_slice.as_deref())
191
- .low_memory(low_memory)
192
- .with_comment_prefix(comment_prefix.as_deref())
193
- .with_null_values(null_values)
194
- .with_try_parse_dates(try_parse_dates)
195
- .with_quote_char(quote_char)
196
- .with_end_of_line_char(eol_char)
197
- .with_skip_rows_after_header(skip_rows_after_header)
198
- .with_row_index(row_index)
199
- .sample_size(sample_size)
200
- .truncate_ragged_lines(truncate_ragged_lines)
201
- .finish()
202
- .map_err(RbPolarsErr::from)?;
203
- Ok(df.into())
204
- }
205
-
206
- #[allow(clippy::too_many_arguments)]
207
- pub fn read_parquet(
208
- rb_f: Value,
209
- columns: Option<Vec<String>>,
210
- projection: Option<Vec<usize>>,
211
- n_rows: Option<usize>,
212
- parallel: Wrap<ParallelStrategy>,
213
- row_index: Option<(String, IdxSize)>,
214
- low_memory: bool,
215
- use_statistics: bool,
216
- rechunk: bool,
217
- ) -> RbResult<Self> {
218
- use EitherRustRubyFile::*;
219
-
220
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
221
- let result = match get_either_file(rb_f, false)? {
222
- Rb(f) => {
223
- let buf = f.as_buffer();
224
- ParquetReader::new(buf)
225
- .with_projection(projection)
226
- .with_columns(columns)
227
- .read_parallel(parallel.0)
228
- .with_n_rows(n_rows)
229
- .with_row_index(row_index)
230
- .set_low_memory(low_memory)
231
- .use_statistics(use_statistics)
232
- .set_rechunk(rechunk)
233
- .finish()
234
- }
235
- Rust(f) => ParquetReader::new(f.into_inner())
236
- .with_projection(projection)
237
- .with_columns(columns)
238
- .read_parallel(parallel.0)
239
- .with_n_rows(n_rows)
240
- .with_row_index(row_index)
241
- .use_statistics(use_statistics)
242
- .set_rechunk(rechunk)
243
- .finish(),
244
- };
245
- let df = result.map_err(RbPolarsErr::from)?;
246
- Ok(RbDataFrame::new(df))
247
- }
248
-
249
- pub fn read_ipc(
250
- rb_f: Value,
251
- columns: Option<Vec<String>>,
252
- projection: Option<Vec<usize>>,
253
- n_rows: Option<usize>,
254
- row_index: Option<(String, IdxSize)>,
255
- memory_map: bool,
256
- ) -> RbResult<Self> {
257
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
258
- let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
259
- let df = IpcReader::new(mmap_bytes_r)
260
- .with_projection(projection)
261
- .with_columns(columns)
262
- .with_n_rows(n_rows)
263
- .with_row_index(row_index)
264
- .memory_mapped(memory_map)
265
- .finish()
266
- .map_err(RbPolarsErr::from)?;
267
- Ok(RbDataFrame::new(df))
268
- }
269
-
270
- pub fn read_avro(
271
- rb_f: Value,
272
- columns: Option<Vec<String>>,
273
- projection: Option<Vec<usize>>,
274
- n_rows: Option<usize>,
275
- ) -> RbResult<Self> {
276
- use polars::io::avro::AvroReader;
277
-
278
- let file = get_file_like(rb_f, false)?;
279
- let df = AvroReader::new(file)
280
- .with_projection(projection)
281
- .with_columns(columns)
282
- .with_n_rows(n_rows)
283
- .finish()
284
- .map_err(RbPolarsErr::from)?;
285
- Ok(RbDataFrame::new(df))
286
- }
287
-
288
- pub fn write_avro(
289
- &self,
290
- rb_f: Value,
291
- compression: Wrap<Option<AvroCompression>>,
292
- ) -> RbResult<()> {
293
- use polars::io::avro::AvroWriter;
294
-
295
- if let Ok(s) = String::try_convert(rb_f) {
296
- let f = std::fs::File::create(s).unwrap();
297
- AvroWriter::new(f)
298
- .with_compression(compression.0)
299
- .finish(&mut self.df.borrow_mut())
300
- .map_err(RbPolarsErr::from)?;
301
- } else {
302
- let mut buf = get_file_like(rb_f, true)?;
303
- AvroWriter::new(&mut buf)
304
- .with_compression(compression.0)
305
- .finish(&mut self.df.borrow_mut())
306
- .map_err(RbPolarsErr::from)?;
307
- }
308
-
309
- Ok(())
310
- }
311
-
312
- pub fn read_json(rb_f: Value) -> RbResult<Self> {
313
- // memmap the file first
314
- let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
315
- let mmap_read: ReaderBytes = (&mmap_bytes_r).into();
316
- let bytes = mmap_read.deref();
317
-
318
- // Happy path is our column oriented json as that is most performant
319
- // on failure we try
320
- match serde_json::from_slice::<DataFrame>(bytes) {
321
- Ok(df) => Ok(df.into()),
322
- // try arrow json reader instead
323
- // this is row oriented
324
- Err(e) => {
325
- let msg = format!("{e}");
326
- if msg.contains("successful parse invalid data") {
327
- let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
328
- Err(e)
329
- } else {
330
- let out = JsonReader::new(mmap_bytes_r)
331
- .with_json_format(JsonFormat::Json)
332
- .finish()
333
- .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
334
- Ok(out.into())
335
- }
336
- }
337
- }
338
- }
339
-
340
- pub fn read_ndjson(rb_f: Value) -> RbResult<Self> {
341
- let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
342
-
343
- let out = JsonReader::new(mmap_bytes_r)
344
- .with_json_format(JsonFormat::JsonLines)
345
- .finish()
346
- .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
347
- Ok(out.into())
348
- }
349
-
350
- pub fn write_json(&self, rb_f: Value, pretty: bool, row_oriented: bool) -> RbResult<()> {
351
- let file = BufWriter::new(get_file_like(rb_f, true)?);
352
-
353
- let r = match (pretty, row_oriented) {
354
- (_, true) => JsonWriter::new(file)
355
- .with_json_format(JsonFormat::Json)
356
- .finish(&mut self.df.borrow_mut()),
357
- (true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
358
- .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
359
- (false, _) => serde_json::to_writer(file, &*self.df.borrow())
360
- .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
361
- };
362
- r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
363
- Ok(())
364
- }
365
-
366
- pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
367
- let file = BufWriter::new(get_file_like(rb_f, true)?);
368
-
369
- let r = JsonWriter::new(file)
370
- .with_json_format(JsonFormat::JsonLines)
371
- .finish(&mut self.df.borrow_mut());
372
-
373
- r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
374
- Ok(())
375
- }
376
-
377
- pub fn read_rows(
378
- rb_rows: RArray,
379
- infer_schema_length: Option<usize>,
380
- schema: Option<Wrap<Schema>>,
381
- ) -> RbResult<Self> {
382
- let mut rows = Vec::with_capacity(rb_rows.len());
383
- for v in rb_rows.each() {
384
- let rb_row = RArray::try_convert(v?)?;
385
- let mut row = Vec::with_capacity(rb_row.len());
386
- for val in rb_row.each() {
387
- row.push(Wrap::<AnyValue>::try_convert(val?)?.0);
388
- }
389
- rows.push(Row(row));
390
- }
391
- Self::finish_from_rows(rows, infer_schema_length, schema.map(|wrap| wrap.0), None)
392
- }
393
-
394
- pub fn read_hashes(
395
- dicts: Value,
396
- infer_schema_length: Option<usize>,
397
- schema: Option<Wrap<Schema>>,
398
- schema_overrides: Option<Wrap<Schema>>,
399
- ) -> RbResult<Self> {
400
- let mut schema_columns = PlIndexSet::new();
401
- if let Some(s) = &schema {
402
- schema_columns.extend(s.0.iter_names().map(|n| n.to_string()))
403
- }
404
- let (rows, names) = dicts_to_rows(&dicts, infer_schema_length, schema_columns)?;
405
-
406
- let mut schema_overrides_by_idx: Vec<(usize, DataType)> = Vec::new();
407
- if let Some(overrides) = schema_overrides {
408
- for (idx, name) in names.iter().enumerate() {
409
- if let Some(dtype) = overrides.0.get(name) {
410
- schema_overrides_by_idx.push((idx, dtype.clone()));
411
- }
412
- }
413
- }
414
- let rbdf = Self::finish_from_rows(
415
- rows,
416
- infer_schema_length,
417
- schema.map(|wrap| wrap.0),
418
- Some(schema_overrides_by_idx),
419
- )?;
420
-
421
- unsafe {
422
- rbdf.df
423
- .borrow_mut()
424
- .get_columns_mut()
425
- .iter_mut()
426
- .zip(&names)
427
- .for_each(|(s, name)| {
428
- s.rename(name);
429
- });
430
- }
431
- let length = names.len();
432
- if names.into_iter().collect::<PlHashSet<_>>().len() != length {
433
- let err = PolarsError::SchemaMismatch("duplicate column names found".into());
434
- Err(RbPolarsErr::from(err))?;
435
- }
436
-
437
- Ok(rbdf)
438
- }
439
-
440
- pub fn read_hash(data: RHash) -> RbResult<Self> {
441
- let mut cols: Vec<Series> = Vec::new();
442
- data.foreach(|name: String, values: Value| {
443
- let obj: Value = rb_modules::series().funcall("new", (name, values))?;
444
- let rbseries = obj.funcall::<_, _, &RbSeries>("_s", ())?;
445
- cols.push(rbseries.series.borrow().clone());
446
- Ok(ForEach::Continue)
447
- })?;
448
- let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
449
- Ok(df.into())
450
- }
451
-
452
- #[allow(clippy::too_many_arguments)]
453
- pub fn write_csv(
454
- &self,
455
- rb_f: Value,
456
- include_header: bool,
457
- separator: u8,
458
- quote_char: u8,
459
- batch_size: Wrap<NonZeroUsize>,
460
- datetime_format: Option<String>,
461
- date_format: Option<String>,
462
- time_format: Option<String>,
463
- float_precision: Option<usize>,
464
- null_value: Option<String>,
465
- ) -> RbResult<()> {
466
- let batch_size = batch_size.0;
467
- let null = null_value.unwrap_or_default();
468
-
469
- if let Ok(s) = String::try_convert(rb_f) {
470
- let f = std::fs::File::create(s).unwrap();
471
- // no need for a buffered writer, because the csv writer does internal buffering
472
- CsvWriter::new(f)
473
- .include_header(include_header)
474
- .with_separator(separator)
475
- .with_quote_char(quote_char)
476
- .with_batch_size(batch_size)
477
- .with_datetime_format(datetime_format)
478
- .with_date_format(date_format)
479
- .with_time_format(time_format)
480
- .with_float_precision(float_precision)
481
- .with_null_value(null)
482
- .finish(&mut self.df.borrow_mut())
483
- .map_err(RbPolarsErr::from)?;
484
- } else {
485
- let mut buf = Cursor::new(Vec::new());
486
- CsvWriter::new(&mut buf)
487
- .include_header(include_header)
488
- .with_separator(separator)
489
- .with_quote_char(quote_char)
490
- .with_batch_size(batch_size)
491
- .with_datetime_format(datetime_format)
492
- .with_date_format(date_format)
493
- .with_time_format(time_format)
494
- .with_float_precision(float_precision)
495
- .with_null_value(null)
496
- .finish(&mut self.df.borrow_mut())
497
- .map_err(RbPolarsErr::from)?;
498
- // TODO less copying
499
- let rb_str = RString::from_slice(&buf.into_inner());
500
- rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
501
- }
502
-
503
- Ok(())
504
- }
505
-
506
- pub fn write_ipc(
507
- &self,
508
- rb_f: Value,
509
- compression: Wrap<Option<IpcCompression>>,
510
- ) -> RbResult<()> {
511
- if let Ok(s) = String::try_convert(rb_f) {
512
- let f = std::fs::File::create(s).unwrap();
513
- IpcWriter::new(f)
514
- .with_compression(compression.0)
515
- .finish(&mut self.df.borrow_mut())
516
- .map_err(RbPolarsErr::from)?;
517
- } else {
518
- let mut buf = Cursor::new(Vec::new());
519
- IpcWriter::new(&mut buf)
520
- .with_compression(compression.0)
521
- .finish(&mut self.df.borrow_mut())
522
- .map_err(RbPolarsErr::from)?;
523
- // TODO less copying
524
- let rb_str = RString::from_slice(&buf.into_inner());
525
- rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
526
- }
527
- Ok(())
528
- }
529
-
530
- pub fn row_tuple(&self, idx: i64) -> Value {
531
- let idx = if idx < 0 {
532
- (self.df.borrow().height() as i64 + idx) as usize
533
- } else {
534
- idx as usize
535
- };
536
- RArray::from_iter(
537
- self.df
538
- .borrow()
539
- .get_columns()
540
- .iter()
541
- .map(|s| match s.dtype() {
542
- DataType::Object(_, _) => {
543
- let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
544
- obj.unwrap().to_object()
545
- }
546
- _ => Wrap(s.get(idx).unwrap()).into_value(),
547
- }),
548
- )
549
- .as_value()
550
- }
551
-
552
- pub fn row_tuples(&self) -> Value {
553
- let df = &self.df;
554
- RArray::from_iter((0..df.borrow().height()).map(|idx| {
555
- RArray::from_iter(
556
- self.df
557
- .borrow()
558
- .get_columns()
559
- .iter()
560
- .map(|s| match s.dtype() {
561
- DataType::Object(_, _) => {
562
- let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
563
- obj.unwrap().to_object()
564
- }
565
- _ => Wrap(s.get(idx).unwrap()).into_value(),
566
- }),
567
- )
568
- }))
569
- .as_value()
570
- }
571
-
572
- pub fn to_numo(&self) -> Option<Value> {
573
- let mut st = None;
574
- for s in self.df.borrow().iter() {
575
- let dt_i = s.dtype();
576
- match st {
577
- None => st = Some(dt_i.clone()),
578
- Some(ref mut st) => {
579
- *st = try_get_supertype(st, dt_i).ok()?;
580
- }
581
- }
582
- }
583
- let _st = st?;
584
-
585
- // TODO
586
- None
587
- }
588
-
589
- pub fn write_parquet(
590
- &self,
591
- rb_f: Value,
592
- compression: String,
593
- compression_level: Option<i32>,
594
- statistics: bool,
595
- row_group_size: Option<usize>,
596
- data_page_size: Option<usize>,
597
- ) -> RbResult<()> {
598
- let compression = parse_parquet_compression(&compression, compression_level)?;
599
-
600
- if let Ok(s) = String::try_convert(rb_f) {
601
- let f = std::fs::File::create(s).unwrap();
602
- ParquetWriter::new(f)
603
- .with_compression(compression)
604
- .with_statistics(statistics)
605
- .with_row_group_size(row_group_size)
606
- .with_data_page_size(data_page_size)
607
- .finish(&mut self.df.borrow_mut())
608
- .map_err(RbPolarsErr::from)?;
609
- } else {
610
- let buf = get_file_like(rb_f, true)?;
611
- ParquetWriter::new(buf)
612
- .with_compression(compression)
613
- .with_statistics(statistics)
614
- .with_row_group_size(row_group_size)
615
- .with_data_page_size(data_page_size)
616
- .finish(&mut self.df.borrow_mut())
617
- .map_err(RbPolarsErr::from)?;
618
- }
619
-
620
- Ok(())
621
- }
622
-
623
- pub fn add(&self, s: &RbSeries) -> RbResult<Self> {
624
- let df = (&*self.df.borrow() + &*s.series.borrow()).map_err(RbPolarsErr::from)?;
625
- Ok(df.into())
626
- }
627
-
628
- pub fn sub(&self, s: &RbSeries) -> RbResult<Self> {
629
- let df = (&*self.df.borrow() - &*s.series.borrow()).map_err(RbPolarsErr::from)?;
630
- Ok(df.into())
631
- }
632
-
633
- pub fn div(&self, s: &RbSeries) -> RbResult<Self> {
634
- let df = (&*self.df.borrow() / &*s.series.borrow()).map_err(RbPolarsErr::from)?;
635
- Ok(df.into())
636
- }
637
-
638
- pub fn mul(&self, s: &RbSeries) -> RbResult<Self> {
639
- let df = (&*self.df.borrow() * &*s.series.borrow()).map_err(RbPolarsErr::from)?;
640
- Ok(df.into())
641
- }
642
-
643
- pub fn rem(&self, s: &RbSeries) -> RbResult<Self> {
644
- let df = (&*self.df.borrow() % &*s.series.borrow()).map_err(RbPolarsErr::from)?;
645
- Ok(df.into())
646
- }
647
-
648
- pub fn add_df(&self, s: &Self) -> RbResult<Self> {
649
- let df = (&*self.df.borrow() + &*s.df.borrow()).map_err(RbPolarsErr::from)?;
650
- Ok(df.into())
651
- }
652
-
653
- pub fn sub_df(&self, s: &Self) -> RbResult<Self> {
654
- let df = (&*self.df.borrow() - &*s.df.borrow()).map_err(RbPolarsErr::from)?;
655
- Ok(df.into())
656
- }
657
-
658
- pub fn div_df(&self, s: &Self) -> RbResult<Self> {
659
- let df = (&*self.df.borrow() / &*s.df.borrow()).map_err(RbPolarsErr::from)?;
660
- Ok(df.into())
661
- }
662
-
663
- pub fn mul_df(&self, s: &Self) -> RbResult<Self> {
664
- let df = (&*self.df.borrow() * &*s.df.borrow()).map_err(RbPolarsErr::from)?;
665
- Ok(df.into())
666
- }
667
-
668
- pub fn rem_df(&self, s: &Self) -> RbResult<Self> {
669
- let df = (&*self.df.borrow() % &*s.df.borrow()).map_err(RbPolarsErr::from)?;
670
- Ok(df.into())
671
- }
672
-
673
- pub fn sample_n(
674
- &self,
675
- n: &RbSeries,
676
- with_replacement: bool,
677
- shuffle: bool,
678
- seed: Option<u64>,
679
- ) -> RbResult<Self> {
680
- let df = self
681
- .df
682
- .borrow()
683
- .sample_n(&n.series.borrow(), with_replacement, shuffle, seed)
684
- .map_err(RbPolarsErr::from)?;
685
- Ok(df.into())
686
- }
687
-
688
- pub fn sample_frac(
689
- &self,
690
- frac: &RbSeries,
691
- with_replacement: bool,
692
- shuffle: bool,
693
- seed: Option<u64>,
694
- ) -> RbResult<Self> {
695
- let df = self
696
- .df
697
- .borrow()
698
- .sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed)
699
- .map_err(RbPolarsErr::from)?;
700
- Ok(df.into())
701
- }
702
-
703
- pub fn rechunk(&self) -> Self {
704
- self.df.borrow().agg_chunks().into()
705
- }
706
-
707
- pub fn to_s(&self) -> String {
708
- format!("{}", self.df.borrow())
709
- }
710
-
711
- pub fn get_columns(&self) -> RArray {
712
- let cols = self.df.borrow().get_columns().to_vec();
713
- to_rbseries_collection(cols)
714
- }
715
-
716
- pub fn columns(&self) -> Vec<String> {
717
- self.df
718
- .borrow()
719
- .get_column_names()
720
- .iter()
721
- .map(|v| v.to_string())
722
- .collect()
723
- }
724
-
725
- pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
726
- self.df
727
- .borrow_mut()
728
- .set_column_names(&names)
729
- .map_err(RbPolarsErr::from)?;
730
- Ok(())
731
- }
732
-
733
- pub fn dtypes(&self) -> RArray {
734
- RArray::from_iter(
735
- self.df
736
- .borrow()
737
- .iter()
738
- .map(|s| Wrap(s.dtype().clone()).into_value()),
739
- )
740
- }
741
-
742
- pub fn n_chunks(&self) -> usize {
743
- self.df.borrow().n_chunks()
744
- }
745
-
746
- pub fn shape(&self) -> (usize, usize) {
747
- self.df.borrow().shape()
748
- }
749
-
750
- pub fn height(&self) -> usize {
751
- self.df.borrow().height()
752
- }
753
-
754
- pub fn width(&self) -> usize {
755
- self.df.borrow().width()
756
- }
757
-
758
- pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
759
- let columns = to_series_collection(columns)?;
760
- self.df
761
- .borrow_mut()
762
- .hstack_mut(&columns)
763
- .map_err(RbPolarsErr::from)?;
764
- Ok(())
765
- }
766
-
767
- pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
768
- let columns = to_series_collection(columns)?;
769
- let df = self
770
- .df
771
- .borrow()
772
- .hstack(&columns)
773
- .map_err(RbPolarsErr::from)?;
774
- Ok(df.into())
775
- }
776
-
777
- pub fn extend(&self, df: &RbDataFrame) -> RbResult<()> {
778
- self.df
779
- .borrow_mut()
780
- .extend(&df.df.borrow())
781
- .map_err(RbPolarsErr::from)?;
782
- Ok(())
783
- }
784
-
785
- pub fn vstack_mut(&self, df: &RbDataFrame) -> RbResult<()> {
786
- self.df
787
- .borrow_mut()
788
- .vstack_mut(&df.df.borrow())
789
- .map_err(RbPolarsErr::from)?;
790
- Ok(())
791
- }
792
-
793
- pub fn vstack(&self, df: &RbDataFrame) -> RbResult<Self> {
794
- let df = self
795
- .df
796
- .borrow()
797
- .vstack(&df.df.borrow())
798
- .map_err(RbPolarsErr::from)?;
799
- Ok(df.into())
800
- }
801
-
802
- pub fn drop_in_place(&self, name: String) -> RbResult<RbSeries> {
803
- let s = self
804
- .df
805
- .borrow_mut()
806
- .drop_in_place(&name)
807
- .map_err(RbPolarsErr::from)?;
808
- Ok(RbSeries::new(s))
809
- }
810
-
811
- pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> RbResult<Self> {
812
- let df = self
813
- .df
814
- .borrow()
815
- .drop_nulls(subset.as_ref().map(|s| s.as_ref()))
816
- .map_err(RbPolarsErr::from)?;
817
- Ok(df.into())
818
- }
819
-
820
- pub fn drop(&self, name: String) -> RbResult<Self> {
821
- let df = self.df.borrow().drop(&name).map_err(RbPolarsErr::from)?;
822
- Ok(RbDataFrame::new(df))
823
- }
824
-
825
- pub fn select_at_idx(&self, idx: usize) -> Option<RbSeries> {
826
- self.df
827
- .borrow()
828
- .select_at_idx(idx)
829
- .map(|s| RbSeries::new(s.clone()))
830
- }
831
-
832
- pub fn get_column_index(&self, name: String) -> Option<usize> {
833
- self.df.borrow().get_column_index(&name)
834
- }
835
-
836
- pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
837
- self.df
838
- .borrow()
839
- .column(&name)
840
- .map(|s| RbSeries::new(s.clone()))
841
- .map_err(RbPolarsErr::from)
842
- }
843
-
844
- pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
845
- let df = self
846
- .df
847
- .borrow()
848
- .select(selection)
849
- .map_err(RbPolarsErr::from)?;
850
- Ok(RbDataFrame::new(df))
851
- }
852
-
853
- pub fn take(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
854
- let indices = IdxCa::from_vec("", indices);
855
- let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
856
- Ok(RbDataFrame::new(df))
857
- }
858
-
859
- pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
860
- let binding = indices.series.borrow();
861
- let idx = binding.idx().map_err(RbPolarsErr::from)?;
862
- let df = self.df.borrow().take(idx).map_err(RbPolarsErr::from)?;
863
- Ok(RbDataFrame::new(df))
864
- }
865
-
866
- pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
867
- self.df
868
- .borrow_mut()
869
- .replace(&column, new_col.series.borrow().clone())
870
- .map_err(RbPolarsErr::from)?;
871
- Ok(())
872
- }
873
-
874
- pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
875
- self.df
876
- .borrow_mut()
877
- .replace_column(index, new_col.series.borrow().clone())
878
- .map_err(RbPolarsErr::from)?;
879
- Ok(())
880
- }
881
-
882
- pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
883
- self.df
884
- .borrow_mut()
885
- .insert_column(index, new_col.series.borrow().clone())
886
- .map_err(RbPolarsErr::from)?;
887
- Ok(())
888
- }
889
-
890
- pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
891
- let df = self.df.borrow().slice(
892
- offset as i64,
893
- length.unwrap_or_else(|| self.df.borrow().height()),
894
- );
895
- df.into()
896
- }
897
-
898
- pub fn head(&self, length: Option<usize>) -> Self {
899
- self.df.borrow().head(length).into()
900
- }
901
-
902
- pub fn tail(&self, length: Option<usize>) -> Self {
903
- self.df.borrow().tail(length).into()
904
- }
905
-
906
- pub fn is_unique(&self) -> RbResult<RbSeries> {
907
- let mask = self.df.borrow().is_unique().map_err(RbPolarsErr::from)?;
908
- Ok(mask.into_series().into())
909
- }
910
-
911
- pub fn is_duplicated(&self) -> RbResult<RbSeries> {
912
- let mask = self
913
- .df
914
- .borrow()
915
- .is_duplicated()
916
- .map_err(RbPolarsErr::from)?;
917
- Ok(mask.into_series().into())
918
- }
919
-
920
- pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
921
- if null_equal {
922
- self.df.borrow().equals_missing(&other.df.borrow())
923
- } else {
924
- self.df.borrow().equals(&other.df.borrow())
925
- }
926
- }
927
-
928
- pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
929
- let df = self
930
- .df
931
- .borrow()
932
- .with_row_index(&name, offset)
933
- .map_err(RbPolarsErr::from)?;
934
- Ok(df.into())
935
- }
936
-
937
- pub fn clone(&self) -> Self {
938
- RbDataFrame::new(self.df.borrow().clone())
939
- }
940
-
941
- pub fn melt(
942
- &self,
943
- id_vars: Vec<String>,
944
- value_vars: Vec<String>,
945
- value_name: Option<String>,
946
- variable_name: Option<String>,
947
- ) -> RbResult<Self> {
948
- let args = MeltArgs {
949
- id_vars: strings_to_smartstrings(id_vars),
950
- value_vars: strings_to_smartstrings(value_vars),
951
- value_name: value_name.map(|s| s.into()),
952
- variable_name: variable_name.map(|s| s.into()),
953
- streamable: false,
954
- };
955
-
956
- let df = self.df.borrow().melt2(args).map_err(RbPolarsErr::from)?;
957
- Ok(RbDataFrame::new(df))
958
- }
959
-
960
- #[allow(clippy::too_many_arguments)]
961
- pub fn pivot_expr(
962
- &self,
963
- index: Vec<String>,
964
- columns: Vec<String>,
965
- values: Option<Vec<String>>,
966
- maintain_order: bool,
967
- sort_columns: bool,
968
- aggregate_expr: Option<&RbExpr>,
969
- separator: Option<String>,
970
- ) -> RbResult<Self> {
971
- let fun = match maintain_order {
972
- true => pivot_stable,
973
- false => pivot,
974
- };
975
- let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
976
- let df = fun(
977
- &self.df.borrow(),
978
- index,
979
- columns,
980
- values,
981
- sort_columns,
982
- agg_expr,
983
- separator.as_deref(),
984
- )
985
- .map_err(RbPolarsErr::from)?;
986
- Ok(RbDataFrame::new(df))
987
- }
988
-
989
- pub fn partition_by(
990
- &self,
991
- by: Vec<String>,
992
- maintain_order: bool,
993
- include_key: bool,
994
- ) -> RbResult<RArray> {
995
- let out = if maintain_order {
996
- self.df.borrow().partition_by_stable(by, include_key)
997
- } else {
998
- self.df.borrow().partition_by(by, include_key)
999
- }
1000
- .map_err(RbPolarsErr::from)?;
1001
- Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
1002
- }
1003
-
1004
- pub fn shift(&self, periods: i64) -> Self {
1005
- self.df.borrow().shift(periods).into()
1006
- }
1007
-
1008
- pub fn lazy(&self) -> RbLazyFrame {
1009
- self.df.borrow().clone().lazy().into()
1010
- }
1011
-
1012
- pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
1013
- let s = self
1014
- .df
1015
- .borrow()
1016
- .max_horizontal()
1017
- .map_err(RbPolarsErr::from)?;
1018
- Ok(s.map(|s| s.into()))
1019
- }
1020
-
1021
- pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
1022
- let s = self
1023
- .df
1024
- .borrow()
1025
- .min_horizontal()
1026
- .map_err(RbPolarsErr::from)?;
1027
- Ok(s.map(|s| s.into()))
1028
- }
1029
-
1030
- pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
1031
- let null_strategy = if ignore_nulls {
1032
- NullStrategy::Ignore
1033
- } else {
1034
- NullStrategy::Propagate
1035
- };
1036
- let s = self
1037
- .df
1038
- .borrow()
1039
- .sum_horizontal(null_strategy)
1040
- .map_err(RbPolarsErr::from)?;
1041
- Ok(s.map(|s| s.into()))
1042
- }
1043
-
1044
- pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
1045
- let null_strategy = if ignore_nulls {
1046
- NullStrategy::Ignore
1047
- } else {
1048
- NullStrategy::Propagate
1049
- };
1050
- let s = self
1051
- .df
1052
- .borrow()
1053
- .mean_horizontal(null_strategy)
1054
- .map_err(RbPolarsErr::from)?;
1055
- Ok(s.map(|s| s.into()))
1056
- }
1057
-
1058
- pub fn to_dummies(
1059
- &self,
1060
- columns: Option<Vec<String>>,
1061
- separator: Option<String>,
1062
- drop_first: bool,
1063
- ) -> RbResult<Self> {
1064
- let df = match columns {
1065
- Some(cols) => self.df.borrow().columns_to_dummies(
1066
- cols.iter().map(|x| x as &str).collect(),
1067
- separator.as_deref(),
1068
- drop_first,
1069
- ),
1070
- None => self
1071
- .df
1072
- .borrow()
1073
- .to_dummies(separator.as_deref(), drop_first),
1074
- }
1075
- .map_err(RbPolarsErr::from)?;
1076
- Ok(df.into())
1077
- }
1078
-
1079
- pub fn null_count(&self) -> Self {
1080
- let df = self.df.borrow().null_count();
1081
- df.into()
1082
- }
1083
-
1084
- pub fn apply(
1085
- &self,
1086
- lambda: Value,
1087
- output_type: Option<Wrap<DataType>>,
1088
- inference_size: usize,
1089
- ) -> RbResult<(Value, bool)> {
1090
- let df = &self.df.borrow();
1091
-
1092
- let output_type = output_type.map(|dt| dt.0);
1093
- let out = match output_type {
1094
- Some(DataType::Int32) => {
1095
- apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
1096
- }
1097
- Some(DataType::Int64) => {
1098
- apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
1099
- }
1100
- Some(DataType::UInt32) => {
1101
- apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
1102
- .into_series()
1103
- }
1104
- Some(DataType::UInt64) => {
1105
- apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
1106
- .into_series()
1107
- }
1108
- Some(DataType::Float32) => {
1109
- apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
1110
- .into_series()
1111
- }
1112
- Some(DataType::Float64) => {
1113
- apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
1114
- .into_series()
1115
- }
1116
- Some(DataType::Boolean) => {
1117
- apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
1118
- }
1119
- Some(DataType::Date) => {
1120
- apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
1121
- .into_date()
1122
- .into_series()
1123
- }
1124
- Some(DataType::Datetime(tu, tz)) => {
1125
- apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
1126
- .into_datetime(tu, tz)
1127
- .into_series()
1128
- }
1129
- Some(DataType::String) => {
1130
- apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1131
- }
1132
- _ => return apply_lambda_unknown(df, lambda, inference_size),
1133
- };
1134
-
1135
- Ok((Obj::wrap(RbSeries::from(out)).as_value(), false))
1136
- }
1137
-
1138
- pub fn shrink_to_fit(&self) {
1139
- self.df.borrow_mut().shrink_to_fit();
1140
- }
1141
-
1142
- pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
1143
- let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
1144
- let hash = self
1145
- .df
1146
- .borrow_mut()
1147
- .hash_rows(Some(hb))
1148
- .map_err(RbPolarsErr::from)?;
1149
- Ok(hash.into_series().into())
1150
- }
1151
-
1152
- pub fn transpose(&self, keep_names_as: Option<String>, column_names: Value) -> RbResult<Self> {
1153
- let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
1154
- Some(Either::Right(name))
1155
- } else if let Ok(name) = String::try_convert(column_names) {
1156
- Some(Either::Left(name))
1157
- } else {
1158
- None
1159
- };
1160
- Ok(self
1161
- .df
1162
- .borrow_mut()
1163
- .transpose(keep_names_as.as_deref(), new_col_names)
1164
- .map_err(RbPolarsErr::from)?
1165
- .into())
1166
- }
1167
-
1168
- pub fn upsample(
1169
- &self,
1170
- by: Vec<String>,
1171
- index_column: String,
1172
- every: String,
1173
- offset: String,
1174
- stable: bool,
1175
- ) -> RbResult<Self> {
1176
- let out = if stable {
1177
- self.df.borrow().upsample_stable(
1178
- by,
1179
- &index_column,
1180
- Duration::parse(&every),
1181
- Duration::parse(&offset),
1182
- )
1183
- } else {
1184
- self.df.borrow().upsample(
1185
- by,
1186
- &index_column,
1187
- Duration::parse(&every),
1188
- Duration::parse(&offset),
1189
- )
1190
- };
1191
- let out = out.map_err(RbPolarsErr::from)?;
1192
- Ok(out.into())
1193
- }
1194
-
1195
- pub fn to_struct(&self, name: String) -> RbSeries {
1196
- let s = self.df.borrow().clone().into_struct(&name);
1197
- s.into_series().into()
1198
- }
1199
-
1200
- pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
1201
- let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
1202
- Ok(df.into())
1203
- }
1204
-
1205
- pub fn clear(&self) -> Self {
1206
- self.df.borrow().clear().into()
1207
- }
1208
- }