polars-df 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/Cargo.lock +112 -89
  4. data/README.md +8 -7
  5. data/ext/polars/Cargo.toml +13 -12
  6. data/ext/polars/src/conversion/any_value.rs +14 -8
  7. data/ext/polars/src/conversion/chunked_array.rs +5 -2
  8. data/ext/polars/src/conversion/mod.rs +27 -19
  9. data/ext/polars/src/dataframe/construction.rs +1 -14
  10. data/ext/polars/src/dataframe/general.rs +0 -5
  11. data/ext/polars/src/expr/datetime.rs +22 -14
  12. data/ext/polars/src/file.rs +5 -5
  13. data/ext/polars/src/io/mod.rs +22 -8
  14. data/ext/polars/src/lazyframe/general.rs +27 -41
  15. data/ext/polars/src/lazyframe/optflags.rs +0 -1
  16. data/ext/polars/src/lib.rs +10 -18
  17. data/ext/polars/src/series/aggregation.rs +8 -1
  18. data/ext/polars/src/series/construction.rs +1 -0
  19. data/ext/polars/src/series/export.rs +1 -0
  20. data/ext/polars/src/series/general.rs +0 -1
  21. data/lib/polars/data_frame.rb +11 -9
  22. data/lib/polars/data_types.rb +9 -1
  23. data/lib/polars/date_time_expr.rb +35 -14
  24. data/lib/polars/expr.rb +2 -2
  25. data/lib/polars/iceberg_dataset.rb +113 -0
  26. data/lib/polars/io/iceberg.rb +8 -1
  27. data/lib/polars/io/ipc.rb +28 -49
  28. data/lib/polars/io/scan_options.rb +9 -3
  29. data/lib/polars/io/utils.rb +17 -0
  30. data/lib/polars/lazy_frame.rb +5 -2
  31. data/lib/polars/scan_cast_options.rb +4 -1
  32. data/lib/polars/selectors.rb +8 -8
  33. data/lib/polars/series.rb +23 -1
  34. data/lib/polars/string_expr.rb +1 -1
  35. data/lib/polars/string_name_space.rb +1 -1
  36. data/lib/polars/utils/convert.rb +2 -2
  37. data/lib/polars/version.rb +1 -1
  38. data/lib/polars.rb +2 -0
  39. metadata +3 -1
@@ -3,6 +3,7 @@ use magnus::{
3
3
  IntoValue, RArray, RHash, RString, Ruby, TryConvert, Value, prelude::*, r_hash::ForEach,
4
4
  };
5
5
  use polars::prelude::*;
6
+ use polars_compute::decimal::{DEC128_MAX_PREC, DecimalFmtBuffer, dec128_fits};
6
7
  use polars_core::utils::any_values_to_supertype_and_n_dtypes;
7
8
 
8
9
  use super::datetime::datetime_to_rb_object;
@@ -30,6 +31,7 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
30
31
  AnyValue::UInt16(v) => ruby.into_value(v),
31
32
  AnyValue::UInt32(v) => ruby.into_value(v),
32
33
  AnyValue::UInt64(v) => ruby.into_value(v),
34
+ AnyValue::UInt128(v) => ruby.into_value(v),
33
35
  AnyValue::Int8(v) => ruby.into_value(v),
34
36
  AnyValue::Int16(v) => ruby.into_value(v),
35
37
  AnyValue::Int32(v) => ruby.into_value(v),
@@ -74,9 +76,11 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
74
76
  }
75
77
  AnyValue::Binary(v) => ruby.str_from_slice(v).as_value(),
76
78
  AnyValue::BinaryOwned(v) => ruby.str_from_slice(&v).as_value(),
77
- AnyValue::Decimal(v, scale) => pl_utils()
78
- .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
79
- .unwrap(),
79
+ AnyValue::Decimal(v, prec, scale) => {
80
+ let mut buf = DecimalFmtBuffer::new();
81
+ let s = buf.format_dec128(v, scale, false, false);
82
+ pl_utils().funcall("_to_ruby_decimal", (prec, s)).unwrap()
83
+ }
80
84
  }
81
85
  }
82
86
 
@@ -94,8 +98,12 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
94
98
  fn get_int(ob: Value, strict: bool) -> RbResult<AnyValue<'static>> {
95
99
  if let Ok(v) = i64::try_convert(ob) {
96
100
  Ok(AnyValue::Int64(v))
101
+ } else if let Ok(v) = i128::try_convert(ob) {
102
+ Ok(AnyValue::Int128(v))
97
103
  } else if let Ok(v) = u64::try_convert(ob) {
98
104
  Ok(AnyValue::UInt64(v))
105
+ } else if let Ok(v) = u128::try_convert(ob) {
106
+ Ok(AnyValue::UInt128(v))
99
107
  } else if !strict {
100
108
  let f = f64::try_convert(ob)?;
101
109
  Ok(AnyValue::Float64(f))
@@ -206,14 +214,12 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
206
214
  match digits.parse::<i128>() {
207
215
  Ok(mut v) => {
208
216
  let scale = if exp > 0 {
209
- v = 10_i128
210
- .checked_pow(exp as u32)
211
- .and_then(|factor| v.checked_mul(factor))?;
217
+ v = 10_i128.checked_pow(exp as u32)?.checked_mul(v)?;
212
218
  0
213
219
  } else {
214
220
  (-exp) as usize
215
221
  };
216
- Some((v, scale))
222
+ dec128_fits(v, DEC128_MAX_PREC).then_some((v, scale))
217
223
  }
218
224
  Err(_) => None,
219
225
  }
@@ -229,7 +235,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
229
235
  // TODO better error
230
236
  v = v.checked_neg().unwrap();
231
237
  }
232
- Ok(AnyValue::Decimal(v, scale))
238
+ Ok(AnyValue::Decimal(v, DEC128_MAX_PREC, scale))
233
239
  }
234
240
 
235
241
  let ruby = Ruby::get_with(ob);
@@ -1,5 +1,6 @@
1
1
  use magnus::{IntoValue, RString, Ruby, TryConvert, Value, prelude::*};
2
2
  use polars::prelude::*;
3
+ use polars_compute::decimal::DecimalFmtBuffer;
3
4
 
4
5
  use super::{Wrap, get_rbseq, struct_dict};
5
6
 
@@ -129,11 +130,13 @@ impl IntoValue for Wrap<&DateChunked> {
129
130
  impl IntoValue for Wrap<&DecimalChunked> {
130
131
  fn into_value_with(self, ruby: &Ruby) -> Value {
131
132
  let utils = pl_utils();
132
- let rb_scale = (-(self.0.scale() as i32)).into_value_with(ruby);
133
+ let rb_precision = self.0.precision().into_value_with(ruby);
134
+ let mut buf = DecimalFmtBuffer::new();
133
135
  let iter = self.0.physical().into_iter().map(|opt_v| {
134
136
  opt_v.map(|v| {
137
+ let s = buf.format_dec128(v, self.0.scale(), false, false);
135
138
  utils
136
- .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
139
+ .funcall::<_, _, Value>("_to_ruby_decimal", (rb_precision, s))
137
140
  .unwrap()
138
141
  })
139
142
  });
@@ -25,6 +25,7 @@ use polars::prelude::default_values::{
25
25
  use polars::prelude::deletion::DeletionFilesList;
26
26
  use polars::prelude::*;
27
27
  use polars::series::ops::NullBehavior;
28
+ use polars_compute::decimal::dec128_verify_prec_scale;
28
29
  use polars_core::schema::iceberg::IcebergSchema;
29
30
  use polars_core::utils::arrow::array::Array;
30
31
  use polars_core::utils::materialize_dyn_int;
@@ -174,6 +175,10 @@ impl IntoValue for Wrap<DataType> {
174
175
  let class = pl.const_get::<_, Value>("UInt64").unwrap();
175
176
  class.funcall("new", ()).unwrap()
176
177
  }
178
+ DataType::UInt128 => {
179
+ let class = pl.const_get::<_, Value>("UInt128").unwrap();
180
+ class.funcall("new", ()).unwrap()
181
+ }
177
182
  DataType::Float32 => {
178
183
  let class = pl.const_get::<_, Value>("Float32").unwrap();
179
184
  class.funcall("new", ()).unwrap()
@@ -350,7 +355,11 @@ impl TryConvert for Wrap<DataType> {
350
355
  "Polars::Time" => DataType::Time,
351
356
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
352
357
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
353
- "Polars::Decimal" => DataType::Decimal(None, None),
358
+ "Polars::Decimal" => {
359
+ return Err(RbTypeError::new_err(
360
+ "Decimal without precision/scale set is not a valid Polars datatype",
361
+ ));
362
+ }
354
363
  "Polars::List" => DataType::List(Box::new(DataType::Null)),
355
364
  "Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
356
365
  "Polars::Struct" => DataType::Struct(vec![]),
@@ -415,7 +424,8 @@ impl TryConvert for Wrap<DataType> {
415
424
  "Polars::Decimal" => {
416
425
  let precision = ob.funcall("precision", ())?;
417
426
  let scale = ob.funcall("scale", ())?;
418
- DataType::Decimal(precision, Some(scale))
427
+ dec128_verify_prec_scale(precision, scale).map_err(to_rb_err)?;
428
+ DataType::Decimal(precision, scale)
419
429
  }
420
430
  "Polars::List" => {
421
431
  let inner: Value = ob.funcall("inner", ()).unwrap();
@@ -882,7 +892,7 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
882
892
  let parsed = match String::try_convert(ob)?.as_str() {
883
893
  "uncompressed" => None,
884
894
  "lz4" => Some(IpcCompression::LZ4),
885
- "zstd" => Some(IpcCompression::ZSTD),
895
+ "zstd" => Some(IpcCompression::ZSTD(Default::default())),
886
896
  v => {
887
897
  return Err(RbValueError::new_err(format!(
888
898
  "compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}"
@@ -1091,21 +1101,6 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
1091
1101
  }
1092
1102
  }
1093
1103
 
1094
- impl TryConvert for Wrap<IpcCompression> {
1095
- fn try_convert(ob: Value) -> RbResult<Self> {
1096
- let parsed = match String::try_convert(ob)?.as_str() {
1097
- "lz4" => IpcCompression::LZ4,
1098
- "zstd" => IpcCompression::ZSTD,
1099
- v => {
1100
- return Err(RbValueError::new_err(format!(
1101
- "compression must be one of {{'lz4', 'zstd'}}, got {v}"
1102
- )));
1103
- }
1104
- };
1105
- Ok(Wrap(parsed))
1106
- }
1107
- }
1108
-
1109
1104
  impl TryConvert for Wrap<SearchSortedSide> {
1110
1105
  fn try_convert(ob: Value) -> RbResult<Self> {
1111
1106
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1208,7 +1203,8 @@ impl TryConvert for Wrap<QuoteStyle> {
1208
1203
  }
1209
1204
 
1210
1205
  pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
1211
- let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
1206
+ let out = CloudOptions::from_untyped_config(CloudScheme::from_uri(uri).as_ref(), kv)
1207
+ .map_err(RbPolarsErr::from)?;
1212
1208
  Ok(out)
1213
1209
  }
1214
1210
 
@@ -1307,6 +1303,17 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
1307
1303
  }
1308
1304
  };
1309
1305
 
1306
+ let categorical_to_string =
1307
+ match &*ob.funcall::<_, _, String>("categorical_to_string", ())? {
1308
+ "allow" => true,
1309
+ "forbid" => false,
1310
+ v => {
1311
+ return Err(RbValueError::new_err(format!(
1312
+ "unknown option for categorical_to_string: {v}"
1313
+ )));
1314
+ }
1315
+ };
1316
+
1310
1317
  return Ok(Wrap(CastColumnsPolicy {
1311
1318
  integer_upcast,
1312
1319
  float_upcast,
@@ -1315,6 +1322,7 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
1315
1322
  datetime_microseconds_downcast: false,
1316
1323
  datetime_convert_timezone,
1317
1324
  null_upcast: true,
1325
+ categorical_to_string,
1318
1326
  missing_struct_fields,
1319
1327
  extra_struct_fields,
1320
1328
  }));
@@ -54,7 +54,7 @@ fn finish_from_rows(
54
54
  schema_overrides: Option<Schema>,
55
55
  infer_schema_length: Option<usize>,
56
56
  ) -> RbResult<RbDataFrame> {
57
- let mut schema = if let Some(mut schema) = schema {
57
+ let schema = if let Some(mut schema) = schema {
58
58
  resolve_schema_overrides(&mut schema, schema_overrides);
59
59
  update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
60
60
  schema
@@ -62,11 +62,6 @@ fn finish_from_rows(
62
62
  rows_to_schema_supertypes(&rows, infer_schema_length).map_err(RbPolarsErr::from)?
63
63
  };
64
64
 
65
- // TODO: Remove this step when Decimals are supported properly.
66
- // Erasing the decimal precision/scale here will just require us to infer it again later.
67
- // https://github.com/pola-rs/polars/issues/14427
68
- erase_decimal_precision_scale(&mut schema);
69
-
70
65
  let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
71
66
  Ok(df.into())
72
67
  }
@@ -106,14 +101,6 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
106
101
  }
107
102
  }
108
103
 
109
- fn erase_decimal_precision_scale(schema: &mut Schema) {
110
- for dtype in schema.iter_values_mut() {
111
- if let DataType::Decimal(_, _) = dtype {
112
- *dtype = DataType::Decimal(None, None)
113
- }
114
- }
115
- }
116
-
117
104
  fn columns_names_to_empty_schema<'a, I>(column_names: I) -> Schema
118
105
  where
119
106
  I: IntoIterator<Item = &'a str>,
@@ -552,11 +552,6 @@ impl RbDataFrame {
552
552
  s.into_series().into()
553
553
  }
554
554
 
555
- pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
556
- let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
557
- Ok(df.into())
558
- }
559
-
560
555
  pub fn clear(&self) -> Self {
561
556
  self.df.borrow().clear().into()
562
557
  }
@@ -218,31 +218,39 @@ impl RbExpr {
218
218
  self.inner.clone().dt().timestamp(tu.0).into()
219
219
  }
220
220
 
221
- pub fn dt_total_days(&self) -> Self {
222
- self.inner.clone().dt().total_days().into()
221
+ pub fn dt_total_days(&self, fractional: bool) -> Self {
222
+ self.inner.clone().dt().total_days(fractional).into()
223
223
  }
224
224
 
225
- pub fn dt_total_hours(&self) -> Self {
226
- self.inner.clone().dt().total_hours().into()
225
+ pub fn dt_total_hours(&self, fractional: bool) -> Self {
226
+ self.inner.clone().dt().total_hours(fractional).into()
227
227
  }
228
228
 
229
- pub fn dt_total_minutes(&self) -> Self {
230
- self.inner.clone().dt().total_minutes().into()
229
+ pub fn dt_total_minutes(&self, fractional: bool) -> Self {
230
+ self.inner.clone().dt().total_minutes(fractional).into()
231
231
  }
232
232
 
233
- pub fn dt_total_seconds(&self) -> Self {
234
- self.inner.clone().dt().total_seconds().into()
233
+ pub fn dt_total_seconds(&self, fractional: bool) -> Self {
234
+ self.inner.clone().dt().total_seconds(fractional).into()
235
235
  }
236
236
 
237
- pub fn dt_total_milliseconds(&self) -> Self {
238
- self.inner.clone().dt().total_milliseconds().into()
237
+ pub fn dt_total_milliseconds(&self, fractional: bool) -> Self {
238
+ self.inner
239
+ .clone()
240
+ .dt()
241
+ .total_milliseconds(fractional)
242
+ .into()
239
243
  }
240
244
 
241
- pub fn dt_total_microseconds(&self) -> Self {
242
- self.inner.clone().dt().total_microseconds().into()
245
+ pub fn dt_total_microseconds(&self, fractional: bool) -> Self {
246
+ self.inner
247
+ .clone()
248
+ .dt()
249
+ .total_microseconds(fractional)
250
+ .into()
243
251
  }
244
252
 
245
- pub fn dt_total_nanoseconds(&self) -> Self {
246
- self.inner.clone().dt().total_nanoseconds().into()
253
+ pub fn dt_total_nanoseconds(&self, fractional: bool) -> Self {
254
+ self.inner.clone().dt().total_nanoseconds(fractional).into()
247
255
  }
248
256
  }
@@ -145,13 +145,13 @@ impl Seek for RbFileLikeObject {
145
145
  SeekFrom::End(i) => (2, i),
146
146
  };
147
147
 
148
- let new_position = Ruby::get()
149
- .unwrap()
150
- .get_inner(self.inner)
151
- .funcall("seek", (offset, whence))
148
+ let inner = Ruby::get().unwrap().get_inner(self.inner);
149
+
150
+ inner
151
+ .funcall::<_, _, Value>("seek", (offset, whence))
152
152
  .map_err(rberr_to_io_err)?;
153
153
 
154
- Ok(new_position)
154
+ inner.funcall("tell", ()).map_err(rberr_to_io_err)
155
155
  }
156
156
  }
157
157
 
@@ -5,7 +5,7 @@ use polars::prelude::default_values::DefaultFieldValues;
5
5
  use polars::prelude::deletion::DeletionFilesList;
6
6
  use polars::prelude::{
7
7
  CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
8
- UnifiedScanArgs,
8
+ TableStatistics, UnifiedScanArgs,
9
9
  };
10
10
  use polars_io::{HiveOptions, RowIndex};
11
11
  use polars_utils::IdxSize;
@@ -24,6 +24,12 @@ impl TryConvert for RbScanOptions {
24
24
  }
25
25
  }
26
26
 
27
+ impl TryConvert for Wrap<TableStatistics> {
28
+ fn try_convert(_ob: Value) -> RbResult<Self> {
29
+ todo!();
30
+ }
31
+ }
32
+
27
33
  impl RbScanOptions {
28
34
  pub fn extract_unified_scan_args(
29
35
  &self,
@@ -38,6 +44,10 @@ impl RbScanOptions {
38
44
  let include_file_paths: Option<Wrap<PlSmallStr>> =
39
45
  self.0.funcall("include_file_paths", ())?;
40
46
  let glob: bool = self.0.funcall("glob", ())?;
47
+ let hidden_file_prefix: Option<Vec<String>> = self.0.funcall("hidden_file_prefix", ())?;
48
+ let column_mapping: Option<Wrap<ColumnMapping>> = self.0.funcall("column_mapping", ())?;
49
+ let default_values: Option<Wrap<DefaultFieldValues>> =
50
+ self.0.funcall("default_values", ())?;
41
51
  let hive_partitioning: Option<bool> = self.0.funcall("hive_partitioning", ())?;
42
52
  let hive_schema: Option<Wrap<Schema>> = self.0.funcall("hive_schema", ())?;
43
53
  let try_parse_hive_dates: bool = self.0.funcall("try_parse_hive_dates", ())?;
@@ -48,9 +58,9 @@ impl RbScanOptions {
48
58
  let retries: usize = self.0.funcall("retries", ())?;
49
59
  let deletion_files: Option<Wrap<DeletionFilesList>> =
50
60
  self.0.funcall("deletion_files", ())?;
51
- let column_mapping: Option<Wrap<ColumnMapping>> = self.0.funcall("column_mapping", ())?;
52
- let default_values: Option<Wrap<DefaultFieldValues>> =
53
- self.0.funcall("default_values", ())?;
61
+ let table_statistics: Option<Wrap<TableStatistics>> =
62
+ self.0.funcall("table_statistics", ())?;
63
+ let row_count: Option<(u64, u64)> = self.0.funcall("row_count", ())?;
54
64
 
55
65
  let cloud_options = storage_options;
56
66
 
@@ -89,7 +99,13 @@ impl RbScanOptions {
89
99
  rechunk,
90
100
  cache,
91
101
  glob,
102
+ hidden_file_prefix: hidden_file_prefix
103
+ .map(|x| x.into_iter().map(|x| (*x).into()).collect()),
92
104
  projection: None,
105
+ column_mapping: column_mapping.map(|x| x.0),
106
+ default_values: default_values
107
+ .map(|x| x.0)
108
+ .filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
93
109
  row_index,
94
110
  pre_slice: pre_slice.map(Slice::from),
95
111
  cast_columns_policy: cast_options.0,
@@ -97,10 +113,8 @@ impl RbScanOptions {
97
113
  extra_columns_policy: extra_columns.0,
98
114
  include_file_paths: include_file_paths.map(|x| x.0),
99
115
  deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
100
- column_mapping: column_mapping.map(|x| x.0),
101
- default_values: default_values
102
- .map(|x| x.0)
103
- .filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
116
+ table_statistics: table_statistics.map(|x| x.0),
117
+ row_count,
104
118
  };
105
119
 
106
120
  Ok(unified_scan_args)
@@ -1,5 +1,5 @@
1
1
  use magnus::{IntoValue, RArray, RHash, Ruby, TryConvert, Value, r_hash::ForEach, typed_data::Obj};
2
- use polars::io::{HiveOptions, RowIndex};
2
+ use polars::io::RowIndex;
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
5
  use polars_plan::dsl::ScanSources;
@@ -18,7 +18,10 @@ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbLazyGroupBy, RbPolarsErr, RbResu
18
18
  fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPath>, ScanSources)> {
19
19
  use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
20
20
  Ok(match get_ruby_scan_source_input(obj, false)? {
21
- RubyScanSourceInput::Path(path) => (Some(path.clone()), ScanSources::Paths([path].into())),
21
+ RubyScanSourceInput::Path(path) => (
22
+ Some(path.clone()),
23
+ ScanSources::Paths(FromIterator::from_iter([path])),
24
+ ),
22
25
  RubyScanSourceInput::File(file) => (None, ScanSources::Files([file].into())),
23
26
  RubyScanSourceInput::Buffer(buff) => (None, ScanSources::Buffers([buff].into())),
24
27
  })
@@ -180,48 +183,28 @@ impl RbLazyFrame {
180
183
  Ok(lf.into())
181
184
  }
182
185
 
183
- #[allow(clippy::too_many_arguments)]
184
186
  pub fn new_from_ipc(
185
- source: Option<Value>,
186
187
  sources: Wrap<ScanSources>,
187
- n_rows: Option<usize>,
188
- cache: bool,
189
- rechunk: bool,
190
- row_index: Option<(String, IdxSize)>,
191
- hive_partitioning: Option<bool>,
192
- hive_schema: Option<Wrap<Schema>>,
193
- try_parse_hive_dates: bool,
194
- include_file_paths: Option<String>,
188
+ scan_options: RbScanOptions,
189
+ file_cache_ttl: Option<u64>,
195
190
  ) -> RbResult<Self> {
196
- let row_index = row_index.map(|(name, offset)| RowIndex {
197
- name: name.into(),
198
- offset,
199
- });
191
+ let options = IpcScanOptions;
200
192
 
201
- let hive_options = HiveOptions {
202
- enabled: hive_partitioning,
203
- hive_start_idx: 0,
204
- schema: hive_schema.map(|x| Arc::new(x.0)),
205
- try_parse_dates: try_parse_hive_dates,
206
- };
193
+ let sources = sources.0;
194
+ let first_path = sources.first_path().map(|p| p.into_owned());
207
195
 
208
- let args = ScanArgsIpc {
209
- n_rows,
210
- cache,
211
- rechunk,
212
- row_index,
213
- cloud_options: None,
214
- hive_options,
215
- include_file_paths: include_file_paths.map(|x| x.into()),
216
- };
196
+ let mut unified_scan_args =
197
+ scan_options.extract_unified_scan_args(first_path.as_ref().map(|p| p.as_ref()))?;
217
198
 
218
- let sources = sources.0;
219
- let (_first_path, sources) = match source {
220
- None => (sources.first_path().map(|p| p.into_owned()), sources),
221
- Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
222
- };
199
+ if let Some(file_cache_ttl) = file_cache_ttl {
200
+ unified_scan_args
201
+ .cloud_options
202
+ .get_or_insert_default()
203
+ .file_cache_ttl = file_cache_ttl;
204
+ }
223
205
 
224
- let lf = LazyFrame::scan_ipc_sources(sources, args).map_err(RbPolarsErr::from)?;
206
+ let lf = LazyFrame::scan_ipc_sources(sources, options, unified_scan_args)
207
+ .map_err(RbPolarsErr::from)?;
225
208
  Ok(lf.into())
226
209
  }
227
210
 
@@ -406,13 +389,13 @@ impl RbLazyFrame {
406
389
  pub fn sink_ipc(
407
390
  &self,
408
391
  target: SinkTarget,
409
- compression: Option<Wrap<IpcCompression>>,
392
+ compression: Wrap<Option<IpcCompression>>,
410
393
  cloud_options: Option<Vec<(String, String)>>,
411
394
  retries: usize,
412
395
  sink_options: Wrap<SinkOptions>,
413
396
  ) -> RbResult<RbLazyFrame> {
414
397
  let options = IpcWriterOptions {
415
- compression: compression.map(|c| c.0),
398
+ compression: compression.0,
416
399
  ..Default::default()
417
400
  };
418
401
 
@@ -949,11 +932,14 @@ impl RbLazyFrame {
949
932
  Ok(schema_dict)
950
933
  }
951
934
 
952
- pub fn unnest(&self, columns: &RbSelector) -> Self {
935
+ pub fn unnest(&self, columns: &RbSelector, separator: Option<String>) -> Self {
953
936
  self.ldf
954
937
  .borrow()
955
938
  .clone()
956
- .unnest(columns.inner.clone())
939
+ .unnest(
940
+ columns.inner.clone(),
941
+ separator.as_deref().map(PlSmallStr::from_str),
942
+ )
957
943
  .into()
958
944
  }
959
945
 
@@ -49,7 +49,6 @@ flag_getter_setters! {
49
49
  (SLICE_PUSHDOWN, get_slice_pushdown, set_slice_pushdown, clear=true)
50
50
  (COMM_SUBPLAN_ELIM, get_comm_subplan_elim, set_comm_subplan_elim, clear=true)
51
51
  (COMM_SUBEXPR_ELIM, get_comm_subexpr_elim, set_comm_subexpr_elim, clear=true)
52
- (COLLAPSE_JOINS, get_collapse_joins, set_collapse_joins, clear=true)
53
52
  (CHECK_ORDER_OBSERVE, get_check_order_observe, set_check_order_observe, clear=true)
54
53
  (FAST_PROJECTION, get_fast_projection, set_fast_projection, clear=true)
55
54
 
@@ -156,7 +156,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
156
156
  class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
157
157
  class.define_method("upsample", method!(RbDataFrame::upsample, 4))?;
158
158
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
159
- class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
160
159
  class.define_method("clear", method!(RbDataFrame::clear, 0))?;
161
160
  class.define_method(
162
161
  "serialize_binary",
@@ -432,21 +431,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
432
431
  class.define_method("dt_millisecond", method!(RbExpr::dt_millisecond, 0))?;
433
432
  class.define_method("dt_microsecond", method!(RbExpr::dt_microsecond, 0))?;
434
433
  class.define_method("dt_nanosecond", method!(RbExpr::dt_nanosecond, 0))?;
435
- class.define_method("dt_total_days", method!(RbExpr::dt_total_days, 0))?;
436
- class.define_method("dt_total_hours", method!(RbExpr::dt_total_hours, 0))?;
437
- class.define_method("dt_total_minutes", method!(RbExpr::dt_total_minutes, 0))?;
438
- class.define_method("dt_total_seconds", method!(RbExpr::dt_total_seconds, 0))?;
434
+ class.define_method("dt_total_days", method!(RbExpr::dt_total_days, 1))?;
435
+ class.define_method("dt_total_hours", method!(RbExpr::dt_total_hours, 1))?;
436
+ class.define_method("dt_total_minutes", method!(RbExpr::dt_total_minutes, 1))?;
437
+ class.define_method("dt_total_seconds", method!(RbExpr::dt_total_seconds, 1))?;
439
438
  class.define_method(
440
439
  "dt_total_nanoseconds",
441
- method!(RbExpr::dt_total_nanoseconds, 0),
440
+ method!(RbExpr::dt_total_nanoseconds, 1),
442
441
  )?;
443
442
  class.define_method(
444
443
  "dt_total_microseconds",
445
- method!(RbExpr::dt_total_microseconds, 0),
444
+ method!(RbExpr::dt_total_microseconds, 1),
446
445
  )?;
447
446
  class.define_method(
448
447
  "dt_total_milliseconds",
449
- method!(RbExpr::dt_total_milliseconds, 0),
448
+ method!(RbExpr::dt_total_milliseconds, 1),
450
449
  )?;
451
450
  class.define_method("dt_timestamp", method!(RbExpr::dt_timestamp, 1))?;
452
451
  class.define_method("dt_to_string", method!(RbExpr::dt_to_string, 1))?;
@@ -845,7 +844,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
845
844
  "new_from_parquet",
846
845
  function!(RbLazyFrame::new_from_parquet, 6),
847
846
  )?;
848
- class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
847
+ class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 3))?;
849
848
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
850
849
  class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
851
850
  class.define_method(
@@ -912,7 +911,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
912
911
  class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
913
912
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
914
913
  class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
915
- class.define_method("unnest", method!(RbLazyFrame::unnest, 1))?;
914
+ class.define_method("unnest", method!(RbLazyFrame::unnest, 2))?;
916
915
  class.define_method("count", method!(RbLazyFrame::count, 0))?;
917
916
  class.define_method("merge_sorted", method!(RbLazyFrame::merge_sorted, 2))?;
918
917
 
@@ -927,6 +926,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
927
926
  class.define_singleton_method("new_opt_u16", function!(RbSeries::new_opt_u16, 3))?;
928
927
  class.define_singleton_method("new_opt_u32", function!(RbSeries::new_opt_u32, 3))?;
929
928
  class.define_singleton_method("new_opt_u64", function!(RbSeries::new_opt_u64, 3))?;
929
+ class.define_singleton_method("new_opt_u128", function!(RbSeries::new_opt_u128, 3))?;
930
930
  class.define_singleton_method("new_opt_i8", function!(RbSeries::new_opt_i8, 3))?;
931
931
  class.define_singleton_method("new_opt_i16", function!(RbSeries::new_opt_i16, 3))?;
932
932
  class.define_singleton_method("new_opt_i32", function!(RbSeries::new_opt_i32, 3))?;
@@ -1402,14 +1402,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1402
1402
  "set_comm_subexpr_elim",
1403
1403
  method!(RbOptFlags::set_comm_subexpr_elim, 1),
1404
1404
  )?;
1405
- class.define_method(
1406
- "get_collapse_joins",
1407
- method!(RbOptFlags::get_collapse_joins, 0),
1408
- )?;
1409
- class.define_method(
1410
- "set_collapse_joins",
1411
- method!(RbOptFlags::set_collapse_joins, 1),
1412
- )?;
1413
1405
  class.define_method(
1414
1406
  "get_check_order_observe",
1415
1407
  method!(RbOptFlags::get_check_order_observe, 0),
@@ -1,5 +1,6 @@
1
1
  use crate::error::RbPolarsErr;
2
2
  use crate::prelude::*;
3
+ use crate::utils::to_rb_err;
3
4
  use crate::{RbResult, RbSeries};
4
5
  use magnus::{IntoValue, Ruby, Value};
5
6
 
@@ -58,12 +59,18 @@ impl RbSeries {
58
59
  .cast(&DataType::UInt8)
59
60
  .unwrap()
60
61
  .mean_reduce()
62
+ .map_err(to_rb_err)?
61
63
  .as_any_value(),
62
64
  )
63
65
  .into_value_with(ruby)),
64
66
  // For non-numeric output types we require mean_reduce.
65
67
  dt if dt.is_temporal() => Ok(Wrap(
66
- rb_self.series.borrow().mean_reduce().as_any_value(),
68
+ rb_self
69
+ .series
70
+ .borrow()
71
+ .mean_reduce()
72
+ .map_err(to_rb_err)?
73
+ .as_any_value(),
67
74
  )
68
75
  .into_value_with(ruby)),
69
76
  _ => Ok(rb_self.series.borrow().mean().into_value_with(ruby)),
@@ -75,6 +75,7 @@ init_method_opt!(new_opt_u8, UInt8Type, u8);
75
75
  init_method_opt!(new_opt_u16, UInt16Type, u16);
76
76
  init_method_opt!(new_opt_u32, UInt32Type, u32);
77
77
  init_method_opt!(new_opt_u64, UInt64Type, u64);
78
+ init_method_opt!(new_opt_u128, UInt128Type, u128);
78
79
  init_method_opt!(new_opt_i8, Int8Type, i8);
79
80
  init_method_opt!(new_opt_i16, Int16Type, i16);
80
81
  init_method_opt!(new_opt_i32, Int32Type, i32);
@@ -18,6 +18,7 @@ impl RbSeries {
18
18
  DataType::UInt16 => ruby.ary_from_iter(series.u16().unwrap()).as_value(),
19
19
  DataType::UInt32 => ruby.ary_from_iter(series.u32().unwrap()).as_value(),
20
20
  DataType::UInt64 => ruby.ary_from_iter(series.u64().unwrap()).as_value(),
21
+ DataType::UInt128 => ruby.ary_from_iter(series.u128().unwrap()).as_value(),
21
22
  DataType::Int8 => ruby.ary_from_iter(series.i8().unwrap()).as_value(),
22
23
  DataType::Int16 => ruby.ary_from_iter(series.i16().unwrap()).as_value(),
23
24
  DataType::Int32 => ruby.ary_from_iter(series.i32().unwrap()).as_value(),
@@ -340,7 +340,6 @@ impl RbSeries {
340
340
  let s = self.series.borrow();
341
341
  let ca = s.str().map_err(RbPolarsErr::from)?;
342
342
  ca.to_decimal_infer(inference_length)
343
- .map(Series::from)
344
343
  .map(Into::into)
345
344
  .map_err(RbPolarsErr::from)
346
345
  .map_err(RbErr::from)