polars-df 0.13.0 → 0.14.0

This diff shows the changes between the publicly released 0.13.0 and 0.14.0 versions of the polars-df package, as published to its public registry.
@@ -1,6 +1,6 @@
  [package]
  name = "polars"
- version = "0.13.0"
+ version = "0.14.0"
  license = "MIT"
  authors = ["Andrew Kane <andrew@ankane.org>"]
  edition = "2021"
@@ -15,14 +15,13 @@ ahash = "0.8"
  chrono = "0.4"
  either = "1.8"
  magnus = "0.7"
- polars-core = "=0.42.0"
- polars-parquet = "=0.42.0"
- polars-utils = "=0.42.0"
+ polars-core = "=0.43.1"
+ polars-parquet = "=0.43.1"
+ polars-utils = "=0.43.1"
  serde_json = "1"
- smartstring = "1"

  [dependencies.polars]
- version = "=0.42.0"
+ version = "=0.43.1"
  features = [
  "abs",
  "approx_unique",
@@ -54,7 +54,7 @@ impl RbBatchedCsv {
  let null_values = null_values.map(|w| w.0);
  let eol_char = eol_char.as_bytes()[0];
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let quote_char = if let Some(s) = quote_char {
@@ -72,7 +72,7 @@ impl RbBatchedCsv {
  .iter()
  .map(|(name, dtype)| {
  let dtype = dtype.0.clone();
- Field::new(name, dtype)
+ Field::new((&**name).into(), dtype)
  })
  .collect::<Schema>()
  });
@@ -95,7 +95,7 @@ impl RbBatchedCsv {
  .with_projection(projection.map(Arc::new))
  .with_rechunk(rechunk)
  .with_chunk_size(chunk_size)
- .with_columns(columns.map(Arc::from))
+ .with_columns(columns.map(|x| x.into_iter().map(PlSmallStr::from_string).collect()))
  .with_n_threads(n_threads)
  .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
  .with_low_memory(low_memory)
@@ -51,7 +51,10 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
  AnyValue::Datetime(v, time_unit, time_zone) => {
  let time_unit = time_unit.to_ascii();
  utils()
- .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
+ .funcall(
+ "_to_ruby_datetime",
+ (v, time_unit, time_zone.as_ref().map(|v| v.to_string())),
+ )
  .unwrap()
  }
  AnyValue::Duration(v, time_unit) => {
@@ -122,7 +125,10 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
  fn get_list(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
  let v = RArray::from_value(ob).unwrap();
  if v.is_empty() {
- Ok(AnyValue::List(Series::new_empty("", &DataType::Null)))
+ Ok(AnyValue::List(Series::new_empty(
+ PlSmallStr::EMPTY,
+ &DataType::Null,
+ )))
  } else {
  let list = v;

@@ -142,7 +148,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
  avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
  }

- let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
+ let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, true)
  .map_err(RbPolarsErr::from)?;
  Ok(AnyValue::List(s))
  }
@@ -162,7 +168,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
  let key = String::try_convert(k)?;
  let val = Wrap::<AnyValue>::try_convert(v)?.0;
  let dtype = DataType::from(&val);
- keys.push(Field::new(&key, dtype));
+ keys.push(Field::new(key.into(), dtype));
  vals.push(val);
  Ok(ForEach::Continue)
  })?;
@@ -9,7 +9,7 @@ use crate::RbResult;
  impl TryConvert for Wrap<StringChunked> {
  fn try_convert(obj: Value) -> RbResult<Self> {
  let (seq, len) = get_rbseq(obj)?;
- let mut builder = StringChunkedBuilder::new("", len);
+ let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, len);

  for res in seq.into_iter() {
  let item = res;
@@ -25,7 +25,7 @@ impl TryConvert for Wrap<StringChunked> {
  impl TryConvert for Wrap<BinaryChunked> {
  fn try_convert(obj: Value) -> RbResult<Self> {
  let (seq, len) = get_rbseq(obj)?;
- let mut builder = BinaryChunkedBuilder::new("", len);
+ let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, len);

  for res in seq.into_iter() {
  let item = res;
@@ -90,7 +90,7 @@ impl IntoValue for Wrap<&DatetimeChunked> {
  fn into_value_with(self, _: &Ruby) -> Value {
  let utils = utils();
  let time_unit = Wrap(self.0.time_unit()).into_value();
- let time_zone = self.0.time_zone().clone().into_value();
+ let time_zone = self.0.time_zone().as_deref().map(|v| v.into_value());
  let iter = self.0.into_iter().map(|opt_v| {
  opt_v.map(|v| {
  utils
@@ -20,7 +20,6 @@ use polars::series::ops::NullBehavior;
  use polars_core::utils::arrow::array::Array;
  use polars_core::utils::materialize_dyn_int;
  use polars_utils::total_ord::{TotalEq, TotalHash};
- use smartstring::alias::String as SmartString;

  use crate::object::OBJECT_NAME;
  use crate::rb_modules::series;
@@ -84,14 +83,26 @@ pub(crate) fn to_series(s: RbSeries) -> Value {
  .unwrap()
  }

+ impl TryConvert for Wrap<PlSmallStr> {
+ fn try_convert(ob: Value) -> RbResult<Self> {
+ Ok(Wrap((&*String::try_convert(ob)?).into()))
+ }
+ }
+
  impl TryConvert for Wrap<NullValues> {
  fn try_convert(ob: Value) -> RbResult<Self> {
  if let Ok(s) = String::try_convert(ob) {
- Ok(Wrap(NullValues::AllColumnsSingle(s)))
+ Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))
  } else if let Ok(s) = Vec::<String>::try_convert(ob) {
- Ok(Wrap(NullValues::AllColumns(s)))
+ Ok(Wrap(NullValues::AllColumns(
+ s.into_iter().map(|x| (&*x).into()).collect(),
+ )))
  } else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
- Ok(Wrap(NullValues::Named(s)))
+ Ok(Wrap(NullValues::Named(
+ s.into_iter()
+ .map(|(a, b)| ((&*a).into(), (&*b).into()))
+ .collect(),
+ )))
  } else {
  Err(RbPolarsErr::other(
  "could not extract value from null_values argument".into(),
@@ -189,7 +200,7 @@ impl IntoValue for Wrap<DataType> {
  DataType::Datetime(tu, tz) => {
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
  datetime_class
- .funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
  .unwrap()
  }
  DataType::Duration(tu) => {
@@ -210,7 +221,9 @@ impl IntoValue for Wrap<DataType> {
  // we should always have an initialized rev_map coming from rust
  let categories = rev_map.as_ref().unwrap().get_categories();
  let class = pl.const_get::<_, Value>("Enum").unwrap();
- let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
+ let s =
+ Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
+ .unwrap();
  let series = to_series(s.into());
  class.funcall::<_, _, Value>("new", (series,)).unwrap()
  }
@@ -222,7 +235,7 @@ impl IntoValue for Wrap<DataType> {
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
  let iter = fields.iter().map(|fld| {
  let name = fld.name().as_str();
- let dtype = Wrap(fld.data_type().clone());
+ let dtype = Wrap(fld.dtype().clone());
  field_class
  .funcall::<_, _, Value>("new", (name, dtype))
  .unwrap()
@@ -276,7 +289,7 @@ impl TryConvert for Wrap<Field> {
  fn try_convert(ob: Value) -> RbResult<Self> {
  let name: String = ob.funcall("name", ())?;
  let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
- Ok(Wrap(Field::new(&name, dtype.0)))
+ Ok(Wrap(Field::new((&*name).into(), dtype.0)))
  }
  }

@@ -341,7 +354,7 @@ impl TryConvert for Wrap<DataType> {
  let s = get_series(categories)?;
  let ca = s.str().map_err(RbPolarsErr::from)?;
  let categories = ca.downcast_iter().next().unwrap().clone();
- create_enum_data_type(categories)
+ create_enum_dtype(categories)
  }
  "Polars::Date" => DataType::Date,
  "Polars::Time" => DataType::Time,
@@ -357,8 +370,8 @@ impl TryConvert for Wrap<DataType> {
  "Polars::Datetime" => {
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0
- let time_zone = ob.funcall("time_zone", ())?;
- DataType::Datetime(time_unit, time_zone)
+ let time_zone: Option<String> = ob.funcall("time_zone", ())?;
+ DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
  }
  "Polars::Decimal" => {
  let precision = ob.funcall("precision", ())?;
@@ -463,7 +476,7 @@ impl TryConvert for Wrap<Schema> {

  let mut schema = Vec::new();
  dict.foreach(|key: String, val: Wrap<DataType>| {
- schema.push(Ok(Field::new(&key, val.0)));
+ schema.push(Ok(Field::new((&*key).into(), val.0)));
  Ok(ForEach::Continue)
  })
  .unwrap();
@@ -1053,14 +1066,6 @@ pub fn parse_parquet_compression(
  Ok(parsed)
  }

- pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
- where
- I: IntoIterator<Item = S>,
- S: AsRef<str>,
- {
- container.into_iter().map(|s| s.as_ref().into()).collect()
- }
-
  impl TryConvert for Wrap<NonZeroUsize> {
  fn try_convert(ob: Value) -> RbResult<Self> {
  let v = usize::try_convert(ob)?;
@@ -1069,3 +1074,14 @@ impl TryConvert for Wrap<NonZeroUsize> {
  .ok_or(RbValueError::new_err("must be non-zero".into()))
  }
  }
+
+ pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
+ where
+ I: IntoIterator<Item = S>,
+ S: AsRef<str>,
+ {
+ container
+ .into_iter()
+ .map(|s| PlSmallStr::from_str(s.as_ref()))
+ .collect()
+ }
@@ -79,7 +79,7 @@ fn update_schema_from_rows(
  rows: &[Row],
  infer_schema_length: Option<usize>,
  ) -> RbResult<()> {
- let schema_is_complete = schema.iter_dtypes().all(|dtype| dtype.is_known());
+ let schema_is_complete = schema.iter_values().all(|dtype| dtype.is_known());
  if schema_is_complete {
  return Ok(());
  }
@@ -89,7 +89,7 @@ fn update_schema_from_rows(
  rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
  let inferred_dtypes_slice = inferred_dtypes.as_slice();

- for (i, dtype) in schema.iter_dtypes_mut().enumerate() {
+ for (i, dtype) in schema.iter_values_mut().enumerate() {
  if !dtype.is_known() {
  *dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
  polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
@@ -110,7 +110,7 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
  }

  fn erase_decimal_precision_scale(schema: &mut Schema) {
- for dtype in schema.iter_dtypes_mut() {
+ for dtype in schema.iter_values_mut() {
  if let DataType::Decimal(_, _) = dtype {
  *dtype = DataType::Decimal(None, None)
  }
@@ -123,7 +123,7 @@ where
  {
  let fields = column_names
  .into_iter()
- .map(|c| Field::new(c, DataType::Unknown(Default::default())));
+ .map(|c| Field::new(c.into(), DataType::Unknown(Default::default())));
  Schema::from_iter(fields)
  }

@@ -9,6 +9,7 @@ use crate::map::dataframe::{
  apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
  apply_lambda_with_utf8_out_type,
  };
+ use crate::prelude::strings_to_pl_smallstr;
  use crate::series::{to_rbseries_collection, to_series_collection};
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};

@@ -254,7 +255,7 @@ impl RbDataFrame {
  }

  pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
- let indices = IdxCa::from_vec("", indices);
+ let indices = IdxCa::from_vec("".into(), indices);
  let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
  Ok(RbDataFrame::new(df))
  }
@@ -332,7 +333,7 @@ impl RbDataFrame {
  let df = self
  .df
  .borrow()
- .with_row_index(&name, offset)
+ .with_row_index(name.into(), offset)
  .map_err(RbPolarsErr::from)?;
  Ok(df.into())
  }
@@ -349,8 +350,8 @@ impl RbDataFrame {
  variable_name: Option<String>,
  ) -> RbResult<Self> {
  let args = UnpivotArgsIR {
- on: strings_to_smartstrings(on),
- index: strings_to_smartstrings(index),
+ on: strings_to_pl_smallstr(on),
+ index: strings_to_pl_smallstr(index),
  value_name: value_name.map(|s| s.into()),
  variable_name: variable_name.map(|s| s.into()),
  };
@@ -581,7 +582,7 @@ impl RbDataFrame {
  }

  pub fn to_struct(&self, name: String) -> RbSeries {
- let s = self.df.borrow().clone().into_struct(&name);
+ let s = self.df.borrow().clone().into_struct(name.into());
  s.into_series().into()
  }

@@ -50,7 +50,7 @@ impl RbDataFrame {
  let null_values = null_values.map(|w| w.0);
  let eol_char = eol_char.as_bytes()[0];
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let quote_char = if let Some(s) = quote_char {
@@ -68,7 +68,7 @@ impl RbDataFrame {
  .iter()
  .map(|(name, dtype)| {
  let dtype = dtype.0.clone();
- Field::new(name, dtype)
+ Field::new((&**name).into(), dtype)
  })
  .collect::<Schema>()
  });
@@ -91,7 +91,7 @@ impl RbDataFrame {
  .with_projection(projection.map(Arc::new))
  .with_rechunk(rechunk)
  .with_chunk_size(chunk_size)
- .with_columns(columns.map(Arc::from))
+ .with_columns(columns.map(|x| x.into_iter().map(|x| x.into()).collect()))
  .with_n_threads(n_threads)
  .with_schema_overwrite(overwrite_dtype.map(Arc::new))
  .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
@@ -135,7 +135,7 @@ impl RbDataFrame {
  use EitherRustRubyFile::*;

  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let result = match get_either_file(rb_f, false)? {
@@ -225,7 +225,7 @@ impl RbDataFrame {
  _memory_map: bool,
  ) -> RbResult<Self> {
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
@@ -252,7 +252,7 @@ impl RbDataFrame {
  rechunk: bool,
  ) -> RbResult<Self> {
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  // rb_f = read_if_bytesio(rb_f);
@@ -29,8 +29,12 @@ impl RbExpr {
  self.inner.clone().dt().with_time_unit(tu.0).into()
  }

- pub fn dt_convert_time_zone(&self, tz: TimeZone) -> Self {
- self.inner.clone().dt().convert_time_zone(tz).into()
+ pub fn dt_convert_time_zone(&self, time_zone: String) -> Self {
+ self.inner
+ .clone()
+ .dt()
+ .convert_time_zone(time_zone.into())
+ .into()
  }

  pub fn dt_cast_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
@@ -46,7 +50,11 @@ impl RbExpr {
  self.inner
  .clone()
  .dt()
- .replace_time_zone(time_zone, ambiguous.inner.clone(), non_existent.0)
+ .replace_time_zone(
+ time_zone.map(|x| x.into()),
+ ambiguous.inner.clone(),
+ non_existent.0,
+ )
  .into()
  }

@@ -242,7 +242,7 @@ impl RbExpr {
  pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self {
  self.inner
  .clone()
- .value_counts(sort, parallel, name, normalize)
+ .value_counts(sort, parallel, name.as_str(), normalize)
  .into()
  }

@@ -1,5 +1,6 @@
  use magnus::{block::Proc, value::Opaque, Ruby};
  use polars::prelude::*;
+ use polars_utils::format_pl_smallstr;

  use crate::RbExpr;

@@ -15,9 +16,9 @@ impl RbExpr {
  .name()
  .map(move |name| {
  let lambda = Ruby::get().unwrap().get_inner(lambda);
- let out = lambda.call::<_, String>((name,));
+ let out = lambda.call::<_, String>((name.as_str(),));
  match out {
- Ok(out) => Ok(out),
+ Ok(out) => Ok(format_pl_smallstr!("{}", out)),
  Err(e) => Err(PolarsError::ComputeError(
  format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
  )),
@@ -19,6 +19,8 @@ impl RbExpr {
  exact: bool,
  cache: bool,
  ) -> Self {
+ let format = format.map(|x| x.into());
+
  let options = StrptimeOptions {
  format,
  strict,
@@ -33,12 +35,15 @@ impl RbExpr {
  &self,
  format: Option<String>,
  time_unit: Option<Wrap<TimeUnit>>,
- time_zone: Option<TimeZone>,
+ time_zone: Option<Wrap<TimeZone>>,
  strict: bool,
  exact: bool,
  cache: bool,
  ambiguous: &Self,
  ) -> Self {
+ let format = format.map(|x| x.into());
+ let time_zone = time_zone.map(|x| x.0);
+
  let options = StrptimeOptions {
  format,
  strict,
@@ -58,6 +63,8 @@ impl RbExpr {
  }

  pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
+ let format = format.map(|x| x.into());
+
  let options = StrptimeOptions {
  format,
  strict,
@@ -11,9 +11,9 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
  let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;

  let dict = RHash::new();
- for field in &metadata.schema.fields {
- let dt: Wrap<DataType> = Wrap((&field.data_type).into());
- dict.aset(field.name.clone(), dt)?;
+ for field in metadata.schema.iter_values() {
+ let dt: Wrap<DataType> = Wrap((&field.dtype).into());
+ dict.aset(field.name.as_str(), dt)?;
  }
  Ok(dict)
  }
@@ -26,9 +26,9 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
  let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;

  let dict = RHash::new();
- for field in arrow_schema.fields {
- let dt: Wrap<DataType> = Wrap((&field.data_type).into());
- dict.aset(field.name, dt)?;
+ for field in arrow_schema.iter_values() {
+ let dt: Wrap<DataType> = Wrap((&field.dtype).into());
+ dict.aset(field.name.as_str(), dt)?;
  }
  Ok(dict)
  }
@@ -56,13 +56,14 @@ pub fn datetime_range(
  every: String,
  closed: Wrap<ClosedWindow>,
  time_unit: Option<Wrap<TimeUnit>>,
- time_zone: Option<TimeZone>,
+ time_zone: Option<Wrap<TimeZone>>,
  ) -> RbExpr {
  let start = start.inner.clone();
  let end = end.inner.clone();
  let every = Duration::parse(&every);
  let closed = closed.0;
  let time_unit = time_unit.map(|x| x.0);
+ let time_zone = time_zone.map(|x| x.0);
  dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
  }

@@ -72,13 +73,14 @@ pub fn datetime_ranges(
  every: String,
  closed: Wrap<ClosedWindow>,
  time_unit: Option<Wrap<TimeUnit>>,
- time_zone: Option<TimeZone>,
+ time_zone: Option<Wrap<TimeZone>>,
  ) -> RbExpr {
  let start = start.inner.clone();
  let end = end.inner.clone();
  let every = Duration::parse(&every);
  let closed = closed.0;
  let time_unit = time_unit.map(|x| x.0);
+ let time_zone = time_zone.map(|x| x.0);
  dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
  }

@@ -59,7 +59,7 @@ impl RbLazyFrame {
  ) -> RbResult<Self> {
  let batch_size = batch_size.map(|v| v.0);
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });

@@ -106,14 +106,14 @@ impl RbLazyFrame {
  let separator = separator.as_bytes()[0];
  let eol_char = eol_char.as_bytes()[0];
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });

  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
  overwrite_dtype
  .into_iter()
- .map(|(name, dtype)| Field::new(&name, dtype.0))
+ .map(|(name, dtype)| Field::new((&*name).into(), dtype.0))
  .collect::<Schema>()
  });

@@ -128,7 +128,7 @@ impl RbLazyFrame {
  .with_dtype_overwrite(overwrite_dtype.map(Arc::new))
  // TODO add with_schema
  .with_low_memory(low_memory)
- .with_comment_prefix(comment_prefix.as_deref())
+ .with_comment_prefix(comment_prefix.map(|x| x.into()))
  .with_quote_char(quote_char)
  .with_eol_char(eol_char)
  .with_rechunk(rechunk)
@@ -176,7 +176,7 @@ impl RbLazyFrame {
  };

  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let hive_options = HiveOptions {
@@ -197,7 +197,7 @@ impl RbLazyFrame {
  use_statistics,
  hive_options,
  glob,
- include_file_paths: include_file_paths.map(Arc::from),
+ include_file_paths: include_file_paths.map(|x| x.into()),
  };

  let lf = if path.is_some() {
@@ -216,14 +216,13 @@ impl RbLazyFrame {
  cache: bool,
  rechunk: bool,
  row_index: Option<(String, IdxSize)>,
- memory_map: bool,
  hive_partitioning: Option<bool>,
  hive_schema: Option<Wrap<Schema>>,
  try_parse_hive_dates: bool,
  include_file_paths: Option<String>,
  ) -> RbResult<Self> {
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });

@@ -239,10 +238,9 @@ impl RbLazyFrame {
  cache,
  rechunk,
  row_index,
- memory_map,
  cloud_options: None,
  hive_options,
- include_file_paths: include_file_paths.map(Arc::from),
+ include_file_paths: include_file_paths.map(|x| x.into()),
  };
  let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
  Ok(lf.into())
@@ -593,8 +591,8 @@ impl RbLazyFrame {
  .force_parallel(force_parallel)
  .how(JoinType::AsOf(AsOfOptions {
  strategy: strategy.0,
- left_by: left_by.map(strings_to_smartstrings),
- right_by: right_by.map(strings_to_smartstrings),
+ left_by: left_by.map(strings_to_pl_smallstr),
+ right_by: right_by.map(strings_to_pl_smallstr),
  tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
  tolerance_str: tolerance_str.map(|s| s.into()),
  }))
@@ -744,8 +742,8 @@ impl RbLazyFrame {
  ) -> RbResult<Self> {
  let ldf = self.ldf.borrow().clone();
  Ok(match maintain_order {
- true => ldf.unique_stable(subset, keep.0),
- false => ldf.unique(subset, keep.0),
+ true => ldf.unique_stable_generic(subset, keep.0),
+ false => ldf.unique_generic(subset, keep.0),
  }
  .into())
  }
@@ -805,7 +803,11 @@ impl RbLazyFrame {
  }

  pub fn collect_schema(&self) -> RbResult<RHash> {
- let schema = self.ldf.borrow_mut().schema().map_err(RbPolarsErr::from)?;
+ let schema = self
+ .ldf
+ .borrow_mut()
+ .collect_schema()
+ .map_err(RbPolarsErr::from)?;

  let schema_dict = RHash::new();
  schema.iter_fields().for_each(|fld| {
@@ -813,7 +815,7 @@ impl RbLazyFrame {
  schema_dict
  .aset::<String, Value>(
  fld.name().to_string(),
- Wrap(fld.data_type().clone()).into_value(),
+ Wrap(fld.dtype().clone()).into_value(),
  )
  .unwrap();
  });
@@ -711,7 +711,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
  "new_from_parquet",
  function!(RbLazyFrame::new_from_parquet, 14),
  )?;
- class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
+ class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 9))?;
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
  class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
  class.define_method(