polars-df 0.13.0 → 0.14.0

@@ -1,6 +1,6 @@
  [package]
  name = "polars"
- version = "0.13.0"
+ version = "0.14.0"
  license = "MIT"
  authors = ["Andrew Kane <andrew@ankane.org>"]
  edition = "2021"
@@ -15,14 +15,13 @@ ahash = "0.8"
  chrono = "0.4"
  either = "1.8"
  magnus = "0.7"
- polars-core = "=0.42.0"
- polars-parquet = "=0.42.0"
- polars-utils = "=0.42.0"
+ polars-core = "=0.43.1"
+ polars-parquet = "=0.43.1"
+ polars-utils = "=0.43.1"
  serde_json = "1"
- smartstring = "1"

  [dependencies.polars]
- version = "=0.42.0"
+ version = "=0.43.1"
  features = [
  "abs",
  "approx_unique",
@@ -54,7 +54,7 @@ impl RbBatchedCsv {
  let null_values = null_values.map(|w| w.0);
  let eol_char = eol_char.as_bytes()[0];
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let quote_char = if let Some(s) = quote_char {
@@ -72,7 +72,7 @@ impl RbBatchedCsv {
  .iter()
  .map(|(name, dtype)| {
  let dtype = dtype.0.clone();
- Field::new(name, dtype)
+ Field::new((&**name).into(), dtype)
  })
  .collect::<Schema>()
  });
@@ -95,7 +95,7 @@ impl RbBatchedCsv {
  .with_projection(projection.map(Arc::new))
  .with_rechunk(rechunk)
  .with_chunk_size(chunk_size)
- .with_columns(columns.map(Arc::from))
+ .with_columns(columns.map(|x| x.into_iter().map(PlSmallStr::from_string).collect()))
  .with_n_threads(n_threads)
  .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
  .with_low_memory(low_memory)
@@ -51,7 +51,10 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
  AnyValue::Datetime(v, time_unit, time_zone) => {
  let time_unit = time_unit.to_ascii();
  utils()
- .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
+ .funcall(
+ "_to_ruby_datetime",
+ (v, time_unit, time_zone.as_ref().map(|v| v.to_string())),
+ )
  .unwrap()
  }
  AnyValue::Duration(v, time_unit) => {
@@ -122,7 +125,10 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
  fn get_list(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
  let v = RArray::from_value(ob).unwrap();
  if v.is_empty() {
- Ok(AnyValue::List(Series::new_empty("", &DataType::Null)))
+ Ok(AnyValue::List(Series::new_empty(
+ PlSmallStr::EMPTY,
+ &DataType::Null,
+ )))
  } else {
  let list = v;

@@ -142,7 +148,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
  avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
  }

- let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
+ let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, true)
  .map_err(RbPolarsErr::from)?;
  Ok(AnyValue::List(s))
  }
@@ -162,7 +168,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
  let key = String::try_convert(k)?;
  let val = Wrap::<AnyValue>::try_convert(v)?.0;
  let dtype = DataType::from(&val);
- keys.push(Field::new(&key, dtype));
+ keys.push(Field::new(key.into(), dtype));
  vals.push(val);
  Ok(ForEach::Continue)
  })?;
@@ -9,7 +9,7 @@ use crate::RbResult;
  impl TryConvert for Wrap<StringChunked> {
  fn try_convert(obj: Value) -> RbResult<Self> {
  let (seq, len) = get_rbseq(obj)?;
- let mut builder = StringChunkedBuilder::new("", len);
+ let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, len);

  for res in seq.into_iter() {
  let item = res;
@@ -25,7 +25,7 @@ impl TryConvert for Wrap<StringChunked> {
  impl TryConvert for Wrap<BinaryChunked> {
  fn try_convert(obj: Value) -> RbResult<Self> {
  let (seq, len) = get_rbseq(obj)?;
- let mut builder = BinaryChunkedBuilder::new("", len);
+ let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, len);

  for res in seq.into_iter() {
  let item = res;
@@ -90,7 +90,7 @@ impl IntoValue for Wrap<&DatetimeChunked> {
  fn into_value_with(self, _: &Ruby) -> Value {
  let utils = utils();
  let time_unit = Wrap(self.0.time_unit()).into_value();
- let time_zone = self.0.time_zone().clone().into_value();
+ let time_zone = self.0.time_zone().as_deref().map(|v| v.into_value());
  let iter = self.0.into_iter().map(|opt_v| {
  opt_v.map(|v| {
  utils
@@ -20,7 +20,6 @@ use polars::series::ops::NullBehavior;
  use polars_core::utils::arrow::array::Array;
  use polars_core::utils::materialize_dyn_int;
  use polars_utils::total_ord::{TotalEq, TotalHash};
- use smartstring::alias::String as SmartString;

  use crate::object::OBJECT_NAME;
  use crate::rb_modules::series;
@@ -84,14 +83,26 @@ pub(crate) fn to_series(s: RbSeries) -> Value {
  .unwrap()
  }

+ impl TryConvert for Wrap<PlSmallStr> {
+ fn try_convert(ob: Value) -> RbResult<Self> {
+ Ok(Wrap((&*String::try_convert(ob)?).into()))
+ }
+ }
+
  impl TryConvert for Wrap<NullValues> {
  fn try_convert(ob: Value) -> RbResult<Self> {
  if let Ok(s) = String::try_convert(ob) {
- Ok(Wrap(NullValues::AllColumnsSingle(s)))
+ Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))
  } else if let Ok(s) = Vec::<String>::try_convert(ob) {
- Ok(Wrap(NullValues::AllColumns(s)))
+ Ok(Wrap(NullValues::AllColumns(
+ s.into_iter().map(|x| (&*x).into()).collect(),
+ )))
  } else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
- Ok(Wrap(NullValues::Named(s)))
+ Ok(Wrap(NullValues::Named(
+ s.into_iter()
+ .map(|(a, b)| ((&*a).into(), (&*b).into()))
+ .collect(),
+ )))
  } else {
  Err(RbPolarsErr::other(
  "could not extract value from null_values argument".into(),
@@ -189,7 +200,7 @@ impl IntoValue for Wrap<DataType> {
  DataType::Datetime(tu, tz) => {
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
  datetime_class
- .funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
  .unwrap()
  }
  DataType::Duration(tu) => {
@@ -210,7 +221,9 @@ impl IntoValue for Wrap<DataType> {
  // we should always have an initialized rev_map coming from rust
  let categories = rev_map.as_ref().unwrap().get_categories();
  let class = pl.const_get::<_, Value>("Enum").unwrap();
- let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
+ let s =
+ Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
+ .unwrap();
  let series = to_series(s.into());
  class.funcall::<_, _, Value>("new", (series,)).unwrap()
  }
@@ -222,7 +235,7 @@ impl IntoValue for Wrap<DataType> {
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
  let iter = fields.iter().map(|fld| {
  let name = fld.name().as_str();
- let dtype = Wrap(fld.data_type().clone());
+ let dtype = Wrap(fld.dtype().clone());
  field_class
  .funcall::<_, _, Value>("new", (name, dtype))
  .unwrap()
@@ -276,7 +289,7 @@ impl TryConvert for Wrap<Field> {
  fn try_convert(ob: Value) -> RbResult<Self> {
  let name: String = ob.funcall("name", ())?;
  let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
- Ok(Wrap(Field::new(&name, dtype.0)))
+ Ok(Wrap(Field::new((&*name).into(), dtype.0)))
  }
  }

@@ -341,7 +354,7 @@ impl TryConvert for Wrap<DataType> {
  let s = get_series(categories)?;
  let ca = s.str().map_err(RbPolarsErr::from)?;
  let categories = ca.downcast_iter().next().unwrap().clone();
- create_enum_data_type(categories)
+ create_enum_dtype(categories)
  }
  "Polars::Date" => DataType::Date,
  "Polars::Time" => DataType::Time,
@@ -357,8 +370,8 @@ impl TryConvert for Wrap<DataType> {
  "Polars::Datetime" => {
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
- let time_zone = ob.funcall("time_zone", ())?;
- DataType::Datetime(time_unit, time_zone)
+ let time_zone: Option<String> = ob.funcall("time_zone", ())?;
+ DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
  }
  "Polars::Decimal" => {
  let precision = ob.funcall("precision", ())?;
@@ -463,7 +476,7 @@ impl TryConvert for Wrap<Schema> {

  let mut schema = Vec::new();
  dict.foreach(|key: String, val: Wrap<DataType>| {
- schema.push(Ok(Field::new(&key, val.0)));
+ schema.push(Ok(Field::new((&*key).into(), val.0)));
  Ok(ForEach::Continue)
  })
  .unwrap();
@@ -1053,14 +1066,6 @@ pub fn parse_parquet_compression(
  Ok(parsed)
  }

- pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
- where
- I: IntoIterator<Item = S>,
- S: AsRef<str>,
- {
- container.into_iter().map(|s| s.as_ref().into()).collect()
- }
-
  impl TryConvert for Wrap<NonZeroUsize> {
  fn try_convert(ob: Value) -> RbResult<Self> {
  let v = usize::try_convert(ob)?;
@@ -1069,3 +1074,14 @@ impl TryConvert for Wrap<NonZeroUsize> {
  .ok_or(RbValueError::new_err("must be non-zero".into()))
  }
  }
+
+ pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
+ where
+ I: IntoIterator<Item = S>,
+ S: AsRef<str>,
+ {
+ container
+ .into_iter()
+ .map(|s| PlSmallStr::from_str(s.as_ref()))
+ .collect()
+ }
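
Note: the polars 0.42 → 0.43 bump replaces the smartstring-backed column names with PlSmallStr throughout, which is why strings_to_smartstrings above became strings_to_pl_smallstr. A minimal sketch of the conversion patterns this diff relies on (hypothetical snippet; PlSmallStr is assumed to be re-exported through use polars::prelude::* as in this crate's other files):

    use polars::prelude::*; // assumed re-export of PlSmallStr

    fn pl_smallstr_examples() {
        let owned = PlSmallStr::from_string(String::from("a")); // take ownership of a String
        let copied = PlSmallStr::from_str("b"); // copy from a &str
        let constant = PlSmallStr::from_static("category"); // build from a &'static str
        let empty = PlSmallStr::EMPTY; // used for unnamed Series and builders
        let converted: PlSmallStr = "c".into(); // From<&str>, as in the name.into() calls above
        assert_eq!(converted.as_str(), "c");
        let _ = (owned, copied, constant, empty);
    }
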
@@ -79,7 +79,7 @@ fn update_schema_from_rows(
  rows: &[Row],
  infer_schema_length: Option<usize>,
  ) -> RbResult<()> {
- let schema_is_complete = schema.iter_dtypes().all(|dtype| dtype.is_known());
+ let schema_is_complete = schema.iter_values().all(|dtype| dtype.is_known());
  if schema_is_complete {
  return Ok(());
  }
@@ -89,7 +89,7 @@ fn update_schema_from_rows(
  rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
  let inferred_dtypes_slice = inferred_dtypes.as_slice();

- for (i, dtype) in schema.iter_dtypes_mut().enumerate() {
+ for (i, dtype) in schema.iter_values_mut().enumerate() {
  if !dtype.is_known() {
  *dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
  polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
@@ -110,7 +110,7 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
  }

  fn erase_decimal_precision_scale(schema: &mut Schema) {
- for dtype in schema.iter_dtypes_mut() {
+ for dtype in schema.iter_values_mut() {
  if let DataType::Decimal(_, _) = dtype {
  *dtype = DataType::Decimal(None, None)
  }
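
Note: the iter_dtypes / iter_dtypes_mut calls above are renamed to iter_values / iter_values_mut in polars 0.43; they still yield the schema's dtypes in column order. A small illustrative sketch under that assumption (has_unknown is a hypothetical helper, not part of this crate):

    fn has_unknown(schema: &Schema) -> bool {
        // same iteration pattern as update_schema_from_rows above
        schema.iter_values().any(|dtype| !dtype.is_known())
    }
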
@@ -123,7 +123,7 @@ where
  {
  let fields = column_names
  .into_iter()
- .map(|c| Field::new(c, DataType::Unknown(Default::default())));
+ .map(|c| Field::new(c.into(), DataType::Unknown(Default::default())));
  Schema::from_iter(fields)
  }

@@ -9,6 +9,7 @@ use crate::map::dataframe::{
  apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
  apply_lambda_with_utf8_out_type,
  };
+ use crate::prelude::strings_to_pl_smallstr;
  use crate::series::{to_rbseries_collection, to_series_collection};
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};

@@ -254,7 +255,7 @@ impl RbDataFrame {
  }

  pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
- let indices = IdxCa::from_vec("", indices);
+ let indices = IdxCa::from_vec("".into(), indices);
  let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
  Ok(RbDataFrame::new(df))
  }
@@ -332,7 +333,7 @@ impl RbDataFrame {
  let df = self
  .df
  .borrow()
- .with_row_index(&name, offset)
+ .with_row_index(name.into(), offset)
  .map_err(RbPolarsErr::from)?;
  Ok(df.into())
  }
@@ -349,8 +350,8 @@ impl RbDataFrame {
  variable_name: Option<String>,
  ) -> RbResult<Self> {
  let args = UnpivotArgsIR {
- on: strings_to_smartstrings(on),
- index: strings_to_smartstrings(index),
+ on: strings_to_pl_smallstr(on),
+ index: strings_to_pl_smallstr(index),
  value_name: value_name.map(|s| s.into()),
  variable_name: variable_name.map(|s| s.into()),
  };
@@ -581,7 +582,7 @@ impl RbDataFrame {
  }

  pub fn to_struct(&self, name: String) -> RbSeries {
- let s = self.df.borrow().clone().into_struct(&name);
+ let s = self.df.borrow().clone().into_struct(name.into());
  s.into_series().into()
  }

@@ -50,7 +50,7 @@ impl RbDataFrame {
  let null_values = null_values.map(|w| w.0);
  let eol_char = eol_char.as_bytes()[0];
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let quote_char = if let Some(s) = quote_char {
@@ -68,7 +68,7 @@ impl RbDataFrame {
  .iter()
  .map(|(name, dtype)| {
  let dtype = dtype.0.clone();
- Field::new(name, dtype)
+ Field::new((&**name).into(), dtype)
  })
  .collect::<Schema>()
  });
@@ -91,7 +91,7 @@ impl RbDataFrame {
  .with_projection(projection.map(Arc::new))
  .with_rechunk(rechunk)
  .with_chunk_size(chunk_size)
- .with_columns(columns.map(Arc::from))
+ .with_columns(columns.map(|x| x.into_iter().map(|x| x.into()).collect()))
  .with_n_threads(n_threads)
  .with_schema_overwrite(overwrite_dtype.map(Arc::new))
  .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
@@ -135,7 +135,7 @@ impl RbDataFrame {
  use EitherRustRubyFile::*;

  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let result = match get_either_file(rb_f, false)? {
@@ -225,7 +225,7 @@ impl RbDataFrame {
  _memory_map: bool,
  ) -> RbResult<Self> {
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
@@ -252,7 +252,7 @@ impl RbDataFrame {
  rechunk: bool,
  ) -> RbResult<Self> {
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  // rb_f = read_if_bytesio(rb_f);
@@ -29,8 +29,12 @@ impl RbExpr {
  self.inner.clone().dt().with_time_unit(tu.0).into()
  }

- pub fn dt_convert_time_zone(&self, tz: TimeZone) -> Self {
- self.inner.clone().dt().convert_time_zone(tz).into()
+ pub fn dt_convert_time_zone(&self, time_zone: String) -> Self {
+ self.inner
+ .clone()
+ .dt()
+ .convert_time_zone(time_zone.into())
+ .into()
  }

  pub fn dt_cast_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
@@ -46,7 +50,11 @@ impl RbExpr {
  self.inner
  .clone()
  .dt()
- .replace_time_zone(time_zone, ambiguous.inner.clone(), non_existent.0)
+ .replace_time_zone(
+ time_zone.map(|x| x.into()),
+ ambiguous.inner.clone(),
+ non_existent.0,
+ )
  .into()
  }

@@ -242,7 +242,7 @@ impl RbExpr {
  pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self {
  self.inner
  .clone()
- .value_counts(sort, parallel, name, normalize)
+ .value_counts(sort, parallel, name.as_str(), normalize)
  .into()
  }

@@ -1,5 +1,6 @@
  use magnus::{block::Proc, value::Opaque, Ruby};
  use polars::prelude::*;
+ use polars_utils::format_pl_smallstr;

  use crate::RbExpr;

@@ -15,9 +16,9 @@ impl RbExpr {
  .name()
  .map(move |name| {
  let lambda = Ruby::get().unwrap().get_inner(lambda);
- let out = lambda.call::<_, String>((name,));
+ let out = lambda.call::<_, String>((name.as_str(),));
  match out {
- Ok(out) => Ok(out),
+ Ok(out) => Ok(format_pl_smallstr!("{}", out)),
  Err(e) => Err(PolarsError::ComputeError(
  format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
  )),
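
Note: the name.map callback now has to hand back a PlSmallStr rather than a String, hence the format_pl_smallstr! conversion above. A minimal usage sketch (assumed to behave like format! but producing a PlSmallStr):

    use polars_utils::format_pl_smallstr;

    fn rename_example() {
        let renamed = format_pl_smallstr!("{}_renamed", "col"); // format!-style args, PlSmallStr output
        assert_eq!(renamed.as_str(), "col_renamed");
    }
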
@@ -19,6 +19,8 @@ impl RbExpr {
  exact: bool,
  cache: bool,
  ) -> Self {
+ let format = format.map(|x| x.into());
+
  let options = StrptimeOptions {
  format,
  strict,
@@ -33,12 +35,15 @@ impl RbExpr {
  &self,
  format: Option<String>,
  time_unit: Option<Wrap<TimeUnit>>,
- time_zone: Option<TimeZone>,
+ time_zone: Option<Wrap<TimeZone>>,
  strict: bool,
  exact: bool,
  cache: bool,
  ambiguous: &Self,
  ) -> Self {
+ let format = format.map(|x| x.into());
+ let time_zone = time_zone.map(|x| x.0);
+
  let options = StrptimeOptions {
  format,
  strict,
@@ -58,6 +63,8 @@ impl RbExpr {
  }

  pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
+ let format = format.map(|x| x.into());
+
  let options = StrptimeOptions {
  format,
  strict,
@@ -11,9 +11,9 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
  let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;

  let dict = RHash::new();
- for field in &metadata.schema.fields {
- let dt: Wrap<DataType> = Wrap((&field.data_type).into());
- dict.aset(field.name.clone(), dt)?;
+ for field in metadata.schema.iter_values() {
+ let dt: Wrap<DataType> = Wrap((&field.dtype).into());
+ dict.aset(field.name.as_str(), dt)?;
  }
  Ok(dict)
  }
@@ -26,9 +26,9 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
  let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;

  let dict = RHash::new();
- for field in arrow_schema.fields {
- let dt: Wrap<DataType> = Wrap((&field.data_type).into());
- dict.aset(field.name, dt)?;
+ for field in arrow_schema.iter_values() {
+ let dt: Wrap<DataType> = Wrap((&field.dtype).into());
+ dict.aset(field.name.as_str(), dt)?;
  }
  Ok(dict)
  }
@@ -56,13 +56,14 @@ pub fn datetime_range(
  every: String,
  closed: Wrap<ClosedWindow>,
  time_unit: Option<Wrap<TimeUnit>>,
- time_zone: Option<TimeZone>,
+ time_zone: Option<Wrap<TimeZone>>,
  ) -> RbExpr {
  let start = start.inner.clone();
  let end = end.inner.clone();
  let every = Duration::parse(&every);
  let closed = closed.0;
  let time_unit = time_unit.map(|x| x.0);
+ let time_zone = time_zone.map(|x| x.0);
  dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
  }

@@ -72,13 +73,14 @@ pub fn datetime_ranges(
  every: String,
  closed: Wrap<ClosedWindow>,
  time_unit: Option<Wrap<TimeUnit>>,
- time_zone: Option<TimeZone>,
+ time_zone: Option<Wrap<TimeZone>>,
  ) -> RbExpr {
  let start = start.inner.clone();
  let end = end.inner.clone();
  let every = Duration::parse(&every);
  let closed = closed.0;
  let time_unit = time_unit.map(|x| x.0);
+ let time_zone = time_zone.map(|x| x.0);
  dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
  }

@@ -59,7 +59,7 @@ impl RbLazyFrame {
  ) -> RbResult<Self> {
  let batch_size = batch_size.map(|v| v.0);
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });

@@ -106,14 +106,14 @@ impl RbLazyFrame {
  let separator = separator.as_bytes()[0];
  let eol_char = eol_char.as_bytes()[0];
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });

  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
  overwrite_dtype
  .into_iter()
- .map(|(name, dtype)| Field::new(&name, dtype.0))
+ .map(|(name, dtype)| Field::new((&*name).into(), dtype.0))
  .collect::<Schema>()
  });

@@ -128,7 +128,7 @@ impl RbLazyFrame {
  .with_dtype_overwrite(overwrite_dtype.map(Arc::new))
  // TODO add with_schema
  .with_low_memory(low_memory)
- .with_comment_prefix(comment_prefix.as_deref())
+ .with_comment_prefix(comment_prefix.map(|x| x.into()))
  .with_quote_char(quote_char)
  .with_eol_char(eol_char)
  .with_rechunk(rechunk)
@@ -176,7 +176,7 @@ impl RbLazyFrame {
  };

  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });
  let hive_options = HiveOptions {
@@ -197,7 +197,7 @@ impl RbLazyFrame {
  use_statistics,
  hive_options,
  glob,
- include_file_paths: include_file_paths.map(Arc::from),
+ include_file_paths: include_file_paths.map(|x| x.into()),
  };

  let lf = if path.is_some() {
@@ -216,14 +216,13 @@ impl RbLazyFrame {
  cache: bool,
  rechunk: bool,
  row_index: Option<(String, IdxSize)>,
- memory_map: bool,
  hive_partitioning: Option<bool>,
  hive_schema: Option<Wrap<Schema>>,
  try_parse_hive_dates: bool,
  include_file_paths: Option<String>,
  ) -> RbResult<Self> {
  let row_index = row_index.map(|(name, offset)| RowIndex {
- name: Arc::from(name.as_str()),
+ name: name.into(),
  offset,
  });

@@ -239,10 +238,9 @@ impl RbLazyFrame {
  cache,
  rechunk,
  row_index,
- memory_map,
  cloud_options: None,
  hive_options,
- include_file_paths: include_file_paths.map(Arc::from),
+ include_file_paths: include_file_paths.map(|x| x.into()),
  };
  let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
  Ok(lf.into())
@@ -593,8 +591,8 @@ impl RbLazyFrame {
  .force_parallel(force_parallel)
  .how(JoinType::AsOf(AsOfOptions {
  strategy: strategy.0,
- left_by: left_by.map(strings_to_smartstrings),
- right_by: right_by.map(strings_to_smartstrings),
+ left_by: left_by.map(strings_to_pl_smallstr),
+ right_by: right_by.map(strings_to_pl_smallstr),
  tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
  tolerance_str: tolerance_str.map(|s| s.into()),
  }))
@@ -744,8 +742,8 @@ impl RbLazyFrame {
  ) -> RbResult<Self> {
  let ldf = self.ldf.borrow().clone();
  Ok(match maintain_order {
- true => ldf.unique_stable(subset, keep.0),
- false => ldf.unique(subset, keep.0),
+ true => ldf.unique_stable_generic(subset, keep.0),
+ false => ldf.unique_generic(subset, keep.0),
  }
  .into())
  }
@@ -805,7 +803,11 @@ impl RbLazyFrame {
  }

  pub fn collect_schema(&self) -> RbResult<RHash> {
- let schema = self.ldf.borrow_mut().schema().map_err(RbPolarsErr::from)?;
+ let schema = self
+ .ldf
+ .borrow_mut()
+ .collect_schema()
+ .map_err(RbPolarsErr::from)?;

  let schema_dict = RHash::new();
  schema.iter_fields().for_each(|fld| {
@@ -813,7 +815,7 @@ impl RbLazyFrame {
  schema_dict
  .aset::<String, Value>(
  fld.name().to_string(),
- Wrap(fld.data_type().clone()).into_value(),
+ Wrap(fld.dtype().clone()).into_value(),
  )
  .unwrap();
  });
@@ -711,7 +711,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
  "new_from_parquet",
  function!(RbLazyFrame::new_from_parquet, 14),
  )?;
- class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
+ class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 9))?;
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
  class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
  class.define_method(