polars-df 0.10.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,10 +1,11 @@
1
1
  use magnus::{prelude::*, RArray};
2
2
  use polars_core::prelude::*;
3
3
 
4
+ use crate::any_value::rb_object_to_any_value;
4
5
  use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
5
6
  use crate::prelude::ObjectValue;
6
7
  use crate::series::to_series_collection;
7
- use crate::{RbPolarsErr, RbResult, RbSeries, RbValueError};
8
+ use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
8
9
 
9
10
  impl RbSeries {
10
11
  pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
@@ -35,36 +36,28 @@ impl RbSeries {
35
36
  }
36
37
  }
37
38
 
38
- fn new_primitive<T>(name: &str, obj: RArray, strict: bool) -> RbResult<RbSeries>
39
+ fn new_primitive<T>(name: &str, values: RArray, _strict: bool) -> RbResult<RbSeries>
39
40
  where
40
41
  T: PolarsNumericType,
41
42
  ChunkedArray<T>: IntoSeries,
42
43
  T::Native: magnus::TryConvert,
43
44
  {
44
- let len = obj.len();
45
+ let len = values.len();
45
46
  let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
46
47
 
47
- unsafe {
48
- for item in obj.as_slice().iter() {
49
- if item.is_nil() {
50
- builder.append_null()
51
- } else {
52
- match T::Native::try_convert(*item) {
53
- Ok(val) => builder.append_value(val),
54
- Err(e) => {
55
- if strict {
56
- return Err(e);
57
- }
58
- builder.append_null()
59
- }
60
- }
61
- }
48
+ for res in values.into_iter() {
49
+ let value = res;
50
+ if value.is_nil() {
51
+ builder.append_null()
52
+ } else {
53
+ let v = <T::Native>::try_convert(value)?;
54
+ builder.append_value(v)
62
55
  }
63
56
  }
64
- let ca = builder.finish();
65
57
 
58
+ let ca = builder.finish();
66
59
  let s = ca.into_series();
67
- Ok(RbSeries::new(s))
60
+ Ok(s.into())
68
61
  }
69
62
 
70
63
  // Init with lists that can contain Nones
@@ -91,18 +84,50 @@ init_method_opt!(new_opt_f64, Float64Type, f64);
91
84
 
92
85
  fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
93
86
  let mut val = Vec::with_capacity(arr.len());
94
- for v in arr.each() {
95
- val.push(Wrap::<AnyValue<'s>>::try_convert(v?)?);
87
+ for v in arr.into_iter() {
88
+ val.push(Wrap::<AnyValue<'s>>::try_convert(v)?);
96
89
  }
97
90
  Ok(val)
98
91
  }
99
92
 
100
93
  impl RbSeries {
101
- pub fn new_from_anyvalues(name: String, val: RArray, strict: bool) -> RbResult<Self> {
102
- let val = vec_wrap_any_value(val)?;
103
- let avs = slice_extract_wrapped(&val);
94
+ pub fn new_from_any_values(name: String, values: RArray, strict: bool) -> RbResult<Self> {
95
+ let any_values_result = vec_wrap_any_value(values);
104
96
  // from anyvalues is fallible
105
- let s = Series::from_any_values(&name, avs, strict).map_err(RbPolarsErr::from)?;
97
+ let result = any_values_result.and_then(|avs| {
98
+ let avs = slice_extract_wrapped(&avs);
99
+ let s = Series::from_any_values(&name, avs, strict).map_err(|e| {
100
+ RbTypeError::new_err(format!(
101
+ "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
102
+ ))
103
+ })?;
104
+ Ok(s.into())
105
+ });
106
+
107
+ // Fall back to Object type for non-strict construction.
108
+ if !strict && result.is_err() {
109
+ return Self::new_object(name, values, strict);
110
+ }
111
+
112
+ result
113
+ }
114
+
115
+ pub fn new_from_any_values_and_dtype(
116
+ name: String,
117
+ values: RArray,
118
+ dtype: Wrap<DataType>,
119
+ strict: bool,
120
+ ) -> RbResult<Self> {
121
+ let any_values = values
122
+ .into_iter()
123
+ .map(|v| rb_object_to_any_value(v, strict))
124
+ .collect::<RbResult<Vec<AnyValue>>>()?;
125
+ let s = Series::from_any_values_and_dtype(&name, any_values.as_slice(), &dtype.0, strict)
126
+ .map_err(|e| {
127
+ RbTypeError::new_err(format!(
128
+ "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
129
+ ))
130
+ })?;
106
131
  Ok(s.into())
107
132
  }
108
133
 
@@ -125,9 +150,9 @@ impl RbSeries {
125
150
 
126
151
  pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
127
152
  let val = val
128
- .each()
129
- .map(|v| v.map(ObjectValue::from))
130
- .collect::<RbResult<Vec<ObjectValue>>>()?;
153
+ .into_iter()
154
+ .map(ObjectValue::from)
155
+ .collect::<Vec<ObjectValue>>();
131
156
  let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
132
157
  Ok(s.into())
133
158
  }
@@ -163,15 +188,8 @@ impl RbSeries {
163
188
  }
164
189
  }
165
190
 
166
- pub fn new_decimal(name: String, val: RArray, strict: bool) -> RbResult<Self> {
167
- let val = vec_wrap_any_value(val)?;
168
- // TODO: do we have to respect 'strict' here? it's possible if we want to
169
- let avs = slice_extract_wrapped(&val);
170
- // create a fake dtype with a placeholder "none" scale, to be inferred later
171
- let dtype = DataType::Decimal(None, None);
172
- let s = Series::from_any_values_and_dtype(&name, avs, &dtype, strict)
173
- .map_err(RbPolarsErr::from)?;
174
- Ok(s.into())
191
+ pub fn new_decimal(name: String, values: RArray, strict: bool) -> RbResult<Self> {
192
+ Self::new_from_any_values(name, values, strict)
175
193
  }
176
194
 
177
195
  pub fn repeat(
@@ -1,57 +1,139 @@
1
- use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
1
+ use magnus::{value::qnil, IntoValue, RArray, Value};
2
2
  use polars_core::prelude::*;
3
3
 
4
- use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
4
+ use crate::prelude::*;
5
+ use crate::RbSeries;
5
6
 
6
7
  impl RbSeries {
7
- /// For numeric types, this should only be called for Series with null types.
8
- /// This will cast to floats so that `nil = NAN`
9
- pub fn to_numo(&self) -> RbResult<Value> {
10
- let s = &self.series.borrow();
11
- match s.dtype() {
12
- DataType::String => {
13
- let ca = s.str().unwrap();
8
+ /// Convert this Series to a Ruby array.
9
+ /// This operation copies data.
10
+ pub fn to_a(&self) -> Value {
11
+ let series = &self.series.borrow();
14
12
 
15
- // TODO make more efficient
16
- let np_arr = RArray::from_iter(ca);
17
- class::object()
18
- .const_get::<_, RModule>("Numo")?
19
- .const_get::<_, RClass>("RObject")?
20
- .funcall("cast", (np_arr,))
21
- }
22
- dt if dt.is_numeric() => {
23
- if s.bit_repr_is_large() {
24
- let s = s.cast(&DataType::Float64).unwrap();
25
- let ca = s.f64().unwrap();
26
- // TODO make more efficient
27
- let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
28
- Some(v) => v,
29
- None => f64::NAN,
30
- }));
31
- class::object()
32
- .const_get::<_, RModule>("Numo")?
33
- .const_get::<_, RClass>("DFloat")?
34
- .funcall("cast", (np_arr,))
35
- } else {
36
- let s = s.cast(&DataType::Float32).unwrap();
37
- let ca = s.f32().unwrap();
38
- // TODO make more efficient
39
- let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
40
- Some(v) => v,
41
- None => f32::NAN,
42
- }));
43
- class::object()
44
- .const_get::<_, RModule>("Numo")?
45
- .const_get::<_, RClass>("SFloat")?
46
- .funcall("cast", (np_arr,))
47
- }
48
- }
49
- dt => {
50
- raise_err!(
51
- format!("'to_numo' not supported for dtype: {dt:?}"),
52
- ComputeError
53
- );
54
- }
13
+ fn to_a_recursive(series: &Series) -> Value {
14
+ let rblist = match series.dtype() {
15
+ DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
16
+ DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
17
+ DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
18
+ DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
19
+ DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
20
+ DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
21
+ DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
22
+ DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
23
+ DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
24
+ DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
25
+ DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
26
+ DataType::Categorical(_, _) | DataType::Enum(_, _) => {
27
+ RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
28
+ }
29
+ DataType::Object(_, _) => {
30
+ let v = RArray::with_capacity(series.len());
31
+ for i in 0..series.len() {
32
+ let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
33
+ match obj {
34
+ Some(val) => v.push(val.to_object()).unwrap(),
35
+ None => v.push(qnil()).unwrap(),
36
+ };
37
+ }
38
+ v.into_value()
39
+ }
40
+ DataType::List(_) => {
41
+ let v = RArray::new();
42
+ let ca = series.list().unwrap();
43
+ for opt_s in ca.amortized_iter() {
44
+ match opt_s {
45
+ None => {
46
+ v.push(qnil()).unwrap();
47
+ }
48
+ Some(s) => {
49
+ let rblst = to_a_recursive(s.as_ref());
50
+ v.push(rblst).unwrap();
51
+ }
52
+ }
53
+ }
54
+ v.into_value()
55
+ }
56
+ DataType::Array(_, _) => {
57
+ let v = RArray::new();
58
+ let ca = series.array().unwrap();
59
+ for opt_s in ca.amortized_iter() {
60
+ match opt_s {
61
+ None => {
62
+ v.push(qnil()).unwrap();
63
+ }
64
+ Some(s) => {
65
+ let rblst = to_a_recursive(s.as_ref());
66
+ v.push(rblst).unwrap();
67
+ }
68
+ }
69
+ }
70
+ v.into_value()
71
+ }
72
+ DataType::Date => {
73
+ let ca = series.date().unwrap();
74
+ return Wrap(ca).into_value();
75
+ }
76
+ DataType::Time => {
77
+ let ca = series.time().unwrap();
78
+ return Wrap(ca).into_value();
79
+ }
80
+ DataType::Datetime(_, _) => {
81
+ let ca = series.datetime().unwrap();
82
+ return Wrap(ca).into_value();
83
+ }
84
+ DataType::Decimal(_, _) => {
85
+ let ca = series.decimal().unwrap();
86
+ return Wrap(ca).into_value();
87
+ }
88
+ DataType::String => {
89
+ let ca = series.str().unwrap();
90
+ return Wrap(ca).into_value();
91
+ }
92
+ DataType::Struct(_) => {
93
+ let ca = series.struct_().unwrap();
94
+ return Wrap(ca).into_value();
95
+ }
96
+ DataType::Duration(_) => {
97
+ let ca = series.duration().unwrap();
98
+ return Wrap(ca).into_value();
99
+ }
100
+ DataType::Binary => {
101
+ let ca = series.binary().unwrap();
102
+ return Wrap(ca).into_value();
103
+ }
104
+ DataType::Null => {
105
+ let null: Option<u8> = None;
106
+ let n = series.len();
107
+ let iter = std::iter::repeat(null).take(n);
108
+ use std::iter::{Repeat, Take};
109
+ struct NullIter {
110
+ iter: Take<Repeat<Option<u8>>>,
111
+ n: usize,
112
+ }
113
+ impl Iterator for NullIter {
114
+ type Item = Option<u8>;
115
+
116
+ fn next(&mut self) -> Option<Self::Item> {
117
+ self.iter.next()
118
+ }
119
+ fn size_hint(&self) -> (usize, Option<usize>) {
120
+ (self.n, Some(self.n))
121
+ }
122
+ }
123
+ impl ExactSizeIterator for NullIter {}
124
+
125
+ RArray::from_iter(NullIter { iter, n }).into_value()
126
+ }
127
+ DataType::Unknown(_) => {
128
+ panic!("to_a not implemented for unknown")
129
+ }
130
+ DataType::BinaryOffset => {
131
+ unreachable!()
132
+ }
133
+ };
134
+ rblist
55
135
  }
136
+
137
+ to_a_recursive(series)
56
138
  }
57
139
  }
@@ -5,7 +5,7 @@ mod construction;
5
5
  mod export;
6
6
  mod scatter;
7
7
 
8
- use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
8
+ use magnus::{exception, prelude::*, Error, IntoValue, RArray, Value};
9
9
  use polars::prelude::*;
10
10
  use polars::series::IsSorted;
11
11
  use std::cell::RefCell;
@@ -36,8 +36,8 @@ impl RbSeries {
36
36
 
37
37
  pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
38
38
  let mut series = Vec::new();
39
- for item in rs.each() {
40
- series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
39
+ for item in rs.into_iter() {
40
+ series.push(<&RbSeries>::try_convert(item)?.series.borrow().clone());
41
41
  }
42
42
  Ok(series)
43
43
  }
@@ -247,13 +247,24 @@ impl RbSeries {
247
247
  .into())
248
248
  }
249
249
 
250
- pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
251
- let df = self
250
+ pub fn value_counts(
251
+ &self,
252
+ sort: bool,
253
+ parallel: bool,
254
+ name: String,
255
+ normalize: bool,
256
+ ) -> RbResult<RbDataFrame> {
257
+ let out = self
252
258
  .series
253
259
  .borrow()
254
- .value_counts(true, sorted)
260
+ .value_counts(sort, parallel, name, normalize)
255
261
  .map_err(RbPolarsErr::from)?;
256
- Ok(df.into())
262
+ Ok(out.into())
263
+ }
264
+
265
+ pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
266
+ let length = length.unwrap_or_else(|| self.series.borrow().len());
267
+ self.series.borrow().slice(offset, length).into()
257
268
  }
258
269
 
259
270
  pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
@@ -301,10 +312,20 @@ impl RbSeries {
301
312
  Ok(s.into())
302
313
  }
303
314
 
304
- pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
305
- if strict {
306
- self.series.borrow().eq(&other.series.borrow())
307
- } else if null_equal {
315
+ pub fn equals(
316
+ &self,
317
+ other: &RbSeries,
318
+ check_dtypes: bool,
319
+ check_names: bool,
320
+ null_equal: bool,
321
+ ) -> bool {
322
+ if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
323
+ return false;
324
+ }
325
+ if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
326
+ return false;
327
+ }
328
+ if null_equal {
308
329
  self.series.borrow().equals_missing(&other.series.borrow())
309
330
  } else {
310
331
  self.series.borrow().equals(&other.series.borrow())
@@ -325,136 +346,6 @@ impl RbSeries {
325
346
  self.series.borrow().len()
326
347
  }
327
348
 
328
- pub fn to_a(&self) -> Value {
329
- let series = &self.series.borrow();
330
-
331
- fn to_a_recursive(series: &Series) -> Value {
332
- let rblist = match series.dtype() {
333
- DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
334
- DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
335
- DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
336
- DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
337
- DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
338
- DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
339
- DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
340
- DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
341
- DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
342
- DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
343
- DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
344
- DataType::Categorical(_, _) | DataType::Enum(_, _) => {
345
- RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
346
- }
347
- DataType::Object(_, _) => {
348
- let v = RArray::with_capacity(series.len());
349
- for i in 0..series.len() {
350
- let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
351
- match obj {
352
- Some(val) => v.push(val.to_object()).unwrap(),
353
- None => v.push(qnil()).unwrap(),
354
- };
355
- }
356
- v.into_value()
357
- }
358
- DataType::List(_) => {
359
- let v = RArray::new();
360
- let ca = series.list().unwrap();
361
- for opt_s in unsafe { ca.amortized_iter() } {
362
- match opt_s {
363
- None => {
364
- v.push(qnil()).unwrap();
365
- }
366
- Some(s) => {
367
- let rblst = to_a_recursive(s.as_ref());
368
- v.push(rblst).unwrap();
369
- }
370
- }
371
- }
372
- v.into_value()
373
- }
374
- DataType::Array(_, _) => {
375
- let v = RArray::new();
376
- let ca = series.array().unwrap();
377
- for opt_s in ca.amortized_iter() {
378
- match opt_s {
379
- None => {
380
- v.push(qnil()).unwrap();
381
- }
382
- Some(s) => {
383
- let rblst = to_a_recursive(s.as_ref());
384
- v.push(rblst).unwrap();
385
- }
386
- }
387
- }
388
- v.into_value()
389
- }
390
- DataType::Date => {
391
- let ca = series.date().unwrap();
392
- return Wrap(ca).into_value();
393
- }
394
- DataType::Time => {
395
- let ca = series.time().unwrap();
396
- return Wrap(ca).into_value();
397
- }
398
- DataType::Datetime(_, _) => {
399
- let ca = series.datetime().unwrap();
400
- return Wrap(ca).into_value();
401
- }
402
- DataType::Decimal(_, _) => {
403
- let ca = series.decimal().unwrap();
404
- return Wrap(ca).into_value();
405
- }
406
- DataType::String => {
407
- let ca = series.str().unwrap();
408
- return Wrap(ca).into_value();
409
- }
410
- DataType::Struct(_) => {
411
- let ca = series.struct_().unwrap();
412
- return Wrap(ca).into_value();
413
- }
414
- DataType::Duration(_) => {
415
- let ca = series.duration().unwrap();
416
- return Wrap(ca).into_value();
417
- }
418
- DataType::Binary => {
419
- let ca = series.binary().unwrap();
420
- return Wrap(ca).into_value();
421
- }
422
- DataType::Null => {
423
- let null: Option<u8> = None;
424
- let n = series.len();
425
- let iter = std::iter::repeat(null).take(n);
426
- use std::iter::{Repeat, Take};
427
- struct NullIter {
428
- iter: Take<Repeat<Option<u8>>>,
429
- n: usize,
430
- }
431
- impl Iterator for NullIter {
432
- type Item = Option<u8>;
433
-
434
- fn next(&mut self) -> Option<Self::Item> {
435
- self.iter.next()
436
- }
437
- fn size_hint(&self) -> (usize, Option<usize>) {
438
- (self.n, Some(self.n))
439
- }
440
- }
441
- impl ExactSizeIterator for NullIter {}
442
-
443
- RArray::from_iter(NullIter { iter, n }).into_value()
444
- }
445
- DataType::Unknown => {
446
- panic!("to_a not implemented for unknown")
447
- }
448
- DataType::BinaryOffset => {
449
- unreachable!()
450
- }
451
- };
452
- rblist
453
- }
454
-
455
- to_a_recursive(series)
456
- }
457
-
458
349
  pub fn clone(&self) -> Self {
459
350
  RbSeries::new(self.series.borrow().clone())
460
351
  }
@@ -37,7 +37,9 @@ impl RbSQLContext {
37
37
  }
38
38
 
39
39
  pub fn register(&self, name: String, lf: &RbLazyFrame) {
40
- self.context.borrow_mut().register(&name, lf.ldf.clone())
40
+ self.context
41
+ .borrow_mut()
42
+ .register(&name, lf.ldf.borrow().clone())
41
43
  }
42
44
 
43
45
  pub fn unregister(&self, name: String) {
@@ -358,7 +358,7 @@ module Polars
358
358
  # # │ [7, 8, 9] ┆ 4 ┆ null │
359
359
  # # └───────────────┴─────┴──────┘
360
360
  def get(index, null_on_oob: true)
361
- index = Utils.parse_as_expression(index)
361
+ index = Utils.parse_into_expression(index)
362
362
  Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
363
363
  end
364
364
 
@@ -446,7 +446,7 @@ module Polars
446
446
  # # │ ["x", "y"] ┆ _ ┆ x_y │
447
447
  # # └───────────────┴───────────┴──────┘
448
448
  def join(separator, ignore_nulls: true)
449
- separator = Utils.parse_as_expression(separator, str_as_lit: true)
449
+ separator = Utils.parse_into_expression(separator, str_as_lit: true)
450
450
  Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
451
451
  end
452
452
 
@@ -502,7 +502,7 @@ module Polars
502
502
  # # │ ["a", "c"] ┆ true │
503
503
  # # └───────────────┴──────────┘
504
504
  def contains(item)
505
- item = Utils.parse_as_expression(item, str_as_lit: true)
505
+ item = Utils.parse_into_expression(item, str_as_lit: true)
506
506
  Utils.wrap_expr(_rbexpr.arr_contains(item))
507
507
  end
508
508
 
@@ -530,7 +530,7 @@ module Polars
530
530
  # # │ [2, 2] ┆ 2 │
531
531
  # # └───────────────┴────────────────┘
532
532
  def count_matches(element)
533
- element = Utils.parse_as_expression(element, str_as_lit: true)
533
+ element = Utils.parse_into_expression(element, str_as_lit: true)
534
534
  Utils.wrap_expr(_rbexpr.arr_count_matches(element))
535
535
  end
536
536
  end
@@ -13,6 +13,7 @@ module Polars
13
13
  skip_rows: 0,
14
14
  dtypes: nil,
15
15
  null_values: nil,
16
+ missing_utf8_is_empty_string: false,
16
17
  ignore_errors: false,
17
18
  parse_dates: false,
18
19
  n_threads: nil,
@@ -28,10 +29,12 @@ module Polars
28
29
  sample_size: 1024,
29
30
  eol_char: "\n",
30
31
  new_columns: nil,
31
- truncate_ragged_lines: false
32
+ raise_if_empty: true,
33
+ truncate_ragged_lines: false,
34
+ decimal_comma: false
32
35
  )
33
36
  if Utils.pathlike?(file)
34
- path = Utils.normalise_filepath(file)
37
+ path = Utils.normalize_filepath(file)
35
38
  end
36
39
 
37
40
  dtype_list = nil
@@ -39,7 +42,7 @@ module Polars
39
42
  if !dtypes.nil?
40
43
  if dtypes.is_a?(Hash)
41
44
  dtype_list = []
42
- dtypes.each do|k, v|
45
+ dtypes.each do |k, v|
43
46
  dtype_list << [k, Utils.rb_type_to_dtype(v)]
44
47
  end
45
48
  elsif dtypes.is_a?(::Array)
@@ -72,12 +75,15 @@ module Polars
72
75
  comment_char,
73
76
  quote_char,
74
77
  processed_null_values,
78
+ missing_utf8_is_empty_string,
75
79
  parse_dates,
76
80
  skip_rows_after_header,
77
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
81
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
78
82
  sample_size,
79
83
  eol_char,
80
- truncate_ragged_lines
84
+ raise_if_empty,
85
+ truncate_ragged_lines,
86
+ decimal_comma
81
87
  )
82
88
  self.new_columns = new_columns
83
89
  end