polars-df 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,10 +1,11 @@
1
1
  use magnus::{prelude::*, RArray};
2
2
  use polars_core::prelude::*;
3
3
 
4
+ use crate::any_value::rb_object_to_any_value;
4
5
  use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
5
6
  use crate::prelude::ObjectValue;
6
7
  use crate::series::to_series_collection;
7
- use crate::{RbPolarsErr, RbResult, RbSeries, RbValueError};
8
+ use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
8
9
 
9
10
  impl RbSeries {
10
11
  pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
@@ -35,36 +36,28 @@ impl RbSeries {
35
36
  }
36
37
  }
37
38
 
38
- fn new_primitive<T>(name: &str, obj: RArray, strict: bool) -> RbResult<RbSeries>
39
+ fn new_primitive<T>(name: &str, values: RArray, _strict: bool) -> RbResult<RbSeries>
39
40
  where
40
41
  T: PolarsNumericType,
41
42
  ChunkedArray<T>: IntoSeries,
42
43
  T::Native: magnus::TryConvert,
43
44
  {
44
- let len = obj.len();
45
+ let len = values.len();
45
46
  let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
46
47
 
47
- unsafe {
48
- for item in obj.as_slice().iter() {
49
- if item.is_nil() {
50
- builder.append_null()
51
- } else {
52
- match T::Native::try_convert(*item) {
53
- Ok(val) => builder.append_value(val),
54
- Err(e) => {
55
- if strict {
56
- return Err(e);
57
- }
58
- builder.append_null()
59
- }
60
- }
61
- }
48
+ for res in values.into_iter() {
49
+ let value = res;
50
+ if value.is_nil() {
51
+ builder.append_null()
52
+ } else {
53
+ let v = <T::Native>::try_convert(value)?;
54
+ builder.append_value(v)
62
55
  }
63
56
  }
64
- let ca = builder.finish();
65
57
 
58
+ let ca = builder.finish();
66
59
  let s = ca.into_series();
67
- Ok(RbSeries::new(s))
60
+ Ok(s.into())
68
61
  }
69
62
 
70
63
  // Init with lists that can contain Nones
@@ -91,18 +84,50 @@ init_method_opt!(new_opt_f64, Float64Type, f64);
91
84
 
92
85
  fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
93
86
  let mut val = Vec::with_capacity(arr.len());
94
- for v in arr.each() {
95
- val.push(Wrap::<AnyValue<'s>>::try_convert(v?)?);
87
+ for v in arr.into_iter() {
88
+ val.push(Wrap::<AnyValue<'s>>::try_convert(v)?);
96
89
  }
97
90
  Ok(val)
98
91
  }
99
92
 
100
93
  impl RbSeries {
101
- pub fn new_from_anyvalues(name: String, val: RArray, strict: bool) -> RbResult<Self> {
102
- let val = vec_wrap_any_value(val)?;
103
- let avs = slice_extract_wrapped(&val);
94
+ pub fn new_from_any_values(name: String, values: RArray, strict: bool) -> RbResult<Self> {
95
+ let any_values_result = vec_wrap_any_value(values);
104
96
  // from anyvalues is fallible
105
- let s = Series::from_any_values(&name, avs, strict).map_err(RbPolarsErr::from)?;
97
+ let result = any_values_result.and_then(|avs| {
98
+ let avs = slice_extract_wrapped(&avs);
99
+ let s = Series::from_any_values(&name, avs, strict).map_err(|e| {
100
+ RbTypeError::new_err(format!(
101
+ "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
102
+ ))
103
+ })?;
104
+ Ok(s.into())
105
+ });
106
+
107
+ // Fall back to Object type for non-strict construction.
108
+ if !strict && result.is_err() {
109
+ return Self::new_object(name, values, strict);
110
+ }
111
+
112
+ result
113
+ }
114
+
115
+ pub fn new_from_any_values_and_dtype(
116
+ name: String,
117
+ values: RArray,
118
+ dtype: Wrap<DataType>,
119
+ strict: bool,
120
+ ) -> RbResult<Self> {
121
+ let any_values = values
122
+ .into_iter()
123
+ .map(|v| rb_object_to_any_value(v, strict))
124
+ .collect::<RbResult<Vec<AnyValue>>>()?;
125
+ let s = Series::from_any_values_and_dtype(&name, any_values.as_slice(), &dtype.0, strict)
126
+ .map_err(|e| {
127
+ RbTypeError::new_err(format!(
128
+ "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
129
+ ))
130
+ })?;
106
131
  Ok(s.into())
107
132
  }
108
133
 
@@ -125,9 +150,9 @@ impl RbSeries {
125
150
 
126
151
  pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
127
152
  let val = val
128
- .each()
129
- .map(|v| v.map(ObjectValue::from))
130
- .collect::<RbResult<Vec<ObjectValue>>>()?;
153
+ .into_iter()
154
+ .map(ObjectValue::from)
155
+ .collect::<Vec<ObjectValue>>();
131
156
  let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
132
157
  Ok(s.into())
133
158
  }
@@ -163,15 +188,8 @@ impl RbSeries {
163
188
  }
164
189
  }
165
190
 
166
- pub fn new_decimal(name: String, val: RArray, strict: bool) -> RbResult<Self> {
167
- let val = vec_wrap_any_value(val)?;
168
- // TODO: do we have to respect 'strict' here? it's possible if we want to
169
- let avs = slice_extract_wrapped(&val);
170
- // create a fake dtype with a placeholder "none" scale, to be inferred later
171
- let dtype = DataType::Decimal(None, None);
172
- let s = Series::from_any_values_and_dtype(&name, avs, &dtype, strict)
173
- .map_err(RbPolarsErr::from)?;
174
- Ok(s.into())
191
+ pub fn new_decimal(name: String, values: RArray, strict: bool) -> RbResult<Self> {
192
+ Self::new_from_any_values(name, values, strict)
175
193
  }
176
194
 
177
195
  pub fn repeat(
@@ -1,57 +1,139 @@
1
- use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
1
+ use magnus::{value::qnil, IntoValue, RArray, Value};
2
2
  use polars_core::prelude::*;
3
3
 
4
- use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
4
+ use crate::prelude::*;
5
+ use crate::RbSeries;
5
6
 
6
7
  impl RbSeries {
7
- /// For numeric types, this should only be called for Series with null types.
8
- /// This will cast to floats so that `nil = NAN`
9
- pub fn to_numo(&self) -> RbResult<Value> {
10
- let s = &self.series.borrow();
11
- match s.dtype() {
12
- DataType::String => {
13
- let ca = s.str().unwrap();
8
+ /// Convert this Series to a Ruby array.
9
+ /// This operation copies data.
10
+ pub fn to_a(&self) -> Value {
11
+ let series = &self.series.borrow();
14
12
 
15
- // TODO make more efficient
16
- let np_arr = RArray::from_iter(ca);
17
- class::object()
18
- .const_get::<_, RModule>("Numo")?
19
- .const_get::<_, RClass>("RObject")?
20
- .funcall("cast", (np_arr,))
21
- }
22
- dt if dt.is_numeric() => {
23
- if s.bit_repr_is_large() {
24
- let s = s.cast(&DataType::Float64).unwrap();
25
- let ca = s.f64().unwrap();
26
- // TODO make more efficient
27
- let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
28
- Some(v) => v,
29
- None => f64::NAN,
30
- }));
31
- class::object()
32
- .const_get::<_, RModule>("Numo")?
33
- .const_get::<_, RClass>("DFloat")?
34
- .funcall("cast", (np_arr,))
35
- } else {
36
- let s = s.cast(&DataType::Float32).unwrap();
37
- let ca = s.f32().unwrap();
38
- // TODO make more efficient
39
- let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
40
- Some(v) => v,
41
- None => f32::NAN,
42
- }));
43
- class::object()
44
- .const_get::<_, RModule>("Numo")?
45
- .const_get::<_, RClass>("SFloat")?
46
- .funcall("cast", (np_arr,))
47
- }
48
- }
49
- dt => {
50
- raise_err!(
51
- format!("'to_numo' not supported for dtype: {dt:?}"),
52
- ComputeError
53
- );
54
- }
13
+ fn to_a_recursive(series: &Series) -> Value {
14
+ let rblist = match series.dtype() {
15
+ DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
16
+ DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
17
+ DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
18
+ DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
19
+ DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
20
+ DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
21
+ DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
22
+ DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
23
+ DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
24
+ DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
25
+ DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
26
+ DataType::Categorical(_, _) | DataType::Enum(_, _) => {
27
+ RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
28
+ }
29
+ DataType::Object(_, _) => {
30
+ let v = RArray::with_capacity(series.len());
31
+ for i in 0..series.len() {
32
+ let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
33
+ match obj {
34
+ Some(val) => v.push(val.to_object()).unwrap(),
35
+ None => v.push(qnil()).unwrap(),
36
+ };
37
+ }
38
+ v.into_value()
39
+ }
40
+ DataType::List(_) => {
41
+ let v = RArray::new();
42
+ let ca = series.list().unwrap();
43
+ for opt_s in ca.amortized_iter() {
44
+ match opt_s {
45
+ None => {
46
+ v.push(qnil()).unwrap();
47
+ }
48
+ Some(s) => {
49
+ let rblst = to_a_recursive(s.as_ref());
50
+ v.push(rblst).unwrap();
51
+ }
52
+ }
53
+ }
54
+ v.into_value()
55
+ }
56
+ DataType::Array(_, _) => {
57
+ let v = RArray::new();
58
+ let ca = series.array().unwrap();
59
+ for opt_s in ca.amortized_iter() {
60
+ match opt_s {
61
+ None => {
62
+ v.push(qnil()).unwrap();
63
+ }
64
+ Some(s) => {
65
+ let rblst = to_a_recursive(s.as_ref());
66
+ v.push(rblst).unwrap();
67
+ }
68
+ }
69
+ }
70
+ v.into_value()
71
+ }
72
+ DataType::Date => {
73
+ let ca = series.date().unwrap();
74
+ return Wrap(ca).into_value();
75
+ }
76
+ DataType::Time => {
77
+ let ca = series.time().unwrap();
78
+ return Wrap(ca).into_value();
79
+ }
80
+ DataType::Datetime(_, _) => {
81
+ let ca = series.datetime().unwrap();
82
+ return Wrap(ca).into_value();
83
+ }
84
+ DataType::Decimal(_, _) => {
85
+ let ca = series.decimal().unwrap();
86
+ return Wrap(ca).into_value();
87
+ }
88
+ DataType::String => {
89
+ let ca = series.str().unwrap();
90
+ return Wrap(ca).into_value();
91
+ }
92
+ DataType::Struct(_) => {
93
+ let ca = series.struct_().unwrap();
94
+ return Wrap(ca).into_value();
95
+ }
96
+ DataType::Duration(_) => {
97
+ let ca = series.duration().unwrap();
98
+ return Wrap(ca).into_value();
99
+ }
100
+ DataType::Binary => {
101
+ let ca = series.binary().unwrap();
102
+ return Wrap(ca).into_value();
103
+ }
104
+ DataType::Null => {
105
+ let null: Option<u8> = None;
106
+ let n = series.len();
107
+ let iter = std::iter::repeat(null).take(n);
108
+ use std::iter::{Repeat, Take};
109
+ struct NullIter {
110
+ iter: Take<Repeat<Option<u8>>>,
111
+ n: usize,
112
+ }
113
+ impl Iterator for NullIter {
114
+ type Item = Option<u8>;
115
+
116
+ fn next(&mut self) -> Option<Self::Item> {
117
+ self.iter.next()
118
+ }
119
+ fn size_hint(&self) -> (usize, Option<usize>) {
120
+ (self.n, Some(self.n))
121
+ }
122
+ }
123
+ impl ExactSizeIterator for NullIter {}
124
+
125
+ RArray::from_iter(NullIter { iter, n }).into_value()
126
+ }
127
+ DataType::Unknown(_) => {
128
+ panic!("to_a not implemented for unknown")
129
+ }
130
+ DataType::BinaryOffset => {
131
+ unreachable!()
132
+ }
133
+ };
134
+ rblist
55
135
  }
136
+
137
+ to_a_recursive(series)
56
138
  }
57
139
  }
@@ -5,7 +5,7 @@ mod construction;
5
5
  mod export;
6
6
  mod scatter;
7
7
 
8
- use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
8
+ use magnus::{exception, prelude::*, Error, IntoValue, RArray, Value};
9
9
  use polars::prelude::*;
10
10
  use polars::series::IsSorted;
11
11
  use std::cell::RefCell;
@@ -36,8 +36,8 @@ impl RbSeries {
36
36
 
37
37
  pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
38
38
  let mut series = Vec::new();
39
- for item in rs.each() {
40
- series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
39
+ for item in rs.into_iter() {
40
+ series.push(<&RbSeries>::try_convert(item)?.series.borrow().clone());
41
41
  }
42
42
  Ok(series)
43
43
  }
@@ -247,13 +247,24 @@ impl RbSeries {
247
247
  .into())
248
248
  }
249
249
 
250
- pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
251
- let df = self
250
+ pub fn value_counts(
251
+ &self,
252
+ sort: bool,
253
+ parallel: bool,
254
+ name: String,
255
+ normalize: bool,
256
+ ) -> RbResult<RbDataFrame> {
257
+ let out = self
252
258
  .series
253
259
  .borrow()
254
- .value_counts(true, sorted)
260
+ .value_counts(sort, parallel, name, normalize)
255
261
  .map_err(RbPolarsErr::from)?;
256
- Ok(df.into())
262
+ Ok(out.into())
263
+ }
264
+
265
+ pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
266
+ let length = length.unwrap_or_else(|| self.series.borrow().len());
267
+ self.series.borrow().slice(offset, length).into()
257
268
  }
258
269
 
259
270
  pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
@@ -301,10 +312,20 @@ impl RbSeries {
301
312
  Ok(s.into())
302
313
  }
303
314
 
304
- pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
305
- if strict {
306
- self.series.borrow().eq(&other.series.borrow())
307
- } else if null_equal {
315
+ pub fn equals(
316
+ &self,
317
+ other: &RbSeries,
318
+ check_dtypes: bool,
319
+ check_names: bool,
320
+ null_equal: bool,
321
+ ) -> bool {
322
+ if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
323
+ return false;
324
+ }
325
+ if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
326
+ return false;
327
+ }
328
+ if null_equal {
308
329
  self.series.borrow().equals_missing(&other.series.borrow())
309
330
  } else {
310
331
  self.series.borrow().equals(&other.series.borrow())
@@ -325,136 +346,6 @@ impl RbSeries {
325
346
  self.series.borrow().len()
326
347
  }
327
348
 
328
- pub fn to_a(&self) -> Value {
329
- let series = &self.series.borrow();
330
-
331
- fn to_a_recursive(series: &Series) -> Value {
332
- let rblist = match series.dtype() {
333
- DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
334
- DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
335
- DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
336
- DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
337
- DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
338
- DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
339
- DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
340
- DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
341
- DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
342
- DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
343
- DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
344
- DataType::Categorical(_, _) | DataType::Enum(_, _) => {
345
- RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
346
- }
347
- DataType::Object(_, _) => {
348
- let v = RArray::with_capacity(series.len());
349
- for i in 0..series.len() {
350
- let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
351
- match obj {
352
- Some(val) => v.push(val.to_object()).unwrap(),
353
- None => v.push(qnil()).unwrap(),
354
- };
355
- }
356
- v.into_value()
357
- }
358
- DataType::List(_) => {
359
- let v = RArray::new();
360
- let ca = series.list().unwrap();
361
- for opt_s in unsafe { ca.amortized_iter() } {
362
- match opt_s {
363
- None => {
364
- v.push(qnil()).unwrap();
365
- }
366
- Some(s) => {
367
- let rblst = to_a_recursive(s.as_ref());
368
- v.push(rblst).unwrap();
369
- }
370
- }
371
- }
372
- v.into_value()
373
- }
374
- DataType::Array(_, _) => {
375
- let v = RArray::new();
376
- let ca = series.array().unwrap();
377
- for opt_s in ca.amortized_iter() {
378
- match opt_s {
379
- None => {
380
- v.push(qnil()).unwrap();
381
- }
382
- Some(s) => {
383
- let rblst = to_a_recursive(s.as_ref());
384
- v.push(rblst).unwrap();
385
- }
386
- }
387
- }
388
- v.into_value()
389
- }
390
- DataType::Date => {
391
- let ca = series.date().unwrap();
392
- return Wrap(ca).into_value();
393
- }
394
- DataType::Time => {
395
- let ca = series.time().unwrap();
396
- return Wrap(ca).into_value();
397
- }
398
- DataType::Datetime(_, _) => {
399
- let ca = series.datetime().unwrap();
400
- return Wrap(ca).into_value();
401
- }
402
- DataType::Decimal(_, _) => {
403
- let ca = series.decimal().unwrap();
404
- return Wrap(ca).into_value();
405
- }
406
- DataType::String => {
407
- let ca = series.str().unwrap();
408
- return Wrap(ca).into_value();
409
- }
410
- DataType::Struct(_) => {
411
- let ca = series.struct_().unwrap();
412
- return Wrap(ca).into_value();
413
- }
414
- DataType::Duration(_) => {
415
- let ca = series.duration().unwrap();
416
- return Wrap(ca).into_value();
417
- }
418
- DataType::Binary => {
419
- let ca = series.binary().unwrap();
420
- return Wrap(ca).into_value();
421
- }
422
- DataType::Null => {
423
- let null: Option<u8> = None;
424
- let n = series.len();
425
- let iter = std::iter::repeat(null).take(n);
426
- use std::iter::{Repeat, Take};
427
- struct NullIter {
428
- iter: Take<Repeat<Option<u8>>>,
429
- n: usize,
430
- }
431
- impl Iterator for NullIter {
432
- type Item = Option<u8>;
433
-
434
- fn next(&mut self) -> Option<Self::Item> {
435
- self.iter.next()
436
- }
437
- fn size_hint(&self) -> (usize, Option<usize>) {
438
- (self.n, Some(self.n))
439
- }
440
- }
441
- impl ExactSizeIterator for NullIter {}
442
-
443
- RArray::from_iter(NullIter { iter, n }).into_value()
444
- }
445
- DataType::Unknown => {
446
- panic!("to_a not implemented for unknown")
447
- }
448
- DataType::BinaryOffset => {
449
- unreachable!()
450
- }
451
- };
452
- rblist
453
- }
454
-
455
- to_a_recursive(series)
456
- }
457
-
458
349
  pub fn clone(&self) -> Self {
459
350
  RbSeries::new(self.series.borrow().clone())
460
351
  }
@@ -37,7 +37,9 @@ impl RbSQLContext {
37
37
  }
38
38
 
39
39
  pub fn register(&self, name: String, lf: &RbLazyFrame) {
40
- self.context.borrow_mut().register(&name, lf.ldf.clone())
40
+ self.context
41
+ .borrow_mut()
42
+ .register(&name, lf.ldf.borrow().clone())
41
43
  }
42
44
 
43
45
  pub fn unregister(&self, name: String) {
@@ -358,7 +358,7 @@ module Polars
358
358
  # # │ [7, 8, 9] ┆ 4 ┆ null │
359
359
  # # └───────────────┴─────┴──────┘
360
360
  def get(index, null_on_oob: true)
361
- index = Utils.parse_as_expression(index)
361
+ index = Utils.parse_into_expression(index)
362
362
  Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
363
363
  end
364
364
 
@@ -446,7 +446,7 @@ module Polars
446
446
  # # │ ["x", "y"] ┆ _ ┆ x_y │
447
447
  # # └───────────────┴───────────┴──────┘
448
448
  def join(separator, ignore_nulls: true)
449
- separator = Utils.parse_as_expression(separator, str_as_lit: true)
449
+ separator = Utils.parse_into_expression(separator, str_as_lit: true)
450
450
  Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
451
451
  end
452
452
 
@@ -502,7 +502,7 @@ module Polars
502
502
  # # │ ["a", "c"] ┆ true │
503
503
  # # └───────────────┴──────────┘
504
504
  def contains(item)
505
- item = Utils.parse_as_expression(item, str_as_lit: true)
505
+ item = Utils.parse_into_expression(item, str_as_lit: true)
506
506
  Utils.wrap_expr(_rbexpr.arr_contains(item))
507
507
  end
508
508
 
@@ -530,7 +530,7 @@ module Polars
530
530
  # # │ [2, 2] ┆ 2 │
531
531
  # # └───────────────┴────────────────┘
532
532
  def count_matches(element)
533
- element = Utils.parse_as_expression(element, str_as_lit: true)
533
+ element = Utils.parse_into_expression(element, str_as_lit: true)
534
534
  Utils.wrap_expr(_rbexpr.arr_count_matches(element))
535
535
  end
536
536
  end
@@ -13,6 +13,7 @@ module Polars
13
13
  skip_rows: 0,
14
14
  dtypes: nil,
15
15
  null_values: nil,
16
+ missing_utf8_is_empty_string: false,
16
17
  ignore_errors: false,
17
18
  parse_dates: false,
18
19
  n_threads: nil,
@@ -28,10 +29,12 @@ module Polars
28
29
  sample_size: 1024,
29
30
  eol_char: "\n",
30
31
  new_columns: nil,
31
- truncate_ragged_lines: false
32
+ raise_if_empty: true,
33
+ truncate_ragged_lines: false,
34
+ decimal_comma: false
32
35
  )
33
36
  if Utils.pathlike?(file)
34
- path = Utils.normalise_filepath(file)
37
+ path = Utils.normalize_filepath(file)
35
38
  end
36
39
 
37
40
  dtype_list = nil
@@ -39,7 +42,7 @@ module Polars
39
42
  if !dtypes.nil?
40
43
  if dtypes.is_a?(Hash)
41
44
  dtype_list = []
42
- dtypes.each do|k, v|
45
+ dtypes.each do |k, v|
43
46
  dtype_list << [k, Utils.rb_type_to_dtype(v)]
44
47
  end
45
48
  elsif dtypes.is_a?(::Array)
@@ -72,12 +75,15 @@ module Polars
72
75
  comment_char,
73
76
  quote_char,
74
77
  processed_null_values,
78
+ missing_utf8_is_empty_string,
75
79
  parse_dates,
76
80
  skip_rows_after_header,
77
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
81
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
78
82
  sample_size,
79
83
  eol_char,
80
- truncate_ragged_lines
84
+ raise_if_empty,
85
+ truncate_ragged_lines,
86
+ decimal_comma
81
87
  )
82
88
  self.new_columns = new_columns
83
89
  end