polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -2,12 +2,15 @@ use std::io::BufReader;
2
2
 
3
3
  use magnus::{RHash, Ruby, Value};
4
4
  use polars::prelude::ArrowSchema;
5
+ use polars::prelude::PlPathRef;
6
+ use polars_io::cloud::CloudOptions;
5
7
 
6
8
  use crate::conversion::Wrap;
7
9
  use crate::file::{EitherRustRubyFile, get_either_file};
10
+ use crate::utils::EnterPolarsExt;
8
11
  use crate::{RbPolarsErr, RbResult};
9
12
 
10
- pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
13
+ pub fn read_ipc_schema(rb: &Ruby, rb_f: Value) -> RbResult<RHash> {
11
14
  use arrow::io::ipc::read::read_file_metadata;
12
15
  let metadata = match get_either_file(rb_f, false)? {
13
16
  EitherRustRubyFile::Rust(r) => {
@@ -16,26 +19,70 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
16
19
  EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
17
20
  };
18
21
 
19
- let ruby = Ruby::get_with(rb_f);
20
- let dict = ruby.hash_new();
22
+ let dict = rb.hash_new();
21
23
  fields_to_rbdict(&metadata.schema, &dict)?;
22
24
  Ok(dict)
23
25
  }
24
26
 
25
- pub fn read_parquet_metadata(rb_f: Value) -> RbResult<RHash> {
27
+ pub fn read_parquet_metadata(
28
+ rb: &Ruby,
29
+ rb_f: Value,
30
+ storage_options: Option<Vec<(String, String)>>,
31
+ credential_provider: Option<Value>,
32
+ retries: usize,
33
+ ) -> RbResult<RHash> {
34
+ use std::io::Cursor;
35
+
36
+ use polars_io::pl_async::get_runtime;
26
37
  use polars_parquet::read::read_metadata;
27
38
  use polars_parquet::read::schema::read_custom_key_value_metadata;
39
+ use polars_utils::plpath::PlPath;
28
40
 
29
- let metadata = match get_either_file(rb_f, false)? {
30
- EitherRustRubyFile::Rust(r) => {
31
- read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
41
+ use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
42
+
43
+ let metadata = match get_ruby_scan_source_input(rb_f, false)? {
44
+ RubyScanSourceInput::Buffer(buf) => {
45
+ read_metadata(&mut Cursor::new(buf)).map_err(RbPolarsErr::from)?
46
+ }
47
+ RubyScanSourceInput::Path(p) => {
48
+ let cloud_options = parse_cloud_options(
49
+ Some(p.as_ref()),
50
+ storage_options,
51
+ credential_provider,
52
+ retries,
53
+ )?;
54
+ match p {
55
+ PlPath::Local(local) => {
56
+ let file = polars_utils::open_file(&local).map_err(RbPolarsErr::from)?;
57
+ read_metadata(&mut BufReader::new(file)).map_err(RbPolarsErr::from)?
58
+ }
59
+ PlPath::Cloud(_) => {
60
+ use polars::prelude::ParquetObjectStore;
61
+ use polars_error::PolarsResult;
62
+
63
+ rb.detach(|| {
64
+ get_runtime().block_on(async {
65
+ let mut reader = ParquetObjectStore::from_uri(
66
+ p.as_ref(),
67
+ cloud_options.as_ref(),
68
+ None,
69
+ )
70
+ .await?;
71
+ let result = reader.get_metadata().await?;
72
+ PolarsResult::Ok((**result).clone())
73
+ })
74
+ })
75
+ .map_err(RbPolarsErr::from)?
76
+ }
77
+ }
78
+ }
79
+ RubyScanSourceInput::File(f) => {
80
+ read_metadata(&mut BufReader::new(f)).map_err(RbPolarsErr::from)?
32
81
  }
33
- EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
34
82
  };
35
83
 
36
84
  let key_value_metadata = read_custom_key_value_metadata(metadata.key_value_metadata());
37
- let ruby = Ruby::get_with(rb_f);
38
- let dict = ruby.hash_new();
85
+ let dict = rb.hash_new();
39
86
  for (key, value) in key_value_metadata.into_iter() {
40
87
  dict.aset(key.as_str(), value.as_str())?;
41
88
  }
@@ -66,3 +113,27 @@ fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
66
113
  }
67
114
  Ok(())
68
115
  }
116
+
117
+ pub fn parse_cloud_options<'a>(
118
+ first_path: Option<PlPathRef<'a>>,
119
+ storage_options: Option<Vec<(String, String)>>,
120
+ credential_provider: Option<Value>,
121
+ retries: usize,
122
+ ) -> RbResult<Option<CloudOptions>> {
123
+ let result = if let Some(first_path) = first_path {
124
+ use crate::prelude::parse_cloud_options;
125
+
126
+ let first_path_url = first_path.to_str();
127
+ let cloud_options =
128
+ parse_cloud_options(first_path_url, storage_options.unwrap_or_default())?;
129
+
130
+ Some(
131
+ cloud_options
132
+ .with_max_retries(retries)
133
+ .with_credential_provider(credential_provider.map(|_| todo!())),
134
+ )
135
+ } else {
136
+ None
137
+ };
138
+ Ok(result)
139
+ }
@@ -6,9 +6,11 @@ use polars::lazy::dsl;
6
6
  use polars::prelude::*;
7
7
 
8
8
  use crate::conversion::{Wrap, get_lf, get_rbseq};
9
+ use crate::expr::ToExprs;
9
10
  use crate::expr::datatype::RbDataTypeExpr;
11
+ use crate::lazyframe::RbOptFlags;
10
12
  use crate::map::lazy::binary_lambda;
11
- use crate::rb_exprs_to_exprs;
13
+ use crate::utils::{EnterPolarsExt, RubyAttach};
12
14
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
13
15
 
14
16
  macro_rules! set_unwrapped_or_0 {
@@ -62,7 +64,7 @@ pub fn arg_sort_by(
62
64
  multithreaded: bool,
63
65
  maintain_order: bool,
64
66
  ) -> RbResult<RbExpr> {
65
- let by = rb_exprs_to_exprs(by)?;
67
+ let by = by.to_exprs()?;
66
68
  Ok(dsl::arg_sort_by(
67
69
  by,
68
70
  SortMultipleOptions {
@@ -81,7 +83,12 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
81
83
  }
82
84
 
83
85
  pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
84
- let exprs = rb_exprs_to_exprs(exprs)?;
86
+ let exprs = exprs.to_exprs()?;
87
+ if exprs.is_empty() {
88
+ return Err(RbValueError::new_err(
89
+ "expected at least 1 expression in 'as_struct'",
90
+ ));
91
+ }
85
92
  Ok(dsl::as_struct(exprs).into())
86
93
  }
87
94
 
@@ -90,7 +97,7 @@ pub fn field(names: Vec<String>) -> RbExpr {
90
97
  }
91
98
 
92
99
  pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
93
- let exprs = rb_exprs_to_exprs(exprs)?;
100
+ let exprs = exprs.to_exprs()?;
94
101
  Ok(dsl::coalesce(&exprs).into())
95
102
  }
96
103
 
@@ -98,13 +105,25 @@ pub fn col(name: String) -> RbExpr {
98
105
  dsl::col(&name).into()
99
106
  }
100
107
 
101
- pub fn collect_all(ruby: &Ruby, lfs: RArray) -> RbResult<RArray> {
108
+ fn lfs_to_plans(lfs: RArray) -> RbResult<Vec<DslPlan>> {
102
109
  let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
110
+ Ok(lfs
111
+ .into_iter()
112
+ .map(|lf| lf.ldf.read().logical_plan.clone())
113
+ .collect())
114
+ }
103
115
 
104
- Ok(ruby.ary_from_iter(lfs.iter().map(|lf| {
105
- let df = lf.ldf.borrow().clone().collect().unwrap();
106
- RbDataFrame::new(df)
107
- })))
116
+ pub fn collect_all(
117
+ ruby: &Ruby,
118
+ lfs: RArray,
119
+ engine: Wrap<Engine>,
120
+ optflags: &RbOptFlags,
121
+ ) -> RbResult<RArray> {
122
+ let plans = lfs_to_plans(lfs)?;
123
+ let dfs = ruby.enter_polars(|| {
124
+ LazyFrame::collect_all_with_engine(plans, engine.0, optflags.clone().inner.into_inner())
125
+ })?;
126
+ Ok(ruby.ary_from_iter(dfs.into_iter().map(Into::<RbDataFrame>::into)))
108
127
  }
109
128
 
110
129
  pub fn concat_lf(
@@ -136,13 +155,19 @@ pub fn concat_lf(
136
155
  }
137
156
 
138
157
  pub fn concat_list(s: RArray) -> RbResult<RbExpr> {
139
- let s = rb_exprs_to_exprs(s)?;
158
+ let s = s.to_exprs()?;
140
159
  let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
141
160
  Ok(expr.into())
142
161
  }
143
162
 
163
+ pub fn concat_arr(s: RArray) -> RbResult<RbExpr> {
164
+ let s = s.to_exprs()?;
165
+ let expr = dsl::concat_arr(s).map_err(RbPolarsErr::from)?;
166
+ Ok(expr.into())
167
+ }
168
+
144
169
  pub fn concat_str(s: RArray, separator: String, ignore_nulls: bool) -> RbResult<RbExpr> {
145
- let s = rb_exprs_to_exprs(s)?;
170
+ let s = s.to_exprs()?;
146
171
  Ok(dsl::concat_str(s, &separator, ignore_nulls).into())
147
172
  }
148
173
 
@@ -158,10 +183,6 @@ pub fn arctan2(y: &RbExpr, x: &RbExpr) -> RbExpr {
158
183
  y.inner.clone().arctan2(x.inner.clone()).into()
159
184
  }
160
185
 
161
- pub fn arctan2d(y: &RbExpr, x: &RbExpr) -> RbExpr {
162
- y.inner.clone().arctan2(x.inner.clone()).degrees().into()
163
- }
164
-
165
186
  pub fn cum_fold(
166
187
  acc: &RbExpr,
167
188
  lambda: Value,
@@ -170,10 +191,10 @@ pub fn cum_fold(
170
191
  return_dtype: Option<&RbDataTypeExpr>,
171
192
  include_init: bool,
172
193
  ) -> RbResult<RbExpr> {
173
- let exprs = rb_exprs_to_exprs(exprs)?;
194
+ let exprs = exprs.to_exprs()?;
174
195
  let lambda = Opaque::from(lambda);
175
196
  let func = PlanCallback::new(move |(a, b): (Series, Series)| {
176
- binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b).map(|v| v.unwrap())
197
+ Ruby::attach(|rb| binary_lambda(rb.get_inner(lambda), a, b).map(|v| v.unwrap()))
177
198
  });
178
199
  Ok(dsl::cum_fold_exprs(
179
200
  acc.inner.clone(),
@@ -186,6 +207,60 @@ pub fn cum_fold(
186
207
  .into())
187
208
  }
188
209
 
210
+ pub fn cum_reduce(
211
+ lambda: Value,
212
+ exprs: RArray,
213
+ returns_scalar: bool,
214
+ return_dtype: Option<&RbDataTypeExpr>,
215
+ ) -> RbResult<RbExpr> {
216
+ let exprs = exprs.to_exprs()?;
217
+ let lambda = Opaque::from(lambda);
218
+ let func = PlanCallback::new(move |(a, b): (Series, Series)| {
219
+ Ruby::attach(|rb| binary_lambda(rb.get_inner(lambda), a, b).map(|v| v.unwrap()))
220
+ });
221
+ Ok(dsl::cum_reduce_exprs(
222
+ func,
223
+ exprs,
224
+ returns_scalar,
225
+ return_dtype.map(|v| v.inner.clone()),
226
+ )
227
+ .into())
228
+ }
229
+
230
+ pub fn datetime(
231
+ year: &RbExpr,
232
+ month: &RbExpr,
233
+ day: &RbExpr,
234
+ hour: Option<&RbExpr>,
235
+ minute: Option<&RbExpr>,
236
+ second: Option<&RbExpr>,
237
+ microsecond: Option<&RbExpr>,
238
+ time_unit: Wrap<TimeUnit>,
239
+ time_zone: Wrap<Option<TimeZone>>,
240
+ ambiguous: &RbExpr,
241
+ ) -> RbExpr {
242
+ let year = year.inner.clone();
243
+ let month = month.inner.clone();
244
+ let day = day.inner.clone();
245
+ set_unwrapped_or_0!(hour, minute, second, microsecond);
246
+ let ambiguous = ambiguous.inner.clone();
247
+ let time_unit = time_unit.0;
248
+ let time_zone = time_zone.0;
249
+ let args = DatetimeArgs {
250
+ year,
251
+ month,
252
+ day,
253
+ hour,
254
+ minute,
255
+ second,
256
+ microsecond,
257
+ time_unit,
258
+ time_zone,
259
+ ambiguous,
260
+ };
261
+ dsl::datetime(args).into()
262
+ }
263
+
189
264
  pub fn concat_lf_diagonal(
190
265
  lfs: RArray,
191
266
  rechunk: bool,
@@ -224,7 +299,6 @@ pub fn concat_lf_horizontal(lfs: RArray, parallel: bool) -> RbResult<RbLazyFrame
224
299
  Ok(lf.into())
225
300
  }
226
301
 
227
- #[allow(clippy::too_many_arguments)]
228
302
  pub fn duration(
229
303
  weeks: Option<&RbExpr>,
230
304
  days: Option<&RbExpr>,
@@ -267,10 +341,10 @@ pub fn fold(
267
341
  returns_scalar: bool,
268
342
  return_dtype: Option<&RbDataTypeExpr>,
269
343
  ) -> RbResult<RbExpr> {
270
- let exprs = rb_exprs_to_exprs(exprs)?;
344
+ let exprs = exprs.to_exprs()?;
271
345
  let lambda = Opaque::from(lambda);
272
346
  let func = PlanCallback::new(move |(a, b): (Series, Series)| {
273
- binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b).map(|v| v.unwrap())
347
+ Ruby::attach(|rb| binary_lambda(rb.get_inner(lambda), a, b).map(|v| v.unwrap()))
274
348
  });
275
349
  Ok(dsl::fold_exprs(
276
350
  acc.inner.clone(),
@@ -309,7 +383,7 @@ pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr
309
383
  Ok(dsl::lit(unsafe { v.as_slice() }).into())
310
384
  }
311
385
  } else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
312
- let s = series.series.borrow();
386
+ let s = series.series.read();
313
387
  if is_scalar {
314
388
  let av = s
315
389
  .get(0)
@@ -8,4 +8,6 @@ pub mod misc;
8
8
  pub mod random;
9
9
  pub mod range;
10
10
  pub mod string_cache;
11
+ pub mod strings;
12
+ pub mod utils;
11
13
  pub mod whenthen;
@@ -3,7 +3,7 @@ use polars_core::datatypes::{TimeUnit, TimeZone};
3
3
 
4
4
  use crate::conversion::Wrap;
5
5
  use crate::prelude::*;
6
- use crate::{RbExpr, RbResult};
6
+ use crate::{RbDataTypeExpr, RbExpr, RbPolarsErr, RbResult};
7
7
 
8
8
  pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
9
9
  let start = start.inner.clone();
@@ -16,9 +16,9 @@ pub fn int_ranges(
16
16
  start: &RbExpr,
17
17
  end: &RbExpr,
18
18
  step: &RbExpr,
19
- dtype: Wrap<DataType>,
19
+ dtype: &RbDataTypeExpr,
20
20
  ) -> RbResult<RbExpr> {
21
- let dtype = dtype.0;
21
+ let dtype = dtype.inner.clone();
22
22
  Ok(dsl::int_ranges(
23
23
  start.inner.clone(),
24
24
  end.inner.clone(),
@@ -113,3 +113,19 @@ pub fn time_ranges(
113
113
  let closed = closed.0;
114
114
  dsl::time_ranges(start, end, every, closed).into()
115
115
  }
116
+
117
+ pub fn linear_spaces(
118
+ start: &RbExpr,
119
+ end: &RbExpr,
120
+ num_samples: &RbExpr,
121
+ closed: Wrap<ClosedInterval>,
122
+ as_array: bool,
123
+ ) -> RbResult<RbExpr> {
124
+ let start = start.inner.clone();
125
+ let end = end.inner.clone();
126
+ let num_samples = num_samples.inner.clone();
127
+ let closed = closed.0;
128
+ let out =
129
+ dsl::linear_spaces(start, end, num_samples, closed, as_array).map_err(RbPolarsErr::from)?;
130
+ Ok(out.into())
131
+ }
@@ -0,0 +1,6 @@
1
+ use crate::RbResult;
2
+
3
+ pub fn escape_regex(s: String) -> RbResult<String> {
4
+ let escaped_s = polars_ops::chunked_array::strings::escape_regex_str(&s);
5
+ Ok(escaped_s)
6
+ }
@@ -0,0 +1,6 @@
1
+ use crate::RbResult;
2
+ use polars_core::config::get_engine_affinity;
3
+
4
+ pub fn rb_get_engine_affinity() -> RbResult<String> {
5
+ Ok(get_engine_affinity())
6
+ }
@@ -1 +1,50 @@
1
- pub mod to_ruby;
1
+ use magnus::prelude::*;
2
+ use magnus::{RHash, Value};
3
+ use polars::prelude::{ArrowDataType, DataType};
4
+ use polars_error::polars_err;
5
+
6
+ use crate::interop::arrow::to_rust::normalize_arrow_fields;
7
+ use crate::prelude::Wrap;
8
+ use crate::series::import_schema_rbcapsule;
9
+ use crate::utils::to_rb_err;
10
+ use crate::{RbResult, RbValueError};
11
+
12
+ pub mod to_rb;
13
+ pub mod to_rust;
14
+
15
+ pub fn init_polars_schema_from_arrow_c_schema(
16
+ polars_schema: RHash,
17
+ schema_object: Value,
18
+ ) -> RbResult<()> {
19
+ let schema_capsule = schema_object.funcall("arrow_c_schema", ())?;
20
+
21
+ let field = import_schema_rbcapsule(schema_capsule)?;
22
+ let field = normalize_arrow_fields(&field);
23
+
24
+ let ArrowDataType::Struct(fields) = field.dtype else {
25
+ return Err(RbValueError::new_err(format!(
26
+ "arrow_c_schema of object passed to Polars::Schema did not return struct dtype: \
27
+ object: {}, dtype: {:?}",
28
+ schema_object, &field.dtype
29
+ )));
30
+ };
31
+
32
+ for field in fields {
33
+ let dtype = DataType::from_arrow_field(&field);
34
+
35
+ let name = field.name.as_str();
36
+ let dtype = Wrap(dtype);
37
+
38
+ if polars_schema.get(name).is_some() {
39
+ return Err(to_rb_err(polars_err!(
40
+ Duplicate:
41
+ "arrow schema contained duplicate name: {}",
42
+ name
43
+ )));
44
+ }
45
+
46
+ polars_schema.aset(name, dtype)?;
47
+ }
48
+
49
+ Ok(())
50
+ }
@@ -20,6 +20,17 @@ impl RbArrowArrayStream {
20
20
  }
21
21
  }
22
22
 
23
+ #[magnus::wrap(class = "Polars::ArrowSchema")]
24
+ pub struct RbArrowSchema {
25
+ pub(crate) schema: ffi::ArrowSchema,
26
+ }
27
+
28
+ impl RbArrowSchema {
29
+ pub fn to_i(&self) -> usize {
30
+ (&self.schema as *const _) as usize
31
+ }
32
+ }
33
+
23
34
  pub(crate) fn dataframe_to_stream(df: &DataFrame, ruby: &Ruby) -> RbResult<Value> {
24
35
  let iter = Box::new(DataFrameStreamIterator::new(df));
25
36
  let field = iter.field();
@@ -27,6 +38,25 @@ pub(crate) fn dataframe_to_stream(df: &DataFrame, ruby: &Ruby) -> RbResult<Value
27
38
  Ok(RbArrowArrayStream { stream }.into_value_with(ruby))
28
39
  }
29
40
 
41
+ pub(crate) fn polars_schema_to_rbcapsule(
42
+ ruby: &Ruby,
43
+ schema: crate::prelude::Wrap<polars::prelude::Schema>,
44
+ ) -> RbResult<Value> {
45
+ let schema: arrow::ffi::ArrowSchema = arrow::ffi::export_field_to_c(&ArrowField::new(
46
+ PlSmallStr::EMPTY,
47
+ ArrowDataType::Struct(
48
+ schema
49
+ .0
50
+ .iter_fields()
51
+ .map(|x| x.to_arrow(CompatLevel::newest()))
52
+ .collect(),
53
+ ),
54
+ false,
55
+ ));
56
+
57
+ Ok(RbArrowSchema { schema }.into_value_with(ruby))
58
+ }
59
+
30
60
  pub struct DataFrameStreamIterator {
31
61
  columns: Vec<Series>,
32
62
  dtype: ArrowDataType,
@@ -0,0 +1,43 @@
1
+ use polars_core::prelude::*;
2
+
3
+ pub(crate) fn normalize_arrow_fields(field: &ArrowField) -> ArrowField {
4
+ // normalize fields with extension dtypes that are otherwise standard dtypes associated
5
+ // with (for us) irrelevant metadata; recreate the field using the inner (standard) dtype
6
+ match field {
7
+ ArrowField {
8
+ dtype: ArrowDataType::Struct(fields),
9
+ ..
10
+ } => {
11
+ let mut normalized = false;
12
+ let normalized_fields: Vec<_> = fields
13
+ .iter()
14
+ .map(|f| {
15
+ // note: google bigquery column data is returned as a standard arrow dtype, but the
16
+ // sql type it was loaded from is associated as metadata (resulting in an extension dtype)
17
+ if let ArrowDataType::Extension(ext_type) = &f.dtype
18
+ && ext_type.name.starts_with("google:sqlType:")
19
+ {
20
+ normalized = true;
21
+ return ArrowField::new(
22
+ f.name.clone(),
23
+ ext_type.inner.clone(),
24
+ f.is_nullable,
25
+ );
26
+ }
27
+ f.clone()
28
+ })
29
+ .collect();
30
+
31
+ if normalized {
32
+ ArrowField::new(
33
+ field.name.clone(),
34
+ ArrowDataType::Struct(normalized_fields),
35
+ field.is_nullable,
36
+ )
37
+ } else {
38
+ field.clone()
39
+ }
40
+ }
41
+ _ => field.clone(),
42
+ }
43
+ }
@@ -6,7 +6,7 @@ use crate::dataframe::RbDataFrame;
6
6
  impl RbDataFrame {
7
7
  pub fn to_numo(&self) -> Option<Value> {
8
8
  let mut st = None;
9
- for s in self.df.borrow().iter() {
9
+ for s in self.df.read().iter() {
10
10
  let dt_i = s.dtype();
11
11
  match st {
12
12
  None => st = Some(dt_i.clone()),
@@ -11,7 +11,7 @@ use crate::series::RbSeries;
11
11
  impl RbSeries {
12
12
  /// Convert this Series to a Numo array.
13
13
  pub fn to_numo(&self) -> RbResult<Value> {
14
- series_to_numo(&self.series.borrow())
14
+ series_to_numo(&self.series.read())
15
15
  }
16
16
  }
17
17
 
@@ -0,0 +1,39 @@
1
+ use magnus::Ruby;
2
+ use polars::prelude::*;
3
+
4
+ use super::RbLazyFrame;
5
+ use crate::utils::EnterPolarsExt;
6
+ use crate::{RbDataFrame, RbResult};
7
+
8
+ impl RbLazyFrame {
9
+ pub fn collect_concurrently(ruby: &Ruby, self_: &Self) -> RbResult<RbInProcessQuery> {
10
+ let ipq = ruby.enter_polars(|| {
11
+ let ldf = self_.ldf.read().clone();
12
+ ldf.collect_concurrently()
13
+ })?;
14
+ Ok(RbInProcessQuery { ipq })
15
+ }
16
+ }
17
+
18
+ #[magnus::wrap(class = "Polars::RbInProcessQuery")]
19
+ #[repr(transparent)]
20
+ #[derive(Clone)]
21
+ pub struct RbInProcessQuery {
22
+ pub ipq: InProcessQuery,
23
+ }
24
+
25
+ impl RbInProcessQuery {
26
+ pub fn cancel(ruby: &Ruby, self_: &Self) -> RbResult<()> {
27
+ ruby.enter_polars_ok(|| self_.ipq.cancel())
28
+ }
29
+
30
+ pub fn fetch(ruby: &Ruby, self_: &Self) -> RbResult<Option<RbDataFrame>> {
31
+ let out = ruby.enter_polars(|| self_.ipq.fetch().transpose())?;
32
+ Ok(out.map(|df| df.into()))
33
+ }
34
+
35
+ pub fn fetch_blocking(ruby: &Ruby, self_: &Self) -> RbResult<RbDataFrame> {
36
+ let out = ruby.enter_polars(|| self_.ipq.fetch_blocking())?;
37
+ Ok(out.into())
38
+ }
39
+ }