polars-df 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +211 -320
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +13 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +187 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +1 -1
- data/ext/polars/src/expr/datatype.rs +14 -0
- data/ext/polars/src/expr/general.rs +36 -44
- data/ext/polars/src/expr/list.rs +27 -17
- data/ext/polars/src/expr/meta.rs +18 -41
- data/ext/polars/src/expr/mod.rs +3 -1
- data/ext/polars/src/expr/name.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +1 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +14 -7
- data/ext/polars/src/file.rs +12 -6
- data/ext/polars/src/functions/io.rs +2 -11
- data/ext/polars/src/functions/lazy.rs +22 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/range.rs +14 -10
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +75 -113
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +104 -26
- data/ext/polars/src/map/dataframe.rs +7 -7
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +8 -8
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +12 -207
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +48 -39
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +213 -17
- data/lib/polars/list_name_space.rb +121 -8
- data/lib/polars/meta_expr.rb +14 -29
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +46 -19
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +12 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +17 -2
data/ext/polars/src/file.rs
CHANGED
@@ -3,17 +3,18 @@ use std::io;
|
|
3
3
|
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
|
-
use magnus::{
|
6
|
+
use magnus::{Error, RString, Ruby, Value, exception, prelude::*, value::Opaque};
|
7
7
|
use polars::io::cloud::CloudOptions;
|
8
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars::prelude::PlPath;
|
9
10
|
use polars::prelude::file::DynWriteable;
|
10
11
|
use polars::prelude::sync_on_close::SyncOnCloseType;
|
11
12
|
use polars_utils::file::ClosableFile;
|
12
13
|
use polars_utils::mmap::MemSlice;
|
13
14
|
|
15
|
+
use crate::RbResult;
|
14
16
|
use crate::error::RbPolarsErr;
|
15
17
|
use crate::prelude::resolve_homedir;
|
16
|
-
use crate::RbResult;
|
17
18
|
|
18
19
|
#[derive(Clone)]
|
19
20
|
pub struct RbFileLikeObject {
|
@@ -93,7 +94,7 @@ impl RbFileLikeObject {
|
|
93
94
|
|
94
95
|
/// Extracts a string repr from, and returns an IO error to send back to rust.
|
95
96
|
fn rberr_to_io_err(e: Error) -> io::Error {
|
96
|
-
io::Error::
|
97
|
+
io::Error::other(e.to_string())
|
97
98
|
}
|
98
99
|
|
99
100
|
impl Read for RbFileLikeObject {
|
@@ -188,7 +189,7 @@ impl EitherRustRubyFile {
|
|
188
189
|
|
189
190
|
pub enum RubyScanSourceInput {
|
190
191
|
Buffer(MemSlice),
|
191
|
-
Path(
|
192
|
+
Path(PlPath),
|
192
193
|
#[allow(dead_code)]
|
193
194
|
File(File),
|
194
195
|
}
|
@@ -202,8 +203,13 @@ pub(crate) fn try_get_rbfile(
|
|
202
203
|
}
|
203
204
|
|
204
205
|
pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
|
205
|
-
if let Ok(
|
206
|
-
|
206
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
207
|
+
let mut file_path = PlPath::new(&s);
|
208
|
+
if let Some(p) = file_path.as_ref().as_local_path() {
|
209
|
+
if p.starts_with("~/") {
|
210
|
+
file_path = PlPath::Local(resolve_homedir(&p).into());
|
211
|
+
}
|
212
|
+
}
|
207
213
|
Ok(RubyScanSourceInput::Path(file_path))
|
208
214
|
} else {
|
209
215
|
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
@@ -1,13 +1,10 @@
|
|
1
1
|
use std::io::BufReader;
|
2
2
|
|
3
|
-
use arrow::array::Utf8ViewArray;
|
4
3
|
use magnus::{RHash, Value};
|
5
4
|
use polars::prelude::ArrowSchema;
|
6
|
-
use polars_core::datatypes::create_enum_dtype;
|
7
5
|
|
8
6
|
use crate::conversion::Wrap;
|
9
|
-
use crate::file::{
|
10
|
-
use crate::prelude::ArrowDataType;
|
7
|
+
use crate::file::{EitherRustRubyFile, get_either_file};
|
11
8
|
use crate::{RbPolarsErr, RbResult};
|
12
9
|
|
13
10
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
@@ -42,13 +39,7 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
|
42
39
|
|
43
40
|
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
44
41
|
for field in schema.iter_values() {
|
45
|
-
let dt =
|
46
|
-
Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
|
47
|
-
ArrowDataType::Utf8View,
|
48
|
-
)))
|
49
|
-
} else {
|
50
|
-
Wrap(polars::prelude::DataType::from_arrow_field(field))
|
51
|
-
};
|
42
|
+
let dt = Wrap(polars::prelude::DataType::from_arrow_field(field));
|
52
43
|
dict.aset(field.name.as_str(), dt)?;
|
53
44
|
}
|
54
45
|
Ok(())
|
@@ -1,13 +1,13 @@
|
|
1
1
|
use magnus::encoding::{self, EncodingCapable};
|
2
2
|
use magnus::{
|
3
|
-
|
3
|
+
Float, Integer, RArray, RString, Ruby, Value, class, prelude::*, typed_data::Obj, value::Opaque,
|
4
4
|
};
|
5
5
|
use polars::lazy::dsl;
|
6
6
|
use polars::prelude::*;
|
7
7
|
|
8
|
-
use crate::conversion::{get_lf, get_rbseq
|
8
|
+
use crate::conversion::{Wrap, get_lf, get_rbseq};
|
9
|
+
use crate::expr::datatype::RbDataTypeExpr;
|
9
10
|
use crate::map::lazy::binary_lambda;
|
10
|
-
use crate::prelude::vec_extract_wrapped;
|
11
11
|
use crate::rb_exprs_to_exprs;
|
12
12
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
13
13
|
|
@@ -103,10 +103,6 @@ pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
|
103
103
|
})))
|
104
104
|
}
|
105
105
|
|
106
|
-
pub fn cols(names: Vec<String>) -> RbExpr {
|
107
|
-
dsl::cols(names).into()
|
108
|
-
}
|
109
|
-
|
110
106
|
pub fn concat_lf(
|
111
107
|
lfs: Value,
|
112
108
|
rechunk: bool,
|
@@ -166,20 +162,24 @@ pub fn cum_fold(
|
|
166
162
|
acc: &RbExpr,
|
167
163
|
lambda: Value,
|
168
164
|
exprs: RArray,
|
165
|
+
returns_scalar: bool,
|
166
|
+
return_dtype: Option<&RbDataTypeExpr>,
|
169
167
|
include_init: bool,
|
170
168
|
) -> RbResult<RbExpr> {
|
171
169
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
172
170
|
let lambda = Opaque::from(lambda);
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
171
|
+
let func = PlanCallback::new(move |(a, b): (Series, Series)| {
|
172
|
+
binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b).map(|v| v.unwrap())
|
173
|
+
});
|
174
|
+
Ok(dsl::cum_fold_exprs(
|
175
|
+
acc.inner.clone(),
|
176
|
+
func,
|
177
|
+
exprs,
|
178
|
+
returns_scalar,
|
179
|
+
return_dtype.map(|v| v.inner.clone()),
|
180
|
+
include_init,
|
181
|
+
)
|
182
|
+
.into())
|
183
183
|
}
|
184
184
|
|
185
185
|
pub fn concat_lf_diagonal(
|
@@ -220,24 +220,6 @@ pub fn concat_lf_horizontal(lfs: RArray, parallel: bool) -> RbResult<RbLazyFrame
|
|
220
220
|
Ok(lf.into())
|
221
221
|
}
|
222
222
|
|
223
|
-
pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
224
|
-
let dtypes = dtypes
|
225
|
-
.into_iter()
|
226
|
-
.map(Wrap::<DataType>::try_convert)
|
227
|
-
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
228
|
-
let dtypes = vec_extract_wrapped(dtypes);
|
229
|
-
Ok(dsl::dtype_cols(dtypes).into())
|
230
|
-
}
|
231
|
-
|
232
|
-
pub fn index_cols(indices: Vec<i64>) -> RbExpr {
|
233
|
-
if indices.len() == 1 {
|
234
|
-
dsl::nth(indices[0])
|
235
|
-
} else {
|
236
|
-
dsl::index_cols(indices)
|
237
|
-
}
|
238
|
-
.into()
|
239
|
-
}
|
240
|
-
|
241
223
|
#[allow(clippy::too_many_arguments)]
|
242
224
|
pub fn duration(
|
243
225
|
weeks: Option<&RbExpr>,
|
@@ -274,42 +256,28 @@ pub fn duration(
|
|
274
256
|
dsl::duration(args).into()
|
275
257
|
}
|
276
258
|
|
277
|
-
pub fn first() -> RbExpr {
|
278
|
-
dsl::first().into()
|
279
|
-
}
|
280
|
-
|
281
259
|
pub fn fold(
|
282
260
|
acc: &RbExpr,
|
283
261
|
lambda: Value,
|
284
262
|
exprs: RArray,
|
285
263
|
returns_scalar: bool,
|
286
|
-
return_dtype: Option
|
264
|
+
return_dtype: Option<&RbDataTypeExpr>,
|
287
265
|
) -> RbResult<RbExpr> {
|
288
266
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
289
267
|
let lambda = Opaque::from(lambda);
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
Ruby::get().unwrap().get_inner(lambda),
|
294
|
-
a.take_materialized_series(),
|
295
|
-
b.take_materialized_series(),
|
296
|
-
)
|
297
|
-
.map(|v| v.map(Column::from))
|
298
|
-
};
|
268
|
+
let func = PlanCallback::new(move |(a, b): (Series, Series)| {
|
269
|
+
binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b).map(|v| v.unwrap())
|
270
|
+
});
|
299
271
|
Ok(dsl::fold_exprs(
|
300
272
|
acc.inner.clone(),
|
301
273
|
func,
|
302
274
|
exprs,
|
303
275
|
returns_scalar,
|
304
|
-
return_dtype.map(|w| w.
|
276
|
+
return_dtype.map(|w| w.inner.clone()),
|
305
277
|
)
|
306
278
|
.into())
|
307
279
|
}
|
308
280
|
|
309
|
-
pub fn last() -> RbExpr {
|
310
|
-
dsl::last().into()
|
311
|
-
}
|
312
|
-
|
313
281
|
pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
|
314
282
|
if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
|
315
283
|
Ok(dsl::lit(bool::try_convert(value)?).into())
|
@@ -1,8 +1,8 @@
|
|
1
1
|
use magnus::{IntoValue, Value};
|
2
2
|
use polars_core;
|
3
|
+
use polars_core::POOL;
|
3
4
|
use polars_core::fmt::FloatFmt;
|
4
5
|
use polars_core::prelude::IDX_DTYPE;
|
5
|
-
use polars_core::POOL;
|
6
6
|
|
7
7
|
use crate::conversion::Wrap;
|
8
8
|
use crate::{RbResult, RbValueError};
|
@@ -22,7 +22,7 @@ pub fn set_float_fmt(fmt: String) -> RbResult<()> {
|
|
22
22
|
e => {
|
23
23
|
return Err(RbValueError::new_err(format!(
|
24
24
|
"fmt must be one of {{'full', 'mixed'}}, got {e}",
|
25
|
-
)))
|
25
|
+
)));
|
26
26
|
}
|
27
27
|
};
|
28
28
|
polars_core::fmt::set_float_fmt(fmt);
|
@@ -3,7 +3,7 @@ use polars_core::datatypes::{TimeUnit, TimeZone};
|
|
3
3
|
|
4
4
|
use crate::conversion::Wrap;
|
5
5
|
use crate::prelude::*;
|
6
|
-
use crate::RbExpr;
|
6
|
+
use crate::{RbExpr, RbResult};
|
7
7
|
|
8
8
|
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
9
9
|
let start = start.inner.clone();
|
@@ -12,16 +12,20 @@ pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>)
|
|
12
12
|
dsl::int_range(start, end, step, dtype).into()
|
13
13
|
}
|
14
14
|
|
15
|
-
pub fn int_ranges(
|
15
|
+
pub fn int_ranges(
|
16
|
+
start: &RbExpr,
|
17
|
+
end: &RbExpr,
|
18
|
+
step: &RbExpr,
|
19
|
+
dtype: Wrap<DataType>,
|
20
|
+
) -> RbResult<RbExpr> {
|
16
21
|
let dtype = dtype.0;
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
result.into()
|
22
|
+
Ok(dsl::int_ranges(
|
23
|
+
start.inner.clone(),
|
24
|
+
end.inner.clone(),
|
25
|
+
step.inner.clone(),
|
26
|
+
dtype,
|
27
|
+
)
|
28
|
+
.into())
|
25
29
|
}
|
26
30
|
|
27
31
|
pub fn date_range(
|
@@ -1,17 +1,17 @@
|
|
1
1
|
use crate::RbResult;
|
2
2
|
use magnus::{RArray, Ruby, Value};
|
3
|
-
use polars_core::StringCacheHolder;
|
4
3
|
|
5
4
|
pub fn enable_string_cache() {
|
6
|
-
|
5
|
+
// The string cache no longer exists.
|
7
6
|
}
|
8
7
|
|
9
8
|
pub fn disable_string_cache() {
|
10
|
-
|
9
|
+
// The string cache no longer exists.
|
11
10
|
}
|
12
11
|
|
13
12
|
pub fn using_string_cache() -> bool {
|
14
|
-
|
13
|
+
// The string cache no longer exists.
|
14
|
+
true
|
15
15
|
}
|
16
16
|
|
17
17
|
#[magnus::wrap(class = "Polars::RbStringCacheHolder")]
|
@@ -19,7 +19,6 @@ pub struct RbStringCacheHolder {}
|
|
19
19
|
|
20
20
|
impl RbStringCacheHolder {
|
21
21
|
pub fn hold() -> RbResult<Value> {
|
22
|
-
let _hold = StringCacheHolder::hold();
|
23
22
|
Ruby::get().unwrap().yield_splat(RArray::new())
|
24
23
|
}
|
25
24
|
}
|
@@ -3,10 +3,10 @@ use num_traits::{Float, NumCast};
|
|
3
3
|
use polars_core::prelude::*;
|
4
4
|
|
5
5
|
use super::numo_rs::{Element, RbArray1};
|
6
|
+
use crate::RbResult;
|
6
7
|
use crate::error::RbPolarsErr;
|
7
8
|
use crate::raise_err;
|
8
9
|
use crate::series::RbSeries;
|
9
|
-
use crate::RbResult;
|
10
10
|
|
11
11
|
impl RbSeries {
|
12
12
|
/// Convert this Series to a Numo array.
|
@@ -0,0 +1,102 @@
|
|
1
|
+
use std::sync::Arc;
|
2
|
+
|
3
|
+
use magnus::{TryConvert, Value, value::ReprValue};
|
4
|
+
use polars::prelude::deletion::DeletionFilesList;
|
5
|
+
use polars::prelude::{
|
6
|
+
CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
|
7
|
+
UnifiedScanArgs,
|
8
|
+
};
|
9
|
+
use polars_io::{HiveOptions, RowIndex};
|
10
|
+
use polars_utils::IdxSize;
|
11
|
+
use polars_utils::plpath::PlPathRef;
|
12
|
+
use polars_utils::slice_enum::Slice;
|
13
|
+
|
14
|
+
use crate::RbResult;
|
15
|
+
use crate::prelude::Wrap;
|
16
|
+
|
17
|
+
/// Interface to `class ScanOptions` on the Ruby side
|
18
|
+
pub struct RbScanOptions(Value);
|
19
|
+
|
20
|
+
impl TryConvert for RbScanOptions {
|
21
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
22
|
+
Ok(Self(ob))
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
impl RbScanOptions {
|
27
|
+
pub fn extract_unified_scan_args(
|
28
|
+
&self,
|
29
|
+
// For cloud_options init
|
30
|
+
first_path: Option<PlPathRef>,
|
31
|
+
) -> RbResult<UnifiedScanArgs> {
|
32
|
+
let row_index: Option<(Wrap<PlSmallStr>, IdxSize)> = self.0.funcall("row_index", ())?;
|
33
|
+
let pre_slice: Option<(i64, usize)> = self.0.funcall("pre_slice", ())?;
|
34
|
+
let cast_options: Wrap<CastColumnsPolicy> = self.0.funcall("cast_options", ())?;
|
35
|
+
let extra_columns: Wrap<ExtraColumnsPolicy> = self.0.funcall("extra_columns", ())?;
|
36
|
+
let missing_columns: Wrap<MissingColumnsPolicy> = self.0.funcall("missing_columns", ())?;
|
37
|
+
let include_file_paths: Option<Wrap<PlSmallStr>> =
|
38
|
+
self.0.funcall("include_file_paths", ())?;
|
39
|
+
let glob: bool = self.0.funcall("glob", ())?;
|
40
|
+
let hive_partitioning: Option<bool> = self.0.funcall("hive_partitioning", ())?;
|
41
|
+
let hive_schema: Option<Wrap<Schema>> = self.0.funcall("hive_schema", ())?;
|
42
|
+
let try_parse_hive_dates: bool = self.0.funcall("try_parse_hive_dates", ())?;
|
43
|
+
let rechunk: bool = self.0.funcall("rechunk", ())?;
|
44
|
+
let cache: bool = self.0.funcall("cache", ())?;
|
45
|
+
let storage_options: Option<Vec<(String, String)>> =
|
46
|
+
self.0.funcall("storage_options", ())?;
|
47
|
+
let retries: usize = self.0.funcall("retries", ())?;
|
48
|
+
let deletion_files: Option<Wrap<DeletionFilesList>> =
|
49
|
+
self.0.funcall("deletion_files", ())?;
|
50
|
+
let column_mapping: Option<Wrap<ColumnMapping>> = self.0.funcall("column_mapping", ())?;
|
51
|
+
|
52
|
+
let cloud_options = storage_options;
|
53
|
+
|
54
|
+
let cloud_options = if let Some(first_path) = first_path {
|
55
|
+
use crate::prelude::parse_cloud_options;
|
56
|
+
|
57
|
+
let first_path_url = first_path.to_str();
|
58
|
+
let cloud_options =
|
59
|
+
parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
|
60
|
+
|
61
|
+
Some(cloud_options.with_max_retries(retries))
|
62
|
+
} else {
|
63
|
+
None
|
64
|
+
};
|
65
|
+
|
66
|
+
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
67
|
+
|
68
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
69
|
+
name: name.0,
|
70
|
+
offset,
|
71
|
+
});
|
72
|
+
|
73
|
+
let hive_options = HiveOptions {
|
74
|
+
enabled: hive_partitioning,
|
75
|
+
hive_start_idx: 0,
|
76
|
+
schema: hive_schema,
|
77
|
+
try_parse_dates: try_parse_hive_dates,
|
78
|
+
};
|
79
|
+
|
80
|
+
let unified_scan_args = UnifiedScanArgs {
|
81
|
+
// Schema is currently still stored inside the options per scan type, but we do eventually
|
82
|
+
// want to put it here instead.
|
83
|
+
schema: None,
|
84
|
+
cloud_options,
|
85
|
+
hive_options,
|
86
|
+
rechunk,
|
87
|
+
cache,
|
88
|
+
glob,
|
89
|
+
projection: None,
|
90
|
+
row_index,
|
91
|
+
pre_slice: pre_slice.map(Slice::from),
|
92
|
+
cast_columns_policy: cast_options.0,
|
93
|
+
missing_columns_policy: missing_columns.0,
|
94
|
+
extra_columns_policy: extra_columns.0,
|
95
|
+
include_file_paths: include_file_paths.map(|x| x.0),
|
96
|
+
deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
|
97
|
+
column_mapping: column_mapping.map(|x| x.0),
|
98
|
+
};
|
99
|
+
|
100
|
+
Ok(unified_scan_args)
|
101
|
+
}
|
102
|
+
}
|