duckdb 0.7.2-dev2410.0 → 0.7.2-dev2507.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
@@ -19,6 +19,7 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
19
19
|
if (arrow_convert_data.find(col_idx) == arrow_convert_data.end()) {
|
20
20
|
arrow_convert_data[col_idx] = make_uniq<ArrowConvertData>();
|
21
21
|
}
|
22
|
+
auto &convert_data = *arrow_convert_data[col_idx];
|
22
23
|
if (format == "n") {
|
23
24
|
return LogicalType::SQLNULL;
|
24
25
|
} else if (format == "b") {
|
@@ -52,10 +53,10 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
52
53
|
}
|
53
54
|
return LogicalType::DECIMAL(width, scale);
|
54
55
|
} else if (format == "u") {
|
55
|
-
|
56
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
56
57
|
return LogicalType::VARCHAR;
|
57
58
|
} else if (format == "U") {
|
58
|
-
|
59
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
|
59
60
|
return LogicalType::VARCHAR;
|
60
61
|
} else if (format == "tsn:") {
|
61
62
|
return LogicalTypeId::TIMESTAMP_NS;
|
@@ -66,56 +67,56 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
66
67
|
} else if (format == "tss:") {
|
67
68
|
return LogicalTypeId::TIMESTAMP_SEC;
|
68
69
|
} else if (format == "tdD") {
|
69
|
-
|
70
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::DAYS);
|
70
71
|
return LogicalType::DATE;
|
71
72
|
} else if (format == "tdm") {
|
72
|
-
|
73
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
|
73
74
|
return LogicalType::DATE;
|
74
75
|
} else if (format == "tts") {
|
75
|
-
|
76
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
|
76
77
|
return LogicalType::TIME;
|
77
78
|
} else if (format == "ttm") {
|
78
|
-
|
79
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
|
79
80
|
return LogicalType::TIME;
|
80
81
|
} else if (format == "ttu") {
|
81
|
-
|
82
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
|
82
83
|
return LogicalType::TIME;
|
83
84
|
} else if (format == "ttn") {
|
84
|
-
|
85
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
|
85
86
|
return LogicalType::TIME;
|
86
87
|
} else if (format == "tDs") {
|
87
|
-
|
88
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
|
88
89
|
return LogicalType::INTERVAL;
|
89
90
|
} else if (format == "tDm") {
|
90
|
-
|
91
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
|
91
92
|
return LogicalType::INTERVAL;
|
92
93
|
} else if (format == "tDu") {
|
93
|
-
|
94
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
|
94
95
|
return LogicalType::INTERVAL;
|
95
96
|
} else if (format == "tDn") {
|
96
|
-
|
97
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
|
97
98
|
return LogicalType::INTERVAL;
|
98
99
|
} else if (format == "tiD") {
|
99
|
-
|
100
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::DAYS);
|
100
101
|
return LogicalType::INTERVAL;
|
101
102
|
} else if (format == "tiM") {
|
102
|
-
|
103
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MONTHS);
|
103
104
|
return LogicalType::INTERVAL;
|
104
105
|
} else if (format == "tin") {
|
105
|
-
|
106
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MONTH_DAY_NANO);
|
106
107
|
return LogicalType::INTERVAL;
|
107
108
|
} else if (format == "+l") {
|
108
|
-
|
109
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
109
110
|
auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
|
110
111
|
return LogicalType::LIST(child_type);
|
111
112
|
} else if (format == "+L") {
|
112
|
-
|
113
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
|
113
114
|
auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
|
114
115
|
return LogicalType::LIST(child_type);
|
115
116
|
} else if (format[0] == '+' && format[1] == 'w') {
|
116
117
|
std::string parameters = format.substr(format.find(':') + 1);
|
117
118
|
idx_t fixed_size = std::stoi(parameters);
|
118
|
-
|
119
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size);
|
119
120
|
auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
|
120
121
|
return LogicalType::LIST(child_type);
|
121
122
|
} else if (format == "+s") {
|
@@ -127,7 +128,7 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
127
128
|
return LogicalType::STRUCT(child_types);
|
128
129
|
|
129
130
|
} else if (format == "+m") {
|
130
|
-
|
131
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
131
132
|
|
132
133
|
auto &arrow_struct_type = *schema.children[0];
|
133
134
|
D_ASSERT(arrow_struct_type.n_children == 2);
|
@@ -135,26 +136,26 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
135
136
|
auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1], arrow_convert_data, col_idx);
|
136
137
|
return LogicalType::MAP(key_type, value_type);
|
137
138
|
} else if (format == "z") {
|
138
|
-
|
139
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
139
140
|
return LogicalType::BLOB;
|
140
141
|
} else if (format == "Z") {
|
141
|
-
|
142
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
|
142
143
|
return LogicalType::BLOB;
|
143
144
|
} else if (format[0] == 'w') {
|
144
145
|
std::string parameters = format.substr(format.find(':') + 1);
|
145
146
|
idx_t fixed_size = std::stoi(parameters);
|
146
|
-
|
147
|
+
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size);
|
147
148
|
return LogicalType::BLOB;
|
148
149
|
} else if (format[0] == 't' && format[1] == 's') {
|
149
150
|
// Timestamp with Timezone
|
150
151
|
if (format[2] == 'n') {
|
151
|
-
|
152
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
|
152
153
|
} else if (format[2] == 'u') {
|
153
|
-
|
154
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
|
154
155
|
} else if (format[2] == 'm') {
|
155
|
-
|
156
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
|
156
157
|
} else if (format[2] == 's') {
|
157
|
-
|
158
|
+
convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
|
158
159
|
} else {
|
159
160
|
throw NotImplementedException(" Timestamptz precision of not accepted");
|
160
161
|
}
|
@@ -5,9 +5,19 @@
|
|
5
5
|
#include "duckdb/common/types/arrow_aux_data.hpp"
|
6
6
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
7
7
|
|
8
|
+
namespace {
|
9
|
+
using duckdb::idx_t;
|
10
|
+
struct ArrowConvertDataIndices {
|
11
|
+
//! The index that refers to 'variable_sz_type' in ArrowConvertData
|
12
|
+
idx_t variable_sized_index;
|
13
|
+
//! The index that refers to 'date_time_precision' in ArrowConvertData
|
14
|
+
idx_t datetime_precision_index;
|
15
|
+
};
|
16
|
+
} // namespace
|
17
|
+
|
8
18
|
namespace duckdb {
|
9
19
|
|
10
|
-
void ShiftRight(unsigned char *ar, int size, int shift) {
|
20
|
+
static void ShiftRight(unsigned char *ar, int size, int shift) {
|
11
21
|
int carry = 0;
|
12
22
|
while (shift--) {
|
13
23
|
for (int i = size - 1; i >= 0; --i) {
|
@@ -18,8 +28,8 @@ void ShiftRight(unsigned char *ar, int size, int shift) {
|
|
18
28
|
}
|
19
29
|
}
|
20
30
|
|
21
|
-
void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
22
|
-
|
31
|
+
static void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
32
|
+
int64_t nested_offset = -1, bool add_null = false) {
|
23
33
|
// In certains we don't need to or cannot copy arrow's validity mask to duckdb.
|
24
34
|
//
|
25
35
|
// The conditions where we do want to copy arrow's mask to duckdb are:
|
@@ -68,22 +78,23 @@ void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState
|
|
68
78
|
}
|
69
79
|
}
|
70
80
|
|
71
|
-
void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
72
|
-
|
81
|
+
static void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
82
|
+
int64_t nested_offset, bool add_null = false) {
|
73
83
|
D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR);
|
74
84
|
auto &mask = FlatVector::Validity(vector);
|
75
85
|
GetValidityMask(mask, array, scan_state, size, nested_offset, add_null);
|
76
86
|
}
|
77
87
|
|
78
|
-
void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
79
|
-
|
80
|
-
|
81
|
-
|
88
|
+
static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
89
|
+
std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
90
|
+
idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset = -1,
|
91
|
+
ValidityMask *parent_mask = nullptr);
|
82
92
|
|
83
|
-
void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
84
|
-
|
85
|
-
|
86
|
-
|
93
|
+
static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
94
|
+
std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
95
|
+
idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset,
|
96
|
+
ValidityMask *parent_mask) {
|
97
|
+
auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
|
87
98
|
idx_t list_size = 0;
|
88
99
|
SetValidityMask(vector, array, scan_state, size, nested_offset);
|
89
100
|
idx_t start_offset = 0;
|
@@ -157,10 +168,10 @@ void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &s
|
|
157
168
|
}
|
158
169
|
}
|
159
170
|
|
160
|
-
void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
161
|
-
|
162
|
-
|
163
|
-
auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.
|
171
|
+
static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
172
|
+
std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
173
|
+
idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset) {
|
174
|
+
auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
|
164
175
|
SetValidityMask(vector, array, scan_state, size, nested_offset);
|
165
176
|
if (original_type.first == ArrowVariableSizeType::FIXED_SIZE) {
|
166
177
|
//! Have to check validity mask before setting this up
|
@@ -195,7 +206,7 @@ void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &s
|
|
195
206
|
} else {
|
196
207
|
//! Check if last offset is higher than max uint32
|
197
208
|
if (((uint64_t *)array.buffers[1])[array.length] > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
|
198
|
-
throw
|
209
|
+
throw ConversionException("DuckDB does not support Blobs over 4GB");
|
199
210
|
} // LCOV_EXCL_STOP
|
200
211
|
auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
|
201
212
|
if (nested_offset != -1) {
|
@@ -213,7 +224,7 @@ void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &s
|
|
213
224
|
}
|
214
225
|
}
|
215
226
|
|
216
|
-
void ArrowToDuckDBMapVerify(Vector &vector, idx_t count) {
|
227
|
+
static void ArrowToDuckDBMapVerify(Vector &vector, idx_t count) {
|
217
228
|
auto valid_check = CheckMapValidity(vector, count);
|
218
229
|
switch (valid_check) {
|
219
230
|
case MapInvalidReason::VALID:
|
@@ -246,7 +257,8 @@ static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets)
|
|
246
257
|
}
|
247
258
|
}
|
248
259
|
|
249
|
-
void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
260
|
+
static void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
261
|
+
int64_t nested_offset) {
|
250
262
|
auto internal_type = GetTypeIdSize(vector.GetType().InternalType());
|
251
263
|
auto data_ptr = (data_ptr_t)array.buffers[1] + internal_type * (scan_state.chunk_offset + array.offset);
|
252
264
|
if (nested_offset != -1) {
|
@@ -256,8 +268,8 @@ void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &sc
|
|
256
268
|
}
|
257
269
|
|
258
270
|
template <class T>
|
259
|
-
void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
|
260
|
-
|
271
|
+
static void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
|
272
|
+
idx_t size, int64_t conversion) {
|
261
273
|
auto tgt_ptr = (dtime_t *)FlatVector::GetData(vector);
|
262
274
|
auto &validity_mask = FlatVector::Validity(vector);
|
263
275
|
auto src_ptr = (T *)array.buffers[1] + scan_state.chunk_offset + array.offset;
|
@@ -274,8 +286,8 @@ void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan
|
|
274
286
|
}
|
275
287
|
}
|
276
288
|
|
277
|
-
void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
278
|
-
|
289
|
+
static void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
290
|
+
int64_t nested_offset, idx_t size, int64_t conversion) {
|
279
291
|
auto tgt_ptr = (timestamp_t *)FlatVector::GetData(vector);
|
280
292
|
auto &validity_mask = FlatVector::Validity(vector);
|
281
293
|
auto src_ptr = (int64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset;
|
@@ -292,8 +304,8 @@ void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLocalStat
|
|
292
304
|
}
|
293
305
|
}
|
294
306
|
|
295
|
-
void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
296
|
-
|
307
|
+
static void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
308
|
+
int64_t nested_offset, idx_t size, int64_t conversion) {
|
297
309
|
auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
|
298
310
|
auto src_ptr = (int64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset;
|
299
311
|
if (nested_offset != -1) {
|
@@ -308,8 +320,8 @@ void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
308
320
|
}
|
309
321
|
}
|
310
322
|
|
311
|
-
void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
312
|
-
|
323
|
+
static void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
324
|
+
int64_t nested_offset, idx_t size) {
|
313
325
|
auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
|
314
326
|
auto src_ptr = (int32_t *)array.buffers[1] + scan_state.chunk_offset + array.offset;
|
315
327
|
if (nested_offset != -1) {
|
@@ -322,8 +334,8 @@ void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalS
|
|
322
334
|
}
|
323
335
|
}
|
324
336
|
|
325
|
-
void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
326
|
-
|
337
|
+
static void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
338
|
+
int64_t nested_offset, idx_t size) {
|
327
339
|
auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
|
328
340
|
auto src_ptr = (ArrowInterval *)array.buffers[1] + scan_state.chunk_offset + array.offset;
|
329
341
|
if (nested_offset != -1) {
|
@@ -336,9 +348,10 @@ void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, ArrowSca
|
|
336
348
|
}
|
337
349
|
}
|
338
350
|
|
339
|
-
void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
340
|
-
|
341
|
-
|
351
|
+
static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
352
|
+
std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
353
|
+
idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset,
|
354
|
+
ValidityMask *parent_mask) {
|
342
355
|
switch (vector.GetType().id()) {
|
343
356
|
case LogicalTypeId::SQLNULL:
|
344
357
|
vector.Reference(Value());
|
@@ -390,11 +403,11 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
390
403
|
break;
|
391
404
|
}
|
392
405
|
case LogicalTypeId::VARCHAR: {
|
393
|
-
auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.
|
406
|
+
auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
|
394
407
|
auto cdata = (char *)array.buffers[2];
|
395
408
|
if (original_type.first == ArrowVariableSizeType::SUPER_SIZE) {
|
396
409
|
if (((uint64_t *)array.buffers[1])[array.length] > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
|
397
|
-
throw
|
410
|
+
throw ConversionException("DuckDB does not support Strings over 4GB");
|
398
411
|
} // LCOV_EXCL_STOP
|
399
412
|
auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
|
400
413
|
if (nested_offset != -1) {
|
@@ -411,7 +424,7 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
411
424
|
break;
|
412
425
|
}
|
413
426
|
case LogicalTypeId::DATE: {
|
414
|
-
auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.
|
427
|
+
auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
|
415
428
|
switch (precision) {
|
416
429
|
case ArrowDateTimeType::DAYS: {
|
417
430
|
DirectConversion(vector, array, scan_state, nested_offset);
|
@@ -430,12 +443,12 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
430
443
|
break;
|
431
444
|
}
|
432
445
|
default:
|
433
|
-
throw
|
446
|
+
throw NotImplementedException("Unsupported precision for Date Type ");
|
434
447
|
}
|
435
448
|
break;
|
436
449
|
}
|
437
450
|
case LogicalTypeId::TIME: {
|
438
|
-
auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.
|
451
|
+
auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
|
439
452
|
switch (precision) {
|
440
453
|
case ArrowDateTimeType::SECONDS: {
|
441
454
|
TimeConversion<int32_t>(vector, array, scan_state, nested_offset, size, 1000000);
|
@@ -461,12 +474,12 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
461
474
|
break;
|
462
475
|
}
|
463
476
|
default:
|
464
|
-
throw
|
477
|
+
throw NotImplementedException("Unsupported precision for Time Type ");
|
465
478
|
}
|
466
479
|
break;
|
467
480
|
}
|
468
481
|
case LogicalTypeId::TIMESTAMP_TZ: {
|
469
|
-
auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.
|
482
|
+
auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
|
470
483
|
switch (precision) {
|
471
484
|
case ArrowDateTimeType::SECONDS: {
|
472
485
|
TimestampTZConversion(vector, array, scan_state, nested_offset, size, 1000000);
|
@@ -492,12 +505,12 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
492
505
|
break;
|
493
506
|
}
|
494
507
|
default:
|
495
|
-
throw
|
508
|
+
throw NotImplementedException("Unsupported precision for TimestampTZ Type ");
|
496
509
|
}
|
497
510
|
break;
|
498
511
|
}
|
499
512
|
case LogicalTypeId::INTERVAL: {
|
500
|
-
auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.
|
513
|
+
auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
|
501
514
|
switch (precision) {
|
502
515
|
case ArrowDateTimeType::SECONDS: {
|
503
516
|
IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1000000);
|
@@ -534,7 +547,7 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
534
547
|
break;
|
535
548
|
}
|
536
549
|
default:
|
537
|
-
throw
|
550
|
+
throw NotImplementedException("Unsupported precision for Interval/Duration Type ");
|
538
551
|
}
|
539
552
|
break;
|
540
553
|
}
|
@@ -585,8 +598,8 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
585
598
|
break;
|
586
599
|
}
|
587
600
|
default:
|
588
|
-
throw
|
589
|
-
|
601
|
+
throw NotImplementedException("Unsupported physical type for Decimal: %s",
|
602
|
+
TypeIdToString(vector.GetType().InternalType()));
|
590
603
|
}
|
591
604
|
break;
|
592
605
|
}
|
@@ -626,7 +639,7 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
626
639
|
break;
|
627
640
|
}
|
628
641
|
default:
|
629
|
-
throw
|
642
|
+
throw NotImplementedException("Unsupported type %s", vector.GetType().ToString());
|
630
643
|
}
|
631
644
|
}
|
632
645
|
|
@@ -644,7 +657,7 @@ static void SetSelectionVectorLoopWithChecks(SelectionVector &sel, data_ptr_t in
|
|
644
657
|
auto indices = (T *)indices_p;
|
645
658
|
for (idx_t row = 0; row < size; row++) {
|
646
659
|
if (indices[row] > NumericLimits<uint32_t>::Maximum()) {
|
647
|
-
throw
|
660
|
+
throw ConversionException("DuckDB only supports indices that fit on an uint32");
|
648
661
|
}
|
649
662
|
sel.set_index(row, indices[row]);
|
650
663
|
}
|
@@ -664,8 +677,8 @@ static void SetMaskedSelectionVectorLoop(SelectionVector &sel, data_ptr_t indice
|
|
664
677
|
}
|
665
678
|
}
|
666
679
|
|
667
|
-
void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType &logical_type, idx_t size,
|
668
|
-
|
680
|
+
static void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType &logical_type, idx_t size,
|
681
|
+
ValidityMask *mask = nullptr, idx_t last_element_pos = 0) {
|
669
682
|
sel.Initialize(size);
|
670
683
|
|
671
684
|
if (mask) {
|
@@ -685,7 +698,7 @@ void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType
|
|
685
698
|
case LogicalTypeId::UINTEGER:
|
686
699
|
if (last_element_pos > NumericLimits<uint32_t>::Maximum()) {
|
687
700
|
//! Its guaranteed that our indices will point to the last element, so just throw an error
|
688
|
-
throw
|
701
|
+
throw ConversionException("DuckDB only supports indices that fit on an uint32");
|
689
702
|
}
|
690
703
|
SetMaskedSelectionVectorLoop<uint32_t>(sel, indices_p, size, *mask, last_element_pos);
|
691
704
|
break;
|
@@ -695,20 +708,20 @@ void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType
|
|
695
708
|
case LogicalTypeId::UBIGINT:
|
696
709
|
if (last_element_pos > NumericLimits<uint32_t>::Maximum()) {
|
697
710
|
//! Its guaranteed that our indices will point to the last element, so just throw an error
|
698
|
-
throw
|
711
|
+
throw ConversionException("DuckDB only supports indices that fit on an uint32");
|
699
712
|
}
|
700
713
|
SetMaskedSelectionVectorLoop<uint64_t>(sel, indices_p, size, *mask, last_element_pos);
|
701
714
|
break;
|
702
715
|
case LogicalTypeId::BIGINT:
|
703
716
|
if (last_element_pos > NumericLimits<uint32_t>::Maximum()) {
|
704
717
|
//! Its guaranteed that our indices will point to the last element, so just throw an error
|
705
|
-
throw
|
718
|
+
throw ConversionException("DuckDB only supports indices that fit on an uint32");
|
706
719
|
}
|
707
720
|
SetMaskedSelectionVectorLoop<int64_t>(sel, indices_p, size, *mask, last_element_pos);
|
708
721
|
break;
|
709
722
|
|
710
723
|
default:
|
711
|
-
throw
|
724
|
+
throw NotImplementedException("(Arrow) Unsupported type for selection vectors %s", logical_type.ToString());
|
712
725
|
}
|
713
726
|
|
714
727
|
} else {
|
@@ -748,17 +761,18 @@ void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType
|
|
748
761
|
}
|
749
762
|
break;
|
750
763
|
default:
|
751
|
-
throw
|
764
|
+
throw ConversionException("(Arrow) Unsupported type for selection vectors %s", logical_type.ToString());
|
752
765
|
}
|
753
766
|
}
|
754
767
|
}
|
755
768
|
|
756
|
-
void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
757
|
-
|
758
|
-
|
769
|
+
static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
770
|
+
idx_t size,
|
771
|
+
std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
772
|
+
idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx) {
|
759
773
|
SelectionVector sel;
|
760
774
|
auto &dict_vectors = scan_state.arrow_dictionary_vectors;
|
761
|
-
if (dict_vectors.
|
775
|
+
if (!dict_vectors.count(col_idx)) {
|
762
776
|
//! We need to set the dictionary data for this column
|
763
777
|
auto base_vector = make_uniq<Vector>(vector.GetType(), array.dictionary->length);
|
764
778
|
SetValidityMask(*base_vector, *array.dictionary, scan_state, array.dictionary->length, 0, array.null_count > 0);
|
@@ -791,10 +805,11 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
|
|
791
805
|
auto arrow_array_idx = arrow_scan_is_projected ? idx : col_idx;
|
792
806
|
|
793
807
|
if (col_idx == COLUMN_IDENTIFIER_ROW_ID) {
|
808
|
+
// This column is skipped by the projection pushdown
|
794
809
|
continue;
|
795
810
|
}
|
796
811
|
|
797
|
-
|
812
|
+
ArrowConvertDataIndices arrow_convert_idx {0, 0};
|
798
813
|
auto &array = *scan_state.chunk->arrow_array.children[arrow_array_idx];
|
799
814
|
if (!array.release) {
|
800
815
|
throw InvalidInputException("arrow_scan: released array passed");
|
@@ -802,6 +817,7 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
|
|
802
817
|
if (array.length != scan_state.chunk->arrow_array.length) {
|
803
818
|
throw InvalidInputException("arrow_scan: array length mismatch");
|
804
819
|
}
|
820
|
+
// Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
|
805
821
|
output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.chunk));
|
806
822
|
if (array.dictionary) {
|
807
823
|
ColumnArrowToDuckDBDictionary(output.data[idx], array, scan_state, output.size(), arrow_convert_data,
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.7.2-
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev2507"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "c5737e4a94"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|