duckdb 0.7.2-dev2430.0 → 0.7.2-dev2507.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev2430.0",
5
+ "version": "0.7.2-dev2507.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -19,6 +19,7 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
19
19
  if (arrow_convert_data.find(col_idx) == arrow_convert_data.end()) {
20
20
  arrow_convert_data[col_idx] = make_uniq<ArrowConvertData>();
21
21
  }
22
+ auto &convert_data = *arrow_convert_data[col_idx];
22
23
  if (format == "n") {
23
24
  return LogicalType::SQLNULL;
24
25
  } else if (format == "b") {
@@ -52,10 +53,10 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
52
53
  }
53
54
  return LogicalType::DECIMAL(width, scale);
54
55
  } else if (format == "u") {
55
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
56
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
56
57
  return LogicalType::VARCHAR;
57
58
  } else if (format == "U") {
58
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
59
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
59
60
  return LogicalType::VARCHAR;
60
61
  } else if (format == "tsn:") {
61
62
  return LogicalTypeId::TIMESTAMP_NS;
@@ -66,56 +67,56 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
66
67
  } else if (format == "tss:") {
67
68
  return LogicalTypeId::TIMESTAMP_SEC;
68
69
  } else if (format == "tdD") {
69
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::DAYS);
70
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::DAYS);
70
71
  return LogicalType::DATE;
71
72
  } else if (format == "tdm") {
72
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
73
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
73
74
  return LogicalType::DATE;
74
75
  } else if (format == "tts") {
75
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
76
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
76
77
  return LogicalType::TIME;
77
78
  } else if (format == "ttm") {
78
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
79
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
79
80
  return LogicalType::TIME;
80
81
  } else if (format == "ttu") {
81
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
82
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
82
83
  return LogicalType::TIME;
83
84
  } else if (format == "ttn") {
84
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
85
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
85
86
  return LogicalType::TIME;
86
87
  } else if (format == "tDs") {
87
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
88
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
88
89
  return LogicalType::INTERVAL;
89
90
  } else if (format == "tDm") {
90
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
91
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
91
92
  return LogicalType::INTERVAL;
92
93
  } else if (format == "tDu") {
93
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
94
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
94
95
  return LogicalType::INTERVAL;
95
96
  } else if (format == "tDn") {
96
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
97
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
97
98
  return LogicalType::INTERVAL;
98
99
  } else if (format == "tiD") {
99
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::DAYS);
100
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::DAYS);
100
101
  return LogicalType::INTERVAL;
101
102
  } else if (format == "tiM") {
102
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MONTHS);
103
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MONTHS);
103
104
  return LogicalType::INTERVAL;
104
105
  } else if (format == "tin") {
105
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MONTH_DAY_NANO);
106
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MONTH_DAY_NANO);
106
107
  return LogicalType::INTERVAL;
107
108
  } else if (format == "+l") {
108
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
109
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
109
110
  auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
110
111
  return LogicalType::LIST(child_type);
111
112
  } else if (format == "+L") {
112
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
113
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
113
114
  auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
114
115
  return LogicalType::LIST(child_type);
115
116
  } else if (format[0] == '+' && format[1] == 'w') {
116
117
  std::string parameters = format.substr(format.find(':') + 1);
117
118
  idx_t fixed_size = std::stoi(parameters);
118
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size);
119
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size);
119
120
  auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
120
121
  return LogicalType::LIST(child_type);
121
122
  } else if (format == "+s") {
@@ -127,7 +128,7 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
127
128
  return LogicalType::STRUCT(child_types);
128
129
 
129
130
  } else if (format == "+m") {
130
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
131
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
131
132
 
132
133
  auto &arrow_struct_type = *schema.children[0];
133
134
  D_ASSERT(arrow_struct_type.n_children == 2);
@@ -135,26 +136,26 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
135
136
  auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1], arrow_convert_data, col_idx);
136
137
  return LogicalType::MAP(key_type, value_type);
137
138
  } else if (format == "z") {
138
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
139
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
139
140
  return LogicalType::BLOB;
140
141
  } else if (format == "Z") {
141
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
142
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
142
143
  return LogicalType::BLOB;
143
144
  } else if (format[0] == 'w') {
144
145
  std::string parameters = format.substr(format.find(':') + 1);
145
146
  idx_t fixed_size = std::stoi(parameters);
146
- arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size);
147
+ convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size);
147
148
  return LogicalType::BLOB;
148
149
  } else if (format[0] == 't' && format[1] == 's') {
149
150
  // Timestamp with Timezone
150
151
  if (format[2] == 'n') {
151
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
152
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
152
153
  } else if (format[2] == 'u') {
153
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
154
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
154
155
  } else if (format[2] == 'm') {
155
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
156
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
156
157
  } else if (format[2] == 's') {
157
- arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
158
+ convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
158
159
  } else {
159
160
  throw NotImplementedException(" Timestamptz precision of not accepted");
160
161
  }
@@ -5,9 +5,19 @@
5
5
  #include "duckdb/common/types/arrow_aux_data.hpp"
6
6
  #include "duckdb/function/scalar/nested_functions.hpp"
7
7
 
8
+ namespace {
9
+ using duckdb::idx_t;
10
+ struct ArrowConvertDataIndices {
11
+ //! The index that refers to 'variable_sz_type' in ArrowConvertData
12
+ idx_t variable_sized_index;
13
+ //! The index that refers to 'date_time_precision' in ArrowConvertData
14
+ idx_t datetime_precision_index;
15
+ };
16
+ } // namespace
17
+
8
18
  namespace duckdb {
9
19
 
10
- void ShiftRight(unsigned char *ar, int size, int shift) {
20
+ static void ShiftRight(unsigned char *ar, int size, int shift) {
11
21
  int carry = 0;
12
22
  while (shift--) {
13
23
  for (int i = size - 1; i >= 0; --i) {
@@ -18,8 +28,8 @@ void ShiftRight(unsigned char *ar, int size, int shift) {
18
28
  }
19
29
  }
20
30
 
21
- void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
22
- int64_t nested_offset = -1, bool add_null = false) {
31
+ static void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
32
+ int64_t nested_offset = -1, bool add_null = false) {
23
33
  // In certain cases we don't need to or cannot copy arrow's validity mask to duckdb.
24
34
  //
25
35
  // The conditions where we do want to copy arrow's mask to duckdb are:
@@ -68,22 +78,23 @@ void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState
68
78
  }
69
79
  }
70
80
 
71
- void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
72
- int64_t nested_offset, bool add_null = false) {
81
+ static void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
82
+ int64_t nested_offset, bool add_null = false) {
73
83
  D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR);
74
84
  auto &mask = FlatVector::Validity(vector);
75
85
  GetValidityMask(mask, array, scan_state, size, nested_offset, add_null);
76
86
  }
77
87
 
78
- void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
79
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
80
- std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset = -1,
81
- ValidityMask *parent_mask = nullptr);
88
+ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
89
+ std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
90
+ idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset = -1,
91
+ ValidityMask *parent_mask = nullptr);
82
92
 
83
- void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
84
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
85
- std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset, ValidityMask *parent_mask) {
86
- auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.first++];
93
+ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
94
+ std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
95
+ idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset,
96
+ ValidityMask *parent_mask) {
97
+ auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
87
98
  idx_t list_size = 0;
88
99
  SetValidityMask(vector, array, scan_state, size, nested_offset);
89
100
  idx_t start_offset = 0;
@@ -157,10 +168,10 @@ void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &s
157
168
  }
158
169
  }
159
170
 
160
- void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
161
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
162
- std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset) {
163
- auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.first++];
171
+ static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
172
+ std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
173
+ idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset) {
174
+ auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
164
175
  SetValidityMask(vector, array, scan_state, size, nested_offset);
165
176
  if (original_type.first == ArrowVariableSizeType::FIXED_SIZE) {
166
177
  //! Have to check validity mask before setting this up
@@ -195,7 +206,7 @@ void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &s
195
206
  } else {
196
207
  //! Check if last offset is higher than max uint32
197
208
  if (((uint64_t *)array.buffers[1])[array.length] > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
198
- throw std::runtime_error("DuckDB does not support Blobs over 4GB");
209
+ throw ConversionException("DuckDB does not support Blobs over 4GB");
199
210
  } // LCOV_EXCL_STOP
200
211
  auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
201
212
  if (nested_offset != -1) {
@@ -213,7 +224,7 @@ void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &s
213
224
  }
214
225
  }
215
226
 
216
- void ArrowToDuckDBMapVerify(Vector &vector, idx_t count) {
227
+ static void ArrowToDuckDBMapVerify(Vector &vector, idx_t count) {
217
228
  auto valid_check = CheckMapValidity(vector, count);
218
229
  switch (valid_check) {
219
230
  case MapInvalidReason::VALID:
@@ -246,7 +257,8 @@ static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets)
246
257
  }
247
258
  }
248
259
 
249
- void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset) {
260
+ static void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
261
+ int64_t nested_offset) {
250
262
  auto internal_type = GetTypeIdSize(vector.GetType().InternalType());
251
263
  auto data_ptr = (data_ptr_t)array.buffers[1] + internal_type * (scan_state.chunk_offset + array.offset);
252
264
  if (nested_offset != -1) {
@@ -256,8 +268,8 @@ void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &sc
256
268
  }
257
269
 
258
270
  template <class T>
259
- void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
260
- idx_t size, int64_t conversion) {
271
+ static void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
272
+ idx_t size, int64_t conversion) {
261
273
  auto tgt_ptr = (dtime_t *)FlatVector::GetData(vector);
262
274
  auto &validity_mask = FlatVector::Validity(vector);
263
275
  auto src_ptr = (T *)array.buffers[1] + scan_state.chunk_offset + array.offset;
@@ -274,8 +286,8 @@ void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan
274
286
  }
275
287
  }
276
288
 
277
- void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
278
- idx_t size, int64_t conversion) {
289
+ static void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
290
+ int64_t nested_offset, idx_t size, int64_t conversion) {
279
291
  auto tgt_ptr = (timestamp_t *)FlatVector::GetData(vector);
280
292
  auto &validity_mask = FlatVector::Validity(vector);
281
293
  auto src_ptr = (int64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset;
@@ -292,8 +304,8 @@ void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLocalStat
292
304
  }
293
305
  }
294
306
 
295
- void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
296
- idx_t size, int64_t conversion) {
307
+ static void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
308
+ int64_t nested_offset, idx_t size, int64_t conversion) {
297
309
  auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
298
310
  auto src_ptr = (int64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset;
299
311
  if (nested_offset != -1) {
@@ -308,8 +320,8 @@ void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLocalState
308
320
  }
309
321
  }
310
322
 
311
- void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
312
- idx_t size) {
323
+ static void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
324
+ int64_t nested_offset, idx_t size) {
313
325
  auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
314
326
  auto src_ptr = (int32_t *)array.buffers[1] + scan_state.chunk_offset + array.offset;
315
327
  if (nested_offset != -1) {
@@ -322,8 +334,8 @@ void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalS
322
334
  }
323
335
  }
324
336
 
325
- void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
326
- int64_t nested_offset, idx_t size) {
337
+ static void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
338
+ int64_t nested_offset, idx_t size) {
327
339
  auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
328
340
  auto src_ptr = (ArrowInterval *)array.buffers[1] + scan_state.chunk_offset + array.offset;
329
341
  if (nested_offset != -1) {
@@ -336,9 +348,10 @@ void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, ArrowSca
336
348
  }
337
349
  }
338
350
 
339
- void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
340
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
341
- std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset, ValidityMask *parent_mask) {
351
+ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
352
+ std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
353
+ idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset,
354
+ ValidityMask *parent_mask) {
342
355
  switch (vector.GetType().id()) {
343
356
  case LogicalTypeId::SQLNULL:
344
357
  vector.Reference(Value());
@@ -390,11 +403,11 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
390
403
  break;
391
404
  }
392
405
  case LogicalTypeId::VARCHAR: {
393
- auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.first++];
406
+ auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
394
407
  auto cdata = (char *)array.buffers[2];
395
408
  if (original_type.first == ArrowVariableSizeType::SUPER_SIZE) {
396
409
  if (((uint64_t *)array.buffers[1])[array.length] > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
397
- throw std::runtime_error("DuckDB does not support Strings over 4GB");
410
+ throw ConversionException("DuckDB does not support Strings over 4GB");
398
411
  } // LCOV_EXCL_STOP
399
412
  auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
400
413
  if (nested_offset != -1) {
@@ -411,7 +424,7 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
411
424
  break;
412
425
  }
413
426
  case LogicalTypeId::DATE: {
414
- auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.second++];
427
+ auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
415
428
  switch (precision) {
416
429
  case ArrowDateTimeType::DAYS: {
417
430
  DirectConversion(vector, array, scan_state, nested_offset);
@@ -430,12 +443,12 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
430
443
  break;
431
444
  }
432
445
  default:
433
- throw std::runtime_error("Unsupported precision for Date Type ");
446
+ throw NotImplementedException("Unsupported precision for Date Type ");
434
447
  }
435
448
  break;
436
449
  }
437
450
  case LogicalTypeId::TIME: {
438
- auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.second++];
451
+ auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
439
452
  switch (precision) {
440
453
  case ArrowDateTimeType::SECONDS: {
441
454
  TimeConversion<int32_t>(vector, array, scan_state, nested_offset, size, 1000000);
@@ -461,12 +474,12 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
461
474
  break;
462
475
  }
463
476
  default:
464
- throw std::runtime_error("Unsupported precision for Time Type ");
477
+ throw NotImplementedException("Unsupported precision for Time Type ");
465
478
  }
466
479
  break;
467
480
  }
468
481
  case LogicalTypeId::TIMESTAMP_TZ: {
469
- auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.second++];
482
+ auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
470
483
  switch (precision) {
471
484
  case ArrowDateTimeType::SECONDS: {
472
485
  TimestampTZConversion(vector, array, scan_state, nested_offset, size, 1000000);
@@ -492,12 +505,12 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
492
505
  break;
493
506
  }
494
507
  default:
495
- throw std::runtime_error("Unsupported precision for TimestampTZ Type ");
508
+ throw NotImplementedException("Unsupported precision for TimestampTZ Type ");
496
509
  }
497
510
  break;
498
511
  }
499
512
  case LogicalTypeId::INTERVAL: {
500
- auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.second++];
513
+ auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
501
514
  switch (precision) {
502
515
  case ArrowDateTimeType::SECONDS: {
503
516
  IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1000000);
@@ -534,7 +547,7 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
534
547
  break;
535
548
  }
536
549
  default:
537
- throw std::runtime_error("Unsupported precision for Interval/Duration Type ");
550
+ throw NotImplementedException("Unsupported precision for Interval/Duration Type ");
538
551
  }
539
552
  break;
540
553
  }
@@ -585,8 +598,8 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
585
598
  break;
586
599
  }
587
600
  default:
588
- throw std::runtime_error("Unsupported physical type for Decimal: " +
589
- TypeIdToString(vector.GetType().InternalType()));
601
+ throw NotImplementedException("Unsupported physical type for Decimal: %s",
602
+ TypeIdToString(vector.GetType().InternalType()));
590
603
  }
591
604
  break;
592
605
  }
@@ -626,7 +639,7 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
626
639
  break;
627
640
  }
628
641
  default:
629
- throw std::runtime_error("Unsupported type " + vector.GetType().ToString());
642
+ throw NotImplementedException("Unsupported type %s", vector.GetType().ToString());
630
643
  }
631
644
  }
632
645
 
@@ -644,7 +657,7 @@ static void SetSelectionVectorLoopWithChecks(SelectionVector &sel, data_ptr_t in
644
657
  auto indices = (T *)indices_p;
645
658
  for (idx_t row = 0; row < size; row++) {
646
659
  if (indices[row] > NumericLimits<uint32_t>::Maximum()) {
647
- throw std::runtime_error("DuckDB only supports indices that fit on an uint32");
660
+ throw ConversionException("DuckDB only supports indices that fit on an uint32");
648
661
  }
649
662
  sel.set_index(row, indices[row]);
650
663
  }
@@ -664,8 +677,8 @@ static void SetMaskedSelectionVectorLoop(SelectionVector &sel, data_ptr_t indice
664
677
  }
665
678
  }
666
679
 
667
- void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType &logical_type, idx_t size,
668
- ValidityMask *mask = nullptr, idx_t last_element_pos = 0) {
680
+ static void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType &logical_type, idx_t size,
681
+ ValidityMask *mask = nullptr, idx_t last_element_pos = 0) {
669
682
  sel.Initialize(size);
670
683
 
671
684
  if (mask) {
@@ -685,7 +698,7 @@ void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType
685
698
  case LogicalTypeId::UINTEGER:
686
699
  if (last_element_pos > NumericLimits<uint32_t>::Maximum()) {
687
700
  //! It's guaranteed that our indices will point to the last element, so just throw an error
688
- throw std::runtime_error("DuckDB only supports indices that fit on an uint32");
701
+ throw ConversionException("DuckDB only supports indices that fit on an uint32");
689
702
  }
690
703
  SetMaskedSelectionVectorLoop<uint32_t>(sel, indices_p, size, *mask, last_element_pos);
691
704
  break;
@@ -695,20 +708,20 @@ void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType
695
708
  case LogicalTypeId::UBIGINT:
696
709
  if (last_element_pos > NumericLimits<uint32_t>::Maximum()) {
697
710
  //! It's guaranteed that our indices will point to the last element, so just throw an error
698
- throw std::runtime_error("DuckDB only supports indices that fit on an uint32");
711
+ throw ConversionException("DuckDB only supports indices that fit on an uint32");
699
712
  }
700
713
  SetMaskedSelectionVectorLoop<uint64_t>(sel, indices_p, size, *mask, last_element_pos);
701
714
  break;
702
715
  case LogicalTypeId::BIGINT:
703
716
  if (last_element_pos > NumericLimits<uint32_t>::Maximum()) {
704
717
  //! It's guaranteed that our indices will point to the last element, so just throw an error
705
- throw std::runtime_error("DuckDB only supports indices that fit on an uint32");
718
+ throw ConversionException("DuckDB only supports indices that fit on an uint32");
706
719
  }
707
720
  SetMaskedSelectionVectorLoop<int64_t>(sel, indices_p, size, *mask, last_element_pos);
708
721
  break;
709
722
 
710
723
  default:
711
- throw std::runtime_error("(Arrow) Unsupported type for selection vectors " + logical_type.ToString());
724
+ throw NotImplementedException("(Arrow) Unsupported type for selection vectors %s", logical_type.ToString());
712
725
  }
713
726
 
714
727
  } else {
@@ -748,17 +761,18 @@ void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType
748
761
  }
749
762
  break;
750
763
  default:
751
- throw std::runtime_error("(Arrow) Unsupported type for selection vectors " + logical_type.ToString());
764
+ throw ConversionException("(Arrow) Unsupported type for selection vectors %s", logical_type.ToString());
752
765
  }
753
766
  }
754
767
  }
755
768
 
756
- void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
757
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
758
- idx_t col_idx, std::pair<idx_t, idx_t> &arrow_convert_idx) {
769
+ static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
770
+ idx_t size,
771
+ std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
772
+ idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx) {
759
773
  SelectionVector sel;
760
774
  auto &dict_vectors = scan_state.arrow_dictionary_vectors;
761
- if (dict_vectors.find(col_idx) == dict_vectors.end()) {
775
+ if (!dict_vectors.count(col_idx)) {
762
776
  //! We need to set the dictionary data for this column
763
777
  auto base_vector = make_uniq<Vector>(vector.GetType(), array.dictionary->length);
764
778
  SetValidityMask(*base_vector, *array.dictionary, scan_state, array.dictionary->length, 0, array.null_count > 0);
@@ -791,10 +805,11 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
791
805
  auto arrow_array_idx = arrow_scan_is_projected ? idx : col_idx;
792
806
 
793
807
  if (col_idx == COLUMN_IDENTIFIER_ROW_ID) {
808
+ // This column is skipped by the projection pushdown
794
809
  continue;
795
810
  }
796
811
 
797
- std::pair<idx_t, idx_t> arrow_convert_idx {0, 0};
812
+ ArrowConvertDataIndices arrow_convert_idx {0, 0};
798
813
  auto &array = *scan_state.chunk->arrow_array.children[arrow_array_idx];
799
814
  if (!array.release) {
800
815
  throw InvalidInputException("arrow_scan: released array passed");
@@ -802,6 +817,7 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
802
817
  if (array.length != scan_state.chunk->arrow_array.length) {
803
818
  throw InvalidInputException("arrow_scan: array length mismatch");
804
819
  }
820
+ // Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
805
821
  output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.chunk));
806
822
  if (array.dictionary) {
807
823
  ColumnArrowToDuckDBDictionary(output.data[idx], array, scan_state, output.size(), arrow_convert_data,
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev2430"
2
+ #define DUCKDB_VERSION "0.7.2-dev2507"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "f429595834"
5
+ #define DUCKDB_SOURCE_ID "c5737e4a94"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"