duckdb 0.8.2-dev2842.0 → 0.8.2-dev3007.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
  5. package/src/duckdb/extension/json/json_deserializer.cpp +7 -5
  6. package/src/duckdb/extension/json/json_serializer.cpp +2 -3
  7. package/src/duckdb/src/common/adbc/adbc.cpp +400 -145
  8. package/src/duckdb/src/common/adbc/driver_manager.cpp +79 -31
  9. package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +57 -0
  10. package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +121 -0
  11. package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +474 -0
  12. package/src/duckdb/src/common/adbc/nanoarrow/single_batch_array_stream.cpp +84 -0
  13. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -2
  14. package/src/duckdb/src/common/multi_file_reader.cpp +6 -0
  15. package/src/duckdb/src/execution/window_executor.cpp +5 -8
  16. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  17. package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1 -0
  18. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +3 -3
  19. package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +16 -0
  20. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -2
  21. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +0 -2
  22. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.h +462 -0
  23. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.hpp +14 -0
  24. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +0 -2
  25. package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +2 -4
  26. package/src/duckdb/src/include/duckdb.h +16 -0
  27. package/src/duckdb/src/main/capi/arrow-c.cpp +41 -0
  28. package/src/duckdb/src/main/capi/prepared-c.cpp +60 -30
  29. package/src/duckdb/src/main/chunk_scan_state.cpp +6 -0
  30. package/src/duckdb/src/main/client_context.cpp +1 -1
  31. package/src/duckdb/src/optimizer/topn_optimizer.cpp +7 -0
  32. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +55 -38
  33. package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
  34. package/src/duckdb/ub_src_common_adbc_nanoarrow.cpp +8 -0
  35. package/src/duckdb_node.hpp +1 -0
  36. package/src/statement.cpp +1 -1
@@ -0,0 +1,474 @@
1
+ // Licensed to the Apache Software Foundation (ASF) under one
2
+ // or more contributor license agreements. See the NOTICE file
3
+ // distributed with this work for additional information
4
+ // regarding copyright ownership. The ASF licenses this file
5
+ // to you under the Apache License, Version 2.0 (the
6
+ // "License"); you may not use this file except in compliance
7
+ // with the License. You may obtain a copy of the License at
8
+ //
9
+ // http://www.apache.org/licenses/LICENSE-2.0
10
+ //
11
+ // Unless required by applicable law or agreed to in writing,
12
+ // software distributed under the License is distributed on an
13
+ // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ // KIND, either express or implied. See the License for the
15
+ // specific language governing permissions and limitations
16
+ // under the License.
17
+
18
+ #include <errno.h>
19
+ #include <stdio.h>
20
+ #include <stdlib.h>
21
+ #include <string.h>
22
+
23
+ #include "duckdb/common/arrow/nanoarrow/nanoarrow.hpp"
24
+
25
+ namespace duckdb_nanoarrow {
26
+
27
+ void ArrowSchemaRelease(struct ArrowSchema *schema) {
28
+ if (schema->format != NULL)
29
+ ArrowFree((void *)schema->format);
30
+ if (schema->name != NULL)
31
+ ArrowFree((void *)schema->name);
32
+ if (schema->metadata != NULL)
33
+ ArrowFree((void *)schema->metadata);
34
+
35
+ // This object owns the memory for all the children, but those
36
+ // children may have been generated elsewhere and might have
37
+ // their own release() callback.
38
+ if (schema->children != NULL) {
39
+ for (int64_t i = 0; i < schema->n_children; i++) {
40
+ if (schema->children[i] != NULL) {
41
+ if (schema->children[i]->release != NULL) {
42
+ schema->children[i]->release(schema->children[i]);
43
+ }
44
+
45
+ ArrowFree(schema->children[i]);
46
+ }
47
+ }
48
+
49
+ ArrowFree(schema->children);
50
+ }
51
+
52
+ // This object owns the memory for the dictionary but it
53
+ // may have been generated somewhere else and have its own
54
+ // release() callback.
55
+ if (schema->dictionary != NULL) {
56
+ if (schema->dictionary->release != NULL) {
57
+ schema->dictionary->release(schema->dictionary);
58
+ }
59
+
60
+ ArrowFree(schema->dictionary);
61
+ }
62
+
63
+ // private data not currently used
64
+ if (schema->private_data != NULL) {
65
+ ArrowFree(schema->private_data);
66
+ }
67
+
68
+ schema->release = NULL;
69
+ }
70
+
71
+ const char *ArrowSchemaFormatTemplate(enum ArrowType data_type) {
72
+ switch (data_type) {
73
+ case NANOARROW_TYPE_UNINITIALIZED:
74
+ return NULL;
75
+ case NANOARROW_TYPE_NA:
76
+ return "n";
77
+ case NANOARROW_TYPE_BOOL:
78
+ return "b";
79
+
80
+ case NANOARROW_TYPE_UINT8:
81
+ return "C";
82
+ case NANOARROW_TYPE_INT8:
83
+ return "c";
84
+ case NANOARROW_TYPE_UINT16:
85
+ return "S";
86
+ case NANOARROW_TYPE_INT16:
87
+ return "s";
88
+ case NANOARROW_TYPE_UINT32:
89
+ return "I";
90
+ case NANOARROW_TYPE_INT32:
91
+ return "i";
92
+ case NANOARROW_TYPE_UINT64:
93
+ return "L";
94
+ case NANOARROW_TYPE_INT64:
95
+ return "l";
96
+
97
+ case NANOARROW_TYPE_HALF_FLOAT:
98
+ return "e";
99
+ case NANOARROW_TYPE_FLOAT:
100
+ return "f";
101
+ case NANOARROW_TYPE_DOUBLE:
102
+ return "g";
103
+
104
+ case NANOARROW_TYPE_STRING:
105
+ return "u";
106
+ case NANOARROW_TYPE_LARGE_STRING:
107
+ return "U";
108
+ case NANOARROW_TYPE_BINARY:
109
+ return "z";
110
+ case NANOARROW_TYPE_LARGE_BINARY:
111
+ return "Z";
112
+
113
+ case NANOARROW_TYPE_DATE32:
114
+ return "tdD";
115
+ case NANOARROW_TYPE_DATE64:
116
+ return "tdm";
117
+ case NANOARROW_TYPE_INTERVAL_MONTHS:
118
+ return "tiM";
119
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME:
120
+ return "tiD";
121
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
122
+ return "tin";
123
+
124
+ case NANOARROW_TYPE_LIST:
125
+ return "+l";
126
+ case NANOARROW_TYPE_LARGE_LIST:
127
+ return "+L";
128
+ case NANOARROW_TYPE_STRUCT:
129
+ return "+s";
130
+ case NANOARROW_TYPE_MAP:
131
+ return "+m";
132
+
133
+ default:
134
+ return NULL;
135
+ }
136
+ }
137
+
138
+ ArrowErrorCode ArrowSchemaInit(struct ArrowSchema *schema, enum ArrowType data_type) {
139
+ schema->format = NULL;
140
+ schema->name = NULL;
141
+ schema->metadata = NULL;
142
+ schema->flags = ARROW_FLAG_NULLABLE;
143
+ schema->n_children = 0;
144
+ schema->children = NULL;
145
+ schema->dictionary = NULL;
146
+ schema->private_data = NULL;
147
+ schema->release = &ArrowSchemaRelease;
148
+
149
+ // We don't allocate the dictionary because it has to be nullptr
150
+ // for non-dictionary-encoded arrays.
151
+
152
+ // Set the format to a valid format string for data_type
153
+ const char *template_format = ArrowSchemaFormatTemplate(data_type);
154
+
155
+ // If data_type isn't recognized and not explicitly unset
156
+ if (template_format == NULL && data_type != NANOARROW_TYPE_UNINITIALIZED) {
157
+ schema->release(schema);
158
+ return EINVAL;
159
+ }
160
+
161
+ int result = ArrowSchemaSetFormat(schema, template_format);
162
+ if (result != NANOARROW_OK) {
163
+ schema->release(schema);
164
+ return result;
165
+ }
166
+
167
+ return NANOARROW_OK;
168
+ }
169
+
170
+ ArrowErrorCode ArrowSchemaInitFixedSize(struct ArrowSchema *schema, enum ArrowType data_type, int32_t fixed_size) {
171
+ int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
172
+ if (result != NANOARROW_OK) {
173
+ return result;
174
+ }
175
+
176
+ if (fixed_size <= 0) {
177
+ schema->release(schema);
178
+ return EINVAL;
179
+ }
180
+
181
+ char buffer[64];
182
+ int n_chars;
183
+ switch (data_type) {
184
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
185
+ n_chars = snprintf(buffer, sizeof(buffer), "w:%d", (int)fixed_size);
186
+ break;
187
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
188
+ n_chars = snprintf(buffer, sizeof(buffer), "+w:%d", (int)fixed_size);
189
+ break;
190
+ default:
191
+ schema->release(schema);
192
+ return EINVAL;
193
+ }
194
+
195
+ buffer[n_chars] = '\0';
196
+ result = ArrowSchemaSetFormat(schema, buffer);
197
+ if (result != NANOARROW_OK) {
198
+ schema->release(schema);
199
+ }
200
+
201
+ return result;
202
+ }
203
+
204
+ ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema *schema, enum ArrowType data_type, int32_t decimal_precision,
205
+ int32_t decimal_scale) {
206
+ int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
207
+ if (result != NANOARROW_OK) {
208
+ return result;
209
+ }
210
+
211
+ if (decimal_precision <= 0) {
212
+ schema->release(schema);
213
+ return EINVAL;
214
+ }
215
+
216
+ char buffer[64];
217
+ int n_chars;
218
+ switch (data_type) {
219
+ case NANOARROW_TYPE_DECIMAL128:
220
+ n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale);
221
+ break;
222
+ case NANOARROW_TYPE_DECIMAL256:
223
+ n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", decimal_precision, decimal_scale);
224
+ break;
225
+ default:
226
+ schema->release(schema);
227
+ return EINVAL;
228
+ }
229
+
230
+ buffer[n_chars] = '\0';
231
+
232
+ result = ArrowSchemaSetFormat(schema, buffer);
233
+ if (result != NANOARROW_OK) {
234
+ schema->release(schema);
235
+ return result;
236
+ }
237
+
238
+ return NANOARROW_OK;
239
+ }
240
+
241
+ static const char *ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
242
+ switch (time_unit) {
243
+ case NANOARROW_TIME_UNIT_SECOND:
244
+ return "s";
245
+ case NANOARROW_TIME_UNIT_MILLI:
246
+ return "m";
247
+ case NANOARROW_TIME_UNIT_MICRO:
248
+ return "u";
249
+ case NANOARROW_TIME_UNIT_NANO:
250
+ return "n";
251
+ default:
252
+ return NULL;
253
+ }
254
+ }
255
+
256
+ ArrowErrorCode ArrowSchemaInitDateTime(struct ArrowSchema *schema, enum ArrowType data_type,
257
+ enum ArrowTimeUnit time_unit, const char *timezone) {
258
+ int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
259
+ if (result != NANOARROW_OK) {
260
+ return result;
261
+ }
262
+
263
+ const char *time_unit_str = ArrowTimeUnitString(time_unit);
264
+ if (time_unit_str == NULL) {
265
+ schema->release(schema);
266
+ return EINVAL;
267
+ }
268
+
269
+ char buffer[128];
270
+ int n_chars;
271
+ switch (data_type) {
272
+ case NANOARROW_TYPE_TIME32:
273
+ case NANOARROW_TYPE_TIME64:
274
+ if (timezone != NULL) {
275
+ schema->release(schema);
276
+ return EINVAL;
277
+ }
278
+ n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str);
279
+ break;
280
+ case NANOARROW_TYPE_TIMESTAMP:
281
+ if (timezone == NULL) {
282
+ timezone = "";
283
+ }
284
+ n_chars = snprintf(buffer, sizeof(buffer), "ts%s:%s", time_unit_str, timezone);
285
+ break;
286
+ case NANOARROW_TYPE_DURATION:
287
+ if (timezone != NULL) {
288
+ schema->release(schema);
289
+ return EINVAL;
290
+ }
291
+ n_chars = snprintf(buffer, sizeof(buffer), "tD%s", time_unit_str);
292
+ break;
293
+ default:
294
+ schema->release(schema);
295
+ return EINVAL;
296
+ }
297
+
298
+ if (static_cast<size_t>(n_chars) >= sizeof(buffer)) {
299
+ schema->release(schema);
300
+ return ERANGE;
301
+ }
302
+
303
+ buffer[n_chars] = '\0';
304
+
305
+ result = ArrowSchemaSetFormat(schema, buffer);
306
+ if (result != NANOARROW_OK) {
307
+ schema->release(schema);
308
+ return result;
309
+ }
310
+
311
+ return NANOARROW_OK;
312
+ }
313
+
314
+ ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema *schema, const char *format) {
315
+ if (schema->format != NULL) {
316
+ ArrowFree((void *)schema->format);
317
+ }
318
+
319
+ if (format != NULL) {
320
+ size_t format_size = strlen(format) + 1;
321
+ schema->format = (const char *)ArrowMalloc(format_size);
322
+ if (schema->format == NULL) {
323
+ return ENOMEM;
324
+ }
325
+
326
+ memcpy((void *)schema->format, format, format_size);
327
+ } else {
328
+ schema->format = NULL;
329
+ }
330
+
331
+ return NANOARROW_OK;
332
+ }
333
+
334
+ ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema *schema, const char *name) {
335
+ if (schema->name != NULL) {
336
+ ArrowFree((void *)schema->name);
337
+ }
338
+
339
+ if (name != NULL) {
340
+ size_t name_size = strlen(name) + 1;
341
+ schema->name = (const char *)ArrowMalloc(name_size);
342
+ if (schema->name == NULL) {
343
+ return ENOMEM;
344
+ }
345
+
346
+ memcpy((void *)schema->name, name, name_size);
347
+ } else {
348
+ schema->name = NULL;
349
+ }
350
+
351
+ return NANOARROW_OK;
352
+ }
353
+
354
+ ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema *schema, const char *metadata) {
355
+ if (schema->metadata != NULL) {
356
+ ArrowFree((void *)schema->metadata);
357
+ }
358
+
359
+ if (metadata != NULL) {
360
+ size_t metadata_size = ArrowMetadataSizeOf(metadata);
361
+ schema->metadata = (const char *)ArrowMalloc(metadata_size);
362
+ if (schema->metadata == NULL) {
363
+ return ENOMEM;
364
+ }
365
+
366
+ memcpy((void *)schema->metadata, metadata, metadata_size);
367
+ } else {
368
+ schema->metadata = NULL;
369
+ }
370
+
371
+ return NANOARROW_OK;
372
+ }
373
+
374
+ ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema *schema, int64_t n_children) {
375
+ if (schema->children != NULL) {
376
+ return EEXIST;
377
+ }
378
+
379
+ if (n_children > 0) {
380
+ schema->children = (struct ArrowSchema **)ArrowMalloc(n_children * sizeof(struct ArrowSchema *));
381
+
382
+ if (schema->children == NULL) {
383
+ return ENOMEM;
384
+ }
385
+
386
+ schema->n_children = n_children;
387
+
388
+ memset(schema->children, 0, n_children * sizeof(struct ArrowSchema *));
389
+
390
+ for (int64_t i = 0; i < n_children; i++) {
391
+ schema->children[i] = (struct ArrowSchema *)ArrowMalloc(sizeof(struct ArrowSchema));
392
+
393
+ if (schema->children[i] == NULL) {
394
+ return ENOMEM;
395
+ }
396
+
397
+ schema->children[i]->release = NULL;
398
+ }
399
+ }
400
+
401
+ return NANOARROW_OK;
402
+ }
403
+
404
+ ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema *schema) {
405
+ if (schema->dictionary != NULL) {
406
+ return EEXIST;
407
+ }
408
+
409
+ schema->dictionary = (struct ArrowSchema *)ArrowMalloc(sizeof(struct ArrowSchema));
410
+ if (schema->dictionary == NULL) {
411
+ return ENOMEM;
412
+ }
413
+
414
+ schema->dictionary->release = NULL;
415
+ return NANOARROW_OK;
416
+ }
417
+
418
+ int ArrowSchemaDeepCopy(struct ArrowSchema *schema, struct ArrowSchema *schema_out) {
419
+ int result;
420
+ result = ArrowSchemaInit(schema_out, NANOARROW_TYPE_NA);
421
+ if (result != NANOARROW_OK) {
422
+ return result;
423
+ }
424
+
425
+ result = ArrowSchemaSetFormat(schema_out, schema->format);
426
+ if (result != NANOARROW_OK) {
427
+ schema_out->release(schema_out);
428
+ return result;
429
+ }
430
+
431
+ result = ArrowSchemaSetName(schema_out, schema->name);
432
+ if (result != NANOARROW_OK) {
433
+ schema_out->release(schema_out);
434
+ return result;
435
+ }
436
+
437
+ result = ArrowSchemaSetMetadata(schema_out, schema->metadata);
438
+ if (result != NANOARROW_OK) {
439
+ schema_out->release(schema_out);
440
+ return result;
441
+ }
442
+
443
+ result = ArrowSchemaAllocateChildren(schema_out, schema->n_children);
444
+ if (result != NANOARROW_OK) {
445
+ schema_out->release(schema_out);
446
+ return result;
447
+ }
448
+
449
+ for (int64_t i = 0; i < schema->n_children; i++) {
450
+ result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]);
451
+ if (result != NANOARROW_OK) {
452
+ schema_out->release(schema_out);
453
+ return result;
454
+ }
455
+ }
456
+
457
+ if (schema->dictionary != NULL) {
458
+ result = ArrowSchemaAllocateDictionary(schema_out);
459
+ if (result != NANOARROW_OK) {
460
+ schema_out->release(schema_out);
461
+ return result;
462
+ }
463
+
464
+ result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary);
465
+ if (result != NANOARROW_OK) {
466
+ schema_out->release(schema_out);
467
+ return result;
468
+ }
469
+ }
470
+
471
+ return NANOARROW_OK;
472
+ }
473
+
474
+ } // namespace duckdb_nanoarrow
@@ -0,0 +1,84 @@
1
+ #include "duckdb/common/adbc/single_batch_array_stream.hpp"
2
+ #include "duckdb/common/arrow/nanoarrow/nanoarrow.h"
3
+ #include "duckdb/common/adbc/adbc.hpp"
4
+
5
+ #include "duckdb.h"
6
+ #include "duckdb/common/arrow/arrow_wrapper.hpp"
7
+ #include "duckdb/common/arrow/nanoarrow/nanoarrow.hpp"
8
+
9
+ #include <errno.h>
10
+ #include <stdarg.h>
11
+ #include <stdio.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+
15
+ namespace duckdb_adbc {
16
+
17
+ using duckdb_nanoarrow::ArrowSchemaDeepCopy;
18
+
19
// get_last_error callback of the single-batch stream: this stream keeps no
// error text, so the result is always a null pointer.
static const char *SingleBatchArrayStreamGetLastError(struct ArrowArrayStream *stream) {
	(void)stream; // unused
	return nullptr;
}
22
+
23
+ static int SingleBatchArrayStreamGetNext(struct ArrowArrayStream *stream, struct ArrowArray *batch) {
24
+ if (!stream || !stream->private_data) {
25
+ return EINVAL;
26
+ }
27
+ struct SingleBatchArrayStream *impl = (struct SingleBatchArrayStream *)stream->private_data;
28
+
29
+ memcpy(batch, &impl->batch, sizeof(*batch));
30
+ memset(&impl->batch, 0, sizeof(*batch));
31
+ return 0;
32
+ }
33
+
34
+ static int SingleBatchArrayStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *schema) {
35
+ if (!stream || !stream->private_data) {
36
+ return EINVAL;
37
+ }
38
+ struct SingleBatchArrayStream *impl = (struct SingleBatchArrayStream *)stream->private_data;
39
+
40
+ return ArrowSchemaDeepCopy(&impl->schema, schema);
41
+ }
42
+
43
+ static void SingleBatchArrayStreamRelease(struct ArrowArrayStream *stream) {
44
+ if (!stream || !stream->private_data) {
45
+ return;
46
+ }
47
+ struct SingleBatchArrayStream *impl = (struct SingleBatchArrayStream *)stream->private_data;
48
+ impl->schema.release(&impl->schema);
49
+ if (impl->batch.release) {
50
+ impl->batch.release(&impl->batch);
51
+ }
52
+ free(impl);
53
+
54
+ memset(stream, 0, sizeof(*stream));
55
+ }
56
+
57
+ AdbcStatusCode BatchToArrayStream(struct ArrowArray *values, struct ArrowSchema *schema,
58
+ struct ArrowArrayStream *stream, struct AdbcError *error) {
59
+ if (!values->release) {
60
+ SetError(error, "ArrowArray is not initialized");
61
+ return ADBC_STATUS_INTERNAL;
62
+ } else if (!schema->release) {
63
+ SetError(error, "ArrowSchema is not initialized");
64
+ return ADBC_STATUS_INTERNAL;
65
+ } else if (stream->release) {
66
+ SetError(error, "ArrowArrayStream is already initialized");
67
+ return ADBC_STATUS_INTERNAL;
68
+ }
69
+
70
+ struct SingleBatchArrayStream *impl = (struct SingleBatchArrayStream *)malloc(sizeof(*impl));
71
+ memcpy(&impl->schema, schema, sizeof(*schema));
72
+ memcpy(&impl->batch, values, sizeof(*values));
73
+ memset(schema, 0, sizeof(*schema));
74
+ memset(values, 0, sizeof(*values));
75
+ stream->private_data = impl;
76
+ stream->get_last_error = SingleBatchArrayStreamGetLastError;
77
+ stream->get_next = SingleBatchArrayStreamGetNext;
78
+ stream->get_schema = SingleBatchArrayStreamGetSchema;
79
+ stream->release = SingleBatchArrayStreamRelease;
80
+
81
+ return ADBC_STATUS_OK;
82
+ }
83
+
84
+ } // namespace duckdb_adbc
@@ -43,6 +43,7 @@ struct DuckDBArrowSchemaHolder {
43
43
  std::list<vector<ArrowSchema *>> nested_children_ptr;
44
44
  //! This holds strings created to represent decimal types
45
45
  vector<unsafe_unique_array<char>> owned_type_names;
46
+ vector<unsafe_unique_array<char>> owned_column_names;
46
47
  };
47
48
 
48
49
  static void ReleaseDuckDBArrowSchema(ArrowSchema *schema) {
@@ -59,7 +60,7 @@ void InitializeChild(ArrowSchema &child, DuckDBArrowSchemaHolder &root_holder, c
59
60
  child.private_data = nullptr;
60
61
  child.release = ReleaseDuckDBArrowSchema;
61
62
 
62
- //! Store the child schema
63
+ // Store the child schema
63
64
  child.flags = ARROW_FLAG_NULLABLE;
64
65
  root_holder.owned_type_names.push_back(AddName(name));
65
66
 
@@ -69,6 +70,7 @@ void InitializeChild(ArrowSchema &child, DuckDBArrowSchemaHolder &root_holder, c
69
70
  child.metadata = nullptr;
70
71
  child.dictionary = nullptr;
71
72
  }
73
+
72
74
  void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
73
75
  const ClientProperties &options);
74
76
 
@@ -309,7 +311,7 @@ void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector<Logical
309
311
 
310
312
  // Configure all child schemas
311
313
  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
312
-
314
+ root_holder->owned_column_names.push_back(AddName(names[col_idx]));
313
315
  auto &child = root_holder->children[col_idx];
314
316
  InitializeChild(child, *root_holder, names[col_idx]);
315
317
  SetArrowFormat(*root_holder, child, types[col_idx], options);
@@ -550,6 +550,12 @@ Value MultiFileReaderOptions::GetHivePartitionValue(const string &base, const st
550
550
  if (it == hive_types_schema.end()) {
551
551
  return value;
552
552
  }
553
+
554
+ // Handle nulls
555
+ if (base.empty() || StringUtil::CIEquals(base, "NULL")) {
556
+ return Value(it->second);
557
+ }
558
+
553
559
  if (!value.TryCastAs(context, it->second)) {
554
560
  throw InvalidInputException("Unable to cast '%s' (from hive partition column '%s') to: '%s'", value.ToString(),
555
561
  StringUtil::Upper(it->first), it->second.ToString());
@@ -1163,9 +1163,9 @@ void WindowLeadLagExecutor::EvaluateInternal(WindowExecutorState &lstate, Vector
1163
1163
  }
1164
1164
  int64_t val_idx = (int64_t)row_idx;
1165
1165
  if (wexpr.type == ExpressionType::WINDOW_LEAD) {
1166
- val_idx += offset;
1166
+ val_idx = AddOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(val_idx, offset);
1167
1167
  } else {
1168
- val_idx -= offset;
1168
+ val_idx = SubtractOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(val_idx, offset);
1169
1169
  }
1170
1170
 
1171
1171
  idx_t delta = 0;
@@ -1200,10 +1200,9 @@ void WindowFirstValueExecutor::EvaluateInternal(WindowExecutorState &lstate, Vec
1200
1200
  auto &lbstate = lstate.Cast<WindowExecutorBoundsState>();
1201
1201
  auto window_begin = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_BEGIN]);
1202
1202
  auto window_end = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_END]);
1203
- auto &rmask = FlatVector::Validity(result);
1204
1203
  for (idx_t i = 0; i < count; ++i, ++row_idx) {
1205
1204
  if (window_begin[i] >= window_end[i]) {
1206
- rmask.SetInvalid(i);
1205
+ FlatVector::SetNull(result, i, true);
1207
1206
  continue;
1208
1207
  }
1209
1208
  // Same as NTH_VALUE(..., 1)
@@ -1228,10 +1227,9 @@ void WindowLastValueExecutor::EvaluateInternal(WindowExecutorState &lstate, Vect
1228
1227
  auto &lbstate = lstate.Cast<WindowExecutorBoundsState>();
1229
1228
  auto window_begin = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_BEGIN]);
1230
1229
  auto window_end = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_END]);
1231
- auto &rmask = FlatVector::Validity(result);
1232
1230
  for (idx_t i = 0; i < count; ++i, ++row_idx) {
1233
1231
  if (window_begin[i] >= window_end[i]) {
1234
- rmask.SetInvalid(i);
1232
+ FlatVector::SetNull(result, i, true);
1235
1233
  continue;
1236
1234
  }
1237
1235
  idx_t n = 1;
@@ -1257,10 +1255,9 @@ void WindowNthValueExecutor::EvaluateInternal(WindowExecutorState &lstate, Vecto
1257
1255
  auto &lbstate = lstate.Cast<WindowExecutorBoundsState>();
1258
1256
  auto window_begin = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_BEGIN]);
1259
1257
  auto window_end = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_END]);
1260
- auto &rmask = FlatVector::Validity(result);
1261
1258
  for (idx_t i = 0; i < count; ++i, ++row_idx) {
1262
1259
  if (window_begin[i] >= window_end[i]) {
1263
- rmask.SetInvalid(i);
1260
+ FlatVector::SetNull(result, i, true);
1264
1261
  continue;
1265
1262
  }
1266
1263
  // Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev2842"
2
+ #define DUCKDB_VERSION "0.8.2-dev3007"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "6421a36e94"
5
+ #define DUCKDB_SOURCE_ID "dd7f0c0870"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -52,6 +52,7 @@
52
52
 
53
53
  //! @cond Doxygen_Suppress
54
54
  namespace duckdb_adbc {
55
+
55
56
  #ifdef __cplusplus
56
57
  extern "C" {
57
58
  #endif
@@ -84,10 +84,10 @@ AdbcStatusCode StatementExecutePartitions(struct AdbcStatement *statement, struc
84
84
  struct AdbcPartitions *partitions, int64_t *rows_affected,
85
85
  struct AdbcError *error);
86
86
 
87
+ //! This method should only be called when the string is guaranteed to not be NULL
87
88
  void SetError(struct AdbcError *error, const std::string &message);
89
+ void SetError(struct AdbcError *error, const char *message);
88
90
 
89
- void InitiliazeADBCError(AdbcError *error);
90
-
91
- AdbcStatusCode SetErrorMaybe(const void *result, AdbcError *error, const std::string &error_message);
91
+ void InitializeADBCError(AdbcError *error);
92
92
 
93
93
  } // namespace duckdb_adbc
@@ -0,0 +1,16 @@
1
+ #pragma once
2
+
3
+ #include "duckdb/common/arrow/arrow.hpp"
4
+ #include "duckdb/common/adbc/adbc.h"
5
+
6
+ namespace duckdb_adbc {
7
+
8
+ struct SingleBatchArrayStream {
9
+ struct ArrowSchema schema;
10
+ struct ArrowArray batch;
11
+ };
12
+
13
+ AdbcStatusCode BatchToArrayStream(struct ArrowArray *values, struct ArrowSchema *schema,
14
+ struct ArrowArrayStream *stream, struct AdbcError *error);
15
+
16
+ } // namespace duckdb_adbc