duckdb 0.8.2-dev2842.0 → 0.8.2-dev3007.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
- package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
- package/src/duckdb/extension/json/json_deserializer.cpp +7 -5
- package/src/duckdb/extension/json/json_serializer.cpp +2 -3
- package/src/duckdb/src/common/adbc/adbc.cpp +400 -145
- package/src/duckdb/src/common/adbc/driver_manager.cpp +79 -31
- package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +57 -0
- package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +121 -0
- package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +474 -0
- package/src/duckdb/src/common/adbc/nanoarrow/single_batch_array_stream.cpp +84 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +6 -0
- package/src/duckdb/src/execution/window_executor.cpp +5 -8
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1 -0
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -2
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +0 -2
- package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.h +462 -0
- package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.hpp +14 -0
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +2 -4
- package/src/duckdb/src/include/duckdb.h +16 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +41 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +60 -30
- package/src/duckdb/src/main/chunk_scan_state.cpp +6 -0
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +7 -0
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +55 -38
- package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
- package/src/duckdb/ub_src_common_adbc_nanoarrow.cpp +8 -0
- package/src/duckdb_node.hpp +1 -0
- package/src/statement.cpp +1 -1
@@ -0,0 +1,474 @@
|
|
1
|
+
// Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
// or more contributor license agreements. See the NOTICE file
|
3
|
+
// distributed with this work for additional information
|
4
|
+
// regarding copyright ownership. The ASF licenses this file
|
5
|
+
// to you under the Apache License, Version 2.0 (the
|
6
|
+
// "License"); you may not use this file except in compliance
|
7
|
+
// with the License. You may obtain a copy of the License at
|
8
|
+
//
|
9
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
//
|
11
|
+
// Unless required by applicable law or agreed to in writing,
|
12
|
+
// software distributed under the License is distributed on an
|
13
|
+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
// KIND, either express or implied. See the License for the
|
15
|
+
// specific language governing permissions and limitations
|
16
|
+
// under the License.
|
17
|
+
|
18
|
+
#include <errno.h>
|
19
|
+
#include <stdio.h>
|
20
|
+
#include <stdlib.h>
|
21
|
+
#include <string.h>
|
22
|
+
|
23
|
+
#include "duckdb/common/arrow/nanoarrow/nanoarrow.hpp"
|
24
|
+
|
25
|
+
namespace duckdb_nanoarrow {
|
26
|
+
|
27
|
+
void ArrowSchemaRelease(struct ArrowSchema *schema) {
|
28
|
+
if (schema->format != NULL)
|
29
|
+
ArrowFree((void *)schema->format);
|
30
|
+
if (schema->name != NULL)
|
31
|
+
ArrowFree((void *)schema->name);
|
32
|
+
if (schema->metadata != NULL)
|
33
|
+
ArrowFree((void *)schema->metadata);
|
34
|
+
|
35
|
+
// This object owns the memory for all the children, but those
|
36
|
+
// children may have been generated elsewhere and might have
|
37
|
+
// their own release() callback.
|
38
|
+
if (schema->children != NULL) {
|
39
|
+
for (int64_t i = 0; i < schema->n_children; i++) {
|
40
|
+
if (schema->children[i] != NULL) {
|
41
|
+
if (schema->children[i]->release != NULL) {
|
42
|
+
schema->children[i]->release(schema->children[i]);
|
43
|
+
}
|
44
|
+
|
45
|
+
ArrowFree(schema->children[i]);
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
ArrowFree(schema->children);
|
50
|
+
}
|
51
|
+
|
52
|
+
// This object owns the memory for the dictionary but it
|
53
|
+
// may have been generated somewhere else and have its own
|
54
|
+
// release() callback.
|
55
|
+
if (schema->dictionary != NULL) {
|
56
|
+
if (schema->dictionary->release != NULL) {
|
57
|
+
schema->dictionary->release(schema->dictionary);
|
58
|
+
}
|
59
|
+
|
60
|
+
ArrowFree(schema->dictionary);
|
61
|
+
}
|
62
|
+
|
63
|
+
// private data not currently used
|
64
|
+
if (schema->private_data != NULL) {
|
65
|
+
ArrowFree(schema->private_data);
|
66
|
+
}
|
67
|
+
|
68
|
+
schema->release = NULL;
|
69
|
+
}
|
70
|
+
|
71
|
+
const char *ArrowSchemaFormatTemplate(enum ArrowType data_type) {
|
72
|
+
switch (data_type) {
|
73
|
+
case NANOARROW_TYPE_UNINITIALIZED:
|
74
|
+
return NULL;
|
75
|
+
case NANOARROW_TYPE_NA:
|
76
|
+
return "n";
|
77
|
+
case NANOARROW_TYPE_BOOL:
|
78
|
+
return "b";
|
79
|
+
|
80
|
+
case NANOARROW_TYPE_UINT8:
|
81
|
+
return "C";
|
82
|
+
case NANOARROW_TYPE_INT8:
|
83
|
+
return "c";
|
84
|
+
case NANOARROW_TYPE_UINT16:
|
85
|
+
return "S";
|
86
|
+
case NANOARROW_TYPE_INT16:
|
87
|
+
return "s";
|
88
|
+
case NANOARROW_TYPE_UINT32:
|
89
|
+
return "I";
|
90
|
+
case NANOARROW_TYPE_INT32:
|
91
|
+
return "i";
|
92
|
+
case NANOARROW_TYPE_UINT64:
|
93
|
+
return "L";
|
94
|
+
case NANOARROW_TYPE_INT64:
|
95
|
+
return "l";
|
96
|
+
|
97
|
+
case NANOARROW_TYPE_HALF_FLOAT:
|
98
|
+
return "e";
|
99
|
+
case NANOARROW_TYPE_FLOAT:
|
100
|
+
return "f";
|
101
|
+
case NANOARROW_TYPE_DOUBLE:
|
102
|
+
return "g";
|
103
|
+
|
104
|
+
case NANOARROW_TYPE_STRING:
|
105
|
+
return "u";
|
106
|
+
case NANOARROW_TYPE_LARGE_STRING:
|
107
|
+
return "U";
|
108
|
+
case NANOARROW_TYPE_BINARY:
|
109
|
+
return "z";
|
110
|
+
case NANOARROW_TYPE_LARGE_BINARY:
|
111
|
+
return "Z";
|
112
|
+
|
113
|
+
case NANOARROW_TYPE_DATE32:
|
114
|
+
return "tdD";
|
115
|
+
case NANOARROW_TYPE_DATE64:
|
116
|
+
return "tdm";
|
117
|
+
case NANOARROW_TYPE_INTERVAL_MONTHS:
|
118
|
+
return "tiM";
|
119
|
+
case NANOARROW_TYPE_INTERVAL_DAY_TIME:
|
120
|
+
return "tiD";
|
121
|
+
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
|
122
|
+
return "tin";
|
123
|
+
|
124
|
+
case NANOARROW_TYPE_LIST:
|
125
|
+
return "+l";
|
126
|
+
case NANOARROW_TYPE_LARGE_LIST:
|
127
|
+
return "+L";
|
128
|
+
case NANOARROW_TYPE_STRUCT:
|
129
|
+
return "+s";
|
130
|
+
case NANOARROW_TYPE_MAP:
|
131
|
+
return "+m";
|
132
|
+
|
133
|
+
default:
|
134
|
+
return NULL;
|
135
|
+
}
|
136
|
+
}
|
137
|
+
|
138
|
+
// Initializes every field of `schema` and sets its format string from `data_type`.
//
// The schema starts out nullable, with no name, metadata, children, dictionary
// or private data, and with ArrowSchemaRelease as its release callback.
// NANOARROW_TYPE_UNINITIALIZED is accepted and leaves the format unset.
//
// Returns NANOARROW_OK on success, EINVAL if `data_type` has no format template,
// or the error reported by ArrowSchemaSetFormat(). On any error the schema is
// released before returning.
ArrowErrorCode ArrowSchemaInit(struct ArrowSchema *schema, enum ArrowType data_type) {
	schema->release = &ArrowSchemaRelease;
	schema->private_data = NULL;
	// The dictionary is deliberately left unallocated: it has to be nullptr
	// for non-dictionary-encoded arrays.
	schema->dictionary = NULL;
	schema->children = NULL;
	schema->n_children = 0;
	schema->flags = ARROW_FLAG_NULLABLE;
	schema->metadata = NULL;
	schema->name = NULL;
	schema->format = NULL;

	// Look up a valid format string for data_type
	const char *format_for_type = ArrowSchemaFormatTemplate(data_type);

	// A missing template is only acceptable when the caller explicitly asked
	// for an uninitialized schema.
	const bool is_unknown_type = format_for_type == NULL && data_type != NANOARROW_TYPE_UNINITIALIZED;
	if (is_unknown_type) {
		schema->release(schema);
		return EINVAL;
	}

	const int status = ArrowSchemaSetFormat(schema, format_for_type);
	if (status != NANOARROW_OK) {
		schema->release(schema);
	}

	return status;
}
|
169
|
+
|
170
|
+
// Initializes `schema` as a fixed-size binary ("w:<n>") or fixed-size list
// ("+w:<n>") type.
//
// Returns NANOARROW_OK on success, EINVAL if `fixed_size` is not positive or
// `data_type` is not one of the two fixed-size types, ERANGE if the format
// string could not be rendered into the local buffer, or the error reported by
// ArrowSchemaInit() / ArrowSchemaSetFormat(). The schema is released before
// returning any error raised after the initial ArrowSchemaInit() succeeded.
ArrowErrorCode ArrowSchemaInitFixedSize(struct ArrowSchema *schema, enum ArrowType data_type, int32_t fixed_size) {
	int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
	if (result != NANOARROW_OK) {
		return result;
	}

	if (fixed_size <= 0) {
		schema->release(schema);
		return EINVAL;
	}

	char buffer[64];
	int n_chars;
	switch (data_type) {
	case NANOARROW_TYPE_FIXED_SIZE_BINARY:
		n_chars = snprintf(buffer, sizeof(buffer), "w:%d", (int)fixed_size);
		break;
	case NANOARROW_TYPE_FIXED_SIZE_LIST:
		n_chars = snprintf(buffer, sizeof(buffer), "+w:%d", (int)fixed_size);
		break;
	default:
		schema->release(schema);
		return EINVAL;
	}

	// snprintf returns a negative value on failure and the untruncated length
	// otherwise; neither may be trusted as an index into the buffer. This is
	// the same guard ArrowSchemaInitDateTime applies.
	if (n_chars < 0 || static_cast<size_t>(n_chars) >= sizeof(buffer)) {
		schema->release(schema);
		return ERANGE;
	}

	// On success snprintf has already NUL-terminated the buffer.
	result = ArrowSchemaSetFormat(schema, buffer);
	if (result != NANOARROW_OK) {
		schema->release(schema);
	}

	return result;
}
|
203
|
+
|
204
|
+
// Initializes `schema` as a decimal type with the given precision and scale.
//
// NANOARROW_TYPE_DECIMAL128 renders as "d:<precision>,<scale>" and
// NANOARROW_TYPE_DECIMAL256 as "d:<precision>,<scale>,256".
//
// Returns NANOARROW_OK on success, EINVAL if `decimal_precision` is not
// positive or `data_type` is not a decimal type, ERANGE if the format string
// could not be rendered into the local buffer, or the error reported by
// ArrowSchemaInit() / ArrowSchemaSetFormat(). The schema is released before
// returning any error raised after the initial ArrowSchemaInit() succeeded.
ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema *schema, enum ArrowType data_type, int32_t decimal_precision,
                                      int32_t decimal_scale) {
	int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
	if (result != NANOARROW_OK) {
		return result;
	}

	if (decimal_precision <= 0) {
		schema->release(schema);
		return EINVAL;
	}

	char buffer[64];
	int n_chars;
	switch (data_type) {
	case NANOARROW_TYPE_DECIMAL128:
		n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d", (int)decimal_precision, (int)decimal_scale);
		break;
	case NANOARROW_TYPE_DECIMAL256:
		n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", (int)decimal_precision, (int)decimal_scale);
		break;
	default:
		schema->release(schema);
		return EINVAL;
	}

	// snprintf returns a negative value on failure and the untruncated length
	// otherwise; neither may be trusted as an index into the buffer. This is
	// the same guard ArrowSchemaInitDateTime applies.
	if (n_chars < 0 || static_cast<size_t>(n_chars) >= sizeof(buffer)) {
		schema->release(schema);
		return ERANGE;
	}

	// On success snprintf has already NUL-terminated the buffer.
	result = ArrowSchemaSetFormat(schema, buffer);
	if (result != NANOARROW_OK) {
		schema->release(schema);
		return result;
	}

	return NANOARROW_OK;
}
|
240
|
+
|
241
|
+
static const char *ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
|
242
|
+
switch (time_unit) {
|
243
|
+
case NANOARROW_TIME_UNIT_SECOND:
|
244
|
+
return "s";
|
245
|
+
case NANOARROW_TIME_UNIT_MILLI:
|
246
|
+
return "m";
|
247
|
+
case NANOARROW_TIME_UNIT_MICRO:
|
248
|
+
return "u";
|
249
|
+
case NANOARROW_TIME_UNIT_NANO:
|
250
|
+
return "n";
|
251
|
+
default:
|
252
|
+
return NULL;
|
253
|
+
}
|
254
|
+
}
|
255
|
+
|
256
|
+
// Initializes `schema` as a time32/time64 ("tt<unit>"), timestamp
// ("ts<unit>:<timezone>") or duration ("tD<unit>") type.
//
// Only timestamps accept a timezone; a NULL timezone is rendered as an empty
// string there. For the other types a non-NULL timezone is rejected.
//
// Returns NANOARROW_OK on success, EINVAL for an unknown time unit, an
// unsupported `data_type` or a timezone on a type that takes none, ERANGE if
// the format string does not fit the local buffer, or the error reported by
// ArrowSchemaInit() / ArrowSchemaSetFormat(). The schema is released before
// returning any error raised after the initial ArrowSchemaInit() succeeded.
ArrowErrorCode ArrowSchemaInitDateTime(struct ArrowSchema *schema, enum ArrowType data_type,
                                       enum ArrowTimeUnit time_unit, const char *timezone) {
	int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
	if (result != NANOARROW_OK) {
		return result;
	}

	const char *time_unit_str = ArrowTimeUnitString(time_unit);
	if (time_unit_str == NULL) {
		schema->release(schema);
		return EINVAL;
	}

	// Work out the format prefix and whether this type carries a timezone.
	const char *type_prefix = NULL;
	bool takes_timezone = false;
	switch (data_type) {
	case NANOARROW_TYPE_TIME32:
	case NANOARROW_TYPE_TIME64:
		type_prefix = "tt";
		break;
	case NANOARROW_TYPE_TIMESTAMP:
		type_prefix = "ts";
		takes_timezone = true;
		break;
	case NANOARROW_TYPE_DURATION:
		type_prefix = "tD";
		break;
	default:
		schema->release(schema);
		return EINVAL;
	}

	if (!takes_timezone && timezone != NULL) {
		schema->release(schema);
		return EINVAL;
	}

	char buffer[128];
	int n_chars;
	if (takes_timezone) {
		const char *timezone_str = timezone == NULL ? "" : timezone;
		n_chars = snprintf(buffer, sizeof(buffer), "%s%s:%s", type_prefix, time_unit_str, timezone_str);
	} else {
		n_chars = snprintf(buffer, sizeof(buffer), "%s%s", type_prefix, time_unit_str);
	}

	// The timezone is caller-supplied, so the rendered string may not fit.
	if (static_cast<size_t>(n_chars) >= sizeof(buffer)) {
		schema->release(schema);
		return ERANGE;
	}

	buffer[n_chars] = '\0';

	result = ArrowSchemaSetFormat(schema, buffer);
	if (result != NANOARROW_OK) {
		schema->release(schema);
		return result;
	}

	return NANOARROW_OK;
}
|
313
|
+
|
314
|
+
// Replaces the schema's format string with a private copy of `format`.
//
// The previous format string, if any, is freed first. Passing NULL clears the
// format. Returns NANOARROW_OK on success or ENOMEM if the copy could not be
// allocated (in which case the format is left NULL).
ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema *schema, const char *format) {
	if (schema->format != NULL) {
		ArrowFree((void *)schema->format);
	}

	if (format == NULL) {
		schema->format = NULL;
		return NANOARROW_OK;
	}

	// Copy the string including its terminating NUL.
	size_t n_bytes = strlen(format) + 1;
	schema->format = (const char *)ArrowMalloc(n_bytes);
	if (schema->format == NULL) {
		return ENOMEM;
	}
	memcpy((void *)schema->format, format, n_bytes);

	return NANOARROW_OK;
}
|
333
|
+
|
334
|
+
// Replaces the schema's name with a private copy of `name`.
//
// The previous name, if any, is freed first. Passing NULL clears the name.
// Returns NANOARROW_OK on success or ENOMEM if the copy could not be allocated
// (in which case the name is left NULL).
ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema *schema, const char *name) {
	if (schema->name != NULL) {
		ArrowFree((void *)schema->name);
	}

	if (name == NULL) {
		schema->name = NULL;
		return NANOARROW_OK;
	}

	// Copy the string including its terminating NUL.
	size_t n_bytes = strlen(name) + 1;
	schema->name = (const char *)ArrowMalloc(n_bytes);
	if (schema->name == NULL) {
		return ENOMEM;
	}
	memcpy((void *)schema->name, name, n_bytes);

	return NANOARROW_OK;
}
|
353
|
+
|
354
|
+
// Replaces the schema's metadata with a private copy of `metadata`.
//
// The previous metadata buffer, if any, is freed first. Passing NULL clears
// the metadata. The number of bytes to copy is obtained from
// ArrowMetadataSizeOf(). Returns NANOARROW_OK on success or ENOMEM if the copy
// could not be allocated (in which case the metadata is left NULL).
ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema *schema, const char *metadata) {
	if (schema->metadata != NULL) {
		ArrowFree((void *)schema->metadata);
	}

	if (metadata == NULL) {
		schema->metadata = NULL;
		return NANOARROW_OK;
	}

	size_t n_bytes = ArrowMetadataSizeOf(metadata);
	schema->metadata = (const char *)ArrowMalloc(n_bytes);
	if (schema->metadata == NULL) {
		return ENOMEM;
	}
	memcpy((void *)schema->metadata, metadata, n_bytes);

	return NANOARROW_OK;
}
|
373
|
+
|
374
|
+
// Allocates `n_children` child schema structs for `schema`.
//
// Each child is allocated with ArrowMalloc() and has its release callback set
// to NULL; no other child field is initialized here. A non-positive
// `n_children` leaves the schema untouched.
//
// Returns NANOARROW_OK on success, EEXIST if the schema already has a children
// array, or ENOMEM if an allocation fails. When a child allocation fails the
// pointer array and the children allocated so far stay attached to the schema.
ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema *schema, int64_t n_children) {
	if (schema->children != NULL) {
		return EEXIST;
	}

	if (n_children <= 0) {
		return NANOARROW_OK;
	}

	const auto pointer_array_bytes = n_children * sizeof(struct ArrowSchema *);
	schema->children = (struct ArrowSchema **)ArrowMalloc(pointer_array_bytes);
	if (schema->children == NULL) {
		return ENOMEM;
	}

	schema->n_children = n_children;

	// Zero all slots up front so every slot not yet filled reads as NULL if a
	// later allocation fails.
	memset(schema->children, 0, pointer_array_bytes);

	for (int64_t child_idx = 0; child_idx < n_children; child_idx++) {
		struct ArrowSchema *child = (struct ArrowSchema *)ArrowMalloc(sizeof(struct ArrowSchema));
		schema->children[child_idx] = child;
		if (child == NULL) {
			return ENOMEM;
		}

		child->release = NULL;
	}

	return NANOARROW_OK;
}
|
403
|
+
|
404
|
+
// Allocates the dictionary schema struct for `schema`.
//
// The new struct only has its release callback set (to NULL); no other field
// is initialized here. Returns NANOARROW_OK on success, EEXIST if a dictionary
// is already attached, or ENOMEM if the allocation fails.
ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema *schema) {
	if (schema->dictionary != NULL) {
		return EEXIST;
	}

	struct ArrowSchema *dictionary = (struct ArrowSchema *)ArrowMalloc(sizeof(struct ArrowSchema));
	schema->dictionary = dictionary;
	if (dictionary == NULL) {
		return ENOMEM;
	}

	dictionary->release = NULL;
	return NANOARROW_OK;
}
|
417
|
+
|
418
|
+
int ArrowSchemaDeepCopy(struct ArrowSchema *schema, struct ArrowSchema *schema_out) {
|
419
|
+
int result;
|
420
|
+
result = ArrowSchemaInit(schema_out, NANOARROW_TYPE_NA);
|
421
|
+
if (result != NANOARROW_OK) {
|
422
|
+
return result;
|
423
|
+
}
|
424
|
+
|
425
|
+
result = ArrowSchemaSetFormat(schema_out, schema->format);
|
426
|
+
if (result != NANOARROW_OK) {
|
427
|
+
schema_out->release(schema_out);
|
428
|
+
return result;
|
429
|
+
}
|
430
|
+
|
431
|
+
result = ArrowSchemaSetName(schema_out, schema->name);
|
432
|
+
if (result != NANOARROW_OK) {
|
433
|
+
schema_out->release(schema_out);
|
434
|
+
return result;
|
435
|
+
}
|
436
|
+
|
437
|
+
result = ArrowSchemaSetMetadata(schema_out, schema->metadata);
|
438
|
+
if (result != NANOARROW_OK) {
|
439
|
+
schema_out->release(schema_out);
|
440
|
+
return result;
|
441
|
+
}
|
442
|
+
|
443
|
+
result = ArrowSchemaAllocateChildren(schema_out, schema->n_children);
|
444
|
+
if (result != NANOARROW_OK) {
|
445
|
+
schema_out->release(schema_out);
|
446
|
+
return result;
|
447
|
+
}
|
448
|
+
|
449
|
+
for (int64_t i = 0; i < schema->n_children; i++) {
|
450
|
+
result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]);
|
451
|
+
if (result != NANOARROW_OK) {
|
452
|
+
schema_out->release(schema_out);
|
453
|
+
return result;
|
454
|
+
}
|
455
|
+
}
|
456
|
+
|
457
|
+
if (schema->dictionary != NULL) {
|
458
|
+
result = ArrowSchemaAllocateDictionary(schema_out);
|
459
|
+
if (result != NANOARROW_OK) {
|
460
|
+
schema_out->release(schema_out);
|
461
|
+
return result;
|
462
|
+
}
|
463
|
+
|
464
|
+
result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary);
|
465
|
+
if (result != NANOARROW_OK) {
|
466
|
+
schema_out->release(schema_out);
|
467
|
+
return result;
|
468
|
+
}
|
469
|
+
}
|
470
|
+
|
471
|
+
return NANOARROW_OK;
|
472
|
+
}
|
473
|
+
|
474
|
+
} // namespace duckdb_nanoarrow
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#include "duckdb/common/adbc/single_batch_array_stream.hpp"
|
2
|
+
#include "duckdb/common/arrow/nanoarrow/nanoarrow.h"
|
3
|
+
#include "duckdb/common/adbc/adbc.hpp"
|
4
|
+
|
5
|
+
#include "duckdb.h"
|
6
|
+
#include "duckdb/common/arrow/arrow_wrapper.hpp"
|
7
|
+
#include "duckdb/common/arrow/nanoarrow/nanoarrow.hpp"
|
8
|
+
|
9
|
+
#include <errno.h>
|
10
|
+
#include <stdarg.h>
|
11
|
+
#include <stdio.h>
|
12
|
+
#include <stdlib.h>
|
13
|
+
#include <string.h>
|
14
|
+
|
15
|
+
namespace duckdb_adbc {
|
16
|
+
|
17
|
+
using duckdb_nanoarrow::ArrowSchemaDeepCopy;
|
18
|
+
|
19
|
+
// get_last_error callback of the single-batch stream: it never has an error
// message to report, so it always returns a null pointer.
static const char *SingleBatchArrayStreamGetLastError(struct ArrowArrayStream *stream) {
	return nullptr;
}
|
22
|
+
|
23
|
+
static int SingleBatchArrayStreamGetNext(struct ArrowArrayStream *stream, struct ArrowArray *batch) {
|
24
|
+
if (!stream || !stream->private_data) {
|
25
|
+
return EINVAL;
|
26
|
+
}
|
27
|
+
struct SingleBatchArrayStream *impl = (struct SingleBatchArrayStream *)stream->private_data;
|
28
|
+
|
29
|
+
memcpy(batch, &impl->batch, sizeof(*batch));
|
30
|
+
memset(&impl->batch, 0, sizeof(*batch));
|
31
|
+
return 0;
|
32
|
+
}
|
33
|
+
|
34
|
+
static int SingleBatchArrayStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *schema) {
|
35
|
+
if (!stream || !stream->private_data) {
|
36
|
+
return EINVAL;
|
37
|
+
}
|
38
|
+
struct SingleBatchArrayStream *impl = (struct SingleBatchArrayStream *)stream->private_data;
|
39
|
+
|
40
|
+
return ArrowSchemaDeepCopy(&impl->schema, schema);
|
41
|
+
}
|
42
|
+
|
43
|
+
static void SingleBatchArrayStreamRelease(struct ArrowArrayStream *stream) {
|
44
|
+
if (!stream || !stream->private_data) {
|
45
|
+
return;
|
46
|
+
}
|
47
|
+
struct SingleBatchArrayStream *impl = (struct SingleBatchArrayStream *)stream->private_data;
|
48
|
+
impl->schema.release(&impl->schema);
|
49
|
+
if (impl->batch.release) {
|
50
|
+
impl->batch.release(&impl->batch);
|
51
|
+
}
|
52
|
+
free(impl);
|
53
|
+
|
54
|
+
memset(stream, 0, sizeof(*stream));
|
55
|
+
}
|
56
|
+
|
57
|
+
// Wraps a single (batch, schema) pair in an ArrowArrayStream.
//
// On success the contents of `values` and `schema` are moved into a
// heap-allocated SingleBatchArrayStream stored in `stream->private_data`, both
// inputs are zeroed with memset, and the stream callbacks are installed.
//
// Returns ADBC_STATUS_OK on success. Returns ADBC_STATUS_INTERNAL, with a
// message set on `error`, if `values` or `schema` has no release callback, if
// `stream` already has one, or if the state struct cannot be allocated. In
// every failure case the inputs are left untouched.
AdbcStatusCode BatchToArrayStream(struct ArrowArray *values, struct ArrowSchema *schema,
                                  struct ArrowArrayStream *stream, struct AdbcError *error) {
	if (!values->release) {
		SetError(error, "ArrowArray is not initialized");
		return ADBC_STATUS_INTERNAL;
	} else if (!schema->release) {
		SetError(error, "ArrowSchema is not initialized");
		return ADBC_STATUS_INTERNAL;
	} else if (stream->release) {
		SetError(error, "ArrowArrayStream is already initialized");
		return ADBC_STATUS_INTERNAL;
	}

	struct SingleBatchArrayStream *impl = (struct SingleBatchArrayStream *)malloc(sizeof(*impl));
	if (!impl) {
		// Without this check the memcpy below would write through a null pointer.
		SetError(error, "Failed to allocate memory for ArrowArrayStream");
		return ADBC_STATUS_INTERNAL;
	}

	// Move (not copy) the inputs: the stream now owns them, so the originals
	// are zeroed.
	memcpy(&impl->schema, schema, sizeof(*schema));
	memcpy(&impl->batch, values, sizeof(*values));
	memset(schema, 0, sizeof(*schema));
	memset(values, 0, sizeof(*values));

	stream->private_data = impl;
	stream->get_last_error = SingleBatchArrayStreamGetLastError;
	stream->get_next = SingleBatchArrayStreamGetNext;
	stream->get_schema = SingleBatchArrayStreamGetSchema;
	stream->release = SingleBatchArrayStreamRelease;

	return ADBC_STATUS_OK;
}
|
83
|
+
|
84
|
+
} // namespace duckdb_adbc
|
@@ -43,6 +43,7 @@ struct DuckDBArrowSchemaHolder {
|
|
43
43
|
std::list<vector<ArrowSchema *>> nested_children_ptr;
|
44
44
|
//! This holds strings created to represent decimal types
|
45
45
|
vector<unsafe_unique_array<char>> owned_type_names;
|
46
|
+
vector<unsafe_unique_array<char>> owned_column_names;
|
46
47
|
};
|
47
48
|
|
48
49
|
static void ReleaseDuckDBArrowSchema(ArrowSchema *schema) {
|
@@ -59,7 +60,7 @@ void InitializeChild(ArrowSchema &child, DuckDBArrowSchemaHolder &root_holder, c
|
|
59
60
|
child.private_data = nullptr;
|
60
61
|
child.release = ReleaseDuckDBArrowSchema;
|
61
62
|
|
62
|
-
|
63
|
+
// Store the child schema
|
63
64
|
child.flags = ARROW_FLAG_NULLABLE;
|
64
65
|
root_holder.owned_type_names.push_back(AddName(name));
|
65
66
|
|
@@ -69,6 +70,7 @@ void InitializeChild(ArrowSchema &child, DuckDBArrowSchemaHolder &root_holder, c
|
|
69
70
|
child.metadata = nullptr;
|
70
71
|
child.dictionary = nullptr;
|
71
72
|
}
|
73
|
+
|
72
74
|
void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
|
73
75
|
const ClientProperties &options);
|
74
76
|
|
@@ -309,7 +311,7 @@ void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector<Logical
|
|
309
311
|
|
310
312
|
// Configure all child schemas
|
311
313
|
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
312
|
-
|
314
|
+
root_holder->owned_column_names.push_back(AddName(names[col_idx]));
|
313
315
|
auto &child = root_holder->children[col_idx];
|
314
316
|
InitializeChild(child, *root_holder, names[col_idx]);
|
315
317
|
SetArrowFormat(*root_holder, child, types[col_idx], options);
|
@@ -550,6 +550,12 @@ Value MultiFileReaderOptions::GetHivePartitionValue(const string &base, const st
|
|
550
550
|
if (it == hive_types_schema.end()) {
|
551
551
|
return value;
|
552
552
|
}
|
553
|
+
|
554
|
+
// Handle nulls
|
555
|
+
if (base.empty() || StringUtil::CIEquals(base, "NULL")) {
|
556
|
+
return Value(it->second);
|
557
|
+
}
|
558
|
+
|
553
559
|
if (!value.TryCastAs(context, it->second)) {
|
554
560
|
throw InvalidInputException("Unable to cast '%s' (from hive partition column '%s') to: '%s'", value.ToString(),
|
555
561
|
StringUtil::Upper(it->first), it->second.ToString());
|
@@ -1163,9 +1163,9 @@ void WindowLeadLagExecutor::EvaluateInternal(WindowExecutorState &lstate, Vector
|
|
1163
1163
|
}
|
1164
1164
|
int64_t val_idx = (int64_t)row_idx;
|
1165
1165
|
if (wexpr.type == ExpressionType::WINDOW_LEAD) {
|
1166
|
-
val_idx
|
1166
|
+
val_idx = AddOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(val_idx, offset);
|
1167
1167
|
} else {
|
1168
|
-
val_idx
|
1168
|
+
val_idx = SubtractOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(val_idx, offset);
|
1169
1169
|
}
|
1170
1170
|
|
1171
1171
|
idx_t delta = 0;
|
@@ -1200,10 +1200,9 @@ void WindowFirstValueExecutor::EvaluateInternal(WindowExecutorState &lstate, Vec
|
|
1200
1200
|
auto &lbstate = lstate.Cast<WindowExecutorBoundsState>();
|
1201
1201
|
auto window_begin = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_BEGIN]);
|
1202
1202
|
auto window_end = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_END]);
|
1203
|
-
auto &rmask = FlatVector::Validity(result);
|
1204
1203
|
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1205
1204
|
if (window_begin[i] >= window_end[i]) {
|
1206
|
-
|
1205
|
+
FlatVector::SetNull(result, i, true);
|
1207
1206
|
continue;
|
1208
1207
|
}
|
1209
1208
|
// Same as NTH_VALUE(..., 1)
|
@@ -1228,10 +1227,9 @@ void WindowLastValueExecutor::EvaluateInternal(WindowExecutorState &lstate, Vect
|
|
1228
1227
|
auto &lbstate = lstate.Cast<WindowExecutorBoundsState>();
|
1229
1228
|
auto window_begin = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_BEGIN]);
|
1230
1229
|
auto window_end = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_END]);
|
1231
|
-
auto &rmask = FlatVector::Validity(result);
|
1232
1230
|
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1233
1231
|
if (window_begin[i] >= window_end[i]) {
|
1234
|
-
|
1232
|
+
FlatVector::SetNull(result, i, true);
|
1235
1233
|
continue;
|
1236
1234
|
}
|
1237
1235
|
idx_t n = 1;
|
@@ -1257,10 +1255,9 @@ void WindowNthValueExecutor::EvaluateInternal(WindowExecutorState &lstate, Vecto
|
|
1257
1255
|
auto &lbstate = lstate.Cast<WindowExecutorBoundsState>();
|
1258
1256
|
auto window_begin = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_BEGIN]);
|
1259
1257
|
auto window_end = FlatVector::GetData<const idx_t>(lbstate.bounds.data[WINDOW_END]);
|
1260
|
-
auto &rmask = FlatVector::Validity(result);
|
1261
1258
|
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1262
1259
|
if (window_begin[i] >= window_end[i]) {
|
1263
|
-
|
1260
|
+
FlatVector::SetNull(result, i, true);
|
1264
1261
|
continue;
|
1265
1262
|
}
|
1266
1263
|
// Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev3007"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "dd7f0c0870"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -84,10 +84,10 @@ AdbcStatusCode StatementExecutePartitions(struct AdbcStatement *statement, struc
|
|
84
84
|
struct AdbcPartitions *partitions, int64_t *rows_affected,
|
85
85
|
struct AdbcError *error);
|
86
86
|
|
87
|
+
//! This method should only be called when the string is guaranteed to not be NULL
|
87
88
|
void SetError(struct AdbcError *error, const std::string &message);
|
89
|
+
void SetError(struct AdbcError *error, const char *message);
|
88
90
|
|
89
|
-
void
|
90
|
-
|
91
|
-
AdbcStatusCode SetErrorMaybe(const void *result, AdbcError *error, const std::string &error_message);
|
91
|
+
void InitializeADBCError(AdbcError *error);
|
92
92
|
|
93
93
|
} // namespace duckdb_adbc
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/arrow.hpp"
|
4
|
+
#include "duckdb/common/adbc/adbc.h"
|
5
|
+
|
6
|
+
namespace duckdb_adbc {
|
7
|
+
|
8
|
+
struct SingleBatchArrayStream {
|
9
|
+
struct ArrowSchema schema;
|
10
|
+
struct ArrowArray batch;
|
11
|
+
};
|
12
|
+
|
13
|
+
AdbcStatusCode BatchToArrayStream(struct ArrowArray *values, struct ArrowSchema *schema,
|
14
|
+
struct ArrowArrayStream *stream, struct AdbcError *error);
|
15
|
+
|
16
|
+
} // namespace duckdb_adbc
|