duckdb 0.8.2-dev2044.0 → 0.8.2-dev2090.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/configure.py +7 -2
- package/package.json +1 -1
- package/src/duckdb/src/common/adbc/adbc.cpp +18 -4
- package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
- package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +89 -727
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +2 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -3
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -4
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +3 -76
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
- package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
package/binding.gyp
CHANGED
@@ -14,6 +14,7 @@
|
|
14
14
|
"src/duckdb/ub_src_catalog_default.cpp",
|
15
15
|
"src/duckdb/ub_src_common_adbc.cpp",
|
16
16
|
"src/duckdb/ub_src_common.cpp",
|
17
|
+
"src/duckdb/ub_src_common_arrow_appender.cpp",
|
17
18
|
"src/duckdb/ub_src_common_arrow.cpp",
|
18
19
|
"src/duckdb/ub_src_common_crypto.cpp",
|
19
20
|
"src/duckdb/ub_src_common_enums.cpp",
|
package/configure.py
CHANGED
@@ -31,6 +31,7 @@ if os.environ.get('DUCKDB_NODE_BUILD_CACHE') == '1' and os.path.isfile(cache_fil
|
|
31
31
|
windows_options = cache['windows_options']
|
32
32
|
cflags = cache['cflags']
|
33
33
|
elif 'DUCKDB_NODE_BINDIR' in os.environ:
|
34
|
+
|
34
35
|
def find_library_path(libdir, libname):
|
35
36
|
flist = os.listdir(libdir)
|
36
37
|
for fname in flist:
|
@@ -38,6 +39,7 @@ elif 'DUCKDB_NODE_BINDIR' in os.environ:
|
|
38
39
|
if os.path.isfile(fpath) and package_build.file_is_lib(fname, libname):
|
39
40
|
return fpath
|
40
41
|
raise Exception(f"Failed to find library {libname} in {libdir}")
|
42
|
+
|
41
43
|
# existing build
|
42
44
|
existing_duckdb_dir = os.environ['DUCKDB_NODE_BINDIR']
|
43
45
|
cflags = os.environ['DUCKDB_NODE_CFLAGS']
|
@@ -48,7 +50,7 @@ elif 'DUCKDB_NODE_BINDIR' in os.environ:
|
|
48
50
|
|
49
51
|
result_libraries = package_build.get_libraries(existing_duckdb_dir, libraries, extensions)
|
50
52
|
libraries = []
|
51
|
-
for
|
53
|
+
for libdir, libname in result_libraries:
|
52
54
|
if libdir is None:
|
53
55
|
continue
|
54
56
|
libraries.append(find_library_path(libdir, libname))
|
@@ -72,7 +74,7 @@ elif 'DUCKDB_NODE_BINDIR' in os.environ:
|
|
72
74
|
'include_list': include_list,
|
73
75
|
'libraries': libraries,
|
74
76
|
'cflags': cflags,
|
75
|
-
'windows_options': windows_options
|
77
|
+
'windows_options': windows_options,
|
76
78
|
}
|
77
79
|
with open(cache_file, 'wb+') as f:
|
78
80
|
pickle.dump(cache, f)
|
@@ -90,9 +92,11 @@ else:
|
|
90
92
|
windows_options = ['/GR']
|
91
93
|
cflags = ['-frtti']
|
92
94
|
|
95
|
+
|
93
96
|
def sanitize_path(x):
|
94
97
|
return x.replace('\\', '/')
|
95
98
|
|
99
|
+
|
96
100
|
source_list = [sanitize_path(x) for x in source_list]
|
97
101
|
include_list = [sanitize_path(x) for x in include_list]
|
98
102
|
libraries = [sanitize_path(x) for x in libraries]
|
@@ -100,6 +104,7 @@ libraries = [sanitize_path(x) for x in libraries]
|
|
100
104
|
with open(gyp_in, 'r') as f:
|
101
105
|
input_json = json.load(f)
|
102
106
|
|
107
|
+
|
103
108
|
def replace_entries(node, replacement_map):
|
104
109
|
if type(node) == type([]):
|
105
110
|
for key in replacement_map.keys():
|
package/package.json
CHANGED
package/src/duckdb/src/common/adbc/adbc.cpp
CHANGED
@@ -52,12 +52,14 @@ duckdb_adbc::AdbcStatusCode duckdb_adbc_init(size_t count, struct duckdb_adbc::A
|
|
52
52
|
|
53
53
|
namespace duckdb_adbc {
|
54
54
|
|
55
|
+
enum class IngestionMode { CREATE = 0, APPEND = 1 };
|
55
56
|
struct DuckDBAdbcStatementWrapper {
|
56
57
|
::duckdb_connection connection;
|
57
58
|
::duckdb_arrow result;
|
58
59
|
::duckdb_prepared_statement statement;
|
59
60
|
char *ingestion_table_name;
|
60
61
|
ArrowArrayStream *ingestion_stream;
|
62
|
+
IngestionMode ingestion_mode = IngestionMode::CREATE;
|
61
63
|
};
|
62
64
|
static AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct ArrowArrayStream *out, const char *query,
|
63
65
|
struct AdbcError *error);
|
@@ -428,7 +430,7 @@ void stream_schema(uintptr_t factory_ptr, duckdb::ArrowSchemaWrapper &schema) {
|
|
428
430
|
}
|
429
431
|
|
430
432
|
AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, struct ArrowArrayStream *input,
|
431
|
-
struct AdbcError *error) {
|
433
|
+
struct AdbcError *error, IngestionMode ingestion_mode) {
|
432
434
|
|
433
435
|
auto status = SetErrorMaybe(connection, error, "Invalid connection");
|
434
436
|
if (status != ADBC_STATUS_OK) {
|
@@ -446,12 +448,11 @@ AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, stru
|
|
446
448
|
}
|
447
449
|
auto cconn = (duckdb::Connection *)connection;
|
448
450
|
|
449
|
-
auto has_table = cconn->TableInfo(table_name);
|
450
451
|
auto arrow_scan = cconn->TableFunction("arrow_scan", {duckdb::Value::POINTER((uintptr_t)input),
|
451
452
|
duckdb::Value::POINTER((uintptr_t)stream_produce),
|
452
453
|
duckdb::Value::POINTER((uintptr_t)get_schema)});
|
453
454
|
try {
|
454
|
-
if (
|
455
|
+
if (ingestion_mode == IngestionMode::CREATE) {
|
455
456
|
// We create the table based on an Arrow Scanner
|
456
457
|
arrow_scan->Create(table_name);
|
457
458
|
} else {
|
@@ -505,6 +506,7 @@ AdbcStatusCode StatementNew(struct AdbcConnection *connection, struct AdbcStatem
|
|
505
506
|
statement_wrapper->result = nullptr;
|
506
507
|
statement_wrapper->ingestion_stream = nullptr;
|
507
508
|
statement_wrapper->ingestion_table_name = nullptr;
|
509
|
+
statement_wrapper->ingestion_mode = IngestionMode::CREATE;
|
508
510
|
return ADBC_STATUS_OK;
|
509
511
|
}
|
510
512
|
|
@@ -557,7 +559,7 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr
|
|
557
559
|
if (wrapper->ingestion_stream && wrapper->ingestion_table_name) {
|
558
560
|
auto stream = wrapper->ingestion_stream;
|
559
561
|
wrapper->ingestion_stream = nullptr;
|
560
|
-
return Ingest(wrapper->connection, wrapper->ingestion_table_name, stream, error);
|
562
|
+
return Ingest(wrapper->connection, wrapper->ingestion_table_name, stream, error, wrapper->ingestion_mode);
|
561
563
|
}
|
562
564
|
|
563
565
|
auto res = duckdb_execute_prepared_arrow(wrapper->statement, &wrapper->result);
|
@@ -643,6 +645,18 @@ AdbcStatusCode StatementSetOption(struct AdbcStatement *statement, const char *k
|
|
643
645
|
wrapper->ingestion_table_name = strdup(value);
|
644
646
|
return ADBC_STATUS_OK;
|
645
647
|
}
|
648
|
+
if (strcmp(key, ADBC_INGEST_OPTION_MODE) == 0) {
|
649
|
+
if (strcmp(value, ADBC_INGEST_OPTION_MODE_CREATE) == 0) {
|
650
|
+
wrapper->ingestion_mode = IngestionMode::CREATE;
|
651
|
+
return ADBC_STATUS_OK;
|
652
|
+
} else if (strcmp(value, ADBC_INGEST_OPTION_MODE_APPEND) == 0) {
|
653
|
+
wrapper->ingestion_mode = IngestionMode::APPEND;
|
654
|
+
return ADBC_STATUS_OK;
|
655
|
+
} else {
|
656
|
+
SetError(error, "Invalid ingestion mode");
|
657
|
+
return ADBC_STATUS_INVALID_ARGUMENT;
|
658
|
+
}
|
659
|
+
}
|
646
660
|
return ADBC_STATUS_INVALID_ARGUMENT;
|
647
661
|
}
|
648
662
|
|
package/src/duckdb/src/common/arrow/appender/bool_data.cpp
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "duckdb/common/arrow/arrow_appender.hpp"
|
2
|
+
#include "duckdb/common/arrow/appender/bool_data.hpp"
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
void ArrowBoolData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
7
|
+
auto byte_count = (capacity + 7) / 8;
|
8
|
+
result.main_buffer.reserve(byte_count);
|
9
|
+
}
|
10
|
+
|
11
|
+
void ArrowBoolData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
12
|
+
idx_t size = to - from;
|
13
|
+
UnifiedVectorFormat format;
|
14
|
+
input.ToUnifiedFormat(input_size, format);
|
15
|
+
|
16
|
+
// we initialize both the validity and the bit set to 1's
|
17
|
+
ResizeValidity(append_data.validity, append_data.row_count + size);
|
18
|
+
ResizeValidity(append_data.main_buffer, append_data.row_count + size);
|
19
|
+
auto data = UnifiedVectorFormat::GetData<bool>(format);
|
20
|
+
|
21
|
+
auto result_data = append_data.main_buffer.GetData<uint8_t>();
|
22
|
+
auto validity_data = append_data.validity.GetData<uint8_t>();
|
23
|
+
uint8_t current_bit;
|
24
|
+
idx_t current_byte;
|
25
|
+
GetBitPosition(append_data.row_count, current_byte, current_bit);
|
26
|
+
for (idx_t i = from; i < to; i++) {
|
27
|
+
auto source_idx = format.sel->get_index(i);
|
28
|
+
// append the validity mask
|
29
|
+
if (!format.validity.RowIsValid(source_idx)) {
|
30
|
+
SetNull(append_data, validity_data, current_byte, current_bit);
|
31
|
+
} else if (!data[source_idx]) {
|
32
|
+
UnsetBit(result_data, current_byte, current_bit);
|
33
|
+
}
|
34
|
+
NextBit(current_byte, current_bit);
|
35
|
+
}
|
36
|
+
append_data.row_count += size;
|
37
|
+
}
|
38
|
+
|
39
|
+
void ArrowBoolData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
40
|
+
result->n_buffers = 2;
|
41
|
+
result->buffers[1] = append_data.main_buffer.data();
|
42
|
+
}
|
43
|
+
|
44
|
+
} // namespace duckdb
|
package/src/duckdb/src/common/arrow/appender/list_data.cpp
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
#include "duckdb/common/arrow/arrow_appender.hpp"
|
2
|
+
#include "duckdb/common/arrow/appender/list_data.hpp"
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
//===--------------------------------------------------------------------===//
|
7
|
+
// Lists
|
8
|
+
//===--------------------------------------------------------------------===//
|
9
|
+
void ArrowListData::AppendOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to,
|
10
|
+
vector<sel_t> &child_sel) {
|
11
|
+
// resize the offset buffer - the offset buffer holds the offsets into the child array
|
12
|
+
idx_t size = to - from;
|
13
|
+
append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(uint32_t) * (size + 1));
|
14
|
+
auto data = UnifiedVectorFormat::GetData<list_entry_t>(format);
|
15
|
+
auto offset_data = append_data.main_buffer.GetData<uint32_t>();
|
16
|
+
if (append_data.row_count == 0) {
|
17
|
+
// first entry
|
18
|
+
offset_data[0] = 0;
|
19
|
+
}
|
20
|
+
// set up the offsets using the list entries
|
21
|
+
auto last_offset = offset_data[append_data.row_count];
|
22
|
+
for (idx_t i = from; i < to; i++) {
|
23
|
+
auto source_idx = format.sel->get_index(i);
|
24
|
+
auto offset_idx = append_data.row_count + i + 1 - from;
|
25
|
+
|
26
|
+
if (!format.validity.RowIsValid(source_idx)) {
|
27
|
+
offset_data[offset_idx] = last_offset;
|
28
|
+
continue;
|
29
|
+
}
|
30
|
+
|
31
|
+
// append the offset data
|
32
|
+
auto list_length = data[source_idx].length;
|
33
|
+
last_offset += list_length;
|
34
|
+
offset_data[offset_idx] = last_offset;
|
35
|
+
|
36
|
+
for (idx_t k = 0; k < list_length; k++) {
|
37
|
+
child_sel.push_back(data[source_idx].offset + k);
|
38
|
+
}
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
void ArrowListData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
43
|
+
auto &child_type = ListType::GetChildType(type);
|
44
|
+
result.main_buffer.reserve((capacity + 1) * sizeof(uint32_t));
|
45
|
+
auto child_buffer = ArrowAppender::InitializeChild(child_type, capacity, result.options);
|
46
|
+
result.child_data.push_back(std::move(child_buffer));
|
47
|
+
}
|
48
|
+
|
49
|
+
void ArrowListData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
50
|
+
UnifiedVectorFormat format;
|
51
|
+
input.ToUnifiedFormat(input_size, format);
|
52
|
+
idx_t size = to - from;
|
53
|
+
vector<sel_t> child_indices;
|
54
|
+
AppendValidity(append_data, format, from, to);
|
55
|
+
ArrowListData::AppendOffsets(append_data, format, from, to, child_indices);
|
56
|
+
|
57
|
+
// append the child vector of the list
|
58
|
+
SelectionVector child_sel(child_indices.data());
|
59
|
+
auto &child = ListVector::GetEntry(input);
|
60
|
+
auto child_size = child_indices.size();
|
61
|
+
Vector child_copy(child.GetType());
|
62
|
+
child_copy.Slice(child, child_sel, child_size);
|
63
|
+
append_data.child_data[0]->append_vector(*append_data.child_data[0], child_copy, 0, child_size, child_size);
|
64
|
+
append_data.row_count += size;
|
65
|
+
}
|
66
|
+
|
67
|
+
void ArrowListData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
68
|
+
result->n_buffers = 2;
|
69
|
+
result->buffers[1] = append_data.main_buffer.data();
|
70
|
+
|
71
|
+
auto &child_type = ListType::GetChildType(type);
|
72
|
+
append_data.child_pointers.resize(1);
|
73
|
+
result->children = append_data.child_pointers.data();
|
74
|
+
result->n_children = 1;
|
75
|
+
append_data.child_pointers[0] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[0]);
|
76
|
+
}
|
77
|
+
|
78
|
+
} // namespace duckdb
|
package/src/duckdb/src/common/arrow/appender/map_data.cpp
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
#include "duckdb/common/arrow/arrow_appender.hpp"
|
2
|
+
#include "duckdb/common/arrow/appender/map_data.hpp"
|
3
|
+
#include "duckdb/common/arrow/appender/list_data.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
//===--------------------------------------------------------------------===//
|
8
|
+
// Maps
|
9
|
+
//===--------------------------------------------------------------------===//
|
10
|
+
void ArrowMapData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
11
|
+
// map types are stored in a (too) clever way
|
12
|
+
// the main buffer holds the null values and the offsets
|
13
|
+
// then we have a single child, which is a struct of the map_type, and the key_type
|
14
|
+
result.main_buffer.reserve((capacity + 1) * sizeof(uint32_t));
|
15
|
+
|
16
|
+
auto &key_type = MapType::KeyType(type);
|
17
|
+
auto &value_type = MapType::ValueType(type);
|
18
|
+
auto internal_struct = make_uniq<ArrowAppendData>(result.options);
|
19
|
+
internal_struct->child_data.push_back(ArrowAppender::InitializeChild(key_type, capacity, result.options));
|
20
|
+
internal_struct->child_data.push_back(ArrowAppender::InitializeChild(value_type, capacity, result.options));
|
21
|
+
|
22
|
+
result.child_data.push_back(std::move(internal_struct));
|
23
|
+
}
|
24
|
+
|
25
|
+
void ArrowMapData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
26
|
+
UnifiedVectorFormat format;
|
27
|
+
input.ToUnifiedFormat(input_size, format);
|
28
|
+
idx_t size = to - from;
|
29
|
+
AppendValidity(append_data, format, from, to);
|
30
|
+
vector<sel_t> child_indices;
|
31
|
+
ArrowListData::AppendOffsets(append_data, format, from, to, child_indices);
|
32
|
+
|
33
|
+
SelectionVector child_sel(child_indices.data());
|
34
|
+
auto &key_vector = MapVector::GetKeys(input);
|
35
|
+
auto &value_vector = MapVector::GetValues(input);
|
36
|
+
auto list_size = child_indices.size();
|
37
|
+
|
38
|
+
auto &struct_data = *append_data.child_data[0];
|
39
|
+
auto &key_data = *struct_data.child_data[0];
|
40
|
+
auto &value_data = *struct_data.child_data[1];
|
41
|
+
|
42
|
+
Vector key_vector_copy(key_vector.GetType());
|
43
|
+
key_vector_copy.Slice(key_vector, child_sel, list_size);
|
44
|
+
Vector value_vector_copy(value_vector.GetType());
|
45
|
+
value_vector_copy.Slice(value_vector, child_sel, list_size);
|
46
|
+
key_data.append_vector(key_data, key_vector_copy, 0, list_size, list_size);
|
47
|
+
value_data.append_vector(value_data, value_vector_copy, 0, list_size, list_size);
|
48
|
+
|
49
|
+
append_data.row_count += size;
|
50
|
+
struct_data.row_count += size;
|
51
|
+
}
|
52
|
+
|
53
|
+
void ArrowMapData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
54
|
+
// set up the main map buffer
|
55
|
+
result->n_buffers = 2;
|
56
|
+
result->buffers[1] = append_data.main_buffer.data();
|
57
|
+
|
58
|
+
// the main map buffer has a single child: a struct
|
59
|
+
append_data.child_pointers.resize(1);
|
60
|
+
result->children = append_data.child_pointers.data();
|
61
|
+
result->n_children = 1;
|
62
|
+
append_data.child_pointers[0] = ArrowAppender::FinalizeChild(type, *append_data.child_data[0]);
|
63
|
+
|
64
|
+
// now that struct has two children: the key and the value type
|
65
|
+
auto &struct_data = *append_data.child_data[0];
|
66
|
+
auto &struct_result = append_data.child_pointers[0];
|
67
|
+
struct_data.child_pointers.resize(2);
|
68
|
+
struct_result->n_buffers = 1;
|
69
|
+
struct_result->n_children = 2;
|
70
|
+
struct_result->length = struct_data.child_data[0]->row_count;
|
71
|
+
struct_result->children = struct_data.child_pointers.data();
|
72
|
+
|
73
|
+
D_ASSERT(struct_data.child_data[0]->row_count == struct_data.child_data[1]->row_count);
|
74
|
+
|
75
|
+
auto &key_type = MapType::KeyType(type);
|
76
|
+
auto &value_type = MapType::ValueType(type);
|
77
|
+
struct_data.child_pointers[0] = ArrowAppender::FinalizeChild(key_type, *struct_data.child_data[0]);
|
78
|
+
struct_data.child_pointers[1] = ArrowAppender::FinalizeChild(value_type, *struct_data.child_data[1]);
|
79
|
+
|
80
|
+
// keys cannot have null values
|
81
|
+
if (struct_data.child_pointers[0]->null_count > 0) {
|
82
|
+
throw std::runtime_error("Arrow doesn't accept NULL keys on Maps");
|
83
|
+
}
|
84
|
+
}
|
85
|
+
|
86
|
+
} // namespace duckdb
|
package/src/duckdb/src/common/arrow/appender/struct_data.cpp
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#include "duckdb/common/arrow/arrow_appender.hpp"
|
2
|
+
#include "duckdb/common/arrow/appender/struct_data.hpp"
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
//===--------------------------------------------------------------------===//
|
7
|
+
// Structs
|
8
|
+
//===--------------------------------------------------------------------===//
|
9
|
+
void ArrowStructData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
10
|
+
auto &children = StructType::GetChildTypes(type);
|
11
|
+
for (auto &child : children) {
|
12
|
+
auto child_buffer = ArrowAppender::InitializeChild(child.second, capacity, result.options);
|
13
|
+
result.child_data.push_back(std::move(child_buffer));
|
14
|
+
}
|
15
|
+
}
|
16
|
+
|
17
|
+
void ArrowStructData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
18
|
+
UnifiedVectorFormat format;
|
19
|
+
input.ToUnifiedFormat(input_size, format);
|
20
|
+
idx_t size = to - from;
|
21
|
+
AppendValidity(append_data, format, from, to);
|
22
|
+
// append the children of the struct
|
23
|
+
auto &children = StructVector::GetEntries(input);
|
24
|
+
for (idx_t child_idx = 0; child_idx < children.size(); child_idx++) {
|
25
|
+
auto &child = children[child_idx];
|
26
|
+
auto &child_data = *append_data.child_data[child_idx];
|
27
|
+
child_data.append_vector(child_data, *child, from, to, size);
|
28
|
+
}
|
29
|
+
append_data.row_count += size;
|
30
|
+
}
|
31
|
+
|
32
|
+
void ArrowStructData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
33
|
+
result->n_buffers = 1;
|
34
|
+
|
35
|
+
auto &child_types = StructType::GetChildTypes(type);
|
36
|
+
append_data.child_pointers.resize(child_types.size());
|
37
|
+
result->children = append_data.child_pointers.data();
|
38
|
+
result->n_children = child_types.size();
|
39
|
+
for (idx_t i = 0; i < child_types.size(); i++) {
|
40
|
+
auto &child_type = child_types[i].second;
|
41
|
+
append_data.child_pointers[i] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[i]);
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
} // namespace duckdb
|
package/src/duckdb/src/common/arrow/appender/union_data.cpp
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#include "duckdb/common/arrow/arrow_appender.hpp"
|
2
|
+
#include "duckdb/common/arrow/appender/union_data.hpp"
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
//===--------------------------------------------------------------------===//
|
7
|
+
// Unions
|
8
|
+
//===--------------------------------------------------------------------===//
|
9
|
+
void ArrowUnionData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
10
|
+
result.main_buffer.reserve(capacity * sizeof(int8_t));
|
11
|
+
|
12
|
+
for (auto &child : UnionType::CopyMemberTypes(type)) {
|
13
|
+
auto child_buffer = ArrowAppender::InitializeChild(child.second, capacity, result.options);
|
14
|
+
result.child_data.push_back(std::move(child_buffer));
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
18
|
+
void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
19
|
+
UnifiedVectorFormat format;
|
20
|
+
input.ToUnifiedFormat(input_size, format);
|
21
|
+
idx_t size = to - from;
|
22
|
+
|
23
|
+
auto &types_buffer = append_data.main_buffer;
|
24
|
+
|
25
|
+
duckdb::vector<Vector> child_vectors;
|
26
|
+
for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
|
27
|
+
child_vectors.emplace_back(child.second);
|
28
|
+
}
|
29
|
+
|
30
|
+
for (idx_t input_idx = from; input_idx < to; input_idx++) {
|
31
|
+
const auto &val = input.GetValue(input_idx);
|
32
|
+
|
33
|
+
idx_t tag = 0;
|
34
|
+
Value resolved_value(nullptr);
|
35
|
+
if (!val.IsNull()) {
|
36
|
+
tag = UnionValue::GetTag(val);
|
37
|
+
|
38
|
+
resolved_value = UnionValue::GetValue(val);
|
39
|
+
}
|
40
|
+
|
41
|
+
for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
|
42
|
+
child_vectors[child_idx].SetValue(input_idx, child_idx == tag ? resolved_value : Value(nullptr));
|
43
|
+
}
|
44
|
+
|
45
|
+
types_buffer.data()[input_idx] = tag;
|
46
|
+
}
|
47
|
+
|
48
|
+
for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
|
49
|
+
auto &child_buffer = append_data.child_data[child_idx];
|
50
|
+
auto &child = child_vectors[child_idx];
|
51
|
+
child_buffer->append_vector(*child_buffer, child, from, to, size);
|
52
|
+
}
|
53
|
+
append_data.row_count += size;
|
54
|
+
}
|
55
|
+
|
56
|
+
void ArrowUnionData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
57
|
+
result->n_buffers = 2;
|
58
|
+
result->buffers[1] = append_data.main_buffer.data();
|
59
|
+
|
60
|
+
auto &child_types = UnionType::CopyMemberTypes(type);
|
61
|
+
append_data.child_pointers.resize(child_types.size());
|
62
|
+
result->children = append_data.child_pointers.data();
|
63
|
+
result->n_children = child_types.size();
|
64
|
+
for (idx_t i = 0; i < child_types.size(); i++) {
|
65
|
+
auto &child_type = child_types[i].second;
|
66
|
+
append_data.child_pointers[i] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[i]);
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
} // namespace duckdb
|