duckdb 0.8.2-dev2044.0 → 0.8.2-dev2090.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/binding.gyp +1 -0
  2. package/configure.py +7 -2
  3. package/package.json +1 -1
  4. package/src/duckdb/src/common/adbc/adbc.cpp +18 -4
  5. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
  6. package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
  7. package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
  8. package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
  9. package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
  10. package/src/duckdb/src/common/arrow/arrow_appender.cpp +89 -727
  11. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +2 -1
  12. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  13. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
  14. package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
  15. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
  16. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
  17. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
  18. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
  19. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
  20. package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
  21. package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
  22. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
  23. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +5 -0
  24. package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
  25. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -3
  26. package/src/duckdb/src/parallel/executor.cpp +1 -1
  27. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -4
  28. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +3 -76
  29. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
  30. package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
  31. package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
package/binding.gyp CHANGED
@@ -14,6 +14,7 @@
14
14
  "src/duckdb/ub_src_catalog_default.cpp",
15
15
  "src/duckdb/ub_src_common_adbc.cpp",
16
16
  "src/duckdb/ub_src_common.cpp",
17
+ "src/duckdb/ub_src_common_arrow_appender.cpp",
17
18
  "src/duckdb/ub_src_common_arrow.cpp",
18
19
  "src/duckdb/ub_src_common_crypto.cpp",
19
20
  "src/duckdb/ub_src_common_enums.cpp",
package/configure.py CHANGED
@@ -31,6 +31,7 @@ if os.environ.get('DUCKDB_NODE_BUILD_CACHE') == '1' and os.path.isfile(cache_fil
31
31
  windows_options = cache['windows_options']
32
32
  cflags = cache['cflags']
33
33
  elif 'DUCKDB_NODE_BINDIR' in os.environ:
34
+
34
35
  def find_library_path(libdir, libname):
35
36
  flist = os.listdir(libdir)
36
37
  for fname in flist:
@@ -38,6 +39,7 @@ elif 'DUCKDB_NODE_BINDIR' in os.environ:
38
39
  if os.path.isfile(fpath) and package_build.file_is_lib(fname, libname):
39
40
  return fpath
40
41
  raise Exception(f"Failed to find library {libname} in {libdir}")
42
+
41
43
  # existing build
42
44
  existing_duckdb_dir = os.environ['DUCKDB_NODE_BINDIR']
43
45
  cflags = os.environ['DUCKDB_NODE_CFLAGS']
@@ -48,7 +50,7 @@ elif 'DUCKDB_NODE_BINDIR' in os.environ:
48
50
 
49
51
  result_libraries = package_build.get_libraries(existing_duckdb_dir, libraries, extensions)
50
52
  libraries = []
51
- for (libdir, libname) in result_libraries:
53
+ for libdir, libname in result_libraries:
52
54
  if libdir is None:
53
55
  continue
54
56
  libraries.append(find_library_path(libdir, libname))
@@ -72,7 +74,7 @@ elif 'DUCKDB_NODE_BINDIR' in os.environ:
72
74
  'include_list': include_list,
73
75
  'libraries': libraries,
74
76
  'cflags': cflags,
75
- 'windows_options': windows_options
77
+ 'windows_options': windows_options,
76
78
  }
77
79
  with open(cache_file, 'wb+') as f:
78
80
  pickle.dump(cache, f)
@@ -90,9 +92,11 @@ else:
90
92
  windows_options = ['/GR']
91
93
  cflags = ['-frtti']
92
94
 
95
+
93
96
  def sanitize_path(x):
94
97
  return x.replace('\\', '/')
95
98
 
99
+
96
100
  source_list = [sanitize_path(x) for x in source_list]
97
101
  include_list = [sanitize_path(x) for x in include_list]
98
102
  libraries = [sanitize_path(x) for x in libraries]
@@ -100,6 +104,7 @@ libraries = [sanitize_path(x) for x in libraries]
100
104
  with open(gyp_in, 'r') as f:
101
105
  input_json = json.load(f)
102
106
 
107
+
103
108
  def replace_entries(node, replacement_map):
104
109
  if type(node) == type([]):
105
110
  for key in replacement_map.keys():
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev2044.0",
5
+ "version": "0.8.2-dev2090.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb/src/common/adbc/adbc.cpp CHANGED
@@ -52,12 +52,14 @@ duckdb_adbc::AdbcStatusCode duckdb_adbc_init(size_t count, struct duckdb_adbc::A
52
52
 
53
53
  namespace duckdb_adbc {
54
54
 
55
+ enum class IngestionMode { CREATE = 0, APPEND = 1 };
55
56
  struct DuckDBAdbcStatementWrapper {
56
57
  ::duckdb_connection connection;
57
58
  ::duckdb_arrow result;
58
59
  ::duckdb_prepared_statement statement;
59
60
  char *ingestion_table_name;
60
61
  ArrowArrayStream *ingestion_stream;
62
+ IngestionMode ingestion_mode = IngestionMode::CREATE;
61
63
  };
62
64
  static AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct ArrowArrayStream *out, const char *query,
63
65
  struct AdbcError *error);
@@ -428,7 +430,7 @@ void stream_schema(uintptr_t factory_ptr, duckdb::ArrowSchemaWrapper &schema) {
428
430
  }
429
431
 
430
432
  AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, struct ArrowArrayStream *input,
431
- struct AdbcError *error) {
433
+ struct AdbcError *error, IngestionMode ingestion_mode) {
432
434
 
433
435
  auto status = SetErrorMaybe(connection, error, "Invalid connection");
434
436
  if (status != ADBC_STATUS_OK) {
@@ -446,12 +448,11 @@ AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, stru
446
448
  }
447
449
  auto cconn = (duckdb::Connection *)connection;
448
450
 
449
- auto has_table = cconn->TableInfo(table_name);
450
451
  auto arrow_scan = cconn->TableFunction("arrow_scan", {duckdb::Value::POINTER((uintptr_t)input),
451
452
  duckdb::Value::POINTER((uintptr_t)stream_produce),
452
453
  duckdb::Value::POINTER((uintptr_t)get_schema)});
453
454
  try {
454
- if (!has_table) {
455
+ if (ingestion_mode == IngestionMode::CREATE) {
455
456
  // We create the table based on an Arrow Scanner
456
457
  arrow_scan->Create(table_name);
457
458
  } else {
@@ -505,6 +506,7 @@ AdbcStatusCode StatementNew(struct AdbcConnection *connection, struct AdbcStatem
505
506
  statement_wrapper->result = nullptr;
506
507
  statement_wrapper->ingestion_stream = nullptr;
507
508
  statement_wrapper->ingestion_table_name = nullptr;
509
+ statement_wrapper->ingestion_mode = IngestionMode::CREATE;
508
510
  return ADBC_STATUS_OK;
509
511
  }
510
512
 
@@ -557,7 +559,7 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr
557
559
  if (wrapper->ingestion_stream && wrapper->ingestion_table_name) {
558
560
  auto stream = wrapper->ingestion_stream;
559
561
  wrapper->ingestion_stream = nullptr;
560
- return Ingest(wrapper->connection, wrapper->ingestion_table_name, stream, error);
562
+ return Ingest(wrapper->connection, wrapper->ingestion_table_name, stream, error, wrapper->ingestion_mode);
561
563
  }
562
564
 
563
565
  auto res = duckdb_execute_prepared_arrow(wrapper->statement, &wrapper->result);
@@ -643,6 +645,18 @@ AdbcStatusCode StatementSetOption(struct AdbcStatement *statement, const char *k
643
645
  wrapper->ingestion_table_name = strdup(value);
644
646
  return ADBC_STATUS_OK;
645
647
  }
648
+ if (strcmp(key, ADBC_INGEST_OPTION_MODE) == 0) {
649
+ if (strcmp(value, ADBC_INGEST_OPTION_MODE_CREATE) == 0) {
650
+ wrapper->ingestion_mode = IngestionMode::CREATE;
651
+ return ADBC_STATUS_OK;
652
+ } else if (strcmp(value, ADBC_INGEST_OPTION_MODE_APPEND) == 0) {
653
+ wrapper->ingestion_mode = IngestionMode::APPEND;
654
+ return ADBC_STATUS_OK;
655
+ } else {
656
+ SetError(error, "Invalid ingestion mode");
657
+ return ADBC_STATUS_INVALID_ARGUMENT;
658
+ }
659
+ }
646
660
  return ADBC_STATUS_INVALID_ARGUMENT;
647
661
  }
648
662
 
package/src/duckdb/src/common/arrow/appender/bool_data.cpp ADDED
@@ -0,0 +1,44 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/bool_data.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ void ArrowBoolData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
7
+ auto byte_count = (capacity + 7) / 8;
8
+ result.main_buffer.reserve(byte_count);
9
+ }
10
+
11
+ void ArrowBoolData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
12
+ idx_t size = to - from;
13
+ UnifiedVectorFormat format;
14
+ input.ToUnifiedFormat(input_size, format);
15
+
16
+ // we initialize both the validity and the bit set to 1's
17
+ ResizeValidity(append_data.validity, append_data.row_count + size);
18
+ ResizeValidity(append_data.main_buffer, append_data.row_count + size);
19
+ auto data = UnifiedVectorFormat::GetData<bool>(format);
20
+
21
+ auto result_data = append_data.main_buffer.GetData<uint8_t>();
22
+ auto validity_data = append_data.validity.GetData<uint8_t>();
23
+ uint8_t current_bit;
24
+ idx_t current_byte;
25
+ GetBitPosition(append_data.row_count, current_byte, current_bit);
26
+ for (idx_t i = from; i < to; i++) {
27
+ auto source_idx = format.sel->get_index(i);
28
+ // append the validity mask
29
+ if (!format.validity.RowIsValid(source_idx)) {
30
+ SetNull(append_data, validity_data, current_byte, current_bit);
31
+ } else if (!data[source_idx]) {
32
+ UnsetBit(result_data, current_byte, current_bit);
33
+ }
34
+ NextBit(current_byte, current_bit);
35
+ }
36
+ append_data.row_count += size;
37
+ }
38
+
39
+ void ArrowBoolData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
40
+ result->n_buffers = 2;
41
+ result->buffers[1] = append_data.main_buffer.data();
42
+ }
43
+
44
+ } // namespace duckdb
package/src/duckdb/src/common/arrow/appender/list_data.cpp ADDED
@@ -0,0 +1,78 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/list_data.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ //===--------------------------------------------------------------------===//
7
+ // Lists
8
+ //===--------------------------------------------------------------------===//
9
+ void ArrowListData::AppendOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to,
10
+ vector<sel_t> &child_sel) {
11
+ // resize the offset buffer - the offset buffer holds the offsets into the child array
12
+ idx_t size = to - from;
13
+ append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(uint32_t) * (size + 1));
14
+ auto data = UnifiedVectorFormat::GetData<list_entry_t>(format);
15
+ auto offset_data = append_data.main_buffer.GetData<uint32_t>();
16
+ if (append_data.row_count == 0) {
17
+ // first entry
18
+ offset_data[0] = 0;
19
+ }
20
+ // set up the offsets using the list entries
21
+ auto last_offset = offset_data[append_data.row_count];
22
+ for (idx_t i = from; i < to; i++) {
23
+ auto source_idx = format.sel->get_index(i);
24
+ auto offset_idx = append_data.row_count + i + 1 - from;
25
+
26
+ if (!format.validity.RowIsValid(source_idx)) {
27
+ offset_data[offset_idx] = last_offset;
28
+ continue;
29
+ }
30
+
31
+ // append the offset data
32
+ auto list_length = data[source_idx].length;
33
+ last_offset += list_length;
34
+ offset_data[offset_idx] = last_offset;
35
+
36
+ for (idx_t k = 0; k < list_length; k++) {
37
+ child_sel.push_back(data[source_idx].offset + k);
38
+ }
39
+ }
40
+ }
41
+
42
+ void ArrowListData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
43
+ auto &child_type = ListType::GetChildType(type);
44
+ result.main_buffer.reserve((capacity + 1) * sizeof(uint32_t));
45
+ auto child_buffer = ArrowAppender::InitializeChild(child_type, capacity, result.options);
46
+ result.child_data.push_back(std::move(child_buffer));
47
+ }
48
+
49
+ void ArrowListData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
50
+ UnifiedVectorFormat format;
51
+ input.ToUnifiedFormat(input_size, format);
52
+ idx_t size = to - from;
53
+ vector<sel_t> child_indices;
54
+ AppendValidity(append_data, format, from, to);
55
+ ArrowListData::AppendOffsets(append_data, format, from, to, child_indices);
56
+
57
+ // append the child vector of the list
58
+ SelectionVector child_sel(child_indices.data());
59
+ auto &child = ListVector::GetEntry(input);
60
+ auto child_size = child_indices.size();
61
+ Vector child_copy(child.GetType());
62
+ child_copy.Slice(child, child_sel, child_size);
63
+ append_data.child_data[0]->append_vector(*append_data.child_data[0], child_copy, 0, child_size, child_size);
64
+ append_data.row_count += size;
65
+ }
66
+
67
+ void ArrowListData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
68
+ result->n_buffers = 2;
69
+ result->buffers[1] = append_data.main_buffer.data();
70
+
71
+ auto &child_type = ListType::GetChildType(type);
72
+ append_data.child_pointers.resize(1);
73
+ result->children = append_data.child_pointers.data();
74
+ result->n_children = 1;
75
+ append_data.child_pointers[0] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[0]);
76
+ }
77
+
78
+ } // namespace duckdb
package/src/duckdb/src/common/arrow/appender/map_data.cpp ADDED
@@ -0,0 +1,86 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/map_data.hpp"
3
+ #include "duckdb/common/arrow/appender/list_data.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ //===--------------------------------------------------------------------===//
8
+ // Maps
9
+ //===--------------------------------------------------------------------===//
10
+ void ArrowMapData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
11
+ // map types are stored in a (too) clever way
12
+ // the main buffer holds the null values and the offsets
13
+ // then we have a single child, which is a struct of the map_type, and the key_type
14
+ result.main_buffer.reserve((capacity + 1) * sizeof(uint32_t));
15
+
16
+ auto &key_type = MapType::KeyType(type);
17
+ auto &value_type = MapType::ValueType(type);
18
+ auto internal_struct = make_uniq<ArrowAppendData>(result.options);
19
+ internal_struct->child_data.push_back(ArrowAppender::InitializeChild(key_type, capacity, result.options));
20
+ internal_struct->child_data.push_back(ArrowAppender::InitializeChild(value_type, capacity, result.options));
21
+
22
+ result.child_data.push_back(std::move(internal_struct));
23
+ }
24
+
25
+ void ArrowMapData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
26
+ UnifiedVectorFormat format;
27
+ input.ToUnifiedFormat(input_size, format);
28
+ idx_t size = to - from;
29
+ AppendValidity(append_data, format, from, to);
30
+ vector<sel_t> child_indices;
31
+ ArrowListData::AppendOffsets(append_data, format, from, to, child_indices);
32
+
33
+ SelectionVector child_sel(child_indices.data());
34
+ auto &key_vector = MapVector::GetKeys(input);
35
+ auto &value_vector = MapVector::GetValues(input);
36
+ auto list_size = child_indices.size();
37
+
38
+ auto &struct_data = *append_data.child_data[0];
39
+ auto &key_data = *struct_data.child_data[0];
40
+ auto &value_data = *struct_data.child_data[1];
41
+
42
+ Vector key_vector_copy(key_vector.GetType());
43
+ key_vector_copy.Slice(key_vector, child_sel, list_size);
44
+ Vector value_vector_copy(value_vector.GetType());
45
+ value_vector_copy.Slice(value_vector, child_sel, list_size);
46
+ key_data.append_vector(key_data, key_vector_copy, 0, list_size, list_size);
47
+ value_data.append_vector(value_data, value_vector_copy, 0, list_size, list_size);
48
+
49
+ append_data.row_count += size;
50
+ struct_data.row_count += size;
51
+ }
52
+
53
+ void ArrowMapData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
54
+ // set up the main map buffer
55
+ result->n_buffers = 2;
56
+ result->buffers[1] = append_data.main_buffer.data();
57
+
58
+ // the main map buffer has a single child: a struct
59
+ append_data.child_pointers.resize(1);
60
+ result->children = append_data.child_pointers.data();
61
+ result->n_children = 1;
62
+ append_data.child_pointers[0] = ArrowAppender::FinalizeChild(type, *append_data.child_data[0]);
63
+
64
+ // now that struct has two children: the key and the value type
65
+ auto &struct_data = *append_data.child_data[0];
66
+ auto &struct_result = append_data.child_pointers[0];
67
+ struct_data.child_pointers.resize(2);
68
+ struct_result->n_buffers = 1;
69
+ struct_result->n_children = 2;
70
+ struct_result->length = struct_data.child_data[0]->row_count;
71
+ struct_result->children = struct_data.child_pointers.data();
72
+
73
+ D_ASSERT(struct_data.child_data[0]->row_count == struct_data.child_data[1]->row_count);
74
+
75
+ auto &key_type = MapType::KeyType(type);
76
+ auto &value_type = MapType::ValueType(type);
77
+ struct_data.child_pointers[0] = ArrowAppender::FinalizeChild(key_type, *struct_data.child_data[0]);
78
+ struct_data.child_pointers[1] = ArrowAppender::FinalizeChild(value_type, *struct_data.child_data[1]);
79
+
80
+ // keys cannot have null values
81
+ if (struct_data.child_pointers[0]->null_count > 0) {
82
+ throw std::runtime_error("Arrow doesn't accept NULL keys on Maps");
83
+ }
84
+ }
85
+
86
+ } // namespace duckdb
package/src/duckdb/src/common/arrow/appender/struct_data.cpp ADDED
@@ -0,0 +1,45 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/struct_data.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ //===--------------------------------------------------------------------===//
7
+ // Structs
8
+ //===--------------------------------------------------------------------===//
9
+ void ArrowStructData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
10
+ auto &children = StructType::GetChildTypes(type);
11
+ for (auto &child : children) {
12
+ auto child_buffer = ArrowAppender::InitializeChild(child.second, capacity, result.options);
13
+ result.child_data.push_back(std::move(child_buffer));
14
+ }
15
+ }
16
+
17
+ void ArrowStructData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
18
+ UnifiedVectorFormat format;
19
+ input.ToUnifiedFormat(input_size, format);
20
+ idx_t size = to - from;
21
+ AppendValidity(append_data, format, from, to);
22
+ // append the children of the struct
23
+ auto &children = StructVector::GetEntries(input);
24
+ for (idx_t child_idx = 0; child_idx < children.size(); child_idx++) {
25
+ auto &child = children[child_idx];
26
+ auto &child_data = *append_data.child_data[child_idx];
27
+ child_data.append_vector(child_data, *child, from, to, size);
28
+ }
29
+ append_data.row_count += size;
30
+ }
31
+
32
+ void ArrowStructData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
33
+ result->n_buffers = 1;
34
+
35
+ auto &child_types = StructType::GetChildTypes(type);
36
+ append_data.child_pointers.resize(child_types.size());
37
+ result->children = append_data.child_pointers.data();
38
+ result->n_children = child_types.size();
39
+ for (idx_t i = 0; i < child_types.size(); i++) {
40
+ auto &child_type = child_types[i].second;
41
+ append_data.child_pointers[i] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[i]);
42
+ }
43
+ }
44
+
45
+ } // namespace duckdb
package/src/duckdb/src/common/arrow/appender/union_data.cpp ADDED
@@ -0,0 +1,70 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/union_data.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ //===--------------------------------------------------------------------===//
7
+ // Unions
8
+ //===--------------------------------------------------------------------===//
9
+ void ArrowUnionData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
10
+ result.main_buffer.reserve(capacity * sizeof(int8_t));
11
+
12
+ for (auto &child : UnionType::CopyMemberTypes(type)) {
13
+ auto child_buffer = ArrowAppender::InitializeChild(child.second, capacity, result.options);
14
+ result.child_data.push_back(std::move(child_buffer));
15
+ }
16
+ }
17
+
18
+ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
19
+ UnifiedVectorFormat format;
20
+ input.ToUnifiedFormat(input_size, format);
21
+ idx_t size = to - from;
22
+
23
+ auto &types_buffer = append_data.main_buffer;
24
+
25
+ duckdb::vector<Vector> child_vectors;
26
+ for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
27
+ child_vectors.emplace_back(child.second);
28
+ }
29
+
30
+ for (idx_t input_idx = from; input_idx < to; input_idx++) {
31
+ const auto &val = input.GetValue(input_idx);
32
+
33
+ idx_t tag = 0;
34
+ Value resolved_value(nullptr);
35
+ if (!val.IsNull()) {
36
+ tag = UnionValue::GetTag(val);
37
+
38
+ resolved_value = UnionValue::GetValue(val);
39
+ }
40
+
41
+ for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
42
+ child_vectors[child_idx].SetValue(input_idx, child_idx == tag ? resolved_value : Value(nullptr));
43
+ }
44
+
45
+ types_buffer.data()[input_idx] = tag;
46
+ }
47
+
48
+ for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
49
+ auto &child_buffer = append_data.child_data[child_idx];
50
+ auto &child = child_vectors[child_idx];
51
+ child_buffer->append_vector(*child_buffer, child, from, to, size);
52
+ }
53
+ append_data.row_count += size;
54
+ }
55
+
56
+ void ArrowUnionData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
57
+ result->n_buffers = 2;
58
+ result->buffers[1] = append_data.main_buffer.data();
59
+
60
+ auto &child_types = UnionType::CopyMemberTypes(type);
61
+ append_data.child_pointers.resize(child_types.size());
62
+ result->children = append_data.child_pointers.data();
63
+ result->n_children = child_types.size();
64
+ for (idx_t i = 0; i < child_types.size(); i++) {
65
+ auto &child_type = child_types[i].second;
66
+ append_data.child_pointers[i] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[i]);
67
+ }
68
+ }
69
+
70
+ } // namespace duckdb