duckdb 0.8.2-dev2068.0 → 0.8.2-dev2090.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
  4. package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
  5. package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
  6. package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
  7. package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
  8. package/src/duckdb/src/common/arrow/arrow_appender.cpp +89 -727
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +2 -1
  10. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  11. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
  12. package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
  13. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
  14. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
  15. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
  16. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
  17. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
  18. package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
  19. package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
  20. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
  21. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +5 -0
  22. package/src/duckdb/src/parallel/executor.cpp +1 -1
  23. package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
package/binding.gyp CHANGED
@@ -14,6 +14,7 @@
14
14
  "src/duckdb/ub_src_catalog_default.cpp",
15
15
  "src/duckdb/ub_src_common_adbc.cpp",
16
16
  "src/duckdb/ub_src_common.cpp",
17
+ "src/duckdb/ub_src_common_arrow_appender.cpp",
17
18
  "src/duckdb/ub_src_common_arrow.cpp",
18
19
  "src/duckdb/ub_src_common_crypto.cpp",
19
20
  "src/duckdb/ub_src_common_enums.cpp",
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev2068.0",
5
+ "version": "0.8.2-dev2090.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -0,0 +1,44 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/bool_data.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ void ArrowBoolData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
7
+ auto byte_count = (capacity + 7) / 8;
8
+ result.main_buffer.reserve(byte_count);
9
+ }
10
+
11
+ void ArrowBoolData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
12
+ idx_t size = to - from;
13
+ UnifiedVectorFormat format;
14
+ input.ToUnifiedFormat(input_size, format);
15
+
16
+ // we initialize both the validity and the bit set to 1's
17
+ ResizeValidity(append_data.validity, append_data.row_count + size);
18
+ ResizeValidity(append_data.main_buffer, append_data.row_count + size);
19
+ auto data = UnifiedVectorFormat::GetData<bool>(format);
20
+
21
+ auto result_data = append_data.main_buffer.GetData<uint8_t>();
22
+ auto validity_data = append_data.validity.GetData<uint8_t>();
23
+ uint8_t current_bit;
24
+ idx_t current_byte;
25
+ GetBitPosition(append_data.row_count, current_byte, current_bit);
26
+ for (idx_t i = from; i < to; i++) {
27
+ auto source_idx = format.sel->get_index(i);
28
+ // append the validity mask
29
+ if (!format.validity.RowIsValid(source_idx)) {
30
+ SetNull(append_data, validity_data, current_byte, current_bit);
31
+ } else if (!data[source_idx]) {
32
+ UnsetBit(result_data, current_byte, current_bit);
33
+ }
34
+ NextBit(current_byte, current_bit);
35
+ }
36
+ append_data.row_count += size;
37
+ }
38
+
39
+ void ArrowBoolData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
40
+ result->n_buffers = 2;
41
+ result->buffers[1] = append_data.main_buffer.data();
42
+ }
43
+
44
+ } // namespace duckdb
@@ -0,0 +1,78 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/list_data.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ //===--------------------------------------------------------------------===//
7
+ // Lists
8
+ //===--------------------------------------------------------------------===//
9
+ void ArrowListData::AppendOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to,
10
+ vector<sel_t> &child_sel) {
11
+ // resize the offset buffer - the offset buffer holds the offsets into the child array
12
+ idx_t size = to - from;
13
+ append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(uint32_t) * (size + 1));
14
+ auto data = UnifiedVectorFormat::GetData<list_entry_t>(format);
15
+ auto offset_data = append_data.main_buffer.GetData<uint32_t>();
16
+ if (append_data.row_count == 0) {
17
+ // first entry
18
+ offset_data[0] = 0;
19
+ }
20
+ // set up the offsets using the list entries
21
+ auto last_offset = offset_data[append_data.row_count];
22
+ for (idx_t i = from; i < to; i++) {
23
+ auto source_idx = format.sel->get_index(i);
24
+ auto offset_idx = append_data.row_count + i + 1 - from;
25
+
26
+ if (!format.validity.RowIsValid(source_idx)) {
27
+ offset_data[offset_idx] = last_offset;
28
+ continue;
29
+ }
30
+
31
+ // append the offset data
32
+ auto list_length = data[source_idx].length;
33
+ last_offset += list_length;
34
+ offset_data[offset_idx] = last_offset;
35
+
36
+ for (idx_t k = 0; k < list_length; k++) {
37
+ child_sel.push_back(data[source_idx].offset + k);
38
+ }
39
+ }
40
+ }
41
+
42
+ void ArrowListData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
43
+ auto &child_type = ListType::GetChildType(type);
44
+ result.main_buffer.reserve((capacity + 1) * sizeof(uint32_t));
45
+ auto child_buffer = ArrowAppender::InitializeChild(child_type, capacity, result.options);
46
+ result.child_data.push_back(std::move(child_buffer));
47
+ }
48
+
49
+ void ArrowListData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
50
+ UnifiedVectorFormat format;
51
+ input.ToUnifiedFormat(input_size, format);
52
+ idx_t size = to - from;
53
+ vector<sel_t> child_indices;
54
+ AppendValidity(append_data, format, from, to);
55
+ ArrowListData::AppendOffsets(append_data, format, from, to, child_indices);
56
+
57
+ // append the child vector of the list
58
+ SelectionVector child_sel(child_indices.data());
59
+ auto &child = ListVector::GetEntry(input);
60
+ auto child_size = child_indices.size();
61
+ Vector child_copy(child.GetType());
62
+ child_copy.Slice(child, child_sel, child_size);
63
+ append_data.child_data[0]->append_vector(*append_data.child_data[0], child_copy, 0, child_size, child_size);
64
+ append_data.row_count += size;
65
+ }
66
+
67
+ void ArrowListData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
68
+ result->n_buffers = 2;
69
+ result->buffers[1] = append_data.main_buffer.data();
70
+
71
+ auto &child_type = ListType::GetChildType(type);
72
+ append_data.child_pointers.resize(1);
73
+ result->children = append_data.child_pointers.data();
74
+ result->n_children = 1;
75
+ append_data.child_pointers[0] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[0]);
76
+ }
77
+
78
+ } // namespace duckdb
@@ -0,0 +1,86 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/map_data.hpp"
3
+ #include "duckdb/common/arrow/appender/list_data.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ //===--------------------------------------------------------------------===//
8
+ // Maps
9
+ //===--------------------------------------------------------------------===//
10
+ void ArrowMapData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
11
+ // map types are stored in a (too) clever way
12
+ // the main buffer holds the null values and the offsets
13
+ // then we have a single child, which is a struct of the map_type, and the key_type
14
+ result.main_buffer.reserve((capacity + 1) * sizeof(uint32_t));
15
+
16
+ auto &key_type = MapType::KeyType(type);
17
+ auto &value_type = MapType::ValueType(type);
18
+ auto internal_struct = make_uniq<ArrowAppendData>(result.options);
19
+ internal_struct->child_data.push_back(ArrowAppender::InitializeChild(key_type, capacity, result.options));
20
+ internal_struct->child_data.push_back(ArrowAppender::InitializeChild(value_type, capacity, result.options));
21
+
22
+ result.child_data.push_back(std::move(internal_struct));
23
+ }
24
+
25
+ void ArrowMapData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
26
+ UnifiedVectorFormat format;
27
+ input.ToUnifiedFormat(input_size, format);
28
+ idx_t size = to - from;
29
+ AppendValidity(append_data, format, from, to);
30
+ vector<sel_t> child_indices;
31
+ ArrowListData::AppendOffsets(append_data, format, from, to, child_indices);
32
+
33
+ SelectionVector child_sel(child_indices.data());
34
+ auto &key_vector = MapVector::GetKeys(input);
35
+ auto &value_vector = MapVector::GetValues(input);
36
+ auto list_size = child_indices.size();
37
+
38
+ auto &struct_data = *append_data.child_data[0];
39
+ auto &key_data = *struct_data.child_data[0];
40
+ auto &value_data = *struct_data.child_data[1];
41
+
42
+ Vector key_vector_copy(key_vector.GetType());
43
+ key_vector_copy.Slice(key_vector, child_sel, list_size);
44
+ Vector value_vector_copy(value_vector.GetType());
45
+ value_vector_copy.Slice(value_vector, child_sel, list_size);
46
+ key_data.append_vector(key_data, key_vector_copy, 0, list_size, list_size);
47
+ value_data.append_vector(value_data, value_vector_copy, 0, list_size, list_size);
48
+
49
+ append_data.row_count += size;
50
+ struct_data.row_count += size;
51
+ }
52
+
53
+ void ArrowMapData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
54
+ // set up the main map buffer
55
+ result->n_buffers = 2;
56
+ result->buffers[1] = append_data.main_buffer.data();
57
+
58
+ // the main map buffer has a single child: a struct
59
+ append_data.child_pointers.resize(1);
60
+ result->children = append_data.child_pointers.data();
61
+ result->n_children = 1;
62
+ append_data.child_pointers[0] = ArrowAppender::FinalizeChild(type, *append_data.child_data[0]);
63
+
64
+ // now that struct has two children: the key and the value type
65
+ auto &struct_data = *append_data.child_data[0];
66
+ auto &struct_result = append_data.child_pointers[0];
67
+ struct_data.child_pointers.resize(2);
68
+ struct_result->n_buffers = 1;
69
+ struct_result->n_children = 2;
70
+ struct_result->length = struct_data.child_data[0]->row_count;
71
+ struct_result->children = struct_data.child_pointers.data();
72
+
73
+ D_ASSERT(struct_data.child_data[0]->row_count == struct_data.child_data[1]->row_count);
74
+
75
+ auto &key_type = MapType::KeyType(type);
76
+ auto &value_type = MapType::ValueType(type);
77
+ struct_data.child_pointers[0] = ArrowAppender::FinalizeChild(key_type, *struct_data.child_data[0]);
78
+ struct_data.child_pointers[1] = ArrowAppender::FinalizeChild(value_type, *struct_data.child_data[1]);
79
+
80
+ // keys cannot have null values
81
+ if (struct_data.child_pointers[0]->null_count > 0) {
82
+ throw std::runtime_error("Arrow doesn't accept NULL keys on Maps");
83
+ }
84
+ }
85
+
86
+ } // namespace duckdb
@@ -0,0 +1,45 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/struct_data.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ //===--------------------------------------------------------------------===//
7
+ // Structs
8
+ //===--------------------------------------------------------------------===//
9
+ void ArrowStructData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
10
+ auto &children = StructType::GetChildTypes(type);
11
+ for (auto &child : children) {
12
+ auto child_buffer = ArrowAppender::InitializeChild(child.second, capacity, result.options);
13
+ result.child_data.push_back(std::move(child_buffer));
14
+ }
15
+ }
16
+
17
+ void ArrowStructData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
18
+ UnifiedVectorFormat format;
19
+ input.ToUnifiedFormat(input_size, format);
20
+ idx_t size = to - from;
21
+ AppendValidity(append_data, format, from, to);
22
+ // append the children of the struct
23
+ auto &children = StructVector::GetEntries(input);
24
+ for (idx_t child_idx = 0; child_idx < children.size(); child_idx++) {
25
+ auto &child = children[child_idx];
26
+ auto &child_data = *append_data.child_data[child_idx];
27
+ child_data.append_vector(child_data, *child, from, to, size);
28
+ }
29
+ append_data.row_count += size;
30
+ }
31
+
32
+ void ArrowStructData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
33
+ result->n_buffers = 1;
34
+
35
+ auto &child_types = StructType::GetChildTypes(type);
36
+ append_data.child_pointers.resize(child_types.size());
37
+ result->children = append_data.child_pointers.data();
38
+ result->n_children = child_types.size();
39
+ for (idx_t i = 0; i < child_types.size(); i++) {
40
+ auto &child_type = child_types[i].second;
41
+ append_data.child_pointers[i] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[i]);
42
+ }
43
+ }
44
+
45
+ } // namespace duckdb
@@ -0,0 +1,70 @@
1
+ #include "duckdb/common/arrow/arrow_appender.hpp"
2
+ #include "duckdb/common/arrow/appender/union_data.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ //===--------------------------------------------------------------------===//
7
+ // Unions
8
+ //===--------------------------------------------------------------------===//
9
+ void ArrowUnionData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
10
+ result.main_buffer.reserve(capacity * sizeof(int8_t));
11
+
12
+ for (auto &child : UnionType::CopyMemberTypes(type)) {
13
+ auto child_buffer = ArrowAppender::InitializeChild(child.second, capacity, result.options);
14
+ result.child_data.push_back(std::move(child_buffer));
15
+ }
16
+ }
17
+
18
+ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
19
+ UnifiedVectorFormat format;
20
+ input.ToUnifiedFormat(input_size, format);
21
+ idx_t size = to - from;
22
+
23
+ auto &types_buffer = append_data.main_buffer;
24
+
25
+ duckdb::vector<Vector> child_vectors;
26
+ for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
27
+ child_vectors.emplace_back(child.second);
28
+ }
29
+
30
+ for (idx_t input_idx = from; input_idx < to; input_idx++) {
31
+ const auto &val = input.GetValue(input_idx);
32
+
33
+ idx_t tag = 0;
34
+ Value resolved_value(nullptr);
35
+ if (!val.IsNull()) {
36
+ tag = UnionValue::GetTag(val);
37
+
38
+ resolved_value = UnionValue::GetValue(val);
39
+ }
40
+
41
+ for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
42
+ child_vectors[child_idx].SetValue(input_idx, child_idx == tag ? resolved_value : Value(nullptr));
43
+ }
44
+
45
+ types_buffer.data()[input_idx] = tag;
46
+ }
47
+
48
+ for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
49
+ auto &child_buffer = append_data.child_data[child_idx];
50
+ auto &child = child_vectors[child_idx];
51
+ child_buffer->append_vector(*child_buffer, child, from, to, size);
52
+ }
53
+ append_data.row_count += size;
54
+ }
55
+
56
+ void ArrowUnionData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
57
+ result->n_buffers = 2;
58
+ result->buffers[1] = append_data.main_buffer.data();
59
+
60
+ auto &child_types = UnionType::CopyMemberTypes(type);
61
+ append_data.child_pointers.resize(child_types.size());
62
+ result->children = append_data.child_pointers.data();
63
+ result->n_children = child_types.size();
64
+ for (idx_t i = 0; i < child_types.size(); i++) {
65
+ auto &child_type = child_types[i].second;
66
+ append_data.child_pointers[i] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[i]);
67
+ }
68
+ }
69
+
70
+ } // namespace duckdb