duckdb 0.8.2-dev4025.0 → 0.8.2-dev4142.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
- package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
- package/src/duckdb/extension/json/json_scan.cpp +124 -121
- package/src/duckdb/src/catalog/catalog.cpp +20 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +7 -0
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
- package/src/duckdb/src/execution/index/art/art.cpp +111 -92
- package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
- package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
- package/src/duckdb/src/execution/index/art/node.cpp +109 -203
- package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
- package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
- package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
- package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
- package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +17 -0
- package/src/duckdb/src/main/extension/extension_install.cpp +5 -3
- package/src/duckdb/src/main/extension/extension_load.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/index.cpp +7 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -1
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -0,0 +1,78 @@
|
|
1
|
+
#include "duckdb/core_functions/scalar/list_functions.hpp"
|
2
|
+
#include <cmath>
|
3
|
+
#include <algorithm>
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
template <class NUMERIC_TYPE>
|
8
|
+
static void ListCosineSimilarity(DataChunk &args, ExpressionState &, Vector &result) {
|
9
|
+
D_ASSERT(args.ColumnCount() == 2);
|
10
|
+
|
11
|
+
auto count = args.size();
|
12
|
+
auto &left = args.data[0];
|
13
|
+
auto &right = args.data[1];
|
14
|
+
auto left_count = ListVector::GetListSize(left);
|
15
|
+
auto right_count = ListVector::GetListSize(right);
|
16
|
+
|
17
|
+
auto &left_child = ListVector::GetEntry(left);
|
18
|
+
auto &right_child = ListVector::GetEntry(right);
|
19
|
+
|
20
|
+
D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
21
|
+
D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
22
|
+
|
23
|
+
if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
|
24
|
+
throw InvalidInputException("list_cosine_similarity: left argument can not contain NULL values");
|
25
|
+
}
|
26
|
+
|
27
|
+
if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
|
28
|
+
throw InvalidInputException("list_cosine_similarity: right argument can not contain NULL values");
|
29
|
+
}
|
30
|
+
|
31
|
+
auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
|
32
|
+
auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
|
33
|
+
|
34
|
+
BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
|
35
|
+
left, right, result, count, [&](list_entry_t left, list_entry_t right) {
|
36
|
+
if (left.length != right.length) {
|
37
|
+
throw InvalidInputException(StringUtil::Format(
|
38
|
+
"list_cosine_similarity: list dimensions must be equal, got left length %d and right length %d",
|
39
|
+
left.length, right.length));
|
40
|
+
}
|
41
|
+
|
42
|
+
auto dimensions = left.length;
|
43
|
+
|
44
|
+
NUMERIC_TYPE distance = 0;
|
45
|
+
NUMERIC_TYPE norm_l = 0;
|
46
|
+
NUMERIC_TYPE norm_r = 0;
|
47
|
+
|
48
|
+
auto l_ptr = left_data + left.offset;
|
49
|
+
auto r_ptr = right_data + right.offset;
|
50
|
+
for (idx_t i = 0; i < dimensions; i++) {
|
51
|
+
auto x = *l_ptr++;
|
52
|
+
auto y = *r_ptr++;
|
53
|
+
distance += x * y;
|
54
|
+
norm_l += x * x;
|
55
|
+
norm_r += y * y;
|
56
|
+
}
|
57
|
+
|
58
|
+
auto similarity = distance / (std::sqrt(norm_l) * std::sqrt(norm_r));
|
59
|
+
|
60
|
+
// clamp to [-1, 1] to avoid floating point errors
|
61
|
+
return std::max(static_cast<NUMERIC_TYPE>(-1), std::min(similarity, static_cast<NUMERIC_TYPE>(1)));
|
62
|
+
});
|
63
|
+
|
64
|
+
if (args.AllConstant()) {
|
65
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
//! Builds the "list_cosine_similarity" function set with one overload for
//! (FLOAT[], FLOAT[]) -> FLOAT and one for (DOUBLE[], DOUBLE[]) -> DOUBLE.
ScalarFunctionSet ListCosineSimilarityFun::GetFunctions() {
	const auto float_list = LogicalType::LIST(LogicalType::FLOAT);
	const auto double_list = LogicalType::LIST(LogicalType::DOUBLE);

	ScalarFunctionSet overloads("list_cosine_similarity");

	// single-precision overload
	ScalarFunction float_overload({float_list, float_list}, LogicalType::FLOAT, ListCosineSimilarity<float>);
	overloads.AddFunction(float_overload);

	// double-precision overload
	ScalarFunction double_overload({double_list, double_list}, LogicalType::DOUBLE, ListCosineSimilarity<double>);
	overloads.AddFunction(double_overload);

	return overloads;
}
|
77
|
+
|
78
|
+
} // namespace duckdb
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#include "duckdb/core_functions/scalar/list_functions.hpp"
|
2
|
+
#include <cmath>
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
template <class NUMERIC_TYPE>
|
7
|
+
static void ListDistance(DataChunk &args, ExpressionState &, Vector &result) {
|
8
|
+
D_ASSERT(args.ColumnCount() == 2);
|
9
|
+
|
10
|
+
auto count = args.size();
|
11
|
+
auto &left = args.data[0];
|
12
|
+
auto &right = args.data[1];
|
13
|
+
auto left_count = ListVector::GetListSize(left);
|
14
|
+
auto right_count = ListVector::GetListSize(right);
|
15
|
+
|
16
|
+
auto &left_child = ListVector::GetEntry(left);
|
17
|
+
auto &right_child = ListVector::GetEntry(right);
|
18
|
+
|
19
|
+
D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
20
|
+
D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
21
|
+
|
22
|
+
if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
|
23
|
+
throw InvalidInputException("list_distance: left argument can not contain NULL values");
|
24
|
+
}
|
25
|
+
|
26
|
+
if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
|
27
|
+
throw InvalidInputException("list_distance: right argument can not contain NULL values");
|
28
|
+
}
|
29
|
+
|
30
|
+
auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
|
31
|
+
auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
|
32
|
+
|
33
|
+
BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
|
34
|
+
left, right, result, count, [&](list_entry_t left, list_entry_t right) {
|
35
|
+
if (left.length != right.length) {
|
36
|
+
throw InvalidInputException(StringUtil::Format(
|
37
|
+
"list_distance: list dimensions must be equal, got left length %d and right length %d", left.length,
|
38
|
+
right.length));
|
39
|
+
}
|
40
|
+
|
41
|
+
auto dimensions = left.length;
|
42
|
+
|
43
|
+
NUMERIC_TYPE distance = 0;
|
44
|
+
|
45
|
+
auto l_ptr = left_data + left.offset;
|
46
|
+
auto r_ptr = right_data + right.offset;
|
47
|
+
|
48
|
+
for (idx_t i = 0; i < dimensions; i++) {
|
49
|
+
auto x = *l_ptr++;
|
50
|
+
auto y = *r_ptr++;
|
51
|
+
auto diff = x - y;
|
52
|
+
distance += diff * diff;
|
53
|
+
}
|
54
|
+
|
55
|
+
return std::sqrt(distance);
|
56
|
+
});
|
57
|
+
|
58
|
+
if (args.AllConstant()) {
|
59
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
//! Builds the "list_distance" function set with one overload for
//! (FLOAT[], FLOAT[]) -> FLOAT and one for (DOUBLE[], DOUBLE[]) -> DOUBLE.
ScalarFunctionSet ListDistanceFun::GetFunctions() {
	const auto float_list = LogicalType::LIST(LogicalType::FLOAT);
	const auto double_list = LogicalType::LIST(LogicalType::DOUBLE);

	ScalarFunctionSet overloads("list_distance");

	// single-precision overload
	ScalarFunction float_overload({float_list, float_list}, LogicalType::FLOAT, ListDistance<float>);
	overloads.AddFunction(float_overload);

	// double-precision overload
	ScalarFunction double_overload({double_list, double_list}, LogicalType::DOUBLE, ListDistance<double>);
	overloads.AddFunction(double_overload);

	return overloads;
}
|
71
|
+
|
72
|
+
} // namespace duckdb
|
@@ -0,0 +1,70 @@
|
|
1
|
+
#include "duckdb/core_functions/scalar/list_functions.hpp"
|
2
|
+
|
3
|
+
namespace duckdb {
|
4
|
+
|
5
|
+
template <class NUMERIC_TYPE>
|
6
|
+
static void ListInnerProduct(DataChunk &args, ExpressionState &, Vector &result) {
|
7
|
+
D_ASSERT(args.ColumnCount() == 2);
|
8
|
+
|
9
|
+
auto count = args.size();
|
10
|
+
auto &left = args.data[0];
|
11
|
+
auto &right = args.data[1];
|
12
|
+
auto left_count = ListVector::GetListSize(left);
|
13
|
+
auto right_count = ListVector::GetListSize(right);
|
14
|
+
|
15
|
+
auto &left_child = ListVector::GetEntry(left);
|
16
|
+
auto &right_child = ListVector::GetEntry(right);
|
17
|
+
|
18
|
+
D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
19
|
+
D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
20
|
+
|
21
|
+
if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
|
22
|
+
throw InvalidInputException("list_inner_product: left argument can not contain NULL values");
|
23
|
+
}
|
24
|
+
|
25
|
+
if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
|
26
|
+
throw InvalidInputException("list_inner_product: right argument can not contain NULL values");
|
27
|
+
}
|
28
|
+
|
29
|
+
auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
|
30
|
+
auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
|
31
|
+
|
32
|
+
BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
|
33
|
+
left, right, result, count, [&](list_entry_t left, list_entry_t right) {
|
34
|
+
if (left.length != right.length) {
|
35
|
+
throw InvalidInputException(StringUtil::Format(
|
36
|
+
"list_inner_product: list dimensions must be equal, got left length %d and right length %d",
|
37
|
+
left.length, right.length));
|
38
|
+
}
|
39
|
+
|
40
|
+
auto dimensions = left.length;
|
41
|
+
|
42
|
+
NUMERIC_TYPE distance = 0;
|
43
|
+
|
44
|
+
auto l_ptr = left_data + left.offset;
|
45
|
+
auto r_ptr = right_data + right.offset;
|
46
|
+
|
47
|
+
for (idx_t i = 0; i < dimensions; i++) {
|
48
|
+
auto x = *l_ptr++;
|
49
|
+
auto y = *r_ptr++;
|
50
|
+
distance += x * y;
|
51
|
+
}
|
52
|
+
|
53
|
+
return distance;
|
54
|
+
});
|
55
|
+
|
56
|
+
if (args.AllConstant()) {
|
57
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
//! Builds the "list_inner_product" function set with one overload for
//! (FLOAT[], FLOAT[]) -> FLOAT and one for (DOUBLE[], DOUBLE[]) -> DOUBLE.
ScalarFunctionSet ListInnerProductFun::GetFunctions() {
	const auto float_list = LogicalType::LIST(LogicalType::FLOAT);
	const auto double_list = LogicalType::LIST(LogicalType::DOUBLE);

	ScalarFunctionSet overloads("list_inner_product");

	// single-precision overload
	ScalarFunction float_overload({float_list, float_list}, LogicalType::FLOAT, ListInnerProduct<float>);
	overloads.AddFunction(float_overload);

	// double-precision overload
	ScalarFunction double_overload({double_list, double_list}, LogicalType::DOUBLE, ListInnerProduct<double>);
	overloads.AddFunction(double_overload);

	return overloads;
}
|
69
|
+
|
70
|
+
} // namespace duckdb
|