duckdb 0.8.2-dev4025.0 → 0.8.2-dev4142.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
  5. package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
  6. package/src/duckdb/extension/json/json_scan.cpp +124 -121
  7. package/src/duckdb/src/catalog/catalog.cpp +20 -0
  8. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
  9. package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -0
  10. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  11. package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
  12. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
  13. package/src/duckdb/src/core_functions/function_list.cpp +7 -0
  14. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
  15. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
  16. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
  17. package/src/duckdb/src/execution/index/art/art.cpp +111 -92
  18. package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
  19. package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
  20. package/src/duckdb/src/execution/index/art/node.cpp +109 -203
  21. package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
  22. package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
  23. package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
  24. package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
  25. package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
  26. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
  27. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
  28. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  29. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
  30. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
  31. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  32. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
  33. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
  35. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
  37. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
  38. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
  40. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
  41. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
  42. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
  43. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
  44. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
  45. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
  46. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
  47. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
  48. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  49. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
  53. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
  54. package/src/duckdb/src/main/extension/extension_helper.cpp +17 -0
  55. package/src/duckdb/src/main/extension/extension_install.cpp +5 -3
  56. package/src/duckdb/src/main/extension/extension_load.cpp +3 -3
  57. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
  58. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  59. package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
  60. package/src/duckdb/src/storage/data_table.cpp +3 -3
  61. package/src/duckdb/src/storage/index.cpp +7 -1
  62. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
  63. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
  64. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  65. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  66. package/src/duckdb/src/transaction/commit_state.cpp +5 -1
  67. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
  68. package/src/duckdb/ub_src_execution_index.cpp +4 -0
  69. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  70. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  71. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -0,0 +1,78 @@
1
+ #include "duckdb/core_functions/scalar/list_functions.hpp"
2
+ #include <cmath>
3
+ #include <algorithm>
4
+
5
+ namespace duckdb {
6
+
7
+ template <class NUMERIC_TYPE>
8
+ static void ListCosineSimilarity(DataChunk &args, ExpressionState &, Vector &result) {
9
+ D_ASSERT(args.ColumnCount() == 2);
10
+
11
+ auto count = args.size();
12
+ auto &left = args.data[0];
13
+ auto &right = args.data[1];
14
+ auto left_count = ListVector::GetListSize(left);
15
+ auto right_count = ListVector::GetListSize(right);
16
+
17
+ auto &left_child = ListVector::GetEntry(left);
18
+ auto &right_child = ListVector::GetEntry(right);
19
+
20
+ D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
21
+ D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
22
+
23
+ if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
24
+ throw InvalidInputException("list_cosine_similarity: left argument can not contain NULL values");
25
+ }
26
+
27
+ if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
28
+ throw InvalidInputException("list_cosine_similarity: right argument can not contain NULL values");
29
+ }
30
+
31
+ auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
32
+ auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
33
+
34
+ BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
35
+ left, right, result, count, [&](list_entry_t left, list_entry_t right) {
36
+ if (left.length != right.length) {
37
+ throw InvalidInputException(StringUtil::Format(
38
+ "list_cosine_similarity: list dimensions must be equal, got left length %d and right length %d",
39
+ left.length, right.length));
40
+ }
41
+
42
+ auto dimensions = left.length;
43
+
44
+ NUMERIC_TYPE distance = 0;
45
+ NUMERIC_TYPE norm_l = 0;
46
+ NUMERIC_TYPE norm_r = 0;
47
+
48
+ auto l_ptr = left_data + left.offset;
49
+ auto r_ptr = right_data + right.offset;
50
+ for (idx_t i = 0; i < dimensions; i++) {
51
+ auto x = *l_ptr++;
52
+ auto y = *r_ptr++;
53
+ distance += x * y;
54
+ norm_l += x * x;
55
+ norm_r += y * y;
56
+ }
57
+
58
+ auto similarity = distance / (std::sqrt(norm_l) * std::sqrt(norm_r));
59
+
60
+ // clamp to [-1, 1] to avoid floating point errors
61
+ return std::max(static_cast<NUMERIC_TYPE>(-1), std::min(similarity, static_cast<NUMERIC_TYPE>(1)));
62
+ });
63
+
64
+ if (args.AllConstant()) {
65
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
66
+ }
67
+ }
68
+
69
+ ScalarFunctionSet ListCosineSimilarityFun::GetFunctions() {
70
+ ScalarFunctionSet set("list_cosine_similarity");
71
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::FLOAT), LogicalType::LIST(LogicalType::FLOAT)},
72
+ LogicalType::FLOAT, ListCosineSimilarity<float>));
73
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::DOUBLE), LogicalType::LIST(LogicalType::DOUBLE)},
74
+ LogicalType::DOUBLE, ListCosineSimilarity<double>));
75
+ return set;
76
+ }
77
+
78
+ } // namespace duckdb
@@ -0,0 +1,72 @@
1
+ #include "duckdb/core_functions/scalar/list_functions.hpp"
2
+ #include <cmath>
3
+
4
+ namespace duckdb {
5
+
6
+ template <class NUMERIC_TYPE>
7
+ static void ListDistance(DataChunk &args, ExpressionState &, Vector &result) {
8
+ D_ASSERT(args.ColumnCount() == 2);
9
+
10
+ auto count = args.size();
11
+ auto &left = args.data[0];
12
+ auto &right = args.data[1];
13
+ auto left_count = ListVector::GetListSize(left);
14
+ auto right_count = ListVector::GetListSize(right);
15
+
16
+ auto &left_child = ListVector::GetEntry(left);
17
+ auto &right_child = ListVector::GetEntry(right);
18
+
19
+ D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
20
+ D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
21
+
22
+ if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
23
+ throw InvalidInputException("list_distance: left argument can not contain NULL values");
24
+ }
25
+
26
+ if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
27
+ throw InvalidInputException("list_distance: right argument can not contain NULL values");
28
+ }
29
+
30
+ auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
31
+ auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
32
+
33
+ BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
34
+ left, right, result, count, [&](list_entry_t left, list_entry_t right) {
35
+ if (left.length != right.length) {
36
+ throw InvalidInputException(StringUtil::Format(
37
+ "list_distance: list dimensions must be equal, got left length %d and right length %d", left.length,
38
+ right.length));
39
+ }
40
+
41
+ auto dimensions = left.length;
42
+
43
+ NUMERIC_TYPE distance = 0;
44
+
45
+ auto l_ptr = left_data + left.offset;
46
+ auto r_ptr = right_data + right.offset;
47
+
48
+ for (idx_t i = 0; i < dimensions; i++) {
49
+ auto x = *l_ptr++;
50
+ auto y = *r_ptr++;
51
+ auto diff = x - y;
52
+ distance += diff * diff;
53
+ }
54
+
55
+ return std::sqrt(distance);
56
+ });
57
+
58
+ if (args.AllConstant()) {
59
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
60
+ }
61
+ }
62
+
63
+ ScalarFunctionSet ListDistanceFun::GetFunctions() {
64
+ ScalarFunctionSet set("list_distance");
65
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::FLOAT), LogicalType::LIST(LogicalType::FLOAT)},
66
+ LogicalType::FLOAT, ListDistance<float>));
67
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::DOUBLE), LogicalType::LIST(LogicalType::DOUBLE)},
68
+ LogicalType::DOUBLE, ListDistance<double>));
69
+ return set;
70
+ }
71
+
72
+ } // namespace duckdb
@@ -0,0 +1,70 @@
1
+ #include "duckdb/core_functions/scalar/list_functions.hpp"
2
+
3
+ namespace duckdb {
4
+
5
+ template <class NUMERIC_TYPE>
6
+ static void ListInnerProduct(DataChunk &args, ExpressionState &, Vector &result) {
7
+ D_ASSERT(args.ColumnCount() == 2);
8
+
9
+ auto count = args.size();
10
+ auto &left = args.data[0];
11
+ auto &right = args.data[1];
12
+ auto left_count = ListVector::GetListSize(left);
13
+ auto right_count = ListVector::GetListSize(right);
14
+
15
+ auto &left_child = ListVector::GetEntry(left);
16
+ auto &right_child = ListVector::GetEntry(right);
17
+
18
+ D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
19
+ D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
20
+
21
+ if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
22
+ throw InvalidInputException("list_inner_product: left argument can not contain NULL values");
23
+ }
24
+
25
+ if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
26
+ throw InvalidInputException("list_inner_product: right argument can not contain NULL values");
27
+ }
28
+
29
+ auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
30
+ auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
31
+
32
+ BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
33
+ left, right, result, count, [&](list_entry_t left, list_entry_t right) {
34
+ if (left.length != right.length) {
35
+ throw InvalidInputException(StringUtil::Format(
36
+ "list_inner_product: list dimensions must be equal, got left length %d and right length %d",
37
+ left.length, right.length));
38
+ }
39
+
40
+ auto dimensions = left.length;
41
+
42
+ NUMERIC_TYPE distance = 0;
43
+
44
+ auto l_ptr = left_data + left.offset;
45
+ auto r_ptr = right_data + right.offset;
46
+
47
+ for (idx_t i = 0; i < dimensions; i++) {
48
+ auto x = *l_ptr++;
49
+ auto y = *r_ptr++;
50
+ distance += x * y;
51
+ }
52
+
53
+ return distance;
54
+ });
55
+
56
+ if (args.AllConstant()) {
57
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
58
+ }
59
+ }
60
+
61
+ ScalarFunctionSet ListInnerProductFun::GetFunctions() {
62
+ ScalarFunctionSet set("list_inner_product");
63
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::FLOAT), LogicalType::LIST(LogicalType::FLOAT)},
64
+ LogicalType::FLOAT, ListInnerProduct<float>));
65
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::DOUBLE), LogicalType::LIST(LogicalType::DOUBLE)},
66
+ LogicalType::DOUBLE, ListInnerProduct<double>));
67
+ return set;
68
+ }
69
+
70
+ } // namespace duckdb