duckdb 0.8.2-dev1764.0 → 0.8.2-dev1859.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +38 -22
- package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -4
- package/src/duckdb/src/common/constants.cpp +2 -1
- package/src/duckdb/src/common/enum_util.cpp +5 -5
- package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +8 -0
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +5 -0
- package/src/duckdb/src/common/types/string_heap.cpp +4 -0
- package/src/duckdb/src/core_functions/function_list.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
- package/src/duckdb/src/execution/index/art/art.cpp +49 -108
- package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +10 -14
- package/src/duckdb/src/execution/index/art/iterator.cpp +13 -19
- package/src/duckdb/src/execution/index/art/leaf.cpp +290 -241
- package/src/duckdb/src/execution/index/art/node.cpp +104 -95
- package/src/duckdb/src/execution/index/art/node16.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node256.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node4.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node48.cpp +6 -6
- package/src/duckdb/src/execution/index/art/prefix.cpp +49 -39
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +34 -1175
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +4 -14
- package/src/duckdb/src/execution/window_executor.cpp +1280 -0
- package/src/duckdb/src/execution/window_segment_tree.cpp +224 -117
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +43 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +119 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +60 -53
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
- package/src/duckdb/src/parser/parser.cpp +43 -38
- package/src/duckdb/src/storage/arena_allocator.cpp +12 -0
- package/src/duckdb/src/storage/compression/rle.cpp +52 -12
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_src_execution.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -4
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
package/README.md
CHANGED
@@ -100,6 +100,13 @@ var stmt = con.prepare('select ?::INTEGER as fortytwo', function(err, stmt) {
|
|
100
100
|
});
|
101
101
|
```
|
102
102
|
|
103
|
+
## Supported Node versions
|
104
|
+
We actively support only LTS and in-support Node versions. As of July 2023, these are Node 16, Node 18 and Node 20.
|
105
|
+
Release schedule for Node.js can be checked here: https://github.com/nodejs/release#release-schedule.
|
106
|
+
|
107
|
+
We also currently bundle and test DuckDB for Node 10, 12, 14, 17 and 19. We plan on doing so going forward, as long as the tooling supports it.
|
108
|
+
As of July 2023, Node 15 has been removed from the supported versions.
|
109
|
+
|
103
110
|
## Development
|
104
111
|
|
105
112
|
### First install:
|
package/binding.gyp
CHANGED
@@ -45,6 +45,7 @@
|
|
45
45
|
"src/duckdb/ub_src_core_functions_scalar_bit.cpp",
|
46
46
|
"src/duckdb/ub_src_core_functions_scalar_blob.cpp",
|
47
47
|
"src/duckdb/ub_src_core_functions_scalar_date.cpp",
|
48
|
+
"src/duckdb/ub_src_core_functions_scalar_debug.cpp",
|
48
49
|
"src/duckdb/ub_src_core_functions_scalar_enum.cpp",
|
49
50
|
"src/duckdb/ub_src_core_functions_scalar_generic.cpp",
|
50
51
|
"src/duckdb/ub_src_core_functions_scalar_list.cpp",
|
package/package.json
CHANGED
@@ -93,6 +93,7 @@ public:
|
|
93
93
|
shared_ptr<ParquetFileMetadataCache> metadata;
|
94
94
|
ParquetOptions parquet_options;
|
95
95
|
MultiFileReaderData reader_data;
|
96
|
+
unique_ptr<ColumnReader> root_reader;
|
96
97
|
|
97
98
|
public:
|
98
99
|
void InitializeScan(ParquetReaderScanState &state, vector<idx_t> groups_to_read);
|
@@ -116,6 +116,11 @@ struct ParquetWriteBindData : public TableFunctionData {
|
|
116
116
|
vector<string> column_names;
|
117
117
|
duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
|
118
118
|
idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
|
119
|
+
|
120
|
+
//! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
|
121
|
+
static constexpr const idx_t BYTES_PER_ROW = 1024;
|
122
|
+
idx_t row_group_size_bytes;
|
123
|
+
|
119
124
|
ChildFieldIDs field_ids;
|
120
125
|
};
|
121
126
|
|
@@ -741,33 +746,39 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
|
|
741
746
|
unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
|
742
747
|
vector<LogicalType> &sql_types) {
|
743
748
|
D_ASSERT(names.size() == sql_types.size());
|
749
|
+
bool row_group_size_bytes_set = false;
|
744
750
|
auto bind_data = make_uniq<ParquetWriteBindData>();
|
745
751
|
for (auto &option : info.options) {
|
746
|
-
auto loption = StringUtil::Lower(option.first);
|
752
|
+
const auto loption = StringUtil::Lower(option.first);
|
753
|
+
if (option.second.size() != 1) {
|
754
|
+
// All parquet write options require exactly one argument
|
755
|
+
throw BinderException("%s requires exactly one argument", StringUtil::Upper(loption));
|
756
|
+
}
|
747
757
|
if (loption == "row_group_size" || loption == "chunk_size") {
|
748
758
|
bind_data->row_group_size = option.second[0].GetValue<uint64_t>();
|
759
|
+
} else if (loption == "row_group_size_bytes") {
|
760
|
+
auto roption = option.second[0];
|
761
|
+
if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) {
|
762
|
+
bind_data->row_group_size_bytes = DBConfig::ParseMemoryLimit(roption.ToString());
|
763
|
+
} else {
|
764
|
+
bind_data->row_group_size_bytes = option.second[0].GetValue<uint64_t>();
|
765
|
+
}
|
766
|
+
row_group_size_bytes_set = true;
|
749
767
|
} else if (loption == "compression" || loption == "codec") {
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
|
763
|
-
continue;
|
764
|
-
}
|
768
|
+
const auto roption = StringUtil::Lower(option.second[0].ToString());
|
769
|
+
if (roption == "uncompressed") {
|
770
|
+
bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED;
|
771
|
+
} else if (roption == "snappy") {
|
772
|
+
bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
|
773
|
+
} else if (roption == "gzip") {
|
774
|
+
bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP;
|
775
|
+
} else if (roption == "zstd") {
|
776
|
+
bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
|
777
|
+
} else {
|
778
|
+
throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]",
|
779
|
+
loption);
|
765
780
|
}
|
766
|
-
throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
|
767
781
|
} else if (loption == "field_ids") {
|
768
|
-
if (option.second.size() != 1) {
|
769
|
-
throw BinderException("FIELD_IDS requires exactly one argument");
|
770
|
-
}
|
771
782
|
if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
|
772
783
|
StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
|
773
784
|
idx_t field_id = 0;
|
@@ -788,6 +799,9 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
|
|
788
799
|
throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
|
789
800
|
}
|
790
801
|
}
|
802
|
+
if (!row_group_size_bytes_set) {
|
803
|
+
bind_data->row_group_size_bytes = bind_data->row_group_size * ParquetWriteBindData::BYTES_PER_ROW;
|
804
|
+
}
|
791
805
|
bind_data->sql_types = sql_types;
|
792
806
|
bind_data->column_names = names;
|
793
807
|
return std::move(bind_data);
|
@@ -812,8 +826,10 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob
|
|
812
826
|
|
813
827
|
// append data to the local (buffered) chunk collection
|
814
828
|
local_state.buffer.Append(local_state.append_state, input);
|
815
|
-
|
816
|
-
|
829
|
+
|
830
|
+
if (local_state.buffer.Count() > bind_data.row_group_size ||
|
831
|
+
local_state.buffer.SizeInBytes() > bind_data.row_group_size_bytes) {
|
832
|
+
// if the chunk collection exceeds a certain size (rows/bytes) we flush it to the parquet file
|
817
833
|
local_state.append_state.current_chunk_state.handles.clear();
|
818
834
|
global_state.writer->Flush(local_state.buffer);
|
819
835
|
local_state.buffer.InitializeAppend(local_state.append_state);
|
@@ -399,8 +399,7 @@ void ParquetReader::InitializeSchema() {
|
|
399
399
|
if (file_meta_data->schema.size() < 2) {
|
400
400
|
throw FormatException("Need at least one non-root column in the file");
|
401
401
|
}
|
402
|
-
|
403
|
-
|
402
|
+
root_reader = CreateReader();
|
404
403
|
auto &root_type = root_reader->Type();
|
405
404
|
auto &child_types = StructType::GetChildTypes(root_type);
|
406
405
|
D_ASSERT(root_type.id() == LogicalTypeId::STRUCT);
|
@@ -450,7 +449,6 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
|
|
450
449
|
ObjectCache::GetObjectCache(context_p).Put(file_name, metadata);
|
451
450
|
}
|
452
451
|
}
|
453
|
-
|
454
452
|
InitializeSchema();
|
455
453
|
}
|
456
454
|
|
@@ -483,7 +481,6 @@ unique_ptr<BaseStatistics> ParquetReader::ReadStatistics(const string &name) {
|
|
483
481
|
|
484
482
|
unique_ptr<BaseStatistics> column_stats;
|
485
483
|
auto file_meta_data = GetFileMetadata();
|
486
|
-
auto root_reader = CreateReader();
|
487
484
|
auto column_reader = root_reader->Cast<StructColumnReader>().GetChildReader(file_col_idx);
|
488
485
|
|
489
486
|
for (idx_t row_group_idx = 0; row_group_idx < file_meta_data->row_groups.size(); row_group_idx++) {
|
@@ -7,7 +7,8 @@
|
|
7
7
|
namespace duckdb {
|
8
8
|
|
9
9
|
constexpr const idx_t DConstants::INVALID_INDEX;
|
10
|
-
const row_t MAX_ROW_ID =
|
10
|
+
const row_t MAX_ROW_ID = 36028797018960000ULL; // 2^55
|
11
|
+
const row_t MAX_ROW_ID_LOCAL = 72057594037920000ULL; // 2^56
|
11
12
|
const column_t COLUMN_IDENTIFIER_ROW_ID = (column_t)-1;
|
12
13
|
const sel_t ZERO_VECTOR[STANDARD_VECTOR_SIZE] = {0};
|
13
14
|
const double PI = 3.141592653589793;
|
@@ -3112,8 +3112,6 @@ const char* EnumUtil::ToChars<NType>(NType value) {
|
|
3112
3112
|
switch(value) {
|
3113
3113
|
case NType::PREFIX:
|
3114
3114
|
return "PREFIX";
|
3115
|
-
case NType::LEAF_SEGMENT:
|
3116
|
-
return "LEAF_SEGMENT";
|
3117
3115
|
case NType::LEAF:
|
3118
3116
|
return "LEAF";
|
3119
3117
|
case NType::NODE_4:
|
@@ -3124,6 +3122,8 @@ const char* EnumUtil::ToChars<NType>(NType value) {
|
|
3124
3122
|
return "NODE_48";
|
3125
3123
|
case NType::NODE_256:
|
3126
3124
|
return "NODE_256";
|
3125
|
+
case NType::LEAF_INLINED:
|
3126
|
+
return "LEAF_INLINED";
|
3127
3127
|
default:
|
3128
3128
|
throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
|
3129
3129
|
}
|
@@ -3134,9 +3134,6 @@ NType EnumUtil::FromString<NType>(const char *value) {
|
|
3134
3134
|
if (StringUtil::Equals(value, "PREFIX")) {
|
3135
3135
|
return NType::PREFIX;
|
3136
3136
|
}
|
3137
|
-
if (StringUtil::Equals(value, "LEAF_SEGMENT")) {
|
3138
|
-
return NType::LEAF_SEGMENT;
|
3139
|
-
}
|
3140
3137
|
if (StringUtil::Equals(value, "LEAF")) {
|
3141
3138
|
return NType::LEAF;
|
3142
3139
|
}
|
@@ -3152,6 +3149,9 @@ NType EnumUtil::FromString<NType>(const char *value) {
|
|
3152
3149
|
if (StringUtil::Equals(value, "NODE_256")) {
|
3153
3150
|
return NType::NODE_256;
|
3154
3151
|
}
|
3152
|
+
if (StringUtil::Equals(value, "LEAF_INLINED")) {
|
3153
|
+
return NType::LEAF_INLINED;
|
3154
|
+
}
|
3155
3155
|
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
3156
3156
|
}
|
3157
3157
|
|
@@ -315,7 +315,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
|
|
315
315
|
sd.data_blocks.back()->block->SetSwizzling(nullptr);
|
316
316
|
// Create a single heap block to store the ordered heap
|
317
317
|
idx_t total_byte_offset =
|
318
|
-
std::accumulate(heap.blocks.begin(), heap.blocks.end(), 0,
|
318
|
+
std::accumulate(heap.blocks.begin(), heap.blocks.end(), (idx_t)0,
|
319
319
|
[](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->byte_offset; });
|
320
320
|
idx_t heap_block_size = MaxValue(total_byte_offset, (idx_t)Storage::BLOCK_SIZE);
|
321
321
|
auto ordered_heap_block = make_uniq<RowDataBlock>(*buffer_manager, heap_block_size, 1);
|
@@ -85,7 +85,7 @@ SortedBlock::SortedBlock(BufferManager &buffer_manager, GlobalSortState &state)
|
|
85
85
|
}
|
86
86
|
|
87
87
|
idx_t SortedBlock::Count() const {
|
88
|
-
idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), 0,
|
88
|
+
idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), (idx_t)0,
|
89
89
|
[](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->count; });
|
90
90
|
if (!sort_layout.all_constant) {
|
91
91
|
D_ASSERT(count == blob_sorting_data->Count());
|
@@ -100,6 +100,14 @@ Allocator &ColumnDataCollection::GetAllocator() const {
|
|
100
100
|
return allocator->GetAllocator();
|
101
101
|
}
|
102
102
|
|
103
|
+
idx_t ColumnDataCollection::SizeInBytes() const {
|
104
|
+
idx_t total_size = 0;
|
105
|
+
for (const auto &segment : segments) {
|
106
|
+
total_size += segment->SizeInBytes();
|
107
|
+
}
|
108
|
+
return total_size;
|
109
|
+
}
|
110
|
+
|
103
111
|
//===--------------------------------------------------------------------===//
|
104
112
|
// ColumnDataRow
|
105
113
|
//===--------------------------------------------------------------------===//
|
@@ -243,6 +243,11 @@ idx_t ColumnDataCollectionSegment::ChunkCount() const {
|
|
243
243
|
return chunk_data.size();
|
244
244
|
}
|
245
245
|
|
246
|
+
idx_t ColumnDataCollectionSegment::SizeInBytes() const {
|
247
|
+
D_ASSERT(!allocator->IsShared());
|
248
|
+
return allocator->SizeInBytes() + heap->SizeInBytes();
|
249
|
+
}
|
250
|
+
|
246
251
|
void ColumnDataCollectionSegment::FetchChunk(idx_t chunk_idx, DataChunk &result) {
|
247
252
|
vector<column_t> column_ids;
|
248
253
|
column_ids.reserve(types.size());
|
@@ -17,6 +17,7 @@
|
|
17
17
|
#include "duckdb/core_functions/scalar/string_functions.hpp"
|
18
18
|
#include "duckdb/core_functions/scalar/struct_functions.hpp"
|
19
19
|
#include "duckdb/core_functions/scalar/union_functions.hpp"
|
20
|
+
#include "duckdb/core_functions/scalar/debug_functions.hpp"
|
20
21
|
|
21
22
|
namespace duckdb {
|
22
23
|
|
@@ -339,6 +340,7 @@ static StaticFunctionDefinition internal_functions[] = {
|
|
339
340
|
DUCKDB_AGGREGATE_FUNCTION(VarPopFun),
|
340
341
|
DUCKDB_AGGREGATE_FUNCTION(VarSampFun),
|
341
342
|
DUCKDB_AGGREGATE_FUNCTION_ALIAS(VarianceFun),
|
343
|
+
DUCKDB_SCALAR_FUNCTION(VectorTypeFun),
|
342
344
|
DUCKDB_SCALAR_FUNCTION(VersionFun),
|
343
345
|
DUCKDB_SCALAR_FUNCTION_SET(WeekFun),
|
344
346
|
DUCKDB_SCALAR_FUNCTION_SET(WeekDayFun),
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#include "duckdb/core_functions/scalar/debug_functions.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/exception.hpp"
|
4
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
5
|
+
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
6
|
+
#include "duckdb/common/enum_util.hpp"
|
7
|
+
|
8
|
+
namespace duckdb {
|
9
|
+
|
10
|
+
static void VectorTypeFunction(DataChunk &input, ExpressionState &state, Vector &result) {
|
11
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
12
|
+
auto data = ConstantVector::GetData<string_t>(result);
|
13
|
+
data[0] = StringVector::AddString(result, EnumUtil::ToString(input.data[0].GetVectorType()));
|
14
|
+
}
|
15
|
+
|
16
|
+
ScalarFunction VectorTypeFun::GetFunction() {
|
17
|
+
return ScalarFunction("vector_type", // name of the function
|
18
|
+
{LogicalType::ANY}, // argument list
|
19
|
+
LogicalType::VARCHAR, // return type
|
20
|
+
VectorTypeFunction);
|
21
|
+
}
|
22
|
+
|
23
|
+
} // namespace duckdb
|
@@ -5,7 +5,6 @@
|
|
5
5
|
#include "duckdb/execution/expression_executor.hpp"
|
6
6
|
#include "duckdb/storage/arena_allocator.hpp"
|
7
7
|
#include "duckdb/execution/index/art/art_key.hpp"
|
8
|
-
#include "duckdb/execution/index/art/leaf_segment.hpp"
|
9
8
|
#include "duckdb/execution/index/art/prefix.hpp"
|
10
9
|
#include "duckdb/execution/index/art/leaf.hpp"
|
11
10
|
#include "duckdb/execution/index/art/node4.hpp"
|
@@ -44,7 +43,6 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
44
43
|
|
45
44
|
// initialize all allocators
|
46
45
|
allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(Prefix), buffer_manager.GetBufferAllocator()));
|
47
|
-
allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(LeafSegment), buffer_manager.GetBufferAllocator()));
|
48
46
|
allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(Leaf), buffer_manager.GetBufferAllocator()));
|
49
47
|
allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(Node4), buffer_manager.GetBufferAllocator()));
|
50
48
|
allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(Node16), buffer_manager.GetBufferAllocator()));
|
@@ -54,8 +52,8 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
54
52
|
// set the root node of the tree
|
55
53
|
tree = make_uniq<Node>();
|
56
54
|
if (block_id != DConstants::INVALID_INDEX) {
|
57
|
-
tree->
|
58
|
-
tree->
|
55
|
+
tree->SetSerialized();
|
56
|
+
tree->SetPtr(block_id, block_offset);
|
59
57
|
tree->Deserialize(*this);
|
60
58
|
}
|
61
59
|
serialized_data_pointer = BlockPointer(block_id, block_offset);
|
@@ -308,7 +306,7 @@ bool Construct(ART &art, vector<ARTKey> &keys, row_t *row_ids, Node &node, KeySe
|
|
308
306
|
reference<Node> ref_node(node);
|
309
307
|
Prefix::New(art, ref_node, start_key, prefix_start, start_key.len - prefix_start);
|
310
308
|
if (single_row_id) {
|
311
|
-
Leaf::New(
|
309
|
+
Leaf::New(ref_node, row_ids[key_section.start]);
|
312
310
|
} else {
|
313
311
|
Leaf::New(art, ref_node, row_ids + key_section.start, num_row_ids);
|
314
312
|
}
|
@@ -358,19 +356,9 @@ bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_ide
|
|
358
356
|
D_ASSERT(!VerifyAndToStringInternal(true).empty());
|
359
357
|
for (idx_t i = 0; i < count; i++) {
|
360
358
|
D_ASSERT(!keys[i].Empty());
|
361
|
-
auto
|
362
|
-
D_ASSERT(
|
363
|
-
|
364
|
-
|
365
|
-
if (leaf.IsInlined()) {
|
366
|
-
D_ASSERT(row_ids[i] == leaf.row_ids.inlined);
|
367
|
-
continue;
|
368
|
-
}
|
369
|
-
|
370
|
-
D_ASSERT(leaf.row_ids.ptr.IsSet());
|
371
|
-
Node leaf_segment = leaf.row_ids.ptr;
|
372
|
-
auto position = leaf.FindRowId(*this, leaf_segment, row_ids[i]);
|
373
|
-
D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
|
359
|
+
auto leaf = Lookup(*tree, keys[i], 0);
|
360
|
+
D_ASSERT(leaf.IsSet());
|
361
|
+
D_ASSERT(Leaf::ContainsRowId(*this, leaf, row_ids[i]));
|
374
362
|
}
|
375
363
|
#endif
|
376
364
|
|
@@ -431,19 +419,9 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
431
419
|
continue;
|
432
420
|
}
|
433
421
|
|
434
|
-
auto
|
435
|
-
D_ASSERT(
|
436
|
-
|
437
|
-
|
438
|
-
if (leaf.IsInlined()) {
|
439
|
-
D_ASSERT(row_identifiers[i] == leaf.row_ids.inlined);
|
440
|
-
continue;
|
441
|
-
}
|
442
|
-
|
443
|
-
D_ASSERT(leaf.row_ids.ptr.IsSet());
|
444
|
-
Node leaf_segment = leaf.row_ids.ptr;
|
445
|
-
auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
|
446
|
-
D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
|
422
|
+
auto leaf = Lookup(*tree, keys[i], 0);
|
423
|
+
D_ASSERT(leaf.IsSet());
|
424
|
+
D_ASSERT(Leaf::ContainsRowId(*this, leaf, row_identifiers[i]));
|
447
425
|
}
|
448
426
|
#endif
|
449
427
|
|
@@ -471,19 +449,13 @@ void ART::VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) {
|
|
471
449
|
CheckConstraintsForChunk(chunk, conflict_manager);
|
472
450
|
}
|
473
451
|
|
474
|
-
bool ART::InsertToLeaf(Node &
|
452
|
+
bool ART::InsertToLeaf(Node &leaf, const row_t &row_id) {
|
475
453
|
|
476
|
-
|
477
|
-
|
478
|
-
#ifdef DEBUG
|
479
|
-
for (idx_t k = 0; k < leaf.count; k++) {
|
480
|
-
D_ASSERT(leaf.GetRowId(*this, k) != row_id);
|
481
|
-
}
|
482
|
-
#endif
|
483
|
-
if (IsUnique() && leaf.count != 0) {
|
454
|
+
if (IsUnique()) {
|
484
455
|
return false;
|
485
456
|
}
|
486
|
-
|
457
|
+
|
458
|
+
Leaf::Insert(*this, leaf, row_id);
|
487
459
|
return true;
|
488
460
|
}
|
489
461
|
|
@@ -494,14 +466,14 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
|
|
494
466
|
D_ASSERT(depth <= key.len);
|
495
467
|
reference<Node> ref_node(node);
|
496
468
|
Prefix::New(*this, ref_node, key, depth, key.len - depth);
|
497
|
-
Leaf::New(
|
469
|
+
Leaf::New(ref_node, row_id);
|
498
470
|
return true;
|
499
471
|
}
|
500
472
|
|
501
|
-
auto node_type = node.
|
473
|
+
auto node_type = node.GetType();
|
502
474
|
|
503
475
|
// insert the row ID into this leaf
|
504
|
-
if (node_type == NType::LEAF) {
|
476
|
+
if (node_type == NType::LEAF || node_type == NType::LEAF_INLINED) {
|
505
477
|
return InsertToLeaf(node, row_id);
|
506
478
|
}
|
507
479
|
|
@@ -518,13 +490,11 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
|
|
518
490
|
|
519
491
|
// insert a new leaf node at key[depth]
|
520
492
|
Node leaf_node;
|
493
|
+
reference<Node> ref_node(leaf_node);
|
521
494
|
if (depth + 1 < key.len) {
|
522
|
-
reference<Node> ref_node(leaf_node);
|
523
495
|
Prefix::New(*this, ref_node, key, depth + 1, key.len - depth - 1);
|
524
|
-
Leaf::New(*this, ref_node, row_id);
|
525
|
-
} else {
|
526
|
-
Leaf::New(*this, leaf_node, row_id);
|
527
496
|
}
|
497
|
+
Leaf::New(ref_node, row_id);
|
528
498
|
Node::InsertChild(*this, node, key[depth], leaf_node);
|
529
499
|
return true;
|
530
500
|
}
|
@@ -534,7 +504,7 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
|
|
534
504
|
auto mismatch_position = Prefix::Traverse(*this, next_node, key, depth);
|
535
505
|
|
536
506
|
// prefix matches key
|
537
|
-
if (next_node.get().
|
507
|
+
if (next_node.get().GetType() != NType::PREFIX) {
|
538
508
|
return Insert(next_node, key, depth, row_id);
|
539
509
|
}
|
540
510
|
|
@@ -550,13 +520,11 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
|
|
550
520
|
|
551
521
|
// insert new leaf
|
552
522
|
Node leaf_node;
|
523
|
+
reference<Node> ref_node(leaf_node);
|
553
524
|
if (depth + 1 < key.len) {
|
554
|
-
reference<Node> ref_node(leaf_node);
|
555
525
|
Prefix::New(*this, ref_node, key, depth + 1, key.len - depth - 1);
|
556
|
-
Leaf::New(*this, ref_node, row_id);
|
557
|
-
} else {
|
558
|
-
Leaf::New(*this, leaf_node, row_id);
|
559
526
|
}
|
527
|
+
Leaf::New(ref_node, row_id);
|
560
528
|
Node4::InsertChild(*this, next_node, key[depth], leaf_node);
|
561
529
|
return true;
|
562
530
|
}
|
@@ -596,19 +564,9 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
596
564
|
continue;
|
597
565
|
}
|
598
566
|
|
599
|
-
auto
|
600
|
-
if (
|
601
|
-
|
602
|
-
|
603
|
-
if (leaf.IsInlined()) {
|
604
|
-
D_ASSERT(row_identifiers[i] != leaf.row_ids.inlined);
|
605
|
-
continue;
|
606
|
-
}
|
607
|
-
|
608
|
-
D_ASSERT(leaf.row_ids.ptr.IsSet());
|
609
|
-
Node leaf_segment = leaf.row_ids.ptr;
|
610
|
-
auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
|
611
|
-
D_ASSERT(position == (uint32_t)DConstants::INVALID_INDEX);
|
567
|
+
auto leaf = Lookup(*tree, keys[i], 0);
|
568
|
+
if (leaf.IsSet()) {
|
569
|
+
D_ASSERT(!Leaf::ContainsRowId(*this, leaf, row_identifiers[i]));
|
612
570
|
}
|
613
571
|
}
|
614
572
|
#endif
|
@@ -622,21 +580,17 @@ void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id)
|
|
622
580
|
|
623
581
|
// handle prefix
|
624
582
|
reference<Node> next_node(node);
|
625
|
-
if (next_node.get().
|
583
|
+
if (next_node.get().GetType() == NType::PREFIX) {
|
626
584
|
Prefix::Traverse(*this, next_node, key, depth);
|
627
|
-
if (next_node.get().
|
585
|
+
if (next_node.get().GetType() == NType::PREFIX) {
|
628
586
|
return;
|
629
587
|
}
|
630
588
|
}
|
631
589
|
|
632
590
|
// delete a row ID from a leaf (root is leaf with possible prefix nodes)
|
633
|
-
if (next_node.get().
|
634
|
-
|
635
|
-
leaf.Remove(*this, row_id);
|
636
|
-
|
637
|
-
if (leaf.count == 0) {
|
591
|
+
if (next_node.get().GetType() == NType::LEAF || next_node.get().GetType() == NType::LEAF_INLINED) {
|
592
|
+
if (Leaf::Remove(*this, next_node, row_id)) {
|
638
593
|
Node::Free(*this, node);
|
639
|
-
node.Reset();
|
640
594
|
}
|
641
595
|
return;
|
642
596
|
}
|
@@ -648,20 +602,16 @@ void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id)
|
|
648
602
|
|
649
603
|
auto temp_depth = depth + 1;
|
650
604
|
reference<Node> child_node(*child);
|
651
|
-
if (child_node.get().
|
605
|
+
if (child_node.get().GetType() == NType::PREFIX) {
|
652
606
|
Prefix::Traverse(*this, child_node, key, temp_depth);
|
653
|
-
if (child_node.get().
|
607
|
+
if (child_node.get().GetType() == NType::PREFIX) {
|
654
608
|
return;
|
655
609
|
}
|
656
610
|
}
|
657
611
|
|
658
|
-
if (child_node.get().
|
612
|
+
if (child_node.get().GetType() == NType::LEAF || child_node.get().GetType() == NType::LEAF_INLINED) {
|
659
613
|
// leaf found, remove entry
|
660
|
-
|
661
|
-
leaf.Remove(*this, row_id);
|
662
|
-
|
663
|
-
if (leaf.count == 0) {
|
664
|
-
// leaf is empty, delete leaf, decrement node counter and maybe shrink node
|
614
|
+
if (Leaf::Remove(*this, child_node, row_id)) {
|
665
615
|
Node::DeleteChild(*this, next_node, node, key[depth]);
|
666
616
|
}
|
667
617
|
return;
|
@@ -713,20 +663,11 @@ static ARTKey CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &val
|
|
713
663
|
|
714
664
|
bool ART::SearchEqual(ARTKey &key, idx_t max_count, vector<row_t> &result_ids) {
|
715
665
|
|
716
|
-
auto
|
717
|
-
if (!
|
666
|
+
auto leaf = Lookup(*tree, key, 0);
|
667
|
+
if (!leaf.IsSet()) {
|
718
668
|
return true;
|
719
669
|
}
|
720
|
-
|
721
|
-
auto &leaf = Leaf::Get(*this, leaf_node);
|
722
|
-
if (leaf.count > max_count) {
|
723
|
-
return false;
|
724
|
-
}
|
725
|
-
for (idx_t i = 0; i < leaf.count; i++) {
|
726
|
-
row_t row_id = leaf.GetRowId(*this, i);
|
727
|
-
result_ids.push_back(row_id);
|
728
|
-
}
|
729
|
-
return true;
|
670
|
+
return Leaf::GetRowIds(*this, leaf, result_ids, max_count);
|
730
671
|
}
|
731
672
|
|
732
673
|
void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {
|
@@ -738,8 +679,10 @@ void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {
|
|
738
679
|
return;
|
739
680
|
}
|
740
681
|
|
741
|
-
|
742
|
-
|
682
|
+
// we only perform index joins on PK/FK columns
|
683
|
+
D_ASSERT(leaf_node.GetType() == NType::LEAF_INLINED);
|
684
|
+
result_size = 1;
|
685
|
+
return;
|
743
686
|
}
|
744
687
|
|
745
688
|
//===--------------------------------------------------------------------===//
|
@@ -752,14 +695,14 @@ Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {
|
|
752
695
|
|
753
696
|
// traverse prefix, if exists
|
754
697
|
reference<Node> next_node(node);
|
755
|
-
if (next_node.get().
|
698
|
+
if (next_node.get().GetType() == NType::PREFIX) {
|
756
699
|
Prefix::Traverse(*this, next_node, key, depth);
|
757
|
-
if (next_node.get().
|
700
|
+
if (next_node.get().GetType() == NType::PREFIX) {
|
758
701
|
return Node();
|
759
702
|
}
|
760
703
|
}
|
761
704
|
|
762
|
-
if (next_node.get().
|
705
|
+
if (next_node.get().GetType() == NType::LEAF || next_node.get().GetType() == NType::LEAF_INLINED) {
|
763
706
|
return next_node.get();
|
764
707
|
}
|
765
708
|
|
@@ -994,20 +937,18 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m
|
|
994
937
|
continue;
|
995
938
|
}
|
996
939
|
|
997
|
-
auto
|
998
|
-
if (!
|
940
|
+
auto leaf = Lookup(*tree, keys[i], 0);
|
941
|
+
if (!leaf.IsSet()) {
|
999
942
|
if (conflict_manager.AddMiss(i)) {
|
1000
943
|
found_conflict = i;
|
1001
944
|
}
|
1002
945
|
continue;
|
1003
946
|
}
|
1004
947
|
|
1005
|
-
//
|
1006
|
-
// NOTE:
|
1007
|
-
|
1008
|
-
|
1009
|
-
auto row_id = leaf.GetRowId(*this, 0);
|
1010
|
-
if (conflict_manager.AddHit(i, row_id)) {
|
948
|
+
// when we find a node, we need to update the 'matches' and 'row_ids'
|
949
|
+
// NOTE: leaves can have more than one row_id, but for UNIQUE/PRIMARY KEY they will only have one
|
950
|
+
D_ASSERT(leaf.GetType() == NType::LEAF_INLINED);
|
951
|
+
if (conflict_manager.AddHit(i, leaf.GetRowId())) {
|
1011
952
|
found_conflict = i;
|
1012
953
|
}
|
1013
954
|
}
|
@@ -1086,7 +1027,7 @@ void ART::Vacuum(IndexLock &state) {
|
|
1086
1027
|
}
|
1087
1028
|
|
1088
1029
|
// traverse the allocated memory of the tree to perform a vacuum
|
1089
|
-
|
1030
|
+
tree->Vacuum(*this, flags);
|
1090
1031
|
|
1091
1032
|
// finalize the vacuum operation
|
1092
1033
|
FinalizeVacuum(flags);
|
@@ -72,17 +72,6 @@ bool ARTKey::operator>(const ARTKey &k) const {
|
|
72
72
|
return len > k.len;
|
73
73
|
}
|
74
74
|
|
75
|
-
bool ARTKey::operator<(const ARTKey &k) const {
|
76
|
-
for (uint32_t i = 0; i < MinValue<uint32_t>(len, k.len); i++) {
|
77
|
-
if (data[i] < k.data[i]) {
|
78
|
-
return true;
|
79
|
-
} else if (data[i] > k.data[i]) {
|
80
|
-
return false;
|
81
|
-
}
|
82
|
-
}
|
83
|
-
return len < k.len;
|
84
|
-
}
|
85
|
-
|
86
75
|
bool ARTKey::operator>=(const ARTKey &k) const {
|
87
76
|
for (uint32_t i = 0; i < MinValue<uint32_t>(len, k.len); i++) {
|
88
77
|
if (data[i] > k.data[i]) {
|