duckdb 0.8.1-dev287.0 → 0.8.1-dev327.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet-extension.cpp +24 -0
- package/src/duckdb/src/common/types/timestamp.cpp +37 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +18 -12
- package/src/duckdb/src/execution/index/art/art.cpp +80 -7
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +20 -1
- package/src/duckdb/src/execution/index/art/leaf.cpp +10 -11
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +10 -0
- package/src/duckdb/src/execution/index/art/node.cpp +47 -35
- package/src/duckdb/src/execution/index/art/node16.cpp +3 -0
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -0
- package/src/duckdb/src/execution/index/art/node4.cpp +3 -0
- package/src/duckdb/src/execution/index/art/node48.cpp +2 -0
- package/src/duckdb/src/execution/index/art/prefix.cpp +2 -0
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +29 -3
- package/src/duckdb/src/function/table/read_csv.cpp +2 -0
- package/src/duckdb/src/function/table/repeat.cpp +3 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +4 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +10 -4
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +13 -3
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/byte_reader.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +13 -13
- package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +0 -6
- package/src/duckdb/src/parser/parsed_data/create_info.cpp +0 -3
- package/src/duckdb/src/parser/transform/helpers/nodetype_to_string.cpp +0 -2
- package/src/duckdb/src/parser/transform/statement/transform_drop.cpp +0 -3
- package/src/duckdb/src/parser/transformer.cpp +0 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +0 -27
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +0 -25
- package/src/duckdb/src/planner/operator/logical_pivot.cpp +14 -2
- package/src/duckdb/src/storage/index.cpp +13 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +0 -14
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12828 -12956
- package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +4 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +0 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_database_info.hpp +0 -46
- package/src/duckdb/src/parser/transform/statement/transform_create_database.cpp +0 -27
package/package.json
CHANGED
@@ -665,6 +665,28 @@ unique_ptr<LocalFunctionData> ParquetWriteInitializeLocal(ExecutionContext &cont
|
|
665
665
|
return make_uniq<ParquetWriteLocalState>(context.client, bind_data.sql_types);
|
666
666
|
}
|
667
667
|
|
668
|
+
// LCOV_EXCL_START
|
669
|
+
static void ParquetCopySerialize(FieldWriter &writer, const FunctionData &bind_data_p, const CopyFunction &function) {
|
670
|
+
auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
|
671
|
+
writer.WriteRegularSerializableList<LogicalType>(bind_data.sql_types);
|
672
|
+
writer.WriteList<string>(bind_data.column_names);
|
673
|
+
writer.WriteField<duckdb_parquet::format::CompressionCodec::type>(bind_data.codec);
|
674
|
+
writer.WriteField<idx_t>(bind_data.row_group_size);
|
675
|
+
}
|
676
|
+
|
677
|
+
static unique_ptr<FunctionData> ParquetCopyDeserialize(ClientContext &context, FieldReader &reader,
|
678
|
+
CopyFunction &function) {
|
679
|
+
unique_ptr<ParquetWriteBindData> data = make_uniq<ParquetWriteBindData>();
|
680
|
+
|
681
|
+
data->sql_types = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
|
682
|
+
data->column_names = reader.ReadRequiredList<string>();
|
683
|
+
data->codec = reader.ReadRequired<duckdb_parquet::format::CompressionCodec::type>();
|
684
|
+
data->row_group_size = reader.ReadRequired<idx_t>();
|
685
|
+
|
686
|
+
return std::move(data);
|
687
|
+
}
|
688
|
+
// LCOV_EXCL_STOP
|
689
|
+
|
668
690
|
//===--------------------------------------------------------------------===//
|
669
691
|
// Execution Mode
|
670
692
|
//===--------------------------------------------------------------------===//
|
@@ -764,6 +786,8 @@ void ParquetExtension::Load(DuckDB &db) {
|
|
764
786
|
function.prepare_batch = ParquetWritePrepareBatch;
|
765
787
|
function.flush_batch = ParquetWriteFlushBatch;
|
766
788
|
function.desired_batch_size = ParquetWriteDesiredBatchSize;
|
789
|
+
function.serialize = ParquetCopySerialize;
|
790
|
+
function.deserialize = ParquetCopyDeserialize;
|
767
791
|
|
768
792
|
function.extension = "parquet";
|
769
793
|
ExtensionUtil::RegisterFunction(db_instance, function);
|
@@ -8,6 +8,7 @@
|
|
8
8
|
#include "duckdb/common/chrono.hpp"
|
9
9
|
#include "duckdb/common/operator/add.hpp"
|
10
10
|
#include "duckdb/common/operator/multiply.hpp"
|
11
|
+
#include "duckdb/common/operator/subtract.hpp"
|
11
12
|
#include "duckdb/common/limits.hpp"
|
12
13
|
#include <ctime>
|
13
14
|
|
@@ -21,6 +22,38 @@ static_assert(sizeof(timestamp_t) == sizeof(int64_t), "timestamp_t was padded");
|
|
21
22
|
// Z is optional
|
22
23
|
// ISO 8601
|
23
24
|
|
25
|
+
// arithmetic operators
|
26
|
+
timestamp_t timestamp_t::operator+(const double &value) const {
|
27
|
+
timestamp_t result;
|
28
|
+
if (!TryAddOperator::Operation(this->value, int64_t(value), result.value)) {
|
29
|
+
throw OutOfRangeException("Overflow in timestamp addition");
|
30
|
+
}
|
31
|
+
return result;
|
32
|
+
}
|
33
|
+
|
34
|
+
int64_t timestamp_t::operator-(const timestamp_t &other) const {
|
35
|
+
int64_t result;
|
36
|
+
if (!TrySubtractOperator::Operation(value, int64_t(other.value), result)) {
|
37
|
+
throw OutOfRangeException("Overflow in timestamp subtraction");
|
38
|
+
}
|
39
|
+
return result;
|
40
|
+
}
|
41
|
+
|
42
|
+
// in-place operators
|
43
|
+
timestamp_t &timestamp_t::operator+=(const int64_t &delta) {
|
44
|
+
if (!TryAddOperator::Operation(value, delta, value)) {
|
45
|
+
throw OutOfRangeException("Overflow in timestamp increment");
|
46
|
+
}
|
47
|
+
return *this;
|
48
|
+
}
|
49
|
+
|
50
|
+
timestamp_t &timestamp_t::operator-=(const int64_t &delta) {
|
51
|
+
if (!TrySubtractOperator::Operation(value, delta, value)) {
|
52
|
+
throw OutOfRangeException("Overflow in timestamp decrement");
|
53
|
+
}
|
54
|
+
return *this;
|
55
|
+
}
|
56
|
+
|
24
57
|
bool Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, bool &has_offset, string_t &tz) {
|
25
58
|
idx_t pos;
|
26
59
|
date_t date;
|
@@ -59,7 +92,10 @@ bool Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &r
|
|
59
92
|
pos++;
|
60
93
|
has_offset = true;
|
61
94
|
} else if (Timestamp::TryParseUTCOffset(str, pos, len, hour_offset, minute_offset)) {
|
62
|
-
|
95
|
+
const int64_t delta = hour_offset * Interval::MICROS_PER_HOUR + minute_offset * Interval::MICROS_PER_MINUTE;
|
96
|
+
if (!TrySubtractOperator::Operation(result.value, delta, result.value)) {
|
97
|
+
return false;
|
98
|
+
}
|
63
99
|
has_offset = true;
|
64
100
|
} else {
|
65
101
|
// Parse a time zone: / [A-Za-z0-9/_]+/
|
@@ -1153,16 +1153,27 @@ AggregateFunction GetMedianAbsoluteDeviationAggregateFunction(const LogicalType
|
|
1153
1153
|
|
1154
1154
|
static void QuantileSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const AggregateFunction &function) {
|
1155
1155
|
D_ASSERT(bind_data_p);
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1156
|
+
auto bind_data = dynamic_cast<const QuantileBindData *>(bind_data_p);
|
1157
|
+
D_ASSERT(bind_data);
|
1158
|
+
writer.WriteRegularSerializableList<Value>(bind_data->quantiles);
|
1159
|
+
writer.WriteList<idx_t>(bind_data->order);
|
1160
|
+
writer.WriteField<bool>(bind_data->desc);
|
1160
1161
|
}
|
1161
1162
|
|
1162
1163
|
unique_ptr<FunctionData> QuantileDeserialize(ClientContext &context, FieldReader &reader,
|
1163
1164
|
AggregateFunction &bound_function) {
|
1164
|
-
auto quantiles = reader.
|
1165
|
-
|
1165
|
+
auto quantiles = reader.ReadRequiredSerializableList<Value, Value>();
|
1166
|
+
auto bind_data = make_uniq<QuantileBindData>(quantiles);
|
1167
|
+
bind_data->quantiles = quantiles;
|
1168
|
+
bind_data->order = reader.ReadRequiredList<idx_t>();
|
1169
|
+
bind_data->desc = reader.ReadRequired<bool>();
|
1170
|
+
|
1171
|
+
return std::move(bind_data);
|
1172
|
+
}
|
1173
|
+
|
1174
|
+
static void QuantileDecimalSerialize(FieldWriter &writer, const FunctionData *bind_data_p,
|
1175
|
+
const AggregateFunction &function) {
|
1176
|
+
throw NotImplementedException("FIXME: serializing quantiles with decimals is not supported right now");
|
1166
1177
|
}
|
1167
1178
|
|
1168
1179
|
unique_ptr<FunctionData> BindMedian(ClientContext &context, AggregateFunction &function,
|
@@ -1176,7 +1187,7 @@ unique_ptr<FunctionData> BindMedianDecimal(ClientContext &context, AggregateFunc
|
|
1176
1187
|
|
1177
1188
|
function = GetDiscreteQuantileAggregateFunction(arguments[0]->return_type);
|
1178
1189
|
function.name = "median";
|
1179
|
-
function.serialize =
|
1190
|
+
function.serialize = QuantileDecimalSerialize;
|
1180
1191
|
function.deserialize = QuantileDeserialize;
|
1181
1192
|
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1182
1193
|
return bind_data;
|
@@ -1227,11 +1238,6 @@ unique_ptr<FunctionData> BindQuantile(ClientContext &context, AggregateFunction
|
|
1227
1238
|
return make_uniq<QuantileBindData>(quantiles);
|
1228
1239
|
}
|
1229
1240
|
|
1230
|
-
static void QuantileDecimalSerialize(FieldWriter &writer, const FunctionData *bind_data_p,
|
1231
|
-
const AggregateFunction &function) {
|
1232
|
-
throw NotImplementedException("FIXME: serializing quantiles with decimals is not supported right now");
|
1233
|
-
}
|
1234
|
-
|
1235
1241
|
unique_ptr<FunctionData> BindDiscreteQuantileDecimal(ClientContext &context, AggregateFunction &function,
|
1236
1242
|
vector<unique_ptr<Expression>> &arguments) {
|
1237
1243
|
auto bind_data = BindQuantile(context, function, arguments);
|
@@ -18,7 +18,6 @@
|
|
18
18
|
#include "duckdb/storage/table/scan_state.hpp"
|
19
19
|
|
20
20
|
#include <algorithm>
|
21
|
-
#include <cstring>
|
22
21
|
|
23
22
|
namespace duckdb {
|
24
23
|
|
@@ -347,7 +346,31 @@ bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_ide
|
|
347
346
|
|
348
347
|
auto key_section = KeySection(0, count - 1, 0, 0);
|
349
348
|
auto has_constraint = IsUnique();
|
350
|
-
|
349
|
+
if (!Construct(*this, keys, row_ids, *this->tree, key_section, has_constraint)) {
|
350
|
+
return false;
|
351
|
+
}
|
352
|
+
|
353
|
+
#ifdef DEBUG
|
354
|
+
D_ASSERT(!VerifyAndToStringInternal(true).empty());
|
355
|
+
for (idx_t i = 0; i < count; i++) {
|
356
|
+
D_ASSERT(!keys[i].Empty());
|
357
|
+
auto leaf_node = Lookup(*tree, keys[i], 0);
|
358
|
+
D_ASSERT(leaf_node.IsSet());
|
359
|
+
auto &leaf = Leaf::Get(*this, leaf_node);
|
360
|
+
|
361
|
+
if (leaf.IsInlined()) {
|
362
|
+
D_ASSERT(row_ids[i] == leaf.row_ids.inlined);
|
363
|
+
continue;
|
364
|
+
}
|
365
|
+
|
366
|
+
D_ASSERT(leaf.row_ids.ptr.IsSet());
|
367
|
+
Node leaf_segment = leaf.row_ids.ptr;
|
368
|
+
auto position = leaf.FindRowId(*this, leaf_segment, row_ids[i]);
|
369
|
+
D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
|
370
|
+
}
|
371
|
+
#endif
|
372
|
+
|
373
|
+
return true;
|
351
374
|
}
|
352
375
|
|
353
376
|
//===--------------------------------------------------------------------===//
|
@@ -397,6 +420,29 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
397
420
|
return PreservedError(ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicate key \"%s\"",
|
398
421
|
AppendRowError(input, failed_index)));
|
399
422
|
}
|
423
|
+
|
424
|
+
#ifdef DEBUG
|
425
|
+
for (idx_t i = 0; i < input.size(); i++) {
|
426
|
+
if (keys[i].Empty()) {
|
427
|
+
continue;
|
428
|
+
}
|
429
|
+
|
430
|
+
auto leaf_node = Lookup(*tree, keys[i], 0);
|
431
|
+
D_ASSERT(leaf_node.IsSet());
|
432
|
+
auto &leaf = Leaf::Get(*this, leaf_node);
|
433
|
+
|
434
|
+
if (leaf.IsInlined()) {
|
435
|
+
D_ASSERT(row_identifiers[i] == leaf.row_ids.inlined);
|
436
|
+
continue;
|
437
|
+
}
|
438
|
+
|
439
|
+
D_ASSERT(leaf.row_ids.ptr.IsSet());
|
440
|
+
Node leaf_segment = leaf.row_ids.ptr;
|
441
|
+
auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
|
442
|
+
D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
|
443
|
+
}
|
444
|
+
#endif
|
445
|
+
|
400
446
|
return PreservedError();
|
401
447
|
}
|
402
448
|
|
@@ -535,16 +581,31 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
535
581
|
continue;
|
536
582
|
}
|
537
583
|
Erase(*tree, keys[i], 0, row_identifiers[i]);
|
584
|
+
}
|
585
|
+
|
538
586
|
#ifdef DEBUG
|
587
|
+
// verify that we removed all row IDs
|
588
|
+
for (idx_t i = 0; i < input.size(); i++) {
|
589
|
+
if (keys[i].Empty()) {
|
590
|
+
continue;
|
591
|
+
}
|
592
|
+
|
539
593
|
auto node = Lookup(*tree, keys[i], 0);
|
540
594
|
if (node.IsSet()) {
|
541
595
|
auto &leaf = Leaf::Get(*this, node);
|
542
|
-
|
543
|
-
|
596
|
+
|
597
|
+
if (leaf.IsInlined()) {
|
598
|
+
D_ASSERT(row_identifiers[i] != leaf.row_ids.inlined);
|
599
|
+
continue;
|
544
600
|
}
|
601
|
+
|
602
|
+
D_ASSERT(leaf.row_ids.ptr.IsSet());
|
603
|
+
Node leaf_segment = leaf.row_ids.ptr;
|
604
|
+
auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
|
605
|
+
D_ASSERT(position == (uint32_t)DConstants::INVALID_INDEX);
|
545
606
|
}
|
546
|
-
#endif
|
547
607
|
}
|
608
|
+
#endif
|
548
609
|
}
|
549
610
|
|
550
611
|
void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) {
|
@@ -1022,6 +1083,10 @@ void ART::Vacuum(IndexLock &state) {
|
|
1022
1083
|
|
1023
1084
|
// finalize the vacuum operation
|
1024
1085
|
FinalizeVacuum(flags);
|
1086
|
+
|
1087
|
+
for (auto &allocator : allocators) {
|
1088
|
+
allocator->Verify();
|
1089
|
+
}
|
1025
1090
|
}
|
1026
1091
|
|
1027
1092
|
//===--------------------------------------------------------------------===//
|
@@ -1059,6 +1124,10 @@ bool ART::MergeIndexes(IndexLock &state, Index &other_index) {
|
|
1059
1124
|
if (!tree->Merge(*this, *other_art.tree)) {
|
1060
1125
|
return false;
|
1061
1126
|
}
|
1127
|
+
|
1128
|
+
for (auto &allocator : allocators) {
|
1129
|
+
allocator->Verify();
|
1130
|
+
}
|
1062
1131
|
return true;
|
1063
1132
|
}
|
1064
1133
|
|
@@ -1066,9 +1135,13 @@ bool ART::MergeIndexes(IndexLock &state, Index &other_index) {
|
|
1066
1135
|
// Utility
|
1067
1136
|
//===--------------------------------------------------------------------===//
|
1068
1137
|
|
1069
|
-
string ART::
|
1138
|
+
string ART::VerifyAndToString(IndexLock &state, const bool only_verify) {
|
1139
|
+
return VerifyAndToStringInternal(only_verify);
|
1140
|
+
}
|
1141
|
+
|
1142
|
+
string ART::VerifyAndToStringInternal(const bool only_verify) {
|
1070
1143
|
if (tree->IsSet()) {
|
1071
|
-
return tree->
|
1144
|
+
return "ART: " + tree->VerifyAndToString(*this, only_verify);
|
1072
1145
|
}
|
1073
1146
|
return "[empty]";
|
1074
1147
|
}
|
@@ -126,13 +126,20 @@ void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
|
|
126
126
|
|
127
127
|
bool FixedSizeAllocator::InitializeVacuum() {
|
128
128
|
|
129
|
+
if (total_allocations == 0) {
|
130
|
+
Reset();
|
131
|
+
return false;
|
132
|
+
}
|
133
|
+
|
129
134
|
auto total_available_allocations = allocations_per_buffer * buffers.size();
|
135
|
+
D_ASSERT(total_available_allocations >= total_allocations);
|
130
136
|
auto total_free_positions = total_available_allocations - total_allocations;
|
131
137
|
|
132
138
|
// vacuum_count buffers can be freed
|
133
|
-
auto vacuum_count = total_free_positions / allocations_per_buffer
|
139
|
+
auto vacuum_count = total_free_positions / allocations_per_buffer;
|
134
140
|
|
135
141
|
// calculate the vacuum threshold adaptively
|
142
|
+
D_ASSERT(vacuum_count < buffers.size());
|
136
143
|
idx_t memory_usage = GetMemoryUsage();
|
137
144
|
idx_t excess_memory_usage = vacuum_count * BUFFER_ALLOC_SIZE;
|
138
145
|
auto excess_percentage = (double)excess_memory_usage / (double)memory_usage;
|
@@ -171,10 +178,22 @@ SwizzleablePointer FixedSizeAllocator::VacuumPointer(const SwizzleablePointer pt
|
|
171
178
|
// buffer after the vacuum operation
|
172
179
|
|
173
180
|
auto new_ptr = New();
|
181
|
+
|
182
|
+
// new increases the allocation count
|
183
|
+
total_allocations--;
|
184
|
+
|
174
185
|
memcpy(Get(new_ptr), Get(ptr), allocation_size);
|
175
186
|
return new_ptr;
|
176
187
|
}
|
177
188
|
|
189
|
+
void FixedSizeAllocator::Verify() const {
|
190
|
+
#ifdef DEBUG
|
191
|
+
auto total_available_allocations = allocations_per_buffer * buffers.size();
|
192
|
+
D_ASSERT(total_available_allocations >= total_allocations);
|
193
|
+
D_ASSERT(buffers.size() >= buffers_with_free_space.size());
|
194
|
+
#endif
|
195
|
+
}
|
196
|
+
|
178
197
|
uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t allocation_count) {
|
179
198
|
|
180
199
|
auto data = mask.GetData();
|
@@ -60,16 +60,10 @@ void Leaf::Free(ART &art, Node &node) {
|
|
60
60
|
D_ASSERT(node.IsSet());
|
61
61
|
D_ASSERT(!node.IsSwizzled());
|
62
62
|
|
63
|
+
// free leaf segments
|
63
64
|
auto &leaf = Leaf::Get(art, node);
|
64
|
-
|
65
|
-
// delete all leaf segments
|
66
65
|
if (!leaf.IsInlined()) {
|
67
|
-
|
68
|
-
while (ptr.IsSet()) {
|
69
|
-
auto next_ptr = LeafSegment::Get(art, ptr).next;
|
70
|
-
Node::Free(art, ptr);
|
71
|
-
ptr = next_ptr;
|
72
|
-
}
|
66
|
+
Node::Free(art, leaf.row_ids.ptr);
|
73
67
|
}
|
74
68
|
}
|
75
69
|
|
@@ -275,10 +269,10 @@ uint32_t Leaf::FindRowId(const ART &art, Node &ptr, const row_t row_id) const {
|
|
275
269
|
return (uint32_t)DConstants::INVALID_INDEX;
|
276
270
|
}
|
277
271
|
|
278
|
-
string Leaf::
|
272
|
+
string Leaf::VerifyAndToString(const ART &art, const bool only_verify) const {
|
279
273
|
|
280
274
|
if (IsInlined()) {
|
281
|
-
return "
|
275
|
+
return only_verify ? "" : "Leaf [count: 1, row ID: " + to_string(row_ids.inlined) + "]";
|
282
276
|
}
|
283
277
|
|
284
278
|
auto ptr = row_ids.ptr;
|
@@ -296,7 +290,10 @@ string Leaf::ToString(const ART &art) const {
|
|
296
290
|
remaining -= to_string_count;
|
297
291
|
ptr = segment.next;
|
298
292
|
}
|
299
|
-
|
293
|
+
|
294
|
+
D_ASSERT(remaining == 0);
|
295
|
+
D_ASSERT(this_count == count);
|
296
|
+
return only_verify ? "" : "Leaf [count: " + to_string(count) + ", row IDs: " + str + "] \n";
|
300
297
|
}
|
301
298
|
|
302
299
|
BlockPointer Leaf::Serialize(const ART &art, MetaBlockWriter &writer) const {
|
@@ -366,6 +363,7 @@ void Leaf::Vacuum(ART &art) {
|
|
366
363
|
auto &allocator = Node::GetAllocator(art, NType::LEAF_SEGMENT);
|
367
364
|
if (allocator.NeedsVacuum(row_ids.ptr)) {
|
368
365
|
row_ids.ptr.SetPtr(allocator.VacuumPointer(row_ids.ptr));
|
366
|
+
row_ids.ptr.type = (uint8_t)NType::LEAF_SEGMENT;
|
369
367
|
}
|
370
368
|
|
371
369
|
auto ptr = row_ids.ptr;
|
@@ -374,6 +372,7 @@ void Leaf::Vacuum(ART &art) {
|
|
374
372
|
ptr = segment.next;
|
375
373
|
if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) {
|
376
374
|
segment.next.SetPtr(allocator.VacuumPointer(ptr));
|
375
|
+
segment.next.type = (uint8_t)NType::LEAF_SEGMENT;
|
377
376
|
ptr = segment.next;
|
378
377
|
}
|
379
378
|
}
|
@@ -15,6 +15,16 @@ LeafSegment &LeafSegment::New(ART &art, Node &node) {
|
|
15
15
|
return segment;
|
16
16
|
}
|
17
17
|
|
18
|
+
void LeafSegment::Free(ART &art, Node &node) {
|
19
|
+
|
20
|
+
D_ASSERT(node.IsSet());
|
21
|
+
D_ASSERT(!node.IsSwizzled());
|
22
|
+
|
23
|
+
// free next segment
|
24
|
+
auto next_segment = LeafSegment::Get(art, node).next;
|
25
|
+
Node::Free(art, next_segment);
|
26
|
+
}
|
27
|
+
|
18
28
|
LeafSegment &LeafSegment::Append(ART &art, uint32_t &count, const row_t row_id) {
|
19
29
|
|
20
30
|
reference<LeafSegment> segment(*this);
|
@@ -69,6 +69,9 @@ void Node::Free(ART &art, Node &node) {
|
|
69
69
|
|
70
70
|
// free the prefixes and children of the nodes
|
71
71
|
switch (type) {
|
72
|
+
case NType::LEAF_SEGMENT:
|
73
|
+
LeafSegment::Free(art, node);
|
74
|
+
break;
|
72
75
|
case NType::LEAF:
|
73
76
|
Leaf::Free(art, node);
|
74
77
|
break;
|
@@ -159,65 +162,57 @@ void Node::DeleteChild(ART &art, Node &node, const uint8_t byte) {
|
|
159
162
|
|
160
163
|
optional_ptr<Node> Node::GetChild(ART &art, const uint8_t byte) const {
|
161
164
|
|
162
|
-
D_ASSERT(!IsSwizzled());
|
165
|
+
D_ASSERT(IsSet() && !IsSwizzled());
|
163
166
|
|
164
167
|
optional_ptr<Node> child;
|
165
168
|
switch (DecodeARTNodeType()) {
|
166
|
-
case NType::NODE_4:
|
169
|
+
case NType::NODE_4:
|
167
170
|
child = Node4::Get(art, *this).GetChild(byte);
|
168
171
|
break;
|
169
|
-
|
170
|
-
case NType::NODE_16: {
|
172
|
+
case NType::NODE_16:
|
171
173
|
child = Node16::Get(art, *this).GetChild(byte);
|
172
174
|
break;
|
173
|
-
|
174
|
-
case NType::NODE_48: {
|
175
|
+
case NType::NODE_48:
|
175
176
|
child = Node48::Get(art, *this).GetChild(byte);
|
176
177
|
break;
|
177
|
-
|
178
|
-
case NType::NODE_256: {
|
178
|
+
case NType::NODE_256:
|
179
179
|
child = Node256::Get(art, *this).GetChild(byte);
|
180
180
|
break;
|
181
|
-
}
|
182
181
|
default:
|
183
182
|
throw InternalException("Invalid node type for GetChild.");
|
184
183
|
}
|
185
184
|
|
186
|
-
//
|
185
|
+
// deserialize the ART node before returning it
|
187
186
|
if (child && child->IsSwizzled()) {
|
188
187
|
child->Deserialize(art);
|
189
188
|
}
|
190
189
|
return child;
|
191
190
|
}
|
192
191
|
|
193
|
-
optional_ptr<Node> Node::GetNextChild(ART &art, uint8_t &byte) const {
|
192
|
+
optional_ptr<Node> Node::GetNextChild(ART &art, uint8_t &byte, const bool deserialize) const {
|
194
193
|
|
195
|
-
D_ASSERT(!IsSwizzled());
|
194
|
+
D_ASSERT(IsSet() && !IsSwizzled());
|
196
195
|
|
197
196
|
optional_ptr<Node> child;
|
198
197
|
switch (DecodeARTNodeType()) {
|
199
|
-
case NType::NODE_4:
|
198
|
+
case NType::NODE_4:
|
200
199
|
child = Node4::Get(art, *this).GetNextChild(byte);
|
201
200
|
break;
|
202
|
-
|
203
|
-
case NType::NODE_16: {
|
201
|
+
case NType::NODE_16:
|
204
202
|
child = Node16::Get(art, *this).GetNextChild(byte);
|
205
203
|
break;
|
206
|
-
|
207
|
-
case NType::NODE_48: {
|
204
|
+
case NType::NODE_48:
|
208
205
|
child = Node48::Get(art, *this).GetNextChild(byte);
|
209
206
|
break;
|
210
|
-
|
211
|
-
case NType::NODE_256: {
|
207
|
+
case NType::NODE_256:
|
212
208
|
child = Node256::Get(art, *this).GetNextChild(byte);
|
213
209
|
break;
|
214
|
-
}
|
215
210
|
default:
|
216
211
|
throw InternalException("Invalid node type for GetNextChild.");
|
217
212
|
}
|
218
213
|
|
219
|
-
//
|
220
|
-
if (child && child->IsSwizzled()) {
|
214
|
+
// deserialize the ART node before returning it
|
215
|
+
if (child && deserialize && child->IsSwizzled()) {
|
221
216
|
child->Deserialize(art);
|
222
217
|
}
|
223
218
|
return child;
|
@@ -260,10 +255,11 @@ void Node::Deserialize(ART &art) {
|
|
260
255
|
type = reader.Read<uint8_t>();
|
261
256
|
swizzle_flag = 0;
|
262
257
|
|
263
|
-
auto
|
264
|
-
SetPtr(Node::GetAllocator(art,
|
258
|
+
auto decoded_type = DecodeARTNodeType();
|
259
|
+
SetPtr(Node::GetAllocator(art, decoded_type).New());
|
260
|
+
type = (uint8_t)decoded_type;
|
265
261
|
|
266
|
-
switch (
|
262
|
+
switch (decoded_type) {
|
267
263
|
case NType::LEAF:
|
268
264
|
return Leaf::Get(art, *this).Deserialize(art, reader);
|
269
265
|
case NType::NODE_4:
|
@@ -283,28 +279,43 @@ void Node::Deserialize(ART &art) {
|
|
283
279
|
// Utility
|
284
280
|
//===--------------------------------------------------------------------===//
|
285
281
|
|
286
|
-
string Node::
|
282
|
+
string Node::VerifyAndToString(ART &art, const bool only_verify) {
|
287
283
|
|
288
|
-
D_ASSERT(
|
284
|
+
D_ASSERT(IsSet());
|
285
|
+
if (IsSwizzled()) {
|
286
|
+
return only_verify ? "" : "swizzled";
|
287
|
+
}
|
289
288
|
|
290
|
-
|
291
|
-
|
289
|
+
auto type = DecodeARTNodeType();
|
290
|
+
if (type == NType::LEAF) {
|
291
|
+
auto str = Leaf::Get(art, *this).VerifyAndToString(art, only_verify);
|
292
|
+
return only_verify ? "" : "\n" + str;
|
292
293
|
}
|
293
294
|
|
294
295
|
string str = "Node" + to_string(GetCapacity()) + ": [";
|
295
296
|
|
297
|
+
idx_t child_count = 0;
|
296
298
|
uint8_t byte = 0;
|
297
|
-
auto child = GetNextChild(art, byte);
|
299
|
+
auto child = GetNextChild(art, byte, false);
|
298
300
|
while (child) {
|
299
|
-
|
300
|
-
if (
|
301
|
-
|
301
|
+
child_count++;
|
302
|
+
if (child->IsSwizzled()) {
|
303
|
+
if (!only_verify) {
|
304
|
+
str += "(swizzled)";
|
305
|
+
}
|
306
|
+
} else {
|
307
|
+
str += "(" + to_string(byte) + ", " + child->VerifyAndToString(art, only_verify) + ")";
|
308
|
+
if (byte == NumericLimits<uint8_t>::Maximum()) {
|
309
|
+
break;
|
310
|
+
}
|
302
311
|
}
|
303
312
|
byte++;
|
304
|
-
child = GetNextChild(art, byte);
|
313
|
+
child = GetNextChild(art, byte, false);
|
305
314
|
}
|
306
315
|
|
307
|
-
|
316
|
+
// ensure that the child count is at least two
|
317
|
+
D_ASSERT(child_count > 1);
|
318
|
+
return only_verify ? "" : "\n" + str + "]";
|
308
319
|
}
|
309
320
|
|
310
321
|
idx_t Node::GetCapacity() const {
|
@@ -567,6 +578,7 @@ void Node::Vacuum(ART &art, Node &node, const ARTFlags &flags) {
|
|
567
578
|
needs_vacuum = flags.vacuum_flags[node.type - 1] && allocator.NeedsVacuum(node);
|
568
579
|
if (needs_vacuum) {
|
569
580
|
node.SetPtr(allocator.VacuumPointer(node));
|
581
|
+
node.type = (uint8_t)type;
|
570
582
|
}
|
571
583
|
|
572
584
|
switch (type) {
|
@@ -60,6 +60,7 @@ Node16 &Node16::ShrinkNode48(ART &art, Node &node16, Node &node48) {
|
|
60
60
|
n16.prefix.Move(n48.prefix);
|
61
61
|
|
62
62
|
for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) {
|
63
|
+
D_ASSERT(n16.count <= Node::NODE_16_CAPACITY);
|
63
64
|
if (n48.child_index[i] != Node::EMPTY_MARKER) {
|
64
65
|
n16.key[n16.count] = i;
|
65
66
|
n16.children[n16.count] = n48.children[n48.child_index[i]];
|
@@ -160,6 +161,7 @@ optional_ptr<Node> Node16::GetChild(const uint8_t byte) {
|
|
160
161
|
|
161
162
|
for (idx_t i = 0; i < count; i++) {
|
162
163
|
if (key[i] == byte) {
|
164
|
+
D_ASSERT(children[i].IsSet());
|
163
165
|
return &children[i];
|
164
166
|
}
|
165
167
|
}
|
@@ -171,6 +173,7 @@ optional_ptr<Node> Node16::GetNextChild(uint8_t &byte) {
|
|
171
173
|
for (idx_t i = 0; i < count; i++) {
|
172
174
|
if (key[i] >= byte) {
|
173
175
|
byte = key[i];
|
176
|
+
D_ASSERT(children[i].IsSet());
|
174
177
|
return &children[i];
|
175
178
|
}
|
176
179
|
}
|
@@ -37,6 +37,7 @@ Node4 &Node4::ShrinkNode16(ART &art, Node &node4, Node &node16) {
|
|
37
37
|
auto &n4 = Node4::New(art, node4);
|
38
38
|
auto &n16 = Node16::Get(art, node16);
|
39
39
|
|
40
|
+
D_ASSERT(n16.count <= Node::NODE_4_CAPACITY);
|
40
41
|
n4.count = n16.count;
|
41
42
|
n4.prefix.Move(n16.prefix);
|
42
43
|
|
@@ -145,6 +146,7 @@ optional_ptr<Node> Node4::GetChild(const uint8_t byte) {
|
|
145
146
|
|
146
147
|
for (idx_t i = 0; i < count; i++) {
|
147
148
|
if (key[i] == byte) {
|
149
|
+
D_ASSERT(children[i].IsSet());
|
148
150
|
return &children[i];
|
149
151
|
}
|
150
152
|
}
|
@@ -156,6 +158,7 @@ optional_ptr<Node> Node4::GetNextChild(uint8_t &byte) {
|
|
156
158
|
for (idx_t i = 0; i < count; i++) {
|
157
159
|
if (key[i] >= byte) {
|
158
160
|
byte = key[i];
|
161
|
+
D_ASSERT(children[i].IsSet());
|
159
162
|
return &children[i];
|
160
163
|
}
|
161
164
|
}
|
@@ -85,6 +85,7 @@ Node48 &Node48::ShrinkNode256(ART &art, Node &node48, Node &node256) {
|
|
85
85
|
n48.prefix.Move(n256.prefix);
|
86
86
|
|
87
87
|
for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) {
|
88
|
+
D_ASSERT(n48.count <= Node::NODE_48_CAPACITY);
|
88
89
|
if (n256.children[i].IsSet()) {
|
89
90
|
n48.child_index[i] = n48.count;
|
90
91
|
n48.children[n48.count] = n256.children[i];
|
@@ -168,6 +169,7 @@ optional_ptr<Node> Node48::GetNextChild(uint8_t &byte) {
|
|
168
169
|
for (idx_t i = byte; i < Node::NODE_256_CAPACITY; i++) {
|
169
170
|
if (child_index[i] != Node::EMPTY_MARKER) {
|
170
171
|
byte = i;
|
172
|
+
D_ASSERT(children[child_index[i]].IsSet());
|
171
173
|
return &children[child_index[i]];
|
172
174
|
}
|
173
175
|
}
|
@@ -427,6 +427,7 @@ void Prefix::Vacuum(ART &art) {
|
|
427
427
|
auto &allocator = Node::GetAllocator(art, NType::PREFIX_SEGMENT);
|
428
428
|
if (allocator.NeedsVacuum(data.ptr)) {
|
429
429
|
data.ptr.SetPtr(allocator.VacuumPointer(data.ptr));
|
430
|
+
data.ptr.type = (uint8_t)NType::PREFIX_SEGMENT;
|
430
431
|
}
|
431
432
|
|
432
433
|
auto ptr = data.ptr;
|
@@ -435,6 +436,7 @@ void Prefix::Vacuum(ART &art) {
|
|
435
436
|
ptr = segment.next;
|
436
437
|
if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) {
|
437
438
|
segment.next.SetPtr(allocator.VacuumPointer(ptr));
|
439
|
+
segment.next.type = (uint8_t)NType::PREFIX_SEGMENT;
|
438
440
|
ptr = segment.next;
|
439
441
|
}
|
440
442
|
}
|