duckdb 0.7.2-dev2552.0 → 0.7.2-dev2699.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +7 -7
- package/package.json +2 -2
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
- package/src/duckdb/src/common/adbc/adbc.cpp +5 -2
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +286 -269
- package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
- package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
- package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
- package/src/duckdb/src/execution/index/art/node.cpp +444 -379
- package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
- package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
- package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
- package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
- package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +4 -0
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
- package/src/duckdb/src/function/table/read_csv.cpp +5 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
- package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
- package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
- package/src/duckdb/src/include/duckdb.h +26 -0
- package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/query_result.cpp +1 -1
- package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
- package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
- package/src/duckdb/src/storage/data_table.cpp +6 -3
- package/src/duckdb/src/storage/index.cpp +18 -6
- package/src/duckdb/src/storage/local_storage.cpp +8 -2
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
- package/src/duckdb/src/storage/wal_replay.cpp +1 -1
- package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
- package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -1,97 +1,22 @@
|
|
1
1
|
#include "duckdb/execution/index/art/swizzleable_pointer.hpp"
|
2
2
|
|
3
|
-
#include "duckdb/
|
3
|
+
#include "duckdb/storage/meta_block_reader.hpp"
|
4
4
|
|
5
5
|
namespace duckdb {
|
6
|
-
SwizzleablePointer::~SwizzleablePointer() {
|
7
|
-
if (pointer) {
|
8
|
-
if (!IsSwizzled()) {
|
9
|
-
Node::Delete((Node *)pointer);
|
10
|
-
}
|
11
|
-
}
|
12
|
-
}
|
13
6
|
|
14
|
-
SwizzleablePointer::SwizzleablePointer(
|
15
|
-
idx_t block_id = reader.Read<block_id_t>();
|
16
|
-
uint32_t offset = reader.Read<uint32_t>();
|
17
|
-
if (block_id == DConstants::INVALID_INDEX || offset == (uint32_t)DConstants::INVALID_INDEX) {
|
18
|
-
pointer = 0;
|
19
|
-
return;
|
20
|
-
}
|
21
|
-
idx_t pointer_size = sizeof(pointer) * 8;
|
22
|
-
pointer = block_id;
|
23
|
-
// This assumes high 32 bits of pointer are zero.
|
24
|
-
pointer = pointer << (pointer_size / 2);
|
25
|
-
D_ASSERT((pointer >> (pointer_size / 2)) == block_id);
|
26
|
-
pointer += offset;
|
27
|
-
// Set the left most bit to indicate this is a swizzled pointer and send it back to the mother-ship
|
28
|
-
uint64_t mask = 1;
|
29
|
-
mask = mask << (pointer_size - 1);
|
30
|
-
// This assumes the 33rd most significant bit of the block_id is zero.
|
31
|
-
pointer |= mask;
|
32
|
-
}
|
33
|
-
|
34
|
-
SwizzleablePointer &SwizzleablePointer::operator=(const Node *ptr) {
|
35
|
-
// If the object already has a non-swizzled pointer, this will leak memory.
|
36
|
-
//
|
37
|
-
// TODO: If enabled, this assert will fire, indicating a possible leak. If an exception
|
38
|
-
// is thrown here, it will cause a double-free. There is some work to do to make all this safer.
|
39
|
-
// D_ASSERT(empty() || IsSwizzled());
|
40
|
-
if (sizeof(ptr) == 4) {
|
41
|
-
pointer = (uint32_t)(size_t)ptr;
|
42
|
-
} else {
|
43
|
-
pointer = (uint64_t)ptr;
|
44
|
-
}
|
45
|
-
return *this;
|
46
|
-
}
|
47
|
-
|
48
|
-
bool operator!=(const SwizzleablePointer &s_ptr, const uint64_t &ptr) {
|
49
|
-
return (s_ptr.pointer != ptr);
|
50
|
-
}
|
7
|
+
SwizzleablePointer::SwizzleablePointer(MetaBlockReader &reader) {
|
51
8
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
// This is destructive. Pointer will be invalid after this operation.
|
56
|
-
// That's okay because this is only ever called from Unswizzle.
|
57
|
-
pointer = pointer & ~(1ULL << (pointer_size - 1));
|
58
|
-
uint32_t block_id = pointer >> (pointer_size / 2);
|
59
|
-
uint32_t offset = pointer & 0xffffffff;
|
60
|
-
return {block_id, offset};
|
61
|
-
}
|
62
|
-
|
63
|
-
bool SwizzleablePointer::IsSwizzled() {
|
64
|
-
idx_t pointer_size = sizeof(pointer) * 8;
|
65
|
-
return (pointer >> (pointer_size - 1)) & 1;
|
66
|
-
}
|
67
|
-
|
68
|
-
void SwizzleablePointer::Reset() {
|
69
|
-
if (pointer) {
|
70
|
-
if (!IsSwizzled()) {
|
71
|
-
Node::Delete((Node *)pointer);
|
72
|
-
}
|
73
|
-
}
|
74
|
-
*this = nullptr;
|
75
|
-
}
|
9
|
+
idx_t block_id = reader.Read<block_id_t>();
|
10
|
+
offset = reader.Read<uint32_t>();
|
11
|
+
type = 0;
|
76
12
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
// first we unset the bae
|
81
|
-
auto block_info = GetSwizzledBlockInfo();
|
82
|
-
*this = Node::Deserialize(art, block_info.block_id, block_info.offset);
|
83
|
-
art.Verify();
|
13
|
+
if (block_id == DConstants::INVALID_INDEX) {
|
14
|
+
swizzle_flag = 0;
|
15
|
+
return;
|
84
16
|
}
|
85
|
-
return (Node *)pointer;
|
86
|
-
}
|
87
17
|
|
88
|
-
|
89
|
-
|
90
|
-
Unswizzle(art);
|
91
|
-
return ((Node *)pointer)->Serialize(art, writer);
|
92
|
-
} else {
|
93
|
-
return {(block_id_t)DConstants::INVALID_INDEX, (uint32_t)DConstants::INVALID_INDEX};
|
94
|
-
}
|
18
|
+
buffer_id = (uint32_t)block_id;
|
19
|
+
swizzle_flag = 1;
|
95
20
|
}
|
96
21
|
|
97
22
|
} // namespace duckdb
|
@@ -13,6 +13,7 @@
|
|
13
13
|
#include "duckdb/transaction/duck_transaction.hpp"
|
14
14
|
#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
|
15
15
|
#include "duckdb/storage/table/scan_state.hpp"
|
16
|
+
#include "duckdb/execution/index/art/art_key.hpp"
|
16
17
|
|
17
18
|
namespace duckdb {
|
18
19
|
|
@@ -48,7 +49,7 @@ public:
|
|
48
49
|
ExpressionExecutor probe_executor;
|
49
50
|
|
50
51
|
ArenaAllocator arena_allocator;
|
51
|
-
vector<
|
52
|
+
vector<ARTKey> keys;
|
52
53
|
unique_ptr<ColumnFetchState> fetch_state;
|
53
54
|
|
54
55
|
public:
|
@@ -268,8 +268,8 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
|
|
268
268
|
idx_t row_entry = parse_chunk.size();
|
269
269
|
|
270
270
|
// test against null string, but only if the value was not quoted
|
271
|
-
if ((!has_quotes || return_types[column].id() != LogicalTypeId::VARCHAR) &&
|
272
|
-
Equals::Operation(str_val, string_t(options.null_str))) {
|
271
|
+
if ((!(has_quotes && !options.allow_quoted_nulls) || return_types[column].id() != LogicalTypeId::VARCHAR) &&
|
272
|
+
!options.force_not_null[column] && Equals::Operation(str_val, string_t(options.null_str))) {
|
273
273
|
FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
|
274
274
|
} else {
|
275
275
|
auto &v = parse_chunk.data[column];
|
@@ -175,6 +175,8 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
175
175
|
}
|
176
176
|
} else if (loption == "null_padding") {
|
177
177
|
null_padding = ParseBoolean(value, loption);
|
178
|
+
} else if (loption == "allow_quoted_nulls") {
|
179
|
+
allow_quoted_nulls = ParseBoolean(value, loption);
|
178
180
|
} else {
|
179
181
|
throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
|
180
182
|
}
|
@@ -124,6 +124,8 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
|
|
124
124
|
while (!successfully_read_first_line) {
|
125
125
|
DataChunk first_line_chunk;
|
126
126
|
first_line_chunk.Initialize(allocator, return_types);
|
127
|
+
// Ensure that parse_chunk has no gunk when trying to figure new line
|
128
|
+
parse_chunk.Reset();
|
127
129
|
for (; position_buffer < end_buffer; position_buffer++) {
|
128
130
|
if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
|
129
131
|
bool carriage_return = (*buffer)[position_buffer] == '\r';
|
@@ -183,6 +185,8 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
|
|
183
185
|
if (verification_positions.beginning_of_first_line == 0) {
|
184
186
|
verification_positions.beginning_of_first_line = position_buffer;
|
185
187
|
}
|
188
|
+
// Ensure that parse_chunk has no gunk when trying to figure new line
|
189
|
+
parse_chunk.Reset();
|
186
190
|
|
187
191
|
verification_positions.end_of_last_line = position_buffer;
|
188
192
|
finished = false;
|
@@ -6,6 +6,7 @@
|
|
6
6
|
#include "duckdb/main/client_context.hpp"
|
7
7
|
#include "duckdb/storage/storage_manager.hpp"
|
8
8
|
#include "duckdb/main/database_manager.hpp"
|
9
|
+
#include "duckdb/execution/index/art/art_key.hpp"
|
9
10
|
|
10
11
|
namespace duckdb {
|
11
12
|
|
@@ -39,7 +40,7 @@ public:
|
|
39
40
|
|
40
41
|
unique_ptr<Index> local_index;
|
41
42
|
ArenaAllocator arena_allocator;
|
42
|
-
vector<
|
43
|
+
vector<ARTKey> keys;
|
43
44
|
DataChunk key_chunk;
|
44
45
|
vector<column_t> key_column_ids;
|
45
46
|
};
|
@@ -52,7 +53,7 @@ unique_ptr<GlobalSinkState> PhysicalCreateIndex::GetGlobalSinkState(ClientContex
|
|
52
53
|
case IndexType::ART: {
|
53
54
|
auto &storage = table.GetStorage();
|
54
55
|
state->global_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
|
55
|
-
info->constraint_type, storage.db
|
56
|
+
info->constraint_type, storage.db);
|
56
57
|
break;
|
57
58
|
}
|
58
59
|
default:
|
@@ -69,13 +70,13 @@ unique_ptr<LocalSinkState> PhysicalCreateIndex::GetLocalSinkState(ExecutionConte
|
|
69
70
|
case IndexType::ART: {
|
70
71
|
auto &storage = table.GetStorage();
|
71
72
|
state->local_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
|
72
|
-
info->constraint_type, storage.db
|
73
|
+
info->constraint_type, storage.db);
|
73
74
|
break;
|
74
75
|
}
|
75
76
|
default:
|
76
77
|
throw InternalException("Unimplemented index type");
|
77
78
|
}
|
78
|
-
state->keys = vector<
|
79
|
+
state->keys = vector<ARTKey>(STANDARD_VECTOR_SIZE);
|
79
80
|
state->key_chunk.Initialize(Allocator::Get(context.client), state->local_index->logical_types);
|
80
81
|
|
81
82
|
for (idx_t i = 0; i < state->key_chunk.ColumnCount(); i++) {
|
@@ -97,9 +98,8 @@ SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, GlobalSinkSt
|
|
97
98
|
ART::GenerateKeys(lstate.arena_allocator, lstate.key_chunk, lstate.keys);
|
98
99
|
|
99
100
|
auto &storage = table.GetStorage();
|
100
|
-
auto art =
|
101
|
-
|
102
|
-
lstate.local_index->unbound_expressions, lstate.local_index->constraint_type, storage.db, false);
|
101
|
+
auto art = make_uniq<ART>(lstate.local_index->column_ids, lstate.local_index->table_io_manager,
|
102
|
+
lstate.local_index->unbound_expressions, lstate.local_index->constraint_type, storage.db);
|
103
103
|
if (!art->ConstructFromSorted(lstate.key_chunk.size(), lstate.keys, row_identifiers)) {
|
104
104
|
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
105
105
|
}
|
@@ -134,11 +134,6 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
|
|
134
134
|
throw TransactionException("Transaction conflict: cannot add an index to a table that has been altered!");
|
135
135
|
}
|
136
136
|
|
137
|
-
state.global_index->Verify();
|
138
|
-
if (state.global_index->track_memory) {
|
139
|
-
state.global_index->buffer_manager.IncreaseUsedMemory(state.global_index->memory_size);
|
140
|
-
}
|
141
|
-
|
142
137
|
auto &schema = table.schema;
|
143
138
|
auto index_entry = schema.CreateIndex(context, *info, table).get();
|
144
139
|
if (!index_entry) {
|
@@ -153,6 +148,10 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
|
|
153
148
|
index.parsed_expressions.push_back(parsed_expr->Copy());
|
154
149
|
}
|
155
150
|
|
151
|
+
// vacuum excess memory
|
152
|
+
state.global_index->Vacuum();
|
153
|
+
|
154
|
+
// add index to storage
|
156
155
|
storage.info->indexes.AddIndex(std::move(state.global_index));
|
157
156
|
return SinkFinalizeType::READY;
|
158
157
|
}
|
@@ -27,7 +27,7 @@ unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompress
|
|
27
27
|
if (file_handle->CanSeek()) {
|
28
28
|
file_handle->Reset();
|
29
29
|
}
|
30
|
-
return make_uniq<CSVFileHandle>(std::move(file_handle));
|
30
|
+
return make_uniq<CSVFileHandle>(std::move(file_handle), false);
|
31
31
|
}
|
32
32
|
|
33
33
|
void ReadCSVData::FinalizeRead(ClientContext &context) {
|
@@ -259,6 +259,7 @@ public:
|
|
259
259
|
idx_t rows_to_skip, bool force_parallelism_p, vector<column_t> column_ids_p)
|
260
260
|
: file_handle(std::move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p),
|
261
261
|
force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)) {
|
262
|
+
file_handle->DisableReset();
|
262
263
|
current_file_path = files_path_p[0];
|
263
264
|
estimated_linenr = rows_to_skip;
|
264
265
|
file_size = file_handle->FileSize();
|
@@ -861,6 +862,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
|
|
861
862
|
table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR;
|
862
863
|
table_function.named_parameters["parallel"] = LogicalType::BOOLEAN;
|
863
864
|
table_function.named_parameters["null_padding"] = LogicalType::BOOLEAN;
|
865
|
+
table_function.named_parameters["allow_quoted_nulls"] = LogicalType::BOOLEAN;
|
864
866
|
table_function.named_parameters["column_types"] = LogicalType::ANY;
|
865
867
|
table_function.named_parameters["dtypes"] = LogicalType::ANY;
|
866
868
|
table_function.named_parameters["types"] = LogicalType::ANY;
|
@@ -920,6 +922,7 @@ void BufferedCSVReaderOptions::Serialize(FieldWriter &writer) const {
|
|
920
922
|
writer.WriteString(null_str);
|
921
923
|
writer.WriteField<FileCompressionType>(compression);
|
922
924
|
writer.WriteField<NewLineIdentifier>(new_line);
|
925
|
+
writer.WriteField<bool>(allow_quoted_nulls);
|
923
926
|
// read options
|
924
927
|
writer.WriteField<idx_t>(skip_rows);
|
925
928
|
writer.WriteField<bool>(skip_rows_set);
|
@@ -954,6 +957,7 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
|
|
954
957
|
null_str = reader.ReadRequired<string>();
|
955
958
|
compression = reader.ReadRequired<FileCompressionType>();
|
956
959
|
new_line = reader.ReadRequired<NewLineIdentifier>();
|
960
|
+
allow_quoted_nulls = reader.ReadRequired<bool>();
|
957
961
|
// read options
|
958
962
|
skip_rows = reader.ReadRequired<idx_t>();
|
959
963
|
skip_rows_set = reader.ReadRequired<bool>();
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.7.2-
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev2699"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "199c0211c7"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -8,56 +8,54 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
-
#include "duckdb/common/common.hpp"
|
12
|
-
#include "duckdb/common/types/data_chunk.hpp"
|
13
|
-
#include "duckdb/common/types/vector.hpp"
|
14
|
-
#include "duckdb/execution/index/art/art_key.hpp"
|
15
|
-
#include "duckdb/execution/index/art/iterator.hpp"
|
16
|
-
#include "duckdb/execution/index/art/leaf.hpp"
|
17
|
-
#include "duckdb/execution/index/art/node.hpp"
|
18
|
-
#include "duckdb/execution/index/art/node16.hpp"
|
19
|
-
#include "duckdb/execution/index/art/node256.hpp"
|
20
|
-
#include "duckdb/execution/index/art/node4.hpp"
|
21
|
-
#include "duckdb/execution/index/art/node48.hpp"
|
22
|
-
#include "duckdb/parser/parsed_expression.hpp"
|
23
|
-
#include "duckdb/storage/data_table.hpp"
|
24
11
|
#include "duckdb/storage/index.hpp"
|
25
|
-
#include "duckdb/storage/meta_block_writer.hpp"
|
26
12
|
|
27
13
|
namespace duckdb {
|
28
14
|
|
29
|
-
|
30
|
-
struct ARTIndexScanState;
|
31
|
-
|
15
|
+
// classes
|
32
16
|
enum class VerifyExistenceType : uint8_t {
|
33
17
|
APPEND = 0, // appends to a table
|
34
18
|
APPEND_FK = 1, // appends to a table that has a foreign key
|
35
19
|
DELETE_FK = 2 // delete from a table that has a foreign key
|
36
20
|
};
|
21
|
+
class ConflictManager;
|
22
|
+
class Node;
|
23
|
+
class ARTKey;
|
24
|
+
class FixedSizeAllocator;
|
25
|
+
|
26
|
+
// structs
|
27
|
+
struct ARTIndexScanState;
|
28
|
+
struct ARTFlags {
|
29
|
+
vector<bool> vacuum_flags;
|
30
|
+
vector<idx_t> merge_buffer_counts;
|
31
|
+
};
|
37
32
|
|
38
33
|
class ART : public Index {
|
39
34
|
public:
|
40
|
-
//! Constructs an ART
|
35
|
+
//! Constructs an ART
|
41
36
|
ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
42
|
-
const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
|
43
|
-
AttachedDatabase &db,
|
44
|
-
idx_t block_offset = DConstants::INVALID_INDEX);
|
37
|
+
const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
|
38
|
+
AttachedDatabase &db, const idx_t block_id = DConstants::INVALID_INDEX,
|
39
|
+
const idx_t block_offset = DConstants::INVALID_INDEX);
|
45
40
|
~ART() override;
|
46
41
|
|
47
42
|
//! Root of the tree
|
48
|
-
Node
|
43
|
+
unique_ptr<Node> tree;
|
44
|
+
//! Fixed-size allocators holding the ART nodes
|
45
|
+
vector<unique_ptr<FixedSizeAllocator>> allocators;
|
49
46
|
|
50
47
|
public:
|
51
48
|
//! Initialize a single predicate scan on the index with the given expression and column IDs
|
52
49
|
unique_ptr<IndexScanState> InitializeScanSinglePredicate(const Transaction &transaction, const Value &value,
|
53
|
-
ExpressionType expression_type) override;
|
50
|
+
const ExpressionType expression_type) override;
|
54
51
|
//! Initialize a two predicate scan on the index with the given expression and column IDs
|
55
|
-
unique_ptr<IndexScanState> InitializeScanTwoPredicates(Transaction &transaction, const Value &low_value,
|
56
|
-
ExpressionType low_expression_type,
|
57
|
-
|
52
|
+
unique_ptr<IndexScanState> InitializeScanTwoPredicates(const Transaction &transaction, const Value &low_value,
|
53
|
+
const ExpressionType low_expression_type,
|
54
|
+
const Value &high_value,
|
55
|
+
const ExpressionType high_expression_type) override;
|
58
56
|
//! Performs a lookup on the index, fetching up to max_count result IDs. Returns true if all row IDs were fetched,
|
59
57
|
//! and false otherwise
|
60
|
-
bool Scan(Transaction &transaction, DataTable &table, IndexScanState &state, idx_t max_count,
|
58
|
+
bool Scan(const Transaction &transaction, const DataTable &table, IndexScanState &state, const idx_t max_count,
|
61
59
|
vector<row_t> &result_ids) override;
|
62
60
|
|
63
61
|
//! Called when data is appended to the index. The lock obtained from InitializeLock must be held
|
@@ -72,22 +70,25 @@ public:
|
|
72
70
|
PreservedError Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;
|
73
71
|
|
74
72
|
//! Construct an ART from a vector of sorted keys
|
75
|
-
bool ConstructFromSorted(idx_t count, vector<
|
73
|
+
bool ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_identifiers);
|
76
74
|
|
77
75
|
//! Search equal values and fetches the row IDs
|
78
|
-
bool SearchEqual(
|
76
|
+
bool SearchEqual(ARTKey &key, idx_t max_count, vector<row_t> &result_ids);
|
79
77
|
//! Search equal values used for joins that do not need to fetch data
|
80
|
-
void SearchEqualJoinNoFetch(
|
78
|
+
void SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size);
|
81
79
|
|
82
80
|
//! Serializes the index and returns the pair of block_id offset positions
|
83
|
-
BlockPointer Serialize(
|
81
|
+
BlockPointer Serialize(MetaBlockWriter &writer) override;
|
84
82
|
|
85
83
|
//! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other
|
86
84
|
//! index must also be locked during the merge
|
87
85
|
bool MergeIndexes(IndexLock &state, Index &other_index) override;
|
88
86
|
|
87
|
+
//! Traverses an ART and vacuums the qualifying nodes. The lock obtained from InitializeLock must be held
|
88
|
+
void Vacuum(IndexLock &state) override;
|
89
|
+
|
89
90
|
//! Generate ART keys for an input chunk
|
90
|
-
static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<
|
91
|
+
static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<ARTKey> &keys);
|
91
92
|
|
92
93
|
//! Generate a string containing all the expressions and their respective values that violate a constraint
|
93
94
|
string GenerateErrorKeyName(DataChunk &input, idx_t row);
|
@@ -96,31 +97,38 @@ public:
|
|
96
97
|
//! Performs constraint checking for a chunk of input data
|
97
98
|
void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) override;
|
98
99
|
|
99
|
-
//! Returns the string representation of
|
100
|
+
//! Returns the string representation of the ART
|
100
101
|
string ToString() override;
|
101
|
-
//! Verifies that the in-memory size value of the index matches its actual size
|
102
|
-
void Verify() override;
|
103
|
-
//! Increases the memory size by the difference between the old size and the current size
|
104
|
-
//! and performs verifications
|
105
|
-
void IncreaseAndVerifyMemorySize(idx_t old_memory_size) override;
|
106
102
|
|
107
103
|
private:
|
108
104
|
//! Insert a row ID into a leaf
|
109
|
-
bool InsertToLeaf(
|
105
|
+
bool InsertToLeaf(Node &leaf_node, const row_t &row_id);
|
110
106
|
//! Insert a key into the tree
|
111
|
-
bool Insert(Node
|
107
|
+
bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
|
112
108
|
//! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself
|
113
|
-
void Erase(Node
|
109
|
+
void Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
|
114
110
|
//! Find the node with a matching key, or return nullptr if not found
|
115
|
-
|
111
|
+
Node Lookup(Node node, const ARTKey &key, idx_t depth);
|
116
112
|
//! Returns all row IDs belonging to a key greater (or equal) than the search key
|
117
|
-
bool SearchGreater(ARTIndexScanState *state,
|
113
|
+
bool SearchGreater(ARTIndexScanState *state, ARTKey &key, bool inclusive, idx_t max_count,
|
114
|
+
vector<row_t> &result_ids);
|
118
115
|
//! Returns all row IDs belonging to a key less (or equal) than the upper_bound
|
119
|
-
bool SearchLess(ARTIndexScanState *state,
|
116
|
+
bool SearchLess(ARTIndexScanState *state, ARTKey &upper_bound, bool inclusive, idx_t max_count,
|
120
117
|
vector<row_t> &result_ids);
|
121
118
|
//! Returns all row IDs belonging to a key within the range of lower_bound and upper_bound
|
122
|
-
bool SearchCloseRange(ARTIndexScanState *state,
|
119
|
+
bool SearchCloseRange(ARTIndexScanState *state, ARTKey &lower_bound, ARTKey &upper_bound, bool left_inclusive,
|
123
120
|
bool right_inclusive, idx_t max_count, vector<row_t> &result_ids);
|
121
|
+
|
122
|
+
//! Initializes a merge operation by returning a set containing the buffer count of each fixed-size allocator
|
123
|
+
void InitializeMerge(ARTFlags &flags);
|
124
|
+
|
125
|
+
//! Initializes a vacuum operation by calling the initialize operation of the respective
|
126
|
+
//! node allocator, and returns a vector containing either true, if the allocator at
|
127
|
+
//! the respective position qualifies, or false, if not
|
128
|
+
void InitializeVacuum(ARTFlags &flags);
|
129
|
+
//! Finalizes a vacuum operation by calling the finalize operation of all qualifying
|
130
|
+
//! fixed size allocators
|
131
|
+
void FinalizeVacuum(const ARTFlags &flags);
|
124
132
|
};
|
125
133
|
|
126
134
|
} // namespace duckdb
|
@@ -17,36 +17,37 @@
|
|
17
17
|
|
18
18
|
namespace duckdb {
|
19
19
|
|
20
|
-
class
|
20
|
+
class ARTKey {
|
21
21
|
public:
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
ARTKey();
|
23
|
+
ARTKey(const data_ptr_t &data, const uint32_t &len);
|
24
|
+
ARTKey(ArenaAllocator &allocator, const uint32_t &len);
|
25
25
|
|
26
|
-
|
26
|
+
uint32_t len;
|
27
27
|
data_ptr_t data;
|
28
28
|
|
29
29
|
public:
|
30
30
|
template <class T>
|
31
|
-
static inline
|
32
|
-
auto data =
|
33
|
-
return
|
31
|
+
static inline ARTKey CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, T element) {
|
32
|
+
auto data = ARTKey::CreateData<T>(allocator, element);
|
33
|
+
return ARTKey(data, sizeof(element));
|
34
34
|
}
|
35
35
|
|
36
36
|
template <class T>
|
37
|
-
static inline
|
38
|
-
return
|
37
|
+
static inline ARTKey CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const Value &element) {
|
38
|
+
return CreateARTKey(allocator, type, element.GetValueUnsafe<T>());
|
39
39
|
}
|
40
40
|
|
41
41
|
template <class T>
|
42
|
-
static inline void
|
43
|
-
key.data =
|
42
|
+
static inline void CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, T element) {
|
43
|
+
key.data = ARTKey::CreateData<T>(allocator, element);
|
44
44
|
key.len = sizeof(element);
|
45
45
|
}
|
46
46
|
|
47
47
|
template <class T>
|
48
|
-
static inline void
|
49
|
-
|
48
|
+
static inline void CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key,
|
49
|
+
const Value element) {
|
50
|
+
key.data = ARTKey::CreateData<T>(allocator, element.GetValueUnsafe<T>());
|
50
51
|
key.len = sizeof(element);
|
51
52
|
}
|
52
53
|
|
@@ -57,14 +58,18 @@ public:
|
|
57
58
|
const data_t &operator[](size_t i) const {
|
58
59
|
return data[i];
|
59
60
|
}
|
60
|
-
bool operator>(const
|
61
|
-
bool operator<(const
|
62
|
-
bool operator>=(const
|
63
|
-
bool operator==(const
|
61
|
+
bool operator>(const ARTKey &k) const;
|
62
|
+
bool operator<(const ARTKey &k) const;
|
63
|
+
bool operator>=(const ARTKey &k) const;
|
64
|
+
bool operator==(const ARTKey &k) const;
|
64
65
|
|
65
|
-
bool ByteMatches(
|
66
|
-
|
67
|
-
|
66
|
+
inline bool ByteMatches(const ARTKey &other, const uint32_t &depth) const {
|
67
|
+
return data[depth] == other[depth];
|
68
|
+
}
|
69
|
+
inline bool Empty() const {
|
70
|
+
return len == 0;
|
71
|
+
}
|
72
|
+
void ConcatenateARTKey(ArenaAllocator &allocator, ARTKey &concat_key);
|
68
73
|
|
69
74
|
private:
|
70
75
|
template <class T>
|
@@ -76,9 +81,9 @@ private:
|
|
76
81
|
};
|
77
82
|
|
78
83
|
template <>
|
79
|
-
|
84
|
+
ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, string_t value);
|
80
85
|
template <>
|
81
|
-
|
86
|
+
ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const char *value);
|
82
87
|
template <>
|
83
|
-
void
|
88
|
+
void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, string_t value);
|
84
89
|
} // namespace duckdb
|