duckdb 0.7.2-dev2552.0 → 0.7.2-dev2699.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +7 -7
- package/package.json +2 -2
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
- package/src/duckdb/src/common/adbc/adbc.cpp +5 -2
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +286 -269
- package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
- package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
- package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
- package/src/duckdb/src/execution/index/art/node.cpp +444 -379
- package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
- package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
- package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
- package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
- package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +4 -0
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
- package/src/duckdb/src/function/table/read_csv.cpp +5 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
- package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
- package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
- package/src/duckdb/src/include/duckdb.h +26 -0
- package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/query_result.cpp +1 -1
- package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
- package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
- package/src/duckdb/src/storage/data_table.cpp +6 -3
- package/src/duckdb/src/storage/index.cpp +18 -6
- package/src/duckdb/src/storage/local_storage.cpp +8 -2
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
- package/src/duckdb/src/storage/wal_replay.cpp +1 -1
- package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
- package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -338,27 +338,29 @@ void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
|
|
338
338
|
|
339
339
|
void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader) {
|
340
340
|
|
341
|
-
//
|
341
|
+
// deserialize the index metadata
|
342
342
|
auto info = IndexCatalogEntry::Deserialize(reader, context);
|
343
343
|
|
344
|
-
//
|
344
|
+
// create the index in the catalog
|
345
345
|
auto &schema_catalog = catalog.GetSchema(context, info->schema);
|
346
346
|
auto &table_catalog = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, info->schema, info->table->table_name)
|
347
347
|
.Cast<DuckTableEntry>();
|
348
348
|
auto &index_catalog = schema_catalog.CreateIndex(context, *info, table_catalog)->Cast<DuckIndexEntry>();
|
349
349
|
index_catalog.info = table_catalog.GetStorage().info;
|
350
|
-
|
350
|
+
|
351
|
+
// we deserialize the index lazily, i.e., we do not need to load any node information
|
352
|
+
// except the root block id and offset
|
351
353
|
auto root_block_id = reader.Read<block_id_t>();
|
352
354
|
auto root_offset = reader.Read<uint32_t>();
|
353
355
|
|
354
|
-
//
|
356
|
+
// obtain the expressions of the ART from the index metadata
|
355
357
|
vector<unique_ptr<Expression>> unbound_expressions;
|
356
358
|
vector<unique_ptr<ParsedExpression>> parsed_expressions;
|
357
|
-
|
358
359
|
for (auto &p_exp : info->parsed_expressions) {
|
359
360
|
parsed_expressions.push_back(p_exp->Copy());
|
360
361
|
}
|
361
362
|
|
363
|
+
// bind the parsed expressions
|
362
364
|
auto binder = Binder::CreateBinder(context);
|
363
365
|
auto &table_ref = info->table->Cast<TableRef>();
|
364
366
|
auto bound_table = binder->Bind(table_ref);
|
@@ -370,8 +372,7 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader
|
|
370
372
|
}
|
371
373
|
|
372
374
|
if (parsed_expressions.empty()) {
|
373
|
-
//
|
374
|
-
// column refs
|
375
|
+
// this is a PK/FK index: we create the necessary bound column ref expressions
|
375
376
|
unbound_expressions.reserve(info->column_ids.size());
|
376
377
|
for (idx_t key_nr = 0; key_nr < info->column_ids.size(); key_nr++) {
|
377
378
|
auto &col = table_catalog.GetColumn(LogicalIndex(info->column_ids[key_nr]));
|
@@ -380,17 +381,18 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader
|
|
380
381
|
}
|
381
382
|
}
|
382
383
|
|
384
|
+
// create the index and add it to the storage
|
383
385
|
switch (info->index_type) {
|
384
386
|
case IndexType::ART: {
|
385
387
|
auto &storage = table_catalog.GetStorage();
|
386
388
|
auto art = make_uniq<ART>(info->column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
|
387
|
-
info->constraint_type, storage.db,
|
389
|
+
info->constraint_type, storage.db, root_block_id, root_offset);
|
388
390
|
index_catalog.index = art.get();
|
389
391
|
storage.info->indexes.AddIndex(std::move(art));
|
390
392
|
break;
|
391
393
|
}
|
392
394
|
default:
|
393
|
-
throw InternalException("
|
395
|
+
throw InternalException("Unknown index type for ReadIndex");
|
394
396
|
}
|
395
397
|
}
|
396
398
|
|
@@ -1162,9 +1162,7 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
|
|
1162
1162
|
|
1163
1163
|
auto &allocator = Allocator::Get(db);
|
1164
1164
|
|
1165
|
-
|
1166
|
-
result.Initialize(allocator, index->logical_types);
|
1167
|
-
|
1165
|
+
// intermediate holds scanned chunks of the underlying data to create the index
|
1168
1166
|
DataChunk intermediate;
|
1169
1167
|
vector<LogicalType> intermediate_types;
|
1170
1168
|
auto column_ids = index->column_ids;
|
@@ -1176,6 +1174,10 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
|
|
1176
1174
|
intermediate_types.emplace_back(LogicalType::ROW_TYPE);
|
1177
1175
|
intermediate.Initialize(allocator, intermediate_types);
|
1178
1176
|
|
1177
|
+
// holds the result of executing the index expression on the intermediate chunks
|
1178
|
+
DataChunk result;
|
1179
|
+
result.Initialize(allocator, index->logical_types);
|
1180
|
+
|
1179
1181
|
// initialize an index scan
|
1180
1182
|
CreateIndexScanState state;
|
1181
1183
|
InitializeWALCreateIndexScan(state, column_ids);
|
@@ -1209,6 +1211,7 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
|
|
1209
1211
|
}
|
1210
1212
|
}
|
1211
1213
|
}
|
1214
|
+
|
1212
1215
|
info->indexes.AddIndex(std::move(index));
|
1213
1216
|
}
|
1214
1217
|
|
@@ -10,10 +10,10 @@ namespace duckdb {
|
|
10
10
|
|
11
11
|
Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_manager,
|
12
12
|
const vector<column_t> &column_ids_p, const vector<unique_ptr<Expression>> &unbound_expressions,
|
13
|
-
IndexConstraintType constraint_type_p
|
13
|
+
IndexConstraintType constraint_type_p)
|
14
14
|
|
15
15
|
: type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
|
16
|
-
db(db), buffer_manager(BufferManager::GetBufferManager(db))
|
16
|
+
db(db), buffer_manager(BufferManager::GetBufferManager(db)) {
|
17
17
|
|
18
18
|
for (auto &expr : unbound_expressions) {
|
19
19
|
types.push_back(expr->return_type.InternalType());
|
@@ -49,19 +49,31 @@ void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
|
|
49
49
|
}
|
50
50
|
|
51
51
|
bool Index::MergeIndexes(Index &other_index) {
|
52
|
+
|
52
53
|
IndexLock state;
|
53
54
|
InitializeLock(state);
|
54
55
|
|
55
56
|
switch (this->type) {
|
56
|
-
case IndexType::ART:
|
57
|
-
|
58
|
-
return art.MergeIndexes(state, other_index);
|
59
|
-
}
|
57
|
+
case IndexType::ART:
|
58
|
+
return Cast<ART>().MergeIndexes(state, other_index);
|
60
59
|
default:
|
61
60
|
throw InternalException("Unimplemented index type for merge");
|
62
61
|
}
|
63
62
|
}
|
64
63
|
|
64
|
+
void Index::Vacuum() {
|
65
|
+
|
66
|
+
IndexLock state;
|
67
|
+
InitializeLock(state);
|
68
|
+
|
69
|
+
switch (this->type) {
|
70
|
+
case IndexType::ART:
|
71
|
+
return Cast<ART>().Vacuum(state);
|
72
|
+
default:
|
73
|
+
throw InternalException("Unimplemented index type for vacuum");
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
65
77
|
void Index::ExecuteExpressions(DataChunk &input, DataChunk &result) {
|
66
78
|
executor.Execute(input, result);
|
67
79
|
}
|
@@ -118,6 +118,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
|
|
118
118
|
row_groups = make_shared<RowGroupCollection>(table.info, TableIOManager::Get(table).GetBlockManagerForRowData(),
|
119
119
|
types, MAX_ROW_ID, 0);
|
120
120
|
row_groups->InitializeEmpty();
|
121
|
+
|
121
122
|
table.info->indexes.Scan([&](Index &index) {
|
122
123
|
D_ASSERT(index.type == IndexType::ART);
|
123
124
|
auto &art = index.Cast<ART>();
|
@@ -129,7 +130,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
|
|
129
130
|
unbound_expressions.push_back(expr->Copy());
|
130
131
|
}
|
131
132
|
indexes.AddIndex(make_uniq<ART>(art.column_ids, art.table_io_manager, std::move(unbound_expressions),
|
132
|
-
art.constraint_type, art.db
|
133
|
+
art.constraint_type, art.db));
|
133
134
|
}
|
134
135
|
return false;
|
135
136
|
});
|
@@ -520,6 +521,12 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
|
|
520
521
|
storage.AppendToIndexes(transaction, append_state, append_count, true);
|
521
522
|
}
|
522
523
|
transaction.PushAppend(table, append_state.row_start, append_count);
|
524
|
+
|
525
|
+
// possibly vacuum any excess index data
|
526
|
+
table.info->indexes.Scan([&](Index &index) {
|
527
|
+
index.Vacuum();
|
528
|
+
return false;
|
529
|
+
});
|
523
530
|
}
|
524
531
|
|
525
532
|
void LocalStorage::Commit(LocalStorage::CommitState &commit_state, DuckTransaction &transaction) {
|
@@ -531,7 +538,6 @@ void LocalStorage::Commit(LocalStorage::CommitState &commit_state, DuckTransacti
|
|
531
538
|
auto table = entry.first;
|
532
539
|
auto storage = entry.second.get();
|
533
540
|
Flush(table, *storage);
|
534
|
-
|
535
541
|
entry.second.reset();
|
536
542
|
}
|
537
543
|
}
|
@@ -229,15 +229,6 @@ void StandardBufferManager::Unpin(shared_ptr<BlockHandle> &handle) {
|
|
229
229
|
}
|
230
230
|
}
|
231
231
|
|
232
|
-
// POTENTIALLY PROBLEMATIC
|
233
|
-
void StandardBufferManager::IncreaseUsedMemory(idx_t size, bool unsafe) {
|
234
|
-
ReserveMemory(size);
|
235
|
-
}
|
236
|
-
|
237
|
-
void StandardBufferManager::DecreaseUsedMemory(idx_t size) {
|
238
|
-
FreeReservedMemory(size);
|
239
|
-
}
|
240
|
-
|
241
232
|
void StandardBufferManager::SetLimit(idx_t limit) {
|
242
233
|
buffer_pool.SetLimit(limit, InMemoryWarning());
|
243
234
|
}
|
@@ -417,7 +417,7 @@ void ReplayState::ReplayCreateIndex() {
|
|
417
417
|
switch (info->index_type) {
|
418
418
|
case IndexType::ART: {
|
419
419
|
index = make_uniq<ART>(info->column_ids, TableIOManager::Get(data_table), expressions, info->constraint_type,
|
420
|
-
data_table.db
|
420
|
+
data_table.db);
|
421
421
|
break;
|
422
422
|
}
|
423
423
|
default:
|
@@ -52,15 +52,21 @@ void CleanupState::CleanupDelete(DeleteInfo &info) {
|
|
52
52
|
auto version_table = info.table;
|
53
53
|
D_ASSERT(version_table->info->cardinality >= info.count);
|
54
54
|
version_table->info->cardinality -= info.count;
|
55
|
+
|
55
56
|
if (version_table->info->indexes.Empty()) {
|
56
57
|
// this table has no indexes: no cleanup to be done
|
57
58
|
return;
|
58
59
|
}
|
60
|
+
|
59
61
|
if (current_table != version_table) {
|
60
62
|
// table for this entry differs from previous table: flush and switch to the new table
|
61
63
|
Flush();
|
62
64
|
current_table = version_table;
|
63
65
|
}
|
66
|
+
|
67
|
+
// possibly vacuum any indexes in this table later
|
68
|
+
indexed_tables[current_table->info->table] = current_table;
|
69
|
+
|
64
70
|
count = 0;
|
65
71
|
for (idx_t i = 0; i < info.count; i++) {
|
66
72
|
row_numbers[count++] = info.vinfo->start + info.rows[i];
|
@@ -126,6 +126,14 @@ void UndoBuffer::Cleanup() {
|
|
126
126
|
CleanupState state;
|
127
127
|
UndoBuffer::IteratorState iterator_state;
|
128
128
|
IterateEntries(iterator_state, [&](UndoFlags type, data_ptr_t data) { state.CleanupEntry(type, data); });
|
129
|
+
|
130
|
+
// possibly vacuum indexes
|
131
|
+
for (const auto &table : state.indexed_tables) {
|
132
|
+
table.second->info->indexes.Scan([&](Index &index) {
|
133
|
+
index.Vacuum();
|
134
|
+
return false;
|
135
|
+
});
|
136
|
+
}
|
129
137
|
}
|
130
138
|
|
131
139
|
void UndoBuffer::Commit(UndoBuffer::IteratorState &iterator_state, optional_ptr<WriteAheadLog> log,
|
@@ -352,13 +352,13 @@
|
|
352
352
|
|
353
353
|
#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp"
|
354
354
|
|
355
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp"
|
356
|
+
|
355
357
|
#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
|
356
358
|
|
357
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
|
359
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
|
358
360
|
|
359
361
|
#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp"
|
360
362
|
|
361
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
|
362
|
-
|
363
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp"
|
363
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
|
364
364
|
|
@@ -1,10 +1,14 @@
|
|
1
1
|
#include "src/execution/index/art/art_key.cpp"
|
2
2
|
|
3
|
+
#include "src/execution/index/art/node.cpp"
|
4
|
+
|
5
|
+
#include "src/execution/index/art/fixed_size_allocator.cpp"
|
6
|
+
|
3
7
|
#include "src/execution/index/art/iterator.cpp"
|
4
8
|
|
5
9
|
#include "src/execution/index/art/leaf.cpp"
|
6
10
|
|
7
|
-
#include "src/execution/index/art/node.cpp"
|
11
|
+
#include "src/execution/index/art/leaf_segment.cpp"
|
8
12
|
|
9
13
|
#include "src/execution/index/art/node4.cpp"
|
10
14
|
|
@@ -18,5 +22,7 @@
|
|
18
22
|
|
19
23
|
#include "src/execution/index/art/prefix.cpp"
|
20
24
|
|
25
|
+
#include "src/execution/index/art/prefix_segment.cpp"
|
26
|
+
|
21
27
|
#include "src/execution/index/art/art.cpp"
|
22
28
|
|