duckdb 0.8.2-dev4474.0 → 0.8.2-dev4572.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/duckdb.js +11 -1
- package/package.json +3 -1
- package/src/connection.cpp +48 -7
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +3 -0
- package/src/duckdb/src/catalog/catalog.cpp +5 -0
- package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
- package/src/duckdb/src/execution/column_binding_resolver.cpp +1 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/table/arrow.cpp +18 -13
- package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/optimizer/rule/empty_needle_removal.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
- package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_projection.cpp +0 -1
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
- package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/data_pointer.cpp +20 -0
- package/src/duckdb/src/storage/local_storage.cpp +3 -7
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +5 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
- package/src/duckdb/src/storage/table/column_data.cpp +17 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
- package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/duckdb/ub_src_storage.cpp +2 -0
- package/src/duckdb_node.hpp +1 -0
- package/test/close_hang.test.ts +39 -0
@@ -14,6 +14,7 @@
 #include "duckdb/common/map.hpp"
 #include "duckdb/storage/storage_info.hpp"
 #include "duckdb/common/mutex.hpp"
+#include "duckdb/storage/data_pointer.hpp"
 
 namespace duckdb {
 class DatabaseInstance;

@@ -21,6 +22,7 @@ class ColumnData;
 class ColumnDataCheckpointer;
 class ColumnSegment;
 class SegmentStatistics;
+struct ColumnSegmentState;
 
 struct ColumnFetchState;
 struct ColumnScanState;

@@ -62,6 +64,11 @@ struct CompressedSegmentState {
     virtual ~CompressedSegmentState() {
     }
 
+    //! Display info for PRAGMA storage_info
+    virtual string GetSegmentInfo() const { // LCOV_EXCL_START
+        return "";
+    } // LCOV_EXCL_STOP
+
     template <class TARGET>
     TARGET &Cast() {
         D_ASSERT(dynamic_cast<TARGET *>(this));

@@ -75,7 +82,7 @@ struct CompressedSegmentState {
 };
 
 struct CompressionAppendState {
-    CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
+    explicit CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
     }
     virtual ~CompressionAppendState() {
     }

@@ -139,13 +146,24 @@ typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &stat
 //===--------------------------------------------------------------------===//
 // Append (optional)
 //===--------------------------------------------------------------------===//
-typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
+typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(
+    ColumnSegment &segment, block_id_t block_id, optional_ptr<ColumnSegmentState> segment_state);
 typedef unique_ptr<CompressionAppendState> (*compression_init_append_t)(ColumnSegment &segment);
 typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment,
                                       SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count);
 typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
 typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
 
+//===--------------------------------------------------------------------===//
+// Serialization (optional)
+//===--------------------------------------------------------------------===//
+//! Function prototype for serializing the segment state
+typedef unique_ptr<ColumnSegmentState> (*compression_serialize_state_t)(ColumnSegment &segment);
+//! Function prototype for deserializing the segment state
+typedef unique_ptr<ColumnSegmentState> (*compression_deserialize_state_t)(Deserializer &deserializer);
+//! Function prototype for cleaning up the segment state when the column data is dropped
+typedef void (*compression_cleanup_state_t)(ColumnSegment &segment);
+
 class CompressionFunction {
 public:
     CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,

@@ -157,12 +175,16 @@ public:
                         compression_init_segment_t init_segment = nullptr,
                         compression_init_append_t init_append = nullptr, compression_append_t append = nullptr,
                         compression_finalize_append_t finalize_append = nullptr,
-                        compression_revert_append_t revert_append = nullptr)
+                        compression_revert_append_t revert_append = nullptr,
+                        compression_serialize_state_t serialize_state = nullptr,
+                        compression_deserialize_state_t deserialize_state = nullptr,
+                        compression_cleanup_state_t cleanup_state = nullptr)
         : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
           init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
           init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
           init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append),
-          revert_append(revert_append) {
+          revert_append(revert_append), serialize_state(serialize_state), deserialize_state(deserialize_state),
+          cleanup_state(cleanup_state) {
     }
 
     //! Compression type

@@ -218,6 +240,16 @@ public:
     compression_finalize_append_t finalize_append;
     //! Revert append (optional)
     compression_revert_append_t revert_append;
+
+    // State serialize functions
+    //! This is only necessary if the segment state has information that must be written to disk in the metadata
+
+    //! Serialize the segment state to the metadata (optional)
+    compression_serialize_state_t serialize_state;
+    //! Deserialize the segment state to the metadata (optional)
+    compression_deserialize_state_t deserialize_state;
+    //! Cleanup the segment state (optional)
+    compression_cleanup_state_t cleanup_state;
 };
 
 //! The set of compression functions
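The hunks above add three optional hooks to CompressionFunction — serialize_state, deserialize_state and cleanup_state — all defaulted to nullptr, so existing compression functions keep registering unchanged and callers only invoke a hook when it is set. The standalone C++ sketch below mirrors that optional-function-pointer pattern; every name in it (DemoFunction, DemoSegmentState, SerializeDemo, and so on) is invented for illustration and is not DuckDB's API.

#include <iostream>
#include <memory>
#include <string>

// Illustrative stand-ins for a column segment and its serializable state.
struct Segment {
    long long overflow_block_id = 42;
};
struct SegmentState {
    virtual ~SegmentState() = default;
    virtual std::string Info() const = 0;
};
struct DemoSegmentState : SegmentState {
    long long block_id;
    explicit DemoSegmentState(long long id) : block_id(id) {
    }
    std::string Info() const override {
        return "overflow block " + std::to_string(block_id);
    }
};

// Optional hooks default to nullptr, like the new serialize_state /
// deserialize_state / cleanup_state members in the diff above.
typedef std::unique_ptr<SegmentState> (*serialize_state_t)(Segment &segment);
typedef void (*cleanup_state_t)(Segment &segment);

struct DemoFunction {
    serialize_state_t serialize_state;
    cleanup_state_t cleanup_state;
    explicit DemoFunction(serialize_state_t serialize = nullptr, cleanup_state_t cleanup = nullptr)
        : serialize_state(serialize), cleanup_state(cleanup) {
    }
};

static std::unique_ptr<SegmentState> SerializeDemo(Segment &segment) {
    return std::make_unique<DemoSegmentState>(segment.overflow_block_id);
}

int main() {
    DemoFunction plain;                  // existing functions: hooks stay nullptr
    DemoFunction with_state(SerializeDemo);
    Segment segment;
    // Callers only invoke a hook when it is registered.
    if (with_state.serialize_state) {
        std::cout << with_state.serialize_state(segment)->Info() << "\n";
    }
    if (plain.cleanup_state) {
        plain.cleanup_state(segment);
    }
    return 0;
}

Further down in this diff, the uncompressed string compression function plugs into exactly this shape via the new SerializeState, DeserializeState and CleanupState statics.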
@@ -129,6 +129,8 @@ public:
 
     //! Scan Function
     static void ArrowScanFunction(ClientContext &context, TableFunctionInput &data, DataChunk &output);
+    static void PopulateArrowTableType(ArrowTableType &arrow_table, ArrowSchemaWrapper &schema_p, vector<string> &names,
+                                       vector<LogicalType> &return_types);
 
 protected:
     //! Defines Maximum Number of Threads
@@ -25,6 +25,10 @@ struct PragmaStorageInfo {
     static void RegisterFunction(BuiltinFunctions &set);
 };
 
+struct PragmaMetadataInfo {
+    static void RegisterFunction(BuiltinFunctions &set);
+};
+
 struct PragmaLastProfilingOutput {
     static void RegisterFunction(BuiltinFunctions &set);
 };
@@ -13,7 +13,7 @@
 namespace duckdb {
 
 // The Empty_needle_removal Optimization rule folds some foldable ConstantExpression
-//(e.g.: PREFIX('xyz', '') is TRUE, PREFIX(NULL, '') is NULL, so rewrite PREFIX(x, '') to (
+//(e.g.: PREFIX('xyz', '') is TRUE, PREFIX(NULL, '') is NULL, so rewrite PREFIX(x, '') to TRUE_OR_NULL(x)
 class EmptyNeedleRemovalRule : public Rule {
 public:
     explicit EmptyNeedleRemovalRule(ExpressionRewriter &rewriter);
@@ -14,13 +14,16 @@
 #include "duckdb/function/compression_function.hpp"
 
 namespace duckdb {
+struct UncompressedStringSegmentState;
 
 class OverflowStringWriter {
 public:
     virtual ~OverflowStringWriter() {
     }
 
-    virtual void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) = 0;
+    virtual void WriteString(UncompressedStringSegmentState &state, string_t string, block_id_t &result_block,
+                             int32_t &result_offset) = 0;
+    virtual void Flush() = 0;
 };
 
 struct StringBlock {

@@ -43,15 +46,35 @@ struct string_location_t {
 };
 
 struct UncompressedStringSegmentState : public CompressedSegmentState {
-    ~UncompressedStringSegmentState();
+    ~UncompressedStringSegmentState() override;
 
     //! The string block holding strings that do not fit in the main block
     //! FIXME: this should be replaced by a heap that also allows freeing of unused strings
     unique_ptr<StringBlock> head;
+    //! Map of block id to string block
+    unordered_map<block_id_t, reference<StringBlock>> overflow_blocks;
     //! Overflow string writer (if any), if not set overflow strings will be written to memory blocks
     unique_ptr<OverflowStringWriter> overflow_writer;
-    //!
-
+    //! The set of overflow blocks written to disk (if any)
+    vector<block_id_t> on_disk_blocks;
+
+public:
+    shared_ptr<BlockHandle> GetHandle(BlockManager &manager, block_id_t block_id);
+
+    void RegisterBlock(BlockManager &manager, block_id_t block_id);
+
+    string GetSegmentInfo() const override {
+        if (on_disk_blocks.empty()) {
+            return "";
+        }
+        string result = StringUtil::Join(on_disk_blocks, on_disk_blocks.size(), ", ",
+                                         [&](block_id_t block) { return to_string(block); });
+        return "Overflow String Block Ids: " + result;
+    }
+
+private:
+    mutex block_lock;
+    unordered_map<block_id_t, shared_ptr<BlockHandle>> handles;
 };
 
 } // namespace duckdb
@@ -30,10 +30,12 @@ public:
     static constexpr idx_t STRING_SPACE = Storage::BLOCK_SIZE - sizeof(block_id_t);
 
 public:
-    void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) override;
+    void WriteString(UncompressedStringSegmentState &state, string_t string, block_id_t &result_block,
+                     int32_t &result_offset) override;
+    void Flush() override;
 
 private:
-    void AllocateNewBlock(block_id_t new_block_id);
+    void AllocateNewBlock(UncompressedStringSegmentState &state, block_id_t new_block_id);
 };
 
 } // namespace duckdb
@@ -20,8 +20,27 @@ namespace duckdb {
 class Serializer;
 class Deserializer;
 
+struct ColumnSegmentState {
+    virtual ~ColumnSegmentState() {
+    }
+
+    virtual void Serialize(Serializer &serializer) const = 0;
+    static unique_ptr<ColumnSegmentState> Deserialize(Deserializer &deserializer);
+
+    template <class TARGET>
+    TARGET &Cast() {
+        D_ASSERT(dynamic_cast<TARGET *>(this));
+        return reinterpret_cast<TARGET &>(*this);
+    }
+    template <class TARGET>
+    const TARGET &Cast() const {
+        D_ASSERT(dynamic_cast<const TARGET *>(this));
+        return reinterpret_cast<const TARGET &>(*this);
+    }
+};
+
 struct DataPointer {
-    DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
+    explicit DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
     }
 
     uint64_t row_start;

@@ -30,6 +49,8 @@ struct DataPointer {
     CompressionType compression_type;
     //! Type-specific statistics of the segment
     BaseStatistics statistics;
+    //! Serialized segment state
+    unique_ptr<ColumnSegmentState> segment_state;
 
     void Serialize(Serializer &serializer) const;
     static DataPointer Deserialize(Deserializer &source);
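The new ColumnSegmentState above pairs a virtual Serialize with a static Deserialize factory and a D_ASSERT-checked Cast&lt;TARGET&gt; downcast, and DataPointer now optionally owns one such state. Below is a minimal standalone analogue of that idiom, with std::ostream/std::istream standing in for DuckDB's Serializer/Deserializer and all names (State, OverflowState) invented for illustration.

#include <cassert>
#include <iostream>
#include <memory>
#include <sstream>
#include <vector>

struct State {
    virtual ~State() = default;
    // Each subclass writes itself; a static factory reads one back.
    virtual void Serialize(std::ostream &out) const = 0;
    static std::unique_ptr<State> Deserialize(std::istream &in);

    template <class TARGET>
    TARGET &Cast() {
        assert(dynamic_cast<TARGET *>(this)); // checked downcast, like the Cast<> helper above
        return static_cast<TARGET &>(*this);
    }
};

struct OverflowState : State {
    std::vector<long long> block_ids;
    void Serialize(std::ostream &out) const override {
        out << block_ids.size();
        for (auto id : block_ids) {
            out << ' ' << id;
        }
    }
};

std::unique_ptr<State> State::Deserialize(std::istream &in) {
    // This sketch only knows one concrete state type.
    auto result = std::make_unique<OverflowState>();
    size_t count = 0;
    in >> count;
    for (size_t i = 0; i < count; i++) {
        long long id;
        in >> id;
        result->block_ids.push_back(id);
    }
    return result;
}

int main() {
    OverflowState state;
    state.block_ids = {10, 11, 12};
    std::stringstream buffer;
    state.Serialize(buffer);
    auto restored = State::Deserialize(buffer);
    std::cout << restored->Cast<OverflowState>().block_ids.size() << " blocks restored\n";
    return 0;
}

In this diff the concrete state comes from the uncompressed string segment, whose payload is presumably the on_disk_blocks list of overflow block ids registered during checkpointing.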
@@ -16,6 +16,7 @@
 
 namespace duckdb {
 class DatabaseInstance;
+struct MetadataBlockInfo;
 
 struct MetadataBlock {
     shared_ptr<BlockHandle> block;

@@ -66,6 +67,7 @@ public:
     void MarkBlocksAsModified();
     void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);
 
+    vector<MetadataBlockInfo> GetMetadataInfo() const;
     idx_t BlockCount();
 
     void Write(WriteStream &sink);
@@ -68,6 +68,7 @@ public:
     virtual bool IsCheckpointClean(MetaBlockPointer checkpoint_id) = 0;
     virtual void CreateCheckpoint(bool delete_wal = false, bool force_checkpoint = false) = 0;
     virtual DatabaseSize GetDatabaseSize() = 0;
+    virtual vector<MetadataBlockInfo> GetMetadataInfo() = 0;
     virtual shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) = 0;
 
 protected:

@@ -112,6 +113,7 @@ public:
     bool IsCheckpointClean(MetaBlockPointer checkpoint_id) override;
     void CreateCheckpoint(bool delete_wal, bool force_checkpoint) override;
     DatabaseSize GetDatabaseSize() override;
+    vector<MetadataBlockInfo> GetMetadataInfo() override;
     shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) override;
 
 protected:
@@ -56,7 +56,8 @@ public:
     static void StringScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result);
     static void StringFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
                                idx_t result_idx);
-    static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id);
+    static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id,
+                                                                optional_ptr<ColumnSegmentState> segment_state);
 
     static unique_ptr<CompressionAppendState> StringInitAppend(ColumnSegment &segment) {
         auto &buffer_manager = BufferManager::GetBufferManager(segment.db);

@@ -194,5 +195,9 @@ public:
                                 data_ptr_t baseptr, int32_t dict_offset, uint32_t string_length);
     static string_t FetchString(ColumnSegment &segment, StringDictionaryContainer dict, Vector &result,
                                 data_ptr_t baseptr, string_location_t location, uint32_t string_length);
+
+    static unique_ptr<ColumnSegmentState> SerializeState(ColumnSegment &segment);
+    static unique_ptr<ColumnSegmentState> DeserializeState(Deserializer &deserializer);
+    static void CleanupState(ColumnSegment &segment);
 };
 } // namespace duckdb
@@ -57,7 +57,8 @@ public:
     static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
                                                              block_id_t id, idx_t offset, const LogicalType &type_p,
                                                              idx_t start, idx_t count, CompressionType compression_type,
-                                                             BaseStatistics statistics);
+                                                             BaseStatistics statistics,
+                                                             unique_ptr<ColumnSegmentState> segment_state);
     static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
                                                             idx_t segment_size = Storage::BLOCK_SIZE);
     static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);

@@ -118,14 +119,17 @@ public:
         return row_index - this->start;
     }
 
-    CompressedSegmentState *GetSegmentState() {
+    optional_ptr<CompressedSegmentState> GetSegmentState() {
         return segment_state.get();
     }
 
+    void CommitDropSegment();
+
 public:
     ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
                   idx_t start, idx_t count, CompressionFunction &function, BaseStatistics statistics,
-                  block_id_t block_id, idx_t offset, idx_t segment_size);
+                  block_id_t block_id, idx_t offset, idx_t segment_size,
+                  unique_ptr<ColumnSegmentState> segment_state = nullptr);
     ColumnSegment(ColumnSegment &other, idx_t start);
 
 private:
@@ -72,17 +72,21 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
         for (auto &cond : comp_join.conditions) {
             if (cond.comparison == ExpressionType::COMPARE_EQUAL) {
                 has_equality = true;
+                break;
             }
         }
         if (!has_equality) {
             break;
         }
-        //
+        // visit current operator expressions so they are added to the referenced_columns
+        LogicalOperatorVisitor::VisitOperatorExpressions(op);
+
         column_binding_set_t unused_bindings;
+        auto old_op_bindings = op.GetColumnBindings();
         ExtractUnusedColumnBindings(op.children[1]->GetColumnBindings(), unused_bindings);
 
         // now recurse into the filter and its children
-
+        LogicalOperatorVisitor::VisitOperatorChildren(op);
 
         // then generate the projection map
         GenerateProjectionMap(op.children[1]->GetColumnBindings(), unused_bindings, comp_join.right_projection_map);

@@ -118,12 +122,14 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
         if (everything_referenced) {
             break;
         }
+        // first visit operator expressions to populate referenced columns
+        LogicalOperatorVisitor::VisitOperatorExpressions(op);
         // filter, figure out which columns are not needed after the filter
         column_binding_set_t unused_bindings;
         ExtractUnusedColumnBindings(op.children[0]->GetColumnBindings(), unused_bindings);
 
         // now recurse into the filter and its children
-
+        LogicalOperatorVisitor::VisitOperatorChildren(op);
 
         // then generate the projection map
         GenerateProjectionMap(op.children[0]->GetColumnBindings(), unused_bindings, filter.projection_map);
@@ -11,7 +11,6 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalProj
         ReplaceWithEmptyResult(*node_ptr);
         return std::move(node_stats);
     }
-
     // then propagate to each of the expressions
     for (idx_t i = 0; i < proj.expressions.size(); i++) {
         auto stats = PropagateExpression(proj.expressions[i]);
@@ -86,7 +86,7 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu
     this->bound_aggregate = true;
     unique_ptr<Expression> bound_filter;
     AggregateBinder aggregate_binder(binder, context);
-    string error, filter_error;
+    string error;
 
     // Now we bind the filter (if any)
     if (aggr.filter) {

@@ -167,9 +167,6 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu
     } else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
         return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
     }
-    if (!filter_error.empty()) {
-        return BindResult(filter_error);
-    }
 
     if (aggr.filter) {
         auto &child = BoundExpression::GetExpression(*aggr.filter);
@@ -20,10 +20,7 @@ PartialBlockAllocation RowGroupWriter::GetBlockAllocation(uint32_t segment_size)
 void SingleFileRowGroupWriter::WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state,
                                                        Serializer &serializer) {
     const auto &data_pointers = column_checkpoint_state.data_pointers;
-    serializer.WriteList(100, "data_pointers", data_pointers.size(), [&](Serializer::List &list, idx_t i) {
-        auto &data_pointer = data_pointers[i];
-        list.WriteElement(data_pointer);
-    });
+    serializer.WriteProperty(100, "data_pointers", data_pointers);
 }
 
 MetadataWriter &SingleFileRowGroupWriter::GetPayloadWriter() {
@@ -10,19 +10,42 @@ WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(BlockManager &block_manag
 }
 
 WriteOverflowStringsToDisk::~WriteOverflowStringsToDisk() {
-    if (offset > 0) {
-        block_manager.Write(handle.GetFileBuffer(), block_id);
+    // verify that the overflow writer has been flushed
+    D_ASSERT(Exception::UncaughtException() || offset == 0);
+}
+
+shared_ptr<BlockHandle> UncompressedStringSegmentState::GetHandle(BlockManager &manager, block_id_t block_id) {
+    lock_guard<mutex> lock(block_lock);
+    auto entry = handles.find(block_id);
+    if (entry != handles.end()) {
+        return entry->second;
     }
+    auto result = manager.RegisterBlock(block_id);
+    handles.insert(make_pair(block_id, result));
+    return result;
 }
 
-void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) {
+void UncompressedStringSegmentState::RegisterBlock(BlockManager &manager, block_id_t block_id) {
+    lock_guard<mutex> lock(block_lock);
+    auto entry = handles.find(block_id);
+    if (entry != handles.end()) {
+        throw InternalException("UncompressedStringSegmentState::RegisterBlock - block id %llu already exists",
+                                block_id);
+    }
+    auto result = manager.RegisterBlock(block_id);
+    handles.insert(make_pair(block_id, std::move(result)));
+    on_disk_blocks.push_back(block_id);
+}
+
+void WriteOverflowStringsToDisk::WriteString(UncompressedStringSegmentState &state, string_t string,
+                                             block_id_t &result_block, int32_t &result_offset) {
     auto &buffer_manager = block_manager.buffer_manager;
     if (!handle.IsValid()) {
        handle = buffer_manager.Allocate(Storage::BLOCK_SIZE);
     }
     // first write the length of the string
     if (block_id == INVALID_BLOCK || offset + 2 * sizeof(uint32_t) >= STRING_SPACE) {
-        AllocateNewBlock(block_manager.GetFreeBlockId());
+        AllocateNewBlock(state, block_manager.GetFreeBlockId());
     }
     result_block = block_id;
     result_offset = offset;

@@ -55,23 +78,37 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
             strptr += to_write;
         }
         if (remaining > 0) {
+            D_ASSERT(offset == WriteOverflowStringsToDisk::STRING_SPACE);
             // there is still remaining stuff to write
-            // first get the new block id and write it to the end of the previous block
-            auto new_block_id = block_manager.GetFreeBlockId();
-            Store<block_id_t>(new_block_id, data_ptr + offset);
             // now write the current block to disk and allocate a new block
-            AllocateNewBlock(new_block_id);
+            AllocateNewBlock(state, block_manager.GetFreeBlockId());
         }
     }
 }
 
-void WriteOverflowStringsToDisk::AllocateNewBlock(block_id_t new_block_id) {
+void WriteOverflowStringsToDisk::Flush() {
+    if (block_id != INVALID_BLOCK && offset > 0) {
+        // zero-initialize the empty part of the overflow string buffer (if any)
+        if (offset < STRING_SPACE) {
+            memset(handle.Ptr() + offset, 0, STRING_SPACE - offset);
+        }
+        // write to disk
+        block_manager.Write(handle.GetFileBuffer(), block_id);
+    }
+    block_id = INVALID_BLOCK;
+    offset = 0;
+}
+
+void WriteOverflowStringsToDisk::AllocateNewBlock(UncompressedStringSegmentState &state, block_id_t new_block_id) {
     if (block_id != INVALID_BLOCK) {
         // there is an old block, write it first
-        block_manager.Write(handle.GetFileBuffer(), block_id);
+        // write the new block id at the end of the previous block
+        Store<block_id_t>(new_block_id, handle.Ptr() + WriteOverflowStringsToDisk::STRING_SPACE);
+        Flush();
     }
     offset = 0;
     block_id = new_block_id;
+    state.RegisterBlock(block_manager, new_block_id);
 }
 
 } // namespace duckdb
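The rewrite above replaces the destructor's implicit write-to-disk with an explicit Flush() that zero-fills the unused tail of the current block before writing it, while the destructor only asserts that flushing already happened. Here is a self-contained sketch of that buffered-writer discipline; the OverflowWriter class, its BLOCK_SIZE, and the vector-of-strings "block manager" are all invented for illustration and are unrelated to DuckDB's actual classes.

#include <cassert>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

class OverflowWriter {
public:
    static constexpr size_t BLOCK_SIZE = 16;

    explicit OverflowWriter(std::vector<std::string> &sink) : sink(sink) {
    }
    ~OverflowWriter() {
        // like the patched destructor: flushing is the caller's responsibility
        assert(offset == 0 && "OverflowWriter destroyed without Flush()");
    }

    void Write(const std::string &payload) {
        for (char c : payload) {
            if (offset == BLOCK_SIZE) {
                Flush(); // block full: persist it and start a new one
            }
            buffer[offset++] = c;
        }
    }

    void Flush() {
        if (offset > 0) {
            // zero-fill the unused tail so the persisted block is deterministic
            std::memset(buffer + offset, 0, BLOCK_SIZE - offset);
            sink.emplace_back(buffer, BLOCK_SIZE);
        }
        offset = 0;
    }

private:
    std::vector<std::string> &sink; // stands in for the block manager
    char buffer[BLOCK_SIZE] = {};
    size_t offset = 0;
};

int main() {
    std::vector<std::string> blocks;
    {
        OverflowWriter writer(blocks);
        writer.Write("a fairly long overflow string");
        writer.Flush(); // explicit flush before the writer goes away
    }
    std::cout << blocks.size() << " blocks written\n";
    return 0;
}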
@@ -136,8 +136,6 @@ void SingleFileCheckpointWriter::CreateCheckpoint() {
 
     // truncate the file
     block_manager.Truncate();
-
-    metadata_manager.MarkBlocksAsModified();
 }
 
 void CheckpointReader::LoadCheckpoint(ClientContext &context, MetadataReader &reader) {
@@ -65,7 +65,7 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
     auto compressed_segment = ColumnSegment::CreateTransientSegment(db, type, row_start);
     if (type.InternalType() == PhysicalType::VARCHAR) {
         auto &state = compressed_segment->GetSegmentState()->Cast<UncompressedStringSegmentState>();
-        state.overflow_writer = make_uniq<WriteOverflowStringsToDisk>(checkpointer.
+        state.overflow_writer = make_uniq<WriteOverflowStringsToDisk>(checkpointer.GetRowGroup().GetBlockManager());
     }
     current_segment = std::move(compressed_segment);
     current_segment->InitializeAppend(append_state);

@@ -73,6 +73,11 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
 
 void UncompressedCompressState::FlushSegment(idx_t segment_size) {
     auto &state = checkpointer.GetCheckpointState();
+    if (current_segment->type.InternalType() == PhysicalType::VARCHAR) {
+        auto &segment_state = current_segment->GetSegmentState()->Cast<UncompressedStringSegmentState>();
+        segment_state.overflow_writer->Flush();
+        segment_state.overflow_writer.reset();
+    }
     state.FlushSegment(std::move(current_segment), segment_size);
 }
 