duckdb 0.8.2-dev4514.0 → 0.8.2-dev4623.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/duckdb.js +11 -1
- package/package.json +3 -1
- package/src/connection.cpp +48 -7
- package/src/duckdb/src/catalog/catalog.cpp +5 -0
- package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
- package/src/duckdb/src/common/enum_util.cpp +24 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +213 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/table/arrow.cpp +18 -13
- package/src/duckdb/src/function/table/read_csv.cpp +3 -130
- package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +24 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +10 -4
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
- package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/main/connection.cpp +4 -6
- package/src/duckdb/src/main/extension/extension_install.cpp +2 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +28 -9
- package/src/duckdb/src/main/relation/table_function_relation.cpp +8 -2
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
- package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/data_pointer.cpp +20 -0
- package/src/duckdb/src/storage/local_storage.cpp +3 -7
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +5 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
- package/src/duckdb/src/storage/table/column_data.cpp +17 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
- package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/duckdb/ub_src_storage.cpp +2 -0
- package/src/duckdb_node.hpp +1 -0
- package/test/close_hang.test.ts +39 -0
@@ -14,13 +14,16 @@
|
|
14
14
|
#include "duckdb/function/compression_function.hpp"
|
15
15
|
|
16
16
|
namespace duckdb {
|
17
|
+
struct UncompressedStringSegmentState;
|
17
18
|
|
18
19
|
class OverflowStringWriter {
|
19
20
|
public:
|
20
21
|
virtual ~OverflowStringWriter() {
|
21
22
|
}
|
22
23
|
|
23
|
-
virtual void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) = 0;
|
24
|
+
virtual void WriteString(UncompressedStringSegmentState &state, string_t string, block_id_t &result_block,
|
25
|
+
int32_t &result_offset) = 0;
|
26
|
+
virtual void Flush() = 0;
|
24
27
|
};
|
25
28
|
|
26
29
|
struct StringBlock {
|
@@ -43,15 +46,35 @@ struct string_location_t {
|
|
43
46
|
};
|
44
47
|
|
45
48
|
struct UncompressedStringSegmentState : public CompressedSegmentState {
|
46
|
-
~UncompressedStringSegmentState();
|
49
|
+
~UncompressedStringSegmentState() override;
|
47
50
|
|
48
51
|
//! The string block holding strings that do not fit in the main block
|
49
52
|
//! FIXME: this should be replaced by a heap that also allows freeing of unused strings
|
50
53
|
unique_ptr<StringBlock> head;
|
54
|
+
//! Map of block id to string block
|
55
|
+
unordered_map<block_id_t, reference<StringBlock>> overflow_blocks;
|
51
56
|
//! Overflow string writer (if any), if not set overflow strings will be written to memory blocks
|
52
57
|
unique_ptr<OverflowStringWriter> overflow_writer;
|
53
|
-
//!
|
54
|
-
|
58
|
+
//! The set of overflow blocks written to disk (if any)
|
59
|
+
vector<block_id_t> on_disk_blocks;
|
60
|
+
|
61
|
+
public:
|
62
|
+
shared_ptr<BlockHandle> GetHandle(BlockManager &manager, block_id_t block_id);
|
63
|
+
|
64
|
+
void RegisterBlock(BlockManager &manager, block_id_t block_id);
|
65
|
+
|
66
|
+
string GetSegmentInfo() const override {
|
67
|
+
if (on_disk_blocks.empty()) {
|
68
|
+
return "";
|
69
|
+
}
|
70
|
+
string result = StringUtil::Join(on_disk_blocks, on_disk_blocks.size(), ", ",
|
71
|
+
[&](block_id_t block) { return to_string(block); });
|
72
|
+
return "Overflow String Block Ids: " + result;
|
73
|
+
}
|
74
|
+
|
75
|
+
private:
|
76
|
+
mutex block_lock;
|
77
|
+
unordered_map<block_id_t, shared_ptr<BlockHandle>> handles;
|
55
78
|
};
|
56
79
|
|
57
80
|
} // namespace duckdb
|
@@ -30,10 +30,12 @@ public:
|
|
30
30
|
static constexpr idx_t STRING_SPACE = Storage::BLOCK_SIZE - sizeof(block_id_t);
|
31
31
|
|
32
32
|
public:
|
33
|
-
void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) override;
|
33
|
+
void WriteString(UncompressedStringSegmentState &state, string_t string, block_id_t &result_block,
|
34
|
+
int32_t &result_offset) override;
|
35
|
+
void Flush() override;
|
34
36
|
|
35
37
|
private:
|
36
|
-
void AllocateNewBlock(block_id_t new_block_id);
|
38
|
+
void AllocateNewBlock(UncompressedStringSegmentState &state, block_id_t new_block_id);
|
37
39
|
};
|
38
40
|
|
39
41
|
} // namespace duckdb
|
@@ -20,8 +20,27 @@ namespace duckdb {
|
|
20
20
|
class Serializer;
|
21
21
|
class Deserializer;
|
22
22
|
|
23
|
+
struct ColumnSegmentState {
|
24
|
+
virtual ~ColumnSegmentState() {
|
25
|
+
}
|
26
|
+
|
27
|
+
virtual void Serialize(Serializer &serializer) const = 0;
|
28
|
+
static unique_ptr<ColumnSegmentState> Deserialize(Deserializer &deserializer);
|
29
|
+
|
30
|
+
template <class TARGET>
|
31
|
+
TARGET &Cast() {
|
32
|
+
D_ASSERT(dynamic_cast<TARGET *>(this));
|
33
|
+
return reinterpret_cast<TARGET &>(*this);
|
34
|
+
}
|
35
|
+
template <class TARGET>
|
36
|
+
const TARGET &Cast() const {
|
37
|
+
D_ASSERT(dynamic_cast<const TARGET *>(this));
|
38
|
+
return reinterpret_cast<const TARGET &>(*this);
|
39
|
+
}
|
40
|
+
};
|
41
|
+
|
23
42
|
struct DataPointer {
|
24
|
-
DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
|
43
|
+
explicit DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
|
25
44
|
}
|
26
45
|
|
27
46
|
uint64_t row_start;
|
@@ -30,6 +49,8 @@ struct DataPointer {
|
|
30
49
|
CompressionType compression_type;
|
31
50
|
//! Type-specific statistics of the segment
|
32
51
|
BaseStatistics statistics;
|
52
|
+
//! Serialized segment state
|
53
|
+
unique_ptr<ColumnSegmentState> segment_state;
|
33
54
|
|
34
55
|
void Serialize(Serializer &serializer) const;
|
35
56
|
static DataPointer Deserialize(Deserializer &source);
|
@@ -16,6 +16,7 @@
|
|
16
16
|
|
17
17
|
namespace duckdb {
|
18
18
|
class DatabaseInstance;
|
19
|
+
struct MetadataBlockInfo;
|
19
20
|
|
20
21
|
struct MetadataBlock {
|
21
22
|
shared_ptr<BlockHandle> block;
|
@@ -66,6 +67,7 @@ public:
|
|
66
67
|
void MarkBlocksAsModified();
|
67
68
|
void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);
|
68
69
|
|
70
|
+
vector<MetadataBlockInfo> GetMetadataInfo() const;
|
69
71
|
idx_t BlockCount();
|
70
72
|
|
71
73
|
void Write(WriteStream &sink);
|
@@ -68,6 +68,7 @@ public:
|
|
68
68
|
virtual bool IsCheckpointClean(MetaBlockPointer checkpoint_id) = 0;
|
69
69
|
virtual void CreateCheckpoint(bool delete_wal = false, bool force_checkpoint = false) = 0;
|
70
70
|
virtual DatabaseSize GetDatabaseSize() = 0;
|
71
|
+
virtual vector<MetadataBlockInfo> GetMetadataInfo() = 0;
|
71
72
|
virtual shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) = 0;
|
72
73
|
|
73
74
|
protected:
|
@@ -112,6 +113,7 @@ public:
|
|
112
113
|
bool IsCheckpointClean(MetaBlockPointer checkpoint_id) override;
|
113
114
|
void CreateCheckpoint(bool delete_wal, bool force_checkpoint) override;
|
114
115
|
DatabaseSize GetDatabaseSize() override;
|
116
|
+
vector<MetadataBlockInfo> GetMetadataInfo() override;
|
115
117
|
shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) override;
|
116
118
|
|
117
119
|
protected:
|
@@ -56,7 +56,8 @@ public:
|
|
56
56
|
static void StringScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result);
|
57
57
|
static void StringFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
|
58
58
|
idx_t result_idx);
|
59
|
-
static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id);
|
59
|
+
static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id,
|
60
|
+
optional_ptr<ColumnSegmentState> segment_state);
|
60
61
|
|
61
62
|
static unique_ptr<CompressionAppendState> StringInitAppend(ColumnSegment &segment) {
|
62
63
|
auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
|
@@ -194,5 +195,9 @@ public:
|
|
194
195
|
data_ptr_t baseptr, int32_t dict_offset, uint32_t string_length);
|
195
196
|
static string_t FetchString(ColumnSegment &segment, StringDictionaryContainer dict, Vector &result,
|
196
197
|
data_ptr_t baseptr, string_location_t location, uint32_t string_length);
|
198
|
+
|
199
|
+
static unique_ptr<ColumnSegmentState> SerializeState(ColumnSegment &segment);
|
200
|
+
static unique_ptr<ColumnSegmentState> DeserializeState(Deserializer &deserializer);
|
201
|
+
static void CleanupState(ColumnSegment &segment);
|
197
202
|
};
|
198
203
|
} // namespace duckdb
|
@@ -57,7 +57,8 @@ public:
|
|
57
57
|
static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
|
58
58
|
block_id_t id, idx_t offset, const LogicalType &type_p,
|
59
59
|
idx_t start, idx_t count, CompressionType compression_type,
|
60
|
-
BaseStatistics statistics);
|
60
|
+
BaseStatistics statistics,
|
61
|
+
unique_ptr<ColumnSegmentState> segment_state);
|
61
62
|
static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
|
62
63
|
idx_t segment_size = Storage::BLOCK_SIZE);
|
63
64
|
static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
|
@@ -118,14 +119,17 @@ public:
|
|
118
119
|
return row_index - this->start;
|
119
120
|
}
|
120
121
|
|
121
|
-
CompressedSegmentState *GetSegmentState() {
|
122
|
+
optional_ptr<CompressedSegmentState> GetSegmentState() {
|
122
123
|
return segment_state.get();
|
123
124
|
}
|
124
125
|
|
126
|
+
void CommitDropSegment();
|
127
|
+
|
125
128
|
public:
|
126
129
|
ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
|
127
130
|
idx_t start, idx_t count, CompressionFunction &function, BaseStatistics statistics,
|
128
|
-
block_id_t block_id, idx_t offset, idx_t segment_size);
|
131
|
+
block_id_t block_id, idx_t offset, idx_t segment_size,
|
132
|
+
unique_ptr<ColumnSegmentState> segment_state = nullptr);
|
129
133
|
ColumnSegment(ColumnSegment &other, idx_t start);
|
130
134
|
|
131
135
|
private:
|
@@ -219,14 +219,12 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
|
|
219
219
|
}
|
220
220
|
|
221
221
|
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
|
222
|
-
|
223
|
-
return ReadCSV(csv_file, options);
|
222
|
+
named_parameter_map_t options;
|
223
|
+
return ReadCSV(csv_file, std::move(options));
|
224
224
|
}
|
225
225
|
|
226
|
-
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file,
|
227
|
-
|
228
|
-
options.auto_detect = true;
|
229
|
-
return make_shared<ReadCSVRelation>(context, csv_file, options);
|
226
|
+
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, named_parameter_map_t &&options) {
|
227
|
+
return make_shared<ReadCSVRelation>(context, csv_file, std::move(options));
|
230
228
|
}
|
231
229
|
|
232
230
|
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, const vector<string> &columns) {
|
@@ -158,11 +158,12 @@ void WriteExtensionFileToDisk(FileSystem &fs, const string &path, void *data, id
|
|
158
158
|
}
|
159
159
|
|
160
160
|
string ExtensionHelper::ExtensionUrlTemplate(optional_ptr<const ClientConfig> client_config, const string &repository) {
|
161
|
-
string default_endpoint = "http://extensions.duckdb.org";
|
162
161
|
string versioned_path = "/${REVISION}/${PLATFORM}/${NAME}.duckdb_extension";
|
163
162
|
#ifdef WASM_LOADABLE_EXTENSIONS
|
163
|
+
string default_endpoint = "https://extensions.duckdb.org";
|
164
164
|
versioned_path = "/duckdb-wasm" + versioned_path + ".wasm";
|
165
165
|
#else
|
166
|
+
string default_endpoint = "http://extensions.duckdb.org";
|
166
167
|
versioned_path = versioned_path + ".gz";
|
167
168
|
#endif
|
168
169
|
string custom_endpoint = client_config ? client_config->custom_extension_repo : string();
|
@@ -1,6 +1,5 @@
|
|
1
1
|
#include "duckdb/main/relation/read_csv_relation.hpp"
|
2
2
|
|
3
|
-
#include "duckdb/common/string_util.hpp"
|
4
3
|
#include "duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp"
|
5
4
|
#include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
|
6
5
|
#include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
|
@@ -8,6 +7,9 @@
|
|
8
7
|
#include "duckdb/parser/expression/comparison_expression.hpp"
|
9
8
|
#include "duckdb/parser/expression/constant_expression.hpp"
|
10
9
|
#include "duckdb/parser/expression/function_expression.hpp"
|
10
|
+
#include "duckdb/common/string_util.hpp"
|
11
|
+
#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
|
12
|
+
#include "duckdb/common/multi_file_reader.hpp"
|
11
13
|
#include "duckdb/parser/expression/star_expression.hpp"
|
12
14
|
#include "duckdb/parser/query_node/select_node.hpp"
|
13
15
|
#include "duckdb/parser/tableref/basetableref.hpp"
|
@@ -34,8 +36,8 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
|
|
34
36
|
AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
|
35
37
|
}
|
36
38
|
|
37
|
-
ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
|
38
|
-
|
39
|
+
ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, const string &csv_file,
|
40
|
+
named_parameter_map_t &&options, string alias_p)
|
39
41
|
: TableFunctionRelation(context, "read_csv_auto", {Value(csv_file)}, nullptr, false), alias(std::move(alias_p)),
|
40
42
|
auto_detect(true) {
|
41
43
|
|
@@ -43,12 +45,24 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
|
|
43
45
|
alias = StringUtil::Split(csv_file, ".")[0];
|
44
46
|
}
|
45
47
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
auto
|
48
|
+
auto files = MultiFileReader::GetFileList(*context, csv_file, "CSV");
|
49
|
+
D_ASSERT(!files.empty());
|
50
|
+
|
51
|
+
auto &file_name = files[0];
|
52
|
+
options["auto_detect"] = Value::BOOLEAN(true);
|
53
|
+
CSVReaderOptions csv_options;
|
54
|
+
csv_options.file_path = file_name;
|
55
|
+
vector<string> empty;
|
56
|
+
|
57
|
+
vector<LogicalType> unused_types;
|
58
|
+
vector<string> unused_names;
|
59
|
+
csv_options.FromNamedParameters(options, *context, unused_types, unused_names);
|
60
|
+
// Run the auto-detect, populating the options with the detected settings
|
61
|
+
|
62
|
+
auto bm_file_handle = BaseCSVReader::OpenCSV(*context, csv_options);
|
63
|
+
auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), csv_options);
|
50
64
|
CSVStateMachineCache state_machine_cache;
|
51
|
-
CSVSniffer sniffer(
|
65
|
+
CSVSniffer sniffer(csv_options, buffer_manager, state_machine_cache);
|
52
66
|
auto sniffer_result = sniffer.SniffCSV();
|
53
67
|
auto &types = sniffer_result.return_types;
|
54
68
|
auto &names = sniffer_result.names;
|
@@ -56,7 +70,12 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
|
|
56
70
|
columns.emplace_back(names[i], types[i]);
|
57
71
|
}
|
58
72
|
|
59
|
-
|
73
|
+
//! Capture the options potentially set/altered by the auto detection phase
|
74
|
+
csv_options.ToNamedParameters(options);
|
75
|
+
|
76
|
+
// No need to auto-detect again
|
77
|
+
options["auto_detect"] = Value::BOOLEAN(false);
|
78
|
+
SetNamedParameters(std::move(options));
|
60
79
|
}
|
61
80
|
|
62
81
|
string ReadCSVRelation::GetAlias() {
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#include "duckdb/main/client_context.hpp"
|
10
10
|
#include "duckdb/parser/expression/comparison_expression.hpp"
|
11
11
|
#include "duckdb/parser/expression/columnref_expression.hpp"
|
12
|
+
#include "duckdb/common/shared_ptr.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
|
@@ -16,7 +17,12 @@ void TableFunctionRelation::AddNamedParameter(const string &name, Value argument
|
|
16
17
|
named_parameters[name] = std::move(argument);
|
17
18
|
}
|
18
19
|
|
19
|
-
TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
|
20
|
+
void TableFunctionRelation::SetNamedParameters(named_parameter_map_t &&options) {
|
21
|
+
D_ASSERT(named_parameters.empty());
|
22
|
+
named_parameters = std::move(options);
|
23
|
+
}
|
24
|
+
|
25
|
+
TableFunctionRelation::TableFunctionRelation(const shared_ptr<ClientContext> &context, string name_p,
|
20
26
|
vector<Value> parameters_p, named_parameter_map_t named_parameters,
|
21
27
|
shared_ptr<Relation> input_relation_p, bool auto_init)
|
22
28
|
: Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
|
@@ -25,7 +31,7 @@ TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext
|
|
25
31
|
InitializeColumns();
|
26
32
|
}
|
27
33
|
|
28
|
-
TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
|
34
|
+
TableFunctionRelation::TableFunctionRelation(const shared_ptr<ClientContext> &context, string name_p,
|
29
35
|
vector<Value> parameters_p, shared_ptr<Relation> input_relation_p,
|
30
36
|
bool auto_init)
|
31
37
|
: Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
|
@@ -86,7 +86,7 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu
|
|
86
86
|
this->bound_aggregate = true;
|
87
87
|
unique_ptr<Expression> bound_filter;
|
88
88
|
AggregateBinder aggregate_binder(binder, context);
|
89
|
-
string error, filter_error;
|
89
|
+
string error;
|
90
90
|
|
91
91
|
// Now we bind the filter (if any)
|
92
92
|
if (aggr.filter) {
|
@@ -167,9 +167,6 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu
|
|
167
167
|
} else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
|
168
168
|
return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
|
169
169
|
}
|
170
|
-
if (!filter_error.empty()) {
|
171
|
-
return BindResult(filter_error);
|
172
|
-
}
|
173
170
|
|
174
171
|
if (aggr.filter) {
|
175
172
|
auto &child = BoundExpression::GetExpression(*aggr.filter);
|
@@ -20,10 +20,7 @@ PartialBlockAllocation RowGroupWriter::GetBlockAllocation(uint32_t segment_size)
|
|
20
20
|
void SingleFileRowGroupWriter::WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state,
|
21
21
|
Serializer &serializer) {
|
22
22
|
const auto &data_pointers = column_checkpoint_state.data_pointers;
|
23
|
-
serializer.
|
24
|
-
auto &data_pointer = data_pointers[i];
|
25
|
-
list.WriteElement(data_pointer);
|
26
|
-
});
|
23
|
+
serializer.WriteProperty(100, "data_pointers", data_pointers);
|
27
24
|
}
|
28
25
|
|
29
26
|
MetadataWriter &SingleFileRowGroupWriter::GetPayloadWriter() {
|
@@ -10,19 +10,42 @@ WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(BlockManager &block_manag
|
|
10
10
|
}
|
11
11
|
|
12
12
|
WriteOverflowStringsToDisk::~WriteOverflowStringsToDisk() {
|
13
|
-
|
14
|
-
|
13
|
+
// verify that the overflow writer has been flushed
|
14
|
+
D_ASSERT(Exception::UncaughtException() || offset == 0);
|
15
|
+
}
|
16
|
+
|
17
|
+
shared_ptr<BlockHandle> UncompressedStringSegmentState::GetHandle(BlockManager &manager, block_id_t block_id) {
|
18
|
+
lock_guard<mutex> lock(block_lock);
|
19
|
+
auto entry = handles.find(block_id);
|
20
|
+
if (entry != handles.end()) {
|
21
|
+
return entry->second;
|
15
22
|
}
|
23
|
+
auto result = manager.RegisterBlock(block_id);
|
24
|
+
handles.insert(make_pair(block_id, result));
|
25
|
+
return result;
|
16
26
|
}
|
17
27
|
|
18
|
-
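void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) {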
|
28
|
+
void UncompressedStringSegmentState::RegisterBlock(BlockManager &manager, block_id_t block_id) {
|
29
|
+
lock_guard<mutex> lock(block_lock);
|
30
|
+
auto entry = handles.find(block_id);
|
31
|
+
if (entry != handles.end()) {
|
32
|
+
throw InternalException("UncompressedStringSegmentState::RegisterBlock - block id %llu already exists",
|
33
|
+
block_id);
|
34
|
+
}
|
35
|
+
auto result = manager.RegisterBlock(block_id);
|
36
|
+
handles.insert(make_pair(block_id, std::move(result)));
|
37
|
+
on_disk_blocks.push_back(block_id);
|
38
|
+
}
|
39
|
+
|
40
|
+
void WriteOverflowStringsToDisk::WriteString(UncompressedStringSegmentState &state, string_t string,
|
41
|
+
block_id_t &result_block, int32_t &result_offset) {
|
19
42
|
auto &buffer_manager = block_manager.buffer_manager;
|
20
43
|
if (!handle.IsValid()) {
|
21
44
|
handle = buffer_manager.Allocate(Storage::BLOCK_SIZE);
|
22
45
|
}
|
23
46
|
// first write the length of the string
|
24
47
|
if (block_id == INVALID_BLOCK || offset + 2 * sizeof(uint32_t) >= STRING_SPACE) {
|
25
|
-
AllocateNewBlock(block_manager.GetFreeBlockId());
|
48
|
+
AllocateNewBlock(state, block_manager.GetFreeBlockId());
|
26
49
|
}
|
27
50
|
result_block = block_id;
|
28
51
|
result_offset = offset;
|
@@ -55,23 +78,37 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
|
|
55
78
|
strptr += to_write;
|
56
79
|
}
|
57
80
|
if (remaining > 0) {
|
81
|
+
D_ASSERT(offset == WriteOverflowStringsToDisk::STRING_SPACE);
|
58
82
|
// there is still remaining stuff to write
|
59
|
-
// first get the new block id and write it to the end of the previous block
|
60
|
-
auto new_block_id = block_manager.GetFreeBlockId();
|
61
|
-
Store<block_id_t>(new_block_id, data_ptr + offset);
|
62
83
|
// now write the current block to disk and allocate a new block
|
63
|
-
AllocateNewBlock(new_block_id);
|
84
|
+
AllocateNewBlock(state, block_manager.GetFreeBlockId());
|
64
85
|
}
|
65
86
|
}
|
66
87
|
}
|
67
88
|
|
68
|
-
void WriteOverflowStringsToDisk::AllocateNewBlock(block_id_t new_block_id) {
|
89
|
+
void WriteOverflowStringsToDisk::Flush() {
|
90
|
+
if (block_id != INVALID_BLOCK && offset > 0) {
|
91
|
+
// zero-initialize the empty part of the overflow string buffer (if any)
|
92
|
+
if (offset < STRING_SPACE) {
|
93
|
+
memset(handle.Ptr() + offset, 0, STRING_SPACE - offset);
|
94
|
+
}
|
95
|
+
// write to disk
|
96
|
+
block_manager.Write(handle.GetFileBuffer(), block_id);
|
97
|
+
}
|
98
|
+
block_id = INVALID_BLOCK;
|
99
|
+
offset = 0;
|
100
|
+
}
|
101
|
+
|
102
|
+
void WriteOverflowStringsToDisk::AllocateNewBlock(UncompressedStringSegmentState &state, block_id_t new_block_id) {
|
69
103
|
if (block_id != INVALID_BLOCK) {
|
70
104
|
// there is an old block, write it first
|
71
|
-
|
105
|
+
// write the new block id at the end of the previous block
|
106
|
+
Store<block_id_t>(new_block_id, handle.Ptr() + WriteOverflowStringsToDisk::STRING_SPACE);
|
107
|
+
Flush();
|
72
108
|
}
|
73
109
|
offset = 0;
|
74
110
|
block_id = new_block_id;
|
111
|
+
state.RegisterBlock(block_manager, new_block_id);
|
75
112
|
}
|
76
113
|
|
77
114
|
} // namespace duckdb
|
@@ -136,8 +136,6 @@ void SingleFileCheckpointWriter::CreateCheckpoint() {
|
|
136
136
|
|
137
137
|
// truncate the file
|
138
138
|
block_manager.Truncate();
|
139
|
-
|
140
|
-
metadata_manager.MarkBlocksAsModified();
|
141
139
|
}
|
142
140
|
|
143
141
|
void CheckpointReader::LoadCheckpoint(ClientContext &context, MetadataReader &reader) {
|
@@ -65,7 +65,7 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
|
|
65
65
|
auto compressed_segment = ColumnSegment::CreateTransientSegment(db, type, row_start);
|
66
66
|
if (type.InternalType() == PhysicalType::VARCHAR) {
|
67
67
|
auto &state = compressed_segment->GetSegmentState()->Cast<UncompressedStringSegmentState>();
|
68
|
-
state.overflow_writer = make_uniq<WriteOverflowStringsToDisk>(checkpointer.
|
68
|
+
state.overflow_writer = make_uniq<WriteOverflowStringsToDisk>(checkpointer.GetRowGroup().GetBlockManager());
|
69
69
|
}
|
70
70
|
current_segment = std::move(compressed_segment);
|
71
71
|
current_segment->InitializeAppend(append_state);
|
@@ -73,6 +73,11 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
|
|
73
73
|
|
74
74
|
void UncompressedCompressState::FlushSegment(idx_t segment_size) {
|
75
75
|
auto &state = checkpointer.GetCheckpointState();
|
76
|
+
if (current_segment->type.InternalType() == PhysicalType::VARCHAR) {
|
77
|
+
auto &segment_state = current_segment->GetSegmentState()->Cast<UncompressedStringSegmentState>();
|
78
|
+
segment_state.overflow_writer->Flush();
|
79
|
+
segment_state.overflow_writer.reset();
|
80
|
+
}
|
76
81
|
state.FlushSegment(std::move(current_segment), segment_size);
|
77
82
|
}
|
78
83
|
|