duckdb 0.8.2-dev4514.0 → 0.8.2-dev4623.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/lib/duckdb.js +11 -1
  2. package/package.json +3 -1
  3. package/src/connection.cpp +48 -7
  4. package/src/duckdb/src/catalog/catalog.cpp +5 -0
  5. package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
  6. package/src/duckdb/src/common/enum_util.cpp +24 -0
  7. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +213 -2
  8. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
  9. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  10. package/src/duckdb/src/function/table/arrow.cpp +18 -13
  11. package/src/duckdb/src/function/table/read_csv.cpp +3 -130
  12. package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
  13. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
  14. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  15. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  16. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
  17. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
  18. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +1 -1
  19. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  20. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
  21. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +24 -0
  22. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
  23. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  24. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  25. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -1
  26. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +10 -4
  27. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +3 -3
  28. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  29. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
  30. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
  31. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
  32. package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
  33. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
  36. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
  37. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  38. package/src/duckdb/src/main/connection.cpp +4 -6
  39. package/src/duckdb/src/main/extension/extension_install.cpp +2 -1
  40. package/src/duckdb/src/main/relation/read_csv_relation.cpp +28 -9
  41. package/src/duckdb/src/main/relation/table_function_relation.cpp +8 -2
  42. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
  43. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
  44. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
  45. package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
  46. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
  47. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
  48. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
  49. package/src/duckdb/src/storage/data_pointer.cpp +20 -0
  50. package/src/duckdb/src/storage/local_storage.cpp +3 -7
  51. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
  52. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
  53. package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
  54. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  55. package/src/duckdb/src/storage/storage_manager.cpp +5 -0
  56. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
  57. package/src/duckdb/src/storage/table/column_data.cpp +17 -14
  58. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
  59. package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
  60. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  61. package/src/duckdb/ub_src_storage.cpp +2 -0
  62. package/src/duckdb_node.hpp +1 -0
  63. package/test/close_hang.test.ts +39 -0
@@ -14,13 +14,16 @@
14
14
  #include "duckdb/function/compression_function.hpp"
15
15
 
16
16
  namespace duckdb {
17
+ struct UncompressedStringSegmentState;
17
18
 
18
19
  class OverflowStringWriter {
19
20
  public:
20
21
  virtual ~OverflowStringWriter() {
21
22
  }
22
23
 
23
- virtual void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) = 0;
24
+ virtual void WriteString(UncompressedStringSegmentState &state, string_t string, block_id_t &result_block,
25
+ int32_t &result_offset) = 0;
26
+ virtual void Flush() = 0;
24
27
  };
25
28
 
26
29
  struct StringBlock {
@@ -43,15 +46,35 @@ struct string_location_t {
43
46
  };
44
47
 
45
48
  struct UncompressedStringSegmentState : public CompressedSegmentState {
46
- ~UncompressedStringSegmentState();
49
+ ~UncompressedStringSegmentState() override;
47
50
 
48
51
  //! The string block holding strings that do not fit in the main block
49
52
  //! FIXME: this should be replaced by a heap that also allows freeing of unused strings
50
53
  unique_ptr<StringBlock> head;
54
+ //! Map of block id to string block
55
+ unordered_map<block_id_t, reference<StringBlock>> overflow_blocks;
51
56
  //! Overflow string writer (if any), if not set overflow strings will be written to memory blocks
52
57
  unique_ptr<OverflowStringWriter> overflow_writer;
53
- //! Map of block id to string block
54
- unordered_map<block_id_t, StringBlock *> overflow_blocks;
58
+ //! The set of overflow blocks written to disk (if any)
59
+ vector<block_id_t> on_disk_blocks;
60
+
61
+ public:
62
+ shared_ptr<BlockHandle> GetHandle(BlockManager &manager, block_id_t block_id);
63
+
64
+ void RegisterBlock(BlockManager &manager, block_id_t block_id);
65
+
66
+ string GetSegmentInfo() const override {
67
+ if (on_disk_blocks.empty()) {
68
+ return "";
69
+ }
70
+ string result = StringUtil::Join(on_disk_blocks, on_disk_blocks.size(), ", ",
71
+ [&](block_id_t block) { return to_string(block); });
72
+ return "Overflow String Block Ids: " + result;
73
+ }
74
+
75
+ private:
76
+ mutex block_lock;
77
+ unordered_map<block_id_t, shared_ptr<BlockHandle>> handles;
55
78
  };
56
79
 
57
80
  } // namespace duckdb
@@ -30,10 +30,12 @@ public:
30
30
  static constexpr idx_t STRING_SPACE = Storage::BLOCK_SIZE - sizeof(block_id_t);
31
31
 
32
32
  public:
33
- void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) override;
33
+ void WriteString(UncompressedStringSegmentState &state, string_t string, block_id_t &result_block,
34
+ int32_t &result_offset) override;
35
+ void Flush() override;
34
36
 
35
37
  private:
36
- void AllocateNewBlock(block_id_t new_block_id);
38
+ void AllocateNewBlock(UncompressedStringSegmentState &state, block_id_t new_block_id);
37
39
  };
38
40
 
39
41
  } // namespace duckdb
@@ -20,8 +20,27 @@ namespace duckdb {
20
20
  class Serializer;
21
21
  class Deserializer;
22
22
 
23
+ struct ColumnSegmentState {
24
+ virtual ~ColumnSegmentState() {
25
+ }
26
+
27
+ virtual void Serialize(Serializer &serializer) const = 0;
28
+ static unique_ptr<ColumnSegmentState> Deserialize(Deserializer &deserializer);
29
+
30
+ template <class TARGET>
31
+ TARGET &Cast() {
32
+ D_ASSERT(dynamic_cast<TARGET *>(this));
33
+ return reinterpret_cast<TARGET &>(*this);
34
+ }
35
+ template <class TARGET>
36
+ const TARGET &Cast() const {
37
+ D_ASSERT(dynamic_cast<const TARGET *>(this));
38
+ return reinterpret_cast<const TARGET &>(*this);
39
+ }
40
+ };
41
+
23
42
  struct DataPointer {
24
- DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
43
+ explicit DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
25
44
  }
26
45
 
27
46
  uint64_t row_start;
@@ -30,6 +49,8 @@ struct DataPointer {
30
49
  CompressionType compression_type;
31
50
  //! Type-specific statistics of the segment
32
51
  BaseStatistics statistics;
52
+ //! Serialized segment state
53
+ unique_ptr<ColumnSegmentState> segment_state;
33
54
 
34
55
  void Serialize(Serializer &serializer) const;
35
56
  static DataPointer Deserialize(Deserializer &source);
@@ -21,4 +21,10 @@ struct DatabaseSize {
21
21
  idx_t wal_size = 0;
22
22
  };
23
23
 
24
+ struct MetadataBlockInfo {
25
+ block_id_t block_id;
26
+ idx_t total_blocks;
27
+ vector<idx_t> free_list;
28
+ };
29
+
24
30
  } // namespace duckdb
@@ -16,6 +16,7 @@
16
16
 
17
17
  namespace duckdb {
18
18
  class DatabaseInstance;
19
+ struct MetadataBlockInfo;
19
20
 
20
21
  struct MetadataBlock {
21
22
  shared_ptr<BlockHandle> block;
@@ -66,6 +67,7 @@ public:
66
67
  void MarkBlocksAsModified();
67
68
  void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);
68
69
 
70
+ vector<MetadataBlockInfo> GetMetadataInfo() const;
69
71
  idx_t BlockCount();
70
72
 
71
73
  void Write(WriteStream &sink);
@@ -68,6 +68,7 @@ public:
68
68
  virtual bool IsCheckpointClean(MetaBlockPointer checkpoint_id) = 0;
69
69
  virtual void CreateCheckpoint(bool delete_wal = false, bool force_checkpoint = false) = 0;
70
70
  virtual DatabaseSize GetDatabaseSize() = 0;
71
+ virtual vector<MetadataBlockInfo> GetMetadataInfo() = 0;
71
72
  virtual shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) = 0;
72
73
 
73
74
  protected:
@@ -112,6 +113,7 @@ public:
112
113
  bool IsCheckpointClean(MetaBlockPointer checkpoint_id) override;
113
114
  void CreateCheckpoint(bool delete_wal, bool force_checkpoint) override;
114
115
  DatabaseSize GetDatabaseSize() override;
116
+ vector<MetadataBlockInfo> GetMetadataInfo() override;
115
117
  shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) override;
116
118
 
117
119
  protected:
@@ -56,7 +56,8 @@ public:
56
56
  static void StringScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result);
57
57
  static void StringFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
58
58
  idx_t result_idx);
59
- static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id);
59
+ static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id,
60
+ optional_ptr<ColumnSegmentState> segment_state);
60
61
 
61
62
  static unique_ptr<CompressionAppendState> StringInitAppend(ColumnSegment &segment) {
62
63
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
@@ -194,5 +195,9 @@ public:
194
195
  data_ptr_t baseptr, int32_t dict_offset, uint32_t string_length);
195
196
  static string_t FetchString(ColumnSegment &segment, StringDictionaryContainer dict, Vector &result,
196
197
  data_ptr_t baseptr, string_location_t location, uint32_t string_length);
198
+
199
+ static unique_ptr<ColumnSegmentState> SerializeState(ColumnSegment &segment);
200
+ static unique_ptr<ColumnSegmentState> DeserializeState(Deserializer &deserializer);
201
+ static void CleanupState(ColumnSegment &segment);
197
202
  };
198
203
  } // namespace duckdb
@@ -57,7 +57,8 @@ public:
57
57
  static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
58
58
  block_id_t id, idx_t offset, const LogicalType &type_p,
59
59
  idx_t start, idx_t count, CompressionType compression_type,
60
- BaseStatistics statistics);
60
+ BaseStatistics statistics,
61
+ unique_ptr<ColumnSegmentState> segment_state);
61
62
  static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
62
63
  idx_t segment_size = Storage::BLOCK_SIZE);
63
64
  static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
@@ -118,14 +119,17 @@ public:
118
119
  return row_index - this->start;
119
120
  }
120
121
 
121
- CompressedSegmentState *GetSegmentState() {
122
+ optional_ptr<CompressedSegmentState> GetSegmentState() {
122
123
  return segment_state.get();
123
124
  }
124
125
 
126
+ void CommitDropSegment();
127
+
125
128
  public:
126
129
  ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
127
130
  idx_t start, idx_t count, CompressionFunction &function, BaseStatistics statistics,
128
- block_id_t block_id, idx_t offset, idx_t segment_size);
131
+ block_id_t block_id, idx_t offset, idx_t segment_size,
132
+ unique_ptr<ColumnSegmentState> segment_state = nullptr);
129
133
  ColumnSegment(ColumnSegment &other, idx_t start);
130
134
 
131
135
  private:
@@ -28,6 +28,7 @@ struct ColumnSegmentInfo {
28
28
  bool persistent;
29
29
  block_id_t block_id;
30
30
  idx_t block_offset;
31
+ string segment_info;
31
32
  };
32
33
 
33
34
  struct IndexInfo {
@@ -219,14 +219,12 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
219
219
  }
220
220
 
221
221
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
222
- CSVReaderOptions options;
223
- return ReadCSV(csv_file, options);
222
+ named_parameter_map_t options;
223
+ return ReadCSV(csv_file, std::move(options));
224
224
  }
225
225
 
226
- shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, CSVReaderOptions &options) {
227
- options.file_path = csv_file;
228
- options.auto_detect = true;
229
- return make_shared<ReadCSVRelation>(context, csv_file, options);
226
+ shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, named_parameter_map_t &&options) {
227
+ return make_shared<ReadCSVRelation>(context, csv_file, std::move(options));
230
228
  }
231
229
 
232
230
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, const vector<string> &columns) {
@@ -158,11 +158,12 @@ void WriteExtensionFileToDisk(FileSystem &fs, const string &path, void *data, id
158
158
  }
159
159
 
160
160
  string ExtensionHelper::ExtensionUrlTemplate(optional_ptr<const ClientConfig> client_config, const string &repository) {
161
- string default_endpoint = "http://extensions.duckdb.org";
162
161
  string versioned_path = "/${REVISION}/${PLATFORM}/${NAME}.duckdb_extension";
163
162
  #ifdef WASM_LOADABLE_EXTENSIONS
163
+ string default_endpoint = "https://extensions.duckdb.org";
164
164
  versioned_path = "/duckdb-wasm" + versioned_path + ".wasm";
165
165
  #else
166
+ string default_endpoint = "http://extensions.duckdb.org";
166
167
  versioned_path = versioned_path + ".gz";
167
168
  #endif
168
169
  string custom_endpoint = client_config ? client_config->custom_extension_repo : string();
@@ -1,6 +1,5 @@
1
1
  #include "duckdb/main/relation/read_csv_relation.hpp"
2
2
 
3
- #include "duckdb/common/string_util.hpp"
4
3
  #include "duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp"
5
4
  #include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
6
5
  #include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
@@ -8,6 +7,9 @@
8
7
  #include "duckdb/parser/expression/comparison_expression.hpp"
9
8
  #include "duckdb/parser/expression/constant_expression.hpp"
10
9
  #include "duckdb/parser/expression/function_expression.hpp"
10
+ #include "duckdb/common/string_util.hpp"
11
+ #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
12
+ #include "duckdb/common/multi_file_reader.hpp"
11
13
  #include "duckdb/parser/expression/star_expression.hpp"
12
14
  #include "duckdb/parser/query_node/select_node.hpp"
13
15
  #include "duckdb/parser/tableref/basetableref.hpp"
@@ -34,8 +36,8 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
34
36
  AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
35
37
  }
36
38
 
37
- ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
38
- CSVReaderOptions options, string alias_p)
39
+ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, const string &csv_file,
40
+ named_parameter_map_t &&options, string alias_p)
39
41
  : TableFunctionRelation(context, "read_csv_auto", {Value(csv_file)}, nullptr, false), alias(std::move(alias_p)),
40
42
  auto_detect(true) {
41
43
 
@@ -43,12 +45,24 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
43
45
  alias = StringUtil::Split(csv_file, ".")[0];
44
46
  }
45
47
 
46
- // Force auto_detect for this constructor
47
- options.auto_detect = true;
48
- auto bm_file_handle = BaseCSVReader::OpenCSV(*context, options);
49
- auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), options);
48
+ auto files = MultiFileReader::GetFileList(*context, csv_file, "CSV");
49
+ D_ASSERT(!files.empty());
50
+
51
+ auto &file_name = files[0];
52
+ options["auto_detect"] = Value::BOOLEAN(true);
53
+ CSVReaderOptions csv_options;
54
+ csv_options.file_path = file_name;
55
+ vector<string> empty;
56
+
57
+ vector<LogicalType> unused_types;
58
+ vector<string> unused_names;
59
+ csv_options.FromNamedParameters(options, *context, unused_types, unused_names);
60
+ // Run the auto-detect, populating the options with the detected settings
61
+
62
+ auto bm_file_handle = BaseCSVReader::OpenCSV(*context, csv_options);
63
+ auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), csv_options);
50
64
  CSVStateMachineCache state_machine_cache;
51
- CSVSniffer sniffer(options, buffer_manager, state_machine_cache);
65
+ CSVSniffer sniffer(csv_options, buffer_manager, state_machine_cache);
52
66
  auto sniffer_result = sniffer.SniffCSV();
53
67
  auto &types = sniffer_result.return_types;
54
68
  auto &names = sniffer_result.names;
@@ -56,7 +70,12 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
56
70
  columns.emplace_back(names[i], types[i]);
57
71
  }
58
72
 
59
- AddNamedParameter("auto_detect", Value::BOOLEAN(true));
73
+ //! Capture the options potentially set/altered by the auto detection phase
74
+ csv_options.ToNamedParameters(options);
75
+
76
+ // No need to auto-detect again
77
+ options["auto_detect"] = Value::BOOLEAN(false);
78
+ SetNamedParameters(std::move(options));
60
79
  }
61
80
 
62
81
  string ReadCSVRelation::GetAlias() {
@@ -9,6 +9,7 @@
9
9
  #include "duckdb/main/client_context.hpp"
10
10
  #include "duckdb/parser/expression/comparison_expression.hpp"
11
11
  #include "duckdb/parser/expression/columnref_expression.hpp"
12
+ #include "duckdb/common/shared_ptr.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
@@ -16,7 +17,12 @@ void TableFunctionRelation::AddNamedParameter(const string &name, Value argument
16
17
  named_parameters[name] = std::move(argument);
17
18
  }
18
19
 
19
- TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
20
+ void TableFunctionRelation::SetNamedParameters(named_parameter_map_t &&options) {
21
+ D_ASSERT(named_parameters.empty());
22
+ named_parameters = std::move(options);
23
+ }
24
+
25
+ TableFunctionRelation::TableFunctionRelation(const shared_ptr<ClientContext> &context, string name_p,
20
26
  vector<Value> parameters_p, named_parameter_map_t named_parameters,
21
27
  shared_ptr<Relation> input_relation_p, bool auto_init)
22
28
  : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
@@ -25,7 +31,7 @@ TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext
25
31
  InitializeColumns();
26
32
  }
27
33
 
28
- TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
34
+ TableFunctionRelation::TableFunctionRelation(const shared_ptr<ClientContext> &context, string name_p,
29
35
  vector<Value> parameters_p, shared_ptr<Relation> input_relation_p,
30
36
  bool auto_init)
31
37
  : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
@@ -86,7 +86,7 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu
86
86
  this->bound_aggregate = true;
87
87
  unique_ptr<Expression> bound_filter;
88
88
  AggregateBinder aggregate_binder(binder, context);
89
- string error, filter_error;
89
+ string error;
90
90
 
91
91
  // Now we bind the filter (if any)
92
92
  if (aggr.filter) {
@@ -167,9 +167,6 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu
167
167
  } else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
168
168
  return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
169
169
  }
170
- if (!filter_error.empty()) {
171
- return BindResult(filter_error);
172
- }
173
170
 
174
171
  if (aggr.filter) {
175
172
  auto &child = BoundExpression::GetExpression(*aggr.filter);
@@ -20,10 +20,7 @@ PartialBlockAllocation RowGroupWriter::GetBlockAllocation(uint32_t segment_size)
20
20
  void SingleFileRowGroupWriter::WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state,
21
21
  Serializer &serializer) {
22
22
  const auto &data_pointers = column_checkpoint_state.data_pointers;
23
- serializer.WriteList(100, "data_pointers", data_pointers.size(), [&](Serializer::List &list, idx_t i) {
24
- auto &data_pointer = data_pointers[i];
25
- list.WriteElement(data_pointer);
26
- });
23
+ serializer.WriteProperty(100, "data_pointers", data_pointers);
27
24
  }
28
25
 
29
26
  MetadataWriter &SingleFileRowGroupWriter::GetPayloadWriter() {
@@ -10,19 +10,42 @@ WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(BlockManager &block_manag
10
10
  }
11
11
 
12
12
  WriteOverflowStringsToDisk::~WriteOverflowStringsToDisk() {
13
- if (offset > 0) {
14
- block_manager.Write(handle.GetFileBuffer(), block_id);
13
+ // verify that the overflow writer has been flushed
14
+ D_ASSERT(Exception::UncaughtException() || offset == 0);
15
+ }
16
+
17
+ shared_ptr<BlockHandle> UncompressedStringSegmentState::GetHandle(BlockManager &manager, block_id_t block_id) {
18
+ lock_guard<mutex> lock(block_lock);
19
+ auto entry = handles.find(block_id);
20
+ if (entry != handles.end()) {
21
+ return entry->second;
15
22
  }
23
+ auto result = manager.RegisterBlock(block_id);
24
+ handles.insert(make_pair(block_id, result));
25
+ return result;
16
26
  }
17
27
 
18
- void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) {
28
+ void UncompressedStringSegmentState::RegisterBlock(BlockManager &manager, block_id_t block_id) {
29
+ lock_guard<mutex> lock(block_lock);
30
+ auto entry = handles.find(block_id);
31
+ if (entry != handles.end()) {
32
+ throw InternalException("UncompressedStringSegmentState::RegisterBlock - block id %llu already exists",
33
+ block_id);
34
+ }
35
+ auto result = manager.RegisterBlock(block_id);
36
+ handles.insert(make_pair(block_id, std::move(result)));
37
+ on_disk_blocks.push_back(block_id);
38
+ }
39
+
40
+ void WriteOverflowStringsToDisk::WriteString(UncompressedStringSegmentState &state, string_t string,
41
+ block_id_t &result_block, int32_t &result_offset) {
19
42
  auto &buffer_manager = block_manager.buffer_manager;
20
43
  if (!handle.IsValid()) {
21
44
  handle = buffer_manager.Allocate(Storage::BLOCK_SIZE);
22
45
  }
23
46
  // first write the length of the string
24
47
  if (block_id == INVALID_BLOCK || offset + 2 * sizeof(uint32_t) >= STRING_SPACE) {
25
- AllocateNewBlock(block_manager.GetFreeBlockId());
48
+ AllocateNewBlock(state, block_manager.GetFreeBlockId());
26
49
  }
27
50
  result_block = block_id;
28
51
  result_offset = offset;
@@ -55,23 +78,37 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
55
78
  strptr += to_write;
56
79
  }
57
80
  if (remaining > 0) {
81
+ D_ASSERT(offset == WriteOverflowStringsToDisk::STRING_SPACE);
58
82
  // there is still remaining stuff to write
59
- // first get the new block id and write it to the end of the previous block
60
- auto new_block_id = block_manager.GetFreeBlockId();
61
- Store<block_id_t>(new_block_id, data_ptr + offset);
62
83
  // now write the current block to disk and allocate a new block
63
- AllocateNewBlock(new_block_id);
84
+ AllocateNewBlock(state, block_manager.GetFreeBlockId());
64
85
  }
65
86
  }
66
87
  }
67
88
 
68
- void WriteOverflowStringsToDisk::AllocateNewBlock(block_id_t new_block_id) {
89
+ void WriteOverflowStringsToDisk::Flush() {
90
+ if (block_id != INVALID_BLOCK && offset > 0) {
91
+ // zero-initialize the empty part of the overflow string buffer (if any)
92
+ if (offset < STRING_SPACE) {
93
+ memset(handle.Ptr() + offset, 0, STRING_SPACE - offset);
94
+ }
95
+ // write to disk
96
+ block_manager.Write(handle.GetFileBuffer(), block_id);
97
+ }
98
+ block_id = INVALID_BLOCK;
99
+ offset = 0;
100
+ }
101
+
102
+ void WriteOverflowStringsToDisk::AllocateNewBlock(UncompressedStringSegmentState &state, block_id_t new_block_id) {
69
103
  if (block_id != INVALID_BLOCK) {
70
104
  // there is an old block, write it first
71
- block_manager.Write(handle.GetFileBuffer(), block_id);
105
+ // write the new block id at the end of the previous block
106
+ Store<block_id_t>(new_block_id, handle.Ptr() + WriteOverflowStringsToDisk::STRING_SPACE);
107
+ Flush();
72
108
  }
73
109
  offset = 0;
74
110
  block_id = new_block_id;
111
+ state.RegisterBlock(block_manager, new_block_id);
75
112
  }
76
113
 
77
114
  } // namespace duckdb
@@ -136,8 +136,6 @@ void SingleFileCheckpointWriter::CreateCheckpoint() {
136
136
 
137
137
  // truncate the file
138
138
  block_manager.Truncate();
139
-
140
- metadata_manager.MarkBlocksAsModified();
141
139
  }
142
140
 
143
141
  void CheckpointReader::LoadCheckpoint(ClientContext &context, MetadataReader &reader) {
@@ -65,7 +65,7 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
65
65
  auto compressed_segment = ColumnSegment::CreateTransientSegment(db, type, row_start);
66
66
  if (type.InternalType() == PhysicalType::VARCHAR) {
67
67
  auto &state = compressed_segment->GetSegmentState()->Cast<UncompressedStringSegmentState>();
68
- state.overflow_writer = make_uniq<WriteOverflowStringsToDisk>(checkpointer.GetColumnData().GetBlockManager());
68
+ state.overflow_writer = make_uniq<WriteOverflowStringsToDisk>(checkpointer.GetRowGroup().GetBlockManager());
69
69
  }
70
70
  current_segment = std::move(compressed_segment);
71
71
  current_segment->InitializeAppend(append_state);
@@ -73,6 +73,11 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
73
73
 
74
74
  void UncompressedCompressState::FlushSegment(idx_t segment_size) {
75
75
  auto &state = checkpointer.GetCheckpointState();
76
+ if (current_segment->type.InternalType() == PhysicalType::VARCHAR) {
77
+ auto &segment_state = current_segment->GetSegmentState()->Cast<UncompressedStringSegmentState>();
78
+ segment_state.overflow_writer->Flush();
79
+ segment_state.overflow_writer.reset();
80
+ }
76
81
  state.FlushSegment(std::move(current_segment), segment_size);
77
82
  }
78
83