duckdb 0.8.2-dev4474.0 → 0.8.2-dev4572.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/lib/duckdb.js +11 -1
  2. package/package.json +3 -1
  3. package/src/connection.cpp +48 -7
  4. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +3 -0
  5. package/src/duckdb/src/catalog/catalog.cpp +5 -0
  6. package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
  7. package/src/duckdb/src/execution/column_binding_resolver.cpp +1 -0
  8. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
  9. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  10. package/src/duckdb/src/function/table/arrow.cpp +18 -13
  11. package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
  12. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
  13. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  14. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  15. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
  16. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
  17. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
  18. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
  19. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  20. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  21. package/src/duckdb/src/include/duckdb/optimizer/rule/empty_needle_removal.hpp +1 -1
  22. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
  23. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
  24. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
  25. package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
  26. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  27. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
  28. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
  29. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
  30. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  31. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +9 -3
  32. package/src/duckdb/src/optimizer/statistics/operator/propagate_projection.cpp +0 -1
  33. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
  34. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
  35. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
  36. package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
  37. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
  38. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
  39. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
  40. package/src/duckdb/src/storage/data_pointer.cpp +20 -0
  41. package/src/duckdb/src/storage/local_storage.cpp +3 -7
  42. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
  43. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
  44. package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
  45. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  46. package/src/duckdb/src/storage/storage_manager.cpp +5 -0
  47. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
  48. package/src/duckdb/src/storage/table/column_data.cpp +17 -14
  49. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
  50. package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
  51. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  52. package/src/duckdb/ub_src_storage.cpp +2 -0
  53. package/src/duckdb_node.hpp +1 -0
  54. package/test/close_hang.test.ts +39 -0
@@ -14,6 +14,7 @@
14
14
  #include "duckdb/common/map.hpp"
15
15
  #include "duckdb/storage/storage_info.hpp"
16
16
  #include "duckdb/common/mutex.hpp"
17
+ #include "duckdb/storage/data_pointer.hpp"
17
18
 
18
19
  namespace duckdb {
19
20
  class DatabaseInstance;
@@ -21,6 +22,7 @@ class ColumnData;
21
22
  class ColumnDataCheckpointer;
22
23
  class ColumnSegment;
23
24
  class SegmentStatistics;
25
+ struct ColumnSegmentState;
24
26
 
25
27
  struct ColumnFetchState;
26
28
  struct ColumnScanState;
@@ -62,6 +64,11 @@ struct CompressedSegmentState {
62
64
  virtual ~CompressedSegmentState() {
63
65
  }
64
66
 
67
+ //! Display info for PRAGMA storage_info
68
+ virtual string GetSegmentInfo() const { // LCOV_EXCL_START
69
+ return "";
70
+ } // LCOV_EXCL_STOP
71
+
65
72
  template <class TARGET>
66
73
  TARGET &Cast() {
67
74
  D_ASSERT(dynamic_cast<TARGET *>(this));
@@ -75,7 +82,7 @@ struct CompressedSegmentState {
75
82
  };
76
83
 
77
84
  struct CompressionAppendState {
78
- CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
85
+ explicit CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
79
86
  }
80
87
  virtual ~CompressionAppendState() {
81
88
  }
@@ -139,13 +146,24 @@ typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &stat
139
146
  //===--------------------------------------------------------------------===//
140
147
  // Append (optional)
141
148
  //===--------------------------------------------------------------------===//
142
- typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
149
+ typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(
150
+ ColumnSegment &segment, block_id_t block_id, optional_ptr<ColumnSegmentState> segment_state);
143
151
  typedef unique_ptr<CompressionAppendState> (*compression_init_append_t)(ColumnSegment &segment);
144
152
  typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment,
145
153
  SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count);
146
154
  typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
147
155
  typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
148
156
 
157
+ //===--------------------------------------------------------------------===//
158
+ // Serialization (optional)
159
+ //===--------------------------------------------------------------------===//
160
+ //! Function prototype for serializing the segment state
161
+ typedef unique_ptr<ColumnSegmentState> (*compression_serialize_state_t)(ColumnSegment &segment);
162
+ //! Function prototype for deserializing the segment state
163
+ typedef unique_ptr<ColumnSegmentState> (*compression_deserialize_state_t)(Deserializer &deserializer);
164
+ //! Function prototype for cleaning up the segment state when the column data is dropped
165
+ typedef void (*compression_cleanup_state_t)(ColumnSegment &segment);
166
+
149
167
  class CompressionFunction {
150
168
  public:
151
169
  CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
@@ -157,12 +175,16 @@ public:
157
175
  compression_init_segment_t init_segment = nullptr,
158
176
  compression_init_append_t init_append = nullptr, compression_append_t append = nullptr,
159
177
  compression_finalize_append_t finalize_append = nullptr,
160
- compression_revert_append_t revert_append = nullptr)
178
+ compression_revert_append_t revert_append = nullptr,
179
+ compression_serialize_state_t serialize_state = nullptr,
180
+ compression_deserialize_state_t deserialize_state = nullptr,
181
+ compression_cleanup_state_t cleanup_state = nullptr)
161
182
  : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
162
183
  init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
163
184
  init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
164
185
  init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append),
165
- revert_append(revert_append) {
186
+ revert_append(revert_append), serialize_state(serialize_state), deserialize_state(deserialize_state),
187
+ cleanup_state(cleanup_state) {
166
188
  }
167
189
 
168
190
  //! Compression type
@@ -218,6 +240,16 @@ public:
218
240
  compression_finalize_append_t finalize_append;
219
241
  //! Revert append (optional)
220
242
  compression_revert_append_t revert_append;
243
+
244
+ // State serialize functions
245
+ //! This is only necessary if the segment state has information that must be written to disk in the metadata
246
+
247
+ //! Serialize the segment state to the metadata (optional)
248
+ compression_serialize_state_t serialize_state;
249
+ //! Deserialize the segment state to the metadata (optional)
250
+ compression_deserialize_state_t deserialize_state;
251
+ //! Cleanup the segment state (optional)
252
+ compression_cleanup_state_t cleanup_state;
221
253
  };
222
254
 
223
255
  //! The set of compression functions
@@ -129,6 +129,8 @@ public:
129
129
 
130
130
  //! Scan Function
131
131
  static void ArrowScanFunction(ClientContext &context, TableFunctionInput &data, DataChunk &output);
132
+ static void PopulateArrowTableType(ArrowTableType &arrow_table, ArrowSchemaWrapper &schema_p, vector<string> &names,
133
+ vector<LogicalType> &return_types);
132
134
 
133
135
  protected:
134
136
  //! Defines Maximum Number of Threads
@@ -25,6 +25,10 @@ struct PragmaStorageInfo {
25
25
  static void RegisterFunction(BuiltinFunctions &set);
26
26
  };
27
27
 
28
+ struct PragmaMetadataInfo {
29
+ static void RegisterFunction(BuiltinFunctions &set);
30
+ };
31
+
28
32
  struct PragmaLastProfilingOutput {
29
33
  static void RegisterFunction(BuiltinFunctions &set);
30
34
  };
@@ -13,7 +13,7 @@
13
13
  namespace duckdb {
14
14
 
15
15
  // The Empty_needle_removal Optimization rule folds some foldable ConstantExpression
16
- //(e.g.: PREFIX('xyz', '') is TRUE, PREFIX(NULL, '') is NULL, so rewrite PREFIX(x, '') to (CASE WHEN x IS NOT NULL THEN)
16
+ //(e.g.: PREFIX('xyz', '') is TRUE, PREFIX(NULL, '') is NULL, so rewrite PREFIX(x, '') to TRUE_OR_NULL(x)
17
17
  class EmptyNeedleRemovalRule : public Rule {
18
18
  public:
19
19
  explicit EmptyNeedleRemovalRule(ExpressionRewriter &rewriter);
@@ -14,13 +14,16 @@
14
14
  #include "duckdb/function/compression_function.hpp"
15
15
 
16
16
  namespace duckdb {
17
+ struct UncompressedStringSegmentState;
17
18
 
18
19
  class OverflowStringWriter {
19
20
  public:
20
21
  virtual ~OverflowStringWriter() {
21
22
  }
22
23
 
23
- virtual void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) = 0;
24
+ virtual void WriteString(UncompressedStringSegmentState &state, string_t string, block_id_t &result_block,
25
+ int32_t &result_offset) = 0;
26
+ virtual void Flush() = 0;
24
27
  };
25
28
 
26
29
  struct StringBlock {
@@ -43,15 +46,35 @@ struct string_location_t {
43
46
  };
44
47
 
45
48
  struct UncompressedStringSegmentState : public CompressedSegmentState {
46
- ~UncompressedStringSegmentState();
49
+ ~UncompressedStringSegmentState() override;
47
50
 
48
51
  //! The string block holding strings that do not fit in the main block
49
52
  //! FIXME: this should be replaced by a heap that also allows freeing of unused strings
50
53
  unique_ptr<StringBlock> head;
54
+ //! Map of block id to string block
55
+ unordered_map<block_id_t, reference<StringBlock>> overflow_blocks;
51
56
  //! Overflow string writer (if any), if not set overflow strings will be written to memory blocks
52
57
  unique_ptr<OverflowStringWriter> overflow_writer;
53
- //! Map of block id to string block
54
- unordered_map<block_id_t, StringBlock *> overflow_blocks;
58
+ //! The set of overflow blocks written to disk (if any)
59
+ vector<block_id_t> on_disk_blocks;
60
+
61
+ public:
62
+ shared_ptr<BlockHandle> GetHandle(BlockManager &manager, block_id_t block_id);
63
+
64
+ void RegisterBlock(BlockManager &manager, block_id_t block_id);
65
+
66
+ string GetSegmentInfo() const override {
67
+ if (on_disk_blocks.empty()) {
68
+ return "";
69
+ }
70
+ string result = StringUtil::Join(on_disk_blocks, on_disk_blocks.size(), ", ",
71
+ [&](block_id_t block) { return to_string(block); });
72
+ return "Overflow String Block Ids: " + result;
73
+ }
74
+
75
+ private:
76
+ mutex block_lock;
77
+ unordered_map<block_id_t, shared_ptr<BlockHandle>> handles;
55
78
  };
56
79
 
57
80
  } // namespace duckdb
@@ -30,10 +30,12 @@ public:
30
30
  static constexpr idx_t STRING_SPACE = Storage::BLOCK_SIZE - sizeof(block_id_t);
31
31
 
32
32
  public:
33
- void WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) override;
33
+ void WriteString(UncompressedStringSegmentState &state, string_t string, block_id_t &result_block,
34
+ int32_t &result_offset) override;
35
+ void Flush() override;
34
36
 
35
37
  private:
36
- void AllocateNewBlock(block_id_t new_block_id);
38
+ void AllocateNewBlock(UncompressedStringSegmentState &state, block_id_t new_block_id);
37
39
  };
38
40
 
39
41
  } // namespace duckdb
@@ -20,8 +20,27 @@ namespace duckdb {
20
20
  class Serializer;
21
21
  class Deserializer;
22
22
 
23
+ struct ColumnSegmentState {
24
+ virtual ~ColumnSegmentState() {
25
+ }
26
+
27
+ virtual void Serialize(Serializer &serializer) const = 0;
28
+ static unique_ptr<ColumnSegmentState> Deserialize(Deserializer &deserializer);
29
+
30
+ template <class TARGET>
31
+ TARGET &Cast() {
32
+ D_ASSERT(dynamic_cast<TARGET *>(this));
33
+ return reinterpret_cast<TARGET &>(*this);
34
+ }
35
+ template <class TARGET>
36
+ const TARGET &Cast() const {
37
+ D_ASSERT(dynamic_cast<const TARGET *>(this));
38
+ return reinterpret_cast<const TARGET &>(*this);
39
+ }
40
+ };
41
+
23
42
  struct DataPointer {
24
- DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
43
+ explicit DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
25
44
  }
26
45
 
27
46
  uint64_t row_start;
@@ -30,6 +49,8 @@ struct DataPointer {
30
49
  CompressionType compression_type;
31
50
  //! Type-specific statistics of the segment
32
51
  BaseStatistics statistics;
52
+ //! Serialized segment state
53
+ unique_ptr<ColumnSegmentState> segment_state;
33
54
 
34
55
  void Serialize(Serializer &serializer) const;
35
56
  static DataPointer Deserialize(Deserializer &source);
@@ -21,4 +21,10 @@ struct DatabaseSize {
21
21
  idx_t wal_size = 0;
22
22
  };
23
23
 
24
+ struct MetadataBlockInfo {
25
+ block_id_t block_id;
26
+ idx_t total_blocks;
27
+ vector<idx_t> free_list;
28
+ };
29
+
24
30
  } // namespace duckdb
@@ -16,6 +16,7 @@
16
16
 
17
17
  namespace duckdb {
18
18
  class DatabaseInstance;
19
+ struct MetadataBlockInfo;
19
20
 
20
21
  struct MetadataBlock {
21
22
  shared_ptr<BlockHandle> block;
@@ -66,6 +67,7 @@ public:
66
67
  void MarkBlocksAsModified();
67
68
  void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);
68
69
 
70
+ vector<MetadataBlockInfo> GetMetadataInfo() const;
69
71
  idx_t BlockCount();
70
72
 
71
73
  void Write(WriteStream &sink);
@@ -68,6 +68,7 @@ public:
68
68
  virtual bool IsCheckpointClean(MetaBlockPointer checkpoint_id) = 0;
69
69
  virtual void CreateCheckpoint(bool delete_wal = false, bool force_checkpoint = false) = 0;
70
70
  virtual DatabaseSize GetDatabaseSize() = 0;
71
+ virtual vector<MetadataBlockInfo> GetMetadataInfo() = 0;
71
72
  virtual shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) = 0;
72
73
 
73
74
  protected:
@@ -112,6 +113,7 @@ public:
112
113
  bool IsCheckpointClean(MetaBlockPointer checkpoint_id) override;
113
114
  void CreateCheckpoint(bool delete_wal, bool force_checkpoint) override;
114
115
  DatabaseSize GetDatabaseSize() override;
116
+ vector<MetadataBlockInfo> GetMetadataInfo() override;
115
117
  shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) override;
116
118
 
117
119
  protected:
@@ -56,7 +56,8 @@ public:
56
56
  static void StringScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result);
57
57
  static void StringFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
58
58
  idx_t result_idx);
59
- static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id);
59
+ static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id,
60
+ optional_ptr<ColumnSegmentState> segment_state);
60
61
 
61
62
  static unique_ptr<CompressionAppendState> StringInitAppend(ColumnSegment &segment) {
62
63
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
@@ -194,5 +195,9 @@ public:
194
195
  data_ptr_t baseptr, int32_t dict_offset, uint32_t string_length);
195
196
  static string_t FetchString(ColumnSegment &segment, StringDictionaryContainer dict, Vector &result,
196
197
  data_ptr_t baseptr, string_location_t location, uint32_t string_length);
198
+
199
+ static unique_ptr<ColumnSegmentState> SerializeState(ColumnSegment &segment);
200
+ static unique_ptr<ColumnSegmentState> DeserializeState(Deserializer &deserializer);
201
+ static void CleanupState(ColumnSegment &segment);
197
202
  };
198
203
  } // namespace duckdb
@@ -57,7 +57,8 @@ public:
57
57
  static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
58
58
  block_id_t id, idx_t offset, const LogicalType &type_p,
59
59
  idx_t start, idx_t count, CompressionType compression_type,
60
- BaseStatistics statistics);
60
+ BaseStatistics statistics,
61
+ unique_ptr<ColumnSegmentState> segment_state);
61
62
  static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
62
63
  idx_t segment_size = Storage::BLOCK_SIZE);
63
64
  static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
@@ -118,14 +119,17 @@ public:
118
119
  return row_index - this->start;
119
120
  }
120
121
 
121
- CompressedSegmentState *GetSegmentState() {
122
+ optional_ptr<CompressedSegmentState> GetSegmentState() {
122
123
  return segment_state.get();
123
124
  }
124
125
 
126
+ void CommitDropSegment();
127
+
125
128
  public:
126
129
  ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
127
130
  idx_t start, idx_t count, CompressionFunction &function, BaseStatistics statistics,
128
- block_id_t block_id, idx_t offset, idx_t segment_size);
131
+ block_id_t block_id, idx_t offset, idx_t segment_size,
132
+ unique_ptr<ColumnSegmentState> segment_state = nullptr);
129
133
  ColumnSegment(ColumnSegment &other, idx_t start);
130
134
 
131
135
  private:
@@ -28,6 +28,7 @@ struct ColumnSegmentInfo {
28
28
  bool persistent;
29
29
  block_id_t block_id;
30
30
  idx_t block_offset;
31
+ string segment_info;
31
32
  };
32
33
 
33
34
  struct IndexInfo {
@@ -72,17 +72,21 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
72
72
  for (auto &cond : comp_join.conditions) {
73
73
  if (cond.comparison == ExpressionType::COMPARE_EQUAL) {
74
74
  has_equality = true;
75
+ break;
75
76
  }
76
77
  }
77
78
  if (!has_equality) {
78
79
  break;
79
80
  }
80
- // now, for each of the columns of the RHS, check which columns need to be projected
81
+ // visit current operator expressions so they are added to the referenced_columns
82
+ LogicalOperatorVisitor::VisitOperatorExpressions(op);
83
+
81
84
  column_binding_set_t unused_bindings;
85
+ auto old_op_bindings = op.GetColumnBindings();
82
86
  ExtractUnusedColumnBindings(op.children[1]->GetColumnBindings(), unused_bindings);
83
87
 
84
88
  // now recurse into the filter and its children
85
- StandardVisitOperator(op);
89
+ LogicalOperatorVisitor::VisitOperatorChildren(op);
86
90
 
87
91
  // then generate the projection map
88
92
  GenerateProjectionMap(op.children[1]->GetColumnBindings(), unused_bindings, comp_join.right_projection_map);
@@ -118,12 +122,14 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
118
122
  if (everything_referenced) {
119
123
  break;
120
124
  }
125
+ // first visit operator expressions to populate referenced columns
126
+ LogicalOperatorVisitor::VisitOperatorExpressions(op);
121
127
  // filter, figure out which columns are not needed after the filter
122
128
  column_binding_set_t unused_bindings;
123
129
  ExtractUnusedColumnBindings(op.children[0]->GetColumnBindings(), unused_bindings);
124
130
 
125
131
  // now recurse into the filter and its children
126
- StandardVisitOperator(op);
132
+ LogicalOperatorVisitor::VisitOperatorChildren(op);
127
133
 
128
134
  // then generate the projection map
129
135
  GenerateProjectionMap(op.children[0]->GetColumnBindings(), unused_bindings, filter.projection_map);
@@ -11,7 +11,6 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalProj
11
11
  ReplaceWithEmptyResult(*node_ptr);
12
12
  return std::move(node_stats);
13
13
  }
14
-
15
14
  // then propagate to each of the expressions
16
15
  for (idx_t i = 0; i < proj.expressions.size(); i++) {
17
16
  auto stats = PropagateExpression(proj.expressions[i]);
@@ -86,7 +86,7 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu
86
86
  this->bound_aggregate = true;
87
87
  unique_ptr<Expression> bound_filter;
88
88
  AggregateBinder aggregate_binder(binder, context);
89
- string error, filter_error;
89
+ string error;
90
90
 
91
91
  // Now we bind the filter (if any)
92
92
  if (aggr.filter) {
@@ -167,9 +167,6 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu
167
167
  } else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
168
168
  return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
169
169
  }
170
- if (!filter_error.empty()) {
171
- return BindResult(filter_error);
172
- }
173
170
 
174
171
  if (aggr.filter) {
175
172
  auto &child = BoundExpression::GetExpression(*aggr.filter);
@@ -20,10 +20,7 @@ PartialBlockAllocation RowGroupWriter::GetBlockAllocation(uint32_t segment_size)
20
20
  void SingleFileRowGroupWriter::WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state,
21
21
  Serializer &serializer) {
22
22
  const auto &data_pointers = column_checkpoint_state.data_pointers;
23
- serializer.WriteList(100, "data_pointers", data_pointers.size(), [&](Serializer::List &list, idx_t i) {
24
- auto &data_pointer = data_pointers[i];
25
- list.WriteElement(data_pointer);
26
- });
23
+ serializer.WriteProperty(100, "data_pointers", data_pointers);
27
24
  }
28
25
 
29
26
  MetadataWriter &SingleFileRowGroupWriter::GetPayloadWriter() {
@@ -10,19 +10,42 @@ WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(BlockManager &block_manag
10
10
  }
11
11
 
12
12
  WriteOverflowStringsToDisk::~WriteOverflowStringsToDisk() {
13
- if (offset > 0) {
14
- block_manager.Write(handle.GetFileBuffer(), block_id);
13
+ // verify that the overflow writer has been flushed
14
+ D_ASSERT(Exception::UncaughtException() || offset == 0);
15
+ }
16
+
17
+ shared_ptr<BlockHandle> UncompressedStringSegmentState::GetHandle(BlockManager &manager, block_id_t block_id) {
18
+ lock_guard<mutex> lock(block_lock);
19
+ auto entry = handles.find(block_id);
20
+ if (entry != handles.end()) {
21
+ return entry->second;
15
22
  }
23
+ auto result = manager.RegisterBlock(block_id);
24
+ handles.insert(make_pair(block_id, result));
25
+ return result;
16
26
  }
17
27
 
18
- void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) {
28
+ void UncompressedStringSegmentState::RegisterBlock(BlockManager &manager, block_id_t block_id) {
29
+ lock_guard<mutex> lock(block_lock);
30
+ auto entry = handles.find(block_id);
31
+ if (entry != handles.end()) {
32
+ throw InternalException("UncompressedStringSegmentState::RegisterBlock - block id %llu already exists",
33
+ block_id);
34
+ }
35
+ auto result = manager.RegisterBlock(block_id);
36
+ handles.insert(make_pair(block_id, std::move(result)));
37
+ on_disk_blocks.push_back(block_id);
38
+ }
39
+
40
+ void WriteOverflowStringsToDisk::WriteString(UncompressedStringSegmentState &state, string_t string,
41
+ block_id_t &result_block, int32_t &result_offset) {
19
42
  auto &buffer_manager = block_manager.buffer_manager;
20
43
  if (!handle.IsValid()) {
21
44
  handle = buffer_manager.Allocate(Storage::BLOCK_SIZE);
22
45
  }
23
46
  // first write the length of the string
24
47
  if (block_id == INVALID_BLOCK || offset + 2 * sizeof(uint32_t) >= STRING_SPACE) {
25
- AllocateNewBlock(block_manager.GetFreeBlockId());
48
+ AllocateNewBlock(state, block_manager.GetFreeBlockId());
26
49
  }
27
50
  result_block = block_id;
28
51
  result_offset = offset;
@@ -55,23 +78,37 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
55
78
  strptr += to_write;
56
79
  }
57
80
  if (remaining > 0) {
81
+ D_ASSERT(offset == WriteOverflowStringsToDisk::STRING_SPACE);
58
82
  // there is still remaining stuff to write
59
- // first get the new block id and write it to the end of the previous block
60
- auto new_block_id = block_manager.GetFreeBlockId();
61
- Store<block_id_t>(new_block_id, data_ptr + offset);
62
83
  // now write the current block to disk and allocate a new block
63
- AllocateNewBlock(new_block_id);
84
+ AllocateNewBlock(state, block_manager.GetFreeBlockId());
64
85
  }
65
86
  }
66
87
  }
67
88
 
68
- void WriteOverflowStringsToDisk::AllocateNewBlock(block_id_t new_block_id) {
89
+ void WriteOverflowStringsToDisk::Flush() {
90
+ if (block_id != INVALID_BLOCK && offset > 0) {
91
+ // zero-initialize the empty part of the overflow string buffer (if any)
92
+ if (offset < STRING_SPACE) {
93
+ memset(handle.Ptr() + offset, 0, STRING_SPACE - offset);
94
+ }
95
+ // write to disk
96
+ block_manager.Write(handle.GetFileBuffer(), block_id);
97
+ }
98
+ block_id = INVALID_BLOCK;
99
+ offset = 0;
100
+ }
101
+
102
+ void WriteOverflowStringsToDisk::AllocateNewBlock(UncompressedStringSegmentState &state, block_id_t new_block_id) {
69
103
  if (block_id != INVALID_BLOCK) {
70
104
  // there is an old block, write it first
71
- block_manager.Write(handle.GetFileBuffer(), block_id);
105
+ // write the new block id at the end of the previous block
106
+ Store<block_id_t>(new_block_id, handle.Ptr() + WriteOverflowStringsToDisk::STRING_SPACE);
107
+ Flush();
72
108
  }
73
109
  offset = 0;
74
110
  block_id = new_block_id;
111
+ state.RegisterBlock(block_manager, new_block_id);
75
112
  }
76
113
 
77
114
  } // namespace duckdb
@@ -136,8 +136,6 @@ void SingleFileCheckpointWriter::CreateCheckpoint() {
136
136
 
137
137
  // truncate the file
138
138
  block_manager.Truncate();
139
-
140
- metadata_manager.MarkBlocksAsModified();
141
139
  }
142
140
 
143
141
  void CheckpointReader::LoadCheckpoint(ClientContext &context, MetadataReader &reader) {
@@ -65,7 +65,7 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
65
65
  auto compressed_segment = ColumnSegment::CreateTransientSegment(db, type, row_start);
66
66
  if (type.InternalType() == PhysicalType::VARCHAR) {
67
67
  auto &state = compressed_segment->GetSegmentState()->Cast<UncompressedStringSegmentState>();
68
- state.overflow_writer = make_uniq<WriteOverflowStringsToDisk>(checkpointer.GetColumnData().GetBlockManager());
68
+ state.overflow_writer = make_uniq<WriteOverflowStringsToDisk>(checkpointer.GetRowGroup().GetBlockManager());
69
69
  }
70
70
  current_segment = std::move(compressed_segment);
71
71
  current_segment->InitializeAppend(append_state);
@@ -73,6 +73,11 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
73
73
 
74
74
  void UncompressedCompressState::FlushSegment(idx_t segment_size) {
75
75
  auto &state = checkpointer.GetCheckpointState();
76
+ if (current_segment->type.InternalType() == PhysicalType::VARCHAR) {
77
+ auto &segment_state = current_segment->GetSegmentState()->Cast<UncompressedStringSegmentState>();
78
+ segment_state.overflow_writer->Flush();
79
+ segment_state.overflow_writer.reset();
80
+ }
76
81
  state.FlushSegment(std::move(current_segment), segment_size);
77
82
  }
78
83