duckdb 0.6.2-dev1770.0 → 0.6.2-dev1794.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev1770.0",
5
+ "version": "0.6.2-dev1794.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -18,6 +18,7 @@ static DefaultOptimizerType internal_optimizer_types[] = {
18
18
  {"in_clause", OptimizerType::IN_CLAUSE},
19
19
  {"join_order", OptimizerType::JOIN_ORDER},
20
20
  {"deliminator", OptimizerType::DELIMINATOR},
21
+ {"unnest_rewriter", OptimizerType::UNNEST_REWRITER},
21
22
  {"unused_columns", OptimizerType::UNUSED_COLUMNS},
22
23
  {"statistics_propagation", OptimizerType::STATISTICS_PROPAGATION},
23
24
  {"common_subexpressions", OptimizerType::COMMON_SUBEXPRESSIONS},
@@ -23,8 +23,6 @@ void FileBuffer::Init() {
23
23
  size = 0;
24
24
  internal_buffer = nullptr;
25
25
  internal_size = 0;
26
- malloced_buffer = nullptr;
27
- malloced_size = 0;
28
26
  }
29
27
 
30
28
  FileBuffer::FileBuffer(FileBuffer &source, FileBufferType type_p) : allocator(source.allocator), type(type_p) {
@@ -33,31 +31,29 @@ FileBuffer::FileBuffer(FileBuffer &source, FileBufferType type_p) : allocator(so
33
31
  size = source.size;
34
32
  internal_buffer = source.internal_buffer;
35
33
  internal_size = source.internal_size;
36
- malloced_buffer = source.malloced_buffer;
37
- malloced_size = source.malloced_size;
38
34
 
39
35
  source.Init();
40
36
  }
41
37
 
42
38
  FileBuffer::~FileBuffer() {
43
- if (!malloced_buffer) {
39
+ if (!internal_buffer) {
44
40
  return;
45
41
  }
46
- allocator.FreeData(malloced_buffer, malloced_size);
42
+ allocator.FreeData(internal_buffer, internal_size);
47
43
  }
48
44
 
49
45
  void FileBuffer::ReallocBuffer(size_t new_size) {
50
- if (malloced_buffer) {
51
- malloced_buffer = allocator.ReallocateData(malloced_buffer, malloced_size, new_size);
46
+ data_ptr_t new_buffer;
47
+ if (internal_buffer) {
48
+ new_buffer = allocator.ReallocateData(internal_buffer, internal_size, new_size);
52
49
  } else {
53
- malloced_buffer = allocator.AllocateData(new_size);
50
+ new_buffer = allocator.AllocateData(new_size);
54
51
  }
55
- if (!malloced_buffer) {
52
+ if (!new_buffer) {
56
53
  throw std::bad_alloc();
57
54
  }
58
- malloced_size = new_size;
59
- internal_buffer = malloced_buffer;
60
- internal_size = malloced_size;
55
+ internal_buffer = new_buffer;
56
+ internal_size = new_size;
61
57
  // Caller must update these.
62
58
  buffer = nullptr;
63
59
  size = 0;
@@ -92,32 +88,11 @@ void FileBuffer::Read(FileHandle &handle, uint64_t location) {
92
88
  handle.Read(internal_buffer, internal_size, location);
93
89
  }
94
90
 
95
- void FileBuffer::ReadAndChecksum(FileHandle &handle, uint64_t location) {
96
- // read the buffer from disk
97
- Read(handle, location);
98
- // compute the checksum
99
- auto stored_checksum = Load<uint64_t>(internal_buffer);
100
- uint64_t computed_checksum = Checksum(buffer, size);
101
- // verify the checksum
102
- if (stored_checksum != computed_checksum) {
103
- throw IOException("Corrupt database file: computed checksum %llu does not match stored checksum %llu in block",
104
- computed_checksum, stored_checksum);
105
- }
106
- }
107
-
108
91
  void FileBuffer::Write(FileHandle &handle, uint64_t location) {
109
92
  D_ASSERT(type != FileBufferType::TINY_BUFFER);
110
93
  handle.Write(internal_buffer, internal_size, location);
111
94
  }
112
95
 
113
- void FileBuffer::ChecksumAndWrite(FileHandle &handle, uint64_t location) {
114
- // compute the checksum and write it to the start of the buffer (if not temp buffer)
115
- uint64_t checksum = Checksum(buffer, size);
116
- Store<uint64_t>(checksum, internal_buffer);
117
- // now write the buffer
118
- Write(handle, location);
119
- }
120
-
121
96
  void FileBuffer::Clear() {
122
97
  memset(internal_buffer, 0, internal_size);
123
98
  }
@@ -11,8 +11,8 @@ SwizzleablePointer::~SwizzleablePointer() {
11
11
 
12
12
  SwizzleablePointer::SwizzleablePointer(duckdb::MetaBlockReader &reader) {
13
13
  idx_t block_id = reader.Read<block_id_t>();
14
- idx_t offset = reader.Read<uint32_t>();
15
- if (block_id == DConstants::INVALID_INDEX || offset == DConstants::INVALID_INDEX) {
14
+ uint32_t offset = reader.Read<uint32_t>();
15
+ if (block_id == DConstants::INVALID_INDEX || offset == (uint32_t)DConstants::INVALID_INDEX) {
16
16
  pointer = 0;
17
17
  return;
18
18
  }
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev1770"
2
+ #define DUCKDB_VERSION "0.6.2-dev1794"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "864ff1c719"
5
+ #define DUCKDB_SOURCE_ID "d8add1ee3f"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -21,6 +21,7 @@ enum class OptimizerType : uint32_t {
21
21
  IN_CLAUSE,
22
22
  JOIN_ORDER,
23
23
  DELIMINATOR,
24
+ UNNEST_REWRITER,
24
25
  UNUSED_COLUMNS,
25
26
  STATISTICS_PROPAGATION,
26
27
  COMMON_SUBEXPRESSIONS,
@@ -39,14 +39,8 @@ public:
39
39
  public:
40
40
  //! Read into the FileBuffer from the specified location.
41
41
  void Read(FileHandle &handle, uint64_t location);
42
- //! Read into the FileBuffer from the specified location. Automatically verifies the checksum, and throws an
43
- //! exception if the checksum does not match correctly.
44
- virtual void ReadAndChecksum(FileHandle &handle, uint64_t location);
45
42
  //! Write the contents of the FileBuffer to the specified location.
46
43
  void Write(FileHandle &handle, uint64_t location);
47
- //! Write the contents of the FileBuffer to the specified location. Automatically adds a checksum of the contents of
48
- //! the filebuffer in front of the written data.
49
- virtual void ChecksumAndWrite(FileHandle &handle, uint64_t location);
50
44
 
51
45
  void Clear();
52
46
 
@@ -57,6 +51,9 @@ public:
57
51
  uint64_t AllocSize() const {
58
52
  return internal_size;
59
53
  }
54
+ data_ptr_t InternalBuffer() {
55
+ return internal_buffer;
56
+ }
60
57
 
61
58
  struct MemoryRequirement {
62
59
  idx_t alloc_size;
@@ -72,16 +69,6 @@ protected:
72
69
  uint64_t internal_size;
73
70
 
74
71
  void ReallocBuffer(size_t malloc_size);
75
-
76
- private:
77
- //! The buffer that was actually malloc'd, i.e. the pointer that must be freed when the FileBuffer is destroyed
78
- data_ptr_t malloced_buffer;
79
- uint64_t malloced_size;
80
-
81
- protected:
82
- uint64_t GetMallocedSize() {
83
- return malloced_size;
84
- }
85
72
  void Init();
86
73
  };
87
74
 
@@ -0,0 +1,84 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/unnest_rewriter.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/planner/logical_operator.hpp"
12
+ #include "duckdb/common/pair.hpp"
13
+
14
+ namespace duckdb {
15
+
16
+ class Optimizer;
17
+
18
+ struct ReplaceBinding {
19
+ ReplaceBinding() {};
20
+ ReplaceBinding(ColumnBinding old_binding, ColumnBinding new_binding)
21
+ : old_binding(old_binding), new_binding(new_binding) {
22
+ }
23
+ ColumnBinding old_binding;
24
+ ColumnBinding new_binding;
25
+ };
26
+
27
+ struct LHSBinding {
28
+ LHSBinding() {};
29
+ LHSBinding(ColumnBinding binding, LogicalType type) : binding(binding), type(type) {
30
+ }
31
+ ColumnBinding binding;
32
+ LogicalType type;
33
+ string alias;
34
+ };
35
+
36
+ //! The UnnestRewriterPlanUpdater updates column bindings after changing the operator plan
37
+ class UnnestRewriterPlanUpdater : LogicalOperatorVisitor {
38
+ public:
39
+ UnnestRewriterPlanUpdater() {
40
+ }
41
+ //! Update each operator of the plan after moving an UNNEST into a projection
42
+ void VisitOperator(LogicalOperator &op) override;
43
+ //! Visit an expression and update its column bindings after moving an UNNEST into a projection
44
+ void VisitExpression(unique_ptr<Expression> *expression) override;
45
+
46
+ //! Contains all bindings that need to be updated
47
+ vector<ReplaceBinding> replace_bindings;
48
+ };
49
+
50
+ //! The UnnestRewriter optimizer traverses the logical operator tree and rewrites duplicate
51
+ //! eliminated joins that contain UNNESTs by moving the UNNESTs into the projection of
52
+ //! the SELECT
53
+ class UnnestRewriter {
54
+ public:
55
+ UnnestRewriter() {
56
+ }
57
+ //! Rewrite duplicate eliminated joins with UNNESTs
58
+ unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> op);
59
+
60
+ private:
61
+ //! Find delim joins that contain an UNNEST
62
+ void FindCandidates(unique_ptr<LogicalOperator> *op_ptr, vector<unique_ptr<LogicalOperator> *> &candidates);
63
+ //! Rewrite a delim join that contains an UNNEST
64
+ bool RewriteCandidate(unique_ptr<LogicalOperator> *candidate);
65
+ //! Update the bindings of the RHS sequence of LOGICAL_PROJECTION(s)
66
+ void UpdateRHSBindings(unique_ptr<LogicalOperator> *plan_ptr, unique_ptr<LogicalOperator> *candidate,
67
+ UnnestRewriterPlanUpdater &updater);
68
+ //! Update the bindings of the BOUND_UNNEST expression of the LOGICAL_UNNEST
69
+ void UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &updater, unique_ptr<LogicalOperator> *candidate);
70
+
71
+ //! Store all delim columns of the delim join
72
+ void GetDelimColumns(LogicalOperator &op);
73
+ //! Store all LHS expressions of the LOGICAL_PROJECTION
74
+ void GetLHSExpressions(LogicalOperator &op);
75
+
76
+ //! Keep track of the delim columns to find the correct UNNEST column
77
+ vector<ColumnBinding> delim_columns;
78
+ //! Store the column bindings of the LHS child of the LOGICAL_DELIM_JOIN
79
+ vector<LHSBinding> lhs_bindings;
80
+ //! Stores the table index of the former child of the LOGICAL_UNNEST
81
+ idx_t overwritten_tbl_idx;
82
+ };
83
+
84
+ } // namespace duckdb
@@ -12,8 +12,7 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- //! LogicalAggregate represents an aggregate operation with (optional) GROUP BY
16
- //! operator.
15
+ //! LogicalUnnest represents the logical UNNEST operator.
17
16
  class LogicalUnnest : public LogicalOperator {
18
17
  public:
19
18
  explicit LogicalUnnest(idx_t unnest_index)
@@ -85,10 +85,8 @@ public:
85
85
  }
86
86
 
87
87
  //! Construct a managed buffer.
88
- //! The block_id is just used for internal tracking. It doesn't map to any actual
89
- //! BlockManager.
90
- virtual unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
91
- FileBufferType type = FileBufferType::MANAGED_BUFFER);
88
+ unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
89
+ FileBufferType type = FileBufferType::MANAGED_BUFFER);
92
90
 
93
91
  DUCKDB_API void ReserveMemory(idx_t size);
94
92
  DUCKDB_API void FreeReservedMemory(idx_t size);
@@ -118,6 +118,7 @@ public:
118
118
  }
119
119
  //! Serializes the index and returns the pair of block_id offset positions
120
120
  virtual BlockPointer Serialize(duckdb::MetaBlockWriter &writer);
121
+ BlockPointer GetBlockPointer();
121
122
 
122
123
  //! Returns block/offset of where index was most recently serialized.
123
124
  BlockPointer GetSerializedDataPointer() const {
@@ -59,6 +59,9 @@ private:
59
59
 
60
60
  void Initialize(DatabaseHeader &header);
61
61
 
62
+ void ReadAndChecksum(FileBuffer &handle, uint64_t location) const;
63
+ void ChecksumAndWrite(FileBuffer &handle, uint64_t location) const;
64
+
62
65
  //! Return the blocks to which we will write the free list and modified blocks
63
66
  vector<block_id_t> GetFreeListBlocks();
64
67
 
@@ -12,11 +12,11 @@
12
12
  #include "duckdb/common/types/data_chunk.hpp"
13
13
  #include "duckdb/common/enums/wal_type.hpp"
14
14
  #include "duckdb/common/serializer/buffered_file_writer.hpp"
15
- #include "duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp"
16
- #include "duckdb/storage/storage_info.hpp"
17
-
18
15
  #include "duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp"
16
+ #include "duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp"
19
17
  #include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp"
18
+ #include "duckdb/main/attached_database.hpp"
19
+ #include "duckdb/storage/storage_info.hpp"
20
20
 
21
21
  namespace duckdb {
22
22
 
@@ -38,8 +38,8 @@ class TransactionManager;
38
38
  class ReplayState {
39
39
  public:
40
40
  ReplayState(AttachedDatabase &db, ClientContext &context, Deserializer &source)
41
- : db(db), context(context), catalog(Catalog::GetCatalog(context, INVALID_CATALOG)), source(source),
42
- current_table(nullptr), deserialize_only(false), checkpoint_id(INVALID_BLOCK) {
41
+ : db(db), context(context), catalog(db.GetCatalog()), source(source), current_table(nullptr),
42
+ deserialize_only(false), checkpoint_id(INVALID_BLOCK) {
43
43
  }
44
44
 
45
45
  AttachedDatabase &db;
@@ -9,6 +9,7 @@
9
9
  #include "duckdb/optimizer/common_aggregate_optimizer.hpp"
10
10
  #include "duckdb/optimizer/cse_optimizer.hpp"
11
11
  #include "duckdb/optimizer/deliminator.hpp"
12
+ #include "duckdb/optimizer/unnest_rewriter.hpp"
12
13
  #include "duckdb/optimizer/expression_heuristics.hpp"
13
14
  #include "duckdb/optimizer/filter_pullup.hpp"
14
15
  #include "duckdb/optimizer/filter_pushdown.hpp"
@@ -111,6 +112,13 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
111
112
  plan = deliminator.Optimize(std::move(plan));
112
113
  });
113
114
 
115
+ // rewrites UNNESTs in DelimJoins by moving them to the projection
116
+ RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() {
117
+ UnnestRewriter unnest_rewriter;
118
+ plan = unnest_rewriter.Optimize(std::move(plan));
119
+ });
120
+
121
+ // removes unused columns
114
122
  RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() {
115
123
  RemoveUnusedColumns unused(binder, context, true);
116
124
  unused.VisitOperator(*plan);
@@ -0,0 +1,312 @@
1
+ #include "duckdb/optimizer/unnest_rewriter.hpp"
2
+
3
+ #include "duckdb/common/pair.hpp"
4
+ #include "duckdb/planner/operator/logical_delim_get.hpp"
5
+ #include "duckdb/planner/operator/logical_delim_join.hpp"
6
+ #include "duckdb/planner/operator/logical_unnest.hpp"
7
+ #include "duckdb/planner/operator/logical_projection.hpp"
8
+ #include "duckdb/planner/operator/logical_window.hpp"
9
+ #include "duckdb/planner/expression/bound_unnest_expression.hpp"
10
+ #include "duckdb/planner/expression/bound_columnref_expression.hpp"
11
+
12
+ namespace duckdb {
13
+
14
+ void UnnestRewriterPlanUpdater::VisitOperator(LogicalOperator &op) {
15
+ VisitOperatorChildren(op);
16
+ VisitOperatorExpressions(op);
17
+ }
18
+
19
+ void UnnestRewriterPlanUpdater::VisitExpression(unique_ptr<Expression> *expression) {
20
+
21
+ auto &expr = *expression;
22
+
23
+ if (expr->expression_class == ExpressionClass::BOUND_COLUMN_REF) {
24
+
25
+ auto &bound_column_ref = (BoundColumnRefExpression &)*expr;
26
+ for (idx_t i = 0; i < replace_bindings.size(); i++) {
27
+ if (bound_column_ref.binding == replace_bindings[i].old_binding) {
28
+ bound_column_ref.binding = replace_bindings[i].new_binding;
29
+ }
30
+ // previously pointing to the LOGICAL_DELIM_GET
31
+ if (bound_column_ref.binding.table_index == replace_bindings[i].old_binding.table_index &&
32
+ replace_bindings[i].old_binding.column_index == DConstants::INVALID_INDEX) {
33
+ bound_column_ref.binding = replace_bindings[i].new_binding;
34
+ }
35
+ }
36
+ }
37
+
38
+ VisitExpressionChildren(**expression);
39
+ }
40
+
41
+ unique_ptr<LogicalOperator> UnnestRewriter::Optimize(unique_ptr<LogicalOperator> op) {
42
+
43
+ UnnestRewriterPlanUpdater updater;
44
+ vector<unique_ptr<LogicalOperator> *> candidates;
45
+ FindCandidates(&op, candidates);
46
+
47
+ // rewrite the plan and update the bindings
48
+ for (auto &candidate : candidates) {
49
+
50
+ // rearrange the logical operators
51
+ if (RewriteCandidate(candidate)) {
52
+ // update the bindings of the BOUND_UNNEST expression
53
+ UpdateBoundUnnestBindings(updater, candidate);
54
+ // update the sequence of LOGICAL_PROJECTION(s)
55
+ UpdateRHSBindings(&op, candidate, updater);
56
+ // reset
57
+ delim_columns.clear();
58
+ lhs_bindings.clear();
59
+ }
60
+ }
61
+
62
+ return op;
63
+ }
64
+
65
+ void UnnestRewriter::FindCandidates(unique_ptr<LogicalOperator> *op_ptr,
66
+ vector<unique_ptr<LogicalOperator> *> &candidates) {
67
+ auto op = op_ptr->get();
68
+ // search children before adding, so that we add candidates bottom-up
69
+ for (auto &child : op->children) {
70
+ FindCandidates(&child, candidates);
71
+ }
72
+
73
+ // search for operator that has a LOGICAL_DELIM_JOIN as its child
74
+ if (op->children.size() != 1) {
75
+ return;
76
+ }
77
+ if (op->children[0]->type != LogicalOperatorType::LOGICAL_DELIM_JOIN) {
78
+ return;
79
+ }
80
+
81
+ // found a delim join
82
+ auto &delim_join = (LogicalDelimJoin &)*op->children[0];
83
+ // only support INNER delim joins
84
+ if (delim_join.join_type != JoinType::INNER) {
85
+ return;
86
+ }
87
+ // INNER delim join must have exactly one condition
88
+ if (delim_join.conditions.size() != 1) {
89
+ return;
90
+ }
91
+
92
+ // LHS child is a window
93
+ if (delim_join.children[0]->type != LogicalOperatorType::LOGICAL_WINDOW) {
94
+ return;
95
+ }
96
+
97
+ // RHS child must be projection(s) followed by an UNNEST
98
+ auto curr_op = &delim_join.children[1];
99
+ while (curr_op->get()->type == LogicalOperatorType::LOGICAL_PROJECTION) {
100
+ if (curr_op->get()->children.size() != 1) {
101
+ break;
102
+ }
103
+ curr_op = &curr_op->get()->children[0];
104
+ }
105
+
106
+ if (curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST) {
107
+ candidates.push_back(op_ptr);
108
+ }
109
+ return;
110
+ }
111
+
112
+ bool UnnestRewriter::RewriteCandidate(unique_ptr<LogicalOperator> *candidate) {
113
+
114
+ auto &topmost_op = (LogicalOperator &)**candidate;
115
+ if (topmost_op.type != LogicalOperatorType::LOGICAL_PROJECTION &&
116
+ topmost_op.type != LogicalOperatorType::LOGICAL_WINDOW &&
117
+ topmost_op.type != LogicalOperatorType::LOGICAL_FILTER &&
118
+ topmost_op.type != LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY &&
119
+ topmost_op.type != LogicalOperatorType::LOGICAL_UNNEST) {
120
+ return false;
121
+ }
122
+
123
+ // get the LOGICAL_DELIM_JOIN, which is a child of the candidate
124
+ D_ASSERT(topmost_op.children.size() == 1);
125
+ auto &delim_join = *(topmost_op.children[0]);
126
+ D_ASSERT(delim_join.type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
127
+ GetDelimColumns(delim_join);
128
+
129
+ // LHS of the LOGICAL_DELIM_JOIN is a LOGICAL_WINDOW that contains a LOGICAL_PROJECTION
130
+ // this lhs_proj later becomes the child of the UNNEST
131
+ auto &window = *delim_join.children[0];
132
+ auto &lhs_op = window.children[0];
133
+ GetLHSExpressions(*lhs_op);
134
+
135
+ // find the LOGICAL_UNNEST
136
+ // and get the path down to the LOGICAL_UNNEST
137
+ vector<unique_ptr<LogicalOperator> *> path_to_unnest;
138
+ auto curr_op = &(delim_join.children[1]);
139
+ while (curr_op->get()->type == LogicalOperatorType::LOGICAL_PROJECTION) {
140
+ path_to_unnest.push_back(curr_op);
141
+ curr_op = &curr_op->get()->children[0];
142
+ }
143
+
144
+ // store the table index of the child of the LOGICAL_UNNEST
145
+ // then update the plan by making the lhs_proj the child of the LOGICAL_UNNEST
146
+ D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
147
+ auto &unnest = (LogicalUnnest &)*curr_op->get();
148
+ D_ASSERT(unnest.children[0]->type == LogicalOperatorType::LOGICAL_DELIM_GET);
149
+ overwritten_tbl_idx = ((LogicalDelimGet &)*unnest.children[0]).table_index;
150
+ unnest.children[0] = std::move(lhs_op);
151
+
152
+ // replace the LOGICAL_DELIM_JOIN with its RHS child operator
153
+ topmost_op.children[0] = std::move(*path_to_unnest.front());
154
+ return true;
155
+ }
156
+
157
+ void UnnestRewriter::UpdateRHSBindings(unique_ptr<LogicalOperator> *plan_ptr, unique_ptr<LogicalOperator> *candidate,
158
+ UnnestRewriterPlanUpdater &updater) {
159
+
160
+ auto &topmost_op = (LogicalOperator &)**candidate;
161
+ idx_t shift = lhs_bindings.size();
162
+
163
+ vector<unique_ptr<LogicalOperator> *> path_to_unnest;
164
+ auto curr_op = &(topmost_op.children[0]);
165
+ while (curr_op->get()->type == LogicalOperatorType::LOGICAL_PROJECTION) {
166
+
167
+ path_to_unnest.push_back(curr_op);
168
+ D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_PROJECTION);
169
+ auto &proj = (LogicalProjection &)*curr_op->get();
170
+
171
+ // pop the two last expressions from all projections (delim_idx and UNNEST column)
172
+ D_ASSERT(proj.expressions.size() > 2);
173
+ proj.expressions.pop_back();
174
+ proj.expressions.pop_back();
175
+
176
+ // store all shifted current bindings
177
+ idx_t tbl_idx = proj.table_index;
178
+ for (idx_t i = 0; i < proj.expressions.size(); i++) {
179
+ ReplaceBinding replace_binding(ColumnBinding(tbl_idx, i), ColumnBinding(tbl_idx, i + shift));
180
+ updater.replace_bindings.push_back(replace_binding);
181
+ }
182
+
183
+ curr_op = &curr_op->get()->children[0];
184
+ }
185
+
186
+ // update all bindings by shifting them
187
+ updater.VisitOperator(*plan_ptr->get());
188
+ updater.replace_bindings.clear();
189
+
190
+ // update all bindings coming from the LHS to RHS bindings
191
+ D_ASSERT(topmost_op.children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
192
+ auto &top_proj = (LogicalProjection &)*topmost_op.children[0];
193
+ for (idx_t i = 0; i < lhs_bindings.size(); i++) {
194
+ ReplaceBinding replace_binding(lhs_bindings[i].binding, ColumnBinding(top_proj.table_index, i));
195
+ updater.replace_bindings.push_back(replace_binding);
196
+ }
197
+
198
+ // temporarily remove the BOUND_UNNEST and the child of the LOGICAL_UNNEST from the plan
199
+ D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
200
+ auto &unnest = (LogicalUnnest &)*curr_op->get();
201
+ auto temp_bound_unnest = std::move(unnest.expressions[0]);
202
+ auto temp_unnest_child = std::move(unnest.children[0]);
203
+ unnest.expressions.clear();
204
+ unnest.children.clear();
205
+ // update the bindings of the plan
206
+ updater.VisitOperator(*plan_ptr->get());
207
+ updater.replace_bindings.clear();
208
+ // add the child again
209
+ unnest.expressions.push_back(std::move(temp_bound_unnest));
210
+ unnest.children.push_back(std::move(temp_unnest_child));
211
+
212
+ // add the LHS expressions to each LOGICAL_PROJECTION
213
+ for (idx_t i = path_to_unnest.size(); i > 0; i--) {
214
+
215
+ D_ASSERT(path_to_unnest[i - 1]->get()->type == LogicalOperatorType::LOGICAL_PROJECTION);
216
+ auto &proj = (LogicalProjection &)*path_to_unnest[i - 1]->get();
217
+
218
+ // temporarily store the existing expressions
219
+ vector<unique_ptr<Expression>> existing_expressions;
220
+ for (idx_t expr_idx = 0; expr_idx < proj.expressions.size(); expr_idx++) {
221
+ existing_expressions.push_back(std::move(proj.expressions[expr_idx]));
222
+ }
223
+
224
+ proj.expressions.clear();
225
+
226
+ // add the new expressions
227
+ for (idx_t expr_idx = 0; expr_idx < lhs_bindings.size(); expr_idx++) {
228
+ auto new_expr = make_unique<BoundColumnRefExpression>(
229
+ lhs_bindings[expr_idx].alias, lhs_bindings[expr_idx].type, lhs_bindings[expr_idx].binding);
230
+ proj.expressions.push_back(std::move(new_expr));
231
+
232
+ // update the table index
233
+ lhs_bindings[expr_idx].binding.table_index = proj.table_index;
234
+ lhs_bindings[expr_idx].binding.column_index = expr_idx;
235
+ }
236
+
237
+ // add the existing expressions again
238
+ for (idx_t expr_idx = 0; expr_idx < existing_expressions.size(); expr_idx++) {
239
+ proj.expressions.push_back(std::move(existing_expressions[expr_idx]));
240
+ }
241
+ }
242
+ }
243
+
244
+ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &updater,
245
+ unique_ptr<LogicalOperator> *candidate) {
246
+
247
+ auto &topmost_op = (LogicalOperator &)**candidate;
248
+
249
+ // traverse LOGICAL_PROJECTION(s)
250
+ auto curr_op = &(topmost_op.children[0]);
251
+ while (curr_op->get()->type == LogicalOperatorType::LOGICAL_PROJECTION) {
252
+ curr_op = &curr_op->get()->children[0];
253
+ }
254
+
255
+ // found the LOGICAL_UNNEST
256
+ D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
257
+ auto &unnest = (LogicalUnnest &)*curr_op->get();
258
+
259
+ auto unnest_child_cols = unnest.children[0]->GetColumnBindings();
260
+ for (idx_t delim_col_idx = 0; delim_col_idx < delim_columns.size(); delim_col_idx++) {
261
+ for (idx_t child_col_idx = 0; child_col_idx < unnest_child_cols.size(); child_col_idx++) {
262
+ if (delim_columns[delim_col_idx].table_index == unnest_child_cols[child_col_idx].table_index) {
263
+ ColumnBinding old_binding(overwritten_tbl_idx, DConstants::INVALID_INDEX);
264
+ updater.replace_bindings.emplace_back(ReplaceBinding(old_binding, delim_columns[delim_col_idx]));
265
+ break;
266
+ }
267
+ }
268
+ }
269
+
270
+ // update bindings
271
+ D_ASSERT(unnest.expressions.size() == 1);
272
+ updater.VisitExpression(&unnest.expressions[0]);
273
+ updater.replace_bindings.clear();
274
+ }
275
+
276
+ void UnnestRewriter::GetDelimColumns(LogicalOperator &op) {
277
+
278
+ D_ASSERT(op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
279
+ auto &delim_join = (LogicalDelimJoin &)op;
280
+ for (idx_t i = 0; i < delim_join.duplicate_eliminated_columns.size(); i++) {
281
+ auto &expr = *delim_join.duplicate_eliminated_columns[i];
282
+ D_ASSERT(expr.type == ExpressionType::BOUND_COLUMN_REF);
283
+ auto &bound_colref_expr = (BoundColumnRefExpression &)expr;
284
+ delim_columns.push_back(bound_colref_expr.binding);
285
+ }
286
+ }
287
+
288
+ void UnnestRewriter::GetLHSExpressions(LogicalOperator &op) {
289
+
290
+ op.ResolveOperatorTypes();
291
+ auto col_bindings = op.GetColumnBindings();
292
+ D_ASSERT(op.types.size() == col_bindings.size());
293
+
294
+ bool set_alias = false;
295
+ // we can easily extract the alias for LOGICAL_PROJECTION(s)
296
+ if (op.type == LogicalOperatorType::LOGICAL_PROJECTION) {
297
+ auto &proj = (LogicalProjection &)op;
298
+ if (proj.expressions.size() == op.types.size()) {
299
+ set_alias = true;
300
+ }
301
+ }
302
+
303
+ for (idx_t i = 0; i < op.types.size(); i++) {
304
+ lhs_bindings.emplace_back(LHSBinding(col_bindings[i], op.types[i]));
305
+ if (set_alias) {
306
+ auto &proj = (LogicalProjection &)op;
307
+ lhs_bindings.back().alias = proj.expressions[i]->alias;
308
+ }
309
+ }
310
+ }
311
+
312
+ } // namespace duckdb
@@ -9,11 +9,11 @@ Block::Block(Allocator &allocator, block_id_t id)
9
9
 
10
10
  Block::Block(Allocator &allocator, block_id_t id, uint32_t internal_size)
11
11
  : FileBuffer(allocator, FileBufferType::BLOCK, internal_size), id(id) {
12
- D_ASSERT((GetMallocedSize() & (Storage::SECTOR_SIZE - 1)) == 0);
12
+ D_ASSERT((AllocSize() & (Storage::SECTOR_SIZE - 1)) == 0);
13
13
  }
14
14
 
15
15
  Block::Block(FileBuffer &source, block_id_t id) : FileBuffer(source, FileBufferType::BLOCK), id(id) {
16
- D_ASSERT((GetMallocedSize() & (Storage::SECTOR_SIZE - 1)) == 0);
16
+ D_ASSERT((AllocSize() & (Storage::SECTOR_SIZE - 1)) == 0);
17
17
  }
18
18
 
19
19
  } // namespace duckdb
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/storage/single_file_block_manager.hpp"
2
2
 
3
3
  #include "duckdb/common/allocator.hpp"
4
+ #include "duckdb/common/checksum.hpp"
4
5
  #include "duckdb/common/exception.hpp"
5
6
  #include "duckdb/common/serializer/buffered_deserializer.hpp"
6
7
  #include "duckdb/common/serializer/buffered_serializer.hpp"
@@ -142,7 +143,7 @@ SingleFileBlockManager::SingleFileBlockManager(AttachedDatabase &db, string path
142
143
 
143
144
  SerializeHeaderStructure<MainHeader>(main_header, header_buffer.buffer);
144
145
  // now write the header to the file
145
- header_buffer.ChecksumAndWrite(*handle, 0);
146
+ ChecksumAndWrite(header_buffer, 0);
146
147
  header_buffer.Clear();
147
148
 
148
149
  // write the database headers
@@ -155,14 +156,14 @@ SingleFileBlockManager::SingleFileBlockManager(AttachedDatabase &db, string path
155
156
  h1.free_list = INVALID_BLOCK;
156
157
  h1.block_count = 0;
157
158
  SerializeHeaderStructure<DatabaseHeader>(h1, header_buffer.buffer);
158
- header_buffer.ChecksumAndWrite(*handle, Storage::FILE_HEADER_SIZE);
159
+ ChecksumAndWrite(header_buffer, Storage::FILE_HEADER_SIZE);
159
160
  // header 2
160
161
  h2.iteration = 0;
161
162
  h2.meta_block = INVALID_BLOCK;
162
163
  h2.free_list = INVALID_BLOCK;
163
164
  h2.block_count = 0;
164
165
  SerializeHeaderStructure<DatabaseHeader>(h2, header_buffer.buffer);
165
- header_buffer.ChecksumAndWrite(*handle, Storage::FILE_HEADER_SIZE * 2);
166
+ ChecksumAndWrite(header_buffer, Storage::FILE_HEADER_SIZE * 2);
166
167
  // ensure that writing to disk is completed before returning
167
168
  handle->Sync();
168
169
  // we start with h2 as active_header, this way our initial write will be in h1
@@ -172,14 +173,14 @@ SingleFileBlockManager::SingleFileBlockManager(AttachedDatabase &db, string path
172
173
  } else {
173
174
  MainHeader::CheckMagicBytes(*handle);
174
175
  // otherwise, we check the metadata of the file
175
- header_buffer.ReadAndChecksum(*handle, 0);
176
+ ReadAndChecksum(header_buffer, 0);
176
177
  DeserializeHeaderStructure<MainHeader>(header_buffer.buffer);
177
178
 
178
179
  // read the database headers from disk
179
180
  DatabaseHeader h1, h2;
180
- header_buffer.ReadAndChecksum(*handle, Storage::FILE_HEADER_SIZE);
181
+ ReadAndChecksum(header_buffer, Storage::FILE_HEADER_SIZE);
181
182
  h1 = DeserializeHeaderStructure<DatabaseHeader>(header_buffer.buffer);
182
- header_buffer.ReadAndChecksum(*handle, Storage::FILE_HEADER_SIZE * 2);
183
+ ReadAndChecksum(header_buffer, Storage::FILE_HEADER_SIZE * 2);
183
184
  h2 = DeserializeHeaderStructure<DatabaseHeader>(header_buffer.buffer);
184
185
  // check the header with the highest iteration count
185
186
  if (h1.iteration > h2.iteration) {
@@ -195,6 +196,27 @@ SingleFileBlockManager::SingleFileBlockManager(AttachedDatabase &db, string path
195
196
  }
196
197
  }
197
198
 
199
+ void SingleFileBlockManager::ReadAndChecksum(FileBuffer &block, uint64_t location) const {
200
+ // read the buffer from disk
201
+ block.Read(*handle, location);
202
+ // compute the checksum
203
+ auto stored_checksum = Load<uint64_t>(block.InternalBuffer());
204
+ uint64_t computed_checksum = Checksum(block.buffer, block.size);
205
+ // verify the checksum
206
+ if (stored_checksum != computed_checksum) {
207
+ throw IOException("Corrupt database file: computed checksum %llu does not match stored checksum %llu in block",
208
+ computed_checksum, stored_checksum);
209
+ }
210
+ }
211
+
212
+ void SingleFileBlockManager::ChecksumAndWrite(FileBuffer &block, uint64_t location) const {
213
+ // compute the checksum and write it to the start of the buffer (if not temp buffer)
214
+ uint64_t checksum = Checksum(block.buffer, block.size);
215
+ Store<uint64_t>(checksum, block.InternalBuffer());
216
+ // now write the buffer
217
+ block.Write(*handle, location);
218
+ }
219
+
198
220
  void SingleFileBlockManager::Initialize(DatabaseHeader &header) {
199
221
  free_list_id = header.free_list;
200
222
  meta_block = header.meta_block;
@@ -317,12 +339,12 @@ unique_ptr<Block> SingleFileBlockManager::CreateBlock(block_id_t block_id, FileB
317
339
  void SingleFileBlockManager::Read(Block &block) {
318
340
  D_ASSERT(block.id >= 0);
319
341
  D_ASSERT(std::find(free_list.begin(), free_list.end(), block.id) == free_list.end());
320
- block.ReadAndChecksum(*handle, BLOCK_START + block.id * Storage::BLOCK_ALLOC_SIZE);
342
+ ReadAndChecksum(block, BLOCK_START + block.id * Storage::BLOCK_ALLOC_SIZE);
321
343
  }
322
344
 
323
345
  void SingleFileBlockManager::Write(FileBuffer &buffer, block_id_t block_id) {
324
346
  D_ASSERT(block_id >= 0);
325
- buffer.ChecksumAndWrite(*handle, BLOCK_START + block_id * Storage::BLOCK_ALLOC_SIZE);
347
+ ChecksumAndWrite(buffer, BLOCK_START + block_id * Storage::BLOCK_ALLOC_SIZE);
326
348
  }
327
349
 
328
350
  vector<block_id_t> SingleFileBlockManager::GetFreeListBlocks() {
@@ -431,8 +453,7 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
431
453
  Store<DatabaseHeader>(header, header_buffer.buffer);
432
454
  // now write the header to the file, active_header determines whether we write to h1 or h2
433
455
  // note that if active_header is h1 we write to h2, and vice versa
434
- header_buffer.ChecksumAndWrite(*handle,
435
- active_header == 1 ? Storage::FILE_HEADER_SIZE : Storage::FILE_HEADER_SIZE * 2);
456
+ ChecksumAndWrite(header_buffer, active_header == 1 ? Storage::FILE_HEADER_SIZE : Storage::FILE_HEADER_SIZE * 2);
436
457
  // switch active header to the other header
437
458
  active_header = 1 - active_header;
438
459
  //! Ensure the header write ends up on disk
@@ -4,6 +4,8 @@
4
4
 
5
5
  #include "src/optimizer/deliminator.cpp"
6
6
 
7
+ #include "src/optimizer/unnest_rewriter.cpp"
8
+
7
9
  #include "src/optimizer/column_lifetime_analyzer.cpp"
8
10
 
9
11
  #include "src/optimizer/expression_heuristics.cpp"