npm - duckdb - Versions diffs - 0.7.2-dev3402.0 → 0.7.2-dev3515.0 - Mend

duckdb 0.7.2-dev3402.0 → 0.7.2-dev3515.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (117)

package/src/duckdb/src/include/duckdb/common/vector.hpp CHANGED Viewed

@@ -12,6 +12,7 @@
 #include "duckdb/common/typedefs.hpp"
 #include "duckdb/common/likely.hpp"
 #include "duckdb/common/exception.hpp"
+#include "duckdb/common/memory_safety.hpp"
 #include <vector>
 namespace duckdb {
@@ -25,6 +26,7 @@ public:
 	using const_reference = typename original::const_reference;
 	using reference = typename original::reference;
+private:
 	static inline void AssertIndexInBounds(idx_t index, idx_t size) {
 #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
 		return;
@@ -35,6 +37,7 @@ public:
 #endif
 	}
+public:
 #ifdef DUCKDB_CLANG_TIDY
 	// This is necessary to tell clang-tidy that it reinitializes the variable after a move
 	[[clang::reinitializes]]
@@ -55,7 +58,7 @@ public:
 	template <bool _SAFE = false>
 	inline typename original::reference get(typename original::size_type __n) {
-		if (_SAFE) {
+		if (MemorySafety<_SAFE>::enabled) {
 			AssertIndexInBounds(__n, original::size());
 		}
 		return original::operator[](__n);
@@ -63,7 +66,7 @@ public:
 	template <bool _SAFE = false>
 	inline typename original::const_reference get(typename original::size_type __n) const {
-		if (_SAFE) {
+		if (MemorySafety<_SAFE>::enabled) {
 			AssertIndexInBounds(__n, original::size());
 		}
 		return original::operator[](__n);

package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp CHANGED Viewed

@@ -73,7 +73,7 @@ struct AggregateHTAppendState {
 	SelectionVector empty_vector;
 	SelectionVector new_groups;
 	Vector addresses;
-	unique_ptr<UnifiedVectorFormat[]> group_data;
+	unsafe_array_ptr<UnifiedVectorFormat> group_data;
 	DataChunk group_chunk;
 	TupleDataChunkState chunk_state;

package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp CHANGED Viewed

@@ -65,12 +65,12 @@ public:
 	//! returned by the JoinHashTable::Scan function and can be used to resume a
 	//! probe.
 	struct ScanStructure {
-		unique_ptr<UnifiedVectorFormat[]> key_data;
+		unsafe_array_ptr<UnifiedVectorFormat> key_data;
 		Vector pointers;
 		idx_t count;
 		SelectionVector sel_vector;
 		// whether or not the given tuple has found a match
-		unique_ptr<bool[]> found_match;
+		unsafe_array_ptr<bool> found_match;
 		JoinHashTable &ht;
 		bool finished;
@@ -212,8 +212,8 @@ private:
 	//! Insert the given set of locations into the HT with the given set of hashes
 	void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[], bool parallel);
-	idx_t PrepareKeys(DataChunk &keys, unique_ptr<UnifiedVectorFormat[]> &key_data, const SelectionVector *&current_sel,
-	                  SelectionVector &sel, bool build_side);
+	idx_t PrepareKeys(DataChunk &keys, unsafe_array_ptr<UnifiedVectorFormat> &key_data,
+	                  const SelectionVector *&current_sel, SelectionVector &sel, bool build_side);
 	//! Lock for combining data_collection when merging HTs
 	mutex data_lock;

package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp CHANGED Viewed

@@ -67,7 +67,7 @@ public:
 private:
 	bool enabled;
-	unique_ptr<bool[]> found_match;
+	unsafe_array_ptr<bool> found_match;
 	idx_t count;
 };

package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp CHANGED Viewed

@@ -68,7 +68,7 @@ private:
 	//! Build and probe statistics
 	PerfectHashJoinStats perfect_join_statistics;
 	//! Stores the occurences of each value in the build side
-	unique_ptr<bool[]> bitmap_build_idx;
+	unsafe_array_ptr<bool> bitmap_build_idx;
 	//! Stores the number of unique keys in the build side
 	idx_t unique_keys = 0;
 };

package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp CHANGED Viewed

@@ -83,7 +83,7 @@ public:
 		//! The total number of rows in the RHS
 		atomic<idx_t> count;
 		//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
-		unique_ptr<bool[]> found_match;
+		unsafe_array_ptr<bool> found_match;
 		//! Memory usage per thread
 		idx_t memory_per_thread;
 	};

package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp CHANGED Viewed

@@ -61,8 +61,6 @@ public:
 	DataChunk parse_chunk;
-	std::queue<unique_ptr<DataChunk>> cached_chunks;
 	ParserMode mode;
 public:

package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp CHANGED Viewed

@@ -60,12 +60,12 @@ public:
 	virtual ~BufferedCSVReader() {
 	}
-	unique_ptr<char[]> buffer;
+	unsafe_array_ptr<char> buffer;
 	idx_t buffer_size;
 	idx_t position;
 	idx_t start = 0;
-	vector<unique_ptr<char[]>> cached_buffers;
+	vector<unsafe_array_ptr<char>> cached_buffers;
 	unique_ptr<CSVFileHandle> file_handle;

package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp CHANGED Viewed

@@ -11,152 +11,52 @@
 #include "duckdb/common/file_system.hpp"
 #include "duckdb/common/mutex.hpp"
 #include "duckdb/common/helper.hpp"
+#include "duckdb/common/allocator.hpp"
 namespace duckdb {
+class Allocator;
+class FileSystem;
 struct CSVFileHandle {
 public:
-	explicit CSVFileHandle(unique_ptr<FileHandle> file_handle_p, bool enable_reset = true)
-	    : file_handle(std::move(file_handle_p)), reset_enabled(enable_reset) {
-		can_seek = file_handle->CanSeek();
-		plain_file_source = file_handle->OnDiskFile() && can_seek;
-		file_size = file_handle->GetFileSize();
-	}
+	CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p, const string &path_p,
+	              FileCompressionType compression, bool enable_reset = true);
-	bool CanSeek() {
-		return can_seek;
-	}
-	void Seek(idx_t position) {
-		if (!can_seek) {
-			throw InternalException("Cannot seek in this file");
-		}
-		file_handle->Seek(position);
-	}
-	idx_t SeekPosition() {
-		if (!can_seek) {
-			throw InternalException("Cannot seek in this file");
-		}
-		return file_handle->SeekPosition();
-	}
-	void Reset() {
-		if (plain_file_source) {
-			file_handle->Reset();
-		} else {
-			if (!reset_enabled) {
-				throw InternalException("Reset called but reset is not enabled for this CSV Handle");
-			}
-			read_position = 0;
-		}
-	}
-	bool PlainFileSource() {
-		return plain_file_source;
-	}
-	bool OnDiskFile() {
-		return file_handle->OnDiskFile();
-	}
-	idx_t FileSize() {
-		return file_size;
-	}
+	mutex main_mutex;
-	bool FinishedReading() {
-		return requested_bytes >= file_size;
-	}
+public:
+	bool CanSeek();
+	void Seek(idx_t position);
+	idx_t SeekPosition();
+	void Reset();
+	bool OnDiskFile();
-	idx_t Read(void *buffer, idx_t nr_bytes) {
-		requested_bytes += nr_bytes;
-		if (!plain_file_source) {
-			// not a plain file source: we need to do some bookkeeping around the reset functionality
-			idx_t result_offset = 0;
-			if (read_position < buffer_size) {
-				// we need to read from our cached buffer
-				auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
-				memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
-				result_offset += buffer_read_count;
-				read_position += buffer_read_count;
-				if (result_offset == nr_bytes) {
-					return nr_bytes;
-				}
-			} else if (!reset_enabled && cached_buffer) {
-				// reset is disabled, but we still have cached data
-				// we can remove any cached data
-				cached_buffer.reset();
-				buffer_size = 0;
-				buffer_capacity = 0;
-				read_position = 0;
-			}
-			// we have data left to read from the file
-			// read directly into the buffer
-			auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
-			file_size = file_handle->GetFileSize();
-			read_position += bytes_read;
-			if (reset_enabled) {
-				// if reset caching is enabled, we need to cache the bytes that we have read
-				if (buffer_size + bytes_read >= buffer_capacity) {
-					// no space; first enlarge the buffer
-					buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
+	idx_t FileSize();
-					auto new_buffer = unique_ptr<data_t[]>(new data_t[buffer_capacity]);
-					if (buffer_size > 0) {
-						memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
-					}
-					cached_buffer = std::move(new_buffer);
-				}
-				memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
-				buffer_size += bytes_read;
-			}
+	bool FinishedReading();
-			return result_offset + bytes_read;
-		} else {
-			return file_handle->Read(buffer, nr_bytes);
-		}
-	}
+	idx_t Read(void *buffer, idx_t nr_bytes);
-	string ReadLine() {
-		bool carriage_return = false;
-		string result;
-		char buffer[1];
-		while (true) {
-			idx_t bytes_read = Read(buffer, 1);
-			if (bytes_read == 0) {
-				return result;
-			}
-			if (carriage_return) {
-				if (buffer[0] != '\n') {
-					if (!file_handle->CanSeek()) {
-						throw BinderException(
-						    "Carriage return newlines not supported when reading CSV files in which we cannot seek");
-					}
-					file_handle->Seek(file_handle->SeekPosition() - 1);
-					return result;
-				}
-			}
-			if (buffer[0] == '\n') {
-				return result;
-			}
-			if (buffer[0] != '\r') {
-				result += buffer[0];
-			} else {
-				carriage_return = true;
-			}
-		}
-	}
+	string ReadLine();
+	void DisableReset();
-	void DisableReset() {
-		this->reset_enabled = false;
-	}
-	mutex main_mutex;
-	idx_t count = 0;
+	static unique_ptr<FileHandle> OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
+	                                             FileCompressionType compression);
+	static unique_ptr<CSVFileHandle> OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
+	                                          FileCompressionType compression, bool enable_reset);
 private:
+	FileSystem &fs;
+	Allocator &allocator;
 	unique_ptr<FileHandle> file_handle;
+	string path;
+	FileCompressionType compression;
 	bool reset_enabled = true;
 	bool can_seek = false;
-	bool plain_file_source = false;
+	bool on_disk_file = false;
 	idx_t file_size = 0;
 	// reset support
-	unique_ptr<data_t[]> cached_buffer;
+	AllocatedData cached_buffer;
 	idx_t read_position = 0;
 	idx_t buffer_size = 0;
 	idx_t buffer_capacity = 0;

package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp CHANGED Viewed

@@ -67,7 +67,7 @@ struct CSVBufferRead {
 		} else {
 			// 3) It starts in the current buffer and ends in the next buffer
 			D_ASSERT(next_buffer);
-			auto intersection = unique_ptr<char[]>(new char[length]);
+			auto intersection = make_unsafe_array<char>(length);
 			idx_t cur_pos = 0;
 			auto buffer_ptr = buffer->Ptr();
 			for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
@@ -85,7 +85,7 @@ struct CSVBufferRead {
 	shared_ptr<CSVBuffer> buffer;
 	shared_ptr<CSVBuffer> next_buffer;
-	vector<unique_ptr<char[]>> intersections;
+	vector<unsafe_array_ptr<char>> intersections;
 	optional_ptr<LineInfo> line_info;
 	idx_t buffer_start;

package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp CHANGED Viewed

@@ -46,9 +46,9 @@ protected:
 	// The actual pointer to the data
 	data_ptr_t data;
 	//! The owned data of the HT
-	unique_ptr<data_t[]> owned_data;
+	unsafe_array_ptr<data_t> owned_data;
 	//! Information on whether or not a specific group has any entries
-	unique_ptr<bool[]> group_is_set;
+	unsafe_array_ptr<bool> group_is_set;
 	//! The minimum values for each of the group columns
 	vector<Value> group_minima;

package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp CHANGED Viewed

@@ -113,7 +113,7 @@ private:
 	Vector statev;
 	//! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
-	unique_ptr<data_t[]> levels_flat_native;
+	unsafe_array_ptr<data_t> levels_flat_native;
 	//! For each level, the starting location in the levels_flat_native array
 	vector<idx_t> levels_flat_start;

package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp CHANGED Viewed

@@ -55,7 +55,7 @@ struct WriteCSVData : public BaseCSVData {
 	//! The size of the CSV file (in bytes) that we buffer before we flush it to disk
 	idx_t flush_size = 4096 * 8;
 	//! For each byte whether or not the CSV file requires quotes when containing the byte
-	unique_ptr<bool[]> requires_quotes;
+	unsafe_array_ptr<bool> requires_quotes;
 };
 struct ColumnInfo {
@@ -65,7 +65,7 @@ struct ColumnInfo {
 		names = std::move(names_p);
 		types = std::move(types_p);
 	}
-	void Serialize(FieldWriter &writer) {
+	void Serialize(FieldWriter &writer) const {
 		writer.WriteList<string>(names);
 		writer.WriteRegularSerializableList<LogicalType>(types);
 	}
@@ -99,8 +99,6 @@ struct ReadCSVData : public BaseCSVData {
 	bool single_threaded = false;
 	//! Reader bind data
 	MultiFileReaderBindData reader_bind;
-	//! If any file is a pipe
-	bool is_pipe = false;
 	vector<ColumnInfo> column_info;
 	void Initialize(unique_ptr<BufferedCSVReader> &reader) {

package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp CHANGED Viewed

@@ -27,12 +27,12 @@ struct SingleJoinRelation {
 //! Set of relations, used in the join graph.
 struct JoinRelationSet {
-	JoinRelationSet(unique_ptr<idx_t[]> relations, idx_t count) : relations(std::move(relations)), count(count) {
+	JoinRelationSet(unsafe_array_ptr<idx_t> relations, idx_t count) : relations(std::move(relations)), count(count) {
 	}
 	string ToString() const;
-	unique_ptr<idx_t[]> relations;
+	unsafe_array_ptr<idx_t> relations;
 	idx_t count;
 	static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub);
@@ -55,7 +55,7 @@ public:
 	//! Create or get a JoinRelationSet from a set of relation bindings
 	JoinRelationSet &GetJoinRelation(unordered_set<idx_t> &bindings);
 	//! Create or get a JoinRelationSet from a (sorted, duplicate-free!) list of relations
-	JoinRelationSet &GetJoinRelation(unique_ptr<idx_t[]> relations, idx_t count);
+	JoinRelationSet &GetJoinRelation(unsafe_array_ptr<idx_t> relations, idx_t count);
 	//! Union two sets of relations together and create a new relation set
 	JoinRelationSet &Union(JoinRelationSet &left, JoinRelationSet &right);
 	// //! Create the set difference of left \ right (i.e. all elements in left that are not in right)

package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp CHANGED Viewed

@@ -24,6 +24,8 @@ enum class OnConflictAction : uint8_t {
 	REPLACE // Only used in transform/bind step, changed to UPDATE later
 };
+enum class InsertColumnOrder : uint8_t { INSERT_BY_POSITION = 0, INSERT_BY_NAME = 1 };
 class OnConflictInfo {
 public:
 	OnConflictInfo();
@@ -75,6 +77,9 @@ public:
 	//! Whether or not this a DEFAULT VALUES
 	bool default_values = false;
+	//! INSERT BY POSITION or INSERT BY NAME
+	InsertColumnOrder column_order = InsertColumnOrder::INSERT_BY_POSITION;
 protected:
 	InsertStatement(const InsertStatement &other);

package/src/duckdb/src/include/duckdb/planner/binder.hpp CHANGED Viewed

@@ -39,6 +39,7 @@ class LogicalProjection;
 class ColumnList;
 class ExternalDependency;
 class TableFunction;
+class TableStorageInfo;
 struct CreateInfo;
 struct BoundCreateTableInfo;
@@ -167,8 +168,8 @@ public:
 	unique_ptr<LogicalOperator> BindUpdateSet(LogicalOperator &op, unique_ptr<LogicalOperator> root,
 	                                          UpdateSetInfo &set_info, TableCatalogEntry &table,
 	                                          vector<PhysicalIndex> &columns);
-	void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
-	                                TableCatalogEntry &table);
+	void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
+	                                TableCatalogEntry &table, TableStorageInfo &storage_info);
 	void BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &table, InsertStatement &stmt);
 	static void BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema);

package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp CHANGED Viewed

@@ -20,7 +20,7 @@ struct ArenaChunk {
 	AllocatedData data;
 	idx_t current_position;
 	idx_t maximum_size;
-	unique_ptr<ArenaChunk> next;
+	unsafe_unique_ptr<ArenaChunk> next;
 	ArenaChunk *prev;
 };
@@ -56,7 +56,7 @@ private:
 	//! Internal allocator that is used by the arena allocator
 	Allocator &allocator;
 	idx_t current_capacity;
-	unique_ptr<ArenaChunk> head;
+	unsafe_unique_ptr<ArenaChunk> head;
 	ArenaChunk *tail;
 	//! An allocator wrapper using this arena allocator
 	Allocator arena_allocator;

package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp CHANGED Viewed

@@ -9,6 +9,7 @@
 #pragma once
 #include "duckdb/storage/storage_info.hpp"
+#include "duckdb/common/file_buffer.hpp"
 namespace duckdb {
 class BlockHandle;
@@ -30,9 +31,15 @@ public:
 	//! Returns whether or not the BufferHandle is valid.
 	DUCKDB_API bool IsValid() const;
 	//! Returns a pointer to the buffer data. Handle must be valid.
-	DUCKDB_API data_ptr_t Ptr() const;
+	inline data_ptr_t Ptr() const {
+		D_ASSERT(IsValid());
+		return node->buffer;
+	}
 	//! Returns a pointer to the buffer data. Handle must be valid.
-	DUCKDB_API data_ptr_t Ptr();
+	inline data_ptr_t Ptr() {
+		D_ASSERT(IsValid());
+		return node->buffer;
+	}
 	//! Gets the underlying file buffer. Handle must be valid.
 	DUCKDB_API FileBuffer &GetFileBuffer();
 	//! Destroys the buffer handle

package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp CHANGED Viewed

@@ -138,7 +138,7 @@ private:
 		StringStatsData string_data;
 	} stats_union;
 	//! Child stats (for LIST and STRUCT)
-	unique_ptr<BaseStatistics[]> child_stats;
+	unsafe_array_ptr<BaseStatistics> child_stats;
 };
 } // namespace duckdb

package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp CHANGED Viewed

@@ -44,7 +44,7 @@ struct RowGroupAppendState {
 	//! The current row_group we are appending to
 	RowGroup *row_group;
 	//! The column append states
-	unique_ptr<ColumnAppendState[]> states;
+	unsafe_array_ptr<ColumnAppendState> states;
 	//! Offset within the row_group
 	idx_t offset_in_row_group;
 };

package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp CHANGED Viewed

@@ -99,7 +99,7 @@ public:
 	//! The maximum row within the row group
 	idx_t max_row_group_row;
 	//! Child column scans
-	unique_ptr<ColumnScanState[]> column_scans;
+	unsafe_array_ptr<ColumnScanState> column_scans;
 	//! Row group segment tree
 	RowGroupSegmentTree *row_groups;
 	//! The total maximum row index

package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp CHANGED Viewed

@@ -96,8 +96,8 @@ private:
 struct UpdateNodeData {
 	unique_ptr<UpdateInfo> info;
-	unique_ptr<sel_t[]> tuples;
-	unique_ptr<data_t[]> tuple_data;
+	unsafe_array_ptr<sel_t> tuples;
+	unsafe_array_ptr<data_t> tuple_data;
 };
 struct UpdateNode {

package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp CHANGED Viewed

@@ -8,7 +8,9 @@
 #pragma once
+#include "duckdb/storage/storage_info.hpp"
 #include "duckdb/common/types/value.hpp"
+#include "duckdb/common/unordered_set.hpp"
 namespace duckdb {

package/src/duckdb/src/main/client_context.cpp CHANGED Viewed

@@ -1013,6 +1013,7 @@ void ClientContext::TryBindRelation(Relation &relation, vector<ColumnDefinition>
 	D_ASSERT(!relation.GetAlias().empty());
 	D_ASSERT(!relation.ToString().empty());
 #endif
+	client_data->http_state = make_uniq<HTTPState>();
 	RunFunctionInTransaction([&]() {
 		// bind the expressions
 		auto binder = Binder::CreateBinder(*this);

package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp CHANGED Viewed

@@ -574,7 +574,7 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalO
 	D_ASSERT(node.set.count == 1);
 	auto relation_id = node.set.relations[0];
-	double lowest_card_found = NumericLimits<double>::Maximum();
+	double lowest_card_found = node.GetBaseTableCardinality();
 	for (auto &column : relation_attributes[relation_id].columns) {
 		auto card_after_filters = node.GetBaseTableCardinality();
 		ColumnBinding key = ColumnBinding(relation_id, column);

package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp CHANGED Viewed

@@ -147,6 +147,18 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
 			}
 		}
 	}
+	if (op->type == LogicalOperatorType::LOGICAL_ANY_JOIN && non_reorderable_operation) {
+		auto &join = op->Cast<LogicalAnyJoin>();
+		if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
+			auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
+			auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
+			if (rhs_cardinality > lhs_cardinality * 2) {
+				join.join_type = JoinType::RIGHT;
+				std::swap(join.children[0], join.children[1]);
+			}
+		}
+	}
 	if (non_reorderable_operation) {
 		// we encountered a non-reordable operation (setop or non-inner join)
 		// we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
@@ -648,7 +660,19 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
 			// we have to add a cross product; we add it between the two smallest relations
 			optional_ptr<JoinNode> smallest_plans[2];
 			idx_t smallest_index[2];
-			for (idx_t i = 0; i < join_relations.size(); i++) {
+			D_ASSERT(join_relations.size() >= 2);
+			// first just add the first two join relations. It doesn't matter the cost as the JOO
+			// will swap them on estimated cardinality anyway.
+			for (idx_t i = 0; i < 2; i++) {
+				auto current_plan = plans[&join_relations[i].get()].get();
+				smallest_plans[i] = current_plan;
+				smallest_index[i] = i;
+			}
+			// if there are any other join relations that don't have connections
+			// add them if they have lower estimated cardinality.
+			for (idx_t i = 2; i < join_relations.size(); i++) {
 				// get the plan for this relation
 				auto current_plan = plans[&join_relations[i].get()].get();
 				// check if the cardinality is smaller than the smallest two found so far

package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp CHANGED Viewed

@@ -35,7 +35,7 @@ bool JoinRelationSet::IsSubset(JoinRelationSet &super, JoinRelationSet &sub) {
 	return false;
 }
-JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unique_ptr<idx_t[]> relations, idx_t count) {
+JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unsafe_array_ptr<idx_t> relations, idx_t count) {
 	// now look it up in the tree
 	reference<JoinRelationTreeNode> info(root);
 	for (idx_t i = 0; i < count; i++) {
@@ -59,7 +59,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unique_ptr<idx_t[]> rel
 //! Create or get a JoinRelationSet from a single node with the given index
 JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
 	// create a sorted vector of the relations
-	auto relations = unique_ptr<idx_t[]>(new idx_t[1]);
+	auto relations = make_unsafe_array<idx_t>(1);
 	relations[0] = index;
 	idx_t count = 1;
 	return GetJoinRelation(std::move(relations), count);
@@ -67,7 +67,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
 JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &bindings) {
 	// create a sorted vector of the relations
-	unique_ptr<idx_t[]> relations = bindings.empty() ? nullptr : unique_ptr<idx_t[]>(new idx_t[bindings.size()]);
+	unsafe_array_ptr<idx_t> relations = bindings.empty() ? nullptr : make_unsafe_array<idx_t>(bindings.size());
 	idx_t count = 0;
 	for (auto &entry : bindings) {
 		relations[count++] = entry;
@@ -77,7 +77,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &b
 }
 JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelationSet &right) {
-	auto relations = unique_ptr<idx_t[]>(new idx_t[left.count + right.count]);
+	auto relations = make_unsafe_array<idx_t>(left.count + right.count);
 	idx_t count = 0;
 	// move through the left and right relations, eliminating duplicates
 	idx_t i = 0, j = 0;
@@ -113,7 +113,7 @@ JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelati
 }
 // JoinRelationSet *JoinRelationSetManager::Difference(JoinRelationSet *left, JoinRelationSet *right) {
-// 	auto relations = unique_ptr<idx_t[]>(new idx_t[left->count]);
+// 	auto relations = unsafe_array_ptr<idx_t>(new idx_t[left->count]);
 // 	idx_t count = 0;
 // 	// move through the left and right relations
 // 	idx_t i = 0, j = 0;