duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp:

```diff
@@ -25,8 +25,9 @@
 namespace duckdb {
 
 ParallelCSVReader::ParallelCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
-                                     unique_ptr<CSVBufferRead> buffer_p,
-
+                                     unique_ptr<CSVBufferRead> buffer_p, idx_t first_pos_first_buffer_p,
+                                     const vector<LogicalType> &requested_types)
+    : BaseCSVReader(context, std::move(options_p), requested_types), first_pos_first_buffer(first_pos_first_buffer_p) {
     Initialize(requested_types);
     SetBufferRead(std::move(buffer_p));
     if (options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1) {
@@ -52,9 +53,34 @@ bool ParallelCSVReader::NewLineDelimiter(bool carry, bool carry_followed_by_nl,
     return (carry && carry_followed_by_nl) || (!carry && first_char);
 }
 
+void ParallelCSVReader::SkipEmptyLines() {
+    idx_t new_pos_buffer = position_buffer;
+    if (parse_chunk.data.size() == 1) {
+        // Empty lines are null data.
+        return;
+    }
+    for (; new_pos_buffer < end_buffer; new_pos_buffer++) {
+        if (StringUtil::CharacterIsNewline((*buffer)[new_pos_buffer])) {
+            bool carrier_return = (*buffer)[new_pos_buffer] == '\r';
+            new_pos_buffer++;
+            if (carrier_return && new_pos_buffer < buffer_size && (*buffer)[new_pos_buffer] == '\n') {
+                position_buffer++;
+            }
+            if (new_pos_buffer > end_buffer) {
+                return;
+            }
+            position_buffer = new_pos_buffer;
+        } else if ((*buffer)[new_pos_buffer] != ' ') {
+            return;
+        }
+    }
+}
+
 bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
     if (buffer->buffer->IsCSVFileFirstBuffer() && start_buffer == position_buffer &&
-        start_buffer ==
+        start_buffer == first_pos_first_buffer) {
+        start_buffer = buffer->buffer->GetStart();
+        position_buffer = start_buffer;
         verification_positions.beginning_of_first_line = position_buffer;
         verification_positions.end_of_last_line = position_buffer;
         // First buffer doesn't need any setting
@@ -70,11 +96,23 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
                 if (position_buffer > end_buffer) {
                     return false;
                 }
+                SkipEmptyLines();
+                if (verification_positions.beginning_of_first_line == 0) {
+                    verification_positions.beginning_of_first_line = position_buffer;
+                }
+
+                verification_positions.end_of_last_line = position_buffer;
                 return true;
             }
         }
         return false;
     }
+    SkipEmptyLines();
+    if (verification_positions.beginning_of_first_line == 0) {
+        verification_positions.beginning_of_first_line = position_buffer;
+    }
+
+    verification_positions.end_of_last_line = position_buffer;
     return true;
 }
 
@@ -102,6 +140,11 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
             }
         }
     }
+    SkipEmptyLines();
+
+    if (position_buffer > buffer_size) {
+        break;
+    }
 
     if (position_buffer >= end_buffer && !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
         break;
@@ -113,18 +156,20 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
     }
     idx_t position_set = position_buffer;
     start_buffer = position_buffer;
-
     // We check if we can add this line
     // disable the projection pushdown while reading the first line
     // otherwise the first line parsing can be influenced by which columns we are reading
     auto column_ids = std::move(reader_data.column_ids);
     auto column_mapping = std::move(reader_data.column_mapping);
     InitializeProjection();
-
+    try {
+        successfully_read_first_line = TryParseSimpleCSV(first_line_chunk, error_message, true);
+    } catch (...) {
+        successfully_read_first_line = false;
+    }
     // restore the projection pushdown
     reader_data.column_ids = std::move(column_ids);
     reader_data.column_mapping = std::move(column_mapping);
-
     end_buffer = end_buffer_real;
     start_buffer = position_set;
     if (position_buffer >= end_buffer) {
@@ -190,27 +235,55 @@ bool ParallelCSVReader::BufferRemainder() {
     return true;
 }
 
+void VerifyLineLength(idx_t line_size, idx_t max_line_size) {
+    if (line_size > max_line_size) {
+        // FIXME: this should also output the correct estimated linenumber where it broke
+        throw InvalidInputException("Maximum line size of %llu bytes exceeded!", max_line_size);
+    }
+}
+
+bool AllNewLine(string_t value, idx_t column_amount) {
+    auto value_str = value.GetString();
+    if (value_str.empty() && column_amount == 1) {
+        // This is a one column (empty)
+        return false;
+    }
+    for (idx_t i = 0; i < value.GetSize(); i++) {
+        if (!StringUtil::CharacterIsNewline(value_str[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
 bool ParallelCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message, bool try_add_line) {
     // used for parsing algorithm
+    if (start_buffer == buffer_size) {
+        // Nothing to read
+        finished = true;
+        return true;
+    }
     D_ASSERT(end_buffer <= buffer_size);
     bool finished_chunk = false;
     idx_t column = 0;
     idx_t offset = 0;
     bool has_quotes = false;
+
    vector<idx_t> escape_positions;
     if ((start_buffer == buffer->buffer_start || start_buffer == buffer->buffer_end) && !try_add_line) {
         // First time reading this buffer piece
         if (!SetPosition(insert_chunk)) {
-            // This means the buffer size does not contain a new line
-            if (position_buffer - start_buffer == options.buffer_size) {
-                error_message = "Line does not fit in one buffer. Increase the buffer size.";
-                return false;
-            }
             finished = true;
             return true;
         }
     }
-
+    if (position_buffer == buffer_size) {
+        // Nothing to read
+        finished = true;
+        return true;
+    }
+    // Keep track of line size
+    idx_t line_start = position_buffer;
     // start parsing the first value
     goto value_start;
 
@@ -242,11 +315,16 @@ normal : {
     if (c == options.delimiter[0]) {
         // delimiter: end the value and add it to the chunk
         goto add_value;
+    } else if (c == options.quote[0] && try_add_line) {
+        return false;
     } else if (StringUtil::CharacterIsNewline(c)) {
         // newline: add row
-        if (column > 0 || try_add_line ||
+        if (column > 0 || try_add_line || parse_chunk.data.size() == 1) {
             goto add_row;
         }
+        if (column == 0 && position_buffer == start_buffer) {
+            start_buffer++;
+        }
     }
 }
 if (!BufferRemainder()) {
@@ -285,12 +363,15 @@ add_row : {
         parse_chunk.Reset();
         return success;
     } else {
+        VerifyLineLength(position_buffer - line_start, options.maximum_line_size);
+        line_start = position_buffer;
         finished_chunk = AddRow(insert_chunk, column, error_message);
     }
     // increase position by 1 and move start to the new position
     offset = 0;
     has_quotes = false;
-
+    position_buffer++;
+    start_buffer = position_buffer;
     verification_positions.end_of_last_line = position_buffer;
     if (reached_remainder_state) {
         goto final_state;
@@ -309,7 +390,10 @@ add_row : {
     // newline after carriage return: skip
     // increase position by 1 and move start to the new position
     start_buffer = ++position_buffer;
+
+    SkipEmptyLines();
     verification_positions.end_of_last_line = position_buffer;
+    start_buffer = position_buffer;
     if (reached_remainder_state) {
         goto final_state;
     }
@@ -331,6 +415,9 @@ add_row : {
     error_message = "Wrong NewLine Identifier. Expecting \\r or \\n";
     return false;
 }
+SkipEmptyLines();
+verification_positions.end_of_last_line = position_buffer;
+start_buffer = position_buffer;
 // \n newline, move to value start
 if (finished_chunk) {
     goto final_state;
@@ -391,7 +478,7 @@ unquote : {
 } else if (StringUtil::CharacterIsNewline(c)) {
     offset = 1;
     // FIXME: should this be an assertion?
-    D_ASSERT(column == parse_chunk.ColumnCount() - 1);
+    D_ASSERT(try_add_line || (!try_add_line && column == parse_chunk.ColumnCount() - 1));
     goto add_row;
 } else if (position_buffer >= end_buffer) {
     // reached end of buffer
@@ -448,22 +535,27 @@ final_state : {
 }
 // If this is the last buffer, we have to read the last value
 if (buffer->buffer->IsCSVFileLastBuffer() || (buffer->next_buffer && buffer->next_buffer->IsCSVFileLastBuffer())) {
-    if (column > 0 ||
+    if (column > 0 || start_buffer != position_buffer || try_add_line ||
+        (insert_chunk.data.size() == 1 && start_buffer != position_buffer)) {
         // remaining values to be added to the chunk
         auto str_value = buffer->GetValue(start_buffer, position_buffer, offset);
-
-
-
-
+        if (!AllNewLine(str_value, insert_chunk.data.size()) || offset == 0) {
+            AddValue(str_value, column, escape_positions, has_quotes);
+            if (try_add_line) {
+                bool success = column == return_types.size();
+                if (success) {
+                    AddRow(insert_chunk, column, error_message);
+                    success = Flush(insert_chunk);
+                }
+                parse_chunk.Reset();
+                reached_remainder_state = false;
+                return success;
+            } else {
+                VerifyLineLength(position_buffer - line_start, options.maximum_line_size);
+                line_start = position_buffer;
                 AddRow(insert_chunk, column, error_message);
-
+                verification_positions.end_of_last_line = position_buffer;
             }
-        parse_chunk.Reset();
-        reached_remainder_state = false;
-        return success;
-    } else {
-        AddRow(insert_chunk, column, error_message);
-        verification_positions.end_of_last_line = position_buffer;
         }
     }
 }
@@ -471,11 +563,14 @@ final_state : {
 if (mode == ParserMode::PARSING) {
     Flush(insert_chunk);
 }
-if (position_buffer
-    !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
+if (position_buffer - verification_positions.end_of_last_line > options.buffer_size) {
     error_message = "Line does not fit in one buffer. Increase the buffer size.";
     return false;
 }
+end_buffer = buffer_size;
+SkipEmptyLines();
+end_buffer = buffer->buffer_end;
+verification_positions.end_of_last_line = position_buffer;
 if (position_buffer >= end_buffer) {
     if (position_buffer >= end_buffer) {
         if (position_buffer == end_buffer && StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1]) &&
```
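Most of the churn in this file is the new `SkipEmptyLines()` plumbing: wherever a row ends, the reader now also consumes `\r\n` pairs and blank (space-only) lines before parsing resumes. Below is a self-contained sketch of just that scanning idea, with hypothetical buffer/position names rather than the reader's actual chunk and verification state:

```cpp
#include <cstddef>
#include <cstring>
#include <iostream>

// Illustrative only: advance `pos` past empty lines in buf[0, end),
// mirroring the \r, \r\n, \n, and blank-line handling in the hunks above.
static void SkipEmptyLines(const char *buf, std::size_t end, std::size_t &pos) {
    std::size_t p = pos;
    while (p < end) {
        char c = buf[p];
        if (c == '\r') {
            p++;
            if (p < end && buf[p] == '\n') {
                p++; // a \r\n pair counts as a single newline
            }
            pos = p; // consumed an empty line
        } else if (c == '\n') {
            pos = ++p;
        } else if (c == ' ') {
            p++; // line is blank so far; keep scanning
        } else {
            return; // real data starts here; pos stays at its last line start
        }
    }
}

int main() {
    const char *buf = "\r\n\n   \nvalue,1\n";
    std::size_t pos = 0;
    SkipEmptyLines(buf, std::strlen(buf), pos);
    std::cout << (buf + pos); // prints the first real data line: "value,1"
}
```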
package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp:

```diff
@@ -3,6 +3,8 @@
 #include "duckdb/common/hive_partitioning.hpp"
 #include "duckdb/common/file_system.hpp"
 #include "duckdb/common/file_opener.hpp"
+#include "duckdb/common/types/uuid.hpp"
+#include "duckdb/common/string_util.hpp"
 
 #include <algorithm>
 
@@ -40,6 +42,7 @@ public:
 //===--------------------------------------------------------------------===//
 // Sink
 //===--------------------------------------------------------------------===//
+
 void MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
     auto &fs = FileSystem::GetFileSystem(context);
     auto file_path = tmp_file_path.substr(0, tmp_file_path.length() - 4);
@@ -111,10 +114,10 @@ void PhysicalCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gst
     for (idx_t i = 0; i < partitions.size(); i++) {
         string hive_path =
             CreateDirRecursive(partition_columns, names, partition_key_map[i]->values, trimmed_path, fs);
-        string full_path
-        if (fs.FileExists(full_path) && !
+        string full_path(filename_pattern.CreateFilename(fs, hive_path, function.extension, l.writer_offset));
+        if (fs.FileExists(full_path) && !overwrite_or_ignore) {
             throw IOException("failed to create " + full_path +
-                              ", file exists! Enable
+                              ", file exists! Enable OVERWRITE_OR_IGNORE option to force writing");
         }
         // Create a writer for the current file
         auto fun_data_global = function.copy_to_initialize_global(context.client, *bind_data, full_path);
@@ -184,10 +187,9 @@ unique_ptr<LocalSinkState> PhysicalCopyToFile::GetLocalSinkState(ExecutionContex
             this_file_offset = g.last_file_offset++;
         }
         auto &fs = FileSystem::GetFileSystem(context.client);
-        string output_path
-
-
-        throw IOException("%s exists! Enable ALLOW_OVERWRITE option to force writing", output_path);
+        string output_path(filename_pattern.CreateFilename(fs, file_path, function.extension, this_file_offset));
+        if (fs.FileExists(output_path) && !overwrite_or_ignore) {
+            throw IOException("%s exists! Enable OVERWRITE_OR_IGNORE option to force writing", output_path);
         }
         res->global_state = function.copy_to_initialize_global(context.client, *bind_data, output_path);
     }
@@ -199,17 +201,17 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
     if (partition_output || per_thread_output) {
         auto &fs = FileSystem::GetFileSystem(context);
 
-        if (fs.FileExists(file_path) && !
-            throw IOException("%s exists! Enable
+        if (fs.FileExists(file_path) && !overwrite_or_ignore) {
+            throw IOException("%s exists! Enable OVERWRITE_OR_IGNORE option to force writing", file_path);
         }
         if (!fs.DirectoryExists(file_path)) {
             fs.CreateDirectory(file_path);
-        } else if (!
+        } else if (!overwrite_or_ignore) {
             idx_t n_files = 0;
             fs.ListFiles(
                 file_path, [&n_files](const string &path, bool) { n_files++; }, FileOpener::Get(context));
             if (n_files > 0) {
-                throw IOException("Directory %s is not empty! Enable
+                throw IOException("Directory %s is not empty! Enable OVERWRITE_OR_IGNORE option to force writing",
                                   file_path);
             }
         }
```
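These hunks route every output name through `filename_pattern.CreateFilename(fs, path, extension, offset)`, backed by the new `filename_pattern.{cpp,hpp}` files in the list above; the diff does not show that class's internals. Here is only a rough sketch of the idea, assuming a placeholder token is substituted with a per-writer offset (`SimpleFilenamePattern` and the `{i}` token are illustrative, not DuckDB's actual API):

```cpp
#include <cstddef>
#include <iostream>
#include <string>

// Hypothetical simplification: "{i}" in the pattern is replaced by a
// per-writer offset so parallel and partitioned COPY writers never collide.
struct SimpleFilenamePattern {
    std::string base = "data_{i}"; // assumed default pattern

    std::string CreateFilename(const std::string &dir, const std::string &ext, std::size_t offset) const {
        std::string name = base;
        auto token = name.find("{i}");
        if (token != std::string::npos) {
            name.replace(token, 3, std::to_string(offset));
        }
        return dir + "/" + name + "." + ext;
    }
};

int main() {
    SimpleFilenamePattern pattern;
    // Each writer thread/partition passes its own offset
    // (cf. l.writer_offset and this_file_offset in the hunks above).
    std::cout << pattern.CreateFilename("/tmp/out", "parquet", 0) << "\n"; // /tmp/out/data_0.parquet
    std::cout << pattern.CreateFilename("/tmp/out", "parquet", 7) << "\n"; // /tmp/out/data_7.parquet
}
```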
package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp:

```diff
@@ -1,10 +1,11 @@
 #include "duckdb/execution/operator/persistent/physical_delete.hpp"
 
+#include "duckdb/common/atomic.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/storage/data_table.hpp"
-#include "duckdb/transaction/duck_transaction.hpp"
-#include "duckdb/common/types/column_data_collection.hpp"
 #include "duckdb/storage/table/scan_state.hpp"
+#include "duckdb/transaction/duck_transaction.hpp"
 
 namespace duckdb {
 
```
package/src/duckdb/src/execution/operator/persistent/physical_export.cpp:

```diff
@@ -2,6 +2,7 @@
 
 #include "duckdb/catalog/catalog.hpp"
 #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
+#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
 #include "duckdb/common/file_system.hpp"
 #include "duckdb/common/string_util.hpp"
 #include "duckdb/parallel/meta_pipeline.hpp"
@@ -16,12 +17,12 @@ namespace duckdb {
 
 using std::stringstream;
 
-static void WriteCatalogEntries(stringstream &ss, vector<CatalogEntry
+static void WriteCatalogEntries(stringstream &ss, vector<reference<CatalogEntry>> &entries) {
     for (auto &entry : entries) {
-        if (entry
+        if (entry.get().internal) {
             continue;
         }
-        ss << entry
+        ss << entry.get().ToSQL() << std::endl;
     }
     ss << std::endl;
 }
@@ -42,8 +43,8 @@ static void WriteValueAsSQL(stringstream &ss, Value &val) {
     }
 }
 
-static void WriteCopyStatement(FileSystem &fs, stringstream &ss,
-
+static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info, ExportedTableData &exported_table,
+                               CopyFunction const &function) {
     ss << "COPY ";
 
     if (exported_table.schema_name != DEFAULT_SCHEMA) {
@@ -107,52 +108,53 @@ void PhysicalExport::GetData(ExecutionContext &context, DataChunk &chunk, Global
     auto *opener = FileSystem::GetFileOpener(ccontext);
 
     // gather all catalog types to export
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
+    vector<reference<CatalogEntry>> schemas;
+    vector<reference<CatalogEntry>> custom_types;
+    vector<reference<CatalogEntry>> sequences;
+    vector<reference<CatalogEntry>> tables;
+    vector<reference<CatalogEntry>> views;
+    vector<reference<CatalogEntry>> indexes;
+    vector<reference<CatalogEntry>> macros;
 
     auto schema_list = Catalog::GetSchemas(ccontext, info->catalog);
     for (auto &schema : schema_list) {
         if (!schema->internal) {
-            schemas.push_back(schema);
+            schemas.push_back(*schema);
         }
         schema->Scan(context.client, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) {
             if (entry->internal) {
                 return;
             }
             if (entry->type != CatalogType::TABLE_ENTRY) {
-                views.push_back(entry);
+                views.push_back(*entry);
             }
         });
         schema->Scan(context.client, CatalogType::SEQUENCE_ENTRY,
-                     [&](CatalogEntry *entry) { sequences.push_back(entry); });
+                     [&](CatalogEntry *entry) { sequences.push_back(*entry); });
         schema->Scan(context.client, CatalogType::TYPE_ENTRY,
-                     [&](CatalogEntry *entry) { custom_types.push_back(entry); });
-        schema->Scan(context.client, CatalogType::INDEX_ENTRY, [&](CatalogEntry *entry) { indexes.push_back(entry); });
+                     [&](CatalogEntry *entry) { custom_types.push_back(*entry); });
+        schema->Scan(context.client, CatalogType::INDEX_ENTRY, [&](CatalogEntry *entry) { indexes.push_back(*entry); });
         schema->Scan(context.client, CatalogType::MACRO_ENTRY, [&](CatalogEntry *entry) {
             if (!entry->internal && entry->type == CatalogType::MACRO_ENTRY) {
-                macros.push_back(entry);
+                macros.push_back(*entry);
             }
         });
         schema->Scan(context.client, CatalogType::TABLE_MACRO_ENTRY, [&](CatalogEntry *entry) {
             if (!entry->internal && entry->type == CatalogType::TABLE_MACRO_ENTRY) {
-                macros.push_back(entry);
+                macros.push_back(*entry);
             }
         });
     }
 
     // consider the order of tables because of foreign key constraint
     for (idx_t i = 0; i < exported_tables.data.size(); i++) {
-        tables.push_back(
+        tables.push_back(exported_tables.data[i].entry);
     }
 
     // order macro's by timestamp so nested macro's are imported nicely
-    sort(macros.begin(), macros.end(),
-
+    sort(macros.begin(), macros.end(), [](const reference<CatalogEntry> &lhs, const reference<CatalogEntry> &rhs) {
+        return lhs.get().oid < rhs.get().oid;
+    });
 
     // write the schema.sql file
     // export order is SCHEMA -> SEQUENCE -> TABLE -> VIEW -> INDEX
@@ -172,9 +174,8 @@ void PhysicalExport::GetData(ExecutionContext &context, DataChunk &chunk, Global
     // for every table, we write COPY INTO statement with the specified options
     stringstream load_ss;
     for (idx_t i = 0; i < exported_tables.data.size(); i++) {
-        auto &table = exported_tables.data[i].entry;
         auto exported_table_info = exported_tables.data[i].table_data;
-        WriteCopyStatement(fs, load_ss,
+        WriteCopyStatement(fs, load_ss, *info, exported_table_info, function);
     }
     WriteStringStreamToFile(fs, opener, load_ss, fs.JoinPath(info->file_path, "load.sql"));
     state.finished = true;
```
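The recurring change above is `vector<CatalogEntry *>` becoming `vector<reference<CatalogEntry>>`, with `*entry` on insert and `.get()` on access; DuckDB's `reference<T>` comes from the new `reference_map.hpp` header in the file list. The same pattern, sketched with the standard library's wrapper and a hypothetical `Entry` type standing in for `CatalogEntry`:

```cpp
#include <algorithm>
#include <functional> // std::reference_wrapper
#include <iostream>
#include <vector>

// Hypothetical stand-in for CatalogEntry; a reference is non-null by construction.
struct Entry {
    int oid;
    bool internal;
};

int main() {
    Entry a{2, false}, b{1, false}, c{3, true};
    // Unlike plain T&, reference_wrapper is copyable and can live in a vector.
    std::vector<std::reference_wrapper<Entry>> macros{a, b, c};
    // Same sort-by-oid idiom as the macro-ordering hunk above.
    std::sort(macros.begin(), macros.end(),
              [](const std::reference_wrapper<Entry> &lhs, const std::reference_wrapper<Entry> &rhs) {
                  return lhs.get().oid < rhs.get().oid;
              });
    for (auto &entry : macros) {
        if (entry.get().internal) {
            continue; // mirrors the entry.get().internal check
        }
        std::cout << entry.get().oid << "\n"; // prints 1, then 2
    }
}
```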
package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp:

```diff
@@ -1,7 +1,7 @@
 #include "duckdb/execution/operator/persistent/physical_insert.hpp"
 #include "duckdb/parallel/thread_context.hpp"
 #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
-#include "duckdb/common/types/column_data_collection.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/storage/data_table.hpp"
```
package/src/duckdb/src/execution/operator/persistent/physical_update.cpp:

```diff
@@ -1,12 +1,13 @@
 #include "duckdb/execution/operator/persistent/physical_update.hpp"
-
+
 #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
-#include "duckdb/common/types/column_data_collection.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/parallel/thread_context.hpp"
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
 #include "duckdb/storage/data_table.hpp"
-#include "duckdb/main/client_context.hpp"
 
 namespace duckdb {
 
```
package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp:

```diff
@@ -103,7 +103,7 @@ idx_t PhysicalTableScan::GetBatchIndex(ExecutionContext &context, DataChunk &chu
 }
 
 string PhysicalTableScan::GetName() const {
-    return StringUtil::Upper(function.name);
+    return StringUtil::Upper(function.name + " " + function.extra_info);
 }
 
 string PhysicalTableScan::ParamsToString() const {
```
package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp:

```diff
@@ -1,6 +1,6 @@
 #include "duckdb/execution/operator/set/physical_recursive_cte.hpp"
 
-#include "duckdb/common/types/column_data_collection.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/aggregate_hashtable.hpp"
 #include "duckdb/execution/executor.hpp"
@@ -183,12 +183,12 @@ void PhysicalRecursiveCTE::BuildPipelines(Pipeline &current, MetaPipeline &meta_
 
     // the LHS of the recursive CTE is our initial state
     auto initial_state_pipeline = meta_pipeline.CreateChildMetaPipeline(current, this);
-    initial_state_pipeline->Build(children[0]
+    initial_state_pipeline->Build(*children[0]);
 
     // the RHS is the recursive pipeline
     recursive_meta_pipeline = make_shared<MetaPipeline>(executor, state, this);
     recursive_meta_pipeline->SetRecursiveCTE();
-    recursive_meta_pipeline->Build(children[1]
+    recursive_meta_pipeline->Build(*children[1]);
 }
 
 vector<const PhysicalOperator *> PhysicalRecursiveCTE::GetSources() const {
```
package/src/duckdb/src/execution/partitionable_hashtable.cpp:

```diff
@@ -1,46 +1,17 @@
 #include "duckdb/execution/partitionable_hashtable.hpp"
 
-
-
-static idx_t PartitionInfoNPartitions(const idx_t n_partitions_upper_bound) {
-    idx_t n_partitions = 1;
-    while (n_partitions <= n_partitions_upper_bound / 2) {
-        n_partitions *= 2;
-        if (n_partitions >= 256) {
-            break;
-        }
-    }
-    return n_partitions;
-}
-
-static idx_t PartitionInfoRadixBits(const idx_t n_partitions) {
-    idx_t radix_bits = 0;
-    auto radix_partitions_copy = n_partitions;
-    while (radix_partitions_copy - 1) {
-        radix_bits++;
-        radix_partitions_copy >>= 1;
-    }
-    return radix_bits;
-}
+#include "duckdb/common/radix_partitioning.hpp"
 
-
-    hash_t radix_mask = 0;
-    // we use the fifth byte of the 64 bit hash as radix source
-    for (idx_t i = 0; i < radix_bits; i++) {
-        radix_mask = (radix_mask << 1) | 1;
-    }
-    radix_mask <<= radix_shift;
-    return radix_mask;
-}
+namespace duckdb {
 
 RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound)
-    : n_partitions(
-      radix_bits(
+    : n_partitions(PreviousPowerOfTwo(n_partitions_upper_bound)),
+      radix_bits(RadixPartitioning::RadixBits(n_partitions)), radix_mask(RadixPartitioning::Mask(radix_bits)),
+      radix_shift(RadixPartitioning::Shift(radix_bits)) {
 
-    // finalize_threads needs to be a power of 2
     D_ASSERT(n_partitions > 0);
     D_ASSERT(n_partitions <= 256);
-    D_ASSERT((n_partitions
+    D_ASSERT(IsPowerOfTwo(n_partitions));
     D_ASSERT(radix_bits <= 8);
 }
 
@@ -80,7 +51,7 @@ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &group
                                            DataChunk &payload, const vector<idx_t> &filter) {
     // If this is false, a single AddChunk would overflow the max capacity
     D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
-    if (list.empty() || list.back()->
+    if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()) {
         idx_t new_capacity = GroupedAggregateHashTable::InitialCapacity();
         if (!list.empty()) {
             new_capacity = list.back()->Capacity();
@@ -159,7 +130,7 @@ void PartitionableHashTable::Partition() {
         context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
         partition_hts[r] = radix_partitioned_hts[r].back().get();
     }
-    unpartitioned_ht->Partition(partition_hts, partition_info.
+    unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits);
     unpartitioned_ht.reset();
 }
 unpartitioned_hts.clear();
@@ -176,6 +147,7 @@ HashTableList PartitionableHashTable::GetPartition(idx_t partition) {
     D_ASSERT(radix_partitioned_hts.size() > partition);
     return std::move(radix_partitioned_hts[partition]);
 }
+
 HashTableList PartitionableHashTable::GetUnpartitioned() {
     D_ASSERT(!IsPartitioned());
     return std::move(unpartitioned_hts);
```
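The rewritten constructor delegates its arithmetic to the shared `RadixPartitioning` helpers instead of the three deleted local functions. The math itself is small; here is a sketch with plain-function equivalents (the shift placement is an assumption carried over from the deleted comment about using the fifth byte of the hash; `RadixPartitioning`'s actual constants may differ):

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>

using hash_t = uint64_t;
using idx_t = uint64_t;

// Largest power of two <= n, for n > 0 (cf. PreviousPowerOfTwo above).
static idx_t PreviousPowerOfTwo(idx_t n) {
    idx_t result = 1;
    while (result * 2 <= n) {
        result *= 2;
    }
    return result;
}

// log2 of a power-of-two partition count (cf. RadixPartitioning::RadixBits).
static idx_t RadixBits(idx_t n_partitions) {
    idx_t bits = 0;
    while (n_partitions >>= 1) {
        bits++;
    }
    return bits;
}

// A mask of radix_bits ones positioned at `shift` bits (assumed placement).
static hash_t Mask(idx_t radix_bits, idx_t shift) {
    return ((hash_t(1) << radix_bits) - 1) << shift;
}

int main() {
    idx_t n_partitions = PreviousPowerOfTwo(100); // 64: counts stay powers of two
    idx_t bits = RadixBits(n_partitions);         // 6
    assert(n_partitions == 64 && bits == 6);
    std::cout << std::hex << Mask(bits, 32) << "\n"; // 3f00000000
}
```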
package/src/duckdb/src/execution/physical_operator.cpp:

```diff
@@ -138,7 +138,7 @@ void PhysicalOperator::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipe
 
     // we create a new pipeline starting from the child
     auto child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, this);
-    child_meta_pipeline->Build(children[0]
+    child_meta_pipeline->Build(*children[0]);
 } else {
     // operator is not a sink! recurse in children
     if (children.empty()) {
```
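Both `BuildPipelines` hunks (here and in `physical_recursive_cte.cpp`) change `Build(children[...]` into `Build(*children[...])`; the old argument is truncated in this diff, so the exact previous signature is unknown, but the visible shift is from passing the owning pointer toward dereferencing it into a reference parameter. A minimal sketch of that calling convention:

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct Op {
    const char *name;
};

// Reference parameter: a null child is impossible at the call site.
static void Build(Op &op) {
    std::cout << "building " << op.name << "\n";
}

int main() {
    std::vector<std::unique_ptr<Op>> children;
    children.push_back(std::make_unique<Op>(Op{"lhs"}));
    children.push_back(std::make_unique<Op>(Op{"rhs"}));
    // Mirrors child_meta_pipeline->Build(*children[0]) above:
    // dereference the owning unique_ptr at the call site.
    Build(*children[0]);
    Build(*children[1]);
}
```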
|