npm - duckdb - Versions diffs - 0.6.2-dev1124.0 → 0.6.2-dev1160.0 - Mend

duckdb 0.6.2-dev1124.0 → 0.6.2-dev1160.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.6.2-dev1124.0",
+  "version": "0.6.2-dev1160.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/extension/parquet/column_writer.cpp CHANGED Viewed

@@ -1723,6 +1723,38 @@ void ListColumnWriter::FinalizeAnalyze(ColumnWriterState &state_p) {
 	child_writer->FinalizeAnalyze(*state.child_state);
 }
+idx_t GetConsecutiveChildList(Vector &list, idx_t count, Vector &result) { // returns the summed length of all valid list rows; reslices `result` when their entries are not laid out back-to-back from offset 0
+	auto list_data = FlatVector::GetData<list_entry_t>(list); // NOTE(review): FlatVector accessors - presumably `list` must already be flat; confirm at call sites
+	auto &validity = FlatVector::Validity(list);
+	bool consecutive_flat_list = true;
+	idx_t child_count = 0; // running total of child elements seen so far
+	for (idx_t i = 0; i < count; i++) {
+		if (!validity.RowIsValid(i)) { // rows marked invalid contribute no child elements
+			continue;
+		}
+		if (list_data[i].offset != child_count) { // entry does not begin where the previous valid entry ended
+			consecutive_flat_list = false;
+		}
+		child_count += list_data[i].length;
+	}
+	if (!consecutive_flat_list) { // `result` is only touched when the layout is not already consecutive
+		SelectionVector child_sel(child_count);
+		idx_t entry = 0; // next output position in child_sel
+		for (idx_t i = 0; i < count; i++) {
+			if (!validity.RowIsValid(i)) {
+				continue;
+			}
+			for (idx_t k = 0; k < list_data[i].length; k++) {
+				child_sel.set_index(entry++, list_data[i].offset + k); // output slot -> k-th child element of row i
+			}
+		}
+		result.Slice(child_sel, child_count);
+		result.Flatten(child_count); // presumably materializes the sliced selection; confirm against Vector::Flatten
+	}
+	return child_count;
+}
 void ListColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) {
 	auto &state = (ListColumnWriterState &)state_p;
@@ -1775,8 +1807,9 @@ void ListColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *pa
 	state.parent_index += vcount;
 	auto &list_child = ListVector::GetEntry(vector);
-	auto list_count = ListVector::GetListSize(vector);
-	child_writer->Prepare(*state.child_state, &state_p, list_child, list_count);
+	Vector child_list(list_child);
+	idx_t child_count = GetConsecutiveChildList(vector, count, child_list);
+	child_writer->Prepare(*state.child_state, &state_p, child_list, child_count);
 }
 void ListColumnWriter::BeginWrite(ColumnWriterState &state_p) {
@@ -1788,8 +1821,9 @@ void ListColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t c
 	auto &state = (ListColumnWriterState &)state_p;
 	auto &list_child = ListVector::GetEntry(vector);
-	auto list_count = ListVector::GetListSize(vector);
-	child_writer->Write(*state.child_state, list_child, list_count);
+	Vector child_list(list_child);
+	idx_t child_count = GetConsecutiveChildList(vector, count, child_list);
+	child_writer->Write(*state.child_state, child_list, child_count);
 }
 void ListColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {

package/src/duckdb/extension/parquet/include/parquet_reader.hpp CHANGED Viewed

@@ -73,6 +73,7 @@ struct ParquetOptions {
 	bool filename = false;
 	bool file_row_number = false;
 	bool hive_partitioning = false;
+	bool union_by_name = false;
 public:
 	void Serialize(FieldWriter &writer) const;
@@ -109,6 +110,17 @@ public:
 	shared_ptr<ParquetFileMetadataCache> metadata;
 	ParquetOptions parquet_options;
+	//! When reading multiple parquet files (with the union by name option), the
+	//! TableFunction might return more cols than any single parquet file has. Even if all parquet files have the same
+	//! cols, those files might have the cols at different positions and with different logical types.
+	//! e.g. p1.parquet (a INT, b VARCHAR) p2.parquet (c VARCHAR, a VARCHAR)
+	vector<idx_t> union_idx_map;
+	//! If the parquet file doesn't have union col 5, union_null_cols[5] will be true.
+	//! Some parquet files may not have all union cols.
+	vector<bool> union_null_cols;
+	//! All union cols will be cast to the same type.
+	vector<LogicalType> union_col_types;
 public:
 	void InitializeScan(ParquetReaderScanState &state, vector<column_t> column_ids, vector<idx_t> groups_to_read,
 	                    TableFilterSet *table_filters);
@@ -139,6 +151,7 @@ private:
 	uint64_t GetGroupSpan(ParquetReaderScanState &state);
 	void PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx);
 	LogicalType DeriveLogicalType(const SchemaElement &s_ele);
+	void RearrangeChildReaders(unique_ptr<duckdb::ColumnReader> &root_reader, vector<column_t> &column_ids);
 	template <typename... Args>
 	std::runtime_error FormatException(const string fmt_str, Args... params) {

package/src/duckdb/extension/parquet/parquet-extension.cpp CHANGED Viewed

@@ -20,6 +20,7 @@
 #include "duckdb/common/field_writer.hpp"
 #include "duckdb/common/file_system.hpp"
 #include "duckdb/common/hive_partitioning.hpp"
+#include "duckdb/common/union_by_name.hpp"
 #include "duckdb/common/types/chunk_collection.hpp"
 #include "duckdb/function/copy_function.hpp"
 #include "duckdb/function/table_function.hpp"
@@ -45,6 +46,10 @@ struct ParquetReadBindData : public TableFunctionData {
 	vector<string> names;
 	vector<LogicalType> types;
+	// The union readers are created (when parquet union_by_name option is on) during binding
+	// Those readers can be re-used during ParquetParallelStateNext
+	vector<shared_ptr<ParquetReader>> union_readers;
 	// These come from the initial_reader, but need to be stored in case the initial_reader is removed by a filter
 	idx_t initial_file_cardinality;
 	idx_t initial_file_row_groups;
@@ -127,6 +132,7 @@ void ParquetOptions::Serialize(FieldWriter &writer) const {
 	writer.WriteField<bool>(filename);
 	writer.WriteField<bool>(file_row_number);
 	writer.WriteField<bool>(hive_partitioning);
+	writer.WriteField<bool>(union_by_name);
 }
 void ParquetOptions::Deserialize(FieldReader &reader) {
@@ -134,6 +140,7 @@ void ParquetOptions::Deserialize(FieldReader &reader) {
 	filename = reader.ReadRequired<bool>();
 	file_row_number = reader.ReadRequired<bool>();
 	hive_partitioning = reader.ReadRequired<bool>();
+	union_by_name = reader.ReadRequired<bool>();
 }
 BindInfo ParquetGetBatchInfo(const FunctionData *bind_data) {
@@ -148,6 +155,7 @@ BindInfo ParquetGetBatchInfo(const FunctionData *bind_data) {
 	bind_info.InsertOption("filename", Value::BOOLEAN(parquet_bind->parquet_options.filename));
 	bind_info.InsertOption("file_row_number", Value::BOOLEAN(parquet_bind->parquet_options.file_row_number));
 	bind_info.InsertOption("hive_partitioning", Value::BOOLEAN(parquet_bind->parquet_options.hive_partitioning));
+	bind_info.InsertOption("union_by_name", Value::BOOLEAN(parquet_bind->parquet_options.union_by_name));
 	return bind_info;
 }
@@ -164,6 +172,7 @@ public:
 		table_function.named_parameters["filename"] = LogicalType::BOOLEAN;
 		table_function.named_parameters["file_row_number"] = LogicalType::BOOLEAN;
 		table_function.named_parameters["hive_partitioning"] = LogicalType::BOOLEAN;
+		table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN;
 		table_function.get_batch_index = ParquetScanGetBatchIndex;
 		table_function.serialize = ParquetScanSerialize;
 		table_function.deserialize = ParquetScanDeserialize;
@@ -180,6 +189,7 @@ public:
 		table_function.named_parameters["filename"] = LogicalType::BOOLEAN;
 		table_function.named_parameters["file_row_number"] = LogicalType::BOOLEAN;
 		table_function.named_parameters["hive_partitioning"] = LogicalType::BOOLEAN;
+		table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN;
 		set.AddFunction(table_function);
 		return set;
 	}
@@ -201,22 +211,31 @@ public:
 				parquet_options.file_row_number = true;
 			} else if (loption == "hive_partitioning") {
 				parquet_options.hive_partitioning = true;
+			} else if (loption == "union_by_name") {
+				parquet_options.union_by_name = true;
 			} else {
 				throw NotImplementedException("Unsupported option for COPY FROM parquet: %s", option.first);
 			}
 		}
-		auto result = make_unique<ParquetReadBindData>();
 		FileSystem &fs = FileSystem::GetFileSystem(context);
-		result->files = fs.Glob(info.file_path, context);
-		if (result->files.empty()) {
+		auto files = fs.Glob(info.file_path, context);
+		if (files.empty()) {
 			throw IOException("No files found that match the pattern \"%s\"", info.file_path);
 		}
-		result->SetInitialReader(
-		    make_shared<ParquetReader>(context, result->files[0], expected_types, parquet_options));
-		result->names = result->initial_reader->names;
-		result->types = result->initial_reader->return_types;
-		return std::move(result);
+		// The most likely path (Parquet read without union by name option)
+		if (!parquet_options.union_by_name) {
+			auto result = make_unique<ParquetReadBindData>();
+			result->files = std::move(files);
+			result->SetInitialReader(
+			    make_shared<ParquetReader>(context, result->files[0], expected_types, parquet_options));
+			result->names = result->initial_reader->names;
+			result->types = result->initial_reader->return_types;
+			return std::move(result);
+		} else {
+			return ParquetUnionNamesBind(context, files, expected_types, expected_names, parquet_options);
+		}
 	}
 	static unique_ptr<BaseStatistics> ParquetScanStats(ClientContext &context, const FunctionData *bind_data_p,
@@ -303,11 +322,40 @@ public:
 	                                                        vector<LogicalType> &return_types, vector<string> &names,
 	                                                        ParquetOptions parquet_options) {
 		auto result = make_unique<ParquetReadBindData>();
+		// The most likely path (Parquet Scan without union by name option)
+		if (!parquet_options.union_by_name) {
+			result->files = std::move(files);
+			result->SetInitialReader(make_shared<ParquetReader>(context, result->files[0], parquet_options));
+			return_types = result->types = result->initial_reader->return_types;
+			names = result->names = result->initial_reader->names;
+			return std::move(result);
+		} else {
+			return ParquetUnionNamesBind(context, files, return_types, names, parquet_options);
+		}
+	}
+	static unique_ptr<FunctionData> ParquetUnionNamesBind(ClientContext &context, vector<string> files,
+	                                                      vector<LogicalType> &return_types, vector<string> &names,
+	                                                      ParquetOptions parquet_options) { // bind path used when union_by_name is set: fills return_types/names from the unioned columns
+		auto result = make_unique<ParquetReadBindData>();
 		result->files = std::move(files);
-		result->SetInitialReader(make_shared<ParquetReader>(context, result->files[0], parquet_options));
-		return_types = result->types = result->initial_reader->return_types;
-		names = result->names = result->initial_reader->names;
+		case_insensitive_map_t<idx_t> union_names_map;
+		vector<string> union_col_names;
+		vector<LogicalType> union_col_types;
+		auto dummy_readers = UnionByName<ParquetReader, ParquetOptions>::UnionCols( // NOTE(review): helper semantics live in union_by_name.hpp, not visible here
+		    context, result->files, union_col_types, union_col_names, union_names_map, parquet_options);
+		dummy_readers = UnionByName<ParquetReader, ParquetOptions>::CreateUnionMap(
+		    std::move(dummy_readers), union_col_types, union_col_names, union_names_map);
+		std::move(dummy_readers.begin(), dummy_readers.end(), std::back_inserter(result->union_readers)); // move each reader into the bind data
+		names.assign(union_col_names.begin(), union_col_names.end()); // NOTE(review): result->names / result->types are not assigned on this path, unlike the non-union branch - confirm nothing reads them
+		return_types.assign(union_col_types.begin(), union_col_types.end());
+		result->SetInitialReader(result->union_readers[0]); // the first union reader doubles as the initial reader
+		D_ASSERT(names.size() == return_types.size());
 		return std::move(result);
 	}
@@ -337,6 +385,8 @@ public:
 				parquet_options.file_row_number = BooleanValue::Get(kv.second);
 			} else if (loption == "hive_partitioning") {
 				parquet_options.hive_partitioning = BooleanValue::Get(kv.second);
+			} else if (loption == "union_by_name") {
+				parquet_options.union_by_name = BooleanValue::Get(kv.second);
 			}
 		}
 		FileSystem &fs = FileSystem::GetFileSystem(context);
@@ -370,6 +420,8 @@ public:
 				parquet_options.file_row_number = BooleanValue::Get(kv.second);
 			} else if (loption == "hive_partitioning") {
 				parquet_options.hive_partitioning = BooleanValue::Get(kv.second);
+			} else if (loption == "union_by_name") { // read the supplied boolean, as the sibling options above do
+				parquet_options.union_by_name = BooleanValue::Get(kv.second);
 			}
 		}
 		return ParquetScanBindInternal(context, std::move(files), return_types, names, parquet_options);
@@ -417,20 +469,24 @@ public:
 		result->file_opening = std::vector<bool>(bind_data.files.size(), false);
 		result->file_mutexes = std::unique_ptr<mutex[]>(new mutex[bind_data.files.size()]);
-		result->readers = std::vector<shared_ptr<ParquetReader>>(bind_data.files.size(), nullptr);
-		if (bind_data.initial_reader) {
-			result->initial_reader = bind_data.initial_reader;
-			result->readers[0] = bind_data.initial_reader;
-		} else {
-			if (bind_data.files.empty()) {
-				result->initial_reader = nullptr;
+		if (!bind_data.parquet_options.union_by_name) {
+			result->readers = std::vector<shared_ptr<ParquetReader>>(bind_data.files.size(), nullptr);
+			if (bind_data.initial_reader) {
+				result->initial_reader = bind_data.initial_reader;
+				result->readers[0] = bind_data.initial_reader;
 			} else {
-				result->initial_reader =
-				    make_shared<ParquetReader>(context, bind_data.files[0], bind_data.names, bind_data.types,
-				                               input.column_ids, bind_data.parquet_options, bind_data.files[0]);
-				result->readers[0] = result->initial_reader;
+				if (bind_data.files.empty()) {
+					result->initial_reader = nullptr;
+				} else {
+					result->initial_reader =
+					    make_shared<ParquetReader>(context, bind_data.files[0], bind_data.names, bind_data.types,
+					                               input.column_ids, bind_data.parquet_options, bind_data.files[0]);
+					result->readers[0] = result->initial_reader;
+				}
 			}
+		} else {
+			result->readers = std::move(bind_data.union_readers);
+			result->initial_reader = result->readers[0];
 		}
 		result->row_group_index = 0;
@@ -497,6 +553,9 @@ public:
 			bind_data.chunk_count++;
 			if (output.size() > 0) {
+				if (bind_data.parquet_options.union_by_name) {
+					UnionByName<ParquetReader, ParquetOptions>::SetNullUnionCols(output, data.reader->union_null_cols);
+				}
 				return;
 			}
 			if (!ParquetParallelStateNext(context, bind_data, data, gstate)) {
@@ -533,6 +592,12 @@ public:
 			D_ASSERT(parallel_state.initial_reader);
 			if (parallel_state.readers[parallel_state.file_index]) {
+				const auto &current_reader = parallel_state.readers[parallel_state.file_index];
+				if (current_reader->union_null_cols.empty()) {
+					current_reader->union_null_cols.resize(current_reader->return_types.size());
+					std::fill(current_reader->union_null_cols.begin(), current_reader->union_null_cols.end(), false);
+				}
 				if (parallel_state.row_group_index <
 				    parallel_state.readers[parallel_state.file_index]->NumRowGroups()) {
 					// The current reader has rowgroups left to be scanned

package/src/duckdb/extension/parquet/parquet_reader.cpp CHANGED Viewed

@@ -352,7 +352,6 @@ unique_ptr<ColumnReader> ParquetReader::CreateReader(const duckdb_parquet::forma
 	D_ASSERT(file_meta_data->row_groups.empty() || next_file_idx == file_meta_data->row_groups[0].columns.size());
 	auto &root_struct_reader = (StructColumnReader &)*ret;
 	// add casts if required
 	for (auto &entry : cast_map) {
 		auto column_idx = entry.first;
@@ -700,11 +699,35 @@ void ParquetReader::InitializeScan(ParquetReaderScanState &state, vector<column_
 	state.thrift_file_proto = CreateThriftProtocol(allocator, *state.file_handle, *file_opener, state.prefetch_mode);
 	state.root_reader = CreateReader(GetFileMetadata());
+	if (parquet_options.union_by_name) {
+		RearrangeChildReaders(state.root_reader, state.column_ids);
+	}
 	state.define_buf.resize(allocator, STANDARD_VECTOR_SIZE);
 	state.repeat_buf.resize(allocator, STANDARD_VECTOR_SIZE);
 }
+void ParquetReader::RearrangeChildReaders(unique_ptr<duckdb::ColumnReader> &root_reader, vector<column_t> &column_ids) { // wraps the root's child readers in casts to the union types, remaps column_ids in place, and rebuilds union_null_cols
+	auto &root_struct_reader = (StructColumnReader &)*root_reader;
+	unordered_map<idx_t, idx_t> reverse_union_idx; // union_idx_map value -> child reader position
+	for (idx_t col = 0; col < union_idx_map.size(); ++col) {
+		auto child_reader = move(root_struct_reader.child_readers[col]);
+		auto cast_reader = make_unique<CastColumnReader>(move(child_reader), union_col_types[union_idx_map[col]]); // cast child `col` to the type stored for its mapped union index
+		root_struct_reader.child_readers[col] = move(cast_reader);
+		reverse_union_idx[union_idx_map[col]] = col;
+	}
+	vector<bool> column_id_nulls(column_ids.size(), true); // every requested column starts flagged; cleared below when found
+	for (idx_t col = 0; col < column_ids.size(); ++col) {
+		auto find = reverse_union_idx.find(column_ids[col]);
+		if (find != reverse_union_idx.end()) {
+			column_ids[col] = find->second; // rewrite the id to the child reader position
+			column_id_nulls[col] = false;
+		}
+	}
+	union_null_cols = move(column_id_nulls); // ids with no entry in the reverse map stay flagged true
+}
 void FilterIsNull(Vector &v, parquet_filter_t &filter_mask, idx_t count) {
 	if (v.GetVectorType() == VectorType::CONSTANT_VECTOR) {
 		auto &mask = ConstantVector::Validity(v);
@@ -898,6 +921,8 @@ bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &resul
 		return false;
 	}
+	D_ASSERT(union_null_cols.size() >= result.ColumnCount());
 	// see if we have to switch to the next row group in the parquet file
 	if (state.current_group < 0 || (int64_t)state.group_offset >= GetGroup(state).num_rows) {
 		state.current_group++;
@@ -915,7 +940,7 @@ bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &resul
 		uint64_t to_scan_compressed_bytes = 0;
 		for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) {
 			// this is a special case where we are not interested in the actual contents of the file
-			if (IsRowIdColumnId(state.column_ids[out_col_idx])) {
+			if (IsRowIdColumnId(state.column_ids[out_col_idx]) || union_null_cols[out_col_idx]) {
 				continue;
 			}
@@ -956,7 +981,7 @@ bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &resul
 				// Prefetch column-wise
 				for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) {
-					if (IsRowIdColumnId(state.column_ids[out_col_idx])) {
+					if (IsRowIdColumnId(state.column_ids[out_col_idx]) || union_null_cols[out_col_idx]) {
 						continue;
 					}
@@ -1007,6 +1032,10 @@ bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &resul
 	if (state.filters) {
 		vector<bool> need_to_read(result.ColumnCount(), true);
+		for (idx_t col = 0; col < need_to_read.size(); ++col) {
+			need_to_read[col] = need_to_read[col] && !union_null_cols[col];
+		}
 		// first load the columns that are used in filters
 		for (auto &filter_col : state.filters->filters) {
 			auto file_col_idx = state.column_ids[filter_col.first];
@@ -1058,6 +1087,9 @@ bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &resul
 				result.data[out_col_idx].Reference(constant_42);
 				continue;
 			}
+			if (union_null_cols[out_col_idx]) {
+				continue;
+			}
 			auto rows_read = root_reader->GetChildReader(file_col_idx)
 			                     ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[out_col_idx]);

package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp CHANGED Viewed

@@ -160,7 +160,7 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
 	} else {
 		row_empty = false;
 	}
-	if (!sql_types.empty() && column == sql_types.size() && length == 0) {
+	if (!return_types.empty() && column == return_types.size() && length == 0) {
 		// skip a single trailing delimiter in last column
 		return;
 	}
@@ -168,14 +168,14 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
 		column++;
 		return;
 	}
-	if (column >= sql_types.size()) {
+	if (column >= return_types.size()) {
 		if (options.ignore_errors) {
 			error_column_overflow = true;
 			return;
 		} else {
 			throw InvalidInputException(
 			    "Error in file \"%s\", on line %s: expected %lld values per row, but got more. (%s)", options.file_path,
-			    GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), options.ToString());
+			    GetLineNumberStr(linenr, linenr_estimated).c_str(), return_types.size(), options.ToString());
 		}
 	}
@@ -183,7 +183,7 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
 	idx_t row_entry = parse_chunk.size();
 	// test against null string, but only if the value was not quoted
-	if ((!has_quotes || sql_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
+	if ((!has_quotes || return_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
 	    Equals::Operation(str_val, string_t(options.null_str))) {
 		FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
 	} else {
@@ -221,7 +221,7 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
 	if (row_empty) {
 		row_empty = false;
-		if (sql_types.size() != 1) {
+		if (return_types.size() != 1) {
 			if (mode == ParserMode::PARSING) {
 				FlatVector::SetNull(parse_chunk.data[0], parse_chunk.size(), false);
 			}
@@ -238,7 +238,7 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
 		return false;
 	}
-	if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
+	if (column < return_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
 		if (options.ignore_errors) {
 			column = 0;
 			return false;
@@ -249,7 +249,7 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
 			} else {
 				throw InvalidInputException(
 				    "Error in file \"%s\" on line %s: expected %lld values per row, but got %d.\nParser options:\n%s",
-				    options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
+				    options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), return_types.size(), column,
 				    options.ToString());
 			}
 		}
@@ -282,13 +282,6 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
 	return false;
 }
-void BaseCSVReader::SetNullUnionCols(DataChunk &insert_chunk) {
-	for (idx_t col = 0; col < insert_nulls_idx.size(); ++col) {
-		insert_chunk.data[insert_nulls_idx[col]].SetVectorType(VectorType::CONSTANT_VECTOR);
-		ConstantVector::SetNull(insert_chunk.data[insert_nulls_idx[col]], true);
-	}
-}
 void BaseCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset) {
 	D_ASSERT(col_idx < chunk.data.size());
 	D_ASSERT(row_idx < chunk.size());
@@ -302,8 +295,8 @@ void BaseCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, i
 	auto utf_type = Utf8Proc::Analyze(s.GetDataUnsafe(), s.GetSize());
 	if (utf_type == UnicodeType::INVALID) {
 		string col_name = to_string(col_idx);
-		if (col_idx < col_names.size()) {
-			col_name = "\"" + col_names[col_idx] + "\"";
+		if (col_idx < names.size()) {
+			col_name = "\"" + names[col_idx] + "\"";
 		}
 		int64_t error_line = linenr - (chunk.size() - row_idx) + 1 + offset;
 		D_ASSERT(error_line >= 0);
@@ -330,9 +323,9 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
 	// convert the columns in the parsed chunk to the types of the table
 	insert_chunk.SetCardinality(parse_chunk);
-	for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
+	for (idx_t col_idx = 0; col_idx < return_types.size(); col_idx++) {
 		auto insert_idx = insert_cols_idx[col_idx];
-		auto &type = sql_types[col_idx];
+		auto &type = return_types[col_idx];
 		if (type.id() == LogicalTypeId::VARCHAR) {
 			// target type is varchar: no need to convert
 			// just test that all strings are valid utf-8 strings
@@ -345,7 +338,8 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
 				// use the date format to cast the chunk
 				success = TryCastDateVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_idx],
 				                            parse_chunk.size(), error_message);
-			} else if (options.has_format[LogicalTypeId::TIMESTAMP] && type.id() == LogicalTypeId::TIMESTAMP) {
+			} else if (options.has_format[LogicalTypeId::TIMESTAMP] &&
+			           return_types[col_idx].id() == LogicalTypeId::TIMESTAMP) {
 				// use the date format to cast the chunk
 				success = TryCastTimestampVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_idx],
 				                                 parse_chunk.size(), error_message);
@@ -365,8 +359,8 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
 				continue;
 			}
 			string col_name = to_string(col_idx);
-			if (col_idx < col_names.size()) {
-				col_name = "\"" + col_names[col_idx] + "\"";
+			if (col_idx < names.size()) {
+				col_name = "\"" + names[col_idx] + "\"";
 			}
 			// figure out the exact line number
@@ -401,7 +395,7 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
 		for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
 			bool failed = false;
-			for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
+			for (idx_t column_idx = 0; column_idx < return_types.size(); column_idx++) {
 				auto &inserted_column = insert_chunk.data[column_idx];
 				auto &parsed_column = parse_chunk.data[column_idx];

package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp CHANGED Viewed

@@ -37,6 +37,16 @@ BufferedCSVReader::BufferedCSVReader(ClientContext &context, BufferedCSVReaderOp
                         std::move(options_p), requested_types) {
 }
+BufferedCSVReader::BufferedCSVReader(ClientContext &context, string filename, BufferedCSVReaderOptions options_p, // overload that takes the file path as an explicit argument
+                                     const vector<LogicalType> &requested_types)
+    : BaseCSVReader(FileSystem::GetFileSystem(context), Allocator::Get(context), FileSystem::GetFileOpener(context),
+                    move(options_p), requested_types),
+      buffer_size(0), position(0), start(0) {
+	options.file_path = move(filename); // the explicit filename is stored in options before the file is opened
+	file_handle = OpenCSV(options);
+	Initialize(requested_types);
+}
 BufferedCSVReader::~BufferedCSVReader() {
 }
@@ -236,20 +246,20 @@ static string NormalizeColumnName(const string &col_name) {
 void BufferedCSVReader::Initialize(const vector<LogicalType> &requested_types) {
 	PrepareComplexParser();
 	if (options.auto_detect) {
-		sql_types = SniffCSV(requested_types);
-		if (sql_types.empty()) {
+		return_types = SniffCSV(requested_types);
+		if (return_types.empty()) {
 			throw Exception("Failed to detect column types from CSV: is the file a valid CSV file?");
 		}
 		if (cached_chunks.empty()) {
 			JumpToBeginning(options.skip_rows, options.header);
 		}
 	} else {
-		sql_types = requested_types;
+		return_types = requested_types;
 		ResetBuffer();
 		SkipRowsAndReadHeader(options.skip_rows, options.header);
 	}
-	InitParseChunk(sql_types.size());
-	InitInsertChunkIdx(sql_types.size());
+	InitParseChunk(return_types.size());
+	InitInsertChunkIdx(return_types.size());
 	// we only need reset support during the automatic CSV type detection
 	// since reset support might require caching (in the case of streams), we disable it for the remainder
 	file_handle->DisableReset();
@@ -297,7 +307,7 @@ void BufferedCSVReader::SkipRowsAndReadHeader(idx_t skip_rows, bool skip_header)
 	if (skip_header) {
 		// ignore the first line as a header line
-		InitParseChunk(sql_types.size());
+		InitParseChunk(return_types.size());
 		ParseCSV(ParserMode::PARSING_HEADER);
 	}
 }
@@ -520,14 +530,14 @@ void BufferedCSVReader::DetectCandidateTypes(const vector<LogicalType> &type_can
 			format_candidates[t.first].clear();
 		}
-		// set all sql_types to VARCHAR so we can do datatype detection based on VARCHAR values
-		sql_types.clear();
-		sql_types.assign(options.num_cols, LogicalType::VARCHAR);
+		// set all return_types to VARCHAR so we can do datatype detection based on VARCHAR values
+		return_types.clear();
+		return_types.assign(options.num_cols, LogicalType::VARCHAR);
 		// jump to beginning and skip potential header
 		JumpToBeginning(options.skip_rows, true);
 		DataChunk header_row;
-		header_row.Initialize(allocator, sql_types);
+		header_row.Initialize(allocator, return_types);
 		parse_chunk.Copy(header_row);
 		if (header_row.size() == 0) {
@@ -535,7 +545,7 @@ void BufferedCSVReader::DetectCandidateTypes(const vector<LogicalType> &type_can
 		}
 		// init parse chunk and read csv with info candidate
-		InitParseChunk(sql_types.size());
+		InitParseChunk(return_types.size());
 		if (!TryParseCSV(ParserMode::SNIFFING_DATATYPES)) {
 			continue;
 		}
@@ -713,7 +723,7 @@ void BufferedCSVReader::DetectHeader(const vector<vector<LogicalType>> &best_sql
 				col_name = col_name + "_" + to_string(name_collision_count[col_name]);
 			}
-			col_names.push_back(col_name);
+			names.push_back(col_name);
 			name_collision_count[col_name] = 0;
 		}
@@ -721,7 +731,7 @@ void BufferedCSVReader::DetectHeader(const vector<vector<LogicalType>> &best_sql
 		options.header = false;
 		for (idx_t col = 0; col < options.num_cols; col++) {
 			string column_name = GenerateColumnName(options.num_cols, col);
-			col_names.push_back(column_name);
+			names.push_back(column_name);
 		}
 	}
 }
@@ -731,8 +741,8 @@ vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalT
                                                            vector<vector<LogicalType>> &best_sql_types_candidates,
                                                            map<LogicalTypeId, vector<string>> &best_format_candidates) {
 	// for the type refine we set the SQL types to VARCHAR for all columns
-	sql_types.clear();
-	sql_types.assign(options.num_cols, LogicalType::VARCHAR);
+	return_types.clear();
+	return_types.assign(options.num_cols, LogicalType::VARCHAR);
 	vector<LogicalType> detected_types;
@@ -747,11 +757,11 @@ vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalT
 		}
 	} else if (options.all_varchar) {
 		// return all types varchar
-		detected_types = sql_types;
+		detected_types = return_types;
 	} else {
 		// jump through the rest of the file and continue to refine the sql type guess
 		while (JumpToNextSample()) {
-			InitParseChunk(sql_types.size());
+			InitParseChunk(return_types.size());
 			// if jump ends up a bad line, we just skip this chunk
 			if (!TryParseCSV(ParserMode::SNIFFING_DATATYPES)) {
 				continue;
@@ -878,11 +888,11 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
 	options.num_cols = best_num_cols;
 	DetectHeader(best_sql_types_candidates, best_header_row);
 	auto sql_types_per_column = options.sql_types_per_column;
-	for (idx_t i = 0; i < col_names.size(); i++) {
-		auto it = sql_types_per_column.find(col_names[i]);
+	for (idx_t i = 0; i < names.size(); i++) {
+		auto it = sql_types_per_column.find(names[i]);
 		if (it != sql_types_per_column.end()) {
 			best_sql_types_candidates[i] = {it->second};
-			sql_types_per_column.erase(col_names[i]);
+			sql_types_per_column.erase(names[i]);
 		}
 	}
 	if (!sql_types_per_column.empty()) {

package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp CHANGED Viewed

@@ -38,9 +38,9 @@ ParallelCSVReader::~ParallelCSVReader() {
 }
 void ParallelCSVReader::Initialize(const vector<LogicalType> &requested_types) { // stores the requested types unchanged and sizes both chunks from them
-	sql_types = requested_types;
-	InitParseChunk(sql_types.size());
-	InitInsertChunkIdx(sql_types.size());
+	return_types = requested_types;
+	InitParseChunk(return_types.size()); // one parse column per requested type
+	InitInsertChunkIdx(return_types.size());
 }
 bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
@@ -360,7 +360,7 @@ final_state : {
 			// remaining values to be added to the chunk
 			AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes);
 			if (try_add_line) {
-				bool success = column == sql_types.size();
+				bool success = column == return_types.size();
 				if (success) {
 					AddRow(insert_chunk, column, error_message);
 					success = Flush(insert_chunk);

package/src/duckdb/src/function/table/read_csv.cpp CHANGED Viewed

@@ -4,6 +4,7 @@
 #include "duckdb/main/database.hpp"
 #include "duckdb/common/string_util.hpp"
 #include "duckdb/common/hive_partitioning.hpp"
+#include "duckdb/common/union_by_name.hpp"
 #include "duckdb/main/config.hpp"
 #include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/parser/expression/function_expression.hpp"
@@ -127,9 +128,9 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 	if (options.auto_detect) {
 		options.file_path = result->files[0];
 		auto initial_reader = make_unique<BufferedCSVReader>(context, options);
-		return_types.assign(initial_reader->sql_types.begin(), initial_reader->sql_types.end());
+		return_types.assign(initial_reader->return_types.begin(), initial_reader->return_types.end());
 		if (names.empty()) {
-			names.assign(initial_reader->col_names.begin(), initial_reader->col_names.end());
+			names.assign(initial_reader->names.begin(), initial_reader->names.end());
 		} else {
 			if (explicitly_set_columns) {
 				// The user has influenced the names, can't assume they are valid anymore
@@ -143,7 +144,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 			}
 		}
 		options = initial_reader->options;
-		result->sql_types = initial_reader->sql_types;
+		result->sql_types = initial_reader->return_types;
 		result->initial_reader = std::move(initial_reader);
 	} else {
 		result->sql_types = return_types;
@@ -152,58 +153,25 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 	// union_col_names will exclude filename and hivepartition
 	if (options.union_by_name) {
-		idx_t union_names_index = 0;
 		case_insensitive_map_t<idx_t> union_names_map;
 		vector<string> union_col_names;
 		vector<LogicalType> union_col_types;
-		for (idx_t file_idx = 0; file_idx < result->files.size(); ++file_idx) {
-			options.file_path = result->files[file_idx];
-			auto reader = make_unique<BufferedCSVReader>(context, options);
-			auto &col_names = reader->col_names;
-			auto &sql_types = reader->sql_types;
-			D_ASSERT(col_names.size() == sql_types.size());
-			for (idx_t col = 0; col < col_names.size(); ++col) {
-				auto union_find = union_names_map.find(col_names[col]);
-				if (union_find != union_names_map.end()) {
-					// given same name , union_col's type must compatible with col's type
-					LogicalType compatible_type;
-					compatible_type = LogicalType::MaxLogicalType(union_col_types[union_find->second], sql_types[col]);
-					union_col_types[union_find->second] = compatible_type;
-				} else {
-					union_names_map[col_names[col]] = union_names_index;
-					union_names_index++;
-					union_col_names.emplace_back(col_names[col]);
-					union_col_types.emplace_back(sql_types[col]);
-				}
-			}
-			result->union_readers.push_back(std::move(reader));
-		}
+		auto dummy_readers = UnionByName<BufferedCSVReader, BufferedCSVReaderOptions>::UnionCols(
+		    context, result->files, union_col_types, union_col_names, union_names_map, options);
-		for (auto &reader : result->union_readers) {
-			auto &col_names = reader->col_names;
-			vector<bool> is_null_cols(union_col_names.size(), true);
+		dummy_readers = UnionByName<BufferedCSVReader, BufferedCSVReaderOptions>::CreateUnionMap(
+		    std::move(dummy_readers), union_col_types, union_col_names, union_names_map);
-			for (idx_t col = 0; col < col_names.size(); ++col) {
-				idx_t remap_col = union_names_map[col_names[col]];
-				reader->insert_cols_idx[col] = remap_col;
-				is_null_cols[remap_col] = false;
-			}
-			for (idx_t col = 0; col < union_col_names.size(); ++col) {
-				if (is_null_cols[col]) {
-					reader->insert_nulls_idx.push_back(col);
-				}
-			}
+		std::move(dummy_readers.begin(), dummy_readers.end(), std::back_inserter(result->union_readers));
+		for (auto &reader : result->union_readers) {
+			reader->insert_cols_idx = reader->union_idx_map;
 		}
-		const idx_t first_file_index = 0;
-		result->initial_reader = std::move(result->union_readers[first_file_index]);
 		names.assign(union_col_names.begin(), union_col_names.end());
 		return_types.assign(union_col_types.begin(), union_col_types.end());
+		const idx_t first_file_index = 0;
+		result->initial_reader = std::move(result->union_readers[first_file_index]);
 		D_ASSERT(names.size() == return_types.size());
 	}
@@ -544,7 +512,7 @@ static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext
 	}
 	result->next_file = 1;
 	if (result->initial_reader) {
-		result->sql_types = result->initial_reader->sql_types;
+		result->sql_types = result->initial_reader->return_types;
 	}
 	return std::move(result);
 }
@@ -603,7 +571,8 @@ static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput
 	} while (true);
 	if (bind_data.options.union_by_name) {
-		lstate.csv_reader->SetNullUnionCols(output);
+		UnionByName<BufferedCSVReader, BufferedCSVReaderOptions>::SetNullUnionCols(output,
+		                                                                           lstate.csv_reader->union_null_cols);
 	}
 	if (bind_data.options.include_file_name) {
 		auto &col = output.data[bind_data.filename_col_idx];

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.6.2-dev1124"
+#define DUCKDB_VERSION "0.6.2-dev1160"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "aa44cebfc5"
+#define DUCKDB_SOURCE_ID "351d01503a"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/include/duckdb/common/union_by_name.hpp ADDED Viewed

@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/common/union_by_name.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+#include <vector>
+#include <string>
+#include "duckdb/common/types.hpp"
+using std::string;
+using std::vector;
+namespace duckdb {
+template <class READER_TYPE, class OPTION_TYPE>
+class UnionByName {
+public:
+	//! Union all files (readers) by their col names
+	static vector<unique_ptr<READER_TYPE>>
+	UnionCols(ClientContext &context, const vector<string> &files, vector<LogicalType> &union_col_types,
+	          vector<string> &union_col_names, case_insensitive_map_t<idx_t> &union_names_map, OPTION_TYPE options) {
+		idx_t union_names_index = 0;
+		vector<unique_ptr<READER_TYPE>> union_readers;
+		for (idx_t file_idx = 0; file_idx < files.size(); ++file_idx) {
+			const auto file_name = files[file_idx];
+			auto reader = make_unique<READER_TYPE>(context, file_name, options);
+			auto &col_names = reader->names;
+			auto &sql_types = reader->return_types;
+			D_ASSERT(col_names.size() == sql_types.size());
+			for (idx_t col = 0; col < col_names.size(); ++col) {
+				auto union_find = union_names_map.find(col_names[col]);
+				if (union_find != union_names_map.end()) {
+					// given the same name, the union col's type must be compatible with the col's type
+					LogicalType compatible_type;
+					compatible_type = LogicalType::MaxLogicalType(union_col_types[union_find->second], sql_types[col]);
+					union_col_types[union_find->second] = compatible_type;
+				} else {
+					union_names_map[col_names[col]] = union_names_index;
+					union_names_index++;
+					union_col_names.emplace_back(col_names[col]);
+					union_col_types.emplace_back(sql_types[col]);
+				}
+			}
+			union_readers.push_back(move(reader));
+		}
+		return union_readers;
+	}
+	//! Create information for reader's col mapping to union cols
+	static vector<unique_ptr<READER_TYPE>> CreateUnionMap(vector<unique_ptr<READER_TYPE>> union_readers,
+	                                                      vector<LogicalType> &union_col_types,
+	                                                      vector<string> &union_col_names,
+	                                                      case_insensitive_map_t<idx_t> &union_names_map) {
+		for (auto &reader : union_readers) {
+			auto &col_names = reader->names;
+			vector<bool> union_null_cols(union_col_names.size(), true);
+			vector<idx_t> union_idx_map(col_names.size(), 0);
+			for (idx_t col = 0; col < col_names.size(); ++col) {
+				idx_t union_idx = union_names_map[col_names[col]];
+				union_idx_map[col] = union_idx;
+				union_null_cols[union_idx] = false;
+			}
+			reader->union_col_types = union_col_types;
+			reader->union_idx_map = move(union_idx_map);
+			reader->union_null_cols = move(union_null_cols);
+		}
+		return union_readers;
+	}
+	//! Set nulls into the cols that mismatch union names
+	static void SetNullUnionCols(DataChunk &result, const vector<bool> &union_null_cols) {
+		for (idx_t col = 0; col < union_null_cols.size(); ++col) {
+			if (union_null_cols[col]) {
+				result.data[col].SetVectorType(VectorType::CONSTANT_VECTOR);
+				ConstantVector::SetNull(result.data[col], true);
+			}
+		}
+	}
+};
+} // namespace duckdb

package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp CHANGED Viewed

@@ -44,13 +44,15 @@ public:
 	Allocator &allocator;
 	FileOpener *opener;
 	BufferedCSVReaderOptions options;
-	vector<LogicalType> sql_types;
-	vector<string> col_names;
+	vector<LogicalType> return_types;
+	vector<string> names;
 	//! remap parse_chunk col to insert_chunk col, because when the
 	//! union_by_name option is on, insert_chunk may have more cols
 	vector<idx_t> insert_cols_idx;
-	vector<idx_t> insert_nulls_idx;
+	vector<idx_t> union_idx_map;
+	vector<bool> union_null_cols;
+	vector<LogicalType> union_col_types;
 	idx_t linenr = 0;
 	bool linenr_estimated = false;
@@ -70,10 +72,6 @@ public:
 	ParserMode mode;
-public:
-	//! Fill nulls into the cols that mismtach union names
-	void SetNullUnionCols(DataChunk &insert_chunk);
 protected:
 	//! Initializes the parse_chunk with varchar columns and aligns info with new number of cols
 	void InitParseChunk(idx_t num_cols);
@@ -100,7 +98,7 @@ protected:
 	static string GetLineNumberStr(idx_t linenr, bool linenr_estimated);
 protected:
-	//! Whether or not the current row's columns have overflown sql_types.size()
+	//! Whether or not the current row's columns have overflown return_types.size()
 	bool error_column_overflow = false;
 	//! Number of sniffed columns - only used when auto-detecting
 	vector<idx_t> sniffed_column_counts;

package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp CHANGED Viewed

@@ -57,6 +57,8 @@ public:
 	                  const vector<LogicalType> &requested_types = vector<LogicalType>());
 	BufferedCSVReader(FileSystem &fs, Allocator &allocator, FileOpener *opener, BufferedCSVReaderOptions options,
 	                  const vector<LogicalType> &requested_types = vector<LogicalType>());
+	BufferedCSVReader(ClientContext &context, string filename, BufferedCSVReaderOptions options,
+	                  const vector<LogicalType> &requested_types = vector<LogicalType>());
 	~BufferedCSVReader();
 	unique_ptr<char[]> buffer;

package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp CHANGED Viewed

@@ -19,9 +19,12 @@ public:
 	}
 	virtual ~SegmentBase() {
 	}
 	SegmentBase *Next() {
+#ifndef DUCKDB_R_BUILD
 		return next.load();
+#else
+		return next;
+#endif
 	}
 	//! The start row id of this chunk
@@ -29,7 +32,12 @@ public:
 	//! The amount of entries in this storage chunk
 	atomic<idx_t> count;
 	//! The next segment after this one
+#ifndef DUCKDB_R_BUILD
 	atomic<SegmentBase *> next;
+#else
+	SegmentBase *next;
+#endif
 };
 } // namespace duckdb

package/src/duckdb/src/main/connection.cpp CHANGED Viewed

@@ -224,8 +224,8 @@ shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
 	options.auto_detect = true;
 	BufferedCSVReader reader(*context, options);
 	vector<ColumnDefinition> column_list;
-	for (idx_t i = 0; i < reader.sql_types.size(); i++) {
-		column_list.emplace_back(reader.col_names[i], reader.sql_types[i]);
+	for (idx_t i = 0; i < reader.return_types.size(); i++) {
+		column_list.emplace_back(reader.names[i], reader.return_types[i]);
 	}
 	return make_shared<ReadCSVRelation>(context, csv_file, std::move(column_list), true);
 }

package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp CHANGED Viewed

@@ -8,14 +8,14 @@ namespace duckdb {
 BoundLambdaRefExpression::BoundLambdaRefExpression(string alias_p, LogicalType type, ColumnBinding binding,
                                                    idx_t lambda_index, idx_t depth)
-    : Expression(ExpressionType::BOUND_LAMBDA_REF, ExpressionClass::BOUND_LAMBDA_REF, move(type)), binding(binding),
-      lambda_index(lambda_index), depth(depth) {
-	this->alias = move(alias_p);
+    : Expression(ExpressionType::BOUND_LAMBDA_REF, ExpressionClass::BOUND_LAMBDA_REF, std::move(type)),
+      binding(binding), lambda_index(lambda_index), depth(depth) {
+	this->alias = std::move(alias_p);
 }
 BoundLambdaRefExpression::BoundLambdaRefExpression(LogicalType type, ColumnBinding binding, idx_t lambda_index,
                                                    idx_t depth)
-    : BoundLambdaRefExpression(string(), move(type), binding, lambda_index, depth) {
+    : BoundLambdaRefExpression(string(), std::move(type), binding, lambda_index, depth) {
 }
 unique_ptr<Expression> BoundLambdaRefExpression::Copy() {

package/src/duckdb/src/storage/table/update_segment.cpp CHANGED Viewed

@@ -616,7 +616,7 @@ struct UpdateSelectElement {
 template <>
 string_t UpdateSelectElement::Operation(UpdateSegment *segment, string_t element) {
-	return element.IsInlined() ? element : segment->GetStringHeap().AddString(element);
+	return element.IsInlined() ? element : segment->GetStringHeap().AddBlob(element);
 }
 template <class T>
@@ -942,7 +942,7 @@ idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, V
 		for (idx_t i = 0; i < count; i++) {
 			((StringStatistics &)*stats.statistics).Update(update_data[i]);
 			if (!update_data[i].IsInlined()) {
-				update_data[i] = segment->GetStringHeap().AddString(update_data[i]);
+				update_data[i] = segment->GetStringHeap().AddBlob(update_data[i]);
 			}
 		}
 		sel.Initialize(nullptr);
@@ -955,7 +955,7 @@ idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, V
 				sel.set_index(not_null_count++, i);
 				((StringStatistics &)*stats.statistics).Update(update_data[i]);
 				if (!update_data[i].IsInlined()) {
-					update_data[i] = segment->GetStringHeap().AddString(update_data[i]);
+					update_data[i] = segment->GetStringHeap().AddBlob(update_data[i]);
 				}
 			}
 		}