duckdb 0.8.2-dev4314.0 → 0.8.2-dev4424.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
  3. package/src/duckdb/src/common/enum_util.cpp +5 -0
  4. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  5. package/src/duckdb/src/common/types/date.cpp +1 -1
  6. package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
  7. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +3 -10
  8. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +6 -3
  9. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
  10. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  11. package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
  12. package/src/duckdb/src/function/table/read_csv.cpp +5 -22
  13. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  14. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
  15. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
  16. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  17. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
  18. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  19. package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
  20. package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
  21. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
  22. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  23. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
  24. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
  25. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
  26. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
  27. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  28. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
  29. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
  30. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  31. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
  32. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
  33. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
  34. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
  35. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
  36. package/src/duckdb/src/main/settings/settings.cpp +5 -10
  37. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
  38. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
  39. package/src/duckdb/src/storage/checkpoint_manager.cpp +37 -36
  40. package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
  41. package/src/duckdb/src/storage/data_table.cpp +1 -1
  42. package/src/duckdb/src/storage/local_storage.cpp +9 -2
  43. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
  44. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
  45. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
  46. package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
  47. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  48. package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
  49. package/src/duckdb/src/storage/table/column_data.cpp +14 -9
  50. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
  51. package/src/duckdb/src/storage/table/row_group.cpp +102 -192
  52. package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
  53. package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
  54. package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
  55. package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
  56. package/src/duckdb/src/transaction/commit_state.cpp +5 -4
  57. package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
  58. package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
  59. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
  60. package/src/duckdb/ub_src_storage_table.cpp +2 -0
  61. package/test/prepare.test.ts +10 -1
  62. package/test/test_all_types.test.ts +4 -4
package/package.json CHANGED
@@ -2,7 +2,7 @@
  "name": "duckdb",
  "main": "./lib/duckdb.js",
  "types": "./lib/duckdb.d.ts",
- "version": "0.8.2-dev4314.0",
+ "version": "0.8.2-dev4424.0",
  "description": "DuckDB node.js API",
  "gypfile": true,
  "dependencies": {
package/src/duckdb/extension/parquet/parquet_extension.cpp CHANGED
@@ -118,7 +118,7 @@ struct ParquetWriteBindData : public TableFunctionData {
  vector<LogicalType> sql_types;
  vector<string> column_names;
  duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
- idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
+ idx_t row_group_size = Storage::ROW_GROUP_SIZE;

  //! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
  static constexpr const idx_t BYTES_PER_ROW = 1024;
package/src/duckdb/src/common/enum_util.cpp CHANGED
@@ -551,6 +551,8 @@ BindingMode EnumUtil::FromString<BindingMode>(const char *value) {
  template<>
  const char* EnumUtil::ToChars<BitpackingMode>(BitpackingMode value) {
  switch(value) {
+ case BitpackingMode::INVALID:
+   return "INVALID";
  case BitpackingMode::AUTO:
    return "AUTO";
  case BitpackingMode::CONSTANT:
@@ -568,6 +570,9 @@ const char* EnumUtil::ToChars<BitpackingMode>(BitpackingMode value) {

  template<>
  BitpackingMode EnumUtil::FromString<BitpackingMode>(const char *value) {
+ if (StringUtil::Equals(value, "INVALID")) {
+   return BitpackingMode::INVALID;
+ }
  if (StringUtil::Equals(value, "AUTO")) {
    return BitpackingMode::AUTO;
  }
package/src/duckdb/src/common/file_buffer.cpp CHANGED
@@ -5,7 +5,7 @@
  #include "duckdb/common/exception.hpp"
  #include "duckdb/common/file_system.hpp"
  #include "duckdb/common/helper.hpp"
-
+ #include "duckdb/storage/storage_info.hpp"
  #include <cstring>

  namespace duckdb {
package/src/duckdb/src/common/types/date.cpp CHANGED
@@ -492,7 +492,7 @@ int32_t Date::ExtractDayOfTheYear(date_t date) {

  int64_t Date::ExtractJulianDay(date_t date) {
  // Julian Day 0 is (-4713, 11, 24) in the proleptic Gregorian calendar.
- static const auto JULIAN_EPOCH = -2440588;
+ static const int64_t JULIAN_EPOCH = -2440588;
  return date.days - JULIAN_EPOCH;
  }

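The change above swaps `auto` for an explicit `int64_t`: the literal `-2440588` deduces to plain `int`, so the subtraction could otherwise be carried out in 32-bit arithmetic. A minimal standalone sketch of the difference (not DuckDB code; the day value is hypothetical):

    #include <cstdint>
    #include <iostream>

    int main() {
        const auto julian_epoch_auto = -2440588;  // deduces plain int
        const int64_t julian_epoch_64 = -2440588; // explicit 64-bit

        int32_t days = 2147000000; // hypothetical day count near INT32_MAX
        // days - julian_epoch_auto would be evaluated in 32-bit arithmetic and
        // overflow (undefined behavior), since 2147000000 + 2440588 > INT32_MAX.
        // With an int64_t operand, `days` is widened before the subtraction:
        int64_t result = days - julian_epoch_64;
        std::cout << result << "\n"; // prints 2149440588
        return 0;
    }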
package/src/duckdb/src/common/types/validity_mask.cpp CHANGED
@@ -1,4 +1,7 @@
  #include "duckdb/common/types/validity_mask.hpp"
+ #include "duckdb/common/limits.hpp"
+ #include "duckdb/common/serializer/write_stream.hpp"
+ #include "duckdb/common/serializer/read_stream.hpp"

  namespace duckdb {

@@ -173,4 +176,57 @@ void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset,
  #endif
  }

+ enum class ValiditySerialization : uint8_t { BITMASK = 0, VALID_VALUES = 1, INVALID_VALUES = 2 };
+
+ void ValidityMask::Write(WriteStream &writer, idx_t count) {
+   auto valid_values = CountValid(count);
+   auto invalid_values = count - valid_values;
+   auto bitmask_bytes = ValidityMask::ValidityMaskSize(count);
+   auto need_u32 = count >= NumericLimits<uint16_t>::Maximum();
+   auto bytes_per_value = need_u32 ? sizeof(uint32_t) : sizeof(uint16_t);
+   auto valid_value_size = bytes_per_value * valid_values + sizeof(uint32_t);
+   auto invalid_value_size = bytes_per_value * invalid_values + sizeof(uint32_t);
+   if (valid_value_size < bitmask_bytes || invalid_value_size < bitmask_bytes) {
+     auto serialize_valid = valid_value_size < invalid_value_size;
+     // serialize (in)valid value indexes as [COUNT][V0][V1][...][VN]
+     auto flag = serialize_valid ? ValiditySerialization::VALID_VALUES : ValiditySerialization::INVALID_VALUES;
+     writer.Write(flag);
+     writer.Write<uint32_t>(MinValue<uint32_t>(valid_values, invalid_values));
+     for (idx_t i = 0; i < count; i++) {
+       if (RowIsValid(i) == serialize_valid) {
+         if (need_u32) {
+           writer.Write<uint32_t>(i);
+         } else {
+           writer.Write<uint16_t>(i);
+         }
+       }
+     }
+   } else {
+     // serialize the entire bitmask
+     writer.Write(ValiditySerialization::BITMASK);
+     writer.WriteData(const_data_ptr_cast(GetData()), bitmask_bytes);
+   }
+ }
+
+ void ValidityMask::Read(ReadStream &reader, idx_t count) {
+   Initialize(count);
+   // deserialize the storage type
+   auto flag = reader.Read<ValiditySerialization>();
+   if (flag == ValiditySerialization::BITMASK) {
+     // deserialize the bitmask
+     reader.ReadData(data_ptr_cast(GetData()), ValidityMask::ValidityMaskSize(count));
+     return;
+   }
+   auto is_u32 = count >= NumericLimits<uint16_t>::Maximum();
+   auto is_valid = flag == ValiditySerialization::VALID_VALUES;
+   auto serialize_count = reader.Read<uint32_t>();
+   if (is_valid) {
+     SetAllInvalid(count);
+   }
+   for (idx_t i = 0; i < serialize_count; i++) {
+     idx_t index = is_u32 ? reader.Read<uint32_t>() : reader.Read<uint16_t>();
+     Set(index, is_valid);
+   }
+ }
+
  } // namespace duckdb
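The new `Write` picks whichever encoding is smaller: the raw bitmask, or a `[COUNT][V0]...[VN]` list of valid (or invalid) row indexes, using 16-bit indexes when `count` fits below the uint16_t maximum. A standalone sketch of just that size heuristic, with illustrative names and the bitmask size simplified to whole bytes per row:

    #include <cstdint>
    #include <cstdio>

    enum class Encoding { BITMASK, VALID_VALUES, INVALID_VALUES };

    // Mirrors the heuristic in ValidityMask::Write above (simplified sizes).
    Encoding ChooseEncoding(uint64_t count, uint64_t valid_values) {
        uint64_t invalid_values = count - valid_values;
        uint64_t bitmask_bytes = (count + 7) / 8; // one bit per row
        bool need_u32 = count >= UINT16_MAX;      // index width selection
        uint64_t bytes_per_value = need_u32 ? 4 : 2;
        // each index list is prefixed by a uint32_t count
        uint64_t valid_size = bytes_per_value * valid_values + 4;
        uint64_t invalid_size = bytes_per_value * invalid_values + 4;
        if (valid_size < bitmask_bytes || invalid_size < bitmask_bytes) {
            return valid_size < invalid_size ? Encoding::VALID_VALUES
                                             : Encoding::INVALID_VALUES;
        }
        return Encoding::BITMASK;
    }

    int main() {
        // 122880 rows with 3 deletions: 3 indexes beat a ~15 KB bitmask
        printf("%d\n", (int)ChooseEncoding(122880, 122877)); // 2 = INVALID_VALUES
        // half valid, half invalid: the bitmask wins
        printf("%d\n", (int)ChooseEncoding(122880, 61440));  // 0 = BITMASK
        return 0;
    }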
package/src/duckdb/src/execution/index/fixed_size_buffer.cpp CHANGED
@@ -148,9 +148,6 @@ void FixedSizeBuffer::Pin() {

  uint32_t FixedSizeBuffer::GetOffset(const idx_t bitmask_count) {

- // this function calls Get() on the buffer, so the buffer must already be in memory
- D_ASSERT(InMemory());
-
  // get the bitmask data
  auto bitmask_ptr = reinterpret_cast<validity_t *>(Get());
  ValidityMask mask(bitmask_ptr);
@@ -200,7 +197,7 @@ uint32_t FixedSizeBuffer::GetOffset(const idx_t bitmask_count) {

  uint32_t FixedSizeBuffer::GetMaxOffset(const idx_t available_segments) {

- // this function calls Get() on the buffer, so the buffer must already be in memory
+ // this function calls Get() on the buffer
  D_ASSERT(InMemory());

  // finds the maximum zero bit in a bitmask, and adds one to it,
@@ -259,17 +256,13 @@ uint32_t FixedSizeBuffer::GetMaxOffset(const idx_t available_segments) {
  }

  // there are no allocations in this buffer
- // FIXME: put this line back in and then fix the missing vacuum bug in
- // FIXME: test_index_large_aborted_append.test with force_restart
- // FIXME: test if we still have non-dirty buffer to serialize after fixing this
- // throw InternalException("tried to serialize empty buffer");
- return 0;
+ throw InternalException("tried to serialize empty buffer");
  }

  void FixedSizeBuffer::SetUninitializedRegions(PartialBlockForIndex &p_block_for_index, const idx_t segment_size,
  const idx_t offset, const idx_t bitmask_offset) {

- // this function calls Get() on the buffer, so the buffer must already be in memory
+ // this function calls Get() on the buffer
  D_ASSERT(InMemory());

  auto bitmask_ptr = reinterpret_cast<validity_t *>(Get());
package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp CHANGED
@@ -89,17 +89,19 @@ bool ParallelCSVReader::SetPosition() {
  position_buffer++;
  }
  if (position_buffer > end_buffer) {
+   VerifyLineLength(position_buffer, buffer->batch_index);
    return false;
  }
  SkipEmptyLines();
  if (verification_positions.beginning_of_first_line == 0) {
    verification_positions.beginning_of_first_line = position_buffer;
  }
-
+ VerifyLineLength(position_buffer, buffer->batch_index);
  verification_positions.end_of_last_line = position_buffer;
  return true;
  }
  }
+ VerifyLineLength(position_buffer, buffer->batch_index);
  return false;
  }
  SkipEmptyLines();
@@ -143,12 +145,13 @@ bool ParallelCSVReader::SetPosition() {
  break;
  }

- if (position_buffer >= end_buffer && !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
+ auto pos_check = position_buffer == 0 ? position_buffer : position_buffer - 1;
+ if (position_buffer >= end_buffer && !StringUtil::CharacterIsNewline((*buffer)[pos_check])) {
    break;
  }

  if (position_buffer > end_buffer && options.dialect_options.new_line == NewLineIdentifier::CARRY_ON &&
-   (*buffer)[position_buffer - 1] == '\n') {
+   (*buffer)[pos_check] == '\n') {
    break;
  }
  idx_t position_set = position_buffer;
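`position_buffer` is an unsigned `idx_t`, so `position_buffer - 1` wraps around to a huge value when it is zero; the new `pos_check` clamps the subscript first. A tiny standalone illustration of the wraparound this guards against:

    #include <cstdint>
    #include <cstdio>

    using idx_t = uint64_t;

    int main() {
        idx_t position_buffer = 0;
        // Unsigned subtraction wraps instead of going negative:
        printf("%llu\n", (unsigned long long)(position_buffer - 1)); // 18446744073709551615
        // The patched guard clamps the index before it subscripts a buffer:
        idx_t pos_check = position_buffer == 0 ? position_buffer : position_buffer - 1;
        printf("%llu\n", (unsigned long long)pos_check); // 0
        return 0;
    }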
package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp CHANGED
@@ -194,7 +194,7 @@ public:
  }
  auto new_count = current_collection->GetTotalRows();
  auto batch_type =
- new_count < RowGroup::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
+ new_count < Storage::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
  if (batch_type == RowGroupBatchType::FLUSHED && writer) {
    writer->WriteLastRowGroup(*current_collection);
  }
package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp CHANGED
@@ -482,7 +482,7 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato

  lock_guard<mutex> lock(gstate.lock);
  gstate.insert_count += append_count;
- if (append_count < RowGroup::ROW_GROUP_SIZE) {
+ if (append_count < Storage::ROW_GROUP_SIZE) {
  // we have few rows - append to the local storage directly
  auto &table = gstate.table;
  auto &storage = table.GetStorage();
package/src/duckdb/src/function/table/arrow_conversion.cpp CHANGED
@@ -837,7 +837,15 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar
  throw InvalidInputException("arrow_scan: array length mismatch");
  }
  // Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
- output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.chunk));
+ if (scan_state.arrow_owned_data.find(idx) == scan_state.arrow_owned_data.end()) {
+   auto arrow_data = make_shared<ArrowArrayWrapper>();
+   arrow_data->arrow_array = scan_state.chunk->arrow_array;
+   scan_state.chunk->arrow_array.release = nullptr;
+   scan_state.arrow_owned_data[idx] = arrow_data;
+ }
+
+ output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.arrow_owned_data[idx]));
+
  D_ASSERT(arrow_convert_data.find(col_idx) != arrow_convert_data.end());
  auto &arrow_type = *arrow_convert_data.at(col_idx);
  if (array.dictionary) {
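The new code moves the chunk's `ArrowArray` into a per-column wrapper and clears the source's `release` callback, which under the Arrow C data interface marks the source struct as moved so the buffers are released exactly once. A minimal sketch of that convention using the struct layout from the Arrow specification (the `TakeOwnership` helper is hypothetical, not DuckDB's API):

    #include <cstdint>
    #include <cstdio>

    // Abbreviated ArrowArray from the Arrow C data interface specification.
    struct ArrowArray {
        int64_t length;
        int64_t null_count;
        int64_t offset;
        int64_t n_buffers;
        int64_t n_children;
        const void **buffers;
        ArrowArray **children;
        ArrowArray *dictionary;
        void (*release)(ArrowArray *);
        void *private_data;
    };

    // Hypothetical helper: shallow-copy the struct, then mark the source as
    // moved by clearing its release callback, as the hunk above does.
    ArrowArray TakeOwnership(ArrowArray &source) {
        ArrowArray moved = source;
        source.release = nullptr; // source no longer owns the buffers
        return moved;
    }

    int main() {
        ArrowArray chunk = {}; // imagine this was produced by a scan
        chunk.release = [](ArrowArray *array) { array->release = nullptr; };
        ArrowArray owned = TakeOwnership(chunk);
        printf("source state: %s\n", chunk.release ? "owner" : "moved");
        if (owned.release) {
            owned.release(&owned); // exactly one release of the data
        }
        return 0;
    }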
package/src/duckdb/src/function/table/read_csv.cpp CHANGED
@@ -300,7 +300,7 @@ public:
  const CSVReaderOptions &options, idx_t system_threads_p, const vector<string> &files_path_p,
  bool force_parallelism_p, vector<column_t> column_ids_p)
  : buffer_manager(std::move(buffer_manager_p)), system_threads(system_threads_p),
- buffer_size(options.buffer_size), force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
+ force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
  line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
  current_file_path = files_path_p[0];
  CSVFileHandle *file_handle_ptr;
@@ -316,16 +316,6 @@ public:
  first_file_size = file_size;
  on_disk_file = file_handle_ptr->OnDiskFile();
  bytes_read = 0;
- if (buffer_size < file_size || file_size == 0) {
-   bytes_per_local_state = buffer_size / ParallelCSVGlobalState::MaxThreads();
- } else {
-   bytes_per_local_state = file_size / MaxThreads();
- }
- if (bytes_per_local_state == 0) {
-   // In practice, I think this won't happen, it only happens because we are mocking up test scenarios
-   // this boy needs to be at least one.
-   bytes_per_local_state = 1;
- }
  running_threads = MaxThreads();

  // Initialize all the book-keeping variables
@@ -368,8 +358,6 @@ public:

  void UpdateLinesRead(CSVBufferRead &buffer_read, idx_t file_idx);

- void IncrementThread();
-
  void DecrementThread();

  bool Finished();
@@ -402,16 +390,12 @@ private:
  mutex main_mutex;
  //! Byte set from for last thread
  idx_t next_byte = 0;
- //! How many bytes we should execute per local state
- idx_t bytes_per_local_state;
  //! Size of first file
  idx_t first_file_size = 0;
  //! Whether or not this is an on-disk file
  bool on_disk_file = true;
  //! Basically max number of threads in DuckDB
  idx_t system_threads;
- //! Size of the buffers
- idx_t buffer_size;
  //! Current batch index
  idx_t batch_index = 0;
  idx_t local_batch_index = 0;
@@ -454,11 +438,6 @@ idx_t ParallelCSVGlobalState::MaxThreads() const {
  return system_threads;
  }

- void ParallelCSVGlobalState::IncrementThread() {
-   lock_guard<mutex> parallel_lock(main_mutex);
-   running_threads++;
- }
-
  void ParallelCSVGlobalState::DecrementThread() {
  lock_guard<mutex> parallel_lock(main_mutex);
  D_ASSERT(running_threads > 0);
@@ -572,6 +551,7 @@ bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bin
  }
  // set up the current buffer
  line_info.current_batches[file_index - 1].insert(local_batch_index);
+ idx_t bytes_per_local_state = current_buffer->actual_size / MaxThreads() + 1;
  auto result = make_uniq<CSVBufferRead>(
  buffer_manager->GetBuffer(cur_buffer_idx), buffer_manager->GetBuffer(cur_buffer_idx + 1), next_byte,
  next_byte + bytes_per_local_state, batch_index++, local_batch_index++, &line_info);
@@ -1135,6 +1115,9 @@ unique_ptr<TableRef> ReadCSVReplacement(ClientContext &context, const string &ta
  if (StringUtil::EndsWith(lower_name, ".gz")) {
  lower_name = lower_name.substr(0, lower_name.size() - 3);
  } else if (StringUtil::EndsWith(lower_name, ".zst")) {
+   if (!Catalog::TryAutoLoad(context, "parquet")) {
+     throw MissingExtensionException("parquet extension is required for reading zst compressed file");
+   }
  lower_name = lower_name.substr(0, lower_name.size() - 4);
  }
  if (!StringUtil::EndsWith(lower_name, ".csv") && !StringUtil::Contains(lower_name, ".csv?") &&
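Instead of a single `bytes_per_local_state` fixed at construction, the scan now derives it per buffer as `actual_size / MaxThreads() + 1`; the `+ 1` keeps each share non-zero for tiny buffers and rounds up so the shares cover the whole buffer. A quick arithmetic sketch with illustrative values:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint64_t max_threads = 8;
        const uint64_t buffer_sizes[] = {5, 1000, 32000000};
        for (uint64_t actual_size : buffer_sizes) {
            uint64_t bytes_per_local_state = actual_size / max_threads + 1;
            // every thread gets a non-zero share, and the shares cover the buffer
            printf("buffer=%llu share=%llu threads*share=%llu\n",
                   (unsigned long long)actual_size,
                   (unsigned long long)bytes_per_local_state,
                   (unsigned long long)(max_threads * bytes_per_local_state));
        }
        return 0;
    }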
package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED
@@ -1,8 +1,8 @@
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "0.8.2-dev4314"
+ #define DUCKDB_VERSION "0.8.2-dev4424"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "84a109bbee"
+ #define DUCKDB_SOURCE_ID "b78b24ad26"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/constants.hpp CHANGED
@@ -58,21 +58,6 @@ struct DConstants {
  static constexpr const idx_t INVALID_INDEX = idx_t(-1);
  };

- struct Storage {
-   //! The size of a hard disk sector, only really needed for Direct IO
-   constexpr static int SECTOR_SIZE = 4096;
-   //! Block header size for blocks written to the storage
-   constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
-   // Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
-   // default to 256KB. (1 << 18)
-   constexpr static int BLOCK_ALLOC_SIZE = 262144;
-   //! The actual memory space that is available within the blocks
-   constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
-   //! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
-   //! to the page size, which is 4KB. (1 << 12)
-   constexpr static int FILE_HEADER_SIZE = 4096;
- };
-
  struct LogicalIndex {
  explicit LogicalIndex(idx_t index) : index(index) {
  }
package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp CHANGED
@@ -1,7 +1,7 @@
  //===----------------------------------------------------------------------===//
  // DuckDB
  //
- // duckdb/common/serializer/buffer_stream.hpp
+ // duckdb/common/serializer/memory_stream.hpp
  //
  //
  //===----------------------------------------------------------------------===//
package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp CHANGED
@@ -332,6 +332,9 @@ public:
  DUCKDB_API string ToString(idx_t count) const;

  DUCKDB_API static bool IsAligned(idx_t count);
+
+ void Write(WriteStream &writer, idx_t count);
+ void Read(ReadStream &reader, idx_t count);
  };

  } // namespace duckdb
package/src/duckdb/src/include/duckdb/function/table/arrow.hpp CHANGED
@@ -67,6 +67,9 @@ struct ArrowScanLocalState : public LocalTableFunctionState {

  unique_ptr<ArrowArrayStreamWrapper> stream;
  shared_ptr<ArrowArrayWrapper> chunk;
+ // This vector hold the Arrow Vectors owned by DuckDB to allow for zero-copy
+ // Note that only DuckDB can release these vectors
+ unordered_map<idx_t, shared_ptr<ArrowArrayWrapper>> arrow_owned_data;
  idx_t chunk_offset = 0;
  idx_t batch_index = 0;
  vector<column_t> column_ids;
package/src/duckdb/src/include/duckdb/main/query_result.hpp CHANGED
@@ -40,7 +40,7 @@ public:
  vector<string> names;

  public:
- DUCKDB_API void ThrowError(const string &prepended_message = "") const;
+ [[noreturn]] DUCKDB_API void ThrowError(const string &prepended_message = "") const;
  DUCKDB_API void SetError(PreservedError error);
  DUCKDB_API bool HasError() const;
  DUCKDB_API const ExceptionType &GetErrorType() const;
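Annotating `ThrowError` with `[[noreturn]]` documents that every call ends by raising an exception, which lets compilers prove that code after the call is unreachable and silences missing-return warnings at call sites. A generic illustration (not DuckDB's class):

    #include <stdexcept>

    [[noreturn]] void ThrowError(const char *message) {
        throw std::runtime_error(message);
    }

    int Divide(int a, int b) {
        if (b == 0) {
            ThrowError("division by zero");
            // no `return` is needed here: the attribute tells the compiler
            // this path never falls through
        }
        return a / b;
    }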
package/src/duckdb/src/include/duckdb/storage/block.hpp CHANGED
@@ -52,11 +52,11 @@ struct MetaBlockPointer {
  idx_t block_pointer;
  uint32_t offset;

- bool IsValid() {
+ bool IsValid() const {
    return block_pointer != DConstants::INVALID_INDEX;
  }
- block_id_t GetBlockId();
- uint32_t GetBlockIndex();
+ block_id_t GetBlockId() const;
+ uint32_t GetBlockIndex() const;

  void Serialize(Serializer &serializer) const;
  static MetaBlockPointer Deserialize(Deserializer &source);
package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp CHANGED
@@ -12,14 +12,7 @@

  namespace duckdb {

- enum class BitpackingMode : uint8_t {
-   AUTO,
-
-   CONSTANT,
-   CONSTANT_DELTA,
-   DELTA_FOR,
-   FOR
- };
+ enum class BitpackingMode : uint8_t { INVALID, AUTO, CONSTANT, CONSTANT_DELTA, DELTA_FOR, FOR };

  BitpackingMode BitpackingModeFromString(const string &str);
  string BitpackingModeToString(const BitpackingMode &mode);
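Adding `INVALID` as the first, zero-valued enumerator gives `BitpackingMode` an explicit "unset" state, so a value-initialized mode no longer silently reads as `AUTO`. A small standalone sketch of the pattern (the surrounding struct is illustrative):

    #include <cstdint>
    #include <cstdio>

    enum class BitpackingMode : uint8_t { INVALID, AUTO, CONSTANT, CONSTANT_DELTA, DELTA_FOR, FOR };

    struct CompressState {
        BitpackingMode mode{}; // value-initialization yields INVALID, not AUTO
    };

    int main() {
        CompressState state;
        if (state.mode == BitpackingMode::INVALID) {
            // before this change, a zero-initialized mode aliased AUTO
            printf("mode was never set\n");
        }
        return 0;
    }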
package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp CHANGED
@@ -40,8 +40,8 @@ struct RowGroupPointer {
  uint64_t tuple_count;
  //! The data pointers of the column segments stored in the row group
  vector<MetaBlockPointer> data_pointers;
- //! The versions information of the row group (if any)
- shared_ptr<VersionNode> versions;
+ //! Data pointers to the delete information of the row group (if any)
+ vector<MetaBlockPointer> deletes_pointers;
  };

  } // namespace duckdb
package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp CHANGED
@@ -64,6 +64,7 @@ public:
  void Flush();

  void MarkBlocksAsModified();
+ void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);

  idx_t BlockCount();

@@ -82,6 +83,7 @@ protected:

  void AddBlock(MetadataBlock new_block, bool if_exists = false);
  void AddAndRegisterBlock(MetadataBlock block);
+ void ConvertToTransient(MetadataBlock &block);
  };

  } // namespace duckdb
package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp CHANGED
@@ -18,6 +18,7 @@ enum class BlockReaderType { EXISTING_BLOCKS, REGISTER_BLOCKS };
  class MetadataReader : public ReadStream {
  public:
  MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
+   optional_ptr<vector<MetaBlockPointer>> read_pointers = nullptr,
    BlockReaderType type = BlockReaderType::EXISTING_BLOCKS);
  MetadataReader(MetadataManager &manager, BlockPointer pointer);
  ~MetadataReader() override;
@@ -46,6 +47,7 @@ private:
  MetadataHandle block;
  MetadataPointer next_pointer;
  bool has_next_block;
+ optional_ptr<vector<MetaBlockPointer>> read_pointers;
  idx_t index;
  idx_t offset;
  idx_t next_offset;
package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp CHANGED
@@ -15,10 +15,10 @@ namespace duckdb {

  class MetadataWriter : public WriteStream {
  public:
+ explicit MetadataWriter(MetadataManager &manager,
+   optional_ptr<vector<MetaBlockPointer>> written_pointers = nullptr);
  MetadataWriter(const MetadataWriter &) = delete;
  MetadataWriter &operator=(const MetadataWriter &) = delete;
-
- explicit MetadataWriter(MetadataManager &manager);
  ~MetadataWriter() override;

  public:
@@ -27,6 +27,9 @@ public:

  BlockPointer GetBlockPointer();
  MetaBlockPointer GetMetaBlockPointer();
+ MetadataManager &GetManager() {
+   return manager;
+ }

  protected:
  virtual MetadataHandle NextHandle();
@@ -41,6 +44,7 @@ private:
  MetadataManager &manager;
  MetadataHandle block;
  MetadataPointer current_pointer;
+ optional_ptr<vector<MetaBlockPointer>> written_pointers;
  idx_t capacity;
  idx_t offset;
  };
package/src/duckdb/src/include/duckdb/storage/storage_info.hpp CHANGED
@@ -23,6 +23,25 @@ struct FileHandle;
  #error Row group size should be cleanly divisible by vector size
  #endif

+ struct Storage {
+   //! The size of a hard disk sector, only really needed for Direct IO
+   constexpr static int SECTOR_SIZE = 4096;
+   //! Block header size for blocks written to the storage
+   constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
+   // Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
+   // default to 256KB. (1 << 18)
+   constexpr static int BLOCK_ALLOC_SIZE = 262144;
+   //! The actual memory space that is available within the blocks
+   constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
+   //! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
+   //! to the page size, which is 4KB. (1 << 12)
+   constexpr static int FILE_HEADER_SIZE = 4096;
+   //! The number of rows per row group (must be a multiple of the vector size)
+   constexpr static const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE;
+   //! The number of vectors per row group
+   constexpr static const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
+ };
+
  //! The version number of the database storage format
  extern const uint64_t VERSION_NUMBER;

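Besides relocating the `Storage` constants from `constants.hpp`, this hunk adds the row-group sizing constants that the `RowGroup::ROW_GROUP_SIZE` call sites above now reference as `Storage::ROW_GROUP_SIZE`. A sketch that re-derives the numbers, assuming the build defaults `STANDARD_ROW_GROUPS_SIZE = 122880` and `STANDARD_VECTOR_SIZE = 2048`:

    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr int BLOCK_ALLOC_SIZE = 262144;            // 1 << 18
        constexpr int BLOCK_HEADER_SIZE = sizeof(uint64_t); // 8-byte checksum header
        constexpr int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;

        // assumed build defaults for the derived row-group constants
        constexpr uint64_t STANDARD_ROW_GROUPS_SIZE = 122880;
        constexpr uint64_t STANDARD_VECTOR_SIZE = 2048;
        constexpr uint64_t ROW_GROUP_VECTOR_COUNT = STANDARD_ROW_GROUPS_SIZE / STANDARD_VECTOR_SIZE;

        printf("usable bytes per block: %d\n", BLOCK_SIZE); // 262136
        printf("vectors per row group: %llu\n",
               (unsigned long long)ROW_GROUP_VECTOR_COUNT); // 60
        return 0;
    }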
package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp CHANGED
@@ -46,8 +46,10 @@ public:
  virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0;
  virtual idx_t GetCommittedDeletedCount(idx_t max_count) = 0;

- virtual void Serialize(Serializer &serializer) const = 0;
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
+ virtual bool HasDeletes() const = 0;
+
+ virtual void Write(WriteStream &writer) const;
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);

  public:
  template <class TARGET>
@@ -74,8 +76,8 @@ public:
  public:
  explicit ChunkConstantInfo(idx_t start);

- atomic<transaction_t> insert_id;
- atomic<transaction_t> delete_id;
+ transaction_t insert_id;
+ transaction_t delete_id;

  public:
  idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) override;
@@ -85,8 +87,10 @@ public:
  void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
  idx_t GetCommittedDeletedCount(idx_t max_count) override;

- void Serialize(Serializer &serializer) const override;
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
+ bool HasDeletes() const override;
+
+ void Write(WriteStream &writer) const override;
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);

  private:
  template <class OP>
@@ -102,13 +106,13 @@ public:
  explicit ChunkVectorInfo(idx_t start);

  //! The transaction ids of the transactions that inserted the tuples (if any)
- atomic<transaction_t> inserted[STANDARD_VECTOR_SIZE];
- atomic<transaction_t> insert_id;
- atomic<bool> same_inserted_id;
+ transaction_t inserted[STANDARD_VECTOR_SIZE];
+ transaction_t insert_id;
+ bool same_inserted_id;

  //! The transaction ids of the transactions that deleted the tuples (if any)
- atomic<transaction_t> deleted[STANDARD_VECTOR_SIZE];
- atomic<bool> any_deleted;
+ transaction_t deleted[STANDARD_VECTOR_SIZE];
+ bool any_deleted;

  public:
  idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
@@ -130,8 +134,10 @@ public:
  idx_t Delete(transaction_t transaction_id, row_t rows[], idx_t count);
  void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count);

- void Serialize(Serializer &serializer) const override;
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
+ bool HasDeletes() const override;
+
+ void Write(WriteStream &writer) const override;
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);

  private:
  template <class OP>
package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp CHANGED
@@ -151,7 +151,7 @@ protected:
  void AppendTransientSegment(SegmentLock &l, idx_t start_row);

  //! Scans a base vector from the column
- idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining);
+ idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining, bool has_updates);
  //! Scans a vector from the column merged with any potential updates
  //! If ALLOW_UPDATES is set to false, the function will instead throw an exception if any updates are found
  template <bool SCAN_COMMITTED, bool ALLOW_UPDATES>