duckdb 0.7.2-dev1138.0 → 0.7.2-dev1146.0
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -1
- package/src/duckdb/src/common/types/column_data_collection_segment.cpp +1 -4
- package/src/duckdb/src/common/types/validity_mask.cpp +24 -7
- package/src/duckdb/src/common/types/vector.cpp +2 -6
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +17 -10
- package/src/duckdb/src/function/cast_rules.cpp +9 -4
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
    
        package/package.json
    CHANGED
    
    
| @@ -122,7 +122,8 @@ struct ParquetWriteGlobalState : public GlobalFunctionData { | |
| 122 122 | 
             
            };
         | 
| 123 123 |  | 
| 124 124 | 
             
            struct ParquetWriteLocalState : public LocalFunctionData {
         | 
| 125 | 
            -
            	explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types) | 
| 125 | 
            +
            	explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types)
         | 
| 126 | 
            +
            	    : buffer(Allocator::Get(context), types) {
         | 
| 126 127 | 
             
            	}
         | 
| 127 128 |  | 
| 128 129 | 
             
            	ColumnDataCollection buffer;
         | 
| @@ -169,11 +169,8 @@ idx_t ColumnDataCollectionSegment::ReadVectorInternal(ChunkManagementState &stat | |
| 169 169 | 
             
            		if (type_size > 0) {
         | 
| 170 170 | 
             
            			memcpy(target_data + current_offset * type_size, base_ptr, current_vdata.count * type_size);
         | 
| 171 171 | 
             
            		}
         | 
| 172 | 
            -
            		// FIXME: use bitwise operations here
         | 
| 173 172 | 
             
            		ValidityMask current_validity(validity_data);
         | 
| 174 | 
            -
            		 | 
| 175 | 
            -
            			target_validity.Set(current_offset + k, current_validity.RowIsValid(k));
         | 
| 176 | 
            -
            		}
         | 
| 173 | 
            +
            		target_validity.SliceInPlace(current_validity, current_offset, 0, current_vdata.count);
         | 
| 177 174 | 
             
            		current_offset += current_vdata.count;
         | 
| 178 175 | 
             
            		next_index = current_vdata.next_data;
         | 
| 179 176 | 
             
            	}
         | 
| @@ -68,24 +68,41 @@ void ValidityMask::Resize(idx_t old_size, idx_t new_size) { | |
| 68 68 | 
             
            	}
         | 
| 69 69 | 
             
            }
         | 
| 70 70 |  | 
| 71 | 
            -
            void ValidityMask::Slice(const ValidityMask &other, idx_t  | 
| 71 | 
            +
            void ValidityMask::Slice(const ValidityMask &other, idx_t source_offset, idx_t count) {
         | 
| 72 72 | 
             
            	if (other.AllValid()) {
         | 
| 73 73 | 
             
            		validity_mask = nullptr;
         | 
| 74 74 | 
             
            		validity_data.reset();
         | 
| 75 75 | 
             
            		return;
         | 
| 76 76 | 
             
            	}
         | 
| 77 | 
            -
            	if ( | 
| 77 | 
            +
            	if (source_offset == 0) {
         | 
| 78 78 | 
             
            		Initialize(other);
         | 
| 79 79 | 
             
            		return;
         | 
| 80 80 | 
             
            	}
         | 
| 81 | 
            -
            	ValidityMask new_mask( | 
| 81 | 
            +
            	ValidityMask new_mask(count);
         | 
| 82 | 
            +
            	new_mask.SliceInPlace(other, 0, source_offset, count);
         | 
| 83 | 
            +
            	Initialize(new_mask);
         | 
| 84 | 
            +
            }
         | 
| 82 85 |  | 
| 83 | 
            -
             | 
| 86 | 
            +
            bool ValidityMask::IsAligned(idx_t count) {
         | 
| 87 | 
            +
            	return count % BITS_PER_VALUE == 0;
         | 
| 88 | 
            +
            }
         | 
| 89 | 
            +
             | 
| 90 | 
            +
            void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count) {
         | 
| 91 | 
            +
            	if (IsAligned(source_offset) && IsAligned(target_offset)) {
         | 
| 92 | 
            +
            		auto target_validity = GetData();
         | 
| 93 | 
            +
            		auto source_validity = other.GetData();
         | 
| 94 | 
            +
            		auto source_offset_entries = EntryCount(source_offset);
         | 
| 95 | 
            +
            		auto target_offset_entries = EntryCount(target_offset);
         | 
| 96 | 
            +
            		memcpy(target_validity + target_offset_entries, source_validity + source_offset_entries,
         | 
| 97 | 
            +
            		       sizeof(validity_t) * EntryCount(count));
         | 
| 98 | 
            +
            		return;
         | 
| 99 | 
            +
            	}
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            	// FIXME: use bitwise operations here
         | 
| 84 102 | 
             
            #if 1
         | 
| 85 | 
            -
            	for (idx_t i =  | 
| 86 | 
            -
            		 | 
| 103 | 
            +
            	for (idx_t i = 0; i < count; i++) {
         | 
| 104 | 
            +
            		Set(target_offset + i, other.RowIsValid(source_offset + i));
         | 
| 87 105 | 
             
            	}
         | 
| 88 | 
            -
            	Initialize(new_mask);
         | 
| 89 106 | 
             
            #else
         | 
| 90 107 | 
             
            	// first shift the "whole" units
         | 
| 91 108 | 
             
            	idx_t entire_units = offset / BITS_PER_VALUE;
         | 
| @@ -136,17 +136,13 @@ void Vector::Slice(Vector &other, idx_t offset, idx_t end) { | |
| 136 136 | 
             
            		for (idx_t i = 0; i < entries.size(); i++) {
         | 
| 137 137 | 
             
            			entries[i]->Slice(*other_entries[i], offset, end);
         | 
| 138 138 | 
             
            		}
         | 
| 139 | 
            -
            		 | 
| 140 | 
            -
            			new_vector.validity.Slice(other.validity, offset, end);
         | 
| 141 | 
            -
            		} else {
         | 
| 142 | 
            -
            			new_vector.validity = other.validity;
         | 
| 143 | 
            -
            		}
         | 
| 139 | 
            +
            		new_vector.validity.Slice(other.validity, offset, end - offset);
         | 
| 144 140 | 
             
            		Reference(new_vector);
         | 
| 145 141 | 
             
            	} else {
         | 
| 146 142 | 
             
            		Reference(other);
         | 
| 147 143 | 
             
            		if (offset > 0) {
         | 
| 148 144 | 
             
            			data = data + GetTypeIdSize(internal_type) * offset;
         | 
| 149 | 
            -
            			validity.Slice(other.validity, offset, end);
         | 
| 145 | 
            +
            			validity.Slice(other.validity, offset, end - offset);
         | 
| 150 146 | 
             
            		}
         | 
| 151 147 | 
             
            	}
         | 
| 152 148 | 
             
            }
         | 
| @@ -53,10 +53,13 @@ RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p | |
| 53 53 | 
             
            // Sink
         | 
| 54 54 | 
             
            //===--------------------------------------------------------------------===//
         | 
| 55 55 | 
             
            class RadixHTGlobalState : public GlobalSinkState {
         | 
| 56 | 
            +
            	constexpr const static idx_t MAX_RADIX_PARTITIONS = 32;
         | 
| 57 | 
            +
             | 
| 56 58 | 
             
            public:
         | 
| 57 59 | 
             
            	explicit RadixHTGlobalState(ClientContext &context)
         | 
| 58 | 
            -
            	    : is_empty(true), multi_scan(true),  | 
| 59 | 
            -
            	      partition_info( | 
| 60 | 
            +
            	    : is_empty(true), multi_scan(true), partitioned(false),
         | 
| 61 | 
            +
            	      partition_info(
         | 
| 62 | 
            +
            	          MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
         | 
| 60 63 | 
             
            	}
         | 
| 61 64 |  | 
| 62 65 | 
             
            	vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
         | 
| @@ -68,8 +71,8 @@ public: | |
| 68 71 | 
             
            	bool multi_scan;
         | 
| 69 72 | 
             
            	//! The lock for updating the global aggregate state
         | 
| 70 73 | 
             
            	mutex lock;
         | 
| 71 | 
            -
            	//!  | 
| 72 | 
            -
            	atomic< | 
| 74 | 
            +
            	//! Whether or not any thread has crossed the partitioning threshold
         | 
| 75 | 
            +
            	atomic<bool> partitioned;
         | 
| 73 76 |  | 
| 74 77 | 
             
            	bool is_finalized = false;
         | 
| 75 78 | 
             
            	bool is_partitioned = false;
         | 
| @@ -79,7 +82,7 @@ public: | |
| 79 82 |  | 
| 80 83 | 
             
            class RadixHTLocalState : public LocalSinkState {
         | 
| 81 84 | 
             
            public:
         | 
| 82 | 
            -
            	explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : is_empty(true) {
         | 
| 85 | 
            +
            	explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : total_groups(0), is_empty(true) {
         | 
| 83 86 | 
             
            		// if there are no groups we create a fake group so everything has the same group
         | 
| 84 87 | 
             
            		group_chunk.InitializeEmpty(ht.group_types);
         | 
| 85 88 | 
             
            		if (ht.grouping_set.empty()) {
         | 
| @@ -90,6 +93,8 @@ public: | |
| 90 93 | 
             
            	DataChunk group_chunk;
         | 
| 91 94 | 
             
            	//! The aggregate HT
         | 
| 92 95 | 
             
            	unique_ptr<PartitionableHashTable> ht;
         | 
| 96 | 
            +
            	//! The total number of groups found by this thread
         | 
| 97 | 
            +
            	idx_t total_groups;
         | 
| 93 98 |  | 
| 94 99 | 
             
            	//! Whether or not any tuples were added to the HT
         | 
| 95 100 | 
             
            	bool is_empty;
         | 
| @@ -146,7 +151,7 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState | |
| 146 151 | 
             
            		}
         | 
| 147 152 | 
             
            		D_ASSERT(gstate.finalized_hts.size() == 1);
         | 
| 148 153 | 
             
            		D_ASSERT(gstate.finalized_hts[0]);
         | 
| 149 | 
            -
            		 | 
| 154 | 
            +
            		llstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, payload_input, filter);
         | 
| 150 155 | 
             
            		return;
         | 
| 151 156 | 
             
            	}
         | 
| 152 157 |  | 
| @@ -160,9 +165,11 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState | |
| 160 165 | 
             
            		                                        group_types, op.payload_types, op.bindings);
         | 
| 161 166 | 
             
            	}
         | 
| 162 167 |  | 
| 163 | 
            -
            	 | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 168 | 
            +
            	llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
         | 
| 169 | 
            +
            	                                             gstate.partitioned && gstate.partition_info.n_partitions > 1, filter);
         | 
| 170 | 
            +
            	if (llstate.total_groups >= radix_limit) {
         | 
| 171 | 
            +
            		gstate.partitioned = true;
         | 
| 172 | 
            +
            	}
         | 
| 166 173 | 
             
            }
         | 
| 167 174 |  | 
| 168 175 | 
             
            void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkState &state,
         | 
| @@ -183,7 +190,7 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta | |
| 183 190 | 
             
            		return; // no data
         | 
| 184 191 | 
             
            	}
         | 
| 185 192 |  | 
| 186 | 
            -
            	if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate. | 
| 193 | 
            +
            	if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.partitioned) {
         | 
| 187 194 | 
             
            		llstate.ht->Partition();
         | 
| 188 195 | 
             
            	}
         | 
| 189 196 |  | 
| @@ -207,6 +207,15 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to) | |
| 207 207 | 
             
            		// if aliases are different, an implicit cast is not possible
         | 
| 208 208 | 
             
            		return -1;
         | 
| 209 209 | 
             
            	}
         | 
| 210 | 
            +
            	if (from.id() == LogicalTypeId::LIST && to.id() == LogicalTypeId::LIST) {
         | 
| 211 | 
            +
            		// Lists can be cast if their child types can be cast
         | 
| 212 | 
            +
            		auto child_cost = ImplicitCast(ListType::GetChildType(from), ListType::GetChildType(to));
         | 
| 213 | 
            +
            		if (child_cost >= 100) {
         | 
| 214 | 
            +
            			// subtract one from the cost because we prefer LIST[X] -> LIST[VARCHAR] over LIST[X] -> VARCHAR
         | 
| 215 | 
            +
            			child_cost--;
         | 
| 216 | 
            +
            		}
         | 
| 217 | 
            +
            		return child_cost;
         | 
| 218 | 
            +
            	}
         | 
| 210 219 | 
             
            	if (from.id() == to.id()) {
         | 
| 211 220 | 
             
            		// arguments match: do nothing
         | 
| 212 221 | 
             
            		return 0;
         | 
| @@ -219,10 +228,6 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to) | |
| 219 228 | 
             
            		// everything can be cast to VARCHAR, but this cast has a high cost
         | 
| 220 229 | 
             
            		return TargetTypeCost(to);
         | 
| 221 230 | 
             
            	}
         | 
| 222 | 
            -
            	if (from.id() == LogicalTypeId::LIST && to.id() == LogicalTypeId::LIST) {
         | 
| 223 | 
            -
            		// Lists can be cast if their child types can be cast
         | 
| 224 | 
            -
            		return ImplicitCast(ListType::GetChildType(from), ListType::GetChildType(to));
         | 
| 225 | 
            -
            	}
         | 
| 226 231 |  | 
| 227 232 | 
             
            	if (from.id() == LogicalTypeId::UNION && to.id() == LogicalTypeId::UNION) {
         | 
| 228 233 | 
             
            		// Unions can be cast if the source tags are a subset of the target tags
         | 
| @@ -1,8 +1,8 @@ | |
| 1 1 | 
             
            #ifndef DUCKDB_VERSION
         | 
| 2 | 
            -
            #define DUCKDB_VERSION "0.7.2- | 
| 2 | 
            +
            #define DUCKDB_VERSION "0.7.2-dev1146"
         | 
| 3 3 | 
             
            #endif
         | 
| 4 4 | 
             
            #ifndef DUCKDB_SOURCE_ID
         | 
| 5 | 
            -
            #define DUCKDB_SOURCE_ID " | 
| 5 | 
            +
            #define DUCKDB_SOURCE_ID "b8cf6a98e2"
         | 
| 6 6 | 
             
            #endif
         | 
| 7 7 | 
             
            #include "duckdb/function/table/system_functions.hpp"
         | 
| 8 8 | 
             
            #include "duckdb/main/database.hpp"
         | 
| @@ -323,9 +323,12 @@ public: | |
| 323 323 | 
             
            public:
         | 
| 324 324 | 
             
            	DUCKDB_API void Resize(idx_t old_size, idx_t new_size);
         | 
| 325 325 |  | 
| 326 | 
            -
            	DUCKDB_API void  | 
| 326 | 
            +
            	DUCKDB_API void SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count);
         | 
| 327 | 
            +
            	DUCKDB_API void Slice(const ValidityMask &other, idx_t source_offset, idx_t count);
         | 
| 327 328 | 
             
            	DUCKDB_API void Combine(const ValidityMask &other, idx_t count);
         | 
| 328 329 | 
             
            	DUCKDB_API string ToString(idx_t count) const;
         | 
| 330 | 
            +
             | 
| 331 | 
            +
            	DUCKDB_API static bool IsAligned(idx_t count);
         | 
| 329 332 | 
             
            };
         | 
| 330 333 |  | 
| 331 334 | 
             
            } // namespace duckdb
         |