npm - duckdb - Versions diffs - 0.3.4-dev9.0 → 0.3.5-dev2.0 - Mend

duckdb 0.3.4-dev9.0 → 0.3.5-dev2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/src/duckdb.cpp +55 -63
package/src/duckdb.hpp +26 -6
package/src/parquet-amalgamation.cpp +29837 -29837

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "duckdb",
   "main": "./lib/duckdb.js",
-  "version": "0.3.4-dev9.0",
+  "version": "0.3.5-dev2.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED

@@ -60698,6 +60698,7 @@ void PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &result, Globa
 		const idx_t count = ie_lstate.SelectOuterRows(ie_lstate.right_matches);
 		if (!count) {
 			ie_gstate.GetNextPair(context.client, ie_sink, ie_lstate);
+			continue;
 		}
 		SliceSortedPayload(result, ie_sink.tables[1]->global_sort_state, ie_lstate.right_base, ie_lstate.true_sel,
@@ -68361,12 +68362,12 @@ void PartitionableHashTable::Partition() {
 	D_ASSERT(radix_partitioned_hts.size() == 0);
 	D_ASSERT(partition_info.n_partitions > 1);
-	vector<GroupedAggregateHashTable *> partition_hts;
+	vector<GroupedAggregateHashTable *> partition_hts(partition_info.n_partitions);
 	for (auto &unpartitioned_ht : unpartitioned_hts) {
 		for (idx_t r = 0; r < partition_info.n_partitions; r++) {
 			radix_partitioned_hts[r].push_back(make_unique<GroupedAggregateHashTable>(
 			    buffer_manager, group_types, payload_types, bindings, HtEntryType::HT_WIDTH_32));
-			partition_hts.push_back(radix_partitioned_hts[r].back().get());
+			partition_hts[r] = radix_partitioned_hts[r].back().get();
 		}
 		unpartitioned_ht->Partition(partition_hts, partition_info.radix_mask, partition_info.RADIX_SHIFT);
 		unpartitioned_ht.reset();
@@ -68845,6 +68846,8 @@ template <>
 bool TrySubtractOperator::Operation(int32_t left, int32_t right, int32_t &result);
 template <>
 bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result);
+template <>
+bool TrySubtractOperator::Operation(hugeint_t left, hugeint_t right, hugeint_t &result);
 struct SubtractOperatorOverflowCheck {
 	template <class TA, class TB, class TR>
@@ -80015,6 +80018,9 @@ void HistogramFun::RegisterFunction(BuiltinFunctions &set) {
 	fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_S));
 	fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_MS));
 	fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_NS));
+	fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIME));
+	fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIME_TZ));
+	fun.AddFunction(GetHistogramFunction<int32_t>(LogicalType::DATE));
 	set.AddFunction(fun);
 }
@@ -87821,6 +87827,10 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
 	VectorData value_data;
 	value_vector.Orrify(count, value_data);
+	// not required for a comparison of nested types
+	auto child_value = FlatVector::GetData<CHILD_TYPE>(child_vector);
+	auto values = FlatVector::GetData<CHILD_TYPE>(value_vector);
 	for (idx_t i = 0; i < count; i++) {
 		auto list_index = list_data.sel->get_index(i);
 		auto value_index = value_data.sel->get_index(i);
@@ -87831,23 +87841,18 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
 		}
 		const auto &list_entry = list_entries[list_index];
-		auto source_idx = child_data.sel->get_index(list_entry.offset);
-		// not required for a comparison of nested types
-		auto child_value = FlatVector::GetData<CHILD_TYPE>(child_vector);
-		auto values = FlatVector::GetData<CHILD_TYPE>(value_vector);
-		result_entries[list_index] = OP::Initialize();
+		result_entries[i] = OP::Initialize();
 		for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
-			auto child_value_idx = source_idx + child_idx;
+			auto child_value_idx = child_data.sel->get_index(list_entry.offset + child_idx);
 			if (!child_data.validity.RowIsValid(child_value_idx)) {
 				continue;
 			}
 			if (!is_nested) {
 				if (ValueEqualsOrNot<CHILD_TYPE>(child_value[child_value_idx], values[value_index])) {
-					result_entries[list_index] = OP::UpdateResultEntries(child_idx);
+					result_entries[i] = OP::UpdateResultEntries(child_idx);
 					break; // Found value in list, no need to look further
 				}
 			} else {
@@ -87855,7 +87860,7 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
 				// to more efficiently compare nested types
 				if (ValueEqualsOrNot<Value>(child_vector.GetValue(child_value_idx),
 				                            value_vector.GetValue(value_index))) {
-					result_entries[list_index] = OP::UpdateResultEntries(child_idx);
+					result_entries[i] = OP::UpdateResultEntries(child_idx);
 					break; // Found value in list, no need to look further
 				}
 			}
@@ -88274,10 +88279,7 @@ static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vecto
 			continue;
 		}
-		auto source_idx = child_data.sel->get_index(list_entry.offset);
-		idx_t child_idx = 0;
-		while (child_idx < list_entry.length) {
+		for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
 			// states vector is full, update
 			if (states_idx == STANDARD_VECTOR_SIZE) {
@@ -88290,10 +88292,10 @@ static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vecto
 				states_idx = 0;
 			}
-			sel_vector.set_index(states_idx, source_idx + child_idx);
+			auto source_idx = child_data.sel->get_index(list_entry.offset + child_idx);
+			sel_vector.set_index(states_idx, source_idx);
 			states_update[states_idx] = state_ptr;
 			states_idx++;
-			child_idx++;
 		}
 	}
@@ -88330,7 +88332,7 @@ static unique_ptr<FunctionData> ListAggregateBind(ClientContext &context, Scalar
 	// get the function name
 	Value function_value = ExpressionExecutor::EvaluateScalar(*arguments[1]);
-	auto function_name = StringValue::Get(function_value);
+	auto function_name = function_value.ToString();
 	vector<LogicalType> types;
 	types.push_back(list_child_type);
@@ -92442,6 +92444,12 @@ bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result
 	return true;
 }
+template <>
+bool TrySubtractOperator::Operation(hugeint_t left, hugeint_t right, hugeint_t &result) {
+	result = left;
+	return Hugeint::SubtractInPlace(result, right);
+}
 //===--------------------------------------------------------------------===//
 // subtract decimal with overflow check
 //===--------------------------------------------------------------------===//
@@ -129644,28 +129652,25 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundAggreg
 namespace duckdb {
-unique_ptr<Expression> CastHugeintToSmallestType(unique_ptr<Expression> expr, NumericStatistics &num_stats) {
-	// Compute range
-	if (num_stats.min.IsNull() || num_stats.max.IsNull()) {
-		return expr;
-	}
-	auto min_val = num_stats.min.GetValue<hugeint_t>();
-	auto max_val = num_stats.max.GetValue<hugeint_t>();
-	if (max_val < min_val) {
-		return expr;
-	}
+template <class T>
+bool GetCastType(T signed_range, LogicalType &cast_type) {
+	auto range = static_cast<typename std::make_unsigned<decltype(signed_range)>::type>(signed_range);
-	// Prevent overflow
-	if (min_val < NumericLimits<int64_t>().Minimum() && max_val > NumericLimits<int64_t>().Maximum()) {
-		return expr;
+	// Check if this range fits in a smaller type
+	if (range < NumericLimits<uint8_t>::Maximum()) {
+		cast_type = LogicalType::UTINYINT;
+	} else if (sizeof(T) > sizeof(uint16_t) && range < NumericLimits<uint16_t>::Maximum()) {
+		cast_type = LogicalType::USMALLINT;
+	} else if (sizeof(T) > sizeof(uint32_t) && range < NumericLimits<uint32_t>::Maximum()) {
+		cast_type = LogicalType::UINTEGER;
+	} else {
+		return false;
 	}
+	return true;
+}
-	// Compute range
-	auto range = max_val - min_val;
-	// Check if this range fits in a smaller type
-	LogicalType cast_type;
+template <>
+bool GetCastType(hugeint_t range, LogicalType &cast_type) {
 	if (range < NumericLimits<uint8_t>().Maximum()) {
 		cast_type = LogicalType::UTINYINT;
 	} else if (range < NumericLimits<uint16_t>().Maximum()) {
@@ -129673,22 +129678,11 @@ unique_ptr<Expression> CastHugeintToSmallestType(unique_ptr<Expression> expr, Nu
 	} else if (range < NumericLimits<uint32_t>().Maximum()) {
 		cast_type = LogicalType::UINTEGER;
 	} else if (range < NumericLimits<uint64_t>().Maximum()) {
-		cast_type = LogicalTypeId::UBIGINT;
+		cast_type = LogicalType::UBIGINT;
 	} else {
-		return expr;
+		return false;
 	}
-	// Create expression to map to a smaller range
-	auto input_type = expr->return_type;
-	auto minimum_expr = make_unique<BoundConstantExpression>(Value::CreateValue(min_val));
-	vector<unique_ptr<Expression>> arguments;
-	arguments.push_back(move(expr));
-	arguments.push_back(move(minimum_expr));
-	auto minus_expr = make_unique<BoundFunctionExpression>(input_type, SubtractFun::GetFunction(input_type, input_type),
-	                                                       move(arguments), nullptr, true);
-	// Cast to smaller type
-	return make_unique<BoundCastExpression>(move(minus_expr), cast_type);
+	return true;
 }
 template <class T>
@@ -129706,21 +129700,14 @@ unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr,
 	// Compute range, cast to unsigned to prevent comparing signed with unsigned
 	T signed_range;
-	if (!TrySubtractOperator::Operation(signed_min_val, signed_max_val, signed_range)) {
+	if (!TrySubtractOperator::Operation(signed_max_val, signed_min_val, signed_range)) {
 		// overflow in subtraction: cannot do any simplification
 		return expr;
 	}
-	auto range = static_cast<typename std::make_unsigned<decltype(signed_range)>::type>(signed_range);
 	// Check if this range fits in a smaller type
 	LogicalType cast_type;
-	if (range < NumericLimits<uint8_t>::Maximum()) {
-		cast_type = LogicalType::UTINYINT;
-	} else if (sizeof(T) > sizeof(uint16_t) && range < NumericLimits<uint16_t>::Maximum()) {
-		cast_type = LogicalType::USMALLINT;
-	} else if (sizeof(T) > sizeof(uint32_t) && range < NumericLimits<uint32_t>::Maximum()) {
-		cast_type = LogicalType::UINTEGER;
-	} else {
+	if (!GetCastType(signed_range, cast_type)) {
 		return expr;
 	}
@@ -129756,7 +129743,7 @@ unique_ptr<Expression> CastToSmallestType(unique_ptr<Expression> expr, NumericSt
 	case PhysicalType::INT64:
 		return TemplatedCastToSmallestType<int64_t>(move(expr), num_stats);
 	case PhysicalType::INT128:
-		return CastHugeintToSmallestType(move(expr), num_stats);
+		return TemplatedCastToSmallestType<hugeint_t>(move(expr), num_stats);
 	default:
 		throw NotImplementedException("Unknown integer type!");
 	}
@@ -130724,6 +130711,8 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
 				if (join.conditions.size() > 1) {
 					// there are multiple conditions: erase this condition
 					join.conditions.erase(join.conditions.begin() + i);
+					// remove the corresponding statistics
+					join.join_stats.clear();
 					i--;
 					continue;
 				} else {
@@ -155786,8 +155775,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt) {
 	auto copy = make_unique<LogicalCopyToFile>(copy_function->function, move(function_data));
 	copy->file_path = stmt.info->file_path;
 	copy->use_tmp_file = use_tmp_file;
-	LocalFileSystem fs;
-	copy->is_file_and_exists = fs.FileExists(copy->file_path);
+	copy->is_file_and_exists = config.file_system->FileExists(copy->file_path);
 	copy->AddChild(move(select_node.plan));
@@ -170302,6 +170290,10 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
 	D_ASSERT(removed_column < column_definitions.size());
 	column_definitions.erase(column_definitions.begin() + removed_column);
+	for (idx_t i = 0; i < column_definitions.size(); i++) {
+		column_definitions[i].oid = i;
+	}
 	// alter the row_groups and remove the column from each of them
 	this->row_groups = make_shared<SegmentTree>();
 	auto current_row_group = (RowGroup *)parent.row_groups->GetRootSegment();

package/src/duckdb.hpp CHANGED

@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #pragma once
 #define DUCKDB_AMALGAMATION 1
 #define DUCKDB_AMALGAMATION_EXTENDED 1
-#define DUCKDB_SOURCE_ID "5079e8e7f"
-#define DUCKDB_VERSION "v0.3.4-dev9"
+#define DUCKDB_SOURCE_ID "0c68c88e5"
+#define DUCKDB_VERSION "v0.3.5-dev2"
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
@@ -2473,6 +2473,13 @@ public:
 		entry_idx = row_idx / BITS_PER_VALUE;
 		idx_in_entry = row_idx % BITS_PER_VALUE;
 	}
+	//! Get an entry that has first-n bits set as valid and rest set as invalid
+	static inline V EntryWithValidBits(idx_t n) {
+		if (n == 0) {
+			return V(0);
+		}
+		return ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - n);
+	}
 	//! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a
 	//! not-null check
@@ -2548,20 +2555,33 @@ public:
 		}
 	}
-	//! Marks "count" entries in the validity mask as invalid (null)
+	//! Marks exactly "count" bits in the validity mask as invalid (null)
 	inline void SetAllInvalid(idx_t count) {
 		EnsureWritable();
-		for (idx_t i = 0; i < ValidityBuffer::EntryCount(count); i++) {
+		if (count == 0) {
+			return;
+		}
+		auto last_entry_index = ValidityBuffer::EntryCount(count) - 1;
+		for (idx_t i = 0; i < last_entry_index; i++) {
 			validity_mask[i] = 0;
 		}
+		auto last_entry_bits = count % static_cast<idx_t>(BITS_PER_VALUE);
+		validity_mask[last_entry_index] = (last_entry_bits == 0) ? 0 : (ValidityBuffer::MAX_ENTRY << (last_entry_bits));
 	}
-	//! Marks "count" entries in the validity mask as valid (not null)
+	//! Marks exactly "count" bits in the validity mask as valid (not null)
 	inline void SetAllValid(idx_t count) {
 		EnsureWritable();
-		for (idx_t i = 0; i < ValidityBuffer::EntryCount(count); i++) {
+		if (count == 0) {
+			return;
+		}
+		auto last_entry_index = ValidityBuffer::EntryCount(count) - 1;
+		for (idx_t i = 0; i < last_entry_index; i++) {
 			validity_mask[i] = ValidityBuffer::MAX_ENTRY;
 		}
+		auto last_entry_bits = count % static_cast<idx_t>(BITS_PER_VALUE);
+		validity_mask[last_entry_index] |=
+		    (last_entry_bits == 0) ? ValidityBuffer::MAX_ENTRY : ~(ValidityBuffer::MAX_ENTRY << (last_entry_bits));
 	}
 	inline bool IsMaskSet() const {