npm - duckdb - Versions diffs - 0.3.5-dev673.0 → 0.3.5-dev699.0 - Mend

duckdb 0.3.5-dev673.0 → 0.3.5-dev699.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json +1 -1
package/src/duckdb.cpp +74 -14
package/src/duckdb.hpp +2 -2
package/src/parquet-amalgamation.cpp +34534 -34534

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "duckdb",
   "main": "./lib/duckdb.js",
-  "version": "0.3.5-dev673.0",
+  "version": "0.3.5-dev699.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED Viewed

@@ -18728,6 +18728,7 @@ duckdb::string_t StringCastTZ::Operation(timestamp_t input, Vector &result);
+#include <cmath>
 namespace duckdb {
@@ -18786,10 +18787,21 @@ bool TryCastWithOverflowCheckFloat(SRC value, T &result, SRC min, SRC max) {
 	if (!(value >= min && value < max)) {
 		return false;
 	}
-	result = T(value);
+	// PG FLOAT => INT casts use statistical rounding.
+	result = std::nearbyint(value);
 	return true;
 }
+template <>
+bool TryCastWithOverflowCheck(float value, int8_t &result) {
+	return TryCastWithOverflowCheckFloat<float, int8_t>(value, result, -128.0f, 128.0f);
+}
+template <>
+bool TryCastWithOverflowCheck(float value, int16_t &result) {
+	return TryCastWithOverflowCheckFloat<float, int16_t>(value, result, -32768.0f, 32768.0f);
+}
 template <>
 bool TryCastWithOverflowCheck(float value, int32_t &result) {
 	return TryCastWithOverflowCheckFloat<float, int32_t>(value, result, -2147483648.0f, 2147483648.0f);
@@ -18801,6 +18813,21 @@ bool TryCastWithOverflowCheck(float value, int64_t &result) {
 	                                                     9223372036854775808.0f);
 }
+template <>
+bool TryCastWithOverflowCheck(double value, int8_t &result) {
+	return TryCastWithOverflowCheckFloat<double, int8_t>(value, result, -128.0, 128.0);
+}
+template <>
+bool TryCastWithOverflowCheck(double value, int16_t &result) {
+	return TryCastWithOverflowCheckFloat<double, int16_t>(value, result, -32768.0, 32768.0);
+}
+template <>
+bool TryCastWithOverflowCheck(double value, int32_t &result) {
+	return TryCastWithOverflowCheckFloat<double, int32_t>(value, result, -2147483648.0, 2147483648.0);
+}
 template <>
 bool TryCastWithOverflowCheck(double value, int64_t &result) {
 	return TryCastWithOverflowCheckFloat<double, int64_t>(value, result, -9223372036854775808.0, 9223372036854775808.0);
@@ -19026,12 +19053,12 @@ bool TryCastWithOverflowCheck(uint64_t value, hugeint_t &result) {
 template <>
 bool TryCastWithOverflowCheck(float value, hugeint_t &result) {
-	return Hugeint::TryConvert(value, result);
+	return Hugeint::TryConvert(std::nearbyintf(value), result);
 }
 template <>
 bool TryCastWithOverflowCheck(double value, hugeint_t &result) {
-	return Hugeint::TryConvert(value, result);
+	return Hugeint::TryConvert(std::nearbyint(value), result);
 }
 template <>
@@ -23282,7 +23309,7 @@ struct IntegerCastOperation {
 		if (dbl_res < NumericLimits<result_t>::Minimum() || dbl_res > NumericLimits<result_t>::Maximum()) {
 			return false;
 		}
-		state.result = (result_t)dbl_res;
+		state.result = (result_t)std::nearbyint(dbl_res);
 		return true;
 	}
@@ -41269,11 +41296,11 @@ inline uint64_t TemplatedHash(const string_t &elem) {
 	data_ptr_t data = (data_ptr_t)elem.GetDataUnsafe();
 	const auto &len = elem.GetSize();
 	uint64_t h = 0;
-	for (idx_t i = 0; i < len / 8; i += 8) {
+	for (idx_t i = 0; i + sizeof(uint64_t) <= len; i += sizeof(uint64_t)) {
 		h ^= TemplatedHash<uint64_t>(Load<uint64_t>(data));
-		data += 8;
+		data += sizeof(uint64_t);
 	}
-	switch (len & 7) {
+	switch (len & (sizeof(uint64_t) - 1)) {
 	case 4:
 		h ^= TemplatedHash<uint32_t>(Load<uint32_t>(data));
 		break;
@@ -104365,6 +104392,31 @@ LogicalType GetArrowLogicalType(ArrowSchema &schema,
 	}
 }
+// Renames repeated columns and case sensitive columns
+void RenameArrowColumns(vector<string> &names) {
+	unordered_map<string, idx_t> name_map;
+	for (auto &column_name : names) {
+		// put it all lower_case
+		auto low_column_name = StringUtil::Lower(column_name);
+		if (name_map.find(low_column_name) == name_map.end()) {
+			// Name does not exist yet
+			name_map[low_column_name]++;
+		} else {
+			// Name already exists, we add _x where x is the repetition number
+			string new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]);
+			auto new_column_name_low = StringUtil::Lower(new_column_name);
+			while (name_map.find(new_column_name_low) != name_map.end()) {
+				// This name is already here due to a previous definition
+				name_map[low_column_name]++;
+				new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]);
+				new_column_name_low = StringUtil::Lower(new_column_name);
+			}
+			column_name = new_column_name;
+			name_map[new_column_name_low]++;
+		}
+	}
+}
 unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
                                                            vector<LogicalType> &return_types, vector<string> &names) {
 	typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(
@@ -104408,6 +104460,7 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
 		}
 		names.push_back(name);
 	}
+	RenameArrowColumns(names);
 	return move(res);
 }
@@ -181002,6 +181055,8 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
+#include <math.h>
 namespace duckdb {
 DistinctStatistics::DistinctStatistics()
@@ -181062,7 +181117,7 @@ void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_
 		return;
 	}
 	total_count += count;
-	count = MaxValue<idx_t>(idx_t(SAMPLE_RATE * double(count)), 1);
+	count = MinValue<idx_t>(idx_t(SAMPLE_RATE * MaxValue<idx_t>(STANDARD_VECTOR_SIZE, count)), count);
 	sample_count += count;
 	uint64_t indices[STANDARD_VECTOR_SIZE];
@@ -181080,12 +181135,17 @@ idx_t DistinctStatistics::GetCount() const {
 	if (sample_count == 0 || total_count == 0) {
 		return 0;
 	}
-	// Estimate HLL count because we use sampling
-	double hll_count = log->Count();
-	double unique_proportion = hll_count / double(sample_count);
-	double actual_sample_rate = double(sample_count) / double(total_count);
-	double multiplier = double(1) + unique_proportion * (double(1) / actual_sample_rate - double(1));
-	return idx_t(multiplier * hll_count);
+	double u = MinValue<idx_t>(log->Count(), sample_count);
+	double s = sample_count;
+	double n = total_count;
+	// Assume this proportion of the sampled values occurred only once
+	double u1 = pow(u / s, 2) * u;
+	// Estimate total uniques using Good Turing Estimation
+	idx_t estimate = u + u1 / s * (n - s);
+	return MinValue<idx_t>(estimate, total_count);
 }
 } // namespace duckdb

package/src/duckdb.hpp CHANGED Viewed

@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #pragma once
 #define DUCKDB_AMALGAMATION 1
 #define DUCKDB_AMALGAMATION_EXTENDED 1
-#define DUCKDB_SOURCE_ID "064847033"
-#define DUCKDB_VERSION "v0.3.5-dev673"
+#define DUCKDB_SOURCE_ID "360aedc4f"
+#define DUCKDB_VERSION "v0.3.5-dev699"
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //