npm - duckdb - Versions diffs - 0.9.1-dev120.0 → 0.9.1-dev157.0 - Mend

duckdb 0.9.1-dev120.0 → 0.9.1-dev157.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.9.1-dev120.0",
+  "version": "0.9.1-dev157.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/extension/parquet/column_reader.cpp CHANGED Viewed

@@ -243,6 +243,7 @@ void ColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChun
 void ColumnReader::PrepareRead(parquet_filter_t &filter) {
 	dict_decoder.reset();
 	defined_decoder.reset();
+	bss_decoder.reset();
 	block.reset();
 	PageHeader page_hdr;
 	page_hdr.read(protocol);
@@ -443,6 +444,13 @@ void ColumnReader::PrepareDataPage(PageHeader &page_hdr) {
 		PrepareDeltaByteArray(*block);
 		break;
 	}
+	case Encoding::BYTE_STREAM_SPLIT: {
+		// Subtract 1 from length as the block is allocated with 1 extra byte,
+		// but the byte stream split decoder needs to know the correct data size.
+		bss_decoder = make_uniq<BssDecoder>(block->ptr, block->len - 1);
+		block->inc(block->len);
+		break;
+	}
 	case Encoding::PLAIN:
 		// nothing to do here, will be read directly below
 		break;
@@ -488,7 +496,7 @@ idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr
 		idx_t null_count = 0;
-		if ((dict_decoder || dbp_decoder || rle_decoder) && HasDefines()) {
+		if ((dict_decoder || dbp_decoder || rle_decoder || bss_decoder) && HasDefines()) {
 			// we need the null count because the dictionary offsets have no entries for nulls
 			for (idx_t i = 0; i < read_now; i++) {
 				if (define_out[i + result_offset] != max_define) {
@@ -534,6 +542,23 @@ idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr
 		} else if (byte_array_data) {
 			// DELTA_BYTE_ARRAY or DELTA_LENGTH_BYTE_ARRAY
 			DeltaByteArray(define_out, read_now, filter, result_offset, result);
+		} else if (bss_decoder) {
+			auto read_buf = make_shared<ResizeableBuffer>();
+			switch (schema.type) {
+			case duckdb_parquet::format::Type::FLOAT:
+				read_buf->resize(reader.allocator, sizeof(float) * (read_now - null_count));
+				bss_decoder->GetBatch<float>(read_buf->ptr, read_now - null_count);
+				break;
+			case duckdb_parquet::format::Type::DOUBLE:
+				read_buf->resize(reader.allocator, sizeof(double) * (read_now - null_count));
+				bss_decoder->GetBatch<double>(read_buf->ptr, read_now - null_count);
+				break;
+			default:
+				throw std::runtime_error("BYTE_STREAM_SPLIT encoding is only supported for FLOAT or DOUBLE data");
+			}
+			Plain(read_buf, define_out, read_now, filter, result_offset, result);
 		} else {
 			PlainReference(block, result);
 			Plain(block, define_out, read_now, filter, result_offset, result);

package/src/duckdb/extension/parquet/include/column_reader.hpp CHANGED Viewed

@@ -9,6 +9,7 @@
 #pragma once
 #include "duckdb.hpp"
+#include "parquet_bss_decoder.hpp"
 #include "parquet_dbp_decoder.hpp"
 #include "parquet_rle_bp_decoder.hpp"
 #include "parquet_statistics.hpp"
@@ -161,6 +162,7 @@ private:
 	unique_ptr<RleBpDecoder> repeated_decoder;
 	unique_ptr<DbpDecoder> dbp_decoder;
 	unique_ptr<RleBpDecoder> rle_decoder;
+	unique_ptr<BssDecoder> bss_decoder;
 	// dummies for Skip()
 	parquet_filter_t none_filter;

package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp ADDED Viewed

@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// parquet_bss_decoder.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+#include "parquet_types.h"
+#include "resizable_buffer.hpp"
+namespace duckdb {
+/// Decoder for the Byte Stream Split encoding: byte k of every value is read from the k-th of sizeof(T) equal-length byte streams
+class BssDecoder {
+public:
+	/// Create a decoder object. buffer/buffer_len is the encoded data. Decoding starts at value offset 0.
+	BssDecoder(data_ptr_t buffer, uint32_t buffer_len) : buffer_(buffer, buffer_len), value_offset_(0) {
+	}
+public:
+	template <typename T> // T: value type being decoded; sizeof(T) is the number of byte streams
+	void GetBatch(data_ptr_t values_target_ptr, uint32_t batch_size) { // writes batch_size values of T to values_target_ptr, then advances the read position
+		if (buffer_.len % sizeof(T) != 0) { // the buffer is split into sizeof(T) equal parts below, so its size must divide evenly
+			std::stringstream error;
+			error << "Data buffer size for the BYTE_STREAM_SPLIT encoding (" << buffer_.len
+			      << ") should be a multiple of the type size (" << sizeof(T) << ")";
+			throw std::runtime_error(error.str());
+		}
+		uint32_t num_buffer_values = buffer_.len / sizeof(T); // values in the buffer == length in bytes of each stream
+		buffer_.available((value_offset_ + batch_size) * sizeof(T)); // presumably a bounds check that throws when the buffer is too short - confirm against ByteBuffer::available
+		for (uint32_t byte_offset = 0; byte_offset < sizeof(T); ++byte_offset) {
+			data_ptr_t input_bytes = buffer_.ptr + byte_offset * num_buffer_values + value_offset_; // start of stream byte_offset, skipping the values already decoded
+			for (uint32_t i = 0; i < batch_size; ++i) {
+				values_target_ptr[byte_offset + i * sizeof(T)] = *(input_bytes + i); // place byte number byte_offset of output value i
+			}
+		}
+		value_offset_ += batch_size; // the next call continues after the values decoded here
+	}
+private:
+	ByteBuffer buffer_; // view of the encoded data; its ptr is never advanced by GetBatch
+	uint32_t value_offset_; // number of values already decoded by earlier GetBatch calls
+};
+} // namespace duckdb

package/src/duckdb/src/execution/operator/helper/physical_reset.cpp CHANGED Viewed

@@ -21,10 +21,7 @@ void PhysicalReset::ResetExtensionVariable(ExecutionContext &context, DBConfig &
 SourceResultType PhysicalReset::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
 	auto &config = DBConfig::GetConfig(context.client);
-	if (config.options.lock_configuration) {
-		throw InvalidInputException("Cannot reset configuration option \"%s\" - the configuration has been locked",
-		                            name);
-	}
+	config.CheckLock(name);
 	auto option = DBConfig::GetOptionByName(name);
 	if (!option) {
 		// check if this is an extra extension variable

package/src/duckdb/src/execution/operator/helper/physical_set.cpp CHANGED Viewed

@@ -24,10 +24,8 @@ void PhysicalSet::SetExtensionVariable(ClientContext &context, ExtensionOption &
 SourceResultType PhysicalSet::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
 	auto &config = DBConfig::GetConfig(context.client);
-	if (config.options.lock_configuration) {
-		throw InvalidInputException("Cannot change configuration option \"%s\" - the configuration has been locked",
-		                            name);
-	}
+	// check if we are allowed to change the configuration option
+	config.CheckLock(name);
 	auto option = DBConfig::GetOptionByName(name);
 	if (!option) {
 		// check if this is an extra extension variable

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v0.9.1-dev120"
+#define DUCKDB_VERSION "v0.9.1-dev157"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "af666ad8ba"
+#define DUCKDB_SOURCE_ID "fa87a54b70"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/include/duckdb/main/config.hpp CHANGED Viewed

@@ -240,6 +240,8 @@ public:
 	DUCKDB_API void SetOption(const string &name, Value value);
 	DUCKDB_API void ResetOption(const string &name);
+	DUCKDB_API void CheckLock(const string &name);
 	DUCKDB_API static idx_t ParseMemoryLimit(const string &arg);
 	//! Return the list of possible compression functions for the specific physical type

package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp CHANGED Viewed

@@ -90,8 +90,8 @@ public:
 	void QualifyColumnNames(unique_ptr<ParsedExpression> &expr);
 	static void QualifyColumnNames(Binder &binder, unique_ptr<ParsedExpression> &expr);
-	static unique_ptr<Expression> PushCollation(ClientContext &context, unique_ptr<Expression> source,
-	                                            const string &collation, bool equality_only = false);
+	static bool PushCollation(ClientContext &context, unique_ptr<Expression> &source, const LogicalType &sql_type,
+	                          bool equality_only = false);
 	static void TestCollation(ClientContext &context, const string &collation);
 	bool BindCorrelatedColumns(unique_ptr<ParsedExpression> &expr);

package/src/duckdb/src/main/config.cpp CHANGED Viewed

@@ -233,6 +233,20 @@ void DBConfig::SetDefaultMaxMemory() {
 	}
 }
+void DBConfig::CheckLock(const string &name) { // throws InvalidInputException if the configuration is locked and name is not an exempt setting
+	if (!options.lock_configuration) {
+		// not locked
+		return;
+	}
+	case_insensitive_set_t allowed_settings {"schema", "search_path"}; // settings exempt from the lock; lookup is presumably case-insensitive, going by the set type's name
+	if (allowed_settings.find(name) != allowed_settings.end()) {
+		// we are always allowed to change these settings
+		return;
+	}
+	// not allowed! NOTE(review): PhysicalReset::GetData also calls this, so RESET now reports "Cannot change ..." where it used to report "Cannot reset ..."
+	throw InvalidInputException("Cannot change configuration option \"%s\" - the configuration has been locked", name);
+}
 idx_t CGroupBandwidthQuota(idx_t physical_cores, FileSystem &fs) {
 	static constexpr const char *CPU_MAX = "/sys/fs/cgroup/cpu.max";
 	static constexpr const char *CFS_QUOTA = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us";

package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp CHANGED Viewed

@@ -34,13 +34,11 @@ BindResult ExpressionBinder::BindExpression(BetweenExpression &expr, idx_t depth
 	input = BoundCastExpression::AddCastToType(context, std::move(input), input_type);
 	lower = BoundCastExpression::AddCastToType(context, std::move(lower), input_type);
 	upper = BoundCastExpression::AddCastToType(context, std::move(upper), input_type);
-	if (input_type.id() == LogicalTypeId::VARCHAR) {
-		// handle collation
-		auto collation = StringType::GetCollation(input_type);
-		input = PushCollation(context, std::move(input), collation, false);
-		lower = PushCollation(context, std::move(lower), collation, false);
-		upper = PushCollation(context, std::move(upper), collation, false);
-	}
+	// handle collation
+	PushCollation(context, input, input_type, false);
+	PushCollation(context, lower, input_type, false);
+	PushCollation(context, upper, input_type, false);
 	if (!input->HasSideEffects() && !input->HasParameter() && !input->HasSubquery()) {
 		// the expression does not have side effects and can be copied: create two comparisons
 		// the reason we do this is that individual comparisons are easier to handle in optimizers

package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp CHANGED Viewed

@@ -18,8 +18,10 @@ BindResult ExpressionBinder::BindExpression(CollateExpression &expr, idx_t depth
 		throw BinderException("collations are only supported for type varchar");
 	}
 	// Validate the collation, but don't use it
-	PushCollation(context, child->Copy(), expr.collation, false);
-	child->return_type = LogicalType::VARCHAR_COLLATION(expr.collation);
+	auto child_copy = child->Copy();
+	auto collation_type = LogicalType::VARCHAR_COLLATION(expr.collation);
+	PushCollation(context, child_copy, collation_type, false);
+	child->return_type = collation_type;
 	return BindResult(std::move(child));
 }

package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp CHANGED Viewed

@@ -18,20 +18,25 @@
 namespace duckdb {
-unique_ptr<Expression> ExpressionBinder::PushCollation(ClientContext &context, unique_ptr<Expression> source,
-                                                       const string &collation_p, bool equality_only) {
+bool ExpressionBinder::PushCollation(ClientContext &context, unique_ptr<Expression> &source,
+                                     const LogicalType &sql_type, bool equality_only) {
+	if (sql_type.id() != LogicalTypeId::VARCHAR) {
+		// only VARCHAR columns require collation
+		return false;
+	}
 	// replace default collation with system collation
+	auto str_collation = StringType::GetCollation(sql_type);
 	string collation;
-	if (collation_p.empty()) {
+	if (str_collation.empty()) {
 		collation = DBConfig::GetConfig(context).options.collation;
 	} else {
-		collation = collation_p;
+		collation = str_collation;
 	}
 	collation = StringUtil::Lower(collation);
 	// bind the collation
 	if (collation.empty() || collation == "binary" || collation == "c" || collation == "posix") {
-		// binary collation: just skip
-		return source;
+		// no collation or binary collation: skip
+		return false;
 	}
 	auto &catalog = Catalog::GetSystemCatalog(context);
 	auto splits = StringUtil::Split(StringUtil::Lower(collation), ".");
@@ -60,11 +65,12 @@ unique_ptr<Expression> ExpressionBinder::PushCollation(ClientContext &context, u
 		auto function = function_binder.BindScalarFunction(collation_entry.function, std::move(children));
 		source = std::move(function);
 	}
-	return source;
+	return true;
 }
 void ExpressionBinder::TestCollation(ClientContext &context, const string &collation) {
-	PushCollation(context, make_uniq<BoundConstantExpression>(Value("")), collation);
+	auto expr = make_uniq_base<Expression, BoundConstantExpression>(Value(""));
+	PushCollation(context, expr, LogicalType::VARCHAR_COLLATION(collation));
 }
 LogicalType BoundComparisonExpression::BindComparison(LogicalType left_type, LogicalType right_type) {
@@ -134,12 +140,9 @@ BindResult ExpressionBinder::BindExpression(ComparisonExpression &expr, idx_t de
 	right = BoundCastExpression::AddCastToType(context, std::move(right), input_type,
 	                                           input_type.id() == LogicalTypeId::ENUM);
-	if (input_type.id() == LogicalTypeId::VARCHAR) {
-		// handle collation
-		auto collation = StringType::GetCollation(input_type);
-		left = PushCollation(context, std::move(left), collation, expr.type == ExpressionType::COMPARE_EQUAL);
-		right = PushCollation(context, std::move(right), collation, expr.type == ExpressionType::COMPARE_EQUAL);
-	}
+	PushCollation(context, left, input_type, expr.type == ExpressionType::COMPARE_EQUAL);
+	PushCollation(context, right, input_type, expr.type == ExpressionType::COMPARE_EQUAL);
 	// now create the bound comparison expression
 	return BindResult(make_uniq<BoundComparisonExpression>(expr.type, std::move(left), std::move(right)));
 }

package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp CHANGED Viewed

@@ -222,10 +222,7 @@ void Binder::BindModifierTypes(BoundQueryNode &result, const vector<LogicalType>
 			for (auto &target_distinct : distinct.target_distincts) {
 				auto &bound_colref = target_distinct->Cast<BoundColumnRefExpression>();
 				const auto &sql_type = sql_types[bound_colref.binding.column_index];
-				if (sql_type.id() == LogicalTypeId::VARCHAR) {
-					target_distinct = ExpressionBinder::PushCollation(context, std::move(target_distinct),
-					                                                  StringType::GetCollation(sql_type), true);
-				}
+				ExpressionBinder::PushCollation(context, target_distinct, sql_type, true);
 			}
 			break;
 		}
@@ -253,10 +250,7 @@ void Binder::BindModifierTypes(BoundQueryNode &result, const vector<LogicalType>
 				D_ASSERT(bound_colref.binding.column_index < sql_types.size());
 				const auto &sql_type = sql_types[bound_colref.binding.column_index];
 				bound_colref.return_type = sql_types[bound_colref.binding.column_index];
-				if (sql_type.id() == LogicalTypeId::VARCHAR) {
-					order_node.expression = ExpressionBinder::PushCollation(context, std::move(order_node.expression),
-					                                                        StringType::GetCollation(sql_type));
-				}
+				ExpressionBinder::PushCollation(context, order_node.expression, sql_type);
 			}
 			break;
 		}
@@ -389,9 +383,8 @@ unique_ptr<BoundQueryNode> Binder::BindSelectNode(SelectNode &statement, unique_
 			bool contains_subquery = bound_expr_ref.HasSubquery();
 			// push a potential collation, if necessary
-			auto collated_expr = ExpressionBinder::PushCollation(context, std::move(bound_expr),
-			                                                     StringType::GetCollation(group_type), true);
-			if (!contains_subquery && !collated_expr->Equals(bound_expr_ref)) {
+			bool requires_collation = ExpressionBinder::PushCollation(context, bound_expr, group_type, true);
+			if (!contains_subquery && requires_collation) {
 				// if there is a collation on a group x, we should group by the collated expr,
 				// but also push a first(x) aggregate in case x is selected (uncollated)
 				info.collated_groups[i] = result->aggregates.size();
@@ -405,7 +398,7 @@ unique_ptr<BoundQueryNode> Binder::BindSelectNode(SelectNode &statement, unique_
 				auto function = function_binder.BindAggregateFunction(first_fun, std::move(first_children));
 				result->aggregates.push_back(std::move(function));
 			}
-			result->groups.group_expressions.push_back(std::move(collated_expr));
+			result->groups.group_expressions.push_back(std::move(bound_expr));
 			// in the unbound expression we DO bind the table names of any ColumnRefs
 			// we do this to make sure that "table.a" and "a" are treated the same

package/src/duckdb/src/transaction/duck_transaction_manager.cpp CHANGED Viewed

@@ -252,6 +252,7 @@ void DuckTransactionManager::RollbackTransaction(Transaction *transaction_p) {
 }
 void DuckTransactionManager::RemoveTransaction(DuckTransaction &transaction) noexcept {
+	bool changes_made = transaction.ChangesMade();
 	// remove the transaction from the list of active transactions
 	idx_t t_index = active_transactions.size();
 	// check for the lowest and highest start time in the list of transactions
@@ -275,15 +276,18 @@ void DuckTransactionManager::RemoveTransaction(DuckTransaction &transaction) noe
 	D_ASSERT(t_index != active_transactions.size());
 	auto current_transaction = std::move(active_transactions[t_index]);
 	auto current_query = DatabaseManager::Get(db).ActiveQueryNumber();
-	if (transaction.commit_id != 0) {
-		// the transaction was committed, add it to the list of recently
-		// committed transactions
-		recently_committed_transactions.push_back(std::move(current_transaction));
-	} else {
-		// the transaction was aborted, but we might still need its information
-		// add it to the set of transactions awaiting GC
-		current_transaction->highest_active_query = current_query;
-		old_transactions.push_back(std::move(current_transaction));
+	if (changes_made) {
+		// if the transaction made any changes we need to keep it around
+		if (transaction.commit_id != 0) {
+			// the transaction was committed, add it to the list of recently
+			// committed transactions
+			recently_committed_transactions.push_back(std::move(current_transaction));
+		} else {
+			// the transaction was aborted, but we might still need its information
+			// add it to the set of transactions awaiting GC
+			current_transaction->highest_active_query = current_query;
+			old_transactions.push_back(std::move(current_transaction));
+		}
 	}
 	// remove the transaction from the set of currently active transactions
 	active_transactions.erase(active_transactions.begin() + t_index);

package/src/duckdb/third_party/parquet/parquet_types.h CHANGED Viewed

@@ -92,7 +92,8 @@ struct Encoding {
     DELTA_BINARY_PACKED = 5,
     DELTA_LENGTH_BYTE_ARRAY = 6,
     DELTA_BYTE_ARRAY = 7,
-    RLE_DICTIONARY = 8
+    RLE_DICTIONARY = 8,
+    BYTE_STREAM_SPLIT = 9,
   };
 };