npm - duckdb - Versions diffs - 0.4.1-dev2367.0 → 0.4.1-dev2376.0 - Mend

duckdb 0.4.1-dev2367.0 → 0.4.1-dev2376.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/src/duckdb.cpp +21 -34
package/src/duckdb.hpp +4 -2
package/src/parquet-amalgamation.cpp +37423 -37423

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "duckdb",
   "main": "./lib/duckdb.js",
-  "version": "0.4.1-dev2367.0",
+  "version": "0.4.1-dev2376.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED

@@ -41401,6 +41401,14 @@ struct ColumnDataMetaData {
 	}
 };
+//! Explicitly initialized without types
+ColumnDataCollection::ColumnDataCollection(Allocator &allocator_p) {
+	types.clear();
+	count = 0;
+	this->finished_append = false;
+	allocator = make_shared<ColumnDataAllocator>(allocator_p);
+}
 ColumnDataCollection::ColumnDataCollection(Allocator &allocator_p, vector<LogicalType> types_p) {
 	Initialize(move(types_p));
 	allocator = make_shared<ColumnDataAllocator>(allocator_p);
@@ -114809,10 +114817,8 @@ typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(uintptr_
 typedef void (*stream_factory_get_schema_t)(uintptr_t stream_factory_ptr, ArrowSchemaWrapper &schema);
 struct ArrowScanFunctionData : public PyTableFunctionData {
-	ArrowScanFunctionData(idx_t rows_per_thread_p, stream_factory_produce_t scanner_producer_p,
-	                      uintptr_t stream_factory_ptr_p)
-	    : lines_read(0), rows_per_thread(rows_per_thread_p), stream_factory_ptr(stream_factory_ptr_p),
-	      scanner_producer(scanner_producer_p), number_of_rows(0) {
+	ArrowScanFunctionData(stream_factory_produce_t scanner_producer_p, uintptr_t stream_factory_ptr_p)
+	    : lines_read(0), stream_factory_ptr(stream_factory_ptr_p), scanner_producer(scanner_producer_p) {
 	}
 	//! This holds the original list type (col_idx, [ArrowListType,size])
 	unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
@@ -114823,8 +114829,6 @@ struct ArrowScanFunctionData : public PyTableFunctionData {
 	uintptr_t stream_factory_ptr;
 	//! Pointer to the scanner factory produce
 	stream_factory_produce_t scanner_producer;
-	//! Number of rows (Used in cardinality and progress bar)
-	int64_t number_of_rows;
 };
 struct ArrowScanLocalState : public LocalTableFunctionState {
@@ -115080,9 +115084,8 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
 	auto stream_factory_ptr = input.inputs[0].GetPointer();
 	auto stream_factory_produce = (stream_factory_produce_t)input.inputs[1].GetPointer();
 	auto stream_factory_get_schema = (stream_factory_get_schema_t)input.inputs[2].GetPointer();
-	auto rows_per_thread = input.inputs[3].GetValue<uint64_t>();
-	auto res = make_unique<ArrowScanFunctionData>(rows_per_thread, stream_factory_produce, stream_factory_ptr);
+	auto res = make_unique<ArrowScanFunctionData>(stream_factory_produce, stream_factory_ptr);
 	auto &data = *res;
 	stream_factory_get_schema(stream_factory_ptr, data.schema_root);
@@ -115127,11 +115130,7 @@ unique_ptr<ArrowArrayStreamWrapper> ProduceArrowScan(const ArrowScanFunctionData
 }
 idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) {
-	auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
-	if (bind_data.number_of_rows <= 0 || ClientConfig::GetConfig(context).verify_parallelism) {
-		return context.db->NumberOfThreads();
-	}
-	return ((bind_data.number_of_rows + bind_data.rows_per_thread - 1) / bind_data.rows_per_thread) + 1;
+	return context.db->NumberOfThreads();
 }
 bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
@@ -115197,28 +115196,15 @@ void ArrowTableFunction::ArrowScanFunction(ClientContext &context, TableFunction
 }
 unique_ptr<NodeStatistics> ArrowTableFunction::ArrowScanCardinality(ClientContext &context, const FunctionData *data) {
-	auto &bind_data = (ArrowScanFunctionData &)*data;
-	return make_unique<NodeStatistics>(bind_data.number_of_rows, bind_data.number_of_rows);
-}
-double ArrowTableFunction::ArrowProgress(ClientContext &context, const FunctionData *bind_data_p,
-                                         const GlobalTableFunctionState *global_state) {
-	auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
-	if (bind_data.number_of_rows == 0) {
-		return 100;
-	}
-	auto percentage = bind_data.lines_read * 100.0 / bind_data.number_of_rows;
-	return percentage;
+	return make_unique<NodeStatistics>();
 }
 void ArrowTableFunction::RegisterFunction(BuiltinFunctions &set) {
-	TableFunction arrow("arrow_scan",
-	                    {LogicalType::POINTER, LogicalType::POINTER, LogicalType::POINTER, LogicalType::UBIGINT},
+	TableFunction arrow("arrow_scan", {LogicalType::POINTER, LogicalType::POINTER, LogicalType::POINTER},
 	                    ArrowScanFunction, ArrowScanBind, ArrowScanInitGlobal, ArrowScanInitLocal);
 	arrow.cardinality = ArrowScanCardinality;
 	arrow.projection_pushdown = true;
 	arrow.filter_pushdown = true;
-	arrow.table_scan_progress = ArrowProgress;
 	set.AddFunction(arrow);
 }
@@ -126510,9 +126496,8 @@ unique_ptr<QueryResult> ClientContext::Query(const string &query, bool allow_str
 	if (statements.empty()) {
 		// no statements, return empty successful result
 		StatementProperties properties;
-		vector<LogicalType> types;
 		vector<string> names;
-		auto collection = make_unique<ColumnDataCollection>(Allocator::DefaultAllocator(), move(types));
+		auto collection = make_unique<ColumnDataCollection>(Allocator::DefaultAllocator());
 		return make_unique<MaterializedQueryResult>(StatementType::INVALID_STATEMENT, properties, move(names),
 		                                            move(collection), GetClientProperties());
 	}
@@ -141850,7 +141835,7 @@ private:
 	bool full_plan_found;
 	bool must_update_full_plan;
-	unordered_set<JoinNode *> join_nodes_in_full_plan;
+	unordered_set<std::string> join_nodes_in_full_plan;
 	//! Extract the bindings referred to by an Expression
 	bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
@@ -142161,6 +142146,8 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet *new_set
 		}
 	}
 	double denom = 1;
+	// TODO: It's possible cross-products were added and are not present in the filters in the relation_2_tdom
+	//       structures. When that's the case, multiply the denom structures that have no intersection
 	for (auto &match : subgraphs) {
 		// It's possible that in production, one of the D_ASSERTS above will fail and not all subgraphs
 		// were connected. When this happens, just use the largest denominator of all the subgraphs.
@@ -145960,7 +145947,7 @@ void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode *node) {
 		join_nodes_in_full_plan.clear();
 	}
 	if (node->set->count < relations.size()) {
-		join_nodes_in_full_plan.insert(node);
+		join_nodes_in_full_plan.insert(node->set->ToString());
 	}
 	UpdateJoinNodesInFullPlan(node->left);
 	UpdateJoinNodesInFullPlan(node->right);
@@ -145984,8 +145971,8 @@ JoinNode *JoinOrderOptimizer::EmitPair(JoinRelationSet *left, JoinRelationSet *r
 		if (entry != plans.end()) {
 			cardinality_estimator.VerifySymmetry(result, entry->second.get());
 		}
-		if (full_plan_found && join_nodes_in_full_plan.count(new_plan.get()) > 0) {
+		if (full_plan_found &&
+		    join_nodes_in_full_plan.find(new_plan->set->ToString()) != join_nodes_in_full_plan.end()) {
 			must_update_full_plan = true;
 		}
 		if (new_set->count == relations.size()) {

package/src/duckdb.hpp CHANGED

@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #pragma once
 #define DUCKDB_AMALGAMATION 1
 #define DUCKDB_AMALGAMATION_EXTENDED 1
-#define DUCKDB_SOURCE_ID "32e28e153"
-#define DUCKDB_VERSION "v0.4.1-dev2367"
+#define DUCKDB_SOURCE_ID "32c0d9c35"
+#define DUCKDB_VERSION "v0.4.1-dev2376"
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
@@ -11271,6 +11271,8 @@ class ColumnDataCollection {
 public:
 	//! Constructs an in-memory column data collection from an allocator
 	DUCKDB_API ColumnDataCollection(Allocator &allocator, vector<LogicalType> types);
+	//! Constructs an empty (but valid) in-memory column data collection from an allocator
+	DUCKDB_API ColumnDataCollection(Allocator &allocator);
 	//! Constructs a buffer-managed column data collection
 	DUCKDB_API ColumnDataCollection(BufferManager &buffer_manager, vector<LogicalType> types);
 	//! Constructs either an in-memory or a buffer-managed column data collection