duckdb 0.4.1-dev2367.0 → 0.4.1-dev2376.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +21 -34
- package/src/duckdb.hpp +4 -2
- package/src/parquet-amalgamation.cpp +37423 -37423
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -41401,6 +41401,14 @@ struct ColumnDataMetaData {
|
|
|
41401
41401
|
}
|
|
41402
41402
|
};
|
|
41403
41403
|
|
|
41404
|
+
//! Explicitly initialized without types
|
|
41405
|
+
ColumnDataCollection::ColumnDataCollection(Allocator &allocator_p) {
|
|
41406
|
+
types.clear();
|
|
41407
|
+
count = 0;
|
|
41408
|
+
this->finished_append = false;
|
|
41409
|
+
allocator = make_shared<ColumnDataAllocator>(allocator_p);
|
|
41410
|
+
}
|
|
41411
|
+
|
|
41404
41412
|
ColumnDataCollection::ColumnDataCollection(Allocator &allocator_p, vector<LogicalType> types_p) {
|
|
41405
41413
|
Initialize(move(types_p));
|
|
41406
41414
|
allocator = make_shared<ColumnDataAllocator>(allocator_p);
|
|
@@ -114809,10 +114817,8 @@ typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(uintptr_
|
|
|
114809
114817
|
typedef void (*stream_factory_get_schema_t)(uintptr_t stream_factory_ptr, ArrowSchemaWrapper &schema);
|
|
114810
114818
|
|
|
114811
114819
|
struct ArrowScanFunctionData : public PyTableFunctionData {
|
|
114812
|
-
ArrowScanFunctionData(
|
|
114813
|
-
|
|
114814
|
-
: lines_read(0), rows_per_thread(rows_per_thread_p), stream_factory_ptr(stream_factory_ptr_p),
|
|
114815
|
-
scanner_producer(scanner_producer_p), number_of_rows(0) {
|
|
114820
|
+
ArrowScanFunctionData(stream_factory_produce_t scanner_producer_p, uintptr_t stream_factory_ptr_p)
|
|
114821
|
+
: lines_read(0), stream_factory_ptr(stream_factory_ptr_p), scanner_producer(scanner_producer_p) {
|
|
114816
114822
|
}
|
|
114817
114823
|
//! This holds the original list type (col_idx, [ArrowListType,size])
|
|
114818
114824
|
unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
|
|
@@ -114823,8 +114829,6 @@ struct ArrowScanFunctionData : public PyTableFunctionData {
|
|
|
114823
114829
|
uintptr_t stream_factory_ptr;
|
|
114824
114830
|
//! Pointer to the scanner factory produce
|
|
114825
114831
|
stream_factory_produce_t scanner_producer;
|
|
114826
|
-
//! Number of rows (Used in cardinality and progress bar)
|
|
114827
|
-
int64_t number_of_rows;
|
|
114828
114832
|
};
|
|
114829
114833
|
|
|
114830
114834
|
struct ArrowScanLocalState : public LocalTableFunctionState {
|
|
@@ -115080,9 +115084,8 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
|
|
|
115080
115084
|
auto stream_factory_ptr = input.inputs[0].GetPointer();
|
|
115081
115085
|
auto stream_factory_produce = (stream_factory_produce_t)input.inputs[1].GetPointer();
|
|
115082
115086
|
auto stream_factory_get_schema = (stream_factory_get_schema_t)input.inputs[2].GetPointer();
|
|
115083
|
-
auto rows_per_thread = input.inputs[3].GetValue<uint64_t>();
|
|
115084
115087
|
|
|
115085
|
-
auto res = make_unique<ArrowScanFunctionData>(rows_per_thread, stream_factory_produce, stream_factory_ptr);
|
|
115088
|
+
auto res = make_unique<ArrowScanFunctionData>(stream_factory_produce, stream_factory_ptr);
|
|
115086
115089
|
|
|
115087
115090
|
auto &data = *res;
|
|
115088
115091
|
stream_factory_get_schema(stream_factory_ptr, data.schema_root);
|
|
@@ -115127,11 +115130,7 @@ unique_ptr<ArrowArrayStreamWrapper> ProduceArrowScan(const ArrowScanFunctionData
|
|
|
115127
115130
|
}
|
|
115128
115131
|
|
|
115129
115132
|
idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) {
|
|
115130
|
-
|
|
115131
|
-
if (bind_data.number_of_rows <= 0 || ClientConfig::GetConfig(context).verify_parallelism) {
|
|
115132
|
-
return context.db->NumberOfThreads();
|
|
115133
|
-
}
|
|
115134
|
-
return ((bind_data.number_of_rows + bind_data.rows_per_thread - 1) / bind_data.rows_per_thread) + 1;
|
|
115133
|
+
return context.db->NumberOfThreads();
|
|
115135
115134
|
}
|
|
115136
115135
|
|
|
115137
115136
|
bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
|
|
@@ -115197,28 +115196,15 @@ void ArrowTableFunction::ArrowScanFunction(ClientContext &context, TableFunction
|
|
|
115197
115196
|
}
|
|
115198
115197
|
|
|
115199
115198
|
unique_ptr<NodeStatistics> ArrowTableFunction::ArrowScanCardinality(ClientContext &context, const FunctionData *data) {
|
|
115200
|
-
|
|
115201
|
-
return make_unique<NodeStatistics>(bind_data.number_of_rows, bind_data.number_of_rows);
|
|
115202
|
-
}
|
|
115203
|
-
|
|
115204
|
-
double ArrowTableFunction::ArrowProgress(ClientContext &context, const FunctionData *bind_data_p,
|
|
115205
|
-
const GlobalTableFunctionState *global_state) {
|
|
115206
|
-
auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
|
|
115207
|
-
if (bind_data.number_of_rows == 0) {
|
|
115208
|
-
return 100;
|
|
115209
|
-
}
|
|
115210
|
-
auto percentage = bind_data.lines_read * 100.0 / bind_data.number_of_rows;
|
|
115211
|
-
return percentage;
|
|
115199
|
+
return make_unique<NodeStatistics>();
|
|
115212
115200
|
}
|
|
115213
115201
|
|
|
115214
115202
|
void ArrowTableFunction::RegisterFunction(BuiltinFunctions &set) {
|
|
115215
|
-
TableFunction arrow("arrow_scan",
|
|
115216
|
-
{LogicalType::POINTER, LogicalType::POINTER, LogicalType::POINTER, LogicalType::UBIGINT},
|
|
115203
|
+
TableFunction arrow("arrow_scan", {LogicalType::POINTER, LogicalType::POINTER, LogicalType::POINTER},
|
|
115217
115204
|
ArrowScanFunction, ArrowScanBind, ArrowScanInitGlobal, ArrowScanInitLocal);
|
|
115218
115205
|
arrow.cardinality = ArrowScanCardinality;
|
|
115219
115206
|
arrow.projection_pushdown = true;
|
|
115220
115207
|
arrow.filter_pushdown = true;
|
|
115221
|
-
arrow.table_scan_progress = ArrowProgress;
|
|
115222
115208
|
set.AddFunction(arrow);
|
|
115223
115209
|
}
|
|
115224
115210
|
|
|
@@ -126510,9 +126496,8 @@ unique_ptr<QueryResult> ClientContext::Query(const string &query, bool allow_str
|
|
|
126510
126496
|
if (statements.empty()) {
|
|
126511
126497
|
// no statements, return empty successful result
|
|
126512
126498
|
StatementProperties properties;
|
|
126513
|
-
vector<LogicalType> types;
|
|
126514
126499
|
vector<string> names;
|
|
126515
|
-
auto collection = make_unique<ColumnDataCollection>(Allocator::DefaultAllocator(), move(types));
|
|
126500
|
+
auto collection = make_unique<ColumnDataCollection>(Allocator::DefaultAllocator());
|
|
126516
126501
|
return make_unique<MaterializedQueryResult>(StatementType::INVALID_STATEMENT, properties, move(names),
|
|
126517
126502
|
move(collection), GetClientProperties());
|
|
126518
126503
|
}
|
|
@@ -141850,7 +141835,7 @@ private:
|
|
|
141850
141835
|
|
|
141851
141836
|
bool full_plan_found;
|
|
141852
141837
|
bool must_update_full_plan;
|
|
141853
|
-
unordered_set<JoinNode *> join_nodes_in_full_plan;
|
|
141838
|
+
unordered_set<std::string> join_nodes_in_full_plan;
|
|
141854
141839
|
|
|
141855
141840
|
//! Extract the bindings referred to by an Expression
|
|
141856
141841
|
bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
|
|
@@ -142161,6 +142146,8 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet *new_set
|
|
|
142161
142146
|
}
|
|
142162
142147
|
}
|
|
142163
142148
|
double denom = 1;
|
|
142149
|
+
// TODO: It's possible cross-products were added and are not present in the filters in the relation_2_tdom
|
|
142150
|
+
// structures. When that's the case, multiply the denom structures that have no intersection
|
|
142164
142151
|
for (auto &match : subgraphs) {
|
|
142165
142152
|
// It's possible that in production, one of the D_ASSERTS above will fail and not all subgraphs
|
|
142166
142153
|
// were connected. When this happens, just use the largest denominator of all the subgraphs.
|
|
@@ -145960,7 +145947,7 @@ void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode *node) {
|
|
|
145960
145947
|
join_nodes_in_full_plan.clear();
|
|
145961
145948
|
}
|
|
145962
145949
|
if (node->set->count < relations.size()) {
|
|
145963
|
-
join_nodes_in_full_plan.insert(node);
|
|
145950
|
+
join_nodes_in_full_plan.insert(node->set->ToString());
|
|
145964
145951
|
}
|
|
145965
145952
|
UpdateJoinNodesInFullPlan(node->left);
|
|
145966
145953
|
UpdateJoinNodesInFullPlan(node->right);
|
|
@@ -145984,8 +145971,8 @@ JoinNode *JoinOrderOptimizer::EmitPair(JoinRelationSet *left, JoinRelationSet *r
|
|
|
145984
145971
|
if (entry != plans.end()) {
|
|
145985
145972
|
cardinality_estimator.VerifySymmetry(result, entry->second.get());
|
|
145986
145973
|
}
|
|
145987
|
-
|
|
145988
|
-
|
|
145974
|
+
if (full_plan_found &&
|
|
145975
|
+
join_nodes_in_full_plan.find(new_plan->set->ToString()) != join_nodes_in_full_plan.end()) {
|
|
145989
145976
|
must_update_full_plan = true;
|
|
145990
145977
|
}
|
|
145991
145978
|
if (new_set->count == relations.size()) {
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-dev2367"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "32c0d9c35"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev2376"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -11271,6 +11271,8 @@ class ColumnDataCollection {
|
|
|
11271
11271
|
public:
|
|
11272
11272
|
//! Constructs an in-memory column data collection from an allocator
|
|
11273
11273
|
DUCKDB_API ColumnDataCollection(Allocator &allocator, vector<LogicalType> types);
|
|
11274
|
+
//! Constructs an empty (but valid) in-memory column data collection from an allocator
|
|
11275
|
+
DUCKDB_API ColumnDataCollection(Allocator &allocator);
|
|
11274
11276
|
//! Constructs a buffer-managed column data collection
|
|
11275
11277
|
DUCKDB_API ColumnDataCollection(BufferManager &buffer_manager, vector<LogicalType> types);
|
|
11276
11278
|
//! Constructs either an in-memory or a buffer-managed column data collection
|