duckdb 0.4.1-dev2367.0 → 0.4.1-dev2376.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.4.1-dev2367.0",
4
+ "version": "0.4.1-dev2376.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -41401,6 +41401,14 @@ struct ColumnDataMetaData {
41401
41401
  }
41402
41402
  };
41403
41403
 
41404
+ //! Explicitly initialized without types
41405
+ ColumnDataCollection::ColumnDataCollection(Allocator &allocator_p) {
41406
+ types.clear();
41407
+ count = 0;
41408
+ this->finished_append = false;
41409
+ allocator = make_shared<ColumnDataAllocator>(allocator_p);
41410
+ }
41411
+
41404
41412
  ColumnDataCollection::ColumnDataCollection(Allocator &allocator_p, vector<LogicalType> types_p) {
41405
41413
  Initialize(move(types_p));
41406
41414
  allocator = make_shared<ColumnDataAllocator>(allocator_p);
@@ -114809,10 +114817,8 @@ typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(uintptr_
114809
114817
  typedef void (*stream_factory_get_schema_t)(uintptr_t stream_factory_ptr, ArrowSchemaWrapper &schema);
114810
114818
 
114811
114819
  struct ArrowScanFunctionData : public PyTableFunctionData {
114812
- ArrowScanFunctionData(idx_t rows_per_thread_p, stream_factory_produce_t scanner_producer_p,
114813
- uintptr_t stream_factory_ptr_p)
114814
- : lines_read(0), rows_per_thread(rows_per_thread_p), stream_factory_ptr(stream_factory_ptr_p),
114815
- scanner_producer(scanner_producer_p), number_of_rows(0) {
114820
+ ArrowScanFunctionData(stream_factory_produce_t scanner_producer_p, uintptr_t stream_factory_ptr_p)
114821
+ : lines_read(0), stream_factory_ptr(stream_factory_ptr_p), scanner_producer(scanner_producer_p) {
114816
114822
  }
114817
114823
  //! This holds the original list type (col_idx, [ArrowListType,size])
114818
114824
  unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
@@ -114823,8 +114829,6 @@ struct ArrowScanFunctionData : public PyTableFunctionData {
114823
114829
  uintptr_t stream_factory_ptr;
114824
114830
  //! Pointer to the scanner factory produce
114825
114831
  stream_factory_produce_t scanner_producer;
114826
- //! Number of rows (Used in cardinality and progress bar)
114827
- int64_t number_of_rows;
114828
114832
  };
114829
114833
 
114830
114834
  struct ArrowScanLocalState : public LocalTableFunctionState {
@@ -115080,9 +115084,8 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
115080
115084
  auto stream_factory_ptr = input.inputs[0].GetPointer();
115081
115085
  auto stream_factory_produce = (stream_factory_produce_t)input.inputs[1].GetPointer();
115082
115086
  auto stream_factory_get_schema = (stream_factory_get_schema_t)input.inputs[2].GetPointer();
115083
- auto rows_per_thread = input.inputs[3].GetValue<uint64_t>();
115084
115087
 
115085
- auto res = make_unique<ArrowScanFunctionData>(rows_per_thread, stream_factory_produce, stream_factory_ptr);
115088
+ auto res = make_unique<ArrowScanFunctionData>(stream_factory_produce, stream_factory_ptr);
115086
115089
 
115087
115090
  auto &data = *res;
115088
115091
  stream_factory_get_schema(stream_factory_ptr, data.schema_root);
@@ -115127,11 +115130,7 @@ unique_ptr<ArrowArrayStreamWrapper> ProduceArrowScan(const ArrowScanFunctionData
115127
115130
  }
115128
115131
 
115129
115132
  idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) {
115130
- auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
115131
- if (bind_data.number_of_rows <= 0 || ClientConfig::GetConfig(context).verify_parallelism) {
115132
- return context.db->NumberOfThreads();
115133
- }
115134
- return ((bind_data.number_of_rows + bind_data.rows_per_thread - 1) / bind_data.rows_per_thread) + 1;
115133
+ return context.db->NumberOfThreads();
115135
115134
  }
115136
115135
 
115137
115136
  bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
@@ -115197,28 +115196,15 @@ void ArrowTableFunction::ArrowScanFunction(ClientContext &context, TableFunction
115197
115196
  }
115198
115197
 
115199
115198
  unique_ptr<NodeStatistics> ArrowTableFunction::ArrowScanCardinality(ClientContext &context, const FunctionData *data) {
115200
- auto &bind_data = (ArrowScanFunctionData &)*data;
115201
- return make_unique<NodeStatistics>(bind_data.number_of_rows, bind_data.number_of_rows);
115202
- }
115203
-
115204
- double ArrowTableFunction::ArrowProgress(ClientContext &context, const FunctionData *bind_data_p,
115205
- const GlobalTableFunctionState *global_state) {
115206
- auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
115207
- if (bind_data.number_of_rows == 0) {
115208
- return 100;
115209
- }
115210
- auto percentage = bind_data.lines_read * 100.0 / bind_data.number_of_rows;
115211
- return percentage;
115199
+ return make_unique<NodeStatistics>();
115212
115200
  }
115213
115201
 
115214
115202
  void ArrowTableFunction::RegisterFunction(BuiltinFunctions &set) {
115215
- TableFunction arrow("arrow_scan",
115216
- {LogicalType::POINTER, LogicalType::POINTER, LogicalType::POINTER, LogicalType::UBIGINT},
115203
+ TableFunction arrow("arrow_scan", {LogicalType::POINTER, LogicalType::POINTER, LogicalType::POINTER},
115217
115204
  ArrowScanFunction, ArrowScanBind, ArrowScanInitGlobal, ArrowScanInitLocal);
115218
115205
  arrow.cardinality = ArrowScanCardinality;
115219
115206
  arrow.projection_pushdown = true;
115220
115207
  arrow.filter_pushdown = true;
115221
- arrow.table_scan_progress = ArrowProgress;
115222
115208
  set.AddFunction(arrow);
115223
115209
  }
115224
115210
 
@@ -126510,9 +126496,8 @@ unique_ptr<QueryResult> ClientContext::Query(const string &query, bool allow_str
126510
126496
  if (statements.empty()) {
126511
126497
  // no statements, return empty successful result
126512
126498
  StatementProperties properties;
126513
- vector<LogicalType> types;
126514
126499
  vector<string> names;
126515
- auto collection = make_unique<ColumnDataCollection>(Allocator::DefaultAllocator(), move(types));
126500
+ auto collection = make_unique<ColumnDataCollection>(Allocator::DefaultAllocator());
126516
126501
  return make_unique<MaterializedQueryResult>(StatementType::INVALID_STATEMENT, properties, move(names),
126517
126502
  move(collection), GetClientProperties());
126518
126503
  }
@@ -141850,7 +141835,7 @@ private:
141850
141835
 
141851
141836
  bool full_plan_found;
141852
141837
  bool must_update_full_plan;
141853
- unordered_set<JoinNode *> join_nodes_in_full_plan;
141838
+ unordered_set<std::string> join_nodes_in_full_plan;
141854
141839
 
141855
141840
  //! Extract the bindings referred to by an Expression
141856
141841
  bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
@@ -142161,6 +142146,8 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet *new_set
142161
142146
  }
142162
142147
  }
142163
142148
  double denom = 1;
142149
+ // TODO: It's possible cross-products were added and are not present in the filters in the relation_2_tdom
142150
+ // structures. When that's the case, multiply the denom structures that have no intersection
142164
142151
  for (auto &match : subgraphs) {
142165
142152
  // It's possible that in production, one of the D_ASSERTS above will fail and not all subgraphs
142166
142153
  // were connected. When this happens, just use the largest denominator of all the subgraphs.
@@ -145960,7 +145947,7 @@ void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode *node) {
145960
145947
  join_nodes_in_full_plan.clear();
145961
145948
  }
145962
145949
  if (node->set->count < relations.size()) {
145963
- join_nodes_in_full_plan.insert(node);
145950
+ join_nodes_in_full_plan.insert(node->set->ToString());
145964
145951
  }
145965
145952
  UpdateJoinNodesInFullPlan(node->left);
145966
145953
  UpdateJoinNodesInFullPlan(node->right);
@@ -145984,8 +145971,8 @@ JoinNode *JoinOrderOptimizer::EmitPair(JoinRelationSet *left, JoinRelationSet *r
145984
145971
  if (entry != plans.end()) {
145985
145972
  cardinality_estimator.VerifySymmetry(result, entry->second.get());
145986
145973
  }
145987
-
145988
- if (full_plan_found && join_nodes_in_full_plan.count(new_plan.get()) > 0) {
145974
+ if (full_plan_found &&
145975
+ join_nodes_in_full_plan.find(new_plan->set->ToString()) != join_nodes_in_full_plan.end()) {
145989
145976
  must_update_full_plan = true;
145990
145977
  }
145991
145978
  if (new_set->count == relations.size()) {
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "32e28e153"
15
- #define DUCKDB_VERSION "v0.4.1-dev2367"
14
+ #define DUCKDB_SOURCE_ID "32c0d9c35"
15
+ #define DUCKDB_VERSION "v0.4.1-dev2376"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -11271,6 +11271,8 @@ class ColumnDataCollection {
11271
11271
  public:
11272
11272
  //! Constructs an in-memory column data collection from an allocator
11273
11273
  DUCKDB_API ColumnDataCollection(Allocator &allocator, vector<LogicalType> types);
11274
+ //! Constructs an empty (but valid) in-memory column data collection from an allocator
11275
+ DUCKDB_API ColumnDataCollection(Allocator &allocator);
11274
11276
  //! Constructs a buffer-managed column data collection
11275
11277
  DUCKDB_API ColumnDataCollection(BufferManager &buffer_manager, vector<LogicalType> types);
11276
11278
  //! Constructs either an in-memory or a buffer-managed column data collection