duckdb 0.5.2-dev4.0 → 0.5.2-dev50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev4.0",
4
+ "version": "0.5.2-dev50.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -43588,12 +43588,16 @@ hash_t Hash(hugeint_t val) {
43588
43588
 
43589
43589
  template <>
43590
43590
  hash_t Hash(float val) {
43591
- return std::hash<float> {}(val);
43591
+ static_assert(sizeof(float) == sizeof(uint32_t), "");
43592
+ uint32_t uval = *((uint32_t *)&val);
43593
+ return murmurhash64(uval);
43592
43594
  }
43593
43595
 
43594
43596
  template <>
43595
43597
  hash_t Hash(double val) {
43596
- return std::hash<double> {}(val);
43598
+ static_assert(sizeof(double) == sizeof(uint64_t), "");
43599
+ uint64_t uval = *((uint64_t *)&val);
43600
+ return murmurhash64(uval);
43597
43601
  }
43598
43602
 
43599
43603
  template <>
@@ -45707,6 +45711,11 @@ public:
45707
45711
  return layout.GetTypes();
45708
45712
  }
45709
45713
 
45714
+ //! The number of rows in the collection
45715
+ inline idx_t Count() const {
45716
+ return total_count;
45717
+ }
45718
+
45710
45719
  //! The number of rows scanned so far
45711
45720
  inline idx_t Scanned() const {
45712
45721
  return total_scanned;
@@ -66288,7 +66297,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
66288
66297
  struct WindowExecutor {
66289
66298
  WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocator, const idx_t count);
66290
66299
 
66291
- void Sink(DataChunk &input_chunk, const idx_t input_idx);
66300
+ void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
66292
66301
  void Finalize(WindowAggregationMode mode);
66293
66302
 
66294
66303
  void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
@@ -66362,7 +66371,7 @@ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocato
66362
66371
  PrepareInputExpressions(exprs.data(), exprs.size(), payload_executor, payload_chunk);
66363
66372
  }
66364
66373
 
66365
- void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx) {
66374
+ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
66366
66375
  // Single pass over the input to produce the global data.
66367
66376
  // Vectorisation for the win...
66368
66377
 
@@ -66397,7 +66406,7 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx) {
66397
66406
  if (!vdata.validity.AllValid()) {
66398
66407
  // Lazily materialise the contents when we find the first NULL
66399
66408
  if (ignore_nulls.AllValid()) {
66400
- ignore_nulls.Initialize(payload_collection.Count());
66409
+ ignore_nulls.Initialize(total_count);
66401
66410
  }
66402
66411
  // Write to the current position
66403
66412
  // Chunks in a collection are full, so we don't have to worry about raggedness
@@ -66926,7 +66935,7 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
66926
66935
 
66927
66936
  // TODO: Parallelization opportunity
66928
66937
  for (auto &wexec : window_execs) {
66929
- wexec->Sink(input_chunk, input_idx);
66938
+ wexec->Sink(input_chunk, input_idx, scanner->Count());
66930
66939
  }
66931
66940
  input_idx += input_chunk.size();
66932
66941
  }
@@ -96448,6 +96457,8 @@ struct PragmaFunctions {
96448
96457
  static void RegisterFunction(BuiltinFunctions &set);
96449
96458
  };
96450
96459
 
96460
+ string PragmaShow(ClientContext &context, const FunctionParameters &parameters);
96461
+
96451
96462
  } // namespace duckdb
96452
96463
 
96453
96464
 
@@ -118595,15 +118606,47 @@ void DuckDBColumnsFun::RegisterFunction(BuiltinFunctions &set) {
118595
118606
 
118596
118607
 
118597
118608
 
118609
+ namespace duckdb {
118610
+
118611
+ struct UniqueKeyInfo {
118612
+ string schema, table;
118613
+ vector<storage_t> columns;
118614
+
118615
+ bool operator==(const UniqueKeyInfo &other) const {
118616
+ return (schema == other.schema) && (table == other.table) && (columns == other.columns);
118617
+ }
118618
+ };
118619
+
118620
+ } // namespace duckdb
118621
+
118622
+ namespace std {
118623
+
118624
+ template <>
118625
+ struct hash<duckdb::UniqueKeyInfo> {
118626
+ template <class X>
118627
+ static size_t ComputeHash(const X &x) {
118628
+ return hash<X>()(x);
118629
+ }
118630
+
118631
+ size_t operator()(const duckdb::UniqueKeyInfo &j) const {
118632
+ D_ASSERT(j.columns.size() > 0);
118633
+ return ComputeHash(j.schema) + ComputeHash(j.table) + ComputeHash(j.columns[0]);
118634
+ }
118635
+ };
118636
+
118637
+ } // namespace std
118638
+
118598
118639
  namespace duckdb {
118599
118640
 
118600
118641
  struct DuckDBConstraintsData : public GlobalTableFunctionState {
118601
- DuckDBConstraintsData() : offset(0), constraint_offset(0) {
118642
+ DuckDBConstraintsData() : offset(0), constraint_offset(0), unique_constraint_offset(0) {
118602
118643
  }
118603
118644
 
118604
118645
  vector<CatalogEntry *> entries;
118605
118646
  idx_t offset;
118606
118647
  idx_t constraint_offset;
118648
+ idx_t unique_constraint_offset;
118649
+ unordered_map<UniqueKeyInfo, idx_t> known_fk_unique_constraint_offsets;
118607
118650
  };
118608
118651
 
118609
118652
  static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, TableFunctionBindInput &input,
@@ -118634,7 +118677,6 @@ static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, Ta
118634
118677
  return_types.emplace_back(LogicalType::VARCHAR);
118635
118678
 
118636
118679
  names.emplace_back("constraint_column_indexes");
118637
- ;
118638
118680
  return_types.push_back(LogicalType::LIST(LogicalType::BIGINT));
118639
118681
 
118640
118682
  names.emplace_back("constraint_column_names");
@@ -118646,15 +118688,29 @@ static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, Ta
118646
118688
  unique_ptr<GlobalTableFunctionState> DuckDBConstraintsInit(ClientContext &context, TableFunctionInitInput &input) {
118647
118689
  auto result = make_unique<DuckDBConstraintsData>();
118648
118690
 
118649
- // scan all the schemas for tables and collect themand collect them
118691
+ // scan all the schemas for tables and collect them
118650
118692
  auto schemas = Catalog::GetCatalog(context).schemas->GetEntries<SchemaCatalogEntry>(context);
118693
+
118694
+ sort(schemas.begin(), schemas.end(), [&](CatalogEntry *x, CatalogEntry *y) { return (x->name < y->name); });
118695
+
118696
+ // check the temp schema as well
118697
+ auto &temp_schema = ClientData::Get(context).temporary_objects;
118698
+ schemas.push_back(temp_schema.get());
118699
+
118651
118700
  for (auto &schema : schemas) {
118652
- schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); });
118701
+ vector<CatalogEntry *> entries;
118702
+
118703
+ schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) {
118704
+ if (entry->type == CatalogType::TABLE_ENTRY) {
118705
+ entries.push_back(entry);
118706
+ }
118707
+ });
118708
+
118709
+ sort(entries.begin(), entries.end(), [&](CatalogEntry *x, CatalogEntry *y) { return (x->name < y->name); });
118710
+
118711
+ result->entries.insert(result->entries.end(), entries.begin(), entries.end());
118653
118712
  };
118654
118713
 
118655
- // check the temp schema as well
118656
- ClientData::Get(context).temporary_objects->Scan(context, CatalogType::TABLE_ENTRY,
118657
- [&](CatalogEntry *entry) { result->entries.push_back(entry); });
118658
118714
  return move(result);
118659
118715
  }
118660
118716
 
@@ -118669,30 +118725,15 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
118669
118725
  idx_t count = 0;
118670
118726
  while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) {
118671
118727
  auto &entry = data.entries[data.offset];
118672
-
118673
- if (entry->type != CatalogType::TABLE_ENTRY) {
118674
- data.offset++;
118675
- continue;
118676
- }
118728
+ D_ASSERT(entry->type == CatalogType::TABLE_ENTRY);
118677
118729
 
118678
118730
  auto &table = (TableCatalogEntry &)*entry;
118679
118731
  for (; data.constraint_offset < table.constraints.size() && count < STANDARD_VECTOR_SIZE;
118680
118732
  data.constraint_offset++) {
118681
118733
  auto &constraint = table.constraints[data.constraint_offset];
118682
118734
  // return values:
118683
- // schema_name, LogicalType::VARCHAR
118684
- output.SetValue(0, count, Value(table.schema->name));
118685
- // schema_oid, LogicalType::BIGINT
118686
- output.SetValue(1, count, Value::BIGINT(table.schema->oid));
118687
- // table_name, LogicalType::VARCHAR
118688
- output.SetValue(2, count, Value(table.name));
118689
- // table_oid, LogicalType::BIGINT
118690
- output.SetValue(3, count, Value::BIGINT(table.oid));
118691
-
118692
- // constraint_index, BIGINT
118693
- output.SetValue(4, count, Value::BIGINT(data.constraint_offset));
118694
-
118695
118735
  // constraint_type, VARCHAR
118736
+ // Processing this first due to shortcut (early continue)
118696
118737
  string constraint_type;
118697
118738
  switch (constraint->type) {
118698
118739
  case ConstraintType::CHECK:
@@ -118706,14 +118747,73 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
118706
118747
  case ConstraintType::NOT_NULL:
118707
118748
  constraint_type = "NOT NULL";
118708
118749
  break;
118709
- case ConstraintType::FOREIGN_KEY:
118750
+ case ConstraintType::FOREIGN_KEY: {
118751
+ auto &bound_foreign_key =
118752
+ (const BoundForeignKeyConstraint &)*table.bound_constraints[data.constraint_offset];
118753
+ if (bound_foreign_key.info.type == ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE) {
118754
+ // Those are already covered by PRIMARY KEY and UNIQUE entries
118755
+ continue;
118756
+ }
118710
118757
  constraint_type = "FOREIGN KEY";
118711
118758
  break;
118759
+ }
118712
118760
  default:
118713
118761
  throw NotImplementedException("Unimplemented constraint for duckdb_constraints");
118714
118762
  }
118715
118763
  output.SetValue(5, count, Value(constraint_type));
118716
118764
 
118765
+ // schema_name, LogicalType::VARCHAR
118766
+ output.SetValue(0, count, Value(table.schema->name));
118767
+ // schema_oid, LogicalType::BIGINT
118768
+ output.SetValue(1, count, Value::BIGINT(table.schema->oid));
118769
+ // table_name, LogicalType::VARCHAR
118770
+ output.SetValue(2, count, Value(table.name));
118771
+ // table_oid, LogicalType::BIGINT
118772
+ output.SetValue(3, count, Value::BIGINT(table.oid));
118773
+
118774
+ // constraint_index, BIGINT
118775
+ auto &bound_constraint = (BoundConstraint &)*table.bound_constraints[data.constraint_offset];
118776
+ UniqueKeyInfo uk_info;
118777
+ switch (bound_constraint.type) {
118778
+ case ConstraintType::UNIQUE: {
118779
+ auto &bound_unique = (BoundUniqueConstraint &)bound_constraint;
118780
+ uk_info = {table.schema->name, table.name, bound_unique.keys};
118781
+ break;
118782
+ }
118783
+ case ConstraintType::FOREIGN_KEY: {
118784
+ const auto &bound_foreign_key = (const BoundForeignKeyConstraint &)bound_constraint;
118785
+ const auto &info = bound_foreign_key.info;
118786
+ uk_info = {info.schema, info.table, info.pk_keys};
118787
+ if (uk_info.schema.empty()) {
118788
+ // FIXME: Can we somehow make use of Binder::BindSchema() here?
118789
+ // From experiments, an omitted schema in REFERENCES ... means "main" or "temp", even if the table
118790
+ // resides in a different schema. Is this guaranteed to be stable?
118791
+ if (entry->temporary) {
118792
+ uk_info.schema = "temp";
118793
+ } else {
118794
+ uk_info.schema = "main";
118795
+ }
118796
+ }
118797
+
118798
+ break;
118799
+ }
118800
+ default:
118801
+ break;
118802
+ }
118803
+
118804
+ if (uk_info.columns.empty()) {
118805
+ output.SetValue(4, count, Value::BIGINT(data.unique_constraint_offset++));
118806
+ } else {
118807
+ auto known_unique_constraint_offset = data.known_fk_unique_constraint_offsets.find(uk_info);
118808
+ if (known_unique_constraint_offset == data.known_fk_unique_constraint_offsets.end()) {
118809
+ data.known_fk_unique_constraint_offsets.insert(make_pair(uk_info, data.unique_constraint_offset));
118810
+ output.SetValue(4, count, Value::BIGINT(data.unique_constraint_offset));
118811
+ data.unique_constraint_offset++;
118812
+ } else {
118813
+ output.SetValue(4, count, Value::BIGINT(known_unique_constraint_offset->second));
118814
+ }
118815
+ }
118816
+
118717
118817
  // constraint_text, VARCHAR
118718
118818
  output.SetValue(6, count, Value(constraint->ToString()));
118719
118819
 
@@ -118725,7 +118825,6 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
118725
118825
  }
118726
118826
  output.SetValue(7, count, expression_text);
118727
118827
 
118728
- auto &bound_constraint = (BoundConstraint &)*table.bound_constraints[data.constraint_offset];
118729
118828
  vector<column_t> column_index_list;
118730
118829
  switch (bound_constraint.type) {
118731
118830
  case ConstraintType::CHECK: {
@@ -118748,7 +118847,7 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
118748
118847
  break;
118749
118848
  }
118750
118849
  case ConstraintType::FOREIGN_KEY: {
118751
- auto &bound_foreign_key = (BoundForeignKeyConstraint &)bound_constraint;
118850
+ auto &bound_foreign_key = (const BoundForeignKeyConstraint &)bound_constraint;
118752
118851
  for (auto &col_idx : bound_foreign_key.info.fk_keys) {
118753
118852
  column_index_list.push_back(column_t(col_idx));
118754
118853
  }
@@ -120916,6 +121015,13 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
120916
121015
  auto &bind_data = (PragmaStorageFunctionData &)*data_p.bind_data;
120917
121016
  auto &data = (PragmaStorageOperatorData &)*data_p.global_state;
120918
121017
  idx_t count = 0;
121018
+ map<storage_t, column_t> soid_to_idx;
121019
+ for (idx_t cidx = 0; cidx < bind_data.table_entry->columns.size(); cidx++) {
121020
+ auto &entry = bind_data.table_entry->columns[cidx];
121021
+ if (!entry.Generated()) {
121022
+ soid_to_idx[entry.StorageOid()] = entry.Oid();
121023
+ }
121024
+ }
120919
121025
  while (data.offset < bind_data.storage_info.size() && count < STANDARD_VECTOR_SIZE) {
120920
121026
  auto &entry = bind_data.storage_info[data.offset++];
120921
121027
  D_ASSERT(entry.size() + 1 == output.ColumnCount());
@@ -120923,8 +121029,9 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
120923
121029
  for (idx_t col_idx = 0; col_idx < entry.size(); col_idx++, result_idx++) {
120924
121030
  if (col_idx == 1) {
120925
121031
  // write the column name
120926
- auto column_index = entry[col_idx].GetValue<int64_t>();
120927
- output.SetValue(result_idx, count, Value(bind_data.table_entry->columns[column_index].Name()));
121032
+ auto storage_column_index = entry[col_idx].GetValue<int64_t>();
121033
+ output.SetValue(result_idx, count,
121034
+ Value(bind_data.table_entry->columns[soid_to_idx[storage_column_index]].Name()));
120928
121035
  result_idx++;
120929
121036
  }
120930
121037
  output.SetValue(result_idx, count, entry[col_idx]);
@@ -197917,8 +198024,9 @@ void DataTable::VerifyAppendConstraints(TableCatalogEntry &table, ClientContext
197917
198024
  auto &constraint = table.bound_constraints[i];
197918
198025
  switch (base_constraint->type) {
197919
198026
  case ConstraintType::NOT_NULL: {
197920
- auto &not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
197921
- VerifyNotNullConstraint(table, chunk.data[not_null.index], chunk.size(),
198027
+ auto &bound_not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
198028
+ auto &not_null = *reinterpret_cast<NotNullConstraint *>(base_constraint.get());
198029
+ VerifyNotNullConstraint(table, chunk.data[bound_not_null.index], chunk.size(),
197922
198030
  table.columns[not_null.index].Name());
197923
198031
  break;
197924
198032
  }
@@ -198389,13 +198497,16 @@ static bool CreateMockChunk(TableCatalogEntry &table, const vector<column_t> &co
198389
198497
 
198390
198498
  void DataTable::VerifyUpdateConstraints(TableCatalogEntry &table, DataChunk &chunk,
198391
198499
  const vector<column_t> &column_ids) {
198392
- for (auto &constraint : table.bound_constraints) {
198500
+ for (idx_t i = 0; i < table.bound_constraints.size(); i++) {
198501
+ auto &base_constraint = table.constraints[i];
198502
+ auto &constraint = table.bound_constraints[i];
198393
198503
  switch (constraint->type) {
198394
198504
  case ConstraintType::NOT_NULL: {
198395
- auto &not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
198505
+ auto &bound_not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
198506
+ auto &not_null = *reinterpret_cast<NotNullConstraint *>(base_constraint.get());
198396
198507
  // check if the constraint is in the list of column_ids
198397
198508
  for (idx_t i = 0; i < column_ids.size(); i++) {
198398
- if (column_ids[i] == not_null.index) {
198509
+ if (column_ids[i] == bound_not_null.index) {
198399
198510
  // found the column id: check the data in
198400
198511
  VerifyNotNullConstraint(table, chunk.data[i], chunk.size(), table.columns[not_null.index].Name());
198401
198512
  break;