duckdb 0.5.2-dev2.0 → 0.5.2-dev34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev2.0",
4
+ "version": "0.5.2-dev34.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -43588,12 +43588,16 @@ hash_t Hash(hugeint_t val) {
43588
43588
 
43589
43589
  template <>
43590
43590
  hash_t Hash(float val) {
43591
- return std::hash<float> {}(val);
43591
+ static_assert(sizeof(float) == sizeof(uint32_t), "");
43592
+ uint32_t uval = *((uint32_t *)&val);
43593
+ return murmurhash64(uval);
43592
43594
  }
43593
43595
 
43594
43596
  template <>
43595
43597
  hash_t Hash(double val) {
43596
- return std::hash<double> {}(val);
43598
+ static_assert(sizeof(double) == sizeof(uint64_t), "");
43599
+ uint64_t uval = *((uint64_t *)&val);
43600
+ return murmurhash64(uval);
43597
43601
  }
43598
43602
 
43599
43603
  template <>
@@ -45707,6 +45711,11 @@ public:
45707
45711
  return layout.GetTypes();
45708
45712
  }
45709
45713
 
45714
+ //! The number of rows in the collection
45715
+ inline idx_t Count() const {
45716
+ return total_count;
45717
+ }
45718
+
45710
45719
  //! The number of rows scanned so far
45711
45720
  inline idx_t Scanned() const {
45712
45721
  return total_scanned;
@@ -66288,7 +66297,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
66288
66297
  struct WindowExecutor {
66289
66298
  WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocator, const idx_t count);
66290
66299
 
66291
- void Sink(DataChunk &input_chunk, const idx_t input_idx);
66300
+ void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
66292
66301
  void Finalize(WindowAggregationMode mode);
66293
66302
 
66294
66303
  void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
@@ -66362,7 +66371,7 @@ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocato
66362
66371
  PrepareInputExpressions(exprs.data(), exprs.size(), payload_executor, payload_chunk);
66363
66372
  }
66364
66373
 
66365
- void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx) {
66374
+ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
66366
66375
  // Single pass over the input to produce the global data.
66367
66376
  // Vectorisation for the win...
66368
66377
 
@@ -66397,7 +66406,7 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx) {
66397
66406
  if (!vdata.validity.AllValid()) {
66398
66407
  // Lazily materialise the contents when we find the first NULL
66399
66408
  if (ignore_nulls.AllValid()) {
66400
- ignore_nulls.Initialize(payload_collection.Count());
66409
+ ignore_nulls.Initialize(total_count);
66401
66410
  }
66402
66411
  // Write to the current position
66403
66412
  // Chunks in a collection are full, so we don't have to worry about raggedness
@@ -66926,7 +66935,7 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
66926
66935
 
66927
66936
  // TODO: Parallelization opportunity
66928
66937
  for (auto &wexec : window_execs) {
66929
- wexec->Sink(input_chunk, input_idx);
66938
+ wexec->Sink(input_chunk, input_idx, scanner->Count());
66930
66939
  }
66931
66940
  input_idx += input_chunk.size();
66932
66941
  }
@@ -118595,15 +118604,47 @@ void DuckDBColumnsFun::RegisterFunction(BuiltinFunctions &set) {
118595
118604
 
118596
118605
 
118597
118606
 
118607
+ namespace duckdb {
118608
+
118609
+ struct UniqueKeyInfo {
118610
+ string schema, table;
118611
+ vector<storage_t> columns;
118612
+
118613
+ bool operator==(const UniqueKeyInfo &other) const {
118614
+ return (schema == other.schema) && (table == other.table) && (columns == other.columns);
118615
+ }
118616
+ };
118617
+
118618
+ } // namespace duckdb
118619
+
118620
+ namespace std {
118621
+
118622
+ template <>
118623
+ struct hash<duckdb::UniqueKeyInfo> {
118624
+ template <class X>
118625
+ static size_t ComputeHash(const X &x) {
118626
+ return hash<X>()(x);
118627
+ }
118628
+
118629
+ size_t operator()(const duckdb::UniqueKeyInfo &j) const {
118630
+ D_ASSERT(j.columns.size() > 0);
118631
+ return ComputeHash(j.schema) + ComputeHash(j.table) + ComputeHash(j.columns[0]);
118632
+ }
118633
+ };
118634
+
118635
+ } // namespace std
118636
+
118598
118637
  namespace duckdb {
118599
118638
 
118600
118639
  struct DuckDBConstraintsData : public GlobalTableFunctionState {
118601
- DuckDBConstraintsData() : offset(0), constraint_offset(0) {
118640
+ DuckDBConstraintsData() : offset(0), constraint_offset(0), unique_constraint_offset(0) {
118602
118641
  }
118603
118642
 
118604
118643
  vector<CatalogEntry *> entries;
118605
118644
  idx_t offset;
118606
118645
  idx_t constraint_offset;
118646
+ idx_t unique_constraint_offset;
118647
+ unordered_map<UniqueKeyInfo, idx_t> known_fk_unique_constraint_offsets;
118607
118648
  };
118608
118649
 
118609
118650
  static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, TableFunctionBindInput &input,
@@ -118634,7 +118675,6 @@ static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, Ta
118634
118675
  return_types.emplace_back(LogicalType::VARCHAR);
118635
118676
 
118636
118677
  names.emplace_back("constraint_column_indexes");
118637
- ;
118638
118678
  return_types.push_back(LogicalType::LIST(LogicalType::BIGINT));
118639
118679
 
118640
118680
  names.emplace_back("constraint_column_names");
@@ -118646,15 +118686,29 @@ static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, Ta
118646
118686
  unique_ptr<GlobalTableFunctionState> DuckDBConstraintsInit(ClientContext &context, TableFunctionInitInput &input) {
118647
118687
  auto result = make_unique<DuckDBConstraintsData>();
118648
118688
 
118649
- // scan all the schemas for tables and collect themand collect them
118689
+ // scan all the schemas for tables and collect them
118650
118690
  auto schemas = Catalog::GetCatalog(context).schemas->GetEntries<SchemaCatalogEntry>(context);
118691
+
118692
+ sort(schemas.begin(), schemas.end(), [&](CatalogEntry *x, CatalogEntry *y) { return (x->name < y->name); });
118693
+
118694
+ // check the temp schema as well
118695
+ auto &temp_schema = ClientData::Get(context).temporary_objects;
118696
+ schemas.push_back(temp_schema.get());
118697
+
118651
118698
  for (auto &schema : schemas) {
118652
- schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) { result->entries.push_back(entry); });
118699
+ vector<CatalogEntry *> entries;
118700
+
118701
+ schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) {
118702
+ if (entry->type == CatalogType::TABLE_ENTRY) {
118703
+ entries.push_back(entry);
118704
+ }
118705
+ });
118706
+
118707
+ sort(entries.begin(), entries.end(), [&](CatalogEntry *x, CatalogEntry *y) { return (x->name < y->name); });
118708
+
118709
+ result->entries.insert(result->entries.end(), entries.begin(), entries.end());
118653
118710
  };
118654
118711
 
118655
- // check the temp schema as well
118656
- ClientData::Get(context).temporary_objects->Scan(context, CatalogType::TABLE_ENTRY,
118657
- [&](CatalogEntry *entry) { result->entries.push_back(entry); });
118658
118712
  return move(result);
118659
118713
  }
118660
118714
 
@@ -118669,30 +118723,15 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
118669
118723
  idx_t count = 0;
118670
118724
  while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) {
118671
118725
  auto &entry = data.entries[data.offset];
118672
-
118673
- if (entry->type != CatalogType::TABLE_ENTRY) {
118674
- data.offset++;
118675
- continue;
118676
- }
118726
+ D_ASSERT(entry->type == CatalogType::TABLE_ENTRY);
118677
118727
 
118678
118728
  auto &table = (TableCatalogEntry &)*entry;
118679
118729
  for (; data.constraint_offset < table.constraints.size() && count < STANDARD_VECTOR_SIZE;
118680
118730
  data.constraint_offset++) {
118681
118731
  auto &constraint = table.constraints[data.constraint_offset];
118682
118732
  // return values:
118683
- // schema_name, LogicalType::VARCHAR
118684
- output.SetValue(0, count, Value(table.schema->name));
118685
- // schema_oid, LogicalType::BIGINT
118686
- output.SetValue(1, count, Value::BIGINT(table.schema->oid));
118687
- // table_name, LogicalType::VARCHAR
118688
- output.SetValue(2, count, Value(table.name));
118689
- // table_oid, LogicalType::BIGINT
118690
- output.SetValue(3, count, Value::BIGINT(table.oid));
118691
-
118692
- // constraint_index, BIGINT
118693
- output.SetValue(4, count, Value::BIGINT(data.constraint_offset));
118694
-
118695
118733
  // constraint_type, VARCHAR
118734
+ // Processing this first due to shortcut (early continue)
118696
118735
  string constraint_type;
118697
118736
  switch (constraint->type) {
118698
118737
  case ConstraintType::CHECK:
@@ -118706,14 +118745,73 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
118706
118745
  case ConstraintType::NOT_NULL:
118707
118746
  constraint_type = "NOT NULL";
118708
118747
  break;
118709
- case ConstraintType::FOREIGN_KEY:
118748
+ case ConstraintType::FOREIGN_KEY: {
118749
+ auto &bound_foreign_key =
118750
+ (const BoundForeignKeyConstraint &)*table.bound_constraints[data.constraint_offset];
118751
+ if (bound_foreign_key.info.type == ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE) {
118752
+ // Those are already covered by PRIMARY KEY and UNIQUE entries
118753
+ continue;
118754
+ }
118710
118755
  constraint_type = "FOREIGN KEY";
118711
118756
  break;
118757
+ }
118712
118758
  default:
118713
118759
  throw NotImplementedException("Unimplemented constraint for duckdb_constraints");
118714
118760
  }
118715
118761
  output.SetValue(5, count, Value(constraint_type));
118716
118762
 
118763
+ // schema_name, LogicalType::VARCHAR
118764
+ output.SetValue(0, count, Value(table.schema->name));
118765
+ // schema_oid, LogicalType::BIGINT
118766
+ output.SetValue(1, count, Value::BIGINT(table.schema->oid));
118767
+ // table_name, LogicalType::VARCHAR
118768
+ output.SetValue(2, count, Value(table.name));
118769
+ // table_oid, LogicalType::BIGINT
118770
+ output.SetValue(3, count, Value::BIGINT(table.oid));
118771
+
118772
+ // constraint_index, BIGINT
118773
+ auto &bound_constraint = (BoundConstraint &)*table.bound_constraints[data.constraint_offset];
118774
+ UniqueKeyInfo uk_info;
118775
+ switch (bound_constraint.type) {
118776
+ case ConstraintType::UNIQUE: {
118777
+ auto &bound_unique = (BoundUniqueConstraint &)bound_constraint;
118778
+ uk_info = {table.schema->name, table.name, bound_unique.keys};
118779
+ break;
118780
+ }
118781
+ case ConstraintType::FOREIGN_KEY: {
118782
+ const auto &bound_foreign_key = (const BoundForeignKeyConstraint &)bound_constraint;
118783
+ const auto &info = bound_foreign_key.info;
118784
+ uk_info = {info.schema, info.table, info.pk_keys};
118785
+ if (uk_info.schema.empty()) {
118786
+ // FIXME: Can we somehow make use of Binder::BindSchema() here?
118787
+ // From experiments, an omitted schema in REFERENCES ... means "main" or "temp", even if the table
118788
+ // resides in a different schema. Is this guaranteed to be stable?
118789
+ if (entry->temporary) {
118790
+ uk_info.schema = "temp";
118791
+ } else {
118792
+ uk_info.schema = "main";
118793
+ }
118794
+ }
118795
+
118796
+ break;
118797
+ }
118798
+ default:
118799
+ break;
118800
+ }
118801
+
118802
+ if (uk_info.columns.empty()) {
118803
+ output.SetValue(4, count, Value::BIGINT(data.unique_constraint_offset++));
118804
+ } else {
118805
+ auto known_unique_constraint_offset = data.known_fk_unique_constraint_offsets.find(uk_info);
118806
+ if (known_unique_constraint_offset == data.known_fk_unique_constraint_offsets.end()) {
118807
+ data.known_fk_unique_constraint_offsets.insert(make_pair(uk_info, data.unique_constraint_offset));
118808
+ output.SetValue(4, count, Value::BIGINT(data.unique_constraint_offset));
118809
+ data.unique_constraint_offset++;
118810
+ } else {
118811
+ output.SetValue(4, count, Value::BIGINT(known_unique_constraint_offset->second));
118812
+ }
118813
+ }
118814
+
118717
118815
  // constraint_text, VARCHAR
118718
118816
  output.SetValue(6, count, Value(constraint->ToString()));
118719
118817
 
@@ -118725,7 +118823,6 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
118725
118823
  }
118726
118824
  output.SetValue(7, count, expression_text);
118727
118825
 
118728
- auto &bound_constraint = (BoundConstraint &)*table.bound_constraints[data.constraint_offset];
118729
118826
  vector<column_t> column_index_list;
118730
118827
  switch (bound_constraint.type) {
118731
118828
  case ConstraintType::CHECK: {
@@ -118748,7 +118845,7 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
118748
118845
  break;
118749
118846
  }
118750
118847
  case ConstraintType::FOREIGN_KEY: {
118751
- auto &bound_foreign_key = (BoundForeignKeyConstraint &)bound_constraint;
118848
+ auto &bound_foreign_key = (const BoundForeignKeyConstraint &)bound_constraint;
118752
118849
  for (auto &col_idx : bound_foreign_key.info.fk_keys) {
118753
118850
  column_index_list.push_back(column_t(col_idx));
118754
118851
  }
@@ -120916,6 +121013,13 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
120916
121013
  auto &bind_data = (PragmaStorageFunctionData &)*data_p.bind_data;
120917
121014
  auto &data = (PragmaStorageOperatorData &)*data_p.global_state;
120918
121015
  idx_t count = 0;
121016
+ map<storage_t, column_t> soid_to_idx;
121017
+ for (idx_t cidx = 0; cidx < bind_data.table_entry->columns.size(); cidx++) {
121018
+ auto &entry = bind_data.table_entry->columns[cidx];
121019
+ if (!entry.Generated()) {
121020
+ soid_to_idx[entry.StorageOid()] = entry.Oid();
121021
+ }
121022
+ }
120919
121023
  while (data.offset < bind_data.storage_info.size() && count < STANDARD_VECTOR_SIZE) {
120920
121024
  auto &entry = bind_data.storage_info[data.offset++];
120921
121025
  D_ASSERT(entry.size() + 1 == output.ColumnCount());
@@ -120923,8 +121027,9 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
120923
121027
  for (idx_t col_idx = 0; col_idx < entry.size(); col_idx++, result_idx++) {
120924
121028
  if (col_idx == 1) {
120925
121029
  // write the column name
120926
- auto column_index = entry[col_idx].GetValue<int64_t>();
120927
- output.SetValue(result_idx, count, Value(bind_data.table_entry->columns[column_index].Name()));
121030
+ auto storage_column_index = entry[col_idx].GetValue<int64_t>();
121031
+ output.SetValue(result_idx, count,
121032
+ Value(bind_data.table_entry->columns[soid_to_idx[storage_column_index]].Name()));
120928
121033
  result_idx++;
120929
121034
  }
120930
121035
  output.SetValue(result_idx, count, entry[col_idx]);
@@ -197917,8 +198022,9 @@ void DataTable::VerifyAppendConstraints(TableCatalogEntry &table, ClientContext
197917
198022
  auto &constraint = table.bound_constraints[i];
197918
198023
  switch (base_constraint->type) {
197919
198024
  case ConstraintType::NOT_NULL: {
197920
- auto &not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
197921
- VerifyNotNullConstraint(table, chunk.data[not_null.index], chunk.size(),
198025
+ auto &bound_not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
198026
+ auto &not_null = *reinterpret_cast<NotNullConstraint *>(base_constraint.get());
198027
+ VerifyNotNullConstraint(table, chunk.data[bound_not_null.index], chunk.size(),
197922
198028
  table.columns[not_null.index].Name());
197923
198029
  break;
197924
198030
  }
@@ -198389,13 +198495,16 @@ static bool CreateMockChunk(TableCatalogEntry &table, const vector<column_t> &co
198389
198495
 
198390
198496
  void DataTable::VerifyUpdateConstraints(TableCatalogEntry &table, DataChunk &chunk,
198391
198497
  const vector<column_t> &column_ids) {
198392
- for (auto &constraint : table.bound_constraints) {
198498
+ for (idx_t i = 0; i < table.bound_constraints.size(); i++) {
198499
+ auto &base_constraint = table.constraints[i];
198500
+ auto &constraint = table.bound_constraints[i];
198393
198501
  switch (constraint->type) {
198394
198502
  case ConstraintType::NOT_NULL: {
198395
- auto &not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
198503
+ auto &bound_not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
198504
+ auto &not_null = *reinterpret_cast<NotNullConstraint *>(base_constraint.get());
198396
198505
  // check if the constraint is in the list of column_ids
198397
198506
  for (idx_t i = 0; i < column_ids.size(); i++) {
198398
- if (column_ids[i] == not_null.index) {
198507
+ if (column_ids[i] == bound_not_null.index) {
198399
198508
  // found the column id: check the data in
198400
198509
  VerifyNotNullConstraint(table, chunk.data[i], chunk.size(), table.columns[not_null.index].Name());
198401
198510
  break;
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "8b019cb8e"
15
- #define DUCKDB_VERSION "v0.5.2-dev2"
14
+ #define DUCKDB_SOURCE_ID "d8f7ef99b"
15
+ #define DUCKDB_VERSION "v0.5.2-dev34"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -9491,7 +9491,7 @@ struct ForeignKeyInfo {
9491
9491
  //! The set of main key table's column's index
9492
9492
  vector<storage_t> pk_keys;
9493
9493
  //! The set of foreign key table's column's index
9494
- vector<idx_t> fk_keys;
9494
+ vector<storage_t> fk_keys;
9495
9495
  };
9496
9496
 
9497
9497
  //! Constraint is the base class of any type of table constraint.
@@ -11416,6 +11416,7 @@ public:
11416
11416
  DUCKDB_API ~ColumnDataCollection();
11417
11417
 
11418
11418
  public:
11419
+ //! The types of columns in the ColumnDataCollection
11419
11420
  DUCKDB_API vector<LogicalType> &Types() {
11420
11421
  return types;
11421
11422
  }
@@ -14409,7 +14410,7 @@ public:
14409
14410
  ChunkCollection(Allocator &allocator);
14410
14411
  ChunkCollection(ClientContext &context);
14411
14412
 
14412
- //! The amount of columns in the ChunkCollection
14413
+ //! The types of columns in the ChunkCollection
14413
14414
  DUCKDB_API vector<LogicalType> &Types() {
14414
14415
  return types;
14415
14416
  }