duckdb 0.5.2-dev4.0 → 0.5.2-dev50.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +151 -40
- package/src/duckdb.hpp +699 -698
- package/src/parquet-amalgamation.cpp +36422 -36422
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -43588,12 +43588,16 @@ hash_t Hash(hugeint_t val) {
|
|
|
43588
43588
|
|
|
43589
43589
|
template <>
|
|
43590
43590
|
hash_t Hash(float val) {
|
|
43591
|
-
|
|
43591
|
+
static_assert(sizeof(float) == sizeof(uint32_t), "");
|
|
43592
|
+
uint32_t uval = *((uint32_t *)&val);
|
|
43593
|
+
return murmurhash64(uval);
|
|
43592
43594
|
}
|
|
43593
43595
|
|
|
43594
43596
|
template <>
|
|
43595
43597
|
hash_t Hash(double val) {
|
|
43596
|
-
|
|
43598
|
+
static_assert(sizeof(double) == sizeof(uint64_t), "");
|
|
43599
|
+
uint64_t uval = *((uint64_t *)&val);
|
|
43600
|
+
return murmurhash64(uval);
|
|
43597
43601
|
}
|
|
43598
43602
|
|
|
43599
43603
|
template <>
|
|
@@ -45707,6 +45711,11 @@ public:
|
|
|
45707
45711
|
return layout.GetTypes();
|
|
45708
45712
|
}
|
|
45709
45713
|
|
|
45714
|
+
//! The number of rows in the collection
|
|
45715
|
+
inline idx_t Count() const {
|
|
45716
|
+
return total_count;
|
|
45717
|
+
}
|
|
45718
|
+
|
|
45710
45719
|
//! The number of rows scanned so far
|
|
45711
45720
|
inline idx_t Scanned() const {
|
|
45712
45721
|
return total_scanned;
|
|
@@ -66288,7 +66297,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
|
|
|
66288
66297
|
struct WindowExecutor {
|
|
66289
66298
|
WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocator, const idx_t count);
|
|
66290
66299
|
|
|
66291
|
-
void Sink(DataChunk &input_chunk, const idx_t input_idx);
|
|
66300
|
+
void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
|
|
66292
66301
|
void Finalize(WindowAggregationMode mode);
|
|
66293
66302
|
|
|
66294
66303
|
void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
|
|
@@ -66362,7 +66371,7 @@ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocato
|
|
|
66362
66371
|
PrepareInputExpressions(exprs.data(), exprs.size(), payload_executor, payload_chunk);
|
|
66363
66372
|
}
|
|
66364
66373
|
|
|
66365
|
-
void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx) {
|
|
66374
|
+
void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
|
|
66366
66375
|
// Single pass over the input to produce the global data.
|
|
66367
66376
|
// Vectorisation for the win...
|
|
66368
66377
|
|
|
@@ -66397,7 +66406,7 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx) {
|
|
|
66397
66406
|
if (!vdata.validity.AllValid()) {
|
|
66398
66407
|
// Lazily materialise the contents when we find the first NULL
|
|
66399
66408
|
if (ignore_nulls.AllValid()) {
|
|
66400
|
-
ignore_nulls.Initialize(
|
|
66409
|
+
ignore_nulls.Initialize(total_count);
|
|
66401
66410
|
}
|
|
66402
66411
|
// Write to the current position
|
|
66403
66412
|
// Chunks in a collection are full, so we don't have to worry about raggedness
|
|
@@ -66926,7 +66935,7 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
|
66926
66935
|
|
|
66927
66936
|
// TODO: Parallelization opportunity
|
|
66928
66937
|
for (auto &wexec : window_execs) {
|
|
66929
|
-
wexec->Sink(input_chunk, input_idx);
|
|
66938
|
+
wexec->Sink(input_chunk, input_idx, scanner->Count());
|
|
66930
66939
|
}
|
|
66931
66940
|
input_idx += input_chunk.size();
|
|
66932
66941
|
}
|
|
@@ -96448,6 +96457,8 @@ struct PragmaFunctions {
|
|
|
96448
96457
|
static void RegisterFunction(BuiltinFunctions &set);
|
|
96449
96458
|
};
|
|
96450
96459
|
|
|
96460
|
+
string PragmaShow(ClientContext &context, const FunctionParameters &parameters);
|
|
96461
|
+
|
|
96451
96462
|
} // namespace duckdb
|
|
96452
96463
|
|
|
96453
96464
|
|
|
@@ -118595,15 +118606,47 @@ void DuckDBColumnsFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
118595
118606
|
|
|
118596
118607
|
|
|
118597
118608
|
|
|
118609
|
+
namespace duckdb {
|
|
118610
|
+
|
|
118611
|
+
struct UniqueKeyInfo {
|
|
118612
|
+
string schema, table;
|
|
118613
|
+
vector<storage_t> columns;
|
|
118614
|
+
|
|
118615
|
+
bool operator==(const UniqueKeyInfo &other) const {
|
|
118616
|
+
return (schema == other.schema) && (table == other.table) && (columns == other.columns);
|
|
118617
|
+
}
|
|
118618
|
+
};
|
|
118619
|
+
|
|
118620
|
+
} // namespace duckdb
|
|
118621
|
+
|
|
118622
|
+
namespace std {
|
|
118623
|
+
|
|
118624
|
+
template <>
|
|
118625
|
+
struct hash<duckdb::UniqueKeyInfo> {
|
|
118626
|
+
template <class X>
|
|
118627
|
+
static size_t ComputeHash(const X &x) {
|
|
118628
|
+
return hash<X>()(x);
|
|
118629
|
+
}
|
|
118630
|
+
|
|
118631
|
+
size_t operator()(const duckdb::UniqueKeyInfo &j) const {
|
|
118632
|
+
D_ASSERT(j.columns.size() > 0);
|
|
118633
|
+
return ComputeHash(j.schema) + ComputeHash(j.table) + ComputeHash(j.columns[0]);
|
|
118634
|
+
}
|
|
118635
|
+
};
|
|
118636
|
+
|
|
118637
|
+
} // namespace std
|
|
118638
|
+
|
|
118598
118639
|
namespace duckdb {
|
|
118599
118640
|
|
|
118600
118641
|
struct DuckDBConstraintsData : public GlobalTableFunctionState {
|
|
118601
|
-
DuckDBConstraintsData() : offset(0), constraint_offset(0) {
|
|
118642
|
+
DuckDBConstraintsData() : offset(0), constraint_offset(0), unique_constraint_offset(0) {
|
|
118602
118643
|
}
|
|
118603
118644
|
|
|
118604
118645
|
vector<CatalogEntry *> entries;
|
|
118605
118646
|
idx_t offset;
|
|
118606
118647
|
idx_t constraint_offset;
|
|
118648
|
+
idx_t unique_constraint_offset;
|
|
118649
|
+
unordered_map<UniqueKeyInfo, idx_t> known_fk_unique_constraint_offsets;
|
|
118607
118650
|
};
|
|
118608
118651
|
|
|
118609
118652
|
static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, TableFunctionBindInput &input,
|
|
@@ -118634,7 +118677,6 @@ static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, Ta
|
|
|
118634
118677
|
return_types.emplace_back(LogicalType::VARCHAR);
|
|
118635
118678
|
|
|
118636
118679
|
names.emplace_back("constraint_column_indexes");
|
|
118637
|
-
;
|
|
118638
118680
|
return_types.push_back(LogicalType::LIST(LogicalType::BIGINT));
|
|
118639
118681
|
|
|
118640
118682
|
names.emplace_back("constraint_column_names");
|
|
@@ -118646,15 +118688,29 @@ static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, Ta
|
|
|
118646
118688
|
unique_ptr<GlobalTableFunctionState> DuckDBConstraintsInit(ClientContext &context, TableFunctionInitInput &input) {
|
|
118647
118689
|
auto result = make_unique<DuckDBConstraintsData>();
|
|
118648
118690
|
|
|
118649
|
-
// scan all the schemas for tables and collect
|
|
118691
|
+
// scan all the schemas for tables and collect them
|
|
118650
118692
|
auto schemas = Catalog::GetCatalog(context).schemas->GetEntries<SchemaCatalogEntry>(context);
|
|
118693
|
+
|
|
118694
|
+
sort(schemas.begin(), schemas.end(), [&](CatalogEntry *x, CatalogEntry *y) { return (x->name < y->name); });
|
|
118695
|
+
|
|
118696
|
+
// check the temp schema as well
|
|
118697
|
+
auto &temp_schema = ClientData::Get(context).temporary_objects;
|
|
118698
|
+
schemas.push_back(temp_schema.get());
|
|
118699
|
+
|
|
118651
118700
|
for (auto &schema : schemas) {
|
|
118652
|
-
|
|
118701
|
+
vector<CatalogEntry *> entries;
|
|
118702
|
+
|
|
118703
|
+
schema->Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) {
|
|
118704
|
+
if (entry->type == CatalogType::TABLE_ENTRY) {
|
|
118705
|
+
entries.push_back(entry);
|
|
118706
|
+
}
|
|
118707
|
+
});
|
|
118708
|
+
|
|
118709
|
+
sort(entries.begin(), entries.end(), [&](CatalogEntry *x, CatalogEntry *y) { return (x->name < y->name); });
|
|
118710
|
+
|
|
118711
|
+
result->entries.insert(result->entries.end(), entries.begin(), entries.end());
|
|
118653
118712
|
};
|
|
118654
118713
|
|
|
118655
|
-
// check the temp schema as well
|
|
118656
|
-
ClientData::Get(context).temporary_objects->Scan(context, CatalogType::TABLE_ENTRY,
|
|
118657
|
-
[&](CatalogEntry *entry) { result->entries.push_back(entry); });
|
|
118658
118714
|
return move(result);
|
|
118659
118715
|
}
|
|
118660
118716
|
|
|
@@ -118669,30 +118725,15 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
|
|
|
118669
118725
|
idx_t count = 0;
|
|
118670
118726
|
while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) {
|
|
118671
118727
|
auto &entry = data.entries[data.offset];
|
|
118672
|
-
|
|
118673
|
-
if (entry->type != CatalogType::TABLE_ENTRY) {
|
|
118674
|
-
data.offset++;
|
|
118675
|
-
continue;
|
|
118676
|
-
}
|
|
118728
|
+
D_ASSERT(entry->type == CatalogType::TABLE_ENTRY);
|
|
118677
118729
|
|
|
118678
118730
|
auto &table = (TableCatalogEntry &)*entry;
|
|
118679
118731
|
for (; data.constraint_offset < table.constraints.size() && count < STANDARD_VECTOR_SIZE;
|
|
118680
118732
|
data.constraint_offset++) {
|
|
118681
118733
|
auto &constraint = table.constraints[data.constraint_offset];
|
|
118682
118734
|
// return values:
|
|
118683
|
-
// schema_name, LogicalType::VARCHAR
|
|
118684
|
-
output.SetValue(0, count, Value(table.schema->name));
|
|
118685
|
-
// schema_oid, LogicalType::BIGINT
|
|
118686
|
-
output.SetValue(1, count, Value::BIGINT(table.schema->oid));
|
|
118687
|
-
// table_name, LogicalType::VARCHAR
|
|
118688
|
-
output.SetValue(2, count, Value(table.name));
|
|
118689
|
-
// table_oid, LogicalType::BIGINT
|
|
118690
|
-
output.SetValue(3, count, Value::BIGINT(table.oid));
|
|
118691
|
-
|
|
118692
|
-
// constraint_index, BIGINT
|
|
118693
|
-
output.SetValue(4, count, Value::BIGINT(data.constraint_offset));
|
|
118694
|
-
|
|
118695
118735
|
// constraint_type, VARCHAR
|
|
118736
|
+
// Processing this first due to shortcut (early continue)
|
|
118696
118737
|
string constraint_type;
|
|
118697
118738
|
switch (constraint->type) {
|
|
118698
118739
|
case ConstraintType::CHECK:
|
|
@@ -118706,14 +118747,73 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
|
|
|
118706
118747
|
case ConstraintType::NOT_NULL:
|
|
118707
118748
|
constraint_type = "NOT NULL";
|
|
118708
118749
|
break;
|
|
118709
|
-
case ConstraintType::FOREIGN_KEY:
|
|
118750
|
+
case ConstraintType::FOREIGN_KEY: {
|
|
118751
|
+
auto &bound_foreign_key =
|
|
118752
|
+
(const BoundForeignKeyConstraint &)*table.bound_constraints[data.constraint_offset];
|
|
118753
|
+
if (bound_foreign_key.info.type == ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE) {
|
|
118754
|
+
// Those are already covered by PRIMARY KEY and UNIQUE entries
|
|
118755
|
+
continue;
|
|
118756
|
+
}
|
|
118710
118757
|
constraint_type = "FOREIGN KEY";
|
|
118711
118758
|
break;
|
|
118759
|
+
}
|
|
118712
118760
|
default:
|
|
118713
118761
|
throw NotImplementedException("Unimplemented constraint for duckdb_constraints");
|
|
118714
118762
|
}
|
|
118715
118763
|
output.SetValue(5, count, Value(constraint_type));
|
|
118716
118764
|
|
|
118765
|
+
// schema_name, LogicalType::VARCHAR
|
|
118766
|
+
output.SetValue(0, count, Value(table.schema->name));
|
|
118767
|
+
// schema_oid, LogicalType::BIGINT
|
|
118768
|
+
output.SetValue(1, count, Value::BIGINT(table.schema->oid));
|
|
118769
|
+
// table_name, LogicalType::VARCHAR
|
|
118770
|
+
output.SetValue(2, count, Value(table.name));
|
|
118771
|
+
// table_oid, LogicalType::BIGINT
|
|
118772
|
+
output.SetValue(3, count, Value::BIGINT(table.oid));
|
|
118773
|
+
|
|
118774
|
+
// constraint_index, BIGINT
|
|
118775
|
+
auto &bound_constraint = (BoundConstraint &)*table.bound_constraints[data.constraint_offset];
|
|
118776
|
+
UniqueKeyInfo uk_info;
|
|
118777
|
+
switch (bound_constraint.type) {
|
|
118778
|
+
case ConstraintType::UNIQUE: {
|
|
118779
|
+
auto &bound_unique = (BoundUniqueConstraint &)bound_constraint;
|
|
118780
|
+
uk_info = {table.schema->name, table.name, bound_unique.keys};
|
|
118781
|
+
break;
|
|
118782
|
+
}
|
|
118783
|
+
case ConstraintType::FOREIGN_KEY: {
|
|
118784
|
+
const auto &bound_foreign_key = (const BoundForeignKeyConstraint &)bound_constraint;
|
|
118785
|
+
const auto &info = bound_foreign_key.info;
|
|
118786
|
+
uk_info = {info.schema, info.table, info.pk_keys};
|
|
118787
|
+
if (uk_info.schema.empty()) {
|
|
118788
|
+
// FIXME: Can we somehow make use of Binder::BindSchema() here?
|
|
118789
|
+
// From experiments, an omitted schema in REFERENCES ... means "main" or "temp", even if the table
|
|
118790
|
+
// resides in a different schema. Is this guaranteed to be stable?
|
|
118791
|
+
if (entry->temporary) {
|
|
118792
|
+
uk_info.schema = "temp";
|
|
118793
|
+
} else {
|
|
118794
|
+
uk_info.schema = "main";
|
|
118795
|
+
}
|
|
118796
|
+
}
|
|
118797
|
+
|
|
118798
|
+
break;
|
|
118799
|
+
}
|
|
118800
|
+
default:
|
|
118801
|
+
break;
|
|
118802
|
+
}
|
|
118803
|
+
|
|
118804
|
+
if (uk_info.columns.empty()) {
|
|
118805
|
+
output.SetValue(4, count, Value::BIGINT(data.unique_constraint_offset++));
|
|
118806
|
+
} else {
|
|
118807
|
+
auto known_unique_constraint_offset = data.known_fk_unique_constraint_offsets.find(uk_info);
|
|
118808
|
+
if (known_unique_constraint_offset == data.known_fk_unique_constraint_offsets.end()) {
|
|
118809
|
+
data.known_fk_unique_constraint_offsets.insert(make_pair(uk_info, data.unique_constraint_offset));
|
|
118810
|
+
output.SetValue(4, count, Value::BIGINT(data.unique_constraint_offset));
|
|
118811
|
+
data.unique_constraint_offset++;
|
|
118812
|
+
} else {
|
|
118813
|
+
output.SetValue(4, count, Value::BIGINT(known_unique_constraint_offset->second));
|
|
118814
|
+
}
|
|
118815
|
+
}
|
|
118816
|
+
|
|
118717
118817
|
// constraint_text, VARCHAR
|
|
118718
118818
|
output.SetValue(6, count, Value(constraint->ToString()));
|
|
118719
118819
|
|
|
@@ -118725,7 +118825,6 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
|
|
|
118725
118825
|
}
|
|
118726
118826
|
output.SetValue(7, count, expression_text);
|
|
118727
118827
|
|
|
118728
|
-
auto &bound_constraint = (BoundConstraint &)*table.bound_constraints[data.constraint_offset];
|
|
118729
118828
|
vector<column_t> column_index_list;
|
|
118730
118829
|
switch (bound_constraint.type) {
|
|
118731
118830
|
case ConstraintType::CHECK: {
|
|
@@ -118748,7 +118847,7 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
|
|
|
118748
118847
|
break;
|
|
118749
118848
|
}
|
|
118750
118849
|
case ConstraintType::FOREIGN_KEY: {
|
|
118751
|
-
auto &bound_foreign_key = (BoundForeignKeyConstraint &)bound_constraint;
|
|
118850
|
+
auto &bound_foreign_key = (const BoundForeignKeyConstraint &)bound_constraint;
|
|
118752
118851
|
for (auto &col_idx : bound_foreign_key.info.fk_keys) {
|
|
118753
118852
|
column_index_list.push_back(column_t(col_idx));
|
|
118754
118853
|
}
|
|
@@ -120916,6 +121015,13 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
|
|
|
120916
121015
|
auto &bind_data = (PragmaStorageFunctionData &)*data_p.bind_data;
|
|
120917
121016
|
auto &data = (PragmaStorageOperatorData &)*data_p.global_state;
|
|
120918
121017
|
idx_t count = 0;
|
|
121018
|
+
map<storage_t, column_t> soid_to_idx;
|
|
121019
|
+
for (idx_t cidx = 0; cidx < bind_data.table_entry->columns.size(); cidx++) {
|
|
121020
|
+
auto &entry = bind_data.table_entry->columns[cidx];
|
|
121021
|
+
if (!entry.Generated()) {
|
|
121022
|
+
soid_to_idx[entry.StorageOid()] = entry.Oid();
|
|
121023
|
+
}
|
|
121024
|
+
}
|
|
120919
121025
|
while (data.offset < bind_data.storage_info.size() && count < STANDARD_VECTOR_SIZE) {
|
|
120920
121026
|
auto &entry = bind_data.storage_info[data.offset++];
|
|
120921
121027
|
D_ASSERT(entry.size() + 1 == output.ColumnCount());
|
|
@@ -120923,8 +121029,9 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
|
|
|
120923
121029
|
for (idx_t col_idx = 0; col_idx < entry.size(); col_idx++, result_idx++) {
|
|
120924
121030
|
if (col_idx == 1) {
|
|
120925
121031
|
// write the column name
|
|
120926
|
-
auto
|
|
120927
|
-
output.SetValue(result_idx, count,
|
|
121032
|
+
auto storage_column_index = entry[col_idx].GetValue<int64_t>();
|
|
121033
|
+
output.SetValue(result_idx, count,
|
|
121034
|
+
Value(bind_data.table_entry->columns[soid_to_idx[storage_column_index]].Name()));
|
|
120928
121035
|
result_idx++;
|
|
120929
121036
|
}
|
|
120930
121037
|
output.SetValue(result_idx, count, entry[col_idx]);
|
|
@@ -197917,8 +198024,9 @@ void DataTable::VerifyAppendConstraints(TableCatalogEntry &table, ClientContext
|
|
|
197917
198024
|
auto &constraint = table.bound_constraints[i];
|
|
197918
198025
|
switch (base_constraint->type) {
|
|
197919
198026
|
case ConstraintType::NOT_NULL: {
|
|
197920
|
-
auto &
|
|
197921
|
-
|
|
198027
|
+
auto &bound_not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
|
|
198028
|
+
auto &not_null = *reinterpret_cast<NotNullConstraint *>(base_constraint.get());
|
|
198029
|
+
VerifyNotNullConstraint(table, chunk.data[bound_not_null.index], chunk.size(),
|
|
197922
198030
|
table.columns[not_null.index].Name());
|
|
197923
198031
|
break;
|
|
197924
198032
|
}
|
|
@@ -198389,13 +198497,16 @@ static bool CreateMockChunk(TableCatalogEntry &table, const vector<column_t> &co
|
|
|
198389
198497
|
|
|
198390
198498
|
void DataTable::VerifyUpdateConstraints(TableCatalogEntry &table, DataChunk &chunk,
|
|
198391
198499
|
const vector<column_t> &column_ids) {
|
|
198392
|
-
for (
|
|
198500
|
+
for (idx_t i = 0; i < table.bound_constraints.size(); i++) {
|
|
198501
|
+
auto &base_constraint = table.constraints[i];
|
|
198502
|
+
auto &constraint = table.bound_constraints[i];
|
|
198393
198503
|
switch (constraint->type) {
|
|
198394
198504
|
case ConstraintType::NOT_NULL: {
|
|
198395
|
-
auto &
|
|
198505
|
+
auto &bound_not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
|
|
198506
|
+
auto &not_null = *reinterpret_cast<NotNullConstraint *>(base_constraint.get());
|
|
198396
198507
|
// check if the constraint is in the list of column_ids
|
|
198397
198508
|
for (idx_t i = 0; i < column_ids.size(); i++) {
|
|
198398
|
-
if (column_ids[i] ==
|
|
198509
|
+
if (column_ids[i] == bound_not_null.index) {
|
|
198399
198510
|
// found the column id: check the data in
|
|
198400
198511
|
VerifyNotNullConstraint(table, chunk.data[i], chunk.size(), table.columns[not_null.index].Name());
|
|
198401
198512
|
break;
|