duckdb 0.4.1-dev464.0 → 0.4.1-dev484.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +340 -195
- package/src/duckdb.hpp +102 -21
- package/src/parquet-amalgamation.cpp +29242 -29242
package/src/duckdb.cpp
CHANGED
|
@@ -61691,7 +61691,7 @@ void PhysicalLimitPercent::GetData(ExecutionContext &context, DataChunk &chunk,
|
|
|
61691
61691
|
|
|
61692
61692
|
namespace duckdb {
|
|
61693
61693
|
|
|
61694
|
-
//!
|
|
61694
|
+
//! PhysicalLoad represents an extension LOAD operation
|
|
61695
61695
|
class PhysicalLoad : public PhysicalOperator {
|
|
61696
61696
|
public:
|
|
61697
61697
|
explicit PhysicalLoad(unique_ptr<LoadInfo> info, idx_t estimated_cardinality)
|
|
@@ -62629,27 +62629,236 @@ void PhysicalTransaction::GetData(ExecutionContext &context, DataChunk &chunk, G
|
|
|
62629
62629
|
|
|
62630
62630
|
|
|
62631
62631
|
|
|
62632
|
+
|
|
62632
62633
|
namespace duckdb {
|
|
62633
62634
|
|
|
62634
|
-
//! PhysicalVacuum represents a VACUUM operation (e.
|
|
62635
|
+
//! PhysicalVacuum represents a VACUUM operation (i.e. VACUUM or ANALYZE)
|
|
62635
62636
|
class PhysicalVacuum : public PhysicalOperator {
|
|
62636
62637
|
public:
|
|
62637
|
-
|
|
62638
|
-
: PhysicalOperator(PhysicalOperatorType::VACUUM, {LogicalType::BOOLEAN}, estimated_cardinality),
|
|
62639
|
-
info(move(info)) {
|
|
62640
|
-
}
|
|
62638
|
+
PhysicalVacuum(unique_ptr<VacuumInfo> info, idx_t estimated_cardinality);
|
|
62641
62639
|
|
|
62642
62640
|
unique_ptr<VacuumInfo> info;
|
|
62643
62641
|
|
|
62642
|
+
private:
|
|
62643
|
+
unordered_map<idx_t, idx_t> column_id_map;
|
|
62644
|
+
|
|
62644
62645
|
public:
|
|
62646
|
+
// Source interface
|
|
62645
62647
|
void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
|
|
62646
62648
|
LocalSourceState &lstate) const override;
|
|
62649
|
+
|
|
62650
|
+
public:
|
|
62651
|
+
// Sink interface
|
|
62652
|
+
unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
|
|
62653
|
+
unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
|
|
62654
|
+
|
|
62655
|
+
SinkResultType Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
|
|
62656
|
+
DataChunk &input) const override;
|
|
62657
|
+
void Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const override;
|
|
62658
|
+
SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
62659
|
+
GlobalSinkState &gstate) const override;
|
|
62660
|
+
|
|
62661
|
+
bool IsSink() const override {
|
|
62662
|
+
return info->has_table;
|
|
62663
|
+
}
|
|
62664
|
+
|
|
62665
|
+
bool ParallelSink() const override {
|
|
62666
|
+
return IsSink();
|
|
62667
|
+
}
|
|
62647
62668
|
};
|
|
62648
62669
|
|
|
62649
62670
|
} // namespace duckdb
|
|
62650
62671
|
|
|
62651
62672
|
|
|
62673
|
+
//===----------------------------------------------------------------------===//
|
|
62674
|
+
// DuckDB
|
|
62675
|
+
//
|
|
62676
|
+
// duckdb/planner/operator/logical_get.hpp
|
|
62677
|
+
//
|
|
62678
|
+
//
|
|
62679
|
+
//===----------------------------------------------------------------------===//
|
|
62680
|
+
|
|
62681
|
+
|
|
62682
|
+
|
|
62683
|
+
|
|
62684
|
+
|
|
62685
|
+
|
|
62686
|
+
|
|
62687
|
+
namespace duckdb {
|
|
62688
|
+
|
|
62689
|
+
//! LogicalGet represents a scan operation from a data source
|
|
62690
|
+
class LogicalGet : public LogicalOperator {
|
|
62691
|
+
public:
|
|
62692
|
+
LogicalGet(idx_t table_index, TableFunction function, unique_ptr<FunctionData> bind_data,
|
|
62693
|
+
vector<LogicalType> returned_types, vector<string> returned_names);
|
|
62694
|
+
|
|
62695
|
+
//! The table index in the current bind context
|
|
62696
|
+
idx_t table_index;
|
|
62697
|
+
//! The function that is called
|
|
62698
|
+
TableFunction function;
|
|
62699
|
+
//! The bind data of the function
|
|
62700
|
+
unique_ptr<FunctionData> bind_data;
|
|
62701
|
+
//! The types of ALL columns that can be returned by the table function
|
|
62702
|
+
vector<LogicalType> returned_types;
|
|
62703
|
+
//! The names of ALL columns that can be returned by the table function
|
|
62704
|
+
vector<string> names;
|
|
62705
|
+
//! Bound column IDs
|
|
62706
|
+
vector<column_t> column_ids;
|
|
62707
|
+
//! Filters pushed down for table scan
|
|
62708
|
+
TableFilterSet table_filters;
|
|
62709
|
+
|
|
62710
|
+
string GetName() const override;
|
|
62711
|
+
string ParamsToString() const override;
|
|
62712
|
+
//! Returns the underlying table that is being scanned, or nullptr if there is none
|
|
62713
|
+
TableCatalogEntry *GetTable() const;
|
|
62714
|
+
|
|
62715
|
+
public:
|
|
62716
|
+
vector<ColumnBinding> GetColumnBindings() override;
|
|
62717
|
+
|
|
62718
|
+
idx_t EstimateCardinality(ClientContext &context) override;
|
|
62719
|
+
|
|
62720
|
+
protected:
|
|
62721
|
+
void ResolveTypes() override;
|
|
62722
|
+
};
|
|
62723
|
+
} // namespace duckdb
|
|
62724
|
+
|
|
62725
|
+
|
|
62726
|
+
//===----------------------------------------------------------------------===//
|
|
62727
|
+
// DuckDB
|
|
62728
|
+
//
|
|
62729
|
+
// duckdb/storage/statistics/distinct_statistics.hpp
|
|
62730
|
+
//
|
|
62731
|
+
//
|
|
62732
|
+
//===----------------------------------------------------------------------===//
|
|
62733
|
+
|
|
62734
|
+
|
|
62735
|
+
|
|
62736
|
+
|
|
62737
|
+
|
|
62738
|
+
|
|
62739
|
+
|
|
62652
62740
|
namespace duckdb {
|
|
62741
|
+
class Serializer;
|
|
62742
|
+
class Deserializer;
|
|
62743
|
+
class Vector;
|
|
62744
|
+
|
|
62745
|
+
class DistinctStatistics : public BaseStatistics {
|
|
62746
|
+
public:
|
|
62747
|
+
DistinctStatistics();
|
|
62748
|
+
explicit DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count);
|
|
62749
|
+
|
|
62750
|
+
//! The HLL of the table
|
|
62751
|
+
unique_ptr<HyperLogLog> log;
|
|
62752
|
+
//! How many values have been sampled into the HLL
|
|
62753
|
+
atomic<idx_t> sample_count;
|
|
62754
|
+
//! How many values have been inserted (before sampling)
|
|
62755
|
+
atomic<idx_t> total_count;
|
|
62756
|
+
|
|
62757
|
+
public:
|
|
62758
|
+
void Merge(const BaseStatistics &other) override;
|
|
62759
|
+
|
|
62760
|
+
unique_ptr<BaseStatistics> Copy() const override;
|
|
62761
|
+
|
|
62762
|
+
void Serialize(Serializer &serializer) const override;
|
|
62763
|
+
void Serialize(FieldWriter &writer) const override;
|
|
62764
|
+
|
|
62765
|
+
static unique_ptr<DistinctStatistics> Deserialize(Deserializer &source);
|
|
62766
|
+
static unique_ptr<DistinctStatistics> Deserialize(FieldReader &reader);
|
|
62767
|
+
|
|
62768
|
+
void Update(Vector &update, idx_t count, bool sample = true);
|
|
62769
|
+
void Update(VectorData &update_data, const LogicalType &ptype, idx_t count, bool sample = true);
|
|
62770
|
+
|
|
62771
|
+
string ToString() const override;
|
|
62772
|
+
idx_t GetCount() const;
|
|
62773
|
+
|
|
62774
|
+
private:
|
|
62775
|
+
//! For distinct statistics we sample the input to speed up insertions
|
|
62776
|
+
static constexpr const double SAMPLE_RATE = 0.1;
|
|
62777
|
+
};
|
|
62778
|
+
|
|
62779
|
+
} // namespace duckdb
|
|
62780
|
+
|
|
62781
|
+
|
|
62782
|
+
namespace duckdb {
|
|
62783
|
+
|
|
62784
|
+
PhysicalVacuum::PhysicalVacuum(unique_ptr<VacuumInfo> info_p, idx_t estimated_cardinality)
|
|
62785
|
+
: PhysicalOperator(PhysicalOperatorType::VACUUM, {LogicalType::BOOLEAN}, estimated_cardinality),
|
|
62786
|
+
info(move(info_p)) {
|
|
62787
|
+
if (info->has_table) {
|
|
62788
|
+
auto &get = (LogicalGet &)*info->bound_ref->get;
|
|
62789
|
+
for (idx_t i = 0; i < get.column_ids.size(); i++) {
|
|
62790
|
+
column_id_map[i] = get.column_ids[i];
|
|
62791
|
+
}
|
|
62792
|
+
}
|
|
62793
|
+
}
|
|
62794
|
+
|
|
62795
|
+
class VacuumLocalSinkState : public LocalSinkState {
|
|
62796
|
+
public:
|
|
62797
|
+
explicit VacuumLocalSinkState(VacuumInfo &info) {
|
|
62798
|
+
for (idx_t col_idx = 0; col_idx < info.columns.size(); col_idx++) {
|
|
62799
|
+
column_distinct_stats.push_back(make_unique<DistinctStatistics>());
|
|
62800
|
+
}
|
|
62801
|
+
};
|
|
62802
|
+
|
|
62803
|
+
vector<unique_ptr<DistinctStatistics>> column_distinct_stats;
|
|
62804
|
+
};
|
|
62805
|
+
|
|
62806
|
+
unique_ptr<LocalSinkState> PhysicalVacuum::GetLocalSinkState(ExecutionContext &context) const {
|
|
62807
|
+
return make_unique<VacuumLocalSinkState>(*info);
|
|
62808
|
+
}
|
|
62809
|
+
|
|
62810
|
+
class VacuumGlobalSinkState : public GlobalSinkState {
|
|
62811
|
+
public:
|
|
62812
|
+
explicit VacuumGlobalSinkState(VacuumInfo &info) {
|
|
62813
|
+
for (idx_t col_idx = 0; col_idx < info.columns.size(); col_idx++) {
|
|
62814
|
+
column_distinct_stats.push_back(make_unique<DistinctStatistics>());
|
|
62815
|
+
}
|
|
62816
|
+
};
|
|
62817
|
+
|
|
62818
|
+
mutex stats_lock;
|
|
62819
|
+
vector<unique_ptr<DistinctStatistics>> column_distinct_stats;
|
|
62820
|
+
};
|
|
62821
|
+
|
|
62822
|
+
unique_ptr<GlobalSinkState> PhysicalVacuum::GetGlobalSinkState(ClientContext &context) const {
|
|
62823
|
+
return make_unique<VacuumGlobalSinkState>(*info);
|
|
62824
|
+
}
|
|
62825
|
+
|
|
62826
|
+
SinkResultType PhysicalVacuum::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
|
|
62827
|
+
DataChunk &input) const {
|
|
62828
|
+
auto &lstate = (VacuumLocalSinkState &)lstate_p;
|
|
62829
|
+
D_ASSERT(lstate.column_distinct_stats.size() == column_id_map.size());
|
|
62830
|
+
|
|
62831
|
+
for (idx_t col_idx = 0; col_idx < input.data.size(); col_idx++) {
|
|
62832
|
+
lstate.column_distinct_stats[col_idx]->Update(input.data[col_idx], input.size(), false);
|
|
62833
|
+
}
|
|
62834
|
+
|
|
62835
|
+
return SinkResultType::NEED_MORE_INPUT;
|
|
62836
|
+
}
|
|
62837
|
+
|
|
62838
|
+
void PhysicalVacuum::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
|
|
62839
|
+
auto &gstate = (VacuumGlobalSinkState &)gstate_p;
|
|
62840
|
+
auto &lstate = (VacuumLocalSinkState &)lstate_p;
|
|
62841
|
+
|
|
62842
|
+
lock_guard<mutex> lock(gstate.stats_lock);
|
|
62843
|
+
D_ASSERT(gstate.column_distinct_stats.size() == lstate.column_distinct_stats.size());
|
|
62844
|
+
for (idx_t col_idx = 0; col_idx < gstate.column_distinct_stats.size(); col_idx++) {
|
|
62845
|
+
gstate.column_distinct_stats[col_idx]->Merge(*lstate.column_distinct_stats[col_idx]);
|
|
62846
|
+
}
|
|
62847
|
+
}
|
|
62848
|
+
|
|
62849
|
+
SinkFinalizeType PhysicalVacuum::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
62850
|
+
GlobalSinkState &gstate) const {
|
|
62851
|
+
auto &sink = (VacuumGlobalSinkState &)gstate;
|
|
62852
|
+
|
|
62853
|
+
auto table = info->bound_ref->table;
|
|
62854
|
+
for (idx_t col_idx = 0; col_idx < sink.column_distinct_stats.size(); col_idx++) {
|
|
62855
|
+
table->storage->SetStatistics(column_id_map.at(col_idx), [&](BaseStatistics &stats) {
|
|
62856
|
+
stats.distinct_stats = move(sink.column_distinct_stats[col_idx]);
|
|
62857
|
+
});
|
|
62858
|
+
}
|
|
62859
|
+
|
|
62860
|
+
return SinkFinalizeType::READY;
|
|
62861
|
+
}
|
|
62653
62862
|
|
|
62654
62863
|
void PhysicalVacuum::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
|
|
62655
62864
|
LocalSourceState &lstate) const {
|
|
@@ -76713,57 +76922,6 @@ protected:
|
|
|
76713
76922
|
|
|
76714
76923
|
} // namespace duckdb
|
|
76715
76924
|
|
|
76716
|
-
//===----------------------------------------------------------------------===//
|
|
76717
|
-
// DuckDB
|
|
76718
|
-
//
|
|
76719
|
-
// duckdb/planner/operator/logical_get.hpp
|
|
76720
|
-
//
|
|
76721
|
-
//
|
|
76722
|
-
//===----------------------------------------------------------------------===//
|
|
76723
|
-
|
|
76724
|
-
|
|
76725
|
-
|
|
76726
|
-
|
|
76727
|
-
|
|
76728
|
-
|
|
76729
|
-
|
|
76730
|
-
namespace duckdb {
|
|
76731
|
-
|
|
76732
|
-
//! LogicalGet represents a scan operation from a data source
|
|
76733
|
-
class LogicalGet : public LogicalOperator {
|
|
76734
|
-
public:
|
|
76735
|
-
LogicalGet(idx_t table_index, TableFunction function, unique_ptr<FunctionData> bind_data,
|
|
76736
|
-
vector<LogicalType> returned_types, vector<string> returned_names);
|
|
76737
|
-
|
|
76738
|
-
//! The table index in the current bind context
|
|
76739
|
-
idx_t table_index;
|
|
76740
|
-
//! The function that is called
|
|
76741
|
-
TableFunction function;
|
|
76742
|
-
//! The bind data of the function
|
|
76743
|
-
unique_ptr<FunctionData> bind_data;
|
|
76744
|
-
//! The types of ALL columns that can be returned by the table function
|
|
76745
|
-
vector<LogicalType> returned_types;
|
|
76746
|
-
//! The names of ALL columns that can be returned by the table function
|
|
76747
|
-
vector<string> names;
|
|
76748
|
-
//! Bound column IDs
|
|
76749
|
-
vector<column_t> column_ids;
|
|
76750
|
-
//! Filters pushed down for table scan
|
|
76751
|
-
TableFilterSet table_filters;
|
|
76752
|
-
|
|
76753
|
-
string GetName() const override;
|
|
76754
|
-
string ParamsToString() const override;
|
|
76755
|
-
//! Returns the underlying table that is being scanned, or nullptr if there is none
|
|
76756
|
-
TableCatalogEntry *GetTable() const;
|
|
76757
|
-
|
|
76758
|
-
public:
|
|
76759
|
-
vector<ColumnBinding> GetColumnBindings() override;
|
|
76760
|
-
|
|
76761
|
-
idx_t EstimateCardinality(ClientContext &context) override;
|
|
76762
|
-
|
|
76763
|
-
protected:
|
|
76764
|
-
void ResolveTypes() override;
|
|
76765
|
-
};
|
|
76766
|
-
} // namespace duckdb
|
|
76767
76925
|
|
|
76768
76926
|
|
|
76769
76927
|
namespace duckdb {
|
|
@@ -77705,6 +77863,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalShow &op)
|
|
|
77705
77863
|
} // namespace duckdb
|
|
77706
77864
|
|
|
77707
77865
|
|
|
77866
|
+
|
|
77867
|
+
|
|
77868
|
+
|
|
77869
|
+
|
|
77870
|
+
|
|
77871
|
+
|
|
77872
|
+
|
|
77873
|
+
|
|
77708
77874
|
//===----------------------------------------------------------------------===//
|
|
77709
77875
|
// DuckDB
|
|
77710
77876
|
//
|
|
@@ -77737,15 +77903,6 @@ protected:
|
|
|
77737
77903
|
} // namespace duckdb
|
|
77738
77904
|
|
|
77739
77905
|
|
|
77740
|
-
|
|
77741
|
-
|
|
77742
|
-
|
|
77743
|
-
|
|
77744
|
-
|
|
77745
|
-
|
|
77746
|
-
|
|
77747
|
-
|
|
77748
|
-
|
|
77749
77906
|
namespace duckdb {
|
|
77750
77907
|
|
|
77751
77908
|
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSimple &op) {
|
|
@@ -77758,9 +77915,19 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSimple &op
|
|
|
77758
77915
|
case LogicalOperatorType::LOGICAL_TRANSACTION:
|
|
77759
77916
|
return make_unique<PhysicalTransaction>(unique_ptr_cast<ParseInfo, TransactionInfo>(move(op.info)),
|
|
77760
77917
|
op.estimated_cardinality);
|
|
77761
|
-
case LogicalOperatorType::LOGICAL_VACUUM:
|
|
77762
|
-
|
|
77763
|
-
|
|
77918
|
+
case LogicalOperatorType::LOGICAL_VACUUM: {
|
|
77919
|
+
auto &info = (VacuumInfo &)*op.info;
|
|
77920
|
+
if (!info.has_table) {
|
|
77921
|
+
return make_unique<PhysicalVacuum>(unique_ptr_cast<ParseInfo, VacuumInfo>(move(op.info)),
|
|
77922
|
+
op.estimated_cardinality);
|
|
77923
|
+
}
|
|
77924
|
+
info.bound_ref->get->ResolveOperatorTypes();
|
|
77925
|
+
auto child = CreatePlan(*info.bound_ref->get);
|
|
77926
|
+
auto result = make_unique<PhysicalVacuum>(unique_ptr_cast<ParseInfo, VacuumInfo>(move(op.info)),
|
|
77927
|
+
op.estimated_cardinality);
|
|
77928
|
+
result->children.push_back(move(child));
|
|
77929
|
+
return move(result);
|
|
77930
|
+
}
|
|
77764
77931
|
case LogicalOperatorType::LOGICAL_LOAD:
|
|
77765
77932
|
return make_unique<PhysicalLoad>(unique_ptr_cast<ParseInfo, LoadInfo>(move(op.info)), op.estimated_cardinality);
|
|
77766
77933
|
default:
|
|
@@ -137503,7 +137670,7 @@ unique_ptr<LogicalOperator> RegexRangeFilter::Rewrite(unique_ptr<LogicalOperator
|
|
|
137503
137670
|
|
|
137504
137671
|
|
|
137505
137672
|
|
|
137506
|
-
|
|
137673
|
+
|
|
137507
137674
|
|
|
137508
137675
|
namespace duckdb {
|
|
137509
137676
|
|
|
@@ -137728,6 +137895,23 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
|
|
|
137728
137895
|
everything_referenced = true;
|
|
137729
137896
|
break;
|
|
137730
137897
|
}
|
|
137898
|
+
case LogicalOperatorType::LOGICAL_VACUUM: {
|
|
137899
|
+
auto &vacuum = (LogicalSimple &)op;
|
|
137900
|
+
auto &info = (VacuumInfo &)*vacuum.info;
|
|
137901
|
+
if (!info.bound_ref) {
|
|
137902
|
+
break;
|
|
137903
|
+
}
|
|
137904
|
+
|
|
137905
|
+
auto &get = (LogicalGet &)*info.bound_ref->get;
|
|
137906
|
+
for (auto &col : info.columns) {
|
|
137907
|
+
for (idx_t col_idx = 0; col_idx < get.names.size(); col_idx++) {
|
|
137908
|
+
if (get.names[col_idx] == col) {
|
|
137909
|
+
get.column_ids.push_back(col_idx);
|
|
137910
|
+
break;
|
|
137911
|
+
}
|
|
137912
|
+
}
|
|
137913
|
+
}
|
|
137914
|
+
}
|
|
137731
137915
|
default:
|
|
137732
137916
|
break;
|
|
137733
137917
|
}
|
|
@@ -140365,60 +140549,6 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundConjun
|
|
|
140365
140549
|
} // namespace duckdb
|
|
140366
140550
|
|
|
140367
140551
|
|
|
140368
|
-
//===----------------------------------------------------------------------===//
|
|
140369
|
-
// DuckDB
|
|
140370
|
-
//
|
|
140371
|
-
// duckdb/storage/statistics/distinct_statistics.hpp
|
|
140372
|
-
//
|
|
140373
|
-
//
|
|
140374
|
-
//===----------------------------------------------------------------------===//
|
|
140375
|
-
|
|
140376
|
-
|
|
140377
|
-
|
|
140378
|
-
|
|
140379
|
-
|
|
140380
|
-
|
|
140381
|
-
|
|
140382
|
-
namespace duckdb {
|
|
140383
|
-
class Serializer;
|
|
140384
|
-
class Deserializer;
|
|
140385
|
-
class Vector;
|
|
140386
|
-
|
|
140387
|
-
class DistinctStatistics : public BaseStatistics {
|
|
140388
|
-
public:
|
|
140389
|
-
DistinctStatistics();
|
|
140390
|
-
explicit DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count);
|
|
140391
|
-
|
|
140392
|
-
//! The HLL of the table
|
|
140393
|
-
unique_ptr<HyperLogLog> log;
|
|
140394
|
-
//! How many values have been sampled into the HLL
|
|
140395
|
-
atomic<idx_t> sample_count;
|
|
140396
|
-
//! How many values have been inserted (before sampling)
|
|
140397
|
-
atomic<idx_t> total_count;
|
|
140398
|
-
|
|
140399
|
-
public:
|
|
140400
|
-
void Merge(const BaseStatistics &other) override;
|
|
140401
|
-
|
|
140402
|
-
unique_ptr<BaseStatistics> Copy() const override;
|
|
140403
|
-
|
|
140404
|
-
void Serialize(Serializer &serializer) const override;
|
|
140405
|
-
void Serialize(FieldWriter &writer) const override;
|
|
140406
|
-
|
|
140407
|
-
static unique_ptr<DistinctStatistics> Deserialize(Deserializer &source);
|
|
140408
|
-
static unique_ptr<DistinctStatistics> Deserialize(FieldReader &reader);
|
|
140409
|
-
|
|
140410
|
-
void Update(Vector &update, idx_t count);
|
|
140411
|
-
void Update(VectorData &update_data, const LogicalType &ptype, idx_t count);
|
|
140412
|
-
|
|
140413
|
-
string ToString() const override;
|
|
140414
|
-
idx_t GetCount() const;
|
|
140415
|
-
|
|
140416
|
-
private:
|
|
140417
|
-
//! For distinct statistics we sample the input to speed up insertions
|
|
140418
|
-
static constexpr const double SAMPLE_RATE = 0.1;
|
|
140419
|
-
};
|
|
140420
|
-
|
|
140421
|
-
} // namespace duckdb
|
|
140422
140552
|
|
|
140423
140553
|
|
|
140424
140554
|
|
|
@@ -155180,7 +155310,7 @@ private:
|
|
|
155180
155310
|
//! Transform a Postgres duckdb_libpgquery::T_PGImportStmt node into a PragmaStatement
|
|
155181
155311
|
unique_ptr<PragmaStatement> TransformImport(duckdb_libpgquery::PGNode *node);
|
|
155182
155312
|
unique_ptr<ExplainStatement> TransformExplain(duckdb_libpgquery::PGNode *node);
|
|
155183
|
-
unique_ptr<
|
|
155313
|
+
unique_ptr<SQLStatement> TransformVacuum(duckdb_libpgquery::PGNode *node);
|
|
155184
155314
|
unique_ptr<SQLStatement> TransformShow(duckdb_libpgquery::PGNode *node);
|
|
155185
155315
|
unique_ptr<ShowStatement> TransformShowSelect(duckdb_libpgquery::PGNode *node);
|
|
155186
155316
|
|
|
@@ -157356,12 +157486,12 @@ namespace duckdb {
|
|
|
157356
157486
|
|
|
157357
157487
|
class VacuumStatement : public SQLStatement {
|
|
157358
157488
|
public:
|
|
157359
|
-
VacuumStatement();
|
|
157489
|
+
explicit VacuumStatement(const VacuumOptions &options);
|
|
157360
157490
|
|
|
157361
157491
|
unique_ptr<VacuumInfo> info;
|
|
157362
157492
|
|
|
157363
157493
|
protected:
|
|
157364
|
-
VacuumStatement(const VacuumStatement &other)
|
|
157494
|
+
VacuumStatement(const VacuumStatement &other);
|
|
157365
157495
|
|
|
157366
157496
|
public:
|
|
157367
157497
|
unique_ptr<SQLStatement> Copy() const override;
|
|
@@ -157372,7 +157502,11 @@ public:
|
|
|
157372
157502
|
|
|
157373
157503
|
namespace duckdb {
|
|
157374
157504
|
|
|
157375
|
-
VacuumStatement::VacuumStatement(
|
|
157505
|
+
VacuumStatement::VacuumStatement(const VacuumOptions &options)
|
|
157506
|
+
: SQLStatement(StatementType::VACUUM_STATEMENT), info(make_unique<VacuumInfo>(options)) {
|
|
157507
|
+
}
|
|
157508
|
+
|
|
157509
|
+
VacuumStatement::VacuumStatement(const VacuumStatement &other) : SQLStatement(other), info(other.info->Copy()) {
|
|
157376
157510
|
}
|
|
157377
157511
|
|
|
157378
157512
|
unique_ptr<SQLStatement> VacuumStatement::Copy() const {
|
|
@@ -160976,7 +161110,6 @@ unique_ptr<CreateStatement> Transformer::TransformCreateFunction(duckdb_libpgque
|
|
|
160976
161110
|
auto qname = TransformQualifiedName(stmt->name);
|
|
160977
161111
|
|
|
160978
161112
|
unique_ptr<MacroFunction> macro_func;
|
|
160979
|
-
;
|
|
160980
161113
|
|
|
160981
161114
|
// function can be null here
|
|
160982
161115
|
if (stmt->function) {
|
|
@@ -162343,12 +162476,55 @@ unique_ptr<UpdateStatement> Transformer::TransformUpdate(duckdb_libpgquery::PGNo
|
|
|
162343
162476
|
|
|
162344
162477
|
namespace duckdb {
|
|
162345
162478
|
|
|
162346
|
-
|
|
162479
|
+
VacuumOptions ParseOptions(int options) {
|
|
162480
|
+
VacuumOptions result;
|
|
162481
|
+
if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_VACUUM) {
|
|
162482
|
+
result.vacuum = true;
|
|
162483
|
+
}
|
|
162484
|
+
if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_ANALYZE) {
|
|
162485
|
+
result.analyze = true;
|
|
162486
|
+
}
|
|
162487
|
+
if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_VERBOSE) {
|
|
162488
|
+
throw NotImplementedException("Verbose vacuum option");
|
|
162489
|
+
}
|
|
162490
|
+
if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_FREEZE) {
|
|
162491
|
+
throw NotImplementedException("Freeze vacuum option");
|
|
162492
|
+
}
|
|
162493
|
+
if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_FULL) {
|
|
162494
|
+
throw NotImplementedException("Full vacuum option");
|
|
162495
|
+
}
|
|
162496
|
+
if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_NOWAIT) {
|
|
162497
|
+
throw NotImplementedException("No Wait vacuum option");
|
|
162498
|
+
}
|
|
162499
|
+
if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_SKIPTOAST) {
|
|
162500
|
+
throw NotImplementedException("Skip Toast vacuum option");
|
|
162501
|
+
}
|
|
162502
|
+
if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_DISABLE_PAGE_SKIPPING) {
|
|
162503
|
+
throw NotImplementedException("Disable Page Skipping vacuum option");
|
|
162504
|
+
}
|
|
162505
|
+
return result;
|
|
162506
|
+
}
|
|
162507
|
+
|
|
162508
|
+
unique_ptr<SQLStatement> Transformer::TransformVacuum(duckdb_libpgquery::PGNode *node) {
|
|
162347
162509
|
auto stmt = reinterpret_cast<duckdb_libpgquery::PGVacuumStmt *>(node);
|
|
162348
162510
|
D_ASSERT(stmt);
|
|
162349
|
-
|
|
162350
|
-
auto result = make_unique<VacuumStatement>();
|
|
162351
|
-
|
|
162511
|
+
|
|
162512
|
+
auto result = make_unique<VacuumStatement>(ParseOptions(stmt->options));
|
|
162513
|
+
|
|
162514
|
+
if (stmt->relation) {
|
|
162515
|
+
result->info->ref = TransformRangeVar(stmt->relation);
|
|
162516
|
+
result->info->has_table = true;
|
|
162517
|
+
}
|
|
162518
|
+
|
|
162519
|
+
if (stmt->va_cols) {
|
|
162520
|
+
D_ASSERT(result->info->has_table);
|
|
162521
|
+
for (auto col_node = stmt->va_cols->head; col_node != nullptr; col_node = col_node->next) {
|
|
162522
|
+
result->info->columns.emplace_back(
|
|
162523
|
+
reinterpret_cast<duckdb_libpgquery::PGValue *>(col_node->data.ptr_value)->val.str);
|
|
162524
|
+
}
|
|
162525
|
+
}
|
|
162526
|
+
|
|
162527
|
+
return move(result);
|
|
162352
162528
|
}
|
|
162353
162529
|
|
|
162354
162530
|
} // namespace duckdb
|
|
@@ -163364,35 +163540,6 @@ protected:
|
|
|
163364
163540
|
|
|
163365
163541
|
|
|
163366
163542
|
|
|
163367
|
-
//===----------------------------------------------------------------------===//
|
|
163368
|
-
// DuckDB
|
|
163369
|
-
//
|
|
163370
|
-
// duckdb/planner/bound_tableref.hpp
|
|
163371
|
-
//
|
|
163372
|
-
//
|
|
163373
|
-
//===----------------------------------------------------------------------===//
|
|
163374
|
-
|
|
163375
|
-
|
|
163376
|
-
|
|
163377
|
-
|
|
163378
|
-
|
|
163379
|
-
|
|
163380
|
-
|
|
163381
|
-
namespace duckdb {
|
|
163382
|
-
|
|
163383
|
-
class BoundTableRef {
|
|
163384
|
-
public:
|
|
163385
|
-
explicit BoundTableRef(TableReferenceType type) : type(type) {
|
|
163386
|
-
}
|
|
163387
|
-
virtual ~BoundTableRef() {
|
|
163388
|
-
}
|
|
163389
|
-
|
|
163390
|
-
//! The type of table reference
|
|
163391
|
-
TableReferenceType type;
|
|
163392
|
-
//! The sample options (if any)
|
|
163393
|
-
unique_ptr<SampleOptions> sample;
|
|
163394
|
-
};
|
|
163395
|
-
} // namespace duckdb
|
|
163396
163543
|
|
|
163397
163544
|
|
|
163398
163545
|
|
|
@@ -167207,33 +167354,6 @@ struct BoundCreateFunctionInfo {
|
|
|
167207
167354
|
|
|
167208
167355
|
|
|
167209
167356
|
|
|
167210
|
-
//===----------------------------------------------------------------------===//
|
|
167211
|
-
// DuckDB
|
|
167212
|
-
//
|
|
167213
|
-
// duckdb/planner/tableref/bound_basetableref.hpp
|
|
167214
|
-
//
|
|
167215
|
-
//
|
|
167216
|
-
//===----------------------------------------------------------------------===//
|
|
167217
|
-
|
|
167218
|
-
|
|
167219
|
-
|
|
167220
|
-
|
|
167221
|
-
|
|
167222
|
-
|
|
167223
|
-
namespace duckdb {
|
|
167224
|
-
class TableCatalogEntry;
|
|
167225
|
-
|
|
167226
|
-
//! Represents a TableReference to a base table in the schema
|
|
167227
|
-
class BoundBaseTableRef : public BoundTableRef {
|
|
167228
|
-
public:
|
|
167229
|
-
BoundBaseTableRef(TableCatalogEntry *table, unique_ptr<LogicalOperator> get)
|
|
167230
|
-
: BoundTableRef(TableReferenceType::BASE_TABLE), table(table), get(move(get)) {
|
|
167231
|
-
}
|
|
167232
|
-
|
|
167233
|
-
TableCatalogEntry *table;
|
|
167234
|
-
unique_ptr<LogicalOperator> get;
|
|
167235
|
-
};
|
|
167236
|
-
} // namespace duckdb
|
|
167237
167357
|
|
|
167238
167358
|
|
|
167239
167359
|
|
|
@@ -169300,10 +169420,27 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) {
|
|
|
169300
169420
|
|
|
169301
169421
|
|
|
169302
169422
|
|
|
169423
|
+
|
|
169303
169424
|
namespace duckdb {
|
|
169304
169425
|
|
|
169305
169426
|
BoundStatement Binder::Bind(VacuumStatement &stmt) {
|
|
169306
169427
|
BoundStatement result;
|
|
169428
|
+
|
|
169429
|
+
if (stmt.info->has_table) {
|
|
169430
|
+
auto bound_table = Bind(*stmt.info->ref);
|
|
169431
|
+
if (bound_table->type != TableReferenceType::BASE_TABLE) {
|
|
169432
|
+
throw InvalidInputException("Can only vacuum/analyze base tables!");
|
|
169433
|
+
}
|
|
169434
|
+
stmt.info->bound_ref = unique_ptr_cast<BoundTableRef, BoundBaseTableRef>(move(bound_table));
|
|
169435
|
+
|
|
169436
|
+
auto &columns = stmt.info->columns;
|
|
169437
|
+
if (columns.empty()) {
|
|
169438
|
+
// Empty means ALL columns should be vacuumed/analyzed
|
|
169439
|
+
auto &get = (LogicalGet &)*stmt.info->bound_ref->get;
|
|
169440
|
+
columns.insert(columns.end(), get.names.begin(), get.names.end());
|
|
169441
|
+
}
|
|
169442
|
+
}
|
|
169443
|
+
|
|
169307
169444
|
result.names = {"Success"};
|
|
169308
169445
|
result.types = {LogicalType::BOOLEAN};
|
|
169309
169446
|
result.plan = make_unique<LogicalSimple>(LogicalOperatorType::LOGICAL_VACUUM, move(stmt.info));
|
|
@@ -182698,7 +182835,6 @@ private:
|
|
|
182698
182835
|
|
|
182699
182836
|
|
|
182700
182837
|
|
|
182701
|
-
|
|
182702
182838
|
namespace duckdb {
|
|
182703
182839
|
|
|
182704
182840
|
DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &table,
|
|
@@ -184007,6 +184143,12 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
|
|
|
184007
184143
|
return column_stats[column_id]->stats->Copy();
|
|
184008
184144
|
}
|
|
184009
184145
|
|
|
184146
|
+
void DataTable::SetStatistics(column_t column_id, const std::function<void(BaseStatistics &)> &set_fun) {
|
|
184147
|
+
D_ASSERT(column_id != COLUMN_IDENTIFIER_ROW_ID);
|
|
184148
|
+
lock_guard<mutex> stats_guard(stats_lock);
|
|
184149
|
+
set_fun(*column_stats[column_id]->stats);
|
|
184150
|
+
}
|
|
184151
|
+
|
|
184010
184152
|
//===--------------------------------------------------------------------===//
|
|
184011
184153
|
// Checkpoint
|
|
184012
184154
|
//===--------------------------------------------------------------------===//
|
|
@@ -185520,7 +185662,7 @@ unique_ptr<BaseStatistics> DistinctStatistics::Copy() const {
|
|
|
185520
185662
|
void DistinctStatistics::Merge(const BaseStatistics &other_p) {
|
|
185521
185663
|
BaseStatistics::Merge(other_p);
|
|
185522
185664
|
auto &other = (const DistinctStatistics &)other_p;
|
|
185523
|
-
log->Merge(*other.log);
|
|
185665
|
+
log = log->Merge(*other.log);
|
|
185524
185666
|
sample_count += other.sample_count;
|
|
185525
185667
|
total_count += other.total_count;
|
|
185526
185668
|
}
|
|
@@ -185550,18 +185692,21 @@ unique_ptr<DistinctStatistics> DistinctStatistics::Deserialize(FieldReader &read
|
|
|
185550
185692
|
return make_unique<DistinctStatistics>(HyperLogLog::Deserialize(reader), sample_count, total_count);
|
|
185551
185693
|
}
|
|
185552
185694
|
|
|
185553
|
-
void DistinctStatistics::Update(Vector &v, idx_t count) {
|
|
185695
|
+
void DistinctStatistics::Update(Vector &v, idx_t count, bool sample) {
|
|
185554
185696
|
VectorData vdata;
|
|
185555
185697
|
v.Orrify(count, vdata);
|
|
185556
|
-
Update(vdata, v.GetType(), count);
|
|
185698
|
+
Update(vdata, v.GetType(), count, sample);
|
|
185557
185699
|
}
|
|
185558
185700
|
|
|
185559
|
-
void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_t count) {
|
|
185701
|
+
void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_t count, bool sample) {
|
|
185560
185702
|
if (count == 0) {
|
|
185561
185703
|
return;
|
|
185562
185704
|
}
|
|
185705
|
+
|
|
185563
185706
|
total_count += count;
|
|
185564
|
-
|
|
185707
|
+
if (sample) {
|
|
185708
|
+
count = MinValue<idx_t>(idx_t(SAMPLE_RATE * MaxValue<idx_t>(STANDARD_VECTOR_SIZE, count)), count);
|
|
185709
|
+
}
|
|
185565
185710
|
sample_count += count;
|
|
185566
185711
|
|
|
185567
185712
|
uint64_t indices[STANDARD_VECTOR_SIZE];
|