duckdb 0.4.1-dev464.0 → 0.4.1-dev484.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -61691,7 +61691,7 @@ void PhysicalLimitPercent::GetData(ExecutionContext &context, DataChunk &chunk,
61691
61691
 
61692
61692
  namespace duckdb {
61693
61693
 
61694
- //! PhysicalVacuum represents an etension LOAD operation
61694
+ //! PhysicalLoad represents an extension LOAD operation
61695
61695
  class PhysicalLoad : public PhysicalOperator {
61696
61696
  public:
61697
61697
  explicit PhysicalLoad(unique_ptr<LoadInfo> info, idx_t estimated_cardinality)
@@ -62629,27 +62629,236 @@ void PhysicalTransaction::GetData(ExecutionContext &context, DataChunk &chunk, G
62629
62629
 
62630
62630
 
62631
62631
 
62632
+
62632
62633
  namespace duckdb {
62633
62634
 
62634
- //! PhysicalVacuum represents a VACUUM operation (e.g. VACUUM or ANALYZE)
62635
+ //! PhysicalVacuum represents a VACUUM operation (i.e. VACUUM or ANALYZE)
62635
62636
  class PhysicalVacuum : public PhysicalOperator {
62636
62637
  public:
62637
- explicit PhysicalVacuum(unique_ptr<VacuumInfo> info, idx_t estimated_cardinality)
62638
- : PhysicalOperator(PhysicalOperatorType::VACUUM, {LogicalType::BOOLEAN}, estimated_cardinality),
62639
- info(move(info)) {
62640
- }
62638
+ PhysicalVacuum(unique_ptr<VacuumInfo> info, idx_t estimated_cardinality);
62641
62639
 
62642
62640
  unique_ptr<VacuumInfo> info;
62643
62641
 
62642
+ private:
62643
+ unordered_map<idx_t, idx_t> column_id_map;
62644
+
62644
62645
  public:
62646
+ // Source interface
62645
62647
  void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
62646
62648
  LocalSourceState &lstate) const override;
62649
+
62650
+ public:
62651
+ // Sink interface
62652
+ unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
62653
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
62654
+
62655
+ SinkResultType Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
62656
+ DataChunk &input) const override;
62657
+ void Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const override;
62658
+ SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
62659
+ GlobalSinkState &gstate) const override;
62660
+
62661
+ bool IsSink() const override {
62662
+ return info->has_table;
62663
+ }
62664
+
62665
+ bool ParallelSink() const override {
62666
+ return IsSink();
62667
+ }
62647
62668
  };
62648
62669
 
62649
62670
  } // namespace duckdb
62650
62671
 
62651
62672
 
62673
+ //===----------------------------------------------------------------------===//
62674
+ // DuckDB
62675
+ //
62676
+ // duckdb/planner/operator/logical_get.hpp
62677
+ //
62678
+ //
62679
+ //===----------------------------------------------------------------------===//
62680
+
62681
+
62682
+
62683
+
62684
+
62685
+
62686
+
62687
+ namespace duckdb {
62688
+
62689
+ //! LogicalGet represents a scan operation from a data source
62690
+ class LogicalGet : public LogicalOperator {
62691
+ public:
62692
+ LogicalGet(idx_t table_index, TableFunction function, unique_ptr<FunctionData> bind_data,
62693
+ vector<LogicalType> returned_types, vector<string> returned_names);
62694
+
62695
+ //! The table index in the current bind context
62696
+ idx_t table_index;
62697
+ //! The function that is called
62698
+ TableFunction function;
62699
+ //! The bind data of the function
62700
+ unique_ptr<FunctionData> bind_data;
62701
+ //! The types of ALL columns that can be returned by the table function
62702
+ vector<LogicalType> returned_types;
62703
+ //! The names of ALL columns that can be returned by the table function
62704
+ vector<string> names;
62705
+ //! Bound column IDs
62706
+ vector<column_t> column_ids;
62707
+ //! Filters pushed down for table scan
62708
+ TableFilterSet table_filters;
62709
+
62710
+ string GetName() const override;
62711
+ string ParamsToString() const override;
62712
+ //! Returns the underlying table that is being scanned, or nullptr if there is none
62713
+ TableCatalogEntry *GetTable() const;
62714
+
62715
+ public:
62716
+ vector<ColumnBinding> GetColumnBindings() override;
62717
+
62718
+ idx_t EstimateCardinality(ClientContext &context) override;
62719
+
62720
+ protected:
62721
+ void ResolveTypes() override;
62722
+ };
62723
+ } // namespace duckdb
62724
+
62725
+
62726
+ //===----------------------------------------------------------------------===//
62727
+ // DuckDB
62728
+ //
62729
+ // duckdb/storage/statistics/distinct_statistics.hpp
62730
+ //
62731
+ //
62732
+ //===----------------------------------------------------------------------===//
62733
+
62734
+
62735
+
62736
+
62737
+
62738
+
62739
+
62652
62740
  namespace duckdb {
62741
+ class Serializer;
62742
+ class Deserializer;
62743
+ class Vector;
62744
+
62745
+ class DistinctStatistics : public BaseStatistics {
62746
+ public:
62747
+ DistinctStatistics();
62748
+ explicit DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count);
62749
+
62750
+ //! The HLL of the table
62751
+ unique_ptr<HyperLogLog> log;
62752
+ //! How many values have been sampled into the HLL
62753
+ atomic<idx_t> sample_count;
62754
+ //! How many values have been inserted (before sampling)
62755
+ atomic<idx_t> total_count;
62756
+
62757
+ public:
62758
+ void Merge(const BaseStatistics &other) override;
62759
+
62760
+ unique_ptr<BaseStatistics> Copy() const override;
62761
+
62762
+ void Serialize(Serializer &serializer) const override;
62763
+ void Serialize(FieldWriter &writer) const override;
62764
+
62765
+ static unique_ptr<DistinctStatistics> Deserialize(Deserializer &source);
62766
+ static unique_ptr<DistinctStatistics> Deserialize(FieldReader &reader);
62767
+
62768
+ void Update(Vector &update, idx_t count, bool sample = true);
62769
+ void Update(VectorData &update_data, const LogicalType &ptype, idx_t count, bool sample = true);
62770
+
62771
+ string ToString() const override;
62772
+ idx_t GetCount() const;
62773
+
62774
+ private:
62775
+ //! For distinct statistics we sample the input to speed up insertions
62776
+ static constexpr const double SAMPLE_RATE = 0.1;
62777
+ };
62778
+
62779
+ } // namespace duckdb
62780
+
62781
+
62782
+ namespace duckdb {
62783
+
62784
+ PhysicalVacuum::PhysicalVacuum(unique_ptr<VacuumInfo> info_p, idx_t estimated_cardinality)
62785
+ : PhysicalOperator(PhysicalOperatorType::VACUUM, {LogicalType::BOOLEAN}, estimated_cardinality),
62786
+ info(move(info_p)) {
62787
+ if (info->has_table) {
62788
+ auto &get = (LogicalGet &)*info->bound_ref->get;
62789
+ for (idx_t i = 0; i < get.column_ids.size(); i++) {
62790
+ column_id_map[i] = get.column_ids[i];
62791
+ }
62792
+ }
62793
+ }
62794
+
62795
+ class VacuumLocalSinkState : public LocalSinkState {
62796
+ public:
62797
+ explicit VacuumLocalSinkState(VacuumInfo &info) {
62798
+ for (idx_t col_idx = 0; col_idx < info.columns.size(); col_idx++) {
62799
+ column_distinct_stats.push_back(make_unique<DistinctStatistics>());
62800
+ }
62801
+ };
62802
+
62803
+ vector<unique_ptr<DistinctStatistics>> column_distinct_stats;
62804
+ };
62805
+
62806
+ unique_ptr<LocalSinkState> PhysicalVacuum::GetLocalSinkState(ExecutionContext &context) const {
62807
+ return make_unique<VacuumLocalSinkState>(*info);
62808
+ }
62809
+
62810
+ class VacuumGlobalSinkState : public GlobalSinkState {
62811
+ public:
62812
+ explicit VacuumGlobalSinkState(VacuumInfo &info) {
62813
+ for (idx_t col_idx = 0; col_idx < info.columns.size(); col_idx++) {
62814
+ column_distinct_stats.push_back(make_unique<DistinctStatistics>());
62815
+ }
62816
+ };
62817
+
62818
+ mutex stats_lock;
62819
+ vector<unique_ptr<DistinctStatistics>> column_distinct_stats;
62820
+ };
62821
+
62822
+ unique_ptr<GlobalSinkState> PhysicalVacuum::GetGlobalSinkState(ClientContext &context) const {
62823
+ return make_unique<VacuumGlobalSinkState>(*info);
62824
+ }
62825
+
62826
+ SinkResultType PhysicalVacuum::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
62827
+ DataChunk &input) const {
62828
+ auto &lstate = (VacuumLocalSinkState &)lstate_p;
62829
+ D_ASSERT(lstate.column_distinct_stats.size() == column_id_map.size());
62830
+
62831
+ for (idx_t col_idx = 0; col_idx < input.data.size(); col_idx++) {
62832
+ lstate.column_distinct_stats[col_idx]->Update(input.data[col_idx], input.size(), false);
62833
+ }
62834
+
62835
+ return SinkResultType::NEED_MORE_INPUT;
62836
+ }
62837
+
62838
+ void PhysicalVacuum::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
62839
+ auto &gstate = (VacuumGlobalSinkState &)gstate_p;
62840
+ auto &lstate = (VacuumLocalSinkState &)lstate_p;
62841
+
62842
+ lock_guard<mutex> lock(gstate.stats_lock);
62843
+ D_ASSERT(gstate.column_distinct_stats.size() == lstate.column_distinct_stats.size());
62844
+ for (idx_t col_idx = 0; col_idx < gstate.column_distinct_stats.size(); col_idx++) {
62845
+ gstate.column_distinct_stats[col_idx]->Merge(*lstate.column_distinct_stats[col_idx]);
62846
+ }
62847
+ }
62848
+
62849
+ SinkFinalizeType PhysicalVacuum::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
62850
+ GlobalSinkState &gstate) const {
62851
+ auto &sink = (VacuumGlobalSinkState &)gstate;
62852
+
62853
+ auto table = info->bound_ref->table;
62854
+ for (idx_t col_idx = 0; col_idx < sink.column_distinct_stats.size(); col_idx++) {
62855
+ table->storage->SetStatistics(column_id_map.at(col_idx), [&](BaseStatistics &stats) {
62856
+ stats.distinct_stats = move(sink.column_distinct_stats[col_idx]);
62857
+ });
62858
+ }
62859
+
62860
+ return SinkFinalizeType::READY;
62861
+ }
62653
62862
 
62654
62863
  void PhysicalVacuum::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
62655
62864
  LocalSourceState &lstate) const {
@@ -76713,57 +76922,6 @@ protected:
76713
76922
 
76714
76923
  } // namespace duckdb
76715
76924
 
76716
- //===----------------------------------------------------------------------===//
76717
- // DuckDB
76718
- //
76719
- // duckdb/planner/operator/logical_get.hpp
76720
- //
76721
- //
76722
- //===----------------------------------------------------------------------===//
76723
-
76724
-
76725
-
76726
-
76727
-
76728
-
76729
-
76730
- namespace duckdb {
76731
-
76732
- //! LogicalGet represents a scan operation from a data source
76733
- class LogicalGet : public LogicalOperator {
76734
- public:
76735
- LogicalGet(idx_t table_index, TableFunction function, unique_ptr<FunctionData> bind_data,
76736
- vector<LogicalType> returned_types, vector<string> returned_names);
76737
-
76738
- //! The table index in the current bind context
76739
- idx_t table_index;
76740
- //! The function that is called
76741
- TableFunction function;
76742
- //! The bind data of the function
76743
- unique_ptr<FunctionData> bind_data;
76744
- //! The types of ALL columns that can be returned by the table function
76745
- vector<LogicalType> returned_types;
76746
- //! The names of ALL columns that can be returned by the table function
76747
- vector<string> names;
76748
- //! Bound column IDs
76749
- vector<column_t> column_ids;
76750
- //! Filters pushed down for table scan
76751
- TableFilterSet table_filters;
76752
-
76753
- string GetName() const override;
76754
- string ParamsToString() const override;
76755
- //! Returns the underlying table that is being scanned, or nullptr if there is none
76756
- TableCatalogEntry *GetTable() const;
76757
-
76758
- public:
76759
- vector<ColumnBinding> GetColumnBindings() override;
76760
-
76761
- idx_t EstimateCardinality(ClientContext &context) override;
76762
-
76763
- protected:
76764
- void ResolveTypes() override;
76765
- };
76766
- } // namespace duckdb
76767
76925
 
76768
76926
 
76769
76927
  namespace duckdb {
@@ -77705,6 +77863,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalShow &op)
77705
77863
  } // namespace duckdb
77706
77864
 
77707
77865
 
77866
+
77867
+
77868
+
77869
+
77870
+
77871
+
77872
+
77873
+
77708
77874
  //===----------------------------------------------------------------------===//
77709
77875
  // DuckDB
77710
77876
  //
@@ -77737,15 +77903,6 @@ protected:
77737
77903
  } // namespace duckdb
77738
77904
 
77739
77905
 
77740
-
77741
-
77742
-
77743
-
77744
-
77745
-
77746
-
77747
-
77748
-
77749
77906
  namespace duckdb {
77750
77907
 
77751
77908
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSimple &op) {
@@ -77758,9 +77915,19 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSimple &op
77758
77915
  case LogicalOperatorType::LOGICAL_TRANSACTION:
77759
77916
  return make_unique<PhysicalTransaction>(unique_ptr_cast<ParseInfo, TransactionInfo>(move(op.info)),
77760
77917
  op.estimated_cardinality);
77761
- case LogicalOperatorType::LOGICAL_VACUUM:
77762
- return make_unique<PhysicalVacuum>(unique_ptr_cast<ParseInfo, VacuumInfo>(move(op.info)),
77763
- op.estimated_cardinality);
77918
+ case LogicalOperatorType::LOGICAL_VACUUM: {
77919
+ auto &info = (VacuumInfo &)*op.info;
77920
+ if (!info.has_table) {
77921
+ return make_unique<PhysicalVacuum>(unique_ptr_cast<ParseInfo, VacuumInfo>(move(op.info)),
77922
+ op.estimated_cardinality);
77923
+ }
77924
+ info.bound_ref->get->ResolveOperatorTypes();
77925
+ auto child = CreatePlan(*info.bound_ref->get);
77926
+ auto result = make_unique<PhysicalVacuum>(unique_ptr_cast<ParseInfo, VacuumInfo>(move(op.info)),
77927
+ op.estimated_cardinality);
77928
+ result->children.push_back(move(child));
77929
+ return move(result);
77930
+ }
77764
77931
  case LogicalOperatorType::LOGICAL_LOAD:
77765
77932
  return make_unique<PhysicalLoad>(unique_ptr_cast<ParseInfo, LoadInfo>(move(op.info)), op.estimated_cardinality);
77766
77933
  default:
@@ -137503,7 +137670,7 @@ unique_ptr<LogicalOperator> RegexRangeFilter::Rewrite(unique_ptr<LogicalOperator
137503
137670
 
137504
137671
 
137505
137672
 
137506
- #include <map>
137673
+
137507
137674
 
137508
137675
  namespace duckdb {
137509
137676
 
@@ -137728,6 +137895,23 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
137728
137895
  everything_referenced = true;
137729
137896
  break;
137730
137897
  }
137898
+ case LogicalOperatorType::LOGICAL_VACUUM: {
137899
+ auto &vacuum = (LogicalSimple &)op;
137900
+ auto &info = (VacuumInfo &)*vacuum.info;
137901
+ if (!info.bound_ref) {
137902
+ break;
137903
+ }
137904
+
137905
+ auto &get = (LogicalGet &)*info.bound_ref->get;
137906
+ for (auto &col : info.columns) {
137907
+ for (idx_t col_idx = 0; col_idx < get.names.size(); col_idx++) {
137908
+ if (get.names[col_idx] == col) {
137909
+ get.column_ids.push_back(col_idx);
137910
+ break;
137911
+ }
137912
+ }
137913
+ }
137914
+ }
137731
137915
  default:
137732
137916
  break;
137733
137917
  }
@@ -140365,60 +140549,6 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundConjun
140365
140549
  } // namespace duckdb
140366
140550
 
140367
140551
 
140368
- //===----------------------------------------------------------------------===//
140369
- // DuckDB
140370
- //
140371
- // duckdb/storage/statistics/distinct_statistics.hpp
140372
- //
140373
- //
140374
- //===----------------------------------------------------------------------===//
140375
-
140376
-
140377
-
140378
-
140379
-
140380
-
140381
-
140382
- namespace duckdb {
140383
- class Serializer;
140384
- class Deserializer;
140385
- class Vector;
140386
-
140387
- class DistinctStatistics : public BaseStatistics {
140388
- public:
140389
- DistinctStatistics();
140390
- explicit DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count);
140391
-
140392
- //! The HLL of the table
140393
- unique_ptr<HyperLogLog> log;
140394
- //! How many values have been sampled into the HLL
140395
- atomic<idx_t> sample_count;
140396
- //! How many values have been inserted (before sampling)
140397
- atomic<idx_t> total_count;
140398
-
140399
- public:
140400
- void Merge(const BaseStatistics &other) override;
140401
-
140402
- unique_ptr<BaseStatistics> Copy() const override;
140403
-
140404
- void Serialize(Serializer &serializer) const override;
140405
- void Serialize(FieldWriter &writer) const override;
140406
-
140407
- static unique_ptr<DistinctStatistics> Deserialize(Deserializer &source);
140408
- static unique_ptr<DistinctStatistics> Deserialize(FieldReader &reader);
140409
-
140410
- void Update(Vector &update, idx_t count);
140411
- void Update(VectorData &update_data, const LogicalType &ptype, idx_t count);
140412
-
140413
- string ToString() const override;
140414
- idx_t GetCount() const;
140415
-
140416
- private:
140417
- //! For distinct statistics we sample the input to speed up insertions
140418
- static constexpr const double SAMPLE_RATE = 0.1;
140419
- };
140420
-
140421
- } // namespace duckdb
140422
140552
 
140423
140553
 
140424
140554
 
@@ -155180,7 +155310,7 @@ private:
155180
155310
  //! Transform a Postgres duckdb_libpgquery::T_PGImportStmt node into a PragmaStatement
155181
155311
  unique_ptr<PragmaStatement> TransformImport(duckdb_libpgquery::PGNode *node);
155182
155312
  unique_ptr<ExplainStatement> TransformExplain(duckdb_libpgquery::PGNode *node);
155183
- unique_ptr<VacuumStatement> TransformVacuum(duckdb_libpgquery::PGNode *node);
155313
+ unique_ptr<SQLStatement> TransformVacuum(duckdb_libpgquery::PGNode *node);
155184
155314
  unique_ptr<SQLStatement> TransformShow(duckdb_libpgquery::PGNode *node);
155185
155315
  unique_ptr<ShowStatement> TransformShowSelect(duckdb_libpgquery::PGNode *node);
155186
155316
 
@@ -157356,12 +157486,12 @@ namespace duckdb {
157356
157486
 
157357
157487
  class VacuumStatement : public SQLStatement {
157358
157488
  public:
157359
- VacuumStatement();
157489
+ explicit VacuumStatement(const VacuumOptions &options);
157360
157490
 
157361
157491
  unique_ptr<VacuumInfo> info;
157362
157492
 
157363
157493
  protected:
157364
- VacuumStatement(const VacuumStatement &other) : SQLStatement(other) {};
157494
+ VacuumStatement(const VacuumStatement &other);
157365
157495
 
157366
157496
  public:
157367
157497
  unique_ptr<SQLStatement> Copy() const override;
@@ -157372,7 +157502,11 @@ public:
157372
157502
 
157373
157503
  namespace duckdb {
157374
157504
 
157375
- VacuumStatement::VacuumStatement() : SQLStatement(StatementType::VACUUM_STATEMENT) {
157505
+ VacuumStatement::VacuumStatement(const VacuumOptions &options)
157506
+ : SQLStatement(StatementType::VACUUM_STATEMENT), info(make_unique<VacuumInfo>(options)) {
157507
+ }
157508
+
157509
+ VacuumStatement::VacuumStatement(const VacuumStatement &other) : SQLStatement(other), info(other.info->Copy()) {
157376
157510
  }
157377
157511
 
157378
157512
  unique_ptr<SQLStatement> VacuumStatement::Copy() const {
@@ -160976,7 +161110,6 @@ unique_ptr<CreateStatement> Transformer::TransformCreateFunction(duckdb_libpgque
160976
161110
  auto qname = TransformQualifiedName(stmt->name);
160977
161111
 
160978
161112
  unique_ptr<MacroFunction> macro_func;
160979
- ;
160980
161113
 
160981
161114
  // function can be null here
160982
161115
  if (stmt->function) {
@@ -162343,12 +162476,55 @@ unique_ptr<UpdateStatement> Transformer::TransformUpdate(duckdb_libpgquery::PGNo
162343
162476
 
162344
162477
  namespace duckdb {
162345
162478
 
162346
- unique_ptr<VacuumStatement> Transformer::TransformVacuum(duckdb_libpgquery::PGNode *node) {
162479
+ VacuumOptions ParseOptions(int options) {
162480
+ VacuumOptions result;
162481
+ if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_VACUUM) {
162482
+ result.vacuum = true;
162483
+ }
162484
+ if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_ANALYZE) {
162485
+ result.analyze = true;
162486
+ }
162487
+ if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_VERBOSE) {
162488
+ throw NotImplementedException("Verbose vacuum option");
162489
+ }
162490
+ if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_FREEZE) {
162491
+ throw NotImplementedException("Freeze vacuum option");
162492
+ }
162493
+ if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_FULL) {
162494
+ throw NotImplementedException("Full vacuum option");
162495
+ }
162496
+ if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_NOWAIT) {
162497
+ throw NotImplementedException("No Wait vacuum option");
162498
+ }
162499
+ if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_SKIPTOAST) {
162500
+ throw NotImplementedException("Skip Toast vacuum option");
162501
+ }
162502
+ if (options & duckdb_libpgquery::PGVacuumOption::PG_VACOPT_DISABLE_PAGE_SKIPPING) {
162503
+ throw NotImplementedException("Disable Page Skipping vacuum option");
162504
+ }
162505
+ return result;
162506
+ }
162507
+
162508
+ unique_ptr<SQLStatement> Transformer::TransformVacuum(duckdb_libpgquery::PGNode *node) {
162347
162509
  auto stmt = reinterpret_cast<duckdb_libpgquery::PGVacuumStmt *>(node);
162348
162510
  D_ASSERT(stmt);
162349
- (void)stmt;
162350
- auto result = make_unique<VacuumStatement>();
162351
- return result;
162511
+
162512
+ auto result = make_unique<VacuumStatement>(ParseOptions(stmt->options));
162513
+
162514
+ if (stmt->relation) {
162515
+ result->info->ref = TransformRangeVar(stmt->relation);
162516
+ result->info->has_table = true;
162517
+ }
162518
+
162519
+ if (stmt->va_cols) {
162520
+ D_ASSERT(result->info->has_table);
162521
+ for (auto col_node = stmt->va_cols->head; col_node != nullptr; col_node = col_node->next) {
162522
+ result->info->columns.emplace_back(
162523
+ reinterpret_cast<duckdb_libpgquery::PGValue *>(col_node->data.ptr_value)->val.str);
162524
+ }
162525
+ }
162526
+
162527
+ return move(result);
162352
162528
  }
162353
162529
 
162354
162530
  } // namespace duckdb
@@ -163364,35 +163540,6 @@ protected:
163364
163540
 
163365
163541
 
163366
163542
 
163367
- //===----------------------------------------------------------------------===//
163368
- // DuckDB
163369
- //
163370
- // duckdb/planner/bound_tableref.hpp
163371
- //
163372
- //
163373
- //===----------------------------------------------------------------------===//
163374
-
163375
-
163376
-
163377
-
163378
-
163379
-
163380
-
163381
- namespace duckdb {
163382
-
163383
- class BoundTableRef {
163384
- public:
163385
- explicit BoundTableRef(TableReferenceType type) : type(type) {
163386
- }
163387
- virtual ~BoundTableRef() {
163388
- }
163389
-
163390
- //! The type of table reference
163391
- TableReferenceType type;
163392
- //! The sample options (if any)
163393
- unique_ptr<SampleOptions> sample;
163394
- };
163395
- } // namespace duckdb
163396
163543
 
163397
163544
 
163398
163545
 
@@ -167207,33 +167354,6 @@ struct BoundCreateFunctionInfo {
167207
167354
 
167208
167355
 
167209
167356
 
167210
- //===----------------------------------------------------------------------===//
167211
- // DuckDB
167212
- //
167213
- // duckdb/planner/tableref/bound_basetableref.hpp
167214
- //
167215
- //
167216
- //===----------------------------------------------------------------------===//
167217
-
167218
-
167219
-
167220
-
167221
-
167222
-
167223
- namespace duckdb {
167224
- class TableCatalogEntry;
167225
-
167226
- //! Represents a TableReference to a base table in the schema
167227
- class BoundBaseTableRef : public BoundTableRef {
167228
- public:
167229
- BoundBaseTableRef(TableCatalogEntry *table, unique_ptr<LogicalOperator> get)
167230
- : BoundTableRef(TableReferenceType::BASE_TABLE), table(table), get(move(get)) {
167231
- }
167232
-
167233
- TableCatalogEntry *table;
167234
- unique_ptr<LogicalOperator> get;
167235
- };
167236
- } // namespace duckdb
167237
167357
 
167238
167358
 
167239
167359
 
@@ -169300,10 +169420,27 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) {
169300
169420
 
169301
169421
 
169302
169422
 
169423
+
169303
169424
  namespace duckdb {
169304
169425
 
169305
169426
  BoundStatement Binder::Bind(VacuumStatement &stmt) {
169306
169427
  BoundStatement result;
169428
+
169429
+ if (stmt.info->has_table) {
169430
+ auto bound_table = Bind(*stmt.info->ref);
169431
+ if (bound_table->type != TableReferenceType::BASE_TABLE) {
169432
+ throw InvalidInputException("Can only vacuum/analyze base tables!");
169433
+ }
169434
+ stmt.info->bound_ref = unique_ptr_cast<BoundTableRef, BoundBaseTableRef>(move(bound_table));
169435
+
169436
+ auto &columns = stmt.info->columns;
169437
+ if (columns.empty()) {
169438
+ // Empty means ALL columns should be vacuumed/analyzed
169439
+ auto &get = (LogicalGet &)*stmt.info->bound_ref->get;
169440
+ columns.insert(columns.end(), get.names.begin(), get.names.end());
169441
+ }
169442
+ }
169443
+
169307
169444
  result.names = {"Success"};
169308
169445
  result.types = {LogicalType::BOOLEAN};
169309
169446
  result.plan = make_unique<LogicalSimple>(LogicalOperatorType::LOGICAL_VACUUM, move(stmt.info));
@@ -182698,7 +182835,6 @@ private:
182698
182835
 
182699
182836
 
182700
182837
 
182701
-
182702
182838
  namespace duckdb {
182703
182839
 
182704
182840
  DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &table,
@@ -184007,6 +184143,12 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
184007
184143
  return column_stats[column_id]->stats->Copy();
184008
184144
  }
184009
184145
 
184146
+ void DataTable::SetStatistics(column_t column_id, const std::function<void(BaseStatistics &)> &set_fun) {
184147
+ D_ASSERT(column_id != COLUMN_IDENTIFIER_ROW_ID);
184148
+ lock_guard<mutex> stats_guard(stats_lock);
184149
+ set_fun(*column_stats[column_id]->stats);
184150
+ }
184151
+
184010
184152
  //===--------------------------------------------------------------------===//
184011
184153
  // Checkpoint
184012
184154
  //===--------------------------------------------------------------------===//
@@ -185520,7 +185662,7 @@ unique_ptr<BaseStatistics> DistinctStatistics::Copy() const {
185520
185662
  void DistinctStatistics::Merge(const BaseStatistics &other_p) {
185521
185663
  BaseStatistics::Merge(other_p);
185522
185664
  auto &other = (const DistinctStatistics &)other_p;
185523
- log->Merge(*other.log);
185665
+ log = log->Merge(*other.log);
185524
185666
  sample_count += other.sample_count;
185525
185667
  total_count += other.total_count;
185526
185668
  }
@@ -185550,18 +185692,21 @@ unique_ptr<DistinctStatistics> DistinctStatistics::Deserialize(FieldReader &read
185550
185692
  return make_unique<DistinctStatistics>(HyperLogLog::Deserialize(reader), sample_count, total_count);
185551
185693
  }
185552
185694
 
185553
- void DistinctStatistics::Update(Vector &v, idx_t count) {
185695
+ void DistinctStatistics::Update(Vector &v, idx_t count, bool sample) {
185554
185696
  VectorData vdata;
185555
185697
  v.Orrify(count, vdata);
185556
- Update(vdata, v.GetType(), count);
185698
+ Update(vdata, v.GetType(), count, sample);
185557
185699
  }
185558
185700
 
185559
- void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_t count) {
185701
+ void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_t count, bool sample) {
185560
185702
  if (count == 0) {
185561
185703
  return;
185562
185704
  }
185705
+
185563
185706
  total_count += count;
185564
- count = MinValue<idx_t>(idx_t(SAMPLE_RATE * MaxValue<idx_t>(STANDARD_VECTOR_SIZE, count)), count);
185707
+ if (sample) {
185708
+ count = MinValue<idx_t>(idx_t(SAMPLE_RATE * MaxValue<idx_t>(STANDARD_VECTOR_SIZE, count)), count);
185709
+ }
185565
185710
  sample_count += count;
185566
185711
 
185567
185712
  uint64_t indices[STANDARD_VECTOR_SIZE];