duckdb 0.5.1-dev21.0 → 0.5.1-dev216.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -620,7 +620,88 @@ public:
620
620
 
621
621
  } // namespace duckdb
622
622
 
623
+ //===----------------------------------------------------------------------===//
624
+ // DuckDB
625
+ //
626
+ // extension_functions.hpp
627
+ //
628
+ //
629
+ //===----------------------------------------------------------------------===//
630
+
631
+
632
+
633
+
623
634
 
635
namespace duckdb {

//! Associates the name of a function with the name of the extension that provides it.
//! Both names are stored inline as fixed-size, NUL-terminated character arrays so that the
//! lookup table below can be a constexpr aggregate.
struct ExtensionFunction {
	//! Name of the function (at most 47 characters + terminating NUL)
	char function[48];
	//! Name of the extension that contains the function (at most 47 characters + terminating NUL)
	char extension[48];
};

//! Functions that are not part of the core catalog but are provided by an extension.
//! The entries MUST be sorted by function name in strictly ascending byte order, because
//! FindExtension performs a binary search (std::lower_bound) over this table.
//! The static_assert below enforces the ordering at compile time.
static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
    {"->>", "json"},
    {"array_to_json", "json"},
    {"create_fts_index", "fts"},
    {"dbgen", "tpch"},
    {"drop_fts_index", "fts"},
    {"dsdgen", "tpcds"},
    {"excel_text", "excel"},
    {"from_json", "json"},
    {"from_json_strict", "json"},
    {"from_substrait", "substrait"},
    {"get_substrait", "substrait"},
    {"get_substrait_json", "substrait"},
    {"icu_calendar_names", "icu"},
    {"icu_sort_key", "icu"},
    {"json", "json"},
    {"json_array", "json"},
    {"json_array_length", "json"},
    {"json_extract", "json"},
    {"json_extract_path", "json"},
    {"json_extract_path_text", "json"},
    {"json_extract_string", "json"},
    {"json_group_array", "json"},
    {"json_group_object", "json"},
    {"json_group_structure", "json"},
    {"json_merge_patch", "json"},
    {"json_object", "json"},
    {"json_quote", "json"},
    {"json_structure", "json"},
    {"json_transform", "json"},
    {"json_transform_strict", "json"},
    {"json_type", "json"},
    {"json_valid", "json"},
    {"make_timestamptz", "icu"},
    {"parquet_metadata", "parquet"},
    {"parquet_scan", "parquet"},
    {"parquet_schema", "parquet"},
    {"pg_timezone_names", "icu"},
    {"postgres_attach", "postgres_scanner"},
    {"postgres_scan", "postgres_scanner"},
    {"postgres_scan_pushdown", "postgres_scanner"},
    {"read_json_objects", "json"},
    {"read_ndjson_objects", "json"},
    {"read_parquet", "parquet"},
    {"row_to_json", "json"},
    {"sqlite_attach", "sqlite_scanner"},
    {"sqlite_scan", "sqlite_scanner"},
    {"stem", "fts"},
    {"text", "excel"},
    {"to_json", "json"},
    {"tpcds", "tpcds"},
    {"tpcds_answers", "tpcds"},
    {"tpcds_queries", "tpcds"},
    {"tpch", "tpch"},
    {"tpch_answers", "tpch"},
    {"tpch_queries", "tpch"},
    {"visualize_diff_profiling_output", "visualizer"},
    {"visualize_json_profiling_output", "visualizer"},
    {"visualize_last_profiling_output", "visualizer"},
};

//! Compile-time strict "less than" on two NUL-terminated names.
//! Characters are compared as unsigned char, matching the ordering std::string comparison uses.
//! Written as a single return statement so that it is a valid C++11 constexpr function.
constexpr bool ExtensionFunctionNameLess(const char *lhs, const char *rhs) {
	return static_cast<unsigned char>(*lhs) != static_cast<unsigned char>(*rhs)
	           ? static_cast<unsigned char>(*lhs) < static_cast<unsigned char>(*rhs)
	           : (*lhs != '\0' && ExtensionFunctionNameLess(lhs + 1, rhs + 1));
}

//! Compile-time check that [begin, end) is strictly ascending by function name (no duplicates).
constexpr bool ExtensionFunctionsSorted(const ExtensionFunction *begin, const ExtensionFunction *end) {
	return end - begin < 2 || (ExtensionFunctionNameLess(begin[0].function, begin[1].function) &&
	                           ExtensionFunctionsSorted(begin + 1, end));
}

static_assert(ExtensionFunctionsSorted(EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + sizeof(EXTENSION_FUNCTIONS) /
                                                                                      sizeof(ExtensionFunction)),
              "EXTENSION_FUNCTIONS must be sorted by function name: it is searched with std::lower_bound");

} // namespace duckdb
703
+
704
+ #include <algorithm>
624
705
  namespace duckdb {
625
706
 
626
707
  string SimilarCatalogEntry::GetQualifiedName() const {
@@ -823,6 +904,16 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
823
904
  return {most_similar.first, most_similar.second, schema_of_most_similar};
824
905
  }
825
906
 
907
+ string FindExtension(const string &function_name) {
908
+ auto size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction);
909
+ auto it = std::lower_bound(
910
+ EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + size, function_name,
911
+ [](const ExtensionFunction &element, const string &value) { return element.function < value; });
912
+ if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
913
+ return it->extension;
914
+ }
915
+ return "";
916
+ }
826
917
  CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
827
918
  CatalogType type, const vector<SchemaCatalogEntry *> &schemas,
828
919
  QueryErrorContext error_context) {
@@ -836,7 +927,12 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
836
927
  }
837
928
  });
838
929
  auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
839
-
930
+ auto extension_name = FindExtension(entry_name);
931
+ if (!extension_name.empty()) {
932
+ return CatalogException("Function with name %s is not on the catalog, but it exists in the %s extension. To "
933
+ "Install and Load the extension, run: INSTALL %s; LOAD %s;",
934
+ entry_name, extension_name, extension_name, extension_name);
935
+ }
840
936
  string did_you_mean;
841
937
  if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
842
938
  did_you_mean = "\nDid you mean \"" + unseen_entry.GetQualifiedName() + "\"?";
@@ -3667,6 +3763,19 @@ idx_t TableCatalogEntry::StandardColumnCount() const {
3667
3763
  return count;
3668
3764
  }
3669
3765
 
3766
+ unique_ptr<BaseStatistics> TableCatalogEntry::GetStatistics(ClientContext &context, column_t column_id) {
3767
+ if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
3768
+ return nullptr;
3769
+ }
3770
+ if (column_id >= columns.size()) {
3771
+ throw InternalException("TableCatalogEntry::GetStatistics column_id out of range");
3772
+ }
3773
+ if (columns[column_id].Generated()) {
3774
+ return nullptr;
3775
+ }
3776
+ return storage->GetStatistics(context, columns[column_id].StorageOid());
3777
+ }
3778
+
3670
3779
  unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) {
3671
3780
  D_ASSERT(!internal);
3672
3781
  if (info->type != AlterType::ALTER_TABLE) {
@@ -3942,7 +4051,7 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
3942
4051
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
3943
4052
  storage);
3944
4053
  }
3945
- auto new_storage = make_shared<DataTable>(context, *storage, removed_index);
4054
+ auto new_storage = make_shared<DataTable>(context, *storage, columns[removed_index].StorageOid());
3946
4055
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
3947
4056
  new_storage);
3948
4057
  }
@@ -3956,7 +4065,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetDefault(ClientContext &context, S
3956
4065
  auto copy = columns[i].Copy();
3957
4066
  if (default_idx == i) {
3958
4067
  // set the default value of this column
3959
- D_ASSERT(!copy.Generated()); // Shouldnt reach here - DEFAULT value isn't supported for Generated Columns
4068
+ if (copy.Generated()) {
4069
+ throw BinderException("Cannot SET DEFAULT for generated column \"%s\"", columns[i].Name());
4070
+ }
3960
4071
  copy.SetDefaultValue(info.expression ? info.expression->Copy() : nullptr);
3961
4072
  }
3962
4073
  create_info->columns.push_back(move(copy));
@@ -3981,6 +4092,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
3981
4092
  }
3982
4093
 
3983
4094
  idx_t not_null_idx = GetColumnIndex(info.column_name);
4095
+ if (columns[not_null_idx].Generated()) {
4096
+ throw BinderException("Unsupported constraint for generated column!");
4097
+ }
3984
4098
  bool has_not_null = false;
3985
4099
  for (idx_t i = 0; i < constraints.size(); i++) {
3986
4100
  auto constraint = constraints[i]->Copy();
@@ -4004,8 +4118,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
4004
4118
  storage);
4005
4119
  }
4006
4120
 
4007
- // Return with new storage info
4008
- auto new_storage = make_shared<DataTable>(context, *storage, make_unique<NotNullConstraint>(not_null_idx));
4121
+ // Return with new storage info. Note that we need the bound column index here.
4122
+ auto new_storage = make_shared<DataTable>(context, *storage,
4123
+ make_unique<BoundNotNullConstraint>(columns[not_null_idx].StorageOid()));
4009
4124
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
4010
4125
  new_storage);
4011
4126
  }
@@ -4111,12 +4226,19 @@ unique_ptr<CatalogEntry> TableCatalogEntry::ChangeColumnType(ClientContext &cont
4111
4226
  auto expression = info.expression->Copy();
4112
4227
  auto bound_expression = expr_binder.Bind(expression);
4113
4228
  auto bound_create_info = binder->BindCreateTableInfo(move(create_info));
4229
+ vector<column_t> storage_oids;
4114
4230
  if (bound_columns.empty()) {
4115
- bound_columns.push_back(COLUMN_IDENTIFIER_ROW_ID);
4231
+ storage_oids.push_back(COLUMN_IDENTIFIER_ROW_ID);
4232
+ }
4233
+ // transform to storage_oid
4234
+ else {
4235
+ for (idx_t i = 0; i < bound_columns.size(); i++) {
4236
+ storage_oids.push_back(columns[bound_columns[i]].StorageOid());
4237
+ }
4116
4238
  }
4117
4239
 
4118
- auto new_storage =
4119
- make_shared<DataTable>(context, *storage, change_idx, info.target_type, move(bound_columns), *bound_expression);
4240
+ auto new_storage = make_shared<DataTable>(context, *storage, columns[change_idx].StorageOid(), info.target_type,
4241
+ move(storage_oids), *bound_expression);
4120
4242
  auto result =
4121
4243
  make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), new_storage);
4122
4244
  return move(result);
@@ -4364,7 +4486,7 @@ void TableCatalogEntry::CommitAlter(AlterInfo &info) {
4364
4486
  }
4365
4487
  }
4366
4488
  D_ASSERT(removed_index != DConstants::INVALID_INDEX);
4367
- storage->CommitDropColumn(removed_index);
4489
+ storage->CommitDropColumn(columns[removed_index].StorageOid());
4368
4490
  }
4369
4491
 
4370
4492
  void TableCatalogEntry::CommitDrop() {
@@ -4934,12 +5056,16 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
4934
5056
  throw CatalogException(rename_err_msg, original_name, value->name);
4935
5057
  }
4936
5058
  }
4937
- PutMapping(context, value->name, entry_index);
4938
- DeleteMapping(context, original_name);
4939
5059
  }
4940
5060
  //! Check the dependency manager to verify that there are no conflicting dependencies with this alter
4941
5061
  catalog.dependency_manager->AlterObject(context, entry, value.get());
4942
5062
 
5063
+ if (value->name != original_name) {
5064
+ // Do PutMapping and DeleteMapping after dependency check
5065
+ PutMapping(context, value->name, entry_index);
5066
+ DeleteMapping(context, original_name);
5067
+ }
5068
+
4943
5069
  value->timestamp = transaction.transaction_id;
4944
5070
  value->child = move(entries[entry_index]);
4945
5071
  value->child->parent = value.get();
@@ -6506,7 +6632,7 @@ static void GetBitPosition(idx_t row_idx, idx_t &current_byte, uint8_t &current_
6506
6632
  }
6507
6633
 
6508
6634
  static void UnsetBit(uint8_t *data, idx_t current_byte, uint8_t current_bit) {
6509
- data[current_byte] &= ~(1 << current_bit);
6635
+ data[current_byte] &= ~((uint64_t)1 << current_bit);
6510
6636
  }
6511
6637
 
6512
6638
  static void NextBit(idx_t &current_byte, uint8_t &current_bit) {
@@ -16715,9 +16841,15 @@ string FileSystem::ConvertSeparators(const string &path) {
16715
16841
  }
16716
16842
 
16717
16843
  string FileSystem::ExtractBaseName(const string &path) {
16844
+ if (path.empty()) {
16845
+ return string();
16846
+ }
16718
16847
  auto normalized_path = ConvertSeparators(path);
16719
16848
  auto sep = PathSeparator();
16720
- auto vec = StringUtil::Split(StringUtil::Split(normalized_path, sep).back(), ".");
16849
+ auto splits = StringUtil::Split(normalized_path, sep);
16850
+ D_ASSERT(!splits.empty());
16851
+ auto vec = StringUtil::Split(splits.back(), ".");
16852
+ D_ASSERT(!vec.empty());
16721
16853
  return vec[0];
16722
16854
  }
16723
16855
 
@@ -19102,6 +19234,8 @@ private:
19102
19234
  //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
19103
19235
  void SetFilePointer(FileHandle &handle, idx_t location);
19104
19236
  idx_t GetFilePointer(FileHandle &handle);
19237
+
19238
+ vector<string> FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path);
19105
19239
  };
19106
19240
 
19107
19241
  } // namespace duckdb
@@ -19983,6 +20117,26 @@ static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bo
19983
20117
  });
19984
20118
  }
19985
20119
 
20120
+ vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) {
20121
+ vector<string> result;
20122
+ if (FileExists(path) || IsPipe(path)) {
20123
+ result.push_back(path);
20124
+ } else if (!absolute_path) {
20125
+ Value value;
20126
+ if (opener->TryGetCurrentSetting("file_search_path", value)) {
20127
+ auto search_paths_str = value.ToString();
20128
+ std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
20129
+ for (const auto &search_path : search_paths) {
20130
+ auto joined_path = JoinPath(search_path, path);
20131
+ if (FileExists(joined_path) || IsPipe(joined_path)) {
20132
+ result.push_back(joined_path);
20133
+ }
20134
+ }
20135
+ }
20136
+ }
20137
+ return result;
20138
+ }
20139
+
19986
20140
  vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
19987
20141
  if (path.empty()) {
19988
20142
  return vector<string>();
@@ -20029,23 +20183,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20029
20183
  // Check if the path has a glob at all
20030
20184
  if (!HasGlob(path)) {
20031
20185
  // no glob: return only the file (if it exists or is a pipe)
20032
- vector<string> result;
20033
- if (FileExists(path) || IsPipe(path)) {
20034
- result.push_back(path);
20035
- } else if (!absolute_path) {
20036
- Value value;
20037
- if (opener->TryGetCurrentSetting("file_search_path", value)) {
20038
- auto search_paths_str = value.ToString();
20039
- std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
20040
- for (const auto &search_path : search_paths) {
20041
- auto joined_path = JoinPath(search_path, path);
20042
- if (FileExists(joined_path) || IsPipe(joined_path)) {
20043
- result.push_back(joined_path);
20044
- }
20045
- }
20046
- }
20047
- }
20048
- return result;
20186
+ return FetchFileWithoutGlob(path, opener, absolute_path);
20049
20187
  }
20050
20188
  vector<string> previous_directories;
20051
20189
  if (absolute_path) {
@@ -20079,7 +20217,12 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20079
20217
  }
20080
20218
  }
20081
20219
  }
20082
- if (is_last_chunk || result.empty()) {
20220
+ if (result.empty()) {
20221
+ // no result found that matches the glob
20222
+ // last ditch effort: search the path as a string literal
20223
+ return FetchFileWithoutGlob(path, opener, absolute_path);
20224
+ }
20225
+ if (is_last_chunk) {
20083
20226
  return result;
20084
20227
  }
20085
20228
  previous_directories = move(result);
@@ -22428,14 +22571,16 @@ struct IntervalToStringCast {
22428
22571
  if (micros < 0) {
22429
22572
  // negative time: append negative sign
22430
22573
  buffer[length++] = '-';
22574
+ } else {
22431
22575
  micros = -micros;
22432
22576
  }
22433
- int64_t hour = micros / Interval::MICROS_PER_HOUR;
22434
- micros -= hour * Interval::MICROS_PER_HOUR;
22435
- int64_t min = micros / Interval::MICROS_PER_MINUTE;
22436
- micros -= min * Interval::MICROS_PER_MINUTE;
22437
- int64_t sec = micros / Interval::MICROS_PER_SEC;
22438
- micros -= sec * Interval::MICROS_PER_SEC;
22577
+ int64_t hour = -(micros / Interval::MICROS_PER_HOUR);
22578
+ micros += hour * Interval::MICROS_PER_HOUR;
22579
+ int64_t min = -(micros / Interval::MICROS_PER_MINUTE);
22580
+ micros += min * Interval::MICROS_PER_MINUTE;
22581
+ int64_t sec = -(micros / Interval::MICROS_PER_SEC);
22582
+ micros += sec * Interval::MICROS_PER_SEC;
22583
+ micros = -micros;
22439
22584
 
22440
22585
  if (hour < 10) {
22441
22586
  buffer[length++] = '0';
@@ -28558,7 +28703,7 @@ template <idx_t radix_bits>
28558
28703
  struct RadixPartitioningConstants {
28559
28704
  public:
28560
28705
  static constexpr const idx_t NUM_RADIX_BITS = radix_bits;
28561
- static constexpr const idx_t NUM_PARTITIONS = 1 << NUM_RADIX_BITS;
28706
+ static constexpr const idx_t NUM_PARTITIONS = (idx_t)1 << NUM_RADIX_BITS;
28562
28707
  static constexpr const idx_t TMP_BUF_SIZE = 8;
28563
28708
 
28564
28709
  public:
@@ -28576,7 +28721,7 @@ private:
28576
28721
  struct RadixPartitioning {
28577
28722
  public:
28578
28723
  static idx_t NumberOfPartitions(idx_t radix_bits) {
28579
- return 1 << radix_bits;
28724
+ return (idx_t)1 << radix_bits;
28580
28725
  }
28581
28726
 
28582
28727
  //! Partition the data in block_collection/string_heap to multiple partitions
@@ -33336,6 +33481,9 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
33336
33481
 
33337
33482
  static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
33338
33483
  #ifdef DEBUG
33484
+ if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
33485
+ return;
33486
+ }
33339
33487
  idx_t entry_idx;
33340
33488
  idx_t idx_in_entry;
33341
33489
  ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
@@ -35673,7 +35821,10 @@ struct SortConstants {
35673
35821
 
35674
35822
  struct SortLayout {
35675
35823
  public:
35824
+ SortLayout() {
35825
+ }
35676
35826
  explicit SortLayout(const vector<BoundOrderByNode> &orders);
35827
+ SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const;
35677
35828
 
35678
35829
  public:
35679
35830
  idx_t column_count;
@@ -37324,6 +37475,32 @@ SortLayout::SortLayout(const vector<BoundOrderByNode> &orders)
37324
37475
  blob_layout.Initialize(blob_layout_types);
37325
37476
  }
37326
37477
 
37478
+ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
37479
+ SortLayout result;
37480
+ result.column_count = num_prefix_cols;
37481
+ result.all_constant = true;
37482
+ result.comparison_size = 0;
37483
+ for (idx_t col_idx = 0; col_idx < num_prefix_cols; col_idx++) {
37484
+ result.order_types.push_back(order_types[col_idx]);
37485
+ result.order_by_null_types.push_back(order_by_null_types[col_idx]);
37486
+ result.logical_types.push_back(logical_types[col_idx]);
37487
+
37488
+ result.all_constant = result.all_constant && constant_size[col_idx];
37489
+ result.constant_size.push_back(constant_size[col_idx]);
37490
+
37491
+ result.comparison_size += column_sizes[col_idx];
37492
+ result.column_sizes.push_back(column_sizes[col_idx]);
37493
+
37494
+ result.prefix_lengths.push_back(prefix_lengths[col_idx]);
37495
+ result.stats.push_back(stats[col_idx]);
37496
+ result.has_null.push_back(has_null[col_idx]);
37497
+ }
37498
+ result.entry_size = entry_size;
37499
+ result.blob_layout = blob_layout;
37500
+ result.sorting_to_blob_col = sorting_to_blob_col;
37501
+ return result;
37502
+ }
37503
+
37327
37504
  LocalSortState::LocalSortState() : initialized(false) {
37328
37505
  }
37329
37506
 
@@ -47573,11 +47750,36 @@ Value Value::CreateValue(dtime_t value) {
47573
47750
  return Value::TIME(value);
47574
47751
  }
47575
47752
 
47753
+ template <>
47754
+ Value Value::CreateValue(dtime_tz_t value) {
47755
+ return Value::TIMETZ(value);
47756
+ }
47757
+
47576
47758
  template <>
47577
47759
  Value Value::CreateValue(timestamp_t value) {
47578
47760
  return Value::TIMESTAMP(value);
47579
47761
  }
47580
47762
 
47763
+ template <>
47764
+ Value Value::CreateValue(timestamp_sec_t value) {
47765
+ return Value::TIMESTAMPSEC(value);
47766
+ }
47767
+
47768
+ template <>
47769
+ Value Value::CreateValue(timestamp_ms_t value) {
47770
+ return Value::TIMESTAMPMS(value);
47771
+ }
47772
+
47773
+ template <>
47774
+ Value Value::CreateValue(timestamp_ns_t value) {
47775
+ return Value::TIMESTAMPNS(value);
47776
+ }
47777
+
47778
+ template <>
47779
+ Value Value::CreateValue(timestamp_tz_t value) {
47780
+ return Value::TIMESTAMPTZ(value);
47781
+ }
47782
+
47581
47783
  template <>
47582
47784
  Value Value::CreateValue(const char *value) {
47583
47785
  return Value(string(value));
@@ -49150,19 +49352,6 @@ void Vector::Resize(idx_t cur_size, idx_t new_size) {
49150
49352
  }
49151
49353
  }
49152
49354
 
49153
- // FIXME Just like DECIMAL, it's important that type_info gets considered when determining whether or not to cast
49154
- // just comparing internal type is not always enough
49155
- static bool ValueShouldBeCast(const LogicalType &incoming, const LogicalType &target) {
49156
- if (incoming.InternalType() != target.InternalType()) {
49157
- return true;
49158
- }
49159
- if (incoming.id() == LogicalTypeId::DECIMAL && incoming.id() == target.id()) {
49160
- //! Compare the type_info
49161
- return incoming != target;
49162
- }
49163
- return false;
49164
- }
49165
-
49166
49355
  void Vector::SetValue(idx_t index, const Value &val) {
49167
49356
  if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
49168
49357
  // dictionary: apply dictionary and forward to child
@@ -49170,10 +49359,11 @@ void Vector::SetValue(idx_t index, const Value &val) {
49170
49359
  auto &child = DictionaryVector::Child(*this);
49171
49360
  return child.SetValue(sel_vector.get_index(index), val);
49172
49361
  }
49173
- if (ValueShouldBeCast(val.type(), GetType())) {
49362
+ if (val.type() != GetType()) {
49174
49363
  SetValue(index, val.CastAs(GetType()));
49175
49364
  return;
49176
49365
  }
49366
+ D_ASSERT(val.type().InternalType() == GetType().InternalType());
49177
49367
 
49178
49368
  validity.EnsureWritable();
49179
49369
  validity.Set(index, !val.IsNull());
@@ -49424,7 +49614,10 @@ Value Vector::GetValue(const Vector &v_p, idx_t index_p) {
49424
49614
  auto value = GetValueInternal(v_p, index_p);
49425
49615
  // set the alias of the type to the correct value, if there is a type alias
49426
49616
  if (v_p.GetType().HasAlias()) {
49427
- value.type().SetAlias(v_p.GetType().GetAlias());
49617
+ value.type().CopyAuxInfo(v_p.GetType());
49618
+ }
49619
+ if (v_p.GetType().id() != LogicalTypeId::AGGREGATE_STATE && value.type().id() != LogicalTypeId::AGGREGATE_STATE) {
49620
+ D_ASSERT(v_p.GetType() == value.type());
49428
49621
  }
49429
49622
  return value;
49430
49623
  }
@@ -51491,6 +51684,7 @@ public:
51491
51684
  if (!alias.empty()) {
51492
51685
  return false;
51493
51686
  }
51687
+ //! We only need to compare aliases when both types have them in this case
51494
51688
  return true;
51495
51689
  }
51496
51690
  if (alias != other_p->alias) {
@@ -51504,8 +51698,7 @@ public:
51504
51698
  if (type != other_p->type) {
51505
51699
  return false;
51506
51700
  }
51507
- auto &other = (ExtraTypeInfo &)*other_p;
51508
- return alias == other.alias && EqualsInternal(other_p);
51701
+ return alias == other_p->alias && EqualsInternal(other_p);
51509
51702
  }
51510
51703
  //! Serializes a ExtraTypeInfo to a stand-alone binary blob
51511
51704
  virtual void Serialize(FieldWriter &writer) const {};
@@ -52184,10 +52377,7 @@ LogicalType LogicalType::Deserialize(Deserializer &source) {
52184
52377
  return LogicalType(id, move(info));
52185
52378
  }
52186
52379
 
52187
- bool LogicalType::operator==(const LogicalType &rhs) const {
52188
- if (id_ != rhs.id_) {
52189
- return false;
52190
- }
52380
+ bool LogicalType::EqualTypeInfo(const LogicalType &rhs) const {
52191
52381
  if (type_info_.get() == rhs.type_info_.get()) {
52192
52382
  return true;
52193
52383
  }
@@ -52199,6 +52389,13 @@ bool LogicalType::operator==(const LogicalType &rhs) const {
52199
52389
  }
52200
52390
  }
52201
52391
 
52392
+ bool LogicalType::operator==(const LogicalType &rhs) const {
52393
+ if (id_ != rhs.id_) {
52394
+ return false;
52395
+ }
52396
+ return EqualTypeInfo(rhs);
52397
+ }
52398
+
52202
52399
  } // namespace duckdb
52203
52400
 
52204
52401
 
@@ -63069,6 +63266,16 @@ bool DistinctAggregateData::IsDistinct(idx_t index) const {
63069
63266
 
63070
63267
 
63071
63268
 
63269
+ //===----------------------------------------------------------------------===//
63270
+ // DuckDB
63271
+ //
63272
+ // duckdb/parallel/base_pipeline_event.hpp
63273
+ //
63274
+ //
63275
+ //===----------------------------------------------------------------------===//
63276
+
63277
+
63278
+
63072
63279
  //===----------------------------------------------------------------------===//
63073
63280
  // DuckDB
63074
63281
  //
@@ -63142,6 +63349,22 @@ protected:
63142
63349
 
63143
63350
 
63144
63351
 
63352
+ namespace duckdb {
63353
+
63354
+ //! A BasePipelineEvent is used as the basis of any event that belongs to a specific pipeline
63355
+ class BasePipelineEvent : public Event {
63356
+ public:
63357
+ BasePipelineEvent(shared_ptr<Pipeline> pipeline);
63358
+ BasePipelineEvent(Pipeline &pipeline);
63359
+
63360
+ //! The pipeline that this event belongs to
63361
+ shared_ptr<Pipeline> pipeline;
63362
+ };
63363
+
63364
+ } // namespace duckdb
63365
+
63366
+
63367
+
63145
63368
  namespace duckdb {
63146
63369
 
63147
63370
  PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
@@ -63298,16 +63521,15 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
63298
63521
  }
63299
63522
  }
63300
63523
 
63301
- class HashAggregateFinalizeEvent : public Event {
63524
+ class HashAggregateFinalizeEvent : public BasePipelineEvent {
63302
63525
  public:
63303
63526
  HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
63304
63527
  Pipeline *pipeline_p)
63305
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p) {
63528
+ : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
63306
63529
  }
63307
63530
 
63308
63531
  const PhysicalHashAggregate &op;
63309
63532
  HashAggregateGlobalState &gstate;
63310
- Pipeline *pipeline;
63311
63533
 
63312
63534
  public:
63313
63535
  void Schedule() override {
@@ -64569,15 +64791,14 @@ private:
64569
64791
  };
64570
64792
 
64571
64793
  // TODO: Create tasks and run these in parallel instead of doing this all in Schedule, single threaded
64572
- class DistinctAggregateFinalizeEvent : public Event {
64794
+ class DistinctAggregateFinalizeEvent : public BasePipelineEvent {
64573
64795
  public:
64574
64796
  DistinctAggregateFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
64575
- Pipeline *pipeline_p, ClientContext &context)
64576
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), context(context) {
64797
+ Pipeline &pipeline_p, ClientContext &context)
64798
+ : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
64577
64799
  }
64578
64800
  const PhysicalUngroupedAggregate &op;
64579
64801
  UngroupedAggregateGlobalState &gstate;
64580
- Pipeline *pipeline;
64581
64802
  ClientContext &context;
64582
64803
 
64583
64804
  public:
@@ -64590,16 +64811,15 @@ public:
64590
64811
  }
64591
64812
  };
64592
64813
 
64593
- class DistinctCombineFinalizeEvent : public Event {
64814
+ class DistinctCombineFinalizeEvent : public BasePipelineEvent {
64594
64815
  public:
64595
64816
  DistinctCombineFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
64596
- Pipeline *pipeline_p, ClientContext &client)
64597
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), client(client) {
64817
+ Pipeline &pipeline_p, ClientContext &client)
64818
+ : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), client(client) {
64598
64819
  }
64599
64820
 
64600
64821
  const PhysicalUngroupedAggregate &op;
64601
64822
  UngroupedAggregateGlobalState &gstate;
64602
- Pipeline *pipeline;
64603
64823
  ClientContext &client;
64604
64824
 
64605
64825
  public:
@@ -64615,7 +64835,7 @@ public:
64615
64835
  SetTasks(move(tasks));
64616
64836
 
64617
64837
  //! Now that all tables are combined, it's time to do the distinct aggregations
64618
- auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, pipeline, client);
64838
+ auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
64619
64839
  this->InsertEvent(move(new_event));
64620
64840
  }
64621
64841
  };
@@ -64644,12 +64864,12 @@ SinkFinalizeType PhysicalUngroupedAggregate::FinalizeDistinct(Pipeline &pipeline
64644
64864
  }
64645
64865
  }
64646
64866
  if (any_partitioned) {
64647
- auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, &pipeline, context);
64867
+ auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
64648
64868
  event.InsertEvent(move(new_event));
64649
64869
  } else {
64650
64870
  //! Hashtables aren't partitioned, they dont need to be joined first
64651
64871
  //! So we can compute the aggregate already
64652
- auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, &pipeline, context);
64872
+ auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
64653
64873
  event.InsertEvent(move(new_event));
64654
64874
  }
64655
64875
  return SinkFinalizeType::READY;
@@ -64927,12 +65147,14 @@ public:
64927
65147
 
64928
65148
  WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
64929
65149
  const Types &payload_types, idx_t max_mem, bool external)
64930
- : memory_per_thread(max_mem), count(0), partition_layout(partitions) {
65150
+ : memory_per_thread(max_mem), count(0) {
64931
65151
 
64932
65152
  RowLayout payload_layout;
64933
65153
  payload_layout.Initialize(payload_types);
64934
65154
  global_sort = make_unique<GlobalSortState>(buffer_manager, orders, payload_layout);
64935
65155
  global_sort->external = external;
65156
+
65157
+ partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
64936
65158
  }
64937
65159
 
64938
65160
  void Combine(LocalSortState &local_sort) {
@@ -66393,19 +66615,18 @@ private:
66393
66615
  WindowGlobalHashGroup &hash_group;
66394
66616
  };
66395
66617
 
66396
- class WindowMergeEvent : public Event {
66618
+ class WindowMergeEvent : public BasePipelineEvent {
66397
66619
  public:
66398
66620
  WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, WindowGlobalHashGroup &hash_group_p)
66399
- : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p), hash_group(hash_group_p) {
66621
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p), hash_group(hash_group_p) {
66400
66622
  }
66401
66623
 
66402
66624
  WindowGlobalSinkState &gstate;
66403
- Pipeline &pipeline;
66404
66625
  WindowGlobalHashGroup &hash_group;
66405
66626
 
66406
66627
  public:
66407
66628
  void Schedule() override {
66408
- auto &context = pipeline.GetClientContext();
66629
+ auto &context = pipeline->GetClientContext();
66409
66630
 
66410
66631
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
66411
66632
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -66420,7 +66641,7 @@ public:
66420
66641
 
66421
66642
  void FinishEvent() override {
66422
66643
  hash_group.global_sort->CompleteMergeRound(true);
66423
- CreateMergeTasks(pipeline, *this, gstate, hash_group);
66644
+ CreateMergeTasks(*pipeline, *this, gstate, hash_group);
66424
66645
  }
66425
66646
 
66426
66647
  static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
@@ -70755,18 +70976,17 @@ private:
70755
70976
  bool parallel;
70756
70977
  };
70757
70978
 
70758
- class HashJoinFinalizeEvent : public Event {
70979
+ class HashJoinFinalizeEvent : public BasePipelineEvent {
70759
70980
  public:
70760
70981
  HashJoinFinalizeEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink)
70761
- : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink) {
70982
+ : BasePipelineEvent(pipeline_p), sink(sink) {
70762
70983
  }
70763
70984
 
70764
- Pipeline &pipeline;
70765
70985
  HashJoinGlobalSinkState &sink;
70766
70986
 
70767
70987
  public:
70768
70988
  void Schedule() override {
70769
- auto &context = pipeline.GetClientContext();
70989
+ auto &context = pipeline->GetClientContext();
70770
70990
  auto parallel_construct_count =
70771
70991
  context.config.verify_parallelism ? STANDARD_VECTOR_SIZE : PARALLEL_CONSTRUCT_COUNT;
70772
70992
 
@@ -70833,20 +71053,19 @@ private:
70833
71053
  JoinHashTable &local_ht;
70834
71054
  };
70835
71055
 
70836
- class HashJoinPartitionEvent : public Event {
71056
+ class HashJoinPartitionEvent : public BasePipelineEvent {
70837
71057
  public:
70838
71058
  HashJoinPartitionEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink,
70839
71059
  vector<unique_ptr<JoinHashTable>> &local_hts)
70840
- : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink), local_hts(local_hts) {
71060
+ : BasePipelineEvent(pipeline_p), sink(sink), local_hts(local_hts) {
70841
71061
  }
70842
71062
 
70843
- Pipeline &pipeline;
70844
71063
  HashJoinGlobalSinkState &sink;
70845
71064
  vector<unique_ptr<JoinHashTable>> &local_hts;
70846
71065
 
70847
71066
  public:
70848
71067
  void Schedule() override {
70849
- auto &context = pipeline.GetClientContext();
71068
+ auto &context = pipeline->GetClientContext();
70850
71069
  vector<unique_ptr<Task>> partition_tasks;
70851
71070
  partition_tasks.reserve(local_hts.size());
70852
71071
  for (auto &local_ht : local_hts) {
@@ -70859,7 +71078,7 @@ public:
70859
71078
  void FinishEvent() override {
70860
71079
  local_hts.clear();
70861
71080
  sink.hash_table->PrepareExternalFinalize();
70862
- sink.ScheduleFinalize(pipeline, *this);
71081
+ sink.ScheduleFinalize(*pipeline, *this);
70863
71082
  }
70864
71083
  };
70865
71084
 
@@ -74563,21 +74782,20 @@ private:
74563
74782
  GlobalSortedTable &table;
74564
74783
  };
74565
74784
 
74566
- class RangeJoinMergeEvent : public Event {
74785
+ class RangeJoinMergeEvent : public BasePipelineEvent {
74567
74786
  public:
74568
74787
  using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
74569
74788
 
74570
74789
  public:
74571
74790
  RangeJoinMergeEvent(GlobalSortedTable &table_p, Pipeline &pipeline_p)
74572
- : Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
74791
+ : BasePipelineEvent(pipeline_p), table(table_p) {
74573
74792
  }
74574
74793
 
74575
74794
  GlobalSortedTable &table;
74576
- Pipeline &pipeline;
74577
74795
 
74578
74796
  public:
74579
74797
  void Schedule() override {
74580
- auto &context = pipeline.GetClientContext();
74798
+ auto &context = pipeline->GetClientContext();
74581
74799
 
74582
74800
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
74583
74801
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -74596,7 +74814,7 @@ public:
74596
74814
  global_sort_state.CompleteMergeRound(true);
74597
74815
  if (global_sort_state.sorted_blocks.size() > 1) {
74598
74816
  // Multiple blocks remaining: Schedule the next round
74599
- table.ScheduleMergeTasks(pipeline, *this);
74817
+ table.ScheduleMergeTasks(*pipeline, *this);
74600
74818
  }
74601
74819
  }
74602
74820
  };
@@ -74984,18 +75202,17 @@ private:
74984
75202
  OrderGlobalState &state;
74985
75203
  };
74986
75204
 
74987
- class OrderMergeEvent : public Event {
75205
+ class OrderMergeEvent : public BasePipelineEvent {
74988
75206
  public:
74989
75207
  OrderMergeEvent(OrderGlobalState &gstate_p, Pipeline &pipeline_p)
74990
- : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p) {
75208
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p) {
74991
75209
  }
74992
75210
 
74993
75211
  OrderGlobalState &gstate;
74994
- Pipeline &pipeline;
74995
75212
 
74996
75213
  public:
74997
75214
  void Schedule() override {
74998
- auto &context = pipeline.GetClientContext();
75215
+ auto &context = pipeline->GetClientContext();
74999
75216
 
75000
75217
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
75001
75218
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -75014,7 +75231,7 @@ public:
75014
75231
  global_sort_state.CompleteMergeRound();
75015
75232
  if (global_sort_state.sorted_blocks.size() > 1) {
75016
75233
  // Multiple blocks remaining: Schedule the next round
75017
- PhysicalOrder::ScheduleMergeTasks(pipeline, *this, gstate);
75234
+ PhysicalOrder::ScheduleMergeTasks(*pipeline, *this, gstate);
75018
75235
  }
75019
75236
  }
75020
75237
  };
@@ -79914,10 +80131,17 @@ void PhysicalCreateIndex::GetData(ExecutionContext &context, DataChunk &chunk, G
79914
80131
  return;
79915
80132
  }
79916
80133
 
80134
+ // convert virtual column ids to storage column ids
80135
+ vector<column_t> storage_ids;
80136
+ for (auto &column_id : column_ids) {
80137
+ D_ASSERT(column_id < table.columns.size());
80138
+ storage_ids.push_back(table.columns[column_id].StorageOid());
80139
+ }
80140
+
79917
80141
  unique_ptr<Index> index;
79918
80142
  switch (info->index_type) {
79919
80143
  case IndexType::ART: {
79920
- index = make_unique<ART>(column_ids, unbound_expressions, info->constraint_type, *context.client.db);
80144
+ index = make_unique<ART>(storage_ids, unbound_expressions, info->constraint_type, *context.client.db);
79921
80145
  break;
79922
80146
  }
79923
80147
  default:
@@ -80636,6 +80860,7 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
80636
80860
  void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
80637
80861
  op_state.reset();
80638
80862
  sink_state.reset();
80863
+ pipelines.clear();
80639
80864
 
80640
80865
  // recursive CTE
80641
80866
  state.SetPipelineSource(current, this);
@@ -80935,7 +81160,7 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(Allocator &allocator, Buffe
80935
81160
  total_required_bits += group_bits;
80936
81161
  }
80937
81162
  // the total amount of groups we allocate space for is 2^required_bits
80938
- total_groups = 1 << total_required_bits;
81163
+ total_groups = (uint64_t)1 << total_required_bits;
80939
81164
  // we don't need to store the groups in a perfect hash table, since the group keys can be deduced by their location
80940
81165
  grouping_columns = group_types_p.size();
80941
81166
  layout.Initialize(move(aggregate_objects_p));
@@ -81119,7 +81344,7 @@ static void ReconstructGroupVectorTemplated(uint32_t group_values[], Value &min,
81119
81344
  static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t required_bits, idx_t shift,
81120
81345
  idx_t entry_count, Vector &result) {
81121
81346
  // construct the mask for this entry
81122
- idx_t mask = (1 << required_bits) - 1;
81347
+ idx_t mask = ((uint64_t)1 << required_bits) - 1;
81123
81348
  switch (result.GetType().InternalType()) {
81124
81349
  case PhysicalType::INT8:
81125
81350
  ReconstructGroupVectorTemplated<int8_t>(group_values, min, mask, shift, entry_count, result);
@@ -85366,7 +85591,7 @@ void RadixPartitionedHashTable::SetGroupingValues() {
85366
85591
  for (idx_t i = 0; i < grouping.size(); i++) {
85367
85592
  if (grouping_set.find(grouping[i]) == grouping_set.end()) {
85368
85593
  // we don't group on this value!
85369
- grouping_value += 1 << (grouping.size() - (i + 1));
85594
+ grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
85370
85595
  }
85371
85596
  }
85372
85597
  grouping_values.push_back(Value::BIGINT(grouping_value));
@@ -90924,7 +91149,21 @@ struct ModeIncluded {
90924
91149
  const idx_t bias;
90925
91150
  };
90926
91151
 
90927
- template <typename KEY_TYPE>
91152
+ struct ModeAssignmentStandard {
91153
+ template <class INPUT_TYPE, class RESULT_TYPE>
91154
+ static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
91155
+ return RESULT_TYPE(input);
91156
+ }
91157
+ };
91158
+
91159
+ struct ModeAssignmentString {
91160
+ template <class INPUT_TYPE, class RESULT_TYPE>
91161
+ static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
91162
+ return StringVector::AddString(result, input);
91163
+ }
91164
+ };
91165
+
91166
+ template <typename KEY_TYPE, typename ASSIGN_OP>
90928
91167
  struct ModeFunction {
90929
91168
  template <class STATE>
90930
91169
  static void Initialize(STATE *state) {
@@ -91037,7 +91276,7 @@ struct ModeFunction {
91037
91276
  }
91038
91277
 
91039
91278
  if (state->valid) {
91040
- rdata[rid] = RESULT_TYPE(*state->mode);
91279
+ rdata[rid] = ASSIGN_OP::template Assign<INPUT_TYPE, RESULT_TYPE>(result, *state->mode);
91041
91280
  } else {
91042
91281
  rmask.Set(rid, false);
91043
91282
  }
@@ -91053,10 +91292,10 @@ struct ModeFunction {
91053
91292
  }
91054
91293
  };
91055
91294
 
91056
- template <typename INPUT_TYPE, typename KEY_TYPE>
91295
+ template <typename INPUT_TYPE, typename KEY_TYPE, typename ASSIGN_OP = ModeAssignmentStandard>
91057
91296
  AggregateFunction GetTypedModeFunction(const LogicalType &type) {
91058
91297
  using STATE = ModeState<KEY_TYPE>;
91059
- using OP = ModeFunction<KEY_TYPE>;
91298
+ using OP = ModeFunction<KEY_TYPE, ASSIGN_OP>;
91060
91299
  auto func = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, INPUT_TYPE, OP>(type, type);
91061
91300
  func.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, INPUT_TYPE, OP>;
91062
91301
  return func;
@@ -91092,7 +91331,7 @@ AggregateFunction GetModeAggregate(const LogicalType &type) {
91092
91331
  return GetTypedModeFunction<interval_t, interval_t>(type);
91093
91332
 
91094
91333
  case PhysicalType::VARCHAR:
91095
- return GetTypedModeFunction<string_t, string>(type);
91334
+ return GetTypedModeFunction<string_t, string, ModeAssignmentString>(type);
91096
91335
 
91097
91336
  default:
91098
91337
  throw NotImplementedException("Unimplemented mode aggregate");
@@ -93281,21 +93520,21 @@ AggregateFunction GetHistogramFunction(const LogicalType &type) {
93281
93520
  case LogicalType::VARCHAR:
93282
93521
  return GetMapType<HistogramStringFunctor, string, IS_ORDERED>(type);
93283
93522
  case LogicalType::TIMESTAMP:
93284
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93523
+ return GetMapType<HistogramFunctor, timestamp_t, IS_ORDERED>(type);
93285
93524
  case LogicalType::TIMESTAMP_TZ:
93286
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93525
+ return GetMapType<HistogramFunctor, timestamp_tz_t, IS_ORDERED>(type);
93287
93526
  case LogicalType::TIMESTAMP_S:
93288
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93527
+ return GetMapType<HistogramFunctor, timestamp_sec_t, IS_ORDERED>(type);
93289
93528
  case LogicalType::TIMESTAMP_MS:
93290
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93529
+ return GetMapType<HistogramFunctor, timestamp_ms_t, IS_ORDERED>(type);
93291
93530
  case LogicalType::TIMESTAMP_NS:
93292
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93531
+ return GetMapType<HistogramFunctor, timestamp_ns_t, IS_ORDERED>(type);
93293
93532
  case LogicalType::TIME:
93294
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93533
+ return GetMapType<HistogramFunctor, dtime_t, IS_ORDERED>(type);
93295
93534
  case LogicalType::TIME_TZ:
93296
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93535
+ return GetMapType<HistogramFunctor, dtime_tz_t, IS_ORDERED>(type);
93297
93536
  case LogicalType::DATE:
93298
- return GetMapType<HistogramFunctor, int32_t, IS_ORDERED>(type);
93537
+ return GetMapType<HistogramFunctor, date_t, IS_ORDERED>(type);
93299
93538
  default:
93300
93539
  throw InternalException("Unimplemented histogram aggregate");
93301
93540
  }
@@ -96859,7 +97098,8 @@ struct DateDiff {
96859
97098
  struct WeekOperator {
96860
97099
  template <class TA, class TB, class TR>
96861
97100
  static inline TR Operation(TA startdate, TB enddate) {
96862
- return Date::Epoch(enddate) / Interval::SECS_PER_WEEK - Date::Epoch(startdate) / Interval::SECS_PER_WEEK;
97101
+ return Date::Epoch(Date::GetMondayOfCurrentWeek(enddate)) / Interval::SECS_PER_WEEK -
97102
+ Date::Epoch(Date::GetMondayOfCurrentWeek(startdate)) / Interval::SECS_PER_WEEK;
96863
97103
  }
96864
97104
  };
96865
97105
 
@@ -103243,12 +103483,49 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
103243
103483
  result, state_vector.state_vector, count);
103244
103484
  break;
103245
103485
  case PhysicalType::INT32:
103246
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103247
- result, state_vector.state_vector, count);
103486
+ if (key_type.id() == LogicalTypeId::DATE) {
103487
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, date_t>(
103488
+ result, state_vector.state_vector, count);
103489
+ } else {
103490
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103491
+ result, state_vector.state_vector, count);
103492
+ }
103248
103493
  break;
103249
103494
  case PhysicalType::INT64:
103250
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103251
- result, state_vector.state_vector, count);
103495
+ switch (key_type.id()) {
103496
+ case LogicalTypeId::TIME:
103497
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_t>(
103498
+ result, state_vector.state_vector, count);
103499
+ break;
103500
+ case LogicalTypeId::TIME_TZ:
103501
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_tz_t>(
103502
+ result, state_vector.state_vector, count);
103503
+ break;
103504
+ case LogicalTypeId::TIMESTAMP:
103505
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_t>(
103506
+ result, state_vector.state_vector, count);
103507
+ break;
103508
+ case LogicalTypeId::TIMESTAMP_MS:
103509
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ms_t>(
103510
+ result, state_vector.state_vector, count);
103511
+ break;
103512
+ case LogicalTypeId::TIMESTAMP_NS:
103513
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ns_t>(
103514
+ result, state_vector.state_vector, count);
103515
+ break;
103516
+ case LogicalTypeId::TIMESTAMP_SEC:
103517
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_sec_t>(
103518
+ result, state_vector.state_vector, count);
103519
+ break;
103520
+ case LogicalTypeId::TIMESTAMP_TZ:
103521
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_tz_t>(
103522
+ result, state_vector.state_vector, count);
103523
+ break;
103524
+ default:
103525
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103526
+ result, state_vector.state_vector, count);
103527
+ break;
103528
+ }
103252
103529
  break;
103253
103530
  case PhysicalType::FLOAT:
103254
103531
  FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, float>(
@@ -104318,18 +104595,12 @@ void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_list
104318
104595
  static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
104319
104596
  D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
104320
104597
  auto count = args.size();
104321
- Vector &lists = args.data[0];
104598
+ Vector &input_lists = args.data[0];
104322
104599
 
104323
104600
  result.SetVectorType(VectorType::FLAT_VECTOR);
104324
104601
  auto &result_validity = FlatVector::Validity(result);
104325
104602
 
104326
- for (auto &v : args.data) {
104327
- if (v.GetVectorType() != VectorType::FLAT_VECTOR && v.GetVectorType() != VectorType::CONSTANT_VECTOR) {
104328
- v.Flatten(count);
104329
- }
104330
- }
104331
-
104332
- if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
104603
+ if (input_lists.GetType().id() == LogicalTypeId::SQLNULL) {
104333
104604
  result_validity.SetInvalid(0);
104334
104605
  return;
104335
104606
  }
@@ -104344,15 +104615,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104344
104615
  LocalSortState local_sort_state;
104345
104616
  local_sort_state.Initialize(global_sort_state, buffer_manager);
104346
104617
 
104618
+ // this ensures that we do not change the order of the entries in the input chunk
104619
+ VectorOperations::Copy(input_lists, result, count, 0, 0);
104620
+
104347
104621
  // get the child vector
104348
- auto lists_size = ListVector::GetListSize(lists);
104349
- auto &child_vector = ListVector::GetEntry(lists);
104622
+ auto lists_size = ListVector::GetListSize(result);
104623
+ auto &child_vector = ListVector::GetEntry(result);
104350
104624
  UnifiedVectorFormat child_data;
104351
104625
  child_vector.ToUnifiedFormat(lists_size, child_data);
104352
104626
 
104353
104627
  // get the lists data
104354
104628
  UnifiedVectorFormat lists_data;
104355
- lists.ToUnifiedFormat(count, lists_data);
104629
+ result.ToUnifiedFormat(count, lists_data);
104356
104630
  auto list_entries = (list_entry_t *)lists_data.data;
104357
104631
 
104358
104632
  // create the lists_indices vector, this contains an element for each list's entry,
@@ -104449,8 +104723,6 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104449
104723
  child_vector.Flatten(sel_sorted_idx);
104450
104724
  }
104451
104725
 
104452
- result.Reference(lists);
104453
-
104454
104726
  if (args.AllConstant()) {
104455
104727
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
104456
104728
  }
@@ -108128,6 +108400,24 @@ interval_t DivideOperator::Operation(interval_t left, int64_t right) {
108128
108400
  return left;
108129
108401
  }
108130
108402
 
108403
+ struct BinaryNumericDivideWrapper {
108404
+ template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
108405
+ static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
108406
+ if (left == NumericLimits<LEFT_TYPE>::Minimum() && right == -1) {
108407
+ throw OutOfRangeException("Overflow in division of %d / %d", left, right);
108408
+ } else if (right == 0) {
108409
+ mask.SetInvalid(idx);
108410
+ return left;
108411
+ } else {
108412
+ return OP::template Operation<LEFT_TYPE, RIGHT_TYPE, RESULT_TYPE>(left, right);
108413
+ }
108414
+ }
108415
+
108416
+ static bool AddsNulls() {
108417
+ return true;
108418
+ }
108419
+ };
108420
+
108131
108421
  struct BinaryZeroIsNullWrapper {
108132
108422
  template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
108133
108423
  static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
@@ -108169,13 +108459,13 @@ template <class OP>
108169
108459
  static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
108170
108460
  switch (type.id()) {
108171
108461
  case LogicalTypeId::TINYINT:
108172
- return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP>;
108462
+ return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP, BinaryNumericDivideWrapper>;
108173
108463
  case LogicalTypeId::SMALLINT:
108174
- return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP>;
108464
+ return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP, BinaryNumericDivideWrapper>;
108175
108465
  case LogicalTypeId::INTEGER:
108176
- return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP>;
108466
+ return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP, BinaryNumericDivideWrapper>;
108177
108467
  case LogicalTypeId::BIGINT:
108178
- return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP>;
108468
+ return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP, BinaryNumericDivideWrapper>;
108179
108469
  case LogicalTypeId::UTINYINT:
108180
108470
  return BinaryScalarFunctionIgnoreZero<uint8_t, uint8_t, uint8_t, OP>;
108181
108471
  case LogicalTypeId::USMALLINT:
@@ -114623,11 +114913,22 @@ static void CurrentSchemaFunction(DataChunk &input, ExpressionState &state, Vect
114623
114913
 
114624
114914
  // current_schemas
114625
114915
  static void CurrentSchemasFunction(DataChunk &input, ExpressionState &state, Vector &result) {
114916
+ if (!input.AllConstant()) {
114917
+ throw NotImplementedException("current_schemas requires a constant input");
114918
+ }
114919
+ if (ConstantVector::IsNull(input.data[0])) {
114920
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
114921
+ ConstantVector::SetNull(result, true);
114922
+ return;
114923
+ }
114924
+ auto implicit_schemas = *ConstantVector::GetData<bool>(input.data[0]);
114626
114925
  vector<Value> schema_list;
114627
- vector<string> search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path->Get();
114926
+ auto &catalog_search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path;
114927
+ vector<string> search_path = implicit_schemas ? catalog_search_path->Get() : catalog_search_path->GetSetPaths();
114628
114928
  std::transform(search_path.begin(), search_path.end(), std::back_inserter(schema_list),
114629
114929
  [](const string &s) -> Value { return Value(s); });
114630
- auto val = Value::LIST(schema_list);
114930
+
114931
+ auto val = Value::LIST(LogicalType::VARCHAR, schema_list);
114631
114932
  result.Reference(val);
114632
114933
  }
114633
114934
 
@@ -117625,6 +117926,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
117625
117926
  table_function.named_parameters["skip"] = LogicalType::BIGINT;
117626
117927
  table_function.named_parameters["max_line_size"] = LogicalType::VARCHAR;
117627
117928
  table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR;
117929
+ table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
117628
117930
  }
117629
117931
 
117630
117932
  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
@@ -121455,8 +121757,7 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
121455
121757
  // we don't emit any statistics for tables that have outstanding transaction-local data
121456
121758
  return nullptr;
121457
121759
  }
121458
- auto storage_idx = GetStorageIndex(*bind_data.table, column_id);
121459
- return bind_data.table->storage->GetStatistics(context, storage_idx);
121760
+ return bind_data.table->GetStatistics(context, column_id);
121460
121761
  }
121461
121762
 
121462
121763
  static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
@@ -123028,7 +123329,7 @@ bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row) {
123028
123329
  }
123029
123330
  idx_t entry_idx = row / 64;
123030
123331
  idx_t idx_in_entry = row % 64;
123031
- return validity[entry_idx] & (1 << idx_in_entry);
123332
+ return validity[entry_idx] & ((idx_t)1 << idx_in_entry);
123032
123333
  }
123033
123334
 
123034
123335
  void duckdb_validity_set_row_validity(uint64_t *validity, idx_t row, bool valid) {
@@ -123045,7 +123346,7 @@ void duckdb_validity_set_row_invalid(uint64_t *validity, idx_t row) {
123045
123346
  }
123046
123347
  idx_t entry_idx = row / 64;
123047
123348
  idx_t idx_in_entry = row % 64;
123048
- validity[entry_idx] &= ~(1 << idx_in_entry);
123349
+ validity[entry_idx] &= ~((uint64_t)1 << idx_in_entry);
123049
123350
  }
123050
123351
 
123051
123352
  void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
@@ -123054,7 +123355,7 @@ void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
123054
123355
  }
123055
123356
  idx_t entry_idx = row / 64;
123056
123357
  idx_t idx_in_entry = row % 64;
123057
- validity[entry_idx] |= 1 << idx_in_entry;
123358
+ validity[entry_idx] |= (uint64_t)1 << idx_in_entry;
123058
123359
  }
123059
123360
 
123060
123361
 
@@ -126456,9 +126757,19 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
126456
126757
  case StatementType::INSERT_STATEMENT:
126457
126758
  case StatementType::DELETE_STATEMENT:
126458
126759
  case StatementType::UPDATE_STATEMENT: {
126459
- auto sql = statement->ToString();
126460
126760
  Parser parser;
126461
- parser.ParseQuery(sql);
126761
+ PreservedError error;
126762
+ try {
126763
+ parser.ParseQuery(statement->ToString());
126764
+ } catch (const Exception &ex) {
126765
+ error = PreservedError(ex);
126766
+ } catch (std::exception &ex) {
126767
+ error = PreservedError(ex);
126768
+ }
126769
+ if (error) {
126770
+ // error in verifying query
126771
+ return make_unique<PendingQueryResult>(error);
126772
+ }
126462
126773
  statement = move(parser.statements[0]);
126463
126774
  break;
126464
126775
  }
@@ -141402,7 +141713,7 @@ Value ForceCompressionSetting::GetSetting(ClientContext &context) {
141402
141713
  //===--------------------------------------------------------------------===//
141403
141714
  void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input) {
141404
141715
  auto &config = ClientConfig::GetConfig(context);
141405
- config.home_directory = input.IsNull() ? input.ToString() : string();
141716
+ config.home_directory = input.IsNull() ? string() : input.ToString();
141406
141717
  }
141407
141718
 
141408
141719
  Value HomeDirectorySetting::GetSetting(ClientContext &context) {
@@ -142358,9 +142669,7 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
142358
142669
  // Get HLL stats here
142359
142670
  auto actual_binding = relation_column_to_original_column[key];
142360
142671
 
142361
- // sometimes base stats is null (test_709.test) returns null for base stats while
142362
- // there is still a catalog table. Anybody know anything about this?
142363
- auto base_stats = catalog_table->storage->GetStatistics(context, actual_binding.column_index);
142672
+ auto base_stats = catalog_table->GetStatistics(context, actual_binding.column_index);
142364
142673
  if (base_stats) {
142365
142674
  count = base_stats->GetDistinctCount();
142366
142675
  }
@@ -144231,7 +144540,10 @@ FilterResult FilterCombiner::AddBoundComparisonFilter(Expression *expr) {
144231
144540
  auto node = GetNode(left_is_scalar ? comparison.right.get() : comparison.left.get());
144232
144541
  idx_t equivalence_set = GetEquivalenceSet(node);
144233
144542
  auto scalar = left_is_scalar ? comparison.left.get() : comparison.right.get();
144234
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144543
+ Value constant_value;
144544
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144545
+ return FilterResult::UNSATISFIABLE;
144546
+ }
144235
144547
  if (constant_value.IsNull()) {
144236
144548
  // comparisons with null are always null (i.e. will never result in rows)
144237
144549
  return FilterResult::UNSATISFIABLE;
@@ -144312,7 +144624,11 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144312
144624
  }
144313
144625
  if (expr->IsFoldable()) {
144314
144626
  // scalar condition, evaluate it
144315
- auto result = ExpressionExecutor::EvaluateScalar(*expr).CastAs(LogicalType::BOOLEAN);
144627
+ Value result;
144628
+ if (!ExpressionExecutor::TryEvaluateScalar(*expr, result)) {
144629
+ return FilterResult::UNSUPPORTED;
144630
+ }
144631
+ result = result.CastAs(LogicalType::BOOLEAN);
144316
144632
  // check if the filter passes
144317
144633
  if (result.IsNull() || !BooleanValue::Get(result)) {
144318
144634
  // the filter does not pass the scalar test, create an empty result
@@ -144336,7 +144652,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144336
144652
 
144337
144653
  if (lower_is_scalar) {
144338
144654
  auto scalar = comparison.lower.get();
144339
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144655
+ Value constant_value;
144656
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144657
+ return FilterResult::UNSUPPORTED;
144658
+ }
144340
144659
 
144341
144660
  // create the ExpressionValueInformation
144342
144661
  ExpressionValueInformation info;
@@ -144369,7 +144688,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144369
144688
 
144370
144689
  if (upper_is_scalar) {
144371
144690
  auto scalar = comparison.upper.get();
144372
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144691
+ Value constant_value;
144692
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144693
+ return FilterResult::UNSUPPORTED;
144694
+ }
144373
144695
 
144374
144696
  // create the ExpressionValueInformation
144375
144697
  ExpressionValueInformation info;
@@ -145281,7 +145603,6 @@ unique_ptr<Expression> InClauseRewriter::VisitReplace(BoundOperatorExpression &e
145281
145603
  // IN clause with many children: try to generate a mark join that replaces this IN expression
145282
145604
  // we can only do this if the expressions in the expression list are scalar
145283
145605
  for (idx_t i = 1; i < expr.children.size(); i++) {
145284
- D_ASSERT(expr.children[i]->return_type == in_type);
145285
145606
  if (!expr.children[i]->IsFoldable()) {
145286
145607
  // non-scalar expression
145287
145608
  all_scalar = false;
@@ -147720,21 +148041,35 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownAggregate(unique_ptr<Logical
147720
148041
  FilterPushdown child_pushdown(optimizer);
147721
148042
  for (idx_t i = 0; i < filters.size(); i++) {
147722
148043
  auto &f = *filters[i];
147723
- // check if any aggregate or GROUPING functions are in the set
147724
- if (f.bindings.find(aggr.aggregate_index) == f.bindings.end() &&
147725
- f.bindings.find(aggr.groupings_index) == f.bindings.end()) {
147726
- // no aggregate! we can push this down
147727
- // rewrite any group bindings within the filter
147728
- f.filter = ReplaceGroupBindings(aggr, move(f.filter));
147729
- // add the filter to the child node
147730
- if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
147731
- // filter statically evaluates to false, strip tree
147732
- return make_unique<LogicalEmptyResult>(move(op));
148044
+ if (f.bindings.find(aggr.aggregate_index) != f.bindings.end()) {
148045
+ // filter on aggregate: cannot pushdown
148046
+ continue;
148047
+ }
148048
+ if (f.bindings.find(aggr.groupings_index) != f.bindings.end()) {
148049
+ // filter on GROUPINGS function: cannot pushdown
148050
+ continue;
148051
+ }
148052
+ // if there are any empty grouping sets, we cannot push down filters
148053
+ bool has_empty_grouping_sets = false;
148054
+ for (auto &grp : aggr.grouping_sets) {
148055
+ if (grp.empty()) {
148056
+ has_empty_grouping_sets = true;
147733
148057
  }
147734
- // erase the filter from here
147735
- filters.erase(filters.begin() + i);
147736
- i--;
147737
148058
  }
148059
+ if (has_empty_grouping_sets) {
148060
+ continue;
148061
+ }
148062
+ // no aggregate! we can push this down
148063
+ // rewrite any group bindings within the filter
148064
+ f.filter = ReplaceGroupBindings(aggr, move(f.filter));
148065
+ // add the filter to the child node
148066
+ if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
148067
+ // filter statically evaluates to false, strip tree
148068
+ return make_unique<LogicalEmptyResult>(move(op));
148069
+ }
148070
+ // erase the filter from here
148071
+ filters.erase(filters.begin() + i);
148072
+ i--;
147738
148073
  }
147739
148074
  child_pushdown.GenerateFilters();
147740
148075
 
@@ -152440,6 +152775,19 @@ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
152440
152775
  } // namespace duckdb
152441
152776
 
152442
152777
 
152778
+ namespace duckdb {
152779
+
152780
  //! Constructs the event from a shared_ptr to its pipeline.
  //! Event(...) is given pipeline_p->executor, and the pointer itself is moved into the `pipeline` member.
  //! NOTE(review): this relies on Event being initialized before `pipeline` (true if Event is the base class);
  //! otherwise pipeline_p would already be moved-from when the executor is read - confirm against the class declaration.
+ BasePipelineEvent::BasePipelineEvent(shared_ptr<Pipeline> pipeline_p)
152781
+ : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
152782
+ }
152783
+
152784
  //! Constructs the event from a Pipeline reference.
  //! Event(...) is given pipeline_p.executor, and the `pipeline` member is obtained via pipeline_p.shared_from_this().
  //! NOTE(review): presumably requires that pipeline_p is already managed by a shared_ptr - confirm against Pipeline's declaration.
+ BasePipelineEvent::BasePipelineEvent(Pipeline &pipeline_p)
152785
+ : Event(pipeline_p.executor), pipeline(pipeline_p.shared_from_this()) {
152786
+ }
152787
+
152788
+ } // namespace duckdb
152789
+
152790
+
152443
152791
 
152444
152792
 
152445
152793
 
@@ -152559,16 +152907,13 @@ public:
152559
152907
 
152560
152908
 
152561
152909
 
152562
-
152563
152910
  namespace duckdb {
152564
152911
 
152565
- class PipelineEvent : public Event {
152912
+ //! A PipelineEvent is responsible for scheduling a pipeline
152913
+ class PipelineEvent : public BasePipelineEvent {
152566
152914
  public:
152567
152915
  PipelineEvent(shared_ptr<Pipeline> pipeline);
152568
152916
 
152569
- //! The pipeline that this event belongs to
152570
- shared_ptr<Pipeline> pipeline;
152571
-
152572
152917
  public:
152573
152918
  void Schedule() override;
152574
152919
  void FinishEvent() override;
@@ -152696,17 +153041,13 @@ private:
152696
153041
 
152697
153042
 
152698
153043
 
152699
-
152700
153044
  namespace duckdb {
152701
153045
  class Executor;
152702
153046
 
152703
- class PipelineFinishEvent : public Event {
153047
+ class PipelineFinishEvent : public BasePipelineEvent {
152704
153048
  public:
152705
153049
  PipelineFinishEvent(shared_ptr<Pipeline> pipeline);
152706
153050
 
152707
- //! The pipeline that this event belongs to
152708
- shared_ptr<Pipeline> pipeline;
152709
-
152710
153051
  public:
152711
153052
  void Schedule() override;
152712
153053
  void FinishEvent() override;
@@ -152733,6 +153074,9 @@ Executor &Executor::Get(ClientContext &context) {
152733
153074
 
152734
153075
  //! Appends an event to the executor's `events` list.
  //! The whole body runs while holding executor_lock.
  //! If the `cancelled` flag is set, the event is discarded and nothing is stored.
  void Executor::AddEvent(shared_ptr<Event> event) {
152735
153076
  lock_guard<mutex> elock(executor_lock);
153077
  // checked under the lock: a cancelled executor accepts no new events
  // (CancelTasks sets the flag and Reset clears it, both under the same lock)
+ if (cancelled) {
153078
+ return;
153079
+ }
152736
153080
  events.push_back(move(event));
152737
153081
  }
152738
153082
 
@@ -153036,6 +153380,7 @@ void Executor::CancelTasks() {
153036
153380
  vector<weak_ptr<Pipeline>> weak_references;
153037
153381
  {
153038
153382
  lock_guard<mutex> elock(executor_lock);
153383
+ cancelled = true;
153039
153384
  weak_references.reserve(pipelines.size());
153040
153385
  for (auto &pipeline : pipelines) {
153041
153386
  weak_references.push_back(weak_ptr<Pipeline>(pipeline));
@@ -153124,6 +153469,7 @@ PendingExecutionResult Executor::ExecuteTask() {
153124
153469
  void Executor::Reset() {
153125
153470
  lock_guard<mutex> elock(executor_lock);
153126
153471
  physical_plan = nullptr;
153472
+ cancelled = false;
153127
153473
  owned_plan.reset();
153128
153474
  root_executor.reset();
153129
153475
  root_pipelines.clear();
@@ -153558,8 +153904,7 @@ void PipelineCompleteEvent::FinalizeFinish() {
153558
153904
 
153559
153905
  namespace duckdb {
153560
153906
 
153561
- PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p)
153562
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
153907
  //! The constructor now only forwards the pipeline to BasePipelineEvent.
  //! The removed initializer list (Event(pipeline_p->executor) plus storing the pipeline) matches the one in
  //! BasePipelineEvent(shared_ptr<Pipeline>).
+ PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
153563
153908
  }
153564
153909
 
153565
153910
  void PipelineEvent::Schedule() {
@@ -153948,8 +154293,7 @@ void PipelineExecutor::EndOperator(PhysicalOperator *op, DataChunk *chunk) {
153948
154293
 
153949
154294
  namespace duckdb {
153950
154295
 
153951
- PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p)
153952
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
154296
  //! The constructor now only forwards the pipeline to BasePipelineEvent.
  //! The removed initializer list (Event(pipeline_p->executor) plus storing the pipeline) matches the one in
  //! BasePipelineEvent(shared_ptr<Pipeline>).
+ PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
153953
154297
  }
153954
154298
 
153955
154299
  void PipelineFinishEvent::Schedule() {
@@ -167484,7 +167828,7 @@ string QueryNode::ResultModifiersToString() const {
167484
167828
  } else if (modifier.type == ResultModifierType::LIMIT_PERCENT_MODIFIER) {
167485
167829
  auto &limit_p_modifier = (LimitPercentModifier &)modifier;
167486
167830
  if (limit_p_modifier.limit) {
167487
- result += " LIMIT " + limit_p_modifier.limit->ToString() + " %";
167831
+ result += " LIMIT (" + limit_p_modifier.limit->ToString() + ") %";
167488
167832
  }
167489
167833
  if (limit_p_modifier.offset) {
167490
167834
  result += " OFFSET " + limit_p_modifier.offset->ToString();
@@ -171360,7 +171704,7 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause *de_with_clause,
171360
171704
  }
171361
171705
  // we need a query
171362
171706
  if (!cte->ctequery || cte->ctequery->type != duckdb_libpgquery::T_PGSelectStmt) {
171363
- throw InternalException("A CTE needs a SELECT");
171707
+ throw NotImplementedException("A CTE needs a SELECT");
171364
171708
  }
171365
171709
 
171366
171710
  // CTE transformation can either result in inlining for non recursive CTEs, or in recursive CTE bindings
@@ -171777,7 +172121,7 @@ LogicalType Transformer::TransformTypeName(duckdb_libpgquery::PGTypeName *type_n
171777
172121
 
171778
172122
  result_type = LogicalType::MAP(move(children));
171779
172123
  } else {
171780
- int8_t width, scale;
172124
+ int64_t width, scale;
171781
172125
  if (base_type == LogicalTypeId::DECIMAL) {
171782
172126
  // default decimal width/scale
171783
172127
  width = 18;
@@ -174956,6 +175300,8 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
174956
175300
  // we didn't bind columns, try again in children
174957
175301
  return BindResult(error);
174958
175302
  }
175303
+ } else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
175304
+ return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
174959
175305
  }
174960
175306
  if (!filter_error.empty()) {
174961
175307
  return BindResult(filter_error);
@@ -174963,8 +175309,9 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
174963
175309
 
174964
175310
  if (aggr.filter) {
174965
175311
  auto &child = (BoundExpression &)*aggr.filter;
174966
- bound_filter = move(child.expr);
175312
+ bound_filter = BoundCastExpression::AddCastToType(move(child.expr), LogicalType::BOOLEAN);
174967
175313
  }
175314
+
174968
175315
  // all children bound successfully
174969
175316
  // extract the children and types
174970
175317
  vector<LogicalType> types;
@@ -176117,7 +176464,7 @@ BindResult ExpressionBinder::BindMacro(FunctionExpression &function, ScalarMacro
176117
176464
  string error =
176118
176465
  MacroFunction::ValidateArguments(*macro_func->function, macro_func->name, function, positionals, defaults);
176119
176466
  if (!error.empty()) {
176120
- return BindResult(binder.FormatError(*expr->get(), error));
176467
+ throw BinderException(binder.FormatError(*expr->get(), error));
176121
176468
  }
176122
176469
 
176123
176470
  // create a MacroBinding to bind this macro's parameters to its arguments
@@ -177140,10 +177487,13 @@ public:
177140
177487
  public:
177141
177488
  unique_ptr<Expression> Bind(unique_ptr<ParsedExpression> expr);
177142
177489
 
177143
- idx_t MaxCount() {
177490
  //! Returns the stored max_count. The only change is the added const qualifier,
  //! which allows the accessor to be called on a const object.
+ idx_t MaxCount() const {
177144
177491
  return max_count;
177145
177492
  }
177146
177493
 
177494
+ bool HasExtraList() const {
177495
  // returns extra_list converted to bool, i.e. whether an extra list is set;
  // Binder::BindDelimiter checks this before calling CreateExtraReference, which dereferences extra_list
+ return extra_list;
177496
+ }
177147
177497
  unique_ptr<Expression> CreateExtraReference(unique_ptr<ParsedExpression> expr);
177148
177498
 
177149
177499
  private:
@@ -177185,6 +177535,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177185
177535
  Value &delimiter_value) {
177186
177536
  auto new_binder = Binder::CreateBinder(context, this, true);
177187
177537
  if (delimiter->HasSubquery()) {
177538
+ if (!order_binder.HasExtraList()) {
177539
+ throw BinderException("Subquery in LIMIT/OFFSET not supported in set operation");
177540
+ }
177188
177541
  return order_binder.CreateExtraReference(move(delimiter));
177189
177542
  }
177190
177543
  ExpressionBinder expr_binder(*new_binder, context);
@@ -177195,6 +177548,8 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177195
177548
  delimiter_value = ExpressionExecutor::EvaluateScalar(*expr).CastAs(type);
177196
177549
  return nullptr;
177197
177550
  }
177551
+ // move any correlated columns to this binder
177552
+ MoveCorrelatedExpressions(*new_binder);
177198
177553
  return expr;
177199
177554
  }
177200
177555
 
@@ -179798,11 +180153,13 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179798
180153
  BindDefaultValues(base.columns, result->bound_defaults);
179799
180154
  }
179800
180155
 
180156
+ idx_t regular_column_count = 0;
179801
180157
  // bind collations to detect any unsupported collation errors
179802
180158
  for (auto &column : base.columns) {
179803
180159
  if (column.Generated()) {
179804
180160
  continue;
179805
180161
  }
180162
+ regular_column_count++;
179806
180163
  if (column.Type().id() == LogicalTypeId::VARCHAR) {
179807
180164
  ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
179808
180165
  }
@@ -179814,6 +180171,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179814
180171
  result->dependencies.insert(type_dependency);
179815
180172
  }
179816
180173
  }
180174
+ if (regular_column_count == 0) {
180175
+ throw BinderException("Creating a table without physical (non-generated) columns is not supported");
180176
+ }
179817
180177
  properties.allow_stream_result = false;
179818
180178
  return result;
179819
180179
  }
@@ -181201,7 +181561,20 @@ BoundStatement Binder::Bind(VacuumStatement &stmt) {
181201
181561
  auto &get = (LogicalGet &)*ref->get;
181202
181562
  columns.insert(columns.end(), get.names.begin(), get.names.end());
181203
181563
  }
181564
+
181565
+ case_insensitive_set_t column_name_set;
181566
+ vector<string> non_generated_column_names;
181204
181567
  for (auto &col_name : columns) {
181568
+ if (column_name_set.count(col_name) > 0) {
181569
+ throw BinderException("Vacuum the same column twice(same name in column name list)");
181570
+ }
181571
+ column_name_set.insert(col_name);
181572
+ auto &col = ref->table->GetColumn(col_name);
181573
+ // ignore generated column
181574
+ if (col.Generated()) {
181575
+ continue;
181576
+ }
181577
+ non_generated_column_names.push_back(col_name);
181205
181578
  ColumnRefExpression colref(col_name, ref->table->name);
181206
181579
  auto result = bind_context.BindColumn(colref, 0);
181207
181580
  if (result.HasError()) {
@@ -181209,17 +181582,29 @@ BoundStatement Binder::Bind(VacuumStatement &stmt) {
181209
181582
  }
181210
181583
  select_list.push_back(move(result.expression));
181211
181584
  }
181212
- auto table_scan = CreatePlan(*ref);
181213
- D_ASSERT(table_scan->type == LogicalOperatorType::LOGICAL_GET);
181214
- auto &get = (LogicalGet &)*table_scan;
181215
- for (idx_t i = 0; i < get.column_ids.size(); i++) {
181216
- stmt.info->column_id_map[i] = get.column_ids[i];
181217
- }
181585
+ stmt.info->columns = move(non_generated_column_names);
181586
+ if (!select_list.empty()) {
181587
+ auto table_scan = CreatePlan(*ref);
181588
+ D_ASSERT(table_scan->type == LogicalOperatorType::LOGICAL_GET);
181218
181589
 
181219
- auto projection = make_unique<LogicalProjection>(GenerateTableIndex(), move(select_list));
181220
- projection->children.push_back(move(table_scan));
181590
+ auto &get = (LogicalGet &)*table_scan;
181221
181591
 
181222
- root = move(projection);
181592
+ D_ASSERT(select_list.size() == get.column_ids.size());
181593
+ D_ASSERT(stmt.info->columns.size() == get.column_ids.size());
181594
+ for (idx_t i = 0; i < get.column_ids.size(); i++) {
181595
+ stmt.info->column_id_map[i] = ref->table->columns[get.column_ids[i]].StorageOid();
181596
+ }
181597
+
181598
+ auto projection = make_unique<LogicalProjection>(GenerateTableIndex(), move(select_list));
181599
+ projection->children.push_back(move(table_scan));
181600
+
181601
+ root = move(projection);
181602
+ } else {
181603
+ // eg. CREATE TABLE test (x AS (1));
181604
+ // ANALYZE test;
181605
+ // Make it not a SINK so it doesn't have to do anything
181606
+ stmt.info->has_table = false;
181607
+ }
181223
181608
  }
181224
181609
  auto vacuum = make_unique<LogicalSimple>(LogicalOperatorType::LOGICAL_VACUUM, move(stmt.info));
181225
181610
  if (root) {
@@ -184810,7 +185195,7 @@ BindResult ConstantBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr
184810
185195
  case ExpressionClass::COLUMN_REF:
184811
185196
  return BindResult(clause + " cannot contain column names");
184812
185197
  case ExpressionClass::SUBQUERY:
184813
- return BindResult(clause + " cannot contain subqueries");
185198
+ throw BinderException(clause + " cannot contain subqueries");
184814
185199
  case ExpressionClass::DEFAULT:
184815
185200
  return BindResult(clause + " cannot contain DEFAULT clause");
184816
185201
  case ExpressionClass::WINDOW:
@@ -185070,6 +185455,9 @@ unique_ptr<Expression> OrderBinder::CreateProjectionReference(ParsedExpression &
185070
185455
  }
185071
185456
 
185072
185457
  unique_ptr<Expression> OrderBinder::CreateExtraReference(unique_ptr<ParsedExpression> expr) {
185458
+ if (!extra_list) {
185459
+ throw InternalException("CreateExtraReference called without extra_list");
185460
+ }
185073
185461
  auto result = CreateProjectionReference(*expr, extra_list->size());
185074
185462
  extra_list->push_back(move(expr));
185075
185463
  return result;
@@ -196882,7 +197270,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
196882
197270
  }
196883
197271
 
196884
197272
  // Alter column to add new constraint
196885
- DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<Constraint> constraint)
197273
+ DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint)
196886
197274
  : info(parent.info), db(parent.db), total_rows(parent.total_rows.load()), row_groups(parent.row_groups),
196887
197275
  is_root(true) {
196888
197276
 
@@ -197057,7 +197445,7 @@ void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScan
197057
197445
 
197058
197446
  bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state,
197059
197447
  const vector<column_t> &column_ids) {
197060
- while (state.current_row_group) {
197448
+ while (state.current_row_group && state.current_row_group->count > 0) {
197061
197449
  idx_t vector_index;
197062
197450
  idx_t max_row;
197063
197451
  if (ClientConfig::GetConfig(context).verify_parallelism) {
@@ -197071,13 +197459,8 @@ bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState
197071
197459
  max_row = state.current_row_group->start + state.current_row_group->count;
197072
197460
  }
197073
197461
  max_row = MinValue<idx_t>(max_row, state.max_row);
197074
- bool need_to_scan;
197075
- if (state.current_row_group->count == 0) {
197076
- need_to_scan = false;
197077
- } else {
197078
- need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197079
- state.current_row_group, vector_index, max_row);
197080
- }
197462
+ bool need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197463
+ state.current_row_group, vector_index, max_row);
197081
197464
  if (ClientConfig::GetConfig(context).verify_parallelism) {
197082
197465
  state.vector_index++;
197083
197466
  if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
@@ -197336,14 +197719,15 @@ static void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bf
197336
197719
  VerifyForeignKeyConstraint(bfk, context, chunk, false);
197337
197720
  }
197338
197721
 
197339
- void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const Constraint *constraint) {
197722
+ void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint) {
197340
197723
  if (constraint->type != ConstraintType::NOT_NULL) {
197341
197724
  throw NotImplementedException("FIXME: ALTER COLUMN with such constraint is not supported yet");
197342
197725
  }
197343
197726
  // scan the original table, check if there's any null value
197344
- auto &not_null_constraint = (NotNullConstraint &)*constraint;
197727
+ auto &not_null_constraint = (BoundNotNullConstraint &)*constraint;
197345
197728
  auto &transaction = Transaction::GetTransaction(context);
197346
197729
  vector<LogicalType> scan_types;
197730
+ D_ASSERT(not_null_constraint.index < parent.column_definitions.size());
197347
197731
  scan_types.push_back(parent.column_definitions[not_null_constraint.index].Type());
197348
197732
  DataChunk scan_chunk;
197349
197733
  auto &allocator = Allocator::Get(context);
@@ -198100,6 +198484,9 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
198100
198484
  return nullptr;
198101
198485
  }
198102
198486
  lock_guard<mutex> stats_guard(stats_lock);
198487
+ if (column_id >= column_stats.size()) {
198488
+ throw InternalException("Call to GetStatistics is out of range");
198489
+ }
198103
198490
  return column_stats[column_id]->stats->Copy();
198104
198491
  }
198105
198492
 
@@ -200914,6 +201301,7 @@ idx_t ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t coun
200914
201301
  }
200915
201302
  // after verifying that there are no conflicts we mark the tuple as deleted
200916
201303
  deleted[rows[i]] = transaction.transaction_id;
201304
+ rows[deleted_tuples] = rows[i];
200917
201305
  deleted_tuples++;
200918
201306
  }
200919
201307
  return deleted_tuples;
@@ -203820,9 +204208,15 @@ void VersionDeleteState::Flush() {
203820
204208
  return;
203821
204209
  }
203822
204210
  // delete in the current info
203823
- delete_count += current_info->Delete(transaction, rows, count);
203824
- // now push the delete into the undo buffer
203825
- transaction.PushDelete(table, current_info, rows, count, base_row + chunk_row);
204211
+ // it is possible for delete statements to delete the same tuple multiple times when combined with a USING clause
204212
+ // in the current_info->Delete, we check which tuples are actually deleted (excluding duplicate deletions)
204213
+ // this is returned in the actual_delete_count
204214
+ auto actual_delete_count = current_info->Delete(transaction, rows, count);
204215
+ delete_count += actual_delete_count;
204216
+ if (actual_delete_count > 0) {
204217
+ // now push the delete into the undo buffer, but only if any deletes were actually performed
204218
+ transaction.PushDelete(table, current_info, rows, actual_delete_count, base_row + chunk_row);
204219
+ }
203826
204220
  count = 0;
203827
204221
  }
203828
204222
 
@@ -206658,6 +207052,7 @@ void CleanupState::CleanupUpdate(UpdateInfo *info) {
206658
207052
 
206659
207053
  void CleanupState::CleanupDelete(DeleteInfo *info) {
206660
207054
  auto version_table = info->table;
207055
+ D_ASSERT(version_table->info->cardinality >= info->count);
206661
207056
  version_table->info->cardinality -= info->count;
206662
207057
  if (version_table->info->indexes.Empty()) {
206663
207058
  // this table has no indexes: no cleanup to be done
@@ -322205,6 +322600,8 @@ exit:
322205
322600
  // See the end of this file for a list
322206
322601
 
322207
322602
 
322603
+ // otherwise we have different definitions for mbedtls_pk_context / mbedtls_sha256_context
322604
+ #define MBEDTLS_ALLOW_PRIVATE_ACCESS
322208
322605
 
322209
322606
 
322210
322607