duckdb 0.5.1-dev25.0 → 0.5.1-dev255.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -620,7 +620,88 @@ public:
620
620
 
621
621
  } // namespace duckdb
622
622
 
623
+ //===----------------------------------------------------------------------===//
624
+ // DuckDB
625
+ //
626
+ // extension_functions.hpp
627
+ //
628
+ //
629
+ //===----------------------------------------------------------------------===//
630
+
631
+
632
+
633
+
623
634
 
635
+ namespace duckdb {
636
+
637
+ struct ExtensionFunction {
638
+ char function[48];
639
+ char extension[48];
640
+ };
641
+
642
+ static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
643
+ {"->>", "json"},
644
+ {"array_to_json", "json"},
645
+ {"create_fts_index", "fts"},
646
+ {"dbgen", "tpch"},
647
+ {"drop_fts_index", "fts"},
648
+ {"dsdgen", "tpcds"},
649
+ {"excel_text", "excel"},
650
+ {"from_json", "json"},
651
+ {"from_json_strict", "json"},
652
+ {"from_substrait", "substrait"},
653
+ {"get_substrait", "substrait"},
654
+ {"get_substrait_json", "substrait"},
655
+ {"icu_calendar_names", "icu"},
656
+ {"icu_sort_key", "icu"},
657
+ {"json", "json"},
658
+ {"json_array", "json"},
659
+ {"json_array_length", "json"},
660
+ {"json_extract", "json"},
661
+ {"json_extract_path", "json"},
662
+ {"json_extract_path_text", "json"},
663
+ {"json_extract_string", "json"},
664
+ {"json_group_array", "json"},
665
+ {"json_group_object", "json"},
666
+ {"json_group_structure", "json"},
667
+ {"json_merge_patch", "json"},
668
+ {"json_object", "json"},
669
+ {"json_quote", "json"},
670
+ {"json_structure", "json"},
671
+ {"json_transform", "json"},
672
+ {"json_transform_strict", "json"},
673
+ {"json_type", "json"},
674
+ {"json_valid", "json"},
675
+ {"make_timestamptz", "icu"},
676
+ {"parquet_metadata", "parquet"},
677
+ {"parquet_scan", "parquet"},
678
+ {"parquet_schema", "parquet"},
679
+ {"pg_timezone_names", "icu"},
680
+ {"postgres_attach", "postgres_scanner"},
681
+ {"postgres_scan", "postgres_scanner"},
682
+ {"postgres_scan_pushdown", "postgres_scanner"},
683
+ {"read_json_objects", "json"},
684
+ {"read_ndjson_objects", "json"},
685
+ {"read_parquet", "parquet"},
686
+ {"row_to_json", "json"},
687
+ {"sqlite_attach", "sqlite_scanner"},
688
+ {"sqlite_scan", "sqlite_scanner"},
689
+ {"stem", "fts"},
690
+ {"text", "excel"},
691
+ {"to_json", "json"},
692
+ {"tpcds", "tpcds"},
693
+ {"tpcds_answers", "tpcds"},
694
+ {"tpcds_queries", "tpcds"},
695
+ {"tpch", "tpch"},
696
+ {"tpch_answers", "tpch"},
697
+ {"tpch_queries", "tpch"},
698
+ {"visualize_diff_profiling_output", "visualizer"},
699
+ {"visualize_json_profiling_output", "visualizer"},
700
+ {"visualize_last_profiling_output", "visualizer"},
701
+ };
702
+ } // namespace duckdb
703
+
704
+ #include <algorithm>
624
705
  namespace duckdb {
625
706
 
626
707
  string SimilarCatalogEntry::GetQualifiedName() const {
@@ -823,6 +904,16 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
823
904
  return {most_similar.first, most_similar.second, schema_of_most_similar};
824
905
  }
825
906
 
907
+ string FindExtension(const string &function_name) {
908
+ auto size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction);
909
+ auto it = std::lower_bound(
910
+ EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + size, function_name,
911
+ [](const ExtensionFunction &element, const string &value) { return element.function < value; });
912
+ if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
913
+ return it->extension;
914
+ }
915
+ return "";
916
+ }
826
917
  CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
827
918
  CatalogType type, const vector<SchemaCatalogEntry *> &schemas,
828
919
  QueryErrorContext error_context) {
@@ -836,7 +927,12 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
836
927
  }
837
928
  });
838
929
  auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
839
-
930
+ auto extension_name = FindExtension(entry_name);
931
+ if (!extension_name.empty()) {
932
+ return CatalogException("Function with name %s is not on the catalog, but it exists in the %s extension. To "
933
+ "Install and Load the extension, run: INSTALL %s; LOAD %s;",
934
+ entry_name, extension_name, extension_name, extension_name);
935
+ }
840
936
  string did_you_mean;
841
937
  if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
842
938
  did_you_mean = "\nDid you mean \"" + unseen_entry.GetQualifiedName() + "\"?";
@@ -3667,6 +3763,19 @@ idx_t TableCatalogEntry::StandardColumnCount() const {
3667
3763
  return count;
3668
3764
  }
3669
3765
 
3766
+ unique_ptr<BaseStatistics> TableCatalogEntry::GetStatistics(ClientContext &context, column_t column_id) {
3767
+ if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
3768
+ return nullptr;
3769
+ }
3770
+ if (column_id >= columns.size()) {
3771
+ throw InternalException("TableCatalogEntry::GetStatistics column_id out of range");
3772
+ }
3773
+ if (columns[column_id].Generated()) {
3774
+ return nullptr;
3775
+ }
3776
+ return storage->GetStatistics(context, columns[column_id].StorageOid());
3777
+ }
3778
+
3670
3779
  unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) {
3671
3780
  D_ASSERT(!internal);
3672
3781
  if (info->type != AlterType::ALTER_TABLE) {
@@ -3942,7 +4051,7 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
3942
4051
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
3943
4052
  storage);
3944
4053
  }
3945
- auto new_storage = make_shared<DataTable>(context, *storage, removed_index);
4054
+ auto new_storage = make_shared<DataTable>(context, *storage, columns[removed_index].StorageOid());
3946
4055
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
3947
4056
  new_storage);
3948
4057
  }
@@ -3956,7 +4065,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetDefault(ClientContext &context, S
3956
4065
  auto copy = columns[i].Copy();
3957
4066
  if (default_idx == i) {
3958
4067
  // set the default value of this column
3959
- D_ASSERT(!copy.Generated()); // Shouldnt reach here - DEFAULT value isn't supported for Generated Columns
4068
+ if (copy.Generated()) {
4069
+ throw BinderException("Cannot SET DEFAULT for generated column \"%s\"", columns[i].Name());
4070
+ }
3960
4071
  copy.SetDefaultValue(info.expression ? info.expression->Copy() : nullptr);
3961
4072
  }
3962
4073
  create_info->columns.push_back(move(copy));
@@ -3981,6 +4092,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
3981
4092
  }
3982
4093
 
3983
4094
  idx_t not_null_idx = GetColumnIndex(info.column_name);
4095
+ if (columns[not_null_idx].Generated()) {
4096
+ throw BinderException("Unsupported constraint for generated column!");
4097
+ }
3984
4098
  bool has_not_null = false;
3985
4099
  for (idx_t i = 0; i < constraints.size(); i++) {
3986
4100
  auto constraint = constraints[i]->Copy();
@@ -4004,8 +4118,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
4004
4118
  storage);
4005
4119
  }
4006
4120
 
4007
- // Return with new storage info
4008
- auto new_storage = make_shared<DataTable>(context, *storage, make_unique<NotNullConstraint>(not_null_idx));
4121
+ // Return with new storage info. Note that we need the bound column index here.
4122
+ auto new_storage = make_shared<DataTable>(context, *storage,
4123
+ make_unique<BoundNotNullConstraint>(columns[not_null_idx].StorageOid()));
4009
4124
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
4010
4125
  new_storage);
4011
4126
  }
@@ -4111,12 +4226,19 @@ unique_ptr<CatalogEntry> TableCatalogEntry::ChangeColumnType(ClientContext &cont
4111
4226
  auto expression = info.expression->Copy();
4112
4227
  auto bound_expression = expr_binder.Bind(expression);
4113
4228
  auto bound_create_info = binder->BindCreateTableInfo(move(create_info));
4229
+ vector<column_t> storage_oids;
4114
4230
  if (bound_columns.empty()) {
4115
- bound_columns.push_back(COLUMN_IDENTIFIER_ROW_ID);
4231
+ storage_oids.push_back(COLUMN_IDENTIFIER_ROW_ID);
4232
+ }
4233
+ // transform to storage_oid
4234
+ else {
4235
+ for (idx_t i = 0; i < bound_columns.size(); i++) {
4236
+ storage_oids.push_back(columns[bound_columns[i]].StorageOid());
4237
+ }
4116
4238
  }
4117
4239
 
4118
- auto new_storage =
4119
- make_shared<DataTable>(context, *storage, change_idx, info.target_type, move(bound_columns), *bound_expression);
4240
+ auto new_storage = make_shared<DataTable>(context, *storage, columns[change_idx].StorageOid(), info.target_type,
4241
+ move(storage_oids), *bound_expression);
4120
4242
  auto result =
4121
4243
  make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), new_storage);
4122
4244
  return move(result);
@@ -4364,7 +4486,7 @@ void TableCatalogEntry::CommitAlter(AlterInfo &info) {
4364
4486
  }
4365
4487
  }
4366
4488
  D_ASSERT(removed_index != DConstants::INVALID_INDEX);
4367
- storage->CommitDropColumn(removed_index);
4489
+ storage->CommitDropColumn(columns[removed_index].StorageOid());
4368
4490
  }
4369
4491
 
4370
4492
  void TableCatalogEntry::CommitDrop() {
@@ -4934,12 +5056,16 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
4934
5056
  throw CatalogException(rename_err_msg, original_name, value->name);
4935
5057
  }
4936
5058
  }
4937
- PutMapping(context, value->name, entry_index);
4938
- DeleteMapping(context, original_name);
4939
5059
  }
4940
5060
  //! Check the dependency manager to verify that there are no conflicting dependencies with this alter
4941
5061
  catalog.dependency_manager->AlterObject(context, entry, value.get());
4942
5062
 
5063
+ if (value->name != original_name) {
5064
+ // Do PutMapping and DeleteMapping after dependency check
5065
+ PutMapping(context, value->name, entry_index);
5066
+ DeleteMapping(context, original_name);
5067
+ }
5068
+
4943
5069
  value->timestamp = transaction.transaction_id;
4944
5070
  value->child = move(entries[entry_index]);
4945
5071
  value->child->parent = value.get();
@@ -6506,7 +6632,7 @@ static void GetBitPosition(idx_t row_idx, idx_t &current_byte, uint8_t &current_
6506
6632
  }
6507
6633
 
6508
6634
  static void UnsetBit(uint8_t *data, idx_t current_byte, uint8_t current_bit) {
6509
- data[current_byte] &= ~(1 << current_bit);
6635
+ data[current_byte] &= ~((uint64_t)1 << current_bit);
6510
6636
  }
6511
6637
 
6512
6638
  static void NextBit(idx_t &current_byte, uint8_t &current_bit) {
@@ -16715,9 +16841,15 @@ string FileSystem::ConvertSeparators(const string &path) {
16715
16841
  }
16716
16842
 
16717
16843
  string FileSystem::ExtractBaseName(const string &path) {
16844
+ if (path.empty()) {
16845
+ return string();
16846
+ }
16718
16847
  auto normalized_path = ConvertSeparators(path);
16719
16848
  auto sep = PathSeparator();
16720
- auto vec = StringUtil::Split(StringUtil::Split(normalized_path, sep).back(), ".");
16849
+ auto splits = StringUtil::Split(normalized_path, sep);
16850
+ D_ASSERT(!splits.empty());
16851
+ auto vec = StringUtil::Split(splits.back(), ".");
16852
+ D_ASSERT(!vec.empty());
16721
16853
  return vec[0];
16722
16854
  }
16723
16855
 
@@ -19102,6 +19234,8 @@ private:
19102
19234
  //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
19103
19235
  void SetFilePointer(FileHandle &handle, idx_t location);
19104
19236
  idx_t GetFilePointer(FileHandle &handle);
19237
+
19238
+ vector<string> FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path);
19105
19239
  };
19106
19240
 
19107
19241
  } // namespace duckdb
@@ -19983,6 +20117,26 @@ static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bo
19983
20117
  });
19984
20118
  }
19985
20119
 
20120
+ vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) {
20121
+ vector<string> result;
20122
+ if (FileExists(path) || IsPipe(path)) {
20123
+ result.push_back(path);
20124
+ } else if (!absolute_path) {
20125
+ Value value;
20126
+ if (opener->TryGetCurrentSetting("file_search_path", value)) {
20127
+ auto search_paths_str = value.ToString();
20128
+ std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
20129
+ for (const auto &search_path : search_paths) {
20130
+ auto joined_path = JoinPath(search_path, path);
20131
+ if (FileExists(joined_path) || IsPipe(joined_path)) {
20132
+ result.push_back(joined_path);
20133
+ }
20134
+ }
20135
+ }
20136
+ }
20137
+ return result;
20138
+ }
20139
+
19986
20140
  vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
19987
20141
  if (path.empty()) {
19988
20142
  return vector<string>();
@@ -20029,23 +20183,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20029
20183
  // Check if the path has a glob at all
20030
20184
  if (!HasGlob(path)) {
20031
20185
  // no glob: return only the file (if it exists or is a pipe)
20032
- vector<string> result;
20033
- if (FileExists(path) || IsPipe(path)) {
20034
- result.push_back(path);
20035
- } else if (!absolute_path) {
20036
- Value value;
20037
- if (opener->TryGetCurrentSetting("file_search_path", value)) {
20038
- auto search_paths_str = value.ToString();
20039
- std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
20040
- for (const auto &search_path : search_paths) {
20041
- auto joined_path = JoinPath(search_path, path);
20042
- if (FileExists(joined_path) || IsPipe(joined_path)) {
20043
- result.push_back(joined_path);
20044
- }
20045
- }
20046
- }
20047
- }
20048
- return result;
20186
+ return FetchFileWithoutGlob(path, opener, absolute_path);
20049
20187
  }
20050
20188
  vector<string> previous_directories;
20051
20189
  if (absolute_path) {
@@ -20079,7 +20217,12 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20079
20217
  }
20080
20218
  }
20081
20219
  }
20082
- if (is_last_chunk || result.empty()) {
20220
+ if (result.empty()) {
20221
+ // no result found that matches the glob
20222
+ // last ditch effort: search the path as a string literal
20223
+ return FetchFileWithoutGlob(path, opener, absolute_path);
20224
+ }
20225
+ if (is_last_chunk) {
20083
20226
  return result;
20084
20227
  }
20085
20228
  previous_directories = move(result);
@@ -22428,14 +22571,16 @@ struct IntervalToStringCast {
22428
22571
  if (micros < 0) {
22429
22572
  // negative time: append negative sign
22430
22573
  buffer[length++] = '-';
22574
+ } else {
22431
22575
  micros = -micros;
22432
22576
  }
22433
- int64_t hour = micros / Interval::MICROS_PER_HOUR;
22434
- micros -= hour * Interval::MICROS_PER_HOUR;
22435
- int64_t min = micros / Interval::MICROS_PER_MINUTE;
22436
- micros -= min * Interval::MICROS_PER_MINUTE;
22437
- int64_t sec = micros / Interval::MICROS_PER_SEC;
22438
- micros -= sec * Interval::MICROS_PER_SEC;
22577
+ int64_t hour = -(micros / Interval::MICROS_PER_HOUR);
22578
+ micros += hour * Interval::MICROS_PER_HOUR;
22579
+ int64_t min = -(micros / Interval::MICROS_PER_MINUTE);
22580
+ micros += min * Interval::MICROS_PER_MINUTE;
22581
+ int64_t sec = -(micros / Interval::MICROS_PER_SEC);
22582
+ micros += sec * Interval::MICROS_PER_SEC;
22583
+ micros = -micros;
22439
22584
 
22440
22585
  if (hour < 10) {
22441
22586
  buffer[length++] = '0';
@@ -28558,7 +28703,7 @@ template <idx_t radix_bits>
28558
28703
  struct RadixPartitioningConstants {
28559
28704
  public:
28560
28705
  static constexpr const idx_t NUM_RADIX_BITS = radix_bits;
28561
- static constexpr const idx_t NUM_PARTITIONS = 1 << NUM_RADIX_BITS;
28706
+ static constexpr const idx_t NUM_PARTITIONS = (idx_t)1 << NUM_RADIX_BITS;
28562
28707
  static constexpr const idx_t TMP_BUF_SIZE = 8;
28563
28708
 
28564
28709
  public:
@@ -28576,7 +28721,7 @@ private:
28576
28721
  struct RadixPartitioning {
28577
28722
  public:
28578
28723
  static idx_t NumberOfPartitions(idx_t radix_bits) {
28579
- return 1 << radix_bits;
28724
+ return (idx_t)1 << radix_bits;
28580
28725
  }
28581
28726
 
28582
28727
  //! Partition the data in block_collection/string_heap to multiple partitions
@@ -33336,6 +33481,9 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
33336
33481
 
33337
33482
  static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
33338
33483
  #ifdef DEBUG
33484
+ if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
33485
+ return;
33486
+ }
33339
33487
  idx_t entry_idx;
33340
33488
  idx_t idx_in_entry;
33341
33489
  ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
@@ -35673,7 +35821,10 @@ struct SortConstants {
35673
35821
 
35674
35822
  struct SortLayout {
35675
35823
  public:
35824
+ SortLayout() {
35825
+ }
35676
35826
  explicit SortLayout(const vector<BoundOrderByNode> &orders);
35827
+ SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const;
35677
35828
 
35678
35829
  public:
35679
35830
  idx_t column_count;
@@ -37324,6 +37475,32 @@ SortLayout::SortLayout(const vector<BoundOrderByNode> &orders)
37324
37475
  blob_layout.Initialize(blob_layout_types);
37325
37476
  }
37326
37477
 
37478
+ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
37479
+ SortLayout result;
37480
+ result.column_count = num_prefix_cols;
37481
+ result.all_constant = true;
37482
+ result.comparison_size = 0;
37483
+ for (idx_t col_idx = 0; col_idx < num_prefix_cols; col_idx++) {
37484
+ result.order_types.push_back(order_types[col_idx]);
37485
+ result.order_by_null_types.push_back(order_by_null_types[col_idx]);
37486
+ result.logical_types.push_back(logical_types[col_idx]);
37487
+
37488
+ result.all_constant = result.all_constant && constant_size[col_idx];
37489
+ result.constant_size.push_back(constant_size[col_idx]);
37490
+
37491
+ result.comparison_size += column_sizes[col_idx];
37492
+ result.column_sizes.push_back(column_sizes[col_idx]);
37493
+
37494
+ result.prefix_lengths.push_back(prefix_lengths[col_idx]);
37495
+ result.stats.push_back(stats[col_idx]);
37496
+ result.has_null.push_back(has_null[col_idx]);
37497
+ }
37498
+ result.entry_size = entry_size;
37499
+ result.blob_layout = blob_layout;
37500
+ result.sorting_to_blob_col = sorting_to_blob_col;
37501
+ return result;
37502
+ }
37503
+
37327
37504
  LocalSortState::LocalSortState() : initialized(false) {
37328
37505
  }
37329
37506
 
@@ -39403,7 +39580,7 @@ public:
39403
39580
  namespace duckdb {
39404
39581
 
39405
39582
  enum class UnicodeType { INVALID, ASCII, UNICODE };
39406
- enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE };
39583
+ enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };
39407
39584
 
39408
39585
  class Utf8Proc {
39409
39586
  public:
@@ -47573,11 +47750,36 @@ Value Value::CreateValue(dtime_t value) {
47573
47750
  return Value::TIME(value);
47574
47751
  }
47575
47752
 
47753
+ template <>
47754
+ Value Value::CreateValue(dtime_tz_t value) {
47755
+ return Value::TIMETZ(value);
47756
+ }
47757
+
47576
47758
  template <>
47577
47759
  Value Value::CreateValue(timestamp_t value) {
47578
47760
  return Value::TIMESTAMP(value);
47579
47761
  }
47580
47762
 
47763
+ template <>
47764
+ Value Value::CreateValue(timestamp_sec_t value) {
47765
+ return Value::TIMESTAMPSEC(value);
47766
+ }
47767
+
47768
+ template <>
47769
+ Value Value::CreateValue(timestamp_ms_t value) {
47770
+ return Value::TIMESTAMPMS(value);
47771
+ }
47772
+
47773
+ template <>
47774
+ Value Value::CreateValue(timestamp_ns_t value) {
47775
+ return Value::TIMESTAMPNS(value);
47776
+ }
47777
+
47778
+ template <>
47779
+ Value Value::CreateValue(timestamp_tz_t value) {
47780
+ return Value::TIMESTAMPTZ(value);
47781
+ }
47782
+
47581
47783
  template <>
47582
47784
  Value Value::CreateValue(const char *value) {
47583
47785
  return Value(string(value));
@@ -49150,19 +49352,6 @@ void Vector::Resize(idx_t cur_size, idx_t new_size) {
49150
49352
  }
49151
49353
  }
49152
49354
 
49153
- // FIXME Just like DECIMAL, it's important that type_info gets considered when determining whether or not to cast
49154
- // just comparing internal type is not always enough
49155
- static bool ValueShouldBeCast(const LogicalType &incoming, const LogicalType &target) {
49156
- if (incoming.InternalType() != target.InternalType()) {
49157
- return true;
49158
- }
49159
- if (incoming.id() == LogicalTypeId::DECIMAL && incoming.id() == target.id()) {
49160
- //! Compare the type_info
49161
- return incoming != target;
49162
- }
49163
- return false;
49164
- }
49165
-
49166
49355
  void Vector::SetValue(idx_t index, const Value &val) {
49167
49356
  if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
49168
49357
  // dictionary: apply dictionary and forward to child
@@ -49170,10 +49359,11 @@ void Vector::SetValue(idx_t index, const Value &val) {
49170
49359
  auto &child = DictionaryVector::Child(*this);
49171
49360
  return child.SetValue(sel_vector.get_index(index), val);
49172
49361
  }
49173
- if (ValueShouldBeCast(val.type(), GetType())) {
49362
+ if (val.type() != GetType()) {
49174
49363
  SetValue(index, val.CastAs(GetType()));
49175
49364
  return;
49176
49365
  }
49366
+ D_ASSERT(val.type().InternalType() == GetType().InternalType());
49177
49367
 
49178
49368
  validity.EnsureWritable();
49179
49369
  validity.Set(index, !val.IsNull());
@@ -49424,7 +49614,10 @@ Value Vector::GetValue(const Vector &v_p, idx_t index_p) {
49424
49614
  auto value = GetValueInternal(v_p, index_p);
49425
49615
  // set the alias of the type to the correct value, if there is a type alias
49426
49616
  if (v_p.GetType().HasAlias()) {
49427
- value.type().SetAlias(v_p.GetType().GetAlias());
49617
+ value.type().CopyAuxInfo(v_p.GetType());
49618
+ }
49619
+ if (v_p.GetType().id() != LogicalTypeId::AGGREGATE_STATE && value.type().id() != LogicalTypeId::AGGREGATE_STATE) {
49620
+ D_ASSERT(v_p.GetType() == value.type());
49428
49621
  }
49429
49622
  return value;
49430
49623
  }
@@ -50216,6 +50409,24 @@ void StringVector::AddHeapReference(Vector &vector, Vector &other) {
50216
50409
  StringVector::AddBuffer(vector, other.auxiliary);
50217
50410
  }
50218
50411
 
50412
+ Vector &MapVector::GetKeys(Vector &vector) {
50413
+ auto &entries = StructVector::GetEntries(vector);
50414
+ D_ASSERT(entries.size() == 2);
50415
+ return *entries[0];
50416
+ }
50417
+ Vector &MapVector::GetValues(Vector &vector) {
50418
+ auto &entries = StructVector::GetEntries(vector);
50419
+ D_ASSERT(entries.size() == 2);
50420
+ return *entries[1];
50421
+ }
50422
+
50423
+ const Vector &MapVector::GetKeys(const Vector &vector) {
50424
+ return GetKeys((Vector &)vector);
50425
+ }
50426
+ const Vector &MapVector::GetValues(const Vector &vector) {
50427
+ return GetValues((Vector &)vector);
50428
+ }
50429
+
50219
50430
  vector<unique_ptr<Vector>> &StructVector::GetEntries(Vector &vector) {
50220
50431
  D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::MAP);
50221
50432
  if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
@@ -51491,6 +51702,7 @@ public:
51491
51702
  if (!alias.empty()) {
51492
51703
  return false;
51493
51704
  }
51705
+ //! We only need to compare aliases when both types have them in this case
51494
51706
  return true;
51495
51707
  }
51496
51708
  if (alias != other_p->alias) {
@@ -51504,8 +51716,7 @@ public:
51504
51716
  if (type != other_p->type) {
51505
51717
  return false;
51506
51718
  }
51507
- auto &other = (ExtraTypeInfo &)*other_p;
51508
- return alias == other.alias && EqualsInternal(other_p);
51719
+ return alias == other_p->alias && EqualsInternal(other_p);
51509
51720
  }
51510
51721
  //! Serializes a ExtraTypeInfo to a stand-alone binary blob
51511
51722
  virtual void Serialize(FieldWriter &writer) const {};
@@ -52184,10 +52395,7 @@ LogicalType LogicalType::Deserialize(Deserializer &source) {
52184
52395
  return LogicalType(id, move(info));
52185
52396
  }
52186
52397
 
52187
- bool LogicalType::operator==(const LogicalType &rhs) const {
52188
- if (id_ != rhs.id_) {
52189
- return false;
52190
- }
52398
+ bool LogicalType::EqualTypeInfo(const LogicalType &rhs) const {
52191
52399
  if (type_info_.get() == rhs.type_info_.get()) {
52192
52400
  return true;
52193
52401
  }
@@ -52199,6 +52407,13 @@ bool LogicalType::operator==(const LogicalType &rhs) const {
52199
52407
  }
52200
52408
  }
52201
52409
 
52410
+ bool LogicalType::operator==(const LogicalType &rhs) const {
52411
+ if (id_ != rhs.id_) {
52412
+ return false;
52413
+ }
52414
+ return EqualTypeInfo(rhs);
52415
+ }
52416
+
52202
52417
  } // namespace duckdb
52203
52418
 
52204
52419
 
@@ -63069,6 +63284,16 @@ bool DistinctAggregateData::IsDistinct(idx_t index) const {
63069
63284
 
63070
63285
 
63071
63286
 
63287
+ //===----------------------------------------------------------------------===//
63288
+ // DuckDB
63289
+ //
63290
+ // duckdb/parallel/base_pipeline_event.hpp
63291
+ //
63292
+ //
63293
+ //===----------------------------------------------------------------------===//
63294
+
63295
+
63296
+
63072
63297
  //===----------------------------------------------------------------------===//
63073
63298
  // DuckDB
63074
63299
  //
@@ -63142,6 +63367,22 @@ protected:
63142
63367
 
63143
63368
 
63144
63369
 
63370
+ namespace duckdb {
63371
+
63372
+ //! A BasePipelineEvent is used as the basis of any event that belongs to a specific pipeline
63373
+ class BasePipelineEvent : public Event {
63374
+ public:
63375
+ BasePipelineEvent(shared_ptr<Pipeline> pipeline);
63376
+ BasePipelineEvent(Pipeline &pipeline);
63377
+
63378
+ //! The pipeline that this event belongs to
63379
+ shared_ptr<Pipeline> pipeline;
63380
+ };
63381
+
63382
+ } // namespace duckdb
63383
+
63384
+
63385
+
63145
63386
  namespace duckdb {
63146
63387
 
63147
63388
  PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
@@ -63298,16 +63539,15 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
63298
63539
  }
63299
63540
  }
63300
63541
 
63301
- class HashAggregateFinalizeEvent : public Event {
63542
+ class HashAggregateFinalizeEvent : public BasePipelineEvent {
63302
63543
  public:
63303
63544
  HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
63304
63545
  Pipeline *pipeline_p)
63305
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p) {
63546
+ : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
63306
63547
  }
63307
63548
 
63308
63549
  const PhysicalHashAggregate &op;
63309
63550
  HashAggregateGlobalState &gstate;
63310
- Pipeline *pipeline;
63311
63551
 
63312
63552
  public:
63313
63553
  void Schedule() override {
@@ -64569,15 +64809,14 @@ private:
64569
64809
  };
64570
64810
 
64571
64811
  // TODO: Create tasks and run these in parallel instead of doing this all in Schedule, single threaded
64572
- class DistinctAggregateFinalizeEvent : public Event {
64812
+ class DistinctAggregateFinalizeEvent : public BasePipelineEvent {
64573
64813
  public:
64574
64814
  DistinctAggregateFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
64575
- Pipeline *pipeline_p, ClientContext &context)
64576
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), context(context) {
64815
+ Pipeline &pipeline_p, ClientContext &context)
64816
+ : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
64577
64817
  }
64578
64818
  const PhysicalUngroupedAggregate &op;
64579
64819
  UngroupedAggregateGlobalState &gstate;
64580
- Pipeline *pipeline;
64581
64820
  ClientContext &context;
64582
64821
 
64583
64822
  public:
@@ -64590,16 +64829,15 @@ public:
64590
64829
  }
64591
64830
  };
64592
64831
 
64593
- class DistinctCombineFinalizeEvent : public Event {
64832
+ class DistinctCombineFinalizeEvent : public BasePipelineEvent {
64594
64833
  public:
64595
64834
  DistinctCombineFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
64596
- Pipeline *pipeline_p, ClientContext &client)
64597
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), client(client) {
64835
+ Pipeline &pipeline_p, ClientContext &client)
64836
+ : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), client(client) {
64598
64837
  }
64599
64838
 
64600
64839
  const PhysicalUngroupedAggregate &op;
64601
64840
  UngroupedAggregateGlobalState &gstate;
64602
- Pipeline *pipeline;
64603
64841
  ClientContext &client;
64604
64842
 
64605
64843
  public:
@@ -64615,7 +64853,7 @@ public:
64615
64853
  SetTasks(move(tasks));
64616
64854
 
64617
64855
  //! Now that all tables are combined, it's time to do the distinct aggregations
64618
- auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, pipeline, client);
64856
+ auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
64619
64857
  this->InsertEvent(move(new_event));
64620
64858
  }
64621
64859
  };
@@ -64644,12 +64882,12 @@ SinkFinalizeType PhysicalUngroupedAggregate::FinalizeDistinct(Pipeline &pipeline
64644
64882
  }
64645
64883
  }
64646
64884
  if (any_partitioned) {
64647
- auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, &pipeline, context);
64885
+ auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
64648
64886
  event.InsertEvent(move(new_event));
64649
64887
  } else {
64650
64888
  //! Hashtables aren't partitioned, they dont need to be joined first
64651
64889
  //! So we can compute the aggregate already
64652
- auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, &pipeline, context);
64890
+ auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
64653
64891
  event.InsertEvent(move(new_event));
64654
64892
  }
64655
64893
  return SinkFinalizeType::READY;
@@ -64927,12 +65165,14 @@ public:
64927
65165
 
64928
65166
  WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
64929
65167
  const Types &payload_types, idx_t max_mem, bool external)
64930
- : memory_per_thread(max_mem), count(0), partition_layout(partitions) {
65168
+ : memory_per_thread(max_mem), count(0) {
64931
65169
 
64932
65170
  RowLayout payload_layout;
64933
65171
  payload_layout.Initialize(payload_types);
64934
65172
  global_sort = make_unique<GlobalSortState>(buffer_manager, orders, payload_layout);
64935
65173
  global_sort->external = external;
65174
+
65175
+ partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
64936
65176
  }
64937
65177
 
64938
65178
  void Combine(LocalSortState &local_sort) {
@@ -66393,19 +66633,18 @@ private:
66393
66633
  WindowGlobalHashGroup &hash_group;
66394
66634
  };
66395
66635
 
66396
- class WindowMergeEvent : public Event {
66636
+ class WindowMergeEvent : public BasePipelineEvent {
66397
66637
  public:
66398
66638
  WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, WindowGlobalHashGroup &hash_group_p)
66399
- : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p), hash_group(hash_group_p) {
66639
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p), hash_group(hash_group_p) {
66400
66640
  }
66401
66641
 
66402
66642
  WindowGlobalSinkState &gstate;
66403
- Pipeline &pipeline;
66404
66643
  WindowGlobalHashGroup &hash_group;
66405
66644
 
66406
66645
  public:
66407
66646
  void Schedule() override {
66408
- auto &context = pipeline.GetClientContext();
66647
+ auto &context = pipeline->GetClientContext();
66409
66648
 
66410
66649
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
66411
66650
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -66420,7 +66659,7 @@ public:
66420
66659
 
66421
66660
  void FinishEvent() override {
66422
66661
  hash_group.global_sort->CompleteMergeRound(true);
66423
- CreateMergeTasks(pipeline, *this, gstate, hash_group);
66662
+ CreateMergeTasks(*pipeline, *this, gstate, hash_group);
66424
66663
  }
66425
66664
 
66426
66665
  static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
@@ -67829,6 +68068,11 @@ public:
67829
68068
 
67830
68069
  private:
67831
68070
  static const vector<string> PathComponents();
68071
+ //! For tagged releases we use the tag, else we use the git commit hash
68072
+ static const string GetVersionDirectoryName();
68073
+ //! Version tags occur with and without 'v', tag in extension path is always with 'v'
68074
+ static const string NormalizeVersionTag(const string &version_tag);
68075
+ static bool IsRelease(const string &version_tag);
67832
68076
 
67833
68077
  private:
67834
68078
  static ExtensionLoadResult LoadExtensionInternal(DuckDB &db, const std::string &extension, bool initial_load);
@@ -70755,18 +70999,17 @@ private:
70755
70999
  bool parallel;
70756
71000
  };
70757
71001
 
70758
- class HashJoinFinalizeEvent : public Event {
71002
+ class HashJoinFinalizeEvent : public BasePipelineEvent {
70759
71003
  public:
70760
71004
  HashJoinFinalizeEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink)
70761
- : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink) {
71005
+ : BasePipelineEvent(pipeline_p), sink(sink) {
70762
71006
  }
70763
71007
 
70764
- Pipeline &pipeline;
70765
71008
  HashJoinGlobalSinkState &sink;
70766
71009
 
70767
71010
  public:
70768
71011
  void Schedule() override {
70769
- auto &context = pipeline.GetClientContext();
71012
+ auto &context = pipeline->GetClientContext();
70770
71013
  auto parallel_construct_count =
70771
71014
  context.config.verify_parallelism ? STANDARD_VECTOR_SIZE : PARALLEL_CONSTRUCT_COUNT;
70772
71015
 
@@ -70833,20 +71076,19 @@ private:
70833
71076
  JoinHashTable &local_ht;
70834
71077
  };
70835
71078
 
70836
- class HashJoinPartitionEvent : public Event {
71079
+ class HashJoinPartitionEvent : public BasePipelineEvent {
70837
71080
  public:
70838
71081
  HashJoinPartitionEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink,
70839
71082
  vector<unique_ptr<JoinHashTable>> &local_hts)
70840
- : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink), local_hts(local_hts) {
71083
+ : BasePipelineEvent(pipeline_p), sink(sink), local_hts(local_hts) {
70841
71084
  }
70842
71085
 
70843
- Pipeline &pipeline;
70844
71086
  HashJoinGlobalSinkState &sink;
70845
71087
  vector<unique_ptr<JoinHashTable>> &local_hts;
70846
71088
 
70847
71089
  public:
70848
71090
  void Schedule() override {
70849
- auto &context = pipeline.GetClientContext();
71091
+ auto &context = pipeline->GetClientContext();
70850
71092
  vector<unique_ptr<Task>> partition_tasks;
70851
71093
  partition_tasks.reserve(local_hts.size());
70852
71094
  for (auto &local_ht : local_hts) {
@@ -70859,7 +71101,7 @@ public:
70859
71101
  void FinishEvent() override {
70860
71102
  local_hts.clear();
70861
71103
  sink.hash_table->PrepareExternalFinalize();
70862
- sink.ScheduleFinalize(pipeline, *this);
71104
+ sink.ScheduleFinalize(*pipeline, *this);
70863
71105
  }
70864
71106
  };
70865
71107
 
@@ -74563,21 +74805,20 @@ private:
74563
74805
  GlobalSortedTable &table;
74564
74806
  };
74565
74807
 
74566
- class RangeJoinMergeEvent : public Event {
74808
+ class RangeJoinMergeEvent : public BasePipelineEvent {
74567
74809
  public:
74568
74810
  using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
74569
74811
 
74570
74812
  public:
74571
74813
  RangeJoinMergeEvent(GlobalSortedTable &table_p, Pipeline &pipeline_p)
74572
- : Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
74814
+ : BasePipelineEvent(pipeline_p), table(table_p) {
74573
74815
  }
74574
74816
 
74575
74817
  GlobalSortedTable &table;
74576
- Pipeline &pipeline;
74577
74818
 
74578
74819
  public:
74579
74820
  void Schedule() override {
74580
- auto &context = pipeline.GetClientContext();
74821
+ auto &context = pipeline->GetClientContext();
74581
74822
 
74582
74823
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
74583
74824
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -74596,7 +74837,7 @@ public:
74596
74837
  global_sort_state.CompleteMergeRound(true);
74597
74838
  if (global_sort_state.sorted_blocks.size() > 1) {
74598
74839
  // Multiple blocks remaining: Schedule the next round
74599
- table.ScheduleMergeTasks(pipeline, *this);
74840
+ table.ScheduleMergeTasks(*pipeline, *this);
74600
74841
  }
74601
74842
  }
74602
74843
  };
@@ -74984,18 +75225,17 @@ private:
74984
75225
  OrderGlobalState &state;
74985
75226
  };
74986
75227
 
74987
- class OrderMergeEvent : public Event {
75228
+ class OrderMergeEvent : public BasePipelineEvent {
74988
75229
  public:
74989
75230
  OrderMergeEvent(OrderGlobalState &gstate_p, Pipeline &pipeline_p)
74990
- : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p) {
75231
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p) {
74991
75232
  }
74992
75233
 
74993
75234
  OrderGlobalState &gstate;
74994
- Pipeline &pipeline;
74995
75235
 
74996
75236
  public:
74997
75237
  void Schedule() override {
74998
- auto &context = pipeline.GetClientContext();
75238
+ auto &context = pipeline->GetClientContext();
74999
75239
 
75000
75240
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
75001
75241
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -75014,7 +75254,7 @@ public:
75014
75254
  global_sort_state.CompleteMergeRound();
75015
75255
  if (global_sort_state.sorted_blocks.size() > 1) {
75016
75256
  // Multiple blocks remaining: Schedule the next round
75017
- PhysicalOrder::ScheduleMergeTasks(pipeline, *this, gstate);
75257
+ PhysicalOrder::ScheduleMergeTasks(*pipeline, *this, gstate);
75018
75258
  }
75019
75259
  }
75020
75260
  };
@@ -79914,10 +80154,17 @@ void PhysicalCreateIndex::GetData(ExecutionContext &context, DataChunk &chunk, G
79914
80154
  return;
79915
80155
  }
79916
80156
 
80157
+ // convert virtual column ids to storage column ids
80158
+ vector<column_t> storage_ids;
80159
+ for (auto &column_id : column_ids) {
80160
+ D_ASSERT(column_id < table.columns.size());
80161
+ storage_ids.push_back(table.columns[column_id].StorageOid());
80162
+ }
80163
+
79917
80164
  unique_ptr<Index> index;
79918
80165
  switch (info->index_type) {
79919
80166
  case IndexType::ART: {
79920
- index = make_unique<ART>(column_ids, unbound_expressions, info->constraint_type, *context.client.db);
80167
+ index = make_unique<ART>(storage_ids, unbound_expressions, info->constraint_type, *context.client.db);
79921
80168
  break;
79922
80169
  }
79923
80170
  default:
@@ -80222,11 +80469,10 @@ unique_ptr<GlobalSinkState> PhysicalCreateTableAs::GetGlobalSinkState(ClientCont
80222
80469
  SinkResultType PhysicalCreateTableAs::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p,
80223
80470
  DataChunk &input) const {
80224
80471
  auto &sink = (CreateTableAsGlobalState &)state;
80225
- if (sink.table) {
80226
- lock_guard<mutex> client_guard(sink.append_lock);
80227
- sink.table->storage->Append(*sink.table, context.client, input);
80228
- sink.inserted_count += input.size();
80229
- }
80472
+ D_ASSERT(sink.table);
80473
+ lock_guard<mutex> client_guard(sink.append_lock);
80474
+ sink.table->storage->Append(*sink.table, context.client, input);
80475
+ sink.inserted_count += input.size();
80230
80476
  return SinkResultType::NEED_MORE_INPUT;
80231
80477
  }
80232
80478
 
@@ -80636,6 +80882,7 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
80636
80882
  void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
80637
80883
  op_state.reset();
80638
80884
  sink_state.reset();
80885
+ pipelines.clear();
80639
80886
 
80640
80887
  // recursive CTE
80641
80888
  state.SetPipelineSource(current, this);
@@ -80935,7 +81182,7 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(Allocator &allocator, Buffe
80935
81182
  total_required_bits += group_bits;
80936
81183
  }
80937
81184
  // the total amount of groups we allocate space for is 2^required_bits
80938
- total_groups = 1 << total_required_bits;
81185
+ total_groups = (uint64_t)1 << total_required_bits;
80939
81186
  // we don't need to store the groups in a perfect hash table, since the group keys can be deduced by their location
80940
81187
  grouping_columns = group_types_p.size();
80941
81188
  layout.Initialize(move(aggregate_objects_p));
@@ -81119,7 +81366,7 @@ static void ReconstructGroupVectorTemplated(uint32_t group_values[], Value &min,
81119
81366
  static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t required_bits, idx_t shift,
81120
81367
  idx_t entry_count, Vector &result) {
81121
81368
  // construct the mask for this entry
81122
- idx_t mask = (1 << required_bits) - 1;
81369
+ idx_t mask = ((uint64_t)1 << required_bits) - 1;
81123
81370
  switch (result.GetType().InternalType()) {
81124
81371
  case PhysicalType::INT8:
81125
81372
  ReconstructGroupVectorTemplated<int8_t>(group_values, min, mask, shift, entry_count, result);
@@ -85366,7 +85613,7 @@ void RadixPartitionedHashTable::SetGroupingValues() {
85366
85613
  for (idx_t i = 0; i < grouping.size(); i++) {
85367
85614
  if (grouping_set.find(grouping[i]) == grouping_set.end()) {
85368
85615
  // we don't group on this value!
85369
- grouping_value += 1 << (grouping.size() - (i + 1));
85616
+ grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
85370
85617
  }
85371
85618
  }
85372
85619
  grouping_values.push_back(Value::BIGINT(grouping_value));
@@ -90924,7 +91171,21 @@ struct ModeIncluded {
90924
91171
  const idx_t bias;
90925
91172
  };
90926
91173
 
90927
- template <typename KEY_TYPE>
91174
+ struct ModeAssignmentStandard {
91175
+ template <class INPUT_TYPE, class RESULT_TYPE>
91176
+ static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
91177
+ return RESULT_TYPE(input);
91178
+ }
91179
+ };
91180
+
91181
+ struct ModeAssignmentString {
91182
+ template <class INPUT_TYPE, class RESULT_TYPE>
91183
+ static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
91184
+ return StringVector::AddString(result, input);
91185
+ }
91186
+ };
91187
+
91188
+ template <typename KEY_TYPE, typename ASSIGN_OP>
90928
91189
  struct ModeFunction {
90929
91190
  template <class STATE>
90930
91191
  static void Initialize(STATE *state) {
@@ -91037,7 +91298,7 @@ struct ModeFunction {
91037
91298
  }
91038
91299
 
91039
91300
  if (state->valid) {
91040
- rdata[rid] = RESULT_TYPE(*state->mode);
91301
+ rdata[rid] = ASSIGN_OP::template Assign<INPUT_TYPE, RESULT_TYPE>(result, *state->mode);
91041
91302
  } else {
91042
91303
  rmask.Set(rid, false);
91043
91304
  }
@@ -91053,10 +91314,10 @@ struct ModeFunction {
91053
91314
  }
91054
91315
  };
91055
91316
 
91056
- template <typename INPUT_TYPE, typename KEY_TYPE>
91317
+ template <typename INPUT_TYPE, typename KEY_TYPE, typename ASSIGN_OP = ModeAssignmentStandard>
91057
91318
  AggregateFunction GetTypedModeFunction(const LogicalType &type) {
91058
91319
  using STATE = ModeState<KEY_TYPE>;
91059
- using OP = ModeFunction<KEY_TYPE>;
91320
+ using OP = ModeFunction<KEY_TYPE, ASSIGN_OP>;
91060
91321
  auto func = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, INPUT_TYPE, OP>(type, type);
91061
91322
  func.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, INPUT_TYPE, OP>;
91062
91323
  return func;
@@ -91092,7 +91353,7 @@ AggregateFunction GetModeAggregate(const LogicalType &type) {
91092
91353
  return GetTypedModeFunction<interval_t, interval_t>(type);
91093
91354
 
91094
91355
  case PhysicalType::VARCHAR:
91095
- return GetTypedModeFunction<string_t, string>(type);
91356
+ return GetTypedModeFunction<string_t, string, ModeAssignmentString>(type);
91096
91357
 
91097
91358
  default:
91098
91359
  throw NotImplementedException("Unimplemented mode aggregate");
@@ -93281,21 +93542,21 @@ AggregateFunction GetHistogramFunction(const LogicalType &type) {
93281
93542
  case LogicalType::VARCHAR:
93282
93543
  return GetMapType<HistogramStringFunctor, string, IS_ORDERED>(type);
93283
93544
  case LogicalType::TIMESTAMP:
93284
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93545
+ return GetMapType<HistogramFunctor, timestamp_t, IS_ORDERED>(type);
93285
93546
  case LogicalType::TIMESTAMP_TZ:
93286
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93547
+ return GetMapType<HistogramFunctor, timestamp_tz_t, IS_ORDERED>(type);
93287
93548
  case LogicalType::TIMESTAMP_S:
93288
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93549
+ return GetMapType<HistogramFunctor, timestamp_sec_t, IS_ORDERED>(type);
93289
93550
  case LogicalType::TIMESTAMP_MS:
93290
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93551
+ return GetMapType<HistogramFunctor, timestamp_ms_t, IS_ORDERED>(type);
93291
93552
  case LogicalType::TIMESTAMP_NS:
93292
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93553
+ return GetMapType<HistogramFunctor, timestamp_ns_t, IS_ORDERED>(type);
93293
93554
  case LogicalType::TIME:
93294
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93555
+ return GetMapType<HistogramFunctor, dtime_t, IS_ORDERED>(type);
93295
93556
  case LogicalType::TIME_TZ:
93296
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93557
+ return GetMapType<HistogramFunctor, dtime_tz_t, IS_ORDERED>(type);
93297
93558
  case LogicalType::DATE:
93298
- return GetMapType<HistogramFunctor, int32_t, IS_ORDERED>(type);
93559
+ return GetMapType<HistogramFunctor, date_t, IS_ORDERED>(type);
93299
93560
  default:
93300
93561
  throw InternalException("Unimplemented histogram aggregate");
93301
93562
  }
@@ -96859,7 +97120,8 @@ struct DateDiff {
96859
97120
  struct WeekOperator {
96860
97121
  template <class TA, class TB, class TR>
96861
97122
  static inline TR Operation(TA startdate, TB enddate) {
96862
- return Date::Epoch(enddate) / Interval::SECS_PER_WEEK - Date::Epoch(startdate) / Interval::SECS_PER_WEEK;
97123
+ return Date::Epoch(Date::GetMondayOfCurrentWeek(enddate)) / Interval::SECS_PER_WEEK -
97124
+ Date::Epoch(Date::GetMondayOfCurrentWeek(startdate)) / Interval::SECS_PER_WEEK;
96863
97125
  }
96864
97126
  };
96865
97127
 
@@ -103243,12 +103505,49 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
103243
103505
  result, state_vector.state_vector, count);
103244
103506
  break;
103245
103507
  case PhysicalType::INT32:
103246
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103247
- result, state_vector.state_vector, count);
103508
+ if (key_type.id() == LogicalTypeId::DATE) {
103509
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, date_t>(
103510
+ result, state_vector.state_vector, count);
103511
+ } else {
103512
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103513
+ result, state_vector.state_vector, count);
103514
+ }
103248
103515
  break;
103249
103516
  case PhysicalType::INT64:
103250
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103251
- result, state_vector.state_vector, count);
103517
+ switch (key_type.id()) {
103518
+ case LogicalTypeId::TIME:
103519
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_t>(
103520
+ result, state_vector.state_vector, count);
103521
+ break;
103522
+ case LogicalTypeId::TIME_TZ:
103523
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_tz_t>(
103524
+ result, state_vector.state_vector, count);
103525
+ break;
103526
+ case LogicalTypeId::TIMESTAMP:
103527
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_t>(
103528
+ result, state_vector.state_vector, count);
103529
+ break;
103530
+ case LogicalTypeId::TIMESTAMP_MS:
103531
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ms_t>(
103532
+ result, state_vector.state_vector, count);
103533
+ break;
103534
+ case LogicalTypeId::TIMESTAMP_NS:
103535
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ns_t>(
103536
+ result, state_vector.state_vector, count);
103537
+ break;
103538
+ case LogicalTypeId::TIMESTAMP_SEC:
103539
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_sec_t>(
103540
+ result, state_vector.state_vector, count);
103541
+ break;
103542
+ case LogicalTypeId::TIMESTAMP_TZ:
103543
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_tz_t>(
103544
+ result, state_vector.state_vector, count);
103545
+ break;
103546
+ default:
103547
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103548
+ result, state_vector.state_vector, count);
103549
+ break;
103550
+ }
103252
103551
  break;
103253
103552
  case PhysicalType::FLOAT:
103254
103553
  FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, float>(
@@ -104318,18 +104617,12 @@ void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_list
104318
104617
  static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
104319
104618
  D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
104320
104619
  auto count = args.size();
104321
- Vector &lists = args.data[0];
104620
+ Vector &input_lists = args.data[0];
104322
104621
 
104323
104622
  result.SetVectorType(VectorType::FLAT_VECTOR);
104324
104623
  auto &result_validity = FlatVector::Validity(result);
104325
104624
 
104326
- for (auto &v : args.data) {
104327
- if (v.GetVectorType() != VectorType::FLAT_VECTOR && v.GetVectorType() != VectorType::CONSTANT_VECTOR) {
104328
- v.Flatten(count);
104329
- }
104330
- }
104331
-
104332
- if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
104625
+ if (input_lists.GetType().id() == LogicalTypeId::SQLNULL) {
104333
104626
  result_validity.SetInvalid(0);
104334
104627
  return;
104335
104628
  }
@@ -104344,15 +104637,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104344
104637
  LocalSortState local_sort_state;
104345
104638
  local_sort_state.Initialize(global_sort_state, buffer_manager);
104346
104639
 
104640
+ // this ensures that we do not change the order of the entries in the input chunk
104641
+ VectorOperations::Copy(input_lists, result, count, 0, 0);
104642
+
104347
104643
  // get the child vector
104348
- auto lists_size = ListVector::GetListSize(lists);
104349
- auto &child_vector = ListVector::GetEntry(lists);
104644
+ auto lists_size = ListVector::GetListSize(result);
104645
+ auto &child_vector = ListVector::GetEntry(result);
104350
104646
  UnifiedVectorFormat child_data;
104351
104647
  child_vector.ToUnifiedFormat(lists_size, child_data);
104352
104648
 
104353
104649
  // get the lists data
104354
104650
  UnifiedVectorFormat lists_data;
104355
- lists.ToUnifiedFormat(count, lists_data);
104651
+ result.ToUnifiedFormat(count, lists_data);
104356
104652
  auto list_entries = (list_entry_t *)lists_data.data;
104357
104653
 
104358
104654
  // create the lists_indices vector, this contains an element for each list's entry,
@@ -104449,8 +104745,6 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104449
104745
  child_vector.Flatten(sel_sorted_idx);
104450
104746
  }
104451
104747
 
104452
- result.Reference(lists);
104453
-
104454
104748
  if (args.AllConstant()) {
104455
104749
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
104456
104750
  }
@@ -105224,16 +105518,21 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
105224
105518
  auto &map = args.data[0];
105225
105519
  auto &key = args.data[1];
105226
105520
 
105227
- UnifiedVectorFormat offset_data;
105521
+ UnifiedVectorFormat map_keys_data;
105522
+ UnifiedVectorFormat key_data;
105228
105523
 
105229
- auto &children = StructVector::GetEntries(map);
105524
+ auto &map_keys = MapVector::GetKeys(map);
105525
+ auto &map_values = MapVector::GetValues(map);
105526
+
105527
+ map_keys.ToUnifiedFormat(args.size(), map_keys_data);
105528
+ key.ToUnifiedFormat(args.size(), key_data);
105230
105529
 
105231
- children[0]->ToUnifiedFormat(args.size(), offset_data);
105232
105530
  for (idx_t row = 0; row < args.size(); row++) {
105233
- idx_t row_index = offset_data.sel->get_index(row);
105234
- auto key_value = key.GetValue(row_index);
105235
- auto offsets = ListVector::Search(*children[0], key_value, offset_data.sel->get_index(row));
105236
- auto values = ListVector::GetValuesFromOffsets(*children[1], offsets);
105531
+ idx_t row_index = map_keys_data.sel->get_index(row);
105532
+ idx_t key_index = key_data.sel->get_index(row);
105533
+ auto key_value = key.GetValue(key_index);
105534
+ auto offsets = ListVector::Search(map_keys, key_value, row_index);
105535
+ auto values = ListVector::GetValuesFromOffsets(map_values, offsets);
105237
105536
  FillResult(values, result, row);
105238
105537
  }
105239
105538
 
@@ -108128,6 +108427,24 @@ interval_t DivideOperator::Operation(interval_t left, int64_t right) {
108128
108427
  return left;
108129
108428
  }
108130
108429
 
108430
+ struct BinaryNumericDivideWrapper {
108431
+ template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
108432
+ static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
108433
+ if (left == NumericLimits<LEFT_TYPE>::Minimum() && right == -1) {
108434
+ throw OutOfRangeException("Overflow in division of %d / %d", left, right);
108435
+ } else if (right == 0) {
108436
+ mask.SetInvalid(idx);
108437
+ return left;
108438
+ } else {
108439
+ return OP::template Operation<LEFT_TYPE, RIGHT_TYPE, RESULT_TYPE>(left, right);
108440
+ }
108441
+ }
108442
+
108443
+ static bool AddsNulls() {
108444
+ return true;
108445
+ }
108446
+ };
108447
+
108131
108448
  struct BinaryZeroIsNullWrapper {
108132
108449
  template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
108133
108450
  static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
@@ -108169,13 +108486,13 @@ template <class OP>
108169
108486
  static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
108170
108487
  switch (type.id()) {
108171
108488
  case LogicalTypeId::TINYINT:
108172
- return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP>;
108489
+ return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP, BinaryNumericDivideWrapper>;
108173
108490
  case LogicalTypeId::SMALLINT:
108174
- return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP>;
108491
+ return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP, BinaryNumericDivideWrapper>;
108175
108492
  case LogicalTypeId::INTEGER:
108176
- return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP>;
108493
+ return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP, BinaryNumericDivideWrapper>;
108177
108494
  case LogicalTypeId::BIGINT:
108178
- return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP>;
108495
+ return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP, BinaryNumericDivideWrapper>;
108179
108496
  case LogicalTypeId::UTINYINT:
108180
108497
  return BinaryScalarFunctionIgnoreZero<uint8_t, uint8_t, uint8_t, OP>;
108181
108498
  case LogicalTypeId::USMALLINT:
@@ -114623,11 +114940,22 @@ static void CurrentSchemaFunction(DataChunk &input, ExpressionState &state, Vect
114623
114940
 
114624
114941
  // current_schemas
114625
114942
  static void CurrentSchemasFunction(DataChunk &input, ExpressionState &state, Vector &result) {
114943
+ if (!input.AllConstant()) {
114944
+ throw NotImplementedException("current_schemas requires a constant input");
114945
+ }
114946
+ if (ConstantVector::IsNull(input.data[0])) {
114947
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
114948
+ ConstantVector::SetNull(result, true);
114949
+ return;
114950
+ }
114951
+ auto implicit_schemas = *ConstantVector::GetData<bool>(input.data[0]);
114626
114952
  vector<Value> schema_list;
114627
- vector<string> search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path->Get();
114953
+ auto &catalog_search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path;
114954
+ vector<string> search_path = implicit_schemas ? catalog_search_path->Get() : catalog_search_path->GetSetPaths();
114628
114955
  std::transform(search_path.begin(), search_path.end(), std::back_inserter(schema_list),
114629
114956
  [](const string &s) -> Value { return Value(s); });
114630
- auto val = Value::LIST(schema_list);
114957
+
114958
+ auto val = Value::LIST(LogicalType::VARCHAR, schema_list);
114631
114959
  result.Reference(val);
114632
114960
  }
114633
114961
 
@@ -114926,8 +115254,8 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
114926
115254
  struct ArrowScanGlobalState : public GlobalTableFunctionState {
114927
115255
  unique_ptr<ArrowArrayStreamWrapper> stream;
114928
115256
  mutex main_mutex;
114929
- bool ready = false;
114930
115257
  idx_t max_threads = 1;
115258
+ bool done = false;
114931
115259
 
114932
115260
  idx_t MaxThreads() const override {
114933
115261
  return max_threads;
@@ -115215,6 +115543,9 @@ idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const Func
115215
115543
  bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
115216
115544
  ArrowScanGlobalState &parallel_state) {
115217
115545
  lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
115546
+ if (parallel_state.done) {
115547
+ return false;
115548
+ }
115218
115549
  state.chunk_offset = 0;
115219
115550
 
115220
115551
  auto current_chunk = parallel_state.stream->GetNextChunk();
@@ -115224,6 +115555,7 @@ bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind
115224
115555
  state.chunk = move(current_chunk);
115225
115556
  //! have we run out of chunks? we are done
115226
115557
  if (!state.chunk->arrow_array.release) {
115558
+ parallel_state.done = true;
115227
115559
  return false;
115228
115560
  }
115229
115561
  return true;
@@ -117625,6 +117957,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
117625
117957
  table_function.named_parameters["skip"] = LogicalType::BIGINT;
117626
117958
  table_function.named_parameters["max_line_size"] = LogicalType::VARCHAR;
117627
117959
  table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR;
117960
+ table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
117628
117961
  }
117629
117962
 
117630
117963
  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
@@ -121455,8 +121788,7 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
121455
121788
  // we don't emit any statistics for tables that have outstanding transaction-local data
121456
121789
  return nullptr;
121457
121790
  }
121458
- auto storage_idx = GetStorageIndex(*bind_data.table, column_id);
121459
- return bind_data.table->storage->GetStatistics(context, storage_idx);
121791
+ return bind_data.table->GetStatistics(context, column_id);
121460
121792
  }
121461
121793
 
121462
121794
  static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
@@ -123028,7 +123360,7 @@ bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row) {
123028
123360
  }
123029
123361
  idx_t entry_idx = row / 64;
123030
123362
  idx_t idx_in_entry = row % 64;
123031
- return validity[entry_idx] & (1 << idx_in_entry);
123363
+ return validity[entry_idx] & ((idx_t)1 << idx_in_entry);
123032
123364
  }
123033
123365
 
123034
123366
  void duckdb_validity_set_row_validity(uint64_t *validity, idx_t row, bool valid) {
@@ -123045,7 +123377,7 @@ void duckdb_validity_set_row_invalid(uint64_t *validity, idx_t row) {
123045
123377
  }
123046
123378
  idx_t entry_idx = row / 64;
123047
123379
  idx_t idx_in_entry = row % 64;
123048
- validity[entry_idx] &= ~(1 << idx_in_entry);
123380
+ validity[entry_idx] &= ~((uint64_t)1 << idx_in_entry);
123049
123381
  }
123050
123382
 
123051
123383
  void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
@@ -123054,7 +123386,7 @@ void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
123054
123386
  }
123055
123387
  idx_t entry_idx = row / 64;
123056
123388
  idx_t idx_in_entry = row % 64;
123057
- validity[entry_idx] |= 1 << idx_in_entry;
123389
+ validity[entry_idx] |= (uint64_t)1 << idx_in_entry;
123058
123390
  }
123059
123391
 
123060
123392
 
@@ -126456,9 +126788,19 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
126456
126788
  case StatementType::INSERT_STATEMENT:
126457
126789
  case StatementType::DELETE_STATEMENT:
126458
126790
  case StatementType::UPDATE_STATEMENT: {
126459
- auto sql = statement->ToString();
126460
126791
  Parser parser;
126461
- parser.ParseQuery(sql);
126792
+ PreservedError error;
126793
+ try {
126794
+ parser.ParseQuery(statement->ToString());
126795
+ } catch (const Exception &ex) {
126796
+ error = PreservedError(ex);
126797
+ } catch (std::exception &ex) {
126798
+ error = PreservedError(ex);
126799
+ }
126800
+ if (error) {
126801
+ // error in verifying query
126802
+ return make_unique<PendingQueryResult>(error);
126803
+ }
126462
126804
  statement = move(parser.statements[0]);
126463
126805
  break;
126464
126806
  }
@@ -137106,8 +137448,27 @@ namespace duckdb {
137106
137448
  //===--------------------------------------------------------------------===//
137107
137449
  // Install Extension
137108
137450
  //===--------------------------------------------------------------------===//
137451
+ const string ExtensionHelper::NormalizeVersionTag(const string &version_tag) {
137452
+ if (version_tag.length() > 0 && version_tag[0] != 'v') {
137453
+ return "v" + version_tag;
137454
+ }
137455
+ return version_tag;
137456
+ }
137457
+
137458
+ bool ExtensionHelper::IsRelease(const string &version_tag) {
137459
+ return !StringUtil::Contains(version_tag, "-dev");
137460
+ }
137461
+
137462
+ const string ExtensionHelper::GetVersionDirectoryName() {
137463
+ if (IsRelease(DuckDB::LibraryVersion())) {
137464
+ return NormalizeVersionTag(DuckDB::LibraryVersion());
137465
+ } else {
137466
+ return DuckDB::SourceID();
137467
+ }
137468
+ }
137469
+
137109
137470
  const vector<string> ExtensionHelper::PathComponents() {
137110
- return vector<string> {".duckdb", "extensions", DuckDB::SourceID(), DuckDB::Platform()};
137471
+ return vector<string> {".duckdb", "extensions", GetVersionDirectoryName(), DuckDB::Platform()};
137111
137472
  }
137112
137473
 
137113
137474
  string ExtensionHelper::ExtensionDirectory(ClientContext &context) {
@@ -137180,7 +137541,7 @@ void ExtensionHelper::InstallExtension(ClientContext &context, const string &ext
137180
137541
  extension_name = "";
137181
137542
  }
137182
137543
 
137183
- auto url = StringUtil::Replace(url_template, "${REVISION}", DuckDB::SourceID());
137544
+ auto url = StringUtil::Replace(url_template, "${REVISION}", GetVersionDirectoryName());
137184
137545
  url = StringUtil::Replace(url, "${PLATFORM}", DuckDB::Platform());
137185
137546
  url = StringUtil::Replace(url, "${NAME}", extension_name);
137186
137547
 
@@ -141402,7 +141763,7 @@ Value ForceCompressionSetting::GetSetting(ClientContext &context) {
141402
141763
  //===--------------------------------------------------------------------===//
141403
141764
  void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input) {
141404
141765
  auto &config = ClientConfig::GetConfig(context);
141405
- config.home_directory = input.IsNull() ? input.ToString() : string();
141766
+ config.home_directory = input.IsNull() ? string() : input.ToString();
141406
141767
  }
141407
141768
 
141408
141769
  Value HomeDirectorySetting::GetSetting(ClientContext &context) {
@@ -142358,9 +142719,7 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
142358
142719
  // Get HLL stats here
142359
142720
  auto actual_binding = relation_column_to_original_column[key];
142360
142721
 
142361
- // sometimes base stats is null (test_709.test) returns null for base stats while
142362
- // there is still a catalog table. Anybody know anything about this?
142363
- auto base_stats = catalog_table->storage->GetStatistics(context, actual_binding.column_index);
142722
+ auto base_stats = catalog_table->GetStatistics(context, actual_binding.column_index);
142364
142723
  if (base_stats) {
142365
142724
  count = base_stats->GetDistinctCount();
142366
142725
  }
@@ -143056,6 +143415,7 @@ private:
143056
143415
 
143057
143416
 
143058
143417
 
143418
+
143059
143419
  namespace duckdb {
143060
143420
 
143061
143421
  class DeliminatorPlanUpdater : LogicalOperatorVisitor {
@@ -143083,7 +143443,15 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
143083
143443
  cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
143084
143444
  continue;
143085
143445
  }
143086
- auto &colref = (BoundColumnRefExpression &)*cond.right;
143446
+ Expression *rhs = cond.right.get();
143447
+ while (rhs->type == ExpressionType::OPERATOR_CAST) {
143448
+ auto &cast = (BoundCastExpression &)*rhs;
143449
+ rhs = cast.child.get();
143450
+ }
143451
+ if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
143452
+ throw InternalException("Erorr in deliminator: expected a bound column reference");
143453
+ }
143454
+ auto &colref = (BoundColumnRefExpression &)*rhs;
143087
143455
  if (projection_map.find(colref.binding) != projection_map.end()) {
143088
143456
  // value on the right is a projection of removed DelimGet
143089
143457
  for (idx_t i = 0; i < decs->size(); i++) {
@@ -144231,7 +144599,10 @@ FilterResult FilterCombiner::AddBoundComparisonFilter(Expression *expr) {
144231
144599
  auto node = GetNode(left_is_scalar ? comparison.right.get() : comparison.left.get());
144232
144600
  idx_t equivalence_set = GetEquivalenceSet(node);
144233
144601
  auto scalar = left_is_scalar ? comparison.left.get() : comparison.right.get();
144234
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144602
+ Value constant_value;
144603
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144604
+ return FilterResult::UNSATISFIABLE;
144605
+ }
144235
144606
  if (constant_value.IsNull()) {
144236
144607
  // comparisons with null are always null (i.e. will never result in rows)
144237
144608
  return FilterResult::UNSATISFIABLE;
@@ -144312,7 +144683,11 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144312
144683
  }
144313
144684
  if (expr->IsFoldable()) {
144314
144685
  // scalar condition, evaluate it
144315
- auto result = ExpressionExecutor::EvaluateScalar(*expr).CastAs(LogicalType::BOOLEAN);
144686
+ Value result;
144687
+ if (!ExpressionExecutor::TryEvaluateScalar(*expr, result)) {
144688
+ return FilterResult::UNSUPPORTED;
144689
+ }
144690
+ result = result.CastAs(LogicalType::BOOLEAN);
144316
144691
  // check if the filter passes
144317
144692
  if (result.IsNull() || !BooleanValue::Get(result)) {
144318
144693
  // the filter does not pass the scalar test, create an empty result
@@ -144336,7 +144711,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144336
144711
 
144337
144712
  if (lower_is_scalar) {
144338
144713
  auto scalar = comparison.lower.get();
144339
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144714
+ Value constant_value;
144715
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144716
+ return FilterResult::UNSUPPORTED;
144717
+ }
144340
144718
 
144341
144719
  // create the ExpressionValueInformation
144342
144720
  ExpressionValueInformation info;
@@ -144369,7 +144747,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144369
144747
 
144370
144748
  if (upper_is_scalar) {
144371
144749
  auto scalar = comparison.upper.get();
144372
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144750
+ Value constant_value;
144751
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144752
+ return FilterResult::UNSUPPORTED;
144753
+ }
144373
144754
 
144374
144755
  // create the ExpressionValueInformation
144375
144756
  ExpressionValueInformation info;
@@ -145281,7 +145662,6 @@ unique_ptr<Expression> InClauseRewriter::VisitReplace(BoundOperatorExpression &e
145281
145662
  // IN clause with many children: try to generate a mark join that replaces this IN expression
145282
145663
  // we can only do this if the expressions in the expression list are scalar
145283
145664
  for (idx_t i = 1; i < expr.children.size(); i++) {
145284
- D_ASSERT(expr.children[i]->return_type == in_type);
145285
145665
  if (!expr.children[i]->IsFoldable()) {
145286
145666
  // non-scalar expression
145287
145667
  all_scalar = false;
@@ -147720,21 +148100,35 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownAggregate(unique_ptr<Logical
147720
148100
  FilterPushdown child_pushdown(optimizer);
147721
148101
  for (idx_t i = 0; i < filters.size(); i++) {
147722
148102
  auto &f = *filters[i];
147723
- // check if any aggregate or GROUPING functions are in the set
147724
- if (f.bindings.find(aggr.aggregate_index) == f.bindings.end() &&
147725
- f.bindings.find(aggr.groupings_index) == f.bindings.end()) {
147726
- // no aggregate! we can push this down
147727
- // rewrite any group bindings within the filter
147728
- f.filter = ReplaceGroupBindings(aggr, move(f.filter));
147729
- // add the filter to the child node
147730
- if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
147731
- // filter statically evaluates to false, strip tree
147732
- return make_unique<LogicalEmptyResult>(move(op));
148103
+ if (f.bindings.find(aggr.aggregate_index) != f.bindings.end()) {
148104
+ // filter on aggregate: cannot pushdown
148105
+ continue;
148106
+ }
148107
+ if (f.bindings.find(aggr.groupings_index) != f.bindings.end()) {
148108
+ // filter on GROUPINGS function: cannot pushdown
148109
+ continue;
148110
+ }
148111
+ // if there are any empty grouping sets, we cannot push down filters
148112
+ bool has_empty_grouping_sets = false;
148113
+ for (auto &grp : aggr.grouping_sets) {
148114
+ if (grp.empty()) {
148115
+ has_empty_grouping_sets = true;
147733
148116
  }
147734
- // erase the filter from here
147735
- filters.erase(filters.begin() + i);
147736
- i--;
147737
148117
  }
148118
+ if (has_empty_grouping_sets) {
148119
+ continue;
148120
+ }
148121
+ // no aggregate! we can push this down
148122
+ // rewrite any group bindings within the filter
148123
+ f.filter = ReplaceGroupBindings(aggr, move(f.filter));
148124
+ // add the filter to the child node
148125
+ if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
148126
+ // filter statically evaluates to false, strip tree
148127
+ return make_unique<LogicalEmptyResult>(move(op));
148128
+ }
148129
+ // erase the filter from here
148130
+ filters.erase(filters.begin() + i);
148131
+ i--;
147738
148132
  }
147739
148133
  child_pushdown.GenerateFilters();
147740
148134
 
@@ -152440,6 +152834,19 @@ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
152440
152834
  } // namespace duckdb
152441
152835
 
152442
152836
 
152837
+ namespace duckdb {
152838
+
152839
+ BasePipelineEvent::BasePipelineEvent(shared_ptr<Pipeline> pipeline_p)
152840
+ : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
152841
+ }
152842
+
152843
+ BasePipelineEvent::BasePipelineEvent(Pipeline &pipeline_p)
152844
+ : Event(pipeline_p.executor), pipeline(pipeline_p.shared_from_this()) {
152845
+ }
152846
+
152847
+ } // namespace duckdb
152848
+
152849
+
152443
152850
 
152444
152851
 
152445
152852
 
@@ -152559,16 +152966,13 @@ public:
152559
152966
 
152560
152967
 
152561
152968
 
152562
-
152563
152969
  namespace duckdb {
152564
152970
 
152565
- class PipelineEvent : public Event {
152971
+ //! A PipelineEvent is responsible for scheduling a pipeline
152972
+ class PipelineEvent : public BasePipelineEvent {
152566
152973
  public:
152567
152974
  PipelineEvent(shared_ptr<Pipeline> pipeline);
152568
152975
 
152569
- //! The pipeline that this event belongs to
152570
- shared_ptr<Pipeline> pipeline;
152571
-
152572
152976
  public:
152573
152977
  void Schedule() override;
152574
152978
  void FinishEvent() override;
@@ -152696,17 +153100,13 @@ private:
152696
153100
 
152697
153101
 
152698
153102
 
152699
-
152700
153103
  namespace duckdb {
152701
153104
  class Executor;
152702
153105
 
152703
- class PipelineFinishEvent : public Event {
153106
+ class PipelineFinishEvent : public BasePipelineEvent {
152704
153107
  public:
152705
153108
  PipelineFinishEvent(shared_ptr<Pipeline> pipeline);
152706
153109
 
152707
- //! The pipeline that this event belongs to
152708
- shared_ptr<Pipeline> pipeline;
152709
-
152710
153110
  public:
152711
153111
  void Schedule() override;
152712
153112
  void FinishEvent() override;
@@ -152733,6 +153133,9 @@ Executor &Executor::Get(ClientContext &context) {
152733
153133
 
152734
153134
  void Executor::AddEvent(shared_ptr<Event> event) {
152735
153135
  lock_guard<mutex> elock(executor_lock);
153136
+ if (cancelled) {
153137
+ return;
153138
+ }
152736
153139
  events.push_back(move(event));
152737
153140
  }
152738
153141
 
@@ -153036,6 +153439,7 @@ void Executor::CancelTasks() {
153036
153439
  vector<weak_ptr<Pipeline>> weak_references;
153037
153440
  {
153038
153441
  lock_guard<mutex> elock(executor_lock);
153442
+ cancelled = true;
153039
153443
  weak_references.reserve(pipelines.size());
153040
153444
  for (auto &pipeline : pipelines) {
153041
153445
  weak_references.push_back(weak_ptr<Pipeline>(pipeline));
@@ -153112,10 +153516,10 @@ PendingExecutionResult Executor::ExecuteTask() {
153112
153516
  lock_guard<mutex> elock(executor_lock);
153113
153517
  pipelines.clear();
153114
153518
  NextExecutor();
153115
- if (!exceptions.empty()) { // LCOV_EXCL_START
153519
+ if (HasError()) { // LCOV_EXCL_START
153116
153520
  // an exception has occurred executing one of the pipelines
153117
153521
  execution_result = PendingExecutionResult::EXECUTION_ERROR;
153118
- ThrowExceptionInternal();
153522
+ ThrowException();
153119
153523
  } // LCOV_EXCL_STOP
153120
153524
  execution_result = PendingExecutionResult::RESULT_READY;
153121
153525
  return execution_result;
@@ -153124,6 +153528,7 @@ PendingExecutionResult Executor::ExecuteTask() {
153124
153528
  void Executor::Reset() {
153125
153529
  lock_guard<mutex> elock(executor_lock);
153126
153530
  physical_plan = nullptr;
153531
+ cancelled = false;
153127
153532
  owned_plan.reset();
153128
153533
  root_executor.reset();
153129
153534
  root_pipelines.clear();
@@ -153160,7 +153565,7 @@ vector<LogicalType> Executor::GetTypes() {
153160
153565
  }
153161
153566
 
153162
153567
  void Executor::PushError(PreservedError exception) {
153163
- lock_guard<mutex> elock(executor_lock);
153568
+ lock_guard<mutex> elock(error_lock);
153164
153569
  // interrupt execution of any other pipelines that belong to this executor
153165
153570
  context.interrupted = true;
153166
153571
  // push the exception onto the stack
@@ -153168,20 +153573,16 @@ void Executor::PushError(PreservedError exception) {
153168
153573
  }
153169
153574
 
153170
153575
  bool Executor::HasError() {
153171
- lock_guard<mutex> elock(executor_lock);
153576
+ lock_guard<mutex> elock(error_lock);
153172
153577
  return !exceptions.empty();
153173
153578
  }
153174
153579
 
153175
153580
  void Executor::ThrowException() {
153176
- lock_guard<mutex> elock(executor_lock);
153177
- ThrowExceptionInternal();
153178
- }
153179
-
153180
- void Executor::ThrowExceptionInternal() { // LCOV_EXCL_START
153581
+ lock_guard<mutex> elock(error_lock);
153181
153582
  D_ASSERT(!exceptions.empty());
153182
153583
  auto &entry = exceptions[0];
153183
153584
  entry.Throw();
153184
- } // LCOV_EXCL_STOP
153585
+ }
153185
153586
 
153186
153587
  void Executor::Flush(ThreadContext &tcontext) {
153187
153588
  profiler->Flush(tcontext.profiler);
@@ -153446,6 +153847,9 @@ void Pipeline::Ready() {
153446
153847
  }
153447
153848
 
153448
153849
  void Pipeline::Finalize(Event &event) {
153850
+ if (executor.HasError()) {
153851
+ return;
153852
+ }
153449
153853
  D_ASSERT(ready);
153450
153854
  try {
153451
153855
  auto sink_state = sink->Finalize(*this, event, executor.context, *sink->sink_state);
@@ -153556,16 +153960,25 @@ void PipelineCompleteEvent::FinalizeFinish() {
153556
153960
  } // namespace duckdb
153557
153961
 
153558
153962
 
153963
+
153559
153964
  namespace duckdb {
153560
153965
 
153561
- PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p)
153562
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
153966
+ PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
153563
153967
  }
153564
153968
 
153565
153969
  void PipelineEvent::Schedule() {
153566
153970
  auto event = shared_from_this();
153567
- pipeline->Schedule(event);
153568
- D_ASSERT(total_tasks > 0);
153971
+ auto &executor = pipeline->executor;
153972
+ try {
153973
+ pipeline->Schedule(event);
153974
+ D_ASSERT(total_tasks > 0);
153975
+ } catch (Exception &ex) {
153976
+ executor.PushError(PreservedError(ex));
153977
+ } catch (std::exception &ex) {
153978
+ executor.PushError(PreservedError(ex));
153979
+ } catch (...) { // LCOV_EXCL_START
153980
+ executor.PushError(PreservedError("Unknown exception in Finalize!"));
153981
+ } // LCOV_EXCL_STOP
153569
153982
  }
153570
153983
 
153571
153984
  void PipelineEvent::FinishEvent() {
@@ -153948,8 +154361,7 @@ void PipelineExecutor::EndOperator(PhysicalOperator *op, DataChunk *chunk) {
153948
154361
 
153949
154362
  namespace duckdb {
153950
154363
 
153951
- PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p)
153952
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
154364
+ PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
153953
154365
  }
153954
154366
 
153955
154367
  void PipelineFinishEvent::Schedule() {
@@ -167484,7 +167896,7 @@ string QueryNode::ResultModifiersToString() const {
167484
167896
  } else if (modifier.type == ResultModifierType::LIMIT_PERCENT_MODIFIER) {
167485
167897
  auto &limit_p_modifier = (LimitPercentModifier &)modifier;
167486
167898
  if (limit_p_modifier.limit) {
167487
- result += " LIMIT " + limit_p_modifier.limit->ToString() + " %";
167899
+ result += " LIMIT (" + limit_p_modifier.limit->ToString() + ") %";
167488
167900
  }
167489
167901
  if (limit_p_modifier.offset) {
167490
167902
  result += " OFFSET " + limit_p_modifier.offset->ToString();
@@ -171360,7 +171772,7 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause *de_with_clause,
171360
171772
  }
171361
171773
  // we need a query
171362
171774
  if (!cte->ctequery || cte->ctequery->type != duckdb_libpgquery::T_PGSelectStmt) {
171363
- throw InternalException("A CTE needs a SELECT");
171775
+ throw NotImplementedException("A CTE needs a SELECT");
171364
171776
  }
171365
171777
 
171366
171778
  // CTE transformation can either result in inlining for non recursive CTEs, or in recursive CTE bindings
@@ -171777,7 +172189,7 @@ LogicalType Transformer::TransformTypeName(duckdb_libpgquery::PGTypeName *type_n
171777
172189
 
171778
172190
  result_type = LogicalType::MAP(move(children));
171779
172191
  } else {
171780
- int8_t width, scale;
172192
+ int64_t width, scale;
171781
172193
  if (base_type == LogicalTypeId::DECIMAL) {
171782
172194
  // default decimal width/scale
171783
172195
  width = 18;
@@ -174956,6 +175368,8 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
174956
175368
  // we didn't bind columns, try again in children
174957
175369
  return BindResult(error);
174958
175370
  }
175371
+ } else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
175372
+ return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
174959
175373
  }
174960
175374
  if (!filter_error.empty()) {
174961
175375
  return BindResult(filter_error);
@@ -174963,8 +175377,9 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
174963
175377
 
174964
175378
  if (aggr.filter) {
174965
175379
  auto &child = (BoundExpression &)*aggr.filter;
174966
- bound_filter = move(child.expr);
175380
+ bound_filter = BoundCastExpression::AddCastToType(move(child.expr), LogicalType::BOOLEAN);
174967
175381
  }
175382
+
174968
175383
  // all children bound successfully
174969
175384
  // extract the children and types
174970
175385
  vector<LogicalType> types;
@@ -176117,7 +176532,7 @@ BindResult ExpressionBinder::BindMacro(FunctionExpression &function, ScalarMacro
176117
176532
  string error =
176118
176533
  MacroFunction::ValidateArguments(*macro_func->function, macro_func->name, function, positionals, defaults);
176119
176534
  if (!error.empty()) {
176120
- return BindResult(binder.FormatError(*expr->get(), error));
176535
+ throw BinderException(binder.FormatError(*expr->get(), error));
176121
176536
  }
176122
176537
 
176123
176538
  // create a MacroBinding to bind this macro's parameters to its arguments
@@ -177140,10 +177555,13 @@ public:
177140
177555
  public:
177141
177556
  unique_ptr<Expression> Bind(unique_ptr<ParsedExpression> expr);
177142
177557
 
177143
- idx_t MaxCount() {
177558
+ idx_t MaxCount() const {
177144
177559
  return max_count;
177145
177560
  }
177146
177561
 
177562
+ bool HasExtraList() const {
177563
+ return extra_list;
177564
+ }
177147
177565
  unique_ptr<Expression> CreateExtraReference(unique_ptr<ParsedExpression> expr);
177148
177566
 
177149
177567
  private:
@@ -177185,6 +177603,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177185
177603
  Value &delimiter_value) {
177186
177604
  auto new_binder = Binder::CreateBinder(context, this, true);
177187
177605
  if (delimiter->HasSubquery()) {
177606
+ if (!order_binder.HasExtraList()) {
177607
+ throw BinderException("Subquery in LIMIT/OFFSET not supported in set operation");
177608
+ }
177188
177609
  return order_binder.CreateExtraReference(move(delimiter));
177189
177610
  }
177190
177611
  ExpressionBinder expr_binder(*new_binder, context);
@@ -177195,6 +177616,8 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177195
177616
  delimiter_value = ExpressionExecutor::EvaluateScalar(*expr).CastAs(type);
177196
177617
  return nullptr;
177197
177618
  }
177619
+ // move any correlated columns to this binder
177620
+ MoveCorrelatedExpressions(*new_binder);
177198
177621
  return expr;
177199
177622
  }
177200
177623
 
@@ -179798,11 +180221,13 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179798
180221
  BindDefaultValues(base.columns, result->bound_defaults);
179799
180222
  }
179800
180223
 
180224
+ idx_t regular_column_count = 0;
179801
180225
  // bind collations to detect any unsupported collation errors
179802
180226
  for (auto &column : base.columns) {
179803
180227
  if (column.Generated()) {
179804
180228
  continue;
179805
180229
  }
180230
+ regular_column_count++;
179806
180231
  if (column.Type().id() == LogicalTypeId::VARCHAR) {
179807
180232
  ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
179808
180233
  }
@@ -179814,6 +180239,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179814
180239
  result->dependencies.insert(type_dependency);
179815
180240
  }
179816
180241
  }
180242
+ if (regular_column_count == 0) {
180243
+ throw BinderException("Creating a table without physical (non-generated) columns is not supported");
180244
+ }
179817
180245
  properties.allow_stream_result = false;
179818
180246
  return result;
179819
180247
  }
@@ -180241,6 +180669,13 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
180241
180669
  info->schema = table->schema->name;
180242
180670
  info->table = table->name;
180243
180671
 
180672
+ // We can not export generated columns
180673
+ for (auto &col : table->columns) {
180674
+ if (!col.Generated()) {
180675
+ info->select_list.push_back(col.GetName());
180676
+ }
180677
+ }
180678
+
180244
180679
  exported_data.table_name = info->table;
180245
180680
  exported_data.schema_name = info->schema;
180246
180681
  exported_data.file_path = info->file_path;
@@ -182282,7 +182717,7 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundEmptyTableRef &ref) {
182282
182717
  namespace duckdb {
182283
182718
 
182284
182719
  unique_ptr<LogicalOperator> Binder::CreatePlan(BoundExpressionListRef &ref) {
182285
- auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(0);
182720
+ auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(GenerateTableIndex());
182286
182721
  // values list, first plan any subqueries in the list
182287
182722
  for (auto &expr_list : ref.values) {
182288
182723
  for (auto &expr : expr_list) {
@@ -184835,7 +185270,7 @@ BindResult ConstantBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr
184835
185270
  case ExpressionClass::COLUMN_REF:
184836
185271
  return BindResult(clause + " cannot contain column names");
184837
185272
  case ExpressionClass::SUBQUERY:
184838
- return BindResult(clause + " cannot contain subqueries");
185273
+ throw BinderException(clause + " cannot contain subqueries");
184839
185274
  case ExpressionClass::DEFAULT:
184840
185275
  return BindResult(clause + " cannot contain DEFAULT clause");
184841
185276
  case ExpressionClass::WINDOW:
@@ -185095,6 +185530,9 @@ unique_ptr<Expression> OrderBinder::CreateProjectionReference(ParsedExpression &
185095
185530
  }
185096
185531
 
185097
185532
  unique_ptr<Expression> OrderBinder::CreateExtraReference(unique_ptr<ParsedExpression> expr) {
185533
+ if (!extra_list) {
185534
+ throw InternalException("CreateExtraReference called without extra_list");
185535
+ }
185098
185536
  auto result = CreateProjectionReference(*expr, extra_list->size());
185099
185537
  extra_list->push_back(move(expr));
185100
185538
  return result;
@@ -196907,7 +197345,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
196907
197345
  }
196908
197346
 
196909
197347
  // Alter column to add new constraint
196910
- DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<Constraint> constraint)
197348
+ DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint)
196911
197349
  : info(parent.info), db(parent.db), total_rows(parent.total_rows.load()), row_groups(parent.row_groups),
196912
197350
  is_root(true) {
196913
197351
 
@@ -197082,7 +197520,7 @@ void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScan
197082
197520
 
197083
197521
  bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state,
197084
197522
  const vector<column_t> &column_ids) {
197085
- while (state.current_row_group) {
197523
+ while (state.current_row_group && state.current_row_group->count > 0) {
197086
197524
  idx_t vector_index;
197087
197525
  idx_t max_row;
197088
197526
  if (ClientConfig::GetConfig(context).verify_parallelism) {
@@ -197096,13 +197534,8 @@ bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState
197096
197534
  max_row = state.current_row_group->start + state.current_row_group->count;
197097
197535
  }
197098
197536
  max_row = MinValue<idx_t>(max_row, state.max_row);
197099
- bool need_to_scan;
197100
- if (state.current_row_group->count == 0) {
197101
- need_to_scan = false;
197102
- } else {
197103
- need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197104
- state.current_row_group, vector_index, max_row);
197105
- }
197537
+ bool need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197538
+ state.current_row_group, vector_index, max_row);
197106
197539
  if (ClientConfig::GetConfig(context).verify_parallelism) {
197107
197540
  state.vector_index++;
197108
197541
  if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
@@ -197361,14 +197794,15 @@ static void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bf
197361
197794
  VerifyForeignKeyConstraint(bfk, context, chunk, false);
197362
197795
  }
197363
197796
 
197364
- void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const Constraint *constraint) {
197797
+ void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint) {
197365
197798
  if (constraint->type != ConstraintType::NOT_NULL) {
197366
197799
  throw NotImplementedException("FIXME: ALTER COLUMN with such constraint is not supported yet");
197367
197800
  }
197368
197801
  // scan the original table, check if there's any null value
197369
- auto &not_null_constraint = (NotNullConstraint &)*constraint;
197802
+ auto &not_null_constraint = (BoundNotNullConstraint &)*constraint;
197370
197803
  auto &transaction = Transaction::GetTransaction(context);
197371
197804
  vector<LogicalType> scan_types;
197805
+ D_ASSERT(not_null_constraint.index < parent.column_definitions.size());
197372
197806
  scan_types.push_back(parent.column_definitions[not_null_constraint.index].Type());
197373
197807
  DataChunk scan_chunk;
197374
197808
  auto &allocator = Allocator::Get(context);
@@ -198125,6 +198559,9 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
198125
198559
  return nullptr;
198126
198560
  }
198127
198561
  lock_guard<mutex> stats_guard(stats_lock);
198562
+ if (column_id >= column_stats.size()) {
198563
+ throw InternalException("Call to GetStatistics is out of range");
198564
+ }
198128
198565
  return column_stats[column_id]->stats->Copy();
198129
198566
  }
198130
198567
 
@@ -200939,6 +201376,7 @@ idx_t ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t coun
200939
201376
  }
200940
201377
  // after verifying that there are no conflicts we mark the tuple as deleted
200941
201378
  deleted[rows[i]] = transaction.transaction_id;
201379
+ rows[deleted_tuples] = rows[i];
200942
201380
  deleted_tuples++;
200943
201381
  }
200944
201382
  return deleted_tuples;
@@ -201266,6 +201704,8 @@ public:
201266
201704
  idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) override;
201267
201705
  idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override;
201268
201706
 
201707
+ void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE) override;
201708
+
201269
201709
  void InitializeAppend(ColumnAppendState &state) override;
201270
201710
  void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) override;
201271
201711
  void RevertAppend(row_t start_row) override;
@@ -203845,9 +204285,15 @@ void VersionDeleteState::Flush() {
203845
204285
  return;
203846
204286
  }
203847
204287
  // delete in the current info
203848
- delete_count += current_info->Delete(transaction, rows, count);
203849
- // now push the delete into the undo buffer
203850
- transaction.PushDelete(table, current_info, rows, count, base_row + chunk_row);
204288
+ // it is possible for delete statements to delete the same tuple multiple times when combined with a USING clause
204289
+ // in the current_info->Delete, we check which tuples are actually deleted (excluding duplicate deletions)
204290
+ // this is returned in the actual_delete_count
204291
+ auto actual_delete_count = current_info->Delete(transaction, rows, count);
204292
+ delete_count += actual_delete_count;
204293
+ if (actual_delete_count > 0) {
204294
+ // now push the delete into the undo buffer, but only if any deletes were actually performed
204295
+ transaction.PushDelete(table, current_info, rows, actual_delete_count, base_row + chunk_row);
204296
+ }
203851
204297
  count = 0;
203852
204298
  }
203853
204299
 
@@ -204224,6 +204670,15 @@ idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t
204224
204670
  return scan_count;
204225
204671
  }
204226
204672
 
204673
+ void StructColumnData::Skip(ColumnScanState &state, idx_t count) {
204674
+ validity.Skip(state.child_states[0], count);
204675
+
204676
+ // skip inside the sub-columns
204677
+ for (idx_t child_idx = 0; child_idx < sub_columns.size(); child_idx++) {
204678
+ sub_columns[child_idx]->Skip(state.child_states[child_idx + 1], count);
204679
+ }
204680
+ }
204681
+
204227
204682
  void StructColumnData::InitializeAppend(ColumnAppendState &state) {
204228
204683
  ColumnAppendState validity_append;
204229
204684
  validity.InitializeAppend(validity_append);
@@ -206683,6 +207138,7 @@ void CleanupState::CleanupUpdate(UpdateInfo *info) {
206683
207138
 
206684
207139
  void CleanupState::CleanupDelete(DeleteInfo *info) {
206685
207140
  auto version_table = info->table;
207141
+ D_ASSERT(version_table->info->cardinality >= info->count);
206686
207142
  version_table->info->cardinality -= info->count;
206687
207143
  if (version_table->info->indexes.Empty()) {
206688
207144
  // this table has no indexes: no cleanup to be done
@@ -260108,49 +260564,84 @@ static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t
260108
260564
  }
260109
260565
  }
260110
260566
 
260111
- UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260112
- UnicodeType type = UnicodeType::ASCII;
260113
- char c;
260114
- for (size_t i = 0; i < len; i++) {
260115
- c = s[i];
260116
- if (c == '\0') {
260117
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260118
- return UnicodeType::INVALID;
260119
- }
260120
- // 1 Byte / ASCII
260121
- if ((c & 0x80) == 0) {
260122
- continue;
260123
- }
260124
- type = UnicodeType::UNICODE;
260125
- if ((s[++i] & 0xC0) != 0x80) {
260126
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260127
- return UnicodeType::INVALID;
260128
- }
260129
- if ((c & 0xE0) == 0xC0) {
260130
- continue;
260131
- }
260132
- if ((s[++i] & 0xC0) != 0x80) {
260133
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260134
- return UnicodeType::INVALID;
260135
- }
260136
- if ((c & 0xF0) == 0xE0) {
260137
- continue;
260138
- }
260139
- if ((s[++i] & 0xC0) != 0x80) {
260567
+ template <const int nextra_bytes, const int mask>
260568
+ static inline UnicodeType
260569
+ UTF8ExtraByteLoop(const int first_pos_seq, int utf8char, size_t& i,
260570
+ const char *s, const size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260571
+ if ((len - i) < (nextra_bytes + 1)) {
260572
+ /* incomplete byte sequence */
260573
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::BYTE_MISMATCH);
260574
+ return UnicodeType::INVALID;
260575
+ }
260576
+ for (size_t j = 0 ; j < nextra_bytes; j++) {
260577
+ int c = (int) s[++i];
260578
+ /* now validate the extra bytes */
260579
+ if ((c & 0xC0) != 0x80) {
260580
+ /* extra byte is not in the format 10xxxxxx */
260140
260581
  AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260141
260582
  return UnicodeType::INVALID;
260142
260583
  }
260143
- if ((c & 0xF8) == 0xF0) {
260144
- continue;
260145
- }
260146
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260584
+ utf8char = (utf8char << 6) | (c & 0x3F);
260585
+ }
260586
+ if ((utf8char & mask) == 0) {
260587
+ /* invalid UTF-8 codepoint, not shortest possible */
260588
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260589
+ return UnicodeType::INVALID;
260590
+ }
260591
+ if (utf8char > 0x10FFFF) {
260592
+ /* value not representable by Unicode */
260593
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260594
+ return UnicodeType::INVALID;
260595
+ }
260596
+ if ((utf8char & 0x1FFF800) == 0xD800) {
260597
+ /* Unicode characters from U+D800 to U+DFFF are surrogate characters used by UTF-16 which are invalid in UTF-8 */
260598
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260147
260599
  return UnicodeType::INVALID;
260148
260600
  }
260601
+ return UnicodeType::UNICODE;
260602
+ }
260603
+
260604
+ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260605
+ UnicodeType type = UnicodeType::ASCII;
260606
+
260607
+ for (size_t i = 0; i < len; i++) {
260608
+ int c = (int) s[i];
260149
260609
 
260610
+ if ((c & 0x80) == 0) {
260611
+ /* 1 byte sequence */
260612
+ if (c == '\0') {
260613
+ /* NULL byte not allowed */
260614
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260615
+ return UnicodeType::INVALID;
260616
+ }
260617
+ } else {
260618
+ int first_pos_seq = i;
260619
+
260620
+ if ((c & 0xE0) == 0xC0) {
260621
+ /* 2 byte sequence */
260622
+ int utf8char = c & 0x1F;
260623
+ type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260624
+ } else if ((c & 0xF0) == 0xE0) {
260625
+ /* 3 byte sequence */
260626
+ int utf8char = c & 0x0F;
260627
+ type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260628
+ } else if ((c & 0xF8) == 0xF0) {
260629
+ /* 4 byte sequence */
260630
+ int utf8char = c & 0x07;
260631
+ type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260632
+ } else {
260633
+ /* invalid UTF-8 start byte */
260634
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260635
+ return UnicodeType::INVALID;
260636
+ }
260637
+ if (type == UnicodeType::INVALID) {
260638
+ return type;
260639
+ }
260640
+ }
260641
+ }
260150
260642
  return type;
260151
260643
  }
260152
260644
 
260153
-
260154
260645
  char* Utf8Proc::Normalize(const char *s, size_t len) {
260155
260646
  assert(s);
260156
260647
  assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID);
@@ -322230,6 +322721,8 @@ exit:
322230
322721
  // See the end of this file for a list
322231
322722
 
322232
322723
 
322724
+ // otherwise we have different definitions for mbedtls_pk_context / mbedtls_sha256_context
322725
+ #define MBEDTLS_ALLOW_PRIVATE_ACCESS
322233
322726
 
322234
322727
 
322235
322728