duckdb 0.5.1-dev29.0 → 0.5.1-dev291.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -620,7 +620,88 @@ public:
620
620
 
621
621
  } // namespace duckdb
622
622
 
623
+ //===----------------------------------------------------------------------===//
624
+ // DuckDB
625
+ //
626
+ // extension_functions.hpp
627
+ //
628
+ //
629
+ //===----------------------------------------------------------------------===//
630
+
631
+
632
+
633
+
634
+
635
namespace duckdb {

//! Associates the name of a function with the name of the extension that provides it.
//! Both names are stored inline as fixed-size, NUL-terminated character buffers.
struct ExtensionFunction {
	char function[48];
	char extension[48];
};

//! Functions that are known to be provided by extensions.
//! NOTE: entries must be kept in strictly ascending order of `function` (byte-wise comparison):
//! FindExtension binary-searches this table with std::lower_bound, so a misplaced entry would
//! silently become unreachable. The static_assert below enforces the ordering at compile time.
static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
    {"->>", "json"},
    {"array_to_json", "json"},
    {"create_fts_index", "fts"},
    {"dbgen", "tpch"},
    {"drop_fts_index", "fts"},
    {"dsdgen", "tpcds"},
    {"excel_text", "excel"},
    {"from_json", "json"},
    {"from_json_strict", "json"},
    {"from_substrait", "substrait"},
    {"get_substrait", "substrait"},
    {"get_substrait_json", "substrait"},
    {"icu_calendar_names", "icu"},
    {"icu_sort_key", "icu"},
    {"json", "json"},
    {"json_array", "json"},
    {"json_array_length", "json"},
    {"json_extract", "json"},
    {"json_extract_path", "json"},
    {"json_extract_path_text", "json"},
    {"json_extract_string", "json"},
    {"json_group_array", "json"},
    {"json_group_object", "json"},
    {"json_group_structure", "json"},
    {"json_merge_patch", "json"},
    {"json_object", "json"},
    {"json_quote", "json"},
    {"json_structure", "json"},
    {"json_transform", "json"},
    {"json_transform_strict", "json"},
    {"json_type", "json"},
    {"json_valid", "json"},
    {"make_timestamptz", "icu"},
    {"parquet_metadata", "parquet"},
    {"parquet_scan", "parquet"},
    {"parquet_schema", "parquet"},
    {"pg_timezone_names", "icu"},
    {"postgres_attach", "postgres_scanner"},
    {"postgres_scan", "postgres_scanner"},
    {"postgres_scan_pushdown", "postgres_scanner"},
    {"read_json_objects", "json"},
    {"read_ndjson_objects", "json"},
    {"read_parquet", "parquet"},
    {"row_to_json", "json"},
    {"sqlite_attach", "sqlite_scanner"},
    {"sqlite_scan", "sqlite_scanner"},
    {"stem", "fts"},
    {"text", "excel"},
    {"to_json", "json"},
    {"tpcds", "tpcds"},
    {"tpcds_answers", "tpcds"},
    {"tpcds_queries", "tpcds"},
    {"tpch", "tpch"},
    {"tpch_answers", "tpch"},
    {"tpch_queries", "tpch"},
    {"visualize_diff_profiling_output", "visualizer"},
    {"visualize_json_profiling_output", "visualizer"},
    {"visualize_last_profiling_output", "visualizer"},
};

//! Byte-wise strict "less than" for two NUL-terminated names (characters compared as unsigned char,
//! which is the ordering std::string comparison uses for char).
//! Written as a single return statement so that it is a valid C++11 constexpr function.
constexpr bool ExtensionFunctionNameLess(const char *lhs, const char *rhs) {
	return *lhs != *rhs ? static_cast<unsigned char>(*lhs) < static_cast<unsigned char>(*rhs)
	                    : (*lhs != '\0' && ExtensionFunctionNameLess(lhs + 1, rhs + 1));
}

//! Returns true if the entries in [first, last) are strictly ascending by function name
//! (i.e. sorted and free of duplicates).
constexpr bool ExtensionFunctionsSorted(const ExtensionFunction *first, const ExtensionFunction *last) {
	return last - first < 2 || (ExtensionFunctionNameLess(first[0].function, first[1].function) &&
	                            ExtensionFunctionsSorted(first + 1, last));
}

static_assert(ExtensionFunctionsSorted(EXTENSION_FUNCTIONS,
                                       EXTENSION_FUNCTIONS + sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction)),
              "EXTENSION_FUNCTIONS must be sorted by function name and must not contain duplicates");

} // namespace duckdb
703
+
704
+ #include <algorithm>
624
705
  namespace duckdb {
625
706
 
626
707
  string SimilarCatalogEntry::GetQualifiedName() const {
@@ -823,6 +904,16 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
823
904
  return {most_similar.first, most_similar.second, schema_of_most_similar};
824
905
  }
825
906
 
907
+ string FindExtension(const string &function_name) {
908
+ auto size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction);
909
+ auto it = std::lower_bound(
910
+ EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + size, function_name,
911
+ [](const ExtensionFunction &element, const string &value) { return element.function < value; });
912
+ if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
913
+ return it->extension;
914
+ }
915
+ return "";
916
+ }
826
917
  CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
827
918
  CatalogType type, const vector<SchemaCatalogEntry *> &schemas,
828
919
  QueryErrorContext error_context) {
@@ -836,7 +927,12 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
836
927
  }
837
928
  });
838
929
  auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
839
-
930
+ auto extension_name = FindExtension(entry_name);
931
+ if (!extension_name.empty()) {
932
+ return CatalogException("Function with name %s is not on the catalog, but it exists in the %s extension. To "
933
+ "Install and Load the extension, run: INSTALL %s; LOAD %s;",
934
+ entry_name, extension_name, extension_name, extension_name);
935
+ }
840
936
  string did_you_mean;
841
937
  if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
842
938
  did_you_mean = "\nDid you mean \"" + unseen_entry.GetQualifiedName() + "\"?";
@@ -3667,6 +3763,19 @@ idx_t TableCatalogEntry::StandardColumnCount() const {
3667
3763
  return count;
3668
3764
  }
3669
3765
 
3766
+ unique_ptr<BaseStatistics> TableCatalogEntry::GetStatistics(ClientContext &context, column_t column_id) {
3767
+ if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
3768
+ return nullptr;
3769
+ }
3770
+ if (column_id >= columns.size()) {
3771
+ throw InternalException("TableCatalogEntry::GetStatistics column_id out of range");
3772
+ }
3773
+ if (columns[column_id].Generated()) {
3774
+ return nullptr;
3775
+ }
3776
+ return storage->GetStatistics(context, columns[column_id].StorageOid());
3777
+ }
3778
+
3670
3779
  unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) {
3671
3780
  D_ASSERT(!internal);
3672
3781
  if (info->type != AlterType::ALTER_TABLE) {
@@ -3734,6 +3843,9 @@ static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) {
3734
3843
 
3735
3844
  unique_ptr<CatalogEntry> TableCatalogEntry::RenameColumn(ClientContext &context, RenameColumnInfo &info) {
3736
3845
  auto rename_idx = GetColumnIndex(info.old_name);
3846
+ if (rename_idx == COLUMN_IDENTIFIER_ROW_ID) {
3847
+ throw CatalogException("Cannot rename rowid column");
3848
+ }
3737
3849
  auto create_info = make_unique<CreateTableInfo>(schema->name, name);
3738
3850
  create_info->temporary = temporary;
3739
3851
  for (idx_t i = 0; i < columns.size(); i++) {
@@ -3836,6 +3948,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AddColumn(ClientContext &context, Ad
3836
3948
  unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context, RemoveColumnInfo &info) {
3837
3949
  auto removed_index = GetColumnIndex(info.removed_column, info.if_column_exists);
3838
3950
  if (removed_index == DConstants::INVALID_INDEX) {
3951
+ if (!info.if_column_exists) {
3952
+ throw CatalogException("Cannot drop column: rowid column cannot be dropped");
3953
+ }
3839
3954
  return nullptr;
3840
3955
  }
3841
3956
 
@@ -3942,7 +4057,7 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
3942
4057
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
3943
4058
  storage);
3944
4059
  }
3945
- auto new_storage = make_shared<DataTable>(context, *storage, removed_index);
4060
+ auto new_storage = make_shared<DataTable>(context, *storage, columns[removed_index].StorageOid());
3946
4061
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
3947
4062
  new_storage);
3948
4063
  }
@@ -3950,13 +4065,18 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
3950
4065
  unique_ptr<CatalogEntry> TableCatalogEntry::SetDefault(ClientContext &context, SetDefaultInfo &info) {
3951
4066
  auto create_info = make_unique<CreateTableInfo>(schema->name, name);
3952
4067
  auto default_idx = GetColumnIndex(info.column_name);
4068
+ if (default_idx == COLUMN_IDENTIFIER_ROW_ID) {
4069
+ throw CatalogException("Cannot SET DEFAULT for rowid column");
4070
+ }
3953
4071
 
3954
4072
  // Copy all the columns, changing the value of the one that was specified by 'column_name'
3955
4073
  for (idx_t i = 0; i < columns.size(); i++) {
3956
4074
  auto copy = columns[i].Copy();
3957
4075
  if (default_idx == i) {
3958
4076
  // set the default value of this column
3959
- D_ASSERT(!copy.Generated()); // Shouldnt reach here - DEFAULT value isn't supported for Generated Columns
4077
+ if (copy.Generated()) {
4078
+ throw BinderException("Cannot SET DEFAULT for generated column \"%s\"", columns[i].Name());
4079
+ }
3960
4080
  copy.SetDefaultValue(info.expression ? info.expression->Copy() : nullptr);
3961
4081
  }
3962
4082
  create_info->columns.push_back(move(copy));
@@ -3981,6 +4101,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
3981
4101
  }
3982
4102
 
3983
4103
  idx_t not_null_idx = GetColumnIndex(info.column_name);
4104
+ if (columns[not_null_idx].Generated()) {
4105
+ throw BinderException("Unsupported constraint for generated column!");
4106
+ }
3984
4107
  bool has_not_null = false;
3985
4108
  for (idx_t i = 0; i < constraints.size(); i++) {
3986
4109
  auto constraint = constraints[i]->Copy();
@@ -4004,8 +4127,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
4004
4127
  storage);
4005
4128
  }
4006
4129
 
4007
- // Return with new storage info
4008
- auto new_storage = make_shared<DataTable>(context, *storage, make_unique<NotNullConstraint>(not_null_idx));
4130
+ // Return with new storage info. Note that we need the bound column index here.
4131
+ auto new_storage = make_shared<DataTable>(context, *storage,
4132
+ make_unique<BoundNotNullConstraint>(columns[not_null_idx].StorageOid()));
4009
4133
  return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
4010
4134
  new_storage);
4011
4135
  }
@@ -4111,12 +4235,19 @@ unique_ptr<CatalogEntry> TableCatalogEntry::ChangeColumnType(ClientContext &cont
4111
4235
  auto expression = info.expression->Copy();
4112
4236
  auto bound_expression = expr_binder.Bind(expression);
4113
4237
  auto bound_create_info = binder->BindCreateTableInfo(move(create_info));
4238
+ vector<column_t> storage_oids;
4114
4239
  if (bound_columns.empty()) {
4115
- bound_columns.push_back(COLUMN_IDENTIFIER_ROW_ID);
4240
+ storage_oids.push_back(COLUMN_IDENTIFIER_ROW_ID);
4241
+ }
4242
+ // transform to storage_oid
4243
+ else {
4244
+ for (idx_t i = 0; i < bound_columns.size(); i++) {
4245
+ storage_oids.push_back(columns[bound_columns[i]].StorageOid());
4246
+ }
4116
4247
  }
4117
4248
 
4118
- auto new_storage =
4119
- make_shared<DataTable>(context, *storage, change_idx, info.target_type, move(bound_columns), *bound_expression);
4249
+ auto new_storage = make_shared<DataTable>(context, *storage, columns[change_idx].StorageOid(), info.target_type,
4250
+ move(storage_oids), *bound_expression);
4120
4251
  auto result =
4121
4252
  make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), new_storage);
4122
4253
  return move(result);
@@ -4364,7 +4495,7 @@ void TableCatalogEntry::CommitAlter(AlterInfo &info) {
4364
4495
  }
4365
4496
  }
4366
4497
  D_ASSERT(removed_index != DConstants::INVALID_INDEX);
4367
- storage->CommitDropColumn(removed_index);
4498
+ storage->CommitDropColumn(columns[removed_index].StorageOid());
4368
4499
  }
4369
4500
 
4370
4501
  void TableCatalogEntry::CommitDrop() {
@@ -4934,11 +5065,13 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
4934
5065
  throw CatalogException(rename_err_msg, original_name, value->name);
4935
5066
  }
4936
5067
  }
5068
+ }
5069
+
5070
+ if (value->name != original_name) {
5071
+ // Do PutMapping and DeleteMapping after dependency check
4937
5072
  PutMapping(context, value->name, entry_index);
4938
5073
  DeleteMapping(context, original_name);
4939
5074
  }
4940
- //! Check the dependency manager to verify that there are no conflicting dependencies with this alter
4941
- catalog.dependency_manager->AlterObject(context, entry, value.get());
4942
5075
 
4943
5076
  value->timestamp = transaction.transaction_id;
4944
5077
  value->child = move(entries[entry_index]);
@@ -4950,10 +5083,18 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
4950
5083
  alter_info->Serialize(serializer);
4951
5084
  BinaryData serialized_alter = serializer.GetData();
4952
5085
 
5086
+ auto new_entry = value.get();
5087
+
4953
5088
  // push the old entry in the undo buffer for this transaction
4954
5089
  transaction.PushCatalogEntry(value->child.get(), serialized_alter.data.get(), serialized_alter.size);
4955
5090
  entries[entry_index] = move(value);
4956
5091
 
5092
+ // Check the dependency manager to verify that there are no conflicting dependencies with this alter
5093
+ // Note that we do this AFTER the new entry has been entirely set up in the catalog set
5094
+ // that is because in case the alter fails because of a dependency conflict, we need to be able to cleanly roll back
5095
+ // to the old entry.
5096
+ catalog.dependency_manager->AlterObject(context, entry, new_entry);
5097
+
4957
5098
  return true;
4958
5099
  }
4959
5100
 
@@ -6506,7 +6647,7 @@ static void GetBitPosition(idx_t row_idx, idx_t &current_byte, uint8_t &current_
6506
6647
  }
6507
6648
 
6508
6649
  static void UnsetBit(uint8_t *data, idx_t current_byte, uint8_t current_bit) {
6509
- data[current_byte] &= ~(1 << current_bit);
6650
+ data[current_byte] &= ~((uint64_t)1 << current_bit);
6510
6651
  }
6511
6652
 
6512
6653
  static void NextBit(idx_t &current_byte, uint8_t &current_bit) {
@@ -9450,6 +9591,8 @@ void Exception::ThrowAsTypeWithMessage(ExceptionType type, const string &message
9450
9591
  throw ParameterNotAllowedException(message);
9451
9592
  case ExceptionType::PARAMETER_NOT_RESOLVED:
9452
9593
  throw ParameterNotResolvedException();
9594
+ case ExceptionType::FATAL:
9595
+ throw FatalException(message);
9453
9596
  default:
9454
9597
  throw Exception(type, message);
9455
9598
  }
@@ -16715,9 +16858,15 @@ string FileSystem::ConvertSeparators(const string &path) {
16715
16858
  }
16716
16859
 
16717
16860
  string FileSystem::ExtractBaseName(const string &path) {
16861
+ if (path.empty()) {
16862
+ return string();
16863
+ }
16718
16864
  auto normalized_path = ConvertSeparators(path);
16719
16865
  auto sep = PathSeparator();
16720
- auto vec = StringUtil::Split(StringUtil::Split(normalized_path, sep).back(), ".");
16866
+ auto splits = StringUtil::Split(normalized_path, sep);
16867
+ D_ASSERT(!splits.empty());
16868
+ auto vec = StringUtil::Split(splits.back(), ".");
16869
+ D_ASSERT(!vec.empty());
16721
16870
  return vec[0];
16722
16871
  }
16723
16872
 
@@ -18792,7 +18941,8 @@ namespace duckdb {
18792
18941
 
18793
18942
  static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
18794
18943
  unordered_map<string, column_t> &column_map,
18795
- bool filename_col, bool hive_partition_cols) {
18944
+ duckdb_re2::RE2 &compiled_regex, bool filename_col,
18945
+ bool hive_partition_cols) {
18796
18946
  unordered_map<column_t, string> result;
18797
18947
 
18798
18948
  if (filename_col) {
@@ -18803,7 +18953,7 @@ static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
18803
18953
  }
18804
18954
 
18805
18955
  if (hive_partition_cols) {
18806
- auto partitions = HivePartitioning::Parse(filename);
18956
+ auto partitions = HivePartitioning::Parse(filename, compiled_regex);
18807
18957
  for (auto &partition : partitions) {
18808
18958
  auto lookup_column_id = column_map.find(partition.first);
18809
18959
  if (lookup_column_id != column_map.end()) {
@@ -18841,10 +18991,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
18841
18991
  // - s3://bucket/var1=value1/bla/bla/var2=value2
18842
18992
  // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
18843
18993
  // - folder/folder/folder/../var1=value1/etc/.//var2=value2
18844
- std::map<string, string> HivePartitioning::Parse(string &filename) {
18845
- std::map<string, string> result;
18994
+ const string HivePartitioning::REGEX_STRING = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
18846
18995
 
18847
- string regex = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
18996
+ std::map<string, string> HivePartitioning::Parse(string &filename, duckdb_re2::RE2 &regex) {
18997
+ std::map<string, string> result;
18848
18998
  duckdb_re2::StringPiece input(filename); // Wrap a StringPiece around it
18849
18999
 
18850
19000
  string var;
@@ -18855,6 +19005,11 @@ std::map<string, string> HivePartitioning::Parse(string &filename) {
18855
19005
  return result;
18856
19006
  }
18857
19007
 
19008
+ std::map<string, string> HivePartitioning::Parse(string &filename) {
19009
+ duckdb_re2::RE2 regex(REGEX_STRING);
19010
+ return Parse(filename, regex);
19011
+ }
19012
+
18858
19013
  // TODO: this can still be improved by removing the parts of filter expressions that are true for all remaining files.
18859
19014
  // currently, only expressions that cannot be evaluated during pushdown are removed.
18860
19015
  void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<unique_ptr<Expression>> &filters,
@@ -18862,6 +19017,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
18862
19017
  bool hive_enabled, bool filename_enabled) {
18863
19018
  vector<string> pruned_files;
18864
19019
  vector<unique_ptr<Expression>> pruned_filters;
19020
+ duckdb_re2::RE2 regex(REGEX_STRING);
18865
19021
 
18866
19022
  if ((!filename_enabled && !hive_enabled) || filters.empty()) {
18867
19023
  return;
@@ -18870,7 +19026,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
18870
19026
  for (idx_t i = 0; i < files.size(); i++) {
18871
19027
  auto &file = files[i];
18872
19028
  bool should_prune_file = false;
18873
- auto known_values = GetKnownColumnValues(file, column_map, filename_enabled, hive_enabled);
19029
+ auto known_values = GetKnownColumnValues(file, column_map, regex, filename_enabled, hive_enabled);
18874
19030
 
18875
19031
  FilterCombiner combiner;
18876
19032
  for (auto &filter : filters) {
@@ -19102,6 +19258,8 @@ private:
19102
19258
  //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
19103
19259
  void SetFilePointer(FileHandle &handle, idx_t location);
19104
19260
  idx_t GetFilePointer(FileHandle &handle);
19261
+
19262
+ vector<string> FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path);
19105
19263
  };
19106
19264
 
19107
19265
  } // namespace duckdb
@@ -19983,6 +20141,26 @@ static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bo
19983
20141
  });
19984
20142
  }
19985
20143
 
20144
+ vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) {
20145
+ vector<string> result;
20146
+ if (FileExists(path) || IsPipe(path)) {
20147
+ result.push_back(path);
20148
+ } else if (!absolute_path) {
20149
+ Value value;
20150
+ if (opener->TryGetCurrentSetting("file_search_path", value)) {
20151
+ auto search_paths_str = value.ToString();
20152
+ std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
20153
+ for (const auto &search_path : search_paths) {
20154
+ auto joined_path = JoinPath(search_path, path);
20155
+ if (FileExists(joined_path) || IsPipe(joined_path)) {
20156
+ result.push_back(joined_path);
20157
+ }
20158
+ }
20159
+ }
20160
+ }
20161
+ return result;
20162
+ }
20163
+
19986
20164
  vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
19987
20165
  if (path.empty()) {
19988
20166
  return vector<string>();
@@ -20029,23 +20207,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20029
20207
  // Check if the path has a glob at all
20030
20208
  if (!HasGlob(path)) {
20031
20209
  // no glob: return only the file (if it exists or is a pipe)
20032
- vector<string> result;
20033
- if (FileExists(path) || IsPipe(path)) {
20034
- result.push_back(path);
20035
- } else if (!absolute_path) {
20036
- Value value;
20037
- if (opener->TryGetCurrentSetting("file_search_path", value)) {
20038
- auto search_paths_str = value.ToString();
20039
- std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
20040
- for (const auto &search_path : search_paths) {
20041
- auto joined_path = JoinPath(search_path, path);
20042
- if (FileExists(joined_path) || IsPipe(joined_path)) {
20043
- result.push_back(joined_path);
20044
- }
20045
- }
20046
- }
20047
- }
20048
- return result;
20210
+ return FetchFileWithoutGlob(path, opener, absolute_path);
20049
20211
  }
20050
20212
  vector<string> previous_directories;
20051
20213
  if (absolute_path) {
@@ -20079,7 +20241,12 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20079
20241
  }
20080
20242
  }
20081
20243
  }
20082
- if (is_last_chunk || result.empty()) {
20244
+ if (result.empty()) {
20245
+ // no result found that matches the glob
20246
+ // last ditch effort: search the path as a string literal
20247
+ return FetchFileWithoutGlob(path, opener, absolute_path);
20248
+ }
20249
+ if (is_last_chunk) {
20083
20250
  return result;
20084
20251
  }
20085
20252
  previous_directories = move(result);
@@ -22428,14 +22595,16 @@ struct IntervalToStringCast {
22428
22595
  if (micros < 0) {
22429
22596
  // negative time: append negative sign
22430
22597
  buffer[length++] = '-';
22598
+ } else {
22431
22599
  micros = -micros;
22432
22600
  }
22433
- int64_t hour = micros / Interval::MICROS_PER_HOUR;
22434
- micros -= hour * Interval::MICROS_PER_HOUR;
22435
- int64_t min = micros / Interval::MICROS_PER_MINUTE;
22436
- micros -= min * Interval::MICROS_PER_MINUTE;
22437
- int64_t sec = micros / Interval::MICROS_PER_SEC;
22438
- micros -= sec * Interval::MICROS_PER_SEC;
22601
+ int64_t hour = -(micros / Interval::MICROS_PER_HOUR);
22602
+ micros += hour * Interval::MICROS_PER_HOUR;
22603
+ int64_t min = -(micros / Interval::MICROS_PER_MINUTE);
22604
+ micros += min * Interval::MICROS_PER_MINUTE;
22605
+ int64_t sec = -(micros / Interval::MICROS_PER_SEC);
22606
+ micros += sec * Interval::MICROS_PER_SEC;
22607
+ micros = -micros;
22439
22608
 
22440
22609
  if (hour < 10) {
22441
22610
  buffer[length++] = '0';
@@ -28558,7 +28727,7 @@ template <idx_t radix_bits>
28558
28727
  struct RadixPartitioningConstants {
28559
28728
  public:
28560
28729
  static constexpr const idx_t NUM_RADIX_BITS = radix_bits;
28561
- static constexpr const idx_t NUM_PARTITIONS = 1 << NUM_RADIX_BITS;
28730
+ static constexpr const idx_t NUM_PARTITIONS = (idx_t)1 << NUM_RADIX_BITS;
28562
28731
  static constexpr const idx_t TMP_BUF_SIZE = 8;
28563
28732
 
28564
28733
  public:
@@ -28576,7 +28745,7 @@ private:
28576
28745
  struct RadixPartitioning {
28577
28746
  public:
28578
28747
  static idx_t NumberOfPartitions(idx_t radix_bits) {
28579
- return 1 << radix_bits;
28748
+ return (idx_t)1 << radix_bits;
28580
28749
  }
28581
28750
 
28582
28751
  //! Partition the data in block_collection/string_heap to multiple partitions
@@ -33336,6 +33505,9 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
33336
33505
 
33337
33506
  static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
33338
33507
  #ifdef DEBUG
33508
+ if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
33509
+ return;
33510
+ }
33339
33511
  idx_t entry_idx;
33340
33512
  idx_t idx_in_entry;
33341
33513
  ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
@@ -35673,7 +35845,10 @@ struct SortConstants {
35673
35845
 
35674
35846
  struct SortLayout {
35675
35847
  public:
35848
+ SortLayout() {
35849
+ }
35676
35850
  explicit SortLayout(const vector<BoundOrderByNode> &orders);
35851
+ SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const;
35677
35852
 
35678
35853
  public:
35679
35854
  idx_t column_count;
@@ -37324,6 +37499,32 @@ SortLayout::SortLayout(const vector<BoundOrderByNode> &orders)
37324
37499
  blob_layout.Initialize(blob_layout_types);
37325
37500
  }
37326
37501
 
37502
+ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
37503
+ SortLayout result;
37504
+ result.column_count = num_prefix_cols;
37505
+ result.all_constant = true;
37506
+ result.comparison_size = 0;
37507
+ for (idx_t col_idx = 0; col_idx < num_prefix_cols; col_idx++) {
37508
+ result.order_types.push_back(order_types[col_idx]);
37509
+ result.order_by_null_types.push_back(order_by_null_types[col_idx]);
37510
+ result.logical_types.push_back(logical_types[col_idx]);
37511
+
37512
+ result.all_constant = result.all_constant && constant_size[col_idx];
37513
+ result.constant_size.push_back(constant_size[col_idx]);
37514
+
37515
+ result.comparison_size += column_sizes[col_idx];
37516
+ result.column_sizes.push_back(column_sizes[col_idx]);
37517
+
37518
+ result.prefix_lengths.push_back(prefix_lengths[col_idx]);
37519
+ result.stats.push_back(stats[col_idx]);
37520
+ result.has_null.push_back(has_null[col_idx]);
37521
+ }
37522
+ result.entry_size = entry_size;
37523
+ result.blob_layout = blob_layout;
37524
+ result.sorting_to_blob_col = sorting_to_blob_col;
37525
+ return result;
37526
+ }
37527
+
37327
37528
  LocalSortState::LocalSortState() : initialized(false) {
37328
37529
  }
37329
37530
 
@@ -39403,7 +39604,7 @@ public:
39403
39604
  namespace duckdb {
39404
39605
 
39405
39606
  enum class UnicodeType { INVALID, ASCII, UNICODE };
39406
- enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE };
39607
+ enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };
39407
39608
 
39408
39609
  class Utf8Proc {
39409
39610
  public:
@@ -47573,11 +47774,36 @@ Value Value::CreateValue(dtime_t value) {
47573
47774
  return Value::TIME(value);
47574
47775
  }
47575
47776
 
47777
+ template <>
47778
+ Value Value::CreateValue(dtime_tz_t value) {
47779
+ return Value::TIMETZ(value);
47780
+ }
47781
+
47576
47782
  template <>
47577
47783
  Value Value::CreateValue(timestamp_t value) {
47578
47784
  return Value::TIMESTAMP(value);
47579
47785
  }
47580
47786
 
47787
+ template <>
47788
+ Value Value::CreateValue(timestamp_sec_t value) {
47789
+ return Value::TIMESTAMPSEC(value);
47790
+ }
47791
+
47792
+ template <>
47793
+ Value Value::CreateValue(timestamp_ms_t value) {
47794
+ return Value::TIMESTAMPMS(value);
47795
+ }
47796
+
47797
+ template <>
47798
+ Value Value::CreateValue(timestamp_ns_t value) {
47799
+ return Value::TIMESTAMPNS(value);
47800
+ }
47801
+
47802
+ template <>
47803
+ Value Value::CreateValue(timestamp_tz_t value) {
47804
+ return Value::TIMESTAMPTZ(value);
47805
+ }
47806
+
47581
47807
  template <>
47582
47808
  Value Value::CreateValue(const char *value) {
47583
47809
  return Value(string(value));
@@ -49150,19 +49376,6 @@ void Vector::Resize(idx_t cur_size, idx_t new_size) {
49150
49376
  }
49151
49377
  }
49152
49378
 
49153
- // FIXME Just like DECIMAL, it's important that type_info gets considered when determining whether or not to cast
49154
- // just comparing internal type is not always enough
49155
- static bool ValueShouldBeCast(const LogicalType &incoming, const LogicalType &target) {
49156
- if (incoming.InternalType() != target.InternalType()) {
49157
- return true;
49158
- }
49159
- if (incoming.id() == LogicalTypeId::DECIMAL && incoming.id() == target.id()) {
49160
- //! Compare the type_info
49161
- return incoming != target;
49162
- }
49163
- return false;
49164
- }
49165
-
49166
49379
  void Vector::SetValue(idx_t index, const Value &val) {
49167
49380
  if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
49168
49381
  // dictionary: apply dictionary and forward to child
@@ -49170,10 +49383,11 @@ void Vector::SetValue(idx_t index, const Value &val) {
49170
49383
  auto &child = DictionaryVector::Child(*this);
49171
49384
  return child.SetValue(sel_vector.get_index(index), val);
49172
49385
  }
49173
- if (ValueShouldBeCast(val.type(), GetType())) {
49386
+ if (val.type() != GetType()) {
49174
49387
  SetValue(index, val.CastAs(GetType()));
49175
49388
  return;
49176
49389
  }
49390
+ D_ASSERT(val.type().InternalType() == GetType().InternalType());
49177
49391
 
49178
49392
  validity.EnsureWritable();
49179
49393
  validity.Set(index, !val.IsNull());
@@ -49424,7 +49638,10 @@ Value Vector::GetValue(const Vector &v_p, idx_t index_p) {
49424
49638
  auto value = GetValueInternal(v_p, index_p);
49425
49639
  // set the alias of the type to the correct value, if there is a type alias
49426
49640
  if (v_p.GetType().HasAlias()) {
49427
- value.type().SetAlias(v_p.GetType().GetAlias());
49641
+ value.type().CopyAuxInfo(v_p.GetType());
49642
+ }
49643
+ if (v_p.GetType().id() != LogicalTypeId::AGGREGATE_STATE && value.type().id() != LogicalTypeId::AGGREGATE_STATE) {
49644
+ D_ASSERT(v_p.GetType() == value.type());
49428
49645
  }
49429
49646
  return value;
49430
49647
  }
@@ -50216,6 +50433,24 @@ void StringVector::AddHeapReference(Vector &vector, Vector &other) {
50216
50433
  StringVector::AddBuffer(vector, other.auxiliary);
50217
50434
  }
50218
50435
 
50436
+ Vector &MapVector::GetKeys(Vector &vector) {
50437
+ auto &entries = StructVector::GetEntries(vector);
50438
+ D_ASSERT(entries.size() == 2);
50439
+ return *entries[0];
50440
+ }
50441
+ Vector &MapVector::GetValues(Vector &vector) {
50442
+ auto &entries = StructVector::GetEntries(vector);
50443
+ D_ASSERT(entries.size() == 2);
50444
+ return *entries[1];
50445
+ }
50446
+
50447
+ const Vector &MapVector::GetKeys(const Vector &vector) {
50448
+ return GetKeys((Vector &)vector);
50449
+ }
50450
+ const Vector &MapVector::GetValues(const Vector &vector) {
50451
+ return GetValues((Vector &)vector);
50452
+ }
50453
+
50219
50454
  vector<unique_ptr<Vector>> &StructVector::GetEntries(Vector &vector) {
50220
50455
  D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::MAP);
50221
50456
  if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
@@ -51491,6 +51726,7 @@ public:
51491
51726
  if (!alias.empty()) {
51492
51727
  return false;
51493
51728
  }
51729
+ //! We only need to compare aliases when both types have them in this case
51494
51730
  return true;
51495
51731
  }
51496
51732
  if (alias != other_p->alias) {
@@ -51504,8 +51740,7 @@ public:
51504
51740
  if (type != other_p->type) {
51505
51741
  return false;
51506
51742
  }
51507
- auto &other = (ExtraTypeInfo &)*other_p;
51508
- return alias == other.alias && EqualsInternal(other_p);
51743
+ return alias == other_p->alias && EqualsInternal(other_p);
51509
51744
  }
51510
51745
  //! Serializes a ExtraTypeInfo to a stand-alone binary blob
51511
51746
  virtual void Serialize(FieldWriter &writer) const {};
@@ -52184,10 +52419,7 @@ LogicalType LogicalType::Deserialize(Deserializer &source) {
52184
52419
  return LogicalType(id, move(info));
52185
52420
  }
52186
52421
 
52187
- bool LogicalType::operator==(const LogicalType &rhs) const {
52188
- if (id_ != rhs.id_) {
52189
- return false;
52190
- }
52422
+ bool LogicalType::EqualTypeInfo(const LogicalType &rhs) const {
52191
52423
  if (type_info_.get() == rhs.type_info_.get()) {
52192
52424
  return true;
52193
52425
  }
@@ -52199,6 +52431,13 @@ bool LogicalType::operator==(const LogicalType &rhs) const {
52199
52431
  }
52200
52432
  }
52201
52433
 
52434
+ bool LogicalType::operator==(const LogicalType &rhs) const {
52435
+ if (id_ != rhs.id_) {
52436
+ return false;
52437
+ }
52438
+ return EqualTypeInfo(rhs);
52439
+ }
52440
+
52202
52441
  } // namespace duckdb
52203
52442
 
52204
52443
 
@@ -63069,6 +63308,16 @@ bool DistinctAggregateData::IsDistinct(idx_t index) const {
63069
63308
 
63070
63309
 
63071
63310
 
63311
+ //===----------------------------------------------------------------------===//
63312
+ // DuckDB
63313
+ //
63314
+ // duckdb/parallel/base_pipeline_event.hpp
63315
+ //
63316
+ //
63317
+ //===----------------------------------------------------------------------===//
63318
+
63319
+
63320
+
63072
63321
  //===----------------------------------------------------------------------===//
63073
63322
  // DuckDB
63074
63323
  //
@@ -63142,6 +63391,22 @@ protected:
63142
63391
 
63143
63392
 
63144
63393
 
63394
+ namespace duckdb {
63395
+
63396
+ //! A BasePipelineEvent is used as the basis of any event that belongs to a specific pipeline
//! It derives from Event and adds a single `pipeline` member; the finalize/merge events in this
//! file that derive from it access their pipeline through that member (e.g. `pipeline->GetClientContext()`).
63397
+ class BasePipelineEvent : public Event {
63398
+ public:
//! Constructs the event from a shared pointer to its pipeline (definition not shown here)
63399
+ BasePipelineEvent(shared_ptr<Pipeline> pipeline);
//! Constructs the event from a pipeline reference (definition not shown here); derived events
//! in this file invoke it with `pipeline_p` or `*pipeline_p`
63400
+ BasePipelineEvent(Pipeline &pipeline);
63401
+
63402
+ //! The pipeline that this event belongs to
63403
+ shared_ptr<Pipeline> pipeline;
63404
+ };
63405
+
63406
+ } // namespace duckdb
63407
+
63408
+
63409
+
63145
63410
  namespace duckdb {
63146
63411
 
63147
63412
  PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
@@ -63298,16 +63563,15 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
63298
63563
  }
63299
63564
  }
63300
63565
 
63301
- class HashAggregateFinalizeEvent : public Event {
63566
+ class HashAggregateFinalizeEvent : public BasePipelineEvent {
63302
63567
  public:
63303
63568
  HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
63304
63569
  Pipeline *pipeline_p)
63305
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p) {
63570
+ : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
63306
63571
  }
63307
63572
 
63308
63573
  const PhysicalHashAggregate &op;
63309
63574
  HashAggregateGlobalState &gstate;
63310
- Pipeline *pipeline;
63311
63575
 
63312
63576
  public:
63313
63577
  void Schedule() override {
@@ -64569,15 +64833,14 @@ private:
64569
64833
  };
64570
64834
 
64571
64835
  // TODO: Create tasks and run these in parallel instead of doing this all in Schedule, single threaded
64572
- class DistinctAggregateFinalizeEvent : public Event {
64836
+ class DistinctAggregateFinalizeEvent : public BasePipelineEvent {
64573
64837
  public:
64574
64838
  DistinctAggregateFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
64575
- Pipeline *pipeline_p, ClientContext &context)
64576
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), context(context) {
64839
+ Pipeline &pipeline_p, ClientContext &context)
64840
+ : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
64577
64841
  }
64578
64842
  const PhysicalUngroupedAggregate &op;
64579
64843
  UngroupedAggregateGlobalState &gstate;
64580
- Pipeline *pipeline;
64581
64844
  ClientContext &context;
64582
64845
 
64583
64846
  public:
@@ -64590,16 +64853,15 @@ public:
64590
64853
  }
64591
64854
  };
64592
64855
 
64593
- class DistinctCombineFinalizeEvent : public Event {
64856
+ class DistinctCombineFinalizeEvent : public BasePipelineEvent {
64594
64857
  public:
64595
64858
  DistinctCombineFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
64596
- Pipeline *pipeline_p, ClientContext &client)
64597
- : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), client(client) {
64859
+ Pipeline &pipeline_p, ClientContext &client)
64860
+ : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), client(client) {
64598
64861
  }
64599
64862
 
64600
64863
  const PhysicalUngroupedAggregate &op;
64601
64864
  UngroupedAggregateGlobalState &gstate;
64602
- Pipeline *pipeline;
64603
64865
  ClientContext &client;
64604
64866
 
64605
64867
  public:
@@ -64615,7 +64877,7 @@ public:
64615
64877
  SetTasks(move(tasks));
64616
64878
 
64617
64879
  //! Now that all tables are combined, it's time to do the distinct aggregations
64618
- auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, pipeline, client);
64880
+ auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
64619
64881
  this->InsertEvent(move(new_event));
64620
64882
  }
64621
64883
  };
@@ -64644,12 +64906,12 @@ SinkFinalizeType PhysicalUngroupedAggregate::FinalizeDistinct(Pipeline &pipeline
64644
64906
  }
64645
64907
  }
64646
64908
  if (any_partitioned) {
64647
- auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, &pipeline, context);
64909
+ auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
64648
64910
  event.InsertEvent(move(new_event));
64649
64911
  } else {
64650
64912
  //! Hashtables aren't partitioned, they dont need to be joined first
64651
64913
  //! So we can compute the aggregate already
64652
- auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, &pipeline, context);
64914
+ auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
64653
64915
  event.InsertEvent(move(new_event));
64654
64916
  }
64655
64917
  return SinkFinalizeType::READY;
@@ -64927,12 +65189,14 @@ public:
64927
65189
 
64928
65190
  //! Constructor: stores max_mem in memory_per_thread and zeroes count, builds a RowLayout from
  //! payload_types, creates global_sort from (buffer_manager, orders, payload_layout) and sets its
  //! `external` flag. In the added lines partition_layout is no longer initialised from `partitions`
  //! directly; it is assigned from global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size()).
  WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
64929
65191
  const Types &payload_types, idx_t max_mem, bool external)
64930
- : memory_per_thread(max_mem), count(0), partition_layout(partitions) {
65192
+ : memory_per_thread(max_mem), count(0) {
64931
65193
 
64932
65194
  RowLayout payload_layout;
64933
65195
  payload_layout.Initialize(payload_types);
64934
65196
  global_sort = make_unique<GlobalSortState>(buffer_manager, orders, payload_layout);
64935
65197
  global_sort->external = external;
65198
+
  // the partition layout is requested from the sort layout, using only the number of partition orders
65199
+ partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
64936
65200
  }
64937
65201
 
64938
65202
  void Combine(LocalSortState &local_sort) {
@@ -66393,19 +66657,18 @@ private:
66393
66657
  WindowGlobalHashGroup &hash_group;
66394
66658
  };
66395
66659
 
66396
- class WindowMergeEvent : public Event {
66660
+ class WindowMergeEvent : public BasePipelineEvent {
66397
66661
  public:
66398
66662
  WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, WindowGlobalHashGroup &hash_group_p)
66399
- : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p), hash_group(hash_group_p) {
66663
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p), hash_group(hash_group_p) {
66400
66664
  }
66401
66665
 
66402
66666
  WindowGlobalSinkState &gstate;
66403
- Pipeline &pipeline;
66404
66667
  WindowGlobalHashGroup &hash_group;
66405
66668
 
66406
66669
  public:
66407
66670
  void Schedule() override {
66408
- auto &context = pipeline.GetClientContext();
66671
+ auto &context = pipeline->GetClientContext();
66409
66672
 
66410
66673
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
66411
66674
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -66420,7 +66683,7 @@ public:
66420
66683
 
66421
66684
  //! Runs when this merge event finishes: completes the current merge round on the hash group's
  //! global sort state (passing `true`) and then calls CreateMergeTasks for the same pipeline,
  //! event, global state and hash group. The added line dereferences `pipeline`, which is now the
  //! shared_ptr member inherited from BasePipelineEvent instead of a reference member.
  void FinishEvent() override {
66422
66685
  hash_group.global_sort->CompleteMergeRound(true);
66423
- CreateMergeTasks(pipeline, *this, gstate, hash_group);
66686
+ CreateMergeTasks(*pipeline, *this, gstate, hash_group);
66424
66687
  }
66425
66688
 
66426
66689
  static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
@@ -67829,6 +68092,11 @@ public:
67829
68092
 
67830
68093
  private:
67831
68094
  static const vector<string> PathComponents();
68095
+ //! For tagged releases we use the tag, else we use the git commit hash
68096
+ static const string GetVersionDirectoryName();
68097
+ //! Version tags occur with and without 'v', tag in extension path is always with 'v'
68098
+ static const string NormalizeVersionTag(const string &version_tag);
68099
+ static bool IsRelease(const string &version_tag);
67832
68100
 
67833
68101
  private:
67834
68102
  static ExtensionLoadResult LoadExtensionInternal(DuckDB &db, const std::string &extension, bool initial_load);
@@ -70755,18 +71023,17 @@ private:
70755
71023
  bool parallel;
70756
71024
  };
70757
71025
 
70758
- class HashJoinFinalizeEvent : public Event {
71026
+ class HashJoinFinalizeEvent : public BasePipelineEvent {
70759
71027
  public:
70760
71028
  HashJoinFinalizeEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink)
70761
- : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink) {
71029
+ : BasePipelineEvent(pipeline_p), sink(sink) {
70762
71030
  }
70763
71031
 
70764
- Pipeline &pipeline;
70765
71032
  HashJoinGlobalSinkState &sink;
70766
71033
 
70767
71034
  public:
70768
71035
  void Schedule() override {
70769
- auto &context = pipeline.GetClientContext();
71036
+ auto &context = pipeline->GetClientContext();
70770
71037
  auto parallel_construct_count =
70771
71038
  context.config.verify_parallelism ? STANDARD_VECTOR_SIZE : PARALLEL_CONSTRUCT_COUNT;
70772
71039
 
@@ -70833,20 +71100,19 @@ private:
70833
71100
  JoinHashTable &local_ht;
70834
71101
  };
70835
71102
 
70836
- class HashJoinPartitionEvent : public Event {
71103
+ class HashJoinPartitionEvent : public BasePipelineEvent {
70837
71104
  public:
70838
71105
  HashJoinPartitionEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink,
70839
71106
  vector<unique_ptr<JoinHashTable>> &local_hts)
70840
- : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink), local_hts(local_hts) {
71107
+ : BasePipelineEvent(pipeline_p), sink(sink), local_hts(local_hts) {
70841
71108
  }
70842
71109
 
70843
- Pipeline &pipeline;
70844
71110
  HashJoinGlobalSinkState &sink;
70845
71111
  vector<unique_ptr<JoinHashTable>> &local_hts;
70846
71112
 
70847
71113
  public:
70848
71114
  void Schedule() override {
70849
- auto &context = pipeline.GetClientContext();
71115
+ auto &context = pipeline->GetClientContext();
70850
71116
  vector<unique_ptr<Task>> partition_tasks;
70851
71117
  partition_tasks.reserve(local_hts.size());
70852
71118
  for (auto &local_ht : local_hts) {
@@ -70859,7 +71125,7 @@ public:
70859
71125
  //! Runs when the partition event finishes: clears the referenced vector of local hash tables,
  //! calls PrepareExternalFinalize() on the sink's hash table, then asks the sink to schedule
  //! finalization for this pipeline and event. The added line dereferences `pipeline`, which is
  //! now the shared_ptr member inherited from BasePipelineEvent instead of a reference member.
  void FinishEvent() override {
70860
71126
  local_hts.clear();
70861
71127
  sink.hash_table->PrepareExternalFinalize();
70862
- sink.ScheduleFinalize(pipeline, *this);
71128
+ sink.ScheduleFinalize(*pipeline, *this);
70863
71129
  }
70864
71130
  };
70865
71131
 
@@ -74563,21 +74829,20 @@ private:
74563
74829
  GlobalSortedTable &table;
74564
74830
  };
74565
74831
 
74566
- class RangeJoinMergeEvent : public Event {
74832
+ class RangeJoinMergeEvent : public BasePipelineEvent {
74567
74833
  public:
74568
74834
  using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
74569
74835
 
74570
74836
  public:
74571
74837
  RangeJoinMergeEvent(GlobalSortedTable &table_p, Pipeline &pipeline_p)
74572
- : Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
74838
+ : BasePipelineEvent(pipeline_p), table(table_p) {
74573
74839
  }
74574
74840
 
74575
74841
  GlobalSortedTable &table;
74576
- Pipeline &pipeline;
74577
74842
 
74578
74843
  public:
74579
74844
  void Schedule() override {
74580
- auto &context = pipeline.GetClientContext();
74845
+ auto &context = pipeline->GetClientContext();
74581
74846
 
74582
74847
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
74583
74848
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -74596,7 +74861,7 @@ public:
74596
74861
  global_sort_state.CompleteMergeRound(true);
74597
74862
  if (global_sort_state.sorted_blocks.size() > 1) {
74598
74863
  // Multiple blocks remaining: Schedule the next round
74599
- table.ScheduleMergeTasks(pipeline, *this);
74864
+ table.ScheduleMergeTasks(*pipeline, *this);
74600
74865
  }
74601
74866
  }
74602
74867
  };
@@ -74984,18 +75249,17 @@ private:
74984
75249
  OrderGlobalState &state;
74985
75250
  };
74986
75251
 
74987
- class OrderMergeEvent : public Event {
75252
+ class OrderMergeEvent : public BasePipelineEvent {
74988
75253
  public:
74989
75254
  OrderMergeEvent(OrderGlobalState &gstate_p, Pipeline &pipeline_p)
74990
- : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p) {
75255
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p) {
74991
75256
  }
74992
75257
 
74993
75258
  OrderGlobalState &gstate;
74994
- Pipeline &pipeline;
74995
75259
 
74996
75260
  public:
74997
75261
  void Schedule() override {
74998
- auto &context = pipeline.GetClientContext();
75262
+ auto &context = pipeline->GetClientContext();
74999
75263
 
75000
75264
  // Schedule tasks equal to the number of threads, which will each merge multiple partitions
75001
75265
  auto &ts = TaskScheduler::GetScheduler(context);
@@ -75014,7 +75278,7 @@ public:
75014
75278
  global_sort_state.CompleteMergeRound();
75015
75279
  if (global_sort_state.sorted_blocks.size() > 1) {
75016
75280
  // Multiple blocks remaining: Schedule the next round
75017
- PhysicalOrder::ScheduleMergeTasks(pipeline, *this, gstate);
75281
+ PhysicalOrder::ScheduleMergeTasks(*pipeline, *this, gstate);
75018
75282
  }
75019
75283
  }
75020
75284
  };
@@ -79914,10 +80178,17 @@ void PhysicalCreateIndex::GetData(ExecutionContext &context, DataChunk &chunk, G
79914
80178
  return;
79915
80179
  }
79916
80180
 
80181
+ // convert virtual column ids to storage column ids
80182
+ vector<column_t> storage_ids;
80183
+ for (auto &column_id : column_ids) {
80184
+ D_ASSERT(column_id < table.columns.size());
80185
+ storage_ids.push_back(table.columns[column_id].StorageOid());
80186
+ }
80187
+
79917
80188
  unique_ptr<Index> index;
79918
80189
  switch (info->index_type) {
79919
80190
  case IndexType::ART: {
79920
- index = make_unique<ART>(column_ids, unbound_expressions, info->constraint_type, *context.client.db);
80191
+ index = make_unique<ART>(storage_ids, unbound_expressions, info->constraint_type, *context.client.db);
79921
80192
  break;
79922
80193
  }
79923
80194
  default:
@@ -80222,11 +80493,10 @@ unique_ptr<GlobalSinkState> PhysicalCreateTableAs::GetGlobalSinkState(ClientCont
80222
80493
  //! Sink for CREATE TABLE AS: appends the incoming chunk to the table held by the global sink
  //! state and adds the chunk's row count to inserted_count, always returning NEED_MORE_INPUT.
  //! The removed lines skipped the append when sink.table was unset; the added lines instead
  //! assert (D_ASSERT) that sink.table is set and append unconditionally.
  SinkResultType PhysicalCreateTableAs::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p,
80223
80494
  DataChunk &input) const {
80224
80495
  auto &sink = (CreateTableAsGlobalState &)state;
80225
- if (sink.table) {
80226
- lock_guard<mutex> client_guard(sink.append_lock);
80227
- sink.table->storage->Append(*sink.table, context.client, input);
80228
- sink.inserted_count += input.size();
80229
- }
80496
+ D_ASSERT(sink.table);
  // the append and the counter update both happen while sink.append_lock is held
80497
+ lock_guard<mutex> client_guard(sink.append_lock);
80498
+ sink.table->storage->Append(*sink.table, context.client, input);
80499
+ sink.inserted_count += input.size();
80230
80500
  return SinkResultType::NEED_MORE_INPUT;
80231
80501
  }
80232
80502
 
@@ -80636,6 +80906,7 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
80636
80906
  void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
80637
80907
  op_state.reset();
80638
80908
  sink_state.reset();
80909
+ pipelines.clear();
80639
80910
 
80640
80911
  // recursive CTE
80641
80912
  state.SetPipelineSource(current, this);
@@ -80935,7 +81206,7 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(Allocator &allocator, Buffe
80935
81206
  total_required_bits += group_bits;
80936
81207
  }
80937
81208
  // the total amount of groups we allocate space for is 2^required_bits
80938
- total_groups = 1 << total_required_bits;
81209
+ total_groups = (uint64_t)1 << total_required_bits;
80939
81210
  // we don't need to store the groups in a perfect hash table, since the group keys can be deduced by their location
80940
81211
  grouping_columns = group_types_p.size();
80941
81212
  layout.Initialize(move(aggregate_objects_p));
@@ -81119,7 +81390,7 @@ static void ReconstructGroupVectorTemplated(uint32_t group_values[], Value &min,
81119
81390
  static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t required_bits, idx_t shift,
81120
81391
  idx_t entry_count, Vector &result) {
81121
81392
  // construct the mask for this entry
81122
- idx_t mask = (1 << required_bits) - 1;
81393
+ idx_t mask = ((uint64_t)1 << required_bits) - 1;
81123
81394
  switch (result.GetType().InternalType()) {
81124
81395
  case PhysicalType::INT8:
81125
81396
  ReconstructGroupVectorTemplated<int8_t>(group_values, min, mask, shift, entry_count, result);
@@ -85366,7 +85637,7 @@ void RadixPartitionedHashTable::SetGroupingValues() {
85366
85637
  for (idx_t i = 0; i < grouping.size(); i++) {
85367
85638
  if (grouping_set.find(grouping[i]) == grouping_set.end()) {
85368
85639
  // we don't group on this value!
85369
- grouping_value += 1 << (grouping.size() - (i + 1));
85640
+ grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
85370
85641
  }
85371
85642
  }
85372
85643
  grouping_values.push_back(Value::BIGINT(grouping_value));
@@ -90924,7 +91195,21 @@ struct ModeIncluded {
90924
91195
  const idx_t bias;
90925
91196
  };
90926
91197
 
90927
- template <typename KEY_TYPE>
91198
//! Assignment policy for the mode aggregate that produces the result value by converting
//! `input` to RESULT_TYPE; the `result` vector argument is not used. It is the default
//! ASSIGN_OP template argument of GetTypedModeFunction.
+ struct ModeAssignmentStandard {
91199
+ template <class INPUT_TYPE, class RESULT_TYPE>
91200
+ static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
91201
+ return RESULT_TYPE(input);
91202
+ }
91203
+ };
91204
+
91205
//! Assignment policy for the mode aggregate that passes `input` through
//! StringVector::AddString(result, input) and returns what that call returns.
//! GetModeAggregate selects it for PhysicalType::VARCHAR.
//! NOTE(review): presumably AddString copies the string into storage owned by `result`
//! so the value does not depend on the aggregate state's lifetime - confirm.
+ struct ModeAssignmentString {
91206
+ template <class INPUT_TYPE, class RESULT_TYPE>
91207
+ static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
91208
+ return StringVector::AddString(result, input);
91209
+ }
91210
+ };
91211
+
91212
+ template <typename KEY_TYPE, typename ASSIGN_OP>
90928
91213
  struct ModeFunction {
90929
91214
  template <class STATE>
90930
91215
  static void Initialize(STATE *state) {
@@ -91037,7 +91322,7 @@ struct ModeFunction {
91037
91322
  }
91038
91323
 
91039
91324
  if (state->valid) {
91040
- rdata[rid] = RESULT_TYPE(*state->mode);
91325
+ rdata[rid] = ASSIGN_OP::template Assign<INPUT_TYPE, RESULT_TYPE>(result, *state->mode);
91041
91326
  } else {
91042
91327
  rmask.Set(rid, false);
91043
91328
  }
@@ -91053,10 +91338,10 @@ struct ModeFunction {
91053
91338
  }
91054
91339
  };
91055
91340
 
91056
- template <typename INPUT_TYPE, typename KEY_TYPE>
91341
+ template <typename INPUT_TYPE, typename KEY_TYPE, typename ASSIGN_OP = ModeAssignmentStandard>
91057
91342
  AggregateFunction GetTypedModeFunction(const LogicalType &type) {
91058
91343
  using STATE = ModeState<KEY_TYPE>;
91059
- using OP = ModeFunction<KEY_TYPE>;
91344
+ using OP = ModeFunction<KEY_TYPE, ASSIGN_OP>;
91060
91345
  auto func = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, INPUT_TYPE, OP>(type, type);
91061
91346
  func.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, INPUT_TYPE, OP>;
91062
91347
  return func;
@@ -91092,7 +91377,7 @@ AggregateFunction GetModeAggregate(const LogicalType &type) {
91092
91377
  return GetTypedModeFunction<interval_t, interval_t>(type);
91093
91378
 
91094
91379
  case PhysicalType::VARCHAR:
91095
- return GetTypedModeFunction<string_t, string>(type);
91380
+ return GetTypedModeFunction<string_t, string, ModeAssignmentString>(type);
91096
91381
 
91097
91382
  default:
91098
91383
  throw NotImplementedException("Unimplemented mode aggregate");
@@ -93281,21 +93566,21 @@ AggregateFunction GetHistogramFunction(const LogicalType &type) {
93281
93566
  case LogicalType::VARCHAR:
93282
93567
  return GetMapType<HistogramStringFunctor, string, IS_ORDERED>(type);
93283
93568
  case LogicalType::TIMESTAMP:
93284
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93569
+ return GetMapType<HistogramFunctor, timestamp_t, IS_ORDERED>(type);
93285
93570
  case LogicalType::TIMESTAMP_TZ:
93286
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93571
+ return GetMapType<HistogramFunctor, timestamp_tz_t, IS_ORDERED>(type);
93287
93572
  case LogicalType::TIMESTAMP_S:
93288
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93573
+ return GetMapType<HistogramFunctor, timestamp_sec_t, IS_ORDERED>(type);
93289
93574
  case LogicalType::TIMESTAMP_MS:
93290
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93575
+ return GetMapType<HistogramFunctor, timestamp_ms_t, IS_ORDERED>(type);
93291
93576
  case LogicalType::TIMESTAMP_NS:
93292
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93577
+ return GetMapType<HistogramFunctor, timestamp_ns_t, IS_ORDERED>(type);
93293
93578
  case LogicalType::TIME:
93294
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93579
+ return GetMapType<HistogramFunctor, dtime_t, IS_ORDERED>(type);
93295
93580
  case LogicalType::TIME_TZ:
93296
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93581
+ return GetMapType<HistogramFunctor, dtime_tz_t, IS_ORDERED>(type);
93297
93582
  case LogicalType::DATE:
93298
- return GetMapType<HistogramFunctor, int32_t, IS_ORDERED>(type);
93583
+ return GetMapType<HistogramFunctor, date_t, IS_ORDERED>(type);
93299
93584
  default:
93300
93585
  throw InternalException("Unimplemented histogram aggregate");
93301
93586
  }
@@ -96859,7 +97144,8 @@ struct DateDiff {
96859
97144
  //! Week part of DateDiff. The added lines first map each date through
  //! Date::GetMondayOfCurrentWeek, then divide Date::Epoch of that Monday by
  //! Interval::SECS_PER_WEEK, and return the end quotient minus the start quotient.
  //! The removed line divided the epoch of the raw dates without the Monday alignment.
  struct WeekOperator {
96860
97145
  template <class TA, class TB, class TR>
96861
97146
  static inline TR Operation(TA startdate, TB enddate) {
96862
- return Date::Epoch(enddate) / Interval::SECS_PER_WEEK - Date::Epoch(startdate) / Interval::SECS_PER_WEEK;
97147
+ return Date::Epoch(Date::GetMondayOfCurrentWeek(enddate)) / Interval::SECS_PER_WEEK -
97148
+ Date::Epoch(Date::GetMondayOfCurrentWeek(startdate)) / Interval::SECS_PER_WEEK;
96863
97149
  }
96864
97150
  };
96865
97151
 
@@ -103243,12 +103529,49 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
103243
103529
  result, state_vector.state_vector, count);
103244
103530
  break;
103245
103531
  case PhysicalType::INT32:
103246
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103247
- result, state_vector.state_vector, count);
103532
+ if (key_type.id() == LogicalTypeId::DATE) {
103533
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, date_t>(
103534
+ result, state_vector.state_vector, count);
103535
+ } else {
103536
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103537
+ result, state_vector.state_vector, count);
103538
+ }
103248
103539
  break;
103249
103540
  case PhysicalType::INT64:
103250
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103251
- result, state_vector.state_vector, count);
103541
+ switch (key_type.id()) {
103542
+ case LogicalTypeId::TIME:
103543
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_t>(
103544
+ result, state_vector.state_vector, count);
103545
+ break;
103546
+ case LogicalTypeId::TIME_TZ:
103547
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_tz_t>(
103548
+ result, state_vector.state_vector, count);
103549
+ break;
103550
+ case LogicalTypeId::TIMESTAMP:
103551
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_t>(
103552
+ result, state_vector.state_vector, count);
103553
+ break;
103554
+ case LogicalTypeId::TIMESTAMP_MS:
103555
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ms_t>(
103556
+ result, state_vector.state_vector, count);
103557
+ break;
103558
+ case LogicalTypeId::TIMESTAMP_NS:
103559
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ns_t>(
103560
+ result, state_vector.state_vector, count);
103561
+ break;
103562
+ case LogicalTypeId::TIMESTAMP_SEC:
103563
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_sec_t>(
103564
+ result, state_vector.state_vector, count);
103565
+ break;
103566
+ case LogicalTypeId::TIMESTAMP_TZ:
103567
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_tz_t>(
103568
+ result, state_vector.state_vector, count);
103569
+ break;
103570
+ default:
103571
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103572
+ result, state_vector.state_vector, count);
103573
+ break;
103574
+ }
103252
103575
  break;
103253
103576
  case PhysicalType::FLOAT:
103254
103577
  FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, float>(
@@ -104318,18 +104641,12 @@ void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_list
104318
104641
  static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
104319
104642
  D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
104320
104643
  auto count = args.size();
104321
- Vector &lists = args.data[0];
104644
+ Vector &input_lists = args.data[0];
104322
104645
 
104323
104646
  result.SetVectorType(VectorType::FLAT_VECTOR);
104324
104647
  auto &result_validity = FlatVector::Validity(result);
104325
104648
 
104326
- for (auto &v : args.data) {
104327
- if (v.GetVectorType() != VectorType::FLAT_VECTOR && v.GetVectorType() != VectorType::CONSTANT_VECTOR) {
104328
- v.Flatten(count);
104329
- }
104330
- }
104331
-
104332
- if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
104649
+ if (input_lists.GetType().id() == LogicalTypeId::SQLNULL) {
104333
104650
  result_validity.SetInvalid(0);
104334
104651
  return;
104335
104652
  }
@@ -104344,15 +104661,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104344
104661
  LocalSortState local_sort_state;
104345
104662
  local_sort_state.Initialize(global_sort_state, buffer_manager);
104346
104663
 
104664
+ // this ensures that we do not change the order of the entries in the input chunk
104665
+ VectorOperations::Copy(input_lists, result, count, 0, 0);
104666
+
104347
104667
  // get the child vector
104348
- auto lists_size = ListVector::GetListSize(lists);
104349
- auto &child_vector = ListVector::GetEntry(lists);
104668
+ auto lists_size = ListVector::GetListSize(result);
104669
+ auto &child_vector = ListVector::GetEntry(result);
104350
104670
  UnifiedVectorFormat child_data;
104351
104671
  child_vector.ToUnifiedFormat(lists_size, child_data);
104352
104672
 
104353
104673
  // get the lists data
104354
104674
  UnifiedVectorFormat lists_data;
104355
- lists.ToUnifiedFormat(count, lists_data);
104675
+ result.ToUnifiedFormat(count, lists_data);
104356
104676
  auto list_entries = (list_entry_t *)lists_data.data;
104357
104677
 
104358
104678
  // create the lists_indices vector, this contains an element for each list's entry,
@@ -104449,8 +104769,6 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104449
104769
  child_vector.Flatten(sel_sorted_idx);
104450
104770
  }
104451
104771
 
104452
- result.Reference(lists);
104453
-
104454
104772
  if (args.AllConstant()) {
104455
104773
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
104456
104774
  }
@@ -105224,16 +105542,21 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
105224
105542
  auto &map = args.data[0];
105225
105543
  auto &key = args.data[1];
105226
105544
 
105227
- UnifiedVectorFormat offset_data;
105545
+ UnifiedVectorFormat map_keys_data;
105546
+ UnifiedVectorFormat key_data;
105228
105547
 
105229
- auto &children = StructVector::GetEntries(map);
105548
+ auto &map_keys = MapVector::GetKeys(map);
105549
+ auto &map_values = MapVector::GetValues(map);
105550
+
105551
+ map_keys.ToUnifiedFormat(args.size(), map_keys_data);
105552
+ key.ToUnifiedFormat(args.size(), key_data);
105230
105553
 
105231
- children[0]->ToUnifiedFormat(args.size(), offset_data);
105232
105554
  for (idx_t row = 0; row < args.size(); row++) {
105233
- idx_t row_index = offset_data.sel->get_index(row);
105234
- auto key_value = key.GetValue(row_index);
105235
- auto offsets = ListVector::Search(*children[0], key_value, offset_data.sel->get_index(row));
105236
- auto values = ListVector::GetValuesFromOffsets(*children[1], offsets);
105555
+ idx_t row_index = map_keys_data.sel->get_index(row);
105556
+ idx_t key_index = key_data.sel->get_index(row);
105557
+ auto key_value = key.GetValue(key_index);
105558
+ auto offsets = ListVector::Search(map_keys, key_value, row_index);
105559
+ auto values = ListVector::GetValuesFromOffsets(map_values, offsets);
105237
105560
  FillResult(values, result, row);
105238
105561
  }
105239
105562
 
@@ -108128,6 +108451,24 @@ interval_t DivideOperator::Operation(interval_t left, int64_t right) {
108128
108451
  return left;
108129
108452
  }
108130
108453
 
108454
//! Wrapper around a binary operator OP that guards two special cases before delegating:
//!  - left == NumericLimits<LEFT_TYPE>::Minimum() and right == -1: throws OutOfRangeException
//!  - right == 0: marks row `idx` invalid in `mask` and returns `left` as the stored value
//! Any other input is forwarded to OP::Operation<LEFT_TYPE, RIGHT_TYPE, RESULT_TYPE>(left, right).
//! GetBinaryFunctionIgnoreZero uses it for TINYINT, SMALLINT, INTEGER and BIGINT.
+ struct BinaryNumericDivideWrapper {
108455
+ template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
// `fun` is accepted but not used in this body
108456
+ static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
// the overflow check runs before the zero check
108457
+ if (left == NumericLimits<LEFT_TYPE>::Minimum() && right == -1) {
108458
+ throw OutOfRangeException("Overflow in division of %d / %d", left, right);
108459
+ } else if (right == 0) {
108460
+ mask.SetInvalid(idx);
108461
+ return left;
108462
+ } else {
108463
+ return OP::template Operation<LEFT_TYPE, RIGHT_TYPE, RESULT_TYPE>(left, right);
108464
+ }
108465
+ }
108466
+
//! Always returns true: this wrapper can invalidate rows (see the right == 0 branch above)
108467
+ static bool AddsNulls() {
108468
+ return true;
108469
+ }
108470
+ };
108471
+
108131
108472
  struct BinaryZeroIsNullWrapper {
108132
108473
  template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
108133
108474
  static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
@@ -108169,13 +108510,13 @@ template <class OP>
108169
108510
  static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
108170
108511
  switch (type.id()) {
108171
108512
  case LogicalTypeId::TINYINT:
108172
- return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP>;
108513
+ return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP, BinaryNumericDivideWrapper>;
108173
108514
  case LogicalTypeId::SMALLINT:
108174
- return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP>;
108515
+ return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP, BinaryNumericDivideWrapper>;
108175
108516
  case LogicalTypeId::INTEGER:
108176
- return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP>;
108517
+ return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP, BinaryNumericDivideWrapper>;
108177
108518
  case LogicalTypeId::BIGINT:
108178
- return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP>;
108519
+ return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP, BinaryNumericDivideWrapper>;
108179
108520
  case LogicalTypeId::UTINYINT:
108180
108521
  return BinaryScalarFunctionIgnoreZero<uint8_t, uint8_t, uint8_t, OP>;
108181
108522
  case LogicalTypeId::USMALLINT:
@@ -114623,11 +114964,22 @@ static void CurrentSchemaFunction(DataChunk &input, ExpressionState &state, Vect
114623
114964
 
114624
114965
  // current_schemas
114625
114966
  //! Implements current_schemas: builds a LIST of VARCHAR values from the client's catalog search
  //! path and makes `result` reference it. With the added lines the function
  //!  - throws NotImplementedException unless every input vector is constant,
  //!  - returns a constant NULL when the first argument is NULL,
  //!  - reads the first argument as a bool: true uses catalog_search_path->Get(),
  //!    false uses catalog_search_path->GetSetPaths().
  static void CurrentSchemasFunction(DataChunk &input, ExpressionState &state, Vector &result) {
114967
+ if (!input.AllConstant()) {
114968
+ throw NotImplementedException("current_schemas requires a constant input");
114969
+ }
114970
+ if (ConstantVector::IsNull(input.data[0])) {
114971
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
114972
+ ConstantVector::SetNull(result, true);
114973
+ return;
114974
+ }
114975
+ auto implicit_schemas = *ConstantVector::GetData<bool>(input.data[0]);
114626
114976
  vector<Value> schema_list;
114627
- vector<string> search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path->Get();
114977
+ auto &catalog_search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path;
114978
+ vector<string> search_path = implicit_schemas ? catalog_search_path->Get() : catalog_search_path->GetSetPaths();
114628
114979
  std::transform(search_path.begin(), search_path.end(), std::back_inserter(schema_list),
114629
114980
  [](const string &s) -> Value { return Value(s); });
114630
- auto val = Value::LIST(schema_list);
114981
+
  // the added line passes the child type (VARCHAR) to Value::LIST explicitly
  // NOTE(review): presumably so that an empty schema_list still yields a typed list - confirm
114982
+ auto val = Value::LIST(LogicalType::VARCHAR, schema_list);
114631
114983
  result.Reference(val);
114632
114984
  }
114633
114985
 
@@ -114926,8 +115278,8 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
114926
115278
  struct ArrowScanGlobalState : public GlobalTableFunctionState {
114927
115279
  unique_ptr<ArrowArrayStreamWrapper> stream;
114928
115280
  mutex main_mutex;
114929
- bool ready = false;
114930
115281
  idx_t max_threads = 1;
115282
+ bool done = false;
114931
115283
 
114932
115284
  idx_t MaxThreads() const override {
114933
115285
  return max_threads;
@@ -115215,6 +115567,9 @@ idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const Func
115215
115567
  bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
115216
115568
  ArrowScanGlobalState &parallel_state) {
115217
115569
  lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
115570
+ if (parallel_state.done) {
115571
+ return false;
115572
+ }
115218
115573
  state.chunk_offset = 0;
115219
115574
 
115220
115575
  auto current_chunk = parallel_state.stream->GetNextChunk();
@@ -115224,6 +115579,7 @@ bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind
115224
115579
  state.chunk = move(current_chunk);
115225
115580
  //! have we run out of chunks? we are done
115226
115581
  if (!state.chunk->arrow_array.release) {
115582
+ parallel_state.done = true;
115227
115583
  return false;
115228
115584
  }
115229
115585
  return true;
@@ -117625,6 +117981,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
117625
117981
  table_function.named_parameters["skip"] = LogicalType::BIGINT;
117626
117982
  table_function.named_parameters["max_line_size"] = LogicalType::VARCHAR;
117627
117983
  table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR;
117984
+ table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
117628
117985
  }
117629
117986
 
117630
117987
  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
@@ -121455,8 +121812,7 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
121455
121812
  // we don't emit any statistics for tables that have outstanding transaction-local data
121456
121813
  return nullptr;
121457
121814
  }
121458
- auto storage_idx = GetStorageIndex(*bind_data.table, column_id);
121459
- return bind_data.table->storage->GetStatistics(context, storage_idx);
121815
+ return bind_data.table->GetStatistics(context, column_id);
121460
121816
  }
121461
121817
 
121462
121818
  static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
@@ -123028,7 +123384,7 @@ bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row) {
123028
123384
  }
123029
123385
  idx_t entry_idx = row / 64;
123030
123386
  idx_t idx_in_entry = row % 64;
123031
- return validity[entry_idx] & (1 << idx_in_entry);
123387
+ return validity[entry_idx] & ((idx_t)1 << idx_in_entry);
123032
123388
  }
123033
123389
 
123034
123390
  void duckdb_validity_set_row_validity(uint64_t *validity, idx_t row, bool valid) {
@@ -123045,7 +123401,7 @@ void duckdb_validity_set_row_invalid(uint64_t *validity, idx_t row) {
123045
123401
  }
123046
123402
  idx_t entry_idx = row / 64;
123047
123403
  idx_t idx_in_entry = row % 64;
123048
- validity[entry_idx] &= ~(1 << idx_in_entry);
123404
+ validity[entry_idx] &= ~((uint64_t)1 << idx_in_entry);
123049
123405
  }
123050
123406
 
123051
123407
  void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
@@ -123054,7 +123410,7 @@ void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
123054
123410
  }
123055
123411
  idx_t entry_idx = row / 64;
123056
123412
  idx_t idx_in_entry = row % 64;
123057
- validity[entry_idx] |= 1 << idx_in_entry;
123413
+ validity[entry_idx] |= (uint64_t)1 << idx_in_entry;
123058
123414
  }
123059
123415
 
123060
123416
 
@@ -126237,6 +126593,11 @@ PendingExecutionResult ClientContext::ExecuteTaskInternal(ClientContextLock &loc
126237
126593
  query_progress = active_query->progress_bar->GetCurrentPercentage();
126238
126594
  }
126239
126595
  return result;
126596
+ } catch (FatalException &ex) {
126597
+ // fatal exceptions invalidate the entire database
126598
+ result.SetError(PreservedError(ex));
126599
+ auto &db = DatabaseInstance::GetDatabase(*this);
126600
+ db.Invalidate();
126240
126601
  } catch (const Exception &ex) {
126241
126602
  result.SetError(PreservedError(ex));
126242
126603
  } catch (std::exception &ex) {
@@ -126456,9 +126817,19 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
126456
126817
  case StatementType::INSERT_STATEMENT:
126457
126818
  case StatementType::DELETE_STATEMENT:
126458
126819
  case StatementType::UPDATE_STATEMENT: {
126459
- auto sql = statement->ToString();
126460
126820
  Parser parser;
126461
- parser.ParseQuery(sql);
126821
+ PreservedError error;
126822
+ try {
126823
+ parser.ParseQuery(statement->ToString());
126824
+ } catch (const Exception &ex) {
126825
+ error = PreservedError(ex);
126826
+ } catch (std::exception &ex) {
126827
+ error = PreservedError(ex);
126828
+ }
126829
+ if (error) {
126830
+ // error in verifying query
126831
+ return make_unique<PendingQueryResult>(error);
126832
+ }
126462
126833
  statement = move(parser.statements[0]);
126463
126834
  break;
126464
126835
  }
@@ -137106,8 +137477,27 @@ namespace duckdb {
137106
137477
  //===--------------------------------------------------------------------===//
137107
137478
  // Install Extension
137108
137479
  //===--------------------------------------------------------------------===//
137480
+ const string ExtensionHelper::NormalizeVersionTag(const string &version_tag) {
137481
+ if (version_tag.length() > 0 && version_tag[0] != 'v') {
137482
+ return "v" + version_tag;
137483
+ }
137484
+ return version_tag;
137485
+ }
137486
+
137487
+ bool ExtensionHelper::IsRelease(const string &version_tag) {
137488
+ return !StringUtil::Contains(version_tag, "-dev");
137489
+ }
137490
+
137491
+ const string ExtensionHelper::GetVersionDirectoryName() {
137492
+ if (IsRelease(DuckDB::LibraryVersion())) {
137493
+ return NormalizeVersionTag(DuckDB::LibraryVersion());
137494
+ } else {
137495
+ return DuckDB::SourceID();
137496
+ }
137497
+ }
137498
+
137109
137499
  const vector<string> ExtensionHelper::PathComponents() {
137110
- return vector<string> {".duckdb", "extensions", DuckDB::SourceID(), DuckDB::Platform()};
137500
+ return vector<string> {".duckdb", "extensions", GetVersionDirectoryName(), DuckDB::Platform()};
137111
137501
  }
137112
137502
 
137113
137503
  string ExtensionHelper::ExtensionDirectory(ClientContext &context) {
@@ -137180,7 +137570,7 @@ void ExtensionHelper::InstallExtension(ClientContext &context, const string &ext
137180
137570
  extension_name = "";
137181
137571
  }
137182
137572
 
137183
- auto url = StringUtil::Replace(url_template, "${REVISION}", DuckDB::SourceID());
137573
+ auto url = StringUtil::Replace(url_template, "${REVISION}", GetVersionDirectoryName());
137184
137574
  url = StringUtil::Replace(url, "${PLATFORM}", DuckDB::Platform());
137185
137575
  url = StringUtil::Replace(url, "${NAME}", extension_name);
137186
137576
 
@@ -141402,7 +141792,7 @@ Value ForceCompressionSetting::GetSetting(ClientContext &context) {
141402
141792
  //===--------------------------------------------------------------------===//
141403
141793
  void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input) {
141404
141794
  auto &config = ClientConfig::GetConfig(context);
141405
- config.home_directory = input.IsNull() ? input.ToString() : string();
141795
+ config.home_directory = input.IsNull() ? string() : input.ToString();
141406
141796
  }
141407
141797
 
141408
141798
  Value HomeDirectorySetting::GetSetting(ClientContext &context) {
@@ -142358,9 +142748,7 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
142358
142748
  // Get HLL stats here
142359
142749
  auto actual_binding = relation_column_to_original_column[key];
142360
142750
 
142361
- // sometimes base stats is null (test_709.test) returns null for base stats while
142362
- // there is still a catalog table. Anybody know anything about this?
142363
- auto base_stats = catalog_table->storage->GetStatistics(context, actual_binding.column_index);
142751
+ auto base_stats = catalog_table->GetStatistics(context, actual_binding.column_index);
142364
142752
  if (base_stats) {
142365
142753
  count = base_stats->GetDistinctCount();
142366
142754
  }
@@ -143056,6 +143444,7 @@ private:
143056
143444
 
143057
143445
 
143058
143446
 
143447
+
143059
143448
  namespace duckdb {
143060
143449
 
143061
143450
  class DeliminatorPlanUpdater : LogicalOperatorVisitor {
@@ -143083,7 +143472,15 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
143083
143472
  cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
143084
143473
  continue;
143085
143474
  }
143086
- auto &colref = (BoundColumnRefExpression &)*cond.right;
143475
+ Expression *rhs = cond.right.get();
143476
+ while (rhs->type == ExpressionType::OPERATOR_CAST) {
143477
+ auto &cast = (BoundCastExpression &)*rhs;
143478
+ rhs = cast.child.get();
143479
+ }
143480
+ if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
143481
+ throw InternalException("Erorr in deliminator: expected a bound column reference");
143482
+ }
143483
+ auto &colref = (BoundColumnRefExpression &)*rhs;
143087
143484
  if (projection_map.find(colref.binding) != projection_map.end()) {
143088
143485
  // value on the right is a projection of removed DelimGet
143089
143486
  for (idx_t i = 0; i < decs->size(); i++) {
@@ -144231,7 +144628,10 @@ FilterResult FilterCombiner::AddBoundComparisonFilter(Expression *expr) {
144231
144628
  auto node = GetNode(left_is_scalar ? comparison.right.get() : comparison.left.get());
144232
144629
  idx_t equivalence_set = GetEquivalenceSet(node);
144233
144630
  auto scalar = left_is_scalar ? comparison.left.get() : comparison.right.get();
144234
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144631
+ Value constant_value;
144632
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144633
+ return FilterResult::UNSATISFIABLE;
144634
+ }
144235
144635
  if (constant_value.IsNull()) {
144236
144636
  // comparisons with null are always null (i.e. will never result in rows)
144237
144637
  return FilterResult::UNSATISFIABLE;
@@ -144312,7 +144712,11 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144312
144712
  }
144313
144713
  if (expr->IsFoldable()) {
144314
144714
  // scalar condition, evaluate it
144315
- auto result = ExpressionExecutor::EvaluateScalar(*expr).CastAs(LogicalType::BOOLEAN);
144715
+ Value result;
144716
+ if (!ExpressionExecutor::TryEvaluateScalar(*expr, result)) {
144717
+ return FilterResult::UNSUPPORTED;
144718
+ }
144719
+ result = result.CastAs(LogicalType::BOOLEAN);
144316
144720
  // check if the filter passes
144317
144721
  if (result.IsNull() || !BooleanValue::Get(result)) {
144318
144722
  // the filter does not pass the scalar test, create an empty result
@@ -144336,7 +144740,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144336
144740
 
144337
144741
  if (lower_is_scalar) {
144338
144742
  auto scalar = comparison.lower.get();
144339
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144743
+ Value constant_value;
144744
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144745
+ return FilterResult::UNSUPPORTED;
144746
+ }
144340
144747
 
144341
144748
  // create the ExpressionValueInformation
144342
144749
  ExpressionValueInformation info;
@@ -144369,7 +144776,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
144369
144776
 
144370
144777
  if (upper_is_scalar) {
144371
144778
  auto scalar = comparison.upper.get();
144372
- auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
144779
+ Value constant_value;
144780
+ if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
144781
+ return FilterResult::UNSUPPORTED;
144782
+ }
144373
144783
 
144374
144784
  // create the ExpressionValueInformation
144375
144785
  ExpressionValueInformation info;
@@ -145281,7 +145691,6 @@ unique_ptr<Expression> InClauseRewriter::VisitReplace(BoundOperatorExpression &e
145281
145691
  // IN clause with many children: try to generate a mark join that replaces this IN expression
145282
145692
  // we can only do this if the expressions in the expression list are scalar
145283
145693
  for (idx_t i = 1; i < expr.children.size(); i++) {
145284
- D_ASSERT(expr.children[i]->return_type == in_type);
145285
145694
  if (!expr.children[i]->IsFoldable()) {
145286
145695
  // non-scalar expression
145287
145696
  all_scalar = false;
@@ -147720,21 +148129,35 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownAggregate(unique_ptr<Logical
147720
148129
  FilterPushdown child_pushdown(optimizer);
147721
148130
  for (idx_t i = 0; i < filters.size(); i++) {
147722
148131
  auto &f = *filters[i];
147723
- // check if any aggregate or GROUPING functions are in the set
147724
- if (f.bindings.find(aggr.aggregate_index) == f.bindings.end() &&
147725
- f.bindings.find(aggr.groupings_index) == f.bindings.end()) {
147726
- // no aggregate! we can push this down
147727
- // rewrite any group bindings within the filter
147728
- f.filter = ReplaceGroupBindings(aggr, move(f.filter));
147729
- // add the filter to the child node
147730
- if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
147731
- // filter statically evaluates to false, strip tree
147732
- return make_unique<LogicalEmptyResult>(move(op));
148132
+ if (f.bindings.find(aggr.aggregate_index) != f.bindings.end()) {
148133
+ // filter on aggregate: cannot pushdown
148134
+ continue;
148135
+ }
148136
+ if (f.bindings.find(aggr.groupings_index) != f.bindings.end()) {
148137
+ // filter on GROUPINGS function: cannot pushdown
148138
+ continue;
148139
+ }
148140
+ // if there are any empty grouping sets, we cannot push down filters
148141
+ bool has_empty_grouping_sets = false;
148142
+ for (auto &grp : aggr.grouping_sets) {
148143
+ if (grp.empty()) {
148144
+ has_empty_grouping_sets = true;
147733
148145
  }
147734
- // erase the filter from here
147735
- filters.erase(filters.begin() + i);
147736
- i--;
147737
148146
  }
148147
+ if (has_empty_grouping_sets) {
148148
+ continue;
148149
+ }
148150
+ // no aggregate! we can push this down
148151
+ // rewrite any group bindings within the filter
148152
+ f.filter = ReplaceGroupBindings(aggr, move(f.filter));
148153
+ // add the filter to the child node
148154
+ if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
148155
+ // filter statically evaluates to false, strip tree
148156
+ return make_unique<LogicalEmptyResult>(move(op));
148157
+ }
148158
+ // erase the filter from here
148159
+ filters.erase(filters.begin() + i);
148160
+ i--;
147738
148161
  }
147739
148162
  child_pushdown.GenerateFilters();
147740
148163
 
@@ -152440,6 +152863,19 @@ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
152440
152863
  } // namespace duckdb
152441
152864
 
152442
152865
 
152866
+ namespace duckdb {
152867
+
152868
+ BasePipelineEvent::BasePipelineEvent(shared_ptr<Pipeline> pipeline_p)
152869
+ : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
152870
+ }
152871
+
152872
+ BasePipelineEvent::BasePipelineEvent(Pipeline &pipeline_p)
152873
+ : Event(pipeline_p.executor), pipeline(pipeline_p.shared_from_this()) {
152874
+ }
152875
+
152876
+ } // namespace duckdb
152877
+
152878
+
152443
152879
 
152444
152880
 
152445
152881
 
@@ -152559,16 +152995,13 @@ public:
152559
152995
 
152560
152996
 
152561
152997
 
152562
-
152563
152998
  namespace duckdb {
152564
152999
 
152565
- class PipelineEvent : public Event {
153000
+ //! A PipelineEvent is responsible for scheduling a pipeline
153001
+ class PipelineEvent : public BasePipelineEvent {
152566
153002
  public:
152567
153003
  PipelineEvent(shared_ptr<Pipeline> pipeline);
152568
153004
 
152569
- //! The pipeline that this event belongs to
152570
- shared_ptr<Pipeline> pipeline;
152571
-
152572
153005
  public:
152573
153006
  void Schedule() override;
152574
153007
  void FinishEvent() override;
@@ -152696,17 +153129,13 @@ private:
152696
153129
 
152697
153130
 
152698
153131
 
152699
-
152700
153132
  namespace duckdb {
152701
153133
  class Executor;
152702
153134
 
152703
- class PipelineFinishEvent : public Event {
153135
+ class PipelineFinishEvent : public BasePipelineEvent {
152704
153136
  public:
152705
153137
  PipelineFinishEvent(shared_ptr<Pipeline> pipeline);
152706
153138
 
152707
- //! The pipeline that this event belongs to
152708
- shared_ptr<Pipeline> pipeline;
152709
-
152710
153139
  public:
152711
153140
  void Schedule() override;
152712
153141
  void FinishEvent() override;
@@ -152733,6 +153162,9 @@ Executor &Executor::Get(ClientContext &context) {
152733
153162
 
152734
153163
  void Executor::AddEvent(shared_ptr<Event> event) {
152735
153164
  lock_guard<mutex> elock(executor_lock);
153165
+ if (cancelled) {
153166
+ return;
153167
+ }
152736
153168
  events.push_back(move(event));
152737
153169
  }
152738
153170
 
@@ -153036,6 +153468,7 @@ void Executor::CancelTasks() {
153036
153468
  vector<weak_ptr<Pipeline>> weak_references;
153037
153469
  {
153038
153470
  lock_guard<mutex> elock(executor_lock);
153471
+ cancelled = true;
153039
153472
  weak_references.reserve(pipelines.size());
153040
153473
  for (auto &pipeline : pipelines) {
153041
153474
  weak_references.push_back(weak_ptr<Pipeline>(pipeline));
@@ -153112,10 +153545,10 @@ PendingExecutionResult Executor::ExecuteTask() {
153112
153545
  lock_guard<mutex> elock(executor_lock);
153113
153546
  pipelines.clear();
153114
153547
  NextExecutor();
153115
- if (!exceptions.empty()) { // LCOV_EXCL_START
153548
+ if (HasError()) { // LCOV_EXCL_START
153116
153549
  // an exception has occurred executing one of the pipelines
153117
153550
  execution_result = PendingExecutionResult::EXECUTION_ERROR;
153118
- ThrowExceptionInternal();
153551
+ ThrowException();
153119
153552
  } // LCOV_EXCL_STOP
153120
153553
  execution_result = PendingExecutionResult::RESULT_READY;
153121
153554
  return execution_result;
@@ -153124,6 +153557,7 @@ PendingExecutionResult Executor::ExecuteTask() {
153124
153557
  void Executor::Reset() {
153125
153558
  lock_guard<mutex> elock(executor_lock);
153126
153559
  physical_plan = nullptr;
153560
+ cancelled = false;
153127
153561
  owned_plan.reset();
153128
153562
  root_executor.reset();
153129
153563
  root_pipelines.clear();
@@ -153160,7 +153594,7 @@ vector<LogicalType> Executor::GetTypes() {
153160
153594
  }
153161
153595
 
153162
153596
  void Executor::PushError(PreservedError exception) {
153163
- lock_guard<mutex> elock(executor_lock);
153597
+ lock_guard<mutex> elock(error_lock);
153164
153598
  // interrupt execution of any other pipelines that belong to this executor
153165
153599
  context.interrupted = true;
153166
153600
  // push the exception onto the stack
@@ -153168,20 +153602,16 @@ void Executor::PushError(PreservedError exception) {
153168
153602
  }
153169
153603
 
153170
153604
  bool Executor::HasError() {
153171
- lock_guard<mutex> elock(executor_lock);
153605
+ lock_guard<mutex> elock(error_lock);
153172
153606
  return !exceptions.empty();
153173
153607
  }
153174
153608
 
153175
153609
  void Executor::ThrowException() {
153176
- lock_guard<mutex> elock(executor_lock);
153177
- ThrowExceptionInternal();
153178
- }
153179
-
153180
- void Executor::ThrowExceptionInternal() { // LCOV_EXCL_START
153610
+ lock_guard<mutex> elock(error_lock);
153181
153611
  D_ASSERT(!exceptions.empty());
153182
153612
  auto &entry = exceptions[0];
153183
153613
  entry.Throw();
153184
- } // LCOV_EXCL_STOP
153614
+ }
153185
153615
 
153186
153616
  void Executor::Flush(ThreadContext &tcontext) {
153187
153617
  profiler->Flush(tcontext.profiler);
@@ -153446,6 +153876,9 @@ void Pipeline::Ready() {
153446
153876
  }
153447
153877
 
153448
153878
  void Pipeline::Finalize(Event &event) {
153879
+ if (executor.HasError()) {
153880
+ return;
153881
+ }
153449
153882
  D_ASSERT(ready);
153450
153883
  try {
153451
153884
  auto sink_state = sink->Finalize(*this, event, executor.context, *sink->sink_state);
@@ -153556,16 +153989,25 @@ void PipelineCompleteEvent::FinalizeFinish() {
153556
153989
  } // namespace duckdb
153557
153990
 
153558
153991
 
153992
+
153559
153993
  namespace duckdb {
153560
153994
 
153561
- PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p)
153562
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
153995
+ PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
153563
153996
  }
153564
153997
 
153565
153998
  void PipelineEvent::Schedule() {
153566
153999
  auto event = shared_from_this();
153567
- pipeline->Schedule(event);
153568
- D_ASSERT(total_tasks > 0);
154000
+ auto &executor = pipeline->executor;
154001
+ try {
154002
+ pipeline->Schedule(event);
154003
+ D_ASSERT(total_tasks > 0);
154004
+ } catch (Exception &ex) {
154005
+ executor.PushError(PreservedError(ex));
154006
+ } catch (std::exception &ex) {
154007
+ executor.PushError(PreservedError(ex));
154008
+ } catch (...) { // LCOV_EXCL_START
154009
+ executor.PushError(PreservedError("Unknown exception in Finalize!"));
154010
+ } // LCOV_EXCL_STOP
153569
154011
  }
153570
154012
 
153571
154013
  void PipelineEvent::FinishEvent() {
@@ -153948,8 +154390,7 @@ void PipelineExecutor::EndOperator(PhysicalOperator *op, DataChunk *chunk) {
153948
154390
 
153949
154391
  namespace duckdb {
153950
154392
 
153951
- PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p)
153952
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
154393
+ PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
153953
154394
  }
153954
154395
 
153955
154396
  void PipelineFinishEvent::Schedule() {
@@ -167484,7 +167925,7 @@ string QueryNode::ResultModifiersToString() const {
167484
167925
  } else if (modifier.type == ResultModifierType::LIMIT_PERCENT_MODIFIER) {
167485
167926
  auto &limit_p_modifier = (LimitPercentModifier &)modifier;
167486
167927
  if (limit_p_modifier.limit) {
167487
- result += " LIMIT " + limit_p_modifier.limit->ToString() + " %";
167928
+ result += " LIMIT (" + limit_p_modifier.limit->ToString() + ") %";
167488
167929
  }
167489
167930
  if (limit_p_modifier.offset) {
167490
167931
  result += " OFFSET " + limit_p_modifier.offset->ToString();
@@ -171360,7 +171801,7 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause *de_with_clause,
171360
171801
  }
171361
171802
  // we need a query
171362
171803
  if (!cte->ctequery || cte->ctequery->type != duckdb_libpgquery::T_PGSelectStmt) {
171363
- throw InternalException("A CTE needs a SELECT");
171804
+ throw NotImplementedException("A CTE needs a SELECT");
171364
171805
  }
171365
171806
 
171366
171807
  // CTE transformation can either result in inlining for non recursive CTEs, or in recursive CTE bindings
@@ -174956,6 +175397,8 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
174956
175397
  // we didn't bind columns, try again in children
174957
175398
  return BindResult(error);
174958
175399
  }
175400
+ } else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
175401
+ return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
174959
175402
  }
174960
175403
  if (!filter_error.empty()) {
174961
175404
  return BindResult(filter_error);
@@ -174963,8 +175406,9 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
174963
175406
 
174964
175407
  if (aggr.filter) {
174965
175408
  auto &child = (BoundExpression &)*aggr.filter;
174966
- bound_filter = move(child.expr);
175409
+ bound_filter = BoundCastExpression::AddCastToType(move(child.expr), LogicalType::BOOLEAN);
174967
175410
  }
175411
+
174968
175412
  // all children bound successfully
174969
175413
  // extract the children and types
174970
175414
  vector<LogicalType> types;
@@ -176117,7 +176561,7 @@ BindResult ExpressionBinder::BindMacro(FunctionExpression &function, ScalarMacro
176117
176561
  string error =
176118
176562
  MacroFunction::ValidateArguments(*macro_func->function, macro_func->name, function, positionals, defaults);
176119
176563
  if (!error.empty()) {
176120
- return BindResult(binder.FormatError(*expr->get(), error));
176564
+ throw BinderException(binder.FormatError(*expr->get(), error));
176121
176565
  }
176122
176566
 
176123
176567
  // create a MacroBinding to bind this macro's parameters to its arguments
@@ -177140,10 +177584,13 @@ public:
177140
177584
  public:
177141
177585
  unique_ptr<Expression> Bind(unique_ptr<ParsedExpression> expr);
177142
177586
 
177143
- idx_t MaxCount() {
177587
+ idx_t MaxCount() const {
177144
177588
  return max_count;
177145
177589
  }
177146
177590
 
177591
+ bool HasExtraList() const {
177592
+ return extra_list;
177593
+ }
177147
177594
  unique_ptr<Expression> CreateExtraReference(unique_ptr<ParsedExpression> expr);
177148
177595
 
177149
177596
  private:
@@ -177185,6 +177632,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177185
177632
  Value &delimiter_value) {
177186
177633
  auto new_binder = Binder::CreateBinder(context, this, true);
177187
177634
  if (delimiter->HasSubquery()) {
177635
+ if (!order_binder.HasExtraList()) {
177636
+ throw BinderException("Subquery in LIMIT/OFFSET not supported in set operation");
177637
+ }
177188
177638
  return order_binder.CreateExtraReference(move(delimiter));
177189
177639
  }
177190
177640
  ExpressionBinder expr_binder(*new_binder, context);
@@ -177195,6 +177645,8 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177195
177645
  delimiter_value = ExpressionExecutor::EvaluateScalar(*expr).CastAs(type);
177196
177646
  return nullptr;
177197
177647
  }
177648
+ // move any correlated columns to this binder
177649
+ MoveCorrelatedExpressions(*new_binder);
177198
177650
  return expr;
177199
177651
  }
177200
177652
 
@@ -179798,11 +180250,13 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179798
180250
  BindDefaultValues(base.columns, result->bound_defaults);
179799
180251
  }
179800
180252
 
180253
+ idx_t regular_column_count = 0;
179801
180254
  // bind collations to detect any unsupported collation errors
179802
180255
  for (auto &column : base.columns) {
179803
180256
  if (column.Generated()) {
179804
180257
  continue;
179805
180258
  }
180259
+ regular_column_count++;
179806
180260
  if (column.Type().id() == LogicalTypeId::VARCHAR) {
179807
180261
  ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
179808
180262
  }
@@ -179814,6 +180268,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179814
180268
  result->dependencies.insert(type_dependency);
179815
180269
  }
179816
180270
  }
180271
+ if (regular_column_count == 0) {
180272
+ throw BinderException("Creating a table without physical (non-generated) columns is not supported");
180273
+ }
179817
180274
  properties.allow_stream_result = false;
179818
180275
  return result;
179819
180276
  }
@@ -180241,6 +180698,13 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
180241
180698
  info->schema = table->schema->name;
180242
180699
  info->table = table->name;
180243
180700
 
180701
+ // We can not export generated columns
180702
+ for (auto &col : table->columns) {
180703
+ if (!col.Generated()) {
180704
+ info->select_list.push_back(col.GetName());
180705
+ }
180706
+ }
180707
+
180244
180708
  exported_data.table_name = info->table;
180245
180709
  exported_data.schema_name = info->schema;
180246
180710
  exported_data.file_path = info->file_path;
@@ -180486,7 +180950,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
180486
180950
  }
180487
180951
 
180488
180952
  // parse select statement and add to logical plan
180489
- auto root_select = Bind(*stmt.select_statement);
180953
+ auto select_binder = Binder::CreateBinder(context, this);
180954
+ auto root_select = select_binder->Bind(*stmt.select_statement);
180955
+ MoveCorrelatedExpressions(*select_binder);
180956
+
180490
180957
  CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
180491
180958
  table->name.c_str());
180492
180959
 
@@ -181768,6 +182235,18 @@ string Binder::RetrieveUsingBinding(Binder &current_binder, UsingColumnSet *curr
181768
182235
  return binding;
181769
182236
  }
181770
182237
 
182238
+ static vector<string> RemoveDuplicateUsingColumns(const vector<string> &using_columns) {
182239
+ vector<string> result;
182240
+ case_insensitive_set_t handled_columns;
182241
+ for (auto &using_column : using_columns) {
182242
+ if (handled_columns.find(using_column) == handled_columns.end()) {
182243
+ handled_columns.insert(using_column);
182244
+ result.push_back(using_column);
182245
+ }
182246
+ }
182247
+ return result;
182248
+ }
182249
+
181771
182250
  unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
181772
182251
  auto result = make_unique<BoundJoinRef>();
181773
182252
  result->left_binder = Binder::CreateBinder(context, this);
@@ -181837,6 +182316,8 @@ unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
181837
182316
  D_ASSERT(!result->condition);
181838
182317
  extra_using_columns = ref.using_columns;
181839
182318
  }
182319
+ extra_using_columns = RemoveDuplicateUsingColumns(extra_using_columns);
182320
+
181840
182321
  if (!extra_using_columns.empty()) {
181841
182322
  vector<UsingColumnSet *> left_using_bindings;
181842
182323
  vector<UsingColumnSet *> right_using_bindings;
@@ -182282,7 +182763,7 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundEmptyTableRef &ref) {
182282
182763
  namespace duckdb {
182283
182764
 
182284
182765
  unique_ptr<LogicalOperator> Binder::CreatePlan(BoundExpressionListRef &ref) {
182285
- auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(0);
182766
+ auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(GenerateTableIndex());
182286
182767
  // values list, first plan any subqueries in the list
182287
182768
  for (auto &expr_list : ref.values) {
182288
182769
  for (auto &expr : expr_list) {
@@ -184835,7 +185316,7 @@ BindResult ConstantBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr
184835
185316
  case ExpressionClass::COLUMN_REF:
184836
185317
  return BindResult(clause + " cannot contain column names");
184837
185318
  case ExpressionClass::SUBQUERY:
184838
- return BindResult(clause + " cannot contain subqueries");
185319
+ throw BinderException(clause + " cannot contain subqueries");
184839
185320
  case ExpressionClass::DEFAULT:
184840
185321
  return BindResult(clause + " cannot contain DEFAULT clause");
184841
185322
  case ExpressionClass::WINDOW:
@@ -185095,6 +185576,9 @@ unique_ptr<Expression> OrderBinder::CreateProjectionReference(ParsedExpression &
185095
185576
  }
185096
185577
 
185097
185578
  unique_ptr<Expression> OrderBinder::CreateExtraReference(unique_ptr<ParsedExpression> expr) {
185579
+ if (!extra_list) {
185580
+ throw InternalException("CreateExtraReference called without extra_list");
185581
+ }
185098
185582
  auto result = CreateProjectionReference(*expr, extra_list->size());
185099
185583
  extra_list->push_back(move(expr));
185100
185584
  return result;
@@ -189221,6 +189705,9 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
189221
189705
  case LogicalOperatorType::LOGICAL_ORDER_BY:
189222
189706
  plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
189223
189707
  return plan;
189708
+ case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: {
189709
+ throw ParserException("Recursive CTEs not supported in correlated subquery");
189710
+ }
189224
189711
  default:
189225
189712
  throw InternalException("Logical operator type \"%s\" for dependent join", LogicalOperatorToString(plan->type));
189226
189713
  }
@@ -191347,7 +191834,7 @@ void CheckpointManager::CreateCheckpoint() {
191347
191834
  wal->Flush();
191348
191835
 
191349
191836
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_HEADER) {
191350
- throw IOException("Checkpoint aborted before header write because of PRAGMA checkpoint_abort flag");
191837
+ throw FatalException("Checkpoint aborted before header write because of PRAGMA checkpoint_abort flag");
191351
191838
  }
191352
191839
 
191353
191840
  // finally write the updated header
@@ -191356,7 +191843,7 @@ void CheckpointManager::CreateCheckpoint() {
191356
191843
  block_manager.WriteHeader(header);
191357
191844
 
191358
191845
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_TRUNCATE) {
191359
- throw IOException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
191846
+ throw FatalException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
191360
191847
  }
191361
191848
 
191362
191849
  // truncate the WAL
@@ -196907,7 +197394,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
196907
197394
  }
196908
197395
 
196909
197396
  // Alter column to add new constraint
196910
- DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<Constraint> constraint)
197397
+ DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint)
196911
197398
  : info(parent.info), db(parent.db), total_rows(parent.total_rows.load()), row_groups(parent.row_groups),
196912
197399
  is_root(true) {
196913
197400
 
@@ -197082,7 +197569,7 @@ void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScan
197082
197569
 
197083
197570
  bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state,
197084
197571
  const vector<column_t> &column_ids) {
197085
- while (state.current_row_group) {
197572
+ while (state.current_row_group && state.current_row_group->count > 0) {
197086
197573
  idx_t vector_index;
197087
197574
  idx_t max_row;
197088
197575
  if (ClientConfig::GetConfig(context).verify_parallelism) {
@@ -197096,13 +197583,8 @@ bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState
197096
197583
  max_row = state.current_row_group->start + state.current_row_group->count;
197097
197584
  }
197098
197585
  max_row = MinValue<idx_t>(max_row, state.max_row);
197099
- bool need_to_scan;
197100
- if (state.current_row_group->count == 0) {
197101
- need_to_scan = false;
197102
- } else {
197103
- need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197104
- state.current_row_group, vector_index, max_row);
197105
- }
197586
+ bool need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197587
+ state.current_row_group, vector_index, max_row);
197106
197588
  if (ClientConfig::GetConfig(context).verify_parallelism) {
197107
197589
  state.vector_index++;
197108
197590
  if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
@@ -197361,14 +197843,15 @@ static void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bf
197361
197843
  VerifyForeignKeyConstraint(bfk, context, chunk, false);
197362
197844
  }
197363
197845
 
197364
- void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const Constraint *constraint) {
197846
+ void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint) {
197365
197847
  if (constraint->type != ConstraintType::NOT_NULL) {
197366
197848
  throw NotImplementedException("FIXME: ALTER COLUMN with such constraint is not supported yet");
197367
197849
  }
197368
197850
  // scan the original table, check if there's any null value
197369
- auto &not_null_constraint = (NotNullConstraint &)*constraint;
197851
+ auto &not_null_constraint = (BoundNotNullConstraint &)*constraint;
197370
197852
  auto &transaction = Transaction::GetTransaction(context);
197371
197853
  vector<LogicalType> scan_types;
197854
+ D_ASSERT(not_null_constraint.index < parent.column_definitions.size());
197372
197855
  scan_types.push_back(parent.column_definitions[not_null_constraint.index].Type());
197373
197856
  DataChunk scan_chunk;
197374
197857
  auto &allocator = Allocator::Get(context);
@@ -198125,6 +198608,9 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
198125
198608
  return nullptr;
198126
198609
  }
198127
198610
  lock_guard<mutex> stats_guard(stats_lock);
198611
+ if (column_id >= column_stats.size()) {
198612
+ throw InternalException("Call to GetStatistics is out of range");
198613
+ }
198128
198614
  return column_stats[column_id]->stats->Copy();
198129
198615
  }
198130
198616
 
@@ -199413,7 +199899,7 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
199413
199899
 
199414
199900
  auto &config = DBConfig::GetConfig(db);
199415
199901
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_AFTER_FREE_LIST_WRITE) {
199416
- throw IOException("Checkpoint aborted after free list write because of PRAGMA checkpoint_abort flag");
199902
+ throw FatalException("Checkpoint aborted after free list write because of PRAGMA checkpoint_abort flag");
199417
199903
  }
199418
199904
 
199419
199905
  if (!use_direct_io) {
@@ -200939,6 +201425,7 @@ idx_t ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t coun
200939
201425
  }
200940
201426
  // after verifying that there are no conflicts we mark the tuple as deleted
200941
201427
  deleted[rows[i]] = transaction.transaction_id;
201428
+ rows[deleted_tuples] = rows[i];
200942
201429
  deleted_tuples++;
200943
201430
  }
200944
201431
  return deleted_tuples;
@@ -201266,6 +201753,8 @@ public:
201266
201753
  idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) override;
201267
201754
  idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override;
201268
201755
 
201756
+ void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE) override;
201757
+
201269
201758
  void InitializeAppend(ColumnAppendState &state) override;
201270
201759
  void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) override;
201271
201760
  void RevertAppend(row_t start_row) override;
@@ -203845,9 +204334,15 @@ void VersionDeleteState::Flush() {
203845
204334
  return;
203846
204335
  }
203847
204336
  // delete in the current info
203848
- delete_count += current_info->Delete(transaction, rows, count);
203849
- // now push the delete into the undo buffer
203850
- transaction.PushDelete(table, current_info, rows, count, base_row + chunk_row);
204337
+ // it is possible for delete statements to delete the same tuple multiple times when combined with a USING clause
204338
+ // in the current_info->Delete, we check which tuples are actually deleted (excluding duplicate deletions)
204339
+ // this is returned in the actual_delete_count
204340
+ auto actual_delete_count = current_info->Delete(transaction, rows, count);
204341
+ delete_count += actual_delete_count;
204342
+ if (actual_delete_count > 0) {
204343
+ // now push the delete into the undo buffer, but only if any deletes were actually performed
204344
+ transaction.PushDelete(table, current_info, rows, actual_delete_count, base_row + chunk_row);
204345
+ }
203851
204346
  count = 0;
203852
204347
  }
203853
204348
 
@@ -204224,6 +204719,15 @@ idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t
204224
204719
  return scan_count;
204225
204720
  }
204226
204721
 
204722
+ void StructColumnData::Skip(ColumnScanState &state, idx_t count) {
204723
+ validity.Skip(state.child_states[0], count);
204724
+
204725
+ // skip inside the sub-columns
204726
+ for (idx_t child_idx = 0; child_idx < sub_columns.size(); child_idx++) {
204727
+ sub_columns[child_idx]->Skip(state.child_states[child_idx + 1], count);
204728
+ }
204729
+ }
204730
+
204227
204731
  void StructColumnData::InitializeAppend(ColumnAppendState &state) {
204228
204732
  ColumnAppendState validity_append;
204229
204733
  validity.InitializeAppend(validity_append);
@@ -206683,6 +207187,7 @@ void CleanupState::CleanupUpdate(UpdateInfo *info) {
206683
207187
 
206684
207188
  void CleanupState::CleanupDelete(DeleteInfo *info) {
206685
207189
  auto version_table = info->table;
207190
+ D_ASSERT(version_table->info->cardinality >= info->count);
206686
207191
  version_table->info->cardinality -= info->count;
206687
207192
  if (version_table->info->indexes.Empty()) {
206688
207193
  // this table has no indexes: no cleanup to be done
@@ -260108,49 +260613,84 @@ static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t
260108
260613
  }
260109
260614
  }
260110
260615
 
260111
- UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260112
- UnicodeType type = UnicodeType::ASCII;
260113
- char c;
260114
- for (size_t i = 0; i < len; i++) {
260115
- c = s[i];
260116
- if (c == '\0') {
260117
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260118
- return UnicodeType::INVALID;
260119
- }
260120
- // 1 Byte / ASCII
260121
- if ((c & 0x80) == 0) {
260122
- continue;
260123
- }
260124
- type = UnicodeType::UNICODE;
260125
- if ((s[++i] & 0xC0) != 0x80) {
260126
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260127
- return UnicodeType::INVALID;
260128
- }
260129
- if ((c & 0xE0) == 0xC0) {
260130
- continue;
260131
- }
260132
- if ((s[++i] & 0xC0) != 0x80) {
260133
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260134
- return UnicodeType::INVALID;
260135
- }
260136
- if ((c & 0xF0) == 0xE0) {
260137
- continue;
260138
- }
260139
- if ((s[++i] & 0xC0) != 0x80) {
260616
+ template <const int nextra_bytes, const int mask>
260617
+ static inline UnicodeType
260618
+ UTF8ExtraByteLoop(const int first_pos_seq, int utf8char, size_t& i,
260619
+ const char *s, const size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260620
+ if ((len - i) < (nextra_bytes + 1)) {
260621
+ /* incomplete byte sequence */
260622
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::BYTE_MISMATCH);
260623
+ return UnicodeType::INVALID;
260624
+ }
260625
+ for (size_t j = 0 ; j < nextra_bytes; j++) {
260626
+ int c = (int) s[++i];
260627
+ /* now validate the extra bytes */
260628
+ if ((c & 0xC0) != 0x80) {
260629
+ /* extra byte is not in the format 10xxxxxx */
260140
260630
  AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260141
260631
  return UnicodeType::INVALID;
260142
260632
  }
260143
- if ((c & 0xF8) == 0xF0) {
260144
- continue;
260145
- }
260146
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260633
+ utf8char = (utf8char << 6) | (c & 0x3F);
260634
+ }
260635
+ if ((utf8char & mask) == 0) {
260636
+ /* invalid UTF-8 codepoint, not shortest possible */
260637
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260638
+ return UnicodeType::INVALID;
260639
+ }
260640
+ if (utf8char > 0x10FFFF) {
260641
+ /* value not representable by Unicode */
260642
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260643
+ return UnicodeType::INVALID;
260644
+ }
260645
+ if ((utf8char & 0x1FFF800) == 0xD800) {
260646
+ /* Unicode characters from U+D800 to U+DFFF are surrogate characters used by UTF-16 which are invalid in UTF-8 */
260647
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260147
260648
  return UnicodeType::INVALID;
260148
260649
  }
260650
+ return UnicodeType::UNICODE;
260651
+ }
260652
+
260653
+ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260654
+ UnicodeType type = UnicodeType::ASCII;
260149
260655
 
260656
+ for (size_t i = 0; i < len; i++) {
260657
+ int c = (int) s[i];
260658
+
260659
+ if ((c & 0x80) == 0) {
260660
+ /* 1 byte sequence */
260661
+ if (c == '\0') {
260662
+ /* NULL byte not allowed */
260663
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260664
+ return UnicodeType::INVALID;
260665
+ }
260666
+ } else {
260667
+ int first_pos_seq = i;
260668
+
260669
+ if ((c & 0xE0) == 0xC0) {
260670
+ /* 2 byte sequence */
260671
+ int utf8char = c & 0x1F;
260672
+ type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260673
+ } else if ((c & 0xF0) == 0xE0) {
260674
+ /* 3 byte sequence */
260675
+ int utf8char = c & 0x0F;
260676
+ type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260677
+ } else if ((c & 0xF8) == 0xF0) {
260678
+ /* 4 byte sequence */
260679
+ int utf8char = c & 0x07;
260680
+ type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260681
+ } else {
260682
+ /* invalid UTF-8 start byte */
260683
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260684
+ return UnicodeType::INVALID;
260685
+ }
260686
+ if (type == UnicodeType::INVALID) {
260687
+ return type;
260688
+ }
260689
+ }
260690
+ }
260150
260691
  return type;
260151
260692
  }
260152
260693
 
260153
-
260154
260694
  char* Utf8Proc::Normalize(const char *s, size_t len) {
260155
260695
  assert(s);
260156
260696
  assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID);
@@ -322230,6 +322770,8 @@ exit:
322230
322770
  // See the end of this file for a list
322231
322771
 
322232
322772
 
322773
+ // otherwise we have different definitions for mbedtls_pk_context / mbedtls_sha256_context
322774
+ #define MBEDTLS_ALLOW_PRIVATE_ACCESS
322233
322775
 
322234
322776
 
322235
322777