duckdb 0.5.1-dev97.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -3763,6 +3763,19 @@ idx_t TableCatalogEntry::StandardColumnCount() const {
      return count;
  }

+ unique_ptr<BaseStatistics> TableCatalogEntry::GetStatistics(ClientContext &context, column_t column_id) {
+     if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
+         return nullptr;
+     }
+     if (column_id >= columns.size()) {
+         throw InternalException("TableCatalogEntry::GetStatistics column_id out of range");
+     }
+     if (columns[column_id].Generated()) {
+         return nullptr;
+     }
+     return storage->GetStatistics(context, columns[column_id].StorageOid());
+ }
+
  unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) {
      D_ASSERT(!internal);
      if (info->type != AlterType::ALTER_TABLE) {
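Note: this release starts distinguishing a column's logical position in the catalog from its position in storage — GetStatistics above maps column_id through StorageOid() and returns no statistics for generated columns, which have no backing storage. An illustrative sketch of why the two indices diverge (the table and syntax are examples, not taken from this diff):

    // CREATE TABLE t (a INT, g INT GENERATED ALWAYS AS (a + 1), b INT);
    //   logical (catalog) index:  a = 0, g = 1, b = 2
    //   storage index:            a = 0, g = (none), b = 1
    // Passing the logical index of b straight to the storage layer would read
    // the wrong column; StorageOid() performs the logical-to-storage mapping.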
@@ -3830,6 +3843,9 @@ static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) {

  unique_ptr<CatalogEntry> TableCatalogEntry::RenameColumn(ClientContext &context, RenameColumnInfo &info) {
      auto rename_idx = GetColumnIndex(info.old_name);
+     if (rename_idx == COLUMN_IDENTIFIER_ROW_ID) {
+         throw CatalogException("Cannot rename rowid column");
+     }
      auto create_info = make_unique<CreateTableInfo>(schema->name, name);
      create_info->temporary = temporary;
      for (idx_t i = 0; i < columns.size(); i++) {
@@ -3932,6 +3948,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AddColumn(ClientContext &context, Ad
  unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context, RemoveColumnInfo &info) {
      auto removed_index = GetColumnIndex(info.removed_column, info.if_column_exists);
      if (removed_index == DConstants::INVALID_INDEX) {
+         if (!info.if_column_exists) {
+             throw CatalogException("Cannot drop column: rowid column cannot be dropped");
+         }
          return nullptr;
      }

@@ -4038,7 +4057,7 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
          return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
                                                storage);
      }
-     auto new_storage = make_shared<DataTable>(context, *storage, removed_index);
+     auto new_storage = make_shared<DataTable>(context, *storage, columns[removed_index].StorageOid());
      return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
                                            new_storage);
  }
@@ -4046,13 +4065,18 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
  unique_ptr<CatalogEntry> TableCatalogEntry::SetDefault(ClientContext &context, SetDefaultInfo &info) {
      auto create_info = make_unique<CreateTableInfo>(schema->name, name);
      auto default_idx = GetColumnIndex(info.column_name);
+     if (default_idx == COLUMN_IDENTIFIER_ROW_ID) {
+         throw CatalogException("Cannot SET DEFAULT for rowid column");
+     }

      // Copy all the columns, changing the value of the one that was specified by 'column_name'
      for (idx_t i = 0; i < columns.size(); i++) {
          auto copy = columns[i].Copy();
          if (default_idx == i) {
              // set the default value of this column
-             D_ASSERT(!copy.Generated()); // Shouldnt reach here - DEFAULT value isn't supported for Generated Columns
+             if (copy.Generated()) {
+                 throw BinderException("Cannot SET DEFAULT for generated column \"%s\"", columns[i].Name());
+             }
              copy.SetDefaultValue(info.expression ? info.expression->Copy() : nullptr);
          }
          create_info->columns.push_back(move(copy));
@@ -4077,6 +4101,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
      }

      idx_t not_null_idx = GetColumnIndex(info.column_name);
+     if (columns[not_null_idx].Generated()) {
+         throw BinderException("Unsupported constraint for generated column!");
+     }
      bool has_not_null = false;
      for (idx_t i = 0; i < constraints.size(); i++) {
          auto constraint = constraints[i]->Copy();
@@ -4100,8 +4127,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
                                                storage);
      }

-     // Return with new storage info
-     auto new_storage = make_shared<DataTable>(context, *storage, make_unique<NotNullConstraint>(not_null_idx));
+     // Return with new storage info. Note that we need the bound column index here.
+     auto new_storage = make_shared<DataTable>(context, *storage,
+                                               make_unique<BoundNotNullConstraint>(columns[not_null_idx].StorageOid()));
      return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
                                            new_storage);
  }
@@ -4207,12 +4235,19 @@ unique_ptr<CatalogEntry> TableCatalogEntry::ChangeColumnType(ClientContext &cont
      auto expression = info.expression->Copy();
      auto bound_expression = expr_binder.Bind(expression);
      auto bound_create_info = binder->BindCreateTableInfo(move(create_info));
+     vector<column_t> storage_oids;
      if (bound_columns.empty()) {
-         bound_columns.push_back(COLUMN_IDENTIFIER_ROW_ID);
+         storage_oids.push_back(COLUMN_IDENTIFIER_ROW_ID);
+     }
+     // transform to storage_oid
+     else {
+         for (idx_t i = 0; i < bound_columns.size(); i++) {
+             storage_oids.push_back(columns[bound_columns[i]].StorageOid());
+         }
      }

-     auto new_storage =
-         make_shared<DataTable>(context, *storage, change_idx, info.target_type, move(bound_columns), *bound_expression);
+     auto new_storage = make_shared<DataTable>(context, *storage, columns[change_idx].StorageOid(), info.target_type,
+                                               move(storage_oids), *bound_expression);
      auto result =
          make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), new_storage);
      return move(result);
@@ -4460,7 +4495,7 @@ void TableCatalogEntry::CommitAlter(AlterInfo &info) {
          }
      }
      D_ASSERT(removed_index != DConstants::INVALID_INDEX);
-     storage->CommitDropColumn(removed_index);
+     storage->CommitDropColumn(columns[removed_index].StorageOid());
  }

  void TableCatalogEntry::CommitDrop() {
@@ -5030,11 +5065,13 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
              throw CatalogException(rename_err_msg, original_name, value->name);
          }
      }
+     }
+
+     if (value->name != original_name) {
+         // Do PutMapping and DeleteMapping after dependency check
          PutMapping(context, value->name, entry_index);
          DeleteMapping(context, original_name);
      }
-     //! Check the dependency manager to verify that there are no conflicting dependencies with this alter
-     catalog.dependency_manager->AlterObject(context, entry, value.get());

      value->timestamp = transaction.transaction_id;
      value->child = move(entries[entry_index]);
@@ -5046,10 +5083,18 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
      alter_info->Serialize(serializer);
      BinaryData serialized_alter = serializer.GetData();

+     auto new_entry = value.get();
+
      // push the old entry in the undo buffer for this transaction
      transaction.PushCatalogEntry(value->child.get(), serialized_alter.data.get(), serialized_alter.size);
      entries[entry_index] = move(value);

+     // Check the dependency manager to verify that there are no conflicting dependencies with this alter
+     // Note that we do this AFTER the new entry has been entirely set up in the catalog set
+     // that is because in case the alter fails because of a dependency conflict, we need to be able to cleanly roll back
+     // to the old entry.
+     catalog.dependency_manager->AlterObject(context, entry, new_entry);
+
      return true;
  }

@@ -6602,7 +6647,7 @@ static void GetBitPosition(idx_t row_idx, idx_t &current_byte, uint8_t &current_
  }

  static void UnsetBit(uint8_t *data, idx_t current_byte, uint8_t current_bit) {
-     data[current_byte] &= ~(1 << current_bit);
+     data[current_byte] &= ~((uint64_t)1 << current_bit);
  }

  static void NextBit(idx_t &current_byte, uint8_t &current_bit) {
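Note: this is the first of several hunks that widen `1 << n` shifts to 64 bits (see also RadixPartitioning, PerfectAggregateHashTable, and the C API validity helpers below). A minimal standalone illustration of the bug class, not code from this diff:

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint64_t validity = 0;
        unsigned row = 40;
        // Wrong: the literal 1 is a 32-bit int, so shifting it by 40 is
        // undefined behaviour and in practice produces a truncated mask:
        //   validity |= 1 << row;
        // Right: widen the operand first, as the diff does with (uint64_t)1:
        validity |= (uint64_t)1 << row;
        printf("%016llx\n", (unsigned long long)validity); // 0000010000000000
        return 0;
    }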
@@ -9546,6 +9591,8 @@ void Exception::ThrowAsTypeWithMessage(ExceptionType type, const string &message
          throw ParameterNotAllowedException(message);
      case ExceptionType::PARAMETER_NOT_RESOLVED:
          throw ParameterNotResolvedException();
+     case ExceptionType::FATAL:
+         throw FatalException(message);
      default:
          throw Exception(type, message);
      }
@@ -16811,9 +16858,15 @@ string FileSystem::ConvertSeparators(const string &path) {
  }

  string FileSystem::ExtractBaseName(const string &path) {
+     if (path.empty()) {
+         return string();
+     }
      auto normalized_path = ConvertSeparators(path);
      auto sep = PathSeparator();
-     auto vec = StringUtil::Split(StringUtil::Split(normalized_path, sep).back(), ".");
+     auto splits = StringUtil::Split(normalized_path, sep);
+     D_ASSERT(!splits.empty());
+     auto vec = StringUtil::Split(splits.back(), ".");
+     D_ASSERT(!vec.empty());
      return vec[0];
  }

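Note: the old one-liner called .back() on the result of Split, which is undefined for an empty path; the rewrite guards the empty case and asserts the intermediate results. Expected behaviour, for illustration:

    // FileSystem::ExtractBaseName examples (illustrative):
    //   "/tmp/data/file.csv"  -> "file"
    //   "archive.tar.gz"      -> "archive"
    //   ""                    -> ""   (new early return; previously undefined)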
@@ -18888,7 +18941,8 @@ namespace duckdb {

  static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
                                                              unordered_map<string, column_t> &column_map,
-                                                             bool filename_col, bool hive_partition_cols) {
+                                                             duckdb_re2::RE2 &compiled_regex, bool filename_col,
+                                                             bool hive_partition_cols) {
      unordered_map<column_t, string> result;

      if (filename_col) {
@@ -18899,7 +18953,7 @@ static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
      }

      if (hive_partition_cols) {
-         auto partitions = HivePartitioning::Parse(filename);
+         auto partitions = HivePartitioning::Parse(filename, compiled_regex);
          for (auto &partition : partitions) {
              auto lookup_column_id = column_map.find(partition.first);
              if (lookup_column_id != column_map.end()) {
@@ -18937,10 +18991,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
  // - s3://bucket/var1=value1/bla/bla/var2=value2
  // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
  // - folder/folder/folder/../var1=value1/etc/.//var2=value2
- std::map<string, string> HivePartitioning::Parse(string &filename) {
-     std::map<string, string> result;
+ const string HivePartitioning::REGEX_STRING = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";

-     string regex = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
+ std::map<string, string> HivePartitioning::Parse(string &filename, duckdb_re2::RE2 &regex) {
+     std::map<string, string> result;
      duckdb_re2::StringPiece input(filename); // Wrap a StringPiece around it

      string var;
@@ -18951,6 +19005,11 @@ std::map<string, string> HivePartitioning::Parse(string &filename) {
      return result;
  }

+ std::map<string, string> HivePartitioning::Parse(string &filename) {
+     duckdb_re2::RE2 regex(REGEX_STRING);
+     return Parse(filename, regex);
+ }
+
  // TODO: this can still be improved by removing the parts of filter expressions that are true for all remaining files.
  // currently, only expressions that cannot be evaluated during pushdown are removed.
  void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<unique_ptr<Expression>> &filters,
@@ -18958,6 +19017,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
                                                bool hive_enabled, bool filename_enabled) {
      vector<string> pruned_files;
      vector<unique_ptr<Expression>> pruned_filters;
+     duckdb_re2::RE2 regex(REGEX_STRING);

      if ((!filename_enabled && !hive_enabled) || filters.empty()) {
          return;
@@ -18966,7 +19026,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
      for (idx_t i = 0; i < files.size(); i++) {
          auto &file = files[i];
          bool should_prune_file = false;
-         auto known_values = GetKnownColumnValues(file, column_map, filename_enabled, hive_enabled);
+         auto known_values = GetKnownColumnValues(file, column_map, regex, filename_enabled, hive_enabled);

          FilterCombiner combiner;
          for (auto &filter : filters) {
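Note: the partition-parsing regex is now compiled once per ApplyFiltersToFileList call (from the shared REGEX_STRING constant) and reused for every file, instead of being recompiled inside HivePartitioning::Parse for each file. The reuse pattern, using only names from this diff:

    duckdb_re2::RE2 regex(HivePartitioning::REGEX_STRING); // compile once
    for (auto &file : files) {
        auto partitions = HivePartitioning::Parse(file, regex); // reuse per file
    }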
@@ -19198,6 +19258,8 @@ private:
      //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
      void SetFilePointer(FileHandle &handle, idx_t location);
      idx_t GetFilePointer(FileHandle &handle);
+
+     vector<string> FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path);
  };

  } // namespace duckdb
@@ -20079,6 +20141,26 @@ static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bo
      });
  }

+ vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) {
+     vector<string> result;
+     if (FileExists(path) || IsPipe(path)) {
+         result.push_back(path);
+     } else if (!absolute_path) {
+         Value value;
+         if (opener->TryGetCurrentSetting("file_search_path", value)) {
+             auto search_paths_str = value.ToString();
+             std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
+             for (const auto &search_path : search_paths) {
+                 auto joined_path = JoinPath(search_path, path);
+                 if (FileExists(joined_path) || IsPipe(joined_path)) {
+                     result.push_back(joined_path);
+                 }
+             }
+         }
+     }
+     return result;
+ }
+
  vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
      if (path.empty()) {
          return vector<string>();
@@ -20125,23 +20207,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
      // Check if the path has a glob at all
      if (!HasGlob(path)) {
          // no glob: return only the file (if it exists or is a pipe)
-         vector<string> result;
-         if (FileExists(path) || IsPipe(path)) {
-             result.push_back(path);
-         } else if (!absolute_path) {
-             Value value;
-             if (opener->TryGetCurrentSetting("file_search_path", value)) {
-                 auto search_paths_str = value.ToString();
-                 std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
-                 for (const auto &search_path : search_paths) {
-                     auto joined_path = JoinPath(search_path, path);
-                     if (FileExists(joined_path) || IsPipe(joined_path)) {
-                         result.push_back(joined_path);
-                     }
-                 }
-             }
-         }
-         return result;
+         return FetchFileWithoutGlob(path, opener, absolute_path);
      }
      vector<string> previous_directories;
      if (absolute_path) {
@@ -20175,7 +20241,12 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
                  }
              }
          }
-         if (is_last_chunk || result.empty()) {
+         if (result.empty()) {
+             // no result found that matches the glob
+             // last ditch effort: search the path as a string literal
+             return FetchFileWithoutGlob(path, opener, absolute_path);
+         }
+         if (is_last_chunk) {
              return result;
          }
          previous_directories = move(result);
@@ -22524,14 +22595,16 @@ struct IntervalToStringCast {
      if (micros < 0) {
          // negative time: append negative sign
          buffer[length++] = '-';
+     } else {
          micros = -micros;
      }
-     int64_t hour = micros / Interval::MICROS_PER_HOUR;
-     micros -= hour * Interval::MICROS_PER_HOUR;
-     int64_t min = micros / Interval::MICROS_PER_MINUTE;
-     micros -= min * Interval::MICROS_PER_MINUTE;
-     int64_t sec = micros / Interval::MICROS_PER_SEC;
-     micros -= sec * Interval::MICROS_PER_SEC;
+     int64_t hour = -(micros / Interval::MICROS_PER_HOUR);
+     micros += hour * Interval::MICROS_PER_HOUR;
+     int64_t min = -(micros / Interval::MICROS_PER_MINUTE);
+     micros += min * Interval::MICROS_PER_MINUTE;
+     int64_t sec = -(micros / Interval::MICROS_PER_SEC);
+     micros += sec * Interval::MICROS_PER_SEC;
+     micros = -micros;

      if (hour < 10) {
          buffer[length++] = '0';
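Note: the rewritten normalization keeps micros negative while extracting components, because the old `micros = -micros` overflows when micros is INT64_MIN (its negation is not representable). A worked example of the new arithmetic (values are illustrative):

    // micros = -3661000000 (-1h 1m 1s), MICROS_PER_HOUR = 3600000000:
    //   hour = -(-3661000000 / 3600000000) = 1; micros += 3600000000 -> -61000000
    //   min  = -(-61000000 / 60000000)     = 1; micros += 60000000   -> -1000000
    //   sec  = -(-1000000 / 1000000)       = 1; micros += 1000000    -> 0
    //   micros = -micros                   -> 0 leftover microseconds
    // C++ integer division truncates toward zero, so dividing the negative
    // value and negating the quotient never has to compute -INT64_MIN.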
@@ -28654,7 +28727,7 @@ template <idx_t radix_bits>
  struct RadixPartitioningConstants {
  public:
      static constexpr const idx_t NUM_RADIX_BITS = radix_bits;
-     static constexpr const idx_t NUM_PARTITIONS = 1 << NUM_RADIX_BITS;
+     static constexpr const idx_t NUM_PARTITIONS = (idx_t)1 << NUM_RADIX_BITS;
      static constexpr const idx_t TMP_BUF_SIZE = 8;

  public:
@@ -28672,7 +28745,7 @@ private:
  struct RadixPartitioning {
  public:
      static idx_t NumberOfPartitions(idx_t radix_bits) {
-         return 1 << radix_bits;
+         return (idx_t)1 << radix_bits;
      }

      //! Partition the data in block_collection/string_heap to multiple partitions
@@ -39531,7 +39604,7 @@ public:
  namespace duckdb {

  enum class UnicodeType { INVALID, ASCII, UNICODE };
- enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE };
+ enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };

  class Utf8Proc {
  public:
@@ -50360,6 +50433,24 @@ void StringVector::AddHeapReference(Vector &vector, Vector &other) {
      StringVector::AddBuffer(vector, other.auxiliary);
  }

+ Vector &MapVector::GetKeys(Vector &vector) {
+     auto &entries = StructVector::GetEntries(vector);
+     D_ASSERT(entries.size() == 2);
+     return *entries[0];
+ }
+ Vector &MapVector::GetValues(Vector &vector) {
+     auto &entries = StructVector::GetEntries(vector);
+     D_ASSERT(entries.size() == 2);
+     return *entries[1];
+ }
+
+ const Vector &MapVector::GetKeys(const Vector &vector) {
+     return GetKeys((Vector &)vector);
+ }
+ const Vector &MapVector::GetValues(const Vector &vector) {
+     return GetValues((Vector &)vector);
+ }
+
  vector<unique_ptr<Vector>> &StructVector::GetEntries(Vector &vector) {
      D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::MAP);
      if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
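Note: a MAP vector is represented as a two-entry STRUCT (a list of keys and a list of values); the new MapVector accessors encode that layout in one place. A minimal usage sketch, assuming a Vector whose type is MAP as elsewhere in this diff:

    void InspectMap(duckdb::Vector &map_vec) {
        using namespace duckdb;
        D_ASSERT(map_vec.GetType().id() == LogicalTypeId::MAP);
        Vector &keys = MapVector::GetKeys(map_vec);     // struct entry 0
        Vector &values = MapVector::GetValues(map_vec); // struct entry 1
        // operate on the key/value child vectors directly ...
        (void)keys;
        (void)values;
    }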
@@ -63217,6 +63308,16 @@ bool DistinctAggregateData::IsDistinct(idx_t index) const {



+ //===----------------------------------------------------------------------===//
+ // DuckDB
+ //
+ // duckdb/parallel/base_pipeline_event.hpp
+ //
+ //
+ //===----------------------------------------------------------------------===//
+
+
+
  //===----------------------------------------------------------------------===//
  // DuckDB
  //
@@ -63290,6 +63391,22 @@ protected:



+ namespace duckdb {
+
+ //! A BasePipelineEvent is used as the basis of any event that belongs to a specific pipeline
+ class BasePipelineEvent : public Event {
+ public:
+     BasePipelineEvent(shared_ptr<Pipeline> pipeline);
+     BasePipelineEvent(Pipeline &pipeline);
+
+     //! The pipeline that this event belongs to
+     shared_ptr<Pipeline> pipeline;
+ };
+
+ } // namespace duckdb
+
+
+
  namespace duckdb {

  PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
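Note: BasePipelineEvent is the basis for a recurring refactor in this release: events that previously held a raw Pipeline pointer or reference now inherit a shared_ptr, which keeps the pipeline alive for as long as the event can run. Sketch of a derived event under the new pattern (the class name is hypothetical):

    class ExampleMergeEvent : public duckdb::BasePipelineEvent {
    public:
        explicit ExampleMergeEvent(duckdb::Pipeline &pipeline_p)
            : BasePipelineEvent(pipeline_p) { // base stores pipeline_p.shared_from_this()
        }

        void Schedule() override {
            // the inherited member is a shared_ptr, hence '->' rather than '.'
            auto &context = pipeline->GetClientContext();
            (void)context;
        }
    };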
@@ -63446,16 +63563,15 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
      }
  }

- class HashAggregateFinalizeEvent : public Event {
+ class HashAggregateFinalizeEvent : public BasePipelineEvent {
  public:
      HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
                                 Pipeline *pipeline_p)
-         : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p) {
+         : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
      }

      const PhysicalHashAggregate &op;
      HashAggregateGlobalState &gstate;
-     Pipeline *pipeline;

  public:
      void Schedule() override {
@@ -64717,15 +64833,14 @@ private:
  };

  // TODO: Create tasks and run these in parallel instead of doing this all in Schedule, single threaded
- class DistinctAggregateFinalizeEvent : public Event {
+ class DistinctAggregateFinalizeEvent : public BasePipelineEvent {
  public:
      DistinctAggregateFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
-                                    Pipeline *pipeline_p, ClientContext &context)
-         : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), context(context) {
+                                    Pipeline &pipeline_p, ClientContext &context)
+         : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
      }
      const PhysicalUngroupedAggregate &op;
      UngroupedAggregateGlobalState &gstate;
-     Pipeline *pipeline;
      ClientContext &context;

  public:
@@ -64738,16 +64853,15 @@ public:
      }
  };

- class DistinctCombineFinalizeEvent : public Event {
+ class DistinctCombineFinalizeEvent : public BasePipelineEvent {
  public:
      DistinctCombineFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
-                                  Pipeline *pipeline_p, ClientContext &client)
-         : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), client(client) {
+                                  Pipeline &pipeline_p, ClientContext &client)
+         : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), client(client) {
      }

      const PhysicalUngroupedAggregate &op;
      UngroupedAggregateGlobalState &gstate;
-     Pipeline *pipeline;
      ClientContext &client;

  public:
@@ -64763,7 +64877,7 @@ public:
          SetTasks(move(tasks));

          //! Now that all tables are combined, it's time to do the distinct aggregations
-         auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, pipeline, client);
+         auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
          this->InsertEvent(move(new_event));
      }
  };
@@ -64792,12 +64906,12 @@ SinkFinalizeType PhysicalUngroupedAggregate::FinalizeDistinct(Pipeline &pipeline
          }
      }
      if (any_partitioned) {
-         auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, &pipeline, context);
+         auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
          event.InsertEvent(move(new_event));
      } else {
          //! Hashtables aren't partitioned, they dont need to be joined first
          //! So we can compute the aggregate already
-         auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, &pipeline, context);
+         auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
          event.InsertEvent(move(new_event));
      }
      return SinkFinalizeType::READY;
@@ -66543,19 +66657,18 @@ private:
      WindowGlobalHashGroup &hash_group;
  };

- class WindowMergeEvent : public Event {
+ class WindowMergeEvent : public BasePipelineEvent {
  public:
      WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, WindowGlobalHashGroup &hash_group_p)
-         : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p), hash_group(hash_group_p) {
+         : BasePipelineEvent(pipeline_p), gstate(gstate_p), hash_group(hash_group_p) {
      }

      WindowGlobalSinkState &gstate;
-     Pipeline &pipeline;
      WindowGlobalHashGroup &hash_group;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();

          // Schedule tasks equal to the number of threads, which will each merge multiple partitions
          auto &ts = TaskScheduler::GetScheduler(context);
@@ -66570,7 +66683,7 @@ public:

      void FinishEvent() override {
          hash_group.global_sort->CompleteMergeRound(true);
-         CreateMergeTasks(pipeline, *this, gstate, hash_group);
+         CreateMergeTasks(*pipeline, *this, gstate, hash_group);
      }

      static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
@@ -67979,6 +68092,11 @@ public:

  private:
      static const vector<string> PathComponents();
+     //! For tagged releases we use the tag, else we use the git commit hash
+     static const string GetVersionDirectoryName();
+     //! Version tags occur with and without 'v', tag in extension path is always with 'v'
+     static const string NormalizeVersionTag(const string &version_tag);
+     static bool IsRelease(const string &version_tag);

  private:
      static ExtensionLoadResult LoadExtensionInternal(DuckDB &db, const std::string &extension, bool initial_load);
@@ -70905,18 +71023,17 @@ private:
      bool parallel;
  };

- class HashJoinFinalizeEvent : public Event {
+ class HashJoinFinalizeEvent : public BasePipelineEvent {
  public:
      HashJoinFinalizeEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink)
-         : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink) {
+         : BasePipelineEvent(pipeline_p), sink(sink) {
      }

-     Pipeline &pipeline;
      HashJoinGlobalSinkState &sink;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();
          auto parallel_construct_count =
              context.config.verify_parallelism ? STANDARD_VECTOR_SIZE : PARALLEL_CONSTRUCT_COUNT;

@@ -70983,20 +71100,19 @@ private:
      JoinHashTable &local_ht;
  };

- class HashJoinPartitionEvent : public Event {
+ class HashJoinPartitionEvent : public BasePipelineEvent {
  public:
      HashJoinPartitionEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink,
                             vector<unique_ptr<JoinHashTable>> &local_hts)
-         : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink), local_hts(local_hts) {
+         : BasePipelineEvent(pipeline_p), sink(sink), local_hts(local_hts) {
      }

-     Pipeline &pipeline;
      HashJoinGlobalSinkState &sink;
      vector<unique_ptr<JoinHashTable>> &local_hts;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();
          vector<unique_ptr<Task>> partition_tasks;
          partition_tasks.reserve(local_hts.size());
          for (auto &local_ht : local_hts) {
@@ -71009,7 +71125,7 @@ public:
      void FinishEvent() override {
          local_hts.clear();
          sink.hash_table->PrepareExternalFinalize();
-         sink.ScheduleFinalize(pipeline, *this);
+         sink.ScheduleFinalize(*pipeline, *this);
      }
  };

@@ -74713,21 +74829,20 @@ private:
      GlobalSortedTable &table;
  };

- class RangeJoinMergeEvent : public Event {
+ class RangeJoinMergeEvent : public BasePipelineEvent {
  public:
      using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;

  public:
      RangeJoinMergeEvent(GlobalSortedTable &table_p, Pipeline &pipeline_p)
-         : Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
+         : BasePipelineEvent(pipeline_p), table(table_p) {
      }

      GlobalSortedTable &table;
-     Pipeline &pipeline;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();

          // Schedule tasks equal to the number of threads, which will each merge multiple partitions
          auto &ts = TaskScheduler::GetScheduler(context);
@@ -74746,7 +74861,7 @@ public:
          global_sort_state.CompleteMergeRound(true);
          if (global_sort_state.sorted_blocks.size() > 1) {
              // Multiple blocks remaining: Schedule the next round
-             table.ScheduleMergeTasks(pipeline, *this);
+             table.ScheduleMergeTasks(*pipeline, *this);
          }
      }
  };
@@ -75134,18 +75249,17 @@ private:
      OrderGlobalState &state;
  };

- class OrderMergeEvent : public Event {
+ class OrderMergeEvent : public BasePipelineEvent {
  public:
      OrderMergeEvent(OrderGlobalState &gstate_p, Pipeline &pipeline_p)
-         : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p) {
+         : BasePipelineEvent(pipeline_p), gstate(gstate_p) {
      }

      OrderGlobalState &gstate;
-     Pipeline &pipeline;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();

          // Schedule tasks equal to the number of threads, which will each merge multiple partitions
          auto &ts = TaskScheduler::GetScheduler(context);
@@ -75164,7 +75278,7 @@ public:
          global_sort_state.CompleteMergeRound();
          if (global_sort_state.sorted_blocks.size() > 1) {
              // Multiple blocks remaining: Schedule the next round
-             PhysicalOrder::ScheduleMergeTasks(pipeline, *this, gstate);
+             PhysicalOrder::ScheduleMergeTasks(*pipeline, *this, gstate);
          }
      }
  };
@@ -80064,10 +80178,17 @@ void PhysicalCreateIndex::GetData(ExecutionContext &context, DataChunk &chunk, G
          return;
      }

+     // convert virtual column ids to storage column ids
+     vector<column_t> storage_ids;
+     for (auto &column_id : column_ids) {
+         D_ASSERT(column_id < table.columns.size());
+         storage_ids.push_back(table.columns[column_id].StorageOid());
+     }
+
      unique_ptr<Index> index;
      switch (info->index_type) {
      case IndexType::ART: {
-         index = make_unique<ART>(column_ids, unbound_expressions, info->constraint_type, *context.client.db);
+         index = make_unique<ART>(storage_ids, unbound_expressions, info->constraint_type, *context.client.db);
          break;
      }
      default:
@@ -80372,11 +80493,10 @@ unique_ptr<GlobalSinkState> PhysicalCreateTableAs::GetGlobalSinkState(ClientCont
  SinkResultType PhysicalCreateTableAs::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p,
                                             DataChunk &input) const {
      auto &sink = (CreateTableAsGlobalState &)state;
-     if (sink.table) {
-         lock_guard<mutex> client_guard(sink.append_lock);
-         sink.table->storage->Append(*sink.table, context.client, input);
-         sink.inserted_count += input.size();
-     }
+     D_ASSERT(sink.table);
+     lock_guard<mutex> client_guard(sink.append_lock);
+     sink.table->storage->Append(*sink.table, context.client, input);
+     sink.inserted_count += input.size();
      return SinkResultType::NEED_MORE_INPUT;
  }

@@ -80786,6 +80906,7 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
  void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
      op_state.reset();
      sink_state.reset();
+     pipelines.clear();

      // recursive CTE
      state.SetPipelineSource(current, this);
81085
81206
  total_required_bits += group_bits;
81086
81207
  }
81087
81208
  // the total amount of groups we allocate space for is 2^required_bits
81088
- total_groups = 1 << total_required_bits;
81209
+ total_groups = (uint64_t)1 << total_required_bits;
81089
81210
  // we don't need to store the groups in a perfect hash table, since the group keys can be deduced by their location
81090
81211
  grouping_columns = group_types_p.size();
81091
81212
  layout.Initialize(move(aggregate_objects_p));
@@ -81269,7 +81390,7 @@ static void ReconstructGroupVectorTemplated(uint32_t group_values[], Value &min,
  static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t required_bits, idx_t shift,
                                     idx_t entry_count, Vector &result) {
      // construct the mask for this entry
-     idx_t mask = (1 << required_bits) - 1;
+     idx_t mask = ((uint64_t)1 << required_bits) - 1;
      switch (result.GetType().InternalType()) {
      case PhysicalType::INT8:
          ReconstructGroupVectorTemplated<int8_t>(group_values, min, mask, shift, entry_count, result);
@@ -85516,7 +85637,7 @@ void RadixPartitionedHashTable::SetGroupingValues() {
      for (idx_t i = 0; i < grouping.size(); i++) {
          if (grouping_set.find(grouping[i]) == grouping_set.end()) {
              // we don't group on this value!
-             grouping_value += 1 << (grouping.size() - (i + 1));
+             grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
          }
      }
      grouping_values.push_back(Value::BIGINT(grouping_value));
@@ -91074,7 +91195,21 @@ struct ModeIncluded {
      const idx_t bias;
  };

- template <typename KEY_TYPE>
+ struct ModeAssignmentStandard {
+     template <class INPUT_TYPE, class RESULT_TYPE>
+     static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
+         return RESULT_TYPE(input);
+     }
+ };
+
+ struct ModeAssignmentString {
+     template <class INPUT_TYPE, class RESULT_TYPE>
+     static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
+         return StringVector::AddString(result, input);
+     }
+ };
+
+ template <typename KEY_TYPE, typename ASSIGN_OP>
  struct ModeFunction {
      template <class STATE>
      static void Initialize(STATE *state) {
@@ -91187,7 +91322,7 @@ struct ModeFunction {
          }

          if (state->valid) {
-             rdata[rid] = RESULT_TYPE(*state->mode);
+             rdata[rid] = ASSIGN_OP::template Assign<INPUT_TYPE, RESULT_TYPE>(result, *state->mode);
          } else {
              rmask.Set(rid, false);
          }
@@ -91203,10 +91338,10 @@ struct ModeFunction {
      }
  };

- template <typename INPUT_TYPE, typename KEY_TYPE>
+ template <typename INPUT_TYPE, typename KEY_TYPE, typename ASSIGN_OP = ModeAssignmentStandard>
  AggregateFunction GetTypedModeFunction(const LogicalType &type) {
      using STATE = ModeState<KEY_TYPE>;
-     using OP = ModeFunction<KEY_TYPE>;
+     using OP = ModeFunction<KEY_TYPE, ASSIGN_OP>;
      auto func = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, INPUT_TYPE, OP>(type, type);
      func.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, INPUT_TYPE, OP>;
      return func;
@@ -91242,7 +91377,7 @@ AggregateFunction GetModeAggregate(const LogicalType &type) {
          return GetTypedModeFunction<interval_t, interval_t>(type);

      case PhysicalType::VARCHAR:
-         return GetTypedModeFunction<string_t, string>(type);
+         return GetTypedModeFunction<string_t, string, ModeAssignmentString>(type);

      default:
          throw NotImplementedException("Unimplemented mode aggregate");
@@ -105407,16 +105542,21 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
      auto &map = args.data[0];
      auto &key = args.data[1];

-     UnifiedVectorFormat offset_data;
+     UnifiedVectorFormat map_keys_data;
+     UnifiedVectorFormat key_data;

-     auto &children = StructVector::GetEntries(map);
+     auto &map_keys = MapVector::GetKeys(map);
+     auto &map_values = MapVector::GetValues(map);
+
+     map_keys.ToUnifiedFormat(args.size(), map_keys_data);
+     key.ToUnifiedFormat(args.size(), key_data);

-     children[0]->ToUnifiedFormat(args.size(), offset_data);
      for (idx_t row = 0; row < args.size(); row++) {
-         idx_t row_index = offset_data.sel->get_index(row);
-         auto key_value = key.GetValue(row_index);
-         auto offsets = ListVector::Search(*children[0], key_value, offset_data.sel->get_index(row));
-         auto values = ListVector::GetValuesFromOffsets(*children[1], offsets);
+         idx_t row_index = map_keys_data.sel->get_index(row);
+         idx_t key_index = key_data.sel->get_index(row);
+         auto key_value = key.GetValue(key_index);
+         auto offsets = ListVector::Search(map_keys, key_value, row_index);
+         auto values = ListVector::GetValuesFromOffsets(map_values, offsets);
          FillResult(values, result, row);
      }

@@ -108311,6 +108451,24 @@ interval_t DivideOperator::Operation(interval_t left, int64_t right) {
      return left;
  }

+ struct BinaryNumericDivideWrapper {
+     template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
+     static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
+         if (left == NumericLimits<LEFT_TYPE>::Minimum() && right == -1) {
+             throw OutOfRangeException("Overflow in division of %d / %d", left, right);
+         } else if (right == 0) {
+             mask.SetInvalid(idx);
+             return left;
+         } else {
+             return OP::template Operation<LEFT_TYPE, RIGHT_TYPE, RESULT_TYPE>(left, right);
+         }
+     }
+
+     static bool AddsNulls() {
+         return true;
+     }
+ };
+
  struct BinaryZeroIsNullWrapper {
      template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
      static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
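Note: BinaryNumericDivideWrapper adds the one signed-division case the zero check does not cover: the minimum value divided by -1, whose result is not representable. A standalone illustration of the two guards (not DuckDB code; DuckDB yields NULL for division by zero, mirrored here by an exception):

    #include <cstdint>
    #include <limits>
    #include <stdexcept>

    int64_t checked_div(int64_t left, int64_t right) {
        if (left == std::numeric_limits<int64_t>::min() && right == -1) {
            // -INT64_MIN == 9223372036854775808 does not fit in int64_t
            throw std::out_of_range("overflow in division");
        }
        if (right == 0) {
            throw std::domain_error("division by zero");
        }
        return left / right; // safe: no overflow, no division by zero
    }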
@@ -108352,13 +108510,13 @@ template <class OP>
  static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
      switch (type.id()) {
      case LogicalTypeId::TINYINT:
-         return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP>;
+         return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP, BinaryNumericDivideWrapper>;
      case LogicalTypeId::SMALLINT:
-         return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP>;
+         return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP, BinaryNumericDivideWrapper>;
      case LogicalTypeId::INTEGER:
-         return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP>;
+         return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP, BinaryNumericDivideWrapper>;
      case LogicalTypeId::BIGINT:
-         return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP>;
+         return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP, BinaryNumericDivideWrapper>;
      case LogicalTypeId::UTINYINT:
          return BinaryScalarFunctionIgnoreZero<uint8_t, uint8_t, uint8_t, OP>;
      case LogicalTypeId::USMALLINT:
@@ -114806,11 +114964,22 @@ static void CurrentSchemaFunction(DataChunk &input, ExpressionState &state, Vect

  // current_schemas
  static void CurrentSchemasFunction(DataChunk &input, ExpressionState &state, Vector &result) {
+     if (!input.AllConstant()) {
+         throw NotImplementedException("current_schemas requires a constant input");
+     }
+     if (ConstantVector::IsNull(input.data[0])) {
+         result.SetVectorType(VectorType::CONSTANT_VECTOR);
+         ConstantVector::SetNull(result, true);
+         return;
+     }
+     auto implicit_schemas = *ConstantVector::GetData<bool>(input.data[0]);
      vector<Value> schema_list;
-     vector<string> search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path->Get();
+     auto &catalog_search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path;
+     vector<string> search_path = implicit_schemas ? catalog_search_path->Get() : catalog_search_path->GetSetPaths();
      std::transform(search_path.begin(), search_path.end(), std::back_inserter(schema_list),
                     [](const string &s) -> Value { return Value(s); });
-     auto val = Value::LIST(schema_list);
+
+     auto val = Value::LIST(LogicalType::VARCHAR, schema_list);
      result.Reference(val);
  }

@@ -115109,8 +115278,8 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
  struct ArrowScanGlobalState : public GlobalTableFunctionState {
      unique_ptr<ArrowArrayStreamWrapper> stream;
      mutex main_mutex;
-     bool ready = false;
      idx_t max_threads = 1;
+     bool done = false;

      idx_t MaxThreads() const override {
          return max_threads;
@@ -115398,6 +115567,9 @@ idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const Func
  bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
                                  ArrowScanGlobalState &parallel_state) {
      lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
+     if (parallel_state.done) {
+         return false;
+     }
      state.chunk_offset = 0;

      auto current_chunk = parallel_state.stream->GetNextChunk();
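Note: together with the hunk below, the new done flag latches end-of-stream under main_mutex, so once any thread sees the released Arrow array no later caller touches the drained stream again. The latch pattern, using only names from this diff:

    lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
    if (parallel_state.done) {
        return false; // another thread already drained the stream
    }
    auto current_chunk = parallel_state.stream->GetNextChunk();
    if (!current_chunk->arrow_array.release) { // released array == end of stream
        parallel_state.done = true;            // latch for all later callers
        return false;
    }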
@@ -115407,6 +115579,7 @@ bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind
      state.chunk = move(current_chunk);
      //! have we run out of chunks? we are done
      if (!state.chunk->arrow_array.release) {
+         parallel_state.done = true;
          return false;
      }
      return true;
@@ -117808,6 +117981,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
      table_function.named_parameters["skip"] = LogicalType::BIGINT;
      table_function.named_parameters["max_line_size"] = LogicalType::VARCHAR;
      table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR;
+     table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
  }

  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
@@ -121638,8 +121812,7 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
          // we don't emit any statistics for tables that have outstanding transaction-local data
          return nullptr;
      }
-     auto storage_idx = GetStorageIndex(*bind_data.table, column_id);
-     return bind_data.table->storage->GetStatistics(context, storage_idx);
+     return bind_data.table->GetStatistics(context, column_id);
  }

  static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
@@ -123211,7 +123384,7 @@ bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row) {
      }
      idx_t entry_idx = row / 64;
      idx_t idx_in_entry = row % 64;
-     return validity[entry_idx] & (1 << idx_in_entry);
+     return validity[entry_idx] & ((idx_t)1 << idx_in_entry);
  }

  void duckdb_validity_set_row_validity(uint64_t *validity, idx_t row, bool valid) {
@@ -123228,7 +123401,7 @@ void duckdb_validity_set_row_invalid(uint64_t *validity, idx_t row) {
      }
      idx_t entry_idx = row / 64;
      idx_t idx_in_entry = row % 64;
-     validity[entry_idx] &= ~(1 << idx_in_entry);
+     validity[entry_idx] &= ~((uint64_t)1 << idx_in_entry);
  }

  void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
@@ -123237,7 +123410,7 @@ void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
      }
      idx_t entry_idx = row / 64;
      idx_t idx_in_entry = row % 64;
-     validity[entry_idx] |= 1 << idx_in_entry;
+     validity[entry_idx] |= (uint64_t)1 << idx_in_entry;
  }


@@ -126420,6 +126593,11 @@ PendingExecutionResult ClientContext::ExecuteTaskInternal(ClientContextLock &loc
              query_progress = active_query->progress_bar->GetCurrentPercentage();
          }
          return result;
+     } catch (FatalException &ex) {
+         // fatal exceptions invalidate the entire database
+         result.SetError(PreservedError(ex));
+         auto &db = DatabaseInstance::GetDatabase(*this);
+         db.Invalidate();
      } catch (const Exception &ex) {
          result.SetError(PreservedError(ex));
      } catch (std::exception &ex) {
@@ -126639,9 +126817,19 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
      case StatementType::INSERT_STATEMENT:
      case StatementType::DELETE_STATEMENT:
      case StatementType::UPDATE_STATEMENT: {
-         auto sql = statement->ToString();
          Parser parser;
-         parser.ParseQuery(sql);
+         PreservedError error;
+         try {
+             parser.ParseQuery(statement->ToString());
+         } catch (const Exception &ex) {
+             error = PreservedError(ex);
+         } catch (std::exception &ex) {
+             error = PreservedError(ex);
+         }
+         if (error) {
+             // error in verifying query
+             return make_unique<PendingQueryResult>(error);
+         }
          statement = move(parser.statements[0]);
          break;
      }
@@ -137289,8 +137477,27 @@ namespace duckdb {
  //===--------------------------------------------------------------------===//
  // Install Extension
  //===--------------------------------------------------------------------===//
+ const string ExtensionHelper::NormalizeVersionTag(const string &version_tag) {
+     if (version_tag.length() > 0 && version_tag[0] != 'v') {
+         return "v" + version_tag;
+     }
+     return version_tag;
+ }
+
+ bool ExtensionHelper::IsRelease(const string &version_tag) {
+     return !StringUtil::Contains(version_tag, "-dev");
+ }
+
+ const string ExtensionHelper::GetVersionDirectoryName() {
+     if (IsRelease(DuckDB::LibraryVersion())) {
+         return NormalizeVersionTag(DuckDB::LibraryVersion());
+     } else {
+         return DuckDB::SourceID();
+     }
+ }
+
  const vector<string> ExtensionHelper::PathComponents() {
-     return vector<string> {".duckdb", "extensions", DuckDB::SourceID(), DuckDB::Platform()};
+     return vector<string> {".duckdb", "extensions", GetVersionDirectoryName(), DuckDB::Platform()};
  }

  string ExtensionHelper::ExtensionDirectory(ClientContext &context) {
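Note: with these helpers, the version segment of the extension path is the normalized tag for releases and the git commit hash for dev builds. Illustrative expansions (version strings are examples, not taken from this diff):

    // LibraryVersion() == "0.5.1"       -> IsRelease() true  -> directory "v0.5.1"
    // LibraryVersion() == "v0.5.1"      -> IsRelease() true  -> directory "v0.5.1"
    // LibraryVersion() == "0.5.1-dev97" -> IsRelease() false -> directory SourceID()
    // yielding e.g. ~/.duckdb/extensions/v0.5.1/<platform>/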
@@ -137363,7 +137570,7 @@ void ExtensionHelper::InstallExtension(ClientContext &context, const string &ext
          extension_name = "";
      }

-     auto url = StringUtil::Replace(url_template, "${REVISION}", DuckDB::SourceID());
+     auto url = StringUtil::Replace(url_template, "${REVISION}", GetVersionDirectoryName());
      url = StringUtil::Replace(url, "${PLATFORM}", DuckDB::Platform());
      url = StringUtil::Replace(url, "${NAME}", extension_name);

@@ -142541,9 +142748,7 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
      // Get HLL stats here
      auto actual_binding = relation_column_to_original_column[key];

-     // sometimes base stats is null (test_709.test) returns null for base stats while
-     // there is still a catalog table. Anybody know anything about this?
-     auto base_stats = catalog_table->storage->GetStatistics(context, actual_binding.column_index);
+     auto base_stats = catalog_table->GetStatistics(context, actual_binding.column_index);
      if (base_stats) {
          count = base_stats->GetDistinctCount();
      }
@@ -143239,6 +143444,7 @@ private:



+
  namespace duckdb {

  class DeliminatorPlanUpdater : LogicalOperatorVisitor {
@@ -143266,7 +143472,15 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
              cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
              continue;
          }
-         auto &colref = (BoundColumnRefExpression &)*cond.right;
+         Expression *rhs = cond.right.get();
+         while (rhs->type == ExpressionType::OPERATOR_CAST) {
+             auto &cast = (BoundCastExpression &)*rhs;
+             rhs = cast.child.get();
+         }
+         if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
+             throw InternalException("Erorr in deliminator: expected a bound column reference");
+         }
+         auto &colref = (BoundColumnRefExpression &)*rhs;
          if (projection_map.find(colref.binding) != projection_map.end()) {
              // value on the right is a projection of removed DelimGet
              for (idx_t i = 0; i < decs->size(); i++) {
@@ -144414,7 +144628,10 @@ FilterResult FilterCombiner::AddBoundComparisonFilter(Expression *expr) {
      auto node = GetNode(left_is_scalar ? comparison.right.get() : comparison.left.get());
      idx_t equivalence_set = GetEquivalenceSet(node);
      auto scalar = left_is_scalar ? comparison.left.get() : comparison.right.get();
-     auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
+     Value constant_value;
+     if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
+         return FilterResult::UNSATISFIABLE;
+     }
      if (constant_value.IsNull()) {
          // comparisons with null are always null (i.e. will never result in rows)
          return FilterResult::UNSATISFIABLE;
@@ -144495,7 +144712,11 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
      }
      if (expr->IsFoldable()) {
          // scalar condition, evaluate it
-         auto result = ExpressionExecutor::EvaluateScalar(*expr).CastAs(LogicalType::BOOLEAN);
+         Value result;
+         if (!ExpressionExecutor::TryEvaluateScalar(*expr, result)) {
+             return FilterResult::UNSUPPORTED;
+         }
+         result = result.CastAs(LogicalType::BOOLEAN);
          // check if the filter passes
          if (result.IsNull() || !BooleanValue::Get(result)) {
              // the filter does not pass the scalar test, create an empty result
@@ -144519,7 +144740,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {

      if (lower_is_scalar) {
          auto scalar = comparison.lower.get();
-         auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
+         Value constant_value;
+         if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
+             return FilterResult::UNSUPPORTED;
+         }

          // create the ExpressionValueInformation
          ExpressionValueInformation info;
@@ -144552,7 +144776,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {

      if (upper_is_scalar) {
          auto scalar = comparison.upper.get();
-         auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
+         Value constant_value;
+         if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
+             return FilterResult::UNSUPPORTED;
+         }

          // create the ExpressionValueInformation
          ExpressionValueInformation info;
@@ -145464,7 +145691,6 @@ unique_ptr<Expression> InClauseRewriter::VisitReplace(BoundOperatorExpression &e
      // IN clause with many children: try to generate a mark join that replaces this IN expression
      // we can only do this if the expressions in the expression list are scalar
      for (idx_t i = 1; i < expr.children.size(); i++) {
-         D_ASSERT(expr.children[i]->return_type == in_type);
          if (!expr.children[i]->IsFoldable()) {
              // non-scalar expression
              all_scalar = false;
@@ -147903,21 +148129,35 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownAggregate(unique_ptr<Logical
147903
148129
  FilterPushdown child_pushdown(optimizer);
147904
148130
  for (idx_t i = 0; i < filters.size(); i++) {
147905
148131
  auto &f = *filters[i];
147906
- // check if any aggregate or GROUPING functions are in the set
147907
- if (f.bindings.find(aggr.aggregate_index) == f.bindings.end() &&
147908
- f.bindings.find(aggr.groupings_index) == f.bindings.end()) {
147909
- // no aggregate! we can push this down
147910
- // rewrite any group bindings within the filter
147911
- f.filter = ReplaceGroupBindings(aggr, move(f.filter));
147912
- // add the filter to the child node
147913
- if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
147914
- // filter statically evaluates to false, strip tree
147915
- return make_unique<LogicalEmptyResult>(move(op));
148132
+ if (f.bindings.find(aggr.aggregate_index) != f.bindings.end()) {
148133
+ // filter on aggregate: cannot push down
148134
+ continue;
148135
+ }
148136
+ if (f.bindings.find(aggr.groupings_index) != f.bindings.end()) {
148137
+ // filter on a GROUPING function: cannot push down
148138
+ continue;
148139
+ }
148140
+ // if there are any empty grouping sets, we cannot push down filters
148141
+ bool has_empty_grouping_sets = false;
148142
+ for (auto &grp : aggr.grouping_sets) {
148143
+ if (grp.empty()) {
148144
+ has_empty_grouping_sets = true;
147916
148145
  }
147917
- // erase the filter from here
147918
- filters.erase(filters.begin() + i);
147919
- i--;
147920
148146
  }
148147
+ if (has_empty_grouping_sets) {
148148
+ continue;
148149
+ }
148150
+ // no aggregate! we can push this down
148151
+ // rewrite any group bindings within the filter
148152
+ f.filter = ReplaceGroupBindings(aggr, move(f.filter));
148153
+ // add the filter to the child node
148154
+ if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
148155
+ // filter statically evaluates to false, strip tree
148156
+ return make_unique<LogicalEmptyResult>(move(op));
148157
+ }
148158
+ // erase the filter from here
148159
+ filters.erase(filters.begin() + i);
148160
+ i--;
147921
148161
  }
147922
148162
  child_pushdown.GenerateFilters();
147923
148163
 
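
The PushdownAggregate rewrite flattens the old nested condition into early continues and adds one new rule: if any grouping set is empty, no filter may be pushed below the aggregate, because the empty set emits a single global row regardless of which input rows a pushed-down filter would remove. A standalone sketch of the eligibility test, with std::set standing in for DuckDB's binding and grouping-set types:

#include <iostream>
#include <set>
#include <vector>

using Bindings = std::set<int>;
using GroupingSet = std::set<int>;

// mirrors the per-filter checks above: any of the three conditions vetoes pushdown
static bool CanPushdownFilter(const Bindings &filter_bindings, int aggregate_index, int groupings_index,
                              const std::vector<GroupingSet> &grouping_sets) {
    if (filter_bindings.count(aggregate_index)) {
        return false; // filter references an aggregate
    }
    if (filter_bindings.count(groupings_index)) {
        return false; // filter references a GROUPING() result
    }
    for (auto &grp : grouping_sets) {
        if (grp.empty()) {
            // e.g. GROUP BY GROUPING SETS ((a), ()): the empty set aggregates
            // over all rows, so filtering below the aggregate changes its result
            return false;
        }
    }
    return true;
}

int main() {
    std::vector<GroupingSet> sets = {{0}, {}};                // one empty grouping set
    std::cout << CanPushdownFilter({5}, 1, 2, sets) << "\n";  // 0: empty set vetoes pushdown
    std::cout << CanPushdownFilter({5}, 1, 2, {{0}}) << "\n"; // 1: safe to push down
}
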
@@ -152623,6 +152863,19 @@ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
152623
152863
  } // namespace duckdb
152624
152864
 
152625
152865
 
152866
+ namespace duckdb {
152867
+
152868
+ BasePipelineEvent::BasePipelineEvent(shared_ptr<Pipeline> pipeline_p)
152869
+ : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
152870
+ }
152871
+
152872
+ BasePipelineEvent::BasePipelineEvent(Pipeline &pipeline_p)
152873
+ : Event(pipeline_p.executor), pipeline(pipeline_p.shared_from_this()) {
152874
+ }
152875
+
152876
+ } // namespace duckdb
152877
+
152878
+
152626
152879
 
152627
152880
 
152628
152881
 
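
The new BasePipelineEvent factors out the pipeline member that PipelineEvent and PipelineFinishEvent previously duplicated (see the header hunks below). The reference-taking constructor relies on Pipeline deriving from std::enable_shared_from_this, so it only works for pipelines already owned by a shared_ptr. A minimal self-contained illustration of the two construction paths:

#include <iostream>
#include <memory>

struct Pipeline : std::enable_shared_from_this<Pipeline> {
    int id = 7;
};

struct BasePipelineEvent {
    std::shared_ptr<Pipeline> pipeline;
    explicit BasePipelineEvent(std::shared_ptr<Pipeline> pipeline_p) : pipeline(std::move(pipeline_p)) {
    }
    // only valid if pipeline_p is already managed by some shared_ptr
    explicit BasePipelineEvent(Pipeline &pipeline_p) : pipeline(pipeline_p.shared_from_this()) {
    }
};

int main() {
    auto pipe = std::make_shared<Pipeline>();
    BasePipelineEvent a(pipe);  // shares ownership explicitly
    BasePipelineEvent b(*pipe); // re-acquires shared ownership from the reference
    std::cout << a.pipeline->id + b.pipeline->id << "\n"; // 14
}
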
@@ -152742,16 +152995,13 @@ public:
152742
152995
 
152743
152996
 
152744
152997
 
152745
-
152746
152998
  namespace duckdb {
152747
152999
 
152748
- class PipelineEvent : public Event {
153000
+ //! A PipelineEvent is responsible for scheduling a pipeline
153001
+ class PipelineEvent : public BasePipelineEvent {
152749
153002
  public:
152750
153003
  PipelineEvent(shared_ptr<Pipeline> pipeline);
152751
153004
 
152752
- //! The pipeline that this event belongs to
152753
- shared_ptr<Pipeline> pipeline;
152754
-
152755
153005
  public:
152756
153006
  void Schedule() override;
152757
153007
  void FinishEvent() override;
@@ -152879,17 +153129,13 @@ private:
152879
153129
 
152880
153130
 
152881
153131
 
152882
-
152883
153132
  namespace duckdb {
152884
153133
  class Executor;
152885
153134
 
152886
- class PipelineFinishEvent : public Event {
153135
+ class PipelineFinishEvent : public BasePipelineEvent {
152887
153136
  public:
152888
153137
  PipelineFinishEvent(shared_ptr<Pipeline> pipeline);
152889
153138
 
152890
- //! The pipeline that this event belongs to
152891
- shared_ptr<Pipeline> pipeline;
152892
-
152893
153139
  public:
152894
153140
  void Schedule() override;
152895
153141
  void FinishEvent() override;
@@ -152916,6 +153162,9 @@ Executor &Executor::Get(ClientContext &context) {
152916
153162
 
152917
153163
  void Executor::AddEvent(shared_ptr<Event> event) {
152918
153164
  lock_guard<mutex> elock(executor_lock);
153165
+ if (cancelled) {
153166
+ return;
153167
+ }
152919
153168
  events.push_back(move(event));
152920
153169
  }
152921
153170
 
@@ -153219,6 +153468,7 @@ void Executor::CancelTasks() {
153219
153468
  vector<weak_ptr<Pipeline>> weak_references;
153220
153469
  {
153221
153470
  lock_guard<mutex> elock(executor_lock);
153471
+ cancelled = true;
153222
153472
  weak_references.reserve(pipelines.size());
153223
153473
  for (auto &pipeline : pipelines) {
153224
153474
  weak_references.push_back(weak_ptr<Pipeline>(pipeline));
@@ -153295,10 +153545,10 @@ PendingExecutionResult Executor::ExecuteTask() {
153295
153545
  lock_guard<mutex> elock(executor_lock);
153296
153546
  pipelines.clear();
153297
153547
  NextExecutor();
153298
- if (!exceptions.empty()) { // LCOV_EXCL_START
153548
+ if (HasError()) { // LCOV_EXCL_START
153299
153549
  // an exception has occurred executing one of the pipelines
153300
153550
  execution_result = PendingExecutionResult::EXECUTION_ERROR;
153301
- ThrowExceptionInternal();
153551
+ ThrowException();
153302
153552
  } // LCOV_EXCL_STOP
153303
153553
  execution_result = PendingExecutionResult::RESULT_READY;
153304
153554
  return execution_result;
@@ -153307,6 +153557,7 @@ PendingExecutionResult Executor::ExecuteTask() {
153307
153557
  void Executor::Reset() {
153308
153558
  lock_guard<mutex> elock(executor_lock);
153309
153559
  physical_plan = nullptr;
153560
+ cancelled = false;
153310
153561
  owned_plan.reset();
153311
153562
  root_executor.reset();
153312
153563
  root_pipelines.clear();
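
Taken together with the AddEvent and CancelTasks hunks above, the executor now latches a cancelled flag under executor_lock: once CancelTasks sets it, late-arriving events are dropped rather than queued, and Reset rearms the executor for the next query. A compact standalone sketch of the latch (simplified types, not DuckDB's classes):

#include <iostream>
#include <memory>
#include <mutex>
#include <vector>

struct Event {
    int id;
};

class Executor {
public:
    void AddEvent(std::shared_ptr<Event> event) {
        std::lock_guard<std::mutex> elock(executor_lock);
        if (cancelled) {
            return; // late-arriving events are silently dropped
        }
        events.push_back(std::move(event));
    }
    void CancelTasks() {
        std::lock_guard<std::mutex> elock(executor_lock);
        cancelled = true;
    }
    void Reset() {
        std::lock_guard<std::mutex> elock(executor_lock);
        cancelled = false;
        events.clear();
    }
    size_t EventCount() {
        std::lock_guard<std::mutex> elock(executor_lock);
        return events.size();
    }

private:
    std::mutex executor_lock;
    bool cancelled = false;
    std::vector<std::shared_ptr<Event>> events;
};

int main() {
    Executor ex;
    ex.AddEvent(std::make_shared<Event>(Event{1}));
    ex.CancelTasks();
    ex.AddEvent(std::make_shared<Event>(Event{2})); // dropped
    std::cout << ex.EventCount() << "\n";           // 1
}
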
@@ -153343,7 +153594,7 @@ vector<LogicalType> Executor::GetTypes() {
153343
153594
  }
153344
153595
 
153345
153596
  void Executor::PushError(PreservedError exception) {
153346
- lock_guard<mutex> elock(executor_lock);
153597
+ lock_guard<mutex> elock(error_lock);
153347
153598
  // interrupt execution of any other pipelines that belong to this executor
153348
153599
  context.interrupted = true;
153349
153600
  // push the exception onto the stack
@@ -153351,20 +153602,16 @@ void Executor::PushError(PreservedError exception) {
153351
153602
  }
153352
153603
 
153353
153604
  bool Executor::HasError() {
153354
- lock_guard<mutex> elock(executor_lock);
153605
+ lock_guard<mutex> elock(error_lock);
153355
153606
  return !exceptions.empty();
153356
153607
  }
153357
153608
 
153358
153609
  void Executor::ThrowException() {
153359
- lock_guard<mutex> elock(executor_lock);
153360
- ThrowExceptionInternal();
153361
- }
153362
-
153363
- void Executor::ThrowExceptionInternal() { // LCOV_EXCL_START
153610
+ lock_guard<mutex> elock(error_lock);
153364
153611
  D_ASSERT(!exceptions.empty());
153365
153612
  auto &entry = exceptions[0];
153366
153613
  entry.Throw();
153367
- } // LCOV_EXCL_STOP
153614
+ }
153368
153615
 
153369
153616
  void Executor::Flush(ThreadContext &tcontext) {
153370
153617
  profiler->Flush(tcontext.profiler);
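
Error state moves from executor_lock to a dedicated error_lock, so PushError, HasError, and ThrowException never contend with (or deadlock against) code paths already holding executor_lock; that is what lets the ExecuteTask hunk above call ThrowException directly, while still under executor_lock, and drop the ThrowExceptionInternal indirection. A sketch of the two-mutex split with simplified types:

#include <iostream>
#include <mutex>
#include <stdexcept>
#include <string>
#include <vector>

class Executor {
public:
    void PushError(std::string error) {
        std::lock_guard<std::mutex> elock(error_lock); // not executor_lock
        exceptions.push_back(std::move(error));
    }
    bool HasError() {
        std::lock_guard<std::mutex> elock(error_lock);
        return !exceptions.empty();
    }
    void ThrowException() {
        std::lock_guard<std::mutex> elock(error_lock);
        throw std::runtime_error(exceptions.at(0)); // first recorded error wins
    }

private:
    std::mutex executor_lock; // guards pipelines/events (unused in this sketch)
    std::mutex error_lock;    // guards only the exception list
    std::vector<std::string> exceptions;
};

int main() {
    Executor ex;
    ex.PushError("pipeline failed");
    if (ex.HasError()) {
        try {
            ex.ThrowException();
        } catch (const std::exception &e) {
            std::cout << e.what() << "\n"; // pipeline failed
        }
    }
}
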
@@ -153629,6 +153876,9 @@ void Pipeline::Ready() {
153629
153876
  }
153630
153877
 
153631
153878
  void Pipeline::Finalize(Event &event) {
153879
+ if (executor.HasError()) {
153880
+ return;
153881
+ }
153632
153882
  D_ASSERT(ready);
153633
153883
  try {
153634
153884
  auto sink_state = sink->Finalize(*this, event, executor.context, *sink->sink_state);
@@ -153739,16 +153989,25 @@ void PipelineCompleteEvent::FinalizeFinish() {
153739
153989
  } // namespace duckdb
153740
153990
 
153741
153991
 
153992
+
153742
153993
  namespace duckdb {
153743
153994
 
153744
- PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p)
153745
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
153995
+ PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
153746
153996
  }
153747
153997
 
153748
153998
  void PipelineEvent::Schedule() {
153749
153999
  auto event = shared_from_this();
153750
- pipeline->Schedule(event);
153751
- D_ASSERT(total_tasks > 0);
154000
+ auto &executor = pipeline->executor;
154001
+ try {
154002
+ pipeline->Schedule(event);
154003
+ D_ASSERT(total_tasks > 0);
154004
+ } catch (Exception &ex) {
154005
+ executor.PushError(PreservedError(ex));
154006
+ } catch (std::exception &ex) {
154007
+ executor.PushError(PreservedError(ex));
154008
+ } catch (...) { // LCOV_EXCL_START
154009
+ executor.PushError(PreservedError("Unknown exception in Schedule!"));
154010
+ } // LCOV_EXCL_STOP
153752
154011
  }
153753
154012
 
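
Schedule() now traps anything thrown while scheduling pipeline tasks and routes it into the executor's error list as a PreservedError, so a scheduling failure surfaces as a query error instead of escaping the event machinery. A self-contained sketch of the catch-all-to-error-queue pattern (simplified types):

#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// stand-in for Executor::PushError(PreservedError)
static std::vector<std::string> errors;
static void PushError(std::string e) {
    errors.push_back(std::move(e));
}

// mirrors PipelineEvent::Schedule: scheduling failures become stored errors
static void Schedule(void (*schedule_pipeline)()) {
    try {
        schedule_pipeline();
    } catch (std::exception &ex) {
        PushError(ex.what());
    } catch (...) {
        PushError("Unknown exception in Schedule!");
    }
}

int main() {
    Schedule([] { throw std::runtime_error("boom"); });
    Schedule([] { throw 42; }); // non-standard exceptions are captured too
    std::cout << errors.size() << "\n"; // 2
}
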
153754
154013
  void PipelineEvent::FinishEvent() {
@@ -154131,8 +154390,7 @@ void PipelineExecutor::EndOperator(PhysicalOperator *op, DataChunk *chunk) {
154131
154390
 
154132
154391
  namespace duckdb {
154133
154392
 
154134
- PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p)
154135
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
154393
+ PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
154136
154394
  }
154137
154395
 
154138
154396
  void PipelineFinishEvent::Schedule() {
@@ -167667,7 +167925,7 @@ string QueryNode::ResultModifiersToString() const {
167667
167925
  } else if (modifier.type == ResultModifierType::LIMIT_PERCENT_MODIFIER) {
167668
167926
  auto &limit_p_modifier = (LimitPercentModifier &)modifier;
167669
167927
  if (limit_p_modifier.limit) {
167670
- result += " LIMIT " + limit_p_modifier.limit->ToString() + " %";
167928
+ result += " LIMIT (" + limit_p_modifier.limit->ToString() + ") %";
167671
167929
  }
167672
167930
  if (limit_p_modifier.offset) {
167673
167931
  result += " OFFSET " + limit_p_modifier.offset->ToString();
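
The serialized LIMIT expression is now parenthesized before the percent sign is appended, which guards against the "%" associating with only part of a compound expression when the string is re-parsed. A trivial sketch of the fixed emission (hypothetical helper, not DuckDB's API):

#include <iostream>
#include <string>

// parenthesize the expression before appending the unit
static std::string LimitPercentToString(const std::string &limit_expr) {
    return " LIMIT (" + limit_expr + ") %";
}

int main() {
    std::cout << LimitPercentToString("n + 1") << "\n"; // " LIMIT (n + 1) %"
}
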
@@ -175139,6 +175397,8 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
175139
175397
  // we didn't bind columns, try again in children
175140
175398
  return BindResult(error);
175141
175399
  }
175400
+ } else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
175401
+ return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
175142
175402
  }
175143
175403
  if (!filter_error.empty()) {
175144
175404
  return BindResult(filter_error);
@@ -175146,8 +175406,9 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
175146
175406
 
175147
175407
  if (aggr.filter) {
175148
175408
  auto &child = (BoundExpression &)*aggr.filter;
175149
- bound_filter = move(child.expr);
175409
+ bound_filter = BoundCastExpression::AddCastToType(move(child.expr), LogicalType::BOOLEAN);
175150
175410
  }
175411
+
175151
175412
  // all children bound successfully
175152
175413
  // extract the children and types
175153
175414
  vector<LogicalType> types;
@@ -176300,7 +176561,7 @@ BindResult ExpressionBinder::BindMacro(FunctionExpression &function, ScalarMacro
176300
176561
  string error =
176301
176562
  MacroFunction::ValidateArguments(*macro_func->function, macro_func->name, function, positionals, defaults);
176302
176563
  if (!error.empty()) {
176303
- return BindResult(binder.FormatError(*expr->get(), error));
176564
+ throw BinderException(binder.FormatError(*expr->get(), error));
176304
176565
  }
176305
176566
 
176306
176567
  // create a MacroBinding to bind this macro's parameters to its arguments
@@ -177323,10 +177584,13 @@ public:
177323
177584
  public:
177324
177585
  unique_ptr<Expression> Bind(unique_ptr<ParsedExpression> expr);
177325
177586
 
177326
- idx_t MaxCount() {
177587
+ idx_t MaxCount() const {
177327
177588
  return max_count;
177328
177589
  }
177329
177590
 
177591
+ bool HasExtraList() const {
177592
+ return extra_list;
177593
+ }
177330
177594
  unique_ptr<Expression> CreateExtraReference(unique_ptr<ParsedExpression> expr);
177331
177595
 
177332
177596
  private:
@@ -177368,6 +177632,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177368
177632
  Value &delimiter_value) {
177369
177633
  auto new_binder = Binder::CreateBinder(context, this, true);
177370
177634
  if (delimiter->HasSubquery()) {
177635
+ if (!order_binder.HasExtraList()) {
177636
+ throw BinderException("Subquery in LIMIT/OFFSET not supported in set operation");
177637
+ }
177371
177638
  return order_binder.CreateExtraReference(move(delimiter));
177372
177639
  }
177373
177640
  ExpressionBinder expr_binder(*new_binder, context);
@@ -177378,6 +177645,8 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177378
177645
  delimiter_value = ExpressionExecutor::EvaluateScalar(*expr).CastAs(type);
177379
177646
  return nullptr;
177380
177647
  }
177648
+ // move any correlated columns to this binder
177649
+ MoveCorrelatedExpressions(*new_binder);
177381
177650
  return expr;
177382
177651
  }
177383
177652
 
@@ -179981,11 +180250,13 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179981
180250
  BindDefaultValues(base.columns, result->bound_defaults);
179982
180251
  }
179983
180252
 
180253
+ idx_t regular_column_count = 0;
179984
180254
  // bind collations to detect any unsupported collation errors
179985
180255
  for (auto &column : base.columns) {
179986
180256
  if (column.Generated()) {
179987
180257
  continue;
179988
180258
  }
180259
+ regular_column_count++;
179989
180260
  if (column.Type().id() == LogicalTypeId::VARCHAR) {
179990
180261
  ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
179991
180262
  }
@@ -179997,6 +180268,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179997
180268
  result->dependencies.insert(type_dependency);
179998
180269
  }
179999
180270
  }
180271
+ if (regular_column_count == 0) {
180272
+ throw BinderException("Creating a table without physical (non-generated) columns is not supported");
180273
+ }
180000
180274
  properties.allow_stream_result = false;
180001
180275
  return result;
180002
180276
  }
@@ -180424,6 +180698,13 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
180424
180698
  info->schema = table->schema->name;
180425
180699
  info->table = table->name;
180426
180700
 
180701
+ // We cannot export generated columns
180702
+ for (auto &col : table->columns) {
180703
+ if (!col.Generated()) {
180704
+ info->select_list.push_back(col.GetName());
180705
+ }
180706
+ }
180707
+
180427
180708
  exported_data.table_name = info->table;
180428
180709
  exported_data.schema_name = info->schema;
180429
180710
  exported_data.file_path = info->file_path;
@@ -180669,7 +180950,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
180669
180950
  }
180670
180951
 
180671
180952
  // parse select statement and add to logical plan
180672
- auto root_select = Bind(*stmt.select_statement);
180953
+ auto select_binder = Binder::CreateBinder(context, this);
180954
+ auto root_select = select_binder->Bind(*stmt.select_statement);
180955
+ MoveCorrelatedExpressions(*select_binder);
180956
+
180673
180957
  CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
180674
180958
  table->name.c_str());
180675
180959
 
@@ -181951,6 +182235,18 @@ string Binder::RetrieveUsingBinding(Binder &current_binder, UsingColumnSet *curr
181951
182235
  return binding;
181952
182236
  }
181953
182237
 
182238
+ static vector<string> RemoveDuplicateUsingColumns(const vector<string> &using_columns) {
182239
+ vector<string> result;
182240
+ case_insensitive_set_t handled_columns;
182241
+ for (auto &using_column : using_columns) {
182242
+ if (handled_columns.find(using_column) == handled_columns.end()) {
182243
+ handled_columns.insert(using_column);
182244
+ result.push_back(using_column);
182245
+ }
182246
+ }
182247
+ return result;
182248
+ }
182249
+
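
RemoveDuplicateUsingColumns keeps the first occurrence of each USING column, compared case-insensitively, so a join USING (a, A, a) binds the column once. A standalone analog, folding to lowercase where DuckDB uses its case_insensitive_set_t:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

static std::string Fold(std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return s;
}

static std::vector<std::string> RemoveDuplicateUsingColumns(const std::vector<std::string> &using_columns) {
    std::vector<std::string> result;
    std::unordered_set<std::string> handled; // lowercase-folded names
    for (auto &col : using_columns) {
        if (handled.insert(Fold(col)).second) {
            result.push_back(col); // first occurrence keeps its original spelling
        }
    }
    return result;
}

int main() {
    for (auto &c : RemoveDuplicateUsingColumns({"a", "A", "b", "a"})) {
        std::cout << c << "\n"; // a, b
    }
}
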
181954
182250
  unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
181955
182251
  auto result = make_unique<BoundJoinRef>();
181956
182252
  result->left_binder = Binder::CreateBinder(context, this);
@@ -182020,6 +182316,8 @@ unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
182020
182316
  D_ASSERT(!result->condition);
182021
182317
  extra_using_columns = ref.using_columns;
182022
182318
  }
182319
+ extra_using_columns = RemoveDuplicateUsingColumns(extra_using_columns);
182320
+
182023
182321
  if (!extra_using_columns.empty()) {
182024
182322
  vector<UsingColumnSet *> left_using_bindings;
182025
182323
  vector<UsingColumnSet *> right_using_bindings;
@@ -182465,7 +182763,7 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundEmptyTableRef &ref) {
182465
182763
  namespace duckdb {
182466
182764
 
182467
182765
  unique_ptr<LogicalOperator> Binder::CreatePlan(BoundExpressionListRef &ref) {
182468
- auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(0);
182766
+ auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(GenerateTableIndex());
182469
182767
  // values list, first plan any subqueries in the list
182470
182768
  for (auto &expr_list : ref.values) {
182471
182769
  for (auto &expr : expr_list) {
@@ -185018,7 +185316,7 @@ BindResult ConstantBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr
185018
185316
  case ExpressionClass::COLUMN_REF:
185019
185317
  return BindResult(clause + " cannot contain column names");
185020
185318
  case ExpressionClass::SUBQUERY:
185021
- return BindResult(clause + " cannot contain subqueries");
185319
+ throw BinderException(clause + " cannot contain subqueries");
185022
185320
  case ExpressionClass::DEFAULT:
185023
185321
  return BindResult(clause + " cannot contain DEFAULT clause");
185024
185322
  case ExpressionClass::WINDOW:
@@ -185278,6 +185576,9 @@ unique_ptr<Expression> OrderBinder::CreateProjectionReference(ParsedExpression &
185278
185576
  }
185279
185577
 
185280
185578
  unique_ptr<Expression> OrderBinder::CreateExtraReference(unique_ptr<ParsedExpression> expr) {
185579
+ if (!extra_list) {
185580
+ throw InternalException("CreateExtraReference called without extra_list");
185581
+ }
185281
185582
  auto result = CreateProjectionReference(*expr, extra_list->size());
185282
185583
  extra_list->push_back(move(expr));
185283
185584
  return result;
@@ -189404,6 +189705,9 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
189404
189705
  case LogicalOperatorType::LOGICAL_ORDER_BY:
189405
189706
  plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
189406
189707
  return plan;
189708
+ case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: {
189709
+ throw ParserException("Recursive CTEs not supported in correlated subquery");
189710
+ }
189407
189711
  default:
189408
189712
  throw InternalException("Logical operator type \"%s\" for dependent join", LogicalOperatorToString(plan->type));
189409
189713
  }
@@ -191530,7 +191834,7 @@ void CheckpointManager::CreateCheckpoint() {
191530
191834
  wal->Flush();
191531
191835
 
191532
191836
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_HEADER) {
191533
- throw IOException("Checkpoint aborted before header write because of PRAGMA checkpoint_abort flag");
191837
+ throw FatalException("Checkpoint aborted before header write because of PRAGMA checkpoint_abort flag");
191534
191838
  }
191535
191839
 
191536
191840
  // finally write the updated header
@@ -191539,7 +191843,7 @@ void CheckpointManager::CreateCheckpoint() {
191539
191843
  block_manager.WriteHeader(header);
191540
191844
 
191541
191845
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_TRUNCATE) {
191542
- throw IOException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
191846
+ throw FatalException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
191543
191847
  }
191544
191848
 
191545
191849
  // truncate the WAL
@@ -197090,7 +197394,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
197090
197394
  }
197091
197395
 
197092
197396
  // Alter column to add new constraint
197093
- DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<Constraint> constraint)
197397
+ DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint)
197094
197398
  : info(parent.info), db(parent.db), total_rows(parent.total_rows.load()), row_groups(parent.row_groups),
197095
197399
  is_root(true) {
197096
197400
 
@@ -197265,7 +197569,7 @@ void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScan
197265
197569
 
197266
197570
  bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state,
197267
197571
  const vector<column_t> &column_ids) {
197268
- while (state.current_row_group) {
197572
+ while (state.current_row_group && state.current_row_group->count > 0) {
197269
197573
  idx_t vector_index;
197270
197574
  idx_t max_row;
197271
197575
  if (ClientConfig::GetConfig(context).verify_parallelism) {
@@ -197279,13 +197583,8 @@ bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState
197279
197583
  max_row = state.current_row_group->start + state.current_row_group->count;
197280
197584
  }
197281
197585
  max_row = MinValue<idx_t>(max_row, state.max_row);
197282
- bool need_to_scan;
197283
- if (state.current_row_group->count == 0) {
197284
- need_to_scan = false;
197285
- } else {
197286
- need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197287
- state.current_row_group, vector_index, max_row);
197288
- }
197586
+ bool need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197587
+ state.current_row_group, vector_index, max_row);
197289
197588
  if (ClientConfig::GetConfig(context).verify_parallelism) {
197290
197589
  state.vector_index++;
197291
197590
  if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
@@ -197544,14 +197843,15 @@ static void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bf
197544
197843
  VerifyForeignKeyConstraint(bfk, context, chunk, false);
197545
197844
  }
197546
197845
 
197547
- void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const Constraint *constraint) {
197846
+ void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint) {
197548
197847
  if (constraint->type != ConstraintType::NOT_NULL) {
197549
197848
  throw NotImplementedException("FIXME: ALTER COLUMN with such constraint is not supported yet");
197550
197849
  }
197551
197850
  // scan the original table, check if there's any null value
197552
- auto &not_null_constraint = (NotNullConstraint &)*constraint;
197851
+ auto &not_null_constraint = (BoundNotNullConstraint &)*constraint;
197553
197852
  auto &transaction = Transaction::GetTransaction(context);
197554
197853
  vector<LogicalType> scan_types;
197854
+ D_ASSERT(not_null_constraint.index < parent.column_definitions.size());
197555
197855
  scan_types.push_back(parent.column_definitions[not_null_constraint.index].Type());
197556
197856
  DataChunk scan_chunk;
197557
197857
  auto &allocator = Allocator::Get(context);
@@ -198308,6 +198608,9 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
198308
198608
  return nullptr;
198309
198609
  }
198310
198610
  lock_guard<mutex> stats_guard(stats_lock);
198611
+ if (column_id >= column_stats.size()) {
198612
+ throw InternalException("Call to GetStatistics is out of range");
198613
+ }
198311
198614
  return column_stats[column_id]->stats->Copy();
198312
198615
  }
198313
198616
 
@@ -199596,7 +199899,7 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
199596
199899
 
199597
199900
  auto &config = DBConfig::GetConfig(db);
199598
199901
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_AFTER_FREE_LIST_WRITE) {
199599
- throw IOException("Checkpoint aborted after free list write because of PRAGMA checkpoint_abort flag");
199902
+ throw FatalException("Checkpoint aborted after free list write because of PRAGMA checkpoint_abort flag");
199600
199903
  }
199601
199904
 
199602
199905
  if (!use_direct_io) {
@@ -201122,6 +201425,7 @@ idx_t ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t coun
201122
201425
  }
201123
201426
  // after verifying that there are no conflicts we mark the tuple as deleted
201124
201427
  deleted[rows[i]] = transaction.transaction_id;
201428
+ rows[deleted_tuples] = rows[i];
201125
201429
  deleted_tuples++;
201126
201430
  }
201127
201431
  return deleted_tuples;
@@ -201449,6 +201753,8 @@ public:
201449
201753
  idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) override;
201450
201754
  idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override;
201451
201755
 
201756
+ void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE) override;
201757
+
201452
201758
  void InitializeAppend(ColumnAppendState &state) override;
201453
201759
  void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) override;
201454
201760
  void RevertAppend(row_t start_row) override;
@@ -204028,9 +204334,15 @@ void VersionDeleteState::Flush() {
204028
204334
  return;
204029
204335
  }
204030
204336
  // delete in the current info
204031
- delete_count += current_info->Delete(transaction, rows, count);
204032
- // now push the delete into the undo buffer
204033
- transaction.PushDelete(table, current_info, rows, count, base_row + chunk_row);
204337
+ // it is possible for delete statements to delete the same tuple multiple times when combined with a USING clause
204338
+ // in the current_info->Delete, we check which tuples are actually deleted (excluding duplicate deletions)
204339
+ // this is returned in the actual_delete_count
204340
+ auto actual_delete_count = current_info->Delete(transaction, rows, count);
204341
+ delete_count += actual_delete_count;
204342
+ if (actual_delete_count > 0) {
204343
+ // now push the delete into the undo buffer, but only if any deletes were actually performed
204344
+ transaction.PushDelete(table, current_info, rows, actual_delete_count, base_row + chunk_row);
204345
+ }
204034
204346
  count = 0;
204035
204347
  }
204036
204348
 
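
This comment pairs with the rows[deleted_tuples] = rows[i] compaction added to ChunkVectorInfo::Delete in the -201122 hunk further up: Delete now compacts the row array in place so that only the row ids it actually marked are counted and pushed into the undo buffer, which is what makes a USING-driven double delete of the same tuple harmless. A standalone sketch of that compaction:

#include <iostream>
#include <set>

using row_t = long;

// stand-in for ChunkVectorInfo::Delete: marks rows deleted and compacts the
// array so rows[0..result) holds only the newly deleted row ids
static size_t Delete(std::set<row_t> &deleted, row_t rows[], size_t count) {
    size_t deleted_tuples = 0;
    for (size_t i = 0; i < count; i++) {
        if (!deleted.insert(rows[i]).second) {
            continue; // already deleted (e.g. duplicated by a USING join)
        }
        rows[deleted_tuples] = rows[i];
        deleted_tuples++;
    }
    return deleted_tuples;
}

int main() {
    std::set<row_t> deleted;
    row_t rows[] = {3, 3, 5, 3};
    size_t n = Delete(deleted, rows, 4);
    std::cout << n << "\n"; // 2: only rows 3 and 5 were actually deleted
}
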
@@ -204407,6 +204719,15 @@ idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t
204407
204719
  return scan_count;
204408
204720
  }
204409
204721
 
204722
+ void StructColumnData::Skip(ColumnScanState &state, idx_t count) {
204723
+ validity.Skip(state.child_states[0], count);
204724
+
204725
+ // skip inside the sub-columns
204726
+ for (idx_t child_idx = 0; child_idx < sub_columns.size(); child_idx++) {
204727
+ sub_columns[child_idx]->Skip(state.child_states[child_idx + 1], count);
204728
+ }
204729
+ }
204730
+
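
Struct columns implement the new Skip by forwarding to the validity column (child state 0) and then to every sub-column (states 1..n), so all nested readers stay row-aligned. A minimal sketch of the fan-out, with hypothetical reader types in place of DuckDB's column data classes:

#include <cstdio>
#include <memory>
#include <vector>

using idx_t = unsigned long long;

struct ColumnReader {
    idx_t position = 0;
    virtual ~ColumnReader() = default;
    virtual void Skip(idx_t count) {
        position += count;
    }
};

struct StructReader : ColumnReader {
    ColumnReader validity;
    std::vector<std::unique_ptr<ColumnReader>> sub_columns;
    void Skip(idx_t count) override {
        validity.Skip(count); // the struct's own null mask
        for (auto &child : sub_columns) {
            child->Skip(count); // keep every nested reader aligned
        }
        position += count;
    }
};

int main() {
    StructReader s;
    s.sub_columns.push_back(std::make_unique<ColumnReader>());
    s.Skip(2048);
    std::printf("%llu %llu\n", s.validity.position, s.sub_columns[0]->position); // 2048 2048
}
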
204410
204731
  void StructColumnData::InitializeAppend(ColumnAppendState &state) {
204411
204732
  ColumnAppendState validity_append;
204412
204733
  validity.InitializeAppend(validity_append);
@@ -206866,6 +207187,7 @@ void CleanupState::CleanupUpdate(UpdateInfo *info) {
206866
207187
 
206867
207188
  void CleanupState::CleanupDelete(DeleteInfo *info) {
206868
207189
  auto version_table = info->table;
207190
+ D_ASSERT(version_table->info->cardinality >= info->count);
206869
207191
  version_table->info->cardinality -= info->count;
206870
207192
  if (version_table->info->indexes.Empty()) {
206871
207193
  // this table has no indexes: no cleanup to be done
@@ -260291,49 +260613,84 @@ static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t
260291
260613
  }
260292
260614
  }
260293
260615
 
260294
- UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260295
- UnicodeType type = UnicodeType::ASCII;
260296
- char c;
260297
- for (size_t i = 0; i < len; i++) {
260298
- c = s[i];
260299
- if (c == '\0') {
260300
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260301
- return UnicodeType::INVALID;
260302
- }
260303
- // 1 Byte / ASCII
260304
- if ((c & 0x80) == 0) {
260305
- continue;
260306
- }
260307
- type = UnicodeType::UNICODE;
260308
- if ((s[++i] & 0xC0) != 0x80) {
260309
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260310
- return UnicodeType::INVALID;
260311
- }
260312
- if ((c & 0xE0) == 0xC0) {
260313
- continue;
260314
- }
260315
- if ((s[++i] & 0xC0) != 0x80) {
260316
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260317
- return UnicodeType::INVALID;
260318
- }
260319
- if ((c & 0xF0) == 0xE0) {
260320
- continue;
260321
- }
260322
- if ((s[++i] & 0xC0) != 0x80) {
260616
+ template <const int nextra_bytes, const int mask>
260617
+ static inline UnicodeType
260618
+ UTF8ExtraByteLoop(const int first_pos_seq, int utf8char, size_t &i,
260619
+ const char *s, const size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260620
+ if ((len - i) < (nextra_bytes + 1)) {
260621
+ /* incomplete byte sequence */
260622
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::BYTE_MISMATCH);
260623
+ return UnicodeType::INVALID;
260624
+ }
260625
+ for (size_t j = 0; j < nextra_bytes; j++) {
260626
+ int c = (int) s[++i];
260627
+ /* now validate the extra bytes */
260628
+ if ((c & 0xC0) != 0x80) {
260629
+ /* extra byte is not in the format 10xxxxxx */
260323
260630
  AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260324
260631
  return UnicodeType::INVALID;
260325
260632
  }
260326
- if ((c & 0xF8) == 0xF0) {
260327
- continue;
260328
- }
260329
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260633
+ utf8char = (utf8char << 6) | (c & 0x3F);
260634
+ }
260635
+ if ((utf8char & mask) == 0) {
260636
+ /* invalid UTF-8 codepoint, not shortest possible */
260637
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260638
+ return UnicodeType::INVALID;
260639
+ }
260640
+ if (utf8char > 0x10FFFF) {
260641
+ /* value not representable by Unicode */
260642
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260330
260643
  return UnicodeType::INVALID;
260331
260644
  }
260645
+ if ((utf8char & 0x1FFF800) == 0xD800) {
260646
+ /* Unicode characters from U+D800 to U+DFFF are surrogate characters used by UTF-16 which are invalid in UTF-8 */
260647
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260648
+ return UnicodeType::INVALID;
260649
+ }
260650
+ return UnicodeType::UNICODE;
260651
+ }
260332
260652
 
260653
+ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260654
+ UnicodeType type = UnicodeType::ASCII;
260655
+
260656
+ for (size_t i = 0; i < len; i++) {
260657
+ int c = (int) s[i];
260658
+
260659
+ if ((c & 0x80) == 0) {
260660
+ /* 1 byte sequence */
260661
+ if (c == '\0') {
260662
+ /* NULL byte not allowed */
260663
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260664
+ return UnicodeType::INVALID;
260665
+ }
260666
+ } else {
260667
+ int first_pos_seq = i;
260668
+
260669
+ if ((c & 0xE0) == 0xC0) {
260670
+ /* 2 byte sequence */
260671
+ int utf8char = c & 0x1F;
260672
+ type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260673
+ } else if ((c & 0xF0) == 0xE0) {
260674
+ /* 3 byte sequence */
260675
+ int utf8char = c & 0x0F;
260676
+ type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260677
+ } else if ((c & 0xF8) == 0xF0) {
260678
+ /* 4 byte sequence */
260679
+ int utf8char = c & 0x07;
260680
+ type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260681
+ } else {
260682
+ /* invalid UTF-8 start byte */
260683
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260684
+ return UnicodeType::INVALID;
260685
+ }
260686
+ if (type == UnicodeType::INVALID) {
260687
+ return type;
260688
+ }
260689
+ }
260690
+ }
260333
260691
  return type;
260334
260692
  }
260335
260693
 
260336
-
260337
260694
  char* Utf8Proc::Normalize(const char *s, size_t len) {
260338
260695
  assert(s);
260339
260696
  assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID);
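
The rewritten Analyze centralizes continuation-byte handling in the templated UTF8ExtraByteLoop: nextra_bytes is the number of 10xxxxxx bytes and mask selects the codepoint bits that must be non-zero for the shortest (non-overlong) encoding (0x000780 requires at least U+0080, 0x00F800 at least U+0800, 0x1F0000 at least U+10000). It also now rejects codepoints above U+10FFFF and the UTF-16 surrogate range U+D800 through U+DFFF. A compact standalone validator applying the same checks, for illustration only:

#include <cstdio>

// returns true if s is valid UTF-8, using the same masks as the diff above
static bool ValidUtf8(const unsigned char *s, size_t len) {
    for (size_t i = 0; i < len; i++) {
        int c = s[i], cp, extra, min_mask;
        if ((c & 0x80) == 0) {
            continue; // 1-byte / ASCII
        } else if ((c & 0xE0) == 0xC0) {
            cp = c & 0x1F; extra = 1; min_mask = 0x000780; // 2-byte sequence
        } else if ((c & 0xF0) == 0xE0) {
            cp = c & 0x0F; extra = 2; min_mask = 0x00F800; // 3-byte sequence
        } else if ((c & 0xF8) == 0xF0) {
            cp = c & 0x07; extra = 3; min_mask = 0x1F0000; // 4-byte sequence
        } else {
            return false; // invalid start byte
        }
        if (len - i < (size_t)extra + 1) {
            return false; // truncated sequence
        }
        for (int j = 0; j < extra; j++) {
            int cc = s[++i];
            if ((cc & 0xC0) != 0x80) {
                return false; // continuation byte is not 10xxxxxx
            }
            cp = (cp << 6) | (cc & 0x3F);
        }
        if ((cp & min_mask) == 0) {
            return false; // overlong encoding
        }
        if (cp > 0x10FFFF) {
            return false; // beyond the Unicode range
        }
        if ((cp & 0x1FFF800) == 0xD800) {
            return false; // UTF-16 surrogate, invalid in UTF-8
        }
    }
    return true;
}

int main() {
    std::printf("%d\n", ValidUtf8((const unsigned char *)"h\xC3\xA9", 3));    // 1: "hé"
    std::printf("%d\n", ValidUtf8((const unsigned char *)"\xC0\xAF", 2));     // 0: overlong '/'
    std::printf("%d\n", ValidUtf8((const unsigned char *)"\xED\xA0\x80", 3)); // 0: surrogate U+D800
}
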